首页 > 技术文章 > 批量处理NC格式文件

skypanxh 2022-03-21 14:38 原文

方案一:使用Arcpy处理

一、使用ArcMap处理

import arcpy
arcpy.env.overwriteOutput = True

inputnc = "E:/01UrbanWaterSourceSecurity/01Data/Runoff1992-2014/GRUN_v1_GSWP3_WGS84_05_1902_2014.nc"
# 提取出全部波段
arcpy.MakeNetCDFRasterLayer_md(inputnc,"Runoff","X","Y","Runoff_Layer","time","#","BY_VALUE") # 变量名称等查看数据介绍,一般都会有或者参考方案二查看
# 提取单波段
i = 1 # 一般为从1开始
for yr in range(1902,2015): # 年份
    for mon in range(1,13): # 月份
        nctif = arcpy.MakeRasterLayer_management("Runoff_Layer","oneband","#","-180 -90 180 90",str(i)) # 提取单波段
        name =  "E:/01UrbanWaterSourceSecurity/01Data/Runoff1992-2014/tif/" + str(yr) + str(mon).zfill(2)+".tif"
        arcpy.CopyRaster_management(nctif, name) # 单波段结果转tif
        print(name) 
        i = i + 1

方案二:使用python的netCDF4批量处理NC格式文件

一、使用ArcMap提取出第一期数据

1.使用工具箱中的“Make NetCDF Raster Layer”工具,提取出一个数据

image
可以发现该数据有正确的像元大小、坐标系等
image
image

2.导出该数据作为标准数据

image

二、使用ArcMap结合netCDF4

1. 查看数据属性

from netCDF4 import Dataset,num2date

infile = "../01Data/Runoff1992-2014/GRUN_v1_GSWP3_WGS84_05_1902_2014.nc"
data_set = Dataset(infile) # 读取nc文件信息
print(data_set)

输出为

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF3_CLASSIC data model, file format NETCDF3):
    title: GRUN
    version: GRUN 1.0
    meteorological_forcing: GSWP3
    temporal_resolution: monthly
    spatial_resolution: 0.5x0.5
    crs: WGS84
    proj4: +proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs
    EPSG: 4326
    references: Ghiggi et al.,2019. GRUN: An observation-based global gridded runoff dataset from 1902 to 2014. ESSD, doi: https://doi.org/10.5194/essd-2019-32
    authors: Gionata Ghiggi; Lukas Gudmundsson
    contacts: gionata.ghiggi@gmail.com; lukas.gudmundsson@env.ethz.ch
    institution: Land-Climate Dynamics, Institute for Atmospheric and Climate Science, ETH Zürich
    institution_id: IAC ETHZ
    dimensions(sizes): X(720), Y(360), time(1356)
    variables(dimensions): float64 X(X), float64 Y(Y), float64 time(time), float32 Runoff(time, Y, X)
    groups: 

可以看到variables变量X、Y为经纬度,time为时间,Runoff为需要的结果

2.批量导出结果

from osgeo import gdal
from netCDF4 import Dataset,num2date
import numpy as np

def WriteTiff(im_data,inputdir, path):
    raster = gdal.Open(inputdir)
    im_width = raster.RasterXSize #栅格矩阵的列数
    im_height = raster.RasterYSize #栅格矩阵的行数
    im_bands = raster.RasterCount #波段数
    im_geotrans = raster.GetGeoTransform()#获取仿射矩阵信息
    im_proj = raster.GetProjection()#获取投影信息

    
    if 'int8' in im_data.dtype.name:
        datatype = gdal.GDT_Byte
    elif 'int16' in im_data.dtype.name:
        datatype = gdal.GDT_UInt16
    else:
        datatype = gdal.GDT_Float32

    if len(im_data.shape) == 3:
        im_bands, im_height, im_width = im_data.shape
    elif len(im_data.shape) == 2:
        im_data = np.array([im_data])
    else:
        im_bands, (im_height, im_width) = 1, im_data.shape
        # 创建文件
    driver = gdal.GetDriverByName("GTiff")
    dataset = driver.Create(path, im_width, im_height, im_bands, datatype)
    if (dataset != None):
        dataset.SetGeoTransform(im_geotrans)  # 写入仿射变换参数
        dataset.SetProjection(im_proj)  # 写入投影
    for i in range(im_bands):
        dataset.GetRasterBand(i + 1).WriteArray(im_data[i])
    del dataset

infile = "../01Data/Runoff1992-2014/GRUN_v1_GSWP3_WGS84_05_1902_2014.nc"
data_set = Dataset(infile) # 读取nc文件信息
time = data_set.variables["time"][:]  # 获取时间一列
units = data_set.variables["time"].units # 获取第一期时间

#读取样本tif文件的地理信息
intif = "../03ProcessData/runoff_example.tif"

for i in range(0,len(time)):
    yr = num2date(time[i],units).year # 提取年份
    mon = num2date(time[i],units).month    # 提取月份
    value_data = data_set.variables['Runoff'][i]
    
    # 将缺失值改为0
    data = value_data.data
    mask = value_data.mask
    data[np.where(mask == True)] = 0
    
    outputname = "../01Data/Runoff1992-2014/tif/" + str(yr) + str(mon).zfill(2) + ".tif"
    WriteTiff(data,intif , outputname)
    print(outputname)

方案三:只使用代码netCDF4

import os
from osgeo import gdal,osr
from netCDF4 import Dataset,num2date
import numpy as np
import math
import matplotlib.pyplot as plt

#将程序、nc文件和样本tif文件放在同一个目录下
infile = r"D:\Data\pxh\grace\originalData\grace\CSR_GRACE_RL06_Mascons_all-corrections_v01.nc"
#读取nc文件
os.chdir(os.path.abspath('.'))
data_set = Dataset(infile) # 读取nc文件信息
time = data_set.variables["time"][:]  # 获取时间一列
units = data_set.variables["time"].units 
latitude = data_set.variables["lat"][:] # 获取纬度列
longitude = data_set.variables["lon"][:] # 获取经度列
ind = next(x[0] for x in enumerate(longitude) if x[1] >= 180) # 获取大于180的那一列的位置,后面用于从这个位置把后面的值移到前面解决东西半球反了的问题
                                                            # 为了提取出列表中的第一个值(即lon=180时所对应的index)
infileMask = r"D:\Data\pxh\grace\originalData\grace\CSR_GRACE_RL06_Mascons_v01_LandMask.nc"
data_set_Mask = Dataset(infileMask) # 读取nc文件信息
value_mask = data_set_Mask.variables['LO_val'][:]

driver = gdal.GetDriverByName('GTiff')
dtype = gdal.GDT_Float32

osrs = osr.SpatialReference()
osrs.SetWellKnownGeogCS('WGS84')
osrs.ExportToWkt()
geoprojection = osrs.ExportToWkt()

print("开始运行")
for y in range(0,len(time)):
    outdir = infile[:-3] # 获取文件名,去除.nc
    #建立文件夹,将生成的tif放在此文件夹中
    if not os.path.isdir(outdir):
        os.mkdir(outdir)   
    yr = num2date(time[y],units).year # 提取年份
    mon = num2date(time[y],units).month    # 提取月份
    #读取value data,将所得数组上下翻转,lon大于180的部分挪到数组前面,方便最后直接写入tif文件
    value_data = data_set.variables['lwe_thickness'][:][y] # 获取该时间对应的数据
    # value_data = value_data[0,:,:] # 处理4维数据时使用
    # value_data[np.where(value_mask == 0)] = np.nan # 海洋部分变为nan 
    value_data = np.flip(value_data, 0) # 矩阵上下左右反转
    value_data= np.roll(value_data,-ind,axis=1) # 从-ind-1位置把后面的值按列移到前面
        
           
    #输出tif文件
    outtif = outdir + "/" + str(yr) + str(mon).zfill(2) +".tif"# 新建的文件夹/年份+月份(补充为2位的,1变为01等)+.tif   
    geotransform = (-180.0, 360.0/value_data.shape[1], 0.0, 90.0, 0.0, -180/value_data.shape[0])   # 360/重采样后的列数得到输出图像的像元大小          
    outraster = driver.Create(outtif, value_data.shape[1],value_data.shape[0],1, dtype)
    outraster.SetProjection(geoprojection)
    outraster.SetGeoTransform(geotransform)
    outraster.GetRasterBand(1).WriteArray(value_data)
    
    outraster = None
    print(outtif)

根据需求使用注释掉的代码

!注意事项

1.使用时候请自行修改修改输入输出文件路径与变量名称

2.根据需要处理缺失值

3.由于位置问题可能需要旋转数据,推荐使用方案1

4.如果NC文件过大,使用arcmap处理过慢,可以使用方案3

温度和降水数据月合成年

from osgeo import gdal,osr
import numpy as np

def LoadData(filename):
    file = gdal.Open(filename)
    if file == None:
        print(filename + " can't be opened!")
        return
    nb = file.RasterCount

    L = []
    for i in range(1, nb + 1):
        band = file.GetRasterBand(i)
        background = band.GetNoDataValue()
        data = band.ReadAsArray()
        data = data.astype(np.float32)
        index = np.where(data == background)
        data[index] = 0
        L.append(data)
    data = np.stack(L,0)
    if nb == 1:
        data = data[0,:,:]
    return data

def WriteTiff(im_data, im_width, im_height, im_bands, im_geotrans, im_proj, path):
    if 'int8' in im_data.dtype.name:
        datatype = gdal.GDT_Byte
    elif 'int16' in im_data.dtype.name:
        datatype = gdal.GDT_UInt16
    else:
        datatype = gdal.GDT_Float32

    if len(im_data.shape) == 3:
        im_bands, im_height, im_width = im_data.shape
    elif len(im_data.shape) == 2:
        im_data = np.array([im_data])
    else:
        im_bands, (im_height, im_width) = 1, im_data.shape
        # 创建文件
    driver = gdal.GetDriverByName("GTiff")
    dataset = driver.Create(path, im_width, im_height, im_bands, datatype)
    if (dataset != None):
        dataset.SetGeoTransform(im_geotrans)  # 写入仿射变换参数
        dataset.SetProjection(im_proj)  # 写入投影
    for i in range(im_bands):
        dataset.GetRasterBand(i + 1).WriteArray(im_data[i])
    del dataset

indir = "TEM/tif/tmp_Layer2020_12.tif"
arr = LoadData(indir)
days = [31,28,31,30,31,30,31,31,30,31,30,31]

intifs = []
for i in range(len(days)):
    intifs.append(arr[i] * days[i])
    
arr_mean = sum(intifs) / sum(days)
raster = gdal.Open(indir)
dtype = gdal.GDT_Float32
driver = gdal.GetDriverByName('GTiff')

im_width = raster.RasterXSize #栅格矩阵的列数
im_height = raster.RasterYSize #栅格矩阵的行数
im_bands = 1

osrs = osr.SpatialReference()
osrs.SetWellKnownGeogCS('WGS84')
osrs.ExportToWkt()
im_proj = osrs.ExportToWkt()
im_geotrans = raster.GetGeoTransform()#获取仿射矩阵信息
ResultPath = "TEM/tif/tmp_Layer2020_mean.tif"
WriteTiff(arr_mean, im_width, im_height, im_bands, im_geotrans, im_proj, ResultPath)

#--------------------------------------------------------------------------
indir = "PRE/tif/pre_Layer1.tif"
arr = LoadData(indir)

intifs = []
for i in range(12):
    intifs.append(arr[i])
    
arr_sum = sum(intifs)
raster = gdal.Open(indir)
dtype = gdal.GDT_Float32
driver = gdal.GetDriverByName('GTiff')

im_width = raster.RasterXSize #栅格矩阵的列数
im_height = raster.RasterYSize #栅格矩阵的行数
im_bands = 1
im_geotrans = raster.GetGeoTransform()#获取仿射矩阵信息
osrs = osr.SpatialReference()
osrs.SetWellKnownGeogCS('WGS84')
osrs.ExportToWkt()
im_proj = osrs.ExportToWkt()
ResultPath = "PRE/tif/pre_Layer2020_sum1.tif"
WriteTiff(arr_sum, im_width, im_height, im_bands, im_geotrans, im_proj, ResultPath) 

推荐阅读