python - int() 的无效字面量(invalid literal for int() with base 10):'# NRECS: 1096\n'
问题描述
我刚接触 Python,希望能就下面的代码得到一些帮助。目的是从全部 258 个文件的相应列中分别提取各个变量的数据,并把它们转换成单独的 NetCDF4 文件,然后将这些 NetCDF 文件导入 ArcGIS 并转换为栅格格式。输出文件的部分内容如下:
# NRECS: 1096
# DT: 24
# STARTDATE: 1999-01-01 00:00:00
# ALMA_OUTPUT: 0
# NVARS: 10
# YEAR MONTH DAY OUT_SNOW_COVER OUT_SURF_TEMP OUT_RUNOFF OUT_BASEFLOW OUT_SWE OUT_EVAP OUT_PREC
1999 01 01 0.0000 -0.6910 0.0000 1.7175 0.0000 1.2187 1.2250
1999 01 02 0.0000 -8.1983 0.0000 1.7042 0.0000 0.0132 0.0000
1999 01 03 0.0000 -13.7701 0.0000 1.6907 0.0000 0.0076 0.0000
1999 01 04 1.0000 -11.0906 0.0000 1.6772 6.1095 0.4404 7.4750
1999 01 05 1.0000 -7.4365 0.0000 1.6637 9.7234 0.6585 4.3000
1999 01 06 1.0000 -6.4047 0.0000 1.6501 12.1842 0.5672 3.0000
1999 01 07 1.0000 -9.1578 0.0000 1.6364 12.0282 0.5211 0.0000
代码如下:
#!/usr/bin/python
#----------------------------------------------------
# Program to convert VIC fluxes files to NetCDF file
# will ask the user which variable he wants to export
# and also for which years. Assumes there is data
# for the entire time period, from 1-jan to 31-dec
# SET UP FOR DAILY TIME STEP. FLUX FILE SHOULD NOT
# CONTAIN HOUR RECORD!!
#----------------------------------------------------
#------------------------------------------------
# Writen by Daniel de Castro Victoria
# dvictori@cena.usp.br or daniel.victoria@gmail.com
# Needs python libraries netCDF4 and NumPy
# 03-dec-2004
#
# Script updated by Kel Markert
# kel.markert@nasa.gov or kel.markert@uah.edu
#-------------------------------------------------
# import dependencies
from __future__ import print_function
import sys
import os, string
# handle dates...
import datetime as dt
# NetCDF and Numeric
from netCDF4 import *
from numpy import *
# Convert a directory of VIC flux text files into one gridded NetCDF file.
#
# Parameters, as they are used below:
#   influxes   - path whose os.path.dirname() is the directory that is listed
#   outpath    - prefix of the output file name; it is concatenated directly
#                with var_txt, "_", start_year and ".nc"
#   var        - column index to extract; the user is prompted when it is None
#   start_year, end_year - first / last year to extract; prompted when None
# Raises OSError, ValueError or IOError with the messages given below.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the statements is not visible here - confirm against the real script.
def flux2nc(influxes,outpath,var=None,start_year=None,end_year=None):
# building file list and sorted lat lon list
dirin = os.path.dirname(influxes)
try:
file_list = os.listdir(dirin)
except OSError:
raise OSError('Input flux directory not valid, please fix path')
lat_t = []
lon_t = []
lat = []
lon = []
# latitude and longitude are taken from the 2nd and 3rd "_"-separated
# parts of every file name in the directory
try:
for f in file_list:
lat_t.append(float(str.split(f, "_")[1]))
lon_t.append(float(str.split(f, "_")[2]))
except ValueError:
raise ValueError('Input path contains files that are not flux files')
# keep each distinct latitude / longitude once
for i in lat_t:
if i not in lat:
lat.append(i)
for i in lon_t:
if i not in lon:
lon.append(i)
# putting in order. Lat should be from top to bottom
# lon from left to right
lon.sort()
lat.sort()
lat.reverse()
del(lat_t)
del(lon_t)
# if variable is not set, get it from user
if var == None:
#determining the parameter to use
print("Choose output parameter")
print("1 - SNOW_COVER")
print("2 - SURFACE_TEMPERATURE")
print("3 - Runoff")
print("4 - Base flow")
print("5 - Snow Water Equivalent")
print("6 - EVAPORATION")
print("7 - PRECIPITATION")
# NOTE(review): on Python 3 input() returns a str, so the "varini + 2"
# below would raise TypeError - confirm which interpreter this targets.
varini = input('Choose output (1 a 7)>')
#getting the column right
if int (varini) < 7:
var = varini + 2
# NOTE(review): the test above already accepts every value below 7, so this
# branch cannot run for 6; a choice of 7 matches neither branch and leaves
# var as None.
elif varini == 6: #more than one soil layer...
camada = input('which soil layer?>')
var = varini + 2 + camada
#set name of out_file. Named after parameter choice
# NOTE(review): the labels below (ppt, evap, runoff, ...) do not follow the
# order of the menu printed above - confirm which column each index means.
if var == 3:
var_txt = "ppt"
var_name = "Precipitation"
elif var == 4:
var_txt = "evap"
var_name = "Evapotranspiration"
elif var == 5:
var_txt = "runoff"
var_name = "Runoff"
elif var == 6:
var_txt = "base"
var_name = "Baseflow"
elif var == 7:
var_txt = "swe"
var_name = "Snow Water Equivalent"
else:
# camada is only assigned in the soil-layer prompt above
var_txt = "soilLyr"+str(camada)
var_name = "Soil moisture, layer {0}".format(camada)
# if the date information is not set get it from user
if start_year == None:
# for what date?
start_year = input("Enter start year:")
if end_year == None:
end_year = input("End year:")
# set date information in datetime object
inidate = dt.date(start_year,1,1)
enddate = dt.date(end_year,12,31)
# calculate number of days in time series
days = enddate.toordinal() - inidate.toordinal()+1
#print "Gridding {0} data...".format(var_name)
#
# create array containing all data
# This is going to be huge. Create an array with -9999 (NoData)
# Then populate the array by reading each flux file
#
all_data = zeros([days,len(lat),len(lon)], dtype=float32)
all_data[:,:,:] = -9999
c = len(file_list)
# for each file in list
for f in file_list:
# get lat & lon and their indices
latitude = float(str.split(f, sep="_")[1])
longitude = float(str.split(f, sep="_")[2])
lat_id = lat.index(latitude)
lon_id = lon.index(longitude)
c = c -1
infile = open(dirin+'/'+f, "r")
lixo = infile.readlines()
infile.close()
dado = []
# NOTE(review): every line returned by readlines(), including the
# "#"-prefixed header lines shown in the sample file, reaches int() here;
# a line without a tab is passed to int() whole, which matches the reported
# "invalid literal for int()" error. The traceback quotes str.split on this
# line while the listing uses string.split - confirm which one is current.
for l in lixo:
if int(string.split(l, sep="\t")[0]) in range(inidate.year, enddate.year+1):
dado.append(float(str.split(l, sep="\t")[var]))
# putting data inside array.
# Since data has lat & lon fixed uses dimension [:,lat_index,lon_index]
# the assignment needs exactly `days` values in dado
all_data[:,lat_id,lon_id] = dado
del dado # del data to free memory for large datasets
try:
# open netCDF file for writing
# outpath is used as a plain string prefix, no path separator is inserted
ncfile = Dataset(outpath+str(var_txt)+'_'+str(start_year)+".nc", "w")
# set netCDF metadata information
ncfile.Conventions = "CF-1.6"
ncfile.title = "VIC hydrologic flux outputs"
ncfile.source = 'VIC hydrologic model 4.2.d'
ncfile.history = "Created using the script created by NASA SERVIR. " + dt.date.today().isoformat()
ncfile.date_created = str(dt.datetime.now())
ncfile.references = "N/A"
ncfile.comment = "N/A"
ncfile.start_date = inidate.isoformat()
ncfile.end_date = enddate.isoformat()
#create dimensions
ncfile.createDimension("longitude", len(lon))
ncfile.createDimension("latitude", len(lat))
ncfile.createDimension("time", days)
#create variables
latvar = ncfile.createVariable("latitude", float, ("latitude",))
latvar.long_name = "Latitude"
latvar.units = "degrees_north"
latvar[:] = lat
lonvar = ncfile.createVariable("longitude", float, ("longitude",))
lonvar.long_name = "Longitude"
lonvar.units = "degrees_east"
lonvar[:] = lon
# time axis: day offsets 0 .. days-1 counted from inidate
timevar = ncfile.createVariable("time", int, ("time",))
timevar.long_name = "Time"
timevar.units = "days since " + inidate.isoformat()
timevar.calendar = 'gregorian'
timevar[:] = range(0, days)
# save gridded flux data to file
data_var = ncfile.createVariable(var_txt, float, ("time","latitude","longitude"))
data_var.long_name = var_name
data_var.missing_value = -9999.0
data_var.units = "mm"
data_var[:] = all_data[:,:,:]
# close the file
ncfile.close()
except IOError:
raise IOError('Output path is not valid, please fix the path string')
return
# Command-line entry point: checks the argument count, appends a trailing
# "/" to the flux directory argument when it is missing, and calls flux2nc()
# with the two directory arguments.
# NOTE(review): the indentation of this listing was lost - confirm the
# nesting against the real script.
def main():
# checking user input
if len(sys.argv) != 3:
print("Wrong user input")
print("Convert VIC fluxes files to NetCDF")
print("usage flux2cdf.py <vic flux dir> <out netcdf dir>")
print("DIR INPUTS SHOULD CONTAIN TRAILING /")
sys.exit()
# NOTE(review): only sys.argv[1] gets a trailing "/" appended here;
# sys.argv[2] is handed to flux2nc() unchanged.
if sys.argv[1][-1] != "/":
print("VIC FLUX DIR SHOULD CONTAIN TRAILING /")
print("fixing it for you...")
sys.argv[1] = sys.argv[1] + "/"
print("IMPORTANT: "+sys.argv[1]+" SHOULD CONTAIN ONLY FLUXES FILES!!!")
flux2nc(sys.argv[1],sys.argv[2])
return
# Execute the main level program if run as standalone
if __name__ == "__main__":
main()
我收到以下错误:
python3 flux2nc_a.py /mnt/d/Spring_2020/VIC/VIC_Output/ /mnt/d/Spring_2020/VIC/VIC_Output/NetCDF
IMPORTANT: /mnt/d/Spring_2020/VIC/VIC_Output/ SHOULD CONTAIN ONLY FLUXES FILES!!!
Choose output parameter
1 - SNOW_COVER
2 - SURFACE_TEMPERATURE
3 - Runoff
4 - Base flow
5 - Snow Water Equivalent
6 - EVAPORATION
7 - PRECIPITATION
Choose output (1 a 7)>1
Enter start year:1999
End year:2000
Traceback (most recent call last):
File "flux2nc_a.py", line 241, in <module>
main()
File "flux2nc_a.py", line 235, in main
flux2nc(sys.argv[1],sys.argv[2])
File "flux2nc_a.py", line 156, in flux2nc
if int(str.split(l, sep="\t")[0]) in range(inidate.year, enddate.year+1):
ValueError: invalid literal for int() with base 10: '# NRECS: 1096\n'
更新 1:感谢 Massimo,在修复了文件头(header)行导致的错误之后,我现在收到如下错误:
# STARTDATE: 1999-01-01 00:00:00
# ALMA_OUTPUT: 0
# NVARS: 10
# YEAR MONTH DAY OUT_SNOW_COVER OUT_SURF_TEMP OUT_RUNOFF OUT_BASEFLOW OUT_SWE
OUT_EVAP OUT_PREC
Traceback (most recent call last):
File "flux2nc_b.py", line 242, in <module>
main()
File "flux2nc_b.py", line 236, in main
flux2nc(sys.argv[1],sys.argv[2])
File "flux2nc_b.py", line 162, in flux2nc
all_data[:,lat_id,lon_id] = dado
ValueError: cannot copy sequence with size 0 to array axis with dimension 731
解决方案
我终于用下面的代码解决了。我不得不做出一些改变。感谢大家的帮助。
#!/usr/bin/env python
#----------------------------------------------------
# Program to convert VIC fluxes files to NetCDF file
# will ask the user which variable he wants to export
# and also for which years. Assumes there is data
# for the entire time period, from 1-jan to 31-dec
# SET UP FOR DAILY TIME STEP. FLUX FILE SHOULD NOT
# CONTAIN HOUR RECORD!!
#----------------------------------------------------
#------------------------------------------------
# Writen by Daniel de Castro Victoria
# dvictori@cena.usp.br or daniel.victoria@gmail.com
# 03-dec-2004
#
# 13-mar-2018: Code update. Change libraries and treat
# header lines. Changes done by Stuart Smith (smit1770 at purdue dot edu)
#-------------------------------------------------
import datetime
import os
import sys

# SciPy netCDF and NumPy
import numpy as np
from scipy.io import netcdf_file

# Read one line of text from the user on both Python 2 and Python 3.
# Python 2's input() evaluates whatever is typed, so raw_input is used there.
try:
    read_line = raw_input
except NameError:
    read_line = input

# In case flux files contain header lines, set this to the number of
# leading lines to drop from every file. Lines that start with "#" and
# blank lines are ignored as well, wherever they appear.
skip_lines = 6

# Column index in the flux file -> (output file stem, long name).
# Columns 0-2 hold YEAR MONTH DAY; the remaining order follows the flux
# file header: OUT_SNOW_COVER OUT_SURF_TEMP OUT_RUNOFF OUT_BASEFLOW
# OUT_SWE OUT_EVAP OUT_PREC.
FIXED_COLUMNS = {
    3: ("Snow_Cover", "Snow_Cover"),
    4: ("Surf_Temp", "Surface_Temperature"),
    5: ("Runoff", "Runoff"),
    6: ("base", "Baseflow"),
    7: ("SWE", "SWE"),
    8: ("Evaporation", "Evaporation"),
    9: ("Precipitation", "Precipitation"),
}


def ask_int(prompt):
    """Prompt until the user types a whole number and return it as an int."""
    while True:
        try:
            return int(read_line(prompt))
        except ValueError:
            print("Please enter a whole number.")


# checking user input
if len(sys.argv) != 2:
    print("Wrong user input")
    print("Convert VIC fluxes files to NetCDF")
    print("usage flux2cdf.py <vic flux dir>")
    print("VIC FLUX DIR SHOULD CONTAIN TRAILING /")
    sys.exit(1)

flux_dir = sys.argv[1]
if not flux_dir.endswith("/"):
    print("VIC FLUX DIR SHOULD CONTAIN TRAILING /")
    print("fixing it for you...")
    flux_dir += "/"

print("IMPORTANT: " + flux_dir + " SHOULD CONTAIN ONLY FLUXES FILES!!!")

# building file list and sorted lat lon list
# File names are expected to look like <prefix>_<lat>_<lon>.
cells = []
for name in os.listdir(flux_dir):
    parts = name.split("_")
    try:
        cells.append((name, float(parts[1]), float(parts[2])))
    except (IndexError, ValueError):
        sys.exit("Not a flux file (expected <prefix>_<lat>_<lon>): " + name)

if not cells:
    sys.exit("No flux files found in " + flux_dir)

# putting in order. Lat should be from top to bottom
# lon from left to right
lat = sorted(set(cell[1] for cell in cells), reverse=True)
lon = sorted(set(cell[2] for cell in cells))
lat_index = dict((value, pos) for pos, value in enumerate(lat))
lon_index = dict((value, pos) for pos, value in enumerate(lon))

# determining the parameter to use
print("Choose output parameter")
print("1 - Snow_Cover")
print("2 - Surface_Temperature")
print("3 - Runoff")
print("4 - Base flow")
print("5 - SWE")
print("6 - Evaporation")
print("7 - Precipitation")
print("8 - Soil Moisture")
varini = ask_int('Choose output (1 a 8)>')
if not 1 <= varini <= 8:
    sys.exit("Output choice must be between 1 and 8")

# getting the column right and naming the output after the choice
if varini < 8:
    var = varini + 2
    var_txt, var_name = FIXED_COLUMNS[var]
else:
    # more than one soil layer: layer 1 is the first column after the
    # fixed ones
    camada = ask_int('which soil layer?>')
    if camada < 1:
        sys.exit("Soil layer must be 1 or greater")
    var = varini + 1 + camada
    var_txt = "soil_" + str(camada)
    var_name = "Soil moisture, layer %i" % camada

# for what date?
start_year = ask_int("Enter start year:")
end_year = ask_int("End year:")
if end_year < start_year:
    sys.exit("End year must not be before start year")
inidate = datetime.date(start_year, 1, 1)
enddate = datetime.date(end_year, 12, 31)
days = enddate.toordinal() - inidate.toordinal() + 1

print("Go grab a coffe, this could take a while...")

#
# create array containing all data
# This is going to be huge. Create an array with -9999 (NoData)
# Then populate the array by reading each flux file
#
all_data = np.full((days, len(lat), len(lon)), -9999.0, dtype=float)

remaining = len(cells)
# for each file in list
for name, latitude, longitude in cells:
    print("%i files to write." % remaining)
    remaining -= 1

    dado = []
    with open(flux_dir + name, "r") as infile:
        for line_no, line in enumerate(infile):
            # here we skip the number of header lines
            # variable set at the beginning of the code
            if line_no < skip_lines:
                continue
            # split() accepts tabs as well as runs of spaces
            fields = line.split()
            if not fields or fields[0].startswith("#"):
                continue
            if start_year <= int(fields[0]) <= end_year:
                try:
                    dado.append(float(fields[var]))
                except IndexError:
                    sys.exit("%s: line %i has no column %i"
                             % (name, line_no + 1, var))

    # The time axis has one slot per calendar day, so every file must
    # supply exactly that many records for the requested years.
    if len(dado) != days:
        sys.exit("%s: found %i daily records for %i-%i, expected %i"
                 % (name, len(dado), start_year, end_year, days))

    # putting data inside array.
    # Since data has lat & lon fixed uses dimension [:,lat_index,lon_index]
    all_data[:, lat_index[latitude], lon_index[longitude]] = dado

#
# writing NetCDF
#
with netcdf_file(var_txt + ".nc", "w") as ncfile:
    ncfile.Conventions = "COARDS"
    ncfile.history = ("Created using flux2cdf.py. "
                      + datetime.date.today().isoformat())
    ncfile.production = "VIC output"
    ncfile.start_date = inidate.isoformat()
    ncfile.end_date = enddate.isoformat()

    # create dimensions
    ncfile.createDimension("X", len(lon))
    ncfile.createDimension("Y", len(lat))
    ncfile.createDimension("T", days)

    # create variables
    latvar = ncfile.createVariable("Y", "f4", ("Y",))
    latvar.long_name = "Latitude"
    latvar.units = "degrees_north"
    latvar[:] = lat

    lonvar = ncfile.createVariable("X", "f4", ("X",))
    lonvar.long_name = "Longitude"
    lonvar.units = "degrees_east"
    lonvar[:] = lon

    # time axis: day offsets 0 .. days-1 counted from inidate
    timevar = ncfile.createVariable("T", "f4", ("T",))
    timevar.long_name = "Time"
    timevar.units = "days since " + inidate.isoformat()
    timevar[:] = np.arange(days)

    data_var = ncfile.createVariable(var_txt, "f4", ("T", "Y", "X"))
    data_var.long_name = var_name + " calculated by VIC"
    data_var.missing_value = -9999.0
    # NOTE(review): the same unit string is written for every variable,
    # including snow cover and surface temperature - confirm the units.
    data_var.units = "milimeters"
    data_var[:] = all_data
推荐阅读
- python - Python/Pyinstaller 未正确安装
- python - 希望输出返回 S 曲线
- c++ - 将非 const char * 传递给函数的 const char *& 参数
- sql - 参数 * 只能用于 COUNT(*)
- apache - 如何将非 php 扩展 url 重定向到 php 扩展 url htaccess?
- python - 如何在 Python 中重载构造函数(而不是初始化程序)
- filenet-p8 - org.omg.CORBA.TRANSACTION_ROLLEDBACK:vmcid:0x0 次要代码:0 已完成:否
- docker - 从容器访问本地主机窗口中的 API 服务器
- android-studio - 使 ViewPager 适应 ImageView 的高度
- java - 使用 RxJava 进行 Android 改造:只调用 onSubscribe,而不是 onNext