python - 包含不同数据类型的dict的文件处理(I/O)
问题描述
我想将一个字典存储到四种不同的文件类型（*.csv、*.json、*.txt、*.dat）中。该字典包含若干 str、一个由 str 组成的 list、一个 pd.DataFrame、若干 np.ndarray（包括由 np.ndarray 组成的 np.ndarray）以及若干 float。我尝试了几种可能性，但它们都只起作用了一部分。由于 dict 中各条目的长度不同，我无法将其转换为 pd.DataFrame；否则首选的方法将分别是 pd.DataFrame.to_csv / pd.read_csv 和 pd.DataFrame.to_json / pd.read_json。
读取文件意味着恢复先前程序运行时保存的状态（数据将重新变为 pd.DataFrame）。随机值会在程序运行期间被计算结果替换（或使用先前保存的配置文件中的值），因此无需担心这些 np.random.rand 部分。
注释掉的行是我在不同网站上发现的不同可能性(包括官方文档,它没有预期的那么有用)。
写文件
import pandas as pd
import numpy as np
import csv
import os
import json
# Test fixture: a configuration/state dict mixing str, list, None,
# np.ndarray and pd.DataFrame values.
content = {
    'rootpath': os.path.abspath(os.getcwd()),
    'indir': '.',
    'outdir': 'output',
    'workdir': 'temporary',
    'datafile': 'file.csv',
    'configfile': 'configs.txt',
    'savefile': 'contents.csv',
    'model': np.arange(4),
    'datafiles': [os.path.join(os.getcwd(), name) for name in ('data.csv', 'data.dat')],
    'data': pd.DataFrame(np.arange(15).reshape(3, 5)),
    'dataid': 'g_g;r_g;r_r;',
    'result': None,
}

# HMM parameters: 1-D, 2-D and 3-D arrays exercise every array rank that
# will appear during/after computation.
hmm = {
    'hmm_a': np.random.rand(9).reshape(3, 3),          # 2-D test input
    'hmm_b': np.zeros(3),
    'hmm_mu': np.random.rand(1),
    'hmm_pi': np.random.rand(3),
    'hmm_block': np.random.rand(27).reshape(3, 3, 3),  # 3-D test input
}

'''
computation, changing pathname and filename, rest of the program
'''

# Merge both dicts into the object that gets written to disk and derive
# the target path plus its extension (which drives the format dispatch).
write_in = {**content, **hmm}
path = os.path.join(write_in['rootpath'], write_in['outdir'], write_in['savefile'])
p, filetype = os.path.splitext(path)
'''
Write *write_in* to *path* in the format given by *filetype*.
NOTE(review): the draft referenced *onlydata* and *mode* without ever
defining them (NameError at runtime), and the "onlydata" branches called
DataFrame methods on a plain dict.  *write_in* always carries the full
state dict here, so the working dict-serialising paths are kept and fixed.
'''
mode = 'w'  # overwrite any previous save file

# Convert every value to a plain serialisable Python object once, up front:
# np.ndarray -> (nested) list, pd.DataFrame -> list of row-lists.  The
# draft repeated this loop inside the csv and json branches, where it was
# dead code because nothing was left to convert.
for c in write_in:
    if isinstance(write_in[c], np.ndarray):
        write_in[c] = write_in[c].tolist()
    elif isinstance(write_in[c], pd.DataFrame):
        write_in[c] = write_in[c].values.tolist()

ext = filetype.lower()
# saving as *.csv Comma Separated Values
if 'csv' in ext:
    # One "key,value" row per entry.  Take care when loading this file:
    # every value comes back as its str repr and must be parsed again.
    with open(path, mode, newline='') as f:
        w = csv.writer(f, delimiter=',', quotechar='"')
        for key, value in write_in.items():
            w.writerow([key, value])
# saving as *.json; the dict is written as it prints on the command line
elif 'json' in ext:
    with open(path, mode) as f:
        json.dump(write_in, f, indent=4)
# saving as *.txt: the plain repr() of the dict
elif 'txt' in ext:
    with open(path, mode) as f:
        f.write(str(write_in))
# saving as *.dat: same text as *.txt, written through a binary handle.
# (The draft passed the dict itself to f.write(), a TypeError.)
elif 'dat' in ext:
    with open(path, mode + 'b') as f:
        f.write(str(write_in).encode('utf-8'))
else:
    print('save_file: Unknown file format. Aborting program part.')
读取文件
# --- read a previously saved state from *path* back into a dict -----------
import ast  # parses the repr-style *.txt / *.dat dumps without eval()

ext = filetype.lower()
# from *.csv: one "key,value" row per entry; every value comes back as a
# str and is converted back to its original dtype below.
if 'csv' in ext:
    read_out = {}
    with open(path, 'r', newline='') as f:
        for k, v in csv.reader(f):
            read_out[k] = v
    # Convert the str values back, guessing the dtype from the content.
    for a in read_out:
        if read_out[a] == '':
            if 'dir' in a:
                # Relative paths: default *indir*/*outdir*/*workdir* to '.'
                read_out[a] = '.'
            elif a == 'result':
                read_out[a] = None  # csv writes None as an empty field
        elif a == 'dataid':
            # *dataid* may be the repr of a list of str or a single
            # separator-joined str; empty fragments (e.g. from a trailing
            # ';') are dropped.
            s = read_out[a]
            if s[0] == '[' and s[-1] == ']' and "', '" in s:
                # repr of a list of str: strip "['" and "']" BEFORE
                # splitting (the draft sliced the split result [1:-1],
                # which silently dropped the first and last entries).
                parts = s[2:-2].split("', '")
            elif "', '" in s:
                parts = s.split("', '")
            elif '; ' in s:
                parts = s.split('; ')
            elif ';' in s:
                parts = s.split(';')
            else:
                seppi = input("read_configs: Couldn't determine the separating character of *dataid*. Please type it (Standard: comma): ") or ','
                parts = s.split(seppi) if seppi in s else ['g_g', 'r_g', 'r_r']
            read_out[a] = [q for q in parts if q]
        elif a == 'datafiles':
            # repr of a list of absolute paths: every second fragment
            # between single quotes is a path.
            read_out[a] = read_out[a].split("'")[1:-1:2]
        elif read_out[a][0] == '[' and read_out[a][-1] == ']':
            # Bracketed and not caught above -> the (nested) list repr of a
            # np.ndarray.  np.longdouble replaces np.float128, which does
            # not exist on every platform (e.g. Windows).
            if read_out[a].count('[') == read_out[a].count(']') == 1:
                # one-dimensional array
                floats = np.fromstring(read_out[a][1:-1], dtype=np.longdouble, sep=',')
                ints = np.fromstring(read_out[a][1:-1], dtype=np.int64, sep=',')
            else:
                # multi-dimensional; handles two-dimensional reprs only
                rows = read_out[a][2:-2].split('], [')
                floats = np.array([np.fromstring(r, dtype=np.longdouble, sep=',') for r in rows])
                ints = np.array([np.fromstring(r, dtype=np.int64, sep=',') for r in rows])
            # Prefer the int array when the values are integral and not all
            # zero.  (The draft compared ints.all() == floats.all(), which
            # only compares two booleans, never the array contents.)
            if floats.size and np.array_equal(floats, ints) and floats.any():
                read_out[a] = ints
            else:
                read_out[a] = floats
# from *.json; the dict was dumped as-is, so json.load restores
# str/list/None natively.
elif 'json' in ext:
    with open(path, 'r') as f:
        read_out = json.load(f)
    for a in read_out:
        if a in ('result', 'datafiles', 'dataid'):
            pass  # already the right native types
        elif read_out[a] == '' and 'dir' in a:
            read_out[a] = '.'  # default relative paths to the root
        elif isinstance(read_out[a], list):
            read_out[a] = np.asarray(read_out[a], dtype=np.longdouble)
# from *.txt: the file holds repr(dict) of plain Python objects (arrays and
# DataFrames were converted to lists before saving), so literal_eval parses
# it back safely.  (The draft left *read_out* undefined on this path.)
elif 'txt' in ext:
    with open(path, 'r') as f:
        read_out = ast.literal_eval(f.read())
# from *.dat: identical text, read through a binary handle.  (The draft
# called f.read(read_out), which is not a valid use of read().)
elif 'dat' in ext:
    with open(path, 'rb') as f:
        read_out = ast.literal_eval(f.read().decode('utf-8'))
else:
    # Guarantee *read_out* exists so the merge loop below cannot NameError.
    print('load_file: Unknown file format. Nothing was read.')
    read_out = {}
# Sort the restored variables back into the dicts *content* and *hmm*.
for a in read_out:
    if 'hmm_' in a:
        hmm[a] = read_out[a]
    else:
        content[a] = read_out[a]
if 'data' in content:
    content['data'] = pd.DataFrame(content['data'])
解决方案
推荐阅读
- mysql - mysql为什么int的字段类型没有长度
- reactjs - 如何在 tsx 文件上使用条带库。得到错误
- c# - 数据库更新的通用方法
- c# - Blazor 服务器端授权
- ruby-on-rails - ECS任务中的Puma突然需要10分钟才能启动
- java - Vertx如何使Eventbus请求等待消费者消息回复
- java - 验证 xml:space="preserve" 是否存在于 xml 请求中,并在 xml 响应中包含 xml:space="preserve"
- javascript - 尝试使用 export {} 导出函数导致错误
- java - 归档构建后如何删除原始詹金斯构建
- python - 在 Python 的数据集中过滤“包含值”