python - 将json转换为excel时在python中出现类型错误
问题描述
我正在尝试将 json 转换为 excel 文件,但我遇到类型错误并且我无法解决该错误。该代码首先将 json 数据转换为字典(我也需要用于剪辑事实)而不是将其转换为 excel。任何帮助,将不胜感激。先感谢您。以下是我的代码:
from io import BytesIO
import sys
sys.path.append('/usr/lib/python3/dist-packages')
import json
import openpyxl
from openpyxl import load_workbook
import logging
logger = logging.getLogger('ExtractJSON')
class ExtractJSON(object):
def __init__(self):
""" Initialisierung """
pass
def convert(self, data):
""" PDF Extract API """
#jsondata = self.json2excel(data)
jsondata = self.json2csv(data)
return jsondata
#=======================================================================
# JSON to Excel
#=======================================================================
def json2excel(self, data):
""" JSON to Excel """
header = ["Seite","ID","Text","Pfad","Fett","Kursiv","Unterstrichen","Versal","PT","Alignment","BlockAlign","InlineAlign","Col","Row","Width",\
"Height","LineHeight","SpaceAfter","Placement","Bemerkung","Bounds","ClipBounds","BBox","Font","HasClip","Lang"]
jsondata = json.loads(data)
elements = jsondata['elements']
excelrow = 1
sid = 0
hd = 1
wb = openpyxl.Workbook()
sheet = wb.active
for head in header:
sheet.cell(excelrow, hd).value = head
hd += 1
for elm in elements:
sid += 1
excelrow += 1
cl = 0
row = self.createRow(elm, sid)
if row is None:
sheet.cell(excelrow, 1).value = "....."
continue
for column in header: # TODO: enumerate!
cl += 1
sheet.cell(excelrow, cl).value = row.get(column, '?????')
kids = elm.get('Kids', [])
kid = 0
for kidelm in kids:
kid += 1
excelrow += 1
cl = 0
sidstr = f"{sid}.{kid}"
row = self.createRow(kidelm, sidstr)
for column in header: # TODO: enumerate! # TODO: refactor
cl += 1
sheet.cell(excelrow, cl).value = row.get(column, '?????')
# FIXME: Check: Do we have recursive 'Kids' in the JSON file?
wbresult = BytesIO()
wb.save(wbresult)
return wbresult.getvalue()
def createRow(self, elm, sid):
""" For Excel """
try:
page = elm.get('Page', '')
if page or page == 0:
page += 1
xpath = elm['Path']
text = elm.get('Text', elm.get('text', ''))
font = elm.get('Font', {})
if not text and not font and xpath.find("Figure") > -1: # TODO temporary
logger.info("Figure found, no font and no text, continue..")
return ""
bold = "x" if font.get('weight', '') == 700 else ''
italic = "x" if font.get('italic', '') else ''
underline = font.get('underline', '') # TODO No example yet
versal = font.get('capital', '') # TODO No example yet
fontsize = elm.get('TextSize', '')
if fontsize and (abs(round(fontsize) - fontsize) < 0.10):
fontsize = round(fontsize)
attributes = elm.get('attributes', {})
alignment = attributes.get('TextAlign', '')
lineHeight = attributes.get('LineHeight', '')
spaceAfter = attributes.get('SpaceAfter', '')
placement = attributes.get('Placement', '')
note = ""
bounds = elm.get('Bounds', '')
clipBounds = elm.get('ClipBounds', '')
if bounds:
bounds = ", ".join([str(round(bd)) for bd in bounds])
if clipBounds:
clipBounds = ", ".join([str(round(bd)) for bd in clipBounds])
font = str(font)
hasClip = elm.get('HasClip', '')
lang = elm.get('Lang', '')
# Tabelle
blockAlign = attributes.get('BlockAlign', '')
inlineAlign = attributes.get('InlineAlign', '')
col = attributes.get('ColIndex', '')
row = attributes.get('RowIndex', '')
if col or col == 0:
col += 1
if row or row == 0:
row += 1
width = attributes.get('width', '')
height = attributes.get('height', '')
bBox = attributes.get('BBox', '')
if bBox:
bBox = ", ".join([str(round(bd)) for bd in bBox])
#
result = {'Seite':page, 'ID':sid, 'Text':text, 'Pfad':xpath, 'Fett':bold, 'Kursiv':italic, 'Unterstrichen':underline,\
'Versal':versal, 'PT':fontsize, 'Alignment':alignment, 'BlockAlign':blockAlign, 'InlineAlign':inlineAlign,\
'Col':col, 'Row':row, 'Width':width, 'Height':height, 'LineHeight':lineHeight, 'SpaceAfter':spaceAfter,\
'Placement':placement, 'Bemerkung':note, 'Bounds':bounds, 'ClipBounds':clipBounds, 'BBox':bBox,\
'Font':font, 'HasClip':hasClip, 'Lang':lang}
except Exception as e:
logger.exception(e)
logger.info(elm)
return ""
return result
#=======================================================================
# JSON to CSV (Alternative for JSON to Excel)
#=======================================================================
def json2csv(self, data):
""" JSON to CSV """
result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;"\
"Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
jsondata = json.loads(data)
elements = jsondata['elements']
sid = 0
for elm in elements:
sid += 1
result += self.createRow(elm, sid)
kids = elm.get('Kids', [])
kid = 0
for kidelm in kids:
kid += 1
sidstr = f"{sid}.{kid}"
result += self.createRowForCSV(kidelm, sidstr)
filename=json2excel.csv")
return result
def createRowForCSV(self, elm, sid):
""" """
try:
page = elm.get('Page', '')
if page or page == 0:
page += 1
xpath = elm['Path']
text = elm.get('Text', elm.get('text', ''))
font = elm.get('Font', {})
if not text and not font and xpath.find("Figure") > -1: # TODO temporary
logger.info("Figure found, no font and no text, continue..")
return ""
bold = "x" if font.get('weight', '') == 700 else ''
italic = "x" if font.get('italic', '') else ''
underline = font.get('underline', '') # TODO No example yet
versal = font.get('capital', '') # TODO No example yet
fontsize = elm.get('TextSize', '')
if fontsize and (abs(round(fontsize) - fontsize) < 0.10):
fontsize = round(fontsize)
attributes = elm.get('attributes', {})
alignment = attributes.get('TextAlign', '')
lineHeight = attributes.get('LineHeight', '')
spaceAfter = attributes.get('SpaceAfter', '')
placement = attributes.get('Placement', '')
note = ""
bounds = elm.get('Bounds', '')
clipBounds = elm.get('ClipBounds', '')
if bounds:
bounds = ", ".join([str(round(bd)) for bd in bounds])
if clipBounds:
clipBounds = ", ".join([str(round(bd)) for bd in clipBounds])
font = str(font)
hasClip = elm.get('HasClip', '')
lang = elm.get('Lang', '')
# Tabelle
blockAlign = attributes.get('BlockAlign', '')
inlineAlign = attributes.get('InlineAlign', '')
col = attributes.get('ColIndex', '')
row = attributes.get('RowIndex', '')
if col or col == 0:
col += 1
if row or row == 0:
row += 1
width = attributes.get('width', '')
height = attributes.get('height', '')
bBox = attributes.get('BBox', '')
if bBox:
bBox = ", ".join([str(round(bd)) for bd in bBox])
#
result = f"{page};'{sid}';'{text}';{xpath};{bold};{italic};{underline};{versal};{fontsize};{alignment};{blockAlign};{inlineAlign};{col};{row};\
{width};{height};'{lineHeight}';'{spaceAfter}';{placement};{note};{bounds};{clipBounds};{bBox};{font};{hasClip};{lang}\n"
except Exception as e:
logger.exception(e)
logger.info(elm)
return ""
return result
INPUTPATH = "/home/abc/Clips/JSON/structuredData.json"
OUTPUTPATH = "/home/abc/Clips/JSON/result.xlsx"
if __name__ == '__main__':
converter = ExtractJSON()
f = open(INPUTPATH, "r"); data = f.read(); f.close()
print ("JSON converting..")
result = converter.convert(data)
g = open(OUTPUTPATH, "wb"); g.write(result); g.close()
print (f"Excel stored on {OUTPUTPATH}")
sys.exit(0)
我得到的错误是:
JSON converting..
Traceback (most recent call last):
File "/home/abc/Clips/JSON/json2excel.py", line 222, in <module>
result = converter.convert(data)
File "/home/abc/Clips/JSON/json2excel.py", line 23, in convert
jsondata = self.json2csv(data)
File "/home/abc/Clips/JSON/json2excel.py", line 146, in json2csv
result += self.createRow(elm, sid)
TypeError: can only concatenate str (not "dict") to str
解决方案
长话短说,这是您的代码的最小可重现示例:
class ExtractJSON:
def createRowForCSV(self):
return {
'Seite':1, 'ID':2, 'Text':3, 'Pfad':4, 'Fett':5, 'Kursiv':6, 'Unterstrichen':7,
'Versal':9, 'PT':0, 'Alignment':10, 'BlockAlign':11, 'InlineAlign':12,
'Col':13, 'Row':14, 'Width':15, 'Height':16, 'LineHeight':17, 'SpaceAfter':18,
'Placement':19, 'Bemerkung':20, 'Bounds':21, 'ClipBounds':22, 'BBox':23,
'Font':2.4, 'HasClip':"twenty-five", 'Lang':"twenty-6"
}
def json2csv(self):
result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
result += self.createRowForCSV()
return result
输出
Traceback (most recent call last):
File "Main.py", line 28, in <module>
print(ExtractJSON().json2csv())
File "Main.py", line 17, in json2csv
result += self.createRowForCSV()
TypeError: can only concatenate str (not "dict") to str
现在,我们可以清楚地看到错误的原因是它result
是一个str
但你正在附加到它的输出createRowForCSV
是一个字典。为了进一步可视化这一点,您就像这样做:
"Some text" + {"another": "one"}
这显然会失败。
解决方案1:
手动迭代dict并按顺序获取值。
class ExtractJSON:
...
def json2csv(self):
result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
result_field_names = result.strip().split(';') # Get the order of the keys
result_dict = self.createRowForCSV() # Get the row in dict format
result_string = ";".join(map(str, [result_dict[key] for key in result_field_names])) # Get the values in order and combine them into a string with ';' as the separator
result += result_string + "\n" # Append to the result
return result
...
解决方案2:
使用csv.DictWriter将响应字典转换createRowForCSV
为 csv 字符串。
import csv
from io import StringIO
class ExtractJSON:
...
def json2csv(self):
result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
result_field_names = result.strip().split(';') # Get the order of the keys
result_dict = self.createRowForCSV() # Get the row in dict format
result_string = StringIO() # Create the string that will hold the result
csv.DictWriter(result_string, result_field_names, delimiter=';').writerow(result_dict) # Convert the dict to str
result_string.seek(0) # Since result_string is a file-like object, go back to the beginning
result += result_string.read() # Read the result string from the beginning to end and append to the result
return result
...
输出
>>> print(ExtractJSON().json2csv())
Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang
1;2;3;4;5;6;7;9;0;10;11;12;13;14;15;16;17;18;19;20;21;22;23;2.4;twenty-five;twenty-6
推荐阅读
- php - Laravel - 按奇怪的结果排序和分组
- c++ - 采用不同大小数组的构造函数
- javascript - 将一个单词拆分为一个 div,每个字母又是它自己的 div
- c++ - 谁能解释一下这个 BFS 代码是如何工作的?
- c# - 不是子进程的 C# 进程监控应用程序
- angular - 模态关闭后恢复功能
- javascript - 为什么通知组件不起作用
- nservicebus - 更改 Nsb Message 和 SagaData 类
- oracle - 如何使用 MailGun 从 Oracle Apex 发送电子邮件
- ruby-on-rails - Rails Helper Handle Flash with ul