json - 提取数据后在 JSON 行之间插入分隔符
问题描述
我有一个包含 811 行 JSON 的文件,需要对其进行解析。目前,我使用以下命令来提取我感兴趣的数据(之所以需要 awk,是因为我拿到的 JSON 没有把数据放在正确的数组中):
sed 's/},/},\n/g' 1st_run.json |awk '/"characater"/ { gsub("\"characater\"", "\"char" ++n "\"", $0) } 1'| jq -r '.frames.frame.lps.lp|.characters[]|[.code_ascii,.confidence]|@tsv'
这样做效果不错,但得到的是大量没有任何分隔的数据。我怎样才能至少在每一行 JSON 的结果之后插入一个分隔符,从而得到一个大致可解析的结果?
输入
我拥有的 JSON 输入类似于:
...
{"response":{"container":{"id":"80d996a1-c267-4fa4-b3f8-f61ff9fda198","timestamp":"2018-Jul-10 17:00:50.829709"},"id":"00000002-0000-0000-0000-000000000002"},"frames":{"frame":{"id":"398","timestamp":"2016-Nov-30 12:56:47.900000","lps":{"lp":{"licenseplate":"FRJ724","text":"FRJ724","wtext":"FRJ724","confidence":"67","bkcolor":"16777215","color":"16777215","type":"540122","ntip":"6","cct_country_short":"USA","cct_state_short":"NY","tips":{"tip":{"poly":{"p":{"x":"1553","y":"249"},"p":{"x":"1559","y":"249"},"p":{"x":"1559","y":"267"},"p":{"x":"1553","y":"267"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"88"},"tip":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x":"1568","y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"96"},"tip":{"poly":{"p":{"x":"1569","y":"248"},"p":{"x":"1575","y":"248"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"tip":{"poly":{"p":{"x":"1585","y":"248"},"p":{"x":"1591","y":"248"},"p":{"x":"1591","y":"267"},"p":{"x":"1585","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"94"},"tip":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"88"},"tip":{"poly":{"p":{"x":"1602","y":"248"},"p":{"x":"1607","y":"248"},"p":{"x":"1607","y":"266"},"p":{"x":"1602","y":"266"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"99"}},"ncharacter":"6","characters":{"characater":{"poly":{"p":{"x":"1553","y":"249"},"p":{"x":"1559","y":"249"},"p":{"x":"1559","y":"267"},"p":{"x":"1553","y":"267"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"88"},"characater":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x"
:"1568","y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"96"},"characater":{"poly":{"p":{"x":"1569","y":"248"},"p":{"x":"1575","y":"248"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"characater":{"poly":{"p":{"x":"1585","y":"248"},"p":{"x":"1591","y":"248"},"p":{"x":"1591","y":"267"},"p":{"x":"1585","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"94"},"characater":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"88"},"characater":{"poly":{"p":{"x":"1602","y":"248"},"p":{"x":"1607","y":"248"},"p":{"x":"1607","y":"266"},"p":{"x":"1602","y":"266"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"99"}},"det_time_us":"776874","poly":{"p":{"x":"1543","y":"237"},"p":{"x":"1618","y":"237"},"p":{"x":"1618","y":"274"},"p":{"x":"1543","y":"274"}}}},"det_time_us":"1883017"}}}
{"response":{"container":{"id":"fa75e8f8-1b44-4f2f-a09b-6fe3b801ca1b","timestamp":"2018-Jul-10 17:00:55.863641"},"id":"00000002-0000-0000-0000-000000000002"},"frames":{"frame":{"id":"399","timestamp":"2016-Nov-30 12:56:48","lps":{"lp":{"licenseplate":"FRJ724","text":"FRJ724","wtext":"FRJ724","confidence":"47","bkcolor":"16777215","color":"16777215","type":"540122","ntip":"6","cct_country_short":"USA","cct_state_short":"NY","tips":{"tip":{"poly":{"p":{"x":"1553","y":"248"},"p":{"x":"1560","y":"248"},"p":{"x":"1560","y":"266"},"p":{"x":"1554","y":"266"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"96"},"tip":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x":"1568","y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"98"},"tip":{"poly":{"p":{"x":"1569","y":"247"},"p":{"x":"1576","y":"247"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"tip":{"poly":{"p":{"x":"1586","y":"248"},"p":{"x":"1592","y":"248"},"p":{"x":"1592","y":"267"},"p":{"x":"1586","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"95"},"tip":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"86"},"tip":{"poly":{"p":{"x":"1601","y":"249"},"p":{"x":"1608","y":"249"},"p":{"x":"1608","y":"265"},"p":{"x":"1601","y":"265"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"63"}},"ncharacter":"6","characters":{"characater":{"poly":{"p":{"x":"1553","y":"248"},"p":{"x":"1560","y":"248"},"p":{"x":"1560","y":"266"},"p":{"x":"1554","y":"266"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"96"},"characater":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x":"1568"
,"y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"98"},"characater":{"poly":{"p":{"x":"1569","y":"247"},"p":{"x":"1576","y":"247"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"characater":{"poly":{"p":{"x":"1586","y":"248"},"p":{"x":"1592","y":"248"},"p":{"x":"1592","y":"267"},"p":{"x":"1586","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"95"},"characater":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"86"},"characater":{"poly":{"p":{"x":"1601","y":"249"},"p":{"x":"1608","y":"249"},"p":{"x":"1608","y":"265"},"p":{"x":"1601","y":"265"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"63"}},"det_time_us":"600136","poly":{"p":{"x":"1543","y":"238"},"p":{"x":"1618","y":"239"},"p":{"x":"1619","y":"274"},"p":{"x":"1543","y":"273"}}}},"det_time_us":"1495308"}}}
{"response":{"container":{"id":"5c9c773c-a72a-488f-bc49-148dcd6cfa0a","timestamp":"2018-Jul-10 17:01:01.756522"},"id":"00000002-0000-0000-0000-000000000002"},"frames":{"frame":{"id":"400","timestamp":"2016-Nov-30 12:56:48.100000","lps":{"lp":{"licenseplate":"FRJ724","text":"FRJ724","wtext":"FRJ724","confidence":"47","bkcolor":"16777215","color":"16777215","type":"540122","ntip":"6","cct_country_short":"USA","cct_state_short":"NY","tips":{"tip":{"poly":{"p":{"x":"1553","y":"248"},"p":{"x":"1560","y":"248"},"p":{"x":"1560","y":"266"},"p":{"x":"1554","y":"266"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"96"},"tip":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x":"1568","y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"98"},"tip":{"poly":{"p":{"x":"1569","y":"247"},"p":{"x":"1576","y":"247"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"tip":{"poly":{"p":{"x":"1586","y":"248"},"p":{"x":"1592","y":"248"},"p":{"x":"1592","y":"267"},"p":{"x":"1586","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"95"},"tip":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"86"},"tip":{"poly":{"p":{"x":"1601","y":"249"},"p":{"x":"1608","y":"249"},"p":{"x":"1608","y":"265"},"p":{"x":"1601","y":"265"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"63"}},"ncharacter":"6","characters":{"characater":{"poly":{"p":{"x":"1553","y":"248"},"p":{"x":"1560","y":"248"},"p":{"x":"1560","y":"266"},"p":{"x":"1554","y":"266"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"96"},"characater":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x"
:"1568","y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"98"},"characater":{"poly":{"p":{"x":"1569","y":"247"},"p":{"x":"1576","y":"247"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"characater":{"poly":{"p":{"x":"1586","y":"248"},"p":{"x":"1592","y":"248"},"p":{"x":"1592","y":"267"},"p":{"x":"1586","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"95"},"characater":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"86"},"characater":{"poly":{"p":{"x":"1601","y":"249"},"p":{"x":"1608","y":"249"},"p":{"x":"1608","y":"265"},"p":{"x":"1601","y":"265"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"63"}},"det_time_us":"457492","poly":{"p":{"x":"1543","y":"238"},"p":{"x":"1618","y":"239"},"p":{"x":"1619","y":"274"},"p":{"x":"1543","y":"273"}}}},"det_time_us":"1311946"}}}
...
创建的输出
4 99
9 95
2 94
3 94
9 97
B 96
A 92
B 94
L 76
E 88
B 90
R 95
1 85
4 99
9 87
2 98
3 97
9 98
B 98
A 94
4 91
9 97
2 90
3 92
9 96
B 98
A 99
预期输出(例如)
在每个 JSON 行的结果之后插入分隔符,与每行提取的项目数无关(该项目数等于 JSON 中 .ncharacter 的值)
4 99
9 95
2 94
3 94
9 97
B 96
----------
A 92
B 94
L 76
E 88
B 90
R 95
1 85
4 99
----------
9 87
2 98
3 97
9 98
B 98
A 94
4 91
----------
9 97
2 90
3 92
9 96
B 98
A 99
解决方案
好的,
我通过编写一个可以处理格式错误的 JSON 数据的 Python 脚本解决了这个问题。思路是逐行迭代,然后按子字符串拆分每一行的内容,以提取 code_ascii 和 confidence,最终得到如下脚本:
#!/usr/bin/python
def mysplit(str):
    """Print the ``code_ascii`` and ``confidence`` values found in one line.

    The line is handled as plain text rather than parsed as JSON: it is cut
    at every occurrence of the substring ``code_ascii`` and the two values
    are picked by position from the quote-delimited pieces that follow.

    Args:
        str: One line of input text. The name shadows the builtin but is
            kept so that existing callers keep working.

    Returns:
        None. One ``"<code_ascii> <confidence>"`` line is printed per match.
    """
    segments = str.split("code_ascii")
    # The first segment is the text before the first match, so skip it.
    for segment in segments[1:]:
        fields = segment.split('"')
        # For a segment shaped like '":"F","confidence":"88"...', fields[2]
        # is the character and fields[6] the value of the next key (assumed
        # to be "confidence", as in the sample input). Truncated matches are
        # skipped instead of raising IndexError.
        if len(fields) > 6:
            # Single-argument print() works on both Python 2 and Python 3.
            print(fields[2] + " " + fields[6])
# Path of the input file; each line is handed to mysplit() on its own.
filepath = 'test2.json'
with open(filepath) as fp:
    # Iterating the file object yields one line at a time until EOF.
    for line in fp:
        # The separator is printed before the values of every line.
        # Single-argument print() works on both Python 2 and Python 3.
        print("----------")
        mysplit(line)
我认为这对我来说应该差不多...
推荐阅读
- sql - 我想使用 DATEDIFF 将两个日期之间的差异转换为整数
- angular - 如何过滤 FormGroup 值?
- vtk - VTK:无法使用多线程渲染窗口
- sass - Angular - 让 Font Awesome 加载图标(spinner)居中显示不起作用
- r - 如何在 R 中使用 TableGrob 以双向表格图形方式突出显示条件值?
- kotlin - ViewPager2 和 FragmentStatePagerAdapter
- typescript - Typescript 不会在扩展接口中推断值的类型
- python - 什么错误“没有显示名称和没有 $DISPLAY 环境变量”
- c++ - 如果只有一个线程修改了 std::vector,那么同一个线程在从向量读取时是否需要使用锁?
- asp.net - 如何在 Asp.net Core 2 中进行甜甜圈缓存?