python - 使用 Pandas 聚合 JSON 对象中的相似字段
问题描述
我有 JSON 字符串(见代码):
import numpy as np
import pandas as pd
x = [{
"t_registration_number": "31807380529",
"t_customer_inn": "2221123815",
"t_customer_kpp": "222101001",
"t_customer_ogrn": "1072221001709",
"t_customer_short_name": "КАУ ГОСУДАРСТВЕННАЯ ЭКСПЕРТИЗА АЛТАЙСКОГО КРАЯ",
"t_placer_inn": "2221123815",
"t_placer_kpp": "222101001",
"t_placer_ogrn": "1072221001709",
"t_placer_short_name": "КАУ ГОСУДАРСТВЕННАЯ ЭКСПЕРТИЗА АЛТАЙСКОГО КРАЯ",
"r_rus_name": "Алтайский край",
"t_publication_date": "2018-12-28",
"s_inn": "7706196090",
"s_kpp": "",
"s_name": "ООО Страховая компания СОГЛАСИЕ",
"s_lotguid": "f12b1c2a-fb9d-4bff-9430-7fc29ec9dc88",
"n_prc_diff_nmc": "1",
"nlots_nmck": 500000
},
{
"t_registration_number": "31805988205",
"t_customer_inn": "2801010011",
"t_customer_kpp": "280101001",
"t_customer_ogrn": "1022800512624",
"t_customer_short_name": "ГАУЗ АО ДЕТСКАЯ ГКБ",
"t_placer_inn": "2801010011",
"t_placer_kpp": "280101001",
"t_placer_ogrn": "1022800512624",
"t_placer_short_name": "ГАУЗ АО ДЕТСКАЯ ГКБ",
"r_rus_name": "Амурская область",
"t_publication_date": "2018-01-09",
"s_inn": "2723071046",
"s_kpp": "272301001",
"s_name": "ООО МЕДИАС",
"s_lotguid": "2fd7440e-e0a1-4fa0-ae7b-b901b1e378d5",
"n_prc_diff_nmc": "2",
"nlots_nmck": 34384
},
{
"t_registration_number": "31805988205",
"t_customer_inn": "2801010011",
"t_customer_kpp": "280101001",
"t_customer_ogrn": "1022800512624",
"t_customer_short_name": "ГАУЗ АО ДЕТСКАЯ ГКБ",
"t_placer_inn": "2801010011",
"t_placer_kpp": "280101001",
"t_placer_ogrn": "1022800512624",
"t_placer_short_name": "ГАУЗ АО ДЕТСКАЯ ГКБ",
"r_rus_name": "Амурская область",
"t_publication_date": "2018-01-09",
"s_inn": "7018040688",
"s_kpp": "701701001",
"s_name": "ООО СКАН - М",
"s_lotguid": "2fd7440e-e0a1-4fa0-ae7b-b901b1e378d5",
"n_prc_diff_nmc": "2",
"nlots_nmck": 34384
}
]
df = pd.DataFrame(x)
result = df.groupby(['s_lotguid','t_registration_number','t_customer_inn','t_customer_kpp','t_customer_ogrn','t_customer_short_name','t_placer_inn','t_placer_kpp','t_placer_ogrn','t_placer_short_name','r_rus_name','t_publication_date']).agg(','.join)#.to_dict()
最后 2 个 JSON 对象是相似的,除了 3 个字段:'s_inn'、's_kpp'、's_name'
我需要汇总数据,最后得到:
{
"t_registration_number": "31807380529",
"t_customer_inn": "2221123815",
"t_customer_kpp": "222101001",
"t_customer_ogrn": "1072221001709",
"t_customer_short_name": "КАУ ГОСУДАРСТВЕННАЯ ЭКСПЕРТИЗА АЛТАЙСКОГО КРАЯ",
"t_placer_inn": "2221123815",
"t_placer_kpp": "222101001",
"t_placer_ogrn": "1072221001709",
"t_placer_short_name": "КАУ ГОСУДАРСТВЕННАЯ ЭКСПЕРТИЗА АЛТАЙСКОГО КРАЯ",
"r_rus_name": "Алтайский край",
"t_publication_date": "2018-12-28",
"s_inn": "7706196090",
"s_kpp": "",
"s_name": "ООО Страховая компания СОГЛАСИЕ",
"s_lotguid": "f12b1c2a-fb9d-4bff-9430-7fc29ec9dc88",
"n_prc_diff_nmc": "1",
"nlots_nmck": 500000
},
{
"t_registration_number": "31805988205",
"t_customer_inn": "2801010011",
"t_customer_kpp": "280101001",
"t_customer_ogrn": "1022800512624",
"t_customer_short_name": "ГАУЗ АО ДЕТСКАЯ ГКБ",
"t_placer_inn": "2801010011",
"t_placer_kpp": "280101001",
"t_placer_ogrn": "1022800512624",
"t_placer_short_name": "ГАУЗ АО ДЕТСКАЯ ГКБ",
"r_rus_name": "Амурская область",
"t_publication_date": "2018-01-09",
"s_inn": "2723071046, 7018040688",
"s_kpp": "272301001, 701701001",
"s_name": "ООО МЕДИАС, ООО СКАН – М",
"s_lotguid": "2fd7440e-e0a1-4fa0-ae7b-b901b1e378d5",
"n_prc_diff_nmc": "2",
"nlots_nmck": 34384
}
或者在理想情况下:
{
"t_registration_number": "31807380529",
"t_customer_inn": "2221123815",
"t_customer_kpp": "222101001",
"t_customer_ogrn": "1072221001709",
"t_customer_short_name": "КАУ ГОСУДАРСТВЕННАЯ ЭКСПЕРТИЗА АЛТАЙСКОГО КРАЯ",
"t_placer_inn": "2221123815",
"t_placer_kpp": "222101001",
"t_placer_ogrn": "1072221001709",
"t_placer_short_name": "КАУ ГОСУДАРСТВЕННАЯ ЭКСПЕРТИЗА АЛТАЙСКОГО КРАЯ",
"r_rus_name": "Алтайский край",
"t_publication_date": "2018-12-28",
"s_inn": "7706196090",
"s_kpp": "",
"s_name": "ООО Страховая компания СОГЛАСИЕ",
"s_lotguid": "f12b1c2a-fb9d-4bff-9430-7fc29ec9dc88",
"n_prc_diff_nmc": "1",
"nlots_nmck": 500000
},
{
"t_registration_number": "31805988205",
"t_customer_inn": "2801010011",
"t_customer_kpp": "280101001",
"t_customer_ogrn": "1022800512624",
"t_customer_short_name": "ГАУЗ АО ДЕТСКАЯ ГКБ",
"t_placer_inn": "2801010011",
"t_placer_kpp": "280101001",
"t_placer_ogrn": "1022800512624",
"t_placer_short_name": "ГАУЗ АО ДЕТСКАЯ ГКБ",
"r_rus_name": "Амурская область",
"t_publication_date": "2018-01-09",
"aggregated": [
{
"s_inn": "2723071046",
"s_kpp": "272301001",
"s_name": "ООО МЕДИАС",
},
{
"s_inn": "7018040688",
"s_kpp": "701701001",
"s_name": "ООО СКАН – М",
}
],
"s_lotguid": "2fd7440e-e0a1-4fa0-ae7b-b901b1e378d5",
"n_prc_diff_nmc": "2",
"nlots_nmck": 34384
}
有人可以帮我吗?我真的不知道该怎么做。
解决方案
推荐阅读
- asp.net-core - 在 AuthorizationHandler 中访问 ModelState
- arcgis - 在 arcGIS SceneView 中限制地图区域/缩放
- python - 任意 X 和 Y 的最快整数搜索
- django - 小部件形式的多对多字段
- arrays - 在具有关联输入数组时检查验证错误并在 Blade 模板中提取错误
- visual-studio - SSRS 报告数据在报告服务器与本地预览上看起来不同
- python - Python(VS Code)中的模块'cassandra.cluster'中没有名称'Cluster'
- javascript - Javascript scrollIntoView(); 不加载
- tmux - TMUX 添加了一个我无法关闭的窗口
- javascript - 如何在网页中显示设备连接或断开连接