python - 使用 Python 基于多个键对 JSON 进行分组
问题描述
我有这样的数据,
# Sample input: a list of flat dicts, each carrying vesselId, vesselName,
# weatherStatus, ballastFlag, speedRange and the milesPerMT_* values.
# NOTE(review): `nan` is not defined in this snippet; presumably math.nan,
# float("nan") or numpy.nan is in scope -- confirm before running.
# NOTE: the first record spells 'milesPerMT_avg' twice (6, then 3). In a dict
# literal the last occurrence wins, so that record stores 3.
data = [{'vesselId': '1',"vesselName": "ALPHA 01",'weatherStatus': 'Good','ballastFlag':'B','milesPerMT_min': 'cycling','milesPerMT_max': 3,'milesPerMT_avg':6,"fuelPerMilesPerCargo": nan,'milesPerMT_avg': 3,'speedRange':'10-15'},
{'vesselId': '1',"vesselName": "ALPHA 01",'weatherStatus': 'Good','ballastFlag':'L','milesPerMT_min': 'cycling','milesPerMT_max': 45,"fuelPerMilesPerCargo": nan,'milesPerMT_avg': 3,'speedRange':'5-10'},
{'vesselId': '1',"vesselName": "ALPHA 01", 'weatherStatus': 'ROUGH','ballastFlag':'L','milesPerMT_min': 'reading', 'milesPerMT_max': 3.0,"fuelPerMilesPerCargo": nan,'milesPerMT_avg': 3,'speedRange':'10-15'},
{'vesselId': '1',"vesselName": "ALPHA 01", 'weatherStatus': 'ROUGH','ballastFlag':'L','milesPerMT_min': 'reading', 'milesPerMT_max': 3.0,"fuelPerMilesPerCargo": nan,'milesPerMT_avg': 3,'speedRange':'15-20'}]
我已经编写了将数据转换为 json 格式的代码:
# Asker's attempt: builds new_data as a list of vessel dicts, each holding a
# 'Fuel_Performance' list of per-weatherStatus entries.
# NOTE(review): block indentation was lost in this paste, so the exact nesting
# of the `else:` and `break` below cannot be recovered from the text alone.
new_data = []
not_found = True
for item in data:
# Look for a vessel that was already collected under the same vesselId.
for vesselId in new_data:
not_found = True
if item['vesselId'] == vesselId['vesselId']:
not_found = False
# Existing entries are matched on weatherStatus only; ballastFlag is copied
# into new entries but never compared, so it is not a grouping key here.
for weatherStatus in vesselId['Fuel_Performance']:
if item['weatherStatus'] == weatherStatus['weatherStatus'] :
# The appended dict carries only the three milesPerMT_* values
# (no speedRange, no fuelPerMilesPerCargo).
weatherStatus['milesPerMT'].append({'milesPerMT_min':item['milesPerMT_min'], 'milesPerMT_max':item['milesPerMT_max'],'milesPerMT_avg':item['milesPerMT_avg']})
else:
vesselId['Fuel_Performance'].append({'weatherStatus':item['weatherStatus'],'ballastFlag':item['ballastFlag'], 'milesPerMT':[{'milesPerMT_min':item['milesPerMT_min'], 'milesPerMT_max':item['milesPerMT_max'],'milesPerMT_avg':item['milesPerMT_avg'],'Speed':item['speedRange']}]})
break
# not_found still set: append a new vessel record seeded with this item.
if not_found:
new_data.append({'vesselId':item['vesselId'],'vesselName':item['vesselName'] , 'Fuel_Performance':[{'weatherStatus':item['weatherStatus'], \
'ballastFlag':item['ballastFlag'],'milesPerMT':[{'milesPerMT_min':item['milesPerMT_min'], 'milesPerMT_max':item['milesPerMT_max'],'milesPerMT_avg':item['milesPerMT_avg']}],'fuelPerMilesPerCargo': item['fuelPerMilesPerCargo'],'Speed':item['speedRange']}]})
得到的输出是这样的,
[{'Fuel_Performance': [{'Speed': '10-15',
'ballastFlag': 'B',
'fuelPerMilesPerCargo': nan,
'milesPerMT': [{'milesPerMT_avg': 6,
'milesPerMT_max': 3,
'milesPerMT_min': 8},
{'milesPerMT_avg': 3,
'milesPerMT_max': 45,
'milesPerMT_min': 9}],
'weatherStatus': 'Good'},
{'ballastFlag': 'L',
'milesPerMT': [{'Speed': '10-15',
'milesPerMT_avg': 3,
'milesPerMT_max': 3.0,
'milesPerMT_min': 10},
{'milesPerMT_avg': 3,
'milesPerMT_max': 3.0,
'milesPerMT_min': 10},
{'milesPerMT_avg': 3,
'milesPerMT_max': 3.0,
'milesPerMT_min': 11}],
'weatherStatus': 'ROUGH'},
{'ballastFlag': 'L',
'milesPerMT': [{'Speed': '15-20',
'milesPerMT_avg': 3,
'milesPerMT_max': 3.0,
'milesPerMT_min': 11},
{'milesPerMT_avg': 3,
'milesPerMT_max': 3.0,
'milesPerMT_min': 11}],
'weatherStatus': 'ROUGH'}],
'vesselId': '1',
'vesselName': 'ALPHA 01'}]
我想要的方式如下所示,
[
{
"vesselId": 1,
"vesselName": "ALPHA 01",
"fuelPerformance": {
"Good": {
"B": [
{
"speed": "10 - 15",
"milesPerMT": {
"Min": 8,
"Max": 3,
"Avg": 6
},
"fuelPerMilesPerCargo": nan
}
],
"L": [
{
"speed": "5 - 10",
"milesPerMT": {
"Min": 9,
"Max": 45,
"Avg": 3
},
"fuelPerMilesPerCargo": nan
}
]
},
"Rough": {
"L": [
{
"speed": "10 - 15",
"milesPerMT": {
"Min": 10,
"Max": 3,
"Avg": 3
},
"fuelPerMilesPerCargo": nan
},
{
"speed": "15 - 20",
"milesPerMT": {
"Min": 11,
"Max": 3,
"Avg": 3
},
"fuelPerMilesPerCargo": nan
}
]
}
}
}
]
我正在尝试根据以下逻辑对值进行分组
如果 weatherStatus 为 "Good",而 ballastFlag 可能是 "B" 或 "L",则将 B 和 L 各自对应的所有键和值分别归为一组。weatherStatus 为 "Rough" 时也做同样的处理。
在我目前得到的输出中,无法把这三项(speed、milesPerMT、fuelPerMilesPerCargo)放到 ballastFlag 之下。
我不知道在当前的代码中应该如何按 weatherStatus 和 ballastFlag 对它们进行分组。
解决方案
为了解决“循环遍历 new_data 并检查某个元素是否已经存在”的问题,我采用的做法是维护一个小的元数据字典,用来保存已出现过的 vesselId、weatherStatus 和 ballastFlag 信息,如下所示:
{'1': {'Good': ['B', 'L'], 'ROUGH': ['L']}}
Python 脚本:
# Grouped result: one dict per vessel, shaped as
#   {"vesselId", "vesselName", "fuelPerformance": {weatherStatus: {ballastFlag: [record, ...]}}}
new_data = []

# Bookkeeping of what has already been seen, shaped as
#   {vesselId: {weatherStatus: [ballastFlag, ...]}}
new_data_meta = {}


def get_vessel(_id):
    """Return the entry of ``new_data`` whose ``"vesselId"`` equals ``_id``.

    The scan stops at the first match instead of materialising every match.

    Raises:
        IndexError: if no vessel with that id has been added to ``new_data``.
    """
    for vessel in new_data:
        if vessel["vesselId"] == _id:
            return vessel
    raise IndexError(f"no vessel with vesselId {_id!r} in new_data")


def _performance_record(item):
    """Build the record that is stored in a ballast flag's list for ``item``."""
    return {
        "speed": item["speedRange"],
        "milesPerMT": {
            "Min": item["milesPerMT_min"],
            "Max": item["milesPerMT_max"],
            "Avg": item["milesPerMT_avg"],
        },
        "fuelPerMilesPerCargo": item["fuelPerMilesPerCargo"],
    }


for item in data:
    vessel_id = item["vesselId"]
    weather = item["weatherStatus"]
    ballast = item["ballastFlag"]
    # Build the record first so that a missing key fails before anything
    # is added to new_data / new_data_meta.
    record = _performance_record(item)

    if vessel_id in new_data_meta:
        vessel = get_vessel(vessel_id)
    else:
        # First time this vessel is seen: start it with empty groupings.
        vessel = {
            "vesselId": vessel_id,
            "vesselName": item["vesselName"],
            "fuelPerformance": {},
        }
        new_data.append(vessel)
        new_data_meta[vessel_id] = {}

    # Record the (weatherStatus, ballastFlag) pair once per vessel.
    seen_flags = new_data_meta[vessel_id].setdefault(weather, [])
    if ballast not in seen_flags:
        seen_flags.append(ballast)

    # Create the weatherStatus / ballastFlag levels on demand, then append.
    by_ballast = vessel["fuelPerformance"].setdefault(weather, {})
    by_ballast.setdefault(ballast, []).append(record)
返回以下输出:
[{
"vesselId": "1",
"vesselName": "ALPHA 01",
"fuelPerformance": {
"Good": {
"B": [{
"speed": "10-15",
"milesPerMT": {
"Min": "cycling",
"Max": 3,
"Avg": 3
},
"fuelPerMilesPerCargo": nan
}],
"L": [{
"speed": "5-10",
"milesPerMT": {
"Min": "cycling",
"Max": 45,
"Avg": 3
},
"fuelPerMilesPerCargo": nan
}]
},
"ROUGH": {
"L": [{
"speed": "10-15",
"milesPerMT": {
"Min": "reading",
"Max": 3.0,
"Avg": 3
},
"fuelPerMilesPerCargo": nan
},
{
"speed": "15-20",
"milesPerMT": {
"Min": "reading",
"Max": 3.0,
"Avg": 3
},
"fuelPerMilesPerCargo": nan
}
]
}
}
}]