首页 > 解决方案 > 遍历 JSON 对象并将结果存储在 pandas 数据框中

问题描述

我有一个如下所示的 JSON 对象:

data = {'A': {'code': 'Ok',
  'tracepoints': [None,
   None,
   {'alternatives_count': 0,
    'location': [-122.419189, 37.753805],
    'distance': 28.078003,
    'hint': '5Qg7hUqpFQA2AAAAOgAAAAwAAAAPAAAAiVMWQq2VIEIAuABB7FgoQTYAAAA6AAAADAAAAA8AAAD4RAAACwi0-M0TQALvB7T4yRRAAgEAXwX5Wu6N',
    'name': '23rd Street',
    'matchings_index': 0,
    'waypoint_index': 0},
   {'alternatives_count': 0,
    'location': [-122.417866, 37.75389],
    'distance': 26.825184,
    'hint': 'K8w6BRinFYAdAAAACwAAAA0AAAAAAAAAIxmmQTSs6kCiuRFBAAAAAB0AAAALAAAADQAAAAAAAAD4RAAANg20-CIUQAJNDbT4MRNAAgIAnxD5Wu6N',
    'name': '23rd Street',
    'matchings_index': 0,
    'waypoint_index': 1},
   {'alternatives_count': 0,
    'location': [-122.416896, 37.75395],
    'distance': 16.583412,
    'hint': 'Jcw6BSzMOoUqAAAAQwAAABAAAAANAAAA0i_uQb3SOEKKPC9BG1EaQSoAAABDAAAAEAAAAA0AAAD4RAAAABG0-F4UQALyELT48xRAAgEAnxD5Wu6N',
    'name': '23rd Street',
    'matchings_index': 0,
    'waypoint_index': 2},
   {'alternatives_count': 7,
    'location': [-122.415502, 37.754028],
    'distance': 10.013916,
    'hint': 'Jsw6hbN6kQBmAAAACAAAABAAAAANAAAAQOKOQg89nkCKPC9BEMcOQWYAAAAIAAAAEAAAAA0AAAD4RAAAcha0-KwUQAJ6FrT4UhRAAgEAbwX5Wu6N',
    'name': '23rd Street',
    'matchings_index': 0,
    'waypoint_index': 3}],
  'matchings': [{'duration': 50.6,
    'distance': 325.2,
    'weight': 50.6,
    'geometry': 'y{h_gAh~znhF}@k[OmFMoFcAea@IeD[uMAYKsDMsDAe@}@u_@g@aTMwFMwFwAqq@',
    'confidence': 0.374625,
    'weight_name': 'routability',
    'legs': [{'steps': [],
      'weight': 18.8,
      'distance': 116.7,
      'annotation': {'nodes': [1974590926,
        4763953263,
        65359046,
        4763953265,
        5443374298,
        2007343352]},
      'summary': '',
      'duration': 18.8},
     {'steps': [],
      'weight': 12.2,
      'distance': 85.6,
      'annotation': {'nodes': [5443374298,
        2007343352,
        4763953266,
        65359043,
        4763953269,
        2007343354,
        4763953270]},
      'summary': '',
      'duration': 12.2},
     {'steps': [],
      'weight': 19.6,
      'distance': 122.9,
      'annotation': {'nodes': [2007343354,
        4763953270,
        65334199,
        4763953274,
        2007343347]},
      'summary': '',
      'duration': 19.6}]}]},
 'B': {'code': 'Ok',
  'tracepoints': [{'alternatives_count': 0,
    'location': [-122.387971, 37.727587],
    'distance': 11.53267,
    'hint': 'xHWRAEJ2kYALAAAArQAAAA4AAAAsAAAAnpH1QDVG8EJWgBdBa2v0QQsAAACtAAAADgAAACwAAAD4RAAA_YG0-GOtPwJKgrT4t60_AgIA3wf5Wu6N',
    'name': 'Underwood Avenue',
    'matchings_index': 0,
    'waypoint_index': 0},
   {'alternatives_count': 0,
    'location': [-122.388563, 37.727175],
    'distance': 13.565054,
    'hint': 'w3WRgBuxOgVPAAAACAAAABMAAAASAAAA7ONaQo4CrUDv7U1BJdFAQU8AAAAIAAAAEwAAABIAAAD4RAAArX-0-MerPwIsgLT4gqs_AgIAbw35Wu6N',
    'name': 'Jennings Street',
    'matchings_index': 0,
    'waypoint_index': 1},
   {'alternatives_count': 1,
    'location': [-122.388478, 37.725984],
    'distance': 9.601917,
    'hint': 't3WRABexOoWcAAAAbAAAABEAAAALAAAAdujYQqu4lUJXHD1B9-ruQJwAAABsAAAAEQAAAAsAAAD4RAAAAoC0-CCnPwJCgLT4Zqc_AgIAHxP5Wu6N',
    'name': 'Wallace Avenue',
    'matchings_index': 0,
    'waypoint_index': 2}],
  'matchings': [{'duration': 50,
    'distance': 270.4,
    'weight': 50,
    'geometry': 'euu}fAd_~lhFoAlCMTuAvCvC|Bh@`@hXbUnAdADBhDzCzClCXVzZnW\\X~CnC~@qBLWnWej@',
    'confidence': 1e-06,
    'weight_name': 'routability',
    'legs': [{'steps': [],
      'weight': 17.8,
      'distance': 84.8,
      'annotation': {'nodes': [5443147626,
        6360865540,
        6360865536,
        65307580,
        6360865535,
        6360865539,
        6360865531]},
      'summary': '',
      'duration': 17.8},
     {'steps': [],
      'weight': 32.2,
      'distance': 185.6,
      'annotation': {'nodes': [6360865539,
        6360865531,
        6360865525,
        65343521,
        6360865527,
        6360865529,
        6360865523,
        6360865520,
        65321110,
        6360865519,
        6360865522,
        6376329343]},
      'summary': '',
      'duration': 32.2}]}]},
 'C': {'code': 'Ok',
  'tracepoints': [None,
   None,
   {'alternatives_count': 0,
    'location': [-122.443682, 37.713254],
    'distance': 6.968076,
    'hint': 'QXo6hUR6OgUAAAAANQAAAAAAAAAkAAAAAAAAAOCMMUEAAAAA_Z1yQQAAAAAbAAAAAAAAACQAAAD4RAAAXqiz-GZ1PwKiqLP4hnU_AgAAzxL5Wu6N',
    'name': '',
    'matchings_index': 0,
    'waypoint_index': 0},
   {'alternatives_count': 0,
    'location': [-122.442428, 37.714335],
    'distance': 16.488956,
    'hint': 'E3o6BVRukYAJAAAAIgAAAGgAAAAUAAAA2RnSQL_5uUEPjI9CBTlaQQkAAAAiAAAAaAAAABQAAAD4RAAARK2z-J95PwKTrLP4b3k_AgEAXxX5Wu6N',
    'name': 'Allison Street',
    'matchings_index': 0,
    'waypoint_index': 1},
   {'alternatives_count': 1,
    'location': [-122.441751, 37.712761],
    'distance': 17.311636,
    'hint': 'Fno6hRl6OgWZAAAANwAAAAAAAAAKAAAAH4vUQgKXFkIAAAAAXtbYQJkAAAA3AAAAAAAAAAoAAAD4RAAA6a-z-HlzPwKjsLP4q3M_AgAAHwr5Wu6N',
    'name': 'Allison Street',
    'matchings_index': 0,
    'waypoint_index': 2}],
  'matchings': [{'duration': 64.1,
    'distance': 420.1,
    'weight': 66.7,
    'geometry': 'kuy|fAbyjphFcBxEmE`FqJkKiBqBuP}Qgc@ie@eAiAcB}ArA_Eb@mAjKkDnBo@fe@mOrw@kW',
    'confidence': 7.3e-05,
    'weight_name': 'routability',
    'legs': [{'steps': [],
      'weight': 40.1,
      'distance': 235.2,
      'annotation': {'nodes': [5440513673,
        5440513674,
        5440513675,
        65363070,
        1229920760,
        65307726,
        6906452420,
        1229920717,
        65361047,
        1229920749,
        554163599,
        3978809925]},
      'summary': '',
      'duration': 37.5},
     {'steps': [],
      'weight': 26.6,
      'distance': 184.9,
      'annotation': {'nodes': [554163599, 3978809925, 65345518, 8256268328]},
      'summary': '',
      'duration': 26.6}]}]}}

我想提取每个用户(A、B 和 C)的关键节点下的值,并将这些值与相应的用户一起存储在 pandas 数据框中。如下所示:

    value        user
    1974590926  A
    4763953263  A
    65359046    A
    4763953265  A
    5443374298  A
    2007343352  A
    5443374298  A
    2007343352  A
    4763953266  A
    65359043    A
    4763953269  A
    2007343354  A
    4763953270  A
    2007343354  A
    4763953270  A
    65334199    A
    4763953274  A
    2007343347  A
    5443147626  B
    6360865540  B
    6360865536  B
    65307580    B
    6360865535  B
    6360865539  B
    6360865531  B
    6360865539  B
    6360865531  B
    6360865525  B
    65343521    B
    6360865527  B
    6360865529  B
    6360865523  B
    6360865520  B
    65321110    B
    6360865519  B
    6360865522  B
    6376329343  B
    5440513673  C
    5440513674  C
    5440513675  C
    65363070    C
    1229920760  C
    65307726    C
    6906452420  C
    1229920717  C
    65361047    C
    1229920749  C
    554163599   C
    3978809925  C
    554163599   C
    3978809925  C
    65345518    C
    8256268328  C

我可以使用下面的代码仅提取属于用户 C 的节点并将其存储到 pandas 数据帧。但是,我很难添加用户列和其他节点及其相应的用户。有任何想法吗?

import pandas as pd
nodes_df = pd.DataFrame({'node':{}})

for user in output[user]['matchings'][0]['legs']:
    result  = user['annotation']['nodes']
    values_temp = pd.DataFrame(result, columns=['value'])
    values_df = values_df.append(values_temp, ignore_index=True)
values_df.node = values_df.value.astype(int)
values_df

    value
0   5440513673
1   5440513674
2   5440513675
3   65363070
4   1229920760
5   65307726
6   6906452420
7   1229920717
8   65361047
9   1229920749
10  554163599
11  3978809925
12  554163599
13  3978809925
14  65345518
15  8256268328

标签: pythonjsonpandasloops

解决方案


您可以使用json_normalize()withrecord_pathconcat()users:

dfs = []
for user in output.keys():
    df = pd.json_normalize(output, record_path=[user, 'matchings', 'legs', 'annotation', 'nodes'])
    df['user'] = user
    dfs.append(df)
nodes_df = pd.concat(dfs).rename(columns={0: 'node'})

#        node  user
#  1974590926     A
#  4763953263     A
#    65359046     A
#         ...   ...
#  3978809925     C
#    65345518     C
#  8256268328     C

如果有一些用户丢失matchings,您可以检查if 'matchings' in output[user]

dfs = []
for user in output.keys():
    if 'matchings' in output[user]:
        df = pd.json_normalize(output, record_path=[user, 'matchings', 'legs', 'annotation', 'nodes'])
        df['user'] = user
        dfs.append(df)
nodes_df = pd.concat(dfs).rename(columns={0: 'node'})

如果output键是 like('2018-02-03', 'A')并且您将它们迭代为trip,则需要访问它的dateand useras trip[0]and trip[1]

dfs = []
for trip in output.keys():
    if 'matchings' in output[trip]:
        df = pd.json_normalize(output, record_path=[trip, 'matchings', 'legs', 'annotation', 'nodes'])
        df['date'] = trip[0]
        df['user'] = trip[1]
        dfs.append(df)
nodes_df = pd.concat(dfs).rename(columns={0: 'node'})

推荐阅读