首页 > 解决方案 > 将两个 CSV 转换为一个 json

问题描述

此代码在终端中生成记录,但是当我打开 json 时它是空白的。有人能帮我吗?

import csv
import json

refcsvfile = open('referralsource.csv', 'r')
jsonfile = open('redrock.json', 'w')

concsvfile = open('contacts.csv', 'r')

reffieldnames = ("ReferralSource_Name","OrganizationType","PrimaryRelationshipManager","ReferralSourceStatus","RSContactSourceType"
)
confieldnames = ("ReferralSource_Name","OrganizationName","IsOrganizationContact","FirstName","Middle","LastName","Role","Line1","City","State","Zip","Phone","PhoneType","PhonePrimary","OkToLeaveVM","PhoneActive","E-mail","OkToSendEmail","ContactPrimaryRelationshipManager","IsPrimaryContact","ContactSourceType"
)

refreader = csv.DictReader( refcsvfile, reffieldnames)
conreader = csv.DictReader( concsvfile, confieldnames)

output=[];
refcount=0
for refrow in refreader:
    refrow['ReferralSourceContacts'] = []
    output.append(refrow)
    concsvfile.seek(0)
    for conrow in conreader:
        if(conrow["ReferralSource_Name"]==refrow["ReferralSource_Name"]):
            refrow['ReferralSourceContacts'].append(conrow)
            output.append(refrow)
    refcount = refcount +1
print(output)
json.dump(output, jsonfile,sort_keys=True)

我希望 json 看起来像:

{
    "ReferralSource_Name": "Demo Facility",
    "OrganizationType": "Hospital",
    "RSContactSourceType": "DirectInboundTelephone",
    "ReferralSourceStatus": "Active",
    "PrimaryRelationshipManager": "John Doe",

    },
    "ReferralSourceContacts": [
      {
        "IsOrganizationContact": true,
        "OrganizationName": "Demo Facility",
        "FirstName": "John",
        "LastName": "Smith",
        "Role": "Doctor",
        "Line1": "123 abc Street",
        "Zip": "44720",
        "City": "Canton",
        "State": "OH",
        "Phone": "555-555-555",
        "PhoneType": "Office",
        "PhonePrimary": "True",
        "PhoneActive": "True",
        "Email": "doc@doc.doc",
        "OkToLeaveVm": true,
        "OkToSendEmail": true,
        "ContactSourceType": "DirectInboundTelephone"
        "ContactPrimaryRelationshipManager": "John Doe"
      }

        "IsOrganizationContact": true,
        "OrganizationName": "Test Facility",
        "FirstName": "Jane",
        "LastName": "Smith",
        "Role": "Doctor",
        "Line1": "123 abc Street",
        "Zip": "44720",
        "City": "Canton",
        "State": "OH",
        "Phone": "555-555-555",
        "PhoneType": "Office",
        "PhonePrimary": "True",
        "PhoneActive": "True",
        "Email": "doc2@doc.doc",
        "OkToLeaveVm": true,
        "OkToSendEmail": true,
        "ContactSourceType": "DirectInboundTelephone"
        "ContactPrimaryRelationshipManager": "John Doe"
      }
    ]

基本上,我有一个用于父实体推荐源的文件(将其视为公司),还有另一个用于联系人的 csv(将它们视为公司的人员)。我需要将这两个组合到提到的 JSON 中进行导入。

标签: pythonjsoncsv

解决方案


像这样的东西应该工作......

  • 用于with管理文件。
  • 假设联系人列表不是很大,最好将其加载到内存中,而不是在每一行的开头查找文件。
  • 更好的是,由于它现在在内存中,我们可以使用 acollections.defaultdict按引用字段进行预分组,从而使创建输出成为一个简单的 dict 查找。
import csv
import json
import collections

reffieldnames = (
    "ReferralSource_Name",
    "OrganizationType",
    "PrimaryRelationshipManager",
    "ReferralSourceStatus",
    "RSContactSourceType",
)
confieldnames = (
    "ReferralSource_Name",
    "OrganizationName",
    "IsOrganizationContact",
    "FirstName",
    "Middle",
    "LastName",
    "Role",
    "Line1",
    "City",
    "State",
    "Zip",
    "Phone",
    "PhoneType",
    "PhonePrimary",
    "OkToLeaveVM",
    "PhoneActive",
    "E-mail",
    "OkToSendEmail",
    "ContactPrimaryRelationshipManager",
    "IsPrimaryContact",
    "ContactSourceType",
)

# Read contacts into memory to avoid slow seek/re-read
with open("contacts.csv", "r") as concsvfile:
    conreader = csv.DictReader(concsvfile, confieldnames)
    con_rows = list(conreader)

# Group contacts by referrer for faster lookup
con_map = collections.defaultdict(list)
for con_row in con_rows:
    con_map[con_row["ReferralSource_Name"]].append(con_row)

with open("referralsource.csv", "r") as refcsvfile:
    output = []
    for refrow in csv.DictReader(refcsvfile, reffieldnames):
        refrow["ReferralSourceContacts"] = con_map[refrow["ReferralSource_Name"]]
        output.append(refrow)
    print("len(output):", len(output))

with open("redrock.json", "w") as jsonfile:
    json.dump(output, jsonfile, sort_keys=True)

推荐阅读