首页 > 解决方案 > Python XML 到 CSV - 属性问题

问题描述

今天我尝试将简单的 xml 转换为 csv 文件。XML:

<cars>
<car>
<id>1234</id>
<name>Name Of Car</name>
<description>Nice Car! </description>
<image>http://www.images.com</image>
<cost>432</cost>
<cat>roadster</cat>
<property name="Url">someurl</property>
<property name="Color">red</property>
<property name="Produce">fiat</property>
<property name="SystemID">15</property>
</car>
</cars>
# Read test.xml and write one CSV row per <car> element.
document = ET.parse("test.xml")
cars_root = document.getroot()
out_file = open('ResidentData.csv', 'w', encoding='utf-8-sig')
writer = csv.writer(out_file)

# Column titles for the CSV; the last four are meant for the <property> values.
header = [
    "ID", "NAME", "DESC", "IMG", "PRICE", "TYPE",
    "URL", "COLOR", "PRODUCE", "SYSID",
]
# Plain child elements copied into each row, in column order.
child_tags = ('id', 'name', 'description', 'image', 'cost', 'cat')

for row_number, car in enumerate(cars_root.findall('car'), start=1):
    # The header is emitted only once, right before the first data row.
    if row_number == 1:
        writer.writerow(header)

    # Only the six plain children are read; <property> elements are not
    # handled here, so the last four columns stay empty.
    writer.writerow([car.find(tag).text for tag in child_tags])
    print(row_number)

out_file.close()

输出:

ID NAME DESC IMG PRICE TYPE URL COLOR PRODUCE SYSID
1234 Name Of Car Nice Car! http:.. 432 roadster

到目前为止一切正常,但我不知道如何循环处理最后几列,因为它们都是 <property name="x"> 形式的元素。有时 SYSID 或 COLOR 也可能不存在(系统中为空)。

附言:在预览中,我的表格看起来是正常的...

谢谢!:)

标签: python xml loops csv

解决方案


见下文(代码根据 xml 数据动态构建 csv 表头)。代码也能处理缺失的 property。它假设所有 property 都出现在第一辆车中。

import xml.etree.ElementTree as ET
import csv
from typing import NamedTuple



# Sample input: two <car> elements. In the second car the "Color" property is
# commented out, so that car has no Color <property> element.
xml = '''<?xml version="1.0" encoding="UTF-8"?>
<cars>
   <car>
      <id>1234</id>
      <name>Name Of Car</name>
      <description>Nice Car!</description>
      <image>http://www.images.com</image>
      <cost>432</cost>
      <cat>roadster</cat>
      <property name="Url">someurl</property>
      <property name="Color">red</property>
      <property name="Produce">fiat</property>
      <property name="SystemID">15</property>
   </car>
   <car>
      <id>1235</id>
      <name>Name Of Car1</name>
      <description>Nice Car!1</description>
      <image>http://www.images.com</image>
      <cost>435</cost>
      <cat>roadster45</cat>
      <property name="Url">someurl34</property>
      <!-- property name="Color">green</property -->
      <property name="Produce">nest</property>
      <property name="SystemID">45</property>
   </car>
</cars>'''

class EntryProp(NamedTuple):
    """Descriptor for a single output column.

    ``name`` is the column's source name and ``is_prop`` is a boolean flag
    stored alongside it; both are supplied by the code that builds the tuple.
    """

    name: str
    is_prop: bool

# Flatten every <car> in the sample document into one CSV row.
root = ET.fromstring(xml)
meta = []  # column descriptors, derived from the first <car> only
data = []  # one list of cell values per <car>
for idx, car in enumerate(root.findall('.//car')):
    if idx == 0:
        # The first car defines the columns: plain children are keyed by
        # their tag, <property> children by their "name" attribute.
        for p in car:
            if p.tag != 'property':
                meta.append(EntryProp(p.tag, False))
            else:
                meta.append(EntryProp(p.attrib['name'], True))

    values = []
    for entry in meta:
        if entry.is_prop:
            ele = car.find(f'property[@name="{entry.name}"]')
        else:
            ele = car.find(entry.name)
        # A column that is absent from this car becomes an empty cell instead
        # of raising AttributeError on ``None.text``.
        values.append('' if ele is None else ele.text)
    data.append(values)


# newline='' hands line-ending control to the csv module (otherwise blank
# rows appear on Windows); an explicit encoding keeps the output portable.
with open('out.csv', 'w', newline='', encoding='utf-8') as f:
    wr = csv.writer(f)
    wr.writerow([p.name.upper() for p in meta])
    wr.writerows(data)

out.csv

ID,NAME,DESCRIPTION,IMAGE,COST,CAT,URL,COLOR,PRODUCE,SYSTEMID
1234,Name Of Car,Nice Car!,http://www.images.com,432,roadster,someurl,red,fiat,15
1235,Name Of Car1,Nice Car!1,http://www.images.com,435,roadster45,someurl34,,nest,45

推荐阅读