python - 如何使用 Python 解析 XSD 文件
问题描述
如何解析下面的 XSD,以获取 <xsd:complexType name="Register-Type" abstract="true"> 中的 3 个名称?
我想获取名称“measures”、“description”和“notes”,并将每个名称放在 csv 的列中(没有其他信息,现在只有这 3 个名称显示为标题)。
我正在尝试使用 lxml,但我不知道如何进入我想要的特定 complexType 标记。
下面是 XSD 文件:
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema targetNamespace="urn:nortel:namespaces:mcp:oms" xmlns:mcp="urn:nortel:namespaces:mcp:oms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:parm="http://www.nortelnetworks.com/namespaces/mcp/params" xmlns:xmllib="http://www.w3.org/2001/03/XMLSchema/TypeLibrary" elementFormDefault="qualified" attributeFormDefault="unqualified">
<xsd:import namespace="http://www.nortelnetworks.com/namespaces/mcp/params" schemaLocation="../parms/basedefs.xsd"/>
<xsd:import namespace="http://www.w3.org/2001/03/XMLSchema/TypeLibrary" schemaLocation="../w3org/TypeLibrary.xsd"/>
<xsd:simpleType name="Precollector-Type">
<xsd:annotation>
<xsd:documentation xml:lang="en">
This enumerates the types of pre-collector interfaces a helper class
will implement.
</xsd:documentation>
</xsd:annotation>
<xsd:restriction base="xsd:string">
<xsd:enumeration value="centurySecond"/>
<xsd:enumeration value="liveQuery"/>
<xsd:enumeration value="transferPeriod"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:simpleType name="Register-Name-Type">
<xsd:annotation>
<xsd:documentation xml:lang="en">
This type defines the pattern for an allowed OM register name.
</xsd:documentation>
</xsd:annotation>
<xsd:restriction base="xsd:string">
<xsd:minLength value="1"/>
<xsd:pattern value="[a-zA-Z_][a-zA-Z0-9_]*"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:simpleType name="Class-Name-Type">
<xsd:annotation>
<xsd:documentation xml:lang="en">
This type defines the pattern for an allowed class name.
</xsd:documentation>
</xsd:annotation>
<xsd:restriction base="xsd:string">
<xsd:minLength value="1"/>
<xsd:pattern value="[a-zA-Z][a-zA-Z0-9_]*"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:simpleType name="Package-Name-Type">
<xsd:annotation>
<xsd:documentation xml:lang="en">
This type defines the pattern for an allowed package name.
</xsd:documentation>
</xsd:annotation>
<xsd:restriction base="xsd:string">
<xsd:minLength value="1"/>
<xsd:pattern value="([a-zA-Z][a-zA-Z0-9_]*)([.][a-zA-Z][a-zA-Z0-9_]*)*"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:simpleType name="Group-Name-Type">
<xsd:annotation>
<xsd:documentation xml:lang="en">
This type defines the pattern for an allowed OM group name.
</xsd:documentation>
</xsd:annotation>
<xsd:restriction base="xsd:string">
<xsd:minLength value="1"/>
<xsd:maxLength value="32"/>
<xsd:pattern value="[a-zA-Z][a-zA-Z0-9_]*"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:complexType name="NE-List-Type">
<xsd:sequence>
<xsd:element name="ne" minOccurs="1" maxOccurs="unbounded">
<xsd:simpleType>
<xsd:restriction base="parm:NE-Base-Type"/>
</xsd:simpleType>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
<xsd:element name="list" type="mcp:NE-List-Type">
<xsd:unique name="UniqueNEinList">
<xsd:selector xpath="mcp:ne"/>
<xsd:field xpath="."/>
</xsd:unique>
</xsd:element>
<xsd:complexType name="OM-NE-List-Type">
<xsd:annotation>
<xsd:documentation xml:lang="en">
This type defines the way that you define which NE types
will generate a given OM group. It uses the NE base type
that we use for our engineering and configuration parameters.
</xsd:documentation>
</xsd:annotation>
<xsd:choice>
<xsd:element name="all">
<xsd:complexType/>
</xsd:element>
<xsd:element ref="mcp:list"/>
</xsd:choice>
</xsd:complexType>
<xsd:complexType name="OM-Installation-Type">
<xsd:sequence>
<xsd:element name="package" type="mcp:Package-Name-Type"/>
<xsd:element ref="mcp:list"/>
</xsd:sequence>
</xsd:complexType>
<!-- Abstract base type for registers: child elements "measures", "description" and an optional "notes" (minOccurs="0"), plus a required "name" attribute. -->
<xsd:complexType name="Register-Type" abstract="true">
<xsd:sequence>
<xsd:element name="measures" type="xmllib:text"/>
<xsd:element name="description" type="xmllib:text"/>
<xsd:element name="notes" type="xmllib:text" minOccurs="0"/>
</xsd:sequence>
<xsd:attribute name="name" type="mcp:Register-Name-Type" use="required"/>
</xsd:complexType>
<xsd:complexType name="Counter-Type">
<xsd:complexContent>
<xsd:extension base="mcp:Register-Type"/>
</xsd:complexContent>
</xsd:complexType>
<xsd:complexType name="Usage-Type">
<xsd:complexContent>
<xsd:extension base="mcp:Register-Type">
<xsd:attribute name="hasThresholds" type="xsd:boolean" default="false"/>
</xsd:extension>
</xsd:complexContent>
</xsd:complexType>
<xsd:complexType name="LowWaterGauge-Type">
<xsd:complexContent>
<xsd:extension base="mcp:Register-Type"/>
</xsd:complexContent>
</xsd:complexType>
<xsd:complexType name="HighWaterGauge-Type">
<xsd:complexContent>
<xsd:extension base="mcp:Register-Type"/>
</xsd:complexContent>
</xsd:complexType>
<xsd:complexType name="Gauge-Ref-Type">
<xsd:attribute name="ref" type="mcp:Register-Name-Type" use="required"/>
</xsd:complexType>
<xsd:complexType name="Multi-Gauge-Ref-Type">
<xsd:attribute name="lwref" type="mcp:Register-Name-Type" use="required"/>
<xsd:attribute name="hwref" type="mcp:Register-Name-Type" use="required"/>
</xsd:complexType>
<xsd:complexType name="WaterMark-Type">
<xsd:complexContent>
<xsd:extension base="mcp:Register-Type">
<xsd:choice>
<xsd:element name="lwg" type="mcp:Gauge-Ref-Type"/>
<xsd:element name="hwg" type="mcp:Gauge-Ref-Type"/>
<xsd:element name="bwg" type="mcp:Multi-Gauge-Ref-Type"/>
</xsd:choice>
</xsd:extension>
</xsd:complexContent>
</xsd:complexType>
<!-- "register" is declared abstract; counter, usage, lwGauge, hwGauge and waterMark are each declared with substitutionGroup="mcp:register". -->
<xsd:element name="register" type="mcp:Register-Type" abstract="true"/>
<xsd:element name="counter" type="mcp:Counter-Type" substitutionGroup="mcp:register"/>
<xsd:element name="usage" type="mcp:Usage-Type" substitutionGroup="mcp:register"/>
<xsd:element name="lwGauge" type="mcp:LowWaterGauge-Type" substitutionGroup="mcp:register"/>
<xsd:element name="hwGauge" type="mcp:HighWaterGauge-Type" substitutionGroup="mcp:register"/>
<xsd:element name="waterMark" type="mcp:WaterMark-Type" substitutionGroup="mcp:register"/>
<xsd:complexType name="Helper-Class-Type">
<xsd:sequence>
<xsd:element name="package" type="mcp:Package-Name-Type"/>
<xsd:element name="class" type="mcp:Class-Name-Type"/>
<xsd:element name="collector" type="mcp:Precollector-Type" minOccurs="1" maxOccurs="unbounded" />
</xsd:sequence>
</xsd:complexType>
<xsd:complexType name="Row-Type">
<xsd:sequence>
<xsd:element name="package" type="mcp:Package-Name-Type"/>
<xsd:element name="class" type="mcp:Class-Name-Type"/>
<xsd:element ref="mcp:register" maxOccurs="unbounded"/>
<!-- Note that merely having a precollector with a centurySecond element is enough to indicate that you require -->
<!-- a century second scan, so there's no additional markup on the group to show that. -->
<xsd:element name="precollector" minOccurs="0" type="mcp:Helper-Class-Type">
<xsd:unique name="NoDuplicatePrecollectorTypes">
<xsd:selector xpath="mcp:collector"/>
<xsd:field xpath="."/>
</xsd:unique>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
<xsd:complexType name="Group-Description-Type">
<xsd:sequence>
<xsd:element name="description" type="xmllib:text"/>
<xsd:element name="rowKey" type="xmllib:text"/>
</xsd:sequence>
</xsd:complexType>
<xsd:complexType name="Group-Type">
<xsd:sequence>
<xsd:element name="name" type="mcp:Group-Name-Type"/>
<xsd:element name="row" type="mcp:Row-Type">
<xsd:unique name="uniqueRegisterNameInRow">
<xsd:selector xpath="mcp:counter|mcp:usage|mcp:lwGauge|mcp:hwGauge|mcp:waterMark"/>
<xsd:field xpath="@name"/>
</xsd:unique>
<xsd:key name="lwgauge-name">
<xsd:selector xpath="mcp:lwGauge"/>
<xsd:field xpath="@name"/>
</xsd:key>
<xsd:key name="hwgauge-name">
<xsd:selector xpath="mcp:hwGauge"/>
<xsd:field xpath="@name"/>
</xsd:key>
<xsd:key name="multi-gauge-name">
<xsd:selector xpath="mcp:waterMark/mcp:bwg"/>
<xsd:field xpath="@lwref"/>
<xsd:field xpath="@hwref"/>
</xsd:key>
<xsd:key name="unique-lwg-ref">
<xsd:selector xpath="mcp:waterMark/mcp:lwg"/>
<xsd:field xpath="@ref"/>
</xsd:key>
<xsd:key name="unique-hwg-ref">
<xsd:selector xpath="mcp:waterMark/mcp:hwg"/>
<xsd:field xpath="@ref"/>
</xsd:key>
<xsd:key name="unique-bwg-lwref">
<xsd:selector xpath="mcp:waterMark/mcp:bwg"/>
<xsd:field xpath="@lwref"/>
</xsd:key>
<xsd:key name="unique-bwg-hwref">
<xsd:selector xpath="mcp:waterMark/mcp:bwg"/>
<xsd:field xpath="@hwref"/>
</xsd:key>
<!-- Unfortunately, I cannot figure out a way to -->
<!-- prevent someone from referencing a gauge -->
<!-- simultaneously from a l/hwg and bwg element -->
<!-- without using something like schematron. -->
<xsd:keyref name="lwg-ref" refer="mcp:lwgauge-name">
<xsd:selector xpath="mcp:waterMark/mcp:lwg"/>
<xsd:field xpath="@ref"/>
</xsd:keyref>
<xsd:keyref name="hwg-ref" refer="mcp:hwgauge-name">
<xsd:selector xpath="mcp:waterMark/mcp:hwg"/>
<xsd:field xpath="@ref"/>
</xsd:keyref>
<xsd:keyref name="bwg-lwref" refer="mcp:lwgauge-name">
<xsd:selector xpath="mcp:waterMark/mcp:bwg"/>
<xsd:field xpath="@lwref"/>
</xsd:keyref>
<xsd:keyref name="bwg-hwref" refer="mcp:hwgauge-name">
<xsd:selector xpath="mcp:waterMark/mcp:bwg"/>
<xsd:field xpath="@hwref"/>
</xsd:keyref>
</xsd:element>
<xsd:element name="documentation" type="mcp:Group-Description-Type"/>
<xsd:element name="generatedOn" type="mcp:OM-NE-List-Type"/>
</xsd:sequence>
<xsd:attribute name="traffic" type="xsd:boolean" default="false"/>
<xsd:attribute name="neLevel" type="xsd:boolean" default="false">
<xsd:annotation>
<xsd:documentation>
This attribute is set to true for those OM groups that measure
something that can only be counted on the active network element
instance. Many call-related counts require this attribute to
be set to "true" to ensure that any averages are calculated
correctly.
</xsd:documentation>
</xsd:annotation>
</xsd:attribute>
</xsd:complexType>
<xsd:element name="omGroups">
<xsd:annotation>
<xsd:documentation xml:lang="en">
Each OM row class can belong to one and only one OM group.
That is more restrictive than the underlying framework, but
it would be difficult to impossible to make the documentation
markup work since the meaning of the fields would change depending on
the OM group to which the class belonged.
</xsd:documentation>
</xsd:annotation>
<xsd:complexType>
<xsd:sequence>
<xsd:element name="install" type="mcp:OM-Installation-Type" minOccurs="1" maxOccurs="unbounded"/>
<xsd:element name="group" type="mcp:Group-Type" minOccurs="1" maxOccurs="unbounded"/>
</xsd:sequence>
</xsd:complexType>
<!-- Each OM group name must be unique -->
<xsd:unique name="uniqueGroupName">
<xsd:selector xpath="mcp:group"/>
<xsd:field xpath="mcp:name"/>
</xsd:unique>
<!-- Each OM row class can belong to one and only one OM group. -->
<!-- That is more restrictive than the underlying framework, but -->
<!-- it would be difficult to impossible to make the documentation -->
<!-- markup work since the meaning of the fields would change depending on -->
<!-- the OM group to which the class belonged. -->
<xsd:unique name="uniqueRowClass">
<xsd:selector xpath="mcp:group/mcp:row"/>
<xsd:field xpath="mcp:package"/>
<xsd:field xpath="mcp:class"/>
</xsd:unique>
</xsd:element>
</xsd:schema>
我试过的
from xml.etree import ElementTree
import csv
# Clark-notation prefix ElementTree uses for tags in the XML Schema namespace.
XSD_NS = '{http://www.w3.org/2001/XMLSchema}'


def recurse(root, type_name='Register-Type'):
    """Return the element names declared in a named complexType's sequence.

    Searches ``root`` and all of its descendants for an ``xsd:complexType``
    whose ``name`` attribute equals ``type_name`` and collects the ``name``
    attribute of every ``xsd:element`` that is a direct child of that type's
    ``xsd:sequence``.

    Args:
        root: An ``xml.etree.ElementTree.Element`` (normally the
            ``xsd:schema`` root of the parsed XSD).
        type_name: Value of the ``name`` attribute of the complexType to
            inspect. Defaults to ``'Register-Type'``.

    Returns:
        A list of element names in document order. The list is empty when
        no such complexType exists or when it has no ``xsd:sequence`` child.
    """
    # Element.iter() already walks the whole tree, so no manual recursion
    # over the children is required.
    for complex_type in root.iter(XSD_NS + 'complexType'):
        if complex_type.get('name') != type_name:
            continue
        sequence = complex_type.find(XSD_NS + 'sequence')
        if sequence is None:
            return []
        # The names live in the "name" attribute; the sequence's .text is
        # only the whitespace between tags. Elements declared with "ref"
        # instead of "name" are skipped.
        return [
            element.get('name')
            for element in sequence.findall(XSD_NS + 'element')
            if element.get('name') is not None
        ]
    return []


if __name__ == '__main__':
    root = ElementTree.parse('Omschema.xsd').getroot()
    # newline='' is what the csv module expects for files it writes; the
    # with-block closes the file even if writing fails.
    with open('Out.csv', 'w', newline='', encoding='utf-8') as sitescope_data:
        # The three names form the single header row of the CSV.
        csv.writer(sitescope_data).writerow(recurse(root))
解决方案
既然你标记了 BeautifulSoup,下面是如何做到这一点:
import csv
from bs4 import BeautifulSoup
# your_xml is a placeholder for the text of the XSD document shown above.
soup = BeautifulSoup(your_xml, "xml")

# Locate the single complexType whose name attribute is "Register-Type".
register_type = soup.find("xsd:complexType", {"name": "Register-Type"})
if register_type is None:
    # Fail with a clear message before the output file is created/truncated.
    raise ValueError('complexType "Register-Type" not found in the schema')

# The column titles are the name attributes of the xsd:element tags
# nested inside that complexType.
headers = [tag["name"] for tag in register_type.find_all("xsd:element")]

# newline='' lets the csv module control line endings; without it, extra
# blank rows appear on platforms that translate "\n" to "\r\n".
with open('data.csv', 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=headers)
    # Only the header row is written; there are no data rows yet.
    writer.writeheader()
data.csv:
measures,description,notes
推荐阅读
- amazon-web-services - AWS Route 53 DNS 设置和更改设置
- mysql - 为什么下面的查询没有给出正确的输出
- python - 排序字典返回 NoneType 而不是列表?
- javascript - 为什么我的视频字体很棒的图标在点击时不起作用?
- r - 有没有办法自动从两个或多个列表中提取元素?
- r - 将函数应用于一个表的每个条目到另一个表的每个条目
- jquery - 如何删除jQuery中可滚动div的最后一个div?
- port - GCE 防火墙阻止端口访问,尽管在防火墙规则中设置了“允许”
- python - 如何让 pycharm 识别返回 X 的子类?
- jira - Atlassian 帐户 ID