python - Python中的XML到数据库
问题描述
我有数千个 XML 文件
它有许多标签和许多属性。
我想把数据放在 SQL Server 表中。
表的结构将是这样的
DocumentPath varchar(1000)
Tag varchar(1000)
ID varchar(1000)
Attribute varchar(1000)
Value varchar(1000)
XML 文件看起来像这样
<?xml version="1.0" encoding="UTF-8"?>
<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:util="http:///org/" xmi:version="2.0">
<cas:NULL xmi:id="0"/>
<tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="5769" language="x-unspecified"/>
<structured:DocumentID xmi:id="13" documentID="5"/>
<structured:DocumentIdPrefix xmi:id="15" documentIdPrefix=""/>
<structured:DocumentPath xmi:id="35" documentPath="J:\T9.xmi"/>
<textspan:Segment xmi:id="37" sofa="1" begin="0" end="5769" id="SIMPLE_SEGMENT" preferredText="SIMPLE_SEGMENT"/>
<textspan:Sentence xmi:id="44" sofa="1" begin="0" end="15" sentenceNumber="0"/>
<textspan:Sentence xmi:id="50" sofa="1" begin="17" end="32" sentenceNumber="1"/>
<textspan:Sentence xmi:id="56" sofa="1" begin="37" end="52" sentenceNumber="2"/>
<syntax:SymbolToken xmi:id="2242" sofa="1" begin="18" end="19" tokenNumber="4"/>
<syntax:SymbolToken xmi:id="2250" sofa="1" begin="19" end="20" tokenNumber="5"/>
<syntax:SymbolToken xmi:id="2301" sofa="1" begin="29" end="30" tokenNumber="11"/>
<syntax:SymbolToken xmi:id="2309" sofa="1" begin="30" end="31" tokenNumber="12"/>
<syntax:NumToken xmi:id="2258" sofa="1" begin="20" end="24" tokenNumber="6" numType="1"/>
<syntax:NumToken xmi:id="2275" sofa="1" begin="25" end="26" tokenNumber="8" numType="1"/>
<syntax:NumToken xmi:id="2292" sofa="1" begin="27" end="29" tokenNumber="10" numType="1"/>
<syntax:NumToken xmi:id="2381" sofa="1" begin="57" end="61" tokenNumber="20" numType="1"/>
<textsem:MedicationMention xmi:id="18523" sofa="1" begin="534" end="540" id="0" ontologyConceptArr="18510" typeID="1" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:MedicationMention xmi:id="16831" sofa="1" begin="543" end="547" id="0" ontologyConceptArr="16818" typeID="1" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:MedicationMention xmi:id="16788" sofa="1" begin="558" end="569" id="0" ontologyConceptArr="16753 16773 16763" typeID="1" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:ProcedureMention xmi:id="22518" sofa="1" begin="328" end="343" id="0" ontologyConceptArr="22505" typeID="5" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:ProcedureMention xmi:id="22682" sofa="1" begin="478" end="490" id="0" ontologyConceptArr="22669" typeID="5" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:ProcedureMention xmi:id="22228" sofa="1" begin="495" end="510" id="0" ontologyConceptArr="22215" typeID="5" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:ProcedureMention xmi:id="21938" sofa="1" begin="794" end="809" id="0" ontologyConceptArr="21925" typeID="5" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:ProcedureMention xmi:id="22868" sofa="1" begin="1057" end="1072" id="0" ontologyConceptArr="22855" typeID="5" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:AnatomicalSiteMention xmi:id="24333" sofa="1" begin="786" end="793" id="0" ontologyConceptArr="24320" typeID="6" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:AnatomicalSiteMention xmi:id="25103" sofa="1" begin="842" end="857" id="0" ontologyConceptArr="25090" typeID="6" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:AnatomicalSiteMention xmi:id="23654" sofa="1" begin="842" end="850" id="0" ontologyConceptArr="23641" typeID="6" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:EventMention xmi:id="25137" sofa="1" begin="117" end="124" id="0" ontologyConceptArr="25124" typeID="1003" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:EventMention xmi:id="25201" sofa="1" begin="3448" end="3458" id="0" ontologyConceptArr="25188" typeID="1003" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<textsem:EventMention xmi:id="25169" sofa="1" begin="3796" end="3803" id="0" ontologyConceptArr="25156" typeID="1003" discoveryTechnique="1" confidence="0.0" polarity="0" uncertainty="0" conditional="false" generic="false" historyOf="0"/>
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text"/>
<structured:SourceData xmi:id="23" noteTypeCode="ClinicalNote" sourceRevisionNbr="0" sourceRevisionDate="2020-03-02 10:48:30"/>
<refsem:UmlsConcept xmi:id="18510" codingScheme="SNOMEDCT_US" code="64197008" score="0.0" disambiguated="false" cui="C0037366" tui="T131" preferredText="Smoke"/>
<refsem:UmlsConcept xmi:id="16818" codingScheme="RXNORM" code="746839" score="0.0" disambiguated="false" cui="C1999262" tui="T122" preferredText="Pack"/>
<refsem:UmlsConcept xmi:id="16753" codingScheme="SNOMEDCT_US" code="410942007" score="0.0" disambiguated="false" cui="C0013227" tui="T121" preferredText="Pharmaceutical Preparations"/>
<cas:View sofa="1" members="8 13 15 17 35"/>
</xmi:XMI>
structured:DocumentPath
标签的值和documentPath attibute
将DocumentPath
在表中
Tag
在 XML 标记的每一行中,例如textspan:Sentence
ID
是xmi:id
价值
属性是每个属性名称,值是属性值
因此这条线
<textspan:Sentence xmi:id="44" sofa="1" begin="0" end="15" sentenceNumber="0"/>
将像这样在表格中
DocumentPath Tag ID Attribute Value
J:\T9.xmi textspan:Sentence 44 sofa 1
J:\T9.xmi textspan:Sentence 44 begin 0
J:\T9.xmi textspan:Sentence 44 end 15
J:\T9.xmi textspan:Sentence 44 sentenceNumber 0
我发现这个网站展示了如何读取一个简单的 XML 文件
但我的情况与每行中的属性数量不同是动态的
解决方案
尝试pip intall untangle
安装untangle
. 一个示例使用您的输入:
from untangle import parse
your_xml = """.. omit.."""
your_obj = parse(your_xml)
your_obj.xmi_XMI.textspan_Segment._attributes
{'xmi:id': '37',
'sofa': '1',
'begin': '0',
'end': '5769',
'id': 'SIMPLE_SEGMENT',
'preferredText': 'SIMPLE_SEGMENT'}
如您所见,所有数据都可以像对象一样访问。
推荐阅读
- objective-c - 为什么我不能在 XPC 服务中为 NSURLSessionDataTask 覆盖 SSL ServerTrust?
- php - 未定义的偏移量:2 和未定义的偏移量:1 和未初始化的字符串偏移量:0
- python - 如何使用 python selenium 在 for 循环中打开多个 chromedrivers?
- elasticsearch - FileBeat:仅主机字段显示为 JSON,而不是字符串
- conv-neural-network - 如何在keras中导入CNN模型
- c# - 当我从另一个窗口更改文本框值时如何更改它
- angular - 使用枚举分配的变量的正确类型定义
- java - 在Android应用程序中将Json转换为Csv格式
- flutter - Flutter 屏幕,在可绘制的屏幕左侧包含一个可滚动和可点击的磁贴
- c++ - 使用 LLVM LegacyPassManager 获取分析结果