首页 > 解决方案 > 使用标记化进行迭代

问题描述

我有一份包含 800 封来往威廉的信件的文件,简化如下:

<?xml version="1.0" encoding="UTF-8"?>
<root>
    <TEI xml:id="1">
        <correspDesc>
            <correspAction type="sent">
                <persName key="william"/>
            </correspAction>
            <correspAction type="received">
                <persName key="anna"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
    <TEI xml:id="2">
        <correspDesc>
            <correspAction type="sent">
                <persName key="anna"/>
            </correspAction>
            <correspAction type="received">
                <persName key="william"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
    <TEI xml:id="3">
        <correspDesc>
            <correspAction type="sent">
                <persName key="william"/>
            </correspAction>
            <correspAction type="received">
                <persName key="bertram"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
    <TEI xml:id="4">
        <correspDesc>
            <correspAction type="sent">
                <persName key="charlotte"/>
                <persName key="anna"/>
            </correspAction>
            <correspAction type="received">
                <persName key="william"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
    <TEI xml:id="5">
        <correspDesc>
            <correspAction type="sent">
                <persName key="william"/>
                <persName key="charlotte"/>
                <persName key="bertram"/>
            </correspAction>
            <correspAction type="received">
                <persName key="anna"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
    <TEI xml:id="6">
        <correspDesc>
            <correspAction type="sent">
                <persName key="abraham"/>
                <persName key="anna"/>
            </correspAction>
            <correspAction type="received">
                <persName key="william"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
    <TEI xml:id="7">
     <correspDesc>
        <correspAction type="sent">
            <persName key="abraham"/>
            <persName key="william"/>
        </correspAction>
        <correspAction type="received">
            <persName key="charlotte"/>
        </correspAction>
       </correspDesc>
    </TEI>
</root>

我想按通信对象分别提取各组通信(安娜和她的秘书亚伯拉罕属于同一组通信,另外两组分别是伯特伦和夏洛特)。

输出应该: 1) 在 TEI 元素中写下主要通信作者的姓名(即@correspondence="anna",即使这封信来自亚伯拉罕)

2) 按通信组依次复制所有信件,即先是所有属于 anna 和 abraham 的信件,之后是 bertram 的,之后是 charlotte 的

3)当威廉和夏洛特一起写信给安娜时(就像在第5封信中一样)这封信只属于安娜通信而不属于夏洛特

到目前为止,我有这个:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Groups the letters exchanged with 'william' by correspondence partner.
    For every group listed in $persons, the matching TEI elements are copied
    into the output and labelled with a @correspondence attribute that holds
    the first name of the group.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    exclude-result-prefixes="xs"
    version="2.0">
    <xsl:output method="xml" encoding="utf-8" indent="no"/>

    <!-- Space-separated list of correspondence groups; the first name of each group is used as its label. -->
    <xsl:param name="persons">anna-abraham bertram charlotte</xsl:param> <!-- A dash marks correspondences belonging together -->

    <!-- Emits a new root element containing one labelled TEI copy per selected letter. -->
    <xsl:template match="root">
        <root>
            <!-- Keep a handle on the root element: inside the for-each loops below the
                 context item is a string token, so the input tree must be reached via a variable. -->
            <xsl:variable name="ruth" select="." as="node()"/>
            <!-- Outer loop: one iteration per group, e.g. 'anna-abraham'. -->
            <xsl:for-each select="tokenize($persons,' ')">
                <xsl:variable name="correspondents" select="." as="xs:string"/>
                <!-- The first name in the group becomes the value of @correspondence. -->
                <xsl:variable name="corr-main" select="tokenize($correspondents, '-')[1]" as="xs:string"/>
                <!-- Inner loop: one iteration per individual name in the group.
                     NOTE: the letter selection below is evaluated separately for every name,
                     so a letter that matches several names of the same group is written
                     once per matching name. -->
                <xsl:for-each select="tokenize($correspondents, '-')">
                <xsl:variable name="correspondent" as="xs:string" select="."/>
                <!-- Union of two selections:
                     1) 'william' is among the receivers, $corr-main is not among the receivers,
                        and $correspondent is among the senders;
                     2) 'william' is among the senders, $corr-main is not among the senders,
                        and $correspondent is among the receivers. -->
                <xsl:for-each select="$ruth/TEI[correspDesc[correspAction[@type='received']/persName/@key = 'william' and not(correspAction[@type='received']/persName/@key = $corr-main) and correspAction[@type='sent']/persName/@key = $correspondent]]|
                    $ruth/TEI[correspDesc[correspAction[@type='sent']/persName/@key = 'william' and not(correspAction[@type='sent']/persName/@key = $corr-main) and correspAction[@type='received']/persName/@key = $correspondent]]">
                    <!-- Rebuild the TEI element with the group label, then copy its attributes and children. -->
                    <TEI correspondence="{$corr-main}">
                        <xsl:apply-templates select="@*|node()"/>
                    </TEI>
                </xsl:for-each>   
                </xsl:for-each>
            </xsl:for-each>
        </root>
    </xsl:template>

    <!-- Identity template : copy all text nodes, elements and attributes -->  
    <xsl:template match="@*|node()">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()"/>
        </xsl:copy>
    </xsl:template>

</xsl:stylesheet>

有这个输出:

<?xml version="1.0" encoding="utf-8"?><root><TEI correspondence="anna" xml:id="1">


<correspDesc>
        <correspAction type="sent">
            <persName key="william"/>
        </correspAction>
        <correspAction type="received">
            <persName key="anna"/>
        </correspAction>
    </correspDesc>
    <text/>
</TEI><TEI correspondence="anna" xml:id="2">
    <correspDesc>
        <correspAction type="sent">
            <persName key="anna"/>
        </correspAction>
        <correspAction type="received">
            <persName key="william"/>
        </correspAction>
    </correspDesc>
    <text/>
</TEI><TEI correspondence="anna" xml:id="4">
    <correspDesc>
        <correspAction type="sent">
            <persName key="charlotte"/>
            <persName key="anna"/>
        </correspAction>
        <correspAction type="received">
            <persName key="william"/>
        </correspAction>
    </correspDesc>
    <text/>
</TEI><TEI correspondence="anna" xml:id="5">
    <correspDesc>
        <correspAction type="sent">
            <persName key="william"/>
            <persName key="charlotte"/>
            <persName key="bertram"/>
        </correspAction>
        <correspAction type="received">
            <persName key="anna"/>
        </correspAction>
    </correspDesc>
    <text/>
</TEI><TEI correspondence="anna" xml:id="6">
    <correspDesc>
        <correspAction type="sent">
            <persName key="abraham"/>
            <persName key="anna"/>
        </correspAction>
        <correspAction type="received">
            <persName key="william"/>
        </correspAction>
    </correspDesc>
    <text/>
</TEI><TEI correspondence="anna" xml:id="6">
    <correspDesc>
        <correspAction type="sent">
            <persName key="abraham"/>
            <persName key="anna"/>
        </correspAction>
        <correspAction type="received">
            <persName key="william"/>
        </correspAction>
    </correspDesc>
    <text/>
</TEI><TEI correspondence="bertram" xml:id="3">
    <correspDesc>
        <correspAction type="sent">
            <persName key="william"/>
        </correspAction>
        <correspAction type="received">
            <persName key="bertram"/>
        </correspAction>
    </correspDesc>
    <text/>
</TEI><TEI correspondence="charlotte" xml:id="4">
    <correspDesc>
        <correspAction type="sent">
            <persName key="charlotte"/>
            <persName key="anna"/>
        </correspAction>
        <correspAction type="received">
            <persName key="william"/>
        </correspAction>
    </correspDesc>
    <text/>
</TEI></root>

输出是错误的,因为它重复输出了第 6 封信(两份都带有 correspondence="anna"),并且没有输出第 7 封信,尽管它是由 william 写给 charlotte 的。有什么想法吗?有没有可以简化的地方?

标签: xsltforeach

解决方案


在 https://xsltfiddle.liberty-development.net/pPzifp2/7 中,我尝试把您的通信组存储为 XPath 3.1 的 array(xs:string*),例如 [('anna', 'abraham'), 'bertram', 'charlotte'] 表示您要处理三个通信组:第一组有两个名字(anna 和 abraham),第二组和第三组各只有一个名字(即 bertram 和 charlotte)。

对于查找,我使用键按发信人(<xsl:key name="sent-by" match="TEI" use="correspDesc/correspAction[@type = 'sent']/persName/@key"/>)或收信人(<xsl:key name="received-by" match="TEI" use="correspDesc/correspAction[@type = 'received']/persName/@key"/>)选出信件,然后与 william 收到或发出的信件求交集:

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:array="http://www.w3.org/2005/xpath-functions/array"
    exclude-result-prefixes="#all"
    version="3.0">

  <!-- Correspondence groups: each array member is a sequence of names that
       belong together; the first name of a member labels the group. -->
  <xsl:param name="persons" as="array(xs:string*)"
     select="[('anna', 'abraham'), 'bertram', 'charlotte']"/>

  <xsl:output method="xml" indent="yes"/>

  <!-- Anything without an explicit template is copied through unchanged. -->
  <xsl:mode on-no-match="shallow-copy"/>

  <!-- Index the letters (TEI elements) by the keys of their receivers and of their senders. -->
  <xsl:key name="received-by" match="TEI" use="correspDesc/correspAction[@type = 'received']/persName/@key"/>
  <xsl:key name="sent-by" match="TEI" use="correspDesc/correspAction[@type = 'sent']/persName/@key"/>

  <!-- All letters that have 'william' among the receivers / among the senders. -->
  <xsl:variable name="received-by-william" select="key('received-by', 'william')"/>

  <xsl:variable name="sent-by-william" select="key('sent-by', 'william')"/>

  <!-- Copies the document element and fills it, group by group, with the
       letters exchanged between that group and william. -->
  <xsl:template match="/*">
      <!-- Inside the loop the context item is an integer, so the tree to
           search has to be handed to key() explicitly. -->
      <xsl:variable name="letters-root" select="."/>
      <xsl:copy>
          <xsl:for-each select="1 to array:size($persons)">
              <!-- Names of the current group. -->
              <xsl:variable name="group" as="xs:string*" select="$persons(.)"/>
              <!-- Letters sent by a group member to william, together with
                   letters sent by william to a group member. -->
              <xsl:apply-templates
                 select="($received-by-william intersect key('sent-by', $group, $letters-root))
                         union
                         ($sent-by-william intersect key('received-by', $group, $letters-root))">
                  <xsl:with-param name="correspondence" select="$group[1]"/>
              </xsl:apply-templates>
          </xsl:for-each>
      </xsl:copy>
  </xsl:template>

  <!-- Re-creates a letter with an additional @correspondence attribute that
       carries the label passed in by the caller. -->
  <xsl:template match="TEI">
      <xsl:param name="correspondence"/>
      <TEI>
          <xsl:attribute name="correspondence" select="$correspondence"/>
          <xsl:apply-templates select="@* | node()"/>
      </TEI>
  </xsl:template>

</xsl:stylesheet>

这会得到如下结果(前提是我先把 xml:id="7" 的元素更正为包含 correspDesc 元素):

<?xml version="1.0" encoding="UTF-8"?>
<root>
   <TEI correspondence="anna" xml:id="1">
        <correspDesc>
            <correspAction type="sent">
                <persName key="william"/>
            </correspAction>
            <correspAction type="received">
                <persName key="anna"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
   <TEI correspondence="anna" xml:id="2">
        <correspDesc>
            <correspAction type="sent">
                <persName key="anna"/>
            </correspAction>
            <correspAction type="received">
                <persName key="william"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
   <TEI correspondence="anna" xml:id="4">
        <correspDesc>
            <correspAction type="sent">
                <persName key="charlotte"/>
                <persName key="anna"/>
            </correspAction>
            <correspAction type="received">
                <persName key="william"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
   <TEI correspondence="anna" xml:id="5">
        <correspDesc>
            <correspAction type="sent">
                <persName key="william"/>
                <persName key="charlotte"/>
                <persName key="bertram"/>
            </correspAction>
            <correspAction type="received">
                <persName key="anna"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
   <TEI correspondence="anna" xml:id="6">
        <correspDesc>
            <correspAction type="sent">
                <persName key="abraham"/>
                <persName key="anna"/>
            </correspAction>
            <correspAction type="received">
                <persName key="william"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
   <TEI correspondence="bertram" xml:id="3">
        <correspDesc>
            <correspAction type="sent">
                <persName key="william"/>
            </correspAction>
            <correspAction type="received">
                <persName key="bertram"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
   <TEI correspondence="charlotte" xml:id="4">
        <correspDesc>
            <correspAction type="sent">
                <persName key="charlotte"/>
                <persName key="anna"/>
            </correspAction>
            <correspAction type="received">
                <persName key="william"/>
            </correspAction>
        </correspDesc>
        <text/>
    </TEI>
   <TEI correspondence="charlotte" xml:id="7">
        <correspDesc>
            <correspAction type="sent">
                <persName key="abraham"/>
                <persName key="william"/>
            </correspAction>
            <correspAction type="received">
                <persName key="charlotte"/>
            </correspAction>
        </correspDesc>
    </TEI>
</root>

不确定您是否可以使用 XSLT 3(自 Saxon 9.8 起可用),但在 XSLT 2 中构建一个 XML 结构来代替 array(xs:string*) 参数当然也很容易,如 http://xsltransform.net/asnmyG 所示:

  <!-- Correspondence groups as a temporary XML tree: one group element per
       correspondence, one person element per name belonging to it. -->
  <xsl:param name="person-groups">
      <group>
          <person>anna</person>
          <person>abraham</person>
      </group>
      <group>
          <person>bertram</person>
      </group>
      <group>
          <person>charlotte</person>
      </group>
  </xsl:param>


  <!-- Copies the document element and fills it, group by group, with the
       letters exchanged between the members of that group and william. -->
  <xsl:template match="/*">
      <!-- The loop runs over nodes of $person-groups, so the input tree to
           search has to be handed to key() explicitly. -->
      <xsl:variable name="letters-root" select="."/>
      <xsl:copy>
          <xsl:for-each select="$person-groups/group">
              <!-- All person elements of the current group. -->
              <xsl:variable name="members" select="person"/>
              <xsl:apply-templates
                 select="($received-by-william intersect key('sent-by', $members, $letters-root))
                         union
                         ($sent-by-william intersect key('received-by', $members, $letters-root))">
                  <!-- The first person of the group labels the correspondence. -->
                  <xsl:with-param name="correspondence" select="$members[1]"/>
              </xsl:apply-templates>
          </xsl:for-each>
      </xsl:copy>
  </xsl:template>

当然,如果需要,XPath 3.1 数组或 XSLT 2 XML 结构都可以从您的空格/破折号分隔的字符串序列中构建,例如在http://xsltransform.net/asnmyG/1中使用

  <!-- Groups are separated by whitespace; names within a group by a dash. -->
  <xsl:param name="persons">anna-abraham bertram charlotte</xsl:param>

  <!-- Turns the string above into one group element per whitespace-separated
       token, holding one person element per dash-separated name. -->
  <xsl:param name="person-groups">
      <xsl:for-each select="tokenize($persons, '\s+')">
          <xsl:variable name="names" select="tokenize(., '-')"/>
          <group>
              <xsl:for-each select="$names">
                  <person><xsl:value-of select="."/></person>
              </xsl:for-each>
          </group>
      </xsl:for-each>
  </xsl:param>

或在https://xsltfiddle.liberty-development.net/pPzifp2/8

  <!-- Groups are separated by whitespace; names within a group by a dash. -->
  <xsl:param name="persons-string">anna-abraham bertram charlotte</xsl:param>

  <!-- Builds one single-member array per group (the member is the sequence
       of names in that group) and joins them into one array. -->
  <xsl:param name="persons" as="array(xs:string*)"
     select="array:join(for $group in tokenize($persons-string, '\s+') return [ tokenize($group, '-') ])"/>

推荐阅读