首页 > 解决方案 > 如何使用 C 和 libxml2 库在 XML 文件中查找特定标记的出现?

问题描述

<?xml version='1.0' encoding='utf-8'?>
<UnlimitRectCubes>
    <RectCubes>
        <depth>0</depth>
        <rectangle>
            <height>0</height>
            <width>0</width>
        </rectangle>
    </RectCubes>
    <RectCubes>
        <depth>1</depth>
        <rectangle>
            <height>11</height>
            <width>101</width>
        </rectangle>
    </RectCubes>
    <RectCubes>
        <depth>2</depth>
        <rectangle>
            <height>22</height>
            <width>202</width>
        </rectangle>
    </RectCubes>
</UnlimitRectCubes>

注意:我还包括了 .xml 文件

/*
 * Walks the sibling list that starts at a_node, counts the siblings whose
 * name equals *findStr, prints that count with "%d", and then calls itself
 * on the children of every sibling.
 *
 * The counter is local to each call and nothing is returned, so every
 * recursion level prints its own number (including a 0 for each NULL
 * child list) instead of contributing to one overall total.
 */
void print_element_names(xmlNode *a_node, char **findStr)
{
    const xmlChar *key = (const xmlChar *)*findStr;
    xmlNode *sibling;
    int len = 0;

    /* Pass 1: count matching names among the nodes at this level only. */
    for (sibling = a_node; sibling != NULL; sibling = sibling->next)
    {
        if (xmlStrcmp(sibling->name, key) == 0)
        {
            len += 1;
        }
    }
    printf("%d", len);

    /* Pass 2: descend into the children of every node at this level. */
    for (sibling = a_node; sibling != NULL; sibling = sibling->next)
    {
        print_element_names(sibling->children, findStr);
    }
}

/*
 * Parses the XML file named by the single command-line argument and runs
 * print_element_names() over its tree, searching for the tag "depth".
 *
 * Returns 0 on success, and 1 when the argument count is wrong or the
 * file cannot be parsed.
 */
int main(int argc, char **argv)
{
    xmlDoc *doc = NULL;           /* an xml document */
    xmlNode *root_element = NULL; /* a node in a xml tree */
    if (argc != 2)
    {
        return (1);
    }
    LIBXML_TEST_VERSION
    /*  parse the file and get the DOM */
    doc = xmlReadFile(argv[1], NULL, 0);
    if (doc == NULL)
    {
        printf("error could not parse");
        /* main returns int, so a value is required here; a bare
         * `return;` is not valid in a non-void function. */
        return 1;
    }
    /*Get the root element*/
    root_element = xmlDocGetRootElement(doc);
    char *myStr = "depth";
    print_element_names(root_element, &myStr);

    xmlFreeDoc(doc);

    /*
        *free the global variables that may have been allocated by the     parser
    */
    xmlCleanupParser();

    return 0;
}

输出:0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0

我需要统计某个特定标签在 .xml 文件中出现的次数。这里我把标签名作为键 (char*) 传递给函数,并使用 xmlStrcmp() 来判断节点名是否与该标签匹配。

len 应该打印出一个整数值,但实际输出却如上所示。该标签实际出现了 3 次,而输出中恰好有 3 个 1。我认为我没有正确地遍历 XML 树,正确的方法是什么?或者是否有现成的 XML 函数可以实现这一点?

输入:.xml 文件,以及作为键传入的目标标签。预期输出:该标签的出现次数,在本例中应该得到 3。

/*
 * Excerpt from a function body: cur_node, a_node, key, len and findStr are
 * declared outside the lines shown here.
 * Visits every node in the sibling list starting at a_node.
 */
for (cur_node = a_node; cur_node; cur_node = cur_node->next)
{
    if ((cur_node->type == XML_ELEMENT_NODE))
    {
        /*
         * Compares the name of the node's FIRST CHILD (not of cur_node
         * itself) with key; the matching branch is empty, so the result
         * is not used.
         * NOTE(review): cur_node->children is dereferenced without a NULL
         * check - presumably it is NULL for an element with no content;
         * confirm.
         */
        if ((!xmlStrcmp(cur_node->children->name, (const xmlChar *)key)))
        {
            //printf("%s\n", cur_node->children->content);
        }
         /* Prints the element's own name, with no separator. */
         printf("%s", cur_node->name);   
 }
    
    /* Adds the value returned by the recursive call on the children. */
    len += print_element_names(cur_node->children, findStr);
}
/* Prints this call's value of len followed by a space. */
printf("%d ", len);

更新输出: UnlimitRectCubes0 RectCubes0 depth0 2 0 rectangle0 height0 2 0 width0 2 0 10 0 11 0 RectCubes0 depth0 2 0 rectangle0 height0 2 0 width0 2 0 10 0 11 0 RectCubes0 depth0 2 0 rectangle0 height0 2 0 width0 2 0 10 0 11 0 17 3

标签: c xml-parsing libxml2

解决方案


您没有把找到的标签数量返回给上一层调用,因此各层递归得到的计数没有被累加起来。

我尝试了这段代码并得到了正确的结果:

/*
 * Compile with:
 * > gcc `xml2-config --cflags` -std=c99 -o test  test.c `xml2-config --libs`
 * */
 
#include <stdio.h>
#include <stdlib.h>
#include <libxml/parser.h>
#include <libxml/tree.h>

/*
 * Counts the elements named *findStr among a_node, its following siblings
 * and all of their descendants, tracing its progress on stdout.
 *
 * a_node   first node of a sibling list; NULL yields 0.
 * findStr  address of the NUL-terminated tag name to search for; a NULL
 *          pointer or a NULL string yields 0.
 *
 * Returns the number of matching elements found at this level plus the
 * totals returned by the recursive calls on each element's children.
 */
int print_element_names(xmlNode *a_node, char **findStr)
{
    /* Validate before dereferencing findStr. */
    if (a_node == NULL || findStr == NULL || *findStr == NULL)
        return 0;

    const xmlChar *key = (const xmlChar *)*findStr;
    int len = 0;

    printf("a_node->name=%s\n", a_node->name);
    for (xmlNode *cur_node = a_node; cur_node; cur_node = cur_node->next)
    {
        printf("cur_node->name=%s, type=%d\n", cur_node->name, cur_node->type);
        /*
         * Only real elements may count. libxml2 gives text and comment
         * nodes the fixed names "text" and "comment", so a name-only
         * comparison would wrongly match them for those keys.
         */
        if (cur_node->type == XML_ELEMENT_NODE &&
            xmlStrcmp(cur_node->name, key) == 0)
        {
            len++;
        }
    }
    printf("found=%d\n", len);

    /* Add the matches found below each element at this level. */
    for (xmlNode *cur_node = a_node; cur_node; cur_node = cur_node->next)
    {
        if (cur_node->type == XML_ELEMENT_NODE)
        {
            printf("search children for cur_node->name=%s, type=%d\n", cur_node->name, cur_node->type);
            len += print_element_names(cur_node->children, findStr);
        }
    }
    printf("found total for a_node->name %s: %d\n", a_node->name, len);
    return len;
}

/*
 * Parses an XML file and reports how many "depth" elements it contains.
 *
 * The file name is taken from the first command-line argument when one is
 * given; with no arguments it falls back to "test.xml".
 *
 * Returns 0 on success and 1 if the document cannot be parsed.
 */
int main(int argc, char **argv)
{
    const char *path = (argc > 1) ? argv[1] : "test.xml";
    xmlDoc *doc = NULL;           /* an xml document */
    xmlNode *root_element = NULL; /* a node in a xml tree */

    LIBXML_TEST_VERSION
    /*  parse the file and get the DOM */
    doc = xmlReadFile(path, NULL, 0);
    if (doc == NULL)
    {
        /* Diagnostics go to stderr so they do not mix with the result. */
        fprintf(stderr, "error could not parse %s\n", path);
        xmlCleanupParser();
        return 1;
    }
    /*Get the root element*/
    root_element = xmlDocGetRootElement(doc);
    char *myStr = "depth";
    /* Use the returned total instead of discarding it. */
    int total = print_element_names(root_element, &myStr);
    printf("%s occurs %d time(s)\n", myStr, total);

    xmlFreeDoc(doc);
    xmlCleanupParser();

    return 0;
}

推荐阅读