首页 > 解决方案 > ParseXml 在某些提要 xml 上失败

问题描述

我在制作一个简单的 rss 提要阅读器,用户可以从固定的站点列表中进行选择。它对其中 20 个提要有效,但对另外 15 个提要失败(这 35 个提要都是有效的 xml,但都无法通过 xsd 校验)。应用程序不会崩溃,logcat 中没有错误,只有警告。

一些对我有用的提要是:

一些不适合我的提要是:

DOM解析器

import android.annotation.SuppressLint;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

/**
 * Reads an RSS document with the DOM API and collects its {@code <item>} elements
 * into an {@link RSSFeed}.
 *
 * <p>All calls to {@link #parseXml(String)} on the same instance append to the same
 * {@code RSSFeed} object, so items accumulate across calls.
 */
public class DOMParser {

    /** Milliseconds added to UTC-stamped items to bring them to +0200 (Athens/Greece). */
    private static final long ATHENS_OFFSET_MILLIS = 7200000L;

    private RSSFeed _feed = new RSSFeed();

    /**
     * Parses the feed and appends one {@code RSSItem} per usable {@code <item>} element.
     *
     * <p>Items that lack a {@code <link>} or {@code <pubDate>}, or whose {@code <pubDate>}
     * cannot be interpreted, are skipped so that a single malformed entry does not
     * discard the rest of the feed. Parser, I/O and SAX failures are printed and the
     * feed collected so far is returned.
     *
     * @param xml passed to {@code DocumentBuilder.parse(String)}, which interprets its
     *            argument as the URI of the document, not as XML content
     * @return the feed held by this parser, including any items added by this call
     */
    public RSSFeed parseXml(String xml) {

        try {
            // Create required instances
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();

            // Parse the xml (the argument is resolved as a URI by the builder)
            Document doc = db.parse(xml);
            doc.getDocumentElement().normalize();

            NodeList nl = doc.getElementsByTagName("item");
            int length = nl.getLength();

            for (int i = 0; i < length; i++) {

                if (nl.item(i).getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                Element eElement = (Element) nl.item(i);

                String link = firstTagText(eElement, "link");
                String pubDate = firstTagText(eElement, "pubDate");
                if (link == null || pubDate == null) {
                    // Incomplete item: nothing sensible to store, move on to the next one.
                    continue;
                }

                long milliseconds;
                try {
                    milliseconds = toComparisonMillis(pubDate);
                } catch (ParseException e) {
                    // Report and skip only this item instead of aborting the whole feed.
                    e.printStackTrace();
                    continue;
                }

                RSSItem _item = new RSSItem();
                _item.setLink(link);

                // NOTE(review): narrowing to int discards the high bits of an
                // epoch-millisecond value. Kept as-is because the parameter type of
                // RSSItem.setDateComparison is not visible here; widen it to long if possible.
                _item.setDateComparison((int) milliseconds);

                // add item to the list
                _feed.addItem(_item);
            }
        } catch (ParserConfigurationException | IOException | SAXException e) {
            e.printStackTrace();
        }
        // Return the final feed once all the Items are added to the RSSFeed
        // Object(_feed).
        return _feed;
    }

    /** Returns the text of the first descendant element named {@code tag}, or null if there is none. */
    private static String firstTagText(Element parent, String tag) {
        NodeList matches = parent.getElementsByTagName(tag);
        if (matches.getLength() == 0) {
            return null;
        }
        return matches.item(0).getTextContent();
    }

    /**
     * Converts a pubDate such as {@code "Mon, 28 Dec 2020 23:26:34 +0000"} to milliseconds.
     *
     * <p>The date is split on whitespace rather than read at fixed character offsets,
     * so surrounding whitespace, a one-digit day, a missing day name or a textual zone
     * ({@code GMT}) no longer shift or overrun the fields. The hour, minute, day, month
     * and year are parsed as a wall-clock time by a {@code SimpleDateFormat} that uses
     * its default time zone; two hours are then added when the stamp is in UTC.
     *
     * @throws ParseException if the text does not have the expected fields
     */
    private static long toComparisonMillis(String pubDate) throws ParseException {
        String[] parts = pubDate.trim().split("\\s+");
        // The leading day name ("Mon,") is optional.
        int first = parts[0].endsWith(",") ? 1 : 0;
        if (parts.length - first < 4) {
            throw new ParseException("Unsupported pubDate: " + pubDate, 0);
        }

        String day = parts[first];
        String month = monthNumber(parts[first + 1]);
        String year = parts[first + 2];
        String[] clock = parts[first + 3].split(":");
        String zone = parts.length - first > 4 ? parts[first + 4] : "";
        if (month == null || clock.length < 2) {
            throw new ParseException("Unsupported pubDate: " + pubDate, 0);
        }

        // Rebuild as HH:mm dd/MM/yyyy
        String timeDate = clock[0] + ":" + clock[1] + " " + day + "/" + month + "/" + year;

        //Convert date to milliseconds since 00:00 01/01/1970
        @SuppressLint("SimpleDateFormat") SimpleDateFormat formatter = new SimpleDateFormat("HH:mm dd/MM/yyyy");
        formatter.setLenient(false);
        Date parsed = formatter.parse(timeDate);
        long milliseconds = parsed.getTime();

        //Add time to reach +0200 Athens/Greece
        if (isUtcZone(zone)) {
            milliseconds = milliseconds + ATHENS_OFFSET_MILLIS;
        }
        return milliseconds;
    }

    /** Maps an English three-letter month abbreviation to its two-digit number, or null if unknown. */
    private static String monthNumber(String abbreviation) {
        switch (abbreviation) {
            case "Jan": return "01";
            case "Feb": return "02";
            case "Mar": return "03";
            case "Apr": return "04";
            case "May": return "05";
            case "Jun": return "06";
            case "Jul": return "07";
            case "Aug": return "08";
            case "Sep": return "09";
            case "Oct": return "10";
            case "Nov": return "11";
            case "Dec": return "12";
            default: return null;
        }
    }

    /** Tells whether the zone field of a pubDate denotes a zero offset from UTC. */
    private static boolean isUtcZone(String zone) {
        return zone.equals("+0000") || zone.equals("-0000")
                || zone.equals("GMT") || zone.equals("UT")
                || zone.equals("UTC") || zone.equals("Z");
    }

}

logcat 警告

2020-12-28 23:26:34.305 30015-30071/ozma13.riseapp W/System: ClassLoader referenced unknown path: system/framework/mediatek-cta.jar
2020-12-28 23:26:34.423 30015-30071/ozma13.riseapp W/System: ClassLoader referenced unknown path: system/framework/mediatek-cta.jar
2020-12-28 23:26:34.530 30015-30071/ozma13.riseapp W/System.err: org.xml.sax.SAXParseException: Unexpected end of document
2020-12-28 23:26:34.533 30015-30071/ozma13.riseapp W/System.err:     at org.apache.harmony.xml.parsers.DocumentBuilderImpl.parse(DocumentBuilderImpl.java:125)
2020-12-28 23:26:34.534 30015-30071/ozma13.riseapp W/System.err:     at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:155)
2020-12-28 23:26:34.534 30015-30071/ozma13.riseapp W/System.err:     at ozma13.riseapp.DOMParser.parseXml(DOMParser.java:30)
2020-12-28 23:26:34.534 30015-30071/ozma13.riseapp W/System.err:     at ozma13.riseapp.MainActivity$AsyncLoadXMLFeed.doInBackground(MainActivity.java:532)
2020-12-28 23:26:34.534 30015-30071/ozma13.riseapp W/System.err:     at ozma13.riseapp.MainActivity$AsyncLoadXMLFeed.doInBackground(MainActivity.java:441)
2020-12-28 23:26:34.535 30015-30071/ozma13.riseapp W/System.err:     at android.os.AsyncTask$2.call(AsyncTask.java:333)
2020-12-28 23:26:34.535 30015-30071/ozma13.riseapp W/System.err:     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
2020-12-28 23:26:34.535 30015-30071/ozma13.riseapp W/System.err:     at android.os.AsyncTask$SerialExecutor$1.run(AsyncTask.java:245)
2020-12-28 23:26:34.535 30015-30071/ozma13.riseapp W/System.err:     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1162)
2020-12-28 23:26:34.536 30015-30071/ozma13.riseapp W/System.err:     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:636)
2020-12-28 23:26:34.536 30015-30071/ozma13.riseapp W/System.err:     at java.lang.Thread.run(Thread.java:780)
2020-12-28 23:26:35.120 30015-30043/ozma13.riseapp W/ThreadedRenderer: ThreadedRenderer::detachAnimators pid = 30015 threadid = 30043
2020-12-28 23:26:35.183 30015-30043/ozma13.riseapp W/ThreadedRenderer: ThreadedRenderer::detachAnimators pid = 30015 threadid = 30043
2020-12-28 23:26:37.734 30015-30043/ozma13.riseapp W/ThreadedRenderer: ThreadedRenderer::detachAnimators pid = 30015 threadid = 30043

标签: java xml parsing rss feed

解决方案


解决办法是:对于无法获取 xml 的提要,把其地址中的“https”与“http”互换。我不知道为什么,但这就是我的解决方法!


推荐阅读