java - ParseXml 在某些提要 xml 上失败
问题描述
我正在制作一个简单的 RSS 提要阅读器,用户可以从固定的站点列表中选择提要。其中 20 个提要可以正常解析,另外 15 个则解析失败(全部 35 个提要都是有效的 XML,但都无法通过 XSD 验证)。应用程序不会崩溃,logcat 中没有错误,只有警告。
可以正常解析的部分提要:
无法正常解析的部分提要:
DOM解析器
import android.annotation.SuppressLint;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
/**
 * Reads an RSS document with the DOM API and collects its {@code <item>}
 * elements into an {@link RSSFeed}.
 *
 * <p>For every item the link and a numeric "date comparison" value derived
 * from {@code <pubDate>} are stored. The date is expected in the fixed
 * RFC 822 layout {@code "EEE, dd MMM yyyy HH:mm:ss zone"}, for example
 * {@code "Mon, 28 Dec 2020 23:26:34 +0000"}; fields are taken from fixed
 * character positions of that layout.
 *
 * <p>The same {@link RSSFeed} instance is reused for the lifetime of this
 * parser, so repeated calls to {@link #parseXml(String)} keep appending to it.
 */
public class DOMParser {
    /** English month abbreviations used by RFC 822 dates; index + 1 is the month number. */
    private static final String[] MONTH_ABBREVIATIONS = {
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
    };

    /** Shortest pubDate that still contains day, month, year, hour and minute. */
    private static final int MIN_PUB_DATE_LENGTH = 22;

    /** Position at which the time zone token starts in the expected layout. */
    private static final int ZONE_START = 26;

    /** Two hours in milliseconds: the shift applied to UTC dates to reach +0200. */
    private static final long ATHENS_OFFSET_MILLIS = 7200000L;

    private RSSFeed _feed = new RSSFeed();

    /**
     * Parses the feed and appends every usable item to the feed object.
     *
     * <p>Items that lack a {@code <link>} or {@code <pubDate>}, or whose date
     * cannot be parsed, are skipped so that one malformed entry does not
     * discard the remaining ones. Failures to load or parse the document as a
     * whole are printed and leave the feed as it was.
     *
     * @param xml location of the feed; it is handed to
     *            {@link DocumentBuilder#parse(String)}, which interprets the
     *            string as a URI rather than as XML text
     * @return the feed holding all items collected so far
     */
    public RSSFeed parseXml(String xml) {
        try {
            // Create required instances
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            // Parse the xml
            Document doc = db.parse(xml);
            doc.getDocumentElement().normalize();
            NodeList nl = doc.getElementsByTagName("item");
            int length = nl.getLength();
            for (int i = 0; i < length; i++) {
                if (nl.item(i).getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                Element eElement = (Element) nl.item(i);
                String link = firstChildText(eElement, "link");
                String pubDate = firstChildText(eElement, "pubDate");
                if (link == null || pubDate == null) {
                    // Without both fields the item cannot be linked or ordered.
                    continue;
                }
                long milliseconds;
                try {
                    milliseconds = toComparisonMillis(pubDate);
                } catch (ParseException e) {
                    // Skip only this item; the rest of the feed is still usable.
                    e.printStackTrace();
                    continue;
                }
                RSSItem _item = new RSSItem();
                _item.setLink(link);
                // NOTE(review): narrowing to int discards the high bits of the
                // millisecond value; confirm whether setDateComparison can accept a long.
                _item.setDateComparison((int) milliseconds);
                // add item to the list
                _feed.addItem(_item);
            }
        } catch (ParserConfigurationException | IOException | SAXException e) {
            e.printStackTrace();
        }
        // Return the final feed once all the Items are added to the RSSFeed
        // Object(_feed).
        return _feed;
    }

    /**
     * Returns the text of the first descendant element with the given tag
     * name, or {@code null} when the parent has no such element.
     */
    private static String firstChildText(Element parent, String tagName) {
        NodeList matches = parent.getElementsByTagName(tagName);
        if (matches.getLength() == 0) {
            return null;
        }
        return matches.item(0).getTextContent();
    }

    /**
     * Converts a pubDate string into the millisecond value used for ordering.
     *
     * <p>The wall-clock time (hour and minute, seconds dropped) is interpreted
     * in the default time zone of the formatter; dates declared as UTC get two
     * hours added so they line up with +0200 dates.
     *
     * @throws ParseException if the string is too short, names an unknown
     *                        month, or does not form a valid date
     */
    private static long toComparisonMillis(String pubDate) throws ParseException {
        String text = pubDate.trim();
        if (text.length() < MIN_PUB_DATE_LENGTH) {
            throw new ParseException("pubDate too short: \"" + text + "\"", text.length());
        }
        String year = text.substring(12, 16);
        String day = text.substring(5, 7);
        String hour = text.substring(17, 19);
        String minute = text.substring(20, 22);
        String month = monthNumber(text.substring(8, 11));
        if (month == null) {
            throw new ParseException("Unknown month in pubDate: \"" + text + "\"", 8);
        }
        // Rebuild as HH:mm dd/MM/yyyy so the month name does not depend on the locale.
        String reformatted = hour + ":" + minute + " " + day + "/" + month + "/" + year;
        @SuppressLint("SimpleDateFormat") SimpleDateFormat formatter = new SimpleDateFormat("HH:mm dd/MM/yyyy");
        formatter.setLenient(false);
        Date parsed = formatter.parse(reformatted);
        assert parsed != null;
        long milliseconds = parsed.getTime();
        // Add time to reach +0200 Athens/Greece
        if (isUtc(text)) {
            milliseconds += ATHENS_OFFSET_MILLIS;
        }
        return milliseconds;
    }

    /**
     * Maps an English three-letter month abbreviation to its two-digit
     * number ("Jan" to "01" ... "Dec" to "12"), or {@code null} if unknown.
     */
    private static String monthNumber(String abbreviation) {
        for (int i = 0; i < MONTH_ABBREVIATIONS.length; i++) {
            if (MONTH_ABBREVIATIONS[i].equals(abbreviation)) {
                int number = i + 1;
                return number < 10 ? "0" + number : String.valueOf(number);
            }
        }
        return null;
    }

    /**
     * Tells whether the zone token of the date denotes UTC, written either as
     * the numeric offset {@code +0000} or as one of the RFC 822 names
     * {@code GMT}, {@code UT} or {@code Z}. A date without a zone token is
     * treated as not UTC.
     */
    private static boolean isUtc(String pubDate) {
        if (pubDate.length() <= ZONE_START) {
            return false;
        }
        String zone = pubDate.substring(ZONE_START).trim();
        return zone.equals("+0000") || zone.equals("GMT") || zone.equals("UT") || zone.equals("Z");
    }
}
logcat 警告
2020-12-28 23:26:34.305 30015-30071/ozma13.riseapp W/System: ClassLoader referenced unknown path: system/framework/mediatek-cta.jar
2020-12-28 23:26:34.423 30015-30071/ozma13.riseapp W/System: ClassLoader referenced unknown path: system/framework/mediatek-cta.jar
2020-12-28 23:26:34.530 30015-30071/ozma13.riseapp W/System.err: org.xml.sax.SAXParseException: Unexpected end of document
2020-12-28 23:26:34.533 30015-30071/ozma13.riseapp W/System.err: at org.apache.harmony.xml.parsers.DocumentBuilderImpl.parse(DocumentBuilderImpl.java:125)
2020-12-28 23:26:34.534 30015-30071/ozma13.riseapp W/System.err: at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:155)
2020-12-28 23:26:34.534 30015-30071/ozma13.riseapp W/System.err: at ozma13.riseapp.DOMParser.parseXml(DOMParser.java:30)
2020-12-28 23:26:34.534 30015-30071/ozma13.riseapp W/System.err: at ozma13.riseapp.MainActivity$AsyncLoadXMLFeed.doInBackground(MainActivity.java:532)
2020-12-28 23:26:34.534 30015-30071/ozma13.riseapp W/System.err: at ozma13.riseapp.MainActivity$AsyncLoadXMLFeed.doInBackground(MainActivity.java:441)
2020-12-28 23:26:34.535 30015-30071/ozma13.riseapp W/System.err: at android.os.AsyncTask$2.call(AsyncTask.java:333)
2020-12-28 23:26:34.535 30015-30071/ozma13.riseapp W/System.err: at java.util.concurrent.FutureTask.run(FutureTask.java:266)
2020-12-28 23:26:34.535 30015-30071/ozma13.riseapp W/System.err: at android.os.AsyncTask$SerialExecutor$1.run(AsyncTask.java:245)
2020-12-28 23:26:34.535 30015-30071/ozma13.riseapp W/System.err: at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1162)
2020-12-28 23:26:34.536 30015-30071/ozma13.riseapp W/System.err: at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:636)
2020-12-28 23:26:34.536 30015-30071/ozma13.riseapp W/System.err: at java.lang.Thread.run(Thread.java:780)
2020-12-28 23:26:35.120 30015-30043/ozma13.riseapp W/ThreadedRenderer: ThreadedRenderer::detachAnimators pid = 30015 threadid = 30043
2020-12-28 23:26:35.183 30015-30043/ozma13.riseapp W/ThreadedRenderer: ThreadedRenderer::detachAnimators pid = 30015 threadid = 30043
2020-12-28 23:26:37.734 30015-30043/ozma13.riseapp W/ThreadedRenderer: ThreadedRenderer::detachAnimators pid = 30015 threadid = 30043
解决方案
解决办法是:对于无法获取的 xml 提要,将其 URL 中的“https”与“http”互换(原来用 https 的改为 http,反之亦然)。我不知道原因,但这样确实解决了我的问题!
推荐阅读
- excel - 有没有办法在考虑某些标准条件的同时将员工平均分配到大型任务列表中?
- windows - Git pull 无法更新本地引用
- excel - InnerText 为特定的跨度类返回空
- asp.net - GET请求在邮递员上工作但不在浏览器中
- node.js - Jsonwebtoken 验证导致 NGINX 超时
- python-3.x - python concurrent.futures 跳过超时进程
- r - 如何将变量的名称及其结果插入 R 中的绘图中
- node.js - AWS S3 访问被本地主机拒绝,但在部署时工作
- php - 如何从xml中获取数据?
- c++ - 为什么可以通过构造函数将临时值分配给引用?