java - Java HTMLUnit WebClient ScriptException 错误
问题描述
我正在使用 HtmlUnit 来抓取网站,使用的版本是 htmlunit-2.19。我知道这是一个重复的问题,但请相信我,我已经尝试了在 Google 上找到的所有解决方案,却仍然遇到这个异常。请参阅下面的异常信息:
com.gargoylesoftware.htmlunit.ScriptException: ReferenceError: "jQuery" is not defined. (URL/lib/dropdown/core.js#3)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954) [htmlunit-2.19.jar:2.19]
at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) [htmlunit-core-js-2.17.jar:na]
at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) [htmlunit-core-js-2.17.jar:na]
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.html.HtmlPage.loadExternalJavaScriptFile(HtmlPage.java:997) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:399) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:277) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799) [htmlunit-2.19.jar:2.19]
at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source) [xercesImpl-2.11.0.jar:na]
at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756) [htmlunit-2.19.jar:2.19]
at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170) [nekohtml-1.9.22.jar:1.9.22]
at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072) [nekohtml-1.9.22.jar:1.9.22]
at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206) [nekohtml-1.9.22.jar:na]
at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330) [nekohtml-1.9.22.jar:na]
at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126) [nekohtml-1.9.22.jar:1.9.22]
at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093) [nekohtml-1.9.22.jar:1.9.22]
at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920) [nekohtml-1.9.22.jar:1.9.22]
at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499) [nekohtml-1.9.22.jar:1.9.22]
at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452) [nekohtml-1.9.22.jar:1.9.22]
at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) [xercesImpl-2.11.0.jar:na]
at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:417) [htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:402) [htmlunit-2.19.jar:2.19]
at com.company.dashboard.service.impl.ReverseServiceImpl.loginToAds(ReverseServiceImpl.java:447) [classes/:na]
at com.company.dashboard.service.impl.ReverseServiceImpl.loginToAds(ReverseServiceImpl.java:462) [classes/:na]
at com.company.dashboard.service.impl.ReverseServiceImpl.getKeyword(ReverseServiceImpl.java:502) [classes/:na]
at com.company.dashboard.service.impl.ReverseServiceImpl.handleReverseBySetting(ReverseServiceImpl.java:879) [classes/:na]
at com.company.dashboard.thread.ConCurrentRunnable.run(ConCurrentRunnable.java:44) [classes/:na]
at com.company.dashboard.thread.CustomThreadPool$WorkerThread.run(CustomThreadPool.java:53) [classes/:na]
Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: ReferenceError: "jQuery" is not defined. (URL/lib/dropdown/core.js#3)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935) ~[htmlunit-core-js-2.17.jar:na]
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919) ~[htmlunit-core-js-2.17.jar:na]
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.notFoundError(ScriptRuntime.java:3996) ~[htmlunit-core-js-2.17.jar:na]
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.name(ScriptRuntime.java:1846) ~[htmlunit-core-js-2.17.jar:na]
at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1627) ~[htmlunit-core-js-2.17.jar:na]
at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) ~[htmlunit-core-js-2.17.jar:na]
at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105) ~[htmlunit-core-js-2.17.jar:na]
at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411) [htmlunit-core-js-2.17.jar:na]
at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309) ~[htmlunit-2.19.jar:2.19]
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286) ~[htmlunit-core-js-2.17.jar:na]
at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115) ~[htmlunit-core-js-2.17.jar:na]
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) ~[htmlunit-2.19.jar:2.19]
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) [htmlunit-2.19.jar:2.19]
... 36 common frames omitted
2019-07-13 11:06:01.078 INFO 5686 --- [ Thread-4] c.g.h.javascript.JavaScriptEngine : Caught script exception
我在 google 上进行了研究,发现了许多关于此异常的解决方案,并且我尝试了所有解决方案,但没有一个解决方案有效。
请参阅以下我已应用的解决方案
解决方案 1
// Attempt 1: create the client and register listeners whose callbacks all have
// empty bodies, so nothing reported to these listeners is acted upon.
// NOTE(review): per the log output quoted earlier, JavaScriptEngine still logs
// "Caught script exception" at INFO level with this setup.
WebClient webClient= new WebClient(BrowserVersion.FIREFOX_38);
// Incorrectness listener with an empty notify(): reported messages are dropped.
webClient.setIncorrectnessListener(new IncorrectnessListener() {
@Override
public void notify(String message, Object origin) {
// Intentionally left empty.
}
});
// Anonymous subclass of SilentCssErrorHandler with no overrides.
webClient.setCssErrorHandler(new SilentCssErrorHandler() {
});
// JavaScript error listener: every callback below is an empty stub.
webClient.setJavaScriptErrorListener(new JavaScriptErrorListener() {
@Override
public void scriptException(InteractivePage page,
ScriptException scriptException) {
// Intentionally left empty: the script exception is ignored here.
}
@Override
public void timeoutError(InteractivePage page, long allowedTime,
long executionTime) {
// Intentionally left empty: the timeout is ignored here.
}
@Override
public void malformedScriptURL(InteractivePage page, String url,
MalformedURLException malformedURLException) {
// Intentionally left empty: the malformed script URL is ignored here.
}
@Override
public void loadScriptError(InteractivePage page, URL scriptUrl,
Exception exception) {
// Intentionally left empty: the script load failure is ignored here.
}
});
// HTML parser listener: both error() and warning() are empty stubs.
webClient.setHTMLParserListener(new HTMLParserListener() {
@Override
public void error(String message, URL url, String html, int line,
int column, String key) {
// Intentionally left empty: parser errors are ignored here.
}
@Override
public void warning(String message, URL url, String html, int line,
int column, String key) {
// Intentionally left empty: parser warnings are ignored here.
}
});
解决方案 2:
// Attempt 2: adjust the client options.
// Switch the CSS option off.
webClient.getOptions().setCssEnabled(false);
// Keep JavaScript on: the asker states the site cannot be scraped without it.
webClient.getOptions().setJavaScriptEnabled(true);
// Do not throw on unhandled script errors. Per the accepted answer, HtmlUnit
// still logs the exception; hiding that output requires logger configuration.
webClient.getOptions().setThrowExceptionOnScriptError(false);
// Presumably stops exceptions for failing HTTP status codes - going by the
// option name; confirm against the WebClientOptions Javadoc.
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
// Presumably stops the response body being printed for failing status codes -
// going by the option name; confirm against the WebClientOptions Javadoc.
webClient.getOptions().setPrintContentOnFailingStatusCode(false);
我还找到了另一种解决方案:setJavaScriptEnabled(false),但我需要启用 JS。如果不启用 JS,我将无法抓取该网站,所以我必须启用 JS。
请告诉我,我的代码中是否还缺少什么?
解决方案
在不知道页面和有关您的代码的更多详细信息的情况下,我只能尝试提供一些建议
- 你的 HtmlUnit 版本确实已经过时了(2.19 发布于 2015 年 11 月 12 日),目前的最新版本是 2.35.0。请使用最新版本。
- 检查真实浏览器的浏览器日志,看看是否还有错误
- webClient.getOptions().setThrowExceptionOnScriptError(false); 会改变 HtmlUnit 的行为:检测到未处理的 JS 异常时不再抛出异常。这与真实浏览器处理 JS 异常的方式大致相同。但是(与真实浏览器不同)HtmlUnit 仍然会在日志中记录该异常。如果您不想看到这些日志输出,则必须配置日志记录器。
推荐阅读
- javascript - 无法从 firebase/storage 导入 { getStorage } 以与 React Native Expo 图像选择器一起使用
- javascript - 如何检查 cookie Testcafe & JS 的值?
- python - AttributeError:模块“skimage”没有属性“__version__”
- django - AWS Polly 生成的 .mp3 文件根本不运行
- excel - PowerPivot 表中的动态 PercentRank
- python - TIFF 压缩 - Fax3SetupState:对于组 3/4 编码/解码,位/样本必须为 1
- swift - swift - 如何在Swift中用一个条形制作具有多种颜色的UIProgressView?
- android-studio - Android Studio:为 Wear 应用中的布局设置约束
- javascript - 从 DOM 元素中获取 CanvasJS 图表的实例
- arrays - 使用 map 过滤两个边界之间的数组