javascript - JAVA从html脚本中的变量获取Json对象
问题描述
我有一个演示 html,如下所示:
<!DOCTYPE html>
<!-- NOTE(review): there is no opening <html> or <head> tag here, yet </html> is closed at the bottom. -->
<title>Test</title>
<script>
// Immediately-invoked function; `root` is the `this` value passed in at the call site below.
(function (root) {
/* -- Data -- */
// Create root.SG only if it is not already set, then attach the fields to it.
root.SG || (root.SG = {});
root.SG.query = "Ana";
root.SG.timePeriodType = "YEAR";
root.SG.dataType = "SEARCH_ALL";
// "\u002F" is the escape sequence for "/", so this string is "/index/searchHeat".
root.SG.path = "\u002Findex\u002FsearchHeat";
// The payload to extract: pvList holds one inner array of { kwdId, pv, isPeak, date, id } records.
root.SG.data = { "pvList": [[{ "kwdId": 0, "pv": 0, "isPeak": 0, "date": 20200517, "id": 0 }, { "kwdId": 27961, "pv": 150506, "isPeak": 0, "date": 20200518, "id": 13625908607 }, { "kwdId": 27961, "pv": 142961, "isPeak": 0, "date": 20200519, "id": 13625908608 }, { "kwdId": 27961, "pv": 170154, "isPeak": 0, "date": 20200520, "id": 13715544690 }, { "kwdId": 27961, "pv": 160490, "isPeak": 0, "date": 20200521, "id": 13715544691 }]] }
})(this)</script>
<body>Hello
</body>
</html>
我想使用 Java 来获取 root.SG.data 中的对象。
这是我当前的测试代码,但正则表达式没有匹配到任何结果,无法取得 root.SG.data 的内容:
// Fetch the page and take the inner HTML of the first <script> element.
String url = "http://zhishu.sogou.com/index/searchHeat?kwdNamesStr=%E5%AE%A0%E7%89%A9&timePeriodType=YEAR&dataType=SEARCH_ALL&queryType=INPUT";
Document doc = Jsoup.connect(url).get();
String script = doc.selectFirst("script").html();
//System.out.println(script);
// Intended to capture the rest of the line following "root.SG.data = ".
// NOTE(review): the unescaped '.' characters match any character, not just a literal dot,
// and MULTILINE only changes how ^ and $ behave, neither of which appears in this pattern.
Pattern p = Pattern.compile("root.SG.data = (.*)", Pattern.MULTILINE);
Matcher m = p.matcher(script);
// NOTE(review): group(1) is read without a prior m.find() or m.matches(); with no match
// attempted, Matcher.group throws IllegalStateException instead of returning the capture.
String wholedata = m.group(1);
解决方案
JavaFX 的 WebView 可以做到这一点。我使用 JavaFX 13 在 Java 15 上测试了以下内容:
import javafx.application.Application;
import javafx.concurrent.Worker.State;
import javafx.scene.Scene;
import javafx.scene.layout.BorderPane;
import javafx.scene.web.WebEngine;
import javafx.scene.web.WebView;
import javafx.stage.Stage;
import netscape.javascript.JSObject;
/**
 * Minimal JavaFX application that loads {@code jsonvalue.html} into a {@link WebView} and,
 * once the document has finished loading, reads the {@code window.SG} object defined by the
 * page's script.
 *
 * <p>The value is printed to standard output twice: first as the {@link JSObject} handed back
 * by the web engine, then as a pretty-printed JSON string produced by {@code JSON.stringify}.
 * Problems (missing resource, failed load, {@code window.SG} not being an object) are reported
 * explicitly instead of surfacing as a {@code NullPointerException} or
 * {@code ClassCastException}.
 */
public class App extends Application {

    /** Name of the page to load, resolved as a resource relative to this class. */
    private static final String PAGE_RESOURCE = "jsonvalue.html";

    /** Created in {@link #start(Stage)}; read by {@link #stageReady()} once the stage is shown. */
    private WebView webView;

    /**
     * Builds a scene containing only the web view and shows the stage.
     *
     * @param stage the primary stage supplied by the JavaFX runtime
     */
    @Override
    public void start(Stage stage) {
        // Insert a WebView into the scene
        this.webView = new WebView();
        final Scene scene = new Scene(new BorderPane(webView), 640, 480);
        stage.setScene(scene);
        stage.setOnShown(event -> stageReady()); // Continue when the stage is rendered
        stage.show();
    }

    /**
     * Registers a load-state listener on the web engine and navigates to the page.
     *
     * @throws IllegalStateException if {@code jsonvalue.html} cannot be found as a resource
     */
    private void stageReady() {
        // Resolve the page first so that a missing file fails with a clear message
        // rather than a NullPointerException on toExternalForm().
        final java.net.URL page = getClass().getResource(PAGE_RESOURCE);
        if (page == null) {
            throw new IllegalStateException(
                "Resource not found relative to " + getClass().getName() + ": " + PAGE_RESOURCE);
        }

        // Prepare web engine
        final WebEngine web = webView.getEngine();
        web.setJavaScriptEnabled(true);
        web.getLoadWorker().stateProperty().addListener(
            (ov, oldState, newState) -> {
                if (newState == State.SUCCEEDED) { // Document is loaded
                    // Maybe the operation should be postponed a little to leave time for the
                    // page's JavaScript to execute; it worked as is in the author's tests.
                    printSg(web);
                } else if (newState == State.FAILED) {
                    System.err.println(
                        "Failed to load " + page + ": " + web.getLoadWorker().getException());
                }
            });

        // Navigate to document
        web.load(page.toExternalForm());
    }

    /**
     * Prints {@code window.SG} both as a {@link JSObject} and as a JSON string.
     *
     * @param web the engine whose current document defines {@code window.SG}
     */
    private static void printSg(WebEngine web) {
        final Object result = web.executeScript("window.SG");
        if (!(result instanceof JSObject)) {
            // executeScript maps a JavaScript undefined to the String "undefined", so a blind
            // cast would throw ClassCastException when the page did not define SG.
            System.err.println("window.SG is not a JavaScript object: " + result);
            return;
        }

        // Either get SG as an object, then use getMember/getSlot to read properties
        final JSObject sg = (JSObject) result;
        System.out.println(sg);

        // Or get it as a JSON String
        System.out.println(web.executeScript("JSON.stringify(window.SG, null, 2)"));
    }

    /**
     * Launches the JavaFX application.
     *
     * @param args command-line arguments, forwarded to the JavaFX launcher
     */
    public static void main(String[] args) {
        launch(args);
    }
}
假设更正后的 HTML 保存为 jsonvalue.html,并与 App 类位于同一目录(getClass().getResource 会相对于 App 类所在的包在类路径中查找该文件):
<!DOCTYPE html>
<html>
<head>
<title>Test</title>
<script>
// Immediately-invoked function; `root` is the `this` value passed in at the call site below.
// At the top level of a classic script `this` is the global object, so SG ends up on window.
(function (root) {
/* -- Data -- */
// Create root.SG only if it is not already set, then attach the fields to it.
root.SG || (root.SG = {});
root.SG.query = "Ana";
root.SG.timePeriodType = "YEAR";
root.SG.dataType = "SEARCH_ALL";
// "\u002F" is the escape sequence for "/", so this string is "/index/searchHeat".
root.SG.path = "\u002Findex\u002FsearchHeat";
// The payload to extract: pvList holds one inner array of { kwdId, pv, isPeak, date, id } records.
root.SG.data = { "pvList": [[{ "kwdId": 0, "pv": 0, "isPeak": 0, "date": 20200517, "id": 0 }, { "kwdId": 27961, "pv": 150506, "isPeak": 0, "date": 20200518, "id": 13625908607 }, { "kwdId": 27961, "pv": 142961, "isPeak": 0, "date": 20200519, "id": 13625908608 }, { "kwdId": 27961, "pv": 170154, "isPeak": 0, "date": 20200520, "id": 13715544690 }, { "kwdId": 27961, "pv": 160490, "isPeak": 0, "date": 20200521, "id": 13715544691 }]] }
})(this);
</script>
</head>
<body>
Hello
</body>
</html>
推荐阅读
- tensorflow - 提高 Keras 神经网络算法的预测精度
- python - 执行 plt.show() 后 Python 没有响应
- linux - fstrim 如何不受竞争条件的影响?
- r - 编程语言 R:创建一个函数。此函数将一个矩阵转换为另一个矩阵,使得每个奇数都是该数的 3 倍
- sql - 基于多个条件循环遍历表
- asp.net-core - “IdentityServer4 + Core Identity + PostgreSql”无法登录,返回Sub Claim Missing
- c++ - 使用 boost::property_tree::ptree 如何获取特定键的值
- python - 如何确保我的 Django 单例模型在启动时存在?
- android - android - 如何从另一个类更新 MutableLiveData
- django - 如何运行 Django 定期任务对象的任务 [django-celery-beat]