java - 使用 Jsoup 登录的 Java 抓取网站
问题描述
我想从这里打印一些数据(其中 id="ctl00_cpStud_lblTotalPercentage") ,我只需要输入密码即可访问数据。
密码字段代码:
输入名称="txtPassword" type="password" id="txtPassword" placeholder="输入密码" onkeypress="checkcapslockon(event)"
按钮代码:
输入类型="submit" name="btnSubmit" value="Submit" id="btnSubmit" class="rc-button rc-button-submit"
谁能解释我为什么这段代码不起作用?我已经尝试了很多,但没有任何效果。
public static void main(String[] args) throws Exception {
String url="https://erp.cbit.org.in/?__LASTFOCUS=&__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=%2FwEPDwUKMTExMDk3MjkwOA9kFgICAQ9kFgICBQ8PFgIeB1Zpc2libGVoZGRkApSadXV8hBd7qi9M9MQf24gQFo1JDdpv3rqIRSVoR5Y%3D&__VIEWSTATEGENERATOR=C2EE9ABB&__EVENTVALIDATION=%2FwEdAAUo8HF9hHYWKGGF3Et0JGNxBjpuGLkudYNkCAonVyADt%2B5PVNfdHmla7NuBu7%2FwrMNjemWCTRgEB59HPczIGVNwgWOkgugWB5Cq9dYD7toQNEwZfb2PCk9YCZQ7UhXsjSWufILYgZp8zPh7f7XDtu2a&txtUserName=160117737089&btnNext=Next";
Connection.Response loginForm = Jsoup
.connect(url)
.data("txtPassword", "8055")
.data("btnSubmit", "Submit")
.method(Connection.Method.GET)
.followRedirects(true)
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0")
.execute();
Connection.Response response = Jsoup.connect(url)
.method(Connection.Method.POST)
.cookies(loginForm.cookies())
.followRedirects(true)
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0")
.execute();
System.out.println(response);
}
但我得到以下错误
Exception in thread "main" org.jsoup.HttpStatusException: HTTP error fetching URL. Status=500, URL=https://erp.cbit.org.in/Login.aspx?ReturnUrl=%2f%3f__LASTFOCUS%3d%26__EVENTTARGET%3d%26__EVENTARGUMENT%3d%26__VIEWSTATE%3d%252FwEPDwUKMTExMDk3MjkwOA9kFgICAQ9kFgICBQ8PFgIeB1Zpc2libGVoZGRkApSadXV8hBd7qi9M9MQf24gQFo1JDdpv3rqIRSVoR5Y%253D%26__VIEWSTATEGENERATOR%3dC2EE9ABB%26__EVENTVALIDATION%3d%252FwEdAAUo8HF9hHYWKGGF3Et0JGNxBjpuGLkudYNkCAonVyADt%252B5PVNfdHmla7NuBu7%252FwrMNjemWCTRgEB59HPczIGVNwgWOkgugWB5Cq9dYD7toQNEwZfb2PCk9YCZQ7UhXsjSWufILYgZp8zPh7f7XDtu2a%26txtUserName%3d160117737089%26btnNext%3dNext%26txtPassword%3d8055%26btnSubmit%3dSubmit&__LASTFOCUS=&__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=%2FwEPDwUKMTExMDk3MjkwOA9kFgICAQ9kFgICBQ8PFgIeB1Zpc2libGVoZGRkApSadXV8hBd7qi9M9MQf24gQFo1JDdpv3rqIRSVoR5Y%3D&__VIEWSTATEGENERATOR=C2EE9ABB&__EVENTVALIDATION=%2FwEdAAUo8HF9hHYWKGGF3Et0JGNxBjpuGLkudYNkCAonVyADt%2B5PVNfdHmla7NuBu7%2FwrMNjemWCTRgEB59HPczIGVNwgWOkgugWB5Cq9dYD7toQNEwZfb2PCk9YCZQ7UhXsjSWufILYgZp8zPh7f7XDtu2a&txtUserName=160117737089&btnNext=Next&txtPassword=8055&btnSubmit=Submit
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:760)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:757)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:705)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:295)
at jsouptest.JsoupTest.main(JsoupTest.java:44)
C:\Users\Akhil\AppData\Local\NetBeans\Cache\8.2\executor-snippets\run.xml:53: Java returned: 1
BUILD FAILED (total time: 0 seconds)
更新代码
@Override
protected Void doInBackground(Void... voids) {
try {
//Connect to the website
String url="https://erp.cbit.org.in/?__LASTFOCUS=&__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=%2FwEPDwUKMTExMDk3MjkwOA9kFgICAQ9kFgICBQ8PFgIeB1Zpc2libGVoZGRkApSadXV8hBd7qi9M9MQf24gQFo1JDdpv3rqIRSVoR5Y%3D&__VIEWSTATEGENERATOR=C2EE9ABB&__EVENTVALIDATION=%2FwEdAAUo8HF9hHYWKGGF3Et0JGNxBjpuGLkudYNkCAonVyADt%2B5PVNfdHmla7NuBu7%2FwrMNjemWCTRgEB59HPczIGVNwgWOkgugWB5Cq9dYD7toQNEwZfb2PCk9YCZQ7UhXsjSWufILYgZp8zPh7f7XDtu2a&txtUserName=160117737089&btnNext=Next";
String url2="https://erp.cbit.org.in/";
Connection.Response loginForm = Jsoup.connect(url)
.method(Connection.Method.GET)
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0")
.execute();
Document doc = loginForm.parse();
Element e = doc.select("input[id=__VIEWSTATE]").first();
String viewState = e.attr("value");
e = doc.select("input[id=__EVENTVALIDATION]").first();
String eventValidation = e.attr("value");
e = doc.select("input[id=__LASTFOCUS]").first();
String lastFocus = e.attr("value");
e = doc.select("input[id=__EVENTTARGET]").first();
String eventTarget = e.attr("value");
e = doc.select("input[id=__VIEWSTATEGENERATOR]").first();
String viewstateGenerator = e.attr("value");
e = doc.select("input[id=__EVENTARGUMENT]").first();
String eventArgument = e.attr("value");
Document document = Jsoup.connect(url2)
.data("cookieexists", "false")
.data("txtPassword","8055")
.data("btnSubmit","Submit")
.data("__LASTFOCUS",lastFocus)
.data("__EVENTTARGET",eventTarget)
.data("__EVENTARGUMENT",eventArgument)
.data("__VIEWSTATEGENERATOR",viewstateGenerator)
.data("__VIEWSTATE", viewState)
.data("__EVENTVALIDATION", eventValidation)
.cookies(loginForm.cookies())
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0")
.get();
Element attendance = document.getElementById("ctl00_cpStud_lblTotalPercentage");
String at=attendance.text();
at=at.replaceAll("[^\\.0123456789]","");
} catch (IOException e) {
Toast.makeText(WaitingActivity.this, "No Internet Connection", Toast.LENGTH_LONG).show();
}
return null;
}
@Override
protected void onPostExecute(Void aVoid) {
super.onPostExecute(aVoid);
Toast.makeText(WaitingActivity.this, at, Toast.LENGTH_LONG).show();
}
解决方案
推荐阅读
- python - 从 tensorflow 对象检测 API 返回 Json 或漂亮的东西
- maven - Maven:liquibase - firebird:原因:liquibase.exception.DatabaseException:GDS 异常。335544569. 动态 SQL 错误
- oracle - ORA-00955: 名称已被 pl/sql 中的现有对象使用
- python - 如何从模型中可用的多个选项中为 django 中的该键添加外键?
- java - 为什么 Apache Ignite sys-thread 消耗这么多内存?
- imageresizer - ImageResizer 与 Azure 文件
- sas - 使用数据线输入日期导致无效数据
- php - php shell 命令中的 rm -f 安全吗?
- c++ - 如何替换字符串中的 QRegExp?
- android - 如何在没有 onClick 的主要活动中使用我的片段中定义的小部件