首页 > 技术文章 > 第六周总结

w669399221 2020-03-27 17:40 原文

本周学习了爬虫的方法，可以爬取疫情数据。

package com.yiqin.paqu;


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.security.Timestamp;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.net.ssl.HttpsURLConnection;
import javax.xml.crypto.Data;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.yiqin.connect.BaseConnection;
/**
 * Scrapes epidemic statistics that the DXY page embeds as JSON inside inline
 * scripts and stores them through JDBC.
 *
 * <p>Province rows go to table {@code info2}, city rows to {@code info3} and
 * country rows to {@code info4}. The connection is obtained from
 * {@code BaseConnection.getConn()}.
 */
public class Paqu {

    /** Page whose HTML contains the {@code window.getAreaStat = ...} style script blocks. */
    private static final String DXY_URL = "https://ncov.dxy.cn/ncovh5/view/pneumonia";

    /** Captures the JSON assigned to {@code window.getAreaStat} (province/city data). */
    private static final Pattern AREA_STAT_PATTERN =
            Pattern.compile("window.getAreaStat = (.*?)\\}(?=catch)");

    /** Captures the JSON assigned to {@code window.getListByCountryTypeService2true} (country data). */
    private static final Pattern COUNTRY_STAT_PATTERN =
            Pattern.compile("window.getListByCountryTypeService2true = (.*?)\\}(?=catch)");

    /**
     * Timestamp layout for the {@code Date} column. {@code HH} is the 24-hour field;
     * the 12-hour {@code hh} without an AM/PM marker would make afternoon times ambiguous.
     */
    private static final String TIMESTAMP_FORMAT = "yyyy-MM-dd HH:mm:ss";

    private static final String INSERT_PROVINCE_SQL =
            "insert into info2(Date,Province,City,currentnum,Confirmed_num,Yisi_num,Cured_num,Dead_num)"
                    + " values(?,?,?,?,?,?,?,?)";

    // NOTE(review): the previous statement listed 7 columns but supplied 8 values, so it
    // could never execute. The currentnum column is assumed to exist in info3 just as it
    // does in info2 -- confirm against the table definition.
    private static final String INSERT_CITY_SQL =
            "insert into info3(Date,Province,City,currentnum,Confirmed_num,Yisi_num,Cured_num,Dead_num)"
                    + " values(?,?,?,?,?,?,?,?)";

    private static final String INSERT_COUNTRY_SQL =
            "insert into info4(Date,Continents,Province,Confirmed_num,Yisi_num,Cured_num,Dead_num)"
                    + " values(?,?,?,?,?,?,?)";

    /**
     * Entry point: scrapes the per-country statistics and stores them.
     *
     * @param args unused
     * @throws IOException declared for compatibility; fetch errors are handled internally
     */
    public static void main(String[] args) throws IOException {
        getListByCountryTypeService2();
    }

    /**
     * Performs an HTTPS GET and returns the response body with line breaks removed.
     *
     * @param requesturl absolute HTTPS URL to fetch
     * @return the response body decoded as UTF-8, lines concatenated without separators
     * @throws IOException if the URL is malformed or the request fails
     */
    private static String httpRequset(String requesturl) throws IOException {
        // MalformedURLException is an IOException, so it simply propagates to the caller
        // instead of being swallowed and followed by a NullPointerException.
        URL url = new URL(requesturl);
        HttpsURLConnection connection = (HttpsURLConnection) url.openConnection();
        connection.setDoInput(true);
        connection.setRequestMethod("GET");

        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Lines are joined without separators; the extraction patterns use '.',
                // which does not match line terminators.
                body.append(line);
            }
        } finally {
            connection.disconnect();
        }
        return body.toString();
    }

    /**
     * Downloads the DXY page and extracts the first capture group of {@code pattern}.
     *
     * @param pattern compiled pattern whose group 1 is the embedded JSON text
     * @return the captured text, or an empty string when the download fails or nothing matches
     */
    private static String extractEmbeddedJson(Pattern pattern) {
        String html = "";
        try {
            html = httpRequset(DXY_URL);
        } catch (IOException e) {
            // Best effort: report the failure and fall through to "no match".
            e.printStackTrace();
        }
        Matcher matcher = pattern.matcher(html);
        return matcher.find() ? matcher.group(1) : "";
    }

    /**
     * Binds {@code values} to the statement's parameters in order and executes it.
     * A {@code null} value is stored as SQL NULL.
     *
     * @param stmt   prepared INSERT with exactly {@code values.length} placeholders
     * @param values column values, in placeholder order
     * @throws SQLException if binding or execution fails
     */
    private static void insertRow(PreparedStatement stmt, String... values) throws SQLException {
        for (int i = 0; i < values.length; i++) {
            stmt.setString(i + 1, values[i]);
        }
        stmt.executeUpdate();
    }

    /** Formats the current system time for the {@code Date} column. */
    private static String currentTimestamp() {
        return new SimpleDateFormat(TIMESTAMP_FORMAT).format(new Date());
    }

    /**
     * Scrapes the province/city statistics, prints the raw JSON and stores one row per
     * province in {@code info2} and one row per city in {@code info3}.
     *
     * <p>Every entry of the scraped array is processed (not only the first 31), and
     * provinces without a {@code cities} array are stored without city rows.
     *
     * @return the raw JSON text that was extracted, or an empty string if none was found
     */
    public static String getAreaStat() {
        String result = extractEmbeddedJson(AREA_STAT_PATTERN);
        if (result.isEmpty()) {
            return result;
        }
        System.out.println(result);

        JSONArray provinces = JSONArray.parseArray(result);
        if (provinces == null) {
            return result;
        }

        String nowTime = currentTimestamp();
        try (Connection conn = BaseConnection.getConn();
                PreparedStatement provinceStmt = conn.prepareStatement(INSERT_PROVINCE_SQL);
                PreparedStatement cityStmt = conn.prepareStatement(INSERT_CITY_SQL)) {

            for (int i = 0; i < provinces.size(); i++) {
                JSONObject province = provinces.getJSONObject(i);
                if (province == null) {
                    continue;
                }
                String provinceName = province.getString("provinceName");

                // Province-level rows carry a single space as the city name.
                insertRow(provinceStmt,
                        nowTime,
                        provinceName,
                        " ",
                        province.getString("currentConfirmedCount"),
                        province.getString("confirmedCount"),
                        province.getString("suspectedCount"),
                        province.getString("curedCount"),
                        province.getString("deadCount"));

                JSONArray cities = province.getJSONArray("cities");
                if (cities == null) {
                    continue;
                }
                for (int j = 0; j < cities.size(); j++) {
                    JSONObject city = cities.getJSONObject(j);
                    if (city == null) {
                        continue;
                    }
                    // All counts come from the city object itself, including the
                    // current-confirmed count (previously read from the province).
                    insertRow(cityStmt,
                            nowTime,
                            provinceName,
                            city.getString("cityName"),
                            city.getString("currentConfirmedCount"),
                            city.getString("confirmedCount"),
                            city.getString("suspectedCount"),
                            city.getString("curedCount"),
                            city.getString("deadCount"));
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Scrapes the per-country statistics, prints the raw JSON and stores one row per
     * country in {@code info4}.
     *
     * @return the raw JSON text that was extracted, or an empty string if none was found
     */
    public static String getListByCountryTypeService2() {
        String result = extractEmbeddedJson(COUNTRY_STAT_PATTERN);
        if (result.isEmpty()) {
            return result;
        }
        System.out.println(result);

        JSONArray countries = JSONArray.parseArray(result);
        if (countries == null) {
            return result;
        }

        String nowTime = currentTimestamp();
        try (Connection conn = BaseConnection.getConn();
                PreparedStatement countryStmt = conn.prepareStatement(INSERT_COUNTRY_SQL)) {

            for (int i = 0; i < countries.size(); i++) {
                JSONObject country = countries.getJSONObject(i);
                if (country == null) {
                    continue;
                }
                insertRow(countryStmt,
                        nowTime,
                        country.getString("continents"),
                        country.getString("provinceName"),
                        country.getString("confirmedCount"),
                        country.getString("suspectedCount"),
                        country.getString("curedCount"),
                        country.getString("deadCount"));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

}
View Code

 

package com.yiqin.paqu;

import java.io.BufferedReader;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.net.MalformedURLException;import java.net.URL;import java.security.Timestamp;import java.sql.Connection;import java.sql.DriverManager;import java.sql.SQLException;import java.sql.Statement;import java.util.regex.Matcher;import java.util.regex.Pattern;
import java.text.SimpleDateFormat;import java.util.Date;import java.util.HashMap;import java.util.Map;
import javax.net.ssl.HttpsURLConnection;import javax.xml.crypto.Data;
import com.alibaba.fastjson.JSONArray;import com.alibaba.fastjson.JSONObject;import com.yiqin.connect.BaseConnection;public class Paqu {public static void main(String[] args) throws IOException {getListByCountryTypeService2(); }// 鏍筓RLprivate static String httpRequset(String requesturl) throws IOException {StringBuffer buffer = null;BufferedReader bufferedReader = null;InputStreamReader inputStreamReader = null;InputStream inputStream = null;HttpsURLConnection httpsURLConnection = null;try {URL url = new URL(requesturl);httpsURLConnection = (HttpsURLConnection) url.openConnection();httpsURLConnection.setDoInput(true);httpsURLConnection.setRequestMethod("GET");inputStream = httpsURLConnection.getInputStream();inputStreamReader = new InputStreamReader(inputStream, "utf-8");bufferedReader = new BufferedReader(inputStreamReader);buffer = new StringBuffer();String str = null;while ((str = bufferedReader.readLine()) != null) {buffer.append(str);}} catch (MalformedURLException e) {// TODO Auto-generated catch blocke.printStackTrace();}
return buffer.toString();}
/** * 鑾峰彇鍏ㄥ浗鍚勪釜鐪佸競鐨勭‘璇娿�佹浜″拰娌绘剤浜烘暟 *  * @return */public static String getAreaStat() {String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";String htmlResult = "";try {htmlResult = httpRequset(url);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}// System.out.println(htmlResult);
// 姝e垯鑾峰彇鏁版嵁// 鍥犱负html鐨勬暟鎹牸寮忕湅鐫�灏卞儚json鏍煎紡锛屾墍浠ユ垜浠鍒欒幏鍙杍sonString reg = "window.getAreaStat = (.*?)\\}(?=catch)";Pattern totalPattern = Pattern.compile(reg);Matcher totalMatcher = totalPattern.matcher(htmlResult);
String result = "";if (totalMatcher.find()) {result = totalMatcher.group(1);System.out.println(result);// 鍚勪釜鐪佸競鐨勬槸涓�涓垪琛↙ist锛屽鏋滄兂淇濆瓨鍒版暟鎹簱涓紝瑕侀亶鍘嗙粨鏋滐紝涓嬮潰鏄痙emoJSONArray array = JSONArray.parseArray(result);try {Connection conn =BaseConnection.getConn();Statement stmt = conn.createStatement();
Date date = new Date();//鑾峰緱绯荤粺鏃堕棿.SimpleDateFormat sdf =   new SimpleDateFormat( "yyyy-MM-dd hh:mm:ss" );String nowTime = sdf.format(date);
for (int i = 0; i <= 30; i++) {
com.alibaba.fastjson.JSONObject jsonObject = com.alibaba.fastjson.JSONObject.parseObject(array.getString(i));
    String provinceName = jsonObject.getString("provinceName");    String cityname1 = " ";    String currentnum = jsonObject.getString("currentConfirmedCount");String confirmed = jsonObject.getString("confirmedCount");String cured = jsonObject.getString("curedCount");String dead = jsonObject.getString("deadCount");String suspect = jsonObject.getString("suspectedCount");stmt.executeUpdate("insert into info2(Date,Province,City,currentnum,Confirmed_num,Yisi_num,Cured_num,Dead_num) values('"+ nowTime + "','"+ provinceName + "','"+ cityname1+ "','"+currentnum + "','" + confirmed + "','" + suspect +"','" + cured +"','" + dead +"')");JSONArray array2 = jsonObject.getJSONArray("cities");for (int j = 0; j < array2.size(); j++) {com.alibaba.fastjson.JSONObject jsonObject2 = com.alibaba.fastjson.JSONObject.parseObject(array2.getString(j));String provinceName2 = jsonObject.getString("provinceName");String cityname = jsonObject2.getString("cityName");String confirmed2 = jsonObject2.getString("confirmedCount"); String currentnum2 = jsonObject.getString("currentConfirmedCount");String cured2 = jsonObject2.getString("curedCount");String dead2 = jsonObject2.getString("deadCount");String suspect2 = jsonObject2.getString("suspectedCount");stmt.executeUpdate("insert into info3(Date,Province,City,Confirmed_num,Yisi_num,Cured_num,Dead_num) values('"+ nowTime + "','"+ provinceName2 + "','"+ cityname + "','"+currentnum2+ "','" + confirmed2 + "','" + suspect2 +"','" + cured2 +"','" + dead2 +"')");}}stmt.close();conn.close();} catch (SQLException e) {// TODO Auto-generated catch blocke.printStackTrace();}}return result;}/**     * 鑾峰彇鍏ㄧ悆鍚勪釜鍥藉鐨勭‘璇娿�佹浜″拰娌绘剤浜烘暟     * @return     */public static String getListByCountryTypeService2() {String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";String htmlResult = "";try {htmlResult = httpRequset(url);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}// System.out.println(htmlResult);
// 姝e垯鑾峰彇鏁版嵁// 鍥犱负html鐨勬暟鎹牸寮忕湅鐫�灏卞儚json鏍煎紡锛屾墍浠ユ垜浠鍒欒幏鍙杍sonString reg = "window.getListByCountryTypeService2true = (.*?)\\}(?=catch)";Pattern totalPattern = Pattern.compile(reg);Matcher totalMatcher = totalPattern.matcher(htmlResult);
String result = "";if (totalMatcher.find()) {result = totalMatcher.group(1);System.out.println(result);// 鍚勪釜鐪佸競鐨勬槸涓�涓垪琛↙ist锛屽鏋滄兂淇濆瓨鍒版暟鎹簱涓紝瑕侀亶鍘嗙粨鏋滐紝涓嬮潰鏄痙emoJSONArray array = JSONArray.parseArray(result);try {Connection conn = BaseConnection.getConn();Statement stmt = conn.createStatement();
Date date = new Date();//鑾峰緱绯荤粺鏃堕棿.SimpleDateFormat sdf =   new SimpleDateFormat( "yyyy-MM-dd hh:mm:ss" );String nowTime = sdf.format(date);
for (int i = 0; i <array.size(); i++) {com.alibaba.fastjson.JSONObject jsonObject = com.alibaba.fastjson.JSONObject.parseObject(array.getString(i));    String continents =jsonObject.getString("continents");    String provinceName = jsonObject.getString("provinceName");String confirmed = jsonObject.getString("confirmedCount");String cured = jsonObject.getString("curedCount");String dead = jsonObject.getString("deadCount");String suspect = jsonObject.getString("suspectedCount");stmt.executeUpdate("insert into info4(Date,Continents,Province,Confirmed_num,Yisi_num,Cured_num,Dead_num) values('"+ nowTime + "','"+ continents + "','"+ provinceName + "','" + confirmed + "','" + suspect +"','" + cured +"','" + dead +"')");}stmt.close();conn.close();} catch (SQLException e) {// TODO Auto-generated catch blocke.printStackTrace();}}return result;}
}

 

推荐阅读