Pulling attack data from Elasticsearch on tpot, part 3: the Java program that does the pulling

zealousness 2018-07-30 17:58

 

package download;

import org.json.JSONArray;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.text.SimpleDateFormat;
import java.util.*;
import org.json.JSONObject;

public class Downloader {

    public static void main(String[] args) throws IOException {
//        Configer.configProxy();
        System.out.println("Pull finished, number of records: " + getresult().size());

    }

    public static String indexstr = "";
    public static Properties p;

    public static Properties loadPropertiesFromFile(String filename) throws IOException {
        Properties p = new Properties();
        InputStream input = Downloader.class.getClassLoader().getResourceAsStream(filename);
        p.load(input);
        return p;
    }

    static {
        try {
            p = loadPropertiesFromFile("downloader.properties");
        } catch (IOException e) {
            System.out.println("failed to load downloader.properties");
            e.printStackTrace();
        }
    }

    public static  InputStream get_whitelist_inputstream(){
        // get an InputStream for the whitelist file on the classpath
        ClassLoader classLoader=Downloader.class.getClassLoader();
        InputStream whitelist_inputstream=classLoader.getResourceAsStream(p.getProperty("white_list_file"));
        return whitelist_inputstream;

        // alternative: resolve the path of the whitelist resource instead
//        ClassLoader classLoader=Downloader.class.getClassLoader();
//        URL resource=classLoader.getResource(p.getProperty("white_list_file"));
//        String path=resource.getPath();
    }

    public static String get_whitelist_regex() throws IOException {
        InputStream whitelist_inputstream=get_whitelist_inputstream();

        BufferedReader whitelist_reader=new BufferedReader(new InputStreamReader(whitelist_inputstream));

        String whitelist_regex="";
        String line=null;
        while((line=whitelist_reader.readLine())!=null){
            whitelist_regex+="("+line+")|";
        }
        if(whitelist_regex.length()!=0){
            whitelist_regex=whitelist_regex.substring(0,whitelist_regex.length()-1);
        }


        whitelist_reader.close();
        whitelist_inputstream.close();

        return whitelist_regex;

    }

    public static String post(String url, String param, Map<String, String> header) throws IOException {
        PrintWriter out = null;
        BufferedReader in = null;
        String result = "";
        URL realUrl = new URL(url);
        // open a connection to the URL
        URLConnection conn = realUrl.openConnection();
        // set connect/read timeouts
        conn.setConnectTimeout(5000);
        conn.setReadTimeout(15000);
        // set common request headers
        if (header != null) {
            for (Map.Entry<String, String> entry : header.entrySet()) {
                conn.setRequestProperty(entry.getKey(), entry.getValue());
            }
        }
        conn.setRequestProperty("accept", "*/*");
        conn.setRequestProperty("connection", "Keep-Alive");
        conn.setRequestProperty("user-agent",
                "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");


        // the following two lines are required to send a POST request
        conn.setDoOutput(true);
        conn.setDoInput(true);
        // get the output stream of the URLConnection
        out = new PrintWriter(conn.getOutputStream());
        // send the request body
        out.print(param);
        // flush the output buffer
        out.flush();
        // read the response with a BufferedReader
        in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "utf8"));
        String line;
        while ((line = in.readLine()) != null) {
            result += line;
        }
        if (out != null) {
            out.close();
        }
        if (in != null) {
            in.close();
        }
        return result;
    }

    public static String get(String url) throws IOException {
        BufferedReader in = null;

        URL realUrl = new URL(url);
        // open a connection to the URL
        URLConnection connection = realUrl.openConnection();
        // set common request headers
        connection.setRequestProperty("accept", "*/*");
        connection.setRequestProperty("connection", "Keep-Alive");
        connection.setRequestProperty("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
        connection.setConnectTimeout(5000);
        connection.setReadTimeout(5000);


        // establish the actual connection
        connection.connect();
        // read the response with a BufferedReader
        in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
        StringBuffer sb = new StringBuffer();
        String line;
        while ((line = in.readLine()) != null) {
            sb.append(line);
        }

        in.close();
        return sb.toString();
    }

    public static void getIndexStr() {
        indexstr = "logstash-" + new SimpleDateFormat("yyyy.MM.dd").format(new Date());
        //indexstr = "tpot_test";//for test
    }

    public static Set<String> getAttackTypeSet() throws IOException {

        getIndexStr();

        String attacktypeurl = p.getProperty("els.host") + "/" + indexstr + "/" + "_mapping?pretty=true";
        System.out.println("【getting all types today】>>" + attacktypeurl);
        String attacktyperesult = get(attacktypeurl);

        //parse json
        JSONObject jobj1 = new JSONObject(attacktyperesult);
        JSONObject jobj2 = jobj1.getJSONObject(indexstr);
        JSONObject jobj3 = jobj2.getJSONObject("mappings");


        return jobj3.keySet();
    }

    public static LinkedList<NearRealtimeIntelligence> getresult() throws IOException {
        LinkedList<NearRealtimeIntelligence> result = new LinkedList<NearRealtimeIntelligence>();
        Set<String> attacktypeset = getAttackTypeSet();

        String param = "{\n" +
                "  \"query\": {\n" +
                "    \"bool\": {\n" +
                "      \"must_not\": [\n" +
                "        {\n" +
                "          \"regexp\":{\n" +
                "            \"src_ip\":\"" + get_whitelist_regex() + "\"\n" +
                "          }\n" +
                "        }\n" +
                "      ]\n" +
                "    }\n" +
                "  },\"size\":" + p.getProperty("els.batch_size") + "\n" +
                "}";

        for (String attacktype : attacktypeset) {

            // skip the _default_ and Syslog types
            if (attacktype.equals("_default_") || attacktype.equals("Syslog")) {
                continue;
            }

            System.out.println("【getting "+attacktype+" data】");
            String req = p.getProperty("els.host") + "/" + indexstr + "/" + attacktype + "/_search?scroll=" + p.getProperty("scroll_timegap");
            System.out.println("posting url>>" + req);
            String res = post(req, param, null);
            //parse json
            JSONObject res_json = new JSONObject(res);
            JSONObject all_hits = res_json.getJSONObject("hits");
            JSONArray docu_array = all_hits.getJSONArray("hits");

            int total = all_hits.getInt("total");
            int pages = (int) Math.ceil(total / Double.parseDouble(p.getProperty("els.batch_size")));
            System.out.println("records: " + total + "  pages: " + pages);
            String scroll_id = res_json.getString("_scroll_id");

//            System.out.println("######################################batch0");
            for (int j = 0; j < docu_array.length(); j++) {
                JSONObject docu = (JSONObject) docu_array.get(j);
                JSONObject source = docu.getJSONObject("_source");
                if (source.has("src_ip")) {
                    String src_ip = source.getString("src_ip");
                    System.out.println(src_ip);
                    NearRealtimeIntelligence adata=new NearRealtimeIntelligence();
                    adata.setName(src_ip);
                    adata.setSourceName(attacktype);
                    result.add(adata);
                }
            }

            for (int i = 1; i < pages; i++) {
//                System.out.println("######################################batch" + i);
                req = p.getProperty("els.host") + "/_search/scroll";
//                System.out.println("posting url>>" + req);
                String param_scroll = "{\n" +
                        "  \"scroll\":\"" + p.getProperty("scroll_timegap") + "\",\n" +
                        "  \"scroll_id\":\"" + scroll_id + "\"\n" +
                        "}";
                res = post(req, param_scroll, null);
                //parse json
                res_json = new JSONObject(res);
                all_hits = res_json.getJSONObject("hits");
                docu_array = all_hits.getJSONArray("hits");

                for (int j = 0; j < docu_array.length(); j++) {
                    JSONObject docu = (JSONObject) docu_array.get(j);
                    JSONObject source = docu.getJSONObject("_source");
                    if (source.has("src_ip")) {
                        String src_ip = source.getString("src_ip");
//                        System.out.println(src_ip);
                        NearRealtimeIntelligence adata=new NearRealtimeIntelligence();
                        adata.setName(src_ip);
                        adata.setSourceName(attacktype);
                        result.add(adata);
                    }
                }
            }
        }

        return result;
    }
}

 

Things to note during the pull:

1. Filter out whitelisted IPs in the request body, and set a batch size for paginated (scroll) reads

url: http://xxx.xxx.xxx.xxx:8000/logstash-2018.07.30/Honeytrap/_search?scroll=3m
The param string built in getresult() expands to a request body of the form:

{
  "query": {
    "bool": {
      "must_not": [
        { "regexp": { "src_ip": "<result of get_whitelist_regex()>" } }
      ]
    }
  },
  "size": <els.batch_size>
}
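
Each subsequent page is then fetched through the Elasticsearch scroll API, as in the loop in getresult() above; a condensed sketch (scroll_id is the _scroll_id field returned by the previous response):

String scrollUrl = p.getProperty("els.host") + "/_search/scroll";
String scrollParam = "{\n" +
        "  \"scroll\":\"" + p.getProperty("scroll_timegap") + "\",\n" +
        "  \"scroll_id\":\"" + scroll_id + "\"\n" +
        "}";
String nextBatch = post(scrollUrl, scrollParam, null);   // same "hits" structure as the first response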

2. Reading a file from the classpath

Get an InputStream for the resource:

ClassLoader classLoader=Downloader.class.getClassLoader();
InputStream whitelist_inputstream=classLoader.getResourceAsStream(p.getProperty("white_list_file"));

Read it line by line through a BufferedReader:

BufferedReader whitelist_reader=new BufferedReader(new InputStreamReader(whitelist_inputstream));

String line=null;
while((line=whitelist_reader.readLine())!=null){
}
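
The same logic can also be written as a self-contained sketch using try-with-resources, so the reader and the underlying stream are closed automatically; buildWhitelistRegex is a hypothetical helper, not part of the original class, and it additionally skips blank lines:

public static String buildWhitelistRegex(String resourceName) throws IOException {
    // builds the same (a)|(b)|... pattern as get_whitelist_regex(), one pattern per line
    StringBuilder regex = new StringBuilder();
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            Downloader.class.getClassLoader().getResourceAsStream(resourceName)))) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.trim().isEmpty()) continue;      // skip blank lines
            if (regex.length() > 0) regex.append('|');
            regex.append('(').append(line.trim()).append(')');
        }
    }
    return regex.toString();
}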

 

3. Loading the properties file

Properties p = new Properties();
InputStream input = Downloader.class.getClassLoader().getResourceAsStream(filename);
p.load(input);
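
For reference, these are the keys the code expects in downloader.properties; the values below are placeholders only, not taken from the original setup:

# downloader.properties (example values only)
els.host=http://xxx.xxx.xxx.xxx:8000
els.batch_size=1000
scroll_timegap=3m
white_list_file=whitelist.txt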

 

4. Parsing the JSON response

JSONObject res_json = new JSONObject(res);
JSONObject all_hits = res_json.getJSONObject("hits");
JSONArray docu_array = all_hits.getJSONArray("hits");
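
From the parsed objects, getresult() then reads the following fields; a condensed excerpt:

int total = all_hits.getInt("total");                  // total number of matching documents
String scroll_id = res_json.getString("_scroll_id");   // cursor for the next scroll request
for (int j = 0; j < docu_array.length(); j++) {
    JSONObject source = docu_array.getJSONObject(j).getJSONObject("_source");
    if (source.has("src_ip")) {
        String src_ip = source.getString("src_ip");    // attacker source IP
        // ... wrap src_ip and the attack type in a NearRealtimeIntelligence record
    }
}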

 
