首页 > 技术文章 > java分别通过httpclient和HttpURLConnection获取图片验证码内容

qiaoyeye 2015-11-09 12:20 原文

前面的文章,介绍了如何通过selenium+Tesseract-OCR来识别图片验证码。如果是通过接口来访问,再用selenium就显得笨重,下面就介绍一下如何分别通过httpclient和HttpURLConnection,用流的方式获取图片验证码内容。

1.通过HttpURLConnection

package com.imgyzm;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

import org.openqa.selenium.io.FileHandler;

/**
 * Downloads a CAPTCHA image with {@link HttpURLConnection}, saves it to disk and runs the
 * Tesseract command-line tool on it to obtain the CAPTCHA text.
 *
 * <p>The URL and every file path are hard-coded (Windows drive {@code D:}).
 *
 * @author QiaoJiafei
 * @version created 2015-11-09 11:31:14
 */
public class GetYZMByURL {
    public static void main(String[] args) throws Exception {
        getYzm();
    }

    /**
     * Fetches the CAPTCHA image, writes it to {@code D:/BeautyGirl.jpg}, runs Tesseract on it
     * and reads the recognised text back from {@code D:\ddd\yzm.txt}.
     *
     * <p>Failures are reported on the console rather than thrown.
     *
     * @return the trimmed OCR text, or an empty string if any step failed or no output file
     *         was produced
     */
    public static String getYzm() {
        String s = "";
        HttpURLConnection conn = null;
        try {
            URL url = new URL("http://172.16.30.226:8099/bms/checkcode.do?0.9858807739801705");
            conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            // Give up after 5 seconds both while connecting and while waiting for data,
            // so a stalled server cannot block the caller forever.
            conn.setConnectTimeout(5 * 1000);
            conn.setReadTimeout(5 * 1000);

            // Keep the image as raw bytes; readInputStream closes the response stream.
            byte[] data = readInputStream(conn.getInputStream());

            File imageFile = new File("D:/BeautyGirl.jpg");
            try (FileOutputStream outStream = new FileOutputStream(imageFile)) {
                outStream.write(data);
            }

            // Remove output left over from an earlier run so that a failed OCR pass
            // cannot be mistaken for a fresh result.
            File file = new File("D:\\ddd\\yzm.txt");
            if (file.exists() && !file.delete()) {
                System.err.println("could not delete stale " + file);
            }

            // Same arguments as the original command line, passed as a list.
            // NOTE(review): "-1" is kept exactly as it was; it may have been meant as
            // "-l <lang>" - confirm against the installed Tesseract version.
            Process ocr = new ProcessBuilder("cmd.exe", "/C", "tesseract.exe",
                    "D:\\BeautyGirl.jpg", "D:\\ddd\\yzm", "-1")
                    .inheritIO() // do not leave unread pipes that could stall the child
                    .start();
            // Wait for Tesseract to finish instead of sleeping for a fixed second.
            int exitCode = ocr.waitFor();
            if (exitCode != 0) {
                System.err.println("tesseract exited with code " + exitCode);
            }

            if (file.exists()) {
                // Decode explicitly as UTF-8 instead of the platform default charset.
                byte[] text = java.nio.file.Files.readAllBytes(file.toPath());
                s = new String(text, java.nio.charset.StandardCharsets.UTF_8).trim();
                System.out.println("=========" + s);
            } else {
                System.out.print("yzm.txt不存在");
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return s;
    }

    /**
     * Reads the given stream to its end and closes it, even when reading fails.
     *
     * @param inStream the stream to drain; it is always closed by this method
     * @return every byte read from the stream, possibly an empty array
     * @throws Exception if reading from or closing the stream fails
     */
    public static byte[] readInputStream(InputStream inStream) throws Exception {
        try (InputStream in = inStream) {
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            byte[] buffer = new byte[1024];
            int len;
            // read returns -1 once the end of the stream has been reached
            while ((len = in.read(buffer)) != -1) {
                outStream.write(buffer, 0, len);
            }
            return outStream.toByteArray();
        }
    }
}

2.通过HttpClient

package com.imgyzm;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.util.EntityUtils;
import org.openqa.selenium.io.FileHandler;

/**
 * Downloads a CAPTCHA image with Apache HttpClient, runs the Tesseract command-line tool on
 * it, and then calls the login endpoint with the recognised text using the same client
 * instance.
 *
 * <p>The URLs, credentials and every file path are hard-coded (Windows drive {@code D:}).
 *
 * @author QiaoJiafei
 * @version created 2015-11-09 10:53:11
 */
public class GetYZMByHttpClient {
    /**
     * Fetches the CAPTCHA, OCRs it, and posts the login request, printing the response body.
     *
     * @param args unused
     * @throws Exception if a request, file operation or the OCR process fails
     */
    public static void main(String args[]) throws Exception {
        String s = "";
        HttpClient httpclient = new DefaultHttpClient(new PoolingClientConnectionManager());
        try {
            String imgurl = "http://172.16.30.226:8099/bms/checkcode.do?0.9858807739801705";
            HttpGet ht = new HttpGet(imgurl);
            HttpResponse response = httpclient.execute(ht);
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IllegalStateException("captcha response has no body: " + imgurl);
            }
            // Keep the image as raw bytes; readInputStream closes the content stream.
            byte[] data = readInputStream(entity.getContent());

            File imageFile = new File("D:/yzm.jpg");
            try (FileOutputStream outStream = new FileOutputStream(imageFile)) {
                outStream.write(data);
            }

            // Remove output left over from an earlier run so that a failed OCR pass
            // cannot be mistaken for a fresh result.
            File file = new File("D:\\ddd\\yzm.txt");
            if (file.exists() && !file.delete()) {
                System.err.println("could not delete stale " + file);
            }

            // Same arguments as the original command line, passed as a list.
            // NOTE(review): "-1" is kept exactly as it was; it may have been meant as
            // "-l <lang>" - confirm against the installed Tesseract version.
            Process ocr = new ProcessBuilder("cmd.exe", "/C", "tesseract.exe",
                    "D:\\yzm.jpg", "D:\\ddd\\yzm", "-1")
                    .inheritIO() // do not leave unread pipes that could stall the child
                    .start();
            // Wait for Tesseract to finish instead of sleeping for a fixed second.
            int exitCode = ocr.waitFor();
            if (exitCode != 0) {
                System.err.println("tesseract exited with code " + exitCode);
            }

            if (file.exists()) {
                // Decode explicitly as UTF-8 instead of the platform default charset.
                byte[] text = java.nio.file.Files.readAllBytes(file.toPath());
                s = new String(text, java.nio.charset.StandardCharsets.UTF_8).trim();
                System.out.println("=========" + s);
            } else {
                System.out.print("yzm.txt不存在");
            }

            /*=========== login endpoint ==========*/
            // OCR output is arbitrary text; encode it so stray spaces or symbols cannot
            // produce an invalid URI or alter the query string.
            String url = "http://172.16.30.226:8099/bms/staff/login.do?account=admin123&checkcode="
                    + java.net.URLEncoder.encode(s, "UTF-8") + "&pwd=aaaaaa1";
            System.out.println("url==========" + url);
            HttpPost httppost = new HttpPost(url);
            // Reuses the same client instance that fetched the CAPTCHA.
            response = httpclient.execute(httppost);
            entity = response.getEntity();
            s = EntityUtils.toString(entity, "UTF-8");
            System.out.println(s);
        } finally {
            // Release pooled connections even when a step above fails.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Reads the given stream to its end and closes it, even when reading fails.
     *
     * @param inStream the stream to drain; it is always closed by this method
     * @return every byte read from the stream, possibly an empty array
     * @throws Exception if reading from or closing the stream fails
     */
    public static byte[] readInputStream(InputStream inStream) throws Exception {
        try (InputStream in = inStream) {
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            byte[] buffer = new byte[1024];
            int len;
            // read returns -1 once the end of the stream has been reached
            while ((len = in.read(buffer)) != -1) {
                outStream.write(buffer, 0, len);
            }
            return outStream.toByteArray();
        }
    }
}

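那么这两种方式有什么区别呢?通过测试,使用HttpURLConnection获取验证码,再被其它接口调用的时候,该验证码已经失效了。而httpclient,只要保证程序接口调用和获取验证码用的是同一个httpclient,获取到的验证码,再被其它接口调用,该验证码仍生效。原因在于验证码通常与服务器端的会话(session)绑定:同一个httpclient实例会自动保存并回传会话cookie,而HttpURLConnection默认不管理cookie,后续请求会被服务器当作新的会话。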

保存图片也可以用下面的方法,更简便一些

response = client.execute(httpget);
entity = response.getEntity();
// ImageIO.read does not close the stream it is given, so close it here.
try (InputStream in = entity.getContent()) {
    BufferedImage input = ImageIO.read(in);
    if (input == null) {
        // null means no registered reader could decode the bytes
        throw new java.io.IOException("response body is not a decodable image");
    }
    // write returns false when no suitable writer exists for this image and format
    if (!ImageIO.write(input, "jpg", new File("D:/11.jpg"))) {
        throw new java.io.IOException("no ImageIO writer could encode the image as jpg");
    }
}

 

 

HttpClient API:http://hc.apache.org/httpcomponents-client-ga/httpclient/apidocs/

推荐阅读