首页 > 解决方案 > 如何将 Python 代码转换为 Robot Framework 自定义库

问题描述

我正在尝试自动识别 CAPTCHA(验证码),并为此编写了下面的 Python 代码。现在我卡在了一个问题上:我无法通过创建自定义库的方式在 Robot Framework 中调用这段代码。

from PIL import Image
import string
import json
import os
import time
import pytesseract
import cv2
import numpy as np
import re
from tesserocr import PyTessBaseAPI,PSM, OEM
import time
import logging

# Endpoint that serves the CAPTCHA image; every function below fetches from it.
captcha_url = "http://www.mca.gov.in/mcafoportal/getCapchaImage.do"
# Matches newline, carriage return, tab and space so they can be removed from
# OCR output before the text is validated.
regex = re.compile(r'[\n\r\t ]')

def get_captcha2(session, max_attempts=None):
    """Download the CAPTCHA with *session* and OCR it via pytesseract.

    The image is fetched from ``captcha_url``, saved to ``a.jpg`` in the
    current directory, and recognised as a single lowercase word. A fresh
    image is requested until the recognised text is 6 or 7 lowercase letters.

    Args:
        session: Object exposing ``get(url, timeout=...)`` whose result has a
            ``content`` attribute holding the image bytes (for example a
            ``requests.Session``).
        max_attempts: Optional upper bound on the number of images tried.
            ``None`` (the default) keeps retrying until a plausible result is
            found, as the original implementation did.

    Returns:
        The recognised text with all whitespace removed. When
        ``max_attempts`` is reached without a plausible result, the last
        (unvalidated) guess is returned.
    """
    # The same configuration is used for every attempt: single-word page
    # segmentation, legacy engine, lowercase-only whitelist.
    ocr_config = (
        "--psm 8 --oem 0 "
        "-c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz"
    )
    attempts = 0
    while True:
        # Every request carries a timeout so a stalled server cannot hang us.
        res = session.get(captcha_url, timeout=10)
        with open("a.jpg", "wb") as f:
            f.write(res.content)
        # Close the image handle before the file is overwritten on a retry.
        with Image.open("a.jpg") as img:
            raw = pytesseract.image_to_string(img, config=ocr_config)
        # OCR output may carry trailing newline/form-feed characters, which
        # would make isalpha() fail for an otherwise valid result.
        captcha = "".join(raw.split())
        logging.info("cap: %s", captcha)
        attempts += 1

        if captcha.islower() and captcha.isalpha() and len(captcha) in (6, 7):
            return captcha
        if max_attempts is not None and attempts >= max_attempts:
            return captcha
        time.sleep(.05)



def get_captcha(req):
    """Download the CAPTCHA with *req* and OCR it via tesserocr.

    Up to five images are tried (one initial attempt plus four retries). An
    attempt is accepted when the recognised text has 6 or 7 characters and
    the mean confidence reported by Tesseract is above 70.

    Args:
        req: Object exposing ``get(url, timeout=...)`` whose result has a
            ``content`` attribute holding the image bytes (for example a
            ``requests.Session``).

    Returns:
        The recognised text with newlines, carriage returns, tabs and spaces
        removed. If no attempt is accepted, the last guess is returned.
    """
    # The context manager releases the native Tesseract handle on exit,
    # including when a request or OCR call raises.
    with PyTessBaseAPI(psm=PSM.SINGLE_WORD, oem=OEM.TESSERACT_ONLY) as api:
        api.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyz")
        captcha = ""
        for _ in range(5):
            res = req.get(captcha_url, timeout=10)
            clean_captcha_image(api, res.content)
            captcha = regex.sub("", api.GetUTF8Text())
            conf = api.MeanTextConf()
            if len(captcha) in (6, 7) and conf > 70:
                break
        return captcha


def break_point(arr):
    """Return the first item of the first pair whose second item is truthy.

    Args:
        arr: Iterable of ``(index, flag)`` pairs.

    Returns:
        The ``index`` of the first pair with a truthy ``flag``. If no flag is
        truthy, the ``index`` of the last pair. ``None`` when *arr* is empty
        (the original raised ``UnboundLocalError`` in that case).
    """
    last_index = None
    for index, flag in arr:
        last_index = index
        if flag:
            break
    return last_index

def convert_numpy_ipl(trimmed):
    """Wrap a 2-D array in an image header created through the ``cv`` API.

    NOTE(review): this function uses the name ``cv``, but the imports visible
    in this file only bring in ``cv2``. Unless ``cv`` is defined elsewhere,
    calling this raises ``NameError``. No visible code calls this function;
    confirm whether it is still needed.

    Args:
        trimmed: Array whose ``shape`` unpacks into ``(h, w)``. Presumably a
            single-channel 8-bit image, given ``IPL_DEPTH_8U`` and ``c = 1``
            below; TODO confirm.

    Returns:
        The header returned by ``cv.CreateImageHeader`` after its data has
        been set from the array's bytes.
    """
    h,w = trimmed.shape
    c = 1  # channel count passed to CreateImageHeader and used in the stride
    iplimage = cv.CreateImageHeader((w,h), cv.IPL_DEPTH_8U, c)
    # The last argument is item size * channels * width, presumably the row
    # step in bytes expected by SetData.
    # NOTE(review): ndarray.tostring() is a deprecated alias of tobytes() and
    # is absent from recent NumPy releases; verify against the pinned version.
    cv.SetData(iplimage, trimmed.tostring(),trimmed.dtype.itemsize * c * (w))
    return iplimage

def clean_captcha_image(api, c_content):
    """Binarise raw CAPTCHA bytes and load the result into *api*.

    The bytes are decoded as a grayscale image, thresholded to black and
    white, converted to a PIL image and passed to ``api.SetImage``.

    Args:
        api: Object exposing ``SetImage(pil_image)``, such as the
            ``PyTessBaseAPI`` instance created by ``get_captcha``.
        c_content: Encoded image bytes (for example an HTTP response body).

    Returns:
        ``True`` when the image was loaded into *api*, ``False`` when decoding
        or thresholding failed. On failure the error is printed and *api* is
        left untouched, so callers that ignore the return value behave as
        before.
    """
    try:
        # np.fromstring is deprecated for binary input; frombuffer is the
        # supported way to view raw bytes as uint8.
        arr = np.frombuffer(c_content, np.uint8)
        image = cv2.imdecode(arr, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # imdecode signals undecodable data by returning None.
            raise ValueError("captcha bytes could not be decoded as an image")
        # With THRESH_OTSU the threshold is computed automatically, so the
        # explicit value 50 is ignored by OpenCV.
        th = cv2.threshold(image, 50, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        api.SetImage(Image.fromarray(th))
    except Exception as e:
        # Deliberately best-effort: report the problem and let the caller retry.
        print("Unexpected error on clean ", e)
        return False
    return True


def parse_captcha(filename):
    """Run pytesseract with its default settings on an image file.

    Args:
        filename: Path of the image to recognise.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    # The context manager closes the underlying file once OCR has finished.
    with Image.open(filename) as img:
        return pytesseract.image_to_string(img)

if __name__ == "__main__":
    # Manual smoke test: fetch and OCR one CAPTCHA with a browser-like
    # User-Agent header.
    import requests

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        session.headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"}
        get_captcha2(session)

我尝试使用 Robot Framework 自动执行相同的操作,但由于会话超时而失败。我尝试下载图像的那一刻,验证码发生了变化。

标签: python selenium robotframework

解决方案



推荐阅读