python - 如何将 Python 代码转换为 Robot Framework 关键字
问题描述
我正在尝试自动识别验证码(CAPTCHA),并为此编写了一段 Python 代码。现在我卡在了一个问题上:无法通过创建自定义库的方式在 Robot Framework 中调用这段代码。
from PIL import Image
import string
import json
import os
import time
import pytesseract
import cv2
import numpy as np
import re
from tesserocr import PyTessBaseAPI,PSM, OEM
import time
import logging
# URL the captcha image is downloaded from.
captcha_url = "http://www.mca.gov.in/mcafoportal/getCapchaImage.do"

# Matches newline, carriage return, tab and space characters; the functions
# below use it to strip those characters out of the OCR text.
regex = re.compile(r'[\n\r\t ]')
def get_captcha2(session):
    """Download the captcha image and OCR it with pytesseract until it looks valid.

    A result is accepted when it consists only of lowercase letters and is
    6 or 7 characters long. Each attempt overwrites ``a.jpg`` in the current
    working directory with the freshly downloaded image.

    Args:
        session: Object exposing ``get(url, timeout=...)`` whose response has
            a ``content`` attribute holding the image bytes (e.g. a
            ``requests.Session``).

    Returns:
        The recognised captcha text with all whitespace removed.

    Note:
        The loop is unbounded: it keeps requesting new captchas until one
        passes validation.
    """
    # Same OCR settings for every attempt; the whitelist matches the
    # lowercase-letters-only validation below.
    ocr_config = '--psm 8 --oem 0 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz'
    while True:
        # Every request carries a timeout so a stalled server cannot hang a retry.
        res = session.get(captcha_url, timeout=10)
        with open("a.jpg", "wb") as f:
            f.write(res.content)
        # Close the image file handle as soon as OCR is done.
        with Image.open("a.jpg") as img:
            raw = pytesseract.image_to_string(img, config=ocr_config)
        # OCR output can carry trailing whitespace (newline / form feed);
        # without removing it ``isalpha()`` would never succeed.
        captcha = "".join(raw.split())
        logging.info("cap: %s", captcha)
        if captcha.islower() and captcha.isalpha() and len(captcha) in (6, 7):
            return captcha
        # Short pause before asking the server for another captcha.
        time.sleep(.05)
def get_captcha(req):
    """Download the captcha image and OCR it with tesserocr, retrying on weak results.

    A result is accepted when the text is 6 or 7 characters long and the
    mean confidence reported by Tesseract is above 70. At most five attempts
    are made (one initial attempt plus up to four retries); the text from
    the last attempt is returned even if it did not pass.

    Args:
        req: Object exposing ``get(url, timeout=...)`` whose response has a
            ``content`` attribute holding the image bytes.

    Returns:
        The recognised text with newlines, carriage returns, tabs and spaces
        removed.
    """
    # The context manager releases the native Tesseract handle even when a
    # request or the OCR step raises.
    with PyTessBaseAPI(psm=PSM.SINGLE_WORD, oem=OEM.TESSERACT_ONLY) as api:
        api.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyz")
        captcha = ""
        for _ in range(5):
            res = req.get(captcha_url, timeout=10)
            clean_captcha_image(api, res.content)
            captcha = regex.sub("", api.GetUTF8Text())
            conf = api.MeanTextConf()
            if len(captcha) in (6, 7) and conf > 70:
                break
        return captcha
def break_point(arr):
    """Return the first element of the first pair whose second element is truthy.

    Args:
        arr: Iterable of ``(i, n)`` pairs.

    Returns:
        ``i`` from the first pair where ``n`` is truthy; if no ``n`` is
        truthy, ``i`` from the last pair; ``None`` when ``arr`` is empty
        (previously this case failed on an unbound loop variable).
    """
    i = None
    for i, n in arr:
        if n:
            break
    return i
def convert_numpy_ipl(trimmed):
    """Wrap a 2-D numpy array in a single-channel 8-bit legacy OpenCV image header.

    Args:
        trimmed: 2-D numpy array (unpacked as ``h, w = trimmed.shape``).

    Returns:
        The image header created by ``cv.CreateImageHeader`` with its data
        set to the bytes of ``trimmed``.

    Raises:
        ImportError: If the legacy ``cv2.cv`` bindings are not available.
    """
    # The file only imports ``cv2``; the legacy ``cv`` API used here lives in
    # ``cv2.cv`` (OpenCV 2.x). Import it locally so the name is defined.
    import cv2.cv as cv

    h, w = trimmed.shape
    c = 1  # number of channels
    iplimage = cv.CreateImageHeader((w, h), cv.IPL_DEPTH_8U, c)
    # ``tobytes`` replaces the deprecated ``tostring`` alias; the last
    # argument is the number of bytes per image row.
    cv.SetData(iplimage, trimmed.tobytes(), trimmed.dtype.itemsize * c * w)
    return iplimage
def clean_captcha_image(api, c_content):
    """Binarise raw captcha image bytes and hand the result to the OCR API.

    Any exception raised along the way is caught and printed, so on failure
    ``api`` keeps whatever image it had before the call.

    Args:
        api: Object exposing ``SetImage(image)`` (a ``PyTessBaseAPI`` in
            ``get_captcha``).
        c_content: Encoded image bytes as downloaded from the server.
    """
    try:
        # ``np.frombuffer`` is the supported way to view binary data as an
        # array; ``np.fromstring`` is deprecated for this use.
        arr = np.frombuffer(c_content, np.uint8)
        # Flag 0 decodes the image as single-channel grayscale.
        image = cv2.imdecode(arr, 0)
        # ``cv2.threshold`` returns (threshold_used, image); keep the image.
        th = cv2.threshold(image, 50, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        iplimage = Image.fromarray(th)
        api.SetImage(iplimage)
    except Exception as e:
        print("Unexpected error on clean ", e)
def parse_captcha(filename):
    """Run pytesseract with its default settings on an image file.

    Args:
        filename: Path of the image file to read.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    # Open the image in a ``with`` block so its file handle is closed once
    # OCR has finished.
    with Image.open(filename) as img:
        return pytesseract.image_to_string(img)
if __name__ == "__main__":
    # Script entry point: fetch and OCR one captcha using a browser-like
    # User-Agent header.
    import requests

    # The ``with`` block closes the session's connections on exit.
    with requests.Session() as session:
        # This assignment replaces the session's header mapping entirely.
        session.headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"}
        get_captcha2(session)
我尝试使用 Robot Framework 自动执行相同的操作,但由于会话超时而失败:每当我尝试下载验证码图像时,验证码就已经发生了变化。
解决方案
这个例子应该有助于将 Python 代码转换为 Robot Framework 关键字。
推荐阅读
- r - tryCatch 错误处理在 Shiny 应用程序中不起作用?
- python - 不同频率时间序列的布尔掩码
- ddev - 启动 DDEV-Local 项目时出现“发现孤儿容器”
- c++ - 程序进入无限循环,但如果删除一些函数或行,它可以工作
- python-3.x - 通过在特定日期范围之间插入其他列来在数据框中创建新列 - Pandas
- mysql - mySQL Left Join 多个值(REPLACE AND SUBSTRING_INDEX FUNCTION 失败)
- html - 将剪切路径定位到 div 角
- c++ - 在 Linux 中编译 C++ 程序时链接 PCL 库
- python - 在 pandas 数据框中的多个观察特征中使用 lambda 替换大于限制的值
- python - 按日期排列的二元组