python - is there a way to able to read text from such images using selenium python
问题描述
https://imgur.com/a/zCmwUEf.jpg
this is the image from whom i am trying to extract text but unable to do so.
import contours
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Users\tan\tesseract\Tesseract-OCR\tesseract.exe'
# Opening the image & storing it in an image object
img = cv2.imread("C:/Users/tan/Desktop/my tppc bots/training challange - Copy/sample4.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
im2 = img.copy()
file = open("recognized.txt", "w+")
file.write("")
file.close()
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
cropped = im2[y:y + h, x:x + w]
file = open("recognized.txt", "a")
text = pytesseract.image_to_string(cropped)
file.write(text)
file.write("\n")
this is my script when i run it, it execute fine but when i open the text file it doesnt show any texts there just empty.
am i doing something wrong?
if someone can help me that be great thanks!
解决方案
我发现easyocr lib在这里很有希望。
导入库
import numpy as np
import easyocr
import cv2
读取图像文件
reader = easyocr.Reader(['en'],gpu = False) # load once only in memory.
image_file_name='capImage.png' # this is the screen snap of your image
image = cv2.imread(image_file_name)
从图像中获取文本
image_text=(reader.readtext(image,detail=0)[0]) # output came as D F7BE1
print(image_text.replace(" ","")) # removed the space and output is : DF7BE1
清理图像选项:
image = cv2.imread(image_file_name)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
sharpen = cv2.filter2D(gray, -1, sharpen_kernel)
thresh = cv2.threshold(sharpen, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
height = 100
dim = (800, 800)
resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
现在利用图像。
image_text=(reader.readtext(thresh,detail=0)[0])
print(image_text.replace(" ",""))
image_text=(reader.readtext(sharpen,detail=0)[0])
print(image_text.replace(" ",""))
推荐阅读
- python - 在 Python 中使用来自 .env Laravel 的数据
- excel - 使用 Vlookup 进行条件匹配
- android - 我在构建 apk 并安装时有一个关于在颤振中使用 Shared_Preferences 的问题
- android - 如何仅打印 json 映射中值的特定部分?
- java - 平台/框架/基础是如何构建的?AOSP 源
- arrays - AngularJS 计算数组中匹配的次数
- reactjs - React 路由器渲染无效组件然后渲染有效组件
- vba - 如何使用 vba 遍历文档中的所有段落
- eclipse - 如何在 nattable 中按列标题层分组实现文本换行
- mongodb - 如何在 MongoDB Ops Manager 中从故障状态中恢复