首页 > 解决方案 > 如何从图像中读取所有文本?

问题描述

我正在使用 pytesseract 从图像(一张图表)中读取文本。

我面临以下挑战:

image_to_data 能够识别出其中一部分文本,但大部分结果都不正确。我的代码附在下面:

# Script: binarize an image, find the external contours, crop a padded
# region around each contour and OCR the crop with Tesseract (digits only).
# Side effects: writes 'nt1.png' and one '<k>.png' per contour, opens a
# preview window per crop (blocks until a key is pressed) and prints results.

# Load the input image and binarize it: grayscale -> Gaussian blur -> Otsu.
fn = 'testimgnum.png'
image = cv2.imread(fn)

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

# Alternative edge-based preprocessing that was tried and then disabled:
# gray = cv2.Canny(gray, 100, 200)
# gray = cv2.dilate(gray, None, iterations=1)
# gray = cv2.erode(gray, None, iterations=1)

# Keep the binarized image on disk for inspection.
cv2.imwrite('nt1.png', gray)

# Outermost contours only; imutils.grab_contours picks the contour list out
# of the tuple returned by findContours.
cnts = cv2.findContours(gray.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)

(cnts, _) = contours.sort_contours(cnts)
orig = gray.copy()
arr_values = []  # one [x1, x2, y1, y2, width, height, text_val] row per OCR'd contour
ctr = 0          # number of contours whose processing raised an exception

# Tesseract options used for every crop: default engine selection, digits only.
# (The previous value, '--oem 3 --psm 10000', was never passed to Tesseract and
# used an invalid page segmentation mode; valid --psm values are 0-13.)
# NOTE(review): no --psm is given, so Tesseract uses its default page
# segmentation; for tight single-number crops a mode such as --psm 7 may work
# better -- confirm on real data.
# NOTE(review): tessedit_char_whitelist is reportedly not honored by the LSTM
# engine in Tesseract 4.0 -- confirm against the installed version.
custom_config = r'--oem 3 -c tessedit_char_whitelist=0123456789'

# Padding (in pixels) added around every bounding box before cropping.
pad_y = 6
pad_x = 4

k = 0
for k, c in enumerate(cnts, 1):
    try:
        text_val = ''
        box = cv2.minAreaRect(c)
        box = cv2.cv.BoxPoints(box) if imutils.is_cv2() else cv2.boxPoints(box)
        box = np.array(box, dtype="int")

        # Axis-aligned extent over all four corners, so a rotated rectangle
        # is fully covered. Naming follows the output rows: y1 is the bottom
        # edge (largest y) and y2 the top edge (smallest y).
        x1, y2 = box.min(axis=0)
        x2, y1 = box.max(axis=0)
        width = x2 - x1
        height = y1 - y2
        print(x1,x2,y1,y2, height, width)

        # Clamp the padded start indices at 0: a negative slice start would
        # wrap around and produce an empty or wrong crop for contours close
        # to the top/left border. End indices past the image edge are
        # clipped by NumPy slicing, so they need no clamping.
        top = max(y2 - pad_y, 0)
        left = max(x1 - pad_x, 0)
        crop_img = gray[top:y1 + pad_y, left:x2 + pad_x]
        cv2.imwrite('{0}.png'.format(k), crop_img)

        # Debug preview; waits for a key press before continuing.
        cv2.imshow("Image", crop_img)
        cv2.waitKey(0)
        #crop_img1 = cv2.resize(crop_img, None, fx=6, fy=6, interpolation=cv2.INTER_CUBIC)
        text_val = pytesseract.image_to_string(crop_img, lang='eng', config=custom_config)
        print("text_val:", text_val)
        arr_values.append([x1,x2,y1,y2,width,height,text_val])

    except Exception as e:
        # Best effort: report the failing contour and carry on with the rest.
        print("e:", e)
        ctr += 1

我使用的是 tesseract 4 和 python 2.7。针对类似的问题,我曾有一个可以正常工作的模型,当时使用的是旧版本的 tesseract。

标签: image-recognition, python-tesseract

解决方案


这就是我的方法:

  1. 对图像中接近白色的部分生成掩膜(mask)以找到数字,并进行膨胀(dilate),使同一个数字的各个数位合并为一个区域。
  2. 查找轮廓,并获得边界矩形。
  3. 将 ROI 转换到 HSV 色彩空间,取其饱和度通道进行实际的 OCR,同时使用 --psm 6 选项。

完整的代码如下:

import cv2
import pytesseract

# Load the input image
img = cv2.imread('KbZMp.png')

# Binary mask of the near-white pixels (all three channels above 248),
# dilated so that neighbouring digits merge into a single blob
is_whitish = (img > 248).all(axis=2)
mask = is_whitish.astype(img.dtype) * 255
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
mask = cv2.dilate(mask, kernel)

# findContours returns either two or three values depending on the OpenCV
# version; pick the contour list accordingly, then take bounding rectangles
found = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cnts = found[1] if len(found) != 2 else found[0]
rects = list(map(cv2.boundingRect, cnts))

# Annotated copy of the input, only used to visualize the results
out = img.copy()

# OCR every bounding rectangle
for rect in rects:
    x, y, w, h = rect

    # The saturation channel of the HSV-converted ROI is what gets OCR'd
    roi = img[y:y+h, x:x+w]
    sat = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)[..., 1]

    # Run Tesseract with --psm 6, then strip line and form feeds
    raw = pytesseract.image_to_string(sat, config='--psm 6')
    text = raw.replace('\n', '').replace('\f', '')

    # Visualization: green box around the ROI, recognized text in red above it
    top_left, bottom_right = (x, y), (x+w, y+h)
    out = cv2.rectangle(out, top_left, bottom_right, (0, 255, 0), 2)
    out = cv2.putText(out, text, (x, y-5),
                      cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255))

# Show the annotated image until a key is pressed
cv2.imshow('out', out)
cv2.waitKey(0)

下面是结果的可视化:

输出

注意:我使用的是曼海姆大学图书馆(UB Mannheim)提供的特殊版本 Tesseract。

----------------------------------------
System information
----------------------------------------
Platform:      Windows-10-10.0.19041-SP0
Python:        3.9.1
PyCharm:       2021.1.1
OpenCV:        4.5.2
pytesseract:   5.0.0-alpha.20201127
----------------------------------------

推荐阅读