首页 > 解决方案 > 如何使用 tesseract 从相机中可靠地提取文本

问题描述

我正在尝试从相机拍摄的手机屏幕照片中可靠地提取文本,更具体地说,是提取数字及其边界框。但是,得到的结果好坏参半:经常找不到数字,或者把 5 错误地识别成 9。我知道预处理对 tesseract 很重要,但如果还有其他能改进识别效果的技巧,也非常欢迎。测试图片:test2.png

import re
import cv2
import numpy as np
import pytesseract
from pytesseract import Output
#from matplotlib import pyplot as plt

# Load the test photo from the working directory.
src = cv2.imread('test2.png')
# cv2.imread signals a missing or unreadable file by returning None rather
# than raising, so fail here with a clear message instead of inside cv2.rotate.
if src is None:
    raise FileNotFoundError("could not read image 'test2.png'")
# Use the rotation flag exposed directly on the cv2 module; the nested
# cv2.cv2 alias is not present in recent opencv-python releases.
image = cv2.rotate(src, cv2.ROTATE_90_CLOCKWISE)
# Target size passed to cv2.resize as dsize, i.e. (width, height).
dim = (1920, 2560)
imager = cv2.resize(image, dim, interpolation=cv2.INTER_LINEAR)


# get grayscale image
def get_grayscale(image):
    """Return `image` converted with cv2.COLOR_BGR2GRAY.

    Args:
        image: Image array as produced by cv2.imread / cv2.resize
            (BGR channel order is what the conversion code expects).

    Returns:
        The result of cv2.cvtColor for the BGR-to-grayscale conversion.
    """
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# noise removal
def remove_noise(image):
    """Return `image` smoothed by cv2.medianBlur with an aperture size of 5."""
    aperture = 5
    denoised = cv2.medianBlur(image, aperture)
    return denoised

# thresholding
def thresholding(image):
    """Binarize `image` with cv2.threshold using THRESH_BINARY + THRESH_OTSU.

    The call passes 0 as the threshold argument and 255 as the maximum
    value; only the second element of cv2.threshold's result (the
    thresholded image) is returned.
    """
    mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary = cv2.threshold(image, 0, 255, mode)
    return binary

# Preprocessing pipeline: grayscale -> Otsu threshold -> invert.
gray = get_grayscale(imager)
# Median-blur denoising is currently disabled; the threshold below runs on
# the raw grayscale image.
#nois = remove_noise(gray)
thresh = thresholding(gray)

# Invert the binary image (intended to yield black text on a white
# background). Note that this rebinds `gray` to the inverted binary image.
gray = cv2.bitwise_not(thresh)

# `img` is the image handed to Tesseract and later drawn on.
img = gray

# Characters Tesseract is allowed to emit.
word = '0123456789'

# Run OCR and get per-detection data as a dict of parallel lists
# (the loop further down reads its 'text', 'left', 'top', 'width' and
# 'height' entries). The config string must be one unbroken literal:
# "-c tessedit_char_whitelist=<chars>" sets the whitelist variable.
d = pytesseract.image_to_data(
    img,
    output_type=Output.DICT,
    config="-c tessedit_char_whitelist=" + word,
)
keys = list(d.keys())

# Dump the raw OCR result for inspection.
print(d)

# Recognised strings that should get a bounding box drawn around them.
words = ['1','2','3','4','5','6','7','8','9','0','95.00']

# d['text'], d['left'], d['top'], d['width'] and d['height'] are indexed in
# parallel, so the position of each text entry selects its box.
for i, text in enumerate(d['text']):
    # Plain membership test instead of words.index() inside a bare except:
    # non-matching entries are skipped, while genuine errors (e.g. from
    # cv2.rectangle) are no longer silently swallowed.
    if text not in words:
        continue
    (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
    # NOTE(review): img is the thresholded single-channel image, so the
    # (0, 255, 0) colour presumably does not render as green - convert img
    # to BGR first if a coloured box is wanted; confirm.
    img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
#center_X = x + w/2
#center_Y = y + h/2
#print(center_X, center_Y)

# Downscale the annotated image to 480x640 for display; this rebinds the
# module-level `dim` and `imager` used earlier for the OCR-sized image.
dim = (480, 640)
imager = cv2.resize(img, dim)
# Show the result in a window titled 'img' and wait for a key press
# (waitKey(0) waits without a timeout).
cv2.imshow('img', imager)
cv2.waitKey(0)

标签: ocr, tesseract, python-tesseract

解决方案


推荐阅读