How to rotate an image to align the text for extraction?

Problem description

I am using pytesseract to extract text from images, but it doesn't work on skewed images. Consider the image given below:

[image]

Here is the code for extracting text; it works fine on images that are not skewed.

import cv2
import numpy as np
import pytesseract
from google.colab.patches import cv2_imshow  # Colab replacement for cv2.imshow; use cv2.imshow elsewhere

img = cv2.imread(<path_to_image>)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
ret3, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

def findSignificantContours(img, edgeImg):
    contours, hierarchy = cv2.findContours(edgeImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    # Find level 1 contours
    level1 = []
    for i, tupl in enumerate(hierarchy[0]):
        # Each array is in format (Next, Prev, First child, Parent)
        # Filter the ones without a parent
        if tupl[3] == -1:
            tupl = np.insert(tupl, 0, [i])
            level1.append(tupl)
    significant = []
    tooSmall = edgeImg.size * 5 / 100  # A contour covering less than 5% of the image area is probably too small
    for tupl in level1:
        contour = contours[tupl[0]]
        area = cv2.contourArea(contour)
        if area > tooSmall:
            significant.append([contour, area])
        # Draw the contour on the original image
        cv2.drawContours(img, [contour], 0, (0,255,0), 2, cv2.LINE_AA, maxLevel=1)

    significant.sort(key=lambda x: x[1])
    #print([x[1] for x in significant])
    mx = (0,0,0,0)      # biggest bounding box so far
    mx_area = 0
    for cont in contours:
        x, y, w, h = cv2.boundingRect(cont)
        area = w * h
        if area > mx_area:
            mx = x, y, w, h
            mx_area = area
    x, y, w, h = mx

    # Output to files
    roi = img[y:y+h, x:x+w]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5,5), 0)
    ret3, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    cv2_imshow(thresh)
    text = pytesseract.image_to_string(roi)
    print(text)
    print("\n")
    print(pytesseract.image_to_string(thresh))
    print("\n")
    return [x[0] for x in significant]

edgeImg_8u = np.asarray(thresh, np.uint8)

# Find contours
significant = findSignificantContours(img, edgeImg_8u)
mask = thresh.copy()
mask[mask > 0] = 0
cv2.fillPoly(mask, significant, 255)
# Invert mask
mask = np.logical_not(mask)

# Finally remove the background
img[mask] = 0

Tesseract cannot extract text from this image. Is there a way I can rotate it so that the text is aligned perfectly, and then feed it to pytesseract? Please let me know if my question needs more clarification.

Tags: python, opencv, machine-learning, image-processing, computer-vision

Solution


Here's a simple approach:

  1. Obtain binary image. Load the image, convert to grayscale, apply a Gaussian blur, then Otsu's threshold.

  2. Find contours and sort for the largest contour. We find contours, then filter by contour area with cv2.contourArea() to isolate the rectangular contour.

  3. Perform perspective transform. Next we perform contour approximation with cv2.approxPolyDP() to obtain the rectangular contour. Finally we use imutils.perspective.four_point_transform to obtain a bird's-eye view of the image.


Binary image

Result

To actually extract the text, take a look at

  1. Use pytesseract OCR to recognize text from an image

  2. Cleaning image for OCR

  3. Detect text area in an image using python and opencv
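
For reference, a minimal pytesseract usage sketch (assumptions: pytesseract and the Tesseract binary are installed, and the corrected image produced by the code below has been saved as "warped.png", a hypothetical filename):

import cv2
import pytesseract

# Hypothetical path: the perspective-corrected image saved from the code below
warped = cv2.imread("warped.png")
# --psm 6 treats the image as a single uniform block of text; adjust for your layout
text = pytesseract.image_to_string(warped, config="--psm 6")
print(text)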

Code

from imutils.perspective import four_point_transform
import cv2

# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread("1.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7,7), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Find contours and sort by contour area, largest first
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]  # handle OpenCV 3.x vs. 4.x return values
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None

for c in cnts:
    # Perform contour approximation; keep the first 4-point (rectangular) contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        displayCnt = approx
        break

# Obtain bird's-eye view of image
warped = four_point_transform(image, displayCnt.reshape(4, 2))

cv2.imshow("thresh", thresh)
cv2.imshow("warped", warped)
cv2.waitKey()
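
Since the question specifically asks about rotating the image: when a clean 4-point page contour cannot be found, a rotation-only deskew is a common fallback. Below is a minimal sketch, not part of the solution above; it assumes dark text on a light background and that essentially all foreground pixels after inverted thresholding belong to the text. The skew angle is estimated with cv2.minAreaRect over the text pixels and cancelled with cv2.warpAffine:

import cv2
import numpy as np

image = cv2.imread("1.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Invert so the text pixels become the nonzero foreground
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Angle of the minimum-area rectangle enclosing all text pixels
coords = np.column_stack(np.where(thresh > 0)).astype(np.float32)
angle = cv2.minAreaRect(coords)[-1]
# The returned angle convention differs across OpenCV versions;
# this normalization assumes the classic (-90, 0] convention
if angle < -45:
    angle = -(90 + angle)
else:
    angle = -angle

# Rotate around the image center to cancel the estimated skew
(h, w) = image.shape[:2]
M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
deskewed = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC,
                          borderMode=cv2.BORDER_REPLICATE)

cv2.imshow("deskewed", deskewed)
cv2.waitKey()

The deskewed result can then be fed to pytesseract the same way as warped above.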
