首页 > 解决方案 > 仅识别图像中的文本区域

问题描述

我正在开发一个 python 脚本来识别图像中的文本。到目前为止,我已经能够识别图像中分离良好的文本。

可以正常识别的情况:

可以正常识别的情况

分离轮廓:

分离的轮廓

但如果文本区域周围有物体,则无法识别。

失败的:

失败的

当对象的边缘与文本边缘连接时,它被识别为一个轮廓。

连接轮廓:

连接轮廓

我只想检测文本区域的边缘。

我还需要一种方法来获取文本区域的二值化图像(黑色背景上的白色文本),以用于字符分割步骤。目前我使用的是根据边缘像素颜色决定是否反转的方法,但它并不总是有效。

# Detect candidate text regions in an image and build a binarized
# (white-on-black) rendering of each accepted region.
#
# Pipeline: grayscale -> bilateral filter -> morphological gradient ->
# Otsu threshold -> horizontal closing -> external contours -> per-contour
# filtering on fill ratio, aspect ratio and area.

# read image
img_no = '197'
rgb = cv2.imread(img_no + '.jpg')
# cv2.imread reports failure by returning None rather than raising, so stop
# here with a clear message instead of failing later inside cvtColor.
if rgb is None:
    raise IOError("could not read image: " + img_no + '.jpg')
# cv2.imshow('original', rgb)

# convert image to grayscale
gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

# bilateral filter
blur = cv2.bilateralFilter(gray, 5, 75, 75)
# cv2.imshow('blur', blur)

# morphological gradient calculation
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = cv2.morphologyEx(blur, cv2.MORPH_GRADIENT, kernel)
# cv2.imshow('gradient', grad)

# binarization (Otsu picks the threshold, so the 0.0 passed here is ignored)
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
cv2.imshow('otsu', bw)

# closing with a 6x1 (wide, one-pixel-high) kernel to bridge horizontal gaps
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (6, 1))
closed = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
cv2.imshow('closed', closed)

# black_bg collects the closed-edge pixels of accepted regions;
# black_bg_copy collects their final binarized pixels.
black_bg = np.zeros_like(closed)
black_bg_copy = np.zeros_like(gray)

# finding contours
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); indexing from the end works with both layouts.
# A copy is passed because older OpenCV releases modify the input image and
# `closed` is still read inside the loop below.
found = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours, hierarchy = found[-2], found[-1]

mask = np.zeros(closed.shape, dtype=np.uint8)

for idx, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    # Clear the bounding box first so that fills left behind by earlier,
    # overlapping contours do not inflate this contour's fill ratio.
    mask[y:y + h, x:x + w] = 0
    area = cv2.contourArea(contour)
    aspect_ratio = float(w) / h
    roi = closed[y:y + h, x:x + w]
    # cv2.threshold does not write to its source, so a view of `gray` is
    # enough; copying the whole image for every contour is unnecessary.
    selected = gray[y:y + h, x:x + w]
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    # r: fraction of the bounding box covered by the filled contour
    r = float(cv2.countNonZero(mask[y:y + h, x:x + w])) / (w * h)

    # identify region of interest
    if r > 0.34 and 0.52 < aspect_ratio < 10 and area > 145.0:
        cv2.rectangle(rgb, (x, y), (x + w - 1, y + h - 1), (0, 255, 0), 2)
        black_bg[y:y + h, x:x + w] = roi
        _, bw_final = cv2.threshold(selected, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        print("top" + str(bw_final[0, 0]))
        print("bot" + str(bw_final[h - 1, w - 1]))
        #cv2.imshow('black', bw_final)
        # Polarity heuristic: if either the top-left or the bottom-right
        # pixel is black, the background is taken to be black already;
        # otherwise the patch is inverted to get white text on black.
        if bw_final[0, 0] == 0 or bw_final[h - 1, w - 1] == 0:
            black_bg_copy[y:y + h, x:x + w] = bw_final
        else:
            inverted = cv2.bitwise_not(bw_final)
            black_bg_copy[y:y + h, x:x + w] = inverted

# NOTE(review): no cv2.waitKey() call follows these imshow calls in the
# visible snippet; without one the windows may never render - confirm.
cv2.imshow('rectangles', rgb)
# cv2.imshow('result', black_bg)
cv2.imshow('output', black_bg_copy)

标签: python opencv

解决方案


推荐阅读