首页 > 解决方案 > 如何使用 GCP Vision 或 OpenCV 提取图像中的自动和手写文本

问题描述

我有一张同时包含印刷(机打)文本和手写文本的图像,并编写了一段代码来识别其中的文字,但 GCP Vision API 无法返回正确的结果。有人能建议一下该如何解决吗?

这是我的图像

在此处输入图像描述

这是我的代码:

import os
import io
from google.cloud import vision
from google.cloud.vision import types
import pandas as pd

# Point the Google client library at the service-account key file.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'key.json'
client = vision.ImageAnnotatorClient()

IMAGE_FILE = 'datasetone.png'
FILE_PATH = os.path.join(IMAGE_FILE)

# Read the raw image bytes that are sent to the API.
with io.open(FILE_PATH, 'rb') as image_file:
    content = image_file.read()

image = vision.types.Image(content=content)
response = client.document_text_detection(image=image)

# A failed annotation is reported in ``response.error`` rather than through
# the annotation fields, so check it before reading any results; otherwise
# a failure would just look like an image with no text.
if response.error.message:
    raise RuntimeError(
        'Vision API error for {0}: {1}'.format(
            FILE_PATH, response.error.message))

# Full recognised text of the document, as one string.
docText = response.full_text_annotation.text
print(docText)


# Walk the hierarchy page -> block -> paragraph -> word -> symbol and print
# the confidence reported at each level.
# NOTE: the 'Word text' / 'Symbol' format strings lack a closing ')'; they
# are left unchanged so the printed lines match the sample output that
# follows this script.
pages = response.full_text_annotation.pages
for page in pages:
    for block in page.blocks:
        print('block confidence:', block.confidence)

        for paragraph in block.paragraphs:
            print('paragraph confidence:', paragraph.confidence)

            for word in paragraph.words:
                # A word is delivered as a sequence of symbols; join them
                # to get the word's text.
                word_text = ''.join(symbol.text for symbol in word.symbols)

                print('Word text: {0} (confidence: {1}'.format(
                    word_text, word.confidence))

                for symbol in word.symbols:
                    print('\tSymbol: {0} (confidence: {1}'.format(
                        symbol.text, symbol.confidence))

输出是

Worksheet
Car number: - 1002110021
Data:- Endter
Shell
Qilchange
Phone: 982838238823
email:xyz@xyz.com
name
pod
pos
Technician
name
desc
code
112121223 Benz
10-10-19
10-10-20
Jhondoe
nil
12244334
Baleno
20-10-10
| 20-20-10
Machel
nil
D901.27
'RIOR

block confidence: 0.9800000190734863
paragraph confidence: 0.9800000190734863
Word text: Worksheet (confidence: 0.9900000095367432
        Symbol: W (confidence: 0.9900000095367432
        Symbol: o (confidence: 0.9900000095367432
        Symbol: r (confidence: 1.0
        Symbol: k (confidence: 0.9900000095367432
        Symbol: s (confidence: 1.0
        Symbol: h (confidence: 1.0
        Symbol: e (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
        Symbol: t (confidence: 1.0
Word text: Car (confidence: 0.9800000190734863
        Symbol: C (confidence: 0.9800000190734863
        Symbol: a (confidence: 0.9900000095367432
        Symbol: r (confidence: 0.9900000095367432
Word text: number (confidence: 0.9800000190734863
        Symbol: n (confidence: 0.9599999785423279
        Symbol: u (confidence: 0.9800000190734863
        Symbol: m (confidence: 0.9800000190734863
        Symbol: b (confidence: 0.9800000190734863
        Symbol: e (confidence: 1.0
        Symbol: r (confidence: 1.0
Word text: : (confidence: 0.9900000095367432
        Symbol: : (confidence: 0.9900000095367432
Word text: - (confidence: 0.949999988079071
        Symbol: - (confidence: 0.949999988079071
Word text: 1002110021 (confidence: 0.9700000286102295
        Symbol: 1 (confidence: 0.9599999785423279
        Symbol: 0 (confidence: 0.9800000190734863
        Symbol: 0 (confidence: 0.9800000190734863
        Symbol: 2 (confidence: 0.9399999976158142
        Symbol: 1 (confidence: 0.9599999785423279
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9599999785423279
        Symbol: 0 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
Word text: Data (confidence: 0.9900000095367432
        Symbol: D (confidence: 0.9900000095367432
        Symbol: a (confidence: 0.9900000095367432
        Symbol: t (confidence: 0.9900000095367432
        Symbol: a (confidence: 1.0
Word text: : (confidence: 0.9900000095367432
        Symbol: : (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: Endter (confidence: 0.9900000095367432
        Symbol: E (confidence: 0.9900000095367432
        Symbol: n (confidence: 1.0
        Symbol: d (confidence: 0.9900000095367432
        Symbol: t (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
        Symbol: r (confidence: 1.0
block confidence: 0.9700000286102295
paragraph confidence: 0.9700000286102295
Word text: Shell (confidence: 0.9900000095367432
        Symbol: S (confidence: 0.9900000095367432
        Symbol: h (confidence: 0.9900000095367432
        Symbol: e (confidence: 0.9900000095367432
        Symbol: l (confidence: 1.0
        Symbol: l (confidence: 1.0
Word text: Qilchange (confidence: 0.8999999761581421
        Symbol: Q (confidence: 0.6100000143051147
        Symbol: i (confidence: 0.9100000262260437
        Symbol: l (confidence: 0.9100000262260437
        Symbol: c (confidence: 0.8500000238418579
        Symbol: h (confidence: 0.9200000166893005
        Symbol: a (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: g (confidence: 1.0
        Symbol: e (confidence: 1.0
Word text: Phone (confidence: 0.9900000095367432
        Symbol: P (confidence: 0.9900000095367432
        Symbol: h (confidence: 0.9900000095367432
        Symbol: o (confidence: 1.0
        Symbol: n (confidence: 1.0
        Symbol: e (confidence: 1.0
Word text: : (confidence: 1.0
        Symbol: : (confidence: 1.0
Word text: 982838238823 (confidence: 0.9800000190734863
        Symbol: 9 (confidence: 0.9599999785423279
        Symbol: 8 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 8 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
        Symbol: 8 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
        Symbol: 8 (confidence: 0.9900000095367432
        Symbol: 8 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
Word text: email (confidence: 0.9900000095367432
        Symbol: e (confidence: 0.9900000095367432
        Symbol: m (confidence: 0.9900000095367432
        Symbol: a (confidence: 0.9900000095367432
        Symbol: i (confidence: 1.0
        Symbol: l (confidence: 0.9900000095367432
Word text: : (confidence: 0.9900000095367432
        Symbol: : (confidence: 0.9900000095367432
Word text: xyz (confidence: 0.9900000095367432
        Symbol: x (confidence: 0.9900000095367432
        Symbol: y (confidence: 0.9900000095367432
        Symbol: z (confidence: 0.9900000095367432
Word text: @ (confidence: 0.9900000095367432
        Symbol: @ (confidence: 0.9900000095367432
Word text: xyz (confidence: 0.9700000286102295
        Symbol: x (confidence: 0.949999988079071
        Symbol: y (confidence: 0.9900000095367432
        Symbol: z (confidence: 0.9900000095367432
Word text: . (confidence: 0.9900000095367432
        Symbol: . (confidence: 0.9900000095367432
Word text: com (confidence: 0.9900000095367432
        Symbol: c (confidence: 0.9900000095367432
        Symbol: o (confidence: 1.0
        Symbol: m (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: name (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: a (confidence: 1.0
        Symbol: m (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: pod (confidence: 0.9900000095367432
        Symbol: p (confidence: 0.9900000095367432
        Symbol: o (confidence: 1.0
        Symbol: d (confidence: 0.9900000095367432
block confidence: 0.5699999928474426
paragraph confidence: 0.5699999928474426
Word text: pos (confidence: 0.5699999928474426
        Symbol: p (confidence: 0.20000000298023224
        Symbol: o (confidence: 0.9900000095367432
        Symbol: s (confidence: 0.5400000214576721
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: Technician (confidence: 0.9900000095367432
        Symbol: T (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
        Symbol: c (confidence: 0.9900000095367432
        Symbol: h (confidence: 1.0
        Symbol: n (confidence: 1.0
        Symbol: i (confidence: 1.0
        Symbol: c (confidence: 1.0
        Symbol: i (confidence: 0.9900000095367432
        Symbol: a (confidence: 1.0
        Symbol: n (confidence: 1.0
Word text: name (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: a (confidence: 1.0
        Symbol: m (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: desc (confidence: 0.9900000095367432
        Symbol: d (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
        Symbol: s (confidence: 1.0
        Symbol: c (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: code (confidence: 0.9900000095367432
        Symbol: c (confidence: 0.9900000095367432
        Symbol: o (confidence: 1.0
        Symbol: d (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 112121223 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 1.0
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 1.0
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
Word text: Benz (confidence: 0.9900000095367432
        Symbol: B (confidence: 0.9900000095367432
        Symbol: e (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: z (confidence: 0.9900000095367432
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 1.0
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 19 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 9 (confidence: 0.9900000095367432
block confidence: 0.9800000190734863
paragraph confidence: 0.9800000190734863
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 20 (confidence: 0.9800000190734863
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9800000190734863
block confidence: 0.9599999785423279
paragraph confidence: 0.9599999785423279
Word text: Jhondoe (confidence: 0.9599999785423279
        Symbol: J (confidence: 0.8100000023841858
        Symbol: h (confidence: 0.9900000095367432
        Symbol: o (confidence: 1.0
        Symbol: n (confidence: 0.9900000095367432
        Symbol: d (confidence: 0.9900000095367432
        Symbol: o (confidence: 0.9900000095367432
        Symbol: e (confidence: 0.9800000190734863
block confidence: 0.9700000286102295
paragraph confidence: 0.9700000286102295
Word text: nil (confidence: 0.9700000286102295
        Symbol: n (confidence: 0.9900000095367432
        Symbol: i (confidence: 0.9900000095367432
        Symbol: l (confidence: 0.9399999976158142
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 12244334 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 1.0
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 4 (confidence: 0.9900000095367432
        Symbol: 4 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
        Symbol: 4 (confidence: 0.9900000095367432
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: Baleno (confidence: 0.9900000095367432
        Symbol: B (confidence: 0.9900000095367432
        Symbol: a (confidence: 1.0
        Symbol: l (confidence: 0.9900000095367432
        Symbol: e (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: o (confidence: 0.9900000095367432
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 20 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
block confidence: 0.8899999856948853
paragraph confidence: 0.8899999856948853
Word text: | (confidence: 0.4300000071525574
        Symbol: | (confidence: 0.4300000071525574
Word text: 20 (confidence: 0.8399999737739563
        Symbol: 2 (confidence: 0.699999988079071
        Symbol: 0 (confidence: 0.9800000190734863
Word text: - (confidence: 0.949999988079071
        Symbol: - (confidence: 0.949999988079071
Word text: 20 (confidence: 0.9800000190734863
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9800000190734863
Word text: - (confidence: 1.0
        Symbol: - (confidence: 1.0
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 1.0
block confidence: 0.9300000071525574
paragraph confidence: 0.9300000071525574
Word text: Machel (confidence: 0.9300000071525574
        Symbol: M (confidence: 0.9200000166893005
        Symbol: a (confidence: 0.949999988079071
        Symbol: c (confidence: 0.9200000166893005
        Symbol: h (confidence: 0.9599999785423279
        Symbol: e (confidence: 0.949999988079071
        Symbol: l (confidence: 0.9200000166893005
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: nil (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: i (confidence: 1.0
        Symbol: l (confidence: 0.9900000095367432
block confidence: 0.7599999904632568
paragraph confidence: 0.7599999904632568
Word text: D901 (confidence: 0.7200000286102295
        Symbol: D (confidence: 0.6499999761581421
        Symbol: 9 (confidence: 0.8700000047683716
        Symbol: 0 (confidence: 0.49000000953674316
        Symbol: 1 (confidence: 0.8999999761581421
Word text: . (confidence: 0.6399999856948853
        Symbol: . (confidence: 0.6399999856948853
Word text: 27 (confidence: 0.8600000143051147
        Symbol: 2 (confidence: 0.9399999976158142
        Symbol: 7 (confidence: 0.7799999713897705
Word text: ' (confidence: 0.9300000071525574
        Symbol: ' (confidence: 0.9300000071525574
Word text: RIOR (confidence: 0.7400000095367432
        Symbol: R (confidence: 0.9599999785423279
        Symbol: I (confidence: 0.7799999713897705
        Symbol: O (confidence: 0.8399999737739563
        Symbol: R (confidence: 0.4000000059604645

数据集中最后一列(code 列)是手写的。当图像中同时存在手写文本和印刷文本时,有人能帮我看看该如何识别这些文本吗?

如果不使用 Google Cloud Vision,有没有办法用 OpenCV 按行查找并打印数据?

标签: python opencv image-processing google-cloud-platform deep-learning

解决方案


您可以尝试先对图像进行预处理,去除其中的水平线和垂直线,这样可能会得到更好的识别结果

方法#1:水平/垂直内核和轮廓中的“填充”以删除线条

在此处输入图像描述

import cv2

def _outer_contours(mask):
    """Return the external contours found in the binary image *mask*.

    ``cv2.findContours`` returns either a two-element or a three-element
    tuple depending on the OpenCV release; the contour list is picked out
    of whichever shape comes back.
    """
    found = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return found[0] if len(found) == 2 else found[1]


# Load the image and build an inverted Otsu binary mask from it.
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(
    gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

BLACK = (0, 0, 0)

# Remove horizontal lines first, then vertical lines. The order matters:
# the vertical pass runs on the mask already modified by the horizontal one.
# A 30x1 (resp. 1x30) opening keeps only long horizontal (resp. vertical)
# runs, whose contours are then drawn in black onto the mask.
for kernel_size in ((30, 1), (1, 30)):
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    detected_lines = cv2.morphologyEx(
        thresh, cv2.MORPH_OPEN, line_kernel, iterations=1)
    for contour in _outer_contours(detected_lines):
        cv2.drawContours(thresh, [contour], -1, BLACK, 1)

# Remove small noise: fill every contour whose area is below 10 with black.
for contour in _outer_contours(thresh):
    if cv2.contourArea(contour) < 10:
        cv2.drawContours(thresh, [contour], -1, BLACK, -1)

# Keep only the masked pixels of the input image and paint everything
# outside the mask white.
result = cv2.bitwise_and(image, image, mask=thresh)
result[thresh == 0] = (255, 255, 255)

cv2.imshow('result', result)
cv2.imwrite('result.png', result)
cv2.waitKey()

方法 #2:具有掩码和按位运算的水平/垂直内核

在此处输入图像描述

import cv2

def _inverted_close(src, kernel_size):
    """Morphologically close *src* with a rectangular kernel and invert it.

    The result is ``255 - closed``, i.e. the closed image with its
    intensities flipped.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    return 255 - cv2.morphologyEx(src, cv2.MORPH_CLOSE, kernel)


image = cv2.imread('1.png')

# One mask per line direction: a tall 1x50 kernel for vertical lines and a
# wide 50x1 kernel for horizontal lines.
# NOTE(review): presumably assumes dark lines on a light background, so the
# inverted closing is bright exactly where long lines are - confirm on the
# actual input.
remove_vertical = _inverted_close(image, (1, 50))
remove_horizontal = _inverted_close(image, (50, 1))

# Combine both masks, then add them onto the input image.
remove_both = cv2.add(remove_vertical, remove_horizontal)
result = cv2.add(remove_both, image)

cv2.imshow('result', result)
cv2.waitKey()

使用 Pytesseract OCR 的结果

Worksheet
Car number - 1002110021
Data - Endter
Shell
Qilchange
Phone 982838238823
email xvz@xyz com
id name pod pos Technician desc code

name
112121223 Benz 10-10-19 10-10-20 Jhondoe nil

ve O \

12244334 Baleno 20-10-10 20-20-10 Machel nil \ \

它似乎无法识别手写文本(对此您可能需要使用机器学习/深度学习来训练自己的分类器),但对印刷文本的识别效果非常好。我们使用 --psm 6 配置选项,让 Tesseract 将图像视为一个统一的文本块。更多配置选项请参阅 Tesseract 文档中关于页面分割模式(PSM)的说明

import cv2
import pytesseract

# Tell pytesseract which Tesseract executable to run.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# '--psm 6' makes Tesseract assume a single uniform block of text.
ocr_config = '--psm 6'

# OCR the pre-processed image and print the recognised text.
cleaned = cv2.imread('result.png')
print(pytesseract.image_to_string(cleaned, lang='eng', config=ocr_config))

推荐阅读