python - 用于半导体晶圆 ID 检测的 Tesseract OCR (Python)
问题描述
我正在尝试在 Python 中使用 Tesseract OCR 来识别半导体晶圆 ID,但效果不是很好,而且 `-c tessedit_char_whitelist=0123456789XL` 配置也不起作用。读出的芯片 ID 为:po4>1。
我的原始图像如下(处理前的图像):
我的部分代码如下:
# Run Tesseract on the preprocessed wafer image. --psm 6 treats the image
# as a single uniform block of text; the whitelist restricts output to
# digits plus the letters X and L. NOTE(review): whitelist support varies
# between Tesseract versions — confirm the installed engine honors it.
optCode = pytesseract.image_to_string("c:/opencv/ID_fine_out22.jpg",lang="eng", config=' --psm 6 -c tessedit_char_whitelist=0123456789XL')
# Print the recognized chip ID.
print("ChipID:", optCode)
任何改进 OCR 的想法?也尝试只读取数字。
我也考虑过把机器学习作为一种方法,因为我有大量的样本图像。
解决方案
我自己用 pytesseract 库加上一些 opencv 技术写了一个简单的小脚本。
您可以在其中尝试不同的参数并查看结果。例如,我有一张名为 softserve.png 的图像:
假设您有一个包含以下代码的 ocr.py:
# import the necessary packages
import argparse
import cv2
import numpy as np
import os
from PIL import Image
import pytesseract
# Construct the argument parser. Every preprocessing step is optional and
# selected by name so different combinations can be compared quickly.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
    help="path to input image to be OCR'd")
ap.add_argument("-b", "--blur", type=str, default=None,
    help="noise-reduction method: medianblur, avgblur, gaussblur, "
         "filter (bilateral), filter2d")
ap.add_argument("-t", "--thresh", type=str, default=None,
    help="thresholding method: thresh, thresh1 .. thresh10")
ap.add_argument("-r", "--resize", type=float, default=1.0,
    help="scale factor applied before preprocessing (e.g. 2.0 doubles size)")
ap.add_argument("-m", "--morph", type=str, default=None,
    help="morphological transform: erosion, dilation, opening, closing")
args = vars(ap.parse_args())

# Load the image and optionally upscale it first — cubic interpolation
# keeps glyph edges sharper than the default when enlarging.
image = cv2.imread(args["image"])
if args["resize"] != 1:
    image = cv2.resize(image, None,
                       fx=args["resize"], fy=args["resize"],
                       interpolation=cv2.INTER_CUBIC)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# 5x5 structuring element shared by filter2D and the morphology steps.
kernel = np.ones((5, 5), np.uint8)

# --- optional noise reduction ---------------------------------------------
# FIX: the original had a duplicated "medianblur" branch and assigned the
# filter2D result to an unused variable (so --blur filter2d did nothing);
# both corrected here. Only one method can match, hence elif.
blur = args["blur"]
if blur == "medianblur":
    gray = cv2.medianBlur(gray, 3)
elif blur == "avgblur":
    gray = cv2.blur(gray, (5, 5))
elif blur == "gaussblur":
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
elif blur == "filter":
    gray = cv2.bilateralFilter(gray, 9, 75, 75)
elif blur == "filter2d":
    gray = cv2.filter2D(gray, -1, kernel)

# --- optional thresholding ------------------------------------------------
thresh = args["thresh"]
if thresh == "thresh":
    gray = cv2.threshold(gray, 0, 255,
                         cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
elif thresh == "thresh1":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)[1]
elif thresh == "thresh2":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV)[1]
elif thresh == "thresh3":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_TRUNC)[1]
elif thresh == "thresh4":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_TOZERO)[1]
elif thresh == "thresh5":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_TOZERO_INV)[1]
elif thresh == "thresh6":
    gray = cv2.adaptiveThreshold(gray, 255,
                                 cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY, 115, 1)
elif thresh == "thresh7":
    gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 115, 1)
elif thresh == "thresh8":
    gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY, 11, 2)
elif thresh in ("thresh9", "thresh10"):
    # Both names kept for CLI compatibility; identical Otsu binarization.
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# --- optional morphological transform -------------------------------------
morph = args["morph"]
if morph == "erosion":
    gray = cv2.erode(gray, kernel, iterations=1)
elif morph == "dilation":
    gray = cv2.dilate(gray, kernel, iterations=1)
elif morph == "opening":
    gray = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
elif morph == "closing":
    gray = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)

# Write the preprocessed image to a temporary file so Tesseract can read
# it, OCR it, then always delete the temp file — the with-block closes the
# PIL handle before removal (os.remove on an open file fails on Windows).
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
try:
    with Image.open(filename) as pil_image:
        text = pytesseract.image_to_string(pil_image)
finally:
    os.remove(filename)
print(text)
with open("output.py", "w") as text_file:
    text_file.write(text)

# Show the input image next to the preprocessed image that was OCR'd.
cv2.imshow("Image", image)
cv2.imshow("Output", gray)
cv2.waitKey(0)
如果我只是直接使用 pytesseract.image_to_string() 做普通 OCR 而不做任何预处理:
python3 ocr.py --image softserve.png
我会得到这个文本:
uray ['Amir', 'Barry', 'Chales', ‘Dao']
‘amir’ rss
tee)
print(2)
这是一个非常糟糕的结果,不是吗?
但是调整过 resize 和 thresh 参数之后,你可以得到更好的输出:
python3 ocr.py --image softserve.png --thresh thresh6 --resize 2.675
并在两个打开的窗口中查看 OCR 之前的图像外观:
输出:
names1 = ['Amir', ‘Barry’, ‘Chales', ‘Dao']
if ‘amir' in names1:
@ print(1)
else: «=
@ print(2)
您也可以尝试 morph 和 blur 参数。您可以从 opencv 文档中阅读更多关于模糊、阈值和形态变换的信息。希望这些信息对您的工作有用。
推荐阅读
- apache-spark - 如何从数据框转换为 RDD 并使用案例类返回
- python - 提取列表中具有特定元素的行
- sql-server - 获取动态数据透视结果到临时表 SQL Server
- git - Git 存储库中的 Git 存储库,用于客户端使用分发文件
- java - 如何改进 Java 泛型通配符签名
- r - 如何在 dplyr 中按固定数量的行分组?
- python - 如何修复 portaudio 错误,PortAudio 未初始化?
- reactjs - 网络::ERR_CERT_COMMON_NAME_INVALID,Axios
- swift - 如何使用 UserDefault 添加两个值
- python - 一些子图的属性不能循环工作