首页 > 解决方案 > 如何在每行上绘制单个边界框,裁剪边界框并将图像保存在文件夹 opencv python

问题描述

我正在使用 https://github.com/mindee/doctr 这个仓库进行 OCR,并对下面的图像进行了推理,得到了预测结果。

输入图像

预测结果图像如下所示:预测图像

import json

from doctr.io import DocumentFile

from doctr.models import ocr_predictor



# Input image to run OCR on, and the file the exported predictions go to.
IMAGE_PATH = "/content/passbook_82_0.jpeg"
PREDS_PATH = "/content/preds.json"

# Build the OCR predictor, requesting pretrained weights.
model = ocr_predictor(pretrained=True)

# Load the input as an image document (the source is a JPEG, not a PDF).
doc = DocumentFile.from_images(IMAGE_PATH)

# Analyze
result = model(doc)

# Export results in json; the encoding is explicit so the output does not
# depend on the platform's default locale.
with open(PREDS_PATH, "w", encoding="utf-8") as f:
    json.dump(result.export(), f)

下面的 JSON 文件保存了坐标和置信度值。每个单词都带有一个 geometry 字段,以相对坐标的形式给出:[[xmin, ymin], [xmax, ymax]]。

{"pages": [{"page_idx": 0, "dimensions": [294, 632], "orientation": {"value": null, "confidence": null}, "language": {"value": null, "confidence": null}, "blocks": [{"geometry": [[0.0341796875, 0.169921875], [0.9775390625, 0.8896484375]], "lines": [{"geometry": [[0.0341796875, 0.169921875], [0.9775390625, 0.8896484375]], "words": [{"value": "anrdla", "confidence": 0.2445085644721985, "geometry": [[0.11328125, 0.1708984375], [0.1923828125, 0.212890625]]}, {"value": "veedas", "confidence": 0.16714346408843994, "geometry": [[0.1845703125, 0.169921875], [0.2744140625, 0.2158203125]]}, {"value": "State", "confidence": 0.9990315437316895, "geometry": [[0.642578125, 0.1748046875], [0.6982421875, 0.2236328125]]}, {"value": "Bank", "confidence": 0.999847412109375, "geometry": [[0.6982421875, 0.1767578125], [0.7529296875, 0.2236328125]]}, {"value": "of", "confidence": 0.9996934533119202, "geometry": [[0.7548828125, 0.1796875], [0.783203125, 0.22265625]]}, {"value": "India", "confidence": 0.9772151708602905, "geometry": [[0.78125, 0.177734375], [0.8388671875, 0.2236328125]]}, {"value": "REGULAR", "confidence": 0.8577464818954468, "geometry": [[0.0380859375, 0.2744140625], [0.1318359375, 0.310546875]]}, {"value": "SAVINGS", "confidence": 0.994958758354187, "geometry": [[0.140625, 0.27734375], [0.2314453125, 0.310546875]]}, {"value": "BANK", "confidence": 0.9993945956230164, "geometry": [[0.240234375, 0.27734375], [0.296875, 0.3134765625]]}, {"value": "ACCOUNT", "confidence": 0.9985705018043518, "geometry": [[0.3056640625, 0.279296875], [0.39453125, 0.3125]]}, {"value": "DARANGAMELA", "confidence": 0.4595634639263153, "geometry": [[0.73046875, 0.283203125], [0.8720703125, 0.314453125]]}, {"value": "No", "confidence": 0.9307771921157837, "geometry": [[0.0869140625, 0.322265625], [0.1201171875, 0.3564453125]]}, {"value": "CIF", "confidence": 0.9934298992156982, "geometry": [[0.03515625, 0.3212890625], [0.083984375, 0.3583984375]]}, {"value": "88537466750", "confidence": 
0.9332917332649231, "geometry": [[0.3037109375, 0.3232421875], [0.4443359375, 0.361328125]]}, {"value": "DARANGAMELA", "confidence": 0.5046369433403015, "geometry": [[0.7294921875, 0.328125], [0.87109375, 0.359375]]}, {"value": "-", "confidence": 0.9411769509315491, "geometry": [[0.1826171875, 0.361328125], [0.1923828125, 0.3740234375]]}, {"value": "Account", "confidence": 0.9907050728797913, "geometry": [[0.0390625, 0.3564453125], [0.130859375, 0.3896484375]]}, {"value": "No", "confidence": 0.9819487929344177, "geometry": [[0.138671875, 0.3544921875], [0.1796875, 0.390625]]}, {"value": "20306950151", "confidence": 0.23563283681869507, "geometry": [[0.3076171875, 0.35546875], [0.4404296875, 0.3876953125]]}, {"value": "Customer", "confidence": 0.9558802843093872, "geometry": [[0.0380859375, 0.38671875], [0.1435546875, 0.4189453125]]}, {"value": "Name:", "confidence": 0.9982724785804749, "geometry": [[0.15234375, 0.3876953125], [0.2177734375, 0.421875]]}, {"value": "RASHO", "confidence": 0.6698806285858154, "geometry": [[0.2783203125, 0.388671875], [0.3466796875, 0.4228515625]]}, {"value": "Ms.", "confidence": 0.9627761840820312, "geometry": [[0.2265625, 0.388671875], [0.2685546875, 0.423828125]]}, {"value": "MANDAL", "confidence": 0.998543381690979, "geometry": [[0.353515625, 0.3916015625], [0.4326171875, 0.4248046875]]}, {"value": "S/D/W/H/O:BISWAMITRA", "confidence": 0.8553118705749512, "geometry": [[0.04296875, 0.4755859375], [0.296875, 0.5126953125]]}, {"value": "MANDAL", "confidence": 0.9796141386032104, "geometry": [[0.3056640625, 0.4794921875], [0.3828125, 0.51171875]]}, {"value": "Address:VILL-", "confidence": 0.7488418221473694, "geometry": [[0.041015625, 0.517578125], [0.2041015625, 0.5517578125]]}, {"value": "-", "confidence": 0.9209287166595459, "geometry": [[0.21484375, 0.5244140625], [0.236328125, 0.5498046875]]}, {"value": "CHUCHUNGJULI", "confidence": 0.5298648476600647, "geometry": [[0.2841796875, 0.5244140625], [0.4267578125, 0.552734375]]}, 
{"value": "NO", "confidence": 0.9970822334289551, "geometry": [[0.2392578125, 0.51953125], [0.2724609375, 0.5546875]]}, {"value": "Phone:2:284238", "confidence": 0.8328949809074402, "geometry": [[0.7275390625, 0.5283203125], [0.8818359375, 0.5634765625]]}, {"value": "SIDDHINATHPUR", "confidence": 0.5255517959594727, "geometry": [[0.208984375, 0.5703125], [0.3681640625, 0.5986328125]]}, {"value": "P.0-", "confidence": 0.7063982486724854, "geometry": [[0.138671875, 0.5654296875], [0.19921875, 0.6044921875]]}, {"value": "TAMULPUR", "confidence": 0.992978572845459, "geometry": [[0.44140625, 0.5703125], [0.54296875, 0.6025390625]]}, {"value": "P.S-", "confidence": 0.9322847127914429, "geometry": [[0.3779296875, 0.568359375], [0.4345703125, 0.6044921875]]}, {"value": "Email:sbi.091470sb", "confidence": 0.41425037384033203, "geometry": [[0.7275390625, 0.5732421875], [0.9580078125, 0.611328125]]}, {"value": "DIST-", "confidence": 0.9592916369438171, "geometry": [[0.138671875, 0.61328125], [0.2109375, 0.650390625]]}, {"value": "BAKSA", "confidence": 0.9974198341369629, "geometry": [[0.2158203125, 0.61328125], [0.28515625, 0.650390625]]}, {"value": "ASSAM", "confidence": 0.9992387890815735, "geometry": [[0.2900390625, 0.61328125], [0.3603515625, 0.650390625]]}, {"value": "781360", "confidence": 0.9988314509391785, "geometry": [[0.3671875, 0.6123046875], [0.4453125, 0.6494140625]]}, {"value": "Branch", "confidence": 0.9992311596870422, "geometry": [[0.7255859375, 0.6201171875], [0.806640625, 0.6572265625]]}, {"value": "Code:91471", "confidence": 0.6900002956390381, "geometry": [[0.814453125, 0.6220703125], [0.939453125, 0.6611328125]]}, {"value": "Phone:", "confidence": 0.9992220997810364, "geometry": [[0.0390625, 0.65625], [0.1171875, 0.6943359375]]}, {"value": "Date", "confidence": 0.9955922961235046, "geometry": [[0.7236328125, 0.6630859375], [0.7822265625, 0.7021484375]]}, {"value": "of", "confidence": 0.9997377991676331, "geometry": [[0.787109375, 0.6630859375], 
[0.8212890625, 0.703125]]}, {"value": "Issue:30/06", "confidence": 0.8237747550010681, "geometry": [[0.8310546875, 0.66796875], [0.9716796875, 0.70703125]]}, {"value": "Email:", "confidence": 0.9961839914321899, "geometry": [[0.0380859375, 0.701171875], [0.115234375, 0.7392578125]]}, {"value": "40/06/2020", "confidence": 0.2974379360675812, "geometry": [[0.70703125, 0.6982421875], [0.8583984375, 0.7548828125]]}, {"value": "5529182", "confidence": 0.9492482542991638, "geometry": [[0.8798828125, 0.712890625], [0.9775390625, 0.75390625]]}, {"value": "D.0.B.", "confidence": 0.8802379369735718, "geometry": [[0.0380859375, 0.75], [0.1171875, 0.787109375]]}, {"value": "Minor):", "confidence": 0.9770660996437073, "geometry": [[0.177734375, 0.7451171875], [0.2705078125, 0.78515625]]}, {"value": "(If", "confidence": 0.4771254062652588, "geometry": [[0.1279296875, 0.7431640625], [0.173828125, 0.7880859375]]}, {"value": "IFSC:SBINOOO009147", "confidence": 0.5092097520828247, "geometry": [[0.7314453125, 0.7568359375], [0.93359375, 0.7919921875]]}, {"value": "MOP.:", "confidence": 0.9868963360786438, "geometry": [[0.0341796875, 0.7919921875], [0.103515625, 0.830078125]]}, {"value": "RIMLON1781002512", "confidence": 0.2585943341255188, "geometry": [[0.705078125, 0.798828125], [0.9072265625, 0.83984375]]}, {"value": "No.:0000000203836166", "confidence": 0.8759520649909973, "geometry": [[0.1669921875, 0.8408203125], [0.4189453125, 0.87890625]]}, {"value": "Nom.", "confidence": 0.9955904483795166, "geometry": [[0.0361328125, 0.84765625], [0.091796875, 0.8837890625]]}, {"value": "Reg.", "confidence": 0.9835382699966431, "geometry": [[0.0986328125, 0.8447265625], [0.1572265625, 0.8896484375]]}, {"value": "BranchoMana", "confidence": 0.3897829055786133, "geometry": [[0.6943359375, 0.8486328125], [0.7919921875, 0.8857421875]]}, {"value": "DATION", "confidence": 0.5874566435813904, "geometry": [[0.796875, 0.8515625], [0.8818359375, 0.888671875]]}]}], "artefacts": []}]}]}

我想为每一行文字绘制一个边界框,并使用 OpenCV 把裁剪出的各行图像保存到文件夹中,效果如下图所示。我该怎样实现?

我想要的图片

标签: python json opencv image-processing ocr

解决方案


您可以任取一个边界框,以它的某个角点(例如右上角)为基准,找出右上角与该基准相差在设定像素数以内的所有边界框。然后从这组边界框中取最左上角像素和最右下角像素的坐标,作为整行的边界框。

把这组边界框从原始边界框集合中删除,然后重复上述步骤,直到集合为空。

肯定还有更巧妙的解决方案,但如果所有文档的版式都与此类似,这种方法应该可行。

祝你好运!

伪代码:

# Greedily group word boxes into text lines.
# Each pass takes one seed box and collects every remaining box whose
# upper-right corner sits at roughly the same height. The collected boxes
# are removed from the pool, so each box lands in exactly one line and
# `bounding_boxes` is empty once the loop finishes.
# NOTE: `upper_right_corner.y` and `tolerance` are placeholders, as in the
# rest of this sketch; adapt them to your own box representation.
line_boxes = []
while bounding_boxes:
    box = bounding_boxes.pop()
    seed_y = box.upper_right_corner.y
    line = [box]  # the seed box is part of its own line
    remaining = []
    for other_box in bounding_boxes:
        # Compare vertical offsets only, in absolute value: words on one
        # line can be far apart horizontally and may sit slightly above
        # or below the seed.
        if abs(other_box.upper_right_corner.y - seed_y) < tolerance:
            line.append(other_box)
        else:
            remaining.append(other_box)
    # Only the boxes not yet assigned to a line are considered next time.
    bounding_boxes = remaining
    line_boxes.append(line)

# For every group of word boxes, take the two extreme corners that span the
# whole group and cut that rectangle out of the original image.
final_lines = [
    crop_box_from_original_image(
        get_upper_right_coordinate(words_in_line),
        get_lower_left_coordinate(words_in_line),
    )
    for words_in_line in line_boxes
]

推荐阅读