python - 按检测到的类别将边界框坐标和图像保存到不同的文件夹中
问题描述
我正在使用 OpenCV + Python 应用深度学习模型,将对象分为 8 类(动物类型):猫、狗、马、鹿、熊、蜥蜴、猴子,以及"未检测到对象"(当图片中未检测到任何对象时)。
我有一个文件夹,里面有各种动物的图像。我读取了一个文件夹中的所有图像,然后应用深度学习模型来提取每个图像中每个对象的边界框坐标。
首先,我想把每张图像按动物类型放入对应的文件夹中,从而对图像进行分类。其次,将该图像的边界框坐标保存在同一文件夹中。例如,如果网络检测到猫,我想将该图像和相应的坐标(作为文本文件 .txt)保存在 cat 文件夹中;如果在图像中没有找到上述任何对象,则将其放入"未检测到对象"文件夹。
我的问题是如何将原始图像和该对象的边界框坐标保存在 8 个类别文件夹中?
这是我的代码:
import cv2
import numpy as np
import os
import glob
import argparse
import time
# Script from the question: globs image files under img_dir, runs a Darknet
# YOLO network through cv2.dnn on `image`, draws the boxes kept by non-maxima
# suppression, and writes the drawn-on image plus a text line per box under
# '/path/imgr/<label>/'.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# loops and conditionals below has to be inferred by the reader.
img_dir="/path/imgt/"
# '*g' matches any file name ending in the letter g
data_path=os.path.join(img_dir,'*g')
files=glob.glob(data_path)
data=[]
i = 0
for f1 in files:
image=cv2.imread(f1)
data.append(image)
# NOTE(review): `data` is not referenced again in this listing; everything
# below works on the single `image` variable, which presumably holds whichever
# file the loop above read last — confirm against the original indentation.
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to input image")
ap.add_argument("-y", "--yolo", required=True,
help="base path to YOLO directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
ap.add_argument("-t", "--threshold", type=float, default=0.3,
help="threshold when applyong non-maxima suppression")
args = vars(ap.parse_args())
# load the COCO class labels our YOLO model was trained on
labelsPath = os.path.sep.join([args["yolo"], "obj.names"])
LABELS = open(labelsPath).read().strip().split("\n")
# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3),
dtype="uint8")
# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.sep.join([args["yolo"], "yolo-obj_last.weights"])
configPath = os.path.sep.join([args["yolo"], "yolo-obj.cfg"])
# load our YOLO object detector trained on COCO dataset (80 classes)
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
# load our input image and grab its spatial dimensions
# (the imread of args["image"] is commented out, so --image is still a
# required option but its value is not read anywhere in this listing)
# image = cv2.imread(args["image"])
(H, W) = image.shape[:2]
# determine only the *output* layer names that we need from YOLO
ln = net.getLayerNames()
# NOTE(review): i[0] assumes getUnconnectedOutLayers() yields one-element
# arrays — confirm against the installed OpenCV version
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# construct a blob from the input image and then perform a forward
# pass of the YOLO object detector, giving us our bounding boxes and
# associated probabilities
blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416),
swapRB=True, crop=False)
net.setInput(blob)
start = time.time()
layerOutputs = net.forward(ln)
end = time.time()
# show timing information on YOLO
print("[INFO] YOLO took {:.6f} seconds".format(end - start))
# initialize our lists of detected bounding boxes, confidences, and
# class IDs, respectively
boxes = []
confidences = []
classIDs = []
# loop over each of the layer outputs
for output in layerOutputs:
# loop over each of the detections
for detection in output:
# extract the class ID and confidence (i.e., probability) of
# the current object detection
scores = detection[5:]
classID = np.argmax(scores)
confidence = scores[classID]
# filter out weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > args["confidence"]:
# scale the bounding box coordinates back relative to the
# size of the image, keeping in mind that YOLO actually
# returns the center (x, y)-coordinates of the bounding
# box followed by the boxes' width and height
box = detection[0:4] * np.array([W, H, W, H])
(centerX, centerY, width, height) = box.astype("int")
# use the center (x, y)-coordinates to derive the top and
# and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
# update our list of bounding box coordinates, confidences,
# and class IDs
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
classIDs.append(classID)
# apply non-maxima suppression to suppress weak, overlapping bounding
# boxes
idxs = cv2.dnn.NMSBoxes(boxes, confidences, args["confidence"],
args["threshold"])
# ensure at least one detection exists
if len(idxs) > 0:
# loop over the indexes we are keeping
# (this rebinds the module-level `i` that was set to 0 near the top)
for i in idxs.flatten():
# extract the bounding box coordinates
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
# draw a bounding box rectangle and label on the image
color = [int(c) for c in COLORS[classIDs[i]]]
cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
cv2.putText(image, text, (x, y - 7), cv2.FONT_HERSHEY_SIMPLEX,0.6, color, 2)
# the output file name is built from the detection index i, not from the
# name of the source image; the target directory is not created here
path = '/path/imgr/' + LABELS[classIDs[i]] + '/'
cv2.imwrite(os.path.join(path, 'image' + str(i) + '.jpg'), image)
# opened in append mode and written without a trailing newline; the values
# are the top-left x, y and the width, height scaled by W and H above
with open(os.path.join(path, 'image' + str(i) + '.txt'), 'a+') as f:
f.write(str(classIDs[i]) + ' ' + str(x) + ' ' + str(y) + ' ' + str(w) + ' ' + str(h))
文本文件的外观如何?
为每个 .jpg 图像文件创建一个 .txt 文件——位于同一目录中,名称相同,但扩展名为 .txt——并在文件中写入该图像上的对象类别编号和对象坐标,每个对象占一行:<object-class> <x> <y> <width> <height>
其中:
<object-class> - 对象的整数编号,取值范围为 0 到 (classes-1)
<x> <y> <width> <height> - 相对于图像宽度和高度的浮点值,取值范围为 (0.0, 1.0]
例如:<x> = <absolute_x> / <image_width>,或 <height> = <absolute_height> / <image_height>
注意:<x> <y> 是矩形的中心(不是左上角)
例如,对于 img1.jpg,您将创建 img1.txt,其中包含:
1 0.716797 0.395833 0.216406 0.147222
0 0.687109 0.379167 0.255469 0.158333
1 0.420312 0.395833 0.140625 0.166667
解决方案
也许是这样的:
# Fragment meant to sit under `if confidence > args["confidence"]:`.
# Builds '<output root>/<class label>/<image_name>' (no extension), saves the
# image next to it as .jpg and appends one line for this detection to the
# matching .txt file.
path = os.path.join('/path/imgr/', LABELS[classID], image_name)
cv2.imwrite(path + '.jpg', image)
# append mode: an image with several detections of one class gets one line each
with open(path + '.txt', 'a+') as f:
    f.write(str(classID) + ' ' + str(detection[0]) + ' ' + str(detection[1]) + ' ' + str(detection[2]) + ' ' + str(detection[3]) + '\n')
您可能在图像中有多个对象,在这种情况下,它应该写入每个相关文件夹并附加到文本文件(如果存在)。
image_name 由您自行生成:可以使用正在读取的文件名,也可以使用计数器。
这个片段应该放在 if 语句下的某个地方:
if confidence > args["confidence"]:
我会把它放在最后。您可能需要进行细微的调整,但这就是要点。
更明确地说:
import cv2
import numpy as np
import os
import glob
import argparse
import time
# Run a Darknet YOLO model over every image in IMG_DIR and, for each detection
# above the confidence threshold, save the image and append one text line
# describing the box under OUT_DIR/<class label>/.
IMG_DIR = "/path/imgt/"
OUT_DIR = "/path/imgr/"


def parse_args():
    """Parse the command-line options and return them as a dict."""
    ap = argparse.ArgumentParser()
    # no --image option: every image found in IMG_DIR is processed instead
    ap.add_argument("-y", "--yolo", required=True,
                    help="base path to YOLO directory")
    ap.add_argument("-c", "--confidence", type=float, default=0.5,
                    help="minimum probability to filter weak detections")
    ap.add_argument("-t", "--threshold", type=float, default=0.3,
                    help="threshold when applyong non-maxima suppression")
    return vars(ap.parse_args())


def select_output_layers(layer_names, out_layers):
    """Map the 1-based indices in *out_layers* to entries of *layer_names*.

    *out_layers* is what ``net.getUnconnectedOutLayers()`` returns. Depending
    on the OpenCV build this is either a flat array of indices or an array of
    one-element arrays, so it is flattened before indexing.
    """
    return [layer_names[int(i) - 1] for i in np.asarray(out_layers).flatten()]


def top_left_box(detection):
    """Return ``[x, y, w, h]`` with (x, y) moved from box centre to top-left.

    Works on a copy of the first four values so the network output row that
    *detection* points at is left untouched.
    """
    box = np.array(detection[:4])
    box[0] -= box[2] / 2
    box[1] -= box[3] / 2
    return box


def annotation_line(class_id, box):
    """Format one detection as ``'<class> <x> <y> <w> <h>\\n'``."""
    return " ".join(str(v) for v in [class_id] + list(box)) + "\n"


def main():
    """Load the model, then classify and save every image in IMG_DIR."""
    args = parse_args()

    # load the class labels our YOLO model was trained on
    labels_path = os.path.join(args["yolo"], "obj.names")
    with open(labels_path) as fh:
        labels = fh.read().strip().splitlines()

    # derive the paths to the YOLO weights and model configuration
    weights_path = os.path.join(args["yolo"], "yolo-obj_last.weights")
    config_path = os.path.join(args["yolo"], "yolo-obj.cfg")

    print("[INFO] loading YOLO from disk...")
    net = cv2.dnn.readNetFromDarknet(config_path, weights_path)

    # determine only the *output* layer names that we need from YOLO
    ln = select_output_layers(net.getLayerNames(),
                              net.getUnconnectedOutLayers())

    for f1 in glob.glob(os.path.join(IMG_DIR, "*g")):
        image = cv2.imread(f1)
        if image is None:
            # imread returns None for files it cannot decode
            print("[WARN] could not read {}, skipping".format(f1))
            continue

        # construct a blob from the input image and then perform a forward
        # pass of the YOLO object detector
        blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416),
                                     swapRB=True, crop=False)
        net.setInput(blob)
        layer_outputs = net.forward(ln)

        # file name without directory or extension (any extension length)
        stem = os.path.splitext(os.path.basename(f1))[0]

        for output in layer_outputs:
            for detection in output:
                # class scores follow the four box values and objectness
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                # filter out weak predictions
                if confidence <= args["confidence"]:
                    continue

                class_dir = os.path.join(OUT_DIR, labels[class_id])
                os.makedirs(class_dir, exist_ok=True)
                path = os.path.join(class_dir, stem)
                cv2.imwrite(path + ".jpg", image)
                # append so several detections of one class share a file
                with open(path + ".txt", "a+") as f:
                    f.write(annotation_line(class_id, top_left_box(detection)))


if __name__ == "__main__":
    main()
通读它并确保您了解 for 循环中的每个部分在做什么。一旦您对这个最小示例感到满意,您可以添加回非最大抑制并根据需要绘制边界框。
推荐阅读
- javascript - jQuery 脚本:我填充了一个数组,但无法从外部访问它
- haskell - Haskell 错误:无法将类型“a1”与“Int”匹配
- python - 我的模型是否由于“错误的损失函数设计”或“训练循环错误”而无法学习?
- c# - 如何为json设置类?
- javascript - 将数据保存到数据库并发送电子邮件的 php 代码适用于 chrome 但不适用于 safari
- android - 必须在 Android Studio 中更改颜色的按钮问题
- python - 向 python 请求提供 Postman 的环境配置数据
- gerrit - zuul 事件触发时克隆 gerrit 存储库?如何停止?
- java - 需要确定程序中的正数、负数和零数,并将所有正数和负数分别相加
- tensorflow-lite - tensorflow-lite - 使用 tflite 解释器在输出中获取图像