python - How to use realsense, darknet and rs2_deproject_pixel_to_point() in Python to calculate the size of an object
Problem description
I am running Darknet trained on the COCO dataset to detect apples in an image, and I now want to find the real-world size (length and width) of each apple by incorporating the depth frame. How do I do that? I need the approximate dimensions of the region enclosed by the bounding box. The code below basically takes the color frame and draws a bounding box around the object I want (the apple). How can I use the depth frame, the RGB frame and the Darknet detections to find the apple's length and width? I don't need the apple's height, just a rough length and width; in this case the real-world dimensions of the bounding box are good enough. What is the simplest, easiest way to calculate this?
import darknet
import cv2
import numpy as np
import pyrealsense2 as rs
"""##############. Function definitions. ##################"""
# Define the detection function
def image_detection(image, network, class_names, class_colors, thresh):
    # Darknet doesn't accept numpy images.
    # Create one with image we reuse for each detect
    width = darknet.network_width(network)
    height = darknet.network_height(network)
    darknet_image = darknet.make_image(width, height, 3)
    #image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_resized = cv2.resize(image_rgb, (width, height), interpolation=cv2.INTER_LINEAR)
    darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
    detections = darknet.detect_image(network, class_names, darknet_image, thresh=thresh)
    darknet.free_image(darknet_image)
    image = darknet.draw_boxes(detections, image_resized, class_colors)
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB), detections
# Initialize and declare the neural network along with data files, config files etc
quantity_apples = []
config_file = "/home/jetson/Desktop/pano_l515/yolov4.cfg"
data_file = "/home/jetson/Desktop/pano_l515/coco.data"
weights = "/home/jetson/Desktop/pano_l515/yolov4.weights"
network, class_names, class_colors = darknet.load_network(
    config_file,
    data_file,
    weights,
    batch_size=1
)
## Realsense from align-depth2color.py
# Create a pipeline
pipeline = rs.pipeline()
# Create a config and configure the pipeline to stream
# different resolutions of color and depth streams
config = rs.config()
# Get device product line for setting a supporting resolution
pipeline_wrapper = rs.pipeline_wrapper(pipeline)
pipeline_profile = config.resolve(pipeline_wrapper)
device = pipeline_profile.get_device()
device_product_line = str(device.get_info(rs.camera_info.product_line))
config.enable_stream(rs.stream.depth, 1024, 768, rs.format.z16, 30)
if device_product_line == 'L500':
    print(device_product_line)
    config.enable_stream(rs.stream.color, 1280, 720, rs.format.bgr8, 30)
else:
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
# Start streaming
profile = pipeline.start(config)
# Getting the depth sensor's depth scale (see rs-align example for explanation)
depth_sensor = profile.get_device().first_depth_sensor()
depth_scale = depth_sensor.get_depth_scale()
print("Depth Scale is: " , depth_scale)
# We will be removing the background of objects more than
# clipping_distance_in_meters meters away
clipping_distance_in_meters = 1 #1 meter
clipping_distance = clipping_distance_in_meters / depth_scale
# Create an align object
# rs.align allows us to perform alignment of depth frames to other frames
# The "align_to" is the stream type to which we plan to align depth frames.
align_to = rs.stream.color
align = rs.align(align_to)
# Streaming loop
try:
    for frame_idx in range(0, 2):
        # Get frameset of color and depth
        frames = pipeline.wait_for_frames()
        # Align the depth frame to the color frame
        aligned_frames = align.process(frames)
        # Get aligned frames
        aligned_depth_frame = aligned_frames.get_depth_frame()  # depth image at the color stream resolution
        color_frame = aligned_frames.get_color_frame()
        # Validate that both frames are valid
        if not aligned_depth_frame or not color_frame:
            continue
        depth_image = np.asanyarray(aligned_depth_frame.get_data())
        color_image = np.asanyarray(color_frame.get_data())
        dn_frame_width = 416
        dn_frame_height = 416
        frame_width = color_image.shape[1]
        frame_height = color_image.shape[0]
        #### Passing the image to darknet
        image, detections = image_detection(color_image, network, class_names, class_colors, thresh=0.05)
        for det in detections:
            # Darknet returns (label, confidence, (xc, yc, w, h)) in the 416x416
            # network frame; rescale the box to the color frame resolution
            xc_percent = det[2][0] / dn_frame_width
            yc_percent = det[2][1] / dn_frame_height
            w_percent = det[2][2] / dn_frame_width
            h_percent = det[2][3] / dn_frame_height
            xc = xc_percent * frame_width
            yc = yc_percent * frame_height
            w = w_percent * frame_width
            h = h_percent * frame_height
            xmin = xc - w / 2.0
            ymin = yc - h / 2.0
            xmax = xc + w / 2.0
            ymax = yc + h / 2.0
            # If an apple is detected, draw its bounding box and label
            if det[0] == "apple":
                cv2.rectangle(color_image, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 2)
                cv2.putText(color_image, "apple", (int(xmin), int(ymin - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                #cv2.imwrite(output_path, frame)
        # Render images:
        #   depth aligned to color on the left, colorized depth on the right
        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
        images = np.hstack((color_image, depth_colormap))
        cv2.imwrite("test_images.jpg", color_image)
        #cv2.namedWindow('Align Example', cv2.WINDOW_NORMAL)
        #cv2.imshow('Align Example', images)
        key = cv2.waitKey(1)
        # Press esc or 'q' to close the image window
        #if key & 0xFF == ord('q') or key == 27:
        cv2.destroyAllWindows()
        #break
finally:
    pipeline.stop()
Solution
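Since the depth frame is already aligned to the color frame in the code above, one simple approach is: read the depth at the center of the bounding box, then deproject the midpoints of the box edges into 3D with rs2_deproject_pixel_to_point() and take the Euclidean distance between opposite points as the width and length. Below is a minimal sketch of this idea; the helper name box_size_meters and the choice of edge midpoints (rather than, say, a median depth over the whole box) are my own illustration, not part of the question's code.

import numpy as np
import pyrealsense2 as rs

def box_size_meters(aligned_depth_frame, xmin, ymin, xmax, ymax):
    """Approximate real-world width and height (in meters) of a bounding box,
    assuming the depth frame has already been aligned to the color frame."""
    # After alignment, the depth stream shares the color stream's intrinsics
    intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics
    xc = int((xmin + xmax) / 2)
    yc = int((ymin + ymax) / 2)
    # Depth in meters at the box center; assumes this pixel lands on the apple
    depth = aligned_depth_frame.get_distance(xc, yc)
    if depth == 0:
        return None  # no depth reading at this pixel
    # Deproject the midpoints of the four box edges at the center depth
    left = rs.rs2_deproject_pixel_to_point(intrin, [float(xmin), float(yc)], depth)
    right = rs.rs2_deproject_pixel_to_point(intrin, [float(xmax), float(yc)], depth)
    top = rs.rs2_deproject_pixel_to_point(intrin, [float(xc), float(ymin)], depth)
    bottom = rs.rs2_deproject_pixel_to_point(intrin, [float(xc), float(ymax)], depth)
    # Euclidean distances between opposite 3D points give width and height
    width = float(np.linalg.norm(np.subtract(right, left)))
    height = float(np.linalg.norm(np.subtract(bottom, top)))
    return width, height

Inside the detection loop, after xmin/ymin/xmax/ymax have been rescaled to the color frame, something like size = box_size_meters(aligned_depth_frame, xmin, ymin, xmax, ymax) then returns (width, height) in meters for the apple's box. Note that using a single depth value flattens the apple onto a plane at its center distance, which is acceptable for the rough estimate asked for here; averaging the depth over the box interior would be a sturdier variant.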