How to find the dimensions of an object using a RealSense (L515) camera

Problem description

I have a RealSense L515 camera and I want to find the size of an object in the scene. In my case, I am running Darknet to detect objects. Once an object is detected, I want to use the depth frame and the color frame to calculate its length and width (roughly). Take an apple as an example: a bounding box is drawn around it. How can I use this bounding-box data, together with the color and depth frames, to find the apple's dimensions? Since the bounding box is roughly the size of the apple, I want to convert its pixel coordinates into an approximate real-world size. I have read online about using point clouds for this, but I am new to them, so it is not clear to me how to proceed.
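For reference, the geometry behind this idea is the pinhole model: if the depth at the apple is Z meters and its bounding box spans w pixels, the real width is roughly Z * w / fx, where fx is the color camera's focal length in pixels (available from the stream intrinsics). A quick sanity check of the arithmetic, with illustrative numbers that are not from a real camera:

fx = 640.0        # focal length in pixels, taken from the color stream intrinsics
depth_m = 0.50    # depth at the apple, in meters
w_px = 128        # bounding-box width in pixels
width_m = depth_m * w_px / fx   # = 0.10 m, i.e. a 10 cm wide apple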

import darknet
import cv2
import numpy as np
import pyrealsense2 as rs

"""##############. Function definitions. ##################"""

# Define the detection function
def image_detection(image, network, class_names, class_colors, thresh):
    # Darknet doesn't accept numpy images; copy into a darknet image
    # that is reused for each detection
    width = darknet.network_width(network)
    height = darknet.network_height(network)
    darknet_image = darknet.make_image(width, height, 3)

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_resized = cv2.resize(image_rgb, (width, height), interpolation=cv2.INTER_LINEAR)

    darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
    detections = darknet.detect_image(network, class_names, darknet_image, thresh=thresh)
    darknet.free_image(darknet_image)
    image = darknet.draw_boxes(detections, image_resized, class_colors)
    # Detections are in network (width x height) coordinates; convert the
    # drawn image back to BGR for OpenCV
    return cv2.cvtColor(image, cv2.COLOR_RGB2BGR), detections


# Initialize the neural network along with the data, config, and weights files
quantity_apples = []
config_file = "/home/jetson/Desktop/pano_l515/yolov4.cfg"
data_file = "/home/jetson/Desktop/pano_l515/coco.data"
weights = "/home/jetson/Desktop/pano_l515/yolov4.weights"

network, class_names, class_colors = darknet.load_network(
        config_file,
        data_file,
        weights,
        batch_size=1
    )

## RealSense setup, adapted from align-depth2color.py

# Create a pipeline
pipeline = rs.pipeline()

# Create a config and configure the pipeline to stream
#  different resolutions of color and depth streams
config = rs.config()

# Get device product line for setting a supporting resolution
pipeline_wrapper = rs.pipeline_wrapper(pipeline)
pipeline_profile = config.resolve(pipeline_wrapper)
device = pipeline_profile.get_device()
device_product_line = str(device.get_info(rs.camera_info.product_line))

config.enable_stream(rs.stream.depth, 1024, 768, rs.format.z16, 30)

if device_product_line == 'L500':
    print(device_product_line)
    config.enable_stream(rs.stream.color, 1280, 720, rs.format.bgr8, 30)
else:
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)

# Start streaming
profile = pipeline.start(config)

# Getting the depth sensor's depth scale (see rs-align example for explanation)
depth_sensor = profile.get_device().first_depth_sensor()
depth_scale = depth_sensor.get_depth_scale()
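# depth_scale converts the raw uint16 values in the depth image to meters
# (for the L515 this is typically 0.00025, i.e. 0.25 mm per unit)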
print("Depth Scale is: " , depth_scale)

# Background-clipping distance from the align example; clipping_distance is
# computed here but not actually used in the rest of this script
clipping_distance_in_meters = 1  # 1 meter
clipping_distance = clipping_distance_in_meters / depth_scale

# Create an align object
# rs.align allows us to perform alignment of depth frames to others frames
# The "align_to" is the stream type to which we plan to align depth frames.
align_to = rs.stream.color
align = rs.align(align_to)
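# Note: after align.process(), a pixel (x, y) in the color image corresponds to the
# same (x, y) in the aligned depth frame, which is what makes it possible to look
# up depth at bounding-box coordinates later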


# Streaming loop
try:
    for _ in range(2):  # grab only a couple of framesets
        # Get frameset of color and depth
        frames = pipeline.wait_for_frames()
        # frames.get_depth_frame() is a 1024x768 depth image (as configured above)

        # Align the depth frame to color frame
        aligned_frames = align.process(frames)

        # Get aligned frames
        aligned_depth_frame = aligned_frames.get_depth_frame()  # now the same resolution as the color frame
        color_frame = aligned_frames.get_color_frame()

        # Validate that both frames are valid
        if not aligned_depth_frame or not color_frame:
            continue

        depth_image = np.asanyarray(aligned_depth_frame.get_data())
        color_image = np.asanyarray(color_frame.get_data())

        # Size of the image that was fed to Darknet; must match the network
        # input size used in image_detection
        dn_frame_width = 416
        dn_frame_height = 416

        frame_width = color_image.shape[1]
        frame_height = color_image.shape[0]

        #### Passing the image to darknet
        image, detections = image_detection(color_image, network, class_names, class_colors, thresh=0.05)


        # Rescale detections from network coordinates back to frame coordinates
        for label, confidence, bbox in detections:
            xc = bbox[0] / dn_frame_width * frame_width
            yc = bbox[1] / dn_frame_height * frame_height
            w = bbox[2] / dn_frame_width * frame_width
            h = bbox[3] / dn_frame_height * frame_height
            xmin = xc - w / 2.0
            ymin = yc - h / 2.0
            xmax = xc + w / 2.0
            ymax = yc + h / 2.0

            # If an apple is detected, draw its box on the full-resolution color image
            if label == "apple":
                cv2.rectangle(color_image, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 2)
                cv2.putText(color_image, "apple", (int(xmin), int(ymin - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
        
        # Render images: color with boxes on the left, colorized depth on the right
        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
        images = np.hstack((color_image, depth_colormap))
        cv2.imwrite("test_images.jpg", color_image)
        #cv2.namedWindow('Align Example', cv2.WINDOW_NORMAL)
        #cv2.imshow('Align Example', images)
        key = cv2.waitKey(1)
        # Press esc or 'q' to close the image window
        #if key & 0xFF == ord('q') or key == 27:
        #    break
        cv2.destroyAllWindows()
finally:
    pipeline.stop()

Here is the output image so far. How should I proceed?

[output image]

Tags: python-3.x, opencv, computer-vision, point-clouds, realsense

Solution
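
A minimal sketch of one common approach, built on the code above: since the depth frame is already aligned to the color frame, read the depth at the centre of the bounding box, then use rs.rs2_deproject_pixel_to_point with the aligned frame's intrinsics to turn the box corners into 3D points in meters, and subtract. The helper name measure_box and the simplification of using the centre depth for all corners are assumptions for illustration, not part of the question's code:

import pyrealsense2 as rs

def measure_box(aligned_depth_frame, xmin, ymin, xmax, ymax):
    # Hypothetical helper: approximate real-world width/height in meters of an
    # axis-aligned bounding box, assuming the whole box lies at the depth of its centre
    intrin = aligned_depth_frame.profile.as_video_stream_profile().get_intrinsics()

    # Depth in meters at the box centre; get_distance() already applies depth_scale
    cx, cy = int((xmin + xmax) / 2), int((ymin + ymax) / 2)
    depth = aligned_depth_frame.get_distance(cx, cy)
    if depth <= 0:
        return None   # no valid depth at the centre pixel (hole in the depth map)

    # Deproject two opposite corners to 3D camera-space points (meters)
    p_min = rs.rs2_deproject_pixel_to_point(intrin, [float(xmin), float(ymin)], depth)
    p_max = rs.rs2_deproject_pixel_to_point(intrin, [float(xmax), float(ymax)], depth)

    return abs(p_max[0] - p_min[0]), abs(p_max[1] - p_min[1])   # (width, height)

Calling size = measure_box(aligned_depth_frame, xmin, ymin, xmax, ymax) inside the apple branch of the detection loop gives an approximate size in meters. Two practical refinements: sample a median depth over the interior of the box rather than a single centre pixel (the L515 depth map has holes, especially at object edges), and remember that the box slightly overestimates the apple, so the result is an upper bound.

The point-cloud route the question mentions is essentially the same computation done for every pixel at once. A sketch under the same assumptions, reusing aligned_depth_frame, depth_image, and the xmin/ymin/xmax/ymax values from the loop above:

import numpy as np
import pyrealsense2 as rs

pc = rs.pointcloud()
points = pc.calculate(aligned_depth_frame)
# One 3D vertex (x, y, z, in meters) per depth pixel, in row-major order
verts = np.asanyarray(points.get_vertices()).view(np.float32).reshape(-1, 3)
h, w = depth_image.shape
box = verts.reshape(h, w, 3)[int(ymin):int(ymax), int(xmin):int(xmax)]
box = box[box[:, :, 2] > 0]                    # drop pixels with no depth reading
width_m = box[:, 0].max() - box[:, 0].min()    # extent along the camera's x axis
height_m = box[:, 1].max() - box[:, 1].min()   # extent along the camera's y axis

Because the box also contains background pixels, filtering the vertices to those near the apple's depth (for example, within a few centimetres of the centre depth) before taking the extents gives a tighter estimate.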

