首页 > 解决方案 > 边界框检测已关闭

问题描述

在此处输入图像描述

嗨 - 我正在玩树莓派的张量流对象检测。https://github.com/tensorflow/examples/blob/master/lite/examples/object_detection/raspberry_pi/README.md

我的问题是,Tensorflow 正确检测到对象。但是,错误地为检测到的对象提供坐标。它让我发疯。我想不通。

PFB:

    def main():
  parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument(
      '--model', help='File path of .tflite file.', required=True)
  parser.add_argument(
      '--labels', help='File path of labels file.', required=True)
  parser.add_argument(
      '--threshold',
      help='Score threshold for detected objects.',
      required=False,
      type=float,
      default=0.4)
  args = parser.parse_args()
  np.set_printoptions(threshold=sys.maxsize)
  labels = load_labels(args.labels)
  interpreter = Interpreter(args.model)
  interpreter.allocate_tensors()
  _, input_height, input_width, _ = interpreter.get_input_details()[0]['shape']

  with picamera.PiCamera(
      resolution=(CAMERA_WIDTH, CAMERA_HEIGHT), framerate=30) as camera:
    camera.rotation = 180
    camera.start_preview()
    try:
      print("works!!!")  
      stream = io.BytesIO()
      annotator = Annotator(camera)
      count = 0
      for _ in camera.capture_continuous(
          stream, format='jpeg', use_video_port=True):
        stream.seek(0)
        image = Image.open(stream).convert('RGB').resize(
            (input_width, input_height), Image.ANTIALIAS)
        #image = image.rotate(180, PIL.Image.NEAREST, expand = 1)
        start_time = time.monotonic()
        results = detect_objects(interpreter, image, args.threshold)
        elapsed_ms = (time.monotonic() - start_time) * 1000
        
       
        open_cv_image = np.array(image)

        distance = us.get_distance()
        print(distance)

        print("*********************NUM PY DATA **********************")
        print(results)
        #print(type(image))
        filename = "geeks"
        filename += str(count)
        filename += ".png"

        annotator.clear()
        print("Annotating!!")
        print(annotator)
        annotate_objects(annotator,results,labels,open_cv_image, count)
        annotator.text([5, 0], '%.1fms' % (elapsed_ms))
        annotator.update()
        print(annotator)
        count += 1
        stream.seek(0)
        stream.truncate()

    finally:
      camera.stop_preview()


if __name__ == '__main__':
  main()

def annotate_objects(annotator, results, labels, npcv, count):
  """Draws the bounding box and label for each object in the results."""
  window_name = 'Image'
  image = npcv
  filename="image"
  filename+=str(count)
  filename+=".jpg"
  for obj in results:
    # Convert the bounding box figures from relative coordinates
    # to absolute coordinates based on the original resolution
    ymin, xmin, ymax, xmax = obj['bounding_box']
    xmin = int(xmin * CAMERA_WIDTH)
    xmax = int(xmax * CAMERA_WIDTH)
    ymin = int(ymin * CAMERA_HEIGHT)
    ymax = int(ymax * CAMERA_HEIGHT)

    if obj['score'] >= 0.60:
      start_point = (xmin,ymin)

      end_point = (xmax,ymax) 

      color = (255, 0, 0) 
      thickness = 20


      image = cv2.rectangle(image, start_point, end_point, color, thickness)

      font = cv2.FONT_HERSHEY_SIMPLEX

      org = (xmin, ymin)

      fontScale = 0.5

      color = (255, 0, 0) 
   

      text = labels[obj['class_id']]

      thickness = 2

      image = cv2.putText(image, text , org, font,  
                   fontScale, color, thickness, cv2.LINE_AA)

    annotator.bounding_box([xmin, ymin, xmax, ymax])
    annotator.text([xmin, ymin],
                   '%s\n%.2f' % (labels[obj['class_id']], obj['score']))

    print(labels[obj['class_id']], obj['score'])
    print(xmin, ymin, xmax, ymax)
  cv2.imwrite(filename, image)

标签: python-3.xtensorflowopencv

解决方案


我看不出有什么问题,但它似乎与坐标系有关,因为框的右下角超出了框架,或者可能是调整大小。我将从查看这些值开始 - 你能打印出来吗:

  • CAMERA_WIDTH,CAMERA_HEIGHT
  • input_height,input_width
  • ymin, xmin, ymax,xmax
  • start_point#(xmin,ymin)
  • end_point #(xmax,ymax)

然后显示模型输入图像:

  • image= Image.open(stream).convert('RGB').resize((input_width, input_height), Image.ANTIALIAS)

然后显示您用于绘制边界框的图像:

  • open_cv_image

推荐阅读