How to filter the objects detected by an SSD detector?

Problem description

I only need to detect people and ignore all other objects.

How can I filter the detected classes?

I have tried many times, but unfortunately without success.

I downloaded mscoco_label_map.pbtxt from here, and frozen_inference_graph.pb from here.

Here is my code:

#!/usr/bin/env python

import cv2
import logging
import numpy as np
import tensorflow as tf

from abc import ABCMeta, abstractmethod
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from imutils.video import WebcamVideoStream


class ObjectDetector():
    """
    Base class for object detectors used by the package.
    """
    __metaclass__ = ABCMeta

    def __init__(self):
        # create logger
        self._logger = logging.getLogger('dodo_detector')
        self._logger.setLevel(logging.DEBUG)
        # create file handler which logs even debug messages
        self._fh = logging.FileHandler('/tmp/dodo_detector.log')
        self._fh.setLevel(logging.DEBUG)
        # create console handler with a higher log level
        self._ch = logging.StreamHandler()
        self._ch.setLevel(logging.DEBUG)
        # create formatter and add it to the handlers
        self._formatter = logging.Formatter('[%(asctime)s - %(name)s]: %(levelname)s: %(message)s')
        self._fh.setFormatter(self._formatter)
        self._ch.setFormatter(self._formatter)
        # add the handlers to the logger
        self._logger.addHandler(self._fh)
        self._logger.addHandler(self._ch)

    @abstractmethod
    def from_image(self, frame):
        """
        Detects objects in an image

        :param frame: a numpy.ndarray containing the image where objects will be detected
        :return: a tuple containing the image, with objects marked by rectangles,
                 and a dictionary listing objects and their locations as `(ymin, xmin, ymax, xmax)`
        """
        pass

    def _detect_from_stream(self, get_frame, stream):
        """
        This internal method detects objects from images retrieved from a stream, given a method that extracts frames from this stream

        :param get_frame: a method that extracts frames from the stream
        :param stream: an object representing a stream of images
        """
        ret, frame = get_frame(stream)

        while ret:
            marked_frame, objects = self.from_image(frame)
            print(objects)

            cv2.imshow("image", marked_frame)
            if cv2.waitKey(1) == 27:
                break  # ESC to quit

            ret, frame = get_frame(stream)

        cv2.destroyAllWindows()

    def from_camera(self, camera_id=0):
        """
        Detects objects in frames from a camera feed

        :param camera_id: the ID of the camera in the system
        """

        def get_frame(stream):
            frame = stream.read()
            ret = True
            return ret, frame

        stream = WebcamVideoStream(src=camera_id)

        stream.start()
        self._detect_from_stream(get_frame, stream)
        stream.stop()

    def from_video(self, filepath):
        """
        Detects objects in frames from a video file

        :param filepath: the path to the video file
        """

        def get_frame(stream):
            ret, frame = stream.read()
            return ret, frame

        stream = cv2.VideoCapture(filepath)

        self._detect_from_stream(get_frame, stream)
        stream.release()


class SingleShotDetector(ObjectDetector):
    """
    Object detector powered by the TensorFlow Object Detection API.

    :param path_to_frozen_graph: path to the frozen inference graph file, a file with a `.pb` extension.
    :param path_to_labels: path to the label map, a text file with the `.pbtxt` extension.
    :param num_classes: number of object classes that will be detected. If None, it will be guessed by the contents of the label map.
    :param confidence: a value between 0 and 1 representing the confidence level the network has in the detection to consider it an actual detection.
    """

    def __init__(self, path_to_frozen_graph, path_to_labels, num_classes=None, confidence=.8):
        super(SingleShotDetector, self).__init__()

        if not 0 < confidence <= 1:
            raise ValueError("confidence must be between 0 and 1")

        # load (frozen) tensorflow model into memory
        self._detection_graph = tf.Graph()
        with self._detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(path_to_frozen_graph, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        # Label maps map indices to category names, so that when our convolution
        # network predicts 5, we know that this corresponds to airplane.
        # Here we use internal utility functions, but anything that returns a
        # dictionary mapping integers to appropriate string labels would be fine
        label_map = label_map_util.load_labelmap(path_to_labels)

        # this is a workaround to guess the number of classes by the contents of the label map
        # it may not be perfect
        if num_classes is None:
            with open(path_to_labels, 'r') as f:
                label_map_contents = f.read()
            num_classes = label_map_contents.count('name:')

        categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=num_classes, use_display_name=True)
        self._category_index = label_map_util.create_category_index(categories)
        print(self._category_index)
        print('categories', categories)

        self._categories = {}
        self._categories_public = []
        for tmp in categories:
            self._categories[int(tmp['id'])] = tmp['name']
            self._categories_public.append(tmp['name'])

        self._confidence = confidence

        # create a session that will be used until the detector is garbage-collected
        self._session = tf.Session(graph=self._detection_graph)

    @property
    def confidence(self):
        return self._confidence

    @confidence.setter
    def confidence(self, value):
        self._confidence = value

    @property
    def categories(self):
        return self._categories_public

    def from_image(self, frame):
        # object recognition begins here
        height, width, _ = frame.shape

        image_np_expanded = np.expand_dims(frame, axis=0)
        image_tensor = self._detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes = self._detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        scores = self._detection_graph.get_tensor_by_name('detection_scores:0')
        classes = self._detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = self._detection_graph.get_tensor_by_name('num_detections:0')

        # Actual detection
        boxes, scores, classes, num_detections = self._session.run(
            [boxes, scores, classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})

        # count how many scores are above the designated threshold
        worthy_detections = sum(score >= self._confidence for score in scores[0])
        # (detections come back sorted by score, so the first worthy_detections entries are the confident ones)

        detected_objects = {}
        # analyze all worthy detections
        for x in range(worthy_detections):

            # capture the class of the detected object
            class_name = self._categories[int(classes[0][x])]

            # get the detection box around the object
            box_objects = boxes[0][x]

            # positions of the box are between 0 and 1, relative to the size of the image
            # we multiply them by the size of the image to get the box location in pixels
            ymin = int(box_objects[0] * height)
            xmin = int(box_objects[1] * width)
            ymax = int(box_objects[2] * height)
            xmax = int(box_objects[3] * width)

            if class_name not in detected_objects:
                detected_objects[class_name] = []

            detected_objects[class_name].append((ymin, xmin, ymax, xmax))

        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            self._category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=self._confidence
        )

        return frame, detected_objects
def main():
#    SingleShotDetector(self.path_to_frozen_graph, self.path_to_labels)
    SingleShotDetector('frozen_inference_graph.pb', 'mscoco_label_map.pbtxt').from_camera(0)
if __name__ == '__main__':
    main() 

Please help me, or offer any suggestions.

Thanks in advance.

Tags: python-2.7, opencv, tensorflow, real-time, object-detection

Solution
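The most direct fix is to filter inside from_image, before the boxes are drawn. Below is a minimal sketch of a rewritten from_image, assuming the SingleShotDetector class and the imports from the question; ALLOWED_CLASSES is a name introduced here for illustration. The idea is to look up each detection's class name through self._categories and zero out the score of anything you don't want, so that visualize_boxes_and_labels_on_image_array, whose min_score_thresh parameter skips low-scoring boxes, never draws it.

# A minimal sketch, assuming the SingleShotDetector class from the question.
# ALLOWED_CLASSES is a hypothetical name introduced for this example.
ALLOWED_CLASSES = {'person'}

def from_image(self, frame):
    height, width, _ = frame.shape

    image_np_expanded = np.expand_dims(frame, axis=0)
    image_tensor = self._detection_graph.get_tensor_by_name('image_tensor:0')
    boxes = self._detection_graph.get_tensor_by_name('detection_boxes:0')
    scores = self._detection_graph.get_tensor_by_name('detection_scores:0')
    classes = self._detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = self._detection_graph.get_tensor_by_name('num_detections:0')

    boxes, scores, classes, num_detections = self._session.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    boxes = np.squeeze(boxes)
    scores = np.squeeze(scores)
    classes = np.squeeze(classes).astype(np.int32)

    detected_objects = {}
    for i in range(boxes.shape[0]):
        class_name = self._categories.get(int(classes[i]))

        # suppress anything below the confidence threshold
        # or not in the set of classes we care about
        if scores[i] < self._confidence or class_name not in ALLOWED_CLASSES:
            scores[i] = 0.0  # the visualizer skips boxes under min_score_thresh
            continue

        # convert the normalized box coordinates to pixels
        ymin = int(boxes[i][0] * height)
        xmin = int(boxes[i][1] * width)
        ymax = int(boxes[i][2] * height)
        xmax = int(boxes[i][3] * width)
        detected_objects.setdefault(class_name, []).append((ymin, xmin, ymax, xmax))

    # only the surviving (person) boxes are drawn now
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame, boxes, classes, scores, self._category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=self._confidence)

    return frame, detected_objects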

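If you only need the coordinates and do not care about what gets drawn, you can also leave the class untouched and filter the dictionary that from_image already returns, since it is keyed by class name. A usage sketch, assuming frame is a BGR image already read with cv2.imread or taken from a stream:

detector = SingleShotDetector('frozen_inference_graph.pb', 'mscoco_label_map.pbtxt')
marked_frame, objects = detector.from_image(frame)
people = objects.get('person', [])  # list of (ymin, xmin, ymax, xmax) tuples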
