
Problem description

Is there an alternative way to create threads and parallelize the face detector with the hand detector?

In short, I am building a script that performs face recognition on individuals and lets them issue commands with their hands. My current problem is that I cannot come up with a better way to reduce the execution time.

This is a hand-tracking .py file based on Murtaza's Workshop script, with some modifications; I use a few of its functions here.

import cv2
import mediapipe as mp
import time
import math
import numpy as np

class handDetector():
    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        # Use keyword arguments: newer MediaPipe versions insert a
        # model_complexity parameter, which silently breaks positional calls
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]

    def findHands(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        xList = []
        yList = []
        bbox = []
        self.lmList = []
        if self.results.multi_hand_landmarks:
            mainHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(mainHand.landmark):
                h, w, c = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                xList.append(cx)
                yList.append(cy)
                self.lmList.append([id, cx, cy])
                if draw:
                    cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)

            xmin, xmax = min(xList), max(xList)
            ymin, ymax = min(yList), max(yList)
            bbox = xmin, ymin, xmax, ymax
            if draw:
                cv2.rectangle(img, (xmin - 20, ymin - 20), (xmax + 20, ymax + 20), (0, 255, 0), 2)

        return self.lmList, bbox

    def fingersUp(self):
        fingers = []
        # Bug fix: guard on the landmark list, not on the freshly created
        # (always empty) `fingers` list, which made the original always return None
        if len(self.lmList) == 0:
            return None
        #Thumb
        if self.lmList[self.tipIds[0]][1] > self.lmList[self.tipIds[0]-1][1]:
            fingers.append(1)
        else:
            fingers.append(0)

        #Fingers
        for id in range(1,5):
            if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id]-2][2]:
                fingers.append(1)
            else:
                fingers.append(0)

        return fingers

    def findDistance(self, p1, p2, img, draw=True, r=15, t=3):
        x1, y1 = self.lmList[p1][1:]
        x2, y2 = self.lmList[p2][1:]
        cx, cy = (x1 + x2) // 2, (y1+y2) // 2

        if draw:
            cv2.line(img, (x1,y1), (x2, y2), (255, 0, 255), t)
            cv2.circle(img, (x1,y1), r, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2,y2), r, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (cx, cy), r, (0, 0, 255), cv2.FILLED)
        length = math.hypot(x2-x1, y2-y1)
        return length, img, [x1, y1, x2, y2, cx, cy]

def main():
    pTime = 0
    cTime = 0
    cap = cv2.VideoCapture(0)
    detector = handDetector()
    while True:
        success, img = cap.read()
        if not success:
            break
        img = detector.findHands(img)
        lmList, bbox = detector.findPosition(img)
        if len(lmList) != 0:
            print(lmList[4])

        cTime = time.time()
        fps = 1/(cTime-pTime)
        pTime = cTime

        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)

        cv2.imshow("Image", img)
        if cv2.waitKey(1) & 0xFF == 27:  # press ESC to quit the demo
            break

if __name__ == "__main__":
    main()

Here are the capture, training, and detection scripts.

# The following script is written in Python. It opens the computer's web camera
# to detect faces and recognize each face individually. It first checks whether
# that face is already in the face_recog folder; if it exists, it writes a text
# saying "hi, <user id>"; if not, it saves new images inside face_recog
# (e.g. "face_recog/samu/User.samu.1.jpg"). The cascade path is "haarcascades/",
# the recognition-faces path is "face_recog/". (Edited by Sama, thanks Codex)

import cv2
import os

cam = cv2.VideoCapture(0)
cam.set(3, 640) # set video width
cam.set(4, 480) # set video height

face_detector = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')

# For each person, enter one numeric face id
face_id = input('\n enter user id and press <return> ==>  ')

print("\n [INFO] Initializing face capture. Look the camera and wait ...")
# Initialize the individual sampling face count: resume from the highest
# existing sample number for this user id (the trailing dot keeps e.g.
# id "1" from also matching files belonging to id "12")
count = 0
for file in os.listdir("face_recog/"):
    if file.endswith(".jpg") and file.startswith("User." + str(face_id) + "."):
        count = max(count, int(file.split(".")[-2]))

while True:

    ret, img = cam.read()
    img = cv2.flip(img, 1) # mirror the video image horizontally
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_detector.detectMultiScale(gray, 1.3, 5)

    for (x,y,w,h) in faces:

        cv2.rectangle(img, (x,y), (x+w,y+h), (255,0,0), 2)
        count += 1

        # Save the captured image into the datasets folder
        cv2.imwrite("face_recog/User." + str(face_id) + '.' + str(count) + ".jpg", gray[y:y+h,x:x+w])

        cv2.imshow('image', img)

    k = cv2.waitKey(100) & 0xff # press 'ESC' to exit the video
    if k == 27:
        break
    elif count >= 300: # take 300 face samples, then stop
        break

# Do a bit of cleanup
print("\n [INFO] Exiting Program and cleanup stuff")
cam.release()
cv2.destroyAllWindows()

# Now we train the model. Later, the recognizer reports a confidence score for
# each face that appears on screen; if it is greater than 75%, "hello, <name>"
# is written under the drawn square in green, otherwise "not recognized" is
# written in red.

import cv2
import numpy as np
from PIL import Image
import os

# Path for face image database
path = 'face_recog'

recognizer = cv2.face.LBPHFaceRecognizer_create()
detector = cv2.CascadeClassifier("haarcascades/haarcascade_frontalface_default.xml")

# function to get the images and label data
def getImagesAndLabels(path):

    imagePaths = [os.path.join(path,f) for f in os.listdir(path)]
    faceSamples=[]
    ids = []

    for imagePath in imagePaths:

        PIL_img = Image.open(imagePath).convert('L') # convert it to grayscale
        img_numpy = np.array(PIL_img,'uint8')

        id = int(os.path.split(imagePath)[-1].split(".")[1])
        faces = detector.detectMultiScale(img_numpy)

        for (x,y,w,h) in faces:
            faceSamples.append(img_numpy[y:y+h,x:x+w])
            ids.append(id)

    return faceSamples,ids

print ("\n [INFO] Training faces. It will take a few seconds. Wait ...")
faces,ids = getImagesAndLabels(path)
recognizer.train(faces, np.array(ids))

# Save the model into trainer/trainer.yml
recognizer.write('trainer/trainer.yml') # recognizer.save() worked on Mac, but not on Pi

# Print the number of faces trained and end the program
print("\n [INFO] {0} faces trained. Exiting Program".format(len(np.unique(ids))))

# Now we use the recognizer on live video: if the confidence is greater than
# 75%, "hello, <name>" is written under the drawn square in green, otherwise
# "not recognized" is written in red.
import cv2
import sys
import threading
import concurrent.futures
import pyttsx3
import HandTracking as htm
from time import sleep

recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer/trainer.yml')
cascadePath = "haarcascades/haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)

font = cv2.FONT_HERSHEY_SIMPLEX

#For hands
detector = htm.handDetector(maxHands=4)

# initialize id counter
id = 0
# initialize greeting timer
timeFlip = 0
# initialize greeting string
stringText = ""
# names related to ids: example ==> Sama: id=1, etc.
names = ['None', 'Sama', 'Codex', 'Davinci', 'Debora', 'Ian', '...']

# Initialize and start realtime video capture
cam = cv2.VideoCapture(0)
cam.set(3, 640)  # set video width
cam.set(4, 480)  # set video height

# Define min window size to be recognized as a face
minW = 0.1 * cam.get(3)
minH = 0.1 * cam.get(4)


# this function is used to convert the given string to speech
def text_to_speech(text):
    engine = pyttsx3.init()
    engine.setProperty('rate', 220)
    engine.say(text)
    engine.runAndWait()
    del engine


def typing(text):
    for char in text:
        sleep(0.04)
        sys.stdout.write(char)
        sys.stdout.flush()


def parallel(text):
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        future_tasks = {executor.submit(text_to_speech, text), executor.submit(typing, text)}
        for future in concurrent.futures.as_completed(future_tasks):
            try:
                data = future.result()
            except Exception as e:
                print(e)


def multiThread_with_TTS(text):
    threading.Thread(
        target=parallel, args=(text,), daemon=True
    ).start()


def multiThread_with_HANDS(img):
    threading.Thread(
        target=detector.findHands, args=(img,), daemon=True
    ).start()
    # lmList, bbox = detector.findPosition(img)
    threading.Thread(
        target=detector.findPosition, args=(img,), daemon=True
    ).start()
    if detector.fingersUp() is not None:
        print(detector.fingersUp())


while True:
    ret, img = cam.read()
    imgH = detector.findHands(img)
    lmList, bbox = detector.findPosition(imgH)
    if detector.fingersUp() is not None:
        print(detector.fingersUp())
    #multiThread_with_HANDS(img)
    img = cv2.flip(img, 1)  # Mirror the image horizontally
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.2,
        minNeighbors=5,
        minSize=(int(minW), int(minH)),
    )
    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        id, confidence = recognizer.predict(gray[y:y + h, x:x + w])
        # Check if confidence is less than 100 ==> "0" is a perfect match
        if confidence < 100:
            id = names[id]
            confidence = "  {0}%".format(round(100 - confidence))
            if timeFlip == 0:
                timeFlip = 1
                if stringText != id:
                    stringText = id
                    multiThread_with_TTS(" Hi " + id)
                    #parallel(" Hi " + id)
            else:
                timeFlip += 1
                if timeFlip > 35:
                    timeFlip = 0
        else:
            id = "unknown"
            confidence = "  {0}%".format(round(100 - confidence))

        cv2.putText(img, str(id), (x + 5, y - 5), font, 1, (255, 255, 255), 2)
        cv2.putText(img, str(confidence), (x + 5, y + h - 5), font, 1, (255, 255, 0), 1)

    cv2.imshow('camera', img)

    k = cv2.waitKey(10) & 0xff  # press 'ESC' to exit the video
    if k == 27:
        parallel(" Bye " + id + ", see you later!")
        break
    if k == 32 and stringText != id:
        multiThread_with_TTS(" You are " + str(id) + ", hi!")
        #parallel(" You are " + str(id) + ", hi!")
    if k == 32 and stringText == id:
        multiThread_with_TTS(" You are " + str(id) + ", hi again!")
        #parallel(" You are " + str(id) + ", hi again!")

# Do a bit of cleanup
print("\n [INFO] Exiting Program and cleanup stuff")
cam.release()

cv2.destroyAllWindows()

The problem is in the last script: it spawns threads for the hand detector, and while those are still running, the thread calls for the next frame start and overlap with the previous ones. If I run everything sequentially instead, the frame rate drops.

def multiThread_with_HANDS(img):
    threading.Thread(
        target=detector.findHands, args=(img,), daemon=True
    ).start()
    # lmList, bbox = detector.findPosition(img)
    threading.Thread(
        target=detector.findPosition, args=(img,), daemon=True
    ).start()
    if detector.fingersUp() is not None:
        print(detector.fingersUp())
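
Part of the trouble, beyond the cost of spawning fresh threads every frame, is that findHands, findPosition, and fingersUp communicate through shared attributes on the detector (self.results and self.lmList). Running findHands and findPosition in two separate threads therefore races on that state, and fingersUp can read self.lmList before findPosition has finished writing it. A minimal first step that keeps the chain atomic is to bundle the three calls into one unit of work before handing it to a thread (a sketch; the helper name is illustrative, not from the original code):

def hand_pipeline(detector, img):
    # Run the three interdependent calls as one unit so they never interleave
    detector.findHands(img, draw=False)                    # writes detector.results
    lmList, bbox = detector.findPosition(img, draw=False)  # writes detector.lmList
    fingers = detector.fingersUp() if lmList else None     # reads detector.lmList
    return lmList, bbox, fingers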

Does anyone have an idea how to solve this without a large drop in the frame rate?

Tags: machine-learning, computer-vision, python-multithreading, face-recognition

Solution
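
One common alternative (a sketch, assuming only the landmark list and finger states are needed from the hand detector, not the annotated image): instead of spawning two new threads per frame, keep a single long-lived worker thread that owns the hand detector and always processes the most recent camera frame. Because only that one thread ever touches the detector, its calls never overlap, and the main loop is free to run face detection and recognition at full speed, merely reading whatever hand results the worker last published. MediaPipe and OpenCV do most of their work in native code that releases the GIL, so the two loops can genuinely overlap. All names below (hand_worker, latest_frame, hand_results) are illustrative:

import threading
import time
import cv2
import HandTracking as htm

detector = htm.handDetector(maxHands=4)
lock = threading.Lock()
latest_frame = None            # newest camera frame, written by the main loop
hand_results = (None, None)    # (lmList, fingers), published by the worker
stop_event = threading.Event()

def hand_worker():
    global latest_frame, hand_results
    while not stop_event.is_set():
        with lock:
            frame, latest_frame = latest_frame, None  # take the newest frame
        if frame is None:
            time.sleep(0.001)  # no new frame yet
            continue
        detector.findHands(frame, draw=False)
        lmList, bbox = detector.findPosition(frame, draw=False)
        fingers = detector.fingersUp() if lmList else None
        with lock:
            hand_results = (lmList, fingers)

threading.Thread(target=hand_worker, daemon=True).start()

cam = cv2.VideoCapture(0)
while True:
    ret, img = cam.read()
    if not ret:
        break
    with lock:
        latest_frame = img             # hand worker picks this up when ready
        lmList, fingers = hand_results
    if fingers is not None:
        print(fingers)
    # ... run the Haar cascade / LBPH recognition on img here, exactly as in
    #     the original loop; it no longer waits for the hand detector ...
    cv2.imshow('camera', img)
    if cv2.waitKey(10) & 0xff == 27:   # ESC
        break

stop_event.set()
cam.release()
cv2.destroyAllWindows()

The trade-off is that the hand results can lag the displayed frame by a frame or two, which is usually acceptable for gesture commands. The key property is that the worker drops stale frames instead of queueing them, so it never falls behind the camera; if the stages had to stay in lock-step, a multiprocessing.Process with a queue would be the usual next step, at the cost of shipping frames between processes.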

