machine-learning - 任何替代方法可以创建线程并将面部检测器与手部检测器并行化?
问题描述
任何替代方法可以创建线程并将面部检测器与手部检测器并行化?
总之,我正在创建一个能够对个人进行面部识别的脚本,并且可以用手执行命令。我目前的问题是我想不出更好的方法来减少执行时间
这是一个基于 Murtaza 的 Workshop 脚本和修改的手部跟踪的 .py 文件,我在此处使用了一些功能。
import cv2
import mediapipe as mp
import time
import math
import numpy as np
class handDetector():
def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
self.mode = mode
self.maxHands = maxHands
self.detectionCon= detectionCon
self.trackCon = trackCon
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(self.mode, self.maxHands,
self.detectionCon, self.trackCon)
self.mpDraw = mp.solutions.drawing_utils
self.tipIds = [4, 8, 12, 16, 20]
def findHands(self, img, draw=True):
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
self.results = self.hands.process(imgRGB)
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
return img
def findPosition(self, img, handNo=0, draw=True):
xList = []
yList = []
bbox = []
self.lmList = []
if self.results.multi_hand_landmarks:
mainHand = self.results.multi_hand_landmarks[handNo]
for id, lm in enumerate(mainHand.landmark):
h, w, c = img.shape
cx, cy = int(lm.x * w), int(lm.y * h)
xList.append(cx)
yList.append(cy)
self.lmList.append([id, cx, cy])
if draw:
cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)
xmin, xmax = min(xList), max(xList)
ymin, ymax = min(yList), max(yList)
bbox = xmin, ymin, xmax, ymax
if draw:
cv2.rectangle(img, (xmin - 20, ymin - 20), (xmax + 20, ymax + 20), (0, 255, 0), 2)
return self.lmList, bbox
def fingersUp(self):
fingers = []
if len(fingers) == 0:
return None
#Thumb
if self.lmList[self.tipIds[0]][1] > self.lmList[self.tipIds[0]-1][1]:
fingers.append(1)
else:
fingers.append(0)
#Fingers
for id in range(1,5):
if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id]-2][2]:
fingers.append(1)
else:
fingers.append(0)
return fingers
def findDistance(self, p1, p2, img, draw=True, r=15, t=3):
x1, y1 = self.lmList[p1][1:]
x2, y2 = self.lmList[p2][1:]
cx, cy = (x1 + x2) // 2, (y1+y2) // 2
if draw:
cv2.line(img, (x1,y1), (x2, y2), (255, 0, 255), t)
cv2.circle(img, (x1,y1), r, (255, 0, 255), cv2.FILLED)
cv2.circle(img, (x2,y2), r, (255, 0, 255), cv2.FILLED)
cv2.circle(img, (cx, cy), r, (0, 0, 255), cv2.FILLED)
length = math.hypot(x2-x1, y2-y1)
return length, img, [x1, y1, x2, y2, cx, cy]
def main():
pTime = 0
cTime = 0
cap = cv2.VideoCapture(0)
detector = handDetector()
while True:
success, img = cap.read()
img = detector.findHands(img)
lmList, bbox = detector.findPosition(img)
if len(lmList) != 0:
print(lmList[4])
cTime = time.time()
fps = 1/(cTime-pTime)
pTime = cTime
cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
cv2.imshow("Image", img)
cv2.waitKey(1)
if __name__ == "__main__":
main()
这里是获取、训练和检测脚本
# the following script is written in python language, in which a script is created that calls the web camera of the ordered to detect faces and be able to recognice faces individually, check first if that face is in face_recog folder, if exist write a text said "hi, <user id>", if not exist create a folder and save images inside face_recog (Ex:"face_recog/samu/User.samu.1.jpg"). Cascade path is "haarcascades/", recognition faces path is "face_recog/" (Edited by Sama, thanks Codex)
import cv2
import os
cam = cv2.VideoCapture(0)
cam.set(3, 640) # set video width
cam.set(4, 480) # set video height
face_detector = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')
# For each person, enter one numeric face id
face_id = input('\n enter user id end press <return> ==> ')
print("\n [INFO] Initializing face capture. Look the camera and wait ...")
# Initialize individual sampling face count
flag = False
for file in os.listdir("face_recog/"):
if file.endswith(".jpg"):
if file.startswith("User."+str(face_id)):
if not flag:
flag = True
count = int(file.split(".")[-2])
if flag and count < int(file.split(".")[-2]):
count = int(file.split(".")[-2])
if not flag:
count = 0
while(True):
ret, img = cam.read()
img = cv2.flip(img, 1) # flip video image vertically
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_detector.detectMultiScale(gray, 1.3, 5)
for (x,y,w,h) in faces:
cv2.rectangle(img, (x,y), (x+w,y+h), (255,0,0), 2)
count += 1
# Save the captured image into the datasets folder
cv2.imwrite("face_recog/User." + str(face_id) + '.' + str(count) + ".jpg", gray[y:y+h,x:x+w])
cv2.imshow('image', img)
k = cv2.waitKey(100) & 0xff # Press 'ESC' for exiting video
if k == 27:
break
elif count >= 300: # Take 30 face sample and stop video
break
# Do a bit of cleanup
print("\n [INFO] Exiting Program and cleanup stuff")
cam.release()
cv2.destroyAllWindows()
<!-- end snippet -->
#now we train the model to detect the percentage of accuracy of the next face that appears on the screen, if it is greater than 75%, then it is written under the square drawn "hello, "in green, but write "not recognized" in red.
import cv2
import numpy as np
from PIL import Image
import os
# Path for face image database
path = 'face_recog'
recognizer = cv2.face.LBPHFaceRecognizer_create()
detector = cv2.CascadeClassifier("haarcascades/haarcascade_frontalface_default.xml")
# function to get the images and label data
def getImagesAndLabels(path):
imagePaths = [os.path.join(path,f) for f in os.listdir(path)]
faceSamples=[]
ids = []
for imagePath in imagePaths:
PIL_img = Image.open(imagePath).convert('L') # convert it to grayscale
img_numpy = np.array(PIL_img,'uint8')
id = int(os.path.split(imagePath)[-1].split(".")[1])
faces = detector.detectMultiScale(img_numpy)
for (x,y,w,h) in faces:
faceSamples.append(img_numpy[y:y+h,x:x+w])
ids.append(id)
return faceSamples,ids
print ("\n [INFO] Training faces. It will take a few seconds. Wait ...")
faces,ids = getImagesAndLabels(path)
recognizer.train(faces, np.array(ids))
# Save the model into trainer/trainer.yml
recognizer.write('trainer/trainer.yml') # recognizer.save() worked on Mac, but not on Pi
# Print the numer of faces trained and end program
print("\n [INFO] {0} faces trained. Exiting Program".format(len(np.unique(ids))))
#now we use the recognizer to recognize the face, if the percentage of accuracy is greater than 75%, then it is written under the square drawn "hello, "in green, but write "not recognized" in red.
import cv2
import sys
import threading
import concurrent.futures
import pyttsx3
import HandTracking as htm
from time import sleep
recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer/trainer.yml')
cascadePath = "haarcascades/haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)
font = cv2.FONT_HERSHEY_SIMPLEX
#For hands
detector = htm.handDetector(maxHands=4)
# iniciate id counter
id = 0
# iniciate timer
timeFlip = 0
# iniciate string
stringText = ""
# names related to ids: example ==> Marcelo: id=1, etc
names = ['None', 'Sama', 'Codex', 'Davinci', 'Debora', 'Ian', '...']
# Initialize and start realtime video capture
cam = cv2.VideoCapture(0)
cam.set(3, 640) # set video widht
cam.set(4, 480) # set video height
# Define min window size to be recognized as a face
minW = 0.1 * cam.get(3)
minH = 0.1 * cam.get(4)
# this function is used to convert the given string to speech
def text_to_speech(text):
engine = pyttsx3.init()
engine.setProperty('rate', 220)
engine.say(text)
engine.runAndWait()
del engine
def typing(text):
for char in text:
sleep(0.04)
sys.stdout.write(char)
sys.stdout.flush()
def parallel(text):
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
future_tasks = {executor.submit(text_to_speech, text), executor.submit(typing, text)}
for future in concurrent.futures.as_completed(future_tasks):
try:
data = future.result()
except Exception as e:
print(e)
def multiThread_with_TTS(text):
threading.Thread(
target=parallel, args=(text,), daemon=True
).start()
def multiThread_with_HANDS(img):
threading.Thread(
target=detector.findHands, args=(img,), daemon=True
).start()
# lmList, bbox = detector.findPosition(img)
threading.Thread(
target=detector.findPosition, args=(img,), daemon=True
).start()
if detector.fingersUp() is not None:
print(detector.fingersUp())
while True:
ret, img = cam.read()
imgH = detector.findHands(img)
lmList, bbox = detector.findPosition(imgH)
if detector.fingersUp() is not None:
print(detector.fingersUp())
#multiThread_with_HANDS(img)
img = cv2.flip(img, 1) # Flip vertically
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = faceCascade.detectMultiScale(
gray,
scaleFactor=1.2,
minNeighbors=5,
minSize=(int(minW), int(minH)),
)
for (x, y, w, h) in faces:
cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
id, confidence = recognizer.predict(gray[y:y + h, x:x + w])
# Check if confidence is less them 100 ==> "0" is perfect match
if confidence < 100:
id = names[id]
confidence = " {0}%".format(round(100 - confidence))
if timeFlip == 0:
timeFlip = 1
if stringText != id:
stringText = id
multiThread_with_TTS(" Hi " + id)
#parallel(" Hi " + id)
else:
timeFlip += 1
if timeFlip > 35:
timeFlip = 0
else:
id = "unknown"
confidence = " {0}%".format(round(100 - confidence))
cv2.putText(img, str(id), (x + 5, y - 5), font, 1, (255, 255, 255), 2)
cv2.putText(img, str(confidence), (x + 5, y + h - 5), font, 1, (255, 255, 0), 1)
cv2.imshow('camera', img)
k = cv2.waitKey(10) & 0xff # Press 'ESC' for exiting video
if k == 27:
parallel(" Bye " + id + ", see you later!")
break
if k == 32 and stringText != id:
multiThread_with_TTS(" You are " + id+", hi!")
#parallel(" You are " + id+", hi!")
if k == 32 and stringText == id:
multiThread_with_TTS(" You are " + id + ", hi again!")
#parallel(" You are " + id + ", hi again!")
# Do a bit of cleanup
print("\n [INFO] Exiting Program and cleanup stuff")
cam.release()
cv2.destroyAllWindows()
问题出在最后一个脚本中,在里面,出现了对线程的调用,当执行它时,对下一个线程的调用出现并与前一个重叠,如果我按顺序执行,则帧速率下降
def multiThread_with_HANDS(img):
threading.Thread(
target=detector.findHands, args=(img,), daemon=True
).start()
# lmList, bbox = detector.findPosition(img)
threading.Thread(
target=detector.findPosition, args=(img,), daemon=True
).start()
if detector.fingersUp() is not None:
print(detector.fingersUp())
有没有人想到如何在不大幅降低帧速率的情况下解决问题?
解决方案
推荐阅读
- c# - C#删除边界内的字符串
- python - Python CSV 错误:行包含 NULL 字节,但在文件中找不到 NULL 字节
- ios - 在 Swift 中,从 App Extension 目标运行时如何忽略部分代码?
- apache - Apache Intermittant Hang 是网络延迟吗?
- c# - 如何为自定义用户控件提供数据触发器属性?
- qnamaker - 我的租户在 QnA 制造商中显示为红色文本是否有原因?
- postgresql - PostgreSQL - 声明一个可以存储多个值的变量
- c++ - 简单单向表的最大排序
- java - 主/细节(活动/片段)布局,从Android移植到代号一
- c++ - 打印用 void 指针加载的 char 数组的值