#machine-learning #computer-vision #python-multithreading #face-recognition
Question:
Is there an alternative to creating threads to run the face detector and the hand detector in parallel?
I am building a script that recognizes a person's face and can execute commands triggered by hand gestures. My current problem is that I cannot think of a better way to reduce the execution time.
This is the .py that implements the hand tracking; it is based on a script from Murtaza's Workshop, with some modifications, and I use some of its functions here.
import cv2
import mediapipe as mp
import time
import math
import numpy as np


class handDetector():
    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        self.mpHands = mp.solutions.hands
        # Keyword arguments avoid breakage on newer mediapipe releases,
        # which inserted model_complexity between the positional parameters.
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]

    def findHands(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        xList = []
        yList = []
        bbox = []
        self.lmList = []
        if self.results.multi_hand_landmarks:
            mainHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(mainHand.landmark):
                h, w, c = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                xList.append(cx)
                yList.append(cy)
                self.lmList.append([id, cx, cy])
                if draw:
                    cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)
            xmin, xmax = min(xList), max(xList)
            ymin, ymax = min(yList), max(yList)
            bbox = xmin, ymin, xmax, ymax
            if draw:
                cv2.rectangle(img, (xmin - 20, ymin - 20), (xmax + 20, ymax + 20), (0, 255, 0), 2)
        return self.lmList, bbox

    def fingersUp(self):
        fingers = []
        # No hand detected on the last frame: nothing to report.
        if len(self.lmList) == 0:
            return None
        # Thumb: compare the tip's x-coordinate against the joint below it.
        if self.lmList[self.tipIds[0]][1] > self.lmList[self.tipIds[0] - 1][1]:
            fingers.append(1)
        else:
            fingers.append(0)
        # Other four fingers: tip above the PIP joint means the finger is up.
        for id in range(1, 5):
            if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
                fingers.append(1)
            else:
                fingers.append(0)
        return fingers

    def findDistance(self, p1, p2, img, draw=True, r=15, t=3):
        x1, y1 = self.lmList[p1][1:]
        x2, y2 = self.lmList[p2][1:]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        if draw:
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), t)
            cv2.circle(img, (x1, y1), r, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), r, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (cx, cy), r, (0, 0, 255), cv2.FILLED)
        length = math.hypot(x2 - x1, y2 - y1)
        return length, img, [x1, y1, x2, y2, cx, cy]


def main():
    pTime = 0
    cTime = 0
    cap = cv2.VideoCapture(0)
    detector = handDetector()
    while True:
        success, img = cap.read()
        if not success:
            break
        img = detector.findHands(img)
        lmList, bbox = detector.findPosition(img)
        if len(lmList) != 0:
            print(lmList[4])
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
        cv2.imshow("Image", img)
        cv2.waitKey(1)


if __name__ == "__main__":
    main()
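Before reaching for threads, note that much of the per-frame cost comes from running the detectors at full resolution. A minimal sketch of one common mitigation, assuming the handDetector class above: run detection on a downscaled copy of the frame and rescale the landmark coordinates back up. The scale factor and the helper name are illustrative, not part of the original script.

import cv2
import HandTracking as htm  # the handDetector module shown above

detector = htm.handDetector()
SCALE = 0.5  # illustrative: process at half resolution

def find_hands_downscaled(img):
    # Detect on a smaller copy, then return landmarks scaled back
    # to the original image's coordinate system.
    small = cv2.resize(img, None, fx=SCALE, fy=SCALE)
    detector.findHands(small, draw=False)
    lmList, bbox = detector.findPosition(small, draw=False)
    return [[pid, int(cx / SCALE), int(cy / SCALE)] for pid, cx, cy in lmList]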
And here is the script for capturing, training, and detection.
# The following script calls the web camera to detect faces and recognize them
# individually. It first checks whether that face already has samples in the
# face_recog folder; if it exists, a text saying "hi, <user id>" is shown, and
# if not, new sample images are saved inside face_recog
# (e.g. "face_recog/samu/User.samu.1.jpg"). Cascade path is "haarcascades/",
# recognition faces path is "face_recog/" (Edited by Sama, thanks Codex)
import cv2
import os

cam = cv2.VideoCapture(0)
cam.set(3, 640)  # set video width
cam.set(4, 480)  # set video height
face_detector = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')
# For each person, enter one numeric face id
face_id = input('\n enter user id and press <return> ==> ')
print("\n [INFO] Initializing face capture. Look at the camera and wait ...")
# Resume the sample count from the highest existing sample index for this id
flag = False
for file in os.listdir("face_recog/"):
    if file.endswith(".jpg") and file.startswith("User." + str(face_id)):
        if not flag:
            flag = True
            count = int(file.split(".")[-2])
        if count < int(file.split(".")[-2]):
            count = int(file.split(".")[-2])
if not flag:
    count = 0

while True:
    ret, img = cam.read()
    img = cv2.flip(img, 1)  # flip video image horizontally
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_detector.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        count += 1
        # Save the captured face into the dataset folder
        cv2.imwrite("face_recog/User." + str(face_id) + '.' + str(count) + ".jpg",
                    gray[y:y + h, x:x + w])
        cv2.imshow('image', img)
    k = cv2.waitKey(100) & 0xff  # Press 'ESC' to exit the video
    if k == 27:
        break
    elif count >= 300:  # Take 300 face samples and stop video
        break

# Do a bit of cleanup
print("\n [INFO] Exiting Program and cleanup stuff")
cam.release()
cv2.destroyAllWindows()
# Now we train the model so we can measure the confidence of the next face that
# appears on the screen: if it is above the threshold, "hello, <name>" is
# written in green under the drawn square, otherwise "not recognized" in red.
import cv2
import numpy as np
from PIL import Image
import os

# Path for face image database
path = 'face_recog'
recognizer = cv2.face.LBPHFaceRecognizer_create()
detector = cv2.CascadeClassifier("haarcascades/haarcascade_frontalface_default.xml")

# function to get the images and label data
def getImagesAndLabels(path):
    imagePaths = [os.path.join(path, f) for f in os.listdir(path)]
    faceSamples = []
    ids = []
    for imagePath in imagePaths:
        PIL_img = Image.open(imagePath).convert('L')  # convert it to grayscale
        img_numpy = np.array(PIL_img, 'uint8')
        id = int(os.path.split(imagePath)[-1].split(".")[1])
        faces = detector.detectMultiScale(img_numpy)
        for (x, y, w, h) in faces:
            faceSamples.append(img_numpy[y:y + h, x:x + w])
            ids.append(id)
    return faceSamples, ids

print("\n [INFO] Training faces. It will take a few seconds. Wait ...")
faces, ids = getImagesAndLabels(path)
recognizer.train(faces, np.array(ids))
# Save the model into trainer/trainer.yml
recognizer.write('trainer/trainer.yml')  # recognizer.save() worked on Mac, but not on Pi
# Print the number of faces trained and end program
print("\n [INFO] {0} faces trained. Exiting Program".format(len(np.unique(ids))))

# Now we use the recognizer to recognize the face: if the confidence is good
# enough, "hello, <name>" is written in green under the drawn square,
# otherwise "not recognized" in red.
import cv2
import sys
import threading
import concurrent.futures
import pyttsx3
import HandTracking as htm
from time import sleep

recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer/trainer.yml')
cascadePath = "haarcascades/haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)
font = cv2.FONT_HERSHEY_SIMPLEX
# For hands
detector = htm.handDetector(maxHands=4)
# initiate id counter
id = 0
# initiate timer
timeFlip = 0
# initiate string
stringText = ""
# names related to ids: example ==> Marcelo: id=1, etc
names = ['None', 'Sama', 'Codex', 'Davinci', 'Debora', 'Ian', '...']
# Initialize and start realtime video capture
cam = cv2.VideoCapture(0)
cam.set(3, 640)  # set video width
cam.set(4, 480)  # set video height
# Define min window size to be recognized as a face
minW = 0.1 * cam.get(3)
minH = 0.1 * cam.get(4)

# this function is used to convert the given string to speech
def text_to_speech(text):
    engine = pyttsx3.init()
    engine.setProperty('rate', 220)
    engine.say(text)
    engine.runAndWait()
    del engine

def typing(text):
    for char in text:
        sleep(0.04)
        sys.stdout.write(char)
        sys.stdout.flush()

def parallel(text):
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        future_tasks = {executor.submit(text_to_speech, text),
                        executor.submit(typing, text)}
        for future in concurrent.futures.as_completed(future_tasks):
            try:
                data = future.result()
            except Exception as e:
                print(e)

def multiThread_with_TTS(text):
    threading.Thread(target=parallel, args=(text,), daemon=True).start()

def multiThread_with_HANDS(img):
    threading.Thread(target=detector.findHands, args=(img,), daemon=True).start()
    # lmList, bbox = detector.findPosition(img)
    threading.Thread(target=detector.findPosition, args=(img,), daemon=True).start()
    if detector.fingersUp() is not None:
        print(detector.fingersUp())

while True:
    ret, img = cam.read()
    imgH = detector.findHands(img)
    lmList, bbox = detector.findPosition(imgH)
    if detector.fingersUp() is not None:
        print(detector.fingersUp())
    # multiThread_with_HANDS(img)
    img = cv2.flip(img, 1)  # Flip horizontally
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.2,
        minNeighbors=5,
        minSize=(int(minW), int(minH)),
    )
    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        id, confidence = recognizer.predict(gray[y:y + h, x:x + w])
        # Check if confidence is less than 100 ==> "0" is perfect match
        if confidence < 100:
            id = names[id]
            confidence = " {0}%".format(round(100 - confidence))
            if timeFlip == 0:
                timeFlip = 1
                if stringText != id:
                    stringText = id
                    multiThread_with_TTS(" Hi " + id)
                    # parallel(" Hi " + id)
            else:
                timeFlip += 1
            if timeFlip > 35:
                timeFlip = 0
        else:
            id = "unknown"
            confidence = " {0}%".format(round(100 - confidence))
        cv2.putText(img, str(id), (x + 5, y - 5), font, 1, (255, 255, 255), 2)
        cv2.putText(img, str(confidence), (x + 5, y + h - 5), font, 1, (255, 255, 0), 1)
    cv2.imshow('camera', img)
    k = cv2.waitKey(10) & 0xff  # Press 'ESC' for exiting video
    if k == 27:
        parallel(" Bye " + id + ", see you later!")
        break
    if k == 32 and stringText != id:
        multiThread_with_TTS(" You are " + id + ", hi!")
        # parallel(" You are " + id + ", hi!")
    if k == 32 and stringText == id:
        multiThread_with_TTS(" You are " + id + ", hi again!")
        # parallel(" You are " + id + ", hi again!")

# Do a bit of cleanup
print("\n [INFO] Exiting Program and cleanup stuff")
cam.release()
cv2.destroyAllWindows()
The problem is that in the last script a thread is launched from inside the loop, and while it is still executing the next launch overlaps the previous one; if I instead run everything sequentially, the frame rate drops.
def multiThread_with_HANDS(img):
    threading.Thread(target=detector.findHands, args=(img,), daemon=True).start()
    # lmList, bbox = detector.findPosition(img)
    threading.Thread(target=detector.findPosition, args=(img,), daemon=True).start()
    if detector.fingersUp() is not None:
        print(detector.fingersUp())
Does anyone have an idea how to solve this without drastically lowering the frame rate?
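One pattern that avoids overlapping launches without serializing everything is a single long-lived worker thread that always processes only the most recent frame: the capture loop overwrites a shared slot instead of spawning a new thread per frame. A minimal sketch of that pattern, assuming the handDetector from above; the HandWorker name and its interface are illustrative, not part of the original scripts.

import threading
import HandTracking as htm

class HandWorker:
    """Single worker thread; processes only the latest frame it was given."""
    def __init__(self):
        self.detector = htm.handDetector()
        self.lock = threading.Lock()
        self.new_frame = threading.Event()
        self.frame = None
        self.fingers = None  # last result, read by the main loop
        threading.Thread(target=self._run, daemon=True).start()

    def submit(self, img):
        # Called from the capture loop: overwrite the pending frame.
        with self.lock:
            self.frame = img.copy()
        self.new_frame.set()

    def _run(self):
        while True:
            self.new_frame.wait()
            self.new_frame.clear()
            with self.lock:
                img = self.frame
            self.detector.findHands(img, draw=False)
            self.detector.findPosition(img, draw=False)
            self.fingers = self.detector.fingersUp()

In the main loop you would call worker.submit(img) each frame and read worker.fingers whenever it is not None; frames that arrive while the worker is busy simply replace the pending one, so launches never pile up and the capture loop never blocks.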
Comments:
1. Are you comparing the face from the camera against a list of faces? I have one suggestion: instead of checking each face in the database individually, check them in parallel. I.e. instead of: for face in faces: if face == camera_image: doThis(), do something like faces.ParallelMap(lambda x: doThis() if x == camera_image else None). A hedged sketch of this suggestion follows below.
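A minimal sketch of what that suggestion could look like with concurrent.futures; compare_faces, doThis, faces, and camera_image are placeholder stand-ins for the commenter's pseudocode, not real APIs from the scripts above.

import concurrent.futures

def compare_faces(known_face, candidate):
    # Placeholder: substitute the actual comparison the recognizer exposes.
    return known_face == candidate

def doThis(known_face):
    print("matched", known_face)

faces = ["face_a", "face_b", "face_c"]  # stand-in for the face database
camera_image = "face_b"                 # stand-in for the current frame's face

def check_face(known_face):
    if compare_faces(known_face, camera_image):
        doThis(known_face)

# Run the per-face checks concurrently instead of one by one.
with concurrent.futures.ThreadPoolExecutor() as executor:
    executor.map(check_face, faces)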