Is there an alternative to creating threads and running the face detector and the hand detector in parallel?

#machine-learning #computer-vision #python-multithreading #face-recognition

Question:

Is there an alternative to creating threads and running the face detector and the hand detector in parallel?

So, I am building a script that can recognize a person's face and then execute commands given by hand gestures. My current problem is that I cannot come up with a better way to reduce the execution time.

This is the .py module that implements the hand tracking; it is based on and modified from Murtaza's Workshop script, from which I borrow some functions.

import cv2
import mediapipe as mp
import time
import math
import numpy as np

class handDetector():
    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        # Newer mediapipe releases inserted a model_complexity parameter, so the
        # arguments must be passed by keyword to avoid being misassigned.
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]

    def findHands(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        xList = []
        yList = []
        bbox = []
        self.lmList = []
        if self.results.multi_hand_landmarks:
            mainHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(mainHand.landmark):
                h, w, c = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                xList.append(cx)
                yList.append(cy)
                self.lmList.append([id, cx, cy])
                if draw:
                    cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)

            xmin, xmax = min(xList), max(xList)
            ymin, ymax = min(yList), max(yList)
            bbox = xmin, ymin, xmax, ymax
            if draw:
                cv2.rectangle(img, (xmin - 20, ymin - 20), (xmax + 20, ymax + 20), (0, 255, 0), 2)

        return self.lmList, bbox

    def fingersUp(self):
        # No hand landmarks detected yet: nothing to report
        if len(self.lmList) == 0:
            return None
        fingers = []
        # Thumb
        if self.lmList[self.tipIds[0]][1] > self.lmList[self.tipIds[0]-1][1]:
            fingers.append(1)
        else:
            fingers.append(0)

        #Fingers
        for id in range(1,5):
            if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id]-2][2]:
                fingers.append(1)
            else:
                fingers.append(0)

        return fingers

    def findDistance(self, p1, p2, img, draw=True, r=15, t=3):
        x1, y1 = self.lmList[p1][1:]
        x2, y2 = self.lmList[p2][1:]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

        if draw:
            cv2.line(img, (x1,y1), (x2, y2), (255, 0, 255), t)
            cv2.circle(img, (x1,y1), r, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2,y2), r, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (cx, cy), r, (0, 0, 255), cv2.FILLED)
        length = math.hypot(x2-x1, y2-y1)
        return length, img, [x1, y1, x2, y2, cx, cy]

def main():
    pTime = 0
    cTime = 0
    cap = cv2.VideoCapture(0)
    detector = handDetector()
    while True:
        success, img = cap.read()
        if not success:
            break
        img = detector.findHands(img)
        lmList, bbox = detector.findPosition(img)
        if len(lmList) != 0:
            print(lmList[4])

        cTime = time.time()
        fps = 1/(cTime-pTime)
        pTime = cTime

        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)

        cv2.imshow("Image", img)
        cv2.waitKey(1)

if __name__ == "__main__":
    main()
 

And here is the capture, training, and detection script:

# The following script, written in Python, opens the computer's webcam to
# detect faces and recognize them individually. It first checks whether the
# face is already in the face_recog folder; if it exists, it writes a text
# saying "hi, <user id>"; if not, it creates the samples and saves the images
# inside face_recog (e.g. "face_recog/samu/User.samu.1.jpg"). The cascade path
# is "haarcascades/", the recognition faces path is "face_recog/".
# (Edited by Sama, thanks Codex)

import cv2
import os

cam = cv2.VideoCapture(0)
cam.set(3, 640) # set video width
cam.set(4, 480) # set video height

face_detector = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')

# For each person, enter one numeric face id
face_id = input('\n enter user id and press <return> ==>  ')

print("n [INFO] Initializing face capture. Look the camera and wait ...")
# Initialize individual sampling face count
flag = False
for file in os.listdir("face_recog/"):
    if file.endswith(".jpg"):
        # Match "User.<id>." exactly so that e.g. id 1 does not also match id 10
        if file.startswith("User." + str(face_id) + "."):
            sample = int(file.split(".")[-2])  # the sample counter is the second-to-last field
            if not flag or sample > count:
                flag = True
                count = sample
if not flag:
    count = 0

while True:

    ret, img = cam.read()
    img = cv2.flip(img, 1) # mirror the image horizontally
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_detector.detectMultiScale(gray, 1.3, 5)

    for (x,y,w,h) in faces:

        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        count += 1

        # Save the captured image into the datasets folder
        cv2.imwrite("face_recog/User."   str(face_id)   '.'   str(count)   ".jpg", gray[y:y h,x:x w])

        cv2.imshow('image', img)

    k = cv2.waitKey(100) & 0xff # Press 'ESC' to exit the video
    if k == 27:
        break
    elif count >= 300: # Take 300 face samples and stop the video
        break

# Do a bit of cleanup
print("n [INFO] Exiting Program and cleanup stuff")
cam.release()
cv2.destroyAllWindows()


# Now we train the model. For each face that later appears on screen, if the
# accuracy is greater than 75%, "hello, <name>" is written in green under the
# drawn square; otherwise "not recognized" is written in red.

import cv2
import numpy as np
from PIL import Image
import os

# Path for face image database
path = 'face_recog'

recognizer = cv2.face.LBPHFaceRecognizer_create()
detector = cv2.CascadeClassifier("haarcascades/haarcascade_frontalface_default.xml")

# function to get the images and label data
def getImagesAndLabels(path):

    imagePaths = [os.path.join(path,f) for f in os.listdir(path)]
    faceSamples=[]
    ids = []

    for imagePath in imagePaths:

        PIL_img = Image.open(imagePath).convert('L') # convert it to grayscale
        img_numpy = np.array(PIL_img,'uint8')

        id = int(os.path.split(imagePath)[-1].split(".")[1])
        faces = detector.detectMultiScale(img_numpy)

        for (x,y,w,h) in faces:
            faceSamples.append(img_numpy[y:y + h, x:x + w])
            ids.append(id)

    return faceSamples,ids

print ("n [INFO] Training faces. It will take a few seconds. Wait ...")
faces,ids = getImagesAndLabels(path)
recognizer.train(faces, np.array(ids))

# Save the model into trainer/trainer.yml (create the folder if it does not exist)
os.makedirs('trainer', exist_ok=True)
recognizer.write('trainer/trainer.yml') # recognizer.save() worked on Mac, but not on Pi

# Print the number of faces trained and end program
print("\n [INFO] {0} faces trained. Exiting Program".format(len(np.unique(ids))))

# Now we use the recognizer: if the accuracy for a detected face is greater
# than 75%, "hello, <name>" is written in green under the drawn square;
# otherwise "not recognized" is written in red.
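
That rule is not spelled out verbatim in the script below, so here is a minimal sketch of the described drawing logic (my own sketch, with hypothetical name/coordinate variables; LBPH's "confidence" is a distance where 0 is a perfect match, so accuracy is approximated as 100 - confidence):

accuracy = 100 - confidence  # LBPH confidence is a distance: lower is better
if accuracy > 75:
    # Recognized: greet in green under the drawn square
    cv2.putText(img, "hello, " + name, (x, y + h + 25), font, 1, (0, 255, 0), 2)
else:
    # Not confident enough: warn in red
    cv2.putText(img, "not recognized", (x, y + h + 25), font, 1, (0, 0, 255), 2)

The full recognition script: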
import cv2
import sys
import threading
import concurrent.futures
import pyttsx3
import HandTracking as htm
from time import sleep

recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer/trainer.yml')
cascadePath = "haarcascades/haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)

font = cv2.FONT_HERSHEY_SIMPLEX

#For hands
detector = htm.handDetector(maxHands=4)

# initiate id counter
id = 0
# initiate timer
timeFlip = 0
# initiate string
stringText = ""
# names related to ids: example ==> Marcelo: id=1,  etc
names = ['None', 'Sama', 'Codex', 'Davinci', 'Debora', 'Ian', '...']

# Initialize and start realtime video capture
cam = cv2.VideoCapture(0)
cam.set(3, 640)  # set video width
cam.set(4, 480)  # set video height

# Define min window size to be recognized as a face
minW = 0.1 * cam.get(3)
minH = 0.1 * cam.get(4)
# this function is used to convert the given string to speech


def text_to_speech(text):
    engine = pyttsx3.init()
    engine.setProperty('rate', 220)
    engine.say(text)
    engine.runAndWait()
    del engine


def typing(text):
    for char in text:
        sleep(0.04)
        sys.stdout.write(char)
        sys.stdout.flush()


def parallel(text):
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        future_tasks = {executor.submit(text_to_speech, text), executor.submit(typing, text)}
        for future in concurrent.futures.as_completed(future_tasks):
            try:
                data = future.result()
            except Exception as e:
                print(e)


def multiThread_with_TTS(text):
    threading.Thread(
        target=parallel, args=(text,), daemon=True
    ).start()


def multiThread_with_HANDS(img):
    threading.Thread(
        target=detector.findHands, args=(img,), daemon=True
    ).start()
    # lmList, bbox = detector.findPosition(img)
    threading.Thread(
        target=detector.findPosition, args=(img,), daemon=True
    ).start()
    if detector.fingersUp() is not None:
        print(detector.fingersUp())


while True:
    ret, img = cam.read()
    imgH = detector.findHands(img)
    lmList, bbox = detector.findPosition(imgH)
    if detector.fingersUp() is not None:
        print(detector.fingersUp())
    #multiThread_with_HANDS(img)
    img = cv2.flip(img, 1)  # mirror the image horizontally
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.2,
        minNeighbors=5,
        minSize=(int(minW), int(minH)),
    )
    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        id, confidence = recognizer.predict(gray[y:y + h, x:x + w])
        # Check if confidence is less than 100 ==> "0" is a perfect match
        if confidence < 100:
            id = names[id]
            confidence = "  {0}%".format(round(100 - confidence))
            if timeFlip == 0:
                timeFlip = 1
                if stringText != id:
                    stringText = id
                    multiThread_with_TTS(" Hi "   id)
                    #parallel(" Hi "   id)
            else:
                timeFlip += 1
                if timeFlip > 35:
                    timeFlip = 0
        else:
            id = "unknown"
            confidence = "  {0}%".format(round(100 - confidence))

        cv2.putText(img, str(id), (x + 5, y - 5), font, 1, (255, 255, 255), 2)
        cv2.putText(img, str(confidence), (x + 5, y + h - 5), font, 1, (255, 255, 0), 1)

    cv2.imshow('camera', img)

    k = cv2.waitKey(10) & 0xff  # Press 'ESC' to exit the video
    if k == 27:
        parallel(" Bye "   id   ", see you later!")
        break
    if k == 32 and stringText != id:
        multiThread_with_TTS(" You are "   id ", hi!")
        #parallel(" You are "   id ", hi!")
    if k == 32 and stringText == id:
        multiThread_with_TTS(" You are "   id   ", hi again!")
        #parallel(" You are "   id   ", hi again!")

# Do a bit of cleanup
print("n [INFO] Exiting Program and cleanup stuff")
cam.release()

cv2.destroyAllWindows() 

The problem is that in the last script the thread calls happen inside the loop: while one is still executing, the next call starts and overlaps the previous one, and if I do it sequentially instead, the frame rate drops.

def multiThread_with_HANDS(img):
    threading.Thread(
        target=detector.findHands, args=(img,), daemon=True
    ).start()
    # lmList, bbox = detector.findPosition(img)
    threading.Thread(
        target=detector.findPosition, args=(img,), daemon=True
    ).start()
    if detector.fingersUp() is not None:
        print(detector.fingersUp())
 

Does anyone have an idea how to solve this without drastically lowering the frame rate?
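
For illustration, here is a minimal sketch of the kind of alternative I have been considering: a single long-lived worker thread fed through a one-slot queue, so the main loop just overwrites the pending frame (stale frames get dropped instead of piling up) and only one hand detection runs at a time. The HandWorker class and its method names are my own invention:

import queue
import threading

class HandWorker:
    def __init__(self, detector):
        self.detector = detector
        self.frames = queue.Queue(maxsize=1)  # one slot: new frames replace old ones
        self.lock = threading.Lock()
        self.fingers = None                   # latest result, read by the main loop
        threading.Thread(target=self._run, daemon=True).start()

    def submit(self, img):
        # Overwrite the pending frame instead of queueing behind it
        try:
            self.frames.put_nowait(img)
        except queue.Full:
            try:
                self.frames.get_nowait()
            except queue.Empty:
                pass
            self.frames.put_nowait(img)

    def _run(self):
        while True:
            img = self.frames.get()           # blocks until a frame arrives
            self.detector.findHands(img, draw=False)
            self.detector.findPosition(img, draw=False)
            with self.lock:
                self.fingers = self.detector.fingersUp()

    def latest_fingers(self):
        with self.lock:
            return self.fingers

With worker = HandWorker(htm.handDetector()), the main loop only calls worker.submit(img) and worker.latest_fingers(), both of which return immediately, so the face recognition keeps its frame rate while the hand result lags at most one detection behind.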

Comments:

1. Are you comparing the face from the camera against a list of faces? One suggestion: instead of checking each face in the database individually, check them in parallel. I.e. instead of: for face in faces: if (face == camera image): doThis() do something like: faces.ParallelMap(lambda x: if (x == camera image): doThis)
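
A minimal sketch of what that suggestion could look like in Python, assuming a hypothetical match_face comparison function (Python has no ParallelMap; ThreadPoolExecutor.map from the standard library is the closest equivalent). Note that with the LBPH recognizer used above, the comparison against all trained faces already happens inside a single recognizer.predict() call, so this pattern only applies when faces are compared image by image:

from concurrent.futures import ThreadPoolExecutor

def match_face(known_face, camera_image):
    # Hypothetical one-to-one comparison: return an id on a match, else None
    ...

def find_match(known_faces, camera_image):
    # Compare the camera frame against every stored face in parallel
    # instead of looping over them one by one
    with ThreadPoolExecutor() as executor:
        results = executor.map(lambda face: match_face(face, camera_image),
                               known_faces)
    # Return the first successful match, if any
    return next((r for r in results if r is not None), None)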