[ Python ] 미디어파이프(Mediapipe)를 이용한 가상 마우스

Mediapipe Hands는 매우 높은 인식율과 성능을 보여주는 손 및 손가락 Tracking 솔루션입니다. 손의 모양과 움직임을 인식하여 손의 제스처 기반으로 다양한 기능을 제공 할 수 있습니다. 이미지에서 손 랜드마크 모델을 통해 21개의 관절 키포인트를 예측합니다.

Mediapipe와 PyAutoGUI를 이용하여 매우 간단하게 가상 마우스 만들 수 있습니다. 참고로 PyAutoGUI는 마우스와 키보드를 제어하는 Python 모듈입니다.

Install

 pip install mediapipe
pip install pyautogui

Class declaration

손의 제스처를 확인하기 위한 Class를 정의합니다. Mediapipe 라이브러리를 이용하며 손의 위치와 펼쳐진 손가락의 정보, 거리 측정 등을 수행합니다.

 import math
import cv2
import mediapipe as mp
import time
 
class handDetector():
    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.5, trackCon=0.5):
        self.lmList = []
        self.results = None
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplex = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon
 
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.modelComplex, self.detectionCon, self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]
 
    def findHands(self, img, draw=True):
        rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(rgb_img)
        # print(results.multi_hand_landmarks)
 
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
 
        return img
 
    def findPosition(self, img, handNo=0, draw=True):
        xList = []
        yList = []
        bbox = []
        self.lmList = []
 
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
 
            for id, lm in enumerate(myHand.landmark):
                # print(id, lm)
                h, w, c = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                xList.append(cx)
                yList.append(cy)
                # print(id, cx, cy)
                self.lmList.append([id, cx, cy])
 
                if draw:
                    cv2.circle(img, (cx, cy), 6, (0, 0, 255), cv2.FILLED)
 
            xmin, xmax = min(xList), max(xList)
            ymin, ymax = min(yList), max(yList)
            bbox = xmin, ymin, xmax, ymax
 
            if draw:
                cv2.rectangle(img, (bbox[0]-20, bbox[1]-20), (bbox[2]+20, bbox[3]+20), (0, 255, 0), 2)
 
        return self.lmList, bbox
 
    def fingersUp(self):
        fingers = []
        # Thumb
        if self.lmList[self.tipIds[0]][1] < self.lmList[self.tipIds[0] - 1][1]:
            fingers.append(1)
        else:
            fingers.append(0)
        # 4 Fingers
        for id in range(1, 5):
            if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
                fingers.append(1)
            else:
                fingers.append(0)
 
        return fingers
 
    def findDistance(self, p1, p2, img, draw=True):
        x1, y1 = self.lmList[p1][1:]
        x2, y2 = self.lmList[p2][1:]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
 
        if draw:
            cv2.line(img, (x1, y1), (x2, y2), (0, 255, 255), 3)
            cv2.circle(img, (x1, y1), 6, (0, 255, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), 6, (0, 255, 255), cv2.FILLED)
            cv2.circle(img, (cx, cy), 6, (0, 255, 255), cv2.FILLED)
 
        length = math.hypot(x2 - x1, y2 - y1)
 
        return length, img, [x1, y1, x2, y2, cx, cy]
 
    def findAngle(self, p1, p2, p3, img, draw=True):
        # Get the landmarks
        x1, y1 = self.lmList[p1][1:]
        x2, y2 = self.lmList[p2][1:]
        x3, y3 = self.lmList[p3][1:]
        # Calculate the Angle
        angle = math.degrees(math.atan2(y3 - y2, x3 - x2) - math.atan2(y1 - y2, x1 - x2))
        if angle < 0:
            angle += 360
 
        # print(angle)
 
        # Draw
        if draw:
            cv2.line(img, (x1, y1), (x2, y2), (255, 255, 255), 3)
            cv2.line(img, (x3, y3), (x2, y2), (255, 255, 255), 3)
            cv2.circle(img, (x1, y1), 15, (0, 0, 255), cv2.FILLED)
            cv2.circle(img, (x1, y1), 15, (0, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), 15, (0, 0, 255), cv2.FILLED)
            # cv2.putText(img, str(int(angle)), (x2 - 50, y2 + 50), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), 2)
 
        return angle

Run

손가락 제스처를 이용하여 Mouse를 동작합니다.

 import cv2
import time
import numpy as np
import hand_detector as hd
import pyautogui
 
 
wCam, hCam = 640, 480
frameR = 100
smoothening = 7
 
pTime = 0
plocX, plocY = 0, 0
clocX, clocY = 0, 0
 
cap = cv2.VideoCapture(0)
cap.set(3, wCam)
cap.set(4, hCam)
detector = hd.handDetector(detectionCon=0.7)
wScr, hScr = pyautogui.size()
print(wScr, hScr)
 
while True:
    success, img = cap.read()
    img = detector.findHands(img)
    lmList, bbox = detector.findPosition(img)
    output = img.copy()
 
    if len(lmList) != 0:
        # print(lmList[4], lmList[8])
        x1, y1 = lmList[8][1:]
        x2, y2 = lmList[12][1:]
 
        fingers = detector.fingersUp()
        # print(fingers)
        cv2.rectangle(img, (frameR, frameR), (wCam - frameR, hCam - frameR), (205, 250, 255), -1)
        img = cv2.addWeighted(img, 0.5, output, 1 - .5, 0, output)
 
        # Only Index Finger : Moving Mode
        if fingers[1] == 1 and fingers[2] == 0:
            # Convert Coordinates
            x3 = np.interp(x1, (frameR, wCam - frameR), (0, wScr))
            y3 = np.interp(y1, (frameR, hCam - frameR), (0, hScr))
 
            # Smoothen Values
            clocX = plocX + (x3 - plocX) / smoothening
            clocY = plocY + (y3 - plocY) / smoothening
 
            # Move Mouse
            pyautogui.moveTo(wScr - clocX, clocY)
            cv2.circle(img, (x1, y1), 6, (255, 28, 0), cv2.FILLED)
            plocX, plocY = clocX, clocY
            # cv2.putText(img, 'Moving Mode', (20, 50), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 2)
 
        # Both Index and middle fingers are up : Clicking Mode
        if fingers[1] == 1 and fingers[2] == 1:
            # Find distance between fingers
            length, img, lineInfo = detector.findDistance(8, 12, img)
 
            # Click mouse if distance short
            if length < 40:
                cv2.circle(img, (lineInfo[4], lineInfo[5]), 6, (0, 255, 0), cv2.FILLED)
                # cv2.putText(img, 'Click!!', (20, 50), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)
                pyautogui.click()
 
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime
 
    cv2.imshow("Vitual mouse monitor", cv2.flip(img, 1))
    cv2.setWindowProperty("Vitual mouse monitor", cv2.WND_PROP_TOPMOST, 1)
    cv2.waitKey(1)

수행결과

동작은 잘됩니다. 하지만 불편합니다. 그래도 놀라운 것은 CPU환경에서도 매우 빠르며 인식이 잘된다는 것입니다.

Source

[ Github ] virtual_mouse

이 글은 (새창열림) 본 저작자 표시 규칙 하에 배포할 수 있습니다. 자세한 내용은 Creative Commons 라이선스를 확인하세요.

'Tech & Development > AI' 카테고리의 다른 글

[ Pynecone ] ChatGPT App 만들기 (Python) (0)	2023.03.28
[ Pynecone ] DALL·E 모델로 이미지를 생성 App 만들기 (Python) (0)	2023.03.28
Fine tuning GPT3 Model (0)	2023.01.04
Object Detection 정리 (History) (0)	2022.12.14
[ Python ] Object Detection using MobileNet SSD (D/L) (0)	2022.11.29

다른 글

다른 글 더 둘러보기

내 블로그 - 관리자 홈 전환	`Q` `Q`
새 글 쓰기	`W` `W`

글 수정 (권한 있는 경우)	`E` `E`
댓글 영역으로 이동	`C` `C`

이 페이지의 URL 복사	`S` `S`
맨 위로 이동	`T` `T`
티스토리 홈 이동	`H` `H`
단축키 안내	`Shift` + `/` `⇧` + `/`

[ Python ] 미디어파이프(Mediapipe)를 이용한 가상 마우스

Install

Class declaration

Run

수행결과

Source

'Tech & Development > AI' 카테고리의 다른 글

댓글

이 글 공유하기

티스토리툴바

단축키

내 블로그

블로그 게시글

모든 영역

Install

Class declaration

Run

수행결과

Source

'Tech & Development > AI' 카테고리의 다른 글

댓글

이 글 공유하기

다른 글

[ Pynecone ] ChatGPT App 만들기 (Python)

[ Pynecone ] DALL·E 모델로 이미지를 생성 App 만들기 (Python)

Fine tuning GPT3 Model

Object Detection 정리 (History)

티스토리툴바

단축키

내 블로그

블로그 게시글

모든 영역