Spaces:

Anushree1
/

Facial_Handmark_recognition

Runtime error

App Files Files Community

Anushree1 committed on Nov 21, 2024

Commit

7728777

verified ·

1 Parent(s): 7fbcbe9

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -77

app.py CHANGED Viewed

@@ -1,83 +1,75 @@
 import cv2
 import mediapipe as mp
-import tensorflow as tf
 import numpy as np
 import gradio as gr
-# MediaPipe setup for hand and face landmark detection
-mp_face_mesh = mp.solutions.face_mesh
-mp_hands = mp.solutions.hands
-# Load the pre-trained sign language model
-model = tf.keras.models.load_model('sign_language_model.h5')
-# Initialize MediaPipe Hands and FaceMesh
-hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
-face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.7, min_tracking_confidence=0.7)
-# Function for webcam input and processing landmarks
-def process_frame(frame):
-    # Convert the frame to RGB (as MediaPipe uses RGB images)
-    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    results_hands = hands.process(image_rgb)
-    results_face = face_mesh.process(image_rgb)
-    # Draw landmarks for hands
-    if results_hands.multi_hand_landmarks:
-        for hand_landmarks in results_hands.multi_hand_landmarks:
-            mp.solutions.drawing_utils.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
-    # Draw landmarks for face
-    if results_face.multi_face_landmarks:
-        for face_landmarks in results_face.multi_face_landmarks:
-            mp.solutions.drawing_utils.draw_landmarks(frame, face_landmarks, mp_face_mesh.FACEMESH_CONTOURS)
-    # Convert back to BGR for OpenCV
-    return frame
-# Function for performing sign language gesture prediction
-def predict_gesture(frame):
-    # Preprocessing the frame for gesture recognition
-    image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-    image = cv2.resize(image, (28, 28))
-    image = np.expand_dims(image, axis=-1)
-    image = image / 255.0
-    image = np.expand_dims(image, axis=0)
-    # Prediction using the loaded model
-    prediction = model.predict(image)
-    predicted_class = np.argmax(prediction, axis=1)
-    # Mapping the predicted class to corresponding ASL alphabet
-    asl_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    return asl_alphabet[predicted_class[0]]
-# Gradio Interface for live webcam input and prediction
-def webcam_input():
-    cap = cv2.VideoCapture(0)  # Open the webcam (try 1 or 2 if 0 doesn't work)
-    if not cap.isOpened():
-        return "Error: Cannot open camera"
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            break
-        # Process the frame to detect landmarks and make predictions
-        processed_frame = process_frame(frame)
-        prediction = predict_gesture(processed_frame)
-        # Display predicted gesture on the frame
-        cv2.putText(processed_frame, f"Prediction: {prediction}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
-        # Show the frame with predictions
-        cv2.imshow("Hand and Facial Landmark Recognition", processed_frame)
-        if cv2.waitKey(1) & 0xFF == ord('q'):
-            break
-    cap.release()
-    cv2.destroyAllWindows()
-# Start the Gradio interface
-gr.Interface(fn=webcam_input, live=True, capture_video=True).launch()

 import cv2
 import mediapipe as mp
 import numpy as np
+import tensorflow as tf
 import gradio as gr
+# Load the pre-trained Keras classifier once at import time so every request
+# reuses it. The path is relative, so "sign_language_model.h5" must be present
+# in the process's working directory.
+model = tf.keras.models.load_model("sign_language_model.h5")
+# MediaPipe helpers: the hand-landmark solution and the drawing utilities used
+# to overlay detected landmarks on the frame.
+mp_hands = mp.solutions.hands
+mp_drawing = mp.solutions.drawing_utils
+# Class names, indexed by the argmax of the model output in recognize_sign.
+# NOTE(review): these five entries are placeholders; the list must have one
+# entry per model output class, in training order — confirm against the model.
+labels = ["A", "B", "C", "D", "E"]  # Example labels
+# Process webcam frame and predict sign language gesture
+def recognize_sign(image):
+    # Convert the input image to RGB
+    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    # MediaPipe Hand Detection
+    with mp_hands.Hands(static_image_mode=False,
+                        max_num_hands=1,
+                        min_detection_confidence=0.7,
+                        min_tracking_confidence=0.7) as hands:
+        result = hands.process(rgb_image)
+        if result.multi_hand_landmarks:
+            for hand_landmarks in result.multi_hand_landmarks:
+                # Draw landmarks on the image
+                mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
+                # Extract hand landmark coordinates as features
+                landmarks = []
+                for lm in hand_landmarks.landmark:
+                    landmarks.extend([lm.x, lm.y, lm.z])
+                # Reshape data for prediction
+                features = np.array(landmarks).reshape(1, -1)
+                # Predict gesture
+                prediction = model.predict(features)
+                gesture = labels[np.argmax(prediction)]
+                # Display the predicted gesture on the image
+                h, w, _ = image.shape
+                cv2.putText(image, gesture, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+                return gesture, image
+        return "No hand detected", image
+# Gradio interface wrapper
+def gradio_wrapper(image):
+    # Convert Gradio input to OpenCV format
+    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    gesture, annotated_image = recognize_sign(image)
+    # Convert the annotated image back to RGB for display
+    annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
+    return gesture, annotated_image
+# Create Gradio Interface
+interface = gr.Interface(
+    fn=gradio_wrapper,
+    inputs=gr.inputs.Image(source="webcam", tool=None),
+    outputs=[gr.outputs.Textbox(label="Predicted Gesture"),
+             gr.outputs.Image(label="Annotated Image")],
+    live=True,
+    title="Sign Language Recognition",
+    description="Predicts sign language gestures using TensorFlow and MediaPipe."
+)
+# Launch the Gradio app
+if __name__ == "__main__":
+    interface.launch()