import gradio as gr
import boto3
from botocore.exceptions import BotoCoreError, ClientError
from PIL import Image
import numpy as np
import io
import os
import subprocess

# Read the AWS credentials from environment variables (e.g. Space secrets)
aws_key_id = os.environ['aws_access_key_id']
aws_secret = os.environ['aws_secret_access_key']
# Initialize the AWS Rekognition client
try:
    client = boto3.client('rekognition',
                          region_name='us-east-1',
                          aws_access_key_id=aws_key_id,
                          aws_secret_access_key=aws_secret)
except (BotoCoreError, ClientError) as error:
    print('Error:', error)
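
# Note (an alternative, not part of the original app): boto3 can also resolve
# credentials from its default provider chain (the AWS_ACCESS_KEY_ID and
# AWS_SECRET_ACCESS_KEY environment variables, the ~/.aws/credentials file,
# or an attached IAM role), in which case the explicit keys above could be
# dropped:
#   client = boto3.client('rekognition', region_name='us-east-1')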
def speak_text(text):
    # Use the `espeak-ng` command to convert the text to speech and play it.
    # Passing the text as a list argument (rather than interpolating it into
    # an os.system string) avoids shell-quoting problems with apostrophes.
    subprocess.run(["espeak-ng", text])
def recognize_emotions(image):
    """
    Takes a webcam frame as a NumPy array and returns the emotion with the
    highest confidence level detected in the face, using AWS Rekognition.
    """
    # Convert the NumPy array to a PIL image
    pil_image = Image.fromarray(np.uint8(image))
    # Convert the PIL image to JPEG bytes
    with io.BytesIO() as output:
        pil_image.save(output, format="JPEG")
        contents = output.getvalue()
    # Perform face detection on the image using AWS Rekognition
    response = client.detect_faces(
        Image={
            'Bytes': contents
        },
        Attributes=['ALL']
    )
    # If no faces are detected, return None
    if not response['FaceDetails']:
        return None
    # Extract the emotions detected in the first face
    emotions = response['FaceDetails'][0]['Emotions']
    # Find the emotion with the highest confidence level
    max_confidence = 0
    max_emotion = ''
    for emotion in emotions:
        if emotion['Confidence'] > max_confidence:
            max_confidence = emotion['Confidence']
            max_emotion = emotion['Type']
    # Announce the result out loud
    speak_text(f'This person is {max_emotion}')
    # Return the emotion with the highest confidence level as a string
    return str(max_emotion)
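
# For reference (a sketch, not part of the original app): each entry in the
# Emotions list returned by detect_faces has a 'Type' and a 'Confidence' key,
# so the selection loop above could equivalently be written with max():
#   max_emotion = max(emotions, key=lambda e: e['Confidence'])['Type']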
# Create the Gradio interface
iface = gr.Interface(recognize_emotions,
                     inputs=gr.Image(source="webcam", streaming=True),
                     outputs="text",
                     title="How does this person feel?",
                     description="Helping you understand what others think")
# Launch the interface
iface.launch()