import gradio as gr
import torch  # backend required by the transformers pipeline
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from transformers import pipeline
import logging
from typing import Tuple, List, Dict

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Model configuration
MODEL_NAME = "abhilash88/face-emotion-detection"

# Emotion labels mapping
EMOTION_LABELS = {
    'LABEL_0': 'angry',
    'LABEL_1': 'disgust',
    'LABEL_2': 'fear',
    'LABEL_3': 'happy',
    'LABEL_4': 'sad',
    'LABEL_5': 'surprise',
    'LABEL_6': 'neutral'
}
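# NOTE: the LABEL_X -> name mapping above assumes the checkpoint's label order.
# If the model config ships human-readable names, they could be read directly
# instead (illustrative sketch, not used by the app):
#   from transformers import AutoConfig
#   id2label = AutoConfig.from_pretrained(MODEL_NAME).id2label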
# Emotion colors for visualization
EMOTION_COLORS = {
    'angry': '#FF4444',
    'disgust': '#AA4444',
    'fear': '#4444FF',
    'happy': '#44FF44',
    'sad': '#4444AA',
    'surprise': '#FFAA44',
    'neutral': '#AAAAAA'
}

# Global variables for the models
emotion_classifier = None
face_cascade = None
def load_models():
    """Load the emotion detection model and face cascade"""
    global emotion_classifier, face_cascade
    try:
        logger.info(f"Loading emotion detection model: {MODEL_NAME}")
        # Try loading with different configurations
        try:
            emotion_classifier = pipeline(
                "image-classification",
                model=MODEL_NAME,
                top_k=None
            )
        except Exception as e1:
            logger.warning(f"Failed with top_k=None, trying without: {e1}")
            try:
                emotion_classifier = pipeline(
                    "image-classification",
                    model=MODEL_NAME
                )
            except Exception as e2:
                logger.warning(f"Failed with default config, trying basic setup: {e2}")
                # Fall back to manual model loading
                from transformers import AutoImageProcessor, AutoModelForImageClassification
                processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
                model = AutoModelForImageClassification.from_pretrained(MODEL_NAME)
                emotion_classifier = pipeline(
                    "image-classification",
                    model=model,
                    image_processor=processor
                )
        logger.info("Emotion detection model loaded successfully")

        # Load the OpenCV face cascade
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        if face_cascade.empty():
            logger.error("Failed to load face cascade classifier")
            return False
        logger.info("Face detection cascade loaded successfully")
        return True
    except Exception as e:
        logger.error(f"Error loading models: {e}")
        return False
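# Minimal smoke test (illustrative only, not executed by the app): once
# load_models() has returned True, the classifier accepts any PIL image:
#   preds = emotion_classifier(Image.new("RGB", (224, 224)))
#   # -> list of {"label": ..., "score": ...} dicts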
def detect_faces_improved(image: np.ndarray, min_face_size: int = 80) -> List[Tuple[int, int, int, int]]:
    """
    Improved face detection with stricter parameters to reduce false positives
    and merge overlapping detections
    """
    try:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        # Use stricter parameters to reduce false positives
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.05,  # Smaller scale factor for more careful detection
            minNeighbors=8,    # Higher min neighbors to be more strict
            minSize=(min_face_size, min_face_size),  # Larger minimum size
            maxSize=(int(min(image.shape[:2]) * 0.8), int(min(image.shape[:2]) * 0.8)),  # Maximum size
            flags=cv2.CASCADE_SCALE_IMAGE | cv2.CASCADE_DO_CANNY_PRUNING
        )
        if len(faces) == 0:
            return []

        # Convert to a list and merge overlapping detections
        faces_list = faces.tolist()
        merged_faces = merge_overlapping_faces(faces_list)

        # Filter faces that are implausibly small or large relative to image size
        image_area = image.shape[0] * image.shape[1]
        filtered_faces = []
        for (x, y, w, h) in merged_faces:
            face_area = w * h
            # A face should cover at least 0.5% of the image area but not more than 80%
            if 0.005 < (face_area / image_area) < 0.8:
                # Additional validation: check the aspect ratio (faces are roughly square)
                aspect_ratio = w / h
                if 0.7 <= aspect_ratio <= 1.4:  # Allow some variance but not extreme rectangles
                    filtered_faces.append((x, y, w, h))
        return filtered_faces
    except Exception as e:
        logger.error(f"Error detecting faces: {e}")
        return []
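# Example (illustrative): for an RGB array `img` of shape (H, W, 3),
#   boxes = detect_faces_improved(img, min_face_size=80)
# returns a list of (x, y, w, h) tuples in pixel coordinates, already merged
# and filtered by relative size and aspect ratio.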
def merge_overlapping_faces(faces: List[Tuple[int, int, int, int]], overlap_threshold: float = 0.3) -> List[Tuple[int, int, int, int]]:
    """
    Merge overlapping face detections to avoid duplicates
    """
    if len(faces) <= 1:
        return faces

    # Greedily merge boxes whose IoU exceeds the threshold
    merged = []
    used = [False] * len(faces)
    for i in range(len(faces)):
        if used[i]:
            continue
        current_face = faces[i]
        merged_face = list(current_face)
        count = 1
        used[i] = True
        for j in range(i + 1, len(faces)):
            if used[j]:
                continue
            if calculate_iou(current_face, faces[j]) > overlap_threshold:
                # Merge by taking a running average of the coordinates
                merged_face[0] = (merged_face[0] * count + faces[j][0]) // (count + 1)
                merged_face[1] = (merged_face[1] * count + faces[j][1]) // (count + 1)
                merged_face[2] = (merged_face[2] * count + faces[j][2]) // (count + 1)
                merged_face[3] = (merged_face[3] * count + faces[j][3]) // (count + 1)
                count += 1
                used[j] = True
        merged.append(tuple(merged_face))
    return merged
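# Example (illustrative): two detections of the same face, e.g.
#   merge_overlapping_faces([(100, 100, 80, 80), (104, 102, 84, 78)])
# have IoU ~0.84 (well above the 0.3 threshold) and collapse into a single
# coordinate-averaged box, (102, 101, 82, 79).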
def calculate_iou(box1: Tuple[int, int, int, int], box2: Tuple[int, int, int, int]) -> float:
    """Calculate Intersection over Union of two bounding boxes in (x, y, w, h) form"""
    x1, y1, w1, h1 = box1
    x2, y2, w2, h2 = box2

    # Calculate the intersection
    x_left = max(x1, x2)
    y_top = max(y1, y2)
    x_right = min(x1 + w1, x2 + w2)
    y_bottom = min(y1 + h1, y2 + h2)
    if x_right < x_left or y_bottom < y_top:
        return 0.0
    intersection = (x_right - x_left) * (y_bottom - y_top)

    # Calculate the union
    area1 = w1 * h1
    area2 = w2 * h2
    union = area1 + area2 - intersection
    return intersection / union if union > 0 else 0.0
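# Lightweight sanity checks for calculate_iou (added here; they run once at
# import time and are essentially free): identical boxes give IoU 1.0,
# disjoint boxes give 0.0.
assert abs(calculate_iou((0, 0, 100, 100), (0, 0, 100, 100)) - 1.0) < 1e-9
assert calculate_iou((0, 0, 50, 50), (100, 100, 50, 50)) == 0.0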
def predict_emotion(face_image: Image.Image) -> List[Dict]:
    """Predict emotion for a single face"""
    try:
        if emotion_classifier is None:
            logger.warning("Emotion classifier not loaded, returning neutral")
            return [{"label": "neutral", "score": 1.0}]

        # Normalize the crop: force RGB and resize for consistency and speed
        face_image = face_image.convert("RGB").resize((224, 224))

        # The pipeline returns results in different formats depending on configuration
        results = emotion_classifier(face_image)

        # Handle the different return formats and map labels to emotion names
        processed_results = []
        if isinstance(results, list):
            for result in results:
                if isinstance(result, dict) and 'label' in result and 'score' in result:
                    # Map LABEL_X to the actual emotion name
                    emotion_name = EMOTION_LABELS.get(result['label'], result['label'])
                    processed_results.append({
                        'label': emotion_name,
                        'score': result['score']
                    })
        elif isinstance(results, dict):
            # Single prediction
            emotion_name = EMOTION_LABELS.get(results['label'], results['label'])
            processed_results = [{
                'label': emotion_name,
                'score': results['score']
            }]

        if not processed_results:
            logger.warning("No valid results, returning neutral")
            return [{"label": "neutral", "score": 1.0}]
        return processed_results
    except Exception as e:
        logger.error(f"Error predicting emotion: {e}")
        return [{"label": "neutral", "score": 1.0}]
def draw_emotion_results(image: Image.Image, faces: List, emotions: List, confidence_threshold: float = 0.5) -> Image.Image:
    """Draw bounding boxes and emotion labels on the image"""
    try:
        draw = ImageDraw.Draw(image)
        # Try to load a font, falling back to the default if none is available
        try:
            font = ImageFont.truetype("arial.ttf", 20)
        except OSError:
            try:
                font = ImageFont.truetype("DejaVuSans.ttf", 20)
            except OSError:
                font = ImageFont.load_default()

        for i, (x, y, w, h) in enumerate(faces):
            if i < len(emotions):
                # Get the top emotion above the threshold
                valid_emotions = [e for e in emotions[i] if e['score'] >= confidence_threshold]
                if not valid_emotions:
                    continue
                top_emotion = max(valid_emotions, key=lambda e: e['score'])
                emotion_label = top_emotion['label']
                confidence = top_emotion['score']

                # Get the color for this emotion
                color = EMOTION_COLORS.get(emotion_label, '#FFFFFF')

                # Draw the bounding box with a thick line
                draw.rectangle([(x, y), (x + w, y + h)], outline=color, width=4)

                # Format the emotion label and confidence
                label_text = f"{emotion_label.upper()}"
                confidence_text = f"{confidence:.1%}"

                # Calculate the text size for the background
                bbox1 = draw.textbbox((0, 0), label_text, font=font)
                bbox2 = draw.textbbox((0, 0), confidence_text, font=font)
                text_width = max(bbox1[2] - bbox1[0], bbox2[2] - bbox2[0]) + 20
                text_height = (bbox1[3] - bbox1[1]) + (bbox2[3] - bbox2[1]) + 15

                # Draw a filled background for the text
                draw.rectangle(
                    [(x, y - text_height - 10), (x + text_width, y)],
                    fill=color
                )
                # Draw the emotion label, then the confidence below it
                draw.text((x + 10, y - text_height - 5), label_text, fill='white', font=font)
                draw.text((x + 10, y - text_height + 20), confidence_text, fill='white', font=font)
        return image
    except Exception as e:
        logger.error(f"Error drawing results: {e}")
        return image
def process_image(image: Image.Image, confidence_threshold: float = 0.5, min_face_size: int = 80) -> Tuple[Image.Image, str]:
    """Process an image for emotion detection with improved face detection"""
    try:
        if image is None:
            return None, "No image provided"

        # Convert PIL to a numpy array
        image_np = np.array(image)

        # Detect faces with the improved method
        faces = detect_faces_improved(image_np, min_face_size)
        if not faces:
            return image, "❌ No faces detected in the image. Try adjusting the minimum face size or use an image with clearer faces."

        # Process each face
        emotions_list = []
        valid_faces = []
        for (x, y, w, h) in faces:
            # Extract the face region with some padding for context
            padding = max(10, min(w, h) // 10)
            x_pad = max(0, x - padding)
            y_pad = max(0, y - padding)
            w_pad = min(image.width - x_pad, w + 2 * padding)
            h_pad = min(image.height - y_pad, h + 2 * padding)
            face_region = image.crop((x_pad, y_pad, x_pad + w_pad, y_pad + h_pad))

            # Predict emotion
            emotions = predict_emotion(face_region)

            # Keep the face only if some emotion meets the confidence threshold
            valid_emotions = [e for e in emotions if e['score'] >= confidence_threshold]
            if valid_emotions:
                emotions_list.append(emotions)
                valid_faces.append((x, y, w, h))

        if not valid_faces:
            return image, f"⚠️ {len(faces)} face(s) detected but no emotions above the {confidence_threshold:.1f} confidence threshold. Try lowering the threshold."

        # Draw the results
        result_image = draw_emotion_results(image.copy(), valid_faces, emotions_list, confidence_threshold)

        # Emoji used in the summary text
        emotion_emoji_map = {
            'angry': '😠', 'disgust': '🤢', 'fear': '😨',
            'happy': '😊', 'sad': '😢', 'surprise': '😲', 'neutral': '😐'
        }

        # Create the summary text
        summary_lines = [f"✅ **Successfully detected {len(valid_faces)} face(s) with confident emotion predictions:**\n"]
        for i, emotions in enumerate(emotions_list):
            # Sort emotions by confidence
            sorted_emotions = sorted(emotions, key=lambda e: e['score'], reverse=True)
            top_emotion = sorted_emotions[0]
            emotion_emoji = emotion_emoji_map.get(top_emotion['label'], '😐')
            summary_lines.append(f"**Face {i+1}:** {emotion_emoji} **{top_emotion['label'].title()}** ({top_emotion['score']:.1%} confidence)")

            # Add the runner-up emotions for detailed analysis
            if len(sorted_emotions) > 1:
                summary_lines.append("   📊 Other detected emotions:")
                for emotion in sorted_emotions[1:4]:  # Top 3 others
                    if emotion['score'] >= confidence_threshold:
                        emoji = emotion_emoji_map.get(emotion['label'], '😐')
                        summary_lines.append(f"   • {emoji} {emotion['label'].title()}: {emotion['score']:.1%}")
            summary_lines.append("")

        summary = "\n".join(summary_lines)
        return result_image, summary
    except Exception as e:
        logger.error(f"Error processing image: {e}")
        return image, f"❌ Error processing image: {str(e)}"
def analyze_emotions_batch(files) -> str:
    """Analyze emotions in multiple uploaded files"""
    try:
        if not files:
            return "No files provided"

        all_results = []
        for idx, file in enumerate(files):
            try:
                # Gradio may hand back tempfile objects or plain path strings
                # depending on version; handle both
                file_path = file if isinstance(file, str) else file.name
                image = Image.open(file_path).convert("RGB")

                # Convert PIL to a numpy array
                image_np = np.array(image)

                # Detect faces with the improved method
                faces = detect_faces_improved(image_np)
                if not faces:
                    all_results.append(f"📁 File {idx+1} ({file_path}): No faces detected")
                    continue

                # Process each face
                image_emotions = []
                for (x, y, w, h) in faces:
                    # Extract the face region
                    face_region = image.crop((x, y, x + w, y + h))
                    # Predict emotion
                    emotions = predict_emotion(face_region)
                    top_emotion = max(emotions, key=lambda e: e['score'])
                    image_emotions.append(f"{top_emotion['label']} ({top_emotion['score']:.1%})")
                all_results.append(f"📁 File {idx+1} ({file_path}): {len(faces)} face(s) - {', '.join(image_emotions)}")
            except Exception as e:
                all_results.append(f"📁 File {idx+1}: Error processing - {str(e)}")
        return "\n".join(all_results)
    except Exception as e:
        logger.error(f"Error in batch analysis: {e}")
        return f"Error in batch analysis: {str(e)}"
def get_emotion_statistics(image: Image.Image) -> str:
    """Get detailed emotion statistics for an image"""
    try:
        if image is None:
            return "No image provided"

        # Convert PIL to a numpy array
        image_np = np.array(image)

        # Detect faces with the improved method
        faces = detect_faces_improved(image_np)
        if not faces:
            return "❌ No faces detected in the image"

        # Collect all emotions
        all_emotions = {}
        face_details = []
        for i, (x, y, w, h) in enumerate(faces):
            # Extract the face region
            face_region = image.crop((x, y, x + w, y + h))
            # Predict emotion
            emotions = predict_emotion(face_region)

            # Store the face details
            sorted_emotions = sorted(emotions, key=lambda e: e['score'], reverse=True)
            face_details.append({
                'face_num': i + 1,
                'position': (x, y, w, h),
                'emotions': sorted_emotions
            })
            for emotion_data in emotions:
                emotion = emotion_data['label']
                score = emotion_data['score']
                if emotion not in all_emotions:
                    all_emotions[emotion] = []
                all_emotions[emotion].append(score)

        # Calculate statistics
        stats_lines = [f"📊 **Detailed Emotion Analysis for {len(faces)} face(s):**\n"]

        # Per-face breakdown
        for face_detail in face_details:
            stats_lines.append(f"### 👤 Face {face_detail['face_num']}:")
            top_emotion = face_detail['emotions'][0]
            stats_lines.append(f"**Primary emotion:** {top_emotion['label'].title()} ({top_emotion['score']:.1%})")
            stats_lines.append("**All emotions detected:**")
            for emotion in face_detail['emotions']:
                # Text bar chart: e.g. a score of 0.85 -> 17 filled of 20 cells
                bar_length = int(emotion['score'] * 20)  # Scale to 20 chars
                bar = "█" * bar_length + "░" * (20 - bar_length)
                stats_lines.append(f"  {emotion['label'].title()}: {bar} {emotion['score']:.1%}")
            stats_lines.append("")

        # Overall statistics
        if len(faces) > 1:
            stats_lines.append("### 📈 Overall Statistics:")
            for emotion, scores in all_emotions.items():
                avg_score = np.mean(scores)
                max_score = np.max(scores)
                count = len(scores)
                stats_lines.append(f"**{emotion.title()}:**")
                stats_lines.append(f"  - Average confidence: {avg_score:.1%}")
                stats_lines.append(f"  - Maximum confidence: {max_score:.1%}")
                stats_lines.append(f"  - Faces showing this emotion: {count}/{len(faces)}")
                stats_lines.append("")
        return "\n".join(stats_lines)
    except Exception as e:
        logger.error(f"Error calculating statistics: {e}")
        return f"❌ Error calculating statistics: {str(e)}"
# Create the simplified Gradio interface
def create_interface():
    custom_css = """
    .main-header {
        text-align: center;
        color: #2563eb;
        margin-bottom: 2rem;
    }
    .gradio-container {
        max-width: 1200px;
        margin: auto;
    }
    """
    with gr.Blocks(
        title="Face Emotion Detection - Improved",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as iface:
        # Header
        gr.Markdown(
            """
            # 😊 Face Emotion Detection (Improved)
            ### Accurate emotion recognition with enhanced face detection

            This improved version includes a stricter face detection pipeline to reduce false positives
            and provides more accurate emotion classification for the detected faces.
            """,
            elem_classes=["main-header"]
        )

        with gr.Tab("🖼️ Single Image Analysis"):
            with gr.Row():
                with gr.Column(scale=1):
                    image_input = gr.Image(
                        label="Upload Image",
                        type="pil",
                        height=400
                    )
                    with gr.Row():
                        confidence_slider = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.5,
                            step=0.1,
                            label="🎯 Confidence Threshold",
                            info="Minimum confidence to display emotions"
                        )
                        face_size_slider = gr.Slider(
                            minimum=30,
                            maximum=200,
                            value=80,
                            step=10,
                            label="👤 Minimum Face Size",
                            info="Minimum face size (pixels) to detect"
                        )
                    analyze_btn = gr.Button("🔍 Analyze Emotions", variant="primary", size="lg")
                with gr.Column(scale=1):
                    output_image = gr.Image(
                        label="Emotion Detection Results",
                        height=400
                    )
                    result_text = gr.Textbox(
                        label="Detection Results",
                        lines=8,
                        show_copy_button=True
                    )
        with gr.Tab("📊 Detailed Statistics"):
            with gr.Row():
                with gr.Column(scale=1):
                    stats_image_input = gr.Image(
                        label="Upload Image for Statistical Analysis",
                        type="pil",
                        height=400
                    )
                    analyze_stats_btn = gr.Button("📊 Generate Detailed Statistics", variant="primary", size="lg")
                with gr.Column(scale=1):
                    stats_output = gr.Markdown(
                        value="Upload an image and click 'Generate Detailed Statistics' to see a comprehensive emotion analysis...",
                        label="Emotion Statistics"
                    )

        with gr.Tab("📁 Batch Processing"):
            with gr.Column():
                batch_images_input = gr.File(
                    label="Upload Multiple Images",
                    file_count="multiple",
                    file_types=["image"]
                )
                batch_process_btn = gr.Button("⚡ Process All Images", variant="primary", size="lg")
                batch_results_output = gr.Textbox(
                    label="Batch Processing Results",
                    lines=15,
                    show_copy_button=True
                )
        with gr.Tab("ℹ️ About & Tips"):
            gr.Markdown(
                """
                ## 🔧 Improvements Made

                ### ✅ Enhanced Face Detection
                - **Stricter parameters** to reduce false positives
                - **Overlap detection** to merge duplicate face detections
                - **Size filtering** to ignore unrealistic face sizes
                - **Aspect ratio validation** to filter out non-face rectangles

                ### 🎯 Better Accuracy
                - **Confidence thresholds** to filter uncertain predictions
                - **Improved preprocessing** for better emotion recognition
                - **Face padding** for better context in emotion detection

                ### 🚀 Performance Optimizations
                - **Removed the problematic live camera** feature
                - **Streamlined interface** for a better user experience
                - **Better error handling** and user feedback

                ## 🎭 Supported Emotions
                - 😠 **Angry** - Expressions of anger, frustration
                - 🤢 **Disgust** - Expressions of revulsion or distaste
                - 😨 **Fear** - Expressions of fear, anxiety
                - 😊 **Happy** - Expressions of joy, contentment
                - 😢 **Sad** - Expressions of sadness, sorrow
                - 😲 **Surprise** - Expressions of surprise, amazement
                - 😐 **Neutral** - Calm, neutral expressions

                ## 💡 Tips for Best Results
                1. **Use clear, well-lit images** with visible faces
                2. **Adjust the confidence threshold** if you get too many or too few results
                3. **Modify the minimum face size** based on your image resolution
                4. **Frontal face views** work better than profile shots
                5. **Avoid heavily shadowed or blurry faces**

                ## 🔧 Troubleshooting
                - **No faces detected?** Try lowering the minimum face size
                - **Too many false detections?** Increase the minimum face size or confidence threshold
                - **Missing obvious faces?** Lower the confidence threshold
                - **Multiple boxes on the same face?** The system now merges them automatically

                ---
                **Model:** [abhilash88/face-emotion-detection](https://huggingface.co/abhilash88/face-emotion-detection)
                """
            )
        # Event handlers
        analyze_btn.click(
            fn=process_image,
            inputs=[image_input, confidence_slider, face_size_slider],
            outputs=[output_image, result_text],
            api_name="analyze_image"
        )
        analyze_stats_btn.click(
            fn=get_emotion_statistics,
            inputs=stats_image_input,
            outputs=stats_output,
            api_name="get_statistics"
        )
        batch_process_btn.click(
            fn=analyze_emotions_batch,
            inputs=batch_images_input,
            outputs=batch_results_output,
            api_name="batch_process"
        )

        # Example images
        gr.Examples(
            examples=[
                "https://images.unsplash.com/photo-1507003211169-0a1dd7228f2d?w=400&h=400&fit=crop&crop=face",
                "https://images.unsplash.com/photo-1554151228-14d9def656e4?w=400&h=400&fit=crop&crop=face",
                "https://images.unsplash.com/photo-1472099645785-5658abf4ff4e?w=400&h=400&fit=crop&crop=face",
            ],
            inputs=image_input,
            label="🖼️ Try these example images"
        )

    return iface
# Initialize and launch
if __name__ == "__main__":
    logger.info("Initializing Improved Face Emotion Detection System...")
    if load_models():
        logger.info("Models loaded successfully!")
        iface = create_interface()
        iface.launch(
            share=False,
            show_error=True,
            server_name="0.0.0.0",
            server_port=7860,
            show_api=True
        )
    else:
        logger.error("Failed to load models. Please check your model configuration.")
        with gr.Blocks() as error_iface:
            gr.Markdown(
                """
                # ⚠️ Model Loading Error

                The emotion detection model failed to load. Please check:
                1. Network connectivity
                2. Model dependencies
                3. System logs for details
                """
            )
        error_iface.launch(
            share=False,
            show_error=True,
            server_name="0.0.0.0",
            server_port=7860
        )