"""Gradio app for face emotion detection.

Faces are located with an OpenCV Haar cascade, each face crop is classified
by a Hugging Face image-classification pipeline, and the results are shown
as an annotated image, a text summary, per-image statistics, or a batch
report.
"""

import logging
import time
from typing import Dict, List, Optional, Tuple

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from transformers import pipeline

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model configuration
MODEL_NAME = "abhilash88/face-emotion-detection"

# Emotion labels mapping (raw pipeline label -> human-readable name)
EMOTION_LABELS = {
    'LABEL_0': 'angry',
    'LABEL_1': 'disgust',
    'LABEL_2': 'fear',
    'LABEL_3': 'happy',
    'LABEL_4': 'sad',
    'LABEL_5': 'surprise',
    'LABEL_6': 'neutral',
}

# Emotion colors for visualization
EMOTION_COLORS = {
    'angry': '#FF4444',
    'disgust': '#AA4444',
    'fear': '#4444FF',
    'happy': '#44FF44',
    'sad': '#4444AA',
    'surprise': '#FFAA44',
    'neutral': '#AAAAAA',
}

# Emoji shown next to each emotion in the text summary
EMOTION_EMOJIS = {
    'angry': '😠',
    'disgust': '🤢',
    'fear': '😨',
    'happy': '😊',
    'sad': '😢',
    'surprise': '😲',
    'neutral': '😐',
}
DEFAULT_EMOJI = '😐'

# Prediction returned whenever the classifier is unavailable or fails
NEUTRAL_FALLBACK = {"label": "neutral", "score": 1.0}

# Global variables for model
emotion_classifier = None
face_cascade = None


def _build_emotion_classifier():
    """Create the emotion pipeline, trying progressively simpler setups.

    Order: pipeline with ``top_k=None``, pipeline with defaults, then a
    pipeline built from an explicitly loaded model and image processor.
    An exception from the last attempt propagates to the caller.
    """
    try:
        return pipeline("image-classification", model=MODEL_NAME, top_k=None)
    except Exception as e1:
        logger.warning("Failed with top_k=None, trying without: %s", e1)

    try:
        return pipeline("image-classification", model=MODEL_NAME)
    except Exception as e2:
        logger.warning("Failed with default config, trying basic setup: %s", e2)

    # Fallback to manual model loading
    from transformers import AutoImageProcessor, AutoModelForImageClassification

    processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
    model = AutoModelForImageClassification.from_pretrained(MODEL_NAME)
    return pipeline(
        "image-classification",
        model=model,
        image_processor=processor,
    )


def load_models():
    """Load the emotion detection model and face cascade.

    Sets the module globals ``emotion_classifier`` and ``face_cascade``.

    Returns:
        True when both were loaded, False otherwise (the failure is logged).
    """
    global emotion_classifier, face_cascade

    try:
        logger.info("Loading emotion detection model: %s", MODEL_NAME)
        emotion_classifier = _build_emotion_classifier()
        logger.info("Emotion detection model loaded successfully")

        # Load OpenCV face cascade
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        if face_cascade.empty():
            logger.error("Failed to load face cascade classifier")
            return False

        logger.info("Face detection cascade loaded successfully")
        return True
    except Exception as e:
        logger.exception("Error loading models: %s", e)
        return False


def _to_gray(image: np.ndarray) -> np.ndarray:
    """Return a single-channel version of an RGB, RGBA or grayscale array."""
    if image.ndim == 2:
        return image
    channels = image.shape[2]
    if channels == 1:
        return image[:, :, 0]
    if channels == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)


def detect_faces_improved(image: np.ndarray, min_face_size: int = 80) -> List[Tuple[int, int, int, int]]:
    """Detect faces with strict cascade parameters and post-filtering.

    Overlapping detections are merged, then boxes are kept only if they
    cover between 0.5% and 80% of the image area and have a width/height
    ratio between 0.7 and 1.4.

    Args:
        image: Image array in RGB channel order (RGBA and grayscale are
            also accepted).
        min_face_size: Minimum face width/height in pixels.

    Returns:
        A list of ``(x, y, w, h)`` boxes; empty when nothing is found or
        detection fails (the failure is logged).
    """
    if face_cascade is None:
        logger.error("Error detecting faces: face cascade not loaded")
        return []

    try:
        gray = _to_gray(image)
        height, width = image.shape[:2]

        # Sliders may deliver floats; OpenCV requires integer sizes.
        min_side = int(min_face_size)
        max_side = int(min(height, width) * 0.8)

        # Use more strict parameters to reduce false positives
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.05,  # Smaller scale factor for more careful detection
            minNeighbors=8,  # Higher min neighbors to be more strict
            minSize=(min_side, min_side),
            maxSize=(max_side, max_side),
            flags=cv2.CASCADE_SCALE_IMAGE | cv2.CASCADE_DO_CANNY_PRUNING,
        )

        # detectMultiScale returns an empty tuple or an ndarray, so plain
        # truthiness is not safe here.
        if len(faces) == 0:
            return []

        merged_faces = merge_overlapping_faces(faces.tolist())

        image_area = height * width
        filtered_faces = []
        for (x, y, w, h) in merged_faces:
            area_ratio = (w * h) / image_area
            # Face should be at least 0.5% of image area but not more than 80%
            if not 0.005 < area_ratio < 0.8:
                continue
            # Faces are roughly square; drop extreme rectangles
            if 0.7 <= w / h <= 1.4:
                filtered_faces.append((x, y, w, h))

        return filtered_faces
    except Exception as e:
        logger.exception("Error detecting faces: %s", e)
        return []


def merge_overlapping_faces(faces: List[Tuple[int, int, int, int]], overlap_threshold: float = 0.3) -> List[Tuple[int, int, int, int]]:
    """Merge overlapping face detections to avoid duplicates.

    Each not-yet-used box acts as an anchor; every later box whose IoU with
    the anchor exceeds ``overlap_threshold`` joins its cluster. A cluster is
    replaced by the integer (floor) average of its members' coordinates.

    Args:
        faces: Boxes as ``(x, y, w, h)`` sequences.
        overlap_threshold: IoU above which two boxes count as the same face.

    Returns:
        One ``(x, y, w, h)`` tuple per cluster, in anchor order.
    """
    if len(faces) <= 1:
        return [tuple(face) for face in faces]

    merged = []
    used = [False] * len(faces)

    for i, anchor in enumerate(faces):
        if used[i]:
            continue
        used[i] = True

        cluster = [anchor]
        for j in range(i + 1, len(faces)):
            if not used[j] and calculate_iou(anchor, faces[j]) > overlap_threshold:
                cluster.append(faces[j])
                used[j] = True

        # Average the whole cluster once so rounding does not accumulate.
        size = len(cluster)
        merged.append(tuple(sum(values) // size for values in zip(*cluster)))

    return merged


def calculate_iou(box1: Tuple[int, int, int, int], box2: Tuple[int, int, int, int]) -> float:
    """Calculate Intersection over Union of two ``(x, y, w, h)`` boxes.

    Returns 0.0 for disjoint boxes and when the union area is not positive.
    """
    x1, y1, w1, h1 = box1
    x2, y2, w2, h2 = box2

    # Intersection rectangle
    x_left = max(x1, x2)
    y_top = max(y1, y2)
    x_right = min(x1 + w1, x2 + w2)
    y_bottom = min(y1 + h1, y2 + h2)

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection = (x_right - x_left) * (y_bottom - y_top)
    union = w1 * h1 + w2 * h2 - intersection

    return intersection / union if union > 0 else 0.0


def _crop_face(image: Image.Image, box: Tuple[int, int, int, int]) -> Image.Image:
    """Crop a face box with symmetric padding, clamped to the image bounds.

    The padding is one tenth of the smaller box side, but at least 10 px.
    """
    x, y, w, h = box
    padding = max(10, min(w, h) // 10)
    left = max(0, x - padding)
    top = max(0, y - padding)
    right = min(image.width, x + w + padding)
    bottom = min(image.height, y + h + padding)
    return image.crop((left, top, right, bottom))


def _named_prediction(result: Dict) -> Dict:
    """Map a raw pipeline result to ``{'label': emotion_name, 'score': ...}``."""
    return {
        'label': EMOTION_LABELS.get(result['label'], result['label']),
        'score': result['score'],
    }


def predict_emotion(face_image: Image.Image) -> List[Dict]:
    """Predict emotions for a single face crop.

    Args:
        face_image: Cropped face image.

    Returns:
        A list of ``{'label', 'score'}`` dicts with labels mapped through
        ``EMOTION_LABELS``. Falls back to a single neutral prediction with
        score 1.0 when the classifier is missing, returns nothing usable,
        or raises.
    """
    try:
        if emotion_classifier is None:
            logger.warning("Emotion classifier not loaded, returning neutral")
            return [dict(NEUTRAL_FALLBACK)]

        # Normalise mode and size for consistent classifier input
        face_image = face_image.convert("RGB").resize((224, 224))

        # The pipeline returns results in different formats depending on configuration
        results = emotion_classifier(face_image)

        processed_results = []
        if isinstance(results, list):
            processed_results = [
                _named_prediction(result)
                for result in results
                if isinstance(result, dict) and 'label' in result and 'score' in result
            ]
        elif isinstance(results, dict):
            # Single prediction
            processed_results = [_named_prediction(results)]

        if not processed_results:
            logger.warning("No valid results, returning neutral")
            return [dict(NEUTRAL_FALLBACK)]

        return processed_results
    except Exception as e:
        logger.exception("Error predicting emotion: %s", e)
        return [dict(NEUTRAL_FALLBACK)]


def _load_label_font(size: int = 20):
    """Return the first available TrueType font, else Pillow's default font."""
    for font_name in ("arial.ttf", "DejaVuSans.ttf"):
        try:
            return ImageFont.truetype(font_name, size)
        except OSError:
            # Font file not present on this system; try the next candidate.
            continue
    return ImageFont.load_default()


def draw_emotion_results(image: Image.Image, faces: List, emotions: List, confidence_threshold: float = 0.5) -> Image.Image:
    """Draw bounding boxes and emotion labels on the image.

    The image is modified in place, so pass a copy to keep the original.

    Args:
        image: Image to annotate.
        faces: ``(x, y, w, h)`` boxes.
        emotions: Per-face prediction lists, parallel to ``faces``.
        confidence_threshold: Faces whose best score is below this are not
            drawn.

    Returns:
        The same image object, annotated. On error the image is returned in
        whatever state it had reached and the error is logged.
    """
    try:
        draw = ImageDraw.Draw(image)
        font = _load_label_font()

        # zip stops at the shorter list, so faces without predictions are skipped
        for (x, y, w, h), face_emotions in zip(faces, emotions):
            valid_emotions = [e for e in face_emotions if e['score'] >= confidence_threshold]
            if not valid_emotions:
                continue

            top_emotion = max(valid_emotions, key=lambda e: e['score'])
            emotion_label = top_emotion['label']
            confidence = top_emotion['score']

            color = EMOTION_COLORS.get(emotion_label, '#FFFFFF')

            # Draw bounding box with thicker line
            draw.rectangle([(x, y), (x + w, y + h)], outline=color, width=4)

            label_text = f"{emotion_label.upper()}"
            confidence_text = f"{confidence:.1%}"

            # Calculate text size for background
            bbox1 = draw.textbbox((0, 0), label_text, font=font)
            bbox2 = draw.textbbox((0, 0), confidence_text, font=font)
            text_width = max(bbox1[2] - bbox1[0], bbox2[2] - bbox2[0]) + 20
            text_height = (bbox1[3] - bbox1[1]) + (bbox2[3] - bbox2[1]) + 15

            # Put the label above the box; if that would leave the image
            # (face near the top edge), put it just inside the box instead.
            label_top = y - text_height - 10
            if label_top < 0:
                label_top = y

            # Draw background for text
            draw.rectangle(
                [(x, label_top), (x + text_width, label_top + text_height + 10)],
                fill=color,
            )
            # Draw emotion label
            draw.text((x + 10, label_top + 5), label_text, fill='white', font=font)
            # Draw confidence
            draw.text((x + 10, label_top + 30), confidence_text, fill='white', font=font)

        return image
    except Exception as e:
        logger.exception("Error drawing results: %s", e)
        return image


def process_image(image: Optional[Image.Image], confidence_threshold: float = 0.5, min_face_size: int = 80) -> Tuple[Optional[Image.Image], str]:
    """Detect faces in an image, classify their emotions and annotate them.

    Args:
        image: Input image, or None when nothing was uploaded.
        confidence_threshold: Minimum score for an emotion to be reported.
        min_face_size: Minimum face size in pixels for detection.

    Returns:
        ``(annotated_image, summary_text)``. When there is nothing to draw
        or an error occurs, the unmodified input image is returned with an
        explanatory message.
    """
    if image is None:
        return None, "No image provided"

    try:
        # Convert PIL to numpy array
        image_np = np.array(image)

        faces = detect_faces_improved(image_np, min_face_size)
        if not faces:
            return image, "❌ No faces detected in the image. Try adjusting the minimum face size or use an image with clearer faces."

        # Keep only faces with at least one emotion above the threshold
        emotions_list = []
        valid_faces = []
        for box in faces:
            emotions = predict_emotion(_crop_face(image, box))
            if any(e['score'] >= confidence_threshold for e in emotions):
                emotions_list.append(emotions)
                valid_faces.append(tuple(box))

        if not valid_faces:
            return image, f"⚠️ {len(faces)} face(s) detected but no emotions above {confidence_threshold:.1f} confidence threshold. Try lowering the threshold."

        # Draw on a copy so the caller's image stays untouched
        result_image = draw_emotion_results(image.copy(), valid_faces, emotions_list, confidence_threshold)

        # Create summary text
        summary_lines = [f"✅ **Successfully detected {len(valid_faces)} face(s) with confident emotion predictions:**\n"]

        for face_number, emotions in enumerate(emotions_list, start=1):
            sorted_emotions = sorted(emotions, key=lambda e: e['score'], reverse=True)
            top_emotion = sorted_emotions[0]
            emotion_emoji = EMOTION_EMOJIS.get(top_emotion['label'], DEFAULT_EMOJI)

            summary_lines.append(f"**Face {face_number}:** {emotion_emoji} **{top_emotion['label'].title()}** ({top_emotion['score']:.1%} confidence)")

            # Up to three runner-up emotions, only those above the threshold
            others = [e for e in sorted_emotions[1:4] if e['score'] >= confidence_threshold]
            if others:
                summary_lines.append(" 📊 Other detected emotions:")
                for emotion in others:
                    emoji = EMOTION_EMOJIS.get(emotion['label'], DEFAULT_EMOJI)
                    summary_lines.append(f" • {emoji} {emotion['label'].title()}: {emotion['score']:.1%}")

            summary_lines.append("")

        return result_image, "\n".join(summary_lines)
    except Exception as e:
        logger.exception("Error processing image: %s", e)
        return image, f"❌ Error processing image: {str(e)}"


def analyze_emotions_batch(files) -> str:
    """Analyze emotions in multiple uploaded files.

    Args:
        files: Uploaded files; each item is either an object exposing the
            file path as ``.name`` or the path itself.

    Returns:
        One report line per file, joined with newlines. A file that cannot
        be processed gets an error line and does not stop the batch.
    """
    try:
        if not files:
            return "No files provided"

        all_results = []

        for file_number, file in enumerate(files, start=1):
            try:
                path = getattr(file, "name", file)

                # Close the file promptly and normalise the mode so that
                # RGBA / grayscale / palette images work with face detection.
                with Image.open(path) as opened:
                    image = opened.convert("RGB")

                faces = detect_faces_improved(np.array(image))
                if not faces:
                    all_results.append(f"📁 File {file_number} ({path}): No faces detected")
                    continue

                image_emotions = []
                for box in faces:
                    emotions = predict_emotion(_crop_face(image, box))
                    top_emotion = max(emotions, key=lambda e: e['score'])
                    image_emotions.append(f"{top_emotion['label']} ({top_emotion['score']:.1%})")

                all_results.append(f"📁 File {file_number} ({path}): {len(faces)} face(s) - {', '.join(image_emotions)}")
            except Exception as e:
                all_results.append(f"📁 File {file_number}: Error processing - {str(e)}")

        return "\n".join(all_results)
    except Exception as e:
        logger.exception("Error in batch analysis: %s", e)
        return f"Error in batch analysis: {str(e)}"


def get_emotion_statistics(image: Optional[Image.Image]) -> str:
    """Get detailed emotion statistics for an image.

    Args:
        image: Input image, or None when nothing was uploaded.

    Returns:
        Markdown text with a per-face score breakdown and, when more than
        one face is found, aggregate statistics per emotion.
    """
    if image is None:
        return "No image provided"

    try:
        faces = detect_faces_improved(np.array(image))
        if not faces:
            return "❌ No faces detected in the image"

        all_emotions = {}  # emotion name -> scores across all faces
        face_details = []

        for face_number, box in enumerate(faces, start=1):
            emotions = predict_emotion(_crop_face(image, box))

            face_details.append({
                'face_num': face_number,
                'position': tuple(box),
                'emotions': sorted(emotions, key=lambda e: e['score'], reverse=True),
            })

            for emotion_data in emotions:
                all_emotions.setdefault(emotion_data['label'], []).append(emotion_data['score'])

        stats_lines = [f"📊 **Detailed Emotion Analysis for {len(faces)} face(s):**\n"]

        # Per-face breakdown
        for face_detail in face_details:
            stats_lines.append(f"### 👤 Face {face_detail['face_num']}:")
            top_emotion = face_detail['emotions'][0]
            stats_lines.append(f"**Primary emotion:** {top_emotion['label'].title()} ({top_emotion['score']:.1%})")
            stats_lines.append("**All emotions detected:**")

            for emotion in face_detail['emotions']:
                bar_length = int(emotion['score'] * 20)  # Scale to 20 chars
                bar = "█" * bar_length + "░" * (20 - bar_length)
                stats_lines.append(f" {emotion['label'].title()}: {bar} {emotion['score']:.1%}")

            stats_lines.append("")

        # Overall statistics
        if len(faces) > 1:
            stats_lines.append("### 📈 Overall Statistics:")
            for emotion, scores in all_emotions.items():
                avg_score = np.mean(scores)
                max_score = np.max(scores)
                count = len(scores)

                stats_lines.append(f"**{emotion.title()}:**")
                stats_lines.append(f" - Average confidence: {avg_score:.1%}")
                stats_lines.append(f" - Maximum confidence: {max_score:.1%}")
                stats_lines.append(f" - Faces showing this emotion: {count}/{len(faces)}")
                stats_lines.append("")

        return "\n".join(stats_lines)
    except Exception as e:
        logger.exception("Error calculating statistics: %s", e)
        return f"❌ Error calculating statistics: {str(e)}"


# Create simplified Gradio interface
def create_interface():
    """Build the Gradio Blocks UI and wire its buttons to the analysis functions.

    Returns:
        The ``gr.Blocks`` interface (not yet launched).
    """
    custom_css = """
    .main-header {
        text-align: center;
        color: #2563eb;
        margin-bottom: 2rem;
    }
    .gradio-container {
        max-width: 1200px;
        margin: auto;
    }
    """

    with gr.Blocks(
        title="Face Emotion Detection - Improved",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as iface:
        # Header
        gr.Markdown(
            """
            # 😊 Face Emotion Detection (Improved)
            ### Accurate emotion recognition with enhanced face detection

            This improved version includes better face detection algorithms to reduce false positives
            and provides more accurate emotion classification for detected faces.
            """,
            elem_classes=["main-header"]
        )

        with gr.Tab("🖼️ Single Image Analysis"):
            with gr.Row():
                with gr.Column(scale=1):
                    image_input = gr.Image(
                        label="Upload Image",
                        type="pil",
                        height=400
                    )

                    with gr.Row():
                        confidence_slider = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.5,
                            step=0.1,
                            label="🎯 Confidence Threshold",
                            info="Minimum confidence to display emotions"
                        )
                        face_size_slider = gr.Slider(
                            minimum=30,
                            maximum=200,
                            value=80,
                            step=10,
                            label="👤 Minimum Face Size",
                            info="Minimum face size (pixels) to detect"
                        )

                    analyze_btn = gr.Button("🔍 Analyze Emotions", variant="primary", size="lg")

                with gr.Column(scale=1):
                    output_image = gr.Image(
                        label="Emotion Detection Results",
                        height=400
                    )
                    result_text = gr.Textbox(
                        label="Detection Results",
                        lines=8,
                        show_copy_button=True
                    )

        with gr.Tab("📊 Detailed Statistics"):
            with gr.Row():
                with gr.Column(scale=1):
                    stats_image_input = gr.Image(
                        label="Upload Image for Statistical Analysis",
                        type="pil",
                        height=400
                    )
                    analyze_stats_btn = gr.Button("📈 Generate Detailed Statistics", variant="primary", size="lg")

                with gr.Column(scale=1):
                    stats_output = gr.Markdown(
                        value="Upload an image and click 'Generate Detailed Statistics' to see comprehensive emotion analysis...",
                        label="Emotion Statistics"
                    )

        with gr.Tab("🔄 Batch Processing"):
            with gr.Column():
                batch_images_input = gr.File(
                    label="Upload Multiple Images",
                    file_count="multiple",
                    file_types=["image"]
                )
                batch_process_btn = gr.Button("⚡ Process All Images", variant="primary", size="lg")
                batch_results_output = gr.Textbox(
                    label="Batch Processing Results",
                    lines=15,
                    show_copy_button=True
                )

        with gr.Tab("ℹ️ About & Tips"):
            gr.Markdown(
                """
                ## 🔧 Improvements Made

                ### ✅ Enhanced Face Detection
                - **Stricter parameters** to reduce false positives
                - **Overlap detection** to merge duplicate face detections
                - **Size filtering** to ignore unrealistic face sizes
                - **Aspect ratio validation** to filter non-face rectangles

                ### 🎯 Better Accuracy
                - **Confidence thresholds** to filter uncertain predictions
                - **Improved preprocessing** for better emotion recognition
                - **Face padding** for better context in emotion detection

                ### 🚀 Performance Optimizations
                - **Removed problematic live camera** feature
                - **Streamlined interface** for better user experience
                - **Better error handling** and user feedback

                ## 📚 Supported Emotions
                - 😠 **Angry** - Expressions of anger, frustration
                - 🤢 **Disgust** - Expressions of revulsion or distaste
                - 😨 **Fear** - Expressions of fear, anxiety
                - 😊 **Happy** - Expressions of joy, contentment
                - 😢 **Sad** - Expressions of sadness, sorrow
                - 😲 **Surprise** - Expressions of surprise, amazement
                - 😐 **Neutral** - Calm, neutral expressions

                ## 💡 Tips for Best Results
                1. **Use clear, well-lit images** with visible faces
                2. **Adjust confidence threshold** if you get too many/few results
                3. **Modify minimum face size** based on your image resolution
                4. **Frontal face views** work better than profile shots
                5. **Avoid heavily shadowed or blurry faces**

                ## 🔧 Troubleshooting
                - **No faces detected?** Try lowering the minimum face size
                - **Too many false detections?** Increase the minimum face size or confidence threshold
                - **Missing obvious faces?** Lower the confidence threshold
                - **Multiple boxes on same face?** The system should automatically merge them now

                ---
                **Model:** [abhilash88/face-emotion-detection](https://huggingface.co/abhilash88/face-emotion-detection)
                """
            )

        # Event handlers
        analyze_btn.click(
            fn=process_image,
            inputs=[image_input, confidence_slider, face_size_slider],
            outputs=[output_image, result_text],
            api_name="analyze_image"
        )

        analyze_stats_btn.click(
            fn=get_emotion_statistics,
            inputs=stats_image_input,
            outputs=stats_output,
            api_name="get_statistics"
        )

        batch_process_btn.click(
            fn=analyze_emotions_batch,
            inputs=batch_images_input,
            outputs=batch_results_output,
            api_name="batch_process"
        )

        # Example images
        gr.Examples(
            examples=[
                "https://images.unsplash.com/photo-1507003211169-0a1dd7228f2d?w=400&h=400&fit=crop&crop=face",
                "https://images.unsplash.com/photo-1554151228-14d9def656e4?w=400&h=400&fit=crop&crop=face",
                "https://images.unsplash.com/photo-1472099645785-5658abf4ff4e?w=400&h=400&fit=crop&crop=face",
            ],
            inputs=image_input,
            label="🖼️ Try these example images"
        )

    return iface


def main():
    """Load the models and launch the app, or an error page if loading fails."""
    # Launch settings shared by the real app and the error page
    launch_kwargs = {
        "share": False,
        "show_error": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }

    logger.info("Initializing Improved Face Emotion Detection System...")

    if load_models():
        logger.info("Models loaded successfully!")
        iface = create_interface()
        iface.launch(show_api=True, **launch_kwargs)
        return

    logger.error("Failed to load models. Please check your model configuration.")

    with gr.Blocks() as error_iface:
        gr.Markdown(
            """
            # ⚠️ Model Loading Error

            The emotion detection model failed to load. Please check:
            1. Network connectivity
            2. Model dependencies
            3. System logs for details
            """
        )
    error_iface.launch(**launch_kwargs)


# Initialize and launch
if __name__ == "__main__":
    main()