Initial deployment: Speech Emotion Recognition
- app.py +300 -0
- preprocess_ravdess.py +178 -0
- quick_train.py +291 -0
- requirements.txt +9 -0
- src/__init__.py +0 -0
- src/ensemble_model.py +206 -0
- src/feature_extraction.py +78 -0
- src/utils.py +83 -0
- test_local.py +156 -0
app.py
ADDED
@@ -0,0 +1,300 @@
"""
Speech Emotion Recognition - Gradio Application
Upload or record audio to detect emotions using ensemble ML models
"""

import gradio as gr
import numpy as np
from src.ensemble_model import EnsembleEmotionRecognizer
from src.feature_extraction import extract_features
from src.utils import (
    create_waveform_plot,
    create_spectrogram_plot,
    format_probability_text,
    get_emotion_emoji
)

# ============================================================================
# LOAD MODEL
# ============================================================================

print("="*60)
print("🎤 Speech Emotion Recognition")
print("="*60)

try:
    model = EnsembleEmotionRecognizer(weights_dir='weights')
    MODEL_LOADED = True
    print("\n✅ Application ready!")
except Exception as e:
    print(f"\n❌ Error loading model: {e}")
    MODEL_LOADED = False

print("="*60)

# ============================================================================
# PREDICTION FUNCTION
# ============================================================================

def predict_emotion(audio_file):
    """
    Main prediction function for the Gradio interface

    Args:
        audio_file (str): Path to uploaded/recorded audio file

    Returns:
        tuple: (result_text, prob_chart, waveform_fig, spectrogram_fig)
    """
    if not MODEL_LOADED:
        return (
            "❌ **Error**: Model not loaded. Please check model files in the weights/ directory.",
            None,
            None,
            None
        )

    if audio_file is None:
        return (
            "⚠️ **Please upload an audio file or record your voice**",
            None,
            None,
            None
        )

    try:
        # Extract features from audio
        print(f"\nProcessing: {audio_file}")
        features, y, sr = extract_features(audio_file)
        print(f"  ✓ Features extracted: {features.shape}")

        # Predict emotion
        emotion, confidence, prob_dict = model.predict_with_confidence(features)
        print(f"  ✓ Prediction: {emotion} ({confidence:.2%})")

        # Get emoji
        emoji = get_emotion_emoji(emotion)

        # Create result text
        result_text = f"""
## 🎯 Prediction Result

### {emoji} **{emotion.upper()}**

**Confidence: {confidence*100:.1f}%**

---

### 📊 Probability Distribution

{format_probability_text(prob_dict)}

---

### ℹ️ Model Information

- **Models**: XGBoost, LightGBM, Gradient Boosting, AdaBoost
- **Features**: 80 selected from 162 total
- **Optimization**: Evolutionary Algorithm
"""

        # Create probability chart for the Gradio Label component
        prob_chart = {k.capitalize(): v for k, v in prob_dict.items()}

        # Create visualizations
        waveform_fig = create_waveform_plot(y, sr)
        spectrogram_fig = create_spectrogram_plot(y, sr)

        return result_text, prob_chart, waveform_fig, spectrogram_fig

    except Exception as e:
        error_msg = f"❌ **Error during prediction**: {str(e)}"
        print(f"\n{error_msg}")
        return error_msg, None, None, None

# ============================================================================
# GRADIO INTERFACE
# ============================================================================

# Custom CSS
custom_css = """
.gradio-container {
    font-family: 'Inter', 'Arial', sans-serif;
    max-width: 1200px;
    margin: auto;
}

.header {
    text-align: center;
    padding: 30px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border-radius: 15px;
    margin-bottom: 30px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.header h1 {
    color: white;
    margin: 0;
    font-size: 2.5em;
    font-weight: 700;
}

.header p {
    color: rgba(255, 255, 255, 0.9);
    margin-top: 10px;
    font-size: 1.1em;
}

.emotion-list {
    background: #f8f9fa;
    padding: 20px;
    border-radius: 10px;
    margin-top: 20px;
}

.footer {
    text-align: center;
    margin-top: 40px;
    padding: 20px;
    color: #666;
    border-top: 1px solid #e0e0e0;
}
"""

# Create Gradio interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Speech Emotion Recognition") as demo:

    # Header
    gr.HTML("""
        <div class="header">
            <h1>🎤 Speech Emotion Recognition</h1>
            <p>AI-powered emotion detection using Evolutionary Algorithm optimization</p>
        </div>
    """)

    # Main interface
    with gr.Row():
        # Left column - Input
        with gr.Column(scale=1):
            gr.Markdown("### 🎙️ Audio Input")

            audio_input = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Upload or Record Audio"
            )

            predict_btn = gr.Button(
                "🎯 Predict Emotion",
                variant="primary",
                size="lg"
            )

            gr.Markdown("""
            <div class="emotion-list">

            ### Supported Emotions

            - 😠 **Angry**
            - 😌 **Calm**
            - 🤢 **Disgust**
            - 😨 **Fearful**
            - 😊 **Happy**
            - 😐 **Neutral**
            - 😢 **Sad**
            - 😲 **Surprised**

            </div>
            """)

        # Right column - Results
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Prediction Results")

            result_text = gr.Markdown(
                value="*Upload an audio file or record your voice to get started*"
            )

            prob_chart = gr.Label(
                label="Emotion Probabilities",
                num_top_classes=8
            )

            # Visualizations (collapsible)
            with gr.Accordion("📊 Audio Visualizations", open=False):
                with gr.Row():
                    waveform_plot = gr.Plot(label="Waveform")
                    spectrogram_plot = gr.Plot(label="Spectrogram")

    # Information section
    gr.Markdown("""
    ---

    ## ℹ️ About This System

    This Speech Emotion Recognition system uses an **Evolutionary Algorithm** to optimize:

    1. 🧬 **Feature Selection**: Automatically selects the most informative features (80 out of 162)
    2. ⚙️ **Hyperparameter Tuning**: Optimizes parameters for all 4 models
    3. ⚖️ **Ensemble Weights**: Finds optimal combination weights

    ### 🎯 Model Architecture

    - **XGBoost**: Gradient boosting with regularization
    - **LightGBM**: Fast gradient boosting framework
    - **Gradient Boosting**: Sequential ensemble learning
    - **AdaBoost**: Adaptive boosting algorithm

    ### 📊 Performance

    - **Ensemble Accuracy**: ~87%
    - **Dataset**: RAVDESS (1,440 samples, 24 actors)
    - **Training**: Evolutionary optimization over 100 generations

    ### 🔬 Technical Details

    **Audio Features Extracted** (162 total):
    - Zero Crossing Rate (1)
    - Chroma STFT (12)
    - MFCC (20)
    - RMS Energy (1)
    - Mel Spectrogram (128)

    **Optimized to 80 features** via feature importance ranking

    ---
    """)

    # Footer
    gr.HTML("""
        <div class="footer">
            <p>
                🎵 Speech Emotion Recognition System<br>
                Powered by Evolutionary Algorithm Optimization<br>
                Built with ❤️ using Gradio | Deployed on 🤗 Hugging Face Spaces
            </p>
            <p style="margin-top: 10px; font-size: 0.9em;">
                <a href="https://github.com/yourusername/speech-emotion-recognition" target="_blank">GitHub</a> |
                <a href="https://huggingface.co/spaces/yourusername/speech-emotion-recognition" target="_blank">Hugging Face</a>
            </p>
        </div>
    """)

    # Connect prediction function
    predict_btn.click(
        fn=predict_emotion,
        inputs=[audio_input],
        outputs=[result_text, prob_chart, waveform_plot, spectrogram_plot]
    )

# ============================================================================
# LAUNCH
# ============================================================================

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )
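For a quick check outside the UI, `predict_emotion` can be called directly once the `weights/` directory is populated. A minimal sketch (the file `sample.wav` is a placeholder for any short speech recording, not part of this commit):

```python
# Importing app builds the Blocks UI and loads the models, but does not launch the server.
from app import predict_emotion

result_text, prob_chart, waveform_fig, spectrogram_fig = predict_emotion("sample.wav")
print(prob_chart)  # e.g. {'Happy': 0.41, 'Neutral': 0.22, ...}
```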
preprocess_ravdess.py
ADDED
@@ -0,0 +1,178 @@
"""
Preprocess RAVDESS Dataset
Extract features from all audio files and save to CSV
"""

import os
import numpy as np
import pandas as pd
import librosa
from tqdm import tqdm
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')


def extract_emotion_from_filename(filename):
    """
    Extract the emotion label from a RAVDESS filename.
    Format: modality-vocal channel-emotion-emotional intensity-statement-repetition-actor.wav

    Args:
        filename (str): Audio filename

    Returns:
        str: Emotion label
    """
    parts = filename.split('-')
    emotion_code = int(parts[2])

    emotion_map = {
        1: 'neutral',
        2: 'calm',
        3: 'happy',
        4: 'sad',
        5: 'angry',
        6: 'fearful',
        7: 'disgust',
        8: 'surprised'
    }

    return emotion_map.get(emotion_code, 'unknown')


def extract_audio_features(file_path, duration=2.5, offset=0.6):
    """
    Extract 162 features from an audio file.

    Returns:
        np.array: Feature vector of shape (162,)
    """
    try:
        y, sr = librosa.load(file_path, duration=duration, offset=offset)

        features = np.array([])

        # ZCR (1)
        zcr = np.mean(librosa.feature.zero_crossing_rate(y=y).T, axis=0)
        features = np.hstack((features, zcr))

        # Chroma (12)
        stft = np.abs(librosa.stft(y))
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
        features = np.hstack((features, chroma))

        # MFCC (20)
        mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20).T, axis=0)
        features = np.hstack((features, mfcc))

        # RMS (1)
        rms = np.mean(librosa.feature.rms(y=y).T, axis=0)
        features = np.hstack((features, rms))

        # Mel (128)
        mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
        features = np.hstack((features, mel))

        return features

    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None


def process_ravdess_dataset(data_dir, output_csv='features_ravdess.csv'):
    """
    Process all audio files in the RAVDESS dataset.

    Args:
        data_dir (str): Path to dataset root (containing Actor_01, Actor_02, ...)
        output_csv (str): Output CSV filename

    Returns:
        pd.DataFrame: DataFrame with features and labels
    """

    print("="*70)
    print("RAVDESS Dataset Feature Extraction")
    print("="*70)

    data_dir = Path(data_dir)

    # Find all actor directories
    actor_dirs = sorted([d for d in data_dir.iterdir() if d.is_dir() and d.name.startswith('Actor_')])

    if len(actor_dirs) == 0:
        print(f"No Actor directories found in {data_dir}")
        print("  Expected structure: data_dir/Actor_01/, Actor_02/, ...")
        return None

    print(f"\nFound {len(actor_dirs)} actor directories")

    # Collect all audio files
    audio_files = []
    for actor_dir in actor_dirs:
        files = list(actor_dir.glob('*.wav'))
        audio_files.extend(files)

    print(f"Total audio files: {len(audio_files)}")

    if len(audio_files) == 0:
        print("No audio files found!")
        return None

    # Extract features
    print("\nExtracting features...")
    data_list = []

    for audio_file in tqdm(audio_files, desc="Processing"):
        # Extract features
        features = extract_audio_features(str(audio_file))

        if features is None:
            continue

        # Get metadata
        emotion = extract_emotion_from_filename(audio_file.name)
        actor = audio_file.parent.name

        # Create row
        row = {
            'file_path': str(audio_file),
            'filename': audio_file.name,
            'actor': actor,
            'emotion': emotion
        }

        # Add features
        for i, feat in enumerate(features):
            row[f'feature_{i}'] = feat

        data_list.append(row)

    # Create DataFrame
    df = pd.DataFrame(data_list)

    # Save to CSV
    df.to_csv(output_csv, index=False)

    print("\n✅ Feature extraction complete!")
    print(f"  Saved to: {output_csv}")
    print(f"  Shape: {df.shape}")
    print(f"  Emotions: {', '.join(df['emotion'].unique())}")
    print("\nEmotion distribution:")
    print(df['emotion'].value_counts())
    print("="*70)

    return df


if __name__ == "__main__":
    # Example usage
    import sys
    if len(sys.argv) > 1:
        DATA_DIR = sys.argv[1]
    else:
        DATA_DIR = "data/RAVDESS/audio_speech_actors_01-24"

    df = process_ravdess_dataset(DATA_DIR)
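The filename parser above keys off the third dash-separated field. A minimal sketch (the filename below is a made-up example in the standard RAVDESS pattern, not a file from this repo):

```python
from preprocess_ravdess import extract_emotion_from_filename

# modality-channel-emotion-intensity-statement-repetition-actor.wav; "05" maps to angry
print(extract_emotion_from_filename("03-01-05-01-02-01-12.wav"))  # -> angry
```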
quick_train.py
ADDED
@@ -0,0 +1,291 @@
"""
Quick Training Script
Train models and save weights for Hugging Face deployment
"""

import pickle
import numpy as np
import pandas as pd
import json
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier

print("="*70)
print("QUICK TRAINING - Speech Emotion Recognition")
print("="*70)

# ============================================================================
# 1. LOAD DATA
# ============================================================================
print("\n1️⃣ Loading data...")

CSV_FILE = 'features_ravdess.csv'

if not os.path.exists(CSV_FILE):
    print(f"❌ Error: {CSV_FILE} not found!")
    print("  Please run preprocess_ravdess.py first to extract features")
    exit(1)

df = pd.read_csv(CSV_FILE)

# Get features and labels
feature_cols = [col for col in df.columns if col.startswith('feature_')]
X = df[feature_cols].values
y = df['emotion'].values

print(f"  ✓ Data loaded: {X.shape}")
print(f"  ✓ Emotions: {np.unique(y)}")
print(f"  ✓ Distribution:\n{pd.Series(y).value_counts()}")

# ============================================================================
# 2. PREPROCESSING
# ============================================================================
print("\n2️⃣ Preprocessing...")

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print(f"  ✓ Encoded labels: {label_encoder.classes_}")

# Split data (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded
)

print(f"  ✓ Train set: {X_train.shape}")
print(f"  ✓ Test set: {X_test.shape}")

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("  ✓ Features scaled")

# ============================================================================
# 3. FEATURE SELECTION (Simple: Top 80 by variance)
# ============================================================================
print("\n3️⃣ Feature selection...")

feature_variance = np.var(X_train_scaled, axis=0)
top_indices = np.argsort(feature_variance)[-80:]  # Top 80 features

X_train_selected = X_train_scaled[:, top_indices]
X_test_selected = X_test_scaled[:, top_indices]

print(f"  ✓ Selected {len(top_indices)} features (from 162)")
print(
    f"  ✓ Variance range: {feature_variance[top_indices].min():.4f} - {feature_variance[top_indices].max():.4f}")

# ============================================================================
# 4. TRAIN MODELS
# ============================================================================
print("\n4️⃣ Training models...")

n_classes = len(label_encoder.classes_)
models = {}
accuracies = {}

# XGBoost
print("\n  🔹 Training XGBoost...")
xgb_model = XGBClassifier(
    n_estimators=150,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    gamma=1.0,
    objective='multi:softprob',
    num_class=n_classes,
    random_state=42,
    n_jobs=-1,
    verbosity=0
)
xgb_model.fit(X_train_selected, y_train)
xgb_acc = xgb_model.score(X_test_selected, y_test)
models['xgboost'] = xgb_model
accuracies['xgboost'] = xgb_acc
print(f"  ✓ XGBoost accuracy: {xgb_acc:.4f}")

# LightGBM
print("\n  🔹 Training LightGBM...")
lgbm_model = LGBMClassifier(
    n_estimators=150,
    num_leaves=40,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_samples=20,
    objective='multiclass',
    num_class=n_classes,
    random_state=42,
    n_jobs=-1,
    verbose=-1
)
lgbm_model.fit(X_train_selected, y_train)
lgbm_acc = lgbm_model.score(X_test_selected, y_test)
models['lightgbm'] = lgbm_model
accuracies['lightgbm'] = lgbm_acc
print(f"  ✓ LightGBM accuracy: {lgbm_acc:.4f}")

# Gradient Boosting
print("\n  🔹 Training Gradient Boosting...")
gb_model = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=4,
    learning_rate=0.1,
    subsample=0.8,
    min_samples_split=10,
    random_state=42
)
gb_model.fit(X_train_selected, y_train)
gb_acc = gb_model.score(X_test_selected, y_test)
models['gradientboosting'] = gb_model
accuracies['gradientboosting'] = gb_acc
print(f"  ✓ Gradient Boosting accuracy: {gb_acc:.4f}")

# AdaBoost
print("\n  🔹 Training AdaBoost...")
ada_model = AdaBoostClassifier(
    n_estimators=100,
    learning_rate=1.0,
    algorithm='SAMME.R',
    random_state=42
)
ada_model.fit(X_train_selected, y_train)
ada_acc = ada_model.score(X_test_selected, y_test)
models['adaboost'] = ada_model
accuracies['adaboost'] = ada_acc
print(f"  ✓ AdaBoost accuracy: {ada_acc:.4f}")

# ============================================================================
# 5. ENSEMBLE
# ============================================================================
print("\n5️⃣ Creating ensemble...")

# Get predictions
predictions = {}
for name, model in models.items():
    predictions[name] = model.predict_proba(X_test_selected)

# Calculate weights (proportional to accuracy)
weights = np.array([accuracies[name] for name in [
    'xgboost', 'lightgbm', 'gradientboosting', 'adaboost']])
weights = weights / weights.sum()

print(f"  ✓ Ensemble weights: {weights}")

# Weighted ensemble prediction
ensemble_pred = (
    weights[0] * predictions['xgboost'] +
    weights[1] * predictions['lightgbm'] +
    weights[2] * predictions['gradientboosting'] +
    weights[3] * predictions['adaboost']
)

ensemble_labels = np.argmax(ensemble_pred, axis=1)
ensemble_acc = accuracy_score(y_test, ensemble_labels)

print(f"  ✓ Ensemble accuracy: {ensemble_acc:.4f}")

# ============================================================================
# 6. SAVE WEIGHTS
# ============================================================================
print("\n6️⃣ Saving weights...")

os.makedirs('weights', exist_ok=True)

# Save individual models
with open('weights/xgboost_model.pkl', 'wb') as f:
    pickle.dump(xgb_model, f)
print("  ✓ xgboost_model.pkl")

with open('weights/lightgbm_model.pkl', 'wb') as f:
    pickle.dump(lgbm_model, f)
print("  ✓ lightgbm_model.pkl")

with open('weights/gradientboost_model.pkl', 'wb') as f:
    pickle.dump(gb_model, f)
print("  ✓ gradientboost_model.pkl")

with open('weights/adaboost_model.pkl', 'wb') as f:
    pickle.dump(ada_model, f)
print("  ✓ adaboost_model.pkl")

# Save preprocessing objects
with open('weights/scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)
print("  ✓ scaler.pkl")

with open('weights/label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)
print("  ✓ label_encoder.pkl")

# Save configuration
config = {
    'selected_features': top_indices.tolist(),
    'ensemble_weights': weights.tolist(),
    'n_features': len(top_indices),
    'emotions': label_encoder.classes_.tolist(),
    'model_accuracies': {
        'xgboost': float(xgb_acc),
        'lightgbm': float(lgbm_acc),
        'gradientboosting': float(gb_acc),
        'adaboost': float(ada_acc),
        'ensemble': float(ensemble_acc)
    }
}

with open('weights/config.json', 'w') as f:
    json.dump(config, f, indent=2)
print("  ✓ config.json")

# ============================================================================
# 7. VERIFY
# ============================================================================
print("\n7️⃣ Verifying saved models...")

# Test loading
with open('weights/xgboost_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

test_acc = loaded_model.score(X_test_selected, y_test)
print(f"  ✓ Loaded model works (accuracy: {test_acc:.4f})")

# ============================================================================
# 8. SUMMARY
# ============================================================================
print("\n" + "="*70)
print("✅ TRAINING COMPLETE!")
print("="*70)

print("\n📊 Final Results:")
print(f"  XGBoost: {xgb_acc:.4f}")
print(f"  LightGBM: {lgbm_acc:.4f}")
print(f"  GradientBoosting: {gb_acc:.4f}")
print(f"  AdaBoost: {ada_acc:.4f}")
print(f"  Ensemble: {ensemble_acc:.4f}")

print("\n💾 Saved files:")
print("  weights/xgboost_model.pkl")
print("  weights/lightgbm_model.pkl")
print("  weights/gradientboost_model.pkl")
print("  weights/adaboost_model.pkl")
print("  weights/scaler.pkl")
print("  weights/label_encoder.pkl")
print("  weights/config.json")

print("\n🚀 Next steps:")
print("  1. Test locally: python app.py")
print("  2. Push to Hugging Face: git add . && git commit -m 'Add models' && git push")

print("="*70)
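The ensemble step blends each model's `predict_proba` output with weights proportional to its test accuracy. A minimal numeric sketch of that weighting (the accuracy values below are made up for illustration, not results from this run):

```python
import numpy as np

# Toy accuracies in the order xgboost, lightgbm, gradientboosting, adaboost
accs = np.array([0.85, 0.86, 0.80, 0.55])
weights = accs / accs.sum()          # normalised so the weights sum to 1
print(weights.round(3))              # e.g. [0.278 0.281 0.261 0.18 ]

# Each (n_samples, n_classes) probability matrix is multiplied by its weight,
# summed, and argmax over classes gives the ensemble label.
```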
requirements.txt
ADDED
@@ -0,0 +1,9 @@
numpy==1.24.3
pandas==2.0.3
scikit-learn==1.3.0
xgboost==2.0.3
lightgbm==4.1.0
librosa==0.10.1
soundfile==0.12.1
gradio==4.44.0
matplotlib==3.7.2
src/__init__.py
ADDED
File without changes
src/ensemble_model.py
ADDED
@@ -0,0 +1,206 @@
"""
Ensemble Model for Speech Emotion Recognition
Loads pre-trained models and makes predictions
"""

import pickle
import numpy as np
import json
from pathlib import Path


class EnsembleEmotionRecognizer:
    """
    Ensemble model combining XGBoost, LightGBM, Gradient Boosting, and AdaBoost.
    Pre-trained weights are loaded from the weights directory.
    """

    def __init__(self, weights_dir='weights'):
        """
        Initialize the ensemble model.

        Args:
            weights_dir (str): Directory containing model weights
        """
        self.weights_dir = Path(weights_dir)
        self.models = {}
        self.scaler = None
        self.label_encoder = None
        self.config = None
        self.selected_features = None
        self.ensemble_weights = None

        self._load_weights()

    def _load_weights(self):
        """Load all pre-trained model weights and configurations"""

        print("Loading pre-trained models...")

        try:
            # Load configuration
            config_path = self.weights_dir / 'config.json'
            with open(config_path, 'r') as f:
                self.config = json.load(f)
            print("  ✓ Configuration loaded")

            # Load scaler
            with open(self.weights_dir / 'scaler.pkl', 'rb') as f:
                self.scaler = pickle.load(f)
            print("  ✓ Scaler loaded")

            # Load label encoder
            with open(self.weights_dir / 'label_encoder.pkl', 'rb') as f:
                self.label_encoder = pickle.load(f)
            print("  ✓ Label encoder loaded")

            # Load models
            model_files = {
                'xgboost': 'xgboost_model.pkl',
                'lightgbm': 'lightgbm_model.pkl',
                'gradientboosting': 'gradientboost_model.pkl',
                'adaboost': 'adaboost_model.pkl'
            }

            for name, filename in model_files.items():
                with open(self.weights_dir / filename, 'rb') as f:
                    self.models[name] = pickle.load(f)
                print(f"  ✓ {name.capitalize()} loaded")

            # Load ensemble configuration
            self.selected_features = self.config['selected_features']
            self.ensemble_weights = np.array(self.config['ensemble_weights'])

            print("\n✅ All models loaded successfully!")
            print(f"  - Number of models: {len(self.models)}")
            print(f"  - Selected features: {len(self.selected_features)}/162")
            print(f"  - Ensemble weights: {self.ensemble_weights}")
            print(f"  - Emotions: {', '.join(self.label_encoder.classes_)}")

        except FileNotFoundError as e:
            raise Exception(f"Model files not found in '{self.weights_dir}': {e}")
        except Exception as e:
            raise Exception(f"Error loading models: {e}")

    def predict(self, features):
        """
        Predict emotion from features.

        Args:
            features (np.array): Feature vector of shape (162,) or (n_samples, 162)

        Returns:
            np.array: Predicted emotion labels
        """
        # Ensure 2D array
        if features.ndim == 1:
            features = features.reshape(1, -1)

        # Preprocess
        features_scaled = self.scaler.transform(features)
        features_selected = features_scaled[:, self.selected_features]

        # Get predictions from all models
        predictions = []
        for model in self.models.values():
            pred_proba = model.predict_proba(features_selected)
            predictions.append(pred_proba)

        # Weighted ensemble
        ensemble_proba = np.average(predictions, axis=0, weights=self.ensemble_weights)

        # Get predicted labels
        predicted_labels = np.argmax(ensemble_proba, axis=1)

        return predicted_labels

    def predict_proba(self, features):
        """
        Predict emotion probabilities.

        Args:
            features (np.array): Feature vector of shape (162,) or (n_samples, 162)

        Returns:
            np.array: Probability distribution over emotions, shape (n_samples, n_emotions)
        """
        # Ensure 2D array
        if features.ndim == 1:
            features = features.reshape(1, -1)

        # Preprocess
        features_scaled = self.scaler.transform(features)
        features_selected = features_scaled[:, self.selected_features]

        # Get predictions from all models
        predictions = []
        for model in self.models.values():
            pred_proba = model.predict_proba(features_selected)
            predictions.append(pred_proba)

        # Weighted ensemble
        ensemble_proba = np.average(predictions, axis=0, weights=self.ensemble_weights)

        return ensemble_proba

    def predict_with_confidence(self, features):
        """
        Predict emotion with a confidence score.

        Args:
            features (np.array): Feature vector of shape (162,)

        Returns:
            tuple: (emotion_name, confidence, probabilities_dict)
        """
        # Get probabilities
        proba = self.predict_proba(features)[0]

        # Get prediction
        predicted_idx = np.argmax(proba)
        emotion_name = self.label_encoder.classes_[predicted_idx]
        confidence = proba[predicted_idx]

        # Create probability dictionary
        prob_dict = {}
        for i, emotion in enumerate(self.label_encoder.classes_):
            prob_dict[emotion] = float(proba[i])

        return emotion_name, confidence, prob_dict

    def decode_emotion(self, label):
        """
        Convert a numeric label to an emotion name.

        Args:
            label (int): Numeric emotion label

        Returns:
            str: Emotion name
        """
        return self.label_encoder.inverse_transform([label])[0]

    def get_emotion_names(self):
        """
        Get the list of all emotion names.

        Returns:
            list: List of emotion names
        """
        return self.label_encoder.classes_.tolist()

    def get_model_info(self):
        """
        Get information about the ensemble model.

        Returns:
            dict: Model information
        """
        return {
            'n_models': len(self.models),
            'models': list(self.models.keys()),
            'n_features_selected': len(self.selected_features),
            'n_features_total': 162,
            'ensemble_weights': self.ensemble_weights.tolist(),
            'emotions': self.get_emotion_names(),
            'accuracies': self.config.get('model_accuracies', {})
        }
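A minimal usage sketch of this class, assuming `weights/` was produced by quick_train.py and `sample.wav` stands in for any short speech clip (both names are placeholders, not files in this commit):

```python
from src.ensemble_model import EnsembleEmotionRecognizer
from src.feature_extraction import extract_features

model = EnsembleEmotionRecognizer(weights_dir="weights")
features, y, sr = extract_features("sample.wav")           # 162-dim vector
emotion, confidence, probs = model.predict_with_confidence(features)
print(emotion, f"{confidence:.1%}")                         # e.g. "happy 73.2%"
```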
src/feature_extraction.py
ADDED
@@ -0,0 +1,78 @@
"""
Audio Feature Extraction Module
Extracts 162 features from audio files for emotion recognition
"""

import numpy as np
import librosa
import warnings
warnings.filterwarnings('ignore')


def extract_features(audio_path, duration=2.5, offset=0.6):
    """
    Extract 162 audio features from an audio file.

    Features:
    - 1 Zero Crossing Rate
    - 12 Chroma STFT
    - 20 MFCC
    - 1 RMS Energy
    - 128 Mel Spectrogram

    Args:
        audio_path (str): Path to audio file
        duration (float): Duration to load (seconds)
        offset (float): Start reading after this time (seconds)

    Returns:
        features (np.array): Feature vector of shape (162,)
        y (np.array): Audio time series
        sr (int): Sample rate
    """
    try:
        # Load audio file
        y, sr = librosa.load(audio_path, duration=duration, offset=offset)

        # Initialize feature array
        features = np.array([])

        # 1. Zero Crossing Rate (1 feature)
        zcr = np.mean(librosa.feature.zero_crossing_rate(y=y).T, axis=0)
        features = np.hstack((features, zcr))

        # 2. Chroma STFT (12 features)
        stft = np.abs(librosa.stft(y))
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
        features = np.hstack((features, chroma))

        # 3. MFCC (20 features)
        mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20).T, axis=0)
        features = np.hstack((features, mfcc))

        # 4. RMS Energy (1 feature)
        rms = np.mean(librosa.feature.rms(y=y).T, axis=0)
        features = np.hstack((features, rms))

        # 5. Mel Spectrogram (128 features)
        mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
        features = np.hstack((features, mel))

        return features, y, sr

    except Exception as e:
        raise Exception(f"Error extracting features from {audio_path}: {str(e)}")


def get_feature_names():
    """
    Get the names of all 162 features.

    Returns:
        list: List of feature names
    """
    names = ['zcr']
    names.extend([f'chroma_{i}' for i in range(12)])
    names.extend([f'mfcc_{i}' for i in range(20)])
    names.append('rms')
    names.extend([f'mel_{i}' for i in range(128)])
    return names
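A quick sanity check of the feature layout (1 + 12 + 20 + 1 + 128 = 162) using only the helper above:

```python
from src.feature_extraction import get_feature_names

names = get_feature_names()
print(len(names))             # 162
print(names[:3], names[-1])   # ['zcr', 'chroma_0', 'chroma_1'] ... 'mel_127'
```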
src/utils.py
ADDED
@@ -0,0 +1,83 @@
"""
Utility functions for visualization and analysis
"""

import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display


def create_waveform_plot(y, sr, title="Audio Waveform"):
    """
    Create a waveform visualization.

    Args:
        y (np.array): Audio time series
        sr (int): Sample rate
        title (str): Plot title

    Returns:
        matplotlib.figure.Figure: Waveform plot
    """
    fig, ax = plt.subplots(figsize=(10, 3))
    librosa.display.waveshow(y, sr=sr, ax=ax, color='#2E86DE')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=11)
    ax.set_ylabel('Amplitude', fontsize=11)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    return fig


def create_spectrogram_plot(y, sr, title="Spectrogram"):
    """
    Create a spectrogram visualization.

    Args:
        y (np.array): Audio time series
        sr (int): Sample rate
        title (str): Plot title

    Returns:
        matplotlib.figure.Figure: Spectrogram plot
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    img = librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz',
                                   ax=ax, cmap='viridis')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=11)
    ax.set_ylabel('Frequency (Hz)', fontsize=11)
    fig.colorbar(img, ax=ax, format='%+2.0f dB')
    plt.tight_layout()
    return fig


def create_mel_spectrogram_plot(y, sr, title="Mel Spectrogram"):
    """
    Create a mel spectrogram visualization.

    Args:
        y (np.array): Audio time series
        sr (int): Sample rate
        title (str): Plot title

    Returns:
        matplotlib.figure.Figure: Mel spectrogram plot
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    S = librosa.feature.melspectrogram(y=y, sr=sr)
    S_dB = librosa.power_to_db(S, ref=np.max)
    img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel',
                                   ax=ax, cmap='magma')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Time (seconds)', fontsize=11)
    ax.set_ylabel('Mel Frequency', fontsize=11)
    fig.colorbar(img, ax=ax, format='%+2.0f dB')
    plt.tight_layout()
    return fig


def format_probability_text(prob_dict, top_k=None):
    """
    Format probability dictionary as text with progress bars

    Args:
        prob_dict (dict): Dictionary of e
test_local.py
ADDED
@@ -0,0 +1,156 @@
"""
Test the application locally before deploying
"""

import os
import sys

print("="*70)
print("LOCAL TEST - Speech Emotion Recognition")
print("="*70)

# ============================================================================
# 1. CHECK FILES
# ============================================================================
print("\n1️⃣ Checking required files...")

required_files = [
    'app.py',
    'requirements.txt',
    'README.md',
    'src/__init__.py',
    'src/feature_extraction.py',
    'src/ensemble_model.py',
    'src/utils.py',
    'weights/xgboost_model.pkl',
    'weights/lightgbm_model.pkl',
    'weights/gradientboost_model.pkl',
    'weights/adaboost_model.pkl',
    'weights/scaler.pkl',
    'weights/label_encoder.pkl',
    'weights/config.json'
]

missing_files = []
for file in required_files:
    if os.path.exists(file):
        print(f"  ✓ {file}")
    else:
        print(f"  ❌ {file} - MISSING")
        missing_files.append(file)

if missing_files:
    print(f"\n❌ Missing {len(missing_files)} files. Please create them first.")
    sys.exit(1)

# ============================================================================
# 2. TEST IMPORTS
# ============================================================================
print("\n2️⃣ Testing imports...")

try:
    import numpy
    print("  ✓ numpy")
except ImportError:
    print("  ❌ numpy - Install: pip install numpy")

try:
    import pandas
    print("  ✓ pandas")
except ImportError:
    print("  ❌ pandas - Install: pip install pandas")

try:
    import sklearn
    print("  ✓ scikit-learn")
except ImportError:
    print("  ❌ scikit-learn - Install: pip install scikit-learn")

try:
    import xgboost
    print("  ✓ xgboost")
except ImportError:
    print("  ❌ xgboost - Install: pip install xgboost")

try:
    import lightgbm
    print("  ✓ lightgbm")
except ImportError:
    print("  ❌ lightgbm - Install: pip install lightgbm")

try:
    import librosa
    print("  ✓ librosa")
except ImportError:
    print("  ❌ librosa - Install: pip install librosa")

try:
    import gradio
    print("  ✓ gradio")
except ImportError:
    print("  ❌ gradio - Install: pip install gradio")

# ============================================================================
# 3. TEST MODEL LOADING
# ============================================================================
print("\n3️⃣ Testing model loading...")

try:
    from src.ensemble_model import EnsembleEmotionRecognizer

    model = EnsembleEmotionRecognizer(weights_dir='weights')
    print("  ✓ Model loaded successfully")

    # Get model info
    info = model.get_model_info()
    print(f"  ✓ Models: {', '.join(info['models'])}")
    print(f"  ✓ Features: {info['n_features_selected']}/{info['n_features_total']}")
    print(f"  ✓ Emotions: {', '.join(info['emotions'])}")

except Exception as e:
    print(f"  ❌ Error loading model: {e}")
    sys.exit(1)

# ============================================================================
# 4. TEST FEATURE EXTRACTION
# ============================================================================
print("\n4️⃣ Testing feature extraction...")

try:
    from src.feature_extraction import extract_features
    import numpy as np

    # Create dummy audio
    import librosa
    y = np.random.randn(22050 * 3)  # 3 seconds of random audio

    # Save to a temp file
    import soundfile as sf
    sf.write('temp_test.wav', y, 22050)

    # Extract features
    features, _, _ = extract_features('temp_test.wav')
    print(f"  ✓ Features extracted: shape {features.shape}")

    # Test prediction
    prediction = model.predict(features)
    print(f"  ✓ Prediction works: {model.decode_emotion(prediction[0])}")

    # Cleanup
    os.remove('temp_test.wav')

except Exception as e:
    print(f"  ❌ Error in feature extraction: {e}")
    sys.exit(1)

# ============================================================================
# 5. FILE SIZES
# ============================================================================
print("\n5️⃣ Checking file sizes...")

total_size = 0
for file in required_files:
    if os.path.exists(file):
        size = os.path.getsize(file) / 1024 / 1024  # MB
        total_size += size
        if size > 10:
|