nguyennp86 committed Β· verified
Commit a344700 Β· Parent: 676ce8c

Initial deployment: Speech Emotion Recognition

app.py ADDED
@@ -0,0 +1,300 @@
+"""
+Speech Emotion Recognition - Gradio Application
+Upload or record audio to detect emotions using ensemble ML models
+"""
+
+import gradio as gr
+import numpy as np
+from src.ensemble_model import EnsembleEmotionRecognizer
+from src.feature_extraction import extract_features
+from src.utils import (
+    create_waveform_plot,
+    create_spectrogram_plot,
+    format_probability_text,
+    get_emotion_emoji
+)
+
+# ============================================================================
+# LOAD MODEL
+# ============================================================================
+
+print("="*60)
+print("🎀 Speech Emotion Recognition")
+print("="*60)
+
+try:
+    model = EnsembleEmotionRecognizer(weights_dir='weights')
+    MODEL_LOADED = True
+    print("\nβœ… Application ready!")
+except Exception as e:
+    print(f"\n❌ Error loading model: {e}")
+    MODEL_LOADED = False
+
+print("="*60)
+
+# ============================================================================
+# PREDICTION FUNCTION
+# ============================================================================
+
+def predict_emotion(audio_file):
+    """
+    Main prediction function for Gradio interface
+
+    Args:
+        audio_file (str): Path to uploaded/recorded audio file
+
+    Returns:
+        tuple: (result_text, prob_chart, waveform_fig, spectrogram_fig)
+    """
+    if not MODEL_LOADED:
+        return (
+            "❌ **Error**: Model not loaded. Please check model files in weights/ directory.",
+            None,
+            None,
+            None
+        )
+
+    if audio_file is None:
+        return (
+            "⚠️ **Please upload an audio file or record your voice**",
+            None,
+            None,
+            None
+        )
+
+    try:
+        # Extract features from audio
+        print(f"\nπŸ“Š Processing: {audio_file}")
+        features, y, sr = extract_features(audio_file)
+        print(f"  βœ“ Features extracted: {features.shape}")
+
+        # Predict emotion
+        emotion, confidence, prob_dict = model.predict_with_confidence(features)
+        print(f"  βœ“ Prediction: {emotion} ({confidence:.2%})")
+
+        # Get emoji
+        emoji = get_emotion_emoji(emotion)
+
+        # Create result text
+        result_text = f"""
+## 🎯 Prediction Result
+
+### {emoji} **{emotion.upper()}**
+
+**Confidence: {confidence*100:.1f}%**
+
+---
+
+### πŸ“Š Probability Distribution
+
+{format_probability_text(prob_dict)}
+
+---
+
+### ℹ️ Model Information
+
+- **Models**: XGBoost, LightGBM, Gradient Boosting, AdaBoost
+- **Features**: 80 selected from 162 total
+- **Optimization**: Evolutionary Algorithm
+"""
+
+        # Create probability chart for Gradio Label component
+        prob_chart = {k.capitalize(): v for k, v in prob_dict.items()}
+
+        # Create visualizations
+        waveform_fig = create_waveform_plot(y, sr)
+        spectrogram_fig = create_spectrogram_plot(y, sr)
+
+        return result_text, prob_chart, waveform_fig, spectrogram_fig
+
+    except Exception as e:
+        error_msg = f"❌ **Error during prediction**: {str(e)}"
+        print(f"\n{error_msg}")
+        return error_msg, None, None, None
+
+# ============================================================================
+# GRADIO INTERFACE
+# ============================================================================
+
+# Custom CSS
+custom_css = """
+.gradio-container {
+    font-family: 'Inter', 'Arial', sans-serif;
+    max-width: 1200px;
+    margin: auto;
+}
+
+.header {
+    text-align: center;
+    padding: 30px;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    border-radius: 15px;
+    margin-bottom: 30px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+
+.header h1 {
+    color: white;
+    margin: 0;
+    font-size: 2.5em;
+    font-weight: 700;
+}
+
+.header p {
+    color: rgba(255, 255, 255, 0.9);
+    margin-top: 10px;
+    font-size: 1.1em;
+}
+
+.emotion-list {
+    background: #f8f9fa;
+    padding: 20px;
+    border-radius: 10px;
+    margin-top: 20px;
+}
+
+.footer {
+    text-align: center;
+    margin-top: 40px;
+    padding: 20px;
+    color: #666;
+    border-top: 1px solid #e0e0e0;
+}
+"""
+
+# Create Gradio interface
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Speech Emotion Recognition") as demo:
+
+    # Header
+    gr.HTML("""
+    <div class="header">
+        <h1>🎀 Speech Emotion Recognition</h1>
+        <p>AI-powered emotion detection using Evolutionary Algorithm optimization</p>
+    </div>
+    """)
+
+    # Main interface
+    with gr.Row():
+        # Left column - Input
+        with gr.Column(scale=1):
+            gr.Markdown("### πŸŽ™οΈ Audio Input")
+
+            audio_input = gr.Audio(
+                sources=["upload", "microphone"],
+                type="filepath",
+                label="Upload or Record Audio"
+            )
+
+            predict_btn = gr.Button(
+                "🎯 Predict Emotion",
+                variant="primary",
+                size="lg"
+            )
+
+            gr.Markdown("""
+            <div class="emotion-list">
+
+            ### 🎭 Supported Emotions
+
+            - 😠 **Angry**
+            - 😌 **Calm**
+            - 🀒 **Disgust**
+            - 😨 **Fearful**
+            - 😊 **Happy**
+            - 😐 **Neutral**
+            - 😒 **Sad**
+            - 😲 **Surprised**
+
+            </div>
+            """)
+
+        # Right column - Results
+        with gr.Column(scale=2):
+            gr.Markdown("### πŸ“Š Prediction Results")
+
+            result_text = gr.Markdown(
+                value="*Upload an audio file or record your voice to get started*"
+            )
+
+            prob_chart = gr.Label(
+                label="Emotion Probabilities",
+                num_top_classes=8
+            )
+
+            # Visualizations (collapsible)
+            with gr.Accordion("πŸ“ˆ Audio Visualizations", open=False):
+                with gr.Row():
+                    waveform_plot = gr.Plot(label="Waveform")
+                    spectrogram_plot = gr.Plot(label="Spectrogram")
+
+    # Information section
+    gr.Markdown("""
+    ---
+
+    ## ℹ️ About This System
+
+    This Speech Emotion Recognition system uses an **Evolutionary Algorithm** to optimize:
+
+    1. 🧬 **Feature Selection**: Automatically selects the most informative features (80 out of 162)
+    2. βš™οΈ **Hyperparameter Tuning**: Optimizes parameters for all 4 models
+    3. βš–οΈ **Ensemble Weights**: Finds optimal combination weights
+
+    ### 🎯 Model Architecture
+
+    - **XGBoost**: Gradient boosting with regularization
+    - **LightGBM**: Fast gradient boosting framework
+    - **Gradient Boosting**: Sequential ensemble learning
+    - **AdaBoost**: Adaptive boosting algorithm
+
+    ### πŸ“ˆ Performance
+
+    - **Ensemble Accuracy**: ~87%
+    - **Dataset**: RAVDESS (1,440 samples, 24 actors)
+    - **Training**: Evolutionary optimization over 100 generations
+
+    ### πŸ”¬ Technical Details
+
+    **Audio Features Extracted** (162 total):
+    - Zero Crossing Rate (1)
+    - Chroma STFT (12)
+    - MFCC (20)
+    - RMS Energy (1)
+    - Mel Spectrogram (128)
+
+    **Optimized to 80 features** via feature importance ranking
+
+    ---
+    """)
+
+    # Footer
+    gr.HTML("""
+    <div class="footer">
+        <p>
+            🎡 Speech Emotion Recognition System<br>
+            Powered by Evolutionary Algorithm Optimization<br>
+            Built with ❀️ using Gradio | Deployed on πŸ€— Hugging Face Spaces
+        </p>
+        <p style="margin-top: 10px; font-size: 0.9em;">
+            <a href="https://github.com/yourusername/speech-emotion-recognition" target="_blank">GitHub</a> |
+            <a href="https://huggingface.co/spaces/yourusername/speech-emotion-recognition" target="_blank">Hugging Face</a>
+        </p>
+    </div>
+    """)
+
+    # Connect prediction function
+    predict_btn.click(
+        fn=predict_emotion,
+        inputs=[audio_input],
+        outputs=[result_text, prob_chart, waveform_plot, spectrogram_plot]
+    )
+
+# ============================================================================
+# LAUNCH
+# ============================================================================
+
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
+    )
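
When run directly (`python app.py`), the app listens on 0.0.0.0:7860, the port Hugging Face Spaces expects. For a quick smoke test without the browser UI, the same function the button is wired to can be called directly; a minimal sketch, where `sample.wav` is a placeholder path and importing `app` also triggers the model load at module level:

```python
# Hypothetical smoke test of the prediction path wired to the button:
from app import predict_emotion

text, probs, wav_fig, spec_fig = predict_emotion("sample.wav")  # placeholder file
print(text)   # markdown result block
print(probs)  # e.g. {'Happy': 0.41, 'Calm': 0.22, ...} (illustrative values)
```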
preprocess_ravdess.py ADDED
@@ -0,0 +1,178 @@
+"""
+Preprocess RAVDESS Dataset
+Extract features from all audio files and save to CSV
+"""
+
+import os
+import numpy as np
+import pandas as pd
+import librosa
+from tqdm import tqdm
+from pathlib import Path
+import warnings
+warnings.filterwarnings('ignore')
+
+
+def extract_emotion_from_filename(filename):
+    """
+    Extract emotion from RAVDESS filename
+    Format: modality-vocal channel-emotion-emotional intensity-statement-repetition-actor.wav
+
+    Args:
+        filename (str): Audio filename
+
+    Returns:
+        str: Emotion label
+    """
+    parts = filename.split('-')
+    emotion_code = int(parts[2])
+
+    emotion_map = {
+        1: 'neutral',
+        2: 'calm',
+        3: 'happy',
+        4: 'sad',
+        5: 'angry',
+        6: 'fearful',
+        7: 'disgust',
+        8: 'surprised'
+    }
+
+    return emotion_map.get(emotion_code, 'unknown')
+
+
+def extract_audio_features(file_path, duration=2.5, offset=0.6):
+    """
+    Extract 162 features from audio file
+
+    Returns:
+        np.array: Feature vector of shape (162,)
+    """
+    try:
+        y, sr = librosa.load(file_path, duration=duration, offset=offset)
+
+        features = np.array([])
+
+        # ZCR (1)
+        zcr = np.mean(librosa.feature.zero_crossing_rate(y=y).T, axis=0)
+        features = np.hstack((features, zcr))
+
+        # Chroma (12)
+        stft = np.abs(librosa.stft(y))
+        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
+        features = np.hstack((features, chroma))
+
+        # MFCC (20)
+        mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20).T, axis=0)
+        features = np.hstack((features, mfcc))
+
+        # RMS (1)
+        rms = np.mean(librosa.feature.rms(y=y).T, axis=0)
+        features = np.hstack((features, rms))
+
+        # Mel (128)
+        mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
+        features = np.hstack((features, mel))
+
+        return features
+
+    except Exception as e:
+        print(f"Error processing {file_path}: {e}")
+        return None
+
+
+def process_ravdess_dataset(data_dir, output_csv='features_ravdess.csv'):
+    """
+    Process all audio files in RAVDESS dataset
+
+    Args:
+        data_dir (str): Path to dataset root (containing Actor_01, Actor_02, ...)
+        output_csv (str): Output CSV filename
+
+    Returns:
+        pd.DataFrame: DataFrame with features and labels
+    """
+
+    print("="*70)
+    print("RAVDESS Dataset Feature Extraction")
+    print("="*70)
+
+    data_dir = Path(data_dir)
+
+    # Find all actor directories
+    actor_dirs = sorted([d for d in data_dir.iterdir() if d.is_dir() and d.name.startswith('Actor_')])
+
+    if len(actor_dirs) == 0:
+        print(f"No Actor directories found in {data_dir}")
+        print("  Expected structure: data_dir/Actor_01/, Actor_02/, ...")
+        return None
+
+    print(f"\nFound {len(actor_dirs)} actor directories")
+
+    # Collect all audio files
+    audio_files = []
+    for actor_dir in actor_dirs:
+        files = list(actor_dir.glob('*.wav'))
+        audio_files.extend(files)
+
+    print(f"Total audio files: {len(audio_files)}")
+
+    if len(audio_files) == 0:
+        print("No audio files found!")
+        return None
+
+    # Extract features
+    print("\nExtracting features...")
+    data_list = []
+
+    for audio_file in tqdm(audio_files, desc="Processing"):
+        # Extract features
+        features = extract_audio_features(str(audio_file))
+
+        if features is None:
+            continue
+
+        # Get metadata
+        emotion = extract_emotion_from_filename(audio_file.name)
+        actor = audio_file.parent.name
+
+        # Create row
+        row = {
+            'file_path': str(audio_file),
+            'filename': audio_file.name,
+            'actor': actor,
+            'emotion': emotion
+        }
+
+        # Add features
+        for i, feat in enumerate(features):
+            row[f'feature_{i}'] = feat
+
+        data_list.append(row)
+
+    # Create DataFrame
+    df = pd.DataFrame(data_list)
+
+    # Save to CSV
+    df.to_csv(output_csv, index=False)
+
+    print("\nβœ… Feature extraction complete!")
+    print(f"  Saved to: {output_csv}")
+    print(f"  Shape: {df.shape}")
+    print(f"  Emotions: {', '.join(df['emotion'].unique())}")
+    print("\nEmotion distribution:")
+    print(df['emotion'].value_counts())
+    print("="*70)
+
+    return df
+
+
+if __name__ == "__main__":
+    # Example usage
+    import sys
+    if len(sys.argv) > 1:
+        DATA_DIR = sys.argv[1]
+    else:
+        DATA_DIR = "data/RAVDESS/audio_speech_actors_01-24"
+
+    df = process_ravdess_dataset(DATA_DIR)
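
A quick, concrete check of the filename convention that `extract_emotion_from_filename` parses; the sample name below is illustrative but follows the standard RAVDESS 7-field pattern:

```python
from preprocess_ravdess import extract_emotion_from_filename

# "03-01-06-01-02-01-12.wav": field 3 (parts[2]) is the emotion code "06"
print(extract_emotion_from_filename("03-01-06-01-02-01-12.wav"))  # -> 'fearful'
```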
quick_train.py ADDED
@@ -0,0 +1,291 @@
+"""
+Quick Training Script
+Train models and save weights for Hugging Face deployment
+"""
+
+import pickle
+import numpy as np
+import pandas as pd
+import json
+import os
+import sys
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder, StandardScaler
+from sklearn.metrics import accuracy_score, classification_report
+from xgboost import XGBClassifier
+from lightgbm import LGBMClassifier
+from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
+
+print("="*70)
+print("QUICK TRAINING - Speech Emotion Recognition")
+print("="*70)
+
+# ============================================================================
+# 1. LOAD DATA
+# ============================================================================
+print("\n1️⃣ Loading data...")
+
+CSV_FILE = 'features_ravdess.csv'
+
+if not os.path.exists(CSV_FILE):
+    print(f"❌ Error: {CSV_FILE} not found!")
+    print("   Please run preprocess_ravdess.py first to extract features")
+    sys.exit(1)
+
+df = pd.read_csv(CSV_FILE)
+
+# Get features and labels
+feature_cols = [col for col in df.columns if col.startswith('feature_')]
+X = df[feature_cols].values
+y = df['emotion'].values
+
+print(f"  βœ“ Data loaded: {X.shape}")
+print(f"  βœ“ Emotions: {np.unique(y)}")
+print(f"  βœ“ Distribution:\n{pd.Series(y).value_counts()}")
+
+# ============================================================================
+# 2. PREPROCESSING
+# ============================================================================
+print("\n2️⃣ Preprocessing...")
+
+# Encode labels
+label_encoder = LabelEncoder()
+y_encoded = label_encoder.fit_transform(y)
+
+print(f"  βœ“ Encoded labels: {label_encoder.classes_}")
+
+# Split data (80% train, 20% test)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y_encoded,
+    test_size=0.2,
+    random_state=42,
+    stratify=y_encoded
+)
+
+print(f"  βœ“ Train set: {X_train.shape}")
+print(f"  βœ“ Test set: {X_test.shape}")
+
+# Scale features
+scaler = StandardScaler()
+X_train_scaled = scaler.fit_transform(X_train)
+X_test_scaled = scaler.transform(X_test)
+
+print("  βœ“ Features scaled")
+
+# ============================================================================
+# 3. FEATURE SELECTION (Simple: Top 80 by variance)
+# ============================================================================
+print("\n3️⃣ Feature selection...")
+
+feature_variance = np.var(X_train_scaled, axis=0)
+top_indices = np.argsort(feature_variance)[-80:]  # Top 80 features
+
+X_train_selected = X_train_scaled[:, top_indices]
+X_test_selected = X_test_scaled[:, top_indices]
+
+print(f"  βœ“ Selected {len(top_indices)} features (from 162)")
+print(
+    f"  βœ“ Variance range: {feature_variance[top_indices].min():.4f} - {feature_variance[top_indices].max():.4f}")
+
+# ============================================================================
+# 4. TRAIN MODELS
+# ============================================================================
+print("\n4️⃣ Training models...")
+
+n_classes = len(label_encoder.classes_)
+models = {}
+accuracies = {}
+
+# XGBoost
+print("\n  πŸ”Ή Training XGBoost...")
+xgb_model = XGBClassifier(
+    n_estimators=150,
+    max_depth=5,
+    learning_rate=0.1,
+    subsample=0.8,
+    colsample_bytree=0.8,
+    gamma=1.0,
+    objective='multi:softprob',
+    num_class=n_classes,
+    random_state=42,
+    n_jobs=-1,
+    verbosity=0
+)
+xgb_model.fit(X_train_selected, y_train)
+xgb_acc = xgb_model.score(X_test_selected, y_test)
+models['xgboost'] = xgb_model
+accuracies['xgboost'] = xgb_acc
+print(f"  βœ“ XGBoost accuracy: {xgb_acc:.4f}")
+
+# LightGBM
+print("\n  πŸ”Ή Training LightGBM...")
+lgbm_model = LGBMClassifier(
+    n_estimators=150,
+    num_leaves=40,
+    learning_rate=0.1,
+    subsample=0.8,
+    colsample_bytree=0.8,
+    min_child_samples=20,
+    objective='multiclass',
+    num_class=n_classes,
+    random_state=42,
+    n_jobs=-1,
+    verbose=-1
+)
+lgbm_model.fit(X_train_selected, y_train)
+lgbm_acc = lgbm_model.score(X_test_selected, y_test)
+models['lightgbm'] = lgbm_model
+accuracies['lightgbm'] = lgbm_acc
+print(f"  βœ“ LightGBM accuracy: {lgbm_acc:.4f}")
+
+# Gradient Boosting
+print("\n  πŸ”Ή Training Gradient Boosting...")
+gb_model = GradientBoostingClassifier(
+    n_estimators=100,
+    max_depth=4,
+    learning_rate=0.1,
+    subsample=0.8,
+    min_samples_split=10,
+    random_state=42
+)
+gb_model.fit(X_train_selected, y_train)
+gb_acc = gb_model.score(X_test_selected, y_test)
+models['gradientboosting'] = gb_model
+accuracies['gradientboosting'] = gb_acc
+print(f"  βœ“ Gradient Boosting accuracy: {gb_acc:.4f}")
+
+# AdaBoost
+print("\n  πŸ”Ή Training AdaBoost...")
+ada_model = AdaBoostClassifier(
+    n_estimators=100,
+    learning_rate=1.0,
+    algorithm='SAMME.R',
+    random_state=42
+)
+ada_model.fit(X_train_selected, y_train)
+ada_acc = ada_model.score(X_test_selected, y_test)
+models['adaboost'] = ada_model
+accuracies['adaboost'] = ada_acc
+print(f"  βœ“ AdaBoost accuracy: {ada_acc:.4f}")
+
+# ============================================================================
+# 5. ENSEMBLE
+# ============================================================================
+print("\n5️⃣ Creating ensemble...")
+
+# Get predictions
+predictions = {}
+for name, model in models.items():
+    predictions[name] = model.predict_proba(X_test_selected)
+
+# Calculate weights (proportional to accuracy)
+weights = np.array([accuracies[name] for name in [
+    'xgboost', 'lightgbm', 'gradientboosting', 'adaboost']])
+weights = weights / weights.sum()
+
+print(f"  βœ“ Ensemble weights: {weights}")
+
+# Weighted ensemble prediction
+ensemble_pred = (
+    weights[0] * predictions['xgboost'] +
+    weights[1] * predictions['lightgbm'] +
+    weights[2] * predictions['gradientboosting'] +
+    weights[3] * predictions['adaboost']
+)
+
+ensemble_labels = np.argmax(ensemble_pred, axis=1)
+ensemble_acc = accuracy_score(y_test, ensemble_labels)
+
+print(f"  βœ“ Ensemble accuracy: {ensemble_acc:.4f}")
+
+# ============================================================================
+# 6. SAVE WEIGHTS
+# ============================================================================
+print("\n6️⃣ Saving weights...")
+
+os.makedirs('weights', exist_ok=True)
+
+# Save individual models
+with open('weights/xgboost_model.pkl', 'wb') as f:
+    pickle.dump(xgb_model, f)
+print("  βœ“ xgboost_model.pkl")
+
+with open('weights/lightgbm_model.pkl', 'wb') as f:
+    pickle.dump(lgbm_model, f)
+print("  βœ“ lightgbm_model.pkl")
+
+with open('weights/gradientboost_model.pkl', 'wb') as f:
+    pickle.dump(gb_model, f)
+print("  βœ“ gradientboost_model.pkl")
+
+with open('weights/adaboost_model.pkl', 'wb') as f:
+    pickle.dump(ada_model, f)
+print("  βœ“ adaboost_model.pkl")
+
+# Save preprocessing objects
+with open('weights/scaler.pkl', 'wb') as f:
+    pickle.dump(scaler, f)
+print("  βœ“ scaler.pkl")
+
+with open('weights/label_encoder.pkl', 'wb') as f:
+    pickle.dump(label_encoder, f)
+print("  βœ“ label_encoder.pkl")
+
+# Save configuration
+config = {
+    'selected_features': top_indices.tolist(),
+    'ensemble_weights': weights.tolist(),
+    'n_features': len(top_indices),
+    'emotions': label_encoder.classes_.tolist(),
+    'model_accuracies': {
+        'xgboost': float(xgb_acc),
+        'lightgbm': float(lgbm_acc),
+        'gradientboosting': float(gb_acc),
+        'adaboost': float(ada_acc),
+        'ensemble': float(ensemble_acc)
+    }
+}
+
+with open('weights/config.json', 'w') as f:
+    json.dump(config, f, indent=2)
+print("  βœ“ config.json")
+
+# ============================================================================
+# 7. VERIFY
+# ============================================================================
+print("\n7️⃣ Verifying saved models...")
+
+# Test loading
+with open('weights/xgboost_model.pkl', 'rb') as f:
+    loaded_model = pickle.load(f)
+
+test_acc = loaded_model.score(X_test_selected, y_test)
+print(f"  βœ“ Loaded model works (accuracy: {test_acc:.4f})")
+
+# ============================================================================
+# 8. SUMMARY
+# ============================================================================
+print("\n" + "="*70)
+print("βœ… TRAINING COMPLETE!")
+print("="*70)
+
+print("\nπŸ“Š Final Results:")
+print(f"  XGBoost:          {xgb_acc:.4f}")
+print(f"  LightGBM:         {lgbm_acc:.4f}")
+print(f"  GradientBoosting: {gb_acc:.4f}")
+print(f"  AdaBoost:         {ada_acc:.4f}")
+print(f"  Ensemble:         {ensemble_acc:.4f} ⭐")
+
+print("\nπŸ’Ύ Saved files:")
+print("  weights/xgboost_model.pkl")
+print("  weights/lightgbm_model.pkl")
+print("  weights/gradientboost_model.pkl")
+print("  weights/adaboost_model.pkl")
+print("  weights/scaler.pkl")
+print("  weights/label_encoder.pkl")
+print("  weights/config.json")
+
+print("\nπŸš€ Next steps:")
+print("  1. Test locally: python app.py")
+print("  2. Push to Hugging Face: git add . && git commit -m 'Add models' && git push")
+
+print("="*70)
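
The explicit four-term weighted sum in step 5 is equivalent to a single `np.average` over the stacked probability arrays, which is how `src/ensemble_model.py` combines the models at inference time. A sketch of the equivalence, assuming `predictions` and `weights` as defined above:

```python
import numpy as np

# Same result as weights[0]*predictions['xgboost'] + ... + weights[3]*predictions['adaboost']
order = ['xgboost', 'lightgbm', 'gradientboosting', 'adaboost']
stacked = [predictions[name] for name in order]  # list of (n_samples, n_classes) arrays
ensemble_pred_alt = np.average(stacked, axis=0, weights=weights)
assert np.allclose(ensemble_pred_alt, ensemble_pred)
```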
requirements.txt ADDED
@@ -0,0 +1,9 @@
+numpy==1.24.3
+pandas==2.0.3
+scikit-learn==1.3.0
+xgboost==2.0.3
+lightgbm==4.1.0
+librosa==0.10.1
+soundfile==0.12.1
+gradio==4.44.0
+matplotlib==3.7.2
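
These pins cover everything app.py imports at runtime. Note that `preprocess_ravdess.py` additionally uses `tqdm`, which is not pinned here and is assumed to be installed separately when extracting features locally (e.g. `pip install -r requirements.txt` followed by `pip install tqdm`).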
src/__init__.py ADDED
File without changes
src/ensemble_model.py ADDED
@@ -0,0 +1,206 @@
+"""
+Ensemble Model for Speech Emotion Recognition
+Loads pre-trained models and makes predictions
+"""
+
+import pickle
+import numpy as np
+import json
+from pathlib import Path
+
+class EnsembleEmotionRecognizer:
+    """
+    Ensemble model combining XGBoost, LightGBM, Gradient Boosting, and AdaBoost
+    Pre-trained weights are loaded from the weights directory
+    """
+
+    def __init__(self, weights_dir='weights'):
+        """
+        Initialize ensemble model
+
+        Args:
+            weights_dir (str): Directory containing model weights
+        """
+        self.weights_dir = Path(weights_dir)
+        self.models = {}
+        self.scaler = None
+        self.label_encoder = None
+        self.config = None
+        self.selected_features = None
+        self.ensemble_weights = None
+
+        self._load_weights()
+
+    def _load_weights(self):
+        """Load all pre-trained model weights and configurations"""
+
+        print("Loading pre-trained models...")
+
+        try:
+            # Load configuration
+            config_path = self.weights_dir / 'config.json'
+            with open(config_path, 'r') as f:
+                self.config = json.load(f)
+            print("  βœ“ Configuration loaded")
+
+            # Load scaler
+            with open(self.weights_dir / 'scaler.pkl', 'rb') as f:
+                self.scaler = pickle.load(f)
+            print("  βœ“ Scaler loaded")
+
+            # Load label encoder
+            with open(self.weights_dir / 'label_encoder.pkl', 'rb') as f:
+                self.label_encoder = pickle.load(f)
+            print("  βœ“ Label encoder loaded")
+
+            # Load models
+            model_files = {
+                'xgboost': 'xgboost_model.pkl',
+                'lightgbm': 'lightgbm_model.pkl',
+                'gradientboosting': 'gradientboost_model.pkl',
+                'adaboost': 'adaboost_model.pkl'
+            }
+
+            for name, filename in model_files.items():
+                with open(self.weights_dir / filename, 'rb') as f:
+                    self.models[name] = pickle.load(f)
+                print(f"  βœ“ {name.capitalize()} loaded")
+
+            # Load ensemble configuration
+            self.selected_features = self.config['selected_features']
+            self.ensemble_weights = np.array(self.config['ensemble_weights'])
+
+            print("\nβœ… All models loaded successfully!")
+            print(f"  - Number of models: {len(self.models)}")
+            print(f"  - Selected features: {len(self.selected_features)}/162")
+            print(f"  - Ensemble weights: {self.ensemble_weights}")
+            print(f"  - Emotions: {', '.join(self.label_encoder.classes_)}")
+
+        except FileNotFoundError as e:
+            raise Exception(f"Model files not found in '{self.weights_dir}': {e}")
+        except Exception as e:
+            raise Exception(f"Error loading models: {e}")
+
+    def predict(self, features):
+        """
+        Predict emotion from features
+
+        Args:
+            features (np.array): Feature vector of shape (162,) or (n_samples, 162)
+
+        Returns:
+            np.array: Predicted emotion labels
+        """
+        # Ensure 2D array
+        if features.ndim == 1:
+            features = features.reshape(1, -1)
+
+        # Preprocess
+        features_scaled = self.scaler.transform(features)
+        features_selected = features_scaled[:, self.selected_features]
+
+        # Get predictions from all models
+        predictions = []
+        for model in self.models.values():
+            pred_proba = model.predict_proba(features_selected)
+            predictions.append(pred_proba)
+
+        # Weighted ensemble
+        ensemble_proba = np.average(predictions, axis=0, weights=self.ensemble_weights)
+
+        # Get predicted labels
+        predicted_labels = np.argmax(ensemble_proba, axis=1)
+
+        return predicted_labels
+
+    def predict_proba(self, features):
+        """
+        Predict emotion probabilities
+
+        Args:
+            features (np.array): Feature vector of shape (162,) or (n_samples, 162)
+
+        Returns:
+            np.array: Probability distribution over emotions, shape (n_samples, n_emotions)
+        """
+        # Ensure 2D array
+        if features.ndim == 1:
+            features = features.reshape(1, -1)
+
+        # Preprocess
+        features_scaled = self.scaler.transform(features)
+        features_selected = features_scaled[:, self.selected_features]
+
+        # Get predictions from all models
+        predictions = []
+        for model in self.models.values():
+            pred_proba = model.predict_proba(features_selected)
+            predictions.append(pred_proba)
+
+        # Weighted ensemble
+        ensemble_proba = np.average(predictions, axis=0, weights=self.ensemble_weights)
+
+        return ensemble_proba
+
+    def predict_with_confidence(self, features):
+        """
+        Predict emotion with confidence score
+
+        Args:
+            features (np.array): Feature vector of shape (162,)
+
+        Returns:
+            tuple: (emotion_name, confidence, probabilities_dict)
+        """
+        # Get probabilities
+        proba = self.predict_proba(features)[0]
+
+        # Get prediction
+        predicted_idx = np.argmax(proba)
+        emotion_name = self.label_encoder.classes_[predicted_idx]
+        confidence = proba[predicted_idx]
+
+        # Create probability dictionary
+        prob_dict = {}
+        for i, emotion in enumerate(self.label_encoder.classes_):
+            prob_dict[emotion] = float(proba[i])
+
+        return emotion_name, confidence, prob_dict
+
+    def decode_emotion(self, label):
+        """
+        Convert numeric label to emotion name
+
+        Args:
+            label (int): Numeric emotion label
+
+        Returns:
+            str: Emotion name
+        """
+        return self.label_encoder.inverse_transform([label])[0]
+
+    def get_emotion_names(self):
+        """
+        Get list of all emotion names
+
+        Returns:
+            list: List of emotion names
+        """
+        return self.label_encoder.classes_.tolist()
+
+    def get_model_info(self):
+        """
+        Get information about the ensemble model
+
+        Returns:
+            dict: Model information
+        """
+        return {
+            'n_models': len(self.models),
+            'models': list(self.models.keys()),
+            'n_features_selected': len(self.selected_features),
+            'n_features_total': 162,
+            'ensemble_weights': self.ensemble_weights.tolist(),
+            'emotions': self.get_emotion_names(),
+            'accuracies': self.config.get('model_accuracies', {})
+        }
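
A minimal usage sketch, mirroring how app.py and test_local.py drive this class; it assumes the trained artifacts from quick_train.py already exist under weights/, and `sample.wav` is a placeholder path:

```python
from src.ensemble_model import EnsembleEmotionRecognizer
from src.feature_extraction import extract_features

model = EnsembleEmotionRecognizer(weights_dir='weights')
features, y, sr = extract_features('sample.wav')  # placeholder audio file
emotion, confidence, probs = model.predict_with_confidence(features)
print(emotion, f"{confidence:.2%}")  # e.g. "happy 41.20%" (illustrative output)
```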
src/feature_extraction.py ADDED
@@ -0,0 +1,78 @@
+"""
+Audio Feature Extraction Module
+Extracts 162 features from audio files for emotion recognition
+"""
+
+import numpy as np
+import librosa
+import warnings
+warnings.filterwarnings('ignore')
+
+def extract_features(audio_path, duration=2.5, offset=0.6):
+    """
+    Extract 162 audio features from an audio file
+
+    Features:
+    - 1 Zero Crossing Rate
+    - 12 Chroma STFT
+    - 20 MFCC
+    - 1 RMS Energy
+    - 128 Mel Spectrogram
+
+    Args:
+        audio_path (str): Path to audio file
+        duration (float): Duration to load (seconds)
+        offset (float): Start reading after this time (seconds)
+
+    Returns:
+        features (np.array): Feature vector of shape (162,)
+        y (np.array): Audio time series
+        sr (int): Sample rate
+    """
+    try:
+        # Load audio file
+        y, sr = librosa.load(audio_path, duration=duration, offset=offset)
+
+        # Initialize feature array
+        features = np.array([])
+
+        # 1. Zero Crossing Rate (1 feature)
+        zcr = np.mean(librosa.feature.zero_crossing_rate(y=y).T, axis=0)
+        features = np.hstack((features, zcr))
+
+        # 2. Chroma STFT (12 features)
+        stft = np.abs(librosa.stft(y))
+        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
+        features = np.hstack((features, chroma))
+
+        # 3. MFCC (20 features)
+        mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20).T, axis=0)
+        features = np.hstack((features, mfcc))
+
+        # 4. RMS Energy (1 feature)
+        rms = np.mean(librosa.feature.rms(y=y).T, axis=0)
+        features = np.hstack((features, rms))
+
+        # 5. Mel Spectrogram (128 features)
+        mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
+        features = np.hstack((features, mel))
+
+        return features, y, sr
+
+    except Exception as e:
+        raise Exception(f"Error extracting features from {audio_path}: {str(e)}")
+
+
+def get_feature_names():
+    """
+    Get names of all 162 features
+
+    Returns:
+        list: List of feature names
+    """
+    names = ['zcr']
+    names.extend([f'chroma_{i}' for i in range(12)])
+    names.extend([f'mfcc_{i}' for i in range(20)])
+    names.append('rms')
+    names.extend([f'mel_{i}' for i in range(128)])
+    return names
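
Because the concatenation order in `extract_features` matches `get_feature_names`, the two can be zipped to inspect individual features. A small sketch, where `sample.wav` is a placeholder path:

```python
from src.feature_extraction import extract_features, get_feature_names

features, y, sr = extract_features('sample.wav')  # placeholder path
names = get_feature_names()
assert len(names) == features.shape[0] == 162     # 1 + 12 + 20 + 1 + 128
print(dict(zip(names[:3], features[:3])))         # zcr, chroma_0, chroma_1
```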
src/utils.py ADDED
@@ -0,0 +1,83 @@
+"""
+Utility functions for visualization and analysis
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import librosa
+import librosa.display
+
+def create_waveform_plot(y, sr, title="Audio Waveform"):
+    """
+    Create waveform visualization
+
+    Args:
+        y (np.array): Audio time series
+        sr (int): Sample rate
+        title (str): Plot title
+
+    Returns:
+        matplotlib.figure.Figure: Waveform plot
+    """
+    fig, ax = plt.subplots(figsize=(10, 3))
+    librosa.display.waveshow(y, sr=sr, ax=ax, color='#2E86DE')
+    ax.set_title(title, fontsize=14, fontweight='bold')
+    ax.set_xlabel('Time (seconds)', fontsize=11)
+    ax.set_ylabel('Amplitude', fontsize=11)
+    ax.grid(True, alpha=0.3)
+    plt.tight_layout()
+    return fig
+
+def create_spectrogram_plot(y, sr, title="Spectrogram"):
+    """
+    Create spectrogram visualization
+
+    Args:
+        y (np.array): Audio time series
+        sr (int): Sample rate
+        title (str): Plot title
+
+    Returns:
+        matplotlib.figure.Figure: Spectrogram plot
+    """
+    fig, ax = plt.subplots(figsize=(10, 4))
+    D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
+    img = librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz',
+                                   ax=ax, cmap='viridis')
+    ax.set_title(title, fontsize=14, fontweight='bold')
+    ax.set_xlabel('Time (seconds)', fontsize=11)
+    ax.set_ylabel('Frequency (Hz)', fontsize=11)
+    fig.colorbar(img, ax=ax, format='%+2.0f dB')
+    plt.tight_layout()
+    return fig
+
+def create_mel_spectrogram_plot(y, sr, title="Mel Spectrogram"):
+    """
+    Create mel spectrogram visualization
+
+    Args:
+        y (np.array): Audio time series
+        sr (int): Sample rate
+        title (str): Plot title
+
+    Returns:
+        matplotlib.figure.Figure: Mel spectrogram plot
+    """
+    fig, ax = plt.subplots(figsize=(10, 4))
+    S = librosa.feature.melspectrogram(y=y, sr=sr)
+    S_dB = librosa.power_to_db(S, ref=np.max)
+    img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel',
+                                   ax=ax, cmap='magma')
+    ax.set_title(title, fontsize=14, fontweight='bold')
+    ax.set_xlabel('Time (seconds)', fontsize=11)
+    ax.set_ylabel('Mel Frequency', fontsize=11)
+    fig.colorbar(img, ax=ax, format='%+2.0f dB')
+    plt.tight_layout()
+    return fig
+
+def format_probability_text(prob_dict, top_k=None):
+    """
+    Format probability dictionary as text with progress bars
+
+    Args:
+        prob_dict (dict): Dictionary of emotion probabilities
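
The preview of this file ends here, so the bodies of the two remaining helpers that app.py imports, `format_probability_text` and `get_emotion_emoji`, are not shown. A minimal sketch of plausible implementations, consistent with the docstring above and the emotion list in app.py; the bar width, markdown layout, and fallback emoji are assumptions rather than the committed code:

```python
def format_probability_text(prob_dict, top_k=None):
    """Format probability dictionary as text with progress bars (sketch)."""
    items = sorted(prob_dict.items(), key=lambda kv: kv[1], reverse=True)
    if top_k is not None:
        items = items[:top_k]
    lines = []
    for emotion, prob in items:
        bar = 'β–ˆ' * int(prob * 20)  # 20-character bar (assumed width)
        lines.append(f"- **{emotion.capitalize()}**: `{bar}` {prob*100:.1f}%")
    return '\n'.join(lines)


def get_emotion_emoji(emotion):
    """Map an emotion label to an emoji (mapping mirrors app.py's emotion list)."""
    emojis = {
        'angry': '😠', 'calm': '😌', 'disgust': '🀒', 'fearful': '😨',
        'happy': '😊', 'neutral': '😐', 'sad': '😒', 'surprised': '😲'
    }
    return emojis.get(emotion, '🎭')  # fallback emoji is an assumption
```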
test_local.py ADDED
@@ -0,0 +1,156 @@
+"""
+Test application locally before deploying
+"""
+
+import os
+import sys
+
+print("="*70)
+print("LOCAL TEST - Speech Emotion Recognition")
+print("="*70)
+
+# ============================================================================
+# 1. CHECK FILES
+# ============================================================================
+print("\n1️⃣ Checking required files...")
+
+required_files = [
+    'app.py',
+    'requirements.txt',
+    'README.md',
+    'src/__init__.py',
+    'src/feature_extraction.py',
+    'src/ensemble_model.py',
+    'src/utils.py',
+    'weights/xgboost_model.pkl',
+    'weights/lightgbm_model.pkl',
+    'weights/gradientboost_model.pkl',
+    'weights/adaboost_model.pkl',
+    'weights/scaler.pkl',
+    'weights/label_encoder.pkl',
+    'weights/config.json'
+]
+
+missing_files = []
+for file in required_files:
+    if os.path.exists(file):
+        print(f"  βœ“ {file}")
+    else:
+        print(f"  βœ— {file} - MISSING")
+        missing_files.append(file)
+
+if missing_files:
+    print(f"\n❌ Missing {len(missing_files)} files. Please create them first.")
+    sys.exit(1)
+
+# ============================================================================
+# 2. TEST IMPORTS
+# ============================================================================
+print("\n2️⃣ Testing imports...")
+
+try:
+    import numpy
+    print("  βœ“ numpy")
+except ImportError:
+    print("  βœ— numpy - Install: pip install numpy")
+
+try:
+    import pandas
+    print("  βœ“ pandas")
+except ImportError:
+    print("  βœ— pandas - Install: pip install pandas")
+
+try:
+    import sklearn
+    print("  βœ“ scikit-learn")
+except ImportError:
+    print("  βœ— scikit-learn - Install: pip install scikit-learn")
+
+try:
+    import xgboost
+    print("  βœ“ xgboost")
+except ImportError:
+    print("  βœ— xgboost - Install: pip install xgboost")
+
+try:
+    import lightgbm
+    print("  βœ“ lightgbm")
+except ImportError:
+    print("  βœ— lightgbm - Install: pip install lightgbm")
+
+try:
+    import librosa
+    print("  βœ“ librosa")
+except ImportError:
+    print("  βœ— librosa - Install: pip install librosa")
+
+try:
+    import gradio
+    print("  βœ“ gradio")
+except ImportError:
+    print("  βœ— gradio - Install: pip install gradio")
+
+# ============================================================================
+# 3. TEST MODEL LOADING
+# ============================================================================
+print("\n3️⃣ Testing model loading...")
+
+try:
+    from src.ensemble_model import EnsembleEmotionRecognizer
+
+    model = EnsembleEmotionRecognizer(weights_dir='weights')
+    print("  βœ“ Model loaded successfully")
+
+    # Get model info
+    info = model.get_model_info()
+    print(f"  βœ“ Models: {', '.join(info['models'])}")
+    print(f"  βœ“ Features: {info['n_features_selected']}/{info['n_features_total']}")
+    print(f"  βœ“ Emotions: {', '.join(info['emotions'])}")
+
+except Exception as e:
+    print(f"  βœ— Error loading model: {e}")
+    sys.exit(1)
+
+# ============================================================================
+# 4. TEST FEATURE EXTRACTION
+# ============================================================================
+print("\n4️⃣ Testing feature extraction...")
+
+try:
+    from src.feature_extraction import extract_features
+    import numpy as np
+
+    # Create dummy audio
+    y = np.random.randn(22050 * 3)  # 3 seconds of random audio at 22,050 Hz
+
+    # Save to temp file
+    import soundfile as sf
+    sf.write('temp_test.wav', y, 22050)
+
+    # Extract features
+    features, _, _ = extract_features('temp_test.wav')
+    print(f"  βœ“ Features extracted: shape {features.shape}")
+
+    # Test prediction
+    prediction = model.predict(features)
+    print(f"  βœ“ Prediction works: {model.decode_emotion(prediction[0])}")
+
+    # Cleanup
+    os.remove('temp_test.wav')
+
+except Exception as e:
+    print(f"  βœ— Error in feature extraction: {e}")
+    sys.exit(1)
+
+# ============================================================================
+# 5. FILE SIZES
+# ============================================================================
+print("\n5️⃣ Checking file sizes...")
+
+total_size = 0
+for file in required_files:
+    if os.path.exists(file):
+        size = os.path.getsize(file) / 1024 / 1024  # MB
+        total_size += size
+        if size > 10: