|
|
"""
|
|
|
Quick Training Script
|
|
|
Train models and save weights for Hugging Face deployment
|
|
|
"""
|
|
|
|
|
|
import pickle
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
import json
|
|
|
import os
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
from sklearn.preprocessing import LabelEncoder, StandardScaler
|
|
|
from sklearn.metrics import accuracy_score, classification_report
|
|
|
from xgboost import XGBClassifier
|
|
|
from lightgbm import LGBMClassifier
|
|
|
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Banner
# ---------------------------------------------------------------------------
print("=" * 70)
print("QUICK TRAINING - Speech Emotion Recognition")
print("=" * 70)

# ---------------------------------------------------------------------------
# Step 1: load the pre-extracted feature table
# ---------------------------------------------------------------------------
print("\n1️⃣ Loading data...")

# CSV with one row per sample: `feature_*` columns plus an `emotion` label.
CSV_FILE = 'features_ravdess.csv'

if not os.path.exists(CSV_FILE):
    print(f"❌ Error: {CSV_FILE} not found!")
    print(" Please run preprocess_ravdess.py first to extract features")
    # SystemExit rather than exit(): exit() is a helper injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    raise SystemExit(1)

df = pd.read_csv(CSV_FILE)

# Every column named `feature_*` is a model input; `emotion` is the target.
feature_cols = [col for col in df.columns if col.startswith('feature_')]
if not feature_cols or 'emotion' not in df.columns:
    # Fail early with a readable message instead of a KeyError / empty-matrix
    # failure further down the pipeline.
    print(f"❌ Error: {CSV_FILE} must contain 'feature_*' columns and an 'emotion' column")
    raise SystemExit(1)

X = df[feature_cols].values
y = df['emotion'].values

print(f" ✓ Data loaded: {X.shape}")
print(f" ✓ Emotions: {np.unique(y)}")
print(f" ✓ Distribution:\n{pd.Series(y).value_counts()}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Step 2: encode labels, split, and standardise
# ---------------------------------------------------------------------------
print("\n2️⃣ Preprocessing...")

# Turn the emotion labels into integer class ids; the fitted encoder is kept
# so predictions can be mapped back to label names later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print(f" ✓ Encoded labels: {label_encoder.classes_}")

# 80/20 hold-out split, stratified so each class keeps its proportion in
# both partitions; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

print(f" ✓ Train set: {X_train.shape}")
print(f" ✓ Test set: {X_test.shape}")

# Fit the scaler on the training partition only and reuse its statistics for
# the test partition, so no test information leaks into the scaling.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(" ✓ Features scaled")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Step 3: keep the highest-variance features
# ---------------------------------------------------------------------------
print("\n3️⃣ Feature selection...")

# Number of features to keep (fewer are kept if the table has fewer columns).
N_TOP_FEATURES = 80

# Rank on the RAW training features. After StandardScaler every non-constant
# column has variance ~1, so ranking the scaled matrix would order features by
# floating-point noise rather than by any real property of the data.
feature_variance = np.var(X_train, axis=0)
top_indices = np.argsort(feature_variance)[-N_TOP_FEATURES:]

# The models are still trained on the standardised values of those columns.
X_train_selected = X_train_scaled[:, top_indices]
X_test_selected = X_test_scaled[:, top_indices]

# Report the real input width instead of a hard-coded feature count.
print(f" ✓ Selected {len(top_indices)} features (from {X_train_scaled.shape[1]})")
print(
    f" ✓ Variance range: {feature_variance[top_indices].min():.4f} - "
    f"{feature_variance[top_indices].max():.4f}"
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Step 4: train the individual models
# ---------------------------------------------------------------------------
print("\n4️⃣ Training models...")

# Number of distinct emotion classes seen by the label encoder.
n_classes = len(label_encoder.classes_)

# Fitted estimators and their test-set accuracies, keyed by model name.
# Insertion order is the order in which the models are trained below.
models = {}
accuracies = {}
|
|
|
|
|
|
|
|
|
# --- XGBoost ---------------------------------------------------------------
print("\n 🔹 Training XGBoost...")

xgb_model = XGBClassifier(
    n_estimators=150,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.8,          # row subsampling per tree
    colsample_bytree=0.8,   # column subsampling per tree
    gamma=1.0,              # minimum loss reduction required to split
    objective='multi:softprob',
    num_class=n_classes,
    random_state=42,
    n_jobs=-1,
    verbosity=0,
)
xgb_model.fit(X_train_selected, y_train)

# Accuracy on the held-out split; stored for the ensemble weighting step.
xgb_acc = xgb_model.score(X_test_selected, y_test)
models['xgboost'] = xgb_model
accuracies['xgboost'] = xgb_acc

print(f" ✓ XGBoost accuracy: {xgb_acc:.4f}")
|
|
|
|
|
|
|
|
|
# --- LightGBM --------------------------------------------------------------
print("\n 🔹 Training LightGBM...")

lgbm_model = LGBMClassifier(
    n_estimators=150,
    num_leaves=40,
    learning_rate=0.1,
    subsample=0.8,
    # LightGBM only performs row subsampling when subsample_freq
    # (bagging_freq) is > 0; with the default of 0 the subsample=0.8 setting
    # above is silently ignored. Re-sample on every iteration.
    subsample_freq=1,
    colsample_bytree=0.8,
    min_child_samples=20,
    objective='multiclass',
    num_class=n_classes,
    random_state=42,
    n_jobs=-1,
    verbose=-1,
)
lgbm_model.fit(X_train_selected, y_train)

# Accuracy on the held-out split; stored for the ensemble weighting step.
lgbm_acc = lgbm_model.score(X_test_selected, y_test)
models['lightgbm'] = lgbm_model
accuracies['lightgbm'] = lgbm_acc

print(f" ✓ LightGBM accuracy: {lgbm_acc:.4f}")
|
|
|
|
|
|
|
|
|
# --- scikit-learn Gradient Boosting ----------------------------------------
print("\n 🔹 Training Gradient Boosting...")

gb_model = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=4,
    learning_rate=0.1,
    subsample=0.8,          # < 1.0 turns this into stochastic gradient boosting
    min_samples_split=10,
    random_state=42,
)
gb_model.fit(X_train_selected, y_train)

# Accuracy on the held-out split; stored for the ensemble weighting step.
gb_acc = gb_model.score(X_test_selected, y_test)
models['gradientboosting'] = gb_model
accuracies['gradientboosting'] = gb_acc

print(f" ✓ Gradient Boosting accuracy: {gb_acc:.4f}")
|
|
|
|
|
|
|
|
|
# --- AdaBoost --------------------------------------------------------------
print("\n 🔹 Training AdaBoost...")

# The explicit algorithm='SAMME.R' argument is intentionally not passed:
# SAMME.R is deprecated since scikit-learn 1.4 and rejected by 1.6+.
# Leaving the parameter unset keeps SAMME.R on releases where it is still the
# default and uses SAMME on newer ones, so the script runs on both.
ada_model = AdaBoostClassifier(
    n_estimators=100,
    learning_rate=1.0,
    random_state=42,
)
ada_model.fit(X_train_selected, y_train)

# Accuracy on the held-out split; stored for the ensemble weighting step.
ada_acc = ada_model.score(X_test_selected, y_test)
models['adaboost'] = ada_model
accuracies['adaboost'] = ada_acc

print(f" ✓ AdaBoost accuracy: {ada_acc:.4f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Step 5: accuracy-weighted soft-voting ensemble
# ---------------------------------------------------------------------------
print("\n5️⃣ Creating ensemble...")

# Class-probability matrix of every trained model on the test split.
predictions = {}
for name, model in models.items():
    predictions[name] = model.predict_proba(X_test_selected)

# Single source of truth for the model order: the weight vector and the
# blend below are both derived from it, so they cannot drift apart.
ensemble_order = ('xgboost', 'lightgbm', 'gradientboosting', 'adaboost')

# Each model's vote is proportional to its accuracy; weights sum to 1.
weights = np.array([accuracies[name] for name in ensemble_order])
weights = weights / weights.sum()

print(f" ✓ Ensemble weights: {weights}")

# Weighted sum of the probability matrices, accumulated in ensemble_order.
ensemble_pred = sum(
    weight * predictions[name]
    for weight, name in zip(weights, ensemble_order)
)

ensemble_labels = np.argmax(ensemble_pred, axis=1)

# NOTE(review): the weights come from accuracies measured on the same test
# split that this score is computed on, so the ensemble figure is optimistic.
# An unbiased estimate would need a separate validation split.
ensemble_acc = accuracy_score(y_test, ensemble_labels)

print(f" ✓ Ensemble accuracy: {ensemble_acc:.4f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Step 6: persist models, preprocessing objects, and run configuration
# ---------------------------------------------------------------------------
print("\n6️⃣ Saving weights...")

os.makedirs('weights', exist_ok=True)

# File name inside weights/ -> object to pickle. The four models are written
# first, then the fitted scaler and label encoder.
pickled_artifacts = {
    'xgboost_model.pkl': xgb_model,
    'lightgbm_model.pkl': lgbm_model,
    'gradientboost_model.pkl': gb_model,
    'adaboost_model.pkl': ada_model,
    'scaler.pkl': scaler,
    'label_encoder.pkl': label_encoder,
}

for filename, artifact in pickled_artifacts.items():
    with open(f'weights/{filename}', 'wb') as f:
        pickle.dump(artifact, f)
    # Only report the file once it has been written and closed.
    print(f" ✓ {filename}")

# Settings written alongside the pickles: selected feature columns, ensemble
# weights, class names, and the measured accuracies. NumPy values are
# converted to plain Python types so they are JSON serialisable.
config = {
    'selected_features': top_indices.tolist(),
    'ensemble_weights': weights.tolist(),
    'n_features': len(top_indices),
    'emotions': label_encoder.classes_.tolist(),
    'model_accuracies': {
        'xgboost': float(xgb_acc),
        'lightgbm': float(lgbm_acc),
        'gradientboosting': float(gb_acc),
        'adaboost': float(ada_acc),
        'ensemble': float(ensemble_acc),
    },
}

# Explicit encoding so the file does not depend on the platform default.
with open('weights/config.json', 'w', encoding='utf-8') as f:
    json.dump(config, f, indent=2)

print(" ✓ config.json")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Step 7: smoke-test that a saved model can be loaded and used
# ---------------------------------------------------------------------------
print("\n7️⃣ Verifying saved models...")

# Unpickling is only safe here because the file was just written by this
# script; never pickle.load() a file from an untrusted source.
with open('weights/xgboost_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

# Re-score the reloaded model on the same held-out split.
test_acc = loaded_model.score(X_test_selected, y_test)

print(f" ✓ Loaded model works (accuracy: {test_acc:.4f})")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Final summary
# ---------------------------------------------------------------------------
print("\n" + "=" * 70)
# This literal was previously split across two physical lines by a stray
# line-break character inside the emoji, which is a syntax error.
print("✅ TRAINING COMPLETE!")
print("=" * 70)

# Test-set accuracy of each model and of the weighted ensemble.
print("\n📊 Final Results:")
print(f" XGBoost: {xgb_acc:.4f}")
print(f" LightGBM: {lgbm_acc:.4f}")
print(f" GradientBoosting: {gb_acc:.4f}")
print(f" AdaBoost: {ada_acc:.4f}")
print(f" Ensemble: {ensemble_acc:.4f} ⭐")

# Files written under weights/ by the saving step.
print("\n💾 Saved files:")
print(" weights/xgboost_model.pkl")
print(" weights/lightgbm_model.pkl")
print(" weights/gradientboost_model.pkl")
print(" weights/adaboost_model.pkl")
print(" weights/scaler.pkl")
print(" weights/label_encoder.pkl")
print(" weights/config.json")

print("\n🚀 Next steps:")
print(" 1. Test locally: python app.py")
print(" 2. Push to Hugging Face: git add . && git commit -m 'Add models' && git push")

print("=" * 70)
|
|
|
|