speech-emotion-recognition / src /feature_extraction.py
nguyennp86's picture
update project with selection
cafbe14
"""
Audio Feature Extraction Module
Extracts audio features with configurable feature types and MFCC count
"""
import numpy as np
import librosa
import warnings
import config
warnings.filterwarnings('ignore')
def extract_features(audio_path, duration=2.5, offset=0.6, feature_types=None, n_mfcc=None):
    """
    Extract a time-averaged audio feature vector from one audio file.

    Every selected feature is computed per frame with librosa and then
    averaged over the frames, giving one value per feature dimension.
    The blocks are always concatenated in the fixed order below,
    independent of the order of ``feature_types``.

    Features:
        - ZCR: Zero Crossing Rate (1)
        - Chroma: Chroma STFT (12)
        - MFCC: Mel-frequency cepstral coefficients (20-40, configurable)
        - RMS: RMS Energy (1)
        - Mel: Mel Spectrogram (128)

    Args:
        audio_path (str): Path to audio file
        duration (float): Duration to load (seconds)
        offset (float): Start reading after this time (seconds)
        feature_types (list): List of feature types to extract
            ['zcr', 'chroma', 'mfcc', 'rms', 'mel'].
            If None, config.DEFAULT_FEATURE_TYPES is used.
            Names outside this set are ignored.
        n_mfcc (int): Number of MFCC coefficients.
            If None, config.MFCC_DEFAULT is used. The value is clamped
            to the range [config.MFCC_MIN, config.MFCC_MAX].

    Returns:
        features (np.array): Feature vector (float64, 1-D)
        y (np.array): Audio time series
        sr (int): Sample rate
        feature_info (dict): Information about extracted features:
            'types_used' (the requested types), 'counts' (values per
            extracted type), 'total' (length of ``features``) and
            'n_mfcc' (clamped MFCC count, 0 when MFCC is not selected)

    Raises:
        Exception: If loading or feature computation fails, or if no audio
            samples could be loaded (e.g. the clip is shorter than
            ``offset``). The original error is chained as ``__cause__``.
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES
    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT

    # Validate MFCC count: silently clamp into the supported range
    n_mfcc = max(config.MFCC_MIN, min(n_mfcc, config.MFCC_MAX))

    try:
        # Load audio file
        y, sr = librosa.load(audio_path, duration=duration, offset=offset)

        # A clip shorter than `offset` yields zero samples; fail with a clear
        # message instead of an obscure downstream error or meaningless values.
        if y.size == 0:
            raise ValueError(
                f"no audio samples loaded (offset={offset}, duration={duration})")

        # Extractors in the fixed output order. They are wrapped in lambdas
        # so that only the selected features are actually computed.
        extractors = (
            ('zcr', lambda: librosa.feature.zero_crossing_rate(y=y)),
            ('chroma', lambda: librosa.feature.chroma_stft(
                S=np.abs(librosa.stft(y)), sr=sr)),
            ('mfcc', lambda: librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)),
            ('rms', lambda: librosa.feature.rms(y=y)),
            ('mel', lambda: librosa.feature.melspectrogram(y=y, sr=sr)),
        )

        blocks = []
        counts = {}
        for name, compute in extractors:
            if name not in feature_types:
                continue
            # Average over frames -> one value per feature dimension
            values = np.mean(compute().T, axis=0)
            blocks.append(values)
            # Record the real block width rather than a hard-coded constant
            counts[name] = int(values.size)

        # Leading empty float64 array keeps the result float64 and makes an
        # empty selection return an empty vector.
        features = np.hstack([np.array([]), *blocks])

        feature_info = {
            'types_used': feature_types,
            'counts': counts,
            'total': len(features),
            'n_mfcc': n_mfcc if 'mfcc' in feature_types else 0
        }
        return features, y, sr, feature_info
    except Exception as e:
        # Chain the cause so the original traceback is preserved for debugging
        raise Exception(
            f"Error extracting features from {audio_path}: {str(e)}") from e
def get_feature_names(feature_types=None, n_mfcc=None):
    """
    Build the column names for a feature vector made of the selected types.

    Names are produced in the fixed order zcr, chroma, mfcc, rms, mel.
    Single-value features use their bare name; multi-value features are
    numbered from zero (e.g. 'chroma_0' ... 'chroma_11').

    Args:
        feature_types (list): List of feature types.
            If None, config.DEFAULT_FEATURE_TYPES is used.
        n_mfcc (int): Number of MFCC coefficients.
            If None, config.MFCC_DEFAULT is used.

    Returns:
        list: List of feature names
    """
    selected = config.DEFAULT_FEATURE_TYPES if feature_types is None else feature_types
    mfcc_total = config.MFCC_DEFAULT if n_mfcc is None else n_mfcc

    # Multi-value feature types and how many numbered names each contributes
    widths = {'chroma': 12, 'mfcc': mfcc_total, 'mel': 128}

    labels = []
    for kind in ('zcr', 'chroma', 'mfcc', 'rms', 'mel'):
        if kind not in selected:
            continue
        if kind in widths:
            labels.extend(f'{kind}_{i}' for i in range(widths[kind]))
        else:
            # Scalar feature: a single, unnumbered name
            labels.append(kind)
    return labels
def get_feature_count(feature_types=None, n_mfcc=None):
    """
    Compute the length of a feature vector made of the selected types.

    Args:
        feature_types (list): List of feature types.
            If None, config.DEFAULT_FEATURE_TYPES is used.
        n_mfcc (int): Number of MFCC coefficients.
            If None, config.MFCC_DEFAULT is used.

    Returns:
        int: Total number of features
    """
    selected = config.DEFAULT_FEATURE_TYPES if feature_types is None else feature_types
    mfcc_total = config.MFCC_DEFAULT if n_mfcc is None else n_mfcc

    # Number of values each feature type contributes; only MFCC is variable
    widths = (
        ('zcr', 1),
        ('chroma', 12),
        ('mfcc', mfcc_total),
        ('rms', 1),
        ('mel', 128),
    )
    return sum(width for kind, width in widths if kind in selected)
def get_feature_indices(feature_types=None, n_mfcc=None, total_mfcc_in_dataset=None,
                        dataset_feature_types=None):
    """
    Get column indices of the selected features inside an existing dataset.

    The dataset columns are assumed to be laid out in the fixed order
    zcr (1), chroma (12), mfcc (total_mfcc_in_dataset), rms (1), mel (128).
    The column offset advances past every feature type that is present in
    the dataset, whether or not it is selected, so that the returned
    indices point at the right columns when only a subset is kept.

    Args:
        feature_types (list): List of feature types to keep.
            If None, config.DEFAULT_FEATURE_TYPES is used.
        n_mfcc (int): Number of MFCC to keep (the first n_mfcc coefficients).
            If None, config.MFCC_DEFAULT is used. Capped at
            total_mfcc_in_dataset.
        total_mfcc_in_dataset (int): Total MFCC in the dataset.
            If None, config.MFCC_DEFAULT is used.
        dataset_feature_types (list): Feature types actually stored in the
            dataset. If None, the dataset is assumed to contain all five
            types.

    Returns:
        np.array: Integer indices of features to keep (possibly empty)

    Raises:
        ValueError: If dataset_feature_types is given and a requested
            feature type is not part of it.
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES
    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT
    if total_mfcc_in_dataset is None:
        total_mfcc_in_dataset = config.MFCC_DEFAULT

    # Dataset column layout: fixed order, fixed widths except for MFCC
    layout = (
        ('zcr', 1),
        ('chroma', 12),
        ('mfcc', total_mfcc_in_dataset),
        ('rms', 1),
        ('mel', 128),
    )

    if dataset_feature_types is not None:
        missing = [name for name, _ in layout
                   if name in feature_types and name not in dataset_feature_types]
        if missing:
            raise ValueError(
                f"Requested feature types not present in dataset: {missing}")

    indices = []
    current_idx = 0
    for name, width in layout:
        if dataset_feature_types is not None and name not in dataset_feature_types:
            # Type not stored in the dataset: it occupies no columns
            continue
        if name in feature_types:
            # Only take the first n_mfcc coefficients of the MFCC block
            keep = min(n_mfcc, width) if name == 'mfcc' else width
            indices.extend(range(current_idx, current_idx + keep))
        # Always skip past the whole block, selected or not
        current_idx += width

    # Explicit integer dtype so an empty selection is still a valid index array
    return np.array(indices, dtype=int)