|
|
"""
|
|
|
Audio Feature Extraction Module
|
|
|
Extracts audio features with configurable feature types and MFCC count
|
|
|
"""
|
|
|
|
|
|
import numpy as np
|
|
|
import librosa
|
|
|
import warnings
|
|
|
import config
|
|
|
# Import-time side effect: installs a blanket "ignore" filter (no category or
# module restriction is given), so warnings are silenced for the whole process.
warnings.filterwarnings('ignore')
|
|
|
|
|
|
|
|
|
def extract_features(audio_path, duration=2.5, offset=0.6, feature_types=None, n_mfcc=None):
    """
    Extract a flat, time-averaged feature vector from one audio file

    Every selected feature group is computed frame by frame with librosa and
    then averaged over time, so each group contributes a fixed number of
    values. Groups are always concatenated in the order
    ZCR, Chroma, MFCC, RMS, Mel - regardless of the order in feature_types.

    Features:
        - ZCR: Zero Crossing Rate (1)
        - Chroma: Chroma STFT (12)
        - MFCC: Mel-frequency cepstral coefficients (20-40, configurable)
        - RMS: RMS Energy (1)
        - Mel: Mel Spectrogram (128)

    Args:
        audio_path (str): Path to audio file
        duration (float): Duration to load (seconds)
        offset (float): Start reading after this time (seconds)
        feature_types (list): List of feature types to extract
            ['zcr', 'chroma', 'mfcc', 'rms', 'mel']
            If None, config.DEFAULT_FEATURE_TYPES is used.
            Unrecognised names are ignored.
        n_mfcc (int): Number of MFCC coefficients
            If None, config.MFCC_DEFAULT is used. The value is clamped
            to [config.MFCC_MIN, config.MFCC_MAX].

    Returns:
        features (np.array): Feature vector (float64)
        y (np.array): Audio time series
        sr (int): Sample rate
        feature_info (dict): 'types_used', per-group 'counts', 'total'
            length of the vector and the effective 'n_mfcc'
            (0 when MFCC is not selected)

    Raises:
        Exception: If loading or feature computation fails. The original
            error is attached as the cause.
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES
    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT

    # Keep the requested coefficient count inside the configured range.
    n_mfcc = max(config.MFCC_MIN, min(n_mfcc, config.MFCC_MAX))

    try:
        y, sr = librosa.load(audio_path, duration=duration, offset=offset)

        # Ordered table of (group name, frame-level extractor). The order here
        # defines the column layout of the resulting vector.
        extractors = (
            ('zcr', lambda: librosa.feature.zero_crossing_rate(y=y)),
            ('chroma', lambda: librosa.feature.chroma_stft(
                S=np.abs(librosa.stft(y)), sr=sr)),
            ('mfcc', lambda: librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)),
            ('rms', lambda: librosa.feature.rms(y=y)),
            ('mel', lambda: librosa.feature.melspectrogram(y=y, sr=sr)),
        )

        feature_info = {
            'types_used': feature_types,
            'counts': {},
            'total': 0,
            'n_mfcc': n_mfcc if 'mfcc' in feature_types else 0
        }

        # Seed with an empty float64 array so the stacked result is always
        # float64, also when no group is selected.
        pieces = [np.array([])]
        for name, compute in extractors:
            if name not in feature_types:
                continue
            # Average every coefficient over the time frames.
            averaged = np.mean(compute().T, axis=0)
            pieces.append(averaged)
            # Record the size actually produced instead of a hard-coded number.
            feature_info['counts'][name] = int(averaged.size)

        features = np.hstack(pieces)
        feature_info['total'] = len(features)

        return features, y, sr, feature_info

    except Exception as e:
        # Same exception type and message as before; chaining keeps the
        # underlying librosa/IO error visible in the traceback.
        raise Exception(
            f"Error extracting features from {audio_path}: {str(e)}") from e
|
|
|
|
|
|
|
|
|
def get_feature_names(feature_types=None, n_mfcc=None):
    """
    Build the column labels for the selected feature groups

    Labels are produced in the fixed order ZCR, Chroma, MFCC, RMS, Mel.
    Single-value groups ('zcr', 'rms') get their bare name; the others get
    '<group>_<index>' labels counted from 0.

    Args:
        feature_types (list): Feature types to name
            (config.DEFAULT_FEATURE_TYPES when None)
        n_mfcc (int): Number of MFCC labels to emit
            (config.MFCC_DEFAULT when None)

    Returns:
        list: Feature names, one per feature value
    """
    selected = config.DEFAULT_FEATURE_TYPES if feature_types is None else feature_types
    mfcc_total = config.MFCC_DEFAULT if n_mfcc is None else n_mfcc

    # (group, width) in output order; a width of None marks a scalar group
    # whose label carries no index suffix.
    layout = (
        ('zcr', None),
        ('chroma', 12),
        ('mfcc', mfcc_total),
        ('rms', None),
        ('mel', 128),
    )

    labels = []
    for group, width in layout:
        if group not in selected:
            continue
        if width is None:
            labels.append(group)
        else:
            labels += [f'{group}_{i}' for i in range(width)]
    return labels
|
|
|
|
|
|
|
|
|
def get_feature_count(feature_types=None, n_mfcc=None):
    """
    Compute the length of the feature vector for the selected groups

    Args:
        feature_types (list): Feature types to count
            (config.DEFAULT_FEATURE_TYPES when None)
        n_mfcc (int): Number of MFCC coefficients
            (config.MFCC_DEFAULT when None)

    Returns:
        int: Total number of features
    """
    selected = config.DEFAULT_FEATURE_TYPES if feature_types is None else feature_types
    mfcc_total = config.MFCC_DEFAULT if n_mfcc is None else n_mfcc

    # Width contributed by each group, listed in ZCR/Chroma/MFCC/RMS/Mel order.
    widths = (
        ('zcr', 1),
        ('chroma', 12),
        ('mfcc', mfcc_total),
        ('rms', 1),
        ('mel', 128),
    )
    return sum(width for group, width in widths if group in selected)
|
|
|
|
|
|
|
|
|
def get_feature_indices(feature_types=None, n_mfcc=None, total_mfcc_in_dataset=None):
    """
    Get column indices of the selected features inside an existing dataset

    Assumes the dataset stores all five feature groups in the order
    extract_features emits them:
    ZCR (1), Chroma (12), MFCC (total_mfcc_in_dataset), RMS (1), Mel (128).
    NOTE(review): confirm that existing datasets always contain every group.

    The column offset advances past every group, selected or not, so the
    returned indices always point at the right columns when only a subset
    of groups is kept. For MFCC only the first
    min(n_mfcc, total_mfcc_in_dataset) coefficients are kept.

    Args:
        feature_types (list): List of feature types to keep
            (config.DEFAULT_FEATURE_TYPES when None)
        n_mfcc (int): Number of MFCC to keep
            (config.MFCC_DEFAULT when None)
        total_mfcc_in_dataset (int): Total MFCC in the dataset
            (config.MFCC_DEFAULT when None)

    Returns:
        np.array: Integer indices of features to keep (empty, but still
            integer-typed, when nothing is selected)
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES
    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT
    if total_mfcc_in_dataset is None:
        total_mfcc_in_dataset = config.MFCC_DEFAULT

    # (group, columns occupied in the dataset, leading columns to keep)
    layout = (
        ('zcr', 1, 1),
        ('chroma', 12, 12),
        ('mfcc', total_mfcc_in_dataset, min(n_mfcc, total_mfcc_in_dataset)),
        ('rms', 1, 1),
        ('mel', 128, 128),
    )

    indices = []
    start = 0
    for group, width, keep in layout:
        if group in feature_types:
            indices.extend(range(start, start + keep))
        # Always step over the group: unselected groups still occupy columns.
        start += width

    # Force an integer dtype so an empty selection is still a valid index array.
    return np.array(indices, dtype=int)
|
|
|
|