"""
Audio Feature Extraction Module

Extracts audio features with configurable feature types and MFCC count.

Feature groups are always laid out in this fixed order, regardless of the
order in which they are requested:

    zcr (1) | chroma (12) | mfcc (n_mfcc) | rms (1) | mel (128)
"""

import warnings

import librosa
import numpy as np

import config

warnings.filterwarnings('ignore')

# Canonical order in which feature groups are concatenated into one vector.
_FEATURE_ORDER = ('zcr', 'chroma', 'mfcc', 'rms', 'mel')

# Output widths of the fixed-size groups. These match what the librosa calls
# in extract_features() produce with their default parameters.
_ZCR_WIDTH = 1
_CHROMA_WIDTH = 12
_RMS_WIDTH = 1
_MEL_WIDTH = 128

# Groups that contribute a single, un-numbered column.
_SCALAR_GROUPS = ('zcr', 'rms')


def _feature_layout(feature_types, n_mfcc):
    """Return ``[(group, width), ...]`` for the selected groups, in order.

    Only groups present in ``feature_types`` are included; unknown entries in
    ``feature_types`` are ignored. ``n_mfcc`` is used as-is (no clamping).
    """
    widths = {
        'zcr': _ZCR_WIDTH,
        'chroma': _CHROMA_WIDTH,
        'mfcc': n_mfcc,
        'rms': _RMS_WIDTH,
        'mel': _MEL_WIDTH,
    }
    return [(group, widths[group])
            for group in _FEATURE_ORDER if group in feature_types]


def _time_average(frames):
    """Average a (n_features, n_frames) matrix over its frames."""
    # Kept as mean-over-transposed-axis-0 so the floating-point summation
    # order (and therefore the resulting values) is unchanged.
    return np.mean(frames.T, axis=0)


def extract_features(audio_path, duration=2.5, offset=0.6,
                     feature_types=None, n_mfcc=None):
    """
    Extract a time-averaged feature vector from an audio file.

    Features (always concatenated in this order):
        - ZCR: Zero Crossing Rate (1)
        - Chroma: Chroma STFT (12)
        - MFCC: Mel-frequency cepstral coefficients (n_mfcc)
        - RMS: RMS Energy (1)
        - Mel: Mel Spectrogram (128)

    Args:
        audio_path (str): Path to audio file
        duration (float): Duration to load (seconds)
        offset (float): Start reading after this time (seconds)
        feature_types (list): Feature types to extract, any of
                              ['zcr', 'chroma', 'mfcc', 'rms', 'mel'].
                              If None, config.DEFAULT_FEATURE_TYPES is used.
        n_mfcc (int): Number of MFCC coefficients. Clamped to
                      [config.MFCC_MIN, config.MFCC_MAX].
                      If None, config.MFCC_DEFAULT is used.

    Returns:
        features (np.array): 1-D feature vector
        y (np.array): Audio time series
        sr (int): Sample rate
        feature_info (dict): 'types_used', per-group 'counts', 'total'
                             vector length and the effective 'n_mfcc'
                             (0 when MFCC is not selected)

    Raises:
        Exception: If loading or feature extraction fails for any reason,
                   including when no audio samples could be read (e.g. the
                   file is shorter than ``offset``). The original error is
                   attached as ``__cause__``.
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES

    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT

    # Validate MFCC count: force it into the configured range.
    n_mfcc = max(config.MFCC_MIN, min(n_mfcc, config.MFCC_MAX))

    try:
        y, sr = librosa.load(audio_path, duration=duration, offset=offset)

        # Fail early with a readable message instead of an obscure
        # numpy/librosa error (or a meaningless vector) further down.
        if y.size == 0:
            raise ValueError(
                f"no audio samples loaded (offset={offset}s, "
                f"duration={duration}s)")

        feature_info = {
            'types_used': feature_types,
            'counts': {},
            'total': 0,
            'n_mfcc': n_mfcc if 'mfcc' in feature_types else 0
        }
        parts = []

        # 1. Zero Crossing Rate (1 feature)
        if 'zcr' in feature_types:
            parts.append(_time_average(
                librosa.feature.zero_crossing_rate(y=y)))
            feature_info['counts']['zcr'] = _ZCR_WIDTH

        # 2. Chroma STFT (12 features), computed from the STFT magnitude
        if 'chroma' in feature_types:
            stft = np.abs(librosa.stft(y))
            parts.append(_time_average(
                librosa.feature.chroma_stft(S=stft, sr=sr)))
            feature_info['counts']['chroma'] = _CHROMA_WIDTH

        # 3. MFCC (configurable number of coefficients)
        if 'mfcc' in feature_types:
            parts.append(_time_average(
                librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)))
            feature_info['counts']['mfcc'] = n_mfcc

        # 4. RMS Energy (1 feature)
        if 'rms' in feature_types:
            parts.append(_time_average(librosa.feature.rms(y=y)))
            feature_info['counts']['rms'] = _RMS_WIDTH

        # 5. Mel Spectrogram (128 features)
        if 'mel' in feature_types:
            parts.append(_time_average(
                librosa.feature.melspectrogram(y=y, sr=sr)))
            feature_info['counts']['mel'] = _MEL_WIDTH

        # The leading empty float64 array keeps the result float64 even when
        # every selected group is float32, and yields an empty vector when
        # nothing was selected.
        features = np.hstack([np.array([]), *parts])
        feature_info['total'] = len(features)

        return features, y, sr, feature_info

    except Exception as e:
        raise Exception(
            f"Error extracting features from {audio_path}: {str(e)}") from e


def get_feature_names(feature_types=None, n_mfcc=None):
    """
    Get the column names of the feature vector for the selected types.

    Note: unlike extract_features(), ``n_mfcc`` is used as given and is not
    clamped to [config.MFCC_MIN, config.MFCC_MAX].

    Args:
        feature_types (list): List of feature types.
                              If None, config.DEFAULT_FEATURE_TYPES is used.
        n_mfcc (int): Number of MFCC coefficients.
                      If None, config.MFCC_DEFAULT is used.

    Returns:
        list: Feature names in vector order, e.g.
              ['zcr', 'chroma_0', ..., 'mfcc_0', ..., 'rms', 'mel_0', ...]
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES

    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT

    names = []
    for group, width in _feature_layout(feature_types, n_mfcc):
        if group in _SCALAR_GROUPS:
            names.append(group)
        else:
            names.extend(f'{group}_{i}' for i in range(width))

    return names


def get_feature_count(feature_types=None, n_mfcc=None):
    """
    Get the total feature count for the selected types.

    Note: unlike extract_features(), ``n_mfcc`` is used as given and is not
    clamped to [config.MFCC_MIN, config.MFCC_MAX].

    Args:
        feature_types (list): List of feature types.
                              If None, config.DEFAULT_FEATURE_TYPES is used.
        n_mfcc (int): Number of MFCC coefficients.
                      If None, config.MFCC_DEFAULT is used.

    Returns:
        int: Total number of features
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES

    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT

    return sum(width for _, width in _feature_layout(feature_types, n_mfcc))


def get_feature_indices(feature_types=None, n_mfcc=None,
                        total_mfcc_in_dataset=None,
                        dataset_feature_types=None):
    """
    Get column indices of the features to keep from an existing dataset.

    The dataset columns are assumed to follow the canonical group order
    (zcr, chroma, mfcc, rms, mel). Within the MFCC group only the first
    ``min(n_mfcc, total_mfcc_in_dataset)`` columns are kept.

    Args:
        feature_types (list): List of feature types to keep.
                              If None, config.DEFAULT_FEATURE_TYPES is used.
        n_mfcc (int): Number of MFCC to keep.
                      If None, config.MFCC_DEFAULT is used.
        total_mfcc_in_dataset (int): Number of MFCC columns stored in the
                                     dataset. If None, config.MFCC_DEFAULT
                                     is used.
        dataset_feature_types (list): Feature groups actually stored in the
                                      dataset. Pass this when the dataset
                                      holds more groups than are being kept,
                                      so that skipped groups still advance
                                      the column offset. If None, the dataset
                                      is assumed to contain exactly
                                      ``feature_types`` (original behavior).

    Returns:
        np.array: Integer indices of the columns to keep
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES

    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT

    if total_mfcc_in_dataset is None:
        total_mfcc_in_dataset = config.MFCC_DEFAULT

    if dataset_feature_types is None:
        # Legacy assumption: the dataset was built with the same groups.
        dataset_feature_types = feature_types

    indices = []
    cursor = 0  # first column of the current group inside the dataset
    for group, width in _feature_layout(dataset_feature_types,
                                        total_mfcc_in_dataset):
        if group in feature_types:
            # Only the MFCC group can be partially kept.
            keep = min(n_mfcc, width) if group == 'mfcc' else width
            indices.extend(range(cursor, cursor + keep))
        # Always step over the full stored width of the group.
        cursor += width

    # dtype=int keeps an empty selection usable as an index array.
    return np.array(indices, dtype=int)