File size: 6,750 Bytes
a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 a344700 cafbe14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
"""
Audio Feature Extraction Module
Extracts audio features with configurable feature types and MFCC count
"""
import numpy as np
import librosa
import warnings
import config
# Installs a catch-all 'ignore' filter at import time, silencing warnings of
# every category for the whole process (not just for this module).
warnings.filterwarnings('ignore')
def extract_features(audio_path, duration=2.5, offset=0.6, feature_types=None, n_mfcc=None):
    """
    Extract a flat feature vector from an audio file

    Each selected feature is computed frame-by-frame with librosa and then
    averaged over time, so every feature type contributes a fixed number of
    values regardless of clip length. The groups are always concatenated in
    the order ZCR, Chroma, MFCC, RMS, Mel.

    Features:
        - ZCR: Zero Crossing Rate (1)
        - Chroma: Chroma STFT (12)
        - MFCC: Mel-frequency cepstral coefficients (configurable)
        - RMS: RMS Energy (1)
        - Mel: Mel Spectrogram (128)

    Args:
        audio_path (str): Path to audio file
        duration (float): Duration to load (seconds)
        offset (float): Start reading after this time (seconds)
        feature_types (list): List of feature types to extract
            ['zcr', 'chroma', 'mfcc', 'rms', 'mel']
            If None, config.DEFAULT_FEATURE_TYPES is used
        n_mfcc (int): Number of MFCC coefficients
            If None, config.MFCC_DEFAULT is used. The value is clamped
            to the range [config.MFCC_MIN, config.MFCC_MAX].

    Returns:
        features (np.array): Feature vector
        y (np.array): Audio time series
        sr (int): Sample rate
        feature_info (dict): Information about extracted features with keys
            'types_used', 'counts' (per-type vector length), 'total' and
            'n_mfcc' (0 when MFCC was not requested)

    Raises:
        Exception: If loading the file or computing any feature fails. The
            original error is attached as the cause.
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES
    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT

    # Clamp the MFCC count into the configured range instead of rejecting it
    n_mfcc = max(config.MFCC_MIN, min(n_mfcc, config.MFCC_MAX))

    try:
        # Load audio file
        y, sr = librosa.load(audio_path, duration=duration, offset=offset)

        # Seed with an empty float64 array so the stacked result keeps the
        # same dtype as before and an empty selection still yields an
        # empty vector.
        parts = [np.array([])]
        counts = {}

        # 1. Zero Crossing Rate (1 feature)
        if 'zcr' in feature_types:
            zcr = np.mean(librosa.feature.zero_crossing_rate(y=y).T, axis=0)
            parts.append(zcr)
            counts['zcr'] = len(zcr)

        # 2. Chroma STFT (12 features), computed from the magnitude spectrum
        if 'chroma' in feature_types:
            stft = np.abs(librosa.stft(y))
            chroma = np.mean(librosa.feature.chroma_stft(
                S=stft, sr=sr).T, axis=0)
            parts.append(chroma)
            counts['chroma'] = len(chroma)

        # 3. MFCC (n_mfcc features, CONFIGURABLE)
        if 'mfcc' in feature_types:
            mfcc = np.mean(librosa.feature.mfcc(
                y=y, sr=sr, n_mfcc=n_mfcc).T, axis=0)
            parts.append(mfcc)
            counts['mfcc'] = len(mfcc)

        # 4. RMS Energy (1 feature)
        if 'rms' in feature_types:
            rms = np.mean(librosa.feature.rms(y=y).T, axis=0)
            parts.append(rms)
            counts['rms'] = len(rms)

        # 5. Mel Spectrogram (128 features)
        if 'mel' in feature_types:
            mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
            parts.append(mel)
            counts['mel'] = len(mel)

        # Stack once at the end rather than re-allocating after every feature
        features = np.hstack(parts)

        feature_info = {
            # Copy so callers mutating the result cannot alter the shared
            # config default (or their own input list).
            'types_used': list(feature_types),
            'counts': counts,
            'total': len(features),
            'n_mfcc': n_mfcc if 'mfcc' in feature_types else 0
        }
        return features, y, sr, feature_info
    except Exception as e:
        # Keep the generic Exception type callers already catch, but chain
        # the original error so its traceback is preserved.
        raise Exception(
            f"Error extracting features from {audio_path}: {str(e)}") from e
def get_feature_names(feature_types=None, n_mfcc=None):
    """
    Build the ordered list of column names for the selected feature types

    Names are always emitted in the fixed order ZCR, Chroma, MFCC, RMS, Mel,
    independent of the order in which the types are listed.

    Args:
        feature_types (list): Feature types to name
            If None, config.DEFAULT_FEATURE_TYPES is used
        n_mfcc (int): Number of MFCC coefficients
            If None, config.MFCC_DEFAULT is used

    Returns:
        list: Feature names ('zcr', 'chroma_0'.., 'mfcc_0'.., 'rms', 'mel_0'..)
    """
    selected = config.DEFAULT_FEATURE_TYPES if feature_types is None else feature_types
    mfcc_total = config.MFCC_DEFAULT if n_mfcc is None else n_mfcc

    # Each group builds its labels lazily, so a group that is not selected
    # is never evaluated.
    groups = (
        ('zcr', lambda: ['zcr']),
        ('chroma', lambda: [f'chroma_{i}' for i in range(12)]),
        ('mfcc', lambda: [f'mfcc_{i}' for i in range(mfcc_total)]),
        ('rms', lambda: ['rms']),
        ('mel', lambda: [f'mel_{i}' for i in range(128)]),
    )

    labels = []
    for key, make_labels in groups:
        if key in selected:
            labels += make_labels()
    return labels
def get_feature_count(feature_types=None, n_mfcc=None):
    """
    Compute how many values the selected feature types contribute in total

    Args:
        feature_types (list): Feature types to count
            If None, config.DEFAULT_FEATURE_TYPES is used
        n_mfcc (int): Number of MFCC coefficients
            If None, config.MFCC_DEFAULT is used

    Returns:
        int: Total number of features
    """
    selected = config.DEFAULT_FEATURE_TYPES if feature_types is None else feature_types
    mfcc_total = config.MFCC_DEFAULT if n_mfcc is None else n_mfcc

    # Width of each feature group; only the MFCC width is variable
    widths = (
        ('zcr', 1),
        ('chroma', 12),
        ('mfcc', mfcc_total),
        ('rms', 1),
        ('mel', 128),
    )
    return sum(width for key, width in widths if key in selected)
def get_feature_indices(feature_types=None, n_mfcc=None, total_mfcc_in_dataset=None):
    """
    Get column indices of the selected features (for existing datasets)

    The dataset is assumed to store every feature group in the fixed order
    ZCR (1), Chroma (12), MFCC (total_mfcc_in_dataset), RMS (1), Mel (128).
    The running offset therefore advances past each group whether or not it
    is selected, so the returned indices always address the full layout.

    Args:
        feature_types (list): List of feature types to keep
            If None, config.DEFAULT_FEATURE_TYPES is used
        n_mfcc (int): Number of MFCC to keep (the first n_mfcc coefficients)
            If None, config.MFCC_DEFAULT is used
        total_mfcc_in_dataset (int): Total MFCC stored in the dataset
            If None, config.MFCC_DEFAULT is used

    Returns:
        np.array: Integer indices of features to keep (empty integer array
            when nothing is selected)
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES
    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT
    if total_mfcc_in_dataset is None:
        total_mfcc_in_dataset = config.MFCC_DEFAULT

    # (feature type, columns stored in the dataset, columns to keep)
    layout = (
        ('zcr', 1, 1),
        ('chroma', 12, 12),
        # Never request more MFCC columns than the dataset actually holds
        ('mfcc', total_mfcc_in_dataset, min(n_mfcc, total_mfcc_in_dataset)),
        ('rms', 1, 1),
        ('mel', 128, 128),
    )

    indices = []
    offset = 0
    for name, stored, kept in layout:
        if name in feature_types:
            indices.extend(range(offset, offset + kept))
        # Skip over the whole stored group even when it is not selected
        offset += stored

    # Force an integer dtype: np.array([]) would default to float64, which
    # cannot be used to index another array.
    return np.array(indices, dtype=int)
|