Spaces:

nguyennp86
/

speech-emotion-recognition

Sleeping

App Files Files Community

speech-emotion-recognition / src /feature_extraction.py

nguyennp86

update project with selection

cafbe14 2 months ago

raw

history blame contribute delete

6.75 kB

	"""
	Audio Feature Extraction Module
	Extracts audio features with configurable feature types and MFCC count
	"""

	import numpy as np
	import librosa
	import warnings
	import config
	warnings.filterwarnings('ignore')


	def extract_features(audio_path, duration=2.5, offset=0.6, feature_types=None, n_mfcc=None):
	    """
	    Extract a flat feature vector from an audio file.

	    Each selected feature is computed frame-wise with librosa and then
	    averaged over time, and the per-feature vectors are concatenated in a
	    fixed order: zcr, chroma, mfcc, rms, mel.

	    Features:
	        - ZCR: Zero Crossing Rate (1)
	        - Chroma: Chroma STFT (12)
	        - MFCC: Mel-frequency cepstral coefficients (configurable)
	        - RMS: RMS Energy (1)
	        - Mel: Mel Spectrogram (128)

	    Args:
	        audio_path (str): Path to audio file
	        duration (float): Duration to load (seconds)
	        offset (float): Start reading after this time (seconds)
	        feature_types (list): Feature types to extract, any of
	            ['zcr', 'chroma', 'mfcc', 'rms', 'mel'].
	            If None, config.DEFAULT_FEATURE_TYPES is used.
	        n_mfcc (int): Number of MFCC coefficients. If None,
	            config.MFCC_DEFAULT is used. The value is clamped to
	            [config.MFCC_MIN, config.MFCC_MAX].

	    Returns:
	        features (np.array): Concatenated feature vector
	        y (np.array): Audio time series
	        sr (int): Sample rate
	        feature_info (dict): 'types_used', per-type 'counts', 'total'
	            length of the vector and the effective 'n_mfcc'
	            (0 when MFCC is not selected)

	    Raises:
	        Exception: If loading or feature computation fails; the original
	            error is attached as the cause.
	    """
	    if feature_types is None:
	        feature_types = config.DEFAULT_FEATURE_TYPES

	    if n_mfcc is None:
	        n_mfcc = config.MFCC_DEFAULT

	    # Clamp the MFCC count into the configured range
	    n_mfcc = max(config.MFCC_MIN, min(n_mfcc, config.MFCC_MAX))

	    try:
	        # Load audio file
	        y, sr = librosa.load(audio_path, duration=duration, offset=offset)

	        # Frame-level extractors in output order. They are wrapped in
	        # lambdas so that only the selected features are computed.
	        # NOTE(review): librosa documents the `S` argument of chroma_stft
	        # as a power spectrogram, while the STFT magnitude is passed here.
	        # Left unchanged because altering it would change the features
	        # that existing models/datasets were built with - confirm.
	        extractors = (
	            ('zcr', lambda: librosa.feature.zero_crossing_rate(y=y)),
	            ('chroma', lambda: librosa.feature.chroma_stft(
	                S=np.abs(librosa.stft(y)), sr=sr)),
	            ('mfcc', lambda: librosa.feature.mfcc(
	                y=y, sr=sr, n_mfcc=n_mfcc)),
	            ('rms', lambda: librosa.feature.rms(y=y)),
	            ('mel', lambda: librosa.feature.melspectrogram(y=y, sr=sr)),
	        )

	        parts = []
	        counts = {}
	        for name, compute in extractors:
	            if name not in feature_types:
	                continue
	            # Average every coefficient over the time (frame) axis
	            vector = np.mean(compute().T, axis=0)
	            parts.append(vector)
	            # Record the measured length so counts always match the vector
	            counts[name] = int(vector.size)

	        # The empty float64 seed keeps the result a float64 array and makes
	        # an empty selection yield an empty vector instead of an error.
	        features = np.hstack([np.array([])] + parts)

	        feature_info = {
	            'types_used': feature_types,
	            'counts': counts,
	            'total': len(features),
	            'n_mfcc': n_mfcc if 'mfcc' in feature_types else 0
	        }

	        return features, y, sr, feature_info

	    except Exception as e:
	        # Keep the generic exception type callers may already catch, but
	        # chain the root cause explicitly for easier debugging.
	        raise Exception(
	            f"Error extracting features from {audio_path}: {str(e)}") from e


	def get_feature_names(feature_types=None, n_mfcc=None):
	    """
	    Build the list of column names for the selected feature types.

	    Names are emitted in the fixed order zcr, chroma, mfcc, rms, mel.
	    Single-value features use their bare name; multi-value features are
	    numbered from zero ('chroma_0', 'mfcc_0', 'mel_0', ...).
	    The MFCC count is used as given (no clamping is applied here).

	    Args:
	        feature_types (list): List of feature types.
	            If None, config.DEFAULT_FEATURE_TYPES is used.
	        n_mfcc (int): Number of MFCC coefficients.
	            If None, config.MFCC_DEFAULT is used.

	    Returns:
	        list: List of feature names
	    """
	    selected = config.DEFAULT_FEATURE_TYPES if feature_types is None else feature_types
	    mfcc_total = config.MFCC_DEFAULT if n_mfcc is None else n_mfcc

	    # (feature type, whether its names are numbered, number of values)
	    layout = (
	        ('zcr', False, 1),
	        ('chroma', True, 12),
	        ('mfcc', True, mfcc_total),
	        ('rms', False, 1),
	        ('mel', True, 128),
	    )

	    labels = []
	    for kind, numbered, width in layout:
	        if kind not in selected:
	            continue
	        if numbered:
	            labels += [f'{kind}_{k}' for k in range(width)]
	        else:
	            labels.append(kind)

	    return labels


	def get_feature_count(feature_types=None, n_mfcc=None):
	    """
	    Compute the length of the feature vector for the selected types.

	    Fixed widths are 1 (zcr), 12 (chroma), 1 (rms) and 128 (mel); the
	    MFCC width is the given coefficient count, used as-is.

	    Args:
	        feature_types (list): List of feature types.
	            If None, config.DEFAULT_FEATURE_TYPES is used.
	        n_mfcc (int): Number of MFCC coefficients.
	            If None, config.MFCC_DEFAULT is used.

	    Returns:
	        int: Total number of features
	    """
	    selected = config.DEFAULT_FEATURE_TYPES if feature_types is None else feature_types
	    mfcc_total = config.MFCC_DEFAULT if n_mfcc is None else n_mfcc

	    # Width contributed by each feature type; only MFCC is variable
	    widths = (
	        ('zcr', 1),
	        ('chroma', 12),
	        ('mfcc', mfcc_total),
	        ('rms', 1),
	        ('mel', 128),
	    )

	    return sum(width for kind, width in widths if kind in selected)


	def get_feature_indices(feature_types=None, n_mfcc=None, total_mfcc_in_dataset=None,
	                        dataset_feature_types=None):
	    """
	    Get column indices of the selected features in an existing dataset.

	    The dataset columns are assumed to follow the order
	    zcr (1), chroma (12), mfcc (total_mfcc_in_dataset), rms (1), mel (128),
	    restricted to the types the dataset actually contains. For MFCC only
	    the first min(n_mfcc, total_mfcc_in_dataset) columns are kept.

	    Args:
	        feature_types (list): List of feature types to keep.
	            If None, config.DEFAULT_FEATURE_TYPES is used.
	        n_mfcc (int): Number of MFCC to keep.
	            If None, config.MFCC_DEFAULT is used.
	        total_mfcc_in_dataset (int): Total MFCC in the dataset.
	            If None, config.MFCC_DEFAULT is used.
	        dataset_feature_types (list): Feature types present in the
	            dataset. If None, the dataset is assumed to contain exactly
	            `feature_types` (the historical behaviour). Pass the full
	            list when selecting a subset of a wider dataset so that the
	            columns of unselected types are skipped correctly. Types
	            requested but absent from the dataset are ignored.

	    Returns:
	        np.array: Integer indices of features to keep (an empty integer
	            array when nothing is selected)
	    """
	    if feature_types is None:
	        feature_types = config.DEFAULT_FEATURE_TYPES

	    if n_mfcc is None:
	        n_mfcc = config.MFCC_DEFAULT

	    if total_mfcc_in_dataset is None:
	        total_mfcc_in_dataset = config.MFCC_DEFAULT

	    if dataset_feature_types is None:
	        dataset_feature_types = feature_types

	    # (feature type, columns occupied in the dataset, columns to keep)
	    layout = (
	        ('zcr', 1, 1),
	        ('chroma', 12, 12),
	        ('mfcc', total_mfcc_in_dataset, min(n_mfcc, total_mfcc_in_dataset)),
	        ('rms', 1, 1),
	        ('mel', 128, 128),
	    )

	    indices = []
	    current_idx = 0

	    for name, width, keep in layout:
	        if name not in dataset_feature_types:
	            continue
	        if name in feature_types:
	            indices.extend(range(current_idx, current_idx + keep))
	        # Always step over the full block this type occupies in the dataset
	        current_idx += width

	    # Explicit integer dtype: np.array([]) would otherwise be float64,
	    # which cannot be used to index an array.
	    return np.array(indices, dtype=int)