"""
Audio Feature Extraction Module
Extracts audio features with configurable feature types and MFCC count
"""

import numpy as np
import librosa
import warnings
import config
# Suppress all warnings for the whole process once this module is imported.
# NOTE(review): presumably meant to quiet librosa/audio-backend noise, but the
# filter is not restricted to any module or category - confirm this is intended.
warnings.filterwarnings('ignore')


def extract_features(audio_path, duration=2.5, offset=0.6, feature_types=None, n_mfcc=None):
    """
    Extract a flat feature vector from an audio file.

    Every selected feature is computed with librosa and averaged over the
    frame axis, then appended to the output vector. The order of the groups
    in the vector is fixed, independent of the order in ``feature_types``:

    - ZCR: Zero Crossing Rate (1)
    - Chroma: Chroma STFT (12)
    - MFCC: Mel-frequency cepstral coefficients (n_mfcc, configurable)
    - RMS: RMS Energy (1)
    - Mel: Mel Spectrogram (128)

    Args:
        audio_path (str): Path to audio file
        duration (float): Duration to load (seconds)
        offset (float): Start reading after this time (seconds)
        feature_types (list): Feature types to extract, any of
                             ['zcr', 'chroma', 'mfcc', 'rms', 'mel'].
                             If None, config.DEFAULT_FEATURE_TYPES is used
        n_mfcc (int): Number of MFCC coefficients.
                     If None, config.MFCC_DEFAULT is used. The value is
                     clamped into [config.MFCC_MIN, config.MFCC_MAX]

    Returns:
        features (np.array): Feature vector (empty if nothing was selected)
        y (np.array): Audio time series
        sr (int): Sample rate
        feature_info (dict): 'types_used', per-type 'counts', 'total' length
                             of the vector and the effective 'n_mfcc'
                             (0 when MFCC was not requested)

    Raises:
        Exception: If loading the file or computing any feature fails. The
                   message names the file and the original error is attached
                   as the explicit cause (``__cause__``).
    """

    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES

    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT

    # Clamp rather than reject: out-of-range requests are silently pulled
    # back into the configured MFCC range.
    n_mfcc = max(config.MFCC_MIN, min(n_mfcc, config.MFCC_MAX))

    try:
        # Load audio file
        y, sr = librosa.load(audio_path, duration=duration, offset=offset)

        # Start from an empty array so that selecting no feature type still
        # yields a valid (empty) vector.
        features = np.array([])
        feature_info = {
            'types_used': feature_types,
            'counts': {},
            'total': 0,
            'n_mfcc': n_mfcc if 'mfcc' in feature_types else 0
        }

        # 1. Zero Crossing Rate (1 feature)
        if 'zcr' in feature_types:
            zcr = np.mean(librosa.feature.zero_crossing_rate(y=y).T, axis=0)
            features = np.hstack((features, zcr))
            feature_info['counts']['zcr'] = 1

        # 2. Chroma STFT (12 features), computed from the magnitude STFT
        if 'chroma' in feature_types:
            stft = np.abs(librosa.stft(y))
            chroma = np.mean(librosa.feature.chroma_stft(
                S=stft, sr=sr).T, axis=0)
            features = np.hstack((features, chroma))
            feature_info['counts']['chroma'] = 12

        # 3. MFCC (n_mfcc features, configurable)
        if 'mfcc' in feature_types:
            mfcc = np.mean(librosa.feature.mfcc(
                y=y, sr=sr, n_mfcc=n_mfcc).T, axis=0)
            features = np.hstack((features, mfcc))
            feature_info['counts']['mfcc'] = n_mfcc

        # 4. RMS Energy (1 feature)
        if 'rms' in feature_types:
            rms = np.mean(librosa.feature.rms(y=y).T, axis=0)
            features = np.hstack((features, rms))
            feature_info['counts']['rms'] = 1

        # 5. Mel Spectrogram (128 features)
        if 'mel' in feature_types:
            mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
            features = np.hstack((features, mel))
            feature_info['counts']['mel'] = 128

        feature_info['total'] = len(features)

        return features, y, sr, feature_info

    except Exception as e:
        # Re-raise with the file path for context; "from e" keeps the
        # original error attached as the direct cause for debugging.
        raise Exception(
            f"Error extracting features from {audio_path}: {str(e)}") from e


def get_feature_names(feature_types=None, n_mfcc=None):
    """
    Build the column labels for a feature vector.

    Labels are emitted in the fixed order zcr, chroma_0..11,
    mfcc_0..n_mfcc-1, rms, mel_0..127, skipping every group that is not
    listed in ``feature_types``.

    Args:
        feature_types (list): Feature types to name; falls back to
                             config.DEFAULT_FEATURE_TYPES when None
        n_mfcc (int): Number of MFCC coefficients; falls back to
                     config.MFCC_DEFAULT when None

    Returns:
        list: Feature names, one per vector element
    """
    feature_types = (config.DEFAULT_FEATURE_TYPES
                     if feature_types is None else feature_types)
    n_mfcc = config.MFCC_DEFAULT if n_mfcc is None else n_mfcc

    # Builders are deferred so a group's labels are only generated when
    # that group is actually selected.
    layout = (
        ('zcr', lambda: ['zcr']),
        ('chroma', lambda: [f'chroma_{k}' for k in range(12)]),
        ('mfcc', lambda: [f'mfcc_{k}' for k in range(n_mfcc)]),
        ('rms', lambda: ['rms']),
        ('mel', lambda: [f'mel_{k}' for k in range(128)]),
    )

    labels = []
    for kind, build in layout:
        if kind in feature_types:
            labels += build()
    return labels


def get_feature_count(feature_types=None, n_mfcc=None):
    """
    Compute the length of the feature vector for a selection.

    Args:
        feature_types (list): Feature types to count; falls back to
                             config.DEFAULT_FEATURE_TYPES when None
        n_mfcc (int): Number of MFCC coefficients; falls back to
                     config.MFCC_DEFAULT when None

    Returns:
        int: Total number of features
    """
    feature_types = (config.DEFAULT_FEATURE_TYPES
                     if feature_types is None else feature_types)
    n_mfcc = config.MFCC_DEFAULT if n_mfcc is None else n_mfcc

    # Width contributed by each feature group; only MFCC is variable.
    widths = (
        ('zcr', 1),
        ('chroma', 12),
        ('mfcc', n_mfcc),
        ('rms', 1),
        ('mel', 128),
    )

    return sum(width for kind, width in widths if kind in feature_types)


def get_feature_indices(feature_types=None, n_mfcc=None, total_mfcc_in_dataset=None):
    """
    Get feature indices for selected types (for existing datasets)

    The dataset is assumed to contain ALL feature groups in this fixed
    column order: zcr (1), chroma (12), mfcc (total_mfcc_in_dataset),
    rms (1), mel (128). The column offset advances past every group whether
    or not it is selected, so unselected groups are skipped, not compacted.

    Args:
        feature_types (list): List of feature types to keep; falls back to
                             config.DEFAULT_FEATURE_TYPES when None
        n_mfcc (int): Number of MFCC to keep (the first n_mfcc coefficients,
                     capped at total_mfcc_in_dataset); falls back to
                     config.MFCC_DEFAULT when None
        total_mfcc_in_dataset (int): Total MFCC columns stored in the
                     dataset; falls back to config.MFCC_DEFAULT when None

    Returns:
        np.array: Integer indices of features to keep, in ascending order.
                  Always integer-typed, even when empty, so it can be used
                  directly for array indexing.
    """
    if feature_types is None:
        feature_types = config.DEFAULT_FEATURE_TYPES

    if n_mfcc is None:
        n_mfcc = config.MFCC_DEFAULT

    if total_mfcc_in_dataset is None:
        total_mfcc_in_dataset = config.MFCC_DEFAULT

    # (group name, columns occupied in the dataset, leading columns to keep)
    layout = (
        ('zcr', 1, 1),
        ('chroma', 12, 12),
        # Only the first n_mfcc coefficients are kept, never more than exist.
        ('mfcc', total_mfcc_in_dataset, min(n_mfcc, total_mfcc_in_dataset)),
        ('rms', 1, 1),
        ('mel', 128, 128),
    )

    indices = []
    start = 0
    for group, width, keep in layout:
        if group in feature_types:
            indices.extend(range(start, start + keep))
        # Advance past the whole group even when it is not selected.
        start += width

    # An explicit integer dtype matters for the empty case: np.array([])
    # defaults to float64, which NumPy rejects as an index array.
    return np.array(indices, dtype=int)