import os
import argparse
import csv
import json
import glob
from typing import Any, Optional, Union, Collection

import tqdm
import numpy as np
import librosa
from librosa.core.spectrum import _spectrogram
import musdb
import essentia
import essentia.standard
import pyloudnorm as pyln

from utils import str2bool, db2linear


def spectral_crest(
    *,
    y: Optional[np.ndarray] = None,
    S: Optional[np.ndarray] = None,
    n_fft: int = 2048,
    hop_length: int = 512,
    win_length: Optional[int] = None,
    window: str = "hann",
    center: bool = True,
    pad_mode: str = "constant",
    amin: float = 1e-10,
    power: float = 2.0,
) -> np.ndarray:
    """Compute the per-frame spectral crest of a signal or spectrogram.

    For every frame, the spectral crest is the largest spectral bin divided
    by the arithmetic mean of all bins in that frame.

    A higher spectral crest => more tonality,
    a lower spectral crest => more noise-like.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        Audio time series, forwarded to librosa's ``_spectrogram`` helper.
    S : np.ndarray [shape=(..., d, t)] or None
        Optional pre-computed magnitude spectrogram, forwarded to the same
        helper in place of ``y``.
    n_fft : int > 0 [scalar]
        FFT window size.
    hop_length : int > 0 [scalar]
        Hop length for the STFT. See `librosa.stft` for details.
    win_length : int <= n_fft [scalar] or None
        Analysis window length, forwarded unchanged to the spectrogram helper.
    window : string, tuple, number, function, or np.ndarray [shape=(n_fft,)]
        Window specification, forwarded unchanged to the spectrogram helper.
        See `librosa.filters.get_window`.
    center : boolean
        Frame-centering flag, forwarded unchanged to the spectrogram helper.
    pad_mode : string
        Edge padding mode, forwarded unchanged to the spectrogram helper.
    amin : float > 0 [scalar]
        Lower bound applied element-wise to ``S**power`` before the ratio is
        taken, so that the mean in the denominator cannot fall below ``amin``.
    power : float > 0 [scalar]
        Exponent applied to the magnitude spectrogram
        (e.g. 1 for magnitude, 2 for power).

    Returns
    -------
    crest : np.ndarray [shape=(..., 1, t)]
        Spectral crest for each frame; the frequency axis is kept with
        length 1.
    """
    stft_options = dict(
        n_fft=n_fft,
        hop_length=hop_length,
        win_length=win_length,
        window=window,
        center=center,
        pad_mode=pad_mode,
    )
    # The helper is always asked for a plain magnitude (power=1.0); the
    # caller's exponent is applied explicitly on the next line.
    magnitude, _ = _spectrogram(y=y, S=S, power=1.0, **stft_options)

    # Clamp from below so the denominator never drops under ``amin``.
    floored = np.maximum(amin, magnitude**power)

    # Reduce over the frequency axis (second to last), keeping it as size 1.
    peak = floored.max(axis=-2, keepdims=True)
    average = floored.mean(axis=-2, keepdims=True)
    return peak / average


# Command-line interface. Unknown arguments are tolerated (parse_known_args),
# so the script can be launched alongside options meant for other tools.
parser = argparse.ArgumentParser(description="model test.py")

parser.add_argument(
    "--target",
    type=str,
    default="all",
    help="target source. all, vocals, drums, bass, other",
)
parser.add_argument(
    "--root", type=str, default="/path/to/musdb18hq_loudnorm"
)
parser.add_argument("--exp_name", type=str, default="delimit_6_s")
parser.add_argument(
    "--output_directory",
    type=str,
    default="/path/to/results",
)
parser.add_argument(
    "--calc_results",
    type=str2bool,
    default=True,
    help="calculate results or musdb-hq or musdb-XL test dataset",
)


args, _ = parser.parse_known_args()

# The sample rate is fixed here rather than exposed as a CLI option.
args.sample_rate = 44100

# Folder holding one sub-folder per track for the chosen experiment.
args.test_output_dir = f"{args.output_directory}/test/{args.exp_name}"

# Pick which files to analyse:
#   * calc_results=True  -> files under the experiment's output folder,
#     named after the target ("<target>.wav").
#   * calc_results=False -> files under --root, where the "all" target maps
#     to "mixture.wav" and any other target to "<target>.wav".
if args.calc_results:
    track_pattern = f"{args.test_output_dir}/*/{args.target}.wav"
elif args.target == "all":
    track_pattern = f"{args.root}/*/mixture.wav"
else:
    track_pattern = f"{args.root}/*/{args.target}.wav"

# glob returns matches in arbitrary order; sorting keeps the processing order
# and the key order of the saved JSON reproducible between runs.
track_list = sorted(glob.glob(track_pattern))

# Instantiate the Essentia algorithms once, outside the per-track loop.
dynamic_complexity = essentia.standard.DynamicComplexity()
loudness_range = essentia.standard.LoudnessEBUR128()
spectral_centroid = essentia.standard.SpectralCentroidTime()

# Per-track scores keyed by track folder name, plus one flat list per feature
# that is averaged over the whole track list after the loop.
dict_song_score = {}
list_rms = []
list_crest_factor = []
list_dc_score = []
list_lra_score = []
list_sc_hertz = []
list_sf_score = []
list_spectral_crest_score = []

for track in tqdm.tqdm(track_list):
    # The name of the folder containing the file identifies the track.
    audio_name = os.path.basename(os.path.dirname(track))

    # librosa view of the audio, used for the spectral flatness / crest.
    gt_source_librosa = librosa.load(track, sr=args.sample_rate, mono=False)[
        0
    ]  # (nb_channels, nb_samples)
    gt_source_librosa_mono = librosa.to_mono(gt_source_librosa)  # (nb_samples)

    # Essentia view of the same file, used for the level / dynamics features.
    gt_source_essentia = essentia.standard.AudioLoader(filename=track)()[
        0
    ]  # (nb_samples, nb_channels)
    # Channel 0 followed by channel 1; indexing column 1 requires the loaded
    # array to have two channels.
    gt_source_essentia_cat = np.concatenate(
        [gt_source_essentia[:, 0], gt_source_essentia[:, 1]]
    )  # (nb_samples * nb_channels)
    gt_source_essentia_mono = np.mean(gt_source_essentia, axis=1)  # (nb_samples)

    # RMS level and peak-to-RMS ratio over the samples of both channels.
    rms = np.sqrt(np.mean(gt_source_essentia_cat**2))
    crest_factor = np.max(np.abs(gt_source_essentia_cat)) / rms

    # Only the first output of DynamicComplexity and the fourth output of
    # LoudnessEBUR128 are kept.
    dc_score, _ = dynamic_complexity(gt_source_essentia_mono)
    _, _, _, lra_score = loudness_range(gt_source_essentia)
    sc_hertz = spectral_centroid(gt_source_essentia_mono)
    # librosa >= 0.10 accepts the signal only as a keyword argument; a
    # positional call raises TypeError there, so pass it as ``y=``.
    sf_score = np.mean(
        librosa.feature.spectral_flatness(y=gt_source_librosa_mono)
    )
    spectral_crest_score = np.mean(spectral_crest(y=gt_source_librosa_mono))

    # Cast to built-in floats so the dictionary is JSON serialisable.
    dict_song_score[audio_name] = {
        "rms": float(rms),
        "crest_factor": float(crest_factor),
        "dynamic_complexity_score": float(dc_score),
        "lra_score": float(lra_score),
        "spectral_centroid_hertz": float(sc_hertz),
        "spectral_flatness_score": float(sf_score),
        "spectral_crest_score": float(spectral_crest_score),
    }
    list_rms.append(rms)
    list_crest_factor.append(crest_factor)
    list_dc_score.append(dc_score)
    list_lra_score.append(lra_score)
    list_sc_hertz.append(sc_hertz)
    list_sf_score.append(sf_score)
    list_spectral_crest_score.append(spectral_crest_score)

# Console summary: a title naming what was analysed, followed by the mean of
# every feature over all processed tracks.
summary_title = (
    f"{args.exp_name} on {args.target}"
    if args.calc_results
    else f"{os.path.basename(args.root)} on {args.target}"
)
print(summary_title)
print(f"rms: {np.mean(list_rms)}")
print(f"crest_factor: {np.mean(list_crest_factor)}")
print(f"dynamic_complexity_score: {np.mean(list_dc_score)}")
print(f"lra_score: {np.mean(list_lra_score)}")
print(f"sc_hertz: {np.mean(list_sc_hertz)}")
print(f"sf_score: {np.mean(list_sf_score)}")
print(f"spectral_crest_score: {np.mean(list_spectral_crest_score)}")


# Persist the per-track scores as JSON. The file goes next to the analysed
# audio: the experiment's result folder when calc_results is set, otherwise
# the dataset root.
file_name = (
    "score_features" if args.target == "all" else f"score_feature_{args.target}"
)
json_path = (
    f"{args.output_directory}/test/{args.exp_name}/{file_name}.json"
    if args.calc_results
    else f"{args.root}/{file_name}.json"
)
with open(json_path, "w") as f:
    json.dump(dict_song_score, f, indent=4)