File size: 2,682 Bytes
b9a6dd9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from typing import Tuple

import numpy as np

from espnet2.sds.utils.utils import int2float


def TTS_psuedomos(
    TTS_audio_output: Tuple[int, np.ndarray],
    *,
    use_gpu: bool = True,
) -> str:
    """Score synthesized audio with Versa's pseudo-MOS and SHEET-SSQA metrics.

    The audio is scored twice: once by the pseudo-MOS predictors
    ``utmos``, ``dnsmos`` and ``plcmos`` (each configured with
    ``fs=16000``), and once by the ``default`` SHEET-SSQA model.
    Both sets of scores are rendered into a single report string.

    Note:
        The predictors and the SHEET-SSQA model are set up on every call.

    Args:
        TTS_audio_output (Tuple[int, np.ndarray]):
            A tuple containing:
                - The first element (int): The frame rate of the audio.
                - The second element (np.ndarray): The audio signal.
                  It is passed through ``int2float`` before scoring.
        use_gpu (bool):
            Forwarded to every Versa setup and scoring call.
            Defaults to ``True``, which matches the previous hard-coded
            behaviour; pass ``False`` on hosts without a GPU.

    Returns:
        str:
            One ``"<metric>: <score>"`` line per metric, scores formatted
            with two decimals and every line terminated by a newline.
            Pseudo-MOS metrics come first, followed by the SHEET-SSQA
            metrics. The metric names are the keys returned by Versa;
            the names below are illustrative only:

            utmos: 3.54
            dnsmos: 3.47
            plcmos: 3.62
            sheet_ssqa: 4.03

    Raises:
        Exception:
            Whatever the ``versa`` import raises (typically
            ``ImportError`` when Versa is not installed) is re-raised
            unchanged after an error message is printed.

    Example:
        >>> tts_audio_output = (16000, audio_array)
        >>> result = TTS_psuedomos(tts_audio_output)
        >>> print(result)
        utmos: 3.54
        dnsmos: 3.47
        plcmos: 3.62
        sheet_ssqa: 4.03
    """
    # Versa is an optional dependency, so it is imported lazily here
    # rather than at module import time.
    try:
        from versa import (
            pseudo_mos_metric,
            pseudo_mos_setup,
            sheet_ssqa,
            sheet_ssqa_setup,
        )
    except Exception:
        print("Error: Versa is not properly installed.")
        # Bare raise keeps the original exception and traceback intact.
        raise

    frame_rate, raw_audio = TTS_audio_output

    # --- Pseudo-MOS predictors (utmos / dnsmos / plcmos) ---
    predictor_dict, predictor_fs = pseudo_mos_setup(
        use_gpu=use_gpu,
        predictor_types=["utmos", "dnsmos", "plcmos"],
        predictor_args={
            "utmos": {"fs": 16000},
            "dnsmos": {"fs": 16000},
            "plcmos": {"fs": 16000},
        },
    )
    # NOTE(review): the audio is converted separately for each scorer so
    # that neither receives an array the other has already handled; a
    # single shared conversion would be fine if Versa never mutates its
    # input -- confirm before merging the two calls.
    mos_scores = pseudo_mos_metric(
        int2float(raw_audio),
        frame_rate,
        predictor_dict=predictor_dict,
        predictor_fs=predictor_fs,
        use_gpu=use_gpu,
    )
    report_lines = [f"{name}: {mos_scores[name]:.2f}\n" for name in mos_scores]

    # --- SHEET-SSQA ---
    sheet_model = sheet_ssqa_setup(
        model_tag="default",
        model_path=None,
        model_config=None,
        use_gpu=use_gpu,
    )
    ssqa_scores = sheet_ssqa(
        sheet_model,
        int2float(raw_audio),
        frame_rate,
        use_gpu=use_gpu,
    )
    report_lines.extend(
        f"{name}: {ssqa_scores[name]:.2f}\n" for name in ssqa_scores
    )

    return "".join(report_lines)