File size: 3,059 Bytes
8718761
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import warnings
import importlib.metadata

import whisper.tokenizer

from .utils import get_func_parameters

# Whisper ``__version__`` strings that ``warn_compatibility_issues`` accepts
# without adding a version-mismatch note to its warning.
_COMPATIBLE_WHISPER_VERSIONS = (
    '20230314',
    '20230918',
    '20231105',
    '20231106',
    '20231117',
)
# Version named in the warning's reinstall command: the last entry of the tuple above.
_required_whisper_ver = _COMPATIBLE_WHISPER_VERSIONS[-1]

# Result of ``get_func_parameters`` for Whisper's ``get_tokenizer``, computed once at import time.
# ``get_tokenizer`` in this module tests ``'num_languages' in _TOKENIZER_PARAMS``;
# presumably this is a collection of parameter names -- confirm against ``.utils``.
_TOKENIZER_PARAMS = get_func_parameters(whisper.tokenizer.get_tokenizer)


def warn_compatibility_issues(
        whisper_module,
        ignore: bool = False,
        additional_msg: str = ''
):
    """
    Emit a warning if the installed Whisper might be incompatible.

    Two checks are made and their findings are combined into a single
    :func:`warnings.warn` call:

    - ``whisper_module.__version__`` is not in ``_COMPATIBLE_WHISPER_VERSIONS``;
    - the ``openai-whisper`` distribution has a non-empty ``direct_url.json``
      metadata file, which is treated as a sign of a repository install.

    Parameters
    ----------
    whisper_module
        Object exposing a ``__version__`` attribute (normally the imported ``whisper`` module).
    ignore : bool, default False
        If ``True``, skip every check and never warn.
    additional_msg : str, default ''
        Text appended (after a single space) to the end of the warning message.
    """
    if ignore:
        return

    issues = []
    if whisper_module.__version__ not in _COMPATIBLE_WHISPER_VERSIONS:
        issues.append(
            f'Whisper {whisper_module.__version__} is installed. '
            f'Versions confirmed to be compatible: {", ".join(_COMPATIBLE_WHISPER_VERSIONS)}\n'
        )

    try:
        direct_url = importlib.metadata.distribution('openai-whisper').read_text('direct_url.json')
    except importlib.metadata.PackageNotFoundError:
        # No distribution metadata under this name, so the install origin cannot be
        # determined; a warning helper should not crash because of that.
        direct_url = None
    if direct_url:
        issues.append(
            'The detected version appears to be installed from the repository '
            'which can have compatibility issues '
            'due to multiple commits sharing the same version number. '
            f'It is recommended to install version {_required_whisper_ver} from PyPI.\n'
        )

    if not issues:
        return

    message = (
        'The installed version of Whisper might be incompatible.\n'
        + ''.join(issues) +
        'To prevent errors and performance issues, reinstall correct version with: '
        f'"pip install --upgrade --no-deps --force-reinstall openai-whisper=={_required_whisper_ver}".'
    )
    if additional_msg:
        message += f' {additional_msg}'
    warnings.warn(message)


def get_tokenizer(model=None, is_faster_model: bool = False, **kwargs):
    """
    Backward compatible wrapper of :func:`whisper.tokenizer.get_tokenizer` and
    :class:`faster_whisper.tokenizer.Tokenizer`.

    Parameters
    ----------
    model : optional
        Model used to fill in tokenizer arguments. When given:

        - ``multilingual`` defaults to ``model.is_multilingual``, or to
          ``model.model.is_multilingual`` if ``model`` lacks that attribute;
        - ``tokenizer`` defaults to ``model.hf_tokenizer`` (only if ``is_faster_model``);
        - ``num_languages`` is set to ``model.num_languages`` whenever ``model`` has that
          attribute and the selected tokenizer accepts it. This replaces any
          ``num_languages`` passed in ``kwargs``.
    is_faster_model : bool, default False
        If ``True``, build a :class:`faster_whisper.tokenizer.Tokenizer`;
        otherwise call :func:`whisper.tokenizer.get_tokenizer`.
    **kwargs
        Arguments forwarded to the selected tokenizer. ``num_languages`` is dropped
        when the selected tokenizer does not accept it.

    Returns
    -------
    The object returned by the selected tokenizer callable.
    """
    if is_faster_model:
        # Imported lazily so faster-whisper is only required when it is actually used.
        import faster_whisper.tokenizer
        tokenizer = faster_whisper.tokenizer.Tokenizer
        params = get_func_parameters(tokenizer)
        if model is not None and 'tokenizer' not in kwargs:
            kwargs['tokenizer'] = model.hf_tokenizer
    else:
        tokenizer = whisper.tokenizer.get_tokenizer
        params = _TOKENIZER_PARAMS

    if model is not None and 'multilingual' not in kwargs:
        source = model if hasattr(model, 'is_multilingual') else model.model
        kwargs['multilingual'] = source.is_multilingual

    if 'num_languages' in params:
        # Only an attribute directly on ``model`` is used. The previous inner
        # ``model.model.num_languages`` fallback sat behind this same ``hasattr``
        # check and could never run, so it has been removed.
        if hasattr(model, 'num_languages'):
            kwargs['num_languages'] = model.num_languages
    else:
        # Tokenizers without this parameter would reject the keyword.
        kwargs.pop('num_languages', None)
    return tokenizer(**kwargs)