import warnings
import importlib.metadata

import whisper.tokenizer

from .utils import get_func_parameters

# Whisper release tags listed as compatible; checked against ``whisper.__version__``.
_COMPATIBLE_WHISPER_VERSIONS = (
    '20230314',
    '20230918',
    '20231105',
    '20231106',
    '20231117',
)
# The last listed version is the one recommended in the reinstall hint.
_required_whisper_ver = _COMPATIBLE_WHISPER_VERSIONS[-1]

# Parameters accepted by the installed ``whisper.tokenizer.get_tokenizer``,
# computed once at import time.
_TOKENIZER_PARAMS = get_func_parameters(whisper.tokenizer.get_tokenizer)


def warn_compatibility_issues(
        whisper_module,
        ignore: bool = False,
        additional_msg: str = ''
):
    """
    Emit a warning if the installed Whisper looks incompatible.

    Two checks are made: whether ``whisper_module.__version__`` is one of
    ``_COMPATIBLE_WHISPER_VERSIONS``, and whether the ``openai-whisper``
    distribution carries a ``direct_url.json`` metadata file, which this
    module treats as a sign of an install from the repository.

    Parameters
    ----------
    whisper_module
        The imported ``whisper`` module; only ``__version__`` is read.
    ignore : bool, default False
        If True, skip all checks and never warn.
    additional_msg : str, default ''
        Extra text appended to the warning, if a warning is emitted.
    """
    if ignore:
        return

    compatibility_warning = ''
    if whisper_module.__version__ not in _COMPATIBLE_WHISPER_VERSIONS:
        compatibility_warning += (
            f'Whisper {whisper_module.__version__} is installed. '
            f'Versions confirmed to be compatible: {", ".join(_COMPATIBLE_WHISPER_VERSIONS)}\n'
        )

    try:
        direct_url = importlib.metadata.distribution('openai-whisper').read_text('direct_url.json')
    except importlib.metadata.PackageNotFoundError:
        # No distribution metadata under this name: the install origin cannot be
        # determined, and a warning helper should not raise because of that.
        direct_url = None
    if direct_url:
        compatibility_warning += (
            'The detected version appears to be installed from the repository '
            'which can have compatibility issues '
            'due to multiple commits sharing the same version number. '
            f'It is recommended to install version {_required_whisper_ver} from PyPI.\n'
        )

    if not compatibility_warning:
        return

    compatibility_warning = (
        'The installed version of Whisper might be incompatible.\n'
        + compatibility_warning +
        'To prevent errors and performance issues, reinstall correct version with: '
        f'"pip install --upgrade --no-deps --force-reinstall openai-whisper=={_required_whisper_ver}".'
    )
    if additional_msg:
        compatibility_warning += f' {additional_msg}'
    warnings.warn(compatibility_warning)


def get_tokenizer(model=None, is_faster_model: bool = False, **kwargs):
    """
    Backward compatible wrapper of :func:`whisper.tokenizer.get_tokenizer` and
    :class:`faster_whisper.tokenizer.Tokenizer`.

    Parameters
    ----------
    model : optional
        Model instance used to fill in tokenizer arguments that were not given.
        ``is_multilingual`` and ``num_languages`` are read from ``model`` or,
        failing that, from ``model.model``; ``hf_tokenizer`` is read from
        ``model`` when ``is_faster_model`` is True.
    is_faster_model : bool, default False
        If True, build a :class:`faster_whisper.tokenizer.Tokenizer`; otherwise
        call :func:`whisper.tokenizer.get_tokenizer`.
    **kwargs
        Passed on to the selected tokenizer factory.

    Returns
    -------
    The object returned by the selected tokenizer factory.
    """
    if is_faster_model:
        # Imported lazily so faster-whisper is only needed when requested.
        import faster_whisper.tokenizer
        tokenizer = faster_whisper.tokenizer.Tokenizer
        params = get_func_parameters(tokenizer)
        if model is not None and 'tokenizer' not in kwargs:
            kwargs['tokenizer'] = model.hf_tokenizer
    else:
        tokenizer = whisper.tokenizer.get_tokenizer
        params = _TOKENIZER_PARAMS

    if model is not None and 'multilingual' not in kwargs:
        kwargs['multilingual'] = (
            model.is_multilingual if hasattr(model, 'is_multilingual') else model.model.is_multilingual
        )

    if 'num_languages' in params:
        # A value found on the model takes precedence over one supplied by the caller.
        if hasattr(model, 'num_languages'):
            kwargs['num_languages'] = model.num_languages
        elif hasattr(getattr(model, 'model', None), 'num_languages'):
            # Fall back to the wrapped model, mirroring the ``multilingual`` lookup.
            kwargs['num_languages'] = model.model.num_languages
    elif 'num_languages' in kwargs:
        # The selected factory does not accept this argument; drop it so the call does not fail.
        del kwargs['num_languages']

    return tokenizer(**kwargs)