Spaces:
Sleeping
Sleeping
File size: 1,327 Bytes
bbd4e37 00f6f1d b22bcbc 82d8c86 bbd4e37 0ae9155 bbd4e37 0ae9155 bbd4e37 b22bcbc 00f6f1d b22bcbc 00f6f1d 82d8c86 bbd4e37 82d8c86 bbd4e37 0ae9155 00f6f1d b22bcbc 0ae9155 82d8c86 0ae9155 bbd4e37 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
from faster_whisper import WhisperModel
# Module-level transcription state, read and rebound through `global`
# statements by the functions in this module.

# Loaded WhisperModel instance; stays None until a model has been loaded.
model: "WhisperModel | None" = None
# Identifier the current model was built from; compared on each load request
# so that asking for the same model again does not rebuild it.
model_size: "str | None" = None

# Language code handed to every transcription call.
language: str = "ja"
# Text handed to the transcription call as `initial_prompt`.
initial_prompt: "str | None" = None

# Extra keyword arguments expanded into every transcription call.
transcribe_kwargs: dict = {}
def load_model(_model_size):
    """Load the Whisper model for ``_model_size`` unless it is already loaded.

    A falsy ``_model_size`` (``None``, ``""``) or one equal to the currently
    recorded ``model_size`` is a no-op. Otherwise the model is built on CUDA
    with float16 first and, if that fails, on CPU with int8.

    The module-level ``model`` and ``model_size`` are only rebound after a
    model has been built successfully, so a failed load leaves the previous
    model and its recorded size untouched and consistent with each other.

    Args:
        _model_size: Identifier passed as the first argument to
            ``WhisperModel``.

    Raises:
        Exception: whatever ``WhisperModel`` raises when the CPU fallback
            also fails.
    """
    global model_size, model

    if not _model_size or _model_size == model_size:
        return

    try:
        loaded = WhisperModel(_model_size, device="cuda", compute_type="float16")
    except Exception:
        # Best-effort fallback when the CUDA build cannot be created.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate instead of triggering a second, slow model load.
        loaded = WhisperModel(_model_size, device="cpu", compute_type="int8")

    # Commit both globals together, only once a model really exists.
    model = loaded
    model_size = _model_size
def set_prompt(prompt, _language=None):
    """Store the prompt, and optionally the language, for later transcriptions.

    Args:
        prompt: Value stored in the module-level ``initial_prompt``; it is
            always overwritten, even with ``None``.
        _language: New value for the module-level ``language``. A falsy
            value (``None``, ``""``) leaves the current language unchanged.
    """
    global initial_prompt, language

    initial_prompt = prompt

    # Nothing more to do when no replacement language was supplied.
    if not _language:
        return
    language = _language
def set_transcribe_kwargs(args):
    """Replace the extra keyword arguments used for later transcriptions.

    The mapping is copied, so mutating ``args`` after this call does not
    silently change the stored settings. ``None`` (or any empty value) resets
    the stored settings to an empty dict, which keeps the later ``**``
    expansion of ``transcribe_kwargs`` valid.

    Args:
        args: Mapping of keyword-argument names to values, or ``None``.
    """
    global transcribe_kwargs
    transcribe_kwargs = dict(args) if args else {}
def speech_to_text(audio_file, _model_size=None):
    """Transcribe ``audio_file`` and return the text with and without timestamps.

    ``load_model(_model_size)`` is called first, so a different model can be
    selected per call. The module-level ``initial_prompt``, ``language`` and
    ``transcribe_kwargs`` are then applied to ``model.transcribe``.

    Entries in ``transcribe_kwargs`` override the built-in defaults
    (``initial_prompt``, ``language``, ``beam_size=5``, ``vad_filter=True``,
    ``without_timestamps=False``) instead of colliding with them.

    Args:
        audio_file: Audio input, forwarded unchanged to ``model.transcribe``.
        _model_size: Optional model identifier forwarded to ``load_model``.

    Returns:
        A ``(text_only, text_with_timestamps)`` tuple of strings. Each
        segment contributes one newline-terminated line; the timestamped
        variant is ``"<start>\\t<end>\\t<text>"`` with times formatted to two
        decimals.

    Raises:
        AttributeError: if no model has been loaded yet (``model`` is still
            ``None``) and ``_model_size`` is falsy.
    """
    load_model(_model_size)

    # Defaults first, user-supplied options last: a key such as "beam_size" in
    # transcribe_kwargs now overrides the default rather than raising
    # "got multiple values for keyword argument".
    options = {
        "initial_prompt": initial_prompt,
        "language": language,
        "beam_size": 5,
        "vad_filter": True,
        "without_timestamps": False,
        **(transcribe_kwargs or {}),
    }
    segments, _info = model.transcribe(audio_file, **options)

    # Single pass over the segments (they may be a one-shot iterator),
    # collecting lines for one join each instead of repeated string +=.
    plain_lines = []
    stamped_lines = []
    for segment in segments:
        plain_lines.append(f"{segment.text}\n")
        stamped_lines.append(
            f"{segment.start:.2f}\t{segment.end:.2f}\t{segment.text}\n"
        )

    return "".join(plain_lines), "".join(stamped_lines)
|