aka7774 committed on
Commit
bbd4e37
·
verified ·
1 Parent(s): de95cc5

Create fn.py

Browse files
Files changed (1) hide show
  1. fn.py +39 -0
fn.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from faster_whisper import WhisperModel
3
+
4
# Identifier of the Whisper model currently cached in `model`; None until
# load_model() has loaded one.
model_size = None
# Cached WhisperModel instance, set by load_model() and used by speech_to_text().
model = None
6
+
7
def load_model(_model_size):
    """Load the Whisper model named by ``_model_size`` into the module cache.

    The model instance is stored in the module-level ``model`` global and its
    identifier in ``model_size``. The call does nothing when ``_model_size``
    is falsy or equal to the identifier that is already cached.

    Args:
        _model_size: Model identifier passed unchanged to ``WhisperModel``.
            A falsy value (``None``, ``""``) leaves the cache untouched.
    """
    global model_size, model

    # Nothing requested, or the requested model is already the cached one.
    if not _model_size or _model_size == model_size:
        return

    # float16 on CUDA, int8 on CPU.
    # ("int8_float16" was left as a commented-out CUDA alternative.)
    if torch.cuda.is_available():
        device, compute_type = "cuda", "float16"
    else:
        device, compute_type = "cpu", "int8"

    # Build the new model BEFORE touching the globals: if construction raises,
    # `model` and `model_size` still describe the previously loaded model, so
    # a retry with the same identifier is not skipped as "already loaded".
    new_model = WhisperModel(_model_size, device=device, compute_type=compute_type)
    model, model_size = new_model, _model_size
18
+
19
def speech_to_text(audio_file, _model_size = None):
    """Transcribe ``audio_file`` as Japanese speech with the cached model.

    Args:
        audio_file: Audio input passed unchanged to ``model.transcribe``.
        _model_size: Optional model identifier. When given and different from
            the cached one, ``load_model`` loads it before transcribing.

    Returns:
        A ``(text_only, text_with_timestamps)`` tuple of strings. Each segment
        contributes one newline-terminated line: the bare segment text in the
        first string, and ``start<TAB>end<TAB>text`` (times formatted with two
        decimals) in the second.

    Raises:
        RuntimeError: If no model has been loaded yet and ``_model_size`` was
            not supplied, so there is nothing to transcribe with.
    """
    load_model(_model_size)

    if model is None:
        # Previously this surfaced as an opaque AttributeError on None.
        raise RuntimeError(
            "No Whisper model is loaded; pass _model_size or call load_model() first."
        )

    plain_lines = []
    stamped_lines = []

    with torch.no_grad():
        segments, _info = model.transcribe(
            audio_file,
            language='ja',
            beam_size=5,
            vad_filter=True,
            without_timestamps=False,
        )

        # faster-whisper documents `segments` as a lazy generator: decoding
        # happens while iterating, so the loop stays inside the context block.
        for segment in segments:
            plain_lines.append(f"{segment.text}\n")
            stamped_lines.append(
                f"{segment.start:.2f}\t{segment.end:.2f}\t{segment.text}\n"
            )

    text_only = "".join(plain_lines)
    text_with_timestamps = "".join(stamped_lines)
    return text_only, text_with_timestamps