Spaces:
Sleeping
Sleeping
Commit
·
2fa0634
1
Parent(s):
d4d00fe
prints language selected and the transcribed text in english
Browse files
app.py
CHANGED
@@ -6,8 +6,23 @@ import whisper
|
|
6 |
model = whisper.load_model("small")
|
7 |
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
|
|
|
11 |
# audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))
|
12 |
if mic_audio is not None:
|
13 |
input_audio = mic_audio
|
@@ -20,14 +35,18 @@ def predict(audio, mic_audio=None):
|
|
20 |
audio = whisper.pad_or_trim(audio)
|
21 |
|
22 |
mel = whisper.log_mel_spectrogram(audio).to(model.device)
|
23 |
-
|
24 |
-
language, probs = model.detect_language(mel)
|
25 |
-
|
26 |
options = whisper.DecodingOptions(fp16 = False)
|
27 |
result = whisper.decode(model, mel, options)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
-
print(result.text)
|
30 |
-
return result.text,
|
31 |
|
32 |
|
33 |
|
@@ -43,6 +62,19 @@ gr.Interface(
|
|
43 |
inputs=[
|
44 |
gr.Audio(label="Upload Speech", source="upload", type="filepath"),
|
45 |
gr.Audio(label="Record Speech", source="microphone", type="filepath"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
],
|
47 |
outputs=[
|
48 |
gr.Text(label="Transcription"),
|
|
|
6 |
model = whisper.load_model("small")
|
7 |
|
8 |
|
9 |
+
language_id_lookup = {
|
10 |
+
"English" : "en",
|
11 |
+
"German" : "de",
|
12 |
+
"Greek" : "el",
|
13 |
+
"Spanish" : "es",
|
14 |
+
"Finnish" : "fi",
|
15 |
+
"Russian" : "ru",
|
16 |
+
"Hungarian" : "hu",
|
17 |
+
"Dutch" : "nl",
|
18 |
+
"French" : "fr",
|
19 |
+
'Polish' : "pl",
|
20 |
+
'Portuguese': "pt",
|
21 |
+
'Italian' : "it",
|
22 |
+
}
|
23 |
|
24 |
+
|
25 |
+
def predict(audio, mic_audio=None, language):
|
26 |
# audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))
|
27 |
if mic_audio is not None:
|
28 |
input_audio = mic_audio
|
|
|
35 |
audio = whisper.pad_or_trim(audio)
|
36 |
|
37 |
mel = whisper.log_mel_spectrogram(audio).to(model.device)
|
|
|
|
|
|
|
38 |
options = whisper.DecodingOptions(fp16 = False)
|
39 |
result = whisper.decode(model, mel, options)
|
40 |
+
|
41 |
+
if(language == "Detect Language"):
|
42 |
+
# detect_language returns (language_token, probs); the first element is a
# tensor, not a string, so pick the most probable language code from probs.
_, probs = model.detect_language(mel)
outLanguage = max(probs, key=probs.get)
|
43 |
+
else:
|
44 |
+
# language_id_lookup is a dict, so it must be subscripted, not called. The
# dropdown value looks like "English Text"; its first word is the lookup key.
outLanguage = language_id_lookup[language.split()[0]]
|
45 |
+
|
46 |
+
|
47 |
|
48 |
+
print(result.text + " " + outLanguage)
|
49 |
+
return result.text, outLanguage
|
50 |
|
51 |
|
52 |
|
|
|
62 |
inputs=[
|
63 |
gr.Audio(label="Upload Speech", source="upload", type="filepath"),
|
64 |
gr.Audio(label="Record Speech", source="microphone", type="filepath"),
|
65 |
+
gr.inputs.Dropdown(['English Text',
|
66 |
+
'German Text',
|
67 |
+
'Greek Text',
|
68 |
+
'Spanish Text',
|
69 |
+
'Finnish Text',
|
70 |
+
'Russian Text',
|
71 |
+
'Hungarian Text',
|
72 |
+
'Dutch Text',
|
73 |
+
'French Text',
|
74 |
+
'Polish Text',
|
75 |
+
'Portuguese Text',
|
76 |
+
'Italian Text',
|
77 |
+
'Detect Language'], type="value", default='English Text', label="Select the Language of the that you are speaking in.")
|
78 |
],
|
79 |
outputs=[
|
80 |
gr.Text(label="Transcription"),
|