chinmaydan committed on
Commit
2fa0634
·
1 Parent(s): d4d00fe

Prints the selected language and the transcribed text in English

Browse files
Files changed (1) hide show
  1. app.py +38 -6
app.py CHANGED
@@ -6,8 +6,23 @@ import whisper
6
  model = whisper.load_model("small")
7
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- def predict(audio, mic_audio=None):
 
11
  # audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))
12
  if mic_audio is not None:
13
  input_audio = mic_audio
@@ -20,14 +35,18 @@ def predict(audio, mic_audio=None):
20
  audio = whisper.pad_or_trim(audio)
21
 
22
  mel = whisper.log_mel_spectrogram(audio).to(model.device)
23
-
24
- language, probs = model.detect_language(mel)
25
-
26
  options = whisper.DecodingOptions(fp16 = False)
27
  result = whisper.decode(model, mel, options)
 
 
 
 
 
 
 
28
 
29
- print(result.text)
30
- return result.text, result.language
31
 
32
 
33
 
@@ -43,6 +62,19 @@ gr.Interface(
43
  inputs=[
44
  gr.Audio(label="Upload Speech", source="upload", type="filepath"),
45
  gr.Audio(label="Record Speech", source="microphone", type="filepath"),
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  ],
47
  outputs=[
48
  gr.Text(label="Transcription"),
 
6
  model = whisper.load_model("small")
7
 
8
 
9
+ language_id_lookup = {
10
+ "English" : "en",
11
+ "German" : "de",
12
+ "Greek" : "el",
13
+ "Spanish" : "es",
14
+ "Finnish" : "fi",
15
+ "Russian" : "ru",
16
+ "Hungarian" : "hu",
17
+ "Dutch" : "nl",
18
+ "French" : "fr",
19
+ 'Polish' : "pl",
20
+ 'Portuguese': "pt",
21
+ 'Italian' : "it",
22
+ }
23
 
24
+
25
+ def predict(audio, mic_audio=None, language):
26
  # audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))
27
  if mic_audio is not None:
28
  input_audio = mic_audio
 
35
  audio = whisper.pad_or_trim(audio)
36
 
37
  mel = whisper.log_mel_spectrogram(audio).to(model.device)
 
 
 
38
  options = whisper.DecodingOptions(fp16 = False)
39
  result = whisper.decode(model, mel, options)
40
+
41
+ if(language == "Detect Language"):
42
+ outLanguage, probs = model.detect_language(mel)
43
+ else:
44
+ outLanguage = language_id_lookup(language.split()[0])
45
+
46
+
47
 
48
+ print(result.text + " " + outLanguage)
49
+ return result.text, outLanguage
50
 
51
 
52
 
 
62
  inputs=[
63
  gr.Audio(label="Upload Speech", source="upload", type="filepath"),
64
  gr.Audio(label="Record Speech", source="microphone", type="filepath"),
65
+ gr.inputs.Dropdown(['English Text',
66
+ 'German Text',
67
+ 'Greek Text',
68
+ 'Spanish Text',
69
+ 'Finnish Text',
70
+ 'Russian Text',
71
+ 'Hungarian Text',
72
+ 'Dutch Text',
73
+ 'French Text',
74
+ 'Polish Text',
75
+ 'Portuguese Text',
76
+ 'Italian Text',
77
+ 'Detect Language'], type="value", default='English Text', label="Select the Language of the that you are speaking in.")
78
  ],
79
  outputs=[
80
  gr.Text(label="Transcription"),