chinmaydan commited on
Commit
89adfe3
·
1 Parent(s): 875c690

deciding on final language

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -48,6 +48,9 @@ def predict(audio, language, mic_audio=None):
48
  else:
49
  outLanguage = language_id_lookup[language.split()[0]]
50
 
 
 
 
51
  options = whisper.DecodingOptions(fp16 = False, language = outLanguage)
52
  result = whisper.decode(model, mel, options)
53
  outLanguage = result.language
@@ -57,11 +60,13 @@ def predict(audio, language, mic_audio=None):
57
 
58
 
59
  print(result.text + " " + outLanguage)
 
 
60
  return result.text, outLanguage
61
 
62
 
63
 
64
- title = "Demo for Whisper -> Something -> XLS-R"
65
 
66
  description = """
67
  <b>How to use:</b> Upload an audio file or record using the microphone. The audio is fed into the Whisper model developed by OpenAI.
@@ -69,6 +74,7 @@ The output is the text transcription of the audio in the language you inputted.
69
  tell you what language it detected.
70
  """
71
 
 
72
  gr.Interface(
73
  fn=predict,
74
  inputs=[
@@ -83,6 +89,8 @@ gr.Interface(
83
  'Detect Language'], type="value", default='English Text', label="Select the language that you are speaking in."),
84
  gr.Audio(label="Record Speech", source="microphone", type="filepath"),
85
  ],
 
 
86
  outputs=[
87
  gr.Text(label="Transcription"),
88
  ],
 
48
  else:
49
  outLanguage = language_id_lookup[language.split()[0]]
50
 
51
+ # Runs the audio through the whisper model and gets the DecodingResult object, which has the features:
52
+ # audio_features (Tensor), language, language_probs, tokens, text, avg_logprob, no_speech_prob, temperature, compression_ratio
53
+
54
  options = whisper.DecodingOptions(fp16 = False, language = outLanguage)
55
  result = whisper.decode(model, mel, options)
56
  outLanguage = result.language
 
60
 
61
 
62
  print(result.text + " " + outLanguage)
63
+
64
+ # Returns the text and the language
65
  return result.text, outLanguage
66
 
67
 
68
 
69
+ title = "Demo for Whisper (ASR) -> Something -> IMS Toucan (TTS)"
70
 
71
  description = """
72
  <b>How to use:</b> Upload an audio file or record using the microphone. The audio is fed into the Whisper model developed by OpenAI.
 
74
  tell you what language it detected.
75
  """
76
 
77
+ # The gradio interface
78
  gr.Interface(
79
  fn=predict,
80
  inputs=[
 
89
  'Detect Language'], type="value", default='English Text', label="Select the language that you are speaking in."),
90
  gr.Audio(label="Record Speech", source="microphone", type="filepath"),
91
  ],
92
+ # To change to output audio, replace the outputs line with
93
+ # outputs=gr.outputs.Audio(type="numpy", label=None)
94
  outputs=[
95
  gr.Text(label="Transcription"),
96
  ],