chinmaydan commited on
Commit
89adfe3
·
1 Parent(s): 875c690

deciding on final language

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -48,6 +48,9 @@ def predict(audio, language, mic_audio=None):
48
  else:
49
  outLanguage = language_id_lookup[language.split()[0]]
50
 
 
 
 
51
  options = whisper.DecodingOptions(fp16 = False, language = outLanguage)
52
  result = whisper.decode(model, mel, options)
53
  outLanguage = result.language
@@ -57,11 +60,13 @@ def predict(audio, language, mic_audio=None):
57
 
58
 
59
  print(result.text + " " + outLanguage)
 
 
60
  return result.text, outLanguage
61
 
62
 
63
 
64
- title = "Demo for Whisper -> Something -> XLS-R"
65
 
66
  description = """
67
  <b>How to use:</b> Upload an audio file or record using the microphone. The audio is fed into the Whisper model developed by OpenAI.
@@ -69,6 +74,7 @@ The output is the text transcription of the audio in the language you inputted.
69
  tell you what language it detected.
70
  """
71
 
 
72
  gr.Interface(
73
  fn=predict,
74
  inputs=[
@@ -83,6 +89,8 @@ gr.Interface(
83
  'Detect Language'], type="value", default='English Text', label="Select the language that you are speaking in."),
84
  gr.Audio(label="Record Speech", source="microphone", type="filepath"),
85
  ],
 
 
86
  outputs=[
87
  gr.Text(label="Transcription"),
88
  ],
 
48
  else:
49
  outLanguage = language_id_lookup[language.split()[0]]
50
 
51
+ # Runs the audio through the whisper model and gets the DecodingResult object, which has the features:
52
+ # audio_features (Tensor), language, language_probs, tokens, text, avg_logprob, no_speech_prob, temperature, compression_ratio
53
+
54
  options = whisper.DecodingOptions(fp16 = False, language = outLanguage)
55
  result = whisper.decode(model, mel, options)
56
  outLanguage = result.language
 
60
 
61
 
62
  print(result.text + " " + outLanguage)
63
+
64
+ # Returns the text and the language
65
  return result.text, outLanguage
66
 
67
 
68
 
69
+ title = "Demo for Whisper (ASR) -> Something -> IMS Toucan (TTS)"
70
 
71
  description = """
72
  <b>How to use:</b> Upload an audio file or record using the microphone. The audio is fed into the Whisper model developed by OpenAI.
 
74
  tell you what language it detected.
75
  """
76
 
77
+ # The gradio interface
78
  gr.Interface(
79
  fn=predict,
80
  inputs=[
 
89
  'Detect Language'], type="value", default='English Text', label="Select the language that you are speaking in."),
90
  gr.Audio(label="Record Speech", source="microphone", type="filepath"),
91
  ],
92
+ # To change to output audio, replace the outputs line with
93
+ # outputs=gr.outputs.Audio(type="numpy", label=None)
94
  outputs=[
95
  gr.Text(label="Transcription"),
96
  ],