Commit 89adfe3 (1 parent: 875c690)
deciding on final language
app.py CHANGED
@@ -48,6 +48,9 @@ def predict(audio, language, mic_audio=None):
     else:
         outLanguage = language_id_lookup[language.split()[0]]
 
+    # Runs the audio through the whisper model and gets the DecodingResult object, which has the fields:
+    # audio_features (Tensor), language, language_probs, tokens, text, avg_logprob, no_speech_prob, temperature, compression_ratio
+
     options = whisper.DecodingOptions(fp16 = False, language = outLanguage)
     result = whisper.decode(model, mel, options)
     outLanguage = result.language
@@ -57,11 +60,13 @@ def predict(audio, language, mic_audio=None):
 
 
     print(result.text + " " + outLanguage)
+
+    # Returns the text and the language
     return result.text, outLanguage
 
 
 
-title = "Demo for Whisper -> Something ->
+title = "Demo for Whisper (ASR) -> Something -> IMS Toucan (TTS)"
 
 description = """
 <b>How to use:</b> Upload an audio file or record using the microphone. The audio is fed into the whisper model developed by OpenAI.
@@ -69,6 +74,7 @@ The output is the text transcription of the audio in the language you inputted.
 tell you what language it detected.
 """
 
+# The gradio interface
 gr.Interface(
     fn=predict,
     inputs=[
@@ -83,6 +89,8 @@ gr.Interface(
                'Detect Language'], type="value", default='English Text', label="Select the language that you are speaking in."),
         gr.Audio(label="Record Speech", source="microphone", type="filepath"),
     ],
+    # To change the demo to output audio, replace the outputs line with
+    # outputs=gr.outputs.Audio(type="numpy", label=None)
     outputs=[
         gr.Text(label="Transcription"),
     ],
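The language_id_lookup indexing in the first hunk implies a dict keyed by the first word of dropdown choices such as 'English Text'. The actual mapping is defined elsewhere in app.py and is not part of this diff; a hypothetical sketch of its shape:

# Hypothetical reconstruction of language_id_lookup (not shown in this diff).
# whisper.DecodingOptions expects ISO 639-1 codes, and the dropdown choices
# begin with the language name, so "English Text".split()[0] == "English".
language_id_lookup = {
    "English": "en",
    "German": "de",
    "Spanish": "es",
    "French": "fr",
}

# The if-branch just above the hunk presumably maps the 'Detect Language'
# choice to language=None, which lets whisper auto-detect the language.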
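For reference, a minimal standalone sketch of the decoding path this commit documents, using the openai-whisper API the file already relies on; the model size and audio path are placeholders rather than values from this repo:

import whisper

model = whisper.load_model("base")         # placeholder size; the Space may use another
audio = whisper.load_audio("speech.wav")   # placeholder path
audio = whisper.pad_or_trim(audio)         # whisper decodes fixed 30-second windows
mel = whisper.log_mel_spectrogram(audio).to(model.device)

# language=None asks the model to detect the language, mirroring the
# 'Detect Language' choice; fp16=False keeps decoding in float32 for CPU.
options = whisper.DecodingOptions(fp16=False, language=None)
result = whisper.decode(model, mel, options)

# result is the DecodingResult described by the new comment in the diff:
print(result.text, result.language, result.avg_logprob, result.no_speech_prob)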
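The comment added in the last hunk shows how to switch the demo from text to audio output. gr.outputs.Audio is the pre-3.0 Gradio namespace; with the 3.x-style components this file already uses for its inputs, the equivalent is gr.Audio(type="numpy"), whose function must return a (sample_rate, waveform) tuple. A sketch under those assumptions, with a placeholder standing in for the real TTS stage:

import numpy as np
import gradio as gr

def predict_tts(text):
    # Placeholder synthesis: one second of silence at 16 kHz. A real TTS
    # model (e.g. the IMS Toucan stage named in the title) would go here.
    sr = 16000
    return sr, np.zeros(sr, dtype=np.float32)

gr.Interface(
    fn=predict_tts,
    inputs=gr.Text(label="Text"),
    outputs=gr.Audio(type="numpy", label="Synthesized Speech"),
).launch()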