update
Browse files
app.py
CHANGED
|
@@ -92,14 +92,7 @@ def whisper_stt(audio):
|
|
| 92 |
# print the recognized text
|
| 93 |
print(f"transcript is : {result_transc.text}")
|
| 94 |
print(f"translation is : {result_transl.text}")
|
| 95 |
-
|
| 96 |
-
# decode the audio
|
| 97 |
-
#options = whisper.DecodingOptions(fp16 = False, language='en') #lang
|
| 98 |
-
#result = whisper.decode(model, mel, options)
|
| 99 |
-
|
| 100 |
-
# print the recognized text
|
| 101 |
-
# print(f"transcript is : {result.text}")
|
| 102 |
-
# return result.text, lang
|
| 103 |
return result_transc.text, result_transl.text, lang
|
| 104 |
|
| 105 |
|
|
@@ -146,10 +139,10 @@ def lang_model_response(prompt, language):
|
|
| 146 |
output = response.json()
|
| 147 |
output_tmp = output[0]['generated_text']
|
| 148 |
print(f"Bloom API Response is : {output_tmp}")
|
| 149 |
-
if language == 'en':
|
| 150 |
-
|
| 151 |
-
else:
|
| 152 |
-
|
| 153 |
print(f"Final Bloom Response after splits is: {solution}")
|
| 154 |
return solution
|
| 155 |
|
|
@@ -163,19 +156,35 @@ def tts(text, language):
|
|
| 163 |
coquiTTS.get_tts(text, fp, speaker = {"language" : language})
|
| 164 |
return fp.name
|
| 165 |
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
"
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
# print the recognized text
|
| 93 |
print(f"transcript is : {result_transc.text}")
|
| 94 |
print(f"translation is : {result_transl.text}")
|
| 95 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
return result_transc.text, result_transl.text, lang
|
| 97 |
|
| 98 |
|
|
|
|
| 139 |
output = response.json()
|
| 140 |
output_tmp = output[0]['generated_text']
|
| 141 |
print(f"Bloom API Response is : {output_tmp}")
|
| 142 |
+
#if language == 'en':
|
| 143 |
+
solution = output_tmp.split("Answer: ")[2].split("\n")[0]
|
| 144 |
+
#else:
|
| 145 |
+
# solution = output_tmp.split(".")[1]
|
| 146 |
print(f"Final Bloom Response after splits is: {solution}")
|
| 147 |
return solution
|
| 148 |
|
|
|
|
| 156 |
coquiTTS.get_tts(text, fp, speaker = {"language" : language})
|
| 157 |
return fp.name
|
| 158 |
|
| 159 |
+
demo = gr.Blocks()
|
| 160 |
+
with demo:
|
| 161 |
+
gr.Markdown("<h1><center>Talk to Your Multilingual AI Assistant</center></h1>")
|
| 162 |
+
gr.Markdown(
|
| 163 |
+
"""Model pipeline consisting of - Whisper for Speech-to-text, Bloom for Text-generation, and CoquiTTS for Text-To-Speech. <br> Front end using Gradio Blocks API.
|
| 164 |
+
""")
|
| 165 |
+
with gr.Row():
|
| 166 |
+
with gr.Column():
|
| 167 |
+
in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice here') #type='filepath'
|
| 168 |
+
b1 = gr.Button("AI response (Whisper - Bloom - Coqui pipeline)")
|
| 169 |
+
out_transcript = gr.Textbox(label= 'As is Transcript using OpenAI Whisper')
|
| 170 |
+
out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
|
| 171 |
+
with gr.Column():
|
| 172 |
+
out_audio = gr.Audio(label='AI response in Audio form in your preferred language')
|
| 173 |
+
out_generated_text = gr.Textbox(label= 'AI response to your query in your preferred language using Bloom! ')
|
| 174 |
+
out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
|
| 175 |
|
| 176 |
+
b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_translation_en, out_generated_text,out_generated_text_en, out_audio])
|
| 177 |
+
|
| 178 |
+
demo.launch(enable_queue=True, debug=True)
|
| 179 |
+
|
| 180 |
+
#gr.Interface(
|
| 181 |
+
# title = 'Testing Whisper',
|
| 182 |
+
# fn=driver_fun,
|
| 183 |
+
# inputs=[
|
| 184 |
+
# gr.Audio(source="microphone", type="filepath"), #streaming = True,
|
| 185 |
+
# # "state"
|
| 186 |
+
# ],
|
| 187 |
+
# outputs=[
|
| 188 |
+
# "textbox", "textbox", "textbox", "textbox", "audio",
|
| 189 |
+
# ],
|
| 190 |
+
# live=True).launch()
|