youngtsai committed on
Commit
20da85e
·
1 Parent(s): 6e5cbf8

audio_output

Browse files
Files changed (2) hide show
  1. app.py +25 -2
  2. requirements.txt +2 -1
app.py CHANGED
@@ -2,6 +2,8 @@ import os
2
  import gradio as gr
3
  from openai import OpenAI
4
  import json
 
 
5
 
6
 
7
  OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
@@ -265,6 +267,24 @@ def paragraph_save(refine_paragraph):
265
  """
266
  return refine_paragraph
267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
269
  with gr.Blocks() as demo:
270
  with gr.Row():
@@ -462,7 +482,7 @@ with gr.Blocks() as demo:
462
  gr.Markdown("## 11. Save and Share")
463
  paragraph_save_button = gr.Button("Save and Share")
464
  paragraph_save_output = gr.Textbox(label="Save and Share")
465
-
466
 
467
 
468
 
@@ -588,7 +608,10 @@ with gr.Blocks() as demo:
588
  correct_grammatical_spelling_errors_output,
589
  refine_output
590
  ],
591
- output=paragraph_save_output
 
 
 
592
  )
593
 
594
  demo.launch()
 
2
  import gradio as gr
3
  from openai import OpenAI
4
  import json
5
+ from transformers import pipeline
6
+ import soundfile as sf
7
 
8
 
9
  OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
 
267
  """
268
  return refine_paragraph
269
 
270
# Shared text-to-speech pipeline; created on first use so the model is not
# reloaded on every button click.
_tts_pipeline = None


def _get_tts_pipeline():
    """Return the shared text-to-speech pipeline, creating it on first use."""
    global _tts_pipeline
    if _tts_pipeline is None:
        # NOTE(review): this checkpoint appears to be published for fairseq;
        # confirm that transformers' pipeline can load it, otherwise switch
        # to a transformers-native text-to-speech model.
        _tts_pipeline = pipeline(
            "text-to-speech", model="facebook/fastspeech2-en-ljspeech"
        )
    return _tts_pipeline


def paragraph_save_and_tts(paragraph_text):
    """
    Return the paragraph text together with a WAV file of it read aloud.

    Args:
        paragraph_text: The paragraph to save and synthesize.

    Returns:
        A ``(paragraph_text, audio_path)`` tuple. ``audio_path`` is the path
        of the generated WAV file, or ``None`` when the text is empty or
        whitespace-only (nothing to synthesize).
    """
    import tempfile

    # Nothing to speak: skip model loading and synthesis entirely.
    if not paragraph_text or not paragraph_text.strip():
        return paragraph_text, None

    speech = _get_tts_pipeline()(paragraph_text)

    # The text-to-speech pipeline returns the waveform under "audio"
    # (not "array"), laid out channel-first; soundfile expects
    # (frames, channels), so transpose multi-dimensional output.
    samples = speech["audio"]
    if getattr(samples, "ndim", 1) > 1:
        samples = samples.T

    # Use a unique temporary file rather than a fixed path: the directory is
    # guaranteed to exist and concurrent requests cannot overwrite each other.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        audio_path = handle.name

    sf.write(audio_path, samples, speech["sampling_rate"])

    return paragraph_text, audio_path
288
 
289
  with gr.Blocks() as demo:
290
  with gr.Row():
 
482
  gr.Markdown("## 11. Save and Share")
483
  paragraph_save_button = gr.Button("Save and Share")
484
  paragraph_save_output = gr.Textbox(label="Save and Share")
485
+ audio_output = gr.Audio(label="Generated Speech")
486
 
487
 
488
 
 
608
  correct_grammatical_spelling_errors_output,
609
  refine_output
610
  ],
611
+ output=[
612
+ paragraph_save_output,
613
+ audio_output
614
+ ]
615
  )
616
 
617
  demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  gradio
2
  openai>=1.0.0
3
-
 
 
1
  gradio
2
  openai>=1.0.0
3
+ transformers
4
+ soundfile