Spaces:

Steveeeeeeen
/

Llasagna-1b-tts

Running on Zero

App Files Files Community

Steveeeeeeen HF Staff committed on Feb 6

Commit

22b9e3b

verified ·

1 Parent(s): 497563a

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -5

app.py CHANGED Viewed

@@ -36,13 +36,26 @@ whisper_turbo_pipe = pipeline(
 )
 SPEAKERS = {
-    "Male 1": {
         "path": "speakers/female_1.mp3",
         "transcript": "e lo stesso alessi che andò ad aprire non riconobbe antoni il quale tornava con la sporta sotto il braccio tanto era mutato coperto di polvere e con la barba lungacome fu entrato e si fu messo a sedere in un cantuccio non osavano quasi fargli festa.",
         "description": "Una voce femminile.",
     },
 }
 def preview_speaker(display_name):
     """Returns the audio and transcript for preview"""
     speaker_name = speaker_display_dict[display_name]
@@ -158,13 +171,30 @@ def infer(sample_audio_path, target_text, progress=gr.Progress()):
 with gr.Blocks() as app_tts:
     gr.Markdown("# Zero Shot Voice Clone TTS")
-    ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
     gen_text_input = gr.Textbox(label="Text to Generate", lines=10)
     generate_btn = gr.Button("Synthesize", variant="primary")
     audio_output = gr.Audio(label="Synthesized Audio")
     generate_btn.click(
         infer,
         inputs=[
@@ -183,7 +213,7 @@ with gr.Blocks() as app_credits:
 """)
 with gr.Blocks() as app:
-    gr.HTML("<img src='https://huggingface.co/datasets/Steveeeeeeen/random_images/blob/main/llasagna.png' alt='Llasagna' style='width: 100%; height: auto;'>", elem_id="banner")
     gr.Markdown(
         """
 # Llasagna 1b TTS

 )
 SPEAKERS = {
+    "Female 1": {
         "path": "speakers/female_1.mp3",
         "transcript": "e lo stesso alessi che andò ad aprire non riconobbe antoni il quale tornava con la sporta sotto il braccio tanto era mutato coperto di polvere e con la barba lungacome fu entrato e si fu messo a sedere in un cantuccio non osavano quasi fargli festa.",
         "description": "Una voce femminile.",
     },
+    "Male 1": {
+        "path": "speakers/male_1.mp3",
+        "transcript": "Hello, this is a sample voice recording for demonstration purposes.",
+        "description": "A male voice with neutral accent.",
+    },
+    "Female 2": {
+        "path": "speakers/female_2.mp3",
+        "transcript": "This is another sample recording to showcase the voice cloning capabilities.",
+        "description": "A female voice with clear articulation.",
+    },
 }
+banner_url = "https://huggingface.co/datasets/Steveeeeeeen/random_images/resolve/main/llasagna.png"
+BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 150px; max-width: 300px;"> </div>'
 def preview_speaker(display_name):
     """Returns the audio and transcript for preview"""
     speaker_name = speaker_display_dict[display_name]
 with gr.Blocks() as app_tts:
     gr.Markdown("# Zero Shot Voice Clone TTS")
+    with gr.Row():
+        ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
+        speaker_dropdown = gr.Dropdown(
+            choices=list(SPEAKERS.keys()),
+            label="Or select a predefined speaker",
+            value=None
+        )
     gen_text_input = gr.Textbox(label="Text to Generate", lines=10)
     generate_btn = gr.Button("Synthesize", variant="primary")
     audio_output = gr.Audio(label="Synthesized Audio")
+    def update_audio(speaker):
+        if speaker in SPEAKERS:
+            return SPEAKERS[speaker]["path"]
+        return None
+    speaker_dropdown.change(
+        fn=update_audio,
+        inputs=[speaker_dropdown],
+        outputs=[ref_audio_input]
+    )
     generate_btn.click(
         infer,
         inputs=[
 """)
 with gr.Blocks() as app:
+    gr.HTML(BANNER, elem_id="banner")
     gr.Markdown(
         """
 # Llasagna 1b TTS