Orpheus-TTS

Running on Zero

App Files Files Community

kadirnar committed 19 days ago

Commit

ae2987b

verified ·

1 Parent(s): 1452227

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -15

app.py CHANGED Viewed

@@ -161,20 +161,46 @@ def generate_speech(text, voice, temperature, top_p, repetition_penalty, max_new
         print(f"Error generating speech: {e}")
         return None
-# Examples for the UI
 examples = [
-    ["Hey there my name is Tara, <chuckle> and I'm a speech generation model that can sound like a person.", "tara", 0.6, 0.95, 1.1, 1200],
-    ["I've also been taught to understand and produce paralinguistic things <sigh> like sighing, or <laugh> laughing, or <yawn> yawning!", "dan", 0.7, 0.95, 1.1, 1200],
-    ["I live in San Francisco, and have, uhm let's see, 3 billion 7 hundred ... <gasp> well, lets just say a lot of parameters.", "leah", 0.6, 0.9, 1.2, 1200],
-    ["Sometimes when I talk too much, I need to <cough> excuse myself. <sniffle> The weather has been quite cold lately.", "leo", 0.65, 0.9, 1.1, 1200],
-    ["Public speaking can be challenging. <groan> But with enough practice, anyone can become better at it.", "jess", 0.7, 0.95, 1.1, 1200],
-    ["The hike was exhausting but the view from the top was absolutely breathtaking! <sigh> It was totally worth it.", "mia", 0.65, 0.9, 1.15, 1200],
-    ["Did you hear that joke? <laugh> I couldn't stop laughing when I first heard it. <chuckle> It's still funny.", "zac", 0.7, 0.95, 1.1, 1200],
-    ["After running the marathon, I was so tired <yawn> and needed a long rest. <sigh> But I felt accomplished.", "zoe", 0.6, 0.95, 1.1, 1200]
 ]
-# Available voices
-VOICES = ["tara", "leah", "jess", "leo", "dan", "mia", "zac", "zoe"]
 # Available Emotive Tags
 EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
@@ -182,17 +208,21 @@ EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>
 # Create Gradio interface
 with gr.Blocks(title="VyvoTTS Multi-Speaker") as demo:
     gr.Markdown(f"""
-    # 🎵 VyvoTTS Multi-Speaker
     VyvoTTS is a text-to-speech model by Vyvo team using LFM2 architecture, trained on multiple diverse open-source datasets.
     Since some datasets may contain transcription errors or quality issues, output quality can vary.
     Higher quality datasets typically produce better speech synthesis results.
     ## Tips for better prompts:
     - Add paralinguistic elements like {", ".join(EMOTIVE_TAGS)} or `uhm` for more human-like speech.
     - Longer text prompts generally work better than very short phrases
     - Increasing `repetition_penalty` and `temperature` makes the model speak faster.
-    **Note:** Output quality may vary depending on the source dataset quality for each voice.
     """)
     with gr.Row():
         with gr.Column(scale=3):
@@ -203,8 +233,8 @@ with gr.Blocks(title="VyvoTTS Multi-Speaker") as demo:
             )
             voice = gr.Dropdown(
                 choices=VOICES,
-                value="tara",
-                label="Voice"
             )
             with gr.Accordion("Advanced Settings", open=False):

         print(f"Error generating speech: {e}")
         return None
+# Examples for the UI - using Genshin characters
 examples = [
+    ["Hey there! I am ready to help you on your adventure in Teyvat.", "Tighnari", 0.6, 0.95, 1.1, 1200],
+    ["The wind brings new adventures and ancient secrets to discover.", "Kaeya", 0.7, 0.95, 1.1, 1200],
+    ["Let me share the wisdom of the elements with you, traveler.", "Nahida", 0.6, 0.9, 1.2, 1200],
+    ["Every journey begins with a single step forward into the unknown.", "Noelle", 0.65, 0.9, 1.1, 1200],
+    ["The stars above guide us through even the darkest of nights.", "Furina", 0.7, 0.95, 1.1, 1200],
+    ["Together we can explore the mysteries of this vast world.", "Lyney", 0.65, 0.9, 1.15, 1200],
+    ["Knowledge is power, but wisdom is knowing how to use it.", "Alhaitham", 0.7, 0.95, 1.1, 1200],
+    ["The beauty of Sumeru never fails to take my breath away.", "Collei", 0.6, 0.95, 1.1, 1200]
 ]
+# Available voices - Genshin characters and others
+VOICES = [
+    "Stephen_Fry",
+    "Tighnari",
+    "Thoma",
+    "Shikanoin_Heizou",
+    "Noelle",
+    "Ningguang",
+    "Nilou",
+    "Neuvillette",
+    "Navia",
+    "Nahida",
+    "Mualani",
+    "Lyney",
+    "Lynette",
+    "Layla",
+    "Kaveh",
+    "Kaeya",
+    "Furina",
+    "Dehya",
+    "Cyno",
+    "Collei",
+    "Beidou",
+    "Alhaitham",
+    "Arataki_Itto",
+    "Jenny_Voice",
+    "Optimus_Prime"
+]
 # Available Emotive Tags
 EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
 # Create Gradio interface
 with gr.Blocks(title="VyvoTTS Multi-Speaker") as demo:
     gr.Markdown(f"""
+    # 🎮 VyvoTTS Multi-Speaker
     VyvoTTS is a text-to-speech model by Vyvo team using LFM2 architecture, trained on multiple diverse open-source datasets.
     Since some datasets may contain transcription errors or quality issues, output quality can vary.
     Higher quality datasets typically produce better speech synthesis results.
+    **Available Character Voices:**
+    🌟 Genshin Impact: Tighnari, Thoma, Heizou, Noelle, Ningguang, Nilou, Neuvillette, Navia, Nahida, Mualani, Lyney, Lynette, Layla, Kaveh, Kaeya, Furina, Dehya, Cyno, Collei, Beidou, Alhaitham, Itto
+    🎭 Others: Stephen Fry, Jenny Voice, Optimus Prime
     ## Tips for better prompts:
     - Add paralinguistic elements like {", ".join(EMOTIVE_TAGS)} or `uhm` for more human-like speech.
     - Longer text prompts generally work better than very short phrases
     - Increasing `repetition_penalty` and `temperature` makes the model speak faster.
+    **Note:** Output quality may vary depending on the source dataset quality for each character voice.
     """)
     with gr.Row():
         with gr.Column(scale=3):
             )
             voice = gr.Dropdown(
                 choices=VOICES,
+                value="Tighnari",
+                label="Character Voice"
             )
             with gr.Accordion("Advanced Settings", open=False):