Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -161,20 +161,46 @@ def generate_speech(text, voice, temperature, top_p, repetition_penalty, max_new
|
|
161 |
print(f"Error generating speech: {e}")
|
162 |
return None
|
163 |
|
164 |
-
# Examples for the UI
|
165 |
examples = [
|
166 |
-
["Hey there
|
167 |
-
["
|
168 |
-
["
|
169 |
-
["
|
170 |
-
["
|
171 |
-
["
|
172 |
-
["
|
173 |
-
["
|
174 |
]
|
175 |
|
176 |
-
# Available voices
|
177 |
-
VOICES = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
|
179 |
# Available Emotive Tags
|
180 |
EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
|
@@ -182,17 +208,21 @@ EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>
|
|
182 |
# Create Gradio interface
|
183 |
with gr.Blocks(title="VyvoTTS Multi-Speaker") as demo:
|
184 |
gr.Markdown(f"""
|
185 |
-
#
|
186 |
VyvoTTS is a text-to-speech model by Vyvo team using LFM2 architecture, trained on multiple diverse open-source datasets.
|
187 |
Since some datasets may contain transcription errors or quality issues, output quality can vary.
|
188 |
Higher quality datasets typically produce better speech synthesis results.
|
189 |
|
|
|
|
|
|
|
|
|
190 |
## Tips for better prompts:
|
191 |
- Add paralinguistic elements like {", ".join(EMOTIVE_TAGS)} or `uhm` for more human-like speech.
|
192 |
- Longer text prompts generally work better than very short phrases
|
193 |
- Increasing `repetition_penalty` and `temperature` makes the model speak faster.
|
194 |
|
195 |
-
**Note:** Output quality may vary depending on the source dataset quality for each voice.
|
196 |
""")
|
197 |
with gr.Row():
|
198 |
with gr.Column(scale=3):
|
@@ -203,8 +233,8 @@ with gr.Blocks(title="VyvoTTS Multi-Speaker") as demo:
|
|
203 |
)
|
204 |
voice = gr.Dropdown(
|
205 |
choices=VOICES,
|
206 |
-
value="
|
207 |
-
label="Voice"
|
208 |
)
|
209 |
|
210 |
with gr.Accordion("Advanced Settings", open=False):
|
|
|
161 |
print(f"Error generating speech: {e}")
|
162 |
return None
|
163 |
|
164 |
+
# Examples for the UI - featuring Genshin characters
|
165 |
examples = [
|
166 |
+
["Hey there! I am ready to help you on your adventure in Teyvat.", "Tighnari", 0.6, 0.95, 1.1, 1200],
|
167 |
+
["The wind brings new adventures and ancient secrets to discover.", "Kaeya", 0.7, 0.95, 1.1, 1200],
|
168 |
+
["Let me share the wisdom of the elements with you, traveler.", "Nahida", 0.6, 0.9, 1.2, 1200],
|
169 |
+
["Every journey begins with a single step forward into the unknown.", "Noelle", 0.65, 0.9, 1.1, 1200],
|
170 |
+
["The stars above guide us through even the darkest of nights.", "Furina", 0.7, 0.95, 1.1, 1200],
|
171 |
+
["Together we can explore the mysteries of this vast world.", "Lyney", 0.65, 0.9, 1.15, 1200],
|
172 |
+
["Knowledge is power, but wisdom is knowing how to use it.", "Alhaitham", 0.7, 0.95, 1.1, 1200],
|
173 |
+
["The beauty of Sumeru never fails to take my breath away.", "Collei", 0.6, 0.95, 1.1, 1200]
|
174 |
]
|
175 |
|
176 |
+
# Available voices - Genshin characters and others
|
177 |
+
VOICES = [
|
178 |
+
"Stephen_Fry",
|
179 |
+
"Tighnari",
|
180 |
+
"Thoma",
|
181 |
+
"Shikanoin_Heizou",
|
182 |
+
"Noelle",
|
183 |
+
"Ningguang",
|
184 |
+
"Nilou",
|
185 |
+
"Neuvillette",
|
186 |
+
"Navia",
|
187 |
+
"Nahida",
|
188 |
+
"Mualani",
|
189 |
+
"Lyney",
|
190 |
+
"Lynette",
|
191 |
+
"Layla",
|
192 |
+
"Kaveh",
|
193 |
+
"Kaeya",
|
194 |
+
"Furina",
|
195 |
+
"Dehya",
|
196 |
+
"Cyno",
|
197 |
+
"Collei",
|
198 |
+
"Beidou",
|
199 |
+
"Alhaitham",
|
200 |
+
"Arataki_Itto",
|
201 |
+
"Jenny_Voice",
|
202 |
+
"Optimus_Prime"
|
203 |
+
]
|
204 |
|
205 |
# Available Emotive Tags
|
206 |
EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
|
|
|
208 |
# Create Gradio interface
|
209 |
with gr.Blocks(title="VyvoTTS Multi-Speaker") as demo:
|
210 |
gr.Markdown(f"""
|
211 |
+
# 🎮 VyvoTTS Multi-Speaker
|
212 |
VyvoTTS is a text-to-speech model by Vyvo team using LFM2 architecture, trained on multiple diverse open-source datasets.
|
213 |
Since some datasets may contain transcription errors or quality issues, output quality can vary.
|
214 |
Higher quality datasets typically produce better speech synthesis results.
|
215 |
|
216 |
+
**Available Character Voices:**
|
217 |
+
🌟 Genshin Impact: Tighnari, Thoma, Heizou, Noelle, Ningguang, Nilou, Neuvillette, Navia, Nahida, Mualani, Lyney, Lynette, Layla, Kaveh, Kaeya, Furina, Dehya, Cyno, Collei, Beidou, Alhaitham, Itto
|
218 |
+
🎭 Others: Stephen Fry, Jenny Voice, Optimus Prime
|
219 |
+
|
220 |
## Tips for better prompts:
|
221 |
- Add paralinguistic elements like {", ".join(EMOTIVE_TAGS)} or `uhm` for more human-like speech.
|
222 |
- Longer text prompts generally work better than very short phrases
|
223 |
- Increasing `repetition_penalty` and `temperature` makes the model speak faster.
|
224 |
|
225 |
+
**Note:** Output quality may vary depending on the source dataset quality for each character voice.
|
226 |
""")
|
227 |
with gr.Row():
|
228 |
with gr.Column(scale=3):
|
|
|
233 |
)
|
234 |
voice = gr.Dropdown(
|
235 |
choices=VOICES,
|
236 |
+
value="Tighnari",
|
237 |
+
label="Character Voice"
|
238 |
)
|
239 |
|
240 |
with gr.Accordion("Advanced Settings", open=False):
|