Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -83,7 +83,12 @@ class TTS_Interface:
|
|
83 |
clone_speaker_identity=False,
|
84 |
lang="en")
|
85 |
|
86 |
-
return "alignment.png",
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
def split_audio(self, path_to_audio, text_list):
|
89 |
# extract audio
|
@@ -171,6 +176,9 @@ iface = gr.Interface(fn=meta_model.read,
|
|
171 |
"Voice 3"], type="value", default="Voice 3", label="Speaker selection for the third sentence")],
|
172 |
outputs=[gr.outputs.Image(label="Alignment of Phonemes to Audio"),
|
173 |
gr.outputs.Audio(type="file", label="Original Audio"),
|
|
|
|
|
|
|
174 |
gr.outputs.Audio(type="numpy", label="Customized Audio")],
|
175 |
layout="vertical",
|
176 |
title="IMS Toucan Speech Customization through Voice Cloning Demo",
|
@@ -178,6 +186,6 @@ iface = gr.Interface(fn=meta_model.read,
|
|
178 |
theme="default",
|
179 |
allow_flagging="never",
|
180 |
allow_screenshot=False,
|
181 |
-
description="In this demo, an audio is split automatically into individual sentences. Then each of the sentences is re-synthesized into speech with the exact same prosody, but with a voice that you can choose. This allows customizing any existing read speech while retaining as much from the original reading as possible.",
|
182 |
article=article)
|
183 |
iface.launch(enable_queue=True)
|
|
|
83 |
clone_speaker_identity=False,
|
84 |
lang="en")
|
85 |
|
86 |
+
return "alignment.png", \
|
87 |
+
reference_audio, \
|
88 |
+
self.speaker_path_lookup["Voice 1"], \
|
89 |
+
self.speaker_path_lookup["Voice 2"], \
|
90 |
+
self.speaker_path_lookup["Voice 3"], \
|
91 |
+
(48000, float2pcm(torch.cat([part_1, part_2, part_3], dim=0).numpy()))
|
92 |
|
93 |
def split_audio(self, path_to_audio, text_list):
|
94 |
# extract audio
|
|
|
176 |
"Voice 3"], type="value", default="Voice 3", label="Speaker selection for the third sentence")],
|
177 |
outputs=[gr.outputs.Image(label="Alignment of Phonemes to Audio"),
|
178 |
gr.outputs.Audio(type="file", label="Original Audio"),
|
179 |
+
gr.outputs.Audio(type="file", label="Reference-Voice 1"),
|
180 |
+
gr.outputs.Audio(type="file", label="Reference-Voice 2"),
|
181 |
+
gr.outputs.Audio(type="file", label="Reference-Voice 3"),
|
182 |
gr.outputs.Audio(type="numpy", label="Customized Audio")],
|
183 |
layout="vertical",
|
184 |
title="IMS Toucan Speech Customization through Voice Cloning Demo",
|
|
|
186 |
theme="default",
|
187 |
allow_flagging="never",
|
188 |
allow_screenshot=False,
|
189 |
+
description="In this demo, an audio is split automatically into individual sentences. Then each of the sentences is re-synthesized into speech with the exact same prosody, but with a voice that you can choose. This allows customizing any existing read speech while retaining as much from the original reading as possible. Unfortunately, we cannot show you the reference audio and the reference voices ahead of time, so they will be displayed together with the resulting cloned speech.",
|
190 |
article=article)
|
191 |
iface.launch(enable_queue=True)
|