Spaces:
Runtime error
Runtime error
Vaibhav Srivastav
committed on
Commit
·
0b28f04
1
Parent(s):
e134a02
up
Browse files
app.py
CHANGED
@@ -35,7 +35,7 @@ speaker_embeddings = sorted([key for key in processor.speaker_embeddings.keys()
|
|
35 |
|
36 |
SAMPLE_RATE = 24_000
|
37 |
|
38 |
-
vocos = Vocos.from_pretrained("
|
39 |
|
40 |
# import model
|
41 |
if device == "cpu":
|
@@ -45,7 +45,7 @@ else:
|
|
45 |
bark = bark.to_bettertransformer()
|
46 |
|
47 |
|
48 |
-
#
|
49 |
def generate_audio(text, voice_preset = None, lag = 0):
|
50 |
if voice_preset not in speaker_embeddings:
|
51 |
voice_preset = None
|
@@ -63,13 +63,10 @@ def generate_audio(text, voice_preset = None, lag = 0):
|
|
63 |
print("Fine tokens generated")
|
64 |
|
65 |
with torch.no_grad():
|
66 |
-
|
67 |
-
encodec_waveform = bark.codec_decode(fine_output)
|
68 |
-
|
69 |
features = vocos.codes_to_features(fine_output.transpose(0,1))
|
70 |
vocos_waveform = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
71 |
|
72 |
-
return (SAMPLE_RATE,
|
73 |
|
74 |
|
75 |
# Gradio blocks demo
|
@@ -90,9 +87,8 @@ with gr.Blocks() as demo_blocks:
|
|
90 |
btn = gr.Button("Bark with Vocos TTS")
|
91 |
|
92 |
with gr.Row():
|
93 |
-
out_audio_encodec = gr.Audio(type="numpy", autoplay=False, label="original output", show_label=True)
|
94 |
out_audio_vocos = gr.Audio(type="numpy", autoplay=False, label="vocos enhanced output", show_label=True)
|
95 |
|
96 |
-
btn.click(generate_audio, [inp_text, dd], [
|
97 |
|
98 |
demo_blocks.queue().launch(debug=True)
|
|
|
35 |
|
36 |
SAMPLE_RATE = 24_000
|
37 |
|
38 |
+
vocos = Vocos.from_pretrained("charactr/vocos-encodec-24khz").to(device)
|
39 |
|
40 |
# import model
|
41 |
if device == "cpu":
|
|
|
45 |
bark = bark.to_bettertransformer()
|
46 |
|
47 |
|
48 |
+
# Inference
|
49 |
def generate_audio(text, voice_preset = None, lag = 0):
|
50 |
if voice_preset not in speaker_embeddings:
|
51 |
voice_preset = None
|
|
|
63 |
print("Fine tokens generated")
|
64 |
|
65 |
with torch.no_grad():
|
|
|
|
|
|
|
66 |
features = vocos.codes_to_features(fine_output.transpose(0,1))
|
67 |
vocos_waveform = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
68 |
|
69 |
+
return (SAMPLE_RATE, vocos_waveform.cpu().squeeze().numpy())
|
70 |
|
71 |
|
72 |
# Gradio blocks demo
|
|
|
87 |
btn = gr.Button("Bark with Vocos TTS")
|
88 |
|
89 |
with gr.Row():
|
|
|
90 |
out_audio_vocos = gr.Audio(type="numpy", autoplay=False, label="vocos enhanced output", show_label=True)
|
91 |
|
92 |
+
btn.click(generate_audio, [inp_text, dd], [out_audio_vocos])
|
93 |
|
94 |
demo_blocks.queue().launch(debug=True)
|