Spaces:

AlexK-PL
/

Tacotron2_GST_eng

Sleeping

AlexK-PL committed on Sep 3, 2023

Commit

c472fbf

1 Parent(s): 848f2f7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,14 +26,14 @@ def init_models(hparams):
     vocoder_model.load_state_dict(checkpoint['model_g'])
     vocoder_model = vocoder_model.to('cuda')
     vocoder_model.eval(inference=False)
-    gst_head_scores = np.array([0.5, 0.15, 0.35])  # originally ([0.5, 0.15, 0.35])
-    gst_scores = torch.from_numpy(gst_head_scores).cuda().float()
 def synthesize(text):
     sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
     sequence = torch.from_numpy(sequence).to(device='cuda', dtype=torch.int64)
     mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence, gst_scores)
     # mel2wav inference:
@@ -42,11 +42,10 @@ def synthesize(text):
     audio_numpy = audio.data.cpu().detach().numpy()
-    return [22050, audio_numpy]
-if __name__ == "__main__":
-    init_models(hparams)
-    iface = gr.Interface(fn=synthesize, inputs="text", outputs="audio")
-    iface.launch()

     vocoder_model.load_state_dict(checkpoint['model_g'])
     vocoder_model = vocoder_model.to('cuda')
     vocoder_model.eval(inference=False)
 def synthesize(text):
     sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
     sequence = torch.from_numpy(sequence).to(device='cuda', dtype=torch.int64)
+    gst_head_scores = np.array([0.5, 0.15, 0.35])  # originally ([0.5, 0.15, 0.35])
+    gst_scores = torch.from_numpy(gst_head_scores).cuda().float()
     mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence, gst_scores)
     # mel2wav inference:
     audio_numpy = audio.data.cpu().detach().numpy()
+    return (22050, audio_numpy)
+init_models(hparams)
+iface = gr.Interface(fn=synthesize, inputs="text", outputs=[gr.Audio(label="Generated Speech", type="numpy"),])
+iface.launch()