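# SpeakerCreater / app.py
#
# Repository page header captured together with the file (not part of the
# program):
#   CazC's picture
#   Add text input and generate audio output
#   c607d95
#   raw
#   history blame
#   1.02 kB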
import functools
import os
import tempfile
import time

import gradio as gr
import numpy as np
import scipy.io.wavfile
import torch
import torch.nn.functional as F
from whisperspeech.pipeline import Pipeline
@functools.lru_cache(maxsize=1)
def _load_pipeline():
    """Build the WhisperSpeech pipeline once and reuse it on later calls."""
    return Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model')


def process_audio(audio_elem, text="This is a test voice genereation"):
    """Extract a speaker embedding from a clip and synthesize ``text`` with it.

    Args:
        audio_elem: Two-element sequence whose first item is the sample rate
            and whose second item is the sample array (they are passed to
            ``scipy.io.wavfile.write`` as ``rate`` and ``data`` respectively).
        text: Text handed to the pipeline for synthesis.

    Returns:
        A ``(embedding_path, audio_path)`` tuple. ``embedding_path`` is always
        ``"speaker.npz"`` (the embedding is stored under the key
        ``features``). ``audio_path`` is ``"test.wav"`` on success and
        ``None`` when synthesis failed, so a stale or missing file is never
        handed back.

    Raises:
        gr.Error: If no reference clip was supplied.
    """
    if audio_elem is None:
        # Without this guard the subscript below fails with an opaque TypeError.
        raise gr.Error("Please provide a reference audio clip.")

    pipe = _load_pipeline()

    # scipy writes WAV data, so the scratch file gets a matching ".wav"
    # extension; the temporary directory removes it once the embedding has
    # been extracted.
    with tempfile.TemporaryDirectory() as workdir:
        reference_path = os.path.join(workdir, "reference.wav")
        scipy.io.wavfile.write(reference_path, audio_elem[0], audio_elem[1])
        speaker = pipe.extract_spk_emb(reference_path)

    # Move the tensor to host memory before saving; detach() is a no-op unless
    # the tensor tracks gradients, in which case numpy() would otherwise raise.
    # savez_compressed appends ".npz", producing "speaker.npz".
    np.savez_compressed("speaker", features=speaker.detach().cpu().numpy())

    # NOTE(review): both output files use fixed names in the working
    # directory, so overlapping requests overwrite each other's results.
    try:
        pipe.generate_to_file('test.wav', text, lang='en', cps=10.5, speaker=speaker)
    except Exception as e:
        # Best effort: the embedding is still useful, so report the failure
        # and return no audio instead of a stale or missing "test.wav".
        print("Error: ", e)
        return "speaker.npz", None
    return "speaker.npz", "test.wav"
# Define the Gradio interface: an audio clip and a text prompt go in; a
# downloadable file and an audio clip come out (the two values returned by
# process_audio).
# The Interface is complete once constructed, so it is bound to a name and
# launched directly rather than being re-entered as a context manager.
iface = gr.Interface(
    fn=process_audio,
    inputs=["audio", "text"],
    outputs=["file", "audio"],
)
iface.launch()