File size: 1,015 Bytes
61fdf99
 
 
 
 
 
c607d95
61fdf99
c607d95
 
 
61fdf99
 
 
 
 
c607d95
61fdf99
c607d95
 
 
 
 
 
 
 
 
 
61fdf99
 
 
c607d95
61fdf99
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import gradio as gr
import numpy as np
import scipy.io.wavfile
import torch
import torch.nn.functional as F
from whisperspeech.pipeline import Pipeline
import time

# Files written by process_audio, all relative to the working directory.
# The reference clip is produced by scipy.io.wavfile.write, which always emits
# WAV data, so it carries a .wav extension (and a name distinct from the
# generated output so the two never overwrite each other).
_REFERENCE_WAV = "reference.wav"
_SPEAKER_NPZ = "speaker.npz"
_GENERATED_WAV = "test.wav"

# WhisperSpeech pipeline shared by every request; created on first use.
_pipeline = None


def _get_pipeline():
    """Return the shared WhisperSpeech pipeline, building it on first use."""
    global _pipeline
    if _pipeline is None:
        _pipeline = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model')
    return _pipeline


def process_audio(audio_elem, text="This is a test voice genereation"):
    """Clone the voice in a reference clip and speak ``text`` with it.

    Args:
        audio_elem: Indexable pair whose first item is the sample rate and
            whose second item is the sample array, in the order expected by
            ``scipy.io.wavfile.write`` (presumably what the Gradio "audio"
            input delivers; confirm against the Gradio version in use).
        text: Sentence to synthesize with the extracted speaker embedding.

    Returns:
        A ``(speaker_file, audio_file)`` pair of paths: the compressed
        ``.npz`` holding the speaker embedding under the ``features`` key, and
        the generated WAV file. ``audio_file`` is ``None`` when synthesis
        failed, so a missing or stale file is never handed back.

    Raises:
        ValueError: If ``audio_elem`` is ``None`` (no reference audio given).
    """
    if audio_elem is None:
        raise ValueError("No reference audio was provided")

    # Persist the reference clip so the pipeline can read it from disk.
    sample_rate, samples = audio_elem[0], audio_elem[1]
    scipy.io.wavfile.write(_REFERENCE_WAV, sample_rate, samples)

    pipe = _get_pipeline()
    speaker = pipe.extract_spk_emb(_REFERENCE_WAV)

    # Copy the embedding to host memory and store it for download.
    np.savez_compressed(_SPEAKER_NPZ, features=speaker.cpu().numpy())

    try:
        pipe.generate_to_file(_GENERATED_WAV, text, lang='en', cps=10.5, speaker=speaker)
    except Exception as e:
        # Best effort: still return the embedding, but do not point the caller
        # at an output file that was not (re)generated by this call.
        print("Error: ", e)
        return _SPEAKER_NPZ, None

    return _SPEAKER_NPZ, _GENERATED_WAV




# Wire process_audio into a Gradio UI (audio + text in, file + audio out)
# and start serving it.
with gr.Interface(
    fn=process_audio,
    inputs=["audio", "text"],
    outputs=["file", "audio"],
) as iface:
    iface.launch()