File size: 1,927 Bytes
9132718
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import gradio as gr
from kokoro_onnx import Kokoro
import soundfile as sf
import tempfile
import os

class TextToSpeechApp:
    def __init__(self):
        # Initialize Kokoro
        self.kokoro = Kokoro("kokoro-v0_19.onnx", "voices.json")
        
        # Available voices
        self.voices = [
            'af', 'af_bella', 'af_nicole', 'af_sarah', 'af_sky',
            'am_adam', 'am_michael', 'bf_emma', 'bf_isabella',
            'bm_george', 'bm_lewis'
        ]

    def generate_speech(self, text, voice, speed):
        try:
            # Generate audio
            samples, sample_rate = self.kokoro.create(
                text,
                voice=voice,
                speed=float(speed)
            )
            
            # Create temporary file
            temp_dir = tempfile.mkdtemp()
            temp_path = os.path.join(temp_dir, "output.wav")
            
            # Save to temporary file
            sf.write(temp_path, samples, sample_rate)
            
            return temp_path
            
        except Exception as e:
            return f"Error: {str(e)}"

    def create_interface(self):
        interface = gr.Interface(
            fn=self.generate_speech,
            inputs=[
                gr.Textbox(label="Enter text to convert", lines=5),
                gr.Dropdown(choices=self.voices, label="Select Voice", value=self.voices[0]),
                gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed")
            ],
            outputs=gr.Audio(label="Generated Speech"),
            title="Text to Speech Converter",
            description="Convert text to speech using different voices and speeds."
        )
        return interface

def main():
    app = TextToSpeechApp()
    interface = app.create_interface()
    # Launch with a public URL
    interface.launch(server_name="0.0.0.0", share=True)

if __name__ == "__main__":
    main()