Vishwas1 committed on
Commit
a0a99a6
·
verified ·
1 Parent(s): e327671

Upload 6 files

Browse files
Files changed (3) hide show
  1. README.md +2 -2
  2. app_minimal.py +61 -0
  3. requirements.txt +1 -1
README.md CHANGED
@@ -4,8 +4,8 @@ emoji: 🎤
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 4.44.1
8
- app_file: app_simple.py
9
  pinned: false
10
  license: mit
11
  ---
 
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.35.2
8
+ app_file: app_minimal.py
9
  pinned: false
10
  license: mit
11
  ---
app_minimal.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from kittentts import KittenTTS
4
+
5
# Load the TTS model once at import time; generate_speech() reuses this instance
model = KittenTTS("KittenML/kitten-tts-nano-0.1")

# Voice identifiers offered in the UI: expr-voice-2 through expr-voice-5,
# each in its "-m" variant followed by its "-f" variant.
AVAILABLE_VOICES = [
    f"expr-voice-{number}-{variant}"
    for number in range(2, 6)
    for variant in ("m", "f")
]
13
+
14
def generate_speech(text, voice):
    """Generate speech from text using KittenTTS.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is rejected with a prompt instead of being sent to the model.
        voice: Voice identifier forwarded to ``model.generate``.

    Returns:
        A ``(audio, status)`` pair matching the Audio and Status outputs.
        ``audio`` is a ``(sample_rate, samples)`` tuple on success and
        ``None`` when there is nothing to play; ``status`` is a
        human-readable message describing the outcome.
    """
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    # gr.Audio cannot render a bare array: it needs (sample_rate, samples).
    # NOTE(review): 24 kHz is the rate used in the KittenTTS usage examples;
    # confirm if the model checkpoint changes.
    sample_rate = 24000

    try:
        audio = np.asarray(model.generate(text, voice=voice))

        if audio.size == 0:
            return None, "❌ Error generating speech: model returned no audio"

        # Collapse multi-channel output to mono (assumes channels on axis 1)
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # Peak-normalize; leave an all-zero signal untouched to avoid dividing by zero
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        return (sample_rate, audio), f"✅ Successfully generated speech with voice: {voice}"

    except Exception as e:
        # UI boundary: report the failure in the status box rather than crashing the request
        return None, f"❌ Error generating speech: {str(e)}"
34
+
35
# Build the UI with gr.Interface (rather than gr.Blocks): two inputs feed
# generate_speech, whose two return values fill the two outputs in order.
_text_input = gr.Textbox(label="Enter your text", placeholder="Type your text here...", lines=3)
_voice_input = gr.Dropdown(choices=AVAILABLE_VOICES, value=AVAILABLE_VOICES[1], label="Select Voice")

_audio_output = gr.Audio(label="Generated Audio")
_status_output = gr.Textbox(label="Status", interactive=False)

# Each example row is [text, voice], matching the input order above
_examples = [
    ["Hello! This is a demonstration of the KittenTTS model.", "expr-voice-2-f"],
    ["The quick brown fox jumps over the lazy dog.", "expr-voice-2-m"],
    ["Welcome to our high-quality text-to-speech system.", "expr-voice-3-f"],
]

demo = gr.Interface(
    fn=generate_speech,
    inputs=[_text_input, _voice_input],
    outputs=[_audio_output, _status_output],
    title="🎤 KittenTTS - High Quality Text-to-Speech",
    description="Generate natural-sounding speech from text using the KittenTTS model",
    examples=_examples,
)
54
+
55
# Start the web server only when this file is run as a script
if __name__ == "__main__":
    # Listen on all interfaces at port 7860.
    # NOTE(review): share=True requests a public share link; it is presumably
    # unnecessary when hosted on a Space — confirm before keeping it.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio==4.44.1
2
  https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
3
  soundfile
4
  numpy
 
1
+ gradio==4.35.2
2
  https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
3
  soundfile
4
  numpy