YoussefSharawy91 committed on
Commit
fe9d571
·
verified ·
1 Parent(s): b163bfb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -56
app.py CHANGED
@@ -20,46 +20,60 @@ pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
20
 
21
  def text_to_audio(text, speed=1.0):
22
  """Convert text to audio using Kokoro model.
 
23
  Args:
24
  text: The text to convert to speech
25
  speed: Speech speed multiplier (0.5-2.0, where 1.0 is normal speed)
26
- Returns:
 
27
  Audio data as a tuple of (sample_rate, audio_array)
28
  """
29
  if not text:
30
  return None
 
31
  pipeline = pipelines['a'] # Use English pipeline
32
  voice = "af_heart" # Default voice (US English, female, Heart)
 
33
  # Process the text
34
  pack = pipeline.load_voice(voice)
 
35
  for _, ps, _ in pipeline(text, voice, speed):
36
  ref_s = pack[len(ps)-1]
 
37
  # Generate audio
38
  try:
39
  audio = model(ps, ref_s, speed)
40
  except Exception as e:
41
  raise gr.Error(f"Error generating audio: {str(e)}")
 
42
  # Return the audio with 24kHz sample rate
43
  return 24000, audio.numpy()
 
44
  return None
45
 
46
  def text_to_audio_b64(text, speed=1.0):
47
  """Convert text to audio and return as base64 encoded WAV file.
 
48
  Args:
49
  text: The text to convert to speech
50
  speed: Speech speed multiplier (0.5-2.0, where 1.0 is normal speed)
 
51
  Returns:
52
  Base64 encoded WAV file as a string
53
  """
54
  import soundfile as sf
 
55
  result = text_to_audio(text, speed)
56
  if result is None:
57
  return None
 
58
  sample_rate, audio_data = result
 
59
  # Save to BytesIO object
60
  wav_io = io.BytesIO()
61
  sf.write(wav_io, audio_data, sample_rate, format='WAV')
62
  wav_io.seek(0)
 
63
  # Convert to base64
64
  wav_b64 = base64.b64encode(wav_io.read()).decode('utf-8')
65
  return wav_b64
@@ -68,64 +82,60 @@ def text_to_audio_b64(text, speed=1.0):
68
  with gr.Blocks(title="Kokoro Text-to-Audio MCP") as app:
69
  gr.Markdown("# 🎵 Kokoro Text-to-Audio MCP")
70
  gr.Markdown("Convert text to speech using the Kokoro-82M model")
71
- # Tab for Kokoro Text-to-Audio
72
- with gr.Tab("Kokoro Text-to-Audio"):
73
- with gr.Row():
74
- with gr.Column():
75
- text_input = gr.Textbox(
76
- label="Enter your text",
77
- placeholder="Type something to convert to audio...",
78
- lines=5
79
- )
80
- speed_slider = gr.Slider(
81
- minimum=0.5,
82
- maximum=2.0,
83
- value=1.0,
84
- step=0.1,
85
- label="Speech Speed"
86
- )
87
- submit_btn = gr.Button("Generate Audio")
88
- with gr.Column():
89
- audio_output = gr.Audio(label="Generated Audio", type="numpy")
90
- submit_btn.click(
91
- fn=text_to_audio,
92
- inputs=[text_input, speed_slider],
93
- outputs=[audio_output]
94
- )
95
- gr.Markdown("---") # Horizontal line for separation
96
- gr.Markdown("### Usage Tips")
97
- gr.Markdown("- Adjust the speed slider to modify the pace of speech")
98
- # Add section about MCP support
99
- with gr.Accordion("MCP Support (for LLMs)", open=False):
100
- gr.Markdown("""
101
- ### MCP Support
102
- This app supports the Model Context Protocol (MCP), allowing Large Language Models like Claude Desktop to use it as a tool.
103
- To use this app with an MCP client, add the following configuration:
104
- ```json
105
- {
106
- "mcpServers": {
107
- "kokoroTTS": {
108
- "url": "[https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse](https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse)"
109
- }
110
- }
 
 
 
111
  }
112
- ```
113
- Replace `your-app-url.hf.space` with your actual Hugging Face Space URL.
114
- """)
115
- # Tab for AIxCel Space using an iframe, now pointing to localhost
116
- with gr.Tab("AIxCel"):
117
- # *** CRUCIAL CHANGE HERE: iframe src points to localhost:8080 ***
118
- gr.HTML(
119
- '<iframe src="http://localhost:8080/" ' # Assuming AIxCel serves from root '/'
120
- 'style="width: 100%; height: 800px; border: none;" '
121
- 'allow="accelerometer; ambient-light-sensor; camera; encrypted-media; geolocation; gyroscope; hid; microphone; midi; clipboard-read; clipboard-write; web-share" '
122
- 'sandbox="allow-forms allow-modals allow-popups allow-presentation allow-same-origin allow-scripts">'
123
- '</iframe>'
124
- )
125
- gr.Markdown("If the AIxCel app does not load, please visit it directly: [AIxCel Space](https://huggingface.co/spaces/YoussefSharawy91/AIxCel)")
126
 
127
  # Launch the app with MCP support
128
  if __name__ == "__main__":
129
  # Check for environment variable to enable MCP
130
  enable_mcp = os.environ.get('GRADIO_MCP_SERVER', 'True').lower() in ('true', '1', 't')
131
- app.launch(mcp_server=enable_mcp)
 
 
20
 
21
  def text_to_audio(text, speed=1.0):
22
  """Convert text to audio using Kokoro model.
23
+
24
  Args:
25
  text: The text to convert to speech
26
  speed: Speech speed multiplier (0.5-2.0, where 1.0 is normal speed)
27
+
28
+ Returns:
29
  Audio data as a tuple of (sample_rate, audio_array)
30
  """
31
  if not text:
32
  return None
33
+
34
  pipeline = pipelines['a'] # Use English pipeline
35
  voice = "af_heart" # Default voice (US English, female, Heart)
36
+
37
  # Process the text
38
  pack = pipeline.load_voice(voice)
39
+
40
  for _, ps, _ in pipeline(text, voice, speed):
41
  ref_s = pack[len(ps)-1]
42
+
43
  # Generate audio
44
  try:
45
  audio = model(ps, ref_s, speed)
46
  except Exception as e:
47
  raise gr.Error(f"Error generating audio: {str(e)}")
48
+
49
  # Return the audio with 24kHz sample rate
50
  return 24000, audio.numpy()
51
+
52
  return None
53
 
54
  def text_to_audio_b64(text, speed=1.0):
55
  """Convert text to audio and return as base64 encoded WAV file.
56
+
57
  Args:
58
  text: The text to convert to speech
59
  speed: Speech speed multiplier (0.5-2.0, where 1.0 is normal speed)
60
+
61
  Returns:
62
  Base64 encoded WAV file as a string
63
  """
64
  import soundfile as sf
65
+
66
  result = text_to_audio(text, speed)
67
  if result is None:
68
  return None
69
+
70
  sample_rate, audio_data = result
71
+
72
  # Save to BytesIO object
73
  wav_io = io.BytesIO()
74
  sf.write(wav_io, audio_data, sample_rate, format='WAV')
75
  wav_io.seek(0)
76
+
77
  # Convert to base64
78
  wav_b64 = base64.b64encode(wav_io.read()).decode('utf-8')
79
  return wav_b64
 
82
  with gr.Blocks(title="Kokoro Text-to-Audio MCP") as app:
83
  gr.Markdown("# 🎵 Kokoro Text-to-Audio MCP")
84
  gr.Markdown("Convert text to speech using the Kokoro-82M model")
85
+
86
+ with gr.Row():
87
+ with gr.Column():
88
+ text_input = gr.Textbox(
89
+ label="Enter your text",
90
+ placeholder="Type something to convert to audio...",
91
+ lines=5
92
+ )
93
+ speed_slider = gr.Slider(
94
+ minimum=0.5,
95
+ maximum=2.0,
96
+ value=1.0,
97
+ step=0.1,
98
+ label="Speech Speed"
99
+ )
100
+ submit_btn = gr.Button("Generate Audio")
101
+
102
+ with gr.Column():
103
+ audio_output = gr.Audio(label="Generated Audio", type="numpy")
104
+
105
+ submit_btn.click(
106
+ fn=text_to_audio,
107
+ inputs=[text_input, speed_slider],
108
+ outputs=[audio_output]
109
+ )
110
+
111
+ gr.Markdown("### Usage Tips")
112
+ gr.Markdown("- Adjust the speed slider to modify the pace of speech")
113
+
114
+ # Add section about MCP support
115
+ with gr.Accordion("MCP Support (for LLMs)", open=False):
116
+ gr.Markdown("""
117
+ ### MCP Support
118
+
119
+ This app supports the Model Context Protocol (MCP), allowing Large Language Models like Claude Desktop to use it as a tool.
120
+
121
+ To use this app with an MCP client, add the following configuration:
122
+
123
+ ```json
124
+ {
125
+ "mcpServers": {
126
+ "kokoroTTS": {
127
+ "url": "https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse"
128
  }
129
+ }
130
+ }
131
+ ```
132
+
133
+ Replace `your-app-url.hf.space` with your actual Hugging Face Space URL.
134
+ """)
 
 
 
 
 
 
 
 
135
 
136
  # Launch the app with MCP support
137
  if __name__ == "__main__":
138
  # Check for environment variable to enable MCP
139
  enable_mcp = os.environ.get('GRADIO_MCP_SERVER', 'True').lower() in ('true', '1', 't')
140
+
141
+ app.launch(mcp_server=True)