github-actions[bot] committed on
Commit
69427f5
·
1 Parent(s): 675c4cb

πŸ€– Auto-deploy from GitHub (push) - fe69584 - 2025-07-28 05:16:36 UTC

Browse files
apps/gradio-app/README.md CHANGED
@@ -1,11 +1,12 @@
1
  # Fitness Gradio App
2
 
3
- Web interface for the Fitness AI Assistant using Gradio with voice input support.
4
 
5
  ## Features
6
 
7
  - Interactive chat interface with multimodal input
8
  - **Voice input** via microphone button (powered by Groq Whisper)
 
9
  - Multi-provider model support (Groq, Anthropic, OpenAI)
10
  - Real-time streaming responses
11
  - Fitness plan generation
@@ -14,7 +15,7 @@ Web interface for the Fitness AI Assistant using Gradio with voice input support
14
  ## Quick Start
15
 
16
  ```bash
17
- # Set your API key for voice functionality
18
  $env:GROQ_API_KEY = "your-groq-api-key"
19
 
20
  # Install and run
@@ -22,12 +23,19 @@ poetry install
22
  poetry run python -m fitness_gradio.main
23
  ```
24
 
25
- ## Voice Setup
26
 
27
  1. Get a [Groq API key](https://console.groq.com/keys)
28
  2. Set `GROQ_API_KEY` environment variable
29
- 3. Click the microphone button in the chat interface
30
- 4. Allow browser microphone access when prompted
 
 
 
 
 
 
 
31
 
32
  See [VOICE_SETUP.md](VOICE_SETUP.md) for detailed setup instructions.
33
 
 
1
  # Fitness Gradio App
2
 
3
+ Web interface for the Fitness AI Assistant using Gradio with voice input and text-to-speech support.
4
 
5
  ## Features
6
 
7
  - Interactive chat interface with multimodal input
8
  - **Voice input** via microphone button (powered by Groq Whisper)
9
+ - **Text-to-Speech output** with 19 English and 4 Arabic voices (powered by Groq PlayAI TTS)
10
  - Multi-provider model support (Groq, Anthropic, OpenAI)
11
  - Real-time streaming responses
12
  - Fitness plan generation
 
15
  ## Quick Start
16
 
17
  ```bash
18
+ # Set your API key for voice functionality and TTS
19
  $env:GROQ_API_KEY = "your-groq-api-key"
20
 
21
  # Install and run
 
23
  poetry run python -m fitness_gradio.main
24
  ```
25
 
26
+ ## Voice & TTS Setup
27
 
28
  1. Get a [Groq API key](https://console.groq.com/keys)
29
  2. Set `GROQ_API_KEY` environment variable
30
+ 3. **Voice Input**: Click the microphone button in the chat interface
31
+ 4. **Text-to-Speech**: Enable the "πŸ”Š Enable Text-to-Speech" checkbox
32
+ 5. Allow browser microphone access when prompted (for voice input)
33
+
34
+ ### Available TTS Voices
35
+
36
+ **English (playai-tts)**: 19 voices including Celeste-PlayAI (default), Fritz-PlayAI, Arista-PlayAI, Atlas-PlayAI, Basil-PlayAI, Briggs-PlayAI, Calum-PlayAI, Cheyenne-PlayAI, and more.
37
+
38
+ **Arabic (playai-tts-arabic)**: 4 voices including Amira-PlayAI (default), Ahmad-PlayAI, Khalid-PlayAI, Nasser-PlayAI.
39
 
40
  See [VOICE_SETUP.md](VOICE_SETUP.md) for detailed setup instructions.
41
 
apps/gradio-app/src/fitness_gradio/ui/app.py CHANGED
@@ -36,11 +36,16 @@ class FitnessAppUI:
36
  (model_dropdown, selected_model) = UIComponents.create_model_selection_section()
37
 
38
  # Main chat interface
39
- chatbot = UIComponents.create_chatbot()
 
 
 
 
 
40
  chat_input = UIComponents.create_chat_input()
41
 
42
  # Control buttons
43
- clear_btn, streaming_toggle = UIComponents.create_control_buttons()
44
 
45
  # Examples section
46
  UIComponents.create_examples_section(chat_input)
@@ -51,8 +56,8 @@ class FitnessAppUI:
51
 
52
  # Event handlers
53
  self._setup_event_handlers(
54
- chatbot, chat_input, clear_btn, streaming_toggle,
55
- model_dropdown, selected_model
56
  )
57
 
58
  def _setup_event_handlers(
@@ -61,8 +66,10 @@ class FitnessAppUI:
61
  chat_input: gr.MultimodalTextbox,
62
  clear_btn: gr.Button,
63
  streaming_toggle: gr.Checkbox,
 
64
  model_dropdown: gr.Dropdown,
65
- selected_model: gr.Textbox
 
66
  ) -> None:
67
  """Set up all event handlers."""
68
 
@@ -70,12 +77,13 @@ class FitnessAppUI:
70
  chat_msg = chat_input.submit(
71
  UIHandlers.add_message_with_audio,
72
  [chatbot, chat_input],
73
- [chatbot, chat_input]
 
74
  )
75
  bot_msg = chat_msg.then(
76
  UIHandlers.dynamic_bot,
77
- [chatbot, streaming_toggle, selected_model],
78
- chatbot,
79
  api_name="bot_response"
80
  )
81
  bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])
 
36
  (model_dropdown, selected_model) = UIComponents.create_model_selection_section()
37
 
38
  # Main chat interface
39
+ with gr.Row():
40
+ with gr.Column():
41
+ chatbot = UIComponents.create_chatbot()
42
+ with gr.Column(scale=0.3):
43
+ output_audio = UIComponents.create_output_audio()
44
+
45
  chat_input = UIComponents.create_chat_input()
46
 
47
  # Control buttons
48
+ clear_btn, streaming_toggle, tts_toggle = UIComponents.create_control_buttons()
49
 
50
  # Examples section
51
  UIComponents.create_examples_section(chat_input)
 
56
 
57
  # Event handlers
58
  self._setup_event_handlers(
59
+ chatbot, chat_input, clear_btn, streaming_toggle, tts_toggle,
60
+ model_dropdown, selected_model, output_audio
61
  )
62
 
63
  def _setup_event_handlers(
 
66
  chat_input: gr.MultimodalTextbox,
67
  clear_btn: gr.Button,
68
  streaming_toggle: gr.Checkbox,
69
+ tts_toggle: gr.Checkbox,
70
  model_dropdown: gr.Dropdown,
71
+ selected_model: gr.Textbox,
72
+ output_audio: gr.Audio
73
  ) -> None:
74
  """Set up all event handlers."""
75
 
 
77
  chat_msg = chat_input.submit(
78
  UIHandlers.add_message_with_audio,
79
  [chatbot, chat_input],
80
+ [chatbot, chat_input],
81
+ queue=False
82
  )
83
  bot_msg = chat_msg.then(
84
  UIHandlers.dynamic_bot,
85
+ [chatbot, streaming_toggle, tts_toggle, selected_model],
86
+ [chatbot, output_audio],
87
  api_name="bot_response"
88
  )
89
  bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])
apps/gradio-app/src/fitness_gradio/ui/components.py CHANGED
@@ -158,6 +158,18 @@ class UIComponents:
158
  render_markdown=True
159
  )
160
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  @staticmethod
162
  def create_chat_input() -> gr.MultimodalTextbox:
163
  """Create the chat input component."""
@@ -166,16 +178,17 @@ class UIComponents:
166
  file_count="multiple",
167
  placeholder="Ask me about fitness, request a workout plan, or get meal planning advice...",
168
  show_label=False,
169
- sources=["microphone", "upload"], # Re-enable microphone in multimodal for the circular button
 
170
  )
171
 
172
  @staticmethod
173
  def create_control_buttons() -> tuple:
174
  """
175
- Create the control buttons (clear, streaming toggle).
176
 
177
  Returns:
178
- Tuple of (clear_btn, streaming_toggle)
179
  """
180
  with gr.Row():
181
  clear_btn = gr.Button("πŸ—‘οΈ Clear Conversation", variant="secondary", size="sm")
@@ -184,8 +197,13 @@ class UIComponents:
184
  value=True,
185
  info="Stream responses in real-time as the agent generates them"
186
  )
 
 
 
 
 
187
 
188
- return clear_btn, streaming_toggle
189
 
190
  @staticmethod
191
  def create_examples_section(chat_input: gr.MultimodalTextbox) -> gr.Examples:
 
158
  render_markdown=True
159
  )
160
 
161
+ @staticmethod
162
+ def create_output_audio() -> gr.Audio:
163
+ """Create the output audio component for TTS responses."""
164
+ return gr.Audio(
165
+ label="πŸ”Š Audio Response",
166
+ streaming=False, # Disable streaming to avoid ffmpeg issues
167
+ autoplay=True,
168
+ show_download_button=True,
169
+ show_share_button=False,
170
+ format="wav" # Explicitly set format to WAV
171
+ )
172
+
173
  @staticmethod
174
  def create_chat_input() -> gr.MultimodalTextbox:
175
  """Create the chat input component."""
 
178
  file_count="multiple",
179
  placeholder="Ask me about fitness, request a workout plan, or get meal planning advice...",
180
  show_label=False,
181
+ sources=["microphone", "upload"], # Enable microphone and file uploads
182
+ submit_btn=True, # Ensure submit button is available
183
  )
184
 
185
  @staticmethod
186
  def create_control_buttons() -> tuple:
187
  """
188
+ Create the control buttons (clear, streaming toggle, TTS toggle).
189
 
190
  Returns:
191
+ Tuple of (clear_btn, streaming_toggle, tts_toggle)
192
  """
193
  with gr.Row():
194
  clear_btn = gr.Button("πŸ—‘οΈ Clear Conversation", variant="secondary", size="sm")
 
197
  value=True,
198
  info="Stream responses in real-time as the agent generates them"
199
  )
200
+ tts_toggle = gr.Checkbox(
201
+ label="πŸ”Š Enable Text-to-Speech",
202
+ value=False,
203
+ info="Convert AI responses to speech using Groq's TTS models"
204
+ )
205
 
206
+ return clear_btn, streaming_toggle, tts_toggle
207
 
208
  @staticmethod
209
  def create_examples_section(chat_input: gr.MultimodalTextbox) -> gr.Examples:
apps/gradio-app/src/fitness_gradio/ui/handlers.py CHANGED
@@ -9,6 +9,7 @@ from typing import List, Dict, Union, Generator, Any, Tuple, Optional
9
  from fitness_core.agents import FitnessAgent
10
  from fitness_core.services import ConversationManager, AgentRunner, ResponseFormatter
11
  from fitness_core.utils import get_logger
 
12
 
13
  logger = get_logger(__name__)
14
 
@@ -137,60 +138,100 @@ Please check your API keys and try a different model."""
137
  Tuple of (updated_history, cleared_input)
138
  """
139
  try:
 
140
  user_content_parts = []
 
 
141
 
142
  # Handle file uploads (including audio from microphone)
143
  if message.get("files"):
 
144
  for file_path in message["files"]:
145
  if file_path: # Validate file path exists
 
146
  # Check if this is an audio file (from microphone recording)
147
  if UIHandlers.is_audio_file(file_path):
148
- logger.info(f"Processing audio file: {file_path}")
149
  # Process audio file for transcription
150
  transcribed_text = UIHandlers.process_audio_file(file_path)
151
 
152
  if transcribed_text and not transcribed_text.startswith("["):
153
- # Add voice message indicator to the text
154
- display_text = f"🎀 {transcribed_text}"
155
  user_content_parts.append(transcribed_text) # Add clean text to conversation
156
-
157
- # Add to Gradio history for display
158
- history.append({
159
- "role": "user",
160
- "content": display_text
161
- })
162
  else:
163
- # Show transcription error
164
- history.append({
165
- "role": "user",
166
- "content": f"🎀 {transcribed_text}"
167
- })
168
  else:
169
  # Handle non-audio file uploads
170
  file_content = f"[File uploaded: {file_path}]"
171
  user_content_parts.append(file_content)
172
- # Add to Gradio history for display
173
- history.append({
174
- "role": "user",
175
- "content": {"path": file_path}
176
- })
177
 
178
  # Handle text input
 
179
  if message.get("text") and message["text"].strip():
180
  text_content = message["text"].strip()
181
  user_content_parts.append(text_content)
182
- # Add to Gradio history for display
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  history.append({
184
- "role": "user",
185
  "content": text_content
186
  })
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  # Add to conversation manager (combine all content)
189
  if user_content_parts:
190
  combined_content = "\n".join(user_content_parts)
191
  conversation_manager.add_user_message(combined_content)
192
- logger.info(f"Added user message to conversation. {conversation_manager.get_history_summary()}")
 
 
193
 
 
194
  return history, gr.MultimodalTextbox(value=None, interactive=False)
195
 
196
  except Exception as e:
@@ -264,17 +305,19 @@ Please check your API keys and try a different model."""
264
  @staticmethod
265
  def bot_with_real_streaming(
266
  history: List[Dict],
267
- model_name: str = None
268
- ) -> Generator[List[Dict], None, None]:
 
269
  """
270
  Bot function with real-time streaming from the agent
271
 
272
  Args:
273
  history: Current Gradio chat history (for display only)
274
  model_name: Model to use for the agent
 
275
 
276
  Yields:
277
- Updated history with real-time streaming response
278
  """
279
  try:
280
  # Get agent instance with specified model
@@ -294,15 +337,18 @@ Please check your API keys and try a different model."""
294
  try:
295
  content_chunks = []
296
  final_result = None
 
297
 
298
  for chunk in AgentRunner.run_agent_with_streaming_sync(agent, agent_input):
299
  if chunk['type'] == 'final_result':
300
  final_result = chunk['result']
301
  if chunk['content']:
302
  content_chunks.append(chunk['content'])
 
303
  elif chunk['type'] == 'error':
304
  final_result = chunk['result']
305
  content_chunks.append(chunk['content'])
 
306
 
307
  # Update conversation manager
308
  if final_result:
@@ -313,34 +359,50 @@ Please check your API keys and try a different model."""
313
  if content_chunks:
314
  for content in content_chunks:
315
  history[-1]["content"] = content
316
- yield history
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  else:
318
- history[-1]["content"] = "I apologize, but I didn't receive a response. Please try again."
319
- yield history
 
320
 
321
  except Exception as e:
322
  logger.error(f"Error in streaming execution: {str(e)}")
323
- history[-1]["content"] = f"Sorry, I encountered an error while processing your request: {str(e)}"
324
- yield history
 
325
 
326
  except Exception as e:
327
  logger.error(f"Bot streaming function error: {str(e)}")
328
  if len(history) == 0 or history[-1].get("role") != "assistant":
329
  history.append({"role": "assistant", "content": ""})
330
  history[-1]["content"] = "I apologize, but I'm experiencing technical difficulties. Please try again in a moment."
331
- yield history
332
 
333
  @staticmethod
334
- def bot(history: List[Dict], model_name: str = None) -> Generator[List[Dict], None, None]:
335
  """
336
  Main bot function with simulated streaming
337
 
338
  Args:
339
  history: Current Gradio chat history (for display only)
340
  model_name: Model to use for the agent
 
341
 
342
  Yields:
343
- Updated history with bot response
344
  """
345
  try:
346
  # Get agent instance with specified model
@@ -361,36 +423,50 @@ Please check your API keys and try a different model."""
361
  response = ResponseFormatter.extract_response_content(result)
362
 
363
  # Stream the response with simulated typing
364
- yield from ResponseFormatter.stream_response(response, history)
 
 
 
 
 
 
 
 
 
 
 
365
 
366
  except Exception as e:
367
  logger.error(f"Bot function error: {str(e)}")
368
  error_response = "I apologize, but I'm experiencing technical difficulties. Please try again in a moment."
369
- yield from ResponseFormatter.stream_response(error_response, history)
 
370
 
371
  @staticmethod
372
  def dynamic_bot(
373
  history: List[Dict],
374
- use_real_streaming: bool = True,
 
375
  model_name: str = None
376
- ) -> Generator[List[Dict], None, None]:
377
  """
378
- Dynamic bot function that can switch between streaming modes
379
 
380
  Args:
381
  history: Current Gradio chat history (for display only)
382
  use_real_streaming: Whether to use real-time streaming from agent
 
383
  model_name: Model to use for the agent
384
 
385
  Yields:
386
- Updated history with bot response
387
  """
388
  if use_real_streaming:
389
  logger.info("Using real-time streaming mode")
390
- yield from UIHandlers.bot_with_real_streaming(history, model_name)
391
  else:
392
  logger.info("Using simulated streaming mode")
393
- yield from UIHandlers.bot(history, model_name)
394
 
395
  @staticmethod
396
  def clear_conversation() -> List[Dict]:
@@ -404,3 +480,41 @@ Please check your API keys and try a different model."""
404
  conversation_manager.clear_history()
405
  logger.info("Conversation history cleared")
406
  return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  from fitness_core.agents import FitnessAgent
10
  from fitness_core.services import ConversationManager, AgentRunner, ResponseFormatter
11
  from fitness_core.utils import get_logger
12
+ from .tts_utils import generate_speech_for_text, generate_speech_for_session, clean_tts_markup
13
 
14
  logger = get_logger(__name__)
15
 
 
138
  Tuple of (updated_history, cleared_input)
139
  """
140
  try:
141
+ logger.info(f"Processing message: {message}")
142
  user_content_parts = []
143
+ has_audio_content = False
144
+ audio_transcription = None
145
 
146
  # Handle file uploads (including audio from microphone)
147
  if message.get("files"):
148
+ logger.info(f"Found {len(message['files'])} files in message")
149
  for file_path in message["files"]:
150
  if file_path: # Validate file path exists
151
+ logger.info(f"Processing file: {file_path}")
152
  # Check if this is an audio file (from microphone recording)
153
  if UIHandlers.is_audio_file(file_path):
154
+ logger.info(f"Detected audio file: {file_path}")
155
  # Process audio file for transcription
156
  transcribed_text = UIHandlers.process_audio_file(file_path)
157
 
158
  if transcribed_text and not transcribed_text.startswith("["):
159
+ audio_transcription = transcribed_text
 
160
  user_content_parts.append(transcribed_text) # Add clean text to conversation
161
+ has_audio_content = True
162
+ logger.info(f"Successfully transcribed audio: '{transcribed_text[:50]}...'")
 
 
 
 
163
  else:
164
+ # Handle transcription error
165
+ audio_transcription = transcribed_text
166
+ has_audio_content = True # Still mark as audio content even if failed
167
+ logger.warning(f"Audio transcription failed: {transcribed_text}")
 
168
  else:
169
  # Handle non-audio file uploads
170
  file_content = f"[File uploaded: {file_path}]"
171
  user_content_parts.append(file_content)
172
+ logger.info(f"Added file upload to content: {file_path}")
173
+ else:
174
+ logger.info("No files found in message")
 
 
175
 
176
  # Handle text input
177
+ text_content = None
178
  if message.get("text") and message["text"].strip():
179
  text_content = message["text"].strip()
180
  user_content_parts.append(text_content)
181
+ logger.info(f"Found text content: '{text_content[:50]}...'")
182
+ else:
183
+ logger.info("No text content found in message")
184
+
185
+ # Add appropriate message to chat history
186
+ if has_audio_content and audio_transcription:
187
+ if audio_transcription.startswith("["):
188
+ # Transcription error - show error message
189
+ display_text = f"🎀 {audio_transcription}"
190
+ else:
191
+ # Successful transcription - show with microphone icon
192
+ display_text = f"🎀 {audio_transcription}"
193
+
194
+ history.append({
195
+ "role": "user",
196
+ "content": display_text
197
+ })
198
+ logger.info(f"Added audio message to chat history: '{display_text}'")
199
+
200
+ # If there's also text content, add it separately
201
+ if text_content:
202
+ history.append({
203
+ "role": "user",
204
+ "content": text_content
205
+ })
206
+ logger.info(f"Added additional text content to history: '{text_content[:50]}...'")
207
+
208
+ elif text_content:
209
+ # Only text content, no audio
210
  history.append({
211
+ "role": "user",
212
  "content": text_content
213
  })
214
+ logger.info(f"Added text-only message to chat history: '{text_content[:50]}...'")
215
+
216
+ elif message.get("files") and not has_audio_content:
217
+ # File uploads that aren't audio
218
+ for file_path in message["files"]:
219
+ if file_path and not UIHandlers.is_audio_file(file_path):
220
+ history.append({
221
+ "role": "user",
222
+ "content": {"path": file_path}
223
+ })
224
+ logger.info(f"Added file upload to history: {file_path}")
225
 
226
  # Add to conversation manager (combine all content)
227
  if user_content_parts:
228
  combined_content = "\n".join(user_content_parts)
229
  conversation_manager.add_user_message(combined_content)
230
+ logger.info(f"Added user message to conversation manager. Content parts: {len(user_content_parts)}, Combined: '{combined_content[:100]}...', {conversation_manager.get_history_summary()}")
231
+ else:
232
+ logger.warning("No user content parts found in message - this may indicate an issue")
233
 
234
+ logger.info(f"Final history length: {len(history)}")
235
  return history, gr.MultimodalTextbox(value=None, interactive=False)
236
 
237
  except Exception as e:
 
305
  @staticmethod
306
  def bot_with_real_streaming(
307
  history: List[Dict],
308
+ model_name: str = None,
309
+ use_tts: bool = False
310
+ ) -> Generator[Tuple[List[Dict], Optional[str]], None, None]:
311
  """
312
  Bot function with real-time streaming from the agent
313
 
314
  Args:
315
  history: Current Gradio chat history (for display only)
316
  model_name: Model to use for the agent
317
+ use_tts: Whether to generate text-to-speech for the response
318
 
319
  Yields:
320
+ Tuple of (Updated history, audio_file_path or None)
321
  """
322
  try:
323
  # Get agent instance with specified model
 
337
  try:
338
  content_chunks = []
339
  final_result = None
340
+ final_content = ""
341
 
342
  for chunk in AgentRunner.run_agent_with_streaming_sync(agent, agent_input):
343
  if chunk['type'] == 'final_result':
344
  final_result = chunk['result']
345
  if chunk['content']:
346
  content_chunks.append(chunk['content'])
347
+ final_content = chunk['content']
348
  elif chunk['type'] == 'error':
349
  final_result = chunk['result']
350
  content_chunks.append(chunk['content'])
351
+ final_content = chunk['content']
352
 
353
  # Update conversation manager
354
  if final_result:
 
359
  if content_chunks:
360
  for content in content_chunks:
361
  history[-1]["content"] = content
362
+ yield history, None # No audio during streaming
363
+ final_content = content
364
+
365
+ # Generate TTS for the final response if enabled
366
+ if use_tts and final_content:
367
+ audio_file = UIHandlers._generate_tts_for_response_sync(final_content)
368
+ if audio_file:
369
+ # Return the final history with the audio file
370
+ yield history, audio_file
371
+ else:
372
+ yield history, None
373
+ else:
374
+ yield history, None
375
+
376
  else:
377
+ error_msg = "I apologize, but I didn't receive a response. Please try again."
378
+ history[-1]["content"] = error_msg
379
+ yield history, None
380
 
381
  except Exception as e:
382
  logger.error(f"Error in streaming execution: {str(e)}")
383
+ error_msg = f"Sorry, I encountered an error while processing your request: {str(e)}"
384
+ history[-1]["content"] = error_msg
385
+ yield history, None
386
 
387
  except Exception as e:
388
  logger.error(f"Bot streaming function error: {str(e)}")
389
  if len(history) == 0 or history[-1].get("role") != "assistant":
390
  history.append({"role": "assistant", "content": ""})
391
  history[-1]["content"] = "I apologize, but I'm experiencing technical difficulties. Please try again in a moment."
392
+ yield history, None
393
 
394
  @staticmethod
395
+ def bot(history: List[Dict], model_name: str = None, use_tts: bool = False) -> Generator[Tuple[List[Dict], Optional[str]], None, None]:
396
  """
397
  Main bot function with simulated streaming
398
 
399
  Args:
400
  history: Current Gradio chat history (for display only)
401
  model_name: Model to use for the agent
402
+ use_tts: Whether to generate text-to-speech for the response
403
 
404
  Yields:
405
+ Tuple of (Updated history, audio_file_path or None)
406
  """
407
  try:
408
  # Get agent instance with specified model
 
423
  response = ResponseFormatter.extract_response_content(result)
424
 
425
  # Stream the response with simulated typing
426
+ for updated_history in ResponseFormatter.stream_response(response, history):
427
+ yield updated_history, None # No audio during streaming
428
+
429
+ # Generate TTS for the final response if enabled
430
+ if use_tts and response:
431
+ audio_file = UIHandlers._generate_tts_for_response_sync(response)
432
+ if audio_file:
433
+ yield history, audio_file
434
+ else:
435
+ yield history, None
436
+ else:
437
+ yield history, None
438
 
439
  except Exception as e:
440
  logger.error(f"Bot function error: {str(e)}")
441
  error_response = "I apologize, but I'm experiencing technical difficulties. Please try again in a moment."
442
+ for updated_history in ResponseFormatter.stream_response(error_response, history):
443
+ yield updated_history, None
444
 
445
  @staticmethod
446
  def dynamic_bot(
447
  history: List[Dict],
448
+ use_real_streaming: bool = True,
449
+ use_tts: bool = False,
450
  model_name: str = None
451
+ ) -> Generator[Tuple[List[Dict], Optional[str]], None, None]:
452
  """
453
+ Dynamic bot function that can switch between streaming modes and TTS
454
 
455
  Args:
456
  history: Current Gradio chat history (for display only)
457
  use_real_streaming: Whether to use real-time streaming from agent
458
+ use_tts: Whether to generate text-to-speech for the response
459
  model_name: Model to use for the agent
460
 
461
  Yields:
462
+ Tuple of (Updated history, audio_file_path or None)
463
  """
464
  if use_real_streaming:
465
  logger.info("Using real-time streaming mode")
466
+ yield from UIHandlers.bot_with_real_streaming(history, model_name, use_tts)
467
  else:
468
  logger.info("Using simulated streaming mode")
469
+ yield from UIHandlers.bot(history, model_name, use_tts)
470
 
471
  @staticmethod
472
  def clear_conversation() -> List[Dict]:
 
480
  conversation_manager.clear_history()
481
  logger.info("Conversation history cleared")
482
  return []
483
+
484
+ @staticmethod
485
+ def _generate_tts_for_response_sync(text: str) -> Optional[str]:
486
+ """
487
+ Generate TTS audio for a response text synchronously.
488
+
489
+ Args:
490
+ text: The text to convert to speech
491
+
492
+ Returns:
493
+ Path to generated audio file or None if error
494
+ """
495
+ try:
496
+ if not text or not text.strip():
497
+ return None
498
+
499
+ # Clean the text for TTS
500
+ clean_text = clean_tts_markup(text)
501
+
502
+ # Limit text length for TTS (Groq has 10K char limit)
503
+ if len(clean_text) > 8000: # Leave some buffer
504
+ clean_text = clean_text[:8000] + "..."
505
+ logger.info(f"Truncated TTS text to 8000 characters")
506
+
507
+ logger.info(f"Generating TTS for response ({len(clean_text)} chars)")
508
+
509
+ # Generate TTS using session persistence
510
+ audio_file = generate_speech_for_session(clean_text)
511
+ if audio_file:
512
+ logger.info(f"TTS audio generated: {audio_file}")
513
+ return audio_file
514
+ else:
515
+ logger.warning("Failed to generate TTS audio")
516
+ return None
517
+
518
+ except Exception as e:
519
+ logger.error(f"TTS generation error: {str(e)}")
520
+ return None
apps/gradio-app/src/fitness_gradio/ui/styles.py CHANGED
@@ -53,6 +53,30 @@ MAIN_CSS = """
53
  font-weight: bold;
54
  }
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  /* Ensure all text in model-info respects dark theme */
57
  .model-info * {
58
  color: inherit !important;
@@ -88,7 +112,9 @@ HELP_CONTENT = """
88
  **How to interact:**
89
  - **πŸ’¬ Type messages** in the text box
90
  - **🎀 Record voice messages** using the circular microphone button (requires Groq API key)
91
- - **πŸ“Ž Upload files** if needed for context
 
 
92
 
93
  **To get the best results:**
94
  - Tell me your fitness level (beginner, intermediate, advanced)
@@ -98,6 +124,12 @@ HELP_CONTENT = """
98
 
99
  **Voice Input Setup:**
100
  - Set your `GROQ_API_KEY` environment variable to enable voice transcription
 
 
 
 
 
 
101
  - Click the circular microphone icon in the input box and speak your message
102
  - The system will convert your speech to text automatically using Groq's Whisper
103
 
 
53
  font-weight: bold;
54
  }
55
 
56
+ /* TTS control styling */
57
+ .tts-checkbox {
58
+ background: linear-gradient(135deg, rgba(99, 102, 241, 0.1), rgba(139, 92, 246, 0.1)) !important;
59
+ border: 1px solid rgba(99, 102, 241, 0.3) !important;
60
+ border-radius: 8px !important;
61
+ padding: 8px !important;
62
+ transition: all 0.3s ease !important;
63
+ }
64
+
65
+ .tts-checkbox:hover {
66
+ background: linear-gradient(135deg, rgba(99, 102, 241, 0.2), rgba(139, 92, 246, 0.2)) !important;
67
+ border-color: rgba(99, 102, 241, 0.5) !important;
68
+ }
69
+
70
+ .tts-active {
71
+ animation: pulse-tts 2s infinite;
72
+ }
73
+
74
+ @keyframes pulse-tts {
75
+ 0% { box-shadow: 0 0 0 0 rgba(99, 102, 241, 0.7); }
76
+ 70% { box-shadow: 0 0 0 10px rgba(99, 102, 241, 0); }
77
+ 100% { box-shadow: 0 0 0 0 rgba(99, 102, 241, 0); }
78
+ }
79
+
80
  /* Ensure all text in model-info respects dark theme */
81
  .model-info * {
82
  color: inherit !important;
 
112
  **How to interact:**
113
  - **πŸ’¬ Type messages** in the text box
114
  - **🎀 Record voice messages** using the circular microphone button (requires Groq API key)
115
+ - **πŸ”Š Enable Text-to-Speech** to hear AI responses spoken aloud (requires Groq API key)
116
+ - **πŸš€ Enable Real-time Streaming** for faster response display
117
+ - **πŸ“Ž Upload files** if needed for context
118
 
119
  **To get the best results:**
120
  - Tell me your fitness level (beginner, intermediate, advanced)
 
124
 
125
  **Voice Input Setup:**
126
  - Set your `GROQ_API_KEY` environment variable to enable voice transcription
127
+
128
+ **Text-to-Speech Setup:**
129
+ - Set your `GROQ_API_KEY` environment variable to enable audio generation
130
+ - Choose from 19 English voices or 4 Arabic voices
131
+ - Audio is automatically generated when TTS is enabled
132
+ - Responses are cleaned of markdown formatting for better speech quality
133
  - Click the circular microphone icon in the input box and speak your message
134
  - The system will convert your speech to text automatically using Groq's Whisper
135
 
apps/gradio-app/src/fitness_gradio/ui/tts_utils.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text-to-Speech utilities using Groq's TTS models.
3
+ """
4
+ import os
5
+ import tempfile
6
+ import logging
7
+ import requests
8
+ from typing import Optional, Union
9
+ from pathlib import Path
10
+
11
+ from fitness_core.utils import get_logger
12
+
13
+ logger = get_logger(__name__)
14
+
15
+
16
class GroqTTS:
    """Groq Text-to-Speech service wrapper.

    Thin client around Groq's OpenAI-compatible ``/v1/audio/speech`` endpoint
    (PlayAI TTS models). Generated audio is written to a per-process temp
    directory so the Gradio UI can serve the files back to the browser.
    """

    # Available English voices for playai-tts
    ENGLISH_VOICES = [
        "Arista-PlayAI", "Atlas-PlayAI", "Basil-PlayAI", "Briggs-PlayAI",
        "Calum-PlayAI", "Celeste-PlayAI", "Cheyenne-PlayAI", "Chip-PlayAI",
        "Cillian-PlayAI", "Deedee-PlayAI", "Fritz-PlayAI", "Gail-PlayAI",
        "Indigo-PlayAI", "Mamaw-PlayAI", "Mason-PlayAI", "Mikail-PlayAI",
        "Mitch-PlayAI", "Quinn-PlayAI", "Thunder-PlayAI"
    ]

    # Available Arabic voices for playai-tts-arabic
    ARABIC_VOICES = [
        "Ahmad-PlayAI", "Amira-PlayAI", "Khalid-PlayAI", "Nasser-PlayAI"
    ]

    # Default voice selections
    DEFAULT_ENGLISH_VOICE = "Celeste-PlayAI"  # Pleasant female voice
    DEFAULT_ARABIC_VOICE = "Amira-PlayAI"  # Pleasant female voice

    # Groq rejects TTS inputs longer than 10K characters
    MAX_TEXT_CHARS = 10000

    # (connect, read) timeout in seconds; synthesis of long text can be slow,
    # but a stuck request must never hang the UI thread forever
    REQUEST_TIMEOUT = (10, 120)

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the GroqTTS client.

        Args:
            api_key: Groq API key. If None, will try to get from GROQ_API_KEY env var.

        Raises:
            ValueError: If no API key is provided and GROQ_API_KEY is unset.
        """
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        if not self.api_key:
            raise ValueError("Groq API key is required. Set GROQ_API_KEY environment variable or pass api_key parameter.")

        # Base temp directory for this app's TTS artifacts
        self.temp_dir = Path(tempfile.gettempdir()) / "fitness_app_tts"
        self.temp_dir.mkdir(exist_ok=True)

        # Session-specific subdirectory for audio files that must outlive a
        # single request (the chat interface re-reads them)
        self.session_dir = self.temp_dir / "session_audio"
        self.session_dir.mkdir(exist_ok=True)

    def text_to_speech(
        self,
        text: str,
        voice: Optional[str] = None,
        model: str = "playai-tts",
        response_format: str = "wav",
        output_file: Optional[Union[str, Path]] = None
    ) -> Optional[str]:
        """
        Convert text to speech using Groq's TTS API.

        Args:
            text: Text to convert to speech (max 10K characters; longer input
                is truncated with a warning)
            voice: Voice to use. If None, uses default voice based on model
            model: TTS model to use ("playai-tts" or "playai-tts-arabic")
            response_format: Audio format ("wav")
            output_file: Path to save audio file. If None, a unique file is
                created in the session directory

        Returns:
            Path to the generated audio file, or None on any error. Errors are
            logged rather than raised so the UI can degrade gracefully.
        """
        try:
            # Enforce the API's text length limit up front
            if len(text) > self.MAX_TEXT_CHARS:
                logger.warning(f"Text too long ({len(text)} chars), truncating to 10K characters")
                text = text[:self.MAX_TEXT_CHARS]

            # Pick a sensible default voice for the chosen model
            if voice is None:
                voice = self.DEFAULT_ARABIC_VOICE if model == "playai-tts-arabic" else self.DEFAULT_ENGLISH_VOICE

            # Fall back to the model's default if an invalid voice was requested
            if model == "playai-tts" and voice not in self.ENGLISH_VOICES:
                logger.warning(f"Voice {voice} not valid for English model, using default")
                voice = self.DEFAULT_ENGLISH_VOICE
            elif model == "playai-tts-arabic" and voice not in self.ARABIC_VOICES:
                logger.warning(f"Voice {voice} not valid for Arabic model, using default")
                voice = self.DEFAULT_ARABIC_VOICE

            # Create output file path - use session directory for persistence
            if output_file is None:
                import hashlib
                import time
                # Content hash via hashlib: builtin hash() is salted per
                # process (PYTHONHASHSEED) so it is useless for stable names.
                # Millisecond timestamp keeps identical texts unique.
                timestamp = int(time.time() * 1000)
                text_hash = hashlib.md5(text.encode("utf-8")).hexdigest()[:8]
                output_file = self.session_dir / f"tts_output_{text_hash}_{timestamp}.wav"
            else:
                output_file = Path(output_file)

            logger.info(f"Generating TTS for {len(text)} chars using {model} with {voice}")

            # Generate speech using the OpenAI-compatible endpoint
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }
            data = {
                "model": model,
                "input": text,
                "voice": voice,
                "response_format": response_format
            }

            # Bounded timeout: without it a dropped connection hangs forever
            response = requests.post(
                "https://api.groq.com/openai/v1/audio/speech",
                headers=headers,
                json=data,
                timeout=self.REQUEST_TIMEOUT
            )

            if response.status_code == 200:
                # Write raw audio bytes to the output file
                with open(output_file, 'wb') as f:
                    f.write(response.content)
                logger.info(f"TTS audio saved to: {output_file}")
                return str(output_file)

            logger.error(f"TTS API error: {response.status_code} - {response.text}")
            return None

        except Exception as e:
            logger.error(f"Error generating TTS: {str(e)}")
            return None

    def cleanup_temp_files(self, max_age_hours: int = 24) -> None:
        """
        Clean up old temporary audio files.

        Args:
            max_age_hours: Delete files older than this many hours
        """
        try:
            import time
            current_time = time.time()
            max_age_seconds = max_age_hours * 3600

            # rglob so files in the session_audio/ subdirectory are cleaned
            # up too; a non-recursive glob on temp_dir would miss them even
            # though that is where text_to_speech actually writes.
            for file_path in self.temp_dir.rglob("*.wav"):
                if current_time - file_path.stat().st_mtime > max_age_seconds:
                    file_path.unlink()
                    logger.debug(f"Deleted old TTS file: {file_path}")

        except Exception as e:
            logger.error(f"Error cleaning up temp files: {str(e)}")

    @classmethod
    def get_available_voices(cls, model: str = "playai-tts") -> list[str]:
        """
        Get list of available voices for a model.

        Args:
            model: Model name ("playai-tts" or "playai-tts-arabic")

        Returns:
            Copy of the available voice names (safe for callers to mutate)
        """
        if model == "playai-tts-arabic":
            return cls.ARABIC_VOICES.copy()
        return cls.ENGLISH_VOICES.copy()
178
+
179
+
180
# Global TTS instance (lazy initialized)
_tts_instance: Optional[GroqTTS] = None


def get_tts_instance() -> Optional[GroqTTS]:
    """
    Get or create a global TTS instance.

    Returns:
        GroqTTS instance or None if API key not available
    """
    global _tts_instance

    # Reuse the cached client once it has been built successfully
    if _tts_instance is not None:
        return _tts_instance

    try:
        _tts_instance = GroqTTS()
    except ValueError as err:
        # Missing API key: report once per attempt and signal "unavailable"
        logger.warning(f"TTS not available: {str(err)}")
        return None
    return _tts_instance
201
+
202
+
203
def generate_speech_for_text(text: str, voice: Optional[str] = None) -> Optional[str]:
    """
    Convenience function to generate speech for text.

    Args:
        text: Text to convert to speech
        voice: Voice to use (optional)

    Returns:
        Path to generated audio file or None if error
    """
    tts_client = get_tts_instance()
    if tts_client is None:
        # TTS is unavailable (no API key); callers treat None as "no audio"
        return None
    return tts_client.text_to_speech(text, voice=voice)
219
+
220
+
221
def generate_speech_for_session(text: str, voice: Optional[str] = None) -> Optional[str]:
    """
    Generate speech for text with session persistence for chat interface.

    Args:
        text: Text to convert to speech
        voice: Voice to use (optional)

    Returns:
        Path to generated audio file that persists for the session, or None if error
    """
    tts_client = get_tts_instance()
    if tts_client is None:
        return None

    # text_to_speech already writes into the session directory by default
    audio_file = tts_client.text_to_speech(text, voice=voice)
    if not audio_file:
        return None

    # Only report success if the file actually landed on disk
    if Path(audio_file).exists():
        logger.info(f"Session TTS audio available at: {audio_file}")
        return str(audio_file)
    return None
246
+
247
+
248
def clean_tts_markup(text: str) -> str:
    """
    Clean text for TTS by removing markdown and other markup.

    Args:
        text: Text that may contain markdown

    Returns:
        Clean text suitable for TTS
    """
    import re

    # Strip fenced code blocks BEFORE inline code: the inline-code pattern
    # `(.*?)` would otherwise match inside the ``` fences and break them.
    text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)  # Code blocks
    # Strip images BEFORE links: an image is a link prefixed with "!", so the
    # link pattern would otherwise turn "![alt](url)" into a stray "!alt".
    text = re.sub(r'!\[([^\]]*)\]\([^\)]+\)', '', text)  # Images
    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # Links -> link text
    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)  # Bold (before single-star italics)
    text = re.sub(r'\*(.*?)\*', r'\1', text)  # Italics
    text = re.sub(r'`(.*?)`', r'\1', text)  # Inline code
    text = re.sub(r'#{1,6}\s*(.*)', r'\1', text)  # Headers
    text = re.sub(r'^[-*+]\s+', '', text, flags=re.MULTILINE)  # Bullet lists
    text = re.sub(r'^\d+\.\s+', '', text, flags=re.MULTILINE)  # Numbered lists
    text = re.sub(r'^>\s+', '', text, flags=re.MULTILINE)  # Quotes

    # Collapse leftover whitespace from the removals above
    text = re.sub(r'\n\s*\n', '\n\n', text)  # Multiple blank lines -> one
    text = re.sub(r'[ \t]+', ' ', text)  # Runs of spaces/tabs -> one space
    return text.strip()
apps/gradio-app/test_audio_debug.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Debug script to test audio transcription functionality
4
+ """
5
+ import os
6
+ import sys
7
+ import logging
8
+
9
+ # Add the src directory to the Python path
10
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
11
+
12
+ from fitness_gradio.ui.handlers import UIHandlers
13
+
14
+ # Set up logging
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
+
18
def test_audio_message_processing():
    """Exercise UIHandlers message processing with text and audio payloads."""

    # Path to a sample clip; the audio cases are skipped when it is absent
    test_audio_file = "test_audio.wav"  # Replace with actual audio file path

    # Test case 1: Audio only message
    mock_audio_message = {
        "files": [test_audio_file],
        "text": ""
    }

    # Test case 2: Text only message
    mock_text_message = {
        "files": [],
        "text": "Hello, this is a test message"
    }

    # Test case 3: Combined audio and text
    mock_combined_message = {
        "files": [test_audio_file],
        "text": "Additional text content"
    }

    initial_history = []

    print("Testing audio message processing...")

    # Text message path
    print("\n=== Testing text-only message ===")
    try:
        history, input_state = UIHandlers.add_message_with_audio(initial_history.copy(), mock_text_message)
        print(f"History after text message: {history}")
        print(f"Input state: {input_state}")
    except Exception as e:
        print(f"Error with text message: {e}")

    # Audio message path (only when a real audio file is present)
    if os.path.exists(test_audio_file):
        print("\n=== Testing audio-only message ===")
        try:
            history, input_state = UIHandlers.add_message_with_audio(initial_history.copy(), mock_audio_message)
            print(f"History after audio message: {history}")
            print(f"Input state: {input_state}")
        except Exception as e:
            print(f"Error with audio message: {e}")
    else:
        print(f"\n=== Skipping audio test (file {test_audio_file} not found) ===")

    # File-extension based audio detection
    print("\n=== Testing audio file detection ===")
    for test_file in ["test.wav", "test.mp3", "test.m4a", "test.txt", "test.jpg", "test.webm"]:
        is_audio = UIHandlers.is_audio_file(test_file)
        print(f"{test_file}: {'Audio' if is_audio else 'Not audio'}")

if __name__ == "__main__":
    test_audio_message_processing()
apps/gradio-app/test_tts.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test TTS functionality
3
+ """
4
+ import os
5
+ from fitness_gradio.ui.tts_utils import GroqTTS, generate_speech_for_text, clean_tts_markup
6
+
7
def test_tts_setup():
    """Test basic TTS setup and functionality."""
    print("Testing TTS setup...")

    # API key is a hard prerequisite for every other check
    if not os.getenv("GROQ_API_KEY"):
        print("❌ GROQ_API_KEY not found in environment variables")
        print("Please set GROQ_API_KEY to test TTS functionality")
        return False
    print("βœ… GROQ_API_KEY found")

    # Client construction should succeed once the key is present
    try:
        GroqTTS()
        print("βœ… GroqTTS instance created successfully")
    except Exception as e:
        print(f"❌ Failed to create GroqTTS instance: {e}")
        return False

    # Markdown-stripping sanity check (visual inspection only)
    sample = "**Hello** this is a *test* with `code` and [links](http://example.com)"
    print("Text cleaning test:")
    print(f"  Original: {sample}")
    print(f"  Cleaned: {clean_tts_markup(sample)}")

    # Voice catalogs for both models
    print(f"βœ… Available English voices: {len(GroqTTS.get_available_voices('playai-tts'))}")
    print(f"βœ… Available Arabic voices: {len(GroqTTS.get_available_voices('playai-tts-arabic'))}")

    print("πŸŽ‰ All TTS setup tests passed!")
    print("Note: Actual TTS generation will be tested when the UI is used with a valid API key.")
    return True

if __name__ == "__main__":
    test_tts_setup()
shared/src/fitness_core/agents/providers.py CHANGED
@@ -65,7 +65,10 @@ class ModelProvider:
65
  # Whisper models (Speech-to-Text)
66
  "whisper-large-v3": "litellm/groq/whisper-large-v3", # Whisper Large v3
67
  "whisper-large-v3-turbo": "litellm/groq/whisper-large-v3-turbo", # Whisper Large v3 Turbo
68
- "o3-mini": "o3-mini", # Latest reasoning model
 
 
 
69
  }
70
 
71
  @classmethod
@@ -102,6 +105,8 @@ class ModelProvider:
102
  "kimi-k2-instruct": "Moonshot Kimi K2 MoE - 1T parameters with tool use (Moonshot via Groq)",
103
  "whisper-large-v3": "OpenAI Whisper Large v3 - best speech-to-text (OpenAI via Groq)",
104
  "whisper-large-v3-turbo": "OpenAI Whisper Large v3 Turbo - faster speech-to-text (OpenAI via Groq)",
 
 
105
  }
106
  return model_info.get(model_name, "Model information not available")
107
 
 
65
  # Whisper models (Speech-to-Text)
66
  "whisper-large-v3": "litellm/groq/whisper-large-v3", # Whisper Large v3
67
  "whisper-large-v3-turbo": "litellm/groq/whisper-large-v3-turbo", # Whisper Large v3 Turbo
68
+
69
+ # PlayAI TTS models (Text-to-Speech)
70
+ "playai-tts": "litellm/groq/playai-tts", # English TTS model
71
+ "playai-tts-arabic": "litellm/groq/playai-tts-arabic", # Arabic TTS model
72
  }
73
 
74
  @classmethod
 
105
  "kimi-k2-instruct": "Moonshot Kimi K2 MoE - 1T parameters with tool use (Moonshot via Groq)",
106
  "whisper-large-v3": "OpenAI Whisper Large v3 - best speech-to-text (OpenAI via Groq)",
107
  "whisper-large-v3-turbo": "OpenAI Whisper Large v3 Turbo - faster speech-to-text (OpenAI via Groq)",
108
+ "playai-tts": "PlayAI English TTS - high-quality text-to-speech with 19 voices (PlayAI via Groq)",
109
+ "playai-tts-arabic": "PlayAI Arabic TTS - high-quality Arabic text-to-speech with 4 voices (PlayAI via Groq)",
110
  }
111
  return model_info.get(model_name, "Model information not available")
112