prithivMLmods committed
Commit eaab4db · verified · 1 Parent(s): 90baac7

Update app.py

Files changed (1): app.py (+124, -52)
app.py CHANGED
@@ -17,7 +17,7 @@ import numpy as np
 from PIL import Image
 import edge_tts
 import trimesh
-import soundfile as sf # New import for audio file reading
+import soundfile as sf # For audio file reading
 
 import supervision as sv
 from ultralytics import YOLO as YOLODetector
@@ -36,7 +36,13 @@ from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 from diffusers import ShapEImg2ImgPipeline, ShapEPipeline
 from diffusers.utils import export_to_ply
 
+# Additional imports for the new DeepseekR1 feature and FastAPI endpoints
+import openai
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+
 os.system('pip install backoff')
+
 # Global constants and helper functions
 
 MAX_SEED = np.iinfo(np.int32).max
@@ -56,8 +62,67 @@ def glb_to_data_url(glb_path: str) -> str:
     b64_data = base64.b64encode(data).decode("utf-8")
     return f"data:model/gltf-binary;base64,{b64_data}"
 
-# Model class for Text-to-3D Generation (ShapE)
+# ---------------------------
+# Sambanova DeepseekR1 Clients and Chat Function
+# ---------------------------
+sambanova_client = openai.OpenAI(
+    api_key=os.environ.get("SAMBANOVA_API_KEY"),
+    base_url="https://api.sambanova.ai/v1",
+)
+sambanova_client2 = openai.OpenAI(
+    api_key=os.environ.get("SAMBANOVA_API_KEY_2"),
+    base_url="https://api.sambanova.ai/v1",
+)
+sambanova_client3 = openai.OpenAI(
+    api_key=os.environ.get("SAMBANOVA_API_KEY_3"),
+    base_url="https://api.sambanova.ai/v1",
+)
 
+def chat_response(prompt: str) -> str:
+    """
+    Generate a chat response using the primary Sambanova API.
+    If it fails, fallback to the second, and then the third API.
+    """
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": prompt},
+    ]
+    errors = {}
+    try:
+        response = sambanova_client.chat.completions.create(
+            model="DeepSeek-R1-Distill-Llama-70B",
+            messages=messages,
+            temperature=0.1,
+            top_p=0.1
+        )
+        return response.choices[0].message.content
+    except Exception as e:
+        errors['client1'] = str(e)
+    try:
+        response2 = sambanova_client2.chat.completions.create(
+            model="DeepSeek-R1-Distill-Llama-70B",
+            messages=messages,
+            temperature=0.1,
+            top_p=0.1
+        )
+        return response2.choices[0].message.content
+    except Exception as e2:
+        errors['client2'] = str(e2)
+    try:
+        response3 = sambanova_client3.chat.completions.create(
+            model="DeepSeek-R1-Distill-Llama-70B",
+            messages=messages,
+            temperature=0.1,
+            top_p=0.1
+        )
+        return response3.choices[0].message.content
+    except Exception as e3:
+        errors['client3'] = str(e3)
+    return f"Primary error: {errors['client1']}; Second error: {errors['client2']}; Third error: {errors['client3']}"
+
+# ---------------------------
+# Model class for Text-to-3D Generation (ShapE)
+# ---------------------------
 class Model:
     def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
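
The fallback order in chat_response is strictly sequential: the primary client is tried first, and only an exception moves the call on to the second and then the third key. A minimal sketch of exercising the helper directly, assuming the SAMBANOVA_API_KEY, SAMBANOVA_API_KEY_2 and SAMBANOVA_API_KEY_3 variables are set and using an illustrative prompt:

    # Illustrative check of the new fallback helper (not part of the commit)
    reply = chat_response("Give a one-line summary of DeepSeek-R1.")
    print(reply)  # first successful client's answer, or the collected error messages
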
@@ -115,8 +180,9 @@ class Model:
         export_to_ply(images[0], ply_path.name)
         return self.to_glb(ply_path.name)
 
+# ---------------------------
 # New Tools for Web Functionality using DuckDuckGo and smolagents
-
+# ---------------------------
 from typing import Any, Optional
 from smolagents.tools import Tool
 import duckduckgo_search
@@ -168,27 +234,21 @@ class VisitWebpageTool(Tool):
                 "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
             ) from e
         try:
-            # Send a GET request to the URL with a 20-second timeout
             response = requests.get(url, timeout=20)
-            response.raise_for_status() # Raise an exception for bad status codes
-
-            # Convert the HTML content to Markdown
+            response.raise_for_status()
             markdown_content = markdownify(response.text).strip()
-
-            # Remove multiple line breaks
             markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
-
             return truncate_content(markdown_content, 10000)
-
         except requests.exceptions.Timeout:
             return "The request timed out. Please try again later or check the URL."
         except RequestException as e:
             return f"Error fetching the webpage: {str(e)}"
         except Exception as e:
             return f"An unexpected error occurred: {str(e)}"
-
-# rAgent Reasoning using Llama mode OpenAI
 
+# ---------------------------
+# rAgent Reasoning using Llama mode OpenAI
+# ---------------------------
 from openai import OpenAI
 
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
@@ -213,7 +273,6 @@ def ragent_reasoning(prompt: str, history: list[dict], max_tokens: int = 2048, t
     Uses the Llama mode OpenAI model to perform a structured reasoning chain.
     """
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
-    # Incorporate conversation history (if any)
     for msg in history:
         if msg.get("role") == "user":
             messages.append({"role": "user", "content": msg["content"]})
@@ -237,12 +296,10 @@
 # ------------------------------------------------------------------------------
 # New Phi-4 Multimodal Feature (Image & Audio)
 # ------------------------------------------------------------------------------
-# Define prompt structure for Phi-4
 phi4_user_prompt = '<|user|>'
 phi4_assistant_prompt = '<|assistant|>'
 phi4_prompt_suffix = '<|end|>'
 
-# Load Phi-4 multimodal model and processor using unique variable names
 phi4_model_path = "microsoft/Phi-4-multimodal-instruct"
 phi4_processor = AutoProcessor.from_pretrained(phi4_model_path, trust_remote_code=True)
 phi4_model = AutoModelForCausalLM.from_pretrained(
@@ -276,9 +333,9 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
+# ---------------------------
 # Load Models and Pipelines for Chat, Image, and Multimodal Processing
-# Load the text-only model and tokenizer (for pure text chat)
-
+# ---------------------------
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
@@ -288,13 +345,11 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
 
-# Voices for text-to-speech
 TTS_VOICES = [
-    "en-US-JennyNeural", # @tts1
-    "en-US-GuyNeural", # @tts2
+    "en-US-JennyNeural",
+    "en-US-GuyNeural",
 ]
 
-# Load multimodal processor and model (e.g. for OCR and image processing)
 MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 model_m = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -303,20 +358,15 @@ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
 
-# Asynchronous text-to-speech
-
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
     """Convert text to speech using Edge TTS and save as MP3"""
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(output_file)
     return output_file
 
-# Utility function to clean conversation history
-
 def clean_chat_history(chat_history):
     """
     Filter out any chat entries whose "content" is not a string.
-    This helps prevent errors when concatenating previous messages.
     """
     cleaned = []
     for msg in chat_history:
@@ -324,14 +374,14 @@ def clean_chat_history(chat_history):
         cleaned.append(msg)
     return cleaned
 
+# ---------------------------
 # Stable Diffusion XL Pipeline for Image Generation
-# Model In Use : SG161222/RealVisXL_V5.0_Lightning
-
-MODEL_ID_SD = os.getenv("MODEL_VAL_PATH") # SDXL Model repository path via env variable
+# ---------------------------
+MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
 ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
-BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1")) # For batched image generation
+BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1"))
 
 sd_pipe = StableDiffusionXLPipeline.from_pretrained(
     MODEL_ID_SD,
@@ -389,7 +439,6 @@ def generate_image_fn(
         options["use_resolution_binning"] = True
 
     images = []
-    # Process in batches
    for i in range(0, num_images, BATCH_SIZE):
        batch_options = options.copy()
        batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
@@ -404,8 +453,9 @@
     image_paths = [save_image(img) for img in images]
     return image_paths, seed
 
+# ---------------------------
 # Text-to-3D Generation using the ShapE Pipeline
-
+# ---------------------------
 @spaces.GPU(duration=120, enable_queue=True)
 def generate_3d_fn(
     prompt: str,
@@ -423,7 +473,9 @@
     glb_path = model3d.run_text(prompt, seed=seed, guidance_scale=guidance_scale, num_steps=num_steps)
     return glb_path, seed
 
+# ---------------------------
 # YOLO Object Detection Setup
+# ---------------------------
 YOLO_MODEL_REPO = "strangerzonehf/Flux-Ultimate-LoRA-Collection"
 YOLO_CHECKPOINT_NAME = "images/demo.pt"
 yolo_model_path = hf_hub_download(repo_id=YOLO_MODEL_REPO, filename=YOLO_CHECKPOINT_NAME)
@@ -443,8 +495,9 @@ def detect_objects(image: np.ndarray):
 
     return Image.fromarray(annotated_image)
 
-# Chat Generation Function with support for @tts, @image, @3d, @web, @rAgent, @yolo, and now @phi4 commands
-
+# ---------------------------
+# Chat Generation Function with Special Commands
+# ---------------------------
 @spaces.GPU
 def generate(
     input_dict: dict,
@@ -463,7 +516,8 @@
     - "@web": triggers a web search or webpage visit.
     - "@rAgent": initiates a reasoning chain using Llama mode.
     - "@yolo": triggers object detection using YOLO.
-    - **"@phi4": triggers multimodal (image/audio) processing using the Phi-4 model.**
+    - "@phi4": triggers multimodal (image/audio) processing using the Phi-4 model.
+    - **"@deepseekr1": queries the Sambanova DeepSeek-R1 model with fallback APIs.**
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
@@ -479,7 +533,6 @@
             num_steps=64,
             randomize_seed=True,
         )
-        # Copy the GLB file to a static folder.
         static_folder = os.path.join(os.getcwd(), "static")
         if not os.path.exists(static_folder):
             os.makedirs(static_folder)
@@ -513,7 +566,6 @@
     # --- Web Search/Visit branch ---
     if text.strip().lower().startswith("@web"):
         web_command = text[len("@web"):].strip()
-        # If the command starts with "visit", then treat the rest as a URL
         if web_command.lower().startswith("visit"):
             url = web_command[len("visit"):].strip()
             yield "🌍 Visiting webpage..."
@@ -521,7 +573,6 @@
             content = visitor.forward(url)
             yield content
         else:
-            # Otherwise, treat the rest as a search query.
             query = web_command
             yield "🧀 Performing a web search ..."
             searcher = DuckDuckGoSearchTool()
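
The @web branch simply instantiates the two smolagents tools defined earlier and forwards either a URL or a search query to them. The same objects can be driven outside the chat loop; a minimal sketch, with an illustrative query and URL:

    # Illustrative direct use of the web tools (not part of the commit)
    searcher = DuckDuckGoSearchTool()
    visitor = VisitWebpageTool()
    print(searcher.forward("DeepSeek-R1 distilled models"))   # formatted search results
    print(visitor.forward("https://huggingface.co")[:500])    # start of the page converted to Markdown
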
@@ -533,18 +584,24 @@
     if text.strip().lower().startswith("@ragent"):
         prompt = text[len("@ragent"):].strip()
         yield "📝 Initiating reasoning chain using Llama mode..."
-        # Pass the current chat history (cleaned) to help inform the chain.
         for partial in ragent_reasoning(prompt, clean_chat_history(chat_history)):
             yield partial
         return
 
+    # --- DeepSeek-R1 branch ---
+    if text.strip().lower().startswith("@deepseekr1"):
+        prompt = text[len("@deepseekr1"):].strip()
+        yield "🔍 Querying DeepSeek-R1..."
+        response = chat_response(prompt)
+        yield response
+        return
+
     # --- YOLO Object Detection branch ---
     if text.strip().lower().startswith("@yolo"):
         yield "🔍 Running object detection with YOLO..."
         if not files or len(files) == 0:
             yield "Error: Please attach an image for YOLO object detection."
             return
-        # Use the first attached image
         input_file = files[0]
         try:
             if isinstance(input_file, str):
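
The DeepSeek-R1 branch added in the hunk above follows the same command convention as the other branches: the "@deepseekr1" prefix is stripped and the remainder becomes the prompt for chat_response. A minimal sketch of that parsing, with an illustrative message:

    # Illustrative only: the prefix handling performed by the DeepSeek-R1 branch
    text = "@deepseekr1 Explain model distillation in one sentence."
    prompt = text[len("@deepseekr1"):].strip()
    # generate() then yields "🔍 Querying DeepSeek-R1..." followed by chat_response(prompt)
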
@@ -568,15 +625,12 @@
         if not question:
             yield "Error: Please provide a question after @phi4."
             return
-        # Determine input type (Image or Audio) from the first file
         input_file = files[0]
         try:
-            # If file is already a PIL Image, treat as image
             if isinstance(input_file, Image.Image):
                 input_type = "Image"
                 file_for_phi4 = input_file
             else:
-                # Try opening as image; if it fails, assume audio
                 try:
                     file_for_phi4 = Image.open(input_file)
                     input_type = "Image"
@@ -598,10 +652,8 @@
             yield "Invalid file type for @phi4 multimodal processing."
             return
 
-        # Initialize the streamer
         streamer = TextIteratorStreamer(phi4_processor, skip_prompt=True, skip_special_tokens=True)
 
-        # Prepare generation kwargs
         generation_kwargs = {
             **inputs,
             "streamer": streamer,
@@ -609,16 +661,14 @@
             "num_logits_to_keep": 0,
         }
 
-        # Start generation in a separate thread
         thread = Thread(target=phi4_model.generate, kwargs=generation_kwargs)
         thread.start()
 
-        # Stream the response
         buffer = ""
         yield "🤔 Processing with Phi-4..."
         for new_text in streamer:
             buffer += new_text
-            time.sleep(0.01) # Small delay to simulate real-time streaming
+            time.sleep(0.01)
             yield buffer
         return
 
@@ -698,8 +748,9 @@
         output_file = asyncio.run(text_to_speech(final_response, voice))
         yield gr.Audio(output_file, autoplay=True)
 
+# ---------------------------
 # Gradio Chat Interface Setup and Launch
-
+# ---------------------------
 demo = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
@@ -731,18 +782,39 @@ demo = gr.ChatInterface(
         label="Query Input",
         file_types=["image", "audio"],
         file_count="multiple",
-        placeholder=" @tts1, @tts2, @image, @3d, @phi4 [image, audio], @rAgent, @web, @yolo, default [plain text]"
+        placeholder=" @tts1, @tts2, @image, @3d, @phi4 [image, audio], @rAgent, @web, @yolo, @deepseekr1, default [plain text]"
     ),
     stop_btn="Stop Generation",
     multimodal=True,
 )
 
-# Ensure the static folder exists
 if not os.path.exists("static"):
     os.makedirs("static")
 
 from fastapi.staticfiles import StaticFiles
 demo.app.mount("/static", StaticFiles(directory="static"), name="static")
 
+# ---------------------------
+# Mount FastAPI Middleware and Endpoint for DeepSeek-R1
+# ---------------------------
+demo.app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@demo.app.post("/chat")
+async def chat_endpoint(prompt: str):
+    """
+    FastAPI endpoint for the Sambanova DeepSeek-R1 chatbot.
+    """
+    result = chat_response(prompt)
+    return {"response": result}
+
+# ---------------------------
+# Main Execution
+# ---------------------------
 if __name__ == "__main__":
     demo.queue(max_size=20).launch(share=True)
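
Because the CORS middleware and the /chat route are mounted on demo.app, the Space also answers plain HTTP requests alongside the Gradio UI. A minimal client sketch, assuming a local launch on Gradio's default port 7860 and relying on FastAPI reading the bare prompt: str parameter from the query string:

    import requests

    # Illustrative request against the new POST /chat endpoint (not part of the commit)
    resp = requests.post(
        "http://127.0.0.1:7860/chat",
        params={"prompt": "Hello from the REST endpoint"},
    )
    print(resp.json()["response"])
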
 