Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -14,11 +14,6 @@ import numpy as np
 from PIL import Image
 import edge_tts
 
-import sambanova_gradio
-# Load the reasoning model from sambanova_gradio.
-# This returns a callable interface for inference.
-reasoning_model = gr.load("DeepSeek-R1-Distill-Llama-70B", src=sambanova_gradio.registry, accept_token=True)
-
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
@@ -29,6 +24,9 @@ from transformers import (
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 
+# -----------------------------
+# Existing global variables and model setup
+# -----------------------------
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -56,6 +54,38 @@ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
 
+# -----------------------------
+# New reasoning feature setup
+# -----------------------------
+from openai import OpenAI
+
+api_key = os.getenv("SAMBANOVA_API_KEY")
+client_reasoning = OpenAI(
+    base_url="https://api.sambanova.ai/v1/",
+    api_key=api_key,
+)
+
+def reasoning_predict(message, history):
+    """
+    This function appends the user's reasoning request to the history,
+    then streams the response from the Sambanova API using the model
+    'DeepSeek-R1-Distill-Llama-70B'.
+    """
+    history.append({"role": "user", "content": message})
+    stream = client_reasoning.chat.completions.create(
+        messages=history,
+        model="DeepSeek-R1-Distill-Llama-70B",
+        stream=True,
+    )
+    chunks = []
+    for chunk in stream:
+        # Accumulate streamed content and yield the current full response
+        chunks.append(chunk.choices[0].delta.content or "")
+        yield "".join(chunks)
+
+# -----------------------------
+# Utility Functions and Checks
+# -----------------------------
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(output_file)
@@ -94,6 +124,9 @@ ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
 
 dtype = torch.float16 if device.type == "cuda" else torch.float32
 
+# -----------------------------
+# Image Generation Models Setup
+# -----------------------------
 if torch.cuda.is_available():
     # Lightning 5 model
     pipe = StableDiffusionXLPipeline.from_pretrained(
@@ -179,6 +212,9 @@ def save_image(img: Image.Image) -> str:
     img.save(unique_name)
     return unique_name
 
+# -----------------------------
+# Main Generation Function with Reasoning Integration
+# -----------------------------
 @spaces.GPU
 def generate(
     input_dict: dict,
@@ -193,6 +229,7 @@ def generate(
     files = input_dict.get("files", [])
 
     lower_text = text.lower().strip()
+
     # Check if the prompt is an image generation command using model flags.
     if (lower_text.startswith("@lightningv5") or
         lower_text.startswith("@lightningv4") or
@@ -245,17 +282,20 @@ def generate(
             yield gr.Image(image_path)
         return
 
-    #
-
-
-
-
-
-
-
+    # -----------------------------
+    # NEW: Reasoning Branch
+    # -----------------------------
+    if lower_text.startswith("@reasoning"):
+        reasoning_text = text.replace("@reasoning", "").strip()
+        reasoning_history = clean_chat_history(chat_history)
+        yield "Reasoning..."
+        for response in reasoning_predict(reasoning_text, reasoning_history):
+            yield response
         return
 
+    # -----------------------------
     # Otherwise, handle text/chat (and TTS) generation.
+    # -----------------------------
     tts_prefix = "@tts"
     is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
     voice_index = next((i for i in range(1, 3) if text.strip().lower().startswith(f"{tts_prefix}{i}")), None)
@@ -360,7 +400,7 @@ demo = gr.ChatInterface(
         ['@turbov3 "Abstract art, colorful and vibrant"'],
         ["Write a Python function to check if a number is prime."],
         ["@tts2 What causes rainbows to form?"],
-        ["@reasoning
+        ["@reasoning How does quantum entanglement work and what are its implications?"],
     ],
     cache_examples=False,
     type="messages",
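
For reference, the streaming pattern this commit introduces can be tried outside the Space. The sketch below is a minimal, standalone version of the new reasoning path, assuming the openai package is installed and the SAMBANOVA_API_KEY environment variable is set; the endpoint, model name, and chunk-accumulation logic are taken directly from the diff above, while the example prompt and variable names are illustrative only.

    # Minimal standalone sketch of the new reasoning path (assumptions:
    # `openai` installed, SAMBANOVA_API_KEY set; endpoint and model name
    # come from the diff above).
    import os
    from openai import OpenAI

    client = OpenAI(
        base_url="https://api.sambanova.ai/v1/",
        api_key=os.getenv("SAMBANOVA_API_KEY"),
    )

    history = [{"role": "user", "content": "Why is the sky blue?"}]
    stream = client.chat.completions.create(
        messages=history,
        model="DeepSeek-R1-Distill-Llama-70B",
        stream=True,
    )

    chunks = []
    for chunk in stream:
        # Each streamed chunk carries an incremental delta; `content` can be
        # None on role-only or final chunks, hence the `or ""` fallback.
        chunks.append(chunk.choices[0].delta.content or "")
        print("".join(chunks))  # the progressively growing response

Yielding the accumulated string rather than each raw delta matches how gr.ChatInterface renders streams: every yield replaces the displayed message, so the cumulative text appears as a single answer growing in place.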