beyoru committed on
Commit 80b54e9 · verified · 1 Parent(s): 31391ab

Update app.py

Files changed (1)
  1. app.py +111 -112

app.py CHANGED
@@ -1,119 +1,118 @@
- import re
- import torch
- from threading import Thread
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
- 
- MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
- CONTEXT_LENGTH = 4096
- 
- # Add special tokens for thinking process
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
- tokenizer.add_special_tokens({
-     "additional_special_tokens": ["<think>", "</think>"]
- })
- 
- model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
- model.resize_token_embeddings(len(tokenizer))
- 
- def predict(message, history, show_thinking, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-     stop_tokens = ["<|endoftext|>", "<|im_end|>", "|im_end|", "</think>"]
-     instruction = f'<|im_start|>system\n{system_prompt}\n<|im_end|>\n'
- 
-     # Format chat history
-     for user, assistant in history:
-         instruction += f'<|im_start|>user\n{user}\n<|im_end|>\n<|im_start|>assistant\n{assistant}\n<|im_end|>\n'
-     instruction += f'<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n'
- 
-     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-     enc = tokenizer(instruction, return_tensors="pt", truncation=True, max_length=CONTEXT_LENGTH)
-     input_ids, attention_mask = enc.input_ids, enc.attention_mask
- 
-     generate_kwargs = dict(
-         input_ids=input_ids,
-         attention_mask=attention_mask,
-         streamer=streamer,
-         do_sample=True,
-         temperature=temperature,
-         max_new_tokens=max_new_tokens,
-         top_k=top_k,
-         repetition_penalty=repetition_penalty,
-         top_p=top_p
      )
- 
-     t = Thread(target=model.generate, kwargs=generate_kwargs)
-     t.start()
- 
-     outputs = []
-     thinking_buffer = []
-     in_thinking = False
-     current_chunk = ""
- 
-     for new_token in streamer:
-         current_chunk += new_token
- 
-         # Check for thinking tags
-         if "<think>" in current_chunk and not in_thinking:
-             in_thinking = True
-             pre, _, post = current_chunk.partition("<think>")
-             if pre:
-                 outputs.append(pre)
-                 yield _clean_output("".join(outputs), show_thinking)
-             current_chunk = post
- 
-         if "</think>" in current_chunk and in_thinking:
-             in_thinking = False
-             pre, _, post = current_chunk.partition("</think>")
-             thinking_buffer.append(pre)
-             if show_thinking:
-                 outputs.extend(thinking_buffer)
-             thinking_buffer = []
-             current_chunk = post
- 
-         if in_thinking:
-             thinking_buffer.append(current_chunk)
-             if show_thinking:
-                 outputs.append(current_chunk)
-                 yield _clean_output("".join(outputs), show_thinking)
-             current_chunk = ""
-         else:
-             if current_chunk:
-                 outputs.append(current_chunk)
-                 yield _clean_output("".join(outputs), show_thinking)
-                 current_chunk = ""
- 
- def _clean_output(text: str, show_thinking: bool) -> str:
-     # Remove residual tags and format thinking content
-     text = re.sub(r'\s*<think>\s*', '\n\n*Thinking:* ', text)
-     text = re.sub(r'\s*</think>\s*', ' ', text)
-     text = re.sub(r'(\*Thinking:\*)(?! )', r'\1 ', text)
-     return text.strip()
- 
- # Create interface with toggle
- gr.ChatInterface(
-     predict,
-     additional_inputs=[
-         gr.Checkbox(value=True, label="🔍 Show Thinking Process"),
-         gr.Textbox(
-             "You are an AI assistant. First analyze requests using <think> tags, then provide answers. "
-             "Put all reasoning between <think> and </think> tags.",
-             label="System Prompt"
-         ),
-         gr.Slider(0, 1, 0.6, label="🌡️ Temperature"),
-         gr.Slider(0, 4096, 512, label="📏 Max New Tokens"),
-         gr.Slider(1, 80, 40, label="🎛️ Top K"),
-         gr.Slider(0.1, 2.0, 1.1, label="🔄 Repetition Penalty"),
-         gr.Slider(0, 1, 0.95, label="🧮 Top P"),
-     ],
-     css="""
-     .thinking {
-         color: #666;
-         font-style: italic;
-         border-left: 3px solid #ddd;
-         padding-left: 1em;
-         margin: 0.5em 0;
-     }
-     """,
-     title="DeepSeek AI Assistant with Reasoning",
-     description="Toggle the 'Show Thinking Process' checkbox to view/hide the model's internal reasoning"
- ).queue().launch()
  import gradio as gr
+ from huggingface_hub import InferenceClient
+ import string
+ import numpy as np
+ from transformers import AutoTokenizer
+ import onnxruntime as ort
+ import os
+ 
+ # Initialize client and models
+ client = InferenceClient(api_key=os.environ.get('HF_TOKEN'))
+ 
+ # Constants for EOU calculation
+ PUNCS = string.punctuation.replace("'", "")
+ MAX_HISTORY = 4
+ MAX_HISTORY_TOKENS = 512
+ EOU_THRESHOLD = 0.5
+ 
+ # Initialize tokenizer and ONNX session
+ HG_MODEL = "livekit/turn-detector"
+ ONNX_FILENAME = "model_quantized.onnx"
+ tokenizer = AutoTokenizer.from_pretrained(HG_MODEL)
+ onnx_session = ort.InferenceSession(ONNX_FILENAME, providers=["CPUExecutionProvider"])
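+ # (model_quantized.onnx is assumed to sit next to app.py; the session load
+ # fails at startup if the quantized turn-detector export is missing.)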
+ 
+ # Helper functions for EOU
+ def softmax(logits):
+     # Subtract the max logit for numerical stability before exponentiating
+     exp_logits = np.exp(logits - np.max(logits))
+     return exp_logits / np.sum(exp_logits)
+ 
+ def normalize_text(text):
+     # Lowercase, strip punctuation (keeping apostrophes), collapse whitespace
+     def strip_puncs(text):
+         return text.translate(str.maketrans("", "", PUNCS))
+     return " ".join(strip_puncs(text).lower().split())
+ 
+ def format_chat_ctx(chat_ctx):
+     new_chat_ctx = []
+     for msg in chat_ctx:
+         if msg["role"] in ("user", "assistant"):
+             content = normalize_text(msg["content"])
+             if content:
+                 # Copy the message so the global history is not mutated
+                 new_chat_ctx.append({"role": msg["role"], "content": content})
+     convo_text = tokenizer.apply_chat_template(
+         new_chat_ctx, add_generation_prompt=False, add_special_tokens=False, tokenize=False
+     )
+     # Cut the text at the last <|im_end|> so the final turn stays open
+     ix = convo_text.rfind("<|im_end|>")
+     return convo_text[:ix]
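+ 
+ # The detector scores end-of-utterance as the probability that the next token
+ # after the open-ended formatted history is <|im_end|>.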
+ def calculate_eou(chat_ctx, session):
+     # Score only the most recent turns
+     formatted_text = format_chat_ctx(chat_ctx[-MAX_HISTORY:])
+     inputs = tokenizer(
+         formatted_text,
+         return_tensors="np",
+         truncation=True,
+         max_length=MAX_HISTORY_TOKENS,
      )
+     input_ids = np.array(inputs["input_ids"], dtype=np.int64)
+     outputs = session.run(["logits"], {"input_ids": input_ids})
+     logits = outputs[0][0, -1, :]
+     probs = softmax(logits)
+     eou_token_id = tokenizer.encode("<|im_end|>")[-1]
+     return probs[eou_token_id]
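+ 
+ # Intuition: an unfinished fragment like "what is the capital of" would
+ # typically score below EOU_THRESHOLD, while a complete question scores higher.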
+ 
+ # Chatbot logic with EOU
+ messages = []
+ 
+ def chatbot(user_input):
+     global messages
+ 
+     # Exit condition
+     if user_input.lower() == "exit":
+         messages = []  # Reset conversation history
+         # chatbot() is a generator, so the farewell must be yielded, not returned
+         yield "Chat ended. Refresh the page to start again."
+         return
+ 
+     # Add user message to conversation history
+     messages.append({"role": "user", "content": user_input})
+ 
+     # Calculate EOU to determine whether the user has finished typing
+     eou_prob = calculate_eou(messages, onnx_session)
+     if eou_prob < EOU_THRESHOLD:
+         yield "[I'm waiting for you to complete the sentence...]"
+         return
+ 
+     # Stream the chatbot's response
+     stream = client.chat.completions.create(
+         model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+         messages=messages,
+         temperature=0.5,
+         max_tokens=2048,
+         top_p=0.7,
+         stream=True
+     )
+ 
+     bot_response = ""
+     for chunk in stream:
+         # Delta content can be None on some stream chunks; skip those
+         if chunk.choices[0].delta.content:
+             bot_response += chunk.choices[0].delta.content
+         yield bot_response
+ 
+     # Add the final bot response to conversation history
+     messages.append({"role": "assistant", "content": bot_response})
+ 
+ # Create Gradio interface
+ with gr.Blocks(theme='darkdefault') as demo:
+     gr.Markdown("""# Chat with DeepSeek-R1
+     Type your message below to interact with the chatbot. Type "exit" to end the conversation.
+     """)
+ 
+     with gr.Row():
+         with gr.Column():
+             user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
+             submit_button = gr.Button("Send")
+         with gr.Column():
+             chat_output = gr.Textbox(label="Chatbot Response", interactive=False)
+ 
+     # Define interactions
+     submit_button.click(chatbot, inputs=[user_input], outputs=[chat_output])
+ 
+ # Launch the app
+ demo.launch()
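A quick way to sanity-check the turn detector outside of Gradio is the sketch below. It reuses the same assumptions as the committed app.py (the livekit/turn-detector tokenizer, a local model_quantized.onnx file, and an ONNX output named "logits"); the helper name eou_probability and the sample sentences are illustrative, not part of the commit.

```python
import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("livekit/turn-detector")
session = ort.InferenceSession("model_quantized.onnx", providers=["CPUExecutionProvider"])

def eou_probability(messages):
    # Same recipe as app.py: chat-format the history, cut at the last
    # <|im_end|> so the final turn stays open, then read the probability
    # of <|im_end|> as the next token.
    text = tokenizer.apply_chat_template(
        messages, add_generation_prompt=False, add_special_tokens=False, tokenize=False
    )
    text = text[: text.rfind("<|im_end|>")]
    input_ids = tokenizer(
        text, return_tensors="np", truncation=True, max_length=512
    )["input_ids"].astype(np.int64)
    logits = session.run(["logits"], {"input_ids": input_ids})[0][0, -1, :]
    exp = np.exp(logits - np.max(logits))  # stable softmax
    probs = exp / exp.sum()
    return probs[tokenizer.encode("<|im_end|>")[-1]]

# An unfinished fragment should score lower than a complete question
print(eou_probability([{"role": "user", "content": "what is the capital of"}]))
print(eou_probability([{"role": "user", "content": "what is the capital of france"}]))
```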