import gradio as gr
from huggingface_hub import InferenceClient
import string
import numpy as np
from transformers import AutoTokenizer
import onnxruntime as ort
import os

# Inference API client (expects a Hugging Face token in the HF_TOKEN env var)
client = InferenceClient(api_key=os.environ.get("HF_TOKEN"))

# Constants for end-of-utterance (EOU) detection
PUNCS = string.punctuation.replace("'", "")  # punctuation to strip (apostrophes kept)
MAX_HISTORY = 4  # most recent turns passed to the turn detector
MAX_HISTORY_TOKENS = 1024  # token budget for the detector input
EOU_THRESHOLD = 0.5  # respond only when P(end of utterance) is at least this

# Initialize tokenizer and ONNX session
HG_MODEL = "livekit/turn-detector"
ONNX_FILENAME = "model_quantized.onnx"
tokenizer = AutoTokenizer.from_pretrained(HG_MODEL)
onnx_session = ort.InferenceSession(ONNX_FILENAME, providers=["CPUExecutionProvider"])

# Helper functions for EOU
def softmax(logits):
    # Numerically stable softmax: shift logits by their max before exponentiating
    exp_logits = np.exp(logits - np.max(logits))
    return exp_logits / np.sum(exp_logits)

def normalize_text(text):
    # Lowercase, strip punctuation (keeping apostrophes), and collapse whitespace
    def strip_puncs(text):
        return text.translate(str.maketrans("", "", PUNCS))
    return " ".join(strip_puncs(text).lower().split())

def format_chat_ctx(chat_ctx):
    # Keep only normalized user/assistant turns. Build new dicts rather than
    # mutating the caller's messages, so the stored history keeps its original text.
    new_chat_ctx = []
    for msg in chat_ctx:
        if msg["role"] in ("user", "assistant"):
            content = normalize_text(msg["content"])
            if content:
                new_chat_ctx.append({"role": msg["role"], "content": content})
    convo_text = tokenizer.apply_chat_template(
        new_chat_ctx, add_generation_prompt=False, add_special_tokens=False, tokenize=False
    )
    # Drop the trailing <|im_end|> so the model must predict whether the turn is over
    ix = convo_text.rfind("<|im_end|>")
    return convo_text[:ix]
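
# For intuition (a sketch; the exact rendering comes from the tokenizer's chat
# template): a short exchange becomes roughly ChatML-style text with the final
# end-of-turn marker removed, e.g.
#   "<|im_start|>user\nhey can you help me<|im_end|>\n<|im_start|>assistant\nfine i'll help you"
# so the model can score how likely <|im_end|> is to come next.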

def calculate_eou(chat_ctx, session):
    # Probability that the latest turn is a complete utterance
    formatted_text = format_chat_ctx(chat_ctx[-MAX_HISTORY:])
    inputs = tokenizer(
        formatted_text,
        return_tensors="np",
        truncation=True,
        max_length=MAX_HISTORY_TOKENS,
    )
    input_ids = np.array(inputs["input_ids"], dtype=np.int64)
    outputs = session.run(["logits"], {"input_ids": input_ids})
    logits = outputs[0][0, -1, :]  # logits for the token that would follow the context
    probs = softmax(logits)
    eou_token_id = tokenizer.encode("<|im_end|>")[-1]
    return probs[eou_token_id]  # probability mass on the end-of-turn token
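
# Quick sanity check (hypothetical numbers; actual values depend on the model weights):
#   calculate_eou([{"role": "user", "content": "so what i wanted to ask was"}], onnx_session)
#   -> low, e.g. ~0.1 (mid-sentence, keep waiting)
#   calculate_eou([{"role": "user", "content": "thanks, that's everything!"}], onnx_session)
#   -> high, e.g. ~0.9 (finished utterance, respond)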



# Seed conversation: persona system prompts plus a short few-shot exchange.
# Kept separately so "exit" can reset the chat without losing the persona.
INITIAL_MESSAGES = [
    {"role": "system", "content": os.environ.get("CHARACTER_DESC", "You are a helpful assistant.")},
    {"role": "system", "content": "You are Aiko, a tsundere girl who's tough on the outside but secretly cares."},
    {"role": "user", "content": "Hey, Aiko, you seem really good at math. Can you help me with this problem?"},
    {"role": "assistant", "content": "Tch, you're so hopeless. How do you even survive on your own? Fine, I'll help you, but don't expect me to babysit you all the time!"},
    {"role": "user", "content": "Thanks, Aiko. You're actually really nice when you want to be."},
    {"role": "assistant", "content": "W-What are you talking about?! I'm not nice! I just didn't want you bothering me again with your stupid questions!"},
]
messages = [dict(m) for m in INITIAL_MESSAGES]


def chatbot(user_input):
    global messages

    # Exit condition: reset the history to the seed conversation.
    # (This is a generator, so the message must be yielded, not returned.)
    if user_input.lower() == "exit":
        messages = [dict(m) for m in INITIAL_MESSAGES]
        yield "Chat ended. Refresh the page to start again."
        return

    # Add the user message to the history. If the previous message is also from
    # the user (an unfinished fragment), merge it into one utterance so the
    # turn detector scores the sentence as a whole.
    if messages[-1]["role"] == "user":
        messages[-1]["content"] += " " + user_input
    else:
        messages.append({"role": "user", "content": user_input})

    # Respond only once the turn detector thinks the user has finished
    eou_prob = calculate_eou(messages, onnx_session)
    if eou_prob < EOU_THRESHOLD:
        yield "[I'm waiting for you to complete the sentence...]"
        return

    # Stream the chatbot's response
    stream = client.chat.completions.create(
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        messages=messages,
        temperature=0.6,
        max_tokens=1024,
        top_p=0.95,
        stream=True,
    )

    bot_response = ""
    for chunk in stream:
        # Some chunks (e.g. the final one) carry no content delta
        delta = chunk.choices[0].delta.content
        if delta:
            bot_response += delta
            yield bot_response

    # Add final bot response to conversation history
    messages.append({"role": "assistant", "content": bot_response})
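
# Usage sketch: chatbot() is a generator, so callers iterate over it (Gradio
# does this automatically to stream partial outputs), e.g.:
#   for partial in chatbot("Can you explain this integral?"):
#       print(partial)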

# Create Gradio interface
# Note: "darkdefault" was a legacy Gradio 2 theme name; gr.Blocks expects a
# theme object or one of the built-in names such as "default" or "soft"
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("""# Chat with DeepSeek-R1
Type your message below to interact with the chatbot. Type "exit" to end the conversation.
""")

    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
            submit_button = gr.Button("Send")
        with gr.Column():
            chat_output = gr.Textbox(label="Chatbot Response", interactive=False)

    # Define interactions
    submit_button.click(chatbot, inputs=[user_input], outputs=[chat_output])

# Launch the app (streaming generator outputs go through Gradio's queue, which
# is enabled by default in Gradio 4; on Gradio 3, call demo.queue() first)
demo.launch()