Update app.py
app.py CHANGED
@@ -116,23 +116,22 @@ def retrieve_context(query, max_results=6, max_chars=600):
     return []
 
 def format_conversation(history, system_prompt, tokenizer):
+    if history is None:
+        history = []
+
     if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
-
-        messages = [{"role": "system", "content": system_prompt.strip()}] + history
-        else:
-            messages = [{"role": "system", "content": system_prompt.strip()}]
+        messages = [{"role": "system", "content": system_prompt.strip()}] + history
         return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=True)
     else:
         # Fallback for base LMs without chat template
         prompt = system_prompt.strip() + "\n"
-
-
-
-
-
-
-
-        prompt += "Assistant: "
+        for msg in history:
+            if msg['role'] == 'user':
+                prompt += "User: " + msg['content'].strip() + "\n"
+            elif msg['role'] == 'assistant':
+                prompt += "Assistant: " + msg['content'].strip() + "\n"
+        if not prompt.strip().endswith("Assistant:"):
+            prompt += "Assistant: "
         return prompt
 
 def chat_response(user_msg, chat_history, system_prompt,
@@ -177,15 +176,7 @@ def chat_response(user_msg, chat_history, system_prompt,
         enriched = system_prompt
 
     pipe = load_pipeline(model_name)
-
-    # TODO:
-    debug += "\nLOAD MODEL:\n" + model_name
     prompt = format_conversation(history, enriched, pipe["tokenizer"])
-
-
-    # TODO:
-    debug += "\nPROMPT:\n" + prompt
-
     prompt_debug = f"\n\n--- Prompt Preview ---\n```\n{prompt}\n```"
     streamer = TextIterStreamer(pipe["tokenizer"],
                                 skip_prompt=True,
@@ -218,9 +209,6 @@ def chat_response(user_msg, chat_history, system_prompt,
                 break
             text = chunk
 
-            # TODO:
-            debug += "\nRESPONSE:\n" + text
-
             # Detect start of thinking
             if not in_thought and '<think>' in text:
                 in_thought = True
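The later hunks only strip the TODO/debug accumulation from chat_response; the surrounding streaming logic, where generated text arrives in chunks and an in_thought flag flips when a <think> tag appears, is unchanged. Below is a rough, self-contained sketch of that pattern: the hard-coded chunk list stands in for the app's TextIterStreamer, and the exact splitting of thinking text from the visible answer is an assumption, not the app's implementation.

# Hedged sketch: accumulate streamed chunks and separate <think>...</think>
# "thinking" text from the visible answer, mirroring the in_thought flag
# kept by the last hunk.
def stream_with_thinking(chunks):
    text = ""
    in_thought = False
    for chunk in chunks:
        text += chunk
        # Detect start of thinking
        if not in_thought and '<think>' in text and '</think>' not in text:
            in_thought = True
        # Detect end of thinking
        if in_thought and '</think>' in text:
            in_thought = False
        if '</think>' in text:
            visible = text.split('</think>', 1)[-1]
        else:
            visible = "" if in_thought else text
        yield in_thought, visible

fake_stream = ["<think>User greets ", "me.</think>", "Hello! ", "How can I help?"]
for in_thought, visible in stream_with_thinking(fake_stream):
    print(in_thought, repr(visible))
# True ''
# False ''
# False 'Hello! '
# False 'Hello! How can I help?'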