gr0010 committed
Commit 0b925be · verified · 1 Parent(s): 71748cb

Update app.py

Files changed (1)
  1. app.py +30 -360
app.py CHANGED
@@ -1,334 +1,4 @@
-import os
-import torch
-import gradio as gr
-import spaces
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-# -------------------------------------------------
-# Model setup (loaded once at startup)
-# -------------------------------------------------
-model_name = "gr0010/Art-0-8B-development"
-
-# Load model and tokenizer globally
-print("Loading model and tokenizer...")
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-
-# Load model in CPU first, will move to GPU when needed
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.bfloat16,
-    device_map="cuda",  # Direct CUDA loading for ZeroGPU
-    trust_remote_code=True,
-)
-print("Model loaded successfully!")
-
-# -------------------------------------------------
-# Core generation and parsing logic with Zero GPU
-# -------------------------------------------------
-@spaces.GPU(duration=120)  # Request GPU for up to 120 seconds
-def generate_and_parse(messages: list, temperature: float = 0.6,
-                       top_p: float = 0.95, top_k: int = 20,
-                       min_p: float = 0.0, max_new_tokens: int = 32768):
-    """
-    Takes a clean list of messages, generates a response,
-    and parses it into thinking and answer parts.
-    Decorated with @spaces.GPU for Zero GPU allocation.
-    """
-    # Apply chat template with enable_thinking=True for Qwen3
-    prompt_text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True,
-        enable_thinking=True  # Explicitly enable thinking mode
-    )
-
-    # --- CONSOLE DEBUG OUTPUT ---
-    print("\n" + "="*50)
-    print("--- RAW PROMPT SENT TO MODEL ---")
-    print(prompt_text[:500] + "..." if len(prompt_text) > 500 else prompt_text)
-    print("="*50 + "\n")
-
-    model_inputs = tokenizer([prompt_text], return_tensors="pt").to("cuda")
-
-    with torch.no_grad():
-        generated_ids = model.generate(
-            **model_inputs,
-            max_new_tokens=max_new_tokens,
-            do_sample=True,
-            temperature=temperature,
-            top_p=top_p,
-            top_k=top_k,
-            min_p=min_p,
-            pad_token_id=tokenizer.eos_token_id,
-        )
-
-    output_token_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
-
-    thinking = ""
-    answer = ""
-    try:
-        # Find the </think> token to separate thinking from answer
-        end_think_token_id = 151668  # </think>
-        if end_think_token_id in output_token_ids:
-            end_think_idx = output_token_ids.index(end_think_token_id) + 1
-            thinking_tokens = output_token_ids[:end_think_idx]
-            answer_tokens = output_token_ids[end_think_idx:]
-
-            thinking = tokenizer.decode(thinking_tokens, skip_special_tokens=True).strip()
-            # Remove <think> and </think> tags from thinking
-            thinking = thinking.replace("<think>", "").replace("</think>", "").strip()
-
-            answer = tokenizer.decode(answer_tokens, skip_special_tokens=True).strip()
-        else:
-            # If no </think> token found, treat everything as answer
-            answer = tokenizer.decode(output_token_ids, skip_special_tokens=True).strip()
-            # Remove any stray <think> tags
-            answer = answer.replace("<think>", "").replace("</think>", "")
-    except (ValueError, IndexError):
-        answer = tokenizer.decode(output_token_ids, skip_special_tokens=True).strip()
-        answer = answer.replace("<think>", "").replace("</think>", "")
-
-    return thinking, answer
-
-# -------------------------------------------------
-# Gradio UI Logic
-# -------------------------------------------------
-
-# Custom CSS for better styling
-custom_css = """
-.model-info {
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-    padding: 1rem;
-    border-radius: 10px;
-    margin-bottom: 1rem;
-    color: white;
-}
-.model-info a {
-    color: #fff;
-    text-decoration: underline;
-    font-weight: bold;
-}
-.cta-section {
-    background: #f0f0f0;
-    padding: 1rem;
-    border-radius: 10px;
-    margin-bottom: 1rem;
-    text-align: center;
-}
-.cta-section a {
-    display: inline-block;
-    margin: 0 0.5rem;
-    padding: 0.5rem 1rem;
-    background: #ff6b6b;
-    color: white;
-    text-decoration: none;
-    border-radius: 5px;
-    transition: background 0.3s;
-}
-.cta-section a:hover {
-    background: #ff5252;
-}
-"""
-
-with gr.Blocks(theme=gr.themes.Soft(), fill_height=True, css=custom_css) as demo:
-    # Separate states for display and model context
-    display_history_state = gr.State([])  # For Gradio chatbot display
-    model_history_state = gr.State([])  # Clean history for model
-    is_generating_state = gr.State(False)  # To prevent multiple submissions
-
-    # Model info and CTA section
-    gr.HTML("""
-    <div class="model-info">
-        <h1 style="margin: 0; font-size: 2em;">🎨 Art-0 8B Thinking Chatbot</h1>
-        <p style="margin: 0.5rem 0;">
-            Powered by <a href="https://huggingface.co/gr0010/Art-0-8B-development" target="_blank">Art-0-8B-development</a>
-            - A fine-tuned Qwen3-8B model with advanced reasoning capabilities
-        </p>
-    </div>
-
-    <div class="cta-section">
-        <strong>💡 Enjoying this model?</strong>
-        <a href="https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME" target="_blank">⭐ Like this Space</a>
-        <a href="https://huggingface.co/gr0010/Art-0-8B-development/discussions" target="_blank">💬 Leave Feedback</a>
-        <a href="https://huggingface.co/gr0010" target="_blank">👤 Follow AGI-0</a>
-    </div>
-    """)
-
-    gr.Markdown(
-        """
-        Chat with Art-0-8B, featuring transparent reasoning display and custom personality instructions.
-        The model shows its internal thought process when solving problems.
-        """
-    )
-
-    # System prompt at the top (main feature)
-    with gr.Group():
-        gr.Markdown("### 🎭 System Prompt (Personality & Behavior)")
-        system_prompt = gr.Textbox(
-            value="""Personality Instructions:
-You are an AI assistant named Art developed by AGI-0.
-Reasoning Instructions:
-Think using bullet points and short sentences to simulate thoughts and emoticons to simulate emotions""",
-            label="System Prompt",
-            info="Define the model's personality and reasoning style",
-            lines=5,
-            interactive=True
-        )
-
-    # Main chat interface
-    chatbot = gr.Chatbot(
-        label="Conversation",
-        elem_id="chatbot",
-        bubble_full_width=False,
-        height=500,
-        show_copy_button=True,
-        type="messages"
-    )
-
-    with gr.Row():
-        user_input = gr.Textbox(
-            show_label=False,
-            placeholder="Type your message here...",
-            scale=4,
-            container=False,
-            interactive=True
-        )
-        submit_btn = gr.Button(
-            "Send",
-            variant="primary",
-            scale=1,
-            interactive=True
-        )
-
-    with gr.Row():
-        clear_btn = gr.Button("🗑️ Clear History", variant="secondary")
-        retry_btn = gr.Button("🔄 Retry Last", variant="secondary")
-
-    # Example prompts
-    gr.Examples(
-        examples=[
-            ["Give me a short introduction to large language models."],
-            ["What are the benefits of using transformers in AI?"],
-            ["There are 5 birds on a branch. A hunter shoots one. How many birds are left?"],
-            ["Explain quantum computing step by step."],
-            ["Write a Python function to calculate the factorial of a number."],
-            ["What makes Art-0 different from other AI models?"],
-        ],
-        inputs=user_input,
-        label="💡 Example Prompts"
-    )
-
-    # Advanced settings at the bottom
-    with gr.Accordion("⚙️ Advanced Generation Settings", open=False):
-        with gr.Row():
-            temperature = gr.Slider(
-                minimum=0.1,
-                maximum=2.0,
-                value=0.6,
-                step=0.1,
-                label="Temperature",
-                info="Controls randomness (higher = more creative)"
-            )
-            top_p = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.95,
-                step=0.05,
-                label="Top-p",
-                info="Nucleus sampling threshold"
-            )
-        with gr.Row():
-            top_k = gr.Slider(
-                minimum=1,
-                maximum=100,
-                value=20,
-                step=1,
-                label="Top-k",
-                info="Number of top tokens to consider"
-            )
-            min_p = gr.Slider(
-                minimum=0.0,
-                maximum=1.0,
-                value=0.0,
-                step=0.01,
-                label="Min-p",
-                info="Minimum probability threshold for token sampling"
-            )
-        with gr.Row():
-            max_new_tokens = gr.Slider(
-                minimum=128,
-                maximum=32768,
-                value=32768,
-                step=128,
-                label="Max New Tokens",
-                info="Maximum response length"
-            )
-
-    def handle_user_message(user_message: str, display_history: list, model_history: list,
-                            system_prompt_text: str, is_generating: bool,
-                            temp: float, top_p_val: float, top_k_val: int,
-                            min_p_val: float, max_tokens: int):
-        """
-        Handles user input, updates histories, and generates the model's response.
-        """
-        # Prevent multiple submissions
-        if is_generating or not user_message.strip():
-            return {
-                chatbot: display_history,
-                display_history_state: display_history,
-                model_history_state: model_history,
-                is_generating_state: is_generating,
-                user_input: user_message,
-                submit_btn: gr.update(interactive=not is_generating)
-            }
-
-        # Set generating state
-        is_generating = True
-
-        # Update model history (clean format for model)
-        model_history.append({"role": "user", "content": user_message.strip()})
-
-        # Update display history (for Gradio chatbot)
-        display_history.append([user_message.strip(), None])
-
-        # Yield intermediate state to show user message and disable input
-        yield {
-            chatbot: display_history,
-            display_history_state: display_history,
-            model_history_state: model_history,
-            is_generating_state: is_generating,
-            user_input: "",
-            submit_btn: gr.update(interactive=False, value="🔄 Generating...")
-        }
-
-        # Prepare messages for model (include system prompt)
-        messages_for_model = []
-        if system_prompt_text.strip():
-            messages_for_model.append({"role": "system", "content": system_prompt_text.strip()})
-        messages_for_model.extend(model_history)
-
-        try:
-            # Generate response with hyperparameters
-            thinking, answer = generate_and_parse(
-                messages_for_model,
-                temperature=temp,
-                top_p=top_p_val,
-                top_k=top_k_val,
-                min_p=min_p_val,
-                max_new_tokens=max_tokens
-            )
-
-            # Format response for display
-            if thinking and thinking.strip():
-                formatted_response = f"""<details>
-<summary><b>🤔 Show Reasoning Process</b></summary>
-
-```
-{thinking}
-```
-
 </details>
-
 {answer}"""
             else:
                 formatted_response = answer
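
For reference, the removed `generate_and_parse` block above separates the model's reasoning from its final answer by scanning the generated token ids for the `</think>` token (id 151668 in the Qwen3 tokenizer, per the removed code) and decoding the two halves separately. A minimal, self-contained sketch of that splitting step; the helper name and the `decode` callable are illustrative, not part of app.py:

```python
# Sketch of the </think> split used by the removed generate_and_parse().
# Assumes a Qwen3-style tokenizer where "</think>" has token id 151668.
from typing import Callable, List, Tuple

END_THINK_TOKEN_ID = 151668  # "</think>" per the removed code


def split_thinking(output_token_ids: List[int],
                   decode: Callable[[List[int]], str]) -> Tuple[str, str]:
    """Return (thinking, answer) text from generated token ids.

    `decode` maps a list of token ids to text, e.g.
    lambda ids: tokenizer.decode(ids, skip_special_tokens=True).
    """
    def strip_tags(s: str) -> str:
        return s.replace("<think>", "").replace("</think>", "").strip()

    if END_THINK_TOKEN_ID in output_token_ids:
        cut = output_token_ids.index(END_THINK_TOKEN_ID) + 1
        return strip_tags(decode(output_token_ids[:cut])), decode(output_token_ids[cut:]).strip()
    # No </think> marker: treat the whole output as the answer.
    return "", strip_tags(decode(output_token_ids))
```
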
@@ -368,33 +38,44 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
                 submit_btn: gr.update(interactive=True, value="Send")
             }
 
+    # --- CORRECTED FUNCTION ---
     def retry_last(display_history: list, model_history: list, system_prompt_text: str,
-                   temp: float, top_p_val: float, top_k_val: int,
+                   temp: float, top_p_val: float, top_k_val: int,
                    min_p_val: float, max_tokens: int):
-        """Retry the last user message"""
-        if not model_history or len(model_history) < 2:
-            return {
+        """
+        Retry the last user message with corrected history and generator handling.
+        """
+        # Safety check: ensure there is a history and the last message was from the assistant
+        if not model_history or model_history[-1]["role"] != "assistant":
+            # If nothing to retry, yield the current state and stop
+            yield {
                 chatbot: display_history,
                 display_history_state: display_history,
                 model_history_state: model_history,
                 is_generating_state: False
             }
+            return
+
+        # Correctly remove the last turn (assistant response + user query)
+        model_history.pop()  # Remove assistant's message
+        display_history.pop()  # Remove assistant's message from display
+
+        # Get the last user message to resubmit it, then remove it
+        last_user_entry = model_history.pop()
+        last_user_msg = last_user_entry["content"]
 
-        # Remove last assistant message
-        if model_history[-1]["role"] == "assistant":
-            model_history = model_history[:-1]
-            display_history = display_history[:-1]
-
-        # Get last user message
-        last_user_msg = model_history[-1]["content"]
-        model_history = model_history[:-1]
-
-        # Regenerate
-        return handle_user_message(
-            last_user_msg, display_history[:-1], model_history,
+        # We also pop the user message from the display history because
+        # handle_user_message will add it back.
+        if display_history:
+            display_history.pop()
+
+        # Use 'yield from' to properly call the generator and pass its updates
+        yield from handle_user_message(
+            last_user_msg, display_history, model_history,
             system_prompt_text, False, temp, top_p_val, top_k_val, min_p_val, max_tokens
         )
 
+
     def on_input_change(text, is_generating):
         """Handle input text changes"""
         return gr.update(interactive=not is_generating and bool(text.strip()))
@@ -402,9 +83,9 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
     # Event listeners
     submit_event = submit_btn.click(
         handle_user_message,
-        inputs=[user_input, display_history_state, model_history_state, system_prompt,
+        inputs=[user_input, display_history_state, model_history_state, system_prompt,
                 is_generating_state, temperature, top_p, top_k, min_p, max_new_tokens],
-        outputs=[chatbot, display_history_state, model_history_state, is_generating_state,
+        outputs=[chatbot, display_history_state, model_history_state, is_generating_state,
                  user_input, submit_btn],
         show_progress=True
     )
@@ -421,7 +102,7 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
     # Clear button event
     clear_btn.click(
         clear_history,
-        outputs=[chatbot, display_history_state, model_history_state, is_generating_state,
+        outputs=[chatbot, display_history_state, model_history_state, is_generating_state,
                  user_input, submit_btn]
     )
 
@@ -441,16 +122,5 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
         outputs=[submit_btn]
     )
 
-    # Footer
-    gr.HTML("""
-    <div style="text-align: center; margin-top: 2rem; padding: 1rem; background: #f8f9fa; border-radius: 10px;">
-        <p style="margin: 0; color: #666;">
-            🚀 Powered by <strong>Zero GPU</strong> on Hugging Face Spaces |
-            Built with ❤️ using Gradio |
-            Model by <a href="https://huggingface.co/gr0010" target="_blank">AGI-0</a>
-        </p>
-    </div>
-    """)
-
 if __name__ == "__main__":
     demo.launch(debug=True, share=False)
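
The functional change in this commit is in `retry_last`: the old version ended with `return handle_user_message(...)`, but `handle_user_message` is a generator (it yields intermediate UI updates), so calling it only creates a generator object, and returning that object from a non-generator handler never runs its body. The new version delegates with `yield from`, which makes `retry_last` itself a generator and forwards every intermediate update to Gradio, matching the comment in the added code. A minimal sketch of the difference, independent of Gradio; the names below are illustrative, not from app.py:

```python
# Sketch: returning a generator vs. delegating to it with `yield from`.
# Names are illustrative; they are not part of app.py.

def handle_message(steps: int):
    """Stand-in for handle_user_message: yields intermediate updates."""
    for i in range(steps):
        yield f"update {i + 1}/{steps}"


def retry_broken(steps: int):
    # Not a generator itself: this just returns a generator object
    # without running handle_message, so no updates are produced here.
    return handle_message(steps)


def retry_fixed(steps: int):
    # `yield from` turns this function into a generator and forwards
    # every value that handle_message yields to the caller.
    yield from handle_message(steps)


if __name__ == "__main__":
    print(list(retry_fixed(3)))   # ['update 1/3', 'update 2/3', 'update 3/3']
    print(retry_broken(3))        # <generator object handle_message at 0x...>
```
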