Asilbek14 committed
Commit ce493a4 · verified · 1 Parent(s): 5cd9ed0

Update app.py

Files changed (1)
  1. app.py +26 -9
app.py CHANGED
@@ -1,9 +1,9 @@
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 # ---------------- CONFIG ----------------
-MODEL_NAME = "google/gemma-3-270m-it"  # instruction-tuned Gemma 3 270M (for better instruction following)
+MODEL_NAME = "google/gemma-3-270m-it"  # instruction-tuned Gemma 3 model
 SYSTEM_PROMPT_DEFAULT = (
     "You are a formal and polite AI assistant. "
     "Always respond appropriately depending on the selected explanation style."
@@ -15,52 +15,69 @@ TOP_P_DEFAULT = 0.9
 
 # ---------------- LOAD MODEL ----------------
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForSeq2SeqLM.from_pretrained(
+
+model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.float32  # safe for CPU
+    torch_dtype=torch.float32,  # safe for CPU
 )
 
 generator = pipeline(
-    "text2text-generation",
+    "text-generation",  # ✅ causal LM (not seq2seq)
     model=model,
     tokenizer=tokenizer,
-    device=-1  # CPU
+    device=-1  # ✅ force CPU
 )
 
 # ---------------- HELPERS ----------------
 def format_prompt(chat_history, user_message, system_message, response_style):
+    # Start with system message
     prompt = system_message + "\n\n"
+
+    # Add only user messages (optional: you can also add last assistant reply if needed)
     for turn in chat_history:
         if turn["role"] == "user":
             prompt += f"{turn['content']}\n"
+
+    # Add the new user message
    prompt += f"{user_message}\n"
+
+    # Optionally instruct for explanation style
     if response_style == "No explanation":
         prompt += " Answer concisely with no explanation."
     elif response_style == "Short explanation":
         prompt += " Answer briefly with a one-sentence explanation."
     elif response_style == "Detailed explanation":
         prompt += " Answer in detail with reasoning and examples."
+
     return prompt
 
+
 # ---------------- CHAT FUNCTION ----------------
 def chat(user_message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
     chat_history = chat_history or []
     prompt = format_prompt(chat_history, user_message, system_message, response_style)
+
     output = generator(
         prompt,
         max_new_tokens=max_tokens,
         do_sample=True,
         temperature=temperature,
         top_p=top_p,
-    )[0]["generated_text"]
-    response = output.replace(prompt, "").strip()
+    )[0]['generated_text']
+
+    # For causal LMs, output includes the prompt → strip it
+    response = output[len(prompt):].strip()
+
+    # Save user and assistant content without labels
     chat_history.append({"role": "user", "content": user_message})
     chat_history.append({"role": "assistant", "content": response})
+
     return "", chat_history
 
+
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
-    gr.Markdown("# 🧠 Gemma 3 270M Chat Assistant")
+    gr.Markdown("# 🧠 Gemma-3-270M Chat Assistant (CPU-safe)")
 
     chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True)
 
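Note on the key fix above: a "text-generation" (causal LM) pipeline returns the prompt concatenated with the completion, which is why the commit slices the prompt off with output[len(prompt):]. As a minimal sketch of an alternative the pipeline itself provides, return_full_text=False strips the prompt automatically (gpt2 is used here only as a small, ungated stand-in model, not the model from this app):

from transformers import pipeline

# Any causal LM works; gpt2 is just a small stand-in for the demo.
generator = pipeline("text-generation", model="gpt2", device=-1)

prompt = "The capital of France is"
out = generator(prompt, max_new_tokens=10, return_full_text=False)

# With return_full_text=False the pipeline drops the prompt itself,
# so no manual slicing is needed.
print(out[0]["generated_text"])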
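Separately, since MODEL_NAME points at an instruction-tuned checkpoint, the plain-text prompt built by format_prompt could instead go through the tokenizer's chat template, which emits the turn markers the model was trained on. A sketch under that assumption (gemma-3-270m-it is gated, so downloading it requires accepting its license on the Hub):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-270m-it")

messages = [
    {"role": "user", "content": "Explain top-p sampling in one sentence."},
]

# add_generation_prompt=True appends the marker that cues the model
# to answer as the assistant.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)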
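The diff is truncated before the UI event handlers, so purely as an illustration (not this repo's actual wiring), one plausible way to connect a messages-format gr.Chatbot to a chat callback looks like this; the respond stub stands in for the real chat function:

import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages", height=500)
    msg = gr.Textbox(placeholder="Type a message...")

    # Stub with the same ("", updated_history) return shape as chat() in app.py.
    def respond(user_message, history):
        history = history or []
        history.append({"role": "user", "content": user_message})
        history.append({"role": "assistant", "content": "(model reply)"})
        return "", history

    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

demo.launch()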