Update app.py
app.py CHANGED
@@ -1,9 +1,9 @@
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, …
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 # ---------------- CONFIG ----------------
-MODEL_NAME = "google/gemma-3-270m-it"
+MODEL_NAME = "google/gemma-3-270m-it"  # ✅ instruction-tuned Gemma 3 model
 SYSTEM_PROMPT_DEFAULT = (
     "You are a formal and polite AI assistant. "
     "Always respond appropriately depending on the selected explanation style."
@@ -15,52 +15,69 @@ TOP_P_DEFAULT = 0.9
 
 # ---------------- LOAD MODEL ----------------
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-…
+
+model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.float32
+    torch_dtype=torch.float32,  # ✅ safe for CPU
 )
 
 generator = pipeline(
-    "…
+    "text-generation",  # ✅ causal LM (not seq2seq)
     model=model,
     tokenizer=tokenizer,
-    device=-1
+    device=-1  # ✅ force CPU
 )
 
 # ---------------- HELPERS ----------------
 def format_prompt(chat_history, user_message, system_message, response_style):
+    # Start with system message
     prompt = system_message + "\n\n"
+
+    # Add only user messages (optional: you can also add last assistant reply if needed)
     for turn in chat_history:
         if turn["role"] == "user":
             prompt += f"{turn['content']}\n"
+
+    # Add the new user message
     prompt += f"{user_message}\n"
+
+    # Optionally instruct for explanation style
     if response_style == "No explanation":
         prompt += " Answer concisely with no explanation."
     elif response_style == "Short explanation":
         prompt += " Answer briefly with a one-sentence explanation."
    elif response_style == "Detailed explanation":
         prompt += " Answer in detail with reasoning and examples."
+
     return prompt
 
+
 # ---------------- CHAT FUNCTION ----------------
 def chat(user_message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
     chat_history = chat_history or []
     prompt = format_prompt(chat_history, user_message, system_message, response_style)
+
     output = generator(
         prompt,
         max_new_tokens=max_tokens,
         do_sample=True,
         temperature=temperature,
         top_p=top_p,
-    )[0][…
-…
+    )[0]['generated_text']
+
+    # For causal LMs, output includes the prompt → strip it
+    response = output[len(prompt):].strip()
+
+    # Save user and assistant content without labels
     chat_history.append({"role": "user", "content": user_message})
     chat_history.append({"role": "assistant", "content": response})
+
     return "", chat_history
 
+
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
-    gr.Markdown("# 🧠 Gemma…
+    gr.Markdown("# 🧠 Gemma-3-270M Chat Assistant (CPU-safe)")
 
     chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True)
 
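A side note on the prompt path, not part of this commit: instruction-tuned Gemma checkpoints ship a chat template, and the text-generation pipeline can be asked to return only the continuation, which would make the manual output[len(prompt):] slice unnecessary. Below is a minimal sketch of that alternative, assuming the tokenizer, generator, and SYSTEM_PROMPT_DEFAULT built in app.py above; build_prompt is a hypothetical helper name, while apply_chat_template and return_full_text are standard Transformers APIs.

def build_prompt(chat_history, user_message, system_message):
    # Fold the system text into the latest user turn; some Gemma chat
    # templates reject a standalone "system" role.
    messages = list(chat_history) + [
        {"role": "user", "content": f"{system_message}\n\n{user_message}"}
    ]
    # Renders the checkpoint's own turn markers instead of hand-built
    # "\n" separators, and appends the marker that cues the model to
    # speak next.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

prompt = build_prompt([], "What is gradient descent?", SYSTEM_PROMPT_DEFAULT)

# return_full_text=False makes the pipeline return only newly generated
# tokens, so no manual output[len(prompt):] slice is needed.
response = generator(
    prompt,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    return_full_text=False,
)[0]["generated_text"].strip()

Either path feeds chat() the same way; the template route mainly protects against drift between hand-written separators and the format the checkpoint was tuned on.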