Asilbek14 committed
Commit ce493a4 · verified · 1 Parent(s): 5cd9ed0

Update app.py

Files changed (1)
  1. app.py +26 -9
app.py CHANGED
@@ -1,9 +1,9 @@
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 # ---------------- CONFIG ----------------
-MODEL_NAME = "google/gemma-3-270m-it"  # instruction-tuned Gemma 3 270M (for better instruction following)
+MODEL_NAME = "google/gemma-3-270m-it"  # instruction-tuned Gemma 3 model
 SYSTEM_PROMPT_DEFAULT = (
     "You are a formal and polite AI assistant. "
     "Always respond appropriately depending on the selected explanation style."
@@ -15,52 +15,69 @@ TOP_P_DEFAULT = 0.9
 
 # ---------------- LOAD MODEL ----------------
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForSeq2SeqLM.from_pretrained(
+
+model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.float32  # safe for CPU
+    torch_dtype=torch.float32,  # safe for CPU
 )
 
 generator = pipeline(
-    "text2text-generation",
+    "text-generation",  # ✅ causal LM (not seq2seq)
     model=model,
     tokenizer=tokenizer,
-    device=-1  # CPU
+    device=-1  # ✅ force CPU
 )
 
 # ---------------- HELPERS ----------------
 def format_prompt(chat_history, user_message, system_message, response_style):
+    # Start with system message
     prompt = system_message + "\n\n"
+
+    # Add only user messages (optional: you can also add last assistant reply if needed)
     for turn in chat_history:
         if turn["role"] == "user":
             prompt += f"{turn['content']}\n"
+
+    # Add the new user message
    prompt += f"{user_message}\n"
+
+    # Optionally instruct for explanation style
     if response_style == "No explanation":
         prompt += " Answer concisely with no explanation."
     elif response_style == "Short explanation":
         prompt += " Answer briefly with a one-sentence explanation."
     elif response_style == "Detailed explanation":
         prompt += " Answer in detail with reasoning and examples."
+
     return prompt
 
+
 # ---------------- CHAT FUNCTION ----------------
 def chat(user_message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
     chat_history = chat_history or []
     prompt = format_prompt(chat_history, user_message, system_message, response_style)
+
     output = generator(
         prompt,
         max_new_tokens=max_tokens,
         do_sample=True,
         temperature=temperature,
         top_p=top_p,
-    )[0]["generated_text"]
-    response = output.replace(prompt, "").strip()
+    )[0]['generated_text']
+
+    # For causal LMs, output includes the prompt → strip it
+    response = output[len(prompt):].strip()
+
+    # Save user and assistant content without labels
     chat_history.append({"role": "user", "content": user_message})
     chat_history.append({"role": "assistant", "content": response})
+
     return "", chat_history
 
+
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
-    gr.Markdown("# 🧠 Gemma 3 270M Chat Assistant")
+    gr.Markdown("# 🧠 Gemma-3-270M Chat Assistant (CPU-safe)")
 
     chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True)
 
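Note on the key fix above: a "text-generation" (causal LM) pipeline returns the prompt concatenated with the completion, which is why the commit slices the prompt off with output[len(prompt):]. As a minimal sketch of an alternative the pipeline itself provides, return_full_text=False strips the prompt automatically (gpt2 is used here only as a small, ungated stand-in model, not the model from this app):

from transformers import pipeline

# Any causal LM works; gpt2 is just a small stand-in for the demo.
generator = pipeline("text-generation", model="gpt2", device=-1)

prompt = "The capital of France is"
out = generator(prompt, max_new_tokens=10, return_full_text=False)

# With return_full_text=False the pipeline drops the prompt itself,
# so no manual slicing is needed.
print(out[0]["generated_text"])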
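Separately, since MODEL_NAME points at an instruction-tuned checkpoint, the plain-text prompt built by format_prompt could instead go through the tokenizer's chat template, which emits the turn markers the model was trained on. A sketch under that assumption (gemma-3-270m-it is gated, so downloading it requires accepting its license on the Hub):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-270m-it")

messages = [
    {"role": "user", "content": "Explain top-p sampling in one sentence."},
]

# add_generation_prompt=True appends the marker that cues the model
# to answer as the assistant.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)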
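The diff is truncated before the UI event handlers, so purely as an illustration (not this repo's actual wiring), one plausible way to connect a messages-format gr.Chatbot to a chat callback looks like this; the respond stub stands in for the real chat function:

import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages", height=500)
    msg = gr.Textbox(placeholder="Type a message...")

    # Stub with the same ("", updated_history) return shape as chat() in app.py.
    def respond(user_message, history):
        history = history or []
        history.append({"role": "user", "content": user_message})
        history.append({"role": "assistant", "content": "(model reply)"})
        return "", history

    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

demo.launch()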