Update app.py
app.py CHANGED
@@ -1,8 +1,9 @@
+import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 # ---------------- CONFIG ----------------
-MODEL_NAME = "
+MODEL_NAME = "google/gemma-2b"  # much smaller, runs on CPU free tier
 SYSTEM_PROMPT_DEFAULT = (
     "You are a formal and polite AI assistant. "
     "Always respond appropriately depending on the selected explanation style."
@@ -17,15 +18,15 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    trust_remote_code=True,
-    torch_dtype=
+    trust_remote_code=True,
+    torch_dtype=torch.float32  # ✅ force CPU-safe precision
 )
 
 generator = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    device
+    device=-1  # ✅ always CPU for free hosting
 )
 
 # ---------------- HELPERS ----------------
@@ -57,7 +58,6 @@ def chat(user_message, chat_history, system_message, max_tokens, temperature, top_p):
         top_p=top_p,
     )[0]['generated_text']
 
-    # Remove prompt part from output
     response = output[len(prompt):].strip()
 
     chat_history.append({"role": "user", "content": user_message})
@@ -67,7 +67,7 @@ def chat(user_message, chat_history, system_message, max_tokens, temperature, top_p):
 
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
-    gr.Markdown("# 🧠
+    gr.Markdown("# 🧠 Gemma-2B Chat Assistant (CPU-safe)")
 
     chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True)
 
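For reference, below is a minimal standalone sketch of the CPU-only setup this commit converges on, assuming the same torch/transformers stack the Space uses; the final smoke-test call is illustrative and not part of app.py. Note that google/gemma-2b is a gated checkpoint on the Hub, so the Space will likely also need an access token for an account that has accepted the model license.

# Minimal sketch of the CPU-safe loading path from the diff above.
# Assumption: torch and transformers are installed as in the Space's requirements.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

MODEL_NAME = "google/gemma-2b"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.float32,  # fp16/bf16 kernels are unreliable on CPU; fp32 is safe
)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # device=-1 pins the pipeline to CPU even if CUDA is present
)

# Illustrative smoke test (not in app.py): confirms the wiring end to end.
print(generator("Hello!", max_new_tokens=16)[0]["generated_text"])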