Update app.py
app.py CHANGED
@@ -1,8 +1,9 @@
+import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 # ---------------- CONFIG ----------------
-MODEL_NAME = "
+MODEL_NAME = "google/gemma-2b"  # much smaller, runs on CPU free tier
 SYSTEM_PROMPT_DEFAULT = (
     "You are a formal and polite AI assistant. "
     "Always respond appropriately depending on the selected explanation style."
@@ -17,15 +18,15 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    trust_remote_code=True,
-    torch_dtype=
+    trust_remote_code=True,
+    torch_dtype=torch.float32  # ✅ force CPU-safe precision
 )
 
 generator = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    device
+    device=-1  # ✅ always CPU for free hosting
 )
 
 # ---------------- HELPERS ----------------
@@ -57,7 +58,6 @@ def chat(user_message, chat_history, system_message, max_tokens, temperature, top_p):
         top_p=top_p,
     )[0]['generated_text']
 
-    # Remove prompt part from output
     response = output[len(prompt):].strip()
 
     chat_history.append({"role": "user", "content": user_message})
@@ -67,7 +67,7 @@ def chat(user_message, chat_history, system_message, max_tokens, temperature, top_p):
 
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
-    gr.Markdown("# 🧠
+    gr.Markdown("# 🧠 Gemma-2B Chat Assistant (CPU-safe)")
 
     chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True)
 
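For reference, below is a minimal standalone sketch of the CPU-only setup this commit converges on, assuming the same torch/transformers stack the Space uses; the final smoke-test call is illustrative and not part of app.py. Note that google/gemma-2b is a gated checkpoint on the Hub, so the Space will likely also need an access token for an account that has accepted the model license.

# Minimal sketch of the CPU-safe loading path from the diff above.
# Assumption: torch and transformers are installed as in the Space's requirements.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

MODEL_NAME = "google/gemma-2b"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.float32,  # fp16/bf16 kernels are unreliable on CPU; fp32 is safe
)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # device=-1 pins the pipeline to CPU even if CUDA is present
)

# Illustrative smoke test (not in app.py): confirms the wiring end to end.
print(generator("Hello!", max_new_tokens=16)[0]["generated_text"])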