Asilbek14 committed
Commit 8d81ea9 (verified)
Parent(s): a7d7df3

Update app.py

Files changed (1)
  1. app.py +6 -6
app.py CHANGED
@@ -1,8 +1,9 @@
+import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 # ---------------- CONFIG ----------------
-MODEL_NAME = "mosaicml/mpt-7b-instruct" # Fully open-source, no gated access
+MODEL_NAME = "google/gemma-2b" # much smaller, runs on CPU free tier
 SYSTEM_PROMPT_DEFAULT = (
     "You are a formal and polite AI assistant. "
     "Always respond appropriately depending on the selected explanation style."
@@ -17,15 +18,15 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    trust_remote_code=True, # required for MPT
-    torch_dtype="auto"
+    trust_remote_code=True,
+    torch_dtype=torch.float32 # ✅ force CPU-safe precision
 )
 
 generator = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    device=0 if model.device.type == "cuda" else -1,
+    device=-1 # always CPU for free hosting
 )
 
 # ---------------- HELPERS ----------------
@@ -57,7 +58,6 @@ def chat(user_message, chat_history, system_message, max_tokens, temperature, top_p):
         top_p=top_p,
     )[0]['generated_text']
 
-    # Remove prompt part from output
     response = output[len(prompt):].strip()
 
     chat_history.append({"role": "user", "content": user_message})
@@ -67,7 +67,7 @@ def chat(user_message, chat_history, system_message, max_tokens, temperature, top_p):
 
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
-    gr.Markdown("# 🧠 MPT-7B Instruct Chat Assistant")
+    gr.Markdown("# 🧠 Gemma-2B Chat Assistant (CPU-safe)")
 
     chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True)
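For reviewers who want to sanity-check the new CPU-only path outside the Space, a minimal sketch along these lines mirrors the updated config. It is a reconstruction, not part of the commit, and assumes transformers and torch are installed and that your Hugging Face account has access to the google/gemma-2b checkpoint; the prompt string is an illustrative placeholder:

    # Minimal CPU smoke test mirroring the updated app.py config (sketch only).
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

    MODEL_NAME = "google/gemma-2b"

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,  # same CPU-safe precision as the commit
    )

    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device=-1,  # CPU only, matching the free-tier setting
    )

    prompt = "You are a formal and polite AI assistant.\nUser: Hello!\nAssistant:"
    output = generator(
        prompt,
        max_new_tokens=64,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )[0]["generated_text"]

    # Strip the echoed prompt, the same post-processing app.py applies.
    print(output[len(prompt):].strip())

If this prints a sensible completion, the Space's chat() handler should behave the same way, since it builds the prompt and slices it off the generated text identically.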