CallmeKaito committed on
Commit
b14177c
·
verified ·
1 Parent(s): c3272a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -5,12 +5,13 @@ from peft import PeftModel
5
  import torch
6
 
7
  # Load model and tokenizer once using caching
 
 
8
@st.cache_resource
def load_model():
    """Load the LoRA-adapted Llama 3.2 1B Instruct model and its tokenizer.

    Wrapped in Streamlit's `st.cache_resource` so the weights are downloaded
    and materialized only once per server process, not on every rerun.

    Returns:
        tuple: (model, tokenizer) with the model already moved to the
        selected device (CUDA when available, otherwise CPU).
    """
    base = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
    lora_model = PeftModel.from_pretrained(base, "CallmeKaito/llama-3.2-1b-it-brainrot")
    tok = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
    # Prefer the GPU when one is visible to torch; fall back to CPU.
    target = "cuda" if torch.cuda.is_available() else "cpu"
    return lora_model.to(target), tok
16
 
@@ -51,7 +52,7 @@ if prompt := st.chat_input("What's up?"):
51
  )
52
 
53
  # Tokenize and generate
54
- inputs = tokenizer(chat_prompt, return_tensors="pt").to("cuda")
55
  outputs = model.generate(
56
  **inputs,
57
  max_new_tokens=500,
 
5
import torch

# Load model and tokenizer once using caching.
# The compute device is chosen a single time at import; both load_model()
# and the generation code below reference this module-level name, so the
# model and its input tensors always end up on the same device.
device = "cuda" if torch.cuda.is_available() else "cpu"
10
@st.cache_resource
def load_model():
    """Build the chat model (base weights + LoRA adapter) and tokenizer.

    Cached via `st.cache_resource` so the expensive download/instantiation
    happens once per server process. Relies on the module-level `device`
    selected at import time, keeping model and inputs on the same device.

    Returns:
        tuple: (model, tokenizer), model already placed on `device`.
    """
    base = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
    lora_model = PeftModel.from_pretrained(base, "CallmeKaito/llama-3.2-1b-it-brainrot")
    tok = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
    return lora_model.to(device), tok
17
 
 
52
  )
53
 
54
  # Tokenize and generate
55
+ inputs = tokenizer(chat_prompt, return_tensors="pt").to(device)
56
  outputs = model.generate(
57
  **inputs,
58
  max_new_tokens=500,