Update app.py
app.py CHANGED
@@ -5,12 +5,13 @@ from peft import PeftModel
 import torch
 
 # Load model and tokenizer once using caching
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
 @st.cache_resource
 def load_model():
     base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
     model = PeftModel.from_pretrained(base_model, "CallmeKaito/llama-3.2-1b-it-brainrot")
     tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     model = model.to(device)
     return model, tokenizer
 
@@ -51,7 +52,7 @@ if prompt := st.chat_input("What's up?"):
     )
 
     # Tokenize and generate
-    inputs = tokenizer(chat_prompt, return_tensors="pt").to(
+    inputs = tokenizer(chat_prompt, return_tensors="pt").to(device)
     outputs = model.generate(
         **inputs,
         max_new_tokens=500,
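In short, this commit hoists the device check out of load_model() to module scope, so the same device name is visible both where the model is moved to GPU/CPU and where the prompt is tokenized, and it completes the previously truncated .to( call as .to(device). A minimal sketch of how the resulting app.py fits together follows; only the lines shown in the diff are confirmed, so the chat_prompt construction and the decode/display step at the end are assumptions based on a typical Streamlit chat app:

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Resolved once at import time; visible to load_model() and the chat loop.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model and tokenizer once using caching
@st.cache_resource
def load_model():
    base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
    model = PeftModel.from_pretrained(base_model, "CallmeKaito/llama-3.2-1b-it-brainrot")
    tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
    model = model.to(device)
    return model, tokenizer

model, tokenizer = load_model()

if prompt := st.chat_input("What's up?"):
    # Assumption: the real app builds chat_prompt from the conversation
    # history; here the raw prompt stands in for it.
    chat_prompt = prompt

    # Tokenize and generate: the inputs must land on the same device
    # as the model, which is why device now lives at module scope.
    inputs = tokenizer(chat_prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
    )
    # Assumption: the generated text is decoded and shown in the chat UI.
    st.write(tokenizer.decode(outputs[0], skip_special_tokens=True))

The design point is scoping: load_model() is wrapped in @st.cache_resource, so anything computed inside it (like the old line-13 device) is local to that function and invisible to the generation code further down. Defining device once at module level lets both the cached loader and the per-message tokenization agree on where tensors should live.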