Spaces:
Sleeping
Sleeping
ee
Browse files
app.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
2 |
# from huggingface_hub import InferenceClient
|
3 |
from transformers import pipeline
|
4 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
@@ -8,8 +10,9 @@ HF_TOKEN = os.getenv('HF_TOKEN')
|
|
8 |
checkpoint = "zidsi/SLlamica_PT4SFT_v2"
|
9 |
device = "cuda" # "cuda" or "cpu"
|
10 |
tokenizer = AutoTokenizer.from_pretrained(checkpoint,token=HF_TOKEN)
|
11 |
-
model = AutoModelForCausalLM.from_pretrained(checkpoint,token=HF_TOKEN)
|
12 |
-
|
|
|
13 |
def predict(message, history):
|
14 |
history.append({"role": "user", "content": message})
|
15 |
input_text = tokenizer.apply_chat_template(history, tokenize=False)
|
|
|
1 |
import gradio as gr
|
2 |
+
import spaces
|
3 |
+
|
4 |
# from huggingface_hub import InferenceClient
|
5 |
from transformers import pipeline
|
6 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
10 |
checkpoint = "zidsi/SLlamica_PT4SFT_v2"
|
11 |
device = "cuda" # "cuda" or "cpu"
|
12 |
tokenizer = AutoTokenizer.from_pretrained(checkpoint,token=HF_TOKEN)
|
13 |
+
model = AutoModelForCausalLM.from_pretrained(checkpoint,token=HF_TOKEN)
|
14 |
+
model.to(device)
|
15 |
+
@spaces.GPU
|
16 |
def predict(message, history):
|
17 |
history.append({"role": "user", "content": message})
|
18 |
input_text = tokenizer.apply_chat_template(history, tokenize=False)
|