Spaces: Runtime error
Update app.py
Browse files
app.py CHANGED
|
@@ -9,6 +9,7 @@ Original file is located at
  9
 10  import torch
 11  import gradio as gr
 12  from threading import Thread
 13  from unsloth import FastLanguageModel
 14  from transformers import TextStreamer
|
@@ -46,7 +47,7 @@ FastLanguageModel.for_inference(model)
 46  text_streamer = TextStreamer(tokenizer, skip_prompt=False,skip_special_tokens=True)
 47
 48  # Get the device based on GPU availability
 49 -device = 'cuda'
 50
 51  # Move model into device
 52  model = model.to(device)
@@ -61,6 +62,7 @@ class StopOnTokens(StoppingCriteria):
 61
 62  # Current implementation does not support conversation based on history.
 63  # Highly recommend to experiment on various hyper parameters to compare qualities.
 64  def predict(message, history):
 65      stop = StopOnTokens()
 66      messages = alpaca_prompt.format(
(updated file, hunk 1)
  9
 10  import torch
 11  import gradio as gr
 12 +import spaces
 13  from threading import Thread
 14  from unsloth import FastLanguageModel
 15  from transformers import TextStreamer
(updated file, hunk 2)
 47  text_streamer = TextStreamer(tokenizer, skip_prompt=False,skip_special_tokens=True)
 48
 49  # Get the device based on GPU availability
 50 +device = 'cuda'
 51
 52  # Move model into device
 53  model = model.to(device)
(updated file, hunk 3)
 62
 63  # Current implementation does not support conversation based on history.
 64  # Highly recommend to experiment on various hyper parameters to compare qualities.
 65 +@spaces.GPU(duration=gpu_timeout)
 66  def predict(message, history):
 67      stop = StopOnTokens()
 68      messages = alpaca_prompt.format(