Dorjzodovsuren committed on
Commit
5130789
·
verified ·
1 Parent(s): 59b2771

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -9,6 +9,7 @@ Original file is located at
9
 
10
  import torch
11
  import gradio as gr
 
12
  from threading import Thread
13
  from unsloth import FastLanguageModel
14
  from transformers import TextStreamer
@@ -46,7 +47,7 @@ FastLanguageModel.for_inference(model)
46
  text_streamer = TextStreamer(tokenizer, skip_prompt=False,skip_special_tokens=True)
47
 
48
  # Get the device based on GPU availability
49
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
50
 
51
  # Move model into device
52
  model = model.to(device)
@@ -61,6 +62,7 @@ class StopOnTokens(StoppingCriteria):
61
 
62
  # Current implementation does not support conversation based on history.
63
  # Highly recommend to experiment on various hyper parameters to compare qualities.
 
64
  def predict(message, history):
65
  stop = StopOnTokens()
66
  messages = alpaca_prompt.format(
 
9
 
10
  import torch
11
  import gradio as gr
12
+ import spaces
13
  from threading import Thread
14
  from unsloth import FastLanguageModel
15
  from transformers import TextStreamer
 
47
  text_streamer = TextStreamer(tokenizer, skip_prompt=False,skip_special_tokens=True)
48
 
49
  # Get the device based on GPU availability
50
+ device = 'cuda'
51
 
52
  # Move model into device
53
  model = model.to(device)
 
62
 
63
  # Current implementation does not support conversation based on history.
64
  # Highly recommend to experiment on various hyper parameters to compare qualities.
65
+ @spaces.GPU(duration=gpu_timeout)
66
  def predict(message, history):
67
  stop = StopOnTokens()
68
  messages = alpaca_prompt.format(