posterllava committed on
Commit
9668a19
·
1 Parent(s): deac2a1

support ZeroGPU, fixed

Browse files
Files changed (2) hide show
  1. app.py +0 -2
  2. llava/serve/model_worker.py +2 -0
app.py CHANGED
@@ -1,4 +1,3 @@
1
- +import spaces
2
  import sys
3
  import os
4
  import argparse
@@ -25,7 +24,6 @@ def start_controller():
25
  print(controller_command)
26
  return subprocess.Popen(controller_command)
27
 
28
29
  def start_worker(model_path: str, bits=16):
30
  print(f"Starting the model worker for the model {model_path}")
31
  model_name = model_path.strip("/").split("/")[-1]
 
 
1
  import sys
2
  import os
3
  import argparse
 
24
  print(controller_command)
25
  return subprocess.Popen(controller_command)
26
 
 
27
  def start_worker(model_path: str, bits=16):
28
  print(f"Starting the model worker for the model {model_path}")
29
  model_name = model_path.strip("/").split("/")[-1]
llava/serve/model_worker.py CHANGED
@@ -1,6 +1,7 @@
1
  """
2
  A model worker executes the model.
3
  """
 
4
  import argparse
5
  import asyncio
6
  import json
@@ -119,6 +120,7 @@ class ModelWorker:
119
  "queue_length": self.get_queue_length(),
120
  }
121
 
 
122
  @torch.inference_mode()
123
  def generate_stream(self, params):
124
  tokenizer, model, image_processor = self.tokenizer, self.model, self.image_processor
 
1
  """
2
  A model worker executes the model.
3
  """
4
+ import spaces
5
  import argparse
6
  import asyncio
7
  import json
 
120
  "queue_length": self.get_queue_length(),
121
  }
122
 
123
+ @spaces.GPU
124
  @torch.inference_mode()
125
  def generate_stream(self, params):
126
  tokenizer, model, image_processor = self.tokenizer, self.model, self.image_processor