Spaces:
Runtime error
Runtime error
Commit
·
9668a19
1
Parent(s):
deac2a1
support ZeroGPU, fixed
Browse files
- app.py +0 -2
- llava/serve/model_worker.py +2 -0
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
+import spaces
|
2 |
import sys
|
3 |
import os
|
4 |
import argparse
|
@@ -25,7 +24,6 @@ def start_controller():
|
|
25 |
print(controller_command)
|
26 |
return subprocess.Popen(controller_command)
|
27 |
|
28 | |
29 |
def start_worker(model_path: str, bits=16):
|
30 |
print(f"Starting the model worker for the model {model_path}")
|
31 |
model_name = model_path.strip("/").split("/")[-1]
|
|
|
|
|
1 |
import sys
|
2 |
import os
|
3 |
import argparse
|
|
|
24 |
print(controller_command)
|
25 |
return subprocess.Popen(controller_command)
|
26 |
|
|
|
27 |
def start_worker(model_path: str, bits=16):
|
28 |
print(f"Starting the model worker for the model {model_path}")
|
29 |
model_name = model_path.strip("/").split("/")[-1]
|
llava/serve/model_worker.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
"""
|
2 |
A model worker executes the model.
|
3 |
"""
|
|
|
4 |
import argparse
|
5 |
import asyncio
|
6 |
import json
|
@@ -119,6 +120,7 @@ class ModelWorker:
|
|
119 |
"queue_length": self.get_queue_length(),
|
120 |
}
|
121 |
|
|
|
122 |
@torch.inference_mode()
|
123 |
def generate_stream(self, params):
|
124 |
tokenizer, model, image_processor = self.tokenizer, self.model, self.image_processor
|
|
|
1 |
"""
|
2 |
A model worker executes the model.
|
3 |
"""
|
4 |
+
import spaces
|
5 |
import argparse
|
6 |
import asyncio
|
7 |
import json
|
|
|
120 |
"queue_length": self.get_queue_length(),
|
121 |
}
|
122 |
|
123 |
+
@spaces.GPU
|
124 |
@torch.inference_mode()
|
125 |
def generate_stream(self, params):
|
126 |
tokenizer, model, image_processor = self.tokenizer, self.model, self.image_processor
|