Spaces:

remyxai
/

VQASynth

Running on Zero

App Files Files Community

salma-remyx committed on Nov 14, 2024

Commit

5b7dfe2

1 Parent(s): 5614004

remove global

Browse files

Files changed (1) hide show

app.py +25 -19

app.py CHANGED Viewed

@@ -25,6 +25,27 @@ except OSError:
     download("en_core_web_sm")
     nlp = spacy.load("en_core_web_sm")
 def find_subject(doc):
     for token in doc:
         # Check if the token is a subject
@@ -52,30 +73,20 @@ def caption_refiner(caption):
 @spaces.GPU
 def sam2(image, input_boxes, model_id="facebook/sam-vit-base"):
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model = SamModel.from_pretrained(model_id).to(device)
-    processor = SamProcessor.from_pretrained(model_id)
-    inputs = processor(image, input_boxes=[[input_boxes]], return_tensors="pt").to(device)
     with torch.no_grad():
-        outputs = model(**inputs)
-    masks = processor.image_processor.post_process_masks(
         outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()
     )
     return masks
-@spaces.GPU
-def load_florence2(model_id="microsoft/Florence-2-base-ft", device='cuda'):
-    torch_dtype = torch.float16 if device == 'cuda' else torch.float32
-    florence_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, trust_remote_code=True).to(device)
-    florence_processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-    return florence_model, florence_processor
 @spaces.GPU
 def florence2(image, prompt="", task="<OD>"):
-    device = florence_model.device
     torch_dtype = florence_model.dtype
-    inputs = florence_processor(text=task + prompt, images=image, return_tensors="pt").to(device, torch_dtype)
     generated_ids = florence_model.generate(
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
@@ -467,10 +478,5 @@ def build_demo():
     return demo
 if __name__ == "__main__":
-    global model, transform, florence_model, florence_processor
-    model, transform = depth_pro.create_model_and_transforms(device='cuda')
-    florence_model, florence_processor = load_florence2(device='cuda')
     demo = build_demo()
     demo.launch(share=True)

     download("en_core_web_sm")
     nlp = spacy.load("en_core_web_sm")
+# Load Florence and SAM models once at the top for reuse
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+def load_florence2(model_id="microsoft/Florence-2-base-ft", device=DEVICE):
+    torch_dtype = torch.float16 if device == 'cuda' else torch.float32
+    florence_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, trust_remote_code=True).to(device)
+    florence_processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+    return florence_model, florence_processor
+florence_model, florence_processor = load_florence2()  # Loaded globally for reuse
+def load_sam_model(model_id="facebook/sam-vit-base", device=DEVICE):
+    sam_model = SamModel.from_pretrained(model_id).to(device)
+    sam_processor = SamProcessor.from_pretrained(model_id)
+    return sam_model, sam_processor
+sam_model, sam_processor = load_sam_model()  # Loaded globally for reuse
+# Depth model, transform, and other assets
+model, transform = depth_pro.create_model_and_transforms(device=DEVICE)
 def find_subject(doc):
     for token in doc:
         # Check if the token is a subject
 @spaces.GPU
 def sam2(image, input_boxes, model_id="facebook/sam-vit-base"):
+    inputs = sam_processor(image, input_boxes=[[input_boxes]], return_tensors="pt").to(DEVICE)
     with torch.no_grad():
+        outputs = sam_model(**inputs)
+    masks = sam_processor.image_processor.post_process_masks(
         outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()
     )
     return masks
 @spaces.GPU
 def florence2(image, prompt="", task="<OD>"):
     torch_dtype = florence_model.dtype
+    inputs = florence_processor(text=task + prompt, images=image, return_tensors="pt").to(DEVICE, torch_dtype)
     generated_ids = florence_model.generate(
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
     return demo
 if __name__ == "__main__":
     demo = build_demo()
     demo.launch(share=True)