Spaces:

merve
/

DINOv3-keypoint-matching

Running on Zero

App Files Files Community

merve HF Staff committed 4 days ago

Commit

a1b7c96

verified ·

1 Parent(s): 89718c6

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -29

app.py CHANGED Viewed

@@ -15,35 +15,27 @@ DINO_MODELS = {
     "DINOv3 Large ConvNeXT": "facebook/dinov3-convnext-large-pretrain-lvd1689m"
 }
-current_processor = None
-current_model = None
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 def load_model(model_name):
-    global current_processor, current_model
     model_path = DINO_MODELS[model_name]
-    try:
-        current_processor = AutoImageProcessor.from_pretrained(model_path)
-        current_model = AutoModel.from_pretrained(model_path)
-        current_model = current_model.to(DEVICE)
-        return f"✅ Model '{model_name}' loaded successfully!"
-    except Exception as e:
-        return f"❌ Error loading model '{model_name}': {str(e)}"
 @spaces.GPU()
-def extract_features(image):
-    original_size = image.size
-    inputs = current_processor(images=image, return_tensors="pt")
-    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
-    model_size = current_processor.size['height']
     with torch.no_grad():
-        outputs = current_model(**inputs)
         features = outputs.last_hidden_state
     return features, original_size, model_size
@@ -63,14 +55,13 @@ def find_correspondences(features1, features2, threshold=0.8):
     max_sim1 = torch.max(similarity, dim=-1)[0]
     max_sim2 = torch.max(similarity, dim=-2)[0]
-    mutual_matches = matches2[0, matches1[0]] == torch.arange(N1).to(DEVICE)
     good_matches = (max_sim1[0] > threshold) & mutual_matches
-    return matches1[0][good_matches], torch.arange(N1).to(DEVICE)[good_matches], max_sim1[0][good_matches]
 def patch_to_image_coords(patch_idx, original_size, model_size, patch_size=14):
     orig_w, orig_h = original_size
     patches_h = model_size // patch_size
     patches_w = model_size // patch_size
@@ -90,14 +81,13 @@ def patch_to_image_coords(patch_idx, original_size, model_size, patch_size=14):
 def match_keypoints(image1, image2, model_name):
     if image1 is None or image2 is None:
-        return None, "Please upload both images"
     load_model(model_name)
     img1_pil = Image.fromarray(image1).convert('RGB')
     img2_pil = Image.fromarray(image2).convert('RGB')
     features1, original_size1, model_size1 = extract_features(img1_pil)
     features2, original_size2, model_size2 = extract_features(img2_pil)
@@ -170,7 +160,6 @@ with gr.Blocks(title="DINOv3 Keypoint Matching") as demo:
         with gr.Column(scale=2):
             output_image = gr.Image(label="Matched Keypoints")
-    # Connect model selector to status bar
     model_selector.change(
         fn=load_model,
         inputs=[model_selector],
@@ -189,4 +178,4 @@ with gr.Blocks(title="DINOv3 Keypoint Matching") as demo:
     )
 if __name__ == "__main__":
-    demo.launch()

     "DINOv3 Large ConvNeXT": "facebook/dinov3-convnext-large-pretrain-lvd1689m"
 }
 def load_model(model_name):
+    global processor, model
     model_path = DINO_MODELS[model_name]
+    processor = AutoImageProcessor.from_pretrained(model_path)
+    model = AutoModel.from_pretrained(model_path)
+    model = model.to(device)
+    return f"✅ Model '{model_name}' loaded successfully!"
+load_model("DINOv3 Base ViT")
 @spaces.GPU()
+def extract_features(image):
+    original_size = image.size
+    inputs = processor(images=image, return_tensors="pt")
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+    model_size = processor.size['height']
     with torch.no_grad():
+        outputs = model(**inputs)
         features = outputs.last_hidden_state
     return features, original_size, model_size
     max_sim1 = torch.max(similarity, dim=-1)[0]
     max_sim2 = torch.max(similarity, dim=-2)[0]
+    mutual_matches = matches2[0, matches1[0]] == torch.arange(N1).to(device)
     good_matches = (max_sim1[0] > threshold) & mutual_matches
+    return matches1[0][good_matches], torch.arange(N1).to(device)[good_matches], max_sim1[0][good_matches]
 def patch_to_image_coords(patch_idx, original_size, model_size, patch_size=14):
     orig_w, orig_h = original_size
     patches_h = model_size // patch_size
     patches_w = model_size // patch_size
 def match_keypoints(image1, image2, model_name):
     if image1 is None or image2 is None:
+        return None
     load_model(model_name)
     img1_pil = Image.fromarray(image1).convert('RGB')
     img2_pil = Image.fromarray(image2).convert('RGB')
     features1, original_size1, model_size1 = extract_features(img1_pil)
     features2, original_size2, model_size2 = extract_features(img2_pil)
         with gr.Column(scale=2):
             output_image = gr.Image(label="Matched Keypoints")
     model_selector.change(
         fn=load_model,
         inputs=[model_selector],
     )
 if __name__ == "__main__":
+    demo.launch(share=True)