image-matching-webui

Running

App Files Files Community

Vincentqyw committed on Jul 24, 2023

Commit

f517bbf

1 Parent(s): 8af5ecd

fix: roma cpu

Browse files

Files changed (5) hide show

app.py +1 -2
third_party/Roma/roma/models/encoders.py +4 -2
third_party/Roma/roma/models/matcher.py +9 -7
third_party/Roma/roma/models/transformer/__init__.py +3 -1
third_party/Roma/roma/utils/local_correlation.py +6 -4

app.py CHANGED Viewed

@@ -86,8 +86,7 @@ def ui_reset_state(
 def run(config):
-    with gr.Blocks(css="footer {visibility: hidden}"
-    ) as app:
         gr.Markdown(
             """
             <p align="center">

 def run(config):
+    with gr.Blocks(css="footer {visibility: hidden}") as app:
         gr.Markdown(
             """
             <p align="center">

third_party/Roma/roma/models/encoders.py CHANGED Viewed

@@ -6,6 +6,8 @@ import torch.nn.functional as F
 import torchvision.models as tvm
 import gc
 class ResNet50(nn.Module):
     def __init__(
@@ -47,7 +49,7 @@ class ResNet50(nn.Module):
             self.amp_dtype = torch.float32
     def forward(self, x, **kwargs):
-        with torch.autocast("cuda", enabled=self.amp, dtype=self.amp_dtype):
             net = self.net
             feats = {1: x}
             x = net.conv1(x)
@@ -90,7 +92,7 @@ class VGG19(nn.Module):
             self.amp_dtype = torch.float32
     def forward(self, x, **kwargs):
-        with torch.autocast("cuda", enabled=self.amp, dtype=self.amp_dtype):
             feats = {}
             scale = 1
             for layer in self.layers:

 import torchvision.models as tvm
 import gc
+device = "cuda" if torch.cuda.is_available() else "cpu"
 class ResNet50(nn.Module):
     def __init__(
             self.amp_dtype = torch.float32
     def forward(self, x, **kwargs):
+        with torch.autocast(device, enabled=self.amp, dtype=self.amp_dtype):
             net = self.net
             feats = {1: x}
             x = net.conv1(x)
             self.amp_dtype = torch.float32
     def forward(self, x, **kwargs):
+        with torch.autocast(device, enabled=self.amp, dtype=self.amp_dtype):
             feats = {}
             scale = 1
             for layer in self.layers:

third_party/Roma/roma/models/matcher.py CHANGED Viewed

@@ -14,6 +14,8 @@ from roma.utils.local_correlation import local_correlation
 from roma.utils.utils import cls_to_flow_refine
 from roma.utils.kde import kde
 class ConvRefiner(nn.Module):
     def __init__(
@@ -118,7 +120,7 @@ class ConvRefiner(nn.Module):
     def forward(self, x, y, flow, scale_factor=1, logits=None):
         b, c, hs, ws = x.shape
-        with torch.autocast("cuda", enabled=self.amp, dtype=self.amp_dtype):
             with torch.no_grad():
                 x_hat = F.grid_sample(
                     y,
@@ -129,8 +131,8 @@ class ConvRefiner(nn.Module):
             if self.has_displacement_emb:
                 im_A_coords = torch.meshgrid(
                     (
-                        torch.linspace(-1 + 1 / hs, 1 - 1 / hs, hs, device="cuda"),
-                        torch.linspace(-1 + 1 / ws, 1 - 1 / ws, ws, device="cuda"),
                     )
                 )
                 im_A_coords = torch.stack((im_A_coords[1], im_A_coords[0]))
@@ -423,7 +425,7 @@ class Decoder(nn.Module):
             corresps[ins] = {}
             f1_s, f2_s = f1[ins], f2[ins]
             if new_scale in self.proj:
-                with torch.autocast("cuda", self.amp_dtype):
                     f1_s, f2_s = self.proj[new_scale](f1_s), self.proj[new_scale](f2_s)
             if ins in coarse_scales:
@@ -643,7 +645,7 @@ class RegressionMatcher(nn.Module):
         device=None,
     ):
         if device is None:
-            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         from PIL import Image
         if isinstance(im_A_path, (str, os.PathLike)):
@@ -739,8 +741,8 @@ class RegressionMatcher(nn.Module):
             # Create im_A meshgrid
             im_A_coords = torch.meshgrid(
                 (
-                    torch.linspace(-1 + 1 / hs, 1 - 1 / hs, hs, device="cuda"),
-                    torch.linspace(-1 + 1 / ws, 1 - 1 / ws, ws, device="cuda"),
                 )
             )
             im_A_coords = torch.stack((im_A_coords[1], im_A_coords[0]))

 from roma.utils.utils import cls_to_flow_refine
 from roma.utils.kde import kde
+device = "cuda" if torch.cuda.is_available() else "cpu"
 class ConvRefiner(nn.Module):
     def __init__(
     def forward(self, x, y, flow, scale_factor=1, logits=None):
         b, c, hs, ws = x.shape
+        with torch.autocast(device, enabled=self.amp, dtype=self.amp_dtype):
             with torch.no_grad():
                 x_hat = F.grid_sample(
                     y,
             if self.has_displacement_emb:
                 im_A_coords = torch.meshgrid(
                     (
+                        torch.linspace(-1 + 1 / hs, 1 - 1 / hs, hs, device=device),
+                        torch.linspace(-1 + 1 / ws, 1 - 1 / ws, ws, device=device),
                     )
                 )
                 im_A_coords = torch.stack((im_A_coords[1], im_A_coords[0]))
             corresps[ins] = {}
             f1_s, f2_s = f1[ins], f2[ins]
             if new_scale in self.proj:
+                with torch.autocast(device, self.amp_dtype):
                     f1_s, f2_s = self.proj[new_scale](f1_s), self.proj[new_scale](f2_s)
             if ins in coarse_scales:
         device=None,
     ):
         if device is None:
+            device = torch.device(device if torch.cuda.is_available() else "cpu")
         from PIL import Image
         if isinstance(im_A_path, (str, os.PathLike)):
             # Create im_A meshgrid
             im_A_coords = torch.meshgrid(
                 (
+                    torch.linspace(-1 + 1 / hs, 1 - 1 / hs, hs, device=device),
+                    torch.linspace(-1 + 1 / ws, 1 - 1 / ws, ws, device=device),
                 )
             )
             im_A_coords = torch.stack((im_A_coords[1], im_A_coords[0]))

third_party/Roma/roma/models/transformer/__init__.py CHANGED Viewed

@@ -7,6 +7,8 @@ from .layers.block import Block
 from .layers.attention import MemEffAttention
 from .dinov2 import vit_large
 class TransformerDecoder(nn.Module):
     def __init__(
@@ -51,7 +53,7 @@ class TransformerDecoder(nn.Module):
         return self._scales.copy()
     def forward(self, gp_posterior, features, old_stuff, new_scale):
-        with torch.autocast("cuda", dtype=self.amp_dtype, enabled=self.amp):
             B, C, H, W = gp_posterior.shape
             x = torch.cat((gp_posterior, features), dim=1)
             B, C, H, W = x.shape

 from .layers.attention import MemEffAttention
 from .dinov2 import vit_large
+device = "cuda" if torch.cuda.is_available() else "cpu"
 class TransformerDecoder(nn.Module):
     def __init__(
         return self._scales.copy()
     def forward(self, gp_posterior, features, old_stuff, new_scale):
+        with torch.autocast(device, dtype=self.amp_dtype, enabled=self.amp):
             B, C, H, W = gp_posterior.shape
             x = torch.cat((gp_posterior, features), dim=1)
             B, C, H, W = x.shape

third_party/Roma/roma/utils/local_correlation.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import torch
 import torch.nn.functional as F
 def local_correlation(
     feature0,
@@ -20,8 +22,8 @@ def local_correlation(
         # If flow is None, assume feature0 and feature1 are aligned
         coords = torch.meshgrid(
             (
-                torch.linspace(-1 + 1 / h, 1 - 1 / h, h, device="cuda"),
-                torch.linspace(-1 + 1 / w, 1 - 1 / w, w, device="cuda"),
             )
         )
         coords = torch.stack((coords[1], coords[0]), dim=-1)[None].expand(B, h, w, 2)
@@ -30,10 +32,10 @@ def local_correlation(
     local_window = torch.meshgrid(
         (
             torch.linspace(
-                -2 * local_radius / h, 2 * local_radius / h, 2 * r + 1, device="cuda"
             ),
             torch.linspace(
-                -2 * local_radius / w, 2 * local_radius / w, 2 * r + 1, device="cuda"
             ),
         )
     )

 import torch
 import torch.nn.functional as F
+device = "cuda" if torch.cuda.is_available() else "cpu"
 def local_correlation(
     feature0,
         # If flow is None, assume feature0 and feature1 are aligned
         coords = torch.meshgrid(
             (
+                torch.linspace(-1 + 1 / h, 1 - 1 / h, h, device=device),
+                torch.linspace(-1 + 1 / w, 1 - 1 / w, w, device=device),
             )
         )
         coords = torch.stack((coords[1], coords[0]), dim=-1)[None].expand(B, h, w, 2)
     local_window = torch.meshgrid(
         (
             torch.linspace(
+                -2 * local_radius / h, 2 * local_radius / h, 2 * r + 1, device=device
             ),
             torch.linspace(
+                -2 * local_radius / w, 2 * local_radius / w, 2 * r + 1, device=device
             ),
         )
     )