Shilpaj committed
Commit f8ecba6 · Parent: fdcadea

Refactor: Modifications for inference on GPU

Files changed (3):
  1. app.py +104 -27
  2. inference.py +78 -75
  3. requirements.txt +1 -0
app.py CHANGED
@@ -17,11 +17,16 @@ def load_model(model_path: str):
     """
     Load the model.
     """
-    # Load the pre-trained ResNet50 model using the new weights parameter
+    # Check if CUDA is available and set device
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print(f"Using device: {device}")
+
+    # Load the pre-trained ResNet50 model
     model = models.resnet50(weights=None)
+    model = model.to(device)
 
-    # Load custom weights from a .pth file with CPU mapping
-    state_dict = torch.load(model_path)
+    # Load custom weights from a .pth file
+    state_dict = torch.load(model_path, map_location=device)
 
     # Filter out unexpected keys
     filtered_state_dict = {k: v for k, v in state_dict['model_state_dict'].items() if k in model.state_dict()}
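
For reference, the loading pattern in this hunk condenses to a small standalone function. A minimal sketch, assuming the repo's checkpoint layout (weights stored under a 'model_state_dict' key, as the filter line implies) and a non-strict load for the filtered dict:

import torch
from torchvision import models

def load_model(model_path: str) -> torch.nn.Module:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = models.resnet50(weights=None).to(device)

    # map_location remaps GPU-saved tensors onto the current device,
    # so the same .pth file also loads on CPU-only machines.
    checkpoint = torch.load(model_path, map_location=device)

    # Keep only keys the architecture expects; strict=False is an
    # assumption here, since filtering may leave some keys unmatched.
    filtered = {k: v for k, v in checkpoint['model_state_dict'].items()
                if k in model.state_dict()}
    model.load_state_dict(filtered, strict=False)
    return model.eval()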
@@ -42,10 +47,35 @@ def load_classes():
     return classes
 
 
+def inference_wrapper(image, alpha, top_k, target_layer):
+    """
+    Wrapper function for inference with error handling
+    """
+    try:
+        if image is None:
+            return None, None
+
+        with torch.cuda.amp.autocast():  # Enable automatic mixed precision
+            with torch.no_grad():  # Disable gradient calculation
+                return inference(
+                    image,
+                    alpha,
+                    top_k,
+                    target_layer,
+                    model=model,
+                    classes=classes
+                )
+    except Exception as e:
+        print(f"Error in inference: {str(e)}")
+        return gr.Error(f"Error processing image: {str(e)}")
+
+
 def main():
     """
     Main function for the application.
     """
+    global model, classes  # Make these global so they're accessible to inference_wrapper
+
     # Load the model at startup
     model = load_model("resnet50_imagenet1k.pth")
 
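
One portability note on the wrapper's torch.cuda.amp.autocast() call: it only affects CUDA ops and, on a CPU-only host, enters as a warning-emitting no-op. A device-agnostic sketch (an alternative pattern, not part of this commit) uses torch.autocast with an explicit device type:

import torch

device_type = 'cuda' if torch.cuda.is_available() else 'cpu'

# Autocast only when a GPU is present; on CPU the context is simply
# disabled instead of warning.
with torch.autocast(device_type=device_type, enabled=(device_type == 'cuda')):
    pass  # run the forward pass / inference call here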
@@ -63,25 +93,51 @@ def main():
         gr.Markdown(
             """
             Visualize Class Activations Maps generated by the model's layer for the predicted class.
-            This is used to see what the model is actually looking at in the image.
             """
         )
+
+        # Define inputs
         with gr.Row():
-            img_input = gr.Image(label="Input Image", type="numpy", height=224)
+            img_input = gr.Image(
+                label="Input Image",
+                type="numpy",
+                height=224,
+                width=224
+            )
             with gr.Column():
                 label_output = gr.Label(label="Predictions")
-                gradcam_output = gr.Image(label="GradCAM Output", height=224)
+                gradcam_output = gr.Image(
+                    label="GradCAM Output",
+                    height=224,
+                    width=224
+                )
 
         with gr.Row():
-            alpha_slider = gr.Slider(0, 1, value=0.5, label="Activation Map Transparency")
-            top_k_slider = gr.Slider(1, 10, value=3, step=1, label="Number of Top Predictions")
-            target_layer_slider = gr.Slider(1, 6, value=4, step=1, label="Target Layer Number")
+            alpha_slider = gr.Slider(
+                minimum=0,
+                maximum=1,
+                value=0.5,
+                step=0.1,
+                label="Activation Map Transparency"
+            )
+            top_k_slider = gr.Slider(
+                minimum=1,
+                maximum=10,
+                value=3,
+                step=1,
+                label="Number of Top Predictions"
+            )
+            target_layer_slider = gr.Slider(
+                minimum=1,
+                maximum=6,
+                value=4,
+                step=1,
+                label="Target Layer Number"
+            )
 
         gradcam_button = gr.Button("Generate GradCAM")
 
-        def inference_wrapper(image, alpha, top_k, target_layer):
-            return inference(image, alpha, top_k, target_layer, model=model, classes=classes)
-
+        # Set up the click event
         gradcam_button.click(
             fn=inference_wrapper,
             inputs=[
@@ -90,30 +146,51 @@ def main():
                 top_k_slider,
                 target_layer_slider
             ],
-            outputs=[label_output, gradcam_output]
+            outputs=[
+                label_output,
+                gradcam_output
+            ]
         )
 
+        # Example section
         gr.Examples(
             examples=[
-                ["./assets/examples/dog.jpg", 0.5, 3, 4],
-                ["./assets/examples/cat.jpg", 0.5, 3, 4],
-                ["./assets/examples/frog.jpg", 0.5, 3, 4],
-                ["./assets/examples/bird.jpg", 0.5, 3, 4],
-                ["./assets/examples/shark-plane.jpg", 0.5, 3, 4],
-                ["./assets/examples/car.jpg", 0.5, 3, 4],
-                ["./assets/examples/truck.jpg", 0.5, 3, 4],
-                ["./assets/examples/horse.jpg", 0.5, 3, 4],
-                ["./assets/examples/plane.jpg", 0.5, 3, 4],
-                ["./assets/examples/ship.png", 0.5, 3, 4]
+                ["assets/examples/dog.jpg", 0.5, 3, 4],
+                ["assets/examples/cat.jpg", 0.5, 3, 4],
+                ["assets/examples/frog.jpg", 0.5, 3, 4],
+                ["assets/examples/bird.jpg", 0.5, 3, 4],
+                ["assets/examples/shark-plane.jpg", 0.5, 3, 4],
+                ["assets/examples/car.jpg", 0.5, 3, 4],
+                ["assets/examples/truck.jpg", 0.5, 3, 4],
+                ["assets/examples/horse.jpg", 0.5, 3, 4],
+                ["assets/examples/plane.jpg", 0.5, 3, 4],
+                ["assets/examples/ship.png", 0.5, 3, 4]
             ],
-            inputs=[img_input, alpha_slider, top_k_slider, target_layer_slider],
-            outputs=[label_output, gradcam_output],
+            inputs=[
+                img_input,
+                alpha_slider,
+                top_k_slider,
+                target_layer_slider
+            ],
+            outputs=[
+                label_output,
+                gradcam_output
+            ],
             fn=inference_wrapper,
-            cache_examples=True
+            cache_examples=True,
+            label="Click on any example to run GradCAM"
         )
 
     # Launch the demo
-    demo.launch(server_name="0.0.0.0", debug=True)
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        debug=True,
+        enable_queue=True,
+        show_error=True,
+        max_threads=4
+    )
 
 
 if __name__ == "__main__":
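
A compatibility note on the new launch() call: enable_queue is a legacy Gradio 3.x flag, and newer releases (4.x) configure queuing on the Blocks object instead. A sketch, assuming Gradio 4.x (max_size is illustrative):

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("GradCAM demo placeholder")

demo.queue(max_size=16)  # replaces launch(enable_queue=True)
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, debug=True)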
inference.py CHANGED
@@ -20,80 +20,83 @@ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
 @spaces.GPU
 def inference(image, alpha, top_k, target_layer, model=None, classes=None):
     """
-    Function to run inference on the input image
-    :param image: Image provided by the user
-    :param alpha: Percentage of cam overlap over the input image
-    :param top_k: Number of top predictions for the input image
-    :param target_layer: Layer for which GradCam to be shown
-    :param model: Model to use for inference
-    :param classes: Classes to use for inference
+    Run inference with GradCAM visualization
     """
-    # Save a copy of input img
-    org_img = image.copy()
-
-    # Calculate mean over each channel of input image
-    mean_r, mean_g, mean_b = np.mean(image[:, :, 0]/255.), np.mean(image[:, :, 1]/255.), np.mean(image[:, :, 2]/255.)
-
-    # Calculate Standard deviation over each channel
-    std_r, std_g, std_b = np.std(image[:, :, 0]/255.), np.std(image[:, :, 1]/255.), np.std(image[:, :, 2]/255.)
-
-    # Convert img to tensor and normalize it
-    _transform = transforms.Compose([
-        transforms.ToTensor(),
-        transforms.Normalize((mean_r, mean_g, mean_b), (std_r, std_g, std_b))
-    ])
-
-    # Preprocess the input image
-    input_tensor = _transform(image)
-
-    # Create a mini-batch as expected by the model
-    input_tensor = input_tensor.unsqueeze(0)
-
-    # Move the input and model to GPU if available
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    input_tensor = input_tensor.to(device)
-    model.to(device)
-
-    # Get Model Predictions
-    with torch.no_grad():
-        outputs = model(input_tensor)
-        probabilities = torch.softmax(outputs, dim=1)[0]
-        del outputs
-    confidences = {classes[i]: float(probabilities[i]) for i in range(1000)}
-
-    # Select the top classes based on user input
-    sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
-    show_confidences = OrderedDict(sorted_confidences[:top_k])
-
-    # Map layer numbers to meaningful parts of the ResNet architecture
-    _layers = {
-        1: model.conv1,       # Initial convolution layer
-        2: model.layer1[-1],  # Last bottleneck of first residual block
-        3: model.layer2[-1],  # Last bottleneck of second residual block
-        4: model.layer3[-1],  # Last bottleneck of third residual block
-        5: model.layer4[-1],  # Last bottleneck of fourth residual block
-        6: model.layer4[-1]   # Changed from fc to last conv layer for better visualization
-    }
-
-    # Ensure valid layer selection
-    target_layer = min(max(target_layer, 1), 6)
-    target_layers = [_layers[target_layer]]
-
-    # Get the class activations from the selected layer
-    cam = GradCAM(model=model, target_layers=target_layers)
-
-    # Get the most probable class index
-    top_class = max(confidences.items(), key=lambda x: x[1])[0]
-    class_idx = classes.index(top_class)
-
-    # Generate GradCAM for the top predicted class
-    grayscale_cam = cam(input_tensor=input_tensor,
-                        targets=[ClassifierOutputTarget(class_idx)],
-                        aug_smooth=True,
-                        eigen_smooth=True)
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    # Ensure model is on correct device and in eval mode
+    model = model.to(device)
     model.eval()
-    grayscale_cam = grayscale_cam[0, :]
-
-    # Overlay input image with Class activations
-    visualization = show_cam_on_image(org_img/255., grayscale_cam, use_rgb=True, image_weight=alpha)
-    return show_confidences, visualization
+
+    # Convert input to tensor and move to GPU
+    if isinstance(image, np.ndarray):
+        image_tensor = torch.from_numpy(image).to(device)
+        if image_tensor.ndim == 3:
+            image_tensor = image_tensor.unsqueeze(0)
+    else:
+        image_tensor = image.to(device)
+
+    with torch.cuda.amp.autocast():  # Enable automatic mixed precision
+        with torch.no_grad():
+            # Save a copy of input img
+            org_img = image.copy()
+
+            # Calculate mean over each channel of input image
+            mean_r, mean_g, mean_b = np.mean(image[:, :, 0]/255.), np.mean(image[:, :, 1]/255.), np.mean(image[:, :, 2]/255.)
+
+            # Calculate Standard deviation over each channel
+            std_r, std_g, std_b = np.std(image[:, :, 0]/255.), np.std(image[:, :, 1]/255.), np.std(image[:, :, 2]/255.)
+
+            # Convert img to tensor and normalize it
+            _transform = transforms.Compose([
+                transforms.ToTensor(),
+                transforms.Normalize((mean_r, mean_g, mean_b), (std_r, std_g, std_b))
+            ])
+
+            # Preprocess the input image
+            input_tensor = _transform(image)
+
+            # Create a mini-batch as expected by the model
+            input_tensor = input_tensor.unsqueeze(0)
+
+            # Get Model Predictions
+            outputs = model(input_tensor)
+            probabilities = torch.softmax(outputs, dim=1)[0]
+            del outputs
+            confidences = {classes[i]: float(probabilities[i]) for i in range(1000)}
+
+            # Select the top classes based on user input
+            sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
+            show_confidences = OrderedDict(sorted_confidences[:top_k])
+
+            # Map layer numbers to meaningful parts of the ResNet architecture
+            _layers = {
+                1: model.conv1,       # Initial convolution layer
+                2: model.layer1[-1],  # Last bottleneck of first residual block
+                3: model.layer2[-1],  # Last bottleneck of second residual block
+                4: model.layer3[-1],  # Last bottleneck of third residual block
+                5: model.layer4[-1],  # Last bottleneck of fourth residual block
+                6: model.layer4[-1]   # Changed from fc to last conv layer for better visualization
+            }
+
+            # Ensure valid layer selection
+            target_layer = min(max(target_layer, 1), 6)
+            target_layers = [_layers[target_layer]]
+
+            # Get the class activations from the selected layer
+            cam = GradCAM(model=model, target_layers=target_layers)
+
+            # Get the most probable class index
+            top_class = max(confidences.items(), key=lambda x: x[1])[0]
+            class_idx = classes.index(top_class)
+
+            # Generate GradCAM for the top predicted class
+            grayscale_cam = cam(input_tensor=input_tensor,
+                                targets=[ClassifierOutputTarget(class_idx)],
+                                aug_smooth=True,
+                                eigen_smooth=True)
+            grayscale_cam = grayscale_cam[0, :]
+
+            # Overlay input image with Class activations
+            visualization = show_cam_on_image(org_img/255., grayscale_cam, use_rgb=True, image_weight=alpha)
+            return show_confidences, visualization
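
One caveat when reusing this code path: pytorch-grad-cam derives the activation map from gradients of the target class score, so a surrounding torch.no_grad() (as in the hunk above and in app.py's wrapper) can prevent the backward pass it needs. A minimal sketch, with a dummy input, that scopes no_grad to the prediction only (an adaptation, not the committed code):

import torch
from torchvision import models
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

model = models.resnet50(weights=None).eval()
input_tensor = torch.randn(1, 3, 224, 224)  # dummy batch for illustration

# Plain predictions do not need autograd...
with torch.no_grad():
    probs = torch.softmax(model(input_tensor), dim=1)[0]
class_idx = int(probs.argmax())

# ...but Grad-CAM backpropagates the class score, so cam(...) stays
# outside any no_grad block.
cam = GradCAM(model=model, target_layers=[model.layer4[-1]])
grayscale_cam = cam(input_tensor=input_tensor,
                    targets=[ClassifierOutputTarget(class_idx)])[0, :]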
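Also of note in inference.py: the per-image normalization computes each channel's mean/std from the input itself rather than using the usual ImageNet constants. A worked sketch on a dummy image showing what that computes:

import numpy as np
import torch
from torchvision import transforms

image = (np.random.rand(224, 224, 3) * 255).astype(np.uint8)  # dummy HWC image

# Per-channel mean/std of this particular image, on the [0, 1] scale
means = [float(np.mean(image[:, :, c] / 255.)) for c in range(3)]
stds = [float(np.std(image[:, :, c] / 255.)) for c in range(3)]

tensor = transforms.Compose([
    transforms.ToTensor(),              # HWC uint8 -> CHW float in [0, 1]
    transforms.Normalize(means, stds),  # (x - mean_c) / std_c per channel
])(image)

print(tensor.mean(dim=(1, 2)))  # ~0 per channel
print(tensor.std(dim=(1, 2)))   # ~1 per channel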
requirements.txt CHANGED
@@ -3,3 +3,4 @@ grad-cam
 numpy<2.0.0
 torch==2.0.1
 torchvision==0.15.2
+Pillow>=9.0.0
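
A quick sanity check for the pinned environment above (PIL is the import name that the new Pillow requirement provides):

import PIL
import torch
import torchvision

print("torch:", torch.__version__)              # expected: 2.0.1
print("torchvision:", torchvision.__version__)  # expected: 0.15.2
print("Pillow:", PIL.__version__)               # expected: >= 9.0.0
print("CUDA available:", torch.cuda.is_available())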