hqfang committed on
Commit
e58b48b
·
verified ·
1 Parent(s): 8242e7b

Update image_processing_molmoact.py

Browse files
Files changed (1) hide show
  1. image_processing_molmoact.py +4 -12
image_processing_molmoact.py CHANGED
@@ -160,12 +160,8 @@ def siglip_resize_and_pad(
160
  desired_output_size: Tuple[int, int],
161
  ) -> Tuple[np.ndarray, np.ndarray]:
162
  desired_output_size = _ensure_pyint_size2(desired_output_size)
163
- if len(image.shape) == 3:
164
- is_video = False
165
- image = torch.permute(torch.from_numpy(image), [2, 0, 1])
166
- else:
167
- is_video = True
168
- image = torch.permute(torch.from_numpy(image), [0, 3, 1, 2])
169
  dtype = image.dtype
170
  if torch.is_floating_point(image):
171
  in_min = 0.0
@@ -190,12 +186,8 @@ def siglip_resize_and_pad(
190
  resized = resized.to(torch.float32)
191
  resized = (resized - in_min) / (in_max - in_min)
192
 
193
- if is_video:
194
- resized = torch.permute(resized, [0, 2, 3, 1]).numpy()
195
- image_mask = None
196
- else:
197
- resized = torch.permute(resized, [1, 2, 0]).numpy()
198
- image_mask = np.ones_like(resized[:, :, 0], dtype=np.bool_)
199
 
200
  return resized, image_mask
201
 
 
160
  desired_output_size: Tuple[int, int],
161
  ) -> Tuple[np.ndarray, np.ndarray]:
162
  desired_output_size = _ensure_pyint_size2(desired_output_size)
163
+ # image is always treated as a single HWC image (video input is no longer handled here); convert to CHW
164
+ image = torch.permute(torch.from_numpy(image), [2, 0, 1])
 
 
 
 
165
  dtype = image.dtype
166
  if torch.is_floating_point(image):
167
  in_min = 0.0
 
186
  resized = resized.to(torch.float32)
187
  resized = (resized - in_min) / (in_max - in_min)
188
 
189
+ resized = torch.permute(resized, [1, 2, 0]).numpy()
190
+ image_mask = np.ones_like(resized[:, :, 0], dtype=np.bool_)
 
 
 
 
191
 
192
  return resized, image_mask
193