import project_path
import os
import cv2
import numpy as np
import json
from threading import Lock
from contextlib import contextmanager
import torch
from torch.utils.data import Dataset
from PIL import Image
from datetime import datetime
# assumes yolov5 on sys.path
from lib.yolov5.utils.general import xyxy2xywh
from lib.yolov5.utils.augmentations import letterbox
from lib.yolov5.utils.dataloaders import create_dataloader as create_yolo_dataloader
from backend.pyDIDSON import pyDIDSON
from backend.aris import BEAM_WIDTH_DIR

# use this flag to test the difference between direct ARIS dataloading and
# using the jpeg-compressed version. very slow. not much difference observed.
TEST_JPG_COMPRESSION = False
# # # # # #
# Factory(ish) methods for DataLoader creation. Easy entry points to this module.
# # # # # #
def create_dataloader_aris(aris_filepath, beam_width_dir=BEAM_WIDTH_DIR, annotations_file=None, batch_size=32,
                           stride=64, pad=0.5, img_size=896, rank=-1, world_size=1, workers=0,
                           disable_output=False, cache_bg_frames=False, num_frames_bg_subtract=1000):
    """
    Get a PyTorch Dataset and DataLoader for ARIS files with (optional) associated fisheye-formatted labels.
    """
    print('dataset', datetime.now())
    # Make sure only the first process in DDP processes the dataset first; the others can then use the cache.
    # This is a no-op on a single-GPU machine.
    with torch_distributed_zero_first(rank):
        dataset = YOLOARISBatchedDataset(aris_filepath, beam_width_dir, annotations_file, stride, pad, img_size,
                                         disable_output=disable_output, cache_bg_frames=cache_bg_frames,
                                         num_frames_bg_subtract=num_frames_bg_subtract)
    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])  # number of workers
    print('dataloader', datetime.now())
    if not disable_output:
        print("dataset size", len(dataset))
        print("dataset shape", dataset.shape)
        print("Num workers", nw)
    # sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None  # if extending to multi-GPU inference, will need this
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=None,  # automatic batching disabled; the Dataset returns full batches
                                             sampler=OnePerBatchSampler(data_source=dataset, batch_size=batch_size),
                                             num_workers=nw,
                                             pin_memory=True,
                                             collate_fn=collate_fn)
    print('done', datetime.now())
    return dataloader, dataset
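# Example usage (a minimal sketch; the .aris path below is a hypothetical placeholder,
# not a file shipped with this module):
#
#   dataloader, dataset = create_dataloader_aris(
#       "clips/example_clip.aris",   # hypothetical ARIS file
#       annotations_file=None,       # or a fisheye-format json with labels
#       batch_size=32,
#       workers=0,
#   )
#   for imgs, labels, shapes in dataloader:
#       ...  # imgs is a stacked uint8 tensor of letterboxed frames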
def create_dataloader_frames(frames_path, batch_size=32, model_stride_max=32,
                             pad=0.5, img_size=896, rank=-1, world_size=1, workers=0, disable_output=False):
    """
    Create a DataLoader for a directory of frames without labels.
    Args:
        model_stride_max: use model.stride.max()
    """
    gs = max(int(model_stride_max), 32)  # grid size (max stride)
    return create_yolo_dataloader(frames_path, img_size, batch_size, gs, single_cls=False, augment=False,
                                  hyp=None, cache=None, rect=True, rank=rank,
                                  workers=workers, pad=pad)[0]
def create_dataloader_frames_only(frames_path, batch_size=32, img_size=896, workers=0):
    """
    Create a batched, letterboxed Dataset for a directory of frames without labels.
    """
    return YOLOFrameDataset(frames_path, img_size=img_size, batch_size=batch_size)
# # # # # #
# End factory(ish) methods
# # # # # #

import pandas as pd
from torchvision.io import read_image
import re
class YOLOFrameDataset(Dataset):
    def __init__(self, img_dir, img_size=896, batch_size=32, stride=64, pad=0.5):
        self.img_dir = img_dir
        self.img_size = img_size
        self.files = os.listdir(img_dir)
        self.files = list(filter(lambda f: f.endswith(".jpg"), self.files))
        self.files.sort(key=lambda f: int(re.sub(r'\D', '', f)))  # sort numerically by the digits in the filename

        temp_img = read_image(os.path.join(self.img_dir, self.files[0]))  # C x H x W
        size = temp_img.shape
        self.ydim = size[1]
        self.xdim = size[2]

        n = len(self.files)

        aspect_ratio = self.ydim / self.xdim
        if aspect_ratio < 1:
            shape = [aspect_ratio, 1]
        elif aspect_ratio > 1:
            shape = [1, 1 / aspect_ratio]
        else:
            shape = [1, 1]

        self.original_shape = (self.ydim, self.xdim)
        self.shape = np.ceil(np.array(shape) * img_size / stride + pad).astype(int) * stride

        self.batch_indices = []
        for i in range(0, n, batch_size):
            self.batch_indices.append((i, min(n, i + batch_size)))
    def load_image(self, img, img_size=896):
        """Loads and resizes 1 image from dataset, returns img, original hw, resized hw.
        Modified from ScaledYOLOv4.datasets.load_image()
        """
        h0, w0 = img.shape[:2]  # original hw
        h1, w1 = h0, w0
        r = img_size / max(h0, w0)  # resize so the longer side equals img_size
        if r != 1:  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
            h1, w1 = img.shape[:2]
        return img, (h0, w0), (h1, w1)  # img, hw_original, hw_resized
    def __len__(self):
        return len(self.batch_indices)

    def __iter__(self):
        for batch_idx in self.batch_indices:
            batch = []
            labels = None
            shapes = []
            for i in range(batch_idx[0], batch_idx[1]):
                img_name = self.files[i]
                img_path = os.path.join(self.img_dir, img_name)
                image = Image.open(img_path)
                image = np.asarray(image)

                img, (h0, w0), (h, w) = self.load_image(image, img_size=self.img_size)

                # Letterbox
                img, ratio, pad = letterbox(img, self.shape, auto=False, scaleup=False)
                shape = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

                img = img.transpose(2, 0, 1)  # HWC -> CHW
                img = np.ascontiguousarray(img)
                img = torch.from_numpy(img)

                shapes.append(shape)
                batch.append(img)

            images = torch.stack(batch)
            yield (images, labels, shapes)
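# Example iteration (a minimal sketch; "frames/clip_0" is a hypothetical directory
# of numbered .jpg frames, not something this module provides):
#
#   dataset = YOLOFrameDataset("frames/clip_0", img_size=896, batch_size=16)
#   for images, labels, shapes in dataset:
#       # images: (batch, 3, H, W) uint8 tensor of letterboxed frames
#       # labels: always None for unlabeled frame directories
#       # shapes: per-image ((h0, w0), ((h/h0, w/w0), pad)) used to rescale predictions
#       ...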
class ARISBatchedDataset(Dataset):
    def __init__(self, aris_filepath, beam_width_dir, annotations_file, batch_size, num_frames_bg_subtract=1000,
                 disable_output=False, cache_bg_frames=False):
        """
        A PyTorch Dataset class for loading an ARIS file and (optional) associated fisheye-format labels.
        This class handles the ARIS frame extraction and 3-channel representation generation.
        It is called a "BatchedDataset" because it loads contiguous frames in self.batch_size chunks.
        ** The PyTorch sampler must be aware of this!! ** Use the OnePerBatchSampler in this module when using this Dataset.
        Args:
            cache_bg_frames: keep the frames used for bg subtraction stored in memory. careful of memory issues. only
                recommended for small values of num_frames_bg_subtract
        """
        # open ARIS data stream - TODO: make sure this is one per worker
        self.data = open(aris_filepath, 'rb')
        self.data_lock = Lock()
        self.beam_width_dir = beam_width_dir
        self.disable_output = disable_output
        self.aris_filepath = aris_filepath
        self.cache_bg_frames = cache_bg_frames

        # get header info
        self.didson = pyDIDSON(self.aris_filepath, beam_width_dir=beam_width_dir)
        self.xdim = self.didson.info['xdim']
        self.ydim = self.didson.info['ydim']

        # disable automatic batching - we do it ourselves, reading batch_size frames from
        # the ARIS file at a time
        self.batch_size = batch_size

        # load fisheye annotations
        if annotations_file is None:
            if not self.disable_output:
                print("Loading file with no labels.")
            self.start_frame = self.didson.info['startframe']
            self.end_frame = self.didson.info['endframe'] or self.didson.info['numframes']
            self.labels = None
        else:
            self._load_labels(annotations_file)

        # initialize the background subtraction
        self.num_frames_bg_subtract = num_frames_bg_subtract
        self._init_bg_frame()
    def _init_bg_frame(self):
        """
        Initialize bg frame for bg subtraction.
        Uses min(self.num_frames_bg_subtract, total_frames) frames to do mean subtraction.
        Caches these frames in self.extracted_frames for reuse if cache_bg_frames is set.
        """
        # ensure the number of frames used is a multiple of self.batch_size so we can cache them and retrieve full batches
        # add 1 extra frame to be used for optical flow calculation
        num_frames_bg = min(self.end_frame - self.start_frame,
                            self.num_frames_bg_subtract // self.batch_size * self.batch_size + 1)
        if not self.disable_output:
            print("Initializing mean frame for background subtraction using", num_frames_bg, "frames...")
        frames_for_bg_subtract = self.didson.load_frames(start_frame=self.start_frame,
                                                         end_frame=self.start_frame + num_frames_bg)

        ### NEW WAY ###
        # save memory (and time?) by computing these in a streaming fashion vs. in a big batch
        self.mean_blurred_frame = np.zeros([self.ydim, self.xdim], dtype=np.float32)
        max_blurred_frame = np.zeros([self.ydim, self.xdim], dtype=np.float32)
        for i in range(frames_for_bg_subtract.shape[0]):
            blurred = cv2.GaussianBlur(frames_for_bg_subtract[i], (5, 5), 0)
            self.mean_blurred_frame += blurred
            max_blurred_frame = np.maximum(max_blurred_frame, np.abs(blurred))
        self.mean_blurred_frame /= frames_for_bg_subtract.shape[0]
        max_blurred_frame -= self.mean_blurred_frame
        self.mean_normalization_value = np.max(max_blurred_frame)

        # cache these for later
        self.extracted_frames = []
        # Because of the optical flow computation, we only go to end_frame - 1
        next_blur = None
        for i in range(len(frames_for_bg_subtract) - 1):
            if next_blur is None:
                this_blur = ((cv2.GaussianBlur(frames_for_bg_subtract[i], (5, 5), 0) - self.mean_blurred_frame)
                             / self.mean_normalization_value + 1) / 2
            else:
                this_blur = next_blur
            next_blur = ((cv2.GaussianBlur(frames_for_bg_subtract[i + 1], (5, 5), 0) - self.mean_blurred_frame)
                         / self.mean_normalization_value + 1) / 2
            # 3 channels: raw frame, blurred bg-subtracted frame, frame-to-frame difference
            frame_image = np.dstack([frames_for_bg_subtract[i],
                                     this_blur * 255,
                                     np.abs(next_blur - this_blur) * 255]).astype(np.uint8, copy=False)
            if TEST_JPG_COMPRESSION:
                Image.fromarray(frame_image).save(f"tmp{i}.jpg", quality=95)
                frame_image = cv2.imread(f"tmp{i}.jpg")[:, :, ::-1]  # BGR to RGB
                os.remove(f"tmp{i}.jpg")
            if self.cache_bg_frames:
                self.extracted_frames.append(frame_image)
        if not self.disable_output:
            print("Done initializing background frame.")
    def _load_labels(self, fisheye_json):
        """Load labels from a fisheye-formatted json file into self.labels in normalized
        xywh format.
        """
        with open(fisheye_json, 'r') as f:
            js = json.load(f)
        labels = []
        for frame in js['frames']:
            l = []
            for fish in frame['fish']:
                x, y, w, h = xyxy2xywh(fish['bbox'])
                cx = x + w / 2.0
                cy = y + h / 2.0
                # Each row is `class x_center y_center width height` format. (Normalized)
                l.append([0, cx, cy, w, h])
            l = np.array(l, dtype=np.float32)
            if len(l) == 0:
                l = np.zeros((0, 5), dtype=np.float32)
            labels.append(l)
        self.labels = labels
        self.start_frame = js['start_frame']
        self.end_frame = js['end_frame']

    def __len__(self):
        # account for optical flow - we can't do the last frame
        return self.end_frame - self.start_frame - 1
    def _postprocess(self, frame_images, frame_labels):
        raise NotImplementedError

    def __getitem__(self, idx):
        """
        Return a numpy array representing this batch of frames and labels according to pyARIS frame extraction logic.
        This method returns a full batch rather than just 1 example, assuming a OnePerBatchSampler is used.
        """
        final_idx = min(idx + self.batch_size, len(self))
        frame_labels = self.labels[idx:final_idx] if self.labels else None

        # see if we have already cached this from bg subtraction
        # assumes len(self.extracted_frames) is a multiple of self.batch_size
        if idx + 1 < len(self.extracted_frames):
            return self._postprocess(self.extracted_frames[idx:final_idx], frame_labels)
        else:
            frames = self.didson.load_frames(start_frame=self.start_frame + idx,
                                             end_frame=self.start_frame + final_idx + 1)
            blurred_frames = frames.astype(np.float32)
            for i in range(frames.shape[0]):
                blurred_frames[i] = cv2.GaussianBlur(blurred_frames[i], (5, 5), 0)
            blurred_frames -= self.mean_blurred_frame
            blurred_frames /= self.mean_normalization_value
            blurred_frames += 1
            blurred_frames /= 2

            # stack raw frame, blurred bg-subtracted frame, and frame-to-frame difference as 3 channels
            frame_images = np.stack([frames[:-1],
                                     blurred_frames[:-1] * 255,
                                     np.abs(blurred_frames[1:] - blurred_frames[:-1]) * 255],
                                    axis=-1).astype(np.uint8, copy=False)

            if TEST_JPG_COMPRESSION:
                new_frame_images = []
                for image in frame_images:
                    Image.fromarray(image).save(f"tmp{idx}.jpg", quality=95)
                    image = cv2.imread(f"tmp{idx}.jpg")[:, :, ::-1]  # BGR to RGB
                    os.remove(f"tmp{idx}.jpg")
                    new_frame_images.append(image)
                frame_images = new_frame_images

            return self._postprocess(frame_images, frame_labels)
class YOLOARISBatchedDataset(ARISBatchedDataset):
    """An ARIS Dataset that works with YOLOv5 inference."""
    def __init__(self, aris_filepath, beam_width_dir, annotations_file, stride=64, pad=0.5, img_size=896, batch_size=32,
                 disable_output=False, cache_bg_frames=False, num_frames_bg_subtract=1000):
        super().__init__(aris_filepath, beam_width_dir, annotations_file, batch_size,
                         disable_output=disable_output, cache_bg_frames=cache_bg_frames,
                         num_frames_bg_subtract=num_frames_bg_subtract)

        # compute shapes for letterbox, rounded up to a multiple of the model stride
        aspect_ratio = self.ydim / self.xdim
        if aspect_ratio < 1:
            shape = [aspect_ratio, 1]
        elif aspect_ratio > 1:
            shape = [1, 1 / aspect_ratio]
        else:
            shape = [1, 1]
        self.original_shape = (self.ydim, self.xdim)
        self.shape = np.ceil(np.array(shape) * img_size / stride + pad).astype(int) * stride
    def load_image(self, img, img_size=896):
        """Loads and resizes 1 image from dataset, returns img, original hw, resized hw.
        Modified from ScaledYOLOv4.datasets.load_image()
        """
        h0, w0 = img.shape[:2]  # orig hw
        r = img_size / max(h0, w0)  # resize image to img_size
        if r != 1:  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
    def _postprocess(self, frame_images, frame_labels):
        """
        Return a batch of data in the format used by ScaledYOLOv4.
        That is, a list of tuples, one tuple per image in the batch:
        [
            (img -> torch.Tensor,
             labels -> torch.Tensor,
             shapes -> tuple describing image original dimensions and scaled/padded dimensions
            ),
            ...
        ]
        """
        outputs = []
        frame_labels = frame_labels or [None for _ in frame_images]
        for image, x in zip(frame_images, frame_labels):
            img, (h0, w0), (h, w) = self.load_image(image)

            # Letterbox
            img, ratio, pad = letterbox(img, self.shape, auto=False, scaleup=False)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            img = img.transpose(2, 0, 1)  # HWC -> CHW
            img = np.ascontiguousarray(img)

            # Load labels
            # Convert from normalized xywh to pixel xyxy format in order to add the letterbox padding
            labels = []
            if x is not None and x.size > 0:
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

            # convert back to normalized xywh with padding
            nL = len(labels)  # number of labels
            labels_out = torch.zeros((nL, 6))
            if nL:
                labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
                labels[:, [2, 4]] /= img.shape[1]  # normalized height 0-1
                labels[:, [1, 3]] /= img.shape[2]  # normalized width 0-1
                labels_out[:, 1:] = torch.from_numpy(labels)

            outputs.append((torch.from_numpy(img), labels_out, shapes))
        return outputs
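    # Schematically, one element of the list returned by _postprocess looks like
    # (shapes/values below are illustrative, not taken from a real clip):
    #
    #   img:        uint8 tensor of shape (3, H_letterboxed, W_letterboxed)
    #   labels_out: float tensor of shape (num_fish, 6); columns are
    #               [image_index_in_batch (filled in by collate_fn), class, x_center, y_center, w, h],
    #               with box coordinates normalized to the letterboxed image
    #   shapes:     ((h0, w0), ((h/h0, w/w0), pad)) used to rescale predictions back to the original frame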
@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """
    Context manager to make all processes in distributed training wait for each local_master to do something.
    """
    if local_rank not in [-1, 0]:
        torch.distributed.barrier()
    yield
    if local_rank == 0:
        torch.distributed.barrier()
class OnePerBatchSampler(torch.utils.data.Sampler):
    """Yields the first index of each batch, given a batch size.
    In other words, returns multiples of self.batch_size up to the size of the Dataset.
    This is a workaround for PyTorch's standard batch creation that allows us to manually
    select contiguous segments of an ARIS clip for each batch.
    """
    def __init__(self, data_source, batch_size):
        self.data_source = data_source
        self.batch_size = batch_size

    def __iter__(self):
        idxs = [i * self.batch_size for i in range(len(self))]
        return iter(idxs)

    def __len__(self):
        return len(self.data_source) // self.batch_size
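# Sketch of how the sampler and batch_size=None interact (illustrative numbers):
# with len(dataset) == 100 frames and batch_size == 32, OnePerBatchSampler yields
# [0, 32, 64]. Each index makes the DataLoader call dataset[idx], which itself
# returns the whole contiguous batch of up to 32 frames; collate_fn then stacks
# that batch into tensors.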
def collate_fn(batch):
    """See YOLOv5.utils.datasets.collate_fn"""
    if not len(batch):
        print("help!")
        print(batch)
    img, label, shapes = zip(*batch)  # transposed
    for i, l in enumerate(label):
        l[:, 0] = i  # add target image index for build_targets()
    return torch.stack(img, 0), torch.cat(label, 0), shapes