import os
import time
import copy
import logging
import random

import numpy as np
import torch
import matplotlib.pyplot as plt

from detectron2.config import configurable
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from transformers import BertTokenizer
from pycocotools import mask as coco_mask

import albumentations as A
# from albumentations.pytorch import ToTensorV2
from PIL import Image, ImageDraw, ImageFilter
from detectron2.utils.visualizer import Visualizer

_logger = logging.getLogger(__name__)

# Per-channel mean/std used to undo the input normalisation before plotting.
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)


def convert_coco_poly_to_mask(segmentations, height, width):
    """Rasterise COCO polygon annotations into one mask per object.

    Args:
        segmentations: Iterable with one entry per object; each entry is
            passed to ``pycocotools.mask.frPyObjects`` with the image size.
        height: Mask height in pixels.
        width: Mask width in pixels.

    Returns:
        The per-object masks stacked along dim 0, or an empty
        ``(0, height, width)`` ``uint8`` tensor when ``segmentations``
        yields nothing.
    """
    masks = []
    for polygons in segmentations:
        rles = coco_mask.frPyObjects(polygons, height, width)
        mask = coco_mask.decode(rles)
        if len(mask.shape) < 3:
            # Give a 2-D decode result the trailing axis reduced over below.
            mask = mask[..., None]
        mask = torch.as_tensor(mask, dtype=torch.uint8)
        # Collapse the last axis: a pixel is set if any decoded part covers it.
        masks.append(mask.any(dim=2))
    if not masks:
        return torch.zeros((0, height, width), dtype=torch.uint8)
    return torch.stack(masks, dim=0)


def build_transform_train(cfg):
    """Return the training augmentations: a square resize to ``cfg.img_size``."""
    image_size = cfg.img_size
    return [T.Resize((image_size, image_size))]


def build_transform_test(cfg):
    """Return the test augmentations: a square resize to ``cfg.img_size``."""
    image_size = cfg.img_size
    return [T.Resize((image_size, image_size))]


def _build_denormalizer():
    """Return an albumentations transform that inverts the mean/std scaling.

    Normalising with mean ``-m/s`` and std ``1/s`` maps ``x`` to ``x*s + m``,
    i.e. it undoes a previous ``(x - m) / s``.
    """
    return A.Normalize(
        mean=[-m / s for m, s in zip(_NORM_MEAN, _NORM_STD)],
        std=[1.0 / s for s in _NORM_STD],
        max_pixel_value=1.0,
    )


def _save_side_by_side(image, seg_map, title, fpath):
    """Plot ``image`` beside ``seg_map`` under ``title`` and save to ``fpath``."""
    fig = plt.figure(figsize=(10, 6))
    try:
        ax_img = fig.add_subplot(1, 2, 1)
        ax_img.imshow(image)
        ax_img.set_xlabel("Mosaic Image")
        ax_seg = fig.add_subplot(1, 2, 2)
        ax_seg.imshow(seg_map)
        ax_seg.set_xlabel("Segmentation Map")
        fig.suptitle(title)
        fig.tight_layout()
        fig.savefig(fpath)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each saved sample leaks a figure.
        plt.close(fig)


def COCOVisualization(dataloader, dirname="coco-aug-data-vis"):
    """Save image / mask / sentence previews for items drawn from ``dataloader``.

    Each plotted item is unpacked as ``(img, gt_mask, lang_tokens, lang_mask)``.
    ``img`` is transposed with ``(1, 2, 0)`` and shown as ``uint8`` without
    denormalisation (presumably a channels-first image tensor - TODO confirm).
    Previews are written to ``dirname`` as ``sample_<k>.jpg`` with ``k``
    starting at 1.

    Args:
        dataloader: Iterable yielding the 4-element items described above.
        dirname: Output directory; created if missing.

    Returns:
        None. If the dataloader runs out early, a warning is logged and the
        function stops instead of raising ``StopIteration``.
    """
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    os.makedirs(dirname, exist_ok=True)

    it = iter(dataloader)
    try:
        first = next(it)
    except StopIteration:
        _logger.warning("COCOVisualization: dataloader is empty, nothing saved")
        return

    # The first item is consumed only to choose how many previews to draw; it
    # is not plotted itself.
    # NOTE(review): random.randint raises ValueError when len(first) < 4.
    n_sample = random.randint(4, len(first))

    for i in range(n_sample):
        try:
            batch = next(it)
        except StopIteration:
            _logger.warning(
                "COCOVisualization: dataloader exhausted after %d of %d samples",
                i, n_sample,
            )
            break
        img, gt_mask, lang_tokens, _lang_mask = batch

        img_np = np.transpose(img.cpu().numpy(), (1, 2, 0))
        seg_target = gt_mask[:, :].cpu().numpy()
        tokens = lang_tokens.reshape(-1).cpu().numpy()
        sentences = tokenizer.decode(tokens, skip_special_tokens=True)

        fpath = os.path.join(dirname, f'sample_{i+1}.jpg')
        _save_side_by_side(img_np.astype('uint8'), seg_target, sentences, fpath)


def MosaicVisualization(dataloader, dirname="coco-aug-data-vis", n_sample=4):
    """Save up to ``n_sample`` denormalised previews from ``dataloader``.

    Each item is indexed with the keys ``'image'``, ``'seg_target'`` and
    ``'sentence'``. The image is transposed with ``(1, 2, 0)``, denormalised,
    scaled by 255 and cast to ``uint8``; the sentence ids are flattened and
    decoded with the BERT tokenizer for the figure title. Files are named
    ``sample_<k>.jpg`` with ``k`` counting down from ``n_sample`` to 1.

    Args:
        dataloader: Iterable yielding mappings with the keys listed above.
        dirname: Output directory; created if missing.
        n_sample: Maximum number of previews to save; values <= 0 save nothing.

    Returns:
        None. Visualisation is best-effort: it stops quietly when the
        dataloader is exhausted, and logs the traceback and stops when a
        sample cannot be rendered.
    """
    denorm = _build_denormalizer()
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    os.makedirs(dirname, exist_ok=True)

    it = iter(dataloader)
    remaining = n_sample
    while remaining > 0:
        try:
            data = next(it)
        except StopIteration:
            break

        try:
            img_np = np.transpose(data['image'].cpu().numpy(), (1, 2, 0))
            img_denorm = denorm(image=img_np)['image']
            img_ndarray = (img_denorm * 255).astype(np.uint8)
            seg_target = data['seg_target']
            token_ids = data['sentence'].reshape(-1).cpu().numpy()
            sentences = tokenizer.decode(token_ids, skip_special_tokens=True)

            fpath = os.path.join(dirname, f'sample_{remaining}.jpg')
            _save_side_by_side(img_ndarray, seg_target, sentences, fpath)
        except Exception:
            # Keep the original "stop on failure" behaviour, but surface the
            # cause instead of swallowing it.
            _logger.exception(
                "MosaicVisualization: failed to render a sample; stopping"
            )
            break
        remaining -= 1