from configs import CFG
import os
import requests
import zipfile
from pycocotools.coco import COCO
import torch
import cv2
import albumentations as A
import soundfile as sf


# (sub-directory that temporarily holds the zip, zip file name, download URL)
_COCO_2014_ARCHIVES = (
    (
        "annotations",
        "annotations.zip",
        "http://images.cocodataset.org/annotations/annotations_trainval2014.zip",
    ),
    (
        "train2014",
        "train2014.zip",
        "http://images.cocodataset.org/zips/train2014.zip",
    ),
    (
        "val2014",
        "val2014.zip",
        "http://images.cocodataset.org/zips/val2014.zip",
    ),
)


def _download_and_extract(url, zip_path, extract_dir, chunk_size=8192, timeout=(10, 60)):
    """Stream ``url`` to ``zip_path``, unzip it into ``extract_dir``, delete the zip.

    Args:
        url: Address of the zip archive to download.
        zip_path: Local path where the archive is stored while it is processed.
        extract_dir: Directory the archive contents are extracted into.
        chunk_size: Number of bytes requested per streamed chunk (8192 = 8KB).
        timeout: ``(connect, read)`` timeout in seconds handed to ``requests``;
            the read timeout applies between received chunks, not to the whole
            download.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # The target directory may not exist yet; open(..., "wb") would fail then.
    os.makedirs(os.path.dirname(zip_path) or ".", exist_ok=True)

    try:
        # Context manager makes sure the streamed connection is released.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Fail early instead of saving an HTML error page as a ".zip".
            response.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(extract_dir)
    finally:
        # Remove the archive on success and also when something went wrong,
        # so no partial download is left behind.
        if os.path.exists(zip_path):
            os.remove(zip_path)


# Load Coco dataset
def download_dataset(data_dir="../datasets"):
    """Download the COCO 2014 annotations, train images and val images.

    Each archive is streamed to ``<data_dir>/<subdir>/<name>.zip``, extracted
    into ``data_dir`` and then deleted. Missing directories are created.

    Args:
        data_dir: Root directory that receives the extracted dataset.

    Raises:
        requests.RequestException: If a download fails (including HTTP error
            statuses and timeouts).
        zipfile.BadZipFile: If a downloaded archive is corrupt.
    """
    for subdir, zip_name, url in _COCO_2014_ARCHIVES:
        zip_file = os.path.join(data_dir, subdir, zip_name)
        _download_and_extract(url, zip_file, data_dir)


def make_pairs(annotation_json_files, image_dir, max_captions=3):
    """Pair every entry of a directory with a placeholder caption.

    NOTE(review): despite its name, ``annotation_json_files`` is passed to
    ``os.listdir`` and therefore has to be a directory path; ``image_dir`` and
    ``max_captions`` are not used yet. This looks like a stub that does not
    read real captions -- confirm the intended behavior.

    Args:
        annotation_json_files: Directory whose entries are listed.
        image_dir: Currently unused.
        max_captions: Currently unused.

    Returns:
        A list of ``(path, "an image")`` tuples, one per directory entry, where
        ``path`` is the entry name joined onto ``annotation_json_files``. The
        order is whatever ``os.listdir`` returns.
    """
    images = os.listdir(annotation_json_files)
    image_caption = [
        (os.path.join(annotation_json_files, image), "an image")
        for image in images
    ]
    return image_caption