{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/data/projects/yejin/VerbCentric_RIS/ReferFormer\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/yejin/.conda/envs/VerbCentric_RIS/lib/python3.9/site-packages/IPython/core/magics/osm.py:417: UserWarning: using dhist requires you to install the `pickleshare` library.\n", " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" ] } ], "source": [ "%cd /home/yejin/data/projects/yejin/VerbCentric_RIS/ReferFormer" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. manual 필터링 반영" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import re\n", "import json" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "selected_frames_df = pd.read_json(\"/home/yejin/data/dataset/VRIS/mbench/ytvos/selected_instances.jsonl\", lines = True)\n", "manual_selected = pd.read_json(\"manual_selected_frames.jsonl\", lines = True)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
segmentationbboxareafile_nameheightwidthlabelcategory_namesentences
0[[1081.0, 719.5, 1051.0, 719.5, 1050.5, 716.0,...[708.5, 156.5, 420.0, 563.0]131357.2500917dcfc4_00000.png720128064zebra{'tokens': ['a', 'zebra', 'on', 'the', 'right'...
\n", "
" ], "text/plain": [ " segmentation \\\n", "0 [[1081.0, 719.5, 1051.0, 719.5, 1050.5, 716.0,... \n", "\n", " bbox area file_name height \\\n", "0 [708.5, 156.5, 420.0, 563.0] 131357.25 00917dcfc4_00000.png 720 \n", "\n", " width label category_name \\\n", "0 1280 64 zebra \n", "\n", " sentences \n", "0 {'tokens': ['a', 'zebra', 'on', 'the', 'right'... " ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "selected_frames_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for i in range(len(manual_selected)):\n", " idx = manual_selected.loc[i, \"index\"]\n", " new_sent = manual_selected.loc[i, 'new_sent']\n", "\n", " if new_sent != \"\":\n", " new_sent_dict = {\n", " \"tokens\" : new_sent.split(' '),\n", " \"raw\" : new_sent,\n", " \"sent\" : re.sub('[^A-Za-z0-9\\s]+', '', new_sent.lower())\n", " }\n", " selected_frames_df.at[idx, 'sentences'] = new_sent_dict" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "manual_selected_frames = selected_frames_df.loc[manual_selected['index'].values]" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "manual_selected_frames.to_json(\"revised_frames.jsonl\", orient='records', lines=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. lmdb로 변환하기 위해 마스크 저장하기" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import argparse\n", "import os\n", "import os.path as osp\n", "import lmdb\n", "import pyarrow as pa\n", "import json\n", "from tqdm import tqdm\n", "import matplotlib.pyplot as plt\n", "from skimage import io\n", "import numpy as np\n", "from shapely.geometry import Polygon, MultiPolygon\n", "from matplotlib.collections import PatchCollection\n", "from pycocotools import mask\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "#jsonl 파일을 {index: json_obj, ... }형식으로\n", "\n", "json_data = []\n", "\n", "with open('revised_frames.jsonl', 'rb') as f:\n", " for line in f:\n", " json_data.append(json.loads(line)) " ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "def getMask(ann):\n", " # return mask, area and mask-center\n", " if type(ann['segmentation'][0]) == list: # polygon\n", " rle = mask.frPyObjects(ann['segmentation'], ann['height'],\n", " ann['width'])\n", " else:\n", " rle = ann['segmentation']\n", " # for i in range(len(rle['counts'])):\n", " # print(rle)\n", " m = mask.decode(rle)\n", " m = np.sum(\n", " m, axis=2\n", " ) # sometimes there are multiple binary map (corresponding to multiple segs)\n", " m = m.astype(np.uint8) # convert to np.uint8\n", " # compute area\n", " area = sum(mask.area(rle)) # should be close to ann['area']\n", " return {'mask': m, 'area': area}\n", " # # position\n", " # position_x = np.mean(np.where(m==1)[1]) # [1] means columns (matlab style) -> x (c style)\n", " # position_y = np.mean(np.where(m==1)[0]) # [0] means rows (matlab style) -> y (c style)\n", " # # mass position (if there were multiple regions, we use the largest one.)\n", " # label_m = label(m, connectivity=m.ndim)\n", " # regions = regionprops(label_m)\n", " # if len(regions) > 0:\n", " # \tlargest_id = np.argmax(np.array([props.filled_area for props in regions]))\n", " # \tlargest_props = regions[largest_id]\n", " # \tmass_y, mass_x = largest_props.centroid\n", " # else:\n", " # \tmass_x, mass_y = position_x, position_y\n", " # # if centroid is not in mask, we find the closest point to it from mask\n", " # if m[mass_y, mass_x] != 1:\n", " # \tprint 'Finding closes mask point ...'\n", " # \tkernel = np.ones((10, 10),np.uint8)\n", " # \tme = cv2.erode(m, kernel, iterations = 1)\n", " # \tpoints = zip(np.where(me == 1)[0].tolist(), np.where(me == 1)[1].tolist()) # row, col style\n", " # \tpoints = np.array(points)\n", " # \tdist = np.sum((points - (mass_y, mass_x))**2, axis=1)\n", " # \tid = np.argsort(dist)[0]\n", " # \tmass_y, mass_x = points[id]\n", " # \t# return\n", " # return {'mask': m, 'area': area, 'position_x': position_x, 'position_y': position_y, 'mass_x': mass_x, 'mass_y': mass_y}\n", " # # show image and mask\n", " # I = io.imread(osp.join(self.IMAGE_DIR, image['file_name']))\n", " # plt.figure()\n", " # plt.imshow(I)\n", " # ax = plt.gca()\n", " # img = np.ones( (m.shape[0], m.shape[1], 3) )\n", " # color_mask = np.array([2.0,166.0,101.0])/255\n", " # for i in range(3):\n", " # img[:,:,i] = color_mask[i]\n", " # ax.imshow(np.dstack( (img, m*0.5) ))\n", " # plt.show()\n", "\n", "def showMask(ann, image_dir, mask_dir):\n", " \n", " fig, ax = plt.subplots()\n", " I = io.imread(osp.join(image_dir, ann['file_name']))\n", " ax.imshow(I)\n", "\n", " M = getMask(ann)\n", " msk = M['mask']\n", " #msk = io.imread(osp.join(mask_dir, ann['file_name']))\n", " \n", " ax.imshow(msk, alpha = 0.5)\n", " ax.set_title(ann['sentences']['sent'])\n", " plt.show()\n", "\n", "\n", "\n", "def saveMask(ann, mask_dir, seg_id):\n", " M = getMask(ann)\n", " msk = M['mask']\n", " height, width = msk.shape\n", " \n", " fig, ax = plt.subplots(figsize=(width / 100, height / 100), dpi=100)\n", " ax.imshow(msk, cmap='gray', vmin=0, vmax=1)\n", "\n", " save_path = f'{mask_dir}/{seg_id}'\n", " plt.axis('off')\n", " plt.subplots_adjust(left=0, right=1, top=1, bottom=0) # Remove padding\n", "\n", " fig.savefig(save_path, dpi=100, bbox_inches='tight', pad_inches=0)\n", " \n", " plt.close(fig)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "for i in range(len(json_data)):\n", " #showMask(json_data[i], image_dir = '/home/yejin/data/dataset/VRIS/mbench/ytvos/selected_frames', mask_dir = '/home/yejin/data/dataset/VRIS/mbench/ytvos/filtered_masks')\n", " saveMask(json_data[i], '/home/yejin/data/dataset/VRIS/mbench/ytvos/filtered_masks_segid', i)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##############안 쓰는 함수!###################\n", "# 마스크 저장\n", "# annotation dictionary as input\n", "def saveMask(annotation, mask_dir, seg_box='seg'):\n", " image_width = annotation['width']\n", " image_height = annotation['height']\n", "\n", " fig, ax = plt.subplots(figsize=(image_width / 100, image_height / 100), facecolor='black') # figsize 단위는 인치, DPI 고려\n", " ax.set_facecolor('black')\n", " \n", " \n", " if seg_box == 'seg':\n", " polygons = []\n", " color = (1, 1, 1)\n", " \n", " if type(annotation['segmentation'][0]) == list:\n", " # polygon used for refcoco*\n", " for seg in annotation['segmentation']:\n", " poly = np.array(seg).reshape((int(len(seg) / 2), 2))\n", " polygons.append(Polygon(poly))\n", "\n", " p = PatchCollection(polygons,\n", " facecolors=(1, 1, 1),\n", " linewidths=0)\n", " ax.add_collection(p)\n", "\n", " # 축 범위를 이미지 크기에 맞게 설정\n", " ax.set_xlim(0, image_width)\n", " ax.set_ylim(0, image_height)\n", " \n", " # y축 방향 뒤집기 (이미지 좌표계와 일치)\n", " ax.invert_yaxis()\n", " \n", " # 플롯 표시\n", " #plt.axis('equal') # 축 비율을 동일하게 설정\n", " #plt.show()\n", "\n", " #플롯 저장\n", " plt.axis('off') # 축 숨김 (선택 사항)\n", " save_path = f'{mask_dir}/{annotation[\"file_name\"]}'\n", " plt.savefig(save_path, bbox_inches='tight', pad_inches=0, facecolor='black')\n", "\n", "for annotation in json_data:\n", " saveMask(annotation, mask_dir='/home/yejin/data/dataset/VRIS/mbench/ytvos/filtered_masks')\n", " " ] } ], "metadata": { "kernelspec": { "display_name": "VerbCentric_RIS", "language": "python", "name": "verbcentric_ris" } }, "nbformat": 4, "nbformat_minor": 2 }