{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import argparse\n",
    "import sys\n",
    "import opts\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.patches as patches\n",
    "import textwrap\n",
    "\n",
    "from PIL import Image, ImageDraw\n",
    "import json\n",
    "import numpy as np\n",
    "from mbench.ytvos_ref import build as build_ytvos_ref"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root folder; frames are read from <img_folder>/JPEGImages and masks from <img_folder>/Annotations.\n",
    "img_folder = 'data/ref-youtube-vos/train'\n",
    "# Caption colors, indexed by each expression's isValid value (index 0 -> red, index 1 -> blue).\n",
    "text_colors = ['red', 'blue']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Nested dict as read by showImageRef: video id -> category -> frame name -> object id\n",
    "# -> entry with 'ref_exp' and 'isValid' keys.\n",
    "with open('mbench/result_revised50.json') as file:\n",
    "    data = json.load(file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def bounding_box(img):\n",
    "    \"\"\"Return the inclusive index bounds of the non-zero region of a mask array.\n",
    "\n",
    "    Args:\n",
    "        img: Array indexed as (row, column); any non-zero element counts as foreground.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(rmin, rmax, cmin, cmax)``, i.e. ``(y1, y2, x1, x2)``: the first and\n",
    "        last row index and the first and last column index containing a non-zero element.\n",
    "\n",
    "    Raises:\n",
    "        IndexError: If ``img`` contains no non-zero element.\n",
    "    \"\"\"\n",
    "    rows = np.any(img, axis=1)\n",
    "    cols = np.any(img, axis=0)\n",
    "    rmin, rmax = np.where(rows)[0][[0, -1]]\n",
    "    cmin, cmax = np.where(cols)[0][[0, -1]]\n",
    "    return rmin, rmax, cmin, cmax  # y1, y2, x1, x2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "def showImageRef(vid_id):\n",
    "    \"\"\"Show every annotated frame of one video with boxes and referring expressions.\n",
    "\n",
    "    Walks ``data[vid_id]`` (category -> frame -> object id). For each frame that has\n",
    "    at least one object entry, a figure with one subplot per object id is drawn: the\n",
    "    RGB frame, a red rectangle around the pixels of the annotation mask that equal the\n",
    "    object id, and the wrapped ``ref_exp`` text colored by ``text_colors[isValid]``.\n",
    "\n",
    "    Args:\n",
    "        vid_id: Key into the global ``data`` dict; also used as the video sub-folder\n",
    "            name under ``JPEGImages`` and ``Annotations`` inside ``img_folder``.\n",
    "    \"\"\"\n",
    "    vid_data = data[vid_id]\n",
    "\n",
    "    for cat, cat_data in vid_data.items():\n",
    "        for frame, frame_data in cat_data.items():\n",
    "            img_path = os.path.join(img_folder, 'JPEGImages', vid_id, frame + '.jpg')\n",
    "            mask_path = os.path.join(img_folder, 'Annotations', vid_id, frame + '.png')\n",
    "            # Context managers release the file handles once the pixel data is copied.\n",
    "            with Image.open(img_path) as img_file:\n",
    "                img = img_file.convert('RGB')\n",
    "            with Image.open(mask_path) as mask_file:\n",
    "                mask = np.array(mask_file.convert('P'))\n",
    "\n",
    "            if not frame_data:\n",
    "                continue\n",
    "\n",
    "            obj_ids = list(frame_data.keys())\n",
    "            obj_nums = len(obj_ids)\n",
    "\n",
    "            # squeeze=False keeps `axes` 2-D even for a single object, so indexing is uniform.\n",
    "            fig, axes = plt.subplots(1, obj_nums, figsize=(16, obj_nums), squeeze=False)\n",
    "\n",
    "            for i, obj_id in enumerate(obj_ids):\n",
    "                obj_data = frame_data[obj_id]\n",
    "                if not obj_data:\n",
    "                    continue\n",
    "\n",
    "                ref_exp = obj_data['ref_exp']\n",
    "                isValid = obj_data['isValid']\n",
    "\n",
    "                obj_mask = mask == int(obj_id)\n",
    "                if obj_mask.any():\n",
    "                    y1, y2, x1, x2 = bounding_box(obj_mask)\n",
    "                    box = np.array([x1, y1, x2, y2])\n",
    "                else:\n",
    "                    # Object id absent from this frame's mask: fall back to an empty box.\n",
    "                    box = np.array([0, 0, 0, 0])\n",
    "\n",
    "                ax = axes[0, i]\n",
    "                ax.imshow(img)\n",
    "                width, height = box[2] - box[0], box[3] - box[1]\n",
    "                # Anchor at the box computed for *this* object: x1/y1 are unset (or stale\n",
    "                # from a previous object) whenever the mask is empty.\n",
    "                rect = patches.Rectangle((box[0], box[1]), width, height, linewidth=2, edgecolor='red', facecolor='none')\n",
    "                ax.add_patch(rect)\n",
    "\n",
    "                wrapped_text = \"\\n\".join(textwrap.wrap(ref_exp, width=30))\n",
    "                ax.annotate(wrapped_text, xy=(0.5, -1.5), xycoords=\"axes fraction\", ha=\"center\", color=text_colors[isValid])\n",
    "\n",
    "            fig.suptitle(f\"video: {vid_id} - cat: {cat} - frame: {frame}\")\n",
    "            plt.show()\n",
    "            # Close explicitly so looping over many frames does not accumulate open figures.\n",
    "            plt.close(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "04667fabaa\n"
     ]
    }
   ],
   "source": [
    "# Pick one video by its position in data's key order and visualize it.\n",
    "vid_id = list(data.keys())[49]\n",
    "print(vid_id)\n",
    "showImageRef(vid_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "referformer",
   "language": "python",
   "name": "referformer"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}