Space status: runtime error

Commit: update

This view is limited to 50 files because it contains too many changes. See the raw diff for the complete change set.
- .gitattributes +1 -0
- .ipynb_checkpoints/app-checkpoint.py +10 -12
- .ipynb_checkpoints/requirements-checkpoint.txt +17 -0
- .ipynb_checkpoints/test-checkpoint.ipynb +113 -0
- LICENSE +201 -0
- README.md +0 -13
- app.py +8 -8
- datasets/.ipynb_checkpoints/__init__-checkpoint.py +0 -23
- datasets/.ipynb_checkpoints/coco-checkpoint.py +0 -649
- datasets/.ipynb_checkpoints/dataset-checkpoint.py +0 -44
- datasets/.ipynb_checkpoints/odvg-checkpoint.py +0 -258
- datasets/.ipynb_checkpoints/transforms-checkpoint.py +0 -285
- datasets/__init__.py +0 -23
- datasets/__pycache__/__init__.cpython-310.pyc +0 -0
- datasets/__pycache__/coco.cpython-310.pyc +0 -0
- datasets/__pycache__/coco_eval.cpython-310.pyc +0 -0
- datasets/__pycache__/cocogrounding_eval.cpython-310.pyc +0 -0
- datasets/__pycache__/data_util.cpython-310.pyc +0 -0
- datasets/__pycache__/odvg.cpython-310.pyc +0 -0
- datasets/__pycache__/panoptic_eval.cpython-310.pyc +0 -0
- datasets/__pycache__/random_crop.cpython-310.pyc +0 -0
- datasets/__pycache__/sltransform.cpython-310.pyc +0 -0
- datasets/__pycache__/transforms.cpython-310.pyc +0 -0
- datasets/coco.py +0 -649
- datasets/coco_eval.py +0 -266
- datasets/coco_panoptic.py +0 -99
- datasets/data_util.py +0 -170
- datasets/dataset.py +0 -44
- datasets/odvg.py +0 -258
- datasets/panoptic_eval.py +0 -44
- datasets/random_crop.py +0 -135
- datasets/sltransform.py +0 -247
- environment.yaml +248 -0
- groundingdino.egg-info/PKG-INFO +213 -0
- groundingdino.egg-info/SOURCES.txt +46 -0
- groundingdino.egg-info/dependency_links.txt +1 -0
- groundingdino.egg-info/requires.txt +10 -0
- groundingdino.egg-info/top_level.txt +1 -0
- groundingdino/.ipynb_checkpoints/__init__-checkpoint.py +0 -0
- groundingdino/.ipynb_checkpoints/version-checkpoint.py +1 -0
- groundingdino/__init__.py +0 -0
- groundingdino/__pycache__/__init__.cpython-310.pyc +0 -0
- groundingdino/config/.ipynb_checkpoints/GroundingDINO_SwinB_cfg-checkpoint.py +43 -0
- groundingdino/config/GroundingDINO_SwinB_cfg.py +43 -0
- groundingdino/config/GroundingDINO_SwinT_OGC.py +43 -0
- groundingdino/config/__init__.py +0 -0
- groundingdino/datasets/__init__.py +0 -0
- groundingdino/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
- groundingdino/datasets/__pycache__/transforms.cpython-310.pyc +0 -0
- {datasets → groundingdino/datasets}/cocogrounding_eval.py +1 -3
.gitattributes
CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 util/ filter=lfs diff=lfs merge=lfs -text
 models/GroundingDINO/ops/build/temp.linux-x86_64-3.10/home/jamada/jupyterlab/projects/gdino-peft/vlm4eo/Open-GroundingDino/models/GroundingDINO/ops/src/vision.o filter=lfs diff=lfs merge=lfs -text
+groundingdino/_C.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/app-checkpoint.py
CHANGED
@@ -3,7 +3,6 @@ from functools import partial
 import cv2
 import requests
 import os
-import sys
 from io import BytesIO
 from PIL import Image
 import numpy as np
@@ -17,28 +16,27 @@ import torch
 # prepare the environment
 os.system("python setup.py build develop --user")
 os.system("pip install packaging==21.3")
-os.system("pip install gradio
+os.system("pip install gradio")
 
 
 warnings.filterwarnings("ignore")
 
 import gradio as gr
-
-
-from
-from util.
-from util.
-
-import datasets.transforms as T
+
+from groundingdino.models import build_model
+from groundingdino.util.slconfig import SLConfig
+from groundingdino.util.utils import clean_state_dict
+from groundingdino.util.inference import annotate, load_image, predict
+import groundingdino.datasets.transforms as T
 
 from huggingface_hub import hf_hub_download
 
 
 
 # Use this command for evaluate the Grounding DINO model
-config_file = "
-ckpt_repo_id = "
-ckpt_filenmae = "
+config_file = "groundingdino/config/GroundingDINO_SwinT_OGC.py"
+ckpt_repo_id = "ShilongLiu/GroundingDINO"
+ckpt_filenmae = "groundingdino_swint_ogc.pth"
 
 
 def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
.ipynb_checkpoints/requirements-checkpoint.txt
ADDED
@@ -0,0 +1,17 @@
+cython
+submitit
+scipy
+termcolor
+addict
+yapf==0.40.1
+timm
+torch
+torchvision
+transformers
+numpy
+opencv-python
+supervision==0.6.0
+pycocotools
+pyyaml>3.10
+colorlog
+loralib
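
Two of the new pins are worth noting: supervision==0.6.0 is used for drawing the detections, and loralib is the low-rank-adaptation library behind the "Peft" in this Space's name. The snippet below is an illustrative sketch of the usual loralib pattern (not code from this repository): swap a linear layer for its LoRA counterpart, then freeze everything except the LoRA matrices before fine-tuning.

import loralib as lora
import torch.nn as nn

# Hypothetical tiny model: the 256-dim projection gets a rank-8 LoRA adapter.
model = nn.Sequential(
    lora.Linear(256, 256, r=8),  # LoRA-augmented replacement for nn.Linear(256, 256)
    nn.ReLU(),
    nn.Linear(256, 4),
)

# Freeze all original weights; only the LoRA A/B matrices remain trainable.
lora.mark_only_lora_as_trainable(model)

trainable = [name for name, p in model.named_parameters() if p.requires_grad]
print(trainable)  # only the lora_A / lora_B parameters of the first layer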
.ipynb_checkpoints/test-checkpoint.ipynb
ADDED
@@ -0,0 +1,113 @@
New notebook (nbformat 4, "base" kernel, Python 3.8.10) containing a single code cell. Its saved output records "final text_encoder_type: bert-base-uncased", a ~440 MB model.safetensors download, deprecation/requires_grad warnings from transformers and torch.utils.checkpoint, and the cell's return value True (from cv2.imwrite). The cell source:
+from groundingdino.util.inference import load_model, load_image, predict, annotate
+import cv2
+
+model = load_model("groundingdino/config/GroundingDINO_SwinT_OGC.py", "../04-06-segment-anything/weights/groundingdino_swint_ogc.pth")
+IMAGE_PATH = ".asset/cat_dog.jpeg"
+TEXT_PROMPT = "chair . person . dog ."
+BOX_TRESHOLD = 0.35
+TEXT_TRESHOLD = 0.25
+
+image_source, image = load_image(IMAGE_PATH)
+
+boxes, logits, phrases = predict(
+    model=model,
+    image=image,
+    caption=TEXT_PROMPT,
+    box_threshold=BOX_TRESHOLD,
+    text_threshold=TEXT_TRESHOLD
+)
+
+annotated_frame = annotate(image_source=image_source, boxes=boxes, logits=logits, phrases=phrases)
+cv2.imwrite("annotated_image.jpg", annotated_frame)
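
The cell only writes the annotated frame to disk. The boxes returned by predict are normalized cx, cy, w, h tensors; a short follow-up like the sketch below (an illustration assuming torchvision's box_convert and the image_source array from load_image, not part of the committed notebook) converts them to absolute pixel corners:

import torch
from torchvision.ops import box_convert

# image_source from load_image() is the original image as an H x W x 3 numpy array.
h, w, _ = image_source.shape

# Scale the normalized cxcywh boxes to pixels, then convert to xyxy corners.
boxes_px = boxes * torch.tensor([w, h, w, h])
xyxy = box_convert(boxes=boxes_px, in_fmt="cxcywh", out_fmt="xyxy").numpy()

for (x1, y1, x2, y2), phrase, score in zip(xyxy, phrases, logits):
    print(f"{phrase}: {float(score):.2f} at ({x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f})")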
LICENSE
ADDED
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   [Sections 1-9 of the standard Apache-2.0 terms and conditions and the "How to apply" appendix, unmodified]
+
+   Copyright 2023 - present, IDEA Research.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
README.md
DELETED
@@ -1,13 +0,0 @@
----
-title: Peft GroundingDINO
-emoji: 🐢
-colorFrom: indigo
-colorTo: purple
-sdk: gradio
-sdk_version: 4.36.1
-app_file: app.py
-pinned: false
-license: mit
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
CHANGED
@@ -3,7 +3,6 @@ from functools import partial
 import cv2
 import requests
 import os
-import sys
 from io import BytesIO
 from PIL import Image
 import numpy as np
@@ -17,24 +16,25 @@ import torch
 # prepare the environment
 os.system("python setup.py build develop --user")
 os.system("pip install packaging==21.3")
-os.system("pip install gradio
+os.system("pip install gradio")
 
 
 warnings.filterwarnings("ignore")
 
 import gradio as gr
-
-from
-from util.
-from util.
-
+
+from groundingdino.models import build_model
+from groundingdino.util.slconfig import SLConfig
+from groundingdino.util.utils import clean_state_dict
+from groundingdino.util.inference import annotate, load_image, predict
+import groundingdino.datasets.transforms as T
 
 from huggingface_hub import hf_hub_download
 
 
 
 # Use this command for evaluate the Grounding DINO model
-config_file = "
+config_file = "groundingdino/config/GroundingDINO_SwinB_OGC.py"
 ckpt_repo_id = "Hasanmog/Peft-GroundingDINO"
 ckpt_filenmae = "Best.pth"
 
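
The hunk ends at the load_model_hf signature; its body is not part of this diff. Below is a minimal sketch of how such a helper is commonly assembled from the imports added above (SLConfig, build_model, clean_state_dict, hf_hub_download). It is an illustration that assumes the checkpoint stores its weights under a "model" key, not the Space's actual implementation.

import torch
from huggingface_hub import hf_hub_download

from groundingdino.models import build_model
from groundingdino.util.slconfig import SLConfig
from groundingdino.util.utils import clean_state_dict


def load_model_hf(model_config_path, repo_id, filename, device="cpu"):
    # Build the architecture from the Python config file.
    args = SLConfig.fromfile(model_config_path)
    args.device = device
    model = build_model(args)

    # Download the checkpoint from the Hub and load the cleaned state dict.
    cache_file = hf_hub_download(repo_id=repo_id, filename=filename)
    checkpoint = torch.load(cache_file, map_location=device)
    model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False)
    model.eval()
    return model

With the values set above, this would resolve to hf_hub_download(repo_id="Hasanmog/Peft-GroundingDINO", filename="Best.pth"). Note that the new config_file points at groundingdino/config/GroundingDINO_SwinB_OGC.py, while the config files added in this commit are GroundingDINO_SwinB_cfg.py and GroundingDINO_SwinT_OGC.py; if that path does not exist, SLConfig.fromfile would fail at startup, which would be consistent with the Space's runtime-error status.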
datasets/.ipynb_checkpoints/__init__-checkpoint.py
DELETED
@@ -1,23 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-import torch.utils.data
-import torchvision
-from .coco import build as build_coco
-
-
-def get_coco_api_from_dataset(dataset):
-    for _ in range(10):
-        # if isinstance(dataset, torchvision.datasets.CocoDetection):
-        #     break
-        if isinstance(dataset, torch.utils.data.Subset):
-            dataset = dataset.dataset
-    if isinstance(dataset, torchvision.datasets.CocoDetection):
-        return dataset.coco
-
-
-def build_dataset(image_set, args, datasetinfo):
-    if datasetinfo["dataset_mode"] == 'coco':
-        return build_coco(image_set, args, datasetinfo)
-    if datasetinfo["dataset_mode"] == 'odvg':
-        from .odvg import build_odvg
-        return build_odvg(image_set, args, datasetinfo)
-    raise ValueError(f'dataset {args.dataset_file} not supported')
datasets/.ipynb_checkpoints/coco-checkpoint.py
DELETED
@@ -1,649 +0,0 @@
The full 649-line COCO dataset module (a Jupyter checkpoint copy of datasets/coco.py, which is also deleted in this commit) is removed. It contained the COCO loading and augmentation pipeline used for training:
- label-remapping and target helpers: label2compat, label_compat2onehot, box_label_catter, label2onehot
- training-time target hacks: RandomSelectBoxlabels, RandomDrop, BboxPertuber, RandomCutout, RandomSelectBoxes, MaskCrop, and the dataset_hook_register mapping that exposes them
- CocoDetection (a torchvision.datasets.CocoDetection subclass with aux_target_hacks support), convert_coco_poly_to_mask, and ConvertCocoPolysToMask
- make_coco_transforms (multi-scale resize/crop augmentation, with an optional strong_aug path via datasets.sltransform), get_aux_target_hacks_list, the build() entry point, and an Objects365 usage example under __main__
datasets/.ipynb_checkpoints/dataset-checkpoint.py
DELETED
@@ -1,44 +0,0 @@
-from __future__ import print_function
-
-import torch
-import torchvision.datasets as datasets
-from torch.utils.data import Dataset
-from PIL import Image
-from .tsv_io import TSVFile
-import numpy as np
-import base64
-import io
-
-
-class TSVDataset(Dataset):
-    """ TSV dataset for ImageNet 1K training
-    """
-    def __init__(self, tsv_file, transform=None, target_transform=None):
-        self.tsv = TSVFile(tsv_file)
-        self.transform = transform
-        self.target_transform = target_transform
-
-    def __getitem__(self, index):
-        """
-        Args:
-            index (int): Index
-        Returns:
-            tuple: (image, target) where target is class_index of the target class.
-        """
-        row = self.tsv.seek(index)
-        image_data = base64.b64decode(row[-1])
-        image = Image.open(io.BytesIO(image_data))
-        image = image.convert('RGB')
-        target = int(row[1])
-
-        if self.transform is not None:
-            img = self.transform(image)
-        else:
-            img = image
-        if self.target_transform is not None:
-            target = self.target_transform(target)
-
-        return img, target
-
-    def __len__(self):
-        return self.tsv.num_rows()
datasets/.ipynb_checkpoints/odvg-checkpoint.py
DELETED
@@ -1,258 +0,0 @@
-from torchvision.datasets.vision import VisionDataset
-import os.path
-from typing import Callable, Optional
-import json
-from PIL import Image
-import torch
-import random
-import os, sys
-sys.path.append(os.path.dirname(sys.path[0]))
-
-import datasets.transforms as T
-
-class ODVGDataset(VisionDataset):
-    """
-    Args:
-        root (string): Root directory where images are downloaded to.
-        anno (string): Path to json annotation file.
-        label_map_anno (string): Path to json label mapping file. Only for Object Detection
-        transform (callable, optional): A function/transform that takes in an PIL image
-            and returns a transformed version. E.g, ``transforms.PILToTensor``
-        target_transform (callable, optional): A function/transform that takes in the
-            target and transforms it.
-        transforms (callable, optional): A function/transform that takes input sample and its target as entry
-            and returns a transformed version.
-    """
-
-    def __init__(
-        self,
-        root: str,
-        anno: str,
-        label_map_anno: str = None,
-        max_labels: int = 80,
-        transform: Optional[Callable] = None,
-        target_transform: Optional[Callable] = None,
-        transforms: Optional[Callable] = None,
-    ) -> None:
-        super().__init__(root, transforms, transform, target_transform)
-        self.root = root
-        self.dataset_mode = "OD" if label_map_anno else "VG"
-        self.max_labels = max_labels
-        if self.dataset_mode == "OD":
-            self.load_label_map(label_map_anno)
-        self._load_metas(anno)
-        self.get_dataset_info()
-
-    def load_label_map(self, label_map_anno):
-        with open(label_map_anno, 'r') as file:
-            self.label_map = json.load(file)
-        self.label_index = set(self.label_map.keys())
-
-    def _load_metas(self, anno):
-        with open(anno, 'r') as f:
-            self.metas = json.load(f)
-
-
-    def get_dataset_info(self):
-        print(f"  == total images: {len(self)}")
-        if self.dataset_mode == "OD":
-            print(f"  == total labels: {len(self.label_map)}")
-
-    def __getitem__(self, index: int):
-        meta = self.metas[index]
-        rel_path = meta["filename"]
-        abs_path = os.path.join(self.root, rel_path)
-        if not os.path.exists(abs_path):
-            raise FileNotFoundError(f"{abs_path} not found.")
-        image = Image.open(abs_path).convert('RGB')
-        w, h = image.size
-        if self.dataset_mode == "OD":
-            anno = meta["detection"]
-            instances = [obj for obj in anno["instances"]]
-            boxes = [obj["bbox"] for obj in instances]
-            # generate vg_labels
-            # pos bbox labels
-            ori_classes = [str(obj["label"]) for obj in instances]
-            pos_labels = set(ori_classes)
-            # neg bbox labels
-            neg_labels = self.label_index.difference(pos_labels)
-
-            vg_labels = list(pos_labels)
-            num_to_add = min(len(neg_labels), self.max_labels-len(pos_labels))
-            if num_to_add > 0:
-                vg_labels.extend(random.sample(neg_labels, num_to_add))
-
-            # shuffle
-            for i in range(len(vg_labels)-1, 0, -1):
-                j = random.randint(0, i)
-                vg_labels[i], vg_labels[j] = vg_labels[j], vg_labels[i]
-
-            caption_list = [self.label_map[lb] for lb in vg_labels]
-            caption_dict = {item:index for index, item in enumerate(caption_list)}
-
-            caption = ' . '.join(caption_list) + ' .'
-            classes = [caption_dict[self.label_map[str(obj["label"])]] for obj in instances]
-            boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
-            classes = torch.tensor(classes, dtype=torch.int64)
-        elif self.dataset_mode == "VG":
-            anno = meta["Grounding"]
-            instances = [obj for obj in anno["regions"]]
instances = [obj for obj in anno["regions"]]
|
100 |
-
boxes = [obj["bbox"] for obj in instances]
|
101 |
-
caption_list = [obj["phrase"] for obj in instances]
|
102 |
-
c = list(zip(boxes, caption_list))
|
103 |
-
random.shuffle(c)
|
104 |
-
boxes[:], caption_list[:] = zip(*c)
|
105 |
-
uni_caption_list = list(set(caption_list))
|
106 |
-
label_map = {}
|
107 |
-
for idx in range(len(uni_caption_list)):
|
108 |
-
label_map[uni_caption_list[idx]] = idx
|
109 |
-
classes = [label_map[cap] for cap in caption_list]
|
110 |
-
caption = ' . '.join(uni_caption_list) + ' .'
|
111 |
-
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
|
112 |
-
classes = torch.tensor(classes, dtype=torch.int64)
|
113 |
-
caption_list = uni_caption_list
|
114 |
-
# print("caption_list" , caption_list)
|
115 |
-
# print("caption" , caption)
|
116 |
-
# print("boxes" , boxes)
|
117 |
-
target = {}
|
118 |
-
target["image_id"] = rel_path.strip(".jpg")
|
119 |
-
target["size"] = torch.as_tensor([int(h), int(w)])
|
120 |
-
target["cap_list"] = caption_list
|
121 |
-
target["caption"] = caption
|
122 |
-
target["boxes"] = boxes
|
123 |
-
target["labels"] = classes
|
124 |
-
# print(" image_id " , target["image_id"])
|
125 |
-
# size, cap_list, caption, bboxes, labels
|
126 |
-
|
127 |
-
if self.transforms is not None:
|
128 |
-
image, target = self.transforms(image, target)
|
129 |
-
|
130 |
-
return image, target
|
131 |
-
|
132 |
-
|
133 |
-
def __len__(self) -> int:
|
134 |
-
return len(self.metas)
|
135 |
-
|
136 |
-
|
137 |
-
def make_coco_transforms(image_set, fix_size=False, strong_aug=False, args=None):
|
138 |
-
|
139 |
-
normalize = T.Compose([
|
140 |
-
T.ToTensor(),
|
141 |
-
T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
|
142 |
-
])
|
143 |
-
|
144 |
-
# config the params for data aug
|
145 |
-
scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
|
146 |
-
max_size = 1333
|
147 |
-
scales2_resize = [400, 500, 600]
|
148 |
-
scales2_crop = [384, 600]
|
149 |
-
|
150 |
-
# update args from config files
|
151 |
-
scales = getattr(args, 'data_aug_scales', scales)
|
152 |
-
max_size = getattr(args, 'data_aug_max_size', max_size)
|
153 |
-
scales2_resize = getattr(args, 'data_aug_scales2_resize', scales2_resize)
|
154 |
-
scales2_crop = getattr(args, 'data_aug_scales2_crop', scales2_crop)
|
155 |
-
|
156 |
-
# resize them
|
157 |
-
data_aug_scale_overlap = getattr(args, 'data_aug_scale_overlap', None)
|
158 |
-
if data_aug_scale_overlap is not None and data_aug_scale_overlap > 0:
|
159 |
-
data_aug_scale_overlap = float(data_aug_scale_overlap)
|
160 |
-
scales = [int(i*data_aug_scale_overlap) for i in scales]
|
161 |
-
max_size = int(max_size*data_aug_scale_overlap)
|
162 |
-
scales2_resize = [int(i*data_aug_scale_overlap) for i in scales2_resize]
|
163 |
-
scales2_crop = [int(i*data_aug_scale_overlap) for i in scales2_crop]
|
164 |
-
|
165 |
-
# datadict_for_print = {
|
166 |
-
# 'scales': scales,
|
167 |
-
# 'max_size': max_size,
|
168 |
-
# 'scales2_resize': scales2_resize,
|
169 |
-
# 'scales2_crop': scales2_crop
|
170 |
-
# }
|
171 |
-
# print("data_aug_params:", json.dumps(datadict_for_print, indent=2))
|
172 |
-
|
173 |
-
if image_set == 'train':
|
174 |
-
if fix_size:
|
175 |
-
return T.Compose([
|
176 |
-
T.RandomHorizontalFlip(),
|
177 |
-
T.RandomResize([(max_size, max(scales))]),
|
178 |
-
normalize,
|
179 |
-
])
|
180 |
-
|
181 |
-
if strong_aug:
|
182 |
-
import datasets.sltransform as SLT
|
183 |
-
|
184 |
-
return T.Compose([
|
185 |
-
T.RandomHorizontalFlip(),
|
186 |
-
T.RandomSelect(
|
187 |
-
T.RandomResize(scales, max_size=max_size),
|
188 |
-
T.Compose([
|
189 |
-
T.RandomResize(scales2_resize),
|
190 |
-
T.RandomSizeCrop(*scales2_crop),
|
191 |
-
T.RandomResize(scales, max_size=max_size),
|
192 |
-
])
|
193 |
-
),
|
194 |
-
SLT.RandomSelectMulti([
|
195 |
-
SLT.RandomCrop(),
|
196 |
-
SLT.LightingNoise(),
|
197 |
-
SLT.AdjustBrightness(2),
|
198 |
-
SLT.AdjustContrast(2),
|
199 |
-
]),
|
200 |
-
normalize,
|
201 |
-
])
|
202 |
-
|
203 |
-
return T.Compose([
|
204 |
-
T.RandomHorizontalFlip(),
|
205 |
-
T.RandomSelect(
|
206 |
-
T.RandomResize(scales, max_size=max_size),
|
207 |
-
T.Compose([
|
208 |
-
T.RandomResize(scales2_resize),
|
209 |
-
T.RandomSizeCrop(*scales2_crop),
|
210 |
-
T.RandomResize(scales, max_size=max_size),
|
211 |
-
])
|
212 |
-
),
|
213 |
-
normalize,
|
214 |
-
])
|
215 |
-
|
216 |
-
if image_set in ['val', 'eval_debug', 'train_reg', 'test']:
|
217 |
-
|
218 |
-
if os.environ.get("GFLOPS_DEBUG_SHILONG", False) == 'INFO':
|
219 |
-
print("Under debug mode for flops calculation only!!!!!!!!!!!!!!!!")
|
220 |
-
return T.Compose([
|
221 |
-
T.ResizeDebug((1280, 800)),
|
222 |
-
normalize,
|
223 |
-
])
|
224 |
-
|
225 |
-
return T.Compose([
|
226 |
-
T.RandomResize([max(scales)], max_size=max_size),
|
227 |
-
normalize,
|
228 |
-
])
|
229 |
-
|
230 |
-
raise ValueError(f'unknown {image_set}')
|
231 |
-
|
232 |
-
def build_odvg(image_set, args, datasetinfo):
|
233 |
-
img_folder = datasetinfo["root"]
|
234 |
-
ann_file = datasetinfo["anno"]
|
235 |
-
label_map = datasetinfo["label_map"] if "label_map" in datasetinfo else None
|
236 |
-
try:
|
237 |
-
strong_aug = args.strong_aug
|
238 |
-
except:
|
239 |
-
strong_aug = False # False originally
|
240 |
-
print(img_folder, ann_file, label_map)
|
241 |
-
dataset = ODVGDataset(img_folder, ann_file, label_map, max_labels=args.max_labels,
|
242 |
-
transforms=make_coco_transforms(image_set, fix_size=args.fix_size, strong_aug=strong_aug, args=args),
|
243 |
-
)
|
244 |
-
return dataset
|
245 |
-
|
246 |
-
|
247 |
-
if __name__=="__main__":
|
248 |
-
dataset_vg = ODVGDataset("path/GRIT-20M/data/","path/GRIT-20M/anno/grit_odvg_10k.jsonl",)
|
249 |
-
print(len(dataset_vg))
|
250 |
-
data = dataset_vg[random.randint(0, 100)]
|
251 |
-
print(data)
|
252 |
-
dataset_od = ODVGDataset("pathl/V3Det/",
|
253 |
-
"path/V3Det/annotations/v3det_2023_v1_all_odvg.jsonl",
|
254 |
-
"path/V3Det/annotations/v3det_label_map.json",
|
255 |
-
)
|
256 |
-
print(len(dataset_od))
|
257 |
-
data = dataset_od[random.randint(0, 100)]
|
258 |
-
print(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/.ipynb_checkpoints/transforms-checkpoint.py
DELETED
@@ -1,285 +0,0 @@
|
|
1 |
-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
2 |
-
"""
|
3 |
-
Transforms and data augmentation for both image + bbox.
|
4 |
-
"""
|
5 |
-
import random
|
6 |
-
|
7 |
-
import PIL
|
8 |
-
import torch
|
9 |
-
import torchvision.transforms as T
|
10 |
-
import torchvision.transforms.functional as F
|
11 |
-
|
12 |
-
from util.box_ops import box_xyxy_to_cxcywh
|
13 |
-
from util.misc import interpolate
|
14 |
-
|
15 |
-
|
16 |
-
def crop(image, target, region):
|
17 |
-
cropped_image = F.crop(image, *region)
|
18 |
-
|
19 |
-
target = target.copy()
|
20 |
-
i, j, h, w = region
|
21 |
-
|
22 |
-
# should we do something wrt the original size?
|
23 |
-
target["size"] = torch.tensor([h, w])
|
24 |
-
|
25 |
-
fields = ["labels", "area"]
|
26 |
-
|
27 |
-
if "boxes" in target:
|
28 |
-
boxes = target["boxes"]
|
29 |
-
max_size = torch.as_tensor([w, h], dtype=torch.float32)
|
30 |
-
cropped_boxes = boxes - torch.as_tensor([j, i, j, i])
|
31 |
-
cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
|
32 |
-
cropped_boxes = cropped_boxes.clamp(min=0)
|
33 |
-
area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
|
34 |
-
target["boxes"] = cropped_boxes.reshape(-1, 4)
|
35 |
-
target["area"] = area
|
36 |
-
fields.append("boxes")
|
37 |
-
|
38 |
-
if "masks" in target:
|
39 |
-
# FIXME should we update the area here if there are no boxes?
|
40 |
-
target['masks'] = target['masks'][:, i:i + h, j:j + w]
|
41 |
-
fields.append("masks")
|
42 |
-
|
43 |
-
|
44 |
-
# remove elements for which the boxes or masks that have zero area
|
45 |
-
if "boxes" in target or "masks" in target:
|
46 |
-
# favor boxes selection when defining which elements to keep
|
47 |
-
# this is compatible with previous implementation
|
48 |
-
if "boxes" in target:
|
49 |
-
cropped_boxes = target['boxes'].reshape(-1, 2, 2)
|
50 |
-
keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1)
|
51 |
-
else:
|
52 |
-
keep = target['masks'].flatten(1).any(1)
|
53 |
-
|
54 |
-
for field in fields:
|
55 |
-
target[field] = target[field][keep]
|
56 |
-
|
57 |
-
return cropped_image, target
|
58 |
-
|
59 |
-
|
60 |
-
def hflip(image, target):
|
61 |
-
flipped_image = F.hflip(image)
|
62 |
-
|
63 |
-
w, h = image.size
|
64 |
-
|
65 |
-
target = target.copy()
|
66 |
-
if "boxes" in target:
|
67 |
-
boxes = target["boxes"]
|
68 |
-
boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor([-1, 1, -1, 1]) + torch.as_tensor([w, 0, w, 0])
|
69 |
-
target["boxes"] = boxes
|
70 |
-
|
71 |
-
if "masks" in target:
|
72 |
-
target['masks'] = target['masks'].flip(-1)
|
73 |
-
|
74 |
-
return flipped_image, target
|
75 |
-
|
76 |
-
|
77 |
-
def resize(image, target, size, max_size=None):
|
78 |
-
# size can be min_size (scalar) or (w, h) tuple
|
79 |
-
|
80 |
-
def get_size_with_aspect_ratio(image_size, size, max_size=None):
|
81 |
-
w, h = image_size
|
82 |
-
if max_size is not None:
|
83 |
-
min_original_size = float(min((w, h)))
|
84 |
-
max_original_size = float(max((w, h)))
|
85 |
-
if max_original_size / min_original_size * size > max_size:
|
86 |
-
size = int(round(max_size * min_original_size / max_original_size))
|
87 |
-
|
88 |
-
if (w <= h and w == size) or (h <= w and h == size):
|
89 |
-
return (h, w)
|
90 |
-
|
91 |
-
if w < h:
|
92 |
-
ow = size
|
93 |
-
oh = int(size * h / w)
|
94 |
-
else:
|
95 |
-
oh = size
|
96 |
-
ow = int(size * w / h)
|
97 |
-
|
98 |
-
return (oh, ow)
|
99 |
-
|
100 |
-
def get_size(image_size, size, max_size=None):
|
101 |
-
if isinstance(size, (list, tuple)):
|
102 |
-
return size[::-1]
|
103 |
-
else:
|
104 |
-
return get_size_with_aspect_ratio(image_size, size, max_size)
|
105 |
-
|
106 |
-
size = get_size(image.size, size, max_size)
|
107 |
-
rescaled_image = F.resize(image, size)
|
108 |
-
|
109 |
-
if target is None:
|
110 |
-
return rescaled_image, None
|
111 |
-
|
112 |
-
ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size))
|
113 |
-
ratio_width, ratio_height = ratios
|
114 |
-
|
115 |
-
target = target.copy()
|
116 |
-
if "boxes" in target:
|
117 |
-
boxes = target["boxes"]
|
118 |
-
scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height])
|
119 |
-
target["boxes"] = scaled_boxes
|
120 |
-
|
121 |
-
if "area" in target:
|
122 |
-
area = target["area"]
|
123 |
-
scaled_area = area * (ratio_width * ratio_height)
|
124 |
-
target["area"] = scaled_area
|
125 |
-
|
126 |
-
h, w = size
|
127 |
-
target["size"] = torch.tensor([h, w])
|
128 |
-
|
129 |
-
if "masks" in target:
|
130 |
-
target['masks'] = interpolate(
|
131 |
-
target['masks'][:, None].float(), size, mode="nearest")[:, 0] > 0.5
|
132 |
-
|
133 |
-
return rescaled_image, target
|
134 |
-
|
135 |
-
|
136 |
-
def pad(image, target, padding):
|
137 |
-
# assumes that we only pad on the bottom right corners
|
138 |
-
padded_image = F.pad(image, (0, 0, padding[0], padding[1]))
|
139 |
-
if target is None:
|
140 |
-
return padded_image, None
|
141 |
-
target = target.copy()
|
142 |
-
# should we do something wrt the original size?
|
143 |
-
target["size"] = torch.tensor(padded_image.size[::-1])
|
144 |
-
if "masks" in target:
|
145 |
-
target['masks'] = torch.nn.functional.pad(target['masks'], (0, padding[0], 0, padding[1]))
|
146 |
-
return padded_image, target
|
147 |
-
|
148 |
-
|
149 |
-
class ResizeDebug(object):
|
150 |
-
def __init__(self, size):
|
151 |
-
self.size = size
|
152 |
-
|
153 |
-
def __call__(self, img, target):
|
154 |
-
return resize(img, target, self.size)
|
155 |
-
|
156 |
-
|
157 |
-
class RandomCrop(object):
|
158 |
-
def __init__(self, size):
|
159 |
-
self.size = size
|
160 |
-
|
161 |
-
def __call__(self, img, target):
|
162 |
-
region = T.RandomCrop.get_params(img, self.size)
|
163 |
-
return crop(img, target, region)
|
164 |
-
|
165 |
-
|
166 |
-
class RandomSizeCrop(object):
|
167 |
-
def __init__(self, min_size: int, max_size: int):
|
168 |
-
self.min_size = min_size
|
169 |
-
self.max_size = max_size
|
170 |
-
|
171 |
-
def __call__(self, img: PIL.Image.Image, target: dict):
|
172 |
-
w = random.randint(self.min_size, min(img.width, self.max_size))
|
173 |
-
h = random.randint(self.min_size, min(img.height, self.max_size))
|
174 |
-
region = T.RandomCrop.get_params(img, [h, w])
|
175 |
-
return crop(img, target, region)
|
176 |
-
|
177 |
-
|
178 |
-
class CenterCrop(object):
|
179 |
-
def __init__(self, size):
|
180 |
-
self.size = size
|
181 |
-
|
182 |
-
def __call__(self, img, target):
|
183 |
-
image_width, image_height = img.size
|
184 |
-
crop_height, crop_width = self.size
|
185 |
-
crop_top = int(round((image_height - crop_height) / 2.))
|
186 |
-
crop_left = int(round((image_width - crop_width) / 2.))
|
187 |
-
return crop(img, target, (crop_top, crop_left, crop_height, crop_width))
|
188 |
-
|
189 |
-
|
190 |
-
class RandomHorizontalFlip(object):
|
191 |
-
def __init__(self, p=0.5):
|
192 |
-
self.p = p
|
193 |
-
|
194 |
-
def __call__(self, img, target):
|
195 |
-
if random.random() < self.p:
|
196 |
-
return hflip(img, target)
|
197 |
-
return img, target
|
198 |
-
|
199 |
-
|
200 |
-
class RandomResize(object):
|
201 |
-
def __init__(self, sizes, max_size=None):
|
202 |
-
assert isinstance(sizes, (list, tuple))
|
203 |
-
self.sizes = sizes
|
204 |
-
self.max_size = max_size
|
205 |
-
|
206 |
-
def __call__(self, img, target=None):
|
207 |
-
size = random.choice(self.sizes)
|
208 |
-
return resize(img, target, size, self.max_size)
|
209 |
-
|
210 |
-
|
211 |
-
class RandomPad(object):
|
212 |
-
def __init__(self, max_pad):
|
213 |
-
self.max_pad = max_pad
|
214 |
-
|
215 |
-
def __call__(self, img, target):
|
216 |
-
pad_x = random.randint(0, self.max_pad)
|
217 |
-
pad_y = random.randint(0, self.max_pad)
|
218 |
-
return pad(img, target, (pad_x, pad_y))
|
219 |
-
|
220 |
-
|
221 |
-
class RandomSelect(object):
|
222 |
-
"""
|
223 |
-
Randomly selects between transforms1 and transforms2,
|
224 |
-
with probability p for transforms1 and (1 - p) for transforms2
|
225 |
-
"""
|
226 |
-
def __init__(self, transforms1, transforms2, p=0.5):
|
227 |
-
self.transforms1 = transforms1
|
228 |
-
self.transforms2 = transforms2
|
229 |
-
self.p = p
|
230 |
-
|
231 |
-
def __call__(self, img, target):
|
232 |
-
if random.random() < self.p:
|
233 |
-
return self.transforms1(img, target)
|
234 |
-
return self.transforms2(img, target)
|
235 |
-
|
236 |
-
|
237 |
-
class ToTensor(object):
|
238 |
-
def __call__(self, img, target):
|
239 |
-
return F.to_tensor(img), target
|
240 |
-
|
241 |
-
|
242 |
-
class RandomErasing(object):
|
243 |
-
|
244 |
-
def __init__(self, *args, **kwargs):
|
245 |
-
self.eraser = T.RandomErasing(*args, **kwargs)
|
246 |
-
|
247 |
-
def __call__(self, img, target):
|
248 |
-
return self.eraser(img), target
|
249 |
-
|
250 |
-
|
251 |
-
class Normalize(object):
|
252 |
-
def __init__(self, mean, std):
|
253 |
-
self.mean = mean
|
254 |
-
self.std = std
|
255 |
-
|
256 |
-
def __call__(self, image, target=None):
|
257 |
-
image = F.normalize(image, mean=self.mean, std=self.std)
|
258 |
-
if target is None:
|
259 |
-
return image, None
|
260 |
-
target = target.copy()
|
261 |
-
h, w = image.shape[-2:]
|
262 |
-
if "boxes" in target:
|
263 |
-
boxes = target["boxes"]
|
264 |
-
boxes = box_xyxy_to_cxcywh(boxes)
|
265 |
-
boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
|
266 |
-
target["boxes"] = boxes
|
267 |
-
return image, target
|
268 |
-
|
269 |
-
|
270 |
-
class Compose(object):
|
271 |
-
def __init__(self, transforms):
|
272 |
-
self.transforms = transforms
|
273 |
-
|
274 |
-
def __call__(self, image, target):
|
275 |
-
for t in self.transforms:
|
276 |
-
image, target = t(image, target)
|
277 |
-
return image, target
|
278 |
-
|
279 |
-
def __repr__(self):
|
280 |
-
format_string = self.__class__.__name__ + "("
|
281 |
-
for t in self.transforms:
|
282 |
-
format_string += "\n"
|
283 |
-
format_string += " {0}".format(t)
|
284 |
-
format_string += "\n)"
|
285 |
-
return format_string
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/__init__.py
DELETED
@@ -1,23 +0,0 @@
|
|
1 |
-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
2 |
-
import torch.utils.data
|
3 |
-
import torchvision
|
4 |
-
from .coco import build as build_coco
|
5 |
-
|
6 |
-
|
7 |
-
def get_coco_api_from_dataset(dataset):
|
8 |
-
for _ in range(10):
|
9 |
-
# if isinstance(dataset, torchvision.datasets.CocoDetection):
|
10 |
-
# break
|
11 |
-
if isinstance(dataset, torch.utils.data.Subset):
|
12 |
-
dataset = dataset.dataset
|
13 |
-
if isinstance(dataset, torchvision.datasets.CocoDetection):
|
14 |
-
return dataset.coco
|
15 |
-
|
16 |
-
|
17 |
-
def build_dataset(image_set, args, datasetinfo):
|
18 |
-
if datasetinfo["dataset_mode"] == 'coco':
|
19 |
-
return build_coco(image_set, args, datasetinfo)
|
20 |
-
if datasetinfo["dataset_mode"] == 'odvg':
|
21 |
-
from .odvg import build_odvg
|
22 |
-
return build_odvg(image_set, args, datasetinfo)
|
23 |
-
raise ValueError(f'dataset {args.dataset_file} not supported')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/__pycache__/__init__.cpython-310.pyc
DELETED
Binary file (899 Bytes)
|
|
datasets/__pycache__/coco.cpython-310.pyc
DELETED
Binary file (20.2 kB)
|
|
datasets/__pycache__/coco_eval.cpython-310.pyc
DELETED
Binary file (7.42 kB)
|
|
datasets/__pycache__/cocogrounding_eval.cpython-310.pyc
DELETED
Binary file (7.44 kB)
|
|
datasets/__pycache__/data_util.cpython-310.pyc
DELETED
Binary file (4.55 kB)
|
|
datasets/__pycache__/odvg.cpython-310.pyc
DELETED
Binary file (8.21 kB)
|
|
datasets/__pycache__/panoptic_eval.cpython-310.pyc
DELETED
Binary file (1.87 kB)
|
|
datasets/__pycache__/random_crop.cpython-310.pyc
DELETED
Binary file (3.69 kB)
|
|
datasets/__pycache__/sltransform.cpython-310.pyc
DELETED
Binary file (7.68 kB)
|
|
datasets/__pycache__/transforms.cpython-310.pyc
DELETED
Binary file (9.53 kB)
|
|
datasets/coco.py
DELETED
@@ -1,649 +0,0 @@
|
|
1 |
-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
2 |
-
"""
|
3 |
-
COCO dataset which returns image_id for evaluation.
|
4 |
-
|
5 |
-
Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
|
6 |
-
"""
|
7 |
-
if __name__=="__main__":
|
8 |
-
# for debug only
|
9 |
-
import os, sys
|
10 |
-
sys.path.append(os.path.dirname(sys.path[0]))
|
11 |
-
from torchvision.datasets.vision import VisionDataset
|
12 |
-
|
13 |
-
import json
|
14 |
-
from pathlib import Path
|
15 |
-
import random
|
16 |
-
import os
|
17 |
-
from typing import Any, Callable, List, Optional, Tuple
|
18 |
-
|
19 |
-
from PIL import Image
|
20 |
-
|
21 |
-
import torch
|
22 |
-
import torch.utils.data
|
23 |
-
import torchvision
|
24 |
-
from pycocotools import mask as coco_mask
|
25 |
-
|
26 |
-
from datasets.data_util import preparing_dataset
|
27 |
-
import datasets.transforms as T
|
28 |
-
from util.box_ops import box_cxcywh_to_xyxy, box_iou
|
29 |
-
|
30 |
-
__all__ = ['build']
|
31 |
-
|
32 |
-
|
33 |
-
class label2compat():
|
34 |
-
def __init__(self) -> None:
|
35 |
-
self.category_map_str = {"1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, "10": 10, "11": 11, "13": 12, "14": 13, "15": 14, "16": 15, "17": 16, "18": 17, "19": 18, "20": 19, "21": 20, "22": 21, "23": 22, "24": 23, "25": 24, "27": 25, "28": 26, "31": 27, "32": 28, "33": 29, "34": 30, "35": 31, "36": 32, "37": 33, "38": 34, "39": 35, "40": 36, "41": 37, "42": 38, "43": 39, "44": 40, "46": 41, "47": 42, "48": 43, "49": 44, "50": 45, "51": 46, "52": 47, "53": 48, "54": 49, "55": 50, "56": 51, "57": 52, "58": 53, "59": 54, "60": 55, "61": 56, "62": 57, "63": 58, "64": 59, "65": 60, "67": 61, "70": 62, "72": 63, "73": 64, "74": 65, "75": 66, "76": 67, "77": 68, "78": 69, "79": 70, "80": 71, "81": 72, "82": 73, "84": 74, "85": 75, "86": 76, "87": 77, "88": 78, "89": 79, "90": 80}
|
36 |
-
self.category_map = {int(k):v for k,v in self.category_map_str.items()}
|
37 |
-
|
38 |
-
def __call__(self, target, img=None):
|
39 |
-
labels = target['labels']
|
40 |
-
res = torch.zeros(labels.shape, dtype=labels.dtype)
|
41 |
-
for idx, item in enumerate(labels):
|
42 |
-
res[idx] = self.category_map[item.item()] - 1
|
43 |
-
target['label_compat'] = res
|
44 |
-
if img is not None:
|
45 |
-
return target, img
|
46 |
-
else:
|
47 |
-
return target
|
48 |
-
|
49 |
-
|
50 |
-
class label_compat2onehot():
|
51 |
-
def __init__(self, num_class=80, num_output_objs=1):
|
52 |
-
self.num_class = num_class
|
53 |
-
self.num_output_objs = num_output_objs
|
54 |
-
if num_output_objs != 1:
|
55 |
-
raise DeprecationWarning("num_output_objs!=1, which is only used for comparison")
|
56 |
-
|
57 |
-
def __call__(self, target, img=None):
|
58 |
-
labels = target['label_compat']
|
59 |
-
place_dict = {k:0 for k in range(self.num_class)}
|
60 |
-
if self.num_output_objs == 1:
|
61 |
-
res = torch.zeros(self.num_class)
|
62 |
-
for i in labels:
|
63 |
-
itm = i.item()
|
64 |
-
res[itm] = 1.0
|
65 |
-
else:
|
66 |
-
# compat with baseline
|
67 |
-
res = torch.zeros(self.num_class, self.num_output_objs)
|
68 |
-
for i in labels:
|
69 |
-
itm = i.item()
|
70 |
-
res[itm][place_dict[itm]] = 1.0
|
71 |
-
place_dict[itm] += 1
|
72 |
-
target['label_compat_onehot'] = res
|
73 |
-
if img is not None:
|
74 |
-
return target, img
|
75 |
-
else:
|
76 |
-
return target
|
77 |
-
|
78 |
-
|
79 |
-
class box_label_catter():
|
80 |
-
def __init__(self):
|
81 |
-
pass
|
82 |
-
|
83 |
-
def __call__(self, target, img=None):
|
84 |
-
labels = target['label_compat']
|
85 |
-
boxes = target['boxes']
|
86 |
-
box_label = torch.cat((boxes, labels.unsqueeze(-1)), 1)
|
87 |
-
target['box_label'] = box_label
|
88 |
-
if img is not None:
|
89 |
-
return target, img
|
90 |
-
else:
|
91 |
-
return target
|
92 |
-
|
93 |
-
|
94 |
-
class RandomSelectBoxlabels():
|
95 |
-
def __init__(self, num_classes, leave_one_out=False, blank_prob=0.8,
|
96 |
-
prob_first_item = 0.0,
|
97 |
-
prob_random_item = 0.0,
|
98 |
-
prob_last_item = 0.8,
|
99 |
-
prob_stop_sign = 0.2
|
100 |
-
) -> None:
|
101 |
-
self.num_classes = num_classes
|
102 |
-
self.leave_one_out = leave_one_out
|
103 |
-
self.blank_prob = blank_prob
|
104 |
-
|
105 |
-
self.set_state(prob_first_item, prob_random_item, prob_last_item, prob_stop_sign)
|
106 |
-
|
107 |
-
|
108 |
-
def get_state(self):
|
109 |
-
return [self.prob_first_item, self.prob_random_item, self.prob_last_item, self.prob_stop_sign]
|
110 |
-
|
111 |
-
def set_state(self, prob_first_item, prob_random_item, prob_last_item, prob_stop_sign):
|
112 |
-
sum_prob = prob_first_item + prob_random_item + prob_last_item + prob_stop_sign
|
113 |
-
assert sum_prob - 1 < 1e-6, \
|
114 |
-
f"Sum up all prob = {sum_prob}. prob_first_item:{prob_first_item}" \
|
115 |
-
+ f"prob_random_item:{prob_random_item}, prob_last_item:{prob_last_item}" \
|
116 |
-
+ f"prob_stop_sign:{prob_stop_sign}"
|
117 |
-
|
118 |
-
self.prob_first_item = prob_first_item
|
119 |
-
self.prob_random_item = prob_random_item
|
120 |
-
self.prob_last_item = prob_last_item
|
121 |
-
self.prob_stop_sign = prob_stop_sign
|
122 |
-
|
123 |
-
|
124 |
-
def sample_for_pred_first_item(self, box_label: torch.FloatTensor):
|
125 |
-
box_label_known = torch.Tensor(0,5)
|
126 |
-
box_label_unknown = box_label
|
127 |
-
return box_label_known, box_label_unknown
|
128 |
-
|
129 |
-
def sample_for_pred_random_item(self, box_label: torch.FloatTensor):
|
130 |
-
n_select = int(random.random() * box_label.shape[0])
|
131 |
-
box_label = box_label[torch.randperm(box_label.shape[0])]
|
132 |
-
box_label_known = box_label[:n_select]
|
133 |
-
box_label_unknown = box_label[n_select:]
|
134 |
-
return box_label_known, box_label_unknown
|
135 |
-
|
136 |
-
def sample_for_pred_last_item(self, box_label: torch.FloatTensor):
|
137 |
-
box_label_perm = box_label[torch.randperm(box_label.shape[0])]
|
138 |
-
known_label_list = []
|
139 |
-
box_label_known = []
|
140 |
-
box_label_unknown = []
|
141 |
-
for item in box_label_perm:
|
142 |
-
label_i = item[4].item()
|
143 |
-
if label_i in known_label_list:
|
144 |
-
box_label_known.append(item)
|
145 |
-
else:
|
146 |
-
# first item
|
147 |
-
box_label_unknown.append(item)
|
148 |
-
known_label_list.append(label_i)
|
149 |
-
box_label_known = torch.stack(box_label_known) if len(box_label_known) > 0 else torch.Tensor(0,5)
|
150 |
-
box_label_unknown = torch.stack(box_label_unknown) if len(box_label_unknown) > 0 else torch.Tensor(0,5)
|
151 |
-
return box_label_known, box_label_unknown
|
152 |
-
|
153 |
-
def sample_for_pred_stop_sign(self, box_label: torch.FloatTensor):
|
154 |
-
box_label_unknown = torch.Tensor(0,5)
|
155 |
-
box_label_known = box_label
|
156 |
-
return box_label_known, box_label_unknown
|
157 |
-
|
158 |
-
def __call__(self, target, img=None):
|
159 |
-
box_label = target['box_label'] # K, 5
|
160 |
-
|
161 |
-
dice_number = random.random()
|
162 |
-
|
163 |
-
if dice_number < self.prob_first_item:
|
164 |
-
box_label_known, box_label_unknown = self.sample_for_pred_first_item(box_label)
|
165 |
-
elif dice_number < self.prob_first_item + self.prob_random_item:
|
166 |
-
box_label_known, box_label_unknown = self.sample_for_pred_random_item(box_label)
|
167 |
-
elif dice_number < self.prob_first_item + self.prob_random_item + self.prob_last_item:
|
168 |
-
box_label_known, box_label_unknown = self.sample_for_pred_last_item(box_label)
|
169 |
-
else:
|
170 |
-
box_label_known, box_label_unknown = self.sample_for_pred_stop_sign(box_label)
|
171 |
-
|
172 |
-
target['label_onehot_known'] = label2onehot(box_label_known[:,-1], self.num_classes)
|
173 |
-
target['label_onehot_unknown'] = label2onehot(box_label_unknown[:, -1], self.num_classes)
|
174 |
-
target['box_label_known'] = box_label_known
|
175 |
-
target['box_label_unknown'] = box_label_unknown
|
176 |
-
|
177 |
-
return target, img
|
178 |
-
|
179 |
-
|
180 |
-
class RandomDrop():
|
181 |
-
def __init__(self, p=0.2) -> None:
|
182 |
-
self.p = p
|
183 |
-
|
184 |
-
def __call__(self, target, img=None):
|
185 |
-
known_box = target['box_label_known']
|
186 |
-
num_known_box = known_box.size(0)
|
187 |
-
idxs = torch.rand(num_known_box)
|
188 |
-
# indices = torch.randperm(num_known_box)[:int((1-self).p*num_known_box + 0.5 + random.random())]
|
189 |
-
target['box_label_known'] = known_box[idxs > self.p]
|
190 |
-
return target, img
|
191 |
-
|
192 |
-
|
193 |
-
class BboxPertuber():
|
194 |
-
def __init__(self, max_ratio = 0.02, generate_samples = 1000) -> None:
|
195 |
-
self.max_ratio = max_ratio
|
196 |
-
self.generate_samples = generate_samples
|
197 |
-
self.samples = self.generate_pertube_samples()
|
198 |
-
self.idx = 0
|
199 |
-
|
200 |
-
def generate_pertube_samples(self):
|
201 |
-
import torch
|
202 |
-
samples = (torch.rand(self.generate_samples, 5) - 0.5) * 2 * self.max_ratio
|
203 |
-
return samples
|
204 |
-
|
205 |
-
def __call__(self, target, img):
|
206 |
-
known_box = target['box_label_known'] # Tensor(K,5), K known bbox
|
207 |
-
K = known_box.shape[0]
|
208 |
-
known_box_pertube = torch.zeros(K, 6) # 4:bbox, 1:prob, 1:label
|
209 |
-
if K == 0:
|
210 |
-
pass
|
211 |
-
else:
|
212 |
-
if self.idx + K > self.generate_samples:
|
213 |
-
self.idx = 0
|
214 |
-
delta = self.samples[self.idx: self.idx + K, :]
|
215 |
-
known_box_pertube[:, :4] = known_box[:, :4] + delta[:, :4]
|
216 |
-
iou = (torch.diag(box_iou(box_cxcywh_to_xyxy(known_box[:, :4]), box_cxcywh_to_xyxy(known_box_pertube[:, :4]))[0])) * (1 + delta[:, -1])
|
217 |
-
known_box_pertube[:, 4].copy_(iou)
|
218 |
-
known_box_pertube[:, -1].copy_(known_box[:, -1])
|
219 |
-
|
220 |
-
target['box_label_known_pertube'] = known_box_pertube
|
221 |
-
return target, img
|
222 |
-
|
223 |
-
|
224 |
-
class RandomCutout():
|
225 |
-
def __init__(self, factor=0.5) -> None:
|
226 |
-
self.factor = factor
|
227 |
-
|
228 |
-
def __call__(self, target, img=None):
|
229 |
-
unknown_box = target['box_label_unknown'] # Ku, 5
|
230 |
-
known_box = target['box_label_known_pertube'] # Kk, 6
|
231 |
-
Ku = unknown_box.size(0)
|
232 |
-
|
233 |
-
known_box_add = torch.zeros(Ku, 6) # Ku, 6
|
234 |
-
known_box_add[:, :5] = unknown_box
|
235 |
-
known_box_add[:, 5].uniform_(0.5, 1)
|
236 |
-
|
237 |
-
|
238 |
-
known_box_add[:, :2] += known_box_add[:, 2:4] * (torch.rand(Ku, 2) - 0.5) / 2
|
239 |
-
known_box_add[:, 2:4] /= 2
|
240 |
-
|
241 |
-
target['box_label_known_pertube'] = torch.cat((known_box, known_box_add))
|
242 |
-
return target, img
|
243 |
-
|
244 |
-
|
245 |
-
class RandomSelectBoxes():
|
246 |
-
def __init__(self, num_class=80) -> None:
|
247 |
-
Warning("This is such a slow function and will be deprecated soon!!!")
|
248 |
-
self.num_class = num_class
|
249 |
-
|
250 |
-
def __call__(self, target, img=None):
|
251 |
-
boxes = target['boxes']
|
252 |
-
labels = target['label_compat']
|
253 |
-
|
254 |
-
# transform to list of tensors
|
255 |
-
boxs_list = [[] for i in range(self.num_class)]
|
256 |
-
for idx, item in enumerate(boxes):
|
257 |
-
label = labels[idx].item()
|
258 |
-
boxs_list[label].append(item)
|
259 |
-
boxs_list_tensor = [torch.stack(i) if len(i) > 0 else torch.Tensor(0,4) for i in boxs_list]
|
260 |
-
|
261 |
-
# random selection
|
262 |
-
box_known = []
|
263 |
-
box_unknown = []
|
264 |
-
for idx, item in enumerate(boxs_list_tensor):
|
265 |
-
ncnt = item.shape[0]
|
266 |
-
nselect = int(random.random() * ncnt) # close in both sides, much faster than random.randint
|
267 |
-
|
268 |
-
item = item[torch.randperm(ncnt)]
|
269 |
-
# random.shuffle(item)
|
270 |
-
box_known.append(item[:nselect])
|
271 |
-
box_unknown.append(item[nselect:])
|
272 |
-
|
273 |
-
# box_known_tensor = [torch.stack(i) if len(i) > 0 else torch.Tensor(0,4) for i in box_known]
|
274 |
-
# box_unknown_tensor = [torch.stack(i) if len(i) > 0 else torch.Tensor(0,4) for i in box_unknown]
|
275 |
-
# print('box_unknown_tensor:', box_unknown_tensor)
|
276 |
-
target['known_box'] = box_known
|
277 |
-
target['unknown_box'] = box_unknown
|
278 |
-
return target, img
|
279 |
-
|
280 |
-
|
281 |
-
def label2onehot(label, num_classes):
|
282 |
-
"""
|
283 |
-
label: Tensor(K)
|
284 |
-
"""
|
285 |
-
res = torch.zeros(num_classes)
|
286 |
-
for i in label:
|
287 |
-
itm = int(i.item())
|
288 |
-
res[itm] = 1.0
|
289 |
-
return res
|
290 |
-
|
291 |
-
|
292 |
-
class MaskCrop():
|
293 |
-
def __init__(self) -> None:
|
294 |
-
pass
|
295 |
-
|
296 |
-
def __call__(self, target, img):
|
297 |
-
known_box = target['known_box']
|
298 |
-
h,w = img.shape[1:] # h,w
|
299 |
-
# imgsize = target['orig_size'] # h,w
|
300 |
-
|
301 |
-
scale = torch.Tensor([w, h, w, h])
|
302 |
-
|
303 |
-
# _cnt = 0
|
304 |
-
for boxes in known_box:
|
305 |
-
if boxes.shape[0] == 0:
|
306 |
-
continue
|
307 |
-
box_xyxy = box_cxcywh_to_xyxy(boxes) * scale
|
308 |
-
for box in box_xyxy:
|
309 |
-
x1, y1, x2, y2 = [int(i) for i in box.tolist()]
|
310 |
-
img[:, y1:y2, x1:x2] = 0
|
311 |
-
# _cnt += 1
|
312 |
-
# print("_cnt:", _cnt)
|
313 |
-
return target, img
|
314 |
-
|
315 |
-
|
316 |
-
dataset_hook_register = {
|
317 |
-
'label2compat': label2compat,
|
318 |
-
'label_compat2onehot': label_compat2onehot,
|
319 |
-
'box_label_catter': box_label_catter,
|
320 |
-
'RandomSelectBoxlabels': RandomSelectBoxlabels,
|
321 |
-
'RandomSelectBoxes': RandomSelectBoxes,
|
322 |
-
'MaskCrop': MaskCrop,
|
323 |
-
'BboxPertuber': BboxPertuber,
|
324 |
-
}
|
325 |
-
|
326 |
-
|
327 |
-
class CocoDetection(torchvision.datasets.CocoDetection):
|
328 |
-
def __init__(self, img_folder, ann_file, transforms, return_masks, aux_target_hacks=None):
|
329 |
-
super(CocoDetection, self).__init__(img_folder, ann_file)
|
330 |
-
self._transforms = transforms
|
331 |
-
self.prepare = ConvertCocoPolysToMask(return_masks)
|
332 |
-
self.aux_target_hacks = aux_target_hacks
|
333 |
-
|
334 |
-
def change_hack_attr(self, hackclassname, attrkv_dict):
|
335 |
-
target_class = dataset_hook_register[hackclassname]
|
336 |
-
for item in self.aux_target_hacks:
|
337 |
-
if isinstance(item, target_class):
|
338 |
-
for k,v in attrkv_dict.items():
|
339 |
-
setattr(item, k, v)
|
340 |
-
|
341 |
-
def get_hack(self, hackclassname):
|
342 |
-
target_class = dataset_hook_register[hackclassname]
|
343 |
-
for item in self.aux_target_hacks:
|
344 |
-
if isinstance(item, target_class):
|
345 |
-
return item
|
346 |
-
|
347 |
-
def _load_image(self, id: int) -> Image.Image:
|
348 |
-
path = self.coco.loadImgs(id)[0]["file_name"]
|
349 |
-
abs_path = os.path.join(self.root, path)
|
350 |
-
return Image.open(abs_path).convert("RGB")
|
351 |
-
|
352 |
-
def __getitem__(self, idx):
|
353 |
-
"""
|
354 |
-
Output:
|
355 |
-
- target: dict of multiple items
|
356 |
-
- boxes: Tensor[num_box, 4]. \
|
357 |
-
Init type: x0,y0,x1,y1. unnormalized data.
|
358 |
-
Final type: cx,cy,w,h. normalized data.
|
359 |
-
"""
|
360 |
-
try:
|
361 |
-
img, target = super(CocoDetection, self).__getitem__(idx)
|
362 |
-
except:
|
363 |
-
print("Error idx: {}".format(idx))
|
364 |
-
idx += 1
|
365 |
-
img, target = super(CocoDetection, self).__getitem__(idx)
|
366 |
-
image_id = self.ids[idx]
|
367 |
-
target = {'image_id': image_id, 'annotations': target}
|
368 |
-
img, target = self.prepare(img, target)
|
369 |
-
|
370 |
-
if self._transforms is not None:
|
371 |
-
img, target = self._transforms(img, target)
|
372 |
-
|
373 |
-
# convert to needed format
|
374 |
-
if self.aux_target_hacks is not None:
|
375 |
-
for hack_runner in self.aux_target_hacks:
|
376 |
-
target, img = hack_runner(target, img=img)
|
377 |
-
|
378 |
-
return img, target
|
379 |
-
|
380 |
-
|
381 |
-
def convert_coco_poly_to_mask(segmentations, height, width):
|
382 |
-
masks = []
|
383 |
-
for polygons in segmentations:
|
384 |
-
rles = coco_mask.frPyObjects(polygons, height, width)
|
385 |
-
mask = coco_mask.decode(rles)
|
386 |
-
if len(mask.shape) < 3:
|
387 |
-
mask = mask[..., None]
|
388 |
-
mask = torch.as_tensor(mask, dtype=torch.uint8)
|
389 |
-
mask = mask.any(dim=2)
|
390 |
-
masks.append(mask)
|
391 |
-
if masks:
|
392 |
-
masks = torch.stack(masks, dim=0)
|
393 |
-
else:
|
394 |
-
masks = torch.zeros((0, height, width), dtype=torch.uint8)
|
395 |
-
return masks
|
396 |
-
|
397 |
-
|
398 |
-
class ConvertCocoPolysToMask(object):
|
399 |
-
def __init__(self, return_masks=False):
|
400 |
-
self.return_masks = return_masks
|
401 |
-
|
402 |
-
def __call__(self, image, target):
|
403 |
-
w, h = image.size
|
404 |
-
|
405 |
-
image_id = target["image_id"]
|
406 |
-
image_id = torch.tensor([image_id])
|
407 |
-
|
408 |
-
anno = target["annotations"]
|
409 |
-
|
410 |
-
anno = [obj for obj in anno if 'iscrowd' not in obj or obj['iscrowd'] == 0]
|
411 |
-
|
412 |
-
boxes = [obj["bbox"] for obj in anno]
|
413 |
-
# guard against no boxes via resizing
|
414 |
-
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
|
415 |
-
boxes[:, 2:] += boxes[:, :2]
|
416 |
-
boxes[:, 0::2].clamp_(min=0, max=w)
|
417 |
-
boxes[:, 1::2].clamp_(min=0, max=h)
|
418 |
-
|
419 |
-
classes = [obj["category_id"] for obj in anno]
|
420 |
-
classes = torch.tensor(classes, dtype=torch.int64)
|
421 |
-
|
422 |
-
if self.return_masks:
|
423 |
-
segmentations = [obj["segmentation"] for obj in anno]
|
424 |
-
masks = convert_coco_poly_to_mask(segmentations, h, w)
|
425 |
-
|
426 |
-
keypoints = None
|
427 |
-
if anno and "keypoints" in anno[0]:
|
428 |
-
keypoints = [obj["keypoints"] for obj in anno]
|
429 |
-
keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
|
430 |
-
num_keypoints = keypoints.shape[0]
|
431 |
-
if num_keypoints:
|
432 |
-
keypoints = keypoints.view(num_keypoints, -1, 3)
|
433 |
-
|
434 |
-
keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
|
435 |
-
boxes = boxes[keep]
|
436 |
-
classes = classes[keep]
|
437 |
-
if self.return_masks:
|
438 |
-
masks = masks[keep]
|
439 |
-
if keypoints is not None:
|
440 |
-
keypoints = keypoints[keep]
|
441 |
-
|
442 |
-
target = {}
|
443 |
-
target["boxes"] = boxes
|
444 |
-
target["labels"] = classes
|
445 |
-
if self.return_masks:
|
446 |
-
target["masks"] = masks
|
447 |
-
target["image_id"] = image_id
|
448 |
-
if keypoints is not None:
|
449 |
-
target["keypoints"] = keypoints
|
450 |
-
|
451 |
-
# for conversion to coco api
|
452 |
-
area = torch.tensor([obj["area"] for obj in anno])
|
453 |
-
iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
|
454 |
-
target["area"] = area[keep]
|
455 |
-
target["iscrowd"] = iscrowd[keep]
|
456 |
-
|
457 |
-
target["orig_size"] = torch.as_tensor([int(h), int(w)])
|
458 |
-
target["size"] = torch.as_tensor([int(h), int(w)])
|
459 |
-
|
460 |
-
return image, target
|
461 |
-
|
462 |
-
|
463 |
-
def make_coco_transforms(image_set, fix_size=False, strong_aug=False, args=None):
|
464 |
-
|
465 |
-
normalize = T.Compose([
|
466 |
-
T.ToTensor(),
|
467 |
-
T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
|
468 |
-
])
|
469 |
-
|
470 |
-
# config the params for data aug
|
471 |
-
scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
|
472 |
-
max_size = 1333
|
473 |
-
scales2_resize = [400, 500, 600]
|
474 |
-
scales2_crop = [384, 600]
|
475 |
-
|
476 |
-
# update args from config files
|
477 |
-
scales = getattr(args, 'data_aug_scales', scales)
|
478 |
-
max_size = getattr(args, 'data_aug_max_size', max_size)
|
479 |
-
scales2_resize = getattr(args, 'data_aug_scales2_resize', scales2_resize)
|
480 |
-
scales2_crop = getattr(args, 'data_aug_scales2_crop', scales2_crop)
|
481 |
-
|
482 |
-
# resize them
|
483 |
-
data_aug_scale_overlap = getattr(args, 'data_aug_scale_overlap', None)
|
484 |
-
if data_aug_scale_overlap is not None and data_aug_scale_overlap > 0:
|
485 |
-
data_aug_scale_overlap = float(data_aug_scale_overlap)
|
486 |
-
scales = [int(i*data_aug_scale_overlap) for i in scales]
|
487 |
-
max_size = int(max_size*data_aug_scale_overlap)
|
488 |
-
scales2_resize = [int(i*data_aug_scale_overlap) for i in scales2_resize]
|
489 |
-
scales2_crop = [int(i*data_aug_scale_overlap) for i in scales2_crop]
|
490 |
-
|
491 |
-
datadict_for_print = {
|
492 |
-
'scales': scales,
|
493 |
-
'max_size': max_size,
|
494 |
-
'scales2_resize': scales2_resize,
|
495 |
-
'scales2_crop': scales2_crop
|
496 |
-
}
|
497 |
-
# print("data_aug_params:", json.dumps(datadict_for_print, indent=2))
|
498 |
-
|
499 |
-
if image_set == 'train':
|
500 |
-
if fix_size:
|
501 |
-
return T.Compose([
|
502 |
-
T.RandomHorizontalFlip(),
|
503 |
-
T.RandomResize([(max_size, max(scales))]),
|
504 |
-
# T.RandomResize([(512, 512)]),
|
505 |
-
normalize,
|
506 |
-
])
|
507 |
-
|
508 |
-
if strong_aug:
|
509 |
-
import datasets.sltransform as SLT
|
510 |
-
|
511 |
-
return T.Compose([
|
512 |
-
T.RandomHorizontalFlip(),
|
513 |
-
T.RandomSelect(
|
514 |
-
T.RandomResize(scales, max_size=max_size),
|
515 |
-
T.Compose([
|
516 |
-
T.RandomResize(scales2_resize),
|
517 |
-
T.RandomSizeCrop(*scales2_crop),
|
518 |
-
T.RandomResize(scales, max_size=max_size),
|
519 |
-
])
|
520 |
-
),
|
521 |
-
SLT.RandomSelectMulti([
|
522 |
-
SLT.RandomCrop(),
|
523 |
-
SLT.LightingNoise(),
|
524 |
-
SLT.AdjustBrightness(2),
|
525 |
-
SLT.AdjustContrast(2),
|
526 |
-
]),
|
527 |
-
normalize,
|
528 |
-
])
|
529 |
-
|
530 |
-
return T.Compose([
|
531 |
-
T.RandomHorizontalFlip(),
|
532 |
-
T.RandomSelect(
|
533 |
-
T.RandomResize(scales, max_size=max_size),
|
534 |
-
T.Compose([
|
535 |
-
T.RandomResize(scales2_resize),
|
536 |
-
T.RandomSizeCrop(*scales2_crop),
|
537 |
-
T.RandomResize(scales, max_size=max_size),
|
538 |
-
])
|
539 |
-
),
|
540 |
-
normalize,
|
541 |
-
])
|
542 |
-
|
543 |
-
if image_set in ['val', 'eval_debug', 'train_reg', 'test']:
|
544 |
-
|
545 |
-
if os.environ.get("GFLOPS_DEBUG_SHILONG", False) == 'INFO':
|
546 |
-
print("Under debug mode for flops calculation only!!!!!!!!!!!!!!!!")
|
547 |
-
return T.Compose([
|
548 |
-
T.ResizeDebug((1280, 800)),
|
549 |
-
normalize,
|
550 |
-
])
|
551 |
-
|
552 |
-
return T.Compose([
|
553 |
-
T.RandomResize([max(scales)], max_size=max_size),
|
554 |
-
normalize,
|
555 |
-
])
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
raise ValueError(f'unknown {image_set}')
|
560 |
-
|
561 |
-
|
562 |
-
def get_aux_target_hacks_list(image_set, args):
|
563 |
-
if args.modelname in ['q2bs_mask', 'q2bs']:
|
564 |
-
aux_target_hacks_list = [
|
565 |
-
label2compat(),
|
566 |
-
label_compat2onehot(),
|
567 |
-
RandomSelectBoxes(num_class=args.num_classes)
|
568 |
-
]
|
569 |
-
if args.masked_data and image_set == 'train':
|
570 |
-
# aux_target_hacks_list.append()
|
571 |
-
aux_target_hacks_list.append(MaskCrop())
|
572 |
-
elif args.modelname in ['q2bm_v2', 'q2bs_ce', 'q2op', 'q2ofocal', 'q2opclip', 'q2ocqonly']:
|
573 |
-
aux_target_hacks_list = [
|
574 |
-
label2compat(),
|
575 |
-
label_compat2onehot(),
|
576 |
-
box_label_catter(),
|
577 |
-
RandomSelectBoxlabels(num_classes=args.num_classes,
|
578 |
-
prob_first_item=args.prob_first_item,
|
579 |
-
prob_random_item=args.prob_random_item,
|
580 |
-
prob_last_item=args.prob_last_item,
|
581 |
-
prob_stop_sign=args.prob_stop_sign,
|
582 |
-
),
|
583 |
-
BboxPertuber(max_ratio=0.02, generate_samples=1000),
|
584 |
-
]
|
585 |
-
elif args.modelname in ['q2omask', 'q2osa']:
|
586 |
-
if args.coco_aug:
|
587 |
-
aux_target_hacks_list = [
|
588 |
-
label2compat(),
|
589 |
-
label_compat2onehot(),
|
590 |
-
box_label_catter(),
|
591 |
-
RandomSelectBoxlabels(num_classes=args.num_classes,
|
592 |
-
prob_first_item=args.prob_first_item,
|
593 |
-
prob_random_item=args.prob_random_item,
|
594 |
-
prob_last_item=args.prob_last_item,
|
595 |
-
prob_stop_sign=args.prob_stop_sign,
|
596 |
-
),
|
597 |
-
RandomDrop(p=0.2),
|
598 |
-
BboxPertuber(max_ratio=0.02, generate_samples=1000),
|
599 |
-
RandomCutout(factor=0.5)
|
600 |
-
]
|
601 |
-
else:
|
602 |
-
aux_target_hacks_list = [
|
603 |
-
label2compat(),
|
604 |
-
label_compat2onehot(),
|
605 |
-
box_label_catter(),
|
606 |
-
RandomSelectBoxlabels(num_classes=args.num_classes,
|
607 |
-
prob_first_item=args.prob_first_item,
|
608 |
-
prob_random_item=args.prob_random_item,
|
609 |
-
prob_last_item=args.prob_last_item,
|
610 |
-
prob_stop_sign=args.prob_stop_sign,
|
611 |
-
),
|
612 |
-
BboxPertuber(max_ratio=0.02, generate_samples=1000),
|
613 |
-
]
|
614 |
-
else:
|
615 |
-
aux_target_hacks_list = None
|
616 |
-
|
617 |
-
return aux_target_hacks_list
|
618 |
-
|
619 |
-
|
620 |
-
def build(image_set, args, datasetinfo):
|
621 |
-
img_folder = datasetinfo["root"]
|
622 |
-
ann_file = datasetinfo["anno"]
|
623 |
-
|
624 |
-
# copy to local path
|
625 |
-
if os.environ.get('DATA_COPY_SHILONG') == 'INFO':
|
626 |
-
preparing_dataset(dict(img_folder=img_folder, ann_file=ann_file), image_set, args)
|
627 |
-
|
628 |
-
try:
|
629 |
-
strong_aug = args.strong_aug
|
630 |
-
except:
|
631 |
-
strong_aug = False
|
632 |
-
print(img_folder, ann_file)
|
633 |
-
dataset = CocoDetection(img_folder, ann_file,
|
634 |
-
transforms=make_coco_transforms(image_set, fix_size=args.fix_size, strong_aug=strong_aug, args=args),
|
635 |
-
return_masks=args.masks,
|
636 |
-
aux_target_hacks=None,
|
637 |
-
)
|
638 |
-
return dataset
|
639 |
-
|
640 |
-
|
641 |
-
if __name__ == "__main__":
|
642 |
-
# Objects365 Val example
|
643 |
-
dataset_o365 = CocoDetection(
|
644 |
-
'/path/Objects365/train/',
|
645 |
-
"/path/Objects365/slannos/anno_preprocess_train_v2.json",
|
646 |
-
transforms=None,
|
647 |
-
return_masks=False,
|
648 |
-
)
|
649 |
-
print('len(dataset_o365):', len(dataset_o365))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/coco_eval.py
DELETED
@@ -1,266 +0,0 @@
|
|
1 |
-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
2 |
-
"""
|
3 |
-
COCO evaluator that works in distributed mode.
|
4 |
-
|
5 |
-
Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py
|
6 |
-
The difference is that there is less copy-pasting from pycocotools
|
7 |
-
in the end of the file, as python3 can suppress prints with contextlib
|
8 |
-
"""
|
9 |
-
import os
|
10 |
-
import contextlib
|
11 |
-
import copy
|
12 |
-
import numpy as np
|
13 |
-
import torch
|
14 |
-
|
15 |
-
from pycocotools.cocoeval import COCOeval
|
16 |
-
from pycocotools.coco import COCO
|
17 |
-
import pycocotools.mask as mask_util
|
18 |
-
|
19 |
-
from util.misc import all_gather
|
20 |
-
|
21 |
-
|
22 |
-
class CocoEvaluator(object):
|
23 |
-
def __init__(self, coco_gt, iou_types, useCats=True):
|
24 |
-
assert isinstance(iou_types, (list, tuple))
|
25 |
-
coco_gt = copy.deepcopy(coco_gt)
|
26 |
-
self.coco_gt = coco_gt
|
27 |
-
|
28 |
-
self.iou_types = iou_types
|
29 |
-
self.coco_eval = {}
|
30 |
-
for iou_type in iou_types:
|
31 |
-
self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
|
32 |
-
-            self.coco_eval[iou_type].useCats = useCats
-
-        self.img_ids = []
-        self.eval_imgs = {k: [] for k in iou_types}
-        self.useCats = useCats
-
-    def update(self, predictions):
-        img_ids = list(np.unique(list(predictions.keys())))
-        self.img_ids.extend(img_ids)
-
-        for iou_type in self.iou_types:
-            results = self.prepare(predictions, iou_type)
-
-            # suppress pycocotools prints
-            with open(os.devnull, 'w') as devnull:
-                with contextlib.redirect_stdout(devnull):
-                    coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
-            coco_eval = self.coco_eval[iou_type]
-
-            coco_eval.cocoDt = coco_dt
-            coco_eval.params.imgIds = list(img_ids)
-            coco_eval.params.useCats = self.useCats
-            img_ids, eval_imgs = evaluate(coco_eval)
-
-            self.eval_imgs[iou_type].append(eval_imgs)
-
-    def synchronize_between_processes(self):
-        for iou_type in self.iou_types:
-            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
-            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
-
-    def accumulate(self):
-        for coco_eval in self.coco_eval.values():
-            coco_eval.accumulate()
-
-    def summarize(self):
-        for iou_type, coco_eval in self.coco_eval.items():
-            print("IoU metric: {}".format(iou_type))
-            coco_eval.summarize()
-
-    def prepare(self, predictions, iou_type):
-        if iou_type == "bbox":
-            return self.prepare_for_coco_detection(predictions)
-        elif iou_type == "segm":
-            return self.prepare_for_coco_segmentation(predictions)
-        elif iou_type == "keypoints":
-            return self.prepare_for_coco_keypoint(predictions)
-        else:
-            raise ValueError("Unknown iou type {}".format(iou_type))
-
-    def prepare_for_coco_detection(self, predictions):
-        coco_results = []
-        for original_id, prediction in predictions.items():
-            if len(prediction) == 0:
-                continue
-
-            boxes = prediction["boxes"]
-            boxes = convert_to_xywh(boxes).tolist()
-            if not isinstance(prediction["scores"], list):
-                scores = prediction["scores"].tolist()
-            else:
-                scores = prediction["scores"]
-            if not isinstance(prediction["labels"], list):
-                labels = prediction["labels"].tolist()
-            else:
-                labels = prediction["labels"]
-
-
-            try:
-                coco_results.extend(
-                    [
-                        {
-                            "image_id": original_id,
-                            "category_id": labels[k],
-                            "bbox": box,
-                            "score": scores[k],
-                        }
-                        for k, box in enumerate(boxes)
-                    ]
-                )
-            except:
-                import ipdb; ipdb.set_trace()
-        return coco_results
-
-    def prepare_for_coco_segmentation(self, predictions):
-        coco_results = []
-        for original_id, prediction in predictions.items():
-            if len(prediction) == 0:
-                continue
-
-            scores = prediction["scores"]
-            labels = prediction["labels"]
-            masks = prediction["masks"]
-
-            masks = masks > 0.5
-
-            scores = prediction["scores"].tolist()
-            labels = prediction["labels"].tolist()
-
-            rles = [
-                mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
-                for mask in masks
-            ]
-            for rle in rles:
-                rle["counts"] = rle["counts"].decode("utf-8")
-
-            coco_results.extend(
-                [
-                    {
-                        "image_id": original_id,
-                        "category_id": labels[k],
-                        "segmentation": rle,
-                        "score": scores[k],
-                    }
-                    for k, rle in enumerate(rles)
-                ]
-            )
-        return coco_results
-
-    def prepare_for_coco_keypoint(self, predictions):
-        coco_results = []
-        for original_id, prediction in predictions.items():
-            if len(prediction) == 0:
-                continue
-
-            boxes = prediction["boxes"]
-            boxes = convert_to_xywh(boxes).tolist()
-            scores = prediction["scores"].tolist()
-            labels = prediction["labels"].tolist()
-            keypoints = prediction["keypoints"]
-            keypoints = keypoints.flatten(start_dim=1).tolist()
-
-            coco_results.extend(
-                [
-                    {
-                        "image_id": original_id,
-                        "category_id": labels[k],
-                        'keypoints': keypoint,
-                        "score": scores[k],
-                    }
-                    for k, keypoint in enumerate(keypoints)
-                ]
-            )
-        return coco_results
-
-
-def convert_to_xywh(boxes):
-    xmin, ymin, xmax, ymax = boxes.unbind(1)
-    return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
-
-
-def merge(img_ids, eval_imgs):
-    all_img_ids = all_gather(img_ids)
-    all_eval_imgs = all_gather(eval_imgs)
-
-    merged_img_ids = []
-    for p in all_img_ids:
-        merged_img_ids.extend(p)
-
-    merged_eval_imgs = []
-    for p in all_eval_imgs:
-        merged_eval_imgs.append(p)
-
-    merged_img_ids = np.array(merged_img_ids)
-    merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
-
-    # keep only unique (and in sorted order) images
-    merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
-    merged_eval_imgs = merged_eval_imgs[..., idx]
-
-    return merged_img_ids, merged_eval_imgs
-
-
-def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
-    img_ids, eval_imgs = merge(img_ids, eval_imgs)
-    img_ids = list(img_ids)
-    eval_imgs = list(eval_imgs.flatten())
-
-    coco_eval.evalImgs = eval_imgs
-    coco_eval.params.imgIds = img_ids
-    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
-
-
-#################################################################
-# From pycocotools, just removed the prints and fixed
-# a Python3 bug about unicode not defined
-#################################################################
-
-
-def evaluate(self):
-    '''
-    Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
-    :return: None
-    '''
-    p = self.params
-    # add backward compatibility if useSegm is specified in params
-    if p.useSegm is not None:
-        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
-        print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
-    p.imgIds = list(np.unique(p.imgIds))
-    if p.useCats:
-        p.catIds = list(np.unique(p.catIds))
-    p.maxDets = sorted(p.maxDets)
-    self.params = p
-
-    self._prepare()
-    # loop through images, area range, max detection number
-    catIds = p.catIds if p.useCats else [-1]
-
-    if p.iouType == 'segm' or p.iouType == 'bbox':
-        computeIoU = self.computeIoU
-    elif p.iouType == 'keypoints':
-        computeIoU = self.computeOks
-    self.ious = {
-        (imgId, catId): computeIoU(imgId, catId)
-        for imgId in p.imgIds
-        for catId in catIds}
-
-    evaluateImg = self.evaluateImg
-    maxDet = p.maxDets[-1]
-    evalImgs = [
-        evaluateImg(imgId, catId, areaRng, maxDet)
-        for catId in catIds
-        for areaRng in p.areaRng
-        for imgId in p.imgIds
-    ]
-    # this is NOT in the pycocotools code, but could be done outside
-    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
-    self._paramsEval = copy.deepcopy(self.params)

-    return p.imgIds, evalImgs
-
-#################################################################
-# end of straight copy from pycocotools, just removing the prints
-#################################################################
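For context, an evaluator with this interface is normally driven once per validation pass. The sketch below shows the usual call order; the model, data_loader, and postprocessor names are illustrative and not taken from this diff.

# Minimal sketch of the evaluation loop for a COCO-style evaluator
# exposing update / synchronize_between_processes / accumulate / summarize.
evaluator = CocoGroundingEvaluator(coco_gt, iou_types=("bbox",))

for images, targets in data_loader:            # hypothetical loader
    outputs = model(images)                    # hypothetical model forward
    results = postprocessor(outputs, targets)  # {image_id: {"boxes", "scores", "labels"}}
    evaluator.update(results)

evaluator.synchronize_between_processes()      # merge per-rank results under DDP
evaluator.accumulate()
evaluator.summarize()                          # prints the standard COCO AP table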
datasets/coco_panoptic.py
DELETED
@@ -1,99 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-import json
-from pathlib import Path
-
-import numpy as np
-import torch
-from PIL import Image
-
-from panopticapi.utils import rgb2id
-from util.box_ops import masks_to_boxes
-
-from .coco import make_coco_transforms
-
-
-class CocoPanoptic:
-    def __init__(self, img_folder, ann_folder, ann_file, transforms=None, return_masks=True):
-        with open(ann_file, 'r') as f:
-            self.coco = json.load(f)
-
-        # sort 'images' field so that they are aligned with 'annotations'
-        # i.e., in alphabetical order
-        self.coco['images'] = sorted(self.coco['images'], key=lambda x: x['id'])
-        # sanity check
-        if "annotations" in self.coco:
-            for img, ann in zip(self.coco['images'], self.coco['annotations']):
-                assert img['file_name'][:-4] == ann['file_name'][:-4]
-
-        self.img_folder = img_folder
-        self.ann_folder = ann_folder
-        self.ann_file = ann_file
-        self.transforms = transforms
-        self.return_masks = return_masks
-
-    def __getitem__(self, idx):
-        ann_info = self.coco['annotations'][idx] if "annotations" in self.coco else self.coco['images'][idx]
-        img_path = Path(self.img_folder) / ann_info['file_name'].replace('.png', '.jpg')
-        ann_path = Path(self.ann_folder) / ann_info['file_name']
-
-        img = Image.open(img_path).convert('RGB')
-        w, h = img.size
-        if "segments_info" in ann_info:
-            masks = np.asarray(Image.open(ann_path), dtype=np.uint32)
-            masks = rgb2id(masks)
-
-            ids = np.array([ann['id'] for ann in ann_info['segments_info']])
-            masks = masks == ids[:, None, None]
-
-            masks = torch.as_tensor(masks, dtype=torch.uint8)
-            labels = torch.tensor([ann['category_id'] for ann in ann_info['segments_info']], dtype=torch.int64)
-
-        target = {}
-        target['image_id'] = torch.tensor([ann_info['image_id'] if "image_id" in ann_info else ann_info["id"]])
-        if self.return_masks:
-            target['masks'] = masks
-        target['labels'] = labels
-
-        target["boxes"] = masks_to_boxes(masks)
-
-        target['size'] = torch.as_tensor([int(h), int(w)])
-        target['orig_size'] = torch.as_tensor([int(h), int(w)])
-        if "segments_info" in ann_info:
-            for name in ['iscrowd', 'area']:
-                target[name] = torch.tensor([ann[name] for ann in ann_info['segments_info']])
-
-        if self.transforms is not None:
-            img, target = self.transforms(img, target)
-
-        return img, target
-
-    def __len__(self):
-        return len(self.coco['images'])
-
-    def get_height_and_width(self, idx):
-        img_info = self.coco['images'][idx]
-        height = img_info['height']
-        width = img_info['width']
-        return height, width
-
-
-def build(image_set, args):
-    img_folder_root = Path(args.coco_path)
-    ann_folder_root = Path(args.coco_panoptic_path)
-    assert img_folder_root.exists(), f'provided COCO path {img_folder_root} does not exist'
-    assert ann_folder_root.exists(), f'provided COCO path {ann_folder_root} does not exist'
-    mode = 'panoptic'
-    PATHS = {
-        "train": ("train2017", Path("annotations") / f'{mode}_train2017.json'),
-        "val": ("val2017", Path("annotations") / f'{mode}_val2017.json'),
-    }
-
-    img_folder, ann_file = PATHS[image_set]
-    img_folder_path = img_folder_root / img_folder
-    ann_folder = ann_folder_root / f'{mode}_{img_folder}'
-    ann_file = ann_folder_root / ann_file
-
-    dataset = CocoPanoptic(img_folder_path, ann_folder, ann_file,
-                           transforms=make_coco_transforms(image_set), return_masks=args.masks)
-
-    return dataset
datasets/data_util.py
DELETED
@@ -1,170 +0,0 @@
-import os
-import os.path as osp
-import shutil
-import time
-import datetime
-
-import torch
-
-from util.slconfig import SLConfig
-
-class Error(OSError):
-    pass
-
-def slcopytree(src, dst, symlinks=False, ignore=None, copy_function=shutil.copyfile,
-             ignore_dangling_symlinks=False):
-    """
-    modified from shutil.copytree without copystat.
-
-    Recursively copy a directory tree.
-
-    The destination directory must not already exist.
-    If exception(s) occur, an Error is raised with a list of reasons.
-
-    If the optional symlinks flag is true, symbolic links in the
-    source tree result in symbolic links in the destination tree; if
-    it is false, the contents of the files pointed to by symbolic
-    links are copied. If the file pointed by the symlink doesn't
-    exist, an exception will be added in the list of errors raised in
-    an Error exception at the end of the copy process.
-
-    You can set the optional ignore_dangling_symlinks flag to true if you
-    want to silence this exception. Notice that this has no effect on
-    platforms that don't support os.symlink.
-
-    The optional ignore argument is a callable. If given, it
-    is called with the `src` parameter, which is the directory
-    being visited by copytree(), and `names` which is the list of
-    `src` contents, as returned by os.listdir():
-
-        callable(src, names) -> ignored_names
-
-    Since copytree() is called recursively, the callable will be
-    called once for each directory that is copied. It returns a
-    list of names relative to the `src` directory that should
-    not be copied.
-
-    The optional copy_function argument is a callable that will be used
-    to copy each file. It will be called with the source path and the
-    destination path as arguments. By default, copy2() is used, but any
-    function that supports the same signature (like copy()) can be used.
-
-    """
-    errors = []
-    if os.path.isdir(src):
-        names = os.listdir(src)
-        if ignore is not None:
-            ignored_names = ignore(src, names)
-        else:
-            ignored_names = set()
-
-        os.makedirs(dst)
-        for name in names:
-            if name in ignored_names:
-                continue
-            srcname = os.path.join(src, name)
-            dstname = os.path.join(dst, name)
-            try:
-                if os.path.islink(srcname):
-                    linkto = os.readlink(srcname)
-                    if symlinks:
-                        # We can't just leave it to `copy_function` because legacy
-                        # code with a custom `copy_function` may rely on copytree
-                        # doing the right thing.
-                        os.symlink(linkto, dstname)
-                    else:
-                        # ignore dangling symlink if the flag is on
-                        if not os.path.exists(linkto) and ignore_dangling_symlinks:
-                            continue
-                        # otherwise let the copy occurs. copy2 will raise an error
-                        if os.path.isdir(srcname):
-                            slcopytree(srcname, dstname, symlinks, ignore,
-                                     copy_function)
-                        else:
-                            copy_function(srcname, dstname)
-                elif os.path.isdir(srcname):
-                    slcopytree(srcname, dstname, symlinks, ignore, copy_function)
-                else:
-                    # Will raise a SpecialFileError for unsupported file types
-                    copy_function(srcname, dstname)
-            # catch the Error from the recursive copytree so that we can
-            # continue with other files
-            except Error as err:
-                errors.extend(err.args[0])
-            except OSError as why:
-                errors.append((srcname, dstname, str(why)))
-    else:
-        copy_function(src, dst)
-
-    if errors:
-        raise Error(errors)
-    return dst
-
-def check_and_copy(src_path, tgt_path):
-    if os.path.exists(tgt_path):
-        return None
-
-    return slcopytree(src_path, tgt_path)
-
-
-def remove(srcpath):
-    if os.path.isdir(srcpath):
-        return shutil.rmtree(srcpath)
-    else:
-        return os.remove(srcpath)
-
-
-def preparing_dataset(pathdict, image_set, args):
-    start_time = time.time()
-    dataset_file = args.dataset_file
-    data_static_info = SLConfig.fromfile('util/static_data_path.py')
-    static_dict = data_static_info[dataset_file][image_set]
-
-    copyfilelist = []
-    for k,tgt_v in pathdict.items():
-        if os.path.exists(tgt_v):
-            if args.local_rank == 0:
-                print("path <{}> exist. remove it!".format(tgt_v))
-                remove(tgt_v)
-            # continue
-
-        if args.local_rank == 0:
-            src_v = static_dict[k]
-            assert isinstance(src_v, str)
-            if src_v.endswith('.zip'):
-                # copy
-                cp_tgt_dir = os.path.dirname(tgt_v)
-                filename = os.path.basename(src_v)
-                cp_tgt_path = os.path.join(cp_tgt_dir, filename)
-                print('Copy from <{}> to <{}>.'.format(src_v, cp_tgt_path))
-                os.makedirs(cp_tgt_dir, exist_ok=True)
-                check_and_copy(src_v, cp_tgt_path)
-
-                # unzip
-                import zipfile
-                print("Starting unzip <{}>".format(cp_tgt_path))
-                with zipfile.ZipFile(cp_tgt_path, 'r') as zip_ref:
-                    zip_ref.extractall(os.path.dirname(cp_tgt_path))
-
-                copyfilelist.append(cp_tgt_path)
-                copyfilelist.append(tgt_v)
-            else:
-                print('Copy from <{}> to <{}>.'.format(src_v, tgt_v))
-                os.makedirs(os.path.dirname(tgt_v), exist_ok=True)
-                check_and_copy(src_v, tgt_v)
-                copyfilelist.append(tgt_v)
-
-    if len(copyfilelist) == 0:
-        copyfilelist = None
-    args.copyfilelist = copyfilelist
-
-    if args.distributed:
-        torch.distributed.barrier()
-    total_time = time.time() - start_time
-    if copyfilelist:
-        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
-        print('Data copy time {}'.format(total_time_str))
-    return copyfilelist
-
-
-
datasets/dataset.py
DELETED
@@ -1,44 +0,0 @@
-from __future__ import print_function
-
-import torch
-import torchvision.datasets as datasets
-from torch.utils.data import Dataset
-from PIL import Image
-from .tsv_io import TSVFile
-import numpy as np
-import base64
-import io
-
-
-class TSVDataset(Dataset):
-    """ TSV dataset for ImageNet 1K training
-    """
-    def __init__(self, tsv_file, transform=None, target_transform=None):
-        self.tsv = TSVFile(tsv_file)
-        self.transform = transform
-        self.target_transform = target_transform
-
-    def __getitem__(self, index):
-        """
-        Args:
-            index (int): Index
-        Returns:
-            tuple: (image, target) where target is class_index of the target class.
-        """
-        row = self.tsv.seek(index)
-        image_data = base64.b64decode(row[-1])
-        image = Image.open(io.BytesIO(image_data))
-        image = image.convert('RGB')
-        target = int(row[1])
-
-        if self.transform is not None:
-            img = self.transform(image)
-        else:
-            img = image
-        if self.target_transform is not None:
-            target = self.target_transform(target)
-
-        return img, target
-
-    def __len__(self):
-        return self.tsv.num_rows()
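As a reference for the deleted TSVDataset: its __getitem__ reads the class index from column 1 and a base64-encoded image from the last column of each TSV row. A hypothetical row in that layout could be built like this (the three-column key/label/payload layout, file name, and label value are assumptions for illustration):

import base64

# Build one tab-separated row matching what TSVDataset.__getitem__ expects:
# row[1] -> integer class index, row[-1] -> base64-encoded image bytes.
with open("example.jpg", "rb") as f:           # hypothetical image file
    encoded = base64.b64encode(f.read()).decode("utf-8")
row = "example.jpg\t207\t" + encoded           # key, label, image payload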
datasets/odvg.py
DELETED
@@ -1,258 +0,0 @@
-from torchvision.datasets.vision import VisionDataset
-import os.path
-from typing import Callable, Optional
-import json
-from PIL import Image
-import torch
-import random
-import os, sys
-sys.path.append(os.path.dirname(sys.path[0]))
-
-import datasets.transforms as T
-
-class ODVGDataset(VisionDataset):
-    """
-    Args:
-        root (string): Root directory where images are downloaded to.
-        anno (string): Path to json annotation file.
-        label_map_anno (string): Path to json label mapping file. Only for Object Detection
-        transform (callable, optional): A function/transform that takes in an PIL image
-            and returns a transformed version. E.g, ``transforms.PILToTensor``
-        target_transform (callable, optional): A function/transform that takes in the
-            target and transforms it.
-        transforms (callable, optional): A function/transform that takes input sample and its target as entry
-            and returns a transformed version.
-    """
-
-    def __init__(
-        self,
-        root: str,
-        anno: str,
-        label_map_anno: str = None,
-        max_labels: int = 80,
-        transform: Optional[Callable] = None,
-        target_transform: Optional[Callable] = None,
-        transforms: Optional[Callable] = None,
-    ) -> None:
-        super().__init__(root, transforms, transform, target_transform)
-        self.root = root
-        self.dataset_mode = "OD" if label_map_anno else "VG"
-        self.max_labels = max_labels
-        if self.dataset_mode == "OD":
-            self.load_label_map(label_map_anno)
-        self._load_metas(anno)
-        self.get_dataset_info()
-
-    def load_label_map(self, label_map_anno):
-        with open(label_map_anno, 'r') as file:
-            self.label_map = json.load(file)
-        self.label_index = set(self.label_map.keys())
-
-    def _load_metas(self, anno):
-        with open(anno, 'r') as f:
-            self.metas = json.load(f)
-
-
-    def get_dataset_info(self):
-        print(f"  == total images: {len(self)}")
-        if self.dataset_mode == "OD":
-            print(f"  == total labels: {len(self.label_map)}")
-
-    def __getitem__(self, index: int):
-        meta = self.metas[index]
-        rel_path = meta["filename"]
-        abs_path = os.path.join(self.root, rel_path)
-        if not os.path.exists(abs_path):
-            raise FileNotFoundError(f"{abs_path} not found.")
-        image = Image.open(abs_path).convert('RGB')
-        w, h = image.size
-        if self.dataset_mode == "OD":
-            anno = meta["detection"]
-            instances = [obj for obj in anno["instances"]]
-            boxes = [obj["bbox"] for obj in instances]
-            # generate vg_labels
-            # pos bbox labels
-            ori_classes = [str(obj["label"]) for obj in instances]
-            pos_labels = set(ori_classes)
-            # neg bbox labels
-            neg_labels = self.label_index.difference(pos_labels)
-
-            vg_labels = list(pos_labels)
-            num_to_add = min(len(neg_labels), self.max_labels-len(pos_labels))
-            if num_to_add > 0:
-                vg_labels.extend(random.sample(neg_labels, num_to_add))
-
-            # shuffle
-            for i in range(len(vg_labels)-1, 0, -1):
-                j = random.randint(0, i)
-                vg_labels[i], vg_labels[j] = vg_labels[j], vg_labels[i]
-
-            caption_list = [self.label_map[lb] for lb in vg_labels]
-            caption_dict = {item:index for index, item in enumerate(caption_list)}
-
-            caption = ' . '.join(caption_list) + ' .'
-            classes = [caption_dict[self.label_map[str(obj["label"])]] for obj in instances]
-            boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
-            classes = torch.tensor(classes, dtype=torch.int64)
-        elif self.dataset_mode == "VG":
-            anno = meta["Grounding"]
-            instances = [obj for obj in anno["regions"]]
-            boxes = [obj["bbox"] for obj in instances]
-            caption_list = [obj["phrase"] for obj in instances]
-            c = list(zip(boxes, caption_list))
-            random.shuffle(c)
-            boxes[:], caption_list[:] = zip(*c)
-            uni_caption_list = list(set(caption_list))
-            label_map = {}
-            for idx in range(len(uni_caption_list)):
-                label_map[uni_caption_list[idx]] = idx
-            classes = [label_map[cap] for cap in caption_list]
-            caption = ' . '.join(uni_caption_list) + ' .'
-            boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
-            classes = torch.tensor(classes, dtype=torch.int64)
-            caption_list = uni_caption_list
-        # print("caption_list" , caption_list)
-        # print("caption" , caption)
-        # print("boxes" , boxes)
-        target = {}
-        target["image_id"] = rel_path.strip(".jpg")
-        target["size"] = torch.as_tensor([int(h), int(w)])
-        target["cap_list"] = caption_list
-        target["caption"] = caption
-        target["boxes"] = boxes
-        target["labels"] = classes
-        # print(" image_id " , target["image_id"])
-        # size, cap_list, caption, bboxes, labels
-
-        if self.transforms is not None:
-            image, target = self.transforms(image, target)
-
-        return image, target
-
-
-    def __len__(self) -> int:
-        return len(self.metas)
-
-
-def make_coco_transforms(image_set, fix_size=False, strong_aug=False, args=None):
-
-    normalize = T.Compose([
-        T.ToTensor(),
-        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
-    ])
-
-    # config the params for data aug
-    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
-    max_size = 1333
-    scales2_resize = [400, 500, 600]
-    scales2_crop = [384, 600]
-
-    # update args from config files
-    scales = getattr(args, 'data_aug_scales', scales)
-    max_size = getattr(args, 'data_aug_max_size', max_size)
-    scales2_resize = getattr(args, 'data_aug_scales2_resize', scales2_resize)
-    scales2_crop = getattr(args, 'data_aug_scales2_crop', scales2_crop)
-
-    # resize them
-    data_aug_scale_overlap = getattr(args, 'data_aug_scale_overlap', None)
-    if data_aug_scale_overlap is not None and data_aug_scale_overlap > 0:
-        data_aug_scale_overlap = float(data_aug_scale_overlap)
-        scales = [int(i*data_aug_scale_overlap) for i in scales]
-        max_size = int(max_size*data_aug_scale_overlap)
-        scales2_resize = [int(i*data_aug_scale_overlap) for i in scales2_resize]
-        scales2_crop = [int(i*data_aug_scale_overlap) for i in scales2_crop]
-
-    # datadict_for_print = {
-    #     'scales': scales,
-    #     'max_size': max_size,
-    #     'scales2_resize': scales2_resize,
-    #     'scales2_crop': scales2_crop
-    # }
-    # print("data_aug_params:", json.dumps(datadict_for_print, indent=2))
-
-    if image_set == 'train':
-        if fix_size:
-            return T.Compose([
-                T.RandomHorizontalFlip(),
-                T.RandomResize([(max_size, max(scales))]),
-                normalize,
-            ])
-
-        if strong_aug:
-            import datasets.sltransform as SLT
-
-            return T.Compose([
-                T.RandomHorizontalFlip(),
-                T.RandomSelect(
-                    T.RandomResize(scales, max_size=max_size),
-                    T.Compose([
-                        T.RandomResize(scales2_resize),
-                        T.RandomSizeCrop(*scales2_crop),
-                        T.RandomResize(scales, max_size=max_size),
-                    ])
-                ),
-                SLT.RandomSelectMulti([
-                    SLT.RandomCrop(),
-                    SLT.LightingNoise(),
-                    SLT.AdjustBrightness(2),
-                    SLT.AdjustContrast(2),
-                ]),
-                normalize,
-            ])
-
-        return T.Compose([
-            T.RandomHorizontalFlip(),
-            T.RandomSelect(
-                T.RandomResize(scales, max_size=max_size),
-                T.Compose([
-                    T.RandomResize(scales2_resize),
-                    T.RandomSizeCrop(*scales2_crop),
-                    T.RandomResize(scales, max_size=max_size),
-                ])
-            ),
-            normalize,
-        ])
-
-    if image_set in ['val', 'eval_debug', 'train_reg', 'test']:
-
-        if os.environ.get("GFLOPS_DEBUG_SHILONG", False) == 'INFO':
-            print("Under debug mode for flops calculation only!!!!!!!!!!!!!!!!")
-            return T.Compose([
-                T.ResizeDebug((1280, 800)),
-                normalize,
-            ])
-
-        return T.Compose([
-            T.RandomResize([max(scales)], max_size=max_size),
-            normalize,
-        ])
-
-    raise ValueError(f'unknown {image_set}')
-
-def build_odvg(image_set, args, datasetinfo):
-    img_folder = datasetinfo["root"]
-    ann_file = datasetinfo["anno"]
-    label_map = datasetinfo["label_map"] if "label_map" in datasetinfo else None
-    try:
-        strong_aug = args.strong_aug
-    except:
-        strong_aug = False  # False originally
-    print(img_folder, ann_file, label_map)
-    dataset = ODVGDataset(img_folder, ann_file, label_map, max_labels=args.max_labels,
-            transforms=make_coco_transforms(image_set, fix_size=args.fix_size, strong_aug=strong_aug, args=args),
-    )
-    return dataset
-
-
-if __name__=="__main__":
-    dataset_vg = ODVGDataset("path/GRIT-20M/data/","path/GRIT-20M/anno/grit_odvg_10k.jsonl",)
-    print(len(dataset_vg))
-    data = dataset_vg[random.randint(0, 100)]
-    print(data)
-    dataset_od = ODVGDataset("pathl/V3Det/",
-        "path/V3Det/annotations/v3det_2023_v1_all_odvg.jsonl",
-        "path/V3Det/annotations/v3det_label_map.json",
-    )
-    print(len(dataset_od))
-    data = dataset_od[random.randint(0, 100)]
-    print(data)
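The deleted ODVGDataset reads one JSON record per image. Judging only from the keys accessed in __getitem__ above, a record in each mode looks roughly like the dicts below; the file names, label ids, coordinates, and phrases are made up for illustration.

# Sketch of a detection-mode ("OD") record: meta["filename"] plus
# meta["detection"]["instances"][i]["bbox"] / ["label"].
od_record = {
    "filename": "images/000001.jpg",
    "detection": {
        "instances": [
            {"bbox": [10.0, 20.0, 200.0, 180.0], "label": 3},
            {"bbox": [50.0, 60.0, 120.0, 140.0], "label": 7},
        ]
    },
}

# Sketch of a grounding-mode ("VG") record: meta["Grounding"]["regions"][i]
# carries a box and the phrase it grounds.
vg_record = {
    "filename": "images/000002.jpg",
    "Grounding": {
        "regions": [
            {"bbox": [0.0, 0.0, 100.0, 100.0], "phrase": "a red car"},
        ]
    },
}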
datasets/panoptic_eval.py
DELETED
@@ -1,44 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-import json
-import os
-
-import util.misc as utils
-
-try:
-    from panopticapi.evaluation import pq_compute
-except ImportError:
-    pass
-
-
-class PanopticEvaluator(object):
-    def __init__(self, ann_file, ann_folder, output_dir="panoptic_eval"):
-        self.gt_json = ann_file
-        self.gt_folder = ann_folder
-        if utils.is_main_process():
-            if not os.path.exists(output_dir):
-                os.mkdir(output_dir)
-        self.output_dir = output_dir
-        self.predictions = []
-
-    def update(self, predictions):
-        for p in predictions:
-            with open(os.path.join(self.output_dir, p["file_name"]), "wb") as f:
-                f.write(p.pop("png_string"))
-
-        self.predictions += predictions
-
-    def synchronize_between_processes(self):
-        all_predictions = utils.all_gather(self.predictions)
-        merged_predictions = []
-        for p in all_predictions:
-            merged_predictions += p
-        self.predictions = merged_predictions
-
-    def summarize(self):
-        if utils.is_main_process():
-            json_data = {"annotations": self.predictions}
-            predictions_json = os.path.join(self.output_dir, "predictions.json")
-            with open(predictions_json, "w") as f:
-                f.write(json.dumps(json_data))
-            return pq_compute(self.gt_json, predictions_json, gt_folder=self.gt_folder, pred_folder=self.output_dir)
-        return None
datasets/random_crop.py
DELETED
@@ -1,135 +0,0 @@
-import PIL #version 1.2.0
-import torch
-import os
-import torchvision.transforms.functional as F
-import numpy as np
-import random
-
-
-def intersect(boxes1, boxes2):
-    '''
-    Find intersection of every box combination between two sets of box
-    boxes1: bounding boxes 1, a tensor of dimensions (n1, 4)
-    boxes2: bounding boxes 2, a tensor of dimensions (n2, 4)
-
-    Out: Intersection each of boxes1 with respect to each of boxes2,
-         a tensor of dimensions (n1, n2)
-    '''
-    n1 = boxes1.size(0)
-    n2 = boxes2.size(0)
-    max_xy = torch.min(boxes1[:, 2:].unsqueeze(1).expand(n1, n2, 2),
-                       boxes2[:, 2:].unsqueeze(0).expand(n1, n2, 2))
-
-    min_xy = torch.max(boxes1[:, :2].unsqueeze(1).expand(n1, n2, 2),
-                       boxes2[:, :2].unsqueeze(0).expand(n1, n2, 2))
-    inter = torch.clamp(max_xy - min_xy , min=0)  # (n1, n2, 2)
-    return inter[:, :, 0] * inter[:, :, 1]  #(n1, n2)
-def find_IoU(boxes1, boxes2):
-    '''
-    Find IoU between every boxes set of boxes
-    boxes1: a tensor of dimensions (n1, 4) (left, top, right , bottom)
-    boxes2: a tensor of dimensions (n2, 4)
-
-    Out: IoU each of boxes1 with respect to each of boxes2, a tensor of
-         dimensions (n1, n2)
-
-    Formula:
-    (box1 ∩ box2) / (box1 u box2) = (box1 ∩ box2) / (area(box1) + area(box2) - (box1 ∩ box2 ))
-    '''
-    inter = intersect(boxes1, boxes2)
-    area_boxes1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
-    area_boxes2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
-
-    area_boxes1 = area_boxes1.unsqueeze(1).expand_as(inter)  #(n1, n2)
-    area_boxes2 = area_boxes2.unsqueeze(0).expand_as(inter)  #(n1, n2)
-    union = (area_boxes1 + area_boxes2 - inter)
-    return inter / union
-
-
-def random_crop(image, boxes, labels, difficulties=None):
-    '''
-    image: A PIL image
-    boxes: Bounding boxes, a tensor of dimensions (#objects, 4)
-    labels: labels of object, a tensor of dimensions (#objects)
-    difficulties: difficulties of detect object, a tensor of dimensions (#objects)
-
-    Out: cropped image , new boxes, new labels, new difficulties
-    '''
-    if type(image) == PIL.Image.Image:
-        image = F.to_tensor(image)
-    original_h = image.size(1)
-    original_w = image.size(2)
-
-    while True:
-        mode = random.choice([0.1, 0.3, 0.5, 0.9, None])
-
-        if mode is None:
-            return F.to_pil_image(image), boxes, labels, difficulties
-
-        new_image = image
-        new_boxes = boxes
-        new_difficulties = difficulties
-        new_labels = labels
-        for _ in range(50):
-            # Crop dimensions: [0.3, 1] of original dimensions
-            new_h = random.uniform(0.3*original_h, original_h)
-            new_w = random.uniform(0.3*original_w, original_w)
-
-            # Aspect ratio constraint b/t .5 & 2
-            if new_h/new_w < 0.5 or new_h/new_w > 2:
-                continue
-
-            #Crop coordinate
-            left = random.uniform(0, original_w - new_w)
-            right = left + new_w
-            top = random.uniform(0, original_h - new_h)
-            bottom = top + new_h
-            crop = torch.FloatTensor([int(left), int(top), int(right), int(bottom)])
-
-            # Calculate IoU between the crop and the bounding boxes
-            overlap = find_IoU(crop.unsqueeze(0), boxes)  #(1, #objects)
-            overlap = overlap.squeeze(0)
-
-            # If not a single bounding box has a IoU of greater than the minimum, try again
-            if overlap.shape[0] == 0:
-                continue
-            if overlap.max().item() < mode:
-                continue
-
-            #Crop
-            new_image = image[:, int(top):int(bottom), int(left):int(right)]  #(3, new_h, new_w)
-
-            #Center of bounding boxes
-            center_bb = (boxes[:, :2] + boxes[:, 2:])/2.0
-
-            #Find bounding box has been had center in crop
-            center_in_crop = (center_bb[:, 0] >left) * (center_bb[:, 0] < right
-                             ) *(center_bb[:, 1] > top) * (center_bb[:, 1] < bottom)    #( #objects)
-
-            if not center_in_crop.any():
-                continue
-
-            #take matching bounding box
-            new_boxes = boxes[center_in_crop, :]
-
-            #take matching labels
-            new_labels = labels[center_in_crop]
-
-            #take matching difficulities
-            if difficulties is not None:
-                new_difficulties = difficulties[center_in_crop]
-            else:
-                new_difficulties = None
-
-            #Use the box left and top corner or the crop's
-            new_boxes[:, :2] = torch.max(new_boxes[:, :2], crop[:2])
-
-            #adjust to crop
-            new_boxes[:, :2] -= crop[:2]
-
-            new_boxes[:, 2:] = torch.min(new_boxes[:, 2:],crop[2:])
-
-            #adjust to crop
-            new_boxes[:, 2:] -= crop[:2]
-
-            return F.to_pil_image(new_image), new_boxes, new_labels, new_difficulties
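A minimal sketch of calling the deleted random_crop helper on a toy image with two xyxy boxes; the sizes, coordinates, and labels are arbitrary:

import torch
from PIL import Image

# Toy inputs: one RGB image and two boxes in (left, top, right, bottom) pixels.
img = Image.new("RGB", (640, 480))
boxes = torch.tensor([[100., 100., 300., 300.], [350., 50., 600., 400.]])
labels = torch.tensor([1, 2])

# Returns a cropped PIL image plus the boxes/labels whose centers survived the crop.
new_img, new_boxes, new_labels, _ = random_crop(img, boxes, labels)
print(new_img.size, new_boxes.shape, new_labels)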
datasets/sltransform.py
DELETED
@@ -1,247 +0,0 @@
-# modified from https://github.com/anhtuan85/Data-Augmentation-for-Object-Detection/blob/master/augmentation.ipynb
-
-import PIL #version 1.2.0
-from PIL import Image #version 6.1.0
-import torch
-import os
-import torchvision.transforms.functional as F
-import numpy as np
-import random
-
-from .random_crop import random_crop
-from util.box_ops import box_cxcywh_to_xyxy, box_xyxy_to_cxcywh
-
-class AdjustContrast:
-    def __init__(self, contrast_factor):
-        self.contrast_factor = contrast_factor
-
-    def __call__(self, img, target):
-        """
-        img (PIL Image or Tensor): Image to be adjusted.
-        """
-        _contrast_factor = ((random.random() + 1.0) / 2.0) * self.contrast_factor
-        img = F.adjust_contrast(img, _contrast_factor)
-        return img, target
-
-class AdjustBrightness:
-    def __init__(self, brightness_factor):
-        self.brightness_factor = brightness_factor
-
-    def __call__(self, img, target):
-        """
-        img (PIL Image or Tensor): Image to be adjusted.
-        """
-        _brightness_factor = ((random.random() + 1.0) / 2.0) * self.brightness_factor
-        img = F.adjust_brightness(img, _brightness_factor)
-        return img, target
-
-def lighting_noise(image):
-    '''
-    color channel swap in image
-    image: A PIL image
-    '''
-    new_image = image
-    perms = ((0, 1, 2), (0, 2, 1), (1, 0, 2),
-             (1, 2, 0), (2, 0, 1), (2, 1, 0))
-    swap = perms[random.randint(0, len(perms)- 1)]
-    new_image = F.to_tensor(new_image)
-    new_image = new_image[swap, :, :]
-    new_image = F.to_pil_image(new_image)
-    return new_image
-
-class LightingNoise:
-    def __init__(self) -> None:
-        pass
-
-    def __call__(self, img, target):
-        return lighting_noise(img), target
-
-
-def rotate(image, boxes, angle):
-    '''
-    Rotate image and bounding box
-    image: A Pil image (w, h)
-    boxes: A tensors of dimensions (#objects, 4)
-
-    Out: rotated image (w, h), rotated boxes
-    '''
-    new_image = image.copy()
-    new_boxes = boxes.clone()
-
-    #Rotate image, expand = True
-    w = image.width
-    h = image.height
-    cx = w/2
-    cy = h/2
-    new_image = new_image.rotate(angle, expand=True)
-    angle = np.radians(angle)
-    alpha = np.cos(angle)
-    beta = np.sin(angle)
-    #Get affine matrix
-    AffineMatrix = torch.tensor([[alpha, beta, (1-alpha)*cx - beta*cy],
-                                 [-beta, alpha, beta*cx + (1-alpha)*cy]])
-
-    #Rotation boxes
-    box_width = (boxes[:,2] - boxes[:,0]).reshape(-1,1)
-    box_height = (boxes[:,3] - boxes[:,1]).reshape(-1,1)
-
-    #Get corners for boxes
-    x1 = boxes[:,0].reshape(-1,1)
-    y1 = boxes[:,1].reshape(-1,1)
-
-    x2 = x1 + box_width
-    y2 = y1
-
-    x3 = x1
-    y3 = y1 + box_height
-
-    x4 = boxes[:,2].reshape(-1,1)
-    y4 = boxes[:,3].reshape(-1,1)
-
-    corners = torch.stack((x1,y1,x2,y2,x3,y3,x4,y4), dim= 1)
-    # corners.reshape(-1, 8)    #Tensors of dimensions (#objects, 8)
-    corners = corners.reshape(-1,2) #Tensors of dimension (4* #objects, 2)
-    corners = torch.cat((corners, torch.ones(corners.shape[0], 1)), dim= 1) #(Tensors of dimension (4* #objects, 3))
-
-    cos = np.abs(AffineMatrix[0, 0])
-    sin = np.abs(AffineMatrix[0, 1])
-
-    nW = int((h * sin) + (w * cos))
-    nH = int((h * cos) + (w * sin))
-    AffineMatrix[0, 2] += (nW / 2) - cx
-    AffineMatrix[1, 2] += (nH / 2) - cy
-
-
-    #Apply affine transform
-    rotate_corners = torch.mm(AffineMatrix, corners.t().to(torch.float64)).t()
-    rotate_corners = rotate_corners.reshape(-1,8)
-
-    x_corners = rotate_corners[:,[0,2,4,6]]
-    y_corners = rotate_corners[:,[1,3,5,7]]
-
-    #Get (x_min, y_min, x_max, y_max)
-    x_min, _ = torch.min(x_corners, dim= 1)
-    x_min = x_min.reshape(-1, 1)
-    y_min, _ = torch.min(y_corners, dim= 1)
-    y_min = y_min.reshape(-1, 1)
-    x_max, _ = torch.max(x_corners, dim= 1)
-    x_max = x_max.reshape(-1, 1)
-    y_max, _ = torch.max(y_corners, dim= 1)
-    y_max = y_max.reshape(-1, 1)
-
-    new_boxes = torch.cat((x_min, y_min, x_max, y_max), dim= 1)
-
-    scale_x = new_image.width / w
-    scale_y = new_image.height / h
-
-    #Resize new image to (w, h)
-
-    new_image = new_image.resize((w, h))
-
-    #Resize boxes
-    new_boxes /= torch.Tensor([scale_x, scale_y, scale_x, scale_y])
-    new_boxes[:, 0] = torch.clamp(new_boxes[:, 0], 0, w)
-    new_boxes[:, 1] = torch.clamp(new_boxes[:, 1], 0, h)
-    new_boxes[:, 2] = torch.clamp(new_boxes[:, 2], 0, w)
-    new_boxes[:, 3] = torch.clamp(new_boxes[:, 3], 0, h)
-    return new_image, new_boxes
-
-# def convert_xywh_to_xyxy(boxes: torch.Tensor):
-#     _boxes = boxes.clone()
-#     box_xy = _boxes[:, :2]
-#     box_wh = _boxes[:, 2:]
-#     box_x1y1 = box_xy - box_wh/2
-#     box_x2y2 = box_xy + box_wh/2
-#     box_xyxy = torch.cat((box_x1y1, box_x2y2), dim=-1)
-#     return box_xyxy
-
-class Rotate:
-    def __init__(self, angle=10) -> None:
-        self.angle = angle
-
-    def __call__(self, img, target):
-        w,h = img.size
-        whwh = torch.Tensor([w, h, w, h])
-        boxes_xyxy = box_cxcywh_to_xyxy(target['boxes']) * whwh
-        img, boxes_new = rotate(img, boxes_xyxy, self.angle)
-        target['boxes'] = box_xyxy_to_cxcywh(boxes_new).to(boxes_xyxy.dtype) / (whwh + 1e-3)
-        return img, target
-
-
-class RandomCrop:
-    def __init__(self) -> None:
-        pass
-
-    def __call__(self, img, target):
-        w,h = img.size
-        try:
-            boxes_xyxy = target['boxes']
-            labels = target['labels']
-            img, new_boxes, new_labels, _ = random_crop(img, boxes_xyxy, labels)
-            target['boxes'] = new_boxes
-            target['labels'] = new_labels
-        except Exception as e:
-            pass
-        return img, target
-
-
-class RandomCropDebug:
-    def __init__(self) -> None:
-        pass
-
-    def __call__(self, img, target):
-        boxes_xyxy = target['boxes'].clone()
-        labels = target['labels'].clone()
-        img, new_boxes, new_labels, _ = random_crop(img, boxes_xyxy, labels)
-        target['boxes'] = new_boxes
-        target['labels'] = new_labels
-
-
-        return img, target
-
-class RandomSelectMulti(object):
-    """
-    Randomly selects between transforms1 and transforms2,
-    """
-    def __init__(self, transformslist, p=-1):
-        self.transformslist = transformslist
-        self.p = p
-        assert p == -1
-
-    def __call__(self, img, target):
-        if self.p == -1:
-            return random.choice(self.transformslist)(img, target)
-
-
-class Albumentations:
-    def __init__(self):
-        import albumentations as A
-        self.transform = A.Compose([
-            A.Blur(p=0.01),
-            A.MedianBlur(p=0.01),
-            A.ToGray(p=0.01),
-            A.CLAHE(p=0.01),
-            A.RandomBrightnessContrast(p=0.005),
-            A.RandomGamma(p=0.005),
-            A.ImageCompression(quality_lower=75, p=0.005)],
-            bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))
-
-    def __call__(self, img, target, p=1.0):
-        """
-        Input:
-            target['boxes']: xyxy, unnormalized data.
-
-        """
-        boxes_raw = target['boxes']
-        labels_raw = target['labels']
-        img_np = np.array(img)
-        if self.transform and random.random() < p:
-            new_res = self.transform(image=img_np, bboxes=boxes_raw, class_labels=labels_raw)  # transformed
-            boxes_new = torch.Tensor(new_res['bboxes']).to(boxes_raw.dtype).reshape_as(boxes_raw)
-            img_np = new_res['image']
-            labels_new = torch.Tensor(new_res['class_labels']).to(labels_raw.dtype)
-            img_new = Image.fromarray(img_np)
-            target['boxes'] = boxes_new
-            target['labels'] = labels_new
-
-        return img_new, target
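For reference, the deleted transforms all share the same (img, target) calling convention, so they can be chained by hand outside the Compose pipeline; a small sketch with made-up box values, using the same 'boxes'/'labels' keys these transforms expect:

import torch
from PIL import Image

# Apply three of the deleted photometric augmentations in sequence.
img = Image.new("RGB", (640, 480))
target = {"boxes": torch.tensor([[120., 80., 400., 360.]]),  # xyxy pixels
          "labels": torch.tensor([0])}

for t in (AdjustBrightness(2), AdjustContrast(2), LightingNoise()):
    img, target = t(img, target)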
environment.yaml
ADDED
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: dino
|
2 |
+
channels:
|
3 |
+
- pytorch
|
4 |
+
- nvidia
|
5 |
+
- conda-forge
|
6 |
+
- defaults
|
7 |
+
dependencies:
|
8 |
+
- addict=2.4.0=pyhd8ed1ab_2
|
9 |
+
- aiohttp=3.8.5=py39ha55989b_0
|
10 |
+
- aiosignal=1.3.1=pyhd8ed1ab_0
|
11 |
+
- asttokens=2.0.5=pyhd3eb1b0_0
|
12 |
+
- async-timeout=4.0.3=pyhd8ed1ab_0
|
13 |
+
- attrs=23.1.0=pyh71513ae_1
|
14 |
+
- aws-c-auth=0.7.0=h6f3c987_2
|
15 |
+
- aws-c-cal=0.6.0=h6ba3258_0
|
16 |
+
- aws-c-common=0.8.23=hcfcfb64_0
|
17 |
+
- aws-c-compression=0.2.17=h420beca_1
|
18 |
+
- aws-c-event-stream=0.3.1=had47b81_1
|
19 |
+
- aws-c-http=0.7.11=h72ba615_0
|
20 |
+
- aws-c-io=0.13.28=ha35c040_0
|
21 |
+
- aws-c-mqtt=0.8.14=h4941efa_2
|
22 |
+
- aws-c-s3=0.3.13=he04eaa7_2
|
23 |
+
- aws-c-sdkutils=0.1.11=h420beca_1
|
24 |
+
- aws-checksums=0.1.16=h420beca_1
|
25 |
+
- aws-crt-cpp=0.20.3=h247a981_4
|
26 |
+
- aws-sdk-cpp=1.10.57=h1a0519f_17
|
27 |
+
- backcall=0.2.0=pyhd3eb1b0_0
|
28 |
+
- blas=2.118=mkl
|
29 |
+
- blas-devel=3.9.0=18_win64_mkl
|
30 |
+
- brotli=1.0.9=hcfcfb64_9
|
31 |
+
- brotli-bin=1.0.9=hcfcfb64_9
|
32 |
+
- brotli-python=1.0.9=py39h99910a6_9
|
33 |
+
- bzip2=1.0.8=h8ffe710_4
|
34 |
+
- c-ares=1.19.1=hcfcfb64_0
|
35 |
+
- ca-certificates=2023.08.22=haa95532_0
|
36 |
+
- certifi=2023.7.22=py39haa95532_0
|
37 |
+
- charset-normalizer=3.2.0=pyhd8ed1ab_0
|
38 |
+
- click=8.1.7=win_pyh7428d3b_0
|
39 |
+
- colorama=0.4.6=pyhd8ed1ab_0
|
40 |
+
- comm=0.1.2=py39haa95532_0
|
41 |
+
- contourpy=1.1.1=py39h1f6ef14_1
|
42 |
+
- cuda-cccl=12.2.140=0
|
43 |
+
- cuda-cudart=11.8.89=0
|
44 |
+
- cuda-cudart-dev=11.8.89=0
|
45 |
+
- cuda-cupti=11.8.87=0
|
46 |
+
- cuda-libraries=11.8.0=0
|
47 |
+
- cuda-libraries-dev=11.8.0=0
|
48 |
+
- cuda-nvrtc=11.8.89=0
|
49 |
+
- cuda-nvrtc-dev=11.8.89=0
|
50 |
+
- cuda-nvtx=11.8.86=0
|
51 |
+
- cuda-profiler-api=12.2.140=0
|
52 |
+
- cuda-runtime=11.8.0=0
|
53 |
+
- cycler=0.11.0=pyhd8ed1ab_0
|
54 |
+
- cython=3.0.0=py39h2bbff1b_0
|
55 |
+
- dataclasses=0.8=pyhc8e2a94_3
|
56 |
+
- datasets=2.14.5=pyhd8ed1ab_0
|
57 |
+
- debugpy=1.6.7=py39hd77b12b_0
|
58 |
+
- decorator=5.1.1=pyhd3eb1b0_0
|
59 |
+
- dill=0.3.7=pyhd8ed1ab_0
|
60 |
+
- exceptiongroup=1.0.4=py39haa95532_0
|
61 |
+
- executing=0.8.3=pyhd3eb1b0_0
|
62 |
+
- filelock=3.12.4=pyhd8ed1ab_0
|
63 |
+
- fonttools=4.42.1=py39ha55989b_0
|
64 |
+
- freeglut=3.2.2=h63175ca_2
|
65 |
+
- freetype=2.12.1=hdaf720e_2
|
66 |
+
- frozenlist=1.4.0=py39ha55989b_1
|
67 |
+
- fsspec=2023.6.0=pyh1a96a4e_0
|
68 |
+
- gettext=0.21.1=h5728263_0
|
69 |
+
- glib=2.78.0=h12be248_0
|
70 |
+
- glib-tools=2.78.0=h12be248_0
|
71 |
+
- gst-plugins-base=1.22.6=h001b923_1
|
72 |
+
- gstreamer=1.22.6=hb4038d2_1
|
73 |
+
- huggingface_hub=0.17.3=pyhd8ed1ab_0
|
74 |
+
- icu=70.1=h0e60522_0
|
75 |
+
- idna=3.4=pyhd8ed1ab_0
|
76 |
+
- importlib-metadata=6.8.0=pyha770c72_0
|
77 |
+
- importlib-resources=6.1.0=pyhd8ed1ab_0
|
78 |
+
- importlib_metadata=6.8.0=hd8ed1ab_0
|
79 |
+
- importlib_resources=6.1.0=pyhd8ed1ab_0
|
80 |
+
- intel-openmp=2023.2.0=h57928b3_49503
|
81 |
+
- ipykernel=6.25.0=py39h9909e9c_0
|
82 |
+
- ipython=8.15.0=py39haa95532_0
|
83 |
+
- jasper=2.0.33=hc2e4405_1
|
84 |
+
- jedi=0.18.1=py39haa95532_1
|
85 |
+
- jinja2=3.1.2=pyhd8ed1ab_1
|
86 |
+
- joblib=1.3.2=pyhd8ed1ab_0
|
87 |
+
- jpeg=9e=hcfcfb64_3
|
88 |
+
- jupyter_client=8.1.0=py39haa95532_0
|
89 |
+
- jupyter_core=5.3.0=py39haa95532_0
|
90 |
+
- kiwisolver=1.4.5=py39h1f6ef14_1
|
91 |
+
- krb5=1.20.1=heb0366b_0
|
92 |
+
- lcms2=2.14=h90d422f_0
|
93 |
+
- lerc=4.0.0=h63175ca_0
|
94 |
+
- libabseil=20230125.3=cxx17_h63175ca_0
|
95 |
+
- libarrow=12.0.1=h12e5d06_5_cpu
|
96 |
+
- libblas=3.9.0=18_win64_mkl
|
97 |
+
- libbrotlicommon=1.0.9=hcfcfb64_9
|
98 |
+
- libbrotlidec=1.0.9=hcfcfb64_9
|
99 |
+
- libbrotlienc=1.0.9=hcfcfb64_9
|
100 |
+
- libcblas=3.9.0=18_win64_mkl
|
101 |
+
- libclang=15.0.7=default_h77d9078_3
|
102 |
+
- libclang13=15.0.7=default_h77d9078_3
|
103 |
+
- libcrc32c=1.1.2=h0e60522_0
|
104 |
+
- libcublas=11.11.3.6=0
|
105 |
+
- libcublas-dev=11.11.3.6=0
|
106 |
+
- libcufft=10.9.0.58=0
|
107 |
+
- libcufft-dev=10.9.0.58=0
|
108 |
+
- libcurand=10.3.3.141=0
|
109 |
+
- libcurand-dev=10.3.3.141=0
|
110 |
+
- libcurl=8.1.2=h68f0423_0
|
111 |
+
- libcusolver=11.4.1.48=0
|
112 |
+
- libcusolver-dev=11.4.1.48=0
|
113 |
+
- libcusparse=11.7.5.86=0
|
114 |
+
- libcusparse-dev=11.7.5.86=0
|
115 |
+
- libdeflate=1.14=hcfcfb64_0
|
116 |
+
- libevent=2.1.12=h3671451_1
- libffi=3.4.2=h8ffe710_5
- libglib=2.78.0=he8f3873_0
- libgoogle-cloud=2.12.0=h00b2bdc_1
- libgrpc=1.54.3=ha177ca7_0
- libhwloc=2.9.3=default_haede6df_1009
- libiconv=1.17=h8ffe710_0
- liblapack=3.9.0=18_win64_mkl
- liblapacke=3.9.0=18_win64_mkl
- libnpp=11.8.0.86=0
- libnpp-dev=11.8.0.86=0
- libnvjpeg=11.9.0.86=0
- libnvjpeg-dev=11.9.0.86=0
- libogg=1.3.4=h8ffe710_1
- libopencv=4.5.3=py39h488c12c_8
- libpng=1.6.39=h19919ed_0
- libprotobuf=3.21.12=h12be248_2
- libsodium=1.0.18=h62dcd97_0
- libsqlite=3.43.0=hcfcfb64_0
- libssh2=1.11.0=h7dfc565_0
- libthrift=0.18.1=h06f6336_2
- libtiff=4.4.0=hc4f729c_5
- libutf8proc=2.8.0=h82a8f57_0
- libuv=1.44.2=hcfcfb64_1
- libvorbis=1.3.7=h0e60522_0
- libwebp-base=1.3.2=hcfcfb64_0
- libxcb=1.13=hcd874cb_1004
- libxml2=2.11.5=hc3477c8_1
- libzlib=1.2.13=hcfcfb64_5
- lz4-c=1.9.4=hcfcfb64_0
- m2w64-gcc-libgfortran=5.3.0=6
- m2w64-gcc-libs=5.3.0=7
- m2w64-gcc-libs-core=5.3.0=7
- m2w64-gmp=6.1.0=2
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
- markupsafe=2.1.3=py39ha55989b_1
- matplotlib-base=3.8.0=py39hf19769e_1
- matplotlib-inline=0.1.6=py39haa95532_0
- mkl=2022.1.0=h6a75c08_874
- mkl-devel=2022.1.0=h57928b3_875
- mkl-include=2022.1.0=h6a75c08_874
- mpmath=1.3.0=pyhd8ed1ab_0
- msys2-conda-epoch=20160418=1
- multidict=6.0.4=py39ha55989b_0
- multiprocess=0.70.15=py39ha55989b_1
- munkres=1.1.4=pyh9f0ad1d_0
- nest-asyncio=1.5.6=py39haa95532_0
- networkx=3.1=pyhd8ed1ab_0
- numpy=1.26.0=py39hddb5d58_0
- opencv=4.5.3=py39hcbf5309_8
- openjpeg=2.5.0=hc9384bd_1
- openssl=3.1.3=hcfcfb64_0
- orc=1.9.0=hada7b9e_1
- packaging=23.1=pyhd8ed1ab_0
- pandas=2.1.1=py39h32e6231_0
- parso=0.8.3=pyhd3eb1b0_0
- pcre2=10.40=h17e33f8_0
- pickleshare=0.7.5=pyhd3eb1b0_1003
- pillow=9.2.0=py39h595c93f_3
- pip=23.2.1=pyhd8ed1ab_0
- platformdirs=3.10.0=pyhd8ed1ab_0
- prompt-toolkit=3.0.36=py39haa95532_0
- psutil=5.9.0=py39h2bbff1b_0
- pthread-stubs=0.4=hcd874cb_1001
- pthreads-win32=2.9.1=hfa6e2cd_3
- pure_eval=0.2.2=pyhd3eb1b0_0
- py-opencv=4.5.3=py39h00e5391_8
- pyarrow=12.0.1=py39hca4e8af_5_cpu
- pycocotools=2.0.6=py39hc266a54_1
- pygments=2.15.1=py39haa95532_1
- pyparsing=3.1.1=pyhd8ed1ab_0
- pysocks=1.7.1=pyh0701188_6
- python=3.9.18=h4de0772_0_cpython
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-tzdata=2023.3=pyhd8ed1ab_0
- python-xxhash=3.3.0=py39ha55989b_1
- python_abi=3.9=4_cp39
- pytorch=2.0.1=py3.9_cuda11.8_cudnn8_0
- pytorch-cuda=11.8=h24eeafa_5
- pytorch-mutex=1.0=cuda
- pytz=2023.3.post1=pyhd8ed1ab_0
- pywin32=305=py39h2bbff1b_0
- pyyaml=6.0.1=py39ha55989b_1
- pyzmq=25.1.0=py39hd77b12b_0
- qt-main=5.15.8=h720456b_6
- re2=2023.03.02=hd4eee63_0
- regex=2023.8.8=py39ha55989b_1
- requests=2.31.0=pyhd8ed1ab_0
- sacremoses=0.0.53=pyhd8ed1ab_0
- safetensors=0.3.3=py39hf21820d_1
- setuptools=68.2.2=pyhd8ed1ab_0
- six=1.16.0=pyh6c4a22f_0
- snappy=1.1.10=hfb803bf_0
- stack_data=0.2.0=pyhd3eb1b0_0
- sympy=1.12=pyh04b8f61_3
- tbb=2021.10.0=h91493d7_1
- timm=0.9.7=pyhd8ed1ab_0
- tk=8.6.13=hcfcfb64_0
- tokenizers=0.13.3=py39hca44cb7_0
- tomli=2.0.1=pyhd8ed1ab_0
- tornado=6.3.2=py39h2bbff1b_0
- tqdm=4.66.1=pyhd8ed1ab_0
- traitlets=5.7.1=py39haa95532_0
- transformers=4.33.2=pyhd8ed1ab_0
- typing-extensions=4.8.0=hd8ed1ab_0
- typing_extensions=4.8.0=pyha770c72_0
- tzdata=2023c=h71feb2d_0
- ucrt=10.0.22621.0=h57928b3_0
- unicodedata2=15.0.0=py39ha55989b_1
- urllib3=2.0.5=pyhd8ed1ab_0
- vc=14.3=h64f974e_17
- vc14_runtime=14.36.32532=hdcecf7f_17
- vs2015_runtime=14.36.32532=h05e6639_17
- wcwidth=0.2.5=pyhd3eb1b0_0
- wheel=0.41.2=pyhd8ed1ab_0
- win_inet_pton=1.1.0=pyhd8ed1ab_6
- xorg-libxau=1.0.11=hcd874cb_0
- xorg-libxdmcp=1.1.3=hcd874cb_0
- xxhash=0.8.2=hcfcfb64_0
- xz=5.2.6=h8d14728_0
- yaml=0.2.5=h8ffe710_2
- yapf=0.40.1=pyhd8ed1ab_0
- yarl=1.9.2=py39ha55989b_0
- zeromq=4.3.4=hd77b12b_0
- zipp=3.17.0=pyhd8ed1ab_0
- zlib=1.2.13=hcfcfb64_5
- zstd=1.5.5=h12be248_0
- pip:
    - opencv-python==4.8.0.76
    - supervision==0.6.0
    - torchaudio==2.0.2
    - torchvision==0.15.2
prefix: C:\Users\Makoto\miniconda3\envs\dino
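Note on the environment above: the pins resolve to PyTorch 2.0.1 built against CUDA 11.8 on Python 3.9 (Windows). A minimal sanity check after activating the `dino` env, written as my own illustration rather than anything contained in this diff:

import torch  # pinned to 2.0.1 / cu118 by environment.yaml

print(torch.__version__)          # expected to start with "2.0.1"
print(torch.version.cuda)         # expected "11.8"
print(torch.cuda.is_available())  # True only with a compatible NVIDIA driver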
groundingdino.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,213 @@
Metadata-Version: 2.1
Name: groundingdino
Version: 0.1.0
Summary: open-set object detector
Home-page: https://github.com/IDEA-Research/GroundingDINO
Author: International Digital Economy Academy, Shilong Liu
License: Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright 2023 - present, IDEA Research.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Platform: UNKNOWN
License-File: LICENSE

UNKNOWN
groundingdino.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,46 @@
LICENSE
README.md
setup.py
/home/jamada/jupyterlab/projects/gdino-peft/gdino-official/GroundingDINO/groundingdino/models/GroundingDINO/csrc/cuda_version.cu
/home/jamada/jupyterlab/projects/gdino-peft/gdino-official/GroundingDINO/groundingdino/models/GroundingDINO/csrc/vision.cpp
/home/jamada/jupyterlab/projects/gdino-peft/gdino-official/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.cpp
/home/jamada/jupyterlab/projects/gdino-peft/gdino-official/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cuda.cu
groundingdino/__init__.py
groundingdino/version.py
groundingdino.egg-info/PKG-INFO
groundingdino.egg-info/SOURCES.txt
groundingdino.egg-info/dependency_links.txt
groundingdino.egg-info/requires.txt
groundingdino.egg-info/top_level.txt
groundingdino/config/GroundingDINO_SwinB_cfg.py
groundingdino/config/GroundingDINO_SwinT_OGC.py
groundingdino/config/__init__.py
groundingdino/datasets/__init__.py
groundingdino/datasets/cocogrounding_eval.py
groundingdino/datasets/transforms.py
groundingdino/models/__init__.py
groundingdino/models/registry.py
groundingdino/models/GroundingDINO/__init__.py
groundingdino/models/GroundingDINO/bertwarper.py
groundingdino/models/GroundingDINO/fuse_modules.py
groundingdino/models/GroundingDINO/groundingdino.py
groundingdino/models/GroundingDINO/ms_deform_attn.py
groundingdino/models/GroundingDINO/transformer.py
groundingdino/models/GroundingDINO/transformer_vanilla.py
groundingdino/models/GroundingDINO/utils.py
groundingdino/models/GroundingDINO/backbone/__init__.py
groundingdino/models/GroundingDINO/backbone/backbone.py
groundingdino/models/GroundingDINO/backbone/position_encoding.py
groundingdino/models/GroundingDINO/backbone/swin_transformer.py
groundingdino/util/__init__.py
groundingdino/util/box_ops.py
groundingdino/util/get_tokenlizer.py
groundingdino/util/inference.py
groundingdino/util/logger.py
groundingdino/util/misc.py
groundingdino/util/slconfig.py
groundingdino/util/slio.py
groundingdino/util/time_counter.py
groundingdino/util/utils.py
groundingdino/util/visualizer.py
groundingdino/util/vl_utils.py
groundingdino.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
groundingdino.egg-info/requires.txt
ADDED
@@ -0,0 +1,10 @@
addict
numpy
opencv-python
pycocotools
supervision
timm
torch
torchvision
transformers
yapf
groundingdino.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
groundingdino
groundingdino/.ipynb_checkpoints/__init__-checkpoint.py
ADDED
File without changes
groundingdino/.ipynb_checkpoints/version-checkpoint.py
ADDED
@@ -0,0 +1 @@
__version__ = '0.1.0'
groundingdino/__init__.py
ADDED
File without changes
groundingdino/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (182 Bytes).
groundingdino/config/.ipynb_checkpoints/GroundingDINO_SwinB_cfg-checkpoint.py
ADDED
@@ -0,0 +1,43 @@
batch_size = 1
modelname = "groundingdino"
backbone = "swin_B_384_22k"
position_embedding = "sine"
pe_temperatureH = 20
pe_temperatureW = 20
return_interm_indices = [1, 2, 3]
backbone_freeze_keywords = None
enc_layers = 6
dec_layers = 6
pre_norm = False
dim_feedforward = 2048
hidden_dim = 256
dropout = 0.0
nheads = 8
num_queries = 900
query_dim = 4
num_patterns = 0
num_feature_levels = 4
enc_n_points = 4
dec_n_points = 4
two_stage_type = "standard"
two_stage_bbox_embed_share = False
two_stage_class_embed_share = False
transformer_activation = "relu"
dec_pred_bbox_embed_share = True
dn_box_noise_scale = 1.0
dn_label_noise_ratio = 0.5
dn_label_coef = 1.0
dn_bbox_coef = 1.0
embed_init_tgt = True
dn_labelbook_size = 2000
max_text_len = 256
text_encoder_type = "bert-base-uncased"
use_text_enhancer = True
use_fusion_layer = True
use_checkpoint = True
use_transformer_ckpt = True
use_text_cross_attention = True
text_dropout = 0.0
fusion_dropout = 0.0
fusion_droppath = 0.1
sub_sentence_present = True
groundingdino/config/GroundingDINO_SwinB_cfg.py
ADDED
@@ -0,0 +1,43 @@
batch_size = 1
modelname = "groundingdino"
backbone = "swin_B_384_22k"
position_embedding = "sine"
pe_temperatureH = 20
pe_temperatureW = 20
return_interm_indices = [1, 2, 3]
backbone_freeze_keywords = None
enc_layers = 6
dec_layers = 6
pre_norm = False
dim_feedforward = 2048
hidden_dim = 256
dropout = 0.0
nheads = 8
num_queries = 900
query_dim = 4
num_patterns = 0
num_feature_levels = 4
enc_n_points = 4
dec_n_points = 4
two_stage_type = "standard"
two_stage_bbox_embed_share = False
two_stage_class_embed_share = False
transformer_activation = "relu"
dec_pred_bbox_embed_share = True
dn_box_noise_scale = 1.0
dn_label_noise_ratio = 0.5
dn_label_coef = 1.0
dn_bbox_coef = 1.0
embed_init_tgt = True
dn_labelbook_size = 2000
max_text_len = 256
text_encoder_type = "bert-base-uncased"
use_text_enhancer = True
use_fusion_layer = True
use_checkpoint = True
use_transformer_ckpt = True
use_text_cross_attention = True
text_dropout = 0.0
fusion_dropout = 0.0
fusion_droppath = 0.1
sub_sentence_present = True
groundingdino/config/GroundingDINO_SwinT_OGC.py
ADDED
@@ -0,0 +1,43 @@
batch_size = 1
modelname = "groundingdino"
backbone = "swin_T_224_1k"
position_embedding = "sine"
pe_temperatureH = 20
pe_temperatureW = 20
return_interm_indices = [1, 2, 3]
backbone_freeze_keywords = None
enc_layers = 6
dec_layers = 6
pre_norm = False
dim_feedforward = 2048
hidden_dim = 256
dropout = 0.0
nheads = 8
num_queries = 900
query_dim = 4
num_patterns = 0
num_feature_levels = 4
enc_n_points = 4
dec_n_points = 4
two_stage_type = "standard"
two_stage_bbox_embed_share = False
two_stage_class_embed_share = False
transformer_activation = "relu"
dec_pred_bbox_embed_share = True
dn_box_noise_scale = 1.0
dn_label_noise_ratio = 0.5
dn_label_coef = 1.0
dn_bbox_coef = 1.0
embed_init_tgt = True
dn_labelbook_size = 2000
max_text_len = 256
text_encoder_type = "bert-base-uncased"
use_text_enhancer = True
use_fusion_layer = True
use_checkpoint = True
use_transformer_ckpt = True
use_text_cross_attention = True
text_dropout = 0.0
fusion_dropout = 0.0
fusion_droppath = 0.1
sub_sentence_present = True
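The three config files above differ only in the `backbone` field (Swin-B vs. Swin-T); every other hyperparameter is shared. A minimal sketch of how such a config is typically consumed, assuming the repo's SLConfig and build_model utilities and a hypothetical checkpoint path (the weight file name is an assumption, not part of this diff):

import torch
from groundingdino.util.slconfig import SLConfig
from groundingdino.models import build_model

cfg_path = "groundingdino/config/GroundingDINO_SwinT_OGC.py"  # Swin-T config shown above
ckpt_path = "weights/groundingdino_swint_ogc.pth"             # hypothetical location; adjust as needed

args = SLConfig.fromfile(cfg_path)                            # flat python config -> attribute-style namespace
args.device = "cuda" if torch.cuda.is_available() else "cpu"
model = build_model(args)                                     # backbone/transformer sizes come from the config
state = torch.load(ckpt_path, map_location="cpu")
model.load_state_dict(state["model"], strict=False)
model.eval()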
groundingdino/config/__init__.py
ADDED
File without changes
groundingdino/datasets/__init__.py
ADDED
File without changes
groundingdino/datasets/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (191 Bytes).
groundingdino/datasets/__pycache__/transforms.cpython-310.pyc
ADDED
Binary file (10.2 kB).
{datasets → groundingdino/datasets}/cocogrounding_eval.py
RENAMED
@@ -45,7 +45,7 @@ class CocoGroundingEvaluator(object):
     def update(self, predictions):
         img_ids = list(np.unique(list(predictions.keys())))
         self.img_ids.extend(img_ids)
-
+
         for iou_type in self.iou_types:
             results = self.prepare(predictions, iou_type)
 
@@ -223,8 +223,6 @@ def evaluate(self):
     """
     # tic = time.time()
     # print('Running per image evaluation...')
-
-    # import pdb;pdb.set_trace()
     p = self.params
     # add backward compatibility if useSegm is specified in params
     if p.useSegm is not None:
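For context, the renamed file keeps the standard DETR-style COCO evaluation wrapper; the hunks above only shuffle a blank line in update() and drop a leftover pdb comment in evaluate(). A rough usage sketch, assuming DETR-style predictions keyed by image id (the exact tensor format and the .stats access are assumptions, not spelled out in this diff):

from groundingdino.datasets.cocogrounding_eval import CocoGroundingEvaluator

def evaluate_detections(coco_gt, predictions):
    # coco_gt: a pycocotools COCO object holding the ground-truth annotations
    # predictions: {image_id: {"scores": Tensor, "labels": Tensor, "boxes": Tensor}}
    evaluator = CocoGroundingEvaluator(coco_gt, iou_types=["bbox"])
    evaluator.update(predictions)
    evaluator.synchronize_between_processes()  # no-op outside distributed runs
    evaluator.accumulate()
    evaluator.summarize()
    return evaluator.coco_eval["bbox"].stats   # COCO AP/AR array after summarize()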