ZebangCheng committed on
Commit 8b7a0c4 · 1 Parent(s): 6c8a46d
Files changed (35)
  1. minigpt4/datasets/__init__.py +0 -0
  2. minigpt4/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  3. minigpt4/datasets/__pycache__/data_utils.cpython-39.pyc +0 -0
  4. minigpt4/datasets/builders/__init__.py +68 -0
  5. minigpt4/datasets/builders/__pycache__/__init__.cpython-39.pyc +0 -0
  6. minigpt4/datasets/builders/__pycache__/base_dataset_builder.cpython-39.pyc +0 -0
  7. minigpt4/datasets/builders/__pycache__/image_text_pair_builder.cpython-39.pyc +0 -0
  8. minigpt4/datasets/builders/base_dataset_builder.py +237 -0
  9. minigpt4/datasets/builders/image_text_pair_builder.py +41 -0
  10. minigpt4/datasets/data_utils.py +199 -0
  11. minigpt4/datasets/datasets/__init__.py +0 -0
  12. minigpt4/datasets/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  13. minigpt4/datasets/datasets/__pycache__/aok_vqa_datasets.cpython-39.pyc +0 -0
  14. minigpt4/datasets/datasets/__pycache__/base_dataset.cpython-39.pyc +0 -0
  15. minigpt4/datasets/datasets/__pycache__/caption_datasets.cpython-39.pyc +0 -0
  16. minigpt4/datasets/datasets/__pycache__/cc_sbu_dataset.cpython-39.pyc +0 -0
  17. minigpt4/datasets/datasets/__pycache__/coco_caption.cpython-39.pyc +0 -0
  18. minigpt4/datasets/datasets/__pycache__/coco_dataset.cpython-39.pyc +0 -0
  19. minigpt4/datasets/datasets/__pycache__/coco_vqa_datasets.cpython-39.pyc +0 -0
  20. minigpt4/datasets/datasets/__pycache__/dataloader_utils.cpython-39.pyc +0 -0
  21. minigpt4/datasets/datasets/__pycache__/face_emotion.cpython-39.pyc +0 -0
  22. minigpt4/datasets/datasets/__pycache__/first_face.cpython-39.pyc +0 -0
  23. minigpt4/datasets/datasets/__pycache__/flickr.cpython-39.pyc +0 -0
  24. minigpt4/datasets/datasets/__pycache__/gqa_datasets.cpython-39.pyc +0 -0
  25. minigpt4/datasets/datasets/__pycache__/laion_dataset.cpython-39.pyc +0 -0
  26. minigpt4/datasets/datasets/__pycache__/llava_dataset.cpython-39.pyc +0 -0
  27. minigpt4/datasets/datasets/__pycache__/multitask_conversation.cpython-39.pyc +0 -0
  28. minigpt4/datasets/datasets/__pycache__/ocrvqa_dataset.cpython-39.pyc +0 -0
  29. minigpt4/datasets/datasets/__pycache__/text_caps.cpython-39.pyc +0 -0
  30. minigpt4/datasets/datasets/__pycache__/unnatural_instruction.cpython-39.pyc +0 -0
  31. minigpt4/datasets/datasets/__pycache__/vg_dataset.cpython-39.pyc +0 -0
  32. minigpt4/datasets/datasets/__pycache__/vqa_datasets.cpython-39.pyc +0 -0
  33. minigpt4/datasets/datasets/base_dataset.py +78 -0
  34. minigpt4/datasets/datasets/dataloader_utils.py +162 -0
  35. minigpt4/datasets/datasets/first_face.py +174 -0
minigpt4/datasets/__init__.py ADDED
File without changes
minigpt4/datasets/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (152 Bytes).

minigpt4/datasets/__pycache__/data_utils.cpython-39.pyc ADDED
Binary file (6.05 kB).
 
minigpt4/datasets/builders/__init__.py ADDED
@@ -0,0 +1,68 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ from minigpt4.datasets.builders.base_dataset_builder import load_dataset_config
+ from minigpt4.datasets.builders.image_text_pair_builder import (
+     FirstfaceCaptionBuilder,
+ )
+ from minigpt4.common.registry import registry
+
+ __all__ = [
+     "FirstfaceCaptionBuilder",
+ ]
+
+
+ def load_dataset(name, cfg_path=None, vis_path=None, data_type=None):
+     """
+     Example
+
+     >>> dataset = load_dataset("coco_caption", cfg_path=None)
+     >>> splits = dataset.keys()
+     >>> print([len(dataset[split]) for split in splits])
+
+     """
+     if cfg_path is None:
+         cfg = None
+     else:
+         cfg = load_dataset_config(cfg_path)
+
+     try:
+         builder = registry.get_builder_class(name)(cfg)
+     except TypeError:
+         print(
+             f"Dataset {name} not found. Available datasets:\n"
+             + ", ".join([str(k) for k in dataset_zoo.get_names()])
+         )
+         exit(1)
+
+     if vis_path is not None:
+         if data_type is None:
+             # use default data type in the config
+             data_type = builder.config.data_type
+
+         assert (
+             data_type in builder.config.build_info
+         ), f"Invalid data_type {data_type} for {name}."
+
+         builder.config.build_info.get(data_type).storage = vis_path
+
+     dataset = builder.build_datasets()
+     return dataset
+
+
+ class DatasetZoo:
+     def __init__(self) -> None:
+         self.dataset_zoo = {
+             k: list(v.DATASET_CONFIG_DICT.keys())
+             for k, v in sorted(registry.mapping["builder_name_mapping"].items())
+         }
+
+     def get_names(self):
+         return list(self.dataset_zoo.keys())
+
+
+ dataset_zoo = DatasetZoo()
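
Not part of the commit: a quick usage sketch for the `load_dataset` entry point and `dataset_zoo` defined above, assuming the package is installed and the builder's default config and data paths resolve.

```python
from minigpt4.datasets.builders import dataset_zoo, load_dataset

# List every builder name registered via @registry.register_builder.
print(dataset_zoo.get_names())  # e.g. ['feature_face_caption']

# With no cfg_path, the builder falls back to its default YAML config.
datasets = load_dataset("feature_face_caption")
print({split: len(ds) for split, ds in datasets.items()})
```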
minigpt4/datasets/builders/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (2.33 kB).

minigpt4/datasets/builders/__pycache__/base_dataset_builder.cpython-39.pyc ADDED
Binary file (6.12 kB).

minigpt4/datasets/builders/__pycache__/image_text_pair_builder.cpython-39.pyc ADDED
Binary file (1.49 kB).
 
minigpt4/datasets/builders/base_dataset_builder.py ADDED
@@ -0,0 +1,237 @@
+ """
+ This file is from LAVIS:
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import logging
+ import os
+ import shutil
+ import warnings
+
+ from omegaconf import OmegaConf
+ import torch.distributed as dist
+ from torchvision.datasets.utils import download_url
+
+ import minigpt4.common.utils as utils
+ from minigpt4.common.dist_utils import is_dist_avail_and_initialized, is_main_process
+ from minigpt4.common.registry import registry
+ from minigpt4.processors.base_processor import BaseProcessor
+
+
+ class BaseDatasetBuilder:
+     train_dataset_cls, eval_dataset_cls = None, None
+
+     def __init__(self, cfg=None):
+         super().__init__()
+
+         if cfg is None:
+             # help to create datasets from default config.
+             self.config = load_dataset_config(self.default_config_path())
+         elif isinstance(cfg, str):
+             self.config = load_dataset_config(cfg)
+         else:
+             # when called from task.build_dataset()
+             self.config = cfg
+
+         self.data_type = self.config.data_type
+         print("BaseDatasetBuilder data type:", self.data_type)
+
+         self.vis_processors = {"train": BaseProcessor(), "eval": BaseProcessor()}
+         self.text_processors = {"train": BaseProcessor(), "eval": BaseProcessor()}
+
+     def build_datasets(self):
+         # download, split, etc...
+         # only called on 1 GPU/TPU in distributed
+
+         if is_main_process():
+             self._download_data()
+
+         if is_dist_avail_and_initialized():
+             dist.barrier()
+
+         # at this point, all the annotations and images/videos should be downloaded to the specified locations.
+         logging.info("Building datasets...")
+         datasets = self.build()  # dataset['train'/'val'/'test']
+
+         return datasets
+
+     def build_processors(self):
+         vis_proc_cfg = self.config.get("vis_processor")
+         txt_proc_cfg = self.config.get("text_processor")
+
+         if vis_proc_cfg is not None:
+             vis_train_cfg = vis_proc_cfg.get("train")
+             vis_eval_cfg = vis_proc_cfg.get("eval")
+
+             self.vis_processors["train"] = self._build_proc_from_cfg(vis_train_cfg)
+             self.vis_processors["eval"] = self._build_proc_from_cfg(vis_eval_cfg)
+
+         if txt_proc_cfg is not None:
+             txt_train_cfg = txt_proc_cfg.get("train")
+             txt_eval_cfg = txt_proc_cfg.get("eval")
+
+             self.text_processors["train"] = self._build_proc_from_cfg(txt_train_cfg)
+             self.text_processors["eval"] = self._build_proc_from_cfg(txt_eval_cfg)
+
+     @staticmethod
+     def _build_proc_from_cfg(cfg):
+         return (
+             registry.get_processor_class(cfg.name).from_config(cfg)
+             if cfg is not None
+             else None
+         )
+
+     @classmethod
+     def default_config_path(cls, type="default"):
+         return utils.get_abs_path(cls.DATASET_CONFIG_DICT[type])
+
+     def _download_data(self):
+         self._download_ann()
+         self._download_vis()
+
+     def _download_ann(self):
+         """
+         Download annotation files if necessary.
+         All the vision-language datasets should have annotations of unified format.
+
+         storage_path can be:
+         (1) relative/absolute: will be prefixed with env.cache_root to make full path if relative.
+         (2) basename/dirname: will be suffixed with base name of URL if dirname is provided.
+
+         Local annotation paths should be relative.
+         """
+         anns = self.config.build_info.annotations
+
+         splits = anns.keys()
+
+         cache_root = registry.get_path("cache_root")
+
+         for split in splits:
+             info = anns[split]
+
+             urls, storage_paths = info.get("url", None), info.storage
+
+             if isinstance(urls, str):
+                 urls = [urls]
+             if isinstance(storage_paths, str):
+                 storage_paths = [storage_paths]
+
+             assert len(urls) == len(storage_paths)
+
+             for url_or_filename, storage_path in zip(urls, storage_paths):
+                 # if storage_path is relative, make it full by prefixing with cache_root.
+                 if not os.path.isabs(storage_path):
+                     storage_path = os.path.join(cache_root, storage_path)
+
+                 dirname = os.path.dirname(storage_path)
+                 if not os.path.exists(dirname):
+                     os.makedirs(dirname)
+
+                 if os.path.isfile(url_or_filename):
+                     src, dst = url_or_filename, storage_path
+                     if not os.path.exists(dst):
+                         shutil.copyfile(src=src, dst=dst)
+                     else:
+                         logging.info("Using existing file {}.".format(dst))
+                 else:
+                     if os.path.isdir(storage_path):
+                         # if only dirname is provided, suffix with basename of URL.
+                         raise ValueError(
+                             "Expecting storage_path to be a file path, got directory {}".format(
+                                 storage_path
+                             )
+                         )
+                     else:
+                         filename = os.path.basename(storage_path)
+
+                         download_url(url=url_or_filename, root=dirname, filename=filename)
+
+     def _download_vis(self):
+         storage_path = self.config.build_info.get(self.data_type).storage
+         storage_path = utils.get_cache_path(storage_path)
+
+         if not os.path.exists(storage_path):
+             warnings.warn(
+                 f"""
+                 The specified path {storage_path} for visual inputs does not exist.
+                 Please provide a correct path to the visual inputs or
+                 refer to datasets/download_scripts/README.md for downloading instructions.
+                 """
+             )
+
+     def build(self):
+         """
+         Create datasets by split, each inheriting torch.utils.data.Dataset.
+
+         build() can be dataset-specific. Overwrite to customize.
+         """
+         self.build_processors()
+
+         build_info = self.config.build_info
+
+         ann_info = build_info.annotations
+         vis_info = build_info.get(self.data_type)
+
+         datasets = dict()
+         for split in ann_info.keys():
+             if split not in ["train", "val", "test"]:
+                 continue
+
+             is_train = split == "train"
+
+             # processors
+             vis_processor = (
+                 self.vis_processors["train"]
+                 if is_train
+                 else self.vis_processors["eval"]
+             )
+             text_processor = (
+                 self.text_processors["train"]
+                 if is_train
+                 else self.text_processors["eval"]
+             )
+
+             # annotation path
+             ann_paths = ann_info.get(split).storage
+             if isinstance(ann_paths, str):
+                 ann_paths = [ann_paths]
+
+             abs_ann_paths = []
+             for ann_path in ann_paths:
+                 if not os.path.isabs(ann_path):
+                     ann_path = utils.get_cache_path(ann_path)
+                 abs_ann_paths.append(ann_path)
+             ann_paths = abs_ann_paths
+
+             # visual data storage path
+             vis_path = os.path.join(vis_info.storage, split)
+
+             if not os.path.isabs(vis_path):
+                 vis_path = utils.get_cache_path(vis_path)
+
+             if not os.path.exists(vis_path):
+                 warnings.warn("storage path {} does not exist.".format(vis_path))
+
+             # create datasets
+             dataset_cls = self.train_dataset_cls if is_train else self.eval_dataset_cls
+             datasets[split] = dataset_cls(
+                 vis_processor=vis_processor,
+                 text_processor=text_processor,
+                 ann_paths=ann_paths,
+                 vis_root=vis_path,
+             )
+
+         return datasets
+
+
+ def load_dataset_config(cfg_path):
+     cfg = OmegaConf.load(cfg_path).datasets
+     cfg = cfg[list(cfg.keys())[0]]
+
+     return cfg
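
For orientation, here is a sketch of the config structure that `load_dataset_config()` and `BaseDatasetBuilder.build()` expect. The key names mirror the attribute accesses in the code above; the dataset name and paths are hypothetical, not the shipped YAML.

```python
from omegaconf import OmegaConf

# Equivalent of a dataset YAML: datasets.<name>.{data_type, build_info}.
cfg = OmegaConf.create({
    "datasets": {
        "toy_caption": {  # hypothetical dataset name
            "data_type": "images",
            "build_info": {
                "annotations": {
                    "train": {"url": None, "storage": "toy/annotations/train.json"},
                },
                "images": {"storage": "toy/images/"},
            },
        }
    }
})

# load_dataset_config() keeps only the (single) dataset node:
node = cfg.datasets[list(cfg.datasets.keys())[0]]
print(node.data_type, node.build_info.annotations.train.storage)
```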
minigpt4/datasets/builders/image_text_pair_builder.py ADDED
@@ -0,0 +1,41 @@
+ import os
+ import logging
+ import warnings
+
+ from minigpt4.common.registry import registry
+ from minigpt4.datasets.builders.base_dataset_builder import BaseDatasetBuilder
+
+ from minigpt4.datasets.datasets.first_face import FeatureFaceDataset
+
+
+ @registry.register_builder("feature_face_caption")
+ class FirstfaceCaptionBuilder(BaseDatasetBuilder):
+     train_dataset_cls = FeatureFaceDataset
+
+     DATASET_CONFIG_DICT = {"default": "configs/datasets/firstface/featureface.yaml"}
+
+     def _download_ann(self):
+         pass
+
+     def _download_vis(self):
+         pass
+
+     def build(self):
+         self.build_processors()
+
+         build_info = self.config.build_info
+
+         datasets = dict()
+         split = "train"
+
+         # create the (train-only) dataset
+         dataset_cls = self.train_dataset_cls
+         datasets[split] = dataset_cls(
+             vis_processor=self.vis_processors[split],
+             text_processor=self.text_processors[split],
+             ann_path=build_info.ann_path,
+             vis_root=build_info.image_path,
+         )
+
+         return datasets
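
A hedged sketch of how the registered builder is resolved at runtime, assuming the default YAML and the paths it references exist:

```python
import minigpt4.datasets.builders  # noqa: F401  (import runs the @registry.register_builder decorators)
from minigpt4.common.registry import registry

builder_cls = registry.get_builder_class("feature_face_caption")
datasets = builder_cls().build_datasets()  # {"train": FeatureFaceDataset(...)}
print(len(datasets["train"]))
```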
minigpt4/datasets/data_utils.py ADDED
@@ -0,0 +1,199 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import gzip
+ import logging
+ import os
+ import random as rnd
+ import tarfile
+ import zipfile
+ import random
+ from typing import List
+ from tqdm import tqdm
+
+ import decord
+ from decord import VideoReader
+ import webdataset as wds
+ import numpy as np
+ import torch
+ from torch.utils.data.dataset import IterableDataset
+
+ from minigpt4.common.registry import registry
+ from minigpt4.datasets.datasets.base_dataset import ConcatDataset
+
+
+ decord.bridge.set_bridge("torch")
+ MAX_INT = registry.get("MAX_INT")
+
+
+ class ChainDataset(wds.DataPipeline):
+     r"""Dataset for chaining multiple :class:`DataPipeline` s.
+
+     This class is useful to assemble different existing dataset streams. The
+     chaining operation is done on-the-fly, so concatenating large-scale
+     datasets with this class will be efficient.
+
+     Args:
+         datasets (iterable of IterableDataset): datasets to be chained together
+     """
+
+     def __init__(self, datasets: List[wds.DataPipeline]) -> None:
+         super().__init__()
+         self.datasets = datasets
+         self.prob = []
+         self.names = []
+         for dataset in self.datasets:
+             if hasattr(dataset, 'name'):
+                 self.names.append(dataset.name)
+             else:
+                 self.names.append('Unknown')
+             if hasattr(dataset, 'sample_ratio'):
+                 self.prob.append(dataset.sample_ratio)
+             else:
+                 self.prob.append(1)
+                 logging.info("One of the datapipelines doesn't define sample_ratio; defaulting its ratio to 1.")
+
+     def __iter__(self):
+         datastreams = [iter(dataset) for dataset in self.datasets]
+         while True:
+             select_datastream = random.choices(datastreams, weights=self.prob, k=1)[0]
+             yield next(select_datastream)
+
+
+ def apply_to_sample(f, sample):
+     if len(sample) == 0:
+         return {}
+
+     def _apply(x):
+         if torch.is_tensor(x):
+             return f(x)
+         elif isinstance(x, dict):
+             return {key: _apply(value) for key, value in x.items()}
+         elif isinstance(x, list):
+             return [_apply(item) for item in x]
+         else:
+             return x
+
+     return _apply(sample)
+
+
+ def move_to_cuda(sample):
+     def _move_to_cuda(tensor):
+         return tensor.cuda()
+
+     return apply_to_sample(_move_to_cuda, sample)
+
+
+ def prepare_sample(samples, cuda_enabled=True):
+     if cuda_enabled:
+         samples = move_to_cuda(samples)
+
+     # TODO fp16 support
+
+     return samples
+
+
+ def reorg_datasets_by_split(datasets, batch_sizes):
+     """
+     Organizes datasets by split.
+
+     Args:
+         datasets: dict of {dataset_name: {split_name: Dataset}}.
+         batch_sizes: dict of batch sizes keyed by dataset name.
+
+     Returns:
+         Tuple of (datasets by split {split_name: List[Dataset]},
+         batch sizes by split {split_name: List[int]}).
+     """
+     reorg_datasets = dict()
+     reorg_batch_sizes = dict()
+
+     # reorganize by split
+     for dataset_name, dataset in datasets.items():
+         for split_name, dataset_split in dataset.items():
+             if split_name not in reorg_datasets:
+                 reorg_datasets[split_name] = [dataset_split]
+                 reorg_batch_sizes[split_name] = [batch_sizes[dataset_name]]
+             else:
+                 reorg_datasets[split_name].append(dataset_split)
+                 reorg_batch_sizes[split_name].append(batch_sizes[dataset_name])
+
+     return reorg_datasets, reorg_batch_sizes
+
+
+ def concat_datasets(datasets):
+     """
+     Concatenates multiple datasets into a single dataset.
+
+     It supports map-style datasets and DataPipeline from WebDataset. It currently does
+     not support generic IterableDataset because that requires creating separate samplers.
+
+     It only supports concatenating training datasets and assumes that validation and
+     testing have only a single dataset, because metrics should not be computed on
+     concatenated datasets.
+
+     Args:
+         datasets: dict of torch.utils.data.Dataset objects by split.
+
+     Returns:
+         Dict of concatenated datasets by split, "train" is the concatenation of multiple datasets,
+         "val" and "test" remain the same.
+
+         If the input training datasets contain both map-style and DataPipeline datasets, returns
+         a tuple, where the first element is a concatenated map-style dataset and the second
+         element is a chained DataPipeline dataset.
+     """
+     # concatenate datasets in the same split
+     for split_name in datasets:
+         if split_name != "train":
+             assert (
+                 len(datasets[split_name]) == 1
+             ), "Do not support multiple {} datasets.".format(split_name)
+             datasets[split_name] = datasets[split_name][0]
+         else:
+             iterable_datasets, map_datasets = [], []
+             for dataset in datasets[split_name]:
+                 if isinstance(dataset, wds.DataPipeline):
+                     logging.info(
+                         "Dataset {} is IterableDataset, can't be concatenated.".format(
+                             dataset
+                         )
+                     )
+                     iterable_datasets.append(dataset)
+                 elif isinstance(dataset, IterableDataset):
+                     raise NotImplementedError(
+                         "Do not support concatenation of generic IterableDataset."
+                     )
+                 else:
+                     map_datasets.append(dataset)
+
+             # concatenate map-style datasets and iterable-style datasets separately
+             if len(iterable_datasets) > 1:
+                 chained_datasets = ChainDataset(iterable_datasets)
+             elif len(iterable_datasets) == 1:
+                 chained_datasets = iterable_datasets[0]
+             else:
+                 chained_datasets = None
+
+             concat_datasets = (
+                 ConcatDataset(map_datasets) if len(map_datasets) > 0 else None
+             )
+
+             train_datasets = concat_datasets, chained_datasets
+             train_datasets = tuple([x for x in train_datasets if x is not None])
+             train_datasets = (
+                 train_datasets[0] if len(train_datasets) == 1 else train_datasets
+             )
+
+             datasets[split_name] = train_datasets
+
+     return datasets
+
minigpt4/datasets/datasets/__init__.py ADDED
File without changes
minigpt4/datasets/datasets/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (161 Bytes).

minigpt4/datasets/datasets/__pycache__/aok_vqa_datasets.cpython-39.pyc ADDED
Binary file (3.83 kB).

minigpt4/datasets/datasets/__pycache__/base_dataset.cpython-39.pyc ADDED
Binary file (2.84 kB).

minigpt4/datasets/datasets/__pycache__/caption_datasets.cpython-39.pyc ADDED
Binary file (4.59 kB).

minigpt4/datasets/datasets/__pycache__/cc_sbu_dataset.cpython-39.pyc ADDED
Binary file (1.84 kB).

minigpt4/datasets/datasets/__pycache__/coco_caption.cpython-39.pyc ADDED
Binary file (4.21 kB).

minigpt4/datasets/datasets/__pycache__/coco_dataset.cpython-39.pyc ADDED
Binary file (11.9 kB).

minigpt4/datasets/datasets/__pycache__/coco_vqa_datasets.cpython-39.pyc ADDED
Binary file (4.04 kB).

minigpt4/datasets/datasets/__pycache__/dataloader_utils.cpython-39.pyc ADDED
Binary file (5.07 kB).

minigpt4/datasets/datasets/__pycache__/face_emotion.cpython-39.pyc ADDED
Binary file (4.23 kB).

minigpt4/datasets/datasets/__pycache__/first_face.cpython-39.pyc ADDED
Binary file (5.41 kB).

minigpt4/datasets/datasets/__pycache__/flickr.cpython-39.pyc ADDED
Binary file (4.19 kB).

minigpt4/datasets/datasets/__pycache__/gqa_datasets.cpython-39.pyc ADDED
Binary file (2.05 kB).

minigpt4/datasets/datasets/__pycache__/laion_dataset.cpython-39.pyc ADDED
Binary file (1.39 kB).

minigpt4/datasets/datasets/__pycache__/llava_dataset.cpython-39.pyc ADDED
Binary file (4.1 kB).

minigpt4/datasets/datasets/__pycache__/multitask_conversation.cpython-39.pyc ADDED
Binary file (2.4 kB).

minigpt4/datasets/datasets/__pycache__/ocrvqa_dataset.cpython-39.pyc ADDED
Binary file (2.67 kB).

minigpt4/datasets/datasets/__pycache__/text_caps.cpython-39.pyc ADDED
Binary file (2.79 kB).

minigpt4/datasets/datasets/__pycache__/unnatural_instruction.cpython-39.pyc ADDED
Binary file (1.83 kB).

minigpt4/datasets/datasets/__pycache__/vg_dataset.cpython-39.pyc ADDED
Binary file (3.06 kB).

minigpt4/datasets/datasets/__pycache__/vqa_datasets.cpython-39.pyc ADDED
Binary file (6.2 kB).
 
minigpt4/datasets/datasets/base_dataset.py ADDED
@@ -0,0 +1,78 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import json
+ from typing import Iterable
+
+ from torch.utils.data import Dataset, ConcatDataset
+ from torch.utils.data.dataloader import default_collate
+
+
+ class BaseDataset(Dataset):
+     def __init__(
+         self, vis_processor=None, text_processor=None, vis_root=None, ann_paths=[]
+     ):
+         """
+         vis_root (string): Root directory of images (e.g. coco/images/)
+         ann_paths (list of strings): paths to the annotation files
+         """
+         self.vis_root = vis_root
+
+         self.annotation = []
+         for ann_path in ann_paths:
+             ann = json.load(open(ann_path, "r"))
+             if isinstance(ann, dict):
+                 self.annotation.extend(ann['annotations'])
+             else:
+                 self.annotation.extend(ann)
+
+         self.vis_processor = vis_processor
+         self.text_processor = text_processor
+
+         self._add_instance_ids()
+
+     def __len__(self):
+         return len(self.annotation)
+
+     def collater(self, samples):
+         return default_collate(samples)
+
+     def set_processors(self, vis_processor, text_processor):
+         self.vis_processor = vis_processor
+         self.text_processor = text_processor
+
+     def _add_instance_ids(self, key="instance_id"):
+         for idx, ann in enumerate(self.annotation):
+             ann[key] = str(idx)
+
+
+ class ConcatDataset(ConcatDataset):
+     def __init__(self, datasets: Iterable[Dataset]) -> None:
+         super().__init__(datasets)
+
+     def collater(self, samples):
+         # TODO For now only supports datasets with same underlying collater implementations
+
+         all_keys = set()
+         for s in samples:
+             all_keys.update(s)
+
+         shared_keys = all_keys
+         for s in samples:
+             shared_keys = shared_keys & set(s.keys())
+
+         samples_shared_keys = []
+         for s in samples:
+             samples_shared_keys.append({k: s[k] for k in s.keys() if k in shared_keys})
+
+         return self.datasets[0].collater(samples_shared_keys)
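
A minimal sketch of a concrete dataset built on `BaseDataset`; the annotation field names (`image`, `caption`) are assumptions for illustration, not from this commit.

```python
import os

from PIL import Image

from minigpt4.datasets.datasets.base_dataset import BaseDataset


class ToyCaptionDataset(BaseDataset):
    def __getitem__(self, index):
        ann = self.annotation[index]  # one record from the merged JSON lists
        image = Image.open(os.path.join(self.vis_root, ann["image"])).convert("RGB")
        return {
            "image": self.vis_processor(image),
            "text_input": self.text_processor(ann["caption"]),
            "instance_id": ann["instance_id"],  # added by _add_instance_ids()
        }
```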
minigpt4/datasets/datasets/dataloader_utils.py ADDED
@@ -0,0 +1,162 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import time
+ import random
+ import torch
+ from minigpt4.datasets.data_utils import move_to_cuda
+ from torch.utils.data import DataLoader
+
+
+ class MultiIterLoader:
+     """
+     A simple wrapper for iterating over multiple iterators.
+
+     Args:
+         loaders (List[Loader]): List of Iterator loaders.
+         ratios (List[float]): List of ratios to sample from each loader. If None, all loaders are sampled uniformly.
+     """
+
+     def __init__(self, loaders, ratios=None):
+         # assert all loaders have a __next__ method
+         for loader in loaders:
+             assert hasattr(
+                 loader, "__next__"
+             ), "Loader {} has no __next__ method.".format(loader)
+
+         if ratios is None:
+             ratios = [1.0] * len(loaders)
+         else:
+             assert len(ratios) == len(loaders)
+             ratios = [float(ratio) / sum(ratios) for ratio in ratios]
+
+         self.loaders = loaders
+         self.ratios = ratios
+
+     def __next__(self):
+         # randomly sample one loader according to the ratios
+         loader_idx = random.choices(range(len(self.loaders)), self.ratios, k=1)[0]
+         return next(self.loaders[loader_idx])
+
+
+ class PrefetchLoader(object):
+     """
+     Modified from https://github.com/ChenRocks/UNITER.
+
+     Overlaps compute and CUDA data transfer
+     (copied and then modified from NVIDIA Apex).
+     """
+
+     def __init__(self, loader):
+         self.loader = loader
+         self.stream = torch.cuda.Stream()
+
+     def __iter__(self):
+         loader_it = iter(self.loader)
+         self.preload(loader_it)
+         batch = self.next(loader_it)
+         while batch is not None:
+             is_tuple = isinstance(batch, tuple)
+             if is_tuple:
+                 task, batch = batch
+
+             if is_tuple:
+                 yield task, batch
+             else:
+                 yield batch
+             batch = self.next(loader_it)
+
+     def __len__(self):
+         return len(self.loader)
+
+     def preload(self, it):
+         try:
+             self.batch = next(it)
+         except StopIteration:
+             self.batch = None
+             return
+         # if record_stream() doesn't work, another option is to make sure
+         # device inputs are created on the main stream.
+         # self.next_input_gpu = torch.empty_like(self.next_input,
+         #                                        device='cuda')
+         # self.next_target_gpu = torch.empty_like(self.next_target,
+         #                                         device='cuda')
+         # Need to make sure the memory allocated for next_* is not still in use
+         # by the main stream at the time we start copying to next_*:
+         # self.stream.wait_stream(torch.cuda.current_stream())
+         with torch.cuda.stream(self.stream):
+             self.batch = move_to_cuda(self.batch)
+             # more code for the alternative if record_stream() doesn't work:
+             # copy_ will record the use of the pinned source tensor in this
+             # side stream.
+             # self.next_input_gpu.copy_(self.next_input, non_blocking=True)
+             # self.next_target_gpu.copy_(self.next_target, non_blocking=True)
+             # self.next_input = self.next_input_gpu
+             # self.next_target = self.next_target_gpu
+
+     def next(self, it):
+         torch.cuda.current_stream().wait_stream(self.stream)
+         batch = self.batch
+         if batch is not None:
+             record_cuda_stream(batch)
+         self.preload(it)
+         return batch
+
+     def __getattr__(self, name):
+         method = self.loader.__getattribute__(name)
+         return method
+
+
+ def record_cuda_stream(batch):
+     if isinstance(batch, torch.Tensor):
+         batch.record_stream(torch.cuda.current_stream())
+     elif isinstance(batch, list) or isinstance(batch, tuple):
+         for t in batch:
+             record_cuda_stream(t)
+     elif isinstance(batch, dict):
+         for t in batch.values():
+             record_cuda_stream(t)
+     else:
+         pass
+
+
+ class IterLoader:
+     """
+     A wrapper to convert a DataLoader into an infinite iterator.
+
+     Modified from:
+         https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/iter_based_runner.py
+     """
+
+     def __init__(self, dataloader: DataLoader, use_distributed: bool = False):
+         self._dataloader = dataloader
+         self.iter_loader = iter(self._dataloader)
+         self._use_distributed = use_distributed
+         self._epoch = 0
+
+     @property
+     def epoch(self) -> int:
+         return self._epoch
+
+     def __next__(self):
+         try:
+             data = next(self.iter_loader)
+         except StopIteration:
+             self._epoch += 1
+             if hasattr(self._dataloader.sampler, "set_epoch") and self._use_distributed:
+                 self._dataloader.sampler.set_epoch(self._epoch)
+             time.sleep(2)  # Prevent possible deadlock during epoch transition
+             self.iter_loader = iter(self._dataloader)
+             data = next(self.iter_loader)
+
+         return data
+
+     def __iter__(self):
+         return self
+
+     def __len__(self):
+         return len(self._dataloader)
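
Not part of the commit: a sketch wiring `IterLoader` and `MultiIterLoader` together. `PrefetchLoader` would wrap a loader the same way, but it needs a CUDA device for its stream, so it is only mentioned here.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

from minigpt4.datasets.datasets.dataloader_utils import IterLoader, MultiIterLoader

# IterLoader turns each finite DataLoader into an infinite iterator.
loader_a = IterLoader(DataLoader(TensorDataset(torch.zeros(8, 3)), batch_size=2))
loader_b = IterLoader(DataLoader(TensorDataset(torch.ones(4, 3)), batch_size=2))

# Draw batches from the two loaders at a 3:1 sampling ratio.
multi = MultiIterLoader(loaders=[loader_a, loader_b], ratios=[3, 1])
batch = next(multi)
```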
minigpt4/datasets/datasets/first_face.py ADDED
@@ -0,0 +1,174 @@
+ import glob
+ import os
+ import json
+ import pickle
+ import random
+ import time
+ import itertools
+ import pandas as pd
+
+ import torch.nn.functional as F
+
+ import numpy as np
+ from PIL import Image
+ import skimage.io as io
+ import matplotlib.pyplot as plt
+ from matplotlib.collections import PatchCollection
+ from matplotlib.patches import Polygon, Rectangle
+ import torch
+ from torch.utils.data import Dataset
+ import webdataset as wds
+ import cv2
+
+ from minigpt4.datasets.datasets.base_dataset import BaseDataset
+
+
+ class FeatureFaceDataset(Dataset):
+     def __init__(self, vis_processor, text_processor, vis_root, ann_path):
+         self.vis_root = vis_root
+
+         self.vis_processor = vis_processor
+         self.text_processor = text_processor
+
+         self.caption_instruction_pool = [
+             "Please describe the details of the expression and tone in the video.",
+             "Can you provide a description of the facial expression and tone shown by the person in the video?",
+             "Could you outline the facial expressions and vocal tones displayed in the video?",
+             "Detail the expressions and tone used in the video.",
+             "Explain the visual and auditory expressions captured in the video.",
+             "Provide an analysis of the expressions and tone featured in the video.",
+         ]
+
+         self.emotion_instruction_pool = [
+             "Please determine which emotion label in the video represents: happy, sad, neutral, angry, worried, surprise, fear, contempt, doubt.",
+             # "Please determine which emotion label in the video represents: happy, sad, neutral, angry, worried, surprise.",
+             # "Identify the displayed emotion in the video: is it happy, sad, neutral, angry, worried, or surprise?",
+             # "Determine the emotional state shown in the video, choosing from happy, sad, neutral, angry, worried, or surprise.",
+             # "Please ascertain the specific emotion portrayed in the video, whether it be happy, sad, neutral, angry, worried, or surprise.",
+             # "Assess and label the emotion evident in the video: could it be happy, sad, neutral, angry, worried, surprise?",
+         ]
+
+         self.reason_instruction_pool = [
+             "Please analyze all the clues in the video and reason out the emotional label of the person in the video.",
+             "What is the emotional state of the person in the video? Please tell me the reason.",
+             "What are the facial expressions and vocal tone used in the video? What is the intended meaning behind his words? Which emotion does this reflect?",
+             "Please integrate information from various modalities to infer the emotional category of the person in the video.",
+             "Could you describe the emotion-related features of the individual in the video? What emotional category do they fall into?",
+         ]
+
+         # self.task_pool = [
+         #     "emotion",
+         #     "reason",
+         #     "infer",
+         # ]
+
+         self.task_pool = [
+             "emotion",
+         ]
+
+         print("ann_path: ", ann_path)
+         self.ann_path = ann_path
+         self.file_path = os.path.dirname(ann_path)
+         self.tmp = [x.strip().split(' ') for x in open(ann_path)]
+         print('video number: %d' % len(self.tmp))
+
+         # emos = ['neutral', 'angry', 'happy', 'sad', 'worried', 'surprise']
+         emos = ['neutral', 'angry', 'happy', 'sad', 'worried', 'surprise', 'fear', 'contempt', 'doubt']
+
+         self.emo2idx, self.idx2emo = {}, {}
+         for ii, emo in enumerate(emos): self.emo2idx[emo] = ii
+         for ii, emo in enumerate(emos): self.idx2emo[ii] = emo
+
+         json_file_path = "/home/user/selected_face/face_emotion/AU_filter_merge.json"
+         with open(json_file_path, 'r') as json_file:
+             self.AU_filter_json = json.load(json_file)
+
+         reason_json_file_path = "/home/user/selected_face/face_emotion/0512_target_smp_end.json"
+         with open(reason_json_file_path, 'r') as json_file:
+             self.reason_dict = json.load(json_file)
+
+         self.character_lines = pd.read_csv('/home/user/selected_face/face_emotion/transcription_en_all.csv')
+
+     def __len__(self):
+         return len(self.tmp)
+
+     def __getitem__(self, index):
+         t = self.tmp[index]
+         video_name = t[0]
+
+         image_file = '{}.jpg'.format(video_name)
+         image_path = os.path.join(self.vis_root, image_file)
+         image = Image.open(image_path).convert("RGB")
+         image = self.vis_processor(image)
+
+         FaceMAE_feats, VideoMAE_feats, Audio_feats = self.get(video_name)
+         if len(VideoMAE_feats.shape) == 1:
+             VideoMAE_feats = VideoMAE_feats.unsqueeze(0)
+         if len(Audio_feats.shape) == 1:
+             Audio_feats = Audio_feats.unsqueeze(0)
+         if len(FaceMAE_feats.shape) == 1:
+             FaceMAE_feats = FaceMAE_feats.unsqueeze(0)
+         video_features = torch.cat((FaceMAE_feats, VideoMAE_feats, Audio_feats), dim=0)
+
+         # random task
+         task = random.choice(self.task_pool)
+         if task == "emotion":
+             caption = t[2]  # llama2 outputs only the emotion class
+             caption = self.text_processor(caption)
+             instruction_pool = self.emotion_instruction_pool
+         elif task == "reason":
+             caption = self.reason_dict[video_name]['smp_reason_caption']
+             infer_str = " Therefore, it is inferred that his emotional state is: "
+             caption = caption + infer_str + t[2]
+
+             # caption = ""  # for test reasoning
+
+             caption = self.text_processor(caption)
+             instruction_pool = self.reason_instruction_pool
+         elif task == "infer":
+             infer_str = " Therefore, it is inferred that his emotional state is: "
+             caption = t[2]
+             instruction_pool = [
+                 self.reason_dict[video_name]['reason_caption'] + infer_str,
+             ]
+         elif task == "caption":
+             caption = self.AU_filter_json[video_name]['caption']
+             caption = self.text_processor(caption)
+             instruction_pool = self.caption_instruction_pool
+
+         emotion = self.emo2idx[t[2]]
+         sentence = self.character_lines.loc[self.character_lines['name'] == video_name, 'sentence'].values[0]
+         character_line = "The person in the video says: {}. ".format(sentence)
+
+         instruction = "<video><VideoHere></video> <feature><FeatureHere></feature> {} [{}] {} ".format(character_line, task, random.choice(instruction_pool))
+
+         return {
+             "image": image,
+             "video_features": video_features,
+             "instruction_input": instruction,
+             "answer": caption,
+             "emotion": emotion,
+             "image_id": video_name
+         }
+
+     def get(self, video_name):
+         # FaceMAE features
+         FaceMAE_feats_path = os.path.join(self.file_path, 'mae_340_UTT', video_name + '.npy')
+         FaceMAE_feats = torch.tensor(np.load(FaceMAE_feats_path))
+
+         # VideoMAE features
+         VideoMAE_feats_path = os.path.join(self.file_path, 'maeV_399_UTT', video_name + '.npy')
+         VideoMAE_feats = torch.tensor(np.load(VideoMAE_feats_path))
+
+         # Audio features
+         Audio_feats_path = os.path.join(self.file_path, 'HL-UTT', video_name + '.npy')
+         Audio_feats = torch.tensor(np.load(Audio_feats_path))
+
+         return FaceMAE_feats, VideoMAE_feats, Audio_feats
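
For reference, a sketch of the space-separated annotation line `__getitem__` consumes; the sample name and the middle token are illustrative, since only tokens 0 (video name) and 2 (emotion label) are read by the class.

```python
# One line of the annotation file, as parsed in FeatureFaceDataset.__init__.
line = "sample_00001 x happy"  # token 1 is unused by this dataset class
t = line.strip().split(' ')
video_name, emotion_label = t[0], t[2]

# Side files expected next to the annotation file (self.file_path):
#   mae_340_UTT/sample_00001.npy    FaceMAE features
#   maeV_399_UTT/sample_00001.npy   VideoMAE features
#   HL-UTT/sample_00001.npy         audio features
# plus <vis_root>/sample_00001.jpg for the face image.
```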