Commit · c75b625
1 Parent(s): 4a87f94

configs
Files changed:
- minigpt4/configs/datasets/firstface/featureface.yaml +11 -0
- minigpt4/configs/datasets/flickr/caption_to_phrase.yaml +6 -0
- minigpt4/configs/datasets/flickr/default.yaml +6 -0
- minigpt4/configs/datasets/flickr/object_to_phrase.yaml +6 -0
- minigpt4/configs/datasets/gqa/balanced_val.yaml +21 -0
- minigpt4/configs/datasets/laion/defaults.yaml +5 -0
- minigpt4/configs/datasets/llava/conversation.yaml +7 -0
- minigpt4/configs/datasets/llava/detail.yaml +6 -0
- minigpt4/configs/datasets/llava/reason.yaml +7 -0
- minigpt4/configs/datasets/multitask_conversation/default.yaml +7 -0
- minigpt4/configs/datasets/nlp/unnatural_instruction.yaml +5 -0
- minigpt4/configs/datasets/ocrvqa/ocrvqa.yaml +6 -0
- minigpt4/configs/datasets/okvqa/defaults.yaml +21 -0
- minigpt4/configs/datasets/textcaps/caption.yaml +9 -0
- minigpt4/configs/datasets/vg/ref.yaml +5 -0
- minigpt4/configs/default.yaml +5 -0
- minigpt4/configs/models/minigpt4_llama2.yaml +29 -0
- minigpt4/configs/models/minigpt4_vicuna0.yaml +32 -0
- minigpt4/configs/models/minigpt_v2.yaml +32 -0
minigpt4/configs/datasets/firstface/featureface.yaml
ADDED
@@ -0,0 +1,11 @@
+datasets:
+  feature_face_caption:
+    data_type: images
+
+    build_info:
+      # image_path: /home/user/selected_face/first_face/images
+      image_path: /home/user/selected_face/first_face/first_frames
+
+      ann_path: /home/user/selected_face/face_emotion/relative_train_NCEV.txt
+      # ann_path: /home/user/selected_face/face_emotion/all_label_NCEV.txt
+      # ann_path: /home/user/selected_face/face_emotion/target_label_list_0512_smp.txt
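Every dataset config in this commit follows the same shape: a top-level "datasets" key, one named dataset, a "data_type", and a "build_info" block pointing at images and annotations (the commented-out ann_path lines above are alternative label files kept for reference). A minimal sketch of reading such a file, assuming OmegaConf is the config loader (the loading code itself is not part of this diff):

    # Hedged sketch: load a dataset config and pull out its build_info paths.
    # Assumes OmegaConf; the attribute names mirror the YAML above.
    from omegaconf import OmegaConf

    cfg = OmegaConf.load("minigpt4/configs/datasets/firstface/featureface.yaml")
    info = cfg.datasets.feature_face_caption.build_info
    print(info.image_path)  # /home/user/selected_face/first_face/first_frames
    print(info.ann_path)    # .../face_emotion/relative_train_NCEV.txt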
minigpt4/configs/datasets/flickr/caption_to_phrase.yaml
ADDED
@@ -0,0 +1,6 @@
+datasets:
+  flickr_CaptionToPhrase:
+    data_type: images
+    build_info:
+      image_path: /path/to/filtered_flikcr/images
+      ann_path: /path/to/filtered_flickr/captiontobbox.json
minigpt4/configs/datasets/flickr/default.yaml
ADDED
@@ -0,0 +1,6 @@
+datasets:
+  flickr_grounded_caption:
+    data_type: images
+    build_info:
+      image_path: /path/to/filtered_flikcr/images
+      ann_path: /path/to/filtered_flikcr/groundedcaption.json
minigpt4/configs/datasets/flickr/object_to_phrase.yaml
ADDED
@@ -0,0 +1,6 @@
+datasets:
+  flickr_ObjectToPhrase:
+    data_type: images
+    build_info:
+      image_path: /path/to/filtered_flikcr/images
+      ann_path: /path/to/filtered_flikcr/phrasetobbox.json
minigpt4/configs/datasets/gqa/balanced_val.yaml
ADDED
@@ -0,0 +1,21 @@
+# Copyright (c) 2022, salesforce.com, inc.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+  gqa:
+    # data_dir: ${env.data_dir}/datasets
+    data_type: images # [images|videos|features]
+
+    build_info:
+      # Be careful not to append minus sign (-) before split to avoid itemizing
+      annotations:
+        train:
+          url:
+            - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/gqa/train_balanced_questions.json
+          storage:
+            - /path/to/gqa/train_balanced_questions.json
+
+      images:
+        storage: /path/to/gqa/images
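Unlike the path-only configs, this LAVIS-style entry pairs each annotation "url" with a local "storage" path. A hedged sketch of mirroring one to the other before training (urllib here is illustrative, not the repo's actual download utility):

    # Fetch the GQA annotation file once and cache it at its storage path.
    import os
    import urllib.request

    url = ("https://storage.googleapis.com/sfr-vision-language-research/"
           "LAVIS/datasets/gqa/train_balanced_questions.json")
    storage = "/path/to/gqa/train_balanced_questions.json"

    os.makedirs(os.path.dirname(storage), exist_ok=True)
    if not os.path.exists(storage):
        urllib.request.urlretrieve(url, storage)

The okvqa/defaults.yaml file later in this commit uses the identical url/storage pattern.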
minigpt4/configs/datasets/laion/defaults.yaml
ADDED
@@ -0,0 +1,5 @@
+datasets:
+  laion:
+    data_type: images
+    build_info:
+      storage: /path/to/laion_dataset/{00000..10488}.tar
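The storage value is a brace-expansion pattern naming 10,489 webdataset-style tar shards rather than a single file. A sketch of expanding it, assuming the braceexpand package (commonly paired with tar-shard datasets; not shown in this diff):

    # Expand {00000..10488} into the concrete shard list.
    from braceexpand import braceexpand

    shards = list(braceexpand("/path/to/laion_dataset/{00000..10488}.tar"))
    print(len(shards))  # 10489
    print(shards[0])    # /path/to/laion_dataset/00000.tar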
minigpt4/configs/datasets/llava/conversation.yaml
ADDED
@@ -0,0 +1,7 @@
+datasets:
+
+  llava_conversation:
+    data_type: images
+    build_info:
+      image_path: /path/to/coco/images
+      ann_path: /path/to/llava/conversation_58k.json
minigpt4/configs/datasets/llava/detail.yaml
ADDED
@@ -0,0 +1,6 @@
+datasets:
+  llava_detail:
+    data_type: images
+    build_info:
+      image_path: /path/to/coco/images
+      ann_path: /path/to/llava/detail_23k.json
minigpt4/configs/datasets/llava/reason.yaml
ADDED
@@ -0,0 +1,7 @@
+datasets:
+
+  llava_reason:
+    data_type: images
+    build_info:
+      image_path: /path/to/coco/images
+      ann_path: /path/to/llava/complex_reasoning_77k.json
minigpt4/configs/datasets/multitask_conversation/default.yaml
ADDED
@@ -0,0 +1,7 @@
+datasets:
+  multitask_conversation:
+    data_type: images
+    build_info:
+
+      image_path: /path/to/coco/images
+      ann_path: /path/to/multitask_conversation/multi_task_conversation.json
minigpt4/configs/datasets/nlp/unnatural_instruction.yaml
ADDED
@@ -0,0 +1,5 @@
+datasets:
+  unnatural_instruction:
+    data_type: text
+    build_info:
+      ann_path: /path/to/unnatural_instructions/filtered_unnatural_instruction.json
minigpt4/configs/datasets/ocrvqa/ocrvqa.yaml
ADDED
@@ -0,0 +1,6 @@
+datasets:
+  ocrvqa:
+    data_type: images
+    build_info:
+      image_path: /path/to/ocrvqa/images
+      ann_path: /path/to/ocrvqa/dataset.json
minigpt4/configs/datasets/okvqa/defaults.yaml
ADDED
@@ -0,0 +1,21 @@
+# Copyright (c) 2022, salesforce.com, inc.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+  ok_vqa:
+    # data_dir: ${env.data_dir}/datasets
+    data_type: images # [images|videos|features]
+
+    build_info:
+      # Be careful not to append minus sign (-) before split to avoid itemizing
+      annotations:
+        train:
+          url:
+            # TODO make this order insensitive
+            - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/okvqa/okvqa_train.json
+          storage:
+            - /path/to/okvqa/okvqa_train.json
+      images:
+        storage: /path/to/coco/images
minigpt4/configs/datasets/textcaps/caption.yaml
ADDED
@@ -0,0 +1,9 @@
+datasets:
+  textcaps_caption:
+    data_type: images
+
+    build_info:
+      image_path: /path/to/textcaps/train_images
+      ann_path: /path/to/textcaps/TextCaps_0.1_train.json
+
+
minigpt4/configs/datasets/vg/ref.yaml
ADDED
@@ -0,0 +1,5 @@
+datasets:
+  refvg:
+    data_type: images
+    build_info:
+      data_dir: /path/to/visual_genome
minigpt4/configs/default.yaml
ADDED
@@ -0,0 +1,5 @@
+env:
+  # For default users
+  # cache_root: "cache"
+  # For internal use with persistent storage
+  cache_root: "/export/home/.cache/minigpt4"
minigpt4/configs/models/minigpt4_llama2.yaml
ADDED
@@ -0,0 +1,29 @@
+model:
+  arch: minigpt4
+
+  # vit encoder
+  image_size: 224
+  drop_path_rate: 0
+  use_grad_checkpoint: False
+  vit_precision: "fp16"
+  freeze_vit: True
+  has_qformer: False
+
+  # generation configs
+  prompt: ""
+
+  llama_model: "please set this value to the path of llama2-chat-7b"
+
+preprocess:
+  vis_processor:
+    train:
+      name: "blip2_image_train"
+      image_size: 224
+    eval:
+      name: "blip2_image_eval"
+      image_size: 224
+  text_processor:
+    train:
+      name: "blip_caption"
+    eval:
+      name: "blip_caption"
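The preprocess block names "blip2_image_train" / "blip2_image_eval" processors at image_size 224. As a rough sketch of what such a training processor typically amounts to, assuming BLIP-2 conventions (a random resized crop plus CLIP-style normalization; the exact transform stack and constants are assumptions, not read from this diff):

    from torchvision import transforms

    # Assumed CLIP/BLIP-2 normalization statistics.
    MEAN = (0.48145466, 0.4578275, 0.40821073)
    STD = (0.26862954, 0.26130258, 0.27577711)

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(
            224, scale=(0.5, 1.0),
            interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(MEAN, STD),
    ])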
minigpt4/configs/models/minigpt4_vicuna0.yaml
ADDED
@@ -0,0 +1,32 @@
+model:
+  arch: minigpt4
+
+  # vit encoder
+  image_size: 224
+  drop_path_rate: 0
+  use_grad_checkpoint: False
+  vit_precision: "fp16"
+  freeze_vit: True
+  freeze_qformer: True
+
+  # Q-Former
+  num_query_token: 32
+
+  # generation configs
+  prompt: ""
+
+  llama_model: "please set this value to the path of vicuna model"
+
+preprocess:
+  vis_processor:
+    train:
+      name: "blip2_image_train"
+      image_size: 224
+    eval:
+      name: "blip2_image_eval"
+      image_size: 224
+  text_processor:
+    train:
+      name: "blip_caption"
+    eval:
+      name: "blip_caption"
minigpt4/configs/models/minigpt_v2.yaml
ADDED
@@ -0,0 +1,32 @@
+model:
+  arch: minigpt_v2
+
+  # vit encoder
+  image_size: 448
+  drop_path_rate: 0
+  use_grad_checkpoint: False
+  vit_precision: "fp16"
+  freeze_vit: True
+
+  # generation configs
+  prompt: ""
+
+  llama_model: /home/czb/project/MiniGPT-4-main/checkpoints/Llama-2-7b-chat-hf
+  # llama_model: "/home/user/project/Emotion-LLaMA/checkpoints/Llama-2-7b-chat-hf"
+  lora_r: 64
+  lora_alpha: 16
+
+
+preprocess:
+  vis_processor:
+    train:
+      name: "blip2_image_train"
+      image_size: 448
+    eval:
+      name: "blip2_image_eval"
+      image_size: 448
+  text_processor:
+    train:
+      name: "blip_caption"
+    eval:
+      name: "blip_caption"
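Unlike the two MiniGPT-4 configs, minigpt_v2.yaml sets a concrete llama_model checkpoint path, a larger 448-pixel image size, and LoRA hyperparameters. A hedged sketch of how lora_r / lora_alpha would map onto a PEFT LoraConfig if LoRA is applied to the language model (target_modules is a hypothetical choice; the diff does not specify it):

    from peft import LoraConfig

    lora_cfg = LoraConfig(
        r=64,                                 # lora_r from the config above
        lora_alpha=16,                        # lora_alpha
        target_modules=["q_proj", "v_proj"],  # assumption, common for LLaMA
        lora_dropout=0.05,                    # illustrative default
        bias="none",
        task_type="CAUSAL_LM",
    )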