ga89tiy committed
Commit 1db0e44 · 1 parent: e7184d5
cleanup
This view is limited to 50 files because it contains too many changes.
- LLAVA_Biovil/cog.yaml +0 -37
- LLAVA_Biovil/install.md +0 -6
- LLAVA_Biovil/predict.py +0 -157
- LLAVA_Biovil/pyproject.toml +0 -36
- LLAVA_Biovil/scripts/convert_gqa_for_eval.py +0 -18
- LLAVA_Biovil/scripts/convert_mmbench_for_submission.py +0 -27
- LLAVA_Biovil/scripts/convert_mmvet_for_eval.py +0 -18
- LLAVA_Biovil/scripts/convert_seed_for_submission.py +0 -74
- LLAVA_Biovil/scripts/convert_sqa_to_llava.py +0 -88
- LLAVA_Biovil/scripts/convert_sqa_to_llava_base_prompt.py +0 -334
- LLAVA_Biovil/scripts/convert_vizwiz_for_submission.py +0 -47
- LLAVA_Biovil/scripts/convert_vqav2_for_submission.py +0 -56
- LLAVA_Biovil/scripts/extract_mm_projector.py +0 -47
- LLAVA_Biovil/scripts/finetune.sh +0 -48
- LLAVA_Biovil/scripts/finetune_full_schedule.sh +0 -48
- LLAVA_Biovil/scripts/finetune_lora.sh +0 -49
- LLAVA_Biovil/scripts/finetune_qlora.sh +0 -50
- LLAVA_Biovil/scripts/finetune_sqa.sh +0 -36
- LLAVA_Biovil/scripts/merge_lora_weights.py +0 -22
- LLAVA_Biovil/scripts/pretrain.sh +0 -46
- LLAVA_Biovil/scripts/pretrain_xformers.sh +0 -44
- LLAVA_Biovil/scripts/sqa_eval_batch.sh +0 -13
- LLAVA_Biovil/scripts/sqa_eval_gather.sh +0 -18
- LLAVA_Biovil/scripts/v1_5/eval/gqa.sh +0 -39
- LLAVA_Biovil/scripts/v1_5/eval/llavabench.sh +0 -23
- LLAVA_Biovil/scripts/v1_5/eval/mmbench.sh +0 -19
- LLAVA_Biovil/scripts/v1_5/eval/mmbench_cn.sh +0 -20
- LLAVA_Biovil/scripts/v1_5/eval/mme.sh +0 -17
- LLAVA_Biovil/scripts/v1_5/eval/mmvet.sh +0 -16
- LLAVA_Biovil/scripts/v1_5/eval/pope.sh +0 -14
- LLAVA_Biovil/scripts/v1_5/eval/qbench.sh +0 -18
- LLAVA_Biovil/scripts/v1_5/eval/qbench_zh.sh +0 -20
- LLAVA_Biovil/scripts/v1_5/eval/seed.sh +0 -39
- LLAVA_Biovil/scripts/v1_5/eval/sqa.sh +0 -16
- LLAVA_Biovil/scripts/v1_5/eval/textvqa.sh +0 -13
- LLAVA_Biovil/scripts/v1_5/eval/vizwiz.sh +0 -14
- LLAVA_Biovil/scripts/v1_5/eval/vqav2.sh +0 -36
- LLAVA_Biovil/scripts/v1_5/finetune.sh +0 -37
- LLAVA_Biovil/scripts/v1_5/finetune_lora.sh +0 -38
- LLAVA_Biovil/scripts/v1_5/finetune_task.sh +0 -36
- LLAVA_Biovil/scripts/v1_5/finetune_task_lora.sh +0 -37
- LLAVA_Biovil/scripts/v1_5/pretrain.sh +0 -35
- LLAVA_Biovil/slurm_config.conf +0 -60
- LLAVA_Biovil/slurm_config_biovil_frozen.conf +0 -60
- LLAVA_Biovil/slurm_config_biovil_frozen_v5.conf +0 -60
- LLAVA_Biovil/slurm_config_biovil_unfrozen.conf +0 -61
- LLAVA_Biovil/slurm_config_biovil_unfrozen_v5.conf +0 -61
- LLAVA_Biovil/slurm_config_llavamed.conf +0 -61
- LLAVA_Biovil/slurm_config_ms_cxr_t.conf +0 -61
- LLAVA_Biovil/slurm_config_pretrain.conf +0 -61
LLAVA_Biovil/cog.yaml
DELETED
@@ -1,37 +0,0 @@
-# Configuration for Cog ⚙️
-# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
-
-build:
-  gpu: true
-
-  python_version: "3.11"
-
-  python_packages:
-    - "torch==2.0.1"
-    - "accelerate==0.21.0"
-    - "bitsandbytes==0.41.0"
-    - "deepspeed==0.9.5"
-    - "einops-exts==0.0.4"
-    - "einops==0.6.1"
-    - "gradio==3.35.2"
-    - "gradio_client==0.2.9"
-    - "httpx==0.24.0"
-    - "markdown2==2.4.10"
-    - "numpy==1.26.0"
-    - "peft==0.4.0"
-    - "scikit-learn==1.2.2"
-    - "sentencepiece==0.1.99"
-    - "shortuuid==1.0.11"
-    - "timm==0.6.13"
-    - "tokenizers==0.13.3"
-    - "torch==2.0.1"
-    - "torchvision==0.15.2"
-    - "transformers==4.31.0"
-    - "wandb==0.15.12"
-    - "wavedrom==2.0.3.post3"
-    - "Pygments==2.16.1"
-  run:
-    - curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.0.3/pget" && chmod +x /usr/local/bin/pget
-
-# predict.py defines how predictions are run on your model
-predict: "predict.py:Predictor"
LLAVA_Biovil/install.md
DELETED
@@ -1,6 +0,0 @@
-step 1: clone Llava
-step 2: git clone https://github.com/Dao-AILab/flash-attention.git
-step 3: conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.7 -c pytorch -c nvidia
-step 4: pip install -e .
-step 5: pip install -e ".[train]"
-step 6: in flash attention folder, run: python setup.py install
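A quick way to verify the install (a sketch, not part of the deleted file; it only assumes the conda environment from step 3 and the flash-attention build from step 6):

import torch

print(torch.__version__)          # expect 2.0.1
print(torch.cuda.is_available())  # expect True with pytorch-cuda=11.7

try:
    import flash_attn  # built from source in step 6
    print("flash-attention OK:", getattr(flash_attn, "__version__", "unknown"))
except ImportError as exc:
    print("flash-attention not importable:", exc)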
LLAVA_Biovil/predict.py
DELETED
@@ -1,157 +0,0 @@
-import torch
-
-from llava import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
-from llava import conv_templates, SeparatorStyle
-from llava import load_pretrained_model
-from llava import disable_torch_init
-from llava import tokenizer_image_token, KeywordsStoppingCriteria
-from transformers.generation.streamers import TextIteratorStreamer
-
-from PIL import Image
-
-import requests
-from io import BytesIO
-
-from cog import BasePredictor, Input, Path, ConcatenateIterator
-import time
-import subprocess
-from threading import Thread
-
-import os
-os.environ["HUGGINGFACE_HUB_CACHE"] = os.getcwd() + "/weights"
-
-# url for the weights mirror
-REPLICATE_WEIGHTS_URL = "https://weights.replicate.delivery/default"
-# files to download from the weights mirrors
-weights = [
-    {
-        "dest": "liuhaotian/llava-v1.5-13b",
-        # git commit hash from huggingface
-        "src": "llava-v1.5-13b/006818fc465ebda4c003c0998674d9141d8d95f8",
-        "files": [
-            "config.json",
-            "generation_config.json",
-            "pytorch_model-00001-of-00003.bin",
-            "pytorch_model-00002-of-00003.bin",
-            "pytorch_model-00003-of-00003.bin",
-            "pytorch_model.bin.index.json",
-            "special_tokens_map.json",
-            "tokenizer.model",
-            "tokenizer_config.json",
-        ]
-    },
-    {
-        "dest": "openai/clip-vit-large-patch14-336",
-        "src": "clip-vit-large-patch14-336/ce19dc912ca5cd21c8a653c79e251e808ccabcd1",
-        "files": [
-            "config.json",
-            "preprocessor_config.json",
-            "pytorch_model.bin"
-        ],
-    }
-]
-
-def download_json(url: str, dest: Path):
-    res = requests.get(url, allow_redirects=True)
-    if res.status_code == 200 and res.content:
-        with dest.open("wb") as f:
-            f.write(res.content)
-    else:
-        print(f"Failed to download {url}. Status code: {res.status_code}")
-
-def download_weights(baseurl: str, basedest: str, files: list[str]):
-    basedest = Path(basedest)
-    start = time.time()
-    print("downloading to: ", basedest)
-    basedest.mkdir(parents=True, exist_ok=True)
-    for f in files:
-        dest = basedest / f
-        url = os.path.join(REPLICATE_WEIGHTS_URL, baseurl, f)
-        if not dest.exists():
-            print("downloading url: ", url)
-            if dest.suffix == ".json":
-                download_json(url, dest)
-            else:
-                subprocess.check_call(["pget", url, str(dest)], close_fds=False)
-    print("downloading took: ", time.time() - start)
-
-class Predictor(BasePredictor):
-    def setup(self) -> None:
-        """Load the model into memory to make running multiple predictions efficient"""
-        for weight in weights:
-            download_weights(weight["src"], weight["dest"], weight["files"])
-        disable_torch_init()
-
-        self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model("liuhaotian/llava-v1.5-13b", model_name="llava-v1.5-13b", model_base=None, load_8bit=False, load_4bit=False)
-
-    def predict(
-        self,
-        image: Path = Input(description="Input image"),
-        prompt: str = Input(description="Prompt to use for text generation"),
-        top_p: float = Input(description="When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens", ge=0.0, le=1.0, default=1.0),
-        temperature: float = Input(description="Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic", default=0.2, ge=0.0),
-        max_tokens: int = Input(description="Maximum number of tokens to generate. A word is generally 2-3 tokens", default=1024, ge=0),
-    ) -> ConcatenateIterator[str]:
-        """Run a single prediction on the model"""
-
-        conv_mode = "llava_v1"
-        conv = conv_templates[conv_mode].copy()
-
-        image_data = load_image(str(image))
-        image_tensor = self.image_processor.preprocess(image_data, return_tensors='pt')['pixel_values'].half().cuda()
-
-        # loop start
-
-        # just one turn, always prepend image token
-        inp = DEFAULT_IMAGE_TOKEN + '\n' + prompt
-        conv.append_message(conv.roles[0], inp)
-
-        conv.append_message(conv.roles[1], None)
-        prompt = conv.get_prompt()
-
-        input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
-        stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
-        keywords = [stop_str]
-        stopping_criteria = KeywordsStoppingCriteria(keywords, self.tokenizer, input_ids)
-        streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, timeout=20.0)
-
-        with torch.inference_mode():
-            thread = Thread(target=self.model.generate, kwargs=dict(
-                inputs=input_ids,
-                images=image_tensor,
-                do_sample=True,
-                temperature=temperature,
-                top_p=top_p,
-                max_new_tokens=max_tokens,
-                streamer=streamer,
-                use_cache=True,
-                stopping_criteria=[stopping_criteria]))
-            thread.start()
-            # workaround: second-to-last token is always " "
-            # but we want to keep it if it's not the second-to-last token
-            prepend_space = False
-            for new_text in streamer:
-                if new_text == " ":
-                    prepend_space = True
-                    continue
-                if new_text.endswith(stop_str):
-                    new_text = new_text[:-len(stop_str)].strip()
-                    prepend_space = False
-                elif prepend_space:
-                    new_text = " " + new_text
-                    prepend_space = False
-                if len(new_text):
-                    yield new_text
-            if prepend_space:
-                yield " "
-            thread.join()
-
-
-def load_image(image_file):
-    if image_file.startswith('http') or image_file.startswith('https'):
-        response = requests.get(image_file)
-        image = Image.open(BytesIO(response.content)).convert('RGB')
-    else:
-        image = Image.open(image_file).convert('RGB')
-    return image
-
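The deleted streaming loop buffers a lone " " chunk because TextIteratorStreamer emits one just before the stop token. A self-contained sketch of that buffering logic, with an invented chunk list standing in for the streamer (the stop_str value is an assumption):

def filter_stream(chunks, stop_str="</s>"):
    # Same space-buffering as the deleted predict(): hold a bare " " chunk
    # and only re-attach it if the next chunk is not the stop token.
    prepend_space = False
    for new_text in chunks:
        if new_text == " ":
            prepend_space = True
            continue
        if new_text.endswith(stop_str):
            new_text = new_text[:-len(stop_str)].strip()
            prepend_space = False
        elif prepend_space:
            new_text = " " + new_text
            prepend_space = False
        if len(new_text):
            yield new_text
    if prepend_space:
        yield " "

print(list(filter_stream(["Hello", " ", "world", " ", "</s>"])))
# -> ['Hello', ' world']: the space before "world" survives, the one before the stop token is dropped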
LLAVA_Biovil/pyproject.toml
DELETED
@@ -1,36 +0,0 @@
-[build-system]
-requires = ["setuptools>=61.0"]
-build-backend = "setuptools.build_meta"
-
-[project]
-name = "llava"
-version = "1.1.3"
-description = "Towards GPT-4 like large language and visual assistant."
-readme = "README.md"
-requires-python = ">=3.8"
-classifiers = [
-    "Programming Language :: Python :: 3",
-    "License :: OSI Approved :: Apache Software License",
-]
-dependencies = [
-    "torch==2.0.1", "torchvision==0.15.2",
-    "transformers==4.31.0", "tokenizers>=0.12.1,<0.14", "sentencepiece==0.1.99", "shortuuid",
-    "accelerate==0.21.0", "peft==0.4.0", "bitsandbytes==0.41.0",
-    "pydantic<2,>=1", "markdown2[all]", "numpy", "scikit-learn==1.2.2",
-    "gradio==3.35.2", "gradio_client==0.2.9",
-    "requests", "httpx==0.24.0", "uvicorn", "fastapi",
-    "einops==0.6.1", "einops-exts==0.0.4", "timm==0.6.13",
-]
-
-[project.optional-dependencies]
-train = ["deepspeed==0.9.5", "ninja", "wandb"]
-
-[project.urls]
-"Homepage" = "https://llava-vl.github.io"
-"Bug Tracker" = "https://github.com/haotian-liu/LLaVA/issues"
-
-[tool.setuptools.packages.find]
-exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
-
-[tool.wheel]
-exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
LLAVA_Biovil/scripts/convert_gqa_for_eval.py
DELETED
@@ -1,18 +0,0 @@
-import os
-import json
-import argparse
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--src", type=str)
-parser.add_argument("--dst", type=str)
-args = parser.parse_args()
-
-all_answers = []
-for line_idx, line in enumerate(open(args.src)):
-    res = json.loads(line)
-    question_id = res['question_id']
-    text = res['text'].rstrip('.').lower()
-    all_answers.append({"questionId": question_id, "prediction": text})
-
-with open(args.dst, 'w') as f:
-    json.dump(all_answers, f)
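For clarity, the per-line mapping the deleted script applies, run on one invented record:

import json

line = json.dumps({"question_id": "20164", "text": "The sky is blue."})  # made-up --src line
res = json.loads(line)
print({"questionId": res["question_id"],
       "prediction": res["text"].rstrip(".").lower()})
# -> {'questionId': '20164', 'prediction': 'the sky is blue'}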
LLAVA_Biovil/scripts/convert_mmbench_for_submission.py
DELETED
@@ -1,27 +0,0 @@
-import os
-import json
-import argparse
-import pandas as pd
-
-def get_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--annotation-file", type=str, required=True)
-    parser.add_argument("--result-dir", type=str, required=True)
-    parser.add_argument("--upload-dir", type=str, required=True)
-    parser.add_argument("--experiment", type=str, required=True)
-
-    return parser.parse_args()
-
-if __name__ == "__main__":
-    args = get_args()
-
-    df = pd.read_table(args.annotation_file)
-
-    cur_df = df.copy()
-    cur_df = cur_df.drop(columns=['hint', 'category', 'source', 'image', 'comment', 'l2-category'])
-    cur_df.insert(6, 'prediction', None)
-    for pred in open(os.path.join(args.result_dir, f"{args.experiment}.jsonl")):
-        pred = json.loads(pred)
-        cur_df.loc[df['index'] == pred['question_id'], 'prediction'] = pred['text']
-
-    cur_df.to_excel(os.path.join(args.upload_dir, f"{args.experiment}.xlsx"), index=False, engine='openpyxl')
LLAVA_Biovil/scripts/convert_mmvet_for_eval.py
DELETED
@@ -1,18 +0,0 @@
-import os
-import json
-import argparse
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--src", type=str)
-parser.add_argument("--dst", type=str)
-args = parser.parse_args()
-
-cur_result = {}
-
-for line in open(args.src):
-    data = json.loads(line)
-    qid = data['question_id']
-    cur_result[f'v1_{qid}'] = data['text']
-
-with open(args.dst, 'w') as f:
-    json.dump(cur_result, f, indent=2)
LLAVA_Biovil/scripts/convert_seed_for_submission.py
DELETED
@@ -1,74 +0,0 @@
-import os
-import json
-import argparse
-
-
-def get_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--annotation-file", type=str)
-    parser.add_argument("--result-file", type=str)
-    parser.add_argument("--result-upload-file", type=str)
-    return parser.parse_args()
-
-
-def eval_single(result_file, eval_only_type=None):
-    results = {}
-    for line in open(result_file):
-        row = json.loads(line)
-        results[row['question_id']] = row
-
-    type_counts = {}
-    correct_counts = {}
-    for question_data in data['questions']:
-        if eval_only_type is not None and question_data['data_type'] != eval_only_type: continue
-        data_type = question_data['question_type_id']
-        type_counts[data_type] = type_counts.get(data_type, 0) + 1
-        try:
-            question_id = int(question_data['question_id'])
-        except:
-            question_id = question_data['question_id']
-        if question_id not in results:
-            correct_counts[data_type] = correct_counts.get(data_type, 0)
-            continue
-        row = results[question_id]
-        if row['text'] == question_data['answer']:
-            correct_counts[data_type] = correct_counts.get(data_type, 0) + 1
-
-    total_count = 0
-    total_correct = 0
-    for data_type in sorted(type_counts.keys()):
-        accuracy = correct_counts[data_type] / type_counts[data_type] * 100
-        if eval_only_type is None:
-            print(f"{ques_type_id_to_name[data_type]}: {accuracy:.2f}%")
-
-        total_count += type_counts[data_type]
-        total_correct += correct_counts[data_type]
-
-    total_accuracy = total_correct / total_count * 100
-    if eval_only_type is None:
-        print(f"Total accuracy: {total_accuracy:.2f}%")
-    else:
-        print(f"{eval_only_type} accuracy: {total_accuracy:.2f}%")
-
-    return results
-
-if __name__ == "__main__":
-    args = get_args()
-    data = json.load(open(args.annotation_file))
-    ques_type_id_to_name = {id:n for n,id in data['question_type'].items()}
-
-    results = eval_single(args.result_file)
-    eval_single(args.result_file, eval_only_type='image')
-    eval_single(args.result_file, eval_only_type='video')
-
-    with open(args.result_upload_file, 'w') as fp:
-        for question in data['questions']:
-            qid = question['question_id']
-            if qid in results:
-                result = results[qid]
-            else:
-                result = results[int(qid)]
-            fp.write(json.dumps({
-                'question_id': qid,
-                'prediction': result['text']
-            }) + '\n')
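The per-type accuracy arithmetic in eval_single, sketched on invented counts (question_type_id -> totals; the .get(..., 0) guards are added here so the toy example also runs when a type has no correct answers):

type_counts = {0: 10, 1: 4}      # questions seen per question_type_id (made up)
correct_counts = {0: 7, 1: 1}    # correct answers per question_type_id (made up)

total_count = total_correct = 0
for data_type in sorted(type_counts):
    accuracy = correct_counts.get(data_type, 0) / type_counts[data_type] * 100
    print(f"type {data_type}: {accuracy:.2f}%")
    total_count += type_counts[data_type]
    total_correct += correct_counts.get(data_type, 0)
print(f"Total accuracy: {total_correct / total_count * 100:.2f}%")
# type 0: 70.00%, type 1: 25.00%, Total accuracy: 57.14%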
LLAVA_Biovil/scripts/convert_sqa_to_llava.py
DELETED
@@ -1,88 +0,0 @@
-import json
-import os
-import fire
-import re
-from convert_sqa_to_llava_base_prompt import build_prompt_chatbot
-
-
-def convert_to_llava(base_dir, split, prompt_format="QCM-LEA"):
-    split_indices = json.load(open(os.path.join(base_dir, "pid_splits.json")))[split]
-    problems = json.load(open(os.path.join(base_dir, "problems.json")))
-
-    split_problems = build_prompt_chatbot(
-        problems, split_indices, prompt_format,
-        use_caption=False, is_test=False)
-
-    target_format = []
-    for prob_id, (input, output) in split_problems.items():
-        if input.startswith('Question: '):
-            input = input.replace('Question: ', '')
-        if output.startswith('Answer: '):
-            output = output.replace('Answer: ', '')
-
-        raw_prob_data = problems[prob_id]
-        if raw_prob_data['image'] is None:
-            target_format.append({
-                "id": prob_id,
-                "conversations": [
-                    {'from': 'human', 'value': f"{input}"},
-                    {'from': 'gpt', 'value': f"{output}"},
-                ],
-            })
-
-        else:
-            target_format.append({
-                "id": prob_id,
-                "image": os.path.join(prob_id, raw_prob_data['image']),
-                "conversations": [
-                    {'from': 'human', 'value': f"{input}\n<image>"},
-                    {'from': 'gpt', 'value': f"{output}"},
-                ],
-            })
-
-    print(f'Number of samples: {len(target_format)}')
-
-    with open(os.path.join(base_dir, f"llava_{split}_{prompt_format}.json"), "w") as f:
-        json.dump(target_format, f, indent=2)
-
-
-def convert_to_jsonl(base_dir, split, prompt_format="QCM-LEPA"):
-    split_indices = json.load(open(os.path.join(base_dir, "pid_splits.json")))[split]
-    problems = json.load(open(os.path.join(base_dir, "problems.json")))
-
-    split_problems = build_prompt_chatbot(
-        problems, split_indices, prompt_format,
-        use_caption=False, is_test=False)
-
-    writer = open(os.path.join(base_dir, f"scienceqa_{split}_{prompt_format}.jsonl"), "w")
-    for prob_id, (input, output) in split_problems.items():
-        if input.startswith('Question: '):
-            input = input.replace('Question: ', '')
-        if output.startswith('Answer: '):
-            output = output.replace('Answer: ', '')
-
-        raw_prob_data = problems[prob_id]
-        if raw_prob_data['image'] is None:
-            data = {
-                "id": prob_id,
-                "instruction": f"{input}",
-                "output": f"{output}",
-            }
-
-        else:
-            data = {
-                "id": prob_id,
-                "image": os.path.join(prob_id, raw_prob_data['image']),
-                "instruction": f"{input}\n<image>",
-                "output": f"{output}",
-            }
-        writer.write(json.dumps(data) + '\n')
-    writer.close()
-
-
-def main(task, **kwargs):
-    globals()[task](**kwargs)
-
-
-if __name__ == "__main__":
-    fire.Fire(main)
LLAVA_Biovil/scripts/convert_sqa_to_llava_base_prompt.py
DELETED
@@ -1,334 +0,0 @@
-def get_question_text(problem):
-    question = problem['question']
-    return question
-
-
-def get_context_text(problem, use_caption):
-    txt_context = problem['hint']
-    img_context = problem['caption'] if use_caption else ""
-    context = " ".join([txt_context, img_context]).strip()
-    if context == "":
-        context = "N/A"
-    return context
-
-
-def get_choice_text(probelm, options):
-    choices = probelm['choices']
-    choice_list = []
-    for i, c in enumerate(choices):
-        choice_list.append("({}) {}".format(options[i], c))
-    choice_txt = " ".join(choice_list)
-    #print(choice_txt)
-    return choice_txt
-
-
-def get_answer(problem, options):
-    return options[problem['answer']]
-
-
-def get_lecture_text(problem):
-    # \\n: GPT-3 can generate the lecture with more tokens.
-    lecture = problem['lecture'].replace("\n", "\\n")
-    return lecture
-
-
-def get_solution_text(problem):
-    # \\n: GPT-3 can generate the solution with more tokens
-    solution = problem['solution'].replace("\n", "\\n")
-    return solution
-
-
-def create_one_example_chatbot(format, question, context, choice, answer, lecture, solution, test_example=True):
-
-    input_format, output_format = format.split("-")
-
-    ## Inputs
-    if input_format == "CQM":
-        input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n"
-    elif input_format == "QCM":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n"
-    # upper bound experiment
-    elif input_format == "QCML":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n"
-    elif input_format == "QCME":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n"
-    elif input_format == "QCMLE":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n"
-
-    elif input_format == "QCLM":
-        input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n"
-    elif input_format == "QCEM":
-        input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n"
-    elif input_format == "QCLEM":
-        input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n"
-
-    # Outputs
-    if test_example:
-        output = "Answer:"
-    elif output_format == 'A':
-        output = f"Answer: The answer is {answer}."
-
-    elif output_format == 'AL':
-        output = f"Answer: The answer is {answer}. BECAUSE: {solution}"
-    elif output_format == 'AE':
-        output = f"Answer: The answer is {answer}. BECAUSE: {lecture}"
-    elif output_format == 'ALE':
-        output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}"
-    elif output_format == 'AEL':
-        output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}"
-
-    elif output_format == 'LA':
-        output = f"Answer: {lecture} The answer is {answer}."
-    elif output_format == 'EA':
-        output = f"Answer: {solution} The answer is {answer}."
-    elif output_format == 'LEA':
-        output = f"Answer: {lecture} {solution} The answer is {answer}."
-    elif output_format == 'ELA':
-        output = f"Answer: {solution} {lecture} The answer is {answer}."
-    elif output_format == 'LEPA':
-        output = ''
-        if len(lecture.strip()) > 0:
-            output += f"LECTURE: {lecture}\n"
-        if len(solution.strip()) > 0:
-            output += f"SOLUTION: {solution}\n"
-        output += '###\n'
-        output += f"ANSWER: {answer}."
-
-    input = input.replace("  ", " ").strip()
-    output = output.replace("  ", " ").strip()
-    if input.endswith("BECAUSE:"):
-        input = input.replace("BECAUSE:", "").strip()
-    if output.endswith("BECAUSE:"):
-        output = output.replace("BECAUSE:", "").strip()
-    return input, output
-
-
-def create_one_example(format, question, context, choice, answer, lecture, solution, test_example=True):
-
-    input_format, output_format = format.split("-")
-
-    ## Inputs
-    if input_format == "CQM":
-        input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n"
-    elif input_format == "QCM":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n"
-    # upper bound experiment
-    elif input_format == "QCML":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n"
-    elif input_format == "QCME":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n"
-    elif input_format == "QCMLE":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n"
-
-    elif input_format == "QCLM":
-        input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n"
-    elif input_format == "QCEM":
-        input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n"
-    elif input_format == "QCLEM":
-        input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n"
-
-    # Outputs
-    if test_example:
-        output = "Answer:"
-    elif output_format == 'A':
-        output = f"Answer: The answer is {answer}."
-
-    elif output_format == 'AL':
-        output = f"Answer: The answer is {answer}. BECAUSE: {solution}"
-    elif output_format == 'AE':
-        output = f"Answer: The answer is {answer}. BECAUSE: {lecture}"
-    elif output_format == 'ALE':
-        output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}"
-    elif output_format == 'AEL':
-        output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}"
-
-    elif output_format == 'LA':
-        output = f"Answer: {lecture} The answer is {answer}."
-    elif output_format == 'EA':
-        output = f"Answer: {solution} The answer is {answer}."
-    elif output_format == 'LEA':
-        output = f"Answer: {lecture} {solution} The answer is {answer}."
-    elif output_format == 'ELA':
-        output = f"Answer: {solution} {lecture} The answer is {answer}."
-
-    text = input + output
-    text = text.replace("  ", " ").strip()
-    if text.endswith("BECAUSE:"):
-        text = text.replace("BECAUSE:", "").strip()
-    return text
-
-
-
-def create_one_example_gpt4(format, question, context, choice, answer, lecture, solution, test_example=True):
-
-    input_format, output_format = format.split("-")
-
-    ## Inputs
-    if input_format == "CQM":
-        input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n"
-    elif input_format == "QCM":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n"
-    # upper bound experiment
-    elif input_format == "QCML":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n"
-    elif input_format == "QCME":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n"
-    elif input_format == "QCMLE":
-        input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n"
-
-    elif input_format == "QCLM":
-        input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n"
-    elif input_format == "QCEM":
-        input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n"
-    elif input_format == "QCLEM":
-        input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n"
-
-    # Outputs
-    if test_example:
-        output = "Answer:"
-    elif output_format == 'A':
-        output = f"Answer: The answer is {answer}."
-
-    elif output_format == 'AL':
-        output = f"Answer: The answer is {answer}. BECAUSE: {solution}"
-    elif output_format == 'AE':
-        output = f"Answer: The answer is {answer}. BECAUSE: {lecture}"
-    elif output_format == 'ALE':
-        output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}"
-    elif output_format == 'AEL':
-        output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}"
-
-    elif output_format == 'LA':
-        output = f"Answer: {lecture} The answer is {answer}."
-    elif output_format == 'EA':
-        output = f"Answer: {solution} The answer is {answer}."
-    elif output_format == 'LEA':
-        output = f"Answer: {lecture} {solution} The answer is {answer}."
-    elif output_format == 'ELA':
-        output = f"Answer: {solution} {lecture} The answer is {answer}."
-
-    input = input.replace("  ", " ").strip()
-    output = output.replace("  ", " ").strip()
-    if output.endswith("BECAUSE:"):
-        output = output.replace("BECAUSE:", "").strip()
-
-    user_prompt = {"role": "user", "content": f"Can you explain {input}?"}
-    assistant_prompt = {"role": "assistant", "content": f"{output}"}
-
-    return user_prompt, assistant_prompt
-
-
-def build_prompt_chatbot(problems, shot_qids, prompt_format, use_caption=False, options=["A", "B", "C", "D", "E"], is_test=False):
-    examples = {}
-
-    for qid in shot_qids:
-        question = get_question_text(problems[qid])
-        context = get_context_text(problems[qid], use_caption)
-        choice = get_choice_text(problems[qid], options)
-        answer = get_answer(problems[qid], options)
-        lecture = get_lecture_text(problems[qid]).replace('\\n', '\n')
-        solution = get_solution_text(problems[qid]).replace('\\n', '\n')
-
-        train_example = create_one_example_chatbot(prompt_format,
-                                                   question,
-                                                   context,
-                                                   choice,
-                                                   answer,
-                                                   lecture,
-                                                   solution,
-                                                   test_example=is_test)
-        examples[qid] = train_example
-    return examples
-
-
-def build_prompt(problems, shot_qids, test_qid, args):
-
-    examples = []
-
-    # n-shot training examples
-    for qid in shot_qids:
-        question = get_question_text(problems[qid])
-        context = get_context_text(problems[qid], args.use_caption)
-        choice = get_choice_text(problems[qid], args.options)
-        answer = get_answer(problems[qid], args.options)
-        lecture = get_lecture_text(problems[qid])
-        solution = get_solution_text(problems[qid])
-
-        train_example = create_one_example(args.prompt_format,
-                                           question,
-                                           context,
-                                           choice,
-                                           answer,
-                                           lecture,
-                                           solution,
-                                           test_example=False)
-        examples.append(train_example)
-
-    # test example
-    question = get_question_text(problems[test_qid])
-    context = get_context_text(problems[test_qid], args.use_caption)
-    choice = get_choice_text(problems[test_qid], args.options)
-    answer = get_answer(problems[test_qid], args.options)
-    lecture = get_lecture_text(problems[test_qid])
-    solution = get_solution_text(problems[test_qid])
-
-    test_example = create_one_example(args.prompt_format,
-                                      question,
-                                      context,
-                                      choice,
-                                      answer,
-                                      lecture,
-                                      solution,
-                                      test_example=True)
-    examples.append(test_example)
-
-    # create the prompt input
-    prompt_input = '\n\n'.join(examples)
-
-    return prompt_input
-
-
-def build_prompt_gpt4(problems, shot_qids, test_qid, args):
-
-    prompt_array = [{"role": "system", "content": "You are a helpful assistant."}]
-
-    # n-shot training examples
-    for qid in shot_qids:
-        question = get_question_text(problems[qid])
-        context = get_context_text(problems[qid], args.use_caption)
-        choice = get_choice_text(problems[qid], args.options)
-        answer = get_answer(problems[qid], args.options)
-        lecture = get_lecture_text(problems[qid])
-        solution = get_solution_text(problems[qid])
-
-        user_prompt, assistant_prompt = create_one_example_gpt4(args.prompt_format,
-                                                                question,
-                                                                context,
-                                                                choice,
-                                                                answer,
-                                                                lecture,
-                                                                solution,
-                                                                test_example=False)
-        prompt_array.append(user_prompt)
-        prompt_array.append(assistant_prompt)
-
-    # test example
-    question = get_question_text(problems[test_qid])
-    context = get_context_text(problems[test_qid], args.use_caption)
-    choice = get_choice_text(problems[test_qid], args.options)
-    answer = get_answer(problems[test_qid], args.options)
-    lecture = get_lecture_text(problems[test_qid])
-    solution = get_solution_text(problems[test_qid])
-
-    user_prompt, assistant_prompt = create_one_example_gpt4(args.prompt_format,
-                                                            question,
-                                                            context,
-                                                            choice,
-                                                            answer,
-                                                            lecture,
-                                                            solution,
-                                                            test_example=True)
-    prompt_array.append(user_prompt)
-    prompt_array.append(assistant_prompt)
-
-    return prompt_array
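To make the format codes concrete: a "QCM-LEA" prompt is Question/Context/Options in, Lecture + Explanation + Answer out. A sketch with an invented ScienceQA-style problem (every value below is made up):

question = "Which of these is a mammal?"
context = "N/A"
choice = "(A) frog (B) whale"
answer = "B"
lecture = "Mammals are warm-blooded and nurse their young."
solution = "A whale nurses its young, so it is a mammal."

inp = f"Question: {question}\nContext: {context}\nOptions: {choice}\n"  # "QCM" input branch
out = f"Answer: {lecture} {solution} The answer is {answer}."           # "LEA" output branch
print(inp + out)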
LLAVA_Biovil/scripts/convert_vizwiz_for_submission.py
DELETED
@@ -1,47 +0,0 @@
-import os
-import argparse
-import json
-
-from LLAV.llava.eval.m4c_evaluator import EvalAIAnswerProcessor
-
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--annotation-file', type=str, required=True)
-    parser.add_argument('--result-file', type=str, required=True)
-    parser.add_argument('--result-upload-file', type=str, required=True)
-    return parser.parse_args()
-
-
-if __name__ == '__main__':
-
-    args = parse_args()
-
-    os.makedirs(os.path.dirname(args.result_upload_file), exist_ok=True)
-
-    results = []
-    error_line = 0
-    for line_idx, line in enumerate(open(args.result_file)):
-        try:
-            results.append(json.loads(line))
-        except:
-            error_line += 1
-    results = {x['question_id']: x['text'] for x in results}
-    test_split = [json.loads(line) for line in open(args.annotation_file)]
-    split_ids = set([x['question_id'] for x in test_split])
-
-    print(f'total results: {len(results)}, total split: {len(test_split)}, error_line: {error_line}')
-
-    all_answers = []
-
-    answer_processor = EvalAIAnswerProcessor()
-
-    for x in test_split:
-        assert x['question_id'] in results
-        all_answers.append({
-            'image': x['image'],
-            'answer': answer_processor(results[x['question_id']])
-        })
-
-    with open(args.result_upload_file, 'w') as f:
-        json.dump(all_answers, f)
LLAVA_Biovil/scripts/convert_vqav2_for_submission.py
DELETED
@@ -1,56 +0,0 @@
-import os
-import argparse
-import json
-
-from LLAV.llava.eval.m4c_evaluator import EvalAIAnswerProcessor
-
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--dir', type=str, default="./playground/data/eval/vqav2")
-    parser.add_argument('--ckpt', type=str, required=True)
-    parser.add_argument('--split', type=str, required=True)
-    return parser.parse_args()
-
-
-if __name__ == '__main__':
-
-    args = parse_args()
-
-    src = os.path.join(args.dir, 'answers', args.split, args.ckpt, 'merge.jsonl')
-    test_split = os.path.join(args.dir, 'llava_vqav2_mscoco_test2015.jsonl')
-    dst = os.path.join(args.dir, 'answers_upload', args.split, f'{args.ckpt}.json')
-    os.makedirs(os.path.dirname(dst), exist_ok=True)
-
-    results = []
-    error_line = 0
-    for line_idx, line in enumerate(open(src)):
-        try:
-            results.append(json.loads(line))
-        except:
-            error_line += 1
-
-    results = {x['question_id']: x['text'] for x in results}
-    test_split = [json.loads(line) for line in open(test_split)]
-    split_ids = set([x['question_id'] for x in test_split])
-
-    print(f'total results: {len(results)}, total split: {len(test_split)}, error_line: {error_line}')
-
-    all_answers = []
-
-    answer_processor = EvalAIAnswerProcessor()
-
-    for x in test_split:
-        if x['question_id'] not in results:
-            all_answers.append({
-                'question_id': x['question_id'],
-                'answer': ''
-            })
-        else:
-            all_answers.append({
-                'question_id': x['question_id'],
-                'answer': answer_processor(results[x['question_id']])
-            })
-
-    with open(dst, 'w') as f:
-        json.dump(all_answers, open(dst, 'w'))
LLAVA_Biovil/scripts/extract_mm_projector.py
DELETED
@@ -1,47 +0,0 @@
-"""
-This is just a utility that I use to extract the projector for quantized models.
-It is NOT necessary at all to train, or run inference/serve demos.
-Use this script ONLY if you fully understand its implications.
-"""
-
-
-import os
-import argparse
-import torch
-import json
-from collections import defaultdict
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description='Extract MMProjector weights')
-    parser.add_argument('--model-path', type=str, help='model folder')
-    parser.add_argument('--output', type=str, help='output file')
-    args = parser.parse_args()
-    return args
-
-
-if __name__ == '__main__':
-    args = parse_args()
-
-    keys_to_match = ['mm_projector']
-    ckpt_to_key = defaultdict(list)
-    try:
-        model_indices = json.load(open(os.path.join(args.model_path, 'pytorch_model.bin.index.json')))
-        for k, v in model_indices['weight_map'].items():
-            if any(key_match in k for key_match in keys_to_match):
-                ckpt_to_key[v].append(k)
-    except FileNotFoundError:
-        # Smaller models or model checkpoints saved by DeepSpeed.
-        v = 'pytorch_model.bin'
-        for k in torch.load(os.path.join(args.model_path, v), map_location='cpu').keys():
-            if any(key_match in k for key_match in keys_to_match):
-                ckpt_to_key[v].append(k)
-
-    loaded_weights = {}
-
-    for ckpt_name, weight_keys in ckpt_to_key.items():
-        ckpt = torch.load(os.path.join(args.model_path, ckpt_name), map_location='cpu')
-        for k in weight_keys:
-            loaded_weights[k] = ckpt[k]
-
-    torch.save(loaded_weights, args.output)
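At its core the deleted script is a substring filter over checkpoint keys; a self-contained sketch on a toy state dict (key names and shapes invented):

import torch

state_dict = {
    "model.mm_projector.weight": torch.zeros(4, 4),
    "model.mm_projector.bias": torch.zeros(4),
    "model.layers.0.self_attn.q_proj.weight": torch.zeros(4, 4),
}
keys_to_match = ["mm_projector"]
# keep only entries whose key mentions the projector
loaded_weights = {k: v for k, v in state_dict.items()
                  if any(m in k for m in keys_to_match)}
print(sorted(loaded_weights))
# -> ['model.mm_projector.bias', 'model.mm_projector.weight']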
LLAVA_Biovil/scripts/finetune.sh
DELETED
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-################## VICUNA ##################
-# PROMPT_VERSION=v1
-# MODEL_VERSION="vicuna-v1-3-7b"
-################## VICUNA ##################
-
-################## LLaMA-2 ##################
-# PROMPT_VERSION="llava_llama_2"
-# MODEL_VERSION="llama-2-7b-chat"
-################## LLaMA-2 ##################
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path ./playground/data/llava_instruct_80k.json \
-    --image_folder /path/to/coco/train2017 \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb
LLAVA_Biovil/scripts/finetune_full_schedule.sh
DELETED
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-################## VICUNA ##################
-# PROMPT_VERSION=v1
-# MODEL_VERSION="vicuna-v1-3-7b"
-################## VICUNA ##################
-
-################## LLaMA-2 ##################
-# PROMPT_VERSION="llava_llama_2"
-# MODEL_VERSION="llama-2-7b-chat"
-################## LLaMA-2 ##################
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path ./playground/data/llava_instruct_158k.json \
-    --image_folder /path/to/coco/train2017 \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
-    --num_train_epochs 3 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb
LLAVA_Biovil/scripts/finetune_lora.sh
DELETED
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-################## VICUNA ##################
-# PROMPT_VERSION=v1
-# MODEL_VERSION="vicuna-v1-3-7b"
-################## VICUNA ##################
-
-################## LLaMA-2 ##################
-# PROMPT_VERSION="llava_llama_2"
-# MODEL_VERSION="llama-2-7b-chat"
-################## LLaMA-2 ##################
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --lora_enable True \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path ./playground/data/llava_instruct_80k.json \
-    --image_folder /path/to/coco/train2017 \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_lora \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --lazy_preprocess True \
-    --dataloader_num_workers 4 \
-    --report_to wandb
LLAVA_Biovil/scripts/finetune_qlora.sh
DELETED
@@ -1,50 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-################## VICUNA ##################
-# PROMPT_VERSION=v1
-# MODEL_VERSION="vicuna-v1-3-7b"
-################## VICUNA ##################
-
-################## LLaMA-2 ##################
-# PROMPT_VERSION="llava_llama_2"
-# MODEL_VERSION="llama-2-7b-chat"
-################## LLaMA-2 ##################
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --lora_enable True \
-    --bits 4 \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path ./playground/data/llava_instruct_80k.json \
-    --image_folder /path/to/coco/train2017 \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_lora \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --lazy_preprocess True \
-    --dataloader_num_workers 4 \
-    --report_to wandb
LLAVA_Biovil/scripts/finetune_sqa.sh
DELETED
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path lmsys/vicuna-13b-v1.3 \
-    --version $PROMPT_VERSION \
-    --data_path /Data/ScienceQA/data/scienceqa/llava_train_QCM-LEA.json \
-    --image_folder /Data/ScienceQA/data/scienceqa/images/train \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/huggingface/liuhaotian/llava-pretrain-vicuna-13b-v1.3/mm_projector.bin \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-vicuna-13b-v1.3-pretrain_lcs558k_plain-ScienceQA_QCM_LEA-12e \
-    --num_train_epochs 12 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb
LLAVA_Biovil/scripts/merge_lora_weights.py
DELETED
@@ -1,22 +0,0 @@
-import argparse
-from LLAV.llava.model.builder import load_pretrained_model
-from LLAV.llava.mm_utils import get_model_name_from_path
-
-
-def merge_lora(args):
-    model_name = get_model_name_from_path(args.model_path)
-    tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name, device_map='cpu')
-
-    model.save_pretrained(args.save_model_path)
-    tokenizer.save_pretrained(args.save_model_path)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--model-path", type=str, required=True)
-    parser.add_argument("--model-base", type=str, required=True)
-    parser.add_argument("--save-model-path", type=str, required=True)
-
-    args = parser.parse_args()
-
-    merge_lora(args)
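For reference, the deleted script would be invoked with the three required flags defined in its argparse block, e.g. (placeholder paths): python LLAVA_Biovil/scripts/merge_lora_weights.py --model-path /path/to/lora-checkpoint --model-base /path/to/base-model --save-model-path /path/to/merged-model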
LLAVA_Biovil/scripts/pretrain.sh
DELETED
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-# MODEL_VERSION=vicuna-v1-3-7b
-# MODEL_VERSION=llama-2-7b-chat
-
-########### DO NOT CHANGE ###########
-########### USE THIS FOR BOTH ###########
-PROMPT_VERSION=plain
-########### DO NOT CHANGE ###########
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path /path/to/pretrain_data.json \
-    --image_folder /path/to/images \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --tune_mm_mlp_adapter True \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 24000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-3 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/scripts/pretrain_xformers.sh
DELETED
@@ -1,44 +0,0 @@
-#!/bin/bash
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-# MODEL_VERSION=vicuna-v1-3-7b
-# MODEL_VERSION=llama-2-7b-chat
-
-########### DO NOT CHANGE ###########
-########### USE THIS FOR BOTH ###########
-PROMPT_VERSION=plain
-########### DO NOT CHANGE ###########
-
-deepspeed llava/train/train_xformers.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path /path/to/pretrain_data.json \
-    --image_folder /path/to/images \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --tune_mm_mlp_adapter True \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 False \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 4 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 4 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 24000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-3 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 False \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb

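Both pretrain scripts above read MODEL_VERSION only as $MODEL_VERSION, so instead of uncommenting the header lines it can also be supplied from the calling environment; a sketch, assuming the matching checkpoint directory exists under ./checkpoints:

    MODEL_VERSION=llama-2-7b-chat bash scripts/pretrain.sh
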
LLAVA_Biovil/scripts/sqa_eval_batch.sh
DELETED
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-CHUNKS=8
-for IDX in {0..7}; do
-    CUDA_VISIBLE_DEVICES=$IDX python -m llava.eval.model_vqa_science \
-        --model-path liuhaotian/llava-lcs558k-scienceqa-vicuna-13b-v1.3 \
-        --question-file ~/haotian/datasets/ScienceQA/data/scienceqa/llava_test_QCM-LEA.json \
-        --image-folder ~/haotian/datasets/ScienceQA/data/scienceqa/images/test \
-        --answers-file ./test_llava-13b-chunk$CHUNKS_$IDX.jsonl \
-        --num-chunks $CHUNKS \
-        --chunk-idx $IDX \
-        --conv-mode llava_v1 &
-done

LLAVA_Biovil/scripts/sqa_eval_gather.sh
DELETED
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-CHUNKS=8
-output_file="test_llava-13b.jsonl"
-
-# Clear out the output file if it exists.
-> "$output_file"
-
-# Loop through the indices and concatenate each file.
-for idx in $(seq 0 $((CHUNKS-1))); do
-    cat "./test_llava-13b-chunk${idx}.jsonl" >> "$output_file"
-done
-
-python llava/eval/eval_science_qa.py \
-    --base-dir ~/haotian/datasets/ScienceQA/data/scienceqa \
-    --result-file ./test_llava-13b.jsonl \
-    --output-file ./test_llava-13b_output.json \
-    --output-result ./test_llava-13b_result.json

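A note on how this pair fits together: in sqa_eval_batch.sh the answers-file pattern chunk$CHUNKS_$IDX is expanded by bash as ${CHUNKS_}${IDX}, and since CHUNKS_ is unset the files come out as test_llava-13b-chunk0.jsonl through chunk7.jsonl, exactly the names sqa_eval_gather.sh reads back via chunk${idx}:

    $ CHUNKS=8; IDX=3; echo "test_llava-13b-chunk$CHUNKS_$IDX.jsonl"
    test_llava-13b-chunk3.jsonl

The unbraced expansion looks like a typo, but rewriting it as ${CHUNKS}_${IDX} would change the filenames to chunk8_3.jsonl and break the gather step.
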
LLAVA_Biovil/scripts/v1_5/eval/gqa.sh
DELETED
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-gpu_list="${CUDA_VISIBLE_DEVICES:-0}"
-IFS=',' read -ra GPULIST <<< "$gpu_list"
-
-CHUNKS=${#GPULIST[@]}
-
-CKPT="llava-v1.5-13b"
-SPLIT="llava_gqa_testdev_balanced"
-GQADIR="./playground/data/eval/gqa/data"
-
-for IDX in $(seq 0 $((CHUNKS-1))); do
-    CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m llava.eval.model_vqa_loader \
-        --model-path liuhaotian/llava-v1.5-13b \
-        --question-file ./playground/data/eval/gqa/$SPLIT.jsonl \
-        --image-folder ./playground/data/eval/gqa/data/images \
-        --answers-file ./playground/data/eval/gqa/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl \
-        --num-chunks $CHUNKS \
-        --chunk-idx $IDX \
-        --temperature 0 \
-        --conv-mode vicuna_v1 &
-done

-wait

-output_file=./playground/data/eval/gqa/answers/$SPLIT/$CKPT/merge.jsonl

-# Clear out the output file if it exists.
-> "$output_file"

-# Loop through the indices and concatenate each file.
-for IDX in $(seq 0 $((CHUNKS-1))); do
-    cat ./playground/data/eval/gqa/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl >> "$output_file"
-done

-python scripts/convert_gqa_for_eval.py --src $output_file --dst $GQADIR/testdev_balanced_predictions.json

-cd $GQADIR
-python eval/eval.py --tier testdev_balanced

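The fan-out/merge idiom above reappears verbatim in seed.sh and vqav2.sh below: one background inference process per GPU listed in CUDA_VISIBLE_DEVICES, each writing its own chunk file, then a wait followed by a concatenation pass. A minimal standalone sketch of the pattern (worker.py and the out/ directory are illustrative placeholders, not files from this commit):

    #!/bin/bash
    # One chunk per visible GPU: CUDA_VISIBLE_DEVICES="0,1,2" -> 3 chunks.
    gpu_list="${CUDA_VISIBLE_DEVICES:-0}"
    IFS=',' read -ra GPULIST <<< "$gpu_list"
    CHUNKS=${#GPULIST[@]}
    mkdir -p out

    # Fan out: worker IDX handles every CHUNKS-th question, offset by IDX.
    for IDX in $(seq 0 $((CHUNKS-1))); do
        CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python worker.py \
            --num-chunks "$CHUNKS" --chunk-idx "$IDX" \
            --answers-file "out/${CHUNKS}_${IDX}.jsonl" &
    done
    wait

    # Merge: truncate the target, then append each chunk in index order.
    > out/merge.jsonl
    for IDX in $(seq 0 $((CHUNKS-1))); do
        cat "out/${CHUNKS}_${IDX}.jsonl" >> out/merge.jsonl
    done
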
LLAVA_Biovil/scripts/v1_5/eval/llavabench.sh
DELETED
@@ -1,23 +0,0 @@
-#!/bin/bash
-
-python -m llava.eval.model_vqa \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --question-file ./playground/data/eval/llava-bench-in-the-wild/questions.jsonl \
-    --image-folder ./playground/data/eval/llava-bench-in-the-wild/images \
-    --answers-file ./playground/data/eval/llava-bench-in-the-wild/answers/llava-v1.5-13b.jsonl \
-    --temperature 0 \
-    --conv-mode vicuna_v1
-
-mkdir -p playground/data/eval/llava-bench-in-the-wild/reviews
-
-python llava/eval/eval_gpt_review_bench.py \
-    --question playground/data/eval/llava-bench-in-the-wild/questions.jsonl \
-    --context playground/data/eval/llava-bench-in-the-wild/context.jsonl \
-    --rule llava/eval/table/rule.json \
-    --answer-list \
-        playground/data/eval/llava-bench-in-the-wild/answers_gpt4.jsonl \
-        playground/data/eval/llava-bench-in-the-wild/answers/llava-v1.5-13b.jsonl \
-    --output \
-        playground/data/eval/llava-bench-in-the-wild/reviews/llava-v1.5-13b.jsonl
-
-python llava/eval/summarize_gpt_review.py -f playground/data/eval/llava-bench-in-the-wild/reviews/llava-v1.5-13b.jsonl

LLAVA_Biovil/scripts/v1_5/eval/mmbench.sh
DELETED
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-SPLIT="mmbench_dev_20230712"
-
-python -m llava.eval.model_vqa_mmbench \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --question-file ./playground/data/eval/mmbench/$SPLIT.tsv \
-    --answers-file ./playground/data/eval/mmbench/answers/$SPLIT/llava-v1.5-13b.jsonl \
-    --single-pred-prompt \
-    --temperature 0 \
-    --conv-mode vicuna_v1
-
-mkdir -p playground/data/eval/mmbench/answers_upload/$SPLIT
-
-python scripts/convert_mmbench_for_submission.py \
-    --annotation-file ./playground/data/eval/mmbench/$SPLIT.tsv \
-    --result-dir ./playground/data/eval/mmbench/answers/$SPLIT \
-    --upload-dir ./playground/data/eval/mmbench/answers_upload/$SPLIT \
-    --experiment llava-v1.5-13b

LLAVA_Biovil/scripts/v1_5/eval/mmbench_cn.sh
DELETED
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-SPLIT="mmbench_dev_cn_20231003"
-
-python -m llava.eval.model_vqa_mmbench \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --question-file ./playground/data/eval/mmbench_cn/$SPLIT.tsv \
-    --answers-file ./playground/data/eval/mmbench_cn/answers/$SPLIT/llava-v1.5-13b.jsonl \
-    --lang cn \
-    --single-pred-prompt \
-    --temperature 0 \
-    --conv-mode vicuna_v1
-
-mkdir -p playground/data/eval/mmbench/answers_upload/$SPLIT
-
-python scripts/convert_mmbench_for_submission.py \
-    --annotation-file ./playground/data/eval/mmbench_cn/$SPLIT.tsv \
-    --result-dir ./playground/data/eval/mmbench_cn/answers/$SPLIT \
-    --upload-dir ./playground/data/eval/mmbench_cn/answers_upload/$SPLIT \
-    --experiment llava-v1.5-13b

LLAVA_Biovil/scripts/v1_5/eval/mme.sh
DELETED
@@ -1,17 +0,0 @@
-#!/bin/bash
-
-python -m llava.eval.model_vqa_loader \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --question-file ./playground/data/eval/MME/llava_mme.jsonl \
-    --image-folder ./playground/data/eval/MME/MME_Benchmark_release_version \
-    --answers-file ./playground/data/eval/MME/answers/llava-v1.5-13b.jsonl \
-    --temperature 0 \
-    --conv-mode vicuna_v1
-
-cd ./playground/data/eval/MME
-
-python convert_answer_to_mme.py --experiment llava-v1.5-13b
-
-cd eval_tool
-
-python calculation.py --results_dir answers/llava-v1.5-13b

LLAVA_Biovil/scripts/v1_5/eval/mmvet.sh
DELETED
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-python -m llava.eval.model_vqa \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --question-file ./playground/data/eval/mm-vet/llava-mm-vet.jsonl \
-    --image-folder ./playground/data/eval/mm-vet/images \
-    --answers-file ./playground/data/eval/mm-vet/answers/llava-v1.5-13b.jsonl \
-    --temperature 0 \
-    --conv-mode vicuna_v1
-
-mkdir -p ./playground/data/eval/mm-vet/results
-
-python scripts/convert_mmvet_for_eval.py \
-    --src ./playground/data/eval/mm-vet/answers/llava-v1.5-13b.jsonl \
-    --dst ./playground/data/eval/mm-vet/results/llava-v1.5-13b.json
-

LLAVA_Biovil/scripts/v1_5/eval/pope.sh
DELETED
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-python -m llava.eval.model_vqa_loader \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --question-file ./playground/data/eval/pope/llava_pope_test.jsonl \
-    --image-folder ./playground/data/eval/pope/val2014 \
-    --answers-file ./playground/data/eval/pope/answers/llava-v1.5-13b.jsonl \
-    --temperature 0 \
-    --conv-mode vicuna_v1
-
-python llava/eval/eval_pope.py \
-    --annotation-dir ./playground/data/eval/pope/coco \
-    --question-file ./playground/data/eval/pope/llava_pope_test.jsonl \
-    --result-file ./playground/data/eval/pope/answers/llava-v1.5-13b.jsonl

LLAVA_Biovil/scripts/v1_5/eval/qbench.sh
DELETED
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-if [ "$1" = "dev" ]; then
-    echo "Evaluating in 'dev' split."
-elif [ "$1" = "test" ]; then
-    echo "Evaluating in 'test' split."
-else
-    echo "Unknown split, please choose between 'dev' and 'test'."
-    exit 1
-fi
-
-python -m llava.eval.model_vqa_qbench \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --image-folder ./playground/data/eval/qbench/images_llvisionqa/ \
-    --questions-file ./playground/data/eval/qbench/llvisionqa_$1.json \
-    --answers-file ./playground/data/eval/qbench/llvisionqa_$1_answers.jsonl \
-    --conv-mode llava_v1 \
-    --lang en

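This script and its Chinese-language variant below take the split as their first positional argument, e.g. (illustrative):

    bash scripts/v1_5/eval/qbench.sh dev
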
LLAVA_Biovil/scripts/v1_5/eval/qbench_zh.sh
DELETED
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-if [ "$1" = "dev" ]; then
-    ZH_SPLIT="验证集"
-    echo "Evaluating in 'dev' split."
-elif [ "$1" = "test" ]; then
-    ZH_SPLIT="测试集"
-    echo "Evaluating in 'test' split."
-else
-    echo "Unknown split, please choose between 'dev' and 'test'."
-    exit 1
-fi
-
-python -m llava.eval.model_vqa_qbench \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --image-folder ./playground/data/eval/qbench/images_llvisionqa/ \
-    --questions-file ./playground/data/eval/qbench/质衡-问答-$ZH_SPLIT.json \
-    --answers-file ./playground/data/eval/qbench/llvisionqa_zh_$1_answers.jsonl \
-    --conv-mode llava_v1 \
-    --lang zh

LLAVA_Biovil/scripts/v1_5/eval/seed.sh
DELETED
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-gpu_list="${CUDA_VISIBLE_DEVICES:-0}"
-IFS=',' read -ra GPULIST <<< "$gpu_list"
-
-CHUNKS=${#GPULIST[@]}
-
-CKPT="llava-v1.5-13b"
-
-for IDX in $(seq 0 $((CHUNKS-1))); do
-    CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m llava.eval.model_vqa_loader \
-        --model-path liuhaotian/llava-v1.5-13b \
-        --question-file ./playground/data/eval/seed_bench/llava-seed-bench.jsonl \
-        --image-folder ./playground/data/eval/seed_bench \
-        --answers-file ./playground/data/eval/seed_bench/answers/$CKPT/${CHUNKS}_${IDX}.jsonl \
-        --num-chunks $CHUNKS \
-        --chunk-idx $IDX \
-        --temperature 0 \
-        --conv-mode vicuna_v1 &
-done

-wait

-output_file=./playground/data/eval/seed_bench/answers/$CKPT/merge.jsonl

-# Clear out the output file if it exists.
-> "$output_file"

-# Loop through the indices and concatenate each file.
-for IDX in $(seq 0 $((CHUNKS-1))); do
-    cat ./playground/data/eval/seed_bench/answers/$CKPT/${CHUNKS}_${IDX}.jsonl >> "$output_file"
-done

-# Evaluate
-python scripts/convert_seed_for_submission.py \
-    --annotation-file ./playground/data/eval/seed_bench/SEED-Bench.json \
-    --result-file $output_file \
-    --result-upload-file ./playground/data/eval/seed_bench/answers_upload/llava-v1.5-13b.jsonl
-

LLAVA_Biovil/scripts/v1_5/eval/sqa.sh
DELETED
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-python -m llava.eval.model_vqa_science \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --question-file ./playground/data/eval/scienceqa/llava_test_CQM-A.json \
-    --image-folder ./playground/data/eval/scienceqa/images/test \
-    --answers-file ./playground/data/eval/scienceqa/answers/llava-v1.5-13b.jsonl \
-    --single-pred-prompt \
-    --temperature 0 \
-    --conv-mode vicuna_v1
-
-python llava/eval/eval_science_qa.py \
-    --base-dir ./playground/data/eval/scienceqa \
-    --result-file ./playground/data/eval/scienceqa/answers/llava-v1.5-13b.jsonl \
-    --output-file ./playground/data/eval/scienceqa/answers/llava-v1.5-13b_output.jsonl \
-    --output-result ./playground/data/eval/scienceqa/answers/llava-v1.5-13b_result.json

LLAVA_Biovil/scripts/v1_5/eval/textvqa.sh
DELETED
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-python -m llava.eval.model_vqa_loader \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --question-file ./playground/data/eval/textvqa/llava_textvqa_val_v051_ocr.jsonl \
-    --image-folder ./playground/data/eval/textvqa/train_images \
-    --answers-file ./playground/data/eval/textvqa/answers/llava-v1.5-13b.jsonl \
-    --temperature 0 \
-    --conv-mode vicuna_v1
-
-python -m llava.eval.eval_textvqa \
-    --annotation-file ./playground/data/eval/textvqa/TextVQA_0.5.1_val.json \
-    --result-file ./playground/data/eval/textvqa/answers/llava-v1.5-13b.jsonl

LLAVA_Biovil/scripts/v1_5/eval/vizwiz.sh
DELETED
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-python -m llava.eval.model_vqa_loader \
-    --model-path liuhaotian/llava-v1.5-13b \
-    --question-file ./playground/data/eval/vizwiz/llava_test.jsonl \
-    --image-folder ./playground/data/eval/vizwiz/test \
-    --answers-file ./playground/data/eval/vizwiz/answers/llava-v1.5-13b.jsonl \
-    --temperature 0 \
-    --conv-mode vicuna_v1
-
-python scripts/convert_vizwiz_for_submission.py \
-    --annotation-file ./playground/data/eval/vizwiz/llava_test.jsonl \
-    --result-file ./playground/data/eval/vizwiz/answers/llava-v1.5-13b.jsonl \
-    --result-upload-file ./playground/data/eval/vizwiz/answers_upload/llava-v1.5-13b.json

LLAVA_Biovil/scripts/v1_5/eval/vqav2.sh
DELETED
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-gpu_list="${CUDA_VISIBLE_DEVICES:-0}"
-IFS=',' read -ra GPULIST <<< "$gpu_list"
-
-CHUNKS=${#GPULIST[@]}
-
-CKPT="llava-v1.5-13b"
-SPLIT="llava_vqav2_mscoco_test-dev2015"
-
-for IDX in $(seq 0 $((CHUNKS-1))); do
-    CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m llava.eval.model_vqa_loader \
-        --model-path liuhaotian/llava-v1.5-13b \
-        --question-file ./playground/data/eval/vqav2/$SPLIT.jsonl \
-        --image-folder ./playground/data/eval/vqav2/test2015 \
-        --answers-file ./playground/data/eval/vqav2/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl \
-        --num-chunks $CHUNKS \
-        --chunk-idx $IDX \
-        --temperature 0 \
-        --conv-mode vicuna_v1 &
-done

-wait

-output_file=./playground/data/eval/vqav2/answers/$SPLIT/$CKPT/merge.jsonl

-# Clear out the output file if it exists.
-> "$output_file"

-# Loop through the indices and concatenate each file.
-for IDX in $(seq 0 $((CHUNKS-1))); do
-    cat ./playground/data/eval/vqav2/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl >> "$output_file"
-done

-python scripts/convert_vqav2_for_submission.py --split $SPLIT --ckpt $CKPT
-

LLAVA_Biovil/scripts/v1_5/finetune.sh
DELETED
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero3.json \
-    --model_name_or_path lmsys/vicuna-13b-v1.5 \
-    --version v1 \
-    --data_path ./playground/data/llava_v1_5_mix665k.json \
-    --image_folder ./playground/data \
-    --vision_tower openai/clip-vit-large-patch14-336 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-v1.5-13b-pretrain/mm_projector.bin \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-13b \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb

LLAVA_Biovil/scripts/v1_5/finetune_lora.sh
DELETED
@@ -1,38 +0,0 @@
-#!/bin/bash
-
-deepspeed llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
-    --deepspeed ./scripts/zero3.json \
-    --model_name_or_path lmsys/vicuna-13b-v1.5 \
-    --version v1 \
-    --data_path ./playground/data/llava_v1_5_mix665k.json \
-    --image_folder ./playground/data \
-    --vision_tower openai/clip-vit-large-patch14-336 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-v1.5-13b-pretrain/mm_projector.bin \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-13b-lora \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-4 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb

LLAVA_Biovil/scripts/v1_5/finetune_task.sh
DELETED
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero3.json \
-    --model_name_or_path liuhaotian/llava-v1.5-13b \
-    --version v1 \
-    --data_path ./playground/data/llava_v1_5_mix665k.json \
-    --image_folder ./playground/data \
-    --vision_tower openai/clip-vit-large-patch14-336 \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-13b-task \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb

LLAVA_Biovil/scripts/v1_5/finetune_task_lora.sh
DELETED
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-deepspeed llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
-    --deepspeed ./scripts/zero3.json \
-    --model_name_or_path liuhaotian/llava-v1.5-13b \
-    --version v1 \
-    --data_path ./playground/data/llava_v1_5_mix665k.json \
-    --image_folder ./playground/data \
-    --vision_tower openai/clip-vit-large-patch14-336 \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-13b-task-lora \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-4 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb

LLAVA_Biovil/scripts/v1_5/pretrain.sh
DELETED
@@ -1,35 +0,0 @@
-#!/bin/bash
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path lmsys/vicuna-13b-v1.5 \
-    --version plain \
-    --data_path ./playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json \
-    --image_folder ./playground/data/LLaVA-Pretrain/images \
-    --vision_tower openai/clip-vit-large-patch14-336 \
-    --mm_projector_type mlp2x_gelu \
-    --tune_mm_mlp_adapter True \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-13b-pretrain \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 32 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 24000 \
-    --save_total_limit 1 \
-    --learning_rate 1e-3 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb

LLAVA_Biovil/slurm_config.conf
DELETED
@@ -1,60 +0,0 @@
-#!/bin/sh
-
-#SBATCH --job-name=radialog
-#SBATCH --output=oracle-%A.out  # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
-#SBATCH --error=oracle-%A.err  # Standard error of the script
-#SBATCH --time=0-160:00:00  # Limit on the total run time (format: days-hours:minutes:seconds)
-#SBATCH --gres=gpu:1  # Number of GPUs if needed
-#SBATCH --cpus-per-task=8  # Number of CPUs (Don't use more than 24 per GPU)
-#SBATCH --mem=96G  # Memory in GB (Don't use more than 126G per GPU), maybe 128?
-
-# activate corresponding environment
-# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
-source ~/miniconda3/etc/profile.d/conda.sh
-conda activate llava_raddialog
-# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
-# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
-
-export GPUS_PER_NODE=1
-#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
-#export MASTER_PORT=9901
-export MASTER_ADDR=$(hostname)
-export MASTER_PORT=29719
-
-srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
-    --lora_enable True --bits 4 --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-4 \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path liuhaotian/llava-v1.5-7b \
-    --version v1 \
-    --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava.json \
-    --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
-    --vision_tower openai/clip-vit-large-patch14-336 \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cosine_llava_unfreeze \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 8 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 500 \
-    --learning_rate 2e-4 \
-    --max_grad_norm 0.1 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 1300 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb \
-    --run_name llava-v1.5-7b-task-lora_radialog_cosine_llava_unfreeze \
-    --unfreeze_n_vision_tower_layers 12

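A quick sanity check on the training configs in this commit: the effective batch size per optimizer step is per_device_train_batch_size x gradient_accumulation_steps x GPUS_PER_NODE, so the config above and the BioViL variants below land on the same value:

    16 * 8  * 1 = 128   # slurm_config.conf
    2  * 64 * 1 = 128   # slurm_config_biovil_*.conf
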
LLAVA_Biovil/slurm_config_biovil_frozen.conf
DELETED
@@ -1,60 +0,0 @@
-#!/bin/sh
-
-#SBATCH --job-name=ins_v4_frozen
-#SBATCH --output=oracle-%A.out  # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
-#SBATCH --error=oracle-%A.err  # Standard error of the script
-#SBATCH --time=0-160:00:00  # Limit on the total run time (format: days-hours:minutes:seconds)
-#SBATCH --gres=gpu:1  # Number of GPUs if needed
-#SBATCH --cpus-per-task=8  # Number of CPUs (Don't use more than 24 per GPU)
-#SBATCH --mem=96G  # Memory in GB (Don't use more than 126G per GPU), maybe 128?
-# activate corresponding environment
-# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
-source ~/miniconda3/etc/profile.d/conda.sh
-conda activate llava_raddialog
-# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
-# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
-
-export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
-
-export GPUS_PER_NODE=1
-#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
-#export MASTER_PORT=9901
-export MASTER_ADDR=$(hostname)
-export MASTER_PORT=29719
-
-srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path liuhaotian/llava-v1.5-7b \
-    --version v1 \
-    --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v4_sentenized.json \
-    --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
-    --vision_tower biovil \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v4_sentenized \
-    --num_train_epochs 5 \
-    --per_device_train_batch_size 2 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 64 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 1500 \
-    --learning_rate 2e-5 \
-    --max_grad_norm 0.1 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 1300 \
-    --gradient_checkpointing False \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb \
-    --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v4_sentenized

LLAVA_Biovil/slurm_config_biovil_frozen_v5.conf
DELETED
@@ -1,60 +0,0 @@
-#!/bin/sh
-
-#SBATCH --job-name=ins_v5_frozen
-#SBATCH --output=oracle-%A.out  # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
-#SBATCH --error=oracle-%A.err  # Standard error of the script
-#SBATCH --time=0-160:00:00  # Limit on the total run time (format: days-hours:minutes:seconds)
-#SBATCH --gres=gpu:1  # Number of GPUs if needed
-#SBATCH --cpus-per-task=8  # Number of CPUs (Don't use more than 24 per GPU)
-#SBATCH --mem=96G  # Memory in GB (Don't use more than 126G per GPU), maybe 128?
-# activate corresponding environment
-# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
-source ~/miniconda3/etc/profile.d/conda.sh
-conda activate llava_raddialog
-# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
-# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
-
-export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
-
-export GPUS_PER_NODE=1
-#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
-#export MASTER_PORT=9901
-export MASTER_ADDR=$(hostname)
-export MASTER_PORT=29711
-
-srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path liuhaotian/llava-v1.5-7b \
-    --version v1 \
-    --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v5.json \
-    --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
-    --vision_tower biovil \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v5 \
-    --num_train_epochs 5 \
-    --per_device_train_batch_size 2 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 64 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 1500 \
-    --learning_rate 2e-5 \
-    --max_grad_norm 0.1 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 1300 \
-    --gradient_checkpointing False \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb \
-    --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v5

LLAVA_Biovil/slurm_config_biovil_unfrozen.conf
DELETED
@@ -1,61 +0,0 @@
-#!/bin/sh
-
-#SBATCH --job-name=ins_v4_unfrozen
-#SBATCH --output=oracle-%A.out  # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
-#SBATCH --error=oracle-%A.err  # Standard error of the script
-#SBATCH --time=0-160:00:00  # Limit on the total run time (format: days-hours:minutes:seconds)
-#SBATCH --gres=gpu:1  # Number of GPUs if needed
-#SBATCH --cpus-per-task=8  # Number of CPUs (Don't use more than 24 per GPU)
-#SBATCH --mem=96G  # Memory in GB (Don't use more than 126G per GPU), maybe 128?
-# activate corresponding environment
-# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
-source ~/miniconda3/etc/profile.d/conda.sh
-conda activate llava_raddialog
-# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
-# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
-
-export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
-
-export GPUS_PER_NODE=1
-#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
-#export MASTER_PORT=9901
-export MASTER_ADDR=$(hostname)
-export MASTER_PORT=29718
-
-srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path liuhaotian/llava-v1.5-7b \
-    --version v1 \
-    --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v4_sentenized.json \
-    --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
-    --vision_tower biovil \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v4_sentenized \
-    --num_train_epochs 5 \
-    --per_device_train_batch_size 2 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 64 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 1500 \
-    --learning_rate 2e-5 \
-    --max_grad_norm 0.1 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 1300 \
-    --gradient_checkpointing False \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb \
-    --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v4_sentenized \
-    --unfreeze_n_vision_tower_layers 12

LLAVA_Biovil/slurm_config_biovil_unfrozen_v5.conf
DELETED
@@ -1,61 +0,0 @@
-#!/bin/sh
-
-#SBATCH --job-name=ins_v5_unfrozen
-#SBATCH --output=oracle-%A.out  # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
-#SBATCH --error=oracle-%A.err  # Standard error of the script
-#SBATCH --time=0-160:00:00  # Limit on the total run time (format: days-hours:minutes:seconds)
-#SBATCH --gres=gpu:1  # Number of GPUs if needed
-#SBATCH --cpus-per-task=8  # Number of CPUs (Don't use more than 24 per GPU)
-#SBATCH --mem=96G  # Memory in GB (Don't use more than 126G per GPU), maybe 128?
-# activate corresponding environment
-# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
-source ~/miniconda3/etc/profile.d/conda.sh
-conda activate llava_raddialog
-# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
-# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
-
-export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
-
-export GPUS_PER_NODE=1
-#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
-#export MASTER_PORT=9901
-export MASTER_ADDR=$(hostname)
-export MASTER_PORT=29712
-
-srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path liuhaotian/llava-v1.5-7b \
-    --version v1 \
-    --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v5.json \
-    --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
-    --vision_tower biovil \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v5 \
-    --num_train_epochs 5 \
-    --per_device_train_batch_size 2 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 64 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 1500 \
-    --learning_rate 2e-5 \
-    --max_grad_norm 0.1 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 1300 \
-    --gradient_checkpointing False \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb \
-    --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v5 \
-    --unfreeze_n_vision_tower_layers 12

LLAVA_Biovil/slurm_config_llavamed.conf
DELETED
@@ -1,61 +0,0 @@
-#!/bin/sh
-
-#SBATCH --job-name=rd_llavamed
-#SBATCH --output=oracle-%A.out  # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
-#SBATCH --error=oracle-%A.err  # Standard error of the script
-#SBATCH --time=0-160:00:00  # Limit on the total run time (format: days-hours:minutes:seconds)
-#SBATCH --gres=gpu:1  # Number of GPUs if needed
-#SBATCH --cpus-per-task=8  # Number of CPUs (Don't use more than 24 per GPU)
-#SBATCH --mem=96G  # Memory in GB (Don't use more than 126G per GPU), maybe 128?
-
-# activate corresponding environment
-# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
-source ~/miniconda3/etc/profile.d/conda.sh
-conda activate llava_raddialog
-# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
-# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
-
-export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
-
-export GPUS_PER_NODE=1
-#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
-#export MASTER_PORT=9901
-export MASTER_ADDR=$(hostname)
-export MASTER_PORT=29719
-
-srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-4 \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path /home/guests/shared/LLaMA/7B_LLaVAMed \
-    --version llava_med \
-    --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava.json \
-    --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
-    --vision_tower biovil \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end True \
-    --mm_use_im_patch_token True \
-    --image_aspect_ratio square \
-    --group_by_modality_length False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cosine_llavamed_biovil \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 2 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 64 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 500 \
-    --learning_rate 2e-4 \
-    --max_grad_norm 0.1 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 1300 \
-    --gradient_checkpointing False \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb \
-    --run_name llava-v1.5-7b-task-lora_radialog_cosine_llavamed_biovil

LLAVA_Biovil/slurm_config_ms_cxr_t.conf
DELETED
@@ -1,61 +0,0 @@
-#!/bin/sh
-
-#SBATCH --job-name=cxrt_concat
-#SBATCH --output=oracle-%A.out  # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
-#SBATCH --error=oracle-%A.err  # Standard error of the script
-#SBATCH --time=0-160:00:00  # Limit on the total run time (format: days-hours:minutes:seconds)
-#SBATCH --gres=gpu:1  # Number of GPUs if needed
-#SBATCH --cpus-per-task=8  # Number of CPUs (Don't use more than 24 per GPU)
-#SBATCH --mem=96G  # Memory in GB (Don't use more than 126G per GPU), maybe 128?
-
-# activate corresponding environment
-# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
-source ~/miniconda3/etc/profile.d/conda.sh
-conda activate llava_raddialog
-# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
-# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
-export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
-
-export GPUS_PER_NODE=1
-#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
-#export MASTER_PORT=9901
-export MASTER_ADDR=$(hostname)
-export MASTER_PORT=29715
-
-srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path liuhaotian/llava-v1.5-7b \
-    --version v1 \
-    --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/ms_cxr_t_llava.json \
-    --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
-    --vision_tower biovil \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cxr_t_frozen_pool_concat \
-    --num_train_epochs 10 \
-    --per_device_train_batch_size 2 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 64 \
-    --evaluation_strategy "no" \
-    --save_strategy "epoch" \
-    --save_steps 500 \
-    --learning_rate 2e-5 \
-    --max_grad_norm 0.1 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 550 \
-    --gradient_checkpointing False \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb \
-    --run_name llava-v1.5-7b-task-lora_radialog_cxr_t_frozen_pool_concat \
-    --mv_type "pool_concat"

LLAVA_Biovil/slurm_config_pretrain.conf
DELETED
@@ -1,61 +0,0 @@
|
|
1 |
-
#!/bin/sh
|
2 |
-
|
3 |
-
#SBATCH --job-name=oracle
|
4 |
-
#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
|
5 |
-
#SBATCH --error=oracle-%A.err # Standard error of the script
|
6 |
-
#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
|
7 |
-
#SBATCH --gres=gpu:1 # Number of GPUs if needed
|
8 |
-
#SBATCH --cpus-per-task=4 # Number of CPUs (Don't use more than 24 per GPU)
|
9 |
-
#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128?
|
10 |
-
|
11 |
-
# activate corresponding environment
|
12 |
-
# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
|
13 |
-
source ~/miniconda3/etc/profile.d/conda.sh
|
14 |
-
conda activate oracle
|
15 |
-
# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
|
16 |
-
# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
|
17 |
-
|
18 |
-
export GPUS_PER_NODE=1
|
19 |
-
#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
|
20 |
-
#export MASTER_PORT=9901
|
21 |
-
export MASTER_ADDR=$(hostname)
|
22 |
-
export MASTER_PORT=29508
|
23 |
-
|
24 |
-
|
25 |
-
srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
|
26 |
-
--deepspeed ./scripts/zero2.json \
|
27 |
-
--model_name_or_path liuhaotian/llava-v1.5-7b \
|
28 |
-
--version v1 \
|
29 |
-
--data_path /home/guests/ege_oezsoy/Oracle/data/llava_samples/train.json \
|
30 |
-
--image_folder / \
|
31 |
-
--vision_tower openai/clip-vit-large-patch14-336 \
|
32 |
-
--mm_projector_type mlp2x_gelu \
|
33 |
-
--tune_mm_mlp_adapter True \
|
34 |
-
--mm_vision_select_layer -2 \
|
35 |
-
--mm_use_im_start_end False \
|
36 |
-
--mm_use_im_patch_token False \
|
37 |
-
--image_aspect_ratio pad \
|
38 |
-
--group_by_modality_length True \
|
39 |
-
--bf16 True \
|
40 |
-
--output_dir ./checkpoints/llava-v1.5-7b-task-4dor_pretrain_linear_weighting \
|
41 |
-
--num_train_epochs 50 \
|
42 |
-
--per_device_train_batch_size 16 \
|
43 |
-
--per_device_eval_batch_size 4 \
|
44 |
-
--gradient_accumulation_steps 1 \
|
45 |
-
--evaluation_strategy "no" \
|
46 |
-
--save_strategy "epoch" \
|
47 |
-
--save_steps 10 \
|
48 |
-
--save_total_limit 1 \
|
49 |
-
--learning_rate 2e-5 \
|
50 |
-
--max_grad_norm 0.1 \
|
51 |
-
--weight_decay 0. \
|
52 |
-
--warmup_ratio 0.03 \
|
53 |
-
--lr_scheduler_type "cosine" \
|
54 |
-
--logging_steps 1 \
|
55 |
-
--tf32 True \
|
56 |
-
--model_max_length 2048 \
|
57 |
-
--gradient_checkpointing True \
|
58 |
-
--dataloader_num_workers 4 \
|
59 |
-
--lazy_preprocess True \
|
60 |
-
--report_to wandb \
|
61 |
-
--run_name llava-v1.5-7b-task-4dor_pretrain_linear_weighting
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|