lyclyc52 committed on
Commit
8f5852f
·
1 Parent(s): 48ca1e2

Update: add training and testing scripts

Browse files
scripts/finetune/hf_download.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
#!/bin/bash
# Repeatedly run the helper script until the user interrupts (Ctrl-C).
# NOTE(review): the filename says "hf_download" but it runs
# debug_code/test_llavanext.py — confirm this is intentional.
while :; do
    python debug_code/test_llavanext.py
done
scripts/finetune/test_llava.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
#!/bin/bash
# Evaluate a finetuned LLaVA checkpoint on a single GPU.
# Fix: removed the dangling trailing "\" after the last argument.
accelerate launch --config_file ./hf_config/single_gpu_config.yml \
    run_finetune_llava.py \
    --test \
    --checkpoint_path ./model_ckpt/llava3_mix_instr/checkpoints/checkpoint_00003
scripts/finetune/train_llava.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
#!/bin/bash
# Launch multi-GPU LLaVA finetuning with Weights & Biases logging enabled.
# Fix: removed the dangling trailing "\" after the last argument.
accelerate launch --config_file /mnt1/lyc/LLaVA-NeXT/multi_gpu_config.yml \
    finetune_llava.py \
    --wandb
scripts/interleave/eval_all.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
#!/bin/bash
# Run the interleave 3D evaluation on all three benchmark splits.
# Usage: eval_all.sh [ckpt_path] [images_path]
# The arguments default to the original placeholder paths, so existing
# invocations behave exactly as before.
CKPT_PATH=${1:-/path/to/ckpt}
IMAGES_PATH=${2:-/path/to/images}

# evaluate
./scripts/interleave/eval_interleave_3d.sh "$CKPT_PATH" "$IMAGES_PATH" multi_image_in_domain
./scripts/interleave/eval_interleave_3d.sh "$CKPT_PATH" "$IMAGES_PATH" multi_image_out_domain
./scripts/interleave/eval_interleave_3d.sh "$CKPT_PATH" "$IMAGES_PATH" multi_view_in_domain
scripts/interleave/eval_interleave_3d.sh ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Evaluate an interleave checkpoint on one eval split, then score the results.
# Usage: eval_interleave_3d.sh <ckpt_path> <images_dir> <eval_type>
#
# Fixes vs. original:
#   - dropped `alias python=python3`: aliases are not expanded in
#     non-interactive bash, and only python3 is ever invoked here;
#   - replaced the awk pipeline with basename;
#   - removed the unused `file_path` variable;
#   - quoted all expansions.
CKPT_PATH=$1
NAME=$(basename "$CKPT_PATH")
echo "$NAME"
##### set images path
DATA_PATH=$2
EVAL_TYPE=$3
JSON_PATH=$DATA_PATH/$EVAL_TYPE.json
############################### eval multi-image
RESULT_NAME="logs/${NAME}/${EVAL_TYPE}"
echo "$RESULT_NAME"

mkdir -p "logs/${NAME}"

# args: ckpt, questions json, result dir, image dir, extra prompt,
#       num chunks (8), temperature (0)
bash scripts/interleave/eval_multiprocess.sh \
    "${CKPT_PATH}" \
    "${JSON_PATH}" \
    "${RESULT_NAME}" \
    "${DATA_PATH}" \
    "" \
    8 0

python3 llava/eval/evaluate_interleave.py --result-dir "${RESULT_NAME}"
scripts/interleave/eval_multiprocess.sh ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Shard model_vqa evaluation across N GPUs (one process per GPU), wait for
# all shards, then merge the per-chunk JSONL outputs into a single file.

# Check that exactly seven arguments are passed
# (the original comment said "three", which contradicted the check below)
if [ "$#" -ne 7 ]; then
    echo "Usage: $0 <model_path> <question_path> <base_answer_path> <image_folder> <extra_prompt> <N> <temperature>"
    exit 1
fi

# Assign the command line arguments to variables
model_path=$1
question_path=$2
base_answer_path=$3
image_folder=$4
extra_prompt=$5
N=$6
temperature=$7

# Loop over each chunk/process
for (( chunk_id=0; chunk_id<N; chunk_id++ ))
do
    # Remove any stale per-chunk result before writing a fresh one
    answer_path="${base_answer_path}/result_${chunk_id}.jsonl"
    if [ -f "$answer_path" ]; then
        rm "$answer_path"
    fi
    # Run the Python program in the background, one GPU per chunk
    CUDA_VISIBLE_DEVICES="$chunk_id" python3 llava/eval/model_vqa.py --model-path "$model_path" --question-file "$question_path" --answers-file "$answer_path" --num-chunks "$N" --chunk-idx "$chunk_id" --image-folder "$image_folder" --extra-prompt "$extra_prompt" --temperature "$temperature" &

    # Uncomment below if you need a slight delay between starting each process
    # sleep 0.1
done

# Wait for all background processes to finish
wait

# Start the merged file from scratch
merged_file="${base_answer_path}/result.jsonl"
if [ -f "$merged_file" ]; then
    rm "$merged_file"
fi

# Merge each chunk's JSONL into the merged file, then delete the shard.
# (The original had two separate loops and hard-coded the merged path in the
# first one instead of reusing $merged_file.)
for (( chunk_id=0; chunk_id<N; chunk_id++ ))
do
    input_file="${base_answer_path}/result_${chunk_id}.jsonl"
    cat "$input_file" >> "$merged_file"
    rm -f "$input_file"
done
scripts/video/demo/video_demo.sh ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Run the single-video demo for a checkpoint.
# Usage: video_demo.sh <ckpt> <conv_mode> <frames> <pool_stride> <overwrite> <video_path>
ROOT_DIR="/mnt/bn/vl-research/workspace/yhzhang/LLaVA-NeXT"

if [ ! -e "$ROOT_DIR" ]; then
    echo "The root dir does not exist. Exiting the script."
    exit 1
fi

cd "$ROOT_DIR" || exit 1

export PYTHONWARNINGS=ignore
export TOKENIZERS_PARALLELISM=false

CKPT=$1
CONV_MODE=$2
FRAMES=$3
POOL_STRIDE=$4
OVERWRITE=$5
VIDEO_PATH=$6

# Include the overwrite flag in the output dir name only when results are kept.
if [ "$OVERWRITE" = False ]; then
    SAVE_DIR=$(basename "$CKPT")_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}_overwrite_${OVERWRITE}
else
    SAVE_DIR=$(basename "$CKPT")_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}
fi

# Fix: the original passed `--chunk-idx $(($IDX - 1))`, but IDX is never set
# in this script (a leftover from the sharded eval scripts), so the value was
# always -1. This demo is a single process, i.e. chunk 0.
python3 playground/demo/video_demo.py \
    --model-path "$CKPT" \
    --video_path "${VIDEO_PATH}" \
    --output_dir "./work_dirs/video_demo/$SAVE_DIR" \
    --output_name pred \
    --chunk-idx 0 \
    --overwrite "${OVERWRITE}" \
    --mm_spatial_pool_stride "${POOL_STRIDE:-4}" \
    --for_get_frames_num "$FRAMES" \
    --conv-mode "$CONV_MODE"
scripts/video/eval/activitynet_eval.sh ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Score ActivityNet-QA predictions with the GPT-based evaluator.
# Usage: activitynet_eval.sh <ckpt> <conv_mode> <frames> <pool_stride> <overwrite> [chunks]
#
# NOTE(review): the inference stage of this script was fully commented out
# upstream; only the scoring step below is live, and the dead block (plus a
# first, conflicting set of positional-argument assignments used only by it)
# has been removed for clarity.
ROOT_DIR="root to LLaVA-NeXT-Video"

if [ ! -e "$ROOT_DIR" ]; then
    echo "The root dir does not exist. Exiting the script."
    exit 1
fi

cd "$ROOT_DIR" || exit 1

export PYTHONWARNINGS=ignore
export TOKENIZERS_PARALLELISM=false
CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7'
gpu_list="${CUDA_VISIBLE_DEVICES}"
# Fix: the original used zsh-only ${(s:,:)gpu_list} to split on commas,
# which is a "bad substitution" error under the #!/bin/bash shebang.
IFS=',' read -r -a GPULIST <<< "$gpu_list"

CHUNKS=${#GPULIST[@]}
echo "Using $CHUNKS GPUs"

CKPT=$1
CONV_MODE=$2
FRAMES=$3
POOL_STRIDE=$4
OVERWRITE=$5
CHUNKS=${6:-1}

OPENAIKEY="INPUT YOUR OPENAI API"

# Include the overwrite flag in the dir name only when results are kept.
if [ "$OVERWRITE" = False ]; then
    SAVE_DIR=$(basename "$CKPT")_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}_overwrite_${OVERWRITE}
else
    SAVE_DIR=$(basename "$CKPT")_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}
fi

echo "$SAVE_DIR"

python3 llavavid/eval/eval_activitynet_qa.py \
    --pred_path "./work_dirs/eval_activitynet/$SAVE_DIR" \
    --output_dir "./work_dirs/eval_activitynet/$SAVE_DIR/results" \
    --output_json "./work_dirs/eval_activitynet/$SAVE_DIR/results.json" \
    --num_chunks "$CHUNKS" \
    --api_key "$OPENAIKEY"
    # --num_tasks 16
scripts/video/eval/video_chatgpt_benchmark_eval_shard.sh ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Run the five Video-ChatGPT benchmark evaluations: shard inference across
# GPU chunks for the generic, temporal, and consistency QA sets, then score
# each benchmark (correctness, detail, context, temporal, consistency) with
# the OpenAI API.
# Usage: video_chatgpt_benchmark_eval_shard.sh <ckpt> <conv_mode> <frames> <pool_stride> <overwrite> [chunks]
ROOT_DIR="root to LLaVA-NeXT-Video"

if [ ! -e "$ROOT_DIR" ]; then
    echo "The root dir does not exist. Exiting the script."
    exit 1
fi

cd "$ROOT_DIR" || exit 1

# Fix: the original exported "python3WARNINGS=ignore"; the variable Python
# actually reads is PYTHONWARNINGS.
export PYTHONWARNINGS=ignore
export TOKENIZERS_PARALLELISM=false

CKPT=$1
CONV_MODE=$2
FRAMES=$3
POOL_STRIDE=$4
OVERWRITE=$5
CHUNKS=${6:-1}

OPENAIKEY="INPUT YOUR OPENAI API"

# Include the overwrite flag in the dir name only when results are kept.
if [ "$OVERWRITE" = False ]; then
    SAVE_DIR=$(basename "$CKPT")_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}_overwrite_${OVERWRITE}
else
    SAVE_DIR=$(basename "$CKPT")_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}
fi

echo "$SAVE_DIR"

WORK_DIR=./work_dirs/eval_video_chatgpt/$SAVE_DIR

# GPUs available on this machine; each chunk gets a contiguous slice.
# (The original also split $CUDA_VISIBLE_DEVICES with zsh-only syntax, but
# that list was immediately shadowed by this hard-coded one — removed.)
GPULIST=(0 1 2 3 4 5 6 7)
NUM_GPUS=${#GPULIST[@]}
GPUS_PER_CHUNK=$((NUM_GPUS / CHUNKS))

# Print the comma-separated GPU list for 1-based chunk index $1.
gpus_for_chunk() {
    local idx=$1
    local start=$(((idx - 1) * GPUS_PER_CHUNK))
    local slice=("${GPULIST[@]:$start:$GPUS_PER_CHUNK}")
    (IFS=,; printf '%s' "${slice[*]}")
}

# Launch one sharded inference pass (one background process per chunk) and
# wait for all of them.
#   $1 - eval python script, $2 - ground-truth json, $3 - output name;
#   any further args (e.g. --overwrite ...) are forwarded verbatim.
run_inference() {
    local script=$1 gt_file=$2 output_name=$3
    shift 3
    local idx gpus
    for idx in $(seq 1 "$CHUNKS"); do
        gpus=$(gpus_for_chunk "$idx")
        echo "CUDA_VISIBLE_DEVICES=$gpus"
        CUDA_VISIBLE_DEVICES=$gpus python3 "$script" \
            --model-path "$CKPT" \
            --video_dir ./data/llava_video/video-chatgpt/evaluation/Test_Videos/ \
            --gt_file "$gt_file" \
            --output_dir "$WORK_DIR" \
            --output_name "$output_name" \
            --num-chunks "$CHUNKS" \
            --chunk-idx $((idx - 1)) \
            --mm_spatial_pool_stride "${POOL_STRIDE:-4}" \
            --for_get_frames_num "$FRAMES" \
            --conv-mode "$CONV_MODE" \
            "$@" &
    done
    wait
}

# Run one GPT-based benchmark evaluator over existing predictions.
#   $1 - benchmark suffix, $2 - results dir/file stem, $3 - prediction name.
score_benchmark() {
    local bench=$1 result=$2 pred_name=$3
    python3 "llava/eval/evaluate_benchmark_${bench}.py" \
        --pred_path "$WORK_DIR" \
        --output_dir "$WORK_DIR/${result}" \
        --output_json "$WORK_DIR/${result}.json" \
        --num_chunks "$CHUNKS" \
        --output_name "$pred_name" \
        --num_tasks 16 \
        --api_key "$OPENAIKEY"
}

# Generic QA → benchmarks 1-3 (correctness, detailed orientation, context).
run_inference llava/eval/model_video_chatgpt_general.py \
    ./data/llava_video/video-chatgpt/evaluation/generic_qa.json pred \
    --overwrite "${OVERWRITE:-true}"

score_benchmark 1_correctness correctness_results pred
score_benchmark 2_detailed_orientation detail_results pred
score_benchmark 3_context context_results pred

# Temporal QA → benchmark 4.
run_inference llava/eval/model_video_chatgpt_general.py \
    ./data/llava_video/video-chatgpt/evaluation/temporal_qa.json pred_temporal \
    --overwrite "${OVERWRITE}"

score_benchmark 4_temporal temporal_results pred_temporal

# Consistency QA → benchmark 5.
run_inference llava/eval/model_video_chatgpt_consistency.py \
    ./data/llava_video/video-chatgpt/evaluation/consistency_qa.json pred_consistency \
    --overwrite "${OVERWRITE}"

score_benchmark 5_consistency consistency_results pred_consistency
scripts/video/eval/video_description_from_t2v.sh ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Shard video-description (T2V captioning) inference across GPU chunks,
# then concatenate the per-chunk predictions into pred.json.
# Usage: video_description_from_t2v.sh <ckpt> <conv_mode> <frames> <pool_stride> <overwrite> [chunks] [do_center_crop]
ROOT_DIR="/mnt/bn/vl-research/workspace/yhzhang/llava-next-video"

if [ ! -e "$ROOT_DIR" ]; then
    echo "The root dir does not exist. Exiting the script."
    exit 1
fi

cd "$ROOT_DIR" || exit 1

export PYTHONWARNINGS=ignore
export TOKENIZERS_PARALLELISM=false

CKPT=$1
CONV_MODE=$2
FRAMES=$3
POOL_STRIDE=$4
OVERWRITE=$5
CHUNKS=${6:-1}
DO_CENTER_CROP=${7:-False}

echo "Using $CHUNKS GPUs"

LOAD_8BIT=False

if [ "$OVERWRITE" = False ]; then
    # NOTE(review): the original branched on MODEL_MAX_LENGTH here, but that
    # variable was never set and both branches produced the identical value,
    # so the inner if was collapsed.
    SAVE_DIR=$(basename "$CKPT")_${CONV_MODE}_frames_${FRAMES}_overwrite_${OVERWRITE}
else
    SAVE_DIR=$(basename "$CKPT")_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}
fi

SAVE_DIR=${SAVE_DIR}_do_center_crop_${DO_CENTER_CROP}

# GPUs available on this machine; each chunk gets a contiguous slice.
GPULIST=(0 1 2 3 4 5 6 7)
NUM_GPUS=${#GPULIST[@]}
GPUS_PER_CHUNK=$((NUM_GPUS / CHUNKS))

for IDX in $(seq 1 "$CHUNKS"); do
    START=$(((IDX - 1) * GPUS_PER_CHUNK))
    CHUNK_GPUS=("${GPULIST[@]:$START:$GPUS_PER_CHUNK}")
    # Convert the chunk GPUs array to a comma-separated string
    CHUNK_GPUS_STR=$(IFS=,; echo "${CHUNK_GPUS[*]}")

    echo "CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR"
    CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR python3 llava/eval/model_video_description_from_t2v.py \
        --model-path "$CKPT" \
        --gt_file /mnt/bn/vl-research-1t/tuyen/webvid_hdvg_movie_pond5_for_captioning_evaluation/webvid_hdvg_movie_pond5_for_captioning_evaluation.processed.csv \
        --output_dir "./work_dirs/eval_video_description_from_t2v/$SAVE_DIR" \
        --output_name pred \
        --num-chunks "$CHUNKS" \
        --chunk-idx $((IDX - 1)) \
        --overwrite "${OVERWRITE}" \
        --mm_spatial_pool_stride "${POOL_STRIDE:-4}" \
        --for_get_frames_num "$FRAMES" \
        --load_8bit "$LOAD_8BIT" \
        --do_center_crop "$DO_CENTER_CROP" \
        --conv-mode "$CONV_MODE" &
done

wait

# Merge per-chunk outputs into a single pred.json.
# NOTE(review): this glob matches files whose names start with the chunk
# count — confirm against the per-chunk output naming of the eval script.
cat "${ROOT_DIR}/work_dirs/eval_video_description_from_t2v/$SAVE_DIR/${CHUNKS}"* > "${ROOT_DIR}/work_dirs/eval_video_description_from_t2v/$SAVE_DIR/pred.json"
scripts/video/eval/video_detail_description_eval_only.sh ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Score existing video detail-description predictions (no inference step).
# Usage: video_detail_description_eval_only.sh <save_dir>
ROOT_DIR="root to LLaVA-NeXT-Video"

if [ ! -e "$ROOT_DIR" ]; then
    echo "The root dir does not exist. Exiting the script."
    exit 1
fi

cd "$ROOT_DIR" || exit 1

export PYTHONWARNINGS=ignore
export TOKENIZERS_PARALLELISM=false

OPENAIKEY="INPUT YOUR OPENAI API"

SAVE_DIR=$1

# Fix: removed the dangling trailing "\" after the last argument.
python3 llava/eval/evaluate_benchmark_video_detail_description.py \
    --pred_path "./work_dirs/eval_video_detail_description/$SAVE_DIR/pred.json" \
    --output_dir "./work_dirs/eval_video_detail_description/$SAVE_DIR/detail_results" \
    --output_json "./work_dirs/eval_video_detail_description/$SAVE_DIR/detail_results.json" \
    --num_chunks 1 \
    --num_tasks 16 \
    --api_key "$OPENAIKEY"
scripts/video/eval/video_detail_description_eval_shard.sh ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Shard video detail-description inference across GPU chunks, then score the
# merged predictions with the GPT-based evaluator.
# Usage: video_detail_description_eval_shard.sh <ckpt> <conv_mode> <frames> <pool_stride> <overwrite> [chunks]
ROOT_DIR="/mnt/bn/vl-research/workspace/yhzhang/llava-next-video"

if [ ! -e "$ROOT_DIR" ]; then
    echo "The root dir does not exist. Exiting the script."
    exit 1
fi

cd "$ROOT_DIR" || exit 1

export PYTHONWARNINGS=ignore
export TOKENIZERS_PARALLELISM=false

OPENAIKEY="INPUT YOUR OPENAI API"

CKPT=$1
CONV_MODE=$2
FRAMES=$3
POOL_STRIDE=$4
OVERWRITE=$5
CHUNKS=${6:-1}

echo "Using $CHUNKS GPUs"

# Include the overwrite flag in the dir name only when results are kept.
if [ "$OVERWRITE" = False ]; then
    SAVE_DIR=$(basename "$CKPT")_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}_overwrite_${OVERWRITE}
else
    SAVE_DIR=$(basename "$CKPT")_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}
fi

# GPUs available on this machine; each chunk gets a contiguous slice.
GPULIST=(0 1 2 3 4 5 6 7)
NUM_GPUS=${#GPULIST[@]}
GPUS_PER_CHUNK=$((NUM_GPUS / CHUNKS))

for IDX in $(seq 1 "$CHUNKS"); do
    START=$(((IDX - 1) * GPUS_PER_CHUNK))
    CHUNK_GPUS=("${GPULIST[@]:$START:$GPUS_PER_CHUNK}")
    # Convert the chunk GPUs array to a comma-separated string
    CHUNK_GPUS_STR=$(IFS=,; echo "${CHUNK_GPUS[*]}")

    echo "CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR"
    CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR python3 llava/eval/model_video_detail_description.py \
        --model-path "$CKPT" \
        --video_dir ./data/llava_video/video-chatgpt/evaluation/Test_Videos/ \
        --output_dir "./work_dirs/eval_video_detail_description/$SAVE_DIR" \
        --output_name pred \
        --num-chunks "$CHUNKS" \
        --chunk-idx $((IDX - 1)) \
        --overwrite "${OVERWRITE}" \
        --mm_spatial_pool_stride "${POOL_STRIDE:-4}" \
        --for_get_frames_num "$FRAMES" \
        --conv-mode "$CONV_MODE" &
done

wait

python3 llava/eval/evaluate_benchmark_video_detail_description.py \
    --pred_path "./work_dirs/eval_video_detail_description/$SAVE_DIR" \
    --output_dir "./work_dirs/eval_video_detail_description/$SAVE_DIR/detail_results" \
    --output_json "./work_dirs/eval_video_detail_description/$SAVE_DIR/detail_results.json" \
    --num_chunks "$CHUNKS" \
    --num_tasks 16 \
    --api_key "$OPENAIKEY"