English
mjjung committed on
Commit
ecb36d0
·
1 Parent(s): e8c69d3
Files changed (2) hide show
  1. VideoLLaMA-7B-ActivityNet-VTune.pth +3 -0
  2. log.txt +105 -0
VideoLLaMA-7B-ActivityNet-VTune.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ff8913891b40a17e7368bf28cc949972bdb30d696faedce625e7d8ff07e8524
3
+ size 265436196
log.txt ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run": {
3
+ "task": "video_text_pretrain",
4
+ "lr_sched": "linear_warmup_cosine_lr",
5
+ "init_lr": 3e-05,
6
+ "min_lr": 1e-05,
7
+ "warmup_lr": 1e-06,
8
+ "weight_decay": 0.05,
9
+ "max_epoch": 3,
10
+ "iters_per_epoch": 51377,
11
+ "batch_size_train": 1,
12
+ "batch_size_eval": 4,
13
+ "num_workers": 4,
14
+ "warmup_steps": 25688,
15
+ "seed": 42,
16
+ "output_dir": "/data/mjjung/Video-LLaMA/videollama_activitynet_vtune",
17
+ "amp": true,
18
+ "resume_ckpt_path": null,
19
+ "evaluate": false,
20
+ "train_splits": [
21
+ "train"
22
+ ],
23
+ "device": "cuda",
24
+ "world_size": 4,
25
+ "dist_url": "env://",
26
+ "distributed": true,
27
+ "rank": 0,
28
+ "gpu": 0,
29
+ "dist_backend": "nccl"
30
+ },
31
+ "model": {
32
+ "arch": "video_llama",
33
+ "image_size": 224,
34
+ "drop_path_rate": 0,
35
+ "use_grad_checkpoint": false,
36
+ "vit_precision": "fp16",
37
+ "freeze_vit": true,
38
+ "freeze_qformer": true,
39
+ "num_query_token": 32,
40
+ "llama_model": "/data/mjjung/vicuna-7b-v1.5",
41
+ "prompt": "",
42
+ "model_type": "pretrain_vicuna",
43
+ "imagebind_ckpt_path": "/data/mjjung/Video-LLaMA/Video-LLaMA-2-7B-Finetuned/",
44
+ "ckpt": "/data/mjjung/Video-LLaMA/Video-LLaMA-2-7B-Finetuned/VL_LLaMA_2_7B_Finetuned.pth",
45
+ "equip_audio_branch": false,
46
+ "frozen_llama_proj": false,
47
+ "frozen_video_Qformer": false,
48
+ "frozen_audio_Qformer": true,
49
+ "fusion_head_layers": 2,
50
+ "max_frame_pos": 32,
51
+ "fusion_header_type": "seqTransf",
52
+ "max_txt_len": 320,
53
+ "end_sym": "###",
54
+ "prompt_path": "",
55
+ "prompt_template": "###Human: {} ###Assistant: "
56
+ },
57
+ "preprocess": {
58
+ "vis_processor": {
59
+ "train": {
60
+ "name": "alpro_video_train",
61
+ "image_size": 224,
62
+ "n_frms": 8
63
+ },
64
+ "eval": {
65
+ "name": "alpro_video_eval",
66
+ "image_size": 224,
67
+ "n_frms": 8
68
+ }
69
+ },
70
+ "text_processor": {
71
+ "train": {
72
+ "name": "blip_caption"
73
+ },
74
+ "eval": {
75
+ "name": "blip_caption"
76
+ }
77
+ }
78
+ },
79
+ "datasets": {
80
+ "webvid_instruct": {
81
+ "data_type": "video",
82
+ "build_info": {
83
+ "anno_dir": "data/activitynet_filtered_qa_grounding.json",
84
+ "videos_dir": "/data/video_datasets/"
85
+ },
86
+ "vis_processor": {
87
+ "train": {
88
+ "name": "alpro_video_train",
89
+ "n_frms": 8,
90
+ "image_size": 224
91
+ }
92
+ },
93
+ "text_processor": {
94
+ "train": {
95
+ "name": "blip_caption"
96
+ }
97
+ },
98
+ "num_video_query_token": 32,
99
+ "tokenizer_name": "/data/mjjung/vicuna-7b-v1.5",
100
+ "model_type": "vicuna"
101
+ }
102
+ }
103
+ }
104
+ {"train_lr": "0.000", "train_loss": "0.230"}
105
+ {"train_lr": "0.000", "train_loss": "0.204"}