ylwt committed
Commit 9e6d799 · 1 Parent(s): 28d0ea7

Upload folder using huggingface_hub
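The commit message matches huggingface_hub's folder-upload flow. A minimal sketch of how a commit like this is typically produced; the repo_id and local folder path below are illustrative placeholders, only the commit message is taken from this page:

```python
# Minimal sketch of the huggingface_hub folder-upload flow named in the
# commit message; repo_id and folder_path are illustrative placeholders.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="llama_factory",                # local directory to push
    path_in_repo="llama_factory",               # keep the same layout remotely
    repo_id="your-username/your-repo",          # hypothetical target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```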

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +6 -0
  2. llama_factory/v2-20241220-pos/README.md +62 -0
  3. llama_factory/v2-20241220-pos/adapter_config.json +26 -0
  4. llama_factory/v2-20241220-pos/added_tokens.json +16 -0
  5. llama_factory/v2-20241220-pos/all_results.json +8 -0
  6. llama_factory/v2-20241220-pos/chat_template.json +3 -0
  7. llama_factory/v2-20241220-pos/merges.txt +0 -0
  8. llama_factory/v2-20241220-pos/preprocessor_config.json +29 -0
  9. llama_factory/v2-20241220-pos/special_tokens_map.json +31 -0
  10. llama_factory/v2-20241220-pos/tokenizer_config.json +144 -0
  11. llama_factory/v2-20241220-pos/train_results.json +8 -0
  12. llama_factory/v2-20241220-pos/trainer_log.jsonl +13 -0
  13. llama_factory/v2-20241220-pos/trainer_state.json +126 -0
  14. llama_factory/v2-20241220-pos/training_loss.png +0 -0
  15. llama_factory/v2-20241220-pos/vocab.json +0 -0
  16. llama_factory/v2-20241220/README.md +62 -0
  17. llama_factory/v2-20241220/adapter_config.json +26 -0
  18. llama_factory/v2-20241220/added_tokens.json +16 -0
  19. llama_factory/v2-20241220/all_results.json +8 -0
  20. llama_factory/v2-20241220/chat_template.json +3 -0
  21. llama_factory/v2-20241220/merges.txt +0 -0
  22. llama_factory/v2-20241220/preprocessor_config.json +29 -0
  23. llama_factory/v2-20241220/special_tokens_map.json +31 -0
  24. llama_factory/v2-20241220/tokenizer_config.json +144 -0
  25. llama_factory/v2-20241220/train_results.json +8 -0
  26. llama_factory/v2-20241220/trainer_log.jsonl +19 -0
  27. llama_factory/v2-20241220/trainer_state.json +168 -0
  28. llama_factory/v2-20241220/training_loss.png +0 -0
  29. llama_factory/v2-20241220/vocab.json +0 -0
  30. llama_factory/v3-20241225-1_10/README.md +62 -0
  31. llama_factory/v3-20241225-1_10/adapter_config.json +26 -0
  32. llama_factory/v3-20241225-1_10/added_tokens.json +16 -0
  33. llama_factory/v3-20241225-1_10/all_results.json +8 -0
  34. llama_factory/v3-20241225-1_10/chat_template.json +3 -0
  35. llama_factory/v3-20241225-1_10/merges.txt +0 -0
  36. llama_factory/v3-20241225-1_10/preprocessor_config.json +29 -0
  37. llama_factory/v3-20241225-1_10/special_tokens_map.json +31 -0
  38. llama_factory/v3-20241225-1_10/tokenizer_config.json +144 -0
  39. llama_factory/v3-20241225-1_10/train_results.json +8 -0
  40. llama_factory/v3-20241225-1_10/trainer_log.jsonl +82 -0
  41. llama_factory/v3-20241225-1_10/trainer_state.json +609 -0
  42. llama_factory/v3-20241225-1_10/training_loss.png +0 -0
  43. llama_factory/v3-20241225-1_10/vocab.json +0 -0
  44. llama_factory/v3-20241225-1_5/README.md +62 -0
  45. llama_factory/v3-20241225-1_5/adapter_config.json +26 -0
  46. llama_factory/v3-20241225-1_5/added_tokens.json +16 -0
  47. llama_factory/v3-20241225-1_5/all_results.json +8 -0
  48. llama_factory/v3-20241225-1_5/chat_template.json +3 -0
  49. llama_factory/v3-20241225-1_5/merges.txt +0 -0
  50. llama_factory/v3-20241225-1_5/preprocessor_config.json +29 -0
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ llama_factory/v2-20241220/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ llama_factory/v2-20241220-pos/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ llama_factory/v3-20241225/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ llama_factory/v3-20241225-1_10/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ llama_factory/v3-20241225-1_5/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ llama_factory/v3-20241225-pos/tokenizer.json filter=lfs diff=lfs merge=lfs -text
llama_factory/v2-20241220-pos/README.md ADDED
@@ -0,0 +1,62 @@
+ ---
+ library_name: peft
+ license: other
+ base_model: /disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct
+ tags:
+ - llama-factory
+ - lora
+ - generated_from_trainer
+ model-index:
+ - name: v2-20241220-pos
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # v2-20241220-pos
+
+ This model is a fine-tuned version of [/disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct](https://huggingface.co//disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct) on the M^2RAR-20241220-MLLM-Pos dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 2
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 2
+ - gradient_accumulation_steps: 16
+ - total_train_batch_size: 64
+ - total_eval_batch_size: 16
+ - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 3.0
+
+ ### Training results
+
+ ### Framework versions
+
+ - PEFT 0.12.0
+ - Transformers 4.46.1
+ - Pytorch 2.3.1+cu121
+ - Datasets 3.1.0
+ - Tokenizers 0.20.3
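The hyperparameters are self-consistent: a per-device batch of 2 across 2 GPUs with 16 accumulation steps gives the stated total_train_batch_size of 64. A minimal loading sketch for the adapter this card describes, assuming the public Qwen/Qwen2-VL-7B-Instruct checkpoint stands in for the card's unresolvable local /disk/... path:

```python
# Minimal sketch, assuming the public Qwen2-VL-7B-Instruct checkpoint stands
# in for the card's local /disk/... base-model path.
import torch
from peft import PeftModel
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

base = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch.bfloat16, device_map="auto"
)
model = PeftModel.from_pretrained(base, "llama_factory/v2-20241220-pos")
processor = AutoProcessor.from_pretrained("llama_factory/v2-20241220-pos")
```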
llama_factory/v2-20241220-pos/adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "/disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 256,
+   "lora_dropout": 0.0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 128,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": "^(?!.*visual).*(?:q_proj|down_proj|v_proj|o_proj|k_proj|up_proj|gate_proj).*",
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
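The target_modules value is a regex, not a module list: the negative lookahead `^(?!.*visual)` keeps LoRA off the vision tower, while the alternation covers the language model's attention and MLP projections (with lora_alpha 256 over r 128, i.e. a LoRA scaling factor of 2). A quick check of what it selects; the module names below are illustrative of Qwen2-VL's naming, not read from the checkpoint:

```python
import re

# target_modules regex from adapter_config.json above.
pattern = r"^(?!.*visual).*(?:q_proj|down_proj|v_proj|o_proj|k_proj|up_proj|gate_proj).*"

assert re.match(pattern, "model.layers.0.self_attn.q_proj")      # LM attention: adapted
assert re.match(pattern, "model.layers.0.mlp.gate_proj")         # LM MLP: adapted
assert not re.match(pattern, "model.layers.0.input_layernorm")   # no projection name: skipped
assert not re.match(pattern, "visual.blocks.0.mlp.up_proj")      # vision tower: excluded by the lookahead
```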
llama_factory/v2-20241220-pos/added_tokens.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
llama_factory/v2-20241220-pos/all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 2.56,
+   "total_flos": 4.173594118670254e+17,
+   "train_loss": 1.6055979132652283,
+   "train_runtime": 1821.5091,
+   "train_samples_per_second": 0.494,
+   "train_steps_per_second": 0.007
+ }
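These counters agree with each other once a dataset size of about 300 examples is assumed; that size is inferred from epoch 2.56 at step 12 with a total batch of 64 and is not stated anywhere in this commit:

```python
# Consistency check on all_results.json; the 300-example dataset size is
# inferred (12 * 64 / 2.56), not stated in the commit.
steps, total_batch, runtime = 12, 64, 1821.5091
dataset_size = 300

assert steps * total_batch / dataset_size == 2.56   # reported "epoch"
print(steps / runtime)                              # ~0.00659 -> reported 0.007 steps/s
print(dataset_size * 3 / runtime)                   # ~0.494 samples/s over 3 scheduled epochs
```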
llama_factory/v2-20241220-pos/chat_template.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+ }
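The Jinja template injects a default system prompt, wraps each turn in <|im_start|>…<|im_end|>, and stands image or video items in as <|vision_start|><|image_pad|><|vision_end|> placeholders. A rendering sketch, assuming the processor is loaded from this adapter folder (which ships chat_template.json):

```python
# Rendering sketch for the chat template above, assuming the processor is
# loaded from this adapter folder.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("llama_factory/v2-20241220-pos")
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "Describe this image."},
    ],
}]
print(processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# <|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>
# <|im_start|>assistant
```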
llama_factory/v2-20241220-pos/merges.txt ADDED
The diff for this file is too large to render.
 
llama_factory/v2-20241220-pos/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [0.48145466, 0.4578275, 0.40821073],
+   "image_processor_type": "Qwen2VLImageProcessor",
+   "image_std": [0.26862954, 0.26130258, 0.27577711],
+   "max_pixels": 12845056,
+   "merge_size": 2,
+   "min_pixels": 3136,
+   "patch_size": 14,
+   "processor_class": "Qwen2VLProcessor",
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {"max_pixels": 12845056, "min_pixels": 3136},
+   "temporal_patch_size": 2
+ }
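Here min_pixels 3136 is 56 × 56, the smallest image the resizer will produce (4 × 4 patches of 14 px), and each merge_size × merge_size block of patches becomes one vision token. A rough token-count sketch; the real Qwen2VLImageProcessor additionally snaps height and width to multiples of patch_size × merge_size = 28:

```python
# Rough vision-token count implied by this preprocessor config; the actual
# processor also rounds H and W to multiples of 28 before patching.
PATCH, MERGE = 14, 2
MIN_PIXELS, MAX_PIXELS = 3136, 12845056

def vision_tokens(h: int, w: int) -> int:
    assert MIN_PIXELS <= h * w <= MAX_PIXELS, "outside the configured pixel budget"
    return (h // PATCH) * (w // PATCH) // (MERGE * MERGE)

print(vision_tokens(56, 56))      # 4 tokens: the smallest allowed image
print(vision_tokens(1288, 966))   # 1587 tokens for a ~1.2-megapixel photo
```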
llama_factory/v2-20241220-pos/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>", "<|im_end|>", "<|object_ref_start|>", "<|object_ref_end|>",
+     "<|box_start|>", "<|box_end|>", "<|quad_start|>", "<|quad_end|>",
+     "<|vision_start|>", "<|vision_end|>", "<|vision_pad|>", "<|image_pad|>", "<|video_pad|>"
+   ],
+   "eos_token": {"content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false},
+   "pad_token": {"content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false}
+ }
llama_factory/v2-20241220-pos/tokenizer_config.json ADDED
@@ -0,0 +1,144 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": {"content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151644": {"content": "<|im_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151645": {"content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151646": {"content": "<|object_ref_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151647": {"content": "<|object_ref_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151648": {"content": "<|box_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151649": {"content": "<|box_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151650": {"content": "<|quad_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151651": {"content": "<|quad_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151652": {"content": "<|vision_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151653": {"content": "<|vision_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151654": {"content": "<|vision_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151655": {"content": "<|image_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151656": {"content": "<|video_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}
+   },
+   "additional_special_tokens": ["<|im_start|>", "<|im_end|>", "<|object_ref_start|>", "<|object_ref_end|>", "<|box_start|>", "<|box_end|>", "<|quad_start|>", "<|quad_end|>", "<|vision_start|>", "<|vision_end|>", "<|vision_pad|>", "<|image_pad|>", "<|video_pad|>"],
+   "bos_token": null,
+   "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "errors": "replace",
+   "model_max_length": 32768,
+   "pad_token": "<|endoftext|>",
+   "padding_side": "right",
+   "processor_class": "Qwen2VLProcessor",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null
+ }
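The three tokenizer files agree with each other on token IDs, special-token roles, and context length. A quick sanity-check sketch against this folder:

```python
# Sanity-check sketch for the tokenizer files above, loaded from this folder.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("llama_factory/v2-20241220-pos")
assert tok.eos_token == "<|im_end|>"                          # per tokenizer_config.json
assert tok.pad_token == "<|endoftext|>"
assert tok.convert_tokens_to_ids("<|image_pad|>") == 151655   # per added_tokens.json
assert tok.model_max_length == 32768
```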
llama_factory/v2-20241220-pos/train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 2.56,
+   "total_flos": 4.173594118670254e+17,
+   "train_loss": 1.6055979132652283,
+   "train_runtime": 1821.5091,
+   "train_samples_per_second": 0.494,
+   "train_steps_per_second": 0.007
+ }
llama_factory/v2-20241220-pos/trainer_log.jsonl ADDED
@@ -0,0 +1,13 @@
+ {"current_steps": 1, "total_steps": 12, "loss": 1.7861, "lr": 5e-05, "epoch": 0.21333333333333335, "percentage": 8.33, "elapsed_time": "0:02:26", "remaining_time": "0:26:52"}
+ {"current_steps": 2, "total_steps": 12, "loss": 1.7271, "lr": 0.0001, "epoch": 0.4266666666666667, "percentage": 16.67, "elapsed_time": "0:05:12", "remaining_time": "0:26:04"}
+ {"current_steps": 3, "total_steps": 12, "loss": 1.6749, "lr": 9.755282581475769e-05, "epoch": 0.64, "percentage": 25.0, "elapsed_time": "0:07:38", "remaining_time": "0:22:55"}
+ {"current_steps": 4, "total_steps": 12, "loss": 1.6561, "lr": 9.045084971874738e-05, "epoch": 0.8533333333333334, "percentage": 33.33, "elapsed_time": "0:10:17", "remaining_time": "0:20:35"}
+ {"current_steps": 5, "total_steps": 12, "loss": 1.6393, "lr": 7.938926261462366e-05, "epoch": 1.0666666666666667, "percentage": 41.67, "elapsed_time": "0:12:23", "remaining_time": "0:17:20"}
+ {"current_steps": 6, "total_steps": 12, "loss": 1.5852, "lr": 6.545084971874738e-05, "epoch": 1.28, "percentage": 50.0, "elapsed_time": "0:14:45", "remaining_time": "0:14:45"}
+ {"current_steps": 7, "total_steps": 12, "loss": 1.5696, "lr": 5e-05, "epoch": 1.4933333333333334, "percentage": 58.33, "elapsed_time": "0:17:15", "remaining_time": "0:12:19"}
+ {"current_steps": 8, "total_steps": 12, "loss": 1.5286, "lr": 3.4549150281252636e-05, "epoch": 1.7066666666666666, "percentage": 66.67, "elapsed_time": "0:19:31", "remaining_time": "0:09:45"}
+ {"current_steps": 9, "total_steps": 12, "loss": 1.5246, "lr": 2.061073738537635e-05, "epoch": 1.92, "percentage": 75.0, "elapsed_time": "0:21:51", "remaining_time": "0:07:17"}
+ {"current_steps": 10, "total_steps": 12, "loss": 1.5174, "lr": 9.549150281252633e-06, "epoch": 2.1333333333333333, "percentage": 83.33, "elapsed_time": "0:24:56", "remaining_time": "0:04:59"}
+ {"current_steps": 11, "total_steps": 12, "loss": 1.525, "lr": 2.4471741852423237e-06, "epoch": 2.3466666666666667, "percentage": 91.67, "elapsed_time": "0:26:57", "remaining_time": "0:02:27"}
+ {"current_steps": 12, "total_steps": 12, "loss": 1.5334, "lr": 0.0, "epoch": 2.56, "percentage": 100.0, "elapsed_time": "0:29:49", "remaining_time": "0:00:00"}
+ {"current_steps": 12, "total_steps": 12, "epoch": 2.56, "percentage": 100.0, "elapsed_time": "0:30:21", "remaining_time": "0:00:00"}
llama_factory/v2-20241220-pos/trainer_state.json ADDED
@@ -0,0 +1,126 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 2.56,
+   "eval_steps": 500,
+   "global_step": 12,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {"epoch": 0.21333333333333335, "grad_norm": 0.5428073406219482, "learning_rate": 5e-05, "loss": 1.7861, "step": 1},
+     {"epoch": 0.4266666666666667, "grad_norm": 0.5242983102798462, "learning_rate": 0.0001, "loss": 1.7271, "step": 2},
+     {"epoch": 0.64, "grad_norm": 0.36863189935684204, "learning_rate": 9.755282581475769e-05, "loss": 1.6749, "step": 3},
+     {"epoch": 0.8533333333333334, "grad_norm": 0.4955863058567047, "learning_rate": 9.045084971874738e-05, "loss": 1.6561, "step": 4},
+     {"epoch": 1.0666666666666667, "grad_norm": 0.3081843852996826, "learning_rate": 7.938926261462366e-05, "loss": 1.6393, "step": 5},
+     {"epoch": 1.28, "grad_norm": 0.23544219136238098, "learning_rate": 6.545084971874738e-05, "loss": 1.5852, "step": 6},
+     {"epoch": 1.4933333333333334, "grad_norm": 0.22464187443256378, "learning_rate": 5e-05, "loss": 1.5696, "step": 7},
+     {"epoch": 1.7066666666666666, "grad_norm": 0.188608318567276, "learning_rate": 3.4549150281252636e-05, "loss": 1.5286, "step": 8},
+     {"epoch": 1.92, "grad_norm": 0.1828916221857071, "learning_rate": 2.061073738537635e-05, "loss": 1.5246, "step": 9},
+     {"epoch": 2.1333333333333333, "grad_norm": 0.18487173318862915, "learning_rate": 9.549150281252633e-06, "loss": 1.5174, "step": 10},
+     {"epoch": 2.3466666666666667, "grad_norm": 0.17777486145496368, "learning_rate": 2.4471741852423237e-06, "loss": 1.525, "step": 11},
+     {"epoch": 2.56, "grad_norm": 0.1764741837978363, "learning_rate": 0.0, "loss": 1.5334, "step": 12},
+     {"epoch": 2.56, "step": 12, "total_flos": 4.173594118670254e+17, "train_loss": 1.6055979132652283, "train_runtime": 1821.5091, "train_samples_per_second": 0.494, "train_steps_per_second": 0.007}
+   ],
+   "logging_steps": 1,
+   "max_steps": 12,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 200,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true},
+       "attributes": {}
+     }
+   },
+   "total_flos": 4.173594118670254e+17,
+   "train_batch_size": 2,
+   "trial_name": null,
+   "trial_params": null
+ }
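training_loss.png (the next entry) is an image with no renderable diff; a curve like it can be rebuilt from trainer_log.jsonl, as in this sketch (the output filename is illustrative):

```python
# Sketch rebuilding a curve like training_loss.png from trainer_log.jsonl.
import json
import matplotlib.pyplot as plt

steps, losses = [], []
with open("llama_factory/v2-20241220-pos/trainer_log.jsonl") as f:
    for line in f:
        record = json.loads(line)
        if "loss" in record:                 # the final summary line has no loss
            steps.append(record["current_steps"])
            losses.append(record["loss"])

plt.plot(steps, losses, marker="o")
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("training_loss_rebuilt.png")     # illustrative output name
```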
llama_factory/v2-20241220-pos/training_loss.png ADDED
llama_factory/v2-20241220-pos/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
llama_factory/v2-20241220/README.md ADDED
@@ -0,0 +1,62 @@
+ ---
+ library_name: peft
+ license: other
+ base_model: /disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct
+ tags:
+ - llama-factory
+ - lora
+ - generated_from_trainer
+ model-index:
+ - name: llama_factory
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # llama_factory
+
+ This model is a fine-tuned version of [/disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct](https://huggingface.co//disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct) on the M^2RAR-20241220-MLLM dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 2
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 2
+ - gradient_accumulation_steps: 16
+ - total_train_batch_size: 64
+ - total_eval_batch_size: 16
+ - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 3.0
+
+ ### Training results
+
+ ### Framework versions
+
+ - PEFT 0.12.0
+ - Transformers 4.46.1
+ - Pytorch 2.3.1+cu121
+ - Datasets 3.1.0
+ - Tokenizers 0.20.3
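This card repeats the v2-20241220-pos recipe on the M^2RAR-20241220-MLLM split. For adapter-free deployment, a LoRA adapter like this can be folded into the base weights; a sketch, again assuming the public base checkpoint and a hypothetical output directory:

```python
# Sketch, assuming the public base checkpoint: fold this LoRA adapter
# (scaling lora_alpha/r = 256/128 = 2) into the base weights.
import torch
from peft import PeftModel
from transformers import Qwen2VLForConditionalGeneration

base = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch.bfloat16
)
model = PeftModel.from_pretrained(base, "llama_factory/v2-20241220")
merged = model.merge_and_unload()              # W <- W + (alpha/r) * B @ A
merged.save_pretrained("qwen2-vl-7b-merged")   # hypothetical output directory
```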
llama_factory/v2-20241220/adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "/disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 256,
+   "lora_dropout": 0.0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 128,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": "^(?!.*visual).*(?:up_proj|q_proj|o_proj|gate_proj|v_proj|down_proj|k_proj).*",
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
llama_factory/v2-20241220/added_tokens.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
llama_factory/v2-20241220/all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 2.88,
+   "total_flos": 5.97913280723288e+17,
+   "train_loss": 1.5128402180141873,
+   "train_runtime": 2593.9911,
+   "train_samples_per_second": 0.463,
+   "train_steps_per_second": 0.007
+ }
llama_factory/v2-20241220/chat_template.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+ }
llama_factory/v2-20241220/merges.txt ADDED
The diff for this file is too large to render.
 
llama_factory/v2-20241220/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [0.48145466, 0.4578275, 0.40821073],
+   "image_processor_type": "Qwen2VLImageProcessor",
+   "image_std": [0.26862954, 0.26130258, 0.27577711],
+   "max_pixels": 12845056,
+   "merge_size": 2,
+   "min_pixels": 3136,
+   "patch_size": 14,
+   "processor_class": "Qwen2VLProcessor",
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {"max_pixels": 12845056, "min_pixels": 3136},
+   "temporal_patch_size": 2
+ }
llama_factory/v2-20241220/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>", "<|im_end|>", "<|object_ref_start|>", "<|object_ref_end|>",
+     "<|box_start|>", "<|box_end|>", "<|quad_start|>", "<|quad_end|>",
+     "<|vision_start|>", "<|vision_end|>", "<|vision_pad|>", "<|image_pad|>", "<|video_pad|>"
+   ],
+   "eos_token": {"content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false},
+   "pad_token": {"content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false}
+ }
llama_factory/v2-20241220/tokenizer_config.json ADDED
@@ -0,0 +1,144 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": {"content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151644": {"content": "<|im_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151645": {"content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151646": {"content": "<|object_ref_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151647": {"content": "<|object_ref_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151648": {"content": "<|box_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151649": {"content": "<|box_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151650": {"content": "<|quad_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151651": {"content": "<|quad_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151652": {"content": "<|vision_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151653": {"content": "<|vision_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151654": {"content": "<|vision_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151655": {"content": "<|image_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151656": {"content": "<|video_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}
+   },
+   "additional_special_tokens": ["<|im_start|>", "<|im_end|>", "<|object_ref_start|>", "<|object_ref_end|>", "<|box_start|>", "<|box_end|>", "<|quad_start|>", "<|quad_end|>", "<|vision_start|>", "<|vision_end|>", "<|vision_pad|>", "<|image_pad|>", "<|video_pad|>"],
+   "bos_token": null,
+   "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "errors": "replace",
+   "model_max_length": 32768,
+   "pad_token": "<|endoftext|>",
+   "padding_side": "right",
+   "processor_class": "Qwen2VLProcessor",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null
+ }
llama_factory/v2-20241220/train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 2.88,
+   "total_flos": 5.97913280723288e+17,
+   "train_loss": 1.5128402180141873,
+   "train_runtime": 2593.9911,
+   "train_samples_per_second": 0.463,
+   "train_steps_per_second": 0.007
+ }
llama_factory/v2-20241220/trainer_log.jsonl ADDED
@@ -0,0 +1,19 @@
+ {"current_steps": 1, "total_steps": 18, "loss": 1.6395, "lr": 5e-05, "epoch": 0.16, "percentage": 5.56, "elapsed_time": "0:02:35", "remaining_time": "0:44:07"}
+ {"current_steps": 2, "total_steps": 18, "loss": 1.6454, "lr": 0.0001, "epoch": 0.32, "percentage": 11.11, "elapsed_time": "0:05:01", "remaining_time": "0:40:08"}
+ {"current_steps": 3, "total_steps": 18, "loss": 1.6031, "lr": 9.903926402016153e-05, "epoch": 0.48, "percentage": 16.67, "elapsed_time": "0:07:13", "remaining_time": "0:36:09"}
+ {"current_steps": 4, "total_steps": 18, "loss": 1.6443, "lr": 9.619397662556435e-05, "epoch": 0.64, "percentage": 22.22, "elapsed_time": "0:09:34", "remaining_time": "0:33:31"}
+ {"current_steps": 5, "total_steps": 18, "loss": 1.5788, "lr": 9.157348061512727e-05, "epoch": 0.8, "percentage": 27.78, "elapsed_time": "0:12:14", "remaining_time": "0:31:49"}
+ {"current_steps": 6, "total_steps": 18, "loss": 1.586, "lr": 8.535533905932738e-05, "epoch": 0.96, "percentage": 33.33, "elapsed_time": "0:14:59", "remaining_time": "0:29:59"}
+ {"current_steps": 7, "total_steps": 18, "loss": 1.5621, "lr": 7.777851165098012e-05, "epoch": 1.12, "percentage": 38.89, "elapsed_time": "0:17:18", "remaining_time": "0:27:11"}
+ {"current_steps": 8, "total_steps": 18, "loss": 1.4846, "lr": 6.91341716182545e-05, "epoch": 1.28, "percentage": 44.44, "elapsed_time": "0:20:05", "remaining_time": "0:25:07"}
+ {"current_steps": 9, "total_steps": 18, "loss": 1.488, "lr": 5.9754516100806423e-05, "epoch": 1.44, "percentage": 50.0, "elapsed_time": "0:22:24", "remaining_time": "0:22:24"}
+ {"current_steps": 10, "total_steps": 18, "loss": 1.4901, "lr": 5e-05, "epoch": 1.6, "percentage": 55.56, "elapsed_time": "0:24:58", "remaining_time": "0:19:58"}
+ {"current_steps": 11, "total_steps": 18, "loss": 1.4207, "lr": 4.0245483899193595e-05, "epoch": 1.76, "percentage": 61.11, "elapsed_time": "0:27:21", "remaining_time": "0:17:24"}
+ {"current_steps": 12, "total_steps": 18, "loss": 1.4992, "lr": 3.086582838174551e-05, "epoch": 1.92, "percentage": 66.67, "elapsed_time": "0:29:33", "remaining_time": "0:14:46"}
+ {"current_steps": 13, "total_steps": 18, "loss": 1.4965, "lr": 2.2221488349019903e-05, "epoch": 2.08, "percentage": 72.22, "elapsed_time": "0:31:52", "remaining_time": "0:12:15"}
+ {"current_steps": 14, "total_steps": 18, "loss": 1.4594, "lr": 1.4644660940672627e-05, "epoch": 2.24, "percentage": 77.78, "elapsed_time": "0:33:51", "remaining_time": "0:09:40"}
+ {"current_steps": 15, "total_steps": 18, "loss": 1.42, "lr": 8.426519384872733e-06, "epoch": 2.4, "percentage": 83.33, "elapsed_time": "0:35:57", "remaining_time": "0:07:11"}
+ {"current_steps": 16, "total_steps": 18, "loss": 1.411, "lr": 3.8060233744356633e-06, "epoch": 2.56, "percentage": 88.89, "elapsed_time": "0:38:11", "remaining_time": "0:04:46"}
+ {"current_steps": 17, "total_steps": 18, "loss": 1.4063, "lr": 9.607359798384785e-07, "epoch": 2.7199999999999998, "percentage": 94.44, "elapsed_time": "0:40:31", "remaining_time": "0:02:23"}
+ {"current_steps": 18, "total_steps": 18, "loss": 1.3964, "lr": 0.0, "epoch": 2.88, "percentage": 100.0, "elapsed_time": "0:42:45", "remaining_time": "0:00:00"}
+ {"current_steps": 18, "total_steps": 18, "epoch": 2.88, "percentage": 100.0, "elapsed_time": "0:43:13", "remaining_time": "0:00:00"}
llama_factory/v2-20241220/trainer_state.json ADDED
@@ -0,0 +1,168 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 2.88,
+   "eval_steps": 500,
+   "global_step": 18,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {"epoch": 0.16, "grad_norm": 0.49046701192855835, "learning_rate": 5e-05, "loss": 1.6395, "step": 1},
+     {"epoch": 0.32, "grad_norm": 0.47982165217399597, "learning_rate": 0.0001, "loss": 1.6454, "step": 2},
+     {"epoch": 0.48, "grad_norm": 0.3478511571884155, "learning_rate": 9.903926402016153e-05, "loss": 1.6031, "step": 3},
+     {"epoch": 0.64, "grad_norm": 0.465070515871048, "learning_rate": 9.619397662556435e-05, "loss": 1.6443, "step": 4},
+     {"epoch": 0.8, "grad_norm": 0.28730812668800354, "learning_rate": 9.157348061512727e-05, "loss": 1.5788, "step": 5},
+     {"epoch": 0.96, "grad_norm": 0.2595618665218353, "learning_rate": 8.535533905932738e-05, "loss": 1.586, "step": 6},
+     {"epoch": 1.12, "grad_norm": 0.22310923039913177, "learning_rate": 7.777851165098012e-05, "loss": 1.5621, "step": 7},
+     {"epoch": 1.28, "grad_norm": 0.20716898143291473, "learning_rate": 6.91341716182545e-05, "loss": 1.4846, "step": 8},
+     {"epoch": 1.44, "grad_norm": 0.20206721127033234, "learning_rate": 5.9754516100806423e-05, "loss": 1.488, "step": 9},
+     {"epoch": 1.6, "grad_norm": 0.1985960155725479, "learning_rate": 5e-05, "loss": 1.4901, "step": 10},
+     {"epoch": 1.76, "grad_norm": 0.18855780363082886, "learning_rate": 4.0245483899193595e-05, "loss": 1.4207, "step": 11},
+     {"epoch": 1.92, "grad_norm": 0.18942952156066895, "learning_rate": 3.086582838174551e-05, "loss": 1.4992, "step": 12},
+     {"epoch": 2.08, "grad_norm": 0.19068744778633118, "learning_rate": 2.2221488349019903e-05, "loss": 1.4965, "step": 13},
+     {"epoch": 2.24, "grad_norm": 0.1741170436143875, "learning_rate": 1.4644660940672627e-05, "loss": 1.4594, "step": 14},
+     {"epoch": 2.4, "grad_norm": 0.16722294688224792, "learning_rate": 8.426519384872733e-06, "loss": 1.42, "step": 15},
+     {"epoch": 2.56, "grad_norm": 0.1697998344898224, "learning_rate": 3.8060233744356633e-06, "loss": 1.411, "step": 16},
+     {"epoch": 2.7199999999999998, "grad_norm": 0.17374582588672638, "learning_rate": 9.607359798384785e-07, "loss": 1.4063, "step": 17},
+     {"epoch": 2.88, "grad_norm": 0.17088964581489563, "learning_rate": 0.0, "loss": 1.3964, "step": 18},
+     {"epoch": 2.88, "step": 18, "total_flos": 5.97913280723288e+17, "train_loss": 1.5128402180141873, "train_runtime": 2593.9911, "train_samples_per_second": 0.463, "train_steps_per_second": 0.007}
+   ],
+   "logging_steps": 1,
+   "max_steps": 18,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 200,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true},
+       "attributes": {}
+     }
+   },
+   "total_flos": 5.97913280723288e+17,
+   "train_batch_size": 2,
+   "trial_name": null,
+   "trial_params": null
+ }
llama_factory/v2-20241220/training_loss.png ADDED
llama_factory/v2-20241220/vocab.json ADDED
The diff for this file is too large to render.
 
llama_factory/v3-20241225-1_10/README.md ADDED
@@ -0,0 +1,62 @@
+ ---
+ library_name: peft
+ license: other
+ base_model: /disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct
+ tags:
+ - llama-factory
+ - lora
+ - generated_from_trainer
+ model-index:
+ - name: v3-20241225-1_10
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # v3-20241225-1_10
+
+ This model is a fine-tuned version of [/disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct](https://huggingface.co//disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct) on the M^2RAR-20241225-MLLM-1_10 dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 2
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 2
+ - gradient_accumulation_steps: 16
+ - total_train_batch_size: 64
+ - total_eval_batch_size: 16
+ - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 3.0
+
+ ### Training results
+
+ ### Framework versions
+
+ - PEFT 0.12.0
+ - Transformers 4.46.1
+ - Pytorch 2.3.1+cu121
+ - Datasets 3.1.0
+ - Tokenizers 0.20.3
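An end-to-end generation sketch with this adapter; the image URL is a placeholder and the public checkpoint again stands in for the card's local base-model path:

```python
# End-to-end inference sketch; the image URL is a placeholder and the public
# checkpoint stands in for the card's local base-model path.
import requests
import torch
from PIL import Image
from peft import PeftModel
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

base = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch.bfloat16, device_map="auto"
)
model = PeftModel.from_pretrained(base, "llama_factory/v3-20241225-1_10")
processor = AutoProcessor.from_pretrained("llama_factory/v3-20241225-1_10")

image = Image.open(requests.get("https://example.com/sample.jpg", stream=True).raw)
messages = [{"role": "user", "content": [
    {"type": "image"},
    {"type": "text", "text": "What is shown in this picture?"},
]}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)
# Decode only the newly generated tokens after the prompt.
print(processor.batch_decode(output[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0])
```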
llama_factory/v3-20241225-1_10/adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "/disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 256,
+   "lora_dropout": 0.0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 128,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": "^(?!.*visual).*(?:o_proj|down_proj|q_proj|gate_proj|up_proj|v_proj|k_proj).*",
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
llama_factory/v3-20241225-1_10/added_tokens.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
llama_factory/v3-20241225-1_10/all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 2.9547511312217196,
+   "total_flos": 1.9929234183615939e+18,
+   "train_loss": 1.4086421551527801,
+   "train_runtime": 8849.9709,
+   "train_samples_per_second": 0.598,
+   "train_steps_per_second": 0.009
+ }
llama_factory/v3-20241225-1_10/chat_template.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+ }
llama_factory/v3-20241225-1_10/merges.txt ADDED
The diff for this file is too large to render.
 
llama_factory/v3-20241225-1_10/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [0.48145466, 0.4578275, 0.40821073],
+   "image_processor_type": "Qwen2VLImageProcessor",
+   "image_std": [0.26862954, 0.26130258, 0.27577711],
+   "max_pixels": 12845056,
+   "merge_size": 2,
+   "min_pixels": 3136,
+   "patch_size": 14,
+   "processor_class": "Qwen2VLProcessor",
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {"max_pixels": 12845056, "min_pixels": 3136},
+   "temporal_patch_size": 2
+ }
llama_factory/v3-20241225-1_10/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>", "<|im_end|>", "<|object_ref_start|>", "<|object_ref_end|>",
+     "<|box_start|>", "<|box_end|>", "<|quad_start|>", "<|quad_end|>",
+     "<|vision_start|>", "<|vision_end|>", "<|vision_pad|>", "<|image_pad|>", "<|video_pad|>"
+   ],
+   "eos_token": {"content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false},
+   "pad_token": {"content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false}
+ }
llama_factory/v3-20241225-1_10/tokenizer_config.json ADDED
@@ -0,0 +1,144 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": {"content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151644": {"content": "<|im_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151645": {"content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151646": {"content": "<|object_ref_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151647": {"content": "<|object_ref_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151648": {"content": "<|box_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151649": {"content": "<|box_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151650": {"content": "<|quad_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151651": {"content": "<|quad_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151652": {"content": "<|vision_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151653": {"content": "<|vision_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151654": {"content": "<|vision_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151655": {"content": "<|image_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+     "151656": {"content": "<|video_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}
+   },
+   "additional_special_tokens": ["<|im_start|>", "<|im_end|>", "<|object_ref_start|>", "<|object_ref_end|>", "<|box_start|>", "<|box_end|>", "<|quad_start|>", "<|quad_end|>", "<|vision_start|>", "<|vision_end|>", "<|vision_pad|>", "<|image_pad|>", "<|video_pad|>"],
+   "bos_token": null,
+   "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "errors": "replace",
+   "model_max_length": 32768,
+   "pad_token": "<|endoftext|>",
+   "padding_side": "right",
+   "processor_class": "Qwen2VLProcessor",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null
+ }
llama_factory/v3-20241225-1_10/train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 2.9547511312217196,
+   "total_flos": 1.9929234183615939e+18,
+   "train_loss": 1.4086421551527801,
+   "train_runtime": 8849.9709,
+   "train_samples_per_second": 0.598,
+   "train_steps_per_second": 0.009
+ }
llama_factory/v3-20241225-1_10/trainer_log.jsonl ADDED
@@ -0,0 +1,82 @@
+ {"current_steps": 1, "total_steps": 81, "loss": 1.6301, "lr": 1.1111111111111112e-05, "epoch": 0.03619909502262444, "percentage": 1.23, "elapsed_time": "0:01:57", "remaining_time": "2:36:51"}
+ {"current_steps": 2, "total_steps": 81, "loss": 1.6279, "lr": 2.2222222222222223e-05, "epoch": 0.07239819004524888, "percentage": 2.47, "elapsed_time": "0:03:51", "remaining_time": "2:32:14"}
+ {"current_steps": 3, "total_steps": 81, "loss": 1.5965, "lr": 3.3333333333333335e-05, "epoch": 0.1085972850678733, "percentage": 3.7, "elapsed_time": "0:05:22", "remaining_time": "2:19:46"}
+ {"current_steps": 4, "total_steps": 81, "loss": 1.6076, "lr": 4.4444444444444447e-05, "epoch": 0.14479638009049775, "percentage": 4.94, "elapsed_time": "0:07:16", "remaining_time": "2:19:57"}
+ {"current_steps": 5, "total_steps": 81, "loss": 1.6144, "lr": 5.555555555555556e-05, "epoch": 0.18099547511312217, "percentage": 6.17, "elapsed_time": "0:09:17", "remaining_time": "2:21:09"}
+ {"current_steps": 6, "total_steps": 81, "loss": 1.5657, "lr": 6.666666666666667e-05, "epoch": 0.2171945701357466, "percentage": 7.41, "elapsed_time": "0:11:14", "remaining_time": "2:20:31"}
+ {"current_steps": 7, "total_steps": 81, "loss": 1.5651, "lr": 7.777777777777778e-05, "epoch": 0.25339366515837103, "percentage": 8.64, "elapsed_time": "0:12:54", "remaining_time": "2:16:29"}
+ {"current_steps": 8, "total_steps": 81, "loss": 1.5488, "lr": 8.888888888888889e-05, "epoch": 0.2895927601809955, "percentage": 9.88, "elapsed_time": "0:14:46", "remaining_time": "2:14:46"}
+ {"current_steps": 9, "total_steps": 81, "loss": 1.5898, "lr": 0.0001, "epoch": 0.3257918552036199, "percentage": 11.11, "elapsed_time": "0:16:59", "remaining_time": "2:15:59"}
+ {"current_steps": 10, "total_steps": 81, "loss": 1.5448, "lr": 9.99524110790929e-05, "epoch": 0.36199095022624433, "percentage": 12.35, "elapsed_time": "0:18:38", "remaining_time": "2:12:19"}
+ {"current_steps": 11, "total_steps": 81, "loss": 1.5285, "lr": 9.980973490458728e-05, "epoch": 0.39819004524886875, "percentage": 13.58, "elapsed_time": "0:20:38", "remaining_time": "2:11:19"}
+ {"current_steps": 12, "total_steps": 81, "loss": 1.5645, "lr": 9.957224306869053e-05, "epoch": 0.4343891402714932, "percentage": 14.81, "elapsed_time": "0:21:58", "remaining_time": "2:06:21"}
+ {"current_steps": 13, "total_steps": 81, "loss": 1.4987, "lr": 9.924038765061042e-05, "epoch": 0.47058823529411764, "percentage": 16.05, "elapsed_time": "0:23:58", "remaining_time": "2:05:23"}
+ {"current_steps": 14, "total_steps": 81, "loss": 1.4982, "lr": 9.881480035599667e-05, "epoch": 0.5067873303167421, "percentage": 17.28, "elapsed_time": "0:25:56", "remaining_time": "2:04:08"}
+ {"current_steps": 15, "total_steps": 81, "loss": 1.5061, "lr": 9.829629131445342e-05, "epoch": 0.5429864253393665, "percentage": 18.52, "elapsed_time": "0:27:36", "remaining_time": "2:01:29"}
+ {"current_steps": 16, "total_steps": 81, "loss": 1.4963, "lr": 9.768584753741134e-05, "epoch": 0.579185520361991, "percentage": 19.75, "elapsed_time": "0:29:04", "remaining_time": "1:58:08"}
+ {"current_steps": 17, "total_steps": 81, "loss": 1.5242, "lr": 9.698463103929542e-05, "epoch": 0.6153846153846154, "percentage": 20.99, "elapsed_time": "0:31:04", "remaining_time": "1:56:57"}
+ {"current_steps": 18, "total_steps": 81, "loss": 1.49, "lr": 9.619397662556435e-05, "epoch": 0.6515837104072398, "percentage": 22.22, "elapsed_time": "0:33:00", "remaining_time": "1:55:32"}
+ {"current_steps": 19, "total_steps": 81, "loss": 1.5342, "lr": 9.53153893518325e-05, "epoch": 0.6877828054298643, "percentage": 23.46, "elapsed_time": "0:34:40", "remaining_time": "1:53:07"}
+ {"current_steps": 20, "total_steps": 81, "loss": 1.5256, "lr": 9.435054165891109e-05, "epoch": 0.7239819004524887, "percentage": 24.69, "elapsed_time": "0:36:06", "remaining_time": "1:50:07"}
+ {"current_steps": 21, "total_steps": 81, "loss": 1.5167, "lr": 9.330127018922194e-05, "epoch": 0.7601809954751131, "percentage": 25.93, "elapsed_time": "0:37:59", "remaining_time": "1:48:33"}
+ {"current_steps": 22, "total_steps": 81, "loss": 1.5255, "lr": 9.21695722906443e-05, "epoch": 0.7963800904977375, "percentage": 27.16, "elapsed_time": "0:39:30", "remaining_time": "1:45:57"}
+ {"current_steps": 23, "total_steps": 81, "loss": 1.5062, "lr": 9.09576022144496e-05, "epoch": 0.832579185520362, "percentage": 28.4, "elapsed_time": "0:41:33", "remaining_time": "1:44:47"}
+ {"current_steps": 24, "total_steps": 81, "loss": 1.4894, "lr": 8.966766701456177e-05, "epoch": 0.8687782805429864, "percentage": 29.63, "elapsed_time": "0:43:16", "remaining_time": "1:42:46"}
+ {"current_steps": 25, "total_steps": 81, "loss": 1.4553, "lr": 8.83022221559489e-05, "epoch": 0.9049773755656109, "percentage": 30.86, "elapsed_time": "0:45:18", "remaining_time": "1:41:29"}
+ {"current_steps": 26, "total_steps": 81, "loss": 1.4824, "lr": 8.68638668405062e-05, "epoch": 0.9411764705882353, "percentage": 32.1, "elapsed_time": "0:47:17", "remaining_time": "1:40:03"}
+ {"current_steps": 27, "total_steps": 81, "loss": 1.5244, "lr": 8.535533905932738e-05, "epoch": 0.9773755656108597, "percentage": 33.33, "elapsed_time": "0:49:09", "remaining_time": "1:38:18"}
+ {"current_steps": 28, "total_steps": 81, "loss": 1.4473, "lr": 8.377951038078302e-05, "epoch": 1.0248868778280542, "percentage": 34.57, "elapsed_time": "0:50:46", "remaining_time": "1:36:06"}
+ {"current_steps": 29, "total_steps": 81, "loss": 1.4101, "lr": 8.213938048432697e-05, "epoch": 1.0610859728506787, "percentage": 35.8, "elapsed_time": "0:52:32", "remaining_time": "1:34:13"}
+ {"current_steps": 30, "total_steps": 81, "loss": 1.4621, "lr": 8.043807145043604e-05, "epoch": 1.0972850678733033, "percentage": 37.04, "elapsed_time": "0:54:57", "remaining_time": "1:33:26"}
+ {"current_steps": 31, "total_steps": 81, "loss": 1.4011, "lr": 7.86788218175523e-05, "epoch": 1.1334841628959276, "percentage": 38.27, "elapsed_time": "0:56:27", "remaining_time": "1:31:03"}
+ {"current_steps": 32, "total_steps": 81, "loss": 1.4529, "lr": 7.68649804173412e-05, "epoch": 1.169683257918552, "percentage": 39.51, "elapsed_time": "0:58:11", "remaining_time": "1:29:06"}
+ {"current_steps": 33, "total_steps": 81, "loss": 1.4195, "lr": 7.500000000000001e-05, "epoch": 1.2058823529411764, "percentage": 40.74, "elapsed_time": "0:59:41", "remaining_time": "1:26:48"}
+ {"current_steps": 34, "total_steps": 81, "loss": 1.4106, "lr": 7.308743066175172e-05, "epoch": 1.242081447963801, "percentage": 41.98, "elapsed_time": "1:01:39", "remaining_time": "1:25:14"}
+ {"current_steps": 35, "total_steps": 81, "loss": 1.3482, "lr": 7.113091308703498e-05, "epoch": 1.2782805429864252, "percentage": 43.21, "elapsed_time": "1:03:15", "remaining_time": "1:23:08"}
+ {"current_steps": 36, "total_steps": 81, "loss": 1.4463, "lr": 6.91341716182545e-05, "epoch": 1.3144796380090498, "percentage": 44.44, "elapsed_time": "1:05:39", "remaining_time": "1:22:03"}
+ {"current_steps": 37, "total_steps": 81, "loss": 1.3528, "lr": 6.710100716628344e-05, "epoch": 1.3506787330316743, "percentage": 45.68, "elapsed_time": "1:07:30", "remaining_time": "1:20:17"}
+ {"current_steps": 38, "total_steps": 81, "loss": 1.3881, "lr": 6.503528997521366e-05, "epoch": 1.3868778280542986, "percentage": 46.91, "elapsed_time": "1:09:10", "remaining_time": "1:18:16"}
+ {"current_steps": 39, "total_steps": 81, "loss": 1.374, "lr": 6.294095225512603e-05, "epoch": 1.4230769230769231, "percentage": 48.15, "elapsed_time": "1:10:59", "remaining_time": "1:16:27"}
+ {"current_steps": 40, "total_steps": 81, "loss": 1.3717, "lr": 6.0821980696905146e-05, "epoch": 1.4592760180995474, "percentage": 49.38, "elapsed_time": "1:12:39", "remaining_time": "1:14:28"}
41
+ {"current_steps": 41, "total_steps": 81, "loss": 1.37, "lr": 5.868240888334653e-05, "epoch": 1.495475113122172, "percentage": 50.62, "elapsed_time": "1:14:09", "remaining_time": "1:12:21"}
42
+ {"current_steps": 42, "total_steps": 81, "loss": 1.359, "lr": 5.6526309611002594e-05, "epoch": 1.5316742081447963, "percentage": 51.85, "elapsed_time": "1:15:42", "remaining_time": "1:10:17"}
43
+ {"current_steps": 43, "total_steps": 81, "loss": 1.421, "lr": 5.435778713738292e-05, "epoch": 1.5678733031674208, "percentage": 53.09, "elapsed_time": "1:17:41", "remaining_time": "1:08:39"}
44
+ {"current_steps": 44, "total_steps": 81, "loss": 1.4027, "lr": 5.218096936826681e-05, "epoch": 1.6040723981900453, "percentage": 54.32, "elapsed_time": "1:19:38", "remaining_time": "1:06:58"}
45
+ {"current_steps": 45, "total_steps": 81, "loss": 1.3634, "lr": 5e-05, "epoch": 1.6402714932126696, "percentage": 55.56, "elapsed_time": "1:21:20", "remaining_time": "1:05:04"}
46
+ {"current_steps": 46, "total_steps": 81, "loss": 1.361, "lr": 4.781903063173321e-05, "epoch": 1.6764705882352942, "percentage": 56.79, "elapsed_time": "1:22:56", "remaining_time": "1:03:06"}
47
+ {"current_steps": 47, "total_steps": 81, "loss": 1.3613, "lr": 4.564221286261709e-05, "epoch": 1.7126696832579187, "percentage": 58.02, "elapsed_time": "1:24:43", "remaining_time": "1:01:17"}
48
+ {"current_steps": 48, "total_steps": 81, "loss": 1.367, "lr": 4.347369038899744e-05, "epoch": 1.748868778280543, "percentage": 59.26, "elapsed_time": "1:26:23", "remaining_time": "0:59:23"}
49
+ {"current_steps": 49, "total_steps": 81, "loss": 1.3626, "lr": 4.131759111665349e-05, "epoch": 1.7850678733031673, "percentage": 60.49, "elapsed_time": "1:28:19", "remaining_time": "0:57:40"}
50
+ {"current_steps": 50, "total_steps": 81, "loss": 1.3847, "lr": 3.917801930309486e-05, "epoch": 1.8212669683257918, "percentage": 61.73, "elapsed_time": "1:30:31", "remaining_time": "0:56:07"}
51
+ {"current_steps": 51, "total_steps": 81, "loss": 1.3584, "lr": 3.705904774487396e-05, "epoch": 1.8574660633484164, "percentage": 62.96, "elapsed_time": "1:32:36", "remaining_time": "0:54:28"}
52
+ {"current_steps": 52, "total_steps": 81, "loss": 1.393, "lr": 3.4964710024786354e-05, "epoch": 1.8936651583710407, "percentage": 64.2, "elapsed_time": "1:34:40", "remaining_time": "0:52:47"}
53
+ {"current_steps": 53, "total_steps": 81, "loss": 1.3333, "lr": 3.289899283371657e-05, "epoch": 1.9298642533936652, "percentage": 65.43, "elapsed_time": "1:36:20", "remaining_time": "0:50:53"}
54
+ {"current_steps": 54, "total_steps": 81, "loss": 1.3567, "lr": 3.086582838174551e-05, "epoch": 1.9660633484162897, "percentage": 66.67, "elapsed_time": "1:38:07", "remaining_time": "0:49:03"}
55
+ {"current_steps": 55, "total_steps": 81, "loss": 1.3388, "lr": 2.886908691296504e-05, "epoch": 2.013574660633484, "percentage": 67.9, "elapsed_time": "1:40:11", "remaining_time": "0:47:21"}
56
+ {"current_steps": 56, "total_steps": 81, "loss": 1.3288, "lr": 2.6912569338248315e-05, "epoch": 2.0497737556561084, "percentage": 69.14, "elapsed_time": "1:41:57", "remaining_time": "0:45:31"}
57
+ {"current_steps": 57, "total_steps": 81, "loss": 1.2998, "lr": 2.500000000000001e-05, "epoch": 2.085972850678733, "percentage": 70.37, "elapsed_time": "1:43:55", "remaining_time": "0:43:45"}
58
+ {"current_steps": 58, "total_steps": 81, "loss": 1.2406, "lr": 2.3135019582658802e-05, "epoch": 2.1221719457013575, "percentage": 71.6, "elapsed_time": "1:45:51", "remaining_time": "0:41:58"}
59
+ {"current_steps": 59, "total_steps": 81, "loss": 1.2579, "lr": 2.132117818244771e-05, "epoch": 2.158371040723982, "percentage": 72.84, "elapsed_time": "1:47:46", "remaining_time": "0:40:11"}
60
+ {"current_steps": 60, "total_steps": 81, "loss": 1.2737, "lr": 1.9561928549563968e-05, "epoch": 2.1945701357466065, "percentage": 74.07, "elapsed_time": "1:49:12", "remaining_time": "0:38:13"}
61
+ {"current_steps": 61, "total_steps": 81, "loss": 1.3014, "lr": 1.7860619515673033e-05, "epoch": 2.230769230769231, "percentage": 75.31, "elapsed_time": "1:51:16", "remaining_time": "0:36:28"}
62
+ {"current_steps": 62, "total_steps": 81, "loss": 1.3528, "lr": 1.622048961921699e-05, "epoch": 2.266968325791855, "percentage": 76.54, "elapsed_time": "1:53:05", "remaining_time": "0:34:39"}
63
+ {"current_steps": 63, "total_steps": 81, "loss": 1.2639, "lr": 1.4644660940672627e-05, "epoch": 2.3031674208144794, "percentage": 77.78, "elapsed_time": "1:54:54", "remaining_time": "0:32:49"}
64
+ {"current_steps": 64, "total_steps": 81, "loss": 1.3244, "lr": 1.3136133159493802e-05, "epoch": 2.339366515837104, "percentage": 79.01, "elapsed_time": "1:57:14", "remaining_time": "0:31:08"}
65
+ {"current_steps": 65, "total_steps": 81, "loss": 1.3534, "lr": 1.1697777844051105e-05, "epoch": 2.3755656108597285, "percentage": 80.25, "elapsed_time": "1:59:18", "remaining_time": "0:29:22"}
66
+ {"current_steps": 66, "total_steps": 81, "loss": 1.2637, "lr": 1.0332332985438248e-05, "epoch": 2.411764705882353, "percentage": 81.48, "elapsed_time": "2:00:39", "remaining_time": "0:27:25"}
67
+ {"current_steps": 67, "total_steps": 81, "loss": 1.3538, "lr": 9.042397785550405e-06, "epoch": 2.4479638009049776, "percentage": 82.72, "elapsed_time": "2:02:40", "remaining_time": "0:25:37"}
68
+ {"current_steps": 68, "total_steps": 81, "loss": 1.3247, "lr": 7.830427709355725e-06, "epoch": 2.484162895927602, "percentage": 83.95, "elapsed_time": "2:04:17", "remaining_time": "0:23:45"}
69
+ {"current_steps": 69, "total_steps": 81, "loss": 1.2721, "lr": 6.698729810778065e-06, "epoch": 2.520361990950226, "percentage": 85.19, "elapsed_time": "2:06:07", "remaining_time": "0:21:56"}
70
+ {"current_steps": 70, "total_steps": 81, "loss": 1.2966, "lr": 5.649458341088915e-06, "epoch": 2.5565610859728505, "percentage": 86.42, "elapsed_time": "2:07:32", "remaining_time": "0:20:02"}
71
+ {"current_steps": 71, "total_steps": 81, "loss": 1.2594, "lr": 4.684610648167503e-06, "epoch": 2.5927601809954752, "percentage": 87.65, "elapsed_time": "2:09:29", "remaining_time": "0:18:14"}
72
+ {"current_steps": 72, "total_steps": 81, "loss": 1.2951, "lr": 3.8060233744356633e-06, "epoch": 2.6289592760180995, "percentage": 88.89, "elapsed_time": "2:11:02", "remaining_time": "0:16:22"}
73
+ {"current_steps": 73, "total_steps": 81, "loss": 1.3408, "lr": 3.0153689607045845e-06, "epoch": 2.665158371040724, "percentage": 90.12, "elapsed_time": "2:13:04", "remaining_time": "0:14:35"}
74
+ {"current_steps": 74, "total_steps": 81, "loss": 1.2951, "lr": 2.314152462588659e-06, "epoch": 2.7013574660633486, "percentage": 91.36, "elapsed_time": "2:15:00", "remaining_time": "0:12:46"}
75
+ {"current_steps": 75, "total_steps": 81, "loss": 1.2927, "lr": 1.70370868554659e-06, "epoch": 2.737556561085973, "percentage": 92.59, "elapsed_time": "2:16:56", "remaining_time": "0:10:57"}
76
+ {"current_steps": 76, "total_steps": 81, "loss": 1.3133, "lr": 1.1851996440033319e-06, "epoch": 2.773755656108597, "percentage": 93.83, "elapsed_time": "2:18:48", "remaining_time": "0:09:07"}
77
+ {"current_steps": 77, "total_steps": 81, "loss": 1.3135, "lr": 7.596123493895991e-07, "epoch": 2.8099547511312215, "percentage": 95.06, "elapsed_time": "2:20:21", "remaining_time": "0:07:17"}
78
+ {"current_steps": 78, "total_steps": 81, "loss": 1.2631, "lr": 4.277569313094809e-07, "epoch": 2.8461538461538463, "percentage": 96.3, "elapsed_time": "2:22:03", "remaining_time": "0:05:27"}
79
+ {"current_steps": 79, "total_steps": 81, "loss": 1.2773, "lr": 1.9026509541272275e-07, "epoch": 2.8823529411764706, "percentage": 97.53, "elapsed_time": "2:23:50", "remaining_time": "0:03:38"}
80
+ {"current_steps": 80, "total_steps": 81, "loss": 1.2997, "lr": 4.7588920907110094e-08, "epoch": 2.918552036199095, "percentage": 98.77, "elapsed_time": "2:25:32", "remaining_time": "0:01:49"}
81
+ {"current_steps": 81, "total_steps": 81, "loss": 1.268, "lr": 0.0, "epoch": 2.9547511312217196, "percentage": 100.0, "elapsed_time": "2:26:50", "remaining_time": "0:00:00"}
82
+ {"current_steps": 81, "total_steps": 81, "epoch": 2.9547511312217196, "percentage": 100.0, "elapsed_time": "2:27:29", "remaining_time": "0:00:00"}
llama_factory/v3-20241225-1_10/trainer_state.json ADDED
@@ -0,0 +1,609 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.9547511312217196,
+ "eval_steps": 500,
+ "global_step": 81,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03619909502262444,
+ "grad_norm": 0.4522567093372345,
+ "learning_rate": 1.1111111111111112e-05,
+ "loss": 1.6301,
+ "step": 1
+ },
+ {
+ "epoch": 0.07239819004524888,
+ "grad_norm": 0.4915740489959717,
+ "learning_rate": 2.2222222222222223e-05,
+ "loss": 1.6279,
+ "step": 2
+ },
+ {
+ "epoch": 0.1085972850678733,
+ "grad_norm": 0.4293298125267029,
+ "learning_rate": 3.3333333333333335e-05,
+ "loss": 1.5965,
+ "step": 3
+ },
+ {
+ "epoch": 0.14479638009049775,
+ "grad_norm": 0.30257710814476013,
+ "learning_rate": 4.4444444444444447e-05,
+ "loss": 1.6076,
+ "step": 4
+ },
+ {
+ "epoch": 0.18099547511312217,
+ "grad_norm": 0.338920921087265,
+ "learning_rate": 5.555555555555556e-05,
+ "loss": 1.6144,
+ "step": 5
+ },
+ {
+ "epoch": 0.2171945701357466,
+ "grad_norm": 0.393881618976593,
+ "learning_rate": 6.666666666666667e-05,
+ "loss": 1.5657,
+ "step": 6
+ },
+ {
+ "epoch": 0.25339366515837103,
+ "grad_norm": 0.2913854718208313,
+ "learning_rate": 7.777777777777778e-05,
+ "loss": 1.5651,
+ "step": 7
+ },
+ {
+ "epoch": 0.2895927601809955,
+ "grad_norm": 0.2615966498851776,
+ "learning_rate": 8.888888888888889e-05,
+ "loss": 1.5488,
+ "step": 8
+ },
+ {
+ "epoch": 0.3257918552036199,
+ "grad_norm": 0.2777751386165619,
+ "learning_rate": 0.0001,
+ "loss": 1.5898,
+ "step": 9
+ },
+ {
+ "epoch": 0.36199095022624433,
+ "grad_norm": 0.24253539741039276,
+ "learning_rate": 9.99524110790929e-05,
+ "loss": 1.5448,
+ "step": 10
+ },
+ {
+ "epoch": 0.39819004524886875,
+ "grad_norm": 0.21929268538951874,
+ "learning_rate": 9.980973490458728e-05,
+ "loss": 1.5285,
+ "step": 11
+ },
+ {
+ "epoch": 0.4343891402714932,
+ "grad_norm": 0.2206713855266571,
+ "learning_rate": 9.957224306869053e-05,
+ "loss": 1.5645,
+ "step": 12
+ },
+ {
+ "epoch": 0.47058823529411764,
+ "grad_norm": 0.19960802793502808,
+ "learning_rate": 9.924038765061042e-05,
+ "loss": 1.4987,
+ "step": 13
+ },
+ {
+ "epoch": 0.5067873303167421,
+ "grad_norm": 0.20623129606246948,
+ "learning_rate": 9.881480035599667e-05,
+ "loss": 1.4982,
+ "step": 14
+ },
+ {
+ "epoch": 0.5429864253393665,
+ "grad_norm": 0.2134510725736618,
+ "learning_rate": 9.829629131445342e-05,
+ "loss": 1.5061,
+ "step": 15
+ },
+ {
+ "epoch": 0.579185520361991,
+ "grad_norm": 0.20414741337299347,
+ "learning_rate": 9.768584753741134e-05,
+ "loss": 1.4963,
+ "step": 16
+ },
+ {
+ "epoch": 0.6153846153846154,
+ "grad_norm": 0.19349144399166107,
+ "learning_rate": 9.698463103929542e-05,
+ "loss": 1.5242,
+ "step": 17
+ },
+ {
+ "epoch": 0.6515837104072398,
+ "grad_norm": 0.19233722984790802,
+ "learning_rate": 9.619397662556435e-05,
+ "loss": 1.49,
+ "step": 18
+ },
+ {
+ "epoch": 0.6877828054298643,
+ "grad_norm": 0.1920696496963501,
+ "learning_rate": 9.53153893518325e-05,
+ "loss": 1.5342,
+ "step": 19
+ },
+ {
+ "epoch": 0.7239819004524887,
+ "grad_norm": 0.18896055221557617,
+ "learning_rate": 9.435054165891109e-05,
+ "loss": 1.5256,
+ "step": 20
+ },
+ {
+ "epoch": 0.7601809954751131,
+ "grad_norm": 0.18282486498355865,
+ "learning_rate": 9.330127018922194e-05,
+ "loss": 1.5167,
+ "step": 21
+ },
+ {
+ "epoch": 0.7963800904977375,
+ "grad_norm": 0.19715480506420135,
+ "learning_rate": 9.21695722906443e-05,
+ "loss": 1.5255,
+ "step": 22
+ },
+ {
+ "epoch": 0.832579185520362,
+ "grad_norm": 0.17317330837249756,
+ "learning_rate": 9.09576022144496e-05,
+ "loss": 1.5062,
+ "step": 23
+ },
+ {
+ "epoch": 0.8687782805429864,
+ "grad_norm": 0.18779075145721436,
+ "learning_rate": 8.966766701456177e-05,
+ "loss": 1.4894,
+ "step": 24
+ },
+ {
+ "epoch": 0.9049773755656109,
+ "grad_norm": 0.1841159462928772,
+ "learning_rate": 8.83022221559489e-05,
+ "loss": 1.4553,
+ "step": 25
+ },
+ {
+ "epoch": 0.9411764705882353,
+ "grad_norm": 0.1778552532196045,
+ "learning_rate": 8.68638668405062e-05,
+ "loss": 1.4824,
+ "step": 26
+ },
+ {
+ "epoch": 0.9773755656108597,
+ "grad_norm": 0.18053092062473297,
+ "learning_rate": 8.535533905932738e-05,
+ "loss": 1.5244,
+ "step": 27
+ },
+ {
+ "epoch": 1.0248868778280542,
+ "grad_norm": 0.17003145813941956,
+ "learning_rate": 8.377951038078302e-05,
+ "loss": 1.4473,
+ "step": 28
+ },
+ {
+ "epoch": 1.0610859728506787,
+ "grad_norm": 0.1815570592880249,
+ "learning_rate": 8.213938048432697e-05,
+ "loss": 1.4101,
+ "step": 29
+ },
+ {
+ "epoch": 1.0972850678733033,
+ "grad_norm": 0.17359577119350433,
+ "learning_rate": 8.043807145043604e-05,
+ "loss": 1.4621,
+ "step": 30
+ },
+ {
+ "epoch": 1.1334841628959276,
+ "grad_norm": 0.18072284758090973,
+ "learning_rate": 7.86788218175523e-05,
+ "loss": 1.4011,
+ "step": 31
+ },
+ {
+ "epoch": 1.169683257918552,
+ "grad_norm": 0.18027463555335999,
+ "learning_rate": 7.68649804173412e-05,
+ "loss": 1.4529,
+ "step": 32
+ },
+ {
+ "epoch": 1.2058823529411764,
+ "grad_norm": 0.18900279700756073,
+ "learning_rate": 7.500000000000001e-05,
+ "loss": 1.4195,
+ "step": 33
+ },
+ {
+ "epoch": 1.242081447963801,
+ "grad_norm": 0.17625436186790466,
+ "learning_rate": 7.308743066175172e-05,
+ "loss": 1.4106,
+ "step": 34
+ },
+ {
+ "epoch": 1.2782805429864252,
+ "grad_norm": 0.18460573256015778,
+ "learning_rate": 7.113091308703498e-05,
+ "loss": 1.3482,
+ "step": 35
+ },
+ {
+ "epoch": 1.3144796380090498,
+ "grad_norm": 0.1863405853509903,
+ "learning_rate": 6.91341716182545e-05,
+ "loss": 1.4463,
+ "step": 36
+ },
+ {
+ "epoch": 1.3506787330316743,
+ "grad_norm": 0.188517764210701,
+ "learning_rate": 6.710100716628344e-05,
+ "loss": 1.3528,
+ "step": 37
+ },
+ {
+ "epoch": 1.3868778280542986,
+ "grad_norm": 0.1890035718679428,
+ "learning_rate": 6.503528997521366e-05,
+ "loss": 1.3881,
+ "step": 38
+ },
+ {
+ "epoch": 1.4230769230769231,
+ "grad_norm": 0.19418419897556305,
+ "learning_rate": 6.294095225512603e-05,
+ "loss": 1.374,
+ "step": 39
+ },
+ {
+ "epoch": 1.4592760180995474,
+ "grad_norm": 0.1959460824728012,
+ "learning_rate": 6.0821980696905146e-05,
+ "loss": 1.3717,
+ "step": 40
+ },
+ {
+ "epoch": 1.495475113122172,
+ "grad_norm": 0.20840130746364594,
+ "learning_rate": 5.868240888334653e-05,
+ "loss": 1.37,
+ "step": 41
+ },
+ {
+ "epoch": 1.5316742081447963,
+ "grad_norm": 0.21801875531673431,
+ "learning_rate": 5.6526309611002594e-05,
+ "loss": 1.359,
+ "step": 42
+ },
+ {
+ "epoch": 1.5678733031674208,
+ "grad_norm": 0.20987379550933838,
+ "learning_rate": 5.435778713738292e-05,
+ "loss": 1.421,
+ "step": 43
+ },
+ {
+ "epoch": 1.6040723981900453,
+ "grad_norm": 0.19880157709121704,
+ "learning_rate": 5.218096936826681e-05,
+ "loss": 1.4027,
+ "step": 44
+ },
+ {
+ "epoch": 1.6402714932126696,
+ "grad_norm": 0.20954720675945282,
+ "learning_rate": 5e-05,
+ "loss": 1.3634,
+ "step": 45
+ },
+ {
+ "epoch": 1.6764705882352942,
+ "grad_norm": 0.20119519531726837,
+ "learning_rate": 4.781903063173321e-05,
+ "loss": 1.361,
+ "step": 46
+ },
+ {
+ "epoch": 1.7126696832579187,
+ "grad_norm": 0.20504230260849,
+ "learning_rate": 4.564221286261709e-05,
+ "loss": 1.3613,
+ "step": 47
+ },
+ {
+ "epoch": 1.748868778280543,
+ "grad_norm": 0.20827656984329224,
+ "learning_rate": 4.347369038899744e-05,
+ "loss": 1.367,
+ "step": 48
+ },
+ {
+ "epoch": 1.7850678733031673,
+ "grad_norm": 0.21221719682216644,
+ "learning_rate": 4.131759111665349e-05,
+ "loss": 1.3626,
+ "step": 49
+ },
+ {
+ "epoch": 1.8212669683257918,
+ "grad_norm": 0.21247024834156036,
+ "learning_rate": 3.917801930309486e-05,
+ "loss": 1.3847,
+ "step": 50
+ },
+ {
+ "epoch": 1.8574660633484164,
+ "grad_norm": 0.21873697638511658,
+ "learning_rate": 3.705904774487396e-05,
+ "loss": 1.3584,
+ "step": 51
+ },
+ {
+ "epoch": 1.8936651583710407,
+ "grad_norm": 0.21476612985134125,
+ "learning_rate": 3.4964710024786354e-05,
+ "loss": 1.393,
+ "step": 52
+ },
+ {
+ "epoch": 1.9298642533936652,
+ "grad_norm": 0.21541458368301392,
+ "learning_rate": 3.289899283371657e-05,
+ "loss": 1.3333,
+ "step": 53
+ },
+ {
+ "epoch": 1.9660633484162897,
+ "grad_norm": 0.2195931226015091,
+ "learning_rate": 3.086582838174551e-05,
+ "loss": 1.3567,
+ "step": 54
+ },
+ {
+ "epoch": 2.013574660633484,
+ "grad_norm": 0.22267796099185944,
+ "learning_rate": 2.886908691296504e-05,
+ "loss": 1.3388,
+ "step": 55
+ },
+ {
+ "epoch": 2.0497737556561084,
+ "grad_norm": 0.2236703336238861,
+ "learning_rate": 2.6912569338248315e-05,
+ "loss": 1.3288,
+ "step": 56
+ },
+ {
+ "epoch": 2.085972850678733,
+ "grad_norm": 0.22568458318710327,
+ "learning_rate": 2.500000000000001e-05,
+ "loss": 1.2998,
+ "step": 57
+ },
+ {
+ "epoch": 2.1221719457013575,
+ "grad_norm": 0.22315552830696106,
+ "learning_rate": 2.3135019582658802e-05,
+ "loss": 1.2406,
+ "step": 58
+ },
+ {
+ "epoch": 2.158371040723982,
+ "grad_norm": 0.22059525549411774,
+ "learning_rate": 2.132117818244771e-05,
+ "loss": 1.2579,
+ "step": 59
+ },
+ {
+ "epoch": 2.1945701357466065,
+ "grad_norm": 0.24967165291309357,
+ "learning_rate": 1.9561928549563968e-05,
+ "loss": 1.2737,
+ "step": 60
+ },
+ {
+ "epoch": 2.230769230769231,
+ "grad_norm": 0.2367183119058609,
+ "learning_rate": 1.7860619515673033e-05,
+ "loss": 1.3014,
+ "step": 61
+ },
+ {
+ "epoch": 2.266968325791855,
+ "grad_norm": 0.2517261207103729,
+ "learning_rate": 1.622048961921699e-05,
+ "loss": 1.3528,
+ "step": 62
+ },
+ {
+ "epoch": 2.3031674208144794,
+ "grad_norm": 0.235427126288414,
+ "learning_rate": 1.4644660940672627e-05,
+ "loss": 1.2639,
+ "step": 63
+ },
+ {
+ "epoch": 2.339366515837104,
+ "grad_norm": 0.22943593561649323,
+ "learning_rate": 1.3136133159493802e-05,
+ "loss": 1.3244,
+ "step": 64
+ },
+ {
+ "epoch": 2.3755656108597285,
+ "grad_norm": 0.22623376548290253,
+ "learning_rate": 1.1697777844051105e-05,
+ "loss": 1.3534,
+ "step": 65
+ },
+ {
+ "epoch": 2.411764705882353,
+ "grad_norm": 0.240826815366745,
+ "learning_rate": 1.0332332985438248e-05,
+ "loss": 1.2637,
+ "step": 66
+ },
+ {
+ "epoch": 2.4479638009049776,
+ "grad_norm": 0.2174130380153656,
+ "learning_rate": 9.042397785550405e-06,
+ "loss": 1.3538,
+ "step": 67
+ },
+ {
+ "epoch": 2.484162895927602,
+ "grad_norm": 0.23589667677879333,
+ "learning_rate": 7.830427709355725e-06,
+ "loss": 1.3247,
+ "step": 68
+ },
+ {
+ "epoch": 2.520361990950226,
+ "grad_norm": 0.24112889170646667,
+ "learning_rate": 6.698729810778065e-06,
+ "loss": 1.2721,
+ "step": 69
+ },
+ {
+ "epoch": 2.5565610859728505,
+ "grad_norm": 0.23095567524433136,
+ "learning_rate": 5.649458341088915e-06,
+ "loss": 1.2966,
+ "step": 70
+ },
+ {
+ "epoch": 2.5927601809954752,
+ "grad_norm": 0.2425680309534073,
+ "learning_rate": 4.684610648167503e-06,
+ "loss": 1.2594,
+ "step": 71
+ },
+ {
+ "epoch": 2.6289592760180995,
+ "grad_norm": 0.2282722145318985,
+ "learning_rate": 3.8060233744356633e-06,
+ "loss": 1.2951,
+ "step": 72
+ },
+ {
+ "epoch": 2.665158371040724,
+ "grad_norm": 0.2365112006664276,
+ "learning_rate": 3.0153689607045845e-06,
+ "loss": 1.3408,
+ "step": 73
+ },
+ {
+ "epoch": 2.7013574660633486,
+ "grad_norm": 0.23960916697978973,
+ "learning_rate": 2.314152462588659e-06,
+ "loss": 1.2951,
+ "step": 74
+ },
+ {
+ "epoch": 2.737556561085973,
+ "grad_norm": 0.23407232761383057,
+ "learning_rate": 1.70370868554659e-06,
+ "loss": 1.2927,
+ "step": 75
+ },
+ {
+ "epoch": 2.773755656108597,
+ "grad_norm": 0.23612752556800842,
+ "learning_rate": 1.1851996440033319e-06,
+ "loss": 1.3133,
+ "step": 76
+ },
+ {
+ "epoch": 2.8099547511312215,
+ "grad_norm": 0.24510255455970764,
+ "learning_rate": 7.596123493895991e-07,
+ "loss": 1.3135,
+ "step": 77
+ },
+ {
+ "epoch": 2.8461538461538463,
+ "grad_norm": 0.25504744052886963,
+ "learning_rate": 4.277569313094809e-07,
+ "loss": 1.2631,
+ "step": 78
+ },
+ {
+ "epoch": 2.8823529411764706,
+ "grad_norm": 0.22783517837524414,
+ "learning_rate": 1.9026509541272275e-07,
+ "loss": 1.2773,
+ "step": 79
+ },
+ {
+ "epoch": 2.918552036199095,
+ "grad_norm": 0.23234006762504578,
+ "learning_rate": 4.7588920907110094e-08,
+ "loss": 1.2997,
+ "step": 80
+ },
+ {
+ "epoch": 2.9547511312217196,
+ "grad_norm": 0.24794724583625793,
+ "learning_rate": 0.0,
+ "loss": 1.268,
+ "step": 81
+ },
+ {
+ "epoch": 2.9547511312217196,
+ "step": 81,
+ "total_flos": 1.9929234183615939e+18,
+ "train_loss": 1.4086421551527801,
+ "train_runtime": 8849.9709,
+ "train_samples_per_second": 0.598,
+ "train_steps_per_second": 0.009
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 81,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 200,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.9929234183615939e+18,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+ }
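The learning_rate sequence in log_history matches a linear-warmup plus cosine-decay schedule (lr_scheduler_type cosine with warmup_ratio 0.1, i.e. 9 of 81 steps). A sketch that reproduces the logged values; the closed form is inferred from the numbers above, not quoted from the trainer source:

import math

peak_lr, total_steps, warmup_steps = 1e-4, 81, 9

def scheduled_lr(step):
    # linear warmup to peak_lr over the first 9 steps
    if step <= warmup_steps:
        return peak_lr * step / warmup_steps
    # cosine decay from peak_lr to 0 over the remaining 72 steps
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return 0.5 * peak_lr * (1 + math.cos(math.pi * progress))

print(scheduled_lr(10))  # ~9.99524e-05, as logged at step 10
print(scheduled_lr(45))  # 5e-05, the logged midpoint value
print(scheduled_lr(81))  # 0.0, as logged at the final step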
llama_factory/v3-20241225-1_10/training_loss.png ADDED
llama_factory/v3-20241225-1_10/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
llama_factory/v3-20241225-1_5/README.md ADDED
@@ -0,0 +1,62 @@
+ ---
+ library_name: peft
+ license: other
+ base_model: /disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct
+ tags:
+ - llama-factory
+ - lora
+ - generated_from_trainer
+ model-index:
+ - name: v3-20241225-1_5
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # v3-20241225-1_5
+
+ This model is a fine-tuned version of [/disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct](https://huggingface.co//disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct) on the M^2RAR-20241225-MLLM-1_5 dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 2
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 2
+ - gradient_accumulation_steps: 16
+ - total_train_batch_size: 64
+ - total_eval_batch_size: 16
+ - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 3.0
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - PEFT 0.12.0
+ - Transformers 4.46.1
+ - Pytorch 2.3.1+cu121
+ - Datasets 3.1.0
+ - Tokenizers 0.20.3
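As a usage note for this card: a LoRA checkpoint like this one is normally applied on top of the base model it names. A minimal loading sketch (the base-model path is the local one from the card, the adapter directory is this folder, and the class names are the standard transformers/peft ones for Qwen2-VL, not anything specific to this repo):

from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from peft import PeftModel

base = "/disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct"
model = Qwen2VLForConditionalGeneration.from_pretrained(base, device_map="auto")
model = PeftModel.from_pretrained(model, "llama_factory/v3-20241225-1_5")  # this adapter folder
processor = AutoProcessor.from_pretrained(base)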
llama_factory/v3-20241225-1_5/adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "/disk/maziao/model-zoo/hf-repo/models--Qwen--Qwen2-VL-7B-Instruct",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 256,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 128,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": "^(?!.*visual).*(?:q_proj|down_proj|k_proj|up_proj|gate_proj|v_proj|o_proj).*",
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+ }
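Here target_modules is a regular expression rather than a module-name list: the (?!.*visual) lookahead keeps LoRA off the Qwen2-VL vision tower, while the alternation covers the seven projection layers of the language model (and lora_alpha 256 over r 128 gives a LoRA scaling factor of 2). A quick illustrative check with two made-up module paths:

import re

pattern = r"^(?!.*visual).*(?:q_proj|down_proj|k_proj|up_proj|gate_proj|v_proj|o_proj).*"
print(bool(re.match(pattern, "model.layers.0.self_attn.q_proj")))  # True: language-model layer, adapted
print(bool(re.match(pattern, "visual.blocks.0.attn.qkv")))  # False: vision tower, excluded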
llama_factory/v3-20241225-1_5/added_tokens.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
llama_factory/v3-20241225-1_5/all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 2.987551867219917,
+ "total_flos": 2.252458950505005e+18,
+ "train_loss": 1.4155288457870483,
+ "train_runtime": 9888.3803,
+ "train_samples_per_second": 0.584,
+ "train_steps_per_second": 0.009
+ }
llama_factory/v3-20241225-1_5/chat_template.json ADDED
@@ -0,0 +1,3 @@
+ {
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+ }
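This is the stock Qwen2-VL chat template: it prepends a default system prompt when none is given, wraps each turn in <|im_start|>...<|im_end|>, and stands in <|vision_start|><|image_pad|><|vision_end|> for every image. A hedged rendering sketch through the processor saved in this folder:

from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("llama_factory/v3-20241225-1_5")
messages = [{"role": "user",
             "content": [{"type": "image"},
                         {"type": "text", "text": "Describe this image."}]}]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
# Expected shape of the rendered string, per the template above:
# <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n
# <|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>\n
# <|im_start|>assistant\n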
llama_factory/v3-20241225-1_5/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
llama_factory/v3-20241225-1_5/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "do_convert_rgb": true,
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "image_mean": [
+ 0.48145466,
+ 0.4578275,
+ 0.40821073
+ ],
+ "image_processor_type": "Qwen2VLImageProcessor",
+ "image_std": [
+ 0.26862954,
+ 0.26130258,
+ 0.27577711
+ ],
+ "max_pixels": 12845056,
+ "merge_size": 2,
+ "min_pixels": 3136,
+ "patch_size": 14,
+ "processor_class": "Qwen2VLProcessor",
+ "resample": 3,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "max_pixels": 12845056,
+ "min_pixels": 3136
+ },
+ "temporal_patch_size": 2
+ }
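The pixel bounds in this config line up with its patching scheme: one merged visual token covers merge_size x merge_size patches of patch_size x patch_size pixels, i.e. 28 x 28 = 784 pixels, so min_pixels 3136 corresponds to 4 merged tokens and max_pixels 12845056 (= 3584 x 3584) to 16384. A sketch of that arithmetic, read off this config rather than quoted from the Qwen2-VL code:

patch_size, merge_size = 14, 2
pixels_per_token = (patch_size * merge_size) ** 2  # 28 * 28 = 784 pixels per merged token

def visual_tokens(pixels):
    return pixels // pixels_per_token

print(visual_tokens(3136))      # 4 tokens at min_pixels
print(visual_tokens(12845056))  # 16384 tokens at max_pixels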