GameRuiner commited on
Commit
85d2392
·
0 Parent(s):

initial commit

Browse files
.gitattributes ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer_config.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-530/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ checkpoint-530/tokenizer_config.json filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: HKUSTAudio/Llasa-1B-Multilingual
4
+ datasets:
5
+ - amu-cai/pl-asr-bigos-v2
6
+ language:
7
+ - pl
8
+ tags:
9
+ - speech
10
+ - audio
11
+ - polish
12
+ - llama
13
+ - tts
14
+ - fine-tuned
15
+ - text-to-speech
16
+ model-index:
17
+ - name: From Llasa to Łazanki
18
+ results: []
19
+ ---
20
+
21
+ # From Llasa to Łazanki: Fine-tuned Llasa-1B on Polish Speech
22
+
23
+ This is a fine-tuned version of [`HKUSTAudio/Llasa-1B-Multilingual`](https://huggingface.co/HKUSTAudio/Llasa-1B-Multilingual), adapted for **Polish Text-to-Speech (TTS)**.
24
+ It was fine-tuned on the [`pl-asr-bigos-v2`](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2) dataset, specifically the `mozilla-common_voice_15-23` subset, which includes high-quality Polish speech recordings suitable for training TTS models.
25
+
26
+ ---
27
+
28
+ ## 🧠 Base Model
29
+
30
+ The [Llasa-1B-Multilingual](https://huggingface.co/HKUSTAudio/Llasa-1B-Multilingual) model was developed by HKUST. The approach leverages the LLaMA-initialized text BPE tokenizer, which can handle multilingual text without the need to design language-specific G2P (grapheme-to-phoneme) systems.
31
+
32
+ ---
33
+
34
+ ## 🗣 Fine-tuning Details
35
+
36
+ - **Dataset**: [PL-ASR-BIGOS-v2](https://huggingface.co/datasets/amu-cai/pl-asr-bigos-v2), `mozilla-common_voice_15-23` subset
37
+ - **Language**: 🇵🇱 Polish
38
+ - **Task**: Text-to-speech (TTS)
checkpoint-530/config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "HKUSTAudio/Llasa-1B-Multilingual",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 128000,
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
+ "head_dim": 64,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 2048,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 8192,
19
+ "max_position_embeddings": 131072,
20
+ "mlp_bias": false,
21
+ "model_type": "llama",
22
+ "num_attention_heads": 32,
23
+ "num_hidden_layers": 16,
24
+ "num_key_value_heads": 8,
25
+ "pretraining_tp": 1,
26
+ "rms_norm_eps": 1e-05,
27
+ "rope_scaling": {
28
+ "factor": 32.0,
29
+ "high_freq_factor": 4.0,
30
+ "low_freq_factor": 1.0,
31
+ "original_max_position_embeddings": 8192,
32
+ "rope_type": "llama3"
33
+ },
34
+ "rope_theta": 500000.0,
35
+ "tie_word_embeddings": true,
36
+ "torch_dtype": "bfloat16",
37
+ "transformers_version": "4.48.3",
38
+ "use_cache": true,
39
+ "vocab_size": 193800
40
+ }
checkpoint-530/generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
+ "transformers_version": "4.48.3"
12
+ }
checkpoint-530/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fd05ab97760fbc82c2216cc474d9b4aeaef7398f505336843fc1567e096d636
3
+ size 2740113872
checkpoint-530/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db470190e6dd274906201b0e78082ac0ac9ec6d70da5db94bd98139b4ee2387
3
+ size 2783469178
checkpoint-530/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf9097d4513154245c48236b6ec5137b7ee2a21c9f58f2cba798ea275c6026f
3
+ size 14244
checkpoint-530/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2154c128f8be4cd95af425c31ed87f550f6eb515d99bf0ecda87df0720395913
3
+ size 1064
checkpoint-530/special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot_id|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
checkpoint-530/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71d92f3dbf3c23d734e6356241cef149b42fe79848176a54145b6f9a886fd73b
3
+ size 29521206
checkpoint-530/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c51ef4d5bc8bb8449bc16cd24176a66e0edbea7cbd3d3dbe29f102686cb4068
3
+ size 11710463
checkpoint-530/trainer_state.json ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.9937264742785445,
5
+ "eval_steps": 1000,
6
+ "global_step": 530,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0,
13
+ "eval_loss": 8.071162223815918,
14
+ "eval_runtime": 2.2876,
15
+ "eval_samples_per_second": 5.246,
16
+ "eval_steps_per_second": 2.623,
17
+ "step": 0
18
+ },
19
+ {
20
+ "epoch": 0.037641154328732745,
21
+ "grad_norm": 5.03125,
22
+ "learning_rate": 3.125e-05,
23
+ "loss": 7.955,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.07528230865746549,
28
+ "grad_norm": 2.9375,
29
+ "learning_rate": 4.9992528946077346e-05,
30
+ "loss": 7.6295,
31
+ "step": 20
32
+ },
33
+ {
34
+ "epoch": 0.11292346298619825,
35
+ "grad_norm": 3.203125,
36
+ "learning_rate": 4.9908530862563093e-05,
37
+ "loss": 7.5144,
38
+ "step": 30
39
+ },
40
+ {
41
+ "epoch": 0.15056461731493098,
42
+ "grad_norm": 4.46875,
43
+ "learning_rate": 4.9731510619790654e-05,
44
+ "loss": 7.4984,
45
+ "step": 40
46
+ },
47
+ {
48
+ "epoch": 0.18820577164366373,
49
+ "grad_norm": 3.34375,
50
+ "learning_rate": 4.9462129308745496e-05,
51
+ "loss": 7.4781,
52
+ "step": 50
53
+ },
54
+ {
55
+ "epoch": 0.2258469259723965,
56
+ "grad_norm": 4.125,
57
+ "learning_rate": 4.910139294746038e-05,
58
+ "loss": 7.4421,
59
+ "step": 60
60
+ },
61
+ {
62
+ "epoch": 0.26348808030112925,
63
+ "grad_norm": 3.046875,
64
+ "learning_rate": 4.865064872399048e-05,
65
+ "loss": 7.4148,
66
+ "step": 70
67
+ },
68
+ {
69
+ "epoch": 0.30112923462986196,
70
+ "grad_norm": 2.984375,
71
+ "learning_rate": 4.8111579965271914e-05,
72
+ "loss": 7.4157,
73
+ "step": 80
74
+ },
75
+ {
76
+ "epoch": 0.33877038895859474,
77
+ "grad_norm": 3.0625,
78
+ "learning_rate": 4.748619985065281e-05,
79
+ "loss": 7.391,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 0.37641154328732745,
84
+ "grad_norm": 2.953125,
85
+ "learning_rate": 4.677684389357392e-05,
86
+ "loss": 7.3722,
87
+ "step": 100
88
+ },
89
+ {
90
+ "epoch": 0.41405269761606023,
91
+ "grad_norm": 2.921875,
92
+ "learning_rate": 4.598616121947642e-05,
93
+ "loss": 7.3537,
94
+ "step": 110
95
+ },
96
+ {
97
+ "epoch": 0.451693851944793,
98
+ "grad_norm": 3.46875,
99
+ "learning_rate": 4.51171046725099e-05,
100
+ "loss": 7.3362,
101
+ "step": 120
102
+ },
103
+ {
104
+ "epoch": 0.4893350062735257,
105
+ "grad_norm": 2.78125,
106
+ "learning_rate": 4.4172919787987646e-05,
107
+ "loss": 7.3172,
108
+ "step": 130
109
+ },
110
+ {
111
+ "epoch": 0.5269761606022585,
112
+ "grad_norm": 3.109375,
113
+ "learning_rate": 4.315713267177201e-05,
114
+ "loss": 7.3077,
115
+ "step": 140
116
+ },
117
+ {
118
+ "epoch": 0.5646173149309912,
119
+ "grad_norm": 2.9375,
120
+ "learning_rate": 4.207353683185503e-05,
121
+ "loss": 7.3343,
122
+ "step": 150
123
+ },
124
+ {
125
+ "epoch": 0.6022584692597239,
126
+ "grad_norm": 2.953125,
127
+ "learning_rate": 4.0926179011312346e-05,
128
+ "loss": 7.3015,
129
+ "step": 160
130
+ },
131
+ {
132
+ "epoch": 0.6398996235884568,
133
+ "grad_norm": 2.6875,
134
+ "learning_rate": 3.971934407553797e-05,
135
+ "loss": 7.3051,
136
+ "step": 170
137
+ },
138
+ {
139
+ "epoch": 0.6775407779171895,
140
+ "grad_norm": 2.875,
141
+ "learning_rate": 3.845753901019931e-05,
142
+ "loss": 7.284,
143
+ "step": 180
144
+ },
145
+ {
146
+ "epoch": 0.7151819322459222,
147
+ "grad_norm": 2.765625,
148
+ "learning_rate": 3.7145476089672884e-05,
149
+ "loss": 7.2721,
150
+ "step": 190
151
+ },
152
+ {
153
+ "epoch": 0.7528230865746549,
154
+ "grad_norm": 2.703125,
155
+ "learning_rate": 3.5788055278819096e-05,
156
+ "loss": 7.2775,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 0.7904642409033877,
161
+ "grad_norm": 2.59375,
162
+ "learning_rate": 3.4390345933817326e-05,
163
+ "loss": 7.2813,
164
+ "step": 210
165
+ },
166
+ {
167
+ "epoch": 0.8281053952321205,
168
+ "grad_norm": 2.546875,
169
+ "learning_rate": 3.295756787040076e-05,
170
+ "loss": 7.2794,
171
+ "step": 220
172
+ },
173
+ {
174
+ "epoch": 0.8657465495608532,
175
+ "grad_norm": 2.46875,
176
+ "learning_rate": 3.1495071870192465e-05,
177
+ "loss": 7.2772,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 0.903387703889586,
182
+ "grad_norm": 2.640625,
183
+ "learning_rate": 3.000831969794271e-05,
184
+ "loss": 7.274,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 0.9410288582183187,
189
+ "grad_norm": 2.640625,
190
+ "learning_rate": 2.8502863704294235e-05,
191
+ "loss": 7.2748,
192
+ "step": 250
193
+ },
194
+ {
195
+ "epoch": 0.9786700125470514,
196
+ "grad_norm": 3.15625,
197
+ "learning_rate": 2.6984326090249974e-05,
198
+ "loss": 7.275,
199
+ "step": 260
200
+ },
201
+ {
202
+ "epoch": 1.015056461731493,
203
+ "grad_norm": 2.296875,
204
+ "learning_rate": 2.5458377910781118e-05,
205
+ "loss": 7.1885,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 1.052697616060226,
210
+ "grad_norm": 2.546875,
211
+ "learning_rate": 2.3930717895987563e-05,
212
+ "loss": 7.0831,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 1.0903387703889587,
217
+ "grad_norm": 2.59375,
218
+ "learning_rate": 2.2407051168904147e-05,
219
+ "loss": 7.0635,
220
+ "step": 290
221
+ },
222
+ {
223
+ "epoch": 1.1279799247176914,
224
+ "grad_norm": 2.578125,
225
+ "learning_rate": 2.0893067939432276e-05,
226
+ "loss": 7.0866,
227
+ "step": 300
228
+ },
229
+ {
230
+ "epoch": 1.165621079046424,
231
+ "grad_norm": 2.59375,
232
+ "learning_rate": 1.9394422253965264e-05,
233
+ "loss": 7.0638,
234
+ "step": 310
235
+ },
236
+ {
237
+ "epoch": 1.2032622333751568,
238
+ "grad_norm": 2.4375,
239
+ "learning_rate": 1.7916710880068162e-05,
240
+ "loss": 7.0785,
241
+ "step": 320
242
+ },
243
+ {
244
+ "epoch": 1.2409033877038895,
245
+ "grad_norm": 2.515625,
246
+ "learning_rate": 1.6465452405068305e-05,
247
+ "loss": 7.0816,
248
+ "step": 330
249
+ },
250
+ {
251
+ "epoch": 1.2785445420326225,
252
+ "grad_norm": 2.484375,
253
+ "learning_rate": 1.504606662661378e-05,
254
+ "loss": 7.056,
255
+ "step": 340
256
+ },
257
+ {
258
+ "epoch": 1.3161856963613552,
259
+ "grad_norm": 2.484375,
260
+ "learning_rate": 1.3663854312166968e-05,
261
+ "loss": 7.0823,
262
+ "step": 350
263
+ },
264
+ {
265
+ "epoch": 1.353826850690088,
266
+ "grad_norm": 2.421875,
267
+ "learning_rate": 1.2323977403022316e-05,
268
+ "loss": 7.0715,
269
+ "step": 360
270
+ },
271
+ {
272
+ "epoch": 1.3914680050188206,
273
+ "grad_norm": 2.625,
274
+ "learning_rate": 1.1031439736777327e-05,
275
+ "loss": 7.0598,
276
+ "step": 370
277
+ },
278
+ {
279
+ "epoch": 1.4291091593475533,
280
+ "grad_norm": 2.46875,
281
+ "learning_rate": 9.791068360249819e-06,
282
+ "loss": 7.1039,
283
+ "step": 380
284
+ },
285
+ {
286
+ "epoch": 1.466750313676286,
287
+ "grad_norm": 2.53125,
288
+ "learning_rate": 8.607495502629193e-06,
289
+ "loss": 7.0684,
290
+ "step": 390
291
+ },
292
+ {
293
+ "epoch": 1.5043914680050188,
294
+ "grad_norm": 2.4375,
295
+ "learning_rate": 7.485141276183927e-06,
296
+ "loss": 7.0906,
297
+ "step": 400
298
+ },
299
+ {
300
+ "epoch": 1.5420326223337515,
301
+ "grad_norm": 2.46875,
302
+ "learning_rate": 6.428197169130346e-06,
303
+ "loss": 7.0594,
304
+ "step": 410
305
+ },
306
+ {
307
+ "epoch": 1.5796737766624842,
308
+ "grad_norm": 2.46875,
309
+ "learning_rate": 5.44061039230942e-06,
310
+ "loss": 7.0873,
311
+ "step": 420
312
+ },
313
+ {
314
+ "epoch": 1.617314930991217,
315
+ "grad_norm": 2.578125,
316
+ "learning_rate": 4.526069138129674e-06,
317
+ "loss": 7.0843,
318
+ "step": 430
319
+ },
320
+ {
321
+ "epoch": 1.6549560853199499,
322
+ "grad_norm": 2.390625,
323
+ "learning_rate": 3.6879888068274304e-06,
324
+ "loss": 7.0676,
325
+ "step": 440
326
+ },
327
+ {
328
+ "epoch": 1.6925972396486826,
329
+ "grad_norm": 2.484375,
330
+ "learning_rate": 2.9294992514831366e-06,
331
+ "loss": 7.0672,
332
+ "step": 450
333
+ },
334
+ {
335
+ "epoch": 1.7302383939774153,
336
+ "grad_norm": 2.484375,
337
+ "learning_rate": 2.2534330894278672e-06,
338
+ "loss": 7.0841,
339
+ "step": 460
340
+ },
341
+ {
342
+ "epoch": 1.767879548306148,
343
+ "grad_norm": 2.375,
344
+ "learning_rate": 1.6623151236916683e-06,
345
+ "loss": 7.0784,
346
+ "step": 470
347
+ },
348
+ {
349
+ "epoch": 1.805520702634881,
350
+ "grad_norm": 2.421875,
351
+ "learning_rate": 1.1583529139998578e-06,
352
+ "loss": 7.0761,
353
+ "step": 480
354
+ },
355
+ {
356
+ "epoch": 1.8431618569636137,
357
+ "grad_norm": 2.5,
358
+ "learning_rate": 7.434285325303908e-07,
359
+ "loss": 7.0788,
360
+ "step": 490
361
+ },
362
+ {
363
+ "epoch": 1.8808030112923464,
364
+ "grad_norm": 2.359375,
365
+ "learning_rate": 4.1909153522079193e-07,
366
+ "loss": 7.0586,
367
+ "step": 500
368
+ },
369
+ {
370
+ "epoch": 1.9184441656210791,
371
+ "grad_norm": 2.375,
372
+ "learning_rate": 1.8655317487370762e-07,
373
+ "loss": 7.0723,
374
+ "step": 510
375
+ },
376
+ {
377
+ "epoch": 1.9560853199498118,
378
+ "grad_norm": 2.46875,
379
+ "learning_rate": 4.6681877672488664e-08,
380
+ "loss": 7.0732,
381
+ "step": 520
382
+ },
383
+ {
384
+ "epoch": 1.9937264742785445,
385
+ "grad_norm": 2.40625,
386
+ "learning_rate": 0.0,
387
+ "loss": 7.0647,
388
+ "step": 530
389
+ }
390
+ ],
391
+ "logging_steps": 10,
392
+ "max_steps": 530,
393
+ "num_input_tokens_seen": 0,
394
+ "num_train_epochs": 2,
395
+ "save_steps": 999,
396
+ "stateful_callbacks": {
397
+ "TrainerControl": {
398
+ "args": {
399
+ "should_epoch_stop": false,
400
+ "should_evaluate": false,
401
+ "should_log": false,
402
+ "should_save": true,
403
+ "should_training_stop": true
404
+ },
405
+ "attributes": {}
406
+ }
407
+ },
408
+ "total_flos": 4.559234065886085e+17,
409
+ "train_batch_size": 24,
410
+ "trial_name": null,
411
+ "trial_params": null
412
+ }
checkpoint-530/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2385a64eee32d54ec3e8e605589b6646e81f3573e8d4fbfe5b17b4826442f24
3
+ size 5523622102
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "HKUSTAudio/Llasa-1B-Multilingual",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 128000,
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
+ "head_dim": 64,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 2048,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 8192,
19
+ "max_position_embeddings": 131072,
20
+ "mlp_bias": false,
21
+ "model_type": "llama",
22
+ "num_attention_heads": 32,
23
+ "num_hidden_layers": 16,
24
+ "num_key_value_heads": 8,
25
+ "pretraining_tp": 1,
26
+ "rms_norm_eps": 1e-05,
27
+ "rope_scaling": {
28
+ "factor": 32.0,
29
+ "high_freq_factor": 4.0,
30
+ "low_freq_factor": 1.0,
31
+ "original_max_position_embeddings": 8192,
32
+ "rope_type": "llama3"
33
+ },
34
+ "rope_theta": 500000.0,
35
+ "tie_word_embeddings": true,
36
+ "torch_dtype": "bfloat16",
37
+ "transformers_version": "4.48.3",
38
+ "use_cache": true,
39
+ "vocab_size": 193800
40
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
+ "transformers_version": "4.48.3"
12
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fd05ab97760fbc82c2216cc474d9b4aeaef7398f505336843fc1567e096d636
3
+ size 2740113872
special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot_id|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71d92f3dbf3c23d734e6356241cef149b42fe79848176a54145b6f9a886fd73b
3
+ size 29521206
tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c51ef4d5bc8bb8449bc16cd24176a66e0edbea7cbd3d3dbe29f102686cb4068
3
+ size 11710463
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2385a64eee32d54ec3e8e605589b6646e81f3573e8d4fbfe5b17b4826442f24
3
+ size 5523622102