macqueen01 committed on
Commit
d17ed18
·
verified ·
1 Parent(s): 28ea7f2

Training in progress, epoch 3

Browse files
config.json CHANGED
@@ -29,8 +29,8 @@
29
  },
30
  "rope_theta": 500000.0,
31
  "tie_word_embeddings": true,
32
- "torch_dtype": "bfloat16",
33
- "transformers_version": "4.46.1",
34
  "use_cache": false,
35
  "vocab_size": 128256
36
  }
 
29
  },
30
  "rope_theta": 500000.0,
31
  "tie_word_embeddings": true,
32
+ "torch_dtype": "float16",
33
+ "transformers_version": "4.46.2",
34
  "use_cache": false,
35
  "vocab_size": 128256
36
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01c5b0a8b5e7cd6ce31312ec7f3613565626b594f0ba71a6a409c1260308e426
3
- size 4965801344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53e398468bbe19f8601d992c08d1a2ba3517ed7ac0c9ea1250cdf26789193d17
3
+ size 4965801152
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4ceb4e31b2ebc30f66aaded057b6cc38b87d17b1eb44c7e1765ccc44bf940a4
3
- size 1547842640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d6c9110ee616e0e7ae4642794095286f8b44b20ed18d0d9e9118081a7ccbf77
3
+ size 1528966512
model.safetensors.index.json CHANGED
@@ -1,16 +1,58 @@
1
  {
2
  "metadata": {
3
- "total_size": 6513610752
4
  },
5
  "weight_map": {
6
- "cross_attention.Wkv.bias": "model-00002-of-00002.safetensors",
7
- "cross_attention.Wkv.weight": "model-00002-of-00002.safetensors",
8
- "cross_attention.Wq.bias": "model-00002-of-00002.safetensors",
9
- "cross_attention.Wq.weight": "model-00002-of-00002.safetensors",
10
- "cross_attention.out_proj.bias": "model-00002-of-00002.safetensors",
11
- "cross_attention.out_proj.weight": "model-00002-of-00002.safetensors",
12
- "image_proj.bias": "model-00002-of-00002.safetensors",
13
- "image_proj.weight": "model-00002-of-00002.safetensors",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  "llama_model.model.embed_tokens.weight": "model-00001-of-00002.safetensors",
15
  "llama_model.model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
16
  "llama_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 6494730240
4
  },
5
  "weight_map": {
6
+ "cross_attention.heads.0.key.bias": "model-00002-of-00002.safetensors",
7
+ "cross_attention.heads.0.key.weight": "model-00002-of-00002.safetensors",
8
+ "cross_attention.heads.0.query.bias": "model-00002-of-00002.safetensors",
9
+ "cross_attention.heads.0.query.weight": "model-00002-of-00002.safetensors",
10
+ "cross_attention.heads.0.value.bias": "model-00002-of-00002.safetensors",
11
+ "cross_attention.heads.0.value.weight": "model-00002-of-00002.safetensors",
12
+ "cross_attention.heads.1.key.bias": "model-00002-of-00002.safetensors",
13
+ "cross_attention.heads.1.key.weight": "model-00002-of-00002.safetensors",
14
+ "cross_attention.heads.1.query.bias": "model-00002-of-00002.safetensors",
15
+ "cross_attention.heads.1.query.weight": "model-00002-of-00002.safetensors",
16
+ "cross_attention.heads.1.value.bias": "model-00002-of-00002.safetensors",
17
+ "cross_attention.heads.1.value.weight": "model-00002-of-00002.safetensors",
18
+ "cross_attention.heads.2.key.bias": "model-00002-of-00002.safetensors",
19
+ "cross_attention.heads.2.key.weight": "model-00002-of-00002.safetensors",
20
+ "cross_attention.heads.2.query.bias": "model-00002-of-00002.safetensors",
21
+ "cross_attention.heads.2.query.weight": "model-00002-of-00002.safetensors",
22
+ "cross_attention.heads.2.value.bias": "model-00002-of-00002.safetensors",
23
+ "cross_attention.heads.2.value.weight": "model-00002-of-00002.safetensors",
24
+ "cross_attention.heads.3.key.bias": "model-00002-of-00002.safetensors",
25
+ "cross_attention.heads.3.key.weight": "model-00002-of-00002.safetensors",
26
+ "cross_attention.heads.3.query.bias": "model-00002-of-00002.safetensors",
27
+ "cross_attention.heads.3.query.weight": "model-00002-of-00002.safetensors",
28
+ "cross_attention.heads.3.value.bias": "model-00002-of-00002.safetensors",
29
+ "cross_attention.heads.3.value.weight": "model-00002-of-00002.safetensors",
30
+ "cross_attention.heads.4.key.bias": "model-00002-of-00002.safetensors",
31
+ "cross_attention.heads.4.key.weight": "model-00002-of-00002.safetensors",
32
+ "cross_attention.heads.4.query.bias": "model-00002-of-00002.safetensors",
33
+ "cross_attention.heads.4.query.weight": "model-00002-of-00002.safetensors",
34
+ "cross_attention.heads.4.value.bias": "model-00002-of-00002.safetensors",
35
+ "cross_attention.heads.4.value.weight": "model-00002-of-00002.safetensors",
36
+ "cross_attention.heads.5.key.bias": "model-00002-of-00002.safetensors",
37
+ "cross_attention.heads.5.key.weight": "model-00002-of-00002.safetensors",
38
+ "cross_attention.heads.5.query.bias": "model-00002-of-00002.safetensors",
39
+ "cross_attention.heads.5.query.weight": "model-00002-of-00002.safetensors",
40
+ "cross_attention.heads.5.value.bias": "model-00002-of-00002.safetensors",
41
+ "cross_attention.heads.5.value.weight": "model-00002-of-00002.safetensors",
42
+ "cross_attention.heads.6.key.bias": "model-00002-of-00002.safetensors",
43
+ "cross_attention.heads.6.key.weight": "model-00002-of-00002.safetensors",
44
+ "cross_attention.heads.6.query.bias": "model-00002-of-00002.safetensors",
45
+ "cross_attention.heads.6.query.weight": "model-00002-of-00002.safetensors",
46
+ "cross_attention.heads.6.value.bias": "model-00002-of-00002.safetensors",
47
+ "cross_attention.heads.6.value.weight": "model-00002-of-00002.safetensors",
48
+ "cross_attention.heads.7.key.bias": "model-00002-of-00002.safetensors",
49
+ "cross_attention.heads.7.key.weight": "model-00002-of-00002.safetensors",
50
+ "cross_attention.heads.7.query.bias": "model-00002-of-00002.safetensors",
51
+ "cross_attention.heads.7.query.weight": "model-00002-of-00002.safetensors",
52
+ "cross_attention.heads.7.value.bias": "model-00002-of-00002.safetensors",
53
+ "cross_attention.heads.7.value.weight": "model-00002-of-00002.safetensors",
54
+ "cross_attention.output_linear.bias": "model-00002-of-00002.safetensors",
55
+ "cross_attention.output_linear.weight": "model-00002-of-00002.safetensors",
56
  "llama_model.model.embed_tokens.weight": "model-00001-of-00002.safetensors",
57
  "llama_model.model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
58
  "llama_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b98c07ee1994bcc03bd5d507efed729bcd395fce7601eb2e411b564467ea482
3
- size 6456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a561bc35ea25ffc08623bb5f07cd8e4757f3c0b68fa2ea16f086ce3920e1c49
3
+ size 6075