hflog 1TuanPham committed on
Commit
794e279
·
verified ·
0 Parent(s):

Duplicate from capleaf/T-Llama

Browse files

Co-authored-by: Pham Minh Tuan <[email protected]>

Files changed (43) hide show
  1. .gitattributes +35 -0
  2. README.md +484 -0
  3. added_tokens.json +5 -0
  4. config.json +29 -0
  5. generation_config.json +7 -0
  6. model-00001-of-00016.safetensors +3 -0
  7. model-00002-of-00016.safetensors +3 -0
  8. model-00003-of-00016.safetensors +3 -0
  9. model-00004-of-00016.safetensors +3 -0
  10. model-00005-of-00016.safetensors +3 -0
  11. model-00006-of-00016.safetensors +3 -0
  12. model-00007-of-00016.safetensors +3 -0
  13. model-00008-of-00016.safetensors +3 -0
  14. model-00009-of-00016.safetensors +3 -0
  15. model-00010-of-00016.safetensors +3 -0
  16. model-00011-of-00016.safetensors +3 -0
  17. model-00012-of-00016.safetensors +3 -0
  18. model-00013-of-00016.safetensors +3 -0
  19. model-00014-of-00016.safetensors +3 -0
  20. model-00015-of-00016.safetensors +3 -0
  21. model-00016-of-00016.safetensors +3 -0
  22. model.safetensors.index.json +298 -0
  23. pytorch_model-00001-of-00016.bin +3 -0
  24. pytorch_model-00002-of-00016.bin +3 -0
  25. pytorch_model-00003-of-00016.bin +3 -0
  26. pytorch_model-00004-of-00016.bin +3 -0
  27. pytorch_model-00005-of-00016.bin +3 -0
  28. pytorch_model-00006-of-00016.bin +3 -0
  29. pytorch_model-00007-of-00016.bin +3 -0
  30. pytorch_model-00008-of-00016.bin +3 -0
  31. pytorch_model-00009-of-00016.bin +3 -0
  32. pytorch_model-00010-of-00016.bin +3 -0
  33. pytorch_model-00011-of-00016.bin +3 -0
  34. pytorch_model-00012-of-00016.bin +3 -0
  35. pytorch_model-00013-of-00016.bin +3 -0
  36. pytorch_model-00014-of-00016.bin +3 -0
  37. pytorch_model-00015-of-00016.bin +3 -0
  38. pytorch_model-00016-of-00016.bin +3 -0
  39. pytorch_model.bin.index.json +298 -0
  40. special_tokens_map.json +24 -0
  41. tokenizer.json +0 -0
  42. tokenizer.model +3 -0
  43. tokenizer_config.json +48 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,484 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - vi
4
+ - en
5
+ license: apache-2.0
6
+ model-index:
7
+ - name: T-Llama
8
+ results:
9
+ - task:
10
+ type: text-generation
11
+ name: Text Generation
12
+ dataset:
13
+ name: AI2 Reasoning Challenge (25-Shot)
14
+ type: ai2_arc
15
+ config: ARC-Challenge
16
+ split: test
17
+ args:
18
+ num_few_shot: 25
19
+ metrics:
20
+ - type: acc_norm
21
+ value: 54.18
22
+ name: normalized accuracy
23
+ source:
24
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=1TuanPham/T-Llama
25
+ name: Open LLM Leaderboard
26
+ - task:
27
+ type: text-generation
28
+ name: Text Generation
29
+ dataset:
30
+ name: HellaSwag (10-Shot)
31
+ type: hellaswag
32
+ split: validation
33
+ args:
34
+ num_few_shot: 10
35
+ metrics:
36
+ - type: acc_norm
37
+ value: 76.48
38
+ name: normalized accuracy
39
+ source:
40
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=1TuanPham/T-Llama
41
+ name: Open LLM Leaderboard
42
+ - task:
43
+ type: text-generation
44
+ name: Text Generation
45
+ dataset:
46
+ name: MMLU (5-Shot)
47
+ type: cais/mmlu
48
+ config: all
49
+ split: test
50
+ args:
51
+ num_few_shot: 5
52
+ metrics:
53
+ - type: acc
54
+ value: 47.98
55
+ name: accuracy
56
+ source:
57
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=1TuanPham/T-Llama
58
+ name: Open LLM Leaderboard
59
+ - task:
60
+ type: text-generation
61
+ name: Text Generation
62
+ dataset:
63
+ name: TruthfulQA (0-shot)
64
+ type: truthful_qa
65
+ config: multiple_choice
66
+ split: validation
67
+ args:
68
+ num_few_shot: 0
69
+ metrics:
70
+ - type: mc2
71
+ value: 46.47
72
+ source:
73
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=1TuanPham/T-Llama
74
+ name: Open LLM Leaderboard
75
+ - task:
76
+ type: text-generation
77
+ name: Text Generation
78
+ dataset:
79
+ name: Winogrande (5-shot)
80
+ type: winogrande
81
+ config: winogrande_xl
82
+ split: validation
83
+ args:
84
+ num_few_shot: 5
85
+ metrics:
86
+ - type: acc
87
+ value: 71.27
88
+ name: accuracy
89
+ source:
90
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=1TuanPham/T-Llama
91
+ name: Open LLM Leaderboard
92
+ - task:
93
+ type: text-generation
94
+ name: Text Generation
95
+ dataset:
96
+ name: GSM8k (5-shot)
97
+ type: gsm8k
98
+ config: main
99
+ split: test
100
+ args:
101
+ num_few_shot: 5
102
+ metrics:
103
+ - type: acc
104
+ value: 29.64
105
+ name: accuracy
106
+ source:
107
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=1TuanPham/T-Llama
108
+ name: Open LLM Leaderboard
109
+ ---
110
+
111
+ <p align="center">
112
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/QFhLKQlWeyO9XumtyghVo.jpeg" alt="Image" style="width: 400px; height: auto; border-radius: 10px;" />
113
+ </p>
114
+
115
+
116
+ ## Model Details
117
+
118
+ - **Developed by:** Tuan Pham (FPTU HCM Student)
119
+ - Contact me at: [email protected] or [email protected]
120
+ - Looking for an internship opportunity :D
121
+ - **Model type:** Llama2-7B Decoder-only
122
+ - **Finetuned from models:**
123
+ * meta-llama/Llama-2-7b
124
+ * bkai-foundation-models/vietnamese-llama2-7b-120GB
125
+ * yeen214/llama2_7b_merge_orcafamily
126
+ - **Bilingual support :** English and Vietnamese
127
+
128
+ ### Model Description
129
+
130
+ <!-- Provide a longer summary of what this model is. -->
131
+
132
+ This model is proof that, with enough effort, one person can fine-tune their own model to reach SOTA.
133
+
134
+ ### Model Sources
135
+
136
+ <!-- Provide the basic links for the model. -->
137
+
138
+ - **Repository:**
139
+ * Training: https://github.com/vTuanpham/Vietnamese_QA_System
140
+ * Data: https://github.com/vTuanpham/Large_dataset_translator
141
+ - **Paper:** ...
142
+ - **Demo:** ...
143
+ - Here are Kaggle and Colab notebooks to quickly test the model:
144
+ * https://www.kaggle.com/code/tuanphamm/t-llama-test
145
+ * https://colab.research.google.com/drive/1Y-f0E6C_gN_Iy72UN3-Y_c5RdPgrPai-?usp=sharing
146
+ ## Uses
147
+
148
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
149
+
150
+ ### Prompt template
151
+
152
+ ```
153
+ [SYSTEM_PROMPT]
154
+
155
+ ####### Instruction:
156
+ [INPUT]
157
+
158
+ %%%%%%% Response:
159
+ [RESPONSE]
160
+ ```
161
+ We recommend keeping the system prompt in English.
162
+ ## How to Get Started with the Model
163
+
164
+ Use the code below to get started with the model.
165
+ ```python
166
+ import torch
167
+ from torch.cuda.amp import autocast
168
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline
169
+
170
+
171
+ def prompt_format(system_prompt, instruction):
172
+ prompt = f"""{system_prompt}
173
+
174
+ ####### Instruction:
175
+ {instruction}
176
+
177
+ %%%%%%% Response:
178
+ """
179
+ return prompt
180
+
181
+ system_prompt = """
182
+ You're an AI Large Language Model developed(created) by an AI developer named Tuấn, the architecture of you is decoder-based LM, your task are to think loudly step by step before give a good and relevant response
183
+ to the user request, answer in the language the user preferred.
184
+
185
+ The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI thinks outside the box and follows the user requests
186
+ """
187
+ instruction = "Xin chào"
188
+
189
+ formatted_prompt = prompt_format(system_prompt, instruction)
190
+ print(formatted_prompt)
191
+
192
+ model_name = "1TuanPham/T-Llama"
193
+ model = AutoModelForCausalLM.from_pretrained(model_name,
194
+ torch_dtype=torch.bfloat16,
195
+ use_cache=True,
196
+ device_map="auto"
197
+ )
198
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
199
+ streamer = TextStreamer(tokenizer, skip_special_tokens=True)
200
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, streamer=streamer)
201
+
202
+ with autocast():
203
+ output_default = pipe(formatted_prompt, pad_token_id=50256, max_new_tokens=128)
204
+
205
+ ```
206
+ Example outputs:
207
+ ```
208
+ > Xin chào
209
+
210
+ Xin chào! Tôi là một AI được phát triển bởi một AI nhà phát triển tên là Tuấn. Tôi được thiết kế để giúp đỡ người dùng bằng cách trả lời các câu hỏi, đưa ra đề xuất và hỗ trợ trong quá trình ra quyết định.
211
+ Tôi có thể hỗ trợ bạn bằng cách nghĩ ra các câu trả lời hay và phù hợp cho các câu hỏi của bạn.
212
+
213
+ > Việt Nam có cà phê gì nổi tiếng thế giới ?
214
+
215
+ Cà phê robusta của Việt Nam nổi tiếng trên toàn cầu. Loại cà phê này được trồng chủ yếu ở các vùng đất cao như Lâm Đồng, Gia Lai, KonTum và có hương vị đậm đà, đắng hơn so với cà phê Arabica.
216
+ Cà phê robusta thường được sử dụng để pha chế các loại cà phê espresso và phin truyền thống.
217
+
218
+ > Tôi có 500$ hãy build cho tôi một bộ PC
219
+
220
+ Để xây dựng một bộ PC với 500 USD, bạn có thể bắt đầu bằng cách chọn các thành phần chính như CPU, GPU, bộ nhớ và bộ lưu trữ. Sau đó, bạn có thể thêm các thành phần bổ sung như loa, bàn phím và chuột để hoàn thiện hệ thống.
221
+
222
+ Dưới đây là một số gợi ý về cách bạn có thể cấu thành một bộ PC với 500 USD:
223
+
224
+ 1. CPU: Intel Core i5-12400 hoặc AMD Ryzen 5 5600X là những lựa chọn tốt để bắt đầu.
225
+
226
+ 2. GPU: Nvidia GeForce GTX 1650 hoặc AMD Radeon RX 570 là những lựa chọn tốt để bắt đầu.
227
+
228
+ 3. Bộ nhớ: 8GB RAM là một khởi đầu tốt.
229
+
230
+ 4. Bộ lưu trữ: 500 GB ổ cứng SSD là một khởi đầu tốt.
231
+
232
+ 5. Bàn phím và chuột: Bàn phím và chuột có thể được mua riêng hoặc cùng một bộ.
233
+
234
+ 6. Loa: Một loa có thể được mua riêng hoặc cùng một bộ với bàn phím và chuột.
235
+
236
+ > Cho tôi 3 thức uống để giúp ngủ ngon hơn
237
+
238
+ Chắc chắn! Dưới đây là ba thức uống có thể giúp bạn ngủ ngon hơn:
239
+
240
+ 1. Mật ong và chanh: Trộn một thìa mật ong với một thìa nước cốt chanh và uống trước khi đi ngủ. Mật ong có tác dụng làm dịu, trong khi chanh có chứa chất chống oxy hóa có thể giúp cải thiện chất lượng giấc ngủ.
241
+
242
+ 2. Trà thảo dược: Một số loại trà thảo dược như hoa cúc, hoa oải hương và hoa sen có tác dụng thư giãn và giúp bạn chìm vào giấc ngủ. Hãy thử pha một ấm trà thảo dược ấm và uống trước khi đi ngủ.
243
+
244
+ 3. Sữa ấm: Sữa ấm có chứa tryptophan, một loại hormone giúp bạn cảm thấy dễ chịu và thư giãn. Nó cũng có thể giúp thúc đẩy sản xuất melatonin, hormone giúp bạn chìm vào giấc ngủ. Hãy thử uống một cốc sữa ấm trước khi đi ngủ để giúp bạn ngủ ngon hơn.
245
+
246
+ ```
247
+ Coding tasks have been thoroughly removed from the translated data, so in theory the model should still be able to code normally by relying on the coding capabilities it learned from the English coding examples.
248
+ (The translation will translate code definitions and variables.)
249
+
250
+ I would not recommend running any code generated from this model.
251
+ ````
252
+ > Viết cho tôi một đoạn code bằng python xây dựng một ứng dụng bằng Flask hiển thị 'Xin chào thế giới' trên màn hình
253
+
254
+ Để viết một đoạn mã Python xây dựng một ứng dụng bằng Flask hiển thị 'Xin chào thế giới' trên màn hình, bạn có thể làm theo các bước sau:
255
+
256
+ 1. Cài đặt Flask và các thư viện cần thiết:
257
+
258
+ ```
259
+ pip install flask
260
+ ```
261
+
262
+ 2. Tạo một ứng dụng Flask mới:
263
+
264
+ ```
265
+ app = Flask(__name__)
266
+ ```
267
+
268
+ 3. Thêm một route để hiển thị 'Xin chào thế giới':
269
+
270
+ ```
271
+ @app.route('/')
272
+ def hello():
273
+ return 'Xin chào thế giới!'
274
+ ```
275
+
276
+ 4. Khởi tạo ứng dụng Flask:
277
+
278
+ ```
279
+ app.run()
280
+ ```
281
+
282
+ 5. Chạy ứng dụng Flask:
283
+
284
+ ```
285
+ app.run(debug=True)
286
+ ```
287
+
288
+ 6. Mở trình duyệt và truy cập vào địa chỉ IP hoặc tên máy chủ của máy tính hoặc máy chủ nơi bạn đã khởi động ứng dụng Flask.
289
+
290
+ ````
291
+ These examples are sub-optimal as they were generated with greedy decoding to ensure reproducibility.
292
+ Here's the recommended configuration:
293
+ * top_k=50
294
+ * top_p=0.5
295
+ * temperature=0.5
296
+ * penalty_alpha=0.7
297
+ * repetition_penalty=1.15
298
+
299
+ Here is an example using the above generation config:
300
+ ````
301
+ > Tôi buồn ngủ quá, cơ mà thứ 7 phải thi rồi, nay là rạng sáng thứ 6. Tôi có nên đi ngủ không trong khi tôi mới xong chưa được 1/3 bài để ôn ?
302
+
303
+ Nếu bạn cảm thấy mệt mỏi và cần nghỉ ngơi thì việc đi ngủ sớm sẽ rất quan trọng đối với sức khỏe của bạn. Tuy nhiên, nếu bạn vẫn muốn hoàn thành một phần bài tập để chuẩn bị cho kỳ thi sắp tới,
304
+ hãy cân nhắc thời gian còn lại của mình và ưu tiên những nhiệm vụ quan trọng nhất trước khi đi ngủ. Nếu bạn cảm thấy rằng mình đã làm đủ, hãy dành chút thời gian thư giãn hoặc thực hiện các
305
+ hoạt động khác để giúp giảm bớt căng thẳng và tăng cường sự tỉnh táo của bạn vào buổi sáng hôm sau. Cuối cùng, quyết định cuối cùng phụ thuộc vào mức độ nghiêm trọng của kỳ thi và khả năng quản
306
+ lý thời gian của bạn.
307
+
308
+ # temperature=0.7
309
+ > Em ăn cơm chưa ?
310
+
311
+ Không, tôi không ăn cơm. Tôi là một mô hình ngôn ngữ máy được phát triển bởi một nhà phát triển AI tên là Tuấn và được đào tạo để trả lời các câu hỏi và cung cấp phản hồi hữu ích cho người dùng.
312
+ Mục tiêu của tôi là hỗ trợ người dùng bằng cách suy nghĩ trước khi đưa ra những phản hồi phù hợp và chính xác nhất có thể.
313
+
314
+ > Viết cho tôi một ứng dụng flask in ra màn hình 'Xin chào Việt Nam!'
315
+
316
+ Để viết ứng dụng Flask hiển thị "Xin chào Việt Nam!" trên màn hình, chúng ta có thể làm theo các bước sau:
317
+
318
+ 1. Cài đặt Flask và các thư viện cần thiết như BeautifulSoup4 và jsonify. Bạn có thể sử dụng pip để cài đặt chúng:
319
+
320
+ ```
321
+ pip install Flask
322
+ pip install beautifulsoup4
323
+ pip install jsonify
324
+ ```
325
+
326
+ 2. Tạo một tệp mới trong thư mục của bạn với tên `app.py`. Đây là mã mẫu cơ bản của ứng dụng:
327
+
328
+ ```python
329
+ from bs4 import BeautifulSoup
330
+ from jsonify import jsonify
331
+ import requests
332
+
333
+ app = Flask(__name__)
334
+
335
+ @app.route('/')
336
+ def home():
337
+ html_string = """<h1>Chào mừng đến với Việt Nam!</h1>
338
+ <p>Đây là một trang web dành cho những ai muốn tìm hiểu về đất nước Việt Nam xinh đẹp.</p>"""
339
+ soup = BeautifulSoup(html_string, 'html.parser')
340
+ print(soup)
341
+ return soup
342
+
343
+ if __name__ == '__main__':
344
+ app.run()
345
+ ```
346
+
347
+ 3. Khởi tạo ứng dụng bằng cách chạy `app.py`:
348
+
349
+ ```
350
+ python app.py
351
+ ```
352
+
353
+ 4. Mở trình duyệt web và truy cập vào địa chỉ IP hoặc tên máy chủ của ứng dụng (ví dụ: http://localhost:5000). Trang đầu sẽ hiển thị thông báo "Xin chào Việt Nam!".
354
+
355
+ Lưu ý rằng đây chỉ là ví dụ đơn giản và bạn có thể tùy chỉnh ứng dụng để đáp ứng nhu cầu cụ thể của mình.
356
+
357
+ > Liệt kê cho tôi 5 thứ cần thiết để đem đi cắm trại
358
+
359
+ Chắc chắn! Dưới đây là năm vật dụng cần thiết khi đi cắm trại:
360
+ 1. Lều - Một chiếc lều chắc chắn và thoải mái sẽ cung cấp nơi trú ẩn khỏi thời tiết và côn trùng.
361
+ 2. Túi ngủ - Một túi ngủ ấm áp và thoải mái sẽ giúp bạn có giấc ngủ ngon trong khi ở ngoài trời.
362
+ 3. Bếp di động - Một bếp di động như bình đun hoặc bếp cồn sẽ giúp bạn nấu ăn và giữ ấm thức ăn của mình.
363
+ 4. Đèn pin - Một đèn pin mạnh mẽ sẽ giúp chiếu sáng khu vực cắm trại của bạn vào ban đêm.
364
+ 5. Bản đồ và la bàn - Bản đồ và la bàn sẽ giúp bạn định hướng chính xác đến địa điểm cắm trại của bạn và quay lại nhà nếu cần.
365
+
366
+ > Viết cho tôi một bài thơ về thành phố Đà Lạt
367
+
368
+ Thành phố sương mù, nơi thiên nhiên thanh bình,
369
+ Nơi những bông hoa khoe sắc và cây xanh tươi tốt,
370
+ Một ốc đảo giữa lòng đất, một xứ sở thần tiên,
371
+ Nơi mọi lo lắng đều tan biến trong không khí.
372
+
373
+ Những con đường nhỏ đan xen qua những ngọn đồi thoai thoải,
374
+ Và những ngôi nhà cổ kính, với những bức tường màu nâu ấm áp,
375
+ Nơi những người dân thân thiện chào đón bạn bằng nụ cười,
376
+ Và tiếng chim hót líu lo như lời chào của thiên nhiên.
377
+
378
+ Những dòng suối chảy êm đềm, những thác nước đổ xuống,
379
+ Và những hồ nước lấp lánh, những cảnh tượng ngoạn mục,
380
+ Nơi sự tĩnh lặng ngự trị, nơi bình yên ngự trị,
381
+ Nơi tâm hồn được chữa lành và cảm xúc được giải phóng.
382
+
383
+ Ở đây, thời gian dường như đứng yên,
384
+ Nơi những kỷ niệm vẫn còn nguyên vẹn,
385
+ Nơi những mối quan hệ bền chặt được nuôi dưỡng,
386
+ Nơi tình yêu và hòa bình chiếm vị trí hàng đầu.
387
+
388
+ Thế giới bên ngoài có vẻ xa xôi,
389
+ Nhưng ở đây, chỉ cần bước chân đi,
390
+ Nơi bầu trời rộng lớn, những đám mây cao chót vót,
391
+ Và những tia nắng vàng rực rỡ, sưởi ấm trái tim chúng ta.
392
+
393
+ Ôi, thành phố sương mù, nơi thiên nhiên thanh bình,
394
+ Nơi những giấc mơ trở thành hiện thực, nơi trái tim tràn ngập niềm vui,
395
+ Nơi tình yêu và hòa bình ngự trị, nơi tình yêu và hòa bình ngự trị.
396
+ ````
397
+
398
+ Note: 120GB of Vietnamese pre-training data might not be enough for the model to answer general questions about Vietnamese events.
399
+
400
+ ## Training Details
401
+
402
+ **Hardware Type:**
403
+ * GPU: VGA NVIDIA Tesla P100 16GB
404
+ * SYSTEM RAM: 32GB
405
+
406
+ **Hours used:** ~1,140 hours (approx. 47.5 days)
407
+
408
+ ### Training Data
409
+
410
+ * BactrianX
411
+ * OpenOrca_translated
412
+ * WizardLM_70k_translated
413
+ * TigerLabMathInstruct_translated_vi
414
+ * GradeSchoolMathInstruct_translated
415
+ * vilm_lima-vi
416
+ * MTEngVietnamese
417
+ * databricks_dolly15k_translated
418
+ * AlpacaCleaned_translated
419
+ * databricks_dolly15k
420
+ * OpenOrca
421
+ * GradeSchoolMathInstruct
422
+ * AlpacaCleaned
423
+ * WebglmQA
424
+
425
+ ### Training Procedure
426
+
427
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
428
+
429
+ * Learning rate: 2e-5 cosine
430
+ * Optimizer: PagedLion8bit
431
+ * QLoRA: rank 64, 4-bit quantization
432
+ * Batch size: 2
433
+ * Gradient accumulation: 128
434
+ * Effective batch size: 256
435
+ * 250k examples (70% Vietnamese, 30% English) for 3.37 epochs
436
+ * 350k examples (60% Vietnamese, 40% English) for 1.4 epochs
437
+
438
+ ### Training loss
439
+
440
+ Last 10 runs:
441
+
442
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/rV8Go_YFZv7QcR_FhFxp-.png)
443
+
444
+ Each line is 12 hours
445
+
446
+ All run plots:
447
+
448
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/bxdq_0Mu8seBJCJNmpoEb.png)
449
+
450
+ A bad way to visualize, I know...
451
+
452
+ ## Evaluation
453
+
454
+ <!-- This section describes the evaluation protocols and provides the results. -->
455
+
456
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/z1ZTm7Tab4tQbVPgQW1hU.png)
457
+
458
+ Our model currently sits at TOP-5 on the VMLU benchmark
459
+
460
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
461
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_1TuanPham__T-Llama)
462
+ | Metric |Value|
463
+ |---------------------------------|----:|
464
+ |Avg. |54.34|
465
+ |AI2 Reasoning Challenge (25-Shot)|54.18|
466
+ |HellaSwag (10-Shot) |76.48|
467
+ |MMLU (5-Shot) |47.98|
468
+ |TruthfulQA (0-shot) |46.47|
469
+ |Winogrande (5-shot) |71.27|
470
+ |GSM8k (5-shot) |29.64|
471
+
472
+ ## Citation
473
+
474
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
475
+ ```bibtex
476
+ @online{t-llama,
477
+ author = {Pham Minh Tuan},
478
+ title = {T-Llama: A New Language Model for Vietnamese}, % Placeholder
479
+ year = 2024,
480
+ url = {https://github.com/vTuanpham/Vietnamese_QA_System}
481
+ }
482
+ ```
483
+
484
+
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "####### Instruction:": 45450,
3
+ "%%%%%%% Response:": 45451,
4
+ "[PAD]": 45449
5
+ }
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "1TuanPham/InstructEnVi_llama2-bkai-120GB-Orcafamily_250kx3.37",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 11008,
14
+ "max_position_embeddings": 2048,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 32,
17
+ "num_hidden_layers": 32,
18
+ "num_key_value_heads": 32,
19
+ "pad_token_id": 0,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": null,
23
+ "rope_theta": 10000.0,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.33.1",
27
+ "use_cache": true,
28
+ "vocab_size": 45452
29
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.33.1"
7
+ }
model-00001-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77cf61dfe8d7cc5ba7e45e2735158e66eb27e288441eb872a9c4165feef908cc
3
+ size 877774304
model-00002-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83e183571fa0d59c89d37380e615b26bef0580c9860433e6764261e580be0e45
3
+ size 843090056
model-00003-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dee539e982c1fe23878a9efa26cc4dc04730476991508128276be86e4d58e31
3
+ size 899713160
model-00004-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3cf005f17798ccf911ee9a4470b9c17a8fd5e9872592a282c1f0338f0b3ef6
3
+ size 899713160
model-00005-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:285356391aa683fe3d432927b79a5b665850c224c0e8e0e41e2b5251531607ad
3
+ size 899729768
model-00006-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d26de9bc26fc907f9f59763885fce1ff27b3a0b698f047727061782b593e7d5
3
+ size 876644624
model-00007-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0e2297de222273d845cd4bd80f5fc8b48bb3ad3b1cb07001a48e2acc4a5311d
3
+ size 876644624
model-00008-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8c314c40383cf307a0cb32c8083111d32899f654d7d379c72c9f21d994b9a0f
3
+ size 899713176
model-00009-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ac2533e4f261297f0f4a843174e6d7163021b076921f1897726da73939b077f
3
+ size 899713176
model-00010-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b8f7cc46f4a0f5d6182c8fd7f9006c718b7623a7904d6103fb5cbd2904b6809
3
+ size 899729792
model-00011-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb2c0435e6c06a660b561708086f2abc0d282d44dbf0b090b0effca92d091ee0
3
+ size 876644624
model-00012-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4c9f59a5806f6b3841abb0adce8708ab5c8fdd8a68bc1f2a11346da268123a1
3
+ size 876644624
model-00013-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e57abc99cd940dd911748fd5f6f1ab5a944e93682a9641824e6a1afd553bae3f
3
+ size 899713176
model-00014-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bde07f5d9ab19c769c3ef22831373557798f9914d08b7f4c1f5f292948558263
3
+ size 899713176
model-00015-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dab70189a88988c5876653e5241c653a1294fc9ce16349c5748e6e68779ee003
3
+ size 899738072
model-00016-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cddadf45d718b2f24f865ced4dc2bf91fc857dadb07db74b672a909b9571d34
3
+ size 372342912
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 13697228800
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00016-of-00016.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00016.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00016.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00016.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00016.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00016.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00016.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00016.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00016.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00016.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00016.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00002-of-00016.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00002-of-00016.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00016.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00002-of-00016.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00016.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00016.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00016.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00016.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00016.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00006-of-00016.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00006-of-00016.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00006-of-00016.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00006-of-00016.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00006-of-00016.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00006-of-00016.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00006-of-00016.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00006-of-00016.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00006-of-00016.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00006-of-00016.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00006-of-00016.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00006-of-00016.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00006-of-00016.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00006-of-00016.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00006-of-00016.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00006-of-00016.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00006-of-00016.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00006-of-00016.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00007-of-00016.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00007-of-00016.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00007-of-00016.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00007-of-00016.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00007-of-00016.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00006-of-00016.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00007-of-00016.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00006-of-00016.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00007-of-00016.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00007-of-00016.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00007-of-00016.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00007-of-00016.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00007-of-00016.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00007-of-00016.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00007-of-00016.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00007-of-00016.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00007-of-00016.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00007-of-00016.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00008-of-00016.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00008-of-00016.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00008-of-00016.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00008-of-00016.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00008-of-00016.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00007-of-00016.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00007-of-00016.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00007-of-00016.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00007-of-00016.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00008-of-00016.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00008-of-00016.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00008-of-00016.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00008-of-00016.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00008-of-00016.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00008-of-00016.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00008-of-00016.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00008-of-00016.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00008-of-00016.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00009-of-00016.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00009-of-00016.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00008-of-00016.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00009-of-00016.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00009-of-00016.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00008-of-00016.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00008-of-00016.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00008-of-00016.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00008-of-00016.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00009-of-00016.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00009-of-00016.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00009-of-00016.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00009-of-00016.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00009-of-00016.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00009-of-00016.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00009-of-00016.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00009-of-00016.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00009-of-00016.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00010-of-00016.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00010-of-00016.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00009-of-00016.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00009-of-00016.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00010-of-00016.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00009-of-00016.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00009-of-00016.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00009-of-00016.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00009-of-00016.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00010-of-00016.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00010-of-00016.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00010-of-00016.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00010-of-00016.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00010-of-00016.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00010-of-00016.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00010-of-00016.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00010-of-00016.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00010-of-00016.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00002-of-00016.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00002-of-00016.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00016.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00002-of-00016.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00016.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00016.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00016.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00016.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00016.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00010-of-00016.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00010-of-00016.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00010-of-00016.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00010-of-00016.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00010-of-00016.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00010-of-00016.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00010-of-00016.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00010-of-00016.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00010-of-00016.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00011-of-00016.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00011-of-00016.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00011-of-00016.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00011-of-00016.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00011-of-00016.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00011-of-00016.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00011-of-00016.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00011-of-00016.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00011-of-00016.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00011-of-00016.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00011-of-00016.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00011-of-00016.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00011-of-00016.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00011-of-00016.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00011-of-00016.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00011-of-00016.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00011-of-00016.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00011-of-00016.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00012-of-00016.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00012-of-00016.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00012-of-00016.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00012-of-00016.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00012-of-00016.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00011-of-00016.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00012-of-00016.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00011-of-00016.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00012-of-00016.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00012-of-00016.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00012-of-00016.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00012-of-00016.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00012-of-00016.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00012-of-00016.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00012-of-00016.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00012-of-00016.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00012-of-00016.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00012-of-00016.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00013-of-00016.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00013-of-00016.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00013-of-00016.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00013-of-00016.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00013-of-00016.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00012-of-00016.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00012-of-00016.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00012-of-00016.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00012-of-00016.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00013-of-00016.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00013-of-00016.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00013-of-00016.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00013-of-00016.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00013-of-00016.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00013-of-00016.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00013-of-00016.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00013-of-00016.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00013-of-00016.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00014-of-00016.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00014-of-00016.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00013-of-00016.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00014-of-00016.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00014-of-00016.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00013-of-00016.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00013-of-00016.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00013-of-00016.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00013-of-00016.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00014-of-00016.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00014-of-00016.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00014-of-00016.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00014-of-00016.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00014-of-00016.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00014-of-00016.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00014-of-00016.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00014-of-00016.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00014-of-00016.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00015-of-00016.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00015-of-00016.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00014-of-00016.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00014-of-00016.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00015-of-00016.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00014-of-00016.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00014-of-00016.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00014-of-00016.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00014-of-00016.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00003-of-00016.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00003-of-00016.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00003-of-00016.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00003-of-00016.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00003-of-00016.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00016.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00016.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00016.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00016.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00015-of-00016.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00015-of-00016.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00015-of-00016.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00015-of-00016.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00015-of-00016.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00015-of-00016.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00015-of-00016.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00015-of-00016.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00015-of-00016.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00015-of-00016.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00015-of-00016.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00015-of-00016.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00015-of-00016.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00015-of-00016.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00015-of-00016.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00015-of-00016.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00015-of-00016.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00015-of-00016.safetensors",
242
+ "model.layers.4.input_layernorm.weight": "model-00003-of-00016.safetensors",
243
+ "model.layers.4.mlp.down_proj.weight": "model-00003-of-00016.safetensors",
244
+ "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00016.safetensors",
245
+ "model.layers.4.mlp.up_proj.weight": "model-00003-of-00016.safetensors",
246
+ "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00016.safetensors",
247
+ "model.layers.4.self_attn.k_proj.weight": "model-00003-of-00016.safetensors",
248
+ "model.layers.4.self_attn.o_proj.weight": "model-00003-of-00016.safetensors",
249
+ "model.layers.4.self_attn.q_proj.weight": "model-00003-of-00016.safetensors",
250
+ "model.layers.4.self_attn.v_proj.weight": "model-00003-of-00016.safetensors",
251
+ "model.layers.5.input_layernorm.weight": "model-00004-of-00016.safetensors",
252
+ "model.layers.5.mlp.down_proj.weight": "model-00004-of-00016.safetensors",
253
+ "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00016.safetensors",
254
+ "model.layers.5.mlp.up_proj.weight": "model-00004-of-00016.safetensors",
255
+ "model.layers.5.post_attention_layernorm.weight": "model-00004-of-00016.safetensors",
256
+ "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00016.safetensors",
257
+ "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00016.safetensors",
258
+ "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00016.safetensors",
259
+ "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00016.safetensors",
260
+ "model.layers.6.input_layernorm.weight": "model-00004-of-00016.safetensors",
261
+ "model.layers.6.mlp.down_proj.weight": "model-00004-of-00016.safetensors",
262
+ "model.layers.6.mlp.gate_proj.weight": "model-00004-of-00016.safetensors",
263
+ "model.layers.6.mlp.up_proj.weight": "model-00004-of-00016.safetensors",
264
+ "model.layers.6.post_attention_layernorm.weight": "model-00004-of-00016.safetensors",
265
+ "model.layers.6.self_attn.k_proj.weight": "model-00004-of-00016.safetensors",
266
+ "model.layers.6.self_attn.o_proj.weight": "model-00004-of-00016.safetensors",
267
+ "model.layers.6.self_attn.q_proj.weight": "model-00004-of-00016.safetensors",
268
+ "model.layers.6.self_attn.v_proj.weight": "model-00004-of-00016.safetensors",
269
+ "model.layers.7.input_layernorm.weight": "model-00005-of-00016.safetensors",
270
+ "model.layers.7.mlp.down_proj.weight": "model-00005-of-00016.safetensors",
271
+ "model.layers.7.mlp.gate_proj.weight": "model-00004-of-00016.safetensors",
272
+ "model.layers.7.mlp.up_proj.weight": "model-00004-of-00016.safetensors",
273
+ "model.layers.7.post_attention_layernorm.weight": "model-00005-of-00016.safetensors",
274
+ "model.layers.7.self_attn.k_proj.weight": "model-00004-of-00016.safetensors",
275
+ "model.layers.7.self_attn.o_proj.weight": "model-00004-of-00016.safetensors",
276
+ "model.layers.7.self_attn.q_proj.weight": "model-00004-of-00016.safetensors",
277
+ "model.layers.7.self_attn.v_proj.weight": "model-00004-of-00016.safetensors",
278
+ "model.layers.8.input_layernorm.weight": "model-00005-of-00016.safetensors",
279
+ "model.layers.8.mlp.down_proj.weight": "model-00005-of-00016.safetensors",
280
+ "model.layers.8.mlp.gate_proj.weight": "model-00005-of-00016.safetensors",
281
+ "model.layers.8.mlp.up_proj.weight": "model-00005-of-00016.safetensors",
282
+ "model.layers.8.post_attention_layernorm.weight": "model-00005-of-00016.safetensors",
283
+ "model.layers.8.self_attn.k_proj.weight": "model-00005-of-00016.safetensors",
284
+ "model.layers.8.self_attn.o_proj.weight": "model-00005-of-00016.safetensors",
285
+ "model.layers.8.self_attn.q_proj.weight": "model-00005-of-00016.safetensors",
286
+ "model.layers.8.self_attn.v_proj.weight": "model-00005-of-00016.safetensors",
287
+ "model.layers.9.input_layernorm.weight": "model-00005-of-00016.safetensors",
288
+ "model.layers.9.mlp.down_proj.weight": "model-00005-of-00016.safetensors",
289
+ "model.layers.9.mlp.gate_proj.weight": "model-00005-of-00016.safetensors",
290
+ "model.layers.9.mlp.up_proj.weight": "model-00005-of-00016.safetensors",
291
+ "model.layers.9.post_attention_layernorm.weight": "model-00005-of-00016.safetensors",
292
+ "model.layers.9.self_attn.k_proj.weight": "model-00005-of-00016.safetensors",
293
+ "model.layers.9.self_attn.o_proj.weight": "model-00005-of-00016.safetensors",
294
+ "model.layers.9.self_attn.q_proj.weight": "model-00005-of-00016.safetensors",
295
+ "model.layers.9.self_attn.v_proj.weight": "model-00005-of-00016.safetensors",
296
+ "model.norm.weight": "model-00015-of-00016.safetensors"
297
+ }
298
+ }
pytorch_model-00001-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47b2b0d5c9ee53a16043c2a4e27c4b9d545015f9f710f681381a5896e420d1af
3
+ size 877778068
pytorch_model-00002-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db75a2caa9a5e32c045c1a23eadaa82858ea20f56e55069ca4ab1fea98b8c2b9
3
+ size 843095104
pytorch_model-00003-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04958125cb2ef16e6970c0fe7c028759872154dc76cd815ea78ac964dd90cefc
3
+ size 899718208
pytorch_model-00004-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:995294dda453a9eb082e6a37a16d7bc2960f949b850dbc9f9c4dd3cf02895740
3
+ size 899718208
pytorch_model-00005-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:787ce03670b285f5eee5501826bad324fdbb072da02931e906ae5f7d04a830c8
3
+ size 899735268
pytorch_model-00006-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:127984e253e012bea6a577f1251312831cc025e3f3f8e316d6fa7e3b0936ca2d
3
+ size 876649874
pytorch_model-00007-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0c2d2fa2eea936c1085df25d2975005a4377e2b8502141859a1fba89f0a4ca2
3
+ size 876649874
pytorch_model-00008-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047b3e3beeb9f2b96bd80d1e31fa5cba0436793859a611488b2b5548f1efa923
3
+ size 899718208
pytorch_model-00009-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f77dfba72578bf01652c2038272be6fc433b2194bcd786dbd63d9381c99eb3
3
+ size 899718208
pytorch_model-00010-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8750d022aa8d6f359c215024d24fdea48fae7fbcf82e016e6ceb56702072c09d
3
+ size 899735268
pytorch_model-00011-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e635fcb39c0d31bf9ee4c945dd53405b5ba2b0c01c5e7c271b5e5b08ecdebf39
3
+ size 876649874
pytorch_model-00012-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14f348259d44a4d11f9312c981d8c15d199cfb8f2bb671c8a636b5810c767833
3
+ size 876649874
pytorch_model-00013-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d10b7191c97672813edbaa03b69a167fc809deec7f868bb42b02127af1ed27
3
+ size 899718208
pytorch_model-00014-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06988204bc074045ff1281cf4430554080e1c210a929c747d9c50be0e68bc1d6
3
+ size 899718208
pytorch_model-00015-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9ff351a1077eb9b0d45383143ef13ab1ef922d5d6905d4fdef25e1e73f3c08a
3
+ size 899743734
pytorch_model-00016-of-00016.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be1989782cd2ff4eb8ce4ae6ac240e8115a33e2306c7f79d3f8085e6f75ee64b
3
+ size 372344197
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 13697228800
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "pytorch_model-00016-of-00016.bin",
7
+ "model.embed_tokens.weight": "pytorch_model-00001-of-00016.bin",
8
+ "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00016.bin",
9
+ "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00016.bin",
10
+ "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00016.bin",
11
+ "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00016.bin",
12
+ "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00016.bin",
13
+ "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00016.bin",
14
+ "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00016.bin",
15
+ "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00016.bin",
16
+ "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00016.bin",
17
+ "model.layers.1.input_layernorm.weight": "pytorch_model-00002-of-00016.bin",
18
+ "model.layers.1.mlp.down_proj.weight": "pytorch_model-00002-of-00016.bin",
19
+ "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00002-of-00016.bin",
20
+ "model.layers.1.mlp.up_proj.weight": "pytorch_model-00002-of-00016.bin",
21
+ "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00002-of-00016.bin",
22
+ "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00016.bin",
23
+ "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00002-of-00016.bin",
24
+ "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00016.bin",
25
+ "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00016.bin",
26
+ "model.layers.10.input_layernorm.weight": "pytorch_model-00006-of-00016.bin",
27
+ "model.layers.10.mlp.down_proj.weight": "pytorch_model-00006-of-00016.bin",
28
+ "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00006-of-00016.bin",
29
+ "model.layers.10.mlp.up_proj.weight": "pytorch_model-00006-of-00016.bin",
30
+ "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00006-of-00016.bin",
31
+ "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00006-of-00016.bin",
32
+ "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00006-of-00016.bin",
33
+ "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00006-of-00016.bin",
34
+ "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00006-of-00016.bin",
35
+ "model.layers.11.input_layernorm.weight": "pytorch_model-00006-of-00016.bin",
36
+ "model.layers.11.mlp.down_proj.weight": "pytorch_model-00006-of-00016.bin",
37
+ "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00006-of-00016.bin",
38
+ "model.layers.11.mlp.up_proj.weight": "pytorch_model-00006-of-00016.bin",
39
+ "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00006-of-00016.bin",
40
+ "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00006-of-00016.bin",
41
+ "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00006-of-00016.bin",
42
+ "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00006-of-00016.bin",
43
+ "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00006-of-00016.bin",
44
+ "model.layers.12.input_layernorm.weight": "pytorch_model-00007-of-00016.bin",
45
+ "model.layers.12.mlp.down_proj.weight": "pytorch_model-00007-of-00016.bin",
46
+ "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00007-of-00016.bin",
47
+ "model.layers.12.mlp.up_proj.weight": "pytorch_model-00007-of-00016.bin",
48
+ "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00007-of-00016.bin",
49
+ "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00006-of-00016.bin",
50
+ "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00007-of-00016.bin",
51
+ "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00006-of-00016.bin",
52
+ "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00007-of-00016.bin",
53
+ "model.layers.13.input_layernorm.weight": "pytorch_model-00007-of-00016.bin",
54
+ "model.layers.13.mlp.down_proj.weight": "pytorch_model-00007-of-00016.bin",
55
+ "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00007-of-00016.bin",
56
+ "model.layers.13.mlp.up_proj.weight": "pytorch_model-00007-of-00016.bin",
57
+ "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00007-of-00016.bin",
58
+ "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00007-of-00016.bin",
59
+ "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00007-of-00016.bin",
60
+ "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00007-of-00016.bin",
61
+ "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00007-of-00016.bin",
62
+ "model.layers.14.input_layernorm.weight": "pytorch_model-00008-of-00016.bin",
63
+ "model.layers.14.mlp.down_proj.weight": "pytorch_model-00008-of-00016.bin",
64
+ "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00008-of-00016.bin",
65
+ "model.layers.14.mlp.up_proj.weight": "pytorch_model-00008-of-00016.bin",
66
+ "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00008-of-00016.bin",
67
+ "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00007-of-00016.bin",
68
+ "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00007-of-00016.bin",
69
+ "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00007-of-00016.bin",
70
+ "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00007-of-00016.bin",
71
+ "model.layers.15.input_layernorm.weight": "pytorch_model-00008-of-00016.bin",
72
+ "model.layers.15.mlp.down_proj.weight": "pytorch_model-00008-of-00016.bin",
73
+ "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00008-of-00016.bin",
74
+ "model.layers.15.mlp.up_proj.weight": "pytorch_model-00008-of-00016.bin",
75
+ "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00008-of-00016.bin",
76
+ "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00008-of-00016.bin",
77
+ "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00008-of-00016.bin",
78
+ "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00008-of-00016.bin",
79
+ "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00008-of-00016.bin",
80
+ "model.layers.16.input_layernorm.weight": "pytorch_model-00009-of-00016.bin",
81
+ "model.layers.16.mlp.down_proj.weight": "pytorch_model-00009-of-00016.bin",
82
+ "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00008-of-00016.bin",
83
+ "model.layers.16.mlp.up_proj.weight": "pytorch_model-00009-of-00016.bin",
84
+ "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00009-of-00016.bin",
85
+ "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00008-of-00016.bin",
86
+ "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00008-of-00016.bin",
87
+ "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00008-of-00016.bin",
88
+ "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00008-of-00016.bin",
89
+ "model.layers.17.input_layernorm.weight": "pytorch_model-00009-of-00016.bin",
90
+ "model.layers.17.mlp.down_proj.weight": "pytorch_model-00009-of-00016.bin",
91
+ "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00009-of-00016.bin",
92
+ "model.layers.17.mlp.up_proj.weight": "pytorch_model-00009-of-00016.bin",
93
+ "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00009-of-00016.bin",
94
+ "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00009-of-00016.bin",
95
+ "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00009-of-00016.bin",
96
+ "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00009-of-00016.bin",
97
+ "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00009-of-00016.bin",
98
+ "model.layers.18.input_layernorm.weight": "pytorch_model-00010-of-00016.bin",
99
+ "model.layers.18.mlp.down_proj.weight": "pytorch_model-00010-of-00016.bin",
100
+ "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00009-of-00016.bin",
101
+ "model.layers.18.mlp.up_proj.weight": "pytorch_model-00009-of-00016.bin",
102
+ "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00010-of-00016.bin",
103
+ "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00009-of-00016.bin",
104
+ "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00009-of-00016.bin",
105
+ "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00009-of-00016.bin",
106
+ "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00009-of-00016.bin",
107
+ "model.layers.19.input_layernorm.weight": "pytorch_model-00010-of-00016.bin",
108
+ "model.layers.19.mlp.down_proj.weight": "pytorch_model-00010-of-00016.bin",
109
+ "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00010-of-00016.bin",
110
+ "model.layers.19.mlp.up_proj.weight": "pytorch_model-00010-of-00016.bin",
111
+ "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00010-of-00016.bin",
112
+ "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00010-of-00016.bin",
113
+ "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00010-of-00016.bin",
114
+ "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00010-of-00016.bin",
115
+ "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00010-of-00016.bin",
116
+ "model.layers.2.input_layernorm.weight": "pytorch_model-00002-of-00016.bin",
117
+ "model.layers.2.mlp.down_proj.weight": "pytorch_model-00002-of-00016.bin",
118
+ "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00002-of-00016.bin",
119
+ "model.layers.2.mlp.up_proj.weight": "pytorch_model-00002-of-00016.bin",
120
+ "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00002-of-00016.bin",
121
+ "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00002-of-00016.bin",
122
+ "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00002-of-00016.bin",
123
+ "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00002-of-00016.bin",
124
+ "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00002-of-00016.bin",
125
+ "model.layers.20.input_layernorm.weight": "pytorch_model-00010-of-00016.bin",
126
+ "model.layers.20.mlp.down_proj.weight": "pytorch_model-00010-of-00016.bin",
127
+ "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00010-of-00016.bin",
128
+ "model.layers.20.mlp.up_proj.weight": "pytorch_model-00010-of-00016.bin",
129
+ "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00010-of-00016.bin",
130
+ "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00010-of-00016.bin",
131
+ "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00010-of-00016.bin",
132
+ "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00010-of-00016.bin",
133
+ "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00010-of-00016.bin",
134
+ "model.layers.21.input_layernorm.weight": "pytorch_model-00011-of-00016.bin",
135
+ "model.layers.21.mlp.down_proj.weight": "pytorch_model-00011-of-00016.bin",
136
+ "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00011-of-00016.bin",
137
+ "model.layers.21.mlp.up_proj.weight": "pytorch_model-00011-of-00016.bin",
138
+ "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00011-of-00016.bin",
139
+ "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00011-of-00016.bin",
140
+ "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00011-of-00016.bin",
141
+ "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00011-of-00016.bin",
142
+ "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00011-of-00016.bin",
143
+ "model.layers.22.input_layernorm.weight": "pytorch_model-00011-of-00016.bin",
144
+ "model.layers.22.mlp.down_proj.weight": "pytorch_model-00011-of-00016.bin",
145
+ "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00011-of-00016.bin",
146
+ "model.layers.22.mlp.up_proj.weight": "pytorch_model-00011-of-00016.bin",
147
+ "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00011-of-00016.bin",
148
+ "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00011-of-00016.bin",
149
+ "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00011-of-00016.bin",
150
+ "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00011-of-00016.bin",
151
+ "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00011-of-00016.bin",
152
+ "model.layers.23.input_layernorm.weight": "pytorch_model-00012-of-00016.bin",
153
+ "model.layers.23.mlp.down_proj.weight": "pytorch_model-00012-of-00016.bin",
154
+ "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00012-of-00016.bin",
155
+ "model.layers.23.mlp.up_proj.weight": "pytorch_model-00012-of-00016.bin",
156
+ "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00012-of-00016.bin",
157
+ "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00011-of-00016.bin",
158
+ "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00012-of-00016.bin",
159
+ "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00011-of-00016.bin",
160
+ "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00012-of-00016.bin",
161
+ "model.layers.24.input_layernorm.weight": "pytorch_model-00012-of-00016.bin",
162
+ "model.layers.24.mlp.down_proj.weight": "pytorch_model-00012-of-00016.bin",
163
+ "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00012-of-00016.bin",
164
+ "model.layers.24.mlp.up_proj.weight": "pytorch_model-00012-of-00016.bin",
165
+ "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00012-of-00016.bin",
166
+ "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00012-of-00016.bin",
167
+ "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00012-of-00016.bin",
168
+ "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00012-of-00016.bin",
169
+ "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00012-of-00016.bin",
170
+ "model.layers.25.input_layernorm.weight": "pytorch_model-00013-of-00016.bin",
171
+ "model.layers.25.mlp.down_proj.weight": "pytorch_model-00013-of-00016.bin",
172
+ "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00013-of-00016.bin",
173
+ "model.layers.25.mlp.up_proj.weight": "pytorch_model-00013-of-00016.bin",
174
+ "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00013-of-00016.bin",
175
+ "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00012-of-00016.bin",
176
+ "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00012-of-00016.bin",
177
+ "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00012-of-00016.bin",
178
+ "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00012-of-00016.bin",
179
+ "model.layers.26.input_layernorm.weight": "pytorch_model-00013-of-00016.bin",
180
+ "model.layers.26.mlp.down_proj.weight": "pytorch_model-00013-of-00016.bin",
181
+ "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00013-of-00016.bin",
182
+ "model.layers.26.mlp.up_proj.weight": "pytorch_model-00013-of-00016.bin",
183
+ "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00013-of-00016.bin",
184
+ "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00013-of-00016.bin",
185
+ "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00013-of-00016.bin",
186
+ "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00013-of-00016.bin",
187
+ "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00013-of-00016.bin",
188
+ "model.layers.27.input_layernorm.weight": "pytorch_model-00014-of-00016.bin",
189
+ "model.layers.27.mlp.down_proj.weight": "pytorch_model-00014-of-00016.bin",
190
+ "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00013-of-00016.bin",
191
+ "model.layers.27.mlp.up_proj.weight": "pytorch_model-00014-of-00016.bin",
192
+ "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00014-of-00016.bin",
193
+ "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00013-of-00016.bin",
194
+ "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00013-of-00016.bin",
195
+ "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00013-of-00016.bin",
196
+ "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00013-of-00016.bin",
197
+ "model.layers.28.input_layernorm.weight": "pytorch_model-00014-of-00016.bin",
198
+ "model.layers.28.mlp.down_proj.weight": "pytorch_model-00014-of-00016.bin",
199
+ "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00014-of-00016.bin",
200
+ "model.layers.28.mlp.up_proj.weight": "pytorch_model-00014-of-00016.bin",
201
+ "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00014-of-00016.bin",
202
+ "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00014-of-00016.bin",
203
+ "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00014-of-00016.bin",
204
+ "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00014-of-00016.bin",
205
+ "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00014-of-00016.bin",
206
+ "model.layers.29.input_layernorm.weight": "pytorch_model-00015-of-00016.bin",
207
+ "model.layers.29.mlp.down_proj.weight": "pytorch_model-00015-of-00016.bin",
208
+ "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00014-of-00016.bin",
209
+ "model.layers.29.mlp.up_proj.weight": "pytorch_model-00014-of-00016.bin",
210
+ "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00015-of-00016.bin",
211
+ "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00014-of-00016.bin",
212
+ "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00014-of-00016.bin",
213
+ "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00014-of-00016.bin",
214
+ "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00014-of-00016.bin",
215
+ "model.layers.3.input_layernorm.weight": "pytorch_model-00003-of-00016.bin",
216
+ "model.layers.3.mlp.down_proj.weight": "pytorch_model-00003-of-00016.bin",
217
+ "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00003-of-00016.bin",
218
+ "model.layers.3.mlp.up_proj.weight": "pytorch_model-00003-of-00016.bin",
219
+ "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00003-of-00016.bin",
220
+ "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00002-of-00016.bin",
221
+ "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00002-of-00016.bin",
222
+ "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00002-of-00016.bin",
223
+ "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00002-of-00016.bin",
224
+ "model.layers.30.input_layernorm.weight": "pytorch_model-00015-of-00016.bin",
225
+ "model.layers.30.mlp.down_proj.weight": "pytorch_model-00015-of-00016.bin",
226
+ "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00015-of-00016.bin",
227
+ "model.layers.30.mlp.up_proj.weight": "pytorch_model-00015-of-00016.bin",
228
+ "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00015-of-00016.bin",
229
+ "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00015-of-00016.bin",
230
+ "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00015-of-00016.bin",
231
+ "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00015-of-00016.bin",
232
+ "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00015-of-00016.bin",
233
+ "model.layers.31.input_layernorm.weight": "pytorch_model-00015-of-00016.bin",
234
+ "model.layers.31.mlp.down_proj.weight": "pytorch_model-00015-of-00016.bin",
235
+ "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00015-of-00016.bin",
236
+ "model.layers.31.mlp.up_proj.weight": "pytorch_model-00015-of-00016.bin",
237
+ "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00015-of-00016.bin",
238
+ "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00015-of-00016.bin",
239
+ "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00015-of-00016.bin",
240
+ "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00015-of-00016.bin",
241
+ "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00015-of-00016.bin",
242
+ "model.layers.4.input_layernorm.weight": "pytorch_model-00003-of-00016.bin",
243
+ "model.layers.4.mlp.down_proj.weight": "pytorch_model-00003-of-00016.bin",
244
+ "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00003-of-00016.bin",
245
+ "model.layers.4.mlp.up_proj.weight": "pytorch_model-00003-of-00016.bin",
246
+ "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00003-of-00016.bin",
247
+ "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00003-of-00016.bin",
248
+ "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00003-of-00016.bin",
249
+ "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00003-of-00016.bin",
250
+ "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00003-of-00016.bin",
251
+ "model.layers.5.input_layernorm.weight": "pytorch_model-00004-of-00016.bin",
252
+ "model.layers.5.mlp.down_proj.weight": "pytorch_model-00004-of-00016.bin",
253
+ "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00003-of-00016.bin",
254
+ "model.layers.5.mlp.up_proj.weight": "pytorch_model-00004-of-00016.bin",
255
+ "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00004-of-00016.bin",
256
+ "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00003-of-00016.bin",
257
+ "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00003-of-00016.bin",
258
+ "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00003-of-00016.bin",
259
+ "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00003-of-00016.bin",
260
+ "model.layers.6.input_layernorm.weight": "pytorch_model-00004-of-00016.bin",
261
+ "model.layers.6.mlp.down_proj.weight": "pytorch_model-00004-of-00016.bin",
262
+ "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00004-of-00016.bin",
263
+ "model.layers.6.mlp.up_proj.weight": "pytorch_model-00004-of-00016.bin",
264
+ "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00004-of-00016.bin",
265
+ "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00004-of-00016.bin",
266
+ "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00004-of-00016.bin",
267
+ "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00004-of-00016.bin",
268
+ "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00004-of-00016.bin",
269
+ "model.layers.7.input_layernorm.weight": "pytorch_model-00005-of-00016.bin",
270
+ "model.layers.7.mlp.down_proj.weight": "pytorch_model-00005-of-00016.bin",
271
+ "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00004-of-00016.bin",
272
+ "model.layers.7.mlp.up_proj.weight": "pytorch_model-00004-of-00016.bin",
273
+ "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00005-of-00016.bin",
274
+ "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00004-of-00016.bin",
275
+ "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00004-of-00016.bin",
276
+ "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00004-of-00016.bin",
277
+ "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00004-of-00016.bin",
278
+ "model.layers.8.input_layernorm.weight": "pytorch_model-00005-of-00016.bin",
279
+ "model.layers.8.mlp.down_proj.weight": "pytorch_model-00005-of-00016.bin",
280
+ "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00005-of-00016.bin",
281
+ "model.layers.8.mlp.up_proj.weight": "pytorch_model-00005-of-00016.bin",
282
+ "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00005-of-00016.bin",
283
+ "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00005-of-00016.bin",
284
+ "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00005-of-00016.bin",
285
+ "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00005-of-00016.bin",
286
+ "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00005-of-00016.bin",
287
+ "model.layers.9.input_layernorm.weight": "pytorch_model-00005-of-00016.bin",
288
+ "model.layers.9.mlp.down_proj.weight": "pytorch_model-00005-of-00016.bin",
289
+ "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00005-of-00016.bin",
290
+ "model.layers.9.mlp.up_proj.weight": "pytorch_model-00005-of-00016.bin",
291
+ "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00005-of-00016.bin",
292
+ "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00005-of-00016.bin",
293
+ "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00005-of-00016.bin",
294
+ "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00005-of-00016.bin",
295
+ "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00005-of-00016.bin",
296
+ "model.norm.weight": "pytorch_model-00015-of-00016.bin"
297
+ }
298
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": "<unk>"
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1363cd114e300eecf639f5e06624087f257bcbfed6820242aef9be6c8bb89de1
3
+ size 716289
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "45449": {
28
+ "content": "[PAD]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "bos_token": "<s>",
37
+ "clean_up_tokenization_spaces": false,
38
+ "eos_token": "</s>",
39
+ "legacy": false,
40
+ "model_max_length": 1000000000000000019884624838656,
41
+ "pad_token": "[PAD]",
42
+ "sp_model_kwargs": {},
43
+ "spaces_between_special_tokens": false,
44
+ "tokenizer_class": "LlamaTokenizer",
45
+ "unk_token": "<unk>",
46
+ "use_default_system_prompt": false,
47
+ "use_fast": true
48
+ }