Upload folder using huggingface_hub

Files changed:
- config.json +4 -45
- generation_config.json +2 -2
- model.safetensors.index.json +0 -0
- tokenizer.json +63 -4
config.json
CHANGED

@@ -1,51 +1,10 @@
 {
-  "_name_or_path": "/home/meta-llama/Meta-Llama-3.1-70B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "compression_config": {
-    "config_groups": {
-      "group_0": {
-        "input_activations": {
-          "block_structure": null,
-          "dynamic": true,
-          "group_size": null,
-          "num_bits": 8,
-          "observer": "memoryless",
-          "observer_kwargs": {},
-          "strategy": "token",
-          "symmetric": true,
-          "type": "float"
-        },
-        "output_activations": null,
-        "targets": [
-          "Linear"
-        ],
-        "weights": {
-          "block_structure": null,
-          "dynamic": false,
-          "group_size": null,
-          "num_bits": 8,
-          "observer": "minmax",
-          "observer_kwargs": {},
-          "strategy": "channel",
-          "symmetric": true,
-          "type": "float"
-        }
-      }
-    },
-    "format": "naive-quantized",
-    "global_compression_ratio": 1.2405356243234333,
-    "ignore": [
-      "lm_head"
-    ],
-    "kv_cache_scheme": null,
-    "quant_method": "compressed-tensors",
-    "quantization_status": "frozen"
-  },
   "eos_token_id": [
     128001,
     128008,
@@ -65,15 +24,15 @@
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "factor": 8.0,
-    "high_freq_factor": 4.0,
     "low_freq_factor": 1.0,
+    "high_freq_factor": 4.0,
     "original_max_position_embeddings": 8192,
     "rope_type": "llama3"
   },
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "
-  "transformers_version": "4.
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.42.3",
   "use_cache": true,
   "vocab_size": 128256
-}
+}
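In short, this commit strips the `compressed-tensors` FP8 quantization metadata (per-channel FP8 weights, dynamic per-token FP8 activations, `lm_head` ignored) and publishes the config of a plain bfloat16 checkpoint. As a minimal sketch of how a loader might tell the two revisions apart — the Hub repo id below is a placeholder, not necessarily this repo:

```python
from transformers import AutoConfig

# Placeholder repo id; substitute the actual model path or Hub repo.
config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3.1-70B-Instruct")

# Pre-commit revisions carried a "compression_config" block
# (quant_method "compressed-tensors"); this revision should report none
# and advertise a bfloat16 checkpoint instead.
quant = getattr(config, "compression_config", None) or getattr(
    config, "quantization_config", None
)
if quant is None:
    print(f"unquantized checkpoint, torch_dtype={config.torch_dtype}")
else:
    print(f"quantization metadata found: {quant}")
```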
generation_config.json
CHANGED

@@ -8,5 +8,5 @@
   ],
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.
-}
+  "transformers_version": "4.42.3"
+}
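These sampling defaults (temperature 0.6, top_p 0.9) are what `generate()` applies when no overrides are passed. A hedged sketch, assuming the placeholder repo id and that the file also enables sampling (the `do_sample` line falls outside this hunk):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder repo id; substitute the actual checkpoint path.
repo = "meta-llama/Meta-Llama-3.1-70B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype="auto", device_map="auto")

inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
# With no overrides, generate() picks up generation_config.json:
# temperature=0.6, top_p=0.9, and the eos_token_id list shown above.
out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```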
model.safetensors.index.json
CHANGED

The diff for this file is too large to render.
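Although the index diff is not rendered, `model.safetensors.index.json` is just a JSON object with a `metadata.total_size` field and a `weight_map` from tensor names to shard files. A small sketch for inspecting which shard each tensor landed in after the re-upload:

```python
import json
from collections import Counter

# The index maps every tensor name to the .safetensors shard that holds it.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

print("total checkpoint size (bytes):", index["metadata"]["total_size"])
for shard, n_tensors in sorted(Counter(index["weight_map"].values()).items()):
    print(f"{shard}: {n_tensors} tensors")
```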
tokenizer.json
CHANGED

@@ -2329,10 +2329,69 @@
     ]
   },
   "post_processor": {
-    "type": "
-    "
-
-
+    "type": "Sequence",
+    "processors": [
+      {
+        "type": "ByteLevel",
+        "add_prefix_space": true,
+        "trim_offsets": false,
+        "use_regex": true
+      },
+      {
+        "type": "TemplateProcessing",
+        "single": [
+          {
+            "SpecialToken": {
+              "id": "<|begin_of_text|>",
+              "type_id": 0
+            }
+          },
+          {
+            "Sequence": {
+              "id": "A",
+              "type_id": 0
+            }
+          }
+        ],
+        "pair": [
+          {
+            "SpecialToken": {
+              "id": "<|begin_of_text|>",
+              "type_id": 0
+            }
+          },
+          {
+            "Sequence": {
+              "id": "A",
+              "type_id": 0
+            }
+          },
+          {
+            "SpecialToken": {
+              "id": "<|begin_of_text|>",
+              "type_id": 1
+            }
+          },
+          {
+            "Sequence": {
+              "id": "B",
+              "type_id": 1
+            }
+          }
+        ],
+        "special_tokens": {
+          "<|begin_of_text|>": {
+            "id": "<|begin_of_text|>",
+            "ids": [
+              128000
+            ],
+            "tokens": [
+              "<|begin_of_text|>"
+            ]
+          }
+        }
+      }
+    ]
   },
   "decoder": {
     "type": "ByteLevel",
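The rewritten post-processor is a `Sequence` that keeps the byte-level offset handling and adds a `TemplateProcessing` step, so `<|begin_of_text|>` (id 128000) is now prepended to every encoding and again before the second segment of a pair. A quick check with the `tokenizers` library, assuming the updated `tokenizer.json` is available locally:

```python
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")

# The "single" template is effectively: <|begin_of_text|> $A,
# so every encoded sequence should now start with token id 128000.
enc = tok.encode("Hello world")
print(enc.tokens[:3])
assert enc.ids[0] == 128000
```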