doberst committed on
Commit
1836906
·
verified ·
1 Parent(s): cea0db9

Upload 4 files

Browse files
Files changed (3) hide show
  1. README.md +41 -3
  2. config.json +4 -2
  3. hash_record_sha256.json +4 -0
README.md CHANGED
@@ -1,3 +1,41 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ inference: false
4
+ base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
5
+ base_model_relation: quantized
6
+ tags:
7
+ - green
8
+ - llmware-chat
9
+ - p1
10
+ - gguf
11
+ - emerald
12
+ ---
13
+
14
+ # tiny-llama-chat-gguf
15
+
16
+ **tiny-llama-chat-gguf** is a GGUF Q4_K_M int4 quantized version of TinyLlama-Chat, providing a very fast, very small inference implementation, optimized for AI PCs.
17
+
18
+ [**tiny-llama-chat**](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) is the official chat-fine-tuned version of TinyLlama.
19
+
20
+
21
+ ### Model Description
22
+
23
+ - **Developed by:** TinyLlama
24
+ - **Quantized by:** llmware
25
+ - **Model type:** llama
26
+ - **Parameters:** 1.1 billion
27
+ - **Model Parent:** TinyLlama-1.1B-Chat-v1.0
28
+ - **Language(s) (NLP):** English
29
+ - **License:** Apache 2.0
30
+ - **Uses:** Chat and general purpose LLM
31
+ - **RAG Benchmark Accuracy Score:** NA
32
+ - **Quantization:** int4
33
+
34
+
35
+ ## Model Card Contact
36
+
37
+ [llmware on github](https://www.github.com/llmware-ai/llmware)
38
+
39
+ [llmware on hf](https://www.huggingface.co/llmware)
40
+
41
+ [llmware website](https://www.llmware.ai)
config.json CHANGED
@@ -1,8 +1,10 @@
1
  {
 
2
  "architectures": [
3
  "LlamaForCausalLM"
4
  ],
5
  "attention_bias": false,
 
6
  "bos_token_id": 1,
7
  "eos_token_id": 2,
8
  "hidden_act": "silu",
@@ -10,6 +12,7 @@
10
  "initializer_range": 0.02,
11
  "intermediate_size": 5632,
12
  "max_position_embeddings": 2048,
 
13
  "model_type": "llama",
14
  "num_attention_heads": 32,
15
  "num_hidden_layers": 22,
@@ -19,8 +22,7 @@
19
  "rope_scaling": null,
20
  "rope_theta": 10000.0,
21
  "tie_word_embeddings": false,
22
- "torch_dtype": "bfloat16",
23
- "transformers_version": "4.35.0",
24
  "use_cache": true,
25
  "vocab_size": 32000
26
  }
 
1
  {
2
+ "_name_or_path": "tiny-llama-chat-gguf",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
6
  "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
  "bos_token_id": 1,
9
  "eos_token_id": 2,
10
  "hidden_act": "silu",
 
12
  "initializer_range": 0.02,
13
  "intermediate_size": 5632,
14
  "max_position_embeddings": 2048,
15
+ "mlp_bias": false,
16
  "model_type": "llama",
17
  "num_attention_heads": 32,
18
  "num_hidden_layers": 22,
 
22
  "rope_scaling": null,
23
  "rope_theta": 10000.0,
24
  "tie_word_embeddings": false,
25
+ "transformers_version": "4.41.2",
 
26
  "use_cache": true,
27
  "vocab_size": 32000
28
  }
hash_record_sha256.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "tiny-llama-chat.gguf": "1f7b92940e5711806cf4ea62a4ae588261ffb29f05e77f6dec282ac1ac41fdca",
3
+ "time_stamp": "2025-02-08_151503"
4
+ }