sausheong committed on
Commit
c4a4b04
·
1 Parent(s): 2f18517

initial commit

Browse files
README.md CHANGED
@@ -109,55 +109,6 @@ generated_text = tokenizer.decode(outputs[0].tolist())
109
  print(generated_text)
110
  ```
111
 
112
- ### Generation Strategies
113
-
114
- The model supports multiple generation strategies optimized for different use cases:
115
-
116
- #### 1. Focused Generation (Low Temperature)
117
- ```python
118
- # Best for factual, coherent content
119
- outputs = model.generate(
120
- input_ids,
121
- max_new_tokens=128,
122
- do_sample=True,
123
- temperature=0.3,
124
- top_p=0.8,
125
- top_k=20,
126
- repetition_penalty=1.3,
127
- no_repeat_ngram_size=5,
128
- )
129
- ```
130
-
131
- #### 2. Balanced Generation (Medium Temperature)
132
- ```python
133
- # Good balance of creativity and coherence
134
- outputs = model.generate(
135
- input_ids,
136
- max_new_tokens=128,
137
- do_sample=True,
138
- temperature=0.7,
139
- top_p=0.9,
140
- top_k=40,
141
- repetition_penalty=1.2,
142
- no_repeat_ngram_size=4,
143
- )
144
- ```
145
-
146
- #### 3. Creative Generation (High Temperature)
147
- ```python
148
- # Most creative and diverse outputs
149
- outputs = model.generate(
150
- input_ids,
151
- max_new_tokens=128,
152
- do_sample=True,
153
- temperature=0.9,
154
- top_p=0.95,
155
- top_k=50,
156
- repetition_penalty=1.15,
157
- no_repeat_ngram_size=3,
158
- )
159
- ```
160
-
161
  ## Performance
162
 
163
 
 
109
  print(generated_text)
110
  ```
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  ## Performance
113
 
114
 
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SSLLMForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.1,
6
+ "bos_token_id": 100256,
7
+ "d_ff": 2560,
8
+ "d_model": 768,
9
+ "dropout_rate": 0.1,
10
+ "eos_token_id": 100257,
11
+ "hidden_size": 768,
12
+ "intermediate_size": 2560,
13
+ "max_position_embeddings": 1024,
14
+ "max_seq_len": 1024,
15
+ "model_type": "ssllm",
16
+ "num_attention_heads": 12,
17
+ "num_heads": 12,
18
+ "num_hidden_layers": 10,
19
+ "num_layers": 10,
20
+ "pad_token_id": 100257,
21
+ "stochastic_depth_rate": 0.1,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.52.4",
24
+ "vocab_size": 100277
25
+ }
generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 100256,
3
+ "eos_token_id": 100257,
4
+ "pad_token_id": 100257,
5
+ "max_length": 1024,
6
+ "do_sample": true,
7
+ "temperature": 0.7,
8
+ "top_p": 0.9,
9
+ "repetition_penalty": 1.1,
10
+ "no_repeat_ngram_size": 3
11
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c2a35c0354a62e08a22386cd7ed6844fb44cf6a840a252d3250d6d54ebe85a6
3
+ size 871704100
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tokenizer_class": "tiktoken",
3
+ "model_name": "cl100k_base",
4
+ "vocab_size": 100277,
5
+ "bos_token": "",
6
+ "eos_token": "",
7
+ "pad_token": "",
8
+ "unk_token": "",
9
+ "mask_token": "",
10
+ "additional_special_tokens": []
11
+ }
tokenizer_info.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Tokenizer: cl100k_base (tiktoken)
2
+ Vocabulary size: 100277
3
+ BOS token ID: 100256
4
+ EOS token ID: 100257
5
+ PAD token ID: 100257