ayjays132 commited on
Commit
e5babdd
·
verified ·
1 Parent(s): 3e45498

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +159 -158
config.json CHANGED
@@ -1,158 +1,159 @@
1
- {
2
- "_name_or_path": "ayjays132/CustomGPT2Conversational",
3
- "activation_function": "gelu_new",
4
- "advanced_model_options": {
5
- "contextual_embeddings": {
6
- "approaches": [
7
- "contextual_attention_mechanisms",
8
- "semantic_embedding_regularization"
9
- ],
10
- "enable": true
11
- },
12
- "dynamic_adaptation": {
13
- "enable": true,
14
- "techniques": [
15
- "adaptive_layer_dropping",
16
- "dynamic_context_window"
17
- ]
18
- },
19
- "innovative_neuron_growth": {
20
- "enable": true,
21
- "strategies": [
22
- "selective_neuron_pruning",
23
- "progressive_neuron_expansion"
24
- ]
25
- },
26
- "memory_optimization": {
27
- "enable": true,
28
- "methods": [
29
- "gradient_checkpointing",
30
- "memory-efficient_attention"
31
- ]
32
- },
33
- "meta_learning": {
34
- "approaches": [
35
- "meta_learning_rate_adjustment",
36
- "online_adaptation"
37
- ],
38
- "enable": true
39
- },
40
- "secret_advanced_options": {
41
- "adaptive_token_embedding": {
42
- "enable": true,
43
- "strategies": [
44
- "dynamic_embedding_resizing",
45
- "contextual_embedding_scaling"
46
- ]
47
- },
48
- "future_context_prediction": {
49
- "enable": true,
50
- "techniques": [
51
- "lookahead_context_integration",
52
- "predictive_attention_mechanisms"
53
- ]
54
- },
55
- "multi_modal_integration": {
56
- "enable": true,
57
- "methods": [
58
- "text_image_alignment",
59
- "cross_modal_attention"
60
- ]
61
- }
62
- }
63
- },
64
- "architectures": [
65
- "GPT2LMHeadModel"
66
- ],
67
- "attn_pdrop": 0.1,
68
- "bos_token_id": 50267,
69
- "context_window": 20,
70
- "contextual_embedding_dim": 1024,
71
- "device": "cuda",
72
- "dropout_rate": 0.1,
73
- "early_stopping": true,
74
- "embd_pdrop": 0.1,
75
- "embedding_dim": 1024,
76
- "eos_token_id": 50267,
77
- "hidden_dim": 1024,
78
- "initializer_range": 0.02,
79
- "innovative_growth_capacity": 50000,
80
- "integration_settings": {
81
- "config_name": "config.json",
82
- "load_from_transformers": true,
83
- "pytorch_dump_folder_path": "./model_save",
84
- "pytorch_model_bin_name": "pytorch_model.bin"
85
- },
86
- "layer_norm_epsilon": 1e-05,
87
- "max_length": 1024,
88
- "max_memory_size": 100000,
89
- "max_neurons": 100,
90
- "meta_learning_rate": 0.001,
91
- "min_length": 50,
92
- "model_type": "gpt2",
93
- "n_ctx": 1024,
94
- "n_embd": 1024,
95
- "n_head": 16,
96
- "n_inner": null,
97
- "n_layer": 24,
98
- "n_positions": 1024,
99
- "no_repeat_ngram_size": 2,
100
- "num_beams": 5,
101
- "num_embeddings": 50257,
102
- "num_heads": 64,
103
- "num_layers": 24,
104
- "output_attentions": true,
105
- "output_hidden_states": true,
106
- "pad_token_id": 50267,
107
- "planning_enabled": true,
108
- "planning_layers": 2,
109
- "reasoning_enabled": true,
110
- "reasoning_layers": 2,
111
- "reorder_and_upcast_attn": false,
112
- "resid_pdrop": 0.1,
113
- "scale_attn_by_inverse_layer_idx": false,
114
- "scale_attn_weights": true,
115
- "sep_token_id": 50267,
116
- "special_tokens": {
117
- "additional_special_tokens": [
118
- "<greeting>",
119
- "<farewell>",
120
- "<thank>",
121
- "<apology>"
122
- ],
123
- "bos_token": "<bos>",
124
- "cls_token": "<cls>",
125
- "eos_token": "<eos>",
126
- "mask_token": "<mask>",
127
- "pad_token": "<pad>",
128
- "sep_token": "<sep>",
129
- "unk_token": "<unk>"
130
- },
131
- "state_shape": null,
132
- "summary_activation": null,
133
- "summary_first_dropout": 0.1,
134
- "summary_proj_to_labels": true,
135
- "summary_type": "cls_index",
136
- "summary_use_proj": true,
137
- "target_q_model": null,
138
- "task_specific_params": {
139
- "text-generation": {
140
- "do_sample": true,
141
- "early_stopping": true,
142
- "length_penalty": 1.0,
143
- "max_length": 2048,
144
- "min_length": 64,
145
- "no_repeat_ngram_size": 2,
146
- "num_beams": 8,
147
- "num_return_sequences": 3,
148
- "repetition_penalty": 1.2,
149
- "temperature": 0.9,
150
- "top_k": 50,
151
- "top_p": 0.95
152
- }
153
- },
154
- "torch_dtype": "float32",
155
- "transformers_version": "4.44.2",
156
- "use_cache": true,
157
- "vocab_size": 50257
158
- }
 
 
1
+ {
2
+ "_name_or_path": "ayjays132/CustomGPT2Conversational",
3
+ "model_type": "gpt2",
4
+ "activation_function": "gelu_new",
5
+ "advanced_model_options": {
6
+ "contextual_embeddings": {
7
+ "approaches": [
8
+ "contextual_attention_mechanisms",
9
+ "semantic_embedding_regularization"
10
+ ],
11
+ "enable": true
12
+ },
13
+ "dynamic_adaptation": {
14
+ "enable": true,
15
+ "techniques": [
16
+ "adaptive_layer_dropping",
17
+ "dynamic_context_window"
18
+ ]
19
+ },
20
+ "innovative_neuron_growth": {
21
+ "enable": true,
22
+ "strategies": [
23
+ "selective_neuron_pruning",
24
+ "progressive_neuron_expansion"
25
+ ]
26
+ },
27
+ "memory_optimization": {
28
+ "enable": true,
29
+ "methods": [
30
+ "gradient_checkpointing",
31
+ "memory-efficient_attention"
32
+ ]
33
+ },
34
+ "meta_learning": {
35
+ "approaches": [
36
+ "meta_learning_rate_adjustment",
37
+ "online_adaptation"
38
+ ],
39
+ "enable": true
40
+ },
41
+ "secret_advanced_options": {
42
+ "adaptive_token_embedding": {
43
+ "enable": true,
44
+ "strategies": [
45
+ "dynamic_embedding_resizing",
46
+ "contextual_embedding_scaling"
47
+ ]
48
+ },
49
+ "future_context_prediction": {
50
+ "enable": true,
51
+ "techniques": [
52
+ "lookahead_context_integration",
53
+ "predictive_attention_mechanisms"
54
+ ]
55
+ },
56
+ "multi_modal_integration": {
57
+ "enable": true,
58
+ "methods": [
59
+ "text_image_alignment",
60
+ "cross_modal_attention"
61
+ ]
62
+ }
63
+ }
64
+ },
65
+ "architectures": [
66
+ "GPT2LMHeadModel"
67
+ ],
68
+ "attn_pdrop": 0.1,
69
+ "bos_token_id": 50267,
70
+ "context_window": 20,
71
+ "contextual_embedding_dim": 1024,
72
+ "device": "cuda",
73
+ "dropout_rate": 0.1,
74
+ "early_stopping": true,
75
+ "embd_pdrop": 0.1,
76
+ "embedding_dim": 1024,
77
+ "eos_token_id": 50267,
78
+ "hidden_dim": 1024,
79
+ "initializer_range": 0.02,
80
+ "innovative_growth_capacity": 50000,
81
+ "integration_settings": {
82
+ "config_name": "config.json",
83
+ "load_from_transformers": true,
84
+ "pytorch_dump_folder_path": "./model_save",
85
+ "pytorch_model_bin_name": "pytorch_model.bin"
86
+ },
87
+ "layer_norm_epsilon": 1e-05,
88
+ "max_length": 1024,
89
+ "max_memory_size": 100000,
90
+ "max_neurons": 100,
91
+ "meta_learning_rate": 0.001,
92
+ "min_length": 50,
94
+ "n_ctx": 1024,
95
+ "n_embd": 1024,
96
+ "n_head": 16,
97
+ "n_inner": null,
98
+ "n_layer": 24,
99
+ "n_positions": 1024,
100
+ "no_repeat_ngram_size": 2,
101
+ "num_beams": 5,
102
+ "num_embeddings": 50257,
103
+ "num_heads": 64,
104
+ "num_layers": 24,
105
+ "output_attentions": true,
106
+ "output_hidden_states": true,
107
+ "pad_token_id": 50267,
108
+ "planning_enabled": true,
109
+ "planning_layers": 2,
110
+ "reasoning_enabled": true,
111
+ "reasoning_layers": 2,
112
+ "reorder_and_upcast_attn": false,
113
+ "resid_pdrop": 0.1,
114
+ "scale_attn_by_inverse_layer_idx": false,
115
+ "scale_attn_weights": true,
116
+ "sep_token_id": 50267,
117
+ "special_tokens": {
118
+ "additional_special_tokens": [
119
+ "<greeting>",
120
+ "<farewell>",
121
+ "<thank>",
122
+ "<apology>"
123
+ ],
124
+ "bos_token": "<bos>",
125
+ "cls_token": "<cls>",
126
+ "eos_token": "<eos>",
127
+ "mask_token": "<mask>",
128
+ "pad_token": "<pad>",
129
+ "sep_token": "<sep>",
130
+ "unk_token": "<unk>"
131
+ },
132
+ "state_shape": null,
133
+ "summary_activation": null,
134
+ "summary_first_dropout": 0.1,
135
+ "summary_proj_to_labels": true,
136
+ "summary_type": "cls_index",
137
+ "summary_use_proj": true,
138
+ "target_q_model": null,
139
+ "task_specific_params": {
140
+ "text-generation": {
141
+ "do_sample": true,
142
+ "early_stopping": true,
143
+ "length_penalty": 1.0,
144
+ "max_length": 2048,
145
+ "min_length": 64,
146
+ "no_repeat_ngram_size": 2,
147
+ "num_beams": 8,
148
+ "num_return_sequences": 3,
149
+ "repetition_penalty": 1.2,
150
+ "temperature": 0.9,
151
+ "top_k": 50,
152
+ "top_p": 0.95
153
+ }
154
+ },
155
+ "torch_dtype": "float32",
156
+ "transformers_version": "4.44.2",
157
+ "use_cache": true,
158
+ "vocab_size": 50257
159
+ }