nielsr HF staff committed on
Commit
c0612ab
·
verified ·
1 Parent(s): 6cdc4cf

Add pipeline tag and link to project page

Browse files

This PR improves the model card by:
- Adding the relevant `pipeline_tag`, ensuring people can find your model at https://huggingface.co/models?pipeline_tag=text-generation
- Adding a link to the project page at https://abdelfattah-lab.github.io/TokenButler/

Files changed (1) hide show
  1. README.md +129 -4
README.md CHANGED
@@ -1,9 +1,11 @@
1
  ---
2
- license: mit
3
- library_name: transformers
4
  base_model:
5
  - deepseek-ai/DeepSeek-R1-Distill-Llama-8B
 
 
 
6
  ---
 
7
  # TokenButler
8
  <!-- markdownlint-disable first-line-h1 -->
9
  <!-- markdownlint-disable html -->
@@ -17,7 +19,7 @@ base_model:
17
  <hr>
18
  <div align="center" style="line-height: 1;">
19
  <!-- Paper Badge -->
20
- <a href="https://github.com/abdelfattah-lab/TokenButler/blob/main/TokenButler_Draft.pdf" target="_blank" style="margin: 2px;">
21
  <img alt="Paper"
22
  src="https://img.shields.io/badge/Paper-View-orange?logo=readthedocs&logoColor=white"
23
  style="display: inline-block; vertical-align: middle;"/>
@@ -28,6 +30,12 @@ base_model:
28
  src="https://img.shields.io/badge/GitHub-Repo-black?logo=github&logoColor=white"
29
  style="display: inline-block; vertical-align: middle;"/>
30
  </a>
 
 
 
 
 
 
31
  </div>
32
 
33
  <br>
@@ -72,4 +80,121 @@ model = set_sparsity(model, "fixed_60pc")
72
  # Custom Synthetic Task
73
  <div align="center">
74
  <img src="https://github.com/abdelfattah-lab/TokenButler/blob/main/figs/datasetfig.png?raw=true" width="100%" alt="Synthetic Tasks" />
75
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
 
 
2
  base_model:
3
  - deepseek-ai/DeepSeek-R1-Distill-Llama-8B
4
+ library_name: transformers
5
+ license: mit
6
+ pipeline_tag: text-generation
7
  ---
8
+
9
  # TokenButler
10
  <!-- markdownlint-disable first-line-h1 -->
11
  <!-- markdownlint-disable html -->
 
19
  <hr>
20
  <div align="center" style="line-height: 1;">
21
  <!-- Paper Badge -->
22
+ <a href="https://arxiv.org/abs/2503.07518" target="_blank" style="margin: 2px;">
23
  <img alt="Paper"
24
  src="https://img.shields.io/badge/Paper-View-orange?logo=readthedocs&logoColor=white"
25
  style="display: inline-block; vertical-align: middle;"/>
 
30
  src="https://img.shields.io/badge/GitHub-Repo-black?logo=github&logoColor=white"
31
  style="display: inline-block; vertical-align: middle;"/>
32
  </a>
33
+ <!-- Project Page Badge -->
34
+ <a href="https://abdelfattah-lab.github.io/TokenButler/" target="_blank" style="margin: 2px;">
35
+ <img alt="Project Page"
36
+ src="https://img.shields.io/badge/Project%20Page-🌐-lightgrey"
37
+ style="display: inline-block; vertical-align: middle;"/>
38
+ </a>
39
  </div>
40
 
41
  <br>
 
80
  # Custom Synthetic Task
81
  <div align="center">
82
  <img src="https://github.com/abdelfattah-lab/TokenButler/blob/main/figs/datasetfig.png?raw=true" width="100%" alt="Synthetic Tasks" />
83
+ </div>
84
+
85
+ # File information
86
+
87
+ The repository contains the following file information:
88
+
89
+ Filename: tokenizer.json
90
+ Content: "Content of the file is larger than 50 KB, too long to display."
91
+
92
+ Filename: pytorch_model.bin.index.json
93
+ Content: "Content of the file is larger than 50 KB, too long to display."
94
+
95
+ Filename: generation_config.json
96
+ Content: {
97
+ "_from_model_config": true,
98
+ "bos_token_id": 128000,
99
+ "eos_token_id": 128001,
100
+ "transformers_version": "4.48.3"
101
+ }
102
+
103
+ Filename: tokenizer_config.json
104
+ Content: {
105
+ "add_bos_token": true,
106
+ "add_eos_token": false,
107
+ "bos_token": {
108
+ "__type": "AddedToken",
109
+ "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
110
+ "lstrip": false,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ "clean_up_tokenization_spaces": false,
116
+ "eos_token": {
117
+ "__type": "AddedToken",
118
+ "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
119
+ "lstrip": false,
120
+ "normalized": true,
121
+ "rstrip": false,
122
+ "single_word": false
123
+ },
124
+ "legacy": true,
125
+ "model_max_length": 16384,
126
+ "pad_token": {
127
+ "__type": "AddedToken",
128
+ "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
129
+ "lstrip": false,
130
+ "normalized": true,
131
+ "rstrip": false,
132
+ "single_word": false
133
+ },
134
+ "sp_model_kwargs": {},
135
+ "unk_token": null,
136
+ "tokenizer_class": "LlamaTokenizerFast",
137
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\
138
+ ' + '```json' + '\
139
+ ' + tool['function']['arguments'] + '\
140
+ ' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\
141
+ ' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\
142
+ ' + '```json' + '\
143
+ ' + tool['function']['arguments'] + '\
144
+ ' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\
145
+ <\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c><think>\
146
+ '}}{% endif %}"
147
+ }
148
+
149
+ Filename: config.json
150
+ Content: {
151
+ "architectures": [
152
+ "modeling_llama_butler.LlamaButlerForCausalLM"
153
+ ],
154
+ "attention_bias": false,
155
+ "attention_dropout": 0.0,
156
+ "attn_reduce_factor": 8,
157
+ "auto_map": {
158
+ "AutoConfig": "modeling_llama_butler.LlamaButlerConfig",
159
+ "AutoModel": "modeling_llama_butler.LlamaButlerForCausalLM",
160
+ "AutoModelForCausalLM": "modeling_llama_butler.LlamaButlerForCausalLM"
161
+ },
162
+ "bos_token_id": 128000,
163
+ "dDash": 32,
164
+ "eos_token_id": 128001,
165
+ "eval_llm_mode": "ExpPred",
166
+ "flash_attn": false,
167
+ "head_attn_reduce_factor": 2,
168
+ "head_dim": 128,
169
+ "hidden_act": "silu",
170
+ "hidden_size": 4096,
171
+ "initializer_range": 0.02,
172
+ "intdim": 1024,
173
+ "intermediate_size": 14336,
174
+ "lookahead": 0,
175
+ "max_position_embeddings": 131072,
176
+ "min_sparse_index": 8,
177
+ "mlp_bias": false,
178
+ "model_type": "llama_butler",
179
+ "num_attention_heads": 32,
180
+ "num_hidden_layers": 32,
181
+ "num_key_value_heads": 8,
182
+ "pretraining_tp": 1,
183
+ "producer_frequency": 32,
184
+ "rms_norm_eps": 1e-05,
185
+ "rope_scaling": {
186
+ "factor": 8.0,
187
+ "high_freq_factor": 4.0,
188
+ "low_freq_factor": 1.0,
189
+ "original_max_position_embeddings": 8192,
190
+ "rope_type": "llama3"
191
+ },
192
+ "rope_theta": 500000.0,
193
+ "sliding_window": 128,
194
+ "token_sparse_method": "fixed_50pc",
195
+ "torch_dtype": "float32",
196
+ "train_headpredictor": false,
197
+ "transformers_version": "4.48.3",
198
+ "use_cache": true,
199
+ "vocab_size": 128256
200
+ }