Add pipeline tag and link to project page

This PR improves the model card by:
- Adding the relevant `pipeline_tag`, ensuring people can find your model at https://huggingface.co/models?pipeline_tag=text-generation
- Adding a link to the project page at https://abdelfattah-lab.github.io/TokenButler/

Files changed (1) hide show

README.md +129 -4

README.md CHANGED Viewed

@@ -1,9 +1,11 @@
 ---
-license: mit
-library_name: transformers
 base_model:
 - deepseek-ai/DeepSeek-R1-Distill-Llama-8B
 ---
 # TokenButler
 <!-- markdownlint-disable first-line-h1 -->
 <!-- markdownlint-disable html -->
@@ -17,7 +19,7 @@ base_model:
 <hr>
 <div align="center" style="line-height: 1;">
   <!-- Paper Badge -->
-  <a href="https://github.com/abdelfattah-lab/TokenButler/blob/main/TokenButler_Draft.pdf" target="_blank" style="margin: 2px;">
     <img alt="Paper"
          src="https://img.shields.io/badge/Paper-View-orange?logo=readthedocs&logoColor=white"
          style="display: inline-block; vertical-align: middle;"/>
@@ -28,6 +30,12 @@ base_model:
          src="https://img.shields.io/badge/GitHub-Repo-black?logo=github&logoColor=white"
          style="display: inline-block; vertical-align: middle;"/>
   </a>
 </div>
 <br>
@@ -72,4 +80,121 @@ model = set_sparsity(model, "fixed_60pc")
 # Custom Synthetic Task
 <div align="center">
   <img src="https://github.com/abdelfattah-lab/TokenButler/blob/main/figs/datasetfig.png?raw=true" width="100%" alt="Synthetic Tasks" />
-</div>

 ---
 base_model:
 - deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+library_name: transformers
+license: mit
+pipeline_tag: text-generation
 ---
 # TokenButler
 <!-- markdownlint-disable first-line-h1 -->
 <!-- markdownlint-disable html -->
 <hr>
 <div align="center" style="line-height: 1;">
   <!-- Paper Badge -->
+  <a href="https://arxiv.org/abs/2503.07518" target="_blank" style="margin: 2px;">
     <img alt="Paper"
          src="https://img.shields.io/badge/Paper-View-orange?logo=readthedocs&logoColor=white"
          style="display: inline-block; vertical-align: middle;"/>
          src="https://img.shields.io/badge/GitHub-Repo-black?logo=github&logoColor=white"
          style="display: inline-block; vertical-align: middle;"/>
   </a>
+  <!-- Project Page Badge -->
+  <a href="https://abdelfattah-lab.github.io/TokenButler/" target="_blank" style="margin: 2px;">
+    <img alt="Project Page"
+         src="https://img.shields.io/badge/Project%20Page-🌐-lightgrey"
+         style="display: inline-block; vertical-align: middle;"/>
+  </a>
 </div>
 <br>
 # Custom Synthetic Task
 <div align="center">
   <img src="https://github.com/abdelfattah-lab/TokenButler/blob/main/figs/datasetfig.png?raw=true" width="100%" alt="Synthetic Tasks" />
+</div>
+# File information
+The repository contains the following files:
+Filename: tokenizer.json
+Content: "Content of the file is larger than 50 KB, too long to display."
+Filename: pytorch_model.bin.index.json
+Content: "Content of the file is larger than 50 KB, too long to display."
+Filename: generation_config.json
+Content: {
+  "_from_model_config": true,
+  "bos_token_id": 128000,
+  "eos_token_id": 128001,
+  "transformers_version": "4.48.3"
+}
+Filename: tokenizer_config.json
+Content: {
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "legacy": true,
+  "model_max_length": 16384,
+  "pad_token": {
+    "__type": "AddedToken",
+    "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sp_model_kwargs": {},
+  "unk_token": null,
+  "tokenizer_class": "LlamaTokenizerFast",
+  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c><think>\\n'}}{% endif %}"
+}
+Filename: config.json
+Content: {
+  "architectures": [
+    "modeling_llama_butler.LlamaButlerForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_reduce_factor": 8,
+  "auto_map": {
+    "AutoConfig": "modeling_llama_butler.LlamaButlerConfig",
+    "AutoModel": "modeling_llama_butler.LlamaButlerForCausalLM",
+    "AutoModelForCausalLM": "modeling_llama_butler.LlamaButlerForCausalLM"
+  },
+  "bos_token_id": 128000,
+  "dDash": 32,
+  "eos_token_id": 128001,
+  "eval_llm_mode": "ExpPred",
+  "flash_attn": false,
+  "head_attn_reduce_factor": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intdim": 1024,
+  "intermediate_size": 14336,
+  "lookahead": 0,
+  "max_position_embeddings": 131072,
+  "min_sparse_index": 8,
+  "mlp_bias": false,
+  "model_type": "llama_butler",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "producer_frequency": 32,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "sliding_window": 128,
+  "token_sparse_method": "fixed_50pc",
+  "torch_dtype": "float32",
+  "train_headpredictor": false,
+  "transformers_version": "4.48.3",
+  "use_cache": true,
+  "vocab_size": 128256
+}