File size: 10,382 Bytes
1393b01 796d506 0585716 796d506 26a1157 7295302 ad03828 796d506 64e99f5 796d506 7295302 211a715 26a1157 796d506 7295302 c1d0430 796d506 7295302 796d506 6ef37cd 922a193 1393b01 75d1ef3 0585716 796d506 0585716 75d1ef3 796d506 211a715 ad03828 0585716 796d506 2a7f249 75d1ef3 1393b01 75d1ef3 b323e3d 5888100 796d506 64e99f5 75d1ef3 6188097 0585716 75d1ef3 0585716 75d1ef3 0585716 75d1ef3 0585716 211a715 011301a 8be82f3 e7f2f83 796d506 205190d 75d1ef3 b323e3d 75d1ef3 0585716 75d1ef3 0585716 75d1ef3 6ef37cd 75d1ef3 ad03828 b323e3d ad03828 1393b01 ad03828 0585716 ad03828 011301a ad03828 b8352d5 64e99f5 ad03828 6ef37cd 26a1157 13fef30 26a1157 75d1ef3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
import os
import pathlib
import random
import string
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Iterable, List
import gradio as gr
import huggingface_hub
import torch
import yaml
from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
from mergekit.config import MergeConfiguration
from clean_community_org import garbage_collect_empty_models
# Detect CUDA once at import time; the mergekit command line differs between
# GPU and CPU-only hosts.
has_gpu = torch.cuda.is_available()

# Common prefix of the command; device-specific flags are appended below.
cli = "mergekit-yaml config.yaml merge --copy-tokenizer"
if has_gpu:
    cli += " --cuda --low-cpu-memory --allow-crimes"
else:
    cli += " --allow-crimes --out-shard-size 1B --lazy-unpickle"
# Markdown shown at the very top of the UI (passed to gr.Markdown as the
# first component inside the Blocks context).
MARKDOWN_DESCRIPTION = """
# mergekit-gui
The fastest way to perform a model merge 🔥
Specify a YAML configuration file (see examples below) and a HF token and this app will perform the merge and upload the merged model to your user profile.
"""
# Long-form Markdown rendered after the examples list: an overview of the
# mergekit configuration keys, a table of merge methods, and citation info.
MARKDOWN_ARTICLE = """
___
## Merge Configuration
[Mergekit](https://github.com/arcee-ai/mergekit) configurations are YAML documents specifying the operations to perform in order to produce your merged model.
Below are the primary elements of a configuration file:
- `merge_method`: Specifies the method to use for merging models. See [Merge Methods](https://github.com/arcee-ai/mergekit#merge-methods) for a list.
- `slices`: Defines slices of layers from different models to be used. This field is mutually exclusive with `models`.
- `models`: Defines entire models to be used for merging. This field is mutually exclusive with `slices`.
- `base_model`: Specifies the base model used in some merging methods.
- `parameters`: Holds various parameters such as weights and densities, which can also be specified at different levels of the configuration.
- `dtype`: Specifies the data type used for the merging operation.
- `tokenizer_source`: Determines how to construct a tokenizer for the merged model.
## Merge Methods
A quick overview of the currently supported merge methods:
| Method | `merge_method` value | Multi-Model | Uses base model |
| -------------------------------------------------------------------------------------------- | -------------------- | ----------- | --------------- |
| Linear ([Model Soups](https://arxiv.org/abs/2203.05482)) | `linear` | ✅ | ❌ |
| SLERP | `slerp` | ❌ | ✅ |
| [Task Arithmetic](https://arxiv.org/abs/2212.04089) | `task_arithmetic` | ✅ | ✅ |
| [TIES](https://arxiv.org/abs/2306.01708) | `ties` | ✅ | ✅ |
| [DARE](https://arxiv.org/abs/2311.03099) [TIES](https://arxiv.org/abs/2306.01708) | `dare_ties` | ✅ | ✅ |
| [DARE](https://arxiv.org/abs/2311.03099) [Task Arithmetic](https://arxiv.org/abs/2212.04089) | `dare_linear` | ✅ | ✅ |
| Passthrough | `passthrough` | ❌ | ❌ |
| [Model Stock](https://arxiv.org/abs/2403.19522) | `model_stock` | ✅ | ✅ |
## Citation
This GUI is powered by [Arcee's MergeKit](https://arxiv.org/abs/2403.13257).
If you use it in your research, please cite the following paper:
@article{goddard2024arcee,
title={Arcee's MergeKit: A Toolkit for Merging Large Language Models},
author={Goddard, Charles and Siriwardhana, Shamane and Ehghaghi, Malikeh and Meyers, Luke and Karpukhin, Vlad and Benedict, Brian and McQuade, Mark and Solawetz, Jacob},
journal={arXiv preprint arXiv:2403.13257},
year={2024}
}
This Space is heavily inspired by LazyMergeKit by Maxime Labonne (see [Colab](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb)).
"""
# One single-cell row per YAML file found directly under ./examples; each row
# holds the file path as a string.
examples = [[path] for path in map(str, pathlib.Path("examples").glob("*.yaml"))]

# Token read from the environment; None when the variable is not set.
COMMUNITY_HF_TOKEN = os.environ.get("COMMUNITY_HF_TOKEN")
def merge_multiple_methods(yaml_config: str, hf_token: str, repo_name: str, profile_name: str) -> Iterable[List[Log]]:
    """Run one merge per method in a fixed sequence, streaming logs to the UI.

    The submitted YAML is validated once against ``MergeConfiguration``. Then,
    for ``dare_ties`` followed by ``ties``, ``run_merge_for_method`` is driven
    with a single shared ``LogsViewRunner`` so that all output ends up in one
    log stream. The sequence stops at the first method that fails; success
    messages are only emitted for methods that actually completed.

    Args:
        yaml_config: Merge configuration as YAML text.
        hf_token: Hugging Face token, forwarded unchanged to each merge.
        repo_name: Target repo name, forwarded unchanged (may be empty).
        profile_name: Profile name, forwarded unchanged (may be empty).

    Yields:
        Whatever the shared runner yields for each log entry / command output.
    """
    runner = LogsViewRunner()

    if not yaml_config:
        yield runner.log("Empty yaml, pick an example below", level="ERROR")
        return

    try:
        # Validation only: the parsed configuration object is not used later.
        MergeConfiguration.model_validate(yaml.safe_load(yaml_config))
    except Exception as e:
        yield runner.log(f"Invalid yaml {e}", level="ERROR")
        return

    methods_to_merge = ['dare_ties', 'ties']
    current_yaml_config = yaml_config

    for position, method in enumerate(methods_to_merge, start=1):
        # `yield from` forwards the logs and captures the generator's return
        # value. A callee that returns nothing yields None here, which is
        # treated as "no explicit verdict" rather than as a failure.
        outcome = yield from run_merge_for_method(
            method, current_yaml_config, hf_token, repo_name, profile_name, runner
        )

        # run_merge_for_method treats a non-zero `runner.exit_code` as a failed
        # merge command; use the same signal here. getattr() keeps this safe
        # if the runner has not defined the attribute before any command ran.
        exit_code = getattr(runner, "exit_code", None)
        if outcome is False or exit_code not in (None, 0):
            yield runner.log(
                f"Merge with {method} did not complete. Skipping the remaining methods.",
                level="ERROR",
            )
            return

        current_yaml_config = get_merged_yaml(current_yaml_config, method)
        if position < len(methods_to_merge):
            yield runner.log(f"Model merged with {method}. Proceeding to next method...")
        else:
            yield runner.log(f"Model merged with {method}.")

    # NOTE(review): this path is a placeholder; nothing in this function writes
    # a model to it. Uploads happen per method inside run_merge_for_method.
    merged_model_path = "final_merged_model"
    yield runner.log(f"Model successfully merged using all methods. Saving unified model to {merged_model_path}")

    # Log an example YAML that records which methods were applied, in order.
    example_yaml = generate_example_yaml(methods_to_merge)
    yield runner.log(f"Generated example YAML: {example_yaml}")
def get_merged_yaml(original_yaml: str, method: str) -> str:
    """Return *original_yaml* re-serialized with ``merge_method`` set to *method*.

    The text is parsed with ``yaml.safe_load``, the ``merge_method`` key is
    overwritten (or added), and the result is dumped back to YAML text.
    """
    parsed = yaml.safe_load(original_yaml)
    # Same keys as the parsed document, with merge_method replaced/appended.
    updated = {**parsed, 'merge_method': method}
    return yaml.dump(updated)
def run_merge_for_method(method: str, yaml_config: str, hf_token: str, repo_name: str, profile_name: str, runner: LogsViewRunner):
    """Merge with a single method and upload the result to the Hugging Face Hub.

    Steps, all inside a temporary directory that is removed on exit:
      1. Write the configuration (with ``merge_method`` forced to *method*)
         to ``merged/config.yaml``.
      2. Create the target repo (``exist_ok=True``), generating a name when
         *repo_name* is empty.
      3. Run the module-level ``cli`` command with ``merged`` as working
         directory and ``HF_HOME`` pointed inside the temporary directory.
      4. Upload ``merged/merge`` to the repo.

    Args:
        method: Value written to ``merge_method`` in the configuration.
        yaml_config: Merge configuration as YAML text.
        hf_token: Token used for every Hub call.
        repo_name: Target repo name; when empty a random one is generated.
        profile_name: Only used as a prefix of the generated repo name.
        runner: Shared runner used for logging and for running the command.

    Yields:
        Whatever *runner* yields for each log entry / command output.

    Returns:
        (generator return value, visible through ``yield from``) ``False`` if
        repo creation or the merge command failed, ``True`` once the upload
        step has been run. Callers that ignore the value are unaffected.
    """
    yaml_data = yaml.safe_load(yaml_config)
    yaml_data['merge_method'] = method
    new_yaml_config = yaml.dump(yaml_data)

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdirname:
        tmpdir = pathlib.Path(tmpdirname)
        merged_path = tmpdir / "merged"
        merged_path.mkdir(parents=True, exist_ok=True)
        config_path = merged_path / "config.yaml"
        config_path.write_text(new_yaml_config)
        yield runner.log(f"Merge configuration saved for {method} in {config_path}")

        # Remember whether the name is ours: only a freshly generated name is
        # guaranteed to point at a repo this call created.
        name_was_generated = not repo_name
        if name_was_generated:
            repo_name = f"{profile_name}/mergekit-{method}" if profile_name else f"mergekit-{method}"
            repo_name += "-" + "".join(random.choices(string.ascii_lowercase, k=7))
            # The "/" is flattened to "-", so the profile name ends up as a
            # prefix of the repo name rather than as a namespace.
            repo_name = repo_name.replace("/", "-").strip("-")

        try:
            yield runner.log(f"Creating repo for {method} {repo_name}")
            # One client for create / delete / upload instead of three.
            api = huggingface_hub.HfApi(token=hf_token)
            repo_url = api.create_repo(repo_name, exist_ok=True)
            yield runner.log(f"Repo created for {method}: {repo_url}")
        except Exception as e:
            yield runner.log(f"Error creating repo for {method}: {e}", level="ERROR")
            return False

        # Keep caches inside the temporary directory so they are cleaned up
        # together with it.
        tmp_env = os.environ.copy()
        tmp_env["HF_HOME"] = f"{tmpdirname}/.cache"

        # Build argv as a list: splitting one big string would break the
        # cache path apart if the temporary directory contained whitespace.
        command = cli.split() + ["--lora-merge-cache", f"{tmpdirname}/.lora_cache"]
        yield from runner.run_command(command, cwd=merged_path, env=tmp_env)

        if runner.exit_code != 0:
            if name_was_generated:
                yield runner.log(f"Merge for {method} failed. Deleting repo as no model is uploaded.", level="ERROR")
                try:
                    api.delete_repo(repo_url.repo_id)
                except Exception as e:
                    # Cleanup is best effort; report it instead of raising
                    # out of the log stream.
                    yield runner.log(f"Could not delete repo {repo_url.repo_id}: {e}", level="ERROR")
            else:
                # The repo was opened with exist_ok=True under a caller-chosen
                # name, so it may already hold files (including an earlier
                # method's upload). Deleting it here could destroy them.
                yield runner.log(
                    f"Merge for {method} failed. Keeping repo {repo_url.repo_id} because it was not generated for this merge.",
                    level="ERROR",
                )
            return False

        yield runner.log(f"Model merged with {method}. Uploading to HF.")
        yield from runner.run_python(
            api.upload_folder,
            repo_id=repo_url.repo_id,
            folder_path=merged_path / "merge",
        )
        yield runner.log(f"Model successfully uploaded to HF with {method}: {repo_url.repo_id}")
        return True
def generate_example_yaml(methods: List[str]) -> str:
    """Build an example YAML document reflecting the applied merge-method sequence.

    The document uses fixed sample values; *methods* is stored as-is under the
    ``merge_method_sequence`` key.

    Args:
        methods: Merge method names, in the order they were applied.

    Returns:
        The example configuration serialized with ``yaml.dump``.
    """
    return yaml.dump(
        {
            'merge_method': 'linear',  # or whichever final method is chosen
            'models': ['model1', 'model2', 'model3'],  # sample models to merge
            'slices': None,  # slices can be added here if needed
            'parameters': {'normalize': False, 'weight': 0.5},
            'tokenizer_source': 'union',  # how the tokenizer is defined
            'merge_method_sequence': methods,
        }
    )
# UI definition. Components are laid out in creation order inside the
# enclosing Blocks/Row/Column contexts.
# NOTE(review): the source this was recovered from had lost its indentation;
# the nesting below (inputs in a Row, token/repo/profile in a Column, the
# rest at Blocks level) is reconstructed -- confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN_DESCRIPTION)
    with gr.Row():
        # Hidden textbox that receives the path of the clicked example.
        filename = gr.Textbox(visible=False, label="filename")
        config = gr.Code(language="yaml", lines=10, label="config.yaml")
        with gr.Column():
            # NOTE(review): the placeholder promises a community upload when
            # the token is empty, but the merge functions visible in this file
            # pass the token through as-is -- confirm where that fallback lives.
            token = gr.Textbox(
                lines=1,
                label="HF Write Token",
                info="https://hf.co/settings/token",
                type="password",
                placeholder="Optional. Will upload merged model to MergeKit Community if empty.",
            )
            repo_name = gr.Textbox(
                lines=1,
                label="Repo name",
                placeholder="Optional. Will create a random name if empty.",
            )
            profile_name = gr.Textbox(
                lines=1,
                label="Hugging Face Profile Name",
                placeholder="Enter your Hugging Face profile name.",
            )
    button = gr.Button("Merge", variant="primary")
    logs = LogsView(label="Terminal output")
    # Clicking an example feeds its path through `fn` into `config`.
    # The path is wrapped in a 1-tuple; presumably gr.Code interprets a
    # (filepath,) tuple as "load this file's content" -- confirm against the
    # installed Gradio version.
    gr.Examples(
        examples,
        fn=lambda s: (s,),
        run_on_click=True,
        label="Examples",
        inputs=[filename],
        outputs=[config],
    )
    gr.Markdown(MARKDOWN_ARTICLE)
    # Input order must match merge_multiple_methods' positional parameters:
    # (yaml_config, hf_token, repo_name, profile_name).
    button.click(fn=merge_multiple_methods, inputs=[config, token, repo_name, profile_name], outputs=[logs])
def _garbage_collect_every_hour():
    """Call ``garbage_collect_empty_models`` in an endless loop, once per hour.

    Each run uses ``COMMUNITY_HF_TOKEN``. An exception raised by a run is
    printed and swallowed so the loop keeps going; the function never returns.
    """
    pause_seconds = 3600
    while True:
        try:
            garbage_collect_empty_models(token=COMMUNITY_HF_TOKEN)
        except Exception as error:
            print("Error running garbage collection", error)
        # Sleep after every attempt, whether it succeeded or failed.
        time.sleep(pause_seconds)
# Run the hourly cleanup loop on a background worker thread so it does not
# block the web server started below.
pool = ThreadPoolExecutor()
pool.submit(_garbage_collect_every_hour)

# Enable the request queue (default concurrency limit of 2 per event), then
# start serving the UI.
queued_demo = demo.queue(default_concurrency_limit=2)
queued_demo.launch()
|