Update optimum_neuron_export.py

optimum_neuron_export.py CHANGED (+634 -119)
@@ -1,32 +1,113 @@
+
 import os
 import shutil
-from tempfile import TemporaryDirectory
-from typing import List, Optional, Tuple, Dict, Any
+from tempfile import TemporaryDirectory, NamedTemporaryFile
+from typing import List, Union, Optional, Tuple, Dict, Any, Generator
+from pathlib import Path
+import torch
 from huggingface_hub import (
     CommitOperationAdd,
     HfApi,
     ModelCard,
     Discussion,
     CommitInfo,
+    create_repo,
+    RepoUrl,
 )
 from huggingface_hub.file_download import repo_folder_name
-from optimum.exporters.neuron import main_export
 from optimum.exporters.tasks import TasksManager
+from optimum.exporters.neuron.model_configs import *
+from optimum.neuron import (
+    NeuronModelForFeatureExtraction,
+    NeuronModelForSentenceTransformers,
+    NeuronModelForMaskedLM,
+    NeuronModelForQuestionAnswering,
+    NeuronModelForSequenceClassification,
+    NeuronModelForTokenClassification,
+    NeuronModelForMultipleChoice,
+    NeuronModelForImageClassification,
+    NeuronModelForSemanticSegmentation,
+    NeuronModelForObjectDetection,
+    NeuronModelForAudioClassification,
+    NeuronModelForAudioFrameClassification,
+    NeuronModelForCTC,
+    NeuronModelForXVector,
+    NeuronModelForCausalLM,
+    NeuronModelForSeq2SeqLM,
+    NeuronModelForConditionalGeneration,
+)
+from optimum.neuron import (
+    NeuronDiffusionPipelineBase,
+    NeuronStableDiffusionPipeline,
+    NeuronStableDiffusionImg2ImgPipeline,
+    NeuronStableDiffusionInpaintPipeline,
+    NeuronStableDiffusionInstructPix2PixPipeline,
+    NeuronLatentConsistencyModelPipeline,
+    NeuronStableDiffusionXLPipeline,
+    NeuronStableDiffusionXLImg2ImgPipeline,
+    NeuronStableDiffusionXLInpaintPipeline,
+    NeuronStableDiffusionControlNetPipeline,
+    NeuronStableDiffusionXLControlNetPipeline,
+    NeuronPixArtAlphaPipeline,
+    NeuronPixArtSigmaPipeline,
+    NeuronFluxPipeline,
+)
+from optimum.neuron.cache.entries.cache_entry import ModelCacheEntry
 
 SPACES_URL = "https://huggingface.co/spaces/optimum/neuron-export"
+CACHE_REPO_ID = "badaoui/optimum-neuron_compile-cache"
 
-
-
-
-
-""
-
-
-
-
-
-
+# Task to NeuronModel mapping for transformers
+TASK_TO_MODEL_CLASS = {
+    "feature-extraction": NeuronModelForFeatureExtraction,
+    "sentence-transformers": NeuronModelForSentenceTransformers,
+    "fill-mask": NeuronModelForMaskedLM,
+    "question-answering": NeuronModelForQuestionAnswering,
+    "text-classification": NeuronModelForSequenceClassification,
+    "token-classification": NeuronModelForTokenClassification,
+    "multiple-choice": NeuronModelForMultipleChoice,
+    "image-classification": NeuronModelForImageClassification,
+    "semantic-segmentation": NeuronModelForSemanticSegmentation,
+    "object-detection": NeuronModelForObjectDetection,
+    "audio-classification": NeuronModelForAudioClassification,
+    "audio-frame-classification": NeuronModelForAudioFrameClassification,
+    "automatic-speech-recognition": NeuronModelForCTC,
+    "audio-xvector": NeuronModelForXVector,
+    "text-generation": NeuronModelForCausalLM,
+    "text2text-generation": NeuronModelForSeq2SeqLM,
+}
+
+# Diffusion pipeline mapping
+DIFFUSION_PIPELINE_MAPPING = {
+    "text-to-image": NeuronStableDiffusionPipeline,
+    "image-to-image": NeuronStableDiffusionImg2ImgPipeline,
+    "inpaint": NeuronStableDiffusionInpaintPipeline,
+    "instruct-pix2pix": NeuronStableDiffusionInstructPix2PixPipeline,
+    "latent-consistency": NeuronLatentConsistencyModelPipeline,
+    "stable_diffusion": NeuronStableDiffusionPipeline,
+    "stable-diffusion-xl": NeuronStableDiffusionXLPipeline,
+    "stable-diffusion-xl-img2img": NeuronStableDiffusionXLImg2ImgPipeline,
+    "stable-diffusion-xl-inpaint": NeuronStableDiffusionXLInpaintPipeline,
+    "controlnet": NeuronStableDiffusionControlNetPipeline,
+    "controlnet-xl": NeuronStableDiffusionXLControlNetPipeline,
+    "pixart-alpha": NeuronPixArtAlphaPipeline,
+    "pixart-sigma": NeuronPixArtSigmaPipeline,
+    "flux": NeuronFluxPipeline,
+}
+
+def get_default_input_shapes(task_or_pipeline: str) -> Dict[str, int]:
+    """Get default input shapes based on task type or diffusion pipeline type."""
+    if task_or_pipeline in ["feature-extraction", "sentence-transformers", "fill-mask", "question-answering", "text-classification", "token-classification", "text-generation", "text2text-generation"]:
+        return {"batch_size": 1, "sequence_length": 128}
+    elif task_or_pipeline == "multiple-choice":
+        return {"batch_size": 1, "num_choices": 4, "sequence_length": 128}
+    elif task_or_pipeline in ["image-classification", "semantic-segmentation", "object-detection"]:
+        return {"batch_size": 1, "num_channels": 3, "height": 224, "width": 224}
+    elif task_or_pipeline in ["audio-classification", "audio-frame-classification", "automatic-speech-recognition", "audio-xvector"]:
+        return {"batch_size": 1, "audio_sequence_length": 16000}
+    elif task_or_pipeline in DIFFUSION_PIPELINE_MAPPING:
+        return {"batch_size": 1, "height": 1024, "width": 1024, "num_images_per_prompt": 1}
+    else:
+        # Default to text-based shapes
+        return {"batch_size": 1, "sequence_length": 128}
 
 def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
     try:
@@ -42,144 +123,578 @@ def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
         return discussion
     return None
 
-
-
-
 
-
-
-    try:
-        task = TasksManager.infer_task_from_model(model_id)
-    except Exception as e:
-        raise Exception(f"Could not infer task for model {model_id}: {e}")
 
-
 
-    try:
-
-
-
-
-
-
-        token=token,
-
-
-        dynamic_batch_size=False,
-        do_validation=False,  # Disable validation for now to avoid issues
-        trust_remote_code=False,
-        force_download=False,
-        local_files_only=False,
-        # Default optimization level (O2 is the default from original code)
-        optlevel="2",
-        # Other defaults
-        tensor_parallel_size=1,
-        disable_neuron_cache=False,
-        inline_weights_to_neff=True,
-        output_attentions=False,
-        output_hidden_states=False,
-        # Add input shapes for common models
-        batch_size=1,
-        sequence_length=128,
-    )
-
-    except Exception as e:
-
-        raise
 
-    operations
-
-            path_in_repo=os.path.join("neuron", file_name),
-            path_or_fileobj=os.path.join(folder, file_name),
-        )
-        for file_name in os.listdir(folder)
-        if os.path.isfile(os.path.join(folder, file_name))  # Only add files, not directories
-    ]
-
-    try:
-        card = ModelCard.load(model_id, token=token)
-        if card.data.tags is None:
-            card.data.tags = []
-        if "neuron" not in card.data.tags:
-            card.data.tags.append("neuron")
-
-
-
-
-
-        )
-    )
-    except Exception as e:
-
-
 
-
 
-def convert(
-    api: "HfApi",
-    model_id: str,
-
-    force: bool = False,
-    token: str = None,
-
-
-    info = api.model_info(model_id, token=token)
-    filenames =
-    requesting_user = api.whoami(token=token)["name"]
 
-
-
-    os.makedirs(folder, exist_ok=True)
-    new_pr = None
-
-    try:
-
 
-        if
-            raise Exception(
-
-
-
-
-
-
-
-            )
-        else:
-            operations = export_and_git_add(model_id, task, folder, token=token)
 
-
-
 
-
-🤖 Neuron Export Bot: On behalf of [{requesting_user}](https://huggingface.co/{requesting_user}), adding AWS Neuron-optimized model files.
-
-Neuron-optimized models can achieve high-performance inference on AWS Inferentia and Trainium chips. Learn more:
-- [AWS Neuron Documentation](https://awsdocs-neuron.readthedocs-hosted.com)
-- [🤗 Optimum Neuron Guide](https://huggingface.co/docs/optimum-neuron/index)
-"""
 
-
-
-
-
-
-            create_pr=True,
-            token=token,
-        )
 
-
-
-
-
-
-
-
-
-
-
-
-
 
 
+
+def get_local_cache_structure(local_cache_base: str = "/var/tmp/neuron-compile-cache") -> Dict[str, List[str]]:
+    """
+    Get the structure of the local Neuron cache to preserve it in the hub.
+    Returns a dict mapping neuronxcc folders to their MODULE folders.
+    """
+    cache_structure = {}
+
+    if not os.path.exists(local_cache_base):
+        return cache_structure
+
+    try:
+        for item in os.listdir(local_cache_base):
+            item_path = os.path.join(local_cache_base, item)
+            if os.path.isdir(item_path) and item.startswith('neuronxcc-'):
+                modules = []
+                for subitem in os.listdir(item_path):
+                    subitem_path = os.path.join(item_path, subitem)
+                    if os.path.isdir(subitem_path) and subitem.startswith('MODULE_'):
+                        modules.append(subitem)
+
+                if modules:
+                    cache_structure[item] = modules
+
+    except Exception as e:
+        print(f"Warning: Could not read local cache structure: {e}")
+
+    return cache_structure
+
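+# Shape note: the returned mapping looks like {"neuronxcc-<compiler-version>": ["MODULE_<hash>", ...]}
+# (hypothetical keys), mirroring the on-disk layout under /var/tmp/neuron-compile-cache.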
+def upload_cache_files(cache_dir: str, cache_repo_id: str, token: str) -> Generator[Union[str, CommitInfo], None, None]:
+    """
+    Upload cache files to the cache repository and create PR.
+    This is a generator function.
+    """
+    try:
+        api = HfApi(token=token)
+
+        # Create cache operations
+        cache_operations = []
+        for root, _, files in os.walk(cache_dir):
+            for file in files:
+                file_path = os.path.join(root, file)
+                rel_path = os.path.relpath(file_path, cache_dir)
+                cache_operations.append(
+                    CommitOperationAdd(
+                        path_in_repo=rel_path,
+                        path_or_fileobj=file_path,
+                    )
+                )
+
+        yield f"📤 Found {len(cache_operations)} cache files to upload."
+
+        if cache_operations:
+            # Create PR in cache repository
+            cache_pr_title = f"Add Neuron cache for {os.path.basename(cache_dir)}"
+            cache_commit_description = """
+🤖 Neuron Cache Bot: Adding compiled Neuron cache artifacts.
+
+This PR contains the compiled neuronxcc cache files that can be used to speed up model loading for AWS Neuron devices.
+"""
+
+            cache_pr = api.create_commit(
+                repo_id=cache_repo_id,
+                operations=cache_operations,
+                commit_message=cache_pr_title,
+                commit_description=cache_commit_description,
+                create_pr=True,
+                token=token,
+            )
+
+            yield f"✅ Cache PR created successfully: https://huggingface.co/{cache_repo_id}/discussions/{cache_pr.pr_num}"
+            # Yield the final PR object so the caller can use it
+            yield cache_pr
+        else:
+            yield "⚠️ No cache files found to upload."
+            yield None
+
+    except Exception as e:
+        yield f"❌ Cache upload failed: {e}"
+        raise
+
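+# Consumption sketch (hypothetical caller): progress strings and the final CommitInfo share
+# one generator, so callers type-check each yielded item:
+#     for item in upload_cache_files(cache_dir, CACHE_REPO_ID, token):
+#         if isinstance(item, str):
+#             print(item)       # progress message
+#         else:
+#             cache_pr = item   # CommitInfo (or None when nothing to upload)
+# which is exactly how convert() below consumes it.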
+def export_and_git_add(model_id: str, task_or_pipeline: str, model_type: str, folder: str, token: str) -> Any:
+    if task_or_pipeline == "auto":
+        try:
+            task_or_pipeline = TasksManager.infer_task_from_model(model_id)
+        except Exception as e:
+            raise Exception(f"❌ Could not infer task for model {model_id}: {e}")
+
+    yield f"📦 Exporting model `{model_id}` for task `{task_or_pipeline}`..."
+
+    model_class = TASK_TO_MODEL_CLASS.get(task_or_pipeline) if model_type == "transformers" else DIFFUSION_PIPELINE_MAPPING.get(task_or_pipeline)
+    if model_class is None:
+        supported = list(TASK_TO_MODEL_CLASS.keys()) if model_type == "transformers" else list(DIFFUSION_PIPELINE_MAPPING.keys())
+        raise Exception(f"❌ Unsupported task/pipeline: {task_or_pipeline}. Supported: {supported}")
+
+    input_shapes = get_default_input_shapes(task_or_pipeline)
+    yield f"🔧 Using input shapes: {input_shapes}"
+
+    try:
+        model = model_class.from_pretrained(
+            model_id,
+            torch_dtype=torch.bfloat16,
+            export=True,
+            token=token,
+            tensor_parallel_size=4,
+            **input_shapes,
+        )
+        model.save_pretrained(folder)
+        yield "✅ Export completed successfully."
+    except Exception as e:
+        yield f"❌ Export failed with error: {e}"
+        raise
+
+    operations = []
+    for root, _, files in os.walk(folder):
+        for filename in files:
+            file_path = os.path.join(root, filename)
+            repo_path = os.path.relpath(file_path, folder)
+            operations.append(CommitOperationAdd(path_in_repo=repo_path, path_or_fileobj=file_path))
+
+    yield f"📁 Found {len(operations)} files to upload"
+
+    try:
+        card = ModelCard.load(model_id, token=token)
+        if not hasattr(card.data, "tags") or card.data.tags is None:
+            card.data.tags = []
+        if "neuron" not in card.data.tags:
+            card.data.tags.append("neuron")
+
+        readme_path = os.path.join(folder, "README.md")
+        card.save(readme_path)
+
+        # Check if README.md is already in operations, if so update, else add
+        readme_op = next((op for op in operations if op.path_in_repo == "README.md"), None)
+        if readme_op:
+            readme_op.path_or_fileobj = readme_path
+        else:
+            operations.append(CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=readme_path))
+
+    except Exception as e:
+        yield f"⚠️ Warning: Could not update model card: {e}"
+
+    yield ("__RETURN__", operations)
+
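+# The ("__RETURN__", operations) sentinel exists because the consumers iterate this generator
+# with a plain for-loop and would otherwise lose a return value; convert() below breaks on
+# the sentinel to collect the CommitOperationAdd list.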
+def generate_neuron_repo_name(api, original_model_id: str, task_or_pipeline: str, token: str) -> str:
+    """Generate a name for the Neuron-optimized repository."""
+    # Replace '/' with '-' and add neuron suffix
+    requesting_user = api.whoami(token=token)["name"]
+    base_name = original_model_id.replace('/', '-')
+    return f"{requesting_user}/{base_name}-neuron"
+
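+# e.g. "org/bert-base" exported by user "alice" -> "alice/org-bert-base-neuron" (hypothetical names).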
+def create_neuron_repo_and_upload(
+    operations: List[CommitOperationAdd],
+    original_model_id: str,
+    model_type: str,
+    task_or_pipeline: str,
+    requesting_user: str,
+    token: str,
+) -> Generator[Union[str, RepoUrl], None, None]:
+    """
+    Creates a new repository with Neuron files and uploads them.
+    """
+    api = HfApi(token=token)
+
+    if task_or_pipeline == "auto":
+        try:
+            task_or_pipeline = TasksManager.infer_task_from_model(original_model_id)
+        except Exception as e:
+            raise Exception(f"❌ Could not infer task for model {original_model_id}: {e}")
+
+    # Generate repository name
+    neuron_repo_name = generate_neuron_repo_name(api, original_model_id, task_or_pipeline, token)
+
+    yield f"🏗️ Creating new repository: {neuron_repo_name}"
+
+    try:
+        # Create the repository
+        repo_url = create_repo(
+            repo_id=neuron_repo_name,
+            token=token,
+            repo_type="model",
+            private=False,
+            exist_ok=True,
+        )
+
+        yield f"✅ Repository created: {repo_url}"
+
+        # Get the appropriate class name for the Python example
+        if model_type == "transformers":
+            model_class = TASK_TO_MODEL_CLASS.get(task_or_pipeline)
+        else:
+            model_class = DIFFUSION_PIPELINE_MAPPING.get(task_or_pipeline)
+
+        model_class_name = model_class.__name__ if model_class else "NeuronModel"
+
+        # Create enhanced model card for the Neuron repo
+        neuron_readme_content = f"""---
+tags:
+- neuron
+- optimized
+- aws-neuron
+- {task_or_pipeline}
+base_model: {original_model_id}
+---
+
+# Neuron-Optimized {original_model_id}
+
+This repository contains AWS Neuron-optimized files for [{original_model_id}](https://huggingface.co/{original_model_id}).
+
+## Model Details
+
+- **Base Model**: [{original_model_id}](https://huggingface.co/{original_model_id})
+- **Task**: {task_or_pipeline}
+- **Optimization**: AWS Neuron compilation
+- **Generated by**: [{requesting_user}](https://huggingface.co/{requesting_user})
+- **Generated using**: [Optimum Neuron Compiler Space]({SPACES_URL})
+
+## Usage
+
+This model has been optimized for AWS Neuron devices (Inferentia/Trainium). To use it:
+
+```python
+from optimum.neuron import {model_class_name}
+
+model = {model_class_name}.from_pretrained("{neuron_repo_name}")
+```
+
+## Performance
+
+These files are pre-compiled for AWS Neuron devices and should provide improved inference performance compared to the original model when deployed on Inferentia or Trainium instances.
+
+## Original Model
+
+For the original model, training details, and more information, please visit: [{original_model_id}](https://huggingface.co/{original_model_id})
+"""
+
+        # Update the README in operations
+        readme_op = next((op for op in operations if op.path_in_repo == "README.md"), None)
+        if readme_op:
+            # Create a temporary file with the new content
+            with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
+                f.write(neuron_readme_content)
+                readme_op.path_or_fileobj = f.name
+        else:
+            # Add new README operation
+            with NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
+                f.write(neuron_readme_content)
+                operations.append(CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=f.name))
+
+        # Upload files to the new repository
+        commit_message = f"Add Neuron-optimized files for {original_model_id}"
+        commit_description = f"""
+🤖 Neuron Export Bot: Adding AWS Neuron-optimized model files.
+
+Original model: [{original_model_id}](https://huggingface.co/{original_model_id})
+Task: {task_or_pipeline}
+Generated by: [{requesting_user}](https://huggingface.co/{requesting_user})
+Generated using: [Optimum Neuron Compiler Space]({SPACES_URL})
+
+These files have been pre-compiled for AWS Neuron devices (Inferentia/Trainium) and should provide improved inference performance.
+"""
+
+        yield f"📤 Uploading {len(operations)} files to {neuron_repo_name}..."
+
+        commit_info = api.create_commit(
+            repo_id=neuron_repo_name,
+            operations=operations,
+            commit_message=commit_message,
+            commit_description=commit_description,
+            token=token,
+        )
+
+        yield f"✅ Files uploaded successfully to: https://huggingface.co/{neuron_repo_name}"
+        yield repo_url
+
+    except Exception as e:
+        yield f"❌ Failed to create/upload to Neuron repository: {e}"
+        raise
+
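+# Note: exist_ok=True above makes repo creation idempotent, so re-running the export for the
+# same model reuses the existing Neuron repo and simply pushes a fresh commit.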
+def create_readme_pr_for_original_model(
+    original_model_id: str,
+    neuron_repo_name: str,
+    task_or_pipeline: str,
+    requesting_user: str,
+    token: str,
+) -> Generator[Union[str, CommitInfo], None, None]:
+    """
+    Creates a PR on the original model repository to add a link to the Neuron-optimized version.
+    """
+    api = HfApi(token=token)
+
+    yield f"🔗 Creating PR to add Neuron repo link in {original_model_id}..."
+
+    try:
+        # Check if there's already an open PR
+        pr_title = "Add link to Neuron-optimized version"
+        existing_pr = previous_pr(api, original_model_id, pr_title)
+
+        if existing_pr:
+            yield f"⚠️ PR already exists: https://huggingface.co/{original_model_id}/discussions/{existing_pr.num}"
+            return
+
+        # Get the current README
+        try:
+            current_readme_path = api.hf_hub_download(
+                repo_id=original_model_id,
+                filename="README.md",
+                token=token,
+            )
+            with open(current_readme_path, 'r', encoding='utf-8') as f:
+                readme_content = f.read()
+        except Exception:
+            # If README doesn't exist, create a basic one
+            readme_content = f"# {original_model_id}\n\n"
+
+        # Add Neuron optimization section, separated by a horizontal rule
+        neuron_section = f"""
+---
+## 🚀 AWS Neuron Optimized Version Available
+
+A Neuron-optimized version of this model is available for improved performance on AWS Inferentia/Trainium instances:
+
+**[{neuron_repo_name}](https://huggingface.co/{neuron_repo_name})**
+
+The Neuron-optimized version provides:
+- Pre-compiled artifacts for faster loading
+- Optimized performance on AWS Neuron devices
+- Same model capabilities with improved inference speed
+"""
+
+        # Append the Neuron section to the end of the README
+        updated_readme = readme_content.rstrip() + "\n" + neuron_section
+
+        # Create temporary file with updated README
+        with NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding="utf-8") as f:
+            f.write(updated_readme)
+            temp_readme_path = f.name
+
+        # Create the PR
+        operations = [CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=temp_readme_path)]
+
+        commit_description = f"""
+🤖 Neuron Export Bot: Adding link to Neuron-optimized version.
+
+A Neuron-optimized version of this model has been created at [{neuron_repo_name}](https://huggingface.co/{neuron_repo_name}).
+
+The optimized version provides improved performance on AWS Inferentia/Trainium instances with pre-compiled artifacts.
+
+Generated by: [{requesting_user}](https://huggingface.co/{requesting_user})
+Generated using: [Optimum Neuron Compiler Space]({SPACES_URL})
+"""
+
+        pr = api.create_commit(
+            repo_id=original_model_id,
+            operations=operations,
+            commit_message=pr_title,
+            commit_description=commit_description,
+            create_pr=True,
+            token=token,
+        )
+
+        yield f"✅ README PR created: https://huggingface.co/{original_model_id}/discussions/{pr.pr_num}"
+        yield pr
+
+        # Clean up temporary file
+        os.unlink(temp_readme_path)
+
+    except Exception as e:
+        yield f"❌ Failed to create README PR: {e}"
+        raise
+
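+# Note: the previous_pr() check above keeps this step idempotent as well: an already-open
+# "Add link to Neuron-optimized version" PR short-circuits instead of filing a duplicate.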
+# --- Updated upload_to_custom_repo function (unchanged) ---
+def upload_to_custom_repo(
+    operations: List[CommitOperationAdd],
+    custom_repo_id: str,
+    original_model_id: str,
+    requesting_user: str,
+    token: str,
+) -> Generator[Union[str, CommitInfo], None, None]:
+    """
+    Uploads neuron files to a custom repository and creates a PR.
+    """
+    yield f"📤 Preparing to upload to custom repo: {custom_repo_id}"
+    api = HfApi(token=token)
+
+    try:
+        # Ensure the custom repo exists
+        api.repo_info(repo_id=custom_repo_id, repo_type="model")
+    except Exception as e:
+        yield f"❌ Could not access custom repository `{custom_repo_id}`. Please ensure it exists and you have write access. Error: {e}"
+        raise
+
+    pr_title = f"Add Neuron-optimized files for {original_model_id}"
+    commit_description = f"""
+🤖 Neuron Export Bot: On behalf of [{requesting_user}](https://huggingface.co/{requesting_user}), adding AWS Neuron-optimized model files for `{original_model_id}`.
+
+These files were generated using the [Optimum Neuron Compiler Space](https://huggingface.co/spaces/optimum/neuron-export).
+"""
+
+    try:
+        custom_pr = api.create_commit(
+            repo_id=custom_repo_id,
+            operations=operations,
+            commit_message=pr_title,
+            commit_description=commit_description,
+            create_pr=True,
+            token=token,
+        )
+        yield f"✅ Custom PR created successfully: https://huggingface.co/{custom_repo_id}/discussions/{custom_pr.pr_num}"
+        yield custom_pr
+
+    except Exception as e:
+        yield f"❌ Failed to create PR in custom repository: {e}"
+        raise
+
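+# pr_options (inferred from the checks below) carries the boolean flags "create_neuron_repo",
+# "create_cache_pr", "create_custom_pr" and, for the latter, a "custom_repo_id" string.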
+def convert(
+    api: "HfApi",
+    model_id: str,
+    task_or_pipeline: str,
+    model_type: str = "transformers",
+    force: bool = False,
+    token: str = None,
+    pr_options: Dict = None,
+) -> Generator[Tuple[str, Any], None, None]:
+    if pr_options is None:
+        pr_options = {}
+
+    info = api.model_info(model_id, token=token)
+    filenames = {s.rfilename for s in info.siblings}
+    requesting_user = api.whoami(token=token)["name"]
+
+    if not any(pr_options.values()):
+        yield "1", "⚠️ No option selected. Please choose at least one option."
+        return
+
+    if pr_options.get("create_custom_pr") and not pr_options.get("custom_repo_id"):
+        yield "1", "⚠️ Custom PR selected but no repository ID was provided."
+        return
+
+    yield "0", f"🚀 Starting export process with options: {pr_options}..."
+
+    with TemporaryDirectory() as temp_dir:
+        export_folder = os.path.join(temp_dir, "export")
+        cache_mirror_dir = os.path.join(temp_dir, "cache_mirror")
+        os.makedirs(export_folder, exist_ok=True)
+        os.makedirs(cache_mirror_dir, exist_ok=True)
+
+        result_info = {}
+
+        try:
+            # --- Export Logic ---
+            export_gen = export_and_git_add(model_id, task_or_pipeline, model_type, export_folder, token=token)
+            operations = None
+            for message in export_gen:
+                if isinstance(message, tuple) and message[0] == "__RETURN__":
+                    operations = message[1]
+                    break
+                else:
+                    yield "0", message
+
+            if not operations:
+                raise Exception("Export process did not produce any files to commit.")
+
+            # --- Cache Handling ---
+            cache_files_available = False
+            if pr_options.get("create_cache_pr"):
+                yield "0", "Checking for local cache files..."
+                local_cache_structure = get_local_cache_structure()
+                yield "0", f"🗂️ Found cache structure: {len(local_cache_structure)} neuronxcc folders"
+
+                if local_cache_structure:
+                    cache_files_available = True
+                    local_cache_base = "/var/tmp/neuron-compile-cache"
+                    # Copy cache files to a temporary mirror directory for upload
+                    shutil.copytree(local_cache_base, cache_mirror_dir, dirs_exist_ok=True)
+                    yield "0", "Copied cache files to a temporary location for upload."
+
+            # --- New Repository Creation (Replaces Model PR) ---
+            if pr_options.get("create_neuron_repo"):
+                yield "0", "🏗️ Creating new Neuron-optimized repository..."
+                neuron_repo_url = None
+                # Generate the repo name first so we can use it consistently
+                neuron_repo_name = generate_neuron_repo_name(api, model_id, task_or_pipeline, token)
+
+                repo_creation_gen = create_neuron_repo_and_upload(
+                    operations, model_id, model_type, task_or_pipeline, requesting_user, token
+                )
+
+                for msg in repo_creation_gen:
+                    if isinstance(msg, str):
+                        yield "0", msg
+                    else:
+                        neuron_repo_url = msg
+
+                result_info["neuron_repo"] = f"https://huggingface.co/{neuron_repo_name}"
+
+                # Automatically create a PR on the original model to add a link
+                yield "0", "🔗 Creating PR to add Neuron repo link to original model..."
+                readme_pr = None
+                readme_pr_gen = create_readme_pr_for_original_model(
+                    model_id, neuron_repo_name, task_or_pipeline, requesting_user, token
+                )
+                for msg in readme_pr_gen:
+                    if isinstance(msg, str):
+                        yield "0", msg
+                    else:
+                        readme_pr = msg
+
+                if readme_pr:
+                    result_info["readme_pr"] = f"https://huggingface.co/{model_id}/discussions/{readme_pr.pr_num}"
+
+            # --- Cache Repository PR ---
+            if pr_options.get("create_cache_pr"):
+                if cache_files_available:
+                    yield "0", "📤 Creating PR in cache repository..."
+                    cache_pr = None
+                    cache_upload_gen = upload_cache_files(cache_mirror_dir, CACHE_REPO_ID, token)
+                    for msg in cache_upload_gen:
+                        if isinstance(msg, str):
+                            yield "0", msg
+                        else:
+                            cache_pr = msg
+                    if cache_pr:
+                        result_info["cache_pr"] = f"https://huggingface.co/{CACHE_REPO_ID}/discussions/{cache_pr.pr_num}"
+                else:
+                    yield "0", "⚠️ No new cache files were generated to upload."
+
+            # --- Custom Repository PR ---
+            if pr_options.get("create_custom_pr"):
+                custom_repo_id = pr_options["custom_repo_id"]
+                yield "0", f"📤 Creating PR in custom repository: {custom_repo_id}..."
+                custom_pr = None
+                custom_upload_gen = upload_to_custom_repo(operations, custom_repo_id, model_id, requesting_user, token)
+                for msg in custom_upload_gen:
+                    if isinstance(msg, str):
+                        yield "0", msg
+                    else:
+                        custom_pr = msg
+                if custom_pr:
+                    result_info["custom_pr"] = f"https://huggingface.co/{custom_repo_id}/discussions/{custom_pr.pr_num}"
+
+            yield "0", result_info
+
+        except Exception as e:
+            yield "1", f"❌ Conversion failed with a critical error: {e}"
+            # Re-raise the exception to be caught by the outer try-except in the Gradio app if needed
+            raise
+
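+# Consumption sketch (hypothetical UI callback): convert() streams (code, payload) tuples,
+# where code "1" flags a user-facing error and the final "0" payload is result_info:
+#     for code, payload in convert(api, model_id, task, pr_options=opts):
+#         if isinstance(payload, dict):
+#             links = payload   # e.g. {"neuron_repo": ..., "readme_pr": ..., "cache_pr": ...}
+#         else:
+#             print(payload)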
+def list_cached_models(cache_repo_id: str, token: str = None) -> Dict[str, List[str]]:
+    """
+    List all cached neuronxcc folders in the repository.
+    """
+    try:
+        api = HfApi(token=token)
+        repo_files = api.list_repo_files(cache_repo_id, token=token)
+
+        # Group files by neuronxcc folder
+        neuronxcc_cache = {}
+        for file_path in repo_files:
+            # Extract neuronxcc folder from path
+            parts = file_path.split('/')
+            if len(parts) >= 3 and parts[0].startswith('neuronxcc-'):
+                neuronxcc_folder = parts[0]
+                module_folder = parts[1]
+
+                if neuronxcc_folder not in neuronxcc_cache:
+                    neuronxcc_cache[neuronxcc_folder] = set()
+                neuronxcc_cache[neuronxcc_folder].add(module_folder)
+
+        # Convert sets to lists
+        return {k: list(v) for k, v in neuronxcc_cache.items()}
+
+    except Exception as e:
+        print(f"Failed to list cached models: {e}")
+        return {}
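+# e.g. list_cached_models(CACHE_REPO_ID) might return
+# {"neuronxcc-2.14.227.0": ["MODULE_0123abc", "MODULE_4567def"]} (hypothetical values).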