badaoui HF Staff commited on
Commit
900a193
·
verified ·
1 Parent(s): 23e1414

Update optimum_neuron_export.py

Browse files
Files changed (1) hide show
  1. optimum_neuron_export.py +634 -119
optimum_neuron_export.py CHANGED
@@ -1,32 +1,113 @@
 
1
  import os
2
  import shutil
3
- from tempfile import TemporaryDirectory
4
- from typing import List, Optional, Tuple, Dict, Any
 
5
  from huggingface_hub import (
6
  CommitOperationAdd,
7
  HfApi,
8
  ModelCard,
9
  Discussion,
10
  CommitInfo,
 
 
11
  )
12
  from huggingface_hub.file_download import repo_folder_name
13
- from optimum.exporters.neuron import main_export
14
  from optimum.exporters.tasks import TasksManager
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  SPACES_URL = "https://huggingface.co/spaces/optimum/neuron-export"
 
17
 
18
- def get_default_compiler_kwargs() -> Dict[str, Any]:
19
- """
20
- Get default compiler kwargs for neuron export.
21
- Based on infer_compiler_kwargs function from the original code.
22
- """
23
- return {
24
- "auto_cast": None, # Default to None (equivalent to "none")
25
- "auto_cast_type": None,
26
- # Add other compiler-specific kwargs if needed
27
- # "disable_fast_relayout": False, # Only available for certain compilers
28
- # "disable_fallback": False, # Only available for certain compilers
29
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
32
  try:
@@ -42,144 +123,578 @@ def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discuss
42
  return discussion
43
  return None
44
 
45
- def export_and_git_add(model_id: str, task: str, folder: str, token: str) -> List:
46
- # Get default compiler kwargs
47
- compiler_kwargs = get_default_compiler_kwargs()
 
 
 
 
48
 
49
- # Infer task if it's "auto"
50
- if task == "auto":
51
- try:
52
- task = TasksManager.infer_task_from_model(model_id)
53
- except Exception as e:
54
- raise Exception(f"Could not infer task for model {model_id}: {e}")
55
 
56
- print(f"Exporting model {model_id} with task: {task}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
 
 
 
 
 
 
 
58
  try:
59
- # Call main_export with all required parameters
60
- main_export(
61
- model_name_or_path=model_id,
62
- output=folder,
63
- compiler_kwargs=compiler_kwargs,
64
- task=task,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  token=token,
66
- # Add other commonly needed parameters with sensible defaults
67
- torch_dtype=None, # Let it use model's default
68
- dynamic_batch_size=False,
69
- do_validation=False, # Disable validation for now to avoid issues
70
- trust_remote_code=False,
71
- force_download=False,
72
- local_files_only=False,
73
- # Default optimization level (O2 is the default from original code)
74
- optlevel="2",
75
- # Other defaults
76
- tensor_parallel_size=1,
77
- disable_neuron_cache=False,
78
- inline_weights_to_neff=True,
79
- output_attentions=False,
80
- output_hidden_states=False,
81
- # Add input shapes for common models
82
- batch_size=1,
83
- sequence_length=128,
84
  )
85
- print(f"Export completed successfully")
 
86
  except Exception as e:
87
- print(f"Export failed with error: {e}")
88
  raise
 
 
 
 
 
 
 
89
 
90
- operations = [
91
- CommitOperationAdd(
92
- path_in_repo=os.path.join("neuron", file_name),
93
- path_or_fileobj=os.path.join(folder, file_name),
94
- )
95
- for file_name in os.listdir(folder)
96
- if os.path.isfile(os.path.join(folder, file_name)) # Only add files, not directories
97
- ]
98
-
99
  try:
100
  card = ModelCard.load(model_id, token=token)
101
- if card.data.tags is None:
102
  card.data.tags = []
103
  if "neuron" not in card.data.tags:
104
  card.data.tags.append("neuron")
105
- card.save(os.path.join(folder, "README.md"))
106
- operations.append(
107
- CommitOperationAdd(
108
- path_in_repo="README.md",
109
- path_or_fileobj=os.path.join(folder, "README.md")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  )
 
 
 
 
 
 
 
112
  except Exception as e:
113
- print(f"Warning: Could not update model card: {e}")
114
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
- return operations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  def convert(
119
  api: "HfApi",
120
  model_id: str,
121
- task: str,
 
122
  force: bool = False,
123
  token: str = None,
124
- ) -> Tuple[str, "CommitInfo"]:
125
- pr_title = "Adding Neuron-optimized model files"
 
 
 
126
  info = api.model_info(model_id, token=token)
127
- filenames = set(s.rfilename for s in info.siblings)
128
  requesting_user = api.whoami(token=token)["name"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- with TemporaryDirectory() as d:
131
- folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
132
- os.makedirs(folder, exist_ok=True)
133
- new_pr = None
134
-
135
  try:
136
- pr = previous_pr(api, model_id, pr_title)
 
 
 
 
 
 
 
 
137
 
138
- if any(fname.startswith("neuron/") for fname in filenames) and not force:
139
- raise Exception(
140
- f"Model {model_id} already has Neuron files, skipping export."
141
- )
142
- elif pr is not None and not force:
143
- url = f"https://huggingface.co/{model_id}/discussions/{pr.num}"
144
- new_pr = pr
145
- raise Exception(
146
- f"Model {model_id} already has an open PR: [{url}]({url})"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  )
148
- else:
149
- operations = export_and_git_add(model_id, task, folder, token=token)
150
 
151
- if not operations:
152
- raise Exception("No files were generated during export")
 
 
 
153
 
154
- commit_description = f"""
155
- πŸ€– Neuron Export Bot: On behalf of [{requesting_user}](https://huggingface.co/{requesting_user}), adding AWS Neuron-optimized model files.
156
-
157
- Neuron-optimized models can achieve high-performance inference on AWS Inferentia and Trainium chips. Learn more:
158
- - [AWS Neuron Documentation](https://awsdocs-neuron.readthedocs-hosted.com)
159
- - [πŸ€— Optimum Neuron Guide](https://huggingface.co/docs/optimum-neuron/index)
160
- """
161
 
162
- new_pr = api.create_commit(
163
- repo_id=model_id,
164
- operations=operations,
165
- commit_message=pr_title,
166
- commit_description=commit_description,
167
- create_pr=True,
168
- token=token,
169
  )
 
 
 
 
 
170
 
171
- except Exception as e:
172
- # Clean up folder before re-raising
173
- if os.path.exists(folder):
174
- shutil.rmtree(folder, ignore_errors=True)
175
- print(f"Conversion failed with error: {e}") # Print the actual error
176
- return "1", str(e) # Return error code and message
177
-
178
- finally:
179
- # Ensure cleanup
180
- if os.path.exists(folder):
181
- shutil.rmtree(folder, ignore_errors=True)
182
-
183
- return "0", new_pr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
 
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory
from typing import Any, Dict, Generator, List, Optional, Tuple, Union

import torch
from huggingface_hub import (
    CommitInfo,
    CommitOperationAdd,
    Discussion,
    HfApi,
    ModelCard,
    RepoUrl,
    create_repo,
)
from huggingface_hub.file_download import repo_folder_name
from optimum.exporters.tasks import TasksManager
from optimum.exporters.neuron.model_configs import *  # noqa: F401,F403 -- kept from the original; presumably imported for its side effects
from optimum.neuron import (
    NeuronModelForAudioClassification,
    NeuronModelForAudioFrameClassification,
    NeuronModelForCausalLM,
    NeuronModelForConditionalGeneration,
    NeuronModelForCTC,
    NeuronModelForFeatureExtraction,
    NeuronModelForImageClassification,
    NeuronModelForMaskedLM,
    NeuronModelForMultipleChoice,
    NeuronModelForObjectDetection,
    NeuronModelForQuestionAnswering,
    NeuronModelForSemanticSegmentation,
    NeuronModelForSentenceTransformers,
    NeuronModelForSeq2SeqLM,
    NeuronModelForSequenceClassification,
    NeuronModelForTokenClassification,
    NeuronModelForXVector,
)
from optimum.neuron import (
    NeuronDiffusionPipelineBase,
    NeuronFluxPipeline,
    NeuronLatentConsistencyModelPipeline,
    NeuronPixArtAlphaPipeline,
    NeuronPixArtSigmaPipeline,
    NeuronStableDiffusionControlNetPipeline,
    NeuronStableDiffusionImg2ImgPipeline,
    NeuronStableDiffusionInpaintPipeline,
    NeuronStableDiffusionInstructPix2PixPipeline,
    NeuronStableDiffusionPipeline,
    NeuronStableDiffusionXLControlNetPipeline,
    NeuronStableDiffusionXLImg2ImgPipeline,
    NeuronStableDiffusionXLInpaintPipeline,
    NeuronStableDiffusionXLPipeline,
)
from optimum.neuron.cache.entries.cache_entry import ModelCacheEntry
54
 
55
  SPACES_URL = "https://huggingface.co/spaces/optimum/neuron-export"
56
# Hub repository that receives the compiled-cache pull requests opened by convert().
CACHE_REPO_ID = "badaoui/optimum-neuron_compile-cache"

# Task to NeuronModel mapping for transformers.
# Keys are task names; values are the optimum-neuron classes that
# export_and_git_add() instantiates via `from_pretrained(..., export=True)`.
TASK_TO_MODEL_CLASS = {
    "feature-extraction": NeuronModelForFeatureExtraction,
    "sentence-transformers": NeuronModelForSentenceTransformers,
    "fill-mask": NeuronModelForMaskedLM,
    "question-answering": NeuronModelForQuestionAnswering,
    "text-classification": NeuronModelForSequenceClassification,
    "token-classification": NeuronModelForTokenClassification,
    "multiple-choice": NeuronModelForMultipleChoice,
    "image-classification": NeuronModelForImageClassification,
    "semantic-segmentation": NeuronModelForSemanticSegmentation,
    "object-detection": NeuronModelForObjectDetection,
    "audio-classification": NeuronModelForAudioClassification,
    "audio-frame-classification": NeuronModelForAudioFrameClassification,
    "automatic-speech-recognition": NeuronModelForCTC,
    "audio-xvector": NeuronModelForXVector,
    "text-generation": NeuronModelForCausalLM,
    "text2text-generation": NeuronModelForSeq2SeqLM,
}

# Diffusion pipeline mapping.
# Used instead of TASK_TO_MODEL_CLASS whenever model_type is not "transformers".
# NOTE(review): "stable_diffusion" is the only key spelled with an underscore;
# confirm against the caller before normalising it.
DIFFUSION_PIPELINE_MAPPING = {
    "text-to-image": NeuronStableDiffusionPipeline,
    "image-to-image": NeuronStableDiffusionImg2ImgPipeline,
    "inpaint": NeuronStableDiffusionInpaintPipeline,
    "instruct-pix2pix": NeuronStableDiffusionInstructPix2PixPipeline,
    "latent-consistency": NeuronLatentConsistencyModelPipeline,
    "stable_diffusion": NeuronStableDiffusionPipeline,
    "stable-diffusion-xl": NeuronStableDiffusionXLPipeline,
    "stable-diffusion-xl-img2img": NeuronStableDiffusionXLImg2ImgPipeline,
    "stable-diffusion-xl-inpaint": NeuronStableDiffusionXLInpaintPipeline,
    "controlnet": NeuronStableDiffusionControlNetPipeline,
    "controlnet-xl": NeuronStableDiffusionXLControlNetPipeline,
    "pixart-alpha": NeuronPixArtAlphaPipeline,
    "pixart-sigma": NeuronPixArtSigmaPipeline,
    "flux": NeuronFluxPipeline,
}
95
+
96
def get_default_input_shapes(task_or_pipeline: str) -> Dict[str, int]:
    """Return the static input shapes used to compile a given task or pipeline.

    Text tasks get a batch/sequence shape, multiple-choice adds a choice
    count, vision tasks get an image shape, audio tasks get an audio length,
    and any key of ``DIFFUSION_PIPELINE_MAPPING`` gets an image-generation
    shape. Anything unrecognised falls back to the text shape.
    """
    text_shapes = {"batch_size": 1, "sequence_length": 128}

    # (task names, shapes) pairs, checked in order; first match wins.
    static_rules = (
        (
            (
                "feature-extraction",
                "sentence-transformers",
                "fill-mask",
                "question-answering",
                "text-classification",
                "token-classification",
                "text-generation",
                "text2text-generation",
            ),
            text_shapes,
        ),
        (
            ("multiple-choice",),
            {"batch_size": 1, "num_choices": 4, "sequence_length": 128},
        ),
        (
            ("image-classification", "semantic-segmentation", "object-detection"),
            {"batch_size": 1, "num_channels": 3, "height": 224, "width": 224},
        ),
        (
            (
                "audio-classification",
                "audio-frame-classification",
                "automatic-speech-recognition",
                "audio-xvector",
            ),
            {"batch_size": 1, "audio_sequence_length": 16000},
        ),
    )
    for task_names, shapes in static_rules:
        if task_or_pipeline in task_names:
            # Hand back a fresh dict so callers can mutate the result freely.
            return dict(shapes)

    if task_or_pipeline in DIFFUSION_PIPELINE_MAPPING:
        return {"batch_size": 1, "height": 1024, "width": 1024, "num_images_per_prompt": 1}

    # Default to text-based shapes
    return dict(text_shapes)
111
 
112
  def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
113
  try:
 
123
  return discussion
124
  return None
125
 
126
+
127
def get_local_cache_structure(local_cache_base: str = "/var/tmp/neuron-compile-cache") -> Dict[str, List[str]]:
    """
    Describe the layout of the local Neuron compile cache.

    Returns a dict mapping each ``neuronxcc-*`` directory directly under
    ``local_cache_base`` to the names of its ``MODULE_*`` sub-directories.
    Compiler directories without any module directory are omitted. A missing
    base directory yields an empty dict; any error while scanning is printed
    as a warning and whatever was collected so far is returned.
    """
    found: Dict[str, List[str]] = {}

    if not os.path.exists(local_cache_base):
        return found

    try:
        for compiler_dir in os.listdir(local_cache_base):
            compiler_path = os.path.join(local_cache_base, compiler_dir)
            if not (os.path.isdir(compiler_path) and compiler_dir.startswith('neuronxcc-')):
                continue

            module_dirs = [
                entry
                for entry in os.listdir(compiler_path)
                if os.path.isdir(os.path.join(compiler_path, entry)) and entry.startswith('MODULE_')
            ]
            if module_dirs:
                found[compiler_dir] = module_dirs
    except Exception as e:
        print(f"Warning: Could not read local cache structure: {e}")

    return found
154
+
155
def upload_cache_files(
    cache_dir: str, cache_repo_id: str, token: str
) -> Generator[Union[str, CommitInfo, None], None, None]:
    """
    Upload every file under ``cache_dir`` to ``cache_repo_id`` as a pull request.

    This is a generator: it yields human-readable progress strings and, as its
    last item, either the object returned by ``HfApi.create_commit`` (when a PR
    was opened) or ``None`` (when ``cache_dir`` contains no files).

    Args:
        cache_dir: Local directory whose tree is mirrored into the repository.
        cache_repo_id: Hub repository that receives the pull request.
        token: Hugging Face token used for the upload.

    Raises:
        Exception: Any failure is first reported as a yielded message and then
            re-raised unchanged.
    """
    try:
        api = HfApi(token=token)

        # One add-operation per file, keeping the path relative to cache_dir.
        # Repository paths always use "/" regardless of the local separator.
        cache_operations = [
            CommitOperationAdd(
                path_in_repo=os.path.relpath(file_path, cache_dir).replace(os.sep, "/"),
                path_or_fileobj=file_path,
            )
            for root, _, files in os.walk(cache_dir)
            for file_path in (os.path.join(root, name) for name in files)
        ]

        yield f"📤 Found {len(cache_operations)} cache files to upload."

        if not cache_operations:
            yield "⚠️ No cache files found to upload."
            # Callers treat the last non-string item as the result.
            yield None
            return

        cache_pr_title = f"Add Neuron cache for {os.path.basename(cache_dir)}"
        # Built line by line so the Markdown is not indented by the source layout.
        cache_commit_description = (
            "\n"
            "🤖 Neuron Cache Bot: Adding compiled Neuron cache artifacts.\n"
            "\n"
            "This PR contains the compiled neuronxcc cache files that can be used "
            "to speed up model loading for AWS Neuron devices.\n"
        )

        cache_pr = api.create_commit(
            repo_id=cache_repo_id,
            operations=cache_operations,
            commit_message=cache_pr_title,
            commit_description=cache_commit_description,
            create_pr=True,
            token=token,
        )

        yield f"✅ Cache PR created successfully: https://huggingface.co/{cache_repo_id}/discussions/{cache_pr.pr_num}"
        # Yield the final PR object so the caller can use it
        yield cache_pr

    except Exception as e:
        yield f"❌ Cache upload failed: {e}"
        raise
206
+
207
def export_and_git_add(
    model_id: str, task_or_pipeline: str, model_type: str, folder: str, token: str
) -> Generator[Union[str, Tuple[str, list]], None, None]:
    """
    Compile ``model_id`` for Neuron, save it into ``folder`` and list the files to commit.

    This is a generator. It yields progress strings and finishes with the
    tuple ``("__RETURN__", operations)``, where ``operations`` is the list of
    ``CommitOperationAdd`` objects covering every file under ``folder`` plus a
    README carrying the ``neuron`` tag (when the model card could be loaded).

    Args:
        model_id: Hub id of the model to export.
        task_or_pipeline: Task / pipeline key, or ``"auto"`` to infer it.
        model_type: ``"transformers"`` selects ``TASK_TO_MODEL_CLASS``; any
            other value selects ``DIFFUSION_PIPELINE_MAPPING``.
        folder: Existing directory that receives the exported files.
        token: Hugging Face token.

    Raises:
        Exception: If the task cannot be inferred, is unsupported, or the
            export itself fails (the latter is also reported as a message
            before being re-raised).
    """
    if task_or_pipeline == "auto":
        try:
            task_or_pipeline = TasksManager.infer_task_from_model(model_id)
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"❌ Could not infer task for model {model_id}: {e}") from e

    yield f"📦 Exporting model `{model_id}` for task `{task_or_pipeline}`..."

    registry = TASK_TO_MODEL_CLASS if model_type == "transformers" else DIFFUSION_PIPELINE_MAPPING
    model_class = registry.get(task_or_pipeline)
    if model_class is None:
        supported = list(registry.keys())
        raise Exception(f"❌ Unsupported task/pipeline: {task_or_pipeline}. Supported: {supported}")

    input_shapes = get_default_input_shapes(task_or_pipeline)
    yield f"🔧 Using input shapes: {input_shapes}"

    try:
        # NOTE(review): dtype and tensor_parallel_size are hard-coded for every
        # model class; confirm that all mapped classes accept them.
        model = model_class.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            export=True,
            token=token,
            tensor_parallel_size=4,
            **input_shapes,
        )
        model.save_pretrained(folder)
        yield "✅ Export completed successfully."
    except Exception as e:
        yield f"❌ Export failed with error: {e}"
        raise

    # One add-operation per exported file, at the same relative path
    # ("/"-separated, as repository paths are).
    operations = [
        CommitOperationAdd(
            path_in_repo=os.path.relpath(file_path, folder).replace(os.sep, "/"),
            path_or_fileobj=file_path,
        )
        for root, _, files in os.walk(folder)
        for file_path in (os.path.join(root, filename) for filename in files)
    ]

    yield f"📁 Found {len(operations)} files to upload"

    # Best effort: tag the original model card with "neuron" and ship it as README.md.
    try:
        card = ModelCard.load(model_id, token=token)
        if getattr(card.data, "tags", None) is None:
            card.data.tags = []
        if "neuron" not in card.data.tags:
            card.data.tags.append("neuron")

        readme_path = os.path.join(folder, "README.md")
        card.save(readme_path)

        # Build the README operation only after the file has its final content,
        # and drop any operation created for an exported README before the
        # rewrite: a CommitOperationAdd captures upload metadata when it is
        # constructed, so mutating an existing one would leave that stale.
        readme_op = CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=readme_path)
        operations = [op for op in operations if op.path_in_repo != "README.md"]
        operations.append(readme_op)

    except Exception as e:
        yield f"⚠️ Warning: Could not update model card: {e}"

    # Sentinel consumed by convert() to retrieve the result of this generator.
    yield ("__RETURN__", operations)
269
+
270
def generate_neuron_repo_name(api, original_model_id: str, task_or_pipeline: str, token:str) -> str:
    """Build the id of the Neuron repository: ``<current user>/<model id with '/' -> '-'>-neuron``.

    The user name comes from ``api.whoami(token=token)``. ``task_or_pipeline``
    is accepted but not used.
    """
    owner = api.whoami(token=token)["name"]
    # Flatten "org/model" into "org-model" so it fits in a single repo name.
    flattened_id = "-".join(original_model_id.split('/'))
    return "{}/{}-neuron".format(owner, flattened_id)
276
+
277
def _render_neuron_readme(
    original_model_id: str,
    task_or_pipeline: str,
    requesting_user: str,
    neuron_repo_name: str,
    model_class_name: str,
) -> str:
    """Render the model card (YAML front matter + Markdown) for the Neuron repository."""
    # The text is flush-left on purpose: the front matter must start in column 0.
    return f"""---
tags:
- neuron
- optimized
- aws-neuron
- {task_or_pipeline}
base_model: {original_model_id}
---

# Neuron-Optimized {original_model_id}

This repository contains AWS Neuron-optimized files for [{original_model_id}](https://huggingface.co/{original_model_id}).

## Model Details

- **Base Model**: [{original_model_id}](https://huggingface.co/{original_model_id})
- **Task**: {task_or_pipeline}
- **Optimization**: AWS Neuron compilation
- **Generated by**: [{requesting_user}](https://huggingface.co/{requesting_user})
- **Generated using**: [Optimum Neuron Compiler Space]({SPACES_URL})

## Usage

This model has been optimized for AWS Neuron devices (Inferentia/Trainium). To use it:

```python
from optimum.neuron import {model_class_name}

model = {model_class_name}.from_pretrained("{neuron_repo_name}")
```

## Performance

These files are pre-compiled for AWS Neuron devices and should provide improved inference performance compared to the original model when deployed on Inferentia or Trainium instances.

## Original Model

For the original model, training details, and more information, please visit: [{original_model_id}](https://huggingface.co/{original_model_id})
"""


def create_neuron_repo_and_upload(
    operations: List[CommitOperationAdd],
    original_model_id: str,
    model_type: str,
    task_or_pipeline: str,
    requesting_user: str,
    token: str,
) -> Generator[Union[str, RepoUrl], None, None]:
    """
    Create (or reuse) the user's Neuron repository and commit ``operations`` to it.

    This is a generator: it yields progress strings and, last, the value
    returned by ``create_repo``. The README operation inside ``operations`` is
    replaced in place (or appended) with a model card describing the Neuron
    export, so the caller's list reflects the change.

    Args:
        operations: Files to commit, as produced by ``export_and_git_add``.
        original_model_id: Hub id of the source model.
        model_type: ``"transformers"`` or a diffusion model type; selects the
            mapping used to name the class in the README usage example.
        task_or_pipeline: Task / pipeline key, or ``"auto"`` to infer it.
        requesting_user: User name shown in the README and commit description.
        token: Hugging Face token.

    Raises:
        Exception: If the task cannot be inferred, or if repository creation
            or the commit fails (reported as a message, then re-raised).
    """
    api = HfApi(token=token)

    if task_or_pipeline == "auto":
        try:
            task_or_pipeline = TasksManager.infer_task_from_model(original_model_id)
        except Exception as e:
            raise Exception(f"❌ Could not infer task for model {original_model_id}: {e}") from e

    neuron_repo_name = generate_neuron_repo_name(api, original_model_id, task_or_pipeline, token)

    yield f"🏗️ Creating new repository: {neuron_repo_name}"

    try:
        repo_url = create_repo(
            repo_id=neuron_repo_name,
            token=token,
            repo_type="model",
            private=False,
            exist_ok=True,
        )

        yield f"✅ Repository created: {repo_url}"

        # Class name shown in the README usage snippet.
        registry = TASK_TO_MODEL_CLASS if model_type == "transformers" else DIFFUSION_PIPELINE_MAPPING
        model_class = registry.get(task_or_pipeline)
        model_class_name = model_class.__name__ if model_class else "NeuronModel"

        neuron_readme_content = _render_neuron_readme(
            original_model_id, task_or_pipeline, requesting_user, neuron_repo_name, model_class_name
        )

        # Pass the README as UTF-8 bytes: no temporary file to leak, no
        # dependence on the platform default encoding. A new operation object
        # is built instead of mutating an existing one, whose upload metadata
        # was computed from the old content.
        readme_op = CommitOperationAdd(
            path_in_repo="README.md",
            path_or_fileobj=neuron_readme_content.encode("utf-8"),
        )
        for index, existing_op in enumerate(operations):
            if existing_op.path_in_repo == "README.md":
                operations[index] = readme_op
                break
        else:
            operations.append(readme_op)

        commit_message = f"Add Neuron-optimized files for {original_model_id}"
        commit_description = (
            "\n"
            "🤖 Neuron Export Bot: Adding AWS Neuron-optimized model files.\n"
            "\n"
            f"Original model: [{original_model_id}](https://huggingface.co/{original_model_id})\n"
            f"Task: {task_or_pipeline}\n"
            f"Generated by: [{requesting_user}](https://huggingface.co/{requesting_user})\n"
            f"Generated using: [Optimum Neuron Compiler Space]({SPACES_URL})\n"
            "\n"
            "These files have been pre-compiled for AWS Neuron devices (Inferentia/Trainium) "
            "and should provide improved inference performance.\n"
        )

        yield f"📤 Uploading {len(operations)} files to {neuron_repo_name}..."

        api.create_commit(
            repo_id=neuron_repo_name,
            operations=operations,
            commit_message=commit_message,
            commit_description=commit_description,
            token=token,
        )

        yield f"✅ Files uploaded successfully to: https://huggingface.co/{neuron_repo_name}"
        yield repo_url

    except Exception as e:
        yield f"❌ Failed to create/upload to Neuron repository: {e}"
        raise
404
+
405
def create_readme_pr_for_original_model(
    original_model_id: str,
    neuron_repo_name: str,
    task_or_pipeline: str,
    requesting_user: str,
    token: str,
) -> Generator[Union[str, CommitInfo], None, None]:
    """
    Open a PR on the original model that appends a link to the Neuron repository.

    This is a generator: it yields progress strings and, when a PR was opened,
    the object returned by ``HfApi.create_commit`` as its last item. If a PR
    with the same title is already found by ``previous_pr``, only a warning is
    yielded and nothing is created.

    Args:
        original_model_id: Hub id of the model whose README gets the link.
        neuron_repo_name: Hub id of the Neuron-optimized repository.
        task_or_pipeline: Accepted for signature symmetry; not used here.
        requesting_user: User name shown in the PR description.
        token: Hugging Face token.

    Raises:
        Exception: Any failure (including a README download error other than
            "file not found") is reported as a message and re-raised.
    """
    # Local import: only this function needs the exception type.
    from huggingface_hub.utils import EntryNotFoundError

    api = HfApi(token=token)

    yield f"📝 Creating PR to add Neuron repo link in {original_model_id}..."

    try:
        pr_title = "Add link to Neuron-optimized version"
        existing_pr = previous_pr(api, original_model_id, pr_title)

        if existing_pr:
            yield f"⚠️ PR already exists: https://huggingface.co/{original_model_id}/discussions/{existing_pr.num}"
            return

        # Start from the current README. Only a genuinely missing README is
        # replaced by a stub: swallowing every error here would open a PR that
        # overwrites the real README after a transient download failure.
        try:
            current_readme_path = api.hf_hub_download(
                repo_id=original_model_id,
                filename="README.md",
                token=token,
            )
            with open(current_readme_path, 'r', encoding='utf-8') as f:
                readme_content = f.read()
        except EntryNotFoundError:
            readme_content = f"# {original_model_id}\n\n"

        # Section appended after a horizontal rule; the leading blank line keeps
        # "---" from turning the previous paragraph into a heading.
        neuron_section = (
            "\n"
            "---\n"
            "## 🚀 AWS Neuron Optimized Version Available\n"
            "\n"
            "A Neuron-optimized version of this model is available for improved performance "
            "on AWS Inferentia/Trainium instances:\n"
            "\n"
            f"**[{neuron_repo_name}](https://huggingface.co/{neuron_repo_name})**\n"
            "\n"
            "The Neuron-optimized version provides:\n"
            "- Pre-compiled artifacts for faster loading\n"
            "- Optimized performance on AWS Neuron devices\n"
            "- Same model capabilities with improved inference speed\n"
        )

        updated_readme = readme_content.rstrip() + "\n" + neuron_section

        # Hand the content over as UTF-8 bytes; no temporary file to clean up.
        operations = [
            CommitOperationAdd(
                path_in_repo="README.md",
                path_or_fileobj=updated_readme.encode("utf-8"),
            )
        ]

        commit_description = (
            "\n"
            "🤖 Neuron Export Bot: Adding link to Neuron-optimized version.\n"
            "\n"
            "A Neuron-optimized version of this model has been created at "
            f"[{neuron_repo_name}](https://huggingface.co/{neuron_repo_name}).\n"
            "\n"
            "The optimized version provides improved performance on AWS Inferentia/Trainium "
            "instances with pre-compiled artifacts.\n"
            "\n"
            f"Generated by: [{requesting_user}](https://huggingface.co/{requesting_user})\n"
            f"Generated using: [Optimum Neuron Compiler Space]({SPACES_URL})\n"
        )

        pr = api.create_commit(
            repo_id=original_model_id,
            operations=operations,
            commit_message=pr_title,
            commit_description=commit_description,
            create_pr=True,
            token=token,
        )

        yield f"✅ README PR created: https://huggingface.co/{original_model_id}/discussions/{pr.pr_num}"
        yield pr

    except Exception as e:
        yield f"❌ Failed to create README PR: {e}"
        raise
496
+
497
+ # --- Updated upload_to_custom_repo function (unchanged) ---
498
+ def upload_to_custom_repo(
499
+ operations: List[CommitOperationAdd],
500
+ custom_repo_id: str,
501
+ original_model_id: str,
502
+ requesting_user: str,
503
+ token: str,
504
+ ) -> Generator[Union[str, CommitInfo], None, None]:
505
+ """
506
+ Uploads neuron files to a custom repository and creates a PR.
507
+ """
508
+ yield f"πŸ“€ Preparing to upload to custom repo: {custom_repo_id}"
509
+ api = HfApi(token=token)
510
 
511
+ try:
512
+ # Ensure the custom repo exists
513
+ api.repo_info(repo_id=custom_repo_id, repo_type="model")
514
+ except Exception as e:
515
+ yield f"❌ Could not access custom repository `{custom_repo_id}`. Please ensure it exists and you have write access. Error: {e}"
516
+ raise
517
+
518
+ pr_title = f"Add Neuron-optimized files for {original_model_id}"
519
+ commit_description = f"""
520
+ πŸ€– Neuron Export Bot: On behalf of [{requesting_user}](https://huggingface.co/{requesting_user}), adding AWS Neuron-optimized model files for `{original_model_id}`.
521
+
522
+ These files were generated using the [Optimum Neuron Compiler Space](https://huggingface.co/spaces/optimum/neuron-export).
523
+ """
524
+
525
+ try:
526
+ custom_pr = api.create_commit(
527
+ repo_id=custom_repo_id,
528
+ operations=operations,
529
+ commit_message=pr_title,
530
+ commit_description=commit_description,
531
+ create_pr=True,
532
+ token=token,
533
+ )
534
+ yield f"βœ… Custom PR created successfully: https://huggingface.co/{custom_repo_id}/discussions/{custom_pr.pr_num}"
535
+ yield custom_pr
536
+
537
+ except Exception as e:
538
+ yield f"❌ Failed to create PR in custom repository: {e}"
539
+ raise
540
 
def _relay_progress(steps):
    """Re-yield string items of ``steps`` as ``("0", message)``; return the last non-string item."""
    result = None
    for item in steps:
        if isinstance(item, str):
            yield "0", item
        else:
            result = item
    return result


def convert(
    api: "HfApi",
    model_id: str,
    task_or_pipeline: str,
    model_type: str = "transformers",
    force: bool = False,
    token: Optional[str] = None,
    pr_options: Optional[Dict[str, Any]] = None,
) -> Generator[Tuple[str, Any], None, None]:
    """
    Export ``model_id`` for Neuron and publish the result according to ``pr_options``.

    This is a generator of ``(status, payload)`` tuples. ``status`` is ``"0"``
    for progress and ``"1"`` for a problem. ``payload`` is a message string,
    except for the final ``"0"`` item, whose payload is a dict of result URLs
    (keys among ``neuron_repo``, ``readme_pr``, ``cache_pr``, ``custom_pr``).

    Args:
        api: Client used for ``model_info`` and ``whoami``.
        model_id: Hub id of the model to export.
        task_or_pipeline: Task / pipeline key, or ``"auto"``.
        model_type: ``"transformers"`` or a diffusion model type.
        force: Accepted for backward compatibility; not used by this function.
        token: Hugging Face token.
        pr_options: Flags read here: ``create_neuron_repo``,
            ``create_cache_pr``, ``create_custom_pr`` and ``custom_repo_id``.

    Raises:
        Exception: Any failure after validation is reported as a ``"1"`` item
            and then re-raised.
    """
    if pr_options is None:
        pr_options = {}

    # The result is not needed; the call is kept for its validation side effect
    # (presumably it raises when the model is not accessible -- confirm).
    api.model_info(model_id, token=token)
    requesting_user = api.whoami(token=token)["name"]

    if not any(pr_options.values()):
        yield "1", "⚠️ No option selected. Please choose at least one option."
        return

    if pr_options.get("create_custom_pr") and not pr_options.get("custom_repo_id"):
        yield "1", "⚠️ Custom PR selected but no repository ID was provided."
        return

    yield "0", f"🚀 Starting export process with options: {pr_options}..."

    with TemporaryDirectory() as temp_dir:
        export_folder = os.path.join(temp_dir, "export")
        cache_mirror_dir = os.path.join(temp_dir, "cache_mirror")
        os.makedirs(export_folder, exist_ok=True)
        os.makedirs(cache_mirror_dir, exist_ok=True)

        result_info = {}

        try:
            # --- Export Logic ---
            operations = None
            for message in export_and_git_add(model_id, task_or_pipeline, model_type, export_folder, token=token):
                if isinstance(message, tuple) and message[0] == "__RETURN__":
                    operations = message[1]
                    break
                yield "0", message

            if not operations:
                raise Exception("Export process did not produce any files to commit.")

            # --- Cache Handling ---
            cache_files_available = False
            if pr_options.get("create_cache_pr"):
                yield "0", "Checking for local cache files..."
                local_cache_base = "/var/tmp/neuron-compile-cache"
                local_cache_structure = get_local_cache_structure(local_cache_base)
                yield "0", f"🗂️ Found cache structure: {len(local_cache_structure)} neuronxcc folders"

                if local_cache_structure:
                    cache_files_available = True
                    # Copy cache files to a temporary mirror directory for upload
                    shutil.copytree(local_cache_base, cache_mirror_dir, dirs_exist_ok=True)
                    yield "0", "Copied cache files to a temporary location for upload."

            # --- New Repository Creation (Replaces Model PR) ---
            operations_committed = False
            if pr_options.get("create_neuron_repo"):
                yield "0", "🏗️ Creating new Neuron-optimized repository..."
                # Generate the repo name first so we can use it consistently
                neuron_repo_name = generate_neuron_repo_name(api, model_id, task_or_pipeline, token)

                yield from _relay_progress(
                    create_neuron_repo_and_upload(
                        operations, model_id, model_type, task_or_pipeline, requesting_user, token
                    )
                )
                operations_committed = True

                result_info["neuron_repo"] = f"https://huggingface.co/{neuron_repo_name}"

                # Automatically create a PR on the original model to add a link
                yield "0", "📝 Creating PR to add Neuron repo link to original model..."
                readme_pr = yield from _relay_progress(
                    create_readme_pr_for_original_model(
                        model_id, neuron_repo_name, task_or_pipeline, requesting_user, token
                    )
                )
                if readme_pr:
                    result_info["readme_pr"] = f"https://huggingface.co/{model_id}/discussions/{readme_pr.pr_num}"

            # --- Cache Repository PR ---
            if pr_options.get("create_cache_pr"):
                if cache_files_available:
                    yield "0", "📤 Creating PR in cache repository..."
                    cache_pr = yield from _relay_progress(
                        upload_cache_files(cache_mirror_dir, CACHE_REPO_ID, token)
                    )
                    if cache_pr:
                        result_info["cache_pr"] = f"https://huggingface.co/{CACHE_REPO_ID}/discussions/{cache_pr.pr_num}"
                else:
                    yield "0", "⚠️ No new cache files were generated to upload."

            # --- Custom Repository PR ---
            if pr_options.get("create_custom_pr"):
                custom_repo_id = pr_options["custom_repo_id"]
                yield "0", f"📤 Creating PR in custom repository: {custom_repo_id}..."

                # Operations that already went through one commit are rebuilt:
                # a CommitOperationAdd carries per-commit state and is not meant
                # to be reused for a second commit.
                if operations_committed:
                    custom_operations = [
                        CommitOperationAdd(path_in_repo=op.path_in_repo, path_or_fileobj=op.path_or_fileobj)
                        for op in operations
                    ]
                else:
                    custom_operations = operations

                custom_pr = yield from _relay_progress(
                    upload_to_custom_repo(custom_operations, custom_repo_id, model_id, requesting_user, token)
                )
                if custom_pr:
                    result_info["custom_pr"] = f"https://huggingface.co/{custom_repo_id}/discussions/{custom_pr.pr_num}"

            yield "0", result_info

        except Exception as e:
            yield "1", f"❌ Conversion failed with a critical error: {e}"
            # Re-raise the exception to be caught by the outer try-except in the Gradio app if needed
            raise
673
+
674
def list_cached_models(cache_repo_id: str, token: str = None) -> Dict[str, List[str]]:
    """
    Map each ``neuronxcc-*`` folder in ``cache_repo_id`` to the folders found directly inside it.

    Only repository paths with at least three ``/``-separated segments whose
    first segment starts with ``neuronxcc-`` are considered; the second segment
    is recorded once per compiler folder. Any error is printed and an empty
    dict is returned.
    """
    try:
        hub = HfApi(token=token)
        modules_by_compiler = {}

        for repo_path in hub.list_repo_files(cache_repo_id, token=token):
            segments = repo_path.split('/')
            # Skip anything that is not <neuronxcc-*>/<module>/<file...>.
            if len(segments) < 3 or not segments[0].startswith('neuronxcc-'):
                continue
            modules_by_compiler.setdefault(segments[0], set()).add(segments[1])

        # Sets were only used for de-duplication; callers get plain lists.
        return {compiler: list(modules) for compiler, modules in modules_by_compiler.items()}

    except Exception as e:
        print(f"Failed to list cached models: {e}")
        return {}