Update app.py
app.py
CHANGED
@@ -12,6 +12,7 @@ import mlx_vlm
 import mlx.core as mx
 from safetensors import safe_open
 
+# The HF token is used for the initial cache (if one is configured)
 HF_TOKEN = os.environ.get("HF_TOKEN")
 os.environ["HF_HUB_CACHE"] = "cache"
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
@@ -117,36 +118,14 @@ def modify_tensors(mlx_path: str) -> None:
             logging.error(f"Failed to save modified {weights_file}: {e}")
     logging.info("Tensor modification complete.")
 
-def get_user_models(oauth_token: gr.OAuthToken | None):
-    """
-    Gets the list of models of the authenticated user via their token.
-    """
-    if oauth_token is None or oauth_token.token is None:
-        return []
-    try:
-        user_info = whoami(oauth_token.token)
-        username = user_info["name"]
-        api = HfApi(token=oauth_token.token)
-        models = api.list_models(author=username)
-        # Return the list of model IDs
-        return [model.modelId for model in models]
-    except Exception as e:
-        print(f"Error fetching user models: {e}")
-        return []
-
-def process_model(model_id, q_method, oauth_token: gr.OAuthToken, user_model):
-    """
-    Processes the model to be converted.
-
-    If a model is selected in 'My Models', that value is used; otherwise, the model from the search box is used.
-    """
+def process_model(model_id, q_method, oauth_token: gr.OAuthToken | None):
     if oauth_token is None or oauth_token.token is None:
         return "You must be logged in to use MLX-my-repo", "error.png"
-
-    # If a model is selected in 'My Models', use that value
-    if user_model:
-        model_id = user_model
-    model_name = model_id.split("/")[-1]
+
+    # Update the HF_TOKEN environment variable so the profile's key is used when downloading gated models
+    os.environ["HF_TOKEN"] = oauth_token.token
+
+    model_name = model_id.split("/")[-1]
     try:
         username = whoami(oauth_token.token)["name"]
     except Exception as e:
@@ -157,29 +136,29 @@ def process_model(model_id, q_method, oauth_token: gr.OAuthToken, user_model):
         if q_method == "FP16":
             upload_repo = f"{username}/{model_name}-mlx-fp16"
             try:
-                mlx_lm.convert(
+                mlx_lm.convert(model_id, mlx_path=mlx_path, quantize=False, dtype="float16")
             except Exception as e1:
                 try:
-                    mlx_vlm.convert(
+                    mlx_vlm.convert(model_id, mlx_path=mlx_path, quantize=False, dtype="float16")
                 except Exception as e2:
-                    mlx_lm.convert(
+                    mlx_lm.convert(model_id, mlx_path=mlx_path, quantize=False, dtype="float16")
         else:
             q_bits = QUANT_PARAMS[q_method]
             upload_repo = f"{username}/{model_name}-mlx-{q_bits}Bit"
             try:
-                mlx_lm.convert(
+                mlx_lm.convert(model_id, mlx_path=mlx_path, quantize=True, q_bits=q_bits)
             except Exception as e1:
                 try:
-                    mlx_vlm.convert(
+                    mlx_vlm.convert(model_id, mlx_path=mlx_path, quantize=True, q_bits=q_bits)
                 except Exception as e2:
-                    mlx_lm.convert(
+                    mlx_lm.convert(model_id, mlx_path=mlx_path, quantize=True, q_bits=q_bits)
         try:
             modify_tensors(mlx_path)
         except FileNotFoundError as e:
             return f"Error modifying tensors: {e}", "error.png"
         except Exception as e:
             return f"Error during tensor modification: {e}", "error.png"
-        upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=
+        upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, oauth_token=oauth_token)
         return (f'Find your repo <a href="https://huggingface.co/{upload_repo}" target="_blank" style="text-decoration:underline">here</a>', "llama.png")
     except Exception as e:
         return f"Error: {e}", "error.png"
@@ -190,33 +169,11 @@ def process_model(model_id, q_method, oauth_token: gr.OAuthToken, user_model):
 css = """.gradio-container { overflow-y: auto; }"""
 with gr.Blocks(css=css) as demo:
     gr.Markdown("You must be logged in to use MLX-my-repo.")
-    #
-
-
-    # Button and dropdown to load the user's models
-    load_button = gr.Button("Load My Models")
-    user_models = gr.Dropdown(choices=[], label="My Models", info="List of your Hugging Face models", interactive=True)
-    load_button.click(fn=get_user_models, inputs=token_input, outputs=user_models)
-
-    # Hugging Face model search box
+    # The LoginButton is used to obtain the user's token, which will be used to access gated models
+    gr.LoginButton(min_width=250)
     model_id = HuggingfaceHubSearch(label="Hub Model ID", placeholder="Search for model id on Huggingface", search_type="model")
-    q_method = gr.Dropdown(
-        ["FP16", "Q2", "Q3", "Q4", "Q6", "Q8"],
-        label="Conversion Method",
-        info="MLX conversion type (FP16 for float16, Q2–Q8 for quantized models)",
-        value="Q4", filterable=False, visible=True
-    )
-
-    # The process_model function now receives:
-    # model_id (search box), q_method, token, and user_models (dropdown)
-    iface = gr.Interface(
-        fn=process_model,
-        inputs=[model_id, q_method, token_input, user_models],
-        outputs=[gr.Markdown(label="output"), gr.Image(show_label=False)],
-        title="Create your own MLX Models, blazingly fast ⚡!",
-        description="The space takes an HF repo as an input, converts it to MLX format (FP16 or quantized), and creates a Public/Private repo under your HF user namespace.",
-        api_name=False
-    )
+    q_method = gr.Dropdown(["FP16", "Q2", "Q3", "Q4", "Q6", "Q8"], label="Conversion Method", info="MLX conversion type (FP16 for float16, Q2–Q8 for quantized models)", value="Q4", filterable=False, visible=True)
+    iface = gr.Interface(fn=process_model, inputs=[model_id, q_method], outputs=[gr.Markdown(label="output"), gr.Image(show_label=False)], title="Create your own MLX Models, blazingly fast ⚡!", description="The space takes an HF repo as an input, converts it to MLX format (FP16 or quantized), and creates a Public/Private repo under your HF user namespace.", api_name=False)
 
 def restart_space():
     try:
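The core of this commit is dropping the manual "Load My Models" token plumbing in favor of Gradio's built-in OAuth: when a handler parameter is annotated with gr.OAuthToken, Gradio injects the logged-in user's token automatically, and exporting it as HF_TOKEN lets the Hub downloads that mlx_lm and mlx_vlm perform internally authenticate as that user, which is what unlocks gated repos. A minimal sketch of the pattern; the handler below is a placeholder, not part of app.py:

import os

import gradio as gr
from huggingface_hub import whoami

def handler(model_id: str, oauth_token: gr.OAuthToken | None):
    # Gradio fills this parameter from the session opened by gr.LoginButton;
    # it is None when nobody is signed in.
    if oauth_token is None or oauth_token.token is None:
        return "You must be logged in."
    # Downstream huggingface_hub downloads (including those run inside
    # mlx_lm.convert / mlx_vlm.convert) read HF_TOKEN, so gated repos work.
    os.environ["HF_TOKEN"] = oauth_token.token
    return f"Converting {model_id} as {whoami(oauth_token.token)['name']}"

with gr.Blocks() as demo:
    gr.LoginButton(min_width=250)
    box = gr.Textbox(label="Model ID")
    out = gr.Markdown()
    # oauth_token is injected by annotation; only the textbox is a visible input.
    box.submit(fn=handler, inputs=box, outputs=out)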
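For context on the modify_tensors(mlx_path) step that runs after conversion: only its log lines appear in this diff, so the actual per-tensor edit is not visible here. A generic sketch of a load-and-resave pass over the converted shards, using the same safe_open and mlx.core imports app.py has; the function name rewrite_shards and the no-op "edit" are placeholders, not app.py's real logic:

import glob
import logging
import os

import mlx.core as mx
from safetensors import safe_open

def rewrite_shards(mlx_path: str) -> None:
    # Placeholder for modify_tensors: load each shard, edit tensors as
    # needed, and save the file back in place.
    for weights_file in glob.glob(os.path.join(mlx_path, "*.safetensors")):
        tensors = {}
        with safe_open(weights_file, framework="mlx") as f:
            for key in f.keys():
                tensors[key] = f.get_tensor(key)  # real code would modify here
        try:
            mx.save_safetensors(weights_file, tensors)
        except Exception as e:
            logging.error(f"Failed to save modified {weights_file}: {e}")
    logging.info("Tensor modification complete.")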
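Once the Space has pushed a converted repo, it is directly loadable with mlx_lm. The repo id below is hypothetical, following the {username}/{model_name}-mlx-{q_bits}Bit naming scheme used above:

from mlx_lm import load, generate

# Hypothetical repo id produced by the Space's naming scheme.
model, tokenizer = load("your-username/Mistral-7B-Instruct-v0.3-mlx-4Bit")
print(generate(model, tokenizer, prompt="Hello!", max_tokens=64))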