Update app.py
app.py
CHANGED
@@ -8,6 +8,14 @@ from PIL import Image
 import gradio as gr
 from huggingface_hub import login, HfApi
 
+GRADIO_LOG = ""
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+device = torch.device("cpu")
+checkpoint_path = pathlib.Path("/tmp/gemma_pytorch_models/checkpoint.pth")
+save_dir = pathlib.Path("/tmp/gemma_pytorch_models/")
+save_dir.mkdir(exist_ok=True, parents=True)
+
 def log_message(msg, level="info"):
     global GRADIO_LOG
     if level == "info":
@@ -18,14 +26,6 @@ def log_message(msg, level="info"):
         logging.debug(msg)
     GRADIO_LOG += msg + "\n"
 
-GRADIO_LOG = ""
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-os.environ["CUDA_VISIBLE_DEVICES"] = ""
-device = torch.device("cpu")
-checkpoint_path = pathlib.Path("/tmp/gemma_pytorch_models/checkpoint.pth")
-save_dir = pathlib.Path("/tmp/gemma_pytorch_models/")
-save_dir.mkdir(exist_ok=True, parents=True)
-
 def load_checkpoint(model):
     if checkpoint_path.exists():
         try:
@@ -351,94 +351,43 @@ def distillation_loss(student_logits, teacher_logits):
     loss_hard = F.mse_loss(student_logits, teacher_logits)
     return alpha * loss_soft + (1 - alpha) * loss_hard
 
-optimizer = optim.Adam(student_model.parameters(), lr=1e-4)
-student_model.train()
-print("Inicio del entrenamiento por destilación")
-for epoch in range(1):
-    print(f"Época {epoch+1}")
-    for text in tqdm([], desc="Entrenamiento destilación"):
-        optimizer.zero_grad()
-        inputs = student_tokenizer(text, return_tensors="pt", padding="max_length", truncation=True, max_length=16)
-        with torch.no_grad():
-            t1_inputs = teacher1_tokenizer(text=[text], return_tensors="pt", padding="max_length", truncation=True, max_length=16)
-            t2_inputs = teacher2_tokenizer(text=[text], return_tensors="pt", padding="max_length", truncation=True, max_length=16)
-            teacher1_out = teacher1_model(**t1_inputs, max_new_tokens=10)
-            teacher2_out = teacher2_model(**t2_inputs, max_new_tokens=10)
-            teacher_logits = (teacher1_out.logits + teacher2_out.logits) / 2.0
-        student_out = student_model(**inputs)
-        student_logits = student_out.logits
-        loss = distillation_loss(student_logits, teacher_logits)
-        loss.backward()
-        optimizer.step()
-        print(f"Texto: {text} | Loss: {loss.item():.6f}")
-print("Entrenamiento completado")
-save_checkpoint(student_model)
-print("Aplicando cuantización dinámica al modelo student")
-student_quantized = torch.quantization.quantize_dynamic(student_model, {nn.Linear}, dtype=torch.qint8)
-def save_model_state(model, filename):
-    state_dict = model.state_dict()
-    np_state = {k: v.cpu().numpy() for k, v in state_dict.items()}
-    save_file(np_state, str(filename))
-for _ in tqdm(range(1), desc="Guardando modelos"):
-    pass
-teacher1_file = save_dir / "gemma_teacher_model_quant.safetensors"
-teacher2_file = save_dir / "llama_teacher_model_quant.safetensors"
-student_file = save_dir / "gemma_student_model_quant.safetensors"
-save_model_state(teacher1_quantized, teacher1_file)
-save_model_state(teacher2_quantized, teacher2_file)
-save_model_state(student_quantized, student_file)
-print(f"Modelos guardados en {save_dir}")
-try:
-    user_info = HfApi().whoami(token=hf_token_input)
-    username = user_info["name"]
-    repo_id_t1 = f"{username}/gemma-teacher-pytorch-safetensors"
-    HfApi().create_repo(repo_id_t1, token=hf_token_input, exist_ok=True)
-    HfApi().upload_file(token=hf_token_input, path_or_fileobj=str(teacher1_file), path_in_repo="gemma_teacher_model_quant.safetensors", repo_id=repo_id_t1)
-    repo_id_t2 = f"{username}/llama-teacher-pytorch-safetensors"
-    HfApi().create_repo(repo_id_t2, token=hf_token_input, exist_ok=True)
-    HfApi().upload_file(token=hf_token_input, path_or_fileobj=str(teacher2_file), path_in_repo="llama_teacher_model_quant.safetensors", repo_id=repo_id_t2)
-    repo_id_student = f"{username}/gemma-student-pytorch-safetensors"
-    HfApi().create_repo(repo_id_student, token=hf_token_input, exist_ok=True)
-    HfApi().upload_file(token=hf_token_input, path_or_fileobj=str(student_file), path_in_repo="gemma_student_model_quant.safetensors", repo_id=repo_id_student)
-    print("Modelos subidos al Hub de Hugging Face")
-except Exception as e:
-    print(f"Error subiendo modelos: {e}")
-
 def run_pipeline(teacher1_id, teacher2_id, student_id, hf_token_input):
+    global GRADIO_LOG
+    GRADIO_LOG = ""
     try:
         login(token=hf_token_input)
         api = HfApi()
         proc = AutoProcessor.from_pretrained(teacher1_id)
-
-
-
-
-
-
-
-        if
-
-
-
+        teacher1_model = AutoModelForImageTextToText.from_pretrained(teacher1_id)
+        teacher1_model.to(device); teacher1_model.eval()
+        teacher1_tokenizer = proc.tokenizer if hasattr(proc, "tokenizer") else proc
+        teacher2_tokenizer = AutoTokenizer.from_pretrained(teacher2_id)
+        teacher2_model = AutoModelForCausalLM.from_pretrained(teacher2_id)
+        teacher2_model.to(device); teacher2_model.eval()
+        student_tokenizer = AutoTokenizer.from_pretrained(student_id)
+        if student_tokenizer.pad_token is None:
+            student_tokenizer.add_special_tokens({'pad_token': student_tokenizer.eos_token if student_tokenizer.eos_token is not None else "[PAD]"})
+        student_model = AutoModelForCausalLM.from_pretrained(student_id)
+        student_model.to(device)
         print("Modelos cargados")
     except Exception as e:
         return f"Error cargando modelos: {e}"
     try:
-        t1_task = getattr(
+        t1_task = getattr(teacher1_model.config, "task_type", "image-to-text")
     except Exception:
         t1_task = "image-to-text"
     try:
-        t2_task = getattr(
+        t2_task = getattr(teacher2_model.config, "task", "text-generation")
     except Exception:
         t2_task = "text-generation"
     try:
-        s_task = getattr(
+        s_task = getattr(student_model.config, "task", "text-generation")
     except Exception:
         s_task = "text-generation"
     try:
-        pipe_t1 = pipeline(t1_task, model=
-        pipe_t2 = pipeline(t2_task, model=
-        pipe_s = pipeline(s_task, model=
+        pipe_t1 = pipeline(t1_task, model=teacher1_model, tokenizer=teacher1_tokenizer, device=-1)
+        pipe_t2 = pipeline(t2_task, model=teacher2_model, tokenizer=teacher2_tokenizer, device=-1)
+        pipe_s = pipeline(s_task, model=student_model, tokenizer=student_tokenizer, device=-1)
         print("Pipelines creados:")
         print("Teacher1:", pipe_t1.task)
         print("Teacher2:", pipe_t2.task)
@@ -455,6 +404,60 @@ def run_pipeline(teacher1_id, teacher2_id, student_id, hf_token_input):
         print("Student:", out_s)
     except Exception as e:
         return f"Error en ejecución de pipelines de prueba: {e}"
+
+    optimizer = optim.Adam(student_model.parameters(), lr=1e-4)
+    student_model.train()
+    print("Inicio del entrenamiento por destilación")
+    texts = ["Texto de ejemplo 1", "Texto de ejemplo 2"]
+    for epoch in range(1):
+        print(f"Época {epoch+1}")
+        for text in tqdm(texts, desc="Entrenamiento destilación"):
+            optimizer.zero_grad()
+            inputs = student_tokenizer(text, return_tensors="pt", padding="max_length", truncation=True, max_length=16)
+            with torch.no_grad():
+                t1_inputs = teacher1_tokenizer(text=[text], return_tensors="pt", padding="max_length", truncation=True, max_length=16)
+                t2_inputs = teacher2_tokenizer(text=[text], return_tensors="pt", padding="max_length", truncation=True, max_length=16)
+                teacher1_out = teacher1_model(**t1_inputs, max_new_tokens=10)
+                teacher2_out = teacher2_model(**t2_inputs, max_new_tokens=10)
+                teacher_logits = (teacher1_out.logits + teacher2_out.logits) / 2.0
+            student_out = student_model(**inputs)
+            student_logits = student_out.logits
+            loss = distillation_loss(student_logits, teacher_logits)
+            loss.backward()
+            optimizer.step()
+            print(f"Texto: {text} | Loss: {loss.item():.6f}")
+    print("Entrenamiento completado")
+    save_checkpoint(student_model)
+    print("Aplicando cuantización dinámica al modelo student")
+    student_quantized = torch.quantization.quantize_dynamic(student_model, {nn.Linear}, dtype=torch.qint8)
+    def save_model_state(model, filename):
+        state_dict = model.state_dict()
+        np_state = {k: v.cpu().numpy() for k, v in state_dict.items()}
+        save_file(np_state, str(filename))
+    for _ in tqdm(range(1), desc="Guardando modelos"):
+        pass
+    teacher1_file = save_dir / "gemma_teacher_model_quant.safetensors"
+    teacher2_file = save_dir / "llama_teacher_model_quant.safetensors"
+    student_file = save_dir / "gemma_student_model_quant.safetensors"
+    save_model_state(teacher1_model, teacher1_file)
+    save_model_state(teacher2_model, teacher2_file)
+    save_model_state(student_quantized, student_file)
+    print(f"Modelos guardados en {save_dir}")
+    try:
+        user_info = HfApi().whoami(token=hf_token_input)
+        username = user_info["name"]
+        repo_id_t1 = f"{username}/gemma-teacher-pytorch-safetensors"
+        HfApi().create_repo(repo_id_t1, token=hf_token_input, exist_ok=True)
+        HfApi().upload_file(token=hf_token_input, path_or_fileobj=str(teacher1_file), path_in_repo="gemma_teacher_model_quant.safetensors", repo_id=repo_id_t1)
+        repo_id_t2 = f"{username}/llama-teacher-pytorch-safetensors"
+        HfApi().create_repo(repo_id_t2, token=hf_token_input, exist_ok=True)
+        HfApi().upload_file(token=hf_token_input, path_or_fileobj=str(teacher2_file), path_in_repo="llama_teacher_model_quant.safetensors", repo_id=repo_id_t2)
+        repo_id_student = f"{username}/gemma-student-pytorch-safetensors"
+        HfApi().create_repo(repo_id_student, token=hf_token_input, exist_ok=True)
+        HfApi().upload_file(token=hf_token_input, path_or_fileobj=str(student_file), path_in_repo="gemma_student_model_quant.safetensors", repo_id=repo_id_student)
+        print("Modelos subidos al Hub de Hugging Face")
+    except Exception as e:
+        print(f"Error subiendo modelos: {e}")
     return GRADIO_LOG + "\nProceso completado."
 
 iface = gr.Interface(
@@ -470,4 +473,4 @@ iface = gr.Interface(
     description="Ingrese los IDs de los modelos y su token HF para ejecutar el pipeline automáticamente."
 )
 
-iface.launch()
+iface.launch()
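The hunk above keeps only the last two lines of distillation_loss, so alpha and loss_soft are defined off-screen. Below is a minimal sketch, not the app's actual code, of how such a combined loss is commonly assembled: the alpha and T defaults and the KL-divergence soft term are assumptions, and it presumes student and teacher logits have the same shape (the diff averages two teachers' logits, which only works when their vocabularies and sequence lengths match).

import torch
import torch.nn.functional as F

def distillation_loss_sketch(student_logits, teacher_logits, alpha=0.5, T=2.0):
    # Soft term: KL divergence between temperature-softened distributions
    # (hypothetical; the diff only shows that some loss_soft exists).
    loss_soft = F.kl_div(
        F.log_softmax(student_logits / T, dim=-1),
        F.softmax(teacher_logits / T, dim=-1),
        reduction="batchmean",
    ) * (T * T)
    # Hard term: plain MSE between raw logits, exactly as shown in the hunk.
    loss_hard = F.mse_loss(student_logits, teacher_logits)
    return alpha * loss_soft + (1 - alpha) * loss_hard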
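The new run_pipeline body quantizes the student with torch.quantization.quantize_dynamic and then saves state dicts through a numpy conversion and save_file. Here is a standalone sketch of those two steps on a toy model; the tiny nn.Sequential, the /tmp path, and the use of safetensors.torch.save_file are illustrative assumptions rather than the app's exact code. One caveat relevant to save_model_state in the diff: a dynamically quantized model's state dict holds packed int8 parameters that do not convert to plain numpy tensors, so this sketch saves the float weights instead.

import torch
import torch.nn as nn
from safetensors.torch import save_file  # assumed source of save_file

# Toy model standing in for the student; sizes are illustrative only.
model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))
model.eval()

# Dynamic quantization swaps nn.Linear weights for packed int8 parameters;
# activations are quantized on the fly at inference time (CPU only).
quantized = torch.quantization.quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)
print(quantized(torch.randn(1, 16)).shape)  # the quantized model still runs

# The quantized module's packed params are not plain tensors, so they cannot
# be pushed through .numpy() or safetensors as-is; save the float state dict.
save_file(model.state_dict(), "/tmp/student_float.safetensors")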