Saving train state of step 5
Browse files
distil-whisper/events.out.tfevents.1715170439.server02.1907732.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f244db9e60fe4f96d3efe5ffc86d99cdf07af033d92613ce0ebfc2522073a140
|
3 |
+
size 392
|
run_distillation.py
CHANGED
@@ -1567,6 +1567,7 @@ def main():
|
|
1567 |
train_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
|
1568 |
resume_step = None
|
1569 |
|
|
|
1570 |
for batch in train_dataloader:
|
1571 |
with accelerator.accumulate(student_model):
|
1572 |
loss, train_metric = train_step(batch, temperature=training_args.temperature)
|
@@ -1615,29 +1616,34 @@ def main():
|
|
1615 |
)
|
1616 |
|
1617 |
if training_args.do_eval and (cur_step % eval_steps == 0 or cur_step == total_train_steps):
|
|
|
1618 |
train_time += time.time() - train_start
|
1619 |
student_model.eval()
|
|
|
1620 |
# ======================== Evaluating ==============================
|
|
|
1621 |
for eval_split in all_eval_splits:
|
1622 |
eval_metrics = []
|
1623 |
eval_preds = []
|
1624 |
eval_labels = []
|
1625 |
eval_start = time.time()
|
1626 |
|
1627 |
-
validation_dataloader = DataLoader(
|
1628 |
-
vectorized_datasets[eval_split],
|
1629 |
-
collate_fn=data_collator,
|
1630 |
-
batch_size=per_device_eval_batch_size,
|
1631 |
-
drop_last=False,
|
1632 |
-
num_workers=dataloader_num_workers,
|
1633 |
-
prefetch_factor=prefetch_factor,
|
1634 |
pin_memory=training_args.dataloader_pin_memory,
|
1635 |
)
|
1636 |
-
validation_dataloader = accelerator.prepare(validation_dataloader)
|
1637 |
|
|
|
|
|
|
|
1638 |
for batch in tqdm(
|
1639 |
validation_dataloader,
|
1640 |
-
desc=f"Evaluating {eval_split}...",
|
1641 |
position=2,
|
1642 |
disable=not accelerator.is_local_main_process,
|
1643 |
):
|
@@ -1648,6 +1654,7 @@ def main():
|
|
1648 |
|
1649 |
# generation
|
1650 |
if training_args.predict_with_generate:
|
|
|
1651 |
generated_ids = generate_step(batch)
|
1652 |
# Gather all predictions and targets
|
1653 |
generated_ids, labels = accelerator.gather_for_metrics(
|
|
|
1567 |
train_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
|
1568 |
resume_step = None
|
1569 |
|
1570 |
+
|
1571 |
for batch in train_dataloader:
|
1572 |
with accelerator.accumulate(student_model):
|
1573 |
loss, train_metric = train_step(batch, temperature=training_args.temperature)
|
|
|
1616 |
)
|
1617 |
|
1618 |
if training_args.do_eval and (cur_step % eval_steps == 0 or cur_step == total_train_steps):
|
1619 |
+
print("evaluating dsakdlaskdfl;skl;afksdl;fdasl;fkdl;askfl;asdkfldskfl;das")
|
1620 |
train_time += time.time() - train_start
|
1621 |
student_model.eval()
|
1622 |
+
|
1623 |
# ======================== Evaluating ==============================
|
1624 |
+
|
1625 |
for eval_split in all_eval_splits:
|
1626 |
eval_metrics = []
|
1627 |
eval_preds = []
|
1628 |
eval_labels = []
|
1629 |
eval_start = time.time()
|
1630 |
|
1631 |
+
validation_dataloader = DataLoader(
|
1632 |
+
vectorized_datasets[eval_split],
|
1633 |
+
collate_fn=data_collator,
|
1634 |
+
batch_size=per_device_eval_batch_size,
|
1635 |
+
drop_last=False,
|
1636 |
+
num_workers=dataloader_num_workers,
|
1637 |
+
prefetch_factor=prefetch_factor,
|
1638 |
pin_memory=training_args.dataloader_pin_memory,
|
1639 |
)
|
|
|
1640 |
|
1641 |
+
|
1642 |
+
validation_dataloader = accelerator.prepare(validation_dataloader)
|
1643 |
+
|
1644 |
for batch in tqdm(
|
1645 |
validation_dataloader,
|
1646 |
+
desc=f"Evaluating {eval_split}...",
|
1647 |
position=2,
|
1648 |
disable=not accelerator.is_local_main_process,
|
1649 |
):
|
|
|
1654 |
|
1655 |
# generation
|
1656 |
if training_args.predict_with_generate:
|
1657 |
+
|
1658 |
generated_ids = generate_step(batch)
|
1659 |
# Gather all predictions and targets
|
1660 |
generated_ids, labels = accelerator.gather_for_metrics(
|