Saving train state of step 5
Browse files
distil-whisper/events.out.tfevents.1715183755.server02.1990428.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0354de01094b8592de53be68e8334fc420b4c287e9e9001240278094f8d39757
|
3 |
+
size 428
|
distil-whisper/events.out.tfevents.1715185948.server02.2003546.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9d1df0578ebc524aef4c901dc2bc8d268d478f60a8425cb059994bd0aa32c8c
|
3 |
+
size 88
|
distil-whisper/events.out.tfevents.1715198685.server02.2050598.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95c1c723bbc492f1204ace70eb4d257d87b54510f03dd9c27d25658eb3400728
|
3 |
+
size 392
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc0c1ad5d126bcd257bd671c07641b701a001e808c0e63178fe6a801f1587bbe
|
3 |
+
size 3025686376
|
run_distillation.py
CHANGED
@@ -1278,16 +1278,17 @@ def main():
|
|
1278 |
# 11. Define Evaluation Metrics
|
1279 |
def compute_metrics(preds, labels):
|
1280 |
# replace padded labels by the padding token
|
1281 |
-
|
1282 |
-
print(f" labels : {labels}")
|
1283 |
for idx in range(len(labels)):
|
1284 |
labels[idx][labels[idx] == -100] = tokenizer.pad_token_id
|
1285 |
|
1286 |
pred_str = tokenizer.batch_decode(preds, skip_special_tokens=True, decode_with_timestamps=return_timestamps)
|
|
|
1287 |
# we do not want to group tokens when computing the metrics
|
|
|
1288 |
label_str = tokenizer.batch_decode(labels, skip_special_tokens=True)
|
1289 |
wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)
|
1290 |
-
|
1291 |
# normalize everything and re-compute the WER
|
1292 |
norm_pred_str = [normalizer(pred) for pred in pred_str]
|
1293 |
norm_label_str = [normalizer(label) for label in label_str]
|
@@ -1755,5 +1756,5 @@ def main():
|
|
1755 |
if __name__ == "__main__":
|
1756 |
main()
|
1757 |
'''
|
1758 |
-
accelerate launch --mixed_precision=bf16 run_distillation.py --model_name_or_path "./distil-large-v3-init" --teacher_model_name_or_path "openai/whisper-large-v3" --train_dataset_name "mozilla-foundation/common_voice_15_0" --train_dataset_config_name "de" --train_split_name "train" --text_column_name "sentence" --eval_dataset_name "mozilla-foundation/common_voice_15_0" --eval_dataset_config_name "de" --eval_split_name "validation" --eval_text_column_name "sentence" --eval_steps 5 --save_steps
|
1759 |
'''
|
|
|
1278 |
# 11. Define Evaluation Metrics
|
1279 |
def compute_metrics(preds, labels):
|
1280 |
# replace padded labels by the padding token
|
1281 |
+
|
|
|
1282 |
for idx in range(len(labels)):
|
1283 |
labels[idx][labels[idx] == -100] = tokenizer.pad_token_id
|
1284 |
|
1285 |
pred_str = tokenizer.batch_decode(preds, skip_special_tokens=True, decode_with_timestamps=return_timestamps)
|
1286 |
+
print(f" pred_str : {pred_str}")
|
1287 |
# we do not want to group tokens when computing the metrics
|
1288 |
+
|
1289 |
label_str = tokenizer.batch_decode(labels, skip_special_tokens=True)
|
1290 |
wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)
|
1291 |
+
print(f" label_str : {label_str}")
|
1292 |
# normalize everything and re-compute the WER
|
1293 |
norm_pred_str = [normalizer(pred) for pred in pred_str]
|
1294 |
norm_label_str = [normalizer(label) for label in label_str]
|
|
|
1756 |
if __name__ == "__main__":
|
1757 |
main()
|
1758 |
'''
|
1759 |
+
accelerate launch --mixed_precision=bf16 run_distillation.py --model_name_or_path "./distil-large-v3-init" --teacher_model_name_or_path "openai/whisper-large-v3" --train_dataset_name "mozilla-foundation/common_voice_15_0" --train_dataset_config_name "de" --train_split_name "train" --text_column_name "sentence" --eval_dataset_name "mozilla-foundation/common_voice_15_0" --eval_dataset_config_name "de" --eval_split_name "validation" --eval_text_column_name "sentence" --eval_steps 5 --save_steps 5 --warmup_steps 500 --learning_rate 1e-4 --lr_scheduler_type "linear" --logging_steps 5 --save_total_limit 1 --max_steps 15 --per_device_train_batch_size 4 --per_device_eval_batch_size 2 --dataloader_num_workers 2 --preprocessing_num_workers 2 --ddp_timeout 7200 --dtype "bfloat16" --output_dir "./" --use_pseudo_labels "false" --condition_on_prev_probability "0.0" --do_train --do_eval --gradient_checkpointing --overwrite_output_dir --predict_with_generate --freeze_encoder --streaming --push_to_hub --language de --max_eval_samples 50
|
1760 |
'''
|