xezpeleta committed on
Commit
b6dcc5c
·
verified ·
1 Parent(s): d84aac4

Training in progress, step 1000

Browse files
README.md CHANGED
@@ -7,8 +7,6 @@ tags:
7
  - generated_from_trainer
8
  datasets:
9
  - asierhv/composite_corpus_eu_v2.1
10
- language:
11
- - eu
12
  metrics:
13
  - wer
14
  model-index:
 
7
  - generated_from_trainer
8
  datasets:
9
  - asierhv/composite_corpus_eu_v2.1
 
 
10
  metrics:
11
  - wer
12
  model-index:
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07cfdf761c97ff20bb92439c8d2aa688992c5dc607decc2ce7fe7161d9db9e7f
3
  size 4993448880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f90393365a7608a490b562e2c4d4b86380fa95db974370b7eb6e5eee3d93508
3
  size 4993448880
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ff0450e9cc19db3496689ac7771bc899dc40448f074095faa38b20458a782d3
3
  size 1180663192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b221a4a91dbabd555a9fefa153d0f7778c7256b345fc674cad52b3ae7a222b78
3
  size 1180663192
run.sh CHANGED
@@ -1,10 +1,10 @@
1
- WANDB_PROJECT=whisper-medium-eu \
2
  python run_speech_recognition_seq2seq_streaming.py \
3
  --model_name_or_path="openai/whisper-large-v3" \
4
  --dataset_name="asierhv/composite_corpus_eu_v2.1" \
5
  --language="basque" \
6
  --train_split_name="train" \
7
- --eval_split_name="dev_parl+test_parl+test_cv+test_oslr" \
8
  --model_index_name="Whisper Large Basque" \
9
  --max_steps="20000" \
10
  --output_dir="./" \
@@ -30,7 +30,6 @@ WANDB_PROJECT=whisper-medium-eu \
30
  --gradient_checkpointing \
31
  --fp16 \
32
  --overwrite_output_dir \
33
- --resume_from_checkpoint="checkpoint-10000" \
34
  --do_train \
35
  --do_eval \
36
  --predict_with_generate \
@@ -38,4 +37,4 @@ WANDB_PROJECT=whisper-medium-eu \
38
  --streaming \
39
  --push_to_hub \
40
  --report_to "wandb" \
41
- --run_name "whisper-large-eu-v3"
 
1
+ WANDB_PROJECT=whisper \
2
  python run_speech_recognition_seq2seq_streaming.py \
3
  --model_name_or_path="openai/whisper-large-v3" \
4
  --dataset_name="asierhv/composite_corpus_eu_v2.1" \
5
  --language="basque" \
6
  --train_split_name="train" \
7
+ --eval_split_name="dev" \
8
  --model_index_name="Whisper Large Basque" \
9
  --max_steps="20000" \
10
  --output_dir="./" \
 
30
  --gradient_checkpointing \
31
  --fp16 \
32
  --overwrite_output_dir \
 
33
  --do_train \
34
  --do_eval \
35
  --predict_with_generate \
 
37
  --streaming \
38
  --push_to_hub \
39
  --report_to "wandb" \
40
+ --run_name "whisper-large-eu-v3-25.02-r1"
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7976217da07f1f6b61d94b0655d73812ba2b4478e1d3f5d21cac23d2cd1cce18
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35b687ecb445b882e871a8e50bf8f8f0cf5da184c31d6cc7f22385f74addd658
3
  size 5496