|
--- |
|
tags: |
|
- espnet |
|
- audio |
|
- automatic-speech-recognition |
|
language: |
|
- en |
|
datasets: |
|
- jibo_kids |
|
license: cc-by-4.0 |
|
--- |
|
|
|
## ESPnet2 ASR model |
|
|
|
### `balaji1312/jibo_kids_wavlm_aed_transformer` |
|
|
|
This model was trained using the `jibo_kids` ASR recipe (`egs2/jibo_kids/asr1`) in [espnet](https://github.com/espnet/espnet/). |
|
|
|
### Demo: How to use in ESPnet2 |
|
|
|
Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html) if you haven't done that already. |
|
|
|
```bash |
|
cd espnet |
|
|
|
pip install -e . |
|
cd egs2/jibo_kids/asr1 |
|
./run.sh --skip_data_prep false --skip_train true --download_model balaji1312/jibo_kids_wavlm_aed_transformer |
|
``` |
|
|
|
<!-- Generated by scripts/utils/show_asr_result.sh --> |
|
# RESULTS |
|
## Environments |
|
- date: `Thu Jan 30 06:18:01 EST 2025` |
|
- python version: `3.9.19 (main, May 6 2024, 19:43:03) [GCC 11.2.0]` |
|
- espnet version: `espnet 202402` |
|
- pytorch version: `pytorch 2.4.0` |
|
- Git hash: `c46aa9a594ff83d52cbf61d84c5650325d1ce527` |
|
- Commit date: `Sun Oct 13 14:39:31 2024 -0400` |
|
|
|
## exp/asr_train_asr_wavlm_transformer_raw_en_bpe1024 |
|
### WER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|decode_asr_asr_model_valid.acc.best/test|1044|3686|56.1|31.4|12.5|8.1|52.0|62.3| |
|
|
|
### CER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|decode_asr_asr_model_valid.acc.best/test|1044|16215|75.4|8.1|16.6|9.4|34.1|62.3| |
|
|
|
### TER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|decode_asr_asr_model_valid.acc.best/test|1044|5220|64.5|18.0|17.5|10.4|45.9|62.3| |
|
|
|
## exp/asr_train_asr_wavlm_transformer_raw_en_bpe1024/decode_asr_asr_model_valid.acc.best |
|
### WER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|org/dev|853|2372|59.8|31.2|8.9|7.2|47.3|64.0| |
|
|
|
### CER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|org/dev|853|9855|78.3|7.3|14.3|8.4|30.1|64.0| |
|
|
|
### TER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|org/dev|853|3590|68.2|16.2|15.6|6.4|38.3|64.0| |
|
|
|
## ASR config |
|
|
|
<details><summary>expand</summary> |
|
|
|
``` |
|
config: conf/tuning/train_asr_wavlm_transformer.yaml |
|
print_config: false |
|
log_level: INFO |
|
drop_last_iter: false |
|
dry_run: false |
|
iterator_type: sequence |
|
valid_iterator_type: null |
|
output_dir: exp/asr_train_asr_wavlm_transformer_raw_en_bpe1024 |
|
ngpu: 1 |
|
seed: 2022 |
|
num_workers: 4 |
|
num_att_plot: 0 |
|
dist_backend: nccl |
|
dist_init_method: env:// |
|
dist_world_size: null |
|
dist_rank: null |
|
local_rank: 0 |
|
dist_master_addr: null |
|
dist_master_port: null |
|
dist_launcher: null |
|
multiprocessing_distributed: false |
|
unused_parameters: false |
|
sharded_ddp: false |
|
use_deepspeed: false |
|
deepspeed_config: null |
|
cudnn_enabled: true |
|
cudnn_benchmark: false |
|
cudnn_deterministic: false |
|
use_tf32: false |
|
collect_stats: false |
|
write_collected_feats: false |
|
max_epoch: 100 |
|
patience: null |
|
val_scheduler_criterion: |
|
- valid |
|
- loss |
|
early_stopping_criterion: |
|
- valid |
|
- loss |
|
- min |
|
best_model_criterion: |
|
- - valid |
|
- acc |
|
- max |
|
keep_nbest_models: 4 |
|
nbest_averaging_interval: 0 |
|
grad_clip: 5.0 |
|
grad_clip_type: 2.0 |
|
grad_noise: false |
|
accum_grad: 4 |
|
no_forward_run: false |
|
resume: true |
|
train_dtype: float32 |
|
use_amp: true |
|
log_interval: 400 |
|
use_matplotlib: true |
|
use_tensorboard: true |
|
create_graph_in_tensorboard: false |
|
use_wandb: false |
|
wandb_project: null |
|
wandb_id: null |
|
wandb_entity: null |
|
wandb_name: null |
|
wandb_model_log_interval: -1 |
|
detect_anomaly: false |
|
use_adapter: false |
|
adapter: lora |
|
save_strategy: all |
|
adapter_conf: {} |
|
pretrain_path: null |
|
init_param: [] |
|
ignore_init_mismatch: false |
|
freeze_param: |
|
- frontend.upstream |
|
num_iters_per_epoch: null |
|
batch_size: 20 |
|
valid_batch_size: null |
|
batch_bins: 1200000 |
|
valid_batch_bins: null |
|
category_sample_size: 10 |
|
train_shape_file: |
|
- exp/asr_stats_raw_en_bpe1024/train/speech_shape |
|
- exp/asr_stats_raw_en_bpe1024/train/text_shape.bpe |
|
valid_shape_file: |
|
- exp/asr_stats_raw_en_bpe1024/valid/speech_shape |
|
- exp/asr_stats_raw_en_bpe1024/valid/text_shape.bpe |
|
batch_type: numel |
|
valid_batch_type: null |
|
fold_length: |
|
- 80000 |
|
- 150 |
|
sort_in_batch: descending |
|
shuffle_within_batch: false |
|
sort_batch: descending |
|
multiple_iterator: false |
|
chunk_length: 500 |
|
chunk_shift_ratio: 0.5 |
|
num_cache_chunks: 1024 |
|
chunk_excluded_key_prefixes: [] |
|
chunk_default_fs: null |
|
chunk_max_abs_length: null |
|
chunk_discard_short_samples: true |
|
train_data_path_and_name_and_type: |
|
- - dump/raw/train/wav.scp |
|
- speech |
|
- sound |
|
- - dump/raw/train/text |
|
- text |
|
- text |
|
valid_data_path_and_name_and_type: |
|
- - dump/raw/dev/wav.scp |
|
- speech |
|
- sound |
|
- - dump/raw/dev/text |
|
- text |
|
- text |
|
multi_task_dataset: false |
|
allow_variable_data_keys: false |
|
max_cache_size: 0.0 |
|
max_cache_fd: 32 |
|
allow_multi_rates: false |
|
valid_max_cache_size: null |
|
exclude_weight_decay: false |
|
exclude_weight_decay_conf: {} |
|
optim: adam |
|
optim_conf: |
|
lr: 0.002 |
|
weight_decay: 1.0e-06 |
|
scheduler: warmuplr |
|
scheduler_conf: |
|
warmup_steps: 15000 |
|
token_list: |
|
- <blank> |
|
- <unk> |
|
- . |
|
- ▁I |
|
- ▁AND |
|
- '''' |
|
- ▁A |
|
- ▁YOU |
|
- S |
|
- ▁IT |
|
- T |
|
- ▁TO |
|
- ▁THE |
|
- ▁LIKE |
|
- ▁THAT |
|
- ▁NO |
|
- ▁BECAUSE |
|
- ▁ONE |
|
- ▁THEN |
|
- ▁DON |
|
- ▁TEETH |
|
- ▁TWO |
|
- ▁FIVE |
|
- ▁KNOW |
|
- ▁MY |
|
- ▁SO |
|
- ▁YOUR |
|
- ▁IS |
|
- ▁THEM |
|
- ▁DO |
|
- ▁SIX |
|
- ▁THREE |
|
- ▁G |
|
- ▁U |
|
- ▁TEN |
|
- ▁FOUR |
|
- ▁GET |
|
- ▁O |
|
- ▁K |
|
- ▁B |
|
- ▁L |
|
- ▁N |
|
- ▁S |
|
- ▁E |
|
- ▁M |
|
- ▁BRUSH |
|
- ▁THIS |
|
- ▁T |
|
- ▁CAN |
|
- ▁SEVEN |
|
- ▁EIGHT |
|
- ▁C |
|
- ▁HAVE |
|
- ▁PUT |
|
- ▁MAKE |
|
- ▁W |
|
- ▁J |
|
- ▁F |
|
- ▁IN |
|
- ▁P |
|
- ▁NINE |
|
- ▁Y |
|
- ▁D |
|
- ▁V |
|
- ▁OKAY |
|
- ▁Q |
|
- ▁Z |
|
- ▁ZERO |
|
- ▁IF |
|
- ▁H |
|
- ▁WHAT |
|
- ▁COUNT |
|
- ING |
|
- ▁R |
|
- ▁X |
|
- ▁OF |
|
- ▁HOW |
|
- ▁ |
|
- ▁WANT |
|
- ▁COLOR |
|
- ▁JUST |
|
- ▁WITH |
|
- ▁ON |
|
- N |
|
- ▁AN |
|
- ▁MIX |
|
- ▁COLORS |
|
- ▁THEY |
|
- ▁YEAH |
|
- ▁YES |
|
- ▁UP |
|
- ▁BLUE |
|
- ▁BY |
|
- ▁GO |
|
- M |
|
- ▁THERE |
|
- ▁ALL |
|
- ▁OR |
|
- ▁CLEAN |
|
- ED |
|
- ▁SEE |
|
- ▁BUT |
|
- ▁USE |
|
- ▁FOR |
|
- ▁BE |
|
- ▁TOOTHPASTE |
|
- ▁WAS |
|
- ▁UM |
|
- ▁LETTER |
|
- ▁NEED |
|
- ▁HE |
|
- ▁WILL |
|
- ▁PLUS |
|
- ▁DOG |
|
- ▁RED |
|
- RE |
|
- ▁PURPLE |
|
- ▁NOT |
|
- ▁CAVITIES |
|
- ▁OH |
|
- ▁ARE |
|
- ▁THINK |
|
- ▁WHY |
|
- ▁SHE |
|
- ▁DID |
|
- ▁HAT |
|
- Y |
|
- ▁PAINT |
|
- ▁BRUSHING |
|
- ▁BOX |
|
- ▁TOOTHBRUSH |
|
- ▁SICK |
|
- ▁OUT |
|
- ▁ME |
|
- ▁JUG |
|
- ▁DOES |
|
- ▁FLU |
|
- ▁MAKES |
|
- ▁WIG |
|
- ▁SH |
|
- ▁MAN |
|
- ▁WE |
|
- ▁MORE |
|
- OULD |
|
- ▁PLAY |
|
- ▁SOME |
|
- ▁JIBO |
|
- ▁GREEN |
|
- ▁VAN |
|
- ▁NUMBER |
|
- ▁YELLOW |
|
- ▁REALLY |
|
- D |
|
- ▁WHITE |
|
- ▁PINK |
|
- ▁WATER |
|
- ▁QUIZ |
|
- ▁NOW |
|
- ▁UH |
|
- ▁DIFFERENT |
|
- ▁RIGHT |
|
- IND |
|
- ▁SAY |
|
- ▁TREE |
|
- LL |
|
- CH |
|
- ▁HELP |
|
- ▁HUNDRED |
|
- ▁LOOK |
|
- ▁COULD |
|
- ▁COUNTING |
|
- ▁WAY |
|
- ▁MAYBE |
|
- ▁EASY |
|
- ▁WOULD |
|
- ▁BLACK |
|
- ▁TAKE |
|
- ▁HER |
|
- ▁LI |
|
- E |
|
- TTLE |
|
- F |
|
- ▁AL |
|
- ▁THING |
|
- ▁ELSE |
|
- ▁WELL |
|
- LY |
|
- ▁TOGETHER |
|
- ▁WHEN |
|
- ▁SIDE |
|
- ▁CAVITY |
|
- ▁FIRST |
|
- ▁DOWN |
|
- ▁DAY |
|
- ▁OTHER |
|
- ▁HERE |
|
- ▁CUBES |
|
- ▁COUNTED |
|
- ▁EVERY |
|
- ▁SA |
|
- ▁TELL |
|
- ▁DAD |
|
- ▁ORANGE |
|
- ▁SAME |
|
- ▁SOMETIMES |
|
- ▁MANY |
|
- OTHER |
|
- ID |
|
- ▁WON |
|
- ▁BIT |
|
- ▁HI |
|
- ▁TOO |
|
- ▁TIME |
|
- UH |
|
- ▁WAIT |
|
- ▁NOTHING |
|
- ▁FALL |
|
- ▁NAME |
|
- ▁LOT |
|
- ▁THAN |
|
- ▁EH |
|
- ▁MEAN |
|
- ▁NEW |
|
- W |
|
- H |
|
- ▁TOOTH |
|
- ER |
|
- ▁FLOSS |
|
- ▁START |
|
- ▁BROWN |
|
- ▁STACK |
|
- ▁NOPE |
|
- ▁GOOD |
|
- A |
|
- L |
|
- ▁LET |
|
- ▁WHI |
|
- O |
|
- ▁ALREADY |
|
- ▁INAUDIBLE |
|
- ▁MOUTH |
|
- ▁EAT |
|
- ▁HAS |
|
- ▁DONE |
|
- ▁THOSE |
|
- ▁BETTER |
|
- ▁FUN |
|
- ▁GERMS |
|
- TO |
|
- ▁UMM |
|
- CK |
|
- SO |
|
- EVEN |
|
- ▁WASH |
|
- ▁ACTUALLY |
|
- ▁DRINK |
|
- ▁FRIEND |
|
- ▁REMEMBER |
|
- ▁SUGAR |
|
- ▁SOMETHING |
|
- ▁HARD |
|
- ▁COME |
|
- ▁PAINTING |
|
- ▁SPI |
|
- ▁AT |
|
- I |
|
- TER |
|
- ▁MUCH |
|
- ▁GUESS |
|
- ▁HIM |
|
- ▁HA |
|
- IGHT |
|
- Z |
|
- ▁FRO |
|
- ▁IMPORTANT |
|
- ▁AGAIN |
|
- ▁STUFF |
|
- ▁BACK |
|
- ▁BUGS |
|
- ▁NIGHT |
|
- ▁ADD |
|
- G |
|
- ▁EA |
|
- HIS |
|
- K |
|
- EVER |
|
- ▁TH |
|
- ▁DARK |
|
- ▁FORGOT |
|
- ▁MOM |
|
- BODY |
|
- ▁UHHUH |
|
- ▁BAD |
|
- ▁TURN |
|
- ▁ANY |
|
- AH |
|
- EL |
|
- U |
|
- AKING |
|
- VERY |
|
- ▁GONNA |
|
- ▁FOUAH |
|
- ▁SURE |
|
- ▁PULL |
|
- ▁LONG |
|
- ▁KEEP |
|
- ES |
|
- P |
|
- ▁WAYS |
|
- TING |
|
- ALLY |
|
- VE |
|
- ONE |
|
- ▁QUESTION |
|
- ▁PAPER |
|
- ▁STU |
|
- YTHING |
|
- ▁SHOW |
|
- ▁CALLED |
|
- ▁LOVE |
|
- ▁MM |
|
- ▁TRY |
|
- ▁BYE |
|
- ▁TOP |
|
- LD |
|
- ▁MMM |
|
- ▁PE |
|
- ▁NUMBERS |
|
- BLE |
|
- PLE |
|
- ▁CUBE |
|
- OUT |
|
- R |
|
- ▁BOTTOM |
|
- ▁FAVORITE |
|
- ▁SPANISH |
|
- ▁TONGUE |
|
- ▁SCHOOL |
|
- ▁TWENTY |
|
- ▁MHM |
|
- ▁FRONT |
|
- ▁STAY |
|
- ▁SPELL |
|
- ▁TEEF |
|
- ▁LAST |
|
- ▁GUM |
|
- ▁HOLD |
|
- TY |
|
- ▁GROUPS |
|
- ▁OFF |
|
- ▁EQUALS |
|
- ▁FINGERS |
|
- ▁QUI |
|
- RAB |
|
- ▁MEANS |
|
- AW |
|
- ▁UHH |
|
- IT |
|
- WEE |
|
- ▁CH |
|
- ▁AM |
|
- ▁SI |
|
- RY |
|
- SIX |
|
- ▁WI |
|
- ▁BEAUTIFUL |
|
- ▁DENTIST |
|
- ▁HEALTHY |
|
- ▁HURT |
|
- ▁ZEWO |
|
- ▁KNEW |
|
- ▁MATH |
|
- ▁BOY |
|
- ▁HOLE |
|
- ▁DIRTY |
|
- ▁YET |
|
- ▁EX |
|
- ▁STARTED |
|
- ▁LIGHT |
|
- ▁THESE |
|
- ▁CU |
|
- B |
|
- ▁THINGS |
|
- ▁GRA |
|
- ▁WHO |
|
- ▁TWOS |
|
- ▁CIRCLE |
|
- ▁YO |
|
- ▁FINGER |
|
- ▁BA |
|
- CE |
|
- OTH |
|
- X |
|
- IR |
|
- MOST |
|
- ▁LEARN |
|
- FIVE |
|
- CI |
|
- ▁ANSWER |
|
- ▁EASIER |
|
- ▁LAUGHS |
|
- ▁MORNING |
|
- ▁MOUTHWASH |
|
- ▁PICTURE |
|
- ▁RINSE |
|
- ▁FORGET |
|
- ▁SISTER |
|
- ▁THOUGH |
|
- ▁TALKING |
|
- ▁GROW |
|
- ▁WHERE |
|
- ▁MINUTES |
|
- ▁SUP |
|
- ▁WISH |
|
- ▁OUR |
|
- ▁STI |
|
- ▁FLOSSING |
|
- SIC |
|
- EPT |
|
- ▁BIG |
|
- PER |
|
- ▁AH |
|
- TH |
|
- TEN |
|
- EN |
|
- ▁FAI |
|
- ▁ONES |
|
- ▁EQUAL |
|
- ▁SP |
|
- KAY |
|
- SIDE |
|
- WAYS |
|
- ▁AROUND |
|
- ▁PRETTY |
|
- ▁RAINBOW |
|
- ▁VIOLET |
|
- ▁LEFT |
|
- ▁GIRL |
|
- ▁SENSE |
|
- ▁SOUND |
|
- ▁EYES |
|
- ▁EVERYTHING |
|
- ▁GUY |
|
- ▁SHINY |
|
- ▁ELEVEN |
|
- ▁READY |
|
- ▁STICK |
|
- ▁FROG |
|
- ▁FOOD |
|
- ▁KEY |
|
- DE |
|
- ▁PL |
|
- ▁PART |
|
- OVE |
|
- ▁PR |
|
- ▁ROT |
|
- ▁TEE |
|
- ▁WERE |
|
- VER |
|
- ▁DIS |
|
- ▁HEY |
|
- USH |
|
- OH |
|
- IN |
|
- ISH |
|
- OVER |
|
- EEN |
|
- ▁MIND |
|
- ▁AB |
|
- SE |
|
- SH |
|
- DENTAL |
|
- OOL |
|
- ET |
|
- AR |
|
- ICK |
|
- NA |
|
- ENT |
|
- ▁BU |
|
- AT |
|
- UNTI |
|
- OW |
|
- OK |
|
- ▁EL |
|
- ▁MA |
|
- ▁QU |
|
- ▁WOR |
|
- ▁SIN |
|
- AKE |
|
- AND |
|
- ▁PRETEND |
|
- ▁BUS |
|
- ▁PLA |
|
- ▁CALL |
|
- ▁ONETWOTHREEFOUR |
|
- ▁CLASS |
|
- ▁CONNECT |
|
- ▁DISCOVER |
|
- ▁HOUSE |
|
- ▁RABBIT |
|
- ▁SQUEEZE |
|
- ▁THOUSAND |
|
- ▁ROBOT |
|
- ▁SCRUB |
|
- ▁SMELL |
|
- EXT |
|
- ▁BROTHER |
|
- ▁PILE |
|
- ▁BOTTLE |
|
- ▁PAINTBRUSH |
|
- IMA |
|
- ▁CROCODILE |
|
- ▁JUMP |
|
- ▁CANNOT |
|
- ▁TWICE |
|
- ▁STOP |
|
- UNCH |
|
- ▁SKIN |
|
- ▁TUR |
|
- ▁MOVING |
|
- IES |
|
- ▁FAST |
|
- ▁PRETENDING |
|
- EEP |
|
- ▁SHAKING |
|
- ▁MAY |
|
- ▁FAKE |
|
- ▁AWAY |
|
- ▁DI |
|
- ▁HAPP |
|
- ▁DUH |
|
- OO |
|
- ▁JUH |
|
- LE |
|
- ▁HUH |
|
- ▁BUH |
|
- BOOK |
|
- WENT |
|
- ▁CA |
|
- OSE |
|
- EM |
|
- IC |
|
- AG |
|
- ▁LETTERS |
|
- IS |
|
- EW |
|
- ONG |
|
- V |
|
- AL |
|
- PAY |
|
- REE |
|
- EE |
|
- ▁TIMES |
|
- ▁SPIN |
|
- UR |
|
- CU |
|
- GER |
|
- ▁TR |
|
- ▁AW |
|
- UGH |
|
- UT |
|
- ▁BL |
|
- ▁SL |
|
- ▁FORT |
|
- ▁GE |
|
- EA |
|
- ▁TA |
|
- GU |
|
- ▁FINISH |
|
- ▁UN |
|
- READ |
|
- THER |
|
- DAY |
|
- ▁BLA |
|
- ▁ARTIST |
|
- ▁BACKWARDS |
|
- ▁DOCTOR |
|
- ▁DREAMS |
|
- ▁EXPLA |
|
- ▁MIDDLE |
|
- ▁MOUSE |
|
- ▁PROB |
|
- ▁RINSING |
|
- ▁STRAIGHT |
|
- ▁SUNFLOWER |
|
- ▁TOOTHPICK |
|
- ▁TWELVE |
|
- ▁VULTURE |
|
- ▁CONFUS |
|
- TION |
|
- ▁HOME |
|
- ▁OPEN |
|
- ▁SORRY |
|
- ▁BORING |
|
- ▁MINE |
|
- ▁ENOUGH |
|
- ▁HELLO |
|
- ▁BORED |
|
- RITE |
|
- ▁TOWER |
|
- ▁BUIL |
|
- ▁ODD |
|
- ▁UNP |
|
- ▁APPLY |
|
- ▁ANYMORE |
|
- ▁FOUW |
|
- APE |
|
- OUNT |
|
- ▁FIFT |
|
- ▁ZEBRA |
|
- ▁LION |
|
- ▁BLAH |
|
- ▁BLOCK |
|
- ▁COP |
|
- ▁HMM |
|
- ▁ASK |
|
- ▁BAB |
|
- ▁DARKER |
|
- ▁HEAR |
|
- ▁CHO |
|
- ▁CLOSE |
|
- ▁JACK |
|
- ▁FULL |
|
- ▁CUP |
|
- ▁WHE |
|
- ▁IDEA |
|
- ▁PIRATES |
|
- ▁SPE |
|
- ▁HEAD |
|
- ▁GIVE |
|
- ▁END |
|
- DER |
|
- ▁HAND |
|
- ▁BOXES |
|
- ▁BEST |
|
- ▁LEARNING |
|
- ▁MESS |
|
- ▁MOST |
|
- ▁FLA |
|
- LIT |
|
- ▁AC |
|
- ▁AHW |
|
- ▁FUH |
|
- ▁LU |
|
- ▁SSS |
|
- OWN |
|
- ▁PUH |
|
- ▁PW |
|
- POS |
|
- ▁CIRCLES |
|
- ENS |
|
- LK |
|
- ▁PLAYING |
|
- AIL |
|
- AP |
|
- PIT |
|
- NG |
|
- ▁LETTU |
|
- IK |
|
- DDING |
|
- HH |
|
- PPER |
|
- ▁GW |
|
- ABL |
|
- OL |
|
- ▁KID |
|
- DING |
|
- ▁KA |
|
- ERS |
|
- ▁FI |
|
- LIP |
|
- ▁SE |
|
- ▁TREES |
|
- UN |
|
- ▁RO |
|
- ATE |
|
- ND |
|
- ▁FO |
|
- ICE |
|
- IF |
|
- HW |
|
- AY |
|
- ▁BIGGE |
|
- UST |
|
- ▁DE |
|
- ▁KI |
|
- ▁LOS |
|
- ▁THA |
|
- ▁PAN |
|
- IL |
|
- MB |
|
- ▁BOO |
|
- SPE |
|
- 'NO' |
|
- ACK |
|
- ▁FIN |
|
- C |
|
- ▁GROUP |
|
- ▁GERM |
|
- EAD |
|
- ▁SOMETIME |
|
- LZ |
|
- IVE |
|
- UP |
|
- TWO |
|
- HIRT |
|
- HRO |
|
- JELLYFISH |
|
- ▁PAR |
|
- PART |
|
- IBO |
|
- WHAT |
|
- KEY |
|
- FOUR |
|
- AME |
|
- ANGE |
|
- EC |
|
- TIME |
|
- ▁REAL |
|
- ELEPHANT |
|
- ▁BATHROOM |
|
- ▁BIVY |
|
- ▁BRACES |
|
- ▁FLOWER |
|
- ▁GARFIELD |
|
- ▁GARGLE |
|
- ▁KOALA |
|
- ▁PROBLEMS |
|
- ▁SEVENEIGHTNINE |
|
- ▁STINKY |
|
- ▁SWORD |
|
- ▁UPPERCASE |
|
- EMBER |
|
- FUL |
|
- ▁SEPARAT |
|
- ▁BEFORE |
|
- ▁BROKE |
|
- ▁LOUD |
|
- ▁MONSTER |
|
- ▁MOUF |
|
- ▁POOP |
|
- ▁SHINNY |
|
- ▁DRAW |
|
- ▁MAILBOX |
|
- ▁HUNGRY |
|
- ▁BREAK |
|
- ▁SARA |
|
- ▁JOB |
|
- ▁WATCH |
|
- ▁SPARKL |
|
- ▁SHORT |
|
- ▁WEEK |
|
- ▁BIRD |
|
- ▁MOMMY |
|
- ▁LOOSE |
|
- ▁GREAT |
|
- ▁PRETTIER |
|
- ▁SMIL |
|
- ▁FACE |
|
- ▁HAV |
|
- ▁PIECE |
|
- ▁FUNNY |
|
- ▁UNDER |
|
- ▁SLOWER |
|
- ACT |
|
- ▁PLEA |
|
- ▁VEHA |
|
- ▁PEAR |
|
- ▁FEEL |
|
- ▁SPIDER |
|
- ▁WORSE |
|
- ▁SWI |
|
- ▁AYE |
|
- UNU |
|
- ▁EVER |
|
- ▁HOPE |
|
- ▁SIGN |
|
- AK |
|
- UIZ |
|
- ▁SOFT |
|
- ▁POP |
|
- ▁TEEH |
|
- ▁DEH |
|
- IBLE |
|
- ▁SIDEWAYS |
|
- ROT |
|
- ▁ORDER |
|
- ▁FINISHED |
|
- ▁JELLYFISH |
|
- ▁FELL |
|
- KEU |
|
- ▁IMPO |
|
- HEAD |
|
- UM |
|
- ▁PRESS |
|
- ▁SECONDS |
|
- ▁LEA |
|
- ▁MOLD |
|
- LLUH |
|
- ▁READ |
|
- ▁ONETWO |
|
- ▁LINE |
|
- FE |
|
- ▁FOH |
|
- ▁HOT |
|
- ▁FOU |
|
- ▁MOH |
|
- ▁DEN |
|
- ▁WIN |
|
- ▁NINETY |
|
- IRTY |
|
- ▁TWEE |
|
- OUR |
|
- IRED |
|
- TLE |
|
- ▁HEH |
|
- ▁JU |
|
- PASTE |
|
- ▁FEVER |
|
- ▁WR |
|
- ▁PAI |
|
- MINT |
|
- TEEN |
|
- ▁WASHING |
|
- ▁BI |
|
- ▁NAH |
|
- DY |
|
- ▁RA |
|
- ▁DA |
|
- AHW |
|
- ▁YUH |
|
- ULL |
|
- ▁WL |
|
- UHTY |
|
- ▁SHO |
|
- ▁CUH |
|
- ASTE |
|
- OOD |
|
- ▁LAM |
|
- ▁CI |
|
- OLD |
|
- UNN |
|
- NUH |
|
- OCK |
|
- US |
|
- ▁SM |
|
- MPLE |
|
- ▁HIT |
|
- ▁THRO |
|
- ▁DEU |
|
- HOLE |
|
- ▁THINKING |
|
- UBB |
|
- ▁FU |
|
- ▁PI |
|
- ▁SMO |
|
- ▁VO |
|
- AN |
|
- UG |
|
- ▁HM |
|
- UE |
|
- GLE |
|
- ▁MOV |
|
- LI |
|
- ▁BLU |
|
- PORT |
|
- ▁WED |
|
- ▁TRI |
|
- ▁CHE |
|
- CA |
|
- ▁SC |
|
- ▁STO |
|
- ▁BED |
|
- ▁TELLS |
|
- ▁MI |
|
- OR |
|
- TTER |
|
- NES |
|
- OUGH |
|
- ▁AR |
|
- ROW |
|
- UA |
|
- AB |
|
- IG |
|
- OF |
|
- MAN |
|
- RK |
|
- OUN |
|
- ROUGH |
|
- LUH |
|
- DENT |
|
- ▁PIE |
|
- LAP |
|
- KUH |
|
- OT |
|
- RSE |
|
- ▁LA |
|
- ▁PAST |
|
- ▁ANOTH |
|
- OP |
|
- EP |
|
- ▁LATE |
|
- AM |
|
- LU |
|
- ▁WOO |
|
- HUH |
|
- ▁CER |
|
- OU |
|
- IPP |
|
- ▁CO |
|
- EH |
|
- TE |
|
- WHERE |
|
- ASH |
|
- PPY |
|
- WAY |
|
- RO |
|
- SHE |
|
- OST |
|
- AIN |
|
- ▁SECOND |
|
- ▁PIRATE |
|
- ▁MINUTE |
|
- ABET |
|
- ▁DIFFEREN |
|
- BE |
|
- IGH |
|
- ▁COO |
|
- ▁WHA |
|
- BRUSH |
|
- TRA |
|
- ▁PRES |
|
- ▁TRYIN |
|
- ▁GIV |
|
- OPE |
|
- SHIN |
|
- STRO |
|
- SIGN |
|
- ▁PLU |
|
- ZEBRA |
|
- LION |
|
- CROCODILE |
|
- LATE |
|
- UF |
|
- EQUALS |
|
- COME |
|
- UBE |
|
- J |
|
- TOGETHER |
|
- MAYBE |
|
- BOX |
|
- CLEAN |
|
- THEY |
|
- JIBO |
|
- EASY |
|
- MOUTH |
|
- ▁TALK |
|
- ▁SKI |
|
- WHY |
|
- WICE |
|
- CAUSE |
|
- UMP |
|
- TRIC |
|
- ▁CLOS |
|
- ▁SEVE |
|
- ▁DIRT |
|
- ▁NUMB |
|
- YOU |
|
- PRI |
|
- ▁JIB |
|
- ETTIER |
|
- FFERENT |
|
- ERCASE |
|
- ROOM |
|
- ▁DIFF |
|
- ▁JELLY |
|
- ▁SEVENEIGHT |
|
- ORGE |
|
- ▁YELL |
|
- DRA |
|
- ▁SLOW |
|
- ▁MON |
|
- ▁BUG |
|
- YPE |
|
- ▁BRU |
|
- COL |
|
- PUS |
|
- WO |
|
- INET |
|
- NGRY |
|
- BRUSHING |
|
- ▁CUB |
|
- OCTO |
|
- HIC |
|
- UDE |
|
- RUB |
|
- MOR |
|
- LOCK |
|
- ▁BR |
|
- YOUR |
|
- ▁STR |
|
- ▁KNE |
|
- ▁CRO |
|
- ▁BO |
|
- UALLY |
|
- ▁TOOTHB |
|
- ▁ANYMO |
|
- UKU |
|
- ▁GUE |
|
- MA |
|
- ENTY |
|
- PHA |
|
- ▁QUE |
|
- PF |
|
- KE |
|
- NOW |
|
- ▁LAS |
|
- ▁SHIN |
|
- ARN |
|
- GE |
|
- ▁MAIL |
|
- RUSHING |
|
- Q |
|
- <sos/eos> |
|
init: null |
|
input_size: null |
|
ctc_conf: |
|
dropout_rate: 0.0 |
|
ctc_type: builtin |
|
reduce: true |
|
ignore_nan_grad: null |
|
zero_infinity: true |
|
brctc_risk_strategy: exp |
|
brctc_group_strategy: end |
|
brctc_risk_factor: 0.0 |
|
joint_net_conf: null |
|
use_preprocessor: true |
|
use_lang_prompt: false |
|
use_nlp_prompt: false |
|
token_type: bpe |
|
bpemodel: data/en_token_list/bpe_unigram1024/bpe.model |
|
non_linguistic_symbols: null |
|
cleaner: null |
|
g2p: null |
|
speech_volume_normalize: null |
|
rir_scp: null |
|
rir_apply_prob: 1.0 |
|
noise_scp: null |
|
noise_apply_prob: 1.0 |
|
noise_db_range: '13_15' |
|
short_noise_thres: 0.5 |
|
aux_ctc_tasks: [] |
|
frontend: s3prl |
|
frontend_conf: |
|
frontend_conf: |
|
upstream: wavlm_large |
|
download_dir: ./hub |
|
multilayer_feature: true |
|
fs: 16k |
|
specaug: specaug |
|
specaug_conf: |
|
apply_time_warp: true |
|
time_warp_window: 5 |
|
time_warp_mode: bicubic |
|
apply_freq_mask: true |
|
freq_mask_width_range: |
|
- 0 |
|
- 27 |
|
num_freq_mask: 2 |
|
apply_time_mask: true |
|
time_mask_width_ratio_range: |
|
- 0.0 |
|
- 0.05 |
|
num_time_mask: 5 |
|
normalize: utterance_mvn |
|
normalize_conf: {} |
|
model: espnet |
|
model_conf: |
|
ctc_weight: 0.3 |
|
lsm_weight: 0.1 |
|
length_normalized_loss: false |
|
extract_feats_in_collect_stats: false |
|
preencoder: linear |
|
preencoder_conf: |
|
input_size: 1024 |
|
output_size: 80 |
|
encoder: transformer |
|
encoder_conf: |
|
output_size: 256 |
|
attention_heads: 4 |
|
linear_units: 1024 |
|
num_blocks: 18 |
|
dropout_rate: 0.1 |
|
positional_dropout_rate: 0.1 |
|
attention_dropout_rate: 0.1 |
|
input_layer: conv2d2 |
|
normalize_before: true |
|
postencoder: null |
|
postencoder_conf: {} |
|
decoder: transformer |
|
decoder_conf: |
|
attention_heads: 4 |
|
linear_units: 2048 |
|
num_blocks: 6 |
|
dropout_rate: 0.1 |
|
positional_dropout_rate: 0.1 |
|
self_attention_dropout_rate: 0.1 |
|
src_attention_dropout_rate: 0.1 |
|
preprocessor: default |
|
preprocessor_conf: {} |
|
required: |
|
- output_dir |
|
- token_list |
|
version: '202402' |
|
distributed: false |
|
``` |
|
|
|
</details> |
|
|
|
|
|
|
|
### Citing ESPnet |
|
|
|
```bibtex |
|
@inproceedings{watanabe2018espnet, |
|
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
|
title={{ESPnet}: End-to-End Speech Processing Toolkit}, |
|
year={2018}, |
|
booktitle={Proceedings of Interspeech}, |
|
pages={2207--2211}, |
|
doi={10.21437/Interspeech.2018-1456}, |
|
url={http://dx.doi.org/10.21437/Interspeech.2018-1456} |
|
} |
 |
``` |
|
|
|
or arXiv: |
|
|
|
```bibtex |
|
@misc{watanabe2018espnet, |
|
title={ESPnet: End-to-End Speech Processing Toolkit}, |
|
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
|
year={2018}, |
|
eprint={1804.00015}, |
|
archivePrefix={arXiv}, |
|
primaryClass={cs.CL} |
|
} |
|
``` |
|
|