{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# HuggingFace challenge - Debugger notebook\n", "Run this notebook to verify your libraries versions, check GPU config and run a quick training" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "T2utsYSKszvv" }, "outputs": [], "source": [ "import platform\n", "import multiprocessing\n", "\n", "import torch\n", "import transformers\n", "import datasets\n", "\n", "import soundfile" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Print main infos" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5P6I-W9ts-kR", "outputId": "939bd550-1486-46a6-8371-e82ada0f448c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Platform: Linux-5.11.0-37-generic-x86_64-with-glibc2.10\n", "CPU cores: 60\n", "Python version: 3.8.8\n", "PyTorch version: 1.10.1+cu102\n", "GPU is visible: True\n", "Transformers version: 4.16.0.dev0\n", "Datasets version: 1.17.1.dev0\n", "soundfile version: 0.10.3\n" ] } ], "source": [ "print(f\"Platform: {platform.platform()}\")\n", "print(f\"CPU cores: {multiprocessing.cpu_count()}\")\n", "\n", "print(f\"Python version: {platform.python_version()}\")\n", "\n", "print(f\"PyTorch version: {torch.__version__}\")\n", "print(f\"GPU is visible: {torch.cuda.is_available()}\")\n", "\n", "print(f\"Transformers version: {transformers.__version__}\")\n", "print(f\"Datasets version: {datasets.__version__}\")\n", "\n", "print(f\"soundfile version: {soundfile.__version__}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Check your GPU informations (if any)\n", "If you launched an AI Training job with GPU resources, they should be listed below (Tesla V100s 32GB).\n", "Driver and CUDA version " ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YT7fRnKctggU", "outputId": "f355a3e0-20da-489f-bd1f-5e508e792a68" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Thu Jan 27 11:15:57 2022 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 470.57.02 Driver Version: 470.57.02 CUDA Version: 11.4 |\n", "|-------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|===============================+======================+======================|\n", "| 0 Tesla V100S-PCI... 
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2fa897b4afc049229144599af9e3f807", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(HTML(value='<center>\\n<img src=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from huggingface_hub import notebook_login\n", "\n", "notebook_login()" ] }, { "cell_type": "markdown", "metadata": { "id": "TorMtpwPv6RQ" }, "source": [ "## Quick training run with a dummy model and data\n", "More information at https://github.com/huggingface/transformers/tree/master/examples/pytorch/speech-recognition" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fevoJD15u4Ss", "outputId": "5861d34e-745b-45ee-e780-ed363043e655" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2022-01-22 15:01:09-- https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 30348 (30K) [text/plain]\n", "Saving to: ‘run_speech_recognition_ctc.py’\n", "\n", "run_speech_recognit 100%[===================>] 29.64K --.-KB/s in 0.001s \n", "\n", "2022-01-22 15:01:09 (20.1 MB/s) - ‘run_speech_recognition_ctc.py’ saved [30348/30348]\n", "\n" ] } ], "source": [ "!wget -O run_speech_recognition_ctc.py https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py" ] },
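{ "cell_type": "markdown", "metadata": {}, "source": [ "The training cell below echoes the full `TrainingArguments` it ran with. For reference, here is a sketch of an equivalent launch command, reconstructed from that log rather than copied from the original cell; the exact flags accepted by `run_speech_recognition_ctc.py` may differ between transformers versions." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch only: reconstructed from the TrainingArguments logged below,\n", "# not the verbatim command used for this run.\n", "!python run_speech_recognition_ctc.py \\\n", "\t--dataset_name=\"mozilla-foundation/common_voice_7_0\" \\\n", "\t--dataset_config_name=\"ka\" \\\n", "\t--model_name_or_path=\"facebook/wav2vec2-xls-r-300m\" \\\n", "\t--output_dir=\"./wav2vec2-large-xls-r-300m-georgian\" \\\n", "\t--overwrite_output_dir \\\n", "\t--num_train_epochs=\"100\" \\\n", "\t--per_device_train_batch_size=\"32\" \\\n", "\t--per_device_eval_batch_size=\"32\" \\\n", "\t--learning_rate=\"3e-4\" \\\n", "\t--warmup_steps=\"500\" \\\n", "\t--evaluation_strategy=\"steps\" \\\n", "\t--eval_steps=\"500\" \\\n", "\t--save_steps=\"500\" \\\n", "\t--logging_steps=\"100\" \\\n", "\t--save_total_limit=\"2\" \\\n", "\t--gradient_checkpointing \\\n", "\t--group_by_length \\\n", "\t--fp16 \\\n", "\t--do_train --do_eval \\\n", "\t--push_to_hub \\\n", "\t--use_auth_token" ] },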
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# \t--learning_rate=\"7.5e-5\" \\\n", "# 84.5" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Mz4bubhxxsad", "outputId": "23398525-cc19-43c2-9fec-497e06214f29" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "01/27/2022 12:08:42 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: True\n", "01/27/2022 12:08:42 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", "_n_gpu=1,\n", "adafactor=False,\n", "adam_beta1=0.9,\n", "adam_beta2=0.999,\n", "adam_epsilon=1e-08,\n", "bf16=False,\n", "bf16_full_eval=False,\n", "dataloader_drop_last=False,\n", "dataloader_num_workers=0,\n", "dataloader_pin_memory=True,\n", "ddp_bucket_cap_mb=None,\n", "ddp_find_unused_parameters=None,\n", "debug=[],\n", "deepspeed=None,\n", "disable_tqdm=False,\n", "do_eval=True,\n", "do_predict=False,\n", "do_train=True,\n", "eval_accumulation_steps=None,\n", "eval_steps=500,\n", "evaluation_strategy=IntervalStrategy.STEPS,\n", "fp16=True,\n", "fp16_backend=auto,\n", "fp16_full_eval=False,\n", "fp16_opt_level=O1,\n", "gradient_accumulation_steps=1,\n", "gradient_checkpointing=True,\n", "greater_is_better=None,\n", "group_by_length=True,\n", "half_precision_backend=auto,\n", "hub_model_id=None,\n", "hub_strategy=HubStrategy.EVERY_SAVE,\n", "hub_token=<HUB_TOKEN>,\n", "ignore_data_skip=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=0.0003,\n", "length_column_name=input_length,\n", "load_best_model_at_end=False,\n", "local_rank=-1,\n", "log_level=-1,\n", "log_level_replica=-1,\n", "log_on_each_node=True,\n", "logging_dir=./wav2vec2-large-xls-r-300m-georgian/runs/Jan27_12-08-42_job-8be8b741-e32e-4579-bbec-1e00d9824b4f,\n", "logging_first_step=False,\n", "logging_nan_inf_filter=True,\n", "logging_steps=100,\n", "logging_strategy=IntervalStrategy.STEPS,\n", "lr_scheduler_type=SchedulerType.LINEAR,\n", "max_grad_norm=1.0,\n", "max_steps=-1,\n", "metric_for_best_model=None,\n", "mp_parameters=,\n", "no_cuda=False,\n", "num_train_epochs=100.0,\n", "optim=OptimizerNames.ADAMW_HF,\n", "output_dir=./wav2vec2-large-xls-r-300m-georgian,\n", "overwrite_output_dir=True,\n", "past_index=-1,\n", "per_device_eval_batch_size=32,\n", "per_device_train_batch_size=32,\n", "prediction_loss_only=False,\n", "push_to_hub=True,\n", "push_to_hub_model_id=None,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n", "remove_unused_columns=True,\n", "report_to=[],\n", "resume_from_checkpoint=None,\n", "run_name=./wav2vec2-large-xls-r-300m-georgian,\n", "save_on_each_node=False,\n", "save_steps=500,\n", "save_strategy=IntervalStrategy.STEPS,\n", "save_total_limit=2,\n", "seed=42,\n", "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_legacy_prediction_loop=False,\n",
"warmup_ratio=0.0,\n", "warmup_steps=500,\n", "weight_decay=0.0,\n", "xpu_backend=None,\n", ")\n", "01/27/2022 12:08:44 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ka/7.0.0/fe20cac47c166e25b1f096ab661832e3da7cf298ed4a91dcaa1343ad972d175b)\n", "01/27/2022 12:08:46 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ka/7.0.0/fe20cac47c166e25b1f096ab661832e3da7cf298ed4a91dcaa1343ad972d175b)\n", "01/27/2022 12:08:46 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ka/7.0.0/fe20cac47c166e25b1f096ab661832e3da7cf298ed4a91dcaa1343ad972d175b/cache-ee96a598f17b1f41.arrow\n", "01/27/2022 12:08:46 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ka/7.0.0/fe20cac47c166e25b1f096ab661832e3da7cf298ed4a91dcaa1343ad972d175b/cache-d682850d734dbca0.arrow\n", "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n", "Model config Wav2Vec2Config {\n", " \"_name_or_path\": \"facebook/wav2vec2-xls-r-300m\",\n", " \"activation_dropout\": 0.0,\n", " \"adapter_kernel_size\": 3,\n", " \"adapter_stride\": 2,\n", " \"add_adapter\": false,\n", " \"apply_spec_augment\": true,\n", " \"architectures\": [\n", " \"Wav2Vec2ForPreTraining\"\n", " ],\n", " \"attention_dropout\": 0.1,\n", " \"bos_token_id\": 1,\n", " \"classifier_proj_size\": 256,\n", " \"codevector_dim\": 768,\n", " \"contrastive_logits_temperature\": 0.1,\n", " \"conv_bias\": true,\n", " \"conv_dim\": [\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 512\n", " ],\n", " \"conv_kernel\": [\n", " 10,\n", " 3,\n", " 3,\n", " 3,\n", " 3,\n", " 2,\n", " 2\n", " ],\n", " \"conv_stride\": [\n", " 5,\n", " 2,\n", " 2,\n", " 2,\n", " 2,\n", " 2,\n", " 2\n", " ],\n", " \"ctc_loss_reduction\": \"sum\",\n", " \"ctc_zero_infinity\": false,\n", " \"diversity_loss_weight\": 0.1,\n", " \"do_stable_layer_norm\": true,\n", " \"eos_token_id\": 2,\n", " \"feat_extract_activation\": \"gelu\",\n", " \"feat_extract_dropout\": 0.0,\n", " \"feat_extract_norm\": \"layer\",\n", " \"feat_proj_dropout\": 0.1,\n", " \"feat_quantizer_dropout\": 0.0,\n", " \"final_dropout\": 0.0,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout\": 0.1,\n", " \"hidden_size\": 1024,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 4096,\n", " \"layer_norm_eps\": 1e-05,\n", " \"layerdrop\": 0.1,\n", " \"mask_feature_length\": 10,\n", " \"mask_feature_min_masks\": 0,\n", " \"mask_feature_prob\": 0.0,\n", " \"mask_time_length\": 10,\n", " \"mask_time_min_masks\": 2,\n", " \"mask_time_prob\": 0.075,\n", " \"model_type\": \"wav2vec2\",\n", " \"num_adapter_layers\": 3,\n", " \"num_attention_heads\": 16,\n", " \"num_codevector_groups\": 2,\n", " \"num_codevectors_per_group\": 320,\n", " \"num_conv_pos_embedding_groups\": 16,\n", " \"num_conv_pos_embeddings\": 128,\n", " \"num_feat_extract_layers\": 7,\n", " \"num_hidden_layers\": 24,\n", " \"num_negatives\": 100,\n", " \"output_hidden_size\": 1024,\n", " \"pad_token_id\": 0,\n", " 
\"proj_codevector_dim\": 768,\n", " \"tdnn_dilation\": [\n", " 1,\n", " 2,\n", " 3,\n", " 1,\n", " 1\n", " ],\n", " \"tdnn_dim\": [\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 1500\n", " ],\n", " \"tdnn_kernel\": [\n", " 5,\n", " 3,\n", " 3,\n", " 1,\n", " 1\n", " ],\n", " \"torch_dtype\": \"float32\",\n", " \"transformers_version\": \"4.16.0.dev0\",\n", " \"use_weighted_layer_sum\": false,\n", " \"vocab_size\": 32,\n", " \"xvector_output_dim\": 512\n", "}\n", "\n", "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 4.06ba/s]\n", "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 17.06ba/s]\n", "Didn't find file ./wav2vec2-large-xls-r-300m-georgian/tokenizer.json. We won't load it.\n", "loading file ./wav2vec2-large-xls-r-300m-georgian/vocab.json\n", "loading file ./wav2vec2-large-xls-r-300m-georgian/tokenizer_config.json\n", "loading file ./wav2vec2-large-xls-r-300m-georgian/added_tokens.json\n", "loading file ./wav2vec2-large-xls-r-300m-georgian/special_tokens_map.json\n", "loading file None\n", "Adding <s> to the vocabulary\n", "Adding </s> to the vocabulary\n", "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n", "Model config Wav2Vec2Config {\n", " \"_name_or_path\": \"facebook/wav2vec2-xls-r-300m\",\n", " \"activation_dropout\": 0.0,\n", " \"adapter_kernel_size\": 3,\n", " \"adapter_stride\": 2,\n", " \"add_adapter\": false,\n", " \"apply_spec_augment\": true,\n", " \"architectures\": [\n", " \"Wav2Vec2ForPreTraining\"\n", " ],\n", " \"attention_dropout\": 0.1,\n", " \"bos_token_id\": 1,\n", " \"classifier_proj_size\": 256,\n", " \"codevector_dim\": 768,\n", " \"contrastive_logits_temperature\": 0.1,\n", " \"conv_bias\": true,\n", " \"conv_dim\": [\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 512\n", " ],\n", " \"conv_kernel\": [\n", " 10,\n", " 3,\n", " 3,\n", " 3,\n", " 3,\n", " 2,\n", " 2\n", " ],\n", " \"conv_stride\": [\n", " 5,\n", " 2,\n", " 2,\n", " 2,\n", " 2,\n", " 2,\n", " 2\n", " ],\n", " \"ctc_loss_reduction\": \"sum\",\n", " \"ctc_zero_infinity\": false,\n", " \"diversity_loss_weight\": 0.1,\n", " \"do_stable_layer_norm\": true,\n", " \"eos_token_id\": 2,\n", " \"feat_extract_activation\": \"gelu\",\n", " \"feat_extract_dropout\": 0.0,\n", " \"feat_extract_norm\": \"layer\",\n", " \"feat_proj_dropout\": 0.1,\n", " \"feat_quantizer_dropout\": 0.0,\n", " \"final_dropout\": 0.0,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout\": 0.1,\n", " \"hidden_size\": 1024,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 4096,\n", " \"layer_norm_eps\": 1e-05,\n", " \"layerdrop\": 0.1,\n", " \"mask_feature_length\": 10,\n", " \"mask_feature_min_masks\": 0,\n", " \"mask_feature_prob\": 0.0,\n", " \"mask_time_length\": 10,\n", " \"mask_time_min_masks\": 2,\n", " \"mask_time_prob\": 0.075,\n", " \"model_type\": \"wav2vec2\",\n", " \"num_adapter_layers\": 3,\n", " \"num_attention_heads\": 16,\n", " \"num_codevector_groups\": 2,\n", " \"num_codevectors_per_group\": 320,\n", " \"num_conv_pos_embedding_groups\": 16,\n", " \"num_conv_pos_embeddings\": 128,\n", " \"num_feat_extract_layers\": 7,\n", " \"num_hidden_layers\": 24,\n", " \"num_negatives\": 100,\n", " \"output_hidden_size\": 1024,\n", " \"pad_token_id\": 0,\n", 
" \"proj_codevector_dim\": 768,\n", " \"tdnn_dilation\": [\n", " 1,\n", " 2,\n", " 3,\n", " 1,\n", " 1\n", " ],\n", " \"tdnn_dim\": [\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 1500\n", " ],\n", " \"tdnn_kernel\": [\n", " 5,\n", " 3,\n", " 3,\n", " 1,\n", " 1\n", " ],\n", " \"torch_dtype\": \"float32\",\n", " \"transformers_version\": \"4.16.0.dev0\",\n", " \"use_weighted_layer_sum\": false,\n", " \"vocab_size\": 32,\n", " \"xvector_output_dim\": 512\n", "}\n", "\n", "loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/preprocessor_config.json from cache at /workspace/.cache/huggingface/transformers/6fb028b95b394059e7d3b367bbca2382b576c66aebe896f04d2cd34e1b575f5b.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326\n", "Feature extractor Wav2Vec2FeatureExtractor {\n", " \"do_normalize\": true,\n", " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n", " \"feature_size\": 1,\n", " \"padding_side\": \"right\",\n", " \"padding_value\": 0,\n", " \"return_attention_mask\": true,\n", " \"sampling_rate\": 16000\n", "}\n", "\n", "loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin from cache at /workspace/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd\n", "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['quantizer.weight_proj.bias', 'project_q.weight', 'quantizer.codevectors', 'project_hid.weight', 'quantizer.weight_proj.weight', 'project_q.bias', 'project_hid.bias']\n", "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "preprocess datasets: 100%|█████████████████| 2686/2686 [00:26<00:00, 101.24ex/s]\n", "preprocess datasets: 100%|█████████████████| 1225/1225 [00:11<00:00, 107.06ex/s]\n", "100%|████████████████████████████████████████████| 3/3 [00:00<00:00, 473.24ba/s]\n", "100%|████████████████████████████████████████████| 2/2 [00:00<00:00, 625.36ba/s]\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "tokenizer config file saved in ./wav2vec2-large-xls-r-300m-georgian/tokenizer_config.json\n", "Special tokens file saved in ./wav2vec2-large-xls-r-300m-georgian/special_tokens_map.json\n", "added tokens file saved in ./wav2vec2-large-xls-r-300m-georgian/added_tokens.json\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/config.json\n", "loading feature extractor configuration file ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "loading configuration file ./wav2vec2-large-xls-r-300m-georgian/config.json\n", "Model config Wav2Vec2Config {\n", " \"_name_or_path\": \"./wav2vec2-large-xls-r-300m-georgian\",\n", " \"activation_dropout\": 0.1,\n", " \"adapter_kernel_size\": 3,\n", " \"adapter_stride\": 2,\n", " \"add_adapter\": false,\n", " \"apply_spec_augment\": true,\n", " \"architectures\": [\n", " \"Wav2Vec2ForPreTraining\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 1,\n", " \"classifier_proj_size\": 256,\n", " \"codevector_dim\": 768,\n", " \"contrastive_logits_temperature\": 0.1,\n", " \"conv_bias\": true,\n", " \"conv_dim\": [\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 512\n", " ],\n", " \"conv_kernel\": [\n", " 10,\n", " 3,\n", " 3,\n", " 3,\n", " 3,\n", " 2,\n", " 2\n", " ],\n", " \"conv_stride\": [\n", " 5,\n", " 2,\n", " 2,\n", " 2,\n", " 2,\n", " 2,\n", " 2\n", " ],\n", " \"ctc_loss_reduction\": \"mean\",\n", " \"ctc_zero_infinity\": false,\n", " \"diversity_loss_weight\": 0.1,\n", " \"do_stable_layer_norm\": true,\n", " \"eos_token_id\": 2,\n", " \"feat_extract_activation\": \"gelu\",\n", " \"feat_extract_dropout\": 0.0,\n", " \"feat_extract_norm\": \"layer\",\n", " \"feat_proj_dropout\": 0.0,\n", " \"feat_quantizer_dropout\": 0.0,\n", " \"final_dropout\": 0.0,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout\": 0.0,\n", " \"hidden_size\": 1024,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 4096,\n", " \"layer_norm_eps\": 1e-05,\n", " \"layerdrop\": 0.0,\n", " \"mask_feature_length\": 64,\n", " \"mask_feature_min_masks\": 0,\n", " \"mask_feature_prob\": 0.25,\n", " \"mask_time_length\": 10,\n", " \"mask_time_min_masks\": 2,\n", " \"mask_time_prob\": 0.75,\n", " \"model_type\": \"wav2vec2\",\n", " \"num_adapter_layers\": 3,\n", " \"num_attention_heads\": 16,\n", " \"num_codevector_groups\": 2,\n", " \"num_codevectors_per_group\": 320,\n", " \"num_conv_pos_embedding_groups\": 16,\n", " \"num_conv_pos_embeddings\": 128,\n", " 
\"num_feat_extract_layers\": 7,\n", " \"num_hidden_layers\": 24,\n", " \"num_negatives\": 100,\n", " \"output_hidden_size\": 1024,\n", " \"pad_token_id\": 36,\n", " \"proj_codevector_dim\": 768,\n", " \"tdnn_dilation\": [\n", " 1,\n", " 2,\n", " 3,\n", " 1,\n", " 1\n", " ],\n", " \"tdnn_dim\": [\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 1500\n", " ],\n", " \"tdnn_kernel\": [\n", " 5,\n", " 3,\n", " 3,\n", " 1,\n", " 1\n", " ],\n", " \"torch_dtype\": \"float32\",\n", " \"transformers_version\": \"4.16.0.dev0\",\n", " \"use_weighted_layer_sum\": false,\n", " \"vocab_size\": 39,\n", " \"xvector_output_dim\": 512\n", "}\n", "\n", "loading feature extractor configuration file ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "Feature extractor Wav2Vec2FeatureExtractor {\n", " \"do_normalize\": true,\n", " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n", " \"feature_size\": 1,\n", " \"padding_side\": \"right\",\n", " \"padding_value\": 0,\n", " \"return_attention_mask\": true,\n", " \"sampling_rate\": 16000\n", "}\n", "\n", "Didn't find file ./wav2vec2-large-xls-r-300m-georgian/tokenizer.json. We won't load it.\n", "loading file ./wav2vec2-large-xls-r-300m-georgian/vocab.json\n", "loading file ./wav2vec2-large-xls-r-300m-georgian/tokenizer_config.json\n", "loading file ./wav2vec2-large-xls-r-300m-georgian/added_tokens.json\n", "loading file ./wav2vec2-large-xls-r-300m-georgian/special_tokens_map.json\n", "loading file None\n", "Adding <s> to the vocabulary\n", "Adding </s> to the vocabulary\n", "/workspace/votic_training/./wav2vec2-large-xls-r-300m-georgian is already a clone of https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-georgian. Make sure you pull the latest changes with `repo.git_pull()`.\n", "01/27/2022 12:09:40 - WARNING - huggingface_hub.repository - /workspace/votic_training/./wav2vec2-large-xls-r-300m-georgian is already a clone of https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-georgian. Make sure you pull the latest changes with `repo.git_pull()`.\n", "Using amp half precision backend\n", "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n", "***** Running training *****\n", " Num examples = 2686\n", " Num Epochs = 100\n", " Instantaneous batch size per device = 32\n", " Total train batch size (w. 
parallel, distributed & accumulation) = 32\n", " Gradient Accumulation steps = 1\n", " Total optimization steps = 8400\n", "{'loss': 6.951, 'learning_rate': 5.88e-05, 'epoch': 1.19} \n", "{'loss': 3.1694, 'learning_rate': 0.0001188, 'epoch': 2.38} \n", "{'loss': 3.0357, 'learning_rate': 0.00017879999999999998, 'epoch': 3.57} \n", "{'loss': 2.8568, 'learning_rate': 0.0002388, 'epoch': 4.76} \n", "{'loss': 1.8805, 'learning_rate': 0.0002988, 'epoch': 5.95} \n", " 6%|██▎ | 500/8400 [14:21<2:21:00, 1.07s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "***** Running Evaluation *****\n", " Num examples = 1225\n", " Batch size = 32\n", "\n", " 0%| | 0/39 [00:00<?, ?it/s]\u001b[A\n", " 5%|██▎ | 2/39 [00:01<00:28, 1.30it/s]\u001b[A\n", " 8%|███▍ | 3/39 [00:03<00:42, 1.18s/it]\u001b[A\n", " 10%|████▌ | 4/39 [00:04<00:45, 1.31s/it]\u001b[A\n", " 13%|█████▋ | 5/39 [00:06<00:46, 1.38s/it]\u001b[A\n", " 15%|██████▊ | 6/39 [00:07<00:45, 1.39s/it]\u001b[A\n", " 18%|███████▉ | 7/39 [00:09<00:45, 1.43s/it]\u001b[A\n", " 21%|█████████ | 8/39 [00:10<00:45, 1.47s/it]\u001b[A\n", " 23%|██████████▏ | 9/39 [00:12<00:45, 1.51s/it]\u001b[A\n", " 26%|███████████ | 10/39 [00:13<00:43, 1.51s/it]\u001b[A\n", " 28%|████████████▏ | 11/39 [00:15<00:41, 1.48s/it]\u001b[A\n", " 31%|█████████████▏ | 12/39 [00:16<00:40, 1.51s/it]\u001b[A\n", " 33%|██████████████▎ | 13/39 [00:18<00:38, 1.50s/it]\u001b[A\n", " 36%|███████████████▍ | 14/39 [00:19<00:36, 1.47s/it]\u001b[A\n", " 38%|████████████████▌ | 15/39 [00:21<00:34, 1.44s/it]\u001b[A\n", " 41%|█████████████████▋ | 16/39 [00:22<00:34, 1.48s/it]\u001b[A\n", " 44%|██████████████████▋ | 17/39 [00:24<00:32, 1.47s/it]\u001b[A\n", " 46%|███████████████████▊ | 18/39 [00:25<00:27, 1.33s/it]\u001b[A\n", " 49%|████████████████████▉ | 19/39 [00:26<00:24, 1.23s/it]\u001b[A\n", " 51%|██████████████████████ | 20/39 [00:27<00:21, 1.15s/it]\u001b[A\n", " 54%|███████████████████████▏ | 21/39 [00:28<00:19, 1.08s/it]\u001b[A\n", " 56%|████████████████████████▎ | 22/39 [00:28<00:17, 1.02s/it]\u001b[A\n", " 59%|█████████████████████████▎ | 23/39 [00:29<00:15, 1.01it/s]\u001b[A\n", " 62%|██████████████████████████▍ | 24/39 [00:30<00:14, 1.02it/s]\u001b[A\n", " 64%|███████████████████████████▌ | 25/39 [00:31<00:13, 1.01it/s]\u001b[A\n", " 67%|████████████████████████████▋ | 26/39 [00:32<00:13, 1.01s/it]\u001b[A\n", " 69%|█████████████████████████████▊ | 27/39 [00:34<00:12, 1.04s/it]\u001b[A\n", " 72%|██████████████████████████████▊ | 28/39 [00:34<00:10, 1.01it/s]\u001b[A\n", " 74%|███████████████████████████████▉ | 29/39 [00:35<00:10, 1.01s/it]\u001b[A\n", " 77%|█████████████████████████████████ | 30/39 [00:37<00:09, 1.04s/it]\u001b[A\n", " 79%|██████████████████████████████████▏ | 31/39 [00:38<00:08, 1.03s/it]\u001b[A\n", " 82%|███████████████████████████████████▎ | 32/39 [00:39<00:07, 1.05s/it]\u001b[A\n", " 85%|████████████████████████████████████▍ | 33/39 [00:40<00:06, 1.06s/it]\u001b[A\n", " 87%|█████████████████████████████████████▍ | 34/39 [00:41<00:05, 1.03s/it]\u001b[A\n", " 90%|██████████████████████████████████████▌ | 35/39 [00:42<00:03, 1.02it/s]\u001b[A\n", " 92%|███████████████████████████████████████▋ | 36/39 [00:43<00:02, 1.01it/s]\u001b[A\n", " 95%|████████████████████████████████████████▊ | 37/39 [00:44<00:01, 1.00it/s]\u001b[A\n", " 97%|█████████████████████████████████████████▉ | 38/39 [00:45<00:00, 1.01it/s]\u001b[A\n", " \u001b[A\n", "\u001b[A{'eval_loss': 
0.7547174692153931, 'eval_wer': 0.8438120450033091, 'eval_runtime': 47.8349, 'eval_samples_per_second': 25.609, 'eval_steps_per_second': 0.815, 'epoch': 5.95}\n", " 6%|██▎ | 500/8400 [15:08<2:21:00, 1.07s/it]\n", "100%|███████████████████████████████████████████| 39/39 [00:46<00:00, 1.31it/s]\u001b[A\n", " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-500\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-500/config.json\n", "Model weights saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-500/pytorch_model.bin\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-500/preprocessor_config.json\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "{'loss': 1.5197, 'learning_rate': 0.0002962784810126582, 'epoch': 7.14} \n", "{'loss': 1.3887, 'learning_rate': 0.0002924810126582278, 'epoch': 8.33} \n", "{'loss': 1.3124, 'learning_rate': 0.00028868354430379743, 'epoch': 9.52} \n", "{'loss': 1.2433, 'learning_rate': 0.00028488607594936706, 'epoch': 10.71} \n", " 12%|████▍ | 992/8400 [29:50<4:07:05, 2.00s/it]\n", " 13%|█████▋ | 5/39 [00:06<00:47, 1.40s/it]\u001b[A\n", " 15%|██████▊ | 6/39 [00:07<00:46, 1.42s/it]\u001b[A\n", " 18%|███████▉ | 7/39 [00:09<00:46, 1.46s/it]\u001b[A\n", " 21%|█████████ | 8/39 [00:10<00:46, 1.48s/it]\u001b[A\n", " 23%|██████████▏ | 9/39 [00:12<00:46, 1.54s/it]\u001b[A\n", " 26%|███████████ | 10/39 [00:14<00:44, 1.54s/it]\u001b[A\n", "{'loss': 1.1794, 'learning_rate': 0.0002772911392405063, 'epoch': 13.1} \u001b[A\n", "{'loss': 1.1509, 'learning_rate': 0.0002734936708860759, 'epoch': 14.29} \n", "{'loss': 1.1197, 'learning_rate': 0.0002696962025316455, 'epoch': 15.48} \n", "{'loss': 1.0924, 'learning_rate': 0.00026589873417721515, 'epoch': 16.67} \n", "{'loss': 1.0822, 'learning_rate': 0.0002621012658227848, 'epoch': 17.86} \n", " 18%|██████▌ | 1500/8400 [46:33<3:10:32, 1.66s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "***** Running Evaluation *****\n", " Num examples = 1225\n", " Batch size = 32\n", "\n", " 0%| | 0/39 [00:00<?, ?it/s]\u001b[A\n", " 5%|██▎ | 2/39 [00:01<00:27, 1.33it/s]\u001b[A\n", " 8%|███▍ | 3/39 [00:03<00:41, 1.16s/it]\u001b[A\n", " 10%|████▌ | 4/39 [00:04<00:45, 1.29s/it]\u001b[A\n", " 13%|█████▋ | 5/39 [00:06<00:47, 1.39s/it]\u001b[A\n", " 15%|██████▊ | 6/39 [00:07<00:46, 1.40s/it]\u001b[A\n", " 18%|███████▉ | 7/39 [00:09<00:46, 1.44s/it]\u001b[A\n", " 21%|█████████ | 8/39 [00:10<00:45, 1.47s/it]\u001b[A\n", " 23%|██████████▏ | 9/39 [00:12<00:45, 1.51s/it]\u001b[A\n", " 26%|███████████ | 10/39 [00:13<00:43, 1.52s/it]\u001b[A\n", " 28%|████████████▏ | 11/39 [00:15<00:41, 1.49s/it]\u001b[A\n", " 31%|█████████████▏ | 12/39 [00:16<00:41, 1.53s/it]\u001b[A\n", " 33%|██████████████▎ | 13/39 [00:18<00:39, 1.51s/it]\u001b[A\n", " 36%|███████████████▍ | 14/39 [00:19<00:37, 1.48s/it]\u001b[A\n", " 38%|████████████████▌ | 15/39 [00:21<00:34, 1.45s/it]\u001b[A\n", " 41%|█████████████████▋ | 16/39 [00:22<00:34, 1.49s/it]\u001b[A\n", " 44%|██████████████████▋ | 17/39 [00:24<00:32, 1.48s/it]\u001b[A\n", " 46%|███████████████████▊ | 18/39 [00:25<00:27, 1.33s/it]\u001b[A\n", " 49%|████████████████████▉ | 19/39 [00:26<00:24, 1.22s/it]\u001b[A\n", " 51%|██████████████████████ | 20/39 [00:27<00:21, 1.14s/it]\u001b[A\n", " 54%|███████████████████████▏ | 21/39 [00:28<00:19, 1.07s/it]\u001b[A\n", " 56%|████████████████████████▎ | 22/39 
[00:28<00:17, 1.01s/it]\u001b[A\n", " 59%|█████████████████████████▎ | 23/39 [00:29<00:15, 1.01it/s]\u001b[A\n", " 62%|██████████████████████████▍ | 24/39 [00:30<00:14, 1.02it/s]\u001b[A\n", " 64%|███████████████████████████▌ | 25/39 [00:31<00:13, 1.01it/s]\u001b[A\n", " 67%|████████████████████████████▋ | 26/39 [00:32<00:13, 1.00s/it]\u001b[A\n", " 69%|█████████████████████████████▊ | 27/39 [00:33<00:12, 1.02s/it]\u001b[A\n", " 72%|██████████████████████████████▊ | 28/39 [00:34<00:10, 1.03it/s]\u001b[A\n", " 74%|███████████████████████████████▉ | 29/39 [00:35<00:10, 1.00s/it]\u001b[A\n", " 77%|█████████████████████████████████ | 30/39 [00:36<00:09, 1.03s/it]\u001b[A\n", " 79%|██████████████████████████████████▏ | 31/39 [00:37<00:08, 1.01s/it]\u001b[A\n", " 82%|███████████████████████████████████▎ | 32/39 [00:39<00:07, 1.03s/it]\u001b[A\n", " 85%|████████████████████████████████████▍ | 33/39 [00:40<00:06, 1.05s/it]\u001b[A\n", " 87%|█████████████████████████████████████▍ | 34/39 [00:41<00:05, 1.02s/it]\u001b[A\n", " 90%|██████████████████████████████████████▌ | 35/39 [00:41<00:03, 1.01it/s]\u001b[A\n", " 92%|███████████████████████████████████████▋ | 36/39 [00:42<00:02, 1.01it/s]\u001b[A\n", " 95%|████████████████████████████████████████▊ | 37/39 [00:43<00:01, 1.01it/s]\u001b[A\n", " 97%|█████████████████████████████████████████▉ | 38/39 [00:44<00:00, 1.02it/s]\u001b[A\n", " \u001b[A\n", "\u001b[A{'eval_loss': 0.402739018201828, 'eval_wer': 0.5777630708140304, 'eval_runtime': 47.5558, 'eval_samples_per_second': 25.759, 'eval_steps_per_second': 0.82, 'epoch': 17.86}\n", " 18%|██████▌ | 1500/8400 [47:21<3:10:32, 1.66s/it]\n", "100%|███████████████████████████████████████████| 39/39 [00:46<00:00, 1.32it/s]\u001b[A\n", " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-1500\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-1500/config.json\n", "Model weights saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-1500/pytorch_model.bin\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-1500/preprocessor_config.json\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-500] due to args.save_total_limit\n", "{'loss': 1.0552, 'learning_rate': 0.0002583037974683544, 'epoch': 19.05} \n", "{'loss': 1.0453, 'learning_rate': 0.000254506329113924, 'epoch': 20.24} \n", "{'loss': 1.0218, 'learning_rate': 0.00025070886075949367, 'epoch': 21.43} \n", "{'loss': 1.024, 'learning_rate': 0.0002469113924050633, 'epoch': 22.62} \n", "{'loss': 0.9938, 'learning_rate': 0.00024311392405063287, 'epoch': 23.81} \n", " 24%|████████▎ | 2000/8400 [1:03:05<3:30:16, 1.97s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "***** Running Evaluation *****\n", " Num examples = 1225\n", " Batch size = 32\n", "\n", " 0%| | 0/39 [00:00<?, ?it/s]\u001b[A\n", " 5%|██▎ | 2/39 [00:01<00:28, 1.30it/s]\u001b[A\n", " 8%|███▍ | 3/39 [00:03<00:42, 1.17s/it]\u001b[A\n", " 10%|████▌ | 4/39 [00:04<00:45, 1.30s/it]\u001b[A\n", " 13%|█████▋ | 5/39 [00:06<00:46, 1.38s/it]\u001b[A\n", " 15%|██████▊ | 6/39 [00:07<00:46, 1.41s/it]\u001b[A\n", " 18%|███████▉ | 7/39 [00:09<00:46, 1.44s/it]\u001b[A\n", " 21%|█████████ | 8/39 [00:10<00:45, 1.46s/it]\u001b[A\n", " 23%|██████████▏ | 9/39 [00:12<00:45, 1.51s/it]\u001b[A\n", " 26%|███████████ | 10/39 
[00:13<00:43, 1.51s/it]\u001b[A\n", " 28%|████████████▏ | 11/39 [00:15<00:41, 1.48s/it]\u001b[A\n", " 31%|█████████████▏ | 12/39 [00:16<00:41, 1.52s/it]\u001b[A\n", " 33%|██████████████▎ | 13/39 [00:18<00:39, 1.50s/it]\u001b[A\n", " 36%|███████████████▍ | 14/39 [00:19<00:36, 1.47s/it]\u001b[A\n", " 38%|████████████████▌ | 15/39 [00:21<00:34, 1.45s/it]\u001b[A\n", " 41%|█████████████████▋ | 16/39 [00:22<00:33, 1.48s/it]\u001b[A\n", " 44%|██████████████████▋ | 17/39 [00:24<00:32, 1.47s/it]\u001b[A\n", " 46%|███████████████████▊ | 18/39 [00:25<00:27, 1.32s/it]\u001b[A\n", " 49%|████████████████████▉ | 19/39 [00:26<00:24, 1.22s/it]\u001b[A\n", " 51%|██████████████████████ | 20/39 [00:27<00:21, 1.14s/it]\u001b[A\n", " 54%|███████████████████████▏ | 21/39 [00:27<00:19, 1.06s/it]\u001b[A\n", " 56%|████████████████████████▎ | 22/39 [00:28<00:17, 1.01s/it]\u001b[A\n", " 59%|█████████████████████████▎ | 23/39 [00:29<00:15, 1.02it/s]\u001b[A\n", " 62%|██████████████████████████▍ | 24/39 [00:30<00:14, 1.02it/s]\u001b[A\n", " 64%|███████████████████████████▌ | 25/39 [00:31<00:13, 1.02it/s]\u001b[A\n", " 67%|████████████████████████████▋ | 26/39 [00:32<00:12, 1.00it/s]\u001b[A\n", " 69%|█████████████████████████████▊ | 27/39 [00:33<00:12, 1.01s/it]\u001b[A\n", " 72%|██████████████████████████████▊ | 28/39 [00:34<00:10, 1.04it/s]\u001b[A\n", " 74%|███████████████████████████████▉ | 29/39 [00:35<00:09, 1.00it/s]\u001b[A\n", " 77%|█████████████████████████████████ | 30/39 [00:36<00:09, 1.02s/it]\u001b[A\n", " 79%|██████████████████████████████████▏ | 31/39 [00:37<00:07, 1.00it/s]\u001b[A\n", " 82%|███████████████████████████████████▎ | 32/39 [00:38<00:07, 1.03s/it]\u001b[A\n", " 85%|████████████████████████████████████▍ | 33/39 [00:39<00:06, 1.05s/it]\u001b[A\n", " 87%|█████████████████████████████████████▍ | 34/39 [00:40<00:05, 1.02s/it]\u001b[A\n", " 90%|██████████████████████████████████████▌ | 35/39 [00:41<00:03, 1.01it/s]\u001b[A\n", " 92%|███████████████████████████████████████▋ | 36/39 [00:42<00:02, 1.01it/s]\u001b[A\n", " 95%|████████████████████████████████████████▊ | 37/39 [00:43<00:02, 1.00s/it]\u001b[A\n", " 97%|█████████████████████████████████████████▉ | 38/39 [00:44<00:00, 1.00it/s]\u001b[A\n", " \u001b[A\n", "\u001b[A{'eval_loss': 0.38469693064689636, 'eval_wer': 0.5523935583498787, 'eval_runtime': 47.562, 'eval_samples_per_second': 25.756, 'eval_steps_per_second': 0.82, 'epoch': 23.81}\n", " 24%|████████▎ | 2000/8400 [1:03:53<3:30:16, 1.97s/it]\n", "100%|███████████████████████████████████████████| 39/39 [00:46<00:00, 1.31it/s]\u001b[A\n", " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-2000\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-2000/config.json\n", "Model weights saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-2000/pytorch_model.bin\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-2000/preprocessor_config.json\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-1000] due to args.save_total_limit\n", "{'loss': 0.9897, 'learning_rate': 0.0002393164556962025, 'epoch': 25.0} \n", "{'loss': 0.9857, 'learning_rate': 0.00023551898734177216, 'epoch': 26.19} \n", "{'loss': 0.9537, 'learning_rate': 0.00023172151898734174, 'epoch': 27.38} \n", "{'loss': 0.941, 'learning_rate': 0.0002279240506329114, 'epoch': 28.57} \n", "{'loss': 0.9383, 'learning_rate': 0.000224126582278481, 
'epoch': 29.76} \n", " 30%|██████████▍ | 2500/8400 [1:19:35<3:32:41, 2.16s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "***** Running Evaluation *****\n", " Num examples = 1225\n", " Batch size = 32\n", "\n", " 0%| | 0/39 [00:00<?, ?it/s]\u001b[A\n", " 5%|██▎ | 2/39 [00:01<00:27, 1.33it/s]\u001b[A\n", " 8%|███▍ | 3/39 [00:03<00:41, 1.15s/it]\u001b[A\n", " 10%|████▌ | 4/39 [00:04<00:44, 1.28s/it]\u001b[A\n", " 13%|█████▋ | 5/39 [00:06<00:46, 1.37s/it]\u001b[A\n", " 15%|██████▊ | 6/39 [00:07<00:45, 1.39s/it]\u001b[A\n", " 18%|███████▉ | 7/39 [00:09<00:45, 1.42s/it]\u001b[A\n", " 21%|█████████ | 8/39 [00:10<00:45, 1.46s/it]\u001b[A\n", " 23%|██████████▏ | 9/39 [00:12<00:45, 1.51s/it]\u001b[A\n", " 26%|███████████ | 10/39 [00:13<00:44, 1.52s/it]\u001b[A\n", " 28%|████████████▏ | 11/39 [00:15<00:41, 1.50s/it]\u001b[A\n", " 31%|█████████████▏ | 12/39 [00:16<00:41, 1.53s/it]\u001b[A\n", " 33%|██████████████▎ | 13/39 [00:18<00:39, 1.53s/it]\u001b[A\n", " 36%|███████████████▍ | 14/39 [00:19<00:37, 1.50s/it]\u001b[A\n", " 38%|████████████████▌ | 15/39 [00:21<00:35, 1.47s/it]\u001b[A\n", " 41%|█████████████████▋ | 16/39 [00:22<00:34, 1.50s/it]\u001b[A\n", " 44%|██████████████████▋ | 17/39 [00:24<00:32, 1.49s/it]\u001b[A\n", " 46%|███████████████████▊ | 18/39 [00:25<00:28, 1.35s/it]\u001b[A\n", " 49%|████████████████████▉ | 19/39 [00:26<00:24, 1.24s/it]\u001b[A\n", " 51%|██████████████████████ | 20/39 [00:27<00:22, 1.16s/it]\u001b[A\n", " 54%|███████████████████████▏ | 21/39 [00:28<00:19, 1.09s/it]\u001b[A\n", " 56%|████████████████████████▎ | 22/39 [00:29<00:17, 1.03s/it]\u001b[A\n", " 59%|█████████████████████████▎ | 23/39 [00:30<00:16, 1.01s/it]\u001b[A\n", " 62%|██████████████████████████▍ | 24/39 [00:31<00:15, 1.00s/it]\u001b[A\n", " 64%|███████████████████████████▌ | 25/39 [00:32<00:14, 1.00s/it]\u001b[A\n", " 67%|████████████████████████████▋ | 26/39 [00:33<00:13, 1.02s/it]\u001b[A\n", " 69%|█████████████████████████████▊ | 27/39 [00:34<00:12, 1.02s/it]\u001b[A\n", " 72%|██████████████████████████████▊ | 28/39 [00:35<00:10, 1.03it/s]\u001b[A\n", " 74%|███████████████████████████████▉ | 29/39 [00:36<00:10, 1.01s/it]\u001b[A\n", " 77%|█████████████████████████████████ | 30/39 [00:37<00:09, 1.03s/it]\u001b[A\n", " 79%|██████████████████████████████████▏ | 31/39 [00:38<00:08, 1.01s/it]\u001b[A\n", " 82%|███████████████████████████████████▎ | 32/39 [00:39<00:07, 1.03s/it]\u001b[A\n", " 85%|████████████████████████████████████▍ | 33/39 [00:40<00:06, 1.04s/it]\u001b[A\n", " 87%|█████████████████████████████████████▍ | 34/39 [00:41<00:05, 1.02s/it]\u001b[A\n", " 90%|██████████████████████████████████████▌ | 35/39 [00:42<00:03, 1.02it/s]\u001b[A\n", " 92%|███████████████████████████████████████▋ | 36/39 [00:43<00:02, 1.01it/s]\u001b[A\n", " 95%|████████████████████████████████████████▊ | 37/39 [00:44<00:01, 1.01it/s]\u001b[A\n", " 97%|█████████████████████████████████████████▉ | 38/39 [00:45<00:00, 1.02it/s]\u001b[A\n", " \u001b[A\n", "\u001b[A{'eval_loss': 0.3845490515232086, 'eval_wer': 0.5204059121994264, 'eval_runtime': 47.7826, 'eval_samples_per_second': 25.637, 'eval_steps_per_second': 0.816, 'epoch': 29.76}\n", " 30%|██████████▍ | 2500/8400 [1:20:23<3:32:41, 2.16s/it]\n", "100%|███████████████████████████████████████████| 39/39 [00:46<00:00, 1.31it/s]\u001b[A\n", " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-2500\n", "Configuration saved in 
./wav2vec2-large-xls-r-300m-georgian/checkpoint-2500/config.json\n", "Model weights saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-2500/pytorch_model.bin\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-2500/preprocessor_config.json\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-1500] due to args.save_total_limit\n", "{'loss': 0.9304, 'learning_rate': 0.00022032911392405062, 'epoch': 30.95} \n", "{'loss': 0.925, 'learning_rate': 0.00021653164556962025, 'epoch': 32.14} \n", "{'loss': 0.9119, 'learning_rate': 0.00021273417721518986, 'epoch': 33.33} \n", "{'loss': 0.8988, 'learning_rate': 0.00020893670886075949, 'epoch': 34.52} \n", "{'loss': 0.8932, 'learning_rate': 0.0002051392405063291, 'epoch': 35.71} \n", " 36%|████████████▌ | 3000/8400 [1:36:04<1:38:40, 1.10s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "***** Running Evaluation *****\n", " Num examples = 1225\n", " Batch size = 32\n", "\n", " 0%| | 0/39 [00:00<?, ?it/s]\u001b[A\n", " 5%|██▎ | 2/39 [00:01<00:27, 1.33it/s]\u001b[A\n", " 8%|███▍ | 3/39 [00:03<00:41, 1.15s/it]\u001b[A\n", " 10%|████▌ | 4/39 [00:04<00:45, 1.29s/it]\u001b[A\n", " 13%|█████▋ | 5/39 [00:06<00:47, 1.41s/it]\u001b[A\n", " 15%|██████▊ | 6/39 [00:07<00:47, 1.43s/it]\u001b[A\n", " 18%|███████▉ | 7/39 [00:09<00:46, 1.44s/it]\u001b[A\n", " 21%|█████████ | 8/39 [00:10<00:45, 1.47s/it]\u001b[A\n", " 23%|██████████▏ | 9/39 [00:12<00:45, 1.51s/it]\u001b[A\n", " 26%|███████████ | 10/39 [00:13<00:43, 1.51s/it]\u001b[A\n", " 28%|████████████▏ | 11/39 [00:15<00:41, 1.47s/it]\u001b[A\n", " 31%|█████████████▏ | 12/39 [00:16<00:40, 1.51s/it]\u001b[A\n", " 33%|██████████████▎ | 13/39 [00:18<00:38, 1.49s/it]\u001b[A\n", " 36%|███████████████▍ | 14/39 [00:19<00:36, 1.46s/it]\u001b[A\n", " 38%|████████████████▌ | 15/39 [00:21<00:34, 1.43s/it]\u001b[A\n", " 41%|█████████████████▋ | 16/39 [00:22<00:33, 1.47s/it]\u001b[A\n", " 44%|██████████████████▋ | 17/39 [00:24<00:32, 1.46s/it]\u001b[A\n", " 46%|███████████████████▊ | 18/39 [00:25<00:27, 1.32s/it]\u001b[A\n", " 49%|████████████████████▉ | 19/39 [00:26<00:24, 1.23s/it]\u001b[A\n", " 51%|██████████████████████ | 20/39 [00:27<00:21, 1.14s/it]\u001b[A\n", " 54%|███████████████████████▏ | 21/39 [00:27<00:19, 1.06s/it]\u001b[A\n", " 56%|████████████████████████▎ | 22/39 [00:28<00:17, 1.01s/it]\u001b[A\n", " 59%|█████████████████████████▎ | 23/39 [00:29<00:15, 1.02it/s]\u001b[A\n", " 62%|██████████████████████████▍ | 24/39 [00:30<00:14, 1.03it/s]\u001b[A\n", " 64%|███████████████████████████▌ | 25/39 [00:31<00:13, 1.02it/s]\u001b[A\n", " 67%|████████████████████████████▋ | 26/39 [00:32<00:12, 1.00it/s]\u001b[A\n", " 69%|█████████████████████████████▊ | 27/39 [00:33<00:12, 1.01s/it]\u001b[A\n", " 72%|██████████████████████████████▊ | 28/39 [00:34<00:10, 1.04it/s]\u001b[A\n", " 74%|███████████████████████████████▉ | 29/39 [00:35<00:09, 1.00it/s]\u001b[A\n", " 77%|█████████████████████████████████ | 30/39 [00:36<00:09, 1.03s/it]\u001b[A\n", " 79%|██████████████████████████████████▏ | 31/39 [00:37<00:08, 1.03s/it]\u001b[A\n", " 82%|███████████████████████████████████▎ | 32/39 [00:38<00:07, 1.05s/it]\u001b[A\n", " 85%|████████████████████████████████████▍ | 33/39 [00:40<00:06, 1.07s/it]\u001b[A\n", " 87%|█████████████████████████████████████▍ | 34/39 [00:40<00:05, 
1.03s/it]\u001b[A\n", " 90%|██████████████████████████████████████▌ | 35/39 [00:41<00:03, 1.01it/s]\u001b[A\n", " 92%|███████████████████████████████████████▋ | 36/39 [00:42<00:03, 1.00s/it]\u001b[A\n", " 95%|████████████████████████████████████████▊ | 37/39 [00:43<00:01, 1.00it/s]\u001b[A\n", " 97%|█████████████████████████████████████████▉ | 38/39 [00:44<00:00, 1.02it/s]\u001b[A\n", " \u001b[A\n", "\u001b[A{'eval_loss': 0.3832888603210449, 'eval_wer': 0.529671299360247, 'eval_runtime': 47.4524, 'eval_samples_per_second': 25.815, 'eval_steps_per_second': 0.822, 'epoch': 35.71}\n", " 36%|████████████▌ | 3000/8400 [1:36:51<1:38:40, 1.10s/it]\n", "100%|███████████████████████████████████████████| 39/39 [00:45<00:00, 1.32it/s]\u001b[A\n", " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-3000\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-3000/config.json\n", "Model weights saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-3000/pytorch_model.bin\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-3000/preprocessor_config.json\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-2000] due to args.save_total_limit\n", "{'loss': 0.8709, 'learning_rate': 0.00020134177215189872, 'epoch': 36.9} \n", "{'loss': 0.8767, 'learning_rate': 0.00019754430379746835, 'epoch': 38.1} \n", "{'loss': 0.8677, 'learning_rate': 0.00019374683544303795, 'epoch': 39.29} \n", "{'loss': 0.8642, 'learning_rate': 0.00018994936708860758, 'epoch': 40.48} \n", "{'loss': 0.8495, 'learning_rate': 0.00018615189873417718, 'epoch': 41.67} \n", " 42%|██████████████▌ | 3500/8400 [1:52:40<1:53:50, 1.39s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "***** Running Evaluation *****\n", " Num examples = 1225\n", " Batch size = 32\n", "\n", " 0%| | 0/39 [00:00<?, ?it/s]\u001b[A\n", " 5%|██▎ | 2/39 [00:01<00:28, 1.30it/s]\u001b[A\n", " 8%|███▍ | 3/39 [00:03<00:42, 1.17s/it]\u001b[A\n", " 10%|████▌ | 4/39 [00:04<00:45, 1.31s/it]\u001b[A\n", " 13%|█████▋ | 5/39 [00:06<00:47, 1.41s/it]\u001b[A\n", " 15%|██████▊ | 6/39 [00:07<00:46, 1.42s/it]\u001b[A\n", " 18%|███████▉ | 7/39 [00:09<00:46, 1.44s/it]\u001b[A\n", " 21%|█████████ | 8/39 [00:10<00:45, 1.47s/it]\u001b[A\n", " 23%|██████████▏ | 9/39 [00:12<00:45, 1.52s/it]\u001b[A\n", " 26%|███████████ | 10/39 [00:13<00:43, 1.51s/it]\u001b[A\n", " 28%|████████████▏ | 11/39 [00:15<00:41, 1.48s/it]\u001b[A\n", " 31%|█████████████▏ | 12/39 [00:16<00:40, 1.51s/it]\u001b[A\n", " 33%|██████████████▎ | 13/39 [00:18<00:38, 1.50s/it]\u001b[A\n", " 36%|███████████████▍ | 14/39 [00:19<00:36, 1.47s/it]\u001b[A\n", " 38%|████████████████▌ | 15/39 [00:21<00:34, 1.45s/it]\u001b[A\n", " 41%|█████████████████▋ | 16/39 [00:22<00:34, 1.49s/it]\u001b[A\n", " 44%|██████████████████▋ | 17/39 [00:24<00:32, 1.47s/it]\u001b[A\n", " 46%|███████████████████▊ | 18/39 [00:25<00:27, 1.32s/it]\u001b[A\n", " 49%|████████████████████▉ | 19/39 [00:26<00:24, 1.22s/it]\u001b[A\n", " 51%|██████████████████████ | 20/39 [00:27<00:21, 1.13s/it]\u001b[A\n", " 54%|███████████████████████▏ | 21/39 [00:28<00:19, 1.07s/it]\u001b[A\n", " 56%|████████████████████████▎ | 22/39 [00:28<00:17, 1.01s/it]\u001b[A\n", " 59%|█████████████████████████▎ | 23/39 [00:29<00:15, 1.01it/s]\u001b[A\n", " 62%|██████████████████████████▍ | 24/39 
[00:30<00:14, 1.01it/s]\u001b[A\n", " 64%|███████████████████████████▌ | 25/39 [00:31<00:14, 1.00s/it]\u001b[A\n", " 67%|████████████████████████████▋ | 26/39 [00:32<00:13, 1.01s/it]\u001b[A\n", " 69%|█████████████████████████████▊ | 27/39 [00:33<00:12, 1.01s/it]\u001b[A\n", " 72%|██████████████████████████████▊ | 28/39 [00:34<00:10, 1.04it/s]\u001b[A\n", " 74%|███████████████████████████████▉ | 29/39 [00:35<00:09, 1.00it/s]\u001b[A\n", " 77%|█████████████████████████████████ | 30/39 [00:36<00:09, 1.03s/it]\u001b[A\n", " 79%|██████████████████████████████████▏ | 31/39 [00:37<00:08, 1.01s/it]\u001b[A\n", " 82%|███████████████████████████████████▎ | 32/39 [00:39<00:07, 1.03s/it]\u001b[A\n", " 85%|████████████████████████████████████▍ | 33/39 [00:40<00:06, 1.05s/it]\u001b[A\n", " 87%|█████████████████████████████████████▍ | 34/39 [00:41<00:05, 1.02s/it]\u001b[A\n", " 90%|██████████████████████████████████████▌ | 35/39 [00:41<00:03, 1.02it/s]\u001b[A\n", " 92%|███████████████████████████████████████▋ | 36/39 [00:42<00:02, 1.01it/s]\u001b[A\n", " 95%|████████████████████████████████████████▊ | 37/39 [00:43<00:01, 1.01it/s]\u001b[A\n", " 97%|█████████████████████████████████████████▉ | 38/39 [00:44<00:00, 1.01it/s]\u001b[A\n", " \u001b[A\n", "\u001b[A{'eval_loss': 0.37586063146591187, 'eval_wer': 0.5036399735274653, 'eval_runtime': 47.6167, 'eval_samples_per_second': 25.726, 'eval_steps_per_second': 0.819, 'epoch': 41.67}\n", " 42%|██████████████▌ | 3500/8400 [1:53:28<1:53:50, 1.39s/it]\n", "100%|███████████████████████████████████████████| 39/39 [00:46<00:00, 1.31it/s]\u001b[A\n", " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-3500\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-3500/config.json\n", "Model weights saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-3500/pytorch_model.bin\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-3500/preprocessor_config.json\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-2500] due to args.save_total_limit\n", "{'loss': 0.8546, 'learning_rate': 0.0001823544303797468, 'epoch': 42.86} \n", "{'loss': 0.8455, 'learning_rate': 0.00017855696202531644, 'epoch': 44.05} \n", "{'loss': 0.8308, 'learning_rate': 0.00017475949367088604, 'epoch': 45.24} \n", "{'loss': 0.8179, 'learning_rate': 0.0001709620253164557, 'epoch': 46.43} \n", "{'loss': 0.8201, 'learning_rate': 0.00016716455696202527, 'epoch': 47.62} \n", " 48%|████████████████▋ | 4000/8400 [2:09:14<1:59:59, 1.64s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "***** Running Evaluation *****\n", " Num examples = 1225\n", " Batch size = 32\n", "\n", " 0%| | 0/39 [00:00<?, ?it/s]\u001b[A\n", " 5%|██▎ | 2/39 [00:01<00:28, 1.30it/s]\u001b[A\n", " 8%|███▍ | 3/39 [00:03<00:42, 1.18s/it]\u001b[A\n", " 10%|████▌ | 4/39 [00:04<00:45, 1.31s/it]\u001b[A\n", " 13%|█████▋ | 5/39 [00:06<00:47, 1.38s/it]\u001b[A\n", " 15%|██████▊ | 6/39 [00:07<00:46, 1.40s/it]\u001b[A\n", " 18%|███████▉ | 7/39 [00:09<00:45, 1.44s/it]\u001b[A\n", " 21%|█████████ | 8/39 [00:10<00:45, 1.47s/it]\u001b[A\n", " 23%|██████████▏ | 9/39 [00:12<00:45, 1.53s/it]\u001b[A\n", " 26%|███████████ | 10/39 [00:14<00:44, 1.53s/it]\u001b[A\n", " 28%|████████████▏ | 11/39 [00:15<00:42, 1.50s/it]\u001b[A\n", " 31%|█████████████▏ | 12/39 
[00:17<00:41, 1.54s/it]\u001b[A\n",
"{'eval_loss': 0.36158040165901184, 'eval_wer': 0.48588131480255903, 'eval_runtime': 47.7093, 'eval_samples_per_second': 25.676, 'eval_steps_per_second': 0.817, 'epoch': 47.62}\n",
" 48%|████████████████▋ | 4000/8400 [2:10:01<1:59:59, 1.64s/it]\n",
"Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-4000\n",
"Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-4000/config.json\n",
"Model weights saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-4000/pytorch_model.bin\n",
"Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/checkpoint-4000/preprocessor_config.json\n",
"Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-3000] due to args.save_total_limit\n",
"{'loss': 0.8052, 'learning_rate': 0.00016340506329113924, 'epoch': 48.81}\n",
"{'loss': 0.7943, 'learning_rate': 0.00015581012658227847, 'epoch': 51.19}\n",
"{'loss': 0.7811, 'learning_rate': 0.00015201265822784808, 'epoch': 52.38}\n",
"{'loss': 0.7794, 'learning_rate': 0.0001482151898734177, 'epoch': 53.57}\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
"***** Running Evaluation *****\n",
" Num examples = 1225\n",
" Batch size = 32\n",
"{'eval_loss': 0.38744866847991943, 'eval_wer': 0.49382307522611957, 'eval_runtime': 47.4049, 'eval_samples_per_second': 25.841, 'eval_steps_per_second': 0.823, 'epoch': 53.57}\n",
" 54%|██████████████████▊ | 4500/8400 [2:26:38<2:07:23, 1.96s/it]\n",
"Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-4500\n",
"Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-3500] due to args.save_total_limit\n",
"{'loss': 0.7793, 'learning_rate': 0.00014441772151898733, 'epoch': 54.76}\n",
"{'loss': 0.7729, 'learning_rate': 0.00014062025316455696, 'epoch': 55.95}\n",
"{'loss': 0.76, 'learning_rate': 0.00013682278481012657, 'epoch': 57.14}\n",
"{'loss': 0.7593, 'learning_rate': 0.0001330253164556962, 'epoch': 58.33}\n",
"{'loss': 0.735, 'learning_rate': 0.0001292278481012658, 'epoch': 59.52}\n",
"***** Running Evaluation *****\n",
"{'eval_loss': 0.374796986579895, 'eval_wer': 0.47816015883520846, 'eval_runtime': 47.4755, 'eval_samples_per_second': 25.803, 'eval_steps_per_second': 0.821, 'epoch': 59.52}\n",
" 60%|████████████████████▊ | 5000/8400 [2:43:07<2:04:43, 2.20s/it]\n",
"Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-5000\n",
"Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-4000] due to args.save_total_limit\n",
"{'loss': 0.7415, 'learning_rate': 0.00012543037974683543, 'epoch': 60.71}\n",
"{'loss': 0.7206, 'learning_rate': 0.00012163291139240506, 'epoch': 61.9}\n",
"{'loss': 0.7117, 'learning_rate': 0.00011783544303797467, 'epoch': 63.1}\n",
"{'loss': 0.7213, 'learning_rate': 0.00011403797468354429, 'epoch': 64.29}\n",
"{'loss': 0.7082, 'learning_rate': 0.0001102405063291139, 'epoch': 65.48}\n",
"***** Running Evaluation *****\n",
"{'eval_loss': 0.3615438938140869, 'eval_wer': 0.4674608427090227, 'eval_runtime': 48.0099, 'eval_samples_per_second': 25.516, 'eval_steps_per_second': 0.812, 'epoch': 65.48}\n",
" 65%|████████████████████████▏ | 5500/8400 [2:59:31<51:53, 1.07s/it]\n",
"Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-5500\n",
"Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-4500] due to args.save_total_limit\n",
"{'loss': 0.705, 'learning_rate': 0.00010644303797468355, 'epoch': 66.67}\n",
"{'loss': 0.6942, 'learning_rate': 0.00010264556962025316, 'epoch': 67.86}\n",
"{'loss': 0.6894, 'learning_rate': 9.884810126582278e-05, 'epoch': 69.05}\n",
"{'loss': 0.6807, 'learning_rate': 9.50506329113924e-05, 'epoch': 70.24}\n",
"{'loss': 0.669, 'learning_rate': 9.129113924050632e-05, 'epoch': 71.43}\n",
"***** Running Evaluation *****\n",
"{'eval_loss': 0.3796931207180023, 'eval_wer': 0.4600705934259872, 'eval_runtime': 46.7914, 'eval_samples_per_second': 26.18, 'eval_steps_per_second': 0.833, 'epoch': 71.43}\n",
" 71%|██████████████████████████▍ | 6000/8400 [3:16:00<54:25, 1.36s/it]\n",
"Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-6000\n",
"Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-5000] due to args.save_total_limit\n",
"{'loss': 0.6803, 'learning_rate': 8.749367088607594e-05, 'epoch': 72.62}\n",
"{'loss': 0.6706, 'learning_rate': 8.369620253164557e-05, 'epoch': 73.81}\n",
"{'loss': 0.6615, 'learning_rate': 7.989873417721518e-05, 'epoch': 75.0}\n",
"{'loss': 0.6541, 'learning_rate': 7.61012658227848e-05, 'epoch': 76.19}\n",
"{'loss': 0.6457, 'learning_rate': 7.230379746835442e-05, 'epoch': 77.38}\n",
"***** Running Evaluation *****\n",
"{'eval_loss': 0.38117340207099915, 'eval_wer': 0.4514670196337966, 'eval_runtime': 46.7869, 'eval_samples_per_second': 26.183, 'eval_steps_per_second': 0.834, 'epoch': 77.38}\n",
" 77%|████████████████████████████▋ | 6500/8400 [3:32:31<53:34, 1.69s/it]\n",
"Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-6500\n",
"Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-5500] due to args.save_total_limit\n",
"{'loss': 0.6379, 'learning_rate': 6.850632911392405e-05, 'epoch': 78.57}\n",
"{'loss': 0.6387, 'learning_rate': 6.470886075949366e-05, 'epoch': 79.76}\n",
"{'loss': 0.6186, 'learning_rate': 6.091139240506329e-05, 'epoch': 80.95}\n",
"{'loss': 0.6098, 'learning_rate': 5.331645569620252e-05, 'epoch': 83.33}\n",
"***** Running Evaluation *****\n",
"{'eval_loss': 0.3660217821598053, 'eval_wer': 0.4342598720494154, 'eval_runtime': 46.6679, 'eval_samples_per_second': 26.249, 'eval_steps_per_second': 0.836, 'epoch': 83.33}\n",
" 83%|██████████████████████████████▊ | 7000/8400 [3:48:50<44:36, 1.91s/it]\n",
"Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-7000\n",
"Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-6000] due to args.save_total_limit\n",
"{'loss': 0.6124, 'learning_rate': 4.9518987341772145e-05, 'epoch': 84.52}\n",
"{'loss': 0.5954, 'learning_rate': 4.572151898734177e-05, 'epoch': 85.71}\n",
"{'loss': 0.5993, 'learning_rate': 4.192405063291139e-05, 'epoch': 86.9}\n",
"{'loss': 0.5861, 'learning_rate': 3.812658227848101e-05, 'epoch': 88.1}\n",
"{'loss': 0.5874, 'learning_rate': 3.432911392405063e-05, 'epoch': 89.29}\n",
"***** Running Evaluation *****\n",
"{'eval_loss': 0.3640037775039673, 'eval_wer': 0.4256562982572248, 'eval_runtime': 47.0, 'eval_samples_per_second': 26.064, 'eval_steps_per_second': 0.83, 'epoch': 89.29}\n",
" 89%|█████████████████████████████████ | 7500/8400 [4:05:12<32:05, 2.14s/it]\n",
"Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-7500\n",
"Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-6500] due to args.save_total_limit\n",
"{'loss': 0.5818, 'learning_rate': 3.053164556962025e-05, 'epoch': 90.48}\n",
"{'loss': 0.5755, 'learning_rate': 2.673417721518987e-05, 'epoch': 91.67}\n",
"{'loss': 0.5837, 'learning_rate': 2.293670886075949e-05, 'epoch': 92.86}\n",
"{'loss': 0.5727, 'learning_rate': 1.9139240506329114e-05, 'epoch': 94.05}\n",
"{'loss': 0.5627, 'learning_rate': 1.5341772151898733e-05, 'epoch': 95.24}\n",
"***** Running Evaluation *****\n",
"{'eval_loss': 0.3660779297351837, 'eval_wer': 0.4238914626075447, 'eval_runtime': 46.6417, 'eval_samples_per_second': 26.264, 'eval_steps_per_second': 0.836, 'epoch': 95.24}\n",
" 95%|███████████████████████████████████▏ | 8000/8400 [4:21:28<06:45, 1.01s/it]\n",
"Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian/checkpoint-8000\n",
"Deleting older checkpoint [wav2vec2-large-xls-r-300m-georgian/checkpoint-7000] due to args.save_total_limit\n",
"{'loss': 0.5602, 'learning_rate': 1.1582278481012656e-05, 'epoch': 96.43}\n",
"{'loss': 0.5714, 'learning_rate': 3.987341772151899e-06, 'epoch': 98.81}\n",
"{'loss': 0.5538, 'learning_rate': 1.8987341772151897e-07, 'epoch': 100.0}\n",
"100%|█████████████████████████████████████| 8400/8400 [4:34:11<00:00, 1.42s/it]\n",
"\n",
"Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "{'train_runtime': 16451.1257, 'train_samples_per_second': 16.327, 'train_steps_per_second': 0.511, 'train_loss': 0.9825477345784506, 'epoch': 100.0}\n", "100%|█████████████████████████████████████| 8400/8400 [4:34:11<00:00, 1.96s/it]\n", "Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/config.json\n", "Model weights saved in ./wav2vec2-large-xls-r-300m-georgian/pytorch_model.bin\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "***** train metrics *****\n", " epoch = 100.0\n", " train_loss = 0.9825\n", " train_runtime = 4:34:11.12\n", " train_samples = 2686\n", " train_samples_per_second = 16.327\n", " train_steps_per_second = 0.511\n", "01/27/2022 16:43:57 - INFO - __main__ - *** Evaluate ***\n", "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "***** Running Evaluation *****\n", " Num examples = 1225\n", " Batch size = 32\n", "100%|███████████████████████████████████████████| 39/39 [00:45<00:00, 1.16s/it]\n", "***** eval metrics *****\n", " epoch = 100.0\n", " eval_loss = 0.3666\n", " eval_runtime = 0:00:46.68\n", " eval_samples = 1225\n", " eval_samples_per_second = 26.238\n", " eval_steps_per_second = 0.835\n", " eval_wer = 0.4211\n", "Saving model checkpoint to ./wav2vec2-large-xls-r-300m-georgian\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/config.json\n", "Model weights saved in ./wav2vec2-large-xls-r-300m-georgian/pytorch_model.bin\n", "Configuration saved in ./wav2vec2-large-xls-r-300m-georgian/preprocessor_config.json\n", "Upload file pytorch_model.bin: 99%|██████▉| 1.17G/1.18G [02:05<00:00, 9.72MB/s]To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-georgian\n", " 2dc9d89..464889d main -> main\n", "\n", "01/27/2022 16:48:08 - WARNING - huggingface_hub.repository - To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-georgian\n", " 2dc9d89..464889d main -> main\n", "\n", "Upload file pytorch_model.bin: 100%|███████| 1.18G/1.18G [02:06<00:00, 9.96MB/s]\n", "Dropping the following result as it does not have all the necessary fields:\n", "{'dataset': {'name': 'MOZILLA-FOUNDATION/COMMON_VOICE_7_0 - KA', 'type': 'common_voice', 'args': 'Config: ka, Training split: train+validation, Eval split: test'}}\n", "To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-georgian\n", " 464889d..6e7a45b main -> main\n", "\n", "01/27/2022 16:48:15 - WARNING - huggingface_hub.repository - To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-georgian\n", " 464889d..6e7a45b main -> main\n", "\n" ] } ], "source": [ "!python run_speech_recognition_ctc.py \\\n", "\t--dataset_name=\"mozilla-foundation/common_voice_7_0\" \\\n", "\t--model_name_or_path=\"facebook/wav2vec2-xls-r-300m\" \\\n", "\t--dataset_config_name=\"ka\" \\\n", "\t--output_dir=\"./wav2vec2-large-xls-r-300m-georgian\" \\\n", "\t--overwrite_output_dir \\\n", "\t--num_train_epochs=\"100\" \\\n", "\t--per_device_train_batch_size=\"32\" \\\n", "\t--per_device_eval_batch_size=\"32\" \\\n", "\t--gradient_accumulation_steps=\"1\" \\\n", "\t--learning_rate=\"3e-4\" \\\n", "\t--warmup_steps=\"500\" \\\n", "\t--length_column_name=\"input_length\" \\\n", "\t--evaluation_strategy=\"steps\" \\\n", "\t--text_column_name=\"sentence\" \\\n", "\t--chars_to_ignore , ? . ! 
\\- \\; \\: \\\" “ % ‘ ” � — ’ … – \\\n", "\t--save_steps=\"500\" \\\n", "\t--eval_steps=\"500\" \\\n", "\t--logging_steps=\"100\" \\\n", "\t--layerdrop=\"0.0\" \\\n", "\t--activation_dropout=\"0.1\" \\\n", "\t--save_total_limit=\"2\" \\\n", "\t--freeze_feature_encoder \\\n", "\t--feat_proj_dropout=\"0.0\" \\\n", "\t--mask_time_prob=\"0.75\" \\\n", "\t--mask_time_length=\"10\" \\\n", "\t--mask_feature_prob=\"0.25\" \\\n", "\t--mask_feature_length=\"64\" \\\n", "\t--gradient_checkpointing \\\n", "\t--use_auth_token \\\n", "\t--fp16 \\\n", "\t--group_by_length \\\n", "\t--do_train --do_eval \\\n", " --push_to_hub" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# !rm -rf wav2vec2-large-xls-r-300m-bashkir" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!ls -ltr" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Filesystem Size Used Avail Use% Mounted on\n", "overlay 3.5T 1.2T 2.2T 34% /\n", "tmpfs 64M 0 64M 0% /dev\n", "tmpfs 87G 0 87G 0% /sys/fs/cgroup\n", "tmpfs 87G 0 87G 0% /dev/shm\n", "/dev/md0 3.5T 1.2T 2.2T 34% /etc/group\n", "tmpfs 87G 12K 87G 1% /proc/driver/nvidia\n", "/dev/vda1 49G 6.5G 42G 14% /usr/bin/nvidia-smi\n", "udev 87G 0 87G 0% /dev/nvidia0\n", "tmpfs 87G 0 87G 0% /proc/acpi\n", "tmpfs 87G 0 87G 0% /proc/scsi\n", "tmpfs 87G 0 87G 0% /sys/firmware\n" ] } ], "source": [ "!df -h" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ka/7.0.0/fe20cac47c166e25b1f096ab661832e3da7cf298ed4a91dcaa1343ad972d175b)\n", "Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ka/7.0.0/fe20cac47c166e25b1f096ab661832e3da7cf298ed4a91dcaa1343ad972d175b)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "2686\n" ] } ], "source": [ "from datasets import load_dataset, load_metric, Audio\n", "\n", "common_voice_train = load_dataset(\"mozilla-foundation/common_voice_7_0\", \"ka\", use_auth_token=True, split=\"train+validation\")\n", "common_voice_test = load_dataset(\"mozilla-foundation/common_voice_7_0\", \"ka\", use_auth_token=True, split=\"test\")\n", "\n", "print(len(common_voice_train))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset({\n", " features: ['client_id', 'path', 'audio', 'sentence', 'up_votes', 'down_votes', 'age', 'gender', 'accent', 'locale', 'segment'],\n", " num_rows: 2686\n", "})" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "common_voice_train" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8393.75" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(common_voice_train) * 100 / 32" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "common_voice_train = common_voice_train.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])\n", "common_voice_test = common_voice_test.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])" ] }, { "cell_type": "code", 
"execution_count": 6, "metadata": {}, "outputs": [], "source": [ "from datasets import ClassLabel\n", "import random\n", "import pandas as pd\n", "from IPython.display import display, HTML\n", "\n", "def show_random_elements(dataset, num_examples=10):\n", " assert num_examples <= len(dataset), \"Can't pick more elements than there are in the dataset.\"\n", " picks = []\n", " for _ in range(num_examples):\n", " pick = random.randint(0, len(dataset)-1)\n", " while pick in picks:\n", " pick = random.randint(0, len(dataset)-1)\n", " picks.append(pick)\n", " \n", " df = pd.DataFrame(dataset[picks])\n", " display(HTML(df.to_html()))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>sentence</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>ეს ემბლემა წარმოადგენს მხიარულ, პატარა კაცს, რომელიც ჯანსაღი და ენერგიული მოქალაქეების სიმბოლოა.</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>სახელი ეწოდა ნაყოფიერების რომაული ღვთაების, მერკურის დედის, მაიას მიხედვით.</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>პრეზიდენტმა შერილ ბუნ-ისააკმა წევრობის ახალი წესები შემოიღო.</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>სიცოცხლის მოკლე ხანგრძლივობა პოპულაციის ზრდის ნელ ტემპს განაპირობებს.</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>იგი ხელმძღვანელობდა მეცნიერებათა აკადემიის გეოლოგიისა და მინერალოგიის ინსტიტუტის მინერალოგიის სექტორს.</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>ნუსხურსა და მხედრულში იგი ქვედა სამ ხაზში იწერება.</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>ოტელოს როლს რასელ ტომასი შეასრულებს, დეზდემონას კი – ლეა გროსეტო.</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>ექსპერტთა შეფასებით ნახევარი მილიონი თევზი დაიღუპა, ზოგიერთი სახეობა კი საერთოდ გაქრა.</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>შესაძლოა ადრე ხაჭოს შერევით მზადდებოდა.</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>აგრეთვე დადებითი იყო კრიტიკოსების შეფასებები.</td>\n", " </tr>\n", " </tbody>\n", "</table>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_random_elements(common_voice_train.remove_columns([\"path\", \"audio\"]), num_examples=10)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import re\n", "chars_to_remove_regex = '[\\,\\?\\.\\!\\-\\;\\:\\\"\\“\\%\\‘\\”\\�\\—\\’\\…\\–]'\n", "\n", "def remove_special_characters(batch):\n", " batch[\"sentence\"] = re.sub(chars_to_remove_regex, '', batch[\"sentence\"]).lower()\n", " return batch" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2a03c54c25734f67ac41b3740e0238ff", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/2686 [00:00<?, ?ex/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4472af4d26fd411ebe2020b4dfdf2870", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1225 [00:00<?, ?ex/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "common_voice_train = common_voice_train.map(remove_special_characters)\n", "common_voice_test = common_voice_test.map(remove_special_characters)" ] }, { "cell_type": "code", 
"execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# start_with_ar = common_voice_train.filter(lambda example: \"−\" in example['sentence'])\n", "# start_with_ar[0]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "# start_with_ar" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "def replace_hatted_characters(batch):\n", "# batch[\"sentence\"] = re.sub('[â]', 'a', batch[\"sentence\"])\n", "# batch[\"sentence\"] = re.sub('[î]', 'i', batch[\"sentence\"])\n", "# batch[\"sentence\"] = re.sub('[ô]', 'o', batch[\"sentence\"])\n", "# batch[\"sentence\"] = re.sub('[û]', 'u', batch[\"sentence\"])\n", " return batch" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fc570f5c23414502bc100c36be06cca0", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/2686 [00:00<?, ?ex/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "51252c149f9441cc8fd74794ed1d7277", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1225 [00:00<?, ?ex/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "common_voice_train = common_voice_train.map(replace_hatted_characters)\n", "common_voice_test = common_voice_test.map(replace_hatted_characters)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "def extract_all_chars(batch):\n", " all_text = \" \".join(batch[\"sentence\"])\n", " vocab = list(set(all_text))\n", " return {\"vocab\": [vocab], \"all_text\": [all_text]}" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3e6b445c8a314ea38a5164f768602ae0", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1 [00:00<?, ?ba/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a6b56031cfea47d78335c849ddf544d5", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1 [00:00<?, ?ba/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "vocab_train = common_voice_train.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_train.column_names)\n", "vocab_test = common_voice_test.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_test.column_names)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "vocab_list = list(set(vocab_train[\"vocab\"][0]) | set(vocab_test[\"vocab\"][0]))" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{' ': 0,\n", " 'ა': 1,\n", " 'ბ': 2,\n", " 'გ': 3,\n", " 'დ': 4,\n", " 'ე': 5,\n", " 'ვ': 6,\n", " 'ზ': 7,\n", " 'თ': 8,\n", " 'ი': 9,\n", " 'კ': 10,\n", " 'ლ': 11,\n", " 'მ': 12,\n", " 'ნ': 13,\n", " 'ო': 14,\n", " 'პ': 15,\n", " 'ჟ': 16,\n", " 'რ': 17,\n", " 'ს': 18,\n", " 'ტ': 19,\n", " 'უ': 20,\n", " 'ფ': 21,\n", " 'ქ': 22,\n", " 'ღ': 23,\n", " 'ყ': 24,\n", " 'შ': 25,\n", " 'ჩ': 26,\n", " 'ც': 27,\n", " 'ძ': 28,\n", " 'წ': 29,\n", " 'ჭ': 30,\n", " 'ხ': 31,\n", " 'ჯ': 32,\n", " 'ჰ': 33,\n", " '„': 34}" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vocab_dict = {v: k for k, 
v in enumerate(sorted(vocab_list))}\n", "vocab_dict" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "file ./config.json not found\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "37\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.8/site-packages/huggingface_hub/hf_api.py:1001: FutureWarning: `create_repo` now takes `token` as an optional positional argument. Be sure to adapt your code!\n", " warnings.warn(\n", "Cloning https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-georgian into local empty directory.\n", "To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-georgian\n", " 0ae94a2..f7ddf2b main -> main\n", "\n" ] }, { "data": { "text/plain": [ "'https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-georgian/commit/f7ddf2b62bce10ef8de99d8446558744958780a3'" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vocab_dict[\"|\"] = vocab_dict[\" \"]\n", "del vocab_dict[\" \"]\n", "\n", "vocab_dict[\"[UNK]\"] = len(vocab_dict)\n", "vocab_dict[\"[PAD]\"] = len(vocab_dict)\n", "print(len(vocab_dict))\n", "\n", "import json\n", "with open('./vocab.json', 'w') as vocab_file:\n", " json.dump(vocab_dict, vocab_file)\n", " \n", "from transformers import Wav2Vec2CTCTokenizer\n", "\n", "tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(\"./\", unk_token=\"[UNK]\", pad_token=\"[PAD]\", word_delimiter_token=\"|\")\n", "\n", "repo_name = \"wav2vec2-large-xls-r-300m-georgian\"\n", "\n", "tokenizer.push_to_hub(repo_name)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2022-01-27 16:52:27-- https://raw.githubusercontent.com/huggingface/transformers/master/examples/research_projects/robust-speech-event/eval.py\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 4738 (4.6K) [text/plain]\n", "Saving to: ‘eval.py’\n", "\n", "eval.py 100%[===================>] 4.63K --.-KB/s in 0s \n", "\n", "2022-01-27 16:52:27 (21.9 MB/s) - ‘eval.py’ saved [4738/4738]\n", "\n", "total 1232584\n", "-rw-r--r-- 1 ovh ovh 399 Jan 27 12:09 vocab.json\n", "-rw-r--r-- 1 ovh ovh 294 Jan 27 12:09 tokenizer_config.json\n", "-rw-r--r-- 1 ovh ovh 695 Jan 27 12:09 special_tokens_map.json\n", "-rw-r--r-- 1 ovh ovh 23 Jan 27 12:09 added_tokens.json\n", "drwxr-xr-x 2 ovh ovh 4096 Jan 27 16:15 checkpoint-7500\n", "drwxr-xr-x 2 ovh ovh 4096 Jan 27 16:31 checkpoint-8000\n", "-rw-r--r-- 1 ovh ovh 197 Jan 27 16:43 train_results.json\n", "-rw-r--r-- 1 ovh ovh 14654 Jan 27 16:43 trainer_state.json\n", "-rw-r--r-- 1 ovh ovh 225 Jan 27 16:44 eval_results.json\n", "-rw-r--r-- 1 ovh ovh 2033 Jan 27 16:44 config.json\n", "-rw-r--r-- 1 ovh ovh 400 Jan 27 16:44 all_results.json\n", "-rw-r--r-- 1 ovh ovh 1262083569 Jan 27 16:44 pytorch_model.bin\n", "-rw-r--r-- 1 ovh ovh 3055 Jan 27 16:44 training_args.bin\n", "-rw-r--r-- 1 ovh ovh 212 Jan 27 16:44 preprocessor_config.json\n", "-rw-r--r-- 1 ovh ovh 2484 Jan 27 16:48 README.md\n", "-rw-r--r-- 1 ovh ovh 4738 Jan 27 16:52 eval.py\n" ] } ], "source": [ "!wget -O eval.py https://raw.githubusercontent.com/huggingface/transformers/master/examples/research_projects/robust-speech-event/eval.py\n", "!cp eval.py wav2vec2-large-xls-r-300m-georgian\n", "!ls -ltr wav2vec2-large-xls-r-300m-georgian" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Error: mkl-service + Intel(R) MKL: MKL_THREADING_LAYER=INTEL is incompatible with libgomp-a34b3233.so.1 library.\n", "\tTry to import numpy first or set the threading layer accordingly. 
Set MKL_SERVICE_FORCE_INTEL to force it.\n" ] } ], "source": [ "!cd wav2vec2-large-xls-r-300m-georgian; python eval.py \\\n", " --model_id ./ --dataset mozilla-foundation/common_voice_7_0 --config ka --split test --log_outputs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!cd wav2vec2-large-xls-r-300m-georgian; python eval.py \\\n", " --model_id infinitejoy/wav2vec2-large-xls-r-300m-georgian --dataset speech-recognition-community-v2/dev_data \\\n", " --config ka --split validation --chunk_length_s 10 --stride_length_s 1" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "24592b0be30e4eafb1949cf09d1c4fb4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/260 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f9bf2ab0d2fa4d3f9235cc6d1ab772f1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/574 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b0791474a34043da8057e06741472ade", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/23.0 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1ccbd582d616458b87c76ac8dc5b6b36", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/309 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# from transformers import AutoModelForCTC, Wav2Vec2Processor\n", "\n", "# model = AutoModelForCTC.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n", "# processor = Wav2Vec2Processor.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n", "\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "013fabff2ea243a0a728a79b8f54ae09", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/1.99k [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a8d9ca6d024f46f58301bfbcc475e41a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/1.18G [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b336e2647c05466d87a11dfa326e30d6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/212 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8e6962320ad944439261482617be4869", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/260 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "99de2ef750aa49fd986965d66853a5ea", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/520 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "765670f93e5f4c2e849c98d53e616f38", "version_major": 2, 
"version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/23.0 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "812abafc8f6b49e3a498718d034a379b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/309 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "ename": "AssertionError", "evalue": "55", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m<ipython-input-23-c6863db4730f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mlogits\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlogits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mlogits\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogits\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mAssertionError\u001b[0m: 55" ] } ], "source": [ "# from transformers import AutoModelForCTC, AutoProcessor\n", "# from datasets import load_dataset\n", "\n", "# model = AutoModelForCTC.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n", "# processor = AutoProcessor.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n", "\n", "# input_values = processor(common_voice_test[0][\"audio\"][\"array\"], return_tensors=\"pt\", sampling_rate=16_000).input_values\n", "# # input_values = input_values.to(\"cuda\")\n", "\n", "# logits = model(input_values).logits\n", "\n", "# assert logits.shape[-1] == 32, logits.shape[-1]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ka/7.0.0/fe20cac47c166e25b1f096ab661832e3da7cf298ed4a91dcaa1343ad972d175b)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "103935b56b6c4edea05219e5a86d101e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/1.99k [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d9682c10e03745128977cd42bdcbcdff", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/212 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c87cc16a11e14f2dbe9955fd1c6a7e16", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/1.18G [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f37c56108b09417bb4dbe6f69877a3cc", "version_major": 2, "version_minor": 0 }, "text/plain": [ 
"Downloading: 0%| | 0.00/294 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9f9da241a3114b06af2dd11eb304c182", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/399 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "de810cf06c704dc9920c1a2a1952eafc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/23.0 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f861c8469acb4ec4b10bd7c1df6105f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/695 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6337b3fd84ed4b1cb456d64205083265", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/10 [00:00<?, ?ex/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "['მადლობა ღმერთს გადავრჩით', 'რაც დაგვრჩება ეგ იქნება ახალი რაოდენობა ასეულების', 'ახლა მოდით რაღაც განსხვავებული გავაკეთოთ', 'ის უკრავს და ზედმიწევნით სწავლობს ბლუზს', 'საინტერესოა გაგრძელდება თუ არა ასეთი კანონზომიერება უფრო დიდ რიცხვებშიც', 'მის შემქმნელად გერმანელი იმიგრანტი ჩარლზ ფელტმენი მიიჩნევა', 'ჰონგკონგელმა სტუდენტებმა ვიდეოთამაში შექმნეს რომელიც მიმდინარე პროცესებს ასახავს', 'მსახიობმა გლენ ქლოუსმა გაიმარჯვა ქალის დრამატული როლის საუკეთესო შემსრულებელთა ნომინაციაში ფილმით „ცოლი', 'თანამედროვე ციფრულმა ანიმაციამ დიდი ზეგავლენა მოახდინა ვიდეო თამაშების ინდუსტრიაზე', 'ერთ თვეში გავხსენი ეს რესტორანი ხუთ მაისს აღდგომას გაიხსნა']\n" ] }, { "data": { "text/plain": [ "'მადლობა ღმერთს გადავრჩით!'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datasets import Audio, Dataset, load_dataset, load_metric\n", "from transformers import AutoFeatureExtractor, pipeline\n", "\n", "dataset = load_dataset(\"mozilla-foundation/common_voice_7_0\", \"ka\", use_auth_token=True, split=\"train+validation\")\n", "\n", "# for testing: only process the first two examples as a test\n", "dataset = dataset.select(range(10))\n", "\n", "repo_name = 'infinitejoy/wav2vec2-large-xls-r-300m-georgian'\n", "\n", "# load processor\n", "feature_extractor = AutoFeatureExtractor.from_pretrained(repo_name)\n", "# feature_extractor = processor_with_lm.feature_extractor\n", "sampling_rate = feature_extractor.sampling_rate\n", "\n", "# resample audio\n", "dataset = dataset.cast_column(\"audio\", Audio(sampling_rate=sampling_rate))\n", "\n", "# load eval pipeline\n", "asr = pipeline(\"automatic-speech-recognition\", model=repo_name, feature_extractor=feature_extractor)\n", "\n", "# map function to decode audio\n", "def map_to_pred(batch):\n", " prediction = asr(\n", " batch[\"audio\"][\"array\"])\n", "\n", " batch[\"prediction\"] = prediction[\"text\"]\n", " batch[\"target\"] = batch[\"sentence\"]\n", " return batch\n", "\n", "# run inference on all examples\n", "result = dataset.map(map_to_pred, remove_columns=dataset.column_names)\n", "print(result[\"prediction\"])\n", "\n", "result[0]['target']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "authorship_tag": "ABX9TyM3OaMlm9YQtKpl28c8gBBd", 
"include_colab_link": true, "name": "DebugOVHTransformers.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 4 }