Upload 4 files

Browse files

Files changed (4) hide show

fingerprint.pb +3 -0
keras_metadata.pb +3 -0
mohirai_dataset.ipynb +271 -0
saved_model.pb +3 -0

fingerprint.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7f5e5f0e80de537e1005e7699585df8ceebbca2bc86432109b7f61c6a2bef77
+size 56

keras_metadata.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:100488d663b105f13a3cb912a8f7f6a7f9f5deb32f3b1b738fcb1185b2210605
+size 8505

mohirai_dataset.ipynb ADDED Viewed

	@@ -0,0 +1,271 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "outputs": [],
+   "source": [
+    "import pandas as pd \n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import torch"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-10-30T12:25:25.621457Z",
+     "start_time": "2023-10-30T12:25:23.392395Z"
+    }
+   },
+   "id": "93ea0b02ff2584f4"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "outputs": [],
+   "source": [
+    "# Clip folders for the three corpora (ru / en / uz), bound in a single unpacking.\n",
+    "rus, eng, uzb = (\n",
+    "    \"/Users/macbookpro/Downloads/cv-corpus-10.0-delta-2022-07-04/ru/clips\",\n",
+    "    \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21 2/en/clips\",\n",
+    "    \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21/uz/clips\",\n",
+    ")"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-10-30T12:25:28.760740Z",
+     "start_time": "2023-10-30T12:25:28.547364Z"
+    }
+   },
+   "id": "99a2f44cd8bfc808"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2023-10-30T15:43:45.888685Z",
+     "start_time": "2023-10-30T15:43:45.481594Z"
+    }
+   },
+   "outputs": [
+    {
+     "ename": "RuntimeError",
+     "evalue": "No ffmpeg exe could be found. Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mRuntimeError\u001B[0m                              Traceback (most recent call last)",
+      "Cell \u001B[0;32mIn[36], line 2\u001B[0m\n\u001B[1;32m      1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mos\u001B[39;00m\n\u001B[0;32m----> 2\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01meditor\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m \u001B[38;5;21;01mmp\u001B[39;00m\n\u001B[1;32m      4\u001B[0m \u001B[38;5;66;03m# Directories containing MP3 files\u001B[39;00m\n\u001B[1;32m      5\u001B[0m uz_path \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m/Users/macbookpro/Downloads/destination_directory/uz\u001B[39m\u001B[38;5;124m\"\u001B[39m\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/editor.py:36\u001B[0m\n\u001B[1;32m     33\u001B[0m os\u001B[38;5;241m.\u001B[39menviron[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mPYGAME_HIDE_SUPPORT_PROMPT\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m1\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m     35\u001B[0m \u001B[38;5;66;03m# Clips\u001B[39;00m\n\u001B[0;32m---> 36\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mVideoFileClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m VideoFileClip\n\u001B[1;32m     37\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mImageSequenceClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m ImageSequenceClip\n\u001B[1;32m     38\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdownloader\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m download_webfile\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/video/io/VideoFileClip.py:3\u001B[0m\n\u001B[1;32m      1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mos\u001B[39;00m\n\u001B[0;32m----> 3\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mAudioFileClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m AudioFileClip\n\u001B[1;32m      4\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m Clip\n\u001B[1;32m      5\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mffmpeg_reader\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m FFMPEG_VideoReader\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/audio/io/AudioFileClip.py:3\u001B[0m\n\u001B[1;32m      1\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m__future__\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m division\n\u001B[0;32m----> 3\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mAudioClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m AudioClip\n\u001B[1;32m      4\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mreaders\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m FFMPEG_AudioReader\n\u001B[1;32m      7\u001B[0m \u001B[38;5;28;01mclass\u001B[39;00m \u001B[38;5;21;01mAudioFileClip\u001B[39;00m(AudioClip):\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/audio/AudioClip.py:7\u001B[0m\n\u001B[1;32m      4\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mproglog\u001B[39;00m\n\u001B[1;32m      5\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mtqdm\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m tqdm\n\u001B[0;32m----> 7\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mffmpeg_audiowriter\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m ffmpeg_audiowrite\n\u001B[1;32m      8\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m Clip\n\u001B[1;32m      9\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdecorators\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m requires_duration\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/audio/io/ffmpeg_audiowriter.py:7\u001B[0m\n\u001B[1;32m      4\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mproglog\u001B[39;00m\n\u001B[1;32m      6\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mcompat\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m DEVNULL\n\u001B[0;32m----> 7\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mconfig\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m get_setting\n\u001B[1;32m      8\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdecorators\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m requires_duration\n\u001B[1;32m     11\u001B[0m \u001B[38;5;28;01mclass\u001B[39;00m \u001B[38;5;21;01mFFMPEG_AudioWriter\u001B[39;00m:\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/config.py:36\u001B[0m\n\u001B[1;32m     34\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m FFMPEG_BINARY\u001B[38;5;241m==\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mffmpeg-imageio\u001B[39m\u001B[38;5;124m'\u001B[39m:\n\u001B[1;32m     35\u001B[0m     \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mimageio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mplugins\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mffmpeg\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m get_exe\n\u001B[0;32m---> 36\u001B[0m     FFMPEG_BINARY \u001B[38;5;241m=\u001B[39m get_exe()\n\u001B[1;32m     38\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m FFMPEG_BINARY\u001B[38;5;241m==\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mauto-detect\u001B[39m\u001B[38;5;124m'\u001B[39m:\n\u001B[1;32m     40\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m try_cmd([\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mffmpeg\u001B[39m\u001B[38;5;124m'\u001B[39m])[\u001B[38;5;241m0\u001B[39m]:\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/imageio/plugins/ffmpeg.py:173\u001B[0m, in \u001B[0;36mget_exe\u001B[0;34m()\u001B[0m\n\u001B[1;32m    170\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mget_exe\u001B[39m():  \u001B[38;5;66;03m# pragma: no cover\u001B[39;00m\n\u001B[1;32m    171\u001B[0m \u001B[38;5;250m    \u001B[39m\u001B[38;5;124;03m\"\"\"Wrapper for imageio_ffmpeg.get_ffmpeg_exe()\"\"\"\u001B[39;00m\n\u001B[0;32m--> 173\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m imageio_ffmpeg\u001B[38;5;241m.\u001B[39mget_ffmpeg_exe()\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/imageio_ffmpeg/_utils.py:34\u001B[0m, in \u001B[0;36mget_ffmpeg_exe\u001B[0;34m()\u001B[0m\n\u001B[1;32m     31\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m exe\n\u001B[1;32m     33\u001B[0m \u001B[38;5;66;03m# Nothing was found\u001B[39;00m\n\u001B[0;32m---> 34\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\n\u001B[1;32m     35\u001B[0m     \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNo ffmpeg exe could be found. Install ffmpeg on your system, \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m     36\u001B[0m     \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mor set the IMAGEIO_FFMPEG_EXE environment variable.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m     37\u001B[0m )\n",
+      "\u001B[0;31mRuntimeError\u001B[0m: No ffmpeg exe could be found. Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable."
+     ]
+    }
+   ],
+   "source": [
+    "# Install the packages into the environment of the running kernel.\n",
+    "# `%pip` runs pip within the current kernel, whereas `!pip` uses whichever pip is first on the shell PATH.\n",
+    "%pip install tensorflow keras numpy\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/macbookpro/anaconda3/lib/python3.11/site-packages/transformers/configuration_utils.py:380: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.\n",
+      "  warnings.warn(\n",
+      "/Users/macbookpro/anaconda3/lib/python3.11/site-packages/torch/nn/utils/weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\n",
+      "  warnings.warn(\"torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\")\n",
+      "Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2ForSequenceClassification: ['project_hid.bias', 'project_q.bias', 'quantizer.codevectors', 'project_hid.weight', 'quantizer.weight_proj.weight', 'project_q.weight', 'quantizer.weight_proj.bias']\n",
+      "- This IS expected if you are initializing Wav2Vec2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing Wav2Vec2ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of Wav2Vec2ForSequenceClassification were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['classifier.weight', 'projector.bias', 'projector.weight', 'classifier.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "ename": "RuntimeError",
+     "evalue": "Couldn't find appropriate backend to handle uri /Users/macbookpro/Downloads/destination_directory/eng/common_voice_en_33673624.mp3 and format None.",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mRuntimeError\u001B[0m                              Traceback (most recent call last)",
+      "Cell \u001B[0;32mIn[20], line 73\u001B[0m\n\u001B[1;32m     71\u001B[0m model\u001B[38;5;241m.\u001B[39mtrain()\n\u001B[1;32m     72\u001B[0m total_loss \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m0\u001B[39m\n\u001B[0;32m---> 73\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m batch \u001B[38;5;129;01min\u001B[39;00m dataloader:\n\u001B[1;32m     74\u001B[0m     inputs, labels \u001B[38;5;241m=\u001B[39m batch\n\u001B[1;32m     75\u001B[0m     inputs \u001B[38;5;241m=\u001B[39m {key: value\u001B[38;5;241m.\u001B[39msqueeze(\u001B[38;5;241m1\u001B[39m)\u001B[38;5;241m.\u001B[39mto(device) \u001B[38;5;28;01mfor\u001B[39;00m key, value \u001B[38;5;129;01min\u001B[39;00m inputs\u001B[38;5;241m.\u001B[39mitems()}\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataloader.py:630\u001B[0m, in \u001B[0;36m_BaseDataLoaderIter.__next__\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m    627\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_sampler_iter \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m    628\u001B[0m     \u001B[38;5;66;03m# TODO(https://github.com/pytorch/pytorch/issues/76750)\u001B[39;00m\n\u001B[1;32m    629\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_reset()  \u001B[38;5;66;03m# type: ignore[call-arg]\u001B[39;00m\n\u001B[0;32m--> 630\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_next_data()\n\u001B[1;32m    631\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_num_yielded \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m\n\u001B[1;32m    632\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_dataset_kind \u001B[38;5;241m==\u001B[39m _DatasetKind\u001B[38;5;241m.\u001B[39mIterable \u001B[38;5;129;01mand\u001B[39;00m \\\n\u001B[1;32m    633\u001B[0m         \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_IterableDataset_len_called \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;129;01mand\u001B[39;00m \\\n\u001B[1;32m    634\u001B[0m         \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_num_yielded \u001B[38;5;241m>\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_IterableDataset_len_called:\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataloader.py:674\u001B[0m, in \u001B[0;36m_SingleProcessDataLoaderIter._next_data\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m    672\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m_next_data\u001B[39m(\u001B[38;5;28mself\u001B[39m):\n\u001B[1;32m    673\u001B[0m     index \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_next_index()  \u001B[38;5;66;03m# may raise StopIteration\u001B[39;00m\n\u001B[0;32m--> 674\u001B[0m     data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_dataset_fetcher\u001B[38;5;241m.\u001B[39mfetch(index)  \u001B[38;5;66;03m# may raise StopIteration\u001B[39;00m\n\u001B[1;32m    675\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_pin_memory:\n\u001B[1;32m    676\u001B[0m         data \u001B[38;5;241m=\u001B[39m _utils\u001B[38;5;241m.\u001B[39mpin_memory\u001B[38;5;241m.\u001B[39mpin_memory(data, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_pin_memory_device)\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py:51\u001B[0m, in \u001B[0;36m_MapDatasetFetcher.fetch\u001B[0;34m(self, possibly_batched_index)\u001B[0m\n\u001B[1;32m     49\u001B[0m         data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset\u001B[38;5;241m.\u001B[39m__getitems__(possibly_batched_index)\n\u001B[1;32m     50\u001B[0m     \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m---> 51\u001B[0m         data \u001B[38;5;241m=\u001B[39m [\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[idx] \u001B[38;5;28;01mfor\u001B[39;00m idx \u001B[38;5;129;01min\u001B[39;00m possibly_batched_index]\n\u001B[1;32m     52\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m     53\u001B[0m     data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[possibly_batched_index]\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py:51\u001B[0m, in \u001B[0;36m<listcomp>\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m     49\u001B[0m         data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset\u001B[38;5;241m.\u001B[39m__getitems__(possibly_batched_index)\n\u001B[1;32m     50\u001B[0m     \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m---> 51\u001B[0m         data \u001B[38;5;241m=\u001B[39m [\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[idx] \u001B[38;5;28;01mfor\u001B[39;00m idx \u001B[38;5;129;01min\u001B[39;00m possibly_batched_index]\n\u001B[1;32m     52\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m     53\u001B[0m     data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[possibly_batched_index]\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataset.py:302\u001B[0m, in \u001B[0;36mConcatDataset.__getitem__\u001B[0;34m(self, idx)\u001B[0m\n\u001B[1;32m    300\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m    301\u001B[0m     sample_idx \u001B[38;5;241m=\u001B[39m idx \u001B[38;5;241m-\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcumulative_sizes[dataset_idx \u001B[38;5;241m-\u001B[39m \u001B[38;5;241m1\u001B[39m]\n\u001B[0;32m--> 302\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdatasets[dataset_idx][sample_idx]\n",
+      "Cell \u001B[0;32mIn[20], line 32\u001B[0m, in \u001B[0;36mAudioDataset.__getitem__\u001B[0;34m(self, idx)\u001B[0m\n\u001B[1;32m     29\u001B[0m file_path \u001B[38;5;241m=\u001B[39m os\u001B[38;5;241m.\u001B[39mpath\u001B[38;5;241m.\u001B[39mjoin(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpath, os\u001B[38;5;241m.\u001B[39mlistdir(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpath)[idx])\n\u001B[1;32m     31\u001B[0m \u001B[38;5;66;03m# Load the audio waveform with the specified sampling rate\u001B[39;00m\n\u001B[0;32m---> 32\u001B[0m waveform, sample_rate \u001B[38;5;241m=\u001B[39m torchaudio\u001B[38;5;241m.\u001B[39mload(file_path, normalize\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m, num_frames\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmax_seq_length)\n\u001B[1;32m     34\u001B[0m \u001B[38;5;66;03m# Pad or truncate the waveform to the specified max_seq_length\u001B[39;00m\n\u001B[1;32m     35\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m waveform\u001B[38;5;241m.\u001B[39msize(\u001B[38;5;241m1\u001B[39m) \u001B[38;5;241m<\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmax_seq_length:\n\u001B[1;32m     36\u001B[0m     \u001B[38;5;66;03m# Pad if too short\u001B[39;00m\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torchaudio/_backend/utils.py:202\u001B[0m, in \u001B[0;36mget_load_func.<locals>.load\u001B[0;34m(uri, frame_offset, num_frames, normalize, channels_first, format, buffer_size, backend)\u001B[0m\n\u001B[1;32m    116\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mload\u001B[39m(\n\u001B[1;32m    117\u001B[0m     uri: Union[BinaryIO, \u001B[38;5;28mstr\u001B[39m, os\u001B[38;5;241m.\u001B[39mPathLike],\n\u001B[1;32m    118\u001B[0m     frame_offset: \u001B[38;5;28mint\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m0\u001B[39m,\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m    124\u001B[0m     backend: Optional[\u001B[38;5;28mstr\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m    125\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Tuple[torch\u001B[38;5;241m.\u001B[39mTensor, \u001B[38;5;28mint\u001B[39m]:\n\u001B[1;32m    126\u001B[0m \u001B[38;5;250m    \u001B[39m\u001B[38;5;124;03m\"\"\"Load audio data from source.\u001B[39;00m\n\u001B[1;32m    127\u001B[0m \n\u001B[1;32m    128\u001B[0m \u001B[38;5;124;03m    By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with\u001B[39;00m\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m    200\u001B[0m \u001B[38;5;124;03m            `[channel, time]` else `[time, channel]`.\u001B[39;00m\n\u001B[1;32m    201\u001B[0m \u001B[38;5;124;03m    \"\"\"\u001B[39;00m\n\u001B[0;32m--> 202\u001B[0m     backend \u001B[38;5;241m=\u001B[39m dispatcher(uri, \u001B[38;5;28mformat\u001B[39m, backend)\n\u001B[1;32m    203\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m backend\u001B[38;5;241m.\u001B[39mload(uri, frame_offset, num_frames, normalize, channels_first, \u001B[38;5;28mformat\u001B[39m, buffer_size)\n",
+      "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torchaudio/_backend/utils.py:114\u001B[0m, in \u001B[0;36mget_load_func.<locals>.dispatcher\u001B[0;34m(uri, format, backend_name)\u001B[0m\n\u001B[1;32m    112\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m backend\u001B[38;5;241m.\u001B[39mcan_decode(uri, \u001B[38;5;28mformat\u001B[39m):\n\u001B[1;32m    113\u001B[0m         \u001B[38;5;28;01mreturn\u001B[39;00m backend\n\u001B[0;32m--> 114\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCouldn\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mt find appropriate backend to handle uri \u001B[39m\u001B[38;5;132;01m{\u001B[39;00muri\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m and format \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mformat\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n",
+      "\u001B[0;31mRuntimeError\u001B[0m: Couldn't find appropriate backend to handle uri /Users/macbookpro/Downloads/destination_directory/eng/common_voice_en_33673624.mp3 and format None."
+     ]
+    }
+   ],
+   "source": [
+    "# Install the packages into the environment of the running kernel.\n",
+    "# `%pip` runs pip within the current kernel, whereas `!pip` uses whichever pip is first on the shell PATH.\n",
+    "# NOTE(review): `torchaudio` and `torchaudio-nightly` are both requested; presumably only one is needed - confirm.\n",
+    "%pip install torch torchaudio torchaudio-nightly\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-10-30T15:08:59.919075Z",
+     "start_time": "2023-10-30T15:08:55.303752Z"
+    }
+   },
+   "id": "1b897f30b17fee38"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Selected and copied 800 files from each source directory to their respective destination directories.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import random\n",
+    "import shutil\n",
+    "\n",
+    "# Folders the clips are sampled from, one per language.\n",
+    "rus_audio_dir = \"/Users/macbookpro/Downloads/cv-corpus-10.0-delta-2022-07-04/ru/clips\"\n",
+    "eng_audio_dir = \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21 2/en/clips\"\n",
+    "uzb_audio_dir = \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21/uz/clips\"\n",
+    "\n",
+    "# Folders that receive the sampled subset for each language.\n",
+    "rus_destination_dir = \"/Users/macbookpro/Downloads/selected_audio2/ru\"\n",
+    "eng_destination_dir = \"/Users/macbookpro/Downloads/selected_audio2/en\"\n",
+    "uzb_destination_dir = \"/Users/macbookpro/Downloads/selected_audio2/uz\"\n",
+    "\n",
+    "# How many files to draw from every source folder.\n",
+    "num_files_to_select = 800\n",
+    "\n",
+    "# Create each output folder up front; folders that already exist are left as they are.\n",
+    "for _destination in (rus_destination_dir, eng_destination_dir, uzb_destination_dir):\n",
+    "    os.makedirs(_destination, exist_ok=True)\n",
+    "\n",
+    "# Helper function to copy a random sample of files from source to destination\n",
+    "def copy_files(source_dir, destination_dir, num_files):\n",
+    "    \"\"\"Copy ``num_files`` randomly chosen regular files between two folders.\n",
+    "\n",
+    "    Args:\n",
+    "        source_dir: Folder to sample from; only its top level is scanned.\n",
+    "        destination_dir: Existing folder that receives the copies.\n",
+    "        num_files: Number of files to copy.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: If ``source_dir`` holds fewer than ``num_files`` regular files.\n",
+    "    \"\"\"\n",
+    "    # os.listdir also returns sub-directories, which shutil.copy2 cannot copy,\n",
+    "    # so keep regular files only. Sorting gives random.sample a stable input\n",
+    "    # order, which makes the selection repeatable once random.seed() is set.\n",
+    "    candidates = sorted(\n",
+    "        name for name in os.listdir(source_dir)\n",
+    "        if os.path.isfile(os.path.join(source_dir, name))\n",
+    "    )\n",
+    "    if num_files > len(candidates):\n",
+    "        raise ValueError(\n",
+    "            f\"Requested {num_files} files but {source_dir} only contains {len(candidates)}\"\n",
+    "        )\n",
+    "    for name in random.sample(candidates, num_files):\n",
+    "        # copy2 copies the file contents and attempts to preserve its metadata.\n",
+    "        shutil.copy2(os.path.join(source_dir, name), os.path.join(destination_dir, name))\n",
+    "\n",
+    "# Copy num_files_to_select files from each source directory to its destination directory\n",
+    "for source, destination in (\n",
+    "    (rus_audio_dir, rus_destination_dir),\n",
+    "    (eng_audio_dir, eng_destination_dir),\n",
+    "    (uzb_audio_dir, uzb_destination_dir),\n",
+    "):\n",
+    "    copy_files(source, destination, num_files_to_select)\n",
+    "\n",
+    "# Report the configured count instead of a hard-coded number so the message cannot go stale.\n",
+    "print(f\"Selected and copied {num_files_to_select} files from each source directory to their respective destination directories.\")\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-10-30T15:57:38.041545Z",
+     "start_time": "2023-10-30T15:57:36.315193Z"
+    }
+   },
+   "id": "95300e7298cf8b48"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Selected and copied 2000 files to the destination directory.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import random\n",
+    "import shutil\n",
+    "\n",
+    "# Source directory\n",
+    "source_dir = \"/Users/macbookpro/Downloads/destination_directory/eng\"\n",
+    "\n",
+    "# Destination directory for the selected files\n",
+    "destination_dir = \"/Users/macbookpro/Downloads/destination_directory/eng_selected1\"\n",
+    "\n",
+    "# Number of files to select\n",
+    "num_files_to_select = 600\n",
+    "\n",
+    "# Ensure the destination directory exists\n",
+    "os.makedirs(destination_dir, exist_ok=True)\n",
+    "\n",
+    "# List all entries in the source directory\n",
+    "all_files = os.listdir(source_dir)\n",
+    "\n",
+    "# Randomly select num_files_to_select entries without replacement\n",
+    "# (random.sample raises ValueError if the directory holds fewer than that)\n",
+    "selected_files = random.sample(all_files, num_files_to_select)\n",
+    "\n",
+    "# Copy the selected files to the destination directory\n",
+    "for file in selected_files:\n",
+    "    source_file_path = os.path.join(source_dir, file)\n",
+    "    destination_file_path = os.path.join(destination_dir, file)\n",
+    "    shutil.copy2(source_file_path, destination_file_path)\n",
+    "\n",
+    "# Report the number actually copied; the old message hard-coded 2000 while 600 were selected.\n",
+    "print(f\"Selected and copied {len(selected_files)} files to the destination directory.\")\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-10-30T15:55:06.078528Z",
+     "start_time": "2023-10-30T15:55:05.786275Z"
+    }
+   },
+   "id": "5598c4da8ce84f"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "ae9d0b9cfd931697"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9bd307be2af5f2036e47edbc7d4c6df6a093b4c8f0a49a018d818dc3bb9ce734
+size 101880