Jurabek committed on
Commit
b2a1edd
·
1 Parent(s): 91b631c

Upload 4 files

Browse files
Files changed (4) hide show
  1. fingerprint.pb +3 -0
  2. keras_metadata.pb +3 -0
  3. mohirai_dataset.ipynb +271 -0
  4. saved_model.pb +3 -0
fingerprint.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7f5e5f0e80de537e1005e7699585df8ceebbca2bc86432109b7f61c6a2bef77
3
+ size 56
keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:100488d663b105f13a3cb912a8f7f6a7f9f5deb32f3b1b738fcb1185b2210605
3
+ size 8505
mohirai_dataset.ipynb ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "outputs": [],
7
+ "source": [
8
+ "import pandas as pd \n",
9
+ "import matplotlib.pyplot as plt\n",
10
+ "import numpy as np\n",
11
+ "import torch"
12
+ ],
13
+ "metadata": {
14
+ "collapsed": false,
15
+ "ExecuteTime": {
16
+ "end_time": "2023-10-30T12:25:25.621457Z",
17
+ "start_time": "2023-10-30T12:25:23.392395Z"
18
+ }
19
+ },
20
+ "id": "93ea0b02ff2584f4"
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 2,
25
+ "outputs": [],
26
+ "source": [
27
+ "rus = \"/Users/macbookpro/Downloads/cv-corpus-10.0-delta-2022-07-04/ru/clips\"\n",
28
+ "eng = \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21 2/en/clips\"\n",
29
+ "uzb = \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21/uz/clips\""
30
+ ],
31
+ "metadata": {
32
+ "collapsed": false,
33
+ "ExecuteTime": {
34
+ "end_time": "2023-10-30T12:25:28.760740Z",
35
+ "start_time": "2023-10-30T12:25:28.547364Z"
36
+ }
37
+ },
38
+ "id": "99a2f44cd8bfc808"
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 36,
43
+ "id": "initial_id",
44
+ "metadata": {
45
+ "collapsed": true,
46
+ "ExecuteTime": {
47
+ "end_time": "2023-10-30T15:43:45.888685Z",
48
+ "start_time": "2023-10-30T15:43:45.481594Z"
49
+ }
50
+ },
51
+ "outputs": [
52
+ {
53
+ "ename": "RuntimeError",
54
+ "evalue": "No ffmpeg exe could be found. Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.",
55
+ "output_type": "error",
56
+ "traceback": [
57
+ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
58
+ "\u001B[0;31mRuntimeError\u001B[0m Traceback (most recent call last)",
59
+ "Cell \u001B[0;32mIn[36], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mos\u001B[39;00m\n\u001B[0;32m----> 2\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01meditor\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m \u001B[38;5;21;01mmp\u001B[39;00m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;66;03m# Directories containing MP3 files\u001B[39;00m\n\u001B[1;32m 5\u001B[0m uz_path \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m/Users/macbookpro/Downloads/destination_directory/uz\u001B[39m\u001B[38;5;124m\"\u001B[39m\n",
60
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/editor.py:36\u001B[0m\n\u001B[1;32m 33\u001B[0m os\u001B[38;5;241m.\u001B[39menviron[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mPYGAME_HIDE_SUPPORT_PROMPT\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m1\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 35\u001B[0m \u001B[38;5;66;03m# Clips\u001B[39;00m\n\u001B[0;32m---> 36\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mVideoFileClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m VideoFileClip\n\u001B[1;32m 37\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mImageSequenceClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m ImageSequenceClip\n\u001B[1;32m 38\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdownloader\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m download_webfile\n",
61
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/video/io/VideoFileClip.py:3\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mos\u001B[39;00m\n\u001B[0;32m----> 3\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mAudioFileClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m AudioFileClip\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m Clip\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mffmpeg_reader\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m FFMPEG_VideoReader\n",
62
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/audio/io/AudioFileClip.py:3\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m__future__\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m division\n\u001B[0;32m----> 3\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mAudioClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m AudioClip\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mreaders\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m FFMPEG_AudioReader\n\u001B[1;32m 7\u001B[0m \u001B[38;5;28;01mclass\u001B[39;00m \u001B[38;5;21;01mAudioFileClip\u001B[39;00m(AudioClip):\n",
63
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/audio/AudioClip.py:7\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mproglog\u001B[39;00m\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mtqdm\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m tqdm\n\u001B[0;32m----> 7\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mffmpeg_audiowriter\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m ffmpeg_audiowrite\n\u001B[1;32m 8\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m Clip\n\u001B[1;32m 9\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdecorators\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m requires_duration\n",
64
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/audio/io/ffmpeg_audiowriter.py:7\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mproglog\u001B[39;00m\n\u001B[1;32m 6\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mcompat\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m DEVNULL\n\u001B[0;32m----> 7\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mconfig\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m get_setting\n\u001B[1;32m 8\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdecorators\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m requires_duration\n\u001B[1;32m 11\u001B[0m \u001B[38;5;28;01mclass\u001B[39;00m \u001B[38;5;21;01mFFMPEG_AudioWriter\u001B[39;00m:\n",
65
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/config.py:36\u001B[0m\n\u001B[1;32m 34\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m FFMPEG_BINARY\u001B[38;5;241m==\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mffmpeg-imageio\u001B[39m\u001B[38;5;124m'\u001B[39m:\n\u001B[1;32m 35\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mimageio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mplugins\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mffmpeg\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m get_exe\n\u001B[0;32m---> 36\u001B[0m FFMPEG_BINARY \u001B[38;5;241m=\u001B[39m get_exe()\n\u001B[1;32m 38\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m FFMPEG_BINARY\u001B[38;5;241m==\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mauto-detect\u001B[39m\u001B[38;5;124m'\u001B[39m:\n\u001B[1;32m 40\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m try_cmd([\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mffmpeg\u001B[39m\u001B[38;5;124m'\u001B[39m])[\u001B[38;5;241m0\u001B[39m]:\n",
66
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/imageio/plugins/ffmpeg.py:173\u001B[0m, in \u001B[0;36mget_exe\u001B[0;34m()\u001B[0m\n\u001B[1;32m 170\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mget_exe\u001B[39m(): \u001B[38;5;66;03m# pragma: no cover\u001B[39;00m\n\u001B[1;32m 171\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"Wrapper for imageio_ffmpeg.get_ffmpeg_exe()\"\"\"\u001B[39;00m\n\u001B[0;32m--> 173\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m imageio_ffmpeg\u001B[38;5;241m.\u001B[39mget_ffmpeg_exe()\n",
67
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/imageio_ffmpeg/_utils.py:34\u001B[0m, in \u001B[0;36mget_ffmpeg_exe\u001B[0;34m()\u001B[0m\n\u001B[1;32m 31\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m exe\n\u001B[1;32m 33\u001B[0m \u001B[38;5;66;03m# Nothing was found\u001B[39;00m\n\u001B[0;32m---> 34\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\n\u001B[1;32m 35\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNo ffmpeg exe could be found. Install ffmpeg on your system, \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 36\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mor set the IMAGEIO_FFMPEG_EXE environment variable.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 37\u001B[0m )\n",
68
+ "\u001B[0;31mRuntimeError\u001B[0m: No ffmpeg exe could be found. Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable."
69
+ ]
70
+ }
71
+ ],
72
+ "source": [
73
+ "!pip install tensorflow keras numpy\n"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 20,
79
+ "outputs": [
80
+ {
81
+ "name": "stderr",
82
+ "output_type": "stream",
83
+ "text": [
84
+ "/Users/macbookpro/anaconda3/lib/python3.11/site-packages/transformers/configuration_utils.py:380: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.\n",
85
+ " warnings.warn(\n",
86
+ "/Users/macbookpro/anaconda3/lib/python3.11/site-packages/torch/nn/utils/weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\n",
87
+ " warnings.warn(\"torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\")\n",
88
+ "Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2ForSequenceClassification: ['project_hid.bias', 'project_q.bias', 'quantizer.codevectors', 'project_hid.weight', 'quantizer.weight_proj.weight', 'project_q.weight', 'quantizer.weight_proj.bias']\n",
89
+ "- This IS expected if you are initializing Wav2Vec2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
90
+ "- This IS NOT expected if you are initializing Wav2Vec2ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
91
+ "Some weights of Wav2Vec2ForSequenceClassification were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['classifier.weight', 'projector.bias', 'projector.weight', 'classifier.bias']\n",
92
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
93
+ ]
94
+ },
95
+ {
96
+ "ename": "RuntimeError",
97
+ "evalue": "Couldn't find appropriate backend to handle uri /Users/macbookpro/Downloads/destination_directory/eng/common_voice_en_33673624.mp3 and format None.",
98
+ "output_type": "error",
99
+ "traceback": [
100
+ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
101
+ "\u001B[0;31mRuntimeError\u001B[0m Traceback (most recent call last)",
102
+ "Cell \u001B[0;32mIn[20], line 73\u001B[0m\n\u001B[1;32m 71\u001B[0m model\u001B[38;5;241m.\u001B[39mtrain()\n\u001B[1;32m 72\u001B[0m total_loss \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m0\u001B[39m\n\u001B[0;32m---> 73\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m batch \u001B[38;5;129;01min\u001B[39;00m dataloader:\n\u001B[1;32m 74\u001B[0m inputs, labels \u001B[38;5;241m=\u001B[39m batch\n\u001B[1;32m 75\u001B[0m inputs \u001B[38;5;241m=\u001B[39m {key: value\u001B[38;5;241m.\u001B[39msqueeze(\u001B[38;5;241m1\u001B[39m)\u001B[38;5;241m.\u001B[39mto(device) \u001B[38;5;28;01mfor\u001B[39;00m key, value \u001B[38;5;129;01min\u001B[39;00m inputs\u001B[38;5;241m.\u001B[39mitems()}\n",
103
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataloader.py:630\u001B[0m, in \u001B[0;36m_BaseDataLoaderIter.__next__\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 627\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_sampler_iter \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m 628\u001B[0m \u001B[38;5;66;03m# TODO(https://github.com/pytorch/pytorch/issues/76750)\u001B[39;00m\n\u001B[1;32m 629\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_reset() \u001B[38;5;66;03m# type: ignore[call-arg]\u001B[39;00m\n\u001B[0;32m--> 630\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_next_data()\n\u001B[1;32m 631\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_num_yielded \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m\n\u001B[1;32m 632\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_dataset_kind \u001B[38;5;241m==\u001B[39m _DatasetKind\u001B[38;5;241m.\u001B[39mIterable \u001B[38;5;129;01mand\u001B[39;00m \\\n\u001B[1;32m 633\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_IterableDataset_len_called \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;129;01mand\u001B[39;00m \\\n\u001B[1;32m 634\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_num_yielded \u001B[38;5;241m>\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_IterableDataset_len_called:\n",
104
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataloader.py:674\u001B[0m, in \u001B[0;36m_SingleProcessDataLoaderIter._next_data\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 672\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m_next_data\u001B[39m(\u001B[38;5;28mself\u001B[39m):\n\u001B[1;32m 673\u001B[0m index \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_next_index() \u001B[38;5;66;03m# may raise StopIteration\u001B[39;00m\n\u001B[0;32m--> 674\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_dataset_fetcher\u001B[38;5;241m.\u001B[39mfetch(index) \u001B[38;5;66;03m# may raise StopIteration\u001B[39;00m\n\u001B[1;32m 675\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_pin_memory:\n\u001B[1;32m 676\u001B[0m data \u001B[38;5;241m=\u001B[39m _utils\u001B[38;5;241m.\u001B[39mpin_memory\u001B[38;5;241m.\u001B[39mpin_memory(data, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_pin_memory_device)\n",
105
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py:51\u001B[0m, in \u001B[0;36m_MapDatasetFetcher.fetch\u001B[0;34m(self, possibly_batched_index)\u001B[0m\n\u001B[1;32m 49\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset\u001B[38;5;241m.\u001B[39m__getitems__(possibly_batched_index)\n\u001B[1;32m 50\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m---> 51\u001B[0m data \u001B[38;5;241m=\u001B[39m [\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[idx] \u001B[38;5;28;01mfor\u001B[39;00m idx \u001B[38;5;129;01min\u001B[39;00m possibly_batched_index]\n\u001B[1;32m 52\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 53\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[possibly_batched_index]\n",
106
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py:51\u001B[0m, in \u001B[0;36m<listcomp>\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m 49\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset\u001B[38;5;241m.\u001B[39m__getitems__(possibly_batched_index)\n\u001B[1;32m 50\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m---> 51\u001B[0m data \u001B[38;5;241m=\u001B[39m [\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[idx] \u001B[38;5;28;01mfor\u001B[39;00m idx \u001B[38;5;129;01min\u001B[39;00m possibly_batched_index]\n\u001B[1;32m 52\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 53\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[possibly_batched_index]\n",
107
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataset.py:302\u001B[0m, in \u001B[0;36mConcatDataset.__getitem__\u001B[0;34m(self, idx)\u001B[0m\n\u001B[1;32m 300\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 301\u001B[0m sample_idx \u001B[38;5;241m=\u001B[39m idx \u001B[38;5;241m-\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcumulative_sizes[dataset_idx \u001B[38;5;241m-\u001B[39m \u001B[38;5;241m1\u001B[39m]\n\u001B[0;32m--> 302\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdatasets[dataset_idx][sample_idx]\n",
108
+ "Cell \u001B[0;32mIn[20], line 32\u001B[0m, in \u001B[0;36mAudioDataset.__getitem__\u001B[0;34m(self, idx)\u001B[0m\n\u001B[1;32m 29\u001B[0m file_path \u001B[38;5;241m=\u001B[39m os\u001B[38;5;241m.\u001B[39mpath\u001B[38;5;241m.\u001B[39mjoin(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpath, os\u001B[38;5;241m.\u001B[39mlistdir(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpath)[idx])\n\u001B[1;32m 31\u001B[0m \u001B[38;5;66;03m# Load the audio waveform with the specified sampling rate\u001B[39;00m\n\u001B[0;32m---> 32\u001B[0m waveform, sample_rate \u001B[38;5;241m=\u001B[39m torchaudio\u001B[38;5;241m.\u001B[39mload(file_path, normalize\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m, num_frames\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmax_seq_length)\n\u001B[1;32m 34\u001B[0m \u001B[38;5;66;03m# Pad or truncate the waveform to the specified max_seq_length\u001B[39;00m\n\u001B[1;32m 35\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m waveform\u001B[38;5;241m.\u001B[39msize(\u001B[38;5;241m1\u001B[39m) \u001B[38;5;241m<\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmax_seq_length:\n\u001B[1;32m 36\u001B[0m \u001B[38;5;66;03m# Pad if too short\u001B[39;00m\n",
109
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torchaudio/_backend/utils.py:202\u001B[0m, in \u001B[0;36mget_load_func.<locals>.load\u001B[0;34m(uri, frame_offset, num_frames, normalize, channels_first, format, buffer_size, backend)\u001B[0m\n\u001B[1;32m 116\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mload\u001B[39m(\n\u001B[1;32m 117\u001B[0m uri: Union[BinaryIO, \u001B[38;5;28mstr\u001B[39m, os\u001B[38;5;241m.\u001B[39mPathLike],\n\u001B[1;32m 118\u001B[0m frame_offset: \u001B[38;5;28mint\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m0\u001B[39m,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 124\u001B[0m backend: Optional[\u001B[38;5;28mstr\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 125\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Tuple[torch\u001B[38;5;241m.\u001B[39mTensor, \u001B[38;5;28mint\u001B[39m]:\n\u001B[1;32m 126\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"Load audio data from source.\u001B[39;00m\n\u001B[1;32m 127\u001B[0m \n\u001B[1;32m 128\u001B[0m \u001B[38;5;124;03m By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with\u001B[39;00m\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 200\u001B[0m \u001B[38;5;124;03m `[channel, time]` else `[time, channel]`.\u001B[39;00m\n\u001B[1;32m 201\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[0;32m--> 202\u001B[0m backend \u001B[38;5;241m=\u001B[39m dispatcher(uri, \u001B[38;5;28mformat\u001B[39m, backend)\n\u001B[1;32m 203\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m backend\u001B[38;5;241m.\u001B[39mload(uri, frame_offset, num_frames, normalize, channels_first, \u001B[38;5;28mformat\u001B[39m, buffer_size)\n",
110
+ "File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torchaudio/_backend/utils.py:114\u001B[0m, in \u001B[0;36mget_load_func.<locals>.dispatcher\u001B[0;34m(uri, format, backend_name)\u001B[0m\n\u001B[1;32m 112\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m backend\u001B[38;5;241m.\u001B[39mcan_decode(uri, \u001B[38;5;28mformat\u001B[39m):\n\u001B[1;32m 113\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m backend\n\u001B[0;32m--> 114\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCouldn\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mt find appropriate backend to handle uri \u001B[39m\u001B[38;5;132;01m{\u001B[39;00muri\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m and format \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mformat\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n",
111
+ "\u001B[0;31mRuntimeError\u001B[0m: Couldn't find appropriate backend to handle uri /Users/macbookpro/Downloads/destination_directory/eng/common_voice_en_33673624.mp3 and format None."
112
+ ]
113
+ }
114
+ ],
115
+ "source": [
116
+ "!pip install torch torchaudio torchaudio-nightly\n"
117
+ ],
118
+ "metadata": {
119
+ "collapsed": false,
120
+ "ExecuteTime": {
121
+ "end_time": "2023-10-30T15:08:59.919075Z",
122
+ "start_time": "2023-10-30T15:08:55.303752Z"
123
+ }
124
+ },
125
+ "id": "1b897f30b17fee38"
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": 40,
130
+ "outputs": [
131
+ {
132
+ "name": "stdout",
133
+ "output_type": "stream",
134
+ "text": [
135
+ "Selected and copied 800 files from each source directory to their respective destination directories.\n"
136
+ ]
137
+ }
138
+ ],
139
+ "source": [
140
+ "import os\n",
141
+ "import random\n",
142
+ "import shutil\n",
143
+ "\n",
144
+ "# Source directories\n",
145
+ "rus_audio_dir = \"/Users/macbookpro/Downloads/cv-corpus-10.0-delta-2022-07-04/ru/clips\"\n",
146
+ "eng_audio_dir = \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21 2/en/clips\"\n",
147
+ "uzb_audio_dir = \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21/uz/clips\"\n",
148
+ "\n",
149
+ "# Destination directories\n",
150
+ "rus_destination_dir = \"/Users/macbookpro/Downloads/selected_audio2/ru\"\n",
151
+ "eng_destination_dir = \"/Users/macbookpro/Downloads/selected_audio2/en\"\n",
152
+ "uzb_destination_dir = \"/Users/macbookpro/Downloads/selected_audio2/uz\"\n",
153
+ "\n",
154
+ "# Number of files to select from each source directory\n",
155
+ "num_files_to_select = 800\n",
156
+ "\n",
157
+ "# Ensure the destination directories exist\n",
158
+ "os.makedirs(rus_destination_dir, exist_ok=True)\n",
159
+ "os.makedirs(eng_destination_dir, exist_ok=True)\n",
160
+ "os.makedirs(uzb_destination_dir, exist_ok=True)\n",
161
+ "\n",
162
+ "# Helper function to copy files from source to destination\n",
163
+ "def copy_files(source_dir, destination_dir, num_files):\n",
164
+ " all_files = os.listdir(source_dir)\n",
165
+ " selected_files = random.sample(all_files, num_files)\n",
166
+ " for file in selected_files:\n",
167
+ " source_file_path = os.path.join(source_dir, file)\n",
168
+ " destination_file_path = os.path.join(destination_dir, file)\n",
169
+ " shutil.copy2(source_file_path, destination_file_path)\n",
170
+ "\n",
171
+ "# Copy 800 files from each source directory to their respective destination directories\n",
172
+ "copy_files(rus_audio_dir, rus_destination_dir, num_files_to_select)\n",
173
+ "copy_files(eng_audio_dir, eng_destination_dir, num_files_to_select)\n",
174
+ "copy_files(uzb_audio_dir, uzb_destination_dir, num_files_to_select)\n",
175
+ "\n",
176
+ "print(\"Selected and copied 800 files from each source directory to their respective destination directories.\")\n"
177
+ ],
178
+ "metadata": {
179
+ "collapsed": false,
180
+ "ExecuteTime": {
181
+ "end_time": "2023-10-30T15:57:38.041545Z",
182
+ "start_time": "2023-10-30T15:57:36.315193Z"
183
+ }
184
+ },
185
+ "id": "95300e7298cf8b48"
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "execution_count": 39,
190
+ "outputs": [
191
+ {
192
+ "name": "stdout",
193
+ "output_type": "stream",
194
+ "text": [
195
+ "Selected and copied 2000 files to the destination directory.\n"
196
+ ]
197
+ }
198
+ ],
199
+ "source": [
200
+ "import os\n",
201
+ "import random\n",
202
+ "import shutil\n",
203
+ "\n",
204
+ "# Source directory\n",
205
+ "source_dir = \"/Users/macbookpro/Downloads/destination_directory/eng\"\n",
206
+ "\n",
207
+ "# Destination directory for the selected files\n",
208
+ "destination_dir = \"/Users/macbookpro/Downloads/destination_directory/eng_selected1\"\n",
209
+ "\n",
210
+ "# Number of files to select\n",
211
+ "num_files_to_select = 600\n",
212
+ "\n",
213
+ "# Ensure the destination directory exists\n",
214
+ "os.makedirs(destination_dir, exist_ok=True)\n",
215
+ "\n",
216
+ "# List all files in the source directory\n",
217
+ "all_files = os.listdir(source_dir)\n",
218
+ "\n",
219
+ "# Randomly select num_files_to_select files\n",
220
+ "selected_files = random.sample(all_files, num_files_to_select)\n",
221
+ "\n",
222
+ "# Copy the selected files to the destination directory\n",
223
+ "for file in selected_files:\n",
224
+ " source_file_path = os.path.join(source_dir, file)\n",
225
+ " destination_file_path = os.path.join(destination_dir, file)\n",
226
+ " shutil.copy2(source_file_path, destination_file_path)\n",
227
+ "\n",
228
+ "print(\"Selected and copied 2000 files to the destination directory.\")\n"
229
+ ],
230
+ "metadata": {
231
+ "collapsed": false,
232
+ "ExecuteTime": {
233
+ "end_time": "2023-10-30T15:55:06.078528Z",
234
+ "start_time": "2023-10-30T15:55:05.786275Z"
235
+ }
236
+ },
237
+ "id": "5598c4da8ce84f"
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": null,
242
+ "outputs": [],
243
+ "source": [],
244
+ "metadata": {
245
+ "collapsed": false
246
+ },
247
+ "id": "ae9d0b9cfd931697"
248
+ }
249
+ ],
250
+ "metadata": {
251
+ "kernelspec": {
252
+ "display_name": "Python 3",
253
+ "language": "python",
254
+ "name": "python3"
255
+ },
256
+ "language_info": {
257
+ "codemirror_mode": {
258
+ "name": "ipython",
259
+ "version": 2
260
+ },
261
+ "file_extension": ".py",
262
+ "mimetype": "text/x-python",
263
+ "name": "python",
264
+ "nbconvert_exporter": "python",
265
+ "pygments_lexer": "ipython2",
266
+ "version": "2.7.6"
267
+ }
268
+ },
269
+ "nbformat": 4,
270
+ "nbformat_minor": 5
271
+ }
saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bd307be2af5f2036e47edbc7d4c6df6a093b4c8f0a49a018d818dc3bb9ce734
3
+ size 101880