Upload 4 files
Browse files- fingerprint.pb +3 -0
- keras_metadata.pb +3 -0
- mohirai_dataset.ipynb +271 -0
- saved_model.pb +3 -0
fingerprint.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7f5e5f0e80de537e1005e7699585df8ceebbca2bc86432109b7f61c6a2bef77
|
3 |
+
size 56
|
keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:100488d663b105f13a3cb912a8f7f6a7f9f5deb32f3b1b738fcb1185b2210605
|
3 |
+
size 8505
|
mohirai_dataset.ipynb
ADDED
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"outputs": [],
|
7 |
+
"source": [
|
8 |
+
"import pandas as pd \n",
|
9 |
+
"import matplotlib.pyplot as plt\n",
|
10 |
+
"import numpy as np\n",
|
11 |
+
"import torch"
|
12 |
+
],
|
13 |
+
"metadata": {
|
14 |
+
"collapsed": false,
|
15 |
+
"ExecuteTime": {
|
16 |
+
"end_time": "2023-10-30T12:25:25.621457Z",
|
17 |
+
"start_time": "2023-10-30T12:25:23.392395Z"
|
18 |
+
}
|
19 |
+
},
|
20 |
+
"id": "93ea0b02ff2584f4"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"cell_type": "code",
|
24 |
+
"execution_count": 2,
|
25 |
+
"outputs": [],
|
26 |
+
"source": [
|
27 |
+
"rus = \"/Users/macbookpro/Downloads/cv-corpus-10.0-delta-2022-07-04/ru/clips\"\n",
|
28 |
+
"eng = \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21 2/en/clips\"\n",
|
29 |
+
"uzb = \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21/uz/clips\""
|
30 |
+
],
|
31 |
+
"metadata": {
|
32 |
+
"collapsed": false,
|
33 |
+
"ExecuteTime": {
|
34 |
+
"end_time": "2023-10-30T12:25:28.760740Z",
|
35 |
+
"start_time": "2023-10-30T12:25:28.547364Z"
|
36 |
+
}
|
37 |
+
},
|
38 |
+
"id": "99a2f44cd8bfc808"
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"cell_type": "code",
|
42 |
+
"execution_count": 36,
|
43 |
+
"id": "initial_id",
|
44 |
+
"metadata": {
|
45 |
+
"collapsed": true,
|
46 |
+
"ExecuteTime": {
|
47 |
+
"end_time": "2023-10-30T15:43:45.888685Z",
|
48 |
+
"start_time": "2023-10-30T15:43:45.481594Z"
|
49 |
+
}
|
50 |
+
},
|
51 |
+
"outputs": [
|
52 |
+
{
|
53 |
+
"ename": "RuntimeError",
|
54 |
+
"evalue": "No ffmpeg exe could be found. Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.",
|
55 |
+
"output_type": "error",
|
56 |
+
"traceback": [
|
57 |
+
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
|
58 |
+
"\u001B[0;31mRuntimeError\u001B[0m Traceback (most recent call last)",
|
59 |
+
"Cell \u001B[0;32mIn[36], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mos\u001B[39;00m\n\u001B[0;32m----> 2\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01meditor\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m \u001B[38;5;21;01mmp\u001B[39;00m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;66;03m# Directories containing MP3 files\u001B[39;00m\n\u001B[1;32m 5\u001B[0m uz_path \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m/Users/macbookpro/Downloads/destination_directory/uz\u001B[39m\u001B[38;5;124m\"\u001B[39m\n",
|
60 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/editor.py:36\u001B[0m\n\u001B[1;32m 33\u001B[0m os\u001B[38;5;241m.\u001B[39menviron[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mPYGAME_HIDE_SUPPORT_PROMPT\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m1\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 35\u001B[0m \u001B[38;5;66;03m# Clips\u001B[39;00m\n\u001B[0;32m---> 36\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mVideoFileClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m VideoFileClip\n\u001B[1;32m 37\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mImageSequenceClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m ImageSequenceClip\n\u001B[1;32m 38\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdownloader\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m download_webfile\n",
|
61 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/video/io/VideoFileClip.py:3\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mos\u001B[39;00m\n\u001B[0;32m----> 3\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mAudioFileClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m AudioFileClip\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m Clip\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mvideo\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mffmpeg_reader\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m FFMPEG_VideoReader\n",
|
62 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/audio/io/AudioFileClip.py:3\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01m__future__\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m division\n\u001B[0;32m----> 3\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mAudioClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m AudioClip\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mreaders\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m FFMPEG_AudioReader\n\u001B[1;32m 7\u001B[0m \u001B[38;5;28;01mclass\u001B[39;00m \u001B[38;5;21;01mAudioFileClip\u001B[39;00m(AudioClip):\n",
|
63 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/audio/AudioClip.py:7\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mproglog\u001B[39;00m\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mtqdm\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m tqdm\n\u001B[0;32m----> 7\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01maudio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mffmpeg_audiowriter\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m ffmpeg_audiowrite\n\u001B[1;32m 8\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mClip\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m Clip\n\u001B[1;32m 9\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdecorators\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m requires_duration\n",
|
64 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/audio/io/ffmpeg_audiowriter.py:7\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mproglog\u001B[39;00m\n\u001B[1;32m 6\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mcompat\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m DEVNULL\n\u001B[0;32m----> 7\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mconfig\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m get_setting\n\u001B[1;32m 8\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mmoviepy\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdecorators\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m requires_duration\n\u001B[1;32m 11\u001B[0m \u001B[38;5;28;01mclass\u001B[39;00m \u001B[38;5;21;01mFFMPEG_AudioWriter\u001B[39;00m:\n",
|
65 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/moviepy/config.py:36\u001B[0m\n\u001B[1;32m 34\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m FFMPEG_BINARY\u001B[38;5;241m==\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mffmpeg-imageio\u001B[39m\u001B[38;5;124m'\u001B[39m:\n\u001B[1;32m 35\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mimageio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mplugins\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mffmpeg\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m get_exe\n\u001B[0;32m---> 36\u001B[0m FFMPEG_BINARY \u001B[38;5;241m=\u001B[39m get_exe()\n\u001B[1;32m 38\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m FFMPEG_BINARY\u001B[38;5;241m==\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mauto-detect\u001B[39m\u001B[38;5;124m'\u001B[39m:\n\u001B[1;32m 40\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m try_cmd([\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mffmpeg\u001B[39m\u001B[38;5;124m'\u001B[39m])[\u001B[38;5;241m0\u001B[39m]:\n",
|
66 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/imageio/plugins/ffmpeg.py:173\u001B[0m, in \u001B[0;36mget_exe\u001B[0;34m()\u001B[0m\n\u001B[1;32m 170\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mget_exe\u001B[39m(): \u001B[38;5;66;03m# pragma: no cover\u001B[39;00m\n\u001B[1;32m 171\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"Wrapper for imageio_ffmpeg.get_ffmpeg_exe()\"\"\"\u001B[39;00m\n\u001B[0;32m--> 173\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m imageio_ffmpeg\u001B[38;5;241m.\u001B[39mget_ffmpeg_exe()\n",
|
67 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/imageio_ffmpeg/_utils.py:34\u001B[0m, in \u001B[0;36mget_ffmpeg_exe\u001B[0;34m()\u001B[0m\n\u001B[1;32m 31\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m exe\n\u001B[1;32m 33\u001B[0m \u001B[38;5;66;03m# Nothing was found\u001B[39;00m\n\u001B[0;32m---> 34\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\n\u001B[1;32m 35\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNo ffmpeg exe could be found. Install ffmpeg on your system, \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 36\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mor set the IMAGEIO_FFMPEG_EXE environment variable.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 37\u001B[0m )\n",
|
68 |
+
"\u001B[0;31mRuntimeError\u001B[0m: No ffmpeg exe could be found. Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable."
|
69 |
+
]
|
70 |
+
}
|
71 |
+
],
|
72 |
+
"source": [
|
73 |
+
"!pip install tensorflow keras numpy\n"
|
74 |
+
]
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"cell_type": "code",
|
78 |
+
"execution_count": 20,
|
79 |
+
"outputs": [
|
80 |
+
{
|
81 |
+
"name": "stderr",
|
82 |
+
"output_type": "stream",
|
83 |
+
"text": [
|
84 |
+
"/Users/macbookpro/anaconda3/lib/python3.11/site-packages/transformers/configuration_utils.py:380: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.\n",
|
85 |
+
" warnings.warn(\n",
|
86 |
+
"/Users/macbookpro/anaconda3/lib/python3.11/site-packages/torch/nn/utils/weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\n",
|
87 |
+
" warnings.warn(\"torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\")\n",
|
88 |
+
"Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2ForSequenceClassification: ['project_hid.bias', 'project_q.bias', 'quantizer.codevectors', 'project_hid.weight', 'quantizer.weight_proj.weight', 'project_q.weight', 'quantizer.weight_proj.bias']\n",
|
89 |
+
"- This IS expected if you are initializing Wav2Vec2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
90 |
+
"- This IS NOT expected if you are initializing Wav2Vec2ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
|
91 |
+
"Some weights of Wav2Vec2ForSequenceClassification were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['classifier.weight', 'projector.bias', 'projector.weight', 'classifier.bias']\n",
|
92 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
93 |
+
]
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"ename": "RuntimeError",
|
97 |
+
"evalue": "Couldn't find appropriate backend to handle uri /Users/macbookpro/Downloads/destination_directory/eng/common_voice_en_33673624.mp3 and format None.",
|
98 |
+
"output_type": "error",
|
99 |
+
"traceback": [
|
100 |
+
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
|
101 |
+
"\u001B[0;31mRuntimeError\u001B[0m Traceback (most recent call last)",
|
102 |
+
"Cell \u001B[0;32mIn[20], line 73\u001B[0m\n\u001B[1;32m 71\u001B[0m model\u001B[38;5;241m.\u001B[39mtrain()\n\u001B[1;32m 72\u001B[0m total_loss \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m0\u001B[39m\n\u001B[0;32m---> 73\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m batch \u001B[38;5;129;01min\u001B[39;00m dataloader:\n\u001B[1;32m 74\u001B[0m inputs, labels \u001B[38;5;241m=\u001B[39m batch\n\u001B[1;32m 75\u001B[0m inputs \u001B[38;5;241m=\u001B[39m {key: value\u001B[38;5;241m.\u001B[39msqueeze(\u001B[38;5;241m1\u001B[39m)\u001B[38;5;241m.\u001B[39mto(device) \u001B[38;5;28;01mfor\u001B[39;00m key, value \u001B[38;5;129;01min\u001B[39;00m inputs\u001B[38;5;241m.\u001B[39mitems()}\n",
|
103 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataloader.py:630\u001B[0m, in \u001B[0;36m_BaseDataLoaderIter.__next__\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 627\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_sampler_iter \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m 628\u001B[0m \u001B[38;5;66;03m# TODO(https://github.com/pytorch/pytorch/issues/76750)\u001B[39;00m\n\u001B[1;32m 629\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_reset() \u001B[38;5;66;03m# type: ignore[call-arg]\u001B[39;00m\n\u001B[0;32m--> 630\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_next_data()\n\u001B[1;32m 631\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_num_yielded \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m\n\u001B[1;32m 632\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_dataset_kind \u001B[38;5;241m==\u001B[39m _DatasetKind\u001B[38;5;241m.\u001B[39mIterable \u001B[38;5;129;01mand\u001B[39;00m \\\n\u001B[1;32m 633\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_IterableDataset_len_called \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;129;01mand\u001B[39;00m \\\n\u001B[1;32m 634\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_num_yielded \u001B[38;5;241m>\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_IterableDataset_len_called:\n",
|
104 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataloader.py:674\u001B[0m, in \u001B[0;36m_SingleProcessDataLoaderIter._next_data\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 672\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m_next_data\u001B[39m(\u001B[38;5;28mself\u001B[39m):\n\u001B[1;32m 673\u001B[0m index \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_next_index() \u001B[38;5;66;03m# may raise StopIteration\u001B[39;00m\n\u001B[0;32m--> 674\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_dataset_fetcher\u001B[38;5;241m.\u001B[39mfetch(index) \u001B[38;5;66;03m# may raise StopIteration\u001B[39;00m\n\u001B[1;32m 675\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_pin_memory:\n\u001B[1;32m 676\u001B[0m data \u001B[38;5;241m=\u001B[39m _utils\u001B[38;5;241m.\u001B[39mpin_memory\u001B[38;5;241m.\u001B[39mpin_memory(data, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_pin_memory_device)\n",
|
105 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py:51\u001B[0m, in \u001B[0;36m_MapDatasetFetcher.fetch\u001B[0;34m(self, possibly_batched_index)\u001B[0m\n\u001B[1;32m 49\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset\u001B[38;5;241m.\u001B[39m__getitems__(possibly_batched_index)\n\u001B[1;32m 50\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m---> 51\u001B[0m data \u001B[38;5;241m=\u001B[39m [\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[idx] \u001B[38;5;28;01mfor\u001B[39;00m idx \u001B[38;5;129;01min\u001B[39;00m possibly_batched_index]\n\u001B[1;32m 52\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 53\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[possibly_batched_index]\n",
|
106 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py:51\u001B[0m, in \u001B[0;36m<listcomp>\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m 49\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset\u001B[38;5;241m.\u001B[39m__getitems__(possibly_batched_index)\n\u001B[1;32m 50\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m---> 51\u001B[0m data \u001B[38;5;241m=\u001B[39m [\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[idx] \u001B[38;5;28;01mfor\u001B[39;00m idx \u001B[38;5;129;01min\u001B[39;00m possibly_batched_index]\n\u001B[1;32m 52\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 53\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdataset[possibly_batched_index]\n",
|
107 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torch/utils/data/dataset.py:302\u001B[0m, in \u001B[0;36mConcatDataset.__getitem__\u001B[0;34m(self, idx)\u001B[0m\n\u001B[1;32m 300\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 301\u001B[0m sample_idx \u001B[38;5;241m=\u001B[39m idx \u001B[38;5;241m-\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcumulative_sizes[dataset_idx \u001B[38;5;241m-\u001B[39m \u001B[38;5;241m1\u001B[39m]\n\u001B[0;32m--> 302\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdatasets[dataset_idx][sample_idx]\n",
|
108 |
+
"Cell \u001B[0;32mIn[20], line 32\u001B[0m, in \u001B[0;36mAudioDataset.__getitem__\u001B[0;34m(self, idx)\u001B[0m\n\u001B[1;32m 29\u001B[0m file_path \u001B[38;5;241m=\u001B[39m os\u001B[38;5;241m.\u001B[39mpath\u001B[38;5;241m.\u001B[39mjoin(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpath, os\u001B[38;5;241m.\u001B[39mlistdir(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpath)[idx])\n\u001B[1;32m 31\u001B[0m \u001B[38;5;66;03m# Load the audio waveform with the specified sampling rate\u001B[39;00m\n\u001B[0;32m---> 32\u001B[0m waveform, sample_rate \u001B[38;5;241m=\u001B[39m torchaudio\u001B[38;5;241m.\u001B[39mload(file_path, normalize\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m, num_frames\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmax_seq_length)\n\u001B[1;32m 34\u001B[0m \u001B[38;5;66;03m# Pad or truncate the waveform to the specified max_seq_length\u001B[39;00m\n\u001B[1;32m 35\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m waveform\u001B[38;5;241m.\u001B[39msize(\u001B[38;5;241m1\u001B[39m) \u001B[38;5;241m<\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmax_seq_length:\n\u001B[1;32m 36\u001B[0m \u001B[38;5;66;03m# Pad if too short\u001B[39;00m\n",
|
109 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torchaudio/_backend/utils.py:202\u001B[0m, in \u001B[0;36mget_load_func.<locals>.load\u001B[0;34m(uri, frame_offset, num_frames, normalize, channels_first, format, buffer_size, backend)\u001B[0m\n\u001B[1;32m 116\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mload\u001B[39m(\n\u001B[1;32m 117\u001B[0m uri: Union[BinaryIO, \u001B[38;5;28mstr\u001B[39m, os\u001B[38;5;241m.\u001B[39mPathLike],\n\u001B[1;32m 118\u001B[0m frame_offset: \u001B[38;5;28mint\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m0\u001B[39m,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 124\u001B[0m backend: Optional[\u001B[38;5;28mstr\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 125\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Tuple[torch\u001B[38;5;241m.\u001B[39mTensor, \u001B[38;5;28mint\u001B[39m]:\n\u001B[1;32m 126\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"Load audio data from source.\u001B[39;00m\n\u001B[1;32m 127\u001B[0m \n\u001B[1;32m 128\u001B[0m \u001B[38;5;124;03m By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with\u001B[39;00m\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 200\u001B[0m \u001B[38;5;124;03m `[channel, time]` else `[time, channel]`.\u001B[39;00m\n\u001B[1;32m 201\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[0;32m--> 202\u001B[0m backend \u001B[38;5;241m=\u001B[39m dispatcher(uri, \u001B[38;5;28mformat\u001B[39m, backend)\n\u001B[1;32m 203\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m backend\u001B[38;5;241m.\u001B[39mload(uri, frame_offset, num_frames, normalize, channels_first, \u001B[38;5;28mformat\u001B[39m, buffer_size)\n",
|
110 |
+
"File \u001B[0;32m~/anaconda3/lib/python3.11/site-packages/torchaudio/_backend/utils.py:114\u001B[0m, in \u001B[0;36mget_load_func.<locals>.dispatcher\u001B[0;34m(uri, format, backend_name)\u001B[0m\n\u001B[1;32m 112\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m backend\u001B[38;5;241m.\u001B[39mcan_decode(uri, \u001B[38;5;28mformat\u001B[39m):\n\u001B[1;32m 113\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m backend\n\u001B[0;32m--> 114\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCouldn\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mt find appropriate backend to handle uri \u001B[39m\u001B[38;5;132;01m{\u001B[39;00muri\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m and format \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mformat\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n",
|
111 |
+
"\u001B[0;31mRuntimeError\u001B[0m: Couldn't find appropriate backend to handle uri /Users/macbookpro/Downloads/destination_directory/eng/common_voice_en_33673624.mp3 and format None."
|
112 |
+
]
|
113 |
+
}
|
114 |
+
],
|
115 |
+
"source": [
|
116 |
+
"!pip install torch torchaudio torchaudio-nightly\n"
|
117 |
+
],
|
118 |
+
"metadata": {
|
119 |
+
"collapsed": false,
|
120 |
+
"ExecuteTime": {
|
121 |
+
"end_time": "2023-10-30T15:08:59.919075Z",
|
122 |
+
"start_time": "2023-10-30T15:08:55.303752Z"
|
123 |
+
}
|
124 |
+
},
|
125 |
+
"id": "1b897f30b17fee38"
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"cell_type": "code",
|
129 |
+
"execution_count": 40,
|
130 |
+
"outputs": [
|
131 |
+
{
|
132 |
+
"name": "stdout",
|
133 |
+
"output_type": "stream",
|
134 |
+
"text": [
|
135 |
+
"Selected and copied 800 files from each source directory to their respective destination directories.\n"
|
136 |
+
]
|
137 |
+
}
|
138 |
+
],
|
139 |
+
"source": [
|
140 |
+
"import os\n",
|
141 |
+
"import random\n",
|
142 |
+
"import shutil\n",
|
143 |
+
"\n",
|
144 |
+
"# Source directories\n",
|
145 |
+
"rus_audio_dir = \"/Users/macbookpro/Downloads/cv-corpus-10.0-delta-2022-07-04/ru/clips\"\n",
|
146 |
+
"eng_audio_dir = \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21 2/en/clips\"\n",
|
147 |
+
"uzb_audio_dir = \"/Users/macbookpro/Downloads/cv-corpus-11.0-delta-2022-09-21/uz/clips\"\n",
|
148 |
+
"\n",
|
149 |
+
"# Destination directories\n",
|
150 |
+
"rus_destination_dir = \"/Users/macbookpro/Downloads/selected_audio2/ru\"\n",
|
151 |
+
"eng_destination_dir = \"/Users/macbookpro/Downloads/selected_audio2/en\"\n",
|
152 |
+
"uzb_destination_dir = \"/Users/macbookpro/Downloads/selected_audio2/uz\"\n",
|
153 |
+
"\n",
|
154 |
+
"# Number of files to select from each source directory\n",
|
155 |
+
"num_files_to_select = 800\n",
|
156 |
+
"\n",
|
157 |
+
"# Ensure the destination directories exist\n",
|
158 |
+
"os.makedirs(rus_destination_dir, exist_ok=True)\n",
|
159 |
+
"os.makedirs(eng_destination_dir, exist_ok=True)\n",
|
160 |
+
"os.makedirs(uzb_destination_dir, exist_ok=True)\n",
|
161 |
+
"\n",
|
162 |
+
"# Helper function to copy files from source to destination\n",
|
163 |
+
"def copy_files(source_dir, destination_dir, num_files):\n",
|
164 |
+
" all_files = os.listdir(source_dir)\n",
|
165 |
+
" selected_files = random.sample(all_files, num_files)\n",
|
166 |
+
" for file in selected_files:\n",
|
167 |
+
" source_file_path = os.path.join(source_dir, file)\n",
|
168 |
+
" destination_file_path = os.path.join(destination_dir, file)\n",
|
169 |
+
" shutil.copy2(source_file_path, destination_file_path)\n",
|
170 |
+
"\n",
|
171 |
+
"# Copy 800 files from each source directory to their respective destination directories\n",
|
172 |
+
"copy_files(rus_audio_dir, rus_destination_dir, num_files_to_select)\n",
|
173 |
+
"copy_files(eng_audio_dir, eng_destination_dir, num_files_to_select)\n",
|
174 |
+
"copy_files(uzb_audio_dir, uzb_destination_dir, num_files_to_select)\n",
|
175 |
+
"\n",
|
176 |
+
"print(\"Selected and copied 800 files from each source directory to their respective destination directories.\")\n"
|
177 |
+
],
|
178 |
+
"metadata": {
|
179 |
+
"collapsed": false,
|
180 |
+
"ExecuteTime": {
|
181 |
+
"end_time": "2023-10-30T15:57:38.041545Z",
|
182 |
+
"start_time": "2023-10-30T15:57:36.315193Z"
|
183 |
+
}
|
184 |
+
},
|
185 |
+
"id": "95300e7298cf8b48"
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"cell_type": "code",
|
189 |
+
"execution_count": 39,
|
190 |
+
"outputs": [
|
191 |
+
{
|
192 |
+
"name": "stdout",
|
193 |
+
"output_type": "stream",
|
194 |
+
"text": [
|
195 |
+
"Selected and copied 2000 files to the destination directory.\n"
|
196 |
+
]
|
197 |
+
}
|
198 |
+
],
|
199 |
+
"source": [
|
200 |
+
"import os\n",
|
201 |
+
"import random\n",
|
202 |
+
"import shutil\n",
|
203 |
+
"\n",
|
204 |
+
"# Source directory\n",
|
205 |
+
"source_dir = \"/Users/macbookpro/Downloads/destination_directory/eng\"\n",
|
206 |
+
"\n",
|
207 |
+
"# Destination directory for the selected files\n",
|
208 |
+
"destination_dir = \"/Users/macbookpro/Downloads/destination_directory/eng_selected1\"\n",
|
209 |
+
"\n",
|
210 |
+
"# Number of files to randomly select from the source directory\n",
|
211 |
+
"num_files_to_select = 600\n",
|
212 |
+
"\n",
|
213 |
+
"# Ensure the destination directory exists\n",
|
214 |
+
"os.makedirs(destination_dir, exist_ok=True)\n",
|
215 |
+
"\n",
|
216 |
+
"# List all files in the source directory\n",
|
217 |
+
"all_files = os.listdir(source_dir)\n",
|
218 |
+
"\n",
|
219 |
+
"# Randomly select num_files_to_select files\n",
|
220 |
+
"selected_files = random.sample(all_files, num_files_to_select)\n",
|
221 |
+
"\n",
|
222 |
+
"# Copy the selected files to the destination directory\n",
|
223 |
+
"for file in selected_files:\n",
|
224 |
+
" source_file_path = os.path.join(source_dir, file)\n",
|
225 |
+
" destination_file_path = os.path.join(destination_dir, file)\n",
|
226 |
+
" shutil.copy2(source_file_path, destination_file_path)\n",
|
227 |
+
"\n",
|
228 |
+
"print(\"Selected and copied 2000 files to the destination directory.\")\n"
|
229 |
+
],
|
230 |
+
"metadata": {
|
231 |
+
"collapsed": false,
|
232 |
+
"ExecuteTime": {
|
233 |
+
"end_time": "2023-10-30T15:55:06.078528Z",
|
234 |
+
"start_time": "2023-10-30T15:55:05.786275Z"
|
235 |
+
}
|
236 |
+
},
|
237 |
+
"id": "5598c4da8ce84f"
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"cell_type": "code",
|
241 |
+
"execution_count": null,
|
242 |
+
"outputs": [],
|
243 |
+
"source": [],
|
244 |
+
"metadata": {
|
245 |
+
"collapsed": false
|
246 |
+
},
|
247 |
+
"id": "ae9d0b9cfd931697"
|
248 |
+
}
|
249 |
+
],
|
250 |
+
"metadata": {
|
251 |
+
"kernelspec": {
|
252 |
+
"display_name": "Python 3",
|
253 |
+
"language": "python",
|
254 |
+
"name": "python3"
|
255 |
+
},
|
256 |
+
"language_info": {
|
257 |
+
"codemirror_mode": {
|
258 |
+
"name": "ipython",
|
259 |
+
"version": 2
|
260 |
+
},
|
261 |
+
"file_extension": ".py",
|
262 |
+
"mimetype": "text/x-python",
|
263 |
+
"name": "python",
|
264 |
+
"nbconvert_exporter": "python",
|
265 |
+
"pygments_lexer": "ipython2",
|
266 |
+
"version": "2.7.6"
|
267 |
+
}
|
268 |
+
},
|
269 |
+
"nbformat": 4,
|
270 |
+
"nbformat_minor": 5
|
271 |
+
}
|
saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bd307be2af5f2036e47edbc7d4c6df6a093b4c8f0a49a018d818dc3bb9ce734
|
3 |
+
size 101880
|