{
"builder_name": "common_voice_11_0",
"citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n",
"config_name": "ur",
"dataset_size": 57641379,
"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 16413 validated hours of speech in 100 languages, but more voices and languages are always added.",
"download_checksums": {
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/n_shards.json": {
"num_bytes": 12179,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/train/ur_train_0.tar": {
"num_bytes": 110970880,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/dev/ur_dev_0.tar": {
"num_bytes": 84695040,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/test/ur_test_0.tar": {
"num_bytes": 84951040,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/other/ur_other_0.tar": {
"num_bytes": 992716800,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/other/ur_other_1.tar": {
"num_bytes": 874895360,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/other/ur_other_2.tar": {
"num_bytes": 130252800,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/invalidated/ur_invalidated_0.tar": {
"num_bytes": 91883520,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/ur/train.tsv": {
"num_bytes": 1039872,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/ur/dev.tsv": {
"num_bytes": 817949,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/ur/test.tsv": {
"num_bytes": 806965,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/ur/other.tsv": {
"num_bytes": 21175312,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/ur/invalidated.tsv": {
"num_bytes": 858871,
"checksum": null
}
},
"download_size": 2395076588,
"features": {
"audio": {
"sampling_rate": 48000,
"_type": "Audio"
},
"sentence": {
"dtype": "string",
"_type": "Value"
}
},
"homepage": "https://commonvoice.mozilla.org/en/datasets",
"license": "https://creativecommons.org/publicdomain/zero/1.0/",
"size_in_bytes": 2452717967,
"splits": {
"train": {
"name": "train",
"num_bytes": 2410889,
"num_examples": 4129,
"dataset_name": "common_voice_11_0"
},
"validation": {
"name": "validation",
"num_bytes": 1901452,
"num_examples": 3303,
"dataset_name": "common_voice_11_0"
},
"test": {
"name": "test",
"num_bytes": 1896640,
"num_examples": 3302,
"dataset_name": "common_voice_11_0"
},
"other": {
"name": "other",
"num_bytes": 49446711,
"num_examples": 85123,
"dataset_name": "common_voice_11_0"
},
"invalidated": {
"name": "invalidated",
"num_bytes": 1985687,
"num_examples": 3275,
"dataset_name": "common_voice_11_0"
}
},
"version": {
"version_str": "11.0.0",
"major": 11,
"minor": 0,
"patch": 0
}
}