File size: 4,294 Bytes

497ac96

{
  "builder_name": "common_voice_11_0",
  "citation": "@inproceedings{commonvoice:2020,\n  author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n  title = {Common Voice: A Massively-Multilingual Speech Corpus},\n  booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n  pages = {4211--4215},\n  year = 2020\n}\n",
  "config_name": "ur",
  "dataset_size": 57641379,
  "description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 16413 validated hours of speech in 100 languages, but more voices and languages are always added.",
  "download_checksums": {
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/n_shards.json": {
      "num_bytes": 12179,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/train/ur_train_0.tar": {
      "num_bytes": 110970880,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/dev/ur_dev_0.tar": {
      "num_bytes": 84695040,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/test/ur_test_0.tar": {
      "num_bytes": 84951040,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/other/ur_other_0.tar": {
      "num_bytes": 992716800,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/other/ur_other_1.tar": {
      "num_bytes": 874895360,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/other/ur_other_2.tar": {
      "num_bytes": 130252800,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/ur/invalidated/ur_invalidated_0.tar": {
      "num_bytes": 91883520,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/ur/train.tsv": {
      "num_bytes": 1039872,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/ur/dev.tsv": {
      "num_bytes": 817949,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/ur/test.tsv": {
      "num_bytes": 806965,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/ur/other.tsv": {
      "num_bytes": 21175312,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/ur/invalidated.tsv": {
      "num_bytes": 858871,
      "checksum": null
    }
  },
  "download_size": 2395076588,
  "features": {
    "audio": {
      "sampling_rate": 48000,
      "_type": "Audio"
    },
    "sentence": {
      "dtype": "string",
      "_type": "Value"
    }
  },
  "homepage": "https://commonvoice.mozilla.org/en/datasets",
  "license": "https://creativecommons.org/publicdomain/zero/1.0/",
  "size_in_bytes": 2452717967,
  "splits": {
    "train": {
      "name": "train",
      "num_bytes": 2410889,
      "num_examples": 4129,
      "dataset_name": "common_voice_11_0"
    },
    "validation": {
      "name": "validation",
      "num_bytes": 1901452,
      "num_examples": 3303,
      "dataset_name": "common_voice_11_0"
    },
    "test": {
      "name": "test",
      "num_bytes": 1896640,
      "num_examples": 3302,
      "dataset_name": "common_voice_11_0"
    },
    "other": {
      "name": "other",
      "num_bytes": 49446711,
      "num_examples": 85123,
      "dataset_name": "common_voice_11_0"
    },
    "invalidated": {
      "name": "invalidated",
      "num_bytes": 1985687,
      "num_examples": 3275,
      "dataset_name": "common_voice_11_0"
    }
  },
  "version": {
    "version_str": "11.0.0",
    "major": 11,
    "minor": 0,
    "patch": 0
  }
}