# Fetch and unpack the raw text corpora (language code "kk"):
#   1. the latest kkwiki pages-articles dump from dumps.wikimedia.org,
#   2. the CC-100 "kk" text file from data.statmt.org,
# then decompress the CC-100 archive and run WikiExtractor over the wiki dump,
# writing its output under ./extracted with the --json flag.
#
# The steps are chained with && so that a failed download or decompression
# stops the sequence instead of feeding a missing/partial file to the next
# command. Each step runs exactly once.
wget https://dumps.wikimedia.org/kkwiki/latest/kkwiki-latest-pages-articles.xml.bz2 &&
  wget http://data.statmt.org/cc-100/kk.txt.xz &&
  unxz kk.txt.xz &&
  python3 -m wikiextractor.WikiExtractor kkwiki-latest-pages-articles.xml.bz2 --output extracted --json