Spaces:
Running
Running
File size: 976 Bytes
0841c28 7b40c73 c149479 0841c28 911bee9 c149479 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# Abort on the first failing command and echo each command as it runs.
set -ex

# All work happens inside a local cache directory, created on first use.
mkdir -p cache
cd cache

# Reuse an existing checkout when its anthology module file is present;
# otherwise fetch a fresh copy of the repository.
if [ -f acl-anthology/bin/anthology/anthology.py ]; then
    git -C acl-anthology pull
else
    git clone https://github.com/acl-org/acl-anthology
fi

# Install the requirements listed next to the repository scripts. The
# script stays in this directory for the commands that follow.
cd acl-anthology/bin
pip install --no-cache-dir -r ./requirements.txt
# Dump every paper known to the anthology package into one JSON file.
# NOTE: the Python program is wrapped in single quotes, so it must not
# contain any single-quote characters.
python -c '
import json

from anthology import Anthology

# Raw XML fields removed from every exported record.
XML_ONLY_KEYS = ("xml_booktitle", "xml_title", "xml_url", "xml_abstract")


def export_record(paper):
    """Return the dict form of a paper, prepared for JSON output."""
    record = paper.as_dict()
    # Keep the abstract in LaTeX form before the raw XML copy is dropped.
    if "xml_abstract" in record:
        record["abstract"] = paper.get_abstract(form="latex")
    for key in XML_ONLY_KEYS:
        record.pop(key, None)
    # Each author/editor entry is replaced by the dict form of its first
    # element.
    for role in ("author", "editor"):
        if role in record:
            record[role] = [entry[0].as_dict() for entry in record[role]]
    return record


anthology = Anthology(importdir="../data")
records = [export_record(paper) for paper in anthology.papers.values()]

# Paths are relative to the working directory of this command.
with open("../../aclanthology.json", "wt", encoding="utf8") as fout:
    json.dump(records, fout, ensure_ascii=False)
'
|