Spaces:
Running
Running
File size: 1,732 Bytes
c149479 fe3c056 02e4a72 c149479 02e4a72 c149479 02e4a72 0841c28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import pathlib
import re
from src.engine import SearchAPI
from src.interfaces import Paper
from src.utils import load_json, parse_bib_month
class AclanthologyPaperList(SearchAPI):
    """Paper list built from a cached ACL Anthology JSON dump.

    Every record with a non-empty title is converted into a ``Paper`` and
    stored in ``self.papers``.
    """

    # Captures the text inside the first pair of parentheses of a name string.
    _PAREN_NAME_RE = re.compile(r".*?\((.*?)\)")

    def __init__(self, cache_filepath: pathlib.Path) -> None:
        """Load the cache file and populate ``self.papers``.

        Args:
            cache_filepath: Location of the JSON cache, handed to ``load_json``.
                The loaded value is iterated as a sequence of dict records.
        """
        super().__init__()
        data = load_json(cache_filepath)

        self.papers = []
        for d in data:
            authors = " , ".join(
                self.extract_author_full(author) for author in d.get("author", [])
            )

            # Keep only the first listed venue.
            # NOTE(review): when a record has no venue, the empty list itself
            # is passed on to Paper -- confirm Paper tolerates a non-string
            # venue.
            venue = d.get("venue", [])
            if venue:
                venue = venue[0]

            # Sentinel defaults ("9999" / "99") mark records without a date.
            year = int(d.get("year", "9999"))
            month = parse_bib_month(d.get("month", "99"))

            paper = Paper(
                d.get("title", ""),
                authors,
                d.get("abstract", ""),
                d.get("url", ""),
                d.get("doi", ""),
                venue,
                year,
                month,
            )
            # Records without a title are not kept.
            if not paper.title:
                continue
            self.papers.append(paper)

    def extract_author_full(self, name: dict) -> str:
        """Return a display name for one author record.

        Resolution order:

        1. If the record has a non-empty ``"full"`` string containing a
           parenthesised part, the text inside the parentheses is returned.
        2. Otherwise the ``"first"`` and ``"last"`` values joined by a space
           (missing or empty parts are skipped).
        3. Otherwise the raw ``"full"`` string, or ``""`` if nothing is
           available.

        The previous implementation tested ``hasattr(name, "full")``, which is
        never true for a dict key, and would have passed the dict itself to
        ``re.search``; the ``"full"`` key is now looked up explicitly.

        Args:
            name: Author record; the keys ``"full"``, ``"first"`` and
                ``"last"`` are all optional.

        Returns:
            The resolved author name, possibly an empty string.
        """
        full = name.get("full") or ""
        if isinstance(full, str) and full:
            match = self._PAREN_NAME_RE.search(full)
            if match:
                return match.group(1)

        parts = [name.get("first"), name.get("last")]
        joined = " ".join(str(part) for part in parts if part)
        if joined:
            return joined
        return full if isinstance(full, str) else ""

    @classmethod
    def build_paper_list(cls, cache_filepath: str):
        """Alternate constructor: build the paper list from ``cache_filepath``."""
        return cls(cache_filepath)

    @classmethod
    def build_and_search(
        cls, cache_filepath: str, query: dict, max_results: int = -1
    ) -> list[Paper]:
        """Build the paper list and return the papers matching ``query``.

        Args:
            cache_filepath: Location of the JSON cache file.
            query: Search query forwarded unchanged to ``search``.
            max_results: Maximum number of results to return. ``None`` or any
                negative value (the default is ``-1``) means "no limit".

        Returns:
            The matching papers, truncated to ``max_results`` when a limit is
            given.
        """
        obj = cls.build_paper_list(cache_filepath)
        results = obj.search(query)
        # Slicing with the default -1 would silently drop the last result,
        # so "no limit" has to bypass the slice entirely.
        if max_results is None or max_results < 0:
            return results
        return results[:max_results]
|