File size: 1,732 Bytes
c149479
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe3c056
02e4a72
c149479
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02e4a72
 
 
 
 
 
c149479
02e4a72
 
 
0841c28
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pathlib
import re

from src.engine import SearchAPI
from src.interfaces import Paper
from src.utils import load_json, parse_bib_month


class AclanthologyPaperList(SearchAPI):
    """Searchable list of ``Paper`` objects loaded from a cached JSON file.

    The cache is read once at construction time; every record that yields a
    non-empty title is converted to a ``Paper`` and stored in ``self.papers``.
    Searching itself is delegated to ``search`` inherited from ``SearchAPI``.
    NOTE(review): the record schema (``author``, ``venue``, ``year``, ...) is
    presumably that of an ACL Anthology export -- confirm against the cache
    producer.
    """

    # Captures the text inside the first pair of parentheses of a string.
    _PARENTHESIZED = re.compile(r".*?\((.*?)\)")

    def __init__(self, cache_filepath: pathlib.Path) -> None:
        """Load the cache and build ``self.papers``.

        Args:
            cache_filepath: Location of the JSON cache, handed to
                ``load_json``. The loaded value is iterated and each item is
                read with ``dict.get``.

        Raises:
            ValueError: If a record's ``year`` cannot be converted by ``int``.
        """
        super().__init__()

        data = load_json(cache_filepath)

        self.papers = []
        for d in data:
            authors = " , ".join(
                self.extract_author_full(author) for author in d.get("author", [])
            )

            # Normalise venue to a single value: take the first entry of a
            # list, and use "" (not an empty list) when nothing is recorded.
            # A plain string is kept whole instead of being cut to venue[0].
            venue = d.get("venue", "")
            if isinstance(venue, (list, tuple)):
                venue = venue[0] if venue else ""

            # "9999" / "99" are the fallbacks used when the field is absent.
            year = int(d.get("year", "9999"))
            month = parse_bib_month(d.get("month", "99"))

            paper = Paper(
                d.get("title", ""),
                authors,
                d.get("abstract", ""),
                d.get("url", ""),
                d.get("doi", ""),
                venue,
                year,
                month,
            )
            # Records without a title are dropped.
            if not paper.title:
                continue
            self.papers.append(paper)

    def extract_author_full(self, name: dict) -> str:
        """Return a display name for one author record.

        Resolution order:

        1. If ``name["full"]`` is a string containing parentheses, the text
           inside the first pair is returned.
        2. Otherwise ``"<first> <last>"`` is built from whichever of the
           ``first`` / ``last`` keys are present.
        3. Otherwise the raw ``full`` string (stripped) is returned, or ``""``
           when nothing usable exists.

        Args:
            name: Author mapping with optional ``full``, ``first`` and
                ``last`` keys.

        Returns:
            The resolved author name; never raises for missing keys.
        """
        # Key lookup, not hasattr(): dict keys are not attributes, so the
        # previous hasattr(name, "full") check could never be true.
        full = name.get("full")
        if isinstance(full, str):
            match = self._PARENTHESIZED.search(full)
            if match:
                return match.group(1)

        first = name.get("first") or ""
        last = name.get("last") or ""
        combined = f"{first} {last}".strip()
        if combined:
            return combined

        return full.strip() if isinstance(full, str) else ""

    @classmethod
    def build_paper_list(cls, cache_filepath: str) -> "AclanthologyPaperList":
        """Alternate constructor: build the paper list from ``cache_filepath``."""
        return cls(cache_filepath)

    @classmethod
    def build_and_search(
        cls, cache_filepath: str, query: dict, max_results: int = -1
    ) -> list[Paper]:
        """Build the paper list and run ``query`` against it in one call.

        Args:
            cache_filepath: Location of the JSON cache.
            query: Query forwarded unchanged to ``search``.
            max_results: Maximum number of results to return. A negative
                value (the default) or ``None`` means "no limit".

        Returns:
            The search results, truncated to ``max_results`` when a
            non-negative limit is given.
        """
        results = cls.build_paper_list(cache_filepath).search(query)
        # Slicing with the -1 default would silently drop the last result
        # (results[:-1]); treat negative limits as unlimited instead.
        if max_results is None or max_results < 0:
            return results
        return results[:max_results]