Spaces:

Spico
/

paper-hero

Running

paper-hero / src /interfaces /aclanthology.py

- add `build_paper_list` and `build_and_search` methods to help build demo (direct API)

0841c28 about 2 years ago

1.73 kB

	import pathlib
	import re

	from src.engine import SearchAPI
	from src.interfaces import Paper
	from src.utils import load_json, parse_bib_month


	class AclanthologyPaperList(SearchAPI):
	    """Paper collection loaded from a cached Aclanthology JSON file.

	    Every record of the cache is turned into a ``Paper`` and stored in
	    ``self.papers``. Searching is provided by the ``SearchAPI`` base class.
	    """

	    def __init__(self, cache_filepath: pathlib.Path) -> None:
	        """Load the cache file and populate ``self.papers``.

	        Each record is read with ``.get`` for the keys ``"title"``,
	        ``"author"``, ``"abstract"``, ``"url"``, ``"doi"``, ``"venue"``,
	        ``"year"`` and ``"month"``. Records whose resulting paper has an
	        empty title are skipped.

	        Args:
	            cache_filepath: Location of the JSON cache, handed to
	                ``load_json``.
	        """
	        super().__init__()

	        records = load_json(cache_filepath)

	        self.papers = []
	        for record in records:
	            authors = " , ".join(
	                self.extract_author_full(author)
	                for author in record.get("author", [])
	            )
	            # Only the first venue entry is kept.
	            # NOTE(review): a missing/empty venue is forwarded unchanged
	            # (i.e. ``[]``) -- confirm that ``Paper`` accepts that.
	            venue = record.get("venue", [])
	            if venue:
	                venue = venue[0]
	            # "9999" / "99" are the placeholders used when the record has
	            # no year / month.
	            year = int(record.get("year", "9999"))
	            month = parse_bib_month(record.get("month", "99"))
	            paper = Paper(
	                record.get("title", ""),
	                authors,
	                record.get("abstract", ""),
	                record.get("url", ""),
	                record.get("doi", ""),
	                venue,
	                year,
	                month,
	            )
	            # The title is checked on the constructed paper, not on the
	            # raw record.
	            if paper.title:
	                self.papers.append(paper)

	    def extract_author_full(self, name: dict) -> str:
	        """Return a display name for one author record.

	        Args:
	            name: Author mapping; its ``"first"`` and ``"last"`` entries
	                are joined with a single space.

	        Returns:
	            ``"<first> <last>"`` when both parts are present. If one part
	            is missing or empty, only the other one is returned. If both
	            are absent, the ``"full"`` entry is returned when present,
	            otherwise an empty string.
	        """
	        # NOTE(review): this method used to guard a regex branch with
	        # ``hasattr(name, "full")``. That test is never true for a dict, so
	        # the branch never ran (and it passed the dict itself to
	        # ``re.search``). It is dropped here; the result for records that
	        # carry both name parts is unchanged.
	        parts = [str(name[key]) for key in ("first", "last") if name.get(key)]
	        if parts:
	            return " ".join(parts)
	        # Assumes an optional "full" entry holds a ready-made name string
	        # -- TODO confirm against the cache format.
	        return str(name.get("full") or "")

	    @classmethod
	    def build_paper_list(cls, cache_filepath: str):
	        """Alternate constructor: build the paper list from ``cache_filepath``."""
	        return cls(cache_filepath)

	    @classmethod
	    def build_and_search(
	        cls, cache_filepath: str, query: dict, max_results: int = -1
	    ) -> list[Paper]:
	        """Build the paper list from the cache and run one search on it.

	        Args:
	            cache_filepath: Location of the JSON cache.
	            query: Search query forwarded unchanged to ``search``.
	            max_results: Maximum number of results to return. A negative
	                value (the default) or ``None`` means no limit.

	        Returns:
	            The results of ``search(query)``, truncated to ``max_results``
	            entries when a non-negative limit is given.
	        """
	        results = cls.build_paper_list(cache_filepath).search(query)
	        # Slicing with the default -1 would silently drop the last hit, so
	        # a negative limit is treated as "return everything".
	        if max_results is None or max_results < 0:
	            return results
	        return results[:max_results]