File size: 4,642 Bytes
0841c28
 
 
7b40c73
 
0841c28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import logging
import os
import pathlib
import tempfile
import uuid

import uvicorn
from fastapi import FastAPI
from fastapi.responses import FileResponse
from pydantic import BaseModel

from src.interfaces.aclanthology import AclanthologyPaperList
from src.interfaces.arxiv import ArxivPaperList
from src.interfaces.dblp import DblpPaperList
from src.utils import dump_json, load_json


class SearchQuery(BaseModel):
    """Request body for the ``POST /api/`` search endpoint."""

    # Paper source backend; one of "aclanthology", "arxiv", "dblp".
    method: str
    # Backend-specific search criteria, passed through to the paper list's
    # search() method — schema depends on the backend; confirm in src.interfaces.
    query: dict
    # Upper bound on the number of papers fetched and returned.
    max_results: int = 1000
    # True: embed matching papers directly in the JSON response.
    # False: write them server-side and return a (url, token) download pair.
    return_content: bool = False


# When True, the per-request temp cache files (arxiv/dblp) are deleted after
# each query; the persistent ACL cache below is never removed (see /api/).
REMOVE_CACHE = False
# Persistent ACL Anthology cache shared across all requests.
ACL_CACHE_FILEPATH = "./cache/aclanthology.json"
app = FastAPI()
# Reuse uvicorn's own logger so messages share the server's log formatting.
logger = logging.getLogger("uvicorn.default")


def get_uid():
    """Return a fresh random token: the canonical string form of a UUID4."""
    # Equivalent to the urn-splitting form: uuid4().urn is
    # "urn:uuid:<canonical>", so its last ":"-field is str(uuid4()).
    return str(uuid.uuid4())


@app.get("/")
async def api():
    """Serve the static landing page at the site root."""
    index_page = "./index.html"
    return FileResponse(index_page, media_type="text/html")


@app.post("/api/")
async def api(q: SearchQuery):  # noqa: F811
    """Search one paper source and return matches inline or via download link.

    Response dict fields:
      ok      -- whether the method was recognized and the search ran
      cand    -- number of candidate papers in the backend's paper list
      paper   -- number of papers returned (after the max_results cut)
      url     -- server-side filepath of the result file (when not inline)
      token   -- one-time token required by /download/ for that file
      msg     -- human-readable status / error message
      content -- the papers themselves when q.return_content is True
    """
    ret = {
        "ok": False,
        "cand": 0,
        "paper": 0,
        "url": "",
        "token": "",
        "msg": "",
        "content": [],
    }
    if q.method not in ["aclanthology", "arxiv", "dblp"]:
        ret["msg"] = f"{q.method} method not supported"
        return ret

    # Build the backend paper list. The ACL Anthology uses one persistent
    # cache file; arxiv/dblp fetch into a fresh per-request temp file.
    if q.method == "aclanthology":
        cache_filepath = ACL_CACHE_FILEPATH
        plist = AclanthologyPaperList.build_paper_list(cache_filepath)
    elif q.method == "arxiv":
        _, cache_filepath = tempfile.mkstemp(
            prefix="arxiv.cache.", suffix=".xml", text=True
        )
        plist = ArxivPaperList.build_paper_list(
            cache_filepath, q.query, max_results=q.max_results
        )
    else:  # q.method == "dblp" -- the only remaining validated option
        _, cache_filepath = tempfile.mkstemp(
            prefix="dblp.cache.", suffix=".json", text=True
        )
        plist = DblpPaperList.build_paper_list(
            cache_filepath, q.query, max_results=q.max_results
        )

    # Common tail, previously copy-pasted into all three branches.
    papers = plist.search(q.query)[: q.max_results]
    ret["ok"] = True
    ret["msg"] = f"#candidates: {len(plist.papers)}"
    ret["cand"] = len(plist.papers)

    if papers:
        papers = [p.as_dict() for p in papers]
        ret["paper"] = len(papers)
        if q.return_content:
            ret["content"] = papers
        else:
            # Persist results server-side; the client later fetches them via
            # /download/ using the (url, token) pair.
            _, result_filepath = tempfile.mkstemp(
                prefix=f"{q.method}.search.", suffix=".json", text=True
            )
            ret["url"] = result_filepath
            ret["token"] = get_uid()
            cache = {
                "token": ret["token"],
                "url": ret["url"],
                "content": papers,
            }
            dump_json(cache, result_filepath)

    # Only per-request temp caches are removed; the ACL cache is persistent.
    if REMOVE_CACHE and q.method != "aclanthology":
        os.remove(cache_filepath)

    logger.info(
        (
            f"m: {q.method}, q: {q.query}, cands: {len(plist.papers)},"
            f" max: {q.max_results}, #papers: {len(papers)}, cache: {cache_filepath}"
            f" ret.url: {ret.get('url', '')}"
        )
    )

    return ret


@app.get("/download/")
async def download(u: str, t: str):  # noqa: F811
    """Serve a previously prepared search-result file as a JSON download.

    u -- server-side filepath returned by /api/ in ``ret["url"]``
    t -- matching one-time token returned in ``ret["token"]``
    """
    # SECURITY NOTE(review): `u` is a client-supplied filesystem path; any
    # readable JSON file whose "token" field matches `t` could be served.
    # Consider a server-side token -> path mapping instead of trusting `u`.
    logger.info(f"{u=}, {t=}")
    results_filepath = pathlib.Path(u)
    token = t
    if results_filepath.exists():
        data = load_json(results_filepath)
        if data["token"] == token:
            filename = results_filepath.name
            # Result files are named "<method>.search.<random>.json"; only the
            # leading component matters. Taking the first "."-field (instead
            # of a rigid 4-way unpack) cannot raise ValueError on names with
            # an unexpected number of dots.
            prefix = filename.split(".", 1)[0]
            _, download_filepath = tempfile.mkstemp(
                prefix=f"{prefix}.download.", suffix=".json"
            )
            dump_json(data["content"], download_filepath, indent=2)
            logger.info(f"Download: from {u} to {download_filepath}")
            return FileResponse(download_filepath, filename=f"{prefix}.json")
    return {"ok": False, "msg": "file not exist or token mismatch"}


if __name__ == "__main__":
    # Prepend a timestamp to both of uvicorn's default log formats, then
    # launch the server with the customized config.
    log_config = uvicorn.config.LOGGING_CONFIG
    for formatter_name in ("access", "default"):
        formatter = log_config["formatters"][formatter_name]
        formatter["fmt"] = "%(asctime)s | " + formatter["fmt"]
    uvicorn.run(
        "server:app",
        host="0.0.0.0",
        port=7860,
        log_level="debug",
        log_config=log_config,
        reload=False,
    )