File size: 4,642 Bytes
0841c28
 
 
7b40c73
 
0841c28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import logging
import os
import pathlib
import tempfile
import uuid

import uvicorn
from fastapi import FastAPI
from fastapi.responses import FileResponse
from pydantic import BaseModel

from src.interfaces.aclanthology import AclanthologyPaperList
from src.interfaces.arxiv import ArxivPaperList
from src.interfaces.dblp import DblpPaperList
from src.utils import dump_json, load_json


class SearchQuery(BaseModel):
    """Request body for the ``POST /api/`` search endpoint."""

    # Paper source backend; one of "aclanthology", "arxiv", "dblp".
    method: str
    # Backend-specific search criteria, passed through to the paper list's
    # search() method — schema depends on the backend; confirm in src.interfaces.
    query: dict
    # Upper bound on the number of papers fetched and returned.
    max_results: int = 1000
    # True: embed matching papers directly in the JSON response.
    # False: write them server-side and return a (url, token) download pair.
    return_content: bool = False


# When True, the per-request temp cache files (arxiv/dblp) are deleted after
# each query; the persistent ACL cache below is never removed (see /api/).
REMOVE_CACHE = False
# Persistent ACL Anthology cache shared across all requests.
ACL_CACHE_FILEPATH = "./cache/aclanthology.json"
app = FastAPI()
# Reuse uvicorn's own logger so messages share the server's log formatting.
logger = logging.getLogger("uvicorn.default")


def get_uid():
    """Return a fresh random token: the canonical string form of a UUID4."""
    # Equivalent to the urn-splitting form: uuid4().urn is
    # "urn:uuid:<canonical>", so its last ":"-field is str(uuid4()).
    return str(uuid.uuid4())


@app.get("/")
async def api():
    """Serve the static landing page at the site root."""
    index_page = "./index.html"
    return FileResponse(index_page, media_type="text/html")


@app.post("/api/")
async def api(q: SearchQuery):  # noqa: F811
    """Search one paper source and return matches inline or via download link.

    Response dict fields:
      ok      -- whether the method was recognized and the search ran
      cand    -- number of candidate papers in the backend's paper list
      paper   -- number of papers returned (after the max_results cut)
      url     -- server-side filepath of the result file (when not inline)
      token   -- one-time token required by /download/ for that file
      msg     -- human-readable status / error message
      content -- the papers themselves when q.return_content is True
    """
    ret = {
        "ok": False,
        "cand": 0,
        "paper": 0,
        "url": "",
        "token": "",
        "msg": "",
        "content": [],
    }
    if q.method not in ["aclanthology", "arxiv", "dblp"]:
        ret["msg"] = f"{q.method} method not supported"
        return ret

    # Build the backend paper list. The ACL Anthology uses one persistent
    # cache file; arxiv/dblp fetch into a fresh per-request temp file.
    if q.method == "aclanthology":
        cache_filepath = ACL_CACHE_FILEPATH
        plist = AclanthologyPaperList.build_paper_list(cache_filepath)
    elif q.method == "arxiv":
        _, cache_filepath = tempfile.mkstemp(
            prefix="arxiv.cache.", suffix=".xml", text=True
        )
        plist = ArxivPaperList.build_paper_list(
            cache_filepath, q.query, max_results=q.max_results
        )
    else:  # q.method == "dblp" -- the only remaining validated option
        _, cache_filepath = tempfile.mkstemp(
            prefix="dblp.cache.", suffix=".json", text=True
        )
        plist = DblpPaperList.build_paper_list(
            cache_filepath, q.query, max_results=q.max_results
        )

    # Common tail, previously copy-pasted into all three branches.
    papers = plist.search(q.query)[: q.max_results]
    ret["ok"] = True
    ret["msg"] = f"#candidates: {len(plist.papers)}"
    ret["cand"] = len(plist.papers)

    if papers:
        papers = [p.as_dict() for p in papers]
        ret["paper"] = len(papers)
        if q.return_content:
            ret["content"] = papers
        else:
            # Persist results server-side; the client later fetches them via
            # /download/ using the (url, token) pair.
            _, result_filepath = tempfile.mkstemp(
                prefix=f"{q.method}.search.", suffix=".json", text=True
            )
            ret["url"] = result_filepath
            ret["token"] = get_uid()
            cache = {
                "token": ret["token"],
                "url": ret["url"],
                "content": papers,
            }
            dump_json(cache, result_filepath)

    # Only per-request temp caches are removed; the ACL cache is persistent.
    if REMOVE_CACHE and q.method != "aclanthology":
        os.remove(cache_filepath)

    logger.info(
        (
            f"m: {q.method}, q: {q.query}, cands: {len(plist.papers)},"
            f" max: {q.max_results}, #papers: {len(papers)}, cache: {cache_filepath}"
            f" ret.url: {ret.get('url', '')}"
        )
    )

    return ret


@app.get("/download/")
async def download(u: str, t: str):  # noqa: F811
    """Serve a previously prepared search-result file as a JSON download.

    u -- server-side filepath returned by /api/ in ``ret["url"]``
    t -- matching one-time token returned in ``ret["token"]``
    """
    # SECURITY NOTE(review): `u` is a client-supplied filesystem path; any
    # readable JSON file whose "token" field matches `t` could be served.
    # Consider a server-side token -> path mapping instead of trusting `u`.
    logger.info(f"{u=}, {t=}")
    results_filepath = pathlib.Path(u)
    token = t
    if results_filepath.exists():
        data = load_json(results_filepath)
        if data["token"] == token:
            filename = results_filepath.name
            # Result files are named "<method>.search.<random>.json"; only the
            # leading component matters. Taking the first "."-field (instead
            # of a rigid 4-way unpack) cannot raise ValueError on names with
            # an unexpected number of dots.
            prefix = filename.split(".", 1)[0]
            _, download_filepath = tempfile.mkstemp(
                prefix=f"{prefix}.download.", suffix=".json"
            )
            dump_json(data["content"], download_filepath, indent=2)
            logger.info(f"Download: from {u} to {download_filepath}")
            return FileResponse(download_filepath, filename=f"{prefix}.json")
    return {"ok": False, "msg": "file not exist or token mismatch"}


if __name__ == "__main__":
    # Prepend a timestamp to both of uvicorn's default log formats, then
    # launch the server with the customized config.
    log_config = uvicorn.config.LOGGING_CONFIG
    for formatter_name in ("access", "default"):
        formatter = log_config["formatters"][formatter_name]
        formatter["fmt"] = "%(asctime)s | " + formatter["fmt"]
    uvicorn.run(
        "server:app",
        host="0.0.0.0",
        port=7860,
        log_level="debug",
        log_config=log_config,
        reload=False,
    )