Spaces:
Running
Running
File size: 4,642 Bytes
0841c28 7b40c73 0841c28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import logging
import os
import pathlib
import tempfile
import uuid
import uvicorn
from fastapi import FastAPI
from fastapi.responses import FileResponse
from pydantic import BaseModel
from src.interfaces.aclanthology import AclanthologyPaperList
from src.interfaces.arxiv import ArxivPaperList
from src.interfaces.dblp import DblpPaperList
from src.utils import dump_json, load_json
class SearchQuery(BaseModel):
    # Request body accepted by the POST /api/ search endpoint.

    # Backend to search; the handler accepts "aclanthology", "arxiv" or "dblp"
    # and answers with a "not supported" message for anything else.
    method: str
    # Backend-specific query, forwarded unchanged to the paper list's search.
    query: dict
    # Upper bound on returned papers (the hit list is sliced to this length).
    max_results: int = 1000
    # True: hits are inlined in the response under "content".
    # False: hits are written to a temp file and a url/token pair is returned.
    return_content: bool = False
# When true, the per-request cache file of non-aclanthology searches is
# deleted once the search has finished.
REMOVE_CACHE = False
# Cache file handed to AclanthologyPaperList.build_paper_list.
ACL_CACHE_FILEPATH = "./cache/aclanthology.json"
app = FastAPI()
# NOTE(review): named under the "uvicorn" logger hierarchy, presumably so the
# records reach uvicorn's configured handlers — confirm.
logger = logging.getLogger("uvicorn.default")
def get_uid():
    """Return a fresh random UUID4 in canonical hyphenated string form."""
    # str(UUID) is exactly the part of the URN that follows "urn:uuid:".
    fresh_id = uuid.uuid4()
    return str(fresh_id)
@app.get("/")
async def api():
    """Serve the landing page (``./index.html``) as an HTML response."""
    landing_page = FileResponse("./index.html", media_type="text/html")
    return landing_page
@app.post("/api/")
async def api(q: SearchQuery):  # noqa: F811
    """Run a paper search against one backend and report the outcome.

    Args:
        q: Search request: backend name (``method``), backend-specific
            ``query``, result cap (``max_results``) and whether the hits
            should be inlined in the response (``return_content``).

    Returns:
        A dict with the keys ``ok``, ``cand`` (number of candidate papers),
        ``paper`` (number of hits), ``url`` and ``token`` (location of and
        access token for the stored result file, filled only when hits are
        not inlined), ``msg`` and ``content`` (the hits, filled only when
        ``q.return_content`` is true).
    """
    ret = {
        "ok": False,
        "cand": 0,
        "paper": 0,
        "url": "",
        "token": "",
        "msg": "",
        "content": [],
    }
    if q.method not in ["aclanthology", "arxiv", "dblp"]:
        ret["msg"] = f"{q.method} method not supported"
        return ret

    if q.method == "aclanthology":
        # The ACL anthology uses one shared, persistent cache file.
        cache_filepath = ACL_CACHE_FILEPATH
        plist = AclanthologyPaperList.build_paper_list(ACL_CACHE_FILEPATH)
    else:
        if q.method == "arxiv":
            paper_list_cls, cache_suffix = ArxivPaperList, ".xml"
        else:
            paper_list_cls, cache_suffix = DblpPaperList, ".json"
        fd, cache_filepath = tempfile.mkstemp(
            prefix=f"{q.method}.cache.", suffix=cache_suffix, text=True
        )
        # mkstemp returns an *open* descriptor; only the path is needed, so
        # close it right away instead of leaking one descriptor per request.
        os.close(fd)
        plist = paper_list_cls.build_paper_list(
            cache_filepath, q.query, max_results=q.max_results
        )

    papers = plist.search(q.query)[: q.max_results]
    ret["ok"] = True
    ret["msg"] = f"#candidates: {len(plist.papers)}"
    ret["cand"] = len(plist.papers)

    # NOTE(review): the nesting below was reconstructed from a copy of the
    # file that had lost its indentation — confirm that the result file is
    # meant to be written only when there is at least one hit.
    if papers:
        papers = [p.as_dict() for p in papers]
        ret["paper"] = len(papers)
        if q.return_content:
            ret["content"] = papers
        else:
            fd, result_filepath = tempfile.mkstemp(
                prefix=f"{q.method}.search.", suffix=".json", text=True
            )
            os.close(fd)  # same descriptor-leak fix as for the cache file
            ret["url"] = result_filepath
            ret["token"] = get_uid()
            cache = {
                "token": ret["token"],
                "url": ret["url"],
                "content": papers,
            }
            dump_json(cache, result_filepath)

    if REMOVE_CACHE and q.method != "aclanthology":
        os.remove(cache_filepath)

    logger.info(
        (
            f"m: {q.method}, q: {q.query}, cands: {len(plist.papers)},"
            f" max: {q.max_results}, #papers: {len(papers)}, cache: {cache_filepath}"
            f" ret.url: {ret.get('url', '')}"
        )
    )
    return ret
@app.get("/download/")
async def download(u: str, t: str):  # noqa: F811
    """Return a stored search result as a downloadable JSON file.

    Args:
        u: Path of the result file, as reported in the ``url`` field of the
            search response.
        t: Token that must equal the ``token`` stored inside that file.

    Returns:
        A ``FileResponse`` carrying the ``content`` of the result file
        re-dumped with ``indent=2``, or ``{"ok": False, "msg": ...}`` when
        the file is missing, unreadable, outside the temp directory, or the
        token does not match.
    """
    logger.info(f"{u=}, {t=}")
    failure = {"ok": False, "msg": "file not exist or token mismatch"}
    results_filepath = pathlib.Path(u)

    # `u` comes straight from the query string. Only regular files located
    # directly in the temp directory (where mkstemp places result files) are
    # eligible, so the endpoint cannot be pointed at arbitrary paths.
    if not results_filepath.is_file():
        return failure
    temp_dir = pathlib.Path(tempfile.gettempdir()).resolve()
    if results_filepath.resolve().parent != temp_dir:
        return failure

    try:
        data = load_json(results_filepath)
    except (OSError, ValueError):
        # NOTE(review): assumes load_json surfaces unreadable/non-JSON files
        # as OSError/ValueError (json.JSONDecodeError is a ValueError).
        return failure
    if not isinstance(data, dict) or "content" not in data:
        return failure
    if data.get("token") != t:
        return failure

    # Result files are named "<method>.search.<random>.json"; keep only the
    # leading component and tolerate names with a different number of dots.
    prefix = results_filepath.name.split(".", 1)[0]
    fd, download_filepath = tempfile.mkstemp(
        prefix=f"{prefix}.download.", suffix=".json"
    )
    # mkstemp hands back an open descriptor; close it so it is not leaked.
    os.close(fd)
    dump_json(data["content"], download_filepath, indent=2)
    logger.info(f"Download: from {u} to {download_filepath}")
    return FileResponse(download_filepath, filename=f"{prefix}.json")
if __name__ == "__main__":
    # Prepend a timestamp to both of uvicorn's stock log formats, then start
    # the server with the patched logging configuration.
    log_config = uvicorn.config.LOGGING_CONFIG
    for formatter_name in ("access", "default"):
        formatter = log_config["formatters"][formatter_name]
        formatter["fmt"] = "%(asctime)s | " + formatter["fmt"]

    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "log_level": "debug",
        "log_config": log_config,
        "reload": False,
    }
    uvicorn.run("server:app", **server_options)
|