Spaces:
Sleeping
Sleeping
File size: 4,772 Bytes
f0c7f30 3aba82e f0c7f30 a67a3ad f0c7f30 3aba82e f0c7f30 3aba82e f0c7f30 3aba82e f0c7f30 a67a3ad f0c7f30 3aba82e f0c7f30 a67a3ad f0c7f30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import base64
import os
from collections import defaultdict
from datetime import date, datetime, timedelta
from io import BytesIO
import dotenv
from datasets import load_dataset
from dateutil.parser import parse
from dateutil.tz import tzutc
from fasthtml.common import *
from huggingface_hub import login, whoami
# Read settings such as HF_TOKEN from a .env file (if one exists) before any
# environment lookup below.
dotenv.load_dotenv()

# Extra CSS for the card grid, handed to fast_app when the app is created.
style = Style("""
.grid { margin-bottom: 1rem; }
.card { display: flex; flex-direction: column; }
.card img { margin-bottom: 0.5rem; }
.card h5 { margin: 0; font-size: 0.9rem; line-height: 1.2; }
.card a { color: inherit; text-decoration: none; }
.card a:hover { text-decoration: underline; }
""")
app, rt = fast_app(html_style=(style,))

# Authenticate with the Hugging Face Hub and derive the dataset repositories
# from the account the token belongs to.
hf_token = os.environ.get("HF_TOKEN")
login(token=hf_token)
hf_user = whoami(hf_token)["name"]

HF_REPO_ID_TXT = f"{hf_user}/zotero-answer-ai-texts"
HF_REPO_ID_IMG = f"{hf_user}/zotero-answer-ai-images"

# Both text configurations live in the same repository; they are loaded in the
# order abstracts -> articles, followed by the first-page images.
abstract_ds, article_ds = (
    load_dataset(HF_REPO_ID_TXT, config_name, split="train")
    for config_name in ("abstracts", "articles")
)
image_ds = load_dataset(HF_REPO_ID_IMG, "images_first_page", split="train")
def parse_date(date_string):
    """Convert a timestamp string to a calendar date in UTC.

    Args:
        date_string: Date/time text in a format ``dateutil.parser.parse``
            accepts.

    Returns:
        The ``datetime.date`` of the parsed instant after conversion to UTC.
        When the value cannot be parsed, today's date is returned as a
        best-effort fallback so that one bad record does not abort start-up.
    """
    try:
        # NOTE: a string without a UTC offset parses to a naive datetime, which
        # astimezone() interprets in the system's local timezone.
        return parse(date_string).astimezone(tzutc()).date()
    except (ValueError, TypeError, OverflowError):
        # ValueError: unrecognised format (dateutil's ParserError subclasses it).
        # TypeError: non-string input, e.g. a missing (None) field.
        # OverflowError: parsed value exceeds what the platform can represent.
        return date.today()
def get_week_start(date_obj):
    """Return the Monday of the week that contains ``date_obj``.

    ``weekday()`` is 0 for Monday through 6 for Sunday, so stepping back that
    many days always lands on the Monday of the same week.
    """
    days_past_monday = date_obj.weekday()
    offset = timedelta(days=days_past_monday)
    return date_obj - offset
# Bucket each article's arXiv id under the Monday of the week it was added.
week2articles = defaultdict(list)
for entry in article_ds:
    added_on = parse_date(entry["date_added"])
    week2articles[get_week_start(added_on)].append(entry["arxiv_id"])

# Week-start dates, newest first, so index 0 is the most recent week.
weeks = sorted(week2articles, reverse=True)

# Per-dataset lookup tables keyed by arXiv id (a later row with the same id
# replaces an earlier one).
arxiv2article = {row["arxiv_id"]: row for row in article_ds}
arxiv2abstract = {row["arxiv_id"]: row for row in abstract_ds}
arxiv2image = {row["arxiv_id"]: row for row in image_ds}
def get_article_details(arxiv_id):
    """Look up the article, abstract and image rows for one arXiv id.

    Returns:
        A 3-tuple ``(article, abstract, image)``. Any row that is missing
        from its lookup table is represented by an empty dict.
    """
    tables = (arxiv2article, arxiv2abstract, arxiv2image)
    article, abstract, image = (table.get(arxiv_id, {}) for table in tables)
    return article, abstract, image
def _jpeg_data_url(pil_image):
    """Encode a PIL image as a base64 ``data:image/jpeg`` URL."""
    # JPEG cannot store an alpha channel or a palette; saving such an image
    # (e.g. mode RGBA or P) raises OSError, so normalise those to RGB first.
    if pil_image.mode not in ("L", "RGB", "CMYK"):
        pil_image = pil_image.convert("RGB")
    buffer = BytesIO()
    pil_image.save(buffer, format="JPEG")
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{encoded}"


def _article_card(arxiv_id):
    """Build the card (optional first-page image + linked title) for one article."""
    article, _abstract, image = get_article_details(arxiv_id)

    # Fall back to a generic label when the record has no contents or no title.
    contents = article.get("contents")
    article_title = (contents[0].get("paper_title") or "article") if contents else "article"

    card_content = []
    pil_image = image.get("image") if image else None
    if pil_image is not None:
        # The image goes first so it renders above the title.
        card_content.append(
            Img(
                src=_jpeg_data_url(pil_image),
                alt="Article image",
                style="max-width: 100%; height: auto; margin-bottom: 15px;",
            )
        )
    card_content.append(
        H5(
            A(
                article_title,
                href=f"https://arxiv.org/abs/{arxiv_id}",
                target="_blank",
            )
        )
    )
    return Card(*card_content, cls="mb-4")


def _week_nav_buttons(prev_week, next_week):
    """Build the previous/next button pair; a button with no week is disabled."""

    def nav_button(label, week):
        return Button(
            label,
            hx_get=f"/week/{week}" if week else "#",
            hx_target="#content",
            # The response is itself a Div with id="content", so the target
            # must be replaced (outerHTML). innerHTML would nest a second
            # element with the same id inside the first.
            hx_swap="outerHTML",
            disabled=not week,
        )

    return Group(
        nav_button("← Previous Week", prev_week),
        nav_button("Next Week →", next_week),
    )


def generate_week_content(current_week):
    """Render the navigation, heading and article grid for one week.

    Args:
        current_week: The week's start date (a Monday); must be one of the
            entries in the module-level ``weeks`` list.

    Returns:
        A ``Div`` with id ``content`` containing the navigation buttons (above
        and below), a heading with the date range and article count, and a
        three-column grid of article cards.

    Raises:
        ValueError: If ``current_week`` is not present in ``weeks``.
    """
    week_index = weeks.index(current_week)
    # ``weeks`` is sorted newest first: the previous (older) week sits at the
    # next index and the next (newer) week at the preceding one.
    prev_week = weeks[week_index + 1] if week_index < len(weeks) - 1 else None
    next_week = weeks[week_index - 1] if week_index > 0 else None
    nav_buttons = _week_nav_buttons(prev_week, next_week)

    articles = week2articles[current_week]
    article_cards = [_article_card(arxiv_id) for arxiv_id in articles]
    grid = Grid(
        *article_cards,
        style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem;",
    )

    week_end = current_week + timedelta(days=6)
    return Div(
        nav_buttons,
        H3(f"Week of {current_week.strftime('%B %d')} - {week_end.strftime('%B %d, %Y')} ({len(articles)} articles)"),
        grid,
        nav_buttons,
        id="content",
    )
@rt("/")
def get():
    """Serve the landing page, showing the most recent week of articles."""
    if not weeks:
        # Nothing was loaded, so weeks[0] would raise IndexError; show an
        # empty state instead of a server error.
        return Titled(
            "AnswerAI Zotero Weekly",
            Div("No articles available.", id="content"),
        )
    return Titled("AnswerAI Zotero Weekly", generate_week_content(weeks[0]))
@rt("/week/{date}")
def get(date: str):
    """Serve the content fragment for the week containing ``date``.

    Args:
        date: A day in ``YYYY-MM-DD`` form, taken from the URL path. Within
            this function the name shadows the imported ``date`` class.

    Returns:
        The rendered week content, or a ``Div`` carrying an error message when
        the date is malformed, the week has no articles, or rendering fails.
    """
    try:
        requested_day = datetime.strptime(date, "%Y-%m-%d").date()
        # Snap to the Monday of that week so any day within a week resolves
        # to the week's page, not only the exact week-start date.
        current_week = get_week_start(requested_day)
        return generate_week_content(current_week)
    except Exception as e:
        # Keep id="content" so the navigation swap target still exists after
        # an error response has been swapped in.
        return Div(f"Error displaying articles: {str(e)}", id="content")
# Start serving the app; this runs at import time, after all routes above
# have been registered.
serve()
|