background scheduler

- app.py +47 -15
- requirements.txt +2 -1
- update.py +41 -7
app.py CHANGED

@@ -1,14 +1,18 @@
 import json
 import os
 from datetime import datetime, timezone, timedelta
+from dateutil import parser as dateparser

 import meilisearch
 from fasthtml.common import *
 from markdown import markdown
 from dotenv import load_dotenv
+from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.triggers.cron import CronTrigger
+from contextlib import asynccontextmanager

 from constants import MeilisearchIndexFields
-from update import process_webhook
+from update import process_webhook, update_webhooks

 loaded = load_dotenv("./.env", override=True)
 print("Loaded .env file:", loaded)
@@ -19,7 +23,21 @@ ms_client = meilisearch.Client(MS_URL, MS_SEARCH_KEY)

 css_content = open("styles.css").read()

-app, rt = fast_app(hdrs=(Style(css_content),))
+
+@asynccontextmanager
+async def lifespan(app):
+    # Setup
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(update_webhooks, CronTrigger.from_crontab("0 */3 * * *"))
+    scheduler.start()
+
+    yield
+
+    # Cleanup
+    scheduler.shutdown()
+
+
+app, rt = fast_app(hdrs=(Style(css_content),), lifespan=lifespan)


 md_exts = "codehilite", "smarty", "extra", "sane_lists"
@@ -29,7 +47,8 @@ def Markdown(s, exts=md_exts, **kw):
     return Div(NotStr(markdown(s, extensions=exts)), **kw)


-scroll_script = Script("""
+scroll_script = Script(
+    """
 document.addEventListener('DOMContentLoaded', function() {
     var scrollButton = document.getElementById('scroll-top-btn');

@@ -46,7 +65,8 @@ document.addEventListener('DOMContentLoaded', function() {
     document.documentElement.scrollTop = 0; // For Chrome, Firefox, IE and Opera
   };
 });
-""")
+"""
+)


 def date_range_inputs(start_date, end_date):
@@ -80,7 +100,7 @@ def search_form(start_date, end_date):


 def iso_to_unix_timestamp(iso_string):
-    dt =
+    dt = dateparser.isoparse(iso_string)
     return int(dt.timestamp())


@@ -94,7 +114,10 @@ def make_query(query, start_date, end_date, page=1, limit=10):
     twenty_three_hours_59_minutes_59_seconds_in_seconds = (23 * 60 + 59) * 60 + 59

     after_timestamp = iso_to_unix_timestamp(start_date)
-    before_timestamp = iso_to_unix_timestamp(end_date) + twenty_three_hours_59_minutes_59_seconds_in_seconds
+    before_timestamp = (
+        iso_to_unix_timestamp(end_date)
+        + twenty_three_hours_59_minutes_59_seconds_in_seconds
+    )

     options = {
         "limit": limit,
@@ -102,13 +125,18 @@ def make_query(query, start_date, end_date, page=1, limit=10):
         "filter": f"{MeilisearchIndexFields.UPDATED_AT.value} >= {after_timestamp} AND {MeilisearchIndexFields.UPDATED_AT.value} < {before_timestamp}",
         "attributesToCrop": [MeilisearchIndexFields.CONTENT.value],
         "cropLength": 30,
-        "attributesToHighlight": [
+        "attributesToHighlight": [
+            MeilisearchIndexFields.CONTENT.value,
+            MeilisearchIndexFields.TITLE.value,
+        ],
         "highlightPreTag": '<span class="highlight">',
         "highlightPostTag": "</span>",
+        "distinct": MeilisearchIndexFields.URL.value,
     }

-
-
+    return ms_client.index(MeilisearchIndexFields.INDEX_NAME.value).search(
+        query=query, opt_params=options
+    )


 def search_results(query, start_date, end_date, page=1):
@@ -136,7 +164,9 @@ def make_card(result):
     result = result["_formatted"]

     url = result[MeilisearchIndexFields.URL.value]
-    date = unix_timestamp_to_nice_format(int(result[MeilisearchIndexFields.UPDATED_AT.value]))
+    date = unix_timestamp_to_nice_format(
+        int(result[MeilisearchIndexFields.UPDATED_AT.value])
+    )

     return Div(
         Div(
@@ -156,7 +186,7 @@ def make_pagination(current_page, total_hits, limit=10):

     if current_page > 1:
         children.append(
-
+            Button(
                 "Previous",
                 hx_post=f"/search?page={current_page-1}",
                 hx_target="#search-results",
@@ -178,8 +208,10 @@ def make_pagination(current_page, total_hits, limit=10):

     return Div(*children, cls="pagination")

-scroll_button = Button("Scroll to Top",
-                       id="scroll-top-btn",
+
+scroll_button = Button(
+    "Scroll to Top",
+    id="scroll-top-btn",
     style="""
     position: fixed;
     bottom: 20px;
@@ -191,9 +223,10 @@ scroll_button = Button("Scroll to Top",
     border-radius: 5px;
     padding: 10px 15px;
     cursor: pointer;
-"""
+""",
 )

+
 @rt("/")
 def get():
     end_date = datetime.now()
@@ -217,7 +250,6 @@ def post(query: str, start_date: str, end_date: str, page: int = 1):

 @app.post("/webhook")
 async def hf_webhook(request):
-
     return await process_webhook(request)

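Note on the new startup path: the commit passes the lifespan context manager through fast_app to the underlying Starlette app, and APScheduler's BackgroundScheduler runs its jobs on its own worker threads, so the periodic update_webhooks call does not block request handling. A minimal standalone sketch of the same pattern (the job body and names below are placeholders, not code from this commit):

    from contextlib import asynccontextmanager

    from apscheduler.schedulers.background import BackgroundScheduler
    from apscheduler.triggers.cron import CronTrigger


    def refresh_job():
        # stand-in for a periodic task such as update_webhooks
        print("refreshing...")


    @asynccontextmanager
    async def lifespan(app):
        scheduler = BackgroundScheduler()
        # "0 */3 * * *": minute 0 of every third hour (00:00, 03:00, 06:00, ...)
        scheduler.add_job(refresh_job, CronTrigger.from_crontab("0 */3 * * *"))
        scheduler.start()  # spawns the scheduler's background thread
        try:
            yield  # the app serves requests while the scheduler ticks
        finally:
            scheduler.shutdown()  # stop the scheduler cleanly on app exit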
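A small aside on make_query's date filter: the long-named constant is 23 hours 59 minutes 59 seconds expressed in seconds, one second short of a full day. Adding it to the midnight timestamp of end_date stretches the updated_at range over the whole end day instead of cutting it off at 00:00. A quick arithmetic check (dates here are illustrative):

    # (23 h 59 min) in seconds, plus 59 s = 86_399 s = 24 h - 1 s
    offset = (23 * 60 + 59) * 60 + 59
    assert offset == 86_399 == 24 * 60 * 60 - 1

    # iso_to_unix_timestamp("2024-01-02") is midnight at the start of Jan 2,
    # so "updated_at < midnight + 86_399" still matches documents updated
    # during Jan 2 itself.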
requirements.txt CHANGED

@@ -5,4 +5,5 @@ fasthtml-hf
 markdown
 meilisearch
 huggingface_hub
-requests
+requests
+apscheduler
update.py CHANGED

@@ -171,28 +171,62 @@ def update_discussion_status(payload):
     print("Update request:", update_request)


+def is_user(user_or_org):
+    api_url = f"https://huggingface.co/api/users/{user_or_org}/overview"
+    response = requests.get(api_url)
+    return response.status_code == 200
+

 def update_webhooks():
     """
-
+    Update the old webhook every so often with trending models.
     """
+
+    print("Updating webhook")

     existing_webhooks = api.list_webhooks()

     webhook_url = os.environ["HF_WEBHOOK_URL"]

-
+    webhook2update = [x for x in existing_webhooks if x.url == webhook_url]

-    if len(
+    if len(webhook2update) > 1:
         print("More than one webhook found")
-        print(
+        print(webhook2update)
         print("updating the first one")

-    id2update =
+    id2update = webhook2update[0].id
+
+    watch_dict = {}
+
+    for ww in webhook2update[0].watched:
+        watch_dict[ww.name] = ww.type

     # get trending models

-    trending_models = api.list_models(sort="likes7d", direction=-1, limit=
+    trending_models = api.list_models(sort="likes7d", direction=-1, limit=1000)

     to_add = []
-
+
+    for m in trending_models:
+        org_or_user = m.id.split("/")[0]
+        if org_or_user in watch_dict:
+            continue
+        if is_user(org_or_user):
+            to_add.append({"name": m.id, "type": "user"})
+        else:
+            to_add.append({"name": m.id, "type": "org"})
+
+    new_watched = webhook2update[0].watched + to_add
+
+    print("There are now", len(new_watched), "items in the watched list")
+
+    api.update_webhook(
+        id=id2update,
+        url=webhook_url,
+        watched=new_watched,
+        domains=["discussion"],
+        secret=WEBHOOK_SECRET,
+    )
+
+
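The new is_user helper relies on the Hub's users endpoint answering 200 only for personal accounts; the commit treats any other status as an organization. Illustrative call (the owner name is hypothetical):

    from update import is_user

    owner = "some-owner"  # hypothetical namespace from an id like "some-owner/some-model"
    watched_type = "user" if is_user(owner) else "org"

Note this costs one HTTP request per unwatched owner, which is why the loop below checks watch_dict before calling it.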
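Since the cron trigger only fires every three hours, a one-off run is the quickest way to exercise this path. A sketch, assuming the same .env file app.py loads (Hub token and HF_WEBHOOK_URL set):

    from dotenv import load_dotenv

    load_dotenv("./.env", override=True)  # so HF_WEBHOOK_URL etc. are visible

    from update import update_webhooks  # imported after the env is loaded

    update_webhooks()  # prints "Updating webhook" and the new watched-list size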