background scheduler

- app.py +47 -15
- requirements.txt +2 -1
- update.py +41 -7
app.py CHANGED

@@ -1,14 +1,18 @@
 import json
 import os
 from datetime import datetime, timezone, timedelta
+from dateutil import parser as dateparser

 import meilisearch
 from fasthtml.common import *
 from markdown import markdown
 from dotenv import load_dotenv
+from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.triggers.cron import CronTrigger
+from contextlib import asynccontextmanager

 from constants import MeilisearchIndexFields
-from update import process_webhook
+from update import process_webhook, update_webhooks

 loaded = load_dotenv("./.env", override=True)
 print("Loaded .env file:", loaded)
@@ -19,7 +23,21 @@ ms_client = meilisearch.Client(MS_URL, MS_SEARCH_KEY)

 css_content = open("styles.css").read()

-app, rt = fast_app(hdrs=(Style(css_content),))
+
+@asynccontextmanager
+async def lifespan(app):
+    # Setup
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(update_webhooks, CronTrigger.from_crontab("0 */3 * * *"))
+    scheduler.start()
+
+    yield
+
+    # Cleanup
+    scheduler.shutdown()
+
+
+app, rt = fast_app(hdrs=(Style(css_content),), lifespan=lifespan)


 md_exts = "codehilite", "smarty", "extra", "sane_lists"
@@ -29,7 +47,8 @@ def Markdown(s, exts=md_exts, **kw):
     return Div(NotStr(markdown(s, extensions=exts)), **kw)


-scroll_script = Script("""
+scroll_script = Script(
+    """
 document.addEventListener('DOMContentLoaded', function() {
     var scrollButton = document.getElementById('scroll-top-btn');

@@ -46,7 +65,8 @@ document.addEventListener('DOMContentLoaded', function() {
     document.documentElement.scrollTop = 0; // For Chrome, Firefox, IE and Opera
   };
 });
-""")
+"""
+)


 def date_range_inputs(start_date, end_date):
@@ -80,7 +100,7 @@ def search_form(start_date, end_date):


 def iso_to_unix_timestamp(iso_string):
-    dt =
+    dt = dateparser.isoparse(iso_string)
     return int(dt.timestamp())


@@ -94,7 +114,10 @@ def make_query(query, start_date, end_date, page=1, limit=10):
     twenty_three_hours_59_minutes_59_seconds_in_seconds = (23 * 60 + 59) * 60 + 59

     after_timestamp = iso_to_unix_timestamp(start_date)
-    before_timestamp = iso_to_unix_timestamp(end_date) + twenty_three_hours_59_minutes_59_seconds_in_seconds
+    before_timestamp = (
+        iso_to_unix_timestamp(end_date)
+        + twenty_three_hours_59_minutes_59_seconds_in_seconds
+    )

     options = {
         "limit": limit,
@@ -102,13 +125,18 @@ def make_query(query, start_date, end_date, page=1, limit=10):
         "filter": f"{MeilisearchIndexFields.UPDATED_AT.value} >= {after_timestamp} AND {MeilisearchIndexFields.UPDATED_AT.value} < {before_timestamp}",
         "attributesToCrop": [MeilisearchIndexFields.CONTENT.value],
         "cropLength": 30,
-        "attributesToHighlight": [
+        "attributesToHighlight": [
+            MeilisearchIndexFields.CONTENT.value,
+            MeilisearchIndexFields.TITLE.value,
+        ],
         "highlightPreTag": '<span class="highlight">',
         "highlightPostTag": "</span>",
+        "distinct": MeilisearchIndexFields.URL.value,
     }

-
-
+    return ms_client.index(MeilisearchIndexFields.INDEX_NAME.value).search(
+        query=query, opt_params=options
+    )


 def search_results(query, start_date, end_date, page=1):
@@ -136,7 +164,9 @@ def make_card(result):
     result = result["_formatted"]

     url = result[MeilisearchIndexFields.URL.value]
-    date = unix_timestamp_to_nice_format(int(result[MeilisearchIndexFields.UPDATED_AT.value]))
+    date = unix_timestamp_to_nice_format(
+        int(result[MeilisearchIndexFields.UPDATED_AT.value])
+    )

     return Div(
         Div(
@@ -156,7 +186,7 @@ def make_pagination(current_page, total_hits, limit=10):

     if current_page > 1:
         children.append(
-
+            Button(
                 "Previous",
                 hx_post=f"/search?page={current_page-1}",
                 hx_target="#search-results",
@@ -178,8 +208,10 @@ def make_pagination(current_page, total_hits, limit=10):

     return Div(*children, cls="pagination")

-scroll_button = Button("Scroll to Top",
-                       id="scroll-top-btn",
+
+scroll_button = Button(
+    "Scroll to Top",
+    id="scroll-top-btn",
     style="""
     position: fixed;
     bottom: 20px;
@@ -191,9 +223,10 @@ scroll_button = Button("Scroll to Top",
     border-radius: 5px;
     padding: 10px 15px;
     cursor: pointer;
-"""
+""",
 )

+
 @rt("/")
 def get():
     end_date = datetime.now()
@@ -217,7 +250,6 @@ def post(query: str, start_date: str, end_date: str, page: int = 1):

 @app.post("/webhook")
 async def hf_webhook(request):
-
     return await process_webhook(request)

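Note on the new startup path: the commit passes the lifespan context manager through fast_app to the underlying Starlette app, and APScheduler's BackgroundScheduler runs its jobs on its own worker threads, so the periodic update_webhooks call does not block request handling. A minimal standalone sketch of the same pattern (the job body and names below are placeholders, not code from this commit):

    from contextlib import asynccontextmanager

    from apscheduler.schedulers.background import BackgroundScheduler
    from apscheduler.triggers.cron import CronTrigger


    def refresh_job():
        # stand-in for a periodic task such as update_webhooks
        print("refreshing...")


    @asynccontextmanager
    async def lifespan(app):
        scheduler = BackgroundScheduler()
        # "0 */3 * * *": minute 0 of every third hour (00:00, 03:00, 06:00, ...)
        scheduler.add_job(refresh_job, CronTrigger.from_crontab("0 */3 * * *"))
        scheduler.start()  # spawns the scheduler's background thread
        try:
            yield  # the app serves requests while the scheduler ticks
        finally:
            scheduler.shutdown()  # stop the scheduler cleanly on app exit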
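A small aside on make_query's date filter: the long-named constant is 23 hours 59 minutes 59 seconds expressed in seconds, one second short of a full day. Adding it to the midnight timestamp of end_date stretches the updated_at range over the whole end day instead of cutting it off at 00:00. A quick arithmetic check (dates here are illustrative):

    # (23 h 59 min) in seconds, plus 59 s = 86_399 s = 24 h - 1 s
    offset = (23 * 60 + 59) * 60 + 59
    assert offset == 86_399 == 24 * 60 * 60 - 1

    # iso_to_unix_timestamp("2024-01-02") is midnight at the start of Jan 2,
    # so "updated_at < midnight + 86_399" still matches documents updated
    # during Jan 2 itself.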
requirements.txt CHANGED

@@ -5,4 +5,5 @@ fasthtml-hf
 markdown
 meilisearch
 huggingface_hub
-requests
+requests
+apscheduler
update.py CHANGED

@@ -171,28 +171,62 @@ def update_discussion_status(payload):
     print("Update request:", update_request)


+def is_user(user_or_org):
+    api_url = f"https://huggingface.co/api/users/{user_or_org}/overview"
+    response = requests.get(api_url)
+    return response.status_code == 200
+

 def update_webhooks():
     """
-
+    Update the old webhook every so often with trending models.
     """
+
+    print("Updating webhook")

     existing_webhooks = api.list_webhooks()

     webhook_url = os.environ["HF_WEBHOOK_URL"]

-
+    webhook2update = [x for x in existing_webhooks if x.url == webhook_url]

-    if len(
+    if len(webhook2update) > 1:
         print("More than one webhook found")
-        print(
+        print(webhook2update)
         print("updating the first one")

-    id2update =
+    id2update = webhook2update[0].id
+
+    watch_dict = {}
+
+    for ww in webhook2update[0].watched:
+        watch_dict[ww.name] = ww.type

     # get trending models

-    trending_models = api.list_models(sort="likes7d", direction=-1, limit=
+    trending_models = api.list_models(sort="likes7d", direction=-1, limit=1000)

     to_add = []
-
+
+    for m in trending_models:
+        org_or_user = m.id.split("/")[0]
+        if org_or_user in watch_dict:
+            continue
+        if is_user(org_or_user):
+            to_add.append({"name": m.id, "type": "user"})
+        else:
+            to_add.append({"name": m.id, "type": "org"})
+
+    new_watched = webhook2update[0].watched + to_add
+
+    print("There are now", len(new_watched), "items in the watched list")
+
+    api.update_webhook(
+        id=id2update,
+        url=webhook_url,
+        watched=new_watched,
+        domains=["discussion"],
+        secret=WEBHOOK_SECRET,
+    )
+
+
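The new is_user helper relies on the Hub's users endpoint answering 200 only for personal accounts; the commit treats any other status as an organization. Illustrative call (the owner name is hypothetical):

    from update import is_user

    owner = "some-owner"  # hypothetical namespace from an id like "some-owner/some-model"
    watched_type = "user" if is_user(owner) else "org"

Note this costs one HTTP request per unwatched owner, which is why the loop below checks watch_dict before calling it.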
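Since the cron trigger only fires every three hours, a one-off run is the quickest way to exercise this path. A sketch, assuming the same .env file app.py loads (Hub token and HF_WEBHOOK_URL set):

    from dotenv import load_dotenv

    load_dotenv("./.env", override=True)  # so HF_WEBHOOK_URL etc. are visible

    from update import update_webhooks  # imported after the env is loaded

    update_webhooks()  # prints "Updating webhook" and the new watched-list size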