"""Async helpers for fetching Hacker News stories, comments, and page text."""
import asyncio
import json
from typing import Dict, List, Union

import aiohttp
from bs4 import BeautifulSoup

# Base endpoint of the official Hacker News Firebase API.
BASE_URL = "https://hacker-news.firebaseio.com/v0"
async def fetch_item(session: aiohttp.ClientSession, item_id: int):
    """
    Fetch one Hacker News item (story, comment, ...) by its numeric ID.

    Args:
        session: Open ``aiohttp.ClientSession`` used for the GET request.
        item_id (int): The ID of the item to fetch.

    Returns:
        dict: The decoded JSON payload describing the item.
    """
    async with session.get(f"{BASE_URL}/item/{item_id}.json") as resp:
        return await resp.json()
async def fetch_story_ids(story_type: str = "top", limit: Union[int, None] = None):
    """
    Asynchronously fetch story IDs for a Hacker News listing.

    Args:
        story_type: Listing name ("top", "new", "best", ...); expands to
            ``{story_type}stories.json`` on the API.
        limit: Maximum number of IDs to return; None returns the full listing.

    Returns:
        List[int]: A list of story IDs.
    """
    url = f"{BASE_URL}/{story_type}stories.json"
    # SECURITY NOTE: certificate verification is disabled (ssl=False is the
    # non-deprecated spelling of verify_ssl=False). This permits MITM attacks;
    # re-enable verification unless a broken local proxy truly requires this.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get(url) as response:
            story_ids = await response.json()
    # Test `is not None` rather than truthiness so limit=0 yields an empty
    # list instead of silently returning the whole listing.
    if limit is not None:
        story_ids = story_ids[:limit]
    return story_ids
async def fetch_text(session, url):
    """
    Download a URL and return its visible text, or a descriptive error string.

    Args:
        session: `aiohttp` session used to perform the GET request.
        url: The story URL to download.

    Returns:
        The page text extracted with BeautifulSoup on HTTP 200; otherwise an
        informative error message (non-200 status, or any raised exception)
        suitable for passing straight to an LLM.
    """
    try:
        async with session.get(url) as response:
            if response.status != 200:
                return f"Unable to fetch content from {url}. Status code: {response.status}"
            body = await response.text()
            return BeautifulSoup(body, 'html.parser').get_text()
    except Exception as e:
        return f"An error occurred: {e}"
async def get_hn_stories(limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
    """
    Asynchronously fetch Hacker News stories, optionally filtered by keyword.

    Args:
        limit (int): The number of stories to return. Default is 5.
        keywords (List[str]): Case-insensitive keywords; a story is kept when
            any of them occurs in its title. None disables filtering.
        story_type (str): The listing to read ("top", "new", "best", ...).

    Returns:
        List[Dict[str, Union[str, int]]]: A list of dictionaries containing
        'story_id', 'title', 'url', and 'score' of the stories.
    """
    # When filtering by keyword we cannot know in advance how many stories
    # will match, so fetch the whole listing; otherwise `limit` IDs suffice.
    if keywords is None:
        story_ids = await fetch_story_ids(story_type, limit)
    else:
        story_ids = await fetch_story_ids(story_type)

    # Share ONE session/connector across all item requests; the original
    # opened a fresh ClientSession + TCPConnector per story.
    # SECURITY NOTE: ssl=False disables certificate verification (MITM risk).
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        stories = await asyncio.gather(
            *(fetch_item(session, story_id) for story_id in story_ids)
        )

    filtered_stories = []
    for story in stories:
        # Dead/deleted items can come back as None or without a title;
        # the original crashed on story['title'].lower() in that case.
        if not story:
            continue
        title = story.get("title") or ""
        if keywords is not None and not any(
            keyword.lower() in title.lower() for keyword in keywords
        ):
            continue
        filtered_stories.append(
            {
                "title": story.get("title"),
                "url": story.get("url"),
                "score": story.get("score"),
                "story_id": story.get("id"),
            }
        )
    return filtered_stories[:limit]
async def get_relevant_comments(story_id: int, limit: int = 10):
    """
    Get the most relevant top-level comments for a Hacker News item.

    Args:
        story_id: The ID of the Hacker News item.
        limit: The number of comments to retrieve (default is 10).

    Returns:
        A JSON-encoded list of comment texts, or the plain message string
        "This item doesn't have comments." when the item has no 'kids'.
    """
    # SECURITY NOTE: ssl=False (modern spelling of the deprecated
    # verify_ssl=False) disables certificate verification — MITM risk.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        story = await fetch_item(session, story_id)
        if not story or 'kids' not in story:
            return "This item doesn't have comments."
        comment_ids = story['kids']
        comments = await asyncio.gather(
            *[fetch_item(session, cid) for cid in comment_ids]
        )
    # Drop deleted/dead comments (returned as None or without a "text" key);
    # the original raised KeyError on comment["text"] for those.
    comments = [c for c in comments if c and c.get("text")]
    # NOTE(review): item payloads do not appear to carry a per-comment
    # "score", so this sort is likely a stable no-op — kept for compatibility.
    comments.sort(key=lambda comment: comment.get('score', 0), reverse=True)
    return json.dumps([comment["text"] for comment in comments[:limit]])
async def get_story_content(story_url: str):
    """
    Gets the text content of a story page via fetch_text/BeautifulSoup.

    Args:
        story_url: A string representing the story URL.

    Returns:
        The page text on success, otherwise an informative error string
        (whatever fetch_text produced).
    """
    # SECURITY NOTE: ssl=False (the non-deprecated spelling of
    # verify_ssl=False) disables certificate verification — MITM risk.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        return await fetch_text(session, story_url)