"""Async helpers for fetching Hacker News stories, comments and article text.

Talks to the public Firebase-backed Hacker News API
(https://github.com/HackerNews/API) via aiohttp, and uses BeautifulSoup
to extract plain text from linked article pages.
"""

import asyncio
import json
from typing import Dict, List, Optional, Union

import aiohttp
from bs4 import BeautifulSoup

BASE_URL = "https://hacker-news.firebaseio.com/v0"


def _make_connector() -> aiohttp.TCPConnector:
    """Build the TCP connector used by every session in this module.

    NOTE(security): certificate verification is disabled. ``ssl=False`` is
    the modern spelling of the deprecated ``verify_ssl=False`` used by the
    original code; behavior is unchanged, but verification should be
    re-enabled unless a broken local proxy genuinely requires this.
    """
    return aiohttp.TCPConnector(ssl=False)


async def fetch_item(session: aiohttp.ClientSession, item_id: int) -> Optional[dict]:
    """Asynchronously fetch the details of a single HN item by its ID.

    Args:
        session: aiohttp ClientSession used for the HTTP request.
        item_id: The ID of the item (story, comment, ...) to fetch.

    Returns:
        The decoded JSON item, or ``None`` for dead/deleted items
        (the HN API returns ``null`` for those).
    """
    url = f"{BASE_URL}/item/{item_id}.json"
    async with session.get(url) as response:
        return await response.json()


async def fetch_story_ids(story_type: str = "top", limit: Optional[int] = None) -> List[int]:
    """Asynchronously fetch story IDs of the given type.

    Args:
        story_type: The story type; "top" hits ``topstories.json``,
            other valid values include "new" and "best".
        limit: Optional maximum number of IDs to return.

    Returns:
        A list of story IDs, truncated to ``limit`` when given.
    """
    url = f"{BASE_URL}/{story_type}stories.json"
    async with aiohttp.ClientSession(connector=_make_connector()) as session:
        async with session.get(url) as response:
            story_ids = await response.json()
    if limit:
        story_ids = story_ids[:limit]
    return story_ids


async def fetch_text(session: aiohttp.ClientSession, url: str) -> str:
    """Fetch the visible text of a web page.

    Never raises: network or parsing failures are reported as an
    informative message string (intended to be consumed by an LLM).

    Args:
        session: aiohttp session to use.
        url: The story URL to download.

    Returns:
        The page's extracted text, or an explanatory error string.
    """
    try:
        async with session.get(url) as response:
            if response.status == 200:
                html_content = await response.text()
                soup = BeautifulSoup(html_content, 'html.parser')
                return soup.get_text()
            return f"Unable to fetch content from {url}. Status code: {response.status}"
    except Exception as e:
        # Best-effort by design: callers expect a string, not an exception.
        return f"An error occurred: {e}"


async def get_hn_stories(
    limit: int = 5,
    keywords: Optional[List[str]] = None,
    story_type: str = "top",
) -> List[Dict[str, Union[str, int]]]:
    """Asynchronously fetch Hacker News stories, optionally keyword-filtered.

    Args:
        limit: The number of stories to return. Default is 5.
        keywords: Optional keywords; a story is kept if any keyword occurs
            (case-insensitively) in its title.
        story_type: The story type ("top", "new", ...).

    Returns:
        A list of dicts with 'title', 'url', 'score' and 'story_id'.
    """
    # Only pre-truncate the ID list when no keyword filter applies; with
    # keywords we need the full candidate list so filtering can still
    # yield up to `limit` matches.
    if limit and keywords is None:
        story_ids = await fetch_story_ids(story_type, limit)
    else:
        story_ids = await fetch_story_ids(story_type)

    # One shared session for all item fetches. (The original opened a new
    # session + connector per story, which is needlessly expensive.)
    async with aiohttp.ClientSession(connector=_make_connector()) as session:
        stories = await asyncio.gather(
            *(fetch_item(session, story_id) for story_id in story_ids)
        )

    filtered_stories: List[Dict[str, Union[str, int]]] = []
    for story in stories:
        if not story:
            # Dead/deleted items come back as None from the API.
            continue
        title = story.get("title") or ""
        if keywords is not None and not any(
            keyword.lower() in title.lower() for keyword in keywords
        ):
            continue
        filtered_stories.append(
            {
                "title": story.get("title"),
                "url": story.get("url"),
                "score": story.get("score"),
                "story_id": story.get("id"),
            }
        )
    return filtered_stories[:limit]


async def get_relevant_comments(story_id: int, limit: int = 10) -> str:
    """Get the most relevant comments for a Hacker News item.

    Args:
        story_id: The ID of the Hacker News item.
        limit: The number of comments to retrieve (default is 10).

    Returns:
        A JSON-encoded list of comment text strings, or an explanatory
        message string when the item has no comments.
    """
    async with aiohttp.ClientSession(connector=_make_connector()) as session:
        story = await fetch_item(session, story_id)
        if not story or 'kids' not in story:
            return "This item doesn't have comments."
        comment_ids = story['kids']
        comment_details = await asyncio.gather(
            *(fetch_item(session, cid) for cid in comment_ids)
        )

    # Drop deleted/dead comments: they fetch as None or lack a "text" key,
    # which previously raised KeyError.
    comment_details = [c for c in comment_details if c and "text" in c]
    # NOTE: HN comments carry no "score" field, so this sort is effectively
    # a stable no-op; kept for backward compatibility with the original.
    comment_details.sort(key=lambda comment: comment.get('score', 0), reverse=True)
    relevant_comments = [comment["text"] for comment in comment_details[:limit]]
    return json.dumps(relevant_comments)


async def get_story_content(story_url: str) -> str:
    """Get the text content of a story page using BeautifulSoup.

    Args:
        story_url: The story URL.

    Returns:
        The page's extracted text, or an error message string.
    """
    async with aiohttp.ClientSession(connector=_make_connector()) as session:
        return await fetch_text(session, story_url)