"""Async helpers for fetching Hacker News stories, comments, and page text."""
import asyncio
import json
from typing import Dict, List, Union

import aiohttp
from bs4 import BeautifulSoup

# Base endpoint of the official Hacker News Firebase API.
BASE_URL = "https://hacker-news.firebaseio.com/v0"
async def fetch_item(session: aiohttp.ClientSession, item_id: int):
    """
    Fetch one Hacker News item (story, comment, ...) by its numeric ID.

    Args:
        session: Open ``aiohttp.ClientSession`` used for the GET request.
        item_id (int): The ID of the item to fetch.

    Returns:
        dict: The decoded JSON payload describing the item.
    """
    async with session.get(f"{BASE_URL}/item/{item_id}.json") as resp:
        return await resp.json()
async def fetch_story_ids(story_type: str = "top", limit: Union[int, None] = None):
    """
    Asynchronously fetch story IDs for a Hacker News listing.

    Args:
        story_type: Listing name ("top", "new", "best", ...); expands to
            ``{story_type}stories.json`` on the API.
        limit: Maximum number of IDs to return; None returns the full listing.

    Returns:
        List[int]: A list of story IDs.
    """
    url = f"{BASE_URL}/{story_type}stories.json"
    # SECURITY NOTE: certificate verification is disabled (ssl=False is the
    # non-deprecated spelling of verify_ssl=False). This permits MITM attacks;
    # re-enable verification unless a broken local proxy truly requires this.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get(url) as response:
            story_ids = await response.json()
    # Test `is not None` rather than truthiness so limit=0 yields an empty
    # list instead of silently returning the whole listing.
    if limit is not None:
        story_ids = story_ids[:limit]
    return story_ids
async def fetch_text(session, url):
    """
    Download a URL and return its visible text, or a descriptive error string.

    Args:
        session: `aiohttp` session used to perform the GET request.
        url: The story URL to download.

    Returns:
        The page text extracted with BeautifulSoup on HTTP 200; otherwise an
        informative error message (non-200 status, or any raised exception)
        suitable for passing straight to an LLM.
    """
    try:
        async with session.get(url) as response:
            if response.status != 200:
                return f"Unable to fetch content from {url}. Status code: {response.status}"
            body = await response.text()
            return BeautifulSoup(body, 'html.parser').get_text()
    except Exception as e:
        return f"An error occurred: {e}"
async def get_hn_stories(limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
    """
    Asynchronously fetch Hacker News stories, optionally filtered by keyword.

    Args:
        limit (int): The number of stories to return. Default is 5.
        keywords (List[str]): Case-insensitive keywords; a story is kept when
            any of them occurs in its title. None disables filtering.
        story_type (str): The listing to read ("top", "new", "best", ...).

    Returns:
        List[Dict[str, Union[str, int]]]: A list of dictionaries containing
        'story_id', 'title', 'url', and 'score' of the stories.
    """
    # When filtering by keyword we cannot know in advance how many stories
    # will match, so fetch the whole listing; otherwise `limit` IDs suffice.
    if keywords is None:
        story_ids = await fetch_story_ids(story_type, limit)
    else:
        story_ids = await fetch_story_ids(story_type)

    # Share ONE session/connector across all item requests; the original
    # opened a fresh ClientSession + TCPConnector per story.
    # SECURITY NOTE: ssl=False disables certificate verification (MITM risk).
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        stories = await asyncio.gather(
            *(fetch_item(session, story_id) for story_id in story_ids)
        )

    filtered_stories = []
    for story in stories:
        # Dead/deleted items can come back as None or without a title;
        # the original crashed on story['title'].lower() in that case.
        if not story:
            continue
        title = story.get("title") or ""
        if keywords is not None and not any(
            keyword.lower() in title.lower() for keyword in keywords
        ):
            continue
        filtered_stories.append(
            {
                "title": story.get("title"),
                "url": story.get("url"),
                "score": story.get("score"),
                "story_id": story.get("id"),
            }
        )
    return filtered_stories[:limit]
async def get_relevant_comments(story_id: int, limit: int = 10):
    """
    Get the most relevant top-level comments for a Hacker News item.

    Args:
        story_id: The ID of the Hacker News item.
        limit: The number of comments to retrieve (default is 10).

    Returns:
        A JSON-encoded list of comment texts, or the plain message string
        "This item doesn't have comments." when the item has no 'kids'.
    """
    # SECURITY NOTE: ssl=False (modern spelling of the deprecated
    # verify_ssl=False) disables certificate verification — MITM risk.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        story = await fetch_item(session, story_id)
        if not story or 'kids' not in story:
            return "This item doesn't have comments."
        comment_ids = story['kids']
        comments = await asyncio.gather(
            *[fetch_item(session, cid) for cid in comment_ids]
        )
    # Drop deleted/dead comments (returned as None or without a "text" key);
    # the original raised KeyError on comment["text"] for those.
    comments = [c for c in comments if c and c.get("text")]
    # NOTE(review): item payloads do not appear to carry a per-comment
    # "score", so this sort is likely a stable no-op — kept for compatibility.
    comments.sort(key=lambda comment: comment.get('score', 0), reverse=True)
    return json.dumps([comment["text"] for comment in comments[:limit]])
async def get_story_content(story_url: str):
    """
    Gets the text content of a story page via fetch_text/BeautifulSoup.

    Args:
        story_url: A string representing the story URL.

    Returns:
        The page text on success, otherwise an informative error string
        (whatever fetch_text produced).
    """
    # SECURITY NOTE: ssl=False (the non-deprecated spelling of
    # verify_ssl=False) disables certificate verification — MITM risk.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        return await fetch_text(session, story_url)