|
import os |
|
from scrapingbee import ScrapingBeeClient |
|
from logger import setup_logger |
|
import json |
|
|
|
# Module-level logger for all scraping events in this file.
logger = setup_logger("scraper")




# Shared ScrapingBee client, configured from the environment.
# NOTE(review): falls back to an empty API key if SCRAPINGBEE_API_KEY is
# unset — requests will then fail at call time rather than at import time.
client = ScrapingBeeClient(api_key=os.getenv('SCRAPINGBEE_API_KEY', ''))
|
|
|
def scrape_url(url: str) -> str:
    """
    Scrape content from URL using ScrapingBee with AI extraction.

    Uses the module-level ScrapingBee ``client`` with stealth proxying and a
    US exit node, asking the API's AI extraction to pull the page's main text.

    Args:
        url: The URL to scrape.

    Returns:
        str: Extracted text content on success, otherwise a human-readable
        error message (this function never raises; errors are returned as
        strings so callers can surface them directly).
    """
    try:
        # Lazy %-style args avoid formatting the message when the log
        # level is disabled (preferred over f-strings for logging).
        logger.info("Scraping URL: %s", url)
        response = client.get(
            url,
            params={
                'stealth_proxy': True,
                'country_code': 'us',
                'ai_query': 'Extract the main text content from this page'
            }
        )

        if response.status_code == 200:
            logger.info("Successfully scraped URL: %s", url)
            # Empty body is still a 200; report it explicitly.
            return response.text if response.text else "No content could be extracted from the URL"
        else:
            logger.error("Failed to scrape URL: %s, Status: %s", url, response.status_code)
            return f"Failed to download the URL. Status code: {response.status_code}"

    except Exception as e:
        # Broad catch is deliberate: this is a boundary function that must
        # return an error string instead of propagating. logger.exception
        # records the traceback (equivalent to error(..., exc_info=True)).
        logger.exception("Error scraping URL: %s", url)
        return f"Error scraping the URL: {str(e)}"