podcaster / scraper.py
marks
Added ScrapingBee
ec1f53c
import os
from scrapingbee import ScrapingBeeClient
from logger import setup_logger
import json
# Module-level logger obtained from the project's setup_logger helper under
# the name "scraper".
logger = setup_logger("scraper")

# Single ScrapingBee client shared by every call in this module. The API key
# is read from the SCRAPINGBEE_API_KEY environment variable; when the variable
# is unset an empty string is passed instead.
client = ScrapingBeeClient(api_key=os.environ.get('SCRAPINGBEE_API_KEY', ''))
def scrape_url(url: str) -> str:
    """Fetch a page through ScrapingBee and return its extracted text.

    The request is sent with the ``stealth_proxy`` option enabled,
    ``country_code`` set to ``'us'`` and an ``ai_query`` prompt asking for the
    main text content of the page.

    This function never raises: every failure is reported through the
    returned string so callers can always treat the result as text.

    Args:
        url: The URL to scrape. Must be a non-empty, non-blank string.

    Returns:
        str: The response text on success, or a human-readable message when
        the URL is blank, the response status is not 200, the response body
        is empty/whitespace-only, or an exception occurs during the request.
    """
    # Reject unusable input up front so no API request is spent on a call
    # that cannot succeed.
    if not isinstance(url, str) or not url.strip():
        return "Error scraping the URL: a non-empty URL string is required"

    try:
        logger.info("Scraping URL: %s", url)
        response = client.get(
            url,
            params={
                'stealth_proxy': True,
                'country_code': 'us',
                'ai_query': 'Extract the main text content from this page',
            },
        )

        if response.status_code != 200:
            logger.error(
                "Failed to scrape URL: %s, Status: %s", url, response.status_code
            )
            return f"Failed to download the URL. Status code: {response.status_code}"

        logger.info("Successfully scraped URL: %s", url)
        content = response.text
        # A body made only of whitespace carries no usable text, so it is
        # reported the same way as an empty body.
        if not content or not content.strip():
            return "No content could be extracted from the URL"
        return content
    except Exception as e:
        # Broad catch is deliberate: this is the boundary where any client or
        # network failure is turned into an error string for the caller.
        logger.exception("Error scraping URL: %s", url)
        return f"Error scraping the URL: {e}"