Spaces:

phxdev
/

podcaster

Running

podcaster / scraper.py

marks

More changes

0ebcd8e 2 months ago

595 Bytes

	def scrape_url(url: str) -> str:
	    """Load *url* in a browser and return the text of its ``<main>`` elements.

	    The page is opened with ``browser_use.Browser``, its source is parsed
	    with BeautifulSoup's ``html.parser``, and the text of every ``<main>``
	    element is joined with single spaces.

	    Args:
	        url: Address of the page to load; passed unchanged to
	            ``Browser.open``.

	    Returns:
	        The joined text with leading and trailing whitespace stripped, or
	        an empty string when the page contains no ``<main>`` element.

	    Raises:
	        Whatever the browser or the parser raises; the browser is closed
	        before the exception propagates.
	    """
	    # Imported lazily so the module can be imported without these packages.
	    from browser_use import Browser
	    from bs4 import BeautifulSoup

	    # NOTE(review): assumes Browser exposes synchronous open(),
	    # get_page_source() and close() - confirm against the installed
	    # browser_use version.
	    browser = Browser()
	    try:
	        browser.open(url)
	        content = browser.get_page_source()
	    finally:
	        # Always release the browser, even when loading the page fails.
	        browser.close()

	    soup = BeautifulSoup(content, 'html.parser')

	    # Extract relevant text (modify the selector as needed)
	    text_elements = soup.find_all(['main'])
	    text_content = ' '.join(element.get_text() for element in text_elements)

	    return text_content.strip()