podcaster / scraper.py
marks
More changes
0ebcd8e
raw
history blame
595 Bytes
def scrape_url(url):
    """Load a page in a browser and return the text of its ``<main>`` elements.

    Args:
        url: Address of the page to load.

    Returns:
        The text of every ``<main>`` element found in the page source,
        joined with single spaces and stripped of leading and trailing
        whitespace. An empty string when no ``<main>`` element is present.
    """
    # Imported at call time, so these packages are only needed when a
    # page is actually scraped.
    from browser_use import Browser
    from bs4 import BeautifulSoup

    # NOTE(review): open/get_page_source/close are used as plain synchronous
    # methods of Browser -- confirm against the installed browser_use version.
    browser = Browser()
    try:
        browser.open(url)
        content = browser.get_page_source()
    finally:
        # Release the browser even when loading or reading the page fails,
        # so a failed scrape does not leave a browser instance open.
        browser.close()

    soup = BeautifulSoup(content, 'html.parser')

    # Only <main> elements are harvested; modify the selector as needed
    # for pages that keep their content elsewhere.
    main_elements = soup.find_all(['main'])
    joined_text = ' '.join(element.get_text() for element in main_elements)
    return joined_text.strip()