Spaces:
Build error
Build error
import requests | |
from bs4 import BeautifulSoup | |
try: | |
from ..character import set_website_content | |
except: | |
from gpt_computer_agent.character import set_website_content | |
def _first_tag_text(soup, tag_name):
    """Return the text of the first ``tag_name`` element in ``soup``, or None.

    Returns None when the document has no such element, instead of raising
    AttributeError the way ``soup.<tag>.string`` does on a missing tag.
    """
    tag = soup.find(tag_name)
    if tag is None:
        return None
    if tag.string is not None:
        return tag.string
    # ``.string`` is None when the tag has several children (e.g. nested
    # markup inside an <h1>); fall back to the concatenated text.
    return tag.get_text(strip=True)


def train(url: str) -> bool:
    """Fetch ``url``, summarise the page and store it as the website content.

    The summary contains the Open Graph meta properties found on the page,
    the first ``<title>``, ``<h1>`` and ``<p>`` texts (each only when the
    element exists), the full visible text, and the URL itself, formatted
    as one ``key: value`` line per entry. The result is passed to
    ``set_website_content``.

    Args:
        url: Address of the page to fetch.

    Returns:
        True on success. On any failure the caught exception object is
        returned (not raised), so callers must compare the result against
        True rather than rely on truthiness.
    """
    try:
        # Open Graph properties worth keeping from the page's <meta> tags.
        meta_properties = (
            "og:description",
            "og:site_name",
            "og:title",
            "og:type",
            "og:url",
        )

        # A timeout keeps an unresponsive server from blocking forever.
        response = requests.get(url, timeout=30)
        soup = BeautifulSoup(response.content, "html.parser")

        data = {}
        for tag in soup.find_all("meta"):
            prop = tag.get("property")
            if prop in meta_properties:
                data[prop] = tag.get("content")

        # Pages are not guaranteed to have a title, heading or paragraph;
        # record each one only when it is actually present.
        for tag_name in ("title", "h1", "p"):
            text = _first_tag_text(soup, tag_name)
            if text is not None:
                data[tag_name] = text

        data["text"] = soup.get_text(separator="\n", strip=True)
        data["url"] = url

        # One "key: value" line per entry, each terminated by a newline.
        the_string = "".join(f"{key}: {value}\n" for key, value in data.items())

        set_website_content(the_string)
        return True
    except Exception as e:
        # Kept for backward compatibility: failures are reported by
        # returning the exception instead of raising it.
        return e