from urllib.request import urlopen, Request
from urllib.error import URLError
from bs4 import BeautifulSoup
import re


def scrape_website(url):
    try:
        # Set the User-Agent header so the request looks like a normal browser
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
        }

        # Create a request object with the URL and headers
        request = Request(url, headers=headers)

        # Send the GET request to the specified URL
        response = urlopen(request)

        # Check if the request was successful
        if response.getcode() == 200:
            # Read the response content
            content = response.read()

            # Create a BeautifulSoup object with the HTML content
            soup = BeautifulSoup(content, 'html.parser')

            # Extract all <p> elements; customize this based on your needs
            text_elements = soup.find_all('p')

            # Append the extracted text to a single string
            result = ""
            for element in text_elements:
                # Remove HTML tags using a regular expression
                text = re.sub('<.*?>', '', str(element))
                result += text + " "

            # Remove leading/trailing spaces and return the result string
            return result.strip()
        else:
            print("Failed to retrieve the webpage.")
            return ""
    except URLError as e:
        print("An error occurred while making the request:", e)
        return ""


def segmentation(text):
    # Maximum length for each chunk; adjust to match your model's input limit
    max_length = 512

    total_predictions = 0
    human_written_count = 0
    ai_generated_count = 0

    # Loop over the text in chunks of at most max_length characters
    for i in range(0, len(text), max_length):
        chunk = text[i:i + max_length]
        prob, predicted_label = predict(chunk)  # Call the predict function

        # Count the predictions
        total_predictions += 1
        if predicted_label == 'Human-Written':
            human_written_count += 1
        elif predicted_label == 'AI-Generated':
            ai_generated_count += 1

    # Guard against empty input so we don't divide by zero
    if total_predictions == 0:
        return 0.0, 0.0

    # Calculate the share of chunks assigned to each label
    human_written_prob = human_written_count / total_predictions
    ai_generated_prob = ai_generated_count / total_predictions

    return human_written_prob, ai_generated_prob
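
To tie the two functions together, here is a minimal usage sketch. It assumes `predict` (the chunk-level classifier that `segmentation` calls) is defined elsewhere in this article, and the URL below is a placeholder:

# Minimal usage sketch: scrape a page, then classify its text in chunks.
# Assumes predict() is already defined; the URL is a placeholder.
url = "https://example.com/article"
text = scrape_website(url)

if text:
    human_prob, ai_prob = segmentation(text)
    print(f"Human-Written: {human_prob:.2%}")
    print(f"AI-Generated: {ai_prob:.2%}")
else:
    print("No text extracted from the page.")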