# File size: 2,351 Bytes

from urllib.request import urlopen, Request
from urllib.error import URLError
from bs4 import BeautifulSoup
import re
def scrape_website(url):
    """Fetch *url* and return the text of all its ``<p>`` elements.

    The page is requested with a desktop-browser ``User-Agent`` header and
    parsed with BeautifulSoup's ``html.parser``. The text of every ``<p>``
    element is joined with single spaces.

    Args:
        url: Address of the page to download.

    Returns:
        The space-joined paragraph text with leading/trailing whitespace
        removed, or ``None`` when the response status is not 200 or the
        request raises ``URLError`` (a message is printed in both cases).
    """
    # Send a browser-like user agent along with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
    }
    request = Request(url, headers=headers)

    try:
        # The context manager closes the connection even if reading fails.
        with urlopen(request) as response:
            if response.getcode() != 200:
                print("Failed to retrieve the webpage.")
                return None
            content = response.read()
    except URLError as e:
        print("An error occurred while making the request:", e)
        return None

    soup = BeautifulSoup(content, 'html.parser')

    # Let the parser produce the text instead of regex-stripping the
    # serialized markup: a "<.*?>" pattern leaves escaped entities such as
    # "&amp;" in the output and misses tags that contain a line break.
    paragraphs = (element.get_text() for element in soup.find_all('p'))
    return " ".join(paragraphs).strip()
def segmentation(text):
    """Classify *text* chunk by chunk and return the share of each label.

    The text is cut into consecutive slices of ``max_length`` items and each
    slice is passed to ``predict``. Neither ``max_length`` nor ``predict``
    is defined inside this function; both are looked up in module scope.

    Args:
        text: The sequence to classify; it is measured with ``len`` and
            sliced, so a string is the expected input.

    Returns:
        A ``(human_written_prob, ai_generated_prob)`` tuple: the fraction of
        chunks labelled ``'Human-Written'`` and ``'AI-Generated'``
        respectively. Chunks with any other label count toward the total
        only. Both values are ``0.0`` when *text* is empty.
    """
    total_predictions = 0
    human_written_count = 0
    ai_generated_count = 0

    # Walk the text in steps of max_length; the final chunk may be shorter.
    for start in range(0, len(text), max_length):
        chunk = text[start:start + max_length]

        # predict yields a pair; only its second item (the label) is used.
        _, predicted_label = predict(chunk)

        total_predictions += 1
        if predicted_label == 'Human-Written':
            human_written_count += 1
        elif predicted_label == 'AI-Generated':
            ai_generated_count += 1

    # Empty input produces no chunks; avoid dividing by zero.
    if total_predictions == 0:
        return 0.0, 0.0

    human_written_prob = human_written_count / total_predictions
    ai_generated_prob = ai_generated_count / total_predictions
    return human_written_prob, ai_generated_prob