File size: 2,351 Bytes
9b75ad5 5b007f9 e97b5b6 5b007f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from urllib.request import urlopen, Request
from urllib.error import URLError
from bs4 import BeautifulSoup
import re
def scrape_website(url):
    """Download a web page and return the text of its ``<p>`` elements.

    The page is requested with a desktop-browser ``User-Agent`` header,
    parsed with BeautifulSoup's ``html.parser`` backend, and the text of
    every ``<p>`` element is joined with single spaces.

    Args:
        url: Address of the page to fetch.

    Returns:
        The paragraph text as one string with surrounding whitespace
        removed (an empty string when the page has no ``<p>`` elements),
        or ``None`` when the request fails with ``URLError`` or the
        response status is not 200. In both failure cases a message is
        printed to stdout.
    """
    # Set the user agent header
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
    }
    # Create a request object with the URL and headers
    request = Request(url, headers=headers)

    try:
        # The context manager closes the connection even if reading fails.
        with urlopen(request) as response:
            if response.getcode() != 200:
                print("Failed to retrieve the webpage.")
                return None
            content = response.read()
    except URLError as e:
        print("An error occurred while making the request:", e)
        return None

    soup = BeautifulSoup(content, 'html.parser')
    # get_text() drops nested markup and decodes HTML entities, which a
    # tag-stripping regex over the serialized element does not do reliably.
    paragraphs = (element.get_text() for element in soup.find_all('p'))
    return " ".join(paragraphs).strip()
def segmentation(text):
    """Classify ``text`` chunk by chunk and return the share of each label.

    The text is cut into consecutive slices of ``max_length`` characters
    and every slice is passed to ``predict``, which is unpacked as a
    ``(prob, predicted_label)`` pair.

    NOTE(review): ``max_length`` and ``predict`` are resolved at module
    level and are not defined in the visible part of this file - confirm
    that both are provided before this function is called.

    Args:
        text: The string to classify. ``None`` is treated like an empty
            string.

    Returns:
        A tuple ``(human_written_prob, ai_generated_prob)``: the fraction
        of chunks labelled ``'Human-Written'`` and ``'AI-Generated'``.
        Chunks with any other label count toward the total only, so the
        two values need not sum to 1. Returns ``(0.0, 0.0)`` when there
        is no text to classify.
    """
    # No text means no chunks; return early instead of dividing by zero.
    if not text:
        return 0.0, 0.0

    total_predictions = 0
    human_written_count = 0
    ai_generated_count = 0

    # Loop over the text in chunks of maximum length
    for start in range(0, len(text), max_length):
        chunk = text[start:start + max_length]
        _prob, predicted_label = predict(chunk)  # Call the predict function

        # Count the predictions
        total_predictions += 1
        if predicted_label == 'Human-Written':
            human_written_count += 1
        elif predicted_label == 'AI-Generated':
            ai_generated_count += 1

    # Calculate probabilities
    human_written_prob = human_written_count / total_predictions
    ai_generated_prob = ai_generated_count / total_predictions
    return human_written_prob, ai_generated_prob
|