# AI-HUMAN-detector / functions.py
# (source: idajikuu's Hugging Face Space, commit 65f3039 "Update functions.py")
from urllib.request import urlopen, Request
from urllib.error import URLError
from bs4 import BeautifulSoup
import re
def scrape_website(url):
    """Fetch *url* and return the concatenated text of all <p> elements.

    Parameters
    ----------
    url : str
        Address of the page to scrape.

    Returns
    -------
    str | None
        The joined paragraph text with leading/trailing whitespace removed,
        or None when the request fails or the server does not answer 200
        (an error message is printed in that case).
    """
    try:
        # Identify as a regular browser: some sites reject the default
        # urllib user agent.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
        }
        request = Request(url, headers=headers)
        # Use a context manager so the connection is always closed
        # (the original leaked the response object).
        with urlopen(request) as response:
            if response.getcode() != 200:
                print("Failed to retrieve the webpage.")
                return None
            content = response.read()
        soup = BeautifulSoup(content, 'html.parser')
        # get_text() handles nested tags and HTML entities correctly,
        # unlike stripping tags with the regex '<.*?>'.
        paragraphs = soup.find_all('p')
        # join() builds the result in one pass instead of quadratic +=.
        return " ".join(p.get_text() for p in paragraphs).strip()
    except URLError as e:
        print("An error occurred while making the request:", e)
        return None
def segmentation(text, max_length=512):
    """Classify *text* chunk-by-chunk and return label proportions.

    The text is split into consecutive chunks of at most *max_length*
    characters; each chunk is classified by predict() (defined elsewhere
    in this project), and the fraction of chunks per label is returned.

    Parameters
    ----------
    text : str
        The text to analyse.
    max_length : int, optional
        Maximum number of characters per chunk.  The original code read an
        undefined global name; it is now an explicit parameter (default 512).

    Returns
    -------
    tuple[float, float]
        (human_written_prob, ai_generated_prob).  Both are 0.0 when *text*
        is empty, avoiding the original's division by zero.
    """
    total_predictions = 0
    human_written_count = 0
    ai_generated_count = 0
    # Walk the text in non-overlapping chunks of max_length characters.
    for i in range(0, len(text), max_length):
        chunk = text[i:i + max_length]
        # predict() is expected to return (probability, label) — defined
        # elsewhere in the project.
        prob, predicted_label = predict(chunk)
        total_predictions += 1
        if predicted_label == 'Human-Written':
            human_written_count += 1
        elif predicted_label == 'AI-Generated':
            ai_generated_count += 1
    # Guard: empty input means no chunks, so no meaningful proportions.
    if total_predictions == 0:
        return 0.0, 0.0
    human_written_prob = human_written_count / total_predictions
    ai_generated_prob = ai_generated_count / total_predictions
    return human_written_prob, ai_generated_prob