from urllib.request import urlopen, Request
from urllib.error import URLError

from bs4 import BeautifulSoup

def scrape_website(url):
    """Fetch a page and return the concatenated text of its <p> elements."""
    # A browser-like User-Agent helps avoid being blocked by sites that
    # reject the default urllib user agent.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
    }
    try:
        request = Request(url, headers=headers)
        response = urlopen(request)

        if response.getcode() == 200:
            content = response.read()
            soup = BeautifulSoup(content, "html.parser")

            # Collect the visible text of every paragraph. get_text() strips
            # tags and decodes entities, which is more robust than removing
            # tags with a regular expression.
            paragraphs = soup.find_all("p")
            result = " ".join(p.get_text(strip=True) for p in paragraphs)
            return result.strip()
        else:
            print("Failed to retrieve the webpage.")
            return None
    except URLError as e:
        print("An error occurred while making the request:", e)
        return None

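# Example usage (a sketch; the URL is a hypothetical placeholder, not one
# used by the original code):
#
#   text = scrape_website("https://example.com/article")
#   if text:
#       print(f"Scraped {len(text)} characters")
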
def segmentation(text, max_length=512):
    """Chunk text, classify each chunk, and return the fraction of chunks
    labelled 'Human-Written' vs. 'AI-Generated'.

    predict() is assumed to be defined elsewhere in the program; it takes a
    chunk of text and returns a (probability, label) pair.
    """
    # max_length was an undefined global in the original snippet; it is made
    # a parameter here, and the default of 512 characters is an assumption.
    total_predictions = 0
    human_written_count = 0
    ai_generated_count = 0

    # Walk the text in non-overlapping windows of max_length characters.
    for i in range(0, len(text), max_length):
        chunk = text[i:i + max_length]
        prob, predicted_label = predict(chunk)

        total_predictions += 1
        if predicted_label == 'Human-Written':
            human_written_count += 1
        elif predicted_label == 'AI-Generated':
            ai_generated_count += 1

    # Guard against empty input so the division below cannot fail.
    if total_predictions == 0:
        return 0.0, 0.0

    human_written_prob = human_written_count / total_predictions
    ai_generated_prob = ai_generated_count / total_predictions
    return human_written_prob, ai_generated_prob
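
# A minimal end-to-end sketch tying the two functions together. It assumes
# predict() has been defined elsewhere (e.g., a fine-tuned classifier that
# returns a (probability, label) pair); the URL is a placeholder.
if __name__ == "__main__":
    url = "https://example.com/some-article"  # hypothetical URL
    text = scrape_website(url)
    if text:
        human_prob, ai_prob = segmentation(text)
        print(f"Human-written: {human_prob:.2%}")
        print(f"AI-generated: {ai_prob:.2%}")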