from urllib.request import urlopen, Request
from urllib.error import URLError

from bs4 import BeautifulSoup

def scrape_website(url):
    """Fetch a page and return the concatenated text of its <p> elements."""
    # A browser-like User-Agent helps avoid being blocked by sites that
    # reject the default urllib user agent.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
    }
    try:
        request = Request(url, headers=headers)
        response = urlopen(request)

        if response.getcode() == 200:
            content = response.read()
            soup = BeautifulSoup(content, "html.parser")

            # Collect the visible text of every paragraph. get_text() strips
            # tags and decodes entities, which is more robust than removing
            # tags with a regular expression.
            paragraphs = soup.find_all("p")
            result = " ".join(p.get_text(strip=True) for p in paragraphs)
            return result.strip()
        else:
            print("Failed to retrieve the webpage.")
            return None
    except URLError as e:
        print("An error occurred while making the request:", e)
        return None

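# Example usage (a sketch; the URL is a hypothetical placeholder, not one
# used by the original code):
#
#   text = scrape_website("https://example.com/article")
#   if text:
#       print(f"Scraped {len(text)} characters")
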
def segmentation(text, max_length=512):
    """Chunk text, classify each chunk, and return the fraction of chunks
    labelled 'Human-Written' vs. 'AI-Generated'.

    predict() is assumed to be defined elsewhere in the program; it takes a
    chunk of text and returns a (probability, label) pair.
    """
    # max_length was an undefined global in the original snippet; it is made
    # a parameter here, and the default of 512 characters is an assumption.
    total_predictions = 0
    human_written_count = 0
    ai_generated_count = 0

    # Walk the text in non-overlapping windows of max_length characters.
    for i in range(0, len(text), max_length):
        chunk = text[i:i + max_length]
        prob, predicted_label = predict(chunk)

        total_predictions += 1
        if predicted_label == 'Human-Written':
            human_written_count += 1
        elif predicted_label == 'AI-Generated':
            ai_generated_count += 1

    # Guard against empty input so the division below cannot fail.
    if total_predictions == 0:
        return 0.0, 0.0

    human_written_prob = human_written_count / total_predictions
    ai_generated_prob = ai_generated_count / total_predictions
    return human_written_prob, ai_generated_prob
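
# A minimal end-to-end sketch tying the two functions together. It assumes
# predict() has been defined elsewhere (e.g., a fine-tuned classifier that
# returns a (probability, label) pair); the URL is a placeholder.
if __name__ == "__main__":
    url = "https://example.com/some-article"  # hypothetical URL
    text = scrape_website(url)
    if text:
        human_prob, ai_prob = segmentation(text)
        print(f"Human-written: {human_prob:.2%}")
        print(f"AI-generated: {ai_prob:.2%}")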