import urllib.parse

import gradio as gr
import requests
from bs4 import BeautifulSoup
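
# The app needs: pip install gradio requests beautifulsoup4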


def fetch_website(url):
    try:
        headers = {
            # A browser-like User-Agent; some sites refuse requests from the
            # default python-requests client.
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        # soup.title.string is None for an empty <title> tag, so guard both.
        title = soup.title.string.strip() if soup.title and soup.title.string else "No Title"

        # Resolve each href against the final response URL (after any
        # redirects) so relative links resolve from the page's own path,
        # not the site root.
        links = [
            urllib.parse.urljoin(response.url, a['href'])
            for a in soup.find_all('a', href=True)
        ]
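        # urljoin keeps everything up to the last path segment, e.g.:
        #   urljoin("https://example.com/docs/index.html", "intro.html")
        #   -> "https://example.com/docs/intro.html"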

        return {
            "title": title,
            "url": url,
            "status_code": response.status_code,
            "content_preview": response.text[:1000],
            "links": "\n".join(links[:20]),
        }
    except Exception as e:
        # Report 0 rather than a fabricated 500 so a local failure
        # (bad URL, DNS error, timeout) is not mistaken for a server error.
        return {
            "title": "Error",
            "url": url,
            "status_code": 0,
            "content_preview": str(e),
            "links": "",
        }
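
# Illustrative shape of the returned dict (assuming https://example.com is
# reachable; values are examples, not captured output):
#   fetch_website("https://example.com")
#   -> {"title": "Example Domain", "url": "https://example.com",
#       "status_code": 200, "content_preview": "<!doctype html>...",
#       "links": "https://www.iana.org/domains/example"}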


with gr.Blocks(theme="default") as demo:
    gr.Markdown("# Proxy Website Surfer\nEnter a URL to fetch its contents through this Hugging Face-powered proxy.")

    with gr.Row():
        url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com")

    submit_btn = gr.Button("Fetch Site")

    with gr.Row():
        title_output = gr.Textbox(label="Page Title")
        status_output = gr.Number(label="HTTP Status Code")

    content_output = gr.Textbox(label="Content Preview (first 1000 characters)", lines=10)
    link_output = gr.Textbox(label="Extracted Links", lines=15)

    def wrapper(url):
        # Unpack the dict into a tuple; Gradio assigns the tuple elements to
        # the outputs list positionally.
        result = fetch_website(url)
        return (
            result["title"],
            result["status_code"],
            result["content_preview"],
            result["links"],
        )

    submit_btn.click(
        fn=wrapper,
        inputs=url_input,
        outputs=[title_output, status_output, content_output, link_output],
    )
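    # Pressing Enter in the URL box could trigger the same fetch;
    # Textbox.submit mirrors Button.click's signature:
    # url_input.submit(fn=wrapper, inputs=url_input,
    #                  outputs=[title_output, status_output, content_output, link_output])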


demo.launch()
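# When running outside Hugging Face Spaces, a temporary public link can be
# requested instead: demo.launch(share=True)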