# pxy/app.py — HuggingFace Space "pxy" by dkescape (commit 7437948, verified)
import gradio as gr
import requests
from bs4 import BeautifulSoup
import urllib.parse
def fetch_website(url):
    """Fetch a web page and return its title, status, content preview, and links.

    Parameters
    ----------
    url : str
        The URL to fetch. A missing scheme defaults to ``https://``.

    Returns
    -------
    dict
        Keys: ``"title"``, ``"url"``, ``"status_code"``, ``"content_preview"``,
        ``"links"`` (newline-joined, at most 20). On any failure, ``"title"``
        is ``"Error"``, ``"status_code"`` is 500, and ``"content_preview"``
        holds the error message.
    """
    try:
        # Default to https when the user omits a scheme (e.g. "example.com").
        if not urllib.parse.urlparse(url).scheme:
            url = "https://" + url
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # soup.title.string is None for an empty <title></title>; guard both.
        title = soup.title.string.strip() if soup.title and soup.title.string else "No Title"
        # Resolve each href against the final page URL (after redirects), not
        # the site root — a relative link like "page2.html" on
        # ".../dir/index.html" must resolve inside /dir/, not at /.
        links = [
            urllib.parse.urljoin(response.url, a['href'])
            for a in soup.find_all('a', href=True)
        ]
        return {
            "title": title,
            "url": response.url,  # final URL after any redirects
            "status_code": response.status_code,
            "content_preview": response.text[:1000],  # First 1000 chars
            "links": "\n".join(links[:20])  # Show up to 20 links
        }
    except Exception as e:
        # Broad catch is deliberate: this sits at the UI boundary, so any
        # network/parse failure becomes a structured result the UI can show.
        return {
            "title": "Error",
            "url": url,  # echo the requested URL so the user sees what failed
            "status_code": 500,
            "content_preview": str(e),
            "links": ""
        }
# UI Definition: two-row layout — URL entry + button on top, results below.
with gr.Blocks(theme="default") as demo:
    gr.Markdown("# 🌐 Proxy Website Surfer\nEnter a URL to fetch its contents through this HuggingFace-powered proxy.")
    with gr.Row():
        url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com")
        submit_btn = gr.Button("Fetch Site")
    with gr.Row():
        title_output = gr.Textbox(label="Page Title")
        status_output = gr.Number(label="HTTP Status Code")
    content_output = gr.Textbox(label="Content Preview (first 1000 characters)", lines=10)
    link_output = gr.Textbox(label="Extracted Links", lines=15)

    def wrapper(url):
        """Adapt fetch_website's dict result to the four Gradio outputs."""
        result = fetch_website(url)
        return (
            result["title"],
            result["status_code"],
            result["content_preview"],
            result["links"]
        )

    outputs = [title_output, status_output, content_output, link_output]
    submit_btn.click(fn=wrapper, inputs=url_input, outputs=outputs)
    # Also fire on Enter inside the URL textbox, not just the button click.
    url_input.submit(fn=wrapper, inputs=url_input, outputs=outputs)

# Guard the launch so importing this module (e.g. from tests or another app)
# does not start the server as a side effect. HF Spaces executes app.py as
# __main__, so the Space still launches normally.
if __name__ == "__main__":
    demo.launch()