import urllib.parse

import gradio as gr
import requests
from bs4 import BeautifulSoup


def fetch_website(url):
    """Fetch a URL and return its title, status code, content preview, and links."""
    try:
        # Send a browser-like User-Agent so sites are less likely to block the request.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/119.0 Safari/537.36"
            )
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        title = soup.title.string if soup.title and soup.title.string else "No Title"

        # Resolve each href against the page URL itself, so relative links
        # (e.g. "subpage.html" on https://example.com/dir/) resolve correctly.
        links = [
            urllib.parse.urljoin(url, a["href"])
            for a in soup.find_all("a", href=True)
        ]

        return {
            "title": title,
            "url": url,
            "status_code": response.status_code,
            "content_preview": response.text[:1000],  # First 1000 characters
            "links": "\n".join(links[:20]),  # Show up to 20 links
        }
    except requests.HTTPError as e:
        # The server responded with an error status; report the real code
        # instead of a hard-coded 500.
        return {
            "title": "Error",
            "url": url,
            "status_code": e.response.status_code,
            "content_preview": str(e),
            "links": "",
        }
    except Exception as e:
        # Network failure, timeout, invalid URL, etc. -- no HTTP response was received.
        return {
            "title": "Error",
            "url": url,
            "status_code": 0,
            "content_preview": str(e),
            "links": "",
        }


# UI definition
with gr.Blocks(theme="default") as demo:
    gr.Markdown(
        "# 🌐 Proxy Website Surfer\n"
        "Enter a URL to fetch its contents through this Hugging Face-powered proxy."
    )

    with gr.Row():
        url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com")
        submit_btn = gr.Button("Fetch Site")

    with gr.Row():
        title_output = gr.Textbox(label="Page Title")
        status_output = gr.Number(label="HTTP Status Code")

    content_output = gr.Textbox(label="Content Preview (first 1000 characters)", lines=10)
    link_output = gr.Textbox(label="Extracted Links", lines=15)

    def wrapper(url):
        # Unpack the result dict into the order the output components expect.
        result = fetch_website(url)
        return (
            result["title"],
            result["status_code"],
            result["content_preview"],
            result["links"],
        )

    submit_btn.click(
        fn=wrapper,
        inputs=url_input,
        outputs=[title_output, status_output, content_output, link_output],
    )

demo.launch()
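
# Usage note (a sketch, not part of the original script): the app needs the
# gradio, requests, and beautifulsoup4 packages, e.g.
#   pip install gradio requests beautifulsoup4
# and can be run locally with `python app.py` -- the filename app.py is an
# assumption, chosen because it is the default entry point for a Gradio-based
# Hugging Face Space.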