Spaces:
Running
Running
import json
import os
import time
from urllib.parse import urlparse

import gradio as gr
import requests
from firecrawl import FirecrawlApp
# Configuration
# Credentials are read from the environment so that secrets never live in
# source control.  Set OPENROUTER_API_KEY and FIRECRAWL_API_KEY in the
# deployment's secret store before starting the app.
# NOTE: the keys that were previously committed in this file must be treated
# as leaked and rotated with their providers.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY", "")

# Chat-completions endpoint that receives the question payloads.
API_URL = "https://openrouter.ai/api/v1/chat/completions"

# Directory where scraped pages are written as ``<domain>.md`` files.
SCRAPED_DATA_DIR = "scraped_data"

# Not referenced by the code visible in this file; kept for compatibility.
HARDCODED_DATA_FILE = "innotechtitans_data.json"

# Make sure the output directory exists before any scrape/load call runs.
os.makedirs(SCRAPED_DATA_DIR, exist_ok=True)
def _file_stem_from_url(url):
    """Return a filesystem-safe file stem derived from the host part of ``url``."""
    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # No scheme given (e.g. "example.com/page"): re-parse the text
            # as a network location so the host is still recognised.
            parsed = urlparse(f"//{url}")
    except ValueError:
        return ""
    # Drop any "user:password@" prefix so credentials never reach a file name.
    host = parsed.netloc.rsplit("@", 1)[-1]
    # Replace characters such as ":" (port separator) that are unsafe or
    # invalid in file names on some platforms.
    cleaned = "".join(ch if ch.isalnum() or ch in "._-" else "_" for ch in host)
    return cleaned.strip("._")


# Function to scrape website data
def scrape_data(url):
    """Scrape ``url`` with Firecrawl and save its markdown under SCRAPED_DATA_DIR.

    Args:
        url: Address of the page to scrape, with or without a scheme.

    Returns:
        A user-facing status message: a confirmation naming the saved
        ``<domain>.md`` file, or an ``Error: ...`` message when the URL is
        empty/invalid or the scrape did not yield markdown.
    """
    url = (url or "").strip()
    if not url:
        return "Error: Please enter a URL to scrape."

    # Validate the URL before spending a scraper request on it.
    domain_name = _file_stem_from_url(url)
    if not domain_name:
        return "Error: Invalid URL."

    # This is a UI callback boundary: any client/network failure is turned
    # into a message instead of surfacing a traceback in the interface.
    try:
        app = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
        scraped_data = app.scrape_url(url, {'pageOptions': {'onlyMainContent': True}})
    except Exception:
        return "Error: Unable to scrape data."

    markdown = scraped_data.get('markdown') if isinstance(scraped_data, dict) else None
    if not markdown:
        return "Error: Unable to scrape data."

    file_path = os.path.join(SCRAPED_DATA_DIR, f"{domain_name}.md")
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(markdown)
    return f"✅ Scraped data saved as {domain_name}.md. Load it to proceed."
# Function to load data from a markdown file
def load_data(file_name):
    """Read a previously scraped file into the module-level ``loaded_data``.

    Args:
        file_name: Bare name of a file inside SCRAPED_DATA_DIR
            (for example ``site.md``).  Names containing path separators
            are rejected so callers cannot read files outside that directory.

    Returns:
        A user-facing status message describing success or failure.
        ``loaded_data`` is only replaced when the file was read successfully.
    """
    global loaded_data

    not_found = "❌ Error: File not found."
    name = (file_name or "").strip()
    # The name comes straight from a text box: refuse anything that could
    # escape SCRAPED_DATA_DIR ("../x", absolute paths, nested paths).
    if not name or name in (".", "..") or "/" in name or "\\" in name:
        return not_found

    file_path = os.path.join(SCRAPED_DATA_DIR, name)
    # isfile (not exists) so that a directory is never passed to open().
    if not os.path.isfile(file_path):
        return not_found

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            contents = file.read()
    except (OSError, UnicodeDecodeError):
        return "❌ Error: Unable to read file."

    loaded_data = contents
    return "✅ Data loaded successfully. You can now ask questions."
# Function to send a query to LLM
def ask_question(question):
    """Answer ``question`` using the currently loaded page text as context.

    The text stored in the module-level ``loaded_data`` and the question are
    sent as a single user message to the chat-completions endpoint at
    ``API_URL``.

    Args:
        question: The question typed by the user.

    Returns:
        The model's reply text, ``"No response."`` when the reply carries no
        content, or a user-facing warning/error message when no data is
        loaded, the question is empty, or the request fails.
    """
    failure = "❌ Error: Unable to generate response."

    # ``loaded_data`` only comes into existence once load_data() has
    # succeeded, so a bare read would raise NameError on a fresh start.
    try:
        context = loaded_data
    except NameError:
        context = ""
    if not context:
        return "⚠️ No data loaded. Please scrape a website or load data first."
    if not question or not question.strip():
        return "⚠️ Please enter a question."

    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": "deepseek/deepseek-chat:free",
        "messages": [{"role": "user", "content": f"{context}\n\n{question}"}],
    }

    # A timeout keeps a stalled connection from hanging the UI callback.
    try:
        response = requests.post(API_URL, json=payload, headers=headers, timeout=60)
    except requests.RequestException:
        return failure
    if response.status_code != 200:
        return failure

    try:
        body = response.json()
    except ValueError:
        return failure

    # Walk the reply defensively: a 200 response may still carry an empty
    # ``choices`` list or a null ``content``.
    choices = body.get("choices") if isinstance(body, dict) else None
    first = choices[0] if isinstance(choices, list) and choices else {}
    message = first.get("message") if isinstance(first, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    return content or "No response."
# Gradio Interface
# Three tabs drive the workflow: scrape a page to disk, load a saved file
# into memory, then ask questions about the loaded text.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("""
<h1 style='text-align: center;'>🕸️ Web Scraper & AI QnA</h1>
<p style='text-align: center; font-size: 18px;'>Scrape websites and ask AI-powered questions!</p>
""")

    with gr.Tab("Scrape Website"):
        with gr.Row():
            url_input = gr.Textbox(label="🌐 Website URL", placeholder="Enter URL to scrape")
            scrape_button = gr.Button("🔍 Scrape", variant="primary")
        scrape_output = gr.Markdown()
        scrape_button.click(scrape_data, inputs=[url_input], outputs=[scrape_output])

    with gr.Tab("Load Data"):
        with gr.Row():
            file_input = gr.Textbox(label="📂 Markdown File Name", placeholder="Enter filename (e.g., site.md)")
            load_button = gr.Button("📥 Load", variant="primary")
        load_output = gr.Markdown()
        load_button.click(load_data, inputs=[file_input], outputs=[load_output])

    with gr.Tab("Ask AI"):
        with gr.Row():
            question_input = gr.Textbox(label="❓ Ask a Question", placeholder="Ask based on loaded data")
            ask_button = gr.Button("💬 Ask", variant="primary")
        answer_output = gr.Markdown()
        ask_button.click(ask_question, inputs=[question_input], outputs=[answer_output])

# Start the server only when executed as a script, so that importing this
# module (tests, tooling) does not launch a web server as a side effect.
if __name__ == "__main__":
    demo.launch()