# Source: commit b875e40 by ahn1305 ("file name change").
import gradio as gr
import os
import time
import json
import requests
from firecrawl import FirecrawlApp
# Configuration
#
# SECURITY: the two API keys below were committed in plain text and must be
# treated as compromised -- rotate them and provide the replacements through
# the OPENROUTER_API_KEY / FIRECRAWL_API_KEY environment variables. The
# literals are kept only as fallbacks so existing deployments keep working
# until the environment is configured; delete them once it is.
API_KEY = os.environ.get(
    "OPENROUTER_API_KEY",
    "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",
)
FIRECRAWL_API_KEY = os.environ.get(
    "FIRECRAWL_API_KEY",
    "fc-b9c9f8db590f4ea99b122d93eaf5160b",
)

# Chat-completions endpoint that ask_question() posts to.
API_URL = "https://openrouter.ai/api/v1/chat/completions"

# Directory where scraped pages are written as markdown files.
SCRAPED_DATA_DIR = "scraped_data"
HARDCODED_DATA_FILE = "innotechtitans_data.json"

# Create the output directory up front so later writes cannot fail on a
# missing folder.
os.makedirs(SCRAPED_DATA_DIR, exist_ok=True)
# Function to scrape website data
def scrape_data(url):
    """Scrape ``url`` with Firecrawl and save the returned markdown to disk.

    The markdown is written to ``SCRAPED_DATA_DIR/<host>.md``, where ``<host>``
    is the part of the URL between the scheme and the first ``/``, with any
    character that is unsafe in a file name replaced by ``_``.

    Args:
        url: Address of the page to scrape.

    Returns:
        A status message for the UI: a success message naming the saved file,
        or a string starting with ``"Error:"`` when the URL is empty, the
        scrape call fails, or the response contains no markdown.
    """
    # Reject empty input before spending an API call on it.
    if not url or not url.strip():
        return "Error: Please enter a URL to scrape."
    url = url.strip()

    try:
        app = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
        scraped_data = app.scrape_url(url, {'pageOptions': {'onlyMainContent': True}})
    except Exception as exc:
        # UI boundary: the SDK's exception types are not visible from this
        # file, so report any failure as a message instead of crashing the
        # Gradio callback.
        return f"Error: Unable to scrape data. ({exc})"

    if not scraped_data or 'markdown' not in scraped_data:
        return "Error: Unable to scrape data."

    # Host portion of the URL; works with or without a scheme.
    host = url.split("//")[-1].split("/")[0]
    # Ports, credentials or query strings can put ':', '@', '?' etc. into the
    # host part; replace them so the result is a valid file name everywhere.
    domain_name = "".join(
        ch if ch.isalnum() or ch in "._-" else "_" for ch in host
    ) or "site"

    os.makedirs(SCRAPED_DATA_DIR, exist_ok=True)
    file_path = os.path.join(SCRAPED_DATA_DIR, f"{domain_name}.md")
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(scraped_data['markdown'])
    return f"✅ Scraped data saved as {domain_name}.md. Load it to proceed."
# Function to load data from a markdown file
def load_data(file_name):
    """Load a previously scraped markdown file into the global ``loaded_data``.

    Args:
        file_name: Name of a file inside ``SCRAPED_DATA_DIR`` (e.g. ``site.md``).

    Returns:
        A status message for the UI: a success message once the file content
        has been stored in ``loaded_data``, or the "File not found" error when
        the name is empty, resolves outside ``SCRAPED_DATA_DIR``, or does not
        refer to a regular file.
    """
    global loaded_data
    not_found = "❌ Error: File not found."

    if not file_name or not file_name.strip():
        return not_found

    # Resolve the requested path and make sure it stays inside the data
    # directory: the name comes straight from a text box, and whatever is
    # loaded here is later sent to a third-party LLM, so "../" tricks must not
    # be able to read arbitrary files.
    base_dir = os.path.realpath(SCRAPED_DATA_DIR)
    file_path = os.path.realpath(os.path.join(base_dir, file_name.strip()))
    if not file_path.startswith(base_dir + os.sep):
        return not_found
    # isfile (not exists) so a directory name is reported as missing instead
    # of crashing in open().
    if not os.path.isfile(file_path):
        return not_found

    with open(file_path, 'r', encoding='utf-8') as file:
        loaded_data = file.read()
    return "✅ Data loaded successfully. You can now ask questions."
# Text most recently loaded by load_data(). Initialised here so that asking a
# question before anything has been loaded yields the friendly warning below
# instead of a NameError.
loaded_data = ""


# Function to send a query to LLM
def ask_question(question):
    """Send the loaded document plus ``question`` to the chat-completions API.

    The prompt is the content of the global ``loaded_data`` followed by a
    blank line and the question, sent as a single user message.

    Args:
        question: The user's question about the loaded data.

    Returns:
        The model's reply text; ``"No response."`` when the reply carries no
        content; a warning when no data has been loaded; or a generic error
        message when the request fails or returns a non-200 status.
    """
    if not loaded_data:
        return "⚠️ No data loaded. Please scrape a website or load data first."

    error_message = "❌ Error: Unable to generate response."
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": "deepseek/deepseek-chat:free",
        "messages": [{"role": "user", "content": f"{loaded_data}\n\n{question}"}],
    }

    try:
        # Without a timeout a stalled connection would hang the UI callback
        # indefinitely.
        response = requests.post(API_URL, json=payload, headers=headers, timeout=120)
    except requests.RequestException:
        return error_message
    if response.status_code != 200:
        return error_message

    try:
        body = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON.
        return error_message

    # "choices" may be missing, null or an empty list; fall back to an empty
    # choice so the lookups below cannot raise.
    choices = body.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content") or "No response."
# Gradio Interface
#
# Three tabs, one per step of the workflow: scrape a site, load a saved
# markdown file, then ask questions about the loaded text. Each tab wires one
# button to the matching callback and shows the returned status/answer in a
# Markdown component.
# NOTE(review): the nesting below (inputs and button inside each Row, output
# underneath) is reconstructed -- confirm against the intended layout.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("""
    <h1 style='text-align: center;'>🕸️ Web Scraper & AI QnA</h1>
    <p style='text-align: center; font-size: 18px;'>Scrape websites and ask AI-powered questions!</p>
    """)

    with gr.Tab("Scrape Website"):
        with gr.Row():
            url_input = gr.Textbox(label="🌐 Website URL", placeholder="Enter URL to scrape")
            scrape_button = gr.Button("🚀 Scrape", variant="primary")
        scrape_output = gr.Markdown()
        scrape_button.click(scrape_data, inputs=[url_input], outputs=[scrape_output])

    with gr.Tab("Load Data"):
        with gr.Row():
            file_input = gr.Textbox(label="📂 Markdown File Name", placeholder="Enter filename (e.g., site.md)")
            load_button = gr.Button("📥 Load", variant="primary")
        load_output = gr.Markdown()
        load_button.click(load_data, inputs=[file_input], outputs=[load_output])

    with gr.Tab("Ask AI"):
        with gr.Row():
            question_input = gr.Textbox(label="❓ Ask a Question", placeholder="Ask based on loaded data")
            ask_button = gr.Button("💬 Ask", variant="primary")
        answer_output = gr.Markdown()
        ask_button.click(ask_question, inputs=[question_input], outputs=[answer_output])

demo.launch()