ahn1305 committed on
Commit
c54a725
·
1 Parent(s): 846be1b

initial commit

Browse files
Files changed (2) hide show
  1. requirements.txt +74 -0
  2. uqnaapp.py +89 -0
requirements.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ annotated-types==0.7.0
3
+ anyio==4.4.0
4
+ beautifulsoup4==4.12.3
5
+ bs4==0.0.2
6
+ cachetools==5.5.0
7
+ certifi==2024.2.2
8
+ charset-normalizer==3.3.2
9
+ click==8.1.7
10
+ exceptiongroup==1.2.1
11
+ fastapi==0.115.3
12
+ ffmpy==0.4.0
13
+ filelock==3.15.4
14
+ firecrawl-py==0.0.12
15
+ fpdf==1.7.2
16
+ fsspec==2024.6.1
17
+ gradio==5.3.0
18
+ gradio_client==1.4.2
19
+ grpcio==1.66.0
20
+ grpcio-status==1.62.3
21
+ h11==0.14.0
22
+ httpcore==1.0.5
23
+ httplib2==0.22.0
24
+ httpx==0.26.0
25
+ huggingface-hub==0.26.1
26
+ idna==3.7
27
+ iniconfig==2.0.0
28
+ Jinja2==3.1.4
29
+ Markdown==3.7
30
+ markdown-it-py==3.0.0
31
+ MarkupSafe==2.1.5
32
+ mdurl==0.1.2
33
+ numpy==2.1.2
34
+ orjson==3.10.10
35
+ packaging==24.0
36
+ pandas==2.2.3
37
+ pillow==10.4.0
38
+ pluggy==1.5.0
39
+ proto-plus==1.24.0
40
+ protobuf==4.25.4
41
+ pyasn1==0.6.0
42
+ pyasn1_modules==0.4.0
43
+ pydantic==2.7.2
44
+ pydantic_core==2.18.3
45
+ pydub==0.25.1
46
+ Pygments==2.18.0
47
+ pyparsing==3.1.2
48
+ pytest==8.2.1
49
+ python-dateutil==2.9.0.post0
50
+ python-dotenv==1.0.1
51
+ python-multipart==0.0.12
52
+ pytz==2024.2
53
+ PyYAML==6.0.1
54
+ requests==2.32.3
55
+ responses==0.24.1
56
+ rich==13.9.3
57
+ rsa==4.9
58
+ ruff==0.7.1
59
+ semantic-version==2.10.0
60
+ shellingham==1.5.4
61
+ six==1.16.0
62
+ sniffio==1.3.1
63
+ soupsieve==2.5
64
+ starlette==0.41.0
65
+ tomli==2.0.1
66
+ tomlkit==0.12.0
67
+ tqdm==4.66.5
68
+ typer==0.12.5
69
+ typing_extensions==4.12.0
70
+ tzdata==2024.2
71
+ uritemplate==4.1.1
72
+ urllib3==2.2.1
73
+ uvicorn==0.32.0
74
+ websockets==12.0
uqnaapp.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import time
4
+ import json
5
+ import requests
6
+ from firecrawl import FirecrawlApp
7
+
8
+ # Configuration
9
# Secrets are read from the environment so they never live in version control.
# NOTE(review): the keys that used to be hard-coded here were committed
# publicly; treat them as compromised and rotate them.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY", "")
API_URL = "https://openrouter.ai/api/v1/chat/completions"
SCRAPED_DATA_DIR = "scraped_data"
HARDCODED_DATA_FILE = "innotechtitans_data.json"
# Markdown most recently loaded by load_data(); starts empty so that
# ask_question() can test it before anything has been loaded.
loaded_data = ""
os.makedirs(SCRAPED_DATA_DIR, exist_ok=True)
15
+
16
+ # Function to scrape website data
17
def scrape_data(url):
    """Scrape *url* with Firecrawl and save the page's markdown to disk.

    The markdown is written to ``SCRAPED_DATA_DIR/<host>.md``, where ``<host>``
    is the host part of the URL with every character outside
    ``[A-Za-z0-9._-]`` replaced by ``_`` so it is always a safe file name.

    Args:
        url: Address of the page to scrape, with or without a scheme.

    Returns:
        A status message for the UI: a success message naming the saved file,
        or an error message when the URL is empty or the scrape fails.
    """
    import re  # local import: only needed for file-name sanitising

    error_message = "Error: Unable to scrape data."

    url = (url or "").strip()
    if not url:
        return error_message

    # Drop the scheme (if any), then keep everything up to the first "/".
    # Splitting on every "//" would pick the wrong segment for URLs such as
    # "https://example.com//page".
    host = url.split("://", 1)[-1].lstrip("/").split("/", 1)[0]
    # A URL without a path can still carry a query string or fragment.
    host = re.split(r"[?#]", host, maxsplit=1)[0]
    # Ports, user-info and similar characters are not portable in file names.
    domain_name = re.sub(r"[^A-Za-z0-9._-]", "_", host).strip(".")
    if not domain_name:
        return error_message

    try:
        app = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
        scraped_data = app.scrape_url(url, {'pageOptions': {'onlyMainContent': True}})
    except Exception:
        # UI boundary: report any client failure to the user as an error
        # message rather than letting it crash the event handler.
        return error_message

    if not isinstance(scraped_data, dict):
        return error_message
    markdown = scraped_data.get('markdown')
    if not isinstance(markdown, str):
        return error_message

    file_path = os.path.join(SCRAPED_DATA_DIR, f"{domain_name}.md")
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(markdown)

    return f"✅ Scraped data saved as {domain_name}.md. Load it to proceed."
31
+
32
+ # Function to load data from a markdown file
33
def load_data(file_name):
    """Load a previously scraped markdown file into the global ``loaded_data``.

    Only files located inside ``SCRAPED_DATA_DIR`` can be loaded; names that
    resolve outside it (absolute paths, ``../`` segments, symlinks) are
    reported as not found.

    Args:
        file_name: Name of the markdown file, relative to ``SCRAPED_DATA_DIR``.

    Returns:
        A status message for the UI: success, or a "file not found" error.
    """
    global loaded_data

    not_found = "❌ Error: File not found."

    file_name = (file_name or "").strip()
    if not file_name:
        return not_found

    base_dir = os.path.realpath(SCRAPED_DATA_DIR)
    file_path = os.path.realpath(os.path.join(base_dir, file_name))
    # The name comes straight from the web UI, so confine it to the data
    # directory instead of trusting it.
    if not file_path.startswith(base_dir + os.sep):
        return not_found
    # isfile (not exists) so that directories are rejected before open().
    if not os.path.isfile(file_path):
        return not_found

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            loaded_data = file.read()
    except OSError:
        # The file vanished or became unreadable between the check and open.
        return not_found

    return "✅ Data loaded successfully. You can now ask questions."
43
+
44
+ # Function to send a query to LLM
45
def ask_question(question):
    """Send *question*, prefixed with the loaded page data, to the LLM endpoint.

    Args:
        question: The user's question about the currently loaded data.

    Returns:
        The model's answer text, ``"No response."`` when the reply carries no
        content, or a warning/error message when no data is loaded, the
        question is empty, or the request fails.
    """
    # loaded_data may not exist until load_data() has succeeded, so look it
    # up defensively instead of risking a NameError on the first question.
    context = globals().get("loaded_data")
    if not context:
        return "⚠️ No data loaded. Please scrape a website or load data first."

    if not (question or "").strip():
        return "⚠️ Please enter a question."

    error_message = "❌ Error: Unable to generate response."

    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": "deepseek/deepseek-chat:free",
        "messages": [{"role": "user", "content": f"{context}\n\n{question}"}],
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.post(API_URL, json=payload, headers=headers, timeout=60)
    except requests.RequestException:
        return error_message

    if response.status_code != 200:
        return error_message

    try:
        choices = response.json().get("choices") or [{}]
        message = choices[0].get("message") or {}
        content = message.get("content")
    except (ValueError, AttributeError, IndexError, KeyError, TypeError):
        # Body was not JSON, or did not have the expected structure.
        return error_message

    return content or "No response."
60
+
61
+ # Gradio Interface
62
# Three-tab UI: scrape a site, load a saved markdown file, then query the LLM.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    # Built from single-line literals so source indentation can never leak
    # into the markdown (indented lines would otherwise risk rendering as a
    # code block).
    gr.Markdown(
        "<h1 style='text-align: center;'>🕸️ Web Scraper & AI QnA</h1>\n"
        "<p style='text-align: center; font-size: 18px;'>Scrape websites and ask AI-powered questions!</p>\n"
    )

    with gr.Tab("Scrape Website"):
        with gr.Row():
            url_input = gr.Textbox(label="🌐 Website URL", placeholder="Enter URL to scrape")
            scrape_button = gr.Button("🚀 Scrape", variant="primary")
        scrape_output = gr.Markdown()
        scrape_button.click(scrape_data, inputs=[url_input], outputs=[scrape_output])

    with gr.Tab("Load Data"):
        with gr.Row():
            file_input = gr.Textbox(label="📂 Markdown File Name", placeholder="Enter filename (e.g., site.md)")
            load_button = gr.Button("📥 Load", variant="primary")
        load_output = gr.Markdown()
        load_button.click(load_data, inputs=[file_input], outputs=[load_output])

    with gr.Tab("Ask AI"):
        with gr.Row():
            question_input = gr.Textbox(label="❓ Ask a Question", placeholder="Ask based on loaded data")
            ask_button = gr.Button("💬 Ask", variant="primary")
        answer_output = gr.Markdown()
        ask_button.click(ask_question, inputs=[question_input], outputs=[answer_output])

# Launch stays at module level, as in the original script.
demo.launch()