Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,23 +8,32 @@ import gradio as gr
|
|
8 |
import os
|
9 |
import ast
|
10 |
|
|
|
11 |
load_dotenv()
|
12 |
google_api_key = os.getenv("GOOGLE_API_KEY")
|
|
|
13 |
model = "gemini-2.5-flash"
|
14 |
|
15 |
@tool
|
16 |
def web_scrape_tool(urls: str) -> str:
|
|
|
|
|
|
|
|
|
17 |
try:
|
18 |
url_list = ast.literal_eval(urls)
|
19 |
if not isinstance(url_list, list) or not all(isinstance(url, str) for url in url_list):
|
20 |
return "Invalid input format. Please provide a list of URLs as a string."
|
21 |
-
except
|
22 |
return "Invalid input format. Please provide a list of URLs as a string."
|
23 |
-
|
24 |
combined_content = []
|
25 |
for url in url_list:
|
26 |
try:
|
27 |
-
loader = WebBaseLoader(
|
|
|
|
|
|
|
28 |
documents = loader.load()
|
29 |
for doc in documents:
|
30 |
combined_content.append(doc.page_content)
|
@@ -32,24 +41,26 @@ def web_scrape_tool(urls: str) -> str:
|
|
32 |
combined_content.append(f"Could not scrape {url}. Error: {e}")
|
33 |
return "\n\n".join(combined_content)
|
34 |
|
35 |
-
#
|
36 |
llm = ChatGoogleGenerativeAI(model=model, google_api_key=google_api_key)
|
37 |
tools = [web_scrape_tool]
|
38 |
prompt = hub.pull("hwchase17/react")
|
39 |
-
agent = create_react_agent(llm, tools, prompt)
|
40 |
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, handle_parsing_errors=True)
|
41 |
|
42 |
-
# Gradio interface
|
43 |
-
def ask_agent(
|
44 |
try:
|
45 |
-
result = agent_executor.invoke({"input":
|
46 |
-
return result[
|
47 |
except Exception as e:
|
48 |
-
return f"Error: {
|
49 |
|
|
|
50 |
gr.Interface(
|
51 |
fn=ask_agent,
|
52 |
-
inputs=gr.Textbox(
|
53 |
-
outputs=gr.Textbox(label="Agent's Response"),
|
54 |
-
title="Web Scraping Agent
|
|
|
55 |
).launch()
|
|
|
8 |
import os
import ast

# Load environment variables
# (load_dotenv comes from python-dotenv, imported earlier in the file;
# it reads a local .env file into the process environment.)
load_dotenv()
# Gemini API key for ChatGoogleGenerativeAI; None if GOOGLE_API_KEY is unset.
google_api_key = os.getenv("GOOGLE_API_KEY")
# User-Agent header the scraping tool sends; overridable via USER_AGENT env var.
user_agent = os.getenv("USER_AGENT", "Caramel AI Bot")
# Gemini model identifier passed to the chat model below.
model = "gemini-2.5-flash"
|
16 |
|
17 |
@tool
def web_scrape_tool(urls: str) -> str:
    """Scrapes content from a list of URLs.

    The input should be a string representation of a Python list of URLs
    (e.g., "['https://hereandnowai.com', 'https://hereandnow.co.in']").
    Returns the concatenated text content of all scraped pages, separated by
    blank lines; pages that fail to load contribute an error message instead.
    """
    # Single source of truth for the validation failure message (it was
    # duplicated verbatim in two branches).
    invalid_msg = "Invalid input format. Please provide a list of URLs as a string."

    # literal_eval safely parses the agent-supplied string; it raises
    # ValueError/SyntaxError on anything that is not a Python literal.
    try:
        url_list = ast.literal_eval(urls)
        if not isinstance(url_list, list) or not all(isinstance(url, str) for url in url_list):
            return invalid_msg
    except (ValueError, SyntaxError):
        return invalid_msg

    # Robustness: an empty list previously returned "", which the agent could
    # misread as "the pages were empty" — report it explicitly instead.
    if not url_list:
        return "No URLs were provided."

    combined_content = []
    for url in url_list:
        try:
            # One loader per URL so a single bad URL cannot abort the batch;
            # the custom User-Agent avoids default-UA blocking by some sites.
            loader = WebBaseLoader(
                [url],
                requests_kwargs={"headers": {"User-Agent": user_agent}},
            )
            documents = loader.load()
            for doc in documents:
                combined_content.append(doc.page_content)
        except Exception as e:
            # Best-effort: record the failure inline and keep scraping the rest.
            combined_content.append(f"Could not scrape {url}. Error: {e}")
    return "\n\n".join(combined_content)
|
43 |
|
44 |
+
# Setup the LLM and Agent
# ReAct-style agent: the Gemini chat model plus the single web-scraping tool,
# wired together with the public "hwchase17/react" prompt from LangChain Hub.
llm = ChatGoogleGenerativeAI(model=model, google_api_key=google_api_key)
tools = [web_scrape_tool]
prompt = hub.pull("hwchase17/react")
agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
# handle_parsing_errors=True lets the executor recover and retry when the LLM
# emits output that does not match the ReAct format, instead of raising.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, handle_parsing_errors=True)
|
50 |
|
51 |
+
# Gradio interface function
|
52 |
+
def ask_agent(user_input):
    """Run the agent on *user_input* and return its final answer text.

    This is the Gradio callback: any exception raised during agent execution
    is caught here and rendered as an error string so the UI never crashes.
    """
    try:
        response = agent_executor.invoke({"input": user_input})
    except Exception as e:  # top-level UI boundary: surface the error, don't crash
        return f"❌ Error: {e}"
    return response["output"]
|
58 |
|
59 |
+
# Launch Gradio app
# Minimal single-textbox UI around ask_agent; .launch() blocks and serves it.
gr.Interface(
    fn=ask_agent,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question with a URL...", label="Your Query"),
    outputs=gr.Textbox(lines=10, label="Agent's Response"),
    title="Gemini Web Scraping Agent",
    description="Ask a question about one or more web pages. Include URLs in the question."
).launch()
|