Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +57 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.document_loaders import WebBaseLoader
|
2 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
3 |
+
from langchain.agents import AgentExecutor, create_react_agent
|
4 |
+
from langchain import hub
|
5 |
+
from langchain.tools import tool
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import os
|
8 |
+
import ast
|
9 |
+
|
10 |
+
load_dotenv()  # load variables from a local .env file into the process environment
google_api_key = os.getenv("GOOGLE_API_KEY")  # None if the key is not set in the environment
model = "gemini-2.5-flash"  # Gemini model name passed to ChatGoogleGenerativeAI below
|
13 |
+
|
14 |
+
@tool
def web_scrape_tool(urls: str) -> str:
    """
    Scrapes content from a list of URLs.

    Args:
        urls: A string representation of a Python list of URLs
            (e.g., "['https://hereandnowai.com', 'https://hereandnow.co.in']").
            The string form is required because the ReAct agent passes tool
            input as plain text.

    Returns:
        The concatenated text content of all scraped pages, separated by
        blank lines. A page that fails to load contributes an error note
        instead of content, so one bad URL does not abort the whole batch.
    """
    # Single source of truth for the malformed-input message (previously the
    # same long literal was duplicated on two return paths).
    invalid_input_msg = (
        "Invalid input format. please provide a list of URLs as a string "
        "(e.g., \"['https://hereandnowai.com', 'https://hereandnow.co.in']\")"
    )

    # literal_eval safely parses the agent-supplied string without executing
    # arbitrary code. Keep the try body minimal: only this call can raise
    # ValueError/SyntaxError; the validation below never does.
    try:
        url_list = ast.literal_eval(urls)
    except (ValueError, SyntaxError):
        return invalid_input_msg

    if not isinstance(url_list, list) or not all(isinstance(url, str) for url in url_list):
        return invalid_input_msg

    combined_content = []
    for url in url_list:
        try:
            loader = WebBaseLoader(
                [url], requests_kwargs={"headers": {"User-Agent": "Caramel AI"}}
            )
            for doc in loader.load():
                combined_content.append(doc.page_content)
        except Exception as e:
            # Best-effort scraping: record the failure and continue with the
            # remaining URLs rather than raising back into the agent loop.
            combined_content.append(f"Could not scrape {url}. Error: {e}")
    return "\n\n".join(combined_content)
41 |
+
|
42 |
+
def run_web_scraping_agent():
    """Build a ReAct agent equipped with the web-scrape tool and run a sample query."""
    llm = ChatGoogleGenerativeAI(model=model, google_api_key=google_api_key)
    toolbox = [web_scrape_tool]
    # The standard ReAct prompt from LangChain Hub drives the think/act loop.
    react_prompt = hub.pull("hwchase17/react")
    executor = AgentExecutor(
        agent=create_react_agent(llm, toolbox, react_prompt),
        tools=toolbox,
        verbose=True,
        handle_parsing_errors=True,
    )

    print("\n--- Query 1: Get content from the home page ---")
    question_home_page = "What is the cto of HERE AND NOW AI? The url is https://hereandnowai.com/about-here-and-now-ai/"
    response_home_page = executor.invoke({"input": question_home_page})
    print(f"Agent's response: {response_home_page['output']}")
|
55 |
+
|
56 |
+
# Run the demo agent only when executed as a script, not when imported.
if __name__ == "__main__":
    run_web_scraping_agent()
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
langchain-google-genai
|
3 |
+
langchain-community
|
4 |
+
python-dotenv
|
5 |
+
gradio
|
6 |
+
google-generativeai
|