Krithikesh77 committed on
Commit
99b894b
·
verified ·
1 Parent(s): f50e910

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +57 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import WebBaseLoader
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from langchain.agents import AgentExecutor, create_react_agent
4
+ from langchain import hub
5
+ from langchain.tools import tool
6
+ from dotenv import load_dotenv
7
+ import os
8
+ import ast
9
+
10
# Load variables from a local .env file (if present) into the process
# environment; this must run before the API key lookup below.
load_dotenv()

# Gemini model identifier passed to ChatGoogleGenerativeAI when the agent is built.
model = "gemini-2.5-flash"

# API key for the Google Generative AI backend; None when the variable is unset.
google_api_key = os.environ.get("GOOGLE_API_KEY")
13
+
14
# Single source of truth for the message returned on unusable tool input.
_INVALID_URLS_MESSAGE = (
    "Invalid input format. please provide a list of URLs as a string "
    "(e.g., \"['https://hereandnowai.com', 'https://hereandnow.co.in']\")"
)


def _parse_url_input(urls: str):
    """Return the URLs encoded in *urls* as a list of strings, or None if unusable.

    Accepted forms:
      * a Python list/tuple literal of strings, e.g. "['https://a', 'https://b']"
      * a quoted string literal holding one URL, e.g. "'https://a'"
      * a single bare URL, e.g. "https://a"
    """
    text = urls.strip()

    # Agents frequently emit a single bare URL instead of a list literal; a bare
    # URL is never a valid Python literal, so handle it before literal_eval.
    if text.startswith(("http://", "https://")) and not any(ch.isspace() for ch in text):
        return [text]

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # literal_eval is documented to raise any of these on malformed input
        # (e.g. TypeError for "{[1]: 2}"); treat them all as "bad format"
        # rather than letting them escape the tool.
        return None

    if isinstance(parsed, str):
        parsed = [parsed]
    if not isinstance(parsed, (list, tuple)):
        return None
    if not all(isinstance(item, str) for item in parsed):
        return None
    return list(parsed)


# NOTE: the docstring below doubles as the tool description shown to the LLM,
# so it is written for the model as much as for human readers.
@tool
def web_scrape_tool(urls: str) -> str:
    """
    Scrapes content from a list of URLs.
    The input should be a string representation of a python list of URLs
    (e.g., "['https://hereandnowai.com', 'https://hereandnow.co.in']").
    A single bare URL (e.g., "https://hereandnowai.com") is also accepted.
    Returns the concatenated text content of all scraped pages.
    """
    url_list = _parse_url_input(urls)
    if url_list is None:
        return _INVALID_URLS_MESSAGE

    combined_content = []
    for url in url_list:
        try:
            # One loader per URL so that a failure on one page does not
            # discard the content already collected from the others.
            loader = WebBaseLoader(
                [url], requests_kwargs={"headers": {"User-Agent": "Caramel AI"}}
            )
            combined_content.extend(doc.page_content for doc in loader.load())
        except Exception as e:
            # Deliberate best effort: report the failure inline so the agent
            # can see which URL failed and why, then continue with the rest.
            combined_content.append(f"Could not scrape {url}. Error: {e}")
    return "\n\n".join(combined_content)
41
+
42
def run_web_scraping_agent():
    """
    Build a ReAct agent equipped with the web scrape tool and run one demo query.

    The agent uses the Gemini chat model configured at module level and the
    "hwchase17/react" prompt pulled from the LangChain hub. The question and
    the agent's final answer are printed to stdout; nothing is returned.
    """
    chat_model = ChatGoogleGenerativeAI(model=model, google_api_key=google_api_key)
    available_tools = [web_scrape_tool]
    react_prompt = hub.pull("hwchase17/react")

    executor = AgentExecutor(
        agent=create_react_agent(chat_model, available_tools, react_prompt),
        tools=available_tools,
        verbose=True,
        # Let the executor recover when the model's output does not parse,
        # instead of aborting the run.
        handle_parsing_errors=True,
    )

    print("\n--- Query 1: Get content from the home page ---")
    result = executor.invoke(
        {
            "input": "What is the cto of HERE AND NOW AI? The url is https://hereandnowai.com/about-here-and-now-ai/"
        }
    )
    print(f"Agent's response: {result['output']}")
55
+
56
# Run the demo agent only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    run_web_scraping_agent()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-google-genai
3
+ langchain-community
4
+ python-dotenv
5
+ gradio
6
+ google-generativeai