Krithikesh77 committed on
Commit
fd204d9
·
verified ·
1 Parent(s): 83fcef5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -35
app.py CHANGED
@@ -1,66 +1,70 @@
1
- from langchain_community.document_loaders import WebBaseLoader
2
  from langchain_google_genai import ChatGoogleGenerativeAI
 
 
 
3
  from langchain.agents import AgentExecutor, create_react_agent
4
  from langchain import hub
5
  from langchain.tools import tool
6
- from dotenv import load_dotenv
7
- import gradio as gr
8
- import os
9
  import ast
 
10
 
11
  # Load environment variables
12
  load_dotenv()
13
- google_api_key = os.getenv("GOOGLE_API_KEY")
14
- user_agent = os.getenv("USER_AGENT", "Caramel AI Bot")
15
- model = "gemini-2.5-flash"
16
 
17
  @tool
18
- def web_scrape_tool(urls: str) -> str:
19
- """Scrapes content from a list of URLs.
20
- The input should be a string representation of a Python list of URLs
21
- (e.g., "['https://hereandnowai.com', 'https://hereandnow.co.in']").
22
- Returns the concatenated text content of all scraped pages."""
23
  try:
24
- url_list = ast.literal_eval(urls)
25
  if not isinstance(url_list, list) or not all(isinstance(url, str) for url in url_list):
26
- return "Invalid input format. Please provide a list of URLs as a string."
27
- except (ValueError, SyntaxError):
28
- return "Invalid input format. Please provide a list of URLs as a string."
29
-
30
  combined_content = []
31
  for url in url_list:
32
  try:
33
  loader = WebBaseLoader(
34
  [url],
35
- requests_kwargs={"headers": {"User-Agent": user_agent}},
36
  )
37
  documents = loader.load()
38
  for doc in documents:
39
  combined_content.append(doc.page_content)
40
  except Exception as e:
41
- combined_content.append(f"Could not scrape {url}. Error: {e}")
42
- return "\n\n".join(combined_content)
 
43
 
44
- # Setup the LLM and Agent
45
- llm = ChatGoogleGenerativeAI(model=model, google_api_key=google_api_key)
46
- tools = [web_scrape_tool]
47
  prompt = hub.pull("hwchase17/react")
48
- agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
49
  agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, handle_parsing_errors=True)
50
 
51
- # Gradio interface function
52
  def ask_agent(user_input):
53
  try:
54
- result = agent_executor.invoke({"input": user_input})
55
- return result["output"]
56
  except Exception as e:
57
- return f"Error: {e}"
58
 
59
- # Launch Gradio app
60
- gr.Interface(
61
  fn=ask_agent,
62
- inputs=gr.Textbox(lines=2, placeholder="Ask a question with a URL...", label="Your Query"),
63
- outputs=gr.Textbox(lines=10, label="Agent's Response"),
64
- title="Gemini Web Scraping Agent",
65
- description="Ask a question about one or more web pages. Include URLs in the question."
66
- ).launch()
 
 
 
 
1
+ import gradio as gr
2
  from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from dotenv import load_dotenv
4
+ import os
5
+ from langchain_community.document_loaders import WebBaseLoader
6
  from langchain.agents import AgentExecutor, create_react_agent
7
  from langchain import hub
8
  from langchain.tools import tool
 
 
 
9
  import ast
10
+ import warnings
11
 
12
# Load environment variables from a local .env file (if present) so the
# Google API key does not have to be hard-coded.
load_dotenv()

# NOTE(review): may be None when GOOGLE_API_KEY is unset — confirm the
# deployment always provides it before ChatGoogleGenerativeAI is built.
api_key = os.getenv("GOOGLE_API_KEY")
model = 'gemini-2.5-flash'
 
16
 
17
@tool
def web_scrap_tool(url: str) -> str:
    ''' Scrapes content from a list of URLs.
    The input should be a string representation of a Python list of URLs (e.g., "['https://example.com']").
    Returns the concatenated text content of all scraped pages.

    Raises:
        ValueError: if the input cannot be parsed into a list of URL strings.
    '''
    # Parse the agent-supplied string into a Python object. ast.literal_eval
    # only accepts Python literals, so it is safe on untrusted input (unlike eval).
    # Parsing and validation are kept in SEPARATE try blocks: previously the
    # validation ValueError was raised inside the same try and immediately
    # re-caught by `except (ValueError, SyntaxError)`, double-wrapping its message.
    try:
        url_list = ast.literal_eval(url)
    except (ValueError, SyntaxError) as e:
        raise ValueError(f"Invalid input format: {e}. Please provide a valid python list of URLs.")
    if not isinstance(url_list, list) or not all(isinstance(u, str) for u in url_list):
        raise ValueError("Input must be a list of URLs as strings. Example: ['https://example.com']")

    combined_content = []
    # `target` (not `url`) so the loop does not shadow the parameter.
    for target in url_list:
        try:
            loader = WebBaseLoader(
                [target],
                requests_kwargs={"headers": {"User-Agent": "caramel-AI"}},
            )
            documents = loader.load()
            for doc in documents:
                combined_content.append(doc.page_content)
        except Exception as e:
            # Best-effort: record the failure for this URL and keep scraping the rest.
            combined_content.append(f"Could not scrape {target}. Error: {e}.")

    return "\n".join(combined_content)
44
 
45
# Build the LLM, the ReAct agent, and its executor once at import time so
# every Gradio request reuses the same instances.
llm = ChatGoogleGenerativeAI(model=model, api_key=api_key)
tools = [web_scrap_tool]
# Standard ReAct prompt pulled from the LangChain Hub.
prompt = hub.pull("hwchase17/react")
agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=False,
    handle_parsing_errors=True,  # recover from malformed LLM output instead of crashing
)
51
 
52
# Function for Gradio interface
def ask_agent(user_input):
    """Run the agent on a single user query and return its final answer.

    Any exception raised while the agent runs (including a missing 'output'
    key) is converted to a plain "Error: ..." string so the UI never crashes.
    """
    try:
        response = agent_executor.invoke({'input': user_input})
        return response['output']
    except Exception as err:
        return f"Error: {str(err)}"
59
 
60
# Gradio UI: one text box in, one text box out, wired to the agent.
_query_box = gr.Textbox(
    label="Ask about a webpage",
    placeholder="e.g., What is the content of the 'About Us' page of https://example.com?",
)
_answer_box = gr.Textbox(label="Agent Response")

ui = gr.Interface(
    fn=ask_agent,
    inputs=_query_box,
    outputs=_answer_box,
    title="Web Scraping Agent",
    description="Ask a question about the contents of any webpage. The agent will scrape and respond accordingly.",
)

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    ui.launch()