Krithikesh77 committed on
Commit
83fcef5
·
verified ·
1 Parent(s): 7e39d57

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -13
app.py CHANGED
@@ -8,23 +8,32 @@ import gradio as gr
8
  import os
9
  import ast
10
 
 
11
  load_dotenv()
12
  google_api_key = os.getenv("GOOGLE_API_KEY")
 
13
  model = "gemini-2.5-flash"
14
 
15
  @tool
16
  def web_scrape_tool(urls: str) -> str:
 
 
 
 
17
  try:
18
  url_list = ast.literal_eval(urls)
19
  if not isinstance(url_list, list) or not all(isinstance(url, str) for url in url_list):
20
  return "Invalid input format. Please provide a list of URLs as a string."
21
- except Exception:
22
  return "Invalid input format. Please provide a list of URLs as a string."
23
-
24
  combined_content = []
25
  for url in url_list:
26
  try:
27
- loader = WebBaseLoader([url], requests_kwargs={"headers": {"User-Agent": "Caramel AI"}})
 
 
 
28
  documents = loader.load()
29
  for doc in documents:
30
  combined_content.append(doc.page_content)
@@ -32,24 +41,26 @@ def web_scrape_tool(urls: str) -> str:
32
  combined_content.append(f"Could not scrape {url}. Error: {e}")
33
  return "\n\n".join(combined_content)
34
 
35
- # Preload agent only once (efficient)
36
  llm = ChatGoogleGenerativeAI(model=model, google_api_key=google_api_key)
37
  tools = [web_scrape_tool]
38
  prompt = hub.pull("hwchase17/react")
39
- agent = create_react_agent(llm, tools, prompt)
40
  agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, handle_parsing_errors=True)
41
 
42
- # Gradio interface
43
- def ask_agent(user_question):
44
  try:
45
- result = agent_executor.invoke({"input": user_question})
46
- return result['output']
47
  except Exception as e:
48
- return f"Error: {str(e)}"
49
 
 
50
  gr.Interface(
51
  fn=ask_agent,
52
- inputs=gr.Textbox(label="Ask a question (include the URL)"),
53
- outputs=gr.Textbox(label="Agent's Response"),
54
- title="Web Scraping Agent with Gemini"
 
55
  ).launch()
 
8
  import os
9
  import ast
10
 
11
+ # Load environment variables
12
  load_dotenv()
13
  google_api_key = os.getenv("GOOGLE_API_KEY")
14
+ user_agent = os.getenv("USER_AGENT", "Caramel AI Bot")
15
  model = "gemini-2.5-flash"
16
 
17
  @tool
18
  def web_scrape_tool(urls: str) -> str:
19
+ """Scrapes content from a list of URLs.
20
+ The input should be a string representation of a Python list of URLs
21
+ (e.g., "['https://hereandnowai.com', 'https://hereandnow.co.in']").
22
+ Returns the concatenated text content of all scraped pages."""
23
  try:
24
  url_list = ast.literal_eval(urls)
25
  if not isinstance(url_list, list) or not all(isinstance(url, str) for url in url_list):
26
  return "Invalid input format. Please provide a list of URLs as a string."
27
+ except (ValueError, SyntaxError):
28
  return "Invalid input format. Please provide a list of URLs as a string."
29
+
30
  combined_content = []
31
  for url in url_list:
32
  try:
33
+ loader = WebBaseLoader(
34
+ [url],
35
+ requests_kwargs={"headers": {"User-Agent": user_agent}},
36
+ )
37
  documents = loader.load()
38
  for doc in documents:
39
  combined_content.append(doc.page_content)
 
41
  combined_content.append(f"Could not scrape {url}. Error: {e}")
42
  return "\n\n".join(combined_content)
43
 
44
+ # Setup the LLM and Agent
45
  llm = ChatGoogleGenerativeAI(model=model, google_api_key=google_api_key)
46
  tools = [web_scrape_tool]
47
  prompt = hub.pull("hwchase17/react")
48
+ agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
49
  agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, handle_parsing_errors=True)
50
 
51
+ # Gradio interface function
52
+ def ask_agent(user_input):
53
  try:
54
+ result = agent_executor.invoke({"input": user_input})
55
+ return result["output"]
56
  except Exception as e:
57
+ return f"Error: {e}"
58
 
59
+ # Launch Gradio app
60
  gr.Interface(
61
  fn=ask_agent,
62
+ inputs=gr.Textbox(lines=2, placeholder="Ask a question with a URL...", label="Your Query"),
63
+ outputs=gr.Textbox(lines=10, label="Agent's Response"),
64
+ title="Gemini Web Scraping Agent",
65
+ description="Ask a question about one or more web pages. Include URLs in the question."
66
  ).launch()