Krithikesh77 committed on
Commit
fd204d9
·
verified ·
1 Parent(s): 83fcef5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -35
app.py CHANGED
@@ -1,66 +1,70 @@
1
- from langchain_community.document_loaders import WebBaseLoader
2
  from langchain_google_genai import ChatGoogleGenerativeAI
 
 
 
3
  from langchain.agents import AgentExecutor, create_react_agent
4
  from langchain import hub
5
  from langchain.tools import tool
6
- from dotenv import load_dotenv
7
- import gradio as gr
8
- import os
9
  import ast
 
10
 
11
  # Load environment variables
12
  load_dotenv()
13
- google_api_key = os.getenv("GOOGLE_API_KEY")
14
- user_agent = os.getenv("USER_AGENT", "Caramel AI Bot")
15
- model = "gemini-2.5-flash"
16
 
17
  @tool
18
- def web_scrape_tool(urls: str) -> str:
19
- """Scrapes content from a list of URLs.
20
- The input should be a string representation of a Python list of URLs
21
- (e.g., "['https://hereandnowai.com', 'https://hereandnow.co.in']").
22
- Returns the concatenated text content of all scraped pages."""
23
  try:
24
- url_list = ast.literal_eval(urls)
25
  if not isinstance(url_list, list) or not all(isinstance(url, str) for url in url_list):
26
- return "Invalid input format. Please provide a list of URLs as a string."
27
- except (ValueError, SyntaxError):
28
- return "Invalid input format. Please provide a list of URLs as a string."
29
-
30
  combined_content = []
31
  for url in url_list:
32
  try:
33
  loader = WebBaseLoader(
34
  [url],
35
- requests_kwargs={"headers": {"User-Agent": user_agent}},
36
  )
37
  documents = loader.load()
38
  for doc in documents:
39
  combined_content.append(doc.page_content)
40
  except Exception as e:
41
- combined_content.append(f"Could not scrape {url}. Error: {e}")
42
- return "\n\n".join(combined_content)
 
43
 
44
- # Setup the LLM and Agent
45
- llm = ChatGoogleGenerativeAI(model=model, google_api_key=google_api_key)
46
- tools = [web_scrape_tool]
47
  prompt = hub.pull("hwchase17/react")
48
- agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
49
  agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, handle_parsing_errors=True)
50
 
51
- # Gradio interface function
52
  def ask_agent(user_input):
53
  try:
54
- result = agent_executor.invoke({"input": user_input})
55
- return result["output"]
56
  except Exception as e:
57
- return f"Error: {e}"
58
 
59
- # Launch Gradio app
60
- gr.Interface(
61
  fn=ask_agent,
62
- inputs=gr.Textbox(lines=2, placeholder="Ask a question with a URL...", label="Your Query"),
63
- outputs=gr.Textbox(lines=10, label="Agent's Response"),
64
- title="Gemini Web Scraping Agent",
65
- description="Ask a question about one or more web pages. Include URLs in the question."
66
- ).launch()
 
 
 
 
1
+ import gradio as gr
2
  from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from dotenv import load_dotenv
4
+ import os
5
+ from langchain_community.document_loaders import WebBaseLoader
6
  from langchain.agents import AgentExecutor, create_react_agent
7
  from langchain import hub
8
  from langchain.tools import tool
 
 
 
9
  import ast
10
+ import warnings
11
 
12
# Load environment variables from a local .env file (if present) so the
# Google API key does not have to be hard-coded.
load_dotenv()

# NOTE(review): may be None when GOOGLE_API_KEY is unset — confirm the
# deployment always provides it before ChatGoogleGenerativeAI is built.
api_key = os.getenv("GOOGLE_API_KEY")
model = 'gemini-2.5-flash'
 
16
 
17
@tool
def web_scrap_tool(url: str) -> str:
    ''' Scrapes content from a list of URLs.
    The input should be a string representation of a Python list of URLs (e.g., "['https://example.com']").
    Returns the concatenated text content of all scraped pages.

    Raises:
        ValueError: if the input cannot be parsed into a list of URL strings.
    '''
    # Parse the agent-supplied string into a Python object. ast.literal_eval
    # only accepts Python literals, so it is safe on untrusted input (unlike eval).
    # Parsing and validation are kept in SEPARATE try blocks: previously the
    # validation ValueError was raised inside the same try and immediately
    # re-caught by `except (ValueError, SyntaxError)`, double-wrapping its message.
    try:
        url_list = ast.literal_eval(url)
    except (ValueError, SyntaxError) as e:
        raise ValueError(f"Invalid input format: {e}. Please provide a valid python list of URLs.")
    if not isinstance(url_list, list) or not all(isinstance(u, str) for u in url_list):
        raise ValueError("Input must be a list of URLs as strings. Example: ['https://example.com']")

    combined_content = []
    # `target` (not `url`) so the loop does not shadow the parameter.
    for target in url_list:
        try:
            loader = WebBaseLoader(
                [target],
                requests_kwargs={"headers": {"User-Agent": "caramel-AI"}},
            )
            documents = loader.load()
            for doc in documents:
                combined_content.append(doc.page_content)
        except Exception as e:
            # Best-effort: record the failure for this URL and keep scraping the rest.
            combined_content.append(f"Could not scrape {target}. Error: {e}.")

    return "\n".join(combined_content)
44
 
45
# Build the LLM, the ReAct agent, and its executor once at import time so
# every Gradio request reuses the same instances.
llm = ChatGoogleGenerativeAI(model=model, api_key=api_key)
tools = [web_scrap_tool]
# Standard ReAct prompt pulled from the LangChain Hub.
prompt = hub.pull("hwchase17/react")
agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=False,
    handle_parsing_errors=True,  # recover from malformed LLM output instead of crashing
)
51
 
52
# Function for Gradio interface
def ask_agent(user_input):
    """Run the agent on a single user query and return its final answer.

    Any exception raised while the agent runs (including a missing 'output'
    key) is converted to a plain "Error: ..." string so the UI never crashes.
    """
    try:
        response = agent_executor.invoke({'input': user_input})
        return response['output']
    except Exception as err:
        return f"Error: {str(err)}"
59
 
60
# Gradio UI: one text box in, one text box out, wired to the agent.
_query_box = gr.Textbox(
    label="Ask about a webpage",
    placeholder="e.g., What is the content of the 'About Us' page of https://example.com?",
)
_answer_box = gr.Textbox(label="Agent Response")

ui = gr.Interface(
    fn=ask_agent,
    inputs=_query_box,
    outputs=_answer_box,
    title="Web Scraping Agent",
    description="Ask a question about the contents of any webpage. The agent will scrape and respond accordingly.",
)

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    ui.launch()