Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -48,7 +48,7 @@ from langchain.prompts import PromptTemplate
|
|
48 |
from langchain.tools import BaseTool, tool as lc_tool_decorator
|
49 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
50 |
from langchain.agents import AgentExecutor, create_react_agent
|
51 |
-
from langchain_community.tools import
|
52 |
from langchain_experimental.tools import PythonREPLTool
|
53 |
|
54 |
# LangGraph Conditional Imports
|
@@ -127,7 +127,7 @@ except ImportError as e:
|
|
127 |
# --- Constants ---
|
128 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
129 |
GEMINI_MODEL_NAME = "gemini-2.5-pro"
|
130 |
-
GEMINI_FLASH_MULTIMODAL_MODEL_NAME = "gemini-
|
131 |
SCORING_API_BASE_URL = os.getenv("SCORING_API_URL", DEFAULT_API_URL)
|
132 |
MAX_FILE_SIZE_BYTES = 50 * 1024 * 1024
|
133 |
LOCAL_FILE_STORE_PATH = "./Data"
|
@@ -139,6 +139,7 @@ WHISPER_MODEL: Optional[Any] = None
|
|
139 |
# --- Environment Variables & API Keys ---
|
140 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
141 |
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")
|
|
|
142 |
|
143 |
# --- Setup Logging ---
|
144 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(module)s:%(lineno)d - %(message)s')
|
@@ -255,7 +256,7 @@ def _download_file(file_identifier: str, task_id_for_file: Optional[str] = None)
|
|
255 |
name_without_ext, current_ext = os.path.splitext(effective_save_path)
|
256 |
if not current_ext:
|
257 |
content_type_header = r.headers.get('content-type', '')
|
258 |
-
content_type_val = content_type_header.split(';')
|
259 |
if content_type_val:
|
260 |
guessed_ext = mimetypes.guess_extension(content_type_val)
|
261 |
if guessed_ext: effective_save_path += guessed_ext; logger.info(f"Added guessed ext: {guessed_ext}")
|
@@ -324,10 +325,10 @@ def transcribe_audio_tool(action_input_json_str: str) -> str:
|
|
324 |
|
325 |
@lc_tool_decorator
|
326 |
def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
|
327 |
-
"""Processes an image file (URL or local path) along with a text prompt using a Gemini multimodal model (gemini-1.5-flash-
|
328 |
global google_genai_client
|
329 |
if not google_genai_client: return "Error: google-genai SDK client not initialized."
|
330 |
-
if not PIL_TESSERACT_AVAILABLE : return "Error: Pillow (PIL) library not available for image processing."
|
331 |
try:
|
332 |
data = json.loads(action_input_json_str)
|
333 |
file_identifier = data.get("file_identifier")
|
@@ -342,7 +343,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
|
|
342 |
except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
|
343 |
|
344 |
model_id_for_client = f"models/{GEMINI_FLASH_MULTIMODAL_MODEL_NAME}" if not GEMINI_FLASH_MULTIMODAL_MODEL_NAME.startswith("models/") else GEMINI_FLASH_MULTIMODAL_MODEL_NAME
|
345 |
-
response = google_genai_client.models.generate_content(
|
346 |
model=model_id_for_client, contents=[pil_image, text_prompt]
|
347 |
)
|
348 |
logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client} received.")
|
@@ -361,7 +362,7 @@ You have access to the following tools. Use them if necessary.
|
|
361 |
TOOL USAGE:
|
362 |
- To use a tool, your response must include a `tool_calls` attribute in the AIMessage. Each tool call should be a dictionary with "name", "args" (a dictionary of arguments), and "id".
|
363 |
- For file tools ('read_pdf_tool', 'ocr_image_tool', 'transcribe_audio_tool', 'direct_multimodal_gemini_tool'): The `args` field must be a dictionary with a single key 'action_input_json_str' whose value is a JSON STRING. Example: {{"action_input_json_str": "{{\\"file_identifier\\": \\"file.pdf\\", \\"task_id\\": \\"123\\"}}"}}.
|
364 |
-
- '
|
365 |
- 'python_repl': `args` is like '{{"query": "python code string"}}'. Use print() for output.
|
366 |
RESPONSE FORMAT:
|
367 |
Final AIMessage should contain ONLY the answer in 'content' and NO 'tool_calls'. If using tools, 'content' can be thought process, with 'tool_calls'.
|
@@ -372,7 +373,7 @@ Goal: EXACT MATCH answer. No extra text/markdown.
|
|
372 |
Tools: {tools}
|
373 |
Process: Question -> Thought -> Action (ONE of [{tool_names}]) -> Action Input -> Observation -> Thought ... -> Final Answer: [exact answer]
|
374 |
Tool Inputs:
|
375 |
-
-
|
376 |
- python_repl: Python code string. Use print(). For Excel/CSV, use pandas: import pandas as pd; df = pd.read_excel('./Data/TASKID_filename.xlsx'); print(df.head())
|
377 |
- read_pdf_tool, ocr_image_tool, transcribe_audio_tool, direct_multimodal_gemini_tool: JSON string like '{{"file_identifier": "FILENAME_OR_URL", "task_id": "CURRENT_TASK_ID_IF_FILENAME"}}'.
|
378 |
If tool fails or info missing, Final Answer: N/A. Do NOT use unlisted tools.
|
@@ -394,7 +395,7 @@ def initialize_agent_and_tools(force_reinit=False):
|
|
394 |
google_api_key=GOOGLE_API_KEY,
|
395 |
temperature=0.0,
|
396 |
timeout=120,
|
397 |
-
convert_system_message_to_human=False
|
398 |
)
|
399 |
logger.info(f"LangChain LLM (Planner) initialized: {GEMINI_MODEL_NAME} (Using default safety settings, convert_system_message_to_human=False)")
|
400 |
except Exception as e:
|
@@ -407,8 +408,11 @@ def initialize_agent_and_tools(force_reinit=False):
|
|
407 |
if WHISPER_AVAILABLE: TOOLS.append(transcribe_audio_tool)
|
408 |
if google_genai_client and PIL_TESSERACT_AVAILABLE: TOOLS.append(direct_multimodal_gemini_tool); logger.info("Added 'direct_multimodal_gemini_tool'.")
|
409 |
else: logger.warning("'direct_multimodal_gemini_tool' NOT added (client or PIL missing).")
|
410 |
-
try:
|
411 |
-
|
|
|
|
|
|
|
412 |
try: python_repl = PythonREPLTool(name="python_repl"); python_repl.description = "Python REPL. print() for output. The input is a single string of code."; TOOLS.append(python_repl)
|
413 |
except Exception as e: logger.warning(f"PythonREPLTool init failed: {e}")
|
414 |
logger.info(f"Final tools list for agent: {[t.name for t in TOOLS]}")
|
@@ -622,7 +626,7 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
|
|
622 |
1. **Login with Hugging Face** using the button below. Your HF username will be used for submission.
|
623 |
2. Click 'Run Evaluation & Submit' to process GAIA questions (typically 20).
|
624 |
3. **Goal: 30%+ (6/20).** Agent uses Gemini Pro ({GEMINI_MODEL_NAME}) as planner. Tools include Web Search, Python, PDF, OCR, Audio/YouTube, and a new Direct Multimodal tool using Gemini Flash ({GEMINI_FLASH_MULTIMODAL_MODEL_NAME}).
|
625 |
-
4. Ensure `GOOGLE_API_KEY` and `
|
626 |
5. Check Space logs for details. LangGraph is attempted (ReAct fallback).""")
|
627 |
|
628 |
agent_status_display = gr.Markdown("**Agent Status:** Initializing...")
|
@@ -667,6 +671,7 @@ if __name__ == "__main__":
|
|
667 |
missing_vars_startup_list_global.clear()
|
668 |
if not GOOGLE_API_KEY: missing_vars_startup_list_global.append("GOOGLE_API_KEY")
|
669 |
if not HUGGINGFACE_TOKEN: missing_vars_startup_list_global.append("HUGGINGFACE_TOKEN (for GAIA API)")
|
|
|
670 |
|
671 |
try:
|
672 |
logger.info("Pre-initializing agent...")
|
|
|
48 |
from langchain.tools import BaseTool, tool as lc_tool_decorator
|
49 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
50 |
from langchain.agents import AgentExecutor, create_react_agent
|
51 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
52 |
from langchain_experimental.tools import PythonREPLTool
|
53 |
|
54 |
# LangGraph Conditional Imports
|
|
|
127 |
# --- Constants ---
|
128 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
129 |
GEMINI_MODEL_NAME = "gemini-2.5-pro"
|
130 |
+
GEMINI_FLASH_MULTIMODAL_MODEL_NAME = "gemini-1.5-flash-latest"
|
131 |
SCORING_API_BASE_URL = os.getenv("SCORING_API_URL", DEFAULT_API_URL)
|
132 |
MAX_FILE_SIZE_BYTES = 50 * 1024 * 1024
|
133 |
LOCAL_FILE_STORE_PATH = "./Data"
|
|
|
139 |
# --- Environment Variables & API Keys ---
|
140 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
141 |
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")
|
142 |
+
TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY")
|
143 |
|
144 |
# --- Setup Logging ---
|
145 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(module)s:%(lineno)d - %(message)s')
|
|
|
256 |
name_without_ext, current_ext = os.path.splitext(effective_save_path)
|
257 |
if not current_ext:
|
258 |
content_type_header = r.headers.get('content-type', '')
|
259 |
+
content_type_val = content_type_header.split(';')[0].strip() if content_type_header else ''
|
260 |
if content_type_val:
|
261 |
guessed_ext = mimetypes.guess_extension(content_type_val)
|
262 |
if guessed_ext: effective_save_path += guessed_ext; logger.info(f"Added guessed ext: {guessed_ext}")
|
|
|
325 |
|
326 |
@lc_tool_decorator
|
327 |
def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
|
328 |
+
"""Processes an image file (URL or local path) along with a text prompt using a Gemini multimodal model (gemini-1.5-flash-latest) for tasks like image description, Q&A about the image, or text generation based on the image. Input: JSON '{\"file_identifier\": \"IMAGE_FILENAME_OR_URL\", \"text_prompt\": \"Your question or instruction related to the image.\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\" (optional)}'. Returns the model's text response."""
|
329 |
global google_genai_client
|
330 |
if not google_genai_client: return "Error: google-genai SDK client not initialized."
|
331 |
+
if not PIL_TESSERACT_AVAILABLE : return "Error: Pillow (PIL) library not available for image processing."
|
332 |
try:
|
333 |
data = json.loads(action_input_json_str)
|
334 |
file_identifier = data.get("file_identifier")
|
|
|
343 |
except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
|
344 |
|
345 |
model_id_for_client = f"models/{GEMINI_FLASH_MULTIMODAL_MODEL_NAME}" if not GEMINI_FLASH_MULTIMODAL_MODEL_NAME.startswith("models/") else GEMINI_FLASH_MULTIMODAL_MODEL_NAME
|
346 |
+
response = google_genai_client.models.generate_content(
|
347 |
model=model_id_for_client, contents=[pil_image, text_prompt]
|
348 |
)
|
349 |
logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client} received.")
|
|
|
362 |
TOOL USAGE:
|
363 |
- To use a tool, your response must include a `tool_calls` attribute in the AIMessage. Each tool call should be a dictionary with "name", "args" (a dictionary of arguments), and "id".
|
364 |
- For file tools ('read_pdf_tool', 'ocr_image_tool', 'transcribe_audio_tool', 'direct_multimodal_gemini_tool'): The `args` field must be a dictionary with a single key 'action_input_json_str' whose value is a JSON STRING. Example: {{"action_input_json_str": "{{\\"file_identifier\\": \\"file.pdf\\", \\"task_id\\": \\"123\\"}}"}}.
|
365 |
+
- 'tavily_search_results_json': `args` is like '{{"query": "search query"}}'.
|
366 |
- 'python_repl': `args` is like '{{"query": "python code string"}}'. Use print() for output.
|
367 |
RESPONSE FORMAT:
|
368 |
Final AIMessage should contain ONLY the answer in 'content' and NO 'tool_calls'. If using tools, 'content' can be thought process, with 'tool_calls'.
|
|
|
373 |
Tools: {tools}
|
374 |
Process: Question -> Thought -> Action (ONE of [{tool_names}]) -> Action Input -> Observation -> Thought ... -> Final Answer: [exact answer]
|
375 |
Tool Inputs:
|
376 |
+
- tavily_search_results_json: Your search query string.
|
377 |
- python_repl: Python code string. Use print(). For Excel/CSV, use pandas: import pandas as pd; df = pd.read_excel('./Data/TASKID_filename.xlsx'); print(df.head())
|
378 |
- read_pdf_tool, ocr_image_tool, transcribe_audio_tool, direct_multimodal_gemini_tool: JSON string like '{{"file_identifier": "FILENAME_OR_URL", "task_id": "CURRENT_TASK_ID_IF_FILENAME"}}'.
|
379 |
If tool fails or info missing, Final Answer: N/A. Do NOT use unlisted tools.
|
|
|
395 |
google_api_key=GOOGLE_API_KEY,
|
396 |
temperature=0.0,
|
397 |
timeout=120,
|
398 |
+
convert_system_message_to_human=False
|
399 |
)
|
400 |
logger.info(f"LangChain LLM (Planner) initialized: {GEMINI_MODEL_NAME} (Using default safety settings, convert_system_message_to_human=False)")
|
401 |
except Exception as e:
|
|
|
408 |
if WHISPER_AVAILABLE: TOOLS.append(transcribe_audio_tool)
|
409 |
if google_genai_client and PIL_TESSERACT_AVAILABLE: TOOLS.append(direct_multimodal_gemini_tool); logger.info("Added 'direct_multimodal_gemini_tool'.")
|
410 |
else: logger.warning("'direct_multimodal_gemini_tool' NOT added (client or PIL missing).")
|
411 |
+
try:
|
412 |
+
search_tool = TavilySearchResults(max_results=3)
|
413 |
+
TOOLS.append(search_tool)
|
414 |
+
logger.info("Added 'TavilySearchResults' tool.")
|
415 |
+
except Exception as e: logger.warning(f"TavilySearchResults init failed: {e}")
|
416 |
try: python_repl = PythonREPLTool(name="python_repl"); python_repl.description = "Python REPL. print() for output. The input is a single string of code."; TOOLS.append(python_repl)
|
417 |
except Exception as e: logger.warning(f"PythonREPLTool init failed: {e}")
|
418 |
logger.info(f"Final tools list for agent: {[t.name for t in TOOLS]}")
|
|
|
626 |
1. **Login with Hugging Face** using the button below. Your HF username will be used for submission.
|
627 |
2. Click 'Run Evaluation & Submit' to process GAIA questions (typically 20).
|
628 |
3. **Goal: 30%+ (6/20).** Agent uses Gemini Pro ({GEMINI_MODEL_NAME}) as planner. Tools include Web Search, Python, PDF, OCR, Audio/YouTube, and a new Direct Multimodal tool using Gemini Flash ({GEMINI_FLASH_MULTIMODAL_MODEL_NAME}).
|
629 |
+
4. Ensure `GOOGLE_API_KEY`, `HUGGINGFACE_TOKEN`, and `TAVILY_API_KEY` are Space secrets.
|
630 |
5. Check Space logs for details. LangGraph is attempted (ReAct fallback).""")
|
631 |
|
632 |
agent_status_display = gr.Markdown("**Agent Status:** Initializing...")
|
|
|
671 |
missing_vars_startup_list_global.clear()
|
672 |
if not GOOGLE_API_KEY: missing_vars_startup_list_global.append("GOOGLE_API_KEY")
|
673 |
if not HUGGINGFACE_TOKEN: missing_vars_startup_list_global.append("HUGGINGFACE_TOKEN (for GAIA API)")
|
674 |
+
if not TAVILY_API_KEY: missing_vars_startup_list_global.append("TAVILY_API_KEY (for Tavily Search)")
|
675 |
|
676 |
try:
|
677 |
logger.info("Pre-initializing agent...")
|