Spaces:

jesusvilela
/

DearDreadyUnit4

Sleeping

App Files Files Community

jesusvilela committed on May 26

Commit

17f2778

verified ·

1 Parent(s): 6751bf5

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -57

app.py CHANGED Viewed

@@ -11,60 +11,42 @@ from urllib.parse import urlparse
 import mimetypes
 import subprocess # For yt-dlp
-from huggingface_hub import get_space_runtime
 # --- Global Variables for Startup Status ---
-# These will be populated in __main__ and accessed by the demo.load function
 missing_vars_startup_list_global = []
 agent_pre_init_status_msg_global = "Agent status will be determined at startup."
-# File Processing Libs (conditionally import for tools)
-try:
-    from PyPDF2 import PdfReader
-    PYPDF2_AVAILABLE = True
-except ImportError:
-    PYPDF2_AVAILABLE = False
-try:
-    from PIL import Image
-    import pytesseract
-    PIL_TESSERACT_AVAILABLE = True
-except ImportError:
-    PIL_TESSERACT_AVAILABLE = False
-try:
-    import whisper
-    WHISPER_AVAILABLE = True
-except ImportError:
-    WHISPER_AVAILABLE = False
-# Google Generative AI types
 from google.generativeai.types import HarmCategory, HarmBlockThreshold
-# LangChain Core components
 from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage, AnyMessage
 from langchain.prompts import PromptTemplate
 from langchain.tools import BaseTool, tool as lc_tool_decorator
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain.agents import AgentExecutor, create_react_agent
-# Prebuilt Tools
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_experimental.tools import PythonREPLTool
-# Conditional imports for type checking and LangGraph specific types
 if TYPE_CHECKING:
     from langgraph.graph import StateGraph as StateGraphAliasedForHinting
     from langgraph.prebuilt import ToolExecutor as ToolExecutorAliasedForHinting
     from typing_extensions import TypedDict
     from langgraph.checkpoint.base import BaseCheckpointSaver
-# LangGraph Imports
 LANGGRAPH_FLAVOR_AVAILABLE = False
 LG_StateGraph: Optional[Type[Any]] = None
 LG_ToolExecutor: Optional[Type[Any]] = None
-LG_END: Optional[Any] = None
 LG_ToolInvocation: Optional[Type[Any]] = None
 add_messages: Optional[Any] = None
 MemorySaver_Class: Optional[Type[Any]] = None
@@ -75,7 +57,7 @@ LLM_INSTANCE: Optional[ChatGoogleGenerativeAI] = None
 LANGGRAPH_MEMORY_SAVER: Optional[Any] = None
 try:
-    from langgraph.graph import StateGraph, END # Ensure END is imported
     from langgraph.prebuilt import ToolExecutor, ToolInvocation as LGToolInvocationActual
     from langgraph.graph.message import add_messages as lg_add_messages
     from langgraph.checkpoint.memory import MemorySaver as LGMemorySaver
@@ -98,8 +80,7 @@ WHISPER_MODEL: Optional[Any] = None
 # --- Environment Variables & API Keys ---
 GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
-HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")
-HF_USERNAME_FOR_SUBMISSION = os.environ.get("HF_USERNAME")
 # --- Setup Logging ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(module)s:%(lineno)d - %(message)s')
@@ -205,7 +186,7 @@ def _download_file(file_identifier: str, task_id_for_file: Optional[str] = None)
             name_without_ext, current_ext = os.path.splitext(effective_save_path)
             if not current_ext:
                 content_type_header = r.headers.get('content-type', '')
-                content_type_val = content_type_header.split(';')[0].strip() if content_type_header else '' # CORRECTED
                 if content_type_val:
                     guessed_ext = mimetypes.guess_extension(content_type_val)
                     if guessed_ext: effective_save_path += guessed_ext; logger.info(f"Added guessed ext: {guessed_ext}")
@@ -397,15 +378,24 @@ def construct_prompt_for_agent(q: Dict[str,Any]) -> str:
     return f"Task ID:{tid}{level}{files_info}\n\nQuestion:{q_str}"
 # --- Main Submission Logic ---
-def run_and_submit_all():
-    global AGENT_INSTANCE, HF_USERNAME_FOR_SUBMISSION
-    space_id, username = os.getenv("SPACE_ID"), HF_USERNAME_FOR_SUBMISSION
-    if not username: logger.error("CRITICAL: HF_USERNAME not set."); return "HF_USERNAME secret missing.",None
-    logger.info(f"Using username for submission: {username}")
-    if not AGENT_INSTANCE:
-        try: logger.info("Agent not pre-init. Init now..."); initialize_agent_and_tools()
-        except Exception as e: return f"Agent on-demand init failed: {e}",None
-        if not AGENT_INSTANCE: return "Agent still None after on-demand init.",None
     agent_code=f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_dev"
     q_url,s_url=f"{DEFAULT_API_URL}/questions",f"{DEFAULT_API_URL}/submit"
@@ -418,7 +408,7 @@ def run_and_submit_all():
     except Exception as e:logger.error(f"Fetch questions error: {e}",exc_info=True);return f"Fetch questions error:{e}",None
     res_log,ans_payload=[],[]
-    logger.info(f"Running agent on {len(q_data)} questions for user '{username}'...")
     for i,item in enumerate(q_data):
         tid,q_txt=item.get("task_id"),item.get("question")
         if not tid or q_txt is None:logger.warning(f"Skipping item: {item}");continue
@@ -434,12 +424,12 @@ def run_and_submit_all():
             res_log.append({"Task ID":tid,"Question":q_txt,"Full Agent Prompt":prompt,"Raw Agent Output":err_ans,"Submitted Answer":"N/A [AGENT_ERROR]"})
     if not ans_payload:return "Agent no answers.",pd.DataFrame(res_log)
-    sub_data={"username":username.strip(),"agent_code":agent_code,"answers":ans_payload}
-    logger.info(f"Submitting {len(ans_payload)} answers to {s_url} for user '{username}'...")
     sub_h={"Content-Type":"application/json",**auth_h}
     try:
         r=requests.post(s_url,json=sub_data,headers=sub_h,timeout=120);r.raise_for_status();res_data=r.json()
-        msg=(f"User:{res_data.get('username',username)}\nScore:{res_data.get('score','N/A')}% ({res_data.get('correct_count','?')}/{res_data.get('total_attempted','?')})\nMsg:{res_data.get('message','N/A')}")
         logger.info(f"Submission OK! {msg}");return f"Submission OK!\n{msg}",pd.DataFrame(res_log,columns=["Task ID","Question","Full Agent Prompt","Raw Agent Output","Submitted Answer"])
     except requests.exceptions.HTTPError as e:
         err=f"HTTP {e.response.status_code}. Detail:{e.response.text[:200]}"; logger.error(f"Submit Fail:{err}",exc_info=True); return f"Submit Fail:{err}",pd.DataFrame(res_log)
@@ -447,24 +437,28 @@ def run_and_submit_all():
 # --- Build Gradio Interface ---
 with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !important;}",theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# GAIA Agent Challenge Runner v6 (HF_USERNAME Only)")
     gr.Markdown(f"""**Instructions:**
-1. Ensure `HF_USERNAME` (Your HF username for leaderboard), `GOOGLE_API_KEY` (for Gemini), and `HUGGINGFACE_TOKEN` (for GAIA API) are set as Space secrets.
 2. Click 'Run Evaluation & Submit' to process GAIA questions (typically 20).
 3. **Goal: 30%+ (6/20).** Agent uses Gemini 2.5 Pro ({GEMINI_MODEL_NAME}), Web Search, Python, PDF, OCR, Audio/YouTube.
-4. Check Space logs for details. LangGraph is attempted (ReAct fallback).""") # Removed dynamic API call
     agent_status_display = gr.Markdown("**Agent Status:** Initializing...")
     missing_secrets_display = gr.Markdown("")
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=7, interactive=False)
     results_table = gr.DataFrame(label="Q&A Log", headers=["Task ID","Question","Prompt","Raw","Submitted"], wrap=True)
     run_button.click(fn=run_and_submit_all, outputs=[status_output,results_table], api_name="run_evaluation")
     def update_ui_on_load_fn_within_context():
-        # These globals are populated in __main__ before demo.launch()
-        global missing_vars_startup_list_global, agent_pre_init_status_msg_global
         secrets_msg_md = ""
         if missing_vars_startup_list_global:
             secrets_msg_md = f"<font color='red'>**⚠️ Secrets Missing:** {', '.join(missing_vars_startup_list_global)}.</font>"
@@ -485,18 +479,17 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
     demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
 if __name__ == "__main__":
-    logger.info("Application starting up (v6)...")
     if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
     if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
     if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")
     if LANGGRAPH_FLAVOR_AVAILABLE: logger.info("Core LangGraph (StateGraph, END) loaded.")
     else: logger.warning("Core LangGraph FAILED import. ReAct fallback. Check requirements.txt (langgraph, langchain-core, typing-extensions) & Space build logs for errors.")
-    # Populate global status variables BEFORE demo.launch()
-    missing_vars_startup_list_global.clear() # Clear in case of script reload in some environments
     if not GOOGLE_API_KEY: missing_vars_startup_list_global.append("GOOGLE_API_KEY")
-    if not HUGGINGFACE_TOKEN: missing_vars_startup_list_global.append("HUGGINGFACE_TOKEN")
-    if not HF_USERNAME_FOR_SUBMISSION: missing_vars_startup_list_global.append("HF_USERNAME")
     try:
         logger.info("Pre-initializing agent...")

 import mimetypes
 import subprocess # For yt-dlp
+from huggingface_hub import get_space_runtime # Keep for agent_code_url, though not for username
 # --- Global Variables for Startup Status ---
 missing_vars_startup_list_global = []
 agent_pre_init_status_msg_global = "Agent status will be determined at startup."
+# File Processing Libs
+try: from PyPDF2 import PdfReader; PYPDF2_AVAILABLE = True
+except ImportError: PYPDF2_AVAILABLE = False
+try: from PIL import Image; import pytesseract; PIL_TESSERACT_AVAILABLE = True
+except ImportError: PIL_TESSERACT_AVAILABLE = False
+try: import whisper; WHISPER_AVAILABLE = True
+except ImportError: WHISPER_AVAILABLE = False
+# Google GenAI
 from google.generativeai.types import HarmCategory, HarmBlockThreshold
+# LangChain
 from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage, AnyMessage
 from langchain.prompts import PromptTemplate
 from langchain.tools import BaseTool, tool as lc_tool_decorator
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain.agents import AgentExecutor, create_react_agent
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_experimental.tools import PythonREPLTool
+# LangGraph Conditional Imports
 if TYPE_CHECKING:
     from langgraph.graph import StateGraph as StateGraphAliasedForHinting
     from langgraph.prebuilt import ToolExecutor as ToolExecutorAliasedForHinting
     from typing_extensions import TypedDict
     from langgraph.checkpoint.base import BaseCheckpointSaver
 LANGGRAPH_FLAVOR_AVAILABLE = False
 LG_StateGraph: Optional[Type[Any]] = None
 LG_ToolExecutor: Optional[Type[Any]] = None
+LG_END: Optional[Any] = None # Must be imported if used
 LG_ToolInvocation: Optional[Type[Any]] = None
 add_messages: Optional[Any] = None
 MemorySaver_Class: Optional[Type[Any]] = None
 LANGGRAPH_MEMORY_SAVER: Optional[Any] = None
 try:
+    from langgraph.graph import StateGraph, END # Import END here
     from langgraph.prebuilt import ToolExecutor, ToolInvocation as LGToolInvocationActual
     from langgraph.graph.message import add_messages as lg_add_messages
     from langgraph.checkpoint.memory import MemorySaver as LGMemorySaver
 # --- Environment Variables & API Keys ---
 GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
+HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN") # For GAIA API auth, NOT for username
 # --- Setup Logging ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(module)s:%(lineno)d - %(message)s')
             name_without_ext, current_ext = os.path.splitext(effective_save_path)
             if not current_ext:
                 content_type_header = r.headers.get('content-type', '')
+                content_type_val = content_type_header.split(';')[0].strip() if content_type_header else ''
                 if content_type_val:
                     guessed_ext = mimetypes.guess_extension(content_type_val)
                     if guessed_ext: effective_save_path += guessed_ext; logger.info(f"Added guessed ext: {guessed_ext}")
     return f"Task ID:{tid}{level}{files_info}\n\nQuestion:{q_str}"
 # --- Main Submission Logic ---
+def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None): # Re-added profile as per Gradio standard
+    global AGENT_INSTANCE
+    space_id = os.getenv("SPACE_ID")
+    username_for_submission = None
+    if profile and hasattr(profile, 'username') and profile.username:
+        username_for_submission = profile.username
+        logger.info(f"Username from OAuth profile: {username_for_submission}")
+    else:
+        # This case means user is not logged in via HF Login Button, or OAuth is not configured for the Space
+        logger.warning("OAuth profile not available or username missing. Submission might fail or be attributed to a default/fallback if allowed by API.")
+        # As per strict template, we should stop if no profile.
+        return "Hugging Face login required. Please use the login button and try again.", None
+    if AGENT_INSTANCE is None:
+        try: logger.info("Agent not pre-initialized. Initializing now for run..."); initialize_agent_and_tools()
+        except Exception as e: return f"Agent on-demand initialization failed: {e}", None
+        if AGENT_INSTANCE is None: return "Agent is still None after on-demand initialization attempt.", None
     agent_code=f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_dev"
     q_url,s_url=f"{DEFAULT_API_URL}/questions",f"{DEFAULT_API_URL}/submit"
     except Exception as e:logger.error(f"Fetch questions error: {e}",exc_info=True);return f"Fetch questions error:{e}",None
     res_log,ans_payload=[],[]
+    logger.info(f"Running agent on {len(q_data)} questions for user '{username_for_submission}'...")
     for i,item in enumerate(q_data):
         tid,q_txt=item.get("task_id"),item.get("question")
         if not tid or q_txt is None:logger.warning(f"Skipping item: {item}");continue
             res_log.append({"Task ID":tid,"Question":q_txt,"Full Agent Prompt":prompt,"Raw Agent Output":err_ans,"Submitted Answer":"N/A [AGENT_ERROR]"})
     if not ans_payload:return "Agent no answers.",pd.DataFrame(res_log)
+    sub_data={"username":username_for_submission.strip(),"agent_code":agent_code,"answers":ans_payload}
+    logger.info(f"Submitting {len(ans_payload)} answers to {s_url} for user '{username_for_submission}'...")
     sub_h={"Content-Type":"application/json",**auth_h}
     try:
         r=requests.post(s_url,json=sub_data,headers=sub_h,timeout=120);r.raise_for_status();res_data=r.json()
+        msg=(f"User:{res_data.get('username',username_for_submission)}\nScore:{res_data.get('score','N/A')}% ({res_data.get('correct_count','?')}/{res_data.get('total_attempted','?')})\nMsg:{res_data.get('message','N/A')}")
         logger.info(f"Submission OK! {msg}");return f"Submission OK!\n{msg}",pd.DataFrame(res_log,columns=["Task ID","Question","Full Agent Prompt","Raw Agent Output","Submitted Answer"])
     except requests.exceptions.HTTPError as e:
         err=f"HTTP {e.response.status_code}. Detail:{e.response.text[:200]}"; logger.error(f"Submit Fail:{err}",exc_info=True); return f"Submit Fail:{err}",pd.DataFrame(res_log)
 # --- Build Gradio Interface ---
 with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !important;}",theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# GAIA Agent Challenge Runner v7 (OAuth for Username)")
     gr.Markdown(f"""**Instructions:**
+1. **Login with Hugging Face** using the button below. Your HF username will be used for submission.
 2. Click 'Run Evaluation & Submit' to process GAIA questions (typically 20).
 3. **Goal: 30%+ (6/20).** Agent uses Gemini 2.5 Pro ({GEMINI_MODEL_NAME}), Web Search, Python, PDF, OCR, Audio/YouTube.
+4. Ensure `GOOGLE_API_KEY` and `HUGGINGFACE_TOKEN` are Space secrets.
+5. Check Space logs for details. LangGraph is attempted (ReAct fallback).""")
     agent_status_display = gr.Markdown("**Agent Status:** Initializing...")
     missing_secrets_display = gr.Markdown("")
+    gr.LoginButton() # Added back as per template standard for username
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=7, interactive=False)
     results_table = gr.DataFrame(label="Q&A Log", headers=["Task ID","Question","Prompt","Raw","Submitted"], wrap=True)
+    # The `profile` argument in `run_and_submit_all` will be populated by Gradio
+    # if the user is logged in via the `gr.LoginButton()` flow.
     run_button.click(fn=run_and_submit_all, outputs=[status_output,results_table], api_name="run_evaluation")
     def update_ui_on_load_fn_within_context():
+        global missing_vars_startup_list_global, agent_pre_init_status_msg_global
         secrets_msg_md = ""
         if missing_vars_startup_list_global:
             secrets_msg_md = f"<font color='red'>**⚠️ Secrets Missing:** {', '.join(missing_vars_startup_list_global)}.</font>"
     demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
 if __name__ == "__main__":
+    logger.info("Application starting up (v7)...")
     if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
     if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
     if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")
     if LANGGRAPH_FLAVOR_AVAILABLE: logger.info("Core LangGraph (StateGraph, END) loaded.")
     else: logger.warning("Core LangGraph FAILED import. ReAct fallback. Check requirements.txt (langgraph, langchain-core, typing-extensions) & Space build logs for errors.")
+    missing_vars_startup_list_global.clear()
     if not GOOGLE_API_KEY: missing_vars_startup_list_global.append("GOOGLE_API_KEY")
+    if not HUGGINGFACE_TOKEN: missing_vars_startup_list_global.append("HUGGINGFACE_TOKEN (for GAIA API)")
+    # HF_USERNAME_FOR_SUBMISSION is no longer checked here as OAuth is primary
     try:
         logger.info("Pre-initializing agent...")