abtsousa commited on
Commit
e8c805a
·
1 Parent(s): 03f4295

Implement cache directory management and update file fetching to use cache if no working directory is provided

Browse files
Files changed (6) hide show
  1. .gitignore +3 -0
  2. app.py +67 -62
  3. config.py +19 -0
  4. tools/code_interpreter.py +3 -1
  5. tools/files.py +2 -3
  6. utils.py +7 -4
.gitignore CHANGED
@@ -201,3 +201,6 @@ __marimo__/
201
 
202
  # Streamlit
203
  .streamlit/secrets.toml
 
 
 
 
201
 
202
  # Streamlit
203
  .streamlit/secrets.toml
204
+
205
+ # Cache directory for file operations
206
+ cache/
app.py CHANGED
@@ -5,11 +5,10 @@ import pandas as pd
5
  from os import getenv
6
  from dotenv import load_dotenv
7
  import asyncio
8
- import tempfile
9
 
10
  from agent.agent import OracleBot
11
- from config import start_phoenix, APP_NAME, DEFAULT_API_URL
12
- from utils import fetch_task_file, extract_task_id_from_question_data
13
 
14
  load_dotenv()
15
 
@@ -29,6 +28,7 @@ start_phoenix()
29
  # Simplified concurrent processor: launch all tasks immediately and await them together
30
  async def process_questions(agent: OracleBot, questions_data: list, working_dir: str):
31
  print(f"Running agent on {len(questions_data)} questions concurrently (simple fan-out)...")
 
32
 
33
  async def handle(item: dict):
34
  task_id = item.get("task_id")
@@ -114,66 +114,67 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
114
  return f"An unexpected error occurred fetching questions: {e}", None
115
 
116
  # 3. Run your Agent concurrently (simple gather)
117
- # Create a temporary working directory for this session
118
- with tempfile.TemporaryDirectory() as working_dir:
119
- results_log, answers_payload = await process_questions(agent, questions_data, working_dir)
120
-
121
- # Remove everything before "FINAL ANSWER: " in submitted answers
122
- for answer in answers_payload:
123
- if "submitted_answer" in answer:
124
- answer["submitted_answer"] = answer["submitted_answer"].split("FINAL ANSWER: ", 1)[-1].strip()
125
-
126
- if not answers_payload:
127
- print("Agent did not produce any answers to submit.")
128
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
129
-
130
- # 4. Prepare Submission
131
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
132
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
133
- print(status_update)
134
-
135
- # 5. Submit
136
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  try:
138
- response = requests.post(submit_url, json=submission_data, timeout=60)
139
- response.raise_for_status()
140
- result_data = response.json()
141
- final_status = (
142
- f"Submission Successful!\n"
143
- f"User: {result_data.get('username')}\n"
144
- f"Overall Score: {result_data.get('score', 'N/A')}% "
145
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
146
- f"Message: {result_data.get('message', 'No message received.')}"
147
- )
148
- print("Submission successful.")
149
- results_df = pd.DataFrame(results_log)
150
- return final_status, results_df
151
- except requests.exceptions.HTTPError as e:
152
- error_detail = f"Server responded with status {e.response.status_code}."
153
- try:
154
- error_json = e.response.json()
155
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
156
- except requests.exceptions.JSONDecodeError:
157
- error_detail += f" Response: {e.response.text[:500]}"
158
- status_message = f"Submission Failed: {error_detail}"
159
- print(status_message)
160
- results_df = pd.DataFrame(results_log)
161
- return status_message, results_df
162
- except requests.exceptions.Timeout:
163
- status_message = "Submission Failed: The request timed out."
164
- print(status_message)
165
- results_df = pd.DataFrame(results_log)
166
- return status_message, results_df
167
- except requests.exceptions.RequestException as e:
168
- status_message = f"Submission Failed: Network error - {e}"
169
- print(status_message)
170
- results_df = pd.DataFrame(results_log)
171
- return status_message, results_df
172
- except Exception as e:
173
- status_message = f"An unexpected error occurred during submission: {e}"
174
- print(status_message)
175
- results_df = pd.DataFrame(results_log)
176
- return status_message, results_df
177
 
178
 
179
  # --- Build Gradio Interface using Blocks ---
@@ -209,6 +210,10 @@ with gr.Blocks() as demo:
209
 
210
  if __name__ == "__main__":
211
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
 
 
 
212
  # Check for SPACE_HOST and SPACE_ID at startup for information
213
  space_host_startup = os.getenv("SPACE_HOST")
214
  space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
 
5
  from os import getenv
6
  from dotenv import load_dotenv
7
  import asyncio
 
8
 
9
  from agent.agent import OracleBot
10
+ from utils import fetch_task_file
11
+ from config import start_phoenix, APP_NAME, DEFAULT_API_URL, initialize_cache_directory, CACHE_DIR
12
 
13
  load_dotenv()
14
 
 
28
  # Simplified concurrent processor: launch all tasks immediately and await them together
29
  async def process_questions(agent: OracleBot, questions_data: list, working_dir: str):
30
  print(f"Running agent on {len(questions_data)} questions concurrently (simple fan-out)...")
31
+ print(f"Using working directory: {working_dir}")
32
 
33
  async def handle(item: dict):
34
  task_id = item.get("task_id")
 
114
  return f"An unexpected error occurred fetching questions: {e}", None
115
 
116
  # 3. Run your Agent concurrently (simple gather)
117
+ # Use the cache directory for this session
118
+ working_dir = CACHE_DIR
119
+
120
+ results_log, answers_payload = await process_questions(agent, questions_data, working_dir)
121
+
122
+ # Remove everything before "FINAL ANSWER: " in submitted answers
123
+ for answer in answers_payload:
124
+ if "submitted_answer" in answer:
125
+ answer["submitted_answer"] = answer["submitted_answer"].split("FINAL ANSWER: ", 1)[-1].strip()
126
+
127
+ if not answers_payload:
128
+ print("Agent did not produce any answers to submit.")
129
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
130
+
131
+ # 4. Prepare Submission
132
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
133
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
134
+ print(status_update)
135
+
136
+ # 5. Submit
137
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
138
+ try:
139
+ response = requests.post(submit_url, json=submission_data, timeout=60)
140
+ response.raise_for_status()
141
+ result_data = response.json()
142
+ final_status = (
143
+ f"Submission Successful!\n"
144
+ f"User: {result_data.get('username')}\n"
145
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
146
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
147
+ f"Message: {result_data.get('message', 'No message received.')}"
148
+ )
149
+ print("Submission successful.")
150
+ results_df = pd.DataFrame(results_log)
151
+ return final_status, results_df
152
+ except requests.exceptions.HTTPError as e:
153
+ error_detail = f"Server responded with status {e.response.status_code}."
154
  try:
155
+ error_json = e.response.json()
156
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
157
+ except requests.exceptions.JSONDecodeError:
158
+ error_detail += f" Response: {e.response.text[:500]}"
159
+ status_message = f"Submission Failed: {error_detail}"
160
+ print(status_message)
161
+ results_df = pd.DataFrame(results_log)
162
+ return status_message, results_df
163
+ except requests.exceptions.Timeout:
164
+ status_message = "Submission Failed: The request timed out."
165
+ print(status_message)
166
+ results_df = pd.DataFrame(results_log)
167
+ return status_message, results_df
168
+ except requests.exceptions.RequestException as e:
169
+ status_message = f"Submission Failed: Network error - {e}"
170
+ print(status_message)
171
+ results_df = pd.DataFrame(results_log)
172
+ return status_message, results_df
173
+ except Exception as e:
174
+ status_message = f"An unexpected error occurred during submission: {e}"
175
+ print(status_message)
176
+ results_df = pd.DataFrame(results_log)
177
+ return status_message, results_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
 
180
  # --- Build Gradio Interface using Blocks ---
 
210
 
211
  if __name__ == "__main__":
212
  print("\n" + "-"*30 + " App Starting " + "-"*30)
213
+
214
+ # Initialize cache directory
215
+ cache_directory = initialize_cache_directory()
216
+
217
  # Check for SPACE_HOST and SPACE_ID at startup for information
218
  space_host_startup = os.getenv("SPACE_HOST")
219
  space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
config.py CHANGED
@@ -1,11 +1,30 @@
1
  """Phoenix tracing setup for the OracleBot application."""
2
 
3
  import logging
 
 
4
  from phoenix.otel import register
5
 
6
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
7
  APP_NAME = "OracleBot"
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def start_phoenix(project_name: str = APP_NAME) -> None:
10
  """Setup Phoenix tracing for the agent.
11
 
 
1
  """Phoenix tracing setup for the OracleBot application."""
2
 
3
  import logging
4
+ import shutil
5
+ from pathlib import Path
6
  from phoenix.otel import register
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
  APP_NAME = "OracleBot"
10
 
11
+ # Define cache directory path globally
12
+ CACHE_DIR = Path(__file__).parent / "cache"
13
+
14
+ def initialize_cache_directory():
15
+ """Initialize the cache directory for file operations."""
16
+ # Remove existing cache directory if it exists
17
+ if CACHE_DIR.exists():
18
+ print(f"Removing existing cache directory: {CACHE_DIR}")
19
+ shutil.rmtree(CACHE_DIR)
20
+
21
+ # Create fresh cache directory
22
+ CACHE_DIR.mkdir(parents=True, exist_ok=True)
23
+ print(f"Created fresh cache directory: {CACHE_DIR}")
24
+
25
+ return str(CACHE_DIR)
26
+
27
+
28
  def start_phoenix(project_name: str = APP_NAME) -> None:
29
  """Setup Phoenix tracing for the agent.
30
 
tools/code_interpreter.py CHANGED
@@ -14,6 +14,7 @@ import pandas as pd
14
  import matplotlib.pyplot as plt
15
  from PIL import Image
16
  from langchain_core.tools import tool
 
17
 
18
  @tool
19
  def execute_code_multilang(code: str, language: str = "python") -> str:
@@ -92,7 +93,8 @@ class CodeInterpreter:
92
  "cmath", "uuid", "tempfile", "requests", "urllib"
93
  ]
94
  self.max_execution_time = max_execution_time
95
- self.working_directory = working_directory or os.path.join(os.getcwd())
 
96
  if not os.path.exists(self.working_directory):
97
  os.makedirs(self.working_directory)
98
 
 
14
  import matplotlib.pyplot as plt
15
  from PIL import Image
16
  from langchain_core.tools import tool
17
+ from config import CACHE_DIR
18
 
19
  @tool
20
  def execute_code_multilang(code: str, language: str = "python") -> str:
 
93
  "cmath", "uuid", "tempfile", "requests", "urllib"
94
  ]
95
  self.max_execution_time = max_execution_time
96
+ # Use cache directory instead of current working directory
97
+ self.working_directory = working_directory or str(CACHE_DIR)
98
  if not os.path.exists(self.working_directory):
99
  os.makedirs(self.working_directory)
100
 
tools/files.py CHANGED
@@ -1,5 +1,4 @@
1
  from langchain_community.agent_toolkits import FileManagementToolkit
2
- from tempfile import TemporaryDirectory
3
 
4
- working_directory = TemporaryDirectory()
5
- file_management_toolkit = FileManagementToolkit(working_directory=working_directory)
 
1
  from langchain_community.agent_toolkits import FileManagementToolkit
2
+ from config import CACHE_DIR
3
 
4
+ file_management_toolkit = FileManagementToolkit(root_dir=str(CACHE_DIR))
 
utils.py CHANGED
@@ -1,21 +1,24 @@
1
  import os
2
  import requests
3
- import tempfile
4
  from pathlib import Path
5
- from config import DEFAULT_API_URL
6
 
7
 
8
- def fetch_task_file(task_id: str, working_dir: str) -> str | None:
9
  """
10
  Fetch the file associated with a task_id from the API and save it to the working directory.
11
 
12
  Args:
13
  task_id: The task ID to fetch the file for
14
- working_dir: The working directory to save the file to
15
 
16
  Returns:
17
  The path to the downloaded file, or None if no file exists or error occurred
18
  """
 
 
 
 
19
  try:
20
  files_url = f"{DEFAULT_API_URL}/files/{task_id}"
21
  response = requests.get(files_url, timeout=30)
 
1
  import os
2
  import requests
 
3
  from pathlib import Path
4
+ from config import DEFAULT_API_URL, CACHE_DIR
5
 
6
 
7
+ def fetch_task_file(task_id: str, working_dir: str | None = None) -> str | None:
8
  """
9
  Fetch the file associated with a task_id from the API and save it to the working directory.
10
 
11
  Args:
12
  task_id: The task ID to fetch the file for
13
+ working_dir: The working directory to save the file to. If None, uses the global cache directory.
14
 
15
  Returns:
16
  The path to the downloaded file, or None if no file exists or error occurred
17
  """
18
+ # Use cache directory if working_dir is not provided
19
+ if working_dir is None:
20
+ working_dir = str(CACHE_DIR)
21
+
22
  try:
23
  files_url = f"{DEFAULT_API_URL}/files/{task_id}"
24
  response = requests.get(files_url, timeout=30)