hasanain9 committed on
Commit
ae8739e
·
verified ·
1 Parent(s): 3fd9d2e
Files changed (1) hide show
  1. app.py +74 -95
app.py CHANGED
@@ -2,131 +2,110 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- import datetime
6
- from openai import OpenAI
7
 
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
- class ToolEnhancedAgent:
 
11
  def __init__(self):
12
- api_key = os.getenv("OPENAI_API_KEY")
13
- if not api_key:
14
- raise ValueError("OPENAI_API_KEY is not set.")
15
- self.client = OpenAI(api_key=api_key)
16
- print("ToolEnhancedAgent initialized.")
17
-
18
- def use_tool(self, tool_name: str, input_text: str) -> str:
19
- try:
20
- if tool_name == "calculator":
21
- return str(eval(input_text))
22
- elif tool_name == "date":
23
- return str(datetime.datetime.now().date())
24
- elif tool_name == "wikipedia":
25
- res = requests.get(f"https://en.wikipedia.org/api/rest_v1/page/summary/{input_text}")
26
- if res.status_code == 200:
27
- return res.json().get("extract", "No summary found.")
28
- else:
29
- return "No summary found."
30
- else:
31
- return "[Unknown Tool]"
32
- except Exception as e:
33
- return f"[Tool Error: {e}]"
34
 
35
  def __call__(self, question: str) -> str:
36
- try:
37
- prompt = f"""
38
- You are an advanced AI assistant with access to tools like calculator, date lookup, and Wikipedia.
39
- Follow this format:
40
- Step 1: Think step-by-step.
41
- Step 2: Use tool if needed.
42
- Step 3: Final answer.
43
-
44
- Question: {question}
45
- Answer:
46
- """
47
- messages = [
48
- {"role": "system", "content": "You are a helpful assistant that uses tools and thinks step-by-step."},
49
- {"role": "user", "content": prompt}
50
- ]
51
- response = self.client.chat.completions.create(
52
- model="gpt-4",
53
- messages=messages,
54
- temperature=0.3,
55
- max_tokens=700
56
- )
57
- return response.choices[0].message.content.strip()
58
- except Exception as e:
59
- return f"[Agent Error: {e}]"
60
 
61
- def run_and_submit_all():
62
- profile_username = os.getenv("HF_USER") or "anonymous"
63
- space_id = os.getenv("SPACE_ID")
 
 
 
 
 
 
 
 
 
64
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
65
-
66
- api_url = DEFAULT_API_URL
67
- questions_url = f"{api_url}/questions"
68
- submit_url = f"{api_url}/submit"
69
 
70
- try:
71
- agent = ToolEnhancedAgent()
72
- except Exception as e:
73
- return f"Failed to initialize agent: {e}", pd.DataFrame()
74
 
 
75
  try:
76
- res = requests.get(questions_url, timeout=20)
77
- res.raise_for_status()
78
- questions_data = res.json()
 
79
  except Exception as e:
80
- return f"Error fetching questions: {e}", pd.DataFrame()
81
 
 
82
  answers_payload = []
83
- log = []
84
-
85
- for q in questions_data:
86
  task_id = q.get("task_id")
87
- question = q.get("question")
88
- if not task_id or not question:
89
  continue
90
  try:
91
- answer = agent(question)
92
  except Exception as e:
93
- answer = f"[Error]: {e}"
94
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
95
- log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
96
 
97
- if not answers_payload:
98
- return "No answers submitted.", pd.DataFrame(log)
 
99
 
 
100
  submission = {
101
- "username": profile_username,
102
  "agent_code": agent_code,
103
- "answers": answers_payload
104
  }
105
 
106
  try:
107
- res = requests.post(submit_url, json=submission, timeout=30)
108
  res.raise_for_status()
109
  result = res.json()
110
- msg = (
111
- f"✅ Submission Successful!\n"
112
- f"User: {result.get('username')}\n"
113
- f"Score: {result.get('score')}%\n"
114
- f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
115
- f"Message: {result.get('message')}"
116
  )
117
- return msg, pd.DataFrame(log)
118
  except Exception as e:
119
- return f"❌ Submission Failed: {e}", pd.DataFrame(log)
120
 
121
- with gr.Blocks() as demo:
122
- gr.Markdown("# 🔍 GAIA Agent Evaluator")
123
- gr.Markdown("Log in, then click the button to evaluate your agent.")
124
- run_button = gr.Button("▢️ Run Evaluation & Submit All Answers")
125
- status = gr.Textbox(label="Status", lines=4)
126
- table = gr.DataFrame(label="Agent Logs")
127
 
128
- run_button.click(fn=run_and_submit_all, outputs=[status, table])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  if __name__ == "__main__":
131
- print("Launching GAIA App...")
132
- demo.launch()
 
 
 
 
 
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
 
5
 
6
+ # --- Constants ---
7
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
8
 
9
+ # --- Agent Definition (replace with your own logic) ---
10
+ class BasicAgent:
11
  def __init__(self):
12
+ print("✅ BasicAgent initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  def __call__(self, question: str) -> str:
15
+ print(f"📥 Question received: {question[:60]}...")
16
+ return "Paris" if "capital of France" in question else "42"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
+ # --- Evaluation Function ---
19
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
20
+ space_id = os.getenv("SPACE_ID") or "your-username/your-space" # fallback
21
+
22
+ if not profile:
23
+ return "⚠️ Please log in to Hugging Face to submit.", None
24
+
25
+ username = profile.username
26
+ print(f"🔐 Logged in as: {username}")
27
+
28
+ questions_url = f"{DEFAULT_API_URL}/questions"
29
+ submit_url = f"{DEFAULT_API_URL}/submit"
30
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
 
 
 
31
 
32
+ # Instantiate your agent
33
+ agent = BasicAgent()
 
 
34
 
35
+ # Fetch questions
36
  try:
37
+ response = requests.get(questions_url, timeout=15)
38
+ response.raise_for_status()
39
+ questions = response.json()
40
+ print(f"📦 {len(questions)} questions fetched.")
41
  except Exception as e:
42
+ return f"❌ Failed to fetch questions: {e}", None
43
 
44
+ # Process answers
45
  answers_payload = []
46
+ logs = []
47
+ for q in questions:
 
48
  task_id = q.get("task_id")
49
+ question_text = q.get("question")
50
+ if not task_id or not question_text:
51
  continue
52
  try:
53
+ answer = agent(question_text)
54
  except Exception as e:
55
+ answer = f"AGENT ERROR: {e}"
 
 
56
 
57
+ answers_payload.append({"task_id": task_id, "submitted_answer": answer})
58
+ logs.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
59
+ print(f"✅ Task ID: {task_id} | Answer: {answer}")
60
 
61
+ # Submit answers
62
  submission = {
63
+ "username": username,
64
  "agent_code": agent_code,
65
+ "answers": answers_payload,
66
  }
67
 
68
  try:
69
+ res = requests.post(submit_url, json=submission, timeout=60)
70
  res.raise_for_status()
71
  result = res.json()
72
+ status = (
73
+ f"🎉 Submission Successful!\n"
74
+ f"User: {result.get('username', 'N/A')}\n"
75
+ f"Score: {result.get('score', 0)}%\n"
76
+ f"Correct: {result.get('correct_count', 0)}/{result.get('total_attempted', 0)}\n"
77
+ f"Message: {result.get('message', '')}"
78
  )
 
79
  except Exception as e:
80
+ status = f"❌ Submission failed: {e}"
81
 
82
+ return status, pd.DataFrame(logs)
 
 
 
 
 
83
 
84
+ # --- Gradio UI ---
85
+ with gr.Blocks() as demo:
86
+ gr.Markdown("# 🤖 GAIA Evaluation Agent")
87
+ gr.Markdown("""
88
+ 1. Log in with your Hugging Face account below.
89
+ 2. Click the button to evaluate and submit answers.
90
+ 3. Your score and submission details will appear below.
91
+ """)
92
+
93
+ profile_input = gr.OAuthProfile()
94
+ run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")
95
+ status_output = gr.Textbox(label="Submission Result", lines=6, interactive=False)
96
+ results_table = gr.DataFrame(label="Answer Log")
97
+
98
+ run_button.click(
99
+ fn=run_and_submit_all,
100
+ inputs=[profile_input],
101
+ outputs=[status_output, results_table]
102
+ )
103
 
104
  if __name__ == "__main__":
105
+ print("\n================== GAIA Agent App Starting ==================")
106
+ if os.getenv("SPACE_ID"):
107
+ print(f"📦 SPACE_ID = {os.getenv('SPACE_ID')}")
108
+ else:
109
+ print("ℹ️ No SPACE_ID found. Using fallback link.")
110
+
111
+ demo.launch(debug=True)