Final_Assignment_Template

Sleeping

App Files Files Community

hasanain9 committed on May 26

Commit

d8dafef

verified ·

1 Parent(s): 177be6f

test3

Browse files

Files changed (1) hide show

app.py +67 -54

app.py CHANGED Viewed

@@ -5,20 +5,19 @@ import pandas as pd
 import gradio as gr
 from openai import OpenAI
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# -------- Tool-Enhanced Agent --------
 class ToolEnhancedAgent:
     def __init__(self):
         api_key = os.getenv("OPENAI_API_KEY")
         if not api_key:
-            raise ValueError("OPENAI_API_KEY is not set.")
         self.client = OpenAI(api_key=api_key)
-        print("✅ ToolEnhancedAgent initialized with GPT-4 + CoT + Tools.")
     def use_tool(self, tool_name: str, input_text: str) -> str:
         try:
             if tool_name == "calculator":
                 return str(eval(input_text))
             elif tool_name == "date":
                 return str(datetime.datetime.now().date())
@@ -31,117 +30,131 @@ class ToolEnhancedAgent:
     def search_wikipedia(self, query: str) -> str:
         try:
-            res = requests.get(f"https://en.wikipedia.org/api/rest_v1/page/summary/{query}")
             if res.status_code == 200:
                 return res.json().get("extract", "No summary found.")
             else:
                 return f"No Wikipedia summary found for '{query}'."
         except Exception as e:
-            return f"Wiki error: {e}"
     def __call__(self, question: str) -> str:
         prompt = (
-            "You are a helpful AI assistant. You can use tools (calculator, date, wikipedia). "
-            "Think step-by-step before answering.\n\n"
             f"Question: {question}\n"
-            "Answer (show thinking steps):"
         )
         try:
             response = self.client.chat.completions.create(
                 model="gpt-4",
                 messages=[
-                    {"role": "system", "content": "You are a smart assistant that uses tools and thinks step-by-step."},
                     {"role": "user", "content": prompt}
                 ],
                 temperature=0.3,
                 max_tokens=700,
             )
             answer = response.choices[0].message.content.strip()
-            print(f"📤 Agent response: {answer[:100]}...")
             return answer
         except Exception as e:
             print(f"[Agent Error]: {e}")
             return f"[Agent Error: {e}]"
-# -------- Evaluation & Submission Function --------
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    space_id = os.getenv("SPACE_ID")
     if not profile:
-        return "Please login with your Hugging Face account.", None
-    username = profile.username
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    questions_url = f"{DEFAULT_API_URL}/questions"
-    submit_url = f"{DEFAULT_API_URL}/submit"
     try:
         agent = ToolEnhancedAgent()
     except Exception as e:
-        return f"Agent init error: {e}", None
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions = response.json()
     except Exception as e:
-        return f"Failed to fetch questions: {e}", None
     results_log = []
-    answers_payload = []
     for item in questions:
         task_id = item.get("task_id")
         question_text = item.get("question")
-        if not task_id or not question_text:
             continue
         try:
             answer = agent(question_text)
         except Exception as e:
-            answer = f"[Agent Error: {e}]"
-        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
-        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
-    submission = {
         "username": username,
-        "agent_code": agent_code,
-        "answers": answers_payload,
     }
     try:
-        response = requests.post(submit_url, json=submission, timeout=60)
-        response.raise_for_status()
-        result = response.json()
-        status = (
-            f"✅ Submission Successful!\n"
-            f"User: {result.get('username')}\n"
-            f"Score: {result.get('score')}%\n"
-            f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
-            f"Message: {result.get('message')}"
         )
-    except Exception as e:
-        status = f"❌ Submission failed: {e}"
-    return status, pd.DataFrame(results_log)
-# -------- Gradio Interface --------
 with gr.Blocks() as demo:
-    gr.Markdown("## 🤖 GAIA Agent Evaluation with ToolEnhancedAgent")
     gr.Markdown(
         """
-        - This Space lets you run your agent on GAIA benchmark.
-        - Login with your HF account, click "Run Evaluation", and wait for the results.
         """
     )
-    gr.LoginButton()
-    run_button = gr.Button("🚀 Run Evaluation & Submit")
-    status_output = gr.Textbox(label="Status / Score", lines=6, interactive=False)
-    table_output = gr.DataFrame(label="Agent Answers")
-    run_button.click(fn=run_and_submit_all, outputs=[status_output, table_output])
-# -------- Launch App --------
 if __name__ == "__main__":
-    print("✅ Launching GAIA Agent Evaluation App")
-    demo.launch(debug=True)

 import gradio as gr
 from openai import OpenAI
+# --- Agent that uses GPT-4 with Chain-of-Thought and Tools ---
 class ToolEnhancedAgent:
     def __init__(self):
         api_key = os.getenv("OPENAI_API_KEY")
         if not api_key:
+            raise ValueError("OPENAI_API_KEY is not set in environment variables.")
         self.client = OpenAI(api_key=api_key)
+        print("ToolEnhancedAgent initialized with GPT-4 + CoT + Tools.")
     def use_tool(self, tool_name: str, input_text: str) -> str:
         try:
             if tool_name == "calculator":
+                # Be careful with eval; restrict the allowed expressions before using this in production
                 return str(eval(input_text))
             elif tool_name == "date":
                 return str(datetime.datetime.now().date())
     def search_wikipedia(self, query: str) -> str:
         try:
+            url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{query}"
+            res = requests.get(url)
             if res.status_code == 200:
                 return res.json().get("extract", "No summary found.")
             else:
                 return f"No Wikipedia summary found for '{query}'."
         except Exception as e:
+            return f"Wikipedia error: {e}"
     def __call__(self, question: str) -> str:
         prompt = (
+            "You are a helpful AI assistant. Use tools if needed. Think step-by-step before answering.\n"
             f"Question: {question}\n"
+            "Answer (show your thinking steps):"
         )
         try:
             response = self.client.chat.completions.create(
                 model="gpt-4",
                 messages=[
+                    {"role": "system", "content": "You are a smart assistant that can use tools and think step-by-step."},
                     {"role": "user", "content": prompt}
                 ],
                 temperature=0.3,
                 max_tokens=700,
             )
             answer = response.choices[0].message.content.strip()
+            print(f"Answer generated (truncated): {answer[:150]}...")
             return answer
         except Exception as e:
             print(f"[Agent Error]: {e}")
             return f"[Agent Error: {e}]"
+# --- Main function: fetch the questions, run the agent, and submit the answers ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
+        return "Please login to Hugging Face to submit answers.", None
+    username = profile.username
+    api_url = "https://agents-course-unit4-scoring.hf.space"
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
     try:
         agent = ToolEnhancedAgent()
     except Exception as e:
+        return f"Error initializing agent: {e}", None
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions = response.json()
+        if not questions:
+            return "No questions fetched or invalid response.", None
     except Exception as e:
+        return f"Error fetching questions: {e}", None
+    answers = []
     results_log = []
     for item in questions:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        if not task_id or question_text is None:
             continue
         try:
             answer = agent(question_text)
+            answers.append({"task_id": task_id, "submitted_answer": answer})
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text,
+                "Submitted Answer": answer
+            })
         except Exception as e:
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text,
+                "Submitted Answer": f"[Agent Error: {e}]"
+            })
+    if not answers:
+        return "Agent did not produce any answers.", pd.DataFrame(results_log)
+    agent_code_url = f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main"
+    submission_payload = {
         "username": username,
+        "agent_code": agent_code_url,
+        "answers": answers
     }
     try:
+        submit_response = requests.post(submit_url, json=submission_payload, timeout=60)
+        submit_response.raise_for_status()
+        result_data = submit_response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Score: {result_data.get('score', 'N/A')}%\n"
+            f"Correct: {result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')}\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
         )
+        return final_status, pd.DataFrame(results_log)
+    except Exception as e:
+        return f"Submission failed: {e}", pd.DataFrame(results_log)
+# --- Gradio UI ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Tool Enhanced Agent for GAIA Benchmark")
     gr.Markdown(
         """
+        1. Login with your Hugging Face account.
+        2. Click 'Run & Submit All Answers' to evaluate the agent on GAIA tasks.
+        3. View your score and answers below.
         """
     )
+    login = gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Status / Submission Result", lines=7)
+    results_table = gr.DataFrame(label="Questions and Answers")
+    run_button.click(fn=run_and_submit_all, inputs=login, outputs=[status_output, results_table])
 if __name__ == "__main__":
+    demo.launch()