Spaces: test4
app.py
CHANGED
@@ -3,11 +3,11 @@ import gradio as gr
 import requests
 import openai
 
-# Setup OpenAI API Key
+# Setup OpenAI API Key
 openai.api_key = os.getenv("OPENAI_API_KEY")
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
-#
+# Agent GPT
 class GPTAgent:
     def __init__(self, model="gpt-4"):
         self.model = model
@@ -19,7 +19,7 @@ class GPTAgent:
                 messages=[
                     {
                         "role": "system",
-                        "content": "You are a helpful assistant that
+                        "content": "You are a helpful assistant that only replies with A, B, C, or D.",
                     },
                     {"role": "user", "content": question},
                 ],
@@ -30,30 +30,30 @@ class GPTAgent:
         except Exception as e:
            return f"ERROR: {str(e)}"
 
-#
-def run_and_submit_all(
-    username = getattr(
+# Evaluation Function
+def run_and_submit_all(user_profile):
+    username = getattr(user_profile, "username", "anonymous")
     space_id = os.getenv("SPACE_ID", "anonymous/Final_Assignment_Template")
 
     try:
         agent = GPTAgent()
     except Exception as e:
-        return f"❌ Agent
+        return f"❌ Agent error: {e}", ""
 
     try:
         questions = requests.get(f"{DEFAULT_API_URL}/questions", timeout=10).json()
     except Exception as e:
-        return f"❌
+        return f"❌ Could not fetch questions: {e}", ""
 
     answers = []
     logs = []
 
-    for
+    for idx, q in enumerate(questions, 1):
         task_id = q.get("task_id")
         question = q.get("question")
         answer = agent(question)
         answers.append({"task_id": task_id, "answer": answer})
-        logs.append(f"### Q{
+        logs.append(f"### Q{idx} - {task_id}\n{question}\n**Answer:** {answer}\n")
 
     try:
         result = requests.post(
@@ -75,16 +75,16 @@ def run_and_submit_all(profile):
 
     return summary, "\n\n".join(logs)
 
-#
+# Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("#
+    gr.Markdown("# 🧠 GAIA Benchmark Agent\nLogin with your Hugging Face account to begin.")
 
-
-
-
+    login_btn = gr.LoginButton()
+    profile_output = gr.OAuthProfile()
+    run_button = gr.Button("▶ Run Evaluation")
+    summary = gr.Textbox(label="📊 Result", lines=2)
+    log_output = gr.Textbox(label="📋 Log", lines=20, show_copy_button=True)
 
-    run_button
+    run_button.click(fn=run_and_submit_all, inputs=[profile_output], outputs=[summary, log_output])
 
-
-
-    demo.launch()
+    demo.launch()
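Note: the hunks above only show fragments of GPTAgent (the constructor, the system prompt, and the error handler). Below is a minimal sketch of how the full class presumably fits together. It assumes the pre-1.0 openai client (implied by openai.api_key = ...) and a __call__ method inferred from the answer = agent(question) call site; neither is shown in the diff.

# Sketch only -- not part of the commit. Reconstructed from the visible hunks;
# the __call__ method name is an assumption based on `answer = agent(question)`.
import os

import openai

openai.api_key = os.getenv("OPENAI_API_KEY")


class GPTAgent:
    def __init__(self, model="gpt-4"):
        self.model = model

    def __call__(self, question):
        try:
            # Legacy (pre-1.0) openai client, matching `openai.api_key = ...` above.
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant that only replies with A, B, C, or D.",
                    },
                    {"role": "user", "content": question},
                ],
            )
            return response["choices"][0]["message"]["content"].strip()
        except Exception as e:
            return f"ERROR: {str(e)}"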