daiqi committed on
Commit
539f705
·
verified ·
1 Parent(s): b2d3387

Update commit_results.jsonl

Browse files
Files changed (1) hide show
  1. commit_results.jsonl +0 -3
commit_results.jsonl CHANGED
@@ -177,6 +177,3 @@
177
  {"Score": 47.4, "Name": "MageBench", "BaseModel": "Random Baseline", "Env.": "Sokoban", "Target-research": "Model-Eval-Global", "Subset": "all", "Link": "xxx", "State": "Checked"}
178
  {"Score": 47.4, "Name": "MageBench", "BaseModel": "Random Baseline", "Env.": "Sokoban", "Target-research": "Model-Eval-Online", "Subset": "all", "Link": "xxx", "State": "Checked"}
179
  {"Score": 16.64, "Name": "MageBench", "BaseModel": "Random Baseline", "Env.": "Football", "Target-research": "Model-Eval-Online", "Subset": "all", "Link": "xxx", "State": "Checked"}
180
- {"Score": 94.53, "Name": "testing", "BaseModel": "testing", "Env": "Football", "Target-research": "Agent-Eval-Prompt", "Subset": "all", "Link": "testing", "State": "Checking"}
181
- {"Score": 91.53, "Name": "testing", "BaseModel": "testing", "Env": "WebUI", "Target-research": "Agent-Eval-Finetune", "Subset": "mini", "Link": "testing", "State": "Checking"}
182
- {"Score": 32.33, "Name": "testing", "BaseModel": "testing", "Env": "Sokoban", "Target-research": "Model-Eval-Online", "Subset": "mini", "Link": "testing", "State": "Checking"}
 
177
  {"Score": 47.4, "Name": "MageBench", "BaseModel": "Random Baseline", "Env.": "Sokoban", "Target-research": "Model-Eval-Global", "Subset": "all", "Link": "xxx", "State": "Checked"}
178
  {"Score": 47.4, "Name": "MageBench", "BaseModel": "Random Baseline", "Env.": "Sokoban", "Target-research": "Model-Eval-Online", "Subset": "all", "Link": "xxx", "State": "Checked"}
179
  {"Score": 16.64, "Name": "MageBench", "BaseModel": "Random Baseline", "Env.": "Football", "Target-research": "Model-Eval-Online", "Subset": "all", "Link": "xxx", "State": "Checked"}