Chloe Anastasiades committed on
Commit
b36c3c5
·
unverified ·
1 Parent(s): abb9f0a

Map from new task names (#55)

Browse files
Files changed (1) hide show
  1. leaderboard_transformer.py +22 -0
leaderboard_transformer.py CHANGED
@@ -17,28 +17,50 @@ INFORMAL_TO_FORMAL_NAME_MAP = {
17
 
18
  # Validation Names
19
  "arxivdigestables_validation": "ArxivDIGESTables-Clean",
 
20
  "sqa_dev": "ScholarQA-CS2",
 
21
  "litqa2_validation": "LitQA2-FullText",
 
22
  "paper_finder_validation": "PaperFindingBench",
 
23
  "paper_finder_litqa2_validation": "LitQA2-FullText-Search",
 
24
  "discoverybench_validation": "DiscoveryBench",
 
25
  "core_bench_validation": "CORE-Bench-Hard",
 
26
  "ds1000_validation": "DS-1000",
 
27
  "e2e_discovery_validation": "E2E-Bench",
 
28
  "e2e_discovery_hard_validation": "E2E-Bench-Hard",
 
29
  "super_validation": "SUPER-Expert",
 
30
  # Test Names
31
  "paper_finder_test": "PaperFindingBench",
 
32
  "paper_finder_litqa2_test": "LitQA2-FullText-Search",
 
33
  "sqa_test": "ScholarQA-CS2",
 
34
  "arxivdigestables_test": "ArxivDIGESTables-Clean",
 
35
  "litqa2_test": "LitQA2-FullText",
 
36
  "discoverybench_test": "DiscoveryBench",
 
37
  "core_bench_test": "CORE-Bench-Hard",
 
38
  "ds1000_test": "DS-1000",
 
39
  "e2e_discovery_test": "E2E-Bench",
 
40
  "e2e_discovery_hard_test": "E2E-Bench-Hard",
 
41
  "super_test": "SUPER-Expert",
 
42
  }
43
  ORDER_MAP = {
44
  'Overall_keys': [
 
17
 
18
  # Validation Names
19
  "arxivdigestables_validation": "ArxivDIGESTables-Clean",
20
+ "ArxivDIGESTables_Clean_validation": "ArxivDIGESTables-Clean",
21
  "sqa_dev": "ScholarQA-CS2",
22
+ "ScholarQA_CS2_validation": "ScholarQA-CS2",
23
  "litqa2_validation": "LitQA2-FullText",
24
+ "LitQA2_FullText_validation": "LitQA2-FullText",
25
  "paper_finder_validation": "PaperFindingBench",
26
+ "PaperFindingBench_validation": "PaperFindingBench",
27
  "paper_finder_litqa2_validation": "LitQA2-FullText-Search",
28
+ "LitQA2_FullText_Search_validation": "LitQA2-FullText-Search",
29
  "discoverybench_validation": "DiscoveryBench",
30
+ "DiscoveryBench_validation": "DiscoveryBench",
31
  "core_bench_validation": "CORE-Bench-Hard",
32
+ "CORE_Bench_Hard_validation": "CORE-Bench-Hard",
33
  "ds1000_validation": "DS-1000",
34
+ "DS_1000_validation": "DS-1000",
35
  "e2e_discovery_validation": "E2E-Bench",
36
+ "E2E_Bench_validation": "E2E-Bench",
37
  "e2e_discovery_hard_validation": "E2E-Bench-Hard",
38
+ "E2E_Bench_Hard_validation": "E2E-Bench-Hard",
39
  "super_validation": "SUPER-Expert",
40
+ "SUPER_Expert_validation": "SUPER-Expert",
41
  # Test Names
42
  "paper_finder_test": "PaperFindingBench",
43
+ "PaperFindingBench_test": "PaperFindingBench",
44
  "paper_finder_litqa2_test": "LitQA2-FullText-Search",
45
+ "LitQA2_FullText_Search_test": "LitQA2-FullText-Search",
46
  "sqa_test": "ScholarQA-CS2",
47
+ "ScholarQA_CS2_test": "ScholarQA-CS2",
48
  "arxivdigestables_test": "ArxivDIGESTables-Clean",
49
+ "ArxivDIGESTables_Clean_test": "ArxivDIGESTables-Clean",
50
  "litqa2_test": "LitQA2-FullText",
51
+ "LitQA2_FullText_test": "LitQA2-FullText",
52
  "discoverybench_test": "DiscoveryBench",
53
+ "DiscoveryBench_test": "DiscoveryBench",
54
  "core_bench_test": "CORE-Bench-Hard",
55
+ "CORE_Bench_Hard_test": "CORE-Bench-Hard",
56
  "ds1000_test": "DS-1000",
57
+ "DS_1000_test": "DS-1000",
58
  "e2e_discovery_test": "E2E-Bench",
59
+ "E2E_Bench_test": "E2E-Bench",
60
  "e2e_discovery_hard_test": "E2E-Bench-Hard",
61
+ "E2E_Bench_Hard_test": "E2E-Bench-Hard",
62
  "super_test": "SUPER-Expert",
63
+ "SUPER_Expert_test": "SUPER-Expert",
64
  }
65
  ORDER_MAP = {
66
  'Overall_keys': [