Commit
·
1d11ffb
1
Parent(s):
2f64f8a
Update latest data
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- create_leaderboard.py +4 -1
- data/data_viewer.jsonl +2 -2
- data/fact_results/claude-3-5-sonnet-with-search/fact_result.txt +3 -0
- data/fact_results/claude-3-7-sonnet-with-search/fact_result.txt +3 -0
- data/fact_results/doubao-deepresearch/fact_result.txt +3 -0
- data/fact_results/gemini-2.5-flash-preview-04-17/fact_result.txt +1 -0
- data/fact_results/gemini-2.5-pro-deepresearch/fact_result.txt +3 -0
- data/fact_results/gemini-2.5-pro-preview-05-06/fact_result.txt +1 -0
- data/fact_results/gpt-4.1-mini/fact_result.txt +3 -0
- data/fact_results/gpt-4.1/fact_result.txt +3 -0
- data/fact_results/gpt-4o-mini-search-preview/fact_result.txt +3 -0
- data/fact_results/gpt-4o-search-preview/fact_result.txt +3 -0
- data/fact_results/grok-deeper-search/fact_result.txt +3 -0
- data/fact_results/openai-deepresearch/fact_result.txt +3 -0
- data/fact_results/perplexity-Research/fact_result.txt +3 -0
- data/fact_results/sonar-pro/fact_result.txt +3 -0
- data/fact_results/sonar-reasoning-pro/fact_result.txt +3 -0
- data/fact_results/sonar-reasoning/fact_result.txt +3 -0
- data/fact_results/sonar/fact_result.txt +3 -0
- data/leaderboard.csv +19 -16
- data/raw_data/claude-research.jsonl +3 -0
- data/raw_data/doubao-deepresearch.jsonl +3 -0
- data/raw_data/{gemini-2.5-flash-with-grounding.jsonl → gemini-2.5-flash-preview-04-17.jsonl} +0 -0
- data/raw_data/gemini-2.5-pro-deepresearch.jsonl +2 -2
- data/raw_data/{gemini-2.5-pro-with-grounding.jsonl → gemini-2.5-pro-preview-05-06.jsonl} +0 -0
- data/raw_data/{gpt-4.1-mini-with-search.jsonl → gpt-4.1-mini.jsonl} +0 -0
- data/raw_data/{gpt-4.1-with-search.jsonl → gpt-4.1.jsonl} +2 -2
- data/raw_data/grok-deeper-search.jsonl +2 -2
- data/raw_data/kimi-researcher.jsonl +3 -0
- data/raw_data/openai-deepresearch.jsonl +2 -2
- data/raw_data/perplexity-Research.jsonl +2 -2
- data/raw_data/{perplexity-sonar-pro.jsonl → sonar-pro.jsonl} +1 -1
- data/raw_data/{perplexity-sonar-reasoning-pro.jsonl → sonar-reasoning-pro.jsonl} +1 -1
- data/raw_data/{perplexity-sonar-reasoning.jsonl → sonar-reasoning.jsonl} +0 -0
- data/raw_data/{perplexity-sonar.jsonl → sonar.jsonl} +1 -1
- data/raw_results/claude-3-5-sonnet-with-search.jsonl +0 -3
- data/raw_results/claude-3-5-sonnet-with-search/race_result.txt +5 -0
- data/raw_results/claude-3-5-sonnet-with-search/raw_results.jsonl +3 -0
- data/raw_results/claude-3-7-sonnet-with-search.jsonl +0 -3
- data/raw_results/claude-3-7-sonnet-with-search/race_result.txt +5 -0
- data/raw_results/claude-3-7-sonnet-with-search/raw_results.jsonl +3 -0
- data/raw_results/claude-research/race_result.txt +5 -0
- data/raw_results/claude-research/raw_results.jsonl +3 -0
- data/raw_results/doubao-deepresearch/race_result.txt +5 -0
- data/raw_results/doubao-deepresearch/raw_results.jsonl +3 -0
- data/raw_results/gemini-2.5-flash-preview-04-17/race_result.txt +5 -0
- data/raw_results/gemini-2.5-flash-preview-04-17/raw_results.jsonl +3 -0
- data/raw_results/gemini-2.5-flash-with-grounding.jsonl +0 -3
- data/raw_results/gemini-2.5-pro-deepresearch.jsonl +0 -3
- data/raw_results/gemini-2.5-pro-deepresearch/race_result.txt +5 -0
create_leaderboard.py
CHANGED
@@ -66,7 +66,10 @@ with gr.Blocks(title="DeepResearch Bench") as demo:
|
|
66 |
<a href="https://deepresearch-bench.github.io" target="_blank">Website</a> |
|
67 |
<a href="https://arxiv.org/abs/2506.11763" target="_blank">Paper</a> |
|
68 |
<a href="#" target="_blank">Eval Dataset</a> |
|
69 |
-
Total models:
|
|
|
|
|
|
|
70 |
</div>
|
71 |
""")
|
72 |
|
|
|
66 |
<a href="https://deepresearch-bench.github.io" target="_blank">Website</a> |
|
67 |
<a href="https://arxiv.org/abs/2506.11763" target="_blank">Paper</a> |
|
68 |
<a href="#" target="_blank">Eval Dataset</a> |
|
69 |
+
Total models: 19 | Last Update: 29 Dec 2024<br>
|
70 |
+
<small style="color: #666; font-size: 0.9em;">
|
71 |
+
Race judge model: gemini-2.5-pro | Fact-checking models: gemini-2.5-flash
|
72 |
+
</small>
|
73 |
</div>
|
74 |
""")
|
75 |
|
data/data_viewer.jsonl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:647067a9eec626525fa41f257123b5b35f9daf6e9862467e9dc259f987ce621f
|
3 |
+
size 40834049
|
data/fact_results/claude-3-5-sonnet-with-search/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 9.94
|
2 |
+
total_valid_citations: 9.35
|
3 |
+
valid_rate: 0.9406438631790744
|
data/fact_results/claude-3-7-sonnet-with-search/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 28.07
|
2 |
+
total_valid_citations: 24.51
|
3 |
+
valid_rate: 0.8731742073387959
|
data/fact_results/doubao-deepresearch/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
citations_per_task: 99.5510
|
2 |
+
supported_per_task: 52.6224
|
3 |
+
valid_rate: 0.5286
|
data/fact_results/gemini-2.5-flash-preview-04-17/fact_result.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
No tasks with valid results.
|
data/fact_results/gemini-2.5-pro-deepresearch/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
citations_per_task: 211.1616
|
2 |
+
supported_per_task: 165.3434
|
3 |
+
valid_rate: 0.7830
|
data/fact_results/gemini-2.5-pro-preview-05-06/fact_result.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
No tasks with valid results.
|
data/fact_results/gpt-4.1-mini/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 4.85
|
2 |
+
total_valid_citations: 4.1
|
3 |
+
valid_rate: 0.845360824742268
|
data/fact_results/gpt-4.1/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 4.746987951807229
|
2 |
+
total_valid_citations: 4.265060240963855
|
3 |
+
valid_rate: 0.8984771573604061
|
data/fact_results/gpt-4o-mini-search-preview/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 5.651162790697675
|
2 |
+
total_valid_citations: 4.616279069767442
|
3 |
+
valid_rate: 0.8168724279835391
|
data/fact_results/gpt-4o-search-preview/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 5.825581395348837
|
2 |
+
total_valid_citations: 5.046511627906977
|
3 |
+
valid_rate: 0.8662674650698603
|
data/fact_results/grok-deeper-search/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 11.741935483870968
|
2 |
+
total_valid_citations: 8.580645161290322
|
3 |
+
valid_rate: 0.7307692307692307
|
data/fact_results/openai-deepresearch/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 53.04040404040404
|
2 |
+
total_valid_citations: 39.78787878787879
|
3 |
+
valid_rate: 0.7501428299371549
|
data/fact_results/perplexity-Research/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 37.76
|
2 |
+
total_valid_citations: 31.2
|
3 |
+
valid_rate: 0.826271186440678
|
data/fact_results/sonar-pro/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 21.01
|
2 |
+
total_valid_citations: 16.75
|
3 |
+
valid_rate: 0.7972394098048549
|
data/fact_results/sonar-reasoning-pro/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 20.78
|
2 |
+
total_valid_citations: 9.39
|
3 |
+
valid_rate: 0.45187680461982677
|
data/fact_results/sonar-reasoning/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 25.43
|
2 |
+
total_valid_citations: 13.37
|
3 |
+
valid_rate: 0.525756979944947
|
data/fact_results/sonar/fact_result.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
total_citations: 13.97872340425532
|
2 |
+
total_valid_citations: 10.680851063829786
|
3 |
+
valid_rate: 0.7640791476407914
|
data/leaderboard.csv
CHANGED
@@ -1,17 +1,20 @@
|
|
1 |
model,overall_score,comprehensiveness,insight,instruction_following,readability,citation_accuracy,effective_citations
|
2 |
-
gemini-2.5-pro-deepresearch,48.
|
3 |
-
openai-deepresearch,46.
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
perplexity-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
gpt-4.1
|
17 |
-
|
|
|
|
|
|
|
|
1 |
model,overall_score,comprehensiveness,insight,instruction_following,readability,citation_accuracy,effective_citations
|
2 |
+
gemini-2.5-pro-deepresearch,48.92,48.45,48.30,49.29,49.77,78.30,165.34
|
3 |
+
openai-deepresearch,46.45,46.46,43.73,49.39,47.22,75.01,39.79
|
4 |
+
claude-research,45.00,45.34,42.79,47.58,44.66,-,-
|
5 |
+
doubao-deepresearch,44.34,44.84,40.56,47.95,44.69,52.86,52.62
|
6 |
+
kimi-researcher,42.69,42.82,39.40,45.30,44.68,-,-
|
7 |
+
perplexity-Research,40.46,39.10,35.65,46.11,43.08,82.63,31.20
|
8 |
+
grok-deeper-search,38.22,36.08,30.89,46.59,42.17,73.08,8.58
|
9 |
+
sonar-reasoning-pro,37.76,34.96,31.65,44.93,42.42,45.19,9.39
|
10 |
+
sonar-reasoning,37.75,34.73,32.59,44.42,42.39,52.58,13.37
|
11 |
+
claude-3-7-sonnet-with-search,36.63,35.95,31.29,44.05,36.07,87.32,24.51
|
12 |
+
sonar-pro,36.19,33.92,29.69,43.39,41.07,79.72,16.75
|
13 |
+
gemini-2.5-pro-preview-05-06,31.90,31.75,24.61,40.24,32.76,-,-
|
14 |
+
gpt-4o-search-preview,30.74,27.81,20.44,41.01,37.60,86.63,5.05
|
15 |
+
sonar,30.64,27.14,21.62,40.70,37.46,76.41,10.68
|
16 |
+
gpt-4.1,29.31,25.59,18.42,40.63,36.49,89.85,4.27
|
17 |
+
gemini-2.5-flash-preview-04-17,29.19,28.97,21.62,37.80,29.97,-,-
|
18 |
+
gpt-4o-mini-search-preview,27.62,24.24,16.62,38.59,35.27,81.69,4.62
|
19 |
+
gpt-4.1-mini,26.62,22.86,15.39,38.18,34.49,84.54,4.10
|
20 |
+
claude-3-5-sonnet-with-search,23.95,21.28,16.20,32.41,29.87,94.06,9.35
|
data/raw_data/claude-research.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:300f8dbc8242a5852bbe44098403f35fac1e4136e2274c93f0a3d659fee00d7f
|
3 |
+
size 1513379
|
data/raw_data/doubao-deepresearch.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b9512cedf730486da486ba9d0ec305213ca8176bd00b93da788367f090717f2
|
3 |
+
size 7451876
|
data/raw_data/{gemini-2.5-flash-with-grounding.jsonl → gemini-2.5-flash-preview-04-17.jsonl}
RENAMED
File without changes
|
data/raw_data/gemini-2.5-pro-deepresearch.jsonl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3705106cc42a38b7c7bcd90e42aaec7f688a0d52179329e3074fcda99ea544e7
|
3 |
+
size 8523153
|
data/raw_data/{gemini-2.5-pro-with-grounding.jsonl → gemini-2.5-pro-preview-05-06.jsonl}
RENAMED
File without changes
|
data/raw_data/{gpt-4.1-mini-with-search.jsonl → gpt-4.1-mini.jsonl}
RENAMED
File without changes
|
data/raw_data/{gpt-4.1-with-search.jsonl → gpt-4.1.jsonl}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7611ff4a5cd8824aa711c9759d7dff02a990a57fbb36d699442484431bd5662
|
3 |
+
size 492409
|
data/raw_data/grok-deeper-search.jsonl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f14c64de8c22d66b5a1c08af0cb0d829d9a4b671378a2952230b2219d258f0ba
|
3 |
+
size 1149833
|
data/raw_data/kimi-researcher.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:868ac817d88b63a7a253ecf4439b85205ad1c49f2879f4b46f1a9a34d6cf804f
|
3 |
+
size 3773315
|
data/raw_data/openai-deepresearch.jsonl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a9dbbf7f18d8c985bc4d4f450089eb4bb73e77dbf7168a1bb4c81f811e06d84
|
3 |
+
size 6903838
|
data/raw_data/perplexity-Research.jsonl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a3b855862c99f108abf97b9e402b43eb4d3376c3ec93c2e0a9c871b70d0736e
|
3 |
+
size 1747879
|
data/raw_data/{perplexity-sonar-pro.jsonl → sonar-pro.jsonl}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 750234
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39dbb2c1eae9fe1bc32abaedebe75bbc643ba18ec25e3360726ffb9d514c52ec
|
3 |
size 750234
|
data/raw_data/{perplexity-sonar-reasoning-pro.jsonl → sonar-reasoning-pro.jsonl}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 495156
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffa11b21375455127385b57c69442a2772aa96f0d5b410e274d994eacf920c00
|
3 |
size 495156
|
data/raw_data/{perplexity-sonar-reasoning.jsonl → sonar-reasoning.jsonl}
RENAMED
File without changes
|
data/raw_data/{perplexity-sonar.jsonl → sonar.jsonl}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 574856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96626a86dca8c67848e8ce9e71e76c4cddec7066ba209e2018d13781ad23f17f
|
3 |
size 574856
|
data/raw_results/claude-3-5-sonnet-with-search.jsonl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c0c47d1bab126886420bd53bb41a8905cdfb97f105711bcc2f5a27e3d53652ea
|
3 |
-
size 1992421
|
|
|
|
|
|
|
|
data/raw_results/claude-3-5-sonnet-with-search/race_result.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Comprehensiveness: 0.2128
|
2 |
+
Insight: 0.1620
|
3 |
+
Instruction Following: 0.3241
|
4 |
+
Readability: 0.2987
|
5 |
+
Overall Score: 0.2395
|
data/raw_results/claude-3-5-sonnet-with-search/raw_results.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0423e9b7888c1070e43272284414d91deb490e38d48d0f7d6afc004db6fea291
|
3 |
+
size 52686
|
data/raw_results/claude-3-7-sonnet-with-search.jsonl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:2b3a6bf74400c89d24fa47853ab034ed3696ee0694c2d190ba83c3f5dcd8a0ef
|
3 |
-
size 2002379
|
|
|
|
|
|
|
|
data/raw_results/claude-3-7-sonnet-with-search/race_result.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Comprehensiveness: 0.3595
|
2 |
+
Insight: 0.3129
|
3 |
+
Instruction Following: 0.4405
|
4 |
+
Readability: 0.3607
|
5 |
+
Overall Score: 0.3663
|
data/raw_results/claude-3-7-sonnet-with-search/raw_results.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fde53e238b0f658ccc09d1efb7d6a13e276cd9d67e9e722adc5531dc1561853
|
3 |
+
size 52517
|
data/raw_results/claude-research/race_result.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Comprehensiveness: 0.4534
|
2 |
+
Insight: 0.4279
|
3 |
+
Instruction Following: 0.4758
|
4 |
+
Readability: 0.4466
|
5 |
+
Overall Score: 0.4500
|
data/raw_results/claude-research/raw_results.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb6616b89c3b38eb5f4822cdf82326648652c3fb19b5aca5ada99552cba4f529
|
3 |
+
size 52454
|
data/raw_results/doubao-deepresearch/race_result.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Comprehensiveness: 0.4484
|
2 |
+
Insight: 0.4056
|
3 |
+
Instruction Following: 0.4795
|
4 |
+
Readability: 0.4469
|
5 |
+
Overall Score: 0.4434
|
data/raw_results/doubao-deepresearch/raw_results.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:732d86bd51bfb687821590ecc227cbce30663d309069e812d6e4bba103ca5e2a
|
3 |
+
size 51890
|
data/raw_results/gemini-2.5-flash-preview-04-17/race_result.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Comprehensiveness: 0.2897
|
2 |
+
Insight: 0.2162
|
3 |
+
Instruction Following: 0.3780
|
4 |
+
Readability: 0.2997
|
5 |
+
Overall Score: 0.2919
|
data/raw_results/gemini-2.5-flash-preview-04-17/raw_results.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ee20445df1bc4ca52e8c0de0d8f02d65e808df8f834b789fed749004e2bc3a4
|
3 |
+
size 52637
|
data/raw_results/gemini-2.5-flash-with-grounding.jsonl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:43b9f71819babb5c00f65f0dd71d707323fb803c585bd74976f49cdc34ab80aa
|
3 |
-
size 1951481
|
|
|
|
|
|
|
|
data/raw_results/gemini-2.5-pro-deepresearch.jsonl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:ac2fc53c99697e3276c98d735ed630df6fa49d2972c70a5409adc1958ecaa7b7
|
3 |
-
size 1937730
|
|
|
|
|
|
|
|
data/raw_results/gemini-2.5-pro-deepresearch/race_result.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Comprehensiveness: 0.4845
|
2 |
+
Insight: 0.4830
|
3 |
+
Instruction Following: 0.4929
|
4 |
+
Readability: 0.4977
|
5 |
+
Overall Score: 0.4892
|