Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
99f5740
1
Parent(s):
97493e3
added deepseek v3
Browse files- output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_irrelevance.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_base_multi_func_call.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_base_single_func_call.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_composite.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_long_context.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_miss_func.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_miss_param.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/tau_long_context.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/toolace_single_func_call_1.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/toolace_single_func_call_2.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/xlam_multiple_tool_multiple_call.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/xlam_multiple_tool_single_call.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/xlam_single_tool_multiple_call.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/xlam_single_tool_single_call.parquet +2 -2
- output/{mistral-small-2501 → deepseek-v3-0324}/xlam_tool_miss.parquet +2 -2
- results.csv +2 -2
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_irrelevance.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16c171dbd70a28d968bb83f7ac43fafd1c12a1ffc802954fc72146bc598cbf43
|
3 |
+
size 40093
|
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_base_multi_func_call.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:471746761be82f67ebebbc11a8e8c5fd5427936db12199c45faa3201d01f337b
|
3 |
+
size 24620
|
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_base_single_func_call.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ce1f1a0fac62757b0f03919a59035265cd1ca3c25278726845ef443eb6029c0
|
3 |
+
size 22360
|
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_composite.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd94ebc610db212275776f7ebbbaac4fc528c6e2c52021dc2a58a1217613c607
|
3 |
+
size 52333
|
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_long_context.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:031850c4fd5f9197ec6027c969e27a0f0f6603088c5dbcad84cdaf3560a48346
|
3 |
+
size 40007
|
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_miss_func.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a2fec559eb8c92889663a1dbb3d49f834bf796eb440d54442d8cc0bfb2b8f25
|
3 |
+
size 45887
|
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_miss_param.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bbcbe9635848c867f6df3572e90129710898e4895860e08d451d543c49d9725
|
3 |
+
size 47661
|
output/{mistral-small-2501 → deepseek-v3-0324}/tau_long_context.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:283e28354dc9f0bb07db7b01b7728026c6bf2035dac0bef4bc0610e8b086fc50
|
3 |
+
size 45119
|
output/{mistral-small-2501 → deepseek-v3-0324}/toolace_single_func_call_1.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c99d6fdb22091d9ca86f52380f2dcab5dec0efc35ad1b16fa52ba361d1d9dcf
|
3 |
+
size 16730
|
output/{mistral-small-2501 → deepseek-v3-0324}/toolace_single_func_call_2.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfbe6f97edc557377a812af97a440168d4a32bc05603c8fbd123fce73ef3949d
|
3 |
+
size 12012
|
output/{mistral-small-2501 → deepseek-v3-0324}/xlam_multiple_tool_multiple_call.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c720e550a70c958f4e95c55359352080c2f573414893b09d514924ff9ddbf372
|
3 |
+
size 107285
|
output/{mistral-small-2501 → deepseek-v3-0324}/xlam_multiple_tool_single_call.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff3f1422a96523108f0fbfecce164edcf784fe18c211f99ba46f85a332642459
|
3 |
+
size 40696
|
output/{mistral-small-2501 → deepseek-v3-0324}/xlam_single_tool_multiple_call.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ecc172015abfbd58879906a0fd91190785480405b25d14d20877cd0ef76ca25b
|
3 |
+
size 32717
|
output/{mistral-small-2501 → deepseek-v3-0324}/xlam_single_tool_single_call.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fc6aae2e68efa98cbbabf82953f8b4b762911694d70cfef408d2b1fbe2450d9
|
3 |
+
size 45832
|
output/{mistral-small-2501 → deepseek-v3-0324}/xlam_tool_miss.parquet
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50fe87dcf1acc1e80c7a52a9a19f5de42b3437b0ed6921acbe8475b9548f3001
|
3 |
+
size 55183
|
results.csv
CHANGED
@@ -3,6 +3,7 @@ claude-3-7-sonnet-20250219,Private,Reasoning,Anthropic,3,15,0.953,0.96,0.95,0.92
|
|
3 |
gemini-2.0-flash-001,Private,Normal,Google,0.15,0.6,0.938,0.95,0.93,0.91,0.94,0.9,0.96,0.92,0.95,0.89,0.91,0.98,0.93,0.97,0.98,0.93,0.965
|
4 |
gemini-2.0-flash-lite-001,Private,Normal,Google,0.075,0.3,0.933,0.96,0.91,0.81,0.98,0.98,0.9,0.91,0.92,0.98,0.86,0.99,0.87,0.97,0.96,0.95,0.975
|
5 |
mistral-small-2503,Open source,Normal,Mistral,0.1,0.3,0.912,0.93,0.89,0.85,0.93,0.86,0.91,0.9,1,0.83,0.81,0.99,0.87,0.99,0.95,0.9,0.975
|
|
|
6 |
gpt-4o-2024-11-20,Private,Normal,OpenAI,2.5,10,0.900,0.92,0.88,0.85,0.9,0.92,0.95,0.88,0.99,0.63,0.83,0.98,0.89,0.98,0.98,0.86,0.965
|
7 |
gpt-4.5-preview-2025-02-27,Private,Normal,OpenAI,75,150,0.900,0.93,0.87,0.85,0.91,0.92,0.97,0.92,0.99,0.67,0.85,0.98,0.85,1,0.98,0.8,0.915
|
8 |
gemini-1.5-flash,Private,Normal,Google,0.075,0.3,0.895,0.88,0.91,0.9,0.9,0.89,0.87,0.91,0.83,0.71,0.87,0.98,0.89,0.94,0.93,0.92,0.99
|
@@ -12,7 +13,6 @@ o1-2024-12-17,Private,Reasoning,OpenAI,15,60,0.876,0.83,0.92,0.89,0.92,0.98,0.71
|
|
12 |
amazon.nova-pro-v1,Private,Normal,Amazon,0.8,3.2,0.868,0.94,0.79,0.77,0.81,0.94,0.97,0.73,0.93,0.93,0.78,0.92,0.81,0.94,0.97,0.75,0.9
|
13 |
amazon.nova-lite-v1,Private,Normal,Amazon,0.06,0.24,0.868,0.91,0.83,0.83,0.87,0.83,0.9,0.9,0.93,0.91,0.75,0.94,0.74,0.88,0.96,0.78,0.925
|
14 |
o3-mini-2025-01-31,Private,Reasoning,OpenAI,1.1,4.4,0.847,0.80,0.90,0.87,0.91,0.84,0.72,0.93,0.98,0.63,0.85,0.97,0.84,1,0.43,0.91,0.975
|
15 |
-
mistral-small-2501,Open source,Normal,Mistral,0.1,0.3,0.832,0.88,0.78,0.83,0.78,0.92,0.97,0.76,0.99,0.62,0.8,0.82,0.77,0.95,0.92,0.74,0.775
|
16 |
gpt-4o-mini,Private,Normal,OpenAI,0.15,0.6,0.832,0.85,0.82,0.82,0.85,0.51,0.98,0.83,1,0.54,0.83,0.94,0.83,0.96,0.99,0.73,0.835
|
17 |
amazon.nova-micro-v1,Private,Normal,Amazon,0.035,0.14,0.829,0.90,0.75,0.77,0.79,0.8,0.97,0.69,0.87,0.89,0.74,0.93,0.68,0.91,0.96,0.7,0.91
|
18 |
qwen2.5-72b-instruct,Open source,Normal,Alibaba,0.9,0.9,0.817,0.80,0.84,0.84,0.87,0.92,0.63,0.86,0.99,0.66,0.79,0.99,0.77,0.97,0.42,0.78,0.95
|
@@ -24,4 +24,4 @@ mistral-small-2409,Private,Normal,Mistral,0.2,0.6,0.750,0.82,0.68,0.7,0.77,0.72,
|
|
24 |
ministral-8b-2410,Private,Normal,Mistral,0.1,0.1,0.689,0.73,0.65,0.75,0.59,0.73,0.98,0.66,0.98,0.34,0.78,0.24,0.81,0.9,0.95,0.53,0.41
|
25 |
Meta-Llama-3.1-8B-Instruct-Turbo,Open source,Normal,Meta,0.2,0.2,0.678,0.71,0.64,0.77,0.49,0.44,0.96,0.66,0.98,0.25,0.73,0.48,0.76,0.93,0.96,0.51,0.575
|
26 |
open-mistral-nemo-2407,Open source,Normal,Mistral,0.15,0.15,0.661,0.68,0.64,0.7,0.64,0.51,0.98,0.68,0.99,0.26,0.78,0.21,0.75,0.9,0.94,0.51,0.41
|
27 |
-
Dataset Avg,,,,,,,0.86,0.
|
|
|
3 |
gemini-2.0-flash-001,Private,Normal,Google,0.15,0.6,0.938,0.95,0.93,0.91,0.94,0.9,0.96,0.92,0.95,0.89,0.91,0.98,0.93,0.97,0.98,0.93,0.965
|
4 |
gemini-2.0-flash-lite-001,Private,Normal,Google,0.075,0.3,0.933,0.96,0.91,0.81,0.98,0.98,0.9,0.91,0.92,0.98,0.86,0.99,0.87,0.97,0.96,0.95,0.975
|
5 |
mistral-small-2503,Open source,Normal,Mistral,0.1,0.3,0.912,0.93,0.89,0.85,0.93,0.86,0.91,0.9,1,0.83,0.81,0.99,0.87,0.99,0.95,0.9,0.975
|
6 |
+
deepseek-v3-0324,Open source,Normal,Deepseek,0.27,1.1,0.905,0.91,0.90,0.93,0.9,0.77,0.98,0.87,1,0.7,0.92,0.96,0.91,0.96,0.98,0.84,0.95
|
7 |
gpt-4o-2024-11-20,Private,Normal,OpenAI,2.5,10,0.900,0.92,0.88,0.85,0.9,0.92,0.95,0.88,0.99,0.63,0.83,0.98,0.89,0.98,0.98,0.86,0.965
|
8 |
gpt-4.5-preview-2025-02-27,Private,Normal,OpenAI,75,150,0.900,0.93,0.87,0.85,0.91,0.92,0.97,0.92,0.99,0.67,0.85,0.98,0.85,1,0.98,0.8,0.915
|
9 |
gemini-1.5-flash,Private,Normal,Google,0.075,0.3,0.895,0.88,0.91,0.9,0.9,0.89,0.87,0.91,0.83,0.71,0.87,0.98,0.89,0.94,0.93,0.92,0.99
|
|
|
13 |
amazon.nova-pro-v1,Private,Normal,Amazon,0.8,3.2,0.868,0.94,0.79,0.77,0.81,0.94,0.97,0.73,0.93,0.93,0.78,0.92,0.81,0.94,0.97,0.75,0.9
|
14 |
amazon.nova-lite-v1,Private,Normal,Amazon,0.06,0.24,0.868,0.91,0.83,0.83,0.87,0.83,0.9,0.9,0.93,0.91,0.75,0.94,0.74,0.88,0.96,0.78,0.925
|
15 |
o3-mini-2025-01-31,Private,Reasoning,OpenAI,1.1,4.4,0.847,0.80,0.90,0.87,0.91,0.84,0.72,0.93,0.98,0.63,0.85,0.97,0.84,1,0.43,0.91,0.975
|
|
|
16 |
gpt-4o-mini,Private,Normal,OpenAI,0.15,0.6,0.832,0.85,0.82,0.82,0.85,0.51,0.98,0.83,1,0.54,0.83,0.94,0.83,0.96,0.99,0.73,0.835
|
17 |
amazon.nova-micro-v1,Private,Normal,Amazon,0.035,0.14,0.829,0.90,0.75,0.77,0.79,0.8,0.97,0.69,0.87,0.89,0.74,0.93,0.68,0.91,0.96,0.7,0.91
|
18 |
qwen2.5-72b-instruct,Open source,Normal,Alibaba,0.9,0.9,0.817,0.80,0.84,0.84,0.87,0.92,0.63,0.86,0.99,0.66,0.79,0.99,0.77,0.97,0.42,0.78,0.95
|
|
|
24 |
ministral-8b-2410,Private,Normal,Mistral,0.1,0.1,0.689,0.73,0.65,0.75,0.59,0.73,0.98,0.66,0.98,0.34,0.78,0.24,0.81,0.9,0.95,0.53,0.41
|
25 |
Meta-Llama-3.1-8B-Instruct-Turbo,Open source,Normal,Meta,0.2,0.2,0.678,0.71,0.64,0.77,0.49,0.44,0.96,0.66,0.98,0.25,0.73,0.48,0.76,0.93,0.96,0.51,0.575
|
26 |
open-mistral-nemo-2407,Open source,Normal,Mistral,0.15,0.15,0.661,0.68,0.64,0.7,0.64,0.51,0.98,0.68,0.99,0.26,0.78,0.21,0.75,0.9,0.94,0.51,0.41
|
27 |
+
Dataset Avg,,,,,,,0.86,0.82,0.82,0.82,0.81,0.90,0.82,0.96,0.68,0.82,0.87,0.82,0.93,0.88,0.77,0.85
|