pratikbhavsar committed on
Commit
99f5740
·
1 Parent(s): 97493e3

added deepseek v3

Browse files
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_irrelevance.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d1f5575bac38de57aa3ffbd47a605aa2d50535c15c380aa589dce7da757c37c
3
- size 34457
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16c171dbd70a28d968bb83f7ac43fafd1c12a1ffc802954fc72146bc598cbf43
3
+ size 40093
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_base_multi_func_call.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f572de78c8b18b1de9cfa28b2cb86840ca05b000a8c5d754257e27784232ec99
3
- size 22288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:471746761be82f67ebebbc11a8e8c5fd5427936db12199c45faa3201d01f337b
3
+ size 24620
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_base_single_func_call.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:158bb34db42a4149bd551a47b0ccd74ca370e92276c96556620731abca86c228
3
- size 21869
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ce1f1a0fac62757b0f03919a59035265cd1ca3c25278726845ef443eb6029c0
3
+ size 22360
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_composite.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cd510d392dc22e33f5b5dd620501415a2d40a38736eb0dcfe8057a24ccb2c1e
3
- size 37681
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd94ebc610db212275776f7ebbbaac4fc528c6e2c52021dc2a58a1217613c607
3
+ size 52333
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_long_context.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f71a6db126ce6135c7a82f0cae4701041ba598173535169d2f5b2cf39fca6c45
3
- size 35179
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:031850c4fd5f9197ec6027c969e27a0f0f6603088c5dbcad84cdaf3560a48346
3
+ size 40007
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_miss_func.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0401813ef618a6da8c60238d2eec00118c03f4e28dc810bc50a7ac87a798ba25
3
- size 35564
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a2fec559eb8c92889663a1dbb3d49f834bf796eb440d54442d8cc0bfb2b8f25
3
+ size 45887
output/{mistral-small-2501 → deepseek-v3-0324}/BFCL_v3_multi_turn_miss_param.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cc168c2931f9ed461efdb91ee809ade9f092613b3d4a542c5155df21b5e33fa
3
- size 36579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bbcbe9635848c867f6df3572e90129710898e4895860e08d451d543c49d9725
3
+ size 47661
output/{mistral-small-2501 → deepseek-v3-0324}/tau_long_context.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2318522680f2c54050da9773099cf787ee680c047697fa37bb1f395c32dc1634
3
- size 37570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:283e28354dc9f0bb07db7b01b7728026c6bf2035dac0bef4bc0610e8b086fc50
3
+ size 45119
output/{mistral-small-2501 → deepseek-v3-0324}/toolace_single_func_call_1.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d052bb4bbdf8a4b418305d185297ddb098ddf818b381cd541136228305efc3ce
3
- size 14905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c99d6fdb22091d9ca86f52380f2dcab5dec0efc35ad1b16fa52ba361d1d9dcf
3
+ size 16730
output/{mistral-small-2501 → deepseek-v3-0324}/toolace_single_func_call_2.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39b1fb656c21148e21c585b58ce82647e748b0045659daecd42a8bfc3a0eecb9
3
- size 10302
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfbe6f97edc557377a812af97a440168d4a32bc05603c8fbd123fce73ef3949d
3
+ size 12012
output/{mistral-small-2501 → deepseek-v3-0324}/xlam_multiple_tool_multiple_call.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa8db2630ce302e16ef948885632346cd8008154737649c2bb675972d09bf1ca
3
- size 91560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c720e550a70c958f4e95c55359352080c2f573414893b09d514924ff9ddbf372
3
+ size 107285
output/{mistral-small-2501 → deepseek-v3-0324}/xlam_multiple_tool_single_call.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b02e39f1bf1b52b2a0269bdf464289cefdaf69f0311b21124e3a4caaf78530f
3
- size 37546
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff3f1422a96523108f0fbfecce164edcf784fe18c211f99ba46f85a332642459
3
+ size 40696
output/{mistral-small-2501 → deepseek-v3-0324}/xlam_single_tool_multiple_call.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:159df7de0f244b043c97fdc99600f7da9b1de1a94f0f7b985bb337db71d34695
3
- size 26915
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecc172015abfbd58879906a0fd91190785480405b25d14d20877cd0ef76ca25b
3
+ size 32717
output/{mistral-small-2501 → deepseek-v3-0324}/xlam_single_tool_single_call.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd2e8fc7dc73648dd5051ccb4f65a17fa932a4ac60c8fe611731feb1d6cedc80
3
- size 41430
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc6aae2e68efa98cbbabf82953f8b4b762911694d70cfef408d2b1fbe2450d9
3
+ size 45832
output/{mistral-small-2501 → deepseek-v3-0324}/xlam_tool_miss.parquet RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63a128128e5aa05109924c78f6c075ceeb67952576f63fa587726b263ff19a7a
3
- size 45045
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50fe87dcf1acc1e80c7a52a9a19f5de42b3437b0ed6921acbe8475b9548f3001
3
+ size 55183
results.csv CHANGED
@@ -3,6 +3,7 @@ claude-3-7-sonnet-20250219,Private,Reasoning,Anthropic,3,15,0.953,0.96,0.95,0.92
3
  gemini-2.0-flash-001,Private,Normal,Google,0.15,0.6,0.938,0.95,0.93,0.91,0.94,0.9,0.96,0.92,0.95,0.89,0.91,0.98,0.93,0.97,0.98,0.93,0.965
4
  gemini-2.0-flash-lite-001,Private,Normal,Google,0.075,0.3,0.933,0.96,0.91,0.81,0.98,0.98,0.9,0.91,0.92,0.98,0.86,0.99,0.87,0.97,0.96,0.95,0.975
5
  mistral-small-2503,Open source,Normal,Mistral,0.1,0.3,0.912,0.93,0.89,0.85,0.93,0.86,0.91,0.9,1,0.83,0.81,0.99,0.87,0.99,0.95,0.9,0.975
 
6
  gpt-4o-2024-11-20,Private,Normal,OpenAI,2.5,10,0.900,0.92,0.88,0.85,0.9,0.92,0.95,0.88,0.99,0.63,0.83,0.98,0.89,0.98,0.98,0.86,0.965
7
  gpt-4.5-preview-2025-02-27,Private,Normal,OpenAI,75,150,0.900,0.93,0.87,0.85,0.91,0.92,0.97,0.92,0.99,0.67,0.85,0.98,0.85,1,0.98,0.8,0.915
8
  gemini-1.5-flash,Private,Normal,Google,0.075,0.3,0.895,0.88,0.91,0.9,0.9,0.89,0.87,0.91,0.83,0.71,0.87,0.98,0.89,0.94,0.93,0.92,0.99
@@ -12,7 +13,6 @@ o1-2024-12-17,Private,Reasoning,OpenAI,15,60,0.876,0.83,0.92,0.89,0.92,0.98,0.71
12
  amazon.nova-pro-v1,Private,Normal,Amazon,0.8,3.2,0.868,0.94,0.79,0.77,0.81,0.94,0.97,0.73,0.93,0.93,0.78,0.92,0.81,0.94,0.97,0.75,0.9
13
  amazon.nova-lite-v1,Private,Normal,Amazon,0.06,0.24,0.868,0.91,0.83,0.83,0.87,0.83,0.9,0.9,0.93,0.91,0.75,0.94,0.74,0.88,0.96,0.78,0.925
14
  o3-mini-2025-01-31,Private,Reasoning,OpenAI,1.1,4.4,0.847,0.80,0.90,0.87,0.91,0.84,0.72,0.93,0.98,0.63,0.85,0.97,0.84,1,0.43,0.91,0.975
15
- mistral-small-2501,Open source,Normal,Mistral,0.1,0.3,0.832,0.88,0.78,0.83,0.78,0.92,0.97,0.76,0.99,0.62,0.8,0.82,0.77,0.95,0.92,0.74,0.775
16
  gpt-4o-mini,Private,Normal,OpenAI,0.15,0.6,0.832,0.85,0.82,0.82,0.85,0.51,0.98,0.83,1,0.54,0.83,0.94,0.83,0.96,0.99,0.73,0.835
17
  amazon.nova-micro-v1,Private,Normal,Amazon,0.035,0.14,0.829,0.90,0.75,0.77,0.79,0.8,0.97,0.69,0.87,0.89,0.74,0.93,0.68,0.91,0.96,0.7,0.91
18
  qwen2.5-72b-instruct,Open source,Normal,Alibaba,0.9,0.9,0.817,0.80,0.84,0.84,0.87,0.92,0.63,0.86,0.99,0.66,0.79,0.99,0.77,0.97,0.42,0.78,0.95
@@ -24,4 +24,4 @@ mistral-small-2409,Private,Normal,Mistral,0.2,0.6,0.750,0.82,0.68,0.7,0.77,0.72,
24
  ministral-8b-2410,Private,Normal,Mistral,0.1,0.1,0.689,0.73,0.65,0.75,0.59,0.73,0.98,0.66,0.98,0.34,0.78,0.24,0.81,0.9,0.95,0.53,0.41
25
  Meta-Llama-3.1-8B-Instruct-Turbo,Open source,Normal,Meta,0.2,0.2,0.678,0.71,0.64,0.77,0.49,0.44,0.96,0.66,0.98,0.25,0.73,0.48,0.76,0.93,0.96,0.51,0.575
26
  open-mistral-nemo-2407,Open source,Normal,Mistral,0.15,0.15,0.661,0.68,0.64,0.7,0.64,0.51,0.98,0.68,0.99,0.26,0.78,0.21,0.75,0.9,0.94,0.51,0.41
27
- Dataset Avg,,,,,,,0.86,0.81,0.82,0.81,0.81,0.90,0.82,0.95,0.68,0.81,0.86,0.82,0.93,0.88,0.76,0.85
 
3
  gemini-2.0-flash-001,Private,Normal,Google,0.15,0.6,0.938,0.95,0.93,0.91,0.94,0.9,0.96,0.92,0.95,0.89,0.91,0.98,0.93,0.97,0.98,0.93,0.965
4
  gemini-2.0-flash-lite-001,Private,Normal,Google,0.075,0.3,0.933,0.96,0.91,0.81,0.98,0.98,0.9,0.91,0.92,0.98,0.86,0.99,0.87,0.97,0.96,0.95,0.975
5
  mistral-small-2503,Open source,Normal,Mistral,0.1,0.3,0.912,0.93,0.89,0.85,0.93,0.86,0.91,0.9,1,0.83,0.81,0.99,0.87,0.99,0.95,0.9,0.975
6
+ deepseek-v3-0324,Open source,Normal,Deepseek,0.27,1.1,0.905,0.91,0.90,0.93,0.9,0.77,0.98,0.87,1,0.7,0.92,0.96,0.91,0.96,0.98,0.84,0.95
7
  gpt-4o-2024-11-20,Private,Normal,OpenAI,2.5,10,0.900,0.92,0.88,0.85,0.9,0.92,0.95,0.88,0.99,0.63,0.83,0.98,0.89,0.98,0.98,0.86,0.965
8
  gpt-4.5-preview-2025-02-27,Private,Normal,OpenAI,75,150,0.900,0.93,0.87,0.85,0.91,0.92,0.97,0.92,0.99,0.67,0.85,0.98,0.85,1,0.98,0.8,0.915
9
  gemini-1.5-flash,Private,Normal,Google,0.075,0.3,0.895,0.88,0.91,0.9,0.9,0.89,0.87,0.91,0.83,0.71,0.87,0.98,0.89,0.94,0.93,0.92,0.99
 
13
  amazon.nova-pro-v1,Private,Normal,Amazon,0.8,3.2,0.868,0.94,0.79,0.77,0.81,0.94,0.97,0.73,0.93,0.93,0.78,0.92,0.81,0.94,0.97,0.75,0.9
14
  amazon.nova-lite-v1,Private,Normal,Amazon,0.06,0.24,0.868,0.91,0.83,0.83,0.87,0.83,0.9,0.9,0.93,0.91,0.75,0.94,0.74,0.88,0.96,0.78,0.925
15
  o3-mini-2025-01-31,Private,Reasoning,OpenAI,1.1,4.4,0.847,0.80,0.90,0.87,0.91,0.84,0.72,0.93,0.98,0.63,0.85,0.97,0.84,1,0.43,0.91,0.975
 
16
  gpt-4o-mini,Private,Normal,OpenAI,0.15,0.6,0.832,0.85,0.82,0.82,0.85,0.51,0.98,0.83,1,0.54,0.83,0.94,0.83,0.96,0.99,0.73,0.835
17
  amazon.nova-micro-v1,Private,Normal,Amazon,0.035,0.14,0.829,0.90,0.75,0.77,0.79,0.8,0.97,0.69,0.87,0.89,0.74,0.93,0.68,0.91,0.96,0.7,0.91
18
  qwen2.5-72b-instruct,Open source,Normal,Alibaba,0.9,0.9,0.817,0.80,0.84,0.84,0.87,0.92,0.63,0.86,0.99,0.66,0.79,0.99,0.77,0.97,0.42,0.78,0.95
 
24
  ministral-8b-2410,Private,Normal,Mistral,0.1,0.1,0.689,0.73,0.65,0.75,0.59,0.73,0.98,0.66,0.98,0.34,0.78,0.24,0.81,0.9,0.95,0.53,0.41
25
  Meta-Llama-3.1-8B-Instruct-Turbo,Open source,Normal,Meta,0.2,0.2,0.678,0.71,0.64,0.77,0.49,0.44,0.96,0.66,0.98,0.25,0.73,0.48,0.76,0.93,0.96,0.51,0.575
26
  open-mistral-nemo-2407,Open source,Normal,Mistral,0.15,0.15,0.661,0.68,0.64,0.7,0.64,0.51,0.98,0.68,0.99,0.26,0.78,0.21,0.75,0.9,0.94,0.51,0.41
27
+ Dataset Avg,,,,,,,0.86,0.82,0.82,0.82,0.81,0.90,0.82,0.96,0.68,0.82,0.87,0.82,0.93,0.88,0.77,0.85