abrek committed on
Commit
ef12e6a
·
verified ·
1 Parent(s): 9853b08

Delete wrong upload

Browse files
zero-shot/Qwen3-1.7B-Base.json DELETED
@@ -1,189 +0,0 @@
1
- {
2
- "model": {
3
- "model": "Qwen/Qwen3-1.7B-Base",
4
- "architecture": "Qwen3ForCausalLM",
5
- "dtype": "bfloat16",
6
- "type": "pretrained",
7
- "num_parameters": "1.7b",
8
- "api": "hf"
9
- },
10
- "results": [
11
- {
12
- "name": "xquad_tr",
13
- "task": "extractive_question_answering",
14
- "exact_match": 0.14705882352941177,
15
- "f1": 0.2870820139107337
16
- },
17
- {
18
- "name": "xcopa_tr",
19
- "task": "multiple_choice",
20
- "acc": 0.576,
21
- "acc_norm": 0.576
22
- },
23
- {
24
- "name": "turkish_plu",
25
- "task": "multiple_choice",
26
- "acc": 0.4416,
27
- "acc_norm": 0.38656
28
- },
29
- {
30
- "name": "turkish_plu_goal_inference",
31
- "task": "multiple_choice",
32
- "acc": 0.3859020310633214,
33
- "acc_norm": 0.25925925925925924
34
- },
35
- {
36
- "name": "turkish_plu_next_event_prediction",
37
- "task": "multiple_choice",
38
- "acc": 0.40610687022900765,
39
- "acc_norm": 0.26106870229007634
40
- },
41
- {
42
- "name": "turkish_plu_step_inference",
43
- "task": "multiple_choice",
44
- "acc": 0.2973856209150327,
45
- "acc_norm": 0.34477124183006536
46
- },
47
- {
48
- "name": "turkish_plu_step_ordering",
49
- "task": "multiple_choice",
50
- "acc": 0.5964740450538688,
51
- "acc_norm": 0.5964740450538688
52
- },
53
- {
54
- "name": "turkce_atasozleri",
55
- "task": "multiple_choice",
56
- "acc": 0.576878612716763,
57
- "acc_norm": 0.576878612716763
58
- },
59
- {
60
- "name": "check_worthiness",
61
- "task": "multiple_choice",
62
- "acc": 0.37842778793418647,
63
- "acc_norm": 0.6238574040219378
64
- },
65
- {
66
- "name": "relevance_judgment",
67
- "task": "multiple_choice",
68
- "acc": 0.43327239488117003,
69
- "acc_norm": 0.5781535648994516
70
- },
71
- {
72
- "name": "tquad",
73
- "task": "extractive_question_answering",
74
- "exact_match": 0.09417040358744394,
75
- "f1": 0.26322715675405095
76
- },
77
- {
78
- "name": "sts_tr",
79
- "task": "text_classification",
80
- "acc": 0.15663524292965916,
81
- "acc_norm": 0.11965192168237854
82
- },
83
- {
84
- "name": "offenseval_tr",
85
- "task": "text_classification",
86
- "acc": 0.768140589569161,
87
- "acc_norm": 0.7970521541950113
88
- },
89
- {
90
- "name": "mnli_tr",
91
- "task": "natural_language_inference",
92
- "acc": 0.3478,
93
- "acc_norm": 0.3213
94
- },
95
- {
96
- "name": "snli_tr",
97
- "task": "natural_language_inference",
98
- "acc": 0.3343,
99
- "acc_norm": 0.3237
100
- },
101
- {
102
- "name": "xnli_tr",
103
- "task": "natural_language_inference",
104
- "acc": 0.3333333333333333,
105
- "acc_norm": 0.3333333333333333
106
- },
107
- {
108
- "name": "news_cat",
109
- "task": "text_classification",
110
- "acc": 0.628,
111
- "acc_norm": 0.34
112
- },
113
- {
114
- "name": "ironytr",
115
- "task": "text_classification",
116
- "acc": 0.55,
117
- "acc_norm": 0.5
118
- },
119
- {
120
- "name": "exams_tr",
121
- "task": "multiple_choice",
122
- "acc": 0.26717557251908397,
123
- "acc_norm": 0.31297709923664124
124
- },
125
- {
126
- "name": "circumflex_tr",
127
- "task": "multiple_choice",
128
- "acc": 0.6142857142857143,
129
- "acc_norm": 0.6142857142857143
130
- },
131
- {
132
- "name": "bilmecebench",
133
- "task": "multiple_choice",
134
- "acc": 0.33710407239819007,
135
- "acc_norm": 0.33710407239819007
136
- },
137
- {
138
- "name": "belebele_tr",
139
- "task": "multiple_choice",
140
- "acc": 0.5933333333333334,
141
- "acc_norm": 0.5933333333333334
142
- },
143
- {
144
- "name": "turkishmmlu",
145
- "task": "multiple_choice",
146
- "acc": 0.4,
147
- "acc_norm": 0.4
148
- },
149
- {
150
- "name": "xlsum_tr",
151
- "task": "summarization",
152
- "rouge1": 0.04781539873117025,
153
- "rouge2": 0.01992023353880694,
154
- "rougeL": 0.036905992546053354
155
- },
156
- {
157
- "name": "wmt-tr-en-prompt",
158
- "task": "machine_translation",
159
- "wer": 1.122812394923734,
160
- "bleu": 0.08632239330032426
161
- },
162
- {
163
- "name": "wiki_lingua_tr",
164
- "task": "summarization",
165
- "rouge1": 0.1905933745444374,
166
- "rouge2": 0.05729029075401719,
167
- "rougeL": 0.1389441266800664
168
- },
169
- {
170
- "name": "tr-wikihow-summ",
171
- "task": "summarization",
172
- "rouge1": 0.1573146712521122,
173
- "rouge2": 0.04753989405427132,
174
- "rougeL": 0.11305186768825831
175
- },
176
- {
177
- "name": "mlsum_tr",
178
- "task": "summarization",
179
- "rouge1": 0.18820054046451806,
180
- "rouge2": 0.11945259097094729,
181
- "rougeL": 0.15322863222214073
182
- },
183
- {
184
- "name": "gecturk_generation",
185
- "task": "grammatical_error_correction",
186
- "exact_match": 0.13134960758823247
187
- }
188
- ]
189
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
zero-shot/Qwen3-1.7B.json DELETED
@@ -1,240 +0,0 @@
1
- {
2
- "model": {
3
- "model": "Qwen/Qwen3-1.7B",
4
- "architecture": "Qwen3ForCausalLM",
5
- "dtype": "bfloat16",
6
- "type": "instruction-tuned",
7
- "num_parameters": "1.7b",
8
- "api": "hf"
9
- },
10
- "results": [
11
- {
12
- "name": "xquad_tr",
13
- "task": "extractive_question_answering",
14
- "exact_match": 0.40252100840336136,
15
- "f1": 0.6176467678580342
16
- },
17
- {
18
- "name": "xcopa_tr",
19
- "task": "multiple_choice",
20
- "acc": 0.646,
21
- "acc_norm": 0.646
22
- },
23
- {
24
- "name": "turkish_plu",
25
- "task": "multiple_choice",
26
- "acc": 0.48736,
27
- "acc_norm": 0.5392
28
- },
29
- {
30
- "name": "turkish_plu_goal_inference",
31
- "task": "multiple_choice",
32
- "acc": 0.4133811230585424,
33
- "acc_norm": 0.4324970131421744
34
- },
35
- {
36
- "name": "turkish_plu_next_event_prediction",
37
- "task": "multiple_choice",
38
- "acc": 0.4870229007633588,
39
- "acc_norm": 0.5816793893129771
40
- },
41
- {
42
- "name": "turkish_plu_step_inference",
43
- "task": "multiple_choice",
44
- "acc": 0.35294117647058826,
45
- "acc_norm": 0.49019607843137253
46
- },
47
- {
48
- "name": "turkish_plu_step_ordering",
49
- "task": "multiple_choice",
50
- "acc": 0.6287952987267384,
51
- "acc_norm": 0.6287952987267384
52
- },
53
- {
54
- "name": "check_worthiness",
55
- "task": "multiple_choice",
56
- "acc": 0.37614259597806216,
57
- "acc_norm": 0.3756855575868373
58
- },
59
- {
60
- "name": "relevance_judgment",
61
- "task": "multiple_choice",
62
- "acc": 0.4506398537477148,
63
- "acc_norm": 0.5708409506398537
64
- },
65
- {
66
- "name": "tquad",
67
- "task": "extractive_question_answering",
68
- "exact_match": 0.34753363228699546,
69
- "f1": 0.614345609122
70
- },
71
- {
72
- "name": "sts_tr",
73
- "task": "text_classification",
74
- "acc": 0.2037708484408992,
75
- "acc_norm": 0.2610587382160986
76
- },
77
- {
78
- "name": "offenseval_tr",
79
- "task": "text_classification",
80
- "acc": 0.22023809523809523,
81
- "acc_norm": 0.2962018140589569
82
- },
83
- {
84
- "name": "mnli_tr",
85
- "task": "natural_language_inference",
86
- "acc": 0.3482,
87
- "acc_norm": 0.3389
88
- },
89
- {
90
- "name": "snli_tr",
91
- "task": "natural_language_inference",
92
- "acc": 0.3373,
93
- "acc_norm": 0.3271
94
- },
95
- {
96
- "name": "xnli_tr",
97
- "task": "natural_language_inference",
98
- "acc": 0.3335329341317365,
99
- "acc_norm": 0.33532934131736525
100
- },
101
- {
102
- "name": "news_cat",
103
- "task": "text_classification",
104
- "acc": 0.524,
105
- "acc_norm": 0.348
106
- },
107
- {
108
- "name": "mkqa_tr",
109
- "task": "extractive_question_answering",
110
- "exact_match": 0.050014797277300974,
111
- "f1": 0.11195620922043903
112
- },
113
- {
114
- "name": "ironytr",
115
- "task": "text_classification",
116
- "acc": 0.5616666666666666,
117
- "acc_norm": 0.6183333333333333
118
- },
119
- {
120
- "name": "exams_tr",
121
- "task": "multiple_choice",
122
- "acc": 0.33078880407124683,
123
- "acc_norm": 0.35877862595419846
124
- },
125
- {
126
- "name": "belebele_tr",
127
- "task": "multiple_choice",
128
- "acc": 0.8122222222222222,
129
- "acc_norm": 0.8122222222222222
130
- },
131
- {
132
- "name": "xlsum_tr",
133
- "task": "summarization",
134
- "rouge1": 0.2866278776668776,
135
- "rouge2": 0.1308383753682692,
136
- "rougeL": 0.22217070278595147
137
- },
138
- {
139
- "name": "wmt-tr-en-prompt",
140
- "task": "machine_translation",
141
- "wer": 1.6546507240124098,
142
- "bleu": 0.08096461200991427
143
- },
144
- {
145
- "name": "wiki_lingua_tr",
146
- "task": "summarization",
147
- "rouge1": 0.20802332507327073,
148
- "rouge2": 0.06755910819968403,
149
- "rougeL": 0.15425156655216665
150
- },
151
- {
152
- "name": "tr-wikihow-summ",
153
- "task": "summarization",
154
- "rouge1": 0.22012543165161014,
155
- "rouge2": 0.06567086903148794,
156
- "rougeL": 0.15604855476586732
157
- },
158
- {
159
- "name": "mlsum_tr",
160
- "task": "summarization",
161
- "rouge1": 0.3928051448993858,
162
- "rouge2": 0.25674608200884674,
163
- "rougeL": 0.3276023476233169
164
- },
165
- {
166
- "name": "gecturk_generation",
167
- "task": "grammatical_error_correction",
168
- "exact_match": 0.1888
169
- },
170
- {
171
- "name": "turkce_atasozleri",
172
- "task": "multiple_choice",
173
- "acc": 0.753757225433526,
174
- "acc_norm": 0.753757225433526
175
- },
176
- {
177
- "name": "turkishmmlu",
178
- "task": "multiple_choice",
179
- "acc": 0.5622222222222222,
180
- "acc_norm": 0.5622222222222222
181
- },
182
- {
183
- "name": "bilmecebench",
184
- "task": "multiple_choice",
185
- "acc": 0.4751131221719457,
186
- "acc_norm": 0.4751131221719457
187
- },
188
- {
189
- "name": "circumflex_tr",
190
- "task": "multiple_choice",
191
- "acc": 0.5857142857142857,
192
- "acc_norm": 0.5857142857142857
193
- },
194
- {
195
- "name": "turkishmmlu",
196
- "task": "multiple_choice",
197
- "acc": 0.38222222222222224,
198
- "acc_norm": 0.38222222222222224
199
- },
200
- {
201
- "name": "xlsum_tr",
202
- "task": "summarization",
203
- "rouge1": 0.27820377739060753,
204
- "rouge2": 0.12397426441534459,
205
- "rougeL": 0.2141557453953809
206
- },
207
- {
208
- "name": "wmt-tr-en-prompt",
209
- "task": "machine_translation",
210
- "wer": 1.4553342171690737,
211
- "bleu": 0.05579047680001111
212
- },
213
- {
214
- "name": "wiki_lingua_tr",
215
- "task": "summarization",
216
- "rouge1": 0.18975012020130533,
217
- "rouge2": 0.056217857712315425,
218
- "rougeL": 0.14164939611392058
219
- },
220
- {
221
- "name": "tr-wikihow-summ",
222
- "task": "summarization",
223
- "rouge1": 0.20337048755281356,
224
- "rouge2": 0.05824591585148973,
225
- "rougeL": 0.14892270355472415
226
- },
227
- {
228
- "name": "mlsum_tr",
229
- "task": "summarization",
230
- "rouge1": 0.3633354972284383,
231
- "rouge2": 0.22049361975220266,
232
- "rougeL": 0.29386670924548397
233
- },
234
- {
235
- "name": "gecturk_generation",
236
- "task": "grammatical_error_correction",
237
- "exact_match": 0.3063700707785642
238
- }
239
- ]
240
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
zero-shot/Qwen3-14B-Base.json DELETED
@@ -1,189 +0,0 @@
1
- {
2
- "model": {
3
- "model": "Qwen/Qwen3-14B",
4
- "architecture": "Qwen3ForCausalLM",
5
- "dtype": "bfloat16",
6
- "type": "pretrained",
7
- "num_parameters": "14b",
8
- "api": "hf"
9
- },
10
- "results": [
11
- {
12
- "name": "xquad_tr",
13
- "task": "extractive_question_answering",
14
- "exact_match": 0.3512605042016807,
15
- "f1": 0.5497491980200822
16
- },
17
- {
18
- "name": "xcopa_tr",
19
- "task": "multiple_choice",
20
- "acc": 0.676,
21
- "acc_norm": 0.676
22
- },
23
- {
24
- "name": "turkish_plu",
25
- "task": "multiple_choice",
26
- "acc": 0.53248,
27
- "acc_norm": 0.44448
28
- },
29
- {
30
- "name": "turkish_plu_goal_inference",
31
- "task": "multiple_choice",
32
- "acc": 0.4432497013142174,
33
- "acc_norm": 0.3010752688172043
34
- },
35
- {
36
- "name": "turkish_plu_next_event_prediction",
37
- "task": "multiple_choice",
38
- "acc": 0.5145038167938931,
39
- "acc_norm": 0.2870229007633588
40
- },
41
- {
42
- "name": "turkish_plu_step_inference",
43
- "task": "multiple_choice",
44
- "acc": 0.40032679738562094,
45
- "acc_norm": 0.3888888888888889
46
- },
47
- {
48
- "name": "turkish_plu_step_ordering",
49
- "task": "multiple_choice",
50
- "acc": 0.6963761018609207,
51
- "acc_norm": 0.6963761018609207
52
- },
53
- {
54
- "name": "turkce_atasozleri",
55
- "task": "multiple_choice",
56
- "acc": 0.8283236994219653,
57
- "acc_norm": 0.8283236994219653
58
- },
59
- {
60
- "name": "check_worthiness",
61
- "task": "multiple_choice",
62
- "acc": 0.37614259597806216,
63
- "acc_norm": 0.6238574040219378
64
- },
65
- {
66
- "name": "relevance_judgment",
67
- "task": "multiple_choice",
68
- "acc": 0.42047531992687387,
69
- "acc_norm": 0.5781535648994516
70
- },
71
- {
72
- "name": "tquad",
73
- "task": "extractive_question_answering",
74
- "exact_match": 0.20179372197309417,
75
- "f1": 0.41015757440015116
76
- },
77
- {
78
- "name": "sts_tr",
79
- "task": "text_classification",
80
- "acc": 0.18491660623640319,
81
- "acc_norm": 0.17113850616388687
82
- },
83
- {
84
- "name": "offenseval_tr",
85
- "task": "text_classification",
86
- "acc": 0.7650226757369615,
87
- "acc_norm": 0.7970521541950113
88
- },
89
- {
90
- "name": "mnli_tr",
91
- "task": "natural_language_inference",
92
- "acc": 0.2776,
93
- "acc_norm": 0.3213
94
- },
95
- {
96
- "name": "snli_tr",
97
- "task": "natural_language_inference",
98
- "acc": 0.2946,
99
- "acc_norm": 0.3237
100
- },
101
- {
102
- "name": "xnli_tr",
103
- "task": "natural_language_inference",
104
- "acc": 0.26387225548902193,
105
- "acc_norm": 0.3333333333333333
106
- },
107
- {
108
- "name": "news_cat",
109
- "task": "text_classification",
110
- "acc": 0.696,
111
- "acc_norm": 0.344
112
- },
113
- {
114
- "name": "ironytr",
115
- "task": "text_classification",
116
- "acc": 0.735,
117
- "acc_norm": 0.5
118
- },
119
- {
120
- "name": "exams_tr",
121
- "task": "multiple_choice",
122
- "acc": 0.35877862595419846,
123
- "acc_norm": 0.3333333333333333
124
- },
125
- {
126
- "name": "circumflex_tr",
127
- "task": "multiple_choice",
128
- "acc": 0.5428571428571428,
129
- "acc_norm": 0.5428571428571428
130
- },
131
- {
132
- "name": "bilmecebench",
133
- "task": "multiple_choice",
134
- "acc": 0.6470588235294118,
135
- "acc_norm": 0.6470588235294118
136
- },
137
- {
138
- "name": "belebele_tr",
139
- "task": "multiple_choice",
140
- "acc": 0.8388888888888889,
141
- "acc_norm": 0.8388888888888889
142
- },
143
- {
144
- "name": "turkishmmlu",
145
- "task": "multiple_choice",
146
- "acc": 0.6777777777777778,
147
- "acc_norm": 0.6777777777777778
148
- },
149
- {
150
- "name": "xlsum_tr",
151
- "task": "summarization",
152
- "rouge1": 0.11473487738310617,
153
- "rouge2": 0.060121517882459646,
154
- "rougeL": 0.09836107933415753
155
- },
156
- {
157
- "name": "wmt-tr-en-prompt",
158
- "task": "machine_translation",
159
- "wer": 1.1492106652129537,
160
- "bleu": 0.11818272894053798
161
- },
162
- {
163
- "name": "wiki_lingua_tr",
164
- "task": "summarization",
165
- "rouge1": 0.13000651376079725,
166
- "rouge2": 0.03942796488947396,
167
- "rougeL": 0.09588108114897731
168
- },
169
- {
170
- "name": "tr-wikihow-summ",
171
- "task": "summarization",
172
- "rouge1": 0.042343650286353535,
173
- "rouge2": 0.012407404121300508,
174
- "rougeL": 0.03134101717443596
175
- },
176
- {
177
- "name": "mlsum_tr",
178
- "task": "summarization",
179
- "rouge1": 0.282634405530366,
180
- "rouge2": 0.18883921790708064,
181
- "rougeL": 0.2382731729763113
182
- },
183
- {
184
- "name": "gecturk_generation",
185
- "task": "grammatical_error_correction",
186
- "exact_match": 0.32360729934036303
187
- }
188
- ]
189
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
zero-shot/Qwen3-14B.json DELETED
@@ -1,189 +0,0 @@
1
- {
2
- "model": {
3
- "model": "Qwen/Qwen3-14B",
4
- "architecture": "Qwen3ForCausalLM",
5
- "dtype": "bfloat16",
6
- "type": "instruction-tuned",
7
- "num_parameters": "14b",
8
- "api": "hf"
9
- },
10
- "results": [
11
- {
12
- "name": "xquad_tr",
13
- "task": "extractive_question_answering",
14
- "exact_match": 0.17394957983193277,
15
- "f1": 0.2667005758476218
16
- },
17
- {
18
- "name": "xcopa_tr",
19
- "task": "multiple_choice",
20
- "acc": 0.674,
21
- "acc_norm": 0.674
22
- },
23
- {
24
- "name": "turkish_plu",
25
- "task": "multiple_choice",
26
- "acc": 0.4944,
27
- "acc_norm": 0.42432
28
- },
29
- {
30
- "name": "turkish_plu_goal_inference",
31
- "task": "multiple_choice",
32
- "acc": 0.39904420549581837,
33
- "acc_norm": 0.2867383512544803
34
- },
35
- {
36
- "name": "turkish_plu_next_event_prediction",
37
- "task": "multiple_choice",
38
- "acc": 0.5038167938931297,
39
- "acc_norm": 0.28549618320610687
40
- },
41
- {
42
- "name": "turkish_plu_step_inference",
43
- "task": "multiple_choice",
44
- "acc": 0.3562091503267974,
45
- "acc_norm": 0.38562091503267976
46
- },
47
- {
48
- "name": "turkish_plu_step_ordering",
49
- "task": "multiple_choice",
50
- "acc": 0.6493633692458374,
51
- "acc_norm": 0.6493633692458374
52
- },
53
- {
54
- "name": "turkce_atasozleri",
55
- "task": "multiple_choice",
56
- "acc": 0.8127167630057803,
57
- "acc_norm": 0.8127167630057803
58
- },
59
- {
60
- "name": "check_worthiness",
61
- "task": "multiple_choice",
62
- "acc": 0.38619744058500916,
63
- "acc_norm": 0.6238574040219378
64
- },
65
- {
66
- "name": "relevance_judgment",
67
- "task": "multiple_choice",
68
- "acc": 0.4218464351005484,
69
- "acc_norm": 0.5781535648994516
70
- },
71
- {
72
- "name": "tquad",
73
- "task": "extractive_question_answering",
74
- "exact_match": 0.1547085201793722,
75
- "f1": 0.31146380255216244
76
- },
77
- {
78
- "name": "sts_tr",
79
- "task": "text_classification",
80
- "acc": 0.1319796954314721,
81
- "acc_norm": 0.116751269035533
82
- },
83
- {
84
- "name": "offenseval_tr",
85
- "task": "text_classification",
86
- "acc": 0.7644557823129252,
87
- "acc_norm": 0.7970521541950113
88
- },
89
- {
90
- "name": "mnli_tr",
91
- "task": "natural_language_inference",
92
- "acc": 0.3356,
93
- "acc_norm": 0.3213
94
- },
95
- {
96
- "name": "snli_tr",
97
- "task": "natural_language_inference",
98
- "acc": 0.3044,
99
- "acc_norm": 0.3237
100
- },
101
- {
102
- "name": "xnli_tr",
103
- "task": "natural_language_inference",
104
- "acc": 0.3251497005988024,
105
- "acc_norm": 0.3333333333333333
106
- },
107
- {
108
- "name": "news_cat",
109
- "task": "text_classification",
110
- "acc": 0.424,
111
- "acc_norm": 0.232
112
- },
113
- {
114
- "name": "ironytr",
115
- "task": "text_classification",
116
- "acc": 0.7,
117
- "acc_norm": 0.5
118
- },
119
- {
120
- "name": "exams_tr",
121
- "task": "multiple_choice",
122
- "acc": 0.33587786259541985,
123
- "acc_norm": 0.3384223918575064
124
- },
125
- {
126
- "name": "circumflex_tr",
127
- "task": "multiple_choice",
128
- "acc": 0.5714285714285714,
129
- "acc_norm": 0.5714285714285714
130
- },
131
- {
132
- "name": "bilmecebench",
133
- "task": "multiple_choice",
134
- "acc": 0.5769230769230769,
135
- "acc_norm": 0.5769230769230769
136
- },
137
- {
138
- "name": "belebele_tr",
139
- "task": "multiple_choice",
140
- "acc": 0.8355555555555556,
141
- "acc_norm": 0.8355555555555556
142
- },
143
- {
144
- "name": "turkishmmlu",
145
- "task": "multiple_choice",
146
- "acc": 0.6355555555555555,
147
- "acc_norm": 0.6355555555555555
148
- },
149
- {
150
- "name": "xlsum_tr",
151
- "task": "summarization",
152
- "rouge1": 0.2763159346655365,
153
- "rouge2": 0.13580436770127194,
154
- "rougeL": 0.2140432080171992
155
- },
156
- {
157
- "name": "wmt-tr-en-prompt",
158
- "task": "machine_translation",
159
- "wer": 0.8811716956156485,
160
- "bleu": 0.08106810125402132
161
- },
162
- {
163
- "name": "wiki_lingua_tr",
164
- "task": "summarization",
165
- "rouge1": 0.20105611846416252,
166
- "rouge2": 0.07453258043188488,
167
- "rougeL": 0.15681666038287218
168
- },
169
- {
170
- "name": "tr-wikihow-summ",
171
- "task": "summarization",
172
- "rouge1": 0.16377028131791377,
173
- "rouge2": 0.04758033115002652,
174
- "rougeL": 0.1184993075488721
175
- },
176
- {
177
- "name": "mlsum_tr",
178
- "task": "summarization",
179
- "rouge1": 0.3844389772184301,
180
- "rouge2": 0.23720643221445104,
181
- "rougeL": 0.3119516170965996
182
- },
183
- {
184
- "name": "gecturk_generation",
185
- "task": "grammatical_error_correction",
186
- "exact_match": 0.05931917762049208
187
- }
188
- ]
189
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
zero-shot/Qwen3-8B-Base.json DELETED
@@ -1,190 +0,0 @@
1
- {
2
- "model": {
3
- "model": "Qwen/Qwen3-8B-Base",
4
- "architecture": "Qwen3ForCausalLM",
5
- "dtype": "bfloat16",
6
- "type": "pretrained",
7
- "num_parameters": "8b",
8
- "api": "hf"
9
- },
10
- "results": [
11
- {
12
- "name": "xquad_tr",
13
- "task": "extractive_question_answering",
14
- "exact_match": 0.3050420168067227,
15
- "f1": 0.49570394447625915
16
- },
17
- {
18
- "name": "xcopa_tr",
19
- "task": "multiple_choice",
20
- "acc": 0.656,
21
- "acc_norm": 0.656
22
- },
23
- {
24
- "name": "turkish_plu",
25
- "task": "multiple_choice",
26
- "acc": 0.504,
27
- "acc_norm": 0.42496
28
- },
29
- {
30
- "name": "turkish_plu_goal_inference",
31
- "task": "multiple_choice",
32
- "acc": 0.43847072879330945,
33
- "acc_norm": 0.28793309438470727
34
- },
35
- {
36
- "name": "turkish_plu_next_event_prediction",
37
- "task": "multiple_choice",
38
- "acc": 0.48396946564885496,
39
- "acc_norm": 0.28396946564885495
40
- },
41
- {
42
- "name": "turkish_plu_step_inference",
43
- "task": "multiple_choice",
44
- "acc": 0.369281045751634,
45
- "acc_norm": 0.38562091503267976
46
- },
47
- {
48
- "name": "turkish_plu_step_ordering",
49
- "task": "multiple_choice",
50
- "acc": 0.6513222331047992,
51
- "acc_norm": 0.6513222331047992
52
- },
53
- {
54
- "name": "turkce_atasozleri",
55
- "task": "multiple_choice",
56
- "acc": 0.7728323699421965,
57
- "acc_norm": 0.7728323699421965
58
- },
59
- {
60
- "name": "check_worthiness",
61
- "task": "multiple_choice",
62
- "acc": 0.37614259597806216,
63
- "acc_norm": 0.6238574040219378
64
- },
65
- {
66
- "name": "relevance_judgment",
67
- "task": "multiple_choice",
68
- "acc": 0.4218464351005484,
69
- "acc_norm": 0.5781535648994516
70
- },
71
- {
72
- "name": "tquad",
73
- "task": "extractive_question_answering",
74
- "exact_match": 0.15134529147982062,
75
- "f1": 0.3212345707810023
76
- },
77
- {
78
- "name": "sts_tr",
79
- "task": "text_classification",
80
- "acc": 0.21174764321972445,
81
- "acc_norm": 0.1312545322697607
82
- },
83
- {
84
- "name": "offenseval_tr",
85
- "task": "text_classification",
86
- "acc": 0.8061224489795918,
87
- "acc_norm": 0.7970521541950113
88
- },
89
- {
90
- "name": "mnli_tr",
91
- "task": "natural_language_inference",
92
- "acc": 0.3458,
93
- "acc_norm": 0.3213
94
- },
95
- {
96
- "name": "snli_tr",
97
- "task": "natural_language_inference",
98
- "acc": 0.2771,
99
- "acc_norm": 0.3237
100
- },
101
- {
102
- "name": "xnli_tr",
103
- "task": "natural_language_inference",
104
- "acc": 0.33552894211576845,
105
- "acc_norm": 0.3333333333333333
106
- },
107
- {
108
- "name": "news_cat",
109
- "task": "text_classification",
110
- "acc": 0.64,
111
- "acc_norm": 0.312
112
- },
113
- {
114
- "name": "ironytr",
115
- "task": "text_classification",
116
- "acc": 0.5066666666666667,
117
- "acc_norm": 0.5
118
- },
119
- {
120
- "name": "exams_tr",
121
- "task": "multiple_choice",
122
- "acc": 0.3511450381679389,
123
- "acc_norm": 0.3384223918575064
124
- },
125
- {
126
- "name": "circumflex_tr",
127
- "task": "multiple_choice",
128
- "acc": 0.5714285714285714,
129
- "acc_norm": 0.5714285714285714
130
- },
131
- {
132
- "name": "bilmecebench",
133
- "task": "multiple_choice",
134
- "acc": 0.5723981900452488,
135
- "acc_norm": 0.5723981900452488
136
- },
137
- {
138
- "name": "belebele_tr",
139
- "task": "multiple_choice",
140
- "acc": 0.8044444444444444,
141
- "acc_norm": 0.8044444444444444
142
- },
143
- {
144
- "name": "turkishmmlu",
145
- "task": "multiple_choice",
146
- "acc": 0.6433333333333333,
147
- "acc_norm": 0.6433333333333333
148
- },
149
- {
150
- "name": "xlsum_tr",
151
- "task": "summarization",
152
- "rouge1": 0.21850802086584964,
153
- "rouge2": 0.09972850301278577,
154
- "rougeL": 0.17530849751123678
155
- },
156
- {
157
- "name": "wmt-tr-en-prompt",
158
- "task": "machine_translation",
159
- "wer": 2.085601196490251,
160
- "bleu": 0.0867760476179405
161
- },
162
- {
163
- "name": "wiki_lingua_tr",
164
- "task": "summarization",
165
- "rouge1": 0.1790179356445129,
166
- "rouge2": 0.05536224163778491,
167
- "rougeL": 0.13228141339234906
168
- },
169
- {
170
- "name": "tr-wikihow-summ",
171
- "task": "summarization",
172
- "rouge1": 0.085160471573291,
173
- "rouge2": 0.024521598751734066,
174
- "rougeL": 0.061909395020943954
175
- },
176
- {
177
- "name": "mlsum_tr",
178
- "task": "summarization",
179
- "rouge1": 0.36692022983430356,
180
- "rouge2": 0.24462313366233399,
181
- "rougeL": 0.30870270053456245
182
- },
183
- {
184
- "name": "gecturk_generation",
185
- "task": "grammatical_error_correction",
186
- "exact_match": 0.3391111753093553
187
- }
188
-
189
- ]
190
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
zero-shot/Qwen3-8B.json DELETED
@@ -1,190 +0,0 @@
1
- {
2
- "model": {
3
- "model": "Qwen/Qwen3-8B",
4
- "architecture": "Qwen3ForCausalLM",
5
- "dtype": "bfloat16",
6
- "type": "instruction-tuned",
7
- "num_parameters": "8b",
8
- "api": "hf"
9
- },
10
- "results": [
11
- {
12
- "name": "xquad_tr",
13
- "task": "extractive_question_answering",
14
- "exact_match": 0.27058823529411763,
15
- "f1": 0.44020531602192525
16
- },
17
- {
18
- "name": "xcopa_tr",
19
- "task": "multiple_choice",
20
- "acc": 0.656,
21
- "acc_norm": 0.656
22
- },
23
- {
24
- "name": "turkish_plu",
25
- "task": "multiple_choice",
26
- "acc": 0.47072,
27
- "acc_norm": 0.41504
28
- },
29
- {
30
- "name": "turkish_plu_goal_inference",
31
- "task": "multiple_choice",
32
- "acc": 0.3859020310633214,
33
- "acc_norm": 0.2724014336917563
34
- },
35
- {
36
- "name": "turkish_plu_next_event_prediction",
37
- "task": "multiple_choice",
38
- "acc": 0.44122137404580153,
39
- "acc_norm": 0.2717557251908397
40
- },
41
- {
42
- "name": "turkish_plu_step_inference",
43
- "task": "multiple_choice",
44
- "acc": 0.32189542483660133,
45
- "acc_norm": 0.3741830065359477
46
- },
47
- {
48
- "name": "turkish_plu_step_ordering",
49
- "task": "multiple_choice",
50
- "acc": 0.6483839373163565,
51
- "acc_norm": 0.6483839373163565
52
- },
53
- {
54
- "name": "turkce_atasozleri",
55
- "task": "multiple_choice",
56
- "acc": 0.7549132947976879,
57
- "acc_norm": 0.7549132947976879
58
- },
59
- {
60
- "name": "check_worthiness",
61
- "task": "multiple_choice",
62
- "acc": 0.4424131627056673,
63
- "acc_norm": 0.6238574040219378
64
- },
65
- {
66
- "name": "relevance_judgment",
67
- "task": "multiple_choice",
68
- "acc": 0.42230347349177333,
69
- "acc_norm": 0.5781535648994516
70
- },
71
- {
72
- "name": "tquad",
73
- "task": "extractive_question_answering",
74
- "exact_match": 0.18385650224215247,
75
- "f1": 0.3611274526033501
76
- },
77
- {
78
- "name": "sts_tr",
79
- "task": "text_classification",
80
- "acc": 0.21972443799854968,
81
- "acc_norm": 0.12037708484408992
82
- },
83
- {
84
- "name": "offenseval_tr",
85
- "task": "text_classification",
86
- "acc": 0.30413832199546487,
87
- "acc_norm": 0.7970521541950113
88
- },
89
- {
90
- "name": "mnli_tr",
91
- "task": "natural_language_inference",
92
- "acc": 0.3807,
93
- "acc_norm": 0.3213
94
- },
95
- {
96
- "name": "snli_tr",
97
- "task": "natural_language_inference",
98
- "acc": 0.3116,
99
- "acc_norm": 0.3237
100
- },
101
- {
102
- "name": "xnli_tr",
103
- "task": "natural_language_inference",
104
- "acc": 0.3664670658682635,
105
- "acc_norm": 0.3333333333333333
106
- },
107
- {
108
- "name": "news_cat",
109
- "task": "text_classification",
110
- "acc": 0.556,
111
- "acc_norm": 0.28
112
- },
113
- {
114
- "name": "ironytr",
115
- "task": "text_classification",
116
- "acc": 0.515,
117
- "acc_norm": 0.5
118
- },
119
- {
120
- "name": "exams_tr",
121
- "task": "multiple_choice",
122
- "acc": 0.29770992366412213,
123
- "acc_norm": 0.3104325699745547
124
- },
125
- {
126
- "name": "circumflex_tr",
127
- "task": "multiple_choice",
128
- "acc": 0.5,
129
- "acc_norm": 0.5
130
- },
131
- {
132
- "name": "bilmecebench",
133
- "task": "multiple_choice",
134
- "acc": 0.5316742081447964,
135
- "acc_norm": 0.5316742081447964
136
- },
137
- {
138
- "name": "belebele_tr",
139
- "task": "multiple_choice",
140
- "acc": 0.8111111111111111,
141
- "acc_norm": 0.8111111111111111
142
- },
143
- {
144
- "name": "turkishmmlu",
145
- "task": "multiple_choice",
146
- "acc": 0.5966666666666667,
147
- "acc_norm": 0.5966666666666667
148
- },
149
- {
150
- "name": "xlsum_tr",
151
- "task": "summarization",
152
- "rouge1": 0.30917291128339996,
153
- "rouge2": 0.14832485972104875,
154
- "rougeL": 0.23785344498387093
155
- },
156
- {
157
- "name": "wmt-tr-en-prompt",
158
- "task": "machine_translation",
159
- "wer": 1.7919998446671352,
160
- "bleu": 0.07228422067800695
161
- },
162
- {
163
- "name": "wiki_lingua_tr",
164
- "task": "summarization",
165
- "rouge1": 0.15988237371586872,
166
- "rouge2": 0.0532048337283498,
167
- "rougeL": 0.12305004914854117
168
- },
169
- {
170
- "name": "tr-wikihow-summ",
171
- "task": "summarization",
172
- "rouge1": 0.2116496152990347,
173
- "rouge2": 0.06436161936676124,
174
- "rougeL": 0.15251131714516145
175
- },
176
- {
177
- "name": "mlsum_tr",
178
- "task": "summarization",
179
- "rouge1": 0.3892482949562466,
180
- "rouge2": 0.23951248403090875,
181
- "rougeL": 0.31613997907641456
182
- },
183
- {
184
- "name": "gecturk_generation",
185
- "task": "grammatical_error_correction",
186
- "exact_match": 0.1439164138860802
187
- }
188
-
189
- ]
190
- }