abrek committed on
Commit
9853b08
·
verified ·
1 Parent(s): 7e86d15

Add Qwen3 Results

Browse files
results/zero-shot/Qwen3-1.7B-Base.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "Qwen/Qwen3-1.7B-Base",
4
+ "architecture": "Qwen3ForCausalLM",
5
+ "dtype": "bfloat16",
6
+ "type": "pretrained",
7
+ "num_parameters": "1.7b",
8
+ "api": "hf"
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "xquad_tr",
13
+ "task": "extractive_question_answering",
14
+ "exact_match": 0.14705882352941177,
15
+ "f1": 0.2870820139107337
16
+ },
17
+ {
18
+ "name": "xcopa_tr",
19
+ "task": "multiple_choice",
20
+ "acc": 0.576,
21
+ "acc_norm": 0.576
22
+ },
23
+ {
24
+ "name": "turkish_plu",
25
+ "task": "multiple_choice",
26
+ "acc": 0.4416,
27
+ "acc_norm": 0.38656
28
+ },
29
+ {
30
+ "name": "turkish_plu_goal_inference",
31
+ "task": "multiple_choice",
32
+ "acc": 0.3859020310633214,
33
+ "acc_norm": 0.25925925925925924
34
+ },
35
+ {
36
+ "name": "turkish_plu_next_event_prediction",
37
+ "task": "multiple_choice",
38
+ "acc": 0.40610687022900765,
39
+ "acc_norm": 0.26106870229007634
40
+ },
41
+ {
42
+ "name": "turkish_plu_step_inference",
43
+ "task": "multiple_choice",
44
+ "acc": 0.2973856209150327,
45
+ "acc_norm": 0.34477124183006536
46
+ },
47
+ {
48
+ "name": "turkish_plu_step_ordering",
49
+ "task": "multiple_choice",
50
+ "acc": 0.5964740450538688,
51
+ "acc_norm": 0.5964740450538688
52
+ },
53
+ {
54
+ "name": "turkce_atasozleri",
55
+ "task": "multiple_choice",
56
+ "acc": 0.576878612716763,
57
+ "acc_norm": 0.576878612716763
58
+ },
59
+ {
60
+ "name": "check_worthiness",
61
+ "task": "multiple_choice",
62
+ "acc": 0.37842778793418647,
63
+ "acc_norm": 0.6238574040219378
64
+ },
65
+ {
66
+ "name": "relevance_judgment",
67
+ "task": "multiple_choice",
68
+ "acc": 0.43327239488117003,
69
+ "acc_norm": 0.5781535648994516
70
+ },
71
+ {
72
+ "name": "tquad",
73
+ "task": "extractive_question_answering",
74
+ "exact_match": 0.09417040358744394,
75
+ "f1": 0.26322715675405095
76
+ },
77
+ {
78
+ "name": "sts_tr",
79
+ "task": "text_classification",
80
+ "acc": 0.15663524292965916,
81
+ "acc_norm": 0.11965192168237854
82
+ },
83
+ {
84
+ "name": "offenseval_tr",
85
+ "task": "text_classification",
86
+ "acc": 0.768140589569161,
87
+ "acc_norm": 0.7970521541950113
88
+ },
89
+ {
90
+ "name": "mnli_tr",
91
+ "task": "natural_language_inference",
92
+ "acc": 0.3478,
93
+ "acc_norm": 0.3213
94
+ },
95
+ {
96
+ "name": "snli_tr",
97
+ "task": "natural_language_inference",
98
+ "acc": 0.3343,
99
+ "acc_norm": 0.3237
100
+ },
101
+ {
102
+ "name": "xnli_tr",
103
+ "task": "natural_language_inference",
104
+ "acc": 0.3333333333333333,
105
+ "acc_norm": 0.3333333333333333
106
+ },
107
+ {
108
+ "name": "news_cat",
109
+ "task": "text_classification",
110
+ "acc": 0.628,
111
+ "acc_norm": 0.34
112
+ },
113
+ {
114
+ "name": "ironytr",
115
+ "task": "text_classification",
116
+ "acc": 0.55,
117
+ "acc_norm": 0.5
118
+ },
119
+ {
120
+ "name": "exams_tr",
121
+ "task": "multiple_choice",
122
+ "acc": 0.26717557251908397,
123
+ "acc_norm": 0.31297709923664124
124
+ },
125
+ {
126
+ "name": "circumflex_tr",
127
+ "task": "multiple_choice",
128
+ "acc": 0.6142857142857143,
129
+ "acc_norm": 0.6142857142857143
130
+ },
131
+ {
132
+ "name": "bilmecebench",
133
+ "task": "multiple_choice",
134
+ "acc": 0.33710407239819007,
135
+ "acc_norm": 0.33710407239819007
136
+ },
137
+ {
138
+ "name": "belebele_tr",
139
+ "task": "multiple_choice",
140
+ "acc": 0.5933333333333334,
141
+ "acc_norm": 0.5933333333333334
142
+ },
143
+ {
144
+ "name": "turkishmmlu",
145
+ "task": "multiple_choice",
146
+ "acc": 0.4,
147
+ "acc_norm": 0.4
148
+ },
149
+ {
150
+ "name": "xlsum_tr",
151
+ "task": "summarization",
152
+ "rouge1": 0.04781539873117025,
153
+ "rouge2": 0.01992023353880694,
154
+ "rougeL": 0.036905992546053354
155
+ },
156
+ {
157
+ "name": "wmt-tr-en-prompt",
158
+ "task": "machine_translation",
159
+ "wer": 1.122812394923734,
160
+ "bleu": 0.08632239330032426
161
+ },
162
+ {
163
+ "name": "wiki_lingua_tr",
164
+ "task": "summarization",
165
+ "rouge1": 0.1905933745444374,
166
+ "rouge2": 0.05729029075401719,
167
+ "rougeL": 0.1389441266800664
168
+ },
169
+ {
170
+ "name": "tr-wikihow-summ",
171
+ "task": "summarization",
172
+ "rouge1": 0.1573146712521122,
173
+ "rouge2": 0.04753989405427132,
174
+ "rougeL": 0.11305186768825831
175
+ },
176
+ {
177
+ "name": "mlsum_tr",
178
+ "task": "summarization",
179
+ "rouge1": 0.18820054046451806,
180
+ "rouge2": 0.11945259097094729,
181
+ "rougeL": 0.15322863222214073
182
+ },
183
+ {
184
+ "name": "gecturk_generation",
185
+ "task": "grammatical_error_correction",
186
+ "exact_match": 0.13134960758823247
187
+ }
188
+ ]
189
+ }
results/zero-shot/Qwen3-1.7B.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "Qwen/Qwen3-1.7B",
4
+ "architecture": "Qwen3ForCausalLM",
5
+ "dtype": "bfloat16",
6
+ "type": "instruction-tuned",
7
+ "num_parameters": "1.7b",
8
+ "api": "hf"
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "xquad_tr",
13
+ "task": "extractive_question_answering",
14
+ "exact_match": 0.40252100840336136,
15
+ "f1": 0.6176467678580342
16
+ },
17
+ {
18
+ "name": "xcopa_tr",
19
+ "task": "multiple_choice",
20
+ "acc": 0.646,
21
+ "acc_norm": 0.646
22
+ },
23
+ {
24
+ "name": "turkish_plu",
25
+ "task": "multiple_choice",
26
+ "acc": 0.48736,
27
+ "acc_norm": 0.5392
28
+ },
29
+ {
30
+ "name": "turkish_plu_goal_inference",
31
+ "task": "multiple_choice",
32
+ "acc": 0.4133811230585424,
33
+ "acc_norm": 0.4324970131421744
34
+ },
35
+ {
36
+ "name": "turkish_plu_next_event_prediction",
37
+ "task": "multiple_choice",
38
+ "acc": 0.4870229007633588,
39
+ "acc_norm": 0.5816793893129771
40
+ },
41
+ {
42
+ "name": "turkish_plu_step_inference",
43
+ "task": "multiple_choice",
44
+ "acc": 0.35294117647058826,
45
+ "acc_norm": 0.49019607843137253
46
+ },
47
+ {
48
+ "name": "turkish_plu_step_ordering",
49
+ "task": "multiple_choice",
50
+ "acc": 0.6287952987267384,
51
+ "acc_norm": 0.6287952987267384
52
+ },
53
+ {
54
+ "name": "check_worthiness",
55
+ "task": "multiple_choice",
56
+ "acc": 0.37614259597806216,
57
+ "acc_norm": 0.3756855575868373
58
+ },
59
+ {
60
+ "name": "relevance_judgment",
61
+ "task": "multiple_choice",
62
+ "acc": 0.4506398537477148,
63
+ "acc_norm": 0.5708409506398537
64
+ },
65
+ {
66
+ "name": "tquad",
67
+ "task": "extractive_question_answering",
68
+ "exact_match": 0.34753363228699546,
69
+ "f1": 0.614345609122
70
+ },
71
+ {
72
+ "name": "sts_tr",
73
+ "task": "text_classification",
74
+ "acc": 0.2037708484408992,
75
+ "acc_norm": 0.2610587382160986
76
+ },
77
+ {
78
+ "name": "offenseval_tr",
79
+ "task": "text_classification",
80
+ "acc": 0.22023809523809523,
81
+ "acc_norm": 0.2962018140589569
82
+ },
83
+ {
84
+ "name": "mnli_tr",
85
+ "task": "natural_language_inference",
86
+ "acc": 0.3482,
87
+ "acc_norm": 0.3389
88
+ },
89
+ {
90
+ "name": "snli_tr",
91
+ "task": "natural_language_inference",
92
+ "acc": 0.3373,
93
+ "acc_norm": 0.3271
94
+ },
95
+ {
96
+ "name": "xnli_tr",
97
+ "task": "natural_language_inference",
98
+ "acc": 0.3335329341317365,
99
+ "acc_norm": 0.33532934131736525
100
+ },
101
+ {
102
+ "name": "news_cat",
103
+ "task": "text_classification",
104
+ "acc": 0.524,
105
+ "acc_norm": 0.348
106
+ },
107
+ {
108
+ "name": "mkqa_tr",
109
+ "task": "extractive_question_answering",
110
+ "exact_match": 0.050014797277300974,
111
+ "f1": 0.11195620922043903
112
+ },
113
+ {
114
+ "name": "ironytr",
115
+ "task": "text_classification",
116
+ "acc": 0.5616666666666666,
117
+ "acc_norm": 0.6183333333333333
118
+ },
119
+ {
120
+ "name": "exams_tr",
121
+ "task": "multiple_choice",
122
+ "acc": 0.33078880407124683,
123
+ "acc_norm": 0.35877862595419846
124
+ },
125
+ {
126
+ "name": "belebele_tr",
127
+ "task": "multiple_choice",
128
+ "acc": 0.8122222222222222,
129
+ "acc_norm": 0.8122222222222222
130
+ },
131
+ {
132
+ "name": "xlsum_tr",
133
+ "task": "summarization",
134
+ "rouge1": 0.2866278776668776,
135
+ "rouge2": 0.1308383753682692,
136
+ "rougeL": 0.22217070278595147
137
+ },
138
+ {
139
+ "name": "wmt-tr-en-prompt",
140
+ "task": "machine_translation",
141
+ "wer": 1.6546507240124098,
142
+ "bleu": 0.08096461200991427
143
+ },
144
+ {
145
+ "name": "wiki_lingua_tr",
146
+ "task": "summarization",
147
+ "rouge1": 0.20802332507327073,
148
+ "rouge2": 0.06755910819968403,
149
+ "rougeL": 0.15425156655216665
150
+ },
151
+ {
152
+ "name": "tr-wikihow-summ",
153
+ "task": "summarization",
154
+ "rouge1": 0.22012543165161014,
155
+ "rouge2": 0.06567086903148794,
156
+ "rougeL": 0.15604855476586732
157
+ },
158
+ {
159
+ "name": "mlsum_tr",
160
+ "task": "summarization",
161
+ "rouge1": 0.3928051448993858,
162
+ "rouge2": 0.25674608200884674,
163
+ "rougeL": 0.3276023476233169
164
+ },
165
+ {
166
+ "name": "gecturk_generation",
167
+ "task": "grammatical_error_correction",
168
+ "exact_match": 0.1888
169
+ },
170
+ {
171
+ "name": "turkce_atasozleri",
172
+ "task": "multiple_choice",
173
+ "acc": 0.753757225433526,
174
+ "acc_norm": 0.753757225433526
175
+ },
176
+ {
177
+ "name": "turkishmmlu",
178
+ "task": "multiple_choice",
179
+ "acc": 0.5622222222222222,
180
+ "acc_norm": 0.5622222222222222
181
+ },
182
+ {
183
+ "name": "bilmecebench",
184
+ "task": "multiple_choice",
185
+ "acc": 0.4751131221719457,
186
+ "acc_norm": 0.4751131221719457
187
+ },
188
+ {
189
+ "name": "circumflex_tr",
190
+ "task": "multiple_choice",
191
+ "acc": 0.5857142857142857,
192
+ "acc_norm": 0.5857142857142857
193
+ },
194
+ {
195
+ "name": "turkishmmlu",
196
+ "task": "multiple_choice",
197
+ "acc": 0.38222222222222224,
198
+ "acc_norm": 0.38222222222222224
199
+ },
200
+ {
201
+ "name": "xlsum_tr",
202
+ "task": "summarization",
203
+ "rouge1": 0.27820377739060753,
204
+ "rouge2": 0.12397426441534459,
205
+ "rougeL": 0.2141557453953809
206
+ },
207
+ {
208
+ "name": "wmt-tr-en-prompt",
209
+ "task": "machine_translation",
210
+ "wer": 1.4553342171690737,
211
+ "bleu": 0.05579047680001111
212
+ },
213
+ {
214
+ "name": "wiki_lingua_tr",
215
+ "task": "summarization",
216
+ "rouge1": 0.18975012020130533,
217
+ "rouge2": 0.056217857712315425,
218
+ "rougeL": 0.14164939611392058
219
+ },
220
+ {
221
+ "name": "tr-wikihow-summ",
222
+ "task": "summarization",
223
+ "rouge1": 0.20337048755281356,
224
+ "rouge2": 0.05824591585148973,
225
+ "rougeL": 0.14892270355472415
226
+ },
227
+ {
228
+ "name": "mlsum_tr",
229
+ "task": "summarization",
230
+ "rouge1": 0.3633354972284383,
231
+ "rouge2": 0.22049361975220266,
232
+ "rougeL": 0.29386670924548397
233
+ },
234
+ {
235
+ "name": "gecturk_generation",
236
+ "task": "grammatical_error_correction",
237
+ "exact_match": 0.3063700707785642
238
+ }
239
+ ]
240
+ }
results/zero-shot/Qwen3-14B-Base.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "Qwen/Qwen3-14B-Base",
4
+ "architecture": "Qwen3ForCausalLM",
5
+ "dtype": "bfloat16",
6
+ "type": "pretrained",
7
+ "num_parameters": "14b",
8
+ "api": "hf"
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "xquad_tr",
13
+ "task": "extractive_question_answering",
14
+ "exact_match": 0.3512605042016807,
15
+ "f1": 0.5497491980200822
16
+ },
17
+ {
18
+ "name": "xcopa_tr",
19
+ "task": "multiple_choice",
20
+ "acc": 0.676,
21
+ "acc_norm": 0.676
22
+ },
23
+ {
24
+ "name": "turkish_plu",
25
+ "task": "multiple_choice",
26
+ "acc": 0.53248,
27
+ "acc_norm": 0.44448
28
+ },
29
+ {
30
+ "name": "turkish_plu_goal_inference",
31
+ "task": "multiple_choice",
32
+ "acc": 0.4432497013142174,
33
+ "acc_norm": 0.3010752688172043
34
+ },
35
+ {
36
+ "name": "turkish_plu_next_event_prediction",
37
+ "task": "multiple_choice",
38
+ "acc": 0.5145038167938931,
39
+ "acc_norm": 0.2870229007633588
40
+ },
41
+ {
42
+ "name": "turkish_plu_step_inference",
43
+ "task": "multiple_choice",
44
+ "acc": 0.40032679738562094,
45
+ "acc_norm": 0.3888888888888889
46
+ },
47
+ {
48
+ "name": "turkish_plu_step_ordering",
49
+ "task": "multiple_choice",
50
+ "acc": 0.6963761018609207,
51
+ "acc_norm": 0.6963761018609207
52
+ },
53
+ {
54
+ "name": "turkce_atasozleri",
55
+ "task": "multiple_choice",
56
+ "acc": 0.8283236994219653,
57
+ "acc_norm": 0.8283236994219653
58
+ },
59
+ {
60
+ "name": "check_worthiness",
61
+ "task": "multiple_choice",
62
+ "acc": 0.37614259597806216,
63
+ "acc_norm": 0.6238574040219378
64
+ },
65
+ {
66
+ "name": "relevance_judgment",
67
+ "task": "multiple_choice",
68
+ "acc": 0.42047531992687387,
69
+ "acc_norm": 0.5781535648994516
70
+ },
71
+ {
72
+ "name": "tquad",
73
+ "task": "extractive_question_answering",
74
+ "exact_match": 0.20179372197309417,
75
+ "f1": 0.41015757440015116
76
+ },
77
+ {
78
+ "name": "sts_tr",
79
+ "task": "text_classification",
80
+ "acc": 0.18491660623640319,
81
+ "acc_norm": 0.17113850616388687
82
+ },
83
+ {
84
+ "name": "offenseval_tr",
85
+ "task": "text_classification",
86
+ "acc": 0.7650226757369615,
87
+ "acc_norm": 0.7970521541950113
88
+ },
89
+ {
90
+ "name": "mnli_tr",
91
+ "task": "natural_language_inference",
92
+ "acc": 0.2776,
93
+ "acc_norm": 0.3213
94
+ },
95
+ {
96
+ "name": "snli_tr",
97
+ "task": "natural_language_inference",
98
+ "acc": 0.2946,
99
+ "acc_norm": 0.3237
100
+ },
101
+ {
102
+ "name": "xnli_tr",
103
+ "task": "natural_language_inference",
104
+ "acc": 0.26387225548902193,
105
+ "acc_norm": 0.3333333333333333
106
+ },
107
+ {
108
+ "name": "news_cat",
109
+ "task": "text_classification",
110
+ "acc": 0.696,
111
+ "acc_norm": 0.344
112
+ },
113
+ {
114
+ "name": "ironytr",
115
+ "task": "text_classification",
116
+ "acc": 0.735,
117
+ "acc_norm": 0.5
118
+ },
119
+ {
120
+ "name": "exams_tr",
121
+ "task": "multiple_choice",
122
+ "acc": 0.35877862595419846,
123
+ "acc_norm": 0.3333333333333333
124
+ },
125
+ {
126
+ "name": "circumflex_tr",
127
+ "task": "multiple_choice",
128
+ "acc": 0.5428571428571428,
129
+ "acc_norm": 0.5428571428571428
130
+ },
131
+ {
132
+ "name": "bilmecebench",
133
+ "task": "multiple_choice",
134
+ "acc": 0.6470588235294118,
135
+ "acc_norm": 0.6470588235294118
136
+ },
137
+ {
138
+ "name": "belebele_tr",
139
+ "task": "multiple_choice",
140
+ "acc": 0.8388888888888889,
141
+ "acc_norm": 0.8388888888888889
142
+ },
143
+ {
144
+ "name": "turkishmmlu",
145
+ "task": "multiple_choice",
146
+ "acc": 0.6777777777777778,
147
+ "acc_norm": 0.6777777777777778
148
+ },
149
+ {
150
+ "name": "xlsum_tr",
151
+ "task": "summarization",
152
+ "rouge1": 0.11473487738310617,
153
+ "rouge2": 0.060121517882459646,
154
+ "rougeL": 0.09836107933415753
155
+ },
156
+ {
157
+ "name": "wmt-tr-en-prompt",
158
+ "task": "machine_translation",
159
+ "wer": 1.1492106652129537,
160
+ "bleu": 0.11818272894053798
161
+ },
162
+ {
163
+ "name": "wiki_lingua_tr",
164
+ "task": "summarization",
165
+ "rouge1": 0.13000651376079725,
166
+ "rouge2": 0.03942796488947396,
167
+ "rougeL": 0.09588108114897731
168
+ },
169
+ {
170
+ "name": "tr-wikihow-summ",
171
+ "task": "summarization",
172
+ "rouge1": 0.042343650286353535,
173
+ "rouge2": 0.012407404121300508,
174
+ "rougeL": 0.03134101717443596
175
+ },
176
+ {
177
+ "name": "mlsum_tr",
178
+ "task": "summarization",
179
+ "rouge1": 0.282634405530366,
180
+ "rouge2": 0.18883921790708064,
181
+ "rougeL": 0.2382731729763113
182
+ },
183
+ {
184
+ "name": "gecturk_generation",
185
+ "task": "grammatical_error_correction",
186
+ "exact_match": 0.32360729934036303
187
+ }
188
+ ]
189
+ }
results/zero-shot/Qwen3-14B.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "Qwen/Qwen3-14B",
4
+ "architecture": "Qwen3ForCausalLM",
5
+ "dtype": "bfloat16",
6
+ "type": "instruction-tuned",
7
+ "num_parameters": "14b",
8
+ "api": "hf"
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "xquad_tr",
13
+ "task": "extractive_question_answering",
14
+ "exact_match": 0.17394957983193277,
15
+ "f1": 0.2667005758476218
16
+ },
17
+ {
18
+ "name": "xcopa_tr",
19
+ "task": "multiple_choice",
20
+ "acc": 0.674,
21
+ "acc_norm": 0.674
22
+ },
23
+ {
24
+ "name": "turkish_plu",
25
+ "task": "multiple_choice",
26
+ "acc": 0.4944,
27
+ "acc_norm": 0.42432
28
+ },
29
+ {
30
+ "name": "turkish_plu_goal_inference",
31
+ "task": "multiple_choice",
32
+ "acc": 0.39904420549581837,
33
+ "acc_norm": 0.2867383512544803
34
+ },
35
+ {
36
+ "name": "turkish_plu_next_event_prediction",
37
+ "task": "multiple_choice",
38
+ "acc": 0.5038167938931297,
39
+ "acc_norm": 0.28549618320610687
40
+ },
41
+ {
42
+ "name": "turkish_plu_step_inference",
43
+ "task": "multiple_choice",
44
+ "acc": 0.3562091503267974,
45
+ "acc_norm": 0.38562091503267976
46
+ },
47
+ {
48
+ "name": "turkish_plu_step_ordering",
49
+ "task": "multiple_choice",
50
+ "acc": 0.6493633692458374,
51
+ "acc_norm": 0.6493633692458374
52
+ },
53
+ {
54
+ "name": "turkce_atasozleri",
55
+ "task": "multiple_choice",
56
+ "acc": 0.8127167630057803,
57
+ "acc_norm": 0.8127167630057803
58
+ },
59
+ {
60
+ "name": "check_worthiness",
61
+ "task": "multiple_choice",
62
+ "acc": 0.38619744058500916,
63
+ "acc_norm": 0.6238574040219378
64
+ },
65
+ {
66
+ "name": "relevance_judgment",
67
+ "task": "multiple_choice",
68
+ "acc": 0.4218464351005484,
69
+ "acc_norm": 0.5781535648994516
70
+ },
71
+ {
72
+ "name": "tquad",
73
+ "task": "extractive_question_answering",
74
+ "exact_match": 0.1547085201793722,
75
+ "f1": 0.31146380255216244
76
+ },
77
+ {
78
+ "name": "sts_tr",
79
+ "task": "text_classification",
80
+ "acc": 0.1319796954314721,
81
+ "acc_norm": 0.116751269035533
82
+ },
83
+ {
84
+ "name": "offenseval_tr",
85
+ "task": "text_classification",
86
+ "acc": 0.7644557823129252,
87
+ "acc_norm": 0.7970521541950113
88
+ },
89
+ {
90
+ "name": "mnli_tr",
91
+ "task": "natural_language_inference",
92
+ "acc": 0.3356,
93
+ "acc_norm": 0.3213
94
+ },
95
+ {
96
+ "name": "snli_tr",
97
+ "task": "natural_language_inference",
98
+ "acc": 0.3044,
99
+ "acc_norm": 0.3237
100
+ },
101
+ {
102
+ "name": "xnli_tr",
103
+ "task": "natural_language_inference",
104
+ "acc": 0.3251497005988024,
105
+ "acc_norm": 0.3333333333333333
106
+ },
107
+ {
108
+ "name": "news_cat",
109
+ "task": "text_classification",
110
+ "acc": 0.424,
111
+ "acc_norm": 0.232
112
+ },
113
+ {
114
+ "name": "ironytr",
115
+ "task": "text_classification",
116
+ "acc": 0.7,
117
+ "acc_norm": 0.5
118
+ },
119
+ {
120
+ "name": "exams_tr",
121
+ "task": "multiple_choice",
122
+ "acc": 0.33587786259541985,
123
+ "acc_norm": 0.3384223918575064
124
+ },
125
+ {
126
+ "name": "circumflex_tr",
127
+ "task": "multiple_choice",
128
+ "acc": 0.5714285714285714,
129
+ "acc_norm": 0.5714285714285714
130
+ },
131
+ {
132
+ "name": "bilmecebench",
133
+ "task": "multiple_choice",
134
+ "acc": 0.5769230769230769,
135
+ "acc_norm": 0.5769230769230769
136
+ },
137
+ {
138
+ "name": "belebele_tr",
139
+ "task": "multiple_choice",
140
+ "acc": 0.8355555555555556,
141
+ "acc_norm": 0.8355555555555556
142
+ },
143
+ {
144
+ "name": "turkishmmlu",
145
+ "task": "multiple_choice",
146
+ "acc": 0.6355555555555555,
147
+ "acc_norm": 0.6355555555555555
148
+ },
149
+ {
150
+ "name": "xlsum_tr",
151
+ "task": "summarization",
152
+ "rouge1": 0.2763159346655365,
153
+ "rouge2": 0.13580436770127194,
154
+ "rougeL": 0.2140432080171992
155
+ },
156
+ {
157
+ "name": "wmt-tr-en-prompt",
158
+ "task": "machine_translation",
159
+ "wer": 0.8811716956156485,
160
+ "bleu": 0.08106810125402132
161
+ },
162
+ {
163
+ "name": "wiki_lingua_tr",
164
+ "task": "summarization",
165
+ "rouge1": 0.20105611846416252,
166
+ "rouge2": 0.07453258043188488,
167
+ "rougeL": 0.15681666038287218
168
+ },
169
+ {
170
+ "name": "tr-wikihow-summ",
171
+ "task": "summarization",
172
+ "rouge1": 0.16377028131791377,
173
+ "rouge2": 0.04758033115002652,
174
+ "rougeL": 0.1184993075488721
175
+ },
176
+ {
177
+ "name": "mlsum_tr",
178
+ "task": "summarization",
179
+ "rouge1": 0.3844389772184301,
180
+ "rouge2": 0.23720643221445104,
181
+ "rougeL": 0.3119516170965996
182
+ },
183
+ {
184
+ "name": "gecturk_generation",
185
+ "task": "grammatical_error_correction",
186
+ "exact_match": 0.05931917762049208
187
+ }
188
+ ]
189
+ }
results/zero-shot/Qwen3-8B-Base.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "Qwen/Qwen3-8B-Base",
4
+ "architecture": "Qwen3ForCausalLM",
5
+ "dtype": "bfloat16",
6
+ "type": "pretrained",
7
+ "num_parameters": "8b",
8
+ "api": "hf"
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "xquad_tr",
13
+ "task": "extractive_question_answering",
14
+ "exact_match": 0.3050420168067227,
15
+ "f1": 0.49570394447625915
16
+ },
17
+ {
18
+ "name": "xcopa_tr",
19
+ "task": "multiple_choice",
20
+ "acc": 0.656,
21
+ "acc_norm": 0.656
22
+ },
23
+ {
24
+ "name": "turkish_plu",
25
+ "task": "multiple_choice",
26
+ "acc": 0.504,
27
+ "acc_norm": 0.42496
28
+ },
29
+ {
30
+ "name": "turkish_plu_goal_inference",
31
+ "task": "multiple_choice",
32
+ "acc": 0.43847072879330945,
33
+ "acc_norm": 0.28793309438470727
34
+ },
35
+ {
36
+ "name": "turkish_plu_next_event_prediction",
37
+ "task": "multiple_choice",
38
+ "acc": 0.48396946564885496,
39
+ "acc_norm": 0.28396946564885495
40
+ },
41
+ {
42
+ "name": "turkish_plu_step_inference",
43
+ "task": "multiple_choice",
44
+ "acc": 0.369281045751634,
45
+ "acc_norm": 0.38562091503267976
46
+ },
47
+ {
48
+ "name": "turkish_plu_step_ordering",
49
+ "task": "multiple_choice",
50
+ "acc": 0.6513222331047992,
51
+ "acc_norm": 0.6513222331047992
52
+ },
53
+ {
54
+ "name": "turkce_atasozleri",
55
+ "task": "multiple_choice",
56
+ "acc": 0.7728323699421965,
57
+ "acc_norm": 0.7728323699421965
58
+ },
59
+ {
60
+ "name": "check_worthiness",
61
+ "task": "multiple_choice",
62
+ "acc": 0.37614259597806216,
63
+ "acc_norm": 0.6238574040219378
64
+ },
65
+ {
66
+ "name": "relevance_judgment",
67
+ "task": "multiple_choice",
68
+ "acc": 0.4218464351005484,
69
+ "acc_norm": 0.5781535648994516
70
+ },
71
+ {
72
+ "name": "tquad",
73
+ "task": "extractive_question_answering",
74
+ "exact_match": 0.15134529147982062,
75
+ "f1": 0.3212345707810023
76
+ },
77
+ {
78
+ "name": "sts_tr",
79
+ "task": "text_classification",
80
+ "acc": 0.21174764321972445,
81
+ "acc_norm": 0.1312545322697607
82
+ },
83
+ {
84
+ "name": "offenseval_tr",
85
+ "task": "text_classification",
86
+ "acc": 0.8061224489795918,
87
+ "acc_norm": 0.7970521541950113
88
+ },
89
+ {
90
+ "name": "mnli_tr",
91
+ "task": "natural_language_inference",
92
+ "acc": 0.3458,
93
+ "acc_norm": 0.3213
94
+ },
95
+ {
96
+ "name": "snli_tr",
97
+ "task": "natural_language_inference",
98
+ "acc": 0.2771,
99
+ "acc_norm": 0.3237
100
+ },
101
+ {
102
+ "name": "xnli_tr",
103
+ "task": "natural_language_inference",
104
+ "acc": 0.33552894211576845,
105
+ "acc_norm": 0.3333333333333333
106
+ },
107
+ {
108
+ "name": "news_cat",
109
+ "task": "text_classification",
110
+ "acc": 0.64,
111
+ "acc_norm": 0.312
112
+ },
113
+ {
114
+ "name": "ironytr",
115
+ "task": "text_classification",
116
+ "acc": 0.5066666666666667,
117
+ "acc_norm": 0.5
118
+ },
119
+ {
120
+ "name": "exams_tr",
121
+ "task": "multiple_choice",
122
+ "acc": 0.3511450381679389,
123
+ "acc_norm": 0.3384223918575064
124
+ },
125
+ {
126
+ "name": "circumflex_tr",
127
+ "task": "multiple_choice",
128
+ "acc": 0.5714285714285714,
129
+ "acc_norm": 0.5714285714285714
130
+ },
131
+ {
132
+ "name": "bilmecebench",
133
+ "task": "multiple_choice",
134
+ "acc": 0.5723981900452488,
135
+ "acc_norm": 0.5723981900452488
136
+ },
137
+ {
138
+ "name": "belebele_tr",
139
+ "task": "multiple_choice",
140
+ "acc": 0.8044444444444444,
141
+ "acc_norm": 0.8044444444444444
142
+ },
143
+ {
144
+ "name": "turkishmmlu",
145
+ "task": "multiple_choice",
146
+ "acc": 0.6433333333333333,
147
+ "acc_norm": 0.6433333333333333
148
+ },
149
+ {
150
+ "name": "xlsum_tr",
151
+ "task": "summarization",
152
+ "rouge1": 0.21850802086584964,
153
+ "rouge2": 0.09972850301278577,
154
+ "rougeL": 0.17530849751123678
155
+ },
156
+ {
157
+ "name": "wmt-tr-en-prompt",
158
+ "task": "machine_translation",
159
+ "wer": 2.085601196490251,
160
+ "bleu": 0.0867760476179405
161
+ },
162
+ {
163
+ "name": "wiki_lingua_tr",
164
+ "task": "summarization",
165
+ "rouge1": 0.1790179356445129,
166
+ "rouge2": 0.05536224163778491,
167
+ "rougeL": 0.13228141339234906
168
+ },
169
+ {
170
+ "name": "tr-wikihow-summ",
171
+ "task": "summarization",
172
+ "rouge1": 0.085160471573291,
173
+ "rouge2": 0.024521598751734066,
174
+ "rougeL": 0.061909395020943954
175
+ },
176
+ {
177
+ "name": "mlsum_tr",
178
+ "task": "summarization",
179
+ "rouge1": 0.36692022983430356,
180
+ "rouge2": 0.24462313366233399,
181
+ "rougeL": 0.30870270053456245
182
+ },
183
+ {
184
+ "name": "gecturk_generation",
185
+ "task": "grammatical_error_correction",
186
+ "exact_match": 0.3391111753093553
187
+ }
188
+
189
+ ]
190
+ }
results/zero-shot/Qwen3-8B.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "Qwen/Qwen3-8B",
4
+ "architecture": "Qwen3ForCausalLM",
5
+ "dtype": "bfloat16",
6
+ "type": "instruction-tuned",
7
+ "num_parameters": "8b",
8
+ "api": "hf"
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "xquad_tr",
13
+ "task": "extractive_question_answering",
14
+ "exact_match": 0.27058823529411763,
15
+ "f1": 0.44020531602192525
16
+ },
17
+ {
18
+ "name": "xcopa_tr",
19
+ "task": "multiple_choice",
20
+ "acc": 0.656,
21
+ "acc_norm": 0.656
22
+ },
23
+ {
24
+ "name": "turkish_plu",
25
+ "task": "multiple_choice",
26
+ "acc": 0.47072,
27
+ "acc_norm": 0.41504
28
+ },
29
+ {
30
+ "name": "turkish_plu_goal_inference",
31
+ "task": "multiple_choice",
32
+ "acc": 0.3859020310633214,
33
+ "acc_norm": 0.2724014336917563
34
+ },
35
+ {
36
+ "name": "turkish_plu_next_event_prediction",
37
+ "task": "multiple_choice",
38
+ "acc": 0.44122137404580153,
39
+ "acc_norm": 0.2717557251908397
40
+ },
41
+ {
42
+ "name": "turkish_plu_step_inference",
43
+ "task": "multiple_choice",
44
+ "acc": 0.32189542483660133,
45
+ "acc_norm": 0.3741830065359477
46
+ },
47
+ {
48
+ "name": "turkish_plu_step_ordering",
49
+ "task": "multiple_choice",
50
+ "acc": 0.6483839373163565,
51
+ "acc_norm": 0.6483839373163565
52
+ },
53
+ {
54
+ "name": "turkce_atasozleri",
55
+ "task": "multiple_choice",
56
+ "acc": 0.7549132947976879,
57
+ "acc_norm": 0.7549132947976879
58
+ },
59
+ {
60
+ "name": "check_worthiness",
61
+ "task": "multiple_choice",
62
+ "acc": 0.4424131627056673,
63
+ "acc_norm": 0.6238574040219378
64
+ },
65
+ {
66
+ "name": "relevance_judgment",
67
+ "task": "multiple_choice",
68
+ "acc": 0.42230347349177333,
69
+ "acc_norm": 0.5781535648994516
70
+ },
71
+ {
72
+ "name": "tquad",
73
+ "task": "extractive_question_answering",
74
+ "exact_match": 0.18385650224215247,
75
+ "f1": 0.3611274526033501
76
+ },
77
+ {
78
+ "name": "sts_tr",
79
+ "task": "text_classification",
80
+ "acc": 0.21972443799854968,
81
+ "acc_norm": 0.12037708484408992
82
+ },
83
+ {
84
+ "name": "offenseval_tr",
85
+ "task": "text_classification",
86
+ "acc": 0.30413832199546487,
87
+ "acc_norm": 0.7970521541950113
88
+ },
89
+ {
90
+ "name": "mnli_tr",
91
+ "task": "natural_language_inference",
92
+ "acc": 0.3807,
93
+ "acc_norm": 0.3213
94
+ },
95
+ {
96
+ "name": "snli_tr",
97
+ "task": "natural_language_inference",
98
+ "acc": 0.3116,
99
+ "acc_norm": 0.3237
100
+ },
101
+ {
102
+ "name": "xnli_tr",
103
+ "task": "natural_language_inference",
104
+ "acc": 0.3664670658682635,
105
+ "acc_norm": 0.3333333333333333
106
+ },
107
+ {
108
+ "name": "news_cat",
109
+ "task": "text_classification",
110
+ "acc": 0.556,
111
+ "acc_norm": 0.28
112
+ },
113
+ {
114
+ "name": "ironytr",
115
+ "task": "text_classification",
116
+ "acc": 0.515,
117
+ "acc_norm": 0.5
118
+ },
119
+ {
120
+ "name": "exams_tr",
121
+ "task": "multiple_choice",
122
+ "acc": 0.29770992366412213,
123
+ "acc_norm": 0.3104325699745547
124
+ },
125
+ {
126
+ "name": "circumflex_tr",
127
+ "task": "multiple_choice",
128
+ "acc": 0.5,
129
+ "acc_norm": 0.5
130
+ },
131
+ {
132
+ "name": "bilmecebench",
133
+ "task": "multiple_choice",
134
+ "acc": 0.5316742081447964,
135
+ "acc_norm": 0.5316742081447964
136
+ },
137
+ {
138
+ "name": "belebele_tr",
139
+ "task": "multiple_choice",
140
+ "acc": 0.8111111111111111,
141
+ "acc_norm": 0.8111111111111111
142
+ },
143
+ {
144
+ "name": "turkishmmlu",
145
+ "task": "multiple_choice",
146
+ "acc": 0.5966666666666667,
147
+ "acc_norm": 0.5966666666666667
148
+ },
149
+ {
150
+ "name": "xlsum_tr",
151
+ "task": "summarization",
152
+ "rouge1": 0.30917291128339996,
153
+ "rouge2": 0.14832485972104875,
154
+ "rougeL": 0.23785344498387093
155
+ },
156
+ {
157
+ "name": "wmt-tr-en-prompt",
158
+ "task": "machine_translation",
159
+ "wer": 1.7919998446671352,
160
+ "bleu": 0.07228422067800695
161
+ },
162
+ {
163
+ "name": "wiki_lingua_tr",
164
+ "task": "summarization",
165
+ "rouge1": 0.15988237371586872,
166
+ "rouge2": 0.0532048337283498,
167
+ "rougeL": 0.12305004914854117
168
+ },
169
+ {
170
+ "name": "tr-wikihow-summ",
171
+ "task": "summarization",
172
+ "rouge1": 0.2116496152990347,
173
+ "rouge2": 0.06436161936676124,
174
+ "rougeL": 0.15251131714516145
175
+ },
176
+ {
177
+ "name": "mlsum_tr",
178
+ "task": "summarization",
179
+ "rouge1": 0.3892482949562466,
180
+ "rouge2": 0.23951248403090875,
181
+ "rougeL": 0.31613997907641456
182
+ },
183
+ {
184
+ "name": "gecturk_generation",
185
+ "task": "grammatical_error_correction",
186
+ "exact_match": 0.1439164138860802
187
+ }
188
+
189
+ ]
190
+ }