Delete wrong upload
- zero-shot/Qwen3-1.7B-Base.json +0 -189
- zero-shot/Qwen3-1.7B.json +0 -240
- zero-shot/Qwen3-14B-Base.json +0 -189
- zero-shot/Qwen3-14B.json +0 -189
- zero-shot/Qwen3-8B-Base.json +0 -190
- zero-shot/Qwen3-8B.json +0 -190
zero-shot/Qwen3-1.7B-Base.json
DELETED
@@ -1,189 +0,0 @@
-{
-    "model": {
-        "model": "Qwen/Qwen3-1.7B-Base",
-        "architecture": "Qwen3ForCausalLM",
-        "dtype": "bfloat16",
-        "type": "pretrained",
-        "num_parameters": "1.7b",
-        "api": "hf"
-    },
-    "results": [
-        {
-            "name": "xquad_tr",
-            "task": "extractive_question_answering",
-            "exact_match": 0.14705882352941177,
-            "f1": 0.2870820139107337
-        },
-        {
-            "name": "xcopa_tr",
-            "task": "multiple_choice",
-            "acc": 0.576,
-            "acc_norm": 0.576
-        },
-        {
-            "name": "turkish_plu",
-            "task": "multiple_choice",
-            "acc": 0.4416,
-            "acc_norm": 0.38656
-        },
-        {
-            "name": "turkish_plu_goal_inference",
-            "task": "multiple_choice",
-            "acc": 0.3859020310633214,
-            "acc_norm": 0.25925925925925924
-        },
-        {
-            "name": "turkish_plu_next_event_prediction",
-            "task": "multiple_choice",
-            "acc": 0.40610687022900765,
-            "acc_norm": 0.26106870229007634
-        },
-        {
-            "name": "turkish_plu_step_inference",
-            "task": "multiple_choice",
-            "acc": 0.2973856209150327,
-            "acc_norm": 0.34477124183006536
-        },
-        {
-            "name": "turkish_plu_step_ordering",
-            "task": "multiple_choice",
-            "acc": 0.5964740450538688,
-            "acc_norm": 0.5964740450538688
-        },
-        {
-            "name": "turkce_atasozleri",
-            "task": "multiple_choice",
-            "acc": 0.576878612716763,
-            "acc_norm": 0.576878612716763
-        },
-        {
-            "name": "check_worthiness",
-            "task": "multiple_choice",
-            "acc": 0.37842778793418647,
-            "acc_norm": 0.6238574040219378
-        },
-        {
-            "name": "relevance_judgment",
-            "task": "multiple_choice",
-            "acc": 0.43327239488117003,
-            "acc_norm": 0.5781535648994516
-        },
-        {
-            "name": "tquad",
-            "task": "extractive_question_answering",
-            "exact_match": 0.09417040358744394,
-            "f1": 0.26322715675405095
-        },
-        {
-            "name": "sts_tr",
-            "task": "text_classification",
-            "acc": 0.15663524292965916,
-            "acc_norm": 0.11965192168237854
-        },
-        {
-            "name": "offenseval_tr",
-            "task": "text_classification",
-            "acc": 0.768140589569161,
-            "acc_norm": 0.7970521541950113
-        },
-        {
-            "name": "mnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3478,
-            "acc_norm": 0.3213
-        },
-        {
-            "name": "snli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3343,
-            "acc_norm": 0.3237
-        },
-        {
-            "name": "xnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3333333333333333,
-            "acc_norm": 0.3333333333333333
-        },
-        {
-            "name": "news_cat",
-            "task": "text_classification",
-            "acc": 0.628,
-            "acc_norm": 0.34
-        },
-        {
-            "name": "ironytr",
-            "task": "text_classification",
-            "acc": 0.55,
-            "acc_norm": 0.5
-        },
-        {
-            "name": "exams_tr",
-            "task": "multiple_choice",
-            "acc": 0.26717557251908397,
-            "acc_norm": 0.31297709923664124
-        },
-        {
-            "name": "circumflex_tr",
-            "task": "multiple_choice",
-            "acc": 0.6142857142857143,
-            "acc_norm": 0.6142857142857143
-        },
-        {
-            "name": "bilmecebench",
-            "task": "multiple_choice",
-            "acc": 0.33710407239819007,
-            "acc_norm": 0.33710407239819007
-        },
-        {
-            "name": "belebele_tr",
-            "task": "multiple_choice",
-            "acc": 0.5933333333333334,
-            "acc_norm": 0.5933333333333334
-        },
-        {
-            "name": "turkishmmlu",
-            "task": "multiple_choice",
-            "acc": 0.4,
-            "acc_norm": 0.4
-        },
-        {
-            "name": "xlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.04781539873117025,
-            "rouge2": 0.01992023353880694,
-            "rougeL": 0.036905992546053354
-        },
-        {
-            "name": "wmt-tr-en-prompt",
-            "task": "machine_translation",
-            "wer": 1.122812394923734,
-            "bleu": 0.08632239330032426
-        },
-        {
-            "name": "wiki_lingua_tr",
-            "task": "summarization",
-            "rouge1": 0.1905933745444374,
-            "rouge2": 0.05729029075401719,
-            "rougeL": 0.1389441266800664
-        },
-        {
-            "name": "tr-wikihow-summ",
-            "task": "summarization",
-            "rouge1": 0.1573146712521122,
-            "rouge2": 0.04753989405427132,
-            "rougeL": 0.11305186768825831
-        },
-        {
-            "name": "mlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.18820054046451806,
-            "rouge2": 0.11945259097094729,
-            "rougeL": 0.15322863222214073
-        },
-        {
-            "name": "gecturk_generation",
-            "task": "grammatical_error_correction",
-            "exact_match": 0.13134960758823247
-        }
-    ]
-}
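All six deleted files share the same schema: a "model" metadata object (checkpoint id, architecture, dtype, tuning type, parameter count, serving API) followed by a "results" array of per-benchmark records whose metric keys depend on the task type. As a minimal illustration only (the file path assumes the pre-deletion layout, and the accuracy averaging is a hypothetical convenience, not anything this repository defines), such a file can be inspected with the Python standard library:

import json

# Load one of the result files (path is illustrative).
with open("zero-shot/Qwen3-1.7B-Base.json") as f:
    report = json.load(f)

print(report["model"]["model"], report["model"]["type"])

# Hypothetical summary: mean plain accuracy over the multiple-choice benchmarks.
accs = [r["acc"] for r in report["results"] if r["task"] == "multiple_choice"]
print(f"mean acc over {len(accs)} multiple-choice benchmarks: {sum(accs) / len(accs):.4f}")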
zero-shot/Qwen3-1.7B.json
DELETED
@@ -1,240 +0,0 @@
-{
-    "model": {
-        "model": "Qwen/Qwen3-1.7B",
-        "architecture": "Qwen3ForCausalLM",
-        "dtype": "bfloat16",
-        "type": "instruction-tuned",
-        "num_parameters": "1.7b",
-        "api": "hf"
-    },
-    "results": [
-        {
-            "name": "xquad_tr",
-            "task": "extractive_question_answering",
-            "exact_match": 0.40252100840336136,
-            "f1": 0.6176467678580342
-        },
-        {
-            "name": "xcopa_tr",
-            "task": "multiple_choice",
-            "acc": 0.646,
-            "acc_norm": 0.646
-        },
-        {
-            "name": "turkish_plu",
-            "task": "multiple_choice",
-            "acc": 0.48736,
-            "acc_norm": 0.5392
-        },
-        {
-            "name": "turkish_plu_goal_inference",
-            "task": "multiple_choice",
-            "acc": 0.4133811230585424,
-            "acc_norm": 0.4324970131421744
-        },
-        {
-            "name": "turkish_plu_next_event_prediction",
-            "task": "multiple_choice",
-            "acc": 0.4870229007633588,
-            "acc_norm": 0.5816793893129771
-        },
-        {
-            "name": "turkish_plu_step_inference",
-            "task": "multiple_choice",
-            "acc": 0.35294117647058826,
-            "acc_norm": 0.49019607843137253
-        },
-        {
-            "name": "turkish_plu_step_ordering",
-            "task": "multiple_choice",
-            "acc": 0.6287952987267384,
-            "acc_norm": 0.6287952987267384
-        },
-        {
-            "name": "check_worthiness",
-            "task": "multiple_choice",
-            "acc": 0.37614259597806216,
-            "acc_norm": 0.3756855575868373
-        },
-        {
-            "name": "relevance_judgment",
-            "task": "multiple_choice",
-            "acc": 0.4506398537477148,
-            "acc_norm": 0.5708409506398537
-        },
-        {
-            "name": "tquad",
-            "task": "extractive_question_answering",
-            "exact_match": 0.34753363228699546,
-            "f1": 0.614345609122
-        },
-        {
-            "name": "sts_tr",
-            "task": "text_classification",
-            "acc": 0.2037708484408992,
-            "acc_norm": 0.2610587382160986
-        },
-        {
-            "name": "offenseval_tr",
-            "task": "text_classification",
-            "acc": 0.22023809523809523,
-            "acc_norm": 0.2962018140589569
-        },
-        {
-            "name": "mnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3482,
-            "acc_norm": 0.3389
-        },
-        {
-            "name": "snli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3373,
-            "acc_norm": 0.3271
-        },
-        {
-            "name": "xnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3335329341317365,
-            "acc_norm": 0.33532934131736525
-        },
-        {
-            "name": "news_cat",
-            "task": "text_classification",
-            "acc": 0.524,
-            "acc_norm": 0.348
-        },
-        {
-            "name": "mkqa_tr",
-            "task": "extractive_question_answering",
-            "exact_match": 0.050014797277300974,
-            "f1": 0.11195620922043903
-        },
-        {
-            "name": "ironytr",
-            "task": "text_classification",
-            "acc": 0.5616666666666666,
-            "acc_norm": 0.6183333333333333
-        },
-        {
-            "name": "exams_tr",
-            "task": "multiple_choice",
-            "acc": 0.33078880407124683,
-            "acc_norm": 0.35877862595419846
-        },
-        {
-            "name": "belebele_tr",
-            "task": "multiple_choice",
-            "acc": 0.8122222222222222,
-            "acc_norm": 0.8122222222222222
-        },
-        {
-            "name": "xlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.2866278776668776,
-            "rouge2": 0.1308383753682692,
-            "rougeL": 0.22217070278595147
-        },
-        {
-            "name": "wmt-tr-en-prompt",
-            "task": "machine_translation",
-            "wer": 1.6546507240124098,
-            "bleu": 0.08096461200991427
-        },
-        {
-            "name": "wiki_lingua_tr",
-            "task": "summarization",
-            "rouge1": 0.20802332507327073,
-            "rouge2": 0.06755910819968403,
-            "rougeL": 0.15425156655216665
-        },
-        {
-            "name": "tr-wikihow-summ",
-            "task": "summarization",
-            "rouge1": 0.22012543165161014,
-            "rouge2": 0.06567086903148794,
-            "rougeL": 0.15604855476586732
-        },
-        {
-            "name": "mlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.3928051448993858,
-            "rouge2": 0.25674608200884674,
-            "rougeL": 0.3276023476233169
-        },
-        {
-            "name": "gecturk_generation",
-            "task": "grammatical_error_correction",
-            "exact_match": 0.1888
-        },
-        {
-            "name": "turkce_atasozleri",
-            "task": "multiple_choice",
-            "acc": 0.753757225433526,
-            "acc_norm": 0.753757225433526
-        },
-        {
-            "name": "turkishmmlu",
-            "task": "multiple_choice",
-            "acc": 0.5622222222222222,
-            "acc_norm": 0.5622222222222222
-        },
-        {
-            "name": "bilmecebench",
-            "task": "multiple_choice",
-            "acc": 0.4751131221719457,
-            "acc_norm": 0.4751131221719457
-        },
-        {
-            "name": "circumflex_tr",
-            "task": "multiple_choice",
-            "acc": 0.5857142857142857,
-            "acc_norm": 0.5857142857142857
-        },
-        {
-            "name": "turkishmmlu",
-            "task": "multiple_choice",
-            "acc": 0.38222222222222224,
-            "acc_norm": 0.38222222222222224
-        },
-        {
-            "name": "xlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.27820377739060753,
-            "rouge2": 0.12397426441534459,
-            "rougeL": 0.2141557453953809
-        },
-        {
-            "name": "wmt-tr-en-prompt",
-            "task": "machine_translation",
-            "wer": 1.4553342171690737,
-            "bleu": 0.05579047680001111
-        },
-        {
-            "name": "wiki_lingua_tr",
-            "task": "summarization",
-            "rouge1": 0.18975012020130533,
-            "rouge2": 0.056217857712315425,
-            "rougeL": 0.14164939611392058
-        },
-        {
-            "name": "tr-wikihow-summ",
-            "task": "summarization",
-            "rouge1": 0.20337048755281356,
-            "rouge2": 0.05824591585148973,
-            "rougeL": 0.14892270355472415
-        },
-        {
-            "name": "mlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.3633354972284383,
-            "rouge2": 0.22049361975220266,
-            "rougeL": 0.29386670924548397
-        },
-        {
-            "name": "gecturk_generation",
-            "task": "grammatical_error_correction",
-            "exact_match": 0.3063700707785642
-        }
-    ]
-}
zero-shot/Qwen3-14B-Base.json
DELETED
@@ -1,189 +0,0 @@
-{
-    "model": {
-        "model": "Qwen/Qwen3-14B",
-        "architecture": "Qwen3ForCausalLM",
-        "dtype": "bfloat16",
-        "type": "pretrained",
-        "num_parameters": "14b",
-        "api": "hf"
-    },
-    "results": [
-        {
-            "name": "xquad_tr",
-            "task": "extractive_question_answering",
-            "exact_match": 0.3512605042016807,
-            "f1": 0.5497491980200822
-        },
-        {
-            "name": "xcopa_tr",
-            "task": "multiple_choice",
-            "acc": 0.676,
-            "acc_norm": 0.676
-        },
-        {
-            "name": "turkish_plu",
-            "task": "multiple_choice",
-            "acc": 0.53248,
-            "acc_norm": 0.44448
-        },
-        {
-            "name": "turkish_plu_goal_inference",
-            "task": "multiple_choice",
-            "acc": 0.4432497013142174,
-            "acc_norm": 0.3010752688172043
-        },
-        {
-            "name": "turkish_plu_next_event_prediction",
-            "task": "multiple_choice",
-            "acc": 0.5145038167938931,
-            "acc_norm": 0.2870229007633588
-        },
-        {
-            "name": "turkish_plu_step_inference",
-            "task": "multiple_choice",
-            "acc": 0.40032679738562094,
-            "acc_norm": 0.3888888888888889
-        },
-        {
-            "name": "turkish_plu_step_ordering",
-            "task": "multiple_choice",
-            "acc": 0.6963761018609207,
-            "acc_norm": 0.6963761018609207
-        },
-        {
-            "name": "turkce_atasozleri",
-            "task": "multiple_choice",
-            "acc": 0.8283236994219653,
-            "acc_norm": 0.8283236994219653
-        },
-        {
-            "name": "check_worthiness",
-            "task": "multiple_choice",
-            "acc": 0.37614259597806216,
-            "acc_norm": 0.6238574040219378
-        },
-        {
-            "name": "relevance_judgment",
-            "task": "multiple_choice",
-            "acc": 0.42047531992687387,
-            "acc_norm": 0.5781535648994516
-        },
-        {
-            "name": "tquad",
-            "task": "extractive_question_answering",
-            "exact_match": 0.20179372197309417,
-            "f1": 0.41015757440015116
-        },
-        {
-            "name": "sts_tr",
-            "task": "text_classification",
-            "acc": 0.18491660623640319,
-            "acc_norm": 0.17113850616388687
-        },
-        {
-            "name": "offenseval_tr",
-            "task": "text_classification",
-            "acc": 0.7650226757369615,
-            "acc_norm": 0.7970521541950113
-        },
-        {
-            "name": "mnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.2776,
-            "acc_norm": 0.3213
-        },
-        {
-            "name": "snli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.2946,
-            "acc_norm": 0.3237
-        },
-        {
-            "name": "xnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.26387225548902193,
-            "acc_norm": 0.3333333333333333
-        },
-        {
-            "name": "news_cat",
-            "task": "text_classification",
-            "acc": 0.696,
-            "acc_norm": 0.344
-        },
-        {
-            "name": "ironytr",
-            "task": "text_classification",
-            "acc": 0.735,
-            "acc_norm": 0.5
-        },
-        {
-            "name": "exams_tr",
-            "task": "multiple_choice",
-            "acc": 0.35877862595419846,
-            "acc_norm": 0.3333333333333333
-        },
-        {
-            "name": "circumflex_tr",
-            "task": "multiple_choice",
-            "acc": 0.5428571428571428,
-            "acc_norm": 0.5428571428571428
-        },
-        {
-            "name": "bilmecebench",
-            "task": "multiple_choice",
-            "acc": 0.6470588235294118,
-            "acc_norm": 0.6470588235294118
-        },
-        {
-            "name": "belebele_tr",
-            "task": "multiple_choice",
-            "acc": 0.8388888888888889,
-            "acc_norm": 0.8388888888888889
-        },
-        {
-            "name": "turkishmmlu",
-            "task": "multiple_choice",
-            "acc": 0.6777777777777778,
-            "acc_norm": 0.6777777777777778
-        },
-        {
-            "name": "xlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.11473487738310617,
-            "rouge2": 0.060121517882459646,
-            "rougeL": 0.09836107933415753
-        },
-        {
-            "name": "wmt-tr-en-prompt",
-            "task": "machine_translation",
-            "wer": 1.1492106652129537,
-            "bleu": 0.11818272894053798
-        },
-        {
-            "name": "wiki_lingua_tr",
-            "task": "summarization",
-            "rouge1": 0.13000651376079725,
-            "rouge2": 0.03942796488947396,
-            "rougeL": 0.09588108114897731
-        },
-        {
-            "name": "tr-wikihow-summ",
-            "task": "summarization",
-            "rouge1": 0.042343650286353535,
-            "rouge2": 0.012407404121300508,
-            "rougeL": 0.03134101717443596
-        },
-        {
-            "name": "mlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.282634405530366,
-            "rouge2": 0.18883921790708064,
-            "rougeL": 0.2382731729763113
-        },
-        {
-            "name": "gecturk_generation",
-            "task": "grammatical_error_correction",
-            "exact_match": 0.32360729934036303
-        }
-    ]
-}
zero-shot/Qwen3-14B.json
DELETED
@@ -1,189 +0,0 @@
-{
-    "model": {
-        "model": "Qwen/Qwen3-14B",
-        "architecture": "Qwen3ForCausalLM",
-        "dtype": "bfloat16",
-        "type": "instruction-tuned",
-        "num_parameters": "14b",
-        "api": "hf"
-    },
-    "results": [
-        {
-            "name": "xquad_tr",
-            "task": "extractive_question_answering",
-            "exact_match": 0.17394957983193277,
-            "f1": 0.2667005758476218
-        },
-        {
-            "name": "xcopa_tr",
-            "task": "multiple_choice",
-            "acc": 0.674,
-            "acc_norm": 0.674
-        },
-        {
-            "name": "turkish_plu",
-            "task": "multiple_choice",
-            "acc": 0.4944,
-            "acc_norm": 0.42432
-        },
-        {
-            "name": "turkish_plu_goal_inference",
-            "task": "multiple_choice",
-            "acc": 0.39904420549581837,
-            "acc_norm": 0.2867383512544803
-        },
-        {
-            "name": "turkish_plu_next_event_prediction",
-            "task": "multiple_choice",
-            "acc": 0.5038167938931297,
-            "acc_norm": 0.28549618320610687
-        },
-        {
-            "name": "turkish_plu_step_inference",
-            "task": "multiple_choice",
-            "acc": 0.3562091503267974,
-            "acc_norm": 0.38562091503267976
-        },
-        {
-            "name": "turkish_plu_step_ordering",
-            "task": "multiple_choice",
-            "acc": 0.6493633692458374,
-            "acc_norm": 0.6493633692458374
-        },
-        {
-            "name": "turkce_atasozleri",
-            "task": "multiple_choice",
-            "acc": 0.8127167630057803,
-            "acc_norm": 0.8127167630057803
-        },
-        {
-            "name": "check_worthiness",
-            "task": "multiple_choice",
-            "acc": 0.38619744058500916,
-            "acc_norm": 0.6238574040219378
-        },
-        {
-            "name": "relevance_judgment",
-            "task": "multiple_choice",
-            "acc": 0.4218464351005484,
-            "acc_norm": 0.5781535648994516
-        },
-        {
-            "name": "tquad",
-            "task": "extractive_question_answering",
-            "exact_match": 0.1547085201793722,
-            "f1": 0.31146380255216244
-        },
-        {
-            "name": "sts_tr",
-            "task": "text_classification",
-            "acc": 0.1319796954314721,
-            "acc_norm": 0.116751269035533
-        },
-        {
-            "name": "offenseval_tr",
-            "task": "text_classification",
-            "acc": 0.7644557823129252,
-            "acc_norm": 0.7970521541950113
-        },
-        {
-            "name": "mnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3356,
-            "acc_norm": 0.3213
-        },
-        {
-            "name": "snli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3044,
-            "acc_norm": 0.3237
-        },
-        {
-            "name": "xnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3251497005988024,
-            "acc_norm": 0.3333333333333333
-        },
-        {
-            "name": "news_cat",
-            "task": "text_classification",
-            "acc": 0.424,
-            "acc_norm": 0.232
-        },
-        {
-            "name": "ironytr",
-            "task": "text_classification",
-            "acc": 0.7,
-            "acc_norm": 0.5
-        },
-        {
-            "name": "exams_tr",
-            "task": "multiple_choice",
-            "acc": 0.33587786259541985,
-            "acc_norm": 0.3384223918575064
-        },
-        {
-            "name": "circumflex_tr",
-            "task": "multiple_choice",
-            "acc": 0.5714285714285714,
-            "acc_norm": 0.5714285714285714
-        },
-        {
-            "name": "bilmecebench",
-            "task": "multiple_choice",
-            "acc": 0.5769230769230769,
-            "acc_norm": 0.5769230769230769
-        },
-        {
-            "name": "belebele_tr",
-            "task": "multiple_choice",
-            "acc": 0.8355555555555556,
-            "acc_norm": 0.8355555555555556
-        },
-        {
-            "name": "turkishmmlu",
-            "task": "multiple_choice",
-            "acc": 0.6355555555555555,
-            "acc_norm": 0.6355555555555555
-        },
-        {
-            "name": "xlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.2763159346655365,
-            "rouge2": 0.13580436770127194,
-            "rougeL": 0.2140432080171992
-        },
-        {
-            "name": "wmt-tr-en-prompt",
-            "task": "machine_translation",
-            "wer": 0.8811716956156485,
-            "bleu": 0.08106810125402132
-        },
-        {
-            "name": "wiki_lingua_tr",
-            "task": "summarization",
-            "rouge1": 0.20105611846416252,
-            "rouge2": 0.07453258043188488,
-            "rougeL": 0.15681666038287218
-        },
-        {
-            "name": "tr-wikihow-summ",
-            "task": "summarization",
-            "rouge1": 0.16377028131791377,
-            "rouge2": 0.04758033115002652,
-            "rougeL": 0.1184993075488721
-        },
-        {
-            "name": "mlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.3844389772184301,
-            "rouge2": 0.23720643221445104,
-            "rougeL": 0.3119516170965996
-        },
-        {
-            "name": "gecturk_generation",
-            "task": "grammatical_error_correction",
-            "exact_match": 0.05931917762049208
-        }
-    ]
-}
zero-shot/Qwen3-8B-Base.json
DELETED
@@ -1,190 +0,0 @@
-{
-    "model": {
-        "model": "Qwen/Qwen3-8B-Base",
-        "architecture": "Qwen3ForCausalLM",
-        "dtype": "bfloat16",
-        "type": "pretrained",
-        "num_parameters": "8b",
-        "api": "hf"
-    },
-    "results": [
-        {
-            "name": "xquad_tr",
-            "task": "extractive_question_answering",
-            "exact_match": 0.3050420168067227,
-            "f1": 0.49570394447625915
-        },
-        {
-            "name": "xcopa_tr",
-            "task": "multiple_choice",
-            "acc": 0.656,
-            "acc_norm": 0.656
-        },
-        {
-            "name": "turkish_plu",
-            "task": "multiple_choice",
-            "acc": 0.504,
-            "acc_norm": 0.42496
-        },
-        {
-            "name": "turkish_plu_goal_inference",
-            "task": "multiple_choice",
-            "acc": 0.43847072879330945,
-            "acc_norm": 0.28793309438470727
-        },
-        {
-            "name": "turkish_plu_next_event_prediction",
-            "task": "multiple_choice",
-            "acc": 0.48396946564885496,
-            "acc_norm": 0.28396946564885495
-        },
-        {
-            "name": "turkish_plu_step_inference",
-            "task": "multiple_choice",
-            "acc": 0.369281045751634,
-            "acc_norm": 0.38562091503267976
-        },
-        {
-            "name": "turkish_plu_step_ordering",
-            "task": "multiple_choice",
-            "acc": 0.6513222331047992,
-            "acc_norm": 0.6513222331047992
-        },
-        {
-            "name": "turkce_atasozleri",
-            "task": "multiple_choice",
-            "acc": 0.7728323699421965,
-            "acc_norm": 0.7728323699421965
-        },
-        {
-            "name": "check_worthiness",
-            "task": "multiple_choice",
-            "acc": 0.37614259597806216,
-            "acc_norm": 0.6238574040219378
-        },
-        {
-            "name": "relevance_judgment",
-            "task": "multiple_choice",
-            "acc": 0.4218464351005484,
-            "acc_norm": 0.5781535648994516
-        },
-        {
-            "name": "tquad",
-            "task": "extractive_question_answering",
-            "exact_match": 0.15134529147982062,
-            "f1": 0.3212345707810023
-        },
-        {
-            "name": "sts_tr",
-            "task": "text_classification",
-            "acc": 0.21174764321972445,
-            "acc_norm": 0.1312545322697607
-        },
-        {
-            "name": "offenseval_tr",
-            "task": "text_classification",
-            "acc": 0.8061224489795918,
-            "acc_norm": 0.7970521541950113
-        },
-        {
-            "name": "mnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3458,
-            "acc_norm": 0.3213
-        },
-        {
-            "name": "snli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.2771,
-            "acc_norm": 0.3237
-        },
-        {
-            "name": "xnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.33552894211576845,
-            "acc_norm": 0.3333333333333333
-        },
-        {
-            "name": "news_cat",
-            "task": "text_classification",
-            "acc": 0.64,
-            "acc_norm": 0.312
-        },
-        {
-            "name": "ironytr",
-            "task": "text_classification",
-            "acc": 0.5066666666666667,
-            "acc_norm": 0.5
-        },
-        {
-            "name": "exams_tr",
-            "task": "multiple_choice",
-            "acc": 0.3511450381679389,
-            "acc_norm": 0.3384223918575064
-        },
-        {
-            "name": "circumflex_tr",
-            "task": "multiple_choice",
-            "acc": 0.5714285714285714,
-            "acc_norm": 0.5714285714285714
-        },
-        {
-            "name": "bilmecebench",
-            "task": "multiple_choice",
-            "acc": 0.5723981900452488,
-            "acc_norm": 0.5723981900452488
-        },
-        {
-            "name": "belebele_tr",
-            "task": "multiple_choice",
-            "acc": 0.8044444444444444,
-            "acc_norm": 0.8044444444444444
-        },
-        {
-            "name": "turkishmmlu",
-            "task": "multiple_choice",
-            "acc": 0.6433333333333333,
-            "acc_norm": 0.6433333333333333
-        },
-        {
-            "name": "xlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.21850802086584964,
-            "rouge2": 0.09972850301278577,
-            "rougeL": 0.17530849751123678
-        },
-        {
-            "name": "wmt-tr-en-prompt",
-            "task": "machine_translation",
-            "wer": 2.085601196490251,
-            "bleu": 0.0867760476179405
-        },
-        {
-            "name": "wiki_lingua_tr",
-            "task": "summarization",
-            "rouge1": 0.1790179356445129,
-            "rouge2": 0.05536224163778491,
-            "rougeL": 0.13228141339234906
-        },
-        {
-            "name": "tr-wikihow-summ",
-            "task": "summarization",
-            "rouge1": 0.085160471573291,
-            "rouge2": 0.024521598751734066,
-            "rougeL": 0.061909395020943954
-        },
-        {
-            "name": "mlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.36692022983430356,
-            "rouge2": 0.24462313366233399,
-            "rougeL": 0.30870270053456245
-        },
-        {
-            "name": "gecturk_generation",
-            "task": "grammatical_error_correction",
-            "exact_match": 0.3391111753093553
-        }
-
-    ]
-}
zero-shot/Qwen3-8B.json
DELETED
@@ -1,190 +0,0 @@
-{
-    "model": {
-        "model": "Qwen/Qwen3-8B",
-        "architecture": "Qwen3ForCausalLM",
-        "dtype": "bfloat16",
-        "type": "instruction-tuned",
-        "num_parameters": "8b",
-        "api": "hf"
-    },
-    "results": [
-        {
-            "name": "xquad_tr",
-            "task": "extractive_question_answering",
-            "exact_match": 0.27058823529411763,
-            "f1": 0.44020531602192525
-        },
-        {
-            "name": "xcopa_tr",
-            "task": "multiple_choice",
-            "acc": 0.656,
-            "acc_norm": 0.656
-        },
-        {
-            "name": "turkish_plu",
-            "task": "multiple_choice",
-            "acc": 0.47072,
-            "acc_norm": 0.41504
-        },
-        {
-            "name": "turkish_plu_goal_inference",
-            "task": "multiple_choice",
-            "acc": 0.3859020310633214,
-            "acc_norm": 0.2724014336917563
-        },
-        {
-            "name": "turkish_plu_next_event_prediction",
-            "task": "multiple_choice",
-            "acc": 0.44122137404580153,
-            "acc_norm": 0.2717557251908397
-        },
-        {
-            "name": "turkish_plu_step_inference",
-            "task": "multiple_choice",
-            "acc": 0.32189542483660133,
-            "acc_norm": 0.3741830065359477
-        },
-        {
-            "name": "turkish_plu_step_ordering",
-            "task": "multiple_choice",
-            "acc": 0.6483839373163565,
-            "acc_norm": 0.6483839373163565
-        },
-        {
-            "name": "turkce_atasozleri",
-            "task": "multiple_choice",
-            "acc": 0.7549132947976879,
-            "acc_norm": 0.7549132947976879
-        },
-        {
-            "name": "check_worthiness",
-            "task": "multiple_choice",
-            "acc": 0.4424131627056673,
-            "acc_norm": 0.6238574040219378
-        },
-        {
-            "name": "relevance_judgment",
-            "task": "multiple_choice",
-            "acc": 0.42230347349177333,
-            "acc_norm": 0.5781535648994516
-        },
-        {
-            "name": "tquad",
-            "task": "extractive_question_answering",
-            "exact_match": 0.18385650224215247,
-            "f1": 0.3611274526033501
-        },
-        {
-            "name": "sts_tr",
-            "task": "text_classification",
-            "acc": 0.21972443799854968,
-            "acc_norm": 0.12037708484408992
-        },
-        {
-            "name": "offenseval_tr",
-            "task": "text_classification",
-            "acc": 0.30413832199546487,
-            "acc_norm": 0.7970521541950113
-        },
-        {
-            "name": "mnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3807,
-            "acc_norm": 0.3213
-        },
-        {
-            "name": "snli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3116,
-            "acc_norm": 0.3237
-        },
-        {
-            "name": "xnli_tr",
-            "task": "natural_language_inference",
-            "acc": 0.3664670658682635,
-            "acc_norm": 0.3333333333333333
-        },
-        {
-            "name": "news_cat",
-            "task": "text_classification",
-            "acc": 0.556,
-            "acc_norm": 0.28
-        },
-        {
-            "name": "ironytr",
-            "task": "text_classification",
-            "acc": 0.515,
-            "acc_norm": 0.5
-        },
-        {
-            "name": "exams_tr",
-            "task": "multiple_choice",
-            "acc": 0.29770992366412213,
-            "acc_norm": 0.3104325699745547
-        },
-        {
-            "name": "circumflex_tr",
-            "task": "multiple_choice",
-            "acc": 0.5,
-            "acc_norm": 0.5
-        },
-        {
-            "name": "bilmecebench",
-            "task": "multiple_choice",
-            "acc": 0.5316742081447964,
-            "acc_norm": 0.5316742081447964
-        },
-        {
-            "name": "belebele_tr",
-            "task": "multiple_choice",
-            "acc": 0.8111111111111111,
-            "acc_norm": 0.8111111111111111
-        },
-        {
-            "name": "turkishmmlu",
-            "task": "multiple_choice",
-            "acc": 0.5966666666666667,
-            "acc_norm": 0.5966666666666667
-        },
-        {
-            "name": "xlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.30917291128339996,
-            "rouge2": 0.14832485972104875,
-            "rougeL": 0.23785344498387093
-        },
-        {
-            "name": "wmt-tr-en-prompt",
-            "task": "machine_translation",
-            "wer": 1.7919998446671352,
-            "bleu": 0.07228422067800695
-        },
-        {
-            "name": "wiki_lingua_tr",
-            "task": "summarization",
-            "rouge1": 0.15988237371586872,
-            "rouge2": 0.0532048337283498,
-            "rougeL": 0.12305004914854117
-        },
-        {
-            "name": "tr-wikihow-summ",
-            "task": "summarization",
-            "rouge1": 0.2116496152990347,
-            "rouge2": 0.06436161936676124,
-            "rougeL": 0.15251131714516145
-        },
-        {
-            "name": "mlsum_tr",
-            "task": "summarization",
-            "rouge1": 0.3892482949562466,
-            "rouge2": 0.23951248403090875,
-            "rougeL": 0.31613997907641456
-        },
-        {
-            "name": "gecturk_generation",
-            "task": "grammatical_error_correction",
-            "exact_match": 0.1439164138860802
-        }
-
-    ]
-}