Safetensors · Romanian · mistral · Eval Results
mihaimasala committed · verified · Commit f65f19f · 1 parent: cc50492

Update README.md

Files changed (1):
  1. README.md (+474, -476)
README.md CHANGED
@@ -3,485 +3,483 @@ license: cc-by-nc-4.0
 language:
 - ro
 base_model:
-- mistralai/Mistral-7B-v0.1
+- OpenLLM-Ro/RoMistral-7b-Instruct-2024-10-09
 datasets:
 - OpenLLM-Ro/ro_dpo_helpsteer
-
 model-index:
-- name: OpenLLM-Ro/RoMistral-7b-Instruct-DPO-2024-10-09
-  results:
-  - task:
-      type: text-generation
-    dataset:
-      name: RoMT-Bench
-      type: RoMT-Bench
-    metrics:
-    - name: Score
-      type: Score
-      value: 5.88
-  - task:
-      type: text-generation
-    dataset:
-      name: RoCulturaBench
-      type: RoCulturaBench
-    metrics:
-    - name: Score
-      type: Score
-      value: 4.72
-  - task:
-      type: text-generation
-    dataset:
-      name: Romanian_Academic_Benchmarks
-      type: Romanian_Academic_Benchmarks
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 51.95
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_arc_challenge
-      type: OpenLLM-Ro/ro_arc_challenge
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 50.73
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_mmlu
-      type: OpenLLM-Ro/ro_mmlu
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 47.88
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_winogrande
-      type: OpenLLM-Ro/ro_winogrande
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 68.41
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_hellaswag
-      type: OpenLLM-Ro/ro_hellaswag
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 62.27
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_gsm8k
-      type: OpenLLM-Ro/ro_gsm8k
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 32.27
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_truthfulqa
-      type: OpenLLM-Ro/ro_truthfulqa
-    metrics:
-    - name: Average accuracy
-      type: accuracy
-      value: 50.12
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_binary
-      type: LaRoSeDa_binary
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 82.13
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_multiclass
-      type: LaRoSeDa_multiclass
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 65.24
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_binary_finetuned
-      type: LaRoSeDa_binary_finetuned
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 0.00
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_multiclass_finetuned
-      type: LaRoSeDa_multiclass_finetuned
-    metrics:
-    - name: Average macro-f1
-      type: macro-f1
-      value: 0.00
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_EN-RO
-      type: WMT_EN-RO
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 26.25
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_RO-EN
-      type: WMT_RO-EN
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 6.09
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_EN-RO_finetuned
-      type: WMT_EN-RO_finetuned
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 0.00
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_RO-EN_finetuned
-      type: WMT_RO-EN_finetuned
-    metrics:
-    - name: Average bleu
-      type: bleu
-      value: 0.00
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD
-      type: XQuAD
-    metrics:
-    - name: Average exact_match
-      type: exact_match
-      value: 23.40
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD
-      type: XQuAD
-    metrics:
-    - name: Average f1
-      type: f1
-      value: 45.80
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_finetuned
-      type: XQuAD_finetuned
-    metrics:
-    - name: Average exact_match
-      type: exact_match
-      value: 0.00
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_finetuned
-      type: XQuAD_finetuned
-    metrics:
-    - name: Average f1
-      type: f1
-      value: 0.00
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: Average spearman
-      type: spearman
-      value: 77.33
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: Average pearson
-      type: pearson
-      value: 76.60
-  - task:
-      type: text-generation
-    dataset:
-      name: STS_finetuned
-      type: STS_finetuned
-    metrics:
-    - name: Average spearman
-      type: spearman
-      value: 0.00
-  - task:
-      type: text-generation
-    dataset:
-      name: STS_finetuned
-      type: STS_finetuned
-    metrics:
-    - name: Average pearson
-      type: pearson
-      value: 0.00
-  - task:
-      type: text-generation
-    dataset:
-      name: RoMT-Bench
-      type: RoMT-Bench
-    metrics:
-    - name: First turn
-      type: Score
-      value: 6.44
-    - name: Second turn
-      type: Score
-      value: 5.33
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_arc_challenge
-      type: OpenLLM-Ro/ro_arc_challenge
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 51.67
-    - name: 1-shot
-      type: accuracy
-      value: 45.59
-    - name: 3-shot
-      type: accuracy
-      value: 48.24
-    - name: 5-shot
-      type: accuracy
-      value: 50.21
-    - name: 10-shot
-      type: accuracy
-      value: 54.07
-    - name: 25-shot
-      type: accuracy
-      value: 54.58
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_mmlu
-      type: OpenLLM-Ro/ro_mmlu
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 40.86
-    - name: 1-shot
-      type: accuracy
-      value: 48.67
-    - name: 3-shot
-      type: accuracy
-      value: 51.26
-    - name: 5-shot
-      type: accuracy
-      value: 50.75
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_winogrande
-      type: OpenLLM-Ro/ro_winogrande
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 64.80
-    - name: 1-shot
-      type: accuracy
-      value: 68.19
-    - name: 3-shot
-      type: accuracy
-      value: 70.09
-    - name: 5-shot
-      type: accuracy
-      value: 70.56
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_hellaswag
-      type: OpenLLM-Ro/ro_hellaswag
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 61.96
-    - name: 1-shot
-      type: accuracy
-      value: 60.88
-    - name: 3-shot
-      type: accuracy
-      value: 61.86
-    - name: 5-shot
-      type: accuracy
-      value: 62.73
-    - name: 10-shot
-      type: accuracy
-      value: 63.93
-  - task:
-      type: text-generation
-    dataset:
-      name: OpenLLM-Ro/ro_gsm8k
-      type: OpenLLM-Ro/ro_gsm8k
-    metrics:
-    - name: 0-shot
-      type: accuracy
-      value: 23.28
-    - name: 1-shot
-      type: accuracy
-      value: 34.95
-    - name: 3-shot
-      type: accuracy
-      value: 38.59
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_binary
-      type: LaRoSeDa_binary
-    metrics:
-    - name: 0-shot
-      type: macro-f1
-      value: 34.36
-    - name: 1-shot
-      type: macro-f1
-      value: 97.87
-    - name: 3-shot
-      type: macro-f1
-      value: 98.40
-    - name: 5-shot
-      type: macro-f1
-      value: 97.90
-  - task:
-      type: text-generation
-    dataset:
-      name: LaRoSeDa_multiclass
-      type: LaRoSeDa_multiclass
-    metrics:
-    - name: 0-shot
-      type: macro-f1
-      value: 66.17
-    - name: 1-shot
-      type: macro-f1
-      value: 65.93
-    - name: 3-shot
-      type: macro-f1
-      value: 61.86
-    - name: 5-shot
-      type: macro-f1
-      value: 66.99
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_EN-RO
-      type: WMT_EN-RO
-    metrics:
-    - name: 0-shot
-      type: bleu
-      value: 18.43
-    - name: 1-shot
-      type: bleu
-      value: 28.25
-    - name: 3-shot
-      type: bleu
-      value: 29.45
-    - name: 5-shot
-      type: bleu
-      value: 28.88
-  - task:
-      type: text-generation
-    dataset:
-      name: WMT_RO-EN
-      type: WMT_RO-EN
-    metrics:
-    - name: 0-shot
-      type: bleu
-      value: 2.80
-    - name: 1-shot
-      type: bleu
-      value: 2.90
-    - name: 3-shot
-      type: bleu
-      value: 6.63
-    - name: 5-shot
-      type: bleu
-      value: 12.04
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_EM
-      type: XQuAD_EM
-    metrics:
-    - name: 0-shot
-      type: exact_match
-      value: 5.04
-    - name: 1-shot
-      type: exact_match
-      value: 22.44
-    - name: 3-shot
-      type: exact_match
-      value: 30.42
-    - name: 5-shot
-      type: exact_match
-      value: 35.71
-  - task:
-      type: text-generation
-    dataset:
-      name: XQuAD_F1
-      type: XQuAD_F1
-    metrics:
-    - name: 0-shot
-      type: f1
-      value: 23.36
-    - name: 1-shot
-      type: f1
-      value: 44.63
-    - name: 3-shot
-      type: f1
-      value: 54.78
-    - name: 5-shot
-      type: f1
-      value: 60.43
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: 0-shot
-      type: spearman
-      value: 73.38
-    - name: 1-shot
-      type: spearman
-      value: 78.93
-    - name: 3-shot
-      type: spearman
-      value: 79.68
-  - task:
-      type: text-generation
-    dataset:
-      name: STS
-      type: STS
-    metrics:
-    - name: 0-shot
-      type: pearson
-      value: 73.93
-    - name: 1-shot
-      type: pearson
-      value: 77.69
-    - name: 3-shot
-      type: pearson
-      value: 78.17
-
+- name: OpenLLM-Ro/RoMistral-7b-Instruct-DPO-2024-10-09
+  results:
+  - task:
+      type: text-generation
+    dataset:
+      name: RoMT-Bench
+      type: RoMT-Bench
+    metrics:
+    - name: Score
+      type: Score
+      value: 5.88
+  - task:
+      type: text-generation
+    dataset:
+      name: RoCulturaBench
+      type: RoCulturaBench
+    metrics:
+    - name: Score
+      type: Score
+      value: 4.72
+  - task:
+      type: text-generation
+    dataset:
+      name: Romanian_Academic_Benchmarks
+      type: Romanian_Academic_Benchmarks
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 51.95
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_arc_challenge
+      type: OpenLLM-Ro/ro_arc_challenge
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 50.73
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_mmlu
+      type: OpenLLM-Ro/ro_mmlu
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 47.88
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_winogrande
+      type: OpenLLM-Ro/ro_winogrande
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 68.41
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_hellaswag
+      type: OpenLLM-Ro/ro_hellaswag
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 62.27
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_gsm8k
+      type: OpenLLM-Ro/ro_gsm8k
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 32.27
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_truthfulqa
+      type: OpenLLM-Ro/ro_truthfulqa
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 50.12
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_binary
+      type: LaRoSeDa_binary
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 82.13
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_multiclass
+      type: LaRoSeDa_multiclass
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 65.24
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_binary_finetuned
+      type: LaRoSeDa_binary_finetuned
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 0
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_multiclass_finetuned
+      type: LaRoSeDa_multiclass_finetuned
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 0
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_EN-RO
+      type: WMT_EN-RO
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 26.25
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_RO-EN
+      type: WMT_RO-EN
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 6.09
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_EN-RO_finetuned
+      type: WMT_EN-RO_finetuned
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 0
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_RO-EN_finetuned
+      type: WMT_RO-EN_finetuned
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 0
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD
+      type: XQuAD
+    metrics:
+    - name: Average exact_match
+      type: exact_match
+      value: 23.4
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD
+      type: XQuAD
+    metrics:
+    - name: Average f1
+      type: f1
+      value: 45.8
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_finetuned
+      type: XQuAD_finetuned
+    metrics:
+    - name: Average exact_match
+      type: exact_match
+      value: 0
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_finetuned
+      type: XQuAD_finetuned
+    metrics:
+    - name: Average f1
+      type: f1
+      value: 0
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: Average spearman
+      type: spearman
+      value: 77.33
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: Average pearson
+      type: pearson
+      value: 76.6
+  - task:
+      type: text-generation
+    dataset:
+      name: STS_finetuned
+      type: STS_finetuned
+    metrics:
+    - name: Average spearman
+      type: spearman
+      value: 0
+  - task:
+      type: text-generation
+    dataset:
+      name: STS_finetuned
+      type: STS_finetuned
+    metrics:
+    - name: Average pearson
+      type: pearson
+      value: 0
+  - task:
+      type: text-generation
+    dataset:
+      name: RoMT-Bench
+      type: RoMT-Bench
+    metrics:
+    - name: First turn
+      type: Score
+      value: 6.44
+    - name: Second turn
+      type: Score
+      value: 5.33
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_arc_challenge
+      type: OpenLLM-Ro/ro_arc_challenge
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 51.67
+    - name: 1-shot
+      type: accuracy
+      value: 45.59
+    - name: 3-shot
+      type: accuracy
+      value: 48.24
+    - name: 5-shot
+      type: accuracy
+      value: 50.21
+    - name: 10-shot
+      type: accuracy
+      value: 54.07
+    - name: 25-shot
+      type: accuracy
+      value: 54.58
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_mmlu
+      type: OpenLLM-Ro/ro_mmlu
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 40.86
+    - name: 1-shot
+      type: accuracy
+      value: 48.67
+    - name: 3-shot
+      type: accuracy
+      value: 51.26
+    - name: 5-shot
+      type: accuracy
+      value: 50.75
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_winogrande
+      type: OpenLLM-Ro/ro_winogrande
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 64.8
+    - name: 1-shot
+      type: accuracy
+      value: 68.19
+    - name: 3-shot
+      type: accuracy
+      value: 70.09
+    - name: 5-shot
+      type: accuracy
+      value: 70.56
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_hellaswag
+      type: OpenLLM-Ro/ro_hellaswag
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 61.96
+    - name: 1-shot
+      type: accuracy
+      value: 60.88
+    - name: 3-shot
+      type: accuracy
+      value: 61.86
+    - name: 5-shot
+      type: accuracy
+      value: 62.73
+    - name: 10-shot
+      type: accuracy
+      value: 63.93
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_gsm8k
+      type: OpenLLM-Ro/ro_gsm8k
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 23.28
+    - name: 1-shot
+      type: accuracy
+      value: 34.95
+    - name: 3-shot
+      type: accuracy
+      value: 38.59
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_binary
+      type: LaRoSeDa_binary
+    metrics:
+    - name: 0-shot
+      type: macro-f1
+      value: 34.36
+    - name: 1-shot
+      type: macro-f1
+      value: 97.87
+    - name: 3-shot
+      type: macro-f1
+      value: 98.4
+    - name: 5-shot
+      type: macro-f1
+      value: 97.9
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_multiclass
+      type: LaRoSeDa_multiclass
+    metrics:
+    - name: 0-shot
+      type: macro-f1
+      value: 66.17
+    - name: 1-shot
+      type: macro-f1
+      value: 65.93
+    - name: 3-shot
+      type: macro-f1
+      value: 61.86
+    - name: 5-shot
+      type: macro-f1
+      value: 66.99
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_EN-RO
+      type: WMT_EN-RO
+    metrics:
+    - name: 0-shot
+      type: bleu
+      value: 18.43
+    - name: 1-shot
+      type: bleu
+      value: 28.25
+    - name: 3-shot
+      type: bleu
+      value: 29.45
+    - name: 5-shot
+      type: bleu
+      value: 28.88
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_RO-EN
+      type: WMT_RO-EN
+    metrics:
+    - name: 0-shot
+      type: bleu
+      value: 2.8
+    - name: 1-shot
+      type: bleu
+      value: 2.9
+    - name: 3-shot
+      type: bleu
+      value: 6.63
+    - name: 5-shot
+      type: bleu
+      value: 12.04
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_EM
+      type: XQuAD_EM
+    metrics:
+    - name: 0-shot
+      type: exact_match
+      value: 5.04
+    - name: 1-shot
+      type: exact_match
+      value: 22.44
+    - name: 3-shot
+      type: exact_match
+      value: 30.42
+    - name: 5-shot
+      type: exact_match
+      value: 35.71
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_F1
+      type: XQuAD_F1
+    metrics:
+    - name: 0-shot
+      type: f1
+      value: 23.36
+    - name: 1-shot
+      type: f1
+      value: 44.63
+    - name: 3-shot
+      type: f1
+      value: 54.78
+    - name: 5-shot
+      type: f1
+      value: 60.43
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: 0-shot
+      type: spearman
+      value: 73.38
+    - name: 1-shot
+      type: spearman
+      value: 78.93
+    - name: 3-shot
+      type: spearman
+      value: 79.68
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: 0-shot
+      type: pearson
+      value: 73.93
+    - name: 1-shot
+      type: pearson
+      value: 77.69
+    - name: 3-shot
+      type: pearson
+      value: 78.17
 ---
 
 # Model Card for Model ID
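
The model-index block above follows the Hugging Face model-card metadata schema, so once this commit is live the reported scores can be read programmatically rather than scraped from the README. A minimal sketch, assuming the huggingface_hub package is installed and the repo id OpenLLM-Ro/RoMistral-7b-Instruct-DPO-2024-10-09 resolves on the Hub:

```python
# Minimal sketch: read the eval results declared in the model-index
# YAML above via huggingface_hub (assumes huggingface_hub is installed).
from huggingface_hub import ModelCard

card = ModelCard.load("OpenLLM-Ro/RoMistral-7b-Instruct-DPO-2024-10-09")

# card.data.eval_results is parsed from the model-index block; each entry
# carries the dataset, metric type, and value shown in the diff above.
for res in card.data.eval_results or []:
    print(f"{res.dataset_name}: {res.metric_name} ({res.metric_type}) = {res.metric_value}")
```

The base_model change is machine-readable in the same way: the Hub uses that field to link this DPO model back to OpenLLM-Ro/RoMistral-7b-Instruct-2024-10-09 as its fine-tuning parent, rather than directly to mistralai/Mistral-7B-v0.1.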