Commit
·
a13fb2c
1
Parent(s):
3335d76
Add eval
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +132 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_0.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_1.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_2.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_3.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_4.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_5.jsonl +0 -0
- 2b855b11bc4seed1/evaluation/generation/merged.csv +1 -0
- 2b855b11bc4seed1/evaluation/generation/merged.json +1 -0
- 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json +1 -0
- 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json +1 -0
- 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.json +1 -0
- 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.json +1 -0
- 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.json +1 -0
- 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.json +1 -0
- 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.json +1 -0
- 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl +3 -0
- 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl +3 -0
- 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl +3 -0
- 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +3 -0
- 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +3 -0
- 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.jsonl +3 -0
- 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.jsonl +3 -0
- 2b855b11bc4seed3/evaluation/generation/merged.csv +18 -4
- 2b855b11bc4seed3/evaluation/generation/merged.json +1 -1
- 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json +133 -0
- 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json +133 -0
- 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.json +133 -0
- 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.json +133 -0
- 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.json +133 -0
- 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.json +133 -0
- 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.json +133 -0
.gitattributes
CHANGED
@@ -428,3 +428,135 @@ evaluation/seed2/generation/examples.limited=3000.model=seed2.task=GEM-wiki_ling
|
|
428 |
2b855b1b25c4seed1/evaluation/generation/examples.2b855b1b25c4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
429 |
2b855b1b25c4seed2/evaluation/generation/examples.2b855b1b25c4seed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
430 |
2b855b1b25c4seed4/evaluation/generation/examples.2b855b1b25c4seed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
2b855b1b25c4seed1/evaluation/generation/examples.2b855b1b25c4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
429 |
2b855b1b25c4seed2/evaluation/generation/examples.2b855b1b25c4seed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
430 |
2b855b1b25c4seed4/evaluation/generation/examples.2b855b1b25c4seed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
431 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
432 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
433 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
434 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
435 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
436 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
437 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
438 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
439 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
440 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
441 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
442 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
443 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
444 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
445 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
446 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
447 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
448 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
449 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
450 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
451 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
452 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
453 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
454 |
+
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
455 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
456 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
457 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
458 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
459 |
+
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
460 |
+
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
461 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
462 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
463 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
464 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
465 |
+
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
466 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
467 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
468 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
469 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
470 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
471 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
472 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
473 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
474 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
475 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
476 |
+
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
477 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
478 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
479 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
480 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
481 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
482 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
483 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
484 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
485 |
+
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
486 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
487 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
488 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
489 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
490 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
491 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
492 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
493 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
494 |
+
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
495 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
496 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
497 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
498 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
499 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
500 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
501 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
502 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
503 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
504 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
505 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
506 |
+
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
507 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
508 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
509 |
+
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
510 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
511 |
+
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
512 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
513 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
514 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
515 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
516 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
517 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
518 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
519 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
520 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
521 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
522 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
523 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
524 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
525 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
526 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
527 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
528 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
529 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
530 |
+
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
531 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
532 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
533 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
534 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
535 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
536 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
537 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
538 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
539 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
540 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
541 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
542 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
543 |
+
2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
544 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
545 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
546 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
547 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
548 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
549 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
550 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
551 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
552 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
553 |
+
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
554 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
555 |
+
2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
556 |
+
2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
557 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
558 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
559 |
+
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
560 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
561 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
562 |
+
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_0.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_1.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_2.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_3.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_4.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_5.jsonl
ADDED
File without changes
|
2b855b11bc4seed1/evaluation/generation/merged.csv
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
dataset,fewshots,prompt,metric,value
|
2b855b11bc4seed1/evaluation/generation/merged.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.466035571971691, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03473185881932128}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06948874429007668, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00126969268323737}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.35504271560944006, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004806804977630298}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10915182875204862, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017368992622529642}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.032080423637990074, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007748948128844898}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1749610451479523, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035756989094432607}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05069482326429204, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011084349852182986}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06534721122278908, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001160283231789772}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33105657281482775, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004330159547841201}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.102624870740898, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015972476492216417}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06651082429118176, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012081627405732267}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3391017904387808, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004545927615205131}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10445663963047473, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001657522536981241}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.47567398731333893, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.024264359754404757}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.0696852083209599, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013577090826179866}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3612843042707264, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004861238941500158}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10892336928116716, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016854927070877707}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.0315904391098698, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007431193984384711}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.177085732300076, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035275863817017116}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05008036673402714, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010613654934549248}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.0651341368207596, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012591521998187908}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33426205604441794, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004323619187529501}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10170780198533885, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001554714639457137}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06639663258609937, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001291170836697709}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.34334559580001384, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0045556002073834055}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10372985125549432, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016004866013084038}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.008542236722467199, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008747857935002281}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.012984770525311046, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0012451725161618523}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.008780004115444839, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0008275079317754659}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0020271291197224393, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00029783255687868237}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0032641829590180815, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00043766038421818427}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.002147119319405267, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00027623822033813396}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.006544489279360034, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006478926275365654}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.010413387705542463, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001029849799449182}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.006842107389797507, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0006383034087266817}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.00789904438059008, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0008062204339147679}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.012088341751228384, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0011647198145518685}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.00814217266519704, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0007660037005158112}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.5732639330570601e-06, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 2.4552136635733976e-06}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 11.29834992541636, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.21750627906833597}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.48699136269149845, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030725910034362365}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.39873667481511255, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002641833719734696}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.41733074609230864, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021862637249920026}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.22696779992360078, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002388090320486047}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.18435752956149218, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002022279759231637}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.19240159571656937, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018744046004783308}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3681077705552102, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027056437550490925}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3004900799799888, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002254464504837015}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.31456390703859133, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019506135524216602}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.41003964030862644, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002940274288350555}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3361130546577144, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025408101507360696}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.35140686520524467, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002186992225871175}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 11.269628945064296, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15756104774510044}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4903478553495967, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00308746448334887}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.39563780199750853, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002621989888992289}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.41656475829237966, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021719641372564386}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.23014095309503432, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0023899391576289145}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.18355072919581486, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001968342781813886}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.19301313468237216, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018394274162893405}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3721457131973453, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027221328586820265}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.29906088364914196, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0022373123686497648}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3150475005541494, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019395006953993484}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.41476921512799597, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029434738207341333}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.33464526234291403, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002498553893092118}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3521891699869018, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0021596292682356695}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.03793206438705544, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024366223683722097}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.06987736475836392, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004064805072138278}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.04400316033856543, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002490349738342324}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.006979598507473323, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008901794799735351}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.013423754512870454, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013418442840833563}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.008063468290742252, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007981358698909123}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.030091382509093004, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0020782922503738504}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.05458303198935503, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003197618040713862}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.034178653761551166, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019416567180353596}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.03177113843460543, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002181794130601312}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.05751126056018169, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0033952763901923607}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.036128933672296124, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020726114306530577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.5192386395102333, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13002341898619957}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0025333563065761205, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0007200688467666427}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0020475737491748073, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0005527163320441051}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0022218780996264655, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006107839354088714}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0002052993867644256, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00010263779675566133}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00015805735145357788, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 8.125011510560468e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0001768729417917658, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 8.939544334231789e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0018767772604883414, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0005169601507247577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0015653832711829878, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0004179534887542103}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0016699276460120504, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0004477526803373312}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0018647403070412589, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0005045589409446839}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.001573237052621675, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0004187928988454184}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0016709735398329094, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00044431903582722767}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 9.778811073469078e-39, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.521718069448933e-33}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:822ebb3e7d80bc055cca88683ef6739ef54c6e9352e11e41a0b4b93d0f0fc0d7
|
3 |
+
size 7862036
|
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd43b411e8cb0b582dd1fa445c0344b5a8ab2aa220b7a069658327d8998d724a
|
3 |
+
size 8773910
|
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29c03532573129e0924a3941e35775b51f963dee1d69ee18028034b66d149ac8
|
3 |
+
size 34799318
|
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea51bea8ef170e701ef64d133006120aab85d4fe71465fb30cc13df2bfb59c47
|
3 |
+
size 8289971
|
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:568f322556552de1a3dc7850e9c8de9e10f7790c36fcdeca8b3521182a5bc668
|
3 |
+
size 9374208
|
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10d196b079c34336896043475bb74f33f92b92bf8b4a71dd4641ed98af2fd447
|
3 |
+
size 11672598
|
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be12ce862ba9524728d86caa84fa14184ada1c03e6bf04c3caf7e92c03132d2f
|
3 |
+
size 13897474
|
2b855b11bc4seed3/evaluation/generation/merged.csv
CHANGED
@@ -7,7 +7,11 @@ e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.18538437537983646
|
|
7 |
e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.18538437537983646
|
8 |
e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.19219003937378554
|
9 |
e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.19219003937378554
|
10 |
-
e2e_nlg_cleaned,
|
|
|
|
|
|
|
|
|
11 |
gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.04347043245633625
|
12 |
gem_xsum,0,median,rouge2_fmeasure,0.04347043245633625
|
13 |
gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.030277850873655133
|
@@ -16,7 +20,11 @@ gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.03015426920788573
|
|
16 |
gem_xsum,2,median,rouge2_fmeasure,0.03015426920788573
|
17 |
gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.028265095806897757
|
18 |
gem_xsum,3,median,rouge2_fmeasure,0.028265095806897757
|
19 |
-
gem_xsum,
|
|
|
|
|
|
|
|
|
20 |
web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.048503194247737774
|
21 |
web_nlg_en,0,median,rouge2_fmeasure,0.048503194247737774
|
22 |
web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.04633905642415022
|
@@ -25,7 +33,11 @@ web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.0482705113559789
|
|
25 |
web_nlg_en,2,median,rouge2_fmeasure,0.0482705113559789
|
26 |
web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.0486263549461623
|
27 |
web_nlg_en,3,median,rouge2_fmeasure,0.0486263549461623
|
28 |
-
web_nlg_en,
|
|
|
|
|
|
|
|
|
29 |
wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.03524633277968111
|
30 |
wiki_lingua_en,0,median,rouge2_fmeasure,0.03524633277968111
|
31 |
wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.04022404252550308
|
@@ -36,4 +48,6 @@ wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.039320031366549095
|
|
36 |
wiki_lingua_en,3,median,rouge2_fmeasure,0.039320031366549095
|
37 |
wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.01267310048414024
|
38 |
wiki_lingua_en,4,median,rouge2_fmeasure,0.01267310048414024
|
39 |
-
wiki_lingua_en,
|
|
|
|
|
|
7 |
e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.18538437537983646
|
8 |
e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.19219003937378554
|
9 |
e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.19219003937378554
|
10 |
+
e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.19240159571656937
|
11 |
+
e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.19240159571656937
|
12 |
+
e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.19301313468237216
|
13 |
+
e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.19301313468237216
|
14 |
+
e2e_nlg_cleaned,5,average,multiple,0.16087250097829975
|
15 |
gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.04347043245633625
|
16 |
gem_xsum,0,median,rouge2_fmeasure,0.04347043245633625
|
17 |
gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.030277850873655133
|
|
|
20 |
gem_xsum,2,median,rouge2_fmeasure,0.03015426920788573
|
21 |
gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.028265095806897757
|
22 |
gem_xsum,3,median,rouge2_fmeasure,0.028265095806897757
|
23 |
+
gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.008063468290742252
|
24 |
+
gem_xsum,4,median,rouge2_fmeasure,0.008063468290742252
|
25 |
+
gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.0001768729417917658
|
26 |
+
gem_xsum,5,median,rouge2_fmeasure,0.0001768729417917658
|
27 |
+
gem_xsum,5,average,multiple,0.02340133159621815
|
28 |
web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.048503194247737774
|
29 |
web_nlg_en,0,median,rouge2_fmeasure,0.048503194247737774
|
30 |
web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.04633905642415022
|
|
|
33 |
web_nlg_en,2,median,rouge2_fmeasure,0.0482705113559789
|
34 |
web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.0486263549461623
|
35 |
web_nlg_en,3,median,rouge2_fmeasure,0.0486263549461623
|
36 |
+
web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.05069482326429204
|
37 |
+
web_nlg_en,4,median,rouge2_fmeasure,0.05069482326429204
|
38 |
+
web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.05008036673402714
|
39 |
+
web_nlg_en,5,median,rouge2_fmeasure,0.05008036673402714
|
40 |
+
web_nlg_en,5,average,multiple,0.0487523844953914
|
41 |
wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.03524633277968111
|
42 |
wiki_lingua_en,0,median,rouge2_fmeasure,0.03524633277968111
|
43 |
wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.04022404252550308
|
|
|
48 |
wiki_lingua_en,3,median,rouge2_fmeasure,0.039320031366549095
|
49 |
wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.01267310048414024
|
50 |
wiki_lingua_en,4,median,rouge2_fmeasure,0.01267310048414024
|
51 |
+
wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.002147119319405267
|
52 |
+
wiki_lingua_en,5,median,rouge2_fmeasure,0.002147119319405267
|
53 |
+
wiki_lingua_en,5,average,multiple,0.02945130750091407
|
2b855b11bc4seed3/evaluation/generation/merged.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.3003906758006876, "bleu_stderr": 0.027331800335698254, "rouge1_fmeasure": 0.10522756302657624, "rouge1_fmeasure_stderr": 0.0020564917165852665, "rouge1_precision": 0.06963270011987571, "rouge1_precision_stderr": 0.0016168408610962013, "rouge1_recall": 0.29729464197458877, "rouge1_recall_stderr": 0.0047714353450774415, "rouge2_fmeasure": 0.048503194247737774, "rouge2_fmeasure_stderr": 0.001258248656235562, "rouge2_precision": 0.03200346886185345, "rouge2_precision_stderr": 0.000957625394662931, "rouge2_recall": 0.140470675875411, "rouge2_recall_stderr": 0.0032183793551318674, "rougeL_fmeasure": 0.10106959021191211, "rougeL_fmeasure_stderr": 0.0018911974850607549, "rougeL_precision": 0.06646767562482549, "rougeL_precision_stderr": 0.0014351668395974513, "rougeL_recall": 0.28816568568111717, "rougeL_recall_stderr": 0.004622479804486804, "rougeLsum_fmeasure": 0.10010042995723817, "rougeLsum_fmeasure_stderr": 0.001929318666410727, "rougeLsum_precision": 0.06620461518234684, "rougeLsum_precision_stderr": 0.00151460835911578, "rougeLsum_recall": 0.282597952018841, "rougeLsum_recall_stderr": 0.004481787483630335}}, "1": {"PALM_prompt": {"bleu": 0.32423805540348105, "bleu_stderr": 0.024690542644449438, "rouge1_fmeasure": 0.10249916346044939, "rouge1_fmeasure_stderr": 0.0018656887279984073, "rouge1_precision": 0.06685340388470082, "rouge1_precision_stderr": 0.0015253847039623312, "rouge1_recall": 0.323514427623318, "rouge1_recall_stderr": 0.004639043635695871, "rouge2_fmeasure": 0.04633905642415022, "rouge2_fmeasure_stderr": 0.0011829999453798483, "rouge2_precision": 0.030000286950072674, "rouge2_precision_stderr": 0.0008643544653956381, "rouge2_recall": 0.14824220519105014, "rouge2_recall_stderr": 0.0031972015283258494, "rougeL_fmeasure": 0.09712356208435262, "rougeL_fmeasure_stderr": 0.0017347095170401298, "rougeL_precision": 0.06336375493461167, "rougeL_precision_stderr": 0.0014215770633909588, "rougeL_recall": 0.303094617033746, "rougeL_recall_stderr": 0.004191983337899737, "rougeLsum_fmeasure": 0.09830433934297841, "rougeLsum_fmeasure_stderr": 0.0017883137206561263, "rougeLsum_precision": 0.06416576148923457, "rougeLsum_precision_stderr": 0.0014659099244540854, "rougeLsum_recall": 0.30832609036704645, "rougeLsum_recall_stderr": 0.004339774784201872}}, "2": {"PALM_prompt": {"bleu": 0.33841996991287543, "bleu_stderr": 0.013366615983706327, "rouge1_fmeasure": 0.10597979981258299, "rouge1_fmeasure_stderr": 0.0017714336298557038, "rouge1_precision": 0.06765631897767002, "rouge1_precision_stderr": 0.0013046336481251101, "rouge1_recall": 0.3402361179206784, "rouge1_recall_stderr": 0.004704696452770623, "rouge2_fmeasure": 0.0482705113559789, "rouge2_fmeasure_stderr": 0.001132968575984557, "rouge2_precision": 0.03077023716855104, "rouge2_precision_stderr": 0.0008032546127820459, "rouge2_recall": 0.1606656566085052, "rouge2_recall_stderr": 0.0033693273708689585, "rougeL_fmeasure": 0.10046626150365084, "rougeL_fmeasure_stderr": 0.0016642034527337758, "rougeL_precision": 0.06412349572198735, "rougeL_precision_stderr": 0.0012122011908497355, "rougeL_recall": 0.31907776335571136, "rougeL_recall_stderr": 0.004279931218687767, "rougeLsum_fmeasure": 0.10141858866959458, "rougeLsum_fmeasure_stderr": 0.001700099907526394, "rougeLsum_precision": 0.06475854902642648, "rougeLsum_precision_stderr": 0.0012483946453731626, "rougeLsum_recall": 0.3241391405150877, "rougeLsum_recall_stderr": 0.004418718258324822}}, "3": {"PALM_prompt": {"bleu": 0.40584434052309054, "bleu_stderr": 0.02616462426887616, "rouge1_fmeasure": 0.10602175173953958, "rouge1_fmeasure_stderr": 0.001741987498486231, "rouge1_precision": 0.06757351073723121, "rouge1_precision_stderr": 0.0012679743548778455, "rouge1_recall": 0.3440037162848658, "rouge1_recall_stderr": 0.0049119409175133995, "rouge2_fmeasure": 0.0486263549461623, "rouge2_fmeasure_stderr": 0.0010900986383698252, "rouge2_precision": 0.030886249533870456, "rouge2_precision_stderr": 0.0007629220092981514, "rouge2_recall": 0.16556453686704345, "rouge2_recall_stderr": 0.0034492328057988348, "rougeL_fmeasure": 0.10011209012325656, "rougeL_fmeasure_stderr": 0.0016356941636110295, "rougeL_precision": 0.06384738095162408, "rougeL_precision_stderr": 0.0011862226633864972, "rougeL_recall": 0.3209539378169773, "rougeL_recall_stderr": 0.004410637240630364, "rougeLsum_fmeasure": 0.10161889625106496, "rougeLsum_fmeasure_stderr": 0.0016751216394668587, "rougeLsum_precision": 0.06483859720845632, "rougeLsum_precision_stderr": 0.0012202108024313588, "rougeLsum_recall": 0.3270171129897123, "rougeLsum_recall_stderr": 0.0045709512856975595}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.4622087249250515, "bleu_stderr": 0.04207910392007385, "rouge1_fmeasure": 0.1767157419817045, "rouge1_fmeasure_stderr": 0.0018123734169975135, "rouge1_precision": 0.1506827740817673, "rouge1_precision_stderr": 0.001852883253242218, "rouge1_recall": 0.25686230777525454, "rouge1_recall_stderr": 0.00260278766504943, "rouge2_fmeasure": 0.03524633277968111, "rouge2_fmeasure_stderr": 0.0008101513042416649, "rouge2_precision": 0.02971277123757481, "rouge2_precision_stderr": 0.000707355818382236, "rouge2_recall": 0.05307915600648516, "rouge2_recall_stderr": 0.0013686059525554353, "rougeL_fmeasure": 0.13759253266149163, "rougeL_fmeasure_stderr": 0.001281432881200856, "rougeL_precision": 0.11575873439522533, "rougeL_precision_stderr": 0.0012737353859565011, "rougeL_recall": 0.2052922273561128, "rougeL_recall_stderr": 0.0021096414533161158, "rougeLsum_fmeasure": 0.16173441706919453, "rougeLsum_fmeasure_stderr": 0.0016450542485275806, "rougeLsum_precision": 0.1374963134041478, "rougeLsum_precision_stderr": 0.0016657330727227028, "rougeLsum_recall": 0.2362819209208244, "rougeLsum_recall_stderr": 0.002418312889630038}}, "1": {"tldr_en": {"bleu": 1.972919037460516, "bleu_stderr": 0.05682843274487135, "rouge1_fmeasure": 0.19185955437187688, "rouge1_fmeasure_stderr": 0.001834411329990832, "rouge1_precision": 0.1655651982573561, "rouge1_precision_stderr": 0.001967434204420624, "rouge1_recall": 0.2778367078547306, "rouge1_recall_stderr": 0.0026376055817718498, "rouge2_fmeasure": 0.04022404252550308, "rouge2_fmeasure_stderr": 0.0009067575883966806, "rouge2_precision": 0.03468172497365085, "rouge2_precision_stderr": 0.0008414429849505186, "rouge2_recall": 0.06032773756466252, "rouge2_recall_stderr": 0.0014755233968311602, "rougeL_fmeasure": 0.1394283655875047, "rougeL_fmeasure_stderr": 0.0012494508344161473, "rougeL_precision": 0.1190010376024555, "rougeL_precision_stderr": 0.0013201442960543233, "rougeL_recall": 0.20730876012944102, "rougeL_recall_stderr": 0.0020918631972439913, "rougeLsum_fmeasure": 0.1798108549879984, "rougeLsum_fmeasure_stderr": 0.00170035102934959, "rougeLsum_precision": 0.15494002660111156, "rougeLsum_precision_stderr": 0.001825015399111526, "rougeLsum_recall": 0.26137076597079567, "rougeLsum_recall_stderr": 0.002487191761867969}}, "2": {"tldr_en": {"bleu": 2.2743708557193902, "bleu_stderr": 0.06189529268097383, "rouge1_fmeasure": 0.20283722636774154, "rouge1_fmeasure_stderr": 0.001838028419312966, "rouge1_precision": 0.17638523863069297, "rouge1_precision_stderr": 0.002020755519678356, "rouge1_recall": 0.2911840829982234, "rouge1_recall_stderr": 0.0026772675380155376, "rouge2_fmeasure": 0.04709721853020564, "rouge2_fmeasure_stderr": 0.000949364569447851, "rouge2_precision": 0.04064975395757813, "rouge2_precision_stderr": 0.0008728139678303188, "rouge2_recall": 0.0700705095693168, "rouge2_recall_stderr": 0.0016162550664557144, "rougeL_fmeasure": 0.14915790559695724, "rougeL_fmeasure_stderr": 0.001266186326972188, "rougeL_precision": 0.128540063496423, "rougeL_precision_stderr": 0.00138883173221342, "rougeL_recall": 0.21916109079967522, "rougeL_recall_stderr": 0.002145845723677153, "rougeLsum_fmeasure": 0.1899159372074719, "rougeLsum_fmeasure_stderr": 0.0017127052022618613, "rougeLsum_precision": 0.16486271659279425, "rougeLsum_precision_stderr": 0.0018790326981489205, "rougeLsum_recall": 0.2736737223320281, "rougeLsum_recall_stderr": 0.0025545391606520626}}, "3": {"tldr_en": {"bleu": 2.261325582734008, "bleu_stderr": 0.07440671017406066, "rouge1_fmeasure": 0.17007228676988045, "rouge1_fmeasure_stderr": 0.0021295726072665254, "rouge1_precision": 0.15457296707448556, "rouge1_precision_stderr": 0.0023843459800257866, "rouge1_recall": 0.24244601688859282, "rouge1_recall_stderr": 0.0031047631304705336, "rouge2_fmeasure": 0.039320031366549095, "rouge2_fmeasure_stderr": 0.0009156298192238671, "rouge2_precision": 0.03563497026047448, "rouge2_precision_stderr": 0.0009697902407615488, "rouge2_recall": 0.05775187966143085, "rouge2_recall_stderr": 0.0014973785566537818, "rougeL_fmeasure": 0.12582015691412737, "rougeL_fmeasure_stderr": 0.0015236321853914047, "rougeL_precision": 0.11385012472654804, "rougeL_precision_stderr": 0.0017484820235318678, "rougeL_recall": 0.18331103625638695, "rougeL_recall_stderr": 0.0024579268481887276, "rougeLsum_fmeasure": 0.15899932831508662, "rougeLsum_fmeasure_stderr": 0.0019839606960568193, "rougeLsum_precision": 0.1443112386465537, "rougeLsum_precision_stderr": 0.002222016297651524, "rougeLsum_recall": 0.22741868584839184, "rougeLsum_recall_stderr": 0.0029377295753792703}}, "4": {"tldr_en": {"bleu": 0.5600187004785872, "bleu_stderr": 0.034329899401054034, "rouge1_fmeasure": 0.054359788577698374, "rouge1_fmeasure_stderr": 0.001834775379938349, "rouge1_precision": 0.05049412311337655, "rouge1_precision_stderr": 0.0018775705845440543, "rouge1_recall": 0.07991007309327065, "rouge1_recall_stderr": 0.002748908637336395, "rouge2_fmeasure": 0.01267310048414024, "rouge2_fmeasure_stderr": 0.0006260839320397936, "rouge2_precision": 0.01142264448143536, "rouge2_precision_stderr": 0.0006812961216581213, "rouge2_recall": 0.02008142422629143, "rouge2_recall_stderr": 0.0010993317750224779, "rougeL_fmeasure": 0.04187829090557222, "rougeL_fmeasure_stderr": 0.001389559278464916, "rougeL_precision": 0.038958076750695146, "rougeL_precision_stderr": 0.0014576150060918941, "rougeL_recall": 0.0629580148095348, "rougeL_recall_stderr": 0.002201566030491766, "rougeLsum_fmeasure": 0.05081727487259366, "rougeLsum_fmeasure_stderr": 0.0017142240431756424, "rougeLsum_precision": 0.047269915920336, "rougeLsum_precision_stderr": 0.0017705102362424203, "rougeLsum_recall": 0.07497632668531767, "rougeLsum_recall_stderr": 0.002589565865103579}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.2918074829245511, "bleu_stderr": 0.03334721114977758, "rouge1_fmeasure": 0.1600243176507681, "rouge1_fmeasure_stderr": 0.0010733278398958096, "rouge1_precision": 0.12561487707508034, "rouge1_precision_stderr": 0.0013825187846368096, "rouge1_recall": 0.25305668157788624, "rouge1_recall_stderr": 0.0013315623225900873, "rouge2_fmeasure": 0.033687666872503644, "rouge2_fmeasure_stderr": 0.0005133425312314838, "rouge2_precision": 0.026668315991920277, "rouge2_precision_stderr": 0.0005306038751357611, "rouge2_recall": 0.05412553888370808, "rouge2_recall_stderr": 0.0007946156915472112, "rougeL_fmeasure": 0.15622553271845338, "rougeL_fmeasure_stderr": 0.0010043221686026895, "rougeL_precision": 0.12177712671014992, "rougeL_precision_stderr": 0.001240068402500465, "rougeL_recall": 0.24840753894632903, "rougeL_recall_stderr": 0.001318527645790754, "rougeLsum_fmeasure": 0.13789602591344347, "rougeLsum_fmeasure_stderr": 0.0009870480569216915, "rougeLsum_precision": 0.10841189553256889, "rougeLsum_precision_stderr": 0.0012618615939597366, "rougeLsum_recall": 0.21839285442742476, "rougeLsum_recall_stderr": 0.0012454131726385412}}, "1": {"generate_text_restaurant": {"bleu": 9.67590369741921, "bleu_stderr": 0.09901656476390079, "rouge1_fmeasure": 0.39957774138885144, "rouge1_fmeasure_stderr": 0.0022278577374241887, "rouge1_precision": 0.45349765710233814, "rouge1_precision_stderr": 0.0029408652429081406, "rouge1_recall": 0.39709628674803366, "rouge1_recall_stderr": 0.002799520545788188, "rouge2_fmeasure": 0.16855819384473125, "rouge2_fmeasure_stderr": 0.001773629505406216, "rouge2_precision": 0.19448143773761467, "rouge2_precision_stderr": 0.002225394284814737, "rouge2_recall": 0.16778603132296563, "rouge2_recall_stderr": 0.0019654958159185396, "rougeL_fmeasure": 0.2890014884493394, "rougeL_fmeasure_stderr": 0.0018207916834884527, "rougeL_precision": 0.33043423858845145, "rougeL_precision_stderr": 0.0024856924454187785, "rougeL_recall": 0.2872561015612187, "rougeL_recall_stderr": 0.0022337982582298155, "rougeLsum_fmeasure": 0.3277959949146152, "rougeLsum_fmeasure_stderr": 0.0021102604579282852, "rougeLsum_precision": 0.3738597805796585, "rougeLsum_precision_stderr": 0.002773351046379512, "rougeLsum_recall": 0.3249821747731812, "rougeLsum_recall_stderr": 0.002515405285337629}}, "2": {"generate_text_restaurant": {"bleu": 10.91262584267496, "bleu_stderr": 0.12763668087089558, "rouge1_fmeasure": 0.4158184072582917, "rouge1_fmeasure_stderr": 0.0021875821824657578, "rouge1_precision": 0.4745007823219277, "rouge1_precision_stderr": 0.003096779743530949, "rouge1_recall": 0.41114421373702703, "rouge1_recall_stderr": 0.002795411314326619, "rouge2_fmeasure": 0.18538437537983646, "rouge2_fmeasure_stderr": 0.001799521509176773, "rouge2_precision": 0.21433784297140893, "rouge2_precision_stderr": 0.002331836767735763, "rouge2_recall": 0.18401740949966608, "rouge2_recall_stderr": 0.002021878491927928, "rougeL_fmeasure": 0.30338837358825094, "rougeL_fmeasure_stderr": 0.001840114059497487, "rougeL_precision": 0.34796254903830837, "rougeL_precision_stderr": 0.002621056636644695, "rougeL_recall": 0.2999843023857228, "rougeL_recall_stderr": 0.002269391756360346, "rougeLsum_fmeasure": 0.3438444001935511, "rougeLsum_fmeasure_stderr": 0.0020872281136323037, "rougeLsum_precision": 0.39290245826296444, "rougeLsum_precision_stderr": 0.0028605438328826085, "rougeLsum_recall": 0.3399505800342592, "rougeLsum_recall_stderr": 0.0025542752245032124}}, "3": {"generate_text_restaurant": {"bleu": 11.413052941435677, "bleu_stderr": 0.20476608953978004, "rouge1_fmeasure": 0.42003617329938386, "rouge1_fmeasure_stderr": 0.002154444681203657, "rouge1_precision": 0.4836426637277247, "rouge1_precision_stderr": 0.003041696429497936, "rouge1_recall": 0.4077462553545315, "rouge1_recall_stderr": 0.002698117142879397, "rouge2_fmeasure": 0.19219003937378554, "rouge2_fmeasure_stderr": 0.0018883864751046853, "rouge2_precision": 0.22368030301486044, "rouge2_precision_stderr": 0.0024069791642568504, "rouge2_recall": 0.18735894030210265, "rouge2_recall_stderr": 0.0020871489281372663, "rougeL_fmeasure": 0.31324626443128384, "rougeL_fmeasure_stderr": 0.0019216880227233767, "rougeL_precision": 0.36189364939827273, "rougeL_precision_stderr": 0.0026889223009721307, "rougeL_recall": 0.30429571782108805, "rougeL_recall_stderr": 0.0023058551990661367, "rougeLsum_fmeasure": 0.35143133030157764, "rougeLsum_fmeasure_stderr": 0.002142111791450537, "rougeLsum_precision": 0.40514213772293617, "rougeLsum_precision_stderr": 0.0029172807069507275, "rougeLsum_recall": 0.341129701516605, "rougeLsum_recall_stderr": 0.0025465447925427515}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.7452599273208484, "bleu_stderr": 0.0710626335421352, "rouge1_fmeasure": 0.2032531810901225, "rouge1_fmeasure_stderr": 0.002372876068699817, "rouge1_precision": 0.14818700118728248, "rouge1_precision_stderr": 0.0018676651463230222, "rouge1_recall": 0.34586554355363025, "rouge1_recall_stderr": 0.0041641658204401725, "rouge2_fmeasure": 0.04347043245633625, "rouge2_fmeasure_stderr": 0.001442207400667641, "rouge2_precision": 0.03119345685386172, "rouge2_precision_stderr": 0.0010373843022792317, "rouge2_recall": 0.07667587881403153, "rouge2_recall_stderr": 0.002653660348427432, "rougeL_fmeasure": 0.1528350870269447, "rougeL_fmeasure_stderr": 0.0017715223191776047, "rougeL_precision": 0.11119723252661128, "rougeL_precision_stderr": 0.0013663860116280308, "rougeL_recall": 0.2616100987791669, "rougeL_recall_stderr": 0.003275713171944408, "rougeLsum_fmeasure": 0.16043210769047098, "rougeLsum_fmeasure_stderr": 0.0020055581696195052, "rougeLsum_precision": 0.11658163341774634, "rougeLsum_precision_stderr": 0.0015235968275521983, "rougeLsum_recall": 0.275178892490744, "rougeLsum_recall_stderr": 0.0036968416298391823}}, "1": {"article_DOC_summary": {"bleu": 1.1604575025128918, "bleu_stderr": 0.0877746649601935, "rouge1_fmeasure": 0.16834116026562992, "rouge1_fmeasure_stderr": 0.0023007208182279906, "rouge1_precision": 0.11944768188603072, "rouge1_precision_stderr": 0.0017101747872860968, "rouge1_recall": 0.2967764173333572, "rouge1_recall_stderr": 0.003944563464315701, "rouge2_fmeasure": 0.030277850873655133, "rouge2_fmeasure_stderr": 0.0012385139115491765, "rouge2_precision": 0.021232879052259627, "rouge2_precision_stderr": 0.0008714791712295506, "rouge2_recall": 0.05526393564583485, "rouge2_recall_stderr": 0.0023240893343291387, "rougeL_fmeasure": 0.12978985120588027, "rougeL_fmeasure_stderr": 0.0017321643764184145, "rougeL_precision": 0.09192594935218694, "rougeL_precision_stderr": 0.0012769463345594439, "rougeL_recall": 0.23019911126154574, "rougeL_recall_stderr": 0.003091431421711302, "rougeLsum_fmeasure": 0.13564693066989447, "rougeLsum_fmeasure_stderr": 0.0018863533793062786, "rougeLsum_precision": 0.09602849667357606, "rougeLsum_precision_stderr": 0.001386138103889359, "rougeLsum_recall": 0.2407455765901895, "rougeLsum_recall_stderr": 0.003358547528460653}}, "2": {"article_DOC_summary": {"bleu": 1.1805459815971808, "bleu_stderr": 0.08801398952015627, "rouge1_fmeasure": 0.16960096474510714, "rouge1_fmeasure_stderr": 0.002303219538316631, "rouge1_precision": 0.12017928149855549, "rouge1_precision_stderr": 0.0017155806595076542, "rouge1_recall": 0.3000656582759754, "rouge1_recall_stderr": 0.003942220611475929, "rouge2_fmeasure": 0.03015426920788573, "rouge2_fmeasure_stderr": 0.001263243793733084, "rouge2_precision": 0.02115270486812927, "rouge2_precision_stderr": 0.0008856577807583306, "rouge2_recall": 0.054787883745542026, "rouge2_recall_stderr": 0.0023745228517959276, "rougeL_fmeasure": 0.131124075957363, "rougeL_fmeasure_stderr": 0.0017424293214564424, "rougeL_precision": 0.0927144051400674, "rougeL_precision_stderr": 0.0012857006594715614, "rougeL_recall": 0.23362729816209124, "rougeL_recall_stderr": 0.0031066367525268844, "rougeLsum_fmeasure": 0.1367233861657065, "rougeLsum_fmeasure_stderr": 0.0018905430841393567, "rougeLsum_precision": 0.09669042239875854, "rougeLsum_precision_stderr": 0.0013887653900610069, "rougeLsum_recall": 0.24335819875183146, "rougeLsum_recall_stderr": 0.003364341261182289}}, "3": {"article_DOC_summary": {"bleu": 1.2117893477080361, "bleu_stderr": 0.1149778380988921, "rouge1_fmeasure": 0.15889837582063066, "rouge1_fmeasure_stderr": 0.0024042734536394335, "rouge1_precision": 0.11536457953211869, "rouge1_precision_stderr": 0.0019139955193594944, "rouge1_recall": 0.2768098516237596, "rouge1_recall_stderr": 0.004205225171304428, "rouge2_fmeasure": 0.028265095806897757, "rouge2_fmeasure_stderr": 0.0012619861741006862, "rouge2_precision": 0.020015331184403896, "rouge2_precision_stderr": 0.0009021836723852284, "rouge2_recall": 0.05127981526296904, "rouge2_recall_stderr": 0.002364022279427071, "rougeL_fmeasure": 0.12453970375141683, "rougeL_fmeasure_stderr": 0.0018237860596141172, "rougeL_precision": 0.08997967190382496, "rougeL_precision_stderr": 0.0014235921883162194, "rougeL_recall": 0.21857370135939078, "rougeL_recall_stderr": 0.003310622426465869, "rougeLsum_fmeasure": 0.1276545465727294, "rougeLsum_fmeasure_stderr": 0.001972377201117121, "rougeLsum_precision": 0.09224753114626984, "rougeLsum_precision_stderr": 0.0015272098267094347, "rougeLsum_recall": 0.2238960772414025, "rougeLsum_recall_stderr": 0.003566353512085928}}}}
|
|
|
1 |
+
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.3003906758006876, "bleu_stderr": 0.027331800335698254, "rouge1_fmeasure": 0.10522756302657624, "rouge1_fmeasure_stderr": 0.0020564917165852665, "rouge1_precision": 0.06963270011987571, "rouge1_precision_stderr": 0.0016168408610962013, "rouge1_recall": 0.29729464197458877, "rouge1_recall_stderr": 0.0047714353450774415, "rouge2_fmeasure": 0.048503194247737774, "rouge2_fmeasure_stderr": 0.001258248656235562, "rouge2_precision": 0.03200346886185345, "rouge2_precision_stderr": 0.000957625394662931, "rouge2_recall": 0.140470675875411, "rouge2_recall_stderr": 0.0032183793551318674, "rougeL_fmeasure": 0.10106959021191211, "rougeL_fmeasure_stderr": 0.0018911974850607549, "rougeL_precision": 0.06646767562482549, "rougeL_precision_stderr": 0.0014351668395974513, "rougeL_recall": 0.28816568568111717, "rougeL_recall_stderr": 0.004622479804486804, "rougeLsum_fmeasure": 0.10010042995723817, "rougeLsum_fmeasure_stderr": 0.001929318666410727, "rougeLsum_precision": 0.06620461518234684, "rougeLsum_precision_stderr": 0.00151460835911578, "rougeLsum_recall": 0.282597952018841, "rougeLsum_recall_stderr": 0.004481787483630335}}, "1": {"PALM_prompt": {"bleu": 0.32423805540348105, "bleu_stderr": 0.024690542644449438, "rouge1_fmeasure": 0.10249916346044939, "rouge1_fmeasure_stderr": 0.0018656887279984073, "rouge1_precision": 0.06685340388470082, "rouge1_precision_stderr": 0.0015253847039623312, "rouge1_recall": 0.323514427623318, "rouge1_recall_stderr": 0.004639043635695871, "rouge2_fmeasure": 0.04633905642415022, "rouge2_fmeasure_stderr": 0.0011829999453798483, "rouge2_precision": 0.030000286950072674, "rouge2_precision_stderr": 0.0008643544653956381, "rouge2_recall": 0.14824220519105014, "rouge2_recall_stderr": 0.0031972015283258494, "rougeL_fmeasure": 0.09712356208435262, "rougeL_fmeasure_stderr": 0.0017347095170401298, "rougeL_precision": 0.06336375493461167, "rougeL_precision_stderr": 0.0014215770633909588, "rougeL_recall": 0.303094617033746, "rougeL_recall_stderr": 0.004191983337899737, "rougeLsum_fmeasure": 0.09830433934297841, "rougeLsum_fmeasure_stderr": 0.0017883137206561263, "rougeLsum_precision": 0.06416576148923457, "rougeLsum_precision_stderr": 0.0014659099244540854, "rougeLsum_recall": 0.30832609036704645, "rougeLsum_recall_stderr": 0.004339774784201872}}, "2": {"PALM_prompt": {"bleu": 0.33841996991287543, "bleu_stderr": 0.013366615983706327, "rouge1_fmeasure": 0.10597979981258299, "rouge1_fmeasure_stderr": 0.0017714336298557038, "rouge1_precision": 0.06765631897767002, "rouge1_precision_stderr": 0.0013046336481251101, "rouge1_recall": 0.3402361179206784, "rouge1_recall_stderr": 0.004704696452770623, "rouge2_fmeasure": 0.0482705113559789, "rouge2_fmeasure_stderr": 0.001132968575984557, "rouge2_precision": 0.03077023716855104, "rouge2_precision_stderr": 0.0008032546127820459, "rouge2_recall": 0.1606656566085052, "rouge2_recall_stderr": 0.0033693273708689585, "rougeL_fmeasure": 0.10046626150365084, "rougeL_fmeasure_stderr": 0.0016642034527337758, "rougeL_precision": 0.06412349572198735, "rougeL_precision_stderr": 0.0012122011908497355, "rougeL_recall": 0.31907776335571136, "rougeL_recall_stderr": 0.004279931218687767, "rougeLsum_fmeasure": 0.10141858866959458, "rougeLsum_fmeasure_stderr": 0.001700099907526394, "rougeLsum_precision": 0.06475854902642648, "rougeLsum_precision_stderr": 0.0012483946453731626, "rougeLsum_recall": 0.3241391405150877, "rougeLsum_recall_stderr": 0.004418718258324822}}, "3": {"PALM_prompt": {"bleu": 0.40584434052309054, "bleu_stderr": 0.02616462426887616, "rouge1_fmeasure": 0.10602175173953958, "rouge1_fmeasure_stderr": 0.001741987498486231, "rouge1_precision": 0.06757351073723121, "rouge1_precision_stderr": 0.0012679743548778455, "rouge1_recall": 0.3440037162848658, "rouge1_recall_stderr": 0.0049119409175133995, "rouge2_fmeasure": 0.0486263549461623, "rouge2_fmeasure_stderr": 0.0010900986383698252, "rouge2_precision": 0.030886249533870456, "rouge2_precision_stderr": 0.0007629220092981514, "rouge2_recall": 0.16556453686704345, "rouge2_recall_stderr": 0.0034492328057988348, "rougeL_fmeasure": 0.10011209012325656, "rougeL_fmeasure_stderr": 0.0016356941636110295, "rougeL_precision": 0.06384738095162408, "rougeL_precision_stderr": 0.0011862226633864972, "rougeL_recall": 0.3209539378169773, "rougeL_recall_stderr": 0.004410637240630364, "rougeLsum_fmeasure": 0.10161889625106496, "rougeLsum_fmeasure_stderr": 0.0016751216394668587, "rougeLsum_precision": 0.06483859720845632, "rougeLsum_precision_stderr": 0.0012202108024313588, "rougeLsum_recall": 0.3270171129897123, "rougeLsum_recall_stderr": 0.0045709512856975595}}, "4": {"PALM_prompt": {"bleu": 0.466035571971691, "bleu_stderr": 0.03473185881932128, "rouge1_fmeasure": 0.10915182875204862, "rouge1_fmeasure_stderr": 0.0017368992622529642, "rouge1_precision": 0.06948874429007668, "rouge1_precision_stderr": 0.00126969268323737, "rouge1_recall": 0.35504271560944006, "rouge1_recall_stderr": 0.004806804977630298, "rouge2_fmeasure": 0.05069482326429204, "rouge2_fmeasure_stderr": 0.0011084349852182986, "rouge2_precision": 0.032080423637990074, "rouge2_precision_stderr": 0.0007748948128844898, "rouge2_recall": 0.1749610451479523, "rouge2_recall_stderr": 0.0035756989094432607, "rougeL_fmeasure": 0.102624870740898, "rougeL_fmeasure_stderr": 0.0015972476492216417, "rougeL_precision": 0.06534721122278908, "rougeL_precision_stderr": 0.001160283231789772, "rougeL_recall": 0.33105657281482775, "rougeL_recall_stderr": 0.004330159547841201, "rougeLsum_fmeasure": 0.10445663963047473, "rougeLsum_fmeasure_stderr": 0.001657522536981241, "rougeLsum_precision": 0.06651082429118176, "rougeLsum_precision_stderr": 0.0012081627405732267, "rougeLsum_recall": 0.3391017904387808, "rougeLsum_recall_stderr": 0.004545927615205131}}, "5": {"PALM_prompt": {"bleu": 0.47567398731333893, "bleu_stderr": 0.024264359754404757, "rouge1_fmeasure": 0.10892336928116716, "rouge1_fmeasure_stderr": 0.0016854927070877707, "rouge1_precision": 0.0696852083209599, "rouge1_precision_stderr": 0.0013577090826179866, "rouge1_recall": 0.3612843042707264, "rouge1_recall_stderr": 0.004861238941500158, "rouge2_fmeasure": 0.05008036673402714, "rouge2_fmeasure_stderr": 0.0010613654934549248, "rouge2_precision": 0.0315904391098698, "rouge2_precision_stderr": 0.0007431193984384711, "rouge2_recall": 0.177085732300076, "rouge2_recall_stderr": 0.0035275863817017116, "rougeL_fmeasure": 0.10170780198533885, "rougeL_fmeasure_stderr": 0.001554714639457137, "rougeL_precision": 0.0651341368207596, "rougeL_precision_stderr": 0.0012591521998187908, "rougeL_recall": 0.33426205604441794, "rougeL_recall_stderr": 0.004323619187529501, "rougeLsum_fmeasure": 0.10372985125549432, "rougeLsum_fmeasure_stderr": 0.0016004866013084038, "rougeLsum_precision": 0.06639663258609937, "rougeLsum_precision_stderr": 0.001291170836697709, "rougeLsum_recall": 0.34334559580001384, "rougeLsum_recall_stderr": 0.0045556002073834055}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.4622087249250515, "bleu_stderr": 0.04207910392007385, "rouge1_fmeasure": 0.1767157419817045, "rouge1_fmeasure_stderr": 0.0018123734169975135, "rouge1_precision": 0.1506827740817673, "rouge1_precision_stderr": 0.001852883253242218, "rouge1_recall": 0.25686230777525454, "rouge1_recall_stderr": 0.00260278766504943, "rouge2_fmeasure": 0.03524633277968111, "rouge2_fmeasure_stderr": 0.0008101513042416649, "rouge2_precision": 0.02971277123757481, "rouge2_precision_stderr": 0.000707355818382236, "rouge2_recall": 0.05307915600648516, "rouge2_recall_stderr": 0.0013686059525554353, "rougeL_fmeasure": 0.13759253266149163, "rougeL_fmeasure_stderr": 0.001281432881200856, "rougeL_precision": 0.11575873439522533, "rougeL_precision_stderr": 0.0012737353859565011, "rougeL_recall": 0.2052922273561128, "rougeL_recall_stderr": 0.0021096414533161158, "rougeLsum_fmeasure": 0.16173441706919453, "rougeLsum_fmeasure_stderr": 0.0016450542485275806, "rougeLsum_precision": 0.1374963134041478, "rougeLsum_precision_stderr": 0.0016657330727227028, "rougeLsum_recall": 0.2362819209208244, "rougeLsum_recall_stderr": 0.002418312889630038}}, "1": {"tldr_en": {"bleu": 1.972919037460516, "bleu_stderr": 0.05682843274487135, "rouge1_fmeasure": 0.19185955437187688, "rouge1_fmeasure_stderr": 0.001834411329990832, "rouge1_precision": 0.1655651982573561, "rouge1_precision_stderr": 0.001967434204420624, "rouge1_recall": 0.2778367078547306, "rouge1_recall_stderr": 0.0026376055817718498, "rouge2_fmeasure": 0.04022404252550308, "rouge2_fmeasure_stderr": 0.0009067575883966806, "rouge2_precision": 0.03468172497365085, "rouge2_precision_stderr": 0.0008414429849505186, "rouge2_recall": 0.06032773756466252, "rouge2_recall_stderr": 0.0014755233968311602, "rougeL_fmeasure": 0.1394283655875047, "rougeL_fmeasure_stderr": 0.0012494508344161473, "rougeL_precision": 0.1190010376024555, "rougeL_precision_stderr": 0.0013201442960543233, "rougeL_recall": 0.20730876012944102, "rougeL_recall_stderr": 0.0020918631972439913, "rougeLsum_fmeasure": 0.1798108549879984, "rougeLsum_fmeasure_stderr": 0.00170035102934959, "rougeLsum_precision": 0.15494002660111156, "rougeLsum_precision_stderr": 0.001825015399111526, "rougeLsum_recall": 0.26137076597079567, "rougeLsum_recall_stderr": 0.002487191761867969}}, "2": {"tldr_en": {"bleu": 2.2743708557193902, "bleu_stderr": 0.06189529268097383, "rouge1_fmeasure": 0.20283722636774154, "rouge1_fmeasure_stderr": 0.001838028419312966, "rouge1_precision": 0.17638523863069297, "rouge1_precision_stderr": 0.002020755519678356, "rouge1_recall": 0.2911840829982234, "rouge1_recall_stderr": 0.0026772675380155376, "rouge2_fmeasure": 0.04709721853020564, "rouge2_fmeasure_stderr": 0.000949364569447851, "rouge2_precision": 0.04064975395757813, "rouge2_precision_stderr": 0.0008728139678303188, "rouge2_recall": 0.0700705095693168, "rouge2_recall_stderr": 0.0016162550664557144, "rougeL_fmeasure": 0.14915790559695724, "rougeL_fmeasure_stderr": 0.001266186326972188, "rougeL_precision": 0.128540063496423, "rougeL_precision_stderr": 0.00138883173221342, "rougeL_recall": 0.21916109079967522, "rougeL_recall_stderr": 0.002145845723677153, "rougeLsum_fmeasure": 0.1899159372074719, "rougeLsum_fmeasure_stderr": 0.0017127052022618613, "rougeLsum_precision": 0.16486271659279425, "rougeLsum_precision_stderr": 0.0018790326981489205, "rougeLsum_recall": 0.2736737223320281, "rougeLsum_recall_stderr": 0.0025545391606520626}}, "3": {"tldr_en": {"bleu": 2.261325582734008, "bleu_stderr": 0.07440671017406066, "rouge1_fmeasure": 0.17007228676988045, "rouge1_fmeasure_stderr": 0.0021295726072665254, "rouge1_precision": 0.15457296707448556, "rouge1_precision_stderr": 0.0023843459800257866, "rouge1_recall": 0.24244601688859282, "rouge1_recall_stderr": 0.0031047631304705336, "rouge2_fmeasure": 0.039320031366549095, "rouge2_fmeasure_stderr": 0.0009156298192238671, "rouge2_precision": 0.03563497026047448, "rouge2_precision_stderr": 0.0009697902407615488, "rouge2_recall": 0.05775187966143085, "rouge2_recall_stderr": 0.0014973785566537818, "rougeL_fmeasure": 0.12582015691412737, "rougeL_fmeasure_stderr": 0.0015236321853914047, "rougeL_precision": 0.11385012472654804, "rougeL_precision_stderr": 0.0017484820235318678, "rougeL_recall": 0.18331103625638695, "rougeL_recall_stderr": 0.0024579268481887276, "rougeLsum_fmeasure": 0.15899932831508662, "rougeLsum_fmeasure_stderr": 0.0019839606960568193, "rougeLsum_precision": 0.1443112386465537, "rougeLsum_precision_stderr": 0.002222016297651524, "rougeLsum_recall": 0.22741868584839184, "rougeLsum_recall_stderr": 0.0029377295753792703}}, "4": {"tldr_en": {"bleu": 0.5600187004785872, "bleu_stderr": 0.034329899401054034, "rouge1_fmeasure": 0.054359788577698374, "rouge1_fmeasure_stderr": 0.001834775379938349, "rouge1_precision": 0.05049412311337655, "rouge1_precision_stderr": 0.0018775705845440543, "rouge1_recall": 0.07991007309327065, "rouge1_recall_stderr": 0.002748908637336395, "rouge2_fmeasure": 0.01267310048414024, "rouge2_fmeasure_stderr": 0.0006260839320397936, "rouge2_precision": 0.01142264448143536, "rouge2_precision_stderr": 0.0006812961216581213, "rouge2_recall": 0.02008142422629143, "rouge2_recall_stderr": 0.0010993317750224779, "rougeL_fmeasure": 0.04187829090557222, "rougeL_fmeasure_stderr": 0.001389559278464916, "rougeL_precision": 0.038958076750695146, "rougeL_precision_stderr": 0.0014576150060918941, "rougeL_recall": 0.0629580148095348, "rougeL_recall_stderr": 0.002201566030491766, "rougeLsum_fmeasure": 0.05081727487259366, "rougeLsum_fmeasure_stderr": 0.0017142240431756424, "rougeLsum_precision": 0.047269915920336, "rougeLsum_precision_stderr": 0.0017705102362424203, "rougeLsum_recall": 0.07497632668531767, "rougeLsum_recall_stderr": 0.002589565865103579}}, "5": {"tldr_en": {"bleu": 1.5732639330570601e-06, "bleu_stderr": 2.4552136635733976e-06, "rouge1_fmeasure": 0.008780004115444839, "rouge1_fmeasure_stderr": 0.0008275079317754659, "rouge1_precision": 0.008542236722467199, "rouge1_precision_stderr": 0.0008747857935002281, "rouge1_recall": 0.012984770525311046, "rouge1_recall_stderr": 0.0012451725161618523, "rouge2_fmeasure": 0.002147119319405267, "rouge2_fmeasure_stderr": 0.00027623822033813396, "rouge2_precision": 0.0020271291197224393, "rouge2_precision_stderr": 0.00029783255687868237, "rouge2_recall": 0.0032641829590180815, "rouge2_recall_stderr": 0.00043766038421818427, "rougeL_fmeasure": 0.006842107389797507, "rougeL_fmeasure_stderr": 0.0006383034087266817, "rougeL_precision": 0.006544489279360034, "rougeL_precision_stderr": 0.0006478926275365654, "rougeL_recall": 0.010413387705542463, "rougeL_recall_stderr": 0.001029849799449182, "rougeLsum_fmeasure": 0.00814217266519704, "rougeLsum_fmeasure_stderr": 0.0007660037005158112, "rougeLsum_precision": 0.00789904438059008, "rougeLsum_precision_stderr": 0.0008062204339147679, "rougeLsum_recall": 0.012088341751228384, "rougeLsum_recall_stderr": 0.0011647198145518685}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.2918074829245511, "bleu_stderr": 0.03334721114977758, "rouge1_fmeasure": 0.1600243176507681, "rouge1_fmeasure_stderr": 0.0010733278398958096, "rouge1_precision": 0.12561487707508034, "rouge1_precision_stderr": 0.0013825187846368096, "rouge1_recall": 0.25305668157788624, "rouge1_recall_stderr": 0.0013315623225900873, "rouge2_fmeasure": 0.033687666872503644, "rouge2_fmeasure_stderr": 0.0005133425312314838, "rouge2_precision": 0.026668315991920277, "rouge2_precision_stderr": 0.0005306038751357611, "rouge2_recall": 0.05412553888370808, "rouge2_recall_stderr": 0.0007946156915472112, "rougeL_fmeasure": 0.15622553271845338, "rougeL_fmeasure_stderr": 0.0010043221686026895, "rougeL_precision": 0.12177712671014992, "rougeL_precision_stderr": 0.001240068402500465, "rougeL_recall": 0.24840753894632903, "rougeL_recall_stderr": 0.001318527645790754, "rougeLsum_fmeasure": 0.13789602591344347, "rougeLsum_fmeasure_stderr": 0.0009870480569216915, "rougeLsum_precision": 0.10841189553256889, "rougeLsum_precision_stderr": 0.0012618615939597366, "rougeLsum_recall": 0.21839285442742476, "rougeLsum_recall_stderr": 0.0012454131726385412}}, "1": {"generate_text_restaurant": {"bleu": 9.67590369741921, "bleu_stderr": 0.09901656476390079, "rouge1_fmeasure": 0.39957774138885144, "rouge1_fmeasure_stderr": 0.0022278577374241887, "rouge1_precision": 0.45349765710233814, "rouge1_precision_stderr": 0.0029408652429081406, "rouge1_recall": 0.39709628674803366, "rouge1_recall_stderr": 0.002799520545788188, "rouge2_fmeasure": 0.16855819384473125, "rouge2_fmeasure_stderr": 0.001773629505406216, "rouge2_precision": 0.19448143773761467, "rouge2_precision_stderr": 0.002225394284814737, "rouge2_recall": 0.16778603132296563, "rouge2_recall_stderr": 0.0019654958159185396, "rougeL_fmeasure": 0.2890014884493394, "rougeL_fmeasure_stderr": 0.0018207916834884527, "rougeL_precision": 0.33043423858845145, "rougeL_precision_stderr": 0.0024856924454187785, "rougeL_recall": 0.2872561015612187, "rougeL_recall_stderr": 0.0022337982582298155, "rougeLsum_fmeasure": 0.3277959949146152, "rougeLsum_fmeasure_stderr": 0.0021102604579282852, "rougeLsum_precision": 0.3738597805796585, "rougeLsum_precision_stderr": 0.002773351046379512, "rougeLsum_recall": 0.3249821747731812, "rougeLsum_recall_stderr": 0.002515405285337629}}, "2": {"generate_text_restaurant": {"bleu": 10.91262584267496, "bleu_stderr": 0.12763668087089558, "rouge1_fmeasure": 0.4158184072582917, "rouge1_fmeasure_stderr": 0.0021875821824657578, "rouge1_precision": 0.4745007823219277, "rouge1_precision_stderr": 0.003096779743530949, "rouge1_recall": 0.41114421373702703, "rouge1_recall_stderr": 0.002795411314326619, "rouge2_fmeasure": 0.18538437537983646, "rouge2_fmeasure_stderr": 0.001799521509176773, "rouge2_precision": 0.21433784297140893, "rouge2_precision_stderr": 0.002331836767735763, "rouge2_recall": 0.18401740949966608, "rouge2_recall_stderr": 0.002021878491927928, "rougeL_fmeasure": 0.30338837358825094, "rougeL_fmeasure_stderr": 0.001840114059497487, "rougeL_precision": 0.34796254903830837, "rougeL_precision_stderr": 0.002621056636644695, "rougeL_recall": 0.2999843023857228, "rougeL_recall_stderr": 0.002269391756360346, "rougeLsum_fmeasure": 0.3438444001935511, "rougeLsum_fmeasure_stderr": 0.0020872281136323037, "rougeLsum_precision": 0.39290245826296444, "rougeLsum_precision_stderr": 0.0028605438328826085, "rougeLsum_recall": 0.3399505800342592, "rougeLsum_recall_stderr": 0.0025542752245032124}}, "3": {"generate_text_restaurant": {"bleu": 11.413052941435677, "bleu_stderr": 0.20476608953978004, "rouge1_fmeasure": 0.42003617329938386, "rouge1_fmeasure_stderr": 0.002154444681203657, "rouge1_precision": 0.4836426637277247, "rouge1_precision_stderr": 0.003041696429497936, "rouge1_recall": 0.4077462553545315, "rouge1_recall_stderr": 0.002698117142879397, "rouge2_fmeasure": 0.19219003937378554, "rouge2_fmeasure_stderr": 0.0018883864751046853, "rouge2_precision": 0.22368030301486044, "rouge2_precision_stderr": 0.0024069791642568504, "rouge2_recall": 0.18735894030210265, "rouge2_recall_stderr": 0.0020871489281372663, "rougeL_fmeasure": 0.31324626443128384, "rougeL_fmeasure_stderr": 0.0019216880227233767, "rougeL_precision": 0.36189364939827273, "rougeL_precision_stderr": 0.0026889223009721307, "rougeL_recall": 0.30429571782108805, "rougeL_recall_stderr": 0.0023058551990661367, "rougeLsum_fmeasure": 0.35143133030157764, "rougeLsum_fmeasure_stderr": 0.002142111791450537, "rougeLsum_precision": 0.40514213772293617, "rougeLsum_precision_stderr": 0.0029172807069507275, "rougeLsum_recall": 0.341129701516605, "rougeLsum_recall_stderr": 0.0025465447925427515}}, "4": {"generate_text_restaurant": {"bleu": 11.29834992541636, "bleu_stderr": 0.21750627906833597, "rouge1_fmeasure": 0.41733074609230864, "rouge1_fmeasure_stderr": 0.0021862637249920026, "rouge1_precision": 0.48699136269149845, "rouge1_precision_stderr": 0.0030725910034362365, "rouge1_recall": 0.39873667481511255, "rouge1_recall_stderr": 0.002641833719734696, "rouge2_fmeasure": 0.19240159571656937, "rouge2_fmeasure_stderr": 0.0018744046004783308, "rouge2_precision": 0.22696779992360078, "rouge2_precision_stderr": 0.002388090320486047, "rouge2_recall": 0.18435752956149218, "rouge2_recall_stderr": 0.002022279759231637, "rougeL_fmeasure": 0.31456390703859133, "rougeL_fmeasure_stderr": 0.0019506135524216602, "rougeL_precision": 0.3681077705552102, "rougeL_precision_stderr": 0.0027056437550490925, "rougeL_recall": 0.3004900799799888, "rougeL_recall_stderr": 0.002254464504837015, "rougeLsum_fmeasure": 0.35140686520524467, "rougeLsum_fmeasure_stderr": 0.002186992225871175, "rougeLsum_precision": 0.41003964030862644, "rougeLsum_precision_stderr": 0.002940274288350555, "rougeLsum_recall": 0.3361130546577144, "rougeLsum_recall_stderr": 0.0025408101507360696}}, "5": {"generate_text_restaurant": {"bleu": 11.269628945064296, "bleu_stderr": 0.15756104774510044, "rouge1_fmeasure": 0.41656475829237966, "rouge1_fmeasure_stderr": 0.0021719641372564386, "rouge1_precision": 0.4903478553495967, "rouge1_precision_stderr": 0.00308746448334887, "rouge1_recall": 0.39563780199750853, "rouge1_recall_stderr": 0.002621989888992289, "rouge2_fmeasure": 0.19301313468237216, "rouge2_fmeasure_stderr": 0.0018394274162893405, "rouge2_precision": 0.23014095309503432, "rouge2_precision_stderr": 0.0023899391576289145, "rouge2_recall": 0.18355072919581486, "rouge2_recall_stderr": 0.001968342781813886, "rougeL_fmeasure": 0.3150475005541494, "rougeL_fmeasure_stderr": 0.0019395006953993484, "rougeL_precision": 0.3721457131973453, "rougeL_precision_stderr": 0.0027221328586820265, "rougeL_recall": 0.29906088364914196, "rougeL_recall_stderr": 0.0022373123686497648, "rougeLsum_fmeasure": 0.3521891699869018, "rougeLsum_fmeasure_stderr": 0.0021596292682356695, "rougeLsum_precision": 0.41476921512799597, "rougeLsum_precision_stderr": 0.0029434738207341333, "rougeLsum_recall": 0.33464526234291403, "rougeLsum_recall_stderr": 0.002498553893092118}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.7452599273208484, "bleu_stderr": 0.0710626335421352, "rouge1_fmeasure": 0.2032531810901225, "rouge1_fmeasure_stderr": 0.002372876068699817, "rouge1_precision": 0.14818700118728248, "rouge1_precision_stderr": 0.0018676651463230222, "rouge1_recall": 0.34586554355363025, "rouge1_recall_stderr": 0.0041641658204401725, "rouge2_fmeasure": 0.04347043245633625, "rouge2_fmeasure_stderr": 0.001442207400667641, "rouge2_precision": 0.03119345685386172, "rouge2_precision_stderr": 0.0010373843022792317, "rouge2_recall": 0.07667587881403153, "rouge2_recall_stderr": 0.002653660348427432, "rougeL_fmeasure": 0.1528350870269447, "rougeL_fmeasure_stderr": 0.0017715223191776047, "rougeL_precision": 0.11119723252661128, "rougeL_precision_stderr": 0.0013663860116280308, "rougeL_recall": 0.2616100987791669, "rougeL_recall_stderr": 0.003275713171944408, "rougeLsum_fmeasure": 0.16043210769047098, "rougeLsum_fmeasure_stderr": 0.0020055581696195052, "rougeLsum_precision": 0.11658163341774634, "rougeLsum_precision_stderr": 0.0015235968275521983, "rougeLsum_recall": 0.275178892490744, "rougeLsum_recall_stderr": 0.0036968416298391823}}, "1": {"article_DOC_summary": {"bleu": 1.1604575025128918, "bleu_stderr": 0.0877746649601935, "rouge1_fmeasure": 0.16834116026562992, "rouge1_fmeasure_stderr": 0.0023007208182279906, "rouge1_precision": 0.11944768188603072, "rouge1_precision_stderr": 0.0017101747872860968, "rouge1_recall": 0.2967764173333572, "rouge1_recall_stderr": 0.003944563464315701, "rouge2_fmeasure": 0.030277850873655133, "rouge2_fmeasure_stderr": 0.0012385139115491765, "rouge2_precision": 0.021232879052259627, "rouge2_precision_stderr": 0.0008714791712295506, "rouge2_recall": 0.05526393564583485, "rouge2_recall_stderr": 0.0023240893343291387, "rougeL_fmeasure": 0.12978985120588027, "rougeL_fmeasure_stderr": 0.0017321643764184145, "rougeL_precision": 0.09192594935218694, "rougeL_precision_stderr": 0.0012769463345594439, "rougeL_recall": 0.23019911126154574, "rougeL_recall_stderr": 0.003091431421711302, "rougeLsum_fmeasure": 0.13564693066989447, "rougeLsum_fmeasure_stderr": 0.0018863533793062786, "rougeLsum_precision": 0.09602849667357606, "rougeLsum_precision_stderr": 0.001386138103889359, "rougeLsum_recall": 0.2407455765901895, "rougeLsum_recall_stderr": 0.003358547528460653}}, "2": {"article_DOC_summary": {"bleu": 1.1805459815971808, "bleu_stderr": 0.08801398952015627, "rouge1_fmeasure": 0.16960096474510714, "rouge1_fmeasure_stderr": 0.002303219538316631, "rouge1_precision": 0.12017928149855549, "rouge1_precision_stderr": 0.0017155806595076542, "rouge1_recall": 0.3000656582759754, "rouge1_recall_stderr": 0.003942220611475929, "rouge2_fmeasure": 0.03015426920788573, "rouge2_fmeasure_stderr": 0.001263243793733084, "rouge2_precision": 0.02115270486812927, "rouge2_precision_stderr": 0.0008856577807583306, "rouge2_recall": 0.054787883745542026, "rouge2_recall_stderr": 0.0023745228517959276, "rougeL_fmeasure": 0.131124075957363, "rougeL_fmeasure_stderr": 0.0017424293214564424, "rougeL_precision": 0.0927144051400674, "rougeL_precision_stderr": 0.0012857006594715614, "rougeL_recall": 0.23362729816209124, "rougeL_recall_stderr": 0.0031066367525268844, "rougeLsum_fmeasure": 0.1367233861657065, "rougeLsum_fmeasure_stderr": 0.0018905430841393567, "rougeLsum_precision": 0.09669042239875854, "rougeLsum_precision_stderr": 0.0013887653900610069, "rougeLsum_recall": 0.24335819875183146, "rougeLsum_recall_stderr": 0.003364341261182289}}, "3": {"article_DOC_summary": {"bleu": 1.2117893477080361, "bleu_stderr": 0.1149778380988921, "rouge1_fmeasure": 0.15889837582063066, "rouge1_fmeasure_stderr": 0.0024042734536394335, "rouge1_precision": 0.11536457953211869, "rouge1_precision_stderr": 0.0019139955193594944, "rouge1_recall": 0.2768098516237596, "rouge1_recall_stderr": 0.004205225171304428, "rouge2_fmeasure": 0.028265095806897757, "rouge2_fmeasure_stderr": 0.0012619861741006862, "rouge2_precision": 0.020015331184403896, "rouge2_precision_stderr": 0.0009021836723852284, "rouge2_recall": 0.05127981526296904, "rouge2_recall_stderr": 0.002364022279427071, "rougeL_fmeasure": 0.12453970375141683, "rougeL_fmeasure_stderr": 0.0018237860596141172, "rougeL_precision": 0.08997967190382496, "rougeL_precision_stderr": 0.0014235921883162194, "rougeL_recall": 0.21857370135939078, "rougeL_recall_stderr": 0.003310622426465869, "rougeLsum_fmeasure": 0.1276545465727294, "rougeLsum_fmeasure_stderr": 0.001972377201117121, "rougeLsum_precision": 0.09224753114626984, "rougeLsum_precision_stderr": 0.0015272098267094347, "rougeLsum_recall": 0.2238960772414025, "rougeLsum_recall_stderr": 0.003566353512085928}}, "4": {"article_DOC_summary": {"bleu": 0.5192386395102333, "bleu_stderr": 0.13002341898619957, "rouge1_fmeasure": 0.04400316033856543, "rouge1_fmeasure_stderr": 0.002490349738342324, "rouge1_precision": 0.03793206438705544, "rouge1_precision_stderr": 0.0024366223683722097, "rouge1_recall": 0.06987736475836392, "rouge1_recall_stderr": 0.004064805072138278, "rouge2_fmeasure": 0.008063468290742252, "rouge2_fmeasure_stderr": 0.0007981358698909123, "rouge2_precision": 0.006979598507473323, "rouge2_precision_stderr": 0.0008901794799735351, "rouge2_recall": 0.013423754512870454, "rouge2_recall_stderr": 0.0013418442840833563, "rougeL_fmeasure": 0.034178653761551166, "rougeL_fmeasure_stderr": 0.0019416567180353596, "rougeL_precision": 0.030091382509093004, "rougeL_precision_stderr": 0.0020782922503738504, "rougeL_recall": 0.05458303198935503, "rougeL_recall_stderr": 0.003197618040713862, "rougeLsum_fmeasure": 0.036128933672296124, "rougeLsum_fmeasure_stderr": 0.0020726114306530577, "rougeLsum_precision": 0.03177113843460543, "rougeLsum_precision_stderr": 0.002181794130601312, "rougeLsum_recall": 0.05751126056018169, "rougeLsum_recall_stderr": 0.0033952763901923607}}, "5": {"article_DOC_summary": {"bleu": 9.778811073469078e-39, "bleu_stderr": 1.521718069448933e-33, "rouge1_fmeasure": 0.0022218780996264655, "rouge1_fmeasure_stderr": 0.0006107839354088714, "rouge1_precision": 0.0025333563065761205, "rouge1_precision_stderr": 0.0007200688467666427, "rouge1_recall": 0.0020475737491748073, "rouge1_recall_stderr": 0.0005527163320441051, "rouge2_fmeasure": 0.0001768729417917658, "rouge2_fmeasure_stderr": 8.939544334231789e-05, "rouge2_precision": 0.0002052993867644256, "rouge2_precision_stderr": 0.00010263779675566133, "rouge2_recall": 0.00015805735145357788, "rouge2_recall_stderr": 8.125011510560468e-05, "rougeL_fmeasure": 0.0016699276460120504, "rougeL_fmeasure_stderr": 0.0004477526803373312, "rougeL_precision": 0.0018767772604883414, "rougeL_precision_stderr": 0.0005169601507247577, "rougeL_recall": 0.0015653832711829878, "rougeL_recall_stderr": 0.0004179534887542103, "rougeLsum_fmeasure": 0.0016709735398329094, "rougeLsum_fmeasure_stderr": 0.00044431903582722767, "rougeLsum_precision": 0.0018647403070412589, "rougeLsum_precision_stderr": 0.0005045589409446839, "rougeLsum_recall": 0.001573237052621675, "rougeLsum_recall_stderr": 0.0004187928988454184}}}}
|
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/web_nlg_en",
|
5 |
+
"prompt_name": "PALM_prompt",
|
6 |
+
"bleu": 0.466035571971691,
|
7 |
+
"dataset_path": "GEM/web_nlg",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.03473185881932128
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/web_nlg_en",
|
14 |
+
"prompt_name": "PALM_prompt",
|
15 |
+
"rouge1_precision": 0.06948874429007668,
|
16 |
+
"dataset_path": "GEM/web_nlg",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.00126969268323737
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/web_nlg_en",
|
23 |
+
"prompt_name": "PALM_prompt",
|
24 |
+
"rouge1_recall": 0.35504271560944006,
|
25 |
+
"dataset_path": "GEM/web_nlg",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.004806804977630298
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/web_nlg_en",
|
32 |
+
"prompt_name": "PALM_prompt",
|
33 |
+
"rouge1_fmeasure": 0.10915182875204862,
|
34 |
+
"dataset_path": "GEM/web_nlg",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0017368992622529642
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/web_nlg_en",
|
41 |
+
"prompt_name": "PALM_prompt",
|
42 |
+
"rouge2_precision": 0.032080423637990074,
|
43 |
+
"dataset_path": "GEM/web_nlg",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0007748948128844898
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/web_nlg_en",
|
50 |
+
"prompt_name": "PALM_prompt",
|
51 |
+
"rouge2_recall": 0.1749610451479523,
|
52 |
+
"dataset_path": "GEM/web_nlg",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.0035756989094432607
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/web_nlg_en",
|
59 |
+
"prompt_name": "PALM_prompt",
|
60 |
+
"rouge2_fmeasure": 0.05069482326429204,
|
61 |
+
"dataset_path": "GEM/web_nlg",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0011084349852182986
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/web_nlg_en",
|
68 |
+
"prompt_name": "PALM_prompt",
|
69 |
+
"rougeL_precision": 0.06534721122278908,
|
70 |
+
"dataset_path": "GEM/web_nlg",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.001160283231789772
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/web_nlg_en",
|
77 |
+
"prompt_name": "PALM_prompt",
|
78 |
+
"rougeL_recall": 0.33105657281482775,
|
79 |
+
"dataset_path": "GEM/web_nlg",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.004330159547841201
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/web_nlg_en",
|
86 |
+
"prompt_name": "PALM_prompt",
|
87 |
+
"rougeL_fmeasure": 0.102624870740898,
|
88 |
+
"dataset_path": "GEM/web_nlg",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0015972476492216417
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/web_nlg_en",
|
95 |
+
"prompt_name": "PALM_prompt",
|
96 |
+
"rougeLsum_precision": 0.06651082429118176,
|
97 |
+
"dataset_path": "GEM/web_nlg",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.0012081627405732267
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/web_nlg_en",
|
104 |
+
"prompt_name": "PALM_prompt",
|
105 |
+
"rougeLsum_recall": 0.3391017904387808,
|
106 |
+
"dataset_path": "GEM/web_nlg",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.004545927615205131
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/web_nlg_en",
|
113 |
+
"prompt_name": "PALM_prompt",
|
114 |
+
"rougeLsum_fmeasure": 0.10445663963047473,
|
115 |
+
"dataset_path": "GEM/web_nlg",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.001657522536981241
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 4,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/web_nlg_en",
|
5 |
+
"prompt_name": "PALM_prompt",
|
6 |
+
"bleu": 0.47567398731333893,
|
7 |
+
"dataset_path": "GEM/web_nlg",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.024264359754404757
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/web_nlg_en",
|
14 |
+
"prompt_name": "PALM_prompt",
|
15 |
+
"rouge1_precision": 0.0696852083209599,
|
16 |
+
"dataset_path": "GEM/web_nlg",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.0013577090826179866
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/web_nlg_en",
|
23 |
+
"prompt_name": "PALM_prompt",
|
24 |
+
"rouge1_recall": 0.3612843042707264,
|
25 |
+
"dataset_path": "GEM/web_nlg",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.004861238941500158
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/web_nlg_en",
|
32 |
+
"prompt_name": "PALM_prompt",
|
33 |
+
"rouge1_fmeasure": 0.10892336928116716,
|
34 |
+
"dataset_path": "GEM/web_nlg",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0016854927070877707
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/web_nlg_en",
|
41 |
+
"prompt_name": "PALM_prompt",
|
42 |
+
"rouge2_precision": 0.0315904391098698,
|
43 |
+
"dataset_path": "GEM/web_nlg",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0007431193984384711
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/web_nlg_en",
|
50 |
+
"prompt_name": "PALM_prompt",
|
51 |
+
"rouge2_recall": 0.177085732300076,
|
52 |
+
"dataset_path": "GEM/web_nlg",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.0035275863817017116
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/web_nlg_en",
|
59 |
+
"prompt_name": "PALM_prompt",
|
60 |
+
"rouge2_fmeasure": 0.05008036673402714,
|
61 |
+
"dataset_path": "GEM/web_nlg",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0010613654934549248
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/web_nlg_en",
|
68 |
+
"prompt_name": "PALM_prompt",
|
69 |
+
"rougeL_precision": 0.0651341368207596,
|
70 |
+
"dataset_path": "GEM/web_nlg",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0012591521998187908
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/web_nlg_en",
|
77 |
+
"prompt_name": "PALM_prompt",
|
78 |
+
"rougeL_recall": 0.33426205604441794,
|
79 |
+
"dataset_path": "GEM/web_nlg",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.004323619187529501
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/web_nlg_en",
|
86 |
+
"prompt_name": "PALM_prompt",
|
87 |
+
"rougeL_fmeasure": 0.10170780198533885,
|
88 |
+
"dataset_path": "GEM/web_nlg",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.001554714639457137
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/web_nlg_en",
|
95 |
+
"prompt_name": "PALM_prompt",
|
96 |
+
"rougeLsum_precision": 0.06639663258609937,
|
97 |
+
"dataset_path": "GEM/web_nlg",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.001291170836697709
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/web_nlg_en",
|
104 |
+
"prompt_name": "PALM_prompt",
|
105 |
+
"rougeLsum_recall": 0.34334559580001384,
|
106 |
+
"dataset_path": "GEM/web_nlg",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.0045556002073834055
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/web_nlg_en",
|
113 |
+
"prompt_name": "PALM_prompt",
|
114 |
+
"rougeLsum_fmeasure": 0.10372985125549432,
|
115 |
+
"dataset_path": "GEM/web_nlg",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.0016004866013084038
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 5,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/wiki_lingua_en",
|
5 |
+
"prompt_name": "tldr_en",
|
6 |
+
"rouge1_precision": 0.008542236722467199,
|
7 |
+
"dataset_path": "GEM/wiki_lingua",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"rouge1_precision_stderr": 0.0008747857935002281
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/wiki_lingua_en",
|
14 |
+
"prompt_name": "tldr_en",
|
15 |
+
"rouge1_recall": 0.012984770525311046,
|
16 |
+
"dataset_path": "GEM/wiki_lingua",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_recall_stderr": 0.0012451725161618523
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/wiki_lingua_en",
|
23 |
+
"prompt_name": "tldr_en",
|
24 |
+
"rouge1_fmeasure": 0.008780004115444839,
|
25 |
+
"dataset_path": "GEM/wiki_lingua",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_fmeasure_stderr": 0.0008275079317754659
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/wiki_lingua_en",
|
32 |
+
"prompt_name": "tldr_en",
|
33 |
+
"rouge2_precision": 0.0020271291197224393,
|
34 |
+
"dataset_path": "GEM/wiki_lingua",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge2_precision_stderr": 0.00029783255687868237
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/wiki_lingua_en",
|
41 |
+
"prompt_name": "tldr_en",
|
42 |
+
"rouge2_recall": 0.0032641829590180815,
|
43 |
+
"dataset_path": "GEM/wiki_lingua",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_recall_stderr": 0.00043766038421818427
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/wiki_lingua_en",
|
50 |
+
"prompt_name": "tldr_en",
|
51 |
+
"rouge2_fmeasure": 0.002147119319405267,
|
52 |
+
"dataset_path": "GEM/wiki_lingua",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_fmeasure_stderr": 0.00027623822033813396
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/wiki_lingua_en",
|
59 |
+
"prompt_name": "tldr_en",
|
60 |
+
"rougeL_precision": 0.006544489279360034,
|
61 |
+
"dataset_path": "GEM/wiki_lingua",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rougeL_precision_stderr": 0.0006478926275365654
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/wiki_lingua_en",
|
68 |
+
"prompt_name": "tldr_en",
|
69 |
+
"rougeL_recall": 0.010413387705542463,
|
70 |
+
"dataset_path": "GEM/wiki_lingua",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_recall_stderr": 0.001029849799449182
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/wiki_lingua_en",
|
77 |
+
"prompt_name": "tldr_en",
|
78 |
+
"rougeL_fmeasure": 0.006842107389797507,
|
79 |
+
"dataset_path": "GEM/wiki_lingua",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_fmeasure_stderr": 0.0006383034087266817
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/wiki_lingua_en",
|
86 |
+
"prompt_name": "tldr_en",
|
87 |
+
"rougeLsum_precision": 0.00789904438059008,
|
88 |
+
"dataset_path": "GEM/wiki_lingua",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeLsum_precision_stderr": 0.0008062204339147679
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/wiki_lingua_en",
|
95 |
+
"prompt_name": "tldr_en",
|
96 |
+
"rougeLsum_recall": 0.012088341751228384,
|
97 |
+
"dataset_path": "GEM/wiki_lingua",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_recall_stderr": 0.0011647198145518685
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/wiki_lingua_en",
|
104 |
+
"prompt_name": "tldr_en",
|
105 |
+
"rougeLsum_fmeasure": 0.00814217266519704,
|
106 |
+
"dataset_path": "GEM/wiki_lingua",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_fmeasure_stderr": 0.0007660037005158112
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/wiki_lingua_en",
|
113 |
+
"prompt_name": "tldr_en",
|
114 |
+
"bleu": 1.5732639330570601e-06,
|
115 |
+
"dataset_path": "GEM/wiki_lingua",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"bleu_stderr": 2.4552136635733976e-06
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 5,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "e2e_nlg_cleaned",
|
5 |
+
"prompt_name": "generate_text_restaurant",
|
6 |
+
"bleu": 11.29834992541636,
|
7 |
+
"dataset_path": "e2e_nlg_cleaned",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.21750627906833597
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "e2e_nlg_cleaned",
|
14 |
+
"prompt_name": "generate_text_restaurant",
|
15 |
+
"rouge1_precision": 0.48699136269149845,
|
16 |
+
"dataset_path": "e2e_nlg_cleaned",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.0030725910034362365
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "e2e_nlg_cleaned",
|
23 |
+
"prompt_name": "generate_text_restaurant",
|
24 |
+
"rouge1_recall": 0.39873667481511255,
|
25 |
+
"dataset_path": "e2e_nlg_cleaned",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.002641833719734696
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "e2e_nlg_cleaned",
|
32 |
+
"prompt_name": "generate_text_restaurant",
|
33 |
+
"rouge1_fmeasure": 0.41733074609230864,
|
34 |
+
"dataset_path": "e2e_nlg_cleaned",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0021862637249920026
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "e2e_nlg_cleaned",
|
41 |
+
"prompt_name": "generate_text_restaurant",
|
42 |
+
"rouge2_precision": 0.22696779992360078,
|
43 |
+
"dataset_path": "e2e_nlg_cleaned",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.002388090320486047
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "e2e_nlg_cleaned",
|
50 |
+
"prompt_name": "generate_text_restaurant",
|
51 |
+
"rouge2_recall": 0.18435752956149218,
|
52 |
+
"dataset_path": "e2e_nlg_cleaned",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.002022279759231637
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "e2e_nlg_cleaned",
|
59 |
+
"prompt_name": "generate_text_restaurant",
|
60 |
+
"rouge2_fmeasure": 0.19240159571656937,
|
61 |
+
"dataset_path": "e2e_nlg_cleaned",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0018744046004783308
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "e2e_nlg_cleaned",
|
68 |
+
"prompt_name": "generate_text_restaurant",
|
69 |
+
"rougeL_precision": 0.3681077705552102,
|
70 |
+
"dataset_path": "e2e_nlg_cleaned",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0027056437550490925
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "e2e_nlg_cleaned",
|
77 |
+
"prompt_name": "generate_text_restaurant",
|
78 |
+
"rougeL_recall": 0.3004900799799888,
|
79 |
+
"dataset_path": "e2e_nlg_cleaned",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.002254464504837015
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "e2e_nlg_cleaned",
|
86 |
+
"prompt_name": "generate_text_restaurant",
|
87 |
+
"rougeL_fmeasure": 0.31456390703859133,
|
88 |
+
"dataset_path": "e2e_nlg_cleaned",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0019506135524216602
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "e2e_nlg_cleaned",
|
95 |
+
"prompt_name": "generate_text_restaurant",
|
96 |
+
"rougeLsum_precision": 0.41003964030862644,
|
97 |
+
"dataset_path": "e2e_nlg_cleaned",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.002940274288350555
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "e2e_nlg_cleaned",
|
104 |
+
"prompt_name": "generate_text_restaurant",
|
105 |
+
"rougeLsum_recall": 0.3361130546577144,
|
106 |
+
"dataset_path": "e2e_nlg_cleaned",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.0025408101507360696
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "e2e_nlg_cleaned",
|
113 |
+
"prompt_name": "generate_text_restaurant",
|
114 |
+
"rougeLsum_fmeasure": 0.35140686520524467,
|
115 |
+
"dataset_path": "e2e_nlg_cleaned",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.002186992225871175
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 4,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "e2e_nlg_cleaned",
|
5 |
+
"prompt_name": "generate_text_restaurant",
|
6 |
+
"bleu": 11.269628945064296,
|
7 |
+
"dataset_path": "e2e_nlg_cleaned",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.15756104774510044
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "e2e_nlg_cleaned",
|
14 |
+
"prompt_name": "generate_text_restaurant",
|
15 |
+
"rouge1_precision": 0.4903478553495967,
|
16 |
+
"dataset_path": "e2e_nlg_cleaned",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.00308746448334887
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "e2e_nlg_cleaned",
|
23 |
+
"prompt_name": "generate_text_restaurant",
|
24 |
+
"rouge1_recall": 0.39563780199750853,
|
25 |
+
"dataset_path": "e2e_nlg_cleaned",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.002621989888992289
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "e2e_nlg_cleaned",
|
32 |
+
"prompt_name": "generate_text_restaurant",
|
33 |
+
"rouge1_fmeasure": 0.41656475829237966,
|
34 |
+
"dataset_path": "e2e_nlg_cleaned",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0021719641372564386
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "e2e_nlg_cleaned",
|
41 |
+
"prompt_name": "generate_text_restaurant",
|
42 |
+
"rouge2_precision": 0.23014095309503432,
|
43 |
+
"dataset_path": "e2e_nlg_cleaned",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0023899391576289145
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "e2e_nlg_cleaned",
|
50 |
+
"prompt_name": "generate_text_restaurant",
|
51 |
+
"rouge2_recall": 0.18355072919581486,
|
52 |
+
"dataset_path": "e2e_nlg_cleaned",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.001968342781813886
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "e2e_nlg_cleaned",
|
59 |
+
"prompt_name": "generate_text_restaurant",
|
60 |
+
"rouge2_fmeasure": 0.19301313468237216,
|
61 |
+
"dataset_path": "e2e_nlg_cleaned",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0018394274162893405
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "e2e_nlg_cleaned",
|
68 |
+
"prompt_name": "generate_text_restaurant",
|
69 |
+
"rougeL_precision": 0.3721457131973453,
|
70 |
+
"dataset_path": "e2e_nlg_cleaned",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0027221328586820265
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "e2e_nlg_cleaned",
|
77 |
+
"prompt_name": "generate_text_restaurant",
|
78 |
+
"rougeL_recall": 0.29906088364914196,
|
79 |
+
"dataset_path": "e2e_nlg_cleaned",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.0022373123686497648
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "e2e_nlg_cleaned",
|
86 |
+
"prompt_name": "generate_text_restaurant",
|
87 |
+
"rougeL_fmeasure": 0.3150475005541494,
|
88 |
+
"dataset_path": "e2e_nlg_cleaned",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0019395006953993484
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "e2e_nlg_cleaned",
|
95 |
+
"prompt_name": "generate_text_restaurant",
|
96 |
+
"rougeLsum_precision": 0.41476921512799597,
|
97 |
+
"dataset_path": "e2e_nlg_cleaned",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.0029434738207341333
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "e2e_nlg_cleaned",
|
104 |
+
"prompt_name": "generate_text_restaurant",
|
105 |
+
"rougeLsum_recall": 0.33464526234291403,
|
106 |
+
"dataset_path": "e2e_nlg_cleaned",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.002498553893092118
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "e2e_nlg_cleaned",
|
113 |
+
"prompt_name": "generate_text_restaurant",
|
114 |
+
"rougeLsum_fmeasure": 0.3521891699869018,
|
115 |
+
"dataset_path": "e2e_nlg_cleaned",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.0021596292682356695
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 5,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "gem_xsum",
|
5 |
+
"prompt_name": "article_DOC_summary",
|
6 |
+
"rouge1_precision": 0.03793206438705544,
|
7 |
+
"dataset_path": "GEM/xsum",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": "",
|
10 |
+
"rouge1_precision_stderr": 0.0024366223683722097
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "gem_xsum",
|
14 |
+
"prompt_name": "article_DOC_summary",
|
15 |
+
"rouge1_recall": 0.06987736475836392,
|
16 |
+
"dataset_path": "GEM/xsum",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": "",
|
19 |
+
"rouge1_recall_stderr": 0.004064805072138278
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "gem_xsum",
|
23 |
+
"prompt_name": "article_DOC_summary",
|
24 |
+
"rouge1_fmeasure": 0.04400316033856543,
|
25 |
+
"dataset_path": "GEM/xsum",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": "",
|
28 |
+
"rouge1_fmeasure_stderr": 0.002490349738342324
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "gem_xsum",
|
32 |
+
"prompt_name": "article_DOC_summary",
|
33 |
+
"rouge2_precision": 0.006979598507473323,
|
34 |
+
"dataset_path": "GEM/xsum",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": "",
|
37 |
+
"rouge2_precision_stderr": 0.0008901794799735351
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "gem_xsum",
|
41 |
+
"prompt_name": "article_DOC_summary",
|
42 |
+
"rouge2_recall": 0.013423754512870454,
|
43 |
+
"dataset_path": "GEM/xsum",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": "",
|
46 |
+
"rouge2_recall_stderr": 0.0013418442840833563
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "gem_xsum",
|
50 |
+
"prompt_name": "article_DOC_summary",
|
51 |
+
"rouge2_fmeasure": 0.008063468290742252,
|
52 |
+
"dataset_path": "GEM/xsum",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": "",
|
55 |
+
"rouge2_fmeasure_stderr": 0.0007981358698909123
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "gem_xsum",
|
59 |
+
"prompt_name": "article_DOC_summary",
|
60 |
+
"rougeL_precision": 0.030091382509093004,
|
61 |
+
"dataset_path": "GEM/xsum",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": "",
|
64 |
+
"rougeL_precision_stderr": 0.0020782922503738504
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "gem_xsum",
|
68 |
+
"prompt_name": "article_DOC_summary",
|
69 |
+
"rougeL_recall": 0.05458303198935503,
|
70 |
+
"dataset_path": "GEM/xsum",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": "",
|
73 |
+
"rougeL_recall_stderr": 0.003197618040713862
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "gem_xsum",
|
77 |
+
"prompt_name": "article_DOC_summary",
|
78 |
+
"rougeL_fmeasure": 0.034178653761551166,
|
79 |
+
"dataset_path": "GEM/xsum",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": "",
|
82 |
+
"rougeL_fmeasure_stderr": 0.0019416567180353596
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "gem_xsum",
|
86 |
+
"prompt_name": "article_DOC_summary",
|
87 |
+
"rougeLsum_precision": 0.03177113843460543,
|
88 |
+
"dataset_path": "GEM/xsum",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": "",
|
91 |
+
"rougeLsum_precision_stderr": 0.002181794130601312
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "gem_xsum",
|
95 |
+
"prompt_name": "article_DOC_summary",
|
96 |
+
"rougeLsum_recall": 0.05751126056018169,
|
97 |
+
"dataset_path": "GEM/xsum",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": "",
|
100 |
+
"rougeLsum_recall_stderr": 0.0033952763901923607
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "gem_xsum",
|
104 |
+
"prompt_name": "article_DOC_summary",
|
105 |
+
"rougeLsum_fmeasure": 0.036128933672296124,
|
106 |
+
"dataset_path": "GEM/xsum",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": "",
|
109 |
+
"rougeLsum_fmeasure_stderr": 0.0020726114306530577
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "gem_xsum",
|
113 |
+
"prompt_name": "article_DOC_summary",
|
114 |
+
"bleu": 0.5192386395102333,
|
115 |
+
"dataset_path": "GEM/xsum",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": "",
|
118 |
+
"bleu_stderr": 0.13002341898619957
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 4,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "gem_xsum",
|
5 |
+
"prompt_name": "article_DOC_summary",
|
6 |
+
"rouge1_precision": 0.0025333563065761205,
|
7 |
+
"dataset_path": "GEM/xsum",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": "",
|
10 |
+
"rouge1_precision_stderr": 0.0007200688467666427
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "gem_xsum",
|
14 |
+
"prompt_name": "article_DOC_summary",
|
15 |
+
"rouge1_recall": 0.0020475737491748073,
|
16 |
+
"dataset_path": "GEM/xsum",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": "",
|
19 |
+
"rouge1_recall_stderr": 0.0005527163320441051
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "gem_xsum",
|
23 |
+
"prompt_name": "article_DOC_summary",
|
24 |
+
"rouge1_fmeasure": 0.0022218780996264655,
|
25 |
+
"dataset_path": "GEM/xsum",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": "",
|
28 |
+
"rouge1_fmeasure_stderr": 0.0006107839354088714
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "gem_xsum",
|
32 |
+
"prompt_name": "article_DOC_summary",
|
33 |
+
"rouge2_precision": 0.0002052993867644256,
|
34 |
+
"dataset_path": "GEM/xsum",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": "",
|
37 |
+
"rouge2_precision_stderr": 0.00010263779675566133
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "gem_xsum",
|
41 |
+
"prompt_name": "article_DOC_summary",
|
42 |
+
"rouge2_recall": 0.00015805735145357788,
|
43 |
+
"dataset_path": "GEM/xsum",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": "",
|
46 |
+
"rouge2_recall_stderr": 8.125011510560468e-05
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "gem_xsum",
|
50 |
+
"prompt_name": "article_DOC_summary",
|
51 |
+
"rouge2_fmeasure": 0.0001768729417917658,
|
52 |
+
"dataset_path": "GEM/xsum",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": "",
|
55 |
+
"rouge2_fmeasure_stderr": 8.939544334231789e-05
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "gem_xsum",
|
59 |
+
"prompt_name": "article_DOC_summary",
|
60 |
+
"rougeL_precision": 0.0018767772604883414,
|
61 |
+
"dataset_path": "GEM/xsum",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": "",
|
64 |
+
"rougeL_precision_stderr": 0.0005169601507247577
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "gem_xsum",
|
68 |
+
"prompt_name": "article_DOC_summary",
|
69 |
+
"rougeL_recall": 0.0015653832711829878,
|
70 |
+
"dataset_path": "GEM/xsum",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": "",
|
73 |
+
"rougeL_recall_stderr": 0.0004179534887542103
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "gem_xsum",
|
77 |
+
"prompt_name": "article_DOC_summary",
|
78 |
+
"rougeL_fmeasure": 0.0016699276460120504,
|
79 |
+
"dataset_path": "GEM/xsum",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": "",
|
82 |
+
"rougeL_fmeasure_stderr": 0.0004477526803373312
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "gem_xsum",
|
86 |
+
"prompt_name": "article_DOC_summary",
|
87 |
+
"rougeLsum_precision": 0.0018647403070412589,
|
88 |
+
"dataset_path": "GEM/xsum",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": "",
|
91 |
+
"rougeLsum_precision_stderr": 0.0005045589409446839
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "gem_xsum",
|
95 |
+
"prompt_name": "article_DOC_summary",
|
96 |
+
"rougeLsum_recall": 0.001573237052621675,
|
97 |
+
"dataset_path": "GEM/xsum",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": "",
|
100 |
+
"rougeLsum_recall_stderr": 0.0004187928988454184
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "gem_xsum",
|
104 |
+
"prompt_name": "article_DOC_summary",
|
105 |
+
"rougeLsum_fmeasure": 0.0016709735398329094,
|
106 |
+
"dataset_path": "GEM/xsum",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": "",
|
109 |
+
"rougeLsum_fmeasure_stderr": 0.00044431903582722767
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "gem_xsum",
|
113 |
+
"prompt_name": "article_DOC_summary",
|
114 |
+
"bleu": 9.778811073469078e-39,
|
115 |
+
"dataset_path": "GEM/xsum",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": "",
|
118 |
+
"bleu_stderr": 1.521718069448933e-33
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 5,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|