Commit dec248e · Add eval
1 Parent(s): a13fb2c
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete set.
- .gitattributes +125 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_2.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_3.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_4.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_5.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_2.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_3.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_4.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_5.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_1.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_2.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_3.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_4.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_5.json +1 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_1.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_2.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_3.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_4.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_5.jsonl +3 -0
- 2b855b18bc4seed1/evaluation/generation/merged.csv +42 -4
- 2b855b18bc4seed1/evaluation/generation/merged.json +1 -1
- 2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json +133 -0
- 2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_2.json +133 -0
- 2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_3.json +133 -0
- 2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_4.json +133 -0
- 2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_5.json +133 -0
- 2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_2.json +133 -0
- 2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_3.json +133 -0
- 2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_4.json +133 -0
- 2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_5.json +133 -0
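Each agg.*.json listed above is a single-line JSON record of one evaluation run. As a minimal sketch of pulling the scalar metrics out of one of these files (the summarize helper is hypothetical; the key names "results", "config", and the *_stderr suffix match the JSON shown further down):

import json

# Hypothetical helper: collect the scalar metrics from one agg.*.json record.
def summarize(path):
    with open(path) as f:
        data = json.load(f)
    metrics = {}
    for result in data["results"]:
        for key, value in result.items():
            # keep scalar metrics, skip metadata and the *_stderr entries
            if isinstance(value, float) and not key.endswith("_stderr"):
                metrics[key] = value
    metrics["num_fewshot"] = data["config"]["num_fewshot"]
    return metrics

print(summarize("2b855b18bc4seed1/evaluation/generation/"
                "agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json"))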
.gitattributes
CHANGED
@@ -560,3 +560,128 @@ evaluation/seed2/generation/examples.limited=3000.model=seed2.task=GEM-wiki_ling
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed1/evaluation/generation/examples.2b855b28bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b28bc4seed2/evaluation/generation/examples.2b855b28bc4seed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed2/evaluation/generation/examples.2b855b18bc4seed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b18bc4seed4/evaluation/generation/examples.2b855b18bc4seed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
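Every added line marks one examples.*.jsonl file as Git LFS-managed (filter=lfs diff=lfs merge=lfs) and as binary for diff purposes (-text). The entries follow a fixed {model}/evaluation/generation/examples.{model}_{task}_{fewshot}.jsonl pattern; a sketch of generating such entries for a new model directory (the model name and task list below are illustrative assumptions, not part of this commit):

# Hypothetical generator for .gitattributes LFS entries in the pattern added above.
TASKS = [
    "GEM-web_nlg_en_PALM_prompt",
    "GEM-wiki_lingua_en_tldr_en",
    "e2e_nlg_cleaned_generate_text_restaurant",
    "gem_xsum_article_DOC_summary",
]
ATTRS = "filter=lfs diff=lfs merge=lfs -text"

def lfs_entries(model, shots=range(6)):
    # one entry per (task, few-shot count) examples file
    for task in TASKS:
        for n in shots:
            yield (f"{model}/evaluation/generation/"
                   f"examples.{model}_{task}_{n}.jsonl {ATTRS}")

with open(".gitattributes", "a") as f:
    for entry in lfs_entries("2b855b18bc4seed1"):
        f.write(entry + "\n")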
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.440505975058967, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03925926885568198}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07303258540385325, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013956948759738636}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.36507400240791016, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005207277796989448}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11401113521512295, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019030453976470602}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03358434632060951, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000842389625141015}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.17329341204300425, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0034450528353974098}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05247257816338737, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011827703450859254}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06874932087487405, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012613572237216058}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3423137073673857, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004722054196952728}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10741785748120532, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017301490825739295}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0696948101540746, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001324079492115929}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.34613176577415133, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004811317119389049}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10872891126332167, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018051794462450466}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
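The trailing _1 in this file's name corresponds to "num_fewshot": 1 in the config block, and each *_stderr field is a bootstrap estimate ("bootstrap_iters": 10 here, so the error bars are coarse). A sketch of sweeping the few-shot variants of this task (the glob pattern is assumed from the file names above):

import glob, json

# Hypothetical sweep: print BLEU for each few-shot variant of the PALM prompt task.
pattern = ("2b855b18bc4seed1/evaluation/generation/"
           "agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_*.json")
for path in sorted(glob.glob(pattern)):
    data = json.load(open(path))
    bleu = next(r["bleu"] for r in data["results"] if "bleu" in r)
    print(data["config"]["num_fewshot"], round(bleu, 3))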
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_2.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.46666858091535174, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.018833413013635402}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07341298925567588, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013121366271023323}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3800249377740647, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005112909181278494}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11510966757275029, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017858390071285543}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03360825336454427, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007932424750607851}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1862779416722373, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0036516850220037643}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05294107579757583, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001118710876229426}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.0688709827206235, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001210999027849737}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3529927082231747, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004590770405531237}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10791868479229756, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016523165241163688}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07001967212649517, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012551792002818921}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.35981102966802486, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004723476880811747}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10968844522778035, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017066636269961225}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5148612937193335, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.023258764360096086}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.0751652044966289, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013667007055530862}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.38579633865658397, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005096444006387486}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11777172363016282, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018284085963843188}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03468953327441809, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008291540847161602}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.19053185137760673, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0036905598739581467}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05463630172665221, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011564896623965845}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07035911446447023, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012335375128364402}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3580868532189359, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004531834139292797}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.11025599769233292, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016650757546179005}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07160900018554582, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012846552736932328}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.36585158014383645, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0047053685337424276}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1122101583306418, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017262902999793785}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5647418608164382, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02321598077608985}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07569359025980668, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013630553991433284}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3976443232379198, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0049461871286812726}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11875570085857609, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001786404318427974}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03486410172214847, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008176827715067252}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1970252853734279, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003650596187090204}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.054985463513762434, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001122119667177031}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06997073281118363, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001205133821327662}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3649664345785377, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0044083367200935175}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10987977139003118, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016089540199373295}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07185747380368572, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001285044701214991}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3764949005356292, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0045956668994714435}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.11268637889960822, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016883140502033781}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
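The PALM_prompt template recorded in each entry joins the WebNLG triples in "input" with ", " and emits each reference after a " ||| " separator (promptsource's marker between conditioning text and target). A sketch of rendering it with jinja2; the triple and reference below are made up for illustration, not drawn from the dataset:

from jinja2 import Template

# The "prompt_jinja" string from the entries above, unescaped.
template = Template(
    "I will verbalize an abstract representation of a sentence in natural "
    "language. To do so, I will first show the representation and then the "
    "natural language. The text needs to include all of the information in "
    "the representation.\n\n"
    '{{input | join(", ")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}'
)

# Made-up WebNLG-style triple and reference, for illustration only.
rendered = template.render(
    input=["Aarhus_Airport | cityServed | Aarhus"],
    references=["Aarhus Airport serves the city of Aarhus."],
)
print(rendered)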
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5482714677989973, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.021675276789228544}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.0762060675085337, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001355574274158061}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.4042879305483473, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005164234121569231}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.1197570139047874, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001779946242752045}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03517530439964603, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008100317312804931}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.20231681062642728, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0038157106473818547}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05563099955118052, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011201364188582906}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.0704939357267132, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001212222390700002}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3699300529634866, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00453694448261558}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.11073414338981681, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016091973313620345}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07217372171442632, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012650450721543114}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3805359472242292, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004740284500553459}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.11336245134873023, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016677643371672299}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
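The "model_args" value in each config block packs the model options into a single comma-separated key=value string. A small sketch of splitting it back into a dict, safe here because none of the recorded values contain commas:

def parse_model_args(model_args: str) -> dict:
    # Split "k1=v1,k2=v2,..." into a dict; values here never contain commas.
    return dict(pair.split("=", 1) for pair in model_args.split(","))

args = parse_model_args(
    "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/"
    "nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,"
    "use_accelerate=True,"
    "tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/"
    "nov-2022-bettercom/gpt2,"
    "dtype=bfloat16"
)
print(args["pretrained"])   # checkpoint path
print(args["dtype"])        # bfloat16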
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_2.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1845535355915783, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024946135883453604}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2695517129175979, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026211468275759665}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.19430590275766726, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018344032028598204}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.04458948641409641, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0014120706650682048}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.062060456824661644, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014736004894038064}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04377271235002487, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009267663731236799}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.13779046902481237, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00197834834988283}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.20396222281462895, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020998128273290364}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.1442517974455914, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012894698921139642}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.1735217764883826, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002385080324318098}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.2539494164062588, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002490453523195178}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.18239177790885439, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017083970519236546}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.1278736826393443, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.060513597002875216}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.16344143511311807, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0028639585038610004}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.21705752761660305, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003046622002813931}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.15879291411941776, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020860678913316435}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.03946657591259567, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0015428944821802232}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.04950947134943765, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014616851671373423}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.03512846249009941, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008934035750431312}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.12569962559045028, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0023604226829550966}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.16607365892731357, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002405649884473064}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.11995388090764494, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001534190538296726}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.1539380841235904, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0027372639212194004}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.20424916839256455, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028790801501679176}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1490069929693716, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001947503662307039}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.9219980526586609, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06329741681309228}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.0571224955926628, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0023662002964983195}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.07207313604028937, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025952504300447}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.05149268824953048, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001797776534238523}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.013902404245260857, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010495099961780859}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.017002286132095295, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009860774655885615}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.011715471831871544, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000644787176192932}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.04528881825982844, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001980637805535765}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.056145552183085444, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002049147231321551}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.039716516162972955, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013806219368805613}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.05374955984533459, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002257048632231794}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.06742318539230084, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002435351108083965}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.04810176172739388, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016777266418722225}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.48857425145328925, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03366566494624167}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.009747204395211186, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0012001063626629186}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.010818217770801558, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0011201653112752584}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.007694154582353487, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0007786894464533563}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.00346911836539752, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007785617188891463}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0028992540395902716, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0004889741309162621}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0020696459090992687, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003052852492354327}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.00820275902366193, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010899421875228057}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.008616034645548207, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0009109421330906957}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.006094780573936507, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0006210929394899102}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.009145684266485845, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001140205318254729}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.01010245236626395, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0010491484523406256}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.007121401627227017, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0007144210238327558}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.1456693667719014e-05, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.723913664635584e-05}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 10.634437155547644, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12197898843602793}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5036238927741198, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030639625905961826}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.40514436210115906, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029117373605182816}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.42455028471653405, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002281102372997913}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.22689564783747476, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0024061538562456454}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.18052137985902375, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0020545376786729216}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.18895700263359685, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018830672668286648}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.36864728762379756, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002713281513726948}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.29363759779765686, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023376030038032096}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3084688443741967, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019428767162060765}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4127256111014694, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002948793559219785}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.33058511824023096, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026356389474789365}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3468756867188751, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022023250835616795}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
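The generate_text_restaurant template does light parsing inside jinja itself: it splits the E2E meaning representation on "]", takes the text before "[" (commas stripped) as the key, and removes the key prefix to recover the value. A sketch of rendering it, reading the serialized '''' as an escaped empty string; the meaning representation below is made up in the e2e_nlg_cleaned format:

from jinja2 import Template

template = Template(
    "Given the following data about a restaurant:\n"
    '{% for feature in meaning_representation.split("]") %}'
    ' {% set key = feature.split("[")[0].replace(",","") %}'
    ' {% set value = feature.replace(",","").replace(key+"[", "") %}\n'
    '{% if value != "" %} {{key}} : {{value}} {% endif %}\n'
    "{%- endfor %}\n"
    "Generate some text about this restaurant. ||| {{human_reference}}"
)

# Made-up inputs in the e2e_nlg_cleaned format, for illustration only.
print(template.render(
    meaning_representation="name[The Eagle], eatType[coffee shop], food[French]",
    human_reference="The Eagle is a French coffee shop.",
))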
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 12.301413822806838, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.20262240014836314}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5399575561130455, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003266325155497623}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4213105802567709, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002885247215704974}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.44741112713767206, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022577398764700904}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.26499561529306176, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002709454676436721}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.20275553070716468, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021379579214280894}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.21548365533974562, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0019747739093204414}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4079476620735534, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029508638184802447}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.31544746800641166, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023905576125930693}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.33576511255116304, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019955199500755402}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.45262380773022504, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031661976419364917}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3517025086244611, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002668549137492389}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3739342428197366, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022272664682864217}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 12.735025253733303, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1543696231726414}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5522600594628029, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032337983631537323}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4216891031745554, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028154912712571967}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.453141053851804, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002219078490330037}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2734460336080908, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002687888305690842}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2056138268240387, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021506806137587555}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.22078340091336748, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001984445730938754}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.42214813908034704, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002984968214396239}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3194742391133158, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002376039465602172}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3441511648112412, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020173164636790013}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4678001402841005, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003166148782356431}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3561146609929946, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002639031001812239}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.38306881886799177, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022312065315876588}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 12.980269120476393, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15679846983981172}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5540966162409572, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032485268958775157}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4193651329533393, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027376128022198886}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.45385752995359196, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022261159480431664}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.27761999296577594, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00272673637476465}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2057871362479186, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002107586634613798}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.22319746312995775, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001997523485956697}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.42526614941701973, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029723620616554118}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3197042271839403, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023489094620180295}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3466328962197939, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002037579341369906}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.46993141251157605, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031752194176965357}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3554430255918313, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002627784362118594}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3846017339795443, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022611591223662183}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 12.969399191953066, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.19941195172606874}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5599067602424606, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003248452924564786}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4190398684201684, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026925252692156097}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.45648227298778776, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002201391493069159}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.28210620578099194, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027749512966293586}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.20583337920488634, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0020734330299209463}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.2252413723142184, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020001196385859236}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.43041795385401294, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0030021706663444104}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.32016551351833494, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023326595557985243}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.34937543650436564, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020568420192510293}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.47570615877600253, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031943178349575175}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.35522919936052744, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025627959197265897}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3872005526696519, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022387438546437395}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
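The prompt_jinja string repeated across the e2e_nlg_cleaned files above is a single template: it splits the E2E meaning representation on "]", strips commas, and emits one "key : value" line per attribute before asking for text about the restaurant. A minimal rendering sketch, assuming the jinja2 package and an illustrative meaning representation (neither is part of this commit; the "||| {{human_reference}}" target suffix of the stored template is elided here):

import jinja2

# Same logic as the stored prompt_jinja: split on "]", strip commas,
# derive key/value for each feature, and print "key : value" lines.
template = jinja2.Template(
    'Given the following data about a restaurant:\n'
    '{% for feature in meaning_representation.split("]") %}'
    ' {% set key = feature.split("[")[0].replace(",","") %}'
    ' {% set value = feature.replace(",","").replace(key+"[", "") %}\n'
    '{% if value != "" %} {{key}} : {{value}} {% endif %}'
    '{%- endfor %}\n'
    'Generate some text about this restaurant.')

# Hypothetical input for illustration only.
mr = "name[The Eagle], eatType[coffee shop], food[French]"
print(template.render(meaning_representation=mr))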
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_1.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.12139290293794137, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017186435592393995}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.30012669343441956, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003961780861183236}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.17078360208253435, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00231048386226248}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.02269523637794389, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008701271780094139}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.05820863019650622, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002317649222526862}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.03221746451360407, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012284696045860488}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.09406591592734283, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012562940275390134}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.23499863397575185, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003092447038121959}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1326696954050304, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017050900928064741}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.09691207312328678, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013715038329889693}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.24201399682685507, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0033300150361477363}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1367237105605167, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018683833664150447}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.225124596618229, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.055943295576705576}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
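Every agg.*.json added in this commit has the same shape: a "results" list whose entries repeat the prompt/dataset metadata and carry exactly one metric key (e.g. "bleu", "rouge1_fmeasure") plus a matching "<metric>_stderr", and a "config" block recording the run settings (num_fewshot, batch_size, limit, seed, ...). A minimal reading sketch using only the Python standard library, pointed at the xsum file directly above:

import json

path = ("2b855b18bc4seed1/evaluation/generation/"
        "agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_1.json")
with open(path) as f:
    data = json.load(f)

print("num_fewshot:", data["config"]["num_fewshot"])
for entry in data["results"]:
    # The metric key is the one with a matching "<metric>_stderr" sibling.
    metric = next(k for k in entry if k + "_stderr" in entry)
    print(f'{entry["task_name"]}/{entry["prompt_name"]}: '
          f'{metric} = {entry[metric]:.4f} ± {entry[metric + "_stderr"]:.4f}')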
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_2.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.11848036366865702, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017246954146993116}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.29422169708209367, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0039719116230109855}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.16695694916198175, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002323924165593795}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.02223074116005843, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009192612875289195}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.05711930773667224, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0024203482856540786}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.03165272210206821, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013070047041779823}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.09275491223725246, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001294346257886036}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.23228470677963386, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003140444207327633}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.13096007444561938, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001758251379995231}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.09564362984293287, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014057525880751447}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.24031553341263567, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003453370614981004}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.13519296873601647, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019247440048122983}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.2269137145831717, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.034355594509920095}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1173850576603971, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019398969338036355}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2787275834874179, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004195016676211932}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1615135868779298, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002470602580719195}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.020922413744292524, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009719043275883042}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.052023279960273304, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002365693856611846}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.029224092135722014, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001297780921679781}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.09118615638595852, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001472658898846434}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2185043098420338, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0032923416071559855}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1256992016836946, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018584540223603558}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.09425406240290829, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016210406647485212}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2252382766073172, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003589188921251769}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.12975495548659838, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002051211253222052}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.222710009995911, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07496047751936895}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.03826276388948949, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024801127678932854}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.06926541130759056, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003978351037602963}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.04449306237674175, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002539825023521398}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.006320743361780383, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008990691695882139}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.012019413723960436, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00124174296434464}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.007503219745005152, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008079284857284853}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.03046353149195033, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0021011732217140183}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.05431907246732738, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0031184241903121235}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.034713569625035294, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019657575195258263}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.032690090587705486, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002217377898664004}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.05835102596740354, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0033728750674449325}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.03737907165768346, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002128608781763403}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.5349754476502416, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08512835589666831}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_5.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.002520510024599048, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0007565625350760487}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0021570243419933768, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.000655053772708356}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0022847697499340787, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006896753720124607}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.00019899326225630333, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 9.93981067283471e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00017192081565000543, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 8.690421994979313e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.00018332767216436527, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 9.192832778053751e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0019439458549511707, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0005457370791986779}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0016548236956347244, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0004639416377074074}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0017523449957555426, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": 
null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0004894311950718817}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0021468629280125277, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0006084449815322187}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0018062594366931065, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0005018570788553906}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0019247498547093978, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005375495150800006}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 9.902494720086563e-38, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.8754348008316148e-32}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
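Each agg.*.json file above is a single JSON line: a "results" list in which every entry repeats the task/prompt identifiers and carries exactly one metric plus its matching *_stderr twin, followed by a "config" block recording the launch settings. A minimal sketch for pulling one metric out of such a file, assuming only the layout shown above (the path is illustrative):

```python
import json

# Load one aggregate-results file from this commit (path is illustrative).
path = "2b855b18bc4seed1/evaluation/generation/agg.2b855b18bc4seed1_gem_xsum_article_DOC_summary_5.json"
with open(path) as f:
    agg = json.load(f)

# Each entry in "results" holds exactly one metric key and its *_stderr twin.
for record in agg["results"]:
    if "rouge2_fmeasure" in record:
        print(record["task_name"], record["prompt_name"],
              record["rouge2_fmeasure"], record["rouge2_fmeasure_stderr"])

# "config" records how the run was launched.
print(agg["config"]["num_fewshot"], agg["config"]["seed"])
```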
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39f143e3687985d30b70f52fcb3583e5c9fa75e5ab4acb3e52d6d9806b1e58a6
+size 5151681
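The examples.*.jsonl files in this commit are checked in as Git LFS pointers: three "key value" lines giving the spec version, the sha256 oid of the payload, and its size in bytes. The actual generations must be fetched with `git lfs pull` before they can be read; until then only the pointer is on disk. A minimal sketch of a pointer parser, assuming just the three-line format shown (the path is illustrative):

```python
# Parse a Git LFS pointer file of the form shown above.
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            # Each line is "key value"; split on the first space only.
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer(
    "2b855b18bc4seed1/evaluation/generation/"
    "examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl"
)
print(ptr["oid"], ptr["size"])  # e.g. sha256:39f143e3... 5151681
```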
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a101ff7a9ddff2317c8b8f1d8a80cd248fe3f2167977a9700ca748aef13143a7
+size 6054793
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5273e849de7e7928d47d43d50e641c69e3b7138591fb48e338f7c3146557bacc
+size 6954535
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcea88df372353ee6bd8b79732848ee1fd13984f2f1257172dde4fb174f88001
+size 7877356
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dfad058d9a0f88549e81465ab8f07731e4227c4c020380f9aaeabe26147dcf7
+size 8773378
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1f95d88f31acf10b35b2057fa6c0868cd683088ba8bb6ca07c68134f61c218c
+size 18874175
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e048bca15ce4c2979a27b15d4f6e1876e04d82b07c80ef80d9caf2f78b9d8a9
+size 24289741
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6029f953df15d2758f403b047ae71e2160ed495c01be56d4a190c5ef9984620f
+size 29457366
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b536ec2f44a61f9439996be2edc11869c31a5bb9a47043ee0d2fda257c9ce97
+size 34797211
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b15e25de06e9874cc5715e661493fadcaa252ff6f893b8232e08c89f4cf3c66
+size 5036346
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a11597211c0d3089680fed16f5023f45f25a41942b71f27e90d5b2b14a9ebf39
+size 6111097
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bd529c39ea0e5cba19f44fa1649ea38f56ad513a4241fc8b2e30888808b4d25
+size 7186108
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bd672ea4ec399504c1887a77bcde65206992adc20cbab45a030b769a574a2ea
+size 8260987
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b924654de8ba143937d26a77d18d9b56e32b74b977a139d1c36ca771a809886f
+size 9345427
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_1.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aed43f457acda1e1ebe55c07595ba47356d87012d83dc62d5ef52d8514c35857
+size 5098374
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_2.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b68dfc40d4c9dbbc3b84702631ba56ec9d74dcbfb4c57b11f3b02378c7fb881c
+size 7372821
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_3.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24dc992239d56ceb8fbc881e5be46a303791186c60a87e5503f47926ae403ac5
+size 9641423
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_4.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c059c5be30133442eb88f24a2ba58a7398c62e2337461d7088be16562cc8569b
+size 11670534
2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_5.jsonl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e918142fe49299f37ffa8bd540d34939148863484055b6b4884a0cd8f508fabf
+size 13897565
2b855b18bc4seed1/evaluation/generation/merged.csv
CHANGED
@@ -1,15 +1,53 @@
 dataset,fewshots,prompt,metric,value
 e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.02952151752219146
 e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.02952151752219146
-e2e_nlg_cleaned,
+e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.18895700263359685
+e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.18895700263359685
+e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.21548365533974562
+e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.21548365533974562
+e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.22078340091336748
+e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.22078340091336748
+e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.22319746312995775
+e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.22319746312995775
+e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.2252413723142184
+e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.2252413723142184
+e2e_nlg_cleaned,5,average,multiple,0.1838640686421796
 gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.04662550924488616
 gem_xsum,0,median,rouge2_fmeasure,0.04662550924488616
-gem_xsum,
+gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.03221746451360407
+gem_xsum,1,median,rouge2_fmeasure,0.03221746451360407
+gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.03165272210206821
+gem_xsum,2,median,rouge2_fmeasure,0.03165272210206821
+gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.029224092135722014
+gem_xsum,3,median,rouge2_fmeasure,0.029224092135722014
+gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.007503219745005152
+gem_xsum,4,median,rouge2_fmeasure,0.007503219745005152
+gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.00018332767216436527
+gem_xsum,5,median,rouge2_fmeasure,0.00018332767216436527
+gem_xsum,5,average,multiple,0.02456772256890833
 web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.05092459037567072
 web_nlg_en,0,median,rouge2_fmeasure,0.05092459037567072
-web_nlg_en,
+web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.05247257816338737
+web_nlg_en,1,median,rouge2_fmeasure,0.05247257816338737
+web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.05294107579757583
+web_nlg_en,2,median,rouge2_fmeasure,0.05294107579757583
+web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.05463630172665221
+web_nlg_en,3,median,rouge2_fmeasure,0.05463630172665221
+web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.054985463513762434
+web_nlg_en,4,median,rouge2_fmeasure,0.054985463513762434
+web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.05563099955118052
+web_nlg_en,5,median,rouge2_fmeasure,0.05563099955118052
+web_nlg_en,5,average,multiple,0.053598501521371515
 wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.03456822091856771
 wiki_lingua_en,0,median,rouge2_fmeasure,0.03456822091856771
 wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.04384272768611954
 wiki_lingua_en,1,median,rouge2_fmeasure,0.04384272768611954
-wiki_lingua_en,
+wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.04377271235002487
+wiki_lingua_en,2,median,rouge2_fmeasure,0.04377271235002487
+wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.03512846249009941
+wiki_lingua_en,3,median,rouge2_fmeasure,0.03512846249009941
+wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.011715471831871544
+wiki_lingua_en,4,median,rouge2_fmeasure,0.011715471831871544
+wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0020696459090992687
+wiki_lingua_en,5,median,rouge2_fmeasure,0.0020696459090992687
+wiki_lingua_en,5,average,multiple,0.028516206864297056
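merged.csv holds one rouge2_fmeasure row per (dataset, fewshots, prompt), a median row across the prompts of that fewshot count (here each task has a single prompt, so the median equals that value), and a closing average row per dataset: the mean of the six per-fewshot medians (0.1838640686421796 for e2e_nlg_cleaned above). A minimal sketch to recompute those aggregate rows, assuming only the column layout shown:

```python
import csv
from collections import defaultdict
from statistics import mean, median

# (dataset, fewshots) -> per-prompt rouge2_fmeasure values
values = defaultdict(list)
with open("2b855b18bc4seed1/evaluation/generation/merged.csv") as f:
    for row in csv.DictReader(f):
        if row["prompt"] not in ("median", "average"):
            values[(row["dataset"], int(row["fewshots"]))].append(float(row["value"]))

for dataset in sorted({d for d, _ in values}):
    shots = sorted(k for d, k in values if d == dataset)
    medians = [median(values[(dataset, k)]) for k in shots]
    # One `median` row per fewshot count, then a single `average` row.
    print(dataset, medians, mean(medians))
```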
2b855b18bc4seed1/evaluation/generation/merged.json
CHANGED
@@ -1 +1 @@
-
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.38372871385421375, "bleu_stderr": 0.0402021806877473, "rouge1_fmeasure": 0.10680911195694968, "rouge1_fmeasure_stderr": 0.0019785575854032573, "rouge1_precision": 0.07104924494394081, "rouge1_precision_stderr": 0.0015991216899841948, "rouge1_recall": 0.3013449542068681, "rouge1_recall_stderr": 0.0047705500959025525, "rouge2_fmeasure": 0.05092459037567072, "rouge2_fmeasure_stderr": 0.0012525133690105767, "rouge2_precision": 0.034103184636403855, "rouge2_precision_stderr": 0.0010465031492496973, "rouge2_recall": 0.14745328076985265, "rouge2_recall_stderr": 0.003229584027558794, "rougeL_fmeasure": 0.10416477893295947, "rougeL_fmeasure_stderr": 0.0018839753099339747, "rougeL_precision": 0.06915352511246627, "rougeL_precision_stderr": 0.0015230243018002692, "rougeL_recall": 0.29576101664453625, "rougeL_recall_stderr": 0.00470384720362342, "rougeLsum_fmeasure": 0.10265429965730934, "rougeLsum_fmeasure_stderr": 0.0018853972629204819, "rougeLsum_precision": 0.06829432527794399, "rougeLsum_precision_stderr": 0.0015295183253466098, "rougeLsum_recall": 0.28914893825477744, "rougeLsum_recall_stderr": 0.004510560764293423}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.491882993736668, "bleu_stderr": 0.0604760906018356, "rouge1_fmeasure": 0.1759865804471638, "rouge1_fmeasure_stderr": 0.0017732039671299322, "rouge1_precision": 0.1498820689096658, "rouge1_precision_stderr": 0.0018275348360710444, "rouge1_recall": 0.2556299443788622, "rouge1_recall_stderr": 0.00248160486647333, "rouge2_fmeasure": 0.03456822091856771, "rouge2_fmeasure_stderr": 0.0008155263346381764, "rouge2_precision": 0.02941407255552426, "rouge2_precision_stderr": 0.0007317707658910503, "rouge2_recall": 0.05122352165204346, "rouge2_recall_stderr": 0.0013185372955846983, "rougeL_fmeasure": 0.13745418190482725, "rougeL_fmeasure_stderr": 0.0012579714854610159, "rougeL_precision": 0.11564617257577034, "rougeL_precision_stderr": 0.0012699602508405065, "rougeL_recall": 0.20467382242588833, "rougeL_recall_stderr": 0.0020035941778714923, "rougeLsum_fmeasure": 0.1618306473302698, "rougeLsum_fmeasure_stderr": 0.0016193856451530825, "rougeLsum_precision": 0.13755009094489865, "rougeLsum_precision_stderr": 0.0016636080312347197, "rougeLsum_recall": 0.23612507536924462, "rougeLsum_recall_stderr": 0.0023092678826538886}}, "1": {"tldr_en": {"bleu": 2.1436267653679546, "bleu_stderr": 0.0638073855275041, "rouge1_fmeasure": 0.1975005332862954, "rouge1_fmeasure_stderr": 0.0019356691188220237, "rouge1_precision": 0.17285833157163866, "rouge1_precision_stderr": 0.0021279704844651363, "rouge1_recall": 0.28314824615747214, "rouge1_recall_stderr": 0.00279455354643831, "rouge2_fmeasure": 0.04384272768611954, "rouge2_fmeasure_stderr": 0.0009596471350980362, "rouge2_precision": 0.0384184201971702, "rouge2_precision_stderr": 0.0009409832381367588, "rouge2_recall": 0.06508924927390396, "rouge2_recall_stderr": 0.001591323036503726, "rougeL_fmeasure": 0.14506525863258568, "rougeL_fmeasure_stderr": 0.0013181172617599275, "rougeL_precision": 0.12593266679512835, "rougeL_precision_stderr": 0.0014634697072188599, "rougeL_recall": 0.21304467465973717, "rougeL_recall_stderr": 0.0022115044388905185, "rougeLsum_fmeasure": 0.18425139806084484, "rougeLsum_fmeasure_stderr": 0.001799100293277034, "rougeLsum_precision": 0.16101664785254152, "rougeLsum_precision_stderr": 0.0019730319858541094, "rougeLsum_recall": 0.26504813030482266, "rougeLsum_recall_stderr": 0.002642752049345963}}}, "e2e_nlg_cleaned": {"0": 
{"generate_text_restaurant": {"bleu": 3.5568265416850715, "bleu_stderr": 0.06342744631128652, "rouge1_fmeasure": 0.09330515979646851, "rouge1_fmeasure_stderr": 0.0020629838379730794, "rouge1_precision": 0.08693722544256345, "rouge1_precision_stderr": 0.002343255123550843, "rouge1_recall": 0.12292068428817848, "rouge1_recall_stderr": 0.002719689516817894, "rouge2_fmeasure": 0.02952151752219146, "rouge2_fmeasure_stderr": 0.0008886196728869984, "rouge2_precision": 0.02529674798890196, "rouge2_precision_stderr": 0.0008129003273986918, "rouge2_recall": 0.040333921659969615, "rouge2_recall_stderr": 0.001229134965827211, "rougeL_fmeasure": 0.08789040657147261, "rougeL_fmeasure_stderr": 0.0019465412654236043, "rougeL_precision": 0.08035979365511788, "rougeL_precision_stderr": 0.002075584850514779, "rougeL_recall": 0.1166646556495462, "rougeL_recall_stderr": 0.0026043870982970425, "rougeLsum_fmeasure": 0.0790120057222766, "rougeLsum_fmeasure_stderr": 0.0017753745619975567, "rougeLsum_precision": 0.0733377427466678, "rougeLsum_precision_stderr": 0.001983289544738296, "rougeLsum_recall": 0.10444279836388244, "rougeLsum_recall_stderr": 0.002362609255378589}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.946115392938811, "bleu_stderr": 0.05712427893814158, "rouge1_fmeasure": 0.20822893238963622, "rouge1_fmeasure_stderr": 0.0025732099733206905, "rouge1_precision": 0.15996807239746452, "rouge1_precision_stderr": 0.002287623127618875, "rouge1_recall": 0.3348486397737723, "rouge1_recall_stderr": 0.004426564044230394, "rouge2_fmeasure": 0.04662550924488616, "rouge2_fmeasure_stderr": 0.0015808380503390949, "rouge2_precision": 0.03502290300971957, "rouge2_precision_stderr": 0.0012502860060335647, "rouge2_recall": 0.07878986425845928, "rouge2_recall_stderr": 0.0027497076817277815, "rougeL_fmeasure": 0.15716969719770496, "rougeL_fmeasure_stderr": 0.001890560693868338, "rougeL_precision": 0.12042436779200985, "rougeL_precision_stderr": 0.0016727906904202222, "rougeL_recall": 0.2547012746203713, "rougeL_recall_stderr": 0.0034518315416116534, "rougeLsum_fmeasure": 0.16384073920324538, "rougeLsum_fmeasure_stderr": 0.0021628411307558727, "rougeLsum_precision": 0.12509348444149254, "rougeLsum_precision_stderr": 0.0018171135290296517, "rougeLsum_recall": 0.26636152259041823, "rougeLsum_recall_stderr": 0.003922514826770532}}}}
+
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.38372871385421375, "bleu_stderr": 0.0402021806877473, "rouge1_fmeasure": 0.10680911195694968, "rouge1_fmeasure_stderr": 0.0019785575854032573, "rouge1_precision": 0.07104924494394081, "rouge1_precision_stderr": 0.0015991216899841948, "rouge1_recall": 0.3013449542068681, "rouge1_recall_stderr": 0.0047705500959025525, "rouge2_fmeasure": 0.05092459037567072, "rouge2_fmeasure_stderr": 0.0012525133690105767, "rouge2_precision": 0.034103184636403855, "rouge2_precision_stderr": 0.0010465031492496973, "rouge2_recall": 0.14745328076985265, "rouge2_recall_stderr": 0.003229584027558794, "rougeL_fmeasure": 0.10416477893295947, "rougeL_fmeasure_stderr": 0.0018839753099339747, "rougeL_precision": 0.06915352511246627, "rougeL_precision_stderr": 0.0015230243018002692, "rougeL_recall": 0.29576101664453625, "rougeL_recall_stderr": 0.00470384720362342, "rougeLsum_fmeasure": 0.10265429965730934, "rougeLsum_fmeasure_stderr": 0.0018853972629204819, "rougeLsum_precision": 0.06829432527794399, "rougeLsum_precision_stderr": 0.0015295183253466098, "rougeLsum_recall": 0.28914893825477744, "rougeLsum_recall_stderr": 0.004510560764293423}}, "1": {"PALM_prompt": {"bleu": 0.440505975058967, "bleu_stderr": 0.03925926885568198, "rouge1_fmeasure": 0.11401113521512295, "rouge1_fmeasure_stderr": 0.0019030453976470602, "rouge1_precision": 0.07303258540385325, "rouge1_precision_stderr": 0.0013956948759738636, "rouge1_recall": 0.36507400240791016, "rouge1_recall_stderr": 0.005207277796989448, "rouge2_fmeasure": 0.05247257816338737, "rouge2_fmeasure_stderr": 0.0011827703450859254, "rouge2_precision": 0.03358434632060951, "rouge2_precision_stderr": 0.000842389625141015, "rouge2_recall": 0.17329341204300425, "rouge2_recall_stderr": 0.0034450528353974098, "rougeL_fmeasure": 0.10741785748120532, "rougeL_fmeasure_stderr": 0.0017301490825739295, "rougeL_precision": 0.06874932087487405, "rougeL_precision_stderr": 0.0012613572237216058, "rougeL_recall": 0.3423137073673857, "rougeL_recall_stderr": 0.004722054196952728, "rougeLsum_fmeasure": 0.10872891126332167, "rougeLsum_fmeasure_stderr": 0.0018051794462450466, "rougeLsum_precision": 0.0696948101540746, "rougeLsum_precision_stderr": 0.001324079492115929, "rougeLsum_recall": 0.34613176577415133, "rougeLsum_recall_stderr": 0.004811317119389049}}, "2": {"PALM_prompt": {"bleu": 0.46666858091535174, "bleu_stderr": 0.018833413013635402, "rouge1_fmeasure": 0.11510966757275029, "rouge1_fmeasure_stderr": 0.0017858390071285543, "rouge1_precision": 0.07341298925567588, "rouge1_precision_stderr": 0.0013121366271023323, "rouge1_recall": 0.3800249377740647, "rouge1_recall_stderr": 0.005112909181278494, "rouge2_fmeasure": 0.05294107579757583, "rouge2_fmeasure_stderr": 0.001118710876229426, "rouge2_precision": 0.03360825336454427, "rouge2_precision_stderr": 0.0007932424750607851, "rouge2_recall": 0.1862779416722373, "rouge2_recall_stderr": 0.0036516850220037643, "rougeL_fmeasure": 0.10791868479229756, "rougeL_fmeasure_stderr": 0.0016523165241163688, "rougeL_precision": 0.0688709827206235, "rougeL_precision_stderr": 0.001210999027849737, "rougeL_recall": 0.3529927082231747, "rougeL_recall_stderr": 0.004590770405531237, "rougeLsum_fmeasure": 0.10968844522778035, "rougeLsum_fmeasure_stderr": 0.0017066636269961225, "rougeLsum_precision": 0.07001967212649517, "rougeLsum_precision_stderr": 0.0012551792002818921, "rougeLsum_recall": 0.35981102966802486, "rougeLsum_recall_stderr": 0.004723476880811747}}, "3": {"PALM_prompt": {"bleu": 0.5148612937193335, 
"bleu_stderr": 0.023258764360096086, "rouge1_fmeasure": 0.11777172363016282, "rouge1_fmeasure_stderr": 0.0018284085963843188, "rouge1_precision": 0.0751652044966289, "rouge1_precision_stderr": 0.0013667007055530862, "rouge1_recall": 0.38579633865658397, "rouge1_recall_stderr": 0.005096444006387486, "rouge2_fmeasure": 0.05463630172665221, "rouge2_fmeasure_stderr": 0.0011564896623965845, "rouge2_precision": 0.03468953327441809, "rouge2_precision_stderr": 0.0008291540847161602, "rouge2_recall": 0.19053185137760673, "rouge2_recall_stderr": 0.0036905598739581467, "rougeL_fmeasure": 0.11025599769233292, "rougeL_fmeasure_stderr": 0.0016650757546179005, "rougeL_precision": 0.07035911446447023, "rougeL_precision_stderr": 0.0012335375128364402, "rougeL_recall": 0.3580868532189359, "rougeL_recall_stderr": 0.004531834139292797, "rougeLsum_fmeasure": 0.1122101583306418, "rougeLsum_fmeasure_stderr": 0.0017262902999793785, "rougeLsum_precision": 0.07160900018554582, "rougeLsum_precision_stderr": 0.0012846552736932328, "rougeLsum_recall": 0.36585158014383645, "rougeLsum_recall_stderr": 0.0047053685337424276}}, "4": {"PALM_prompt": {"bleu": 0.5647418608164382, "bleu_stderr": 0.02321598077608985, "rouge1_fmeasure": 0.11875570085857609, "rouge1_fmeasure_stderr": 0.001786404318427974, "rouge1_precision": 0.07569359025980668, "rouge1_precision_stderr": 0.0013630553991433284, "rouge1_recall": 0.3976443232379198, "rouge1_recall_stderr": 0.0049461871286812726, "rouge2_fmeasure": 0.054985463513762434, "rouge2_fmeasure_stderr": 0.001122119667177031, "rouge2_precision": 0.03486410172214847, "rouge2_precision_stderr": 0.0008176827715067252, "rouge2_recall": 0.1970252853734279, "rouge2_recall_stderr": 0.003650596187090204, "rougeL_fmeasure": 0.10987977139003118, "rougeL_fmeasure_stderr": 0.0016089540199373295, "rougeL_precision": 0.06997073281118363, "rougeL_precision_stderr": 0.001205133821327662, "rougeL_recall": 0.3649664345785377, "rougeL_recall_stderr": 0.0044083367200935175, "rougeLsum_fmeasure": 0.11268637889960822, "rougeLsum_fmeasure_stderr": 0.0016883140502033781, "rougeLsum_precision": 0.07185747380368572, "rougeLsum_precision_stderr": 0.001285044701214991, "rougeLsum_recall": 0.3764949005356292, "rougeLsum_recall_stderr": 0.0045956668994714435}}, "5": {"PALM_prompt": {"bleu": 0.5482714677989973, "bleu_stderr": 0.021675276789228544, "rouge1_fmeasure": 0.1197570139047874, "rouge1_fmeasure_stderr": 0.001779946242752045, "rouge1_precision": 0.0762060675085337, "rouge1_precision_stderr": 0.001355574274158061, "rouge1_recall": 0.4042879305483473, "rouge1_recall_stderr": 0.005164234121569231, "rouge2_fmeasure": 0.05563099955118052, "rouge2_fmeasure_stderr": 0.0011201364188582906, "rouge2_precision": 0.03517530439964603, "rouge2_precision_stderr": 0.0008100317312804931, "rouge2_recall": 0.20231681062642728, "rouge2_recall_stderr": 0.0038157106473818547, "rougeL_fmeasure": 0.11073414338981681, "rougeL_fmeasure_stderr": 0.0016091973313620345, "rougeL_precision": 0.0704939357267132, "rougeL_precision_stderr": 0.001212222390700002, "rougeL_recall": 0.3699300529634866, "rougeL_recall_stderr": 0.00453694448261558, "rougeLsum_fmeasure": 0.11336245134873023, "rougeLsum_fmeasure_stderr": 0.0016677643371672299, "rougeLsum_precision": 0.07217372171442632, "rougeLsum_precision_stderr": 0.0012650450721543114, "rougeLsum_recall": 0.3805359472242292, "rougeLsum_recall_stderr": 0.004740284500553459}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.491882993736668, "bleu_stderr": 0.0604760906018356, "rouge1_fmeasure": 
0.1759865804471638, "rouge1_fmeasure_stderr": 0.0017732039671299322, "rouge1_precision": 0.1498820689096658, "rouge1_precision_stderr": 0.0018275348360710444, "rouge1_recall": 0.2556299443788622, "rouge1_recall_stderr": 0.00248160486647333, "rouge2_fmeasure": 0.03456822091856771, "rouge2_fmeasure_stderr": 0.0008155263346381764, "rouge2_precision": 0.02941407255552426, "rouge2_precision_stderr": 0.0007317707658910503, "rouge2_recall": 0.05122352165204346, "rouge2_recall_stderr": 0.0013185372955846983, "rougeL_fmeasure": 0.13745418190482725, "rougeL_fmeasure_stderr": 0.0012579714854610159, "rougeL_precision": 0.11564617257577034, "rougeL_precision_stderr": 0.0012699602508405065, "rougeL_recall": 0.20467382242588833, "rougeL_recall_stderr": 0.0020035941778714923, "rougeLsum_fmeasure": 0.1618306473302698, "rougeLsum_fmeasure_stderr": 0.0016193856451530825, "rougeLsum_precision": 0.13755009094489865, "rougeLsum_precision_stderr": 0.0016636080312347197, "rougeLsum_recall": 0.23612507536924462, "rougeLsum_recall_stderr": 0.0023092678826538886}}, "1": {"tldr_en": {"bleu": 2.1436267653679546, "bleu_stderr": 0.0638073855275041, "rouge1_fmeasure": 0.1975005332862954, "rouge1_fmeasure_stderr": 0.0019356691188220237, "rouge1_precision": 0.17285833157163866, "rouge1_precision_stderr": 0.0021279704844651363, "rouge1_recall": 0.28314824615747214, "rouge1_recall_stderr": 0.00279455354643831, "rouge2_fmeasure": 0.04384272768611954, "rouge2_fmeasure_stderr": 0.0009596471350980362, "rouge2_precision": 0.0384184201971702, "rouge2_precision_stderr": 0.0009409832381367588, "rouge2_recall": 0.06508924927390396, "rouge2_recall_stderr": 0.001591323036503726, "rougeL_fmeasure": 0.14506525863258568, "rougeL_fmeasure_stderr": 0.0013181172617599275, "rougeL_precision": 0.12593266679512835, "rougeL_precision_stderr": 0.0014634697072188599, "rougeL_recall": 0.21304467465973717, "rougeL_recall_stderr": 0.0022115044388905185, "rougeLsum_fmeasure": 0.18425139806084484, "rougeLsum_fmeasure_stderr": 0.001799100293277034, "rougeLsum_precision": 0.16101664785254152, "rougeLsum_precision_stderr": 0.0019730319858541094, "rougeLsum_recall": 0.26504813030482266, "rougeLsum_recall_stderr": 0.002642752049345963}}, "2": {"tldr_en": {"bleu": 2.1278736826393443, "bleu_stderr": 0.060513597002875216, "rouge1_fmeasure": 0.19430590275766726, "rouge1_fmeasure_stderr": 0.0018344032028598204, "rouge1_precision": 0.1845535355915783, "rouge1_precision_stderr": 0.0024946135883453604, "rouge1_recall": 0.2695517129175979, "rouge1_recall_stderr": 0.0026211468275759665, "rouge2_fmeasure": 0.04377271235002487, "rouge2_fmeasure_stderr": 0.0009267663731236799, "rouge2_precision": 0.04458948641409641, "rouge2_precision_stderr": 0.0014120706650682048, "rouge2_recall": 0.062060456824661644, "rouge2_recall_stderr": 0.0014736004894038064, "rougeL_fmeasure": 0.1442517974455914, "rougeL_fmeasure_stderr": 0.0012894698921139642, "rougeL_precision": 0.13779046902481237, "rougeL_precision_stderr": 0.00197834834988283, "rougeL_recall": 0.20396222281462895, "rougeL_recall_stderr": 0.0020998128273290364, "rougeLsum_fmeasure": 0.18239177790885439, "rougeLsum_fmeasure_stderr": 0.0017083970519236546, "rougeLsum_precision": 0.1735217764883826, "rougeLsum_precision_stderr": 0.002385080324318098, "rougeLsum_recall": 0.2539494164062588, "rougeLsum_recall_stderr": 0.002490453523195178}}, "3": {"tldr_en": {"bleu": 1.9219980526586609, "bleu_stderr": 0.06329741681309228, "rouge1_fmeasure": 0.15879291411941776, "rouge1_fmeasure_stderr": 0.0020860678913316435, "rouge1_precision": 
0.16344143511311807, "rouge1_precision_stderr": 0.0028639585038610004, "rouge1_recall": 0.21705752761660305, "rouge1_recall_stderr": 0.003046622002813931, "rouge2_fmeasure": 0.03512846249009941, "rouge2_fmeasure_stderr": 0.0008934035750431312, "rouge2_precision": 0.03946657591259567, "rouge2_precision_stderr": 0.0015428944821802232, "rouge2_recall": 0.04950947134943765, "rouge2_recall_stderr": 0.0014616851671373423, "rougeL_fmeasure": 0.11995388090764494, "rougeL_fmeasure_stderr": 0.001534190538296726, "rougeL_precision": 0.12569962559045028, "rougeL_precision_stderr": 0.0023604226829550966, "rougeL_recall": 0.16607365892731357, "rougeL_recall_stderr": 0.002405649884473064, "rougeLsum_fmeasure": 0.1490069929693716, "rougeLsum_fmeasure_stderr": 0.001947503662307039, "rougeLsum_precision": 0.1539380841235904, "rougeLsum_precision_stderr": 0.0027372639212194004, "rougeLsum_recall": 0.20424916839256455, "rougeLsum_recall_stderr": 0.0028790801501679176}}, "4": {"tldr_en": {"bleu": 0.48857425145328925, "bleu_stderr": 0.03366566494624167, "rouge1_fmeasure": 0.05149268824953048, "rouge1_fmeasure_stderr": 0.001797776534238523, "rouge1_precision": 0.0571224955926628, "rouge1_precision_stderr": 0.0023662002964983195, "rouge1_recall": 0.07207313604028937, "rouge1_recall_stderr": 0.0025952504300447, "rouge2_fmeasure": 0.011715471831871544, "rouge2_fmeasure_stderr": 0.000644787176192932, "rouge2_precision": 0.013902404245260857, "rouge2_precision_stderr": 0.0010495099961780859, "rouge2_recall": 0.017002286132095295, "rouge2_recall_stderr": 0.0009860774655885615, "rougeL_fmeasure": 0.039716516162972955, "rougeL_fmeasure_stderr": 0.0013806219368805613, "rougeL_precision": 0.04528881825982844, "rougeL_precision_stderr": 0.001980637805535765, "rougeL_recall": 0.056145552183085444, "rougeL_recall_stderr": 0.002049147231321551, "rougeLsum_fmeasure": 0.04810176172739388, "rougeLsum_fmeasure_stderr": 0.0016777266418722225, "rougeLsum_precision": 0.05374955984533459, "rougeLsum_precision_stderr": 0.002257048632231794, "rougeLsum_recall": 0.06742318539230084, "rougeLsum_recall_stderr": 0.002435351108083965}}, "5": {"tldr_en": {"bleu": 1.1456693667719014e-05, "bleu_stderr": 1.723913664635584e-05, "rouge1_fmeasure": 0.007694154582353487, "rouge1_fmeasure_stderr": 0.0007786894464533563, "rouge1_precision": 0.009747204395211186, "rouge1_precision_stderr": 0.0012001063626629186, "rouge1_recall": 0.010818217770801558, "rouge1_recall_stderr": 0.0011201653112752584, "rouge2_fmeasure": 0.0020696459090992687, "rouge2_fmeasure_stderr": 0.0003052852492354327, "rouge2_precision": 0.00346911836539752, "rouge2_precision_stderr": 0.0007785617188891463, "rouge2_recall": 0.0028992540395902716, "rouge2_recall_stderr": 0.0004889741309162621, "rougeL_fmeasure": 0.006094780573936507, "rougeL_fmeasure_stderr": 0.0006210929394899102, "rougeL_precision": 0.00820275902366193, "rougeL_precision_stderr": 0.0010899421875228057, "rougeL_recall": 0.008616034645548207, "rougeL_recall_stderr": 0.0009109421330906957, "rougeLsum_fmeasure": 0.007121401627227017, "rougeLsum_fmeasure_stderr": 0.0007144210238327558, "rougeLsum_precision": 0.009145684266485845, "rougeLsum_precision_stderr": 0.001140205318254729, "rougeLsum_recall": 0.01010245236626395, "rougeLsum_recall_stderr": 0.0010491484523406256}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 3.5568265416850715, "bleu_stderr": 0.06342744631128652, "rouge1_fmeasure": 0.09330515979646851, "rouge1_fmeasure_stderr": 0.0020629838379730794, "rouge1_precision": 0.08693722544256345, 
"rouge1_precision_stderr": 0.002343255123550843, "rouge1_recall": 0.12292068428817848, "rouge1_recall_stderr": 0.002719689516817894, "rouge2_fmeasure": 0.02952151752219146, "rouge2_fmeasure_stderr": 0.0008886196728869984, "rouge2_precision": 0.02529674798890196, "rouge2_precision_stderr": 0.0008129003273986918, "rouge2_recall": 0.040333921659969615, "rouge2_recall_stderr": 0.001229134965827211, "rougeL_fmeasure": 0.08789040657147261, "rougeL_fmeasure_stderr": 0.0019465412654236043, "rougeL_precision": 0.08035979365511788, "rougeL_precision_stderr": 0.002075584850514779, "rougeL_recall": 0.1166646556495462, "rougeL_recall_stderr": 0.0026043870982970425, "rougeLsum_fmeasure": 0.0790120057222766, "rougeLsum_fmeasure_stderr": 0.0017753745619975567, "rougeLsum_precision": 0.0733377427466678, "rougeLsum_precision_stderr": 0.001983289544738296, "rougeLsum_recall": 0.10444279836388244, "rougeLsum_recall_stderr": 0.002362609255378589}}, "1": {"generate_text_restaurant": {"bleu": 10.634437155547644, "bleu_stderr": 0.12197898843602793, "rouge1_fmeasure": 0.42455028471653405, "rouge1_fmeasure_stderr": 0.002281102372997913, "rouge1_precision": 0.5036238927741198, "rouge1_precision_stderr": 0.0030639625905961826, "rouge1_recall": 0.40514436210115906, "rouge1_recall_stderr": 0.0029117373605182816, "rouge2_fmeasure": 0.18895700263359685, "rouge2_fmeasure_stderr": 0.0018830672668286648, "rouge2_precision": 0.22689564783747476, "rouge2_precision_stderr": 0.0024061538562456454, "rouge2_recall": 0.18052137985902375, "rouge2_recall_stderr": 0.0020545376786729216, "rougeL_fmeasure": 0.3084688443741967, "rougeL_fmeasure_stderr": 0.0019428767162060765, "rougeL_precision": 0.36864728762379756, "rougeL_precision_stderr": 0.002713281513726948, "rougeL_recall": 0.29363759779765686, "rougeL_recall_stderr": 0.0023376030038032096, "rougeLsum_fmeasure": 0.3468756867188751, "rougeLsum_fmeasure_stderr": 0.0022023250835616795, "rougeLsum_precision": 0.4127256111014694, "rougeLsum_precision_stderr": 0.002948793559219785, "rougeLsum_recall": 0.33058511824023096, "rougeLsum_recall_stderr": 0.0026356389474789365}}, "2": {"generate_text_restaurant": {"bleu": 12.301413822806838, "bleu_stderr": 0.20262240014836314, "rouge1_fmeasure": 0.44741112713767206, "rouge1_fmeasure_stderr": 0.0022577398764700904, "rouge1_precision": 0.5399575561130455, "rouge1_precision_stderr": 0.003266325155497623, "rouge1_recall": 0.4213105802567709, "rouge1_recall_stderr": 0.002885247215704974, "rouge2_fmeasure": 0.21548365533974562, "rouge2_fmeasure_stderr": 0.0019747739093204414, "rouge2_precision": 0.26499561529306176, "rouge2_precision_stderr": 0.002709454676436721, "rouge2_recall": 0.20275553070716468, "rouge2_recall_stderr": 0.0021379579214280894, "rougeL_fmeasure": 0.33576511255116304, "rougeL_fmeasure_stderr": 0.0019955199500755402, "rougeL_precision": 0.4079476620735534, "rougeL_precision_stderr": 0.0029508638184802447, "rougeL_recall": 0.31544746800641166, "rougeL_recall_stderr": 0.0023905576125930693, "rougeLsum_fmeasure": 0.3739342428197366, "rougeLsum_fmeasure_stderr": 0.0022272664682864217, "rougeLsum_precision": 0.45262380773022504, "rougeLsum_precision_stderr": 0.0031661976419364917, "rougeLsum_recall": 0.3517025086244611, "rougeLsum_recall_stderr": 0.002668549137492389}}, "3": {"generate_text_restaurant": {"bleu": 12.735025253733303, "bleu_stderr": 0.1543696231726414, "rouge1_fmeasure": 0.453141053851804, "rouge1_fmeasure_stderr": 0.002219078490330037, "rouge1_precision": 0.5522600594628029, "rouge1_precision_stderr": 
0.0032337983631537323, "rouge1_recall": 0.4216891031745554, "rouge1_recall_stderr": 0.0028154912712571967, "rouge2_fmeasure": 0.22078340091336748, "rouge2_fmeasure_stderr": 0.001984445730938754, "rouge2_precision": 0.2734460336080908, "rouge2_precision_stderr": 0.002687888305690842, "rouge2_recall": 0.2056138268240387, "rouge2_recall_stderr": 0.0021506806137587555, "rougeL_fmeasure": 0.3441511648112412, "rougeL_fmeasure_stderr": 0.0020173164636790013, "rougeL_precision": 0.42214813908034704, "rougeL_precision_stderr": 0.002984968214396239, "rougeL_recall": 0.3194742391133158, "rougeL_recall_stderr": 0.002376039465602172, "rougeLsum_fmeasure": 0.38306881886799177, "rougeLsum_fmeasure_stderr": 0.0022312065315876588, "rougeLsum_precision": 0.4678001402841005, "rougeLsum_precision_stderr": 0.003166148782356431, "rougeLsum_recall": 0.3561146609929946, "rougeLsum_recall_stderr": 0.002639031001812239}}, "4": {"generate_text_restaurant": {"bleu": 12.980269120476393, "bleu_stderr": 0.15679846983981172, "rouge1_fmeasure": 0.45385752995359196, "rouge1_fmeasure_stderr": 0.0022261159480431664, "rouge1_precision": 0.5540966162409572, "rouge1_precision_stderr": 0.0032485268958775157, "rouge1_recall": 0.4193651329533393, "rouge1_recall_stderr": 0.0027376128022198886, "rouge2_fmeasure": 0.22319746312995775, "rouge2_fmeasure_stderr": 0.001997523485956697, "rouge2_precision": 0.27761999296577594, "rouge2_precision_stderr": 0.00272673637476465, "rouge2_recall": 0.2057871362479186, "rouge2_recall_stderr": 0.002107586634613798, "rougeL_fmeasure": 0.3466328962197939, "rougeL_fmeasure_stderr": 0.002037579341369906, "rougeL_precision": 0.42526614941701973, "rougeL_precision_stderr": 0.0029723620616554118, "rougeL_recall": 0.3197042271839403, "rougeL_recall_stderr": 0.0023489094620180295, "rougeLsum_fmeasure": 0.3846017339795443, "rougeLsum_fmeasure_stderr": 0.0022611591223662183, "rougeLsum_precision": 0.46993141251157605, "rougeLsum_precision_stderr": 0.0031752194176965357, "rougeLsum_recall": 0.3554430255918313, "rougeLsum_recall_stderr": 0.002627784362118594}}, "5": {"generate_text_restaurant": {"bleu": 12.969399191953066, "bleu_stderr": 0.19941195172606874, "rouge1_fmeasure": 0.45648227298778776, "rouge1_fmeasure_stderr": 0.002201391493069159, "rouge1_precision": 0.5599067602424606, "rouge1_precision_stderr": 0.003248452924564786, "rouge1_recall": 0.4190398684201684, "rouge1_recall_stderr": 0.0026925252692156097, "rouge2_fmeasure": 0.2252413723142184, "rouge2_fmeasure_stderr": 0.0020001196385859236, "rouge2_precision": 0.28210620578099194, "rouge2_precision_stderr": 0.0027749512966293586, "rouge2_recall": 0.20583337920488634, "rouge2_recall_stderr": 0.0020734330299209463, "rougeL_fmeasure": 0.34937543650436564, "rougeL_fmeasure_stderr": 0.0020568420192510293, "rougeL_precision": 0.43041795385401294, "rougeL_precision_stderr": 0.0030021706663444104, "rougeL_recall": 0.32016551351833494, "rougeL_recall_stderr": 0.0023326595557985243, "rougeLsum_fmeasure": 0.3872005526696519, "rougeLsum_fmeasure_stderr": 0.0022387438546437395, "rougeLsum_precision": 0.47570615877600253, "rougeLsum_precision_stderr": 0.0031943178349575175, "rougeLsum_recall": 0.35522919936052744, "rougeLsum_recall_stderr": 0.0025627959197265897}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.946115392938811, "bleu_stderr": 0.05712427893814158, "rouge1_fmeasure": 0.20822893238963622, "rouge1_fmeasure_stderr": 0.0025732099733206905, "rouge1_precision": 0.15996807239746452, "rouge1_precision_stderr": 0.002287623127618875, "rouge1_recall": 
0.3348486397737723, "rouge1_recall_stderr": 0.004426564044230394, "rouge2_fmeasure": 0.04662550924488616, "rouge2_fmeasure_stderr": 0.0015808380503390949, "rouge2_precision": 0.03502290300971957, "rouge2_precision_stderr": 0.0012502860060335647, "rouge2_recall": 0.07878986425845928, "rouge2_recall_stderr": 0.0027497076817277815, "rougeL_fmeasure": 0.15716969719770496, "rougeL_fmeasure_stderr": 0.001890560693868338, "rougeL_precision": 0.12042436779200985, "rougeL_precision_stderr": 0.0016727906904202222, "rougeL_recall": 0.2547012746203713, "rougeL_recall_stderr": 0.0034518315416116534, "rougeLsum_fmeasure": 0.16384073920324538, "rougeLsum_fmeasure_stderr": 0.0021628411307558727, "rougeLsum_precision": 0.12509348444149254, "rougeLsum_precision_stderr": 0.0018171135290296517, "rougeLsum_recall": 0.26636152259041823, "rougeLsum_recall_stderr": 0.003922514826770532}}, "1": {"article_DOC_summary": {"bleu": 1.225124596618229, "bleu_stderr": 0.055943295576705576, "rouge1_fmeasure": 0.17078360208253435, "rouge1_fmeasure_stderr": 0.00231048386226248, "rouge1_precision": 0.12139290293794137, "rouge1_precision_stderr": 0.0017186435592393995, "rouge1_recall": 0.30012669343441956, "rouge1_recall_stderr": 0.003961780861183236, "rouge2_fmeasure": 0.03221746451360407, "rouge2_fmeasure_stderr": 0.0012284696045860488, "rouge2_precision": 0.02269523637794389, "rouge2_precision_stderr": 0.0008701271780094139, "rouge2_recall": 0.05820863019650622, "rouge2_recall_stderr": 0.002317649222526862, "rougeL_fmeasure": 0.1326696954050304, "rougeL_fmeasure_stderr": 0.0017050900928064741, "rougeL_precision": 0.09406591592734283, "rougeL_precision_stderr": 0.0012562940275390134, "rougeL_recall": 0.23499863397575185, "rougeL_recall_stderr": 0.003092447038121959, "rougeLsum_fmeasure": 0.1367237105605167, "rougeLsum_fmeasure_stderr": 0.0018683833664150447, "rougeLsum_precision": 0.09691207312328678, "rougeLsum_precision_stderr": 0.0013715038329889693, "rougeLsum_recall": 0.24201399682685507, "rougeLsum_recall_stderr": 0.0033300150361477363}}, "2": {"article_DOC_summary": {"bleu": 1.2269137145831717, "bleu_stderr": 0.034355594509920095, "rouge1_fmeasure": 0.16695694916198175, "rouge1_fmeasure_stderr": 0.002323924165593795, "rouge1_precision": 0.11848036366865702, "rouge1_precision_stderr": 0.0017246954146993116, "rouge1_recall": 0.29422169708209367, "rouge1_recall_stderr": 0.0039719116230109855, "rouge2_fmeasure": 0.03165272210206821, "rouge2_fmeasure_stderr": 0.0013070047041779823, "rouge2_precision": 0.02223074116005843, "rouge2_precision_stderr": 0.0009192612875289195, "rouge2_recall": 0.05711930773667224, "rouge2_recall_stderr": 0.0024203482856540786, "rougeL_fmeasure": 0.13096007444561938, "rougeL_fmeasure_stderr": 0.001758251379995231, "rougeL_precision": 0.09275491223725246, "rougeL_precision_stderr": 0.001294346257886036, "rougeL_recall": 0.23228470677963386, "rougeL_recall_stderr": 0.003140444207327633, "rougeLsum_fmeasure": 0.13519296873601647, "rougeLsum_fmeasure_stderr": 0.0019247440048122983, "rougeLsum_precision": 0.09564362984293287, "rougeLsum_precision_stderr": 0.0014057525880751447, "rougeLsum_recall": 0.24031553341263567, "rougeLsum_recall_stderr": 0.003453370614981004}}, "3": {"article_DOC_summary": {"bleu": 1.222710009995911, "bleu_stderr": 0.07496047751936895, "rouge1_fmeasure": 0.1615135868779298, "rouge1_fmeasure_stderr": 0.002470602580719195, "rouge1_precision": 0.1173850576603971, "rouge1_precision_stderr": 0.0019398969338036355, "rouge1_recall": 0.2787275834874179, "rouge1_recall_stderr": 
0.004195016676211932, "rouge2_fmeasure": 0.029224092135722014, "rouge2_fmeasure_stderr": 0.001297780921679781, "rouge2_precision": 0.020922413744292524, "rouge2_precision_stderr": 0.0009719043275883042, "rouge2_recall": 0.052023279960273304, "rouge2_recall_stderr": 0.002365693856611846, "rougeL_fmeasure": 0.1256992016836946, "rougeL_fmeasure_stderr": 0.0018584540223603558, "rougeL_precision": 0.09118615638595852, "rougeL_precision_stderr": 0.001472658898846434, "rougeL_recall": 0.2185043098420338, "rougeL_recall_stderr": 0.0032923416071559855, "rougeLsum_fmeasure": 0.12975495548659838, "rougeLsum_fmeasure_stderr": 0.002051211253222052, "rougeLsum_precision": 0.09425406240290829, "rougeLsum_precision_stderr": 0.0016210406647485212, "rougeLsum_recall": 0.2252382766073172, "rougeLsum_recall_stderr": 0.003589188921251769}}, "4": {"article_DOC_summary": {"bleu": 0.5349754476502416, "bleu_stderr": 0.08512835589666831, "rouge1_fmeasure": 0.04449306237674175, "rouge1_fmeasure_stderr": 0.002539825023521398, "rouge1_precision": 0.03826276388948949, "rouge1_precision_stderr": 0.0024801127678932854, "rouge1_recall": 0.06926541130759056, "rouge1_recall_stderr": 0.003978351037602963, "rouge2_fmeasure": 0.007503219745005152, "rouge2_fmeasure_stderr": 0.0008079284857284853, "rouge2_precision": 0.006320743361780383, "rouge2_precision_stderr": 0.0008990691695882139, "rouge2_recall": 0.012019413723960436, "rouge2_recall_stderr": 0.00124174296434464, "rougeL_fmeasure": 0.034713569625035294, "rougeL_fmeasure_stderr": 0.0019657575195258263, "rougeL_precision": 0.03046353149195033, "rougeL_precision_stderr": 0.0021011732217140183, "rougeL_recall": 0.05431907246732738, "rougeL_recall_stderr": 0.0031184241903121235, "rougeLsum_fmeasure": 0.03737907165768346, "rougeLsum_fmeasure_stderr": 0.002128608781763403, "rougeLsum_precision": 0.032690090587705486, "rougeLsum_precision_stderr": 0.002217377898664004, "rougeLsum_recall": 0.05835102596740354, "rougeLsum_recall_stderr": 0.0033728750674449325}}, "5": {"article_DOC_summary": {"bleu": 9.902494720086563e-38, "bleu_stderr": 1.8754348008316148e-32, "rouge1_fmeasure": 0.0022847697499340787, "rouge1_fmeasure_stderr": 0.0006896753720124607, "rouge1_precision": 0.002520510024599048, "rouge1_precision_stderr": 0.0007565625350760487, "rouge1_recall": 0.0021570243419933768, "rouge1_recall_stderr": 0.000655053772708356, "rouge2_fmeasure": 0.00018332767216436527, "rouge2_fmeasure_stderr": 9.192832778053751e-05, "rouge2_precision": 0.00019899326225630333, "rouge2_precision_stderr": 9.93981067283471e-05, "rouge2_recall": 0.00017192081565000543, "rouge2_recall_stderr": 8.690421994979313e-05, "rougeL_fmeasure": 0.0017523449957555426, "rougeL_fmeasure_stderr": 0.0004894311950718817, "rougeL_precision": 0.0019439458549511707, "rougeL_precision_stderr": 0.0005457370791986779, "rougeL_recall": 0.0016548236956347244, "rougeL_recall_stderr": 0.0004639416377074074, "rougeLsum_fmeasure": 0.0019247498547093978, "rougeLsum_fmeasure_stderr": 0.0005375495150800006, "rougeLsum_precision": 0.0021468629280125277, "rougeLsum_precision_stderr": 0.0006084449815322187, "rougeLsum_recall": 0.0018062594366931065, "rougeLsum_recall_stderr": 0.0005018570788553906}}}}
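merged.json nests the same numbers the per-run agg files carry, keyed dataset → fewshot → prompt → metric; note the fewshot keys are JSON strings ("0" through "5"), not integers. A minimal sketch for navigating it, assuming only the structure visible in the blob above (the path is illustrative):

```python
import json

with open("2b855b18bc4seed1/evaluation/generation/merged.json") as f:
    merged = json.load(f)

# Fewshot keys are strings, not ints.
xsum_5shot = merged["gem_xsum"]["5"]["article_DOC_summary"]
print(xsum_5shot["rouge2_fmeasure"], xsum_5shot["bleu"])
```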
2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.440505975058967,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.03925926885568198
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.07303258540385325,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0013956948759738636
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.36507400240791016,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005207277796989448
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.11401113521512295,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0019030453976470602
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.03358434632060951,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.000842389625141015
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.17329341204300425,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.0034450528353974098
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.05247257816338737,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0011827703450859254
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.06874932087487405,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.0012613572237216058
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.3423137073673857,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.004722054196952728
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.10741785748120532,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0017301490825739295
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.0696948101540746,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.001324079492115929
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.34613176577415133,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.004811317119389049
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.10872891126332167,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0018051794462450466
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_2.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.46666858091535174,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.018833413013635402
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.07341298925567588,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0013121366271023323
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.3800249377740647,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.005112909181278494
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.11510966757275029,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0017858390071285543
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.03360825336454427,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0007932424750607851
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.1862779416722373,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0036516850220037643
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.05294107579757583,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.001118710876229426
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.0688709827206235,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.001210999027849737
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.3529927082231747,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.004590770405531237
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.10791868479229756,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0016523165241163688
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.07001967212649517,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0012551792002818921
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.35981102966802486,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.004723476880811747
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.10968844522778035,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0017066636269961225
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 2,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_3.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.5148612937193335,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.023258764360096086
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.0751652044966289,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0013667007055530862
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.38579633865658397,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.005096444006387486
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.11777172363016282,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0018284085963843188
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.03468953327441809,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0008291540847161602
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.19053185137760673,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0036905598739581467
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.05463630172665221,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0011564896623965845
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.07035911446447023,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0012335375128364402
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.3580868532189359,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.004531834139292797
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.11025599769233292,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0016650757546179005
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.07160900018554582,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0012846552736932328
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.36585158014383645,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.0047053685337424276
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.1122101583306418,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0017262902999793785
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 3,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.5647418608164382,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.02321598077608985
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.07569359025980668,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0013630553991433284
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.3976443232379198,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.0049461871286812726
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.11875570085857609,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.001786404318427974
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.03486410172214847,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0008176827715067252
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.1970252853734279,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.003650596187090204
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.054985463513762434,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.001122119667177031
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.06997073281118363,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.001205133821327662
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.3649664345785377,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.0044083367200935175
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.10987977139003118,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0016089540199373295
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.07185747380368572,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.001285044701214991
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.3764949005356292,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.0045956668994714435
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.11268637889960822,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0016883140502033781
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 4,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.5482714677989973,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.021675276789228544
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.0762060675085337,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.001355574274158061
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.4042879305483473,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.005164234121569231
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.1197570139047874,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.001779946242752045
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.03517530439964603,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0008100317312804931
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.20231681062642728,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0038157106473818547
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.05563099955118052,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0011201364188582906
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.0704939357267132,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.001212222390700002
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.3699300529634866,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.00453694448261558
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.11073414338981681,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0016091973313620345
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.07217372171442632,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0012650450721543114
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.3805359472242292,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.004740284500553459
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.11336245134873023,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0016677643371672299
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 5,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_2.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_precision": 0.1845535355915783,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0024946135883453604
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_recall": 0.2695517129175979,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.0026211468275759665
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_fmeasure": 0.19430590275766726,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0018344032028598204
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_precision": 0.04458948641409641,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0014120706650682048
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_recall": 0.062060456824661644,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0014736004894038064
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_fmeasure": 0.04377271235002487,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0009267663731236799
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_precision": 0.13779046902481237,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.00197834834988283
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_recall": 0.20396222281462895,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.0020998128273290364
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_fmeasure": 0.1442517974455914,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0012894698921139642
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_precision": 0.1735217764883826,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.002385080324318098
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_recall": 0.2539494164062588,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.002490453523195178
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_fmeasure": 0.18239177790885439,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0017083970519236546
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "bleu": 2.1278736826393443,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.060513597002875216
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 2,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_3.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_precision": 0.16344143511311807,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0028639585038610004
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_recall": 0.21705752761660305,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.003046622002813931
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_fmeasure": 0.15879291411941776,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0020860678913316435
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_precision": 0.03946657591259567,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0015428944821802232
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_recall": 0.04950947134943765,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0014616851671373423
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_fmeasure": 0.03512846249009941,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0008934035750431312
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_precision": 0.12569962559045028,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0023604226829550966
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_recall": 0.16607365892731357,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.002405649884473064
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_fmeasure": 0.11995388090764494,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.001534190538296726
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_precision": 0.1539380841235904,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0027372639212194004
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_recall": 0.20424916839256455,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.0028790801501679176
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_fmeasure": 0.1490069929693716,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.001947503662307039
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "bleu": 1.9219980526586609,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.06329741681309228
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 3,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_4.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_precision": 0.0571224955926628,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0023662002964983195
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_recall": 0.07207313604028937,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.0025952504300447
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_fmeasure": 0.05149268824953048,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.001797776534238523
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_precision": 0.013902404245260857,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0010495099961780859
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_recall": 0.017002286132095295,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0009860774655885615
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_fmeasure": 0.011715471831871544,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.000644787176192932
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_precision": 0.04528881825982844,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.001980637805535765
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_recall": 0.056145552183085444,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.002049147231321551
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_fmeasure": 0.039716516162972955,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0013806219368805613
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_precision": 0.05374955984533459,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.002257048632231794
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_recall": 0.06742318539230084,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.002435351108083965
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_fmeasure": 0.04810176172739388,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0016777266418722225
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "bleu": 0.48857425145328925,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.03366566494624167
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 4,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_5.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_precision": 0.009747204395211186,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0012001063626629186
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_recall": 0.010818217770801558,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.0011201653112752584
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_fmeasure": 0.007694154582353487,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0007786894464533563
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_precision": 0.00346911836539752,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0007785617188891463
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_recall": 0.0028992540395902716,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0004889741309162621
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_fmeasure": 0.0020696459090992687,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0003052852492354327
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_precision": 0.00820275902366193,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0010899421875228057
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_recall": 0.008616034645548207,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.0009109421330906957
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_fmeasure": 0.006094780573936507,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0006210929394899102
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_precision": 0.009145684266485845,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.001140205318254729
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_recall": 0.01010245236626395,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.0010491484523406256
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_fmeasure": 0.007121401627227017,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0007144210238327558
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "bleu": 1.1456693667719014e-05,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 1.723913664635584e-05
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b18bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 5,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
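The slim files for a given task and prompt differ only in "num_fewshot", so a headline metric can be compared across shot counts by globbing the file names used above. A short sketch under that assumption (stdlib only; the glob pattern follows the naming scheme of the files in this commit and would need adjusting for another task or prompt):

import glob
import json

# Pattern mirrors the file names above; swap the task/prompt stem as needed.
PATTERN = "2b855b18bc4seed1/evaluation/generation/slim.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_*.json"

rows = []
for path in glob.glob(PATTERN):
    with open(path) as f:
        data = json.load(f)
    shots = data["config"]["num_fewshot"]
    # BLEU appears in exactly one record per file.
    bleu = next(r["bleu"] for r in data["results"] if "bleu" in r)
    rows.append((shots, bleu))

for shots, bleu in sorted(rows):
    print(f"{shots}-shot BLEU: {bleu:.3f}")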