Commit 123ca89
Parent(s): bbf6528
Add files
Note: this view is limited to 50 files because the commit contains too many changes.
- .gitattributes +230 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/merged.csv +53 -0
- 2b855b11boscarseed1/evaluation/generation/merged.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.json +133 -0
.gitattributes
CHANGED
@@ -249,3 +249,233 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
249 |
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
250 |
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
251 |
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
252 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
253 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
254 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
255 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
256 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
257 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
258 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
259 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
260 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
261 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
262 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
263 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
264 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
265 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
266 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
267 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
268 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
269 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
270 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
271 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
272 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
273 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
274 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
275 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
276 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
277 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
278 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
279 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
280 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
281 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
282 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
283 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
284 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
285 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
286 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
287 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
288 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
289 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
290 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
291 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
292 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
293 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
294 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
295 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
296 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
297 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
298 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
299 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
300 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
301 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
302 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
303 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
304 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
305 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
306 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
307 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
308 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
309 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
310 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
311 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
312 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
313 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
314 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
315 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
316 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
317 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
318 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
319 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
320 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
321 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
322 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
323 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
324 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
325 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
326 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
327 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
328 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
329 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
330 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
331 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
332 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
333 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
334 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
335 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
336 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
337 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
338 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
339 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
340 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
341 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
342 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
343 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
344 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
345 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
346 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
347 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
348 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
349 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
350 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
351 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
352 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
353 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
354 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
355 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
356 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
357 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
358 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
359 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
360 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
361 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
362 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
363 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
364 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
365 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
366 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
367 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
368 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
369 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
370 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
371 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
372 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
373 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
374 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
375 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
376 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
377 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
378 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
379 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
380 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
381 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
382 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
383 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
384 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
385 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
386 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
387 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
388 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
389 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
390 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
391 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
392 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
393 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
394 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
395 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
396 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
397 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
398 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
399 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
400 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
401 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
402 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
403 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
404 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
405 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
406 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
407 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
408 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
409 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
410 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
411 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
412 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
413 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
414 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
415 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
416 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
417 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
418 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
419 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
420 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
421 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
422 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
423 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
424 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
425 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
426 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
427 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
428 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
429 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
430 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
431 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
432 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
433 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
434 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
435 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
436 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
437 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
438 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
439 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
440 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
441 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
442 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
443 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
444 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
445 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
446 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
447 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
448 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
449 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
450 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
451 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
452 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
453 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
454 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
455 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
456 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
457 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
458 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
459 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
460 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
461 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
462 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
463 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
464 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
465 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
466 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
467 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
468 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
469 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
470 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
471 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
472 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
473 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
474 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
475 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
476 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
477 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
478 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
479 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
480 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
481 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
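
The rules above route the evaluation outputs through Git LFS. As a rough illustration (not part of this commit), a checkout could list which result files are LFS-tracked by scanning .gitattributes for the filter=lfs field; the sketch below assumes the plain "<path> filter=lfs diff=lfs merge=lfs -text" layout used in these entries.

# Sketch only: enumerate paths routed through Git LFS by this .gitattributes.
from pathlib import Path

def lfs_tracked(gitattributes: str = ".gitattributes") -> list[str]:
    tracked = []
    for line in Path(gitattributes).read_text().splitlines():
        fields = line.split()
        # Rules here look like "<path> filter=lfs diff=lfs merge=lfs -text".
        if len(fields) > 1 and "filter=lfs" in fields[1:]:
            tracked.append(fields[0])  # first field is the path or glob
    return tracked

if __name__ == "__main__":
    for path in lfs_tracked():
        if path.endswith(".jsonl"):
            print(path)
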
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.json
ADDED
@@ -0,0 +1 @@
1 | +
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.3801106976509281, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.030814977906570784}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.09254608062110982, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003969016029393206}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2514658272132534, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005470193417734038}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.09839951058099769, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024506647455068047}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.04251830138142587, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0025980589101773847}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.11876788025878558, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0033890059840335}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04579430308932667, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0014988641201888384}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.08735615223930215, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0037276787142384374}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.24225619592357273, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.005268443646117898}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09334972395562345, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0022379508481899127}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.08738746346078487, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003771697087126988}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.23626985324693767, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.005054786193539789}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.09257280431668473, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022654509452080854}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
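
Each agg.*.json added in this commit follows the structure shown above: a "results" list in which every entry holds one metric (bleu, rouge1_precision, ...) together with its matching "<metric>_stderr", plus a "config" block recording the model path, num_fewshot, batch size, and seed. A minimal sketch (illustrative only, not part of the commit) for reading those scores back:

# Sketch only: read one agg.*.json from this commit and print its metrics.
import json

path = ("2b855b11boscarseed1/evaluation/generation/"
        "agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.json")

with open(path) as f:
    agg = json.load(f)

print("num_fewshot:", agg["config"]["num_fewshot"])
for entry in agg["results"]:
    # Each entry carries a single score plus its "<metric>_stderr" companion.
    scores = {k: v for k, v in entry.items() if isinstance(v, float)}
    print(entry["task_name"], entry["prompt_name"], scores)
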
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1 @@
1 | +
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5239721644834189, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03609798297687262}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.13082237107727746, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0041887696056868914}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.29894851591422195, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0051748543299946}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.14826158596606656, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0034048406078764424}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.06041678931722799, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0025853119808927536}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1438349096175946, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0034922545459490128}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.06901771335292758, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020860447213003814}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.11585043732455273, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003667689089676262}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.27586821042029314, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004795036002022945}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.13229541662919794, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002864700499056504}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.11910114676043697, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0037751935052806325}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.27899773980715475, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004795091819308235}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.13555256797626594, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002979591459721934}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6155617098906994, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04320711987914936}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.15764962909670655, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004811814678098913}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3156619362796641, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005146634895664645}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.16893359766104024, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0038194664000741757}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.07547310729323965, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002923357742977468}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.16143870017973952, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0037206349376128402}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.08285140079808971, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0024706623511134077}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.1391887515979127, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004157638480121235}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.292888997189622, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004754219104081643}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.15099445149853782, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0032341025124660036}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.14269660006491244, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004284849569606708}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.29574913885784265, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004809657463857144}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1540461900906591, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.003343046732804547}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7638442842916376, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.043708542511138676}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.17306675943358993, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005175271126864724}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3366968869783645, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0052711987086574656}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.18548340026259344, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.004223744602181456}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.08724475546238586, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.003316626156011152}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.17335505514005853, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003911222192069402}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.09385858633035163, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0028582868714185404}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.15183832422624116, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004487449653529665}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.30970296664886204, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0048061266737124434}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.16463283674108964, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0035889811954755296}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.15662013259115215, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004651735217163702}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3138687175842859, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004862972063876745}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.16864351715068068, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0037107680564301263}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.8719331256074123, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.057879221179053614}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.1851425968057546, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005242772266486215}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3521224313867968, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005038061225917597}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.19456487112141713, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.004109079813244224}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.09493067862691192, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0033973584763323903}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.18378368228130243, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003782300251501581}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.10023987500778037, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00282151252673031}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.163782526391218, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004592058011452551}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3247189913662455, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004610930009186234}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.17333927997130413, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.003493985482674142}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.16903111233990037, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004770080380107174}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.32924041133628895, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004661708271273997}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.17795216339276917, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0036338202974834652}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.9071599045860481, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04132800578881959}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.19439128617510687, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005421482683712362}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.360346127905188, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005185217658659474}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.20471471819889542, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.004366365024283305}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.10302514882290766, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0036082227594639027}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.19046117331587684, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0038953705603308754}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.10719970952871183, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00298368361864098}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.1710926703224802, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004698450665775182}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33314261355257685, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004754137888970346}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.18231989678501748, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00369484632221736}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.17698148297717947, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004898716157488609}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3375011370591266, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0047979049681730265}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.18722830056278536, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0038488776142113694}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
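Each agg.*.json file added above follows the same schema: a "results" array of per-metric records (bleu plus rouge1/rouge2/rougeL/rougeLsum precision, recall and f-measure, each paired with a *_stderr field) and a "config" object recording the model_args, num_fewshot, batch_size, limit, bootstrap_iters and seed used for the run. Below is a minimal parsing sketch, assuming plain Python with only the standard library; the collect_metrics helper and the example path are illustrative and are not part of this repository's tooling.

```python
import json
from pathlib import Path


def collect_metrics(path):
    """Read one agg.*.json file and return (num_fewshot, {metric_name: value})."""
    data = json.loads(Path(path).read_text())
    metrics = {}
    for record in data["results"]:
        # Each record carries one scored metric alongside shared prompt metadata;
        # keep the numeric scores and skip their *_stderr companions.
        for key, value in record.items():
            if isinstance(value, float) and not key.endswith("_stderr"):
                metrics[key] = value
    return data["config"]["num_fewshot"], metrics


# Example (hypothetical local path mirroring the layout added in this commit):
# fewshot, scores = collect_metrics(
#     "2b855b11boscarseed1/evaluation/generation/"
#     "agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.json"
# )
# print(fewshot, scores.get("bleu"), scores.get("rouge2_fmeasure"))
```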
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1476470442480304, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0027640730190324147}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1987093879718628, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029637609362227523}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.14881520480086885, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021587431282467824}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0287482827215408, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009244009986468182}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.04207438489658476, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013026460720942934}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.030793880936937773, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008589680063776107}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11919226323771576, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0023730668962417723}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1605716758450966, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002356213726056233}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.11844891250439296, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016052290609412721}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.13689809260299124, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002643420711314581}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.18318434106372952, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002750769375883103}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1370648388242123, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019890326379503644}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.7689251791579401, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08056354651616494}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.22376650173536908, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0033099883221279717}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.233044248806137, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002952305065889663}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.19163795576049275, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002095941856037326}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0566885511786849, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001922144512262528}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.054244362067460944, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015009900283382725}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04458039177873789, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001138994780925126}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.17115197989214356, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002730566360378985}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1753866049193498, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0022597816497720055}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.14368563344942828, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", 
"dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015580386025042744}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.20943173970561998, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003140748148754858}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.21808332623771157, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002768104713925207}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.17897651689986416, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019496135779428591}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.690348502752936, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04816844705930497}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.29318751960337, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003906796008685731}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2507046446902222, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029715006311925958}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.22003513847299133, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021076546284530782}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.08627504991656038, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002487880526812255}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.06676278485251605, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016295916757062398}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.058544685834468764, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012428137662634153}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.22625403204772632, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003335160888542255}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.187612565720097, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0022917340918039352}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.1646868100175827, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", 
"dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015819419652510194}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.27427864211994335, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0037481794388014565}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.23337797750912687, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002789848859667374}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.20484462592482003, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019789253350355387}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.431375370657664, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04760571110301349}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.2680095180907205, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004391894212593074}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.19475234917332057, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003183500503824711}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.17923839306645714, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002399847035078048}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.07985656912272211, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0026354691331866935}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05177357998459295, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014956995793890587}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.047670044472559195, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012123884267024713}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.21163784293597465, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003752063265513303}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.14785575418501767, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002456148738115927}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13674199106912197, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018380053950085664}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.25180299854737964, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004210636657378989}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.18186826651932245, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002992911142368979}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.16742344251678107, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022503819271764707}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.483185496130131, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08015886448893912}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09408573017762623, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0036374250034598983}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.062322473280787063, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0024278586980927977}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.058650885364296294, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020654868574018297}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.031658574230410935, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002078738928518879}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.017760904175768037, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001047213466542941}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.01681560917128696, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008901092196544399}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0777613311142758, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0031390664493088517}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.0487201551142007, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018838472253448453}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.04642380293882198, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016260955707942933}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.0887232057330063, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0034775731184571623}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.05793373755686782, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002257623454700228}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.05466679297892778, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019233345508114297}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.07172095791495998, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.00789659723575077}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.01565502170194915, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016289778088393908}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.009727975798126835, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.001037130264924102}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.009287298413081357, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0008981455376742198}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.005637375900266298, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008792716431913606}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0026767834375694384, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0003637992491962634}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0027003256417814083, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00035332419738820606}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.013183113805468402, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014306544932015776}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.007603533275554497, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0008034168084209064}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.0074307876775269515, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0007182295177501359}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.014972493827476105, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015795900577164992}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.009084241224624135, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0009715729115244676}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.008708338368004326, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0008427383878531978}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.989972548640045e-14, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 3.805866896172829e-13}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.083744966873544, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13510527315117016}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5736572827150358, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032384910127830125}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.45360607830143157, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00292439830442952}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4820649513030502, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002281818602228921}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2890815394423686, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027576555191893308}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2258474956058803, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023037492151092706}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.23993823867723907, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002119970027404284}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4312357546460138, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003024393522392191}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3384918202097905, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0025049483648365892}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.360441051331739, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021288701320219035}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4832111553759888, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0032277890930449953}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.38118510800546923, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027860575186553676}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.4054184452840819, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023556882595847915}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.517833527253206, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15177793846842857}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5749779290519293, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032341752537122363}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.45475966418004093, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028643752694128203}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.48422210730223353, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002254899743809727}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2940709470644466, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0028229255550865045}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22940149257285175, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002309314382978281}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.24433665653182082, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021460412936580016}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4319655914057995, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003014148626675866}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.33988537212359804, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002502036523908583}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3623534452047717, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021481098655366693}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4850678394646162, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003225531635325354}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3834152683878887, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002767686832931401}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.4082890917863408, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023637430000640375}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.324593922782439, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15241024294192065}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5781515372518692, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003222285163472006}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4515334921668262, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028702658791373904}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.48388952068811447, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002269811342308489}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2959581192468744, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002871716283121458}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22736711269872564, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023134890859256136}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.2439106267338791, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021758038765035117}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.43626029829832247, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00305806886721717}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3384883136722747, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024808236614542274}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.36343854325074254, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002159899813151206}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4884840799940051, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003239765144780713}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.38127900697364553, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027775578392814186}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.40862207341517703, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023818673387890456}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1422502333933988, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024472995402754156}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.30983358436212066, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0046212886851808195}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.18666478200505243, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002739760762211627}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.027222625611619575, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010616231746636696}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06332962384389315, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002357369300824623}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.03663776309033808, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013441419160810165}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10615716840760141, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018146571946823608}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.23288499948545, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003534724466115114}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1393564669391015, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020056393472558214}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.11259274644929758, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019647845845397575}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.24767381751028186, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003919141916162093}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.14799512098760817, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022200856382967975}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.392324866942817, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10333126789792332}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.04678909556843838, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0029899451023121886}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.08036818701688478, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004523562391482061}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.052736675331581245, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0028824641785999464}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.009587305236726377, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001262427047456841}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.01705350789198973, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015073512113313507}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.010697176811020805, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009584744397015601}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.03558582877518959, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0024020712090389224}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.06057475710697018, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0034243280850370624}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.039672294104886395, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002180086549370051}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.03769649897971859, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0025149825952445427}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.06477261558492757, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037095733376953295}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0423806306620413, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023629304701070007}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.7258559232581546, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10507057432049555}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.00279871811503332, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008181720087639885}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.002210852921667167, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0006418342106197504}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0024263300495769666, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0007044351988650494}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.00025897151313355527, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00011577315282337257}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00019125158969776062, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 8.696066491461003e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.00021830317883798518, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 9.811329959110373e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0020472883264226207, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0005641122660611984}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0016493867049964376, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00045298383898349467}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.001790621018457292, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": 
null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0004902962943570475}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0022311468898594308, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0006382468727242592}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.00179354722651545, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0005153565164091152}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0019503315839039083, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005574612507559669}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.1269861498697588e-39, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 5.474659303973499e-34}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:57a5ef177c04bd68fcea8a3c1510172960f297e46a1cdbbfd6e2dfcef9d5b042
size 3837292
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3fc4c06f3b499deb995437419652e3b035ba3335eee17d3448a372b352bda78c
size 4717607
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3122da653c4d8121ab578c301dfdbef8faada95ef5d68f25223abeae853925e1
size 5557036
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:32c59eb0315bb94f710bef270d51edeeef82dbbc1e4b4f87039b9865c8b5cd7a
size 6464001
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0fc4edd863510066fd085c5f44f64f27460332d51d6b7909ad1407c50ed6abbd
size 7346922
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7ff66e93dcd2c5ceeda19a6c748198fc03740794dc2e7332a617477a43ed6e73
size 8236047
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3c0e5544079dd758916c08d8abb59f8e59cb197b649cb375a01b1397cf0011b6
size 7525283
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1548d672bbf50f85117bd5d14c541c8d1d50acc09064d7470ba4f87ba9f1b3cc
size 13085418
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2e1918223eb02df317c99cf02e97d2a259cd10124e183adca6568e03627adacc
size 18628284
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e1c0df3ac36cf02328cc932fd5aa3b193394ce8d4ee4a658a20765d5d2f513f6
size 24052227
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:69d35a9d7dbd698fc11d554aa58f44a10005430c73d6a7e7898b66c7d28743aa
size 29371822
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:80e7a43ce56e97a4accf09f438b275f2db53838d3733afedd585d07d6875e4f2
size 34783939
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0af222e1f3a7d6de15bef8b102e4b0137c59cb9b9a9446da1942ffb45462dfc1
size 7175409
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ba20fae0860784720f6249a963764b73a4ddc769f425480db554c1edf4a5c787
size 8254125
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:06c063ff506ba0adbe7b0b8b934c5e8232cf3ae663faf352409510b886d6c777
size 9337214
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a9480dc6730f5e9c64debd64cec332ef568120a71cc48ece316c1af8f7f9b5f0
size 9626426
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7347292c96bb4c023ea387a73208bcc1659ef87175649a15d64fe169105e5fb5
size 11668577
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f95019b418a440e683993690c37173ae15cbebb47caedfe6273d0eab3dba7b75
size 13897460
2b855b11boscarseed1/evaluation/generation/merged.csv
ADDED
@@ -0,0 +1,53 @@
dataset,fewshots,prompt,metric,value
e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.02782065901980275
e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.02782065901980275
e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.21501029566008578
e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.21501029566008578
e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.2351759267020243
e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.2351759267020243
e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.23993823867723907
e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.23993823867723907
e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.24433665653182082
e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.24433665653182082
e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.2439106267338791
e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.2439106267338791
e2e_nlg_cleaned,5,average,multiple,0.20103206722080863
gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.03971064304403434
gem_xsum,0,median,rouge2_fmeasure,0.03971064304403434
gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.03654673156140313
gem_xsum,1,median,rouge2_fmeasure,0.03654673156140313
gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.03789731741399596
gem_xsum,2,median,rouge2_fmeasure,0.03789731741399596
gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.03663776309033808
gem_xsum,3,median,rouge2_fmeasure,0.03663776309033808
gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.010697176811020805
gem_xsum,4,median,rouge2_fmeasure,0.010697176811020805
gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.00021830317883798518
gem_xsum,5,median,rouge2_fmeasure,0.00021830317883798518
gem_xsum,5,average,multiple,0.02695132251660505
web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.04579430308932667
web_nlg_en,0,median,rouge2_fmeasure,0.04579430308932667
web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.06901771335292758
web_nlg_en,1,median,rouge2_fmeasure,0.06901771335292758
web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.08285140079808971
web_nlg_en,2,median,rouge2_fmeasure,0.08285140079808971
web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.09385858633035163
web_nlg_en,3,median,rouge2_fmeasure,0.09385858633035163
web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.10023987500778037
web_nlg_en,4,median,rouge2_fmeasure,0.10023987500778037
web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.10719970952871183
web_nlg_en,5,median,rouge2_fmeasure,0.10719970952871183
web_nlg_en,5,average,multiple,0.0831602646845313
wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.030793880936937773
wiki_lingua_en,0,median,rouge2_fmeasure,0.030793880936937773
wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.04458039177873789
wiki_lingua_en,1,median,rouge2_fmeasure,0.04458039177873789
wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.058544685834468764
wiki_lingua_en,2,median,rouge2_fmeasure,0.058544685834468764
wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.047670044472559195
wiki_lingua_en,3,median,rouge2_fmeasure,0.047670044472559195
wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.01681560917128696
wiki_lingua_en,4,median,rouge2_fmeasure,0.01681560917128696
wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0027003256417814083
wiki_lingua_en,5,median,rouge2_fmeasure,0.0027003256417814083
wiki_lingua_en,5,average,multiple,0.03351748963929533
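The merged.csv above keeps one metric per task (rouge2_fmeasure for the single prompt of each task), a `median` row per few-shot setting, and one `average,multiple` row per dataset; on this data the average row equals the mean of the six per-fewshot medians. A minimal sanity-check sketch, assuming pandas is available and the file is read from this repo's path:

```python
# Minimal sketch (pandas is an assumed dependency, not shipped with this repo).
# Checks that each dataset's "average,multiple" row is the mean of its six
# per-fewshot "median" rouge2_fmeasure rows in merged.csv.
import pandas as pd

df = pd.read_csv("2b855b11boscarseed1/evaluation/generation/merged.csv")

medians = df[df["prompt"] == "median"].groupby("dataset")["value"].mean()
averages = df[df["prompt"] == "average"].set_index("dataset")["value"]

for dataset, avg in averages.items():
    # e.g. e2e_nlg_cleaned: 0.20103... is the mean of its six median rows
    assert abs(medians[dataset] - avg) < 1e-12, dataset
```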
2b855b11boscarseed1/evaluation/generation/merged.json
ADDED
@@ -0,0 +1 @@
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.3801106976509281, "bleu_stderr": 0.030814977906570784, "rouge1_fmeasure": 0.09839951058099769, "rouge1_fmeasure_stderr": 0.0024506647455068047, "rouge1_precision": 0.09254608062110982, "rouge1_precision_stderr": 0.003969016029393206, "rouge1_recall": 0.2514658272132534, "rouge1_recall_stderr": 0.005470193417734038, "rouge2_fmeasure": 0.04579430308932667, "rouge2_fmeasure_stderr": 0.0014988641201888384, "rouge2_precision": 0.04251830138142587, "rouge2_precision_stderr": 0.0025980589101773847, "rouge2_recall": 0.11876788025878558, "rouge2_recall_stderr": 0.0033890059840335, "rougeL_fmeasure": 0.09334972395562345, "rougeL_fmeasure_stderr": 0.0022379508481899127, "rougeL_precision": 0.08735615223930215, "rougeL_precision_stderr": 0.0037276787142384374, "rougeL_recall": 0.24225619592357273, "rougeL_recall_stderr": 0.005268443646117898, "rougeLsum_fmeasure": 0.09257280431668473, "rougeLsum_fmeasure_stderr": 0.0022654509452080854, "rougeLsum_precision": 0.08738746346078487, "rougeLsum_precision_stderr": 0.003771697087126988, "rougeLsum_recall": 0.23626985324693767, "rougeLsum_recall_stderr": 0.005054786193539789}}, "1": {"PALM_prompt": {"bleu": 0.5239721644834189, "bleu_stderr": 0.03609798297687262, "rouge1_fmeasure": 0.14826158596606656, "rouge1_fmeasure_stderr": 0.0034048406078764424, "rouge1_precision": 0.13082237107727746, "rouge1_precision_stderr": 0.0041887696056868914, "rouge1_recall": 0.29894851591422195, "rouge1_recall_stderr": 0.0051748543299946, "rouge2_fmeasure": 0.06901771335292758, "rouge2_fmeasure_stderr": 0.0020860447213003814, "rouge2_precision": 0.06041678931722799, "rouge2_precision_stderr": 0.0025853119808927536, "rouge2_recall": 0.1438349096175946, "rouge2_recall_stderr": 0.0034922545459490128, "rougeL_fmeasure": 0.13229541662919794, "rougeL_fmeasure_stderr": 0.002864700499056504, "rougeL_precision": 0.11585043732455273, "rougeL_precision_stderr": 0.003667689089676262, "rougeL_recall": 0.27586821042029314, "rougeL_recall_stderr": 0.004795036002022945, "rougeLsum_fmeasure": 0.13555256797626594, "rougeLsum_fmeasure_stderr": 0.002979591459721934, "rougeLsum_precision": 0.11910114676043697, "rougeLsum_precision_stderr": 0.0037751935052806325, "rougeLsum_recall": 0.27899773980715475, "rougeLsum_recall_stderr": 0.004795091819308235}}, "2": {"PALM_prompt": {"bleu": 0.6155617098906994, "bleu_stderr": 0.04320711987914936, "rouge1_fmeasure": 0.16893359766104024, "rouge1_fmeasure_stderr": 0.0038194664000741757, "rouge1_precision": 0.15764962909670655, "rouge1_precision_stderr": 0.004811814678098913, "rouge1_recall": 0.3156619362796641, "rouge1_recall_stderr": 0.005146634895664645, "rouge2_fmeasure": 0.08285140079808971, "rouge2_fmeasure_stderr": 0.0024706623511134077, "rouge2_precision": 0.07547310729323965, "rouge2_precision_stderr": 0.002923357742977468, "rouge2_recall": 0.16143870017973952, "rouge2_recall_stderr": 0.0037206349376128402, "rougeL_fmeasure": 0.15099445149853782, "rougeL_fmeasure_stderr": 0.0032341025124660036, "rougeL_precision": 0.1391887515979127, "rougeL_precision_stderr": 0.004157638480121235, "rougeL_recall": 0.292888997189622, "rougeL_recall_stderr": 0.004754219104081643, "rougeLsum_fmeasure": 0.1540461900906591, "rougeLsum_fmeasure_stderr": 0.003343046732804547, "rougeLsum_precision": 0.14269660006491244, "rougeLsum_precision_stderr": 0.004284849569606708, "rougeLsum_recall": 0.29574913885784265, "rougeLsum_recall_stderr": 0.004809657463857144}}, "3": {"PALM_prompt": {"bleu": 0.7638442842916376, "bleu_stderr": 
0.043708542511138676, "rouge1_fmeasure": 0.18548340026259344, "rouge1_fmeasure_stderr": 0.004223744602181456, "rouge1_precision": 0.17306675943358993, "rouge1_precision_stderr": 0.005175271126864724, "rouge1_recall": 0.3366968869783645, "rouge1_recall_stderr": 0.0052711987086574656, "rouge2_fmeasure": 0.09385858633035163, "rouge2_fmeasure_stderr": 0.0028582868714185404, "rouge2_precision": 0.08724475546238586, "rouge2_precision_stderr": 0.003316626156011152, "rouge2_recall": 0.17335505514005853, "rouge2_recall_stderr": 0.003911222192069402, "rougeL_fmeasure": 0.16463283674108964, "rougeL_fmeasure_stderr": 0.0035889811954755296, "rougeL_precision": 0.15183832422624116, "rougeL_precision_stderr": 0.004487449653529665, "rougeL_recall": 0.30970296664886204, "rougeL_recall_stderr": 0.0048061266737124434, "rougeLsum_fmeasure": 0.16864351715068068, "rougeLsum_fmeasure_stderr": 0.0037107680564301263, "rougeLsum_precision": 0.15662013259115215, "rougeLsum_precision_stderr": 0.004651735217163702, "rougeLsum_recall": 0.3138687175842859, "rougeLsum_recall_stderr": 0.004862972063876745}}, "4": {"PALM_prompt": {"bleu": 0.8719331256074123, "bleu_stderr": 0.057879221179053614, "rouge1_fmeasure": 0.19456487112141713, "rouge1_fmeasure_stderr": 0.004109079813244224, "rouge1_precision": 0.1851425968057546, "rouge1_precision_stderr": 0.005242772266486215, "rouge1_recall": 0.3521224313867968, "rouge1_recall_stderr": 0.005038061225917597, "rouge2_fmeasure": 0.10023987500778037, "rouge2_fmeasure_stderr": 0.00282151252673031, "rouge2_precision": 0.09493067862691192, "rouge2_precision_stderr": 0.0033973584763323903, "rouge2_recall": 0.18378368228130243, "rouge2_recall_stderr": 0.003782300251501581, "rougeL_fmeasure": 0.17333927997130413, "rougeL_fmeasure_stderr": 0.003493985482674142, "rougeL_precision": 0.163782526391218, "rougeL_precision_stderr": 0.004592058011452551, "rougeL_recall": 0.3247189913662455, "rougeL_recall_stderr": 0.004610930009186234, "rougeLsum_fmeasure": 0.17795216339276917, "rougeLsum_fmeasure_stderr": 0.0036338202974834652, "rougeLsum_precision": 0.16903111233990037, "rougeLsum_precision_stderr": 0.004770080380107174, "rougeLsum_recall": 0.32924041133628895, "rougeLsum_recall_stderr": 0.004661708271273997}}, "5": {"PALM_prompt": {"bleu": 0.9071599045860481, "bleu_stderr": 0.04132800578881959, "rouge1_fmeasure": 0.20471471819889542, "rouge1_fmeasure_stderr": 0.004366365024283305, "rouge1_precision": 0.19439128617510687, "rouge1_precision_stderr": 0.005421482683712362, "rouge1_recall": 0.360346127905188, "rouge1_recall_stderr": 0.005185217658659474, "rouge2_fmeasure": 0.10719970952871183, "rouge2_fmeasure_stderr": 0.00298368361864098, "rouge2_precision": 0.10302514882290766, "rouge2_precision_stderr": 0.0036082227594639027, "rouge2_recall": 0.19046117331587684, "rouge2_recall_stderr": 0.0038953705603308754, "rougeL_fmeasure": 0.18231989678501748, "rougeL_fmeasure_stderr": 0.00369484632221736, "rougeL_precision": 0.1710926703224802, "rougeL_precision_stderr": 0.004698450665775182, "rougeL_recall": 0.33314261355257685, "rougeL_recall_stderr": 0.004754137888970346, "rougeLsum_fmeasure": 0.18722830056278536, "rougeLsum_fmeasure_stderr": 0.0038488776142113694, "rougeLsum_precision": 0.17698148297717947, "rougeLsum_precision_stderr": 0.004898716157488609, "rougeLsum_recall": 0.3375011370591266, "rougeLsum_recall_stderr": 0.0047979049681730265}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.7689251791579401, "bleu_stderr": 0.08056354651616494, "rouge1_fmeasure": 0.14881520480086885, 
"rouge1_fmeasure_stderr": 0.0021587431282467824, "rouge1_precision": 0.1476470442480304, "rouge1_precision_stderr": 0.0027640730190324147, "rouge1_recall": 0.1987093879718628, "rouge1_recall_stderr": 0.0029637609362227523, "rouge2_fmeasure": 0.030793880936937773, "rouge2_fmeasure_stderr": 0.0008589680063776107, "rouge2_precision": 0.0287482827215408, "rouge2_precision_stderr": 0.0009244009986468182, "rouge2_recall": 0.04207438489658476, "rouge2_recall_stderr": 0.0013026460720942934, "rougeL_fmeasure": 0.11844891250439296, "rougeL_fmeasure_stderr": 0.0016052290609412721, "rougeL_precision": 0.11919226323771576, "rougeL_precision_stderr": 0.0023730668962417723, "rougeL_recall": 0.1605716758450966, "rougeL_recall_stderr": 0.002356213726056233, "rougeLsum_fmeasure": 0.1370648388242123, "rougeLsum_fmeasure_stderr": 0.0019890326379503644, "rougeLsum_precision": 0.13689809260299124, "rougeLsum_precision_stderr": 0.002643420711314581, "rougeLsum_recall": 0.18318434106372952, "rougeLsum_recall_stderr": 0.002750769375883103}}, "1": {"tldr_en": {"bleu": 2.690348502752936, "bleu_stderr": 0.04816844705930497, "rouge1_fmeasure": 0.19163795576049275, "rouge1_fmeasure_stderr": 0.002095941856037326, "rouge1_precision": 0.22376650173536908, "rouge1_precision_stderr": 0.0033099883221279717, "rouge1_recall": 0.233044248806137, "rouge1_recall_stderr": 0.002952305065889663, "rouge2_fmeasure": 0.04458039177873789, "rouge2_fmeasure_stderr": 0.001138994780925126, "rouge2_precision": 0.0566885511786849, "rouge2_precision_stderr": 0.001922144512262528, "rouge2_recall": 0.054244362067460944, "rouge2_recall_stderr": 0.0015009900283382725, "rougeL_fmeasure": 0.14368563344942828, "rougeL_fmeasure_stderr": 0.0015580386025042744, "rougeL_precision": 0.17115197989214356, "rougeL_precision_stderr": 0.002730566360378985, "rougeL_recall": 0.1753866049193498, "rougeL_recall_stderr": 0.0022597816497720055, "rougeLsum_fmeasure": 0.17897651689986416, "rougeLsum_fmeasure_stderr": 0.0019496135779428591, "rougeLsum_precision": 0.20943173970561998, "rougeLsum_precision_stderr": 0.003140748148754858, "rougeLsum_recall": 0.21808332623771157, "rougeLsum_recall_stderr": 0.002768104713925207}}, "2": {"tldr_en": {"bleu": 3.431375370657664, "bleu_stderr": 0.04760571110301349, "rouge1_fmeasure": 0.22003513847299133, "rouge1_fmeasure_stderr": 0.0021076546284530782, "rouge1_precision": 0.29318751960337, "rouge1_precision_stderr": 0.003906796008685731, "rouge1_recall": 0.2507046446902222, "rouge1_recall_stderr": 0.0029715006311925958, "rouge2_fmeasure": 0.058544685834468764, "rouge2_fmeasure_stderr": 0.0012428137662634153, "rouge2_precision": 0.08627504991656038, "rouge2_precision_stderr": 0.002487880526812255, "rouge2_recall": 0.06676278485251605, "rouge2_recall_stderr": 0.0016295916757062398, "rougeL_fmeasure": 0.1646868100175827, "rougeL_fmeasure_stderr": 0.0015819419652510194, "rougeL_precision": 0.22625403204772632, "rougeL_precision_stderr": 0.003335160888542255, "rougeL_recall": 0.187612565720097, "rougeL_recall_stderr": 0.0022917340918039352, "rougeLsum_fmeasure": 0.20484462592482003, "rougeLsum_fmeasure_stderr": 0.0019789253350355387, "rougeLsum_precision": 0.27427864211994335, "rougeLsum_precision_stderr": 0.0037481794388014565, "rougeLsum_recall": 0.23337797750912687, "rougeLsum_recall_stderr": 0.002789848859667374}}, "3": {"tldr_en": {"bleu": 2.483185496130131, "bleu_stderr": 0.08015886448893912, "rouge1_fmeasure": 0.17923839306645714, "rouge1_fmeasure_stderr": 0.002399847035078048, "rouge1_precision": 0.2680095180907205, 
"rouge1_precision_stderr": 0.004391894212593074, "rouge1_recall": 0.19475234917332057, "rouge1_recall_stderr": 0.003183500503824711, "rouge2_fmeasure": 0.047670044472559195, "rouge2_fmeasure_stderr": 0.0012123884267024713, "rouge2_precision": 0.07985656912272211, "rouge2_precision_stderr": 0.0026354691331866935, "rouge2_recall": 0.05177357998459295, "rouge2_recall_stderr": 0.0014956995793890587, "rougeL_fmeasure": 0.13674199106912197, "rougeL_fmeasure_stderr": 0.0018380053950085664, "rougeL_precision": 0.21163784293597465, "rougeL_precision_stderr": 0.003752063265513303, "rougeL_recall": 0.14785575418501767, "rougeL_recall_stderr": 0.002456148738115927, "rougeLsum_fmeasure": 0.16742344251678107, "rougeLsum_fmeasure_stderr": 0.0022503819271764707, "rougeLsum_precision": 0.25180299854737964, "rougeLsum_precision_stderr": 0.004210636657378989, "rougeLsum_recall": 0.18186826651932245, "rougeLsum_recall_stderr": 0.002992911142368979}}, "4": {"tldr_en": {"bleu": 0.07172095791495998, "bleu_stderr": 0.00789659723575077, "rouge1_fmeasure": 0.058650885364296294, "rouge1_fmeasure_stderr": 0.0020654868574018297, "rouge1_precision": 0.09408573017762623, "rouge1_precision_stderr": 0.0036374250034598983, "rouge1_recall": 0.062322473280787063, "rouge1_recall_stderr": 0.0024278586980927977, "rouge2_fmeasure": 0.01681560917128696, "rouge2_fmeasure_stderr": 0.0008901092196544399, "rouge2_precision": 0.031658574230410935, "rouge2_precision_stderr": 0.002078738928518879, "rouge2_recall": 0.017760904175768037, "rouge2_recall_stderr": 0.001047213466542941, "rougeL_fmeasure": 0.04642380293882198, "rougeL_fmeasure_stderr": 0.0016260955707942933, "rougeL_precision": 0.0777613311142758, "rougeL_precision_stderr": 0.0031390664493088517, "rougeL_recall": 0.0487201551142007, "rougeL_recall_stderr": 0.0018838472253448453, "rougeLsum_fmeasure": 0.05466679297892778, "rougeLsum_fmeasure_stderr": 0.0019233345508114297, "rougeLsum_precision": 0.0887232057330063, "rougeLsum_precision_stderr": 0.0034775731184571623, "rougeLsum_recall": 0.05793373755686782, "rougeLsum_recall_stderr": 0.002257623454700228}}, "5": {"tldr_en": {"bleu": 3.989972548640045e-14, "bleu_stderr": 3.805866896172829e-13, "rouge1_fmeasure": 0.009287298413081357, "rouge1_fmeasure_stderr": 0.0008981455376742198, "rouge1_precision": 0.01565502170194915, "rouge1_precision_stderr": 0.0016289778088393908, "rouge1_recall": 0.009727975798126835, "rouge1_recall_stderr": 0.001037130264924102, "rouge2_fmeasure": 0.0027003256417814083, "rouge2_fmeasure_stderr": 0.00035332419738820606, "rouge2_precision": 0.005637375900266298, "rouge2_precision_stderr": 0.0008792716431913606, "rouge2_recall": 0.0026767834375694384, "rouge2_recall_stderr": 0.0003637992491962634, "rougeL_fmeasure": 0.0074307876775269515, "rougeL_fmeasure_stderr": 0.0007182295177501359, "rougeL_precision": 0.013183113805468402, "rougeL_precision_stderr": 0.0014306544932015776, "rougeL_recall": 0.007603533275554497, "rougeL_recall_stderr": 0.0008034168084209064, "rougeLsum_fmeasure": 0.008708338368004326, "rougeLsum_fmeasure_stderr": 0.0008427383878531978, "rougeLsum_precision": 0.014972493827476105, "rougeLsum_precision_stderr": 0.0015795900577164992, "rougeLsum_recall": 0.009084241224624135, "rougeLsum_recall_stderr": 0.0009715729115244676}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.8938384110720313, "bleu_stderr": 0.04442740312402193, "rouge1_fmeasure": 0.12367657246465094, "rouge1_fmeasure_stderr": 0.0014697284619829536, "rouge1_precision": 0.09374072562693576, 
"rouge1_precision_stderr": 0.0012230359583290014, "rouge1_recall": 0.1941308443869652, "rouge1_recall_stderr": 0.0022578323155766997, "rouge2_fmeasure": 0.02782065901980275, "rouge2_fmeasure_stderr": 0.0007104099274964277, "rouge2_precision": 0.020960965090776735, "rouge2_precision_stderr": 0.0006244327243966665, "rouge2_recall": 0.04544625505084656, "rouge2_recall_stderr": 0.0011979457762283947, "rougeL_fmeasure": 0.10860196142327254, "rougeL_fmeasure_stderr": 0.0012302518402255934, "rougeL_precision": 0.08203779654362248, "rougeL_precision_stderr": 0.0010243499554408882, "rougeL_recall": 0.17174001350939913, "rougeL_recall_stderr": 0.0019699700948681647, "rougeLsum_fmeasure": 0.10973134394419841, "rougeLsum_fmeasure_stderr": 0.0013237761734567783, "rougeLsum_precision": 0.08310697182038314, "rougeLsum_precision_stderr": 0.0011057328214282471, "rougeLsum_recall": 0.17275727213813283, "rougeLsum_recall_stderr": 0.002060390288561184}}, "1": {"generate_text_restaurant": {"bleu": 11.564210778804036, "bleu_stderr": 0.1198765256137471, "rouge1_fmeasure": 0.4605347726818318, "rouge1_fmeasure_stderr": 0.0023127571845312834, "rouge1_precision": 0.5637173176604086, "rouge1_precision_stderr": 0.003321797810373166, "rouge1_recall": 0.42980056601194444, "rouge1_recall_stderr": 0.002973789269502846, "rouge2_fmeasure": 0.21501029566008578, "rouge2_fmeasure_stderr": 0.002028484285834072, "rouge2_precision": 0.26763533541615536, "rouge2_precision_stderr": 0.0027609708835756447, "rouge2_recall": 0.20040046914166157, "rouge2_recall_stderr": 0.0021517714918030005, "rougeL_fmeasure": 0.3329465663050009, "rougeL_fmeasure_stderr": 0.002054461632613191, "rougeL_precision": 0.4107410670750688, "rougeL_precision_stderr": 0.003020208536626803, "rougeL_recall": 0.30963490201281724, "rougeL_recall_stderr": 0.002399599836596746, "rougeLsum_fmeasure": 0.3743551835626005, "rougeLsum_fmeasure_stderr": 0.0022879473483603647, "rougeLsum_precision": 0.45928342602402417, "rougeLsum_precision_stderr": 0.0032079639333423174, "rougeLsum_recall": 0.3490148298085739, "rougeLsum_recall_stderr": 0.002713242494960982}}, "2": {"generate_text_restaurant": {"bleu": 13.400542456523908, "bleu_stderr": 0.1609394772064794, "rouge1_fmeasure": 0.4798036150204005, "rouge1_fmeasure_stderr": 0.002282274159276462, "rouge1_precision": 0.5764234982441327, "rouge1_precision_stderr": 0.0033021249054890935, "rouge1_recall": 0.4503974768175401, "rouge1_recall_stderr": 0.002919712590777667, "rouge2_fmeasure": 0.2351759267020243, "rouge2_fmeasure_stderr": 0.002088790334860442, "rouge2_precision": 0.28675558273290647, "rouge2_precision_stderr": 0.002781811689703388, "rouge2_recall": 0.22076517179049052, "rouge2_recall_stderr": 0.0022524340062285335, "rougeL_fmeasure": 0.3549448525372181, "rougeL_fmeasure_stderr": 0.0021193431273246735, "rougeL_precision": 0.4286366628943883, "rougeL_precision_stderr": 0.0030557015039043693, "rougeL_recall": 0.3325081715277142, "rougeL_recall_stderr": 0.0024750818471041087, "rougeLsum_fmeasure": 0.39953599561234265, "rougeLsum_fmeasure_stderr": 0.0023322731932715716, "rougeLsum_precision": 0.48074271393713025, "rougeLsum_precision_stderr": 0.0032624614219069863, "rougeLsum_recall": 0.37474313833603423, "rougeLsum_recall_stderr": 0.0027481389029155967}}, "3": {"generate_text_restaurant": {"bleu": 14.083744966873544, "bleu_stderr": 0.13510527315117016, "rouge1_fmeasure": 0.4820649513030502, "rouge1_fmeasure_stderr": 0.002281818602228921, "rouge1_precision": 0.5736572827150358, "rouge1_precision_stderr": 
0.0032384910127830125, "rouge1_recall": 0.45360607830143157, "rouge1_recall_stderr": 0.00292439830442952, "rouge2_fmeasure": 0.23993823867723907, "rouge2_fmeasure_stderr": 0.002119970027404284, "rouge2_precision": 0.2890815394423686, "rouge2_precision_stderr": 0.0027576555191893308, "rouge2_recall": 0.2258474956058803, "rouge2_recall_stderr": 0.0023037492151092706, "rougeL_fmeasure": 0.360441051331739, "rougeL_fmeasure_stderr": 0.0021288701320219035, "rougeL_precision": 0.4312357546460138, "rougeL_precision_stderr": 0.003024393522392191, "rougeL_recall": 0.3384918202097905, "rougeL_recall_stderr": 0.0025049483648365892, "rougeLsum_fmeasure": 0.4054184452840819, "rougeLsum_fmeasure_stderr": 0.0023556882595847915, "rougeLsum_precision": 0.4832111553759888, "rougeLsum_precision_stderr": 0.0032277890930449953, "rougeLsum_recall": 0.38118510800546923, "rougeLsum_recall_stderr": 0.0027860575186553676}}, "4": {"generate_text_restaurant": {"bleu": 14.517833527253206, "bleu_stderr": 0.15177793846842857, "rouge1_fmeasure": 0.48422210730223353, "rouge1_fmeasure_stderr": 0.002254899743809727, "rouge1_precision": 0.5749779290519293, "rouge1_precision_stderr": 0.0032341752537122363, "rouge1_recall": 0.45475966418004093, "rouge1_recall_stderr": 0.0028643752694128203, "rouge2_fmeasure": 0.24433665653182082, "rouge2_fmeasure_stderr": 0.0021460412936580016, "rouge2_precision": 0.2940709470644466, "rouge2_precision_stderr": 0.0028229255550865045, "rouge2_recall": 0.22940149257285175, "rouge2_recall_stderr": 0.002309314382978281, "rougeL_fmeasure": 0.3623534452047717, "rougeL_fmeasure_stderr": 0.0021481098655366693, "rougeL_precision": 0.4319655914057995, "rougeL_precision_stderr": 0.003014148626675866, "rougeL_recall": 0.33988537212359804, "rougeL_recall_stderr": 0.002502036523908583, "rougeLsum_fmeasure": 0.4082890917863408, "rougeLsum_fmeasure_stderr": 0.0023637430000640375, "rougeLsum_precision": 0.4850678394646162, "rougeLsum_precision_stderr": 0.003225531635325354, "rougeLsum_recall": 0.3834152683878887, "rougeLsum_recall_stderr": 0.002767686832931401}}, "5": {"generate_text_restaurant": {"bleu": 14.324593922782439, "bleu_stderr": 0.15241024294192065, "rouge1_fmeasure": 0.48388952068811447, "rouge1_fmeasure_stderr": 0.002269811342308489, "rouge1_precision": 0.5781515372518692, "rouge1_precision_stderr": 0.003222285163472006, "rouge1_recall": 0.4515334921668262, "rouge1_recall_stderr": 0.0028702658791373904, "rouge2_fmeasure": 0.2439106267338791, "rouge2_fmeasure_stderr": 0.0021758038765035117, "rouge2_precision": 0.2959581192468744, "rouge2_precision_stderr": 0.002871716283121458, "rouge2_recall": 0.22736711269872564, "rouge2_recall_stderr": 0.0023134890859256136, "rougeL_fmeasure": 0.36343854325074254, "rougeL_fmeasure_stderr": 0.002159899813151206, "rougeL_precision": 0.43626029829832247, "rougeL_precision_stderr": 0.00305806886721717, "rougeL_recall": 0.3384883136722747, "rougeL_recall_stderr": 0.0024808236614542274, "rougeLsum_fmeasure": 0.40862207341517703, "rougeLsum_fmeasure_stderr": 0.0023818673387890456, "rougeLsum_precision": 0.4884840799940051, "rougeLsum_precision_stderr": 0.003239765144780713, "rougeLsum_recall": 0.38127900697364553, "rougeLsum_recall_stderr": 0.0027775578392814186}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.7044284159611478, "bleu_stderr": 0.10158710863644861, "rouge1_fmeasure": 0.1824311356993241, "rouge1_fmeasure_stderr": 0.0027742455429407478, "rouge1_precision": 0.13211376744686648, "rouge1_precision_stderr": 0.0021078227737627523, "rouge1_recall": 
0.30958091840657137, "rouge1_recall_stderr": 0.004718256158826077, "rouge2_fmeasure": 0.03971064304403434, "rouge2_fmeasure_stderr": 0.0014694772612363063, "rouge2_precision": 0.028436679980413212, "rouge2_precision_stderr": 0.0010660997414346854, "rouge2_recall": 0.06982063723069834, "rouge2_recall_stderr": 0.0026535400196015854, "rougeL_fmeasure": 0.14006214823111385, "rougeL_fmeasure_stderr": 0.0021160537297509855, "rougeL_precision": 0.10122789771540168, "rougeL_precision_stderr": 0.0015927078993417136, "rougeL_recall": 0.23911281116753666, "rougeL_recall_stderr": 0.0037320083765561033, "rougeLsum_fmeasure": 0.14375817680635894, "rougeLsum_fmeasure_stderr": 0.002322989973976244, "rougeLsum_precision": 0.10386228512699824, "rougeLsum_precision_stderr": 0.0017447889334045853, "rougeLsum_recall": 0.2456731268051541, "rougeLsum_recall_stderr": 0.004084619780933895}}, "1": {"article_DOC_summary": {"bleu": 1.414251403634457, "bleu_stderr": 0.09836035081115309, "rouge1_fmeasure": 0.19239078349475108, "rouge1_fmeasure_stderr": 0.002465960954886116, "rouge1_precision": 0.1405432198297611, "rouge1_precision_stderr": 0.001878679789941618, "rouge1_recall": 0.3276606298436086, "rouge1_recall_stderr": 0.004376293585107094, "rouge2_fmeasure": 0.03654673156140313, "rouge2_fmeasure_stderr": 0.0013456659236836456, "rouge2_precision": 0.026022309438382178, "rouge2_precision_stderr": 0.0009602804697530564, "rouge2_recall": 0.06491047997705596, "rouge2_recall_stderr": 0.002480531229175089, "rougeL_fmeasure": 0.14178613311816618, "rougeL_fmeasure_stderr": 0.0017458776019216176, "rougeL_precision": 0.10365291271701285, "rougeL_precision_stderr": 0.001348668367178514, "rougeL_recall": 0.24268394669042775, "rougeL_recall_stderr": 0.0032299728759827427, "rougeLsum_fmeasure": 0.15166775898349918, "rougeLsum_fmeasure_stderr": 0.001985950973310373, "rougeLsum_precision": 0.11061497335307985, "rougeLsum_precision_stderr": 0.001495607110282887, "rougeLsum_recall": 0.2601523504391386, "rougeLsum_recall_stderr": 0.003678694746255839}}, "2": {"article_DOC_summary": {"bleu": 1.4466481969495988, "bleu_stderr": 0.060446243451377536, "rouge1_fmeasure": 0.1926288064781011, "rouge1_fmeasure_stderr": 0.0024488782590945946, "rouge1_precision": 0.14212593864186113, "rouge1_precision_stderr": 0.0020278826787379143, "rouge1_recall": 0.3256945308193834, "rouge1_recall_stderr": 0.004183499164539368, "rouge2_fmeasure": 0.03789731741399596, "rouge2_fmeasure_stderr": 0.0013200459196047046, "rouge2_precision": 0.027670527801186028, "rouge2_precision_stderr": 0.0010263896507194106, "rouge2_recall": 0.06591916139804017, "rouge2_recall_stderr": 0.0023227491521481056, "rougeL_fmeasure": 0.14362364844292436, "rougeL_fmeasure_stderr": 0.0017716917176277007, "rougeL_precision": 0.10597609939483277, "rougeL_precision_stderr": 0.0014946429223521406, "rougeL_recall": 0.244127383947085, "rougeL_recall_stderr": 0.003151449183932117, "rougeLsum_fmeasure": 0.15326170376419787, "rougeLsum_fmeasure_stderr": 0.002014338226707074, "rougeLsum_precision": 0.11288346015402125, "rougeLsum_precision_stderr": 0.0016505088448165134, "rougeLsum_recall": 0.2608104370267327, "rougeLsum_recall_stderr": 0.003565773707068147}}, "3": {"article_DOC_summary": {"bleu": 1.392324866942817, "bleu_stderr": 0.10333126789792332, "rouge1_fmeasure": 0.18666478200505243, "rouge1_fmeasure_stderr": 0.002739760762211627, "rouge1_precision": 0.1422502333933988, "rouge1_precision_stderr": 0.0024472995402754156, "rouge1_recall": 0.30983358436212066, "rouge1_recall_stderr": 
0.0046212886851808195, "rouge2_fmeasure": 0.03663776309033808, "rouge2_fmeasure_stderr": 0.0013441419160810165, "rouge2_precision": 0.027222625611619575, "rouge2_precision_stderr": 0.0010616231746636696, "rouge2_recall": 0.06332962384389315, "rouge2_recall_stderr": 0.002357369300824623, "rougeL_fmeasure": 0.1393564669391015, "rougeL_fmeasure_stderr": 0.0020056393472558214, "rougeL_precision": 0.10615716840760141, "rougeL_precision_stderr": 0.0018146571946823608, "rougeL_recall": 0.23288499948545, "rougeL_recall_stderr": 0.003534724466115114, "rougeLsum_fmeasure": 0.14799512098760817, "rougeLsum_fmeasure_stderr": 0.0022200856382967975, "rougeLsum_precision": 0.11259274644929758, "rougeLsum_precision_stderr": 0.0019647845845397575, "rougeLsum_recall": 0.24767381751028186, "rougeLsum_recall_stderr": 0.003919141916162093}}, "4": {"article_DOC_summary": {"bleu": 0.7258559232581546, "bleu_stderr": 0.10507057432049555, "rouge1_fmeasure": 0.052736675331581245, "rouge1_fmeasure_stderr": 0.0028824641785999464, "rouge1_precision": 0.04678909556843838, "rouge1_precision_stderr": 0.0029899451023121886, "rouge1_recall": 0.08036818701688478, "rouge1_recall_stderr": 0.004523562391482061, "rouge2_fmeasure": 0.010697176811020805, "rouge2_fmeasure_stderr": 0.0009584744397015601, "rouge2_precision": 0.009587305236726377, "rouge2_precision_stderr": 0.001262427047456841, "rouge2_recall": 0.01705350789198973, "rouge2_recall_stderr": 0.0015073512113313507, "rougeL_fmeasure": 0.039672294104886395, "rougeL_fmeasure_stderr": 0.002180086549370051, "rougeL_precision": 0.03558582877518959, "rougeL_precision_stderr": 0.0024020712090389224, "rougeL_recall": 0.06057475710697018, "rougeL_recall_stderr": 0.0034243280850370624, "rougeLsum_fmeasure": 0.0423806306620413, "rougeLsum_fmeasure_stderr": 0.0023629304701070007, "rougeLsum_precision": 0.03769649897971859, "rougeLsum_precision_stderr": 0.0025149825952445427, "rougeLsum_recall": 0.06477261558492757, "rougeLsum_recall_stderr": 0.0037095733376953295}}, "5": {"article_DOC_summary": {"bleu": 1.1269861498697588e-39, "bleu_stderr": 5.474659303973499e-34, "rouge1_fmeasure": 0.0024263300495769666, "rouge1_fmeasure_stderr": 0.0007044351988650494, "rouge1_precision": 0.00279871811503332, "rouge1_precision_stderr": 0.0008181720087639885, "rouge1_recall": 0.002210852921667167, "rouge1_recall_stderr": 0.0006418342106197504, "rouge2_fmeasure": 0.00021830317883798518, "rouge2_fmeasure_stderr": 9.811329959110373e-05, "rouge2_precision": 0.00025897151313355527, "rouge2_precision_stderr": 0.00011577315282337257, "rouge2_recall": 0.00019125158969776062, "rouge2_recall_stderr": 8.696066491461003e-05, "rougeL_fmeasure": 0.001790621018457292, "rougeL_fmeasure_stderr": 0.0004902962943570475, "rougeL_precision": 0.0020472883264226207, "rougeL_precision_stderr": 0.0005641122660611984, "rougeL_recall": 0.0016493867049964376, "rougeL_recall_stderr": 0.00045298383898349467, "rougeLsum_fmeasure": 0.0019503315839039083, "rougeLsum_fmeasure_stderr": 0.0005574612507559669, "rougeLsum_precision": 0.0022311468898594308, "rougeLsum_precision_stderr": 0.0006382468727242592, "rougeLsum_recall": 0.00179354722651545, "rougeLsum_recall_stderr": 0.0005153565164091152}}}}
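The merged.json above nests the same generation metrics as dataset → few-shot count (stringified integer key) → prompt name → metric/stderr pairs. A minimal access sketch, assuming the file is read from this repo's path:

```python
# Minimal sketch: look up one value in merged.json
# (keys: dataset -> fewshot count as a string -> prompt name -> metric).
import json

with open("2b855b11boscarseed1/evaluation/generation/merged.json") as f:
    merged = json.load(f)

# 5-shot ROUGE-2 F1 for the WebNLG PALM prompt:
print(merged["GEM/web_nlg_en"]["5"]["PALM_prompt"]["rouge2_fmeasure"])
```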
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.3801106976509281,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.030814977906570784
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.09254608062110982,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.003969016029393206
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.2514658272132534,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.005470193417734038
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.09839951058099769,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0024506647455068047
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.04251830138142587,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0025980589101773847
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.11876788025878558,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0033890059840335
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.04579430308932667,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0014988641201888384
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.08735615223930215,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0037276787142384374
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.24225619592357273,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.005268443646117898
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.09334972395562345,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0022379508481899127
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.08738746346078487,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.003771697087126988
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.23626985324693767,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.005054786193539789
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.09257280431668473,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0022654509452080854
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 0,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.5239721644834189,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.03609798297687262
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.13082237107727746,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0041887696056868914
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.29894851591422195,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.0051748543299946
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.14826158596606656,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0034048406078764424
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.06041678931722799,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0025853119808927536
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.1438349096175946,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0034922545459490128
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.06901771335292758,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0020860447213003814
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.11585043732455273,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.003667689089676262
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.27586821042029314,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.004795036002022945
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.13229541662919794,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.002864700499056504
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.11910114676043697,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0037751935052806325
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.27899773980715475,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.004795091819308235
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.13555256797626594,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.002979591459721934
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 1,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6155617098906994, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "bleu_stderr": 0.04320711987914936},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.15764962909670655, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.004811814678098913},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3156619362796641, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.005146634895664645},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.16893359766104024, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.0038194664000741757},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.07547310729323965, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.002923357742977468},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.16143870017973952, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0037206349376128402},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.08285140079808971, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0024706623511134077},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.1391887515979127, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.004157638480121235},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.292888997189622, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.004754219104081643},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.15099445149853782, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0032341025124660036},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.14269660006491244, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.004284849569606708},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.29574913885784265, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.004809657463857144},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1540461900906591, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.003343046732804547}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 2,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7638442842916376, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "bleu_stderr": 0.043708542511138676},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.17306675943358993, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.005175271126864724},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3366968869783645, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.0052711987086574656},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.18548340026259344, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.004223744602181456},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.08724475546238586, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.003316626156011152},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.17335505514005853, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.003911222192069402},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.09385858633035163, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0028582868714185404},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.15183832422624116, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.004487449653529665},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.30970296664886204, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.0048061266737124434},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.16463283674108964, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0035889811954755296},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.15662013259115215, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.004651735217163702},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3138687175842859, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.004862972063876745},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.16864351715068068, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0037107680564301263}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 3,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.8719331256074123, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "bleu_stderr": 0.057879221179053614},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.1851425968057546, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.005242772266486215},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3521224313867968, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.005038061225917597},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.19456487112141713, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.004109079813244224},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.09493067862691192, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.0033973584763323903},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.18378368228130243, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.003782300251501581},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.10023987500778037, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.00282151252673031},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.163782526391218, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.004592058011452551},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3247189913662455, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.004610930009186234},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.17333927997130413, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.003493985482674142},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.16903111233990037, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.004770080380107174},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.32924041133628895, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.004661708271273997},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.17795216339276917, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0036338202974834652}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 4,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.9071599045860481, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "bleu_stderr": 0.04132800578881959},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.19439128617510687, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.005421482683712362},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.360346127905188, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.005185217658659474},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.20471471819889542, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.004366365024283305},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.10302514882290766, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.0036082227594639027},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.19046117331587684, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0038953705603308754},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.10719970952871183, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.00298368361864098},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.1710926703224802, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.004698450665775182},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33314261355257685, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.004754137888970346},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.18231989678501748, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.00369484632221736},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.17698148297717947, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.004898716157488609},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3375011370591266, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.0047979049681730265},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.18722830056278536, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0038488776142113694}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 5,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1476470442480304, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.0027640730190324147},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1987093879718628, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.0029637609362227523},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.14881520480086885, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.0021587431282467824},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0287482827215408, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.0009244009986468182},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.04207438489658476, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0013026460720942934},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.030793880936937773, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0008589680063776107},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11919226323771576, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.0023730668962417723},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1605716758450966, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.002356213726056233},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.11844891250439296, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0016052290609412721},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.13689809260299124, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.002643420711314581},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.18318434106372952, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.002750769375883103},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1370648388242123, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0019890326379503644},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.7689251791579401, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "bleu_stderr": 0.08056354651616494}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 0,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
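The slim.* layout is repeated once per fewshot setting, with the trailing _0 ... _5 in each filename matching config["num_fewshot"], so per-task files can be compared directly across shot counts. A small, purely illustrative sketch of collecting one metric across the fewshot variants of a task follows; the glob pattern and the chosen metric are assumptions for the example, not part of the commit.

import glob
import json

# Hypothetical glob over the wiki_lingua slim files added in this commit.
pattern = "2b855b11boscarseed1/evaluation/generation/slim.*GEM-wiki_lingua_en_tldr_en_*.json"

scores = {}
for path in sorted(glob.glob(pattern)):
    with open(path) as f:
        report = json.load(f)
    shots = report["config"]["num_fewshot"]
    # Pick the entry that carries the metric of interest, e.g. rougeL_fmeasure.
    for entry in report["results"]:
        if "rougeL_fmeasure" in entry:
            scores[shots] = entry["rougeL_fmeasure"]

for shots in sorted(scores):
    print(f"{shots}-shot rougeL_fmeasure: {scores[shots]:.4f}")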
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.22376650173536908, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.0033099883221279717},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.233044248806137, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.002952305065889663},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.19163795576049275, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.002095941856037326},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0566885511786849, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.001922144512262528},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.054244362067460944, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0015009900283382725},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04458039177873789, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.001138994780925126},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.17115197989214356, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.002730566360378985},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1753866049193498, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.0022597816497720055},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.14368563344942828, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0015580386025042744},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.20943173970561998, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.003140748148754858},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.21808332623771157, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.002768104713925207},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.17897651689986416, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0019496135779428591},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.690348502752936, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "bleu_stderr": 0.04816844705930497}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 1,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.29318751960337, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.003906796008685731},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2507046446902222, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.0029715006311925958},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.22003513847299133, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.0021076546284530782},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.08627504991656038, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.002487880526812255},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.06676278485251605, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0016295916757062398},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.058544685834468764, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0012428137662634153},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.22625403204772632, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.003335160888542255},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.187612565720097, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.0022917340918039352},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.1646868100175827, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0015819419652510194},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.27427864211994335, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.0037481794388014565},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.23337797750912687, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.002789848859667374},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.20484462592482003, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0019789253350355387},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.431375370657664, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "bleu_stderr": 0.04760571110301349}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 2,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.2680095180907205, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.004391894212593074},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.19475234917332057, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.003183500503824711},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.17923839306645714, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.002399847035078048},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.07985656912272211, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.0026354691331866935},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05177357998459295, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0014956995793890587},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.047670044472559195, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0012123884267024713},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.21163784293597465, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.003752063265513303},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.14785575418501767, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.002456148738115927},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13674199106912197, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0018380053950085664},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.25180299854737964, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.004210636657378989},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.18186826651932245, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.002992911142368979},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.16742344251678107, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0022503819271764707},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.483185496130131, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "bleu_stderr": 0.08015886448893912}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 3,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09408573017762623, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.0036374250034598983},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.062322473280787063, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.0024278586980927977},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.058650885364296294, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.0020654868574018297},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.031658574230410935, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.002078738928518879},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.017760904175768037, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.001047213466542941},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.01681560917128696, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0008901092196544399},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0777613311142758, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.0031390664493088517},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.0487201551142007, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.0018838472253448453},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.04642380293882198, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0016260955707942933},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.0887232057330063, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.0034775731184571623},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.05793373755686782, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.002257623454700228},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.05466679297892778, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0019233345508114297},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.07172095791495998, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "bleu_stderr": 0.00789659723575077}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 4,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}