Muennighoff committed on
Commit 550ef7f · 1 Parent(s): 123ca89
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +321 -0
  2. 2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_4.json +1 -0
  3. 2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_5.json +1 -0
  4. 2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_4.json +1 -0
  5. 2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_5.json +1 -0
  6. 2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.json +1 -0
  7. 2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.json +1 -0
  8. 2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.json +1 -0
  9. 2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_gem_xsum_article_DOC_summary_4.json +1 -0
  10. 2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_gem_xsum_article_DOC_summary_5.json +1 -0
  11. 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_4.jsonl +3 -0
  12. 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl +3 -0
  13. 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_4.jsonl +3 -0
  14. 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl +3 -0
  15. 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +3 -0
  16. 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +3 -0
  17. 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +3 -0
  18. 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_4.jsonl +3 -0
  19. 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_5.jsonl +3 -0
  20. 2b855b18boscarseed3/evaluation/generation/merged.csv +22 -4
  21. 2b855b18boscarseed3/evaluation/generation/merged.json +1 -1
  22. 2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_4.json +133 -0
  23. 2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_5.json +133 -0
  24. 2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_4.json +133 -0
  25. 2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_5.json +133 -0
  26. 2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.json +133 -0
  27. 2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.json +133 -0
  28. 2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.json +133 -0
  29. 2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_gem_xsum_article_DOC_summary_4.json +133 -0
  30. 2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_gem_xsum_article_DOC_summary_5.json +133 -0
  31. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_0.json +1 -0
  32. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_1.json +1 -0
  33. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_2.json +1 -0
  34. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_3.json +1 -0
  35. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_4.json +1 -0
  36. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_5.json +1 -0
  37. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_0.json +1 -0
  38. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_1.json +1 -0
  39. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_2.json +1 -0
  40. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_3.json +1 -0
  41. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_4.json +1 -0
  42. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_5.json +1 -0
  43. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_0.json +1 -0
  44. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_1.json +1 -0
  45. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_2.json +1 -0
  46. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_3.json +1 -0
  47. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_4.json +1 -0
  48. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_5.json +1 -0
  49. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_gem_xsum_article_DOC_summary_0.json +1 -0
  50. 2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_gem_xsum_article_DOC_summary_1.json +1 -0
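
All of these artifacts follow one naming scheme: `<kind>.<model>_<task_and_prompt>_<num_fewshot>.<json|jsonl>`, where the run id (e.g. `2b855b18boscarseed3`) encodes the model/data configuration and seed, and the trailing digit is the few-shot count (the agg JSON at the end of this page confirms this: its `config.num_fewshot` is 4 for a `_4` file). A minimal parsing sketch; the regex and helper below are illustrative, not part of this repo, and `merged.csv`/`merged.json` intentionally do not match:

```python
import re

# Hypothetical helper (not part of this repo): split an evaluation artifact
# name such as "agg.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_4.json"
# into its parts, assuming the <kind>.<model>_<task+prompt>_<fewshot>.<ext>
# convention visible in the listing above.
NAME_RE = re.compile(
    r"^(?P<kind>agg|slim|examples)\."   # result flavor
    r"(?P<model>[^_]+)_"                # run id, e.g. 2b855b18boscarseed3
    r"(?P<task_prompt>.+)_"             # dataset + prompt name
    r"(?P<fewshot>\d+)"                 # number of few-shot examples
    r"\.(?P<ext>jsonl?)$"
)

def parse_result_name(name: str) -> dict:
    """Return the components of a result file name, or raise if it doesn't match."""
    m = NAME_RE.match(name)
    if m is None:
        raise ValueError(f"unrecognized result file name: {name}")
    return m.groupdict()

print(parse_result_name("agg.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_4.json"))
# -> {'kind': 'agg', 'model': '2b855b18boscarseed3',
#     'task_prompt': 'GEM-web_nlg_en_PALM_prompt', 'fewshot': '4', 'ext': 'json'}
```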
.gitattributes CHANGED
@@ -479,3 +479,324 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
 2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
 2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed3/evaluation/generation/examples.2b855b4boscarseed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed4/evaluation/generation/examples.2b855b9boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed2/evaluation/generation/examples.2b855b4boscarseed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed1/evaluation/generation/examples.2b855b9boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed1/evaluation/generation/examples.2b855b4boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed2/evaluation/generation/examples.2b855b9boscarseed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b9boscarseed3/evaluation/generation/examples.2b855b9boscarseed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed4/evaluation/generation/examples.2b855b28boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b4boscarseed4/evaluation/generation/examples.2b855b4boscarseed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b18boscarseed4/evaluation/generation/examples.2b855b18boscarseed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed1/evaluation/generation/examples.2b855b28boscarseed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed2/evaluation/generation/examples.2b855b28boscarseed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b28boscarseed3/evaluation/generation/examples.2b855b28boscarseed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
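
Every addition in this hunk is a Git LFS rule: a path followed by `filter=lfs diff=lfs merge=lfs -text`, which stores the matching file as an LFS pointer and disables textual diff/merge for it. These are the lines that `git lfs track <path>` appends to .gitattributes. A small sketch of adding such rules in bulk; the helper is our own, and the example path is just one of the entries above:

```python
from pathlib import Path

# The attribute string used for every rule in this diff.
LFS_ATTRS = "filter=lfs diff=lfs merge=lfs -text"

def add_lfs_rules(repo_root: str, paths: list[str]) -> None:
    """Append one 'path filter=lfs ...' rule per path, skipping existing rules."""
    attrs = Path(repo_root) / ".gitattributes"
    existing = set(attrs.read_text().splitlines()) if attrs.exists() else set()
    with attrs.open("a") as f:
        for p in paths:
            rule = f"{p} {LFS_ATTRS}"
            if rule not in existing:
                f.write(rule + "\n")

# Hypothetical usage, run from the repo root, mirroring one entry above:
add_lfs_rules(".", [
    "2b855b18boscarseed3/evaluation/generation/"
    "examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl",
])
```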
2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7503302834538067, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0402816045060437}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.17209973620193267, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005047840493621876}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3637510419209303, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004667216670957412}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.18968150933143582, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003940084164169744}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.09074863043296012, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0033847322187014256}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.18934860539994808, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003551268691823376}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.09769455469909966, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0026898671929461455}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.15132731104989547, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004296079897911914}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3392207177743361, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004346379157120974}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.17009734401273233, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.003316032712008811}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.15670285935286377, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004521862460431119}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.34256056402766993, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004340802406659253}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.17420043534898244, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0034478375868970887}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
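
All of the agg.*.json files in this commit share one schema: a "results" list whose elements each repeat the task and prompt metadata and carry a single scalar metric together with its bootstrap stderr, plus a "config" block recording the model path, few-shot count, and evaluation limits. A minimal Python sketch for reading one of them (not part of the commit; the path is the one shown in the diff, relative to the repository root):

import json

# Minimal sketch: load one aggregate file added above and print each metric
# with its bootstrap stderr. The path below is assumed from the diff.
PATH = "2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_4.json"

with open(PATH) as f:
    agg = json.load(f)

print("task:", agg["results"][0]["task_name"], "| num_fewshot:", agg["config"]["num_fewshot"])
for entry in agg["results"]:
    # Each list element holds exactly one scalar metric plus "<metric>_stderr".
    for key, value in entry.items():
        if isinstance(value, float) and not key.endswith("_stderr"):
            print(f"  {key}: {value:.4f} (stderr {entry[key + '_stderr']:.4f})")
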
2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.8802082281295327, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03585155302087567}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.2015386177119827, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0056627338976135595}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3778003683962055, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00478414375889329}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.2100307213117374, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.004280889499111591}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.10934470263927108, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0038246604808114346}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.19982577035446628, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003701169618935952}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.11089680744069119, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002955811011440687}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.1766978585338184, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004870142265891642}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.34962603195773095, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004451899185074764}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.18728902811936157, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.003646672780047176}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.1830292512610349, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00511823428201166}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.353603903022092, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004471111589359183}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1920899272461596, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0037931786488217814}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1055334273233753, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004005109623144804}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.04830991975106493, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.001956273095648371}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.05349208128293813, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019435876334074464}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.03529801291930799, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002325457268720388}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.012534282753832842, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008254659390003825}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.014363695304766007, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008381194097495501}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.08979847395448344, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003545822235055598}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.038917636717379904, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0015673979923397155}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.04364703267936979, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015875798918702792}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09964328288443035, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003821151145375782}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.04484118865254346, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0018130620488983893}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.049841630097568694, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018082077986885854}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.00654223663156633, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0027758848324662983}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.017659398078161862, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001842951988111103}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.008267332980204723, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0009686556728017771}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.008969589470047824, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0009287248458710643}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.006656029964148255, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010762435401541755}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0026394181085068096, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0004263557024540143}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0028872404480968354, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0004217887748055091}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.015617411068671785, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0016930430963125933}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.006935211584416843, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0007968820212509419}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.007666950322090918, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0007998263132713956}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.01678289905280236, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017533652046718578}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.007835054650781782, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0009174649411865726}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.008516964231671285, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0008818018744518781}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.2932483830177584e-22, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 4.968624247175887e-20}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 13.768530954975475, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.14594564253938233}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5513149605843634, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003223926598984129}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4544657860924009, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029435858030621223}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4732988552296844, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023072398938351565}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2713161825786521, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002652312584043066}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22274524949051497, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022849365352015077}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.23099189410387647, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020736584515027023}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.40182792574019505, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0028897593436573114}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3300569194031173, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002494937679218632}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.34354436912494557, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020702299844247204}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.45467251256274516, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031283683413357516}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3750256251060309, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028037931259678327}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3901751322155334, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002337466007692747}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 13.955354009824239, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.17621205158559158}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5568639783306667, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032035266773002813}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4565383457910882, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028310725817788825}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.477320750868792, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022064402436474525}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.27463832252094134, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002656556546241727}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2237734377709334, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022284036836192115}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.2332985109627194, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020392691038979755}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4053609865770688, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002864650388473525}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3320809453199833, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024559602375536747}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3467659886451906, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002050748177099438}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.45966718146546615, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0030934720926802956}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3782310346965992, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002754847069758433}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.39460741009932404, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022906964526605337}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 13.809469952975402, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.19234612711909144}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5620558517895992, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003280696774072891}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.45294101366960293, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027912967652229588}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4773967399542214, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002203954037576556}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.28049075780574995, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002723905086717432}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22268647687005963, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002185025554068672}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.23481550856452307, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002031448354554745}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.41130704812365865, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002906913984621831}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3317608784263896, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024614789723510556}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3489632758060724, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002066334034877156}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4652537741581926, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031761710705153377}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3759647022751929, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027254534040526504}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3955511870962712, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002297775237896474}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_gem_xsum_article_DOC_summary_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.06423392835301732, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003890454592785219}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.061064960878397495, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0035529952635305366}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.05669884311129999, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003233190697529867}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.013119210106822345, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0015803091283963062}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.01188116199451332, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012074230140535814}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.011230809043560832, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001150436285440537}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.048643821762800375, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0030508122483730435}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.045534601833835085, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0026917002780029826}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.04231694926938603, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": 
"", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0024588550815058375}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.04908506848683216, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0030727749370763383}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.04640232937561536, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027809753817619994}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.042878966271196875, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0025011086060412416}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.1900845439712902, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04734201088429449}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed3/evaluation/generation/agg.2b855b18boscarseed3_gem_xsum_article_DOC_summary_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0034539022767811622, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001194139379020227}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.001875281856314757, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0005400708989179161}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0022256871499288173, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.000655351445773999}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0006133203936083778, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00038964743374472006}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00022420975251163933, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00011871281237923162}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0003049361539927578, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00016618277661248796}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.002376363953379656, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.000833979092494122}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0012732794478251294, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0003799759956513556}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0015288199568875144, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": 
null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0004717229876146918}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.002582083748444623, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0008671989447029644}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0015157649497445937, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0004622173727756195}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0017404000288000973, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005261342065394403}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 5.949462853290167e-51, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 3.3116242110429246e-45}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_4.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e16ef4c9d70277b97052532c28acc75f8bfaec62018be8a5a2fa1f1e2d5d5d8c
+ size 7418103
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3116937bde5922bf9287cd3a86e019e38b23b87d9041dc40a0d12693cf5d9482
+ size 8260847
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_4.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48a67e99faae54da59379192c883621489dbf1231e9c282f5b6c7cb6fca2d425
+ size 29336182
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17e8b63e3ea806a66f642731690c4e4ef453b7f26d4d4ad0c8a3e75d2fc4de44
+ size 34778140
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:edb7df6768e860f25fd378b972a3a5e52463e43cc099ffac278e3c4e256c8caf
+ size 7192478
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:27153b068e6d246f1f7e7392f409e7af435c0e947a6a51cbb3d13f6775e61f0a
+ size 8268271
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85a67031b5aa132abf151a9ebae6abca557e5f0465425385951f57b375ddeae2
+ size 9351012
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_4.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:423b118ae04fefb730fd84c88ffc3cd8e2ce34403402cfee2bc2489178b90850
+ size 11636896
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_5.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ada598961d65a04fd80c6ed1f8b6d8b8c1313d9fad1ae24e536568c867440d1a
+ size 13897161
2b855b18boscarseed3/evaluation/generation/merged.csv CHANGED
@@ -5,7 +5,13 @@ e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.20273574208408596
  e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.20273574208408596
  e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.22409655455897465
  e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.22409655455897465
- e2e_nlg_cleaned,2,average,multiple,0.16812386809170374
+ e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.23099189410387647
+ e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.23099189410387647
+ e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.2332985109627194
+ e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.2332985109627194
+ e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.23481550856452307
+ e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.23481550856452307
+ e2e_nlg_cleaned,5,average,multiple,0.20057958631770503
  gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.04482048555975224
  gem_xsum,0,median,rouge2_fmeasure,0.04482048555975224
  gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.044418308641362814
@@ -14,7 +20,11 @@ gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.04563831775128178
  gem_xsum,2,median,rouge2_fmeasure,0.04563831775128178
  gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.042886506849894174
  gem_xsum,3,median,rouge2_fmeasure,0.042886506849894174
- gem_xsum,3,average,multiple,0.04444090470057275
+ gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.011230809043560832
+ gem_xsum,4,median,rouge2_fmeasure,0.011230809043560832
+ gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.0003049361539927578
+ gem_xsum,5,median,rouge2_fmeasure,0.0003049361539927578
+ gem_xsum,5,average,multiple,0.0315498939999741
  web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.05424150072683682
  web_nlg_en,0,median,rouge2_fmeasure,0.05424150072683682
  web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.06923364476381255
@@ -23,7 +33,11 @@ web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.08088052499730423
  web_nlg_en,2,median,rouge2_fmeasure,0.08088052499730423
  web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.09167274798729065
  web_nlg_en,3,median,rouge2_fmeasure,0.09167274798729065
- web_nlg_en,3,average,multiple,0.07400710461881106
+ web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.09769455469909966
+ web_nlg_en,4,median,rouge2_fmeasure,0.09769455469909966
+ web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.11089680744069119
+ web_nlg_en,5,median,rouge2_fmeasure,0.11089680744069119
+ web_nlg_en,5,average,multiple,0.08410329676917251
  wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.04704761224432566
  wiki_lingua_en,0,median,rouge2_fmeasure,0.04704761224432566
  wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.02115631835055968
@@ -32,4 +46,8 @@ wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.05076363678961688
  wiki_lingua_en,2,median,rouge2_fmeasure,0.05076363678961688
  wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.04492302045628534
  wiki_lingua_en,3,median,rouge2_fmeasure,0.04492302045628534
- wiki_lingua_en,3,average,multiple,0.04097264696019689
+ wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.014363695304766007
+ wiki_lingua_en,4,median,rouge2_fmeasure,0.014363695304766007
+ wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0028872404480968354
+ wiki_lingua_en,5,median,rouge2_fmeasure,0.0028872404480968354
+ wiki_lingua_en,5,average,multiple,0.030190253932275066
2b855b18boscarseed3/evaluation/generation/merged.json CHANGED
@@ -1 +1 @@
- {"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.4327056216526176, "bleu_stderr": 0.04372693419980847, "rouge1_fmeasure": 0.11561559994702363, "rouge1_fmeasure_stderr": 0.0022349577144307273, "rouge1_precision": 0.08120292881387756, "rouge1_precision_stderr": 0.002306316231404108, "rouge1_recall": 0.3120345446803705, "rouge1_recall_stderr": 0.005372909934713825, "rouge2_fmeasure": 0.05424150072683682, "rouge2_fmeasure_stderr": 0.0013372863957073417, "rouge2_precision": 0.03720829109819314, "rouge2_precision_stderr": 0.0014023448550832475, "rouge2_recall": 0.15261875747822895, "rouge2_recall_stderr": 0.00339031268383416, "rougeL_fmeasure": 0.11092185835290592, "rougeL_fmeasure_stderr": 0.002081793848469044, "rougeL_precision": 0.07773451061272615, "rougeL_precision_stderr": 0.002178465210306159, "rougeL_recall": 0.3025718897667384, "rougeL_recall_stderr": 0.005219032532466267, "rougeLsum_fmeasure": 0.10847909677805251, "rougeLsum_fmeasure_stderr": 0.0020741019070329024, "rougeLsum_precision": 0.07640311322341631, "rougeLsum_precision_stderr": 0.0021934299284831534, "rougeLsum_recall": 0.2932969878608124, "rougeLsum_recall_stderr": 0.004966268605421209}}, "1": {"PALM_prompt": {"bleu": 0.5170049640136369, "bleu_stderr": 0.03648679066962773, "rouge1_fmeasure": 0.14453580078700035, "rouge1_fmeasure_stderr": 0.0033541420725561855, "rouge1_precision": 0.13136221660069292, "rouge1_precision_stderr": 0.004388592114571626, "rouge1_recall": 0.2896405801661563, "rouge1_recall_stderr": 0.004953943443354722, "rouge2_fmeasure": 0.06923364476381255, "rouge2_fmeasure_stderr": 0.0021424956917440686, "rouge2_precision": 0.061473985181738404, "rouge2_precision_stderr": 0.0027700384901446383, "rouge2_recall": 0.14331487051134867, "rouge2_recall_stderr": 0.0033623310736128884, "rougeL_fmeasure": 0.13164203226260543, "rougeL_fmeasure_stderr": 0.002888601323729016, "rougeL_precision": 0.11885571450646416, "rougeL_precision_stderr": 0.003923661833356943, "rougeL_recall": 0.27126007497231336, "rougeL_recall_stderr": 0.0045873803474269104, "rougeLsum_fmeasure": 0.13353893256396349, "rougeLsum_fmeasure_stderr": 0.002967623053584408, "rougeLsum_precision": 0.12129133148954015, "rougeLsum_precision_stderr": 0.004045561118181283, "rougeLsum_recall": 0.2728234608851354, "rougeLsum_recall_stderr": 0.004583585366471488}}, "2": {"PALM_prompt": {"bleu": 0.5928013588165197, "bleu_stderr": 0.033775591670924525, "rouge1_fmeasure": 0.16409184419431355, "rouge1_fmeasure_stderr": 0.0035513646129875266, "rouge1_precision": 0.14559162616819946, "rouge1_precision_stderr": 0.004520623053825127, "rouge1_recall": 0.3289662638216203, "rouge1_recall_stderr": 0.004774112241093821, "rouge2_fmeasure": 0.08088052499730423, "rouge2_fmeasure_stderr": 0.0023389596994276268, "rouge2_precision": 0.07076232768638219, "rouge2_precision_stderr": 0.0027948927893596243, "rouge2_recall": 0.1674067035637957, "rouge2_recall_stderr": 0.0034923352179481795, "rougeL_fmeasure": 0.14878433573038657, "rougeL_fmeasure_stderr": 0.0030237132146357392, "rougeL_precision": 0.12928227924507313, "rougeL_precision_stderr": 0.0038455860156473096, "rougeL_recall": 0.30914412278215103, "rougeL_recall_stderr": 0.004469912060350128, "rougeLsum_fmeasure": 0.1514985119069214, "rougeLsum_fmeasure_stderr": 0.0031385516550478396, "rougeLsum_precision": 0.1330215775577541, "rougeLsum_precision_stderr": 0.004052559898165496, "rougeLsum_recall": 0.31112999938214775, "rougeLsum_recall_stderr": 0.00446078662626257}}, "3": {"PALM_prompt": {"bleu": 0.6807686634087953, 
"bleu_stderr": 0.027313109829673613, "rouge1_fmeasure": 0.17936348343657485, "rouge1_fmeasure_stderr": 0.003950557413709246, "rouge1_precision": 0.16284509580783738, "rouge1_precision_stderr": 0.004940954307153824, "rouge1_recall": 0.34165193448645764, "rouge1_recall_stderr": 0.004823144187146308, "rouge2_fmeasure": 0.09167274798729065, "rouge2_fmeasure_stderr": 0.0027093176290479337, "rouge2_precision": 0.0848751566517133, "rouge2_precision_stderr": 0.0033582769970308215, "rouge2_recall": 0.1755604725689479, "rouge2_recall_stderr": 0.0036199598913957873, "rougeL_fmeasure": 0.1614067806393073, "rougeL_fmeasure_stderr": 0.0033574645190345828, "rougeL_precision": 0.14478799010613622, "rougeL_precision_stderr": 0.0043027476999933385, "rougeL_recall": 0.31861959582186916, "rougeL_recall_stderr": 0.004460751088877849, "rougeLsum_fmeasure": 0.1647031537003239, "rougeLsum_fmeasure_stderr": 0.003470868557645694, "rougeLsum_precision": 0.14863622609613492, "rougeLsum_precision_stderr": 0.004457573202802981, "rougeLsum_recall": 0.32177137457614585, "rougeLsum_recall_stderr": 0.004496971251428065}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 2.901333720728356, "bleu_stderr": 0.10226215319645214, "rouge1_fmeasure": 0.19490685891150464, "rouge1_fmeasure_stderr": 0.002228792147804532, "rouge1_precision": 0.17704255603612853, "rouge1_precision_stderr": 0.002383452581089663, "rouge1_recall": 0.2623176452518483, "rouge1_recall_stderr": 0.0030809405530689694, "rouge2_fmeasure": 0.04704761224432566, "rouge2_fmeasure_stderr": 0.0010447119831656043, "rouge2_precision": 0.04225836992348151, "rouge2_precision_stderr": 0.0009823631202670515, "rouge2_recall": 0.0651070954044141, "rouge2_recall_stderr": 0.0015933009474170402, "rougeL_fmeasure": 0.14883828387862688, "rougeL_fmeasure_stderr": 0.0016138603489545347, "rougeL_precision": 0.13397353229615103, "rougeL_precision_stderr": 0.0017128207643543572, "rougeL_recall": 0.2047034234132007, "rougeL_recall_stderr": 0.0024598017469020465, "rougeLsum_fmeasure": 0.18138749699425952, "rougeLsum_fmeasure_stderr": 0.0020853809149012385, "rougeLsum_precision": 0.16480116835931327, "rougeLsum_precision_stderr": 0.002236603075129391, "rougeLsum_recall": 0.2444741077779754, "rougeLsum_recall_stderr": 0.0028949045881182804}}, "1": {"tldr_en": {"bleu": 1.3189071143549054, "bleu_stderr": 0.056260491668375046, "rouge1_fmeasure": 0.13555859612142038, "rouge1_fmeasure_stderr": 0.0018520687451681944, "rouge1_precision": 0.17711498903253858, "rouge1_precision_stderr": 0.003187054524812852, "rouge1_recall": 0.1495992184775029, "rouge1_recall_stderr": 0.002418260025690305, "rouge2_fmeasure": 0.02115631835055968, "rouge2_fmeasure_stderr": 0.0009205511637097431, "rouge2_precision": 0.0341101685597815, "rouge2_precision_stderr": 0.0018789309734762675, "rouge2_recall": 0.023354524799664208, "rouge2_recall_stderr": 0.0011331997231529378, "rougeL_fmeasure": 0.10594836166403426, "rougeL_fmeasure_stderr": 0.0014231609544825045, "rougeL_precision": 0.14166539453787275, "rougeL_precision_stderr": 0.0027278885597432283, "rougeL_recall": 0.11669878560815988, "rougeL_recall_stderr": 0.0018535132456267542, "rougeLsum_fmeasure": 0.12879863711565903, "rougeLsum_fmeasure_stderr": 0.001726843722306648, "rougeLsum_precision": 0.16834898466576845, "rougeLsum_precision_stderr": 0.0030251794839351825, "rougeLsum_recall": 0.1423605907639815, "rougeLsum_recall_stderr": 0.002274408434168799}}, "2": {"tldr_en": {"bleu": 2.597656232065304, "bleu_stderr": 0.08476221272138684, "rouge1_fmeasure": 
0.19262617050178363, "rouge1_fmeasure_stderr": 0.002146390888601266, "rouge1_precision": 0.33080414705726846, "rouge1_precision_stderr": 0.004527044176321152, "rouge1_recall": 0.18732091518823407, "rouge1_recall_stderr": 0.002670996345244806, "rouge2_fmeasure": 0.05076363678961688, "rouge2_fmeasure_stderr": 0.001285862396880349, "rouge2_precision": 0.10217185278394622, "rouge2_precision_stderr": 0.003185145158037599, "rouge2_recall": 0.047867194110325716, "rouge2_recall_stderr": 0.0013584412752363497, "rougeL_fmeasure": 0.1514762968428363, "rougeL_fmeasure_stderr": 0.0017236567886180802, "rougeL_precision": 0.2700683731360322, "rougeL_precision_stderr": 0.004038807694219432, "rougeL_recall": 0.1454002358157948, "rougeL_recall_stderr": 0.002085035435615555, "rougeLsum_fmeasure": 0.18118727913460206, "rougeLsum_fmeasure_stderr": 0.0020192927966468316, "rougeLsum_precision": 0.31288849874154145, "rougeLsum_precision_stderr": 0.0043682724182277964, "rougeLsum_recall": 0.1760508499049095, "rougeLsum_recall_stderr": 0.0025131233168500626}}, "3": {"tldr_en": {"bleu": 1.5327724758686312, "bleu_stderr": 0.07653894357963317, "rouge1_fmeasure": 0.16552713462017304, "rouge1_fmeasure_stderr": 0.0023801833824345906, "rouge1_precision": 0.3130609147315037, "rouge1_precision_stderr": 0.005000892320236708, "rouge1_recall": 0.1527000073472306, "rouge1_recall_stderr": 0.002761140329661955, "rouge2_fmeasure": 0.04492302045628534, "rouge2_fmeasure_stderr": 0.001274382067297563, "rouge2_precision": 0.09833930805862205, "rouge2_precision_stderr": 0.003247626466243916, "rouge2_recall": 0.040868184528014447, "rouge2_recall_stderr": 0.0013445135083341307, "rougeL_fmeasure": 0.13195736438525596, "rougeL_fmeasure_stderr": 0.001900983992382293, "rougeL_precision": 0.2587094915639364, "rougeL_precision_stderr": 0.004404450322230028, "rougeL_recall": 0.12026892289436068, "rougeL_recall_stderr": 0.0021669801289898433, "rougeLsum_fmeasure": 0.1553327481622336, "rougeLsum_fmeasure_stderr": 0.0022358037272639875, "rougeLsum_precision": 0.29620144156557976, "rougeLsum_precision_stderr": 0.004809308828398635, "rougeLsum_recall": 0.14302097468993186, "rougeLsum_recall_stderr": 0.002591728517974711}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 4.323068230856815, "bleu_stderr": 0.11827517538305429, "rouge1_fmeasure": 0.21393795207816774, "rouge1_fmeasure_stderr": 0.0022172290840673714, "rouge1_precision": 0.20184258114816994, "rouge1_precision_stderr": 0.0024967227724980905, "rouge1_recall": 0.2741487724431522, "rouge1_recall_stderr": 0.002981425456738163, "rouge2_fmeasure": 0.07753930763205061, "rouge2_fmeasure_stderr": 0.001232753531339659, "rouge2_precision": 0.06981744534756419, "rouge2_precision_stderr": 0.0012528227998197135, "rouge2_recall": 0.10177066403058643, "rouge2_recall_stderr": 0.0016700677677734437, "rougeL_fmeasure": 0.18270253265655753, "rougeL_fmeasure_stderr": 0.0018385441584275236, "rougeL_precision": 0.17210749550231633, "rougeL_precision_stderr": 0.0020923336446562898, "rougeL_recall": 0.23511595699194335, "rougeL_recall_stderr": 0.0025537545321839632, "rougeLsum_fmeasure": 0.1878544007720258, "rougeLsum_fmeasure_stderr": 0.002020568816598664, "rougeLsum_precision": 0.17701660182876539, "rougeLsum_precision_stderr": 0.002212280556195139, "rougeLsum_recall": 0.24069254181513441, "rougeLsum_recall_stderr": 0.002731459462683422}}, "1": {"generate_text_restaurant": {"bleu": 11.12198726945363, "bleu_stderr": 0.09790982230134608, "rouge1_fmeasure": 0.4497122250058307, 
"rouge1_fmeasure_stderr": 0.002358274244539043, "rouge1_precision": 0.539424536665453, "rouge1_precision_stderr": 0.003200550693703833, "rouge1_recall": 0.42606169809315014, "rouge1_recall_stderr": 0.0030177511882706815, "rouge2_fmeasure": 0.20273574208408596, "rouge2_fmeasure_stderr": 0.0019952609736603347, "rouge2_precision": 0.24625082577450813, "rouge2_precision_stderr": 0.0025762628646406487, "rouge2_recall": 0.19229199204554204, "rouge2_recall_stderr": 0.002156249467345367, "rougeL_fmeasure": 0.32245123379575574, "rougeL_fmeasure_stderr": 0.00201216095760112, "rougeL_precision": 0.3898625952386598, "rougeL_precision_stderr": 0.00284396973940831, "rougeL_recall": 0.3047168374284729, "rougeL_recall_stderr": 0.0024099953927670574, "rougeLsum_fmeasure": 0.3634264521254899, "rougeLsum_fmeasure_stderr": 0.002293352251398984, "rougeLsum_precision": 0.43735223920241745, "rougeLsum_precision_stderr": 0.0030890151835425124, "rougeLsum_recall": 0.34382758409741837, "rougeLsum_recall_stderr": 0.002730730270528412}}, "2": {"generate_text_restaurant": {"bleu": 13.078853702923132, "bleu_stderr": 0.2155366018848879, "rouge1_fmeasure": 0.4688028872513949, "rouge1_fmeasure_stderr": 0.002257227013292593, "rouge1_precision": 0.5477662052819872, "rouge1_precision_stderr": 0.003235092315505865, "rouge1_recall": 0.45095887544798596, "rouge1_recall_stderr": 0.002938286116143171, "rouge2_fmeasure": 0.22409655455897465, "rouge2_fmeasure_stderr": 0.002006039376986731, "rouge2_precision": 0.265196570018947, "rouge2_precision_stderr": 0.002651500690772085, "rouge2_recall": 0.2158536847398131, "rouge2_recall_stderr": 0.0022076507629365054, "rougeL_fmeasure": 0.3410370656782091, "rougeL_fmeasure_stderr": 0.002033956805662392, "rougeL_precision": 0.400095294175111, "rougeL_precision_stderr": 0.002883412837425683, "rougeL_recall": 0.32801072044830487, "rougeL_recall_stderr": 0.0024748977975824578, "rougeLsum_fmeasure": 0.38535371000678353, "rougeLsum_fmeasure_stderr": 0.0022722669571099276, "rougeLsum_precision": 0.4508394877881969, "rougeLsum_precision_stderr": 0.00311858732792832, "rougeLsum_recall": 0.370539435057591, "rougeLsum_recall_stderr": 0.0027526630435952522}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.7275911083855118, "bleu_stderr": 0.09315346519689312, "rouge1_fmeasure": 0.19865214203885193, "rouge1_fmeasure_stderr": 0.0025545783551457458, "rouge1_precision": 0.14341594283459833, "rouge1_precision_stderr": 0.0019150227608556209, "rouge1_recall": 0.33746025963895177, "rouge1_recall_stderr": 0.004396420817141737, "rouge2_fmeasure": 0.04482048555975224, "rouge2_fmeasure_stderr": 0.0014416784806632838, "rouge2_precision": 0.03201351724162917, "rouge2_precision_stderr": 0.0010356434821340256, "rouge2_recall": 0.07835505162865627, "rouge2_recall_stderr": 0.0025959352489549627, "rougeL_fmeasure": 0.14902776732684508, "rougeL_fmeasure_stderr": 0.001891372900724768, "rougeL_precision": 0.10751247708787921, "rougeL_precision_stderr": 0.001414700879319645, "rougeL_recall": 0.2541352831094459, "rougeL_recall_stderr": 0.003353803946860742, "rougeLsum_fmeasure": 0.15674139294988435, "rougeLsum_fmeasure_stderr": 0.00212246709615498, "rougeLsum_precision": 0.11294396508891945, "rougeLsum_precision_stderr": 0.001572036287070283, "rougeLsum_recall": 0.26761583095187486, "rougeLsum_recall_stderr": 0.0037473927221216433}}, "1": {"article_DOC_summary": {"bleu": 2.0186469544251735, "bleu_stderr": 0.1617291885425493, "rouge1_fmeasure": 0.21444912809637723, "rouge1_fmeasure_stderr": 0.0031796389298071976, 
"rouge1_precision": 0.20709667913473623, "rouge1_precision_stderr": 0.003804885301753494, "rouge1_recall": 0.26348276211668675, "rouge1_recall_stderr": 0.004043289954314806, "rouge2_fmeasure": 0.044418308641362814, "rouge2_fmeasure_stderr": 0.0019691773545816222, "rouge2_precision": 0.04383663455526474, "rouge2_precision_stderr": 0.00214583996427979, "rouge2_recall": 0.05388920501753989, "rouge2_recall_stderr": 0.0023226693119193275, "rougeL_fmeasure": 0.1623869671006431, "rougeL_fmeasure_stderr": 0.0025460769742929506, "rougeL_precision": 0.15712258874107546, "rougeL_precision_stderr": 0.003082174207025799, "rougeL_recall": 0.19988523818040707, "rougeL_recall_stderr": 0.0031736460739436286, "rougeLsum_fmeasure": 0.16649088600178905, "rougeLsum_fmeasure_stderr": 0.0026032467500540024, "rougeLsum_precision": 0.1603205164350595, "rougeLsum_precision_stderr": 0.0030835504137508457, "rougeLsum_recall": 0.2062303531639936, "rougeLsum_recall_stderr": 0.0034043264584466795}}, "2": {"article_DOC_summary": {"bleu": 2.2145229821006605, "bleu_stderr": 0.1260377464556724, "rouge1_fmeasure": 0.21986453565549552, "rouge1_fmeasure_stderr": 0.0031797103554287768, "rouge1_precision": 0.21815659979255142, "rouge1_precision_stderr": 0.003915968292612965, "rouge1_recall": 0.25916715473443264, "rouge1_recall_stderr": 0.003786573638513296, "rouge2_fmeasure": 0.04563831775128178, "rouge2_fmeasure_stderr": 0.0019892126630855363, "rouge2_precision": 0.046529716608035045, "rouge2_precision_stderr": 0.002251988137878124, "rouge2_recall": 0.052720735928175166, "rouge2_recall_stderr": 0.0022189416521812295, "rougeL_fmeasure": 0.16582930193810858, "rougeL_fmeasure_stderr": 0.002542926945635016, "rougeL_precision": 0.16430612216912469, "rougeL_precision_stderr": 0.0031021595613689294, "rougeL_recall": 0.19661860705667725, "rougeL_recall_stderr": 0.0030656994440208998, "rougeLsum_fmeasure": 0.16793897815284758, "rougeLsum_fmeasure_stderr": 0.002581747319926458, "rougeLsum_precision": 0.16611558298427548, "rougeLsum_precision_stderr": 0.00311738411294024, "rougeLsum_recall": 0.19958288390451825, "rougeLsum_recall_stderr": 0.003185887552936416}}, "3": {"article_DOC_summary": {"bleu": 2.0462739047977316, "bleu_stderr": 0.1211474412912817, "rouge1_fmeasure": 0.20657839021439553, "rouge1_fmeasure_stderr": 0.003333626734019319, "rouge1_precision": 0.20625419841912476, "rouge1_precision_stderr": 0.004018382000661381, "rouge1_recall": 0.24250606659620239, "rouge1_recall_stderr": 0.004028957037863405, "rouge2_fmeasure": 0.042886506849894174, "rouge2_fmeasure_stderr": 0.0018706621721287574, "rouge2_precision": 0.043339248978693325, "rouge2_precision_stderr": 0.002075403315567557, "rouge2_recall": 0.05003229789469728, "rouge2_recall_stderr": 0.0021657987374288, "rougeL_fmeasure": 0.15731407769184894, "rougeL_fmeasure_stderr": 0.0026060631597964816, "rougeL_precision": 0.15689184780635723, "rougeL_precision_stderr": 0.003186996560712215, "rougeL_recall": 0.18611465932657437, "rougeL_recall_stderr": 0.003226322626989969, "rougeLsum_fmeasure": 0.15821652483758794, "rougeLsum_fmeasure_stderr": 0.0026533496025248726, "rougeLsum_precision": 0.1577316821070431, "rougeLsum_precision_stderr": 0.003210671374104237, "rougeLsum_recall": 0.18721163975798155, "rougeLsum_recall_stderr": 0.003316732278746064}}}}
+ {"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.4327056216526176, "bleu_stderr": 0.04372693419980847, "rouge1_fmeasure": 0.11561559994702363, "rouge1_fmeasure_stderr": 0.0022349577144307273, "rouge1_precision": 0.08120292881387756, "rouge1_precision_stderr": 0.002306316231404108, "rouge1_recall": 0.3120345446803705, "rouge1_recall_stderr": 0.005372909934713825, "rouge2_fmeasure": 0.05424150072683682, "rouge2_fmeasure_stderr": 0.0013372863957073417, "rouge2_precision": 0.03720829109819314, "rouge2_precision_stderr": 0.0014023448550832475, "rouge2_recall": 0.15261875747822895, "rouge2_recall_stderr": 0.00339031268383416, "rougeL_fmeasure": 0.11092185835290592, "rougeL_fmeasure_stderr": 0.002081793848469044, "rougeL_precision": 0.07773451061272615, "rougeL_precision_stderr": 0.002178465210306159, "rougeL_recall": 0.3025718897667384, "rougeL_recall_stderr": 0.005219032532466267, "rougeLsum_fmeasure": 0.10847909677805251, "rougeLsum_fmeasure_stderr": 0.0020741019070329024, "rougeLsum_precision": 0.07640311322341631, "rougeLsum_precision_stderr": 0.0021934299284831534, "rougeLsum_recall": 0.2932969878608124, "rougeLsum_recall_stderr": 0.004966268605421209}}, "1": {"PALM_prompt": {"bleu": 0.5170049640136369, "bleu_stderr": 0.03648679066962773, "rouge1_fmeasure": 0.14453580078700035, "rouge1_fmeasure_stderr": 0.0033541420725561855, "rouge1_precision": 0.13136221660069292, "rouge1_precision_stderr": 0.004388592114571626, "rouge1_recall": 0.2896405801661563, "rouge1_recall_stderr": 0.004953943443354722, "rouge2_fmeasure": 0.06923364476381255, "rouge2_fmeasure_stderr": 0.0021424956917440686, "rouge2_precision": 0.061473985181738404, "rouge2_precision_stderr": 0.0027700384901446383, "rouge2_recall": 0.14331487051134867, "rouge2_recall_stderr": 0.0033623310736128884, "rougeL_fmeasure": 0.13164203226260543, "rougeL_fmeasure_stderr": 0.002888601323729016, "rougeL_precision": 0.11885571450646416, "rougeL_precision_stderr": 0.003923661833356943, "rougeL_recall": 0.27126007497231336, "rougeL_recall_stderr": 0.0045873803474269104, "rougeLsum_fmeasure": 0.13353893256396349, "rougeLsum_fmeasure_stderr": 0.002967623053584408, "rougeLsum_precision": 0.12129133148954015, "rougeLsum_precision_stderr": 0.004045561118181283, "rougeLsum_recall": 0.2728234608851354, "rougeLsum_recall_stderr": 0.004583585366471488}}, "2": {"PALM_prompt": {"bleu": 0.5928013588165197, "bleu_stderr": 0.033775591670924525, "rouge1_fmeasure": 0.16409184419431355, "rouge1_fmeasure_stderr": 0.0035513646129875266, "rouge1_precision": 0.14559162616819946, "rouge1_precision_stderr": 0.004520623053825127, "rouge1_recall": 0.3289662638216203, "rouge1_recall_stderr": 0.004774112241093821, "rouge2_fmeasure": 0.08088052499730423, "rouge2_fmeasure_stderr": 0.0023389596994276268, "rouge2_precision": 0.07076232768638219, "rouge2_precision_stderr": 0.0027948927893596243, "rouge2_recall": 0.1674067035637957, "rouge2_recall_stderr": 0.0034923352179481795, "rougeL_fmeasure": 0.14878433573038657, "rougeL_fmeasure_stderr": 0.0030237132146357392, "rougeL_precision": 0.12928227924507313, "rougeL_precision_stderr": 0.0038455860156473096, "rougeL_recall": 0.30914412278215103, "rougeL_recall_stderr": 0.004469912060350128, "rougeLsum_fmeasure": 0.1514985119069214, "rougeLsum_fmeasure_stderr": 0.0031385516550478396, "rougeLsum_precision": 0.1330215775577541, "rougeLsum_precision_stderr": 0.004052559898165496, "rougeLsum_recall": 0.31112999938214775, "rougeLsum_recall_stderr": 0.00446078662626257}}, "3": {"PALM_prompt": {"bleu": 0.6807686634087953, 
"bleu_stderr": 0.027313109829673613, "rouge1_fmeasure": 0.17936348343657485, "rouge1_fmeasure_stderr": 0.003950557413709246, "rouge1_precision": 0.16284509580783738, "rouge1_precision_stderr": 0.004940954307153824, "rouge1_recall": 0.34165193448645764, "rouge1_recall_stderr": 0.004823144187146308, "rouge2_fmeasure": 0.09167274798729065, "rouge2_fmeasure_stderr": 0.0027093176290479337, "rouge2_precision": 0.0848751566517133, "rouge2_precision_stderr": 0.0033582769970308215, "rouge2_recall": 0.1755604725689479, "rouge2_recall_stderr": 0.0036199598913957873, "rougeL_fmeasure": 0.1614067806393073, "rougeL_fmeasure_stderr": 0.0033574645190345828, "rougeL_precision": 0.14478799010613622, "rougeL_precision_stderr": 0.0043027476999933385, "rougeL_recall": 0.31861959582186916, "rougeL_recall_stderr": 0.004460751088877849, "rougeLsum_fmeasure": 0.1647031537003239, "rougeLsum_fmeasure_stderr": 0.003470868557645694, "rougeLsum_precision": 0.14863622609613492, "rougeLsum_precision_stderr": 0.004457573202802981, "rougeLsum_recall": 0.32177137457614585, "rougeLsum_recall_stderr": 0.004496971251428065}}, "4": {"PALM_prompt": {"bleu": 0.7503302834538067, "bleu_stderr": 0.0402816045060437, "rouge1_fmeasure": 0.18968150933143582, "rouge1_fmeasure_stderr": 0.003940084164169744, "rouge1_precision": 0.17209973620193267, "rouge1_precision_stderr": 0.005047840493621876, "rouge1_recall": 0.3637510419209303, "rouge1_recall_stderr": 0.004667216670957412, "rouge2_fmeasure": 0.09769455469909966, "rouge2_fmeasure_stderr": 0.0026898671929461455, "rouge2_precision": 0.09074863043296012, "rouge2_precision_stderr": 0.0033847322187014256, "rouge2_recall": 0.18934860539994808, "rouge2_recall_stderr": 0.003551268691823376, "rougeL_fmeasure": 0.17009734401273233, "rougeL_fmeasure_stderr": 0.003316032712008811, "rougeL_precision": 0.15132731104989547, "rougeL_precision_stderr": 0.004296079897911914, "rougeL_recall": 0.3392207177743361, "rougeL_recall_stderr": 0.004346379157120974, "rougeLsum_fmeasure": 0.17420043534898244, "rougeLsum_fmeasure_stderr": 0.0034478375868970887, "rougeLsum_precision": 0.15670285935286377, "rougeLsum_precision_stderr": 0.004521862460431119, "rougeLsum_recall": 0.34256056402766993, "rougeLsum_recall_stderr": 0.004340802406659253}}, "5": {"PALM_prompt": {"bleu": 0.8802082281295327, "bleu_stderr": 0.03585155302087567, "rouge1_fmeasure": 0.2100307213117374, "rouge1_fmeasure_stderr": 0.004280889499111591, "rouge1_precision": 0.2015386177119827, "rouge1_precision_stderr": 0.0056627338976135595, "rouge1_recall": 0.3778003683962055, "rouge1_recall_stderr": 0.00478414375889329, "rouge2_fmeasure": 0.11089680744069119, "rouge2_fmeasure_stderr": 0.002955811011440687, "rouge2_precision": 0.10934470263927108, "rouge2_precision_stderr": 0.0038246604808114346, "rouge2_recall": 0.19982577035446628, "rouge2_recall_stderr": 0.003701169618935952, "rougeL_fmeasure": 0.18728902811936157, "rougeL_fmeasure_stderr": 0.003646672780047176, "rougeL_precision": 0.1766978585338184, "rougeL_precision_stderr": 0.004870142265891642, "rougeL_recall": 0.34962603195773095, "rougeL_recall_stderr": 0.004451899185074764, "rougeLsum_fmeasure": 0.1920899272461596, "rougeLsum_fmeasure_stderr": 0.0037931786488217814, "rougeLsum_precision": 0.1830292512610349, "rougeLsum_precision_stderr": 0.00511823428201166, "rougeLsum_recall": 0.353603903022092, "rougeLsum_recall_stderr": 0.004471111589359183}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 2.901333720728356, "bleu_stderr": 0.10226215319645214, "rouge1_fmeasure": 0.19490685891150464, 
"rouge1_fmeasure_stderr": 0.002228792147804532, "rouge1_precision": 0.17704255603612853, "rouge1_precision_stderr": 0.002383452581089663, "rouge1_recall": 0.2623176452518483, "rouge1_recall_stderr": 0.0030809405530689694, "rouge2_fmeasure": 0.04704761224432566, "rouge2_fmeasure_stderr": 0.0010447119831656043, "rouge2_precision": 0.04225836992348151, "rouge2_precision_stderr": 0.0009823631202670515, "rouge2_recall": 0.0651070954044141, "rouge2_recall_stderr": 0.0015933009474170402, "rougeL_fmeasure": 0.14883828387862688, "rougeL_fmeasure_stderr": 0.0016138603489545347, "rougeL_precision": 0.13397353229615103, "rougeL_precision_stderr": 0.0017128207643543572, "rougeL_recall": 0.2047034234132007, "rougeL_recall_stderr": 0.0024598017469020465, "rougeLsum_fmeasure": 0.18138749699425952, "rougeLsum_fmeasure_stderr": 0.0020853809149012385, "rougeLsum_precision": 0.16480116835931327, "rougeLsum_precision_stderr": 0.002236603075129391, "rougeLsum_recall": 0.2444741077779754, "rougeLsum_recall_stderr": 0.0028949045881182804}}, "1": {"tldr_en": {"bleu": 1.3189071143549054, "bleu_stderr": 0.056260491668375046, "rouge1_fmeasure": 0.13555859612142038, "rouge1_fmeasure_stderr": 0.0018520687451681944, "rouge1_precision": 0.17711498903253858, "rouge1_precision_stderr": 0.003187054524812852, "rouge1_recall": 0.1495992184775029, "rouge1_recall_stderr": 0.002418260025690305, "rouge2_fmeasure": 0.02115631835055968, "rouge2_fmeasure_stderr": 0.0009205511637097431, "rouge2_precision": 0.0341101685597815, "rouge2_precision_stderr": 0.0018789309734762675, "rouge2_recall": 0.023354524799664208, "rouge2_recall_stderr": 0.0011331997231529378, "rougeL_fmeasure": 0.10594836166403426, "rougeL_fmeasure_stderr": 0.0014231609544825045, "rougeL_precision": 0.14166539453787275, "rougeL_precision_stderr": 0.0027278885597432283, "rougeL_recall": 0.11669878560815988, "rougeL_recall_stderr": 0.0018535132456267542, "rougeLsum_fmeasure": 0.12879863711565903, "rougeLsum_fmeasure_stderr": 0.001726843722306648, "rougeLsum_precision": 0.16834898466576845, "rougeLsum_precision_stderr": 0.0030251794839351825, "rougeLsum_recall": 0.1423605907639815, "rougeLsum_recall_stderr": 0.002274408434168799}}, "2": {"tldr_en": {"bleu": 2.597656232065304, "bleu_stderr": 0.08476221272138684, "rouge1_fmeasure": 0.19262617050178363, "rouge1_fmeasure_stderr": 0.002146390888601266, "rouge1_precision": 0.33080414705726846, "rouge1_precision_stderr": 0.004527044176321152, "rouge1_recall": 0.18732091518823407, "rouge1_recall_stderr": 0.002670996345244806, "rouge2_fmeasure": 0.05076363678961688, "rouge2_fmeasure_stderr": 0.001285862396880349, "rouge2_precision": 0.10217185278394622, "rouge2_precision_stderr": 0.003185145158037599, "rouge2_recall": 0.047867194110325716, "rouge2_recall_stderr": 0.0013584412752363497, "rougeL_fmeasure": 0.1514762968428363, "rougeL_fmeasure_stderr": 0.0017236567886180802, "rougeL_precision": 0.2700683731360322, "rougeL_precision_stderr": 0.004038807694219432, "rougeL_recall": 0.1454002358157948, "rougeL_recall_stderr": 0.002085035435615555, "rougeLsum_fmeasure": 0.18118727913460206, "rougeLsum_fmeasure_stderr": 0.0020192927966468316, "rougeLsum_precision": 0.31288849874154145, "rougeLsum_precision_stderr": 0.0043682724182277964, "rougeLsum_recall": 0.1760508499049095, "rougeLsum_recall_stderr": 0.0025131233168500626}}, "3": {"tldr_en": {"bleu": 1.5327724758686312, "bleu_stderr": 0.07653894357963317, "rouge1_fmeasure": 0.16552713462017304, "rouge1_fmeasure_stderr": 0.0023801833824345906, "rouge1_precision": 0.3130609147315037, 
"rouge1_precision_stderr": 0.005000892320236708, "rouge1_recall": 0.1527000073472306, "rouge1_recall_stderr": 0.002761140329661955, "rouge2_fmeasure": 0.04492302045628534, "rouge2_fmeasure_stderr": 0.001274382067297563, "rouge2_precision": 0.09833930805862205, "rouge2_precision_stderr": 0.003247626466243916, "rouge2_recall": 0.040868184528014447, "rouge2_recall_stderr": 0.0013445135083341307, "rougeL_fmeasure": 0.13195736438525596, "rougeL_fmeasure_stderr": 0.001900983992382293, "rougeL_precision": 0.2587094915639364, "rougeL_precision_stderr": 0.004404450322230028, "rougeL_recall": 0.12026892289436068, "rougeL_recall_stderr": 0.0021669801289898433, "rougeLsum_fmeasure": 0.1553327481622336, "rougeLsum_fmeasure_stderr": 0.0022358037272639875, "rougeLsum_precision": 0.29620144156557976, "rougeLsum_precision_stderr": 0.004809308828398635, "rougeLsum_recall": 0.14302097468993186, "rougeLsum_recall_stderr": 0.002591728517974711}}, "4": {"tldr_en": {"bleu": 0.00654223663156633, "bleu_stderr": 0.0027758848324662983, "rouge1_fmeasure": 0.05349208128293813, "rouge1_fmeasure_stderr": 0.0019435876334074464, "rouge1_precision": 0.1055334273233753, "rouge1_precision_stderr": 0.004005109623144804, "rouge1_recall": 0.04830991975106493, "rouge1_recall_stderr": 0.001956273095648371, "rouge2_fmeasure": 0.014363695304766007, "rouge2_fmeasure_stderr": 0.0008381194097495501, "rouge2_precision": 0.03529801291930799, "rouge2_precision_stderr": 0.002325457268720388, "rouge2_recall": 0.012534282753832842, "rouge2_recall_stderr": 0.0008254659390003825, "rougeL_fmeasure": 0.04364703267936979, "rougeL_fmeasure_stderr": 0.0015875798918702792, "rougeL_precision": 0.08979847395448344, "rougeL_precision_stderr": 0.003545822235055598, "rougeL_recall": 0.038917636717379904, "rougeL_recall_stderr": 0.0015673979923397155, "rougeLsum_fmeasure": 0.049841630097568694, "rougeLsum_fmeasure_stderr": 0.0018082077986885854, "rougeLsum_precision": 0.09964328288443035, "rougeLsum_precision_stderr": 0.003821151145375782, "rougeLsum_recall": 0.04484118865254346, "rougeLsum_recall_stderr": 0.0018130620488983893}}, "5": {"tldr_en": {"bleu": 3.2932483830177584e-22, "bleu_stderr": 4.968624247175887e-20, "rouge1_fmeasure": 0.008969589470047824, "rouge1_fmeasure_stderr": 0.0009287248458710643, "rouge1_precision": 0.017659398078161862, "rouge1_precision_stderr": 0.001842951988111103, "rouge1_recall": 0.008267332980204723, "rouge1_recall_stderr": 0.0009686556728017771, "rouge2_fmeasure": 0.0028872404480968354, "rouge2_fmeasure_stderr": 0.0004217887748055091, "rouge2_precision": 0.006656029964148255, "rouge2_precision_stderr": 0.0010762435401541755, "rouge2_recall": 0.0026394181085068096, "rouge2_recall_stderr": 0.0004263557024540143, "rougeL_fmeasure": 0.007666950322090918, "rougeL_fmeasure_stderr": 0.0007998263132713956, "rougeL_precision": 0.015617411068671785, "rougeL_precision_stderr": 0.0016930430963125933, "rougeL_recall": 0.006935211584416843, "rougeL_recall_stderr": 0.0007968820212509419, "rougeLsum_fmeasure": 0.008516964231671285, "rougeLsum_fmeasure_stderr": 0.0008818018744518781, "rougeLsum_precision": 0.01678289905280236, "rougeLsum_precision_stderr": 0.0017533652046718578, "rougeLsum_recall": 0.007835054650781782, "rougeLsum_recall_stderr": 0.0009174649411865726}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 4.323068230856815, "bleu_stderr": 0.11827517538305429, "rouge1_fmeasure": 0.21393795207816774, "rouge1_fmeasure_stderr": 0.0022172290840673714, "rouge1_precision": 0.20184258114816994, 
"rouge1_precision_stderr": 0.0024967227724980905, "rouge1_recall": 0.2741487724431522, "rouge1_recall_stderr": 0.002981425456738163, "rouge2_fmeasure": 0.07753930763205061, "rouge2_fmeasure_stderr": 0.001232753531339659, "rouge2_precision": 0.06981744534756419, "rouge2_precision_stderr": 0.0012528227998197135, "rouge2_recall": 0.10177066403058643, "rouge2_recall_stderr": 0.0016700677677734437, "rougeL_fmeasure": 0.18270253265655753, "rougeL_fmeasure_stderr": 0.0018385441584275236, "rougeL_precision": 0.17210749550231633, "rougeL_precision_stderr": 0.0020923336446562898, "rougeL_recall": 0.23511595699194335, "rougeL_recall_stderr": 0.0025537545321839632, "rougeLsum_fmeasure": 0.1878544007720258, "rougeLsum_fmeasure_stderr": 0.002020568816598664, "rougeLsum_precision": 0.17701660182876539, "rougeLsum_precision_stderr": 0.002212280556195139, "rougeLsum_recall": 0.24069254181513441, "rougeLsum_recall_stderr": 0.002731459462683422}}, "1": {"generate_text_restaurant": {"bleu": 11.12198726945363, "bleu_stderr": 0.09790982230134608, "rouge1_fmeasure": 0.4497122250058307, "rouge1_fmeasure_stderr": 0.002358274244539043, "rouge1_precision": 0.539424536665453, "rouge1_precision_stderr": 0.003200550693703833, "rouge1_recall": 0.42606169809315014, "rouge1_recall_stderr": 0.0030177511882706815, "rouge2_fmeasure": 0.20273574208408596, "rouge2_fmeasure_stderr": 0.0019952609736603347, "rouge2_precision": 0.24625082577450813, "rouge2_precision_stderr": 0.0025762628646406487, "rouge2_recall": 0.19229199204554204, "rouge2_recall_stderr": 0.002156249467345367, "rougeL_fmeasure": 0.32245123379575574, "rougeL_fmeasure_stderr": 0.00201216095760112, "rougeL_precision": 0.3898625952386598, "rougeL_precision_stderr": 0.00284396973940831, "rougeL_recall": 0.3047168374284729, "rougeL_recall_stderr": 0.0024099953927670574, "rougeLsum_fmeasure": 0.3634264521254899, "rougeLsum_fmeasure_stderr": 0.002293352251398984, "rougeLsum_precision": 0.43735223920241745, "rougeLsum_precision_stderr": 0.0030890151835425124, "rougeLsum_recall": 0.34382758409741837, "rougeLsum_recall_stderr": 0.002730730270528412}}, "2": {"generate_text_restaurant": {"bleu": 13.078853702923132, "bleu_stderr": 0.2155366018848879, "rouge1_fmeasure": 0.4688028872513949, "rouge1_fmeasure_stderr": 0.002257227013292593, "rouge1_precision": 0.5477662052819872, "rouge1_precision_stderr": 0.003235092315505865, "rouge1_recall": 0.45095887544798596, "rouge1_recall_stderr": 0.002938286116143171, "rouge2_fmeasure": 0.22409655455897465, "rouge2_fmeasure_stderr": 0.002006039376986731, "rouge2_precision": 0.265196570018947, "rouge2_precision_stderr": 0.002651500690772085, "rouge2_recall": 0.2158536847398131, "rouge2_recall_stderr": 0.0022076507629365054, "rougeL_fmeasure": 0.3410370656782091, "rougeL_fmeasure_stderr": 0.002033956805662392, "rougeL_precision": 0.400095294175111, "rougeL_precision_stderr": 0.002883412837425683, "rougeL_recall": 0.32801072044830487, "rougeL_recall_stderr": 0.0024748977975824578, "rougeLsum_fmeasure": 0.38535371000678353, "rougeLsum_fmeasure_stderr": 0.0022722669571099276, "rougeLsum_precision": 0.4508394877881969, "rougeLsum_precision_stderr": 0.00311858732792832, "rougeLsum_recall": 0.370539435057591, "rougeLsum_recall_stderr": 0.0027526630435952522}}, "3": {"generate_text_restaurant": {"bleu": 13.768530954975475, "bleu_stderr": 0.14594564253938233, "rouge1_fmeasure": 0.4732988552296844, "rouge1_fmeasure_stderr": 0.0023072398938351565, "rouge1_precision": 0.5513149605843634, "rouge1_precision_stderr": 0.003223926598984129, 
"rouge1_recall": 0.4544657860924009, "rouge1_recall_stderr": 0.0029435858030621223, "rouge2_fmeasure": 0.23099189410387647, "rouge2_fmeasure_stderr": 0.0020736584515027023, "rouge2_precision": 0.2713161825786521, "rouge2_precision_stderr": 0.002652312584043066, "rouge2_recall": 0.22274524949051497, "rouge2_recall_stderr": 0.0022849365352015077, "rougeL_fmeasure": 0.34354436912494557, "rougeL_fmeasure_stderr": 0.0020702299844247204, "rougeL_precision": 0.40182792574019505, "rougeL_precision_stderr": 0.0028897593436573114, "rougeL_recall": 0.3300569194031173, "rougeL_recall_stderr": 0.002494937679218632, "rougeLsum_fmeasure": 0.3901751322155334, "rougeLsum_fmeasure_stderr": 0.002337466007692747, "rougeLsum_precision": 0.45467251256274516, "rougeLsum_precision_stderr": 0.0031283683413357516, "rougeLsum_recall": 0.3750256251060309, "rougeLsum_recall_stderr": 0.0028037931259678327}}, "4": {"generate_text_restaurant": {"bleu": 13.955354009824239, "bleu_stderr": 0.17621205158559158, "rouge1_fmeasure": 0.477320750868792, "rouge1_fmeasure_stderr": 0.0022064402436474525, "rouge1_precision": 0.5568639783306667, "rouge1_precision_stderr": 0.0032035266773002813, "rouge1_recall": 0.4565383457910882, "rouge1_recall_stderr": 0.0028310725817788825, "rouge2_fmeasure": 0.2332985109627194, "rouge2_fmeasure_stderr": 0.0020392691038979755, "rouge2_precision": 0.27463832252094134, "rouge2_precision_stderr": 0.002656556546241727, "rouge2_recall": 0.2237734377709334, "rouge2_recall_stderr": 0.0022284036836192115, "rougeL_fmeasure": 0.3467659886451906, "rougeL_fmeasure_stderr": 0.002050748177099438, "rougeL_precision": 0.4053609865770688, "rougeL_precision_stderr": 0.002864650388473525, "rougeL_recall": 0.3320809453199833, "rougeL_recall_stderr": 0.0024559602375536747, "rougeLsum_fmeasure": 0.39460741009932404, "rougeLsum_fmeasure_stderr": 0.0022906964526605337, "rougeLsum_precision": 0.45966718146546615, "rougeLsum_precision_stderr": 0.0030934720926802956, "rougeLsum_recall": 0.3782310346965992, "rougeLsum_recall_stderr": 0.002754847069758433}}, "5": {"generate_text_restaurant": {"bleu": 13.809469952975402, "bleu_stderr": 0.19234612711909144, "rouge1_fmeasure": 0.4773967399542214, "rouge1_fmeasure_stderr": 0.002203954037576556, "rouge1_precision": 0.5620558517895992, "rouge1_precision_stderr": 0.003280696774072891, "rouge1_recall": 0.45294101366960293, "rouge1_recall_stderr": 0.0027912967652229588, "rouge2_fmeasure": 0.23481550856452307, "rouge2_fmeasure_stderr": 0.002031448354554745, "rouge2_precision": 0.28049075780574995, "rouge2_precision_stderr": 0.002723905086717432, "rouge2_recall": 0.22268647687005963, "rouge2_recall_stderr": 0.002185025554068672, "rougeL_fmeasure": 0.3489632758060724, "rougeL_fmeasure_stderr": 0.002066334034877156, "rougeL_precision": 0.41130704812365865, "rougeL_precision_stderr": 0.002906913984621831, "rougeL_recall": 0.3317608784263896, "rougeL_recall_stderr": 0.0024614789723510556, "rougeLsum_fmeasure": 0.3955511870962712, "rougeLsum_fmeasure_stderr": 0.002297775237896474, "rougeLsum_precision": 0.4652537741581926, "rougeLsum_precision_stderr": 0.0031761710705153377, "rougeLsum_recall": 0.3759647022751929, "rougeLsum_recall_stderr": 0.0027254534040526504}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.7275911083855118, "bleu_stderr": 0.09315346519689312, "rouge1_fmeasure": 0.19865214203885193, "rouge1_fmeasure_stderr": 0.0025545783551457458, "rouge1_precision": 0.14341594283459833, "rouge1_precision_stderr": 0.0019150227608556209, "rouge1_recall": 0.33746025963895177, 
"rouge1_recall_stderr": 0.004396420817141737, "rouge2_fmeasure": 0.04482048555975224, "rouge2_fmeasure_stderr": 0.0014416784806632838, "rouge2_precision": 0.03201351724162917, "rouge2_precision_stderr": 0.0010356434821340256, "rouge2_recall": 0.07835505162865627, "rouge2_recall_stderr": 0.0025959352489549627, "rougeL_fmeasure": 0.14902776732684508, "rougeL_fmeasure_stderr": 0.001891372900724768, "rougeL_precision": 0.10751247708787921, "rougeL_precision_stderr": 0.001414700879319645, "rougeL_recall": 0.2541352831094459, "rougeL_recall_stderr": 0.003353803946860742, "rougeLsum_fmeasure": 0.15674139294988435, "rougeLsum_fmeasure_stderr": 0.00212246709615498, "rougeLsum_precision": 0.11294396508891945, "rougeLsum_precision_stderr": 0.001572036287070283, "rougeLsum_recall": 0.26761583095187486, "rougeLsum_recall_stderr": 0.0037473927221216433}}, "1": {"article_DOC_summary": {"bleu": 2.0186469544251735, "bleu_stderr": 0.1617291885425493, "rouge1_fmeasure": 0.21444912809637723, "rouge1_fmeasure_stderr": 0.0031796389298071976, "rouge1_precision": 0.20709667913473623, "rouge1_precision_stderr": 0.003804885301753494, "rouge1_recall": 0.26348276211668675, "rouge1_recall_stderr": 0.004043289954314806, "rouge2_fmeasure": 0.044418308641362814, "rouge2_fmeasure_stderr": 0.0019691773545816222, "rouge2_precision": 0.04383663455526474, "rouge2_precision_stderr": 0.00214583996427979, "rouge2_recall": 0.05388920501753989, "rouge2_recall_stderr": 0.0023226693119193275, "rougeL_fmeasure": 0.1623869671006431, "rougeL_fmeasure_stderr": 0.0025460769742929506, "rougeL_precision": 0.15712258874107546, "rougeL_precision_stderr": 0.003082174207025799, "rougeL_recall": 0.19988523818040707, "rougeL_recall_stderr": 0.0031736460739436286, "rougeLsum_fmeasure": 0.16649088600178905, "rougeLsum_fmeasure_stderr": 0.0026032467500540024, "rougeLsum_precision": 0.1603205164350595, "rougeLsum_precision_stderr": 0.0030835504137508457, "rougeLsum_recall": 0.2062303531639936, "rougeLsum_recall_stderr": 0.0034043264584466795}}, "2": {"article_DOC_summary": {"bleu": 2.2145229821006605, "bleu_stderr": 0.1260377464556724, "rouge1_fmeasure": 0.21986453565549552, "rouge1_fmeasure_stderr": 0.0031797103554287768, "rouge1_precision": 0.21815659979255142, "rouge1_precision_stderr": 0.003915968292612965, "rouge1_recall": 0.25916715473443264, "rouge1_recall_stderr": 0.003786573638513296, "rouge2_fmeasure": 0.04563831775128178, "rouge2_fmeasure_stderr": 0.0019892126630855363, "rouge2_precision": 0.046529716608035045, "rouge2_precision_stderr": 0.002251988137878124, "rouge2_recall": 0.052720735928175166, "rouge2_recall_stderr": 0.0022189416521812295, "rougeL_fmeasure": 0.16582930193810858, "rougeL_fmeasure_stderr": 0.002542926945635016, "rougeL_precision": 0.16430612216912469, "rougeL_precision_stderr": 0.0031021595613689294, "rougeL_recall": 0.19661860705667725, "rougeL_recall_stderr": 0.0030656994440208998, "rougeLsum_fmeasure": 0.16793897815284758, "rougeLsum_fmeasure_stderr": 0.002581747319926458, "rougeLsum_precision": 0.16611558298427548, "rougeLsum_precision_stderr": 0.00311738411294024, "rougeLsum_recall": 0.19958288390451825, "rougeLsum_recall_stderr": 0.003185887552936416}}, "3": {"article_DOC_summary": {"bleu": 2.0462739047977316, "bleu_stderr": 0.1211474412912817, "rouge1_fmeasure": 0.20657839021439553, "rouge1_fmeasure_stderr": 0.003333626734019319, "rouge1_precision": 0.20625419841912476, "rouge1_precision_stderr": 0.004018382000661381, "rouge1_recall": 0.24250606659620239, "rouge1_recall_stderr": 0.004028957037863405, 
"rouge2_fmeasure": 0.042886506849894174, "rouge2_fmeasure_stderr": 0.0018706621721287574, "rouge2_precision": 0.043339248978693325, "rouge2_precision_stderr": 0.002075403315567557, "rouge2_recall": 0.05003229789469728, "rouge2_recall_stderr": 0.0021657987374288, "rougeL_fmeasure": 0.15731407769184894, "rougeL_fmeasure_stderr": 0.0026060631597964816, "rougeL_precision": 0.15689184780635723, "rougeL_precision_stderr": 0.003186996560712215, "rougeL_recall": 0.18611465932657437, "rougeL_recall_stderr": 0.003226322626989969, "rougeLsum_fmeasure": 0.15821652483758794, "rougeLsum_fmeasure_stderr": 0.0026533496025248726, "rougeLsum_precision": 0.1577316821070431, "rougeLsum_precision_stderr": 0.003210671374104237, "rougeLsum_recall": 0.18721163975798155, "rougeLsum_recall_stderr": 0.003316732278746064}}, "4": {"article_DOC_summary": {"bleu": 0.1900845439712902, "bleu_stderr": 0.04734201088429449, "rouge1_fmeasure": 0.05669884311129999, "rouge1_fmeasure_stderr": 0.003233190697529867, "rouge1_precision": 0.06423392835301732, "rouge1_precision_stderr": 0.003890454592785219, "rouge1_recall": 0.061064960878397495, "rouge1_recall_stderr": 0.0035529952635305366, "rouge2_fmeasure": 0.011230809043560832, "rouge2_fmeasure_stderr": 0.001150436285440537, "rouge2_precision": 0.013119210106822345, "rouge2_precision_stderr": 0.0015803091283963062, "rouge2_recall": 0.01188116199451332, "rouge2_recall_stderr": 0.0012074230140535814, "rougeL_fmeasure": 0.04231694926938603, "rougeL_fmeasure_stderr": 0.0024588550815058375, "rougeL_precision": 0.048643821762800375, "rougeL_precision_stderr": 0.0030508122483730435, "rougeL_recall": 0.045534601833835085, "rougeL_recall_stderr": 0.0026917002780029826, "rougeLsum_fmeasure": 0.042878966271196875, "rougeLsum_fmeasure_stderr": 0.0025011086060412416, "rougeLsum_precision": 0.04908506848683216, "rougeLsum_precision_stderr": 0.0030727749370763383, "rougeLsum_recall": 0.04640232937561536, "rougeLsum_recall_stderr": 0.0027809753817619994}}, "5": {"article_DOC_summary": {"bleu": 5.949462853290167e-51, "bleu_stderr": 3.3116242110429246e-45, "rouge1_fmeasure": 0.0022256871499288173, "rouge1_fmeasure_stderr": 0.000655351445773999, "rouge1_precision": 0.0034539022767811622, "rouge1_precision_stderr": 0.001194139379020227, "rouge1_recall": 0.001875281856314757, "rouge1_recall_stderr": 0.0005400708989179161, "rouge2_fmeasure": 0.0003049361539927578, "rouge2_fmeasure_stderr": 0.00016618277661248796, "rouge2_precision": 0.0006133203936083778, "rouge2_precision_stderr": 0.00038964743374472006, "rouge2_recall": 0.00022420975251163933, "rouge2_recall_stderr": 0.00011871281237923162, "rougeL_fmeasure": 0.0015288199568875144, "rougeL_fmeasure_stderr": 0.0004717229876146918, "rougeL_precision": 0.002376363953379656, "rougeL_precision_stderr": 0.000833979092494122, "rougeL_recall": 0.0012732794478251294, "rougeL_recall_stderr": 0.0003799759956513556, "rougeLsum_fmeasure": 0.0017404000288000973, "rougeLsum_fmeasure_stderr": 0.0005261342065394403, "rougeLsum_precision": 0.002582083748444623, "rougeLsum_precision_stderr": 0.0008671989447029644, "rougeLsum_recall": 0.0015157649497445937, "rougeLsum_recall_stderr": 0.0004622173727756195}}}}
2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_4.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "bleu": 0.7503302834538067,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "bleu_stderr": 0.0402816045060437
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge1_precision": 0.17209973620193267,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_precision_stderr": 0.005047840493621876
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge1_recall": 0.3637510419209303,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_recall_stderr": 0.004667216670957412
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge1_fmeasure": 0.18968150933143582,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_fmeasure_stderr": 0.003940084164169744
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge2_precision": 0.09074863043296012,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_precision_stderr": 0.0033847322187014256
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge2_recall": 0.18934860539994808,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_recall_stderr": 0.003551268691823376
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge2_fmeasure": 0.09769455469909966,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_fmeasure_stderr": 0.0026898671929461455
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeL_precision": 0.15132731104989547,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_precision_stderr": 0.004296079897911914
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeL_recall": 0.3392207177743361,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_recall_stderr": 0.004346379157120974
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeL_fmeasure": 0.17009734401273233,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_fmeasure_stderr": 0.003316032712008811
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeLsum_precision": 0.15670285935286377,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_precision_stderr": 0.004521862460431119
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeLsum_recall": 0.34256056402766993,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_recall_stderr": 0.004340802406659253
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeLsum_fmeasure": 0.17420043534898244,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_fmeasure_stderr": 0.0034478375868970887
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 4,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_5.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "bleu": 0.8802082281295327,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "bleu_stderr": 0.03585155302087567
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge1_precision": 0.2015386177119827,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_precision_stderr": 0.0056627338976135595
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge1_recall": 0.3778003683962055,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_recall_stderr": 0.00478414375889329
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge1_fmeasure": 0.2100307213117374,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_fmeasure_stderr": 0.004280889499111591
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge2_precision": 0.10934470263927108,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_precision_stderr": 0.0038246604808114346
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge2_recall": 0.19982577035446628,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_recall_stderr": 0.003701169618935952
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rouge2_fmeasure": 0.11089680744069119,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_fmeasure_stderr": 0.002955811011440687
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeL_precision": 0.1766978585338184,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_precision_stderr": 0.004870142265891642
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeL_recall": 0.34962603195773095,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_recall_stderr": 0.004451899185074764
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeL_fmeasure": 0.18728902811936157,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_fmeasure_stderr": 0.003646672780047176
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeLsum_precision": 0.1830292512610349,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_precision_stderr": 0.00511823428201166
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeLsum_recall": 0.353603903022092,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_recall_stderr": 0.004471111589359183
+ },
+ {
+ "task_name": "GEM/web_nlg_en",
+ "prompt_name": "PALM_prompt",
+ "rougeLsum_fmeasure": 0.1920899272461596,
+ "dataset_path": "GEM/web_nlg",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_fmeasure_stderr": 0.0037931786488217814
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 5,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_4.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge1_precision": 0.1055334273233753,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_precision_stderr": 0.004005109623144804
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge1_recall": 0.04830991975106493,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_recall_stderr": 0.001956273095648371
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge1_fmeasure": 0.05349208128293813,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_fmeasure_stderr": 0.0019435876334074464
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge2_precision": 0.03529801291930799,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_precision_stderr": 0.002325457268720388
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge2_recall": 0.012534282753832842,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_recall_stderr": 0.0008254659390003825
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge2_fmeasure": 0.014363695304766007,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_fmeasure_stderr": 0.0008381194097495501
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeL_precision": 0.08979847395448344,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_precision_stderr": 0.003545822235055598
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeL_recall": 0.038917636717379904,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_recall_stderr": 0.0015673979923397155
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeL_fmeasure": 0.04364703267936979,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_fmeasure_stderr": 0.0015875798918702792
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeLsum_precision": 0.09964328288443035,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_precision_stderr": 0.003821151145375782
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeLsum_recall": 0.04484118865254346,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_recall_stderr": 0.0018130620488983893
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeLsum_fmeasure": 0.049841630097568694,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_fmeasure_stderr": 0.0018082077986885854
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "bleu": 0.00654223663156633,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "bleu_stderr": 0.0027758848324662983
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 4,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_5.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge1_precision": 0.017659398078161862,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_precision_stderr": 0.001842951988111103
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge1_recall": 0.008267332980204723,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_recall_stderr": 0.0009686556728017771
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge1_fmeasure": 0.008969589470047824,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge1_fmeasure_stderr": 0.0009287248458710643
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge2_precision": 0.006656029964148255,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_precision_stderr": 0.0010762435401541755
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge2_recall": 0.0026394181085068096,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_recall_stderr": 0.0004263557024540143
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rouge2_fmeasure": 0.0028872404480968354,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rouge2_fmeasure_stderr": 0.0004217887748055091
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeL_precision": 0.015617411068671785,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_precision_stderr": 0.0016930430963125933
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeL_recall": 0.006935211584416843,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_recall_stderr": 0.0007968820212509419
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeL_fmeasure": 0.007666950322090918,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeL_fmeasure_stderr": 0.0007998263132713956
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeLsum_precision": 0.01678289905280236,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_precision_stderr": 0.0017533652046718578
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeLsum_recall": 0.007835054650781782,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_recall_stderr": 0.0009174649411865726
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "rougeLsum_fmeasure": 0.008516964231671285,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "rougeLsum_fmeasure_stderr": 0.0008818018744518781
+ },
+ {
+ "task_name": "GEM/wiki_lingua_en",
+ "prompt_name": "tldr_en",
+ "bleu": 3.2932483830177584e-22,
+ "dataset_path": "GEM/wiki_lingua",
+ "dataset_name": "en",
+ "subset": null,
+ "bleu_stderr": 4.968624247175887e-20
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 5,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "bleu": 13.768530954975475,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "bleu_stderr": 0.14594564253938233
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_precision": 0.5513149605843634,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_precision_stderr": 0.003223926598984129
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_recall": 0.4544657860924009,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_recall_stderr": 0.0029435858030621223
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_fmeasure": 0.4732988552296844,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_fmeasure_stderr": 0.0023072398938351565
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_precision": 0.2713161825786521,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_precision_stderr": 0.002652312584043066
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_recall": 0.22274524949051497,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_recall_stderr": 0.0022849365352015077
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_fmeasure": 0.23099189410387647,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_fmeasure_stderr": 0.0020736584515027023
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_precision": 0.40182792574019505,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_precision_stderr": 0.0028897593436573114
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_recall": 0.3300569194031173,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_recall_stderr": 0.002494937679218632
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_fmeasure": 0.34354436912494557,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_fmeasure_stderr": 0.0020702299844247204
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_precision": 0.45467251256274516,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_precision_stderr": 0.0031283683413357516
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_recall": 0.3750256251060309,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_recall_stderr": 0.0028037931259678327
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_fmeasure": 0.3901751322155334,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_fmeasure_stderr": 0.002337466007692747
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 3,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "bleu": 13.955354009824239,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "bleu_stderr": 0.17621205158559158
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_precision": 0.5568639783306667,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_precision_stderr": 0.0032035266773002813
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_recall": 0.4565383457910882,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_recall_stderr": 0.0028310725817788825
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_fmeasure": 0.477320750868792,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_fmeasure_stderr": 0.0022064402436474525
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_precision": 0.27463832252094134,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_precision_stderr": 0.002656556546241727
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_recall": 0.2237734377709334,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_recall_stderr": 0.0022284036836192115
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_fmeasure": 0.2332985109627194,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_fmeasure_stderr": 0.0020392691038979755
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_precision": 0.4053609865770688,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_precision_stderr": 0.002864650388473525
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_recall": 0.3320809453199833,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_recall_stderr": 0.0024559602375536747
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_fmeasure": 0.3467659886451906,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_fmeasure_stderr": 0.002050748177099438
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_precision": 0.45966718146546615,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_precision_stderr": 0.0030934720926802956
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_recall": 0.3782310346965992,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_recall_stderr": 0.002754847069758433
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_fmeasure": 0.39460741009932404,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_fmeasure_stderr": 0.0022906964526605337
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 4,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "bleu": 13.809469952975402,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "bleu_stderr": 0.19234612711909144
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_precision": 0.5620558517895992,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_precision_stderr": 0.003280696774072891
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_recall": 0.45294101366960293,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_recall_stderr": 0.0027912967652229588
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_fmeasure": 0.4773967399542214,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_fmeasure_stderr": 0.002203954037576556
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_precision": 0.28049075780574995,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_precision_stderr": 0.002723905086717432
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_recall": 0.22268647687005963,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_recall_stderr": 0.002185025554068672
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_fmeasure": 0.23481550856452307,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_fmeasure_stderr": 0.002031448354554745
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_precision": 0.41130704812365865,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_precision_stderr": 0.002906913984621831
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_recall": 0.3317608784263896,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_recall_stderr": 0.0024614789723510556
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_fmeasure": 0.3489632758060724,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_fmeasure_stderr": 0.002066334034877156
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_precision": 0.4652537741581926,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_precision_stderr": 0.0031761710705153377
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_recall": 0.3759647022751929,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_recall_stderr": 0.0027254534040526504
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_fmeasure": 0.3955511870962712,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_fmeasure_stderr": 0.002297775237896474
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 5,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_gem_xsum_article_DOC_summary_4.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge1_precision": 0.06423392835301732,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge1_precision_stderr": 0.003890454592785219
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge1_recall": 0.061064960878397495,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge1_recall_stderr": 0.0035529952635305366
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge1_fmeasure": 0.05669884311129999,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge1_fmeasure_stderr": 0.003233190697529867
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge2_precision": 0.013119210106822345,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge2_precision_stderr": 0.0015803091283963062
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge2_recall": 0.01188116199451332,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge2_recall_stderr": 0.0012074230140535814
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge2_fmeasure": 0.011230809043560832,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge2_fmeasure_stderr": 0.001150436285440537
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeL_precision": 0.048643821762800375,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeL_precision_stderr": 0.0030508122483730435
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeL_recall": 0.045534601833835085,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeL_recall_stderr": 0.0026917002780029826
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeL_fmeasure": 0.04231694926938603,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeL_fmeasure_stderr": 0.0024588550815058375
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeLsum_precision": 0.04908506848683216,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeLsum_precision_stderr": 0.0030727749370763383
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeLsum_recall": 0.04640232937561536,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeLsum_recall_stderr": 0.0027809753817619994
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeLsum_fmeasure": 0.042878966271196875,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeLsum_fmeasure_stderr": 0.0025011086060412416
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "bleu": 0.1900845439712902,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "bleu_stderr": 0.04734201088429449
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 4,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
2b855b18boscarseed3/evaluation/generation/slim.2b855b18boscarseed3_gem_xsum_article_DOC_summary_5.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge1_precision": 0.0034539022767811622,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge1_precision_stderr": 0.001194139379020227
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge1_recall": 0.001875281856314757,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge1_recall_stderr": 0.0005400708989179161
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge1_fmeasure": 0.0022256871499288173,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge1_fmeasure_stderr": 0.000655351445773999
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge2_precision": 0.0006133203936083778,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge2_precision_stderr": 0.00038964743374472006
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge2_recall": 0.00022420975251163933,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge2_recall_stderr": 0.00011871281237923162
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge2_fmeasure": 0.0003049361539927578,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge2_fmeasure_stderr": 0.00016618277661248796
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeL_precision": 0.002376363953379656,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeL_precision_stderr": 0.000833979092494122
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeL_recall": 0.0012732794478251294,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeL_recall_stderr": 0.0003799759956513556
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeL_fmeasure": 0.0015288199568875144,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeL_fmeasure_stderr": 0.0004717229876146918
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeLsum_precision": 0.002582083748444623,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeLsum_precision_stderr": 0.0008671989447029644
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeLsum_recall": 0.0015157649497445937,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeLsum_recall_stderr": 0.0004622173727756195
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeLsum_fmeasure": 0.0017404000288000973,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeLsum_fmeasure_stderr": 0.0005261342065394403
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "bleu": 5.949462853290167e-51,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "bleu_stderr": 3.3116242110429246e-45
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 5,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_0.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.2885102222879185, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.018814750247349226}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06986940590587196, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001589538808405114}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3601035634130576, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005729612251664998}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10748924438697063, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019627335784421977}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03058089684761934, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008472318784221166}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.16233456351171752, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003538947033728204}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04793659458510968, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012000747473659339}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06660129994406229, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014896564243660553}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3476114441502714, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00556252882502401}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10260929545050951, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018161736716811772}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0653211036931926, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015175143449742798}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3330969302973756, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.005181716506189715}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10014642872991254, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018365986709506692}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_1.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.45000164090662254, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.031010800702381897}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.10727696683982045, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0036558138225375007}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2897448946183138, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00496264975031796}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.13178158508361204, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003190407778039719}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.0524832852434549, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002473120759691883}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.14198286520129574, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0033815332902604332}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.06359472186875659, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002099568971326144}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.09727987964244793, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0032789242347531986}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2734522373816878, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004647123213457267}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.12088175776229021, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0027928528706948627}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.09904741613960874, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003351314319270942}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.27407944020651004, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004629174108020479}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.12236186367475564, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0028537437961900563}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
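Note: each agg.*.json added in this diff is a single JSON object with a "results" list (one entry per metric, each repeating the task/prompt metadata and carrying a matching "_stderr" field) and a "config" block recording the model path, few-shot count, and evaluation limits. A minimal loading sketch, assuming only the layout visible above; the path is one of the files in this diff:

    import json

    path = "2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_1.json"
    with open(path) as f:
        data = json.load(f)

    print("num_fewshot:", data["config"]["num_fewshot"])
    for entry in data["results"]:
        # each entry carries one point metric plus its "<metric>_stderr" twin
        for key, value in entry.items():
            if isinstance(value, float) and not key.endswith("_stderr"):
                stderr = entry.get(key + "_stderr")
                if stderr is not None:
                    print(f"{key}: {value:.4f} +/- {stderr:.4f}")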
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_2.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5031628438128752, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.037975620540407796}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.13178865999800374, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004286556614273878}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3195228913544668, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0048649315293930575}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.15572209579082977, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003607678804936736}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.0669476537528754, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002794538500462027}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.16084052555444908, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003476068207637453}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.07711421193157286, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0023386418579917838}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.11763553502883987, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003684152147552909}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3000059152925143, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004498671925487945}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.1412229177895771, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0030666242023744826}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.1202238869310131, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003806347033217231}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.30236430718282165, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004546454328550057}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.14345612194106672, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.003161708023891622}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_3.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6420212476995946, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02950378232155723}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.15224105536917001, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004722192087730495}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.33898948070527846, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0048980118832507155}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.17412767201474044, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0039444097918065555}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.07944656049514473, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0031629053575410486}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1722958936156964, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003633224796306606}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.08838887319189719, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002663413871498349}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.13659236235082065, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004137604194004145}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3177217464657264, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0045465089010983456}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.15817892872962086, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0034118175927275347}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.13968030776000567, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0042681099348826365}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3203914149196238, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004589744914342288}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.16089044506764633, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.003515045175602986}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
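The *_stderr values in these records come from bootstrap resampling over per-example scores (each config block reports "bootstrap_iters": 10). A generic sketch of that idea, not the harness's exact implementation; the per-example scores below are made up for illustration:

    import random
    import statistics

    def bootstrap_stderr(per_example_scores, iters=10, seed=1234):
        # resample the per-example scores with replacement, take the mean of
        # each resample, and report the spread of those means
        rng = random.Random(seed)
        n = len(per_example_scores)
        means = [
            statistics.fmean(rng.choices(per_example_scores, k=n))
            for _ in range(iters)
        ]
        return statistics.stdev(means)

    print(bootstrap_stderr([0.10, 0.40, 0.35, 0.20, 0.50]))  # toy data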
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.742678409649558, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03902811107180319}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.16707144442848892, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005064913944213605}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3537887535116047, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0049574489193098545}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.18660114045707618, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.004148892494933675}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.08785595362294374, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0033365757552541796}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.18440740889177096, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003705491795000126}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.09601470202101701, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0028037377683630665}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.14771880453733474, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004375949260846757}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3293205890599116, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004593682894028154}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.1673589288354915, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0035415039121437716}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.15307105161040158, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004600134750099294}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3338883264011036, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004629523755755748}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.17183676525292046, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00368388395388885}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7962411530479279, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.038686868475498286}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.18825367291224313, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005487495890800591}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.36635462014549386, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005085317026124101}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.2021953000219425, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0043504572614772394}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.10065335217513668, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0036283095038707417}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1943316000725506, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0038570021201066495}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.10599704037514564, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00294608664476847}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.16599091076937933, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004729896791937895}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.341081122209838, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00473516361704282}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.1815268669498086, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.003733200513314234}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.17133764860842876, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004935257580344367}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3452411454115444, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00478933123847975}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.18580221212137862, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.003868938220579297}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
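Across the web_nlg agg files above, BLEU rises steadily with the few-shot count (0.45 at 1-shot up to 0.80 at 5-shot). A sketch for pulling that trend out of the files, assuming the _<num_fewshot>.json suffix convention used in this diff and that the 0-shot file exists alongside the ones shown:

    import json

    base = ("2b855b18boscarseed4/evaluation/generation/"
            "agg.2b855b18boscarseed4_GEM-web_nlg_en_PALM_prompt_{}.json")
    for shots in range(6):  # 0-shot through 5-shot
        with open(base.format(shots)) as f:
            results = json.load(f)["results"]
        # exactly one entry in "results" carries the "bleu" point metric
        bleu = next(r["bleu"] for r in results if "bleu" in r)
        print(f"{shots}-shot BLEU: {bleu:.3f}")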
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_0.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.16013395446722345, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002329509315681086}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.24051988624360016, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029372320622517867}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1763937728637578, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002131845057088457}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.03518006821199264, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008984190014947476}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05483500694355518, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014416092430391588}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.039152758251644056, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009416649535361641}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.12413640833467976, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001741956262257835}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.19165309639717706, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023605110901685797}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13773416295004748, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015550723670569116}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.14678351619017746, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0021546228235723225}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.22117900398025164, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002735226568909973}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1617030633493469, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019556756647248637}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.0849796613959586, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04696672705660788}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_1.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.19672687103676886, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003258749918111476}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.18455367655351018, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002767761544501995}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1585188102387623, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020155353418632874}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.04450842721911128, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0019891891088117566}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0366989882855691, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013228780295075004}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.03131814572165755, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010581111015174926}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.15451077914916989, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027579081240348092}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.14140803289733292, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020982477388227037}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.12143905240577069, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015031946696328648}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.18604910346954598, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00309676780562894}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.17402275394772532, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002572489122694544}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1495562126157714, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001872368003495776}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.1283333155781783, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06173739921221236}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_2.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.307116290327102, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004057522411197763}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2311237359085621, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029299787091923426}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.21452377675057696, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021569835383149012}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.09422974796302211, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002705895596740964}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.06257756927405492, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015515706837168303}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.058643602447126714, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012949499445233173}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.2432410772913567, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003517982923822789}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.17688649614797855, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002284569702859158}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.16492363195562856, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016715366691453716}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.28973928940437643, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003934722505209809}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.21682396943147, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027702025077421225}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.20117338794906528, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00204046959900182}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.4289227019273594, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0980619748318853}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_3.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.2769887302516951, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004362735797109796}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.18454195004679522, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0030754042628293415}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.17916235208830358, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024670384081655434}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.083699955868728, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002522350390948556}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0516334808309754, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015239361027425543}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0500245477227326, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001309394703781371}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.2222095212375479, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0037116950395030724}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1434708228635378, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024204831929773756}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13980498548956438, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019354114672183093}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.26189634849318716, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00419480599211331}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.17336904809911782, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0029087721877783914}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.16844194368738818, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002334380113432297}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.376034489496753, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09539744973606208}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09762628704552982, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0036791063125522785}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.060120073173523476, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023641404901324687}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.05993860780461335, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002131835072656725}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.031638593954336734, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0019934615625138543}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.017092416816538707, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0010152070977343115}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.017263696586426885, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009451452357698392}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.08093159748512854, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003170566591100483}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.04781306770043991, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018739108447516672}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.0480907943675992, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017151857375572795}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09292340233124541, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0035473196130913734}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.056634037424394636, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002233844193266136}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.05650462795248104, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020140510113373943}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.050389864234167886, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0059647741934256635}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_GEM-wiki_lingua_en_tldr_en_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.01565280096058062, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016612857342280772}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.008742943600187653, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0009268427913538655}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.00902287038028598, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0008938998714092542}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.005949363814180337, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009962816866311115}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0025172474630954234, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0003682244947535997}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0027422642320708438, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00039566591143972346}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.013576661053931781, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015265111710369024}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.007099111703727658, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0007499618787471169}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.007434229847140775, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0007491433837273781}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.015360312471765456, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016431247061136746}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.008454810046980078, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0008941904755417539}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.008776987190420815, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0008707445823791472}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 8.265832471675542e-15, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.0283191795715903e-13}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_0.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.096542791777673, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08134151944998008}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.1408956477068821, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002144186684422226}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.24685313154685348, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00398756484296965}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.17519014511364406, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0027132936447791557}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.061134217270002185, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010688471293828399}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.1111063112969638, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0020839019041010436}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07713644020351093, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013818850238330176}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.11746836291241039, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001770344961826471}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.2043979176270428, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003322762823006439}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.14533905089922927, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0022335766231647864}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.12383200422860538, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019213851037623371}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.21771677447213866, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0035822171185702755}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.1543188455548364, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0024394128124284337}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_1.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 11.636437840523932, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.18091961292507475}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5529368438780359, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003218001337432491}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.43039030533165856, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003030212036057945}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4574737595014057, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002370786565310856}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.26135235111012534, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0026182299150828863}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2005747363074195, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002164712048368141}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.21331330559588113, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020075313836372187}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4032318331351836, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029154284432604764}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3104173064842767, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024620181857949}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3309168988678521, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020784193846465}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4518823411347895, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031313938217144024}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.34989523519495247, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002757132712457188}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3724616883241483, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00231589656468177}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_2.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 13.940035336382525, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.16542805262835492}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5716314258924529, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032817981940628503}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4601741627041542, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003012542198372132}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4834475778667979, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002335469705298163}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.28312186562891195, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027494548315134685}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2254828164851577, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002295204996121906}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.236593278145209, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021021809940676003}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4234067633162062, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029971383182949177}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3382839457099107, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0025050125067426973}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.35588396700463243, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020950606545091788}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.47378317177615614, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0032012717287453565}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.38076842805666167, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00281150270615279}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.40012402552401805, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002346179371877077}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_3.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.66179455610714, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13415705332087588}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5764695834945521, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032581882684994895}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.46717604518029776, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029648964637177615}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4903382443287797, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002292688731143241}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.29014920233592045, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027255079449499536}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.23445276501664603, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023471618423299826}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.24507314431248517, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002138164969377021}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4273847799570328, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029921730573943955}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.34453910084478045, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002539290260055678}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3620651921625073, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021434092844854554}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.48176141155023144, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003228468591143861}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.39058848131953866, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002845452136118045}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.4099311876009419, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0024045411448076755}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 15.022031102332107, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.16417031377693017}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5770482830010712, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003251326328261717}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.47133629823365586, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002921936187342856}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4942865134674111, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022771967138141406}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.29383177907763103, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027817031871968326}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2382028240485566, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002353027684076357}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.24928414811807728, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002166419484154268}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4301441459323248, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002999824342775559}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3493414927576503, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0025247997528297225}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.36691773536391337, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002149918026412849}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.48329776264675745, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031922793047583155}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3951915424421474, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002839997838725084}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.41424687188229053, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002391527977802788}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 15.023149455940604, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.26579264699439253}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5780478341742799, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032487320252010976}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.47288769811127807, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002931756266262836}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.49615760726840896, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002285028513023122}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2935900735151307, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027426056856979858}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.23773396003776032, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023328435648278965}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.24913338818976044, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021298687124516715}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.428313469994925, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002968356185108453}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.34974936188333616, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002557873842537307}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.36682879504685373, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021565611677589556}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4837406755476586, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00318979067736214}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3968080607735509, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002848496722384228}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.41568936322221234, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023824138825465782}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_gem_xsum_article_DOC_summary_0.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.13277454130379152, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002044046634845437}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.29979852467546436, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0047173419723184435}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.18054282599967508, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0027351913296475264}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.026129050167589877, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009979708996190146}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06308948517942466, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002497782798678786}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.03641032101090017, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013950647012752266}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.1002013016407079, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014914470348786162}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.22667939366957607, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003581233975383609}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.13609189336087749, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019935442019973133}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10492051542953285, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001668516433667799}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.23748271696550155, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003977359961775538}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.14264131978660138, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022479874538290693}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.5443192891992854, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11716716030446354}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b18boscarseed4/evaluation/generation/agg.2b855b18boscarseed4_gem_xsum_article_DOC_summary_1.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.19089828605932946, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0035433556999898697}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.26409155276091556, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004223976467554409}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.20369080524694547, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0030500271600260908}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.037491954876627515, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0017578679787133128}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.05336746488530688, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023397553722686397}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.040090765084043044, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017179399221850897}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.14468472424538684, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002744712660862456}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.20111909092776561, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0033400478979315274}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1542802398449582, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002347002704067806}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.14803213767629145, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0027532412703009193}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2084666442117558, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0035807223444093207}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.15878556304203392, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002418952435970748}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.8132262542177187, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11026955010254395}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b18boscarseed4/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}