Muennighoff committed
Commit a13fb2c · 1 Parent(s): 3335d76
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +132 -0
  2. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl +0 -0
  3. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl +0 -0
  4. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl +0 -0
  5. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl +0 -0
  6. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl +0 -0
  7. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl +0 -0
  8. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl +0 -0
  9. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl +0 -0
  10. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl +0 -0
  11. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl +0 -0
  12. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl +0 -0
  13. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl +0 -0
  14. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl +0 -0
  15. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl +0 -0
  16. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl +0 -0
  17. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +0 -0
  18. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +0 -0
  19. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +0 -0
  20. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_0.jsonl +0 -0
  21. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_1.jsonl +0 -0
  22. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_2.jsonl +0 -0
  23. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_3.jsonl +0 -0
  24. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_4.jsonl +0 -0
  25. 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_5.jsonl +0 -0
  26. 2b855b11bc4seed1/evaluation/generation/merged.csv +1 -0
  27. 2b855b11bc4seed1/evaluation/generation/merged.json +1 -0
  28. 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json +1 -0
  29. 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json +1 -0
  30. 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.json +1 -0
  31. 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.json +1 -0
  32. 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.json +1 -0
  33. 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.json +1 -0
  34. 2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.json +1 -0
  35. 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl +3 -0
  36. 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl +3 -0
  37. 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl +3 -0
  38. 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +3 -0
  39. 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +3 -0
  40. 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.jsonl +3 -0
  41. 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.jsonl +3 -0
  42. 2b855b11bc4seed3/evaluation/generation/merged.csv +18 -4
  43. 2b855b11bc4seed3/evaluation/generation/merged.json +1 -1
  44. 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json +133 -0
  45. 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json +133 -0
  46. 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.json +133 -0
  47. 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.json +133 -0
  48. 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.json +133 -0
  49. 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.json +133 -0
  50. 2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.json +133 -0
.gitattributes CHANGED
@@ -428,3 +428,135 @@ evaluation/seed2/generation/examples.limited=3000.model=seed2.task=GEM-wiki_ling
428
  2b855b1b25c4seed1/evaluation/generation/examples.2b855b1b25c4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
429
  2b855b1b25c4seed2/evaluation/generation/examples.2b855b1b25c4seed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
430
  2b855b1b25c4seed4/evaluation/generation/examples.2b855b1b25c4seed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
431
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
432
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
433
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
434
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
435
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
436
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
437
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
438
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
439
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
440
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
441
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
442
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
443
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
444
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
445
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
446
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
447
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
448
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
449
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
450
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
451
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
452
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
453
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
454
+ 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
455
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
456
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
457
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
458
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
459
+ 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
460
+ 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
461
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
462
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
463
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
464
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
465
+ 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
466
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
467
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
468
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
469
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
470
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
471
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
472
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
473
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
474
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
475
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
476
+ 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
477
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
478
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
479
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
480
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
481
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
482
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
483
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
484
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
485
+ 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
486
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
487
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
488
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
489
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
490
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
491
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
492
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
493
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
494
+ 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
495
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
496
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
497
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
498
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
499
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
500
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
501
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
502
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
503
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
504
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
505
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
506
+ 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
507
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
508
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
509
+ 2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
510
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
511
+ 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
512
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
513
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
514
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
515
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
516
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
517
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
518
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
519
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
520
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
521
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
522
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
523
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
524
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
525
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
526
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
527
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
528
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
529
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
530
+ 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
531
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
532
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
533
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
534
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
535
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
536
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
537
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
538
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
539
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
540
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
541
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
542
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
543
+ 2b855b14bc4seed2/evaluation/generation/examples.2b855b14bc4seed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
544
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
545
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
546
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
547
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
548
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
549
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
550
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
551
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
552
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
553
+ 2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
554
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
555
+ 2b855b14bc4seed4/evaluation/generation/examples.2b855b14bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
556
+ 2b855b11bc4seed4/evaluation/generation/examples.2b855b11bc4seed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
557
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
558
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
559
+ 2b855b18bc4seed1/evaluation/generation/examples.2b855b18bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
560
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
561
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
562
+ 2b855b14bc4seed3/evaluation/generation/examples.2b855b14bc4seed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
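The entries above pin each per-seed evaluation JSONL to Git LFS (filter=lfs diff=lfs merge=lfs -text), using exact paths rather than glob patterns. A minimal sketch, assuming only a local checkout containing this .gitattributes, for listing which paths are routed through LFS:

```python
# Sketch: collect the exact paths that .gitattributes marks with filter=lfs.
# Works for the per-file entries shown above (no glob patterns, no spaces in paths).
from pathlib import Path

def lfs_tracked_paths(gitattributes: str = ".gitattributes") -> set[str]:
    tracked = set()
    for line in Path(gitattributes).read_text().splitlines():
        parts = line.split()
        if len(parts) >= 2 and "filter=lfs" in parts[1:]:
            tracked.add(parts[0])
    return tracked

if __name__ == "__main__":
    tracked = lfs_tracked_paths()
    sample = ("2b855b11bc4seed1/evaluation/generation/"
              "examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl")
    print(sample in tracked)
```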
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_0.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_1.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_2.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_3.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_4.jsonl ADDED
File without changes
2b855b11bc4seed1/evaluation/generation/examples.2b855b11bc4seed1_gem_xsum_article_DOC_summary_5.jsonl ADDED
File without changes
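The example files listed above follow the naming scheme examples.&lt;run-id&gt;_&lt;task-and-prompt&gt;_&lt;num-fewshots&gt;.jsonl (run 2b855b11bc4seed1, four GEM generation tasks, few-shot counts 0 to 5). A minimal sketch, assuming only that naming convention, for splitting a filename back into its parts:

```python
# Sketch: parse examples.<run-id>_<task-and-prompt>_<num-fewshots>.jsonl.
# Assumes the run id itself contains no underscore (true for the ids above).
def parse_examples_filename(name: str) -> tuple[str, str, int]:
    stem = name.removeprefix("examples.").removesuffix(".jsonl")
    run_id, rest = stem.split("_", 1)           # e.g. "2b855b11bc4seed1"
    task_prompt, fewshot = rest.rsplit("_", 1)  # e.g. "GEM-web_nlg_en_PALM_prompt", "4"
    return run_id, task_prompt, int(fewshot)

print(parse_examples_filename(
    "examples.2b855b11bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl"))
# ('2b855b11bc4seed1', 'GEM-web_nlg_en_PALM_prompt', 4)
```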
2b855b11bc4seed1/evaluation/generation/merged.csv ADDED
@@ -0,0 +1 @@
1
+ dataset,fewshots,prompt,metric,value
2b855b11bc4seed1/evaluation/generation/merged.json ADDED
@@ -0,0 +1 @@
1
+ {}
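For this run, merged.csv is added with just its header row (dataset,fewshots,prompt,metric,value) and merged.json as an empty object; the seed3 copies further down carry actual rows. A minimal sketch, assuming a populated file with numeric fewshots and value columns, for loading such a CSV into a metric table:

```python
# Sketch: read a populated merged.csv with the header added above
# (dataset,fewshots,prompt,metric,value) into a nested lookup.
import csv
from collections import defaultdict

def load_merged(path: str) -> dict:
    # results[(dataset, fewshots, prompt)][metric] = value
    results = defaultdict(dict)
    with open(path, newline="") as f:
        for row in csv.DictReader(f):
            key = (row["dataset"], int(row["fewshots"]), row["prompt"])
            results[key][row["metric"]] = float(row["value"])
    return dict(results)

# e.g. load_merged("2b855b11bc4seed3/evaluation/generation/merged.csv")
```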
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json ADDED
@@ -0,0 +1 @@
1
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.466035571971691, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03473185881932128}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06948874429007668, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00126969268323737}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.35504271560944006, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004806804977630298}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10915182875204862, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017368992622529642}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.032080423637990074, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007748948128844898}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1749610451479523, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035756989094432607}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05069482326429204, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011084349852182986}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06534721122278908, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001160283231789772}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33105657281482775, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004330159547841201}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.102624870740898, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015972476492216417}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06651082429118176, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012081627405732267}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3391017904387808, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004545927615205131}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10445663963047473, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001657522536981241}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
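Each agg.*.json file added here stores a "results" list in which every entry repeats the prompt metadata and carries one metric (bleu or a rouge precision/recall/fmeasure) together with its matching *_stderr, plus a "config" block recording the model path, num_fewshot, batch_size and example limit. A minimal sketch for flattening one such file into metric -> (value, stderr) pairs, with key names taken from the JSON above:

```python
# Sketch: flatten one agg.*.json file into {metric: (value, stderr)}.
import json

SKIP = {"task_name", "prompt_name", "fixed_answer_choice_list", "dataset_path",
        "dataset_name", "subset", "prompt_id", "prompt_jinja",
        "prompt_original_task", "comment"}

def flatten_agg(path: str) -> dict[str, tuple[float, float]]:
    with open(path) as f:
        agg = json.load(f)
    metrics = {}
    for entry in agg["results"]:
        for key, value in entry.items():
            if key in SKIP or key.endswith("_stderr"):
                continue
            metrics[key] = (value, entry.get(f"{key}_stderr"))
    return metrics

# e.g. flatten_agg("2b855b11bc4seed3/evaluation/generation/"
#                  "agg.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json")
```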
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json ADDED
@@ -0,0 +1 @@
1
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.47567398731333893, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.024264359754404757}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.0696852083209599, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013577090826179866}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3612843042707264, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004861238941500158}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10892336928116716, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016854927070877707}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.0315904391098698, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007431193984384711}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.177085732300076, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035275863817017116}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05008036673402714, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010613654934549248}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.0651341368207596, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012591521998187908}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33426205604441794, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004323619187529501}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10170780198533885, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001554714639457137}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06639663258609937, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001291170836697709}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.34334559580001384, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0045556002073834055}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10372985125549432, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016004866013084038}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.json ADDED
@@ -0,0 +1 @@
1
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.008542236722467199, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008747857935002281}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.012984770525311046, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0012451725161618523}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.008780004115444839, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0008275079317754659}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0020271291197224393, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00029783255687868237}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0032641829590180815, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00043766038421818427}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.002147119319405267, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00027623822033813396}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.006544489279360034, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006478926275365654}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.010413387705542463, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001029849799449182}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.006842107389797507, "fixed_answer_choice_list": null, 
"dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0006383034087266817}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.00789904438059008, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0008062204339147679}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.012088341751228384, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0011647198145518685}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.00814217266519704, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0007660037005158112}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.5732639330570601e-06, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 2.4552136635733976e-06}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.json ADDED
@@ -0,0 +1 @@
1
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 11.29834992541636, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.21750627906833597}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.48699136269149845, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030725910034362365}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.39873667481511255, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002641833719734696}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.41733074609230864, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021862637249920026}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.22696779992360078, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002388090320486047}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.18435752956149218, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002022279759231637}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.19240159571656937, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018744046004783308}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3681077705552102, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027056437550490925}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3004900799799888, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002254464504837015}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.31456390703859133, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019506135524216602}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.41003964030862644, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002940274288350555}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3361130546577144, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025408101507360696}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.35140686520524467, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002186992225871175}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 11.269628945064296, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15756104774510044}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4903478553495967, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00308746448334887}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.39563780199750853, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002621989888992289}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.41656475829237966, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021719641372564386}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.23014095309503432, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0023899391576289145}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.18355072919581486, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001968342781813886}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.19301313468237216, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018394274162893405}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3721457131973453, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027221328586820265}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.29906088364914196, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0022373123686497648}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3150475005541494, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019395006953993484}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.41476921512799597, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029434738207341333}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.33464526234291403, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002498553893092118}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3521891699869018, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0021596292682356695}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.03793206438705544, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024366223683722097}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.06987736475836392, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004064805072138278}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.04400316033856543, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002490349738342324}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.006979598507473323, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008901794799735351}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.013423754512870454, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013418442840833563}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.008063468290742252, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007981358698909123}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.030091382509093004, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0020782922503738504}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.05458303198935503, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003197618040713862}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.034178653761551166, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": 
"", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019416567180353596}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.03177113843460543, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002181794130601312}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.05751126056018169, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0033952763901923607}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.036128933672296124, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020726114306530577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.5192386395102333, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13002341898619957}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11bc4seed3/evaluation/generation/agg.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0025333563065761205, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0007200688467666427}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0020475737491748073, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0005527163320441051}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0022218780996264655, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006107839354088714}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0002052993867644256, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00010263779675566133}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00015805735145357788, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 8.125011510560468e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0001768729417917658, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 8.939544334231789e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0018767772604883414, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0005169601507247577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0015653832711829878, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0004179534887542103}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0016699276460120504, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": 
null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0004477526803373312}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0018647403070412589, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0005045589409446839}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.001573237052621675, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0004187928988454184}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0016709735398329094, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00044431903582722767}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 9.778811073469078e-39, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.521718069448933e-33}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:822ebb3e7d80bc055cca88683ef6739ef54c6e9352e11e41a0b4b93d0f0fc0d7
+ size 7862036
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dd43b411e8cb0b582dd1fa445c0344b5a8ab2aa220b7a069658327d8998d724a
+ size 8773910
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29c03532573129e0924a3941e35775b51f963dee1d69ee18028034b66d149ac8
+ size 34799318
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea51bea8ef170e701ef64d133006120aab85d4fe71465fb30cc13df2bfb59c47
+ size 8289971
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:568f322556552de1a3dc7850e9c8de9e10f7790c36fcdeca8b3521182a5bc668
+ size 9374208
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10d196b079c34336896043475bb74f33f92b92bf8b4a71dd4641ed98af2fd447
+ size 11672598
2b855b11bc4seed3/evaluation/generation/examples.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be12ce862ba9524728d86caa84fa14184ada1c03e6bf04c3caf7e92c03132d2f
+ size 13897474
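The examples.*.jsonl files above are tracked with Git LFS, so the diff records only the three-line pointer (version, oid, size) rather than the generations themselves; the actual content lives in LFS storage under the given sha256. A minimal sketch of reading such a pointer, assuming the standard pointer format shown above:

```python
def parse_lfs_pointer(text: str) -> dict:
    """Parse a three-line Git LFS pointer of the form shown above."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    algo, digest = fields["oid"].split(":", 1)
    return {"version": fields["version"], "oid_algo": algo,
            "oid": digest, "size": int(fields["size"])}


# Pointer contents of the first examples file added above
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:822ebb3e7d80bc055cca88683ef6739ef54c6e9352e11e41a0b4b93d0f0fc0d7
size 7862036
"""
print(parse_lfs_pointer(pointer)["size"])  # 7862036
```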
2b855b11bc4seed3/evaluation/generation/merged.csv CHANGED
@@ -7,7 +7,11 @@ e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.18538437537983646
  e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.18538437537983646
  e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.19219003937378554
  e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.19219003937378554
- e2e_nlg_cleaned,3,average,multiple,0.14495506886771423
+ e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.19240159571656937
+ e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.19240159571656937
+ e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.19301313468237216
+ e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.19301313468237216
+ e2e_nlg_cleaned,5,average,multiple,0.16087250097829975
  gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.04347043245633625
  gem_xsum,0,median,rouge2_fmeasure,0.04347043245633625
  gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.030277850873655133
@@ -16,7 +20,11 @@ gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.03015426920788573
  gem_xsum,2,median,rouge2_fmeasure,0.03015426920788573
  gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.028265095806897757
  gem_xsum,3,median,rouge2_fmeasure,0.028265095806897757
- gem_xsum,3,average,multiple,0.03304191208619372
+ gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.008063468290742252
+ gem_xsum,4,median,rouge2_fmeasure,0.008063468290742252
+ gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.0001768729417917658
+ gem_xsum,5,median,rouge2_fmeasure,0.0001768729417917658
+ gem_xsum,5,average,multiple,0.02340133159621815
  web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.048503194247737774
  web_nlg_en,0,median,rouge2_fmeasure,0.048503194247737774
  web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.04633905642415022
@@ -25,7 +33,11 @@ web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.0482705113559789
  web_nlg_en,2,median,rouge2_fmeasure,0.0482705113559789
  web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.0486263549461623
  web_nlg_en,3,median,rouge2_fmeasure,0.0486263549461623
- web_nlg_en,3,average,multiple,0.047934779243507294
+ web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.05069482326429204
+ web_nlg_en,4,median,rouge2_fmeasure,0.05069482326429204
+ web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.05008036673402714
+ web_nlg_en,5,median,rouge2_fmeasure,0.05008036673402714
+ web_nlg_en,5,average,multiple,0.0487523844953914
  wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.03524633277968111
  wiki_lingua_en,0,median,rouge2_fmeasure,0.03524633277968111
  wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.04022404252550308
@@ -36,4 +48,6 @@ wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.039320031366549095
  wiki_lingua_en,3,median,rouge2_fmeasure,0.039320031366549095
  wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.01267310048414024
  wiki_lingua_en,4,median,rouge2_fmeasure,0.01267310048414024
- wiki_lingua_en,4,average,multiple,0.034912145137215835
+ wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.002147119319405267
+ wiki_lingua_en,5,median,rouge2_fmeasure,0.002147119319405267
+ wiki_lingua_en,5,average,multiple,0.02945130750091407
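These merged.csv hunks append the newly evaluated few-shot medians and replace each task's running "average" row with one taken over all six 0-5-shot settings; the stored averages equal the mean of the per-few-shot median rows. A small sketch, assuming the task,fewshot,prompt-or-stat,metric,value layout used above, that recomputes those averages:

```python
import csv
from collections import defaultdict
from statistics import mean


def recompute_averages(path: str) -> dict:
    """Mean of each task's per-few-shot 'median' rows in merged.csv.

    Assumes the task,fewshot,prompt-or-stat,metric,value layout above; the
    stored 'average' rows (e.g. 0.16087... for e2e_nlg_cleaned at 5 shots)
    equal this mean over the 0- to 5-shot medians.
    """
    medians = defaultdict(list)
    with open(path, newline="") as f:
        for row in csv.reader(f):
            if len(row) == 5 and row[2] == "median":
                medians[row[0]].append(float(row[4]))
    return {task: mean(values) for task, values in medians.items()}
```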
2b855b11bc4seed3/evaluation/generation/merged.json CHANGED
@@ -1 +1 @@
- {"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.3003906758006876, "bleu_stderr": 0.027331800335698254, "rouge1_fmeasure": 0.10522756302657624, "rouge1_fmeasure_stderr": 0.0020564917165852665, "rouge1_precision": 0.06963270011987571, "rouge1_precision_stderr": 0.0016168408610962013, "rouge1_recall": 0.29729464197458877, "rouge1_recall_stderr": 0.0047714353450774415, "rouge2_fmeasure": 0.048503194247737774, "rouge2_fmeasure_stderr": 0.001258248656235562, "rouge2_precision": 0.03200346886185345, "rouge2_precision_stderr": 0.000957625394662931, "rouge2_recall": 0.140470675875411, "rouge2_recall_stderr": 0.0032183793551318674, "rougeL_fmeasure": 0.10106959021191211, "rougeL_fmeasure_stderr": 0.0018911974850607549, "rougeL_precision": 0.06646767562482549, "rougeL_precision_stderr": 0.0014351668395974513, "rougeL_recall": 0.28816568568111717, "rougeL_recall_stderr": 0.004622479804486804, "rougeLsum_fmeasure": 0.10010042995723817, "rougeLsum_fmeasure_stderr": 0.001929318666410727, "rougeLsum_precision": 0.06620461518234684, "rougeLsum_precision_stderr": 0.00151460835911578, "rougeLsum_recall": 0.282597952018841, "rougeLsum_recall_stderr": 0.004481787483630335}}, "1": {"PALM_prompt": {"bleu": 0.32423805540348105, "bleu_stderr": 0.024690542644449438, "rouge1_fmeasure": 0.10249916346044939, "rouge1_fmeasure_stderr": 0.0018656887279984073, "rouge1_precision": 0.06685340388470082, "rouge1_precision_stderr": 0.0015253847039623312, "rouge1_recall": 0.323514427623318, "rouge1_recall_stderr": 0.004639043635695871, "rouge2_fmeasure": 0.04633905642415022, "rouge2_fmeasure_stderr": 0.0011829999453798483, "rouge2_precision": 0.030000286950072674, "rouge2_precision_stderr": 0.0008643544653956381, "rouge2_recall": 0.14824220519105014, "rouge2_recall_stderr": 0.0031972015283258494, "rougeL_fmeasure": 0.09712356208435262, "rougeL_fmeasure_stderr": 0.0017347095170401298, "rougeL_precision": 0.06336375493461167, "rougeL_precision_stderr": 0.0014215770633909588, "rougeL_recall": 0.303094617033746, "rougeL_recall_stderr": 0.004191983337899737, "rougeLsum_fmeasure": 0.09830433934297841, "rougeLsum_fmeasure_stderr": 0.0017883137206561263, "rougeLsum_precision": 0.06416576148923457, "rougeLsum_precision_stderr": 0.0014659099244540854, "rougeLsum_recall": 0.30832609036704645, "rougeLsum_recall_stderr": 0.004339774784201872}}, "2": {"PALM_prompt": {"bleu": 0.33841996991287543, "bleu_stderr": 0.013366615983706327, "rouge1_fmeasure": 0.10597979981258299, "rouge1_fmeasure_stderr": 0.0017714336298557038, "rouge1_precision": 0.06765631897767002, "rouge1_precision_stderr": 0.0013046336481251101, "rouge1_recall": 0.3402361179206784, "rouge1_recall_stderr": 0.004704696452770623, "rouge2_fmeasure": 0.0482705113559789, "rouge2_fmeasure_stderr": 0.001132968575984557, "rouge2_precision": 0.03077023716855104, "rouge2_precision_stderr": 0.0008032546127820459, "rouge2_recall": 0.1606656566085052, "rouge2_recall_stderr": 0.0033693273708689585, "rougeL_fmeasure": 0.10046626150365084, "rougeL_fmeasure_stderr": 0.0016642034527337758, "rougeL_precision": 0.06412349572198735, "rougeL_precision_stderr": 0.0012122011908497355, "rougeL_recall": 0.31907776335571136, "rougeL_recall_stderr": 0.004279931218687767, "rougeLsum_fmeasure": 0.10141858866959458, "rougeLsum_fmeasure_stderr": 0.001700099907526394, "rougeLsum_precision": 0.06475854902642648, "rougeLsum_precision_stderr": 0.0012483946453731626, "rougeLsum_recall": 0.3241391405150877, "rougeLsum_recall_stderr": 0.004418718258324822}}, "3": {"PALM_prompt": {"bleu": 0.40584434052309054, 
"bleu_stderr": 0.02616462426887616, "rouge1_fmeasure": 0.10602175173953958, "rouge1_fmeasure_stderr": 0.001741987498486231, "rouge1_precision": 0.06757351073723121, "rouge1_precision_stderr": 0.0012679743548778455, "rouge1_recall": 0.3440037162848658, "rouge1_recall_stderr": 0.0049119409175133995, "rouge2_fmeasure": 0.0486263549461623, "rouge2_fmeasure_stderr": 0.0010900986383698252, "rouge2_precision": 0.030886249533870456, "rouge2_precision_stderr": 0.0007629220092981514, "rouge2_recall": 0.16556453686704345, "rouge2_recall_stderr": 0.0034492328057988348, "rougeL_fmeasure": 0.10011209012325656, "rougeL_fmeasure_stderr": 0.0016356941636110295, "rougeL_precision": 0.06384738095162408, "rougeL_precision_stderr": 0.0011862226633864972, "rougeL_recall": 0.3209539378169773, "rougeL_recall_stderr": 0.004410637240630364, "rougeLsum_fmeasure": 0.10161889625106496, "rougeLsum_fmeasure_stderr": 0.0016751216394668587, "rougeLsum_precision": 0.06483859720845632, "rougeLsum_precision_stderr": 0.0012202108024313588, "rougeLsum_recall": 0.3270171129897123, "rougeLsum_recall_stderr": 0.0045709512856975595}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.4622087249250515, "bleu_stderr": 0.04207910392007385, "rouge1_fmeasure": 0.1767157419817045, "rouge1_fmeasure_stderr": 0.0018123734169975135, "rouge1_precision": 0.1506827740817673, "rouge1_precision_stderr": 0.001852883253242218, "rouge1_recall": 0.25686230777525454, "rouge1_recall_stderr": 0.00260278766504943, "rouge2_fmeasure": 0.03524633277968111, "rouge2_fmeasure_stderr": 0.0008101513042416649, "rouge2_precision": 0.02971277123757481, "rouge2_precision_stderr": 0.000707355818382236, "rouge2_recall": 0.05307915600648516, "rouge2_recall_stderr": 0.0013686059525554353, "rougeL_fmeasure": 0.13759253266149163, "rougeL_fmeasure_stderr": 0.001281432881200856, "rougeL_precision": 0.11575873439522533, "rougeL_precision_stderr": 0.0012737353859565011, "rougeL_recall": 0.2052922273561128, "rougeL_recall_stderr": 0.0021096414533161158, "rougeLsum_fmeasure": 0.16173441706919453, "rougeLsum_fmeasure_stderr": 0.0016450542485275806, "rougeLsum_precision": 0.1374963134041478, "rougeLsum_precision_stderr": 0.0016657330727227028, "rougeLsum_recall": 0.2362819209208244, "rougeLsum_recall_stderr": 0.002418312889630038}}, "1": {"tldr_en": {"bleu": 1.972919037460516, "bleu_stderr": 0.05682843274487135, "rouge1_fmeasure": 0.19185955437187688, "rouge1_fmeasure_stderr": 0.001834411329990832, "rouge1_precision": 0.1655651982573561, "rouge1_precision_stderr": 0.001967434204420624, "rouge1_recall": 0.2778367078547306, "rouge1_recall_stderr": 0.0026376055817718498, "rouge2_fmeasure": 0.04022404252550308, "rouge2_fmeasure_stderr": 0.0009067575883966806, "rouge2_precision": 0.03468172497365085, "rouge2_precision_stderr": 0.0008414429849505186, "rouge2_recall": 0.06032773756466252, "rouge2_recall_stderr": 0.0014755233968311602, "rougeL_fmeasure": 0.1394283655875047, "rougeL_fmeasure_stderr": 0.0012494508344161473, "rougeL_precision": 0.1190010376024555, "rougeL_precision_stderr": 0.0013201442960543233, "rougeL_recall": 0.20730876012944102, "rougeL_recall_stderr": 0.0020918631972439913, "rougeLsum_fmeasure": 0.1798108549879984, "rougeLsum_fmeasure_stderr": 0.00170035102934959, "rougeLsum_precision": 0.15494002660111156, "rougeLsum_precision_stderr": 0.001825015399111526, "rougeLsum_recall": 0.26137076597079567, "rougeLsum_recall_stderr": 0.002487191761867969}}, "2": {"tldr_en": {"bleu": 2.2743708557193902, "bleu_stderr": 0.06189529268097383, "rouge1_fmeasure": 
0.20283722636774154, "rouge1_fmeasure_stderr": 0.001838028419312966, "rouge1_precision": 0.17638523863069297, "rouge1_precision_stderr": 0.002020755519678356, "rouge1_recall": 0.2911840829982234, "rouge1_recall_stderr": 0.0026772675380155376, "rouge2_fmeasure": 0.04709721853020564, "rouge2_fmeasure_stderr": 0.000949364569447851, "rouge2_precision": 0.04064975395757813, "rouge2_precision_stderr": 0.0008728139678303188, "rouge2_recall": 0.0700705095693168, "rouge2_recall_stderr": 0.0016162550664557144, "rougeL_fmeasure": 0.14915790559695724, "rougeL_fmeasure_stderr": 0.001266186326972188, "rougeL_precision": 0.128540063496423, "rougeL_precision_stderr": 0.00138883173221342, "rougeL_recall": 0.21916109079967522, "rougeL_recall_stderr": 0.002145845723677153, "rougeLsum_fmeasure": 0.1899159372074719, "rougeLsum_fmeasure_stderr": 0.0017127052022618613, "rougeLsum_precision": 0.16486271659279425, "rougeLsum_precision_stderr": 0.0018790326981489205, "rougeLsum_recall": 0.2736737223320281, "rougeLsum_recall_stderr": 0.0025545391606520626}}, "3": {"tldr_en": {"bleu": 2.261325582734008, "bleu_stderr": 0.07440671017406066, "rouge1_fmeasure": 0.17007228676988045, "rouge1_fmeasure_stderr": 0.0021295726072665254, "rouge1_precision": 0.15457296707448556, "rouge1_precision_stderr": 0.0023843459800257866, "rouge1_recall": 0.24244601688859282, "rouge1_recall_stderr": 0.0031047631304705336, "rouge2_fmeasure": 0.039320031366549095, "rouge2_fmeasure_stderr": 0.0009156298192238671, "rouge2_precision": 0.03563497026047448, "rouge2_precision_stderr": 0.0009697902407615488, "rouge2_recall": 0.05775187966143085, "rouge2_recall_stderr": 0.0014973785566537818, "rougeL_fmeasure": 0.12582015691412737, "rougeL_fmeasure_stderr": 0.0015236321853914047, "rougeL_precision": 0.11385012472654804, "rougeL_precision_stderr": 0.0017484820235318678, "rougeL_recall": 0.18331103625638695, "rougeL_recall_stderr": 0.0024579268481887276, "rougeLsum_fmeasure": 0.15899932831508662, "rougeLsum_fmeasure_stderr": 0.0019839606960568193, "rougeLsum_precision": 0.1443112386465537, "rougeLsum_precision_stderr": 0.002222016297651524, "rougeLsum_recall": 0.22741868584839184, "rougeLsum_recall_stderr": 0.0029377295753792703}}, "4": {"tldr_en": {"bleu": 0.5600187004785872, "bleu_stderr": 0.034329899401054034, "rouge1_fmeasure": 0.054359788577698374, "rouge1_fmeasure_stderr": 0.001834775379938349, "rouge1_precision": 0.05049412311337655, "rouge1_precision_stderr": 0.0018775705845440543, "rouge1_recall": 0.07991007309327065, "rouge1_recall_stderr": 0.002748908637336395, "rouge2_fmeasure": 0.01267310048414024, "rouge2_fmeasure_stderr": 0.0006260839320397936, "rouge2_precision": 0.01142264448143536, "rouge2_precision_stderr": 0.0006812961216581213, "rouge2_recall": 0.02008142422629143, "rouge2_recall_stderr": 0.0010993317750224779, "rougeL_fmeasure": 0.04187829090557222, "rougeL_fmeasure_stderr": 0.001389559278464916, "rougeL_precision": 0.038958076750695146, "rougeL_precision_stderr": 0.0014576150060918941, "rougeL_recall": 0.0629580148095348, "rougeL_recall_stderr": 0.002201566030491766, "rougeLsum_fmeasure": 0.05081727487259366, "rougeLsum_fmeasure_stderr": 0.0017142240431756424, "rougeLsum_precision": 0.047269915920336, "rougeLsum_precision_stderr": 0.0017705102362424203, "rougeLsum_recall": 0.07497632668531767, "rougeLsum_recall_stderr": 0.002589565865103579}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.2918074829245511, "bleu_stderr": 0.03334721114977758, "rouge1_fmeasure": 0.1600243176507681, "rouge1_fmeasure_stderr": 
0.0010733278398958096, "rouge1_precision": 0.12561487707508034, "rouge1_precision_stderr": 0.0013825187846368096, "rouge1_recall": 0.25305668157788624, "rouge1_recall_stderr": 0.0013315623225900873, "rouge2_fmeasure": 0.033687666872503644, "rouge2_fmeasure_stderr": 0.0005133425312314838, "rouge2_precision": 0.026668315991920277, "rouge2_precision_stderr": 0.0005306038751357611, "rouge2_recall": 0.05412553888370808, "rouge2_recall_stderr": 0.0007946156915472112, "rougeL_fmeasure": 0.15622553271845338, "rougeL_fmeasure_stderr": 0.0010043221686026895, "rougeL_precision": 0.12177712671014992, "rougeL_precision_stderr": 0.001240068402500465, "rougeL_recall": 0.24840753894632903, "rougeL_recall_stderr": 0.001318527645790754, "rougeLsum_fmeasure": 0.13789602591344347, "rougeLsum_fmeasure_stderr": 0.0009870480569216915, "rougeLsum_precision": 0.10841189553256889, "rougeLsum_precision_stderr": 0.0012618615939597366, "rougeLsum_recall": 0.21839285442742476, "rougeLsum_recall_stderr": 0.0012454131726385412}}, "1": {"generate_text_restaurant": {"bleu": 9.67590369741921, "bleu_stderr": 0.09901656476390079, "rouge1_fmeasure": 0.39957774138885144, "rouge1_fmeasure_stderr": 0.0022278577374241887, "rouge1_precision": 0.45349765710233814, "rouge1_precision_stderr": 0.0029408652429081406, "rouge1_recall": 0.39709628674803366, "rouge1_recall_stderr": 0.002799520545788188, "rouge2_fmeasure": 0.16855819384473125, "rouge2_fmeasure_stderr": 0.001773629505406216, "rouge2_precision": 0.19448143773761467, "rouge2_precision_stderr": 0.002225394284814737, "rouge2_recall": 0.16778603132296563, "rouge2_recall_stderr": 0.0019654958159185396, "rougeL_fmeasure": 0.2890014884493394, "rougeL_fmeasure_stderr": 0.0018207916834884527, "rougeL_precision": 0.33043423858845145, "rougeL_precision_stderr": 0.0024856924454187785, "rougeL_recall": 0.2872561015612187, "rougeL_recall_stderr": 0.0022337982582298155, "rougeLsum_fmeasure": 0.3277959949146152, "rougeLsum_fmeasure_stderr": 0.0021102604579282852, "rougeLsum_precision": 0.3738597805796585, "rougeLsum_precision_stderr": 0.002773351046379512, "rougeLsum_recall": 0.3249821747731812, "rougeLsum_recall_stderr": 0.002515405285337629}}, "2": {"generate_text_restaurant": {"bleu": 10.91262584267496, "bleu_stderr": 0.12763668087089558, "rouge1_fmeasure": 0.4158184072582917, "rouge1_fmeasure_stderr": 0.0021875821824657578, "rouge1_precision": 0.4745007823219277, "rouge1_precision_stderr": 0.003096779743530949, "rouge1_recall": 0.41114421373702703, "rouge1_recall_stderr": 0.002795411314326619, "rouge2_fmeasure": 0.18538437537983646, "rouge2_fmeasure_stderr": 0.001799521509176773, "rouge2_precision": 0.21433784297140893, "rouge2_precision_stderr": 0.002331836767735763, "rouge2_recall": 0.18401740949966608, "rouge2_recall_stderr": 0.002021878491927928, "rougeL_fmeasure": 0.30338837358825094, "rougeL_fmeasure_stderr": 0.001840114059497487, "rougeL_precision": 0.34796254903830837, "rougeL_precision_stderr": 0.002621056636644695, "rougeL_recall": 0.2999843023857228, "rougeL_recall_stderr": 0.002269391756360346, "rougeLsum_fmeasure": 0.3438444001935511, "rougeLsum_fmeasure_stderr": 0.0020872281136323037, "rougeLsum_precision": 0.39290245826296444, "rougeLsum_precision_stderr": 0.0028605438328826085, "rougeLsum_recall": 0.3399505800342592, "rougeLsum_recall_stderr": 0.0025542752245032124}}, "3": {"generate_text_restaurant": {"bleu": 11.413052941435677, "bleu_stderr": 0.20476608953978004, "rouge1_fmeasure": 0.42003617329938386, "rouge1_fmeasure_stderr": 0.002154444681203657, "rouge1_precision": 
0.4836426637277247, "rouge1_precision_stderr": 0.003041696429497936, "rouge1_recall": 0.4077462553545315, "rouge1_recall_stderr": 0.002698117142879397, "rouge2_fmeasure": 0.19219003937378554, "rouge2_fmeasure_stderr": 0.0018883864751046853, "rouge2_precision": 0.22368030301486044, "rouge2_precision_stderr": 0.0024069791642568504, "rouge2_recall": 0.18735894030210265, "rouge2_recall_stderr": 0.0020871489281372663, "rougeL_fmeasure": 0.31324626443128384, "rougeL_fmeasure_stderr": 0.0019216880227233767, "rougeL_precision": 0.36189364939827273, "rougeL_precision_stderr": 0.0026889223009721307, "rougeL_recall": 0.30429571782108805, "rougeL_recall_stderr": 0.0023058551990661367, "rougeLsum_fmeasure": 0.35143133030157764, "rougeLsum_fmeasure_stderr": 0.002142111791450537, "rougeLsum_precision": 0.40514213772293617, "rougeLsum_precision_stderr": 0.0029172807069507275, "rougeLsum_recall": 0.341129701516605, "rougeLsum_recall_stderr": 0.0025465447925427515}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.7452599273208484, "bleu_stderr": 0.0710626335421352, "rouge1_fmeasure": 0.2032531810901225, "rouge1_fmeasure_stderr": 0.002372876068699817, "rouge1_precision": 0.14818700118728248, "rouge1_precision_stderr": 0.0018676651463230222, "rouge1_recall": 0.34586554355363025, "rouge1_recall_stderr": 0.0041641658204401725, "rouge2_fmeasure": 0.04347043245633625, "rouge2_fmeasure_stderr": 0.001442207400667641, "rouge2_precision": 0.03119345685386172, "rouge2_precision_stderr": 0.0010373843022792317, "rouge2_recall": 0.07667587881403153, "rouge2_recall_stderr": 0.002653660348427432, "rougeL_fmeasure": 0.1528350870269447, "rougeL_fmeasure_stderr": 0.0017715223191776047, "rougeL_precision": 0.11119723252661128, "rougeL_precision_stderr": 0.0013663860116280308, "rougeL_recall": 0.2616100987791669, "rougeL_recall_stderr": 0.003275713171944408, "rougeLsum_fmeasure": 0.16043210769047098, "rougeLsum_fmeasure_stderr": 0.0020055581696195052, "rougeLsum_precision": 0.11658163341774634, "rougeLsum_precision_stderr": 0.0015235968275521983, "rougeLsum_recall": 0.275178892490744, "rougeLsum_recall_stderr": 0.0036968416298391823}}, "1": {"article_DOC_summary": {"bleu": 1.1604575025128918, "bleu_stderr": 0.0877746649601935, "rouge1_fmeasure": 0.16834116026562992, "rouge1_fmeasure_stderr": 0.0023007208182279906, "rouge1_precision": 0.11944768188603072, "rouge1_precision_stderr": 0.0017101747872860968, "rouge1_recall": 0.2967764173333572, "rouge1_recall_stderr": 0.003944563464315701, "rouge2_fmeasure": 0.030277850873655133, "rouge2_fmeasure_stderr": 0.0012385139115491765, "rouge2_precision": 0.021232879052259627, "rouge2_precision_stderr": 0.0008714791712295506, "rouge2_recall": 0.05526393564583485, "rouge2_recall_stderr": 0.0023240893343291387, "rougeL_fmeasure": 0.12978985120588027, "rougeL_fmeasure_stderr": 0.0017321643764184145, "rougeL_precision": 0.09192594935218694, "rougeL_precision_stderr": 0.0012769463345594439, "rougeL_recall": 0.23019911126154574, "rougeL_recall_stderr": 0.003091431421711302, "rougeLsum_fmeasure": 0.13564693066989447, "rougeLsum_fmeasure_stderr": 0.0018863533793062786, "rougeLsum_precision": 0.09602849667357606, "rougeLsum_precision_stderr": 0.001386138103889359, "rougeLsum_recall": 0.2407455765901895, "rougeLsum_recall_stderr": 0.003358547528460653}}, "2": {"article_DOC_summary": {"bleu": 1.1805459815971808, "bleu_stderr": 0.08801398952015627, "rouge1_fmeasure": 0.16960096474510714, "rouge1_fmeasure_stderr": 0.002303219538316631, "rouge1_precision": 0.12017928149855549, 
"rouge1_precision_stderr": 0.0017155806595076542, "rouge1_recall": 0.3000656582759754, "rouge1_recall_stderr": 0.003942220611475929, "rouge2_fmeasure": 0.03015426920788573, "rouge2_fmeasure_stderr": 0.001263243793733084, "rouge2_precision": 0.02115270486812927, "rouge2_precision_stderr": 0.0008856577807583306, "rouge2_recall": 0.054787883745542026, "rouge2_recall_stderr": 0.0023745228517959276, "rougeL_fmeasure": 0.131124075957363, "rougeL_fmeasure_stderr": 0.0017424293214564424, "rougeL_precision": 0.0927144051400674, "rougeL_precision_stderr": 0.0012857006594715614, "rougeL_recall": 0.23362729816209124, "rougeL_recall_stderr": 0.0031066367525268844, "rougeLsum_fmeasure": 0.1367233861657065, "rougeLsum_fmeasure_stderr": 0.0018905430841393567, "rougeLsum_precision": 0.09669042239875854, "rougeLsum_precision_stderr": 0.0013887653900610069, "rougeLsum_recall": 0.24335819875183146, "rougeLsum_recall_stderr": 0.003364341261182289}}, "3": {"article_DOC_summary": {"bleu": 1.2117893477080361, "bleu_stderr": 0.1149778380988921, "rouge1_fmeasure": 0.15889837582063066, "rouge1_fmeasure_stderr": 0.0024042734536394335, "rouge1_precision": 0.11536457953211869, "rouge1_precision_stderr": 0.0019139955193594944, "rouge1_recall": 0.2768098516237596, "rouge1_recall_stderr": 0.004205225171304428, "rouge2_fmeasure": 0.028265095806897757, "rouge2_fmeasure_stderr": 0.0012619861741006862, "rouge2_precision": 0.020015331184403896, "rouge2_precision_stderr": 0.0009021836723852284, "rouge2_recall": 0.05127981526296904, "rouge2_recall_stderr": 0.002364022279427071, "rougeL_fmeasure": 0.12453970375141683, "rougeL_fmeasure_stderr": 0.0018237860596141172, "rougeL_precision": 0.08997967190382496, "rougeL_precision_stderr": 0.0014235921883162194, "rougeL_recall": 0.21857370135939078, "rougeL_recall_stderr": 0.003310622426465869, "rougeLsum_fmeasure": 0.1276545465727294, "rougeLsum_fmeasure_stderr": 0.001972377201117121, "rougeLsum_precision": 0.09224753114626984, "rougeLsum_precision_stderr": 0.0015272098267094347, "rougeLsum_recall": 0.2238960772414025, "rougeLsum_recall_stderr": 0.003566353512085928}}}}
+ {"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.3003906758006876, "bleu_stderr": 0.027331800335698254, "rouge1_fmeasure": 0.10522756302657624, "rouge1_fmeasure_stderr": 0.0020564917165852665, "rouge1_precision": 0.06963270011987571, "rouge1_precision_stderr": 0.0016168408610962013, "rouge1_recall": 0.29729464197458877, "rouge1_recall_stderr": 0.0047714353450774415, "rouge2_fmeasure": 0.048503194247737774, "rouge2_fmeasure_stderr": 0.001258248656235562, "rouge2_precision": 0.03200346886185345, "rouge2_precision_stderr": 0.000957625394662931, "rouge2_recall": 0.140470675875411, "rouge2_recall_stderr": 0.0032183793551318674, "rougeL_fmeasure": 0.10106959021191211, "rougeL_fmeasure_stderr": 0.0018911974850607549, "rougeL_precision": 0.06646767562482549, "rougeL_precision_stderr": 0.0014351668395974513, "rougeL_recall": 0.28816568568111717, "rougeL_recall_stderr": 0.004622479804486804, "rougeLsum_fmeasure": 0.10010042995723817, "rougeLsum_fmeasure_stderr": 0.001929318666410727, "rougeLsum_precision": 0.06620461518234684, "rougeLsum_precision_stderr": 0.00151460835911578, "rougeLsum_recall": 0.282597952018841, "rougeLsum_recall_stderr": 0.004481787483630335}}, "1": {"PALM_prompt": {"bleu": 0.32423805540348105, "bleu_stderr": 0.024690542644449438, "rouge1_fmeasure": 0.10249916346044939, "rouge1_fmeasure_stderr": 0.0018656887279984073, "rouge1_precision": 0.06685340388470082, "rouge1_precision_stderr": 0.0015253847039623312, "rouge1_recall": 0.323514427623318, "rouge1_recall_stderr": 0.004639043635695871, "rouge2_fmeasure": 0.04633905642415022, "rouge2_fmeasure_stderr": 0.0011829999453798483, "rouge2_precision": 0.030000286950072674, "rouge2_precision_stderr": 0.0008643544653956381, "rouge2_recall": 0.14824220519105014, "rouge2_recall_stderr": 0.0031972015283258494, "rougeL_fmeasure": 0.09712356208435262, "rougeL_fmeasure_stderr": 0.0017347095170401298, "rougeL_precision": 0.06336375493461167, "rougeL_precision_stderr": 0.0014215770633909588, "rougeL_recall": 0.303094617033746, "rougeL_recall_stderr": 0.004191983337899737, "rougeLsum_fmeasure": 0.09830433934297841, "rougeLsum_fmeasure_stderr": 0.0017883137206561263, "rougeLsum_precision": 0.06416576148923457, "rougeLsum_precision_stderr": 0.0014659099244540854, "rougeLsum_recall": 0.30832609036704645, "rougeLsum_recall_stderr": 0.004339774784201872}}, "2": {"PALM_prompt": {"bleu": 0.33841996991287543, "bleu_stderr": 0.013366615983706327, "rouge1_fmeasure": 0.10597979981258299, "rouge1_fmeasure_stderr": 0.0017714336298557038, "rouge1_precision": 0.06765631897767002, "rouge1_precision_stderr": 0.0013046336481251101, "rouge1_recall": 0.3402361179206784, "rouge1_recall_stderr": 0.004704696452770623, "rouge2_fmeasure": 0.0482705113559789, "rouge2_fmeasure_stderr": 0.001132968575984557, "rouge2_precision": 0.03077023716855104, "rouge2_precision_stderr": 0.0008032546127820459, "rouge2_recall": 0.1606656566085052, "rouge2_recall_stderr": 0.0033693273708689585, "rougeL_fmeasure": 0.10046626150365084, "rougeL_fmeasure_stderr": 0.0016642034527337758, "rougeL_precision": 0.06412349572198735, "rougeL_precision_stderr": 0.0012122011908497355, "rougeL_recall": 0.31907776335571136, "rougeL_recall_stderr": 0.004279931218687767, "rougeLsum_fmeasure": 0.10141858866959458, "rougeLsum_fmeasure_stderr": 0.001700099907526394, "rougeLsum_precision": 0.06475854902642648, "rougeLsum_precision_stderr": 0.0012483946453731626, "rougeLsum_recall": 0.3241391405150877, "rougeLsum_recall_stderr": 0.004418718258324822}}, "3": {"PALM_prompt": {"bleu": 0.40584434052309054, 
"bleu_stderr": 0.02616462426887616, "rouge1_fmeasure": 0.10602175173953958, "rouge1_fmeasure_stderr": 0.001741987498486231, "rouge1_precision": 0.06757351073723121, "rouge1_precision_stderr": 0.0012679743548778455, "rouge1_recall": 0.3440037162848658, "rouge1_recall_stderr": 0.0049119409175133995, "rouge2_fmeasure": 0.0486263549461623, "rouge2_fmeasure_stderr": 0.0010900986383698252, "rouge2_precision": 0.030886249533870456, "rouge2_precision_stderr": 0.0007629220092981514, "rouge2_recall": 0.16556453686704345, "rouge2_recall_stderr": 0.0034492328057988348, "rougeL_fmeasure": 0.10011209012325656, "rougeL_fmeasure_stderr": 0.0016356941636110295, "rougeL_precision": 0.06384738095162408, "rougeL_precision_stderr": 0.0011862226633864972, "rougeL_recall": 0.3209539378169773, "rougeL_recall_stderr": 0.004410637240630364, "rougeLsum_fmeasure": 0.10161889625106496, "rougeLsum_fmeasure_stderr": 0.0016751216394668587, "rougeLsum_precision": 0.06483859720845632, "rougeLsum_precision_stderr": 0.0012202108024313588, "rougeLsum_recall": 0.3270171129897123, "rougeLsum_recall_stderr": 0.0045709512856975595}}, "4": {"PALM_prompt": {"bleu": 0.466035571971691, "bleu_stderr": 0.03473185881932128, "rouge1_fmeasure": 0.10915182875204862, "rouge1_fmeasure_stderr": 0.0017368992622529642, "rouge1_precision": 0.06948874429007668, "rouge1_precision_stderr": 0.00126969268323737, "rouge1_recall": 0.35504271560944006, "rouge1_recall_stderr": 0.004806804977630298, "rouge2_fmeasure": 0.05069482326429204, "rouge2_fmeasure_stderr": 0.0011084349852182986, "rouge2_precision": 0.032080423637990074, "rouge2_precision_stderr": 0.0007748948128844898, "rouge2_recall": 0.1749610451479523, "rouge2_recall_stderr": 0.0035756989094432607, "rougeL_fmeasure": 0.102624870740898, "rougeL_fmeasure_stderr": 0.0015972476492216417, "rougeL_precision": 0.06534721122278908, "rougeL_precision_stderr": 0.001160283231789772, "rougeL_recall": 0.33105657281482775, "rougeL_recall_stderr": 0.004330159547841201, "rougeLsum_fmeasure": 0.10445663963047473, "rougeLsum_fmeasure_stderr": 0.001657522536981241, "rougeLsum_precision": 0.06651082429118176, "rougeLsum_precision_stderr": 0.0012081627405732267, "rougeLsum_recall": 0.3391017904387808, "rougeLsum_recall_stderr": 0.004545927615205131}}, "5": {"PALM_prompt": {"bleu": 0.47567398731333893, "bleu_stderr": 0.024264359754404757, "rouge1_fmeasure": 0.10892336928116716, "rouge1_fmeasure_stderr": 0.0016854927070877707, "rouge1_precision": 0.0696852083209599, "rouge1_precision_stderr": 0.0013577090826179866, "rouge1_recall": 0.3612843042707264, "rouge1_recall_stderr": 0.004861238941500158, "rouge2_fmeasure": 0.05008036673402714, "rouge2_fmeasure_stderr": 0.0010613654934549248, "rouge2_precision": 0.0315904391098698, "rouge2_precision_stderr": 0.0007431193984384711, "rouge2_recall": 0.177085732300076, "rouge2_recall_stderr": 0.0035275863817017116, "rougeL_fmeasure": 0.10170780198533885, "rougeL_fmeasure_stderr": 0.001554714639457137, "rougeL_precision": 0.0651341368207596, "rougeL_precision_stderr": 0.0012591521998187908, "rougeL_recall": 0.33426205604441794, "rougeL_recall_stderr": 0.004323619187529501, "rougeLsum_fmeasure": 0.10372985125549432, "rougeLsum_fmeasure_stderr": 0.0016004866013084038, "rougeLsum_precision": 0.06639663258609937, "rougeLsum_precision_stderr": 0.001291170836697709, "rougeLsum_recall": 0.34334559580001384, "rougeLsum_recall_stderr": 0.0045556002073834055}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.4622087249250515, "bleu_stderr": 0.04207910392007385, "rouge1_fmeasure": 
0.1767157419817045, "rouge1_fmeasure_stderr": 0.0018123734169975135, "rouge1_precision": 0.1506827740817673, "rouge1_precision_stderr": 0.001852883253242218, "rouge1_recall": 0.25686230777525454, "rouge1_recall_stderr": 0.00260278766504943, "rouge2_fmeasure": 0.03524633277968111, "rouge2_fmeasure_stderr": 0.0008101513042416649, "rouge2_precision": 0.02971277123757481, "rouge2_precision_stderr": 0.000707355818382236, "rouge2_recall": 0.05307915600648516, "rouge2_recall_stderr": 0.0013686059525554353, "rougeL_fmeasure": 0.13759253266149163, "rougeL_fmeasure_stderr": 0.001281432881200856, "rougeL_precision": 0.11575873439522533, "rougeL_precision_stderr": 0.0012737353859565011, "rougeL_recall": 0.2052922273561128, "rougeL_recall_stderr": 0.0021096414533161158, "rougeLsum_fmeasure": 0.16173441706919453, "rougeLsum_fmeasure_stderr": 0.0016450542485275806, "rougeLsum_precision": 0.1374963134041478, "rougeLsum_precision_stderr": 0.0016657330727227028, "rougeLsum_recall": 0.2362819209208244, "rougeLsum_recall_stderr": 0.002418312889630038}}, "1": {"tldr_en": {"bleu": 1.972919037460516, "bleu_stderr": 0.05682843274487135, "rouge1_fmeasure": 0.19185955437187688, "rouge1_fmeasure_stderr": 0.001834411329990832, "rouge1_precision": 0.1655651982573561, "rouge1_precision_stderr": 0.001967434204420624, "rouge1_recall": 0.2778367078547306, "rouge1_recall_stderr": 0.0026376055817718498, "rouge2_fmeasure": 0.04022404252550308, "rouge2_fmeasure_stderr": 0.0009067575883966806, "rouge2_precision": 0.03468172497365085, "rouge2_precision_stderr": 0.0008414429849505186, "rouge2_recall": 0.06032773756466252, "rouge2_recall_stderr": 0.0014755233968311602, "rougeL_fmeasure": 0.1394283655875047, "rougeL_fmeasure_stderr": 0.0012494508344161473, "rougeL_precision": 0.1190010376024555, "rougeL_precision_stderr": 0.0013201442960543233, "rougeL_recall": 0.20730876012944102, "rougeL_recall_stderr": 0.0020918631972439913, "rougeLsum_fmeasure": 0.1798108549879984, "rougeLsum_fmeasure_stderr": 0.00170035102934959, "rougeLsum_precision": 0.15494002660111156, "rougeLsum_precision_stderr": 0.001825015399111526, "rougeLsum_recall": 0.26137076597079567, "rougeLsum_recall_stderr": 0.002487191761867969}}, "2": {"tldr_en": {"bleu": 2.2743708557193902, "bleu_stderr": 0.06189529268097383, "rouge1_fmeasure": 0.20283722636774154, "rouge1_fmeasure_stderr": 0.001838028419312966, "rouge1_precision": 0.17638523863069297, "rouge1_precision_stderr": 0.002020755519678356, "rouge1_recall": 0.2911840829982234, "rouge1_recall_stderr": 0.0026772675380155376, "rouge2_fmeasure": 0.04709721853020564, "rouge2_fmeasure_stderr": 0.000949364569447851, "rouge2_precision": 0.04064975395757813, "rouge2_precision_stderr": 0.0008728139678303188, "rouge2_recall": 0.0700705095693168, "rouge2_recall_stderr": 0.0016162550664557144, "rougeL_fmeasure": 0.14915790559695724, "rougeL_fmeasure_stderr": 0.001266186326972188, "rougeL_precision": 0.128540063496423, "rougeL_precision_stderr": 0.00138883173221342, "rougeL_recall": 0.21916109079967522, "rougeL_recall_stderr": 0.002145845723677153, "rougeLsum_fmeasure": 0.1899159372074719, "rougeLsum_fmeasure_stderr": 0.0017127052022618613, "rougeLsum_precision": 0.16486271659279425, "rougeLsum_precision_stderr": 0.0018790326981489205, "rougeLsum_recall": 0.2736737223320281, "rougeLsum_recall_stderr": 0.0025545391606520626}}, "3": {"tldr_en": {"bleu": 2.261325582734008, "bleu_stderr": 0.07440671017406066, "rouge1_fmeasure": 0.17007228676988045, "rouge1_fmeasure_stderr": 0.0021295726072665254, "rouge1_precision": 
0.15457296707448556, "rouge1_precision_stderr": 0.0023843459800257866, "rouge1_recall": 0.24244601688859282, "rouge1_recall_stderr": 0.0031047631304705336, "rouge2_fmeasure": 0.039320031366549095, "rouge2_fmeasure_stderr": 0.0009156298192238671, "rouge2_precision": 0.03563497026047448, "rouge2_precision_stderr": 0.0009697902407615488, "rouge2_recall": 0.05775187966143085, "rouge2_recall_stderr": 0.0014973785566537818, "rougeL_fmeasure": 0.12582015691412737, "rougeL_fmeasure_stderr": 0.0015236321853914047, "rougeL_precision": 0.11385012472654804, "rougeL_precision_stderr": 0.0017484820235318678, "rougeL_recall": 0.18331103625638695, "rougeL_recall_stderr": 0.0024579268481887276, "rougeLsum_fmeasure": 0.15899932831508662, "rougeLsum_fmeasure_stderr": 0.0019839606960568193, "rougeLsum_precision": 0.1443112386465537, "rougeLsum_precision_stderr": 0.002222016297651524, "rougeLsum_recall": 0.22741868584839184, "rougeLsum_recall_stderr": 0.0029377295753792703}}, "4": {"tldr_en": {"bleu": 0.5600187004785872, "bleu_stderr": 0.034329899401054034, "rouge1_fmeasure": 0.054359788577698374, "rouge1_fmeasure_stderr": 0.001834775379938349, "rouge1_precision": 0.05049412311337655, "rouge1_precision_stderr": 0.0018775705845440543, "rouge1_recall": 0.07991007309327065, "rouge1_recall_stderr": 0.002748908637336395, "rouge2_fmeasure": 0.01267310048414024, "rouge2_fmeasure_stderr": 0.0006260839320397936, "rouge2_precision": 0.01142264448143536, "rouge2_precision_stderr": 0.0006812961216581213, "rouge2_recall": 0.02008142422629143, "rouge2_recall_stderr": 0.0010993317750224779, "rougeL_fmeasure": 0.04187829090557222, "rougeL_fmeasure_stderr": 0.001389559278464916, "rougeL_precision": 0.038958076750695146, "rougeL_precision_stderr": 0.0014576150060918941, "rougeL_recall": 0.0629580148095348, "rougeL_recall_stderr": 0.002201566030491766, "rougeLsum_fmeasure": 0.05081727487259366, "rougeLsum_fmeasure_stderr": 0.0017142240431756424, "rougeLsum_precision": 0.047269915920336, "rougeLsum_precision_stderr": 0.0017705102362424203, "rougeLsum_recall": 0.07497632668531767, "rougeLsum_recall_stderr": 0.002589565865103579}}, "5": {"tldr_en": {"bleu": 1.5732639330570601e-06, "bleu_stderr": 2.4552136635733976e-06, "rouge1_fmeasure": 0.008780004115444839, "rouge1_fmeasure_stderr": 0.0008275079317754659, "rouge1_precision": 0.008542236722467199, "rouge1_precision_stderr": 0.0008747857935002281, "rouge1_recall": 0.012984770525311046, "rouge1_recall_stderr": 0.0012451725161618523, "rouge2_fmeasure": 0.002147119319405267, "rouge2_fmeasure_stderr": 0.00027623822033813396, "rouge2_precision": 0.0020271291197224393, "rouge2_precision_stderr": 0.00029783255687868237, "rouge2_recall": 0.0032641829590180815, "rouge2_recall_stderr": 0.00043766038421818427, "rougeL_fmeasure": 0.006842107389797507, "rougeL_fmeasure_stderr": 0.0006383034087266817, "rougeL_precision": 0.006544489279360034, "rougeL_precision_stderr": 0.0006478926275365654, "rougeL_recall": 0.010413387705542463, "rougeL_recall_stderr": 0.001029849799449182, "rougeLsum_fmeasure": 0.00814217266519704, "rougeLsum_fmeasure_stderr": 0.0007660037005158112, "rougeLsum_precision": 0.00789904438059008, "rougeLsum_precision_stderr": 0.0008062204339147679, "rougeLsum_recall": 0.012088341751228384, "rougeLsum_recall_stderr": 0.0011647198145518685}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.2918074829245511, "bleu_stderr": 0.03334721114977758, "rouge1_fmeasure": 0.1600243176507681, "rouge1_fmeasure_stderr": 0.0010733278398958096, "rouge1_precision": 
0.12561487707508034, "rouge1_precision_stderr": 0.0013825187846368096, "rouge1_recall": 0.25305668157788624, "rouge1_recall_stderr": 0.0013315623225900873, "rouge2_fmeasure": 0.033687666872503644, "rouge2_fmeasure_stderr": 0.0005133425312314838, "rouge2_precision": 0.026668315991920277, "rouge2_precision_stderr": 0.0005306038751357611, "rouge2_recall": 0.05412553888370808, "rouge2_recall_stderr": 0.0007946156915472112, "rougeL_fmeasure": 0.15622553271845338, "rougeL_fmeasure_stderr": 0.0010043221686026895, "rougeL_precision": 0.12177712671014992, "rougeL_precision_stderr": 0.001240068402500465, "rougeL_recall": 0.24840753894632903, "rougeL_recall_stderr": 0.001318527645790754, "rougeLsum_fmeasure": 0.13789602591344347, "rougeLsum_fmeasure_stderr": 0.0009870480569216915, "rougeLsum_precision": 0.10841189553256889, "rougeLsum_precision_stderr": 0.0012618615939597366, "rougeLsum_recall": 0.21839285442742476, "rougeLsum_recall_stderr": 0.0012454131726385412}}, "1": {"generate_text_restaurant": {"bleu": 9.67590369741921, "bleu_stderr": 0.09901656476390079, "rouge1_fmeasure": 0.39957774138885144, "rouge1_fmeasure_stderr": 0.0022278577374241887, "rouge1_precision": 0.45349765710233814, "rouge1_precision_stderr": 0.0029408652429081406, "rouge1_recall": 0.39709628674803366, "rouge1_recall_stderr": 0.002799520545788188, "rouge2_fmeasure": 0.16855819384473125, "rouge2_fmeasure_stderr": 0.001773629505406216, "rouge2_precision": 0.19448143773761467, "rouge2_precision_stderr": 0.002225394284814737, "rouge2_recall": 0.16778603132296563, "rouge2_recall_stderr": 0.0019654958159185396, "rougeL_fmeasure": 0.2890014884493394, "rougeL_fmeasure_stderr": 0.0018207916834884527, "rougeL_precision": 0.33043423858845145, "rougeL_precision_stderr": 0.0024856924454187785, "rougeL_recall": 0.2872561015612187, "rougeL_recall_stderr": 0.0022337982582298155, "rougeLsum_fmeasure": 0.3277959949146152, "rougeLsum_fmeasure_stderr": 0.0021102604579282852, "rougeLsum_precision": 0.3738597805796585, "rougeLsum_precision_stderr": 0.002773351046379512, "rougeLsum_recall": 0.3249821747731812, "rougeLsum_recall_stderr": 0.002515405285337629}}, "2": {"generate_text_restaurant": {"bleu": 10.91262584267496, "bleu_stderr": 0.12763668087089558, "rouge1_fmeasure": 0.4158184072582917, "rouge1_fmeasure_stderr": 0.0021875821824657578, "rouge1_precision": 0.4745007823219277, "rouge1_precision_stderr": 0.003096779743530949, "rouge1_recall": 0.41114421373702703, "rouge1_recall_stderr": 0.002795411314326619, "rouge2_fmeasure": 0.18538437537983646, "rouge2_fmeasure_stderr": 0.001799521509176773, "rouge2_precision": 0.21433784297140893, "rouge2_precision_stderr": 0.002331836767735763, "rouge2_recall": 0.18401740949966608, "rouge2_recall_stderr": 0.002021878491927928, "rougeL_fmeasure": 0.30338837358825094, "rougeL_fmeasure_stderr": 0.001840114059497487, "rougeL_precision": 0.34796254903830837, "rougeL_precision_stderr": 0.002621056636644695, "rougeL_recall": 0.2999843023857228, "rougeL_recall_stderr": 0.002269391756360346, "rougeLsum_fmeasure": 0.3438444001935511, "rougeLsum_fmeasure_stderr": 0.0020872281136323037, "rougeLsum_precision": 0.39290245826296444, "rougeLsum_precision_stderr": 0.0028605438328826085, "rougeLsum_recall": 0.3399505800342592, "rougeLsum_recall_stderr": 0.0025542752245032124}}, "3": {"generate_text_restaurant": {"bleu": 11.413052941435677, "bleu_stderr": 0.20476608953978004, "rouge1_fmeasure": 0.42003617329938386, "rouge1_fmeasure_stderr": 0.002154444681203657, "rouge1_precision": 0.4836426637277247, 
"rouge1_precision_stderr": 0.003041696429497936, "rouge1_recall": 0.4077462553545315, "rouge1_recall_stderr": 0.002698117142879397, "rouge2_fmeasure": 0.19219003937378554, "rouge2_fmeasure_stderr": 0.0018883864751046853, "rouge2_precision": 0.22368030301486044, "rouge2_precision_stderr": 0.0024069791642568504, "rouge2_recall": 0.18735894030210265, "rouge2_recall_stderr": 0.0020871489281372663, "rougeL_fmeasure": 0.31324626443128384, "rougeL_fmeasure_stderr": 0.0019216880227233767, "rougeL_precision": 0.36189364939827273, "rougeL_precision_stderr": 0.0026889223009721307, "rougeL_recall": 0.30429571782108805, "rougeL_recall_stderr": 0.0023058551990661367, "rougeLsum_fmeasure": 0.35143133030157764, "rougeLsum_fmeasure_stderr": 0.002142111791450537, "rougeLsum_precision": 0.40514213772293617, "rougeLsum_precision_stderr": 0.0029172807069507275, "rougeLsum_recall": 0.341129701516605, "rougeLsum_recall_stderr": 0.0025465447925427515}}, "4": {"generate_text_restaurant": {"bleu": 11.29834992541636, "bleu_stderr": 0.21750627906833597, "rouge1_fmeasure": 0.41733074609230864, "rouge1_fmeasure_stderr": 0.0021862637249920026, "rouge1_precision": 0.48699136269149845, "rouge1_precision_stderr": 0.0030725910034362365, "rouge1_recall": 0.39873667481511255, "rouge1_recall_stderr": 0.002641833719734696, "rouge2_fmeasure": 0.19240159571656937, "rouge2_fmeasure_stderr": 0.0018744046004783308, "rouge2_precision": 0.22696779992360078, "rouge2_precision_stderr": 0.002388090320486047, "rouge2_recall": 0.18435752956149218, "rouge2_recall_stderr": 0.002022279759231637, "rougeL_fmeasure": 0.31456390703859133, "rougeL_fmeasure_stderr": 0.0019506135524216602, "rougeL_precision": 0.3681077705552102, "rougeL_precision_stderr": 0.0027056437550490925, "rougeL_recall": 0.3004900799799888, "rougeL_recall_stderr": 0.002254464504837015, "rougeLsum_fmeasure": 0.35140686520524467, "rougeLsum_fmeasure_stderr": 0.002186992225871175, "rougeLsum_precision": 0.41003964030862644, "rougeLsum_precision_stderr": 0.002940274288350555, "rougeLsum_recall": 0.3361130546577144, "rougeLsum_recall_stderr": 0.0025408101507360696}}, "5": {"generate_text_restaurant": {"bleu": 11.269628945064296, "bleu_stderr": 0.15756104774510044, "rouge1_fmeasure": 0.41656475829237966, "rouge1_fmeasure_stderr": 0.0021719641372564386, "rouge1_precision": 0.4903478553495967, "rouge1_precision_stderr": 0.00308746448334887, "rouge1_recall": 0.39563780199750853, "rouge1_recall_stderr": 0.002621989888992289, "rouge2_fmeasure": 0.19301313468237216, "rouge2_fmeasure_stderr": 0.0018394274162893405, "rouge2_precision": 0.23014095309503432, "rouge2_precision_stderr": 0.0023899391576289145, "rouge2_recall": 0.18355072919581486, "rouge2_recall_stderr": 0.001968342781813886, "rougeL_fmeasure": 0.3150475005541494, "rougeL_fmeasure_stderr": 0.0019395006953993484, "rougeL_precision": 0.3721457131973453, "rougeL_precision_stderr": 0.0027221328586820265, "rougeL_recall": 0.29906088364914196, "rougeL_recall_stderr": 0.0022373123686497648, "rougeLsum_fmeasure": 0.3521891699869018, "rougeLsum_fmeasure_stderr": 0.0021596292682356695, "rougeLsum_precision": 0.41476921512799597, "rougeLsum_precision_stderr": 0.0029434738207341333, "rougeLsum_recall": 0.33464526234291403, "rougeLsum_recall_stderr": 0.002498553893092118}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.7452599273208484, "bleu_stderr": 0.0710626335421352, "rouge1_fmeasure": 0.2032531810901225, "rouge1_fmeasure_stderr": 0.002372876068699817, "rouge1_precision": 0.14818700118728248, "rouge1_precision_stderr": 
0.0018676651463230222, "rouge1_recall": 0.34586554355363025, "rouge1_recall_stderr": 0.0041641658204401725, "rouge2_fmeasure": 0.04347043245633625, "rouge2_fmeasure_stderr": 0.001442207400667641, "rouge2_precision": 0.03119345685386172, "rouge2_precision_stderr": 0.0010373843022792317, "rouge2_recall": 0.07667587881403153, "rouge2_recall_stderr": 0.002653660348427432, "rougeL_fmeasure": 0.1528350870269447, "rougeL_fmeasure_stderr": 0.0017715223191776047, "rougeL_precision": 0.11119723252661128, "rougeL_precision_stderr": 0.0013663860116280308, "rougeL_recall": 0.2616100987791669, "rougeL_recall_stderr": 0.003275713171944408, "rougeLsum_fmeasure": 0.16043210769047098, "rougeLsum_fmeasure_stderr": 0.0020055581696195052, "rougeLsum_precision": 0.11658163341774634, "rougeLsum_precision_stderr": 0.0015235968275521983, "rougeLsum_recall": 0.275178892490744, "rougeLsum_recall_stderr": 0.0036968416298391823}}, "1": {"article_DOC_summary": {"bleu": 1.1604575025128918, "bleu_stderr": 0.0877746649601935, "rouge1_fmeasure": 0.16834116026562992, "rouge1_fmeasure_stderr": 0.0023007208182279906, "rouge1_precision": 0.11944768188603072, "rouge1_precision_stderr": 0.0017101747872860968, "rouge1_recall": 0.2967764173333572, "rouge1_recall_stderr": 0.003944563464315701, "rouge2_fmeasure": 0.030277850873655133, "rouge2_fmeasure_stderr": 0.0012385139115491765, "rouge2_precision": 0.021232879052259627, "rouge2_precision_stderr": 0.0008714791712295506, "rouge2_recall": 0.05526393564583485, "rouge2_recall_stderr": 0.0023240893343291387, "rougeL_fmeasure": 0.12978985120588027, "rougeL_fmeasure_stderr": 0.0017321643764184145, "rougeL_precision": 0.09192594935218694, "rougeL_precision_stderr": 0.0012769463345594439, "rougeL_recall": 0.23019911126154574, "rougeL_recall_stderr": 0.003091431421711302, "rougeLsum_fmeasure": 0.13564693066989447, "rougeLsum_fmeasure_stderr": 0.0018863533793062786, "rougeLsum_precision": 0.09602849667357606, "rougeLsum_precision_stderr": 0.001386138103889359, "rougeLsum_recall": 0.2407455765901895, "rougeLsum_recall_stderr": 0.003358547528460653}}, "2": {"article_DOC_summary": {"bleu": 1.1805459815971808, "bleu_stderr": 0.08801398952015627, "rouge1_fmeasure": 0.16960096474510714, "rouge1_fmeasure_stderr": 0.002303219538316631, "rouge1_precision": 0.12017928149855549, "rouge1_precision_stderr": 0.0017155806595076542, "rouge1_recall": 0.3000656582759754, "rouge1_recall_stderr": 0.003942220611475929, "rouge2_fmeasure": 0.03015426920788573, "rouge2_fmeasure_stderr": 0.001263243793733084, "rouge2_precision": 0.02115270486812927, "rouge2_precision_stderr": 0.0008856577807583306, "rouge2_recall": 0.054787883745542026, "rouge2_recall_stderr": 0.0023745228517959276, "rougeL_fmeasure": 0.131124075957363, "rougeL_fmeasure_stderr": 0.0017424293214564424, "rougeL_precision": 0.0927144051400674, "rougeL_precision_stderr": 0.0012857006594715614, "rougeL_recall": 0.23362729816209124, "rougeL_recall_stderr": 0.0031066367525268844, "rougeLsum_fmeasure": 0.1367233861657065, "rougeLsum_fmeasure_stderr": 0.0018905430841393567, "rougeLsum_precision": 0.09669042239875854, "rougeLsum_precision_stderr": 0.0013887653900610069, "rougeLsum_recall": 0.24335819875183146, "rougeLsum_recall_stderr": 0.003364341261182289}}, "3": {"article_DOC_summary": {"bleu": 1.2117893477080361, "bleu_stderr": 0.1149778380988921, "rouge1_fmeasure": 0.15889837582063066, "rouge1_fmeasure_stderr": 0.0024042734536394335, "rouge1_precision": 0.11536457953211869, "rouge1_precision_stderr": 0.0019139955193594944, "rouge1_recall": 
0.2768098516237596, "rouge1_recall_stderr": 0.004205225171304428, "rouge2_fmeasure": 0.028265095806897757, "rouge2_fmeasure_stderr": 0.0012619861741006862, "rouge2_precision": 0.020015331184403896, "rouge2_precision_stderr": 0.0009021836723852284, "rouge2_recall": 0.05127981526296904, "rouge2_recall_stderr": 0.002364022279427071, "rougeL_fmeasure": 0.12453970375141683, "rougeL_fmeasure_stderr": 0.0018237860596141172, "rougeL_precision": 0.08997967190382496, "rougeL_precision_stderr": 0.0014235921883162194, "rougeL_recall": 0.21857370135939078, "rougeL_recall_stderr": 0.003310622426465869, "rougeLsum_fmeasure": 0.1276545465727294, "rougeLsum_fmeasure_stderr": 0.001972377201117121, "rougeLsum_precision": 0.09224753114626984, "rougeLsum_precision_stderr": 0.0015272098267094347, "rougeLsum_recall": 0.2238960772414025, "rougeLsum_recall_stderr": 0.003566353512085928}}, "4": {"article_DOC_summary": {"bleu": 0.5192386395102333, "bleu_stderr": 0.13002341898619957, "rouge1_fmeasure": 0.04400316033856543, "rouge1_fmeasure_stderr": 0.002490349738342324, "rouge1_precision": 0.03793206438705544, "rouge1_precision_stderr": 0.0024366223683722097, "rouge1_recall": 0.06987736475836392, "rouge1_recall_stderr": 0.004064805072138278, "rouge2_fmeasure": 0.008063468290742252, "rouge2_fmeasure_stderr": 0.0007981358698909123, "rouge2_precision": 0.006979598507473323, "rouge2_precision_stderr": 0.0008901794799735351, "rouge2_recall": 0.013423754512870454, "rouge2_recall_stderr": 0.0013418442840833563, "rougeL_fmeasure": 0.034178653761551166, "rougeL_fmeasure_stderr": 0.0019416567180353596, "rougeL_precision": 0.030091382509093004, "rougeL_precision_stderr": 0.0020782922503738504, "rougeL_recall": 0.05458303198935503, "rougeL_recall_stderr": 0.003197618040713862, "rougeLsum_fmeasure": 0.036128933672296124, "rougeLsum_fmeasure_stderr": 0.0020726114306530577, "rougeLsum_precision": 0.03177113843460543, "rougeLsum_precision_stderr": 0.002181794130601312, "rougeLsum_recall": 0.05751126056018169, "rougeLsum_recall_stderr": 0.0033952763901923607}}, "5": {"article_DOC_summary": {"bleu": 9.778811073469078e-39, "bleu_stderr": 1.521718069448933e-33, "rouge1_fmeasure": 0.0022218780996264655, "rouge1_fmeasure_stderr": 0.0006107839354088714, "rouge1_precision": 0.0025333563065761205, "rouge1_precision_stderr": 0.0007200688467666427, "rouge1_recall": 0.0020475737491748073, "rouge1_recall_stderr": 0.0005527163320441051, "rouge2_fmeasure": 0.0001768729417917658, "rouge2_fmeasure_stderr": 8.939544334231789e-05, "rouge2_precision": 0.0002052993867644256, "rouge2_precision_stderr": 0.00010263779675566133, "rouge2_recall": 0.00015805735145357788, "rouge2_recall_stderr": 8.125011510560468e-05, "rougeL_fmeasure": 0.0016699276460120504, "rougeL_fmeasure_stderr": 0.0004477526803373312, "rougeL_precision": 0.0018767772604883414, "rougeL_precision_stderr": 0.0005169601507247577, "rougeL_recall": 0.0015653832711829878, "rougeL_recall_stderr": 0.0004179534887542103, "rougeLsum_fmeasure": 0.0016709735398329094, "rougeLsum_fmeasure_stderr": 0.00044431903582722767, "rougeLsum_precision": 0.0018647403070412589, "rougeLsum_precision_stderr": 0.0005045589409446839, "rougeLsum_recall": 0.001573237052621675, "rougeLsum_recall_stderr": 0.0004187928988454184}}}}
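For reference, the merged.json aggregate above is keyed as task name → few-shot count (stored as a string) → prompt name → metric values. Below is a minimal sketch, not part of the evaluation harness, for pulling a single score out of such a file; the relative path is illustrative and should point at one of the */evaluation/generation/merged.json files in this repo.

import json

# Minimal sketch: read a merged.json aggregate with the layout shown above.
# The path is illustrative; adjust it to the local checkout.
with open("merged.json") as f:
    merged = json.load(f)

# Layout: task -> few-shot count (string key) -> prompt name -> metrics.
bleu_5shot = merged["e2e_nlg_cleaned"]["5"]["generate_text_restaurant"]["bleu"]
print(f"e2e_nlg_cleaned, 5-shot BLEU: {bleu_5shot:.2f}")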
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/web_nlg_en",
5
+ "prompt_name": "PALM_prompt",
6
+ "bleu": 0.466035571971691,
7
+ "dataset_path": "GEM/web_nlg",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "bleu_stderr": 0.03473185881932128
11
+ },
12
+ {
13
+ "task_name": "GEM/web_nlg_en",
14
+ "prompt_name": "PALM_prompt",
15
+ "rouge1_precision": 0.06948874429007668,
16
+ "dataset_path": "GEM/web_nlg",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.00126969268323737
20
+ },
21
+ {
22
+ "task_name": "GEM/web_nlg_en",
23
+ "prompt_name": "PALM_prompt",
24
+ "rouge1_recall": 0.35504271560944006,
25
+ "dataset_path": "GEM/web_nlg",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.004806804977630298
29
+ },
30
+ {
31
+ "task_name": "GEM/web_nlg_en",
32
+ "prompt_name": "PALM_prompt",
33
+ "rouge1_fmeasure": 0.10915182875204862,
34
+ "dataset_path": "GEM/web_nlg",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.0017368992622529642
38
+ },
39
+ {
40
+ "task_name": "GEM/web_nlg_en",
41
+ "prompt_name": "PALM_prompt",
42
+ "rouge2_precision": 0.032080423637990074,
43
+ "dataset_path": "GEM/web_nlg",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.0007748948128844898
47
+ },
48
+ {
49
+ "task_name": "GEM/web_nlg_en",
50
+ "prompt_name": "PALM_prompt",
51
+ "rouge2_recall": 0.1749610451479523,
52
+ "dataset_path": "GEM/web_nlg",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.0035756989094432607
56
+ },
57
+ {
58
+ "task_name": "GEM/web_nlg_en",
59
+ "prompt_name": "PALM_prompt",
60
+ "rouge2_fmeasure": 0.05069482326429204,
61
+ "dataset_path": "GEM/web_nlg",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.0011084349852182986
65
+ },
66
+ {
67
+ "task_name": "GEM/web_nlg_en",
68
+ "prompt_name": "PALM_prompt",
69
+ "rougeL_precision": 0.06534721122278908,
70
+ "dataset_path": "GEM/web_nlg",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.001160283231789772
74
+ },
75
+ {
76
+ "task_name": "GEM/web_nlg_en",
77
+ "prompt_name": "PALM_prompt",
78
+ "rougeL_recall": 0.33105657281482775,
79
+ "dataset_path": "GEM/web_nlg",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.004330159547841201
83
+ },
84
+ {
85
+ "task_name": "GEM/web_nlg_en",
86
+ "prompt_name": "PALM_prompt",
87
+ "rougeL_fmeasure": 0.102624870740898,
88
+ "dataset_path": "GEM/web_nlg",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.0015972476492216417
92
+ },
93
+ {
94
+ "task_name": "GEM/web_nlg_en",
95
+ "prompt_name": "PALM_prompt",
96
+ "rougeLsum_precision": 0.06651082429118176,
97
+ "dataset_path": "GEM/web_nlg",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.0012081627405732267
101
+ },
102
+ {
103
+ "task_name": "GEM/web_nlg_en",
104
+ "prompt_name": "PALM_prompt",
105
+ "rougeLsum_recall": 0.3391017904387808,
106
+ "dataset_path": "GEM/web_nlg",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.004545927615205131
110
+ },
111
+ {
112
+ "task_name": "GEM/web_nlg_en",
113
+ "prompt_name": "PALM_prompt",
114
+ "rougeLsum_fmeasure": 0.10445663963047473,
115
+ "dataset_path": "GEM/web_nlg",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.001657522536981241
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 4,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
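Each slim.*.json file added in this commit repeats the same task/prompt identifiers on every entry of its "results" list, so it can be convenient to flatten the list into a single metric dictionary alongside the run config. A minimal sketch under that assumption follows; load_slim is an illustrative helper name, not part of the evaluation harness.

import json

# Minimal sketch: flatten one slim.*.json result file into {metric_name: value}
# and return the run config with it.
def load_slim(path):
    with open(path) as f:
        data = json.load(f)
    id_keys = {"task_name", "prompt_name", "dataset_path", "dataset_name", "subset"}
    metrics = {}
    for entry in data["results"]:
        for key, value in entry.items():
            if key not in id_keys:
                metrics[key] = value
    return metrics, data["config"]

metrics, config = load_slim(
    "2b855b11bc4seed3/evaluation/generation/"
    "slim.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json")
print(config["num_fewshot"], metrics["bleu"], metrics["rouge1_fmeasure"])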
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/web_nlg_en",
5
+ "prompt_name": "PALM_prompt",
6
+ "bleu": 0.47567398731333893,
7
+ "dataset_path": "GEM/web_nlg",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "bleu_stderr": 0.024264359754404757
11
+ },
12
+ {
13
+ "task_name": "GEM/web_nlg_en",
14
+ "prompt_name": "PALM_prompt",
15
+ "rouge1_precision": 0.0696852083209599,
16
+ "dataset_path": "GEM/web_nlg",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.0013577090826179866
20
+ },
21
+ {
22
+ "task_name": "GEM/web_nlg_en",
23
+ "prompt_name": "PALM_prompt",
24
+ "rouge1_recall": 0.3612843042707264,
25
+ "dataset_path": "GEM/web_nlg",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.004861238941500158
29
+ },
30
+ {
31
+ "task_name": "GEM/web_nlg_en",
32
+ "prompt_name": "PALM_prompt",
33
+ "rouge1_fmeasure": 0.10892336928116716,
34
+ "dataset_path": "GEM/web_nlg",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.0016854927070877707
38
+ },
39
+ {
40
+ "task_name": "GEM/web_nlg_en",
41
+ "prompt_name": "PALM_prompt",
42
+ "rouge2_precision": 0.0315904391098698,
43
+ "dataset_path": "GEM/web_nlg",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.0007431193984384711
47
+ },
48
+ {
49
+ "task_name": "GEM/web_nlg_en",
50
+ "prompt_name": "PALM_prompt",
51
+ "rouge2_recall": 0.177085732300076,
52
+ "dataset_path": "GEM/web_nlg",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.0035275863817017116
56
+ },
57
+ {
58
+ "task_name": "GEM/web_nlg_en",
59
+ "prompt_name": "PALM_prompt",
60
+ "rouge2_fmeasure": 0.05008036673402714,
61
+ "dataset_path": "GEM/web_nlg",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.0010613654934549248
65
+ },
66
+ {
67
+ "task_name": "GEM/web_nlg_en",
68
+ "prompt_name": "PALM_prompt",
69
+ "rougeL_precision": 0.0651341368207596,
70
+ "dataset_path": "GEM/web_nlg",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.0012591521998187908
74
+ },
75
+ {
76
+ "task_name": "GEM/web_nlg_en",
77
+ "prompt_name": "PALM_prompt",
78
+ "rougeL_recall": 0.33426205604441794,
79
+ "dataset_path": "GEM/web_nlg",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.004323619187529501
83
+ },
84
+ {
85
+ "task_name": "GEM/web_nlg_en",
86
+ "prompt_name": "PALM_prompt",
87
+ "rougeL_fmeasure": 0.10170780198533885,
88
+ "dataset_path": "GEM/web_nlg",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.001554714639457137
92
+ },
93
+ {
94
+ "task_name": "GEM/web_nlg_en",
95
+ "prompt_name": "PALM_prompt",
96
+ "rougeLsum_precision": 0.06639663258609937,
97
+ "dataset_path": "GEM/web_nlg",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.001291170836697709
101
+ },
102
+ {
103
+ "task_name": "GEM/web_nlg_en",
104
+ "prompt_name": "PALM_prompt",
105
+ "rougeLsum_recall": 0.34334559580001384,
106
+ "dataset_path": "GEM/web_nlg",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.0045556002073834055
110
+ },
111
+ {
112
+ "task_name": "GEM/web_nlg_en",
113
+ "prompt_name": "PALM_prompt",
114
+ "rougeLsum_fmeasure": 0.10372985125549432,
115
+ "dataset_path": "GEM/web_nlg",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.0016004866013084038
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 5,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_GEM-wiki_lingua_en_tldr_en_5.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/wiki_lingua_en",
5
+ "prompt_name": "tldr_en",
6
+ "rouge1_precision": 0.008542236722467199,
7
+ "dataset_path": "GEM/wiki_lingua",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "rouge1_precision_stderr": 0.0008747857935002281
11
+ },
12
+ {
13
+ "task_name": "GEM/wiki_lingua_en",
14
+ "prompt_name": "tldr_en",
15
+ "rouge1_recall": 0.012984770525311046,
16
+ "dataset_path": "GEM/wiki_lingua",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_recall_stderr": 0.0012451725161618523
20
+ },
21
+ {
22
+ "task_name": "GEM/wiki_lingua_en",
23
+ "prompt_name": "tldr_en",
24
+ "rouge1_fmeasure": 0.008780004115444839,
25
+ "dataset_path": "GEM/wiki_lingua",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_fmeasure_stderr": 0.0008275079317754659
29
+ },
30
+ {
31
+ "task_name": "GEM/wiki_lingua_en",
32
+ "prompt_name": "tldr_en",
33
+ "rouge2_precision": 0.0020271291197224393,
34
+ "dataset_path": "GEM/wiki_lingua",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge2_precision_stderr": 0.00029783255687868237
38
+ },
39
+ {
40
+ "task_name": "GEM/wiki_lingua_en",
41
+ "prompt_name": "tldr_en",
42
+ "rouge2_recall": 0.0032641829590180815,
43
+ "dataset_path": "GEM/wiki_lingua",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_recall_stderr": 0.00043766038421818427
47
+ },
48
+ {
49
+ "task_name": "GEM/wiki_lingua_en",
50
+ "prompt_name": "tldr_en",
51
+ "rouge2_fmeasure": 0.002147119319405267,
52
+ "dataset_path": "GEM/wiki_lingua",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_fmeasure_stderr": 0.00027623822033813396
56
+ },
57
+ {
58
+ "task_name": "GEM/wiki_lingua_en",
59
+ "prompt_name": "tldr_en",
60
+ "rougeL_precision": 0.006544489279360034,
61
+ "dataset_path": "GEM/wiki_lingua",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rougeL_precision_stderr": 0.0006478926275365654
65
+ },
66
+ {
67
+ "task_name": "GEM/wiki_lingua_en",
68
+ "prompt_name": "tldr_en",
69
+ "rougeL_recall": 0.010413387705542463,
70
+ "dataset_path": "GEM/wiki_lingua",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_recall_stderr": 0.001029849799449182
74
+ },
75
+ {
76
+ "task_name": "GEM/wiki_lingua_en",
77
+ "prompt_name": "tldr_en",
78
+ "rougeL_fmeasure": 0.006842107389797507,
79
+ "dataset_path": "GEM/wiki_lingua",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_fmeasure_stderr": 0.0006383034087266817
83
+ },
84
+ {
85
+ "task_name": "GEM/wiki_lingua_en",
86
+ "prompt_name": "tldr_en",
87
+ "rougeLsum_precision": 0.00789904438059008,
88
+ "dataset_path": "GEM/wiki_lingua",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeLsum_precision_stderr": 0.0008062204339147679
92
+ },
93
+ {
94
+ "task_name": "GEM/wiki_lingua_en",
95
+ "prompt_name": "tldr_en",
96
+ "rougeLsum_recall": 0.012088341751228384,
97
+ "dataset_path": "GEM/wiki_lingua",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_recall_stderr": 0.0011647198145518685
101
+ },
102
+ {
103
+ "task_name": "GEM/wiki_lingua_en",
104
+ "prompt_name": "tldr_en",
105
+ "rougeLsum_fmeasure": 0.00814217266519704,
106
+ "dataset_path": "GEM/wiki_lingua",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_fmeasure_stderr": 0.0007660037005158112
110
+ },
111
+ {
112
+ "task_name": "GEM/wiki_lingua_en",
113
+ "prompt_name": "tldr_en",
114
+ "bleu": 1.5732639330570601e-06,
115
+ "dataset_path": "GEM/wiki_lingua",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "bleu_stderr": 2.4552136635733976e-06
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 5,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "e2e_nlg_cleaned",
5
+ "prompt_name": "generate_text_restaurant",
6
+ "bleu": 11.29834992541636,
7
+ "dataset_path": "e2e_nlg_cleaned",
8
+ "dataset_name": null,
9
+ "subset": null,
10
+ "bleu_stderr": 0.21750627906833597
11
+ },
12
+ {
13
+ "task_name": "e2e_nlg_cleaned",
14
+ "prompt_name": "generate_text_restaurant",
15
+ "rouge1_precision": 0.48699136269149845,
16
+ "dataset_path": "e2e_nlg_cleaned",
17
+ "dataset_name": null,
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.0030725910034362365
20
+ },
21
+ {
22
+ "task_name": "e2e_nlg_cleaned",
23
+ "prompt_name": "generate_text_restaurant",
24
+ "rouge1_recall": 0.39873667481511255,
25
+ "dataset_path": "e2e_nlg_cleaned",
26
+ "dataset_name": null,
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.002641833719734696
29
+ },
30
+ {
31
+ "task_name": "e2e_nlg_cleaned",
32
+ "prompt_name": "generate_text_restaurant",
33
+ "rouge1_fmeasure": 0.41733074609230864,
34
+ "dataset_path": "e2e_nlg_cleaned",
35
+ "dataset_name": null,
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.0021862637249920026
38
+ },
39
+ {
40
+ "task_name": "e2e_nlg_cleaned",
41
+ "prompt_name": "generate_text_restaurant",
42
+ "rouge2_precision": 0.22696779992360078,
43
+ "dataset_path": "e2e_nlg_cleaned",
44
+ "dataset_name": null,
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.002388090320486047
47
+ },
48
+ {
49
+ "task_name": "e2e_nlg_cleaned",
50
+ "prompt_name": "generate_text_restaurant",
51
+ "rouge2_recall": 0.18435752956149218,
52
+ "dataset_path": "e2e_nlg_cleaned",
53
+ "dataset_name": null,
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.002022279759231637
56
+ },
57
+ {
58
+ "task_name": "e2e_nlg_cleaned",
59
+ "prompt_name": "generate_text_restaurant",
60
+ "rouge2_fmeasure": 0.19240159571656937,
61
+ "dataset_path": "e2e_nlg_cleaned",
62
+ "dataset_name": null,
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.0018744046004783308
65
+ },
66
+ {
67
+ "task_name": "e2e_nlg_cleaned",
68
+ "prompt_name": "generate_text_restaurant",
69
+ "rougeL_precision": 0.3681077705552102,
70
+ "dataset_path": "e2e_nlg_cleaned",
71
+ "dataset_name": null,
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.0027056437550490925
74
+ },
75
+ {
76
+ "task_name": "e2e_nlg_cleaned",
77
+ "prompt_name": "generate_text_restaurant",
78
+ "rougeL_recall": 0.3004900799799888,
79
+ "dataset_path": "e2e_nlg_cleaned",
80
+ "dataset_name": null,
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.002254464504837015
83
+ },
84
+ {
85
+ "task_name": "e2e_nlg_cleaned",
86
+ "prompt_name": "generate_text_restaurant",
87
+ "rougeL_fmeasure": 0.31456390703859133,
88
+ "dataset_path": "e2e_nlg_cleaned",
89
+ "dataset_name": null,
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.0019506135524216602
92
+ },
93
+ {
94
+ "task_name": "e2e_nlg_cleaned",
95
+ "prompt_name": "generate_text_restaurant",
96
+ "rougeLsum_precision": 0.41003964030862644,
97
+ "dataset_path": "e2e_nlg_cleaned",
98
+ "dataset_name": null,
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.002940274288350555
101
+ },
102
+ {
103
+ "task_name": "e2e_nlg_cleaned",
104
+ "prompt_name": "generate_text_restaurant",
105
+ "rougeLsum_recall": 0.3361130546577144,
106
+ "dataset_path": "e2e_nlg_cleaned",
107
+ "dataset_name": null,
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.0025408101507360696
110
+ },
111
+ {
112
+ "task_name": "e2e_nlg_cleaned",
113
+ "prompt_name": "generate_text_restaurant",
114
+ "rougeLsum_fmeasure": 0.35140686520524467,
115
+ "dataset_path": "e2e_nlg_cleaned",
116
+ "dataset_name": null,
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.002186992225871175
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 4,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "e2e_nlg_cleaned",
5
+ "prompt_name": "generate_text_restaurant",
6
+ "bleu": 11.269628945064296,
7
+ "dataset_path": "e2e_nlg_cleaned",
8
+ "dataset_name": null,
9
+ "subset": null,
10
+ "bleu_stderr": 0.15756104774510044
11
+ },
12
+ {
13
+ "task_name": "e2e_nlg_cleaned",
14
+ "prompt_name": "generate_text_restaurant",
15
+ "rouge1_precision": 0.4903478553495967,
16
+ "dataset_path": "e2e_nlg_cleaned",
17
+ "dataset_name": null,
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.00308746448334887
20
+ },
21
+ {
22
+ "task_name": "e2e_nlg_cleaned",
23
+ "prompt_name": "generate_text_restaurant",
24
+ "rouge1_recall": 0.39563780199750853,
25
+ "dataset_path": "e2e_nlg_cleaned",
26
+ "dataset_name": null,
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.002621989888992289
29
+ },
30
+ {
31
+ "task_name": "e2e_nlg_cleaned",
32
+ "prompt_name": "generate_text_restaurant",
33
+ "rouge1_fmeasure": 0.41656475829237966,
34
+ "dataset_path": "e2e_nlg_cleaned",
35
+ "dataset_name": null,
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.0021719641372564386
38
+ },
39
+ {
40
+ "task_name": "e2e_nlg_cleaned",
41
+ "prompt_name": "generate_text_restaurant",
42
+ "rouge2_precision": 0.23014095309503432,
43
+ "dataset_path": "e2e_nlg_cleaned",
44
+ "dataset_name": null,
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.0023899391576289145
47
+ },
48
+ {
49
+ "task_name": "e2e_nlg_cleaned",
50
+ "prompt_name": "generate_text_restaurant",
51
+ "rouge2_recall": 0.18355072919581486,
52
+ "dataset_path": "e2e_nlg_cleaned",
53
+ "dataset_name": null,
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.001968342781813886
56
+ },
57
+ {
58
+ "task_name": "e2e_nlg_cleaned",
59
+ "prompt_name": "generate_text_restaurant",
60
+ "rouge2_fmeasure": 0.19301313468237216,
61
+ "dataset_path": "e2e_nlg_cleaned",
62
+ "dataset_name": null,
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.0018394274162893405
65
+ },
66
+ {
67
+ "task_name": "e2e_nlg_cleaned",
68
+ "prompt_name": "generate_text_restaurant",
69
+ "rougeL_precision": 0.3721457131973453,
70
+ "dataset_path": "e2e_nlg_cleaned",
71
+ "dataset_name": null,
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.0027221328586820265
74
+ },
75
+ {
76
+ "task_name": "e2e_nlg_cleaned",
77
+ "prompt_name": "generate_text_restaurant",
78
+ "rougeL_recall": 0.29906088364914196,
79
+ "dataset_path": "e2e_nlg_cleaned",
80
+ "dataset_name": null,
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.0022373123686497648
83
+ },
84
+ {
85
+ "task_name": "e2e_nlg_cleaned",
86
+ "prompt_name": "generate_text_restaurant",
87
+ "rougeL_fmeasure": 0.3150475005541494,
88
+ "dataset_path": "e2e_nlg_cleaned",
89
+ "dataset_name": null,
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.0019395006953993484
92
+ },
93
+ {
94
+ "task_name": "e2e_nlg_cleaned",
95
+ "prompt_name": "generate_text_restaurant",
96
+ "rougeLsum_precision": 0.41476921512799597,
97
+ "dataset_path": "e2e_nlg_cleaned",
98
+ "dataset_name": null,
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.0029434738207341333
101
+ },
102
+ {
103
+ "task_name": "e2e_nlg_cleaned",
104
+ "prompt_name": "generate_text_restaurant",
105
+ "rougeLsum_recall": 0.33464526234291403,
106
+ "dataset_path": "e2e_nlg_cleaned",
107
+ "dataset_name": null,
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.002498553893092118
110
+ },
111
+ {
112
+ "task_name": "e2e_nlg_cleaned",
113
+ "prompt_name": "generate_text_restaurant",
114
+ "rougeLsum_fmeasure": 0.3521891699869018,
115
+ "dataset_path": "e2e_nlg_cleaned",
116
+ "dataset_name": null,
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.0021596292682356695
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 5,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_gem_xsum_article_DOC_summary_4.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "gem_xsum",
5
+ "prompt_name": "article_DOC_summary",
6
+ "rouge1_precision": 0.03793206438705544,
7
+ "dataset_path": "GEM/xsum",
8
+ "dataset_name": null,
9
+ "subset": "",
10
+ "rouge1_precision_stderr": 0.0024366223683722097
11
+ },
12
+ {
13
+ "task_name": "gem_xsum",
14
+ "prompt_name": "article_DOC_summary",
15
+ "rouge1_recall": 0.06987736475836392,
16
+ "dataset_path": "GEM/xsum",
17
+ "dataset_name": null,
18
+ "subset": "",
19
+ "rouge1_recall_stderr": 0.004064805072138278
20
+ },
21
+ {
22
+ "task_name": "gem_xsum",
23
+ "prompt_name": "article_DOC_summary",
24
+ "rouge1_fmeasure": 0.04400316033856543,
25
+ "dataset_path": "GEM/xsum",
26
+ "dataset_name": null,
27
+ "subset": "",
28
+ "rouge1_fmeasure_stderr": 0.002490349738342324
29
+ },
30
+ {
31
+ "task_name": "gem_xsum",
32
+ "prompt_name": "article_DOC_summary",
33
+ "rouge2_precision": 0.006979598507473323,
34
+ "dataset_path": "GEM/xsum",
35
+ "dataset_name": null,
36
+ "subset": "",
37
+ "rouge2_precision_stderr": 0.0008901794799735351
38
+ },
39
+ {
40
+ "task_name": "gem_xsum",
41
+ "prompt_name": "article_DOC_summary",
42
+ "rouge2_recall": 0.013423754512870454,
43
+ "dataset_path": "GEM/xsum",
44
+ "dataset_name": null,
45
+ "subset": "",
46
+ "rouge2_recall_stderr": 0.0013418442840833563
47
+ },
48
+ {
49
+ "task_name": "gem_xsum",
50
+ "prompt_name": "article_DOC_summary",
51
+ "rouge2_fmeasure": 0.008063468290742252,
52
+ "dataset_path": "GEM/xsum",
53
+ "dataset_name": null,
54
+ "subset": "",
55
+ "rouge2_fmeasure_stderr": 0.0007981358698909123
56
+ },
57
+ {
58
+ "task_name": "gem_xsum",
59
+ "prompt_name": "article_DOC_summary",
60
+ "rougeL_precision": 0.030091382509093004,
61
+ "dataset_path": "GEM/xsum",
62
+ "dataset_name": null,
63
+ "subset": "",
64
+ "rougeL_precision_stderr": 0.0020782922503738504
65
+ },
66
+ {
67
+ "task_name": "gem_xsum",
68
+ "prompt_name": "article_DOC_summary",
69
+ "rougeL_recall": 0.05458303198935503,
70
+ "dataset_path": "GEM/xsum",
71
+ "dataset_name": null,
72
+ "subset": "",
73
+ "rougeL_recall_stderr": 0.003197618040713862
74
+ },
75
+ {
76
+ "task_name": "gem_xsum",
77
+ "prompt_name": "article_DOC_summary",
78
+ "rougeL_fmeasure": 0.034178653761551166,
79
+ "dataset_path": "GEM/xsum",
80
+ "dataset_name": null,
81
+ "subset": "",
82
+ "rougeL_fmeasure_stderr": 0.0019416567180353596
83
+ },
84
+ {
85
+ "task_name": "gem_xsum",
86
+ "prompt_name": "article_DOC_summary",
87
+ "rougeLsum_precision": 0.03177113843460543,
88
+ "dataset_path": "GEM/xsum",
89
+ "dataset_name": null,
90
+ "subset": "",
91
+ "rougeLsum_precision_stderr": 0.002181794130601312
92
+ },
93
+ {
94
+ "task_name": "gem_xsum",
95
+ "prompt_name": "article_DOC_summary",
96
+ "rougeLsum_recall": 0.05751126056018169,
97
+ "dataset_path": "GEM/xsum",
98
+ "dataset_name": null,
99
+ "subset": "",
100
+ "rougeLsum_recall_stderr": 0.0033952763901923607
101
+ },
102
+ {
103
+ "task_name": "gem_xsum",
104
+ "prompt_name": "article_DOC_summary",
105
+ "rougeLsum_fmeasure": 0.036128933672296124,
106
+ "dataset_path": "GEM/xsum",
107
+ "dataset_name": null,
108
+ "subset": "",
109
+ "rougeLsum_fmeasure_stderr": 0.0020726114306530577
110
+ },
111
+ {
112
+ "task_name": "gem_xsum",
113
+ "prompt_name": "article_DOC_summary",
114
+ "bleu": 0.5192386395102333,
115
+ "dataset_path": "GEM/xsum",
116
+ "dataset_name": null,
117
+ "subset": "",
118
+ "bleu_stderr": 0.13002341898619957
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 4,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11bc4seed3/evaluation/generation/slim.2b855b11bc4seed3_gem_xsum_article_DOC_summary_5.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "gem_xsum",
5
+ "prompt_name": "article_DOC_summary",
6
+ "rouge1_precision": 0.0025333563065761205,
7
+ "dataset_path": "GEM/xsum",
8
+ "dataset_name": null,
9
+ "subset": "",
10
+ "rouge1_precision_stderr": 0.0007200688467666427
11
+ },
12
+ {
13
+ "task_name": "gem_xsum",
14
+ "prompt_name": "article_DOC_summary",
15
+ "rouge1_recall": 0.0020475737491748073,
16
+ "dataset_path": "GEM/xsum",
17
+ "dataset_name": null,
18
+ "subset": "",
19
+ "rouge1_recall_stderr": 0.0005527163320441051
20
+ },
21
+ {
22
+ "task_name": "gem_xsum",
23
+ "prompt_name": "article_DOC_summary",
24
+ "rouge1_fmeasure": 0.0022218780996264655,
25
+ "dataset_path": "GEM/xsum",
26
+ "dataset_name": null,
27
+ "subset": "",
28
+ "rouge1_fmeasure_stderr": 0.0006107839354088714
29
+ },
30
+ {
31
+ "task_name": "gem_xsum",
32
+ "prompt_name": "article_DOC_summary",
33
+ "rouge2_precision": 0.0002052993867644256,
34
+ "dataset_path": "GEM/xsum",
35
+ "dataset_name": null,
36
+ "subset": "",
37
+ "rouge2_precision_stderr": 0.00010263779675566133
38
+ },
39
+ {
40
+ "task_name": "gem_xsum",
41
+ "prompt_name": "article_DOC_summary",
42
+ "rouge2_recall": 0.00015805735145357788,
43
+ "dataset_path": "GEM/xsum",
44
+ "dataset_name": null,
45
+ "subset": "",
46
+ "rouge2_recall_stderr": 8.125011510560468e-05
47
+ },
48
+ {
49
+ "task_name": "gem_xsum",
50
+ "prompt_name": "article_DOC_summary",
51
+ "rouge2_fmeasure": 0.0001768729417917658,
52
+ "dataset_path": "GEM/xsum",
53
+ "dataset_name": null,
54
+ "subset": "",
55
+ "rouge2_fmeasure_stderr": 8.939544334231789e-05
56
+ },
57
+ {
58
+ "task_name": "gem_xsum",
59
+ "prompt_name": "article_DOC_summary",
60
+ "rougeL_precision": 0.0018767772604883414,
61
+ "dataset_path": "GEM/xsum",
62
+ "dataset_name": null,
63
+ "subset": "",
64
+ "rougeL_precision_stderr": 0.0005169601507247577
65
+ },
66
+ {
67
+ "task_name": "gem_xsum",
68
+ "prompt_name": "article_DOC_summary",
69
+ "rougeL_recall": 0.0015653832711829878,
70
+ "dataset_path": "GEM/xsum",
71
+ "dataset_name": null,
72
+ "subset": "",
73
+ "rougeL_recall_stderr": 0.0004179534887542103
74
+ },
75
+ {
76
+ "task_name": "gem_xsum",
77
+ "prompt_name": "article_DOC_summary",
78
+ "rougeL_fmeasure": 0.0016699276460120504,
79
+ "dataset_path": "GEM/xsum",
80
+ "dataset_name": null,
81
+ "subset": "",
82
+ "rougeL_fmeasure_stderr": 0.0004477526803373312
83
+ },
84
+ {
85
+ "task_name": "gem_xsum",
86
+ "prompt_name": "article_DOC_summary",
87
+ "rougeLsum_precision": 0.0018647403070412589,
88
+ "dataset_path": "GEM/xsum",
89
+ "dataset_name": null,
90
+ "subset": "",
91
+ "rougeLsum_precision_stderr": 0.0005045589409446839
92
+ },
93
+ {
94
+ "task_name": "gem_xsum",
95
+ "prompt_name": "article_DOC_summary",
96
+ "rougeLsum_recall": 0.001573237052621675,
97
+ "dataset_path": "GEM/xsum",
98
+ "dataset_name": null,
99
+ "subset": "",
100
+ "rougeLsum_recall_stderr": 0.0004187928988454184
101
+ },
102
+ {
103
+ "task_name": "gem_xsum",
104
+ "prompt_name": "article_DOC_summary",
105
+ "rougeLsum_fmeasure": 0.0016709735398329094,
106
+ "dataset_path": "GEM/xsum",
107
+ "dataset_name": null,
108
+ "subset": "",
109
+ "rougeLsum_fmeasure_stderr": 0.00044431903582722767
110
+ },
111
+ {
112
+ "task_name": "gem_xsum",
113
+ "prompt_name": "article_DOC_summary",
114
+ "bleu": 9.778811073469078e-39,
115
+ "dataset_path": "GEM/xsum",
116
+ "dataset_name": null,
117
+ "subset": "",
118
+ "bleu_stderr": 1.521718069448933e-33
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b11bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 5,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }