Muennighoff committed on
Commit bbf6528 · 1 Parent(s): d38545d
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +25 -0
  2. 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.json +1 -0
  3. 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.json +1 -0
  4. 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.json +1 -0
  5. 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_0.json +1 -0
  6. 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_1.json +1 -0
  7. 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_2.json +1 -0
  8. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl +0 -0
  9. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl +0 -0
  10. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl +0 -0
  11. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl +0 -0
  12. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl +0 -0
  13. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl +0 -0
  14. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl +0 -0
  15. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl +0 -0
  16. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl +0 -0
  17. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl +0 -0
  18. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl +0 -0
  19. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl +0 -0
  20. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl +3 -0
  21. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl +3 -0
  22. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl +3 -0
  23. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +0 -0
  24. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +0 -0
  25. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +0 -0
  26. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_0.jsonl +3 -0
  27. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_1.jsonl +3 -0
  28. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_2.jsonl +3 -0
  29. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.jsonl +0 -0
  30. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.jsonl +0 -0
  31. 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.jsonl +0 -0
  32. 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.json +133 -0
  33. 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.json +133 -0
  34. 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.json +133 -0
  35. 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_gem_xsum_article_DOC_summary_0.json +133 -0
  36. 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_gem_xsum_article_DOC_summary_1.json +133 -0
  37. 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_gem_xsum_article_DOC_summary_2.json +133 -0
  38. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_0.json +54 -0
  39. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_0_lm-eval_global_step52452_2023-02-24-23-14-05_0shots_backup.json +54 -0
  40. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_1.json +32 -0
  41. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_1_lm-eval_global_step52452_2023-02-24-23-14-05_1shots_backup.json +32 -0
  42. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_2.json +32 -0
  43. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_2_lm-eval_global_step52452_2023-02-24-23-14-05_2shots_backup.json +32 -0
  44. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_3.json +32 -0
  45. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_3_lm-eval_global_step52452_2023-02-24-23-14-05_3shots_backup.json +32 -0
  46. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_4.json +32 -0
  47. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_4_lm-eval_global_step52452_2023-02-24-23-14-05_4shots_backup.json +32 -0
  48. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_5.json +32 -0
  49. 2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_5_lm-eval_global_step52452_2023-02-24-23-14-05_5shots_backup.json +32 -0
  50. 2b855b11boscarseed1/global_step52452/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
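The evaluation artifacts listed above can be fetched selectively instead of cloning the whole repository. Below is a minimal sketch using huggingface_hub; the repository id is a placeholder (not the actual repo id) and the directory layout is assumed to follow the file list above.

# Minimal sketch: download only the evaluation files for one model variant.
# "<user>/<repo>" is a placeholder; adjust repo_type if this is a dataset repo.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="<user>/<repo>",
    allow_patterns=["2b855b11boscarseed1/evaluation/*"],
)
print("Downloaded evaluation files to", local_dir)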
.gitattributes CHANGED
@@ -224,3 +224,28 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  2b855b1b25oscarseed3/evaluation/generation/examples.2b855b1b25oscarseed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
  2b855b1b25oscarseed3/evaluation/generation/examples.2b855b1b25oscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
  2b855b1b25oscarseed1/evaluation/generation/examples.2b855b1b25oscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+ 2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 0.8938384110720313, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04442740312402193}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.09374072562693576, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0012230359583290014}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.1941308443869652, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0022578323155766997}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.12367657246465094, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014697284619829536}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.020960965090776735, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006244327243966665}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.04544625505084656, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0011979457762283947}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.02782065901980275, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007104099274964277}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.08203779654362248, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010243499554408882}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.17174001350939913, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019699700948681647}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.10860196142327254, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012302518402255934}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.08310697182038314, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011057328214282471}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.17275727213813283, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002060390288561184}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.10973134394419841, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013237761734567783}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 11.564210778804036, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1198765256137471}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5637173176604086, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003321797810373166}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.42980056601194444, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002973789269502846}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4605347726818318, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023127571845312834}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.26763533541615536, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027609708835756447}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.20040046914166157, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021517714918030005}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.21501029566008578, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002028484285834072}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4107410670750688, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003020208536626803}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.30963490201281724, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002399599836596746}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3329465663050009, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002054461632613191}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.45928342602402417, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0032079639333423174}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3490148298085739, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002713242494960982}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3743551835626005, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022879473483603647}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 13.400542456523908, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1609394772064794}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5764234982441327, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0033021249054890935}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4503974768175401, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002919712590777667}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4798036150204005, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002282274159276462}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.28675558273290647, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002781811689703388}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22076517179049052, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022524340062285335}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.2351759267020243, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002088790334860442}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4286366628943883, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0030557015039043693}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3325081715277142, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024750818471041087}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3549448525372181, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021193431273246735}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.48074271393713025, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0032624614219069863}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.37474313833603423, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027481389029155967}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.39953599561234265, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023322731932715716}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_0.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.13211376744686648, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0021078227737627523}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.30958091840657137, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004718256158826077}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1824311356993241, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0027742455429407478}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.028436679980413212, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010660997414346854}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06982063723069834, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0026535400196015854}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.03971064304403434, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0014694772612363063}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10122789771540168, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015927078993417136}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.23911281116753666, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0037320083765561033}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.14006214823111385, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021160537297509855}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10386228512699824, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017447889334045853}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2456731268051541, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004084619780933895}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.14375817680635894, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002322989973976244}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.7044284159611478, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10158710863644861}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_1.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1405432198297611, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001878679789941618}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.3276606298436086, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004376293585107094}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.19239078349475108, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002465960954886116}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.026022309438382178, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009602804697530564}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06491047997705596, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002480531229175089}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.03654673156140313, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013456659236836456}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10365291271701285, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001348668367178514}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.24268394669042775, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0032299728759827427}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.14178613311816618, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017458776019216176}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.11061497335307985, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001495607110282887}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2601523504391386, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003678694746255839}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.15166775898349918, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001985950973310373}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.414251403634457, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09836035081115309}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_2.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.14212593864186113, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0020278826787379143}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.3256945308193834, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004183499164539368}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1926288064781011, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024488782590945946}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.027670527801186028, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010263896507194106}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06591916139804017, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023227491521481056}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.03789731741399596, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013200459196047046}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10597609939483277, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014946429223521406}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.244127383947085, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003151449183932117}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.14362364844292436, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017716917176277007}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.11288346015402125, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016505088448165134}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2608104370267327, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003565773707068147}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.15326170376419787, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002014338226707074}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.4466481969495988, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.060446243451377536}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6353c3773c1d39e34b862c4911932034465cafac863191f5ac01d66a3812b49
3
+ size 4385180
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82693f699e92b9129f56a89945e6c17f1bda69924565cf84ceb15a06fab58ade
3
+ size 5003528
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544db0a9de2861c36a56634d1f56ccb7a434e7ccc72136b5d4961e180e7a82de
3
+ size 6092020
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_0.jsonl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be08b0ce24aa01e12de519b7b27a46825d4b36033288fa820578c7a52a56645
3
+ size 2804569
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_1.jsonl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c634be93c9fe3dad0dc116928f2375cef7035d1758a26a668ac97c4fc96f8708
3
+ size 5086665
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_2.jsonl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5130f6494f6faea8188110c672c64523046595732753de4f46bfb4f88339be9e
3
+ size 7358844
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.jsonl ADDED
File without changes
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "e2e_nlg_cleaned",
5
+ "prompt_name": "generate_text_restaurant",
6
+ "bleu": 0.8938384110720313,
7
+ "dataset_path": "e2e_nlg_cleaned",
8
+ "dataset_name": null,
9
+ "subset": null,
10
+ "bleu_stderr": 0.04442740312402193
11
+ },
12
+ {
13
+ "task_name": "e2e_nlg_cleaned",
14
+ "prompt_name": "generate_text_restaurant",
15
+ "rouge1_precision": 0.09374072562693576,
16
+ "dataset_path": "e2e_nlg_cleaned",
17
+ "dataset_name": null,
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.0012230359583290014
20
+ },
21
+ {
22
+ "task_name": "e2e_nlg_cleaned",
23
+ "prompt_name": "generate_text_restaurant",
24
+ "rouge1_recall": 0.1941308443869652,
25
+ "dataset_path": "e2e_nlg_cleaned",
26
+ "dataset_name": null,
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.0022578323155766997
29
+ },
30
+ {
31
+ "task_name": "e2e_nlg_cleaned",
32
+ "prompt_name": "generate_text_restaurant",
33
+ "rouge1_fmeasure": 0.12367657246465094,
34
+ "dataset_path": "e2e_nlg_cleaned",
35
+ "dataset_name": null,
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.0014697284619829536
38
+ },
39
+ {
40
+ "task_name": "e2e_nlg_cleaned",
41
+ "prompt_name": "generate_text_restaurant",
42
+ "rouge2_precision": 0.020960965090776735,
43
+ "dataset_path": "e2e_nlg_cleaned",
44
+ "dataset_name": null,
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.0006244327243966665
47
+ },
48
+ {
49
+ "task_name": "e2e_nlg_cleaned",
50
+ "prompt_name": "generate_text_restaurant",
51
+ "rouge2_recall": 0.04544625505084656,
52
+ "dataset_path": "e2e_nlg_cleaned",
53
+ "dataset_name": null,
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.0011979457762283947
56
+ },
57
+ {
58
+ "task_name": "e2e_nlg_cleaned",
59
+ "prompt_name": "generate_text_restaurant",
60
+ "rouge2_fmeasure": 0.02782065901980275,
61
+ "dataset_path": "e2e_nlg_cleaned",
62
+ "dataset_name": null,
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.0007104099274964277
65
+ },
66
+ {
67
+ "task_name": "e2e_nlg_cleaned",
68
+ "prompt_name": "generate_text_restaurant",
69
+ "rougeL_precision": 0.08203779654362248,
70
+ "dataset_path": "e2e_nlg_cleaned",
71
+ "dataset_name": null,
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.0010243499554408882
74
+ },
75
+ {
76
+ "task_name": "e2e_nlg_cleaned",
77
+ "prompt_name": "generate_text_restaurant",
78
+ "rougeL_recall": 0.17174001350939913,
79
+ "dataset_path": "e2e_nlg_cleaned",
80
+ "dataset_name": null,
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.0019699700948681647
83
+ },
84
+ {
85
+ "task_name": "e2e_nlg_cleaned",
86
+ "prompt_name": "generate_text_restaurant",
87
+ "rougeL_fmeasure": 0.10860196142327254,
88
+ "dataset_path": "e2e_nlg_cleaned",
89
+ "dataset_name": null,
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.0012302518402255934
92
+ },
93
+ {
94
+ "task_name": "e2e_nlg_cleaned",
95
+ "prompt_name": "generate_text_restaurant",
96
+ "rougeLsum_precision": 0.08310697182038314,
97
+ "dataset_path": "e2e_nlg_cleaned",
98
+ "dataset_name": null,
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.0011057328214282471
101
+ },
102
+ {
103
+ "task_name": "e2e_nlg_cleaned",
104
+ "prompt_name": "generate_text_restaurant",
105
+ "rougeLsum_recall": 0.17275727213813283,
106
+ "dataset_path": "e2e_nlg_cleaned",
107
+ "dataset_name": null,
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.002060390288561184
110
+ },
111
+ {
112
+ "task_name": "e2e_nlg_cleaned",
113
+ "prompt_name": "generate_text_restaurant",
114
+ "rougeLsum_fmeasure": 0.10973134394419841,
115
+ "dataset_path": "e2e_nlg_cleaned",
116
+ "dataset_name": null,
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.0013237761734567783
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 0,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "e2e_nlg_cleaned",
5
+ "prompt_name": "generate_text_restaurant",
6
+ "bleu": 11.564210778804036,
7
+ "dataset_path": "e2e_nlg_cleaned",
8
+ "dataset_name": null,
9
+ "subset": null,
10
+ "bleu_stderr": 0.1198765256137471
11
+ },
12
+ {
13
+ "task_name": "e2e_nlg_cleaned",
14
+ "prompt_name": "generate_text_restaurant",
15
+ "rouge1_precision": 0.5637173176604086,
16
+ "dataset_path": "e2e_nlg_cleaned",
17
+ "dataset_name": null,
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.003321797810373166
20
+ },
21
+ {
22
+ "task_name": "e2e_nlg_cleaned",
23
+ "prompt_name": "generate_text_restaurant",
24
+ "rouge1_recall": 0.42980056601194444,
25
+ "dataset_path": "e2e_nlg_cleaned",
26
+ "dataset_name": null,
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.002973789269502846
29
+ },
30
+ {
31
+ "task_name": "e2e_nlg_cleaned",
32
+ "prompt_name": "generate_text_restaurant",
33
+ "rouge1_fmeasure": 0.4605347726818318,
34
+ "dataset_path": "e2e_nlg_cleaned",
35
+ "dataset_name": null,
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.0023127571845312834
38
+ },
39
+ {
40
+ "task_name": "e2e_nlg_cleaned",
41
+ "prompt_name": "generate_text_restaurant",
42
+ "rouge2_precision": 0.26763533541615536,
43
+ "dataset_path": "e2e_nlg_cleaned",
44
+ "dataset_name": null,
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.0027609708835756447
47
+ },
48
+ {
49
+ "task_name": "e2e_nlg_cleaned",
50
+ "prompt_name": "generate_text_restaurant",
51
+ "rouge2_recall": 0.20040046914166157,
52
+ "dataset_path": "e2e_nlg_cleaned",
53
+ "dataset_name": null,
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.0021517714918030005
56
+ },
57
+ {
58
+ "task_name": "e2e_nlg_cleaned",
59
+ "prompt_name": "generate_text_restaurant",
60
+ "rouge2_fmeasure": 0.21501029566008578,
61
+ "dataset_path": "e2e_nlg_cleaned",
62
+ "dataset_name": null,
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.002028484285834072
65
+ },
66
+ {
67
+ "task_name": "e2e_nlg_cleaned",
68
+ "prompt_name": "generate_text_restaurant",
69
+ "rougeL_precision": 0.4107410670750688,
70
+ "dataset_path": "e2e_nlg_cleaned",
71
+ "dataset_name": null,
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.003020208536626803
74
+ },
75
+ {
76
+ "task_name": "e2e_nlg_cleaned",
77
+ "prompt_name": "generate_text_restaurant",
78
+ "rougeL_recall": 0.30963490201281724,
79
+ "dataset_path": "e2e_nlg_cleaned",
80
+ "dataset_name": null,
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.002399599836596746
83
+ },
84
+ {
85
+ "task_name": "e2e_nlg_cleaned",
86
+ "prompt_name": "generate_text_restaurant",
87
+ "rougeL_fmeasure": 0.3329465663050009,
88
+ "dataset_path": "e2e_nlg_cleaned",
89
+ "dataset_name": null,
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.002054461632613191
92
+ },
93
+ {
94
+ "task_name": "e2e_nlg_cleaned",
95
+ "prompt_name": "generate_text_restaurant",
96
+ "rougeLsum_precision": 0.45928342602402417,
97
+ "dataset_path": "e2e_nlg_cleaned",
98
+ "dataset_name": null,
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.0032079639333423174
101
+ },
102
+ {
103
+ "task_name": "e2e_nlg_cleaned",
104
+ "prompt_name": "generate_text_restaurant",
105
+ "rougeLsum_recall": 0.3490148298085739,
106
+ "dataset_path": "e2e_nlg_cleaned",
107
+ "dataset_name": null,
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.002713242494960982
110
+ },
111
+ {
112
+ "task_name": "e2e_nlg_cleaned",
113
+ "prompt_name": "generate_text_restaurant",
114
+ "rougeLsum_fmeasure": 0.3743551835626005,
115
+ "dataset_path": "e2e_nlg_cleaned",
116
+ "dataset_name": null,
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.0022879473483603647
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 1,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "e2e_nlg_cleaned",
5
+ "prompt_name": "generate_text_restaurant",
6
+ "bleu": 13.400542456523908,
7
+ "dataset_path": "e2e_nlg_cleaned",
8
+ "dataset_name": null,
9
+ "subset": null,
10
+ "bleu_stderr": 0.1609394772064794
11
+ },
12
+ {
13
+ "task_name": "e2e_nlg_cleaned",
14
+ "prompt_name": "generate_text_restaurant",
15
+ "rouge1_precision": 0.5764234982441327,
16
+ "dataset_path": "e2e_nlg_cleaned",
17
+ "dataset_name": null,
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.0033021249054890935
20
+ },
21
+ {
22
+ "task_name": "e2e_nlg_cleaned",
23
+ "prompt_name": "generate_text_restaurant",
24
+ "rouge1_recall": 0.4503974768175401,
25
+ "dataset_path": "e2e_nlg_cleaned",
26
+ "dataset_name": null,
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.002919712590777667
29
+ },
30
+ {
31
+ "task_name": "e2e_nlg_cleaned",
32
+ "prompt_name": "generate_text_restaurant",
33
+ "rouge1_fmeasure": 0.4798036150204005,
34
+ "dataset_path": "e2e_nlg_cleaned",
35
+ "dataset_name": null,
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.002282274159276462
38
+ },
39
+ {
40
+ "task_name": "e2e_nlg_cleaned",
41
+ "prompt_name": "generate_text_restaurant",
42
+ "rouge2_precision": 0.28675558273290647,
43
+ "dataset_path": "e2e_nlg_cleaned",
44
+ "dataset_name": null,
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.002781811689703388
47
+ },
48
+ {
49
+ "task_name": "e2e_nlg_cleaned",
50
+ "prompt_name": "generate_text_restaurant",
51
+ "rouge2_recall": 0.22076517179049052,
52
+ "dataset_path": "e2e_nlg_cleaned",
53
+ "dataset_name": null,
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.0022524340062285335
56
+ },
57
+ {
58
+ "task_name": "e2e_nlg_cleaned",
59
+ "prompt_name": "generate_text_restaurant",
60
+ "rouge2_fmeasure": 0.2351759267020243,
61
+ "dataset_path": "e2e_nlg_cleaned",
62
+ "dataset_name": null,
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.002088790334860442
65
+ },
66
+ {
67
+ "task_name": "e2e_nlg_cleaned",
68
+ "prompt_name": "generate_text_restaurant",
69
+ "rougeL_precision": 0.4286366628943883,
70
+ "dataset_path": "e2e_nlg_cleaned",
71
+ "dataset_name": null,
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.0030557015039043693
74
+ },
75
+ {
76
+ "task_name": "e2e_nlg_cleaned",
77
+ "prompt_name": "generate_text_restaurant",
78
+ "rougeL_recall": 0.3325081715277142,
79
+ "dataset_path": "e2e_nlg_cleaned",
80
+ "dataset_name": null,
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.0024750818471041087
83
+ },
84
+ {
85
+ "task_name": "e2e_nlg_cleaned",
86
+ "prompt_name": "generate_text_restaurant",
87
+ "rougeL_fmeasure": 0.3549448525372181,
88
+ "dataset_path": "e2e_nlg_cleaned",
89
+ "dataset_name": null,
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.0021193431273246735
92
+ },
93
+ {
94
+ "task_name": "e2e_nlg_cleaned",
95
+ "prompt_name": "generate_text_restaurant",
96
+ "rougeLsum_precision": 0.48074271393713025,
97
+ "dataset_path": "e2e_nlg_cleaned",
98
+ "dataset_name": null,
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.0032624614219069863
101
+ },
102
+ {
103
+ "task_name": "e2e_nlg_cleaned",
104
+ "prompt_name": "generate_text_restaurant",
105
+ "rougeLsum_recall": 0.37474313833603423,
106
+ "dataset_path": "e2e_nlg_cleaned",
107
+ "dataset_name": null,
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.0027481389029155967
110
+ },
111
+ {
112
+ "task_name": "e2e_nlg_cleaned",
113
+ "prompt_name": "generate_text_restaurant",
114
+ "rougeLsum_fmeasure": 0.39953599561234265,
115
+ "dataset_path": "e2e_nlg_cleaned",
116
+ "dataset_name": null,
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.0023322731932715716
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 2,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_gem_xsum_article_DOC_summary_0.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "gem_xsum",
5
+ "prompt_name": "article_DOC_summary",
6
+ "rouge1_precision": 0.13211376744686648,
7
+ "dataset_path": "GEM/xsum",
8
+ "dataset_name": null,
9
+ "subset": "",
10
+ "rouge1_precision_stderr": 0.0021078227737627523
11
+ },
12
+ {
13
+ "task_name": "gem_xsum",
14
+ "prompt_name": "article_DOC_summary",
15
+ "rouge1_recall": 0.30958091840657137,
16
+ "dataset_path": "GEM/xsum",
17
+ "dataset_name": null,
18
+ "subset": "",
19
+ "rouge1_recall_stderr": 0.004718256158826077
20
+ },
21
+ {
22
+ "task_name": "gem_xsum",
23
+ "prompt_name": "article_DOC_summary",
24
+ "rouge1_fmeasure": 0.1824311356993241,
25
+ "dataset_path": "GEM/xsum",
26
+ "dataset_name": null,
27
+ "subset": "",
28
+ "rouge1_fmeasure_stderr": 0.0027742455429407478
29
+ },
30
+ {
31
+ "task_name": "gem_xsum",
32
+ "prompt_name": "article_DOC_summary",
33
+ "rouge2_precision": 0.028436679980413212,
34
+ "dataset_path": "GEM/xsum",
35
+ "dataset_name": null,
36
+ "subset": "",
37
+ "rouge2_precision_stderr": 0.0010660997414346854
38
+ },
39
+ {
40
+ "task_name": "gem_xsum",
41
+ "prompt_name": "article_DOC_summary",
42
+ "rouge2_recall": 0.06982063723069834,
43
+ "dataset_path": "GEM/xsum",
44
+ "dataset_name": null,
45
+ "subset": "",
46
+ "rouge2_recall_stderr": 0.0026535400196015854
47
+ },
48
+ {
49
+ "task_name": "gem_xsum",
50
+ "prompt_name": "article_DOC_summary",
51
+ "rouge2_fmeasure": 0.03971064304403434,
52
+ "dataset_path": "GEM/xsum",
53
+ "dataset_name": null,
54
+ "subset": "",
55
+ "rouge2_fmeasure_stderr": 0.0014694772612363063
56
+ },
57
+ {
58
+ "task_name": "gem_xsum",
59
+ "prompt_name": "article_DOC_summary",
60
+ "rougeL_precision": 0.10122789771540168,
61
+ "dataset_path": "GEM/xsum",
62
+ "dataset_name": null,
63
+ "subset": "",
64
+ "rougeL_precision_stderr": 0.0015927078993417136
65
+ },
66
+ {
67
+ "task_name": "gem_xsum",
68
+ "prompt_name": "article_DOC_summary",
69
+ "rougeL_recall": 0.23911281116753666,
70
+ "dataset_path": "GEM/xsum",
71
+ "dataset_name": null,
72
+ "subset": "",
73
+ "rougeL_recall_stderr": 0.0037320083765561033
74
+ },
75
+ {
76
+ "task_name": "gem_xsum",
77
+ "prompt_name": "article_DOC_summary",
78
+ "rougeL_fmeasure": 0.14006214823111385,
79
+ "dataset_path": "GEM/xsum",
80
+ "dataset_name": null,
81
+ "subset": "",
82
+ "rougeL_fmeasure_stderr": 0.0021160537297509855
83
+ },
84
+ {
85
+ "task_name": "gem_xsum",
86
+ "prompt_name": "article_DOC_summary",
87
+ "rougeLsum_precision": 0.10386228512699824,
88
+ "dataset_path": "GEM/xsum",
89
+ "dataset_name": null,
90
+ "subset": "",
91
+ "rougeLsum_precision_stderr": 0.0017447889334045853
92
+ },
93
+ {
94
+ "task_name": "gem_xsum",
95
+ "prompt_name": "article_DOC_summary",
96
+ "rougeLsum_recall": 0.2456731268051541,
97
+ "dataset_path": "GEM/xsum",
98
+ "dataset_name": null,
99
+ "subset": "",
100
+ "rougeLsum_recall_stderr": 0.004084619780933895
101
+ },
102
+ {
103
+ "task_name": "gem_xsum",
104
+ "prompt_name": "article_DOC_summary",
105
+ "rougeLsum_fmeasure": 0.14375817680635894,
106
+ "dataset_path": "GEM/xsum",
107
+ "dataset_name": null,
108
+ "subset": "",
109
+ "rougeLsum_fmeasure_stderr": 0.002322989973976244
110
+ },
111
+ {
112
+ "task_name": "gem_xsum",
113
+ "prompt_name": "article_DOC_summary",
114
+ "bleu": 1.7044284159611478,
115
+ "dataset_path": "GEM/xsum",
116
+ "dataset_name": null,
117
+ "subset": "",
118
+ "bleu_stderr": 0.10158710863644861
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 0,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_gem_xsum_article_DOC_summary_1.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "gem_xsum",
5
+ "prompt_name": "article_DOC_summary",
6
+ "rouge1_precision": 0.1405432198297611,
7
+ "dataset_path": "GEM/xsum",
8
+ "dataset_name": null,
9
+ "subset": "",
10
+ "rouge1_precision_stderr": 0.001878679789941618
11
+ },
12
+ {
13
+ "task_name": "gem_xsum",
14
+ "prompt_name": "article_DOC_summary",
15
+ "rouge1_recall": 0.3276606298436086,
16
+ "dataset_path": "GEM/xsum",
17
+ "dataset_name": null,
18
+ "subset": "",
19
+ "rouge1_recall_stderr": 0.004376293585107094
20
+ },
21
+ {
22
+ "task_name": "gem_xsum",
23
+ "prompt_name": "article_DOC_summary",
24
+ "rouge1_fmeasure": 0.19239078349475108,
25
+ "dataset_path": "GEM/xsum",
26
+ "dataset_name": null,
27
+ "subset": "",
28
+ "rouge1_fmeasure_stderr": 0.002465960954886116
29
+ },
30
+ {
31
+ "task_name": "gem_xsum",
32
+ "prompt_name": "article_DOC_summary",
33
+ "rouge2_precision": 0.026022309438382178,
34
+ "dataset_path": "GEM/xsum",
35
+ "dataset_name": null,
36
+ "subset": "",
37
+ "rouge2_precision_stderr": 0.0009602804697530564
38
+ },
39
+ {
40
+ "task_name": "gem_xsum",
41
+ "prompt_name": "article_DOC_summary",
42
+ "rouge2_recall": 0.06491047997705596,
43
+ "dataset_path": "GEM/xsum",
44
+ "dataset_name": null,
45
+ "subset": "",
46
+ "rouge2_recall_stderr": 0.002480531229175089
47
+ },
48
+ {
49
+ "task_name": "gem_xsum",
50
+ "prompt_name": "article_DOC_summary",
51
+ "rouge2_fmeasure": 0.03654673156140313,
52
+ "dataset_path": "GEM/xsum",
53
+ "dataset_name": null,
54
+ "subset": "",
55
+ "rouge2_fmeasure_stderr": 0.0013456659236836456
56
+ },
57
+ {
58
+ "task_name": "gem_xsum",
59
+ "prompt_name": "article_DOC_summary",
60
+ "rougeL_precision": 0.10365291271701285,
61
+ "dataset_path": "GEM/xsum",
62
+ "dataset_name": null,
63
+ "subset": "",
64
+ "rougeL_precision_stderr": 0.001348668367178514
65
+ },
66
+ {
67
+ "task_name": "gem_xsum",
68
+ "prompt_name": "article_DOC_summary",
69
+ "rougeL_recall": 0.24268394669042775,
70
+ "dataset_path": "GEM/xsum",
71
+ "dataset_name": null,
72
+ "subset": "",
73
+ "rougeL_recall_stderr": 0.0032299728759827427
74
+ },
75
+ {
76
+ "task_name": "gem_xsum",
77
+ "prompt_name": "article_DOC_summary",
78
+ "rougeL_fmeasure": 0.14178613311816618,
79
+ "dataset_path": "GEM/xsum",
80
+ "dataset_name": null,
81
+ "subset": "",
82
+ "rougeL_fmeasure_stderr": 0.0017458776019216176
83
+ },
84
+ {
85
+ "task_name": "gem_xsum",
86
+ "prompt_name": "article_DOC_summary",
87
+ "rougeLsum_precision": 0.11061497335307985,
88
+ "dataset_path": "GEM/xsum",
89
+ "dataset_name": null,
90
+ "subset": "",
91
+ "rougeLsum_precision_stderr": 0.001495607110282887
92
+ },
93
+ {
94
+ "task_name": "gem_xsum",
95
+ "prompt_name": "article_DOC_summary",
96
+ "rougeLsum_recall": 0.2601523504391386,
97
+ "dataset_path": "GEM/xsum",
98
+ "dataset_name": null,
99
+ "subset": "",
100
+ "rougeLsum_recall_stderr": 0.003678694746255839
101
+ },
102
+ {
103
+ "task_name": "gem_xsum",
104
+ "prompt_name": "article_DOC_summary",
105
+ "rougeLsum_fmeasure": 0.15166775898349918,
106
+ "dataset_path": "GEM/xsum",
107
+ "dataset_name": null,
108
+ "subset": "",
109
+ "rougeLsum_fmeasure_stderr": 0.001985950973310373
110
+ },
111
+ {
112
+ "task_name": "gem_xsum",
113
+ "prompt_name": "article_DOC_summary",
114
+ "bleu": 1.414251403634457,
115
+ "dataset_path": "GEM/xsum",
116
+ "dataset_name": null,
117
+ "subset": "",
118
+ "bleu_stderr": 0.09836035081115309
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 1,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_gem_xsum_article_DOC_summary_2.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "gem_xsum",
5
+ "prompt_name": "article_DOC_summary",
6
+ "rouge1_precision": 0.14212593864186113,
7
+ "dataset_path": "GEM/xsum",
8
+ "dataset_name": null,
9
+ "subset": "",
10
+ "rouge1_precision_stderr": 0.0020278826787379143
11
+ },
12
+ {
13
+ "task_name": "gem_xsum",
14
+ "prompt_name": "article_DOC_summary",
15
+ "rouge1_recall": 0.3256945308193834,
16
+ "dataset_path": "GEM/xsum",
17
+ "dataset_name": null,
18
+ "subset": "",
19
+ "rouge1_recall_stderr": 0.004183499164539368
20
+ },
21
+ {
22
+ "task_name": "gem_xsum",
23
+ "prompt_name": "article_DOC_summary",
24
+ "rouge1_fmeasure": 0.1926288064781011,
25
+ "dataset_path": "GEM/xsum",
26
+ "dataset_name": null,
27
+ "subset": "",
28
+ "rouge1_fmeasure_stderr": 0.0024488782590945946
29
+ },
30
+ {
31
+ "task_name": "gem_xsum",
32
+ "prompt_name": "article_DOC_summary",
33
+ "rouge2_precision": 0.027670527801186028,
34
+ "dataset_path": "GEM/xsum",
35
+ "dataset_name": null,
36
+ "subset": "",
37
+ "rouge2_precision_stderr": 0.0010263896507194106
38
+ },
39
+ {
40
+ "task_name": "gem_xsum",
41
+ "prompt_name": "article_DOC_summary",
42
+ "rouge2_recall": 0.06591916139804017,
43
+ "dataset_path": "GEM/xsum",
44
+ "dataset_name": null,
45
+ "subset": "",
46
+ "rouge2_recall_stderr": 0.0023227491521481056
47
+ },
48
+ {
49
+ "task_name": "gem_xsum",
50
+ "prompt_name": "article_DOC_summary",
51
+ "rouge2_fmeasure": 0.03789731741399596,
52
+ "dataset_path": "GEM/xsum",
53
+ "dataset_name": null,
54
+ "subset": "",
55
+ "rouge2_fmeasure_stderr": 0.0013200459196047046
56
+ },
57
+ {
58
+ "task_name": "gem_xsum",
59
+ "prompt_name": "article_DOC_summary",
60
+ "rougeL_precision": 0.10597609939483277,
61
+ "dataset_path": "GEM/xsum",
62
+ "dataset_name": null,
63
+ "subset": "",
64
+ "rougeL_precision_stderr": 0.0014946429223521406
65
+ },
66
+ {
67
+ "task_name": "gem_xsum",
68
+ "prompt_name": "article_DOC_summary",
69
+ "rougeL_recall": 0.244127383947085,
70
+ "dataset_path": "GEM/xsum",
71
+ "dataset_name": null,
72
+ "subset": "",
73
+ "rougeL_recall_stderr": 0.003151449183932117
74
+ },
75
+ {
76
+ "task_name": "gem_xsum",
77
+ "prompt_name": "article_DOC_summary",
78
+ "rougeL_fmeasure": 0.14362364844292436,
79
+ "dataset_path": "GEM/xsum",
80
+ "dataset_name": null,
81
+ "subset": "",
82
+ "rougeL_fmeasure_stderr": 0.0017716917176277007
83
+ },
84
+ {
85
+ "task_name": "gem_xsum",
86
+ "prompt_name": "article_DOC_summary",
87
+ "rougeLsum_precision": 0.11288346015402125,
88
+ "dataset_path": "GEM/xsum",
89
+ "dataset_name": null,
90
+ "subset": "",
91
+ "rougeLsum_precision_stderr": 0.0016505088448165134
92
+ },
93
+ {
94
+ "task_name": "gem_xsum",
95
+ "prompt_name": "article_DOC_summary",
96
+ "rougeLsum_recall": 0.2608104370267327,
97
+ "dataset_path": "GEM/xsum",
98
+ "dataset_name": null,
99
+ "subset": "",
100
+ "rougeLsum_recall_stderr": 0.003565773707068147
101
+ },
102
+ {
103
+ "task_name": "gem_xsum",
104
+ "prompt_name": "article_DOC_summary",
105
+ "rougeLsum_fmeasure": 0.15326170376419787,
106
+ "dataset_path": "GEM/xsum",
107
+ "dataset_name": null,
108
+ "subset": "",
109
+ "rougeLsum_fmeasure_stderr": 0.002014338226707074
110
+ },
111
+ {
112
+ "task_name": "gem_xsum",
113
+ "prompt_name": "article_DOC_summary",
114
+ "bleu": 1.4466481969495988,
115
+ "dataset_path": "GEM/xsum",
116
+ "dataset_name": null,
117
+ "subset": "",
118
+ "bleu_stderr": 0.060446243451377536
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 2,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_0.json ADDED
@@ -0,0 +1,54 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.325,
5
+ "acc_stderr": 0.014818724459095526
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.332,
9
+ "acc_stderr": 0.014899597242811483
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.36,
13
+ "acc_stderr": 0.013862183574189913
14
+ },
15
+ "cb": {
16
+ "acc": 0.44642857142857145,
17
+ "acc_stderr": 0.06703189227942398,
18
+ "f1": 0.29602713178294576
19
+ },
20
+ "copa": {
21
+ "acc": 0.73,
22
+ "acc_stderr": 0.04461960433384741
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.36974706233817967,
26
+ "acc_stderr": 0.00481749554678955,
27
+ "acc_norm": 0.4607647878908584,
28
+ "acc_norm_stderr": 0.004974395131539589
29
+ },
30
+ "rte": {
31
+ "acc": 0.5270758122743683,
32
+ "acc_stderr": 0.030052303463143706
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5201262825572218,
36
+ "acc_stderr": 0.014041096664344329
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.652592196686264,
40
+ "acc_stderr": 0.01101082650271874
41
+ }
42
+ },
43
+ "versions": {
44
+ "anli_r1": 0,
45
+ "anli_r2": 0,
46
+ "anli_r3": 0,
47
+ "cb": 1,
48
+ "copa": 0,
49
+ "hellaswag": 0,
50
+ "rte": 0,
51
+ "winogrande": 0,
52
+ "storycloze_2016": 0
53
+ }
54
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_0_lm-eval_global_step52452_2023-02-24-23-14-05_0shots_backup.json ADDED
@@ -0,0 +1,54 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.325,
5
+ "acc_stderr": 0.014818724459095526
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.332,
9
+ "acc_stderr": 0.014899597242811483
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.36,
13
+ "acc_stderr": 0.013862183574189913
14
+ },
15
+ "cb": {
16
+ "acc": 0.44642857142857145,
17
+ "acc_stderr": 0.06703189227942398,
18
+ "f1": 0.29602713178294576
19
+ },
20
+ "copa": {
21
+ "acc": 0.73,
22
+ "acc_stderr": 0.04461960433384741
23
+ },
24
+ "hellaswag": {
25
+ "acc": 0.36974706233817967,
26
+ "acc_stderr": 0.00481749554678955,
27
+ "acc_norm": 0.4607647878908584,
28
+ "acc_norm_stderr": 0.004974395131539589
29
+ },
30
+ "rte": {
31
+ "acc": 0.5270758122743683,
32
+ "acc_stderr": 0.030052303463143706
33
+ },
34
+ "winogrande": {
35
+ "acc": 0.5201262825572218,
36
+ "acc_stderr": 0.014041096664344329
37
+ },
38
+ "storycloze_2016": {
39
+ "acc": 0.652592196686264,
40
+ "acc_stderr": 0.01101082650271874
41
+ }
42
+ },
43
+ "versions": {
44
+ "anli_r1": 0,
45
+ "anli_r2": 0,
46
+ "anli_r3": 0,
47
+ "cb": 1,
48
+ "copa": 0,
49
+ "hellaswag": 0,
50
+ "rte": 0,
51
+ "winogrande": 0,
52
+ "storycloze_2016": 0
53
+ }
54
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_1.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.338,
5
+ "acc_stderr": 0.014965960710224496
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.337,
9
+ "acc_stderr": 0.014955087918653588
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.355,
13
+ "acc_stderr": 0.013819249004047305
14
+ },
15
+ "cb": {
16
+ "acc": 0.5178571428571429,
17
+ "acc_stderr": 0.06737697508644647,
18
+ "f1": 0.3244444444444445
19
+ },
20
+ "copa": {
21
+ "acc": 0.66,
22
+ "acc_stderr": 0.04760952285695237
23
+ }
24
+ },
25
+ "versions": {
26
+ "anli_r1": 0,
27
+ "anli_r2": 0,
28
+ "anli_r3": 0,
29
+ "cb": 1,
30
+ "copa": 0
31
+ }
32
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_1_lm-eval_global_step52452_2023-02-24-23-14-05_1shots_backup.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.338,
5
+ "acc_stderr": 0.014965960710224496
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.337,
9
+ "acc_stderr": 0.014955087918653588
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.355,
13
+ "acc_stderr": 0.013819249004047305
14
+ },
15
+ "cb": {
16
+ "acc": 0.5178571428571429,
17
+ "acc_stderr": 0.06737697508644647,
18
+ "f1": 0.3244444444444445
19
+ },
20
+ "copa": {
21
+ "acc": 0.66,
22
+ "acc_stderr": 0.04760952285695237
23
+ }
24
+ },
25
+ "versions": {
26
+ "anli_r1": 0,
27
+ "anli_r2": 0,
28
+ "anli_r3": 0,
29
+ "cb": 1,
30
+ "copa": 0
31
+ }
32
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_2.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.335,
5
+ "acc_stderr": 0.014933117490932572
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.345,
9
+ "acc_stderr": 0.015039986742055235
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3441666666666667,
13
+ "acc_stderr": 0.013720551062295756
14
+ },
15
+ "cb": {
16
+ "acc": 0.5,
17
+ "acc_stderr": 0.06741998624632421,
18
+ "f1": 0.32094943240454077
19
+ },
20
+ "copa": {
21
+ "acc": 0.71,
22
+ "acc_stderr": 0.04560480215720684
23
+ }
24
+ },
25
+ "versions": {
26
+ "anli_r1": 0,
27
+ "anli_r2": 0,
28
+ "anli_r3": 0,
29
+ "cb": 1,
30
+ "copa": 0
31
+ }
32
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_2_lm-eval_global_step52452_2023-02-24-23-14-05_2shots_backup.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.335,
5
+ "acc_stderr": 0.014933117490932572
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.345,
9
+ "acc_stderr": 0.015039986742055235
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3441666666666667,
13
+ "acc_stderr": 0.013720551062295756
14
+ },
15
+ "cb": {
16
+ "acc": 0.5,
17
+ "acc_stderr": 0.06741998624632421,
18
+ "f1": 0.32094943240454077
19
+ },
20
+ "copa": {
21
+ "acc": 0.71,
22
+ "acc_stderr": 0.04560480215720684
23
+ }
24
+ },
25
+ "versions": {
26
+ "anli_r1": 0,
27
+ "anli_r2": 0,
28
+ "anli_r3": 0,
29
+ "cb": 1,
30
+ "copa": 0
31
+ }
32
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_3.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.36,
5
+ "acc_stderr": 0.01518652793204012
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.362,
9
+ "acc_stderr": 0.0152048409129195
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3625,
13
+ "acc_stderr": 0.013883037874225516
14
+ },
15
+ "cb": {
16
+ "acc": 0.5714285714285714,
17
+ "acc_stderr": 0.06672848092813058,
18
+ "f1": 0.37350026082420446
19
+ },
20
+ "copa": {
21
+ "acc": 0.67,
22
+ "acc_stderr": 0.04725815626252607
23
+ }
24
+ },
25
+ "versions": {
26
+ "anli_r1": 0,
27
+ "anli_r2": 0,
28
+ "anli_r3": 0,
29
+ "cb": 1,
30
+ "copa": 0
31
+ }
32
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_3_lm-eval_global_step52452_2023-02-24-23-14-05_3shots_backup.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.36,
5
+ "acc_stderr": 0.01518652793204012
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.362,
9
+ "acc_stderr": 0.0152048409129195
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.3625,
13
+ "acc_stderr": 0.013883037874225516
14
+ },
15
+ "cb": {
16
+ "acc": 0.5714285714285714,
17
+ "acc_stderr": 0.06672848092813058,
18
+ "f1": 0.37350026082420446
19
+ },
20
+ "copa": {
21
+ "acc": 0.67,
22
+ "acc_stderr": 0.04725815626252607
23
+ }
24
+ },
25
+ "versions": {
26
+ "anli_r1": 0,
27
+ "anli_r2": 0,
28
+ "anli_r3": 0,
29
+ "cb": 1,
30
+ "copa": 0
31
+ }
32
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_4.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.335,
5
+ "acc_stderr": 0.014933117490932572
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.365,
9
+ "acc_stderr": 0.015231776226264905
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.32416666666666666,
13
+ "acc_stderr": 0.013517438120881629
14
+ },
15
+ "cb": {
16
+ "acc": 0.5,
17
+ "acc_stderr": 0.06741998624632421,
18
+ "f1": 0.3113738738738739
19
+ },
20
+ "copa": {
21
+ "acc": 0.64,
22
+ "acc_stderr": 0.04824181513244218
23
+ }
24
+ },
25
+ "versions": {
26
+ "anli_r1": 0,
27
+ "anli_r2": 0,
28
+ "anli_r3": 0,
29
+ "cb": 1,
30
+ "copa": 0
31
+ }
32
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_4_lm-eval_global_step52452_2023-02-24-23-14-05_4shots_backup.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.335,
5
+ "acc_stderr": 0.014933117490932572
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.365,
9
+ "acc_stderr": 0.015231776226264905
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.32416666666666666,
13
+ "acc_stderr": 0.013517438120881629
14
+ },
15
+ "cb": {
16
+ "acc": 0.5,
17
+ "acc_stderr": 0.06741998624632421,
18
+ "f1": 0.3113738738738739
19
+ },
20
+ "copa": {
21
+ "acc": 0.64,
22
+ "acc_stderr": 0.04824181513244218
23
+ }
24
+ },
25
+ "versions": {
26
+ "anli_r1": 0,
27
+ "anli_r2": 0,
28
+ "anli_r3": 0,
29
+ "cb": 1,
30
+ "copa": 0
31
+ }
32
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_5.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.357,
5
+ "acc_stderr": 0.015158521721486773
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.352,
9
+ "acc_stderr": 0.015110404505648666
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.33916666666666667,
13
+ "acc_stderr": 0.013672343491681822
14
+ },
15
+ "cb": {
16
+ "acc": 0.5178571428571429,
17
+ "acc_stderr": 0.06737697508644647,
18
+ "f1": 0.33024815560026827
19
+ },
20
+ "copa": {
21
+ "acc": 0.65,
22
+ "acc_stderr": 0.047937248544110196
23
+ }
24
+ },
25
+ "versions": {
26
+ "anli_r1": 0,
27
+ "anli_r2": 0,
28
+ "anli_r3": 0,
29
+ "cb": 1,
30
+ "copa": 0
31
+ }
32
+ }
2b855b11boscarseed1/evaluation/rankeval/2b855b11boscarseed1_5_lm-eval_global_step52452_2023-02-24-23-14-05_5shots_backup.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "results": {
3
+ "anli_r1": {
4
+ "acc": 0.357,
5
+ "acc_stderr": 0.015158521721486773
6
+ },
7
+ "anli_r2": {
8
+ "acc": 0.352,
9
+ "acc_stderr": 0.015110404505648666
10
+ },
11
+ "anli_r3": {
12
+ "acc": 0.33916666666666667,
13
+ "acc_stderr": 0.013672343491681822
14
+ },
15
+ "cb": {
16
+ "acc": 0.5178571428571429,
17
+ "acc_stderr": 0.06737697508644647,
18
+ "f1": 0.33024815560026827
19
+ },
20
+ "copa": {
21
+ "acc": 0.65,
22
+ "acc_stderr": 0.047937248544110196
23
+ }
24
+ },
25
+ "versions": {
26
+ "anli_r1": 0,
27
+ "anli_r2": 0,
28
+ "anli_r3": 0,
29
+ "cb": 1,
30
+ "copa": 0
31
+ }
32
+ }
2b855b11boscarseed1/global_step52452/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:350e38754e82e580293a61dbd3d97e2c366e8bfe97a7a18cd75ea401bdeb59c8
3
+ size 131677719