Commit 123ca89
Parent(s): bbf6528
Add files
Note: this view is limited to 50 files because the commit contains too many changes.
- .gitattributes +230 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.jsonl +3 -0
- 2b855b11boscarseed1/evaluation/generation/merged.csv +53 -0
- 2b855b11boscarseed1/evaluation/generation/merged.json +1 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.json +133 -0
- 2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.json +133 -0
.gitattributes
CHANGED
@@ -249,3 +249,233 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
249 |
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
250 |
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
251 |
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
252 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
253 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
254 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
255 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
256 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
257 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
258 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
259 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
260 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
261 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
262 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
263 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
264 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
265 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
266 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
267 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
268 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
269 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
270 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
271 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
272 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
273 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
274 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
275 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
276 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
277 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
278 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
279 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
280 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
281 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
282 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
283 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
284 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
285 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
286 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
287 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
288 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
289 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
290 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
291 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
292 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
293 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
294 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
295 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
296 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
297 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
298 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
299 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
300 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
301 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
302 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
303 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
304 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
305 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
306 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
307 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
308 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
309 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
310 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
311 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
312 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
313 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
314 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
315 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
316 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
317 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
318 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
319 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
320 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
321 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
322 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
323 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
324 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
325 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
326 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
327 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
328 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
329 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
330 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
331 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
332 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
333 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
334 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
335 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
336 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
337 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
338 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
339 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
340 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
341 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
342 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
343 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
344 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
345 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
346 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
347 |
+
2b855b11boscarseed2/evaluation/generation/examples.2b855b11boscarseed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
348 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
349 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
350 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
351 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
352 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
353 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
354 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
355 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
356 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
357 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
358 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
359 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
360 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
361 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
362 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
363 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
364 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
365 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
366 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
367 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
368 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
369 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
370 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
371 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
372 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
373 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
374 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
375 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
376 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
377 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
378 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
379 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
380 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
381 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
382 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
383 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
384 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
385 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
386 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
387 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
388 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
389 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
390 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
391 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
392 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
393 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
394 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
395 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
396 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
397 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
398 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
399 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
400 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
401 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
402 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
403 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
404 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
405 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
406 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
407 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
408 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
409 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
410 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
411 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
412 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
413 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
414 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
415 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
416 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
417 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
418 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
419 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
420 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
421 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
422 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
423 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
424 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
425 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
426 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
427 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
428 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
429 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
430 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
431 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
432 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
433 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
434 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
435 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
436 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
437 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
438 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
439 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
440 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
441 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
442 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
443 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
444 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
445 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
446 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
447 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
448 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
449 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
450 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
451 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
452 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
453 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
454 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
455 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
456 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
457 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
458 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
459 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
460 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
461 |
+
2b855b14boscarseed2/evaluation/generation/examples.2b855b14boscarseed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
462 |
+
2b855b14boscarseed3/evaluation/generation/examples.2b855b14boscarseed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
463 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
464 |
+
2b855b14boscarseed1/evaluation/generation/examples.2b855b14boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
465 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
466 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
467 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
468 |
+
2b855b18boscarseed1/evaluation/generation/examples.2b855b18boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
469 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
470 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
471 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
472 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
473 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
474 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
475 |
+
2b855b18boscarseed3/evaluation/generation/examples.2b855b18boscarseed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
476 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
477 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
478 |
+
2b855b14boscarseed4/evaluation/generation/examples.2b855b14boscarseed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
479 |
+
2b855b18boscarseed2/evaluation/generation/examples.2b855b18boscarseed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
480 |
+
2b855b11boscarseed3/evaluation/generation/examples.2b855b11boscarseed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
481 |
+
2b855b11boscarseed4/evaluation/generation/examples.2b855b11boscarseed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
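
The rules above route the evaluation outputs through Git LFS. As a rough illustration (not part of this commit), a checkout could list which result files are LFS-tracked by scanning .gitattributes for the filter=lfs field; the sketch below assumes the plain "<path> filter=lfs diff=lfs merge=lfs -text" layout used in these entries.

# Sketch only: enumerate paths routed through Git LFS by this .gitattributes.
from pathlib import Path

def lfs_tracked(gitattributes: str = ".gitattributes") -> list[str]:
    tracked = []
    for line in Path(gitattributes).read_text().splitlines():
        fields = line.split()
        # Rules here look like "<path> filter=lfs diff=lfs merge=lfs -text".
        if len(fields) > 1 and "filter=lfs" in fields[1:]:
            tracked.append(fields[0])  # first field is the path or glob
    return tracked

if __name__ == "__main__":
    for path in lfs_tracked():
        if path.endswith(".jsonl"):
            print(path)
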
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.json
ADDED
@@ -0,0 +1 @@
1 | +
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.3801106976509281, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.030814977906570784}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.09254608062110982, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003969016029393206}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2514658272132534, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005470193417734038}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.09839951058099769, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024506647455068047}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.04251830138142587, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0025980589101773847}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.11876788025878558, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0033890059840335}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04579430308932667, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0014988641201888384}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.08735615223930215, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0037276787142384374}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.24225619592357273, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.005268443646117898}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09334972395562345, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0022379508481899127}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.08738746346078487, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003771697087126988}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.23626985324693767, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.005054786193539789}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.09257280431668473, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022654509452080854}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
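
Each agg.*.json added in this commit follows the structure shown above: a "results" list in which every entry holds one metric (bleu, rouge1_precision, ...) together with its matching "<metric>_stderr", plus a "config" block recording the model path, num_fewshot, batch size, and seed. A minimal sketch (illustrative only, not part of the commit) for reading those scores back:

# Sketch only: read one agg.*.json from this commit and print its metrics.
import json

path = ("2b855b11boscarseed1/evaluation/generation/"
        "agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.json")

with open(path) as f:
    agg = json.load(f)

print("num_fewshot:", agg["config"]["num_fewshot"])
for entry in agg["results"]:
    # Each entry carries a single score plus its "<metric>_stderr" companion.
    scores = {k: v for k, v in entry.items() if isinstance(v, float)}
    print(entry["task_name"], entry["prompt_name"], scores)
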
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1 @@
1 | +
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5239721644834189, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03609798297687262}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.13082237107727746, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0041887696056868914}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.29894851591422195, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0051748543299946}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.14826158596606656, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0034048406078764424}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.06041678931722799, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0025853119808927536}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1438349096175946, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0034922545459490128}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.06901771335292758, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020860447213003814}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.11585043732455273, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003667689089676262}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.27586821042029314, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004795036002022945}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.13229541662919794, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002864700499056504}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.11910114676043697, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0037751935052806325}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.27899773980715475, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004795091819308235}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.13555256797626594, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002979591459721934}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6155617098906994, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04320711987914936}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.15764962909670655, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004811814678098913}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3156619362796641, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005146634895664645}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.16893359766104024, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0038194664000741757}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.07547310729323965, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002923357742977468}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.16143870017973952, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0037206349376128402}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.08285140079808971, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0024706623511134077}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.1391887515979127, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004157638480121235}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.292888997189622, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004754219104081643}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.15099445149853782, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0032341025124660036}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.14269660006491244, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004284849569606708}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.29574913885784265, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004809657463857144}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1540461900906591, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.003343046732804547}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7638442842916376, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.043708542511138676}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.17306675943358993, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005175271126864724}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3366968869783645, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0052711987086574656}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.18548340026259344, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.004223744602181456}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.08724475546238586, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.003316626156011152}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.17335505514005853, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003911222192069402}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.09385858633035163, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0028582868714185404}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.15183832422624116, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004487449653529665}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.30970296664886204, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0048061266737124434}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.16463283674108964, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0035889811954755296}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.15662013259115215, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004651735217163702}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3138687175842859, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004862972063876745}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.16864351715068068, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0037107680564301263}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.8719331256074123, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.057879221179053614}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.1851425968057546, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005242772266486215}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3521224313867968, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005038061225917597}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.19456487112141713, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.004109079813244224}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.09493067862691192, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0033973584763323903}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.18378368228130243, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003782300251501581}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.10023987500778037, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00282151252673031}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.163782526391218, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004592058011452551}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3247189913662455, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004610930009186234}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.17333927997130413, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.003493985482674142}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.16903111233990037, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004770080380107174}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.32924041133628895, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004661708271273997}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.17795216339276917, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0036338202974834652}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.9071599045860481, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04132800578881959}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.19439128617510687, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005421482683712362}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.360346127905188, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005185217658659474}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.20471471819889542, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.004366365024283305}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.10302514882290766, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0036082227594639027}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.19046117331587684, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0038953705603308754}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.10719970952871183, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00298368361864098}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.1710926703224802, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004698450665775182}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33314261355257685, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004754137888970346}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.18231989678501748, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00369484632221736}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.17698148297717947, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004898716157488609}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3375011370591266, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0047979049681730265}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.18722830056278536, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0038488776142113694}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
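Each agg.*.json file added above follows the same schema: a "results" array of per-metric records (bleu plus rouge1/rouge2/rougeL/rougeLsum precision, recall and f-measure, each paired with a *_stderr field) and a "config" object recording the model_args, num_fewshot, batch_size, limit, bootstrap_iters and seed used for the run. Below is a minimal parsing sketch, assuming plain Python with only the standard library; the collect_metrics helper and the example path are illustrative and are not part of this repository's tooling.

```python
import json
from pathlib import Path


def collect_metrics(path):
    """Read one agg.*.json file and return (num_fewshot, {metric_name: value})."""
    data = json.loads(Path(path).read_text())
    metrics = {}
    for record in data["results"]:
        # Each record carries one scored metric alongside shared prompt metadata;
        # keep the numeric scores and skip their *_stderr companions.
        for key, value in record.items():
            if isinstance(value, float) and not key.endswith("_stderr"):
                metrics[key] = value
    return data["config"]["num_fewshot"], metrics


# Example (hypothetical local path mirroring the layout added in this commit):
# fewshot, scores = collect_metrics(
#     "2b855b11boscarseed1/evaluation/generation/"
#     "agg.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.json"
# )
# print(fewshot, scores.get("bleu"), scores.get("rouge2_fmeasure"))
```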
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1476470442480304, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0027640730190324147}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1987093879718628, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029637609362227523}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.14881520480086885, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021587431282467824}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0287482827215408, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009244009986468182}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.04207438489658476, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013026460720942934}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.030793880936937773, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008589680063776107}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11919226323771576, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0023730668962417723}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1605716758450966, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002356213726056233}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.11844891250439296, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016052290609412721}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.13689809260299124, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002643420711314581}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.18318434106372952, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002750769375883103}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1370648388242123, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019890326379503644}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.7689251791579401, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08056354651616494}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.22376650173536908, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0033099883221279717}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.233044248806137, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002952305065889663}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.19163795576049275, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002095941856037326}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0566885511786849, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001922144512262528}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.054244362067460944, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015009900283382725}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04458039177873789, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001138994780925126}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.17115197989214356, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002730566360378985}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1753866049193498, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0022597816497720055}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.14368563344942828, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", 
"dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015580386025042744}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.20943173970561998, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003140748148754858}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.21808332623771157, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002768104713925207}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.17897651689986416, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019496135779428591}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.690348502752936, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04816844705930497}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.29318751960337, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003906796008685731}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2507046446902222, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029715006311925958}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.22003513847299133, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021076546284530782}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.08627504991656038, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002487880526812255}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.06676278485251605, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016295916757062398}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.058544685834468764, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012428137662634153}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.22625403204772632, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003335160888542255}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.187612565720097, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0022917340918039352}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.1646868100175827, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", 
"dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015819419652510194}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.27427864211994335, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0037481794388014565}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.23337797750912687, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002789848859667374}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.20484462592482003, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019789253350355387}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.431375370657664, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04760571110301349}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.2680095180907205, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004391894212593074}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.19475234917332057, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003183500503824711}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.17923839306645714, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002399847035078048}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.07985656912272211, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0026354691331866935}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05177357998459295, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014956995793890587}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.047670044472559195, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012123884267024713}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.21163784293597465, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003752063265513303}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.14785575418501767, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002456148738115927}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13674199106912197, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018380053950085664}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.25180299854737964, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004210636657378989}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.18186826651932245, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002992911142368979}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.16742344251678107, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022503819271764707}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.483185496130131, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08015886448893912}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09408573017762623, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0036374250034598983}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.062322473280787063, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0024278586980927977}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.058650885364296294, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020654868574018297}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.031658574230410935, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002078738928518879}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.017760904175768037, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001047213466542941}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.01681560917128696, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008901092196544399}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0777613311142758, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0031390664493088517}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.0487201551142007, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018838472253448453}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.04642380293882198, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016260955707942933}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.0887232057330063, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0034775731184571623}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.05793373755686782, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002257623454700228}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.05466679297892778, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019233345508114297}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.07172095791495998, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.00789659723575077}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.01565502170194915, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016289778088393908}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.009727975798126835, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.001037130264924102}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.009287298413081357, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0008981455376742198}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.005637375900266298, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008792716431913606}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0026767834375694384, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0003637992491962634}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0027003256417814083, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00035332419738820606}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.013183113805468402, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014306544932015776}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.007603533275554497, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0008034168084209064}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.0074307876775269515, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0007182295177501359}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.014972493827476105, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015795900577164992}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.009084241224624135, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0009715729115244676}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.008708338368004326, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0008427383878531978}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.989972548640045e-14, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 3.805866896172829e-13}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.083744966873544, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13510527315117016}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5736572827150358, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032384910127830125}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.45360607830143157, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00292439830442952}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4820649513030502, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002281818602228921}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2890815394423686, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027576555191893308}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2258474956058803, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023037492151092706}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.23993823867723907, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002119970027404284}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4312357546460138, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003024393522392191}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3384918202097905, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0025049483648365892}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.360441051331739, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021288701320219035}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4832111553759888, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0032277890930449953}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.38118510800546923, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027860575186553676}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.4054184452840819, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023556882595847915}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.517833527253206, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15177793846842857}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5749779290519293, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032341752537122363}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.45475966418004093, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028643752694128203}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.48422210730223353, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002254899743809727}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2940709470644466, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0028229255550865045}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22940149257285175, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002309314382978281}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.24433665653182082, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021460412936580016}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4319655914057995, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003014148626675866}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.33988537212359804, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002502036523908583}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3623534452047717, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021481098655366693}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4850678394646162, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003225531635325354}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3834152683878887, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002767686832931401}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.4082890917863408, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023637430000640375}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.324593922782439, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15241024294192065}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5781515372518692, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003222285163472006}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4515334921668262, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028702658791373904}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.48388952068811447, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002269811342308489}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2959581192468744, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002871716283121458}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22736711269872564, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0023134890859256136}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.2439106267338791, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021758038765035117}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.43626029829832247, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00305806886721717}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3384883136722747, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024808236614542274}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.36343854325074254, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002159899813151206}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4884840799940051, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003239765144780713}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.38127900697364553, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027775578392814186}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.40862207341517703, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023818673387890456}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1422502333933988, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024472995402754156}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.30983358436212066, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0046212886851808195}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.18666478200505243, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002739760762211627}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.027222625611619575, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010616231746636696}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06332962384389315, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002357369300824623}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.03663776309033808, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013441419160810165}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10615716840760141, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018146571946823608}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.23288499948545, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003534724466115114}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1393564669391015, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020056393472558214}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.11259274644929758, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019647845845397575}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.24767381751028186, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003919141916162093}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.14799512098760817, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022200856382967975}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.392324866942817, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10333126789792332}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.04678909556843838, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0029899451023121886}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.08036818701688478, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004523562391482061}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.052736675331581245, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0028824641785999464}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.009587305236726377, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001262427047456841}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.01705350789198973, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015073512113313507}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.010697176811020805, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009584744397015601}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.03558582877518959, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0024020712090389224}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.06057475710697018, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0034243280850370624}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.039672294104886395, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002180086549370051}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.03769649897971859, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0025149825952445427}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.06477261558492757, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037095733376953295}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0423806306620413, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023629304701070007}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.7258559232581546, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10507057432049555}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/agg.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.00279871811503332, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008181720087639885}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.002210852921667167, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0006418342106197504}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0024263300495769666, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0007044351988650494}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.00025897151313355527, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00011577315282337257}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00019125158969776062, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 8.696066491461003e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.00021830317883798518, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 9.811329959110373e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0020472883264226207, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0005641122660611984}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0016493867049964376, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00045298383898349467}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.001790621018457292, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": 
null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0004902962943570475}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0022311468898594308, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0006382468727242592}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.00179354722651545, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0005153565164091152}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0019503315839039083, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005574612507559669}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.1269861498697588e-39, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 5.474659303973499e-34}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:57a5ef177c04bd68fcea8a3c1510172960f297e46a1cdbbfd6e2dfcef9d5b042
size 3837292
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3fc4c06f3b499deb995437419652e3b035ba3335eee17d3448a372b352bda78c
size 4717607
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3122da653c4d8121ab578c301dfdbef8faada95ef5d68f25223abeae853925e1
size 5557036
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:32c59eb0315bb94f710bef270d51edeeef82dbbc1e4b4f87039b9865c8b5cd7a
size 6464001
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0fc4edd863510066fd085c5f44f64f27460332d51d6b7909ad1407c50ed6abbd
size 7346922
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7ff66e93dcd2c5ceeda19a6c748198fc03740794dc2e7332a617477a43ed6e73
size 8236047
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3c0e5544079dd758916c08d8abb59f8e59cb197b649cb375a01b1397cf0011b6
size 7525283
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1548d672bbf50f85117bd5d14c541c8d1d50acc09064d7470ba4f87ba9f1b3cc
size 13085418
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2e1918223eb02df317c99cf02e97d2a259cd10124e183adca6568e03627adacc
size 18628284
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e1c0df3ac36cf02328cc932fd5aa3b193394ce8d4ee4a658a20765d5d2f513f6
size 24052227
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:69d35a9d7dbd698fc11d554aa58f44a10005430c73d6a7e7898b66c7d28743aa
size 29371822
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_5.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:80e7a43ce56e97a4accf09f438b275f2db53838d3733afedd585d07d6875e4f2
size 34783939
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0af222e1f3a7d6de15bef8b102e4b0137c59cb9b9a9446da1942ffb45462dfc1
size 7175409
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ba20fae0860784720f6249a963764b73a4ddc769f425480db554c1edf4a5c787
size 8254125
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:06c063ff506ba0adbe7b0b8b934c5e8232cf3ae663faf352409510b886d6c777
size 9337214
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_3.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a9480dc6730f5e9c64debd64cec332ef568120a71cc48ece316c1af8f7f9b5f0
size 9626426
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_4.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7347292c96bb4c023ea387a73208bcc1659ef87175649a15d64fe169105e5fb5
size 11668577
2b855b11boscarseed1/evaluation/generation/examples.2b855b11boscarseed1_gem_xsum_article_DOC_summary_5.jsonl
CHANGED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f95019b418a440e683993690c37173ae15cbebb47caedfe6273d0eab3dba7b75
size 13897460
2b855b11boscarseed1/evaluation/generation/merged.csv
ADDED
@@ -0,0 +1,53 @@
dataset,fewshots,prompt,metric,value
e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.02782065901980275
e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.02782065901980275
e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.21501029566008578
e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.21501029566008578
e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.2351759267020243
e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.2351759267020243
e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.23993823867723907
e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.23993823867723907
e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.24433665653182082
e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.24433665653182082
e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.2439106267338791
e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.2439106267338791
e2e_nlg_cleaned,5,average,multiple,0.20103206722080863
gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.03971064304403434
gem_xsum,0,median,rouge2_fmeasure,0.03971064304403434
gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.03654673156140313
gem_xsum,1,median,rouge2_fmeasure,0.03654673156140313
gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.03789731741399596
gem_xsum,2,median,rouge2_fmeasure,0.03789731741399596
gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.03663776309033808
gem_xsum,3,median,rouge2_fmeasure,0.03663776309033808
gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.010697176811020805
gem_xsum,4,median,rouge2_fmeasure,0.010697176811020805
gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.00021830317883798518
gem_xsum,5,median,rouge2_fmeasure,0.00021830317883798518
gem_xsum,5,average,multiple,0.02695132251660505
web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.04579430308932667
web_nlg_en,0,median,rouge2_fmeasure,0.04579430308932667
web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.06901771335292758
web_nlg_en,1,median,rouge2_fmeasure,0.06901771335292758
web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.08285140079808971
web_nlg_en,2,median,rouge2_fmeasure,0.08285140079808971
web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.09385858633035163
web_nlg_en,3,median,rouge2_fmeasure,0.09385858633035163
web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.10023987500778037
web_nlg_en,4,median,rouge2_fmeasure,0.10023987500778037
web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.10719970952871183
web_nlg_en,5,median,rouge2_fmeasure,0.10719970952871183
web_nlg_en,5,average,multiple,0.0831602646845313
wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.030793880936937773
wiki_lingua_en,0,median,rouge2_fmeasure,0.030793880936937773
wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.04458039177873789
wiki_lingua_en,1,median,rouge2_fmeasure,0.04458039177873789
wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.058544685834468764
wiki_lingua_en,2,median,rouge2_fmeasure,0.058544685834468764
wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.047670044472559195
wiki_lingua_en,3,median,rouge2_fmeasure,0.047670044472559195
wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.01681560917128696
wiki_lingua_en,4,median,rouge2_fmeasure,0.01681560917128696
wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0027003256417814083
wiki_lingua_en,5,median,rouge2_fmeasure,0.0027003256417814083
wiki_lingua_en,5,average,multiple,0.03351748963929533
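The merged.csv above keeps one metric per task (rouge2_fmeasure for the single prompt of each task), a `median` row per few-shot setting, and one `average,multiple` row per dataset; on this data the average row equals the mean of the six per-fewshot medians. A minimal sanity-check sketch, assuming pandas is available and the file is read from this repo's path:

```python
# Minimal sketch (pandas is an assumed dependency, not shipped with this repo).
# Checks that each dataset's "average,multiple" row is the mean of its six
# per-fewshot "median" rouge2_fmeasure rows in merged.csv.
import pandas as pd

df = pd.read_csv("2b855b11boscarseed1/evaluation/generation/merged.csv")

medians = df[df["prompt"] == "median"].groupby("dataset")["value"].mean()
averages = df[df["prompt"] == "average"].set_index("dataset")["value"]

for dataset, avg in averages.items():
    # e.g. e2e_nlg_cleaned: 0.20103... is the mean of its six median rows
    assert abs(medians[dataset] - avg) < 1e-12, dataset
```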
2b855b11boscarseed1/evaluation/generation/merged.json
ADDED
@@ -0,0 +1 @@
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.3801106976509281, "bleu_stderr": 0.030814977906570784, "rouge1_fmeasure": 0.09839951058099769, "rouge1_fmeasure_stderr": 0.0024506647455068047, "rouge1_precision": 0.09254608062110982, "rouge1_precision_stderr": 0.003969016029393206, "rouge1_recall": 0.2514658272132534, "rouge1_recall_stderr": 0.005470193417734038, "rouge2_fmeasure": 0.04579430308932667, "rouge2_fmeasure_stderr": 0.0014988641201888384, "rouge2_precision": 0.04251830138142587, "rouge2_precision_stderr": 0.0025980589101773847, "rouge2_recall": 0.11876788025878558, "rouge2_recall_stderr": 0.0033890059840335, "rougeL_fmeasure": 0.09334972395562345, "rougeL_fmeasure_stderr": 0.0022379508481899127, "rougeL_precision": 0.08735615223930215, "rougeL_precision_stderr": 0.0037276787142384374, "rougeL_recall": 0.24225619592357273, "rougeL_recall_stderr": 0.005268443646117898, "rougeLsum_fmeasure": 0.09257280431668473, "rougeLsum_fmeasure_stderr": 0.0022654509452080854, "rougeLsum_precision": 0.08738746346078487, "rougeLsum_precision_stderr": 0.003771697087126988, "rougeLsum_recall": 0.23626985324693767, "rougeLsum_recall_stderr": 0.005054786193539789}}, "1": {"PALM_prompt": {"bleu": 0.5239721644834189, "bleu_stderr": 0.03609798297687262, "rouge1_fmeasure": 0.14826158596606656, "rouge1_fmeasure_stderr": 0.0034048406078764424, "rouge1_precision": 0.13082237107727746, "rouge1_precision_stderr": 0.0041887696056868914, "rouge1_recall": 0.29894851591422195, "rouge1_recall_stderr": 0.0051748543299946, "rouge2_fmeasure": 0.06901771335292758, "rouge2_fmeasure_stderr": 0.0020860447213003814, "rouge2_precision": 0.06041678931722799, "rouge2_precision_stderr": 0.0025853119808927536, "rouge2_recall": 0.1438349096175946, "rouge2_recall_stderr": 0.0034922545459490128, "rougeL_fmeasure": 0.13229541662919794, "rougeL_fmeasure_stderr": 0.002864700499056504, "rougeL_precision": 0.11585043732455273, "rougeL_precision_stderr": 0.003667689089676262, "rougeL_recall": 0.27586821042029314, "rougeL_recall_stderr": 0.004795036002022945, "rougeLsum_fmeasure": 0.13555256797626594, "rougeLsum_fmeasure_stderr": 0.002979591459721934, "rougeLsum_precision": 0.11910114676043697, "rougeLsum_precision_stderr": 0.0037751935052806325, "rougeLsum_recall": 0.27899773980715475, "rougeLsum_recall_stderr": 0.004795091819308235}}, "2": {"PALM_prompt": {"bleu": 0.6155617098906994, "bleu_stderr": 0.04320711987914936, "rouge1_fmeasure": 0.16893359766104024, "rouge1_fmeasure_stderr": 0.0038194664000741757, "rouge1_precision": 0.15764962909670655, "rouge1_precision_stderr": 0.004811814678098913, "rouge1_recall": 0.3156619362796641, "rouge1_recall_stderr": 0.005146634895664645, "rouge2_fmeasure": 0.08285140079808971, "rouge2_fmeasure_stderr": 0.0024706623511134077, "rouge2_precision": 0.07547310729323965, "rouge2_precision_stderr": 0.002923357742977468, "rouge2_recall": 0.16143870017973952, "rouge2_recall_stderr": 0.0037206349376128402, "rougeL_fmeasure": 0.15099445149853782, "rougeL_fmeasure_stderr": 0.0032341025124660036, "rougeL_precision": 0.1391887515979127, "rougeL_precision_stderr": 0.004157638480121235, "rougeL_recall": 0.292888997189622, "rougeL_recall_stderr": 0.004754219104081643, "rougeLsum_fmeasure": 0.1540461900906591, "rougeLsum_fmeasure_stderr": 0.003343046732804547, "rougeLsum_precision": 0.14269660006491244, "rougeLsum_precision_stderr": 0.004284849569606708, "rougeLsum_recall": 0.29574913885784265, "rougeLsum_recall_stderr": 0.004809657463857144}}, "3": {"PALM_prompt": {"bleu": 0.7638442842916376, "bleu_stderr": 
0.043708542511138676, "rouge1_fmeasure": 0.18548340026259344, "rouge1_fmeasure_stderr": 0.004223744602181456, "rouge1_precision": 0.17306675943358993, "rouge1_precision_stderr": 0.005175271126864724, "rouge1_recall": 0.3366968869783645, "rouge1_recall_stderr": 0.0052711987086574656, "rouge2_fmeasure": 0.09385858633035163, "rouge2_fmeasure_stderr": 0.0028582868714185404, "rouge2_precision": 0.08724475546238586, "rouge2_precision_stderr": 0.003316626156011152, "rouge2_recall": 0.17335505514005853, "rouge2_recall_stderr": 0.003911222192069402, "rougeL_fmeasure": 0.16463283674108964, "rougeL_fmeasure_stderr": 0.0035889811954755296, "rougeL_precision": 0.15183832422624116, "rougeL_precision_stderr": 0.004487449653529665, "rougeL_recall": 0.30970296664886204, "rougeL_recall_stderr": 0.0048061266737124434, "rougeLsum_fmeasure": 0.16864351715068068, "rougeLsum_fmeasure_stderr": 0.0037107680564301263, "rougeLsum_precision": 0.15662013259115215, "rougeLsum_precision_stderr": 0.004651735217163702, "rougeLsum_recall": 0.3138687175842859, "rougeLsum_recall_stderr": 0.004862972063876745}}, "4": {"PALM_prompt": {"bleu": 0.8719331256074123, "bleu_stderr": 0.057879221179053614, "rouge1_fmeasure": 0.19456487112141713, "rouge1_fmeasure_stderr": 0.004109079813244224, "rouge1_precision": 0.1851425968057546, "rouge1_precision_stderr": 0.005242772266486215, "rouge1_recall": 0.3521224313867968, "rouge1_recall_stderr": 0.005038061225917597, "rouge2_fmeasure": 0.10023987500778037, "rouge2_fmeasure_stderr": 0.00282151252673031, "rouge2_precision": 0.09493067862691192, "rouge2_precision_stderr": 0.0033973584763323903, "rouge2_recall": 0.18378368228130243, "rouge2_recall_stderr": 0.003782300251501581, "rougeL_fmeasure": 0.17333927997130413, "rougeL_fmeasure_stderr": 0.003493985482674142, "rougeL_precision": 0.163782526391218, "rougeL_precision_stderr": 0.004592058011452551, "rougeL_recall": 0.3247189913662455, "rougeL_recall_stderr": 0.004610930009186234, "rougeLsum_fmeasure": 0.17795216339276917, "rougeLsum_fmeasure_stderr": 0.0036338202974834652, "rougeLsum_precision": 0.16903111233990037, "rougeLsum_precision_stderr": 0.004770080380107174, "rougeLsum_recall": 0.32924041133628895, "rougeLsum_recall_stderr": 0.004661708271273997}}, "5": {"PALM_prompt": {"bleu": 0.9071599045860481, "bleu_stderr": 0.04132800578881959, "rouge1_fmeasure": 0.20471471819889542, "rouge1_fmeasure_stderr": 0.004366365024283305, "rouge1_precision": 0.19439128617510687, "rouge1_precision_stderr": 0.005421482683712362, "rouge1_recall": 0.360346127905188, "rouge1_recall_stderr": 0.005185217658659474, "rouge2_fmeasure": 0.10719970952871183, "rouge2_fmeasure_stderr": 0.00298368361864098, "rouge2_precision": 0.10302514882290766, "rouge2_precision_stderr": 0.0036082227594639027, "rouge2_recall": 0.19046117331587684, "rouge2_recall_stderr": 0.0038953705603308754, "rougeL_fmeasure": 0.18231989678501748, "rougeL_fmeasure_stderr": 0.00369484632221736, "rougeL_precision": 0.1710926703224802, "rougeL_precision_stderr": 0.004698450665775182, "rougeL_recall": 0.33314261355257685, "rougeL_recall_stderr": 0.004754137888970346, "rougeLsum_fmeasure": 0.18722830056278536, "rougeLsum_fmeasure_stderr": 0.0038488776142113694, "rougeLsum_precision": 0.17698148297717947, "rougeLsum_precision_stderr": 0.004898716157488609, "rougeLsum_recall": 0.3375011370591266, "rougeLsum_recall_stderr": 0.0047979049681730265}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.7689251791579401, "bleu_stderr": 0.08056354651616494, "rouge1_fmeasure": 0.14881520480086885, 
"rouge1_fmeasure_stderr": 0.0021587431282467824, "rouge1_precision": 0.1476470442480304, "rouge1_precision_stderr": 0.0027640730190324147, "rouge1_recall": 0.1987093879718628, "rouge1_recall_stderr": 0.0029637609362227523, "rouge2_fmeasure": 0.030793880936937773, "rouge2_fmeasure_stderr": 0.0008589680063776107, "rouge2_precision": 0.0287482827215408, "rouge2_precision_stderr": 0.0009244009986468182, "rouge2_recall": 0.04207438489658476, "rouge2_recall_stderr": 0.0013026460720942934, "rougeL_fmeasure": 0.11844891250439296, "rougeL_fmeasure_stderr": 0.0016052290609412721, "rougeL_precision": 0.11919226323771576, "rougeL_precision_stderr": 0.0023730668962417723, "rougeL_recall": 0.1605716758450966, "rougeL_recall_stderr": 0.002356213726056233, "rougeLsum_fmeasure": 0.1370648388242123, "rougeLsum_fmeasure_stderr": 0.0019890326379503644, "rougeLsum_precision": 0.13689809260299124, "rougeLsum_precision_stderr": 0.002643420711314581, "rougeLsum_recall": 0.18318434106372952, "rougeLsum_recall_stderr": 0.002750769375883103}}, "1": {"tldr_en": {"bleu": 2.690348502752936, "bleu_stderr": 0.04816844705930497, "rouge1_fmeasure": 0.19163795576049275, "rouge1_fmeasure_stderr": 0.002095941856037326, "rouge1_precision": 0.22376650173536908, "rouge1_precision_stderr": 0.0033099883221279717, "rouge1_recall": 0.233044248806137, "rouge1_recall_stderr": 0.002952305065889663, "rouge2_fmeasure": 0.04458039177873789, "rouge2_fmeasure_stderr": 0.001138994780925126, "rouge2_precision": 0.0566885511786849, "rouge2_precision_stderr": 0.001922144512262528, "rouge2_recall": 0.054244362067460944, "rouge2_recall_stderr": 0.0015009900283382725, "rougeL_fmeasure": 0.14368563344942828, "rougeL_fmeasure_stderr": 0.0015580386025042744, "rougeL_precision": 0.17115197989214356, "rougeL_precision_stderr": 0.002730566360378985, "rougeL_recall": 0.1753866049193498, "rougeL_recall_stderr": 0.0022597816497720055, "rougeLsum_fmeasure": 0.17897651689986416, "rougeLsum_fmeasure_stderr": 0.0019496135779428591, "rougeLsum_precision": 0.20943173970561998, "rougeLsum_precision_stderr": 0.003140748148754858, "rougeLsum_recall": 0.21808332623771157, "rougeLsum_recall_stderr": 0.002768104713925207}}, "2": {"tldr_en": {"bleu": 3.431375370657664, "bleu_stderr": 0.04760571110301349, "rouge1_fmeasure": 0.22003513847299133, "rouge1_fmeasure_stderr": 0.0021076546284530782, "rouge1_precision": 0.29318751960337, "rouge1_precision_stderr": 0.003906796008685731, "rouge1_recall": 0.2507046446902222, "rouge1_recall_stderr": 0.0029715006311925958, "rouge2_fmeasure": 0.058544685834468764, "rouge2_fmeasure_stderr": 0.0012428137662634153, "rouge2_precision": 0.08627504991656038, "rouge2_precision_stderr": 0.002487880526812255, "rouge2_recall": 0.06676278485251605, "rouge2_recall_stderr": 0.0016295916757062398, "rougeL_fmeasure": 0.1646868100175827, "rougeL_fmeasure_stderr": 0.0015819419652510194, "rougeL_precision": 0.22625403204772632, "rougeL_precision_stderr": 0.003335160888542255, "rougeL_recall": 0.187612565720097, "rougeL_recall_stderr": 0.0022917340918039352, "rougeLsum_fmeasure": 0.20484462592482003, "rougeLsum_fmeasure_stderr": 0.0019789253350355387, "rougeLsum_precision": 0.27427864211994335, "rougeLsum_precision_stderr": 0.0037481794388014565, "rougeLsum_recall": 0.23337797750912687, "rougeLsum_recall_stderr": 0.002789848859667374}}, "3": {"tldr_en": {"bleu": 2.483185496130131, "bleu_stderr": 0.08015886448893912, "rouge1_fmeasure": 0.17923839306645714, "rouge1_fmeasure_stderr": 0.002399847035078048, "rouge1_precision": 0.2680095180907205, 
"rouge1_precision_stderr": 0.004391894212593074, "rouge1_recall": 0.19475234917332057, "rouge1_recall_stderr": 0.003183500503824711, "rouge2_fmeasure": 0.047670044472559195, "rouge2_fmeasure_stderr": 0.0012123884267024713, "rouge2_precision": 0.07985656912272211, "rouge2_precision_stderr": 0.0026354691331866935, "rouge2_recall": 0.05177357998459295, "rouge2_recall_stderr": 0.0014956995793890587, "rougeL_fmeasure": 0.13674199106912197, "rougeL_fmeasure_stderr": 0.0018380053950085664, "rougeL_precision": 0.21163784293597465, "rougeL_precision_stderr": 0.003752063265513303, "rougeL_recall": 0.14785575418501767, "rougeL_recall_stderr": 0.002456148738115927, "rougeLsum_fmeasure": 0.16742344251678107, "rougeLsum_fmeasure_stderr": 0.0022503819271764707, "rougeLsum_precision": 0.25180299854737964, "rougeLsum_precision_stderr": 0.004210636657378989, "rougeLsum_recall": 0.18186826651932245, "rougeLsum_recall_stderr": 0.002992911142368979}}, "4": {"tldr_en": {"bleu": 0.07172095791495998, "bleu_stderr": 0.00789659723575077, "rouge1_fmeasure": 0.058650885364296294, "rouge1_fmeasure_stderr": 0.0020654868574018297, "rouge1_precision": 0.09408573017762623, "rouge1_precision_stderr": 0.0036374250034598983, "rouge1_recall": 0.062322473280787063, "rouge1_recall_stderr": 0.0024278586980927977, "rouge2_fmeasure": 0.01681560917128696, "rouge2_fmeasure_stderr": 0.0008901092196544399, "rouge2_precision": 0.031658574230410935, "rouge2_precision_stderr": 0.002078738928518879, "rouge2_recall": 0.017760904175768037, "rouge2_recall_stderr": 0.001047213466542941, "rougeL_fmeasure": 0.04642380293882198, "rougeL_fmeasure_stderr": 0.0016260955707942933, "rougeL_precision": 0.0777613311142758, "rougeL_precision_stderr": 0.0031390664493088517, "rougeL_recall": 0.0487201551142007, "rougeL_recall_stderr": 0.0018838472253448453, "rougeLsum_fmeasure": 0.05466679297892778, "rougeLsum_fmeasure_stderr": 0.0019233345508114297, "rougeLsum_precision": 0.0887232057330063, "rougeLsum_precision_stderr": 0.0034775731184571623, "rougeLsum_recall": 0.05793373755686782, "rougeLsum_recall_stderr": 0.002257623454700228}}, "5": {"tldr_en": {"bleu": 3.989972548640045e-14, "bleu_stderr": 3.805866896172829e-13, "rouge1_fmeasure": 0.009287298413081357, "rouge1_fmeasure_stderr": 0.0008981455376742198, "rouge1_precision": 0.01565502170194915, "rouge1_precision_stderr": 0.0016289778088393908, "rouge1_recall": 0.009727975798126835, "rouge1_recall_stderr": 0.001037130264924102, "rouge2_fmeasure": 0.0027003256417814083, "rouge2_fmeasure_stderr": 0.00035332419738820606, "rouge2_precision": 0.005637375900266298, "rouge2_precision_stderr": 0.0008792716431913606, "rouge2_recall": 0.0026767834375694384, "rouge2_recall_stderr": 0.0003637992491962634, "rougeL_fmeasure": 0.0074307876775269515, "rougeL_fmeasure_stderr": 0.0007182295177501359, "rougeL_precision": 0.013183113805468402, "rougeL_precision_stderr": 0.0014306544932015776, "rougeL_recall": 0.007603533275554497, "rougeL_recall_stderr": 0.0008034168084209064, "rougeLsum_fmeasure": 0.008708338368004326, "rougeLsum_fmeasure_stderr": 0.0008427383878531978, "rougeLsum_precision": 0.014972493827476105, "rougeLsum_precision_stderr": 0.0015795900577164992, "rougeLsum_recall": 0.009084241224624135, "rougeLsum_recall_stderr": 0.0009715729115244676}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.8938384110720313, "bleu_stderr": 0.04442740312402193, "rouge1_fmeasure": 0.12367657246465094, "rouge1_fmeasure_stderr": 0.0014697284619829536, "rouge1_precision": 0.09374072562693576, 
"rouge1_precision_stderr": 0.0012230359583290014, "rouge1_recall": 0.1941308443869652, "rouge1_recall_stderr": 0.0022578323155766997, "rouge2_fmeasure": 0.02782065901980275, "rouge2_fmeasure_stderr": 0.0007104099274964277, "rouge2_precision": 0.020960965090776735, "rouge2_precision_stderr": 0.0006244327243966665, "rouge2_recall": 0.04544625505084656, "rouge2_recall_stderr": 0.0011979457762283947, "rougeL_fmeasure": 0.10860196142327254, "rougeL_fmeasure_stderr": 0.0012302518402255934, "rougeL_precision": 0.08203779654362248, "rougeL_precision_stderr": 0.0010243499554408882, "rougeL_recall": 0.17174001350939913, "rougeL_recall_stderr": 0.0019699700948681647, "rougeLsum_fmeasure": 0.10973134394419841, "rougeLsum_fmeasure_stderr": 0.0013237761734567783, "rougeLsum_precision": 0.08310697182038314, "rougeLsum_precision_stderr": 0.0011057328214282471, "rougeLsum_recall": 0.17275727213813283, "rougeLsum_recall_stderr": 0.002060390288561184}}, "1": {"generate_text_restaurant": {"bleu": 11.564210778804036, "bleu_stderr": 0.1198765256137471, "rouge1_fmeasure": 0.4605347726818318, "rouge1_fmeasure_stderr": 0.0023127571845312834, "rouge1_precision": 0.5637173176604086, "rouge1_precision_stderr": 0.003321797810373166, "rouge1_recall": 0.42980056601194444, "rouge1_recall_stderr": 0.002973789269502846, "rouge2_fmeasure": 0.21501029566008578, "rouge2_fmeasure_stderr": 0.002028484285834072, "rouge2_precision": 0.26763533541615536, "rouge2_precision_stderr": 0.0027609708835756447, "rouge2_recall": 0.20040046914166157, "rouge2_recall_stderr": 0.0021517714918030005, "rougeL_fmeasure": 0.3329465663050009, "rougeL_fmeasure_stderr": 0.002054461632613191, "rougeL_precision": 0.4107410670750688, "rougeL_precision_stderr": 0.003020208536626803, "rougeL_recall": 0.30963490201281724, "rougeL_recall_stderr": 0.002399599836596746, "rougeLsum_fmeasure": 0.3743551835626005, "rougeLsum_fmeasure_stderr": 0.0022879473483603647, "rougeLsum_precision": 0.45928342602402417, "rougeLsum_precision_stderr": 0.0032079639333423174, "rougeLsum_recall": 0.3490148298085739, "rougeLsum_recall_stderr": 0.002713242494960982}}, "2": {"generate_text_restaurant": {"bleu": 13.400542456523908, "bleu_stderr": 0.1609394772064794, "rouge1_fmeasure": 0.4798036150204005, "rouge1_fmeasure_stderr": 0.002282274159276462, "rouge1_precision": 0.5764234982441327, "rouge1_precision_stderr": 0.0033021249054890935, "rouge1_recall": 0.4503974768175401, "rouge1_recall_stderr": 0.002919712590777667, "rouge2_fmeasure": 0.2351759267020243, "rouge2_fmeasure_stderr": 0.002088790334860442, "rouge2_precision": 0.28675558273290647, "rouge2_precision_stderr": 0.002781811689703388, "rouge2_recall": 0.22076517179049052, "rouge2_recall_stderr": 0.0022524340062285335, "rougeL_fmeasure": 0.3549448525372181, "rougeL_fmeasure_stderr": 0.0021193431273246735, "rougeL_precision": 0.4286366628943883, "rougeL_precision_stderr": 0.0030557015039043693, "rougeL_recall": 0.3325081715277142, "rougeL_recall_stderr": 0.0024750818471041087, "rougeLsum_fmeasure": 0.39953599561234265, "rougeLsum_fmeasure_stderr": 0.0023322731932715716, "rougeLsum_precision": 0.48074271393713025, "rougeLsum_precision_stderr": 0.0032624614219069863, "rougeLsum_recall": 0.37474313833603423, "rougeLsum_recall_stderr": 0.0027481389029155967}}, "3": {"generate_text_restaurant": {"bleu": 14.083744966873544, "bleu_stderr": 0.13510527315117016, "rouge1_fmeasure": 0.4820649513030502, "rouge1_fmeasure_stderr": 0.002281818602228921, "rouge1_precision": 0.5736572827150358, "rouge1_precision_stderr": 
0.0032384910127830125, "rouge1_recall": 0.45360607830143157, "rouge1_recall_stderr": 0.00292439830442952, "rouge2_fmeasure": 0.23993823867723907, "rouge2_fmeasure_stderr": 0.002119970027404284, "rouge2_precision": 0.2890815394423686, "rouge2_precision_stderr": 0.0027576555191893308, "rouge2_recall": 0.2258474956058803, "rouge2_recall_stderr": 0.0023037492151092706, "rougeL_fmeasure": 0.360441051331739, "rougeL_fmeasure_stderr": 0.0021288701320219035, "rougeL_precision": 0.4312357546460138, "rougeL_precision_stderr": 0.003024393522392191, "rougeL_recall": 0.3384918202097905, "rougeL_recall_stderr": 0.0025049483648365892, "rougeLsum_fmeasure": 0.4054184452840819, "rougeLsum_fmeasure_stderr": 0.0023556882595847915, "rougeLsum_precision": 0.4832111553759888, "rougeLsum_precision_stderr": 0.0032277890930449953, "rougeLsum_recall": 0.38118510800546923, "rougeLsum_recall_stderr": 0.0027860575186553676}}, "4": {"generate_text_restaurant": {"bleu": 14.517833527253206, "bleu_stderr": 0.15177793846842857, "rouge1_fmeasure": 0.48422210730223353, "rouge1_fmeasure_stderr": 0.002254899743809727, "rouge1_precision": 0.5749779290519293, "rouge1_precision_stderr": 0.0032341752537122363, "rouge1_recall": 0.45475966418004093, "rouge1_recall_stderr": 0.0028643752694128203, "rouge2_fmeasure": 0.24433665653182082, "rouge2_fmeasure_stderr": 0.0021460412936580016, "rouge2_precision": 0.2940709470644466, "rouge2_precision_stderr": 0.0028229255550865045, "rouge2_recall": 0.22940149257285175, "rouge2_recall_stderr": 0.002309314382978281, "rougeL_fmeasure": 0.3623534452047717, "rougeL_fmeasure_stderr": 0.0021481098655366693, "rougeL_precision": 0.4319655914057995, "rougeL_precision_stderr": 0.003014148626675866, "rougeL_recall": 0.33988537212359804, "rougeL_recall_stderr": 0.002502036523908583, "rougeLsum_fmeasure": 0.4082890917863408, "rougeLsum_fmeasure_stderr": 0.0023637430000640375, "rougeLsum_precision": 0.4850678394646162, "rougeLsum_precision_stderr": 0.003225531635325354, "rougeLsum_recall": 0.3834152683878887, "rougeLsum_recall_stderr": 0.002767686832931401}}, "5": {"generate_text_restaurant": {"bleu": 14.324593922782439, "bleu_stderr": 0.15241024294192065, "rouge1_fmeasure": 0.48388952068811447, "rouge1_fmeasure_stderr": 0.002269811342308489, "rouge1_precision": 0.5781515372518692, "rouge1_precision_stderr": 0.003222285163472006, "rouge1_recall": 0.4515334921668262, "rouge1_recall_stderr": 0.0028702658791373904, "rouge2_fmeasure": 0.2439106267338791, "rouge2_fmeasure_stderr": 0.0021758038765035117, "rouge2_precision": 0.2959581192468744, "rouge2_precision_stderr": 0.002871716283121458, "rouge2_recall": 0.22736711269872564, "rouge2_recall_stderr": 0.0023134890859256136, "rougeL_fmeasure": 0.36343854325074254, "rougeL_fmeasure_stderr": 0.002159899813151206, "rougeL_precision": 0.43626029829832247, "rougeL_precision_stderr": 0.00305806886721717, "rougeL_recall": 0.3384883136722747, "rougeL_recall_stderr": 0.0024808236614542274, "rougeLsum_fmeasure": 0.40862207341517703, "rougeLsum_fmeasure_stderr": 0.0023818673387890456, "rougeLsum_precision": 0.4884840799940051, "rougeLsum_precision_stderr": 0.003239765144780713, "rougeLsum_recall": 0.38127900697364553, "rougeLsum_recall_stderr": 0.0027775578392814186}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.7044284159611478, "bleu_stderr": 0.10158710863644861, "rouge1_fmeasure": 0.1824311356993241, "rouge1_fmeasure_stderr": 0.0027742455429407478, "rouge1_precision": 0.13211376744686648, "rouge1_precision_stderr": 0.0021078227737627523, "rouge1_recall": 
0.30958091840657137, "rouge1_recall_stderr": 0.004718256158826077, "rouge2_fmeasure": 0.03971064304403434, "rouge2_fmeasure_stderr": 0.0014694772612363063, "rouge2_precision": 0.028436679980413212, "rouge2_precision_stderr": 0.0010660997414346854, "rouge2_recall": 0.06982063723069834, "rouge2_recall_stderr": 0.0026535400196015854, "rougeL_fmeasure": 0.14006214823111385, "rougeL_fmeasure_stderr": 0.0021160537297509855, "rougeL_precision": 0.10122789771540168, "rougeL_precision_stderr": 0.0015927078993417136, "rougeL_recall": 0.23911281116753666, "rougeL_recall_stderr": 0.0037320083765561033, "rougeLsum_fmeasure": 0.14375817680635894, "rougeLsum_fmeasure_stderr": 0.002322989973976244, "rougeLsum_precision": 0.10386228512699824, "rougeLsum_precision_stderr": 0.0017447889334045853, "rougeLsum_recall": 0.2456731268051541, "rougeLsum_recall_stderr": 0.004084619780933895}}, "1": {"article_DOC_summary": {"bleu": 1.414251403634457, "bleu_stderr": 0.09836035081115309, "rouge1_fmeasure": 0.19239078349475108, "rouge1_fmeasure_stderr": 0.002465960954886116, "rouge1_precision": 0.1405432198297611, "rouge1_precision_stderr": 0.001878679789941618, "rouge1_recall": 0.3276606298436086, "rouge1_recall_stderr": 0.004376293585107094, "rouge2_fmeasure": 0.03654673156140313, "rouge2_fmeasure_stderr": 0.0013456659236836456, "rouge2_precision": 0.026022309438382178, "rouge2_precision_stderr": 0.0009602804697530564, "rouge2_recall": 0.06491047997705596, "rouge2_recall_stderr": 0.002480531229175089, "rougeL_fmeasure": 0.14178613311816618, "rougeL_fmeasure_stderr": 0.0017458776019216176, "rougeL_precision": 0.10365291271701285, "rougeL_precision_stderr": 0.001348668367178514, "rougeL_recall": 0.24268394669042775, "rougeL_recall_stderr": 0.0032299728759827427, "rougeLsum_fmeasure": 0.15166775898349918, "rougeLsum_fmeasure_stderr": 0.001985950973310373, "rougeLsum_precision": 0.11061497335307985, "rougeLsum_precision_stderr": 0.001495607110282887, "rougeLsum_recall": 0.2601523504391386, "rougeLsum_recall_stderr": 0.003678694746255839}}, "2": {"article_DOC_summary": {"bleu": 1.4466481969495988, "bleu_stderr": 0.060446243451377536, "rouge1_fmeasure": 0.1926288064781011, "rouge1_fmeasure_stderr": 0.0024488782590945946, "rouge1_precision": 0.14212593864186113, "rouge1_precision_stderr": 0.0020278826787379143, "rouge1_recall": 0.3256945308193834, "rouge1_recall_stderr": 0.004183499164539368, "rouge2_fmeasure": 0.03789731741399596, "rouge2_fmeasure_stderr": 0.0013200459196047046, "rouge2_precision": 0.027670527801186028, "rouge2_precision_stderr": 0.0010263896507194106, "rouge2_recall": 0.06591916139804017, "rouge2_recall_stderr": 0.0023227491521481056, "rougeL_fmeasure": 0.14362364844292436, "rougeL_fmeasure_stderr": 0.0017716917176277007, "rougeL_precision": 0.10597609939483277, "rougeL_precision_stderr": 0.0014946429223521406, "rougeL_recall": 0.244127383947085, "rougeL_recall_stderr": 0.003151449183932117, "rougeLsum_fmeasure": 0.15326170376419787, "rougeLsum_fmeasure_stderr": 0.002014338226707074, "rougeLsum_precision": 0.11288346015402125, "rougeLsum_precision_stderr": 0.0016505088448165134, "rougeLsum_recall": 0.2608104370267327, "rougeLsum_recall_stderr": 0.003565773707068147}}, "3": {"article_DOC_summary": {"bleu": 1.392324866942817, "bleu_stderr": 0.10333126789792332, "rouge1_fmeasure": 0.18666478200505243, "rouge1_fmeasure_stderr": 0.002739760762211627, "rouge1_precision": 0.1422502333933988, "rouge1_precision_stderr": 0.0024472995402754156, "rouge1_recall": 0.30983358436212066, "rouge1_recall_stderr": 
0.0046212886851808195, "rouge2_fmeasure": 0.03663776309033808, "rouge2_fmeasure_stderr": 0.0013441419160810165, "rouge2_precision": 0.027222625611619575, "rouge2_precision_stderr": 0.0010616231746636696, "rouge2_recall": 0.06332962384389315, "rouge2_recall_stderr": 0.002357369300824623, "rougeL_fmeasure": 0.1393564669391015, "rougeL_fmeasure_stderr": 0.0020056393472558214, "rougeL_precision": 0.10615716840760141, "rougeL_precision_stderr": 0.0018146571946823608, "rougeL_recall": 0.23288499948545, "rougeL_recall_stderr": 0.003534724466115114, "rougeLsum_fmeasure": 0.14799512098760817, "rougeLsum_fmeasure_stderr": 0.0022200856382967975, "rougeLsum_precision": 0.11259274644929758, "rougeLsum_precision_stderr": 0.0019647845845397575, "rougeLsum_recall": 0.24767381751028186, "rougeLsum_recall_stderr": 0.003919141916162093}}, "4": {"article_DOC_summary": {"bleu": 0.7258559232581546, "bleu_stderr": 0.10507057432049555, "rouge1_fmeasure": 0.052736675331581245, "rouge1_fmeasure_stderr": 0.0028824641785999464, "rouge1_precision": 0.04678909556843838, "rouge1_precision_stderr": 0.0029899451023121886, "rouge1_recall": 0.08036818701688478, "rouge1_recall_stderr": 0.004523562391482061, "rouge2_fmeasure": 0.010697176811020805, "rouge2_fmeasure_stderr": 0.0009584744397015601, "rouge2_precision": 0.009587305236726377, "rouge2_precision_stderr": 0.001262427047456841, "rouge2_recall": 0.01705350789198973, "rouge2_recall_stderr": 0.0015073512113313507, "rougeL_fmeasure": 0.039672294104886395, "rougeL_fmeasure_stderr": 0.002180086549370051, "rougeL_precision": 0.03558582877518959, "rougeL_precision_stderr": 0.0024020712090389224, "rougeL_recall": 0.06057475710697018, "rougeL_recall_stderr": 0.0034243280850370624, "rougeLsum_fmeasure": 0.0423806306620413, "rougeLsum_fmeasure_stderr": 0.0023629304701070007, "rougeLsum_precision": 0.03769649897971859, "rougeLsum_precision_stderr": 0.0025149825952445427, "rougeLsum_recall": 0.06477261558492757, "rougeLsum_recall_stderr": 0.0037095733376953295}}, "5": {"article_DOC_summary": {"bleu": 1.1269861498697588e-39, "bleu_stderr": 5.474659303973499e-34, "rouge1_fmeasure": 0.0024263300495769666, "rouge1_fmeasure_stderr": 0.0007044351988650494, "rouge1_precision": 0.00279871811503332, "rouge1_precision_stderr": 0.0008181720087639885, "rouge1_recall": 0.002210852921667167, "rouge1_recall_stderr": 0.0006418342106197504, "rouge2_fmeasure": 0.00021830317883798518, "rouge2_fmeasure_stderr": 9.811329959110373e-05, "rouge2_precision": 0.00025897151313355527, "rouge2_precision_stderr": 0.00011577315282337257, "rouge2_recall": 0.00019125158969776062, "rouge2_recall_stderr": 8.696066491461003e-05, "rougeL_fmeasure": 0.001790621018457292, "rougeL_fmeasure_stderr": 0.0004902962943570475, "rougeL_precision": 0.0020472883264226207, "rougeL_precision_stderr": 0.0005641122660611984, "rougeL_recall": 0.0016493867049964376, "rougeL_recall_stderr": 0.00045298383898349467, "rougeLsum_fmeasure": 0.0019503315839039083, "rougeLsum_fmeasure_stderr": 0.0005574612507559669, "rougeLsum_precision": 0.0022311468898594308, "rougeLsum_precision_stderr": 0.0006382468727242592, "rougeLsum_recall": 0.00179354722651545, "rougeLsum_recall_stderr": 0.0005153565164091152}}}}
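The merged.json above nests the same generation metrics as dataset → few-shot count (stringified integer key) → prompt name → metric/stderr pairs. A minimal access sketch, assuming the file is read from this repo's path:

```python
# Minimal sketch: look up one value in merged.json
# (keys: dataset -> fewshot count as a string -> prompt name -> metric).
import json

with open("2b855b11boscarseed1/evaluation/generation/merged.json") as f:
    merged = json.load(f)

# 5-shot ROUGE-2 F1 for the WebNLG PALM prompt:
print(merged["GEM/web_nlg_en"]["5"]["PALM_prompt"]["rouge2_fmeasure"])
```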
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_0.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.3801106976509281,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.030814977906570784
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.09254608062110982,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.003969016029393206
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.2514658272132534,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.005470193417734038
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.09839951058099769,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0024506647455068047
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.04251830138142587,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0025980589101773847
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.11876788025878558,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0033890059840335
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.04579430308932667,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0014988641201888384
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.08735615223930215,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0037276787142384374
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.24225619592357273,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.005268443646117898
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.09334972395562345,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0022379508481899127
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.08738746346078487,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.003771697087126988
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.23626985324693767,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.005054786193539789
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.09257280431668473,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0022654509452080854
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 0,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.5239721644834189,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.03609798297687262
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.13082237107727746,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0041887696056868914
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.29894851591422195,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.0051748543299946
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.14826158596606656,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0034048406078764424
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.06041678931722799,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0025853119808927536
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.1438349096175946,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0034922545459490128
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.06901771335292758,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0020860447213003814
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.11585043732455273,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.003667689089676262
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.27586821042029314,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.004795036002022945
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.13229541662919794,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.002864700499056504
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.11910114676043697,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0037751935052806325
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.27899773980715475,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.004795091819308235
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.13555256797626594,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.002979591459721934
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 1,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_2.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6155617098906994, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "bleu_stderr": 0.04320711987914936},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.15764962909670655, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.004811814678098913},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3156619362796641, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.005146634895664645},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.16893359766104024, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.0038194664000741757},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.07547310729323965, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.002923357742977468},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.16143870017973952, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0037206349376128402},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.08285140079808971, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0024706623511134077},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.1391887515979127, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.004157638480121235},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.292888997189622, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.004754219104081643},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.15099445149853782, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0032341025124660036},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.14269660006491244, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.004284849569606708},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.29574913885784265, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.004809657463857144},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1540461900906591, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.003343046732804547}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 2,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_3.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7638442842916376, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "bleu_stderr": 0.043708542511138676},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.17306675943358993, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.005175271126864724},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3366968869783645, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.0052711987086574656},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.18548340026259344, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.004223744602181456},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.08724475546238586, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.003316626156011152},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.17335505514005853, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.003911222192069402},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.09385858633035163, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0028582868714185404},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.15183832422624116, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.004487449653529665},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.30970296664886204, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.0048061266737124434},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.16463283674108964, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0035889811954755296},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.15662013259115215, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.004651735217163702},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3138687175842859, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.004862972063876745},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.16864351715068068, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0037107680564301263}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 3,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.8719331256074123, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "bleu_stderr": 0.057879221179053614},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.1851425968057546, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.005242772266486215},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3521224313867968, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.005038061225917597},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.19456487112141713, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.004109079813244224},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.09493067862691192, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.0033973584763323903},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.18378368228130243, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.003782300251501581},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.10023987500778037, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.00282151252673031},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.163782526391218, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.004592058011452551},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3247189913662455, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.004610930009186234},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.17333927997130413, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.003493985482674142},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.16903111233990037, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.004770080380107174},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.32924041133628895, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.004661708271273997},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.17795216339276917, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0036338202974834652}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 4,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.9071599045860481, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "bleu_stderr": 0.04132800578881959},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.19439128617510687, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.005421482683712362},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.360346127905188, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.005185217658659474},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.20471471819889542, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.004366365024283305},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.10302514882290766, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.0036082227594639027},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.19046117331587684, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0038953705603308754},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.10719970952871183, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.00298368361864098},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.1710926703224802, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.004698450665775182},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33314261355257685, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.004754137888970346},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.18231989678501748, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.00369484632221736},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.17698148297717947, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.004898716157488609},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3375011370591266, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.0047979049681730265},
    {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.18722830056278536, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0038488776142113694}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 5,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_0.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1476470442480304, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.0027640730190324147},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1987093879718628, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.0029637609362227523},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.14881520480086885, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.0021587431282467824},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0287482827215408, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.0009244009986468182},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.04207438489658476, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0013026460720942934},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.030793880936937773, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0008589680063776107},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11919226323771576, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.0023730668962417723},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1605716758450966, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.002356213726056233},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.11844891250439296, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0016052290609412721},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.13689809260299124, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.002643420711314581},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.18318434106372952, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.002750769375883103},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1370648388242123, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0019890326379503644},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.7689251791579401, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "bleu_stderr": 0.08056354651616494}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 0,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
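The slim.* layout is repeated once per fewshot setting, with the trailing _0 ... _5 in each filename matching config["num_fewshot"], so per-task files can be compared directly across shot counts. A small, purely illustrative sketch of collecting one metric across the fewshot variants of a task follows; the glob pattern and the chosen metric are assumptions for the example, not part of the commit.

import glob
import json

# Hypothetical glob over the wiki_lingua slim files added in this commit.
pattern = "2b855b11boscarseed1/evaluation/generation/slim.*GEM-wiki_lingua_en_tldr_en_*.json"

scores = {}
for path in sorted(glob.glob(pattern)):
    with open(path) as f:
        report = json.load(f)
    shots = report["config"]["num_fewshot"]
    # Pick the entry that carries the metric of interest, e.g. rougeL_fmeasure.
    for entry in report["results"]:
        if "rougeL_fmeasure" in entry:
            scores[shots] = entry["rougeL_fmeasure"]

for shots in sorted(scores):
    print(f"{shots}-shot rougeL_fmeasure: {scores[shots]:.4f}")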
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_1.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.22376650173536908, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.0033099883221279717},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.233044248806137, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.002952305065889663},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.19163795576049275, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.002095941856037326},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0566885511786849, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.001922144512262528},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.054244362067460944, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0015009900283382725},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04458039177873789, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.001138994780925126},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.17115197989214356, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.002730566360378985},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1753866049193498, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.0022597816497720055},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.14368563344942828, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0015580386025042744},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.20943173970561998, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.003140748148754858},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.21808332623771157, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.002768104713925207},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.17897651689986416, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0019496135779428591},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.690348502752936, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "bleu_stderr": 0.04816844705930497}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 1,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_2.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.29318751960337, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.003906796008685731},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2507046446902222, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.0029715006311925958},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.22003513847299133, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.0021076546284530782},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.08627504991656038, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.002487880526812255},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.06676278485251605, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0016295916757062398},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.058544685834468764, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0012428137662634153},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.22625403204772632, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.003335160888542255},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.187612565720097, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.0022917340918039352},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.1646868100175827, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0015819419652510194},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.27427864211994335, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.0037481794388014565},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.23337797750912687, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.002789848859667374},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.20484462592482003, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0019789253350355387},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 3.431375370657664, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "bleu_stderr": 0.04760571110301349}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 2,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_3.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.2680095180907205, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.004391894212593074},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.19475234917332057, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.003183500503824711},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.17923839306645714, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.002399847035078048},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.07985656912272211, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.0026354691331866935},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05177357998459295, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.0014956995793890587},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.047670044472559195, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0012123884267024713},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.21163784293597465, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.003752063265513303},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.14785575418501767, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.002456148738115927},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13674199106912197, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0018380053950085664},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.25180299854737964, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.004210636657378989},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.18186826651932245, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.002992911142368979},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.16742344251678107, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0022503819271764707},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.483185496130131, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "bleu_stderr": 0.08015886448893912}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 3,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b11boscarseed1/evaluation/generation/slim.2b855b11boscarseed1_GEM-wiki_lingua_en_tldr_en_4.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09408573017762623, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_precision_stderr": 0.0036374250034598983},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.062322473280787063, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_recall_stderr": 0.0024278586980927977},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.058650885364296294, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge1_fmeasure_stderr": 0.0020654868574018297},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.031658574230410935, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_precision_stderr": 0.002078738928518879},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.017760904175768037, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_recall_stderr": 0.001047213466542941},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.01681560917128696, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rouge2_fmeasure_stderr": 0.0008901092196544399},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0777613311142758, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_precision_stderr": 0.0031390664493088517},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.0487201551142007, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_recall_stderr": 0.0018838472253448453},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.04642380293882198, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeL_fmeasure_stderr": 0.0016260955707942933},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.0887232057330063, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_precision_stderr": 0.0034775731184571623},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.05793373755686782, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_recall_stderr": 0.002257623454700228},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.05466679297892778, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "rougeLsum_fmeasure_stderr": 0.0019233345508114297},
    {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.07172095791495998, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "bleu_stderr": 0.00789659723575077}
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarseeds/2b855b11boscarseed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 4,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}