Update README.md
README.md (CHANGED)
@@ -57,8 +57,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: ant_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: 6315.00 +/- 675.00
@@ -70,8 +70,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: halfcheetah_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: 7226.50 +/- 241.50
@@ -83,8 +83,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: hopper_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: 2794.60 +/- 612.62
@@ -96,8 +96,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: humanoid_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: 7376.26 +/- 0.00
@@ -109,8 +109,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: humanoidstandup_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: 320567.82 +/- 58462.11
@@ -122,8 +122,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: inverteddoublependulum_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: 6105.75 +/- 4368.65
@@ -135,8 +135,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: invertedpendulum_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: 1000.00 +/- 0.00
@@ -148,8 +148,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: pusher_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: -37.82 +/- 8.72
@@ -161,8 +161,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: reacher_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: -6.25 +/- 2.63
@@ -174,8 +174,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: swimmer_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: 93.20 +/- 5.40
@@ -187,8 +187,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: walker2d_v4
+      type: MuJoCo
     metrics:
     - type: total_reward
       value: 5400.00 +/- 107.95
@@ -200,8 +200,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: assembly-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 307.08 +/- 25.20
@@ -213,8 +213,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: basketball-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 568.04 +/- 60.72
@@ -226,8 +226,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: bin-picking-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 7.88 +/- 4.28
@@ -239,8 +239,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: box-close-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 61.75 +/- 13.54
@@ -252,8 +252,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: button-press-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 624.67 +/- 42.77
@@ -265,8 +265,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: button-press-topdown-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 449.36 +/- 62.16
@@ -278,8 +278,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: button-press-topdown-wall-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 482.08 +/- 32.48
@@ -291,8 +291,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: button-press-wall-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 672.00 +/- 26.48
@@ -304,8 +304,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: coffee-button-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 719.00 +/- 41.10
@@ -317,8 +317,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: coffee-pull-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 26.04 +/- 56.12
@@ -330,8 +330,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: coffee-push-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 571.01 +/- 112.28
@@ -343,8 +343,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: dial-turn-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 783.90 +/- 53.17
@@ -356,8 +356,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: disassemble-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 523.60 +/- 58.15
@@ -369,8 +369,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: door-close-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 538.10 +/- 25.76
@@ -382,8 +382,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: door-lock-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 356.51 +/- 249.44
@@ -395,8 +395,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: door-open-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 581.33 +/- 26.33
@@ -408,8 +408,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: door-unlock-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 352.86 +/- 147.78
@@ -421,8 +421,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: drawer-close-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 838.88 +/- 7.41
@@ -434,8 +434,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: drawer-open-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 493.00 +/- 3.57
@@ -447,8 +447,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: faucet-close-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 749.46 +/- 14.83
@@ -460,8 +460,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: faucet-open-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 732.47 +/- 15.23
@@ -473,8 +473,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: hammer-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 669.31 +/- 69.56
@@ -486,8 +486,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: hand-insert-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 142.81 +/- 146.64
@@ -499,8 +499,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: handle-press-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 835.30 +/- 114.19
@@ -512,8 +512,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: handle-press-side-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 852.96 +/- 16.08
@@ -525,8 +525,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: handle-pull-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 701.10 +/- 13.82
@@ -538,8 +538,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: handle-pull-side-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 493.10 +/- 53.65
@@ -551,8 +551,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: lever-pull-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 548.72 +/- 81.12
@@ -564,8 +564,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: peg-insert-side-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 352.43 +/- 137.24
@@ -577,8 +577,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: peg-unplug-side-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 401.52 +/- 175.27
@@ -590,8 +590,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: pick-out-of-hole-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 364.20 +/- 79.56
@@ -603,8 +603,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: pick-place-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 414.02 +/- 91.10
@@ -616,8 +616,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: pick-place-wall-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 553.18 +/- 84.72
@@ -629,8 +629,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: plate-slide-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 531.98 +/- 156.94
@@ -642,8 +642,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: plate-slide-back-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 703.93 +/- 108.27
@@ -655,8 +655,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: plate-slide-back-side-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 721.29 +/- 62.15
@@ -668,8 +668,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: plate-slide-side-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 578.24 +/- 143.73
@@ -681,8 +681,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: push-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 729.33 +/- 104.40
@@ -694,8 +694,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: push-back-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 372.16 +/- 112.75
@@ -707,8 +707,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: push-wall-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 741.68 +/- 14.84
@@ -720,8 +720,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: reach-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 684.45 +/- 136.55
@@ -733,8 +733,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: reach-wall-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 738.02 +/- 100.96
@@ -746,8 +746,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shelf-place-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 268.34 +/- 29.07
@@ -759,8 +759,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: soccer-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 438.44 +/- 189.63
@@ -772,8 +772,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: stick-pull-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 483.98 +/- 83.25
@@ -785,8 +785,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: stick-push-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 563.07 +/- 173.40
@@ -798,8 +798,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: sweep-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 487.19 +/- 60.02
@@ -811,8 +811,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: sweep-into-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 798.80 +/- 15.62
@@ -824,8 +824,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: window-close-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 562.48 +/- 91.17
@@ -837,8 +837,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: window-open-v2
+      type: Meta-World
     metrics:
     - type: total_reward
       value: 573.69 +/- 93.98
@@ -850,8 +850,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandblockstack
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 347.40 +/- 50.60
@@ -863,8 +863,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandbottlecap
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 338.25 +/- 81.25
@@ -876,8 +876,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandcatchabreast
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 11.81 +/- 21.28
@@ -889,8 +889,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandcatchover2underarm
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 31.60 +/- 7.20
@@ -902,8 +902,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandcatchunderarm
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 18.21 +/- 9.46
@@ -915,8 +915,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhanddoorcloseinward
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 3.97 +/- 0.15
@@ -928,8 +928,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhanddoorcloseoutward
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 358.50 +/- 4.50
@@ -941,8 +941,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhanddooropeninward
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 108.25 +/- 8.50
@@ -954,8 +954,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhanddooropenoutward
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 83.65 +/- 12.10
@@ -967,8 +967,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandgraspandplace
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 485.15 +/- 89.10
@@ -980,8 +980,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandkettle
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: -450.47 +/- 0.00
@@ -993,8 +993,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandliftunderarm
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 377.92 +/- 13.24
@@ -1006,8 +1006,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandover
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 33.01 +/- 0.96
@@ -1019,8 +1019,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandpen
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 98.80 +/- 83.60
@@ -1032,8 +1032,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandpushblock
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 445.60 +/- 2.20
@@ -1045,8 +1045,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandreorientation
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 2798.00 +/- 2112.00
@@ -1058,8 +1058,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandscissors
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 747.95 +/- 7.65
@@ -1071,8 +1071,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandswingcup
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 3775.50 +/- 583.70
@@ -1084,8 +1084,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandswitch
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 268.25 +/- 2.35
@@ -1097,8 +1097,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: shadowhandtwocatchunderarm
+      type: Bi-DexHands
     metrics:
     - type: total_reward
       value: 2.17 +/- 0.67
@@ -1110,8 +1110,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-0-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -191.39 +/- 22.96
@@ -1123,8 +1123,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-5-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -194.01 +/- 3.66
@@ -1136,8 +1136,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-10-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -213.28 +/- 2.01
@@ -1149,8 +1149,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-15-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -227.82 +/- 4.29
@@ -1162,8 +1162,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-20-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -259.99 +/- 22.70
@@ -1175,8 +1175,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-25-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -282.28 +/- 20.70
@@ -1188,8 +1188,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-30-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -307.02 +/- 19.23
@@ -1201,8 +1201,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-35-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -314.36 +/- 5.62
@@ -1214,8 +1214,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-40-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -339.34 +/- 9.57
@@ -1227,8 +1227,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-45-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -366.63 +/- 7.47
@@ -1240,8 +1240,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-50-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -395.94 +/- 17.65
@@ -1253,8 +1253,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-55-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -403.73 +/- 2.03
@@ -1266,8 +1266,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-60-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -434.25 +/- 4.12
@@ -1279,8 +1279,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-65-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -480.31 +/- 8.63
@@ -1292,8 +1292,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-70-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -480.76 +/- 5.98
@@ -1305,8 +1305,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-75-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -476.83 +/- 2.44
@@ -1318,8 +1318,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-80-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -497.13 +/- 2.95
@@ -1331,8 +1331,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-85-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -513.83 +/- 3.06
@@ -1344,8 +1344,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-90-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -532.70 +/- 3.61
@@ -1357,8 +1357,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-95-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -557.42 +/- 3.81
@@ -1370,8 +1370,8 @@ model-index:
       type: in-context-reinforcement-learning
       name: In-Context Reinforcement Learning
     dataset:
-      name:
-      type:
+      name: industrial-benchmark-100-v1
+      type: Industrial-Benchmark
     metrics:
     - type: total_reward
       value: -574.57 +/- 4.37