Added results for 2B 1.1 model
README.md (changed)
@@ -718,36 +718,36 @@ All results reported below are on a 5-shot setting.
       <td>Commonsense Reasoning</td>
       <td>xstorycloze_es</td>
       <td>acc</td>
-      <td>64.
+      <td>64.73</td>
     </tr>
     <tr>
       <td rowspan="2">NLI</td>
       <td>wnli_es</td>
       <td>acc</td>
-      <td>
+      <td>56.34</td>
     </tr>
     <tr>
       <td>xnli_es</td>
       <td>acc</td>
-      <td>44.
+      <td>44.74</td>
     </tr>
     <tr>
       <td>Paraphrasing</td>
       <td>paws_es</td>
       <td>acc</td>
-      <td>
+      <td>55.95</td>
     </tr>
     <tr>
       <td>QA</td>
       <td>xquad_es</td>
       <td>acc</td>
-      <td>
+      <td>57.59</td>
    </tr>
     <tr>
       <td>Translation</td>
       <td>flores_es</td>
       <td>bleu</td>
-      <td>
+      <td>20.05</td>
     </tr>
   </tbody>
 </table>
@@ -766,12 +766,12 @@ All results reported below are on a 5-shot setting.
       <td rowspan="2">Commonsense Reasoning</td>
       <td>copa_ca</td>
       <td>acc</td>
-      <td>
+      <td>70.2</td>
     </tr>
     <tr>
       <td>xstorycloze_ca</td>
       <td>acc</td>
-      <td>
+      <td>66.38</td>
     </tr>
     <tr>
       <td rowspan="2">NLI</td>
@@ -782,50 +782,50 @@ All results reported below are on a 5-shot setting.
     <tr>
       <td>xnli_ca</td>
       <td>acc</td>
-      <td>48.
+      <td>48.15</td>
     </tr>
     <tr>
       <td rowspan="2">Paraphrasing</td>
       <td>parafraseja</td>
       <td>acc</td>
-      <td>
+      <td>61.35</td>
     </tr>
     <tr>
       <td>paws_ca</td>
       <td>acc</td>
-      <td>
+      <td>57.05</td>
     </tr>
     <tr>
       <td rowspan="5">QA</td>
       <td>arc_ca_easy</td>
       <td>acc</td>
-      <td>
+      <td>55.3</td>
     </tr>
     <tr>
       <td>arc_ca_challenge</td>
       <td>acc</td>
-      <td>
+      <td>27.65</td>
     </tr>
     <tr>
       <td>openbookqa_ca</td>
       <td>acc</td>
-      <td>
+      <td>29.40</td>
     </tr>
     <tr>
       <td>piqa_ca</td>
       <td>acc</td>
-      <td>
+      <td>63.82</td>
     </tr>
     <tr>
       <td>siqa_ca</td>
       <td>acc</td>
-      <td>
+      <td>43.04</td>
     </tr>
     <tr>
       <td>Translation</td>
       <td>flores_ca</td>
       <td>bleu</td>
-      <td>
+      <td>24.93</td>
     </tr>
   </tbody></table>
 
@@ -843,51 +843,51 @@ All results reported below are on a 5-shot setting.
       <td rowspan="2">Commonsense Reasoning</td>
       <td>xcopa_eu</td>
       <td>acc</td>
-      <td>
+      <td>58</td>
     </tr>
     <tr>
       <td>xstorycloze_eu</td>
       <td>acc</td>
-      <td>
+      <td>58.97</td>
     </tr>
     <tr>
       <td rowspan="2">NLI</td>
       <td>wnli_eu</td>
       <td>acc</td>
-      <td>
+      <td>43.66</td>
     </tr>
     <tr>
       <td>xnli_eu</td>
       <td>acc</td>
-      <td>
+      <td>42.03</td>
     </tr>
     <tr>
       <td rowspan="3">QA</td>
       <td>eus_exams</td>
       <td>acc</td>
-      <td>
+      <td>26.11</td>
     </tr>
     <tr>
       <td>eus_proficiency</td>
       <td>acc</td>
-      <td>
+      <td>24.09</td>
     </tr>
     <tr>
       <td>eus_trivia</td>
       <td>acc</td>
-      <td>
+      <td>28.05</td>
     </tr>
     <tr>
       <td>Reading Comprehension</td>
       <td>eus_reading</td>
       <td>acc</td>
-      <td>
+      <td>28.41</td>
     </tr>
     <tr>
       <td>Translation</td>
       <td>flores_eu</td>
       <td>bleu</td>
-      <td>
+      <td>8.96</td>
     </tr>
   </tbody></table>
 
@@ -905,24 +905,24 @@ All results reported below are on a 5-shot setting.
       <td rowspan="2">Paraphrasing</td>
       <td>parafrases_gl</td>
       <td>acc</td>
-      <td>
+      <td>56.08</td>
     </tr>
     <tr>
       <td>paws_gl</td>
       <td>acc</td>
-      <td>
+      <td>54.85</td>
     </tr>
     <tr>
       <td>QA</td>
       <td>openbookqa_gl</td>
       <td>acc</td>
-      <td>
+      <td>25.4</td>
     </tr>
     <tr>
       <td>Translation</td>
       <td>flores_gl</td>
       <td>bleu</td>
-      <td>
+      <td>22.38</td>
     </tr>
   </tbody>
 </table>
@@ -946,35 +946,35 @@ All results reported below are on a 5-shot setting.
     <tr>
       <td>xstorycloze_en</td>
       <td>acc</td>
-      <td>
+      <td>71.81</td>
     </tr>
     <tr>
       <td rowspan="2">NLI</td>
       <td>wnli</td>
       <td>acc</td>
-      <td>
+      <td>52.11</td>
     </tr>
     <tr>
       <td>xnli_en</td>
       <td>acc</td>
-      <td>47
+      <td>46.47</td>
     </tr>
     <tr>
       <td>Paraphrasing</td>
       <td>paws *</td>
       <td>acc</td>
-      <td>
+      <td>56.5</td>
     </tr>
     <tr>
       <td rowspan="6">QA</td>
       <td>arc_easy</td>
       <td>acc</td>
-      <td>
+      <td>72.14</td>
     </tr>
     <tr>
       <td>arc_challenge</td>
       <td>acc</td>
-      <td>
+      <td>35.41</td>
     </tr>
     <tr>
       <td>openbookqa</td>
@@ -984,17 +984,17 @@ All results reported below are on a 5-shot setting.
     <tr>
       <td>piqa</td>
       <td>acc</td>
-      <td>
+      <td>73.61</td>
     </tr>
     <tr>
       <td>social_iqa</td>
       <td>acc</td>
-      <td>
+      <td>44.78</td>
     </tr>
     <tr>
-      <td>
+      <td>xquad_en **</td>
       <td>acc</td>
-      <td>
+      <td>64.87</td>
     </tr>
   </tbody></table>
 
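All of the scores added above are reported in a 5-shot setting. As a point of reference only, here is a minimal sketch of how such a 5-shot run over a few of the listed tasks might look with EleutherAI's lm-evaluation-harness. This is not necessarily how these numbers were produced: the model id is a placeholder and the task names are assumed to match the harness's registered tasks; neither assumption is confirmed by this commit.

```python
# Hypothetical sketch: a 5-shot evaluation with EleutherAI's lm-evaluation-harness.
# The checkpoint id and task names below are placeholders/assumptions, not taken from this diff.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",                                       # Hugging Face transformers backend
    model_args="pretrained=your-org/your-2b-model",   # placeholder checkpoint id
    tasks=["xstorycloze_es", "xnli_es", "paws_es"],   # a subset of the tasks listed above (assumed names)
    num_fewshot=5,                                    # matches the 5-shot setting reported here
)

# Print per-task metrics (e.g. acc) from the harness output.
for task, metrics in results["results"].items():
    print(task, metrics)
```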
|