SummerSigh commited on
Commit
5963aad
·
1 Parent(s): 3a60bea

Upload 13 files

Browse files
Files changed (6) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +3 -1602
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cff4e153e6ae4cc0809ba50760698d5cdb174c040e4e0b0ecd716f14c8c032d
3
  size 566764677
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:601ba7c4801221453cdc411619b3a5e462bed33a0a394552bc9163aca42b0511
3
  size 566764677
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c90f91670d9be68b95a93b2db113cab2197c17152363fb01f228a0c298ca8c01
3
  size 283396921
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b52d365b3301b16df201321778a7715f4d2ac04e3875ab200597e5e3363be00
3
  size 283396921
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffc1d1533f932b4acd1d3ced558144cff4759b929b8adaf81ffd2b3599100b77
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f082c72d40f23289d12f6128997de02fa1da6390366450b9ea6cd7036cdf235
3
  size 14575
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75076ac53249cd31f7f5c6e5e185c4fb5a3e9f0f65b416a04806909f89e94a3b
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b585f28ad027f2ac72c545de5f340dac0bb92eae84b38785160b4ad9aef2e5
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55166f4df592bad67cf5fb8e529112ba4f46ea5706aa48a22bdb83ca09ca44a4
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd98f775e841215d8001dc84278762b1f3eed156f568ad2e6a842f6e07d2167
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 0.3574163317680359,
3
  "best_model_checkpoint": "OutModelPolicy\\checkpoint-132696",
4
- "epoch": 2.0,
5
- "global_step": 265392,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1605,1610 +1605,11 @@
1605
  "eval_samples_per_second": 309.276,
1606
  "eval_steps_per_second": 30.932,
1607
  "step": 132696
1608
- },
1609
- {
1610
- "epoch": 1.0,
1611
- "learning_rate": 9.980858503647435e-06,
1612
- "loss": 0.3356,
1613
- "step": 133000
1614
- },
1615
- {
1616
- "epoch": 1.01,
1617
- "learning_rate": 9.943178392717188e-06,
1618
- "loss": 0.3453,
1619
- "step": 133500
1620
- },
1621
- {
1622
- "epoch": 1.01,
1623
- "learning_rate": 9.905649002230662e-06,
1624
- "loss": 0.3349,
1625
- "step": 134000
1626
- },
1627
- {
1628
- "epoch": 1.01,
1629
- "learning_rate": 9.867968891300417e-06,
1630
- "loss": 0.3306,
1631
- "step": 134500
1632
- },
1633
- {
1634
- "epoch": 1.02,
1635
- "learning_rate": 9.83028878037017e-06,
1636
- "loss": 0.3361,
1637
- "step": 135000
1638
- },
1639
- {
1640
- "epoch": 1.02,
1641
- "learning_rate": 9.792608669439924e-06,
1642
- "loss": 0.3269,
1643
- "step": 135500
1644
- },
1645
- {
1646
- "epoch": 1.02,
1647
- "learning_rate": 9.754928558509678e-06,
1648
- "loss": 0.3349,
1649
- "step": 136000
1650
- },
1651
- {
1652
- "epoch": 1.03,
1653
- "learning_rate": 9.71732380780129e-06,
1654
- "loss": 0.3302,
1655
- "step": 136500
1656
- },
1657
- {
1658
- "epoch": 1.03,
1659
- "learning_rate": 9.679643696871044e-06,
1660
- "loss": 0.3416,
1661
- "step": 137000
1662
- },
1663
- {
1664
- "epoch": 1.04,
1665
- "learning_rate": 9.641963585940798e-06,
1666
- "loss": 0.3361,
1667
- "step": 137500
1668
- },
1669
- {
1670
- "epoch": 1.04,
1671
- "learning_rate": 9.604283475010551e-06,
1672
- "loss": 0.3445,
1673
- "step": 138000
1674
- },
1675
- {
1676
- "epoch": 1.04,
1677
- "learning_rate": 9.566678724302165e-06,
1678
- "loss": 0.332,
1679
- "step": 138500
1680
- },
1681
- {
1682
- "epoch": 1.05,
1683
- "learning_rate": 9.528998613371918e-06,
1684
- "loss": 0.3199,
1685
- "step": 139000
1686
- },
1687
- {
1688
- "epoch": 1.05,
1689
- "learning_rate": 9.491393862663532e-06,
1690
- "loss": 0.3306,
1691
- "step": 139500
1692
- },
1693
- {
1694
- "epoch": 1.06,
1695
- "learning_rate": 9.453713751733285e-06,
1696
- "loss": 0.3332,
1697
- "step": 140000
1698
- },
1699
- {
1700
- "epoch": 1.06,
1701
- "learning_rate": 9.41603364080304e-06,
1702
- "loss": 0.3434,
1703
- "step": 140500
1704
- },
1705
- {
1706
- "epoch": 1.06,
1707
- "learning_rate": 9.378353529872793e-06,
1708
- "loss": 0.3287,
1709
- "step": 141000
1710
- },
1711
- {
1712
- "epoch": 1.07,
1713
- "learning_rate": 9.340673418942546e-06,
1714
- "loss": 0.3366,
1715
- "step": 141500
1716
- },
1717
- {
1718
- "epoch": 1.07,
1719
- "learning_rate": 9.30306866823416e-06,
1720
- "loss": 0.3411,
1721
- "step": 142000
1722
- },
1723
- {
1724
- "epoch": 1.07,
1725
- "learning_rate": 9.265388557303913e-06,
1726
- "loss": 0.3333,
1727
- "step": 142500
1728
- },
1729
- {
1730
- "epoch": 1.08,
1731
- "learning_rate": 9.227708446373666e-06,
1732
- "loss": 0.3357,
1733
- "step": 143000
1734
- },
1735
- {
1736
- "epoch": 1.08,
1737
- "learning_rate": 9.190028335443421e-06,
1738
- "loss": 0.3214,
1739
- "step": 143500
1740
- },
1741
- {
1742
- "epoch": 1.09,
1743
- "learning_rate": 9.152348224513174e-06,
1744
- "loss": 0.3193,
1745
- "step": 144000
1746
- },
1747
- {
1748
- "epoch": 1.09,
1749
- "learning_rate": 9.114668113582927e-06,
1750
- "loss": 0.3201,
1751
- "step": 144500
1752
- },
1753
- {
1754
- "epoch": 1.09,
1755
- "learning_rate": 9.07698800265268e-06,
1756
- "loss": 0.3407,
1757
- "step": 145000
1758
- },
1759
- {
1760
- "epoch": 1.1,
1761
- "learning_rate": 9.039307891722433e-06,
1762
- "loss": 0.3423,
1763
- "step": 145500
1764
- },
1765
- {
1766
- "epoch": 1.1,
1767
- "learning_rate": 9.001703141014047e-06,
1768
- "loss": 0.3389,
1769
- "step": 146000
1770
- },
1771
- {
1772
- "epoch": 1.1,
1773
- "learning_rate": 8.964023030083802e-06,
1774
- "loss": 0.3429,
1775
- "step": 146500
1776
- },
1777
- {
1778
- "epoch": 1.11,
1779
- "learning_rate": 8.926418279375416e-06,
1780
- "loss": 0.3322,
1781
- "step": 147000
1782
- },
1783
- {
1784
- "epoch": 1.11,
1785
- "learning_rate": 8.888738168445169e-06,
1786
- "loss": 0.3265,
1787
- "step": 147500
1788
- },
1789
- {
1790
- "epoch": 1.12,
1791
- "learning_rate": 8.851058057514922e-06,
1792
- "loss": 0.3402,
1793
- "step": 148000
1794
- },
1795
- {
1796
- "epoch": 1.12,
1797
- "learning_rate": 8.813377946584675e-06,
1798
- "loss": 0.3368,
1799
- "step": 148500
1800
- },
1801
- {
1802
- "epoch": 1.12,
1803
- "learning_rate": 8.775697835654428e-06,
1804
- "loss": 0.3372,
1805
- "step": 149000
1806
- },
1807
- {
1808
- "epoch": 1.13,
1809
- "learning_rate": 8.738017724724183e-06,
1810
- "loss": 0.3405,
1811
- "step": 149500
1812
- },
1813
- {
1814
- "epoch": 1.13,
1815
- "learning_rate": 8.700337613793936e-06,
1816
- "loss": 0.328,
1817
- "step": 150000
1818
- },
1819
- {
1820
- "epoch": 1.13,
1821
- "learning_rate": 8.66265750286369e-06,
1822
- "loss": 0.3467,
1823
- "step": 150500
1824
- },
1825
- {
1826
- "epoch": 1.14,
1827
- "learning_rate": 8.624977391933442e-06,
1828
- "loss": 0.3246,
1829
- "step": 151000
1830
- },
1831
- {
1832
- "epoch": 1.14,
1833
- "learning_rate": 8.587372641225056e-06,
1834
- "loss": 0.334,
1835
- "step": 151500
1836
- },
1837
- {
1838
- "epoch": 1.15,
1839
- "learning_rate": 8.54969253029481e-06,
1840
- "loss": 0.3276,
1841
- "step": 152000
1842
- },
1843
- {
1844
- "epoch": 1.15,
1845
- "learning_rate": 8.512012419364564e-06,
1846
- "loss": 0.3291,
1847
- "step": 152500
1848
- },
1849
- {
1850
- "epoch": 1.15,
1851
- "learning_rate": 8.474407668656178e-06,
1852
- "loss": 0.3386,
1853
- "step": 153000
1854
- },
1855
- {
1856
- "epoch": 1.16,
1857
- "learning_rate": 8.436727557725931e-06,
1858
- "loss": 0.3446,
1859
- "step": 153500
1860
- },
1861
- {
1862
- "epoch": 1.16,
1863
- "learning_rate": 8.399047446795684e-06,
1864
- "loss": 0.334,
1865
- "step": 154000
1866
- },
1867
- {
1868
- "epoch": 1.16,
1869
- "learning_rate": 8.361367335865437e-06,
1870
- "loss": 0.3356,
1871
- "step": 154500
1872
- },
1873
- {
1874
- "epoch": 1.17,
1875
- "learning_rate": 8.323762585157051e-06,
1876
- "loss": 0.3513,
1877
- "step": 155000
1878
- },
1879
- {
1880
- "epoch": 1.17,
1881
- "learning_rate": 8.286082474226806e-06,
1882
- "loss": 0.3365,
1883
- "step": 155500
1884
- },
1885
- {
1886
- "epoch": 1.18,
1887
- "learning_rate": 8.248402363296559e-06,
1888
- "loss": 0.3255,
1889
- "step": 156000
1890
- },
1891
- {
1892
- "epoch": 1.18,
1893
- "learning_rate": 8.210722252366312e-06,
1894
- "loss": 0.3367,
1895
- "step": 156500
1896
- },
1897
- {
1898
- "epoch": 1.18,
1899
- "learning_rate": 8.173042141436065e-06,
1900
- "loss": 0.3263,
1901
- "step": 157000
1902
- },
1903
- {
1904
- "epoch": 1.19,
1905
- "learning_rate": 8.135362030505818e-06,
1906
- "loss": 0.3328,
1907
- "step": 157500
1908
- },
1909
- {
1910
- "epoch": 1.19,
1911
- "learning_rate": 8.097681919575571e-06,
1912
- "loss": 0.3191,
1913
- "step": 158000
1914
- },
1915
- {
1916
- "epoch": 1.19,
1917
- "learning_rate": 8.060001808645326e-06,
1918
- "loss": 0.3471,
1919
- "step": 158500
1920
- },
1921
- {
1922
- "epoch": 1.2,
1923
- "learning_rate": 8.02232169771508e-06,
1924
- "loss": 0.3239,
1925
- "step": 159000
1926
- },
1927
- {
1928
- "epoch": 1.2,
1929
- "learning_rate": 7.984716947006691e-06,
1930
- "loss": 0.337,
1931
- "step": 159500
1932
- },
1933
- {
1934
- "epoch": 1.21,
1935
- "learning_rate": 7.947036836076446e-06,
1936
- "loss": 0.3357,
1937
- "step": 160000
1938
- },
1939
- {
1940
- "epoch": 1.21,
1941
- "learning_rate": 7.9093567251462e-06,
1942
- "loss": 0.3122,
1943
- "step": 160500
1944
- },
1945
- {
1946
- "epoch": 1.21,
1947
- "learning_rate": 7.871676614215952e-06,
1948
- "loss": 0.3206,
1949
- "step": 161000
1950
- },
1951
- {
1952
- "epoch": 1.22,
1953
- "learning_rate": 7.834071863507566e-06,
1954
- "loss": 0.3265,
1955
- "step": 161500
1956
- },
1957
- {
1958
- "epoch": 1.22,
1959
- "learning_rate": 7.796391752577321e-06,
1960
- "loss": 0.3342,
1961
- "step": 162000
1962
- },
1963
- {
1964
- "epoch": 1.22,
1965
- "learning_rate": 7.758711641647072e-06,
1966
- "loss": 0.3482,
1967
- "step": 162500
1968
- },
1969
- {
1970
- "epoch": 1.23,
1971
- "learning_rate": 7.721031530716827e-06,
1972
- "loss": 0.3317,
1973
- "step": 163000
1974
- },
1975
- {
1976
- "epoch": 1.23,
1977
- "learning_rate": 7.68335141978658e-06,
1978
- "loss": 0.3414,
1979
- "step": 163500
1980
- },
1981
- {
1982
- "epoch": 1.24,
1983
- "learning_rate": 7.645671308856333e-06,
1984
- "loss": 0.3457,
1985
- "step": 164000
1986
- },
1987
- {
1988
- "epoch": 1.24,
1989
- "learning_rate": 7.607991197926088e-06,
1990
- "loss": 0.3244,
1991
- "step": 164500
1992
- },
1993
- {
1994
- "epoch": 1.24,
1995
- "learning_rate": 7.570386447217701e-06,
1996
- "loss": 0.3434,
1997
- "step": 165000
1998
- },
1999
- {
2000
- "epoch": 1.25,
2001
- "learning_rate": 7.532706336287454e-06,
2002
- "loss": 0.3379,
2003
- "step": 165500
2004
- },
2005
- {
2006
- "epoch": 1.25,
2007
- "learning_rate": 7.495026225357208e-06,
2008
- "loss": 0.3362,
2009
- "step": 166000
2010
- },
2011
- {
2012
- "epoch": 1.25,
2013
- "learning_rate": 7.457346114426961e-06,
2014
- "loss": 0.3424,
2015
- "step": 166500
2016
- },
2017
- {
2018
- "epoch": 1.26,
2019
- "learning_rate": 7.419666003496715e-06,
2020
- "loss": 0.3176,
2021
- "step": 167000
2022
- },
2023
- {
2024
- "epoch": 1.26,
2025
- "learning_rate": 7.3819858925664675e-06,
2026
- "loss": 0.3374,
2027
- "step": 167500
2028
- },
2029
- {
2030
- "epoch": 1.27,
2031
- "learning_rate": 7.3443057816362215e-06,
2032
- "loss": 0.33,
2033
- "step": 168000
2034
- },
2035
- {
2036
- "epoch": 1.27,
2037
- "learning_rate": 7.306625670705975e-06,
2038
- "loss": 0.3434,
2039
- "step": 168500
2040
- },
2041
- {
2042
- "epoch": 1.27,
2043
- "learning_rate": 7.268945559775729e-06,
2044
- "loss": 0.3312,
2045
- "step": 169000
2046
- },
2047
- {
2048
- "epoch": 1.28,
2049
- "learning_rate": 7.231416169289203e-06,
2050
- "loss": 0.3288,
2051
- "step": 169500
2052
- },
2053
- {
2054
- "epoch": 1.28,
2055
- "learning_rate": 7.193736058358957e-06,
2056
- "loss": 0.3365,
2057
- "step": 170000
2058
- },
2059
- {
2060
- "epoch": 1.28,
2061
- "learning_rate": 7.156055947428709e-06,
2062
- "loss": 0.3424,
2063
- "step": 170500
2064
- },
2065
- {
2066
- "epoch": 1.29,
2067
- "learning_rate": 7.118375836498463e-06,
2068
- "loss": 0.336,
2069
- "step": 171000
2070
- },
2071
- {
2072
- "epoch": 1.29,
2073
- "learning_rate": 7.080695725568216e-06,
2074
- "loss": 0.3246,
2075
- "step": 171500
2076
- },
2077
- {
2078
- "epoch": 1.3,
2079
- "learning_rate": 7.04301561463797e-06,
2080
- "loss": 0.3249,
2081
- "step": 172000
2082
- },
2083
- {
2084
- "epoch": 1.3,
2085
- "learning_rate": 7.005335503707723e-06,
2086
- "loss": 0.3461,
2087
- "step": 172500
2088
- },
2089
- {
2090
- "epoch": 1.3,
2091
- "learning_rate": 6.967655392777477e-06,
2092
- "loss": 0.338,
2093
- "step": 173000
2094
- },
2095
- {
2096
- "epoch": 1.31,
2097
- "learning_rate": 6.92997528184723e-06,
2098
- "loss": 0.3404,
2099
- "step": 173500
2100
- },
2101
- {
2102
- "epoch": 1.31,
2103
- "learning_rate": 6.892295170916984e-06,
2104
- "loss": 0.3425,
2105
- "step": 174000
2106
- },
2107
- {
2108
- "epoch": 1.32,
2109
- "learning_rate": 6.854615059986737e-06,
2110
- "loss": 0.3415,
2111
- "step": 174500
2112
- },
2113
- {
2114
- "epoch": 1.32,
2115
- "learning_rate": 6.817010309278351e-06,
2116
- "loss": 0.327,
2117
- "step": 175000
2118
- },
2119
- {
2120
- "epoch": 1.32,
2121
- "learning_rate": 6.779330198348104e-06,
2122
- "loss": 0.3414,
2123
- "step": 175500
2124
- },
2125
- {
2126
- "epoch": 1.33,
2127
- "learning_rate": 6.741650087417858e-06,
2128
- "loss": 0.3357,
2129
- "step": 176000
2130
- },
2131
- {
2132
- "epoch": 1.33,
2133
- "learning_rate": 6.703969976487611e-06,
2134
- "loss": 0.3448,
2135
- "step": 176500
2136
- },
2137
- {
2138
- "epoch": 1.33,
2139
- "learning_rate": 6.666365225779225e-06,
2140
- "loss": 0.3173,
2141
- "step": 177000
2142
- },
2143
- {
2144
- "epoch": 1.34,
2145
- "learning_rate": 6.628685114848978e-06,
2146
- "loss": 0.3375,
2147
- "step": 177500
2148
- },
2149
- {
2150
- "epoch": 1.34,
2151
- "learning_rate": 6.591005003918732e-06,
2152
- "loss": 0.3372,
2153
- "step": 178000
2154
- },
2155
- {
2156
- "epoch": 1.35,
2157
- "learning_rate": 6.553324892988485e-06,
2158
- "loss": 0.335,
2159
- "step": 178500
2160
- },
2161
- {
2162
- "epoch": 1.35,
2163
- "learning_rate": 6.515644782058239e-06,
2164
- "loss": 0.3258,
2165
- "step": 179000
2166
- },
2167
- {
2168
- "epoch": 1.35,
2169
- "learning_rate": 6.478040031349852e-06,
2170
- "loss": 0.3366,
2171
- "step": 179500
2172
- },
2173
- {
2174
- "epoch": 1.36,
2175
- "learning_rate": 6.440359920419606e-06,
2176
- "loss": 0.3354,
2177
- "step": 180000
2178
- },
2179
- {
2180
- "epoch": 1.36,
2181
- "learning_rate": 6.402679809489359e-06,
2182
- "loss": 0.3336,
2183
- "step": 180500
2184
- },
2185
- {
2186
- "epoch": 1.36,
2187
- "learning_rate": 6.364999698559113e-06,
2188
- "loss": 0.3292,
2189
- "step": 181000
2190
- },
2191
- {
2192
- "epoch": 1.37,
2193
- "learning_rate": 6.3273195876288665e-06,
2194
- "loss": 0.3301,
2195
- "step": 181500
2196
- },
2197
- {
2198
- "epoch": 1.37,
2199
- "learning_rate": 6.289714836920481e-06,
2200
- "loss": 0.3374,
2201
- "step": 182000
2202
- },
2203
- {
2204
- "epoch": 1.38,
2205
- "learning_rate": 6.252034725990233e-06,
2206
- "loss": 0.3225,
2207
- "step": 182500
2208
- },
2209
- {
2210
- "epoch": 1.38,
2211
- "learning_rate": 6.214354615059987e-06,
2212
- "loss": 0.3375,
2213
- "step": 183000
2214
- },
2215
- {
2216
- "epoch": 1.38,
2217
- "learning_rate": 6.17667450412974e-06,
2218
- "loss": 0.331,
2219
- "step": 183500
2220
- },
2221
- {
2222
- "epoch": 1.39,
2223
- "learning_rate": 6.138994393199494e-06,
2224
- "loss": 0.3183,
2225
- "step": 184000
2226
- },
2227
- {
2228
- "epoch": 1.39,
2229
- "learning_rate": 6.1013142822692475e-06,
2230
- "loss": 0.3555,
2231
- "step": 184500
2232
- },
2233
- {
2234
- "epoch": 1.39,
2235
- "learning_rate": 6.063709531560862e-06,
2236
- "loss": 0.3219,
2237
- "step": 185000
2238
- },
2239
- {
2240
- "epoch": 1.4,
2241
- "learning_rate": 6.026029420630614e-06,
2242
- "loss": 0.3369,
2243
- "step": 185500
2244
- },
2245
- {
2246
- "epoch": 1.4,
2247
- "learning_rate": 5.988349309700368e-06,
2248
- "loss": 0.3273,
2249
- "step": 186000
2250
- },
2251
- {
2252
- "epoch": 1.41,
2253
- "learning_rate": 5.950669198770121e-06,
2254
- "loss": 0.3426,
2255
- "step": 186500
2256
- },
2257
- {
2258
- "epoch": 1.41,
2259
- "learning_rate": 5.913064448061736e-06,
2260
- "loss": 0.3355,
2261
- "step": 187000
2262
- },
2263
- {
2264
- "epoch": 1.41,
2265
- "learning_rate": 5.875384337131489e-06,
2266
- "loss": 0.3458,
2267
- "step": 187500
2268
- },
2269
- {
2270
- "epoch": 1.42,
2271
- "learning_rate": 5.837704226201243e-06,
2272
- "loss": 0.3209,
2273
- "step": 188000
2274
- },
2275
- {
2276
- "epoch": 1.42,
2277
- "learning_rate": 5.800024115270995e-06,
2278
- "loss": 0.3336,
2279
- "step": 188500
2280
- },
2281
- {
2282
- "epoch": 1.42,
2283
- "learning_rate": 5.762344004340749e-06,
2284
- "loss": 0.3308,
2285
- "step": 189000
2286
- },
2287
- {
2288
- "epoch": 1.43,
2289
- "learning_rate": 5.7246638934105025e-06,
2290
- "loss": 0.3407,
2291
- "step": 189500
2292
- },
2293
- {
2294
- "epoch": 1.43,
2295
- "learning_rate": 5.6869837824802564e-06,
2296
- "loss": 0.3484,
2297
- "step": 190000
2298
- },
2299
- {
2300
- "epoch": 1.44,
2301
- "learning_rate": 5.6493036715500096e-06,
2302
- "loss": 0.3282,
2303
- "step": 190500
2304
- },
2305
- {
2306
- "epoch": 1.44,
2307
- "learning_rate": 5.6116235606197635e-06,
2308
- "loss": 0.3297,
2309
- "step": 191000
2310
- },
2311
- {
2312
- "epoch": 1.44,
2313
- "learning_rate": 5.573943449689516e-06,
2314
- "loss": 0.3184,
2315
- "step": 191500
2316
- },
2317
- {
2318
- "epoch": 1.45,
2319
- "learning_rate": 5.53626333875927e-06,
2320
- "loss": 0.3323,
2321
- "step": 192000
2322
- },
2323
- {
2324
- "epoch": 1.45,
2325
- "learning_rate": 5.498583227829023e-06,
2326
- "loss": 0.332,
2327
- "step": 192500
2328
- },
2329
- {
2330
- "epoch": 1.45,
2331
- "learning_rate": 5.4609784771206375e-06,
2332
- "loss": 0.3351,
2333
- "step": 193000
2334
- },
2335
- {
2336
- "epoch": 1.46,
2337
- "learning_rate": 5.423298366190391e-06,
2338
- "loss": 0.344,
2339
- "step": 193500
2340
- },
2341
- {
2342
- "epoch": 1.46,
2343
- "learning_rate": 5.385693615482004e-06,
2344
- "loss": 0.3399,
2345
- "step": 194000
2346
- },
2347
- {
2348
- "epoch": 1.47,
2349
- "learning_rate": 5.348013504551757e-06,
2350
- "loss": 0.3132,
2351
- "step": 194500
2352
- },
2353
- {
2354
- "epoch": 1.47,
2355
- "learning_rate": 5.310333393621511e-06,
2356
- "loss": 0.3303,
2357
- "step": 195000
2358
- },
2359
- {
2360
- "epoch": 1.47,
2361
- "learning_rate": 5.2726532826912645e-06,
2362
- "loss": 0.3287,
2363
- "step": 195500
2364
- },
2365
- {
2366
- "epoch": 1.48,
2367
- "learning_rate": 5.2349731717610185e-06,
2368
- "loss": 0.3227,
2369
- "step": 196000
2370
- },
2371
- {
2372
- "epoch": 1.48,
2373
- "learning_rate": 5.197293060830772e-06,
2374
- "loss": 0.3406,
2375
- "step": 196500
2376
- },
2377
- {
2378
- "epoch": 1.48,
2379
- "learning_rate": 5.159688310122385e-06,
2380
- "loss": 0.33,
2381
- "step": 197000
2382
- },
2383
- {
2384
- "epoch": 1.49,
2385
- "learning_rate": 5.122008199192139e-06,
2386
- "loss": 0.3329,
2387
- "step": 197500
2388
- },
2389
- {
2390
- "epoch": 1.49,
2391
- "learning_rate": 5.0843280882618916e-06,
2392
- "loss": 0.3411,
2393
- "step": 198000
2394
- },
2395
- {
2396
- "epoch": 1.5,
2397
- "learning_rate": 5.0466479773316455e-06,
2398
- "loss": 0.324,
2399
- "step": 198500
2400
- },
2401
- {
2402
- "epoch": 1.5,
2403
- "learning_rate": 5.0089678664013995e-06,
2404
- "loss": 0.3342,
2405
- "step": 199000
2406
- },
2407
- {
2408
- "epoch": 1.5,
2409
- "learning_rate": 4.971287755471153e-06,
2410
- "loss": 0.3385,
2411
- "step": 199500
2412
- },
2413
- {
2414
- "epoch": 1.51,
2415
- "learning_rate": 4.933607644540906e-06,
2416
- "loss": 0.3339,
2417
- "step": 200000
2418
- },
2419
- {
2420
- "epoch": 1.51,
2421
- "learning_rate": 4.895927533610659e-06,
2422
- "loss": 0.3273,
2423
- "step": 200500
2424
- },
2425
- {
2426
- "epoch": 1.51,
2427
- "learning_rate": 4.858247422680413e-06,
2428
- "loss": 0.331,
2429
- "step": 201000
2430
- },
2431
- {
2432
- "epoch": 1.52,
2433
- "learning_rate": 4.820642671972027e-06,
2434
- "loss": 0.3352,
2435
- "step": 201500
2436
- },
2437
- {
2438
- "epoch": 1.52,
2439
- "learning_rate": 4.7829625610417806e-06,
2440
- "loss": 0.3426,
2441
- "step": 202000
2442
- },
2443
- {
2444
- "epoch": 1.53,
2445
- "learning_rate": 4.745282450111534e-06,
2446
- "loss": 0.3414,
2447
- "step": 202500
2448
- },
2449
- {
2450
- "epoch": 1.53,
2451
- "learning_rate": 4.707602339181287e-06,
2452
- "loss": 0.3369,
2453
- "step": 203000
2454
- },
2455
- {
2456
- "epoch": 1.53,
2457
- "learning_rate": 4.669997588472901e-06,
2458
- "loss": 0.3415,
2459
- "step": 203500
2460
- },
2461
- {
2462
- "epoch": 1.54,
2463
- "learning_rate": 4.6323174775426545e-06,
2464
- "loss": 0.3443,
2465
- "step": 204000
2466
- },
2467
- {
2468
- "epoch": 1.54,
2469
- "learning_rate": 4.594637366612408e-06,
2470
- "loss": 0.3287,
2471
- "step": 204500
2472
- },
2473
- {
2474
- "epoch": 1.54,
2475
- "learning_rate": 4.556957255682162e-06,
2476
- "loss": 0.3355,
2477
- "step": 205000
2478
- },
2479
- {
2480
- "epoch": 1.55,
2481
- "learning_rate": 4.519277144751915e-06,
2482
- "loss": 0.323,
2483
- "step": 205500
2484
- },
2485
- {
2486
- "epoch": 1.55,
2487
- "learning_rate": 4.481597033821668e-06,
2488
- "loss": 0.3286,
2489
- "step": 206000
2490
- },
2491
- {
2492
- "epoch": 1.56,
2493
- "learning_rate": 4.443916922891422e-06,
2494
- "loss": 0.3397,
2495
- "step": 206500
2496
- },
2497
- {
2498
- "epoch": 1.56,
2499
- "learning_rate": 4.406236811961175e-06,
2500
- "loss": 0.3329,
2501
- "step": 207000
2502
- },
2503
- {
2504
- "epoch": 1.56,
2505
- "learning_rate": 4.368707421474649e-06,
2506
- "loss": 0.3111,
2507
- "step": 207500
2508
- },
2509
- {
2510
- "epoch": 1.57,
2511
- "learning_rate": 4.331027310544403e-06,
2512
- "loss": 0.3309,
2513
- "step": 208000
2514
- },
2515
- {
2516
- "epoch": 1.57,
2517
- "learning_rate": 4.293422559836016e-06,
2518
- "loss": 0.3205,
2519
- "step": 208500
2520
- },
2521
- {
2522
- "epoch": 1.58,
2523
- "learning_rate": 4.25574244890577e-06,
2524
- "loss": 0.3318,
2525
- "step": 209000
2526
- },
2527
- {
2528
- "epoch": 1.58,
2529
- "learning_rate": 4.218062337975523e-06,
2530
- "loss": 0.3383,
2531
- "step": 209500
2532
- },
2533
- {
2534
- "epoch": 1.58,
2535
- "learning_rate": 4.180382227045276e-06,
2536
- "loss": 0.329,
2537
- "step": 210000
2538
- },
2539
- {
2540
- "epoch": 1.59,
2541
- "learning_rate": 4.14270211611503e-06,
2542
- "loss": 0.3222,
2543
- "step": 210500
2544
- },
2545
- {
2546
- "epoch": 1.59,
2547
- "learning_rate": 4.105022005184783e-06,
2548
- "loss": 0.325,
2549
- "step": 211000
2550
- },
2551
- {
2552
- "epoch": 1.59,
2553
- "learning_rate": 4.067417254476397e-06,
2554
- "loss": 0.324,
2555
- "step": 211500
2556
- },
2557
- {
2558
- "epoch": 1.6,
2559
- "learning_rate": 4.029737143546151e-06,
2560
- "loss": 0.3179,
2561
- "step": 212000
2562
- },
2563
- {
2564
- "epoch": 1.6,
2565
- "learning_rate": 3.992057032615904e-06,
2566
- "loss": 0.329,
2567
- "step": 212500
2568
- },
2569
- {
2570
- "epoch": 1.61,
2571
- "learning_rate": 3.954376921685657e-06,
2572
- "loss": 0.3227,
2573
- "step": 213000
2574
- },
2575
- {
2576
- "epoch": 1.61,
2577
- "learning_rate": 3.916696810755411e-06,
2578
- "loss": 0.3339,
2579
- "step": 213500
2580
- },
2581
- {
2582
- "epoch": 1.61,
2583
- "learning_rate": 3.879016699825164e-06,
2584
- "loss": 0.3241,
2585
- "step": 214000
2586
- },
2587
- {
2588
- "epoch": 1.62,
2589
- "learning_rate": 3.8413365888949175e-06,
2590
- "loss": 0.3308,
2591
- "step": 214500
2592
- },
2593
- {
2594
- "epoch": 1.62,
2595
- "learning_rate": 3.803656477964671e-06,
2596
- "loss": 0.3273,
2597
- "step": 215000
2598
- },
2599
- {
2600
- "epoch": 1.62,
2601
- "learning_rate": 3.7659763670344246e-06,
2602
- "loss": 0.3293,
2603
- "step": 215500
2604
- },
2605
- {
2606
- "epoch": 1.63,
2607
- "learning_rate": 3.7283716163260388e-06,
2608
- "loss": 0.3376,
2609
- "step": 216000
2610
- },
2611
- {
2612
- "epoch": 1.63,
2613
- "learning_rate": 3.690691505395792e-06,
2614
- "loss": 0.3375,
2615
- "step": 216500
2616
- },
2617
- {
2618
- "epoch": 1.64,
2619
- "learning_rate": 3.6530113944655454e-06,
2620
- "loss": 0.3344,
2621
- "step": 217000
2622
- },
2623
- {
2624
- "epoch": 1.64,
2625
- "learning_rate": 3.615331283535299e-06,
2626
- "loss": 0.3159,
2627
- "step": 217500
2628
- },
2629
- {
2630
- "epoch": 1.64,
2631
- "learning_rate": 3.5777265328269127e-06,
2632
- "loss": 0.3243,
2633
- "step": 218000
2634
- },
2635
- {
2636
- "epoch": 1.65,
2637
- "learning_rate": 3.5400464218966663e-06,
2638
- "loss": 0.3296,
2639
- "step": 218500
2640
- },
2641
- {
2642
- "epoch": 1.65,
2643
- "learning_rate": 3.50236631096642e-06,
2644
- "loss": 0.3463,
2645
- "step": 219000
2646
- },
2647
- {
2648
- "epoch": 1.65,
2649
- "learning_rate": 3.464686200036173e-06,
2650
- "loss": 0.3229,
2651
- "step": 219500
2652
- },
2653
- {
2654
- "epoch": 1.66,
2655
- "learning_rate": 3.4270060891059265e-06,
2656
- "loss": 0.3302,
2657
- "step": 220000
2658
- },
2659
- {
2660
- "epoch": 1.66,
2661
- "learning_rate": 3.38932597817568e-06,
2662
- "loss": 0.3287,
2663
- "step": 220500
2664
- },
2665
- {
2666
- "epoch": 1.67,
2667
- "learning_rate": 3.351645867245433e-06,
2668
- "loss": 0.3265,
2669
- "step": 221000
2670
- },
2671
- {
2672
- "epoch": 1.67,
2673
- "learning_rate": 3.3140411165370473e-06,
2674
- "loss": 0.3391,
2675
- "step": 221500
2676
- },
2677
- {
2678
- "epoch": 1.67,
2679
- "learning_rate": 3.276361005606801e-06,
2680
- "loss": 0.328,
2681
- "step": 222000
2682
- },
2683
- {
2684
- "epoch": 1.68,
2685
- "learning_rate": 3.238680894676554e-06,
2686
- "loss": 0.3282,
2687
- "step": 222500
2688
- },
2689
- {
2690
- "epoch": 1.68,
2691
- "learning_rate": 3.2010007837463075e-06,
2692
- "loss": 0.3385,
2693
- "step": 223000
2694
- },
2695
- {
2696
- "epoch": 1.68,
2697
- "learning_rate": 3.163320672816061e-06,
2698
- "loss": 0.3223,
2699
- "step": 223500
2700
- },
2701
- {
2702
- "epoch": 1.69,
2703
- "learning_rate": 3.125640561885814e-06,
2704
- "loss": 0.3192,
2705
- "step": 224000
2706
- },
2707
- {
2708
- "epoch": 1.69,
2709
- "learning_rate": 3.0880358111774283e-06,
2710
- "loss": 0.328,
2711
- "step": 224500
2712
- },
2713
- {
2714
- "epoch": 1.7,
2715
- "learning_rate": 3.050355700247182e-06,
2716
- "loss": 0.3332,
2717
- "step": 225000
2718
- },
2719
- {
2720
- "epoch": 1.7,
2721
- "learning_rate": 3.012675589316935e-06,
2722
- "loss": 0.3292,
2723
- "step": 225500
2724
- },
2725
- {
2726
- "epoch": 1.7,
2727
- "learning_rate": 2.9749954783866885e-06,
2728
- "loss": 0.3291,
2729
- "step": 226000
2730
- },
2731
- {
2732
- "epoch": 1.71,
2733
- "learning_rate": 2.937315367456442e-06,
2734
- "loss": 0.327,
2735
- "step": 226500
2736
- },
2737
- {
2738
- "epoch": 1.71,
2739
- "learning_rate": 2.899710616748056e-06,
2740
- "loss": 0.336,
2741
- "step": 227000
2742
- },
2743
- {
2744
- "epoch": 1.71,
2745
- "learning_rate": 2.8620305058178093e-06,
2746
- "loss": 0.3155,
2747
- "step": 227500
2748
- },
2749
- {
2750
- "epoch": 1.72,
2751
- "learning_rate": 2.824350394887563e-06,
2752
- "loss": 0.3326,
2753
- "step": 228000
2754
- },
2755
- {
2756
- "epoch": 1.72,
2757
- "learning_rate": 2.786670283957316e-06,
2758
- "loss": 0.3241,
2759
- "step": 228500
2760
- },
2761
- {
2762
- "epoch": 1.73,
2763
- "learning_rate": 2.7489901730270696e-06,
2764
- "loss": 0.3312,
2765
- "step": 229000
2766
- },
2767
- {
2768
- "epoch": 1.73,
2769
- "learning_rate": 2.7113854223186837e-06,
2770
- "loss": 0.3371,
2771
- "step": 229500
2772
- },
2773
- {
2774
- "epoch": 1.73,
2775
- "learning_rate": 2.673705311388437e-06,
2776
- "loss": 0.3169,
2777
- "step": 230000
2778
- },
2779
- {
2780
- "epoch": 1.74,
2781
- "learning_rate": 2.6360252004581904e-06,
2782
- "loss": 0.3301,
2783
- "step": 230500
2784
- },
2785
- {
2786
- "epoch": 1.74,
2787
- "learning_rate": 2.598345089527944e-06,
2788
- "loss": 0.3188,
2789
- "step": 231000
2790
- },
2791
- {
2792
- "epoch": 1.74,
2793
- "learning_rate": 2.560664978597697e-06,
2794
- "loss": 0.331,
2795
- "step": 231500
2796
- },
2797
- {
2798
- "epoch": 1.75,
2799
- "learning_rate": 2.5229848676674506e-06,
2800
- "loss": 0.3279,
2801
- "step": 232000
2802
- },
2803
- {
2804
- "epoch": 1.75,
2805
- "learning_rate": 2.485304756737204e-06,
2806
- "loss": 0.3331,
2807
- "step": 232500
2808
- },
2809
- {
2810
- "epoch": 1.76,
2811
- "learning_rate": 2.4476246458069573e-06,
2812
- "loss": 0.3257,
2813
- "step": 233000
2814
- },
2815
- {
2816
- "epoch": 1.76,
2817
- "learning_rate": 2.4100198950985714e-06,
2818
- "loss": 0.3381,
2819
- "step": 233500
2820
- },
2821
- {
2822
- "epoch": 1.76,
2823
- "learning_rate": 2.372339784168325e-06,
2824
- "loss": 0.3196,
2825
- "step": 234000
2826
- },
2827
- {
2828
- "epoch": 1.77,
2829
- "learning_rate": 2.334659673238078e-06,
2830
- "loss": 0.3341,
2831
- "step": 234500
2832
- },
2833
- {
2834
- "epoch": 1.77,
2835
- "learning_rate": 2.2969795623078316e-06,
2836
- "loss": 0.3299,
2837
- "step": 235000
2838
- },
2839
- {
2840
- "epoch": 1.77,
2841
- "learning_rate": 2.2593748115994453e-06,
2842
- "loss": 0.3344,
2843
- "step": 235500
2844
- },
2845
- {
2846
- "epoch": 1.78,
2847
- "learning_rate": 2.221694700669199e-06,
2848
- "loss": 0.3377,
2849
- "step": 236000
2850
- },
2851
- {
2852
- "epoch": 1.78,
2853
- "learning_rate": 2.1840145897389524e-06,
2854
- "loss": 0.3278,
2855
- "step": 236500
2856
- },
2857
- {
2858
- "epoch": 1.79,
2859
- "learning_rate": 2.1463344788087056e-06,
2860
- "loss": 0.3484,
2861
- "step": 237000
2862
- },
2863
- {
2864
- "epoch": 1.79,
2865
- "learning_rate": 2.1087297281003197e-06,
2866
- "loss": 0.3289,
2867
- "step": 237500
2868
- },
2869
- {
2870
- "epoch": 1.79,
2871
- "learning_rate": 2.0710496171700733e-06,
2872
- "loss": 0.3261,
2873
- "step": 238000
2874
- },
2875
- {
2876
- "epoch": 1.8,
2877
- "learning_rate": 2.0333695062398264e-06,
2878
- "loss": 0.3206,
2879
- "step": 238500
2880
- },
2881
- {
2882
- "epoch": 1.8,
2883
- "learning_rate": 1.99568939530958e-06,
2884
- "loss": 0.3362,
2885
- "step": 239000
2886
- },
2887
- {
2888
- "epoch": 1.8,
2889
- "learning_rate": 1.9580092843793335e-06,
2890
- "loss": 0.3384,
2891
- "step": 239500
2892
- },
2893
- {
2894
- "epoch": 1.81,
2895
- "learning_rate": 1.920404533670947e-06,
2896
- "loss": 0.3282,
2897
- "step": 240000
2898
- },
2899
- {
2900
- "epoch": 1.81,
2901
- "learning_rate": 1.8827244227407007e-06,
2902
- "loss": 0.3236,
2903
- "step": 240500
2904
- },
2905
- {
2906
- "epoch": 1.82,
2907
- "learning_rate": 1.845044311810454e-06,
2908
- "loss": 0.3345,
2909
- "step": 241000
2910
- },
2911
- {
2912
- "epoch": 1.82,
2913
- "learning_rate": 1.8073642008802076e-06,
2914
- "loss": 0.3249,
2915
- "step": 241500
2916
- },
2917
- {
2918
- "epoch": 1.82,
2919
- "learning_rate": 1.769684089949961e-06,
2920
- "loss": 0.3334,
2921
- "step": 242000
2922
- },
2923
- {
2924
- "epoch": 1.83,
2925
- "learning_rate": 1.7320039790197143e-06,
2926
- "loss": 0.3184,
2927
- "step": 242500
2928
- },
2929
- {
2930
- "epoch": 1.83,
2931
- "learning_rate": 1.6943992283113284e-06,
2932
- "loss": 0.3349,
2933
- "step": 243000
2934
- },
2935
- {
2936
- "epoch": 1.84,
2937
- "learning_rate": 1.6567191173810818e-06,
2938
- "loss": 0.3094,
2939
- "step": 243500
2940
- },
2941
- {
2942
- "epoch": 1.84,
2943
- "learning_rate": 1.619039006450835e-06,
2944
- "loss": 0.3291,
2945
- "step": 244000
2946
- },
2947
- {
2948
- "epoch": 1.84,
2949
- "learning_rate": 1.5813588955205887e-06,
2950
- "loss": 0.3205,
2951
- "step": 244500
2952
- },
2953
- {
2954
- "epoch": 1.85,
2955
- "learning_rate": 1.543678784590342e-06,
2956
- "loss": 0.339,
2957
- "step": 245000
2958
- },
2959
- {
2960
- "epoch": 1.85,
2961
- "learning_rate": 1.5059986736600953e-06,
2962
- "loss": 0.3325,
2963
- "step": 245500
2964
- },
2965
- {
2966
- "epoch": 1.85,
2967
- "learning_rate": 1.4683939229517095e-06,
2968
- "loss": 0.3293,
2969
- "step": 246000
2970
- },
2971
- {
2972
- "epoch": 1.86,
2973
- "learning_rate": 1.4307138120214628e-06,
2974
- "loss": 0.32,
2975
- "step": 246500
2976
- },
2977
- {
2978
- "epoch": 1.86,
2979
- "learning_rate": 1.3930337010912161e-06,
2980
- "loss": 0.3361,
2981
- "step": 247000
2982
- },
2983
- {
2984
- "epoch": 1.87,
2985
- "learning_rate": 1.3553535901609697e-06,
2986
- "loss": 0.3253,
2987
- "step": 247500
2988
- },
2989
- {
2990
- "epoch": 1.87,
2991
- "learning_rate": 1.317673479230723e-06,
2992
- "loss": 0.3328,
2993
- "step": 248000
2994
- },
2995
- {
2996
- "epoch": 1.87,
2997
- "learning_rate": 1.2800687285223367e-06,
2998
- "loss": 0.3238,
2999
- "step": 248500
3000
- },
3001
- {
3002
- "epoch": 1.88,
3003
- "learning_rate": 1.2423886175920903e-06,
3004
- "loss": 0.3275,
3005
- "step": 249000
3006
- },
3007
- {
3008
- "epoch": 1.88,
3009
- "learning_rate": 1.2047085066618436e-06,
3010
- "loss": 0.3231,
3011
- "step": 249500
3012
- },
3013
- {
3014
- "epoch": 1.88,
3015
- "learning_rate": 1.1670283957315972e-06,
3016
- "loss": 0.3357,
3017
- "step": 250000
3018
- },
3019
- {
3020
- "epoch": 1.89,
3021
- "learning_rate": 1.129423645023211e-06,
3022
- "loss": 0.3324,
3023
- "step": 250500
3024
- },
3025
- {
3026
- "epoch": 1.89,
3027
- "learning_rate": 1.0917435340929644e-06,
3028
- "loss": 0.3311,
3029
- "step": 251000
3030
- },
3031
- {
3032
- "epoch": 1.9,
3033
- "learning_rate": 1.0540634231627178e-06,
3034
- "loss": 0.333,
3035
- "step": 251500
3036
- },
3037
- {
3038
- "epoch": 1.9,
3039
- "learning_rate": 1.0163833122324713e-06,
3040
- "loss": 0.3202,
3041
- "step": 252000
3042
- },
3043
- {
3044
- "epoch": 1.9,
3045
- "learning_rate": 9.787032013022249e-07,
3046
- "loss": 0.3228,
3047
- "step": 252500
3048
- },
3049
- {
3050
- "epoch": 1.91,
3051
- "learning_rate": 9.410984505938387e-07,
3052
- "loss": 0.3358,
3053
- "step": 253000
3054
- },
3055
- {
3056
- "epoch": 1.91,
3057
- "learning_rate": 9.03418339663592e-07,
3058
- "loss": 0.3214,
3059
- "step": 253500
3060
- },
3061
- {
3062
- "epoch": 1.91,
3063
- "learning_rate": 8.658135889552059e-07,
3064
- "loss": 0.3148,
3065
- "step": 254000
3066
- },
3067
- {
3068
- "epoch": 1.92,
3069
- "learning_rate": 8.281334780249594e-07,
3070
- "loss": 0.336,
3071
- "step": 254500
3072
- },
3073
- {
3074
- "epoch": 1.92,
3075
- "learning_rate": 7.904533670947127e-07,
3076
- "loss": 0.3085,
3077
- "step": 255000
3078
- },
3079
- {
3080
- "epoch": 1.93,
3081
- "learning_rate": 7.527732561644662e-07,
3082
- "loss": 0.326,
3083
- "step": 255500
3084
- },
3085
- {
3086
- "epoch": 1.93,
3087
- "learning_rate": 7.150931452342197e-07,
3088
- "loss": 0.32,
3089
- "step": 256000
3090
- },
3091
- {
3092
- "epoch": 1.93,
3093
- "learning_rate": 6.77413034303973e-07,
3094
- "loss": 0.3297,
3095
- "step": 256500
3096
- },
3097
- {
3098
- "epoch": 1.94,
3099
- "learning_rate": 6.397329233737265e-07,
3100
- "loss": 0.3217,
3101
- "step": 257000
3102
- },
3103
- {
3104
- "epoch": 1.94,
3105
- "learning_rate": 6.020528124434799e-07,
3106
- "loss": 0.331,
3107
- "step": 257500
3108
- },
3109
- {
3110
- "epoch": 1.94,
3111
- "learning_rate": 5.643727015132333e-07,
3112
- "loss": 0.3155,
3113
- "step": 258000
3114
- },
3115
- {
3116
- "epoch": 1.95,
3117
- "learning_rate": 5.267679508048472e-07,
3118
- "loss": 0.3246,
3119
- "step": 258500
3120
- },
3121
- {
3122
- "epoch": 1.95,
3123
- "learning_rate": 4.890878398746006e-07,
3124
- "loss": 0.3325,
3125
- "step": 259000
3126
- },
3127
- {
3128
- "epoch": 1.96,
3129
- "learning_rate": 4.514077289443541e-07,
3130
- "loss": 0.3272,
3131
- "step": 259500
3132
- },
3133
- {
3134
- "epoch": 1.96,
3135
- "learning_rate": 4.1372761801410747e-07,
3136
- "loss": 0.3197,
3137
- "step": 260000
3138
- },
3139
- {
3140
- "epoch": 1.96,
3141
- "learning_rate": 3.7604750708386086e-07,
3142
- "loss": 0.3308,
3143
- "step": 260500
3144
- },
3145
- {
3146
- "epoch": 1.97,
3147
- "learning_rate": 3.383673961536143e-07,
3148
- "loss": 0.3197,
3149
- "step": 261000
3150
- },
3151
- {
3152
- "epoch": 1.97,
3153
- "learning_rate": 3.0068728522336775e-07,
3154
- "loss": 0.3221,
3155
- "step": 261500
3156
- },
3157
- {
3158
- "epoch": 1.97,
3159
- "learning_rate": 2.6308253451498166e-07,
3160
- "loss": 0.3289,
3161
- "step": 262000
3162
- },
3163
- {
3164
- "epoch": 1.98,
3165
- "learning_rate": 2.2540242358473506e-07,
3166
- "loss": 0.3151,
3167
- "step": 262500
3168
- },
3169
- {
3170
- "epoch": 1.98,
3171
- "learning_rate": 1.8772231265448845e-07,
3172
- "loss": 0.3241,
3173
- "step": 263000
3174
- },
3175
- {
3176
- "epoch": 1.99,
3177
- "learning_rate": 1.500422017242419e-07,
3178
- "loss": 0.3351,
3179
- "step": 263500
3180
- },
3181
- {
3182
- "epoch": 1.99,
3183
- "learning_rate": 1.123620907939953e-07,
3184
- "loss": 0.3336,
3185
- "step": 264000
3186
- },
3187
- {
3188
- "epoch": 1.99,
3189
- "learning_rate": 7.468197986374873e-08,
3190
- "loss": 0.3458,
3191
- "step": 264500
3192
- },
3193
- {
3194
- "epoch": 2.0,
3195
- "learning_rate": 3.700186893350214e-08,
3196
- "loss": 0.3266,
3197
- "step": 265000
3198
- },
3199
- {
3200
- "epoch": 2.0,
3201
- "eval_accuracy": 0.8456207671711459,
3202
- "eval_loss": 0.36166509985923767,
3203
- "eval_runtime": 225.0831,
3204
- "eval_samples_per_second": 310.29,
3205
- "eval_steps_per_second": 31.033,
3206
- "step": 265392
3207
  }
3208
  ],
3209
  "max_steps": 265392,
3210
  "num_train_epochs": 2,
3211
- "total_flos": 6.614507563136844e+16,
3212
  "trial_name": null,
3213
  "trial_params": null
3214
  }
 
1
  {
2
  "best_metric": 0.3574163317680359,
3
  "best_model_checkpoint": "OutModelPolicy\\checkpoint-132696",
4
+ "epoch": 1.0,
5
+ "global_step": 132696,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1605
  "eval_samples_per_second": 309.276,
1606
  "eval_steps_per_second": 30.932,
1607
  "step": 132696
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1608
  }
1609
  ],
1610
  "max_steps": 265392,
1611
  "num_train_epochs": 2,
1612
+ "total_flos": 3.308206075463448e+16,
1613
  "trial_name": null,
1614
  "trial_params": null
1615
  }