{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5744833639192531, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0019149445463975106, "grad_norm": 2.4355921745300293, "learning_rate": 0.0, "loss": 0.5195, "step": 1 }, { "epoch": 0.003829889092795021, "grad_norm": 2.444409132003784, "learning_rate": 6.369426751592358e-08, "loss": 0.543, "step": 2 }, { "epoch": 0.005744833639192532, "grad_norm": 2.3453526496887207, "learning_rate": 1.2738853503184715e-07, "loss": 0.5273, "step": 3 }, { "epoch": 0.007659778185590042, "grad_norm": 2.0779857635498047, "learning_rate": 1.9108280254777072e-07, "loss": 0.4805, "step": 4 }, { "epoch": 0.009574722731987553, "grad_norm": 2.307081699371338, "learning_rate": 2.547770700636943e-07, "loss": 0.4609, "step": 5 }, { "epoch": 0.011489667278385064, "grad_norm": 2.1092047691345215, "learning_rate": 3.1847133757961787e-07, "loss": 0.4219, "step": 6 }, { "epoch": 0.013404611824782574, "grad_norm": 2.4406685829162598, "learning_rate": 3.8216560509554143e-07, "loss": 0.5273, "step": 7 }, { "epoch": 0.015319556371180085, "grad_norm": 2.5096800327301025, "learning_rate": 4.45859872611465e-07, "loss": 0.5781, "step": 8 }, { "epoch": 0.017234500917577594, "grad_norm": 2.025543451309204, "learning_rate": 5.095541401273886e-07, "loss": 0.4355, "step": 9 }, { "epoch": 0.019149445463975106, "grad_norm": 2.0597877502441406, "learning_rate": 5.732484076433121e-07, "loss": 0.4902, "step": 10 }, { "epoch": 0.021064390010372615, "grad_norm": 2.492584466934204, "learning_rate": 6.369426751592357e-07, "loss": 0.6055, "step": 11 }, { "epoch": 0.022979334556770127, "grad_norm": 1.8612791299819946, "learning_rate": 7.006369426751592e-07, "loss": 0.457, "step": 12 }, { "epoch": 0.024894279103167636, "grad_norm": 1.9757673740386963, "learning_rate": 7.643312101910829e-07, "loss": 0.5586, "step": 13 }, { "epoch": 0.02680922364956515, "grad_norm": 1.6426618099212646, "learning_rate": 8.280254777070064e-07, "loss": 0.4453, "step": 14 }, { "epoch": 0.028724168195962657, "grad_norm": 1.8673598766326904, "learning_rate": 8.9171974522293e-07, "loss": 0.5586, "step": 15 }, { "epoch": 0.03063911274236017, "grad_norm": 1.8291136026382446, "learning_rate": 9.554140127388537e-07, "loss": 0.5156, "step": 16 }, { "epoch": 0.03255405728875768, "grad_norm": 1.7877495288848877, "learning_rate": 1.0191082802547772e-06, "loss": 0.5586, "step": 17 }, { "epoch": 0.03446900183515519, "grad_norm": 1.5062263011932373, "learning_rate": 1.0828025477707007e-06, "loss": 0.5273, "step": 18 }, { "epoch": 0.0363839463815527, "grad_norm": 1.7692209482192993, "learning_rate": 1.1464968152866242e-06, "loss": 0.6055, "step": 19 }, { "epoch": 0.03829889092795021, "grad_norm": 1.4987772703170776, "learning_rate": 1.210191082802548e-06, "loss": 0.5156, "step": 20 }, { "epoch": 0.040213835474347724, "grad_norm": 1.499037742614746, "learning_rate": 1.2738853503184715e-06, "loss": 0.5625, "step": 21 }, { "epoch": 0.04212878002074523, "grad_norm": 1.3796921968460083, "learning_rate": 1.337579617834395e-06, "loss": 0.5312, "step": 22 }, { "epoch": 0.04404372456714274, "grad_norm": 1.1836010217666626, "learning_rate": 1.4012738853503185e-06, "loss": 0.5, "step": 23 }, { "epoch": 0.045958669113540254, "grad_norm": 0.9325675964355469, "learning_rate": 1.4649681528662422e-06, "loss": 0.5117, "step": 24 }, { "epoch": 0.047873613659937767, "grad_norm": 0.9935901165008545, "learning_rate": 1.5286624203821657e-06, "loss": 0.4707, "step": 25 }, { "epoch": 0.04978855820633527, "grad_norm": 1.0810490846633911, "learning_rate": 1.5923566878980892e-06, "loss": 0.4453, "step": 26 }, { "epoch": 0.051703502752732784, "grad_norm": 1.1079798936843872, "learning_rate": 1.6560509554140127e-06, "loss": 0.4844, "step": 27 }, { "epoch": 0.0536184472991303, "grad_norm": 1.0377357006072998, "learning_rate": 1.7197452229299363e-06, "loss": 0.498, "step": 28 }, { "epoch": 0.05553339184552781, "grad_norm": 0.7516281604766846, "learning_rate": 1.78343949044586e-06, "loss": 0.3398, "step": 29 }, { "epoch": 0.057448336391925314, "grad_norm": 0.7505989670753479, "learning_rate": 1.8471337579617835e-06, "loss": 0.457, "step": 30 }, { "epoch": 0.05936328093832283, "grad_norm": 0.6712049245834351, "learning_rate": 1.9108280254777074e-06, "loss": 0.4922, "step": 31 }, { "epoch": 0.06127822548472034, "grad_norm": 0.7440502047538757, "learning_rate": 1.974522292993631e-06, "loss": 0.4648, "step": 32 }, { "epoch": 0.06319317003111785, "grad_norm": 0.7507379055023193, "learning_rate": 2.0382165605095544e-06, "loss": 0.4102, "step": 33 }, { "epoch": 0.06510811457751536, "grad_norm": 1.0105549097061157, "learning_rate": 2.101910828025478e-06, "loss": 0.4844, "step": 34 }, { "epoch": 0.06702305912391288, "grad_norm": 0.8056528568267822, "learning_rate": 2.1656050955414015e-06, "loss": 0.4023, "step": 35 }, { "epoch": 0.06893800367031037, "grad_norm": 0.80513596534729, "learning_rate": 2.229299363057325e-06, "loss": 0.4531, "step": 36 }, { "epoch": 0.07085294821670789, "grad_norm": 0.6895999908447266, "learning_rate": 2.2929936305732485e-06, "loss": 0.416, "step": 37 }, { "epoch": 0.0727678927631054, "grad_norm": 0.6521788835525513, "learning_rate": 2.356687898089172e-06, "loss": 0.3281, "step": 38 }, { "epoch": 0.07468283730950291, "grad_norm": 1.1881077289581299, "learning_rate": 2.420382165605096e-06, "loss": 0.4629, "step": 39 }, { "epoch": 0.07659778185590042, "grad_norm": 0.7610136866569519, "learning_rate": 2.4840764331210194e-06, "loss": 0.4922, "step": 40 }, { "epoch": 0.07851272640229794, "grad_norm": 0.6756831407546997, "learning_rate": 2.547770700636943e-06, "loss": 0.5117, "step": 41 }, { "epoch": 0.08042767094869545, "grad_norm": 0.5855818390846252, "learning_rate": 2.6114649681528665e-06, "loss": 0.4277, "step": 42 }, { "epoch": 0.08234261549509296, "grad_norm": 0.5638097524642944, "learning_rate": 2.67515923566879e-06, "loss": 0.4629, "step": 43 }, { "epoch": 0.08425756004149046, "grad_norm": 0.4795784056186676, "learning_rate": 2.7388535031847135e-06, "loss": 0.457, "step": 44 }, { "epoch": 0.08617250458788797, "grad_norm": 0.44129636883735657, "learning_rate": 2.802547770700637e-06, "loss": 0.4199, "step": 45 }, { "epoch": 0.08808744913428548, "grad_norm": 0.5452600717544556, "learning_rate": 2.8662420382165605e-06, "loss": 0.4883, "step": 46 }, { "epoch": 0.090002393680683, "grad_norm": 0.5549207925796509, "learning_rate": 2.9299363057324844e-06, "loss": 0.4336, "step": 47 }, { "epoch": 0.09191733822708051, "grad_norm": 0.4318218231201172, "learning_rate": 2.993630573248408e-06, "loss": 0.3633, "step": 48 }, { "epoch": 0.09383228277347802, "grad_norm": 0.4986816942691803, "learning_rate": 3.0573248407643314e-06, "loss": 0.3926, "step": 49 }, { "epoch": 0.09574722731987553, "grad_norm": 0.4866129159927368, "learning_rate": 3.121019108280255e-06, "loss": 0.418, "step": 50 }, { "epoch": 0.09766217186627305, "grad_norm": 0.6105746626853943, "learning_rate": 3.1847133757961785e-06, "loss": 0.4707, "step": 51 }, { "epoch": 0.09957711641267054, "grad_norm": 0.5020173192024231, "learning_rate": 3.248407643312102e-06, "loss": 0.4707, "step": 52 }, { "epoch": 0.10149206095906806, "grad_norm": 0.36393049359321594, "learning_rate": 3.3121019108280255e-06, "loss": 0.3926, "step": 53 }, { "epoch": 0.10340700550546557, "grad_norm": 0.45811325311660767, "learning_rate": 3.375796178343949e-06, "loss": 0.4492, "step": 54 }, { "epoch": 0.10532195005186308, "grad_norm": 0.5363308191299438, "learning_rate": 3.4394904458598725e-06, "loss": 0.3613, "step": 55 }, { "epoch": 0.1072368945982606, "grad_norm": 0.48611095547676086, "learning_rate": 3.5031847133757964e-06, "loss": 0.4434, "step": 56 }, { "epoch": 0.1091518391446581, "grad_norm": 0.4395770728588104, "learning_rate": 3.56687898089172e-06, "loss": 0.4668, "step": 57 }, { "epoch": 0.11106678369105562, "grad_norm": 0.43419352173805237, "learning_rate": 3.6305732484076435e-06, "loss": 0.4512, "step": 58 }, { "epoch": 0.11298172823745313, "grad_norm": 0.4470672607421875, "learning_rate": 3.694267515923567e-06, "loss": 0.4863, "step": 59 }, { "epoch": 0.11489667278385063, "grad_norm": 0.40022385120391846, "learning_rate": 3.757961783439491e-06, "loss": 0.4141, "step": 60 }, { "epoch": 0.11681161733024814, "grad_norm": 0.4839700162410736, "learning_rate": 3.821656050955415e-06, "loss": 0.543, "step": 61 }, { "epoch": 0.11872656187664565, "grad_norm": 0.37022727727890015, "learning_rate": 3.885350318471338e-06, "loss": 0.4629, "step": 62 }, { "epoch": 0.12064150642304317, "grad_norm": 0.4015008509159088, "learning_rate": 3.949044585987262e-06, "loss": 0.4473, "step": 63 }, { "epoch": 0.12255645096944068, "grad_norm": 0.33814293146133423, "learning_rate": 4.012738853503185e-06, "loss": 0.3594, "step": 64 }, { "epoch": 0.12447139551583819, "grad_norm": 0.38957205414772034, "learning_rate": 4.076433121019109e-06, "loss": 0.4688, "step": 65 }, { "epoch": 0.1263863400622357, "grad_norm": 0.34749555587768555, "learning_rate": 4.140127388535032e-06, "loss": 0.4004, "step": 66 }, { "epoch": 0.12830128460863321, "grad_norm": 0.408210813999176, "learning_rate": 4.203821656050956e-06, "loss": 0.4941, "step": 67 }, { "epoch": 0.13021622915503073, "grad_norm": 0.35355207324028015, "learning_rate": 4.26751592356688e-06, "loss": 0.3594, "step": 68 }, { "epoch": 0.13213117370142824, "grad_norm": 0.48519861698150635, "learning_rate": 4.331210191082803e-06, "loss": 0.4902, "step": 69 }, { "epoch": 0.13404611824782575, "grad_norm": 0.3278568387031555, "learning_rate": 4.394904458598727e-06, "loss": 0.3945, "step": 70 }, { "epoch": 0.13596106279422326, "grad_norm": 0.4168219566345215, "learning_rate": 4.45859872611465e-06, "loss": 0.416, "step": 71 }, { "epoch": 0.13787600734062075, "grad_norm": 0.3659290373325348, "learning_rate": 4.522292993630574e-06, "loss": 0.4375, "step": 72 }, { "epoch": 0.13979095188701826, "grad_norm": 0.3873862624168396, "learning_rate": 4.585987261146497e-06, "loss": 0.3965, "step": 73 }, { "epoch": 0.14170589643341577, "grad_norm": 0.38224056363105774, "learning_rate": 4.649681528662421e-06, "loss": 0.4648, "step": 74 }, { "epoch": 0.14362084097981329, "grad_norm": 0.36365070939064026, "learning_rate": 4.713375796178344e-06, "loss": 0.3672, "step": 75 }, { "epoch": 0.1455357855262108, "grad_norm": 0.3808673918247223, "learning_rate": 4.777070063694268e-06, "loss": 0.3789, "step": 76 }, { "epoch": 0.1474507300726083, "grad_norm": 0.3691989779472351, "learning_rate": 4.840764331210192e-06, "loss": 0.4258, "step": 77 }, { "epoch": 0.14936567461900582, "grad_norm": 0.4533417820930481, "learning_rate": 4.904458598726115e-06, "loss": 0.5, "step": 78 }, { "epoch": 0.15128061916540333, "grad_norm": 0.34389302134513855, "learning_rate": 4.968152866242039e-06, "loss": 0.3828, "step": 79 }, { "epoch": 0.15319556371180085, "grad_norm": 0.35934531688690186, "learning_rate": 5.031847133757962e-06, "loss": 0.4375, "step": 80 }, { "epoch": 0.15511050825819836, "grad_norm": 0.3186410367488861, "learning_rate": 5.095541401273886e-06, "loss": 0.3848, "step": 81 }, { "epoch": 0.15702545280459587, "grad_norm": 0.3838176727294922, "learning_rate": 5.159235668789809e-06, "loss": 0.3379, "step": 82 }, { "epoch": 0.15894039735099338, "grad_norm": 0.3711152672767639, "learning_rate": 5.222929936305733e-06, "loss": 0.3906, "step": 83 }, { "epoch": 0.1608553418973909, "grad_norm": 0.33732864260673523, "learning_rate": 5.286624203821657e-06, "loss": 0.4316, "step": 84 }, { "epoch": 0.1627702864437884, "grad_norm": 0.4067969024181366, "learning_rate": 5.35031847133758e-06, "loss": 0.3418, "step": 85 }, { "epoch": 0.16468523099018592, "grad_norm": 0.3334014415740967, "learning_rate": 5.414012738853504e-06, "loss": 0.3398, "step": 86 }, { "epoch": 0.16660017553658343, "grad_norm": 0.30455130338668823, "learning_rate": 5.477707006369427e-06, "loss": 0.3594, "step": 87 }, { "epoch": 0.16851512008298092, "grad_norm": 0.445254385471344, "learning_rate": 5.541401273885351e-06, "loss": 0.457, "step": 88 }, { "epoch": 0.17043006462937843, "grad_norm": 0.3656286299228668, "learning_rate": 5.605095541401274e-06, "loss": 0.4199, "step": 89 }, { "epoch": 0.17234500917577594, "grad_norm": 0.40646082162857056, "learning_rate": 5.668789808917198e-06, "loss": 0.3984, "step": 90 }, { "epoch": 0.17425995372217346, "grad_norm": 0.3945332467556, "learning_rate": 5.732484076433121e-06, "loss": 0.3594, "step": 91 }, { "epoch": 0.17617489826857097, "grad_norm": 0.3301292359828949, "learning_rate": 5.796178343949045e-06, "loss": 0.4199, "step": 92 }, { "epoch": 0.17808984281496848, "grad_norm": 0.3358660042285919, "learning_rate": 5.859872611464969e-06, "loss": 0.3535, "step": 93 }, { "epoch": 0.180004787361366, "grad_norm": 0.35072633624076843, "learning_rate": 5.923566878980892e-06, "loss": 0.4414, "step": 94 }, { "epoch": 0.1819197319077635, "grad_norm": 0.3340066075325012, "learning_rate": 5.987261146496816e-06, "loss": 0.3945, "step": 95 }, { "epoch": 0.18383467645416102, "grad_norm": 0.31716787815093994, "learning_rate": 6.050955414012739e-06, "loss": 0.3516, "step": 96 }, { "epoch": 0.18574962100055853, "grad_norm": 0.3251139223575592, "learning_rate": 6.114649681528663e-06, "loss": 0.3848, "step": 97 }, { "epoch": 0.18766456554695604, "grad_norm": 0.30106034874916077, "learning_rate": 6.178343949044586e-06, "loss": 0.3555, "step": 98 }, { "epoch": 0.18957951009335355, "grad_norm": 0.3289436399936676, "learning_rate": 6.24203821656051e-06, "loss": 0.3203, "step": 99 }, { "epoch": 0.19149445463975107, "grad_norm": 0.4102315902709961, "learning_rate": 6.305732484076433e-06, "loss": 0.4316, "step": 100 }, { "epoch": 0.19340939918614858, "grad_norm": 0.34562960267066956, "learning_rate": 6.369426751592357e-06, "loss": 0.3496, "step": 101 }, { "epoch": 0.1953243437325461, "grad_norm": 0.35827019810676575, "learning_rate": 6.433121019108281e-06, "loss": 0.3984, "step": 102 }, { "epoch": 0.19723928827894358, "grad_norm": 0.33068418502807617, "learning_rate": 6.496815286624204e-06, "loss": 0.3105, "step": 103 }, { "epoch": 0.1991542328253411, "grad_norm": 0.34721487760543823, "learning_rate": 6.560509554140128e-06, "loss": 0.4023, "step": 104 }, { "epoch": 0.2010691773717386, "grad_norm": 0.409750759601593, "learning_rate": 6.624203821656051e-06, "loss": 0.4258, "step": 105 }, { "epoch": 0.2029841219181361, "grad_norm": 0.3398571312427521, "learning_rate": 6.687898089171975e-06, "loss": 0.373, "step": 106 }, { "epoch": 0.20489906646453362, "grad_norm": 0.34012889862060547, "learning_rate": 6.751592356687898e-06, "loss": 0.3379, "step": 107 }, { "epoch": 0.20681401101093114, "grad_norm": 0.33973219990730286, "learning_rate": 6.815286624203822e-06, "loss": 0.3672, "step": 108 }, { "epoch": 0.20872895555732865, "grad_norm": 0.3657017648220062, "learning_rate": 6.878980891719745e-06, "loss": 0.3457, "step": 109 }, { "epoch": 0.21064390010372616, "grad_norm": 0.4519689679145813, "learning_rate": 6.942675159235669e-06, "loss": 0.3828, "step": 110 }, { "epoch": 0.21255884465012367, "grad_norm": 0.3746054768562317, "learning_rate": 7.006369426751593e-06, "loss": 0.377, "step": 111 }, { "epoch": 0.2144737891965212, "grad_norm": 0.4097052812576294, "learning_rate": 7.070063694267516e-06, "loss": 0.4609, "step": 112 }, { "epoch": 0.2163887337429187, "grad_norm": 0.3271676003932953, "learning_rate": 7.13375796178344e-06, "loss": 0.3438, "step": 113 }, { "epoch": 0.2183036782893162, "grad_norm": 0.3513050973415375, "learning_rate": 7.197452229299363e-06, "loss": 0.4375, "step": 114 }, { "epoch": 0.22021862283571372, "grad_norm": 0.3622526228427887, "learning_rate": 7.261146496815287e-06, "loss": 0.4121, "step": 115 }, { "epoch": 0.22213356738211124, "grad_norm": 0.3314346671104431, "learning_rate": 7.32484076433121e-06, "loss": 0.4023, "step": 116 }, { "epoch": 0.22404851192850875, "grad_norm": 0.33853384852409363, "learning_rate": 7.388535031847134e-06, "loss": 0.3496, "step": 117 }, { "epoch": 0.22596345647490626, "grad_norm": 0.3633553683757782, "learning_rate": 7.452229299363057e-06, "loss": 0.3613, "step": 118 }, { "epoch": 0.22787840102130374, "grad_norm": 0.33034729957580566, "learning_rate": 7.515923566878982e-06, "loss": 0.3672, "step": 119 }, { "epoch": 0.22979334556770126, "grad_norm": 0.6564728617668152, "learning_rate": 7.579617834394906e-06, "loss": 0.3672, "step": 120 }, { "epoch": 0.23170829011409877, "grad_norm": 0.3335916996002197, "learning_rate": 7.64331210191083e-06, "loss": 0.3809, "step": 121 }, { "epoch": 0.23362323466049628, "grad_norm": 0.3193455934524536, "learning_rate": 7.707006369426753e-06, "loss": 0.3086, "step": 122 }, { "epoch": 0.2355381792068938, "grad_norm": 0.39426565170288086, "learning_rate": 7.770700636942676e-06, "loss": 0.3066, "step": 123 }, { "epoch": 0.2374531237532913, "grad_norm": 0.3445550799369812, "learning_rate": 7.8343949044586e-06, "loss": 0.3652, "step": 124 }, { "epoch": 0.23936806829968882, "grad_norm": 0.3657853901386261, "learning_rate": 7.898089171974524e-06, "loss": 0.3633, "step": 125 }, { "epoch": 0.24128301284608633, "grad_norm": 0.36185649037361145, "learning_rate": 7.961783439490447e-06, "loss": 0.3438, "step": 126 }, { "epoch": 0.24319795739248384, "grad_norm": 0.40025272965431213, "learning_rate": 8.02547770700637e-06, "loss": 0.3574, "step": 127 }, { "epoch": 0.24511290193888136, "grad_norm": 0.4066087007522583, "learning_rate": 8.089171974522295e-06, "loss": 0.4668, "step": 128 }, { "epoch": 0.24702784648527887, "grad_norm": 0.33924514055252075, "learning_rate": 8.152866242038218e-06, "loss": 0.3652, "step": 129 }, { "epoch": 0.24894279103167638, "grad_norm": 0.347211629152298, "learning_rate": 8.21656050955414e-06, "loss": 0.3809, "step": 130 }, { "epoch": 0.2508577355780739, "grad_norm": 0.35744917392730713, "learning_rate": 8.280254777070064e-06, "loss": 0.3828, "step": 131 }, { "epoch": 0.2527726801244714, "grad_norm": 0.3828674256801605, "learning_rate": 8.343949044585989e-06, "loss": 0.416, "step": 132 }, { "epoch": 0.2546876246708689, "grad_norm": 0.39746707677841187, "learning_rate": 8.407643312101912e-06, "loss": 0.4023, "step": 133 }, { "epoch": 0.25660256921726643, "grad_norm": 0.35075193643569946, "learning_rate": 8.471337579617835e-06, "loss": 0.3828, "step": 134 }, { "epoch": 0.25851751376366394, "grad_norm": 0.36559295654296875, "learning_rate": 8.53503184713376e-06, "loss": 0.3633, "step": 135 }, { "epoch": 0.26043245831006145, "grad_norm": 0.3440432846546173, "learning_rate": 8.598726114649683e-06, "loss": 0.3418, "step": 136 }, { "epoch": 0.26234740285645897, "grad_norm": 0.3704967796802521, "learning_rate": 8.662420382165606e-06, "loss": 0.3789, "step": 137 }, { "epoch": 0.2642623474028565, "grad_norm": 0.3533952534198761, "learning_rate": 8.726114649681529e-06, "loss": 0.375, "step": 138 }, { "epoch": 0.266177291949254, "grad_norm": 0.35461661219596863, "learning_rate": 8.789808917197454e-06, "loss": 0.3516, "step": 139 }, { "epoch": 0.2680922364956515, "grad_norm": 0.36203712224960327, "learning_rate": 8.853503184713377e-06, "loss": 0.375, "step": 140 }, { "epoch": 0.270007181042049, "grad_norm": 0.3293781876564026, "learning_rate": 8.9171974522293e-06, "loss": 0.3379, "step": 141 }, { "epoch": 0.27192212558844653, "grad_norm": 0.32773709297180176, "learning_rate": 8.980891719745225e-06, "loss": 0.3379, "step": 142 }, { "epoch": 0.273837070134844, "grad_norm": 0.4453182816505432, "learning_rate": 9.044585987261148e-06, "loss": 0.5117, "step": 143 }, { "epoch": 0.2757520146812415, "grad_norm": 0.41457095742225647, "learning_rate": 9.10828025477707e-06, "loss": 0.3574, "step": 144 }, { "epoch": 0.277666959227639, "grad_norm": 0.41834110021591187, "learning_rate": 9.171974522292994e-06, "loss": 0.3906, "step": 145 }, { "epoch": 0.2795819037740365, "grad_norm": 0.35413509607315063, "learning_rate": 9.235668789808919e-06, "loss": 0.4336, "step": 146 }, { "epoch": 0.28149684832043403, "grad_norm": 0.3722779154777527, "learning_rate": 9.299363057324842e-06, "loss": 0.3086, "step": 147 }, { "epoch": 0.28341179286683155, "grad_norm": 0.3540852665901184, "learning_rate": 9.363057324840765e-06, "loss": 0.3633, "step": 148 }, { "epoch": 0.28532673741322906, "grad_norm": 0.3304201662540436, "learning_rate": 9.426751592356688e-06, "loss": 0.3027, "step": 149 }, { "epoch": 0.28724168195962657, "grad_norm": 0.3253211975097656, "learning_rate": 9.490445859872613e-06, "loss": 0.2773, "step": 150 }, { "epoch": 0.2891566265060241, "grad_norm": 0.39088380336761475, "learning_rate": 9.554140127388536e-06, "loss": 0.3086, "step": 151 }, { "epoch": 0.2910715710524216, "grad_norm": 0.3674003779888153, "learning_rate": 9.617834394904459e-06, "loss": 0.3867, "step": 152 }, { "epoch": 0.2929865155988191, "grad_norm": 0.42949748039245605, "learning_rate": 9.681528662420384e-06, "loss": 0.3535, "step": 153 }, { "epoch": 0.2949014601452166, "grad_norm": 0.3864225447177887, "learning_rate": 9.745222929936307e-06, "loss": 0.373, "step": 154 }, { "epoch": 0.29681640469161413, "grad_norm": 0.4238653779029846, "learning_rate": 9.80891719745223e-06, "loss": 0.3789, "step": 155 }, { "epoch": 0.29873134923801165, "grad_norm": 0.38426122069358826, "learning_rate": 9.872611464968153e-06, "loss": 0.332, "step": 156 }, { "epoch": 0.30064629378440916, "grad_norm": 0.37728747725486755, "learning_rate": 9.936305732484078e-06, "loss": 0.375, "step": 157 }, { "epoch": 0.30256123833080667, "grad_norm": 0.39007872343063354, "learning_rate": 1e-05, "loss": 0.4551, "step": 158 }, { "epoch": 0.3044761828772042, "grad_norm": 0.41649481654167175, "learning_rate": 9.999987624288363e-06, "loss": 0.4238, "step": 159 }, { "epoch": 0.3063911274236017, "grad_norm": 0.37433379888534546, "learning_rate": 9.999950497214712e-06, "loss": 0.3438, "step": 160 }, { "epoch": 0.3083060719699992, "grad_norm": 0.42663007974624634, "learning_rate": 9.999888618962839e-06, "loss": 0.3105, "step": 161 }, { "epoch": 0.3102210165163967, "grad_norm": 0.3587310314178467, "learning_rate": 9.999801989839055e-06, "loss": 0.3105, "step": 162 }, { "epoch": 0.31213596106279423, "grad_norm": 0.35394129157066345, "learning_rate": 9.999690610272203e-06, "loss": 0.2832, "step": 163 }, { "epoch": 0.31405090560919174, "grad_norm": 0.3573514223098755, "learning_rate": 9.999554480813642e-06, "loss": 0.3145, "step": 164 }, { "epoch": 0.31596585015558926, "grad_norm": 0.38047319650650024, "learning_rate": 9.999393602137252e-06, "loss": 0.3008, "step": 165 }, { "epoch": 0.31788079470198677, "grad_norm": 0.41491907835006714, "learning_rate": 9.999207975039429e-06, "loss": 0.3848, "step": 166 }, { "epoch": 0.3197957392483843, "grad_norm": 0.3877142369747162, "learning_rate": 9.998997600439078e-06, "loss": 0.3594, "step": 167 }, { "epoch": 0.3217106837947818, "grad_norm": 0.3370790481567383, "learning_rate": 9.998762479377613e-06, "loss": 0.2695, "step": 168 }, { "epoch": 0.3236256283411793, "grad_norm": 0.4250260293483734, "learning_rate": 9.998502613018952e-06, "loss": 0.3008, "step": 169 }, { "epoch": 0.3255405728875768, "grad_norm": 0.3558523952960968, "learning_rate": 9.998218002649507e-06, "loss": 0.2754, "step": 170 }, { "epoch": 0.32745551743397433, "grad_norm": 0.3442358672618866, "learning_rate": 9.99790864967818e-06, "loss": 0.3008, "step": 171 }, { "epoch": 0.32937046198037184, "grad_norm": 0.4215598702430725, "learning_rate": 9.997574555636355e-06, "loss": 0.3867, "step": 172 }, { "epoch": 0.33128540652676935, "grad_norm": 0.422977089881897, "learning_rate": 9.997215722177895e-06, "loss": 0.3066, "step": 173 }, { "epoch": 0.33320035107316687, "grad_norm": 0.3850831389427185, "learning_rate": 9.996832151079127e-06, "loss": 0.332, "step": 174 }, { "epoch": 0.3351152956195643, "grad_norm": 0.46243324875831604, "learning_rate": 9.996423844238836e-06, "loss": 0.3613, "step": 175 }, { "epoch": 0.33703024016596184, "grad_norm": 0.44479626417160034, "learning_rate": 9.995990803678259e-06, "loss": 0.3574, "step": 176 }, { "epoch": 0.33894518471235935, "grad_norm": 0.4255305230617523, "learning_rate": 9.995533031541069e-06, "loss": 0.3711, "step": 177 }, { "epoch": 0.34086012925875686, "grad_norm": 0.3565676510334015, "learning_rate": 9.995050530093366e-06, "loss": 0.3652, "step": 178 }, { "epoch": 0.3427750738051544, "grad_norm": 0.5011817216873169, "learning_rate": 9.994543301723674e-06, "loss": 0.375, "step": 179 }, { "epoch": 0.3446900183515519, "grad_norm": 0.4056403934955597, "learning_rate": 9.994011348942915e-06, "loss": 0.3418, "step": 180 }, { "epoch": 0.3466049628979494, "grad_norm": 0.39417311549186707, "learning_rate": 9.993454674384408e-06, "loss": 0.3027, "step": 181 }, { "epoch": 0.3485199074443469, "grad_norm": 0.31448638439178467, "learning_rate": 9.992873280803848e-06, "loss": 0.3066, "step": 182 }, { "epoch": 0.3504348519907444, "grad_norm": 0.39453354477882385, "learning_rate": 9.992267171079302e-06, "loss": 0.3887, "step": 183 }, { "epoch": 0.35234979653714193, "grad_norm": 0.36697134375572205, "learning_rate": 9.991636348211185e-06, "loss": 0.3301, "step": 184 }, { "epoch": 0.35426474108353945, "grad_norm": 0.4324952960014343, "learning_rate": 9.990980815322246e-06, "loss": 0.375, "step": 185 }, { "epoch": 0.35617968562993696, "grad_norm": 0.39873865246772766, "learning_rate": 9.990300575657565e-06, "loss": 0.3867, "step": 186 }, { "epoch": 0.35809463017633447, "grad_norm": 0.40679872035980225, "learning_rate": 9.989595632584518e-06, "loss": 0.3711, "step": 187 }, { "epoch": 0.360009574722732, "grad_norm": 0.3605036437511444, "learning_rate": 9.988865989592778e-06, "loss": 0.3438, "step": 188 }, { "epoch": 0.3619245192691295, "grad_norm": 0.4955857992172241, "learning_rate": 9.988111650294279e-06, "loss": 0.2969, "step": 189 }, { "epoch": 0.363839463815527, "grad_norm": 0.3720203936100006, "learning_rate": 9.987332618423221e-06, "loss": 0.3633, "step": 190 }, { "epoch": 0.3657544083619245, "grad_norm": 0.4176885187625885, "learning_rate": 9.986528897836032e-06, "loss": 0.3672, "step": 191 }, { "epoch": 0.36766935290832203, "grad_norm": 0.38222536444664, "learning_rate": 9.985700492511356e-06, "loss": 0.4141, "step": 192 }, { "epoch": 0.36958429745471955, "grad_norm": 0.35066160559654236, "learning_rate": 9.984847406550036e-06, "loss": 0.3438, "step": 193 }, { "epoch": 0.37149924200111706, "grad_norm": 0.35708916187286377, "learning_rate": 9.983969644175092e-06, "loss": 0.291, "step": 194 }, { "epoch": 0.37341418654751457, "grad_norm": 0.36304759979248047, "learning_rate": 9.983067209731696e-06, "loss": 0.3672, "step": 195 }, { "epoch": 0.3753291310939121, "grad_norm": 0.3975924849510193, "learning_rate": 9.982140107687156e-06, "loss": 0.3262, "step": 196 }, { "epoch": 0.3772440756403096, "grad_norm": 0.40323784947395325, "learning_rate": 9.98118834263089e-06, "loss": 0.3633, "step": 197 }, { "epoch": 0.3791590201867071, "grad_norm": 0.4162772595882416, "learning_rate": 9.980211919274407e-06, "loss": 0.4023, "step": 198 }, { "epoch": 0.3810739647331046, "grad_norm": 0.3952171206474304, "learning_rate": 9.979210842451282e-06, "loss": 0.3438, "step": 199 }, { "epoch": 0.38298890927950213, "grad_norm": 0.3455185890197754, "learning_rate": 9.978185117117126e-06, "loss": 0.3164, "step": 200 }, { "epoch": 0.38490385382589964, "grad_norm": 0.3772539794445038, "learning_rate": 9.977134748349575e-06, "loss": 0.3516, "step": 201 }, { "epoch": 0.38681879837229716, "grad_norm": 0.284085750579834, "learning_rate": 9.976059741348252e-06, "loss": 0.21, "step": 202 }, { "epoch": 0.38873374291869467, "grad_norm": 0.4230060875415802, "learning_rate": 9.974960101434747e-06, "loss": 0.3301, "step": 203 }, { "epoch": 0.3906486874650922, "grad_norm": 0.384288489818573, "learning_rate": 9.973835834052593e-06, "loss": 0.3203, "step": 204 }, { "epoch": 0.3925636320114897, "grad_norm": 0.3772657513618469, "learning_rate": 9.972686944767231e-06, "loss": 0.3672, "step": 205 }, { "epoch": 0.39447857655788715, "grad_norm": 0.36377066373825073, "learning_rate": 9.971513439265992e-06, "loss": 0.3496, "step": 206 }, { "epoch": 0.39639352110428466, "grad_norm": 0.5106296539306641, "learning_rate": 9.970315323358061e-06, "loss": 0.3164, "step": 207 }, { "epoch": 0.3983084656506822, "grad_norm": 0.406767874956131, "learning_rate": 9.969092602974453e-06, "loss": 0.4023, "step": 208 }, { "epoch": 0.4002234101970797, "grad_norm": 0.35188838839530945, "learning_rate": 9.967845284167981e-06, "loss": 0.3477, "step": 209 }, { "epoch": 0.4021383547434772, "grad_norm": 0.37596458196640015, "learning_rate": 9.96657337311323e-06, "loss": 0.3398, "step": 210 }, { "epoch": 0.4040532992898747, "grad_norm": 0.32428470253944397, "learning_rate": 9.965276876106523e-06, "loss": 0.2715, "step": 211 }, { "epoch": 0.4059682438362722, "grad_norm": 0.4191044569015503, "learning_rate": 9.963955799565885e-06, "loss": 0.4102, "step": 212 }, { "epoch": 0.40788318838266974, "grad_norm": 0.3697478175163269, "learning_rate": 9.962610150031025e-06, "loss": 0.3984, "step": 213 }, { "epoch": 0.40979813292906725, "grad_norm": 0.4006584882736206, "learning_rate": 9.96123993416329e-06, "loss": 0.3691, "step": 214 }, { "epoch": 0.41171307747546476, "grad_norm": 0.3929983079433441, "learning_rate": 9.959845158745635e-06, "loss": 0.3262, "step": 215 }, { "epoch": 0.4136280220218623, "grad_norm": 0.4248427152633667, "learning_rate": 9.9584258306826e-06, "loss": 0.3477, "step": 216 }, { "epoch": 0.4155429665682598, "grad_norm": 0.369737833738327, "learning_rate": 9.956981957000261e-06, "loss": 0.3945, "step": 217 }, { "epoch": 0.4174579111146573, "grad_norm": 0.34655651450157166, "learning_rate": 9.955513544846205e-06, "loss": 0.2969, "step": 218 }, { "epoch": 0.4193728556610548, "grad_norm": 0.4321710467338562, "learning_rate": 9.954020601489488e-06, "loss": 0.3398, "step": 219 }, { "epoch": 0.4212878002074523, "grad_norm": 0.387251615524292, "learning_rate": 9.952503134320606e-06, "loss": 0.3223, "step": 220 }, { "epoch": 0.42320274475384984, "grad_norm": 0.4663626551628113, "learning_rate": 9.950961150851454e-06, "loss": 0.4922, "step": 221 }, { "epoch": 0.42511768930024735, "grad_norm": 0.3990941047668457, "learning_rate": 9.949394658715289e-06, "loss": 0.3965, "step": 222 }, { "epoch": 0.42703263384664486, "grad_norm": 0.3907487988471985, "learning_rate": 9.94780366566669e-06, "loss": 0.3105, "step": 223 }, { "epoch": 0.4289475783930424, "grad_norm": 0.3887116611003876, "learning_rate": 9.946188179581529e-06, "loss": 0.3008, "step": 224 }, { "epoch": 0.4308625229394399, "grad_norm": 0.44479915499687195, "learning_rate": 9.94454820845692e-06, "loss": 0.4258, "step": 225 }, { "epoch": 0.4327774674858374, "grad_norm": 0.3838413655757904, "learning_rate": 9.942883760411188e-06, "loss": 0.3496, "step": 226 }, { "epoch": 0.4346924120322349, "grad_norm": 0.4151753783226013, "learning_rate": 9.941194843683826e-06, "loss": 0.4102, "step": 227 }, { "epoch": 0.4366073565786324, "grad_norm": 0.32539334893226624, "learning_rate": 9.939481466635448e-06, "loss": 0.2852, "step": 228 }, { "epoch": 0.43852230112502993, "grad_norm": 0.39479121565818787, "learning_rate": 9.937743637747763e-06, "loss": 0.3379, "step": 229 }, { "epoch": 0.44043724567142745, "grad_norm": 0.39315953850746155, "learning_rate": 9.935981365623516e-06, "loss": 0.3477, "step": 230 }, { "epoch": 0.44235219021782496, "grad_norm": 0.3778970539569855, "learning_rate": 9.934194658986457e-06, "loss": 0.3711, "step": 231 }, { "epoch": 0.44426713476422247, "grad_norm": 0.3942454159259796, "learning_rate": 9.93238352668129e-06, "loss": 0.3789, "step": 232 }, { "epoch": 0.44618207931062, "grad_norm": 0.3276035189628601, "learning_rate": 9.930547977673638e-06, "loss": 0.3242, "step": 233 }, { "epoch": 0.4480970238570175, "grad_norm": 0.32192081212997437, "learning_rate": 9.928688021049991e-06, "loss": 0.2773, "step": 234 }, { "epoch": 0.450011968403415, "grad_norm": 0.35930487513542175, "learning_rate": 9.926803666017664e-06, "loss": 0.3438, "step": 235 }, { "epoch": 0.4519269129498125, "grad_norm": 0.3854064643383026, "learning_rate": 9.924894921904748e-06, "loss": 0.3652, "step": 236 }, { "epoch": 0.45384185749621003, "grad_norm": 0.40004488825798035, "learning_rate": 9.92296179816007e-06, "loss": 0.3711, "step": 237 }, { "epoch": 0.4557568020426075, "grad_norm": 0.37695586681365967, "learning_rate": 9.921004304353147e-06, "loss": 0.3477, "step": 238 }, { "epoch": 0.457671746589005, "grad_norm": 0.3978940546512604, "learning_rate": 9.919022450174126e-06, "loss": 0.3672, "step": 239 }, { "epoch": 0.4595866911354025, "grad_norm": 0.40706950426101685, "learning_rate": 9.917016245433755e-06, "loss": 0.2852, "step": 240 }, { "epoch": 0.4615016356818, "grad_norm": 0.370563805103302, "learning_rate": 9.914985700063312e-06, "loss": 0.3535, "step": 241 }, { "epoch": 0.46341658022819754, "grad_norm": 0.39702001214027405, "learning_rate": 9.912930824114577e-06, "loss": 0.3125, "step": 242 }, { "epoch": 0.46533152477459505, "grad_norm": 0.35320210456848145, "learning_rate": 9.910851627759773e-06, "loss": 0.3281, "step": 243 }, { "epoch": 0.46724646932099256, "grad_norm": 0.4427463710308075, "learning_rate": 9.90874812129151e-06, "loss": 0.3398, "step": 244 }, { "epoch": 0.4691614138673901, "grad_norm": 0.35988548398017883, "learning_rate": 9.906620315122745e-06, "loss": 0.293, "step": 245 }, { "epoch": 0.4710763584137876, "grad_norm": 0.39641326665878296, "learning_rate": 9.904468219786727e-06, "loss": 0.3184, "step": 246 }, { "epoch": 0.4729913029601851, "grad_norm": 0.3900603652000427, "learning_rate": 9.902291845936938e-06, "loss": 0.3574, "step": 247 }, { "epoch": 0.4749062475065826, "grad_norm": 0.45603853464126587, "learning_rate": 9.900091204347048e-06, "loss": 0.457, "step": 248 }, { "epoch": 0.4768211920529801, "grad_norm": 0.3850320279598236, "learning_rate": 9.897866305910861e-06, "loss": 0.3457, "step": 249 }, { "epoch": 0.47873613659937764, "grad_norm": 0.3480890095233917, "learning_rate": 9.895617161642257e-06, "loss": 0.2969, "step": 250 }, { "epoch": 0.48065108114577515, "grad_norm": 0.4086163341999054, "learning_rate": 9.893343782675138e-06, "loss": 0.3594, "step": 251 }, { "epoch": 0.48256602569217266, "grad_norm": 0.3559402823448181, "learning_rate": 9.891046180263382e-06, "loss": 0.2734, "step": 252 }, { "epoch": 0.4844809702385702, "grad_norm": 0.3534909784793854, "learning_rate": 9.888724365780768e-06, "loss": 0.3301, "step": 253 }, { "epoch": 0.4863959147849677, "grad_norm": 0.3884546756744385, "learning_rate": 9.886378350720945e-06, "loss": 0.3965, "step": 254 }, { "epoch": 0.4883108593313652, "grad_norm": 0.364305704832077, "learning_rate": 9.884008146697352e-06, "loss": 0.2617, "step": 255 }, { "epoch": 0.4902258038777627, "grad_norm": 0.33573174476623535, "learning_rate": 9.881613765443174e-06, "loss": 0.3184, "step": 256 }, { "epoch": 0.4921407484241602, "grad_norm": 0.3591967821121216, "learning_rate": 9.879195218811282e-06, "loss": 0.2734, "step": 257 }, { "epoch": 0.49405569297055774, "grad_norm": 0.39021801948547363, "learning_rate": 9.876752518774167e-06, "loss": 0.334, "step": 258 }, { "epoch": 0.49597063751695525, "grad_norm": 0.36973780393600464, "learning_rate": 9.87428567742389e-06, "loss": 0.2812, "step": 259 }, { "epoch": 0.49788558206335276, "grad_norm": 0.3602698743343353, "learning_rate": 9.87179470697202e-06, "loss": 0.2793, "step": 260 }, { "epoch": 0.4998005266097503, "grad_norm": 0.4194125831127167, "learning_rate": 9.86927961974957e-06, "loss": 0.3711, "step": 261 }, { "epoch": 0.5017154711561478, "grad_norm": 0.39133426547050476, "learning_rate": 9.866740428206935e-06, "loss": 0.291, "step": 262 }, { "epoch": 0.5036304157025453, "grad_norm": 0.3631567060947418, "learning_rate": 9.864177144913837e-06, "loss": 0.3145, "step": 263 }, { "epoch": 0.5055453602489428, "grad_norm": 0.3521382212638855, "learning_rate": 9.86158978255926e-06, "loss": 0.3184, "step": 264 }, { "epoch": 0.5074603047953403, "grad_norm": 0.26583707332611084, "learning_rate": 9.85897835395138e-06, "loss": 0.2197, "step": 265 }, { "epoch": 0.5093752493417378, "grad_norm": 0.39313459396362305, "learning_rate": 9.856342872017515e-06, "loss": 0.3477, "step": 266 }, { "epoch": 0.5112901938881353, "grad_norm": 0.3507327139377594, "learning_rate": 9.85368334980405e-06, "loss": 0.332, "step": 267 }, { "epoch": 0.5132051384345329, "grad_norm": 0.36972326040267944, "learning_rate": 9.85099980047638e-06, "loss": 0.3105, "step": 268 }, { "epoch": 0.5151200829809304, "grad_norm": 0.44454941153526306, "learning_rate": 9.84829223731883e-06, "loss": 0.3984, "step": 269 }, { "epoch": 0.5170350275273279, "grad_norm": 0.38550686836242676, "learning_rate": 9.845560673734617e-06, "loss": 0.3789, "step": 270 }, { "epoch": 0.5189499720737254, "grad_norm": 0.32081368565559387, "learning_rate": 9.842805123245756e-06, "loss": 0.2695, "step": 271 }, { "epoch": 0.5208649166201229, "grad_norm": 0.365355521440506, "learning_rate": 9.840025599493002e-06, "loss": 0.2871, "step": 272 }, { "epoch": 0.5227798611665204, "grad_norm": 0.39958202838897705, "learning_rate": 9.837222116235793e-06, "loss": 0.3828, "step": 273 }, { "epoch": 0.5246948057129179, "grad_norm": 0.40972256660461426, "learning_rate": 9.834394687352168e-06, "loss": 0.4688, "step": 274 }, { "epoch": 0.5266097502593154, "grad_norm": 0.3923720419406891, "learning_rate": 9.831543326838708e-06, "loss": 0.3008, "step": 275 }, { "epoch": 0.528524694805713, "grad_norm": 0.40133339166641235, "learning_rate": 9.828668048810452e-06, "loss": 0.3457, "step": 276 }, { "epoch": 0.5304396393521105, "grad_norm": 0.3792482912540436, "learning_rate": 9.82576886750085e-06, "loss": 0.3438, "step": 277 }, { "epoch": 0.532354583898508, "grad_norm": 0.37136179208755493, "learning_rate": 9.822845797261676e-06, "loss": 0.3359, "step": 278 }, { "epoch": 0.5342695284449055, "grad_norm": 0.3462713062763214, "learning_rate": 9.819898852562954e-06, "loss": 0.2578, "step": 279 }, { "epoch": 0.536184472991303, "grad_norm": 0.47859999537467957, "learning_rate": 9.816928047992904e-06, "loss": 0.3789, "step": 280 }, { "epoch": 0.5380994175377005, "grad_norm": 0.3499022126197815, "learning_rate": 9.813933398257852e-06, "loss": 0.3047, "step": 281 }, { "epoch": 0.540014362084098, "grad_norm": 0.3872099816799164, "learning_rate": 9.810914918182168e-06, "loss": 0.3203, "step": 282 }, { "epoch": 0.5419293066304955, "grad_norm": 0.3496979773044586, "learning_rate": 9.807872622708187e-06, "loss": 0.3008, "step": 283 }, { "epoch": 0.5438442511768931, "grad_norm": 0.39993637800216675, "learning_rate": 9.804806526896138e-06, "loss": 0.3086, "step": 284 }, { "epoch": 0.5457591957232905, "grad_norm": 0.3726489245891571, "learning_rate": 9.801716645924066e-06, "loss": 0.2949, "step": 285 }, { "epoch": 0.547674140269688, "grad_norm": 0.5097160935401917, "learning_rate": 9.798602995087764e-06, "loss": 0.2871, "step": 286 }, { "epoch": 0.5495890848160855, "grad_norm": 0.4128972291946411, "learning_rate": 9.795465589800688e-06, "loss": 0.3848, "step": 287 }, { "epoch": 0.551504029362483, "grad_norm": 0.35612717270851135, "learning_rate": 9.79230444559389e-06, "loss": 0.3398, "step": 288 }, { "epoch": 0.5534189739088805, "grad_norm": 0.34881824254989624, "learning_rate": 9.78911957811593e-06, "loss": 0.3281, "step": 289 }, { "epoch": 0.555333918455278, "grad_norm": 0.35632357001304626, "learning_rate": 9.785911003132811e-06, "loss": 0.2852, "step": 290 }, { "epoch": 0.5572488630016755, "grad_norm": 0.37318652868270874, "learning_rate": 9.782678736527892e-06, "loss": 0.2871, "step": 291 }, { "epoch": 0.559163807548073, "grad_norm": 0.44519978761672974, "learning_rate": 9.779422794301812e-06, "loss": 0.3242, "step": 292 }, { "epoch": 0.5610787520944706, "grad_norm": 0.35489603877067566, "learning_rate": 9.776143192572414e-06, "loss": 0.332, "step": 293 }, { "epoch": 0.5629936966408681, "grad_norm": 0.42313477396965027, "learning_rate": 9.772839947574658e-06, "loss": 0.3672, "step": 294 }, { "epoch": 0.5649086411872656, "grad_norm": 0.35833480954170227, "learning_rate": 9.769513075660548e-06, "loss": 0.3203, "step": 295 }, { "epoch": 0.5668235857336631, "grad_norm": 0.4363710880279541, "learning_rate": 9.766162593299047e-06, "loss": 0.4043, "step": 296 }, { "epoch": 0.5687385302800606, "grad_norm": 0.42050284147262573, "learning_rate": 9.762788517075994e-06, "loss": 0.459, "step": 297 }, { "epoch": 0.5706534748264581, "grad_norm": 0.4468521177768707, "learning_rate": 9.75939086369403e-06, "loss": 0.3867, "step": 298 }, { "epoch": 0.5725684193728556, "grad_norm": 0.40442460775375366, "learning_rate": 9.755969649972507e-06, "loss": 0.3301, "step": 299 }, { "epoch": 0.5744833639192531, "grad_norm": 0.39085012674331665, "learning_rate": 9.752524892847402e-06, "loss": 0.3145, "step": 300 } ], "logging_steps": 1, "max_steps": 1569, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.6708373985367163e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }