{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 25000, "global_step": 304006, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.578817523338355e-05, "grad_norm": 5.140436751351711, "learning_rate": 3.2893654813986387e-09, "loss": 0.4497, "step": 10 }, { "epoch": 0.0001315763504667671, "grad_norm": 6.960848198201245, "learning_rate": 6.578730962797277e-09, "loss": 0.445, "step": 20 }, { "epoch": 0.00019736452570015065, "grad_norm": 6.326797398058807, "learning_rate": 9.868096444195915e-09, "loss": 0.4553, "step": 30 }, { "epoch": 0.0002631527009335342, "grad_norm": 4.583389396920319, "learning_rate": 1.3157461925594555e-08, "loss": 0.4405, "step": 40 }, { "epoch": 0.0003289408761669178, "grad_norm": 6.130662113104339, "learning_rate": 1.6446827406993193e-08, "loss": 0.4788, "step": 50 }, { "epoch": 0.0003947290514003013, "grad_norm": 5.676164799760723, "learning_rate": 1.973619288839183e-08, "loss": 0.4659, "step": 60 }, { "epoch": 0.0004605172266336849, "grad_norm": 4.974585265733263, "learning_rate": 2.302555836979047e-08, "loss": 0.4328, "step": 70 }, { "epoch": 0.0005263054018670684, "grad_norm": 4.231630728934762, "learning_rate": 2.631492385118911e-08, "loss": 0.4606, "step": 80 }, { "epoch": 0.000592093577100452, "grad_norm": 6.82758800389898, "learning_rate": 2.9604289332587746e-08, "loss": 0.4619, "step": 90 }, { "epoch": 0.0006578817523338356, "grad_norm": 6.165799059464612, "learning_rate": 3.2893654813986386e-08, "loss": 0.4172, "step": 100 }, { "epoch": 0.000723669927567219, "grad_norm": 4.8616182337420915, "learning_rate": 3.6183020295385026e-08, "loss": 0.4316, "step": 110 }, { "epoch": 0.0007894581028006026, "grad_norm": 8.443847792232482, "learning_rate": 3.947238577678366e-08, "loss": 0.4503, "step": 120 }, { "epoch": 0.0008552462780339862, "grad_norm": 6.467424895331107, "learning_rate": 4.2761751258182305e-08, "loss": 0.4646, "step": 130 }, { "epoch": 0.0009210344532673698, "grad_norm": 5.138630327341659, "learning_rate": 4.605111673958094e-08, "loss": 0.4369, "step": 140 }, { "epoch": 0.0009868226285007532, "grad_norm": 5.5569396666444115, "learning_rate": 4.934048222097957e-08, "loss": 0.4468, "step": 150 }, { "epoch": 0.0010526108037341368, "grad_norm": 5.523648591646886, "learning_rate": 5.262984770237822e-08, "loss": 0.4585, "step": 160 }, { "epoch": 0.0011183989789675204, "grad_norm": 5.29673127576729, "learning_rate": 5.591921318377685e-08, "loss": 0.4041, "step": 170 }, { "epoch": 0.001184187154200904, "grad_norm": 6.046864186139962, "learning_rate": 5.920857866517549e-08, "loss": 0.4117, "step": 180 }, { "epoch": 0.0012499753294342875, "grad_norm": 9.385955544719563, "learning_rate": 6.249794414657413e-08, "loss": 0.4078, "step": 190 }, { "epoch": 0.0013157635046676711, "grad_norm": 6.455608274237018, "learning_rate": 6.578730962797277e-08, "loss": 0.4269, "step": 200 }, { "epoch": 0.0013815516799010547, "grad_norm": 3.932927747976084, "learning_rate": 6.907667510937141e-08, "loss": 0.3875, "step": 210 }, { "epoch": 0.001447339855134438, "grad_norm": 6.592047428003099, "learning_rate": 7.236604059077005e-08, "loss": 0.383, "step": 220 }, { "epoch": 0.0015131280303678216, "grad_norm": 3.7069312386207285, "learning_rate": 7.565540607216869e-08, "loss": 0.3697, "step": 230 }, { "epoch": 0.0015789162056012052, "grad_norm": 3.9874865895803735, "learning_rate": 7.894477155356732e-08, "loss": 0.3302, "step": 240 }, { "epoch": 0.0016447043808345888, "grad_norm": 4.655402392973136, "learning_rate": 8.223413703496596e-08, "loss": 0.3285, "step": 250 }, { "epoch": 0.0017104925560679724, "grad_norm": 3.9138038960664945, "learning_rate": 8.552350251636461e-08, "loss": 0.3271, "step": 260 }, { "epoch": 0.001776280731301356, "grad_norm": 3.4989125919037196, "learning_rate": 8.881286799776324e-08, "loss": 0.3014, "step": 270 }, { "epoch": 0.0018420689065347395, "grad_norm": 2.5954799561167774, "learning_rate": 9.210223347916188e-08, "loss": 0.3036, "step": 280 }, { "epoch": 0.001907857081768123, "grad_norm": 2.937031167840467, "learning_rate": 9.539159896056052e-08, "loss": 0.2747, "step": 290 }, { "epoch": 0.0019736452570015064, "grad_norm": 4.768465418544338, "learning_rate": 9.868096444195914e-08, "loss": 0.2695, "step": 300 }, { "epoch": 0.00203943343223489, "grad_norm": 2.3676048404529637, "learning_rate": 1.0197032992335778e-07, "loss": 0.2684, "step": 310 }, { "epoch": 0.0021052216074682736, "grad_norm": 2.261923765447969, "learning_rate": 1.0525969540475644e-07, "loss": 0.2319, "step": 320 }, { "epoch": 0.002171009782701657, "grad_norm": 2.3571697144315076, "learning_rate": 1.0854906088615508e-07, "loss": 0.211, "step": 330 }, { "epoch": 0.0022367979579350408, "grad_norm": 3.060102396786887, "learning_rate": 1.118384263675537e-07, "loss": 0.207, "step": 340 }, { "epoch": 0.0023025861331684243, "grad_norm": 2.3843554057947185, "learning_rate": 1.1512779184895234e-07, "loss": 0.1904, "step": 350 }, { "epoch": 0.002368374308401808, "grad_norm": 2.007147921565353, "learning_rate": 1.1841715733035098e-07, "loss": 0.1864, "step": 360 }, { "epoch": 0.0024341624836351915, "grad_norm": 2.3011592485810555, "learning_rate": 1.217065228117496e-07, "loss": 0.1934, "step": 370 }, { "epoch": 0.002499950658868575, "grad_norm": 1.6626959729321424, "learning_rate": 1.2499588829314826e-07, "loss": 0.1816, "step": 380 }, { "epoch": 0.0025657388341019586, "grad_norm": 1.7812167867010924, "learning_rate": 1.2828525377454692e-07, "loss": 0.1489, "step": 390 }, { "epoch": 0.0026315270093353422, "grad_norm": 1.5960346835825434, "learning_rate": 1.3157461925594554e-07, "loss": 0.1559, "step": 400 }, { "epoch": 0.002697315184568726, "grad_norm": 1.9064351943554632, "learning_rate": 1.3486398473734417e-07, "loss": 0.1568, "step": 410 }, { "epoch": 0.0027631033598021094, "grad_norm": 1.9784192907474563, "learning_rate": 1.3815335021874282e-07, "loss": 0.1457, "step": 420 }, { "epoch": 0.0028288915350354925, "grad_norm": 2.881722588131491, "learning_rate": 1.4144271570014145e-07, "loss": 0.1322, "step": 430 }, { "epoch": 0.002894679710268876, "grad_norm": 1.8028148136942268, "learning_rate": 1.447320811815401e-07, "loss": 0.1078, "step": 440 }, { "epoch": 0.0029604678855022597, "grad_norm": 1.8830324452719418, "learning_rate": 1.4802144666293873e-07, "loss": 0.1071, "step": 450 }, { "epoch": 0.0030262560607356432, "grad_norm": 2.0750926064122774, "learning_rate": 1.5131081214433738e-07, "loss": 0.1406, "step": 460 }, { "epoch": 0.003092044235969027, "grad_norm": 2.0428943516795104, "learning_rate": 1.54600177625736e-07, "loss": 0.1246, "step": 470 }, { "epoch": 0.0031578324112024104, "grad_norm": 1.5992576922682145, "learning_rate": 1.5788954310713464e-07, "loss": 0.1065, "step": 480 }, { "epoch": 0.003223620586435794, "grad_norm": 1.6696505042336667, "learning_rate": 1.611789085885333e-07, "loss": 0.1015, "step": 490 }, { "epoch": 0.0032894087616691776, "grad_norm": 1.773238300067517, "learning_rate": 1.6446827406993192e-07, "loss": 0.119, "step": 500 }, { "epoch": 0.003355196936902561, "grad_norm": 1.7085641191630985, "learning_rate": 1.6775763955133057e-07, "loss": 0.1086, "step": 510 }, { "epoch": 0.0034209851121359447, "grad_norm": 1.2284024310209574, "learning_rate": 1.7104700503272922e-07, "loss": 0.093, "step": 520 }, { "epoch": 0.0034867732873693283, "grad_norm": 2.21283322060087, "learning_rate": 1.7433637051412782e-07, "loss": 0.0871, "step": 530 }, { "epoch": 0.003552561462602712, "grad_norm": 2.049244398585456, "learning_rate": 1.7762573599552648e-07, "loss": 0.0905, "step": 540 }, { "epoch": 0.0036183496378360954, "grad_norm": 1.6225441210881235, "learning_rate": 1.809151014769251e-07, "loss": 0.0902, "step": 550 }, { "epoch": 0.003684137813069479, "grad_norm": 1.5375612138934278, "learning_rate": 1.8420446695832376e-07, "loss": 0.103, "step": 560 }, { "epoch": 0.0037499259883028626, "grad_norm": 1.378156268314331, "learning_rate": 1.8749383243972238e-07, "loss": 0.1028, "step": 570 }, { "epoch": 0.003815714163536246, "grad_norm": 1.009157896520854, "learning_rate": 1.9078319792112103e-07, "loss": 0.0896, "step": 580 }, { "epoch": 0.0038815023387696298, "grad_norm": 1.5467709227562763, "learning_rate": 1.940725634025197e-07, "loss": 0.0885, "step": 590 }, { "epoch": 0.003947290514003013, "grad_norm": 1.7527600376023782, "learning_rate": 1.973619288839183e-07, "loss": 0.0901, "step": 600 }, { "epoch": 0.004013078689236397, "grad_norm": 1.3503590384647888, "learning_rate": 2.0065129436531694e-07, "loss": 0.0891, "step": 610 }, { "epoch": 0.00407886686446978, "grad_norm": 1.3570720279058475, "learning_rate": 2.0394065984671557e-07, "loss": 0.0864, "step": 620 }, { "epoch": 0.004144655039703164, "grad_norm": 1.5604502795058706, "learning_rate": 2.0723002532811422e-07, "loss": 0.0834, "step": 630 }, { "epoch": 0.004210443214936547, "grad_norm": 1.5887155185130852, "learning_rate": 2.1051939080951287e-07, "loss": 0.0875, "step": 640 }, { "epoch": 0.004276231390169931, "grad_norm": 1.525618797128787, "learning_rate": 2.138087562909115e-07, "loss": 0.0943, "step": 650 }, { "epoch": 0.004342019565403314, "grad_norm": 1.8841789576628, "learning_rate": 2.1709812177231015e-07, "loss": 0.0751, "step": 660 }, { "epoch": 0.004407807740636698, "grad_norm": 1.5639039031032087, "learning_rate": 2.2038748725370875e-07, "loss": 0.0681, "step": 670 }, { "epoch": 0.0044735959158700815, "grad_norm": 1.4061349349431433, "learning_rate": 2.236768527351074e-07, "loss": 0.0846, "step": 680 }, { "epoch": 0.004539384091103465, "grad_norm": 1.3827873292582429, "learning_rate": 2.2696621821650603e-07, "loss": 0.0738, "step": 690 }, { "epoch": 0.004605172266336849, "grad_norm": 2.0560009880443357, "learning_rate": 2.302555836979047e-07, "loss": 0.0742, "step": 700 }, { "epoch": 0.004670960441570232, "grad_norm": 1.443408506693044, "learning_rate": 2.3354494917930334e-07, "loss": 0.068, "step": 710 }, { "epoch": 0.004736748616803616, "grad_norm": 1.6406350385856892, "learning_rate": 2.3683431466070197e-07, "loss": 0.0728, "step": 720 }, { "epoch": 0.004802536792036999, "grad_norm": 1.9484258724957386, "learning_rate": 2.401236801421006e-07, "loss": 0.0707, "step": 730 }, { "epoch": 0.004868324967270383, "grad_norm": 1.1530847882111859, "learning_rate": 2.434130456234992e-07, "loss": 0.0616, "step": 740 }, { "epoch": 0.004934113142503766, "grad_norm": 1.5096901058477739, "learning_rate": 2.467024111048979e-07, "loss": 0.076, "step": 750 }, { "epoch": 0.00499990131773715, "grad_norm": 1.4447196580827169, "learning_rate": 2.4999177658629653e-07, "loss": 0.0678, "step": 760 }, { "epoch": 0.005065689492970533, "grad_norm": 1.4390041714701234, "learning_rate": 2.5328114206769515e-07, "loss": 0.0654, "step": 770 }, { "epoch": 0.005131477668203917, "grad_norm": 1.3749955595045402, "learning_rate": 2.5657050754909383e-07, "loss": 0.057, "step": 780 }, { "epoch": 0.0051972658434373, "grad_norm": 1.2447330817831304, "learning_rate": 2.598598730304924e-07, "loss": 0.0622, "step": 790 }, { "epoch": 0.0052630540186706844, "grad_norm": 1.65913151421609, "learning_rate": 2.631492385118911e-07, "loss": 0.0634, "step": 800 }, { "epoch": 0.005328842193904068, "grad_norm": 1.1879622216839991, "learning_rate": 2.664386039932897e-07, "loss": 0.0571, "step": 810 }, { "epoch": 0.005394630369137452, "grad_norm": 1.1791169583041436, "learning_rate": 2.6972796947468834e-07, "loss": 0.0595, "step": 820 }, { "epoch": 0.005460418544370835, "grad_norm": 1.2166379400486969, "learning_rate": 2.73017334956087e-07, "loss": 0.0543, "step": 830 }, { "epoch": 0.005526206719604219, "grad_norm": 1.4931895987152397, "learning_rate": 2.7630670043748565e-07, "loss": 0.0603, "step": 840 }, { "epoch": 0.005591994894837602, "grad_norm": 1.9146760748051577, "learning_rate": 2.7959606591888427e-07, "loss": 0.058, "step": 850 }, { "epoch": 0.005657783070070985, "grad_norm": 1.4263549993379119, "learning_rate": 2.828854314002829e-07, "loss": 0.0589, "step": 860 }, { "epoch": 0.005723571245304369, "grad_norm": 1.3760870673312209, "learning_rate": 2.861747968816815e-07, "loss": 0.0602, "step": 870 }, { "epoch": 0.005789359420537752, "grad_norm": 1.7962260139826807, "learning_rate": 2.894641623630802e-07, "loss": 0.0652, "step": 880 }, { "epoch": 0.005855147595771136, "grad_norm": 1.3446360370735333, "learning_rate": 2.9275352784447883e-07, "loss": 0.0625, "step": 890 }, { "epoch": 0.005920935771004519, "grad_norm": 1.6848654855778726, "learning_rate": 2.9604289332587746e-07, "loss": 0.06, "step": 900 }, { "epoch": 0.005986723946237903, "grad_norm": 1.228212621843868, "learning_rate": 2.993322588072761e-07, "loss": 0.0491, "step": 910 }, { "epoch": 0.0060525121214712865, "grad_norm": 1.2456872926747429, "learning_rate": 3.0262162428867477e-07, "loss": 0.0553, "step": 920 }, { "epoch": 0.0061183002967046705, "grad_norm": 1.378447118968889, "learning_rate": 3.059109897700734e-07, "loss": 0.0575, "step": 930 }, { "epoch": 0.006184088471938054, "grad_norm": 1.2310456031677794, "learning_rate": 3.09200355251472e-07, "loss": 0.0438, "step": 940 }, { "epoch": 0.006249876647171438, "grad_norm": 1.3472985578450032, "learning_rate": 3.1248972073287065e-07, "loss": 0.0565, "step": 950 }, { "epoch": 0.006315664822404821, "grad_norm": 1.282870378719704, "learning_rate": 3.1577908621426927e-07, "loss": 0.0448, "step": 960 }, { "epoch": 0.006381452997638205, "grad_norm": 1.3145722900165782, "learning_rate": 3.190684516956679e-07, "loss": 0.0446, "step": 970 }, { "epoch": 0.006447241172871588, "grad_norm": 1.6988338639064389, "learning_rate": 3.223578171770666e-07, "loss": 0.0483, "step": 980 }, { "epoch": 0.006513029348104972, "grad_norm": 1.117727721129522, "learning_rate": 3.256471826584652e-07, "loss": 0.0496, "step": 990 }, { "epoch": 0.006578817523338355, "grad_norm": 1.3495894572903995, "learning_rate": 3.2893654813986383e-07, "loss": 0.0499, "step": 1000 }, { "epoch": 0.006644605698571739, "grad_norm": 1.0338333956792374, "learning_rate": 3.322259136212625e-07, "loss": 0.0434, "step": 1010 }, { "epoch": 0.006710393873805122, "grad_norm": 1.1430635242079215, "learning_rate": 3.3551527910266114e-07, "loss": 0.0517, "step": 1020 }, { "epoch": 0.006776182049038505, "grad_norm": 1.163821681648826, "learning_rate": 3.3880464458405976e-07, "loss": 0.0459, "step": 1030 }, { "epoch": 0.006841970224271889, "grad_norm": 1.0664936393635867, "learning_rate": 3.4209401006545844e-07, "loss": 0.0467, "step": 1040 }, { "epoch": 0.006907758399505273, "grad_norm": 1.3888797445040726, "learning_rate": 3.4538337554685707e-07, "loss": 0.0474, "step": 1050 }, { "epoch": 0.006973546574738657, "grad_norm": 1.318865418762962, "learning_rate": 3.4867274102825564e-07, "loss": 0.0378, "step": 1060 }, { "epoch": 0.00703933474997204, "grad_norm": 1.5696355104718978, "learning_rate": 3.5196210650965427e-07, "loss": 0.0401, "step": 1070 }, { "epoch": 0.007105122925205424, "grad_norm": 1.2014114968146905, "learning_rate": 3.5525147199105295e-07, "loss": 0.0511, "step": 1080 }, { "epoch": 0.007170911100438807, "grad_norm": 1.2285634216284487, "learning_rate": 3.585408374724516e-07, "loss": 0.0501, "step": 1090 }, { "epoch": 0.007236699275672191, "grad_norm": 1.853446972806856, "learning_rate": 3.618302029538502e-07, "loss": 0.0406, "step": 1100 }, { "epoch": 0.007302487450905574, "grad_norm": 1.4972900968149492, "learning_rate": 3.651195684352489e-07, "loss": 0.0358, "step": 1110 }, { "epoch": 0.007368275626138958, "grad_norm": 1.5742253254479626, "learning_rate": 3.684089339166475e-07, "loss": 0.0521, "step": 1120 }, { "epoch": 0.007434063801372341, "grad_norm": 1.2825633198803554, "learning_rate": 3.7169829939804614e-07, "loss": 0.0411, "step": 1130 }, { "epoch": 0.007499851976605725, "grad_norm": 1.3445242510332422, "learning_rate": 3.7498766487944476e-07, "loss": 0.0335, "step": 1140 }, { "epoch": 0.007565640151839108, "grad_norm": 1.124610534574388, "learning_rate": 3.7827703036084344e-07, "loss": 0.047, "step": 1150 }, { "epoch": 0.007631428327072492, "grad_norm": 1.2034428117223053, "learning_rate": 3.8156639584224207e-07, "loss": 0.0413, "step": 1160 }, { "epoch": 0.0076972165023058755, "grad_norm": 0.9445823562929663, "learning_rate": 3.848557613236407e-07, "loss": 0.0409, "step": 1170 }, { "epoch": 0.0077630046775392595, "grad_norm": 1.0395737847495181, "learning_rate": 3.881451268050394e-07, "loss": 0.0403, "step": 1180 }, { "epoch": 0.007828792852772642, "grad_norm": 1.6103056428180047, "learning_rate": 3.91434492286438e-07, "loss": 0.0424, "step": 1190 }, { "epoch": 0.007894581028006026, "grad_norm": 1.0119211951987752, "learning_rate": 3.947238577678366e-07, "loss": 0.0488, "step": 1200 }, { "epoch": 0.00796036920323941, "grad_norm": 1.0384949825700627, "learning_rate": 3.980132232492353e-07, "loss": 0.0428, "step": 1210 }, { "epoch": 0.008026157378472794, "grad_norm": 1.2431405772485062, "learning_rate": 4.013025887306339e-07, "loss": 0.04, "step": 1220 }, { "epoch": 0.008091945553706176, "grad_norm": 0.796019143876453, "learning_rate": 4.045919542120325e-07, "loss": 0.0361, "step": 1230 }, { "epoch": 0.00815773372893956, "grad_norm": 1.203477809674857, "learning_rate": 4.0788131969343114e-07, "loss": 0.0411, "step": 1240 }, { "epoch": 0.008223521904172944, "grad_norm": 1.1588443150952243, "learning_rate": 4.111706851748298e-07, "loss": 0.027, "step": 1250 }, { "epoch": 0.008289310079406328, "grad_norm": 1.0098894156337885, "learning_rate": 4.1446005065622844e-07, "loss": 0.0414, "step": 1260 }, { "epoch": 0.00835509825463971, "grad_norm": 1.08329173106493, "learning_rate": 4.1774941613762707e-07, "loss": 0.0407, "step": 1270 }, { "epoch": 0.008420886429873094, "grad_norm": 1.33935533737454, "learning_rate": 4.2103878161902575e-07, "loss": 0.0353, "step": 1280 }, { "epoch": 0.008486674605106478, "grad_norm": 0.9407224042350913, "learning_rate": 4.243281471004244e-07, "loss": 0.0479, "step": 1290 }, { "epoch": 0.008552462780339862, "grad_norm": 1.2903238882171486, "learning_rate": 4.27617512581823e-07, "loss": 0.0402, "step": 1300 }, { "epoch": 0.008618250955573245, "grad_norm": 1.435041409743645, "learning_rate": 4.3090687806322163e-07, "loss": 0.0359, "step": 1310 }, { "epoch": 0.008684039130806629, "grad_norm": 1.2691945555659012, "learning_rate": 4.341962435446203e-07, "loss": 0.0354, "step": 1320 }, { "epoch": 0.008749827306040013, "grad_norm": 1.1523623668076972, "learning_rate": 4.3748560902601893e-07, "loss": 0.0359, "step": 1330 }, { "epoch": 0.008815615481273397, "grad_norm": 1.3250293809108329, "learning_rate": 4.407749745074175e-07, "loss": 0.0291, "step": 1340 }, { "epoch": 0.008881403656506779, "grad_norm": 1.6174320231775268, "learning_rate": 4.4406433998881624e-07, "loss": 0.0387, "step": 1350 }, { "epoch": 0.008947191831740163, "grad_norm": 1.4859429136268998, "learning_rate": 4.473537054702148e-07, "loss": 0.0328, "step": 1360 }, { "epoch": 0.009012980006973547, "grad_norm": 1.0031492284692978, "learning_rate": 4.5064307095161344e-07, "loss": 0.0448, "step": 1370 }, { "epoch": 0.00907876818220693, "grad_norm": 1.2033080423682312, "learning_rate": 4.5393243643301207e-07, "loss": 0.0393, "step": 1380 }, { "epoch": 0.009144556357440313, "grad_norm": 1.3487777423322331, "learning_rate": 4.5722180191441075e-07, "loss": 0.0371, "step": 1390 }, { "epoch": 0.009210344532673697, "grad_norm": 0.9865208241174178, "learning_rate": 4.605111673958094e-07, "loss": 0.0342, "step": 1400 }, { "epoch": 0.009276132707907081, "grad_norm": 1.0433836959429075, "learning_rate": 4.63800532877208e-07, "loss": 0.0339, "step": 1410 }, { "epoch": 0.009341920883140464, "grad_norm": 1.0560513726543517, "learning_rate": 4.670898983586067e-07, "loss": 0.0343, "step": 1420 }, { "epoch": 0.009407709058373848, "grad_norm": 1.1904769248567244, "learning_rate": 4.703792638400053e-07, "loss": 0.0437, "step": 1430 }, { "epoch": 0.009473497233607232, "grad_norm": 1.0177580515947824, "learning_rate": 4.7366862932140393e-07, "loss": 0.0287, "step": 1440 }, { "epoch": 0.009539285408840616, "grad_norm": 1.261709620102382, "learning_rate": 4.769579948028026e-07, "loss": 0.0287, "step": 1450 }, { "epoch": 0.009605073584073998, "grad_norm": 1.0803826064136302, "learning_rate": 4.802473602842012e-07, "loss": 0.0326, "step": 1460 }, { "epoch": 0.009670861759307382, "grad_norm": 1.0444603453895325, "learning_rate": 4.835367257655999e-07, "loss": 0.0262, "step": 1470 }, { "epoch": 0.009736649934540766, "grad_norm": 0.9596670077672101, "learning_rate": 4.868260912469984e-07, "loss": 0.0296, "step": 1480 }, { "epoch": 0.00980243810977415, "grad_norm": 1.0525178264040818, "learning_rate": 4.901154567283971e-07, "loss": 0.0329, "step": 1490 }, { "epoch": 0.009868226285007532, "grad_norm": 1.0774107377359559, "learning_rate": 4.934048222097958e-07, "loss": 0.0328, "step": 1500 }, { "epoch": 0.009934014460240916, "grad_norm": 1.3244848168719379, "learning_rate": 4.966941876911944e-07, "loss": 0.029, "step": 1510 }, { "epoch": 0.0099998026354743, "grad_norm": 1.334044083212868, "learning_rate": 4.999835531725931e-07, "loss": 0.0287, "step": 1520 }, { "epoch": 0.010065590810707683, "grad_norm": 0.765313602184042, "learning_rate": 5.032729186539917e-07, "loss": 0.0269, "step": 1530 }, { "epoch": 0.010131378985941067, "grad_norm": 0.8281323307127697, "learning_rate": 5.065622841353903e-07, "loss": 0.0277, "step": 1540 }, { "epoch": 0.01019716716117445, "grad_norm": 1.2559620688206763, "learning_rate": 5.098516496167889e-07, "loss": 0.0311, "step": 1550 }, { "epoch": 0.010262955336407835, "grad_norm": 1.1598064363096006, "learning_rate": 5.131410150981877e-07, "loss": 0.0403, "step": 1560 }, { "epoch": 0.010328743511641217, "grad_norm": 1.0477428937345807, "learning_rate": 5.164303805795862e-07, "loss": 0.0316, "step": 1570 }, { "epoch": 0.0103945316868746, "grad_norm": 0.8801558461101728, "learning_rate": 5.197197460609848e-07, "loss": 0.0311, "step": 1580 }, { "epoch": 0.010460319862107985, "grad_norm": 0.8609531603340911, "learning_rate": 5.230091115423835e-07, "loss": 0.0325, "step": 1590 }, { "epoch": 0.010526108037341369, "grad_norm": 0.8403890038183804, "learning_rate": 5.262984770237822e-07, "loss": 0.0236, "step": 1600 }, { "epoch": 0.010591896212574751, "grad_norm": 0.9339400170478332, "learning_rate": 5.295878425051807e-07, "loss": 0.0304, "step": 1610 }, { "epoch": 0.010657684387808135, "grad_norm": 1.0960893499730546, "learning_rate": 5.328772079865794e-07, "loss": 0.0325, "step": 1620 }, { "epoch": 0.01072347256304152, "grad_norm": 0.9308428900542113, "learning_rate": 5.361665734679781e-07, "loss": 0.0329, "step": 1630 }, { "epoch": 0.010789260738274903, "grad_norm": 1.2112125978662729, "learning_rate": 5.394559389493767e-07, "loss": 0.0334, "step": 1640 }, { "epoch": 0.010855048913508285, "grad_norm": 1.0302781958139824, "learning_rate": 5.427453044307754e-07, "loss": 0.0293, "step": 1650 }, { "epoch": 0.01092083708874167, "grad_norm": 0.9974291038770762, "learning_rate": 5.46034669912174e-07, "loss": 0.0327, "step": 1660 }, { "epoch": 0.010986625263975053, "grad_norm": 0.7670315781342885, "learning_rate": 5.493240353935726e-07, "loss": 0.0249, "step": 1670 }, { "epoch": 0.011052413439208437, "grad_norm": 0.7478647021049816, "learning_rate": 5.526134008749713e-07, "loss": 0.0217, "step": 1680 }, { "epoch": 0.01111820161444182, "grad_norm": 1.1130324674378704, "learning_rate": 5.5590276635637e-07, "loss": 0.0298, "step": 1690 }, { "epoch": 0.011183989789675204, "grad_norm": 1.1529051092003717, "learning_rate": 5.591921318377685e-07, "loss": 0.0226, "step": 1700 }, { "epoch": 0.011249777964908588, "grad_norm": 1.3715035522173997, "learning_rate": 5.624814973191671e-07, "loss": 0.0231, "step": 1710 }, { "epoch": 0.01131556614014197, "grad_norm": 1.5639227616623879, "learning_rate": 5.657708628005658e-07, "loss": 0.0295, "step": 1720 }, { "epoch": 0.011381354315375354, "grad_norm": 0.8848995323725682, "learning_rate": 5.690602282819645e-07, "loss": 0.0266, "step": 1730 }, { "epoch": 0.011447142490608738, "grad_norm": 1.0337987103996913, "learning_rate": 5.72349593763363e-07, "loss": 0.0221, "step": 1740 }, { "epoch": 0.011512930665842122, "grad_norm": 1.1064072969786722, "learning_rate": 5.756389592447617e-07, "loss": 0.0284, "step": 1750 }, { "epoch": 0.011578718841075504, "grad_norm": 0.8166529224920791, "learning_rate": 5.789283247261604e-07, "loss": 0.029, "step": 1760 }, { "epoch": 0.011644507016308888, "grad_norm": 1.3474738283344454, "learning_rate": 5.82217690207559e-07, "loss": 0.0375, "step": 1770 }, { "epoch": 0.011710295191542272, "grad_norm": 1.0489653908620278, "learning_rate": 5.855070556889577e-07, "loss": 0.0223, "step": 1780 }, { "epoch": 0.011776083366775656, "grad_norm": 1.4430221020773808, "learning_rate": 5.887964211703563e-07, "loss": 0.0338, "step": 1790 }, { "epoch": 0.011841871542009039, "grad_norm": 0.9733434456570851, "learning_rate": 5.920857866517549e-07, "loss": 0.0252, "step": 1800 }, { "epoch": 0.011907659717242423, "grad_norm": 0.3359877465993715, "learning_rate": 5.953751521331536e-07, "loss": 0.0267, "step": 1810 }, { "epoch": 0.011973447892475807, "grad_norm": 0.6973418291181533, "learning_rate": 5.986645176145522e-07, "loss": 0.0227, "step": 1820 }, { "epoch": 0.01203923606770919, "grad_norm": 1.1478180195368772, "learning_rate": 6.019538830959509e-07, "loss": 0.0297, "step": 1830 }, { "epoch": 0.012105024242942573, "grad_norm": 0.8976730206856561, "learning_rate": 6.052432485773495e-07, "loss": 0.0272, "step": 1840 }, { "epoch": 0.012170812418175957, "grad_norm": 1.0008151796028257, "learning_rate": 6.085326140587481e-07, "loss": 0.0298, "step": 1850 }, { "epoch": 0.012236600593409341, "grad_norm": 0.9507496777846851, "learning_rate": 6.118219795401468e-07, "loss": 0.0276, "step": 1860 }, { "epoch": 0.012302388768642723, "grad_norm": 0.8692876231363963, "learning_rate": 6.151113450215454e-07, "loss": 0.0225, "step": 1870 }, { "epoch": 0.012368176943876107, "grad_norm": 0.8525114629645008, "learning_rate": 6.18400710502944e-07, "loss": 0.0244, "step": 1880 }, { "epoch": 0.012433965119109491, "grad_norm": 1.3703535475417927, "learning_rate": 6.216900759843426e-07, "loss": 0.0246, "step": 1890 }, { "epoch": 0.012499753294342875, "grad_norm": 0.8961270496061547, "learning_rate": 6.249794414657413e-07, "loss": 0.022, "step": 1900 }, { "epoch": 0.012565541469576258, "grad_norm": 0.9645241474528774, "learning_rate": 6.2826880694714e-07, "loss": 0.0223, "step": 1910 }, { "epoch": 0.012631329644809642, "grad_norm": 0.7816993423801425, "learning_rate": 6.315581724285385e-07, "loss": 0.0203, "step": 1920 }, { "epoch": 0.012697117820043026, "grad_norm": 0.9053071658042197, "learning_rate": 6.348475379099372e-07, "loss": 0.0214, "step": 1930 }, { "epoch": 0.01276290599527641, "grad_norm": 0.7495807306705123, "learning_rate": 6.381369033913358e-07, "loss": 0.0321, "step": 1940 }, { "epoch": 0.012828694170509792, "grad_norm": 0.7818474339746628, "learning_rate": 6.414262688727345e-07, "loss": 0.0265, "step": 1950 }, { "epoch": 0.012894482345743176, "grad_norm": 1.0966907767152727, "learning_rate": 6.447156343541332e-07, "loss": 0.0173, "step": 1960 }, { "epoch": 0.01296027052097656, "grad_norm": 0.7693886669878787, "learning_rate": 6.480049998355317e-07, "loss": 0.0231, "step": 1970 }, { "epoch": 0.013026058696209944, "grad_norm": 0.6237145809645335, "learning_rate": 6.512943653169304e-07, "loss": 0.0223, "step": 1980 }, { "epoch": 0.013091846871443326, "grad_norm": 0.7054653036828666, "learning_rate": 6.545837307983291e-07, "loss": 0.023, "step": 1990 }, { "epoch": 0.01315763504667671, "grad_norm": 1.1178929849816706, "learning_rate": 6.578730962797277e-07, "loss": 0.0212, "step": 2000 }, { "epoch": 0.013223423221910094, "grad_norm": 1.1163913458539922, "learning_rate": 6.611624617611263e-07, "loss": 0.0227, "step": 2010 }, { "epoch": 0.013289211397143478, "grad_norm": 1.7359144152031947, "learning_rate": 6.64451827242525e-07, "loss": 0.0252, "step": 2020 }, { "epoch": 0.01335499957237686, "grad_norm": 0.7373966716638206, "learning_rate": 6.677411927239236e-07, "loss": 0.0329, "step": 2030 }, { "epoch": 0.013420787747610245, "grad_norm": 0.720670446091729, "learning_rate": 6.710305582053223e-07, "loss": 0.0194, "step": 2040 }, { "epoch": 0.013486575922843629, "grad_norm": 0.7173425337087885, "learning_rate": 6.74319923686721e-07, "loss": 0.0226, "step": 2050 }, { "epoch": 0.01355236409807701, "grad_norm": 1.1840341572691404, "learning_rate": 6.776092891681195e-07, "loss": 0.0248, "step": 2060 }, { "epoch": 0.013618152273310395, "grad_norm": 0.7100379667987435, "learning_rate": 6.808986546495182e-07, "loss": 0.0175, "step": 2070 }, { "epoch": 0.013683940448543779, "grad_norm": 0.9648864164940105, "learning_rate": 6.841880201309169e-07, "loss": 0.0246, "step": 2080 }, { "epoch": 0.013749728623777163, "grad_norm": 1.133338849025845, "learning_rate": 6.874773856123155e-07, "loss": 0.0188, "step": 2090 }, { "epoch": 0.013815516799010545, "grad_norm": 1.1199248865478284, "learning_rate": 6.907667510937141e-07, "loss": 0.0277, "step": 2100 }, { "epoch": 0.01388130497424393, "grad_norm": 0.9301624395020132, "learning_rate": 6.940561165751126e-07, "loss": 0.0223, "step": 2110 }, { "epoch": 0.013947093149477313, "grad_norm": 0.6920770351295712, "learning_rate": 6.973454820565113e-07, "loss": 0.0166, "step": 2120 }, { "epoch": 0.014012881324710697, "grad_norm": 0.9920554848229586, "learning_rate": 7.006348475379101e-07, "loss": 0.0234, "step": 2130 }, { "epoch": 0.01407866949994408, "grad_norm": 0.6405963593729592, "learning_rate": 7.039242130193085e-07, "loss": 0.0213, "step": 2140 }, { "epoch": 0.014144457675177463, "grad_norm": 0.7345498782353609, "learning_rate": 7.072135785007072e-07, "loss": 0.0208, "step": 2150 }, { "epoch": 0.014210245850410847, "grad_norm": 0.657338774431106, "learning_rate": 7.105029439821059e-07, "loss": 0.0211, "step": 2160 }, { "epoch": 0.014276034025644231, "grad_norm": 0.8109512697411366, "learning_rate": 7.137923094635045e-07, "loss": 0.0153, "step": 2170 }, { "epoch": 0.014341822200877614, "grad_norm": 1.236812143719435, "learning_rate": 7.170816749449032e-07, "loss": 0.018, "step": 2180 }, { "epoch": 0.014407610376110998, "grad_norm": 1.3089229277326815, "learning_rate": 7.203710404263018e-07, "loss": 0.024, "step": 2190 }, { "epoch": 0.014473398551344382, "grad_norm": 1.1946196812701195, "learning_rate": 7.236604059077004e-07, "loss": 0.0236, "step": 2200 }, { "epoch": 0.014539186726577764, "grad_norm": 0.900476342327903, "learning_rate": 7.269497713890991e-07, "loss": 0.0222, "step": 2210 }, { "epoch": 0.014604974901811148, "grad_norm": 1.0951492754035204, "learning_rate": 7.302391368704978e-07, "loss": 0.0193, "step": 2220 }, { "epoch": 0.014670763077044532, "grad_norm": 0.9526399005107948, "learning_rate": 7.335285023518963e-07, "loss": 0.0219, "step": 2230 }, { "epoch": 0.014736551252277916, "grad_norm": 0.7515278192322298, "learning_rate": 7.36817867833295e-07, "loss": 0.0156, "step": 2240 }, { "epoch": 0.014802339427511298, "grad_norm": 0.9051285291285653, "learning_rate": 7.401072333146937e-07, "loss": 0.0193, "step": 2250 }, { "epoch": 0.014868127602744682, "grad_norm": 1.4142533753319715, "learning_rate": 7.433965987960923e-07, "loss": 0.0221, "step": 2260 }, { "epoch": 0.014933915777978066, "grad_norm": 0.7523437445559888, "learning_rate": 7.46685964277491e-07, "loss": 0.023, "step": 2270 }, { "epoch": 0.01499970395321145, "grad_norm": 0.4815237553117995, "learning_rate": 7.499753297588895e-07, "loss": 0.0203, "step": 2280 }, { "epoch": 0.015065492128444833, "grad_norm": 0.6652601026896611, "learning_rate": 7.532646952402882e-07, "loss": 0.0168, "step": 2290 }, { "epoch": 0.015131280303678217, "grad_norm": 1.0894552770567736, "learning_rate": 7.565540607216869e-07, "loss": 0.0211, "step": 2300 }, { "epoch": 0.0151970684789116, "grad_norm": 0.6985470502654849, "learning_rate": 7.598434262030855e-07, "loss": 0.0212, "step": 2310 }, { "epoch": 0.015262856654144985, "grad_norm": 0.6107885384407372, "learning_rate": 7.631327916844841e-07, "loss": 0.0206, "step": 2320 }, { "epoch": 0.015328644829378367, "grad_norm": 1.139853870637523, "learning_rate": 7.664221571658828e-07, "loss": 0.0268, "step": 2330 }, { "epoch": 0.015394433004611751, "grad_norm": 0.8659369554262055, "learning_rate": 7.697115226472814e-07, "loss": 0.0216, "step": 2340 }, { "epoch": 0.015460221179845135, "grad_norm": 1.1454866190723005, "learning_rate": 7.730008881286801e-07, "loss": 0.0219, "step": 2350 }, { "epoch": 0.015526009355078519, "grad_norm": 0.7901205491706788, "learning_rate": 7.762902536100788e-07, "loss": 0.0224, "step": 2360 }, { "epoch": 0.015591797530311901, "grad_norm": 0.7345169661328602, "learning_rate": 7.795796190914773e-07, "loss": 0.0221, "step": 2370 }, { "epoch": 0.015657585705545284, "grad_norm": 0.9186065369695384, "learning_rate": 7.82868984572876e-07, "loss": 0.0226, "step": 2380 }, { "epoch": 0.015723373880778668, "grad_norm": 0.9557391308591117, "learning_rate": 7.861583500542747e-07, "loss": 0.0169, "step": 2390 }, { "epoch": 0.01578916205601205, "grad_norm": 1.2438932955517876, "learning_rate": 7.894477155356732e-07, "loss": 0.0186, "step": 2400 }, { "epoch": 0.015854950231245436, "grad_norm": 0.9125503237577827, "learning_rate": 7.927370810170719e-07, "loss": 0.0214, "step": 2410 }, { "epoch": 0.01592073840647882, "grad_norm": 0.8599551636694743, "learning_rate": 7.960264464984706e-07, "loss": 0.0178, "step": 2420 }, { "epoch": 0.015986526581712204, "grad_norm": 0.9605024575849123, "learning_rate": 7.993158119798691e-07, "loss": 0.0155, "step": 2430 }, { "epoch": 0.016052314756945588, "grad_norm": 1.040980524076166, "learning_rate": 8.026051774612678e-07, "loss": 0.0201, "step": 2440 }, { "epoch": 0.01611810293217897, "grad_norm": 0.7826429878602741, "learning_rate": 8.058945429426663e-07, "loss": 0.0184, "step": 2450 }, { "epoch": 0.016183891107412352, "grad_norm": 1.2291596142535601, "learning_rate": 8.09183908424065e-07, "loss": 0.0154, "step": 2460 }, { "epoch": 0.016249679282645736, "grad_norm": 0.9307258577920837, "learning_rate": 8.124732739054637e-07, "loss": 0.0164, "step": 2470 }, { "epoch": 0.01631546745787912, "grad_norm": 0.6776557683707822, "learning_rate": 8.157626393868623e-07, "loss": 0.0233, "step": 2480 }, { "epoch": 0.016381255633112504, "grad_norm": 0.6080674157293119, "learning_rate": 8.19052004868261e-07, "loss": 0.0188, "step": 2490 }, { "epoch": 0.016447043808345888, "grad_norm": 1.214583712551222, "learning_rate": 8.223413703496596e-07, "loss": 0.0198, "step": 2500 }, { "epoch": 0.016512831983579272, "grad_norm": 0.8506248533551644, "learning_rate": 8.256307358310582e-07, "loss": 0.0164, "step": 2510 }, { "epoch": 0.016578620158812656, "grad_norm": 0.9297501753292734, "learning_rate": 8.289201013124569e-07, "loss": 0.0221, "step": 2520 }, { "epoch": 0.01664440833404604, "grad_norm": 1.1740231043509077, "learning_rate": 8.322094667938556e-07, "loss": 0.02, "step": 2530 }, { "epoch": 0.01671019650927942, "grad_norm": 1.2266051598291825, "learning_rate": 8.354988322752541e-07, "loss": 0.021, "step": 2540 }, { "epoch": 0.016775984684512805, "grad_norm": 0.38485999236183027, "learning_rate": 8.387881977566528e-07, "loss": 0.0114, "step": 2550 }, { "epoch": 0.01684177285974619, "grad_norm": 1.1526056029106386, "learning_rate": 8.420775632380515e-07, "loss": 0.0164, "step": 2560 }, { "epoch": 0.016907561034979573, "grad_norm": 0.5647914868324955, "learning_rate": 8.453669287194501e-07, "loss": 0.0147, "step": 2570 }, { "epoch": 0.016973349210212957, "grad_norm": 0.9885811474387768, "learning_rate": 8.486562942008488e-07, "loss": 0.0206, "step": 2580 }, { "epoch": 0.01703913738544634, "grad_norm": 0.7263448398171481, "learning_rate": 8.519456596822474e-07, "loss": 0.0165, "step": 2590 }, { "epoch": 0.017104925560679725, "grad_norm": 1.2721022515093101, "learning_rate": 8.55235025163646e-07, "loss": 0.0179, "step": 2600 }, { "epoch": 0.017170713735913105, "grad_norm": 1.0258307120835735, "learning_rate": 8.585243906450447e-07, "loss": 0.0218, "step": 2610 }, { "epoch": 0.01723650191114649, "grad_norm": 1.2065749811363722, "learning_rate": 8.618137561264433e-07, "loss": 0.0181, "step": 2620 }, { "epoch": 0.017302290086379873, "grad_norm": 0.7983707046238495, "learning_rate": 8.651031216078419e-07, "loss": 0.0219, "step": 2630 }, { "epoch": 0.017368078261613257, "grad_norm": 0.7931891008129128, "learning_rate": 8.683924870892406e-07, "loss": 0.0195, "step": 2640 }, { "epoch": 0.01743386643684664, "grad_norm": 0.6524139427859033, "learning_rate": 8.716818525706392e-07, "loss": 0.0152, "step": 2650 }, { "epoch": 0.017499654612080025, "grad_norm": 0.8444363239419757, "learning_rate": 8.749712180520379e-07, "loss": 0.0135, "step": 2660 }, { "epoch": 0.01756544278731341, "grad_norm": 0.9619828423392692, "learning_rate": 8.782605835334365e-07, "loss": 0.0205, "step": 2670 }, { "epoch": 0.017631230962546793, "grad_norm": 0.9626874998702528, "learning_rate": 8.81549949014835e-07, "loss": 0.0183, "step": 2680 }, { "epoch": 0.017697019137780174, "grad_norm": 0.6918890347211126, "learning_rate": 8.848393144962338e-07, "loss": 0.0225, "step": 2690 }, { "epoch": 0.017762807313013558, "grad_norm": 0.5757918681461608, "learning_rate": 8.881286799776325e-07, "loss": 0.0132, "step": 2700 }, { "epoch": 0.017828595488246942, "grad_norm": 1.2007164282931668, "learning_rate": 8.91418045459031e-07, "loss": 0.0183, "step": 2710 }, { "epoch": 0.017894383663480326, "grad_norm": 0.8412261990460381, "learning_rate": 8.947074109404296e-07, "loss": 0.0161, "step": 2720 }, { "epoch": 0.01796017183871371, "grad_norm": 0.6452192566186187, "learning_rate": 8.979967764218284e-07, "loss": 0.0178, "step": 2730 }, { "epoch": 0.018025960013947094, "grad_norm": 0.628400894587616, "learning_rate": 9.012861419032269e-07, "loss": 0.016, "step": 2740 }, { "epoch": 0.018091748189180478, "grad_norm": 0.8466392138358041, "learning_rate": 9.045755073846256e-07, "loss": 0.0152, "step": 2750 }, { "epoch": 0.01815753636441386, "grad_norm": 0.6152043178508324, "learning_rate": 9.078648728660241e-07, "loss": 0.0216, "step": 2760 }, { "epoch": 0.018223324539647243, "grad_norm": 0.7205999700520127, "learning_rate": 9.111542383474228e-07, "loss": 0.0127, "step": 2770 }, { "epoch": 0.018289112714880627, "grad_norm": 0.7203071900026351, "learning_rate": 9.144436038288215e-07, "loss": 0.0186, "step": 2780 }, { "epoch": 0.01835490089011401, "grad_norm": 1.1444725217154907, "learning_rate": 9.177329693102201e-07, "loss": 0.0148, "step": 2790 }, { "epoch": 0.018420689065347395, "grad_norm": 0.8243699462772697, "learning_rate": 9.210223347916187e-07, "loss": 0.0213, "step": 2800 }, { "epoch": 0.01848647724058078, "grad_norm": 1.1664321568141456, "learning_rate": 9.243117002730174e-07, "loss": 0.0242, "step": 2810 }, { "epoch": 0.018552265415814163, "grad_norm": 1.5105037584014025, "learning_rate": 9.27601065754416e-07, "loss": 0.0216, "step": 2820 }, { "epoch": 0.018618053591047547, "grad_norm": 0.8880926018701565, "learning_rate": 9.308904312358147e-07, "loss": 0.0168, "step": 2830 }, { "epoch": 0.018683841766280927, "grad_norm": 0.5457964726079195, "learning_rate": 9.341797967172134e-07, "loss": 0.0157, "step": 2840 }, { "epoch": 0.01874962994151431, "grad_norm": 0.5329964154650737, "learning_rate": 9.374691621986119e-07, "loss": 0.0165, "step": 2850 }, { "epoch": 0.018815418116747695, "grad_norm": 1.0739248675230546, "learning_rate": 9.407585276800106e-07, "loss": 0.0208, "step": 2860 }, { "epoch": 0.01888120629198108, "grad_norm": 0.7352490387397436, "learning_rate": 9.440478931614093e-07, "loss": 0.0216, "step": 2870 }, { "epoch": 0.018946994467214463, "grad_norm": 0.7571394668335106, "learning_rate": 9.473372586428079e-07, "loss": 0.0179, "step": 2880 }, { "epoch": 0.019012782642447847, "grad_norm": 1.041111886878252, "learning_rate": 9.506266241242065e-07, "loss": 0.0133, "step": 2890 }, { "epoch": 0.01907857081768123, "grad_norm": 0.7658862312628462, "learning_rate": 9.539159896056052e-07, "loss": 0.0239, "step": 2900 }, { "epoch": 0.019144358992914612, "grad_norm": 0.9626035993649932, "learning_rate": 9.572053550870038e-07, "loss": 0.0176, "step": 2910 }, { "epoch": 0.019210147168147996, "grad_norm": 0.8081108857188529, "learning_rate": 9.604947205684024e-07, "loss": 0.0152, "step": 2920 }, { "epoch": 0.01927593534338138, "grad_norm": 0.5722903668396897, "learning_rate": 9.63784086049801e-07, "loss": 0.0181, "step": 2930 }, { "epoch": 0.019341723518614764, "grad_norm": 0.5745833353842698, "learning_rate": 9.670734515311997e-07, "loss": 0.0172, "step": 2940 }, { "epoch": 0.019407511693848148, "grad_norm": 0.5244680054151359, "learning_rate": 9.703628170125983e-07, "loss": 0.0179, "step": 2950 }, { "epoch": 0.019473299869081532, "grad_norm": 0.7827198618813179, "learning_rate": 9.736521824939969e-07, "loss": 0.0182, "step": 2960 }, { "epoch": 0.019539088044314916, "grad_norm": 0.6365602983769085, "learning_rate": 9.769415479753957e-07, "loss": 0.0178, "step": 2970 }, { "epoch": 0.0196048762195483, "grad_norm": 1.1754564046850446, "learning_rate": 9.802309134567942e-07, "loss": 0.0146, "step": 2980 }, { "epoch": 0.01967066439478168, "grad_norm": 0.5466175928548719, "learning_rate": 9.835202789381928e-07, "loss": 0.0157, "step": 2990 }, { "epoch": 0.019736452570015064, "grad_norm": 0.9855482907779451, "learning_rate": 9.868096444195916e-07, "loss": 0.0179, "step": 3000 }, { "epoch": 0.01980224074524845, "grad_norm": 0.44521022749802097, "learning_rate": 9.900990099009902e-07, "loss": 0.0129, "step": 3010 }, { "epoch": 0.019868028920481832, "grad_norm": 0.8571483599212918, "learning_rate": 9.933883753823887e-07, "loss": 0.0137, "step": 3020 }, { "epoch": 0.019933817095715217, "grad_norm": 0.8858965572162125, "learning_rate": 9.966777408637875e-07, "loss": 0.0156, "step": 3030 }, { "epoch": 0.0199996052709486, "grad_norm": 0.7900784643979945, "learning_rate": 9.999671063451861e-07, "loss": 0.0167, "step": 3040 }, { "epoch": 0.020065393446181985, "grad_norm": 1.8903105709743646, "learning_rate": 1.0032564718265847e-06, "loss": 0.0133, "step": 3050 }, { "epoch": 0.020131181621415365, "grad_norm": 0.555878455133898, "learning_rate": 1.0065458373079835e-06, "loss": 0.0121, "step": 3060 }, { "epoch": 0.02019696979664875, "grad_norm": 0.5898035325967278, "learning_rate": 1.009835202789382e-06, "loss": 0.014, "step": 3070 }, { "epoch": 0.020262757971882133, "grad_norm": 0.7395062977198645, "learning_rate": 1.0131245682707806e-06, "loss": 0.017, "step": 3080 }, { "epoch": 0.020328546147115517, "grad_norm": 0.6869344192743038, "learning_rate": 1.0164139337521794e-06, "loss": 0.015, "step": 3090 }, { "epoch": 0.0203943343223489, "grad_norm": 0.2205908280917993, "learning_rate": 1.0197032992335778e-06, "loss": 0.0107, "step": 3100 }, { "epoch": 0.020460122497582285, "grad_norm": 0.6691188452061795, "learning_rate": 1.0229926647149765e-06, "loss": 0.017, "step": 3110 }, { "epoch": 0.02052591067281567, "grad_norm": 0.6947993619444275, "learning_rate": 1.0262820301963753e-06, "loss": 0.0202, "step": 3120 }, { "epoch": 0.020591698848049053, "grad_norm": 0.9150724941724275, "learning_rate": 1.0295713956777737e-06, "loss": 0.0146, "step": 3130 }, { "epoch": 0.020657487023282434, "grad_norm": 0.6117079987048956, "learning_rate": 1.0328607611591725e-06, "loss": 0.0162, "step": 3140 }, { "epoch": 0.020723275198515818, "grad_norm": 0.7191991720040768, "learning_rate": 1.036150126640571e-06, "loss": 0.0214, "step": 3150 }, { "epoch": 0.0207890633737492, "grad_norm": 0.5066602543538461, "learning_rate": 1.0394394921219696e-06, "loss": 0.0167, "step": 3160 }, { "epoch": 0.020854851548982586, "grad_norm": 0.7578400278665804, "learning_rate": 1.0427288576033684e-06, "loss": 0.0135, "step": 3170 }, { "epoch": 0.02092063972421597, "grad_norm": 0.9379402455548475, "learning_rate": 1.046018223084767e-06, "loss": 0.0121, "step": 3180 }, { "epoch": 0.020986427899449354, "grad_norm": 0.6847590397692909, "learning_rate": 1.0493075885661656e-06, "loss": 0.021, "step": 3190 }, { "epoch": 0.021052216074682738, "grad_norm": 0.8854275446383759, "learning_rate": 1.0525969540475643e-06, "loss": 0.0189, "step": 3200 }, { "epoch": 0.021118004249916122, "grad_norm": 1.0221416852416088, "learning_rate": 1.055886319528963e-06, "loss": 0.0141, "step": 3210 }, { "epoch": 0.021183792425149502, "grad_norm": 0.9493541037412863, "learning_rate": 1.0591756850103615e-06, "loss": 0.0155, "step": 3220 }, { "epoch": 0.021249580600382886, "grad_norm": 1.114350184574067, "learning_rate": 1.0624650504917603e-06, "loss": 0.0169, "step": 3230 }, { "epoch": 0.02131536877561627, "grad_norm": 0.7378572275791965, "learning_rate": 1.0657544159731589e-06, "loss": 0.0182, "step": 3240 }, { "epoch": 0.021381156950849654, "grad_norm": 0.8071220739488533, "learning_rate": 1.0690437814545574e-06, "loss": 0.0173, "step": 3250 }, { "epoch": 0.02144694512608304, "grad_norm": 0.8014777757544543, "learning_rate": 1.0723331469359562e-06, "loss": 0.0179, "step": 3260 }, { "epoch": 0.021512733301316422, "grad_norm": 2.6687733092877144, "learning_rate": 1.0756225124173548e-06, "loss": 0.016, "step": 3270 }, { "epoch": 0.021578521476549806, "grad_norm": 0.6956603044066654, "learning_rate": 1.0789118778987534e-06, "loss": 0.014, "step": 3280 }, { "epoch": 0.021644309651783187, "grad_norm": 0.8183187418118666, "learning_rate": 1.0822012433801521e-06, "loss": 0.0103, "step": 3290 }, { "epoch": 0.02171009782701657, "grad_norm": 0.6720793117848741, "learning_rate": 1.0854906088615507e-06, "loss": 0.0142, "step": 3300 }, { "epoch": 0.021775886002249955, "grad_norm": 0.6747580800610382, "learning_rate": 1.0887799743429493e-06, "loss": 0.0114, "step": 3310 }, { "epoch": 0.02184167417748334, "grad_norm": 1.0744208753956332, "learning_rate": 1.092069339824348e-06, "loss": 0.0138, "step": 3320 }, { "epoch": 0.021907462352716723, "grad_norm": 0.6013710408935767, "learning_rate": 1.0953587053057467e-06, "loss": 0.0126, "step": 3330 }, { "epoch": 0.021973250527950107, "grad_norm": 0.9872258138131228, "learning_rate": 1.0986480707871452e-06, "loss": 0.0138, "step": 3340 }, { "epoch": 0.02203903870318349, "grad_norm": 0.8208370521318654, "learning_rate": 1.101937436268544e-06, "loss": 0.0126, "step": 3350 }, { "epoch": 0.022104826878416875, "grad_norm": 0.7080798987983685, "learning_rate": 1.1052268017499426e-06, "loss": 0.0211, "step": 3360 }, { "epoch": 0.022170615053650256, "grad_norm": 0.4530478076419779, "learning_rate": 1.1085161672313412e-06, "loss": 0.0201, "step": 3370 }, { "epoch": 0.02223640322888364, "grad_norm": 0.7531309692142998, "learning_rate": 1.11180553271274e-06, "loss": 0.016, "step": 3380 }, { "epoch": 0.022302191404117024, "grad_norm": 0.35814003404222583, "learning_rate": 1.1150948981941383e-06, "loss": 0.0136, "step": 3390 }, { "epoch": 0.022367979579350408, "grad_norm": 0.6195956537639108, "learning_rate": 1.118384263675537e-06, "loss": 0.0153, "step": 3400 }, { "epoch": 0.02243376775458379, "grad_norm": 0.7805222566012532, "learning_rate": 1.1216736291569359e-06, "loss": 0.0164, "step": 3410 }, { "epoch": 0.022499555929817176, "grad_norm": 0.6438124961946677, "learning_rate": 1.1249629946383342e-06, "loss": 0.0194, "step": 3420 }, { "epoch": 0.02256534410505056, "grad_norm": 0.6591969437476506, "learning_rate": 1.128252360119733e-06, "loss": 0.0147, "step": 3430 }, { "epoch": 0.02263113228028394, "grad_norm": 0.6324522295339602, "learning_rate": 1.1315417256011316e-06, "loss": 0.0138, "step": 3440 }, { "epoch": 0.022696920455517324, "grad_norm": 0.7299326062590841, "learning_rate": 1.1348310910825302e-06, "loss": 0.0195, "step": 3450 }, { "epoch": 0.022762708630750708, "grad_norm": 0.7451295436164307, "learning_rate": 1.138120456563929e-06, "loss": 0.0133, "step": 3460 }, { "epoch": 0.022828496805984092, "grad_norm": 0.6527495209731492, "learning_rate": 1.1414098220453275e-06, "loss": 0.0137, "step": 3470 }, { "epoch": 0.022894284981217476, "grad_norm": 0.5678552525501703, "learning_rate": 1.144699187526726e-06, "loss": 0.0106, "step": 3480 }, { "epoch": 0.02296007315645086, "grad_norm": 0.6428504612430855, "learning_rate": 1.1479885530081249e-06, "loss": 0.0107, "step": 3490 }, { "epoch": 0.023025861331684244, "grad_norm": 0.7223504800648013, "learning_rate": 1.1512779184895235e-06, "loss": 0.0182, "step": 3500 }, { "epoch": 0.023091649506917628, "grad_norm": 1.1432735961271254, "learning_rate": 1.154567283970922e-06, "loss": 0.0139, "step": 3510 }, { "epoch": 0.02315743768215101, "grad_norm": 1.2097364899994476, "learning_rate": 1.1578566494523208e-06, "loss": 0.0159, "step": 3520 }, { "epoch": 0.023223225857384393, "grad_norm": 0.8534188448954902, "learning_rate": 1.1611460149337194e-06, "loss": 0.0128, "step": 3530 }, { "epoch": 0.023289014032617777, "grad_norm": 0.7516591146755038, "learning_rate": 1.164435380415118e-06, "loss": 0.0116, "step": 3540 }, { "epoch": 0.02335480220785116, "grad_norm": 0.5368331203938685, "learning_rate": 1.1677247458965168e-06, "loss": 0.0112, "step": 3550 }, { "epoch": 0.023420590383084545, "grad_norm": 0.6065592322966762, "learning_rate": 1.1710141113779153e-06, "loss": 0.0179, "step": 3560 }, { "epoch": 0.02348637855831793, "grad_norm": 0.6521937889014761, "learning_rate": 1.174303476859314e-06, "loss": 0.0193, "step": 3570 }, { "epoch": 0.023552166733551313, "grad_norm": 0.4944269414089091, "learning_rate": 1.1775928423407127e-06, "loss": 0.0144, "step": 3580 }, { "epoch": 0.023617954908784693, "grad_norm": 0.20731098417968455, "learning_rate": 1.1808822078221113e-06, "loss": 0.0105, "step": 3590 }, { "epoch": 0.023683743084018077, "grad_norm": 0.6976561765666686, "learning_rate": 1.1841715733035098e-06, "loss": 0.0166, "step": 3600 }, { "epoch": 0.02374953125925146, "grad_norm": 0.5514542344547241, "learning_rate": 1.1874609387849084e-06, "loss": 0.0127, "step": 3610 }, { "epoch": 0.023815319434484845, "grad_norm": 0.4419874365914538, "learning_rate": 1.1907503042663072e-06, "loss": 0.0112, "step": 3620 }, { "epoch": 0.02388110760971823, "grad_norm": 0.5293705113090069, "learning_rate": 1.1940396697477058e-06, "loss": 0.0157, "step": 3630 }, { "epoch": 0.023946895784951613, "grad_norm": 0.8706253579549547, "learning_rate": 1.1973290352291043e-06, "loss": 0.0167, "step": 3640 }, { "epoch": 0.024012683960184997, "grad_norm": 0.42303636118470656, "learning_rate": 1.2006184007105031e-06, "loss": 0.0114, "step": 3650 }, { "epoch": 0.02407847213541838, "grad_norm": 0.9600609355430311, "learning_rate": 1.2039077661919017e-06, "loss": 0.0197, "step": 3660 }, { "epoch": 0.024144260310651762, "grad_norm": 0.2868480182581411, "learning_rate": 1.2071971316733003e-06, "loss": 0.013, "step": 3670 }, { "epoch": 0.024210048485885146, "grad_norm": 0.6014083676864707, "learning_rate": 1.210486497154699e-06, "loss": 0.0149, "step": 3680 }, { "epoch": 0.02427583666111853, "grad_norm": 0.6070999127776107, "learning_rate": 1.2137758626360976e-06, "loss": 0.0152, "step": 3690 }, { "epoch": 0.024341624836351914, "grad_norm": 1.0277655207557004, "learning_rate": 1.2170652281174962e-06, "loss": 0.0124, "step": 3700 }, { "epoch": 0.024407413011585298, "grad_norm": 0.3598295533303511, "learning_rate": 1.2203545935988948e-06, "loss": 0.0097, "step": 3710 }, { "epoch": 0.024473201186818682, "grad_norm": 0.8927860904901325, "learning_rate": 1.2236439590802936e-06, "loss": 0.0149, "step": 3720 }, { "epoch": 0.024538989362052066, "grad_norm": 0.6921408396127954, "learning_rate": 1.2269333245616921e-06, "loss": 0.0142, "step": 3730 }, { "epoch": 0.024604777537285447, "grad_norm": 0.991281034556815, "learning_rate": 1.2302226900430907e-06, "loss": 0.0149, "step": 3740 }, { "epoch": 0.02467056571251883, "grad_norm": 0.7347429940868252, "learning_rate": 1.2335120555244895e-06, "loss": 0.0175, "step": 3750 }, { "epoch": 0.024736353887752215, "grad_norm": 0.719000219005394, "learning_rate": 1.236801421005888e-06, "loss": 0.0109, "step": 3760 }, { "epoch": 0.0248021420629856, "grad_norm": 0.4569745579340372, "learning_rate": 1.2400907864872866e-06, "loss": 0.0162, "step": 3770 }, { "epoch": 0.024867930238218983, "grad_norm": 0.5393838409497881, "learning_rate": 1.2433801519686852e-06, "loss": 0.0132, "step": 3780 }, { "epoch": 0.024933718413452367, "grad_norm": 0.7691362898432066, "learning_rate": 1.246669517450084e-06, "loss": 0.0143, "step": 3790 }, { "epoch": 0.02499950658868575, "grad_norm": 0.5553726386874183, "learning_rate": 1.2499588829314826e-06, "loss": 0.011, "step": 3800 }, { "epoch": 0.025065294763919135, "grad_norm": 0.330015910258625, "learning_rate": 1.2532482484128814e-06, "loss": 0.0083, "step": 3810 }, { "epoch": 0.025131082939152515, "grad_norm": 0.8240139148390253, "learning_rate": 1.25653761389428e-06, "loss": 0.0132, "step": 3820 }, { "epoch": 0.0251968711143859, "grad_norm": 0.5988948849244424, "learning_rate": 1.2598269793756785e-06, "loss": 0.0134, "step": 3830 }, { "epoch": 0.025262659289619283, "grad_norm": 0.5189997228534806, "learning_rate": 1.263116344857077e-06, "loss": 0.0119, "step": 3840 }, { "epoch": 0.025328447464852667, "grad_norm": 1.2177190510472384, "learning_rate": 1.2664057103384759e-06, "loss": 0.015, "step": 3850 }, { "epoch": 0.02539423564008605, "grad_norm": 0.5449273196647457, "learning_rate": 1.2696950758198744e-06, "loss": 0.0098, "step": 3860 }, { "epoch": 0.025460023815319435, "grad_norm": 0.610482160246938, "learning_rate": 1.2729844413012732e-06, "loss": 0.0144, "step": 3870 }, { "epoch": 0.02552581199055282, "grad_norm": 0.2305226179524288, "learning_rate": 1.2762738067826716e-06, "loss": 0.0132, "step": 3880 }, { "epoch": 0.025591600165786203, "grad_norm": 0.912512274794577, "learning_rate": 1.2795631722640704e-06, "loss": 0.0101, "step": 3890 }, { "epoch": 0.025657388341019584, "grad_norm": 1.0264621509952865, "learning_rate": 1.282852537745469e-06, "loss": 0.0145, "step": 3900 }, { "epoch": 0.025723176516252968, "grad_norm": 0.739499045521694, "learning_rate": 1.2861419032268677e-06, "loss": 0.0139, "step": 3910 }, { "epoch": 0.025788964691486352, "grad_norm": 0.7294773918300504, "learning_rate": 1.2894312687082663e-06, "loss": 0.0127, "step": 3920 }, { "epoch": 0.025854752866719736, "grad_norm": 0.561979928787463, "learning_rate": 1.292720634189665e-06, "loss": 0.014, "step": 3930 }, { "epoch": 0.02592054104195312, "grad_norm": 0.9207809889733161, "learning_rate": 1.2960099996710635e-06, "loss": 0.0135, "step": 3940 }, { "epoch": 0.025986329217186504, "grad_norm": 0.6336114513583613, "learning_rate": 1.299299365152462e-06, "loss": 0.0109, "step": 3950 }, { "epoch": 0.026052117392419888, "grad_norm": 1.0099250712804206, "learning_rate": 1.3025887306338608e-06, "loss": 0.0144, "step": 3960 }, { "epoch": 0.02611790556765327, "grad_norm": 0.6139276360513694, "learning_rate": 1.3058780961152596e-06, "loss": 0.0105, "step": 3970 }, { "epoch": 0.026183693742886652, "grad_norm": 0.5257493754872191, "learning_rate": 1.3091674615966582e-06, "loss": 0.0136, "step": 3980 }, { "epoch": 0.026249481918120036, "grad_norm": 0.8644530363923908, "learning_rate": 1.312456827078057e-06, "loss": 0.0122, "step": 3990 }, { "epoch": 0.02631527009335342, "grad_norm": 0.6424828377941958, "learning_rate": 1.3157461925594553e-06, "loss": 0.0108, "step": 4000 }, { "epoch": 0.026381058268586804, "grad_norm": 0.7571484639370474, "learning_rate": 1.319035558040854e-06, "loss": 0.0156, "step": 4010 }, { "epoch": 0.02644684644382019, "grad_norm": 0.5621300454482118, "learning_rate": 1.3223249235222527e-06, "loss": 0.0131, "step": 4020 }, { "epoch": 0.026512634619053572, "grad_norm": 0.3496662071417244, "learning_rate": 1.3256142890036513e-06, "loss": 0.0093, "step": 4030 }, { "epoch": 0.026578422794286957, "grad_norm": 0.6259881353389269, "learning_rate": 1.32890365448505e-06, "loss": 0.0131, "step": 4040 }, { "epoch": 0.026644210969520337, "grad_norm": 0.6314724889726421, "learning_rate": 1.3321930199664484e-06, "loss": 0.0112, "step": 4050 }, { "epoch": 0.02670999914475372, "grad_norm": 0.4953479295705724, "learning_rate": 1.3354823854478472e-06, "loss": 0.0107, "step": 4060 }, { "epoch": 0.026775787319987105, "grad_norm": 0.5782973859912894, "learning_rate": 1.3387717509292458e-06, "loss": 0.0133, "step": 4070 }, { "epoch": 0.02684157549522049, "grad_norm": 0.6352255297034739, "learning_rate": 1.3420611164106446e-06, "loss": 0.0083, "step": 4080 }, { "epoch": 0.026907363670453873, "grad_norm": 0.46411082495825984, "learning_rate": 1.3453504818920431e-06, "loss": 0.0111, "step": 4090 }, { "epoch": 0.026973151845687257, "grad_norm": 0.8144368103790439, "learning_rate": 1.348639847373442e-06, "loss": 0.0128, "step": 4100 }, { "epoch": 0.02703894002092064, "grad_norm": 0.7418653123762154, "learning_rate": 1.3519292128548403e-06, "loss": 0.0145, "step": 4110 }, { "epoch": 0.02710472819615402, "grad_norm": 0.6499446497847639, "learning_rate": 1.355218578336239e-06, "loss": 0.0137, "step": 4120 }, { "epoch": 0.027170516371387406, "grad_norm": 0.4905957347080365, "learning_rate": 1.3585079438176376e-06, "loss": 0.0178, "step": 4130 }, { "epoch": 0.02723630454662079, "grad_norm": 0.7674557244726921, "learning_rate": 1.3617973092990364e-06, "loss": 0.0178, "step": 4140 }, { "epoch": 0.027302092721854174, "grad_norm": 0.4780705944415787, "learning_rate": 1.365086674780435e-06, "loss": 0.0121, "step": 4150 }, { "epoch": 0.027367880897087558, "grad_norm": 0.6785800704560512, "learning_rate": 1.3683760402618338e-06, "loss": 0.0123, "step": 4160 }, { "epoch": 0.02743366907232094, "grad_norm": 0.5723894248956667, "learning_rate": 1.3716654057432321e-06, "loss": 0.02, "step": 4170 }, { "epoch": 0.027499457247554326, "grad_norm": 0.6061329490954291, "learning_rate": 1.374954771224631e-06, "loss": 0.014, "step": 4180 }, { "epoch": 0.02756524542278771, "grad_norm": 0.6205955591850875, "learning_rate": 1.3782441367060295e-06, "loss": 0.016, "step": 4190 }, { "epoch": 0.02763103359802109, "grad_norm": 0.651304778546197, "learning_rate": 1.3815335021874283e-06, "loss": 0.0121, "step": 4200 }, { "epoch": 0.027696821773254474, "grad_norm": 0.83070602049481, "learning_rate": 1.3848228676688269e-06, "loss": 0.0087, "step": 4210 }, { "epoch": 0.02776260994848786, "grad_norm": 0.9015823675499328, "learning_rate": 1.3881122331502252e-06, "loss": 0.0125, "step": 4220 }, { "epoch": 0.027828398123721242, "grad_norm": 0.41676715130131375, "learning_rate": 1.391401598631624e-06, "loss": 0.011, "step": 4230 }, { "epoch": 0.027894186298954626, "grad_norm": 0.5374861368861227, "learning_rate": 1.3946909641130226e-06, "loss": 0.0101, "step": 4240 }, { "epoch": 0.02795997447418801, "grad_norm": 0.6963201482049425, "learning_rate": 1.3979803295944214e-06, "loss": 0.0118, "step": 4250 }, { "epoch": 0.028025762649421394, "grad_norm": 0.7106155898486263, "learning_rate": 1.4012696950758201e-06, "loss": 0.0131, "step": 4260 }, { "epoch": 0.028091550824654775, "grad_norm": 0.6436849108573708, "learning_rate": 1.4045590605572187e-06, "loss": 0.0118, "step": 4270 }, { "epoch": 0.02815733899988816, "grad_norm": 0.6333456871661823, "learning_rate": 1.407848426038617e-06, "loss": 0.0101, "step": 4280 }, { "epoch": 0.028223127175121543, "grad_norm": 0.9405698919550287, "learning_rate": 1.4111377915200159e-06, "loss": 0.0118, "step": 4290 }, { "epoch": 0.028288915350354927, "grad_norm": 0.5024490561747792, "learning_rate": 1.4144271570014144e-06, "loss": 0.0137, "step": 4300 }, { "epoch": 0.02835470352558831, "grad_norm": 0.4942844817801286, "learning_rate": 1.4177165224828132e-06, "loss": 0.0111, "step": 4310 }, { "epoch": 0.028420491700821695, "grad_norm": 0.6516319096976626, "learning_rate": 1.4210058879642118e-06, "loss": 0.015, "step": 4320 }, { "epoch": 0.02848627987605508, "grad_norm": 0.7269532748346864, "learning_rate": 1.4242952534456106e-06, "loss": 0.0088, "step": 4330 }, { "epoch": 0.028552068051288463, "grad_norm": 0.5404785021596349, "learning_rate": 1.427584618927009e-06, "loss": 0.0108, "step": 4340 }, { "epoch": 0.028617856226521843, "grad_norm": 0.24880667912405885, "learning_rate": 1.4308739844084077e-06, "loss": 0.01, "step": 4350 }, { "epoch": 0.028683644401755228, "grad_norm": 0.25332808312329264, "learning_rate": 1.4341633498898063e-06, "loss": 0.0131, "step": 4360 }, { "epoch": 0.02874943257698861, "grad_norm": 0.5204915162486589, "learning_rate": 1.437452715371205e-06, "loss": 0.0083, "step": 4370 }, { "epoch": 0.028815220752221996, "grad_norm": 0.6757094707814397, "learning_rate": 1.4407420808526037e-06, "loss": 0.0155, "step": 4380 }, { "epoch": 0.02888100892745538, "grad_norm": 0.7607106935061696, "learning_rate": 1.4440314463340022e-06, "loss": 0.013, "step": 4390 }, { "epoch": 0.028946797102688764, "grad_norm": 0.5242787853189108, "learning_rate": 1.4473208118154008e-06, "loss": 0.0134, "step": 4400 }, { "epoch": 0.029012585277922148, "grad_norm": 0.5730765224311741, "learning_rate": 1.4506101772967996e-06, "loss": 0.01, "step": 4410 }, { "epoch": 0.029078373453155528, "grad_norm": 0.4601036372521259, "learning_rate": 1.4538995427781982e-06, "loss": 0.0114, "step": 4420 }, { "epoch": 0.029144161628388912, "grad_norm": 1.670080538094367, "learning_rate": 1.457188908259597e-06, "loss": 0.0149, "step": 4430 }, { "epoch": 0.029209949803622296, "grad_norm": 0.720924061916606, "learning_rate": 1.4604782737409955e-06, "loss": 0.0104, "step": 4440 }, { "epoch": 0.02927573797885568, "grad_norm": 0.6461028730413233, "learning_rate": 1.4637676392223941e-06, "loss": 0.0107, "step": 4450 }, { "epoch": 0.029341526154089064, "grad_norm": 0.8213530114450586, "learning_rate": 1.4670570047037927e-06, "loss": 0.0158, "step": 4460 }, { "epoch": 0.029407314329322448, "grad_norm": 0.649791461930865, "learning_rate": 1.4703463701851915e-06, "loss": 0.0084, "step": 4470 }, { "epoch": 0.029473102504555832, "grad_norm": 0.6740935751929595, "learning_rate": 1.47363573566659e-06, "loss": 0.0142, "step": 4480 }, { "epoch": 0.029538890679789216, "grad_norm": 0.6121923074035489, "learning_rate": 1.4769251011479888e-06, "loss": 0.0114, "step": 4490 }, { "epoch": 0.029604678855022597, "grad_norm": 0.7706959404615779, "learning_rate": 1.4802144666293874e-06, "loss": 0.0166, "step": 4500 }, { "epoch": 0.02967046703025598, "grad_norm": 0.6129906257777799, "learning_rate": 1.4835038321107858e-06, "loss": 0.0106, "step": 4510 }, { "epoch": 0.029736255205489365, "grad_norm": 0.5613537230629525, "learning_rate": 1.4867931975921845e-06, "loss": 0.0177, "step": 4520 }, { "epoch": 0.02980204338072275, "grad_norm": 0.726933346752411, "learning_rate": 1.4900825630735833e-06, "loss": 0.0135, "step": 4530 }, { "epoch": 0.029867831555956133, "grad_norm": 0.44251257253876636, "learning_rate": 1.493371928554982e-06, "loss": 0.0126, "step": 4540 }, { "epoch": 0.029933619731189517, "grad_norm": 0.29366562401899743, "learning_rate": 1.4966612940363807e-06, "loss": 0.0109, "step": 4550 }, { "epoch": 0.0299994079064229, "grad_norm": 0.32812150348641184, "learning_rate": 1.499950659517779e-06, "loss": 0.0093, "step": 4560 }, { "epoch": 0.030065196081656285, "grad_norm": 0.6680342871807365, "learning_rate": 1.5032400249991776e-06, "loss": 0.0111, "step": 4570 }, { "epoch": 0.030130984256889665, "grad_norm": 0.5206661428390518, "learning_rate": 1.5065293904805764e-06, "loss": 0.0102, "step": 4580 }, { "epoch": 0.03019677243212305, "grad_norm": 0.6811324972089866, "learning_rate": 1.509818755961975e-06, "loss": 0.0099, "step": 4590 }, { "epoch": 0.030262560607356433, "grad_norm": 0.8228797581683012, "learning_rate": 1.5131081214433738e-06, "loss": 0.0091, "step": 4600 }, { "epoch": 0.030328348782589817, "grad_norm": 0.5979153882174565, "learning_rate": 1.5163974869247726e-06, "loss": 0.0125, "step": 4610 }, { "epoch": 0.0303941369578232, "grad_norm": 0.4653746010708094, "learning_rate": 1.519686852406171e-06, "loss": 0.0151, "step": 4620 }, { "epoch": 0.030459925133056585, "grad_norm": 0.788702871272065, "learning_rate": 1.5229762178875695e-06, "loss": 0.0092, "step": 4630 }, { "epoch": 0.03052571330828997, "grad_norm": 0.6669868735563904, "learning_rate": 1.5262655833689683e-06, "loss": 0.012, "step": 4640 }, { "epoch": 0.03059150148352335, "grad_norm": 0.3145965837189611, "learning_rate": 1.5295549488503669e-06, "loss": 0.0171, "step": 4650 }, { "epoch": 0.030657289658756734, "grad_norm": 0.6588853418151797, "learning_rate": 1.5328443143317656e-06, "loss": 0.0155, "step": 4660 }, { "epoch": 0.030723077833990118, "grad_norm": 0.612813336365907, "learning_rate": 1.5361336798131642e-06, "loss": 0.007, "step": 4670 }, { "epoch": 0.030788866009223502, "grad_norm": 0.477059372492862, "learning_rate": 1.5394230452945628e-06, "loss": 0.0116, "step": 4680 }, { "epoch": 0.030854654184456886, "grad_norm": 0.35857299243634094, "learning_rate": 1.5427124107759614e-06, "loss": 0.0146, "step": 4690 }, { "epoch": 0.03092044235969027, "grad_norm": 0.471420380297933, "learning_rate": 1.5460017762573601e-06, "loss": 0.0118, "step": 4700 }, { "epoch": 0.030986230534923654, "grad_norm": 0.6816191531462155, "learning_rate": 1.5492911417387587e-06, "loss": 0.0084, "step": 4710 }, { "epoch": 0.031052018710157038, "grad_norm": 0.5517097897801282, "learning_rate": 1.5525805072201575e-06, "loss": 0.012, "step": 4720 }, { "epoch": 0.03111780688539042, "grad_norm": 0.42303765686049016, "learning_rate": 1.5558698727015559e-06, "loss": 0.0099, "step": 4730 }, { "epoch": 0.031183595060623803, "grad_norm": 0.9618681585711615, "learning_rate": 1.5591592381829547e-06, "loss": 0.0114, "step": 4740 }, { "epoch": 0.031249383235857187, "grad_norm": 0.6919853450824509, "learning_rate": 1.5624486036643532e-06, "loss": 0.008, "step": 4750 }, { "epoch": 0.03131517141109057, "grad_norm": 0.397848996981083, "learning_rate": 1.565737969145752e-06, "loss": 0.0107, "step": 4760 }, { "epoch": 0.031380959586323955, "grad_norm": 0.3436200555683046, "learning_rate": 1.5690273346271506e-06, "loss": 0.0102, "step": 4770 }, { "epoch": 0.031446747761557335, "grad_norm": 0.48225143338955134, "learning_rate": 1.5723167001085494e-06, "loss": 0.0149, "step": 4780 }, { "epoch": 0.03151253593679072, "grad_norm": 0.6471732505806373, "learning_rate": 1.5756060655899477e-06, "loss": 0.0121, "step": 4790 }, { "epoch": 0.0315783241120241, "grad_norm": 0.5602067457735233, "learning_rate": 1.5788954310713463e-06, "loss": 0.0087, "step": 4800 }, { "epoch": 0.03164411228725749, "grad_norm": 0.5134702150539067, "learning_rate": 1.582184796552745e-06, "loss": 0.0118, "step": 4810 }, { "epoch": 0.03170990046249087, "grad_norm": 0.36824739753191954, "learning_rate": 1.5854741620341439e-06, "loss": 0.0129, "step": 4820 }, { "epoch": 0.03177568863772426, "grad_norm": 0.3900370475094557, "learning_rate": 1.5887635275155425e-06, "loss": 0.0062, "step": 4830 }, { "epoch": 0.03184147681295764, "grad_norm": 0.4483469267556913, "learning_rate": 1.5920528929969412e-06, "loss": 0.0093, "step": 4840 }, { "epoch": 0.03190726498819102, "grad_norm": 0.6526230489465191, "learning_rate": 1.5953422584783396e-06, "loss": 0.0078, "step": 4850 }, { "epoch": 0.03197305316342441, "grad_norm": 0.7485261603577432, "learning_rate": 1.5986316239597382e-06, "loss": 0.0097, "step": 4860 }, { "epoch": 0.03203884133865779, "grad_norm": 0.6532295539761513, "learning_rate": 1.601920989441137e-06, "loss": 0.0095, "step": 4870 }, { "epoch": 0.032104629513891175, "grad_norm": 0.42801982948762934, "learning_rate": 1.6052103549225355e-06, "loss": 0.0108, "step": 4880 }, { "epoch": 0.032170417689124556, "grad_norm": 0.7611951205807598, "learning_rate": 1.6084997204039343e-06, "loss": 0.0078, "step": 4890 }, { "epoch": 0.03223620586435794, "grad_norm": 0.8088089956086175, "learning_rate": 1.6117890858853327e-06, "loss": 0.0112, "step": 4900 }, { "epoch": 0.032301994039591324, "grad_norm": 0.32514096676322257, "learning_rate": 1.6150784513667315e-06, "loss": 0.0078, "step": 4910 }, { "epoch": 0.032367782214824704, "grad_norm": 0.6573695315107354, "learning_rate": 1.61836781684813e-06, "loss": 0.0096, "step": 4920 }, { "epoch": 0.03243357039005809, "grad_norm": 0.5896276135468894, "learning_rate": 1.6216571823295288e-06, "loss": 0.0125, "step": 4930 }, { "epoch": 0.03249935856529147, "grad_norm": 0.7450738729682703, "learning_rate": 1.6249465478109274e-06, "loss": 0.014, "step": 4940 }, { "epoch": 0.03256514674052486, "grad_norm": 0.43549079590805295, "learning_rate": 1.6282359132923262e-06, "loss": 0.0143, "step": 4950 }, { "epoch": 0.03263093491575824, "grad_norm": 0.4702340148707332, "learning_rate": 1.6315252787737245e-06, "loss": 0.0071, "step": 4960 }, { "epoch": 0.03269672309099163, "grad_norm": 0.5385902910272261, "learning_rate": 1.6348146442551233e-06, "loss": 0.01, "step": 4970 }, { "epoch": 0.03276251126622501, "grad_norm": 0.44011817793653585, "learning_rate": 1.638104009736522e-06, "loss": 0.0093, "step": 4980 }, { "epoch": 0.03282829944145839, "grad_norm": 0.6298706114691898, "learning_rate": 1.6413933752179207e-06, "loss": 0.0105, "step": 4990 }, { "epoch": 0.032894087616691776, "grad_norm": 0.4330593611314083, "learning_rate": 1.6446827406993193e-06, "loss": 0.0074, "step": 5000 }, { "epoch": 0.03295987579192516, "grad_norm": 0.4140134088737307, "learning_rate": 1.647972106180718e-06, "loss": 0.0111, "step": 5010 }, { "epoch": 0.033025663967158544, "grad_norm": 0.5098204927862053, "learning_rate": 1.6512614716621164e-06, "loss": 0.0127, "step": 5020 }, { "epoch": 0.033091452142391925, "grad_norm": 0.5109225527153398, "learning_rate": 1.6545508371435152e-06, "loss": 0.0109, "step": 5030 }, { "epoch": 0.03315724031762531, "grad_norm": 0.666303478489536, "learning_rate": 1.6578402026249138e-06, "loss": 0.0113, "step": 5040 }, { "epoch": 0.03322302849285869, "grad_norm": 0.5556720994372835, "learning_rate": 1.6611295681063126e-06, "loss": 0.0126, "step": 5050 }, { "epoch": 0.03328881666809208, "grad_norm": 0.7207316564081713, "learning_rate": 1.6644189335877111e-06, "loss": 0.0143, "step": 5060 }, { "epoch": 0.03335460484332546, "grad_norm": 1.080360668277232, "learning_rate": 1.6677082990691095e-06, "loss": 0.0113, "step": 5070 }, { "epoch": 0.03342039301855884, "grad_norm": 0.7031148316431313, "learning_rate": 1.6709976645505083e-06, "loss": 0.0109, "step": 5080 }, { "epoch": 0.03348618119379223, "grad_norm": 0.5351883354197574, "learning_rate": 1.674287030031907e-06, "loss": 0.0157, "step": 5090 }, { "epoch": 0.03355196936902561, "grad_norm": 0.3556953591121116, "learning_rate": 1.6775763955133056e-06, "loss": 0.011, "step": 5100 }, { "epoch": 0.033617757544259, "grad_norm": 0.5324230450487569, "learning_rate": 1.6808657609947044e-06, "loss": 0.0127, "step": 5110 }, { "epoch": 0.03368354571949238, "grad_norm": 0.2620580550651118, "learning_rate": 1.684155126476103e-06, "loss": 0.0105, "step": 5120 }, { "epoch": 0.033749333894725765, "grad_norm": 0.5299122258592935, "learning_rate": 1.6874444919575014e-06, "loss": 0.0087, "step": 5130 }, { "epoch": 0.033815122069959146, "grad_norm": 0.6472211015404185, "learning_rate": 1.6907338574389001e-06, "loss": 0.0117, "step": 5140 }, { "epoch": 0.033880910245192526, "grad_norm": 0.376485912349806, "learning_rate": 1.6940232229202987e-06, "loss": 0.0111, "step": 5150 }, { "epoch": 0.033946698420425914, "grad_norm": 0.39944847685741425, "learning_rate": 1.6973125884016975e-06, "loss": 0.0105, "step": 5160 }, { "epoch": 0.034012486595659294, "grad_norm": 0.5632977808119903, "learning_rate": 1.700601953883096e-06, "loss": 0.0063, "step": 5170 }, { "epoch": 0.03407827477089268, "grad_norm": 0.5017464788224837, "learning_rate": 1.7038913193644949e-06, "loss": 0.0083, "step": 5180 }, { "epoch": 0.03414406294612606, "grad_norm": 0.4660451520075984, "learning_rate": 1.7071806848458932e-06, "loss": 0.0124, "step": 5190 }, { "epoch": 0.03420985112135945, "grad_norm": 0.35014087665597743, "learning_rate": 1.710470050327292e-06, "loss": 0.01, "step": 5200 }, { "epoch": 0.03427563929659283, "grad_norm": 0.5242645359679671, "learning_rate": 1.7137594158086906e-06, "loss": 0.0093, "step": 5210 }, { "epoch": 0.03434142747182621, "grad_norm": 0.5615550055109465, "learning_rate": 1.7170487812900894e-06, "loss": 0.0114, "step": 5220 }, { "epoch": 0.0344072156470596, "grad_norm": 0.4015681221580442, "learning_rate": 1.720338146771488e-06, "loss": 0.0087, "step": 5230 }, { "epoch": 0.03447300382229298, "grad_norm": 0.3136550079872346, "learning_rate": 1.7236275122528865e-06, "loss": 0.0082, "step": 5240 }, { "epoch": 0.034538791997526366, "grad_norm": 0.5159636186005689, "learning_rate": 1.726916877734285e-06, "loss": 0.0138, "step": 5250 }, { "epoch": 0.03460458017275975, "grad_norm": 0.7244485690598927, "learning_rate": 1.7302062432156839e-06, "loss": 0.0085, "step": 5260 }, { "epoch": 0.034670368347993134, "grad_norm": 0.7133354163595972, "learning_rate": 1.7334956086970824e-06, "loss": 0.0102, "step": 5270 }, { "epoch": 0.034736156523226515, "grad_norm": 0.7209006615825355, "learning_rate": 1.7367849741784812e-06, "loss": 0.0085, "step": 5280 }, { "epoch": 0.034801944698459895, "grad_norm": 0.431710394518219, "learning_rate": 1.7400743396598798e-06, "loss": 0.0075, "step": 5290 }, { "epoch": 0.03486773287369328, "grad_norm": 0.6284383901743978, "learning_rate": 1.7433637051412784e-06, "loss": 0.0104, "step": 5300 }, { "epoch": 0.03493352104892666, "grad_norm": 0.4208745019645793, "learning_rate": 1.746653070622677e-06, "loss": 0.0066, "step": 5310 }, { "epoch": 0.03499930922416005, "grad_norm": 1.1380950383464505, "learning_rate": 1.7499424361040757e-06, "loss": 0.01, "step": 5320 }, { "epoch": 0.03506509739939343, "grad_norm": 0.6723507712386024, "learning_rate": 1.7532318015854743e-06, "loss": 0.0125, "step": 5330 }, { "epoch": 0.03513088557462682, "grad_norm": 0.5465198807005908, "learning_rate": 1.756521167066873e-06, "loss": 0.0144, "step": 5340 }, { "epoch": 0.0351966737498602, "grad_norm": 0.4982369866752197, "learning_rate": 1.7598105325482715e-06, "loss": 0.0059, "step": 5350 }, { "epoch": 0.03526246192509359, "grad_norm": 0.5009189221585731, "learning_rate": 1.76309989802967e-06, "loss": 0.0076, "step": 5360 }, { "epoch": 0.03532825010032697, "grad_norm": 0.4604157735092173, "learning_rate": 1.7663892635110688e-06, "loss": 0.0129, "step": 5370 }, { "epoch": 0.03539403827556035, "grad_norm": 0.666214902003707, "learning_rate": 1.7696786289924676e-06, "loss": 0.0094, "step": 5380 }, { "epoch": 0.035459826450793736, "grad_norm": 0.8203990136465868, "learning_rate": 1.7729679944738662e-06, "loss": 0.0077, "step": 5390 }, { "epoch": 0.035525614626027116, "grad_norm": 0.6730572762170783, "learning_rate": 1.776257359955265e-06, "loss": 0.0148, "step": 5400 }, { "epoch": 0.035591402801260504, "grad_norm": 1.0939970475717622, "learning_rate": 1.7795467254366633e-06, "loss": 0.0086, "step": 5410 }, { "epoch": 0.035657190976493884, "grad_norm": 0.7059642699051103, "learning_rate": 1.782836090918062e-06, "loss": 0.0084, "step": 5420 }, { "epoch": 0.03572297915172727, "grad_norm": 0.5671456408520574, "learning_rate": 1.7861254563994607e-06, "loss": 0.0096, "step": 5430 }, { "epoch": 0.03578876732696065, "grad_norm": 0.5217980008088007, "learning_rate": 1.7894148218808593e-06, "loss": 0.0103, "step": 5440 }, { "epoch": 0.03585455550219403, "grad_norm": 0.48948686956828447, "learning_rate": 1.792704187362258e-06, "loss": 0.009, "step": 5450 }, { "epoch": 0.03592034367742742, "grad_norm": 0.5600680454912037, "learning_rate": 1.7959935528436568e-06, "loss": 0.0096, "step": 5460 }, { "epoch": 0.0359861318526608, "grad_norm": 0.6347941953803226, "learning_rate": 1.7992829183250552e-06, "loss": 0.0119, "step": 5470 }, { "epoch": 0.03605192002789419, "grad_norm": 0.847702760884, "learning_rate": 1.8025722838064538e-06, "loss": 0.0083, "step": 5480 }, { "epoch": 0.03611770820312757, "grad_norm": 0.30077539418484334, "learning_rate": 1.8058616492878526e-06, "loss": 0.01, "step": 5490 }, { "epoch": 0.036183496378360956, "grad_norm": 0.221803842984932, "learning_rate": 1.8091510147692511e-06, "loss": 0.0111, "step": 5500 }, { "epoch": 0.03624928455359434, "grad_norm": 0.3767435845763983, "learning_rate": 1.81244038025065e-06, "loss": 0.0072, "step": 5510 }, { "epoch": 0.03631507272882772, "grad_norm": 0.425455384974288, "learning_rate": 1.8157297457320483e-06, "loss": 0.0091, "step": 5520 }, { "epoch": 0.036380860904061105, "grad_norm": 0.42249452832320533, "learning_rate": 1.819019111213447e-06, "loss": 0.0083, "step": 5530 }, { "epoch": 0.036446649079294485, "grad_norm": 0.4790301778942844, "learning_rate": 1.8223084766948456e-06, "loss": 0.0074, "step": 5540 }, { "epoch": 0.03651243725452787, "grad_norm": 0.6206195989053784, "learning_rate": 1.8255978421762444e-06, "loss": 0.0118, "step": 5550 }, { "epoch": 0.03657822542976125, "grad_norm": 0.422614370641915, "learning_rate": 1.828887207657643e-06, "loss": 0.0083, "step": 5560 }, { "epoch": 0.03664401360499464, "grad_norm": 0.7311545498285957, "learning_rate": 1.8321765731390418e-06, "loss": 0.0102, "step": 5570 }, { "epoch": 0.03670980178022802, "grad_norm": 0.6534696216685639, "learning_rate": 1.8354659386204401e-06, "loss": 0.0094, "step": 5580 }, { "epoch": 0.03677558995546141, "grad_norm": 0.5162997494694185, "learning_rate": 1.838755304101839e-06, "loss": 0.0079, "step": 5590 }, { "epoch": 0.03684137813069479, "grad_norm": 0.8096008904603942, "learning_rate": 1.8420446695832375e-06, "loss": 0.0121, "step": 5600 }, { "epoch": 0.03690716630592817, "grad_norm": 0.552539944773605, "learning_rate": 1.8453340350646363e-06, "loss": 0.009, "step": 5610 }, { "epoch": 0.03697295448116156, "grad_norm": 0.6021901496132274, "learning_rate": 1.8486234005460349e-06, "loss": 0.0181, "step": 5620 }, { "epoch": 0.03703874265639494, "grad_norm": 0.4658307537752647, "learning_rate": 1.8519127660274336e-06, "loss": 0.0079, "step": 5630 }, { "epoch": 0.037104530831628325, "grad_norm": 0.4784822040474796, "learning_rate": 1.855202131508832e-06, "loss": 0.011, "step": 5640 }, { "epoch": 0.037170319006861706, "grad_norm": 0.4068290071779964, "learning_rate": 1.8584914969902306e-06, "loss": 0.0083, "step": 5650 }, { "epoch": 0.03723610718209509, "grad_norm": 0.7188755997134064, "learning_rate": 1.8617808624716294e-06, "loss": 0.0102, "step": 5660 }, { "epoch": 0.037301895357328474, "grad_norm": 0.2907198880988216, "learning_rate": 1.8650702279530281e-06, "loss": 0.0081, "step": 5670 }, { "epoch": 0.037367683532561854, "grad_norm": 0.4881546147524782, "learning_rate": 1.8683595934344267e-06, "loss": 0.0102, "step": 5680 }, { "epoch": 0.03743347170779524, "grad_norm": 0.4561142032344082, "learning_rate": 1.871648958915825e-06, "loss": 0.0119, "step": 5690 }, { "epoch": 0.03749925988302862, "grad_norm": 0.10237931902560879, "learning_rate": 1.8749383243972239e-06, "loss": 0.0085, "step": 5700 }, { "epoch": 0.03756504805826201, "grad_norm": 0.42955867565297856, "learning_rate": 1.8782276898786224e-06, "loss": 0.0092, "step": 5710 }, { "epoch": 0.03763083623349539, "grad_norm": 0.6673726991720239, "learning_rate": 1.8815170553600212e-06, "loss": 0.0091, "step": 5720 }, { "epoch": 0.03769662440872878, "grad_norm": 0.3398370986292832, "learning_rate": 1.8848064208414198e-06, "loss": 0.0103, "step": 5730 }, { "epoch": 0.03776241258396216, "grad_norm": 0.3617131790353214, "learning_rate": 1.8880957863228186e-06, "loss": 0.0072, "step": 5740 }, { "epoch": 0.03782820075919554, "grad_norm": 0.4243375923617376, "learning_rate": 1.891385151804217e-06, "loss": 0.0061, "step": 5750 }, { "epoch": 0.03789398893442893, "grad_norm": 0.42277789207790517, "learning_rate": 1.8946745172856157e-06, "loss": 0.0096, "step": 5760 }, { "epoch": 0.03795977710966231, "grad_norm": 0.5773816508628471, "learning_rate": 1.8979638827670143e-06, "loss": 0.0085, "step": 5770 }, { "epoch": 0.038025565284895695, "grad_norm": 0.5176354375699946, "learning_rate": 1.901253248248413e-06, "loss": 0.0105, "step": 5780 }, { "epoch": 0.038091353460129075, "grad_norm": 0.40434626756530306, "learning_rate": 1.9045426137298117e-06, "loss": 0.0141, "step": 5790 }, { "epoch": 0.03815714163536246, "grad_norm": 0.6373883943822699, "learning_rate": 1.9078319792112105e-06, "loss": 0.0076, "step": 5800 }, { "epoch": 0.03822292981059584, "grad_norm": 0.6014559394963681, "learning_rate": 1.911121344692609e-06, "loss": 0.0082, "step": 5810 }, { "epoch": 0.038288717985829224, "grad_norm": 0.12591953565477332, "learning_rate": 1.9144107101740076e-06, "loss": 0.0054, "step": 5820 }, { "epoch": 0.03835450616106261, "grad_norm": 0.3901372692599097, "learning_rate": 1.9177000756554064e-06, "loss": 0.0086, "step": 5830 }, { "epoch": 0.03842029433629599, "grad_norm": 0.5299888589839744, "learning_rate": 1.9209894411368047e-06, "loss": 0.0068, "step": 5840 }, { "epoch": 0.03848608251152938, "grad_norm": 0.3731497586502807, "learning_rate": 1.9242788066182035e-06, "loss": 0.0106, "step": 5850 }, { "epoch": 0.03855187068676276, "grad_norm": 0.5597913894421418, "learning_rate": 1.927568172099602e-06, "loss": 0.0077, "step": 5860 }, { "epoch": 0.03861765886199615, "grad_norm": 0.5270435109532318, "learning_rate": 1.9308575375810007e-06, "loss": 0.0063, "step": 5870 }, { "epoch": 0.03868344703722953, "grad_norm": 0.408157667882442, "learning_rate": 1.9341469030623995e-06, "loss": 0.0082, "step": 5880 }, { "epoch": 0.038749235212462915, "grad_norm": 0.3029653394264043, "learning_rate": 1.9374362685437983e-06, "loss": 0.0068, "step": 5890 }, { "epoch": 0.038815023387696296, "grad_norm": 0.5172646185719219, "learning_rate": 1.9407256340251966e-06, "loss": 0.0072, "step": 5900 }, { "epoch": 0.038880811562929676, "grad_norm": 0.5089772983807782, "learning_rate": 1.9440149995065954e-06, "loss": 0.0098, "step": 5910 }, { "epoch": 0.038946599738163064, "grad_norm": 1.3137226796663664, "learning_rate": 1.9473043649879938e-06, "loss": 0.0132, "step": 5920 }, { "epoch": 0.039012387913396444, "grad_norm": 0.4151083646471681, "learning_rate": 1.9505937304693925e-06, "loss": 0.01, "step": 5930 }, { "epoch": 0.03907817608862983, "grad_norm": 0.2906839746498856, "learning_rate": 1.9538830959507913e-06, "loss": 0.0098, "step": 5940 }, { "epoch": 0.03914396426386321, "grad_norm": 0.29298255244226784, "learning_rate": 1.95717246143219e-06, "loss": 0.0122, "step": 5950 }, { "epoch": 0.0392097524390966, "grad_norm": 0.5082228879565492, "learning_rate": 1.9604618269135885e-06, "loss": 0.0094, "step": 5960 }, { "epoch": 0.03927554061432998, "grad_norm": 0.8882184371720874, "learning_rate": 1.9637511923949873e-06, "loss": 0.0091, "step": 5970 }, { "epoch": 0.03934132878956336, "grad_norm": 0.4418775130153138, "learning_rate": 1.9670405578763856e-06, "loss": 0.0084, "step": 5980 }, { "epoch": 0.03940711696479675, "grad_norm": 0.37669716172933243, "learning_rate": 1.9703299233577844e-06, "loss": 0.0101, "step": 5990 }, { "epoch": 0.03947290514003013, "grad_norm": 0.19012115909786528, "learning_rate": 1.973619288839183e-06, "loss": 0.0075, "step": 6000 }, { "epoch": 0.039538693315263516, "grad_norm": 0.8549666800267067, "learning_rate": 1.9769086543205816e-06, "loss": 0.0102, "step": 6010 }, { "epoch": 0.0396044814904969, "grad_norm": 0.37557758135958924, "learning_rate": 1.9801980198019803e-06, "loss": 0.0169, "step": 6020 }, { "epoch": 0.039670269665730284, "grad_norm": 0.6161115936106505, "learning_rate": 1.9834873852833787e-06, "loss": 0.0081, "step": 6030 }, { "epoch": 0.039736057840963665, "grad_norm": 0.5631082406963356, "learning_rate": 1.9867767507647775e-06, "loss": 0.0074, "step": 6040 }, { "epoch": 0.039801846016197046, "grad_norm": 0.5120180401824975, "learning_rate": 1.9900661162461763e-06, "loss": 0.0102, "step": 6050 }, { "epoch": 0.03986763419143043, "grad_norm": 0.8674406645254145, "learning_rate": 1.993355481727575e-06, "loss": 0.007, "step": 6060 }, { "epoch": 0.039933422366663814, "grad_norm": 0.38208844040641765, "learning_rate": 1.9966448472089734e-06, "loss": 0.0083, "step": 6070 }, { "epoch": 0.0399992105418972, "grad_norm": 0.8219391321568514, "learning_rate": 1.9999342126903722e-06, "loss": 0.0109, "step": 6080 }, { "epoch": 0.04006499871713058, "grad_norm": 0.4689975220352228, "learning_rate": 2.0032235781717706e-06, "loss": 0.0084, "step": 6090 }, { "epoch": 0.04013078689236397, "grad_norm": 0.4970480715387389, "learning_rate": 2.0065129436531694e-06, "loss": 0.0093, "step": 6100 }, { "epoch": 0.04019657506759735, "grad_norm": 1.0561783611316524, "learning_rate": 2.009802309134568e-06, "loss": 0.0075, "step": 6110 }, { "epoch": 0.04026236324283073, "grad_norm": 0.5049091293243226, "learning_rate": 2.013091674615967e-06, "loss": 0.0087, "step": 6120 }, { "epoch": 0.04032815141806412, "grad_norm": 0.7668041880459384, "learning_rate": 2.0163810400973653e-06, "loss": 0.0082, "step": 6130 }, { "epoch": 0.0403939395932975, "grad_norm": 0.3177419164928047, "learning_rate": 2.019670405578764e-06, "loss": 0.0081, "step": 6140 }, { "epoch": 0.040459727768530886, "grad_norm": 0.2850451448965283, "learning_rate": 2.0229597710601624e-06, "loss": 0.0097, "step": 6150 }, { "epoch": 0.040525515943764266, "grad_norm": 0.7287973231643202, "learning_rate": 2.0262491365415612e-06, "loss": 0.0081, "step": 6160 }, { "epoch": 0.040591304118997654, "grad_norm": 0.4444440890362873, "learning_rate": 2.02953850202296e-06, "loss": 0.0107, "step": 6170 }, { "epoch": 0.040657092294231034, "grad_norm": 0.643500686185788, "learning_rate": 2.032827867504359e-06, "loss": 0.0153, "step": 6180 }, { "epoch": 0.04072288046946442, "grad_norm": 0.2675399295797833, "learning_rate": 2.036117232985757e-06, "loss": 0.0101, "step": 6190 }, { "epoch": 0.0407886686446978, "grad_norm": 0.44450504293953874, "learning_rate": 2.0394065984671555e-06, "loss": 0.0119, "step": 6200 }, { "epoch": 0.04085445681993118, "grad_norm": 0.6219801572711213, "learning_rate": 2.0426959639485543e-06, "loss": 0.009, "step": 6210 }, { "epoch": 0.04092024499516457, "grad_norm": 0.6295604587933871, "learning_rate": 2.045985329429953e-06, "loss": 0.0088, "step": 6220 }, { "epoch": 0.04098603317039795, "grad_norm": 0.6765395118980536, "learning_rate": 2.049274694911352e-06, "loss": 0.0107, "step": 6230 }, { "epoch": 0.04105182134563134, "grad_norm": 0.376216168944815, "learning_rate": 2.0525640603927507e-06, "loss": 0.0098, "step": 6240 }, { "epoch": 0.04111760952086472, "grad_norm": 0.6997603505404134, "learning_rate": 2.055853425874149e-06, "loss": 0.0132, "step": 6250 }, { "epoch": 0.041183397696098106, "grad_norm": 0.41110270371272767, "learning_rate": 2.0591427913555474e-06, "loss": 0.0085, "step": 6260 }, { "epoch": 0.04124918587133149, "grad_norm": 0.7460377244432189, "learning_rate": 2.062432156836946e-06, "loss": 0.0095, "step": 6270 }, { "epoch": 0.04131497404656487, "grad_norm": 0.5633924553804867, "learning_rate": 2.065721522318345e-06, "loss": 0.0062, "step": 6280 }, { "epoch": 0.041380762221798255, "grad_norm": 0.40124405077876457, "learning_rate": 2.0690108877997437e-06, "loss": 0.0078, "step": 6290 }, { "epoch": 0.041446550397031635, "grad_norm": 0.5389525052465851, "learning_rate": 2.072300253281142e-06, "loss": 0.0066, "step": 6300 }, { "epoch": 0.04151233857226502, "grad_norm": 0.4852513929662061, "learning_rate": 2.075589618762541e-06, "loss": 0.0084, "step": 6310 }, { "epoch": 0.0415781267474984, "grad_norm": 0.7136842605931931, "learning_rate": 2.0788789842439393e-06, "loss": 0.0119, "step": 6320 }, { "epoch": 0.04164391492273179, "grad_norm": 0.22641907533635763, "learning_rate": 2.082168349725338e-06, "loss": 0.0075, "step": 6330 }, { "epoch": 0.04170970309796517, "grad_norm": 0.3410705322310364, "learning_rate": 2.085457715206737e-06, "loss": 0.007, "step": 6340 }, { "epoch": 0.04177549127319855, "grad_norm": 0.1765866115290946, "learning_rate": 2.0887470806881356e-06, "loss": 0.0089, "step": 6350 }, { "epoch": 0.04184127944843194, "grad_norm": 0.36296412209772483, "learning_rate": 2.092036446169534e-06, "loss": 0.008, "step": 6360 }, { "epoch": 0.04190706762366532, "grad_norm": 0.4689123631650761, "learning_rate": 2.0953258116509328e-06, "loss": 0.007, "step": 6370 }, { "epoch": 0.04197285579889871, "grad_norm": 0.5191997041447405, "learning_rate": 2.098615177132331e-06, "loss": 0.0094, "step": 6380 }, { "epoch": 0.04203864397413209, "grad_norm": 0.5721465663226758, "learning_rate": 2.10190454261373e-06, "loss": 0.0093, "step": 6390 }, { "epoch": 0.042104432149365476, "grad_norm": 0.48342136834073124, "learning_rate": 2.1051939080951287e-06, "loss": 0.009, "step": 6400 }, { "epoch": 0.042170220324598856, "grad_norm": 0.4172875496007264, "learning_rate": 2.1084832735765275e-06, "loss": 0.0089, "step": 6410 }, { "epoch": 0.042236008499832244, "grad_norm": 0.5797248931554864, "learning_rate": 2.111772639057926e-06, "loss": 0.0075, "step": 6420 }, { "epoch": 0.042301796675065624, "grad_norm": 0.5313019085342544, "learning_rate": 2.1150620045393246e-06, "loss": 0.0089, "step": 6430 }, { "epoch": 0.042367584850299005, "grad_norm": 0.5394533926114436, "learning_rate": 2.118351370020723e-06, "loss": 0.0066, "step": 6440 }, { "epoch": 0.04243337302553239, "grad_norm": 0.41011533698919056, "learning_rate": 2.1216407355021218e-06, "loss": 0.0099, "step": 6450 }, { "epoch": 0.04249916120076577, "grad_norm": 0.5165556831967486, "learning_rate": 2.1249301009835206e-06, "loss": 0.0064, "step": 6460 }, { "epoch": 0.04256494937599916, "grad_norm": 0.8341139973121945, "learning_rate": 2.1282194664649193e-06, "loss": 0.0074, "step": 6470 }, { "epoch": 0.04263073755123254, "grad_norm": 0.9462758796833919, "learning_rate": 2.1315088319463177e-06, "loss": 0.0104, "step": 6480 }, { "epoch": 0.04269652572646593, "grad_norm": 0.9420173449440414, "learning_rate": 2.134798197427716e-06, "loss": 0.0117, "step": 6490 }, { "epoch": 0.04276231390169931, "grad_norm": 0.8750262002637963, "learning_rate": 2.138087562909115e-06, "loss": 0.0089, "step": 6500 }, { "epoch": 0.04282810207693269, "grad_norm": 0.4452101303910222, "learning_rate": 2.1413769283905136e-06, "loss": 0.0077, "step": 6510 }, { "epoch": 0.04289389025216608, "grad_norm": 0.25727264720678855, "learning_rate": 2.1446662938719124e-06, "loss": 0.0108, "step": 6520 }, { "epoch": 0.04295967842739946, "grad_norm": 0.4382350022813877, "learning_rate": 2.147955659353311e-06, "loss": 0.0061, "step": 6530 }, { "epoch": 0.043025466602632845, "grad_norm": 0.29465872639634766, "learning_rate": 2.1512450248347096e-06, "loss": 0.0066, "step": 6540 }, { "epoch": 0.043091254777866225, "grad_norm": 0.7102076068834271, "learning_rate": 2.154534390316108e-06, "loss": 0.0102, "step": 6550 }, { "epoch": 0.04315704295309961, "grad_norm": 0.5954109383732726, "learning_rate": 2.1578237557975067e-06, "loss": 0.0072, "step": 6560 }, { "epoch": 0.04322283112833299, "grad_norm": 0.329801428341822, "learning_rate": 2.1611131212789055e-06, "loss": 0.0094, "step": 6570 }, { "epoch": 0.043288619303566374, "grad_norm": 0.6337787988314658, "learning_rate": 2.1644024867603043e-06, "loss": 0.0081, "step": 6580 }, { "epoch": 0.04335440747879976, "grad_norm": 0.2966920928177154, "learning_rate": 2.167691852241703e-06, "loss": 0.0084, "step": 6590 }, { "epoch": 0.04342019565403314, "grad_norm": 0.6758829299403186, "learning_rate": 2.1709812177231014e-06, "loss": 0.0072, "step": 6600 }, { "epoch": 0.04348598382926653, "grad_norm": 0.6074876697775901, "learning_rate": 2.1742705832045e-06, "loss": 0.0087, "step": 6610 }, { "epoch": 0.04355177200449991, "grad_norm": 0.2332271194454649, "learning_rate": 2.1775599486858986e-06, "loss": 0.0066, "step": 6620 }, { "epoch": 0.0436175601797333, "grad_norm": 0.4185230620416811, "learning_rate": 2.1808493141672974e-06, "loss": 0.0093, "step": 6630 }, { "epoch": 0.04368334835496668, "grad_norm": 0.5870218618885736, "learning_rate": 2.184138679648696e-06, "loss": 0.0095, "step": 6640 }, { "epoch": 0.04374913653020006, "grad_norm": 0.5616581416837575, "learning_rate": 2.1874280451300945e-06, "loss": 0.0065, "step": 6650 }, { "epoch": 0.043814924705433446, "grad_norm": 0.2980885155504129, "learning_rate": 2.1907174106114933e-06, "loss": 0.0068, "step": 6660 }, { "epoch": 0.043880712880666826, "grad_norm": 0.3448991264439708, "learning_rate": 2.1940067760928917e-06, "loss": 0.0052, "step": 6670 }, { "epoch": 0.043946501055900214, "grad_norm": 0.619225248556574, "learning_rate": 2.1972961415742904e-06, "loss": 0.0076, "step": 6680 }, { "epoch": 0.044012289231133594, "grad_norm": 0.31641147484626914, "learning_rate": 2.2005855070556892e-06, "loss": 0.009, "step": 6690 }, { "epoch": 0.04407807740636698, "grad_norm": 0.45529351543922897, "learning_rate": 2.203874872537088e-06, "loss": 0.0074, "step": 6700 }, { "epoch": 0.04414386558160036, "grad_norm": 0.7557466069254448, "learning_rate": 2.2071642380184864e-06, "loss": 0.0068, "step": 6710 }, { "epoch": 0.04420965375683375, "grad_norm": 0.531336193505623, "learning_rate": 2.210453603499885e-06, "loss": 0.007, "step": 6720 }, { "epoch": 0.04427544193206713, "grad_norm": 0.4717132782012947, "learning_rate": 2.2137429689812835e-06, "loss": 0.0052, "step": 6730 }, { "epoch": 0.04434123010730051, "grad_norm": 0.5704128918561894, "learning_rate": 2.2170323344626823e-06, "loss": 0.0118, "step": 6740 }, { "epoch": 0.0444070182825339, "grad_norm": 0.47916049544584494, "learning_rate": 2.220321699944081e-06, "loss": 0.0098, "step": 6750 }, { "epoch": 0.04447280645776728, "grad_norm": 0.674328343193046, "learning_rate": 2.22361106542548e-06, "loss": 0.009, "step": 6760 }, { "epoch": 0.04453859463300067, "grad_norm": 0.3790922831594942, "learning_rate": 2.2269004309068782e-06, "loss": 0.0067, "step": 6770 }, { "epoch": 0.04460438280823405, "grad_norm": 0.23284070242416813, "learning_rate": 2.2301897963882766e-06, "loss": 0.0087, "step": 6780 }, { "epoch": 0.044670170983467435, "grad_norm": 0.272550145221225, "learning_rate": 2.2334791618696754e-06, "loss": 0.0055, "step": 6790 }, { "epoch": 0.044735959158700815, "grad_norm": 0.4793447496628956, "learning_rate": 2.236768527351074e-06, "loss": 0.0109, "step": 6800 }, { "epoch": 0.044801747333934196, "grad_norm": 0.1966444189363045, "learning_rate": 2.240057892832473e-06, "loss": 0.012, "step": 6810 }, { "epoch": 0.04486753550916758, "grad_norm": 1.4271609355978359, "learning_rate": 2.2433472583138718e-06, "loss": 0.0092, "step": 6820 }, { "epoch": 0.044933323684400964, "grad_norm": 0.5888289302286018, "learning_rate": 2.24663662379527e-06, "loss": 0.0071, "step": 6830 }, { "epoch": 0.04499911185963435, "grad_norm": 0.32674113043940844, "learning_rate": 2.2499259892766685e-06, "loss": 0.0081, "step": 6840 }, { "epoch": 0.04506490003486773, "grad_norm": 0.37947295466627295, "learning_rate": 2.2532153547580673e-06, "loss": 0.0078, "step": 6850 }, { "epoch": 0.04513068821010112, "grad_norm": 0.9316764773756412, "learning_rate": 2.256504720239466e-06, "loss": 0.0092, "step": 6860 }, { "epoch": 0.0451964763853345, "grad_norm": 0.5385399028301133, "learning_rate": 2.259794085720865e-06, "loss": 0.0088, "step": 6870 }, { "epoch": 0.04526226456056788, "grad_norm": 0.1787653902612379, "learning_rate": 2.263083451202263e-06, "loss": 0.0103, "step": 6880 }, { "epoch": 0.04532805273580127, "grad_norm": 0.5621147879055581, "learning_rate": 2.266372816683662e-06, "loss": 0.0086, "step": 6890 }, { "epoch": 0.04539384091103465, "grad_norm": 0.5659412733016899, "learning_rate": 2.2696621821650603e-06, "loss": 0.0087, "step": 6900 }, { "epoch": 0.045459629086268036, "grad_norm": 0.5604321086656358, "learning_rate": 2.272951547646459e-06, "loss": 0.0089, "step": 6910 }, { "epoch": 0.045525417261501416, "grad_norm": 0.6341310303825353, "learning_rate": 2.276240913127858e-06, "loss": 0.0076, "step": 6920 }, { "epoch": 0.045591205436734804, "grad_norm": 0.42000839576119764, "learning_rate": 2.2795302786092567e-06, "loss": 0.0074, "step": 6930 }, { "epoch": 0.045656993611968184, "grad_norm": 0.45407486713716333, "learning_rate": 2.282819644090655e-06, "loss": 0.0087, "step": 6940 }, { "epoch": 0.04572278178720157, "grad_norm": 0.34203299979115764, "learning_rate": 2.286109009572054e-06, "loss": 0.0062, "step": 6950 }, { "epoch": 0.04578856996243495, "grad_norm": 0.6185534948650747, "learning_rate": 2.289398375053452e-06, "loss": 0.0089, "step": 6960 }, { "epoch": 0.04585435813766833, "grad_norm": 0.4369343976794147, "learning_rate": 2.292687740534851e-06, "loss": 0.0071, "step": 6970 }, { "epoch": 0.04592014631290172, "grad_norm": 0.4053973822458296, "learning_rate": 2.2959771060162498e-06, "loss": 0.0085, "step": 6980 }, { "epoch": 0.0459859344881351, "grad_norm": 0.420515959364827, "learning_rate": 2.2992664714976486e-06, "loss": 0.0084, "step": 6990 }, { "epoch": 0.04605172266336849, "grad_norm": 0.2576039009390562, "learning_rate": 2.302555836979047e-06, "loss": 0.0132, "step": 7000 }, { "epoch": 0.04611751083860187, "grad_norm": 0.16069099768783907, "learning_rate": 2.3058452024604457e-06, "loss": 0.0043, "step": 7010 }, { "epoch": 0.046183299013835256, "grad_norm": 0.1745326553679035, "learning_rate": 2.309134567941844e-06, "loss": 0.0099, "step": 7020 }, { "epoch": 0.04624908718906864, "grad_norm": 0.6142672761914828, "learning_rate": 2.312423933423243e-06, "loss": 0.009, "step": 7030 }, { "epoch": 0.04631487536430202, "grad_norm": 0.6786050045239455, "learning_rate": 2.3157132989046416e-06, "loss": 0.0097, "step": 7040 }, { "epoch": 0.046380663539535405, "grad_norm": 0.343366368100572, "learning_rate": 2.31900266438604e-06, "loss": 0.0101, "step": 7050 }, { "epoch": 0.046446451714768786, "grad_norm": 0.49743037459409994, "learning_rate": 2.322292029867439e-06, "loss": 0.0089, "step": 7060 }, { "epoch": 0.04651223989000217, "grad_norm": 0.4530274115433754, "learning_rate": 2.3255813953488376e-06, "loss": 0.0085, "step": 7070 }, { "epoch": 0.046578028065235554, "grad_norm": 0.5040197849316636, "learning_rate": 2.328870760830236e-06, "loss": 0.0158, "step": 7080 }, { "epoch": 0.04664381624046894, "grad_norm": 0.18919240785506222, "learning_rate": 2.3321601263116347e-06, "loss": 0.0071, "step": 7090 }, { "epoch": 0.04670960441570232, "grad_norm": 0.35423284780747627, "learning_rate": 2.3354494917930335e-06, "loss": 0.0081, "step": 7100 }, { "epoch": 0.0467753925909357, "grad_norm": 0.626611128306087, "learning_rate": 2.338738857274432e-06, "loss": 0.0091, "step": 7110 }, { "epoch": 0.04684118076616909, "grad_norm": 0.4392711871046507, "learning_rate": 2.3420282227558307e-06, "loss": 0.0075, "step": 7120 }, { "epoch": 0.04690696894140247, "grad_norm": 0.4113643113879094, "learning_rate": 2.345317588237229e-06, "loss": 0.0062, "step": 7130 }, { "epoch": 0.04697275711663586, "grad_norm": 0.39084714216467475, "learning_rate": 2.348606953718628e-06, "loss": 0.009, "step": 7140 }, { "epoch": 0.04703854529186924, "grad_norm": 0.36693312426091795, "learning_rate": 2.3518963192000266e-06, "loss": 0.0099, "step": 7150 }, { "epoch": 0.047104333467102626, "grad_norm": 0.36403988579299285, "learning_rate": 2.3551856846814254e-06, "loss": 0.0081, "step": 7160 }, { "epoch": 0.047170121642336006, "grad_norm": 0.6208450371524141, "learning_rate": 2.3584750501628237e-06, "loss": 0.0076, "step": 7170 }, { "epoch": 0.04723590981756939, "grad_norm": 0.21394795103812034, "learning_rate": 2.3617644156442225e-06, "loss": 0.0041, "step": 7180 }, { "epoch": 0.047301697992802774, "grad_norm": 0.33165872977146604, "learning_rate": 2.365053781125621e-06, "loss": 0.0071, "step": 7190 }, { "epoch": 0.047367486168036155, "grad_norm": 0.5469710789387547, "learning_rate": 2.3683431466070197e-06, "loss": 0.0097, "step": 7200 }, { "epoch": 0.04743327434326954, "grad_norm": 0.26777606533831033, "learning_rate": 2.3716325120884185e-06, "loss": 0.0049, "step": 7210 }, { "epoch": 0.04749906251850292, "grad_norm": 0.36225645308753773, "learning_rate": 2.374921877569817e-06, "loss": 0.0124, "step": 7220 }, { "epoch": 0.04756485069373631, "grad_norm": 0.5124540125895669, "learning_rate": 2.3782112430512156e-06, "loss": 0.0106, "step": 7230 }, { "epoch": 0.04763063886896969, "grad_norm": 0.6073143192020714, "learning_rate": 2.3815006085326144e-06, "loss": 0.0077, "step": 7240 }, { "epoch": 0.04769642704420308, "grad_norm": 0.33679930083388565, "learning_rate": 2.3847899740140128e-06, "loss": 0.0081, "step": 7250 }, { "epoch": 0.04776221521943646, "grad_norm": 0.3480843050499487, "learning_rate": 2.3880793394954115e-06, "loss": 0.0085, "step": 7260 }, { "epoch": 0.04782800339466984, "grad_norm": 0.7222141475114722, "learning_rate": 2.3913687049768103e-06, "loss": 0.008, "step": 7270 }, { "epoch": 0.04789379156990323, "grad_norm": 0.2115062195964283, "learning_rate": 2.3946580704582087e-06, "loss": 0.0075, "step": 7280 }, { "epoch": 0.04795957974513661, "grad_norm": 0.07936585620309033, "learning_rate": 2.3979474359396075e-06, "loss": 0.0077, "step": 7290 }, { "epoch": 0.048025367920369995, "grad_norm": 0.4606197196977585, "learning_rate": 2.4012368014210063e-06, "loss": 0.0071, "step": 7300 }, { "epoch": 0.048091156095603375, "grad_norm": 0.25168837603888145, "learning_rate": 2.4045261669024046e-06, "loss": 0.0093, "step": 7310 }, { "epoch": 0.04815694427083676, "grad_norm": 0.6126046738147267, "learning_rate": 2.4078155323838034e-06, "loss": 0.0102, "step": 7320 }, { "epoch": 0.04822273244607014, "grad_norm": 0.6338550764388059, "learning_rate": 2.411104897865202e-06, "loss": 0.0073, "step": 7330 }, { "epoch": 0.048288520621303524, "grad_norm": 0.2254098009260271, "learning_rate": 2.4143942633466005e-06, "loss": 0.0078, "step": 7340 }, { "epoch": 0.04835430879653691, "grad_norm": 0.5683212601134247, "learning_rate": 2.4176836288279993e-06, "loss": 0.0093, "step": 7350 }, { "epoch": 0.04842009697177029, "grad_norm": 0.516786436367293, "learning_rate": 2.420972994309398e-06, "loss": 0.0071, "step": 7360 }, { "epoch": 0.04848588514700368, "grad_norm": 0.3259316184558756, "learning_rate": 2.4242623597907965e-06, "loss": 0.0088, "step": 7370 }, { "epoch": 0.04855167332223706, "grad_norm": 0.5597202059510148, "learning_rate": 2.4275517252721953e-06, "loss": 0.0087, "step": 7380 }, { "epoch": 0.04861746149747045, "grad_norm": 0.39797299137559083, "learning_rate": 2.4308410907535936e-06, "loss": 0.0082, "step": 7390 }, { "epoch": 0.04868324967270383, "grad_norm": 0.2567541448995406, "learning_rate": 2.4341304562349924e-06, "loss": 0.0077, "step": 7400 }, { "epoch": 0.04874903784793721, "grad_norm": 0.40013690228790855, "learning_rate": 2.437419821716391e-06, "loss": 0.0066, "step": 7410 }, { "epoch": 0.048814826023170596, "grad_norm": 0.24752022991993933, "learning_rate": 2.4407091871977896e-06, "loss": 0.0085, "step": 7420 }, { "epoch": 0.04888061419840398, "grad_norm": 0.3658047371841641, "learning_rate": 2.4439985526791883e-06, "loss": 0.0088, "step": 7430 }, { "epoch": 0.048946402373637364, "grad_norm": 0.4252181238396877, "learning_rate": 2.447287918160587e-06, "loss": 0.0083, "step": 7440 }, { "epoch": 0.049012190548870745, "grad_norm": 0.4047904082851924, "learning_rate": 2.4505772836419855e-06, "loss": 0.008, "step": 7450 }, { "epoch": 0.04907797872410413, "grad_norm": 0.4877434882395218, "learning_rate": 2.4538666491233843e-06, "loss": 0.0068, "step": 7460 }, { "epoch": 0.04914376689933751, "grad_norm": 0.8060908498128815, "learning_rate": 2.457156014604783e-06, "loss": 0.0113, "step": 7470 }, { "epoch": 0.04920955507457089, "grad_norm": 0.5634590368895626, "learning_rate": 2.4604453800861814e-06, "loss": 0.0138, "step": 7480 }, { "epoch": 0.04927534324980428, "grad_norm": 0.3505160611530522, "learning_rate": 2.4637347455675802e-06, "loss": 0.0075, "step": 7490 }, { "epoch": 0.04934113142503766, "grad_norm": 0.4855459993162811, "learning_rate": 2.467024111048979e-06, "loss": 0.0084, "step": 7500 }, { "epoch": 0.04940691960027105, "grad_norm": 0.6028430813494059, "learning_rate": 2.4703134765303774e-06, "loss": 0.0114, "step": 7510 }, { "epoch": 0.04947270777550443, "grad_norm": 0.34225273209357826, "learning_rate": 2.473602842011776e-06, "loss": 0.0106, "step": 7520 }, { "epoch": 0.04953849595073782, "grad_norm": 0.3055657041734806, "learning_rate": 2.476892207493175e-06, "loss": 0.0074, "step": 7530 }, { "epoch": 0.0496042841259712, "grad_norm": 0.44660839901300675, "learning_rate": 2.4801815729745733e-06, "loss": 0.0063, "step": 7540 }, { "epoch": 0.049670072301204585, "grad_norm": 0.2987540322955873, "learning_rate": 2.483470938455972e-06, "loss": 0.0086, "step": 7550 }, { "epoch": 0.049735860476437965, "grad_norm": 0.46513894131092565, "learning_rate": 2.4867603039373704e-06, "loss": 0.0087, "step": 7560 }, { "epoch": 0.049801648651671346, "grad_norm": 0.37485616786698683, "learning_rate": 2.4900496694187692e-06, "loss": 0.0087, "step": 7570 }, { "epoch": 0.04986743682690473, "grad_norm": 0.546239013961458, "learning_rate": 2.493339034900168e-06, "loss": 0.007, "step": 7580 }, { "epoch": 0.049933225002138114, "grad_norm": 0.2294186263251124, "learning_rate": 2.496628400381567e-06, "loss": 0.0085, "step": 7590 }, { "epoch": 0.0499990131773715, "grad_norm": 1.046654560968226, "learning_rate": 2.499917765862965e-06, "loss": 0.011, "step": 7600 }, { "epoch": 0.05006480135260488, "grad_norm": 0.33002171044862966, "learning_rate": 2.503207131344364e-06, "loss": 0.0104, "step": 7610 }, { "epoch": 0.05013058952783827, "grad_norm": 0.4659608706687365, "learning_rate": 2.5064964968257627e-06, "loss": 0.0106, "step": 7620 }, { "epoch": 0.05019637770307165, "grad_norm": 0.31185382927948174, "learning_rate": 2.5097858623071615e-06, "loss": 0.0072, "step": 7630 }, { "epoch": 0.05026216587830503, "grad_norm": 0.8927085734507109, "learning_rate": 2.51307522778856e-06, "loss": 0.0093, "step": 7640 }, { "epoch": 0.05032795405353842, "grad_norm": 0.186882525175891, "learning_rate": 2.5163645932699582e-06, "loss": 0.0089, "step": 7650 }, { "epoch": 0.0503937422287718, "grad_norm": 0.5402961683817805, "learning_rate": 2.519653958751357e-06, "loss": 0.0107, "step": 7660 }, { "epoch": 0.050459530404005186, "grad_norm": 0.2664147428886024, "learning_rate": 2.5229433242327554e-06, "loss": 0.0064, "step": 7670 }, { "epoch": 0.050525318579238566, "grad_norm": 0.26262675292226495, "learning_rate": 2.526232689714154e-06, "loss": 0.0078, "step": 7680 }, { "epoch": 0.050591106754471954, "grad_norm": 0.3829613425212357, "learning_rate": 2.529522055195553e-06, "loss": 0.0061, "step": 7690 }, { "epoch": 0.050656894929705334, "grad_norm": 0.5085815474387773, "learning_rate": 2.5328114206769517e-06, "loss": 0.0073, "step": 7700 }, { "epoch": 0.050722683104938715, "grad_norm": 0.4433288771048355, "learning_rate": 2.53610078615835e-06, "loss": 0.0083, "step": 7710 }, { "epoch": 0.0507884712801721, "grad_norm": 0.6071763989019205, "learning_rate": 2.539390151639749e-06, "loss": 0.005, "step": 7720 }, { "epoch": 0.05085425945540548, "grad_norm": 0.4807482017871408, "learning_rate": 2.5426795171211477e-06, "loss": 0.008, "step": 7730 }, { "epoch": 0.05092004763063887, "grad_norm": 0.4802339277721866, "learning_rate": 2.5459688826025465e-06, "loss": 0.0079, "step": 7740 }, { "epoch": 0.05098583580587225, "grad_norm": 0.3314374632416448, "learning_rate": 2.5492582480839452e-06, "loss": 0.0075, "step": 7750 }, { "epoch": 0.05105162398110564, "grad_norm": 0.399602773530461, "learning_rate": 2.552547613565343e-06, "loss": 0.0079, "step": 7760 }, { "epoch": 0.05111741215633902, "grad_norm": 0.3449474764109721, "learning_rate": 2.555836979046742e-06, "loss": 0.0069, "step": 7770 }, { "epoch": 0.05118320033157241, "grad_norm": 0.5195495734003945, "learning_rate": 2.5591263445281408e-06, "loss": 0.0086, "step": 7780 }, { "epoch": 0.05124898850680579, "grad_norm": 0.7683907850811325, "learning_rate": 2.562415710009539e-06, "loss": 0.0066, "step": 7790 }, { "epoch": 0.05131477668203917, "grad_norm": 0.5115229791019176, "learning_rate": 2.565705075490938e-06, "loss": 0.0078, "step": 7800 }, { "epoch": 0.051380564857272555, "grad_norm": 0.36932544296235487, "learning_rate": 2.5689944409723367e-06, "loss": 0.0088, "step": 7810 }, { "epoch": 0.051446353032505936, "grad_norm": 0.453649449573728, "learning_rate": 2.5722838064537355e-06, "loss": 0.0081, "step": 7820 }, { "epoch": 0.05151214120773932, "grad_norm": 1.3372921732474699, "learning_rate": 2.575573171935134e-06, "loss": 0.0073, "step": 7830 }, { "epoch": 0.051577929382972704, "grad_norm": 0.5091926165205234, "learning_rate": 2.5788625374165326e-06, "loss": 0.0083, "step": 7840 }, { "epoch": 0.05164371755820609, "grad_norm": 0.8278877168406622, "learning_rate": 2.5821519028979314e-06, "loss": 0.0078, "step": 7850 }, { "epoch": 0.05170950573343947, "grad_norm": 0.5946042487511226, "learning_rate": 2.58544126837933e-06, "loss": 0.0075, "step": 7860 }, { "epoch": 0.05177529390867285, "grad_norm": 0.4219807338523664, "learning_rate": 2.588730633860728e-06, "loss": 0.0073, "step": 7870 }, { "epoch": 0.05184108208390624, "grad_norm": 0.5116858429440384, "learning_rate": 2.592019999342127e-06, "loss": 0.0078, "step": 7880 }, { "epoch": 0.05190687025913962, "grad_norm": 0.3284815331116716, "learning_rate": 2.5953093648235257e-06, "loss": 0.0071, "step": 7890 }, { "epoch": 0.05197265843437301, "grad_norm": 0.39160556322467593, "learning_rate": 2.598598730304924e-06, "loss": 0.006, "step": 7900 }, { "epoch": 0.05203844660960639, "grad_norm": 0.29213800388615163, "learning_rate": 2.601888095786323e-06, "loss": 0.0039, "step": 7910 }, { "epoch": 0.052104234784839776, "grad_norm": 0.42918592086836554, "learning_rate": 2.6051774612677216e-06, "loss": 0.0064, "step": 7920 }, { "epoch": 0.052170022960073156, "grad_norm": 0.4062423442165752, "learning_rate": 2.6084668267491204e-06, "loss": 0.0056, "step": 7930 }, { "epoch": 0.05223581113530654, "grad_norm": 0.3041053616621662, "learning_rate": 2.611756192230519e-06, "loss": 0.0064, "step": 7940 }, { "epoch": 0.052301599310539924, "grad_norm": 0.33214237033943317, "learning_rate": 2.6150455577119176e-06, "loss": 0.0071, "step": 7950 }, { "epoch": 0.052367387485773305, "grad_norm": 0.3395437373031286, "learning_rate": 2.6183349231933164e-06, "loss": 0.0078, "step": 7960 }, { "epoch": 0.05243317566100669, "grad_norm": 0.8310440495706154, "learning_rate": 2.621624288674715e-06, "loss": 0.0074, "step": 7970 }, { "epoch": 0.05249896383624007, "grad_norm": 0.49758701915718384, "learning_rate": 2.624913654156114e-06, "loss": 0.0074, "step": 7980 }, { "epoch": 0.05256475201147346, "grad_norm": 0.3484608574172409, "learning_rate": 2.628203019637512e-06, "loss": 0.0084, "step": 7990 }, { "epoch": 0.05263054018670684, "grad_norm": 0.28313668565036254, "learning_rate": 2.6314923851189107e-06, "loss": 0.0058, "step": 8000 }, { "epoch": 0.05269632836194022, "grad_norm": 0.34125139554636935, "learning_rate": 2.6347817506003094e-06, "loss": 0.0068, "step": 8010 }, { "epoch": 0.05276211653717361, "grad_norm": 0.21599203335257364, "learning_rate": 2.638071116081708e-06, "loss": 0.0077, "step": 8020 }, { "epoch": 0.05282790471240699, "grad_norm": 0.12436259197416315, "learning_rate": 2.6413604815631066e-06, "loss": 0.0056, "step": 8030 }, { "epoch": 0.05289369288764038, "grad_norm": 0.44643600144517775, "learning_rate": 2.6446498470445054e-06, "loss": 0.0063, "step": 8040 }, { "epoch": 0.05295948106287376, "grad_norm": 0.4159421130411129, "learning_rate": 2.647939212525904e-06, "loss": 0.0058, "step": 8050 }, { "epoch": 0.053025269238107145, "grad_norm": 0.1988573764278727, "learning_rate": 2.6512285780073025e-06, "loss": 0.0058, "step": 8060 }, { "epoch": 0.053091057413340526, "grad_norm": 0.6041244354651751, "learning_rate": 2.6545179434887013e-06, "loss": 0.0048, "step": 8070 }, { "epoch": 0.05315684558857391, "grad_norm": 0.617460654003287, "learning_rate": 2.6578073089701e-06, "loss": 0.0125, "step": 8080 }, { "epoch": 0.053222633763807294, "grad_norm": 0.1988645577240411, "learning_rate": 2.661096674451499e-06, "loss": 0.0071, "step": 8090 }, { "epoch": 0.053288421939040674, "grad_norm": 0.24493558822524947, "learning_rate": 2.664386039932897e-06, "loss": 0.0109, "step": 8100 }, { "epoch": 0.05335421011427406, "grad_norm": 0.9008301598501437, "learning_rate": 2.6676754054142956e-06, "loss": 0.008, "step": 8110 }, { "epoch": 0.05341999828950744, "grad_norm": 0.5791260102473599, "learning_rate": 2.6709647708956944e-06, "loss": 0.0092, "step": 8120 }, { "epoch": 0.05348578646474083, "grad_norm": 0.5884741064678607, "learning_rate": 2.674254136377093e-06, "loss": 0.0067, "step": 8130 }, { "epoch": 0.05355157463997421, "grad_norm": 0.4166081092548693, "learning_rate": 2.6775435018584915e-06, "loss": 0.0073, "step": 8140 }, { "epoch": 0.0536173628152076, "grad_norm": 0.26407054884118913, "learning_rate": 2.6808328673398903e-06, "loss": 0.007, "step": 8150 }, { "epoch": 0.05368315099044098, "grad_norm": 0.41510498659227707, "learning_rate": 2.684122232821289e-06, "loss": 0.0074, "step": 8160 }, { "epoch": 0.05374893916567436, "grad_norm": 0.23780311502129167, "learning_rate": 2.687411598302688e-06, "loss": 0.0053, "step": 8170 }, { "epoch": 0.053814727340907746, "grad_norm": 0.3944217528744171, "learning_rate": 2.6907009637840862e-06, "loss": 0.0053, "step": 8180 }, { "epoch": 0.05388051551614113, "grad_norm": 0.5118798585462019, "learning_rate": 2.693990329265485e-06, "loss": 0.0047, "step": 8190 }, { "epoch": 0.053946303691374514, "grad_norm": 0.3425066442861607, "learning_rate": 2.697279694746884e-06, "loss": 0.0069, "step": 8200 }, { "epoch": 0.054012091866607895, "grad_norm": 0.4739364465659819, "learning_rate": 2.7005690602282818e-06, "loss": 0.0077, "step": 8210 }, { "epoch": 0.05407788004184128, "grad_norm": 0.2458319655563834, "learning_rate": 2.7038584257096805e-06, "loss": 0.0065, "step": 8220 }, { "epoch": 0.05414366821707466, "grad_norm": 0.15859970092515363, "learning_rate": 2.7071477911910793e-06, "loss": 0.0071, "step": 8230 }, { "epoch": 0.05420945639230804, "grad_norm": 0.3157922125736338, "learning_rate": 2.710437156672478e-06, "loss": 0.0042, "step": 8240 }, { "epoch": 0.05427524456754143, "grad_norm": 0.4398455768375834, "learning_rate": 2.7137265221538765e-06, "loss": 0.0112, "step": 8250 }, { "epoch": 0.05434103274277481, "grad_norm": 0.37767454887028507, "learning_rate": 2.7170158876352753e-06, "loss": 0.0069, "step": 8260 }, { "epoch": 0.0544068209180082, "grad_norm": 0.4263765879354931, "learning_rate": 2.720305253116674e-06, "loss": 0.0082, "step": 8270 }, { "epoch": 0.05447260909324158, "grad_norm": 0.47883640471937455, "learning_rate": 2.723594618598073e-06, "loss": 0.0099, "step": 8280 }, { "epoch": 0.05453839726847497, "grad_norm": 0.18911780071441617, "learning_rate": 2.7268839840794716e-06, "loss": 0.0085, "step": 8290 }, { "epoch": 0.05460418544370835, "grad_norm": 0.23553786188253625, "learning_rate": 2.73017334956087e-06, "loss": 0.0047, "step": 8300 }, { "epoch": 0.054669973618941735, "grad_norm": 0.46442610890440367, "learning_rate": 2.7334627150422688e-06, "loss": 0.0061, "step": 8310 }, { "epoch": 0.054735761794175115, "grad_norm": 0.43596770972621424, "learning_rate": 2.7367520805236676e-06, "loss": 0.0071, "step": 8320 }, { "epoch": 0.054801549969408496, "grad_norm": 0.4627957445710947, "learning_rate": 2.7400414460050655e-06, "loss": 0.0054, "step": 8330 }, { "epoch": 0.05486733814464188, "grad_norm": 0.49212583994166453, "learning_rate": 2.7433308114864643e-06, "loss": 0.0074, "step": 8340 }, { "epoch": 0.054933126319875264, "grad_norm": 1.1495702088811859, "learning_rate": 2.746620176967863e-06, "loss": 0.009, "step": 8350 }, { "epoch": 0.05499891449510865, "grad_norm": 0.23578436794951835, "learning_rate": 2.749909542449262e-06, "loss": 0.0087, "step": 8360 }, { "epoch": 0.05506470267034203, "grad_norm": 0.33649381277065443, "learning_rate": 2.75319890793066e-06, "loss": 0.0057, "step": 8370 }, { "epoch": 0.05513049084557542, "grad_norm": 0.40001930076526687, "learning_rate": 2.756488273412059e-06, "loss": 0.004, "step": 8380 }, { "epoch": 0.0551962790208088, "grad_norm": 0.9678580670312694, "learning_rate": 2.7597776388934578e-06, "loss": 0.0123, "step": 8390 }, { "epoch": 0.05526206719604218, "grad_norm": 0.3563105051553976, "learning_rate": 2.7630670043748566e-06, "loss": 0.0056, "step": 8400 }, { "epoch": 0.05532785537127557, "grad_norm": 0.18802342011960504, "learning_rate": 2.766356369856255e-06, "loss": 0.0066, "step": 8410 }, { "epoch": 0.05539364354650895, "grad_norm": 0.30029452397442746, "learning_rate": 2.7696457353376537e-06, "loss": 0.0069, "step": 8420 }, { "epoch": 0.055459431721742336, "grad_norm": 0.4405276953243772, "learning_rate": 2.7729351008190525e-06, "loss": 0.0092, "step": 8430 }, { "epoch": 0.05552521989697572, "grad_norm": 0.27469572061794406, "learning_rate": 2.7762244663004504e-06, "loss": 0.0042, "step": 8440 }, { "epoch": 0.055591008072209104, "grad_norm": 0.3027940185609027, "learning_rate": 2.7795138317818492e-06, "loss": 0.0086, "step": 8450 }, { "epoch": 0.055656796247442485, "grad_norm": 0.30806602231147967, "learning_rate": 2.782803197263248e-06, "loss": 0.0063, "step": 8460 }, { "epoch": 0.055722584422675865, "grad_norm": 0.22773484926753698, "learning_rate": 2.786092562744647e-06, "loss": 0.0072, "step": 8470 }, { "epoch": 0.05578837259790925, "grad_norm": 0.2614046126770015, "learning_rate": 2.789381928226045e-06, "loss": 0.0102, "step": 8480 }, { "epoch": 0.05585416077314263, "grad_norm": 0.22931164056824496, "learning_rate": 2.792671293707444e-06, "loss": 0.0069, "step": 8490 }, { "epoch": 0.05591994894837602, "grad_norm": 0.18319906585663973, "learning_rate": 2.7959606591888427e-06, "loss": 0.0053, "step": 8500 }, { "epoch": 0.0559857371236094, "grad_norm": 0.49817858198731246, "learning_rate": 2.7992500246702415e-06, "loss": 0.0097, "step": 8510 }, { "epoch": 0.05605152529884279, "grad_norm": 0.23484939184234527, "learning_rate": 2.8025393901516403e-06, "loss": 0.0069, "step": 8520 }, { "epoch": 0.05611731347407617, "grad_norm": 0.27332985897068385, "learning_rate": 2.8058287556330387e-06, "loss": 0.0071, "step": 8530 }, { "epoch": 0.05618310164930955, "grad_norm": 0.25203718092719757, "learning_rate": 2.8091181211144374e-06, "loss": 0.0065, "step": 8540 }, { "epoch": 0.05624888982454294, "grad_norm": 0.4769345112365216, "learning_rate": 2.812407486595836e-06, "loss": 0.0052, "step": 8550 }, { "epoch": 0.05631467799977632, "grad_norm": 0.1946809435579715, "learning_rate": 2.815696852077234e-06, "loss": 0.0066, "step": 8560 }, { "epoch": 0.056380466175009705, "grad_norm": 0.6009644541058319, "learning_rate": 2.818986217558633e-06, "loss": 0.0054, "step": 8570 }, { "epoch": 0.056446254350243086, "grad_norm": 0.2202804232226157, "learning_rate": 2.8222755830400317e-06, "loss": 0.0104, "step": 8580 }, { "epoch": 0.05651204252547647, "grad_norm": 0.43258800819813914, "learning_rate": 2.8255649485214305e-06, "loss": 0.0053, "step": 8590 }, { "epoch": 0.056577830700709854, "grad_norm": 0.6792273908893088, "learning_rate": 2.828854314002829e-06, "loss": 0.0098, "step": 8600 }, { "epoch": 0.05664361887594324, "grad_norm": 0.45963100087997005, "learning_rate": 2.8321436794842277e-06, "loss": 0.0068, "step": 8610 }, { "epoch": 0.05670940705117662, "grad_norm": 0.3971661286599718, "learning_rate": 2.8354330449656265e-06, "loss": 0.0052, "step": 8620 }, { "epoch": 0.05677519522641, "grad_norm": 0.3313049023156147, "learning_rate": 2.8387224104470252e-06, "loss": 0.0059, "step": 8630 }, { "epoch": 0.05684098340164339, "grad_norm": 0.2151019447824468, "learning_rate": 2.8420117759284236e-06, "loss": 0.005, "step": 8640 }, { "epoch": 0.05690677157687677, "grad_norm": 0.45510111849252194, "learning_rate": 2.8453011414098224e-06, "loss": 0.0078, "step": 8650 }, { "epoch": 0.05697255975211016, "grad_norm": 0.5408950804031757, "learning_rate": 2.848590506891221e-06, "loss": 0.0075, "step": 8660 }, { "epoch": 0.05703834792734354, "grad_norm": 0.26203886900548484, "learning_rate": 2.851879872372619e-06, "loss": 0.0056, "step": 8670 }, { "epoch": 0.057104136102576926, "grad_norm": 0.47544225661747697, "learning_rate": 2.855169237854018e-06, "loss": 0.0071, "step": 8680 }, { "epoch": 0.057169924277810306, "grad_norm": 0.6076362567249599, "learning_rate": 2.8584586033354167e-06, "loss": 0.0057, "step": 8690 }, { "epoch": 0.05723571245304369, "grad_norm": 0.9166948700336156, "learning_rate": 2.8617479688168155e-06, "loss": 0.011, "step": 8700 }, { "epoch": 0.057301500628277074, "grad_norm": 0.3055560020506335, "learning_rate": 2.8650373342982143e-06, "loss": 0.0055, "step": 8710 }, { "epoch": 0.057367288803510455, "grad_norm": 0.4652188744159458, "learning_rate": 2.8683266997796126e-06, "loss": 0.0087, "step": 8720 }, { "epoch": 0.05743307697874384, "grad_norm": 0.553820081043512, "learning_rate": 2.8716160652610114e-06, "loss": 0.0085, "step": 8730 }, { "epoch": 0.05749886515397722, "grad_norm": 0.2062999103382714, "learning_rate": 2.87490543074241e-06, "loss": 0.0048, "step": 8740 }, { "epoch": 0.05756465332921061, "grad_norm": 0.6412392578388266, "learning_rate": 2.878194796223809e-06, "loss": 0.006, "step": 8750 }, { "epoch": 0.05763044150444399, "grad_norm": 0.30747986697023477, "learning_rate": 2.8814841617052073e-06, "loss": 0.0065, "step": 8760 }, { "epoch": 0.05769622967967737, "grad_norm": 0.31159875566867895, "learning_rate": 2.884773527186606e-06, "loss": 0.0073, "step": 8770 }, { "epoch": 0.05776201785491076, "grad_norm": 0.7661858040408671, "learning_rate": 2.8880628926680045e-06, "loss": 0.007, "step": 8780 }, { "epoch": 0.05782780603014414, "grad_norm": 0.259449597466795, "learning_rate": 2.891352258149403e-06, "loss": 0.0076, "step": 8790 }, { "epoch": 0.05789359420537753, "grad_norm": 0.3669201339809525, "learning_rate": 2.8946416236308016e-06, "loss": 0.0054, "step": 8800 }, { "epoch": 0.05795938238061091, "grad_norm": 0.41306868970104604, "learning_rate": 2.8979309891122004e-06, "loss": 0.0064, "step": 8810 }, { "epoch": 0.058025170555844295, "grad_norm": 0.39312445069630264, "learning_rate": 2.901220354593599e-06, "loss": 0.0048, "step": 8820 }, { "epoch": 0.058090958731077676, "grad_norm": 0.47792172641644765, "learning_rate": 2.9045097200749976e-06, "loss": 0.0046, "step": 8830 }, { "epoch": 0.058156746906311056, "grad_norm": 0.49543038366561243, "learning_rate": 2.9077990855563963e-06, "loss": 0.0051, "step": 8840 }, { "epoch": 0.058222535081544444, "grad_norm": 0.6107082991095868, "learning_rate": 2.911088451037795e-06, "loss": 0.0072, "step": 8850 }, { "epoch": 0.058288323256777824, "grad_norm": 0.187533876036093, "learning_rate": 2.914377816519194e-06, "loss": 0.0045, "step": 8860 }, { "epoch": 0.05835411143201121, "grad_norm": 0.37356053162257624, "learning_rate": 2.9176671820005927e-06, "loss": 0.0079, "step": 8870 }, { "epoch": 0.05841989960724459, "grad_norm": 0.37410400527474436, "learning_rate": 2.920956547481991e-06, "loss": 0.0082, "step": 8880 }, { "epoch": 0.05848568778247798, "grad_norm": 0.43540822926870965, "learning_rate": 2.9242459129633894e-06, "loss": 0.0062, "step": 8890 }, { "epoch": 0.05855147595771136, "grad_norm": 0.3429830159901712, "learning_rate": 2.9275352784447882e-06, "loss": 0.0077, "step": 8900 }, { "epoch": 0.05861726413294475, "grad_norm": 0.38654820295989417, "learning_rate": 2.9308246439261866e-06, "loss": 0.0039, "step": 8910 }, { "epoch": 0.05868305230817813, "grad_norm": 0.2740264565869401, "learning_rate": 2.9341140094075854e-06, "loss": 0.0065, "step": 8920 }, { "epoch": 0.05874884048341151, "grad_norm": 0.20540014618333213, "learning_rate": 2.937403374888984e-06, "loss": 0.0055, "step": 8930 }, { "epoch": 0.058814628658644896, "grad_norm": 0.6041626201596345, "learning_rate": 2.940692740370383e-06, "loss": 0.0056, "step": 8940 }, { "epoch": 0.05888041683387828, "grad_norm": 0.3572971876777996, "learning_rate": 2.9439821058517813e-06, "loss": 0.007, "step": 8950 }, { "epoch": 0.058946205009111664, "grad_norm": 0.5586700796895033, "learning_rate": 2.94727147133318e-06, "loss": 0.0073, "step": 8960 }, { "epoch": 0.059011993184345045, "grad_norm": 0.23149097153181553, "learning_rate": 2.950560836814579e-06, "loss": 0.0072, "step": 8970 }, { "epoch": 0.05907778135957843, "grad_norm": 1.0561779295604694, "learning_rate": 2.9538502022959777e-06, "loss": 0.007, "step": 8980 }, { "epoch": 0.05914356953481181, "grad_norm": 0.33757113852326526, "learning_rate": 2.957139567777376e-06, "loss": 0.0048, "step": 8990 }, { "epoch": 0.05920935771004519, "grad_norm": 0.41622325138078053, "learning_rate": 2.960428933258775e-06, "loss": 0.0134, "step": 9000 }, { "epoch": 0.05927514588527858, "grad_norm": 0.46537303090625975, "learning_rate": 2.963718298740173e-06, "loss": 0.0047, "step": 9010 }, { "epoch": 0.05934093406051196, "grad_norm": 0.519093692344868, "learning_rate": 2.9670076642215715e-06, "loss": 0.0052, "step": 9020 }, { "epoch": 0.05940672223574535, "grad_norm": 0.3176298716675916, "learning_rate": 2.9702970297029703e-06, "loss": 0.0049, "step": 9030 }, { "epoch": 0.05947251041097873, "grad_norm": 0.17090881119954493, "learning_rate": 2.973586395184369e-06, "loss": 0.0048, "step": 9040 }, { "epoch": 0.05953829858621212, "grad_norm": 0.4030287497295901, "learning_rate": 2.976875760665768e-06, "loss": 0.0075, "step": 9050 }, { "epoch": 0.0596040867614455, "grad_norm": 0.43112822358486813, "learning_rate": 2.9801651261471667e-06, "loss": 0.0107, "step": 9060 }, { "epoch": 0.05966987493667888, "grad_norm": 0.36138849809532586, "learning_rate": 2.983454491628565e-06, "loss": 0.0061, "step": 9070 }, { "epoch": 0.059735663111912266, "grad_norm": 0.2385116786515552, "learning_rate": 2.986743857109964e-06, "loss": 0.0069, "step": 9080 }, { "epoch": 0.059801451287145646, "grad_norm": 0.41827374609560863, "learning_rate": 2.9900332225913626e-06, "loss": 0.0088, "step": 9090 }, { "epoch": 0.059867239462379034, "grad_norm": 0.1954640310036548, "learning_rate": 2.9933225880727614e-06, "loss": 0.008, "step": 9100 }, { "epoch": 0.059933027637612414, "grad_norm": 0.22442014953753872, "learning_rate": 2.9966119535541597e-06, "loss": 0.0045, "step": 9110 }, { "epoch": 0.0599988158128458, "grad_norm": 0.3215157694241117, "learning_rate": 2.999901319035558e-06, "loss": 0.0061, "step": 9120 }, { "epoch": 0.06006460398807918, "grad_norm": 0.4701060280381484, "learning_rate": 3.003190684516957e-06, "loss": 0.0074, "step": 9130 }, { "epoch": 0.06013039216331257, "grad_norm": 0.38613037096356345, "learning_rate": 3.0064800499983553e-06, "loss": 0.0041, "step": 9140 }, { "epoch": 0.06019618033854595, "grad_norm": 0.677383307485742, "learning_rate": 3.009769415479754e-06, "loss": 0.0077, "step": 9150 }, { "epoch": 0.06026196851377933, "grad_norm": 0.24984197430091254, "learning_rate": 3.013058780961153e-06, "loss": 0.0082, "step": 9160 }, { "epoch": 0.06032775668901272, "grad_norm": 0.5339646105277455, "learning_rate": 3.0163481464425516e-06, "loss": 0.0055, "step": 9170 }, { "epoch": 0.0603935448642461, "grad_norm": 0.20509258384082873, "learning_rate": 3.01963751192395e-06, "loss": 0.0055, "step": 9180 }, { "epoch": 0.060459333039479486, "grad_norm": 0.3878957759275335, "learning_rate": 3.0229268774053488e-06, "loss": 0.0059, "step": 9190 }, { "epoch": 0.06052512121471287, "grad_norm": 0.17879637195534903, "learning_rate": 3.0262162428867475e-06, "loss": 0.006, "step": 9200 }, { "epoch": 0.060590909389946254, "grad_norm": 0.7705232576880128, "learning_rate": 3.0295056083681463e-06, "loss": 0.0053, "step": 9210 }, { "epoch": 0.060656697565179635, "grad_norm": 0.4987190532462923, "learning_rate": 3.032794973849545e-06, "loss": 0.0056, "step": 9220 }, { "epoch": 0.060722485740413015, "grad_norm": 0.34787562850378023, "learning_rate": 3.036084339330943e-06, "loss": 0.0082, "step": 9230 }, { "epoch": 0.0607882739156464, "grad_norm": 0.3023614531331815, "learning_rate": 3.039373704812342e-06, "loss": 0.0062, "step": 9240 }, { "epoch": 0.06085406209087978, "grad_norm": 0.20113083440288276, "learning_rate": 3.0426630702937406e-06, "loss": 0.0051, "step": 9250 }, { "epoch": 0.06091985026611317, "grad_norm": 0.17267702340677682, "learning_rate": 3.045952435775139e-06, "loss": 0.008, "step": 9260 }, { "epoch": 0.06098563844134655, "grad_norm": 0.18700429239063343, "learning_rate": 3.0492418012565378e-06, "loss": 0.006, "step": 9270 }, { "epoch": 0.06105142661657994, "grad_norm": 0.26805141933294635, "learning_rate": 3.0525311667379366e-06, "loss": 0.0045, "step": 9280 }, { "epoch": 0.06111721479181332, "grad_norm": 0.19871844996300425, "learning_rate": 3.0558205322193353e-06, "loss": 0.0084, "step": 9290 }, { "epoch": 0.0611830029670467, "grad_norm": 0.3033575247128198, "learning_rate": 3.0591098977007337e-06, "loss": 0.0052, "step": 9300 }, { "epoch": 0.06124879114228009, "grad_norm": 0.2839415831851423, "learning_rate": 3.0623992631821325e-06, "loss": 0.0038, "step": 9310 }, { "epoch": 0.06131457931751347, "grad_norm": 0.23640461826559142, "learning_rate": 3.0656886286635313e-06, "loss": 0.0088, "step": 9320 }, { "epoch": 0.061380367492746855, "grad_norm": 0.5290670751091161, "learning_rate": 3.06897799414493e-06, "loss": 0.0074, "step": 9330 }, { "epoch": 0.061446155667980236, "grad_norm": 0.5064317903462504, "learning_rate": 3.0722673596263284e-06, "loss": 0.0065, "step": 9340 }, { "epoch": 0.06151194384321362, "grad_norm": 0.5826689341472733, "learning_rate": 3.0755567251077268e-06, "loss": 0.0072, "step": 9350 }, { "epoch": 0.061577732018447004, "grad_norm": 0.09852264765928888, "learning_rate": 3.0788460905891256e-06, "loss": 0.0049, "step": 9360 }, { "epoch": 0.061643520193680384, "grad_norm": 0.5909663202240732, "learning_rate": 3.082135456070524e-06, "loss": 0.0075, "step": 9370 }, { "epoch": 0.06170930836891377, "grad_norm": 0.18649078354680146, "learning_rate": 3.0854248215519227e-06, "loss": 0.0055, "step": 9380 }, { "epoch": 0.06177509654414715, "grad_norm": 0.32387830442041454, "learning_rate": 3.0887141870333215e-06, "loss": 0.0062, "step": 9390 }, { "epoch": 0.06184088471938054, "grad_norm": 0.3067243971855929, "learning_rate": 3.0920035525147203e-06, "loss": 0.008, "step": 9400 }, { "epoch": 0.06190667289461392, "grad_norm": 0.3312220397840854, "learning_rate": 3.0952929179961187e-06, "loss": 0.0069, "step": 9410 }, { "epoch": 0.06197246106984731, "grad_norm": 0.5657555126841373, "learning_rate": 3.0985822834775174e-06, "loss": 0.009, "step": 9420 }, { "epoch": 0.06203824924508069, "grad_norm": 0.28696217880041897, "learning_rate": 3.1018716489589162e-06, "loss": 0.0066, "step": 9430 }, { "epoch": 0.062104037420314076, "grad_norm": 0.29920658940254874, "learning_rate": 3.105161014440315e-06, "loss": 0.0056, "step": 9440 }, { "epoch": 0.06216982559554746, "grad_norm": 0.20638670904033798, "learning_rate": 3.108450379921714e-06, "loss": 0.0055, "step": 9450 }, { "epoch": 0.06223561377078084, "grad_norm": 0.6996202741032301, "learning_rate": 3.1117397454031117e-06, "loss": 0.008, "step": 9460 }, { "epoch": 0.062301401946014225, "grad_norm": 0.2643136982215818, "learning_rate": 3.1150291108845105e-06, "loss": 0.0094, "step": 9470 }, { "epoch": 0.062367190121247605, "grad_norm": 0.907360052873659, "learning_rate": 3.1183184763659093e-06, "loss": 0.0061, "step": 9480 }, { "epoch": 0.06243297829648099, "grad_norm": 0.3856813019258602, "learning_rate": 3.1216078418473077e-06, "loss": 0.0066, "step": 9490 }, { "epoch": 0.06249876647171437, "grad_norm": 0.3335964430938164, "learning_rate": 3.1248972073287065e-06, "loss": 0.0081, "step": 9500 }, { "epoch": 0.06256455464694775, "grad_norm": 0.3633462034989622, "learning_rate": 3.1281865728101052e-06, "loss": 0.005, "step": 9510 }, { "epoch": 0.06263034282218113, "grad_norm": 0.46655825019774505, "learning_rate": 3.131475938291504e-06, "loss": 0.0058, "step": 9520 }, { "epoch": 0.06269613099741453, "grad_norm": 0.41265828353401485, "learning_rate": 3.1347653037729024e-06, "loss": 0.0068, "step": 9530 }, { "epoch": 0.06276191917264791, "grad_norm": 0.5552536712988917, "learning_rate": 3.138054669254301e-06, "loss": 0.0055, "step": 9540 }, { "epoch": 0.06282770734788129, "grad_norm": 0.3715169755850853, "learning_rate": 3.1413440347357e-06, "loss": 0.0094, "step": 9550 }, { "epoch": 0.06289349552311467, "grad_norm": 0.42449914048962134, "learning_rate": 3.1446334002170987e-06, "loss": 0.0073, "step": 9560 }, { "epoch": 0.06295928369834806, "grad_norm": 0.061525690977041414, "learning_rate": 3.1479227656984967e-06, "loss": 0.0047, "step": 9570 }, { "epoch": 0.06302507187358145, "grad_norm": 0.40653074153189905, "learning_rate": 3.1512121311798955e-06, "loss": 0.0051, "step": 9580 }, { "epoch": 0.06309086004881483, "grad_norm": 0.2712575735999133, "learning_rate": 3.1545014966612942e-06, "loss": 0.004, "step": 9590 }, { "epoch": 0.0631566482240482, "grad_norm": 0.2307920306980258, "learning_rate": 3.1577908621426926e-06, "loss": 0.007, "step": 9600 }, { "epoch": 0.06322243639928159, "grad_norm": 0.5420486172966058, "learning_rate": 3.1610802276240914e-06, "loss": 0.009, "step": 9610 }, { "epoch": 0.06328822457451498, "grad_norm": 0.3800591433554427, "learning_rate": 3.16436959310549e-06, "loss": 0.0046, "step": 9620 }, { "epoch": 0.06335401274974836, "grad_norm": 0.275721670750781, "learning_rate": 3.167658958586889e-06, "loss": 0.0053, "step": 9630 }, { "epoch": 0.06341980092498174, "grad_norm": 0.3320217134866761, "learning_rate": 3.1709483240682878e-06, "loss": 0.0067, "step": 9640 }, { "epoch": 0.06348558910021512, "grad_norm": 0.31607809948112225, "learning_rate": 3.174237689549686e-06, "loss": 0.0043, "step": 9650 }, { "epoch": 0.06355137727544852, "grad_norm": 0.12319186221689936, "learning_rate": 3.177527055031085e-06, "loss": 0.0089, "step": 9660 }, { "epoch": 0.0636171654506819, "grad_norm": 0.22799045128265044, "learning_rate": 3.1808164205124837e-06, "loss": 0.0078, "step": 9670 }, { "epoch": 0.06368295362591528, "grad_norm": 0.2702052738449157, "learning_rate": 3.1841057859938825e-06, "loss": 0.0066, "step": 9680 }, { "epoch": 0.06374874180114866, "grad_norm": 0.18102643332804175, "learning_rate": 3.1873951514752804e-06, "loss": 0.0067, "step": 9690 }, { "epoch": 0.06381452997638204, "grad_norm": 0.5902350398237659, "learning_rate": 3.190684516956679e-06, "loss": 0.0077, "step": 9700 }, { "epoch": 0.06388031815161543, "grad_norm": 0.535682489148456, "learning_rate": 3.193973882438078e-06, "loss": 0.0095, "step": 9710 }, { "epoch": 0.06394610632684881, "grad_norm": 0.4254769112675404, "learning_rate": 3.1972632479194763e-06, "loss": 0.005, "step": 9720 }, { "epoch": 0.0640118945020822, "grad_norm": 0.6726643171108618, "learning_rate": 3.200552613400875e-06, "loss": 0.0088, "step": 9730 }, { "epoch": 0.06407768267731558, "grad_norm": 0.1451518357661019, "learning_rate": 3.203841978882274e-06, "loss": 0.0032, "step": 9740 }, { "epoch": 0.06414347085254896, "grad_norm": 0.08054753866528151, "learning_rate": 3.2071313443636727e-06, "loss": 0.0068, "step": 9750 }, { "epoch": 0.06420925902778235, "grad_norm": 0.30214565415834393, "learning_rate": 3.210420709845071e-06, "loss": 0.0088, "step": 9760 }, { "epoch": 0.06427504720301573, "grad_norm": 0.31640182327428523, "learning_rate": 3.21371007532647e-06, "loss": 0.0062, "step": 9770 }, { "epoch": 0.06434083537824911, "grad_norm": 0.3472311733817819, "learning_rate": 3.2169994408078686e-06, "loss": 0.0055, "step": 9780 }, { "epoch": 0.06440662355348249, "grad_norm": 0.22962211746805183, "learning_rate": 3.2202888062892674e-06, "loss": 0.0087, "step": 9790 }, { "epoch": 0.06447241172871589, "grad_norm": 1.302362685810349, "learning_rate": 3.2235781717706654e-06, "loss": 0.0071, "step": 9800 }, { "epoch": 0.06453819990394927, "grad_norm": 0.3754890968236215, "learning_rate": 3.226867537252064e-06, "loss": 0.0063, "step": 9810 }, { "epoch": 0.06460398807918265, "grad_norm": 0.3065328279655814, "learning_rate": 3.230156902733463e-06, "loss": 0.0097, "step": 9820 }, { "epoch": 0.06466977625441603, "grad_norm": 0.4204267761538347, "learning_rate": 3.2334462682148617e-06, "loss": 0.0055, "step": 9830 }, { "epoch": 0.06473556442964941, "grad_norm": 0.4462949386827972, "learning_rate": 3.23673563369626e-06, "loss": 0.0078, "step": 9840 }, { "epoch": 0.0648013526048828, "grad_norm": 0.10607651492397112, "learning_rate": 3.240024999177659e-06, "loss": 0.007, "step": 9850 }, { "epoch": 0.06486714078011618, "grad_norm": 0.3665564786284457, "learning_rate": 3.2433143646590576e-06, "loss": 0.0077, "step": 9860 }, { "epoch": 0.06493292895534956, "grad_norm": 0.35163374458371666, "learning_rate": 3.2466037301404564e-06, "loss": 0.0075, "step": 9870 }, { "epoch": 0.06499871713058294, "grad_norm": 0.1349930424439406, "learning_rate": 3.249893095621855e-06, "loss": 0.0047, "step": 9880 }, { "epoch": 0.06506450530581634, "grad_norm": 0.8820472457482115, "learning_rate": 3.2531824611032536e-06, "loss": 0.009, "step": 9890 }, { "epoch": 0.06513029348104972, "grad_norm": 0.7222440268995861, "learning_rate": 3.2564718265846524e-06, "loss": 0.0054, "step": 9900 }, { "epoch": 0.0651960816562831, "grad_norm": 0.28680006055216084, "learning_rate": 3.2597611920660503e-06, "loss": 0.0115, "step": 9910 }, { "epoch": 0.06526186983151648, "grad_norm": 0.23526738227867108, "learning_rate": 3.263050557547449e-06, "loss": 0.0056, "step": 9920 }, { "epoch": 0.06532765800674986, "grad_norm": 0.46455797988689435, "learning_rate": 3.266339923028848e-06, "loss": 0.0056, "step": 9930 }, { "epoch": 0.06539344618198326, "grad_norm": 0.4781435312579179, "learning_rate": 3.2696292885102467e-06, "loss": 0.0099, "step": 9940 }, { "epoch": 0.06545923435721664, "grad_norm": 0.11867767659584437, "learning_rate": 3.272918653991645e-06, "loss": 0.0045, "step": 9950 }, { "epoch": 0.06552502253245002, "grad_norm": 0.13367780941736618, "learning_rate": 3.276208019473044e-06, "loss": 0.0059, "step": 9960 }, { "epoch": 0.0655908107076834, "grad_norm": 0.15816814575234736, "learning_rate": 3.2794973849544426e-06, "loss": 0.0099, "step": 9970 }, { "epoch": 0.06565659888291678, "grad_norm": 0.5267656556684036, "learning_rate": 3.2827867504358414e-06, "loss": 0.0079, "step": 9980 }, { "epoch": 0.06572238705815017, "grad_norm": 0.2377183615377811, "learning_rate": 3.28607611591724e-06, "loss": 0.0061, "step": 9990 }, { "epoch": 0.06578817523338355, "grad_norm": 0.4859224765104577, "learning_rate": 3.2893654813986385e-06, "loss": 0.0074, "step": 10000 }, { "epoch": 0.06585396340861693, "grad_norm": 0.45650097654314914, "learning_rate": 3.2926548468800373e-06, "loss": 0.0051, "step": 10010 }, { "epoch": 0.06591975158385031, "grad_norm": 0.277757097636804, "learning_rate": 3.295944212361436e-06, "loss": 0.007, "step": 10020 }, { "epoch": 0.06598553975908371, "grad_norm": 0.5844864858911901, "learning_rate": 3.299233577842834e-06, "loss": 0.0066, "step": 10030 }, { "epoch": 0.06605132793431709, "grad_norm": 0.22915569295820096, "learning_rate": 3.302522943324233e-06, "loss": 0.005, "step": 10040 }, { "epoch": 0.06611711610955047, "grad_norm": 0.4685159257168782, "learning_rate": 3.3058123088056316e-06, "loss": 0.0081, "step": 10050 }, { "epoch": 0.06618290428478385, "grad_norm": 0.30490755961717114, "learning_rate": 3.3091016742870304e-06, "loss": 0.0073, "step": 10060 }, { "epoch": 0.06624869246001723, "grad_norm": 0.1825623960179335, "learning_rate": 3.3123910397684288e-06, "loss": 0.0097, "step": 10070 }, { "epoch": 0.06631448063525062, "grad_norm": 0.5124412338552524, "learning_rate": 3.3156804052498275e-06, "loss": 0.0057, "step": 10080 }, { "epoch": 0.066380268810484, "grad_norm": 0.4176018544794703, "learning_rate": 3.3189697707312263e-06, "loss": 0.0042, "step": 10090 }, { "epoch": 0.06644605698571739, "grad_norm": 0.5168892263163316, "learning_rate": 3.322259136212625e-06, "loss": 0.0063, "step": 10100 }, { "epoch": 0.06651184516095077, "grad_norm": 0.4676998537961618, "learning_rate": 3.3255485016940235e-06, "loss": 0.0081, "step": 10110 }, { "epoch": 0.06657763333618416, "grad_norm": 0.24121165516768878, "learning_rate": 3.3288378671754223e-06, "loss": 0.005, "step": 10120 }, { "epoch": 0.06664342151141754, "grad_norm": 0.3087384745533371, "learning_rate": 3.332127232656821e-06, "loss": 0.0057, "step": 10130 }, { "epoch": 0.06670920968665092, "grad_norm": 0.26265356603910356, "learning_rate": 3.335416598138219e-06, "loss": 0.0084, "step": 10140 }, { "epoch": 0.0667749978618843, "grad_norm": 0.7803623440787707, "learning_rate": 3.3387059636196178e-06, "loss": 0.006, "step": 10150 }, { "epoch": 0.06684078603711768, "grad_norm": 0.3310360178969886, "learning_rate": 3.3419953291010166e-06, "loss": 0.0058, "step": 10160 }, { "epoch": 0.06690657421235108, "grad_norm": 0.36735373721948034, "learning_rate": 3.3452846945824153e-06, "loss": 0.0069, "step": 10170 }, { "epoch": 0.06697236238758446, "grad_norm": 0.36284371040445246, "learning_rate": 3.348574060063814e-06, "loss": 0.0061, "step": 10180 }, { "epoch": 0.06703815056281784, "grad_norm": 0.26760444667506017, "learning_rate": 3.3518634255452125e-06, "loss": 0.0034, "step": 10190 }, { "epoch": 0.06710393873805122, "grad_norm": 0.5023866169661192, "learning_rate": 3.3551527910266113e-06, "loss": 0.0067, "step": 10200 }, { "epoch": 0.0671697269132846, "grad_norm": 0.22593047126413562, "learning_rate": 3.35844215650801e-06, "loss": 0.0051, "step": 10210 }, { "epoch": 0.067235515088518, "grad_norm": 0.20082660744507286, "learning_rate": 3.361731521989409e-06, "loss": 0.005, "step": 10220 }, { "epoch": 0.06730130326375137, "grad_norm": 0.48982031966948647, "learning_rate": 3.365020887470807e-06, "loss": 0.008, "step": 10230 }, { "epoch": 0.06736709143898476, "grad_norm": 0.21328172527503228, "learning_rate": 3.368310252952206e-06, "loss": 0.0053, "step": 10240 }, { "epoch": 0.06743287961421814, "grad_norm": 0.3430647048579184, "learning_rate": 3.3715996184336044e-06, "loss": 0.0062, "step": 10250 }, { "epoch": 0.06749866778945153, "grad_norm": 0.6208032062611576, "learning_rate": 3.3748889839150027e-06, "loss": 0.0077, "step": 10260 }, { "epoch": 0.06756445596468491, "grad_norm": 0.4637516505786273, "learning_rate": 3.3781783493964015e-06, "loss": 0.0102, "step": 10270 }, { "epoch": 0.06763024413991829, "grad_norm": 0.2146316432641824, "learning_rate": 3.3814677148778003e-06, "loss": 0.0082, "step": 10280 }, { "epoch": 0.06769603231515167, "grad_norm": 0.3584146754041446, "learning_rate": 3.384757080359199e-06, "loss": 0.0045, "step": 10290 }, { "epoch": 0.06776182049038505, "grad_norm": 0.5645384660103911, "learning_rate": 3.3880464458405974e-06, "loss": 0.0075, "step": 10300 }, { "epoch": 0.06782760866561845, "grad_norm": 0.49611477882174615, "learning_rate": 3.3913358113219962e-06, "loss": 0.0069, "step": 10310 }, { "epoch": 0.06789339684085183, "grad_norm": 0.08744244147998247, "learning_rate": 3.394625176803395e-06, "loss": 0.0041, "step": 10320 }, { "epoch": 0.06795918501608521, "grad_norm": 0.35131159995487304, "learning_rate": 3.3979145422847938e-06, "loss": 0.0052, "step": 10330 }, { "epoch": 0.06802497319131859, "grad_norm": 0.4502813643015159, "learning_rate": 3.401203907766192e-06, "loss": 0.0069, "step": 10340 }, { "epoch": 0.06809076136655198, "grad_norm": 0.17402648025323678, "learning_rate": 3.404493273247591e-06, "loss": 0.0075, "step": 10350 }, { "epoch": 0.06815654954178536, "grad_norm": 0.28210313675172133, "learning_rate": 3.4077826387289897e-06, "loss": 0.0116, "step": 10360 }, { "epoch": 0.06822233771701874, "grad_norm": 0.5759802047810666, "learning_rate": 3.4110720042103877e-06, "loss": 0.0076, "step": 10370 }, { "epoch": 0.06828812589225212, "grad_norm": 0.24859200415232266, "learning_rate": 3.4143613696917864e-06, "loss": 0.0044, "step": 10380 }, { "epoch": 0.0683539140674855, "grad_norm": 0.5678438409967681, "learning_rate": 3.4176507351731852e-06, "loss": 0.0053, "step": 10390 }, { "epoch": 0.0684197022427189, "grad_norm": 0.4794924777117528, "learning_rate": 3.420940100654584e-06, "loss": 0.0056, "step": 10400 }, { "epoch": 0.06848549041795228, "grad_norm": 0.39044200733720785, "learning_rate": 3.424229466135983e-06, "loss": 0.0055, "step": 10410 }, { "epoch": 0.06855127859318566, "grad_norm": 0.36936911716305604, "learning_rate": 3.427518831617381e-06, "loss": 0.0062, "step": 10420 }, { "epoch": 0.06861706676841904, "grad_norm": 0.7971711735512673, "learning_rate": 3.43080819709878e-06, "loss": 0.0057, "step": 10430 }, { "epoch": 0.06868285494365242, "grad_norm": 0.15891118286526687, "learning_rate": 3.4340975625801787e-06, "loss": 0.0064, "step": 10440 }, { "epoch": 0.06874864311888582, "grad_norm": 0.41950824893442934, "learning_rate": 3.4373869280615775e-06, "loss": 0.0068, "step": 10450 }, { "epoch": 0.0688144312941192, "grad_norm": 0.2376456672534714, "learning_rate": 3.440676293542976e-06, "loss": 0.007, "step": 10460 }, { "epoch": 0.06888021946935258, "grad_norm": 0.3229430504264769, "learning_rate": 3.4439656590243747e-06, "loss": 0.0035, "step": 10470 }, { "epoch": 0.06894600764458596, "grad_norm": 0.2480783915326562, "learning_rate": 3.447255024505773e-06, "loss": 0.0044, "step": 10480 }, { "epoch": 0.06901179581981935, "grad_norm": 0.2695575053070671, "learning_rate": 3.4505443899871714e-06, "loss": 0.0077, "step": 10490 }, { "epoch": 0.06907758399505273, "grad_norm": 0.4802781093225595, "learning_rate": 3.45383375546857e-06, "loss": 0.0066, "step": 10500 }, { "epoch": 0.06914337217028611, "grad_norm": 0.7335464128386413, "learning_rate": 3.457123120949969e-06, "loss": 0.0078, "step": 10510 }, { "epoch": 0.0692091603455195, "grad_norm": 0.3564257448348209, "learning_rate": 3.4604124864313677e-06, "loss": 0.0052, "step": 10520 }, { "epoch": 0.06927494852075287, "grad_norm": 0.26924895197042265, "learning_rate": 3.463701851912766e-06, "loss": 0.0063, "step": 10530 }, { "epoch": 0.06934073669598627, "grad_norm": 0.2535304972539865, "learning_rate": 3.466991217394165e-06, "loss": 0.0041, "step": 10540 }, { "epoch": 0.06940652487121965, "grad_norm": 0.5371429946599071, "learning_rate": 3.4702805828755637e-06, "loss": 0.0062, "step": 10550 }, { "epoch": 0.06947231304645303, "grad_norm": 0.365417667822798, "learning_rate": 3.4735699483569625e-06, "loss": 0.0048, "step": 10560 }, { "epoch": 0.06953810122168641, "grad_norm": 0.3841162498621075, "learning_rate": 3.4768593138383613e-06, "loss": 0.0073, "step": 10570 }, { "epoch": 0.06960388939691979, "grad_norm": 0.30204014102820176, "learning_rate": 3.4801486793197596e-06, "loss": 0.0054, "step": 10580 }, { "epoch": 0.06966967757215319, "grad_norm": 0.17859621986625734, "learning_rate": 3.483438044801158e-06, "loss": 0.0054, "step": 10590 }, { "epoch": 0.06973546574738657, "grad_norm": 0.543805133133486, "learning_rate": 3.4867274102825568e-06, "loss": 0.0076, "step": 10600 }, { "epoch": 0.06980125392261995, "grad_norm": 0.1827776648666184, "learning_rate": 3.490016775763955e-06, "loss": 0.0052, "step": 10610 }, { "epoch": 0.06986704209785333, "grad_norm": 0.11941158734843596, "learning_rate": 3.493306141245354e-06, "loss": 0.0053, "step": 10620 }, { "epoch": 0.06993283027308672, "grad_norm": 0.2775393890827602, "learning_rate": 3.4965955067267527e-06, "loss": 0.0074, "step": 10630 }, { "epoch": 0.0699986184483201, "grad_norm": 0.6997741154550532, "learning_rate": 3.4998848722081515e-06, "loss": 0.0067, "step": 10640 }, { "epoch": 0.07006440662355348, "grad_norm": 0.3170017333916341, "learning_rate": 3.50317423768955e-06, "loss": 0.0073, "step": 10650 }, { "epoch": 0.07013019479878686, "grad_norm": 0.22970764725075402, "learning_rate": 3.5064636031709486e-06, "loss": 0.0072, "step": 10660 }, { "epoch": 0.07019598297402024, "grad_norm": 0.4342171901910779, "learning_rate": 3.5097529686523474e-06, "loss": 0.0077, "step": 10670 }, { "epoch": 0.07026177114925364, "grad_norm": 0.16256900180200282, "learning_rate": 3.513042334133746e-06, "loss": 0.0059, "step": 10680 }, { "epoch": 0.07032755932448702, "grad_norm": 0.25958490913962556, "learning_rate": 3.5163316996151446e-06, "loss": 0.0066, "step": 10690 }, { "epoch": 0.0703933474997204, "grad_norm": 0.25356660491608624, "learning_rate": 3.519621065096543e-06, "loss": 0.0033, "step": 10700 }, { "epoch": 0.07045913567495378, "grad_norm": 0.15478969347998037, "learning_rate": 3.5229104305779417e-06, "loss": 0.0049, "step": 10710 }, { "epoch": 0.07052492385018717, "grad_norm": 0.33577133760073397, "learning_rate": 3.52619979605934e-06, "loss": 0.005, "step": 10720 }, { "epoch": 0.07059071202542055, "grad_norm": 0.7873639579336978, "learning_rate": 3.529489161540739e-06, "loss": 0.0073, "step": 10730 }, { "epoch": 0.07065650020065393, "grad_norm": 0.21876251638462443, "learning_rate": 3.5327785270221376e-06, "loss": 0.0051, "step": 10740 }, { "epoch": 0.07072228837588732, "grad_norm": 0.3854149445460948, "learning_rate": 3.5360678925035364e-06, "loss": 0.0119, "step": 10750 }, { "epoch": 0.0707880765511207, "grad_norm": 0.22502063999700733, "learning_rate": 3.539357257984935e-06, "loss": 0.005, "step": 10760 }, { "epoch": 0.07085386472635409, "grad_norm": 0.09354097033960712, "learning_rate": 3.5426466234663336e-06, "loss": 0.0063, "step": 10770 }, { "epoch": 0.07091965290158747, "grad_norm": 0.2178002759618579, "learning_rate": 3.5459359889477324e-06, "loss": 0.0071, "step": 10780 }, { "epoch": 0.07098544107682085, "grad_norm": 0.5084527323211647, "learning_rate": 3.549225354429131e-06, "loss": 0.0066, "step": 10790 }, { "epoch": 0.07105122925205423, "grad_norm": 0.24401378495004922, "learning_rate": 3.55251471991053e-06, "loss": 0.0087, "step": 10800 }, { "epoch": 0.07111701742728761, "grad_norm": 0.35081450555425164, "learning_rate": 3.5558040853919283e-06, "loss": 0.0063, "step": 10810 }, { "epoch": 0.07118280560252101, "grad_norm": 0.3154325322659785, "learning_rate": 3.5590934508733267e-06, "loss": 0.0043, "step": 10820 }, { "epoch": 0.07124859377775439, "grad_norm": 0.4559463646544015, "learning_rate": 3.5623828163547254e-06, "loss": 0.006, "step": 10830 }, { "epoch": 0.07131438195298777, "grad_norm": 0.3271916850520497, "learning_rate": 3.565672181836124e-06, "loss": 0.0055, "step": 10840 }, { "epoch": 0.07138017012822115, "grad_norm": 0.16591938294543854, "learning_rate": 3.5689615473175226e-06, "loss": 0.0075, "step": 10850 }, { "epoch": 0.07144595830345454, "grad_norm": 0.23458297895551747, "learning_rate": 3.5722509127989214e-06, "loss": 0.0063, "step": 10860 }, { "epoch": 0.07151174647868792, "grad_norm": 0.40003550197719134, "learning_rate": 3.57554027828032e-06, "loss": 0.0068, "step": 10870 }, { "epoch": 0.0715775346539213, "grad_norm": 0.3093322132102305, "learning_rate": 3.5788296437617185e-06, "loss": 0.0067, "step": 10880 }, { "epoch": 0.07164332282915468, "grad_norm": 0.4080252941046586, "learning_rate": 3.5821190092431173e-06, "loss": 0.0065, "step": 10890 }, { "epoch": 0.07170911100438807, "grad_norm": 0.2818235183469464, "learning_rate": 3.585408374724516e-06, "loss": 0.0078, "step": 10900 }, { "epoch": 0.07177489917962146, "grad_norm": 0.16023235753671178, "learning_rate": 3.588697740205915e-06, "loss": 0.0061, "step": 10910 }, { "epoch": 0.07184068735485484, "grad_norm": 0.20601028820532974, "learning_rate": 3.5919871056873137e-06, "loss": 0.0047, "step": 10920 }, { "epoch": 0.07190647553008822, "grad_norm": 0.3971158714007825, "learning_rate": 3.5952764711687116e-06, "loss": 0.0074, "step": 10930 }, { "epoch": 0.0719722637053216, "grad_norm": 0.38368277098683545, "learning_rate": 3.5985658366501104e-06, "loss": 0.0082, "step": 10940 }, { "epoch": 0.072038051880555, "grad_norm": 0.27842482911534616, "learning_rate": 3.601855202131509e-06, "loss": 0.0097, "step": 10950 }, { "epoch": 0.07210384005578838, "grad_norm": 0.729303733316904, "learning_rate": 3.6051445676129075e-06, "loss": 0.0062, "step": 10960 }, { "epoch": 0.07216962823102176, "grad_norm": 0.377928104475396, "learning_rate": 3.6084339330943063e-06, "loss": 0.0042, "step": 10970 }, { "epoch": 0.07223541640625514, "grad_norm": 0.3890864452964716, "learning_rate": 3.611723298575705e-06, "loss": 0.0053, "step": 10980 }, { "epoch": 0.07230120458148852, "grad_norm": 0.2911997061418167, "learning_rate": 3.615012664057104e-06, "loss": 0.0107, "step": 10990 }, { "epoch": 0.07236699275672191, "grad_norm": 0.37211844434869445, "learning_rate": 3.6183020295385023e-06, "loss": 0.0069, "step": 11000 }, { "epoch": 0.07243278093195529, "grad_norm": 0.3179647307318908, "learning_rate": 3.621591395019901e-06, "loss": 0.0047, "step": 11010 }, { "epoch": 0.07249856910718867, "grad_norm": 0.5203687672560141, "learning_rate": 3.6248807605013e-06, "loss": 0.0054, "step": 11020 }, { "epoch": 0.07256435728242205, "grad_norm": 0.44245335537850256, "learning_rate": 3.6281701259826986e-06, "loss": 0.006, "step": 11030 }, { "epoch": 0.07263014545765543, "grad_norm": 0.35658820674705866, "learning_rate": 3.6314594914640965e-06, "loss": 0.0069, "step": 11040 }, { "epoch": 0.07269593363288883, "grad_norm": 0.3219935621789238, "learning_rate": 3.6347488569454953e-06, "loss": 0.0068, "step": 11050 }, { "epoch": 0.07276172180812221, "grad_norm": 0.30953094162556566, "learning_rate": 3.638038222426894e-06, "loss": 0.0072, "step": 11060 }, { "epoch": 0.07282750998335559, "grad_norm": 0.448136201312037, "learning_rate": 3.6413275879082925e-06, "loss": 0.0114, "step": 11070 }, { "epoch": 0.07289329815858897, "grad_norm": 0.2375513964688906, "learning_rate": 3.6446169533896913e-06, "loss": 0.0092, "step": 11080 }, { "epoch": 0.07295908633382236, "grad_norm": 0.23532655493255217, "learning_rate": 3.64790631887109e-06, "loss": 0.006, "step": 11090 }, { "epoch": 0.07302487450905575, "grad_norm": 0.4446226237293141, "learning_rate": 3.651195684352489e-06, "loss": 0.0048, "step": 11100 }, { "epoch": 0.07309066268428913, "grad_norm": 0.28092531269645316, "learning_rate": 3.6544850498338876e-06, "loss": 0.0086, "step": 11110 }, { "epoch": 0.0731564508595225, "grad_norm": 0.506206946612092, "learning_rate": 3.657774415315286e-06, "loss": 0.0064, "step": 11120 }, { "epoch": 0.07322223903475589, "grad_norm": 0.33421998180295553, "learning_rate": 3.6610637807966848e-06, "loss": 0.0069, "step": 11130 }, { "epoch": 0.07328802720998928, "grad_norm": 0.18564594612886182, "learning_rate": 3.6643531462780836e-06, "loss": 0.0041, "step": 11140 }, { "epoch": 0.07335381538522266, "grad_norm": 0.4777671671724861, "learning_rate": 3.6676425117594823e-06, "loss": 0.0067, "step": 11150 }, { "epoch": 0.07341960356045604, "grad_norm": 0.16509736783956597, "learning_rate": 3.6709318772408803e-06, "loss": 0.0082, "step": 11160 }, { "epoch": 0.07348539173568942, "grad_norm": 0.10192661958567963, "learning_rate": 3.674221242722279e-06, "loss": 0.0063, "step": 11170 }, { "epoch": 0.07355117991092282, "grad_norm": 0.3429698431883613, "learning_rate": 3.677510608203678e-06, "loss": 0.0039, "step": 11180 }, { "epoch": 0.0736169680861562, "grad_norm": 0.5768988128150451, "learning_rate": 3.680799973685076e-06, "loss": 0.0068, "step": 11190 }, { "epoch": 0.07368275626138958, "grad_norm": 0.6489940720928843, "learning_rate": 3.684089339166475e-06, "loss": 0.0047, "step": 11200 }, { "epoch": 0.07374854443662296, "grad_norm": 0.22953466478162943, "learning_rate": 3.6873787046478738e-06, "loss": 0.0055, "step": 11210 }, { "epoch": 0.07381433261185634, "grad_norm": 0.40321563458543846, "learning_rate": 3.6906680701292726e-06, "loss": 0.0068, "step": 11220 }, { "epoch": 0.07388012078708973, "grad_norm": 0.34586265654768095, "learning_rate": 3.693957435610671e-06, "loss": 0.0064, "step": 11230 }, { "epoch": 0.07394590896232311, "grad_norm": 0.7175266267120192, "learning_rate": 3.6972468010920697e-06, "loss": 0.0065, "step": 11240 }, { "epoch": 0.0740116971375565, "grad_norm": 0.27053748982495257, "learning_rate": 3.7005361665734685e-06, "loss": 0.0066, "step": 11250 }, { "epoch": 0.07407748531278988, "grad_norm": 0.30727448739906366, "learning_rate": 3.7038255320548673e-06, "loss": 0.0048, "step": 11260 }, { "epoch": 0.07414327348802326, "grad_norm": 0.1559997183016638, "learning_rate": 3.7071148975362652e-06, "loss": 0.0037, "step": 11270 }, { "epoch": 0.07420906166325665, "grad_norm": 0.34599369622889276, "learning_rate": 3.710404263017664e-06, "loss": 0.0056, "step": 11280 }, { "epoch": 0.07427484983849003, "grad_norm": 0.7843344467088212, "learning_rate": 3.713693628499063e-06, "loss": 0.009, "step": 11290 }, { "epoch": 0.07434063801372341, "grad_norm": 0.49485613635320613, "learning_rate": 3.716982993980461e-06, "loss": 0.0078, "step": 11300 }, { "epoch": 0.07440642618895679, "grad_norm": 0.48787875198927055, "learning_rate": 3.72027235946186e-06, "loss": 0.0053, "step": 11310 }, { "epoch": 0.07447221436419019, "grad_norm": 0.2664891907528577, "learning_rate": 3.7235617249432587e-06, "loss": 0.0061, "step": 11320 }, { "epoch": 0.07453800253942357, "grad_norm": 0.19168907299870971, "learning_rate": 3.7268510904246575e-06, "loss": 0.0033, "step": 11330 }, { "epoch": 0.07460379071465695, "grad_norm": 0.7804127398876561, "learning_rate": 3.7301404559060563e-06, "loss": 0.0083, "step": 11340 }, { "epoch": 0.07466957888989033, "grad_norm": 0.203632029379606, "learning_rate": 3.7334298213874547e-06, "loss": 0.0067, "step": 11350 }, { "epoch": 0.07473536706512371, "grad_norm": 0.17674626601871168, "learning_rate": 3.7367191868688534e-06, "loss": 0.0054, "step": 11360 }, { "epoch": 0.0748011552403571, "grad_norm": 0.47171137392376983, "learning_rate": 3.7400085523502522e-06, "loss": 0.007, "step": 11370 }, { "epoch": 0.07486694341559048, "grad_norm": 0.501659568699793, "learning_rate": 3.74329791783165e-06, "loss": 0.0042, "step": 11380 }, { "epoch": 0.07493273159082386, "grad_norm": 0.050671290568197365, "learning_rate": 3.746587283313049e-06, "loss": 0.0063, "step": 11390 }, { "epoch": 0.07499851976605725, "grad_norm": 0.37883656875947813, "learning_rate": 3.7498766487944477e-06, "loss": 0.0069, "step": 11400 }, { "epoch": 0.07506430794129063, "grad_norm": 0.7041739053173427, "learning_rate": 3.7531660142758465e-06, "loss": 0.0069, "step": 11410 }, { "epoch": 0.07513009611652402, "grad_norm": 0.5294255848254364, "learning_rate": 3.756455379757245e-06, "loss": 0.0068, "step": 11420 }, { "epoch": 0.0751958842917574, "grad_norm": 0.2649673723596543, "learning_rate": 3.7597447452386437e-06, "loss": 0.0061, "step": 11430 }, { "epoch": 0.07526167246699078, "grad_norm": 0.354496823455186, "learning_rate": 3.7630341107200425e-06, "loss": 0.0079, "step": 11440 }, { "epoch": 0.07532746064222416, "grad_norm": 0.09939719024226691, "learning_rate": 3.7663234762014412e-06, "loss": 0.0056, "step": 11450 }, { "epoch": 0.07539324881745756, "grad_norm": 0.3739147639771692, "learning_rate": 3.7696128416828396e-06, "loss": 0.0064, "step": 11460 }, { "epoch": 0.07545903699269094, "grad_norm": 0.4982298034643475, "learning_rate": 3.7729022071642384e-06, "loss": 0.0069, "step": 11470 }, { "epoch": 0.07552482516792432, "grad_norm": 0.2616098782733786, "learning_rate": 3.776191572645637e-06, "loss": 0.0042, "step": 11480 }, { "epoch": 0.0755906133431577, "grad_norm": 0.18559307279053375, "learning_rate": 3.779480938127036e-06, "loss": 0.0063, "step": 11490 }, { "epoch": 0.07565640151839108, "grad_norm": 0.1212613367358261, "learning_rate": 3.782770303608434e-06, "loss": 0.0065, "step": 11500 }, { "epoch": 0.07572218969362447, "grad_norm": 0.23171654630350713, "learning_rate": 3.7860596690898327e-06, "loss": 0.0031, "step": 11510 }, { "epoch": 0.07578797786885785, "grad_norm": 0.4910958504799755, "learning_rate": 3.7893490345712315e-06, "loss": 0.0053, "step": 11520 }, { "epoch": 0.07585376604409123, "grad_norm": 0.5769990352091583, "learning_rate": 3.7926384000526303e-06, "loss": 0.0067, "step": 11530 }, { "epoch": 0.07591955421932461, "grad_norm": 0.5308336255263961, "learning_rate": 3.7959277655340286e-06, "loss": 0.0056, "step": 11540 }, { "epoch": 0.07598534239455801, "grad_norm": 0.18342423279630712, "learning_rate": 3.7992171310154274e-06, "loss": 0.0065, "step": 11550 }, { "epoch": 0.07605113056979139, "grad_norm": 0.3301856954719868, "learning_rate": 3.802506496496826e-06, "loss": 0.0091, "step": 11560 }, { "epoch": 0.07611691874502477, "grad_norm": 0.5365137841300899, "learning_rate": 3.805795861978225e-06, "loss": 0.0031, "step": 11570 }, { "epoch": 0.07618270692025815, "grad_norm": 0.25373820831595967, "learning_rate": 3.8090852274596233e-06, "loss": 0.0047, "step": 11580 }, { "epoch": 0.07624849509549153, "grad_norm": 0.3012177923651033, "learning_rate": 3.812374592941022e-06, "loss": 0.0078, "step": 11590 }, { "epoch": 0.07631428327072493, "grad_norm": 0.43672420685682783, "learning_rate": 3.815663958422421e-06, "loss": 0.0073, "step": 11600 }, { "epoch": 0.0763800714459583, "grad_norm": 0.46619076296117967, "learning_rate": 3.818953323903819e-06, "loss": 0.0053, "step": 11610 }, { "epoch": 0.07644585962119169, "grad_norm": 0.18689375459682347, "learning_rate": 3.822242689385218e-06, "loss": 0.0066, "step": 11620 }, { "epoch": 0.07651164779642507, "grad_norm": 0.26490018899233403, "learning_rate": 3.825532054866616e-06, "loss": 0.0077, "step": 11630 }, { "epoch": 0.07657743597165845, "grad_norm": 0.363977128855603, "learning_rate": 3.828821420348015e-06, "loss": 0.0075, "step": 11640 }, { "epoch": 0.07664322414689184, "grad_norm": 0.4121383187026153, "learning_rate": 3.8321107858294136e-06, "loss": 0.007, "step": 11650 }, { "epoch": 0.07670901232212522, "grad_norm": 0.2575456800553466, "learning_rate": 3.835400151310813e-06, "loss": 0.008, "step": 11660 }, { "epoch": 0.0767748004973586, "grad_norm": 0.3659438747163495, "learning_rate": 3.838689516792211e-06, "loss": 0.0045, "step": 11670 }, { "epoch": 0.07684058867259198, "grad_norm": 0.2638999595222458, "learning_rate": 3.8419788822736095e-06, "loss": 0.0046, "step": 11680 }, { "epoch": 0.07690637684782538, "grad_norm": 0.4168497663585592, "learning_rate": 3.845268247755009e-06, "loss": 0.0051, "step": 11690 }, { "epoch": 0.07697216502305876, "grad_norm": 0.28877644669710484, "learning_rate": 3.848557613236407e-06, "loss": 0.0055, "step": 11700 }, { "epoch": 0.07703795319829214, "grad_norm": 0.2545827302363408, "learning_rate": 3.851846978717806e-06, "loss": 0.0069, "step": 11710 }, { "epoch": 0.07710374137352552, "grad_norm": 0.3918691794170839, "learning_rate": 3.855136344199204e-06, "loss": 0.0068, "step": 11720 }, { "epoch": 0.0771695295487589, "grad_norm": 0.3547901990576959, "learning_rate": 3.858425709680603e-06, "loss": 0.0092, "step": 11730 }, { "epoch": 0.0772353177239923, "grad_norm": 0.5144696618016108, "learning_rate": 3.861715075162001e-06, "loss": 0.0063, "step": 11740 }, { "epoch": 0.07730110589922567, "grad_norm": 0.2597493901113232, "learning_rate": 3.8650044406434e-06, "loss": 0.0046, "step": 11750 }, { "epoch": 0.07736689407445906, "grad_norm": 0.7183288636673911, "learning_rate": 3.868293806124799e-06, "loss": 0.0077, "step": 11760 }, { "epoch": 0.07743268224969244, "grad_norm": 0.4299614445091185, "learning_rate": 3.871583171606197e-06, "loss": 0.0065, "step": 11770 }, { "epoch": 0.07749847042492583, "grad_norm": 0.4415593030093234, "learning_rate": 3.8748725370875965e-06, "loss": 0.0043, "step": 11780 }, { "epoch": 0.07756425860015921, "grad_norm": 0.2602357207336974, "learning_rate": 3.878161902568995e-06, "loss": 0.0063, "step": 11790 }, { "epoch": 0.07763004677539259, "grad_norm": 0.2139060376544239, "learning_rate": 3.881451268050393e-06, "loss": 0.0043, "step": 11800 }, { "epoch": 0.07769583495062597, "grad_norm": 0.051884238498513646, "learning_rate": 3.8847406335317924e-06, "loss": 0.0082, "step": 11810 }, { "epoch": 0.07776162312585935, "grad_norm": 0.2324461172309749, "learning_rate": 3.888029999013191e-06, "loss": 0.0066, "step": 11820 }, { "epoch": 0.07782741130109275, "grad_norm": 0.2648526297371203, "learning_rate": 3.891319364494589e-06, "loss": 0.0036, "step": 11830 }, { "epoch": 0.07789319947632613, "grad_norm": 0.5807495515801714, "learning_rate": 3.8946087299759875e-06, "loss": 0.0087, "step": 11840 }, { "epoch": 0.07795898765155951, "grad_norm": 0.21096655458748634, "learning_rate": 3.897898095457387e-06, "loss": 0.006, "step": 11850 }, { "epoch": 0.07802477582679289, "grad_norm": 0.3152251185424014, "learning_rate": 3.901187460938785e-06, "loss": 0.0072, "step": 11860 }, { "epoch": 0.07809056400202627, "grad_norm": 0.24693321980508834, "learning_rate": 3.9044768264201835e-06, "loss": 0.003, "step": 11870 }, { "epoch": 0.07815635217725966, "grad_norm": 0.583213260779767, "learning_rate": 3.907766191901583e-06, "loss": 0.0092, "step": 11880 }, { "epoch": 0.07822214035249304, "grad_norm": 0.38240215187442644, "learning_rate": 3.911055557382981e-06, "loss": 0.005, "step": 11890 }, { "epoch": 0.07828792852772642, "grad_norm": 0.3220049701244461, "learning_rate": 3.91434492286438e-06, "loss": 0.0065, "step": 11900 }, { "epoch": 0.0783537167029598, "grad_norm": 0.43793147315950415, "learning_rate": 3.917634288345779e-06, "loss": 0.0057, "step": 11910 }, { "epoch": 0.0784195048781932, "grad_norm": 0.265955177469439, "learning_rate": 3.920923653827177e-06, "loss": 0.0049, "step": 11920 }, { "epoch": 0.07848529305342658, "grad_norm": 0.11389641916379133, "learning_rate": 3.924213019308576e-06, "loss": 0.0036, "step": 11930 }, { "epoch": 0.07855108122865996, "grad_norm": 0.33521068232304313, "learning_rate": 3.9275023847899745e-06, "loss": 0.0076, "step": 11940 }, { "epoch": 0.07861686940389334, "grad_norm": 0.24616569147329317, "learning_rate": 3.930791750271373e-06, "loss": 0.0063, "step": 11950 }, { "epoch": 0.07868265757912672, "grad_norm": 0.30712043393545724, "learning_rate": 3.934081115752771e-06, "loss": 0.0049, "step": 11960 }, { "epoch": 0.07874844575436012, "grad_norm": 0.351096678042347, "learning_rate": 3.9373704812341705e-06, "loss": 0.0055, "step": 11970 }, { "epoch": 0.0788142339295935, "grad_norm": 0.23227139175432582, "learning_rate": 3.940659846715569e-06, "loss": 0.0061, "step": 11980 }, { "epoch": 0.07888002210482688, "grad_norm": 0.29114505187218825, "learning_rate": 3.943949212196967e-06, "loss": 0.0054, "step": 11990 }, { "epoch": 0.07894581028006026, "grad_norm": 0.39475391414728667, "learning_rate": 3.947238577678366e-06, "loss": 0.0077, "step": 12000 }, { "epoch": 0.07901159845529365, "grad_norm": 0.2099444726389529, "learning_rate": 3.950527943159765e-06, "loss": 0.0073, "step": 12010 }, { "epoch": 0.07907738663052703, "grad_norm": 0.43487237296524833, "learning_rate": 3.953817308641163e-06, "loss": 0.0058, "step": 12020 }, { "epoch": 0.07914317480576041, "grad_norm": 0.2015584257887045, "learning_rate": 3.957106674122562e-06, "loss": 0.0074, "step": 12030 }, { "epoch": 0.0792089629809938, "grad_norm": 0.12570348042516785, "learning_rate": 3.960396039603961e-06, "loss": 0.0063, "step": 12040 }, { "epoch": 0.07927475115622717, "grad_norm": 0.4323653681029795, "learning_rate": 3.96368540508536e-06, "loss": 0.006, "step": 12050 }, { "epoch": 0.07934053933146057, "grad_norm": 0.3402493795842836, "learning_rate": 3.966974770566757e-06, "loss": 0.0057, "step": 12060 }, { "epoch": 0.07940632750669395, "grad_norm": 0.4735559067294081, "learning_rate": 3.970264136048157e-06, "loss": 0.0065, "step": 12070 }, { "epoch": 0.07947211568192733, "grad_norm": 0.22646435029862275, "learning_rate": 3.973553501529555e-06, "loss": 0.0088, "step": 12080 }, { "epoch": 0.07953790385716071, "grad_norm": 0.25568430102873585, "learning_rate": 3.976842867010954e-06, "loss": 0.0055, "step": 12090 }, { "epoch": 0.07960369203239409, "grad_norm": 0.33455203898237024, "learning_rate": 3.9801322324923526e-06, "loss": 0.0079, "step": 12100 }, { "epoch": 0.07966948020762749, "grad_norm": 0.43818216608752675, "learning_rate": 3.983421597973751e-06, "loss": 0.005, "step": 12110 }, { "epoch": 0.07973526838286087, "grad_norm": 0.4446258628184622, "learning_rate": 3.98671096345515e-06, "loss": 0.0067, "step": 12120 }, { "epoch": 0.07980105655809425, "grad_norm": 0.36142203125043787, "learning_rate": 3.9900003289365485e-06, "loss": 0.0079, "step": 12130 }, { "epoch": 0.07986684473332763, "grad_norm": 0.20058807850721436, "learning_rate": 3.993289694417947e-06, "loss": 0.0073, "step": 12140 }, { "epoch": 0.07993263290856102, "grad_norm": 0.16481875273238186, "learning_rate": 3.996579059899346e-06, "loss": 0.0079, "step": 12150 }, { "epoch": 0.0799984210837944, "grad_norm": 0.23204761416459244, "learning_rate": 3.9998684253807444e-06, "loss": 0.0038, "step": 12160 }, { "epoch": 0.08006420925902778, "grad_norm": 0.10770562439451765, "learning_rate": 4.003157790862144e-06, "loss": 0.0046, "step": 12170 }, { "epoch": 0.08012999743426116, "grad_norm": 0.40855456048689365, "learning_rate": 4.006447156343541e-06, "loss": 0.005, "step": 12180 }, { "epoch": 0.08019578560949454, "grad_norm": 0.12804886032991325, "learning_rate": 4.00973652182494e-06, "loss": 0.0088, "step": 12190 }, { "epoch": 0.08026157378472794, "grad_norm": 0.18080214916246293, "learning_rate": 4.013025887306339e-06, "loss": 0.0052, "step": 12200 }, { "epoch": 0.08032736195996132, "grad_norm": 0.19621653517270413, "learning_rate": 4.016315252787737e-06, "loss": 0.0037, "step": 12210 }, { "epoch": 0.0803931501351947, "grad_norm": 0.3020103803467902, "learning_rate": 4.019604618269136e-06, "loss": 0.0054, "step": 12220 }, { "epoch": 0.08045893831042808, "grad_norm": 0.20892860288013074, "learning_rate": 4.022893983750535e-06, "loss": 0.0047, "step": 12230 }, { "epoch": 0.08052472648566146, "grad_norm": 0.1765082009161109, "learning_rate": 4.026183349231934e-06, "loss": 0.0052, "step": 12240 }, { "epoch": 0.08059051466089485, "grad_norm": 0.18261934328843335, "learning_rate": 4.029472714713332e-06, "loss": 0.0045, "step": 12250 }, { "epoch": 0.08065630283612824, "grad_norm": 0.4030401447888847, "learning_rate": 4.032762080194731e-06, "loss": 0.0067, "step": 12260 }, { "epoch": 0.08072209101136162, "grad_norm": 0.040728545060032005, "learning_rate": 4.03605144567613e-06, "loss": 0.0048, "step": 12270 }, { "epoch": 0.080787879186595, "grad_norm": 0.5242703230564815, "learning_rate": 4.039340811157528e-06, "loss": 0.0063, "step": 12280 }, { "epoch": 0.08085366736182839, "grad_norm": 0.15815088986109183, "learning_rate": 4.0426301766389265e-06, "loss": 0.0079, "step": 12290 }, { "epoch": 0.08091945553706177, "grad_norm": 0.18929814229153927, "learning_rate": 4.045919542120325e-06, "loss": 0.0042, "step": 12300 }, { "epoch": 0.08098524371229515, "grad_norm": 0.3735885283007509, "learning_rate": 4.049208907601724e-06, "loss": 0.0084, "step": 12310 }, { "epoch": 0.08105103188752853, "grad_norm": 0.2648855963511553, "learning_rate": 4.0524982730831225e-06, "loss": 0.0055, "step": 12320 }, { "epoch": 0.08111682006276191, "grad_norm": 0.35813537779172017, "learning_rate": 4.055787638564521e-06, "loss": 0.0067, "step": 12330 }, { "epoch": 0.08118260823799531, "grad_norm": 0.3455425609503861, "learning_rate": 4.05907700404592e-06, "loss": 0.0062, "step": 12340 }, { "epoch": 0.08124839641322869, "grad_norm": 0.6264276486387995, "learning_rate": 4.062366369527318e-06, "loss": 0.0086, "step": 12350 }, { "epoch": 0.08131418458846207, "grad_norm": 0.26107229276907634, "learning_rate": 4.065655735008718e-06, "loss": 0.0068, "step": 12360 }, { "epoch": 0.08137997276369545, "grad_norm": 0.23712020819813331, "learning_rate": 4.068945100490116e-06, "loss": 0.0069, "step": 12370 }, { "epoch": 0.08144576093892884, "grad_norm": 0.7539926931636632, "learning_rate": 4.072234465971514e-06, "loss": 0.0082, "step": 12380 }, { "epoch": 0.08151154911416222, "grad_norm": 0.22474870523103335, "learning_rate": 4.0755238314529135e-06, "loss": 0.0049, "step": 12390 }, { "epoch": 0.0815773372893956, "grad_norm": 0.3245779997787262, "learning_rate": 4.078813196934311e-06, "loss": 0.0044, "step": 12400 }, { "epoch": 0.08164312546462899, "grad_norm": 0.25954889914585155, "learning_rate": 4.08210256241571e-06, "loss": 0.0056, "step": 12410 }, { "epoch": 0.08170891363986237, "grad_norm": 0.3543715511466448, "learning_rate": 4.085391927897109e-06, "loss": 0.0058, "step": 12420 }, { "epoch": 0.08177470181509576, "grad_norm": 0.2056764897959068, "learning_rate": 4.088681293378508e-06, "loss": 0.0043, "step": 12430 }, { "epoch": 0.08184048999032914, "grad_norm": 0.24531316262790923, "learning_rate": 4.091970658859906e-06, "loss": 0.0047, "step": 12440 }, { "epoch": 0.08190627816556252, "grad_norm": 0.18848473736436974, "learning_rate": 4.0952600243413045e-06, "loss": 0.0039, "step": 12450 }, { "epoch": 0.0819720663407959, "grad_norm": 0.2191545507841522, "learning_rate": 4.098549389822704e-06, "loss": 0.0049, "step": 12460 }, { "epoch": 0.08203785451602928, "grad_norm": 0.2500200707142282, "learning_rate": 4.101838755304102e-06, "loss": 0.0068, "step": 12470 }, { "epoch": 0.08210364269126268, "grad_norm": 0.37121696523014136, "learning_rate": 4.105128120785501e-06, "loss": 0.0062, "step": 12480 }, { "epoch": 0.08216943086649606, "grad_norm": 0.4335267423793037, "learning_rate": 4.1084174862669e-06, "loss": 0.0051, "step": 12490 }, { "epoch": 0.08223521904172944, "grad_norm": 0.4365604008546156, "learning_rate": 4.111706851748298e-06, "loss": 0.0056, "step": 12500 }, { "epoch": 0.08230100721696282, "grad_norm": 0.14983356201086678, "learning_rate": 4.114996217229697e-06, "loss": 0.0038, "step": 12510 }, { "epoch": 0.08236679539219621, "grad_norm": 0.3146404199470973, "learning_rate": 4.118285582711095e-06, "loss": 0.006, "step": 12520 }, { "epoch": 0.0824325835674296, "grad_norm": 0.32341869242228644, "learning_rate": 4.121574948192494e-06, "loss": 0.0047, "step": 12530 }, { "epoch": 0.08249837174266297, "grad_norm": 0.3130191939863107, "learning_rate": 4.124864313673892e-06, "loss": 0.0051, "step": 12540 }, { "epoch": 0.08256415991789635, "grad_norm": 0.2558094400224964, "learning_rate": 4.1281536791552916e-06, "loss": 0.0048, "step": 12550 }, { "epoch": 0.08262994809312973, "grad_norm": 0.4120767575870982, "learning_rate": 4.13144304463669e-06, "loss": 0.0081, "step": 12560 }, { "epoch": 0.08269573626836313, "grad_norm": 0.35175555579732143, "learning_rate": 4.134732410118088e-06, "loss": 0.0041, "step": 12570 }, { "epoch": 0.08276152444359651, "grad_norm": 0.23309892701870297, "learning_rate": 4.1380217755994875e-06, "loss": 0.0072, "step": 12580 }, { "epoch": 0.08282731261882989, "grad_norm": 0.31170962346064246, "learning_rate": 4.141311141080886e-06, "loss": 0.0046, "step": 12590 }, { "epoch": 0.08289310079406327, "grad_norm": 0.23255253969576473, "learning_rate": 4.144600506562284e-06, "loss": 0.0071, "step": 12600 }, { "epoch": 0.08295888896929667, "grad_norm": 0.4414949425334521, "learning_rate": 4.147889872043683e-06, "loss": 0.0058, "step": 12610 }, { "epoch": 0.08302467714453005, "grad_norm": 0.28461594538299456, "learning_rate": 4.151179237525082e-06, "loss": 0.0067, "step": 12620 }, { "epoch": 0.08309046531976343, "grad_norm": 0.2443632848291531, "learning_rate": 4.15446860300648e-06, "loss": 0.004, "step": 12630 }, { "epoch": 0.0831562534949968, "grad_norm": 0.37489084906403863, "learning_rate": 4.1577579684878785e-06, "loss": 0.0038, "step": 12640 }, { "epoch": 0.08322204167023019, "grad_norm": 0.4169250895259878, "learning_rate": 4.161047333969278e-06, "loss": 0.0053, "step": 12650 }, { "epoch": 0.08328782984546358, "grad_norm": 0.35203344753423893, "learning_rate": 4.164336699450676e-06, "loss": 0.0069, "step": 12660 }, { "epoch": 0.08335361802069696, "grad_norm": 0.31979754220857354, "learning_rate": 4.167626064932075e-06, "loss": 0.004, "step": 12670 }, { "epoch": 0.08341940619593034, "grad_norm": 0.12776050222435398, "learning_rate": 4.170915430413474e-06, "loss": 0.0043, "step": 12680 }, { "epoch": 0.08348519437116372, "grad_norm": 0.2173917186011461, "learning_rate": 4.174204795894872e-06, "loss": 0.0065, "step": 12690 }, { "epoch": 0.0835509825463971, "grad_norm": 0.46269772409626786, "learning_rate": 4.177494161376271e-06, "loss": 0.004, "step": 12700 }, { "epoch": 0.0836167707216305, "grad_norm": 0.20639439147552158, "learning_rate": 4.18078352685767e-06, "loss": 0.0073, "step": 12710 }, { "epoch": 0.08368255889686388, "grad_norm": 0.18430813461757817, "learning_rate": 4.184072892339068e-06, "loss": 0.0041, "step": 12720 }, { "epoch": 0.08374834707209726, "grad_norm": 0.32075145583696585, "learning_rate": 4.187362257820467e-06, "loss": 0.0045, "step": 12730 }, { "epoch": 0.08381413524733064, "grad_norm": 0.21845657863506512, "learning_rate": 4.1906516233018655e-06, "loss": 0.005, "step": 12740 }, { "epoch": 0.08387992342256403, "grad_norm": 0.48290873112865634, "learning_rate": 4.193940988783264e-06, "loss": 0.0083, "step": 12750 }, { "epoch": 0.08394571159779741, "grad_norm": 0.48474808500748556, "learning_rate": 4.197230354264662e-06, "loss": 0.0091, "step": 12760 }, { "epoch": 0.0840114997730308, "grad_norm": 0.11368052933677096, "learning_rate": 4.2005197197460614e-06, "loss": 0.0054, "step": 12770 }, { "epoch": 0.08407728794826418, "grad_norm": 0.2263146159672875, "learning_rate": 4.20380908522746e-06, "loss": 0.0064, "step": 12780 }, { "epoch": 0.08414307612349756, "grad_norm": 0.20460921867054438, "learning_rate": 4.207098450708858e-06, "loss": 0.0067, "step": 12790 }, { "epoch": 0.08420886429873095, "grad_norm": 0.29235439872398566, "learning_rate": 4.210387816190257e-06, "loss": 0.0049, "step": 12800 }, { "epoch": 0.08427465247396433, "grad_norm": 0.12621512932087187, "learning_rate": 4.213677181671656e-06, "loss": 0.0038, "step": 12810 }, { "epoch": 0.08434044064919771, "grad_norm": 0.41326111153732, "learning_rate": 4.216966547153055e-06, "loss": 0.0052, "step": 12820 }, { "epoch": 0.08440622882443109, "grad_norm": 0.36569926266118924, "learning_rate": 4.220255912634453e-06, "loss": 0.0075, "step": 12830 }, { "epoch": 0.08447201699966449, "grad_norm": 0.28445136126341813, "learning_rate": 4.223545278115852e-06, "loss": 0.0037, "step": 12840 }, { "epoch": 0.08453780517489787, "grad_norm": 0.561977029104122, "learning_rate": 4.226834643597251e-06, "loss": 0.0072, "step": 12850 }, { "epoch": 0.08460359335013125, "grad_norm": 0.16236362247206224, "learning_rate": 4.230124009078649e-06, "loss": 0.0052, "step": 12860 }, { "epoch": 0.08466938152536463, "grad_norm": 0.506377680540652, "learning_rate": 4.233413374560048e-06, "loss": 0.0071, "step": 12870 }, { "epoch": 0.08473516970059801, "grad_norm": 0.17913948579411187, "learning_rate": 4.236702740041446e-06, "loss": 0.0066, "step": 12880 }, { "epoch": 0.0848009578758314, "grad_norm": 0.26090031502095906, "learning_rate": 4.239992105522845e-06, "loss": 0.0066, "step": 12890 }, { "epoch": 0.08486674605106478, "grad_norm": 0.1506214086815846, "learning_rate": 4.2432814710042435e-06, "loss": 0.0077, "step": 12900 }, { "epoch": 0.08493253422629816, "grad_norm": 0.18901287579177453, "learning_rate": 4.246570836485642e-06, "loss": 0.0062, "step": 12910 }, { "epoch": 0.08499832240153155, "grad_norm": 0.39555083845864786, "learning_rate": 4.249860201967041e-06, "loss": 0.0046, "step": 12920 }, { "epoch": 0.08506411057676493, "grad_norm": 0.23060590128328595, "learning_rate": 4.2531495674484395e-06, "loss": 0.0051, "step": 12930 }, { "epoch": 0.08512989875199832, "grad_norm": 0.32353551742365666, "learning_rate": 4.256438932929839e-06, "loss": 0.006, "step": 12940 }, { "epoch": 0.0851956869272317, "grad_norm": 0.41814828016146494, "learning_rate": 4.259728298411237e-06, "loss": 0.0042, "step": 12950 }, { "epoch": 0.08526147510246508, "grad_norm": 0.2859940864368895, "learning_rate": 4.263017663892635e-06, "loss": 0.0058, "step": 12960 }, { "epoch": 0.08532726327769846, "grad_norm": 0.17184393545522209, "learning_rate": 4.266307029374034e-06, "loss": 0.0041, "step": 12970 }, { "epoch": 0.08539305145293186, "grad_norm": 0.28404408069339726, "learning_rate": 4.269596394855432e-06, "loss": 0.0056, "step": 12980 }, { "epoch": 0.08545883962816524, "grad_norm": 0.15976617227663845, "learning_rate": 4.272885760336831e-06, "loss": 0.0049, "step": 12990 }, { "epoch": 0.08552462780339862, "grad_norm": 0.2501702869896074, "learning_rate": 4.27617512581823e-06, "loss": 0.0044, "step": 13000 }, { "epoch": 0.085590415978632, "grad_norm": 0.22705376235442123, "learning_rate": 4.279464491299629e-06, "loss": 0.0044, "step": 13010 }, { "epoch": 0.08565620415386538, "grad_norm": 0.5973033263285852, "learning_rate": 4.282753856781027e-06, "loss": 0.0073, "step": 13020 }, { "epoch": 0.08572199232909877, "grad_norm": 0.320889745231086, "learning_rate": 4.286043222262426e-06, "loss": 0.0046, "step": 13030 }, { "epoch": 0.08578778050433215, "grad_norm": 0.3869792134408571, "learning_rate": 4.289332587743825e-06, "loss": 0.0063, "step": 13040 }, { "epoch": 0.08585356867956553, "grad_norm": 0.315543027625164, "learning_rate": 4.292621953225223e-06, "loss": 0.0048, "step": 13050 }, { "epoch": 0.08591935685479891, "grad_norm": 0.18260515879919414, "learning_rate": 4.295911318706622e-06, "loss": 0.0076, "step": 13060 }, { "epoch": 0.08598514503003231, "grad_norm": 0.22961135480999065, "learning_rate": 4.299200684188021e-06, "loss": 0.0058, "step": 13070 }, { "epoch": 0.08605093320526569, "grad_norm": 0.22095292509761683, "learning_rate": 4.302490049669419e-06, "loss": 0.005, "step": 13080 }, { "epoch": 0.08611672138049907, "grad_norm": 0.2396724744350075, "learning_rate": 4.3057794151508175e-06, "loss": 0.0063, "step": 13090 }, { "epoch": 0.08618250955573245, "grad_norm": 0.16125860383056115, "learning_rate": 4.309068780632216e-06, "loss": 0.0062, "step": 13100 }, { "epoch": 0.08624829773096583, "grad_norm": 0.13223148848337582, "learning_rate": 4.312358146113615e-06, "loss": 0.0034, "step": 13110 }, { "epoch": 0.08631408590619923, "grad_norm": 0.09677623674094814, "learning_rate": 4.3156475115950134e-06, "loss": 0.0063, "step": 13120 }, { "epoch": 0.0863798740814326, "grad_norm": 0.31467296102659503, "learning_rate": 4.318936877076413e-06, "loss": 0.0086, "step": 13130 }, { "epoch": 0.08644566225666599, "grad_norm": 0.21232459254941075, "learning_rate": 4.322226242557811e-06, "loss": 0.0077, "step": 13140 }, { "epoch": 0.08651145043189937, "grad_norm": 0.24113714402661787, "learning_rate": 4.325515608039209e-06, "loss": 0.0055, "step": 13150 }, { "epoch": 0.08657723860713275, "grad_norm": 0.4104253732811463, "learning_rate": 4.3288049735206086e-06, "loss": 0.007, "step": 13160 }, { "epoch": 0.08664302678236614, "grad_norm": 0.18806190421106733, "learning_rate": 4.332094339002007e-06, "loss": 0.0066, "step": 13170 }, { "epoch": 0.08670881495759952, "grad_norm": 0.3247813473419121, "learning_rate": 4.335383704483406e-06, "loss": 0.006, "step": 13180 }, { "epoch": 0.0867746031328329, "grad_norm": 0.19375183479171054, "learning_rate": 4.3386730699648045e-06, "loss": 0.0038, "step": 13190 }, { "epoch": 0.08684039130806628, "grad_norm": 0.17758319429269162, "learning_rate": 4.341962435446203e-06, "loss": 0.0055, "step": 13200 }, { "epoch": 0.08690617948329968, "grad_norm": 0.10865014680539155, "learning_rate": 4.345251800927601e-06, "loss": 0.0047, "step": 13210 }, { "epoch": 0.08697196765853306, "grad_norm": 0.3250794348434082, "learning_rate": 4.348541166409e-06, "loss": 0.0048, "step": 13220 }, { "epoch": 0.08703775583376644, "grad_norm": 0.3153548807302101, "learning_rate": 4.351830531890399e-06, "loss": 0.0051, "step": 13230 }, { "epoch": 0.08710354400899982, "grad_norm": 0.3912370807573049, "learning_rate": 4.355119897371797e-06, "loss": 0.0047, "step": 13240 }, { "epoch": 0.0871693321842332, "grad_norm": 0.2450057096016672, "learning_rate": 4.358409262853196e-06, "loss": 0.0045, "step": 13250 }, { "epoch": 0.0872351203594666, "grad_norm": 0.23701980212830995, "learning_rate": 4.361698628334595e-06, "loss": 0.0042, "step": 13260 }, { "epoch": 0.08730090853469998, "grad_norm": 0.22552583211823884, "learning_rate": 4.364987993815993e-06, "loss": 0.0037, "step": 13270 }, { "epoch": 0.08736669670993336, "grad_norm": 0.40933066258445233, "learning_rate": 4.368277359297392e-06, "loss": 0.0074, "step": 13280 }, { "epoch": 0.08743248488516674, "grad_norm": 0.33774401175691904, "learning_rate": 4.371566724778791e-06, "loss": 0.0051, "step": 13290 }, { "epoch": 0.08749827306040012, "grad_norm": 0.22349786828082865, "learning_rate": 4.374856090260189e-06, "loss": 0.0075, "step": 13300 }, { "epoch": 0.08756406123563351, "grad_norm": 0.16948060103541734, "learning_rate": 4.378145455741587e-06, "loss": 0.0076, "step": 13310 }, { "epoch": 0.08762984941086689, "grad_norm": 0.1321410040543576, "learning_rate": 4.381434821222987e-06, "loss": 0.0089, "step": 13320 }, { "epoch": 0.08769563758610027, "grad_norm": 0.18168297242802425, "learning_rate": 4.384724186704385e-06, "loss": 0.0074, "step": 13330 }, { "epoch": 0.08776142576133365, "grad_norm": 0.2324412005217177, "learning_rate": 4.388013552185783e-06, "loss": 0.0054, "step": 13340 }, { "epoch": 0.08782721393656705, "grad_norm": 0.060491039603242605, "learning_rate": 4.3913029176671825e-06, "loss": 0.004, "step": 13350 }, { "epoch": 0.08789300211180043, "grad_norm": 0.2156052414661175, "learning_rate": 4.394592283148581e-06, "loss": 0.0044, "step": 13360 }, { "epoch": 0.08795879028703381, "grad_norm": 0.21760457767074418, "learning_rate": 4.39788164862998e-06, "loss": 0.0051, "step": 13370 }, { "epoch": 0.08802457846226719, "grad_norm": 0.31820861750342294, "learning_rate": 4.4011710141113785e-06, "loss": 0.0068, "step": 13380 }, { "epoch": 0.08809036663750057, "grad_norm": 0.3549004282452916, "learning_rate": 4.404460379592777e-06, "loss": 0.0049, "step": 13390 }, { "epoch": 0.08815615481273396, "grad_norm": 0.27218739155952204, "learning_rate": 4.407749745074176e-06, "loss": 0.0062, "step": 13400 }, { "epoch": 0.08822194298796734, "grad_norm": 0.4644249221411784, "learning_rate": 4.411039110555574e-06, "loss": 0.0044, "step": 13410 }, { "epoch": 0.08828773116320073, "grad_norm": 0.2116061064063531, "learning_rate": 4.414328476036973e-06, "loss": 0.0041, "step": 13420 }, { "epoch": 0.0883535193384341, "grad_norm": 0.34953826817117617, "learning_rate": 4.417617841518371e-06, "loss": 0.0066, "step": 13430 }, { "epoch": 0.0884193075136675, "grad_norm": 0.27370356706301807, "learning_rate": 4.42090720699977e-06, "loss": 0.0053, "step": 13440 }, { "epoch": 0.08848509568890088, "grad_norm": 0.07453591430141558, "learning_rate": 4.424196572481169e-06, "loss": 0.0052, "step": 13450 }, { "epoch": 0.08855088386413426, "grad_norm": 0.11199131934849856, "learning_rate": 4.427485937962567e-06, "loss": 0.0032, "step": 13460 }, { "epoch": 0.08861667203936764, "grad_norm": 0.270339779042228, "learning_rate": 4.430775303443966e-06, "loss": 0.0045, "step": 13470 }, { "epoch": 0.08868246021460102, "grad_norm": 0.23225671237336343, "learning_rate": 4.434064668925365e-06, "loss": 0.0079, "step": 13480 }, { "epoch": 0.08874824838983442, "grad_norm": 0.29902843949206365, "learning_rate": 4.437354034406763e-06, "loss": 0.0038, "step": 13490 }, { "epoch": 0.0888140365650678, "grad_norm": 0.23888730295730742, "learning_rate": 4.440643399888162e-06, "loss": 0.0043, "step": 13500 }, { "epoch": 0.08887982474030118, "grad_norm": 0.1969906115595944, "learning_rate": 4.4439327653695606e-06, "loss": 0.006, "step": 13510 }, { "epoch": 0.08894561291553456, "grad_norm": 0.22773494265792169, "learning_rate": 4.44722213085096e-06, "loss": 0.0049, "step": 13520 }, { "epoch": 0.08901140109076794, "grad_norm": 0.042331216224036065, "learning_rate": 4.450511496332358e-06, "loss": 0.006, "step": 13530 }, { "epoch": 0.08907718926600133, "grad_norm": 0.19527546389656003, "learning_rate": 4.4538008618137565e-06, "loss": 0.0056, "step": 13540 }, { "epoch": 0.08914297744123471, "grad_norm": 0.26159298204491194, "learning_rate": 4.457090227295155e-06, "loss": 0.0051, "step": 13550 }, { "epoch": 0.0892087656164681, "grad_norm": 0.19011707824587878, "learning_rate": 4.460379592776553e-06, "loss": 0.0067, "step": 13560 }, { "epoch": 0.08927455379170147, "grad_norm": 0.3420898390602531, "learning_rate": 4.4636689582579524e-06, "loss": 0.0052, "step": 13570 }, { "epoch": 0.08934034196693487, "grad_norm": 0.09976201930379928, "learning_rate": 4.466958323739351e-06, "loss": 0.0046, "step": 13580 }, { "epoch": 0.08940613014216825, "grad_norm": 0.5773872432037177, "learning_rate": 4.47024768922075e-06, "loss": 0.0081, "step": 13590 }, { "epoch": 0.08947191831740163, "grad_norm": 0.2122943341803588, "learning_rate": 4.473537054702148e-06, "loss": 0.0038, "step": 13600 }, { "epoch": 0.08953770649263501, "grad_norm": 0.0941513232299447, "learning_rate": 4.476826420183547e-06, "loss": 0.0043, "step": 13610 }, { "epoch": 0.08960349466786839, "grad_norm": 0.07871449727958951, "learning_rate": 4.480115785664946e-06, "loss": 0.0038, "step": 13620 }, { "epoch": 0.08966928284310179, "grad_norm": 0.1358287994101901, "learning_rate": 4.483405151146344e-06, "loss": 0.0037, "step": 13630 }, { "epoch": 0.08973507101833517, "grad_norm": 0.19861383445207845, "learning_rate": 4.4866945166277435e-06, "loss": 0.0085, "step": 13640 }, { "epoch": 0.08980085919356855, "grad_norm": 0.26322951458315547, "learning_rate": 4.489983882109141e-06, "loss": 0.0045, "step": 13650 }, { "epoch": 0.08986664736880193, "grad_norm": 0.3739763709843301, "learning_rate": 4.49327324759054e-06, "loss": 0.0071, "step": 13660 }, { "epoch": 0.08993243554403532, "grad_norm": 0.23717587138846996, "learning_rate": 4.496562613071939e-06, "loss": 0.007, "step": 13670 }, { "epoch": 0.0899982237192687, "grad_norm": 0.16882983822160041, "learning_rate": 4.499851978553337e-06, "loss": 0.0057, "step": 13680 }, { "epoch": 0.09006401189450208, "grad_norm": 0.3930277051688773, "learning_rate": 4.503141344034736e-06, "loss": 0.0053, "step": 13690 }, { "epoch": 0.09012980006973546, "grad_norm": 0.30006235797421577, "learning_rate": 4.5064307095161345e-06, "loss": 0.0089, "step": 13700 }, { "epoch": 0.09019558824496884, "grad_norm": 0.37733114078154834, "learning_rate": 4.509720074997534e-06, "loss": 0.0051, "step": 13710 }, { "epoch": 0.09026137642020224, "grad_norm": 0.5042728441686188, "learning_rate": 4.513009440478932e-06, "loss": 0.0066, "step": 13720 }, { "epoch": 0.09032716459543562, "grad_norm": 0.34879278344318826, "learning_rate": 4.5162988059603305e-06, "loss": 0.0058, "step": 13730 }, { "epoch": 0.090392952770669, "grad_norm": 0.2930238507434115, "learning_rate": 4.51958817144173e-06, "loss": 0.0072, "step": 13740 }, { "epoch": 0.09045874094590238, "grad_norm": 0.0975300626017019, "learning_rate": 4.522877536923128e-06, "loss": 0.0036, "step": 13750 }, { "epoch": 0.09052452912113576, "grad_norm": 0.23400772278012874, "learning_rate": 4.526166902404526e-06, "loss": 0.0055, "step": 13760 }, { "epoch": 0.09059031729636915, "grad_norm": 0.1298537874619224, "learning_rate": 4.529456267885925e-06, "loss": 0.0051, "step": 13770 }, { "epoch": 0.09065610547160254, "grad_norm": 0.1869581417839541, "learning_rate": 4.532745633367324e-06, "loss": 0.008, "step": 13780 }, { "epoch": 0.09072189364683592, "grad_norm": 0.11854715312899547, "learning_rate": 4.536034998848722e-06, "loss": 0.0081, "step": 13790 }, { "epoch": 0.0907876818220693, "grad_norm": 0.1443775992222615, "learning_rate": 4.539324364330121e-06, "loss": 0.0046, "step": 13800 }, { "epoch": 0.09085346999730269, "grad_norm": 0.25751916218510507, "learning_rate": 4.54261372981152e-06, "loss": 0.0061, "step": 13810 }, { "epoch": 0.09091925817253607, "grad_norm": 0.09194591482832, "learning_rate": 4.545903095292918e-06, "loss": 0.0031, "step": 13820 }, { "epoch": 0.09098504634776945, "grad_norm": 0.3244937683601753, "learning_rate": 4.5491924607743175e-06, "loss": 0.0057, "step": 13830 }, { "epoch": 0.09105083452300283, "grad_norm": 0.745745001568759, "learning_rate": 4.552481826255716e-06, "loss": 0.0051, "step": 13840 }, { "epoch": 0.09111662269823621, "grad_norm": 0.17264337086589326, "learning_rate": 4.555771191737114e-06, "loss": 0.0062, "step": 13850 }, { "epoch": 0.09118241087346961, "grad_norm": 0.19815861793709283, "learning_rate": 4.559060557218513e-06, "loss": 0.005, "step": 13860 }, { "epoch": 0.09124819904870299, "grad_norm": 0.32488984584448233, "learning_rate": 4.562349922699912e-06, "loss": 0.0053, "step": 13870 }, { "epoch": 0.09131398722393637, "grad_norm": 0.08399548388315824, "learning_rate": 4.56563928818131e-06, "loss": 0.0063, "step": 13880 }, { "epoch": 0.09137977539916975, "grad_norm": 0.3964337688549464, "learning_rate": 4.5689286536627085e-06, "loss": 0.0064, "step": 13890 }, { "epoch": 0.09144556357440314, "grad_norm": 0.2999285421935953, "learning_rate": 4.572218019144108e-06, "loss": 0.0043, "step": 13900 }, { "epoch": 0.09151135174963652, "grad_norm": 0.3204179932386944, "learning_rate": 4.575507384625506e-06, "loss": 0.0037, "step": 13910 }, { "epoch": 0.0915771399248699, "grad_norm": 1.0920039956322147, "learning_rate": 4.578796750106904e-06, "loss": 0.0046, "step": 13920 }, { "epoch": 0.09164292810010329, "grad_norm": 0.22276907859317324, "learning_rate": 4.582086115588304e-06, "loss": 0.0028, "step": 13930 }, { "epoch": 0.09170871627533667, "grad_norm": 0.2823426656997142, "learning_rate": 4.585375481069702e-06, "loss": 0.0052, "step": 13940 }, { "epoch": 0.09177450445057006, "grad_norm": 0.21304141078669017, "learning_rate": 4.588664846551101e-06, "loss": 0.0034, "step": 13950 }, { "epoch": 0.09184029262580344, "grad_norm": 0.31766402247818204, "learning_rate": 4.5919542120324996e-06, "loss": 0.0071, "step": 13960 }, { "epoch": 0.09190608080103682, "grad_norm": 0.2800841059777247, "learning_rate": 4.595243577513898e-06, "loss": 0.0052, "step": 13970 }, { "epoch": 0.0919718689762702, "grad_norm": 0.3417700151622367, "learning_rate": 4.598532942995297e-06, "loss": 0.0048, "step": 13980 }, { "epoch": 0.09203765715150358, "grad_norm": 0.19805821510025318, "learning_rate": 4.601822308476695e-06, "loss": 0.0046, "step": 13990 }, { "epoch": 0.09210344532673698, "grad_norm": 0.1496671486661522, "learning_rate": 4.605111673958094e-06, "loss": 0.0076, "step": 14000 }, { "epoch": 0.09216923350197036, "grad_norm": 0.184238238338536, "learning_rate": 4.608401039439492e-06, "loss": 0.0052, "step": 14010 }, { "epoch": 0.09223502167720374, "grad_norm": 0.39551147030992745, "learning_rate": 4.611690404920891e-06, "loss": 0.0065, "step": 14020 }, { "epoch": 0.09230080985243712, "grad_norm": 0.271863555493224, "learning_rate": 4.61497977040229e-06, "loss": 0.0049, "step": 14030 }, { "epoch": 0.09236659802767051, "grad_norm": 0.24832413970422731, "learning_rate": 4.618269135883688e-06, "loss": 0.0069, "step": 14040 }, { "epoch": 0.0924323862029039, "grad_norm": 0.24496801273834634, "learning_rate": 4.621558501365087e-06, "loss": 0.0047, "step": 14050 }, { "epoch": 0.09249817437813727, "grad_norm": 0.18692835471474675, "learning_rate": 4.624847866846486e-06, "loss": 0.0043, "step": 14060 }, { "epoch": 0.09256396255337065, "grad_norm": 0.231701384868074, "learning_rate": 4.628137232327884e-06, "loss": 0.0048, "step": 14070 }, { "epoch": 0.09262975072860404, "grad_norm": 0.25131990397309256, "learning_rate": 4.631426597809283e-06, "loss": 0.0041, "step": 14080 }, { "epoch": 0.09269553890383743, "grad_norm": 0.4431782132051782, "learning_rate": 4.634715963290682e-06, "loss": 0.0061, "step": 14090 }, { "epoch": 0.09276132707907081, "grad_norm": 0.21907601159475926, "learning_rate": 4.63800532877208e-06, "loss": 0.007, "step": 14100 }, { "epoch": 0.09282711525430419, "grad_norm": 0.17294861933014063, "learning_rate": 4.641294694253478e-06, "loss": 0.0047, "step": 14110 }, { "epoch": 0.09289290342953757, "grad_norm": 0.14533704853880586, "learning_rate": 4.644584059734878e-06, "loss": 0.0059, "step": 14120 }, { "epoch": 0.09295869160477095, "grad_norm": 0.3236956237680638, "learning_rate": 4.647873425216276e-06, "loss": 0.0045, "step": 14130 }, { "epoch": 0.09302447978000435, "grad_norm": 0.4367734015049632, "learning_rate": 4.651162790697675e-06, "loss": 0.0068, "step": 14140 }, { "epoch": 0.09309026795523773, "grad_norm": 0.16931111278815456, "learning_rate": 4.6544521561790735e-06, "loss": 0.0045, "step": 14150 }, { "epoch": 0.09315605613047111, "grad_norm": 0.2680946662754323, "learning_rate": 4.657741521660472e-06, "loss": 0.0042, "step": 14160 }, { "epoch": 0.09322184430570449, "grad_norm": 0.16524722560824318, "learning_rate": 4.661030887141871e-06, "loss": 0.0035, "step": 14170 }, { "epoch": 0.09328763248093788, "grad_norm": 0.3993794384615253, "learning_rate": 4.6643202526232694e-06, "loss": 0.0066, "step": 14180 }, { "epoch": 0.09335342065617126, "grad_norm": 0.10764103884287225, "learning_rate": 4.667609618104668e-06, "loss": 0.0044, "step": 14190 }, { "epoch": 0.09341920883140464, "grad_norm": 0.18464811367684048, "learning_rate": 4.670898983586067e-06, "loss": 0.0029, "step": 14200 }, { "epoch": 0.09348499700663802, "grad_norm": 0.30416724583925403, "learning_rate": 4.674188349067465e-06, "loss": 0.0051, "step": 14210 }, { "epoch": 0.0935507851818714, "grad_norm": 0.25643419627280223, "learning_rate": 4.677477714548864e-06, "loss": 0.0064, "step": 14220 }, { "epoch": 0.0936165733571048, "grad_norm": 0.8334674742613668, "learning_rate": 4.680767080030262e-06, "loss": 0.0061, "step": 14230 }, { "epoch": 0.09368236153233818, "grad_norm": 0.3632584628426144, "learning_rate": 4.684056445511661e-06, "loss": 0.0041, "step": 14240 }, { "epoch": 0.09374814970757156, "grad_norm": 0.44517967096448113, "learning_rate": 4.68734581099306e-06, "loss": 0.0066, "step": 14250 }, { "epoch": 0.09381393788280494, "grad_norm": 0.24678099359308153, "learning_rate": 4.690635176474458e-06, "loss": 0.0067, "step": 14260 }, { "epoch": 0.09387972605803833, "grad_norm": 0.8262282088678358, "learning_rate": 4.693924541955857e-06, "loss": 0.0046, "step": 14270 }, { "epoch": 0.09394551423327172, "grad_norm": 0.48397016384432867, "learning_rate": 4.697213907437256e-06, "loss": 0.0089, "step": 14280 }, { "epoch": 0.0940113024085051, "grad_norm": 0.14126940740815122, "learning_rate": 4.700503272918655e-06, "loss": 0.0047, "step": 14290 }, { "epoch": 0.09407709058373848, "grad_norm": 0.22769166345698183, "learning_rate": 4.703792638400053e-06, "loss": 0.0052, "step": 14300 }, { "epoch": 0.09414287875897186, "grad_norm": 0.18574722162688823, "learning_rate": 4.7070820038814515e-06, "loss": 0.0037, "step": 14310 }, { "epoch": 0.09420866693420525, "grad_norm": 0.1301740745681214, "learning_rate": 4.710371369362851e-06, "loss": 0.0044, "step": 14320 }, { "epoch": 0.09427445510943863, "grad_norm": 0.11334794681182046, "learning_rate": 4.713660734844248e-06, "loss": 0.006, "step": 14330 }, { "epoch": 0.09434024328467201, "grad_norm": 0.5458669819900952, "learning_rate": 4.7169501003256475e-06, "loss": 0.0052, "step": 14340 }, { "epoch": 0.09440603145990539, "grad_norm": 0.2161177336384016, "learning_rate": 4.720239465807046e-06, "loss": 0.0062, "step": 14350 }, { "epoch": 0.09447181963513877, "grad_norm": 0.18671051256107893, "learning_rate": 4.723528831288445e-06, "loss": 0.0046, "step": 14360 }, { "epoch": 0.09453760781037217, "grad_norm": 0.3890521379537207, "learning_rate": 4.726818196769843e-06, "loss": 0.0079, "step": 14370 }, { "epoch": 0.09460339598560555, "grad_norm": 0.31793849892834886, "learning_rate": 4.730107562251242e-06, "loss": 0.0039, "step": 14380 }, { "epoch": 0.09466918416083893, "grad_norm": 0.27981540112651004, "learning_rate": 4.733396927732641e-06, "loss": 0.0089, "step": 14390 }, { "epoch": 0.09473497233607231, "grad_norm": 0.5106285503396356, "learning_rate": 4.736686293214039e-06, "loss": 0.0074, "step": 14400 }, { "epoch": 0.0948007605113057, "grad_norm": 0.4527916249788965, "learning_rate": 4.7399756586954386e-06, "loss": 0.0062, "step": 14410 }, { "epoch": 0.09486654868653908, "grad_norm": 0.4194535133012208, "learning_rate": 4.743265024176837e-06, "loss": 0.0056, "step": 14420 }, { "epoch": 0.09493233686177246, "grad_norm": 0.3085658905234561, "learning_rate": 4.746554389658235e-06, "loss": 0.0081, "step": 14430 }, { "epoch": 0.09499812503700585, "grad_norm": 0.31571294106789843, "learning_rate": 4.749843755139634e-06, "loss": 0.0048, "step": 14440 }, { "epoch": 0.09506391321223923, "grad_norm": 0.5201764900023417, "learning_rate": 4.753133120621032e-06, "loss": 0.0065, "step": 14450 }, { "epoch": 0.09512970138747262, "grad_norm": 0.2717138004758864, "learning_rate": 4.756422486102431e-06, "loss": 0.0048, "step": 14460 }, { "epoch": 0.095195489562706, "grad_norm": 0.1938786874670604, "learning_rate": 4.7597118515838296e-06, "loss": 0.0047, "step": 14470 }, { "epoch": 0.09526127773793938, "grad_norm": 0.3354572967796012, "learning_rate": 4.763001217065229e-06, "loss": 0.0075, "step": 14480 }, { "epoch": 0.09532706591317276, "grad_norm": 0.23837878604401497, "learning_rate": 4.766290582546627e-06, "loss": 0.0072, "step": 14490 }, { "epoch": 0.09539285408840616, "grad_norm": 0.3011572628469501, "learning_rate": 4.7695799480280255e-06, "loss": 0.0057, "step": 14500 }, { "epoch": 0.09545864226363954, "grad_norm": 0.06015448946666019, "learning_rate": 4.772869313509425e-06, "loss": 0.003, "step": 14510 }, { "epoch": 0.09552443043887292, "grad_norm": 0.3108983977162923, "learning_rate": 4.776158678990823e-06, "loss": 0.0043, "step": 14520 }, { "epoch": 0.0955902186141063, "grad_norm": 0.25092613177491097, "learning_rate": 4.779448044472222e-06, "loss": 0.0043, "step": 14530 }, { "epoch": 0.09565600678933968, "grad_norm": 0.41519314895252535, "learning_rate": 4.782737409953621e-06, "loss": 0.0067, "step": 14540 }, { "epoch": 0.09572179496457307, "grad_norm": 0.3390316693166818, "learning_rate": 4.786026775435019e-06, "loss": 0.0056, "step": 14550 }, { "epoch": 0.09578758313980645, "grad_norm": 0.14426780054663166, "learning_rate": 4.789316140916417e-06, "loss": 0.0034, "step": 14560 }, { "epoch": 0.09585337131503983, "grad_norm": 0.2965532796735236, "learning_rate": 4.792605506397816e-06, "loss": 0.0076, "step": 14570 }, { "epoch": 0.09591915949027321, "grad_norm": 0.2612939644621621, "learning_rate": 4.795894871879215e-06, "loss": 0.0057, "step": 14580 }, { "epoch": 0.0959849476655066, "grad_norm": 0.2311701589069524, "learning_rate": 4.799184237360613e-06, "loss": 0.0047, "step": 14590 }, { "epoch": 0.09605073584073999, "grad_norm": 0.21778621929491246, "learning_rate": 4.8024736028420125e-06, "loss": 0.0065, "step": 14600 }, { "epoch": 0.09611652401597337, "grad_norm": 0.17585685651832608, "learning_rate": 4.805762968323411e-06, "loss": 0.0043, "step": 14610 }, { "epoch": 0.09618231219120675, "grad_norm": 0.13430505002819565, "learning_rate": 4.809052333804809e-06, "loss": 0.004, "step": 14620 }, { "epoch": 0.09624810036644013, "grad_norm": 0.20836941935344352, "learning_rate": 4.8123416992862084e-06, "loss": 0.0033, "step": 14630 }, { "epoch": 0.09631388854167353, "grad_norm": 0.4479693606433881, "learning_rate": 4.815631064767607e-06, "loss": 0.0067, "step": 14640 }, { "epoch": 0.0963796767169069, "grad_norm": 0.20263600767579462, "learning_rate": 4.818920430249005e-06, "loss": 0.0034, "step": 14650 }, { "epoch": 0.09644546489214029, "grad_norm": 0.158625752594345, "learning_rate": 4.822209795730404e-06, "loss": 0.005, "step": 14660 }, { "epoch": 0.09651125306737367, "grad_norm": 0.19161656120065385, "learning_rate": 4.825499161211803e-06, "loss": 0.0055, "step": 14670 }, { "epoch": 0.09657704124260705, "grad_norm": 0.2827713788155193, "learning_rate": 4.828788526693201e-06, "loss": 0.0061, "step": 14680 }, { "epoch": 0.09664282941784044, "grad_norm": 0.15314053773496553, "learning_rate": 4.8320778921745995e-06, "loss": 0.007, "step": 14690 }, { "epoch": 0.09670861759307382, "grad_norm": 0.6053444051424994, "learning_rate": 4.835367257655999e-06, "loss": 0.0076, "step": 14700 }, { "epoch": 0.0967744057683072, "grad_norm": 0.36505215047251666, "learning_rate": 4.838656623137397e-06, "loss": 0.0088, "step": 14710 }, { "epoch": 0.09684019394354058, "grad_norm": 0.022430460568829916, "learning_rate": 4.841945988618796e-06, "loss": 0.0033, "step": 14720 }, { "epoch": 0.09690598211877398, "grad_norm": 0.3215848042115074, "learning_rate": 4.845235354100195e-06, "loss": 0.0044, "step": 14730 }, { "epoch": 0.09697177029400736, "grad_norm": 0.16433705974311247, "learning_rate": 4.848524719581593e-06, "loss": 0.0044, "step": 14740 }, { "epoch": 0.09703755846924074, "grad_norm": 0.23776984264156065, "learning_rate": 4.851814085062992e-06, "loss": 0.0027, "step": 14750 }, { "epoch": 0.09710334664447412, "grad_norm": 0.24251189083945648, "learning_rate": 4.8551034505443905e-06, "loss": 0.0053, "step": 14760 }, { "epoch": 0.0971691348197075, "grad_norm": 0.22496093866136865, "learning_rate": 4.858392816025789e-06, "loss": 0.0051, "step": 14770 }, { "epoch": 0.0972349229949409, "grad_norm": 0.2696067273569581, "learning_rate": 4.861682181507187e-06, "loss": 0.0038, "step": 14780 }, { "epoch": 0.09730071117017428, "grad_norm": 0.13491931970623025, "learning_rate": 4.8649715469885865e-06, "loss": 0.005, "step": 14790 }, { "epoch": 0.09736649934540766, "grad_norm": 0.3277083250071473, "learning_rate": 4.868260912469985e-06, "loss": 0.0042, "step": 14800 }, { "epoch": 0.09743228752064104, "grad_norm": 0.08754365989381686, "learning_rate": 4.871550277951383e-06, "loss": 0.0048, "step": 14810 }, { "epoch": 0.09749807569587442, "grad_norm": 0.1989235260162123, "learning_rate": 4.874839643432782e-06, "loss": 0.0046, "step": 14820 }, { "epoch": 0.09756386387110781, "grad_norm": 0.27505522472340777, "learning_rate": 4.878129008914181e-06, "loss": 0.004, "step": 14830 }, { "epoch": 0.09762965204634119, "grad_norm": 0.42875605688350205, "learning_rate": 4.881418374395579e-06, "loss": 0.0056, "step": 14840 }, { "epoch": 0.09769544022157457, "grad_norm": 0.5940203583785887, "learning_rate": 4.884707739876978e-06, "loss": 0.004, "step": 14850 }, { "epoch": 0.09776122839680795, "grad_norm": 0.19082635507159654, "learning_rate": 4.887997105358377e-06, "loss": 0.0071, "step": 14860 }, { "epoch": 0.09782701657204135, "grad_norm": 0.3040916646574753, "learning_rate": 4.891286470839776e-06, "loss": 0.0038, "step": 14870 }, { "epoch": 0.09789280474727473, "grad_norm": 0.2051997820173776, "learning_rate": 4.894575836321174e-06, "loss": 0.0094, "step": 14880 }, { "epoch": 0.09795859292250811, "grad_norm": 0.2484542431422592, "learning_rate": 4.897865201802573e-06, "loss": 0.0079, "step": 14890 }, { "epoch": 0.09802438109774149, "grad_norm": 0.20534007409110505, "learning_rate": 4.901154567283971e-06, "loss": 0.0039, "step": 14900 }, { "epoch": 0.09809016927297487, "grad_norm": 0.29546618127437885, "learning_rate": 4.90444393276537e-06, "loss": 0.0061, "step": 14910 }, { "epoch": 0.09815595744820826, "grad_norm": 0.31023524577697076, "learning_rate": 4.9077332982467686e-06, "loss": 0.0046, "step": 14920 }, { "epoch": 0.09822174562344164, "grad_norm": 0.4334303855379425, "learning_rate": 4.911022663728167e-06, "loss": 0.0032, "step": 14930 }, { "epoch": 0.09828753379867503, "grad_norm": 0.13401861045339955, "learning_rate": 4.914312029209566e-06, "loss": 0.0078, "step": 14940 }, { "epoch": 0.0983533219739084, "grad_norm": 0.18368306840075707, "learning_rate": 4.9176013946909645e-06, "loss": 0.0071, "step": 14950 }, { "epoch": 0.09841911014914179, "grad_norm": 0.38443039247812577, "learning_rate": 4.920890760172363e-06, "loss": 0.0042, "step": 14960 }, { "epoch": 0.09848489832437518, "grad_norm": 0.08435922157959146, "learning_rate": 4.924180125653762e-06, "loss": 0.0041, "step": 14970 }, { "epoch": 0.09855068649960856, "grad_norm": 0.10195026658396894, "learning_rate": 4.9274694911351604e-06, "loss": 0.0069, "step": 14980 }, { "epoch": 0.09861647467484194, "grad_norm": 0.9246455790787478, "learning_rate": 4.93075885661656e-06, "loss": 0.0067, "step": 14990 }, { "epoch": 0.09868226285007532, "grad_norm": 0.4100063409930566, "learning_rate": 4.934048222097958e-06, "loss": 0.0037, "step": 15000 }, { "epoch": 0.09874805102530872, "grad_norm": 0.6781971220916536, "learning_rate": 4.937337587579356e-06, "loss": 0.0041, "step": 15010 }, { "epoch": 0.0988138392005421, "grad_norm": 0.39502897804230946, "learning_rate": 4.940626953060755e-06, "loss": 0.0052, "step": 15020 }, { "epoch": 0.09887962737577548, "grad_norm": 0.1718179492556249, "learning_rate": 4.943916318542153e-06, "loss": 0.0066, "step": 15030 }, { "epoch": 0.09894541555100886, "grad_norm": 0.22730568748587154, "learning_rate": 4.947205684023552e-06, "loss": 0.006, "step": 15040 }, { "epoch": 0.09901120372624224, "grad_norm": 0.2804196042808396, "learning_rate": 4.950495049504951e-06, "loss": 0.0056, "step": 15050 }, { "epoch": 0.09907699190147563, "grad_norm": 0.3696880713220039, "learning_rate": 4.95378441498635e-06, "loss": 0.0062, "step": 15060 }, { "epoch": 0.09914278007670901, "grad_norm": 0.16054202431559655, "learning_rate": 4.957073780467748e-06, "loss": 0.004, "step": 15070 }, { "epoch": 0.0992085682519424, "grad_norm": 0.36685246075426053, "learning_rate": 4.960363145949147e-06, "loss": 0.0034, "step": 15080 }, { "epoch": 0.09927435642717578, "grad_norm": 0.3517171159610628, "learning_rate": 4.963652511430546e-06, "loss": 0.007, "step": 15090 }, { "epoch": 0.09934014460240917, "grad_norm": 0.6485845798608368, "learning_rate": 4.966941876911944e-06, "loss": 0.0072, "step": 15100 }, { "epoch": 0.09940593277764255, "grad_norm": 0.5866483757207437, "learning_rate": 4.970231242393343e-06, "loss": 0.006, "step": 15110 }, { "epoch": 0.09947172095287593, "grad_norm": 0.2970583332351823, "learning_rate": 4.973520607874741e-06, "loss": 0.0065, "step": 15120 }, { "epoch": 0.09953750912810931, "grad_norm": 0.13801359087975745, "learning_rate": 4.97680997335614e-06, "loss": 0.008, "step": 15130 }, { "epoch": 0.09960329730334269, "grad_norm": 0.05968016173908541, "learning_rate": 4.9800993388375385e-06, "loss": 0.0051, "step": 15140 }, { "epoch": 0.09966908547857609, "grad_norm": 0.17711987168533327, "learning_rate": 4.983388704318937e-06, "loss": 0.0056, "step": 15150 }, { "epoch": 0.09973487365380947, "grad_norm": 0.22664331075723396, "learning_rate": 4.986678069800336e-06, "loss": 0.0091, "step": 15160 }, { "epoch": 0.09980066182904285, "grad_norm": 0.44594132366423883, "learning_rate": 4.989967435281734e-06, "loss": 0.0059, "step": 15170 }, { "epoch": 0.09986645000427623, "grad_norm": 0.24625271006276803, "learning_rate": 4.993256800763134e-06, "loss": 0.0052, "step": 15180 }, { "epoch": 0.09993223817950961, "grad_norm": 0.151036433070092, "learning_rate": 4.996546166244532e-06, "loss": 0.004, "step": 15190 }, { "epoch": 0.099998026354743, "grad_norm": 0.17083783124481697, "learning_rate": 4.99983553172593e-06, "loss": 0.0048, "step": 15200 }, { "epoch": 0.10006381452997638, "grad_norm": 0.20042399290560464, "learning_rate": 5.003124897207329e-06, "loss": 0.0048, "step": 15210 }, { "epoch": 0.10012960270520976, "grad_norm": 0.07954434758162127, "learning_rate": 5.006414262688728e-06, "loss": 0.004, "step": 15220 }, { "epoch": 0.10019539088044314, "grad_norm": 0.15476286589781316, "learning_rate": 5.009703628170126e-06, "loss": 0.0051, "step": 15230 }, { "epoch": 0.10026117905567654, "grad_norm": 0.18542782387525225, "learning_rate": 5.0129929936515255e-06, "loss": 0.0045, "step": 15240 }, { "epoch": 0.10032696723090992, "grad_norm": 0.3733920542268285, "learning_rate": 5.016282359132924e-06, "loss": 0.0068, "step": 15250 }, { "epoch": 0.1003927554061433, "grad_norm": 0.12710955682947045, "learning_rate": 5.019571724614323e-06, "loss": 0.0092, "step": 15260 }, { "epoch": 0.10045854358137668, "grad_norm": 0.0887439594578551, "learning_rate": 5.0228610900957205e-06, "loss": 0.0064, "step": 15270 }, { "epoch": 0.10052433175661006, "grad_norm": 0.1490568397988862, "learning_rate": 5.02615045557712e-06, "loss": 0.0037, "step": 15280 }, { "epoch": 0.10059011993184346, "grad_norm": 0.19279292472893947, "learning_rate": 5.029439821058518e-06, "loss": 0.0039, "step": 15290 }, { "epoch": 0.10065590810707684, "grad_norm": 0.4514978115325436, "learning_rate": 5.0327291865399165e-06, "loss": 0.0051, "step": 15300 }, { "epoch": 0.10072169628231022, "grad_norm": 0.08150454336084267, "learning_rate": 5.036018552021316e-06, "loss": 0.0037, "step": 15310 }, { "epoch": 0.1007874844575436, "grad_norm": 0.22973606404500682, "learning_rate": 5.039307917502714e-06, "loss": 0.0039, "step": 15320 }, { "epoch": 0.10085327263277699, "grad_norm": 0.1679526920177313, "learning_rate": 5.042597282984113e-06, "loss": 0.0046, "step": 15330 }, { "epoch": 0.10091906080801037, "grad_norm": 0.08812908574510614, "learning_rate": 5.045886648465511e-06, "loss": 0.0038, "step": 15340 }, { "epoch": 0.10098484898324375, "grad_norm": 0.15441918981238656, "learning_rate": 5.04917601394691e-06, "loss": 0.0036, "step": 15350 }, { "epoch": 0.10105063715847713, "grad_norm": 0.406408254167829, "learning_rate": 5.052465379428308e-06, "loss": 0.0061, "step": 15360 }, { "epoch": 0.10111642533371051, "grad_norm": 0.08426112834465371, "learning_rate": 5.0557547449097076e-06, "loss": 0.0053, "step": 15370 }, { "epoch": 0.10118221350894391, "grad_norm": 0.11885849495985057, "learning_rate": 5.059044110391106e-06, "loss": 0.0048, "step": 15380 }, { "epoch": 0.10124800168417729, "grad_norm": 0.309266749207043, "learning_rate": 5.062333475872505e-06, "loss": 0.0066, "step": 15390 }, { "epoch": 0.10131378985941067, "grad_norm": 0.6199026197538265, "learning_rate": 5.0656228413539035e-06, "loss": 0.0091, "step": 15400 }, { "epoch": 0.10137957803464405, "grad_norm": 0.24234111382045298, "learning_rate": 5.068912206835301e-06, "loss": 0.004, "step": 15410 }, { "epoch": 0.10144536620987743, "grad_norm": 0.19421360294688275, "learning_rate": 5.0722015723167e-06, "loss": 0.0049, "step": 15420 }, { "epoch": 0.10151115438511082, "grad_norm": 0.35153689605429117, "learning_rate": 5.0754909377980986e-06, "loss": 0.0064, "step": 15430 }, { "epoch": 0.1015769425603442, "grad_norm": 0.1317786553007796, "learning_rate": 5.078780303279498e-06, "loss": 0.0038, "step": 15440 }, { "epoch": 0.10164273073557759, "grad_norm": 0.1879255772610586, "learning_rate": 5.082069668760896e-06, "loss": 0.0055, "step": 15450 }, { "epoch": 0.10170851891081097, "grad_norm": 0.18714281121618653, "learning_rate": 5.085359034242295e-06, "loss": 0.0041, "step": 15460 }, { "epoch": 0.10177430708604436, "grad_norm": 0.20673946287473507, "learning_rate": 5.088648399723694e-06, "loss": 0.004, "step": 15470 }, { "epoch": 0.10184009526127774, "grad_norm": 0.3492632651582055, "learning_rate": 5.091937765205093e-06, "loss": 0.0046, "step": 15480 }, { "epoch": 0.10190588343651112, "grad_norm": 0.266122063161904, "learning_rate": 5.095227130686491e-06, "loss": 0.0075, "step": 15490 }, { "epoch": 0.1019716716117445, "grad_norm": 0.11887056454284735, "learning_rate": 5.0985164961678905e-06, "loss": 0.0045, "step": 15500 }, { "epoch": 0.10203745978697788, "grad_norm": 0.2221329729017106, "learning_rate": 5.101805861649288e-06, "loss": 0.0094, "step": 15510 }, { "epoch": 0.10210324796221128, "grad_norm": 0.2167376208954482, "learning_rate": 5.105095227130686e-06, "loss": 0.007, "step": 15520 }, { "epoch": 0.10216903613744466, "grad_norm": 0.2576619269922418, "learning_rate": 5.108384592612086e-06, "loss": 0.0037, "step": 15530 }, { "epoch": 0.10223482431267804, "grad_norm": 0.224804084048572, "learning_rate": 5.111673958093484e-06, "loss": 0.0061, "step": 15540 }, { "epoch": 0.10230061248791142, "grad_norm": 0.2100262092452342, "learning_rate": 5.114963323574883e-06, "loss": 0.0053, "step": 15550 }, { "epoch": 0.10236640066314481, "grad_norm": 0.16122578901411141, "learning_rate": 5.1182526890562815e-06, "loss": 0.0036, "step": 15560 }, { "epoch": 0.1024321888383782, "grad_norm": 0.43217674567946074, "learning_rate": 5.121542054537681e-06, "loss": 0.0047, "step": 15570 }, { "epoch": 0.10249797701361157, "grad_norm": 0.2160156205503559, "learning_rate": 5.124831420019078e-06, "loss": 0.007, "step": 15580 }, { "epoch": 0.10256376518884495, "grad_norm": 0.0787881981297994, "learning_rate": 5.1281207855004774e-06, "loss": 0.0059, "step": 15590 }, { "epoch": 0.10262955336407834, "grad_norm": 0.11430846129111653, "learning_rate": 5.131410150981876e-06, "loss": 0.0074, "step": 15600 }, { "epoch": 0.10269534153931173, "grad_norm": 0.2747246954080976, "learning_rate": 5.134699516463275e-06, "loss": 0.0054, "step": 15610 }, { "epoch": 0.10276112971454511, "grad_norm": 0.21672825081835134, "learning_rate": 5.137988881944673e-06, "loss": 0.0056, "step": 15620 }, { "epoch": 0.10282691788977849, "grad_norm": 0.14229569398888853, "learning_rate": 5.141278247426072e-06, "loss": 0.0039, "step": 15630 }, { "epoch": 0.10289270606501187, "grad_norm": 0.14842574600074962, "learning_rate": 5.144567612907471e-06, "loss": 0.0061, "step": 15640 }, { "epoch": 0.10295849424024525, "grad_norm": 0.06464303624237305, "learning_rate": 5.1478569783888685e-06, "loss": 0.0046, "step": 15650 }, { "epoch": 0.10302428241547865, "grad_norm": 0.27895195130266476, "learning_rate": 5.151146343870268e-06, "loss": 0.0044, "step": 15660 }, { "epoch": 0.10309007059071203, "grad_norm": 0.6807045942679011, "learning_rate": 5.154435709351666e-06, "loss": 0.0062, "step": 15670 }, { "epoch": 0.10315585876594541, "grad_norm": 0.06697900918450198, "learning_rate": 5.157725074833065e-06, "loss": 0.0038, "step": 15680 }, { "epoch": 0.10322164694117879, "grad_norm": 0.06361556604484693, "learning_rate": 5.161014440314464e-06, "loss": 0.0038, "step": 15690 }, { "epoch": 0.10328743511641218, "grad_norm": 0.48066222693655825, "learning_rate": 5.164303805795863e-06, "loss": 0.007, "step": 15700 }, { "epoch": 0.10335322329164556, "grad_norm": 0.11627465995654201, "learning_rate": 5.167593171277261e-06, "loss": 0.0046, "step": 15710 }, { "epoch": 0.10341901146687894, "grad_norm": 0.10391542768794775, "learning_rate": 5.17088253675866e-06, "loss": 0.0097, "step": 15720 }, { "epoch": 0.10348479964211232, "grad_norm": 0.13725426792446524, "learning_rate": 5.174171902240058e-06, "loss": 0.0048, "step": 15730 }, { "epoch": 0.1035505878173457, "grad_norm": 0.18942367502836943, "learning_rate": 5.177461267721456e-06, "loss": 0.0048, "step": 15740 }, { "epoch": 0.1036163759925791, "grad_norm": 0.2781337429400539, "learning_rate": 5.1807506332028555e-06, "loss": 0.0063, "step": 15750 }, { "epoch": 0.10368216416781248, "grad_norm": 0.18979951986708835, "learning_rate": 5.184039998684254e-06, "loss": 0.0056, "step": 15760 }, { "epoch": 0.10374795234304586, "grad_norm": 0.283113682488182, "learning_rate": 5.187329364165653e-06, "loss": 0.0062, "step": 15770 }, { "epoch": 0.10381374051827924, "grad_norm": 0.19034540645947381, "learning_rate": 5.190618729647051e-06, "loss": 0.0041, "step": 15780 }, { "epoch": 0.10387952869351263, "grad_norm": 0.25925144272752515, "learning_rate": 5.193908095128451e-06, "loss": 0.0027, "step": 15790 }, { "epoch": 0.10394531686874602, "grad_norm": 0.09102336046540857, "learning_rate": 5.197197460609848e-06, "loss": 0.0067, "step": 15800 }, { "epoch": 0.1040111050439794, "grad_norm": 0.2630976815640445, "learning_rate": 5.200486826091248e-06, "loss": 0.0047, "step": 15810 }, { "epoch": 0.10407689321921278, "grad_norm": 0.18587459148301178, "learning_rate": 5.203776191572646e-06, "loss": 0.0061, "step": 15820 }, { "epoch": 0.10414268139444616, "grad_norm": 0.9574428844840072, "learning_rate": 5.207065557054045e-06, "loss": 0.0043, "step": 15830 }, { "epoch": 0.10420846956967955, "grad_norm": 0.05139238286090652, "learning_rate": 5.210354922535443e-06, "loss": 0.0032, "step": 15840 }, { "epoch": 0.10427425774491293, "grad_norm": 0.07514615678758996, "learning_rate": 5.2136442880168425e-06, "loss": 0.0045, "step": 15850 }, { "epoch": 0.10434004592014631, "grad_norm": 0.2617313988409042, "learning_rate": 5.216933653498241e-06, "loss": 0.0046, "step": 15860 }, { "epoch": 0.1044058340953797, "grad_norm": 0.42274103843131644, "learning_rate": 5.220223018979639e-06, "loss": 0.0066, "step": 15870 }, { "epoch": 0.10447162227061307, "grad_norm": 0.32562586910105207, "learning_rate": 5.223512384461038e-06, "loss": 0.0046, "step": 15880 }, { "epoch": 0.10453741044584647, "grad_norm": 0.1522420575402596, "learning_rate": 5.226801749942436e-06, "loss": 0.0048, "step": 15890 }, { "epoch": 0.10460319862107985, "grad_norm": 0.11929118924297921, "learning_rate": 5.230091115423835e-06, "loss": 0.0053, "step": 15900 }, { "epoch": 0.10466898679631323, "grad_norm": 0.07107278800681113, "learning_rate": 5.2333804809052335e-06, "loss": 0.0026, "step": 15910 }, { "epoch": 0.10473477497154661, "grad_norm": 0.13836387386774207, "learning_rate": 5.236669846386633e-06, "loss": 0.0055, "step": 15920 }, { "epoch": 0.10480056314678, "grad_norm": 0.3400359680713465, "learning_rate": 5.239959211868031e-06, "loss": 0.0068, "step": 15930 }, { "epoch": 0.10486635132201338, "grad_norm": 0.1357449255807581, "learning_rate": 5.24324857734943e-06, "loss": 0.0053, "step": 15940 }, { "epoch": 0.10493213949724677, "grad_norm": 0.150022019565458, "learning_rate": 5.246537942830829e-06, "loss": 0.0031, "step": 15950 }, { "epoch": 0.10499792767248015, "grad_norm": 0.18917963142544636, "learning_rate": 5.249827308312228e-06, "loss": 0.0058, "step": 15960 }, { "epoch": 0.10506371584771353, "grad_norm": 0.11729417014473632, "learning_rate": 5.253116673793625e-06, "loss": 0.004, "step": 15970 }, { "epoch": 0.10512950402294692, "grad_norm": 0.15941369749684947, "learning_rate": 5.256406039275024e-06, "loss": 0.0076, "step": 15980 }, { "epoch": 0.1051952921981803, "grad_norm": 0.15437460783573664, "learning_rate": 5.259695404756423e-06, "loss": 0.0032, "step": 15990 }, { "epoch": 0.10526108037341368, "grad_norm": 0.17485338803957678, "learning_rate": 5.262984770237821e-06, "loss": 0.0053, "step": 16000 }, { "epoch": 0.10532686854864706, "grad_norm": 0.38787476601524445, "learning_rate": 5.2662741357192205e-06, "loss": 0.0042, "step": 16010 }, { "epoch": 0.10539265672388044, "grad_norm": 0.12105527759867754, "learning_rate": 5.269563501200619e-06, "loss": 0.0043, "step": 16020 }, { "epoch": 0.10545844489911384, "grad_norm": 0.35874695074180585, "learning_rate": 5.272852866682018e-06, "loss": 0.0066, "step": 16030 }, { "epoch": 0.10552423307434722, "grad_norm": 0.2338509329769459, "learning_rate": 5.276142232163416e-06, "loss": 0.0058, "step": 16040 }, { "epoch": 0.1055900212495806, "grad_norm": 0.12902242754349108, "learning_rate": 5.279431597644815e-06, "loss": 0.0037, "step": 16050 }, { "epoch": 0.10565580942481398, "grad_norm": 0.20078775651296776, "learning_rate": 5.282720963126213e-06, "loss": 0.0039, "step": 16060 }, { "epoch": 0.10572159760004737, "grad_norm": 0.21639756027545667, "learning_rate": 5.286010328607612e-06, "loss": 0.0048, "step": 16070 }, { "epoch": 0.10578738577528075, "grad_norm": 0.2982377756050533, "learning_rate": 5.289299694089011e-06, "loss": 0.0047, "step": 16080 }, { "epoch": 0.10585317395051413, "grad_norm": 0.21589624521437045, "learning_rate": 5.292589059570409e-06, "loss": 0.0107, "step": 16090 }, { "epoch": 0.10591896212574752, "grad_norm": 0.09697764603842654, "learning_rate": 5.295878425051808e-06, "loss": 0.0056, "step": 16100 }, { "epoch": 0.1059847503009809, "grad_norm": 0.19161001110181267, "learning_rate": 5.299167790533206e-06, "loss": 0.004, "step": 16110 }, { "epoch": 0.10605053847621429, "grad_norm": 0.5458126392421402, "learning_rate": 5.302457156014605e-06, "loss": 0.0031, "step": 16120 }, { "epoch": 0.10611632665144767, "grad_norm": 0.19181677279718357, "learning_rate": 5.305746521496003e-06, "loss": 0.0041, "step": 16130 }, { "epoch": 0.10618211482668105, "grad_norm": 0.1432843525322762, "learning_rate": 5.309035886977403e-06, "loss": 0.0043, "step": 16140 }, { "epoch": 0.10624790300191443, "grad_norm": 0.20617655458950637, "learning_rate": 5.312325252458801e-06, "loss": 0.0026, "step": 16150 }, { "epoch": 0.10631369117714783, "grad_norm": 0.3016912465180825, "learning_rate": 5.3156146179402e-06, "loss": 0.0048, "step": 16160 }, { "epoch": 0.1063794793523812, "grad_norm": 0.13805382900342966, "learning_rate": 5.3189039834215985e-06, "loss": 0.0056, "step": 16170 }, { "epoch": 0.10644526752761459, "grad_norm": 0.341633140759406, "learning_rate": 5.322193348902998e-06, "loss": 0.0077, "step": 16180 }, { "epoch": 0.10651105570284797, "grad_norm": 0.1558079730948888, "learning_rate": 5.325482714384395e-06, "loss": 0.0054, "step": 16190 }, { "epoch": 0.10657684387808135, "grad_norm": 0.5010116804881115, "learning_rate": 5.328772079865794e-06, "loss": 0.0075, "step": 16200 }, { "epoch": 0.10664263205331474, "grad_norm": 0.19062971208952925, "learning_rate": 5.332061445347193e-06, "loss": 0.0061, "step": 16210 }, { "epoch": 0.10670842022854812, "grad_norm": 0.18465641946640615, "learning_rate": 5.335350810828591e-06, "loss": 0.0043, "step": 16220 }, { "epoch": 0.1067742084037815, "grad_norm": 0.16831427730489476, "learning_rate": 5.33864017630999e-06, "loss": 0.0071, "step": 16230 }, { "epoch": 0.10683999657901488, "grad_norm": 0.20016523460729937, "learning_rate": 5.341929541791389e-06, "loss": 0.0044, "step": 16240 }, { "epoch": 0.10690578475424826, "grad_norm": 0.16691572704949897, "learning_rate": 5.345218907272788e-06, "loss": 0.0042, "step": 16250 }, { "epoch": 0.10697157292948166, "grad_norm": 0.3259091180029217, "learning_rate": 5.348508272754186e-06, "loss": 0.0051, "step": 16260 }, { "epoch": 0.10703736110471504, "grad_norm": 0.1411954891325112, "learning_rate": 5.3517976382355855e-06, "loss": 0.0058, "step": 16270 }, { "epoch": 0.10710314927994842, "grad_norm": 0.22826732421795198, "learning_rate": 5.355087003716983e-06, "loss": 0.003, "step": 16280 }, { "epoch": 0.1071689374551818, "grad_norm": 0.505383816975498, "learning_rate": 5.358376369198382e-06, "loss": 0.0026, "step": 16290 }, { "epoch": 0.1072347256304152, "grad_norm": 0.15411534976501381, "learning_rate": 5.361665734679781e-06, "loss": 0.0044, "step": 16300 }, { "epoch": 0.10730051380564858, "grad_norm": 0.14013928617814053, "learning_rate": 5.364955100161179e-06, "loss": 0.0038, "step": 16310 }, { "epoch": 0.10736630198088196, "grad_norm": 0.12616922409875625, "learning_rate": 5.368244465642578e-06, "loss": 0.0052, "step": 16320 }, { "epoch": 0.10743209015611534, "grad_norm": 0.06837463380610614, "learning_rate": 5.3715338311239766e-06, "loss": 0.0053, "step": 16330 }, { "epoch": 0.10749787833134872, "grad_norm": 0.4013394541911915, "learning_rate": 5.374823196605376e-06, "loss": 0.0045, "step": 16340 }, { "epoch": 0.10756366650658211, "grad_norm": 0.13732080981359182, "learning_rate": 5.378112562086773e-06, "loss": 0.0028, "step": 16350 }, { "epoch": 0.10762945468181549, "grad_norm": 0.3552674175722791, "learning_rate": 5.3814019275681725e-06, "loss": 0.0031, "step": 16360 }, { "epoch": 0.10769524285704887, "grad_norm": 0.12450501533032556, "learning_rate": 5.384691293049571e-06, "loss": 0.0037, "step": 16370 }, { "epoch": 0.10776103103228225, "grad_norm": 0.025772545663779043, "learning_rate": 5.38798065853097e-06, "loss": 0.0044, "step": 16380 }, { "epoch": 0.10782681920751565, "grad_norm": 0.17767144771020926, "learning_rate": 5.3912700240123684e-06, "loss": 0.0051, "step": 16390 }, { "epoch": 0.10789260738274903, "grad_norm": 0.22050016098085587, "learning_rate": 5.394559389493768e-06, "loss": 0.0058, "step": 16400 }, { "epoch": 0.10795839555798241, "grad_norm": 0.21898596026093547, "learning_rate": 5.397848754975166e-06, "loss": 0.0045, "step": 16410 }, { "epoch": 0.10802418373321579, "grad_norm": 0.40842145446527894, "learning_rate": 5.4011381204565635e-06, "loss": 0.005, "step": 16420 }, { "epoch": 0.10808997190844917, "grad_norm": 0.2285137270634085, "learning_rate": 5.404427485937963e-06, "loss": 0.0034, "step": 16430 }, { "epoch": 0.10815576008368256, "grad_norm": 0.0778919484560272, "learning_rate": 5.407716851419361e-06, "loss": 0.0048, "step": 16440 }, { "epoch": 0.10822154825891594, "grad_norm": 0.16285476185072828, "learning_rate": 5.41100621690076e-06, "loss": 0.0046, "step": 16450 }, { "epoch": 0.10828733643414933, "grad_norm": 0.37128342242544815, "learning_rate": 5.414295582382159e-06, "loss": 0.0046, "step": 16460 }, { "epoch": 0.1083531246093827, "grad_norm": 0.10783835182739518, "learning_rate": 5.417584947863558e-06, "loss": 0.0035, "step": 16470 }, { "epoch": 0.10841891278461609, "grad_norm": 0.2918166225623455, "learning_rate": 5.420874313344956e-06, "loss": 0.0057, "step": 16480 }, { "epoch": 0.10848470095984948, "grad_norm": 0.3330505717189125, "learning_rate": 5.4241636788263554e-06, "loss": 0.0044, "step": 16490 }, { "epoch": 0.10855048913508286, "grad_norm": 0.061826568590804244, "learning_rate": 5.427453044307753e-06, "loss": 0.0031, "step": 16500 }, { "epoch": 0.10861627731031624, "grad_norm": 0.08030224684975629, "learning_rate": 5.430742409789152e-06, "loss": 0.0048, "step": 16510 }, { "epoch": 0.10868206548554962, "grad_norm": 0.15246468912020367, "learning_rate": 5.4340317752705505e-06, "loss": 0.003, "step": 16520 }, { "epoch": 0.10874785366078302, "grad_norm": 0.31705329574779056, "learning_rate": 5.43732114075195e-06, "loss": 0.0059, "step": 16530 }, { "epoch": 0.1088136418360164, "grad_norm": 0.24917727740018725, "learning_rate": 5.440610506233348e-06, "loss": 0.0066, "step": 16540 }, { "epoch": 0.10887943001124978, "grad_norm": 0.36439023091447303, "learning_rate": 5.4438998717147465e-06, "loss": 0.0066, "step": 16550 }, { "epoch": 0.10894521818648316, "grad_norm": 0.15345231848412946, "learning_rate": 5.447189237196146e-06, "loss": 0.0047, "step": 16560 }, { "epoch": 0.10901100636171654, "grad_norm": 0.21707751681282264, "learning_rate": 5.450478602677543e-06, "loss": 0.0051, "step": 16570 }, { "epoch": 0.10907679453694993, "grad_norm": 0.16252655288656018, "learning_rate": 5.453767968158943e-06, "loss": 0.0034, "step": 16580 }, { "epoch": 0.10914258271218331, "grad_norm": 0.07610361186953025, "learning_rate": 5.457057333640341e-06, "loss": 0.0042, "step": 16590 }, { "epoch": 0.1092083708874167, "grad_norm": 0.2088841285417584, "learning_rate": 5.46034669912174e-06, "loss": 0.0059, "step": 16600 }, { "epoch": 0.10927415906265008, "grad_norm": 0.2563028867677239, "learning_rate": 5.463636064603138e-06, "loss": 0.0052, "step": 16610 }, { "epoch": 0.10933994723788347, "grad_norm": 0.31419424535581897, "learning_rate": 5.4669254300845375e-06, "loss": 0.0046, "step": 16620 }, { "epoch": 0.10940573541311685, "grad_norm": 0.33805083686243675, "learning_rate": 5.470214795565936e-06, "loss": 0.0046, "step": 16630 }, { "epoch": 0.10947152358835023, "grad_norm": 0.1826603379998971, "learning_rate": 5.473504161047335e-06, "loss": 0.0055, "step": 16640 }, { "epoch": 0.10953731176358361, "grad_norm": 0.20445550412231964, "learning_rate": 5.4767935265287335e-06, "loss": 0.0135, "step": 16650 }, { "epoch": 0.10960309993881699, "grad_norm": 0.3769493255441844, "learning_rate": 5.480082892010131e-06, "loss": 0.0053, "step": 16660 }, { "epoch": 0.10966888811405039, "grad_norm": 0.3854206619095558, "learning_rate": 5.48337225749153e-06, "loss": 0.0059, "step": 16670 }, { "epoch": 0.10973467628928377, "grad_norm": 0.20792144181369265, "learning_rate": 5.4866616229729286e-06, "loss": 0.0061, "step": 16680 }, { "epoch": 0.10980046446451715, "grad_norm": 0.19874569749984491, "learning_rate": 5.489950988454328e-06, "loss": 0.0032, "step": 16690 }, { "epoch": 0.10986625263975053, "grad_norm": 0.2823526657243239, "learning_rate": 5.493240353935726e-06, "loss": 0.0042, "step": 16700 }, { "epoch": 0.10993204081498391, "grad_norm": 0.5434936325715287, "learning_rate": 5.496529719417125e-06, "loss": 0.0073, "step": 16710 }, { "epoch": 0.1099978289902173, "grad_norm": 0.41988787221711704, "learning_rate": 5.499819084898524e-06, "loss": 0.006, "step": 16720 }, { "epoch": 0.11006361716545068, "grad_norm": 0.24009452139926324, "learning_rate": 5.503108450379923e-06, "loss": 0.0064, "step": 16730 }, { "epoch": 0.11012940534068406, "grad_norm": 0.41609756787546326, "learning_rate": 5.50639781586132e-06, "loss": 0.004, "step": 16740 }, { "epoch": 0.11019519351591744, "grad_norm": 0.1446812310296086, "learning_rate": 5.50968718134272e-06, "loss": 0.0054, "step": 16750 }, { "epoch": 0.11026098169115084, "grad_norm": 0.15765946407712988, "learning_rate": 5.512976546824118e-06, "loss": 0.0059, "step": 16760 }, { "epoch": 0.11032676986638422, "grad_norm": 0.24051071789434278, "learning_rate": 5.516265912305516e-06, "loss": 0.0051, "step": 16770 }, { "epoch": 0.1103925580416176, "grad_norm": 0.25695041312152805, "learning_rate": 5.5195552777869156e-06, "loss": 0.004, "step": 16780 }, { "epoch": 0.11045834621685098, "grad_norm": 0.11480736218019766, "learning_rate": 5.522844643268314e-06, "loss": 0.0039, "step": 16790 }, { "epoch": 0.11052413439208436, "grad_norm": 0.22743955501174096, "learning_rate": 5.526134008749713e-06, "loss": 0.004, "step": 16800 }, { "epoch": 0.11058992256731776, "grad_norm": 0.09051569732466525, "learning_rate": 5.529423374231111e-06, "loss": 0.007, "step": 16810 }, { "epoch": 0.11065571074255114, "grad_norm": 0.2721091485633707, "learning_rate": 5.53271273971251e-06, "loss": 0.004, "step": 16820 }, { "epoch": 0.11072149891778452, "grad_norm": 0.32574028124492677, "learning_rate": 5.536002105193908e-06, "loss": 0.004, "step": 16830 }, { "epoch": 0.1107872870930179, "grad_norm": 0.2906431518208393, "learning_rate": 5.5392914706753074e-06, "loss": 0.0059, "step": 16840 }, { "epoch": 0.11085307526825128, "grad_norm": 0.1295318740538546, "learning_rate": 5.542580836156706e-06, "loss": 0.0044, "step": 16850 }, { "epoch": 0.11091886344348467, "grad_norm": 0.27491244339167137, "learning_rate": 5.545870201638105e-06, "loss": 0.0059, "step": 16860 }, { "epoch": 0.11098465161871805, "grad_norm": 0.16534297940448997, "learning_rate": 5.549159567119503e-06, "loss": 0.0048, "step": 16870 }, { "epoch": 0.11105043979395143, "grad_norm": 0.3547143374696486, "learning_rate": 5.552448932600901e-06, "loss": 0.0079, "step": 16880 }, { "epoch": 0.11111622796918481, "grad_norm": 0.06926248531818222, "learning_rate": 5.5557382980823e-06, "loss": 0.0085, "step": 16890 }, { "epoch": 0.11118201614441821, "grad_norm": 0.1922371920147405, "learning_rate": 5.5590276635636984e-06, "loss": 0.0051, "step": 16900 }, { "epoch": 0.11124780431965159, "grad_norm": 0.14700531458364866, "learning_rate": 5.562317029045098e-06, "loss": 0.0036, "step": 16910 }, { "epoch": 0.11131359249488497, "grad_norm": 0.2119970053516186, "learning_rate": 5.565606394526496e-06, "loss": 0.0036, "step": 16920 }, { "epoch": 0.11137938067011835, "grad_norm": 0.18725865281425846, "learning_rate": 5.568895760007895e-06, "loss": 0.0034, "step": 16930 }, { "epoch": 0.11144516884535173, "grad_norm": 0.20966095349615216, "learning_rate": 5.572185125489294e-06, "loss": 0.005, "step": 16940 }, { "epoch": 0.11151095702058512, "grad_norm": 0.9546653776686981, "learning_rate": 5.575474490970693e-06, "loss": 0.0063, "step": 16950 }, { "epoch": 0.1115767451958185, "grad_norm": 0.1782216544782264, "learning_rate": 5.57876385645209e-06, "loss": 0.0074, "step": 16960 }, { "epoch": 0.11164253337105189, "grad_norm": 0.14408948274730832, "learning_rate": 5.58205322193349e-06, "loss": 0.0042, "step": 16970 }, { "epoch": 0.11170832154628527, "grad_norm": 0.2605450087550767, "learning_rate": 5.585342587414888e-06, "loss": 0.0049, "step": 16980 }, { "epoch": 0.11177410972151866, "grad_norm": 0.10269690859627428, "learning_rate": 5.588631952896286e-06, "loss": 0.0046, "step": 16990 }, { "epoch": 0.11183989789675204, "grad_norm": 0.2522585309969254, "learning_rate": 5.5919213183776855e-06, "loss": 0.0059, "step": 17000 }, { "epoch": 0.11190568607198542, "grad_norm": 0.3742732715341101, "learning_rate": 5.595210683859084e-06, "loss": 0.0047, "step": 17010 }, { "epoch": 0.1119714742472188, "grad_norm": 0.10856907694731421, "learning_rate": 5.598500049340483e-06, "loss": 0.0053, "step": 17020 }, { "epoch": 0.11203726242245218, "grad_norm": 0.3559125199166049, "learning_rate": 5.601789414821881e-06, "loss": 0.0029, "step": 17030 }, { "epoch": 0.11210305059768558, "grad_norm": 0.2291870710977297, "learning_rate": 5.605078780303281e-06, "loss": 0.0044, "step": 17040 }, { "epoch": 0.11216883877291896, "grad_norm": 0.05325246875034208, "learning_rate": 5.608368145784678e-06, "loss": 0.0066, "step": 17050 }, { "epoch": 0.11223462694815234, "grad_norm": 0.14680288214145368, "learning_rate": 5.611657511266077e-06, "loss": 0.0041, "step": 17060 }, { "epoch": 0.11230041512338572, "grad_norm": 0.11765766659064705, "learning_rate": 5.614946876747476e-06, "loss": 0.0044, "step": 17070 }, { "epoch": 0.1123662032986191, "grad_norm": 0.33948700814609245, "learning_rate": 5.618236242228875e-06, "loss": 0.0045, "step": 17080 }, { "epoch": 0.1124319914738525, "grad_norm": 0.2730962030425047, "learning_rate": 5.621525607710273e-06, "loss": 0.0027, "step": 17090 }, { "epoch": 0.11249777964908587, "grad_norm": 0.1754181106752263, "learning_rate": 5.624814973191672e-06, "loss": 0.0062, "step": 17100 }, { "epoch": 0.11256356782431926, "grad_norm": 0.18940751949288606, "learning_rate": 5.628104338673071e-06, "loss": 0.0039, "step": 17110 }, { "epoch": 0.11262935599955264, "grad_norm": 0.24446657705382327, "learning_rate": 5.631393704154468e-06, "loss": 0.0035, "step": 17120 }, { "epoch": 0.11269514417478603, "grad_norm": 0.1811374442173629, "learning_rate": 5.6346830696358675e-06, "loss": 0.0042, "step": 17130 }, { "epoch": 0.11276093235001941, "grad_norm": 0.16982717561987296, "learning_rate": 5.637972435117266e-06, "loss": 0.0036, "step": 17140 }, { "epoch": 0.11282672052525279, "grad_norm": 0.2504113833489159, "learning_rate": 5.641261800598665e-06, "loss": 0.0078, "step": 17150 }, { "epoch": 0.11289250870048617, "grad_norm": 0.39358193579651296, "learning_rate": 5.6445511660800635e-06, "loss": 0.0055, "step": 17160 }, { "epoch": 0.11295829687571955, "grad_norm": 0.10546430085404808, "learning_rate": 5.647840531561463e-06, "loss": 0.0049, "step": 17170 }, { "epoch": 0.11302408505095295, "grad_norm": 0.155334527614609, "learning_rate": 5.651129897042861e-06, "loss": 0.0036, "step": 17180 }, { "epoch": 0.11308987322618633, "grad_norm": 0.15345446842241675, "learning_rate": 5.65441926252426e-06, "loss": 0.0071, "step": 17190 }, { "epoch": 0.11315566140141971, "grad_norm": 0.14054246310590865, "learning_rate": 5.657708628005658e-06, "loss": 0.0042, "step": 17200 }, { "epoch": 0.11322144957665309, "grad_norm": 0.3141259073206391, "learning_rate": 5.660997993487057e-06, "loss": 0.0034, "step": 17210 }, { "epoch": 0.11328723775188648, "grad_norm": 0.22936566622344434, "learning_rate": 5.664287358968455e-06, "loss": 0.0055, "step": 17220 }, { "epoch": 0.11335302592711986, "grad_norm": 0.2991088477379644, "learning_rate": 5.667576724449854e-06, "loss": 0.006, "step": 17230 }, { "epoch": 0.11341881410235324, "grad_norm": 0.4695706947035613, "learning_rate": 5.670866089931253e-06, "loss": 0.0041, "step": 17240 }, { "epoch": 0.11348460227758662, "grad_norm": 0.157152146526213, "learning_rate": 5.674155455412651e-06, "loss": 0.0056, "step": 17250 }, { "epoch": 0.11355039045282, "grad_norm": 0.29867835113509983, "learning_rate": 5.6774448208940505e-06, "loss": 0.0057, "step": 17260 }, { "epoch": 0.1136161786280534, "grad_norm": 0.14470967434548965, "learning_rate": 5.680734186375448e-06, "loss": 0.004, "step": 17270 }, { "epoch": 0.11368196680328678, "grad_norm": 0.10425628594043242, "learning_rate": 5.684023551856847e-06, "loss": 0.0053, "step": 17280 }, { "epoch": 0.11374775497852016, "grad_norm": 0.2532260361320278, "learning_rate": 5.6873129173382456e-06, "loss": 0.0038, "step": 17290 }, { "epoch": 0.11381354315375354, "grad_norm": 0.1474842003646823, "learning_rate": 5.690602282819645e-06, "loss": 0.0039, "step": 17300 }, { "epoch": 0.11387933132898692, "grad_norm": 0.2631332738953854, "learning_rate": 5.693891648301043e-06, "loss": 0.003, "step": 17310 }, { "epoch": 0.11394511950422032, "grad_norm": 0.412129409091528, "learning_rate": 5.697181013782442e-06, "loss": 0.0034, "step": 17320 }, { "epoch": 0.1140109076794537, "grad_norm": 0.14832915143966685, "learning_rate": 5.700470379263841e-06, "loss": 0.0045, "step": 17330 }, { "epoch": 0.11407669585468708, "grad_norm": 0.14115900338208615, "learning_rate": 5.703759744745238e-06, "loss": 0.0038, "step": 17340 }, { "epoch": 0.11414248402992046, "grad_norm": 0.26279926394843695, "learning_rate": 5.707049110226638e-06, "loss": 0.0026, "step": 17350 }, { "epoch": 0.11420827220515385, "grad_norm": 0.22927381439480482, "learning_rate": 5.710338475708036e-06, "loss": 0.0042, "step": 17360 }, { "epoch": 0.11427406038038723, "grad_norm": 0.17941356949924708, "learning_rate": 5.713627841189435e-06, "loss": 0.0048, "step": 17370 }, { "epoch": 0.11433984855562061, "grad_norm": 0.2675488709651785, "learning_rate": 5.716917206670833e-06, "loss": 0.0064, "step": 17380 }, { "epoch": 0.114405636730854, "grad_norm": 0.37812552046679626, "learning_rate": 5.720206572152233e-06, "loss": 0.0098, "step": 17390 }, { "epoch": 0.11447142490608737, "grad_norm": 0.32853325918218995, "learning_rate": 5.723495937633631e-06, "loss": 0.005, "step": 17400 }, { "epoch": 0.11453721308132077, "grad_norm": 0.11042704705642403, "learning_rate": 5.72678530311503e-06, "loss": 0.0033, "step": 17410 }, { "epoch": 0.11460300125655415, "grad_norm": 0.1804065560130388, "learning_rate": 5.7300746685964285e-06, "loss": 0.0051, "step": 17420 }, { "epoch": 0.11466878943178753, "grad_norm": 0.1798866836335706, "learning_rate": 5.733364034077828e-06, "loss": 0.0039, "step": 17430 }, { "epoch": 0.11473457760702091, "grad_norm": 0.42970260581516295, "learning_rate": 5.736653399559225e-06, "loss": 0.0069, "step": 17440 }, { "epoch": 0.1148003657822543, "grad_norm": 0.23389230732953337, "learning_rate": 5.739942765040624e-06, "loss": 0.0067, "step": 17450 }, { "epoch": 0.11486615395748768, "grad_norm": 0.2809855780236181, "learning_rate": 5.743232130522023e-06, "loss": 0.0037, "step": 17460 }, { "epoch": 0.11493194213272107, "grad_norm": 0.13283148051233015, "learning_rate": 5.746521496003421e-06, "loss": 0.0045, "step": 17470 }, { "epoch": 0.11499773030795445, "grad_norm": 0.26569813419306154, "learning_rate": 5.74981086148482e-06, "loss": 0.0064, "step": 17480 }, { "epoch": 0.11506351848318783, "grad_norm": 0.5353700157042747, "learning_rate": 5.753100226966219e-06, "loss": 0.0052, "step": 17490 }, { "epoch": 0.11512930665842122, "grad_norm": 0.1232180434580227, "learning_rate": 5.756389592447618e-06, "loss": 0.0063, "step": 17500 }, { "epoch": 0.1151950948336546, "grad_norm": 0.2589605825215261, "learning_rate": 5.7596789579290155e-06, "loss": 0.0043, "step": 17510 }, { "epoch": 0.11526088300888798, "grad_norm": 0.3930876521614418, "learning_rate": 5.762968323410415e-06, "loss": 0.0074, "step": 17520 }, { "epoch": 0.11532667118412136, "grad_norm": 0.05742494884407348, "learning_rate": 5.766257688891813e-06, "loss": 0.0034, "step": 17530 }, { "epoch": 0.11539245935935474, "grad_norm": 0.21908197615378366, "learning_rate": 5.769547054373212e-06, "loss": 0.003, "step": 17540 }, { "epoch": 0.11545824753458814, "grad_norm": 0.22011607119407228, "learning_rate": 5.772836419854611e-06, "loss": 0.0034, "step": 17550 }, { "epoch": 0.11552403570982152, "grad_norm": 0.15827338916634526, "learning_rate": 5.776125785336009e-06, "loss": 0.0037, "step": 17560 }, { "epoch": 0.1155898238850549, "grad_norm": 0.224483489202994, "learning_rate": 5.779415150817408e-06, "loss": 0.0063, "step": 17570 }, { "epoch": 0.11565561206028828, "grad_norm": 0.36904144436451874, "learning_rate": 5.782704516298806e-06, "loss": 0.007, "step": 17580 }, { "epoch": 0.11572140023552167, "grad_norm": 0.11664533260320006, "learning_rate": 5.785993881780205e-06, "loss": 0.0033, "step": 17590 }, { "epoch": 0.11578718841075505, "grad_norm": 0.18473472836441093, "learning_rate": 5.789283247261603e-06, "loss": 0.0053, "step": 17600 }, { "epoch": 0.11585297658598843, "grad_norm": 0.2111550305089958, "learning_rate": 5.7925726127430025e-06, "loss": 0.0042, "step": 17610 }, { "epoch": 0.11591876476122182, "grad_norm": 0.27721794076268114, "learning_rate": 5.795861978224401e-06, "loss": 0.0062, "step": 17620 }, { "epoch": 0.1159845529364552, "grad_norm": 0.30265024423941483, "learning_rate": 5.7991513437058e-06, "loss": 0.0048, "step": 17630 }, { "epoch": 0.11605034111168859, "grad_norm": 0.17887839112074647, "learning_rate": 5.802440709187198e-06, "loss": 0.0071, "step": 17640 }, { "epoch": 0.11611612928692197, "grad_norm": 0.12461644435544977, "learning_rate": 5.805730074668598e-06, "loss": 0.0057, "step": 17650 }, { "epoch": 0.11618191746215535, "grad_norm": 0.3067592035531449, "learning_rate": 5.809019440149995e-06, "loss": 0.0054, "step": 17660 }, { "epoch": 0.11624770563738873, "grad_norm": 0.21201596296511876, "learning_rate": 5.8123088056313935e-06, "loss": 0.0046, "step": 17670 }, { "epoch": 0.11631349381262211, "grad_norm": 0.08289092792999421, "learning_rate": 5.815598171112793e-06, "loss": 0.0044, "step": 17680 }, { "epoch": 0.1163792819878555, "grad_norm": 0.3180298668589161, "learning_rate": 5.818887536594191e-06, "loss": 0.0065, "step": 17690 }, { "epoch": 0.11644507016308889, "grad_norm": 0.22496819727970674, "learning_rate": 5.82217690207559e-06, "loss": 0.0038, "step": 17700 }, { "epoch": 0.11651085833832227, "grad_norm": 0.21133566810772308, "learning_rate": 5.825466267556989e-06, "loss": 0.0075, "step": 17710 }, { "epoch": 0.11657664651355565, "grad_norm": 0.22576650387251834, "learning_rate": 5.828755633038388e-06, "loss": 0.0036, "step": 17720 }, { "epoch": 0.11664243468878904, "grad_norm": 0.1011257180523394, "learning_rate": 5.832044998519786e-06, "loss": 0.0047, "step": 17730 }, { "epoch": 0.11670822286402242, "grad_norm": 0.1410598065117108, "learning_rate": 5.835334364001185e-06, "loss": 0.0053, "step": 17740 }, { "epoch": 0.1167740110392558, "grad_norm": 0.40922354612862044, "learning_rate": 5.838623729482583e-06, "loss": 0.0047, "step": 17750 }, { "epoch": 0.11683979921448918, "grad_norm": 0.1636778433334331, "learning_rate": 5.841913094963982e-06, "loss": 0.0053, "step": 17760 }, { "epoch": 0.11690558738972257, "grad_norm": 0.18971831090931696, "learning_rate": 5.8452024604453805e-06, "loss": 0.0081, "step": 17770 }, { "epoch": 0.11697137556495596, "grad_norm": 0.3349709581538356, "learning_rate": 5.848491825926779e-06, "loss": 0.0056, "step": 17780 }, { "epoch": 0.11703716374018934, "grad_norm": 0.17985608640693862, "learning_rate": 5.851781191408178e-06, "loss": 0.0052, "step": 17790 }, { "epoch": 0.11710295191542272, "grad_norm": 0.20786365753394348, "learning_rate": 5.8550705568895764e-06, "loss": 0.0044, "step": 17800 }, { "epoch": 0.1171687400906561, "grad_norm": 0.18709383262535353, "learning_rate": 5.858359922370976e-06, "loss": 0.0034, "step": 17810 }, { "epoch": 0.1172345282658895, "grad_norm": 0.10797648477723853, "learning_rate": 5.861649287852373e-06, "loss": 0.0067, "step": 17820 }, { "epoch": 0.11730031644112288, "grad_norm": 0.04102680193294419, "learning_rate": 5.864938653333772e-06, "loss": 0.0033, "step": 17830 }, { "epoch": 0.11736610461635626, "grad_norm": 0.19059585964245004, "learning_rate": 5.868228018815171e-06, "loss": 0.0038, "step": 17840 }, { "epoch": 0.11743189279158964, "grad_norm": 0.6187920880810978, "learning_rate": 5.87151738429657e-06, "loss": 0.0056, "step": 17850 }, { "epoch": 0.11749768096682302, "grad_norm": 0.2665441943673008, "learning_rate": 5.874806749777968e-06, "loss": 0.0048, "step": 17860 }, { "epoch": 0.11756346914205641, "grad_norm": 0.26148943864855123, "learning_rate": 5.8780961152593675e-06, "loss": 0.0044, "step": 17870 }, { "epoch": 0.11762925731728979, "grad_norm": 0.27703470556511167, "learning_rate": 5.881385480740766e-06, "loss": 0.006, "step": 17880 }, { "epoch": 0.11769504549252317, "grad_norm": 0.30083285239287355, "learning_rate": 5.884674846222163e-06, "loss": 0.0039, "step": 17890 }, { "epoch": 0.11776083366775655, "grad_norm": 0.25083624658650766, "learning_rate": 5.887964211703563e-06, "loss": 0.0052, "step": 17900 }, { "epoch": 0.11782662184298993, "grad_norm": 0.33744254117611194, "learning_rate": 5.891253577184961e-06, "loss": 0.0027, "step": 17910 }, { "epoch": 0.11789241001822333, "grad_norm": 0.1293112786694405, "learning_rate": 5.89454294266636e-06, "loss": 0.009, "step": 17920 }, { "epoch": 0.11795819819345671, "grad_norm": 0.17426024028656878, "learning_rate": 5.8978323081477585e-06, "loss": 0.0034, "step": 17930 }, { "epoch": 0.11802398636869009, "grad_norm": 0.01583701015398467, "learning_rate": 5.901121673629158e-06, "loss": 0.0026, "step": 17940 }, { "epoch": 0.11808977454392347, "grad_norm": 0.42864583748490004, "learning_rate": 5.904411039110556e-06, "loss": 0.0052, "step": 17950 }, { "epoch": 0.11815556271915686, "grad_norm": 0.2717714641556134, "learning_rate": 5.907700404591955e-06, "loss": 0.0038, "step": 17960 }, { "epoch": 0.11822135089439025, "grad_norm": 0.07045579838089948, "learning_rate": 5.910989770073353e-06, "loss": 0.0044, "step": 17970 }, { "epoch": 0.11828713906962363, "grad_norm": 0.16650255010780243, "learning_rate": 5.914279135554752e-06, "loss": 0.0049, "step": 17980 }, { "epoch": 0.118352927244857, "grad_norm": 0.07229967588096295, "learning_rate": 5.91756850103615e-06, "loss": 0.0038, "step": 17990 }, { "epoch": 0.11841871542009039, "grad_norm": 0.5630130423144887, "learning_rate": 5.92085786651755e-06, "loss": 0.0048, "step": 18000 }, { "epoch": 0.11848450359532378, "grad_norm": 0.12395315355912304, "learning_rate": 5.924147231998948e-06, "loss": 0.0035, "step": 18010 }, { "epoch": 0.11855029177055716, "grad_norm": 0.0415372999171099, "learning_rate": 5.927436597480346e-06, "loss": 0.0061, "step": 18020 }, { "epoch": 0.11861607994579054, "grad_norm": 0.18278867779798977, "learning_rate": 5.9307259629617455e-06, "loss": 0.0047, "step": 18030 }, { "epoch": 0.11868186812102392, "grad_norm": 0.1364377446094129, "learning_rate": 5.934015328443143e-06, "loss": 0.005, "step": 18040 }, { "epoch": 0.11874765629625732, "grad_norm": 0.3563397338962151, "learning_rate": 5.937304693924542e-06, "loss": 0.0046, "step": 18050 }, { "epoch": 0.1188134444714907, "grad_norm": 0.22065136118531806, "learning_rate": 5.940594059405941e-06, "loss": 0.0047, "step": 18060 }, { "epoch": 0.11887923264672408, "grad_norm": 0.06949628986502104, "learning_rate": 5.94388342488734e-06, "loss": 0.0024, "step": 18070 }, { "epoch": 0.11894502082195746, "grad_norm": 0.14572416987509, "learning_rate": 5.947172790368738e-06, "loss": 0.0053, "step": 18080 }, { "epoch": 0.11901080899719084, "grad_norm": 0.0702822082703542, "learning_rate": 5.950462155850137e-06, "loss": 0.0054, "step": 18090 }, { "epoch": 0.11907659717242423, "grad_norm": 0.156492842926526, "learning_rate": 5.953751521331536e-06, "loss": 0.004, "step": 18100 }, { "epoch": 0.11914238534765761, "grad_norm": 0.21096172025425558, "learning_rate": 5.957040886812935e-06, "loss": 0.0045, "step": 18110 }, { "epoch": 0.119208173522891, "grad_norm": 0.16938251262372117, "learning_rate": 5.960330252294333e-06, "loss": 0.0051, "step": 18120 }, { "epoch": 0.11927396169812438, "grad_norm": 0.3362561234761229, "learning_rate": 5.963619617775731e-06, "loss": 0.0036, "step": 18130 }, { "epoch": 0.11933974987335776, "grad_norm": 0.24013060011312187, "learning_rate": 5.96690898325713e-06, "loss": 0.0043, "step": 18140 }, { "epoch": 0.11940553804859115, "grad_norm": 0.15661811408042653, "learning_rate": 5.970198348738528e-06, "loss": 0.0056, "step": 18150 }, { "epoch": 0.11947132622382453, "grad_norm": 0.03783939128430875, "learning_rate": 5.973487714219928e-06, "loss": 0.0035, "step": 18160 }, { "epoch": 0.11953711439905791, "grad_norm": 0.08881420443058662, "learning_rate": 5.976777079701326e-06, "loss": 0.0052, "step": 18170 }, { "epoch": 0.11960290257429129, "grad_norm": 0.09564216663932544, "learning_rate": 5.980066445182725e-06, "loss": 0.0056, "step": 18180 }, { "epoch": 0.11966869074952469, "grad_norm": 0.2833002475894108, "learning_rate": 5.9833558106641236e-06, "loss": 0.0067, "step": 18190 }, { "epoch": 0.11973447892475807, "grad_norm": 0.29406630916184645, "learning_rate": 5.986645176145523e-06, "loss": 0.0062, "step": 18200 }, { "epoch": 0.11980026709999145, "grad_norm": 0.12000204879447676, "learning_rate": 5.98993454162692e-06, "loss": 0.0028, "step": 18210 }, { "epoch": 0.11986605527522483, "grad_norm": 0.18249310147464703, "learning_rate": 5.9932239071083195e-06, "loss": 0.0032, "step": 18220 }, { "epoch": 0.11993184345045821, "grad_norm": 0.05829965298023125, "learning_rate": 5.996513272589718e-06, "loss": 0.0053, "step": 18230 }, { "epoch": 0.1199976316256916, "grad_norm": 0.4983089024000351, "learning_rate": 5.999802638071116e-06, "loss": 0.0023, "step": 18240 }, { "epoch": 0.12006341980092498, "grad_norm": 0.2238647705549004, "learning_rate": 6.0030920035525154e-06, "loss": 0.0037, "step": 18250 }, { "epoch": 0.12012920797615836, "grad_norm": 0.4467323744502088, "learning_rate": 6.006381369033914e-06, "loss": 0.0044, "step": 18260 }, { "epoch": 0.12019499615139174, "grad_norm": 0.1330388805923319, "learning_rate": 6.009670734515313e-06, "loss": 0.0031, "step": 18270 }, { "epoch": 0.12026078432662514, "grad_norm": 0.2944774020099683, "learning_rate": 6.0129600999967105e-06, "loss": 0.0038, "step": 18280 }, { "epoch": 0.12032657250185852, "grad_norm": 0.08132634465996748, "learning_rate": 6.01624946547811e-06, "loss": 0.0025, "step": 18290 }, { "epoch": 0.1203923606770919, "grad_norm": 0.3570539886701495, "learning_rate": 6.019538830959508e-06, "loss": 0.0038, "step": 18300 }, { "epoch": 0.12045814885232528, "grad_norm": 0.03697351201579277, "learning_rate": 6.022828196440907e-06, "loss": 0.0069, "step": 18310 }, { "epoch": 0.12052393702755866, "grad_norm": 0.21300900913459303, "learning_rate": 6.026117561922306e-06, "loss": 0.0035, "step": 18320 }, { "epoch": 0.12058972520279206, "grad_norm": 0.15203049809238986, "learning_rate": 6.029406927403705e-06, "loss": 0.0049, "step": 18330 }, { "epoch": 0.12065551337802544, "grad_norm": 0.2473012376882348, "learning_rate": 6.032696292885103e-06, "loss": 0.0062, "step": 18340 }, { "epoch": 0.12072130155325882, "grad_norm": 0.13231310169290106, "learning_rate": 6.035985658366501e-06, "loss": 0.0063, "step": 18350 }, { "epoch": 0.1207870897284922, "grad_norm": 0.12082964032217221, "learning_rate": 6.0392750238479e-06, "loss": 0.0033, "step": 18360 }, { "epoch": 0.12085287790372558, "grad_norm": 0.2225738672754135, "learning_rate": 6.042564389329298e-06, "loss": 0.0076, "step": 18370 }, { "epoch": 0.12091866607895897, "grad_norm": 0.16196569571682515, "learning_rate": 6.0458537548106975e-06, "loss": 0.0038, "step": 18380 }, { "epoch": 0.12098445425419235, "grad_norm": 0.23121550870367502, "learning_rate": 6.049143120292096e-06, "loss": 0.0043, "step": 18390 }, { "epoch": 0.12105024242942573, "grad_norm": 0.14222872046464002, "learning_rate": 6.052432485773495e-06, "loss": 0.0063, "step": 18400 }, { "epoch": 0.12111603060465911, "grad_norm": 0.32036616135528817, "learning_rate": 6.0557218512548935e-06, "loss": 0.0045, "step": 18410 }, { "epoch": 0.12118181877989251, "grad_norm": 0.040095269720163994, "learning_rate": 6.059011216736293e-06, "loss": 0.0077, "step": 18420 }, { "epoch": 0.12124760695512589, "grad_norm": 0.11945011311898474, "learning_rate": 6.06230058221769e-06, "loss": 0.0069, "step": 18430 }, { "epoch": 0.12131339513035927, "grad_norm": 0.19741534609645725, "learning_rate": 6.06558994769909e-06, "loss": 0.0059, "step": 18440 }, { "epoch": 0.12137918330559265, "grad_norm": 0.3626233298746212, "learning_rate": 6.068879313180488e-06, "loss": 0.0044, "step": 18450 }, { "epoch": 0.12144497148082603, "grad_norm": 0.4286997103760642, "learning_rate": 6.072168678661886e-06, "loss": 0.0046, "step": 18460 }, { "epoch": 0.12151075965605942, "grad_norm": 0.24682040429647722, "learning_rate": 6.075458044143285e-06, "loss": 0.0033, "step": 18470 }, { "epoch": 0.1215765478312928, "grad_norm": 0.09698076565411888, "learning_rate": 6.078747409624684e-06, "loss": 0.0049, "step": 18480 }, { "epoch": 0.12164233600652619, "grad_norm": 0.13932112005172137, "learning_rate": 6.082036775106083e-06, "loss": 0.0054, "step": 18490 }, { "epoch": 0.12170812418175957, "grad_norm": 0.19635386106612962, "learning_rate": 6.085326140587481e-06, "loss": 0.0048, "step": 18500 }, { "epoch": 0.12177391235699296, "grad_norm": 0.19298092298165376, "learning_rate": 6.0886155060688805e-06, "loss": 0.0051, "step": 18510 }, { "epoch": 0.12183970053222634, "grad_norm": 0.18498246874282898, "learning_rate": 6.091904871550278e-06, "loss": 0.0049, "step": 18520 }, { "epoch": 0.12190548870745972, "grad_norm": 0.1698328909399384, "learning_rate": 6.095194237031677e-06, "loss": 0.0031, "step": 18530 }, { "epoch": 0.1219712768826931, "grad_norm": 0.4434619402360783, "learning_rate": 6.0984836025130755e-06, "loss": 0.0032, "step": 18540 }, { "epoch": 0.12203706505792648, "grad_norm": 0.2353542658535857, "learning_rate": 6.101772967994475e-06, "loss": 0.0062, "step": 18550 }, { "epoch": 0.12210285323315988, "grad_norm": 0.48159899227446484, "learning_rate": 6.105062333475873e-06, "loss": 0.0046, "step": 18560 }, { "epoch": 0.12216864140839326, "grad_norm": 0.1527658885063102, "learning_rate": 6.1083516989572715e-06, "loss": 0.0055, "step": 18570 }, { "epoch": 0.12223442958362664, "grad_norm": 0.3878593555758721, "learning_rate": 6.111641064438671e-06, "loss": 0.0047, "step": 18580 }, { "epoch": 0.12230021775886002, "grad_norm": 0.1533647006809957, "learning_rate": 6.114930429920068e-06, "loss": 0.0069, "step": 18590 }, { "epoch": 0.1223660059340934, "grad_norm": 0.31876166095139635, "learning_rate": 6.118219795401467e-06, "loss": 0.0035, "step": 18600 }, { "epoch": 0.1224317941093268, "grad_norm": 0.18176191764909136, "learning_rate": 6.121509160882866e-06, "loss": 0.0058, "step": 18610 }, { "epoch": 0.12249758228456017, "grad_norm": 0.10980484973013134, "learning_rate": 6.124798526364265e-06, "loss": 0.005, "step": 18620 }, { "epoch": 0.12256337045979356, "grad_norm": 0.21768248611970117, "learning_rate": 6.128087891845663e-06, "loss": 0.0034, "step": 18630 }, { "epoch": 0.12262915863502694, "grad_norm": 0.4108604869946293, "learning_rate": 6.1313772573270626e-06, "loss": 0.0044, "step": 18640 }, { "epoch": 0.12269494681026033, "grad_norm": 0.06433398753376533, "learning_rate": 6.134666622808461e-06, "loss": 0.0052, "step": 18650 }, { "epoch": 0.12276073498549371, "grad_norm": 0.20199020635564446, "learning_rate": 6.13795598828986e-06, "loss": 0.0059, "step": 18660 }, { "epoch": 0.12282652316072709, "grad_norm": 0.2381120275775589, "learning_rate": 6.141245353771258e-06, "loss": 0.004, "step": 18670 }, { "epoch": 0.12289231133596047, "grad_norm": 0.2658414762645938, "learning_rate": 6.144534719252657e-06, "loss": 0.004, "step": 18680 }, { "epoch": 0.12295809951119385, "grad_norm": 0.24646113303542336, "learning_rate": 6.147824084734055e-06, "loss": 0.0037, "step": 18690 }, { "epoch": 0.12302388768642725, "grad_norm": 0.20503306389745554, "learning_rate": 6.1511134502154536e-06, "loss": 0.0035, "step": 18700 }, { "epoch": 0.12308967586166063, "grad_norm": 0.5728803367540591, "learning_rate": 6.154402815696853e-06, "loss": 0.0057, "step": 18710 }, { "epoch": 0.12315546403689401, "grad_norm": 0.6341282472747434, "learning_rate": 6.157692181178251e-06, "loss": 0.0067, "step": 18720 }, { "epoch": 0.12322125221212739, "grad_norm": 0.19075455042483572, "learning_rate": 6.16098154665965e-06, "loss": 0.0052, "step": 18730 }, { "epoch": 0.12328704038736077, "grad_norm": 0.20310970779463222, "learning_rate": 6.164270912141048e-06, "loss": 0.0042, "step": 18740 }, { "epoch": 0.12335282856259416, "grad_norm": 0.256402388825664, "learning_rate": 6.167560277622447e-06, "loss": 0.0039, "step": 18750 }, { "epoch": 0.12341861673782754, "grad_norm": 0.17221212960423762, "learning_rate": 6.1708496431038454e-06, "loss": 0.0038, "step": 18760 }, { "epoch": 0.12348440491306092, "grad_norm": 0.10552256694023777, "learning_rate": 6.174139008585245e-06, "loss": 0.0043, "step": 18770 }, { "epoch": 0.1235501930882943, "grad_norm": 0.290045855787884, "learning_rate": 6.177428374066643e-06, "loss": 0.0047, "step": 18780 }, { "epoch": 0.1236159812635277, "grad_norm": 0.09600556494737189, "learning_rate": 6.180717739548042e-06, "loss": 0.0053, "step": 18790 }, { "epoch": 0.12368176943876108, "grad_norm": 0.2682352793042354, "learning_rate": 6.184007105029441e-06, "loss": 0.0052, "step": 18800 }, { "epoch": 0.12374755761399446, "grad_norm": 0.289632924373866, "learning_rate": 6.187296470510838e-06, "loss": 0.0061, "step": 18810 }, { "epoch": 0.12381334578922784, "grad_norm": 0.16794064029341887, "learning_rate": 6.190585835992237e-06, "loss": 0.0067, "step": 18820 }, { "epoch": 0.12387913396446122, "grad_norm": 0.2710320712746987, "learning_rate": 6.193875201473636e-06, "loss": 0.0043, "step": 18830 }, { "epoch": 0.12394492213969462, "grad_norm": 0.21111998232559206, "learning_rate": 6.197164566955035e-06, "loss": 0.0059, "step": 18840 }, { "epoch": 0.124010710314928, "grad_norm": 0.18263186392634914, "learning_rate": 6.200453932436433e-06, "loss": 0.0075, "step": 18850 }, { "epoch": 0.12407649849016138, "grad_norm": 0.2791358020684783, "learning_rate": 6.2037432979178324e-06, "loss": 0.0047, "step": 18860 }, { "epoch": 0.12414228666539476, "grad_norm": 0.143003673485915, "learning_rate": 6.207032663399231e-06, "loss": 0.0035, "step": 18870 }, { "epoch": 0.12420807484062815, "grad_norm": 0.1485746049017779, "learning_rate": 6.21032202888063e-06, "loss": 0.0053, "step": 18880 }, { "epoch": 0.12427386301586153, "grad_norm": 0.34193249409852483, "learning_rate": 6.213611394362028e-06, "loss": 0.0053, "step": 18890 }, { "epoch": 0.12433965119109491, "grad_norm": 0.05449255963579124, "learning_rate": 6.216900759843428e-06, "loss": 0.0057, "step": 18900 }, { "epoch": 0.1244054393663283, "grad_norm": 0.22424574386379184, "learning_rate": 6.220190125324825e-06, "loss": 0.0046, "step": 18910 }, { "epoch": 0.12447122754156167, "grad_norm": 0.27591274946144484, "learning_rate": 6.2234794908062235e-06, "loss": 0.0059, "step": 18920 }, { "epoch": 0.12453701571679507, "grad_norm": 0.29367038850909666, "learning_rate": 6.226768856287623e-06, "loss": 0.0042, "step": 18930 }, { "epoch": 0.12460280389202845, "grad_norm": 0.07628380058196682, "learning_rate": 6.230058221769021e-06, "loss": 0.0042, "step": 18940 }, { "epoch": 0.12466859206726183, "grad_norm": 0.13907501715757842, "learning_rate": 6.23334758725042e-06, "loss": 0.005, "step": 18950 }, { "epoch": 0.12473438024249521, "grad_norm": 0.27090425551611513, "learning_rate": 6.236636952731819e-06, "loss": 0.0033, "step": 18960 }, { "epoch": 0.12480016841772859, "grad_norm": 0.5995351093585218, "learning_rate": 6.239926318213218e-06, "loss": 0.0037, "step": 18970 }, { "epoch": 0.12486595659296199, "grad_norm": 0.23572707855269973, "learning_rate": 6.243215683694615e-06, "loss": 0.0041, "step": 18980 }, { "epoch": 0.12493174476819537, "grad_norm": 0.11631259984125261, "learning_rate": 6.2465050491760145e-06, "loss": 0.0031, "step": 18990 }, { "epoch": 0.12499753294342875, "grad_norm": 0.046654453672029375, "learning_rate": 6.249794414657413e-06, "loss": 0.0034, "step": 19000 }, { "epoch": 0.12506332111866214, "grad_norm": 0.06189575416145562, "learning_rate": 6.253083780138812e-06, "loss": 0.0042, "step": 19010 }, { "epoch": 0.1251291092938955, "grad_norm": 0.21126340561862492, "learning_rate": 6.2563731456202105e-06, "loss": 0.0052, "step": 19020 }, { "epoch": 0.1251948974691289, "grad_norm": 0.25295599955404374, "learning_rate": 6.259662511101609e-06, "loss": 0.0043, "step": 19030 }, { "epoch": 0.12526068564436227, "grad_norm": 0.3146079520266508, "learning_rate": 6.262951876583008e-06, "loss": 0.0051, "step": 19040 }, { "epoch": 0.12532647381959566, "grad_norm": 0.36629626880778327, "learning_rate": 6.2662412420644056e-06, "loss": 0.0028, "step": 19050 }, { "epoch": 0.12539226199482906, "grad_norm": 0.08713194160798869, "learning_rate": 6.269530607545805e-06, "loss": 0.0054, "step": 19060 }, { "epoch": 0.12545805017006242, "grad_norm": 0.07130368955156466, "learning_rate": 6.272819973027203e-06, "loss": 0.0035, "step": 19070 }, { "epoch": 0.12552383834529582, "grad_norm": 0.12461142864811477, "learning_rate": 6.276109338508602e-06, "loss": 0.0057, "step": 19080 }, { "epoch": 0.1255896265205292, "grad_norm": 0.1914040056841326, "learning_rate": 6.279398703990001e-06, "loss": 0.0085, "step": 19090 }, { "epoch": 0.12565541469576258, "grad_norm": 0.17220807279033262, "learning_rate": 6.2826880694714e-06, "loss": 0.0062, "step": 19100 }, { "epoch": 0.12572120287099597, "grad_norm": 0.20555111556812244, "learning_rate": 6.285977434952798e-06, "loss": 0.0051, "step": 19110 }, { "epoch": 0.12578699104622934, "grad_norm": 0.5844973809870156, "learning_rate": 6.2892668004341975e-06, "loss": 0.0138, "step": 19120 }, { "epoch": 0.12585277922146273, "grad_norm": 0.06845424607832355, "learning_rate": 6.292556165915595e-06, "loss": 0.0051, "step": 19130 }, { "epoch": 0.12591856739669613, "grad_norm": 0.23894445381543236, "learning_rate": 6.295845531396993e-06, "loss": 0.0055, "step": 19140 }, { "epoch": 0.1259843555719295, "grad_norm": 0.07459065585434933, "learning_rate": 6.2991348968783926e-06, "loss": 0.0082, "step": 19150 }, { "epoch": 0.1260501437471629, "grad_norm": 0.047843885187783416, "learning_rate": 6.302424262359791e-06, "loss": 0.0033, "step": 19160 }, { "epoch": 0.12611593192239626, "grad_norm": 0.1740591339262061, "learning_rate": 6.30571362784119e-06, "loss": 0.0043, "step": 19170 }, { "epoch": 0.12618172009762965, "grad_norm": 0.17391572668414115, "learning_rate": 6.3090029933225885e-06, "loss": 0.0036, "step": 19180 }, { "epoch": 0.12624750827286305, "grad_norm": 0.30628938627853153, "learning_rate": 6.312292358803988e-06, "loss": 0.0068, "step": 19190 }, { "epoch": 0.1263132964480964, "grad_norm": 0.5295855490971779, "learning_rate": 6.315581724285385e-06, "loss": 0.0056, "step": 19200 }, { "epoch": 0.1263790846233298, "grad_norm": 0.2695527929420773, "learning_rate": 6.318871089766785e-06, "loss": 0.0041, "step": 19210 }, { "epoch": 0.12644487279856317, "grad_norm": 0.26380246856584044, "learning_rate": 6.322160455248183e-06, "loss": 0.0075, "step": 19220 }, { "epoch": 0.12651066097379657, "grad_norm": 0.04451624165505983, "learning_rate": 6.325449820729582e-06, "loss": 0.0035, "step": 19230 }, { "epoch": 0.12657644914902996, "grad_norm": 0.16247544386854576, "learning_rate": 6.32873918621098e-06, "loss": 0.0045, "step": 19240 }, { "epoch": 0.12664223732426333, "grad_norm": 0.35461340010157866, "learning_rate": 6.332028551692379e-06, "loss": 0.0066, "step": 19250 }, { "epoch": 0.12670802549949672, "grad_norm": 0.22798102482418092, "learning_rate": 6.335317917173778e-06, "loss": 0.0038, "step": 19260 }, { "epoch": 0.1267738136747301, "grad_norm": 0.1777241194666593, "learning_rate": 6.338607282655176e-06, "loss": 0.0042, "step": 19270 }, { "epoch": 0.12683960184996348, "grad_norm": 0.1609647581672965, "learning_rate": 6.3418966481365755e-06, "loss": 0.0071, "step": 19280 }, { "epoch": 0.12690539002519688, "grad_norm": 0.5326307003372345, "learning_rate": 6.345186013617973e-06, "loss": 0.0041, "step": 19290 }, { "epoch": 0.12697117820043025, "grad_norm": 0.13099038983064135, "learning_rate": 6.348475379099372e-06, "loss": 0.0049, "step": 19300 }, { "epoch": 0.12703696637566364, "grad_norm": 0.11689996550525766, "learning_rate": 6.351764744580771e-06, "loss": 0.0025, "step": 19310 }, { "epoch": 0.12710275455089703, "grad_norm": 0.25863555559739076, "learning_rate": 6.35505411006217e-06, "loss": 0.0055, "step": 19320 }, { "epoch": 0.1271685427261304, "grad_norm": 0.1780204625436786, "learning_rate": 6.358343475543568e-06, "loss": 0.0044, "step": 19330 }, { "epoch": 0.1272343309013638, "grad_norm": 0.2037751030887683, "learning_rate": 6.361632841024967e-06, "loss": 0.0045, "step": 19340 }, { "epoch": 0.12730011907659716, "grad_norm": 0.15794807480024795, "learning_rate": 6.364922206506366e-06, "loss": 0.0045, "step": 19350 }, { "epoch": 0.12736590725183056, "grad_norm": 0.1267753374437187, "learning_rate": 6.368211571987765e-06, "loss": 0.0041, "step": 19360 }, { "epoch": 0.12743169542706395, "grad_norm": 0.19022496220867938, "learning_rate": 6.3715009374691625e-06, "loss": 0.0036, "step": 19370 }, { "epoch": 0.12749748360229732, "grad_norm": 0.10156021176718605, "learning_rate": 6.374790302950561e-06, "loss": 0.0035, "step": 19380 }, { "epoch": 0.1275632717775307, "grad_norm": 0.08932123740841987, "learning_rate": 6.37807966843196e-06, "loss": 0.0041, "step": 19390 }, { "epoch": 0.12762905995276408, "grad_norm": 0.3870058331895885, "learning_rate": 6.381369033913358e-06, "loss": 0.0043, "step": 19400 }, { "epoch": 0.12769484812799747, "grad_norm": 0.15711157751793842, "learning_rate": 6.384658399394758e-06, "loss": 0.0058, "step": 19410 }, { "epoch": 0.12776063630323087, "grad_norm": 0.1454431142073081, "learning_rate": 6.387947764876156e-06, "loss": 0.0077, "step": 19420 }, { "epoch": 0.12782642447846423, "grad_norm": 0.1445730266103891, "learning_rate": 6.391237130357555e-06, "loss": 0.0044, "step": 19430 }, { "epoch": 0.12789221265369763, "grad_norm": 0.16129379672895447, "learning_rate": 6.394526495838953e-06, "loss": 0.0059, "step": 19440 }, { "epoch": 0.127958000828931, "grad_norm": 0.3716470068409116, "learning_rate": 6.397815861320352e-06, "loss": 0.0047, "step": 19450 }, { "epoch": 0.1280237890041644, "grad_norm": 0.18016323710919452, "learning_rate": 6.40110522680175e-06, "loss": 0.0044, "step": 19460 }, { "epoch": 0.12808957717939778, "grad_norm": 0.19870099539719885, "learning_rate": 6.4043945922831495e-06, "loss": 0.0046, "step": 19470 }, { "epoch": 0.12815536535463115, "grad_norm": 0.24720667084275064, "learning_rate": 6.407683957764548e-06, "loss": 0.005, "step": 19480 }, { "epoch": 0.12822115352986455, "grad_norm": 0.10562404193230177, "learning_rate": 6.410973323245946e-06, "loss": 0.0036, "step": 19490 }, { "epoch": 0.1282869417050979, "grad_norm": 0.17590217113042445, "learning_rate": 6.414262688727345e-06, "loss": 0.0028, "step": 19500 }, { "epoch": 0.1283527298803313, "grad_norm": 0.6431032361754243, "learning_rate": 6.417552054208743e-06, "loss": 0.0049, "step": 19510 }, { "epoch": 0.1284185180555647, "grad_norm": 0.26941936676308603, "learning_rate": 6.420841419690142e-06, "loss": 0.0056, "step": 19520 }, { "epoch": 0.12848430623079807, "grad_norm": 0.24568775004564383, "learning_rate": 6.4241307851715405e-06, "loss": 0.007, "step": 19530 }, { "epoch": 0.12855009440603146, "grad_norm": 0.07797408096135415, "learning_rate": 6.42742015065294e-06, "loss": 0.0024, "step": 19540 }, { "epoch": 0.12861588258126486, "grad_norm": 0.2579931225055184, "learning_rate": 6.430709516134338e-06, "loss": 0.0058, "step": 19550 }, { "epoch": 0.12868167075649822, "grad_norm": 0.2596134207610506, "learning_rate": 6.433998881615737e-06, "loss": 0.0043, "step": 19560 }, { "epoch": 0.12874745893173162, "grad_norm": 0.053549456371150835, "learning_rate": 6.437288247097136e-06, "loss": 0.0043, "step": 19570 }, { "epoch": 0.12881324710696498, "grad_norm": 0.11265285462886987, "learning_rate": 6.440577612578535e-06, "loss": 0.0026, "step": 19580 }, { "epoch": 0.12887903528219838, "grad_norm": 0.1488564654455675, "learning_rate": 6.443866978059933e-06, "loss": 0.0045, "step": 19590 }, { "epoch": 0.12894482345743177, "grad_norm": 0.04003809610743213, "learning_rate": 6.447156343541331e-06, "loss": 0.005, "step": 19600 }, { "epoch": 0.12901061163266514, "grad_norm": 0.2349663881820212, "learning_rate": 6.45044570902273e-06, "loss": 0.0056, "step": 19610 }, { "epoch": 0.12907639980789853, "grad_norm": 0.28078816451981664, "learning_rate": 6.453735074504128e-06, "loss": 0.0076, "step": 19620 }, { "epoch": 0.1291421879831319, "grad_norm": 0.10456881882487083, "learning_rate": 6.4570244399855275e-06, "loss": 0.0032, "step": 19630 }, { "epoch": 0.1292079761583653, "grad_norm": 0.235566908205041, "learning_rate": 6.460313805466926e-06, "loss": 0.004, "step": 19640 }, { "epoch": 0.1292737643335987, "grad_norm": 0.17313467372350133, "learning_rate": 6.463603170948325e-06, "loss": 0.0056, "step": 19650 }, { "epoch": 0.12933955250883206, "grad_norm": 0.1245552567988488, "learning_rate": 6.4668925364297234e-06, "loss": 0.0042, "step": 19660 }, { "epoch": 0.12940534068406545, "grad_norm": 0.13468114054778318, "learning_rate": 6.470181901911123e-06, "loss": 0.0034, "step": 19670 }, { "epoch": 0.12947112885929882, "grad_norm": 0.0390460316192852, "learning_rate": 6.47347126739252e-06, "loss": 0.0052, "step": 19680 }, { "epoch": 0.1295369170345322, "grad_norm": 0.02970419343135038, "learning_rate": 6.476760632873919e-06, "loss": 0.0048, "step": 19690 }, { "epoch": 0.1296027052097656, "grad_norm": 0.1880989980845875, "learning_rate": 6.480049998355318e-06, "loss": 0.0037, "step": 19700 }, { "epoch": 0.12966849338499897, "grad_norm": 0.042872591405902445, "learning_rate": 6.483339363836716e-06, "loss": 0.0046, "step": 19710 }, { "epoch": 0.12973428156023237, "grad_norm": 0.08048077614856207, "learning_rate": 6.486628729318115e-06, "loss": 0.0051, "step": 19720 }, { "epoch": 0.12980006973546573, "grad_norm": 0.29724650453440377, "learning_rate": 6.489918094799514e-06, "loss": 0.0072, "step": 19730 }, { "epoch": 0.12986585791069913, "grad_norm": 0.05772917182667836, "learning_rate": 6.493207460280913e-06, "loss": 0.0028, "step": 19740 }, { "epoch": 0.12993164608593252, "grad_norm": 0.25877242434566133, "learning_rate": 6.49649682576231e-06, "loss": 0.0052, "step": 19750 }, { "epoch": 0.1299974342611659, "grad_norm": 0.1799221885272678, "learning_rate": 6.49978619124371e-06, "loss": 0.006, "step": 19760 }, { "epoch": 0.13006322243639928, "grad_norm": 0.05933394385845701, "learning_rate": 6.503075556725108e-06, "loss": 0.0073, "step": 19770 }, { "epoch": 0.13012901061163268, "grad_norm": 0.1579299944604846, "learning_rate": 6.506364922206507e-06, "loss": 0.0148, "step": 19780 }, { "epoch": 0.13019479878686605, "grad_norm": 0.05697343732940252, "learning_rate": 6.5096542876879055e-06, "loss": 0.005, "step": 19790 }, { "epoch": 0.13026058696209944, "grad_norm": 0.040900340165818797, "learning_rate": 6.512943653169305e-06, "loss": 0.0045, "step": 19800 }, { "epoch": 0.1303263751373328, "grad_norm": 0.21310202270189874, "learning_rate": 6.516233018650703e-06, "loss": 0.0046, "step": 19810 }, { "epoch": 0.1303921633125662, "grad_norm": 0.2151223805032133, "learning_rate": 6.519522384132101e-06, "loss": 0.0037, "step": 19820 }, { "epoch": 0.1304579514877996, "grad_norm": 0.3630009037752218, "learning_rate": 6.5228117496135e-06, "loss": 0.005, "step": 19830 }, { "epoch": 0.13052373966303296, "grad_norm": 0.13554432074010897, "learning_rate": 6.526101115094898e-06, "loss": 0.0047, "step": 19840 }, { "epoch": 0.13058952783826636, "grad_norm": 0.1562437944401343, "learning_rate": 6.529390480576297e-06, "loss": 0.0036, "step": 19850 }, { "epoch": 0.13065531601349972, "grad_norm": 0.0892411562566069, "learning_rate": 6.532679846057696e-06, "loss": 0.0037, "step": 19860 }, { "epoch": 0.13072110418873312, "grad_norm": 0.35674523474195385, "learning_rate": 6.535969211539095e-06, "loss": 0.0054, "step": 19870 }, { "epoch": 0.1307868923639665, "grad_norm": 0.15223611425039235, "learning_rate": 6.539258577020493e-06, "loss": 0.0035, "step": 19880 }, { "epoch": 0.13085268053919988, "grad_norm": 0.2529745672997958, "learning_rate": 6.5425479425018925e-06, "loss": 0.005, "step": 19890 }, { "epoch": 0.13091846871443327, "grad_norm": 0.32350674440375593, "learning_rate": 6.54583730798329e-06, "loss": 0.0065, "step": 19900 }, { "epoch": 0.13098425688966664, "grad_norm": 0.14621349357655278, "learning_rate": 6.549126673464689e-06, "loss": 0.0081, "step": 19910 }, { "epoch": 0.13105004506490003, "grad_norm": 0.0611955927017748, "learning_rate": 6.552416038946088e-06, "loss": 0.0046, "step": 19920 }, { "epoch": 0.13111583324013343, "grad_norm": 0.28405444114054385, "learning_rate": 6.555705404427486e-06, "loss": 0.0032, "step": 19930 }, { "epoch": 0.1311816214153668, "grad_norm": 0.10483460054604424, "learning_rate": 6.558994769908885e-06, "loss": 0.0028, "step": 19940 }, { "epoch": 0.1312474095906002, "grad_norm": 0.07671698667285191, "learning_rate": 6.5622841353902835e-06, "loss": 0.0037, "step": 19950 }, { "epoch": 0.13131319776583356, "grad_norm": 0.18022197216848887, "learning_rate": 6.565573500871683e-06, "loss": 0.0024, "step": 19960 }, { "epoch": 0.13137898594106695, "grad_norm": 0.12224236280254056, "learning_rate": 6.56886286635308e-06, "loss": 0.0046, "step": 19970 }, { "epoch": 0.13144477411630034, "grad_norm": 0.3403241446825503, "learning_rate": 6.57215223183448e-06, "loss": 0.0033, "step": 19980 }, { "epoch": 0.1315105622915337, "grad_norm": 0.28604439881520577, "learning_rate": 6.575441597315878e-06, "loss": 0.0058, "step": 19990 }, { "epoch": 0.1315763504667671, "grad_norm": 0.22679088074791276, "learning_rate": 6.578730962797277e-06, "loss": 0.0033, "step": 20000 }, { "epoch": 0.1316421386420005, "grad_norm": 0.2957889293350007, "learning_rate": 6.582020328278675e-06, "loss": 0.0029, "step": 20010 }, { "epoch": 0.13170792681723387, "grad_norm": 0.1142945910797539, "learning_rate": 6.585309693760075e-06, "loss": 0.0043, "step": 20020 }, { "epoch": 0.13177371499246726, "grad_norm": 0.1749002154174136, "learning_rate": 6.588599059241473e-06, "loss": 0.0035, "step": 20030 }, { "epoch": 0.13183950316770063, "grad_norm": 0.1131058210443266, "learning_rate": 6.591888424722872e-06, "loss": 0.0034, "step": 20040 }, { "epoch": 0.13190529134293402, "grad_norm": 0.25781972570961653, "learning_rate": 6.5951777902042706e-06, "loss": 0.0055, "step": 20050 }, { "epoch": 0.13197107951816742, "grad_norm": 0.4393201822846295, "learning_rate": 6.598467155685668e-06, "loss": 0.0076, "step": 20060 }, { "epoch": 0.13203686769340078, "grad_norm": 0.2586265793348021, "learning_rate": 6.601756521167067e-06, "loss": 0.005, "step": 20070 }, { "epoch": 0.13210265586863418, "grad_norm": 0.602595232588726, "learning_rate": 6.605045886648466e-06, "loss": 0.0043, "step": 20080 }, { "epoch": 0.13216844404386754, "grad_norm": 0.14820959804049924, "learning_rate": 6.608335252129865e-06, "loss": 0.0049, "step": 20090 }, { "epoch": 0.13223423221910094, "grad_norm": 0.17674789849784783, "learning_rate": 6.611624617611263e-06, "loss": 0.0047, "step": 20100 }, { "epoch": 0.13230002039433433, "grad_norm": 0.45144058858436265, "learning_rate": 6.614913983092662e-06, "loss": 0.0043, "step": 20110 }, { "epoch": 0.1323658085695677, "grad_norm": 0.18133473963927677, "learning_rate": 6.618203348574061e-06, "loss": 0.0048, "step": 20120 }, { "epoch": 0.1324315967448011, "grad_norm": 0.09619123520549662, "learning_rate": 6.62149271405546e-06, "loss": 0.0076, "step": 20130 }, { "epoch": 0.13249738492003446, "grad_norm": 0.045075616162829645, "learning_rate": 6.6247820795368575e-06, "loss": 0.005, "step": 20140 }, { "epoch": 0.13256317309526786, "grad_norm": 0.17170686256283169, "learning_rate": 6.628071445018257e-06, "loss": 0.0065, "step": 20150 }, { "epoch": 0.13262896127050125, "grad_norm": 0.3699998935117178, "learning_rate": 6.631360810499655e-06, "loss": 0.0035, "step": 20160 }, { "epoch": 0.13269474944573462, "grad_norm": 0.1991632819691742, "learning_rate": 6.6346501759810534e-06, "loss": 0.0059, "step": 20170 }, { "epoch": 0.132760537620968, "grad_norm": 0.14994117989871364, "learning_rate": 6.637939541462453e-06, "loss": 0.003, "step": 20180 }, { "epoch": 0.13282632579620138, "grad_norm": 0.2002789085040586, "learning_rate": 6.641228906943851e-06, "loss": 0.0014, "step": 20190 }, { "epoch": 0.13289211397143477, "grad_norm": 0.2734158007730615, "learning_rate": 6.64451827242525e-06, "loss": 0.0043, "step": 20200 }, { "epoch": 0.13295790214666817, "grad_norm": 0.11943282315226801, "learning_rate": 6.647807637906648e-06, "loss": 0.0048, "step": 20210 }, { "epoch": 0.13302369032190153, "grad_norm": 0.1284646159674024, "learning_rate": 6.651097003388047e-06, "loss": 0.0051, "step": 20220 }, { "epoch": 0.13308947849713493, "grad_norm": 0.05401675236902267, "learning_rate": 6.654386368869445e-06, "loss": 0.0033, "step": 20230 }, { "epoch": 0.13315526667236832, "grad_norm": 0.15950357067835336, "learning_rate": 6.6576757343508445e-06, "loss": 0.003, "step": 20240 }, { "epoch": 0.1332210548476017, "grad_norm": 0.08210755931946019, "learning_rate": 6.660965099832243e-06, "loss": 0.0042, "step": 20250 }, { "epoch": 0.13328684302283508, "grad_norm": 0.1443993434852066, "learning_rate": 6.664254465313642e-06, "loss": 0.0041, "step": 20260 }, { "epoch": 0.13335263119806845, "grad_norm": 0.15369539491566173, "learning_rate": 6.6675438307950404e-06, "loss": 0.0043, "step": 20270 }, { "epoch": 0.13341841937330184, "grad_norm": 0.1201259786952562, "learning_rate": 6.670833196276438e-06, "loss": 0.0042, "step": 20280 }, { "epoch": 0.13348420754853524, "grad_norm": 0.19195321720338984, "learning_rate": 6.674122561757837e-06, "loss": 0.0081, "step": 20290 }, { "epoch": 0.1335499957237686, "grad_norm": 0.1594453986312537, "learning_rate": 6.6774119272392355e-06, "loss": 0.0048, "step": 20300 }, { "epoch": 0.133615783899002, "grad_norm": 0.23355589052732084, "learning_rate": 6.680701292720635e-06, "loss": 0.0042, "step": 20310 }, { "epoch": 0.13368157207423537, "grad_norm": 0.42547803919095545, "learning_rate": 6.683990658202033e-06, "loss": 0.0047, "step": 20320 }, { "epoch": 0.13374736024946876, "grad_norm": 0.11146081699632926, "learning_rate": 6.687280023683432e-06, "loss": 0.0047, "step": 20330 }, { "epoch": 0.13381314842470216, "grad_norm": 0.009601748656736615, "learning_rate": 6.690569389164831e-06, "loss": 0.0045, "step": 20340 }, { "epoch": 0.13387893659993552, "grad_norm": 0.22331760821784352, "learning_rate": 6.69385875464623e-06, "loss": 0.0027, "step": 20350 }, { "epoch": 0.13394472477516892, "grad_norm": 0.13534727507372765, "learning_rate": 6.697148120127628e-06, "loss": 0.0022, "step": 20360 }, { "epoch": 0.13401051295040228, "grad_norm": 0.19315564003038352, "learning_rate": 6.7004374856090275e-06, "loss": 0.004, "step": 20370 }, { "epoch": 0.13407630112563568, "grad_norm": 0.11860921295058177, "learning_rate": 6.703726851090425e-06, "loss": 0.0029, "step": 20380 }, { "epoch": 0.13414208930086907, "grad_norm": 0.2547914817324598, "learning_rate": 6.707016216571823e-06, "loss": 0.0037, "step": 20390 }, { "epoch": 0.13420787747610244, "grad_norm": 0.202981118827671, "learning_rate": 6.7103055820532225e-06, "loss": 0.0036, "step": 20400 }, { "epoch": 0.13427366565133583, "grad_norm": 0.1730845248042853, "learning_rate": 6.713594947534621e-06, "loss": 0.0042, "step": 20410 }, { "epoch": 0.1343394538265692, "grad_norm": 0.11403378621624195, "learning_rate": 6.71688431301602e-06, "loss": 0.0038, "step": 20420 }, { "epoch": 0.1344052420018026, "grad_norm": 0.25438904397988965, "learning_rate": 6.7201736784974185e-06, "loss": 0.0052, "step": 20430 }, { "epoch": 0.134471030177036, "grad_norm": 0.24417539520135548, "learning_rate": 6.723463043978818e-06, "loss": 0.0043, "step": 20440 }, { "epoch": 0.13453681835226936, "grad_norm": 0.43751776058073594, "learning_rate": 6.726752409460215e-06, "loss": 0.0048, "step": 20450 }, { "epoch": 0.13460260652750275, "grad_norm": 0.16713268261810538, "learning_rate": 6.730041774941614e-06, "loss": 0.0034, "step": 20460 }, { "epoch": 0.13466839470273614, "grad_norm": 0.491026966194323, "learning_rate": 6.733331140423013e-06, "loss": 0.005, "step": 20470 }, { "epoch": 0.1347341828779695, "grad_norm": 0.31063172423519525, "learning_rate": 6.736620505904412e-06, "loss": 0.0041, "step": 20480 }, { "epoch": 0.1347999710532029, "grad_norm": 0.07770468201482042, "learning_rate": 6.73990987138581e-06, "loss": 0.0023, "step": 20490 }, { "epoch": 0.13486575922843627, "grad_norm": 0.20512004950298376, "learning_rate": 6.743199236867209e-06, "loss": 0.0033, "step": 20500 }, { "epoch": 0.13493154740366967, "grad_norm": 0.04407042229475785, "learning_rate": 6.746488602348608e-06, "loss": 0.0038, "step": 20510 }, { "epoch": 0.13499733557890306, "grad_norm": 0.14863572920669696, "learning_rate": 6.7497779678300054e-06, "loss": 0.003, "step": 20520 }, { "epoch": 0.13506312375413643, "grad_norm": 0.5914215262704665, "learning_rate": 6.753067333311405e-06, "loss": 0.0048, "step": 20530 }, { "epoch": 0.13512891192936982, "grad_norm": 0.2838901726821563, "learning_rate": 6.756356698792803e-06, "loss": 0.0026, "step": 20540 }, { "epoch": 0.1351947001046032, "grad_norm": 0.18770519248929127, "learning_rate": 6.759646064274202e-06, "loss": 0.0054, "step": 20550 }, { "epoch": 0.13526048827983658, "grad_norm": 0.038273737787192175, "learning_rate": 6.7629354297556006e-06, "loss": 0.0026, "step": 20560 }, { "epoch": 0.13532627645506998, "grad_norm": 0.19460940295966264, "learning_rate": 6.766224795237e-06, "loss": 0.0039, "step": 20570 }, { "epoch": 0.13539206463030334, "grad_norm": 0.09239618403677201, "learning_rate": 6.769514160718398e-06, "loss": 0.0037, "step": 20580 }, { "epoch": 0.13545785280553674, "grad_norm": 0.15321491469402918, "learning_rate": 6.772803526199797e-06, "loss": 0.0044, "step": 20590 }, { "epoch": 0.1355236409807701, "grad_norm": 0.15714020884215738, "learning_rate": 6.776092891681195e-06, "loss": 0.0047, "step": 20600 }, { "epoch": 0.1355894291560035, "grad_norm": 0.10585428874795132, "learning_rate": 6.779382257162593e-06, "loss": 0.0037, "step": 20610 }, { "epoch": 0.1356552173312369, "grad_norm": 0.16353636875870486, "learning_rate": 6.7826716226439924e-06, "loss": 0.0055, "step": 20620 }, { "epoch": 0.13572100550647026, "grad_norm": 0.06369604169373133, "learning_rate": 6.785960988125391e-06, "loss": 0.0039, "step": 20630 }, { "epoch": 0.13578679368170365, "grad_norm": 0.1757698101935702, "learning_rate": 6.78925035360679e-06, "loss": 0.0042, "step": 20640 }, { "epoch": 0.13585258185693702, "grad_norm": 0.2470671837096776, "learning_rate": 6.792539719088188e-06, "loss": 0.0038, "step": 20650 }, { "epoch": 0.13591837003217042, "grad_norm": 0.21072953605355133, "learning_rate": 6.7958290845695876e-06, "loss": 0.0039, "step": 20660 }, { "epoch": 0.1359841582074038, "grad_norm": 0.14240486790065546, "learning_rate": 6.799118450050985e-06, "loss": 0.0033, "step": 20670 }, { "epoch": 0.13604994638263718, "grad_norm": 0.5439901721344235, "learning_rate": 6.802407815532384e-06, "loss": 0.0073, "step": 20680 }, { "epoch": 0.13611573455787057, "grad_norm": 0.1605978481257339, "learning_rate": 6.805697181013783e-06, "loss": 0.0071, "step": 20690 }, { "epoch": 0.13618152273310397, "grad_norm": 0.2327768481736756, "learning_rate": 6.808986546495182e-06, "loss": 0.0052, "step": 20700 }, { "epoch": 0.13624731090833733, "grad_norm": 0.1384624736648731, "learning_rate": 6.81227591197658e-06, "loss": 0.0046, "step": 20710 }, { "epoch": 0.13631309908357073, "grad_norm": 0.21385011212744126, "learning_rate": 6.8155652774579794e-06, "loss": 0.0052, "step": 20720 }, { "epoch": 0.1363788872588041, "grad_norm": 0.0836354710790314, "learning_rate": 6.818854642939378e-06, "loss": 0.0031, "step": 20730 }, { "epoch": 0.1364446754340375, "grad_norm": 0.18769535233667994, "learning_rate": 6.822144008420775e-06, "loss": 0.0066, "step": 20740 }, { "epoch": 0.13651046360927088, "grad_norm": 0.30614321744504347, "learning_rate": 6.825433373902175e-06, "loss": 0.005, "step": 20750 }, { "epoch": 0.13657625178450425, "grad_norm": 0.058977575116479206, "learning_rate": 6.828722739383573e-06, "loss": 0.0035, "step": 20760 }, { "epoch": 0.13664203995973764, "grad_norm": 0.09459699168641397, "learning_rate": 6.832012104864972e-06, "loss": 0.0029, "step": 20770 }, { "epoch": 0.136707828134971, "grad_norm": 0.26897764456850415, "learning_rate": 6.8353014703463705e-06, "loss": 0.0041, "step": 20780 }, { "epoch": 0.1367736163102044, "grad_norm": 0.24713285109905975, "learning_rate": 6.83859083582777e-06, "loss": 0.0081, "step": 20790 }, { "epoch": 0.1368394044854378, "grad_norm": 0.28413122413878955, "learning_rate": 6.841880201309168e-06, "loss": 0.0055, "step": 20800 }, { "epoch": 0.13690519266067117, "grad_norm": 0.15051349832673178, "learning_rate": 6.845169566790567e-06, "loss": 0.0061, "step": 20810 }, { "epoch": 0.13697098083590456, "grad_norm": 0.029774285126296218, "learning_rate": 6.848458932271966e-06, "loss": 0.0032, "step": 20820 }, { "epoch": 0.13703676901113793, "grad_norm": 0.11752629166435787, "learning_rate": 6.851748297753365e-06, "loss": 0.0029, "step": 20830 }, { "epoch": 0.13710255718637132, "grad_norm": 0.20765101584523768, "learning_rate": 6.855037663234762e-06, "loss": 0.0048, "step": 20840 }, { "epoch": 0.13716834536160472, "grad_norm": 0.6329923081365117, "learning_rate": 6.858327028716161e-06, "loss": 0.0056, "step": 20850 }, { "epoch": 0.13723413353683808, "grad_norm": 0.2678498140988605, "learning_rate": 6.86161639419756e-06, "loss": 0.0045, "step": 20860 }, { "epoch": 0.13729992171207148, "grad_norm": 0.25211637460583436, "learning_rate": 6.864905759678958e-06, "loss": 0.0065, "step": 20870 }, { "epoch": 0.13736570988730484, "grad_norm": 0.16235547535461517, "learning_rate": 6.8681951251603575e-06, "loss": 0.005, "step": 20880 }, { "epoch": 0.13743149806253824, "grad_norm": 0.13569145019696519, "learning_rate": 6.871484490641756e-06, "loss": 0.0033, "step": 20890 }, { "epoch": 0.13749728623777163, "grad_norm": 0.44334755034639783, "learning_rate": 6.874773856123155e-06, "loss": 0.0046, "step": 20900 }, { "epoch": 0.137563074413005, "grad_norm": 0.27731347140003904, "learning_rate": 6.8780632216045526e-06, "loss": 0.0047, "step": 20910 }, { "epoch": 0.1376288625882384, "grad_norm": 0.13075777790309664, "learning_rate": 6.881352587085952e-06, "loss": 0.0032, "step": 20920 }, { "epoch": 0.13769465076347176, "grad_norm": 0.06524037265642202, "learning_rate": 6.88464195256735e-06, "loss": 0.0035, "step": 20930 }, { "epoch": 0.13776043893870515, "grad_norm": 0.06281978401416988, "learning_rate": 6.887931318048749e-06, "loss": 0.0046, "step": 20940 }, { "epoch": 0.13782622711393855, "grad_norm": 0.12674304838844042, "learning_rate": 6.891220683530148e-06, "loss": 0.0044, "step": 20950 }, { "epoch": 0.13789201528917192, "grad_norm": 0.14377723847823182, "learning_rate": 6.894510049011546e-06, "loss": 0.0041, "step": 20960 }, { "epoch": 0.1379578034644053, "grad_norm": 0.14916465118613217, "learning_rate": 6.897799414492945e-06, "loss": 0.0035, "step": 20970 }, { "epoch": 0.1380235916396387, "grad_norm": 0.11314727880844931, "learning_rate": 6.901088779974343e-06, "loss": 0.0047, "step": 20980 }, { "epoch": 0.13808937981487207, "grad_norm": 0.35181662606803676, "learning_rate": 6.904378145455742e-06, "loss": 0.0044, "step": 20990 }, { "epoch": 0.13815516799010547, "grad_norm": 0.5843321839192337, "learning_rate": 6.90766751093714e-06, "loss": 0.0047, "step": 21000 }, { "epoch": 0.13822095616533883, "grad_norm": 0.39384642977364154, "learning_rate": 6.9109568764185396e-06, "loss": 0.0051, "step": 21010 }, { "epoch": 0.13828674434057223, "grad_norm": 0.10021732116545376, "learning_rate": 6.914246241899938e-06, "loss": 0.0064, "step": 21020 }, { "epoch": 0.13835253251580562, "grad_norm": 0.25088704036813986, "learning_rate": 6.917535607381337e-06, "loss": 0.0044, "step": 21030 }, { "epoch": 0.138418320691039, "grad_norm": 0.052123104766683935, "learning_rate": 6.9208249728627355e-06, "loss": 0.0035, "step": 21040 }, { "epoch": 0.13848410886627238, "grad_norm": 0.21542547559166697, "learning_rate": 6.924114338344135e-06, "loss": 0.0034, "step": 21050 }, { "epoch": 0.13854989704150575, "grad_norm": 0.15762849603753212, "learning_rate": 6.927403703825532e-06, "loss": 0.0047, "step": 21060 }, { "epoch": 0.13861568521673914, "grad_norm": 0.10955110660701224, "learning_rate": 6.930693069306931e-06, "loss": 0.0063, "step": 21070 }, { "epoch": 0.13868147339197254, "grad_norm": 0.24324697780028845, "learning_rate": 6.93398243478833e-06, "loss": 0.0054, "step": 21080 }, { "epoch": 0.1387472615672059, "grad_norm": 0.1877647134614444, "learning_rate": 6.937271800269728e-06, "loss": 0.0028, "step": 21090 }, { "epoch": 0.1388130497424393, "grad_norm": 0.09595437425185534, "learning_rate": 6.940561165751127e-06, "loss": 0.0041, "step": 21100 }, { "epoch": 0.13887883791767267, "grad_norm": 0.24847259957194254, "learning_rate": 6.943850531232526e-06, "loss": 0.0044, "step": 21110 }, { "epoch": 0.13894462609290606, "grad_norm": 0.22013361626959166, "learning_rate": 6.947139896713925e-06, "loss": 0.0045, "step": 21120 }, { "epoch": 0.13901041426813945, "grad_norm": 0.3028677044740496, "learning_rate": 6.950429262195323e-06, "loss": 0.0056, "step": 21130 }, { "epoch": 0.13907620244337282, "grad_norm": 0.06333420434098583, "learning_rate": 6.9537186276767225e-06, "loss": 0.0044, "step": 21140 }, { "epoch": 0.13914199061860621, "grad_norm": 0.07774087608447719, "learning_rate": 6.95700799315812e-06, "loss": 0.0037, "step": 21150 }, { "epoch": 0.13920777879383958, "grad_norm": 0.22547148586210952, "learning_rate": 6.960297358639519e-06, "loss": 0.0069, "step": 21160 }, { "epoch": 0.13927356696907298, "grad_norm": 0.236082165850046, "learning_rate": 6.963586724120918e-06, "loss": 0.004, "step": 21170 }, { "epoch": 0.13933935514430637, "grad_norm": 0.26992865061526283, "learning_rate": 6.966876089602316e-06, "loss": 0.0047, "step": 21180 }, { "epoch": 0.13940514331953974, "grad_norm": 0.04409138642805041, "learning_rate": 6.970165455083715e-06, "loss": 0.0029, "step": 21190 }, { "epoch": 0.13947093149477313, "grad_norm": 0.23247768565285112, "learning_rate": 6.9734548205651135e-06, "loss": 0.0051, "step": 21200 }, { "epoch": 0.13953671967000653, "grad_norm": 0.14052260380512566, "learning_rate": 6.976744186046513e-06, "loss": 0.0038, "step": 21210 }, { "epoch": 0.1396025078452399, "grad_norm": 0.3657949423375439, "learning_rate": 6.98003355152791e-06, "loss": 0.0057, "step": 21220 }, { "epoch": 0.1396682960204733, "grad_norm": 0.045757576747749724, "learning_rate": 6.9833229170093095e-06, "loss": 0.004, "step": 21230 }, { "epoch": 0.13973408419570665, "grad_norm": 0.2561959337241426, "learning_rate": 6.986612282490708e-06, "loss": 0.0038, "step": 21240 }, { "epoch": 0.13979987237094005, "grad_norm": 0.12783609499131637, "learning_rate": 6.989901647972107e-06, "loss": 0.0066, "step": 21250 }, { "epoch": 0.13986566054617344, "grad_norm": 0.2584923529890372, "learning_rate": 6.993191013453505e-06, "loss": 0.0055, "step": 21260 }, { "epoch": 0.1399314487214068, "grad_norm": 0.36013971338151357, "learning_rate": 6.996480378934905e-06, "loss": 0.0065, "step": 21270 }, { "epoch": 0.1399972368966402, "grad_norm": 0.21904380112633284, "learning_rate": 6.999769744416303e-06, "loss": 0.004, "step": 21280 }, { "epoch": 0.14006302507187357, "grad_norm": 0.19679084829292626, "learning_rate": 7.0030591098977005e-06, "loss": 0.004, "step": 21290 }, { "epoch": 0.14012881324710696, "grad_norm": 0.320456310178321, "learning_rate": 7.0063484753791e-06, "loss": 0.0048, "step": 21300 }, { "epoch": 0.14019460142234036, "grad_norm": 0.18620614438946367, "learning_rate": 7.009637840860498e-06, "loss": 0.0055, "step": 21310 }, { "epoch": 0.14026038959757373, "grad_norm": 0.9007260155247402, "learning_rate": 7.012927206341897e-06, "loss": 0.0081, "step": 21320 }, { "epoch": 0.14032617777280712, "grad_norm": 0.022594177843438798, "learning_rate": 7.016216571823296e-06, "loss": 0.0048, "step": 21330 }, { "epoch": 0.1403919659480405, "grad_norm": 0.2695885684451044, "learning_rate": 7.019505937304695e-06, "loss": 0.0051, "step": 21340 }, { "epoch": 0.14045775412327388, "grad_norm": 0.18023391833078273, "learning_rate": 7.022795302786093e-06, "loss": 0.0047, "step": 21350 }, { "epoch": 0.14052354229850728, "grad_norm": 0.5690278596113966, "learning_rate": 7.026084668267492e-06, "loss": 0.0056, "step": 21360 }, { "epoch": 0.14058933047374064, "grad_norm": 0.09357104768770755, "learning_rate": 7.02937403374889e-06, "loss": 0.0051, "step": 21370 }, { "epoch": 0.14065511864897404, "grad_norm": 0.2584256296897939, "learning_rate": 7.032663399230289e-06, "loss": 0.0047, "step": 21380 }, { "epoch": 0.1407209068242074, "grad_norm": 0.1436361859182926, "learning_rate": 7.0359527647116875e-06, "loss": 0.0036, "step": 21390 }, { "epoch": 0.1407866949994408, "grad_norm": 0.15802775966942084, "learning_rate": 7.039242130193086e-06, "loss": 0.0048, "step": 21400 }, { "epoch": 0.1408524831746742, "grad_norm": 0.1717747184377646, "learning_rate": 7.042531495674485e-06, "loss": 0.0054, "step": 21410 }, { "epoch": 0.14091827134990756, "grad_norm": 0.19709089168898794, "learning_rate": 7.045820861155883e-06, "loss": 0.0035, "step": 21420 }, { "epoch": 0.14098405952514095, "grad_norm": 0.27160034333146527, "learning_rate": 7.049110226637283e-06, "loss": 0.008, "step": 21430 }, { "epoch": 0.14104984770037435, "grad_norm": 0.13618793840356236, "learning_rate": 7.05239959211868e-06, "loss": 0.0033, "step": 21440 }, { "epoch": 0.14111563587560771, "grad_norm": 0.1568797345021452, "learning_rate": 7.055688957600079e-06, "loss": 0.0026, "step": 21450 }, { "epoch": 0.1411814240508411, "grad_norm": 0.042401118600785, "learning_rate": 7.058978323081478e-06, "loss": 0.0031, "step": 21460 }, { "epoch": 0.14124721222607448, "grad_norm": 0.18635964966921376, "learning_rate": 7.062267688562877e-06, "loss": 0.0041, "step": 21470 }, { "epoch": 0.14131300040130787, "grad_norm": 0.34767433074034076, "learning_rate": 7.065557054044275e-06, "loss": 0.0046, "step": 21480 }, { "epoch": 0.14137878857654126, "grad_norm": 0.5949915919409591, "learning_rate": 7.0688464195256745e-06, "loss": 0.0065, "step": 21490 }, { "epoch": 0.14144457675177463, "grad_norm": 0.10803009863378206, "learning_rate": 7.072135785007073e-06, "loss": 0.0033, "step": 21500 }, { "epoch": 0.14151036492700803, "grad_norm": 0.20982449814467774, "learning_rate": 7.075425150488472e-06, "loss": 0.0035, "step": 21510 }, { "epoch": 0.1415761531022414, "grad_norm": 0.2365471897124854, "learning_rate": 7.07871451596987e-06, "loss": 0.0054, "step": 21520 }, { "epoch": 0.1416419412774748, "grad_norm": 0.19744291219227947, "learning_rate": 7.082003881451268e-06, "loss": 0.007, "step": 21530 }, { "epoch": 0.14170772945270818, "grad_norm": 0.12915245910644582, "learning_rate": 7.085293246932667e-06, "loss": 0.0045, "step": 21540 }, { "epoch": 0.14177351762794155, "grad_norm": 0.5455809885430072, "learning_rate": 7.0885826124140655e-06, "loss": 0.0042, "step": 21550 }, { "epoch": 0.14183930580317494, "grad_norm": 0.19790360956281133, "learning_rate": 7.091871977895465e-06, "loss": 0.004, "step": 21560 }, { "epoch": 0.1419050939784083, "grad_norm": 0.19361562769375396, "learning_rate": 7.095161343376863e-06, "loss": 0.0041, "step": 21570 }, { "epoch": 0.1419708821536417, "grad_norm": 0.2294041343245101, "learning_rate": 7.098450708858262e-06, "loss": 0.004, "step": 21580 }, { "epoch": 0.1420366703288751, "grad_norm": 0.19035464216149756, "learning_rate": 7.101740074339661e-06, "loss": 0.0047, "step": 21590 }, { "epoch": 0.14210245850410846, "grad_norm": 0.21082101500153275, "learning_rate": 7.10502943982106e-06, "loss": 0.0047, "step": 21600 }, { "epoch": 0.14216824667934186, "grad_norm": 0.1797810003152595, "learning_rate": 7.108318805302457e-06, "loss": 0.0035, "step": 21610 }, { "epoch": 0.14223403485457523, "grad_norm": 0.11266991733389002, "learning_rate": 7.111608170783857e-06, "loss": 0.0044, "step": 21620 }, { "epoch": 0.14229982302980862, "grad_norm": 0.05920355087677648, "learning_rate": 7.114897536265255e-06, "loss": 0.004, "step": 21630 }, { "epoch": 0.14236561120504201, "grad_norm": 0.22996426657535116, "learning_rate": 7.118186901746653e-06, "loss": 0.0027, "step": 21640 }, { "epoch": 0.14243139938027538, "grad_norm": 0.1745248129988403, "learning_rate": 7.1214762672280525e-06, "loss": 0.0071, "step": 21650 }, { "epoch": 0.14249718755550878, "grad_norm": 0.2084436476636564, "learning_rate": 7.124765632709451e-06, "loss": 0.0035, "step": 21660 }, { "epoch": 0.14256297573074217, "grad_norm": 0.058776143142362604, "learning_rate": 7.12805499819085e-06, "loss": 0.0025, "step": 21670 }, { "epoch": 0.14262876390597554, "grad_norm": 0.14691788052727645, "learning_rate": 7.131344363672248e-06, "loss": 0.0049, "step": 21680 }, { "epoch": 0.14269455208120893, "grad_norm": 0.22250934643348194, "learning_rate": 7.134633729153647e-06, "loss": 0.0038, "step": 21690 }, { "epoch": 0.1427603402564423, "grad_norm": 0.2287895781494382, "learning_rate": 7.137923094635045e-06, "loss": 0.0031, "step": 21700 }, { "epoch": 0.1428261284316757, "grad_norm": 0.1171739086148501, "learning_rate": 7.141212460116444e-06, "loss": 0.0037, "step": 21710 }, { "epoch": 0.1428919166069091, "grad_norm": 0.07699554390454622, "learning_rate": 7.144501825597843e-06, "loss": 0.0044, "step": 21720 }, { "epoch": 0.14295770478214245, "grad_norm": 0.17797680426507592, "learning_rate": 7.147791191079242e-06, "loss": 0.0037, "step": 21730 }, { "epoch": 0.14302349295737585, "grad_norm": 0.10256824967168128, "learning_rate": 7.15108055656064e-06, "loss": 0.0032, "step": 21740 }, { "epoch": 0.14308928113260921, "grad_norm": 0.10303754924695889, "learning_rate": 7.154369922042038e-06, "loss": 0.0061, "step": 21750 }, { "epoch": 0.1431550693078426, "grad_norm": 0.3235292297932305, "learning_rate": 7.157659287523437e-06, "loss": 0.004, "step": 21760 }, { "epoch": 0.143220857483076, "grad_norm": 0.2274833174358309, "learning_rate": 7.160948653004835e-06, "loss": 0.0061, "step": 21770 }, { "epoch": 0.14328664565830937, "grad_norm": 0.1786650358697281, "learning_rate": 7.164238018486235e-06, "loss": 0.0022, "step": 21780 }, { "epoch": 0.14335243383354276, "grad_norm": 0.13924829533905972, "learning_rate": 7.167527383967633e-06, "loss": 0.005, "step": 21790 }, { "epoch": 0.14341822200877613, "grad_norm": 0.19631022942230344, "learning_rate": 7.170816749449032e-06, "loss": 0.0047, "step": 21800 }, { "epoch": 0.14348401018400953, "grad_norm": 0.32454864102615916, "learning_rate": 7.1741061149304305e-06, "loss": 0.0047, "step": 21810 }, { "epoch": 0.14354979835924292, "grad_norm": 0.22267989130062096, "learning_rate": 7.17739548041183e-06, "loss": 0.0058, "step": 21820 }, { "epoch": 0.14361558653447629, "grad_norm": 0.2694473470324235, "learning_rate": 7.180684845893227e-06, "loss": 0.0081, "step": 21830 }, { "epoch": 0.14368137470970968, "grad_norm": 0.28436048844778267, "learning_rate": 7.183974211374627e-06, "loss": 0.0026, "step": 21840 }, { "epoch": 0.14374716288494305, "grad_norm": 0.1001345479653423, "learning_rate": 7.187263576856025e-06, "loss": 0.0035, "step": 21850 }, { "epoch": 0.14381295106017644, "grad_norm": 0.15886169572448763, "learning_rate": 7.190552942337423e-06, "loss": 0.0063, "step": 21860 }, { "epoch": 0.14387873923540984, "grad_norm": 0.14245270604318871, "learning_rate": 7.193842307818822e-06, "loss": 0.0033, "step": 21870 }, { "epoch": 0.1439445274106432, "grad_norm": 0.08360954589205914, "learning_rate": 7.197131673300221e-06, "loss": 0.0025, "step": 21880 }, { "epoch": 0.1440103155858766, "grad_norm": 0.24155245103287912, "learning_rate": 7.20042103878162e-06, "loss": 0.0068, "step": 21890 }, { "epoch": 0.14407610376111, "grad_norm": 0.10419692873234404, "learning_rate": 7.203710404263018e-06, "loss": 0.0047, "step": 21900 }, { "epoch": 0.14414189193634336, "grad_norm": 0.12266146466628827, "learning_rate": 7.2069997697444176e-06, "loss": 0.003, "step": 21910 }, { "epoch": 0.14420768011157675, "grad_norm": 0.09573621876742369, "learning_rate": 7.210289135225815e-06, "loss": 0.0041, "step": 21920 }, { "epoch": 0.14427346828681012, "grad_norm": 0.1806161142148377, "learning_rate": 7.213578500707214e-06, "loss": 0.0039, "step": 21930 }, { "epoch": 0.1443392564620435, "grad_norm": 0.18737786978646037, "learning_rate": 7.216867866188613e-06, "loss": 0.0042, "step": 21940 }, { "epoch": 0.1444050446372769, "grad_norm": 0.2692614759044072, "learning_rate": 7.220157231670012e-06, "loss": 0.0041, "step": 21950 }, { "epoch": 0.14447083281251027, "grad_norm": 0.27034507801732194, "learning_rate": 7.22344659715141e-06, "loss": 0.0054, "step": 21960 }, { "epoch": 0.14453662098774367, "grad_norm": 0.06537708729256762, "learning_rate": 7.2267359626328086e-06, "loss": 0.0049, "step": 21970 }, { "epoch": 0.14460240916297704, "grad_norm": 0.2089886566912901, "learning_rate": 7.230025328114208e-06, "loss": 0.0037, "step": 21980 }, { "epoch": 0.14466819733821043, "grad_norm": 0.168975740392447, "learning_rate": 7.233314693595605e-06, "loss": 0.0036, "step": 21990 }, { "epoch": 0.14473398551344382, "grad_norm": 0.11304064392113004, "learning_rate": 7.2366040590770045e-06, "loss": 0.0053, "step": 22000 }, { "epoch": 0.1447997736886772, "grad_norm": 0.3531605955647899, "learning_rate": 7.239893424558403e-06, "loss": 0.0047, "step": 22010 }, { "epoch": 0.14486556186391059, "grad_norm": 0.13424452621625224, "learning_rate": 7.243182790039802e-06, "loss": 0.003, "step": 22020 }, { "epoch": 0.14493135003914395, "grad_norm": 0.16217641758994172, "learning_rate": 7.2464721555212004e-06, "loss": 0.0049, "step": 22030 }, { "epoch": 0.14499713821437735, "grad_norm": 0.27180080683746577, "learning_rate": 7.2497615210026e-06, "loss": 0.0044, "step": 22040 }, { "epoch": 0.14506292638961074, "grad_norm": 0.13941559811289414, "learning_rate": 7.253050886483998e-06, "loss": 0.0074, "step": 22050 }, { "epoch": 0.1451287145648441, "grad_norm": 0.14191308288105045, "learning_rate": 7.256340251965397e-06, "loss": 0.0031, "step": 22060 }, { "epoch": 0.1451945027400775, "grad_norm": 0.07814633640720232, "learning_rate": 7.259629617446795e-06, "loss": 0.0024, "step": 22070 }, { "epoch": 0.14526029091531087, "grad_norm": 0.2431831237337326, "learning_rate": 7.262918982928193e-06, "loss": 0.0059, "step": 22080 }, { "epoch": 0.14532607909054426, "grad_norm": 0.11383352259566751, "learning_rate": 7.266208348409592e-06, "loss": 0.0059, "step": 22090 }, { "epoch": 0.14539186726577766, "grad_norm": 0.20868862420631748, "learning_rate": 7.269497713890991e-06, "loss": 0.0033, "step": 22100 }, { "epoch": 0.14545765544101102, "grad_norm": 0.18896482043521953, "learning_rate": 7.27278707937239e-06, "loss": 0.0053, "step": 22110 }, { "epoch": 0.14552344361624442, "grad_norm": 0.17809971043050185, "learning_rate": 7.276076444853788e-06, "loss": 0.0049, "step": 22120 }, { "epoch": 0.1455892317914778, "grad_norm": 0.2995606946469912, "learning_rate": 7.2793658103351874e-06, "loss": 0.006, "step": 22130 }, { "epoch": 0.14565501996671118, "grad_norm": 0.12568122187429803, "learning_rate": 7.282655175816585e-06, "loss": 0.0043, "step": 22140 }, { "epoch": 0.14572080814194457, "grad_norm": 0.206776078911676, "learning_rate": 7.285944541297984e-06, "loss": 0.0033, "step": 22150 }, { "epoch": 0.14578659631717794, "grad_norm": 0.273786445756027, "learning_rate": 7.2892339067793825e-06, "loss": 0.0055, "step": 22160 }, { "epoch": 0.14585238449241134, "grad_norm": 0.07624241205112539, "learning_rate": 7.292523272260782e-06, "loss": 0.0044, "step": 22170 }, { "epoch": 0.14591817266764473, "grad_norm": 0.32252355147878153, "learning_rate": 7.29581263774218e-06, "loss": 0.0054, "step": 22180 }, { "epoch": 0.1459839608428781, "grad_norm": 0.14169824919466256, "learning_rate": 7.299102003223579e-06, "loss": 0.0024, "step": 22190 }, { "epoch": 0.1460497490181115, "grad_norm": 0.423013625225961, "learning_rate": 7.302391368704978e-06, "loss": 0.0066, "step": 22200 }, { "epoch": 0.14611553719334486, "grad_norm": 0.11660399927488747, "learning_rate": 7.305680734186375e-06, "loss": 0.0033, "step": 22210 }, { "epoch": 0.14618132536857825, "grad_norm": 0.32563238576407905, "learning_rate": 7.308970099667775e-06, "loss": 0.0054, "step": 22220 }, { "epoch": 0.14624711354381165, "grad_norm": 0.1530850267323079, "learning_rate": 7.312259465149173e-06, "loss": 0.0036, "step": 22230 }, { "epoch": 0.146312901719045, "grad_norm": 0.1189941248133806, "learning_rate": 7.315548830630572e-06, "loss": 0.0035, "step": 22240 }, { "epoch": 0.1463786898942784, "grad_norm": 0.15800045615819922, "learning_rate": 7.31883819611197e-06, "loss": 0.0038, "step": 22250 }, { "epoch": 0.14644447806951177, "grad_norm": 0.216590448180138, "learning_rate": 7.3221275615933695e-06, "loss": 0.0043, "step": 22260 }, { "epoch": 0.14651026624474517, "grad_norm": 0.04600947399375776, "learning_rate": 7.325416927074768e-06, "loss": 0.0039, "step": 22270 }, { "epoch": 0.14657605441997856, "grad_norm": 0.03201593877362356, "learning_rate": 7.328706292556167e-06, "loss": 0.0053, "step": 22280 }, { "epoch": 0.14664184259521193, "grad_norm": 0.34267989413612887, "learning_rate": 7.3319956580375655e-06, "loss": 0.0062, "step": 22290 }, { "epoch": 0.14670763077044532, "grad_norm": 0.12776646202243147, "learning_rate": 7.335285023518965e-06, "loss": 0.0028, "step": 22300 }, { "epoch": 0.1467734189456787, "grad_norm": 0.05057026224691099, "learning_rate": 7.338574389000362e-06, "loss": 0.0063, "step": 22310 }, { "epoch": 0.14683920712091209, "grad_norm": 0.011496788708853716, "learning_rate": 7.3418637544817606e-06, "loss": 0.0074, "step": 22320 }, { "epoch": 0.14690499529614548, "grad_norm": 0.1956774438160919, "learning_rate": 7.34515311996316e-06, "loss": 0.0042, "step": 22330 }, { "epoch": 0.14697078347137885, "grad_norm": 0.10646811884335161, "learning_rate": 7.348442485444558e-06, "loss": 0.0041, "step": 22340 }, { "epoch": 0.14703657164661224, "grad_norm": 0.052381887566080684, "learning_rate": 7.351731850925957e-06, "loss": 0.0039, "step": 22350 }, { "epoch": 0.14710235982184564, "grad_norm": 0.2528715203390998, "learning_rate": 7.355021216407356e-06, "loss": 0.0042, "step": 22360 }, { "epoch": 0.147168147997079, "grad_norm": 0.11309516822406625, "learning_rate": 7.358310581888755e-06, "loss": 0.0034, "step": 22370 }, { "epoch": 0.1472339361723124, "grad_norm": 0.12549749562007392, "learning_rate": 7.361599947370152e-06, "loss": 0.0054, "step": 22380 }, { "epoch": 0.14729972434754576, "grad_norm": 0.08497388917941978, "learning_rate": 7.364889312851552e-06, "loss": 0.0057, "step": 22390 }, { "epoch": 0.14736551252277916, "grad_norm": 0.09712292518768753, "learning_rate": 7.36817867833295e-06, "loss": 0.0058, "step": 22400 }, { "epoch": 0.14743130069801255, "grad_norm": 0.020959416970661646, "learning_rate": 7.371468043814349e-06, "loss": 0.0044, "step": 22410 }, { "epoch": 0.14749708887324592, "grad_norm": 0.05522966695322041, "learning_rate": 7.3747574092957476e-06, "loss": 0.004, "step": 22420 }, { "epoch": 0.1475628770484793, "grad_norm": 0.15497115012980822, "learning_rate": 7.378046774777146e-06, "loss": 0.0063, "step": 22430 }, { "epoch": 0.14762866522371268, "grad_norm": 0.25094111488225873, "learning_rate": 7.381336140258545e-06, "loss": 0.0063, "step": 22440 }, { "epoch": 0.14769445339894607, "grad_norm": 0.14955355226090797, "learning_rate": 7.384625505739943e-06, "loss": 0.0045, "step": 22450 }, { "epoch": 0.14776024157417947, "grad_norm": 0.15261688454347688, "learning_rate": 7.387914871221342e-06, "loss": 0.003, "step": 22460 }, { "epoch": 0.14782602974941284, "grad_norm": 0.23885787088013438, "learning_rate": 7.39120423670274e-06, "loss": 0.0037, "step": 22470 }, { "epoch": 0.14789181792464623, "grad_norm": 0.13366924970265484, "learning_rate": 7.3944936021841394e-06, "loss": 0.0033, "step": 22480 }, { "epoch": 0.1479576060998796, "grad_norm": 0.12187836104457489, "learning_rate": 7.397782967665538e-06, "loss": 0.0032, "step": 22490 }, { "epoch": 0.148023394275113, "grad_norm": 0.12862366059568167, "learning_rate": 7.401072333146937e-06, "loss": 0.0044, "step": 22500 }, { "epoch": 0.14808918245034638, "grad_norm": 0.20841788947944517, "learning_rate": 7.404361698628335e-06, "loss": 0.0034, "step": 22510 }, { "epoch": 0.14815497062557975, "grad_norm": 0.10444234502125495, "learning_rate": 7.4076510641097346e-06, "loss": 0.0031, "step": 22520 }, { "epoch": 0.14822075880081315, "grad_norm": 0.12414113858160009, "learning_rate": 7.410940429591132e-06, "loss": 0.0035, "step": 22530 }, { "epoch": 0.1482865469760465, "grad_norm": 0.24155313991366512, "learning_rate": 7.4142297950725304e-06, "loss": 0.0047, "step": 22540 }, { "epoch": 0.1483523351512799, "grad_norm": 0.12263608565909008, "learning_rate": 7.41751916055393e-06, "loss": 0.0067, "step": 22550 }, { "epoch": 0.1484181233265133, "grad_norm": 0.3502757152858583, "learning_rate": 7.420808526035328e-06, "loss": 0.0051, "step": 22560 }, { "epoch": 0.14848391150174667, "grad_norm": 0.04434004520119712, "learning_rate": 7.424097891516727e-06, "loss": 0.0037, "step": 22570 }, { "epoch": 0.14854969967698006, "grad_norm": 0.05890300642174674, "learning_rate": 7.427387256998126e-06, "loss": 0.0031, "step": 22580 }, { "epoch": 0.14861548785221346, "grad_norm": 0.17213907156741917, "learning_rate": 7.430676622479525e-06, "loss": 0.0037, "step": 22590 }, { "epoch": 0.14868127602744682, "grad_norm": 0.1519559738668144, "learning_rate": 7.433965987960922e-06, "loss": 0.0043, "step": 22600 }, { "epoch": 0.14874706420268022, "grad_norm": 0.25533805656503417, "learning_rate": 7.437255353442322e-06, "loss": 0.0045, "step": 22610 }, { "epoch": 0.14881285237791358, "grad_norm": 0.36372456813729825, "learning_rate": 7.44054471892372e-06, "loss": 0.0053, "step": 22620 }, { "epoch": 0.14887864055314698, "grad_norm": 0.05837587029222731, "learning_rate": 7.443834084405119e-06, "loss": 0.0024, "step": 22630 }, { "epoch": 0.14894442872838037, "grad_norm": 0.1073815477580881, "learning_rate": 7.4471234498865175e-06, "loss": 0.0077, "step": 22640 }, { "epoch": 0.14901021690361374, "grad_norm": 0.17585817598150696, "learning_rate": 7.450412815367916e-06, "loss": 0.0058, "step": 22650 }, { "epoch": 0.14907600507884713, "grad_norm": 0.5298792277356725, "learning_rate": 7.453702180849315e-06, "loss": 0.006, "step": 22660 }, { "epoch": 0.1491417932540805, "grad_norm": 0.08549793727476195, "learning_rate": 7.456991546330713e-06, "loss": 0.0026, "step": 22670 }, { "epoch": 0.1492075814293139, "grad_norm": 0.1741179861995182, "learning_rate": 7.460280911812113e-06, "loss": 0.0046, "step": 22680 }, { "epoch": 0.1492733696045473, "grad_norm": 0.27849206583529096, "learning_rate": 7.46357027729351e-06, "loss": 0.0069, "step": 22690 }, { "epoch": 0.14933915777978066, "grad_norm": 0.2672255964323487, "learning_rate": 7.466859642774909e-06, "loss": 0.0061, "step": 22700 }, { "epoch": 0.14940494595501405, "grad_norm": 0.12232781771163292, "learning_rate": 7.470149008256308e-06, "loss": 0.0037, "step": 22710 }, { "epoch": 0.14947073413024742, "grad_norm": 0.17135636586155326, "learning_rate": 7.473438373737707e-06, "loss": 0.0039, "step": 22720 }, { "epoch": 0.1495365223054808, "grad_norm": 0.1422943476311338, "learning_rate": 7.476727739219105e-06, "loss": 0.0022, "step": 22730 }, { "epoch": 0.1496023104807142, "grad_norm": 0.14870217767228974, "learning_rate": 7.4800171047005045e-06, "loss": 0.0024, "step": 22740 }, { "epoch": 0.14966809865594757, "grad_norm": 0.2545819013789407, "learning_rate": 7.483306470181903e-06, "loss": 0.004, "step": 22750 }, { "epoch": 0.14973388683118097, "grad_norm": 0.39060363675082294, "learning_rate": 7.4865958356633e-06, "loss": 0.0028, "step": 22760 }, { "epoch": 0.14979967500641433, "grad_norm": 0.05308946739904174, "learning_rate": 7.4898852011446995e-06, "loss": 0.0095, "step": 22770 }, { "epoch": 0.14986546318164773, "grad_norm": 0.18546286975229867, "learning_rate": 7.493174566626098e-06, "loss": 0.0036, "step": 22780 }, { "epoch": 0.14993125135688112, "grad_norm": 0.07939570495512466, "learning_rate": 7.496463932107497e-06, "loss": 0.003, "step": 22790 }, { "epoch": 0.1499970395321145, "grad_norm": 0.034326470083456184, "learning_rate": 7.4997532975888955e-06, "loss": 0.0034, "step": 22800 }, { "epoch": 0.15006282770734788, "grad_norm": 0.28423952119089296, "learning_rate": 7.503042663070295e-06, "loss": 0.0052, "step": 22810 }, { "epoch": 0.15012861588258125, "grad_norm": 0.10478828590685879, "learning_rate": 7.506332028551693e-06, "loss": 0.0036, "step": 22820 }, { "epoch": 0.15019440405781465, "grad_norm": 0.24644693601042264, "learning_rate": 7.509621394033092e-06, "loss": 0.0059, "step": 22830 }, { "epoch": 0.15026019223304804, "grad_norm": 0.1350687638289251, "learning_rate": 7.51291075951449e-06, "loss": 0.0068, "step": 22840 }, { "epoch": 0.1503259804082814, "grad_norm": 0.14017079903845, "learning_rate": 7.516200124995889e-06, "loss": 0.0047, "step": 22850 }, { "epoch": 0.1503917685835148, "grad_norm": 0.0890921677015808, "learning_rate": 7.519489490477287e-06, "loss": 0.0052, "step": 22860 }, { "epoch": 0.1504575567587482, "grad_norm": 0.14246429106803615, "learning_rate": 7.5227788559586866e-06, "loss": 0.0043, "step": 22870 }, { "epoch": 0.15052334493398156, "grad_norm": 0.2300988880507367, "learning_rate": 7.526068221440085e-06, "loss": 0.0053, "step": 22880 }, { "epoch": 0.15058913310921496, "grad_norm": 0.12726224649555737, "learning_rate": 7.529357586921483e-06, "loss": 0.0035, "step": 22890 }, { "epoch": 0.15065492128444832, "grad_norm": 0.07857502294596466, "learning_rate": 7.5326469524028825e-06, "loss": 0.003, "step": 22900 }, { "epoch": 0.15072070945968172, "grad_norm": 0.10452064543718514, "learning_rate": 7.53593631788428e-06, "loss": 0.0031, "step": 22910 }, { "epoch": 0.1507864976349151, "grad_norm": 0.1632382936076269, "learning_rate": 7.539225683365679e-06, "loss": 0.0036, "step": 22920 }, { "epoch": 0.15085228581014848, "grad_norm": 0.3730064672240261, "learning_rate": 7.5425150488470776e-06, "loss": 0.0047, "step": 22930 }, { "epoch": 0.15091807398538187, "grad_norm": 0.7165610983641842, "learning_rate": 7.545804414328477e-06, "loss": 0.0084, "step": 22940 }, { "epoch": 0.15098386216061524, "grad_norm": 0.1940208172294238, "learning_rate": 7.549093779809875e-06, "loss": 0.0029, "step": 22950 }, { "epoch": 0.15104965033584863, "grad_norm": 0.25034583390764414, "learning_rate": 7.552383145291274e-06, "loss": 0.0046, "step": 22960 }, { "epoch": 0.15111543851108203, "grad_norm": 0.09815260419655114, "learning_rate": 7.555672510772673e-06, "loss": 0.0038, "step": 22970 }, { "epoch": 0.1511812266863154, "grad_norm": 0.03603989496598763, "learning_rate": 7.558961876254072e-06, "loss": 0.0031, "step": 22980 }, { "epoch": 0.1512470148615488, "grad_norm": 0.07624427272392004, "learning_rate": 7.56225124173547e-06, "loss": 0.0053, "step": 22990 }, { "epoch": 0.15131280303678216, "grad_norm": 0.09493656885404204, "learning_rate": 7.565540607216868e-06, "loss": 0.0032, "step": 23000 }, { "epoch": 0.15137859121201555, "grad_norm": 0.13502662980302385, "learning_rate": 7.568829972698267e-06, "loss": 0.0026, "step": 23010 }, { "epoch": 0.15144437938724895, "grad_norm": 0.12160076538517617, "learning_rate": 7.572119338179665e-06, "loss": 0.0034, "step": 23020 }, { "epoch": 0.1515101675624823, "grad_norm": 0.3020370830473997, "learning_rate": 7.575408703661065e-06, "loss": 0.0048, "step": 23030 }, { "epoch": 0.1515759557377157, "grad_norm": 0.1436301604048554, "learning_rate": 7.578698069142463e-06, "loss": 0.0035, "step": 23040 }, { "epoch": 0.15164174391294907, "grad_norm": 0.18548166990450018, "learning_rate": 7.581987434623862e-06, "loss": 0.0042, "step": 23050 }, { "epoch": 0.15170753208818247, "grad_norm": 0.12076809882669383, "learning_rate": 7.5852768001052605e-06, "loss": 0.0029, "step": 23060 }, { "epoch": 0.15177332026341586, "grad_norm": 0.30261226314109374, "learning_rate": 7.58856616558666e-06, "loss": 0.0064, "step": 23070 }, { "epoch": 0.15183910843864923, "grad_norm": 0.08575062532011879, "learning_rate": 7.591855531068057e-06, "loss": 0.0055, "step": 23080 }, { "epoch": 0.15190489661388262, "grad_norm": 0.20324339858212073, "learning_rate": 7.5951448965494564e-06, "loss": 0.005, "step": 23090 }, { "epoch": 0.15197068478911602, "grad_norm": 0.07444890576092704, "learning_rate": 7.598434262030855e-06, "loss": 0.0069, "step": 23100 }, { "epoch": 0.15203647296434938, "grad_norm": 0.16484197309341347, "learning_rate": 7.601723627512253e-06, "loss": 0.003, "step": 23110 }, { "epoch": 0.15210226113958278, "grad_norm": 0.11538656776812888, "learning_rate": 7.605012992993652e-06, "loss": 0.0028, "step": 23120 }, { "epoch": 0.15216804931481615, "grad_norm": 0.18128443567566682, "learning_rate": 7.608302358475051e-06, "loss": 0.0035, "step": 23130 }, { "epoch": 0.15223383749004954, "grad_norm": 0.17683486330065165, "learning_rate": 7.61159172395645e-06, "loss": 0.0036, "step": 23140 }, { "epoch": 0.15229962566528293, "grad_norm": 0.11060502369230409, "learning_rate": 7.6148810894378475e-06, "loss": 0.0026, "step": 23150 }, { "epoch": 0.1523654138405163, "grad_norm": 0.1411851203215504, "learning_rate": 7.618170454919247e-06, "loss": 0.0048, "step": 23160 }, { "epoch": 0.1524312020157497, "grad_norm": 0.12785860212083486, "learning_rate": 7.621459820400645e-06, "loss": 0.0072, "step": 23170 }, { "epoch": 0.15249699019098306, "grad_norm": 0.16667430314250664, "learning_rate": 7.624749185882044e-06, "loss": 0.0058, "step": 23180 }, { "epoch": 0.15256277836621646, "grad_norm": 0.4293095778070792, "learning_rate": 7.628038551363443e-06, "loss": 0.0104, "step": 23190 }, { "epoch": 0.15262856654144985, "grad_norm": 0.3206302811270365, "learning_rate": 7.631327916844842e-06, "loss": 0.0041, "step": 23200 }, { "epoch": 0.15269435471668322, "grad_norm": 0.02599865548528807, "learning_rate": 7.63461728232624e-06, "loss": 0.0042, "step": 23210 }, { "epoch": 0.1527601428919166, "grad_norm": 0.2672882659712957, "learning_rate": 7.637906647807639e-06, "loss": 0.005, "step": 23220 }, { "epoch": 0.15282593106714998, "grad_norm": 0.25330532946788775, "learning_rate": 7.641196013289037e-06, "loss": 0.003, "step": 23230 }, { "epoch": 0.15289171924238337, "grad_norm": 0.20043037638407718, "learning_rate": 7.644485378770435e-06, "loss": 0.0041, "step": 23240 }, { "epoch": 0.15295750741761677, "grad_norm": 0.3957225152468006, "learning_rate": 7.647774744251835e-06, "loss": 0.0051, "step": 23250 }, { "epoch": 0.15302329559285013, "grad_norm": 0.16698322908427127, "learning_rate": 7.651064109733232e-06, "loss": 0.0032, "step": 23260 }, { "epoch": 0.15308908376808353, "grad_norm": 0.28844333998383576, "learning_rate": 7.654353475214632e-06, "loss": 0.0049, "step": 23270 }, { "epoch": 0.1531548719433169, "grad_norm": 0.16280221003130863, "learning_rate": 7.65764284069603e-06, "loss": 0.0036, "step": 23280 }, { "epoch": 0.1532206601185503, "grad_norm": 0.4568639750789179, "learning_rate": 7.660932206177429e-06, "loss": 0.0041, "step": 23290 }, { "epoch": 0.15328644829378368, "grad_norm": 0.13414305431965187, "learning_rate": 7.664221571658827e-06, "loss": 0.0043, "step": 23300 }, { "epoch": 0.15335223646901705, "grad_norm": 0.08475727015848025, "learning_rate": 7.667510937140227e-06, "loss": 0.0035, "step": 23310 }, { "epoch": 0.15341802464425044, "grad_norm": 0.13331369511366603, "learning_rate": 7.670800302621626e-06, "loss": 0.0045, "step": 23320 }, { "epoch": 0.15348381281948384, "grad_norm": 0.23459796904591101, "learning_rate": 7.674089668103022e-06, "loss": 0.005, "step": 23330 }, { "epoch": 0.1535496009947172, "grad_norm": 0.1359064258918287, "learning_rate": 7.677379033584422e-06, "loss": 0.0036, "step": 23340 }, { "epoch": 0.1536153891699506, "grad_norm": 0.20468503934169416, "learning_rate": 7.68066839906582e-06, "loss": 0.0054, "step": 23350 }, { "epoch": 0.15368117734518397, "grad_norm": 0.13868561974825436, "learning_rate": 7.683957764547219e-06, "loss": 0.0027, "step": 23360 }, { "epoch": 0.15374696552041736, "grad_norm": 0.03873704730978151, "learning_rate": 7.687247130028617e-06, "loss": 0.0065, "step": 23370 }, { "epoch": 0.15381275369565076, "grad_norm": 0.21301174922172572, "learning_rate": 7.690536495510017e-06, "loss": 0.0025, "step": 23380 }, { "epoch": 0.15387854187088412, "grad_norm": 0.0912486365057701, "learning_rate": 7.693825860991416e-06, "loss": 0.0032, "step": 23390 }, { "epoch": 0.15394433004611752, "grad_norm": 0.1424795194374967, "learning_rate": 7.697115226472814e-06, "loss": 0.0039, "step": 23400 }, { "epoch": 0.15401011822135088, "grad_norm": 0.24449098421123816, "learning_rate": 7.700404591954213e-06, "loss": 0.003, "step": 23410 }, { "epoch": 0.15407590639658428, "grad_norm": 0.4020298333461178, "learning_rate": 7.703693957435613e-06, "loss": 0.0061, "step": 23420 }, { "epoch": 0.15414169457181767, "grad_norm": 0.20844187256263838, "learning_rate": 7.70698332291701e-06, "loss": 0.0039, "step": 23430 }, { "epoch": 0.15420748274705104, "grad_norm": 0.27944030119502544, "learning_rate": 7.710272688398408e-06, "loss": 0.003, "step": 23440 }, { "epoch": 0.15427327092228443, "grad_norm": 0.1089392706441407, "learning_rate": 7.713562053879808e-06, "loss": 0.0092, "step": 23450 }, { "epoch": 0.1543390590975178, "grad_norm": 0.12413367331091031, "learning_rate": 7.716851419361206e-06, "loss": 0.003, "step": 23460 }, { "epoch": 0.1544048472727512, "grad_norm": 0.10017644967166768, "learning_rate": 7.720140784842604e-06, "loss": 0.0072, "step": 23470 }, { "epoch": 0.1544706354479846, "grad_norm": 0.10855670127650796, "learning_rate": 7.723430150324003e-06, "loss": 0.0033, "step": 23480 }, { "epoch": 0.15453642362321796, "grad_norm": 0.24720059145506992, "learning_rate": 7.726719515805403e-06, "loss": 0.0038, "step": 23490 }, { "epoch": 0.15460221179845135, "grad_norm": 0.31258540778411076, "learning_rate": 7.7300088812868e-06, "loss": 0.0044, "step": 23500 }, { "epoch": 0.15466799997368472, "grad_norm": 0.21353402142721872, "learning_rate": 7.7332982467682e-06, "loss": 0.0041, "step": 23510 }, { "epoch": 0.1547337881489181, "grad_norm": 0.19624115439946027, "learning_rate": 7.736587612249598e-06, "loss": 0.005, "step": 23520 }, { "epoch": 0.1547995763241515, "grad_norm": 0.10970660060387273, "learning_rate": 7.739876977730996e-06, "loss": 0.0041, "step": 23530 }, { "epoch": 0.15486536449938487, "grad_norm": 0.5173961398635022, "learning_rate": 7.743166343212395e-06, "loss": 0.0044, "step": 23540 }, { "epoch": 0.15493115267461827, "grad_norm": 0.15528133943330236, "learning_rate": 7.746455708693795e-06, "loss": 0.0054, "step": 23550 }, { "epoch": 0.15499694084985166, "grad_norm": 0.20682754093574113, "learning_rate": 7.749745074175193e-06, "loss": 0.0032, "step": 23560 }, { "epoch": 0.15506272902508503, "grad_norm": 0.0474217242102871, "learning_rate": 7.75303443965659e-06, "loss": 0.0053, "step": 23570 }, { "epoch": 0.15512851720031842, "grad_norm": 0.04844501151440962, "learning_rate": 7.75632380513799e-06, "loss": 0.0041, "step": 23580 }, { "epoch": 0.1551943053755518, "grad_norm": 0.0771881586680584, "learning_rate": 7.759613170619388e-06, "loss": 0.0037, "step": 23590 }, { "epoch": 0.15526009355078518, "grad_norm": 0.441004310061997, "learning_rate": 7.762902536100786e-06, "loss": 0.0041, "step": 23600 }, { "epoch": 0.15532588172601858, "grad_norm": 0.3045947414132706, "learning_rate": 7.766191901582185e-06, "loss": 0.0038, "step": 23610 }, { "epoch": 0.15539166990125194, "grad_norm": 0.2111046415332088, "learning_rate": 7.769481267063585e-06, "loss": 0.004, "step": 23620 }, { "epoch": 0.15545745807648534, "grad_norm": 0.2168135921132033, "learning_rate": 7.772770632544983e-06, "loss": 0.0038, "step": 23630 }, { "epoch": 0.1555232462517187, "grad_norm": 0.2209820169817308, "learning_rate": 7.776059998026382e-06, "loss": 0.0042, "step": 23640 }, { "epoch": 0.1555890344269521, "grad_norm": 0.3210415659860572, "learning_rate": 7.77934936350778e-06, "loss": 0.0047, "step": 23650 }, { "epoch": 0.1556548226021855, "grad_norm": 0.35341075556768636, "learning_rate": 7.782638728989178e-06, "loss": 0.0038, "step": 23660 }, { "epoch": 0.15572061077741886, "grad_norm": 0.06443219395042976, "learning_rate": 7.785928094470577e-06, "loss": 0.0039, "step": 23670 }, { "epoch": 0.15578639895265226, "grad_norm": 0.060689886196389264, "learning_rate": 7.789217459951975e-06, "loss": 0.0069, "step": 23680 }, { "epoch": 0.15585218712788562, "grad_norm": 0.27658890169440237, "learning_rate": 7.792506825433375e-06, "loss": 0.0039, "step": 23690 }, { "epoch": 0.15591797530311902, "grad_norm": 0.41745525245598036, "learning_rate": 7.795796190914773e-06, "loss": 0.0053, "step": 23700 }, { "epoch": 0.1559837634783524, "grad_norm": 0.11785311364153227, "learning_rate": 7.799085556396172e-06, "loss": 0.0068, "step": 23710 }, { "epoch": 0.15604955165358578, "grad_norm": 0.20179433891690388, "learning_rate": 7.80237492187757e-06, "loss": 0.0042, "step": 23720 }, { "epoch": 0.15611533982881917, "grad_norm": 0.35772541992849455, "learning_rate": 7.80566428735897e-06, "loss": 0.0034, "step": 23730 }, { "epoch": 0.15618112800405254, "grad_norm": 0.0960989928864433, "learning_rate": 7.808953652840367e-06, "loss": 0.0035, "step": 23740 }, { "epoch": 0.15624691617928593, "grad_norm": 0.10625610300221183, "learning_rate": 7.812243018321767e-06, "loss": 0.0055, "step": 23750 }, { "epoch": 0.15631270435451933, "grad_norm": 0.08085069926921469, "learning_rate": 7.815532383803165e-06, "loss": 0.0037, "step": 23760 }, { "epoch": 0.1563784925297527, "grad_norm": 0.38948085989250986, "learning_rate": 7.818821749284564e-06, "loss": 0.0036, "step": 23770 }, { "epoch": 0.1564442807049861, "grad_norm": 0.1668503245748016, "learning_rate": 7.822111114765962e-06, "loss": 0.0035, "step": 23780 }, { "epoch": 0.15651006888021948, "grad_norm": 0.10407699561625168, "learning_rate": 7.82540048024736e-06, "loss": 0.006, "step": 23790 }, { "epoch": 0.15657585705545285, "grad_norm": 0.02029648951805462, "learning_rate": 7.82868984572876e-06, "loss": 0.0037, "step": 23800 }, { "epoch": 0.15664164523068624, "grad_norm": 0.12314047071628875, "learning_rate": 7.831979211210157e-06, "loss": 0.0047, "step": 23810 }, { "epoch": 0.1567074334059196, "grad_norm": 0.04363541214601263, "learning_rate": 7.835268576691557e-06, "loss": 0.0055, "step": 23820 }, { "epoch": 0.156773221581153, "grad_norm": 0.15370416246636615, "learning_rate": 7.838557942172956e-06, "loss": 0.0044, "step": 23830 }, { "epoch": 0.1568390097563864, "grad_norm": 0.23722855028046894, "learning_rate": 7.841847307654354e-06, "loss": 0.0032, "step": 23840 }, { "epoch": 0.15690479793161977, "grad_norm": 0.09330300904687862, "learning_rate": 7.845136673135752e-06, "loss": 0.0058, "step": 23850 }, { "epoch": 0.15697058610685316, "grad_norm": 0.17525148549192993, "learning_rate": 7.848426038617152e-06, "loss": 0.0041, "step": 23860 }, { "epoch": 0.15703637428208653, "grad_norm": 0.25952299375421345, "learning_rate": 7.85171540409855e-06, "loss": 0.0054, "step": 23870 }, { "epoch": 0.15710216245731992, "grad_norm": 0.14037188897356462, "learning_rate": 7.855004769579949e-06, "loss": 0.0033, "step": 23880 }, { "epoch": 0.15716795063255332, "grad_norm": 0.061870446204876126, "learning_rate": 7.858294135061347e-06, "loss": 0.0042, "step": 23890 }, { "epoch": 0.15723373880778668, "grad_norm": 0.3599217589904295, "learning_rate": 7.861583500542746e-06, "loss": 0.0057, "step": 23900 }, { "epoch": 0.15729952698302008, "grad_norm": 0.10087038381403331, "learning_rate": 7.864872866024144e-06, "loss": 0.0031, "step": 23910 }, { "epoch": 0.15736531515825344, "grad_norm": 0.4125066893664957, "learning_rate": 7.868162231505543e-06, "loss": 0.0037, "step": 23920 }, { "epoch": 0.15743110333348684, "grad_norm": 0.2648938843456016, "learning_rate": 7.871451596986943e-06, "loss": 0.0035, "step": 23930 }, { "epoch": 0.15749689150872023, "grad_norm": 1.9734530567247917, "learning_rate": 7.874740962468341e-06, "loss": 0.0063, "step": 23940 }, { "epoch": 0.1575626796839536, "grad_norm": 0.19023532092966008, "learning_rate": 7.87803032794974e-06, "loss": 0.0044, "step": 23950 }, { "epoch": 0.157628467859187, "grad_norm": 0.06572961111285244, "learning_rate": 7.881319693431138e-06, "loss": 0.0051, "step": 23960 }, { "epoch": 0.15769425603442036, "grad_norm": 0.34240065506653916, "learning_rate": 7.884609058912536e-06, "loss": 0.0039, "step": 23970 }, { "epoch": 0.15776004420965375, "grad_norm": 0.1628283683282973, "learning_rate": 7.887898424393934e-06, "loss": 0.0049, "step": 23980 }, { "epoch": 0.15782583238488715, "grad_norm": 0.06611566045089463, "learning_rate": 7.891187789875334e-06, "loss": 0.0044, "step": 23990 }, { "epoch": 0.15789162056012052, "grad_norm": 0.28299280965526236, "learning_rate": 7.894477155356733e-06, "loss": 0.0037, "step": 24000 }, { "epoch": 0.1579574087353539, "grad_norm": 0.09921870028524604, "learning_rate": 7.897766520838131e-06, "loss": 0.0027, "step": 24010 }, { "epoch": 0.1580231969105873, "grad_norm": 0.019067780407599755, "learning_rate": 7.90105588631953e-06, "loss": 0.0035, "step": 24020 }, { "epoch": 0.15808898508582067, "grad_norm": 0.25592115492754774, "learning_rate": 7.904345251800928e-06, "loss": 0.0062, "step": 24030 }, { "epoch": 0.15815477326105407, "grad_norm": 0.08424809845288371, "learning_rate": 7.907634617282326e-06, "loss": 0.0045, "step": 24040 }, { "epoch": 0.15822056143628743, "grad_norm": 0.03049122768358159, "learning_rate": 7.910923982763725e-06, "loss": 0.0028, "step": 24050 }, { "epoch": 0.15828634961152083, "grad_norm": 0.13895666028259104, "learning_rate": 7.914213348245125e-06, "loss": 0.0038, "step": 24060 }, { "epoch": 0.15835213778675422, "grad_norm": 0.06930716124402464, "learning_rate": 7.917502713726523e-06, "loss": 0.0031, "step": 24070 }, { "epoch": 0.1584179259619876, "grad_norm": 0.142755308556803, "learning_rate": 7.920792079207921e-06, "loss": 0.0048, "step": 24080 }, { "epoch": 0.15848371413722098, "grad_norm": 0.19750778549373868, "learning_rate": 7.92408144468932e-06, "loss": 0.0053, "step": 24090 }, { "epoch": 0.15854950231245435, "grad_norm": 0.26278229529031444, "learning_rate": 7.92737081017072e-06, "loss": 0.0051, "step": 24100 }, { "epoch": 0.15861529048768774, "grad_norm": 0.15527972908224247, "learning_rate": 7.930660175652116e-06, "loss": 0.0027, "step": 24110 }, { "epoch": 0.15868107866292114, "grad_norm": 0.22121999023652844, "learning_rate": 7.933949541133515e-06, "loss": 0.0047, "step": 24120 }, { "epoch": 0.1587468668381545, "grad_norm": 0.1295412754314659, "learning_rate": 7.937238906614915e-06, "loss": 0.0043, "step": 24130 }, { "epoch": 0.1588126550133879, "grad_norm": 0.32286961877917386, "learning_rate": 7.940528272096313e-06, "loss": 0.0029, "step": 24140 }, { "epoch": 0.15887844318862127, "grad_norm": 0.09476331497300793, "learning_rate": 7.943817637577712e-06, "loss": 0.0036, "step": 24150 }, { "epoch": 0.15894423136385466, "grad_norm": 0.16548176725476793, "learning_rate": 7.94710700305911e-06, "loss": 0.004, "step": 24160 }, { "epoch": 0.15901001953908805, "grad_norm": 0.07463411034690012, "learning_rate": 7.95039636854051e-06, "loss": 0.0056, "step": 24170 }, { "epoch": 0.15907580771432142, "grad_norm": 0.08919296890870537, "learning_rate": 7.953685734021908e-06, "loss": 0.0046, "step": 24180 }, { "epoch": 0.15914159588955482, "grad_norm": 0.17099871350685877, "learning_rate": 7.956975099503307e-06, "loss": 0.0037, "step": 24190 }, { "epoch": 0.15920738406478818, "grad_norm": 0.0882541681733071, "learning_rate": 7.960264464984705e-06, "loss": 0.0039, "step": 24200 }, { "epoch": 0.15927317224002158, "grad_norm": 0.10322503387470207, "learning_rate": 7.963553830466103e-06, "loss": 0.0026, "step": 24210 }, { "epoch": 0.15933896041525497, "grad_norm": 0.11424417110735294, "learning_rate": 7.966843195947502e-06, "loss": 0.0053, "step": 24220 }, { "epoch": 0.15940474859048834, "grad_norm": 0.3506344373713226, "learning_rate": 7.970132561428902e-06, "loss": 0.0048, "step": 24230 }, { "epoch": 0.15947053676572173, "grad_norm": 0.10787227035506529, "learning_rate": 7.9734219269103e-06, "loss": 0.0039, "step": 24240 }, { "epoch": 0.15953632494095513, "grad_norm": 0.1462358137757721, "learning_rate": 7.976711292391699e-06, "loss": 0.0033, "step": 24250 }, { "epoch": 0.1596021131161885, "grad_norm": 0.13629153270388666, "learning_rate": 7.980000657873097e-06, "loss": 0.0032, "step": 24260 }, { "epoch": 0.1596679012914219, "grad_norm": 0.24310689651522957, "learning_rate": 7.983290023354495e-06, "loss": 0.0031, "step": 24270 }, { "epoch": 0.15973368946665525, "grad_norm": 0.24702294728076868, "learning_rate": 7.986579388835894e-06, "loss": 0.0061, "step": 24280 }, { "epoch": 0.15979947764188865, "grad_norm": 0.1587320520639759, "learning_rate": 7.989868754317292e-06, "loss": 0.0038, "step": 24290 }, { "epoch": 0.15986526581712204, "grad_norm": 0.1641156117873858, "learning_rate": 7.993158119798692e-06, "loss": 0.0047, "step": 24300 }, { "epoch": 0.1599310539923554, "grad_norm": 0.004107856779382798, "learning_rate": 7.99644748528009e-06, "loss": 0.0087, "step": 24310 }, { "epoch": 0.1599968421675888, "grad_norm": 0.10626550131001591, "learning_rate": 7.999736850761489e-06, "loss": 0.0046, "step": 24320 }, { "epoch": 0.16006263034282217, "grad_norm": 0.1801488902474661, "learning_rate": 8.003026216242887e-06, "loss": 0.004, "step": 24330 }, { "epoch": 0.16012841851805557, "grad_norm": 0.19018609121206026, "learning_rate": 8.006315581724287e-06, "loss": 0.005, "step": 24340 }, { "epoch": 0.16019420669328896, "grad_norm": 0.25578614019524387, "learning_rate": 8.009604947205684e-06, "loss": 0.0063, "step": 24350 }, { "epoch": 0.16025999486852233, "grad_norm": 0.33603516304225317, "learning_rate": 8.012894312687082e-06, "loss": 0.0034, "step": 24360 }, { "epoch": 0.16032578304375572, "grad_norm": 0.17314400238386063, "learning_rate": 8.016183678168482e-06, "loss": 0.0025, "step": 24370 }, { "epoch": 0.1603915712189891, "grad_norm": 0.12737547115373962, "learning_rate": 8.01947304364988e-06, "loss": 0.0031, "step": 24380 }, { "epoch": 0.16045735939422248, "grad_norm": 0.13277385541101802, "learning_rate": 8.022762409131279e-06, "loss": 0.0022, "step": 24390 }, { "epoch": 0.16052314756945588, "grad_norm": 0.3363618947451381, "learning_rate": 8.026051774612677e-06, "loss": 0.0041, "step": 24400 }, { "epoch": 0.16058893574468924, "grad_norm": 0.320151543933092, "learning_rate": 8.029341140094077e-06, "loss": 0.0039, "step": 24410 }, { "epoch": 0.16065472391992264, "grad_norm": 0.13346237532221916, "learning_rate": 8.032630505575474e-06, "loss": 0.0079, "step": 24420 }, { "epoch": 0.160720512095156, "grad_norm": 0.2014477849857948, "learning_rate": 8.035919871056874e-06, "loss": 0.0049, "step": 24430 }, { "epoch": 0.1607863002703894, "grad_norm": 0.1190849208026749, "learning_rate": 8.039209236538273e-06, "loss": 0.0044, "step": 24440 }, { "epoch": 0.1608520884456228, "grad_norm": 0.2572558813070681, "learning_rate": 8.042498602019671e-06, "loss": 0.0041, "step": 24450 }, { "epoch": 0.16091787662085616, "grad_norm": 0.10452894895207694, "learning_rate": 8.04578796750107e-06, "loss": 0.0058, "step": 24460 }, { "epoch": 0.16098366479608955, "grad_norm": 0.19163696591829976, "learning_rate": 8.049077332982468e-06, "loss": 0.0048, "step": 24470 }, { "epoch": 0.16104945297132292, "grad_norm": 0.21604808645858922, "learning_rate": 8.052366698463868e-06, "loss": 0.007, "step": 24480 }, { "epoch": 0.16111524114655632, "grad_norm": 0.06759702475405689, "learning_rate": 8.055656063945264e-06, "loss": 0.0026, "step": 24490 }, { "epoch": 0.1611810293217897, "grad_norm": 0.18260399603316593, "learning_rate": 8.058945429426664e-06, "loss": 0.0035, "step": 24500 }, { "epoch": 0.16124681749702308, "grad_norm": 0.17839763219290264, "learning_rate": 8.062234794908063e-06, "loss": 0.0043, "step": 24510 }, { "epoch": 0.16131260567225647, "grad_norm": 0.16309400125816584, "learning_rate": 8.065524160389461e-06, "loss": 0.0052, "step": 24520 }, { "epoch": 0.16137839384748986, "grad_norm": 0.1425760043802475, "learning_rate": 8.06881352587086e-06, "loss": 0.0036, "step": 24530 }, { "epoch": 0.16144418202272323, "grad_norm": 0.16089186964500649, "learning_rate": 8.07210289135226e-06, "loss": 0.0026, "step": 24540 }, { "epoch": 0.16150997019795663, "grad_norm": 0.10703449075784177, "learning_rate": 8.075392256833658e-06, "loss": 0.0028, "step": 24550 }, { "epoch": 0.16157575837319, "grad_norm": 0.18003415271340756, "learning_rate": 8.078681622315056e-06, "loss": 0.0043, "step": 24560 }, { "epoch": 0.1616415465484234, "grad_norm": 0.08755824032372495, "learning_rate": 8.081970987796455e-06, "loss": 0.0048, "step": 24570 }, { "epoch": 0.16170733472365678, "grad_norm": 0.20459602603561886, "learning_rate": 8.085260353277853e-06, "loss": 0.0062, "step": 24580 }, { "epoch": 0.16177312289889015, "grad_norm": 0.06043725801504488, "learning_rate": 8.088549718759251e-06, "loss": 0.003, "step": 24590 }, { "epoch": 0.16183891107412354, "grad_norm": 0.17506567006175852, "learning_rate": 8.09183908424065e-06, "loss": 0.0025, "step": 24600 }, { "epoch": 0.1619046992493569, "grad_norm": 0.10444670007953828, "learning_rate": 8.09512844972205e-06, "loss": 0.0048, "step": 24610 }, { "epoch": 0.1619704874245903, "grad_norm": 0.17231256880691403, "learning_rate": 8.098417815203448e-06, "loss": 0.003, "step": 24620 }, { "epoch": 0.1620362755998237, "grad_norm": 0.18373250457270285, "learning_rate": 8.101707180684847e-06, "loss": 0.0033, "step": 24630 }, { "epoch": 0.16210206377505706, "grad_norm": 0.108305609583242, "learning_rate": 8.104996546166245e-06, "loss": 0.0035, "step": 24640 }, { "epoch": 0.16216785195029046, "grad_norm": 0.17220438809480293, "learning_rate": 8.108285911647645e-06, "loss": 0.0046, "step": 24650 }, { "epoch": 0.16223364012552383, "grad_norm": 0.2989721231847918, "learning_rate": 8.111575277129042e-06, "loss": 0.0036, "step": 24660 }, { "epoch": 0.16229942830075722, "grad_norm": 0.16904178148102808, "learning_rate": 8.114864642610442e-06, "loss": 0.0048, "step": 24670 }, { "epoch": 0.16236521647599061, "grad_norm": 0.5225524960065421, "learning_rate": 8.11815400809184e-06, "loss": 0.0045, "step": 24680 }, { "epoch": 0.16243100465122398, "grad_norm": 0.0734398086882798, "learning_rate": 8.121443373573238e-06, "loss": 0.0042, "step": 24690 }, { "epoch": 0.16249679282645738, "grad_norm": 0.12375571876771843, "learning_rate": 8.124732739054637e-06, "loss": 0.0051, "step": 24700 }, { "epoch": 0.16256258100169074, "grad_norm": 0.24156109220435026, "learning_rate": 8.128022104536035e-06, "loss": 0.0047, "step": 24710 }, { "epoch": 0.16262836917692414, "grad_norm": 0.2727464073181419, "learning_rate": 8.131311470017435e-06, "loss": 0.004, "step": 24720 }, { "epoch": 0.16269415735215753, "grad_norm": 0.11811900993664605, "learning_rate": 8.134600835498832e-06, "loss": 0.0016, "step": 24730 }, { "epoch": 0.1627599455273909, "grad_norm": 0.2584906199810587, "learning_rate": 8.137890200980232e-06, "loss": 0.0057, "step": 24740 }, { "epoch": 0.1628257337026243, "grad_norm": 0.17299419648597142, "learning_rate": 8.14117956646163e-06, "loss": 0.0048, "step": 24750 }, { "epoch": 0.1628915218778577, "grad_norm": 0.08650901411127328, "learning_rate": 8.144468931943029e-06, "loss": 0.0037, "step": 24760 }, { "epoch": 0.16295731005309105, "grad_norm": 0.11911488713900467, "learning_rate": 8.147758297424427e-06, "loss": 0.0039, "step": 24770 }, { "epoch": 0.16302309822832445, "grad_norm": 0.029476624202844647, "learning_rate": 8.151047662905827e-06, "loss": 0.003, "step": 24780 }, { "epoch": 0.16308888640355781, "grad_norm": 0.13466291912998735, "learning_rate": 8.154337028387225e-06, "loss": 0.0033, "step": 24790 }, { "epoch": 0.1631546745787912, "grad_norm": 0.13893540755367595, "learning_rate": 8.157626393868622e-06, "loss": 0.0033, "step": 24800 }, { "epoch": 0.1632204627540246, "grad_norm": 0.19635796465674862, "learning_rate": 8.160915759350022e-06, "loss": 0.0027, "step": 24810 }, { "epoch": 0.16328625092925797, "grad_norm": 0.08627038279334663, "learning_rate": 8.16420512483142e-06, "loss": 0.0042, "step": 24820 }, { "epoch": 0.16335203910449136, "grad_norm": 0.29630232228425396, "learning_rate": 8.167494490312819e-06, "loss": 0.0051, "step": 24830 }, { "epoch": 0.16341782727972473, "grad_norm": 0.24746697324552497, "learning_rate": 8.170783855794217e-06, "loss": 0.0064, "step": 24840 }, { "epoch": 0.16348361545495813, "grad_norm": 0.2575536537524858, "learning_rate": 8.174073221275617e-06, "loss": 0.0044, "step": 24850 }, { "epoch": 0.16354940363019152, "grad_norm": 0.08102787869043586, "learning_rate": 8.177362586757016e-06, "loss": 0.0044, "step": 24860 }, { "epoch": 0.1636151918054249, "grad_norm": 0.4148576775309224, "learning_rate": 8.180651952238414e-06, "loss": 0.0059, "step": 24870 }, { "epoch": 0.16368097998065828, "grad_norm": 0.03046680675939073, "learning_rate": 8.183941317719812e-06, "loss": 0.0023, "step": 24880 }, { "epoch": 0.16374676815589165, "grad_norm": 0.1883485755423865, "learning_rate": 8.187230683201212e-06, "loss": 0.0028, "step": 24890 }, { "epoch": 0.16381255633112504, "grad_norm": 0.054676137778120776, "learning_rate": 8.190520048682609e-06, "loss": 0.0036, "step": 24900 }, { "epoch": 0.16387834450635844, "grad_norm": 0.1964475472668713, "learning_rate": 8.193809414164007e-06, "loss": 0.0032, "step": 24910 }, { "epoch": 0.1639441326815918, "grad_norm": 0.12780055334756826, "learning_rate": 8.197098779645408e-06, "loss": 0.005, "step": 24920 }, { "epoch": 0.1640099208568252, "grad_norm": 0.13410840962888468, "learning_rate": 8.200388145126806e-06, "loss": 0.0037, "step": 24930 }, { "epoch": 0.16407570903205856, "grad_norm": 0.14247462030307087, "learning_rate": 8.203677510608204e-06, "loss": 0.0047, "step": 24940 }, { "epoch": 0.16414149720729196, "grad_norm": 0.4642985214148537, "learning_rate": 8.206966876089603e-06, "loss": 0.0027, "step": 24950 }, { "epoch": 0.16420728538252535, "grad_norm": 0.23655654151034722, "learning_rate": 8.210256241571003e-06, "loss": 0.0042, "step": 24960 }, { "epoch": 0.16427307355775872, "grad_norm": 0.7696554912408892, "learning_rate": 8.2135456070524e-06, "loss": 0.003, "step": 24970 }, { "epoch": 0.16433886173299211, "grad_norm": 0.10281138165672342, "learning_rate": 8.2168349725338e-06, "loss": 0.0041, "step": 24980 }, { "epoch": 0.1644046499082255, "grad_norm": 0.06999109061465685, "learning_rate": 8.220124338015198e-06, "loss": 0.0056, "step": 24990 }, { "epoch": 0.16447043808345888, "grad_norm": 0.08865529092557295, "learning_rate": 8.223413703496596e-06, "loss": 0.0037, "step": 25000 }, { "epoch": 0.16447043808345888, "eval_loss": 0.002761112991720438, "eval_runtime": 13.4985, "eval_samples_per_second": 14.816, "eval_steps_per_second": 7.408, "step": 25000 }, { "epoch": 0.16453622625869227, "grad_norm": 0.23423850935842958, "learning_rate": 8.226703068977994e-06, "loss": 0.0043, "step": 25010 }, { "epoch": 0.16460201443392564, "grad_norm": 0.08514495516272208, "learning_rate": 8.229992434459395e-06, "loss": 0.0041, "step": 25020 }, { "epoch": 0.16466780260915903, "grad_norm": 0.025637653332848048, "learning_rate": 8.233281799940793e-06, "loss": 0.0047, "step": 25030 }, { "epoch": 0.16473359078439243, "grad_norm": 0.2011473353137935, "learning_rate": 8.23657116542219e-06, "loss": 0.0034, "step": 25040 }, { "epoch": 0.1647993789596258, "grad_norm": 0.7480701210597966, "learning_rate": 8.23986053090359e-06, "loss": 0.0071, "step": 25050 }, { "epoch": 0.1648651671348592, "grad_norm": 0.1120635407290162, "learning_rate": 8.243149896384988e-06, "loss": 0.0037, "step": 25060 }, { "epoch": 0.16493095531009255, "grad_norm": 0.10977487220538715, "learning_rate": 8.246439261866386e-06, "loss": 0.0044, "step": 25070 }, { "epoch": 0.16499674348532595, "grad_norm": 0.22055551978529422, "learning_rate": 8.249728627347785e-06, "loss": 0.0077, "step": 25080 }, { "epoch": 0.16506253166055934, "grad_norm": 0.43754146788456527, "learning_rate": 8.253017992829185e-06, "loss": 0.0026, "step": 25090 }, { "epoch": 0.1651283198357927, "grad_norm": 0.10382526537220874, "learning_rate": 8.256307358310583e-06, "loss": 0.0049, "step": 25100 }, { "epoch": 0.1651941080110261, "grad_norm": 0.3372017009751536, "learning_rate": 8.259596723791981e-06, "loss": 0.0048, "step": 25110 }, { "epoch": 0.16525989618625947, "grad_norm": 0.007974671459041772, "learning_rate": 8.26288608927338e-06, "loss": 0.005, "step": 25120 }, { "epoch": 0.16532568436149286, "grad_norm": 0.10595124359845846, "learning_rate": 8.266175454754778e-06, "loss": 0.0054, "step": 25130 }, { "epoch": 0.16539147253672626, "grad_norm": 0.143433463171153, "learning_rate": 8.269464820236177e-06, "loss": 0.0035, "step": 25140 }, { "epoch": 0.16545726071195963, "grad_norm": 0.33548303710190863, "learning_rate": 8.272754185717575e-06, "loss": 0.005, "step": 25150 }, { "epoch": 0.16552304888719302, "grad_norm": 0.17468077407773308, "learning_rate": 8.276043551198975e-06, "loss": 0.0083, "step": 25160 }, { "epoch": 0.16558883706242639, "grad_norm": 0.06923866837785006, "learning_rate": 8.279332916680373e-06, "loss": 0.0032, "step": 25170 }, { "epoch": 0.16565462523765978, "grad_norm": 0.1254102990940669, "learning_rate": 8.282622282161772e-06, "loss": 0.0062, "step": 25180 }, { "epoch": 0.16572041341289317, "grad_norm": 0.14674159763418965, "learning_rate": 8.28591164764317e-06, "loss": 0.0039, "step": 25190 }, { "epoch": 0.16578620158812654, "grad_norm": 0.40841934950156983, "learning_rate": 8.289201013124568e-06, "loss": 0.005, "step": 25200 }, { "epoch": 0.16585198976335994, "grad_norm": 0.12109583200598446, "learning_rate": 8.292490378605967e-06, "loss": 0.0032, "step": 25210 }, { "epoch": 0.16591777793859333, "grad_norm": 0.17256567166524034, "learning_rate": 8.295779744087367e-06, "loss": 0.0045, "step": 25220 }, { "epoch": 0.1659835661138267, "grad_norm": 0.12953952890050255, "learning_rate": 8.299069109568765e-06, "loss": 0.0044, "step": 25230 }, { "epoch": 0.1660493542890601, "grad_norm": 0.2283247701326008, "learning_rate": 8.302358475050164e-06, "loss": 0.0038, "step": 25240 }, { "epoch": 0.16611514246429346, "grad_norm": 0.08693707756973537, "learning_rate": 8.305647840531562e-06, "loss": 0.0029, "step": 25250 }, { "epoch": 0.16618093063952685, "grad_norm": 0.17521698041742312, "learning_rate": 8.30893720601296e-06, "loss": 0.003, "step": 25260 }, { "epoch": 0.16624671881476025, "grad_norm": 0.20422162003262134, "learning_rate": 8.31222657149436e-06, "loss": 0.0052, "step": 25270 }, { "epoch": 0.1663125069899936, "grad_norm": 0.15806181671483344, "learning_rate": 8.315515936975757e-06, "loss": 0.0037, "step": 25280 }, { "epoch": 0.166378295165227, "grad_norm": 0.21369108013970275, "learning_rate": 8.318805302457157e-06, "loss": 0.0042, "step": 25290 }, { "epoch": 0.16644408334046037, "grad_norm": 0.10651256326697606, "learning_rate": 8.322094667938555e-06, "loss": 0.0037, "step": 25300 }, { "epoch": 0.16650987151569377, "grad_norm": 0.1405741474017102, "learning_rate": 8.325384033419954e-06, "loss": 0.0036, "step": 25310 }, { "epoch": 0.16657565969092716, "grad_norm": 0.08817461487797451, "learning_rate": 8.328673398901352e-06, "loss": 0.0065, "step": 25320 }, { "epoch": 0.16664144786616053, "grad_norm": 0.13665807475480246, "learning_rate": 8.331962764382752e-06, "loss": 0.0041, "step": 25330 }, { "epoch": 0.16670723604139392, "grad_norm": 0.26243658113904844, "learning_rate": 8.33525212986415e-06, "loss": 0.0041, "step": 25340 }, { "epoch": 0.1667730242166273, "grad_norm": 0.1718809015841341, "learning_rate": 8.338541495345549e-06, "loss": 0.0044, "step": 25350 }, { "epoch": 0.16683881239186069, "grad_norm": 0.1536904362657223, "learning_rate": 8.341830860826947e-06, "loss": 0.0029, "step": 25360 }, { "epoch": 0.16690460056709408, "grad_norm": 0.08581990704416112, "learning_rate": 8.345120226308346e-06, "loss": 0.0043, "step": 25370 }, { "epoch": 0.16697038874232745, "grad_norm": 0.3403738695663554, "learning_rate": 8.348409591789744e-06, "loss": 0.0032, "step": 25380 }, { "epoch": 0.16703617691756084, "grad_norm": 0.1491264064380036, "learning_rate": 8.351698957271142e-06, "loss": 0.0044, "step": 25390 }, { "epoch": 0.1671019650927942, "grad_norm": 0.07045637241060757, "learning_rate": 8.354988322752542e-06, "loss": 0.005, "step": 25400 }, { "epoch": 0.1671677532680276, "grad_norm": 0.08608340006001523, "learning_rate": 8.35827768823394e-06, "loss": 0.0059, "step": 25410 }, { "epoch": 0.167233541443261, "grad_norm": 0.17467870500210064, "learning_rate": 8.36156705371534e-06, "loss": 0.0023, "step": 25420 }, { "epoch": 0.16729932961849436, "grad_norm": 0.45128331626004825, "learning_rate": 8.364856419196738e-06, "loss": 0.0031, "step": 25430 }, { "epoch": 0.16736511779372776, "grad_norm": 0.1593314893656155, "learning_rate": 8.368145784678136e-06, "loss": 0.0045, "step": 25440 }, { "epoch": 0.16743090596896115, "grad_norm": 0.09370959732062914, "learning_rate": 8.371435150159534e-06, "loss": 0.0027, "step": 25450 }, { "epoch": 0.16749669414419452, "grad_norm": 0.0529122747552783, "learning_rate": 8.374724515640934e-06, "loss": 0.0035, "step": 25460 }, { "epoch": 0.1675624823194279, "grad_norm": 0.08111871311141003, "learning_rate": 8.378013881122333e-06, "loss": 0.0022, "step": 25470 }, { "epoch": 0.16762827049466128, "grad_norm": 0.12470683329976942, "learning_rate": 8.381303246603731e-06, "loss": 0.0064, "step": 25480 }, { "epoch": 0.16769405866989467, "grad_norm": 0.30110141668932405, "learning_rate": 8.38459261208513e-06, "loss": 0.0048, "step": 25490 }, { "epoch": 0.16775984684512807, "grad_norm": 0.13838806480821114, "learning_rate": 8.387881977566528e-06, "loss": 0.0061, "step": 25500 }, { "epoch": 0.16782563502036144, "grad_norm": 0.18508149981073335, "learning_rate": 8.391171343047926e-06, "loss": 0.0027, "step": 25510 }, { "epoch": 0.16789142319559483, "grad_norm": 0.15739672370791027, "learning_rate": 8.394460708529324e-06, "loss": 0.0027, "step": 25520 }, { "epoch": 0.1679572113708282, "grad_norm": 0.019170116521334903, "learning_rate": 8.397750074010725e-06, "loss": 0.0039, "step": 25530 }, { "epoch": 0.1680229995460616, "grad_norm": 0.26591237527512396, "learning_rate": 8.401039439492123e-06, "loss": 0.0043, "step": 25540 }, { "epoch": 0.16808878772129499, "grad_norm": 0.07008822535915882, "learning_rate": 8.404328804973521e-06, "loss": 0.0025, "step": 25550 }, { "epoch": 0.16815457589652835, "grad_norm": 0.07041916739842764, "learning_rate": 8.40761817045492e-06, "loss": 0.0044, "step": 25560 }, { "epoch": 0.16822036407176175, "grad_norm": 0.17553480697867602, "learning_rate": 8.41090753593632e-06, "loss": 0.0057, "step": 25570 }, { "epoch": 0.1682861522469951, "grad_norm": 0.1953068055005021, "learning_rate": 8.414196901417716e-06, "loss": 0.0044, "step": 25580 }, { "epoch": 0.1683519404222285, "grad_norm": 0.14598864779966844, "learning_rate": 8.417486266899115e-06, "loss": 0.0048, "step": 25590 }, { "epoch": 0.1684177285974619, "grad_norm": 0.08782494296245018, "learning_rate": 8.420775632380515e-06, "loss": 0.0031, "step": 25600 }, { "epoch": 0.16848351677269527, "grad_norm": 0.26482661246927053, "learning_rate": 8.424064997861913e-06, "loss": 0.0045, "step": 25610 }, { "epoch": 0.16854930494792866, "grad_norm": 0.061939580952200826, "learning_rate": 8.427354363343311e-06, "loss": 0.0046, "step": 25620 }, { "epoch": 0.16861509312316203, "grad_norm": 0.30460054971812633, "learning_rate": 8.43064372882471e-06, "loss": 0.0035, "step": 25630 }, { "epoch": 0.16868088129839542, "grad_norm": 0.200790295770587, "learning_rate": 8.43393309430611e-06, "loss": 0.0034, "step": 25640 }, { "epoch": 0.16874666947362882, "grad_norm": 0.14809013831571807, "learning_rate": 8.437222459787508e-06, "loss": 0.0051, "step": 25650 }, { "epoch": 0.16881245764886219, "grad_norm": 0.192671237852319, "learning_rate": 8.440511825268907e-06, "loss": 0.0056, "step": 25660 }, { "epoch": 0.16887824582409558, "grad_norm": 0.24459834954872983, "learning_rate": 8.443801190750305e-06, "loss": 0.004, "step": 25670 }, { "epoch": 0.16894403399932897, "grad_norm": 0.1573282367794042, "learning_rate": 8.447090556231703e-06, "loss": 0.0031, "step": 25680 }, { "epoch": 0.16900982217456234, "grad_norm": 0.4163741958836399, "learning_rate": 8.450379921713102e-06, "loss": 0.0043, "step": 25690 }, { "epoch": 0.16907561034979574, "grad_norm": 0.10362838278290316, "learning_rate": 8.453669287194502e-06, "loss": 0.003, "step": 25700 }, { "epoch": 0.1691413985250291, "grad_norm": 0.02298247356650996, "learning_rate": 8.4569586526759e-06, "loss": 0.0031, "step": 25710 }, { "epoch": 0.1692071867002625, "grad_norm": 0.015160016863697317, "learning_rate": 8.460248018157298e-06, "loss": 0.0028, "step": 25720 }, { "epoch": 0.1692729748754959, "grad_norm": 0.09512453180803968, "learning_rate": 8.463537383638697e-06, "loss": 0.0034, "step": 25730 }, { "epoch": 0.16933876305072926, "grad_norm": 0.37375837729789413, "learning_rate": 8.466826749120095e-06, "loss": 0.0057, "step": 25740 }, { "epoch": 0.16940455122596265, "grad_norm": 0.3254490551085155, "learning_rate": 8.470116114601494e-06, "loss": 0.0072, "step": 25750 }, { "epoch": 0.16947033940119602, "grad_norm": 0.3360950924917663, "learning_rate": 8.473405480082892e-06, "loss": 0.0042, "step": 25760 }, { "epoch": 0.1695361275764294, "grad_norm": 0.11594657030411432, "learning_rate": 8.476694845564292e-06, "loss": 0.0039, "step": 25770 }, { "epoch": 0.1696019157516628, "grad_norm": 0.22444263967204528, "learning_rate": 8.47998421104569e-06, "loss": 0.003, "step": 25780 }, { "epoch": 0.16966770392689617, "grad_norm": 0.3623144434262562, "learning_rate": 8.483273576527089e-06, "loss": 0.0047, "step": 25790 }, { "epoch": 0.16973349210212957, "grad_norm": 0.14226490394827523, "learning_rate": 8.486562942008487e-06, "loss": 0.0033, "step": 25800 }, { "epoch": 0.16979928027736294, "grad_norm": 0.18188023554033375, "learning_rate": 8.489852307489887e-06, "loss": 0.0037, "step": 25810 }, { "epoch": 0.16986506845259633, "grad_norm": 0.09043539396892022, "learning_rate": 8.493141672971284e-06, "loss": 0.0059, "step": 25820 }, { "epoch": 0.16993085662782972, "grad_norm": 0.5740391532121274, "learning_rate": 8.496431038452682e-06, "loss": 0.0027, "step": 25830 }, { "epoch": 0.1699966448030631, "grad_norm": 0.3353180957897683, "learning_rate": 8.499720403934082e-06, "loss": 0.0037, "step": 25840 }, { "epoch": 0.17006243297829648, "grad_norm": 0.08202321879394363, "learning_rate": 8.50300976941548e-06, "loss": 0.0031, "step": 25850 }, { "epoch": 0.17012822115352985, "grad_norm": 0.2128142709568921, "learning_rate": 8.506299134896879e-06, "loss": 0.0042, "step": 25860 }, { "epoch": 0.17019400932876325, "grad_norm": 0.18258098099419795, "learning_rate": 8.509588500378277e-06, "loss": 0.0035, "step": 25870 }, { "epoch": 0.17025979750399664, "grad_norm": 0.14056446923784457, "learning_rate": 8.512877865859677e-06, "loss": 0.0043, "step": 25880 }, { "epoch": 0.17032558567923, "grad_norm": 0.1568881468123679, "learning_rate": 8.516167231341074e-06, "loss": 0.0036, "step": 25890 }, { "epoch": 0.1703913738544634, "grad_norm": 0.07996500477340067, "learning_rate": 8.519456596822474e-06, "loss": 0.0033, "step": 25900 }, { "epoch": 0.1704571620296968, "grad_norm": 0.12418883594697061, "learning_rate": 8.522745962303872e-06, "loss": 0.0041, "step": 25910 }, { "epoch": 0.17052295020493016, "grad_norm": 0.21371911185073567, "learning_rate": 8.52603532778527e-06, "loss": 0.003, "step": 25920 }, { "epoch": 0.17058873838016356, "grad_norm": 0.11011886115303135, "learning_rate": 8.52932469326667e-06, "loss": 0.0076, "step": 25930 }, { "epoch": 0.17065452655539692, "grad_norm": 0.1818060043184646, "learning_rate": 8.532614058748068e-06, "loss": 0.0031, "step": 25940 }, { "epoch": 0.17072031473063032, "grad_norm": 0.217191820669482, "learning_rate": 8.535903424229468e-06, "loss": 0.0038, "step": 25950 }, { "epoch": 0.1707861029058637, "grad_norm": 0.2962530952092759, "learning_rate": 8.539192789710864e-06, "loss": 0.0062, "step": 25960 }, { "epoch": 0.17085189108109708, "grad_norm": 0.09548371240801254, "learning_rate": 8.542482155192264e-06, "loss": 0.0034, "step": 25970 }, { "epoch": 0.17091767925633047, "grad_norm": 0.21502023442321108, "learning_rate": 8.545771520673663e-06, "loss": 0.0048, "step": 25980 }, { "epoch": 0.17098346743156384, "grad_norm": 0.06501541548897563, "learning_rate": 8.549060886155061e-06, "loss": 0.0046, "step": 25990 }, { "epoch": 0.17104925560679723, "grad_norm": 0.070645889516256, "learning_rate": 8.55235025163646e-06, "loss": 0.0036, "step": 26000 }, { "epoch": 0.17111504378203063, "grad_norm": 0.24090140956119835, "learning_rate": 8.55563961711786e-06, "loss": 0.0059, "step": 26010 }, { "epoch": 0.171180831957264, "grad_norm": 0.2588147588110863, "learning_rate": 8.558928982599258e-06, "loss": 0.0068, "step": 26020 }, { "epoch": 0.1712466201324974, "grad_norm": 0.1798056721965641, "learning_rate": 8.562218348080656e-06, "loss": 0.0041, "step": 26030 }, { "epoch": 0.17131240830773076, "grad_norm": 0.13655980309197246, "learning_rate": 8.565507713562055e-06, "loss": 0.0035, "step": 26040 }, { "epoch": 0.17137819648296415, "grad_norm": 0.1827274411991452, "learning_rate": 8.568797079043453e-06, "loss": 0.0074, "step": 26050 }, { "epoch": 0.17144398465819755, "grad_norm": 0.10505488718941484, "learning_rate": 8.572086444524851e-06, "loss": 0.0065, "step": 26060 }, { "epoch": 0.1715097728334309, "grad_norm": 0.056363879968297066, "learning_rate": 8.57537581000625e-06, "loss": 0.0036, "step": 26070 }, { "epoch": 0.1715755610086643, "grad_norm": 0.5669972652226761, "learning_rate": 8.57866517548765e-06, "loss": 0.0046, "step": 26080 }, { "epoch": 0.17164134918389767, "grad_norm": 0.21080469373960217, "learning_rate": 8.581954540969048e-06, "loss": 0.0042, "step": 26090 }, { "epoch": 0.17170713735913107, "grad_norm": 0.29403049500615874, "learning_rate": 8.585243906450446e-06, "loss": 0.0051, "step": 26100 }, { "epoch": 0.17177292553436446, "grad_norm": 0.08998429766873604, "learning_rate": 8.588533271931845e-06, "loss": 0.0032, "step": 26110 }, { "epoch": 0.17183871370959783, "grad_norm": 0.3635961702694095, "learning_rate": 8.591822637413245e-06, "loss": 0.0035, "step": 26120 }, { "epoch": 0.17190450188483122, "grad_norm": 0.4927445893021716, "learning_rate": 8.595112002894641e-06, "loss": 0.0044, "step": 26130 }, { "epoch": 0.17197029006006462, "grad_norm": 0.12660880479460748, "learning_rate": 8.598401368376042e-06, "loss": 0.0037, "step": 26140 }, { "epoch": 0.17203607823529798, "grad_norm": 0.10159224167517844, "learning_rate": 8.60169073385744e-06, "loss": 0.0026, "step": 26150 }, { "epoch": 0.17210186641053138, "grad_norm": 0.28831818702633444, "learning_rate": 8.604980099338838e-06, "loss": 0.0029, "step": 26160 }, { "epoch": 0.17216765458576475, "grad_norm": 0.2988163569289175, "learning_rate": 8.608269464820237e-06, "loss": 0.0043, "step": 26170 }, { "epoch": 0.17223344276099814, "grad_norm": 0.16282201635059457, "learning_rate": 8.611558830301635e-06, "loss": 0.0034, "step": 26180 }, { "epoch": 0.17229923093623153, "grad_norm": 0.6022296834896168, "learning_rate": 8.614848195783035e-06, "loss": 0.0038, "step": 26190 }, { "epoch": 0.1723650191114649, "grad_norm": 0.30082376317810755, "learning_rate": 8.618137561264432e-06, "loss": 0.0055, "step": 26200 }, { "epoch": 0.1724308072866983, "grad_norm": 0.05838672261908584, "learning_rate": 8.621426926745832e-06, "loss": 0.0039, "step": 26210 }, { "epoch": 0.17249659546193166, "grad_norm": 0.2765565534149153, "learning_rate": 8.62471629222723e-06, "loss": 0.003, "step": 26220 }, { "epoch": 0.17256238363716506, "grad_norm": 0.2603736484613843, "learning_rate": 8.628005657708629e-06, "loss": 0.0068, "step": 26230 }, { "epoch": 0.17262817181239845, "grad_norm": 0.16474527182667295, "learning_rate": 8.631295023190027e-06, "loss": 0.0033, "step": 26240 }, { "epoch": 0.17269395998763182, "grad_norm": 0.12194669973146728, "learning_rate": 8.634584388671427e-06, "loss": 0.0039, "step": 26250 }, { "epoch": 0.1727597481628652, "grad_norm": 0.2615021196506132, "learning_rate": 8.637873754152825e-06, "loss": 0.0065, "step": 26260 }, { "epoch": 0.17282553633809858, "grad_norm": 0.1822771657137411, "learning_rate": 8.641163119634222e-06, "loss": 0.0031, "step": 26270 }, { "epoch": 0.17289132451333197, "grad_norm": 0.11521764606466087, "learning_rate": 8.644452485115622e-06, "loss": 0.0035, "step": 26280 }, { "epoch": 0.17295711268856537, "grad_norm": 0.21184391500201755, "learning_rate": 8.64774185059702e-06, "loss": 0.0048, "step": 26290 }, { "epoch": 0.17302290086379873, "grad_norm": 0.30672121895587395, "learning_rate": 8.651031216078419e-06, "loss": 0.0048, "step": 26300 }, { "epoch": 0.17308868903903213, "grad_norm": 0.0691798728285221, "learning_rate": 8.654320581559817e-06, "loss": 0.0045, "step": 26310 }, { "epoch": 0.1731544772142655, "grad_norm": 0.2596853251609014, "learning_rate": 8.657609947041217e-06, "loss": 0.0054, "step": 26320 }, { "epoch": 0.1732202653894989, "grad_norm": 0.19023365184885446, "learning_rate": 8.660899312522616e-06, "loss": 0.0042, "step": 26330 }, { "epoch": 0.17328605356473228, "grad_norm": 0.10729797959330668, "learning_rate": 8.664188678004014e-06, "loss": 0.0026, "step": 26340 }, { "epoch": 0.17335184173996565, "grad_norm": 0.056723979969255094, "learning_rate": 8.667478043485412e-06, "loss": 0.0033, "step": 26350 }, { "epoch": 0.17341762991519905, "grad_norm": 0.29460110580501514, "learning_rate": 8.670767408966812e-06, "loss": 0.005, "step": 26360 }, { "epoch": 0.1734834180904324, "grad_norm": 0.17773304709919754, "learning_rate": 8.674056774448209e-06, "loss": 0.0033, "step": 26370 }, { "epoch": 0.1735492062656658, "grad_norm": 0.15060717354283917, "learning_rate": 8.677346139929609e-06, "loss": 0.0031, "step": 26380 }, { "epoch": 0.1736149944408992, "grad_norm": 0.17036197012063356, "learning_rate": 8.680635505411007e-06, "loss": 0.0027, "step": 26390 }, { "epoch": 0.17368078261613257, "grad_norm": 0.1828206020279724, "learning_rate": 8.683924870892406e-06, "loss": 0.0054, "step": 26400 }, { "epoch": 0.17374657079136596, "grad_norm": 0.22288650451158198, "learning_rate": 8.687214236373804e-06, "loss": 0.004, "step": 26410 }, { "epoch": 0.17381235896659936, "grad_norm": 0.08677343251408501, "learning_rate": 8.690503601855202e-06, "loss": 0.0034, "step": 26420 }, { "epoch": 0.17387814714183272, "grad_norm": 0.08898304361156859, "learning_rate": 8.693792967336603e-06, "loss": 0.0049, "step": 26430 }, { "epoch": 0.17394393531706612, "grad_norm": 0.2133942009294529, "learning_rate": 8.697082332818e-06, "loss": 0.0044, "step": 26440 }, { "epoch": 0.17400972349229948, "grad_norm": 0.041694374333600286, "learning_rate": 8.7003716982994e-06, "loss": 0.0053, "step": 26450 }, { "epoch": 0.17407551166753288, "grad_norm": 0.1027111639652914, "learning_rate": 8.703661063780798e-06, "loss": 0.0039, "step": 26460 }, { "epoch": 0.17414129984276627, "grad_norm": 0.20445889528727193, "learning_rate": 8.706950429262196e-06, "loss": 0.0039, "step": 26470 }, { "epoch": 0.17420708801799964, "grad_norm": 0.11788534052951827, "learning_rate": 8.710239794743594e-06, "loss": 0.0026, "step": 26480 }, { "epoch": 0.17427287619323303, "grad_norm": 0.38784157064352476, "learning_rate": 8.713529160224994e-06, "loss": 0.0034, "step": 26490 }, { "epoch": 0.1743386643684664, "grad_norm": 0.3794147572235317, "learning_rate": 8.716818525706393e-06, "loss": 0.0052, "step": 26500 }, { "epoch": 0.1744044525436998, "grad_norm": 0.26826244021739176, "learning_rate": 8.72010789118779e-06, "loss": 0.0055, "step": 26510 }, { "epoch": 0.1744702407189332, "grad_norm": 0.1355272223766405, "learning_rate": 8.72339725666919e-06, "loss": 0.0034, "step": 26520 }, { "epoch": 0.17453602889416656, "grad_norm": 0.2271490483338606, "learning_rate": 8.726686622150588e-06, "loss": 0.0036, "step": 26530 }, { "epoch": 0.17460181706939995, "grad_norm": 0.19172855600567806, "learning_rate": 8.729975987631986e-06, "loss": 0.0045, "step": 26540 }, { "epoch": 0.17466760524463332, "grad_norm": 0.03127356120659973, "learning_rate": 8.733265353113385e-06, "loss": 0.0043, "step": 26550 }, { "epoch": 0.1747333934198667, "grad_norm": 0.0681812428118095, "learning_rate": 8.736554718594785e-06, "loss": 0.0027, "step": 26560 }, { "epoch": 0.1747991815951001, "grad_norm": 0.051419676029598696, "learning_rate": 8.739844084076183e-06, "loss": 0.0052, "step": 26570 }, { "epoch": 0.17486496977033347, "grad_norm": 0.24733373293716066, "learning_rate": 8.743133449557581e-06, "loss": 0.0047, "step": 26580 }, { "epoch": 0.17493075794556687, "grad_norm": 0.1559472750449584, "learning_rate": 8.74642281503898e-06, "loss": 0.0035, "step": 26590 }, { "epoch": 0.17499654612080023, "grad_norm": 0.318931431755413, "learning_rate": 8.749712180520378e-06, "loss": 0.0054, "step": 26600 }, { "epoch": 0.17506233429603363, "grad_norm": 0.2049830308615028, "learning_rate": 8.753001546001776e-06, "loss": 0.005, "step": 26610 }, { "epoch": 0.17512812247126702, "grad_norm": 0.10222355918501244, "learning_rate": 8.756290911483175e-06, "loss": 0.0032, "step": 26620 }, { "epoch": 0.1751939106465004, "grad_norm": 0.1123403499163519, "learning_rate": 8.759580276964575e-06, "loss": 0.005, "step": 26630 }, { "epoch": 0.17525969882173378, "grad_norm": 0.2097994435697556, "learning_rate": 8.762869642445973e-06, "loss": 0.0072, "step": 26640 }, { "epoch": 0.17532548699696718, "grad_norm": 0.06946542022898868, "learning_rate": 8.766159007927372e-06, "loss": 0.0048, "step": 26650 }, { "epoch": 0.17539127517220054, "grad_norm": 0.15631893734212077, "learning_rate": 8.76944837340877e-06, "loss": 0.0043, "step": 26660 }, { "epoch": 0.17545706334743394, "grad_norm": 0.08999704412892535, "learning_rate": 8.772737738890168e-06, "loss": 0.0035, "step": 26670 }, { "epoch": 0.1755228515226673, "grad_norm": 0.20544560494065037, "learning_rate": 8.776027104371567e-06, "loss": 0.0049, "step": 26680 }, { "epoch": 0.1755886396979007, "grad_norm": 0.17566503322535582, "learning_rate": 8.779316469852967e-06, "loss": 0.0057, "step": 26690 }, { "epoch": 0.1756544278731341, "grad_norm": 0.021967559643964917, "learning_rate": 8.782605835334365e-06, "loss": 0.0028, "step": 26700 }, { "epoch": 0.17572021604836746, "grad_norm": 0.173827352407833, "learning_rate": 8.785895200815763e-06, "loss": 0.0047, "step": 26710 }, { "epoch": 0.17578600422360086, "grad_norm": 0.07233592321012978, "learning_rate": 8.789184566297162e-06, "loss": 0.0051, "step": 26720 }, { "epoch": 0.17585179239883422, "grad_norm": 0.08094563290558579, "learning_rate": 8.79247393177856e-06, "loss": 0.0054, "step": 26730 }, { "epoch": 0.17591758057406762, "grad_norm": 0.17971694676527492, "learning_rate": 8.79576329725996e-06, "loss": 0.006, "step": 26740 }, { "epoch": 0.175983368749301, "grad_norm": 0.29302940872762157, "learning_rate": 8.799052662741357e-06, "loss": 0.004, "step": 26750 }, { "epoch": 0.17604915692453438, "grad_norm": 0.23529434246403272, "learning_rate": 8.802342028222757e-06, "loss": 0.0033, "step": 26760 }, { "epoch": 0.17611494509976777, "grad_norm": 0.33344832920803286, "learning_rate": 8.805631393704155e-06, "loss": 0.0066, "step": 26770 }, { "epoch": 0.17618073327500114, "grad_norm": 0.1493357935000021, "learning_rate": 8.808920759185554e-06, "loss": 0.0037, "step": 26780 }, { "epoch": 0.17624652145023453, "grad_norm": 0.12479136364469356, "learning_rate": 8.812210124666952e-06, "loss": 0.0079, "step": 26790 }, { "epoch": 0.17631230962546793, "grad_norm": 0.17907133864759764, "learning_rate": 8.815499490148352e-06, "loss": 0.0035, "step": 26800 }, { "epoch": 0.1763780978007013, "grad_norm": 0.13817326813358072, "learning_rate": 8.81878885562975e-06, "loss": 0.003, "step": 26810 }, { "epoch": 0.1764438859759347, "grad_norm": 0.13829069802024924, "learning_rate": 8.822078221111149e-06, "loss": 0.0042, "step": 26820 }, { "epoch": 0.17650967415116806, "grad_norm": 0.5250369137788788, "learning_rate": 8.825367586592547e-06, "loss": 0.0049, "step": 26830 }, { "epoch": 0.17657546232640145, "grad_norm": 0.14519533135634755, "learning_rate": 8.828656952073946e-06, "loss": 0.0041, "step": 26840 }, { "epoch": 0.17664125050163484, "grad_norm": 0.1897659444041922, "learning_rate": 8.831946317555344e-06, "loss": 0.0048, "step": 26850 }, { "epoch": 0.1767070386768682, "grad_norm": 0.2290129396270122, "learning_rate": 8.835235683036742e-06, "loss": 0.0051, "step": 26860 }, { "epoch": 0.1767728268521016, "grad_norm": 0.11723864837602539, "learning_rate": 8.838525048518142e-06, "loss": 0.0034, "step": 26870 }, { "epoch": 0.176838615027335, "grad_norm": 0.037272232720677236, "learning_rate": 8.84181441399954e-06, "loss": 0.0036, "step": 26880 }, { "epoch": 0.17690440320256837, "grad_norm": 0.11843130679635139, "learning_rate": 8.845103779480939e-06, "loss": 0.0037, "step": 26890 }, { "epoch": 0.17697019137780176, "grad_norm": 0.055878151159451425, "learning_rate": 8.848393144962337e-06, "loss": 0.0042, "step": 26900 }, { "epoch": 0.17703597955303513, "grad_norm": 0.23379743074577522, "learning_rate": 8.851682510443736e-06, "loss": 0.0037, "step": 26910 }, { "epoch": 0.17710176772826852, "grad_norm": 0.03730156110662929, "learning_rate": 8.854971875925134e-06, "loss": 0.0025, "step": 26920 }, { "epoch": 0.17716755590350192, "grad_norm": 0.1442591027456807, "learning_rate": 8.858261241406534e-06, "loss": 0.0034, "step": 26930 }, { "epoch": 0.17723334407873528, "grad_norm": 0.27708141468434433, "learning_rate": 8.861550606887933e-06, "loss": 0.0048, "step": 26940 }, { "epoch": 0.17729913225396868, "grad_norm": 0.3035657774718094, "learning_rate": 8.864839972369331e-06, "loss": 0.0059, "step": 26950 }, { "epoch": 0.17736492042920204, "grad_norm": 0.26250569847236327, "learning_rate": 8.86812933785073e-06, "loss": 0.0034, "step": 26960 }, { "epoch": 0.17743070860443544, "grad_norm": 0.11273790665461744, "learning_rate": 8.871418703332128e-06, "loss": 0.0042, "step": 26970 }, { "epoch": 0.17749649677966883, "grad_norm": 0.5150148993393796, "learning_rate": 8.874708068813526e-06, "loss": 0.0063, "step": 26980 }, { "epoch": 0.1775622849549022, "grad_norm": 0.1385062008455449, "learning_rate": 8.877997434294924e-06, "loss": 0.0043, "step": 26990 }, { "epoch": 0.1776280731301356, "grad_norm": 0.21765059908473516, "learning_rate": 8.881286799776324e-06, "loss": 0.0032, "step": 27000 }, { "epoch": 0.17769386130536896, "grad_norm": 0.22677861447425518, "learning_rate": 8.884576165257723e-06, "loss": 0.004, "step": 27010 }, { "epoch": 0.17775964948060236, "grad_norm": 0.144274324963644, "learning_rate": 8.887865530739121e-06, "loss": 0.0067, "step": 27020 }, { "epoch": 0.17782543765583575, "grad_norm": 0.2176659810383989, "learning_rate": 8.89115489622052e-06, "loss": 0.0037, "step": 27030 }, { "epoch": 0.17789122583106912, "grad_norm": 0.1288094021185384, "learning_rate": 8.89444426170192e-06, "loss": 0.0035, "step": 27040 }, { "epoch": 0.1779570140063025, "grad_norm": 0.1411449603172393, "learning_rate": 8.897733627183316e-06, "loss": 0.0037, "step": 27050 }, { "epoch": 0.17802280218153588, "grad_norm": 0.17728244812906346, "learning_rate": 8.901022992664716e-06, "loss": 0.0049, "step": 27060 }, { "epoch": 0.17808859035676927, "grad_norm": 0.109572831315261, "learning_rate": 8.904312358146115e-06, "loss": 0.004, "step": 27070 }, { "epoch": 0.17815437853200267, "grad_norm": 0.04064926137592133, "learning_rate": 8.907601723627513e-06, "loss": 0.0024, "step": 27080 }, { "epoch": 0.17822016670723603, "grad_norm": 0.17536719437148285, "learning_rate": 8.910891089108911e-06, "loss": 0.0068, "step": 27090 }, { "epoch": 0.17828595488246943, "grad_norm": 0.16110275057216325, "learning_rate": 8.91418045459031e-06, "loss": 0.0104, "step": 27100 }, { "epoch": 0.17835174305770282, "grad_norm": 0.9749202494752809, "learning_rate": 8.91746982007171e-06, "loss": 0.0049, "step": 27110 }, { "epoch": 0.1784175312329362, "grad_norm": 0.294789552192893, "learning_rate": 8.920759185553106e-06, "loss": 0.005, "step": 27120 }, { "epoch": 0.17848331940816958, "grad_norm": 0.27334755979494424, "learning_rate": 8.924048551034506e-06, "loss": 0.0047, "step": 27130 }, { "epoch": 0.17854910758340295, "grad_norm": 0.08689622091198504, "learning_rate": 8.927337916515905e-06, "loss": 0.0073, "step": 27140 }, { "epoch": 0.17861489575863634, "grad_norm": 0.0879029672273368, "learning_rate": 8.930627281997303e-06, "loss": 0.0066, "step": 27150 }, { "epoch": 0.17868068393386974, "grad_norm": 0.04820349180397107, "learning_rate": 8.933916647478702e-06, "loss": 0.0033, "step": 27160 }, { "epoch": 0.1787464721091031, "grad_norm": 0.7385078857434697, "learning_rate": 8.937206012960102e-06, "loss": 0.0052, "step": 27170 }, { "epoch": 0.1788122602843365, "grad_norm": 0.10594686926545563, "learning_rate": 8.9404953784415e-06, "loss": 0.0046, "step": 27180 }, { "epoch": 0.17887804845956987, "grad_norm": 0.28779040215781365, "learning_rate": 8.943784743922898e-06, "loss": 0.0041, "step": 27190 }, { "epoch": 0.17894383663480326, "grad_norm": 0.08357912104716106, "learning_rate": 8.947074109404297e-06, "loss": 0.0022, "step": 27200 }, { "epoch": 0.17900962481003665, "grad_norm": 0.14386016017978195, "learning_rate": 8.950363474885695e-06, "loss": 0.0034, "step": 27210 }, { "epoch": 0.17907541298527002, "grad_norm": 0.14000078135971097, "learning_rate": 8.953652840367093e-06, "loss": 0.0021, "step": 27220 }, { "epoch": 0.17914120116050342, "grad_norm": 0.09061550204632317, "learning_rate": 8.956942205848492e-06, "loss": 0.0044, "step": 27230 }, { "epoch": 0.17920698933573678, "grad_norm": 0.09732372596985808, "learning_rate": 8.960231571329892e-06, "loss": 0.0024, "step": 27240 }, { "epoch": 0.17927277751097018, "grad_norm": 0.05190333859664854, "learning_rate": 8.96352093681129e-06, "loss": 0.0027, "step": 27250 }, { "epoch": 0.17933856568620357, "grad_norm": 0.10225991203877213, "learning_rate": 8.966810302292689e-06, "loss": 0.0038, "step": 27260 }, { "epoch": 0.17940435386143694, "grad_norm": 0.4060926481560335, "learning_rate": 8.970099667774087e-06, "loss": 0.0036, "step": 27270 }, { "epoch": 0.17947014203667033, "grad_norm": 0.1241781249221029, "learning_rate": 8.973389033255487e-06, "loss": 0.0039, "step": 27280 }, { "epoch": 0.1795359302119037, "grad_norm": 0.1143877122889676, "learning_rate": 8.976678398736884e-06, "loss": 0.0037, "step": 27290 }, { "epoch": 0.1796017183871371, "grad_norm": 0.1307266959812176, "learning_rate": 8.979967764218282e-06, "loss": 0.0075, "step": 27300 }, { "epoch": 0.1796675065623705, "grad_norm": 0.417895024925785, "learning_rate": 8.983257129699682e-06, "loss": 0.0044, "step": 27310 }, { "epoch": 0.17973329473760385, "grad_norm": 0.16044967832698528, "learning_rate": 8.98654649518108e-06, "loss": 0.0039, "step": 27320 }, { "epoch": 0.17979908291283725, "grad_norm": 0.12971769765304766, "learning_rate": 8.989835860662479e-06, "loss": 0.005, "step": 27330 }, { "epoch": 0.17986487108807064, "grad_norm": 0.13369886730206493, "learning_rate": 8.993125226143877e-06, "loss": 0.0046, "step": 27340 }, { "epoch": 0.179930659263304, "grad_norm": 0.05410185718332589, "learning_rate": 8.996414591625277e-06, "loss": 0.0048, "step": 27350 }, { "epoch": 0.1799964474385374, "grad_norm": 0.3020602914225411, "learning_rate": 8.999703957106674e-06, "loss": 0.0031, "step": 27360 }, { "epoch": 0.18006223561377077, "grad_norm": 0.15613553367891908, "learning_rate": 9.002993322588074e-06, "loss": 0.004, "step": 27370 }, { "epoch": 0.18012802378900417, "grad_norm": 0.035392444814758554, "learning_rate": 9.006282688069472e-06, "loss": 0.004, "step": 27380 }, { "epoch": 0.18019381196423756, "grad_norm": 0.19747680515564378, "learning_rate": 9.00957205355087e-06, "loss": 0.0095, "step": 27390 }, { "epoch": 0.18025960013947093, "grad_norm": 0.29323033302524526, "learning_rate": 9.012861419032269e-06, "loss": 0.0052, "step": 27400 }, { "epoch": 0.18032538831470432, "grad_norm": 0.2215776767791163, "learning_rate": 9.016150784513667e-06, "loss": 0.0041, "step": 27410 }, { "epoch": 0.1803911764899377, "grad_norm": 0.1397067963662237, "learning_rate": 9.019440149995067e-06, "loss": 0.003, "step": 27420 }, { "epoch": 0.18045696466517108, "grad_norm": 0.20461343990257583, "learning_rate": 9.022729515476464e-06, "loss": 0.0034, "step": 27430 }, { "epoch": 0.18052275284040448, "grad_norm": 0.23788788241011655, "learning_rate": 9.026018880957864e-06, "loss": 0.0034, "step": 27440 }, { "epoch": 0.18058854101563784, "grad_norm": 0.3930755102239977, "learning_rate": 9.029308246439263e-06, "loss": 0.0024, "step": 27450 }, { "epoch": 0.18065432919087124, "grad_norm": 0.24064843838570207, "learning_rate": 9.032597611920661e-06, "loss": 0.0039, "step": 27460 }, { "epoch": 0.1807201173661046, "grad_norm": 0.48537953052019556, "learning_rate": 9.03588697740206e-06, "loss": 0.0056, "step": 27470 }, { "epoch": 0.180785905541338, "grad_norm": 0.07166441816789837, "learning_rate": 9.03917634288346e-06, "loss": 0.0032, "step": 27480 }, { "epoch": 0.1808516937165714, "grad_norm": 0.3677679737826756, "learning_rate": 9.042465708364858e-06, "loss": 0.0031, "step": 27490 }, { "epoch": 0.18091748189180476, "grad_norm": 0.36581418491732026, "learning_rate": 9.045755073846256e-06, "loss": 0.0028, "step": 27500 }, { "epoch": 0.18098327006703815, "grad_norm": 0.18051815358479553, "learning_rate": 9.049044439327654e-06, "loss": 0.0059, "step": 27510 }, { "epoch": 0.18104905824227152, "grad_norm": 0.11046093294411444, "learning_rate": 9.052333804809053e-06, "loss": 0.0027, "step": 27520 }, { "epoch": 0.18111484641750492, "grad_norm": 0.11798342559243494, "learning_rate": 9.055623170290451e-06, "loss": 0.004, "step": 27530 }, { "epoch": 0.1811806345927383, "grad_norm": 0.18645901990656633, "learning_rate": 9.05891253577185e-06, "loss": 0.0069, "step": 27540 }, { "epoch": 0.18124642276797168, "grad_norm": 0.15059000534072808, "learning_rate": 9.06220190125325e-06, "loss": 0.0057, "step": 27550 }, { "epoch": 0.18131221094320507, "grad_norm": 0.32374776034920094, "learning_rate": 9.065491266734648e-06, "loss": 0.0043, "step": 27560 }, { "epoch": 0.18137799911843847, "grad_norm": 0.21913293814389542, "learning_rate": 9.068780632216046e-06, "loss": 0.0053, "step": 27570 }, { "epoch": 0.18144378729367183, "grad_norm": 0.08195512097734325, "learning_rate": 9.072069997697445e-06, "loss": 0.0061, "step": 27580 }, { "epoch": 0.18150957546890523, "grad_norm": 0.05232668007598632, "learning_rate": 9.075359363178845e-06, "loss": 0.0041, "step": 27590 }, { "epoch": 0.1815753636441386, "grad_norm": 0.3247351008058549, "learning_rate": 9.078648728660241e-06, "loss": 0.0081, "step": 27600 }, { "epoch": 0.181641151819372, "grad_norm": 0.19080171942790364, "learning_rate": 9.081938094141641e-06, "loss": 0.0048, "step": 27610 }, { "epoch": 0.18170693999460538, "grad_norm": 0.15258639567617283, "learning_rate": 9.08522745962304e-06, "loss": 0.002, "step": 27620 }, { "epoch": 0.18177272816983875, "grad_norm": 0.12617875350881674, "learning_rate": 9.088516825104438e-06, "loss": 0.0044, "step": 27630 }, { "epoch": 0.18183851634507214, "grad_norm": 0.14238135793767734, "learning_rate": 9.091806190585837e-06, "loss": 0.0034, "step": 27640 }, { "epoch": 0.1819043045203055, "grad_norm": 0.18773508642394207, "learning_rate": 9.095095556067235e-06, "loss": 0.0025, "step": 27650 }, { "epoch": 0.1819700926955389, "grad_norm": 0.11468080614197215, "learning_rate": 9.098384921548635e-06, "loss": 0.0037, "step": 27660 }, { "epoch": 0.1820358808707723, "grad_norm": 0.0936283169420567, "learning_rate": 9.101674287030032e-06, "loss": 0.0029, "step": 27670 }, { "epoch": 0.18210166904600567, "grad_norm": 0.13874142465986514, "learning_rate": 9.104963652511432e-06, "loss": 0.0041, "step": 27680 }, { "epoch": 0.18216745722123906, "grad_norm": 0.1605996231084874, "learning_rate": 9.10825301799283e-06, "loss": 0.0028, "step": 27690 }, { "epoch": 0.18223324539647243, "grad_norm": 0.06358507021077747, "learning_rate": 9.111542383474228e-06, "loss": 0.0041, "step": 27700 }, { "epoch": 0.18229903357170582, "grad_norm": 0.2520011422627148, "learning_rate": 9.114831748955627e-06, "loss": 0.0036, "step": 27710 }, { "epoch": 0.18236482174693922, "grad_norm": 0.13661870802023326, "learning_rate": 9.118121114437027e-06, "loss": 0.0079, "step": 27720 }, { "epoch": 0.18243060992217258, "grad_norm": 0.06388678380446877, "learning_rate": 9.121410479918425e-06, "loss": 0.0067, "step": 27730 }, { "epoch": 0.18249639809740598, "grad_norm": 0.1930493826787001, "learning_rate": 9.124699845399824e-06, "loss": 0.0018, "step": 27740 }, { "epoch": 0.18256218627263934, "grad_norm": 0.054739585916484715, "learning_rate": 9.127989210881222e-06, "loss": 0.0114, "step": 27750 }, { "epoch": 0.18262797444787274, "grad_norm": 0.05905821925671181, "learning_rate": 9.13127857636262e-06, "loss": 0.0048, "step": 27760 }, { "epoch": 0.18269376262310613, "grad_norm": 0.08153765300093095, "learning_rate": 9.134567941844019e-06, "loss": 0.0048, "step": 27770 }, { "epoch": 0.1827595507983395, "grad_norm": 0.07032540243757061, "learning_rate": 9.137857307325417e-06, "loss": 0.0036, "step": 27780 }, { "epoch": 0.1828253389735729, "grad_norm": 0.19740432904691638, "learning_rate": 9.141146672806817e-06, "loss": 0.0047, "step": 27790 }, { "epoch": 0.1828911271488063, "grad_norm": 0.44406096271130385, "learning_rate": 9.144436038288215e-06, "loss": 0.0048, "step": 27800 }, { "epoch": 0.18295691532403965, "grad_norm": 0.2020998017902333, "learning_rate": 9.147725403769614e-06, "loss": 0.0066, "step": 27810 }, { "epoch": 0.18302270349927305, "grad_norm": 0.10894726787425286, "learning_rate": 9.151014769251012e-06, "loss": 0.0061, "step": 27820 }, { "epoch": 0.18308849167450642, "grad_norm": 0.08690331288345611, "learning_rate": 9.15430413473241e-06, "loss": 0.0057, "step": 27830 }, { "epoch": 0.1831542798497398, "grad_norm": 0.1026960807808777, "learning_rate": 9.157593500213809e-06, "loss": 0.0056, "step": 27840 }, { "epoch": 0.1832200680249732, "grad_norm": 0.15853322179800286, "learning_rate": 9.160882865695209e-06, "loss": 0.0048, "step": 27850 }, { "epoch": 0.18328585620020657, "grad_norm": 0.10669638141528306, "learning_rate": 9.164172231176607e-06, "loss": 0.0045, "step": 27860 }, { "epoch": 0.18335164437543996, "grad_norm": 0.20633907868125767, "learning_rate": 9.167461596658006e-06, "loss": 0.0042, "step": 27870 }, { "epoch": 0.18341743255067333, "grad_norm": 0.19758752910976574, "learning_rate": 9.170750962139404e-06, "loss": 0.0045, "step": 27880 }, { "epoch": 0.18348322072590673, "grad_norm": 0.44517946275361614, "learning_rate": 9.174040327620802e-06, "loss": 0.0075, "step": 27890 }, { "epoch": 0.18354900890114012, "grad_norm": 0.26312461965290357, "learning_rate": 9.177329693102202e-06, "loss": 0.0068, "step": 27900 }, { "epoch": 0.1836147970763735, "grad_norm": 0.22265042641224486, "learning_rate": 9.180619058583599e-06, "loss": 0.0074, "step": 27910 }, { "epoch": 0.18368058525160688, "grad_norm": 0.1268919703517032, "learning_rate": 9.183908424064999e-06, "loss": 0.003, "step": 27920 }, { "epoch": 0.18374637342684025, "grad_norm": 0.2046263667654496, "learning_rate": 9.187197789546397e-06, "loss": 0.0037, "step": 27930 }, { "epoch": 0.18381216160207364, "grad_norm": 0.14213314093435192, "learning_rate": 9.190487155027796e-06, "loss": 0.004, "step": 27940 }, { "epoch": 0.18387794977730704, "grad_norm": 0.23582076759335577, "learning_rate": 9.193776520509194e-06, "loss": 0.0044, "step": 27950 }, { "epoch": 0.1839437379525404, "grad_norm": 0.2645456286665, "learning_rate": 9.197065885990594e-06, "loss": 0.0037, "step": 27960 }, { "epoch": 0.1840095261277738, "grad_norm": 0.0484726816781291, "learning_rate": 9.200355251471993e-06, "loss": 0.0029, "step": 27970 }, { "epoch": 0.18407531430300716, "grad_norm": 0.04010113870324861, "learning_rate": 9.20364461695339e-06, "loss": 0.0024, "step": 27980 }, { "epoch": 0.18414110247824056, "grad_norm": 0.1660842406923321, "learning_rate": 9.20693398243479e-06, "loss": 0.003, "step": 27990 }, { "epoch": 0.18420689065347395, "grad_norm": 0.10317834426973521, "learning_rate": 9.210223347916188e-06, "loss": 0.0036, "step": 28000 }, { "epoch": 0.18427267882870732, "grad_norm": 0.0936524535089293, "learning_rate": 9.213512713397586e-06, "loss": 0.0029, "step": 28010 }, { "epoch": 0.18433846700394071, "grad_norm": 0.17457985968038675, "learning_rate": 9.216802078878984e-06, "loss": 0.0039, "step": 28020 }, { "epoch": 0.18440425517917408, "grad_norm": 0.11816367669624468, "learning_rate": 9.220091444360384e-06, "loss": 0.0052, "step": 28030 }, { "epoch": 0.18447004335440748, "grad_norm": 0.20853369347597706, "learning_rate": 9.223380809841783e-06, "loss": 0.0041, "step": 28040 }, { "epoch": 0.18453583152964087, "grad_norm": 0.06946211137489494, "learning_rate": 9.226670175323181e-06, "loss": 0.003, "step": 28050 }, { "epoch": 0.18460161970487424, "grad_norm": 0.05780188032965639, "learning_rate": 9.22995954080458e-06, "loss": 0.003, "step": 28060 }, { "epoch": 0.18466740788010763, "grad_norm": 0.13413028319119616, "learning_rate": 9.233248906285978e-06, "loss": 0.0054, "step": 28070 }, { "epoch": 0.18473319605534103, "grad_norm": 0.15110784008432498, "learning_rate": 9.236538271767376e-06, "loss": 0.0021, "step": 28080 }, { "epoch": 0.1847989842305744, "grad_norm": 0.07680749896910892, "learning_rate": 9.239827637248775e-06, "loss": 0.0039, "step": 28090 }, { "epoch": 0.1848647724058078, "grad_norm": 0.1929834261613852, "learning_rate": 9.243117002730175e-06, "loss": 0.0029, "step": 28100 }, { "epoch": 0.18493056058104115, "grad_norm": 0.16243253469458033, "learning_rate": 9.246406368211573e-06, "loss": 0.0025, "step": 28110 }, { "epoch": 0.18499634875627455, "grad_norm": 0.2141451516585802, "learning_rate": 9.249695733692971e-06, "loss": 0.0029, "step": 28120 }, { "epoch": 0.18506213693150794, "grad_norm": 0.09168564591482994, "learning_rate": 9.25298509917437e-06, "loss": 0.0046, "step": 28130 }, { "epoch": 0.1851279251067413, "grad_norm": 0.16165169852349012, "learning_rate": 9.256274464655768e-06, "loss": 0.0027, "step": 28140 }, { "epoch": 0.1851937132819747, "grad_norm": 0.07899152129003821, "learning_rate": 9.259563830137167e-06, "loss": 0.0042, "step": 28150 }, { "epoch": 0.18525950145720807, "grad_norm": 0.16670652094372526, "learning_rate": 9.262853195618567e-06, "loss": 0.0048, "step": 28160 }, { "epoch": 0.18532528963244146, "grad_norm": 0.13932984485509708, "learning_rate": 9.266142561099965e-06, "loss": 0.0026, "step": 28170 }, { "epoch": 0.18539107780767486, "grad_norm": 0.36399761497466626, "learning_rate": 9.269431926581363e-06, "loss": 0.0054, "step": 28180 }, { "epoch": 0.18545686598290823, "grad_norm": 0.13700629503269066, "learning_rate": 9.272721292062762e-06, "loss": 0.0032, "step": 28190 }, { "epoch": 0.18552265415814162, "grad_norm": 0.08078398417983498, "learning_rate": 9.27601065754416e-06, "loss": 0.0035, "step": 28200 }, { "epoch": 0.185588442333375, "grad_norm": 0.242168784376655, "learning_rate": 9.279300023025558e-06, "loss": 0.0034, "step": 28210 }, { "epoch": 0.18565423050860838, "grad_norm": 0.13324489052806787, "learning_rate": 9.282589388506957e-06, "loss": 0.0028, "step": 28220 }, { "epoch": 0.18572001868384178, "grad_norm": 0.2273997075672811, "learning_rate": 9.285878753988357e-06, "loss": 0.0048, "step": 28230 }, { "epoch": 0.18578580685907514, "grad_norm": 0.012708739846374611, "learning_rate": 9.289168119469755e-06, "loss": 0.003, "step": 28240 }, { "epoch": 0.18585159503430854, "grad_norm": 0.13652438659453564, "learning_rate": 9.292457484951154e-06, "loss": 0.0031, "step": 28250 }, { "epoch": 0.1859173832095419, "grad_norm": 0.07807290629818085, "learning_rate": 9.295746850432552e-06, "loss": 0.0044, "step": 28260 }, { "epoch": 0.1859831713847753, "grad_norm": 0.14008523991434116, "learning_rate": 9.299036215913952e-06, "loss": 0.0036, "step": 28270 }, { "epoch": 0.1860489595600087, "grad_norm": 0.1702161153617048, "learning_rate": 9.30232558139535e-06, "loss": 0.0049, "step": 28280 }, { "epoch": 0.18611474773524206, "grad_norm": 0.35634224707619694, "learning_rate": 9.305614946876749e-06, "loss": 0.0036, "step": 28290 }, { "epoch": 0.18618053591047545, "grad_norm": 0.11031105838429536, "learning_rate": 9.308904312358147e-06, "loss": 0.003, "step": 28300 }, { "epoch": 0.18624632408570885, "grad_norm": 0.052328809776013854, "learning_rate": 9.312193677839545e-06, "loss": 0.0041, "step": 28310 }, { "epoch": 0.18631211226094221, "grad_norm": 0.1272126762141276, "learning_rate": 9.315483043320944e-06, "loss": 0.0038, "step": 28320 }, { "epoch": 0.1863779004361756, "grad_norm": 0.10272443274775606, "learning_rate": 9.318772408802342e-06, "loss": 0.0043, "step": 28330 }, { "epoch": 0.18644368861140898, "grad_norm": 0.08854941933564585, "learning_rate": 9.322061774283742e-06, "loss": 0.0025, "step": 28340 }, { "epoch": 0.18650947678664237, "grad_norm": 0.16351480570603635, "learning_rate": 9.32535113976514e-06, "loss": 0.0049, "step": 28350 }, { "epoch": 0.18657526496187576, "grad_norm": 0.15849905431187053, "learning_rate": 9.328640505246539e-06, "loss": 0.0048, "step": 28360 }, { "epoch": 0.18664105313710913, "grad_norm": 0.12179420669220123, "learning_rate": 9.331929870727937e-06, "loss": 0.0022, "step": 28370 }, { "epoch": 0.18670684131234253, "grad_norm": 0.13837116880614125, "learning_rate": 9.335219236209336e-06, "loss": 0.0028, "step": 28380 }, { "epoch": 0.1867726294875759, "grad_norm": 0.007082386083976807, "learning_rate": 9.338508601690734e-06, "loss": 0.0021, "step": 28390 }, { "epoch": 0.1868384176628093, "grad_norm": 0.1372753536791653, "learning_rate": 9.341797967172134e-06, "loss": 0.0026, "step": 28400 }, { "epoch": 0.18690420583804268, "grad_norm": 0.07764332595884513, "learning_rate": 9.345087332653532e-06, "loss": 0.0025, "step": 28410 }, { "epoch": 0.18696999401327605, "grad_norm": 0.10419289804545341, "learning_rate": 9.34837669813493e-06, "loss": 0.0034, "step": 28420 }, { "epoch": 0.18703578218850944, "grad_norm": 0.20202641045839625, "learning_rate": 9.351666063616329e-06, "loss": 0.0024, "step": 28430 }, { "epoch": 0.1871015703637428, "grad_norm": 0.14065364285908485, "learning_rate": 9.354955429097727e-06, "loss": 0.0057, "step": 28440 }, { "epoch": 0.1871673585389762, "grad_norm": 0.044552238496180396, "learning_rate": 9.358244794579126e-06, "loss": 0.0032, "step": 28450 }, { "epoch": 0.1872331467142096, "grad_norm": 0.14678272716098084, "learning_rate": 9.361534160060524e-06, "loss": 0.0024, "step": 28460 }, { "epoch": 0.18729893488944296, "grad_norm": 0.3837679878624486, "learning_rate": 9.364823525541924e-06, "loss": 0.0034, "step": 28470 }, { "epoch": 0.18736472306467636, "grad_norm": 0.13815423853552253, "learning_rate": 9.368112891023323e-06, "loss": 0.0048, "step": 28480 }, { "epoch": 0.18743051123990973, "grad_norm": 0.07855701763755336, "learning_rate": 9.371402256504721e-06, "loss": 0.0026, "step": 28490 }, { "epoch": 0.18749629941514312, "grad_norm": 0.10142027448878295, "learning_rate": 9.37469162198612e-06, "loss": 0.0039, "step": 28500 }, { "epoch": 0.18756208759037651, "grad_norm": 0.3514778930029772, "learning_rate": 9.37798098746752e-06, "loss": 0.0055, "step": 28510 }, { "epoch": 0.18762787576560988, "grad_norm": 0.2485795625611131, "learning_rate": 9.381270352948916e-06, "loss": 0.0038, "step": 28520 }, { "epoch": 0.18769366394084327, "grad_norm": 0.3387523738203166, "learning_rate": 9.384559718430316e-06, "loss": 0.0036, "step": 28530 }, { "epoch": 0.18775945211607667, "grad_norm": 0.05670838640878394, "learning_rate": 9.387849083911714e-06, "loss": 0.0018, "step": 28540 }, { "epoch": 0.18782524029131004, "grad_norm": 0.16266074898921648, "learning_rate": 9.391138449393113e-06, "loss": 0.0025, "step": 28550 }, { "epoch": 0.18789102846654343, "grad_norm": 0.3024444943385772, "learning_rate": 9.394427814874511e-06, "loss": 0.0042, "step": 28560 }, { "epoch": 0.1879568166417768, "grad_norm": 0.031135595674785375, "learning_rate": 9.39771718035591e-06, "loss": 0.0024, "step": 28570 }, { "epoch": 0.1880226048170102, "grad_norm": 0.15007206721746874, "learning_rate": 9.40100654583731e-06, "loss": 0.0029, "step": 28580 }, { "epoch": 0.18808839299224359, "grad_norm": 0.13378585557319278, "learning_rate": 9.404295911318706e-06, "loss": 0.0042, "step": 28590 }, { "epoch": 0.18815418116747695, "grad_norm": 0.09442555229336787, "learning_rate": 9.407585276800106e-06, "loss": 0.0026, "step": 28600 }, { "epoch": 0.18821996934271035, "grad_norm": 0.07193193293274477, "learning_rate": 9.410874642281505e-06, "loss": 0.0039, "step": 28610 }, { "epoch": 0.1882857575179437, "grad_norm": 0.06402373142616327, "learning_rate": 9.414164007762903e-06, "loss": 0.004, "step": 28620 }, { "epoch": 0.1883515456931771, "grad_norm": 0.1250701874988749, "learning_rate": 9.417453373244301e-06, "loss": 0.004, "step": 28630 }, { "epoch": 0.1884173338684105, "grad_norm": 0.08372178193596597, "learning_rate": 9.420742738725702e-06, "loss": 0.0047, "step": 28640 }, { "epoch": 0.18848312204364387, "grad_norm": 0.20892759815841513, "learning_rate": 9.4240321042071e-06, "loss": 0.0024, "step": 28650 }, { "epoch": 0.18854891021887726, "grad_norm": 0.1391257291508243, "learning_rate": 9.427321469688497e-06, "loss": 0.0041, "step": 28660 }, { "epoch": 0.18861469839411063, "grad_norm": 0.0725195497971656, "learning_rate": 9.430610835169897e-06, "loss": 0.0029, "step": 28670 }, { "epoch": 0.18868048656934402, "grad_norm": 0.141665831016284, "learning_rate": 9.433900200651295e-06, "loss": 0.0031, "step": 28680 }, { "epoch": 0.18874627474457742, "grad_norm": 0.4005377448632756, "learning_rate": 9.437189566132693e-06, "loss": 0.0057, "step": 28690 }, { "epoch": 0.18881206291981079, "grad_norm": 0.06792059928241607, "learning_rate": 9.440478931614092e-06, "loss": 0.0027, "step": 28700 }, { "epoch": 0.18887785109504418, "grad_norm": 0.15959418825536223, "learning_rate": 9.443768297095492e-06, "loss": 0.0032, "step": 28710 }, { "epoch": 0.18894363927027755, "grad_norm": 0.09578823273914426, "learning_rate": 9.44705766257689e-06, "loss": 0.0036, "step": 28720 }, { "epoch": 0.18900942744551094, "grad_norm": 0.15493137025540865, "learning_rate": 9.450347028058288e-06, "loss": 0.0035, "step": 28730 }, { "epoch": 0.18907521562074434, "grad_norm": 0.21765859438497723, "learning_rate": 9.453636393539687e-06, "loss": 0.0037, "step": 28740 }, { "epoch": 0.1891410037959777, "grad_norm": 0.23441887402426242, "learning_rate": 9.456925759021087e-06, "loss": 0.0048, "step": 28750 }, { "epoch": 0.1892067919712111, "grad_norm": 0.16143040320480884, "learning_rate": 9.460215124502484e-06, "loss": 0.0034, "step": 28760 }, { "epoch": 0.1892725801464445, "grad_norm": 0.08390915412861183, "learning_rate": 9.463504489983882e-06, "loss": 0.0034, "step": 28770 }, { "epoch": 0.18933836832167786, "grad_norm": 0.28566986634966224, "learning_rate": 9.466793855465282e-06, "loss": 0.0042, "step": 28780 }, { "epoch": 0.18940415649691125, "grad_norm": 0.017413927885408097, "learning_rate": 9.47008322094668e-06, "loss": 0.0022, "step": 28790 }, { "epoch": 0.18946994467214462, "grad_norm": 0.15974117376001074, "learning_rate": 9.473372586428079e-06, "loss": 0.003, "step": 28800 }, { "epoch": 0.189535732847378, "grad_norm": 0.4663935541303878, "learning_rate": 9.476661951909477e-06, "loss": 0.0044, "step": 28810 }, { "epoch": 0.1896015210226114, "grad_norm": 0.11548338480521446, "learning_rate": 9.479951317390877e-06, "loss": 0.0019, "step": 28820 }, { "epoch": 0.18966730919784477, "grad_norm": 0.33020001927911335, "learning_rate": 9.483240682872274e-06, "loss": 0.0051, "step": 28830 }, { "epoch": 0.18973309737307817, "grad_norm": 0.11455083050962384, "learning_rate": 9.486530048353674e-06, "loss": 0.0034, "step": 28840 }, { "epoch": 0.18979888554831154, "grad_norm": 0.07545187046771344, "learning_rate": 9.489819413835072e-06, "loss": 0.0036, "step": 28850 }, { "epoch": 0.18986467372354493, "grad_norm": 0.17003128960130565, "learning_rate": 9.49310877931647e-06, "loss": 0.0039, "step": 28860 }, { "epoch": 0.18993046189877832, "grad_norm": 0.11451010034406572, "learning_rate": 9.496398144797869e-06, "loss": 0.003, "step": 28870 }, { "epoch": 0.1899962500740117, "grad_norm": 0.2841438592506221, "learning_rate": 9.499687510279267e-06, "loss": 0.0032, "step": 28880 }, { "epoch": 0.19006203824924509, "grad_norm": 0.15547065746308927, "learning_rate": 9.502976875760667e-06, "loss": 0.0028, "step": 28890 }, { "epoch": 0.19012782642447845, "grad_norm": 0.1693741683955176, "learning_rate": 9.506266241242064e-06, "loss": 0.0029, "step": 28900 }, { "epoch": 0.19019361459971185, "grad_norm": 0.15628515933178372, "learning_rate": 9.509555606723464e-06, "loss": 0.0038, "step": 28910 }, { "epoch": 0.19025940277494524, "grad_norm": 0.17288115250802277, "learning_rate": 9.512844972204862e-06, "loss": 0.0065, "step": 28920 }, { "epoch": 0.1903251909501786, "grad_norm": 0.21153116515259032, "learning_rate": 9.51613433768626e-06, "loss": 0.0035, "step": 28930 }, { "epoch": 0.190390979125412, "grad_norm": 0.10814590674948922, "learning_rate": 9.519423703167659e-06, "loss": 0.0035, "step": 28940 }, { "epoch": 0.19045676730064537, "grad_norm": 0.07118419675196985, "learning_rate": 9.52271306864906e-06, "loss": 0.0036, "step": 28950 }, { "epoch": 0.19052255547587876, "grad_norm": 0.11536070818873986, "learning_rate": 9.526002434130458e-06, "loss": 0.0032, "step": 28960 }, { "epoch": 0.19058834365111216, "grad_norm": 0.19472864392417286, "learning_rate": 9.529291799611856e-06, "loss": 0.0047, "step": 28970 }, { "epoch": 0.19065413182634552, "grad_norm": 0.4066489611304676, "learning_rate": 9.532581165093254e-06, "loss": 0.0034, "step": 28980 }, { "epoch": 0.19071992000157892, "grad_norm": 0.16825448256274744, "learning_rate": 9.535870530574653e-06, "loss": 0.0034, "step": 28990 }, { "epoch": 0.1907857081768123, "grad_norm": 0.15448523191081187, "learning_rate": 9.539159896056051e-06, "loss": 0.0035, "step": 29000 }, { "epoch": 0.19085149635204568, "grad_norm": 0.05306524494040237, "learning_rate": 9.54244926153745e-06, "loss": 0.003, "step": 29010 }, { "epoch": 0.19091728452727907, "grad_norm": 0.14783168810610312, "learning_rate": 9.54573862701885e-06, "loss": 0.0056, "step": 29020 }, { "epoch": 0.19098307270251244, "grad_norm": 0.07975589142946105, "learning_rate": 9.549027992500248e-06, "loss": 0.0051, "step": 29030 }, { "epoch": 0.19104886087774584, "grad_norm": 0.06568964983488929, "learning_rate": 9.552317357981646e-06, "loss": 0.0034, "step": 29040 }, { "epoch": 0.19111464905297923, "grad_norm": 0.178642401233633, "learning_rate": 9.555606723463045e-06, "loss": 0.006, "step": 29050 }, { "epoch": 0.1911804372282126, "grad_norm": 0.211340086829868, "learning_rate": 9.558896088944445e-06, "loss": 0.0032, "step": 29060 }, { "epoch": 0.191246225403446, "grad_norm": 0.11211195461019315, "learning_rate": 9.562185454425841e-06, "loss": 0.003, "step": 29070 }, { "epoch": 0.19131201357867936, "grad_norm": 0.1544255185302993, "learning_rate": 9.565474819907241e-06, "loss": 0.0064, "step": 29080 }, { "epoch": 0.19137780175391275, "grad_norm": 0.1686394747080045, "learning_rate": 9.56876418538864e-06, "loss": 0.0051, "step": 29090 }, { "epoch": 0.19144358992914615, "grad_norm": 0.43450862740274815, "learning_rate": 9.572053550870038e-06, "loss": 0.0055, "step": 29100 }, { "epoch": 0.1915093781043795, "grad_norm": 0.011608458025805185, "learning_rate": 9.575342916351436e-06, "loss": 0.0031, "step": 29110 }, { "epoch": 0.1915751662796129, "grad_norm": 0.16738337262868377, "learning_rate": 9.578632281832835e-06, "loss": 0.0048, "step": 29120 }, { "epoch": 0.19164095445484627, "grad_norm": 0.06628618184955426, "learning_rate": 9.581921647314235e-06, "loss": 0.0028, "step": 29130 }, { "epoch": 0.19170674263007967, "grad_norm": 0.15259909096581475, "learning_rate": 9.585211012795631e-06, "loss": 0.0052, "step": 29140 }, { "epoch": 0.19177253080531306, "grad_norm": 0.22996301730904412, "learning_rate": 9.588500378277032e-06, "loss": 0.0026, "step": 29150 }, { "epoch": 0.19183831898054643, "grad_norm": 0.23118304178875773, "learning_rate": 9.59178974375843e-06, "loss": 0.0059, "step": 29160 }, { "epoch": 0.19190410715577982, "grad_norm": 0.276714429591086, "learning_rate": 9.595079109239828e-06, "loss": 0.0055, "step": 29170 }, { "epoch": 0.1919698953310132, "grad_norm": 0.3195660204820046, "learning_rate": 9.598368474721227e-06, "loss": 0.0035, "step": 29180 }, { "epoch": 0.19203568350624659, "grad_norm": 0.21798021569114284, "learning_rate": 9.601657840202627e-06, "loss": 0.0051, "step": 29190 }, { "epoch": 0.19210147168147998, "grad_norm": 0.09803918046459106, "learning_rate": 9.604947205684025e-06, "loss": 0.0042, "step": 29200 }, { "epoch": 0.19216725985671335, "grad_norm": 0.11676299077475266, "learning_rate": 9.608236571165423e-06, "loss": 0.0228, "step": 29210 }, { "epoch": 0.19223304803194674, "grad_norm": 0.1539754130848427, "learning_rate": 9.611525936646822e-06, "loss": 0.0032, "step": 29220 }, { "epoch": 0.19229883620718013, "grad_norm": 0.18552925421334507, "learning_rate": 9.61481530212822e-06, "loss": 0.0041, "step": 29230 }, { "epoch": 0.1923646243824135, "grad_norm": 1.1305492148564373, "learning_rate": 9.618104667609618e-06, "loss": 0.005, "step": 29240 }, { "epoch": 0.1924304125576469, "grad_norm": 0.09885862887983668, "learning_rate": 9.621394033091017e-06, "loss": 0.0046, "step": 29250 }, { "epoch": 0.19249620073288026, "grad_norm": 0.06112880202144941, "learning_rate": 9.624683398572417e-06, "loss": 0.0054, "step": 29260 }, { "epoch": 0.19256198890811366, "grad_norm": 0.24353881651954348, "learning_rate": 9.627972764053815e-06, "loss": 0.004, "step": 29270 }, { "epoch": 0.19262777708334705, "grad_norm": 0.1169830978718957, "learning_rate": 9.631262129535214e-06, "loss": 0.0041, "step": 29280 }, { "epoch": 0.19269356525858042, "grad_norm": 0.023524971904227397, "learning_rate": 9.634551495016612e-06, "loss": 0.0044, "step": 29290 }, { "epoch": 0.1927593534338138, "grad_norm": 6.87415454096722, "learning_rate": 9.63784086049801e-06, "loss": 0.0082, "step": 29300 }, { "epoch": 0.19282514160904718, "grad_norm": 0.10573663761627512, "learning_rate": 9.641130225979409e-06, "loss": 0.0048, "step": 29310 }, { "epoch": 0.19289092978428057, "grad_norm": 0.25417023044516596, "learning_rate": 9.644419591460809e-06, "loss": 0.0034, "step": 29320 }, { "epoch": 0.19295671795951397, "grad_norm": 0.12855837691820346, "learning_rate": 9.647708956942207e-06, "loss": 0.0047, "step": 29330 }, { "epoch": 0.19302250613474733, "grad_norm": 0.11255222130625685, "learning_rate": 9.650998322423605e-06, "loss": 0.0041, "step": 29340 }, { "epoch": 0.19308829430998073, "grad_norm": 0.0939355166193716, "learning_rate": 9.654287687905004e-06, "loss": 0.0022, "step": 29350 }, { "epoch": 0.1931540824852141, "grad_norm": 0.10881039123056176, "learning_rate": 9.657577053386402e-06, "loss": 0.0028, "step": 29360 }, { "epoch": 0.1932198706604475, "grad_norm": 0.1274112851052437, "learning_rate": 9.660866418867802e-06, "loss": 0.0033, "step": 29370 }, { "epoch": 0.19328565883568088, "grad_norm": 0.22739027997178193, "learning_rate": 9.664155784349199e-06, "loss": 0.0043, "step": 29380 }, { "epoch": 0.19335144701091425, "grad_norm": 0.2000479462542749, "learning_rate": 9.667445149830599e-06, "loss": 0.0049, "step": 29390 }, { "epoch": 0.19341723518614765, "grad_norm": 0.19477863824460995, "learning_rate": 9.670734515311997e-06, "loss": 0.016, "step": 29400 }, { "epoch": 0.193483023361381, "grad_norm": 0.15539169367818967, "learning_rate": 9.674023880793396e-06, "loss": 0.0045, "step": 29410 }, { "epoch": 0.1935488115366144, "grad_norm": 0.11486294835820159, "learning_rate": 9.677313246274794e-06, "loss": 0.0043, "step": 29420 }, { "epoch": 0.1936145997118478, "grad_norm": 0.24621566185493526, "learning_rate": 9.680602611756194e-06, "loss": 0.0032, "step": 29430 }, { "epoch": 0.19368038788708117, "grad_norm": 0.0615976332459678, "learning_rate": 9.683891977237592e-06, "loss": 0.0034, "step": 29440 }, { "epoch": 0.19374617606231456, "grad_norm": 0.24063049948026966, "learning_rate": 9.687181342718989e-06, "loss": 0.0045, "step": 29450 }, { "epoch": 0.19381196423754796, "grad_norm": 0.046995402432779665, "learning_rate": 9.69047070820039e-06, "loss": 0.0038, "step": 29460 }, { "epoch": 0.19387775241278132, "grad_norm": 0.19557598918318556, "learning_rate": 9.693760073681788e-06, "loss": 0.005, "step": 29470 }, { "epoch": 0.19394354058801472, "grad_norm": 0.4219686570567648, "learning_rate": 9.697049439163186e-06, "loss": 0.0047, "step": 29480 }, { "epoch": 0.19400932876324808, "grad_norm": 0.10067054188124853, "learning_rate": 9.700338804644584e-06, "loss": 0.0034, "step": 29490 }, { "epoch": 0.19407511693848148, "grad_norm": 0.13286511249850838, "learning_rate": 9.703628170125984e-06, "loss": 0.0036, "step": 29500 }, { "epoch": 0.19414090511371487, "grad_norm": 0.17030955793082467, "learning_rate": 9.706917535607383e-06, "loss": 0.0027, "step": 29510 }, { "epoch": 0.19420669328894824, "grad_norm": 0.09908488734797338, "learning_rate": 9.710206901088781e-06, "loss": 0.0021, "step": 29520 }, { "epoch": 0.19427248146418163, "grad_norm": 0.06265063886685819, "learning_rate": 9.71349626657018e-06, "loss": 0.003, "step": 29530 }, { "epoch": 0.194338269639415, "grad_norm": 0.1497118874302718, "learning_rate": 9.716785632051578e-06, "loss": 0.0026, "step": 29540 }, { "epoch": 0.1944040578146484, "grad_norm": 0.16037637604193908, "learning_rate": 9.720074997532976e-06, "loss": 0.0035, "step": 29550 }, { "epoch": 0.1944698459898818, "grad_norm": 0.13822890412885733, "learning_rate": 9.723364363014375e-06, "loss": 0.0036, "step": 29560 }, { "epoch": 0.19453563416511516, "grad_norm": 0.05632752704473601, "learning_rate": 9.726653728495775e-06, "loss": 0.0025, "step": 29570 }, { "epoch": 0.19460142234034855, "grad_norm": 0.11239228252948558, "learning_rate": 9.729943093977173e-06, "loss": 0.0027, "step": 29580 }, { "epoch": 0.19466721051558192, "grad_norm": 1.0344336454215148, "learning_rate": 9.733232459458571e-06, "loss": 0.0052, "step": 29590 }, { "epoch": 0.1947329986908153, "grad_norm": 0.11372037227886043, "learning_rate": 9.73652182493997e-06, "loss": 0.0035, "step": 29600 }, { "epoch": 0.1947987868660487, "grad_norm": 0.25698162251563167, "learning_rate": 9.739811190421368e-06, "loss": 0.0072, "step": 29610 }, { "epoch": 0.19486457504128207, "grad_norm": 0.16283645152278067, "learning_rate": 9.743100555902766e-06, "loss": 0.0038, "step": 29620 }, { "epoch": 0.19493036321651547, "grad_norm": 0.16731011162422663, "learning_rate": 9.746389921384166e-06, "loss": 0.0042, "step": 29630 }, { "epoch": 0.19499615139174883, "grad_norm": 0.09343322076826074, "learning_rate": 9.749679286865565e-06, "loss": 0.0039, "step": 29640 }, { "epoch": 0.19506193956698223, "grad_norm": 0.13980255317559778, "learning_rate": 9.752968652346963e-06, "loss": 0.0022, "step": 29650 }, { "epoch": 0.19512772774221562, "grad_norm": 0.40081579885040136, "learning_rate": 9.756258017828362e-06, "loss": 0.0068, "step": 29660 }, { "epoch": 0.195193515917449, "grad_norm": 0.12899630189892491, "learning_rate": 9.75954738330976e-06, "loss": 0.0024, "step": 29670 }, { "epoch": 0.19525930409268238, "grad_norm": 0.10022675529080545, "learning_rate": 9.762836748791158e-06, "loss": 0.0032, "step": 29680 }, { "epoch": 0.19532509226791578, "grad_norm": 0.14149817664848, "learning_rate": 9.766126114272557e-06, "loss": 0.0033, "step": 29690 }, { "epoch": 0.19539088044314915, "grad_norm": 0.13949190084972066, "learning_rate": 9.769415479753957e-06, "loss": 0.0052, "step": 29700 }, { "epoch": 0.19545666861838254, "grad_norm": 0.057577362169863276, "learning_rate": 9.772704845235355e-06, "loss": 0.0049, "step": 29710 }, { "epoch": 0.1955224567936159, "grad_norm": 0.09530102623065587, "learning_rate": 9.775994210716753e-06, "loss": 0.0032, "step": 29720 }, { "epoch": 0.1955882449688493, "grad_norm": 0.1756818689798675, "learning_rate": 9.779283576198152e-06, "loss": 0.0028, "step": 29730 }, { "epoch": 0.1956540331440827, "grad_norm": 0.33456031240442685, "learning_rate": 9.782572941679552e-06, "loss": 0.003, "step": 29740 }, { "epoch": 0.19571982131931606, "grad_norm": 0.10519230719478014, "learning_rate": 9.785862307160948e-06, "loss": 0.0044, "step": 29750 }, { "epoch": 0.19578560949454946, "grad_norm": 0.06774966022358064, "learning_rate": 9.789151672642349e-06, "loss": 0.0045, "step": 29760 }, { "epoch": 0.19585139766978282, "grad_norm": 0.23985613785596535, "learning_rate": 9.792441038123747e-06, "loss": 0.0048, "step": 29770 }, { "epoch": 0.19591718584501622, "grad_norm": 0.19420127289331754, "learning_rate": 9.795730403605145e-06, "loss": 0.0051, "step": 29780 }, { "epoch": 0.1959829740202496, "grad_norm": 0.2660952891108545, "learning_rate": 9.799019769086544e-06, "loss": 0.0031, "step": 29790 }, { "epoch": 0.19604876219548298, "grad_norm": 0.17819616980781902, "learning_rate": 9.802309134567942e-06, "loss": 0.0048, "step": 29800 }, { "epoch": 0.19611455037071637, "grad_norm": 0.0855783480890515, "learning_rate": 9.805598500049342e-06, "loss": 0.0026, "step": 29810 }, { "epoch": 0.19618033854594974, "grad_norm": 0.2283742699013268, "learning_rate": 9.80888786553074e-06, "loss": 0.0033, "step": 29820 }, { "epoch": 0.19624612672118313, "grad_norm": 0.2747378563318374, "learning_rate": 9.812177231012139e-06, "loss": 0.0063, "step": 29830 }, { "epoch": 0.19631191489641653, "grad_norm": 0.08472205995725336, "learning_rate": 9.815466596493537e-06, "loss": 0.0033, "step": 29840 }, { "epoch": 0.1963777030716499, "grad_norm": 0.29127284908776824, "learning_rate": 9.818755961974935e-06, "loss": 0.0042, "step": 29850 }, { "epoch": 0.1964434912468833, "grad_norm": 0.08251515854595665, "learning_rate": 9.822045327456334e-06, "loss": 0.0038, "step": 29860 }, { "epoch": 0.19650927942211666, "grad_norm": 0.01696543184169228, "learning_rate": 9.825334692937734e-06, "loss": 0.004, "step": 29870 }, { "epoch": 0.19657506759735005, "grad_norm": 0.0491988308012807, "learning_rate": 9.828624058419132e-06, "loss": 0.0043, "step": 29880 }, { "epoch": 0.19664085577258344, "grad_norm": 0.4878463368761938, "learning_rate": 9.83191342390053e-06, "loss": 0.0023, "step": 29890 }, { "epoch": 0.1967066439478168, "grad_norm": 0.20524938093166112, "learning_rate": 9.835202789381929e-06, "loss": 0.0052, "step": 29900 }, { "epoch": 0.1967724321230502, "grad_norm": 0.22217495864592152, "learning_rate": 9.838492154863327e-06, "loss": 0.0044, "step": 29910 }, { "epoch": 0.19683822029828357, "grad_norm": 0.11848612650522965, "learning_rate": 9.841781520344726e-06, "loss": 0.0045, "step": 29920 }, { "epoch": 0.19690400847351697, "grad_norm": 0.2234170173579547, "learning_rate": 9.845070885826124e-06, "loss": 0.0036, "step": 29930 }, { "epoch": 0.19696979664875036, "grad_norm": 0.10898685934733546, "learning_rate": 9.848360251307524e-06, "loss": 0.0036, "step": 29940 }, { "epoch": 0.19703558482398373, "grad_norm": 0.06977362011545368, "learning_rate": 9.851649616788922e-06, "loss": 0.0037, "step": 29950 }, { "epoch": 0.19710137299921712, "grad_norm": 0.18837330650309897, "learning_rate": 9.854938982270321e-06, "loss": 0.005, "step": 29960 }, { "epoch": 0.19716716117445052, "grad_norm": 0.22441411248300788, "learning_rate": 9.85822834775172e-06, "loss": 0.0048, "step": 29970 }, { "epoch": 0.19723294934968388, "grad_norm": 0.21469927067441497, "learning_rate": 9.86151771323312e-06, "loss": 0.0048, "step": 29980 }, { "epoch": 0.19729873752491728, "grad_norm": 0.052150818900643936, "learning_rate": 9.864807078714516e-06, "loss": 0.0036, "step": 29990 }, { "epoch": 0.19736452570015064, "grad_norm": 0.24541071414581841, "learning_rate": 9.868096444195916e-06, "loss": 0.0029, "step": 30000 }, { "epoch": 0.19743031387538404, "grad_norm": 0.22291891632812066, "learning_rate": 9.871385809677314e-06, "loss": 0.0071, "step": 30010 }, { "epoch": 0.19749610205061743, "grad_norm": 0.16367671281654333, "learning_rate": 9.874675175158713e-06, "loss": 0.0033, "step": 30020 }, { "epoch": 0.1975618902258508, "grad_norm": 0.12014965114230401, "learning_rate": 9.877964540640111e-06, "loss": 0.0046, "step": 30030 }, { "epoch": 0.1976276784010842, "grad_norm": 0.17113899970133578, "learning_rate": 9.88125390612151e-06, "loss": 0.0032, "step": 30040 }, { "epoch": 0.19769346657631756, "grad_norm": 0.20037038713438293, "learning_rate": 9.88454327160291e-06, "loss": 0.0028, "step": 30050 }, { "epoch": 0.19775925475155096, "grad_norm": 0.07766484993591617, "learning_rate": 9.887832637084306e-06, "loss": 0.0036, "step": 30060 }, { "epoch": 0.19782504292678435, "grad_norm": 0.006711440649081519, "learning_rate": 9.891122002565706e-06, "loss": 0.0026, "step": 30070 }, { "epoch": 0.19789083110201772, "grad_norm": 0.21751805772850377, "learning_rate": 9.894411368047105e-06, "loss": 0.0041, "step": 30080 }, { "epoch": 0.1979566192772511, "grad_norm": 0.03220065435652824, "learning_rate": 9.897700733528503e-06, "loss": 0.0036, "step": 30090 }, { "epoch": 0.19802240745248448, "grad_norm": 0.12009124867528848, "learning_rate": 9.900990099009901e-06, "loss": 0.0037, "step": 30100 }, { "epoch": 0.19808819562771787, "grad_norm": 0.2222366085018451, "learning_rate": 9.904279464491301e-06, "loss": 0.0056, "step": 30110 }, { "epoch": 0.19815398380295127, "grad_norm": 0.1081249915172621, "learning_rate": 9.9075688299727e-06, "loss": 0.0032, "step": 30120 }, { "epoch": 0.19821977197818463, "grad_norm": 0.017536464693924243, "learning_rate": 9.910858195454096e-06, "loss": 0.004, "step": 30130 }, { "epoch": 0.19828556015341803, "grad_norm": 0.1422856026057801, "learning_rate": 9.914147560935496e-06, "loss": 0.0053, "step": 30140 }, { "epoch": 0.1983513483286514, "grad_norm": 0.07485419303020915, "learning_rate": 9.917436926416895e-06, "loss": 0.0032, "step": 30150 }, { "epoch": 0.1984171365038848, "grad_norm": 0.20773542009323503, "learning_rate": 9.920726291898293e-06, "loss": 0.0033, "step": 30160 }, { "epoch": 0.19848292467911818, "grad_norm": 0.19175555654269955, "learning_rate": 9.924015657379692e-06, "loss": 0.0075, "step": 30170 }, { "epoch": 0.19854871285435155, "grad_norm": 0.20985691143234897, "learning_rate": 9.927305022861092e-06, "loss": 0.0039, "step": 30180 }, { "epoch": 0.19861450102958494, "grad_norm": 0.3434436115501467, "learning_rate": 9.93059438834249e-06, "loss": 0.004, "step": 30190 }, { "epoch": 0.19868028920481834, "grad_norm": 0.055206462464677676, "learning_rate": 9.933883753823888e-06, "loss": 0.0042, "step": 30200 }, { "epoch": 0.1987460773800517, "grad_norm": 0.18919139305721996, "learning_rate": 9.937173119305287e-06, "loss": 0.0029, "step": 30210 }, { "epoch": 0.1988118655552851, "grad_norm": 0.20294720913287786, "learning_rate": 9.940462484786687e-06, "loss": 0.0047, "step": 30220 }, { "epoch": 0.19887765373051847, "grad_norm": 0.14923437230054304, "learning_rate": 9.943751850268083e-06, "loss": 0.0031, "step": 30230 }, { "epoch": 0.19894344190575186, "grad_norm": 0.18977400780769596, "learning_rate": 9.947041215749482e-06, "loss": 0.0037, "step": 30240 }, { "epoch": 0.19900923008098526, "grad_norm": 0.024289507976503227, "learning_rate": 9.950330581230882e-06, "loss": 0.0018, "step": 30250 }, { "epoch": 0.19907501825621862, "grad_norm": 0.19818251061297906, "learning_rate": 9.95361994671228e-06, "loss": 0.0051, "step": 30260 }, { "epoch": 0.19914080643145202, "grad_norm": 0.0344791598782375, "learning_rate": 9.956909312193679e-06, "loss": 0.0045, "step": 30270 }, { "epoch": 0.19920659460668538, "grad_norm": 0.1323334386062104, "learning_rate": 9.960198677675077e-06, "loss": 0.0041, "step": 30280 }, { "epoch": 0.19927238278191878, "grad_norm": 0.33465106843362546, "learning_rate": 9.963488043156477e-06, "loss": 0.0056, "step": 30290 }, { "epoch": 0.19933817095715217, "grad_norm": 0.34114904354776493, "learning_rate": 9.966777408637874e-06, "loss": 0.0039, "step": 30300 }, { "epoch": 0.19940395913238554, "grad_norm": 0.21767811687617364, "learning_rate": 9.970066774119274e-06, "loss": 0.0045, "step": 30310 }, { "epoch": 0.19946974730761893, "grad_norm": 0.10854271769451135, "learning_rate": 9.973356139600672e-06, "loss": 0.0042, "step": 30320 }, { "epoch": 0.1995355354828523, "grad_norm": 0.19971697527189775, "learning_rate": 9.97664550508207e-06, "loss": 0.0041, "step": 30330 }, { "epoch": 0.1996013236580857, "grad_norm": 0.1377435947495772, "learning_rate": 9.979934870563469e-06, "loss": 0.005, "step": 30340 }, { "epoch": 0.1996671118333191, "grad_norm": 0.3764920516016218, "learning_rate": 9.983224236044867e-06, "loss": 0.0049, "step": 30350 }, { "epoch": 0.19973290000855246, "grad_norm": 0.2728504420816479, "learning_rate": 9.986513601526267e-06, "loss": 0.0053, "step": 30360 }, { "epoch": 0.19979868818378585, "grad_norm": 0.2373340734025173, "learning_rate": 9.989802967007664e-06, "loss": 0.0027, "step": 30370 }, { "epoch": 0.19986447635901922, "grad_norm": 0.11084349543074763, "learning_rate": 9.993092332489064e-06, "loss": 0.0026, "step": 30380 }, { "epoch": 0.1999302645342526, "grad_norm": 0.14212565792640156, "learning_rate": 9.996381697970462e-06, "loss": 0.003, "step": 30390 }, { "epoch": 0.199996052709486, "grad_norm": 0.08991132776000282, "learning_rate": 9.99967106345186e-06, "loss": 0.003, "step": 30400 }, { "epoch": 0.20006184088471937, "grad_norm": 0.08209554487960029, "learning_rate": 9.999999973302125e-06, "loss": 0.0035, "step": 30410 }, { "epoch": 0.20012762905995277, "grad_norm": 0.15379092057419452, "learning_rate": 9.999999881013171e-06, "loss": 0.0039, "step": 30420 }, { "epoch": 0.20019341723518616, "grad_norm": 0.7472837511648487, "learning_rate": 9.999999722803537e-06, "loss": 0.0039, "step": 30430 }, { "epoch": 0.20025920541041953, "grad_norm": 0.12273226967834597, "learning_rate": 9.999999498673224e-06, "loss": 0.0028, "step": 30440 }, { "epoch": 0.20032499358565292, "grad_norm": 0.0784736493307935, "learning_rate": 9.999999208622238e-06, "loss": 0.0034, "step": 30450 }, { "epoch": 0.2003907817608863, "grad_norm": 0.12553544719618573, "learning_rate": 9.999998852650579e-06, "loss": 0.0033, "step": 30460 }, { "epoch": 0.20045656993611968, "grad_norm": 0.21386138785920983, "learning_rate": 9.999998430758253e-06, "loss": 0.0032, "step": 30470 }, { "epoch": 0.20052235811135308, "grad_norm": 0.10823786377589745, "learning_rate": 9.999997942945267e-06, "loss": 0.0029, "step": 30480 }, { "epoch": 0.20058814628658644, "grad_norm": 0.10660694257793167, "learning_rate": 9.999997389211627e-06, "loss": 0.0033, "step": 30490 }, { "epoch": 0.20065393446181984, "grad_norm": 0.13090317982690505, "learning_rate": 9.999996769557337e-06, "loss": 0.0029, "step": 30500 }, { "epoch": 0.2007197226370532, "grad_norm": 0.23367681772325302, "learning_rate": 9.99999608398241e-06, "loss": 0.0035, "step": 30510 }, { "epoch": 0.2007855108122866, "grad_norm": 0.17712794381735877, "learning_rate": 9.999995332486853e-06, "loss": 0.0044, "step": 30520 }, { "epoch": 0.20085129898752, "grad_norm": 0.11300580858515262, "learning_rate": 9.999994515070675e-06, "loss": 0.0041, "step": 30530 }, { "epoch": 0.20091708716275336, "grad_norm": 0.2382765873274028, "learning_rate": 9.999993631733889e-06, "loss": 0.0035, "step": 30540 }, { "epoch": 0.20098287533798675, "grad_norm": 0.08988006449695143, "learning_rate": 9.999992682476503e-06, "loss": 0.0033, "step": 30550 }, { "epoch": 0.20104866351322012, "grad_norm": 0.2524870293777626, "learning_rate": 9.999991667298534e-06, "loss": 0.005, "step": 30560 }, { "epoch": 0.20111445168845352, "grad_norm": 0.06425475060662449, "learning_rate": 9.999990586199992e-06, "loss": 0.0046, "step": 30570 }, { "epoch": 0.2011802398636869, "grad_norm": 0.12177271278539153, "learning_rate": 9.999989439180894e-06, "loss": 0.0038, "step": 30580 }, { "epoch": 0.20124602803892028, "grad_norm": 0.08313236679633763, "learning_rate": 9.999988226241251e-06, "loss": 0.0055, "step": 30590 }, { "epoch": 0.20131181621415367, "grad_norm": 0.3818894513170794, "learning_rate": 9.999986947381082e-06, "loss": 0.004, "step": 30600 }, { "epoch": 0.20137760438938704, "grad_norm": 0.24589718441945574, "learning_rate": 9.999985602600404e-06, "loss": 0.0043, "step": 30610 }, { "epoch": 0.20144339256462043, "grad_norm": 0.3016866768917769, "learning_rate": 9.999984191899235e-06, "loss": 0.0045, "step": 30620 }, { "epoch": 0.20150918073985383, "grad_norm": 0.10412778509221579, "learning_rate": 9.99998271527759e-06, "loss": 0.0039, "step": 30630 }, { "epoch": 0.2015749689150872, "grad_norm": 0.06735899896423732, "learning_rate": 9.999981172735494e-06, "loss": 0.0021, "step": 30640 }, { "epoch": 0.2016407570903206, "grad_norm": 0.08101962866428848, "learning_rate": 9.999979564272963e-06, "loss": 0.0028, "step": 30650 }, { "epoch": 0.20170654526555398, "grad_norm": 0.25283935075373304, "learning_rate": 9.99997788989002e-06, "loss": 0.0044, "step": 30660 }, { "epoch": 0.20177233344078735, "grad_norm": 0.18068974199951351, "learning_rate": 9.999976149586688e-06, "loss": 0.0032, "step": 30670 }, { "epoch": 0.20183812161602074, "grad_norm": 0.11231964007732134, "learning_rate": 9.999974343362987e-06, "loss": 0.0042, "step": 30680 }, { "epoch": 0.2019039097912541, "grad_norm": 0.03745834100141294, "learning_rate": 9.999972471218944e-06, "loss": 0.0025, "step": 30690 }, { "epoch": 0.2019696979664875, "grad_norm": 0.19251899754043303, "learning_rate": 9.99997053315458e-06, "loss": 0.0057, "step": 30700 }, { "epoch": 0.2020354861417209, "grad_norm": 0.1808798575755433, "learning_rate": 9.999968529169925e-06, "loss": 0.0039, "step": 30710 }, { "epoch": 0.20210127431695427, "grad_norm": 0.18501960964200248, "learning_rate": 9.999966459265002e-06, "loss": 0.0045, "step": 30720 }, { "epoch": 0.20216706249218766, "grad_norm": 0.1835543339650755, "learning_rate": 9.99996432343984e-06, "loss": 0.0033, "step": 30730 }, { "epoch": 0.20223285066742103, "grad_norm": 0.10142633648341977, "learning_rate": 9.999962121694467e-06, "loss": 0.0052, "step": 30740 }, { "epoch": 0.20229863884265442, "grad_norm": 0.11894066483761946, "learning_rate": 9.999959854028911e-06, "loss": 0.0065, "step": 30750 }, { "epoch": 0.20236442701788782, "grad_norm": 0.24942184308665327, "learning_rate": 9.999957520443202e-06, "loss": 0.0035, "step": 30760 }, { "epoch": 0.20243021519312118, "grad_norm": 0.17490847325900813, "learning_rate": 9.999955120937373e-06, "loss": 0.0073, "step": 30770 }, { "epoch": 0.20249600336835458, "grad_norm": 0.1280682116438125, "learning_rate": 9.999952655511454e-06, "loss": 0.0028, "step": 30780 }, { "epoch": 0.20256179154358794, "grad_norm": 0.04023553919914915, "learning_rate": 9.999950124165476e-06, "loss": 0.0021, "step": 30790 }, { "epoch": 0.20262757971882134, "grad_norm": 0.08189990229955288, "learning_rate": 9.999947526899473e-06, "loss": 0.0022, "step": 30800 }, { "epoch": 0.20269336789405473, "grad_norm": 0.1562157169002255, "learning_rate": 9.999944863713482e-06, "loss": 0.0019, "step": 30810 }, { "epoch": 0.2027591560692881, "grad_norm": 0.08898192283048993, "learning_rate": 9.999942134607537e-06, "loss": 0.0054, "step": 30820 }, { "epoch": 0.2028249442445215, "grad_norm": 0.14252017867560604, "learning_rate": 9.99993933958167e-06, "loss": 0.002, "step": 30830 }, { "epoch": 0.20289073241975486, "grad_norm": 0.23176191867043225, "learning_rate": 9.999936478635924e-06, "loss": 0.0043, "step": 30840 }, { "epoch": 0.20295652059498825, "grad_norm": 0.14849024906659797, "learning_rate": 9.999933551770334e-06, "loss": 0.0042, "step": 30850 }, { "epoch": 0.20302230877022165, "grad_norm": 0.1608212922759157, "learning_rate": 9.999930558984936e-06, "loss": 0.0058, "step": 30860 }, { "epoch": 0.20308809694545502, "grad_norm": 0.17550958757556914, "learning_rate": 9.999927500279772e-06, "loss": 0.0053, "step": 30870 }, { "epoch": 0.2031538851206884, "grad_norm": 0.28645536912955005, "learning_rate": 9.999924375654883e-06, "loss": 0.0045, "step": 30880 }, { "epoch": 0.2032196732959218, "grad_norm": 0.0823187111140541, "learning_rate": 9.99992118511031e-06, "loss": 0.0035, "step": 30890 }, { "epoch": 0.20328546147115517, "grad_norm": 0.009132802276897587, "learning_rate": 9.999917928646093e-06, "loss": 0.0036, "step": 30900 }, { "epoch": 0.20335124964638857, "grad_norm": 0.06703158984878553, "learning_rate": 9.999914606262277e-06, "loss": 0.0036, "step": 30910 }, { "epoch": 0.20341703782162193, "grad_norm": 0.21138556866210978, "learning_rate": 9.999911217958907e-06, "loss": 0.0032, "step": 30920 }, { "epoch": 0.20348282599685533, "grad_norm": 0.061370468032185696, "learning_rate": 9.999907763736023e-06, "loss": 0.0027, "step": 30930 }, { "epoch": 0.20354861417208872, "grad_norm": 0.2741839240630405, "learning_rate": 9.999904243593675e-06, "loss": 0.0053, "step": 30940 }, { "epoch": 0.2036144023473221, "grad_norm": 0.11675237711506899, "learning_rate": 9.999900657531906e-06, "loss": 0.0043, "step": 30950 }, { "epoch": 0.20368019052255548, "grad_norm": 0.1334955440650495, "learning_rate": 9.999897005550767e-06, "loss": 0.0052, "step": 30960 }, { "epoch": 0.20374597869778885, "grad_norm": 0.08443493350314227, "learning_rate": 9.999893287650302e-06, "loss": 0.0029, "step": 30970 }, { "epoch": 0.20381176687302224, "grad_norm": 0.10463104203508868, "learning_rate": 9.999889503830564e-06, "loss": 0.004, "step": 30980 }, { "epoch": 0.20387755504825564, "grad_norm": 0.1687196683155595, "learning_rate": 9.999885654091601e-06, "loss": 0.003, "step": 30990 }, { "epoch": 0.203943343223489, "grad_norm": 0.11197472929164784, "learning_rate": 9.999881738433464e-06, "loss": 0.0042, "step": 31000 }, { "epoch": 0.2040091313987224, "grad_norm": 0.18541511117967327, "learning_rate": 9.999877756856203e-06, "loss": 0.0053, "step": 31010 }, { "epoch": 0.20407491957395577, "grad_norm": 0.09048283300840977, "learning_rate": 9.999873709359873e-06, "loss": 0.0023, "step": 31020 }, { "epoch": 0.20414070774918916, "grad_norm": 0.08607068434719342, "learning_rate": 9.999869595944525e-06, "loss": 0.0032, "step": 31030 }, { "epoch": 0.20420649592442255, "grad_norm": 0.05038061729320843, "learning_rate": 9.999865416610216e-06, "loss": 0.0027, "step": 31040 }, { "epoch": 0.20427228409965592, "grad_norm": 0.2341012346836347, "learning_rate": 9.999861171356999e-06, "loss": 0.0028, "step": 31050 }, { "epoch": 0.20433807227488932, "grad_norm": 0.13467658164797658, "learning_rate": 9.999856860184931e-06, "loss": 0.0052, "step": 31060 }, { "epoch": 0.20440386045012268, "grad_norm": 0.21496128359690264, "learning_rate": 9.999852483094068e-06, "loss": 0.0041, "step": 31070 }, { "epoch": 0.20446964862535608, "grad_norm": 0.060109212098685465, "learning_rate": 9.999848040084469e-06, "loss": 0.0033, "step": 31080 }, { "epoch": 0.20453543680058947, "grad_norm": 0.1936819070055655, "learning_rate": 9.999843531156192e-06, "loss": 0.0043, "step": 31090 }, { "epoch": 0.20460122497582284, "grad_norm": 0.0598787624566135, "learning_rate": 9.999838956309294e-06, "loss": 0.0028, "step": 31100 }, { "epoch": 0.20466701315105623, "grad_norm": 0.2517804099436514, "learning_rate": 9.999834315543839e-06, "loss": 0.0048, "step": 31110 }, { "epoch": 0.20473280132628963, "grad_norm": 0.16316086029117435, "learning_rate": 9.999829608859888e-06, "loss": 0.0049, "step": 31120 }, { "epoch": 0.204798589501523, "grad_norm": 0.11886483423416107, "learning_rate": 9.999824836257498e-06, "loss": 0.0029, "step": 31130 }, { "epoch": 0.2048643776767564, "grad_norm": 0.15318920160701047, "learning_rate": 9.99981999773674e-06, "loss": 0.0026, "step": 31140 }, { "epoch": 0.20493016585198975, "grad_norm": 0.18511789478656834, "learning_rate": 9.999815093297672e-06, "loss": 0.0025, "step": 31150 }, { "epoch": 0.20499595402722315, "grad_norm": 0.17140053020429913, "learning_rate": 9.999810122940359e-06, "loss": 0.0032, "step": 31160 }, { "epoch": 0.20506174220245654, "grad_norm": 0.015700693825749224, "learning_rate": 9.99980508666487e-06, "loss": 0.004, "step": 31170 }, { "epoch": 0.2051275303776899, "grad_norm": 0.07980073179454118, "learning_rate": 9.999799984471266e-06, "loss": 0.0039, "step": 31180 }, { "epoch": 0.2051933185529233, "grad_norm": 0.15873023488748014, "learning_rate": 9.999794816359618e-06, "loss": 0.0039, "step": 31190 }, { "epoch": 0.20525910672815667, "grad_norm": 0.07259144002028889, "learning_rate": 9.999789582329995e-06, "loss": 0.0037, "step": 31200 }, { "epoch": 0.20532489490339006, "grad_norm": 0.13222487457088197, "learning_rate": 9.999784282382463e-06, "loss": 0.0035, "step": 31210 }, { "epoch": 0.20539068307862346, "grad_norm": 0.07114752893656286, "learning_rate": 9.999778916517093e-06, "loss": 0.002, "step": 31220 }, { "epoch": 0.20545647125385683, "grad_norm": 0.23117165903374576, "learning_rate": 9.999773484733957e-06, "loss": 0.0023, "step": 31230 }, { "epoch": 0.20552225942909022, "grad_norm": 0.15222318157350387, "learning_rate": 9.999767987033126e-06, "loss": 0.0039, "step": 31240 }, { "epoch": 0.2055880476043236, "grad_norm": 0.04495410188170782, "learning_rate": 9.999762423414672e-06, "loss": 0.0024, "step": 31250 }, { "epoch": 0.20565383577955698, "grad_norm": 0.20467886538197383, "learning_rate": 9.999756793878667e-06, "loss": 0.0033, "step": 31260 }, { "epoch": 0.20571962395479038, "grad_norm": 0.23541038941265294, "learning_rate": 9.99975109842519e-06, "loss": 0.0094, "step": 31270 }, { "epoch": 0.20578541213002374, "grad_norm": 0.17325791455178627, "learning_rate": 9.99974533705431e-06, "loss": 0.0048, "step": 31280 }, { "epoch": 0.20585120030525714, "grad_norm": 0.25008072274173654, "learning_rate": 9.999739509766107e-06, "loss": 0.0063, "step": 31290 }, { "epoch": 0.2059169884804905, "grad_norm": 0.11674222579411057, "learning_rate": 9.999733616560656e-06, "loss": 0.0038, "step": 31300 }, { "epoch": 0.2059827766557239, "grad_norm": 0.20861103125570873, "learning_rate": 9.999727657438036e-06, "loss": 0.0029, "step": 31310 }, { "epoch": 0.2060485648309573, "grad_norm": 0.18658311306483674, "learning_rate": 9.999721632398325e-06, "loss": 0.0061, "step": 31320 }, { "epoch": 0.20611435300619066, "grad_norm": 0.21522960268206617, "learning_rate": 9.999715541441601e-06, "loss": 0.0046, "step": 31330 }, { "epoch": 0.20618014118142405, "grad_norm": 0.04966733786372822, "learning_rate": 9.999709384567945e-06, "loss": 0.005, "step": 31340 }, { "epoch": 0.20624592935665745, "grad_norm": 0.14543419353449602, "learning_rate": 9.999703161777442e-06, "loss": 0.0025, "step": 31350 }, { "epoch": 0.20631171753189081, "grad_norm": 0.23234021987907713, "learning_rate": 9.99969687307017e-06, "loss": 0.0048, "step": 31360 }, { "epoch": 0.2063775057071242, "grad_norm": 0.06443905944880826, "learning_rate": 9.999690518446212e-06, "loss": 0.0042, "step": 31370 }, { "epoch": 0.20644329388235758, "grad_norm": 0.05177621626649045, "learning_rate": 9.999684097905651e-06, "loss": 0.0024, "step": 31380 }, { "epoch": 0.20650908205759097, "grad_norm": 0.1003690031907525, "learning_rate": 9.999677611448575e-06, "loss": 0.0022, "step": 31390 }, { "epoch": 0.20657487023282436, "grad_norm": 0.1160364862111027, "learning_rate": 9.999671059075067e-06, "loss": 0.0048, "step": 31400 }, { "epoch": 0.20664065840805773, "grad_norm": 0.10373007002722429, "learning_rate": 9.999664440785216e-06, "loss": 0.0038, "step": 31410 }, { "epoch": 0.20670644658329113, "grad_norm": 0.06097350296563894, "learning_rate": 9.999657756579104e-06, "loss": 0.0034, "step": 31420 }, { "epoch": 0.2067722347585245, "grad_norm": 0.08808869325200833, "learning_rate": 9.999651006456825e-06, "loss": 0.0021, "step": 31430 }, { "epoch": 0.2068380229337579, "grad_norm": 0.14397600938306904, "learning_rate": 9.999644190418465e-06, "loss": 0.0031, "step": 31440 }, { "epoch": 0.20690381110899128, "grad_norm": 0.06696406580939189, "learning_rate": 9.999637308464113e-06, "loss": 0.0031, "step": 31450 }, { "epoch": 0.20696959928422465, "grad_norm": 0.2544315334966412, "learning_rate": 9.999630360593862e-06, "loss": 0.004, "step": 31460 }, { "epoch": 0.20703538745945804, "grad_norm": 0.2561294074032374, "learning_rate": 9.999623346807802e-06, "loss": 0.0036, "step": 31470 }, { "epoch": 0.2071011756346914, "grad_norm": 0.07628399845375555, "learning_rate": 9.999616267106026e-06, "loss": 0.0037, "step": 31480 }, { "epoch": 0.2071669638099248, "grad_norm": 0.0749681305648974, "learning_rate": 9.999609121488629e-06, "loss": 0.0046, "step": 31490 }, { "epoch": 0.2072327519851582, "grad_norm": 0.1247469021274499, "learning_rate": 9.999601909955704e-06, "loss": 0.0042, "step": 31500 }, { "epoch": 0.20729854016039156, "grad_norm": 0.11811060620527872, "learning_rate": 9.999594632507344e-06, "loss": 0.0038, "step": 31510 }, { "epoch": 0.20736432833562496, "grad_norm": 0.19077708491942932, "learning_rate": 9.999587289143649e-06, "loss": 0.004, "step": 31520 }, { "epoch": 0.20743011651085833, "grad_norm": 0.12610994644360077, "learning_rate": 9.999579879864711e-06, "loss": 0.0045, "step": 31530 }, { "epoch": 0.20749590468609172, "grad_norm": 0.1907600324734717, "learning_rate": 9.999572404670631e-06, "loss": 0.004, "step": 31540 }, { "epoch": 0.20756169286132511, "grad_norm": 0.11857735035285102, "learning_rate": 9.999564863561506e-06, "loss": 0.0029, "step": 31550 }, { "epoch": 0.20762748103655848, "grad_norm": 0.04443927476474864, "learning_rate": 9.999557256537437e-06, "loss": 0.0038, "step": 31560 }, { "epoch": 0.20769326921179188, "grad_norm": 0.2559871170839074, "learning_rate": 9.999549583598525e-06, "loss": 0.0048, "step": 31570 }, { "epoch": 0.20775905738702527, "grad_norm": 0.2790741456322526, "learning_rate": 9.999541844744868e-06, "loss": 0.0033, "step": 31580 }, { "epoch": 0.20782484556225864, "grad_norm": 0.40613333219336695, "learning_rate": 9.99953403997657e-06, "loss": 0.0045, "step": 31590 }, { "epoch": 0.20789063373749203, "grad_norm": 0.16805885958605266, "learning_rate": 9.999526169293733e-06, "loss": 0.0039, "step": 31600 }, { "epoch": 0.2079564219127254, "grad_norm": 0.09447431239301143, "learning_rate": 9.999518232696462e-06, "loss": 0.0031, "step": 31610 }, { "epoch": 0.2080222100879588, "grad_norm": 0.17966244786695057, "learning_rate": 9.99951023018486e-06, "loss": 0.0046, "step": 31620 }, { "epoch": 0.2080879982631922, "grad_norm": 0.30236791986206313, "learning_rate": 9.999502161759034e-06, "loss": 0.0039, "step": 31630 }, { "epoch": 0.20815378643842555, "grad_norm": 0.14233955023055173, "learning_rate": 9.99949402741909e-06, "loss": 0.0032, "step": 31640 }, { "epoch": 0.20821957461365895, "grad_norm": 0.3423880007945043, "learning_rate": 9.999485827165136e-06, "loss": 0.0119, "step": 31650 }, { "epoch": 0.20828536278889231, "grad_norm": 0.19667384873249685, "learning_rate": 9.999477560997277e-06, "loss": 0.004, "step": 31660 }, { "epoch": 0.2083511509641257, "grad_norm": 0.20892103981593074, "learning_rate": 9.999469228915626e-06, "loss": 0.0042, "step": 31670 }, { "epoch": 0.2084169391393591, "grad_norm": 0.1851597782691998, "learning_rate": 9.99946083092029e-06, "loss": 0.0061, "step": 31680 }, { "epoch": 0.20848272731459247, "grad_norm": 0.06224953493527923, "learning_rate": 9.999452367011382e-06, "loss": 0.0032, "step": 31690 }, { "epoch": 0.20854851548982586, "grad_norm": 0.3304213976671916, "learning_rate": 9.999443837189012e-06, "loss": 0.0032, "step": 31700 }, { "epoch": 0.20861430366505923, "grad_norm": 0.48640956381036105, "learning_rate": 9.999435241453293e-06, "loss": 0.0039, "step": 31710 }, { "epoch": 0.20868009184029263, "grad_norm": 0.10371132152869725, "learning_rate": 9.999426579804336e-06, "loss": 0.0027, "step": 31720 }, { "epoch": 0.20874588001552602, "grad_norm": 0.12291240158234279, "learning_rate": 9.999417852242259e-06, "loss": 0.0044, "step": 31730 }, { "epoch": 0.2088116681907594, "grad_norm": 0.14223925465834872, "learning_rate": 9.999409058767175e-06, "loss": 0.0029, "step": 31740 }, { "epoch": 0.20887745636599278, "grad_norm": 0.1580229314104071, "learning_rate": 9.9994001993792e-06, "loss": 0.0034, "step": 31750 }, { "epoch": 0.20894324454122615, "grad_norm": 0.019189794264571892, "learning_rate": 9.999391274078454e-06, "loss": 0.003, "step": 31760 }, { "epoch": 0.20900903271645954, "grad_norm": 0.25201180196041545, "learning_rate": 9.999382282865049e-06, "loss": 0.0015, "step": 31770 }, { "epoch": 0.20907482089169294, "grad_norm": 0.13495300771056473, "learning_rate": 9.999373225739107e-06, "loss": 0.0031, "step": 31780 }, { "epoch": 0.2091406090669263, "grad_norm": 0.12862514469339323, "learning_rate": 9.999364102700747e-06, "loss": 0.0024, "step": 31790 }, { "epoch": 0.2092063972421597, "grad_norm": 0.13969924591128596, "learning_rate": 9.999354913750089e-06, "loss": 0.0028, "step": 31800 }, { "epoch": 0.20927218541739306, "grad_norm": 0.5423929235224603, "learning_rate": 9.999345658887254e-06, "loss": 0.0042, "step": 31810 }, { "epoch": 0.20933797359262646, "grad_norm": 0.09354110691317179, "learning_rate": 9.999336338112364e-06, "loss": 0.0035, "step": 31820 }, { "epoch": 0.20940376176785985, "grad_norm": 0.08797184700966194, "learning_rate": 9.999326951425542e-06, "loss": 0.0041, "step": 31830 }, { "epoch": 0.20946954994309322, "grad_norm": 0.06121644438705412, "learning_rate": 9.999317498826912e-06, "loss": 0.0038, "step": 31840 }, { "epoch": 0.20953533811832661, "grad_norm": 0.16297946622767656, "learning_rate": 9.999307980316599e-06, "loss": 0.0028, "step": 31850 }, { "epoch": 0.20960112629356, "grad_norm": 0.13798035129976746, "learning_rate": 9.999298395894728e-06, "loss": 0.0039, "step": 31860 }, { "epoch": 0.20966691446879338, "grad_norm": 0.10255896488988406, "learning_rate": 9.999288745561424e-06, "loss": 0.0025, "step": 31870 }, { "epoch": 0.20973270264402677, "grad_norm": 0.10382581300657884, "learning_rate": 9.999279029316817e-06, "loss": 0.003, "step": 31880 }, { "epoch": 0.20979849081926014, "grad_norm": 0.04088822811609957, "learning_rate": 9.999269247161032e-06, "loss": 0.002, "step": 31890 }, { "epoch": 0.20986427899449353, "grad_norm": 0.1305896597468101, "learning_rate": 9.999259399094202e-06, "loss": 0.0026, "step": 31900 }, { "epoch": 0.20993006716972692, "grad_norm": 0.17356999159154674, "learning_rate": 9.999249485116452e-06, "loss": 0.0037, "step": 31910 }, { "epoch": 0.2099958553449603, "grad_norm": 0.1025490326398379, "learning_rate": 9.999239505227917e-06, "loss": 0.0058, "step": 31920 }, { "epoch": 0.21006164352019369, "grad_norm": 0.23254022850562697, "learning_rate": 9.999229459428726e-06, "loss": 0.0038, "step": 31930 }, { "epoch": 0.21012743169542705, "grad_norm": 0.003721140634135423, "learning_rate": 9.999219347719013e-06, "loss": 0.003, "step": 31940 }, { "epoch": 0.21019321987066045, "grad_norm": 0.1015842215464625, "learning_rate": 9.999209170098909e-06, "loss": 0.0031, "step": 31950 }, { "epoch": 0.21025900804589384, "grad_norm": 0.1894205552886869, "learning_rate": 9.99919892656855e-06, "loss": 0.004, "step": 31960 }, { "epoch": 0.2103247962211272, "grad_norm": 0.08551805721929566, "learning_rate": 9.999188617128071e-06, "loss": 0.0035, "step": 31970 }, { "epoch": 0.2103905843963606, "grad_norm": 0.3225914731186439, "learning_rate": 9.999178241777609e-06, "loss": 0.0041, "step": 31980 }, { "epoch": 0.21045637257159397, "grad_norm": 0.108006733219098, "learning_rate": 9.999167800517298e-06, "loss": 0.0022, "step": 31990 }, { "epoch": 0.21052216074682736, "grad_norm": 0.2169498207154874, "learning_rate": 9.999157293347277e-06, "loss": 0.004, "step": 32000 }, { "epoch": 0.21058794892206076, "grad_norm": 0.18186360197006204, "learning_rate": 9.999146720267685e-06, "loss": 0.0044, "step": 32010 }, { "epoch": 0.21065373709729412, "grad_norm": 0.057081397537386544, "learning_rate": 9.999136081278662e-06, "loss": 0.005, "step": 32020 }, { "epoch": 0.21071952527252752, "grad_norm": 0.26202716682051364, "learning_rate": 9.999125376380346e-06, "loss": 0.0048, "step": 32030 }, { "epoch": 0.21078531344776089, "grad_norm": 0.211955337177323, "learning_rate": 9.999114605572879e-06, "loss": 0.0049, "step": 32040 }, { "epoch": 0.21085110162299428, "grad_norm": 0.15914713760284147, "learning_rate": 9.999103768856406e-06, "loss": 0.0056, "step": 32050 }, { "epoch": 0.21091688979822767, "grad_norm": 0.03683347293562223, "learning_rate": 9.999092866231065e-06, "loss": 0.0029, "step": 32060 }, { "epoch": 0.21098267797346104, "grad_norm": 0.18035184617402256, "learning_rate": 9.999081897697003e-06, "loss": 0.0046, "step": 32070 }, { "epoch": 0.21104846614869444, "grad_norm": 0.04890324438197025, "learning_rate": 9.999070863254364e-06, "loss": 0.0047, "step": 32080 }, { "epoch": 0.21111425432392783, "grad_norm": 0.10647846853335109, "learning_rate": 9.999059762903292e-06, "loss": 0.006, "step": 32090 }, { "epoch": 0.2111800424991612, "grad_norm": 0.17392773160144778, "learning_rate": 9.999048596643935e-06, "loss": 0.0025, "step": 32100 }, { "epoch": 0.2112458306743946, "grad_norm": 0.11339587815730379, "learning_rate": 9.999037364476439e-06, "loss": 0.002, "step": 32110 }, { "epoch": 0.21131161884962796, "grad_norm": 0.1410472835748826, "learning_rate": 9.999026066400954e-06, "loss": 0.0026, "step": 32120 }, { "epoch": 0.21137740702486135, "grad_norm": 0.30180441830655513, "learning_rate": 9.999014702417627e-06, "loss": 0.0045, "step": 32130 }, { "epoch": 0.21144319520009475, "grad_norm": 0.0426252352174955, "learning_rate": 9.999003272526608e-06, "loss": 0.0027, "step": 32140 }, { "epoch": 0.2115089833753281, "grad_norm": 0.12579380085195835, "learning_rate": 9.998991776728051e-06, "loss": 0.0055, "step": 32150 }, { "epoch": 0.2115747715505615, "grad_norm": 0.05618851737896169, "learning_rate": 9.998980215022103e-06, "loss": 0.0035, "step": 32160 }, { "epoch": 0.21164055972579487, "grad_norm": 0.06483683565138959, "learning_rate": 9.998968587408917e-06, "loss": 0.0046, "step": 32170 }, { "epoch": 0.21170634790102827, "grad_norm": 0.10427793694072436, "learning_rate": 9.99895689388865e-06, "loss": 0.0033, "step": 32180 }, { "epoch": 0.21177213607626166, "grad_norm": 0.01779692966938394, "learning_rate": 9.998945134461452e-06, "loss": 0.0026, "step": 32190 }, { "epoch": 0.21183792425149503, "grad_norm": 0.08617730636913958, "learning_rate": 9.99893330912748e-06, "loss": 0.0042, "step": 32200 }, { "epoch": 0.21190371242672842, "grad_norm": 0.13018667340659415, "learning_rate": 9.99892141788689e-06, "loss": 0.0045, "step": 32210 }, { "epoch": 0.2119695006019618, "grad_norm": 0.07402967687802578, "learning_rate": 9.998909460739838e-06, "loss": 0.0021, "step": 32220 }, { "epoch": 0.21203528877719519, "grad_norm": 0.11749649797607287, "learning_rate": 9.998897437686483e-06, "loss": 0.0045, "step": 32230 }, { "epoch": 0.21210107695242858, "grad_norm": 0.1712198475820349, "learning_rate": 9.998885348726982e-06, "loss": 0.0045, "step": 32240 }, { "epoch": 0.21216686512766195, "grad_norm": 0.10324833737668447, "learning_rate": 9.998873193861496e-06, "loss": 0.0041, "step": 32250 }, { "epoch": 0.21223265330289534, "grad_norm": 0.04279923060307339, "learning_rate": 9.998860973090183e-06, "loss": 0.0031, "step": 32260 }, { "epoch": 0.2122984414781287, "grad_norm": 0.05876084850005957, "learning_rate": 9.998848686413204e-06, "loss": 0.0033, "step": 32270 }, { "epoch": 0.2123642296533621, "grad_norm": 0.03466202325930695, "learning_rate": 9.998836333830726e-06, "loss": 0.003, "step": 32280 }, { "epoch": 0.2124300178285955, "grad_norm": 0.17452018619324053, "learning_rate": 9.998823915342906e-06, "loss": 0.0038, "step": 32290 }, { "epoch": 0.21249580600382886, "grad_norm": 0.03013408813268503, "learning_rate": 9.998811430949911e-06, "loss": 0.0038, "step": 32300 }, { "epoch": 0.21256159417906226, "grad_norm": 0.2470191930163621, "learning_rate": 9.998798880651903e-06, "loss": 0.0038, "step": 32310 }, { "epoch": 0.21262738235429565, "grad_norm": 0.06600035155080226, "learning_rate": 9.99878626444905e-06, "loss": 0.0022, "step": 32320 }, { "epoch": 0.21269317052952902, "grad_norm": 0.1158543327022531, "learning_rate": 9.998773582341517e-06, "loss": 0.0048, "step": 32330 }, { "epoch": 0.2127589587047624, "grad_norm": 0.16574826856379354, "learning_rate": 9.998760834329473e-06, "loss": 0.0044, "step": 32340 }, { "epoch": 0.21282474687999578, "grad_norm": 0.07235938331954074, "learning_rate": 9.998748020413083e-06, "loss": 0.0031, "step": 32350 }, { "epoch": 0.21289053505522917, "grad_norm": 0.042416499656715526, "learning_rate": 9.998735140592518e-06, "loss": 0.0038, "step": 32360 }, { "epoch": 0.21295632323046257, "grad_norm": 0.07552320304098509, "learning_rate": 9.998722194867947e-06, "loss": 0.0025, "step": 32370 }, { "epoch": 0.21302211140569594, "grad_norm": 0.07703679825184187, "learning_rate": 9.998709183239541e-06, "loss": 0.0044, "step": 32380 }, { "epoch": 0.21308789958092933, "grad_norm": 0.15178037198477554, "learning_rate": 9.998696105707471e-06, "loss": 0.004, "step": 32390 }, { "epoch": 0.2131536877561627, "grad_norm": 0.19079133420046926, "learning_rate": 9.998682962271912e-06, "loss": 0.0025, "step": 32400 }, { "epoch": 0.2132194759313961, "grad_norm": 0.1556315112800698, "learning_rate": 9.998669752933035e-06, "loss": 0.0035, "step": 32410 }, { "epoch": 0.21328526410662949, "grad_norm": 0.10614626777325516, "learning_rate": 9.998656477691013e-06, "loss": 0.0035, "step": 32420 }, { "epoch": 0.21335105228186285, "grad_norm": 0.11296882931059059, "learning_rate": 9.998643136546023e-06, "loss": 0.0036, "step": 32430 }, { "epoch": 0.21341684045709625, "grad_norm": 0.1426097987420839, "learning_rate": 9.99862972949824e-06, "loss": 0.0032, "step": 32440 }, { "epoch": 0.2134826286323296, "grad_norm": 0.2192065707704331, "learning_rate": 9.998616256547841e-06, "loss": 0.0034, "step": 32450 }, { "epoch": 0.213548416807563, "grad_norm": 0.034757071784460254, "learning_rate": 9.998602717695006e-06, "loss": 0.0037, "step": 32460 }, { "epoch": 0.2136142049827964, "grad_norm": 0.1317366136834194, "learning_rate": 9.998589112939908e-06, "loss": 0.0048, "step": 32470 }, { "epoch": 0.21367999315802977, "grad_norm": 0.0893682957386439, "learning_rate": 9.998575442282732e-06, "loss": 0.0023, "step": 32480 }, { "epoch": 0.21374578133326316, "grad_norm": 0.09969244452070355, "learning_rate": 9.998561705723655e-06, "loss": 0.0038, "step": 32490 }, { "epoch": 0.21381156950849653, "grad_norm": 0.18488229640484327, "learning_rate": 9.998547903262858e-06, "loss": 0.0046, "step": 32500 }, { "epoch": 0.21387735768372992, "grad_norm": 0.0016840059612588428, "learning_rate": 9.998534034900524e-06, "loss": 0.0023, "step": 32510 }, { "epoch": 0.21394314585896332, "grad_norm": 0.704453435595484, "learning_rate": 9.998520100636837e-06, "loss": 0.003, "step": 32520 }, { "epoch": 0.21400893403419669, "grad_norm": 0.1759073001383765, "learning_rate": 9.998506100471981e-06, "loss": 0.0021, "step": 32530 }, { "epoch": 0.21407472220943008, "grad_norm": 0.20625460277937255, "learning_rate": 9.998492034406135e-06, "loss": 0.0038, "step": 32540 }, { "epoch": 0.21414051038466347, "grad_norm": 0.16208888396316445, "learning_rate": 9.998477902439491e-06, "loss": 0.0019, "step": 32550 }, { "epoch": 0.21420629855989684, "grad_norm": 0.11156607093061574, "learning_rate": 9.998463704572234e-06, "loss": 0.0034, "step": 32560 }, { "epoch": 0.21427208673513023, "grad_norm": 0.2593721834526594, "learning_rate": 9.998449440804548e-06, "loss": 0.0032, "step": 32570 }, { "epoch": 0.2143378749103636, "grad_norm": 0.21261284481704015, "learning_rate": 9.998435111136625e-06, "loss": 0.0032, "step": 32580 }, { "epoch": 0.214403663085597, "grad_norm": 0.06108001297544291, "learning_rate": 9.998420715568649e-06, "loss": 0.0029, "step": 32590 }, { "epoch": 0.2144694512608304, "grad_norm": 0.10797154954354539, "learning_rate": 9.998406254100815e-06, "loss": 0.0049, "step": 32600 }, { "epoch": 0.21453523943606376, "grad_norm": 0.06260015164548031, "learning_rate": 9.998391726733309e-06, "loss": 0.004, "step": 32610 }, { "epoch": 0.21460102761129715, "grad_norm": 0.02308163602465497, "learning_rate": 9.998377133466329e-06, "loss": 0.0045, "step": 32620 }, { "epoch": 0.21466681578653052, "grad_norm": 0.11729661699518826, "learning_rate": 9.998362474300061e-06, "loss": 0.0028, "step": 32630 }, { "epoch": 0.2147326039617639, "grad_norm": 0.37178899462612, "learning_rate": 9.9983477492347e-06, "loss": 0.0049, "step": 32640 }, { "epoch": 0.2147983921369973, "grad_norm": 0.1742146954622468, "learning_rate": 9.998332958270441e-06, "loss": 0.0026, "step": 32650 }, { "epoch": 0.21486418031223067, "grad_norm": 0.17481483499962333, "learning_rate": 9.99831810140748e-06, "loss": 0.0067, "step": 32660 }, { "epoch": 0.21492996848746407, "grad_norm": 0.07087073122894544, "learning_rate": 9.99830317864601e-06, "loss": 0.0023, "step": 32670 }, { "epoch": 0.21499575666269743, "grad_norm": 0.06491258547176823, "learning_rate": 9.998288189986232e-06, "loss": 0.0059, "step": 32680 }, { "epoch": 0.21506154483793083, "grad_norm": 0.19458817025680067, "learning_rate": 9.998273135428339e-06, "loss": 0.0055, "step": 32690 }, { "epoch": 0.21512733301316422, "grad_norm": 0.09433734256682726, "learning_rate": 9.998258014972531e-06, "loss": 0.007, "step": 32700 }, { "epoch": 0.2151931211883976, "grad_norm": 0.024998586784121774, "learning_rate": 9.998242828619008e-06, "loss": 0.0037, "step": 32710 }, { "epoch": 0.21525890936363098, "grad_norm": 0.06344020101975321, "learning_rate": 9.998227576367971e-06, "loss": 0.004, "step": 32720 }, { "epoch": 0.21532469753886435, "grad_norm": 0.0982211639521692, "learning_rate": 9.998212258219622e-06, "loss": 0.0037, "step": 32730 }, { "epoch": 0.21539048571409775, "grad_norm": 0.21667223750056214, "learning_rate": 9.998196874174159e-06, "loss": 0.0054, "step": 32740 }, { "epoch": 0.21545627388933114, "grad_norm": 0.42301617593037893, "learning_rate": 9.998181424231786e-06, "loss": 0.0035, "step": 32750 }, { "epoch": 0.2155220620645645, "grad_norm": 0.18120415397588568, "learning_rate": 9.99816590839271e-06, "loss": 0.0042, "step": 32760 }, { "epoch": 0.2155878502397979, "grad_norm": 0.08348122111694675, "learning_rate": 9.998150326657131e-06, "loss": 0.0031, "step": 32770 }, { "epoch": 0.2156536384150313, "grad_norm": 0.13780418772930275, "learning_rate": 9.998134679025258e-06, "loss": 0.0024, "step": 32780 }, { "epoch": 0.21571942659026466, "grad_norm": 0.04839080277969805, "learning_rate": 9.998118965497296e-06, "loss": 0.0034, "step": 32790 }, { "epoch": 0.21578521476549806, "grad_norm": 0.058201108468101434, "learning_rate": 9.998103186073452e-06, "loss": 0.0033, "step": 32800 }, { "epoch": 0.21585100294073142, "grad_norm": 0.04932914112935615, "learning_rate": 9.998087340753932e-06, "loss": 0.0021, "step": 32810 }, { "epoch": 0.21591679111596482, "grad_norm": 0.07307338515653344, "learning_rate": 9.99807142953895e-06, "loss": 0.0035, "step": 32820 }, { "epoch": 0.2159825792911982, "grad_norm": 0.1391717405942338, "learning_rate": 9.99805545242871e-06, "loss": 0.003, "step": 32830 }, { "epoch": 0.21604836746643158, "grad_norm": 0.13109264086191025, "learning_rate": 9.998039409423427e-06, "loss": 0.0029, "step": 32840 }, { "epoch": 0.21611415564166497, "grad_norm": 0.2078950261047603, "learning_rate": 9.998023300523311e-06, "loss": 0.0044, "step": 32850 }, { "epoch": 0.21617994381689834, "grad_norm": 0.08489289142842572, "learning_rate": 9.998007125728576e-06, "loss": 0.0021, "step": 32860 }, { "epoch": 0.21624573199213173, "grad_norm": 0.22670384575533478, "learning_rate": 9.997990885039432e-06, "loss": 0.0038, "step": 32870 }, { "epoch": 0.21631152016736513, "grad_norm": 0.2769398622745546, "learning_rate": 9.997974578456093e-06, "loss": 0.0033, "step": 32880 }, { "epoch": 0.2163773083425985, "grad_norm": 0.10242207555776492, "learning_rate": 9.997958205978778e-06, "loss": 0.0026, "step": 32890 }, { "epoch": 0.2164430965178319, "grad_norm": 0.13643816309610987, "learning_rate": 9.9979417676077e-06, "loss": 0.0075, "step": 32900 }, { "epoch": 0.21650888469306526, "grad_norm": 0.08455781502001804, "learning_rate": 9.997925263343077e-06, "loss": 0.0037, "step": 32910 }, { "epoch": 0.21657467286829865, "grad_norm": 0.09126162883133977, "learning_rate": 9.997908693185125e-06, "loss": 0.0032, "step": 32920 }, { "epoch": 0.21664046104353205, "grad_norm": 0.30665065672572106, "learning_rate": 9.997892057134062e-06, "loss": 0.0037, "step": 32930 }, { "epoch": 0.2167062492187654, "grad_norm": 0.007623301909152741, "learning_rate": 9.997875355190112e-06, "loss": 0.0029, "step": 32940 }, { "epoch": 0.2167720373939988, "grad_norm": 0.06484126428210936, "learning_rate": 9.997858587353489e-06, "loss": 0.0037, "step": 32950 }, { "epoch": 0.21683782556923217, "grad_norm": 0.1799963850492614, "learning_rate": 9.997841753624419e-06, "loss": 0.0055, "step": 32960 }, { "epoch": 0.21690361374446557, "grad_norm": 0.1442221166271672, "learning_rate": 9.99782485400312e-06, "loss": 0.0024, "step": 32970 }, { "epoch": 0.21696940191969896, "grad_norm": 0.1618552976103416, "learning_rate": 9.99780788848982e-06, "loss": 0.0045, "step": 32980 }, { "epoch": 0.21703519009493233, "grad_norm": 0.06365690303065918, "learning_rate": 9.997790857084735e-06, "loss": 0.0024, "step": 32990 }, { "epoch": 0.21710097827016572, "grad_norm": 0.1182045090448073, "learning_rate": 9.997773759788097e-06, "loss": 0.0043, "step": 33000 }, { "epoch": 0.21716676644539912, "grad_norm": 0.0715579369467523, "learning_rate": 9.997756596600127e-06, "loss": 0.0039, "step": 33010 }, { "epoch": 0.21723255462063248, "grad_norm": 0.1251643156616106, "learning_rate": 9.997739367521053e-06, "loss": 0.0029, "step": 33020 }, { "epoch": 0.21729834279586588, "grad_norm": 0.11079210559413288, "learning_rate": 9.997722072551102e-06, "loss": 0.0021, "step": 33030 }, { "epoch": 0.21736413097109925, "grad_norm": 0.25991513872934024, "learning_rate": 9.997704711690501e-06, "loss": 0.0036, "step": 33040 }, { "epoch": 0.21742991914633264, "grad_norm": 0.054107693441922886, "learning_rate": 9.99768728493948e-06, "loss": 0.0037, "step": 33050 }, { "epoch": 0.21749570732156603, "grad_norm": 0.13429914070622237, "learning_rate": 9.997669792298269e-06, "loss": 0.0044, "step": 33060 }, { "epoch": 0.2175614954967994, "grad_norm": 0.12064810990763175, "learning_rate": 9.997652233767097e-06, "loss": 0.0038, "step": 33070 }, { "epoch": 0.2176272836720328, "grad_norm": 0.17389650446507202, "learning_rate": 9.997634609346197e-06, "loss": 0.0033, "step": 33080 }, { "epoch": 0.21769307184726616, "grad_norm": 0.06765386569865965, "learning_rate": 9.997616919035802e-06, "loss": 0.0024, "step": 33090 }, { "epoch": 0.21775886002249956, "grad_norm": 0.19576103912119386, "learning_rate": 9.997599162836143e-06, "loss": 0.0038, "step": 33100 }, { "epoch": 0.21782464819773295, "grad_norm": 0.07950075837069008, "learning_rate": 9.997581340747455e-06, "loss": 0.0023, "step": 33110 }, { "epoch": 0.21789043637296632, "grad_norm": 0.025117242443464922, "learning_rate": 9.997563452769974e-06, "loss": 0.0026, "step": 33120 }, { "epoch": 0.2179562245481997, "grad_norm": 0.19837882752128436, "learning_rate": 9.997545498903936e-06, "loss": 0.0045, "step": 33130 }, { "epoch": 0.21802201272343308, "grad_norm": 0.10261511968632635, "learning_rate": 9.997527479149575e-06, "loss": 0.0027, "step": 33140 }, { "epoch": 0.21808780089866647, "grad_norm": 0.07545148745411495, "learning_rate": 9.99750939350713e-06, "loss": 0.0058, "step": 33150 }, { "epoch": 0.21815358907389987, "grad_norm": 0.14335744469111242, "learning_rate": 9.997491241976841e-06, "loss": 0.0058, "step": 33160 }, { "epoch": 0.21821937724913323, "grad_norm": 0.06868652072206623, "learning_rate": 9.997473024558946e-06, "loss": 0.0028, "step": 33170 }, { "epoch": 0.21828516542436663, "grad_norm": 0.07254580667951972, "learning_rate": 9.997454741253685e-06, "loss": 0.0042, "step": 33180 }, { "epoch": 0.2183509535996, "grad_norm": 0.08063985576252723, "learning_rate": 9.997436392061299e-06, "loss": 0.0042, "step": 33190 }, { "epoch": 0.2184167417748334, "grad_norm": 0.08392907117414863, "learning_rate": 9.997417976982032e-06, "loss": 0.0021, "step": 33200 }, { "epoch": 0.21848252995006678, "grad_norm": 0.04849915179566784, "learning_rate": 9.997399496016122e-06, "loss": 0.0023, "step": 33210 }, { "epoch": 0.21854831812530015, "grad_norm": 0.13012490919883213, "learning_rate": 9.997380949163817e-06, "loss": 0.0018, "step": 33220 }, { "epoch": 0.21861410630053354, "grad_norm": 0.0342169781670365, "learning_rate": 9.997362336425362e-06, "loss": 0.0025, "step": 33230 }, { "epoch": 0.21867989447576694, "grad_norm": 0.1301256489716107, "learning_rate": 9.997343657800999e-06, "loss": 0.0031, "step": 33240 }, { "epoch": 0.2187456826510003, "grad_norm": 0.46790564404531093, "learning_rate": 9.997324913290975e-06, "loss": 0.0031, "step": 33250 }, { "epoch": 0.2188114708262337, "grad_norm": 0.1337546241557902, "learning_rate": 9.99730610289554e-06, "loss": 0.0019, "step": 33260 }, { "epoch": 0.21887725900146707, "grad_norm": 0.04132830311441393, "learning_rate": 9.997287226614938e-06, "loss": 0.0024, "step": 33270 }, { "epoch": 0.21894304717670046, "grad_norm": 0.12151416353545852, "learning_rate": 9.997268284449421e-06, "loss": 0.0029, "step": 33280 }, { "epoch": 0.21900883535193386, "grad_norm": 0.22576575154971398, "learning_rate": 9.997249276399236e-06, "loss": 0.0033, "step": 33290 }, { "epoch": 0.21907462352716722, "grad_norm": 0.18810759067424912, "learning_rate": 9.997230202464639e-06, "loss": 0.0031, "step": 33300 }, { "epoch": 0.21914041170240062, "grad_norm": 0.2037839667095659, "learning_rate": 9.997211062645874e-06, "loss": 0.0037, "step": 33310 }, { "epoch": 0.21920619987763398, "grad_norm": 0.12720471338506184, "learning_rate": 9.997191856943197e-06, "loss": 0.0028, "step": 33320 }, { "epoch": 0.21927198805286738, "grad_norm": 0.24747512575887098, "learning_rate": 9.997172585356865e-06, "loss": 0.0074, "step": 33330 }, { "epoch": 0.21933777622810077, "grad_norm": 0.22465834641595908, "learning_rate": 9.997153247887124e-06, "loss": 0.0038, "step": 33340 }, { "epoch": 0.21940356440333414, "grad_norm": 0.16532786129210933, "learning_rate": 9.997133844534235e-06, "loss": 0.0036, "step": 33350 }, { "epoch": 0.21946935257856753, "grad_norm": 0.09052887521249883, "learning_rate": 9.997114375298453e-06, "loss": 0.0037, "step": 33360 }, { "epoch": 0.2195351407538009, "grad_norm": 0.18999040774531653, "learning_rate": 9.997094840180032e-06, "loss": 0.004, "step": 33370 }, { "epoch": 0.2196009289290343, "grad_norm": 0.18138843673413788, "learning_rate": 9.997075239179232e-06, "loss": 0.0041, "step": 33380 }, { "epoch": 0.2196667171042677, "grad_norm": 0.12954530448491358, "learning_rate": 9.997055572296313e-06, "loss": 0.0042, "step": 33390 }, { "epoch": 0.21973250527950106, "grad_norm": 0.01812347439234837, "learning_rate": 9.997035839531529e-06, "loss": 0.0028, "step": 33400 }, { "epoch": 0.21979829345473445, "grad_norm": 0.19638840634911395, "learning_rate": 9.997016040885143e-06, "loss": 0.0046, "step": 33410 }, { "epoch": 0.21986408162996782, "grad_norm": 0.12183209716192707, "learning_rate": 9.996996176357418e-06, "loss": 0.0062, "step": 33420 }, { "epoch": 0.2199298698052012, "grad_norm": 0.31811942082664596, "learning_rate": 9.996976245948614e-06, "loss": 0.004, "step": 33430 }, { "epoch": 0.2199956579804346, "grad_norm": 0.056496474223109855, "learning_rate": 9.996956249658993e-06, "loss": 0.0034, "step": 33440 }, { "epoch": 0.22006144615566797, "grad_norm": 0.08834110943232538, "learning_rate": 9.99693618748882e-06, "loss": 0.0028, "step": 33450 }, { "epoch": 0.22012723433090137, "grad_norm": 0.15515818562115163, "learning_rate": 9.996916059438358e-06, "loss": 0.0022, "step": 33460 }, { "epoch": 0.22019302250613473, "grad_norm": 0.13960817212627813, "learning_rate": 9.996895865507875e-06, "loss": 0.0017, "step": 33470 }, { "epoch": 0.22025881068136813, "grad_norm": 0.0894756681282692, "learning_rate": 9.996875605697635e-06, "loss": 0.0027, "step": 33480 }, { "epoch": 0.22032459885660152, "grad_norm": 0.11950978979033197, "learning_rate": 9.996855280007904e-06, "loss": 0.0033, "step": 33490 }, { "epoch": 0.2203903870318349, "grad_norm": 0.0016011247374633699, "learning_rate": 9.996834888438954e-06, "loss": 0.0026, "step": 33500 }, { "epoch": 0.22045617520706828, "grad_norm": 0.17819827190264645, "learning_rate": 9.99681443099105e-06, "loss": 0.0056, "step": 33510 }, { "epoch": 0.22052196338230168, "grad_norm": 0.1598319892048985, "learning_rate": 9.996793907664465e-06, "loss": 0.0039, "step": 33520 }, { "epoch": 0.22058775155753504, "grad_norm": 0.19325467101125454, "learning_rate": 9.996773318459467e-06, "loss": 0.0056, "step": 33530 }, { "epoch": 0.22065353973276844, "grad_norm": 0.09377033682409923, "learning_rate": 9.996752663376327e-06, "loss": 0.0032, "step": 33540 }, { "epoch": 0.2207193279080018, "grad_norm": 0.0579090071977364, "learning_rate": 9.99673194241532e-06, "loss": 0.0027, "step": 33550 }, { "epoch": 0.2207851160832352, "grad_norm": 0.010748720544499045, "learning_rate": 9.996711155576719e-06, "loss": 0.0026, "step": 33560 }, { "epoch": 0.2208509042584686, "grad_norm": 0.0560061550277282, "learning_rate": 9.996690302860794e-06, "loss": 0.0023, "step": 33570 }, { "epoch": 0.22091669243370196, "grad_norm": 0.09917916177661874, "learning_rate": 9.996669384267825e-06, "loss": 0.0029, "step": 33580 }, { "epoch": 0.22098248060893536, "grad_norm": 0.20073336644368192, "learning_rate": 9.996648399798084e-06, "loss": 0.0045, "step": 33590 }, { "epoch": 0.22104826878416872, "grad_norm": 0.18711276688010994, "learning_rate": 9.99662734945185e-06, "loss": 0.0028, "step": 33600 }, { "epoch": 0.22111405695940212, "grad_norm": 0.12771792121898867, "learning_rate": 9.9966062332294e-06, "loss": 0.0014, "step": 33610 }, { "epoch": 0.2211798451346355, "grad_norm": 0.06715628974847454, "learning_rate": 9.99658505113101e-06, "loss": 0.0036, "step": 33620 }, { "epoch": 0.22124563330986888, "grad_norm": 0.1277519425151319, "learning_rate": 9.996563803156966e-06, "loss": 0.0037, "step": 33630 }, { "epoch": 0.22131142148510227, "grad_norm": 0.0819120027235128, "learning_rate": 9.99654248930754e-06, "loss": 0.0031, "step": 33640 }, { "epoch": 0.22137720966033564, "grad_norm": 0.20285773260990586, "learning_rate": 9.996521109583018e-06, "loss": 0.0015, "step": 33650 }, { "epoch": 0.22144299783556903, "grad_norm": 0.11189583029923682, "learning_rate": 9.99649966398368e-06, "loss": 0.0033, "step": 33660 }, { "epoch": 0.22150878601080243, "grad_norm": 0.10519776369695795, "learning_rate": 9.99647815250981e-06, "loss": 0.0028, "step": 33670 }, { "epoch": 0.2215745741860358, "grad_norm": 0.06556277294603728, "learning_rate": 9.99645657516169e-06, "loss": 0.0036, "step": 33680 }, { "epoch": 0.2216403623612692, "grad_norm": 0.1224884219990497, "learning_rate": 9.996434931939605e-06, "loss": 0.0038, "step": 33690 }, { "epoch": 0.22170615053650256, "grad_norm": 0.12841844290830606, "learning_rate": 9.99641322284384e-06, "loss": 0.0016, "step": 33700 }, { "epoch": 0.22177193871173595, "grad_norm": 0.22004388933303506, "learning_rate": 9.996391447874683e-06, "loss": 0.0045, "step": 33710 }, { "epoch": 0.22183772688696934, "grad_norm": 0.08187633985754708, "learning_rate": 9.99636960703242e-06, "loss": 0.0027, "step": 33720 }, { "epoch": 0.2219035150622027, "grad_norm": 0.1911036639740433, "learning_rate": 9.996347700317338e-06, "loss": 0.0039, "step": 33730 }, { "epoch": 0.2219693032374361, "grad_norm": 0.0914122069451377, "learning_rate": 9.996325727729728e-06, "loss": 0.0067, "step": 33740 }, { "epoch": 0.2220350914126695, "grad_norm": 0.06379068988401915, "learning_rate": 9.996303689269877e-06, "loss": 0.0024, "step": 33750 }, { "epoch": 0.22210087958790287, "grad_norm": 0.09856730938960016, "learning_rate": 9.996281584938078e-06, "loss": 0.007, "step": 33760 }, { "epoch": 0.22216666776313626, "grad_norm": 0.08958955084912638, "learning_rate": 9.99625941473462e-06, "loss": 0.0041, "step": 33770 }, { "epoch": 0.22223245593836963, "grad_norm": 0.1327221500655154, "learning_rate": 9.996237178659798e-06, "loss": 0.0038, "step": 33780 }, { "epoch": 0.22229824411360302, "grad_norm": 0.1559564169961025, "learning_rate": 9.996214876713903e-06, "loss": 0.0037, "step": 33790 }, { "epoch": 0.22236403228883642, "grad_norm": 0.1141194919349871, "learning_rate": 9.996192508897231e-06, "loss": 0.0049, "step": 33800 }, { "epoch": 0.22242982046406978, "grad_norm": 0.017172917600852232, "learning_rate": 9.996170075210074e-06, "loss": 0.0039, "step": 33810 }, { "epoch": 0.22249560863930318, "grad_norm": 0.2420330570016687, "learning_rate": 9.99614757565273e-06, "loss": 0.0021, "step": 33820 }, { "epoch": 0.22256139681453654, "grad_norm": 0.1139134505415527, "learning_rate": 9.996125010225498e-06, "loss": 0.003, "step": 33830 }, { "epoch": 0.22262718498976994, "grad_norm": 0.15602326152299106, "learning_rate": 9.996102378928669e-06, "loss": 0.0036, "step": 33840 }, { "epoch": 0.22269297316500333, "grad_norm": 0.11601640294612474, "learning_rate": 9.996079681762547e-06, "loss": 0.0039, "step": 33850 }, { "epoch": 0.2227587613402367, "grad_norm": 0.08154004027343029, "learning_rate": 9.996056918727427e-06, "loss": 0.0024, "step": 33860 }, { "epoch": 0.2228245495154701, "grad_norm": 0.10701276112107848, "learning_rate": 9.996034089823614e-06, "loss": 0.005, "step": 33870 }, { "epoch": 0.22289033769070346, "grad_norm": 0.07106220273285015, "learning_rate": 9.996011195051405e-06, "loss": 0.0037, "step": 33880 }, { "epoch": 0.22295612586593686, "grad_norm": 0.20886727687739023, "learning_rate": 9.995988234411104e-06, "loss": 0.004, "step": 33890 }, { "epoch": 0.22302191404117025, "grad_norm": 0.06581565892873362, "learning_rate": 9.995965207903014e-06, "loss": 0.0026, "step": 33900 }, { "epoch": 0.22308770221640362, "grad_norm": 0.11965793400199812, "learning_rate": 9.995942115527435e-06, "loss": 0.003, "step": 33910 }, { "epoch": 0.223153490391637, "grad_norm": 0.18845698112671316, "learning_rate": 9.995918957284676e-06, "loss": 0.0029, "step": 33920 }, { "epoch": 0.22321927856687038, "grad_norm": 0.1346017655042729, "learning_rate": 9.99589573317504e-06, "loss": 0.0038, "step": 33930 }, { "epoch": 0.22328506674210377, "grad_norm": 0.1286438192505771, "learning_rate": 9.995872443198833e-06, "loss": 0.0046, "step": 33940 }, { "epoch": 0.22335085491733717, "grad_norm": 0.2927564399812979, "learning_rate": 9.995849087356362e-06, "loss": 0.0035, "step": 33950 }, { "epoch": 0.22341664309257053, "grad_norm": 0.10578726112372652, "learning_rate": 9.995825665647936e-06, "loss": 0.0036, "step": 33960 }, { "epoch": 0.22348243126780393, "grad_norm": 0.12101306604299557, "learning_rate": 9.995802178073863e-06, "loss": 0.0037, "step": 33970 }, { "epoch": 0.22354821944303732, "grad_norm": 0.07306007068119792, "learning_rate": 9.995778624634454e-06, "loss": 0.0049, "step": 33980 }, { "epoch": 0.2236140076182707, "grad_norm": 0.09972523411015191, "learning_rate": 9.995755005330018e-06, "loss": 0.0046, "step": 33990 }, { "epoch": 0.22367979579350408, "grad_norm": 0.09674293362934078, "learning_rate": 9.995731320160866e-06, "loss": 0.0038, "step": 34000 }, { "epoch": 0.22374558396873745, "grad_norm": 0.043492650118697046, "learning_rate": 9.995707569127311e-06, "loss": 0.0017, "step": 34010 }, { "epoch": 0.22381137214397084, "grad_norm": 0.2063887172406962, "learning_rate": 9.995683752229667e-06, "loss": 0.0039, "step": 34020 }, { "epoch": 0.22387716031920424, "grad_norm": 0.12435170396622176, "learning_rate": 9.995659869468249e-06, "loss": 0.0048, "step": 34030 }, { "epoch": 0.2239429484944376, "grad_norm": 0.09410157980131514, "learning_rate": 9.995635920843366e-06, "loss": 0.0033, "step": 34040 }, { "epoch": 0.224008736669671, "grad_norm": 0.11248429295341952, "learning_rate": 9.995611906355343e-06, "loss": 0.0034, "step": 34050 }, { "epoch": 0.22407452484490437, "grad_norm": 0.04814199608572426, "learning_rate": 9.99558782600449e-06, "loss": 0.0039, "step": 34060 }, { "epoch": 0.22414031302013776, "grad_norm": 0.20989131031030225, "learning_rate": 9.995563679791123e-06, "loss": 0.008, "step": 34070 }, { "epoch": 0.22420610119537115, "grad_norm": 0.14724447706173052, "learning_rate": 9.995539467715565e-06, "loss": 0.0039, "step": 34080 }, { "epoch": 0.22427188937060452, "grad_norm": 0.0884465209410513, "learning_rate": 9.995515189778134e-06, "loss": 0.0029, "step": 34090 }, { "epoch": 0.22433767754583792, "grad_norm": 0.015758418827286708, "learning_rate": 9.99549084597915e-06, "loss": 0.0026, "step": 34100 }, { "epoch": 0.22440346572107128, "grad_norm": 0.034773983479282226, "learning_rate": 9.995466436318933e-06, "loss": 0.0042, "step": 34110 }, { "epoch": 0.22446925389630468, "grad_norm": 0.1886942803615187, "learning_rate": 9.995441960797805e-06, "loss": 0.0038, "step": 34120 }, { "epoch": 0.22453504207153807, "grad_norm": 0.0734743243353409, "learning_rate": 9.99541741941609e-06, "loss": 0.0043, "step": 34130 }, { "epoch": 0.22460083024677144, "grad_norm": 0.1369189614359996, "learning_rate": 9.99539281217411e-06, "loss": 0.0051, "step": 34140 }, { "epoch": 0.22466661842200483, "grad_norm": 0.13928406368357593, "learning_rate": 9.99536813907219e-06, "loss": 0.0022, "step": 34150 }, { "epoch": 0.2247324065972382, "grad_norm": 0.27392819293798104, "learning_rate": 9.995343400110656e-06, "loss": 0.0037, "step": 34160 }, { "epoch": 0.2247981947724716, "grad_norm": 0.10793187983132117, "learning_rate": 9.995318595289832e-06, "loss": 0.0051, "step": 34170 }, { "epoch": 0.224863982947705, "grad_norm": 0.1675717510785211, "learning_rate": 9.995293724610047e-06, "loss": 0.0039, "step": 34180 }, { "epoch": 0.22492977112293835, "grad_norm": 0.08259251499962188, "learning_rate": 9.99526878807163e-06, "loss": 0.0026, "step": 34190 }, { "epoch": 0.22499555929817175, "grad_norm": 0.11415029834196097, "learning_rate": 9.995243785674907e-06, "loss": 0.0039, "step": 34200 }, { "epoch": 0.22506134747340514, "grad_norm": 0.023736503118109743, "learning_rate": 9.99521871742021e-06, "loss": 0.0022, "step": 34210 }, { "epoch": 0.2251271356486385, "grad_norm": 0.300312769762668, "learning_rate": 9.995193583307867e-06, "loss": 0.0032, "step": 34220 }, { "epoch": 0.2251929238238719, "grad_norm": 0.15599766739986348, "learning_rate": 9.995168383338212e-06, "loss": 0.0049, "step": 34230 }, { "epoch": 0.22525871199910527, "grad_norm": 0.2074691921644045, "learning_rate": 9.995143117511574e-06, "loss": 0.0025, "step": 34240 }, { "epoch": 0.22532450017433867, "grad_norm": 0.24005421507448618, "learning_rate": 9.99511778582829e-06, "loss": 0.0041, "step": 34250 }, { "epoch": 0.22539028834957206, "grad_norm": 0.14670733262974262, "learning_rate": 9.995092388288692e-06, "loss": 0.0034, "step": 34260 }, { "epoch": 0.22545607652480543, "grad_norm": 0.06422946730260393, "learning_rate": 9.995066924893114e-06, "loss": 0.0041, "step": 34270 }, { "epoch": 0.22552186470003882, "grad_norm": 0.09994971484754163, "learning_rate": 9.995041395641894e-06, "loss": 0.0057, "step": 34280 }, { "epoch": 0.2255876528752722, "grad_norm": 0.08109531666748689, "learning_rate": 9.995015800535366e-06, "loss": 0.003, "step": 34290 }, { "epoch": 0.22565344105050558, "grad_norm": 0.1960300750441103, "learning_rate": 9.994990139573868e-06, "loss": 0.0056, "step": 34300 }, { "epoch": 0.22571922922573898, "grad_norm": 0.06329239876510276, "learning_rate": 9.994964412757739e-06, "loss": 0.0032, "step": 34310 }, { "epoch": 0.22578501740097234, "grad_norm": 0.08289771173471544, "learning_rate": 9.99493862008732e-06, "loss": 0.002, "step": 34320 }, { "epoch": 0.22585080557620574, "grad_norm": 0.19440119574910927, "learning_rate": 9.994912761562946e-06, "loss": 0.0034, "step": 34330 }, { "epoch": 0.2259165937514391, "grad_norm": 0.08868895498606628, "learning_rate": 9.994886837184964e-06, "loss": 0.0031, "step": 34340 }, { "epoch": 0.2259823819266725, "grad_norm": 0.19504822371223213, "learning_rate": 9.994860846953713e-06, "loss": 0.0037, "step": 34350 }, { "epoch": 0.2260481701019059, "grad_norm": 0.2299181394490415, "learning_rate": 9.994834790869534e-06, "loss": 0.0044, "step": 34360 }, { "epoch": 0.22611395827713926, "grad_norm": 0.19963204975805146, "learning_rate": 9.994808668932774e-06, "loss": 0.0044, "step": 34370 }, { "epoch": 0.22617974645237265, "grad_norm": 0.11433085316912048, "learning_rate": 9.994782481143774e-06, "loss": 0.0034, "step": 34380 }, { "epoch": 0.22624553462760602, "grad_norm": 0.2570142744081518, "learning_rate": 9.994756227502883e-06, "loss": 0.0087, "step": 34390 }, { "epoch": 0.22631132280283942, "grad_norm": 0.07407448536025679, "learning_rate": 9.994729908010441e-06, "loss": 0.0016, "step": 34400 }, { "epoch": 0.2263771109780728, "grad_norm": 0.23234712572852487, "learning_rate": 9.9947035226668e-06, "loss": 0.0037, "step": 34410 }, { "epoch": 0.22644289915330618, "grad_norm": 0.4010073649346825, "learning_rate": 9.994677071472308e-06, "loss": 0.0033, "step": 34420 }, { "epoch": 0.22650868732853957, "grad_norm": 0.08569999035148781, "learning_rate": 9.994650554427314e-06, "loss": 0.0038, "step": 34430 }, { "epoch": 0.22657447550377297, "grad_norm": 0.10904332653759878, "learning_rate": 9.994623971532163e-06, "loss": 0.0023, "step": 34440 }, { "epoch": 0.22664026367900633, "grad_norm": 0.4359464495697641, "learning_rate": 9.99459732278721e-06, "loss": 0.004, "step": 34450 }, { "epoch": 0.22670605185423973, "grad_norm": 0.1197895122252049, "learning_rate": 9.994570608192804e-06, "loss": 0.0044, "step": 34460 }, { "epoch": 0.2267718400294731, "grad_norm": 0.22838200231215441, "learning_rate": 9.994543827749299e-06, "loss": 0.0041, "step": 34470 }, { "epoch": 0.2268376282047065, "grad_norm": 0.20911197378047763, "learning_rate": 9.994516981457048e-06, "loss": 0.0036, "step": 34480 }, { "epoch": 0.22690341637993988, "grad_norm": 0.07071767015104265, "learning_rate": 9.994490069316402e-06, "loss": 0.0036, "step": 34490 }, { "epoch": 0.22696920455517325, "grad_norm": 0.1998158319276901, "learning_rate": 9.99446309132772e-06, "loss": 0.0026, "step": 34500 }, { "epoch": 0.22703499273040664, "grad_norm": 0.17834225146688795, "learning_rate": 9.994436047491354e-06, "loss": 0.0032, "step": 34510 }, { "epoch": 0.22710078090564, "grad_norm": 0.023633203296553402, "learning_rate": 9.994408937807663e-06, "loss": 0.0029, "step": 34520 }, { "epoch": 0.2271665690808734, "grad_norm": 0.02511740994517891, "learning_rate": 9.994381762277004e-06, "loss": 0.003, "step": 34530 }, { "epoch": 0.2272323572561068, "grad_norm": 0.11869726603918643, "learning_rate": 9.994354520899734e-06, "loss": 0.0024, "step": 34540 }, { "epoch": 0.22729814543134017, "grad_norm": 0.30953943171864984, "learning_rate": 9.994327213676215e-06, "loss": 0.0039, "step": 34550 }, { "epoch": 0.22736393360657356, "grad_norm": 0.2474202752219638, "learning_rate": 9.994299840606802e-06, "loss": 0.0061, "step": 34560 }, { "epoch": 0.22742972178180693, "grad_norm": 0.2049396486732969, "learning_rate": 9.994272401691862e-06, "loss": 0.0053, "step": 34570 }, { "epoch": 0.22749550995704032, "grad_norm": 0.23476069627146803, "learning_rate": 9.994244896931753e-06, "loss": 0.002, "step": 34580 }, { "epoch": 0.22756129813227371, "grad_norm": 0.1326732419937406, "learning_rate": 9.994217326326836e-06, "loss": 0.0022, "step": 34590 }, { "epoch": 0.22762708630750708, "grad_norm": 0.2554503701671322, "learning_rate": 9.99418968987748e-06, "loss": 0.0028, "step": 34600 }, { "epoch": 0.22769287448274048, "grad_norm": 0.09739699785648694, "learning_rate": 9.994161987584043e-06, "loss": 0.0026, "step": 34610 }, { "epoch": 0.22775866265797384, "grad_norm": 0.11574002537530391, "learning_rate": 9.994134219446897e-06, "loss": 0.0025, "step": 34620 }, { "epoch": 0.22782445083320724, "grad_norm": 0.26881597692940273, "learning_rate": 9.994106385466403e-06, "loss": 0.0034, "step": 34630 }, { "epoch": 0.22789023900844063, "grad_norm": 0.21832524749413773, "learning_rate": 9.994078485642928e-06, "loss": 0.0038, "step": 34640 }, { "epoch": 0.227956027183674, "grad_norm": 0.13737958032673023, "learning_rate": 9.994050519976843e-06, "loss": 0.003, "step": 34650 }, { "epoch": 0.2280218153589074, "grad_norm": 0.12657639084263633, "learning_rate": 9.994022488468513e-06, "loss": 0.004, "step": 34660 }, { "epoch": 0.2280876035341408, "grad_norm": 0.11974278042748358, "learning_rate": 9.993994391118312e-06, "loss": 0.0044, "step": 34670 }, { "epoch": 0.22815339170937415, "grad_norm": 0.07905383621489968, "learning_rate": 9.993966227926608e-06, "loss": 0.0032, "step": 34680 }, { "epoch": 0.22821917988460755, "grad_norm": 0.18704845003859372, "learning_rate": 9.99393799889377e-06, "loss": 0.0033, "step": 34690 }, { "epoch": 0.22828496805984091, "grad_norm": 0.3487773220285664, "learning_rate": 9.993909704020174e-06, "loss": 0.0046, "step": 34700 }, { "epoch": 0.2283507562350743, "grad_norm": 0.08269988146233581, "learning_rate": 9.993881343306193e-06, "loss": 0.0028, "step": 34710 }, { "epoch": 0.2284165444103077, "grad_norm": 0.31973910078514295, "learning_rate": 9.993852916752198e-06, "loss": 0.0091, "step": 34720 }, { "epoch": 0.22848233258554107, "grad_norm": 0.011742134486653455, "learning_rate": 9.993824424358565e-06, "loss": 0.0023, "step": 34730 }, { "epoch": 0.22854812076077446, "grad_norm": 0.03653517427849755, "learning_rate": 9.99379586612567e-06, "loss": 0.0023, "step": 34740 }, { "epoch": 0.22861390893600783, "grad_norm": 0.21950100971515593, "learning_rate": 9.993767242053892e-06, "loss": 0.0029, "step": 34750 }, { "epoch": 0.22867969711124123, "grad_norm": 0.08665263038131736, "learning_rate": 9.993738552143602e-06, "loss": 0.0024, "step": 34760 }, { "epoch": 0.22874548528647462, "grad_norm": 0.20403235174087875, "learning_rate": 9.993709796395183e-06, "loss": 0.003, "step": 34770 }, { "epoch": 0.228811273461708, "grad_norm": 0.060185357193442895, "learning_rate": 9.993680974809015e-06, "loss": 0.0041, "step": 34780 }, { "epoch": 0.22887706163694138, "grad_norm": 0.11942327399312105, "learning_rate": 9.993652087385475e-06, "loss": 0.0032, "step": 34790 }, { "epoch": 0.22894284981217475, "grad_norm": 0.12890776017060254, "learning_rate": 9.993623134124945e-06, "loss": 0.0029, "step": 34800 }, { "epoch": 0.22900863798740814, "grad_norm": 0.14701532728779648, "learning_rate": 9.993594115027807e-06, "loss": 0.0042, "step": 34810 }, { "epoch": 0.22907442616264154, "grad_norm": 0.030932565009021334, "learning_rate": 9.993565030094443e-06, "loss": 0.0019, "step": 34820 }, { "epoch": 0.2291402143378749, "grad_norm": 0.22082308573544232, "learning_rate": 9.993535879325236e-06, "loss": 0.0033, "step": 34830 }, { "epoch": 0.2292060025131083, "grad_norm": 0.10710341556048635, "learning_rate": 9.993506662720572e-06, "loss": 0.0041, "step": 34840 }, { "epoch": 0.22927179068834166, "grad_norm": 0.11402454839729313, "learning_rate": 9.993477380280836e-06, "loss": 0.0051, "step": 34850 }, { "epoch": 0.22933757886357506, "grad_norm": 0.09298284889496508, "learning_rate": 9.993448032006412e-06, "loss": 0.0026, "step": 34860 }, { "epoch": 0.22940336703880845, "grad_norm": 0.15665185534549433, "learning_rate": 9.99341861789769e-06, "loss": 0.0035, "step": 34870 }, { "epoch": 0.22946915521404182, "grad_norm": 0.07966493048185987, "learning_rate": 9.993389137955055e-06, "loss": 0.0049, "step": 34880 }, { "epoch": 0.22953494338927521, "grad_norm": 0.01908795927305477, "learning_rate": 9.993359592178897e-06, "loss": 0.0024, "step": 34890 }, { "epoch": 0.2296007315645086, "grad_norm": 0.0878212137637118, "learning_rate": 9.993329980569606e-06, "loss": 0.0026, "step": 34900 }, { "epoch": 0.22966651973974198, "grad_norm": 0.05389384662294702, "learning_rate": 9.99330030312757e-06, "loss": 0.0038, "step": 34910 }, { "epoch": 0.22973230791497537, "grad_norm": 0.04643730538314863, "learning_rate": 9.993270559853183e-06, "loss": 0.005, "step": 34920 }, { "epoch": 0.22979809609020874, "grad_norm": 0.12164646322408225, "learning_rate": 9.993240750746837e-06, "loss": 0.002, "step": 34930 }, { "epoch": 0.22986388426544213, "grad_norm": 0.22397342395886266, "learning_rate": 9.993210875808923e-06, "loss": 0.0037, "step": 34940 }, { "epoch": 0.22992967244067553, "grad_norm": 0.1118822447494332, "learning_rate": 9.993180935039836e-06, "loss": 0.0028, "step": 34950 }, { "epoch": 0.2299954606159089, "grad_norm": 0.07369769585095363, "learning_rate": 9.993150928439972e-06, "loss": 0.0025, "step": 34960 }, { "epoch": 0.2300612487911423, "grad_norm": 0.1442630831574844, "learning_rate": 9.993120856009723e-06, "loss": 0.0034, "step": 34970 }, { "epoch": 0.23012703696637565, "grad_norm": 0.12685121184717527, "learning_rate": 9.993090717749489e-06, "loss": 0.0057, "step": 34980 }, { "epoch": 0.23019282514160905, "grad_norm": 0.09357105986509137, "learning_rate": 9.993060513659666e-06, "loss": 0.0028, "step": 34990 }, { "epoch": 0.23025861331684244, "grad_norm": 0.07728887029101016, "learning_rate": 9.993030243740655e-06, "loss": 0.0029, "step": 35000 }, { "epoch": 0.2303244014920758, "grad_norm": 0.16972752162139151, "learning_rate": 9.992999907992851e-06, "loss": 0.0024, "step": 35010 }, { "epoch": 0.2303901896673092, "grad_norm": 0.08079267373945569, "learning_rate": 9.992969506416655e-06, "loss": 0.0026, "step": 35020 }, { "epoch": 0.23045597784254257, "grad_norm": 0.15658529366468693, "learning_rate": 9.992939039012468e-06, "loss": 0.0037, "step": 35030 }, { "epoch": 0.23052176601777596, "grad_norm": 0.45239139738659684, "learning_rate": 9.992908505780692e-06, "loss": 0.003, "step": 35040 }, { "epoch": 0.23058755419300936, "grad_norm": 0.1443292656570891, "learning_rate": 9.99287790672173e-06, "loss": 0.0075, "step": 35050 }, { "epoch": 0.23065334236824273, "grad_norm": 0.16986347127445967, "learning_rate": 9.992847241835984e-06, "loss": 0.0028, "step": 35060 }, { "epoch": 0.23071913054347612, "grad_norm": 0.05298725568154353, "learning_rate": 9.992816511123859e-06, "loss": 0.0035, "step": 35070 }, { "epoch": 0.2307849187187095, "grad_norm": 0.3604486898304925, "learning_rate": 9.992785714585762e-06, "loss": 0.0047, "step": 35080 }, { "epoch": 0.23085070689394288, "grad_norm": 0.1792285789059057, "learning_rate": 9.992754852222096e-06, "loss": 0.0026, "step": 35090 }, { "epoch": 0.23091649506917628, "grad_norm": 0.13295147333112217, "learning_rate": 9.99272392403327e-06, "loss": 0.0036, "step": 35100 }, { "epoch": 0.23098228324440964, "grad_norm": 0.10313474147152439, "learning_rate": 9.992692930019692e-06, "loss": 0.0035, "step": 35110 }, { "epoch": 0.23104807141964304, "grad_norm": 0.20677038224856584, "learning_rate": 9.992661870181768e-06, "loss": 0.005, "step": 35120 }, { "epoch": 0.23111385959487643, "grad_norm": 0.17554935036617358, "learning_rate": 9.99263074451991e-06, "loss": 0.0086, "step": 35130 }, { "epoch": 0.2311796477701098, "grad_norm": 0.25802864018596466, "learning_rate": 9.992599553034527e-06, "loss": 0.0061, "step": 35140 }, { "epoch": 0.2312454359453432, "grad_norm": 0.16671293887652452, "learning_rate": 9.99256829572603e-06, "loss": 0.0034, "step": 35150 }, { "epoch": 0.23131122412057656, "grad_norm": 0.20672507524213854, "learning_rate": 9.992536972594834e-06, "loss": 0.0081, "step": 35160 }, { "epoch": 0.23137701229580995, "grad_norm": 0.12357614734487495, "learning_rate": 9.99250558364135e-06, "loss": 0.0034, "step": 35170 }, { "epoch": 0.23144280047104335, "grad_norm": 0.03440633580375592, "learning_rate": 9.99247412886599e-06, "loss": 0.004, "step": 35180 }, { "epoch": 0.23150858864627671, "grad_norm": 0.06783340442692926, "learning_rate": 9.99244260826917e-06, "loss": 0.0023, "step": 35190 }, { "epoch": 0.2315743768215101, "grad_norm": 0.22640614499945008, "learning_rate": 9.992411021851307e-06, "loss": 0.0025, "step": 35200 }, { "epoch": 0.23164016499674348, "grad_norm": 0.1444048832270504, "learning_rate": 9.992379369612817e-06, "loss": 0.0047, "step": 35210 }, { "epoch": 0.23170595317197687, "grad_norm": 0.1504893456173003, "learning_rate": 9.992347651554116e-06, "loss": 0.0032, "step": 35220 }, { "epoch": 0.23177174134721026, "grad_norm": 0.08879083706359048, "learning_rate": 9.992315867675623e-06, "loss": 0.0033, "step": 35230 }, { "epoch": 0.23183752952244363, "grad_norm": 0.030514129721638972, "learning_rate": 9.992284017977756e-06, "loss": 0.0037, "step": 35240 }, { "epoch": 0.23190331769767702, "grad_norm": 0.09069476631721259, "learning_rate": 9.992252102460937e-06, "loss": 0.0024, "step": 35250 }, { "epoch": 0.2319691058729104, "grad_norm": 0.09677981215973495, "learning_rate": 9.992220121125586e-06, "loss": 0.005, "step": 35260 }, { "epoch": 0.23203489404814379, "grad_norm": 0.15060020456853926, "learning_rate": 9.992188073972124e-06, "loss": 0.003, "step": 35270 }, { "epoch": 0.23210068222337718, "grad_norm": 0.01118766971091801, "learning_rate": 9.992155961000974e-06, "loss": 0.004, "step": 35280 }, { "epoch": 0.23216647039861055, "grad_norm": 0.22155434814119632, "learning_rate": 9.992123782212557e-06, "loss": 0.0044, "step": 35290 }, { "epoch": 0.23223225857384394, "grad_norm": 0.20726519329435322, "learning_rate": 9.992091537607303e-06, "loss": 0.005, "step": 35300 }, { "epoch": 0.2322980467490773, "grad_norm": 0.18396878267436814, "learning_rate": 9.99205922718563e-06, "loss": 0.0017, "step": 35310 }, { "epoch": 0.2323638349243107, "grad_norm": 0.12031796092000618, "learning_rate": 9.992026850947969e-06, "loss": 0.0044, "step": 35320 }, { "epoch": 0.2324296230995441, "grad_norm": 0.027134006434164373, "learning_rate": 9.991994408894746e-06, "loss": 0.0023, "step": 35330 }, { "epoch": 0.23249541127477746, "grad_norm": 0.01845841913026857, "learning_rate": 9.991961901026388e-06, "loss": 0.0029, "step": 35340 }, { "epoch": 0.23256119945001086, "grad_norm": 0.08880913220585301, "learning_rate": 9.991929327343322e-06, "loss": 0.002, "step": 35350 }, { "epoch": 0.23262698762524422, "grad_norm": 0.1968233006104834, "learning_rate": 9.99189668784598e-06, "loss": 0.0032, "step": 35360 }, { "epoch": 0.23269277580047762, "grad_norm": 0.07238561481300458, "learning_rate": 9.991863982534794e-06, "loss": 0.0022, "step": 35370 }, { "epoch": 0.232758563975711, "grad_norm": 0.1299431991527866, "learning_rate": 9.991831211410189e-06, "loss": 0.0048, "step": 35380 }, { "epoch": 0.23282435215094438, "grad_norm": 0.14959620452414632, "learning_rate": 9.991798374472601e-06, "loss": 0.0019, "step": 35390 }, { "epoch": 0.23289014032617777, "grad_norm": 0.0861716196044997, "learning_rate": 9.991765471722464e-06, "loss": 0.0053, "step": 35400 }, { "epoch": 0.23295592850141117, "grad_norm": 0.11569236292105944, "learning_rate": 9.99173250316021e-06, "loss": 0.0071, "step": 35410 }, { "epoch": 0.23302171667664454, "grad_norm": 0.5413777040465674, "learning_rate": 9.991699468786275e-06, "loss": 0.0032, "step": 35420 }, { "epoch": 0.23308750485187793, "grad_norm": 0.23914809396848274, "learning_rate": 9.991666368601093e-06, "loss": 0.0043, "step": 35430 }, { "epoch": 0.2331532930271113, "grad_norm": 0.10075293101580213, "learning_rate": 9.9916332026051e-06, "loss": 0.0048, "step": 35440 }, { "epoch": 0.2332190812023447, "grad_norm": 0.26460087099133833, "learning_rate": 9.991599970798735e-06, "loss": 0.0035, "step": 35450 }, { "epoch": 0.23328486937757809, "grad_norm": 0.18716745284982886, "learning_rate": 9.991566673182436e-06, "loss": 0.0046, "step": 35460 }, { "epoch": 0.23335065755281145, "grad_norm": 0.07309042675992553, "learning_rate": 9.991533309756641e-06, "loss": 0.0022, "step": 35470 }, { "epoch": 0.23341644572804485, "grad_norm": 0.032354797302117905, "learning_rate": 9.99149988052179e-06, "loss": 0.0033, "step": 35480 }, { "epoch": 0.2334822339032782, "grad_norm": 0.12488354057115034, "learning_rate": 9.991466385478326e-06, "loss": 0.0017, "step": 35490 }, { "epoch": 0.2335480220785116, "grad_norm": 0.04180882219362103, "learning_rate": 9.991432824626686e-06, "loss": 0.0036, "step": 35500 }, { "epoch": 0.233613810253745, "grad_norm": 0.11318943088179048, "learning_rate": 9.991399197967315e-06, "loss": 0.0033, "step": 35510 }, { "epoch": 0.23367959842897837, "grad_norm": 0.3324617595142318, "learning_rate": 9.991365505500659e-06, "loss": 0.0024, "step": 35520 }, { "epoch": 0.23374538660421176, "grad_norm": 0.2279218301188535, "learning_rate": 9.991331747227157e-06, "loss": 0.0047, "step": 35530 }, { "epoch": 0.23381117477944513, "grad_norm": 0.067793491541154, "learning_rate": 9.991297923147259e-06, "loss": 0.0042, "step": 35540 }, { "epoch": 0.23387696295467852, "grad_norm": 0.1235251379425847, "learning_rate": 9.991264033261407e-06, "loss": 0.0045, "step": 35550 }, { "epoch": 0.23394275112991192, "grad_norm": 0.09580533483026626, "learning_rate": 9.991230077570048e-06, "loss": 0.0035, "step": 35560 }, { "epoch": 0.23400853930514529, "grad_norm": 0.1111411589327977, "learning_rate": 9.991196056073635e-06, "loss": 0.0015, "step": 35570 }, { "epoch": 0.23407432748037868, "grad_norm": 0.07079390744307705, "learning_rate": 9.99116196877261e-06, "loss": 0.0024, "step": 35580 }, { "epoch": 0.23414011565561205, "grad_norm": 0.08275887130312634, "learning_rate": 9.991127815667424e-06, "loss": 0.0051, "step": 35590 }, { "epoch": 0.23420590383084544, "grad_norm": 0.051345961527729835, "learning_rate": 9.991093596758532e-06, "loss": 0.0021, "step": 35600 }, { "epoch": 0.23427169200607884, "grad_norm": 0.09028992630821712, "learning_rate": 9.99105931204638e-06, "loss": 0.0029, "step": 35610 }, { "epoch": 0.2343374801813122, "grad_norm": 0.1628315419590758, "learning_rate": 9.99102496153142e-06, "loss": 0.0029, "step": 35620 }, { "epoch": 0.2344032683565456, "grad_norm": 0.026928425860815057, "learning_rate": 9.990990545214108e-06, "loss": 0.0033, "step": 35630 }, { "epoch": 0.234469056531779, "grad_norm": 0.0605975563604888, "learning_rate": 9.990956063094896e-06, "loss": 0.0023, "step": 35640 }, { "epoch": 0.23453484470701236, "grad_norm": 0.1092738805059046, "learning_rate": 9.99092151517424e-06, "loss": 0.0026, "step": 35650 }, { "epoch": 0.23460063288224575, "grad_norm": 0.034761388569237286, "learning_rate": 9.990886901452593e-06, "loss": 0.006, "step": 35660 }, { "epoch": 0.23466642105747912, "grad_norm": 0.16446875708680322, "learning_rate": 9.990852221930411e-06, "loss": 0.0029, "step": 35670 }, { "epoch": 0.2347322092327125, "grad_norm": 0.08239230068739643, "learning_rate": 9.990817476608154e-06, "loss": 0.0028, "step": 35680 }, { "epoch": 0.2347979974079459, "grad_norm": 0.16450614919689124, "learning_rate": 9.99078266548628e-06, "loss": 0.0051, "step": 35690 }, { "epoch": 0.23486378558317927, "grad_norm": 0.09567857552136946, "learning_rate": 9.990747788565245e-06, "loss": 0.002, "step": 35700 }, { "epoch": 0.23492957375841267, "grad_norm": 0.22956101008733157, "learning_rate": 9.990712845845512e-06, "loss": 0.0036, "step": 35710 }, { "epoch": 0.23499536193364604, "grad_norm": 0.12227592224452828, "learning_rate": 9.99067783732754e-06, "loss": 0.0039, "step": 35720 }, { "epoch": 0.23506115010887943, "grad_norm": 0.11763538396468125, "learning_rate": 9.99064276301179e-06, "loss": 0.0037, "step": 35730 }, { "epoch": 0.23512693828411282, "grad_norm": 0.2990405655483769, "learning_rate": 9.990607622898729e-06, "loss": 0.0039, "step": 35740 }, { "epoch": 0.2351927264593462, "grad_norm": 0.11369424320826904, "learning_rate": 9.990572416988813e-06, "loss": 0.0028, "step": 35750 }, { "epoch": 0.23525851463457959, "grad_norm": 0.1293147121576685, "learning_rate": 9.99053714528251e-06, "loss": 0.0044, "step": 35760 }, { "epoch": 0.23532430280981295, "grad_norm": 0.031827016687086554, "learning_rate": 9.990501807780287e-06, "loss": 0.0024, "step": 35770 }, { "epoch": 0.23539009098504635, "grad_norm": 0.09514923044209501, "learning_rate": 9.990466404482605e-06, "loss": 0.0025, "step": 35780 }, { "epoch": 0.23545587916027974, "grad_norm": 0.185333703865053, "learning_rate": 9.990430935389936e-06, "loss": 0.0033, "step": 35790 }, { "epoch": 0.2355216673355131, "grad_norm": 0.2356502786409749, "learning_rate": 9.990395400502744e-06, "loss": 0.0037, "step": 35800 }, { "epoch": 0.2355874555107465, "grad_norm": 0.220479706360596, "learning_rate": 9.990359799821499e-06, "loss": 0.0037, "step": 35810 }, { "epoch": 0.23565324368597987, "grad_norm": 0.17841317670838583, "learning_rate": 9.990324133346671e-06, "loss": 0.0029, "step": 35820 }, { "epoch": 0.23571903186121326, "grad_norm": 0.11003775358314015, "learning_rate": 9.990288401078727e-06, "loss": 0.002, "step": 35830 }, { "epoch": 0.23578482003644666, "grad_norm": 0.1278363047132318, "learning_rate": 9.990252603018143e-06, "loss": 0.0052, "step": 35840 }, { "epoch": 0.23585060821168002, "grad_norm": 0.10890820058646489, "learning_rate": 9.990216739165385e-06, "loss": 0.0033, "step": 35850 }, { "epoch": 0.23591639638691342, "grad_norm": 0.23985921492249954, "learning_rate": 9.990180809520931e-06, "loss": 0.0047, "step": 35860 }, { "epoch": 0.2359821845621468, "grad_norm": 0.0789061479810533, "learning_rate": 9.990144814085253e-06, "loss": 0.0037, "step": 35870 }, { "epoch": 0.23604797273738018, "grad_norm": 0.11441087019716036, "learning_rate": 9.990108752858824e-06, "loss": 0.0037, "step": 35880 }, { "epoch": 0.23611376091261357, "grad_norm": 0.1446014300216444, "learning_rate": 9.990072625842122e-06, "loss": 0.0049, "step": 35890 }, { "epoch": 0.23617954908784694, "grad_norm": 0.29947319401461747, "learning_rate": 9.990036433035622e-06, "loss": 0.0035, "step": 35900 }, { "epoch": 0.23624533726308033, "grad_norm": 0.09354970297882056, "learning_rate": 9.990000174439802e-06, "loss": 0.004, "step": 35910 }, { "epoch": 0.23631112543831373, "grad_norm": 0.06855845340089985, "learning_rate": 9.989963850055138e-06, "loss": 0.0038, "step": 35920 }, { "epoch": 0.2363769136135471, "grad_norm": 0.08453948310996362, "learning_rate": 9.989927459882112e-06, "loss": 0.0031, "step": 35930 }, { "epoch": 0.2364427017887805, "grad_norm": 0.213224848169395, "learning_rate": 9.989891003921201e-06, "loss": 0.0024, "step": 35940 }, { "epoch": 0.23650848996401386, "grad_norm": 0.15234618129023625, "learning_rate": 9.989854482172887e-06, "loss": 0.0041, "step": 35950 }, { "epoch": 0.23657427813924725, "grad_norm": 0.1268474388272366, "learning_rate": 9.98981789463765e-06, "loss": 0.0031, "step": 35960 }, { "epoch": 0.23664006631448065, "grad_norm": 0.16124288980662208, "learning_rate": 9.989781241315975e-06, "loss": 0.002, "step": 35970 }, { "epoch": 0.236705854489714, "grad_norm": 0.20915383027863316, "learning_rate": 9.989744522208342e-06, "loss": 0.005, "step": 35980 }, { "epoch": 0.2367716426649474, "grad_norm": 0.06009339019304256, "learning_rate": 9.98970773731524e-06, "loss": 0.0035, "step": 35990 }, { "epoch": 0.23683743084018077, "grad_norm": 0.15696612030481097, "learning_rate": 9.989670886637147e-06, "loss": 0.004, "step": 36000 }, { "epoch": 0.23690321901541417, "grad_norm": 0.06185259451566335, "learning_rate": 9.989633970174555e-06, "loss": 0.0024, "step": 36010 }, { "epoch": 0.23696900719064756, "grad_norm": 0.05979780694074387, "learning_rate": 9.989596987927948e-06, "loss": 0.0029, "step": 36020 }, { "epoch": 0.23703479536588093, "grad_norm": 0.1140831623910392, "learning_rate": 9.989559939897814e-06, "loss": 0.0034, "step": 36030 }, { "epoch": 0.23710058354111432, "grad_norm": 0.11447240198232456, "learning_rate": 9.989522826084642e-06, "loss": 0.0027, "step": 36040 }, { "epoch": 0.2371663717163477, "grad_norm": 0.20997239855487415, "learning_rate": 9.989485646488919e-06, "loss": 0.004, "step": 36050 }, { "epoch": 0.23723215989158108, "grad_norm": 0.11284351612561723, "learning_rate": 9.989448401111139e-06, "loss": 0.0023, "step": 36060 }, { "epoch": 0.23729794806681448, "grad_norm": 0.07968958328752829, "learning_rate": 9.98941108995179e-06, "loss": 0.0034, "step": 36070 }, { "epoch": 0.23736373624204785, "grad_norm": 0.10715524586396612, "learning_rate": 9.989373713011365e-06, "loss": 0.0029, "step": 36080 }, { "epoch": 0.23742952441728124, "grad_norm": 0.13309317197500897, "learning_rate": 9.989336270290358e-06, "loss": 0.0059, "step": 36090 }, { "epoch": 0.23749531259251463, "grad_norm": 0.3214639464415431, "learning_rate": 9.98929876178926e-06, "loss": 0.0043, "step": 36100 }, { "epoch": 0.237561100767748, "grad_norm": 0.047823181059158254, "learning_rate": 9.989261187508566e-06, "loss": 0.0024, "step": 36110 }, { "epoch": 0.2376268889429814, "grad_norm": 0.2217999705310464, "learning_rate": 9.989223547448775e-06, "loss": 0.0029, "step": 36120 }, { "epoch": 0.23769267711821476, "grad_norm": 0.029459319459522026, "learning_rate": 9.989185841610378e-06, "loss": 0.0032, "step": 36130 }, { "epoch": 0.23775846529344816, "grad_norm": 0.033682068127764565, "learning_rate": 9.989148069993875e-06, "loss": 0.0023, "step": 36140 }, { "epoch": 0.23782425346868155, "grad_norm": 0.11567006287623274, "learning_rate": 9.989110232599763e-06, "loss": 0.0027, "step": 36150 }, { "epoch": 0.23789004164391492, "grad_norm": 0.11220669702756939, "learning_rate": 9.989072329428544e-06, "loss": 0.0025, "step": 36160 }, { "epoch": 0.2379558298191483, "grad_norm": 0.09917213372413236, "learning_rate": 9.989034360480714e-06, "loss": 0.0016, "step": 36170 }, { "epoch": 0.23802161799438168, "grad_norm": 0.08941475754195659, "learning_rate": 9.988996325756774e-06, "loss": 0.0039, "step": 36180 }, { "epoch": 0.23808740616961507, "grad_norm": 0.10894623440033008, "learning_rate": 9.988958225257225e-06, "loss": 0.0059, "step": 36190 }, { "epoch": 0.23815319434484847, "grad_norm": 0.12101803306374134, "learning_rate": 9.988920058982573e-06, "loss": 0.0034, "step": 36200 }, { "epoch": 0.23821898252008183, "grad_norm": 0.08141308926445792, "learning_rate": 9.98888182693332e-06, "loss": 0.0025, "step": 36210 }, { "epoch": 0.23828477069531523, "grad_norm": 0.1111532717309891, "learning_rate": 9.988843529109965e-06, "loss": 0.0029, "step": 36220 }, { "epoch": 0.2383505588705486, "grad_norm": 0.11571624028638716, "learning_rate": 9.988805165513018e-06, "loss": 0.0052, "step": 36230 }, { "epoch": 0.238416347045782, "grad_norm": 0.05423883490189211, "learning_rate": 9.988766736142986e-06, "loss": 0.0053, "step": 36240 }, { "epoch": 0.23848213522101538, "grad_norm": 0.0873598833191282, "learning_rate": 9.98872824100037e-06, "loss": 0.0037, "step": 36250 }, { "epoch": 0.23854792339624875, "grad_norm": 0.11790185774626193, "learning_rate": 9.988689680085682e-06, "loss": 0.003, "step": 36260 }, { "epoch": 0.23861371157148215, "grad_norm": 0.2796325606473982, "learning_rate": 9.98865105339943e-06, "loss": 0.0029, "step": 36270 }, { "epoch": 0.2386794997467155, "grad_norm": 0.11994563190032206, "learning_rate": 9.98861236094212e-06, "loss": 0.0037, "step": 36280 }, { "epoch": 0.2387452879219489, "grad_norm": 0.037669414509327495, "learning_rate": 9.988573602714265e-06, "loss": 0.0028, "step": 36290 }, { "epoch": 0.2388110760971823, "grad_norm": 0.06222042962812031, "learning_rate": 9.988534778716376e-06, "loss": 0.0052, "step": 36300 }, { "epoch": 0.23887686427241567, "grad_norm": 0.09873305589192605, "learning_rate": 9.988495888948966e-06, "loss": 0.0141, "step": 36310 }, { "epoch": 0.23894265244764906, "grad_norm": 0.106361870053533, "learning_rate": 9.988456933412543e-06, "loss": 0.0036, "step": 36320 }, { "epoch": 0.23900844062288246, "grad_norm": 0.026858415626476465, "learning_rate": 9.988417912107626e-06, "loss": 0.0031, "step": 36330 }, { "epoch": 0.23907422879811582, "grad_norm": 0.044830120688059936, "learning_rate": 9.988378825034726e-06, "loss": 0.0038, "step": 36340 }, { "epoch": 0.23914001697334922, "grad_norm": 0.006439445624825794, "learning_rate": 9.98833967219436e-06, "loss": 0.0026, "step": 36350 }, { "epoch": 0.23920580514858258, "grad_norm": 0.036804611689249526, "learning_rate": 9.988300453587043e-06, "loss": 0.0049, "step": 36360 }, { "epoch": 0.23927159332381598, "grad_norm": 0.056561113532186146, "learning_rate": 9.988261169213294e-06, "loss": 0.0029, "step": 36370 }, { "epoch": 0.23933738149904937, "grad_norm": 0.13530312448923854, "learning_rate": 9.988221819073627e-06, "loss": 0.0027, "step": 36380 }, { "epoch": 0.23940316967428274, "grad_norm": 0.3546172779757825, "learning_rate": 9.988182403168566e-06, "loss": 0.002, "step": 36390 }, { "epoch": 0.23946895784951613, "grad_norm": 0.12705244412149458, "learning_rate": 9.988142921498627e-06, "loss": 0.0032, "step": 36400 }, { "epoch": 0.2395347460247495, "grad_norm": 0.047679127204104534, "learning_rate": 9.988103374064334e-06, "loss": 0.0029, "step": 36410 }, { "epoch": 0.2396005341999829, "grad_norm": 0.10317573426026132, "learning_rate": 9.988063760866202e-06, "loss": 0.005, "step": 36420 }, { "epoch": 0.2396663223752163, "grad_norm": 0.1276892770729136, "learning_rate": 9.98802408190476e-06, "loss": 0.0034, "step": 36430 }, { "epoch": 0.23973211055044966, "grad_norm": 0.19950053437575985, "learning_rate": 9.987984337180527e-06, "loss": 0.0062, "step": 36440 }, { "epoch": 0.23979789872568305, "grad_norm": 0.12218636284237187, "learning_rate": 9.98794452669403e-06, "loss": 0.0022, "step": 36450 }, { "epoch": 0.23986368690091642, "grad_norm": 0.11318004574006851, "learning_rate": 9.98790465044579e-06, "loss": 0.004, "step": 36460 }, { "epoch": 0.2399294750761498, "grad_norm": 0.13520475986173572, "learning_rate": 9.987864708436337e-06, "loss": 0.0032, "step": 36470 }, { "epoch": 0.2399952632513832, "grad_norm": 0.25531797834002934, "learning_rate": 9.987824700666195e-06, "loss": 0.0034, "step": 36480 }, { "epoch": 0.24006105142661657, "grad_norm": 0.037923637077770675, "learning_rate": 9.987784627135892e-06, "loss": 0.002, "step": 36490 }, { "epoch": 0.24012683960184997, "grad_norm": 0.15312460748330695, "learning_rate": 9.987744487845956e-06, "loss": 0.003, "step": 36500 }, { "epoch": 0.24019262777708333, "grad_norm": 0.10064773777652675, "learning_rate": 9.987704282796917e-06, "loss": 0.0027, "step": 36510 }, { "epoch": 0.24025841595231673, "grad_norm": 0.012712279693304383, "learning_rate": 9.987664011989305e-06, "loss": 0.0018, "step": 36520 }, { "epoch": 0.24032420412755012, "grad_norm": 0.1980607970587177, "learning_rate": 9.98762367542365e-06, "loss": 0.0039, "step": 36530 }, { "epoch": 0.2403899923027835, "grad_norm": 0.09726978291982616, "learning_rate": 9.987583273100486e-06, "loss": 0.003, "step": 36540 }, { "epoch": 0.24045578047801688, "grad_norm": 0.004278583239827945, "learning_rate": 9.987542805020342e-06, "loss": 0.0021, "step": 36550 }, { "epoch": 0.24052156865325028, "grad_norm": 0.11329428153394747, "learning_rate": 9.987502271183754e-06, "loss": 0.0029, "step": 36560 }, { "epoch": 0.24058735682848365, "grad_norm": 0.13910568512028887, "learning_rate": 9.987461671591257e-06, "loss": 0.0021, "step": 36570 }, { "epoch": 0.24065314500371704, "grad_norm": 0.04673832719912357, "learning_rate": 9.987421006243384e-06, "loss": 0.0041, "step": 36580 }, { "epoch": 0.2407189331789504, "grad_norm": 0.12355739485348494, "learning_rate": 9.987380275140674e-06, "loss": 0.0022, "step": 36590 }, { "epoch": 0.2407847213541838, "grad_norm": 0.03156030820359826, "learning_rate": 9.987339478283662e-06, "loss": 0.0038, "step": 36600 }, { "epoch": 0.2408505095294172, "grad_norm": 0.15556870062059433, "learning_rate": 9.987298615672887e-06, "loss": 0.0032, "step": 36610 }, { "epoch": 0.24091629770465056, "grad_norm": 0.0930103549756155, "learning_rate": 9.987257687308885e-06, "loss": 0.0026, "step": 36620 }, { "epoch": 0.24098208587988396, "grad_norm": 0.0762941948368359, "learning_rate": 9.9872166931922e-06, "loss": 0.0023, "step": 36630 }, { "epoch": 0.24104787405511732, "grad_norm": 0.031437643013561625, "learning_rate": 9.987175633323368e-06, "loss": 0.0011, "step": 36640 }, { "epoch": 0.24111366223035072, "grad_norm": 0.19795843578776404, "learning_rate": 9.987134507702934e-06, "loss": 0.0038, "step": 36650 }, { "epoch": 0.2411794504055841, "grad_norm": 0.13237988450935523, "learning_rate": 9.987093316331438e-06, "loss": 0.0037, "step": 36660 }, { "epoch": 0.24124523858081748, "grad_norm": 0.08774109311446499, "learning_rate": 9.987052059209424e-06, "loss": 0.0032, "step": 36670 }, { "epoch": 0.24131102675605087, "grad_norm": 0.015689976186706287, "learning_rate": 9.987010736337435e-06, "loss": 0.0025, "step": 36680 }, { "epoch": 0.24137681493128424, "grad_norm": 0.01749058335210682, "learning_rate": 9.986969347716018e-06, "loss": 0.0021, "step": 36690 }, { "epoch": 0.24144260310651763, "grad_norm": 0.26758471018873453, "learning_rate": 9.986927893345717e-06, "loss": 0.0045, "step": 36700 }, { "epoch": 0.24150839128175103, "grad_norm": 0.10009356855190278, "learning_rate": 9.986886373227078e-06, "loss": 0.003, "step": 36710 }, { "epoch": 0.2415741794569844, "grad_norm": 0.10697736318790786, "learning_rate": 9.98684478736065e-06, "loss": 0.0013, "step": 36720 }, { "epoch": 0.2416399676322178, "grad_norm": 0.11867819155555423, "learning_rate": 9.986803135746978e-06, "loss": 0.0033, "step": 36730 }, { "epoch": 0.24170575580745116, "grad_norm": 0.09832438833056059, "learning_rate": 9.986761418386614e-06, "loss": 0.0028, "step": 36740 }, { "epoch": 0.24177154398268455, "grad_norm": 0.03056890519215422, "learning_rate": 9.98671963528011e-06, "loss": 0.0029, "step": 36750 }, { "epoch": 0.24183733215791794, "grad_norm": 0.36722354236890786, "learning_rate": 9.986677786428014e-06, "loss": 0.0032, "step": 36760 }, { "epoch": 0.2419031203331513, "grad_norm": 0.2647908098196543, "learning_rate": 9.986635871830876e-06, "loss": 0.0034, "step": 36770 }, { "epoch": 0.2419689085083847, "grad_norm": 0.07839353304781367, "learning_rate": 9.986593891489252e-06, "loss": 0.0016, "step": 36780 }, { "epoch": 0.2420346966836181, "grad_norm": 0.1353325789646992, "learning_rate": 9.986551845403694e-06, "loss": 0.0021, "step": 36790 }, { "epoch": 0.24210048485885147, "grad_norm": 0.18218563517943512, "learning_rate": 9.986509733574757e-06, "loss": 0.003, "step": 36800 }, { "epoch": 0.24216627303408486, "grad_norm": 0.07164654363391722, "learning_rate": 9.986467556002997e-06, "loss": 0.0021, "step": 36810 }, { "epoch": 0.24223206120931823, "grad_norm": 0.058299433803833685, "learning_rate": 9.986425312688967e-06, "loss": 0.0031, "step": 36820 }, { "epoch": 0.24229784938455162, "grad_norm": 0.12875520532670762, "learning_rate": 9.986383003633226e-06, "loss": 0.004, "step": 36830 }, { "epoch": 0.24236363755978502, "grad_norm": 0.08109129419574934, "learning_rate": 9.986340628836332e-06, "loss": 0.0024, "step": 36840 }, { "epoch": 0.24242942573501838, "grad_norm": 0.08220923830052111, "learning_rate": 9.986298188298844e-06, "loss": 0.0044, "step": 36850 }, { "epoch": 0.24249521391025178, "grad_norm": 0.23249359386418633, "learning_rate": 9.98625568202132e-06, "loss": 0.002, "step": 36860 }, { "epoch": 0.24256100208548514, "grad_norm": 0.11172711201878663, "learning_rate": 9.986213110004321e-06, "loss": 0.0034, "step": 36870 }, { "epoch": 0.24262679026071854, "grad_norm": 0.06375080506916528, "learning_rate": 9.98617047224841e-06, "loss": 0.0015, "step": 36880 }, { "epoch": 0.24269257843595193, "grad_norm": 0.0567463564211683, "learning_rate": 9.986127768754146e-06, "loss": 0.004, "step": 36890 }, { "epoch": 0.2427583666111853, "grad_norm": 0.1719457306538051, "learning_rate": 9.986084999522094e-06, "loss": 0.0044, "step": 36900 }, { "epoch": 0.2428241547864187, "grad_norm": 0.23135165724070822, "learning_rate": 9.986042164552817e-06, "loss": 0.0048, "step": 36910 }, { "epoch": 0.24288994296165206, "grad_norm": 0.116216478686306, "learning_rate": 9.985999263846881e-06, "loss": 0.0041, "step": 36920 }, { "epoch": 0.24295573113688546, "grad_norm": 0.0672414234389256, "learning_rate": 9.98595629740485e-06, "loss": 0.002, "step": 36930 }, { "epoch": 0.24302151931211885, "grad_norm": 0.20235069705356695, "learning_rate": 9.985913265227294e-06, "loss": 0.003, "step": 36940 }, { "epoch": 0.24308730748735222, "grad_norm": 0.04888861041432765, "learning_rate": 9.985870167314776e-06, "loss": 0.0037, "step": 36950 }, { "epoch": 0.2431530956625856, "grad_norm": 0.13109024978466072, "learning_rate": 9.985827003667865e-06, "loss": 0.003, "step": 36960 }, { "epoch": 0.24321888383781898, "grad_norm": 0.16019486877349273, "learning_rate": 9.985783774287132e-06, "loss": 0.0025, "step": 36970 }, { "epoch": 0.24328467201305237, "grad_norm": 0.16784480297118345, "learning_rate": 9.985740479173146e-06, "loss": 0.0037, "step": 36980 }, { "epoch": 0.24335046018828577, "grad_norm": 0.2903612831274077, "learning_rate": 9.985697118326478e-06, "loss": 0.0059, "step": 36990 }, { "epoch": 0.24341624836351913, "grad_norm": 0.062131116577847034, "learning_rate": 9.9856536917477e-06, "loss": 0.0025, "step": 37000 }, { "epoch": 0.24348203653875253, "grad_norm": 0.11635903738026744, "learning_rate": 9.98561019943738e-06, "loss": 0.0039, "step": 37010 }, { "epoch": 0.24354782471398592, "grad_norm": 0.12303276208845118, "learning_rate": 9.9855666413961e-06, "loss": 0.0043, "step": 37020 }, { "epoch": 0.2436136128892193, "grad_norm": 0.059382024162576215, "learning_rate": 9.985523017624427e-06, "loss": 0.005, "step": 37030 }, { "epoch": 0.24367940106445268, "grad_norm": 0.08775793962247362, "learning_rate": 9.98547932812294e-06, "loss": 0.0026, "step": 37040 }, { "epoch": 0.24374518923968605, "grad_norm": 0.13553783632714408, "learning_rate": 9.985435572892215e-06, "loss": 0.003, "step": 37050 }, { "epoch": 0.24381097741491944, "grad_norm": 0.09774858312095583, "learning_rate": 9.985391751932826e-06, "loss": 0.0052, "step": 37060 }, { "epoch": 0.24387676559015284, "grad_norm": 0.01712001990417525, "learning_rate": 9.985347865245353e-06, "loss": 0.0024, "step": 37070 }, { "epoch": 0.2439425537653862, "grad_norm": 0.11029636123560442, "learning_rate": 9.985303912830372e-06, "loss": 0.0025, "step": 37080 }, { "epoch": 0.2440083419406196, "grad_norm": 0.3254815239958269, "learning_rate": 9.985259894688468e-06, "loss": 0.003, "step": 37090 }, { "epoch": 0.24407413011585297, "grad_norm": 0.298004670484974, "learning_rate": 9.985215810820215e-06, "loss": 0.0031, "step": 37100 }, { "epoch": 0.24413991829108636, "grad_norm": 0.15039646558455075, "learning_rate": 9.985171661226199e-06, "loss": 0.0034, "step": 37110 }, { "epoch": 0.24420570646631976, "grad_norm": 0.14937755071768177, "learning_rate": 9.985127445907e-06, "loss": 0.0017, "step": 37120 }, { "epoch": 0.24427149464155312, "grad_norm": 0.09131077784479022, "learning_rate": 9.9850831648632e-06, "loss": 0.004, "step": 37130 }, { "epoch": 0.24433728281678652, "grad_norm": 0.17403287504884685, "learning_rate": 9.985038818095384e-06, "loss": 0.0022, "step": 37140 }, { "epoch": 0.24440307099201988, "grad_norm": 0.02758337821084257, "learning_rate": 9.984994405604136e-06, "loss": 0.0027, "step": 37150 }, { "epoch": 0.24446885916725328, "grad_norm": 0.17708110767430138, "learning_rate": 9.984949927390044e-06, "loss": 0.0043, "step": 37160 }, { "epoch": 0.24453464734248667, "grad_norm": 0.07445221773173258, "learning_rate": 9.98490538345369e-06, "loss": 0.0033, "step": 37170 }, { "epoch": 0.24460043551772004, "grad_norm": 0.17637090951463208, "learning_rate": 9.984860773795665e-06, "loss": 0.0036, "step": 37180 }, { "epoch": 0.24466622369295343, "grad_norm": 0.06272953720180109, "learning_rate": 9.984816098416557e-06, "loss": 0.0089, "step": 37190 }, { "epoch": 0.2447320118681868, "grad_norm": 0.03558670655508122, "learning_rate": 9.984771357316953e-06, "loss": 0.0015, "step": 37200 }, { "epoch": 0.2447978000434202, "grad_norm": 0.06284965282989186, "learning_rate": 9.984726550497443e-06, "loss": 0.0023, "step": 37210 }, { "epoch": 0.2448635882186536, "grad_norm": 0.1115900649960209, "learning_rate": 9.98468167795862e-06, "loss": 0.0038, "step": 37220 }, { "epoch": 0.24492937639388696, "grad_norm": 0.09576422429758562, "learning_rate": 9.984636739701072e-06, "loss": 0.0041, "step": 37230 }, { "epoch": 0.24499516456912035, "grad_norm": 0.21121825397295446, "learning_rate": 9.984591735725396e-06, "loss": 0.0025, "step": 37240 }, { "epoch": 0.24506095274435372, "grad_norm": 0.2333192319456811, "learning_rate": 9.984546666032183e-06, "loss": 0.0035, "step": 37250 }, { "epoch": 0.2451267409195871, "grad_norm": 0.09724913581465036, "learning_rate": 9.984501530622025e-06, "loss": 0.0042, "step": 37260 }, { "epoch": 0.2451925290948205, "grad_norm": 0.06285933860185279, "learning_rate": 9.98445632949552e-06, "loss": 0.003, "step": 37270 }, { "epoch": 0.24525831727005387, "grad_norm": 0.052527504318364546, "learning_rate": 9.984411062653262e-06, "loss": 0.0029, "step": 37280 }, { "epoch": 0.24532410544528727, "grad_norm": 0.36715138174145545, "learning_rate": 9.98436573009585e-06, "loss": 0.0036, "step": 37290 }, { "epoch": 0.24538989362052066, "grad_norm": 0.17956582111659208, "learning_rate": 9.984320331823883e-06, "loss": 0.0028, "step": 37300 }, { "epoch": 0.24545568179575403, "grad_norm": 0.202488507090176, "learning_rate": 9.984274867837956e-06, "loss": 0.0024, "step": 37310 }, { "epoch": 0.24552146997098742, "grad_norm": 0.03544180775043014, "learning_rate": 9.984229338138667e-06, "loss": 0.0017, "step": 37320 }, { "epoch": 0.2455872581462208, "grad_norm": 0.11326884428305235, "learning_rate": 9.98418374272662e-06, "loss": 0.0011, "step": 37330 }, { "epoch": 0.24565304632145418, "grad_norm": 0.07743283326103258, "learning_rate": 9.984138081602418e-06, "loss": 0.0012, "step": 37340 }, { "epoch": 0.24571883449668758, "grad_norm": 0.1790314105436848, "learning_rate": 9.984092354766656e-06, "loss": 0.0032, "step": 37350 }, { "epoch": 0.24578462267192094, "grad_norm": 0.1870499957988858, "learning_rate": 9.984046562219942e-06, "loss": 0.0028, "step": 37360 }, { "epoch": 0.24585041084715434, "grad_norm": 0.027429775246055024, "learning_rate": 9.98400070396288e-06, "loss": 0.0052, "step": 37370 }, { "epoch": 0.2459161990223877, "grad_norm": 0.21392295296856853, "learning_rate": 9.983954779996072e-06, "loss": 0.0027, "step": 37380 }, { "epoch": 0.2459819871976211, "grad_norm": 0.1675513808438624, "learning_rate": 9.983908790320125e-06, "loss": 0.0058, "step": 37390 }, { "epoch": 0.2460477753728545, "grad_norm": 0.06449239757676348, "learning_rate": 9.983862734935644e-06, "loss": 0.0027, "step": 37400 }, { "epoch": 0.24611356354808786, "grad_norm": 0.05568519006284861, "learning_rate": 9.983816613843238e-06, "loss": 0.0028, "step": 37410 }, { "epoch": 0.24617935172332125, "grad_norm": 0.17385439221122823, "learning_rate": 9.983770427043513e-06, "loss": 0.0033, "step": 37420 }, { "epoch": 0.24624513989855462, "grad_norm": 0.045085397297902646, "learning_rate": 9.983724174537081e-06, "loss": 0.0027, "step": 37430 }, { "epoch": 0.24631092807378802, "grad_norm": 0.06040766976762317, "learning_rate": 9.983677856324549e-06, "loss": 0.0054, "step": 37440 }, { "epoch": 0.2463767162490214, "grad_norm": 0.1657856865959363, "learning_rate": 9.98363147240653e-06, "loss": 0.0026, "step": 37450 }, { "epoch": 0.24644250442425478, "grad_norm": 0.07691842075177079, "learning_rate": 9.983585022783632e-06, "loss": 0.0029, "step": 37460 }, { "epoch": 0.24650829259948817, "grad_norm": 0.28106568993037606, "learning_rate": 9.983538507456472e-06, "loss": 0.0039, "step": 37470 }, { "epoch": 0.24657408077472154, "grad_norm": 0.051897805100329285, "learning_rate": 9.98349192642566e-06, "loss": 0.0033, "step": 37480 }, { "epoch": 0.24663986894995493, "grad_norm": 0.11778172550429471, "learning_rate": 9.98344527969181e-06, "loss": 0.0037, "step": 37490 }, { "epoch": 0.24670565712518833, "grad_norm": 0.13074345982829916, "learning_rate": 9.983398567255538e-06, "loss": 0.0048, "step": 37500 }, { "epoch": 0.2467714453004217, "grad_norm": 0.05961756873175776, "learning_rate": 9.983351789117462e-06, "loss": 0.0012, "step": 37510 }, { "epoch": 0.2468372334756551, "grad_norm": 0.11095424989866058, "learning_rate": 9.983304945278194e-06, "loss": 0.0031, "step": 37520 }, { "epoch": 0.24690302165088848, "grad_norm": 0.13231191722851962, "learning_rate": 9.983258035738355e-06, "loss": 0.002, "step": 37530 }, { "epoch": 0.24696880982612185, "grad_norm": 0.045282694496902955, "learning_rate": 9.983211060498564e-06, "loss": 0.0037, "step": 37540 }, { "epoch": 0.24703459800135524, "grad_norm": 0.023964630992889188, "learning_rate": 9.983164019559437e-06, "loss": 0.0024, "step": 37550 }, { "epoch": 0.2471003861765886, "grad_norm": 0.1514109237905154, "learning_rate": 9.983116912921595e-06, "loss": 0.006, "step": 37560 }, { "epoch": 0.247166174351822, "grad_norm": 0.0908093774379389, "learning_rate": 9.983069740585662e-06, "loss": 0.0029, "step": 37570 }, { "epoch": 0.2472319625270554, "grad_norm": 0.2634262443413112, "learning_rate": 9.98302250255226e-06, "loss": 0.0046, "step": 37580 }, { "epoch": 0.24729775070228877, "grad_norm": 0.15481455232853966, "learning_rate": 9.982975198822007e-06, "loss": 0.0036, "step": 37590 }, { "epoch": 0.24736353887752216, "grad_norm": 0.059266830743223496, "learning_rate": 9.982927829395532e-06, "loss": 0.003, "step": 37600 }, { "epoch": 0.24742932705275553, "grad_norm": 0.11935191677457734, "learning_rate": 9.982880394273455e-06, "loss": 0.0033, "step": 37610 }, { "epoch": 0.24749511522798892, "grad_norm": 0.00895558633280853, "learning_rate": 9.982832893456405e-06, "loss": 0.0046, "step": 37620 }, { "epoch": 0.24756090340322232, "grad_norm": 0.1506161528589857, "learning_rate": 9.982785326945007e-06, "loss": 0.0023, "step": 37630 }, { "epoch": 0.24762669157845568, "grad_norm": 0.07950078974529372, "learning_rate": 9.982737694739886e-06, "loss": 0.0029, "step": 37640 }, { "epoch": 0.24769247975368908, "grad_norm": 0.45711959157224913, "learning_rate": 9.982689996841673e-06, "loss": 0.0035, "step": 37650 }, { "epoch": 0.24775826792892244, "grad_norm": 0.31789062127624823, "learning_rate": 9.982642233250995e-06, "loss": 0.0031, "step": 37660 }, { "epoch": 0.24782405610415584, "grad_norm": 0.09281144058618485, "learning_rate": 9.982594403968481e-06, "loss": 0.0029, "step": 37670 }, { "epoch": 0.24788984427938923, "grad_norm": 0.47861315054171094, "learning_rate": 9.982546508994767e-06, "loss": 0.0086, "step": 37680 }, { "epoch": 0.2479556324546226, "grad_norm": 0.03971809962691593, "learning_rate": 9.982498548330477e-06, "loss": 0.0029, "step": 37690 }, { "epoch": 0.248021420629856, "grad_norm": 0.27053758351842033, "learning_rate": 9.982450521976249e-06, "loss": 0.0057, "step": 37700 }, { "epoch": 0.24808720880508936, "grad_norm": 0.013915159685378646, "learning_rate": 9.982402429932714e-06, "loss": 0.0031, "step": 37710 }, { "epoch": 0.24815299698032275, "grad_norm": 0.4269763423647504, "learning_rate": 9.982354272200506e-06, "loss": 0.0034, "step": 37720 }, { "epoch": 0.24821878515555615, "grad_norm": 0.17964561575135474, "learning_rate": 9.982306048780259e-06, "loss": 0.0033, "step": 37730 }, { "epoch": 0.24828457333078952, "grad_norm": 0.11178807642090673, "learning_rate": 9.982257759672608e-06, "loss": 0.0034, "step": 37740 }, { "epoch": 0.2483503615060229, "grad_norm": 0.013961403409091196, "learning_rate": 9.982209404878195e-06, "loss": 0.0027, "step": 37750 }, { "epoch": 0.2484161496812563, "grad_norm": 0.10166371901625483, "learning_rate": 9.982160984397652e-06, "loss": 0.0035, "step": 37760 }, { "epoch": 0.24848193785648967, "grad_norm": 0.10609899064799862, "learning_rate": 9.98211249823162e-06, "loss": 0.0033, "step": 37770 }, { "epoch": 0.24854772603172307, "grad_norm": 0.1920841384983639, "learning_rate": 9.982063946380736e-06, "loss": 0.0031, "step": 37780 }, { "epoch": 0.24861351420695643, "grad_norm": 0.19861279513446004, "learning_rate": 9.982015328845644e-06, "loss": 0.0053, "step": 37790 }, { "epoch": 0.24867930238218983, "grad_norm": 0.03801952402018078, "learning_rate": 9.981966645626982e-06, "loss": 0.0027, "step": 37800 }, { "epoch": 0.24874509055742322, "grad_norm": 0.015878942671935795, "learning_rate": 9.98191789672539e-06, "loss": 0.0028, "step": 37810 }, { "epoch": 0.2488108787326566, "grad_norm": 0.08473295003829903, "learning_rate": 9.981869082141515e-06, "loss": 0.0027, "step": 37820 }, { "epoch": 0.24887666690788998, "grad_norm": 0.058494993993058506, "learning_rate": 9.981820201876e-06, "loss": 0.0042, "step": 37830 }, { "epoch": 0.24894245508312335, "grad_norm": 0.07827334054633846, "learning_rate": 9.981771255929486e-06, "loss": 0.0026, "step": 37840 }, { "epoch": 0.24900824325835674, "grad_norm": 0.0500272341715812, "learning_rate": 9.981722244302623e-06, "loss": 0.0031, "step": 37850 }, { "epoch": 0.24907403143359014, "grad_norm": 0.06031515819240158, "learning_rate": 9.981673166996053e-06, "loss": 0.0037, "step": 37860 }, { "epoch": 0.2491398196088235, "grad_norm": 0.11642822217264587, "learning_rate": 9.981624024010426e-06, "loss": 0.0029, "step": 37870 }, { "epoch": 0.2492056077840569, "grad_norm": 0.08352040584386568, "learning_rate": 9.981574815346388e-06, "loss": 0.002, "step": 37880 }, { "epoch": 0.24927139595929027, "grad_norm": 0.16254238473833105, "learning_rate": 9.981525541004588e-06, "loss": 0.0033, "step": 37890 }, { "epoch": 0.24933718413452366, "grad_norm": 0.03789378612023818, "learning_rate": 9.981476200985675e-06, "loss": 0.0016, "step": 37900 }, { "epoch": 0.24940297230975705, "grad_norm": 0.10309322432325475, "learning_rate": 9.981426795290303e-06, "loss": 0.003, "step": 37910 }, { "epoch": 0.24946876048499042, "grad_norm": 0.23167290151687092, "learning_rate": 9.981377323919119e-06, "loss": 0.0024, "step": 37920 }, { "epoch": 0.24953454866022381, "grad_norm": 0.03804309854030367, "learning_rate": 9.981327786872779e-06, "loss": 0.0021, "step": 37930 }, { "epoch": 0.24960033683545718, "grad_norm": 0.0194941352818865, "learning_rate": 9.981278184151932e-06, "loss": 0.0051, "step": 37940 }, { "epoch": 0.24966612501069058, "grad_norm": 0.18911592945495231, "learning_rate": 9.981228515757237e-06, "loss": 0.0041, "step": 37950 }, { "epoch": 0.24973191318592397, "grad_norm": 0.04220362868994979, "learning_rate": 9.981178781689344e-06, "loss": 0.0029, "step": 37960 }, { "epoch": 0.24979770136115734, "grad_norm": 0.7673568781072411, "learning_rate": 9.98112898194891e-06, "loss": 0.0021, "step": 37970 }, { "epoch": 0.24986348953639073, "grad_norm": 0.38739701490138223, "learning_rate": 9.981079116536596e-06, "loss": 0.003, "step": 37980 }, { "epoch": 0.24992927771162413, "grad_norm": 0.24515736824332196, "learning_rate": 9.981029185453053e-06, "loss": 0.0034, "step": 37990 }, { "epoch": 0.2499950658868575, "grad_norm": 0.0674724969887214, "learning_rate": 9.980979188698943e-06, "loss": 0.0041, "step": 38000 }, { "epoch": 0.2500608540620909, "grad_norm": 0.16088032279583697, "learning_rate": 9.980929126274924e-06, "loss": 0.0042, "step": 38010 }, { "epoch": 0.2501266422373243, "grad_norm": 0.08981274331147553, "learning_rate": 9.980878998181656e-06, "loss": 0.0026, "step": 38020 }, { "epoch": 0.2501924304125576, "grad_norm": 0.026215507860582136, "learning_rate": 9.9808288044198e-06, "loss": 0.0022, "step": 38030 }, { "epoch": 0.250258218587791, "grad_norm": 0.1196748582326055, "learning_rate": 9.980778544990018e-06, "loss": 0.0027, "step": 38040 }, { "epoch": 0.2503240067630244, "grad_norm": 0.22601190711064778, "learning_rate": 9.980728219892973e-06, "loss": 0.0032, "step": 38050 }, { "epoch": 0.2503897949382578, "grad_norm": 0.1936267344178936, "learning_rate": 9.980677829129328e-06, "loss": 0.0032, "step": 38060 }, { "epoch": 0.2504555831134912, "grad_norm": 0.12112802069141917, "learning_rate": 9.980627372699748e-06, "loss": 0.0018, "step": 38070 }, { "epoch": 0.25052137128872454, "grad_norm": 0.044550292267955934, "learning_rate": 9.980576850604897e-06, "loss": 0.003, "step": 38080 }, { "epoch": 0.25058715946395793, "grad_norm": 0.25115906715116104, "learning_rate": 9.980526262845442e-06, "loss": 0.0036, "step": 38090 }, { "epoch": 0.2506529476391913, "grad_norm": 0.059469458258143176, "learning_rate": 9.980475609422049e-06, "loss": 0.0033, "step": 38100 }, { "epoch": 0.2507187358144247, "grad_norm": 0.049352528278550765, "learning_rate": 9.980424890335387e-06, "loss": 0.004, "step": 38110 }, { "epoch": 0.2507845239896581, "grad_norm": 0.18477015136732258, "learning_rate": 9.980374105586124e-06, "loss": 0.003, "step": 38120 }, { "epoch": 0.2508503121648915, "grad_norm": 0.06801059493593462, "learning_rate": 9.98032325517493e-06, "loss": 0.0042, "step": 38130 }, { "epoch": 0.25091610034012485, "grad_norm": 0.20452299323474918, "learning_rate": 9.980272339102475e-06, "loss": 0.0033, "step": 38140 }, { "epoch": 0.25098188851535824, "grad_norm": 0.13534035032169411, "learning_rate": 9.98022135736943e-06, "loss": 0.0043, "step": 38150 }, { "epoch": 0.25104767669059164, "grad_norm": 0.08197541375723981, "learning_rate": 9.980170309976468e-06, "loss": 0.0027, "step": 38160 }, { "epoch": 0.25111346486582503, "grad_norm": 0.07722846210661016, "learning_rate": 9.980119196924262e-06, "loss": 0.0044, "step": 38170 }, { "epoch": 0.2511792530410584, "grad_norm": 0.2509013508467335, "learning_rate": 9.980068018213486e-06, "loss": 0.0035, "step": 38180 }, { "epoch": 0.25124504121629176, "grad_norm": 0.047392801665344604, "learning_rate": 9.980016773844813e-06, "loss": 0.0021, "step": 38190 }, { "epoch": 0.25131082939152516, "grad_norm": 0.06904672292409166, "learning_rate": 9.97996546381892e-06, "loss": 0.0021, "step": 38200 }, { "epoch": 0.25137661756675855, "grad_norm": 0.1424338628011655, "learning_rate": 9.979914088136484e-06, "loss": 0.0045, "step": 38210 }, { "epoch": 0.25144240574199195, "grad_norm": 0.07856777504236699, "learning_rate": 9.979862646798182e-06, "loss": 0.002, "step": 38220 }, { "epoch": 0.25150819391722534, "grad_norm": 0.05894722698550017, "learning_rate": 9.979811139804691e-06, "loss": 0.0026, "step": 38230 }, { "epoch": 0.2515739820924587, "grad_norm": 0.08801896526042882, "learning_rate": 9.97975956715669e-06, "loss": 0.0028, "step": 38240 }, { "epoch": 0.2516397702676921, "grad_norm": 0.21526840487340526, "learning_rate": 9.979707928854862e-06, "loss": 0.006, "step": 38250 }, { "epoch": 0.25170555844292547, "grad_norm": 0.1493420163034689, "learning_rate": 9.979656224899885e-06, "loss": 0.003, "step": 38260 }, { "epoch": 0.25177134661815886, "grad_norm": 0.1231964665757082, "learning_rate": 9.979604455292441e-06, "loss": 0.0051, "step": 38270 }, { "epoch": 0.25183713479339226, "grad_norm": 0.07731229203916824, "learning_rate": 9.979552620033212e-06, "loss": 0.0025, "step": 38280 }, { "epoch": 0.2519029229686256, "grad_norm": 0.005301810442928834, "learning_rate": 9.979500719122885e-06, "loss": 0.0022, "step": 38290 }, { "epoch": 0.251968711143859, "grad_norm": 0.14745355233845128, "learning_rate": 9.97944875256214e-06, "loss": 0.0031, "step": 38300 }, { "epoch": 0.2520344993190924, "grad_norm": 0.13181990125763773, "learning_rate": 9.979396720351664e-06, "loss": 0.003, "step": 38310 }, { "epoch": 0.2521002874943258, "grad_norm": 0.03987634289671358, "learning_rate": 9.979344622492141e-06, "loss": 0.0028, "step": 38320 }, { "epoch": 0.2521660756695592, "grad_norm": 0.18108254596535456, "learning_rate": 9.979292458984261e-06, "loss": 0.0042, "step": 38330 }, { "epoch": 0.2522318638447925, "grad_norm": 0.08582384827614266, "learning_rate": 9.979240229828711e-06, "loss": 0.0028, "step": 38340 }, { "epoch": 0.2522976520200259, "grad_norm": 0.08783597232045245, "learning_rate": 9.97918793502618e-06, "loss": 0.0021, "step": 38350 }, { "epoch": 0.2523634401952593, "grad_norm": 0.013372687933429961, "learning_rate": 9.979135574577354e-06, "loss": 0.001, "step": 38360 }, { "epoch": 0.2524292283704927, "grad_norm": 0.08108241457070431, "learning_rate": 9.979083148482927e-06, "loss": 0.0018, "step": 38370 }, { "epoch": 0.2524950165457261, "grad_norm": 0.062271430247031946, "learning_rate": 9.979030656743588e-06, "loss": 0.0041, "step": 38380 }, { "epoch": 0.25256080472095943, "grad_norm": 0.08823981599904004, "learning_rate": 9.978978099360031e-06, "loss": 0.0046, "step": 38390 }, { "epoch": 0.2526265928961928, "grad_norm": 0.04023236374003602, "learning_rate": 9.978925476332946e-06, "loss": 0.0033, "step": 38400 }, { "epoch": 0.2526923810714262, "grad_norm": 0.11020393963783824, "learning_rate": 9.97887278766303e-06, "loss": 0.0028, "step": 38410 }, { "epoch": 0.2527581692466596, "grad_norm": 0.12757374962596743, "learning_rate": 9.978820033350977e-06, "loss": 0.0028, "step": 38420 }, { "epoch": 0.252823957421893, "grad_norm": 0.18058152067335242, "learning_rate": 9.97876721339748e-06, "loss": 0.0034, "step": 38430 }, { "epoch": 0.25288974559712635, "grad_norm": 0.13060152273850117, "learning_rate": 9.978714327803238e-06, "loss": 0.0018, "step": 38440 }, { "epoch": 0.25295553377235974, "grad_norm": 0.38375779606639443, "learning_rate": 9.978661376568949e-06, "loss": 0.005, "step": 38450 }, { "epoch": 0.25302132194759314, "grad_norm": 0.1304667108257034, "learning_rate": 9.978608359695307e-06, "loss": 0.0021, "step": 38460 }, { "epoch": 0.25308711012282653, "grad_norm": 0.2132441950227639, "learning_rate": 9.978555277183016e-06, "loss": 0.0031, "step": 38470 }, { "epoch": 0.2531528982980599, "grad_norm": 0.047592113011511825, "learning_rate": 9.978502129032772e-06, "loss": 0.0027, "step": 38480 }, { "epoch": 0.25321868647329326, "grad_norm": 0.43770400919635877, "learning_rate": 9.978448915245278e-06, "loss": 0.0038, "step": 38490 }, { "epoch": 0.25328447464852666, "grad_norm": 0.2976824553834202, "learning_rate": 9.978395635821234e-06, "loss": 0.0044, "step": 38500 }, { "epoch": 0.25335026282376005, "grad_norm": 0.13572018780277756, "learning_rate": 9.978342290761344e-06, "loss": 0.0028, "step": 38510 }, { "epoch": 0.25341605099899345, "grad_norm": 0.07247799874559817, "learning_rate": 9.97828888006631e-06, "loss": 0.0056, "step": 38520 }, { "epoch": 0.25348183917422684, "grad_norm": 0.08228883399589561, "learning_rate": 9.978235403736838e-06, "loss": 0.002, "step": 38530 }, { "epoch": 0.2535476273494602, "grad_norm": 0.09194315090892591, "learning_rate": 9.978181861773628e-06, "loss": 0.002, "step": 38540 }, { "epoch": 0.2536134155246936, "grad_norm": 0.046650549607662066, "learning_rate": 9.978128254177393e-06, "loss": 0.0024, "step": 38550 }, { "epoch": 0.25367920369992697, "grad_norm": 0.10346417799907817, "learning_rate": 9.978074580948835e-06, "loss": 0.0022, "step": 38560 }, { "epoch": 0.25374499187516036, "grad_norm": 0.3720662134253889, "learning_rate": 9.978020842088663e-06, "loss": 0.0035, "step": 38570 }, { "epoch": 0.25381078005039376, "grad_norm": 0.06476562958370391, "learning_rate": 9.977967037597587e-06, "loss": 0.0032, "step": 38580 }, { "epoch": 0.25387656822562715, "grad_norm": 0.04844642764325752, "learning_rate": 9.977913167476312e-06, "loss": 0.0039, "step": 38590 }, { "epoch": 0.2539423564008605, "grad_norm": 0.0862856521823372, "learning_rate": 9.977859231725552e-06, "loss": 0.0024, "step": 38600 }, { "epoch": 0.2540081445760939, "grad_norm": 0.04479095041889788, "learning_rate": 9.977805230346019e-06, "loss": 0.0029, "step": 38610 }, { "epoch": 0.2540739327513273, "grad_norm": 0.13936954793208228, "learning_rate": 9.977751163338422e-06, "loss": 0.0033, "step": 38620 }, { "epoch": 0.2541397209265607, "grad_norm": 0.09661325726982942, "learning_rate": 9.977697030703475e-06, "loss": 0.0032, "step": 38630 }, { "epoch": 0.25420550910179407, "grad_norm": 0.06223438930409326, "learning_rate": 9.977642832441892e-06, "loss": 0.0059, "step": 38640 }, { "epoch": 0.2542712972770274, "grad_norm": 0.03828548129489393, "learning_rate": 9.977588568554386e-06, "loss": 0.0027, "step": 38650 }, { "epoch": 0.2543370854522608, "grad_norm": 0.08007414098435982, "learning_rate": 9.977534239041675e-06, "loss": 0.0023, "step": 38660 }, { "epoch": 0.2544028736274942, "grad_norm": 0.06759583323239599, "learning_rate": 9.977479843904473e-06, "loss": 0.0095, "step": 38670 }, { "epoch": 0.2544686618027276, "grad_norm": 0.23913130460270315, "learning_rate": 9.977425383143499e-06, "loss": 0.0054, "step": 38680 }, { "epoch": 0.254534449977961, "grad_norm": 0.09057747773513114, "learning_rate": 9.977370856759469e-06, "loss": 0.0022, "step": 38690 }, { "epoch": 0.2546002381531943, "grad_norm": 0.06400882784714358, "learning_rate": 9.977316264753104e-06, "loss": 0.0024, "step": 38700 }, { "epoch": 0.2546660263284277, "grad_norm": 0.28899690737340916, "learning_rate": 9.977261607125124e-06, "loss": 0.0029, "step": 38710 }, { "epoch": 0.2547318145036611, "grad_norm": 0.15833487426215423, "learning_rate": 9.977206883876246e-06, "loss": 0.0034, "step": 38720 }, { "epoch": 0.2547976026788945, "grad_norm": 0.24061378281602316, "learning_rate": 9.977152095007195e-06, "loss": 0.0051, "step": 38730 }, { "epoch": 0.2548633908541279, "grad_norm": 0.07931502274485792, "learning_rate": 9.977097240518692e-06, "loss": 0.0022, "step": 38740 }, { "epoch": 0.25492917902936124, "grad_norm": 0.06289692417375993, "learning_rate": 9.97704232041146e-06, "loss": 0.003, "step": 38750 }, { "epoch": 0.25499496720459464, "grad_norm": 0.20562159144371278, "learning_rate": 9.976987334686223e-06, "loss": 0.004, "step": 38760 }, { "epoch": 0.25506075537982803, "grad_norm": 0.10849492944429133, "learning_rate": 9.976932283343707e-06, "loss": 0.0038, "step": 38770 }, { "epoch": 0.2551265435550614, "grad_norm": 0.023265681980783232, "learning_rate": 9.976877166384638e-06, "loss": 0.0026, "step": 38780 }, { "epoch": 0.2551923317302948, "grad_norm": 0.11153609741677076, "learning_rate": 9.976821983809742e-06, "loss": 0.0028, "step": 38790 }, { "epoch": 0.25525811990552816, "grad_norm": 0.11307961662120886, "learning_rate": 9.976766735619745e-06, "loss": 0.0035, "step": 38800 }, { "epoch": 0.25532390808076155, "grad_norm": 0.10841102506350714, "learning_rate": 9.976711421815376e-06, "loss": 0.003, "step": 38810 }, { "epoch": 0.25538969625599495, "grad_norm": 0.06536973791550273, "learning_rate": 9.976656042397367e-06, "loss": 0.0044, "step": 38820 }, { "epoch": 0.25545548443122834, "grad_norm": 0.29574512584451734, "learning_rate": 9.976600597366446e-06, "loss": 0.0046, "step": 38830 }, { "epoch": 0.25552127260646174, "grad_norm": 0.08072548888564525, "learning_rate": 9.976545086723345e-06, "loss": 0.0024, "step": 38840 }, { "epoch": 0.2555870607816951, "grad_norm": 0.20324437783124188, "learning_rate": 9.976489510468792e-06, "loss": 0.0039, "step": 38850 }, { "epoch": 0.25565284895692847, "grad_norm": 0.10404037834654374, "learning_rate": 9.976433868603526e-06, "loss": 0.0043, "step": 38860 }, { "epoch": 0.25571863713216186, "grad_norm": 0.04027110355257957, "learning_rate": 9.976378161128275e-06, "loss": 0.0049, "step": 38870 }, { "epoch": 0.25578442530739526, "grad_norm": 0.024609260905638475, "learning_rate": 9.976322388043777e-06, "loss": 0.0029, "step": 38880 }, { "epoch": 0.25585021348262865, "grad_norm": 0.06423514599450708, "learning_rate": 9.976266549350767e-06, "loss": 0.004, "step": 38890 }, { "epoch": 0.255916001657862, "grad_norm": 0.1095723282874086, "learning_rate": 9.976210645049977e-06, "loss": 0.0031, "step": 38900 }, { "epoch": 0.2559817898330954, "grad_norm": 0.1774185872657039, "learning_rate": 9.976154675142151e-06, "loss": 0.0044, "step": 38910 }, { "epoch": 0.2560475780083288, "grad_norm": 0.13736509193520846, "learning_rate": 9.976098639628023e-06, "loss": 0.0041, "step": 38920 }, { "epoch": 0.2561133661835622, "grad_norm": 0.14642977093191742, "learning_rate": 9.97604253850833e-06, "loss": 0.0025, "step": 38930 }, { "epoch": 0.25617915435879557, "grad_norm": 0.10332859165123794, "learning_rate": 9.975986371783815e-06, "loss": 0.0033, "step": 38940 }, { "epoch": 0.2562449425340289, "grad_norm": 0.20561377388738128, "learning_rate": 9.975930139455217e-06, "loss": 0.0031, "step": 38950 }, { "epoch": 0.2563107307092623, "grad_norm": 0.10800511516927956, "learning_rate": 9.975873841523277e-06, "loss": 0.0032, "step": 38960 }, { "epoch": 0.2563765188844957, "grad_norm": 0.12438759858567722, "learning_rate": 9.975817477988739e-06, "loss": 0.0043, "step": 38970 }, { "epoch": 0.2564423070597291, "grad_norm": 0.058454653022898956, "learning_rate": 9.975761048852344e-06, "loss": 0.0029, "step": 38980 }, { "epoch": 0.2565080952349625, "grad_norm": 0.06095575058975379, "learning_rate": 9.975704554114838e-06, "loss": 0.0036, "step": 38990 }, { "epoch": 0.2565738834101958, "grad_norm": 0.1056998506210758, "learning_rate": 9.975647993776965e-06, "loss": 0.0039, "step": 39000 }, { "epoch": 0.2566396715854292, "grad_norm": 0.13830823307953333, "learning_rate": 9.975591367839468e-06, "loss": 0.0029, "step": 39010 }, { "epoch": 0.2567054597606626, "grad_norm": 0.2951688138978125, "learning_rate": 9.975534676303098e-06, "loss": 0.0027, "step": 39020 }, { "epoch": 0.256771247935896, "grad_norm": 0.12351802172284664, "learning_rate": 9.9754779191686e-06, "loss": 0.0024, "step": 39030 }, { "epoch": 0.2568370361111294, "grad_norm": 0.048248979132114914, "learning_rate": 9.975421096436722e-06, "loss": 0.0024, "step": 39040 }, { "epoch": 0.2569028242863628, "grad_norm": 0.09610287843127643, "learning_rate": 9.975364208108215e-06, "loss": 0.0029, "step": 39050 }, { "epoch": 0.25696861246159614, "grad_norm": 0.10665407903445638, "learning_rate": 9.975307254183828e-06, "loss": 0.0028, "step": 39060 }, { "epoch": 0.25703440063682953, "grad_norm": 0.19672237243189533, "learning_rate": 9.97525023466431e-06, "loss": 0.0031, "step": 39070 }, { "epoch": 0.2571001888120629, "grad_norm": 0.15118524898714628, "learning_rate": 9.975193149550418e-06, "loss": 0.0021, "step": 39080 }, { "epoch": 0.2571659769872963, "grad_norm": 0.06128689788159105, "learning_rate": 9.9751359988429e-06, "loss": 0.0036, "step": 39090 }, { "epoch": 0.2572317651625297, "grad_norm": 0.029668103696545274, "learning_rate": 9.975078782542508e-06, "loss": 0.0019, "step": 39100 }, { "epoch": 0.25729755333776305, "grad_norm": 0.29559671247065483, "learning_rate": 9.975021500650002e-06, "loss": 0.0036, "step": 39110 }, { "epoch": 0.25736334151299645, "grad_norm": 0.1978114743850032, "learning_rate": 9.974964153166132e-06, "loss": 0.0049, "step": 39120 }, { "epoch": 0.25742912968822984, "grad_norm": 0.11626797851051175, "learning_rate": 9.974906740091658e-06, "loss": 0.0042, "step": 39130 }, { "epoch": 0.25749491786346324, "grad_norm": 0.0157357126372376, "learning_rate": 9.974849261427335e-06, "loss": 0.0046, "step": 39140 }, { "epoch": 0.25756070603869663, "grad_norm": 0.17835738644023563, "learning_rate": 9.97479171717392e-06, "loss": 0.0022, "step": 39150 }, { "epoch": 0.25762649421392997, "grad_norm": 0.06181456535278527, "learning_rate": 9.974734107332174e-06, "loss": 0.0029, "step": 39160 }, { "epoch": 0.25769228238916336, "grad_norm": 0.14813091098453463, "learning_rate": 9.974676431902854e-06, "loss": 0.0031, "step": 39170 }, { "epoch": 0.25775807056439676, "grad_norm": 0.06941676033887317, "learning_rate": 9.974618690886722e-06, "loss": 0.0041, "step": 39180 }, { "epoch": 0.25782385873963015, "grad_norm": 0.10076756995593354, "learning_rate": 9.974560884284538e-06, "loss": 0.0036, "step": 39190 }, { "epoch": 0.25788964691486355, "grad_norm": 0.06881006362186019, "learning_rate": 9.974503012097068e-06, "loss": 0.0023, "step": 39200 }, { "epoch": 0.2579554350900969, "grad_norm": 0.17337530950510685, "learning_rate": 9.97444507432507e-06, "loss": 0.0047, "step": 39210 }, { "epoch": 0.2580212232653303, "grad_norm": 0.2437559391917406, "learning_rate": 9.97438707096931e-06, "loss": 0.004, "step": 39220 }, { "epoch": 0.2580870114405637, "grad_norm": 0.09599121186336237, "learning_rate": 9.974329002030553e-06, "loss": 0.0021, "step": 39230 }, { "epoch": 0.25815279961579707, "grad_norm": 0.13534725509708223, "learning_rate": 9.974270867509564e-06, "loss": 0.0031, "step": 39240 }, { "epoch": 0.25821858779103046, "grad_norm": 0.021839134980047508, "learning_rate": 9.97421266740711e-06, "loss": 0.0026, "step": 39250 }, { "epoch": 0.2582843759662638, "grad_norm": 0.07808117691822382, "learning_rate": 9.974154401723957e-06, "loss": 0.0017, "step": 39260 }, { "epoch": 0.2583501641414972, "grad_norm": 0.36374732432150386, "learning_rate": 9.974096070460875e-06, "loss": 0.0069, "step": 39270 }, { "epoch": 0.2584159523167306, "grad_norm": 0.22829313228998868, "learning_rate": 9.974037673618633e-06, "loss": 0.0031, "step": 39280 }, { "epoch": 0.258481740491964, "grad_norm": 0.2976288288229201, "learning_rate": 9.973979211197998e-06, "loss": 0.0035, "step": 39290 }, { "epoch": 0.2585475286671974, "grad_norm": 0.016202069944955922, "learning_rate": 9.973920683199744e-06, "loss": 0.0018, "step": 39300 }, { "epoch": 0.2586133168424307, "grad_norm": 0.027676275831929203, "learning_rate": 9.973862089624641e-06, "loss": 0.0027, "step": 39310 }, { "epoch": 0.2586791050176641, "grad_norm": 0.061149947967161965, "learning_rate": 9.973803430473464e-06, "loss": 0.0085, "step": 39320 }, { "epoch": 0.2587448931928975, "grad_norm": 0.10648624925968016, "learning_rate": 9.973744705746984e-06, "loss": 0.0026, "step": 39330 }, { "epoch": 0.2588106813681309, "grad_norm": 0.0704172584058573, "learning_rate": 9.973685915445974e-06, "loss": 0.0039, "step": 39340 }, { "epoch": 0.2588764695433643, "grad_norm": 0.1386531667104651, "learning_rate": 9.973627059571212e-06, "loss": 0.0033, "step": 39350 }, { "epoch": 0.25894225771859763, "grad_norm": 0.11763124322283425, "learning_rate": 9.973568138123474e-06, "loss": 0.0018, "step": 39360 }, { "epoch": 0.25900804589383103, "grad_norm": 0.11477771546232841, "learning_rate": 9.973509151103532e-06, "loss": 0.0023, "step": 39370 }, { "epoch": 0.2590738340690644, "grad_norm": 0.15405815197912942, "learning_rate": 9.97345009851217e-06, "loss": 0.0028, "step": 39380 }, { "epoch": 0.2591396222442978, "grad_norm": 0.13903560199007967, "learning_rate": 9.973390980350161e-06, "loss": 0.0025, "step": 39390 }, { "epoch": 0.2592054104195312, "grad_norm": 0.07093035709431479, "learning_rate": 9.973331796618291e-06, "loss": 0.0026, "step": 39400 }, { "epoch": 0.25927119859476455, "grad_norm": 0.016179090639245372, "learning_rate": 9.973272547317334e-06, "loss": 0.0016, "step": 39410 }, { "epoch": 0.25933698676999795, "grad_norm": 0.03973676081368713, "learning_rate": 9.973213232448074e-06, "loss": 0.0028, "step": 39420 }, { "epoch": 0.25940277494523134, "grad_norm": 0.05252351526999887, "learning_rate": 9.973153852011293e-06, "loss": 0.0043, "step": 39430 }, { "epoch": 0.25946856312046473, "grad_norm": 0.22641103903848064, "learning_rate": 9.973094406007774e-06, "loss": 0.0075, "step": 39440 }, { "epoch": 0.25953435129569813, "grad_norm": 0.08563440477379954, "learning_rate": 9.9730348944383e-06, "loss": 0.0032, "step": 39450 }, { "epoch": 0.25960013947093147, "grad_norm": 0.09774929782337537, "learning_rate": 9.972975317303656e-06, "loss": 0.0034, "step": 39460 }, { "epoch": 0.25966592764616486, "grad_norm": 0.23645235391975616, "learning_rate": 9.972915674604627e-06, "loss": 0.0048, "step": 39470 }, { "epoch": 0.25973171582139826, "grad_norm": 0.03290000422158966, "learning_rate": 9.972855966342e-06, "loss": 0.0037, "step": 39480 }, { "epoch": 0.25979750399663165, "grad_norm": 0.11927978965880938, "learning_rate": 9.97279619251656e-06, "loss": 0.0039, "step": 39490 }, { "epoch": 0.25986329217186505, "grad_norm": 0.1745501513805007, "learning_rate": 9.9727363531291e-06, "loss": 0.0038, "step": 39500 }, { "epoch": 0.25992908034709844, "grad_norm": 0.22522561926184456, "learning_rate": 9.972676448180406e-06, "loss": 0.0025, "step": 39510 }, { "epoch": 0.2599948685223318, "grad_norm": 0.08117630955883678, "learning_rate": 9.972616477671265e-06, "loss": 0.0025, "step": 39520 }, { "epoch": 0.2600606566975652, "grad_norm": 0.005986030392000845, "learning_rate": 9.972556441602472e-06, "loss": 0.005, "step": 39530 }, { "epoch": 0.26012644487279857, "grad_norm": 0.115611538566198, "learning_rate": 9.972496339974817e-06, "loss": 0.0018, "step": 39540 }, { "epoch": 0.26019223304803196, "grad_norm": 0.0009198730569543475, "learning_rate": 9.972436172789092e-06, "loss": 0.0029, "step": 39550 }, { "epoch": 0.26025802122326536, "grad_norm": 0.07439843215869256, "learning_rate": 9.97237594004609e-06, "loss": 0.0024, "step": 39560 }, { "epoch": 0.2603238093984987, "grad_norm": 0.1843016647643666, "learning_rate": 9.972315641746607e-06, "loss": 0.0039, "step": 39570 }, { "epoch": 0.2603895975737321, "grad_norm": 0.11053201966172929, "learning_rate": 9.972255277891437e-06, "loss": 0.0032, "step": 39580 }, { "epoch": 0.2604553857489655, "grad_norm": 0.08770053493589049, "learning_rate": 9.972194848481373e-06, "loss": 0.0018, "step": 39590 }, { "epoch": 0.2605211739241989, "grad_norm": 0.1107732840412449, "learning_rate": 9.972134353517215e-06, "loss": 0.0037, "step": 39600 }, { "epoch": 0.2605869620994323, "grad_norm": 0.013319148258014921, "learning_rate": 9.972073792999762e-06, "loss": 0.002, "step": 39610 }, { "epoch": 0.2606527502746656, "grad_norm": 0.18161634144893402, "learning_rate": 9.972013166929807e-06, "loss": 0.0029, "step": 39620 }, { "epoch": 0.260718538449899, "grad_norm": 0.15437105676385804, "learning_rate": 9.971952475308153e-06, "loss": 0.0035, "step": 39630 }, { "epoch": 0.2607843266251324, "grad_norm": 0.11574547364397925, "learning_rate": 9.971891718135602e-06, "loss": 0.0041, "step": 39640 }, { "epoch": 0.2608501148003658, "grad_norm": 0.07505285408360912, "learning_rate": 9.97183089541295e-06, "loss": 0.003, "step": 39650 }, { "epoch": 0.2609159029755992, "grad_norm": 0.0962952454258424, "learning_rate": 9.971770007141004e-06, "loss": 0.0032, "step": 39660 }, { "epoch": 0.26098169115083253, "grad_norm": 0.2586132683186554, "learning_rate": 9.971709053320562e-06, "loss": 0.0037, "step": 39670 }, { "epoch": 0.2610474793260659, "grad_norm": 0.390058080988262, "learning_rate": 9.971648033952432e-06, "loss": 0.0062, "step": 39680 }, { "epoch": 0.2611132675012993, "grad_norm": 0.11102533015090581, "learning_rate": 9.971586949037416e-06, "loss": 0.0046, "step": 39690 }, { "epoch": 0.2611790556765327, "grad_norm": 0.14860849515154406, "learning_rate": 9.971525798576321e-06, "loss": 0.0025, "step": 39700 }, { "epoch": 0.2612448438517661, "grad_norm": 0.09801140203519577, "learning_rate": 9.97146458256995e-06, "loss": 0.004, "step": 39710 }, { "epoch": 0.26131063202699945, "grad_norm": 0.03065730898591501, "learning_rate": 9.971403301019112e-06, "loss": 0.0015, "step": 39720 }, { "epoch": 0.26137642020223284, "grad_norm": 0.1916057050841179, "learning_rate": 9.971341953924617e-06, "loss": 0.0023, "step": 39730 }, { "epoch": 0.26144220837746623, "grad_norm": 0.10469741057598068, "learning_rate": 9.97128054128727e-06, "loss": 0.0033, "step": 39740 }, { "epoch": 0.26150799655269963, "grad_norm": 0.045204833006017575, "learning_rate": 9.971219063107882e-06, "loss": 0.0032, "step": 39750 }, { "epoch": 0.261573784727933, "grad_norm": 0.018636265613005742, "learning_rate": 9.971157519387267e-06, "loss": 0.0021, "step": 39760 }, { "epoch": 0.26163957290316636, "grad_norm": 0.16130681733286462, "learning_rate": 9.971095910126232e-06, "loss": 0.0037, "step": 39770 }, { "epoch": 0.26170536107839976, "grad_norm": 0.03786822214389841, "learning_rate": 9.97103423532559e-06, "loss": 0.0033, "step": 39780 }, { "epoch": 0.26177114925363315, "grad_norm": 0.01600255562757636, "learning_rate": 9.970972494986156e-06, "loss": 0.0045, "step": 39790 }, { "epoch": 0.26183693742886655, "grad_norm": 0.04633269287368749, "learning_rate": 9.970910689108742e-06, "loss": 0.0055, "step": 39800 }, { "epoch": 0.26190272560409994, "grad_norm": 0.05544766454752513, "learning_rate": 9.970848817694163e-06, "loss": 0.0019, "step": 39810 }, { "epoch": 0.2619685137793333, "grad_norm": 0.11246721543032687, "learning_rate": 9.970786880743237e-06, "loss": 0.0026, "step": 39820 }, { "epoch": 0.2620343019545667, "grad_norm": 0.04382147384221786, "learning_rate": 9.970724878256777e-06, "loss": 0.0034, "step": 39830 }, { "epoch": 0.26210009012980007, "grad_norm": 0.12864147209473195, "learning_rate": 9.970662810235605e-06, "loss": 0.0039, "step": 39840 }, { "epoch": 0.26216587830503346, "grad_norm": 0.04875553866673152, "learning_rate": 9.970600676680534e-06, "loss": 0.0029, "step": 39850 }, { "epoch": 0.26223166648026686, "grad_norm": 0.3229138231453195, "learning_rate": 9.970538477592388e-06, "loss": 0.0046, "step": 39860 }, { "epoch": 0.2622974546555002, "grad_norm": 0.04333675960249176, "learning_rate": 9.970476212971986e-06, "loss": 0.0036, "step": 39870 }, { "epoch": 0.2623632428307336, "grad_norm": 0.370751358094883, "learning_rate": 9.970413882820146e-06, "loss": 0.0029, "step": 39880 }, { "epoch": 0.262429031005967, "grad_norm": 0.06002988461001218, "learning_rate": 9.970351487137692e-06, "loss": 0.0043, "step": 39890 }, { "epoch": 0.2624948191812004, "grad_norm": 0.21752412829465093, "learning_rate": 9.970289025925447e-06, "loss": 0.0051, "step": 39900 }, { "epoch": 0.2625606073564338, "grad_norm": 0.07408854346436768, "learning_rate": 9.970226499184235e-06, "loss": 0.0028, "step": 39910 }, { "epoch": 0.2626263955316671, "grad_norm": 0.12860043476040334, "learning_rate": 9.970163906914875e-06, "loss": 0.0033, "step": 39920 }, { "epoch": 0.2626921837069005, "grad_norm": 0.11738095435417191, "learning_rate": 9.970101249118201e-06, "loss": 0.0029, "step": 39930 }, { "epoch": 0.2627579718821339, "grad_norm": 0.16565601549294953, "learning_rate": 9.970038525795031e-06, "loss": 0.0025, "step": 39940 }, { "epoch": 0.2628237600573673, "grad_norm": 0.20315966913195327, "learning_rate": 9.969975736946199e-06, "loss": 0.0039, "step": 39950 }, { "epoch": 0.2628895482326007, "grad_norm": 0.1663690259912224, "learning_rate": 9.969912882572526e-06, "loss": 0.0034, "step": 39960 }, { "epoch": 0.26295533640783403, "grad_norm": 0.052097842311170706, "learning_rate": 9.969849962674845e-06, "loss": 0.0031, "step": 39970 }, { "epoch": 0.2630211245830674, "grad_norm": 0.10145094261130701, "learning_rate": 9.969786977253986e-06, "loss": 0.0025, "step": 39980 }, { "epoch": 0.2630869127583008, "grad_norm": 0.1045223388858077, "learning_rate": 9.969723926310774e-06, "loss": 0.0048, "step": 39990 }, { "epoch": 0.2631527009335342, "grad_norm": 0.27642669728509445, "learning_rate": 9.969660809846047e-06, "loss": 0.0026, "step": 40000 }, { "epoch": 0.2632184891087676, "grad_norm": 0.20425779905166486, "learning_rate": 9.969597627860635e-06, "loss": 0.0027, "step": 40010 }, { "epoch": 0.263284277284001, "grad_norm": 0.17134910247728546, "learning_rate": 9.96953438035537e-06, "loss": 0.0022, "step": 40020 }, { "epoch": 0.26335006545923434, "grad_norm": 0.018327324959748374, "learning_rate": 9.969471067331085e-06, "loss": 0.0064, "step": 40030 }, { "epoch": 0.26341585363446773, "grad_norm": 0.02468777153240658, "learning_rate": 9.969407688788616e-06, "loss": 0.0015, "step": 40040 }, { "epoch": 0.26348164180970113, "grad_norm": 0.015599056745739198, "learning_rate": 9.9693442447288e-06, "loss": 0.0035, "step": 40050 }, { "epoch": 0.2635474299849345, "grad_norm": 0.28122271531689025, "learning_rate": 9.96928073515247e-06, "loss": 0.0038, "step": 40060 }, { "epoch": 0.2636132181601679, "grad_norm": 0.03877045156852154, "learning_rate": 9.969217160060466e-06, "loss": 0.0026, "step": 40070 }, { "epoch": 0.26367900633540126, "grad_norm": 0.11214731209791443, "learning_rate": 9.969153519453625e-06, "loss": 0.0026, "step": 40080 }, { "epoch": 0.26374479451063465, "grad_norm": 0.17691507759420302, "learning_rate": 9.969089813332787e-06, "loss": 0.0029, "step": 40090 }, { "epoch": 0.26381058268586804, "grad_norm": 0.12817671895788613, "learning_rate": 9.969026041698792e-06, "loss": 0.003, "step": 40100 }, { "epoch": 0.26387637086110144, "grad_norm": 0.3231690865940804, "learning_rate": 9.968962204552478e-06, "loss": 0.0096, "step": 40110 }, { "epoch": 0.26394215903633483, "grad_norm": 0.052733004712881726, "learning_rate": 9.968898301894692e-06, "loss": 0.0022, "step": 40120 }, { "epoch": 0.2640079472115682, "grad_norm": 0.10031687095029057, "learning_rate": 9.968834333726272e-06, "loss": 0.0031, "step": 40130 }, { "epoch": 0.26407373538680157, "grad_norm": 0.1438172556431727, "learning_rate": 9.968770300048062e-06, "loss": 0.0029, "step": 40140 }, { "epoch": 0.26413952356203496, "grad_norm": 0.058226627702398615, "learning_rate": 9.968706200860907e-06, "loss": 0.0024, "step": 40150 }, { "epoch": 0.26420531173726836, "grad_norm": 0.1481154285817919, "learning_rate": 9.968642036165652e-06, "loss": 0.0034, "step": 40160 }, { "epoch": 0.26427109991250175, "grad_norm": 0.05922770267815431, "learning_rate": 9.968577805963144e-06, "loss": 0.0083, "step": 40170 }, { "epoch": 0.2643368880877351, "grad_norm": 0.16680404796743734, "learning_rate": 9.968513510254228e-06, "loss": 0.0044, "step": 40180 }, { "epoch": 0.2644026762629685, "grad_norm": 0.06889514872264062, "learning_rate": 9.968449149039752e-06, "loss": 0.0156, "step": 40190 }, { "epoch": 0.2644684644382019, "grad_norm": 0.04282580690862, "learning_rate": 9.968384722320565e-06, "loss": 0.002, "step": 40200 }, { "epoch": 0.2645342526134353, "grad_norm": 0.20837584213336394, "learning_rate": 9.968320230097517e-06, "loss": 0.0061, "step": 40210 }, { "epoch": 0.26460004078866867, "grad_norm": 0.060481649297997986, "learning_rate": 9.968255672371458e-06, "loss": 0.0032, "step": 40220 }, { "epoch": 0.264665828963902, "grad_norm": 0.1454911965124737, "learning_rate": 9.968191049143238e-06, "loss": 0.0027, "step": 40230 }, { "epoch": 0.2647316171391354, "grad_norm": 0.06547365126437002, "learning_rate": 9.96812636041371e-06, "loss": 0.0025, "step": 40240 }, { "epoch": 0.2647974053143688, "grad_norm": 0.2306553505720118, "learning_rate": 9.968061606183728e-06, "loss": 0.0052, "step": 40250 }, { "epoch": 0.2648631934896022, "grad_norm": 0.06896303168191312, "learning_rate": 9.967996786454141e-06, "loss": 0.0029, "step": 40260 }, { "epoch": 0.2649289816648356, "grad_norm": 0.09320701786161158, "learning_rate": 9.96793190122581e-06, "loss": 0.0033, "step": 40270 }, { "epoch": 0.2649947698400689, "grad_norm": 0.0418329432120184, "learning_rate": 9.967866950499587e-06, "loss": 0.0023, "step": 40280 }, { "epoch": 0.2650605580153023, "grad_norm": 0.1891874384907202, "learning_rate": 9.967801934276329e-06, "loss": 0.0029, "step": 40290 }, { "epoch": 0.2651263461905357, "grad_norm": 0.09718313718678122, "learning_rate": 9.967736852556893e-06, "loss": 0.0011, "step": 40300 }, { "epoch": 0.2651921343657691, "grad_norm": 0.017538006892466252, "learning_rate": 9.967671705342135e-06, "loss": 0.0041, "step": 40310 }, { "epoch": 0.2652579225410025, "grad_norm": 0.09751961708214306, "learning_rate": 9.967606492632918e-06, "loss": 0.0036, "step": 40320 }, { "epoch": 0.26532371071623584, "grad_norm": 0.08481520008934584, "learning_rate": 9.9675412144301e-06, "loss": 0.0028, "step": 40330 }, { "epoch": 0.26538949889146923, "grad_norm": 0.10130262806343199, "learning_rate": 9.967475870734539e-06, "loss": 0.0038, "step": 40340 }, { "epoch": 0.2654552870667026, "grad_norm": 0.1565233077360261, "learning_rate": 9.9674104615471e-06, "loss": 0.0037, "step": 40350 }, { "epoch": 0.265521075241936, "grad_norm": 0.5695186114721201, "learning_rate": 9.967344986868645e-06, "loss": 0.0041, "step": 40360 }, { "epoch": 0.2655868634171694, "grad_norm": 0.023704456681282248, "learning_rate": 9.967279446700038e-06, "loss": 0.0017, "step": 40370 }, { "epoch": 0.26565265159240276, "grad_norm": 0.08640519322516832, "learning_rate": 9.967213841042138e-06, "loss": 0.0028, "step": 40380 }, { "epoch": 0.26571843976763615, "grad_norm": 0.017965018217097607, "learning_rate": 9.967148169895815e-06, "loss": 0.002, "step": 40390 }, { "epoch": 0.26578422794286954, "grad_norm": 0.17202512187738261, "learning_rate": 9.967082433261934e-06, "loss": 0.002, "step": 40400 }, { "epoch": 0.26585001611810294, "grad_norm": 0.10524383050789611, "learning_rate": 9.967016631141359e-06, "loss": 0.0028, "step": 40410 }, { "epoch": 0.26591580429333633, "grad_norm": 0.06491087442488506, "learning_rate": 9.966950763534961e-06, "loss": 0.0043, "step": 40420 }, { "epoch": 0.26598159246856967, "grad_norm": 0.1378473231057624, "learning_rate": 9.966884830443608e-06, "loss": 0.0031, "step": 40430 }, { "epoch": 0.26604738064380307, "grad_norm": 0.21519300024417762, "learning_rate": 9.966818831868166e-06, "loss": 0.0036, "step": 40440 }, { "epoch": 0.26611316881903646, "grad_norm": 0.1220363562966577, "learning_rate": 9.966752767809508e-06, "loss": 0.002, "step": 40450 }, { "epoch": 0.26617895699426986, "grad_norm": 0.1677983245372008, "learning_rate": 9.966686638268505e-06, "loss": 0.0025, "step": 40460 }, { "epoch": 0.26624474516950325, "grad_norm": 0.15273000175359608, "learning_rate": 9.966620443246028e-06, "loss": 0.0023, "step": 40470 }, { "epoch": 0.26631053334473664, "grad_norm": 0.15372331425774258, "learning_rate": 9.966554182742948e-06, "loss": 0.0058, "step": 40480 }, { "epoch": 0.26637632151997, "grad_norm": 0.18337081041313924, "learning_rate": 9.966487856760143e-06, "loss": 0.0043, "step": 40490 }, { "epoch": 0.2664421096952034, "grad_norm": 0.02189911573014062, "learning_rate": 9.966421465298484e-06, "loss": 0.0039, "step": 40500 }, { "epoch": 0.26650789787043677, "grad_norm": 0.07161235453266893, "learning_rate": 9.966355008358848e-06, "loss": 0.0014, "step": 40510 }, { "epoch": 0.26657368604567017, "grad_norm": 0.17832544003593584, "learning_rate": 9.96628848594211e-06, "loss": 0.0049, "step": 40520 }, { "epoch": 0.26663947422090356, "grad_norm": 0.11708877112495014, "learning_rate": 9.966221898049146e-06, "loss": 0.0041, "step": 40530 }, { "epoch": 0.2667052623961369, "grad_norm": 0.2720926769588136, "learning_rate": 9.966155244680837e-06, "loss": 0.0026, "step": 40540 }, { "epoch": 0.2667710505713703, "grad_norm": 0.1110605673862778, "learning_rate": 9.96608852583806e-06, "loss": 0.0025, "step": 40550 }, { "epoch": 0.2668368387466037, "grad_norm": 0.10527933114168735, "learning_rate": 9.966021741521693e-06, "loss": 0.002, "step": 40560 }, { "epoch": 0.2669026269218371, "grad_norm": 0.05391259441120502, "learning_rate": 9.96595489173262e-06, "loss": 0.0043, "step": 40570 }, { "epoch": 0.2669684150970705, "grad_norm": 0.11964126937563609, "learning_rate": 9.965887976471721e-06, "loss": 0.0032, "step": 40580 }, { "epoch": 0.2670342032723038, "grad_norm": 0.018800840593027217, "learning_rate": 9.96582099573988e-06, "loss": 0.002, "step": 40590 }, { "epoch": 0.2670999914475372, "grad_norm": 0.0483448662167516, "learning_rate": 9.965753949537973e-06, "loss": 0.0029, "step": 40600 }, { "epoch": 0.2671657796227706, "grad_norm": 0.07289466761011473, "learning_rate": 9.965686837866892e-06, "loss": 0.0019, "step": 40610 }, { "epoch": 0.267231567798004, "grad_norm": 0.34748333151774835, "learning_rate": 9.965619660727517e-06, "loss": 0.0036, "step": 40620 }, { "epoch": 0.2672973559732374, "grad_norm": 0.0417036318756678, "learning_rate": 9.965552418120737e-06, "loss": 0.0022, "step": 40630 }, { "epoch": 0.26736314414847073, "grad_norm": 0.3100304306516847, "learning_rate": 9.965485110047436e-06, "loss": 0.004, "step": 40640 }, { "epoch": 0.2674289323237041, "grad_norm": 0.1020132742058641, "learning_rate": 9.965417736508503e-06, "loss": 0.0047, "step": 40650 }, { "epoch": 0.2674947204989375, "grad_norm": 0.078989653497427, "learning_rate": 9.965350297504825e-06, "loss": 0.0031, "step": 40660 }, { "epoch": 0.2675605086741709, "grad_norm": 0.02595143957619093, "learning_rate": 9.96528279303729e-06, "loss": 0.0022, "step": 40670 }, { "epoch": 0.2676262968494043, "grad_norm": 0.1373008824880941, "learning_rate": 9.965215223106793e-06, "loss": 0.0037, "step": 40680 }, { "epoch": 0.26769208502463765, "grad_norm": 0.16715153094747212, "learning_rate": 9.96514758771422e-06, "loss": 0.0028, "step": 40690 }, { "epoch": 0.26775787319987104, "grad_norm": 0.0294351798592355, "learning_rate": 9.965079886860463e-06, "loss": 0.0027, "step": 40700 }, { "epoch": 0.26782366137510444, "grad_norm": 0.10070181819105559, "learning_rate": 9.965012120546417e-06, "loss": 0.0046, "step": 40710 }, { "epoch": 0.26788944955033783, "grad_norm": 0.08414225449670883, "learning_rate": 9.964944288772975e-06, "loss": 0.0025, "step": 40720 }, { "epoch": 0.2679552377255712, "grad_norm": 0.13621087081767289, "learning_rate": 9.964876391541028e-06, "loss": 0.0037, "step": 40730 }, { "epoch": 0.26802102590080457, "grad_norm": 0.16766060250307668, "learning_rate": 9.964808428851475e-06, "loss": 0.004, "step": 40740 }, { "epoch": 0.26808681407603796, "grad_norm": 0.115776725895638, "learning_rate": 9.964740400705213e-06, "loss": 0.002, "step": 40750 }, { "epoch": 0.26815260225127135, "grad_norm": 0.017947914123557496, "learning_rate": 9.964672307103133e-06, "loss": 0.0025, "step": 40760 }, { "epoch": 0.26821839042650475, "grad_norm": 0.0905082613016459, "learning_rate": 9.96460414804614e-06, "loss": 0.0024, "step": 40770 }, { "epoch": 0.26828417860173814, "grad_norm": 0.23885501950689003, "learning_rate": 9.964535923535125e-06, "loss": 0.002, "step": 40780 }, { "epoch": 0.2683499667769715, "grad_norm": 0.05253085006771872, "learning_rate": 9.964467633570996e-06, "loss": 0.0025, "step": 40790 }, { "epoch": 0.2684157549522049, "grad_norm": 0.09204487292734946, "learning_rate": 9.964399278154645e-06, "loss": 0.0019, "step": 40800 }, { "epoch": 0.26848154312743827, "grad_norm": 0.10515155866711087, "learning_rate": 9.964330857286978e-06, "loss": 0.0028, "step": 40810 }, { "epoch": 0.26854733130267167, "grad_norm": 0.06719637558134794, "learning_rate": 9.964262370968897e-06, "loss": 0.0017, "step": 40820 }, { "epoch": 0.26861311947790506, "grad_norm": 0.09410265533784062, "learning_rate": 9.964193819201304e-06, "loss": 0.0065, "step": 40830 }, { "epoch": 0.2686789076531384, "grad_norm": 0.047501400964219884, "learning_rate": 9.964125201985105e-06, "loss": 0.0034, "step": 40840 }, { "epoch": 0.2687446958283718, "grad_norm": 0.08227595009438714, "learning_rate": 9.964056519321199e-06, "loss": 0.0034, "step": 40850 }, { "epoch": 0.2688104840036052, "grad_norm": 0.19906875598679188, "learning_rate": 9.963987771210496e-06, "loss": 0.0019, "step": 40860 }, { "epoch": 0.2688762721788386, "grad_norm": 0.2337864296272488, "learning_rate": 9.963918957653902e-06, "loss": 0.0026, "step": 40870 }, { "epoch": 0.268942060354072, "grad_norm": 0.10926651871367783, "learning_rate": 9.963850078652323e-06, "loss": 0.0026, "step": 40880 }, { "epoch": 0.2690078485293053, "grad_norm": 0.15226262811415192, "learning_rate": 9.963781134206667e-06, "loss": 0.0019, "step": 40890 }, { "epoch": 0.2690736367045387, "grad_norm": 0.08960833121209544, "learning_rate": 9.963712124317845e-06, "loss": 0.0038, "step": 40900 }, { "epoch": 0.2691394248797721, "grad_norm": 0.05450560333307455, "learning_rate": 9.963643048986766e-06, "loss": 0.0049, "step": 40910 }, { "epoch": 0.2692052130550055, "grad_norm": 0.32128169127418515, "learning_rate": 9.96357390821434e-06, "loss": 0.002, "step": 40920 }, { "epoch": 0.2692710012302389, "grad_norm": 0.0153596751657639, "learning_rate": 9.963504702001479e-06, "loss": 0.0021, "step": 40930 }, { "epoch": 0.2693367894054723, "grad_norm": 1.2022106919471705, "learning_rate": 9.963435430349095e-06, "loss": 0.0041, "step": 40940 }, { "epoch": 0.2694025775807056, "grad_norm": 0.44814403719113033, "learning_rate": 9.963366093258101e-06, "loss": 0.0067, "step": 40950 }, { "epoch": 0.269468365755939, "grad_norm": 0.24076575404421607, "learning_rate": 9.963296690729411e-06, "loss": 0.0025, "step": 40960 }, { "epoch": 0.2695341539311724, "grad_norm": 0.0474533238397907, "learning_rate": 9.963227222763943e-06, "loss": 0.0027, "step": 40970 }, { "epoch": 0.2695999421064058, "grad_norm": 0.3649782269397251, "learning_rate": 9.96315768936261e-06, "loss": 0.0029, "step": 40980 }, { "epoch": 0.2696657302816392, "grad_norm": 0.1158996085825162, "learning_rate": 9.963088090526328e-06, "loss": 0.0026, "step": 40990 }, { "epoch": 0.26973151845687254, "grad_norm": 0.06876261568410101, "learning_rate": 9.963018426256017e-06, "loss": 0.0019, "step": 41000 }, { "epoch": 0.26979730663210594, "grad_norm": 0.07335515814352463, "learning_rate": 9.962948696552593e-06, "loss": 0.002, "step": 41010 }, { "epoch": 0.26986309480733933, "grad_norm": 0.13231693294804076, "learning_rate": 9.962878901416978e-06, "loss": 0.0035, "step": 41020 }, { "epoch": 0.2699288829825727, "grad_norm": 0.2334476453503439, "learning_rate": 9.96280904085009e-06, "loss": 0.003, "step": 41030 }, { "epoch": 0.2699946711578061, "grad_norm": 0.2046188810697905, "learning_rate": 9.962739114852853e-06, "loss": 0.0026, "step": 41040 }, { "epoch": 0.27006045933303946, "grad_norm": 0.10167991632744335, "learning_rate": 9.962669123426186e-06, "loss": 0.0023, "step": 41050 }, { "epoch": 0.27012624750827285, "grad_norm": 0.22416222378517825, "learning_rate": 9.96259906657101e-06, "loss": 0.0026, "step": 41060 }, { "epoch": 0.27019203568350625, "grad_norm": 0.07432001488687862, "learning_rate": 9.962528944288254e-06, "loss": 0.0027, "step": 41070 }, { "epoch": 0.27025782385873964, "grad_norm": 0.10534290248385939, "learning_rate": 9.96245875657884e-06, "loss": 0.0027, "step": 41080 }, { "epoch": 0.27032361203397304, "grad_norm": 0.2085018138420406, "learning_rate": 9.962388503443692e-06, "loss": 0.0036, "step": 41090 }, { "epoch": 0.2703894002092064, "grad_norm": 0.31973296025407205, "learning_rate": 9.962318184883737e-06, "loss": 0.0037, "step": 41100 }, { "epoch": 0.27045518838443977, "grad_norm": 0.0761235539492544, "learning_rate": 9.962247800899905e-06, "loss": 0.0029, "step": 41110 }, { "epoch": 0.27052097655967317, "grad_norm": 0.047398354100132024, "learning_rate": 9.962177351493118e-06, "loss": 0.0033, "step": 41120 }, { "epoch": 0.27058676473490656, "grad_norm": 0.0648094026908003, "learning_rate": 9.96210683666431e-06, "loss": 0.0023, "step": 41130 }, { "epoch": 0.27065255291013995, "grad_norm": 0.13842801003214583, "learning_rate": 9.962036256414409e-06, "loss": 0.0027, "step": 41140 }, { "epoch": 0.2707183410853733, "grad_norm": 0.16590921434649378, "learning_rate": 9.961965610744345e-06, "loss": 0.0038, "step": 41150 }, { "epoch": 0.2707841292606067, "grad_norm": 0.06692080137511058, "learning_rate": 9.961894899655051e-06, "loss": 0.0044, "step": 41160 }, { "epoch": 0.2708499174358401, "grad_norm": 0.04096314154423717, "learning_rate": 9.961824123147456e-06, "loss": 0.0029, "step": 41170 }, { "epoch": 0.2709157056110735, "grad_norm": 0.003363675717698471, "learning_rate": 9.961753281222497e-06, "loss": 0.0019, "step": 41180 }, { "epoch": 0.27098149378630687, "grad_norm": 0.12218914734331764, "learning_rate": 9.961682373881106e-06, "loss": 0.0032, "step": 41190 }, { "epoch": 0.2710472819615402, "grad_norm": 0.22655098532376197, "learning_rate": 9.961611401124219e-06, "loss": 0.0028, "step": 41200 }, { "epoch": 0.2711130701367736, "grad_norm": 0.11546795502943075, "learning_rate": 9.961540362952768e-06, "loss": 0.0022, "step": 41210 }, { "epoch": 0.271178858312007, "grad_norm": 0.07318939308632547, "learning_rate": 9.961469259367693e-06, "loss": 0.0019, "step": 41220 }, { "epoch": 0.2712446464872404, "grad_norm": 0.10427588839520595, "learning_rate": 9.961398090369934e-06, "loss": 0.0045, "step": 41230 }, { "epoch": 0.2713104346624738, "grad_norm": 0.1923570168011621, "learning_rate": 9.961326855960422e-06, "loss": 0.0022, "step": 41240 }, { "epoch": 0.2713762228377071, "grad_norm": 0.18593533101916251, "learning_rate": 9.961255556140104e-06, "loss": 0.0021, "step": 41250 }, { "epoch": 0.2714420110129405, "grad_norm": 0.04026600331822011, "learning_rate": 9.961184190909913e-06, "loss": 0.0024, "step": 41260 }, { "epoch": 0.2715077991881739, "grad_norm": 0.07211533625842684, "learning_rate": 9.961112760270795e-06, "loss": 0.0016, "step": 41270 }, { "epoch": 0.2715735873634073, "grad_norm": 0.07116429725213774, "learning_rate": 9.96104126422369e-06, "loss": 0.0013, "step": 41280 }, { "epoch": 0.2716393755386407, "grad_norm": 0.12293706827285467, "learning_rate": 9.96096970276954e-06, "loss": 0.0027, "step": 41290 }, { "epoch": 0.27170516371387404, "grad_norm": 0.08745252328112738, "learning_rate": 9.96089807590929e-06, "loss": 0.0032, "step": 41300 }, { "epoch": 0.27177095188910744, "grad_norm": 0.07010687197713275, "learning_rate": 9.960826383643882e-06, "loss": 0.0028, "step": 41310 }, { "epoch": 0.27183674006434083, "grad_norm": 0.11507572724405617, "learning_rate": 9.960754625974264e-06, "loss": 0.0034, "step": 41320 }, { "epoch": 0.2719025282395742, "grad_norm": 0.05534527078745722, "learning_rate": 9.96068280290138e-06, "loss": 0.004, "step": 41330 }, { "epoch": 0.2719683164148076, "grad_norm": 0.07453276159732979, "learning_rate": 9.960610914426177e-06, "loss": 0.0022, "step": 41340 }, { "epoch": 0.27203410459004096, "grad_norm": 0.1339560217705958, "learning_rate": 9.960538960549604e-06, "loss": 0.0087, "step": 41350 }, { "epoch": 0.27209989276527435, "grad_norm": 0.14934007458881451, "learning_rate": 9.960466941272609e-06, "loss": 0.0024, "step": 41360 }, { "epoch": 0.27216568094050775, "grad_norm": 0.04442215359277477, "learning_rate": 9.960394856596141e-06, "loss": 0.0022, "step": 41370 }, { "epoch": 0.27223146911574114, "grad_norm": 0.21999048139842595, "learning_rate": 9.96032270652115e-06, "loss": 0.0015, "step": 41380 }, { "epoch": 0.27229725729097454, "grad_norm": 0.09781728700973602, "learning_rate": 9.960250491048591e-06, "loss": 0.0028, "step": 41390 }, { "epoch": 0.27236304546620793, "grad_norm": 0.058709199410540515, "learning_rate": 9.960178210179411e-06, "loss": 0.0033, "step": 41400 }, { "epoch": 0.27242883364144127, "grad_norm": 0.06935924563735314, "learning_rate": 9.960105863914566e-06, "loss": 0.0031, "step": 41410 }, { "epoch": 0.27249462181667466, "grad_norm": 0.024945293463315654, "learning_rate": 9.960033452255009e-06, "loss": 0.0022, "step": 41420 }, { "epoch": 0.27256040999190806, "grad_norm": 0.018298148454060855, "learning_rate": 9.959960975201695e-06, "loss": 0.0025, "step": 41430 }, { "epoch": 0.27262619816714145, "grad_norm": 0.16704278083492569, "learning_rate": 9.959888432755578e-06, "loss": 0.002, "step": 41440 }, { "epoch": 0.27269198634237485, "grad_norm": 0.19643165961962328, "learning_rate": 9.959815824917615e-06, "loss": 0.003, "step": 41450 }, { "epoch": 0.2727577745176082, "grad_norm": 0.0874327017120147, "learning_rate": 9.959743151688767e-06, "loss": 0.0027, "step": 41460 }, { "epoch": 0.2728235626928416, "grad_norm": 0.06065711732226928, "learning_rate": 9.959670413069988e-06, "loss": 0.0059, "step": 41470 }, { "epoch": 0.272889350868075, "grad_norm": 0.25146235699617897, "learning_rate": 9.959597609062238e-06, "loss": 0.0032, "step": 41480 }, { "epoch": 0.27295513904330837, "grad_norm": 0.20897243908696425, "learning_rate": 9.959524739666475e-06, "loss": 0.0021, "step": 41490 }, { "epoch": 0.27302092721854176, "grad_norm": 0.4644410718427033, "learning_rate": 9.959451804883662e-06, "loss": 0.0039, "step": 41500 }, { "epoch": 0.2730867153937751, "grad_norm": 0.3073495225546869, "learning_rate": 9.959378804714762e-06, "loss": 0.0041, "step": 41510 }, { "epoch": 0.2731525035690085, "grad_norm": 0.13033379766885392, "learning_rate": 9.959305739160735e-06, "loss": 0.0018, "step": 41520 }, { "epoch": 0.2732182917442419, "grad_norm": 0.029646344785481258, "learning_rate": 9.959232608222544e-06, "loss": 0.0025, "step": 41530 }, { "epoch": 0.2732840799194753, "grad_norm": 0.14403219687885363, "learning_rate": 9.959159411901156e-06, "loss": 0.0044, "step": 41540 }, { "epoch": 0.2733498680947087, "grad_norm": 0.26001792784881284, "learning_rate": 9.959086150197531e-06, "loss": 0.0021, "step": 41550 }, { "epoch": 0.273415656269942, "grad_norm": 0.13804236216296098, "learning_rate": 9.959012823112642e-06, "loss": 0.0022, "step": 41560 }, { "epoch": 0.2734814444451754, "grad_norm": 0.0660941135788987, "learning_rate": 9.95893943064745e-06, "loss": 0.0019, "step": 41570 }, { "epoch": 0.2735472326204088, "grad_norm": 0.0388589939399733, "learning_rate": 9.958865972802925e-06, "loss": 0.0046, "step": 41580 }, { "epoch": 0.2736130207956422, "grad_norm": 0.13596708503623112, "learning_rate": 9.958792449580033e-06, "loss": 0.0025, "step": 41590 }, { "epoch": 0.2736788089708756, "grad_norm": 0.14083997931641956, "learning_rate": 9.958718860979749e-06, "loss": 0.0027, "step": 41600 }, { "epoch": 0.27374459714610894, "grad_norm": 0.11202875196040532, "learning_rate": 9.958645207003034e-06, "loss": 0.0019, "step": 41610 }, { "epoch": 0.27381038532134233, "grad_norm": 0.05837004454131759, "learning_rate": 9.958571487650868e-06, "loss": 0.0023, "step": 41620 }, { "epoch": 0.2738761734965757, "grad_norm": 0.07376471408409215, "learning_rate": 9.95849770292422e-06, "loss": 0.0054, "step": 41630 }, { "epoch": 0.2739419616718091, "grad_norm": 0.0813857133644499, "learning_rate": 9.958423852824061e-06, "loss": 0.0048, "step": 41640 }, { "epoch": 0.2740077498470425, "grad_norm": 0.08233211737656962, "learning_rate": 9.958349937351366e-06, "loss": 0.0024, "step": 41650 }, { "epoch": 0.27407353802227585, "grad_norm": 0.0685803492747003, "learning_rate": 9.95827595650711e-06, "loss": 0.0014, "step": 41660 }, { "epoch": 0.27413932619750925, "grad_norm": 0.08619129464417057, "learning_rate": 9.958201910292266e-06, "loss": 0.0027, "step": 41670 }, { "epoch": 0.27420511437274264, "grad_norm": 0.21093712518802435, "learning_rate": 9.958127798707814e-06, "loss": 0.0031, "step": 41680 }, { "epoch": 0.27427090254797604, "grad_norm": 0.08274259534206727, "learning_rate": 9.958053621754728e-06, "loss": 0.0025, "step": 41690 }, { "epoch": 0.27433669072320943, "grad_norm": 0.08511229385614517, "learning_rate": 9.957979379433987e-06, "loss": 0.0024, "step": 41700 }, { "epoch": 0.27440247889844277, "grad_norm": 0.05851218702920948, "learning_rate": 9.957905071746571e-06, "loss": 0.0054, "step": 41710 }, { "epoch": 0.27446826707367616, "grad_norm": 0.08754128157285436, "learning_rate": 9.957830698693458e-06, "loss": 0.0029, "step": 41720 }, { "epoch": 0.27453405524890956, "grad_norm": 0.2252792235084924, "learning_rate": 9.957756260275628e-06, "loss": 0.0034, "step": 41730 }, { "epoch": 0.27459984342414295, "grad_norm": 0.0971627831096287, "learning_rate": 9.957681756494064e-06, "loss": 0.0017, "step": 41740 }, { "epoch": 0.27466563159937635, "grad_norm": 0.0445972085598302, "learning_rate": 9.95760718734975e-06, "loss": 0.0033, "step": 41750 }, { "epoch": 0.2747314197746097, "grad_norm": 0.02068895843278417, "learning_rate": 9.957532552843665e-06, "loss": 0.0024, "step": 41760 }, { "epoch": 0.2747972079498431, "grad_norm": 0.061206409317995945, "learning_rate": 9.957457852976794e-06, "loss": 0.0029, "step": 41770 }, { "epoch": 0.2748629961250765, "grad_norm": 0.26033271605257263, "learning_rate": 9.957383087750124e-06, "loss": 0.0031, "step": 41780 }, { "epoch": 0.27492878430030987, "grad_norm": 0.20152699069860108, "learning_rate": 9.95730825716464e-06, "loss": 0.0029, "step": 41790 }, { "epoch": 0.27499457247554326, "grad_norm": 0.11924390101627273, "learning_rate": 9.957233361221326e-06, "loss": 0.0021, "step": 41800 }, { "epoch": 0.2750603606507766, "grad_norm": 0.08524067550159893, "learning_rate": 9.957158399921173e-06, "loss": 0.0019, "step": 41810 }, { "epoch": 0.27512614882601, "grad_norm": 0.08577648372404144, "learning_rate": 9.957083373265167e-06, "loss": 0.0021, "step": 41820 }, { "epoch": 0.2751919370012434, "grad_norm": 0.10255901189408409, "learning_rate": 9.957008281254298e-06, "loss": 0.0046, "step": 41830 }, { "epoch": 0.2752577251764768, "grad_norm": 0.05738027278389516, "learning_rate": 9.956933123889558e-06, "loss": 0.0029, "step": 41840 }, { "epoch": 0.2753235133517102, "grad_norm": 0.11845079339976251, "learning_rate": 9.956857901171934e-06, "loss": 0.0026, "step": 41850 }, { "epoch": 0.2753893015269435, "grad_norm": 0.23406789899066252, "learning_rate": 9.956782613102419e-06, "loss": 0.005, "step": 41860 }, { "epoch": 0.2754550897021769, "grad_norm": 0.09553141760385551, "learning_rate": 9.956707259682007e-06, "loss": 0.0024, "step": 41870 }, { "epoch": 0.2755208778774103, "grad_norm": 0.1678745214569975, "learning_rate": 9.95663184091169e-06, "loss": 0.0024, "step": 41880 }, { "epoch": 0.2755866660526437, "grad_norm": 0.08961848573334054, "learning_rate": 9.956556356792465e-06, "loss": 0.0023, "step": 41890 }, { "epoch": 0.2756524542278771, "grad_norm": 0.061353441893992666, "learning_rate": 9.956480807325324e-06, "loss": 0.0031, "step": 41900 }, { "epoch": 0.2757182424031105, "grad_norm": 0.009598195295452281, "learning_rate": 9.956405192511263e-06, "loss": 0.0046, "step": 41910 }, { "epoch": 0.27578403057834383, "grad_norm": 0.05356177881406995, "learning_rate": 9.956329512351281e-06, "loss": 0.0018, "step": 41920 }, { "epoch": 0.2758498187535772, "grad_norm": 0.07391340021938073, "learning_rate": 9.956253766846376e-06, "loss": 0.0027, "step": 41930 }, { "epoch": 0.2759156069288106, "grad_norm": 0.03850910838212033, "learning_rate": 9.956177955997546e-06, "loss": 0.0025, "step": 41940 }, { "epoch": 0.275981395104044, "grad_norm": 0.12697103594270093, "learning_rate": 9.956102079805789e-06, "loss": 0.0037, "step": 41950 }, { "epoch": 0.2760471832792774, "grad_norm": 0.050721114717979916, "learning_rate": 9.956026138272105e-06, "loss": 0.0027, "step": 41960 }, { "epoch": 0.27611297145451075, "grad_norm": 0.18819601570088515, "learning_rate": 9.955950131397498e-06, "loss": 0.0055, "step": 41970 }, { "epoch": 0.27617875962974414, "grad_norm": 0.04455687880638875, "learning_rate": 9.955874059182969e-06, "loss": 0.0049, "step": 41980 }, { "epoch": 0.27624454780497754, "grad_norm": 0.2208958615341567, "learning_rate": 9.955797921629522e-06, "loss": 0.0042, "step": 41990 }, { "epoch": 0.27631033598021093, "grad_norm": 0.21342991592366187, "learning_rate": 9.955721718738157e-06, "loss": 0.0015, "step": 42000 }, { "epoch": 0.2763761241554443, "grad_norm": 0.20790878067346846, "learning_rate": 9.955645450509882e-06, "loss": 0.0029, "step": 42010 }, { "epoch": 0.27644191233067766, "grad_norm": 0.05179392172223772, "learning_rate": 9.955569116945702e-06, "loss": 0.0038, "step": 42020 }, { "epoch": 0.27650770050591106, "grad_norm": 0.275232248056917, "learning_rate": 9.955492718046624e-06, "loss": 0.0031, "step": 42030 }, { "epoch": 0.27657348868114445, "grad_norm": 0.12977101959375525, "learning_rate": 9.955416253813653e-06, "loss": 0.0024, "step": 42040 }, { "epoch": 0.27663927685637785, "grad_norm": 0.2644824621963045, "learning_rate": 9.9553397242478e-06, "loss": 0.0045, "step": 42050 }, { "epoch": 0.27670506503161124, "grad_norm": 0.11557392791694103, "learning_rate": 9.955263129350071e-06, "loss": 0.0023, "step": 42060 }, { "epoch": 0.2767708532068446, "grad_norm": 0.13430491420121302, "learning_rate": 9.955186469121475e-06, "loss": 0.0031, "step": 42070 }, { "epoch": 0.276836641382078, "grad_norm": 0.0653839585715324, "learning_rate": 9.95510974356303e-06, "loss": 0.0037, "step": 42080 }, { "epoch": 0.27690242955731137, "grad_norm": 0.060269399855075165, "learning_rate": 9.955032952675738e-06, "loss": 0.0021, "step": 42090 }, { "epoch": 0.27696821773254476, "grad_norm": 0.11188401602329637, "learning_rate": 9.95495609646062e-06, "loss": 0.0038, "step": 42100 }, { "epoch": 0.27703400590777816, "grad_norm": 0.046565426535039214, "learning_rate": 9.954879174918683e-06, "loss": 0.0029, "step": 42110 }, { "epoch": 0.2770997940830115, "grad_norm": 0.09550069385801041, "learning_rate": 9.954802188050942e-06, "loss": 0.0037, "step": 42120 }, { "epoch": 0.2771655822582449, "grad_norm": 0.12309219099701944, "learning_rate": 9.954725135858416e-06, "loss": 0.0018, "step": 42130 }, { "epoch": 0.2772313704334783, "grad_norm": 0.10029482856937666, "learning_rate": 9.954648018342118e-06, "loss": 0.0023, "step": 42140 }, { "epoch": 0.2772971586087117, "grad_norm": 0.04681745695947795, "learning_rate": 9.954570835503063e-06, "loss": 0.0021, "step": 42150 }, { "epoch": 0.2773629467839451, "grad_norm": 0.07973081262456991, "learning_rate": 9.954493587342273e-06, "loss": 0.0028, "step": 42160 }, { "epoch": 0.2774287349591784, "grad_norm": 0.04239705233441735, "learning_rate": 9.954416273860762e-06, "loss": 0.0029, "step": 42170 }, { "epoch": 0.2774945231344118, "grad_norm": 0.18962813523291003, "learning_rate": 9.954338895059554e-06, "loss": 0.0023, "step": 42180 }, { "epoch": 0.2775603113096452, "grad_norm": 0.13374686018222792, "learning_rate": 9.954261450939664e-06, "loss": 0.0018, "step": 42190 }, { "epoch": 0.2776260994848786, "grad_norm": 0.0918980785116354, "learning_rate": 9.954183941502117e-06, "loss": 0.0014, "step": 42200 }, { "epoch": 0.277691887660112, "grad_norm": 0.1193346786731134, "learning_rate": 9.954106366747932e-06, "loss": 0.0042, "step": 42210 }, { "epoch": 0.27775767583534533, "grad_norm": 0.031149474724484667, "learning_rate": 9.954028726678134e-06, "loss": 0.0034, "step": 42220 }, { "epoch": 0.2778234640105787, "grad_norm": 0.1539989888444677, "learning_rate": 9.953951021293747e-06, "loss": 0.0059, "step": 42230 }, { "epoch": 0.2778892521858121, "grad_norm": 0.05288860308359702, "learning_rate": 9.953873250595793e-06, "loss": 0.0027, "step": 42240 }, { "epoch": 0.2779550403610455, "grad_norm": 0.08295778770701215, "learning_rate": 9.953795414585298e-06, "loss": 0.0017, "step": 42250 }, { "epoch": 0.2780208285362789, "grad_norm": 0.2349455818471448, "learning_rate": 9.95371751326329e-06, "loss": 0.0028, "step": 42260 }, { "epoch": 0.27808661671151225, "grad_norm": 0.0531718368970597, "learning_rate": 9.953639546630794e-06, "loss": 0.0022, "step": 42270 }, { "epoch": 0.27815240488674564, "grad_norm": 0.11031151438484695, "learning_rate": 9.95356151468884e-06, "loss": 0.0056, "step": 42280 }, { "epoch": 0.27821819306197904, "grad_norm": 0.02635687986368489, "learning_rate": 9.953483417438454e-06, "loss": 0.0045, "step": 42290 }, { "epoch": 0.27828398123721243, "grad_norm": 0.10310864826944058, "learning_rate": 9.95340525488067e-06, "loss": 0.0022, "step": 42300 }, { "epoch": 0.2783497694124458, "grad_norm": 0.054630821196258075, "learning_rate": 9.953327027016513e-06, "loss": 0.0022, "step": 42310 }, { "epoch": 0.27841555758767916, "grad_norm": 0.08873267948490841, "learning_rate": 9.953248733847019e-06, "loss": 0.0025, "step": 42320 }, { "epoch": 0.27848134576291256, "grad_norm": 0.051920814005179, "learning_rate": 9.953170375373216e-06, "loss": 0.003, "step": 42330 }, { "epoch": 0.27854713393814595, "grad_norm": 0.06891030731117591, "learning_rate": 9.95309195159614e-06, "loss": 0.0023, "step": 42340 }, { "epoch": 0.27861292211337935, "grad_norm": 0.0852404347776064, "learning_rate": 9.953013462516827e-06, "loss": 0.0029, "step": 42350 }, { "epoch": 0.27867871028861274, "grad_norm": 0.08869759102067101, "learning_rate": 9.952934908136307e-06, "loss": 0.0031, "step": 42360 }, { "epoch": 0.27874449846384614, "grad_norm": 0.09891055346971034, "learning_rate": 9.952856288455619e-06, "loss": 0.0031, "step": 42370 }, { "epoch": 0.2788102866390795, "grad_norm": 0.006651649329596502, "learning_rate": 9.952777603475797e-06, "loss": 0.0026, "step": 42380 }, { "epoch": 0.27887607481431287, "grad_norm": 0.021532182773564064, "learning_rate": 9.95269885319788e-06, "loss": 0.0046, "step": 42390 }, { "epoch": 0.27894186298954626, "grad_norm": 0.4061034175961696, "learning_rate": 9.952620037622906e-06, "loss": 0.0029, "step": 42400 }, { "epoch": 0.27900765116477966, "grad_norm": 0.05885155115621627, "learning_rate": 9.952541156751916e-06, "loss": 0.0028, "step": 42410 }, { "epoch": 0.27907343934001305, "grad_norm": 0.0905511917276675, "learning_rate": 9.952462210585948e-06, "loss": 0.0052, "step": 42420 }, { "epoch": 0.2791392275152464, "grad_norm": 0.11980390835942101, "learning_rate": 9.952383199126041e-06, "loss": 0.0044, "step": 42430 }, { "epoch": 0.2792050156904798, "grad_norm": 0.06876355125567264, "learning_rate": 9.952304122373239e-06, "loss": 0.0035, "step": 42440 }, { "epoch": 0.2792708038657132, "grad_norm": 0.19630111114893126, "learning_rate": 9.952224980328586e-06, "loss": 0.0039, "step": 42450 }, { "epoch": 0.2793365920409466, "grad_norm": 0.29253729928722455, "learning_rate": 9.952145772993122e-06, "loss": 0.0028, "step": 42460 }, { "epoch": 0.27940238021617997, "grad_norm": 0.028237334275226275, "learning_rate": 9.952066500367893e-06, "loss": 0.0034, "step": 42470 }, { "epoch": 0.2794681683914133, "grad_norm": 0.3917140162835, "learning_rate": 9.951987162453945e-06, "loss": 0.0029, "step": 42480 }, { "epoch": 0.2795339565666467, "grad_norm": 0.04122664478624513, "learning_rate": 9.951907759252322e-06, "loss": 0.0015, "step": 42490 }, { "epoch": 0.2795997447418801, "grad_norm": 0.2896564272158462, "learning_rate": 9.951828290764071e-06, "loss": 0.0054, "step": 42500 }, { "epoch": 0.2796655329171135, "grad_norm": 0.7589849327298415, "learning_rate": 9.951748756990244e-06, "loss": 0.0025, "step": 42510 }, { "epoch": 0.2797313210923469, "grad_norm": 0.26372632751691494, "learning_rate": 9.951669157931885e-06, "loss": 0.0036, "step": 42520 }, { "epoch": 0.2797971092675802, "grad_norm": 0.039235059751813094, "learning_rate": 9.951589493590044e-06, "loss": 0.0041, "step": 42530 }, { "epoch": 0.2798628974428136, "grad_norm": 0.3662541164011544, "learning_rate": 9.95150976396577e-06, "loss": 0.0046, "step": 42540 }, { "epoch": 0.279928685618047, "grad_norm": 0.06898708832544437, "learning_rate": 9.95142996906012e-06, "loss": 0.0018, "step": 42550 }, { "epoch": 0.2799944737932804, "grad_norm": 0.09718585491348442, "learning_rate": 9.95135010887414e-06, "loss": 0.0034, "step": 42560 }, { "epoch": 0.2800602619685138, "grad_norm": 0.10269925419040551, "learning_rate": 9.951270183408885e-06, "loss": 0.0042, "step": 42570 }, { "epoch": 0.28012605014374714, "grad_norm": 0.035820880306610334, "learning_rate": 9.95119019266541e-06, "loss": 0.0043, "step": 42580 }, { "epoch": 0.28019183831898054, "grad_norm": 0.06692740341487244, "learning_rate": 9.951110136644769e-06, "loss": 0.0025, "step": 42590 }, { "epoch": 0.28025762649421393, "grad_norm": 0.11634789148754406, "learning_rate": 9.951030015348014e-06, "loss": 0.0032, "step": 42600 }, { "epoch": 0.2803234146694473, "grad_norm": 0.06338140035631498, "learning_rate": 9.950949828776206e-06, "loss": 0.0021, "step": 42610 }, { "epoch": 0.2803892028446807, "grad_norm": 0.13818551500394355, "learning_rate": 9.9508695769304e-06, "loss": 0.007, "step": 42620 }, { "epoch": 0.28045499101991406, "grad_norm": 0.1863874361873277, "learning_rate": 9.950789259811656e-06, "loss": 0.0047, "step": 42630 }, { "epoch": 0.28052077919514745, "grad_norm": 0.07568289905357013, "learning_rate": 9.95070887742103e-06, "loss": 0.0025, "step": 42640 }, { "epoch": 0.28058656737038085, "grad_norm": 0.04356938644651087, "learning_rate": 9.950628429759583e-06, "loss": 0.0024, "step": 42650 }, { "epoch": 0.28065235554561424, "grad_norm": 0.19926873121875427, "learning_rate": 9.950547916828377e-06, "loss": 0.0031, "step": 42660 }, { "epoch": 0.28071814372084763, "grad_norm": 0.11337472403109365, "learning_rate": 9.950467338628472e-06, "loss": 0.0022, "step": 42670 }, { "epoch": 0.280783931896081, "grad_norm": 0.1274282105666932, "learning_rate": 9.950386695160928e-06, "loss": 0.0026, "step": 42680 }, { "epoch": 0.28084972007131437, "grad_norm": 0.0733378222084467, "learning_rate": 9.950305986426814e-06, "loss": 0.0046, "step": 42690 }, { "epoch": 0.28091550824654776, "grad_norm": 0.06641604679543132, "learning_rate": 9.95022521242719e-06, "loss": 0.0047, "step": 42700 }, { "epoch": 0.28098129642178116, "grad_norm": 0.10718823297888193, "learning_rate": 9.950144373163121e-06, "loss": 0.0028, "step": 42710 }, { "epoch": 0.28104708459701455, "grad_norm": 0.14496755154377272, "learning_rate": 9.950063468635676e-06, "loss": 0.0027, "step": 42720 }, { "epoch": 0.2811128727722479, "grad_norm": 0.06262482879815817, "learning_rate": 9.949982498845916e-06, "loss": 0.0036, "step": 42730 }, { "epoch": 0.2811786609474813, "grad_norm": 0.00977885663127635, "learning_rate": 9.949901463794913e-06, "loss": 0.0049, "step": 42740 }, { "epoch": 0.2812444491227147, "grad_norm": 0.029440797607701685, "learning_rate": 9.949820363483736e-06, "loss": 0.0025, "step": 42750 }, { "epoch": 0.2813102372979481, "grad_norm": 0.04483855454866076, "learning_rate": 9.94973919791345e-06, "loss": 0.0035, "step": 42760 }, { "epoch": 0.28137602547318147, "grad_norm": 0.13719315342575905, "learning_rate": 9.94965796708513e-06, "loss": 0.0052, "step": 42770 }, { "epoch": 0.2814418136484148, "grad_norm": 0.04487562479397162, "learning_rate": 9.949576670999843e-06, "loss": 0.0023, "step": 42780 }, { "epoch": 0.2815076018236482, "grad_norm": 0.05576099610499335, "learning_rate": 9.949495309658664e-06, "loss": 0.0035, "step": 42790 }, { "epoch": 0.2815733899988816, "grad_norm": 0.5888964293708593, "learning_rate": 9.949413883062661e-06, "loss": 0.0044, "step": 42800 }, { "epoch": 0.281639178174115, "grad_norm": 0.04196996217143955, "learning_rate": 9.949332391212913e-06, "loss": 0.0036, "step": 42810 }, { "epoch": 0.2817049663493484, "grad_norm": 0.373462435895378, "learning_rate": 9.94925083411049e-06, "loss": 0.0034, "step": 42820 }, { "epoch": 0.2817707545245818, "grad_norm": 0.014676251895829223, "learning_rate": 9.949169211756471e-06, "loss": 0.0027, "step": 42830 }, { "epoch": 0.2818365426998151, "grad_norm": 0.16530092898245236, "learning_rate": 9.94908752415193e-06, "loss": 0.0047, "step": 42840 }, { "epoch": 0.2819023308750485, "grad_norm": 0.0636744775034669, "learning_rate": 9.949005771297944e-06, "loss": 0.0053, "step": 42850 }, { "epoch": 0.2819681190502819, "grad_norm": 0.14961725991162153, "learning_rate": 9.94892395319559e-06, "loss": 0.0023, "step": 42860 }, { "epoch": 0.2820339072255153, "grad_norm": 0.2278356084362201, "learning_rate": 9.948842069845949e-06, "loss": 0.0019, "step": 42870 }, { "epoch": 0.2820996954007487, "grad_norm": 0.11613768298584966, "learning_rate": 9.948760121250101e-06, "loss": 0.0029, "step": 42880 }, { "epoch": 0.28216548357598203, "grad_norm": 0.03673600321328507, "learning_rate": 9.948678107409122e-06, "loss": 0.0035, "step": 42890 }, { "epoch": 0.28223127175121543, "grad_norm": 0.2324245523590597, "learning_rate": 9.948596028324098e-06, "loss": 0.0026, "step": 42900 }, { "epoch": 0.2822970599264488, "grad_norm": 0.1919177311128656, "learning_rate": 9.94851388399611e-06, "loss": 0.0022, "step": 42910 }, { "epoch": 0.2823628481016822, "grad_norm": 0.1632635935594714, "learning_rate": 9.948431674426238e-06, "loss": 0.005, "step": 42920 }, { "epoch": 0.2824286362769156, "grad_norm": 0.14487722705353862, "learning_rate": 9.948349399615569e-06, "loss": 0.003, "step": 42930 }, { "epoch": 0.28249442445214895, "grad_norm": 0.05859388854529419, "learning_rate": 9.948267059565187e-06, "loss": 0.0019, "step": 42940 }, { "epoch": 0.28256021262738235, "grad_norm": 0.1298571052097377, "learning_rate": 9.948184654276175e-06, "loss": 0.0031, "step": 42950 }, { "epoch": 0.28262600080261574, "grad_norm": 0.0706913781794397, "learning_rate": 9.948102183749626e-06, "loss": 0.0031, "step": 42960 }, { "epoch": 0.28269178897784913, "grad_norm": 0.11340171312682004, "learning_rate": 9.94801964798662e-06, "loss": 0.0039, "step": 42970 }, { "epoch": 0.28275757715308253, "grad_norm": 0.08837200703782423, "learning_rate": 9.947937046988251e-06, "loss": 0.0028, "step": 42980 }, { "epoch": 0.28282336532831587, "grad_norm": 0.06707624387453322, "learning_rate": 9.947854380755604e-06, "loss": 0.0056, "step": 42990 }, { "epoch": 0.28288915350354926, "grad_norm": 0.08066166332012045, "learning_rate": 9.94777164928977e-06, "loss": 0.0026, "step": 43000 }, { "epoch": 0.28295494167878266, "grad_norm": 0.11824451485671138, "learning_rate": 9.947688852591838e-06, "loss": 0.0016, "step": 43010 }, { "epoch": 0.28302072985401605, "grad_norm": 0.02119886080358926, "learning_rate": 9.947605990662906e-06, "loss": 0.0014, "step": 43020 }, { "epoch": 0.28308651802924945, "grad_norm": 0.06954069943538041, "learning_rate": 9.94752306350406e-06, "loss": 0.0028, "step": 43030 }, { "epoch": 0.2831523062044828, "grad_norm": 0.08821798955796263, "learning_rate": 9.947440071116395e-06, "loss": 0.0035, "step": 43040 }, { "epoch": 0.2832180943797162, "grad_norm": 0.08731680142195301, "learning_rate": 9.947357013501005e-06, "loss": 0.0029, "step": 43050 }, { "epoch": 0.2832838825549496, "grad_norm": 0.059062017604599795, "learning_rate": 9.947273890658986e-06, "loss": 0.0023, "step": 43060 }, { "epoch": 0.28334967073018297, "grad_norm": 0.05479446085517929, "learning_rate": 9.947190702591434e-06, "loss": 0.0058, "step": 43070 }, { "epoch": 0.28341545890541636, "grad_norm": 0.09791734298409646, "learning_rate": 9.947107449299444e-06, "loss": 0.0039, "step": 43080 }, { "epoch": 0.2834812470806497, "grad_norm": 0.11667678426140168, "learning_rate": 9.947024130784117e-06, "loss": 0.0037, "step": 43090 }, { "epoch": 0.2835470352558831, "grad_norm": 0.2222801354312715, "learning_rate": 9.946940747046548e-06, "loss": 0.003, "step": 43100 }, { "epoch": 0.2836128234311165, "grad_norm": 0.09091308533034313, "learning_rate": 9.946857298087838e-06, "loss": 0.0038, "step": 43110 }, { "epoch": 0.2836786116063499, "grad_norm": 0.08211792611394032, "learning_rate": 9.946773783909088e-06, "loss": 0.0012, "step": 43120 }, { "epoch": 0.2837443997815833, "grad_norm": 0.09472713579936187, "learning_rate": 9.946690204511397e-06, "loss": 0.004, "step": 43130 }, { "epoch": 0.2838101879568166, "grad_norm": 0.0493389795997402, "learning_rate": 9.946606559895866e-06, "loss": 0.0022, "step": 43140 }, { "epoch": 0.28387597613205, "grad_norm": 0.047054088844236425, "learning_rate": 9.946522850063603e-06, "loss": 0.0018, "step": 43150 }, { "epoch": 0.2839417643072834, "grad_norm": 0.18689228524223434, "learning_rate": 9.946439075015706e-06, "loss": 0.0043, "step": 43160 }, { "epoch": 0.2840075524825168, "grad_norm": 0.028642114477717376, "learning_rate": 9.946355234753283e-06, "loss": 0.0015, "step": 43170 }, { "epoch": 0.2840733406577502, "grad_norm": 0.09984888922482231, "learning_rate": 9.946271329277437e-06, "loss": 0.0032, "step": 43180 }, { "epoch": 0.28413912883298353, "grad_norm": 0.05908313093704912, "learning_rate": 9.946187358589276e-06, "loss": 0.0028, "step": 43190 }, { "epoch": 0.28420491700821693, "grad_norm": 0.05161928906793624, "learning_rate": 9.946103322689908e-06, "loss": 0.0022, "step": 43200 }, { "epoch": 0.2842707051834503, "grad_norm": 0.15335796010204777, "learning_rate": 9.946019221580439e-06, "loss": 0.004, "step": 43210 }, { "epoch": 0.2843364933586837, "grad_norm": 0.2228452166204158, "learning_rate": 9.945935055261976e-06, "loss": 0.0034, "step": 43220 }, { "epoch": 0.2844022815339171, "grad_norm": 0.006485122954208796, "learning_rate": 9.945850823735632e-06, "loss": 0.0015, "step": 43230 }, { "epoch": 0.28446806970915045, "grad_norm": 0.024143255708872956, "learning_rate": 9.945766527002516e-06, "loss": 0.0026, "step": 43240 }, { "epoch": 0.28453385788438385, "grad_norm": 0.11074212655113937, "learning_rate": 9.94568216506374e-06, "loss": 0.0036, "step": 43250 }, { "epoch": 0.28459964605961724, "grad_norm": 0.017191724118305617, "learning_rate": 9.945597737920418e-06, "loss": 0.0024, "step": 43260 }, { "epoch": 0.28466543423485063, "grad_norm": 0.19213983004632995, "learning_rate": 9.94551324557366e-06, "loss": 0.0024, "step": 43270 }, { "epoch": 0.28473122241008403, "grad_norm": 0.16208467393035797, "learning_rate": 9.94542868802458e-06, "loss": 0.0035, "step": 43280 }, { "epoch": 0.2847970105853174, "grad_norm": 0.06568266205594632, "learning_rate": 9.945344065274293e-06, "loss": 0.0032, "step": 43290 }, { "epoch": 0.28486279876055076, "grad_norm": 0.08943188113556357, "learning_rate": 9.945259377323919e-06, "loss": 0.0027, "step": 43300 }, { "epoch": 0.28492858693578416, "grad_norm": 0.1982349256364734, "learning_rate": 9.945174624174569e-06, "loss": 0.0017, "step": 43310 }, { "epoch": 0.28499437511101755, "grad_norm": 0.1655767304364167, "learning_rate": 9.945089805827363e-06, "loss": 0.0067, "step": 43320 }, { "epoch": 0.28506016328625094, "grad_norm": 0.07535873566741033, "learning_rate": 9.945004922283418e-06, "loss": 0.0025, "step": 43330 }, { "epoch": 0.28512595146148434, "grad_norm": 0.0706546516436267, "learning_rate": 9.944919973543854e-06, "loss": 0.0037, "step": 43340 }, { "epoch": 0.2851917396367177, "grad_norm": 0.15775633978378842, "learning_rate": 9.944834959609792e-06, "loss": 0.003, "step": 43350 }, { "epoch": 0.2852575278119511, "grad_norm": 0.04888112565934072, "learning_rate": 9.94474988048235e-06, "loss": 0.0023, "step": 43360 }, { "epoch": 0.28532331598718447, "grad_norm": 0.09629761760195679, "learning_rate": 9.944664736162653e-06, "loss": 0.0031, "step": 43370 }, { "epoch": 0.28538910416241786, "grad_norm": 0.027959845042550266, "learning_rate": 9.94457952665182e-06, "loss": 0.004, "step": 43380 }, { "epoch": 0.28545489233765126, "grad_norm": 0.13841317310923365, "learning_rate": 9.94449425195098e-06, "loss": 0.0049, "step": 43390 }, { "epoch": 0.2855206805128846, "grad_norm": 0.09362275406654172, "learning_rate": 9.94440891206125e-06, "loss": 0.0019, "step": 43400 }, { "epoch": 0.285586468688118, "grad_norm": 0.03491065008419881, "learning_rate": 9.94432350698376e-06, "loss": 0.0025, "step": 43410 }, { "epoch": 0.2856522568633514, "grad_norm": 0.4653024503639774, "learning_rate": 9.944238036719635e-06, "loss": 0.004, "step": 43420 }, { "epoch": 0.2857180450385848, "grad_norm": 0.10878346882104668, "learning_rate": 9.944152501270001e-06, "loss": 0.0022, "step": 43430 }, { "epoch": 0.2857838332138182, "grad_norm": 0.1993826431671183, "learning_rate": 9.944066900635986e-06, "loss": 0.0033, "step": 43440 }, { "epoch": 0.2858496213890515, "grad_norm": 0.03881838732275483, "learning_rate": 9.94398123481872e-06, "loss": 0.0026, "step": 43450 }, { "epoch": 0.2859154095642849, "grad_norm": 0.07332919156654631, "learning_rate": 9.943895503819332e-06, "loss": 0.0022, "step": 43460 }, { "epoch": 0.2859811977395183, "grad_norm": 0.13163346127562886, "learning_rate": 9.94380970763895e-06, "loss": 0.0032, "step": 43470 }, { "epoch": 0.2860469859147517, "grad_norm": 0.1044146579021384, "learning_rate": 9.943723846278707e-06, "loss": 0.0037, "step": 43480 }, { "epoch": 0.2861127740899851, "grad_norm": 0.2264695244265737, "learning_rate": 9.943637919739737e-06, "loss": 0.0029, "step": 43490 }, { "epoch": 0.28617856226521843, "grad_norm": 0.32672454750892427, "learning_rate": 9.943551928023168e-06, "loss": 0.0037, "step": 43500 }, { "epoch": 0.2862443504404518, "grad_norm": 0.23532917156849106, "learning_rate": 9.943465871130135e-06, "loss": 0.0038, "step": 43510 }, { "epoch": 0.2863101386156852, "grad_norm": 0.09910085318700042, "learning_rate": 9.943379749061777e-06, "loss": 0.0049, "step": 43520 }, { "epoch": 0.2863759267909186, "grad_norm": 0.07119823500368293, "learning_rate": 9.943293561819227e-06, "loss": 0.0023, "step": 43530 }, { "epoch": 0.286441714966152, "grad_norm": 0.14362897763433258, "learning_rate": 9.943207309403618e-06, "loss": 0.0014, "step": 43540 }, { "epoch": 0.28650750314138534, "grad_norm": 0.04475405068523219, "learning_rate": 9.94312099181609e-06, "loss": 0.0016, "step": 43550 }, { "epoch": 0.28657329131661874, "grad_norm": 0.0727236506876708, "learning_rate": 9.943034609057783e-06, "loss": 0.0021, "step": 43560 }, { "epoch": 0.28663907949185213, "grad_norm": 0.08993129569613564, "learning_rate": 9.942948161129833e-06, "loss": 0.0034, "step": 43570 }, { "epoch": 0.28670486766708553, "grad_norm": 0.012771615962279527, "learning_rate": 9.942861648033379e-06, "loss": 0.0024, "step": 43580 }, { "epoch": 0.2867706558423189, "grad_norm": 0.0994276920686172, "learning_rate": 9.942775069769564e-06, "loss": 0.0014, "step": 43590 }, { "epoch": 0.28683644401755226, "grad_norm": 0.16741945437017164, "learning_rate": 9.942688426339529e-06, "loss": 0.0036, "step": 43600 }, { "epoch": 0.28690223219278566, "grad_norm": 0.0065051359294339825, "learning_rate": 9.942601717744416e-06, "loss": 0.003, "step": 43610 }, { "epoch": 0.28696802036801905, "grad_norm": 0.08967781664503975, "learning_rate": 9.942514943985368e-06, "loss": 0.0021, "step": 43620 }, { "epoch": 0.28703380854325244, "grad_norm": 0.021595152151562564, "learning_rate": 9.942428105063529e-06, "loss": 0.0023, "step": 43630 }, { "epoch": 0.28709959671848584, "grad_norm": 0.1027571835795197, "learning_rate": 9.942341200980044e-06, "loss": 0.0039, "step": 43640 }, { "epoch": 0.2871653848937192, "grad_norm": 0.07786067289316793, "learning_rate": 9.942254231736058e-06, "loss": 0.0028, "step": 43650 }, { "epoch": 0.28723117306895257, "grad_norm": 0.11105812363474797, "learning_rate": 9.94216719733272e-06, "loss": 0.0042, "step": 43660 }, { "epoch": 0.28729696124418597, "grad_norm": 0.08968103037978278, "learning_rate": 9.942080097771175e-06, "loss": 0.0038, "step": 43670 }, { "epoch": 0.28736274941941936, "grad_norm": 0.03649018441495956, "learning_rate": 9.94199293305257e-06, "loss": 0.0027, "step": 43680 }, { "epoch": 0.28742853759465276, "grad_norm": 0.04801603952742918, "learning_rate": 9.941905703178058e-06, "loss": 0.0024, "step": 43690 }, { "epoch": 0.2874943257698861, "grad_norm": 0.11940962141826186, "learning_rate": 9.941818408148788e-06, "loss": 0.002, "step": 43700 }, { "epoch": 0.2875601139451195, "grad_norm": 0.1378573318369863, "learning_rate": 9.94173104796591e-06, "loss": 0.0038, "step": 43710 }, { "epoch": 0.2876259021203529, "grad_norm": 0.09030147384854822, "learning_rate": 9.941643622630576e-06, "loss": 0.004, "step": 43720 }, { "epoch": 0.2876916902955863, "grad_norm": 0.14693533711305856, "learning_rate": 9.94155613214394e-06, "loss": 0.0026, "step": 43730 }, { "epoch": 0.28775747847081967, "grad_norm": 0.16432707560583765, "learning_rate": 9.941468576507153e-06, "loss": 0.0023, "step": 43740 }, { "epoch": 0.287823266646053, "grad_norm": 0.04485338564159887, "learning_rate": 9.94138095572137e-06, "loss": 0.0022, "step": 43750 }, { "epoch": 0.2878890548212864, "grad_norm": 0.024724155231122168, "learning_rate": 9.94129326978775e-06, "loss": 0.0038, "step": 43760 }, { "epoch": 0.2879548429965198, "grad_norm": 0.04482991055495551, "learning_rate": 9.941205518707442e-06, "loss": 0.0035, "step": 43770 }, { "epoch": 0.2880206311717532, "grad_norm": 0.26679930983003314, "learning_rate": 9.941117702481606e-06, "loss": 0.005, "step": 43780 }, { "epoch": 0.2880864193469866, "grad_norm": 0.07454509327636288, "learning_rate": 9.941029821111402e-06, "loss": 0.0035, "step": 43790 }, { "epoch": 0.28815220752222, "grad_norm": 0.044907863960461064, "learning_rate": 9.940941874597988e-06, "loss": 0.0036, "step": 43800 }, { "epoch": 0.2882179956974533, "grad_norm": 0.1788980142944571, "learning_rate": 9.940853862942523e-06, "loss": 0.0035, "step": 43810 }, { "epoch": 0.2882837838726867, "grad_norm": 0.2002093193935569, "learning_rate": 9.940765786146166e-06, "loss": 0.0032, "step": 43820 }, { "epoch": 0.2883495720479201, "grad_norm": 0.09113933499760973, "learning_rate": 9.94067764421008e-06, "loss": 0.0027, "step": 43830 }, { "epoch": 0.2884153602231535, "grad_norm": 0.14583665810951815, "learning_rate": 9.940589437135426e-06, "loss": 0.003, "step": 43840 }, { "epoch": 0.2884811483983869, "grad_norm": 0.38807047760324126, "learning_rate": 9.940501164923366e-06, "loss": 0.0043, "step": 43850 }, { "epoch": 0.28854693657362024, "grad_norm": 0.27970826504831625, "learning_rate": 9.940412827575067e-06, "loss": 0.003, "step": 43860 }, { "epoch": 0.28861272474885363, "grad_norm": 0.23199378210212618, "learning_rate": 9.940324425091692e-06, "loss": 0.0047, "step": 43870 }, { "epoch": 0.288678512924087, "grad_norm": 0.17752741820792, "learning_rate": 9.940235957474405e-06, "loss": 0.003, "step": 43880 }, { "epoch": 0.2887443010993204, "grad_norm": 0.4666143270959734, "learning_rate": 9.940147424724372e-06, "loss": 0.0038, "step": 43890 }, { "epoch": 0.2888100892745538, "grad_norm": 0.02485787223075514, "learning_rate": 9.940058826842765e-06, "loss": 0.0045, "step": 43900 }, { "epoch": 0.28887587744978716, "grad_norm": 0.16832987428962123, "learning_rate": 9.939970163830745e-06, "loss": 0.0019, "step": 43910 }, { "epoch": 0.28894166562502055, "grad_norm": 0.07614736970810898, "learning_rate": 9.939881435689488e-06, "loss": 0.0025, "step": 43920 }, { "epoch": 0.28900745380025394, "grad_norm": 0.0965258843663058, "learning_rate": 9.939792642420159e-06, "loss": 0.0026, "step": 43930 }, { "epoch": 0.28907324197548734, "grad_norm": 0.09585721688470249, "learning_rate": 9.939703784023932e-06, "loss": 0.0023, "step": 43940 }, { "epoch": 0.28913903015072073, "grad_norm": 0.08012250314958756, "learning_rate": 9.939614860501976e-06, "loss": 0.0022, "step": 43950 }, { "epoch": 0.28920481832595407, "grad_norm": 0.15127904518715132, "learning_rate": 9.939525871855463e-06, "loss": 0.0025, "step": 43960 }, { "epoch": 0.28927060650118747, "grad_norm": 0.07954810257327795, "learning_rate": 9.939436818085568e-06, "loss": 0.0049, "step": 43970 }, { "epoch": 0.28933639467642086, "grad_norm": 0.08722496133615795, "learning_rate": 9.939347699193465e-06, "loss": 0.0045, "step": 43980 }, { "epoch": 0.28940218285165425, "grad_norm": 0.1955835328153243, "learning_rate": 9.93925851518033e-06, "loss": 0.002, "step": 43990 }, { "epoch": 0.28946797102688765, "grad_norm": 0.1264543172729514, "learning_rate": 9.939169266047334e-06, "loss": 0.0024, "step": 44000 }, { "epoch": 0.289533759202121, "grad_norm": 0.06707678908308086, "learning_rate": 9.93907995179566e-06, "loss": 0.0021, "step": 44010 }, { "epoch": 0.2895995473773544, "grad_norm": 0.10577003800654076, "learning_rate": 9.938990572426482e-06, "loss": 0.0048, "step": 44020 }, { "epoch": 0.2896653355525878, "grad_norm": 0.14406869259961091, "learning_rate": 9.938901127940976e-06, "loss": 0.0049, "step": 44030 }, { "epoch": 0.28973112372782117, "grad_norm": 0.064199106395096, "learning_rate": 9.938811618340327e-06, "loss": 0.0024, "step": 44040 }, { "epoch": 0.28979691190305457, "grad_norm": 0.027826193276765214, "learning_rate": 9.938722043625712e-06, "loss": 0.0055, "step": 44050 }, { "epoch": 0.2898627000782879, "grad_norm": 0.08615251481699311, "learning_rate": 9.938632403798312e-06, "loss": 0.0031, "step": 44060 }, { "epoch": 0.2899284882535213, "grad_norm": 0.02667868359142811, "learning_rate": 9.938542698859308e-06, "loss": 0.0021, "step": 44070 }, { "epoch": 0.2899942764287547, "grad_norm": 0.18841616021784208, "learning_rate": 9.938452928809886e-06, "loss": 0.0034, "step": 44080 }, { "epoch": 0.2900600646039881, "grad_norm": 0.0736719013593585, "learning_rate": 9.938363093651224e-06, "loss": 0.001, "step": 44090 }, { "epoch": 0.2901258527792215, "grad_norm": 0.0544867460243226, "learning_rate": 9.938273193384512e-06, "loss": 0.0022, "step": 44100 }, { "epoch": 0.2901916409544548, "grad_norm": 0.12415134568499497, "learning_rate": 9.938183228010933e-06, "loss": 0.0041, "step": 44110 }, { "epoch": 0.2902574291296882, "grad_norm": 0.0821961175829816, "learning_rate": 9.938093197531671e-06, "loss": 0.0024, "step": 44120 }, { "epoch": 0.2903232173049216, "grad_norm": 0.15840505468287272, "learning_rate": 9.938003101947917e-06, "loss": 0.0029, "step": 44130 }, { "epoch": 0.290389005480155, "grad_norm": 0.09817842219769611, "learning_rate": 9.937912941260856e-06, "loss": 0.0027, "step": 44140 }, { "epoch": 0.2904547936553884, "grad_norm": 0.24453112301988392, "learning_rate": 9.937822715471678e-06, "loss": 0.0025, "step": 44150 }, { "epoch": 0.29052058183062174, "grad_norm": 0.008273111373412153, "learning_rate": 9.937732424581572e-06, "loss": 0.0031, "step": 44160 }, { "epoch": 0.29058637000585513, "grad_norm": 0.07332301240392017, "learning_rate": 9.937642068591728e-06, "loss": 0.0032, "step": 44170 }, { "epoch": 0.2906521581810885, "grad_norm": 0.05620728038917024, "learning_rate": 9.937551647503339e-06, "loss": 0.0026, "step": 44180 }, { "epoch": 0.2907179463563219, "grad_norm": 0.05368713618976995, "learning_rate": 9.937461161317593e-06, "loss": 0.0023, "step": 44190 }, { "epoch": 0.2907837345315553, "grad_norm": 0.1110053801590558, "learning_rate": 9.937370610035689e-06, "loss": 0.0046, "step": 44200 }, { "epoch": 0.29084952270678865, "grad_norm": 0.055418191682819355, "learning_rate": 9.937279993658817e-06, "loss": 0.0042, "step": 44210 }, { "epoch": 0.29091531088202205, "grad_norm": 0.14857328275858644, "learning_rate": 9.93718931218817e-06, "loss": 0.0037, "step": 44220 }, { "epoch": 0.29098109905725544, "grad_norm": 0.03002213369758619, "learning_rate": 9.93709856562495e-06, "loss": 0.0021, "step": 44230 }, { "epoch": 0.29104688723248884, "grad_norm": 0.10147853695202612, "learning_rate": 9.937007753970346e-06, "loss": 0.0017, "step": 44240 }, { "epoch": 0.29111267540772223, "grad_norm": 0.014886573681472148, "learning_rate": 9.93691687722556e-06, "loss": 0.0019, "step": 44250 }, { "epoch": 0.2911784635829556, "grad_norm": 0.040967994501867284, "learning_rate": 9.93682593539179e-06, "loss": 0.004, "step": 44260 }, { "epoch": 0.29124425175818897, "grad_norm": 0.17726897985639758, "learning_rate": 9.936734928470231e-06, "loss": 0.0037, "step": 44270 }, { "epoch": 0.29131003993342236, "grad_norm": 0.18281599843910523, "learning_rate": 9.936643856462088e-06, "loss": 0.0026, "step": 44280 }, { "epoch": 0.29137582810865575, "grad_norm": 0.12708668910772886, "learning_rate": 9.936552719368558e-06, "loss": 0.0033, "step": 44290 }, { "epoch": 0.29144161628388915, "grad_norm": 0.04052652837457596, "learning_rate": 9.936461517190842e-06, "loss": 0.0024, "step": 44300 }, { "epoch": 0.29150740445912254, "grad_norm": 0.024227805897346695, "learning_rate": 9.936370249930147e-06, "loss": 0.0018, "step": 44310 }, { "epoch": 0.2915731926343559, "grad_norm": 0.03531577121790943, "learning_rate": 9.936278917587675e-06, "loss": 0.0016, "step": 44320 }, { "epoch": 0.2916389808095893, "grad_norm": 0.18678870021476182, "learning_rate": 9.936187520164626e-06, "loss": 0.0022, "step": 44330 }, { "epoch": 0.29170476898482267, "grad_norm": 0.03742129325340932, "learning_rate": 9.936096057662209e-06, "loss": 0.001, "step": 44340 }, { "epoch": 0.29177055716005607, "grad_norm": 0.1547239861575689, "learning_rate": 9.936004530081628e-06, "loss": 0.003, "step": 44350 }, { "epoch": 0.29183634533528946, "grad_norm": 0.26895247125909255, "learning_rate": 9.93591293742409e-06, "loss": 0.003, "step": 44360 }, { "epoch": 0.2919021335105228, "grad_norm": 0.17439042706570743, "learning_rate": 9.935821279690802e-06, "loss": 0.0025, "step": 44370 }, { "epoch": 0.2919679216857562, "grad_norm": 0.021056952904996373, "learning_rate": 9.935729556882976e-06, "loss": 0.0016, "step": 44380 }, { "epoch": 0.2920337098609896, "grad_norm": 0.3147910887747353, "learning_rate": 9.935637769001817e-06, "loss": 0.0031, "step": 44390 }, { "epoch": 0.292099498036223, "grad_norm": 0.30329886564554853, "learning_rate": 9.935545916048538e-06, "loss": 0.0022, "step": 44400 }, { "epoch": 0.2921652862114564, "grad_norm": 0.05070395400858198, "learning_rate": 9.935453998024346e-06, "loss": 0.0027, "step": 44410 }, { "epoch": 0.2922310743866897, "grad_norm": 0.11230630266468722, "learning_rate": 9.935362014930456e-06, "loss": 0.0024, "step": 44420 }, { "epoch": 0.2922968625619231, "grad_norm": 0.08566904622383845, "learning_rate": 9.935269966768081e-06, "loss": 0.0022, "step": 44430 }, { "epoch": 0.2923626507371565, "grad_norm": 0.06445199910178168, "learning_rate": 9.935177853538436e-06, "loss": 0.0027, "step": 44440 }, { "epoch": 0.2924284389123899, "grad_norm": 0.051818650235327376, "learning_rate": 9.93508567524273e-06, "loss": 0.0011, "step": 44450 }, { "epoch": 0.2924942270876233, "grad_norm": 0.10223874108262992, "learning_rate": 9.934993431882186e-06, "loss": 0.0028, "step": 44460 }, { "epoch": 0.29256001526285663, "grad_norm": 0.07256144704817966, "learning_rate": 9.934901123458011e-06, "loss": 0.0027, "step": 44470 }, { "epoch": 0.29262580343809, "grad_norm": 0.11805068668895825, "learning_rate": 9.93480874997143e-06, "loss": 0.0034, "step": 44480 }, { "epoch": 0.2926915916133234, "grad_norm": 0.25792933097403564, "learning_rate": 9.934716311423656e-06, "loss": 0.0033, "step": 44490 }, { "epoch": 0.2927573797885568, "grad_norm": 0.040920192410892536, "learning_rate": 9.934623807815909e-06, "loss": 0.0027, "step": 44500 }, { "epoch": 0.2928231679637902, "grad_norm": 0.07984973650471151, "learning_rate": 9.93453123914941e-06, "loss": 0.0039, "step": 44510 }, { "epoch": 0.29288895613902355, "grad_norm": 0.1271339522036489, "learning_rate": 9.934438605425376e-06, "loss": 0.0043, "step": 44520 }, { "epoch": 0.29295474431425694, "grad_norm": 0.1962904128265196, "learning_rate": 9.934345906645033e-06, "loss": 0.0033, "step": 44530 }, { "epoch": 0.29302053248949034, "grad_norm": 0.19974861851430173, "learning_rate": 9.9342531428096e-06, "loss": 0.0035, "step": 44540 }, { "epoch": 0.29308632066472373, "grad_norm": 0.07738819332208108, "learning_rate": 9.934160313920303e-06, "loss": 0.0047, "step": 44550 }, { "epoch": 0.2931521088399571, "grad_norm": 0.2827061509446877, "learning_rate": 9.934067419978361e-06, "loss": 0.0028, "step": 44560 }, { "epoch": 0.29321789701519047, "grad_norm": 0.05517155817382457, "learning_rate": 9.933974460985e-06, "loss": 0.0026, "step": 44570 }, { "epoch": 0.29328368519042386, "grad_norm": 0.10733464682130824, "learning_rate": 9.93388143694145e-06, "loss": 0.0014, "step": 44580 }, { "epoch": 0.29334947336565725, "grad_norm": 0.22213049735979432, "learning_rate": 9.933788347848934e-06, "loss": 0.0024, "step": 44590 }, { "epoch": 0.29341526154089065, "grad_norm": 0.07984469604760919, "learning_rate": 9.933695193708679e-06, "loss": 0.0029, "step": 44600 }, { "epoch": 0.29348104971612404, "grad_norm": 0.14469147241753422, "learning_rate": 9.933601974521914e-06, "loss": 0.002, "step": 44610 }, { "epoch": 0.2935468378913574, "grad_norm": 0.10874418862302478, "learning_rate": 9.933508690289869e-06, "loss": 0.003, "step": 44620 }, { "epoch": 0.2936126260665908, "grad_norm": 0.03357955539293998, "learning_rate": 9.933415341013772e-06, "loss": 0.0014, "step": 44630 }, { "epoch": 0.29367841424182417, "grad_norm": 0.10188202722995404, "learning_rate": 9.933321926694854e-06, "loss": 0.0017, "step": 44640 }, { "epoch": 0.29374420241705756, "grad_norm": 0.05472378095022885, "learning_rate": 9.933228447334347e-06, "loss": 0.0051, "step": 44650 }, { "epoch": 0.29380999059229096, "grad_norm": 0.11014266417447112, "learning_rate": 9.933134902933486e-06, "loss": 0.0018, "step": 44660 }, { "epoch": 0.2938757787675243, "grad_norm": 0.0727281038104225, "learning_rate": 9.9330412934935e-06, "loss": 0.0031, "step": 44670 }, { "epoch": 0.2939415669427577, "grad_norm": 0.10682338285784382, "learning_rate": 9.932947619015623e-06, "loss": 0.0028, "step": 44680 }, { "epoch": 0.2940073551179911, "grad_norm": 0.10757602915911157, "learning_rate": 9.932853879501095e-06, "loss": 0.0029, "step": 44690 }, { "epoch": 0.2940731432932245, "grad_norm": 0.1190808851276998, "learning_rate": 9.932760074951147e-06, "loss": 0.0027, "step": 44700 }, { "epoch": 0.2941389314684579, "grad_norm": 0.05928878347461329, "learning_rate": 9.932666205367018e-06, "loss": 0.0017, "step": 44710 }, { "epoch": 0.29420471964369127, "grad_norm": 0.08015071097701454, "learning_rate": 9.932572270749947e-06, "loss": 0.0034, "step": 44720 }, { "epoch": 0.2942705078189246, "grad_norm": 0.13850209789775794, "learning_rate": 9.932478271101169e-06, "loss": 0.0033, "step": 44730 }, { "epoch": 0.294336295994158, "grad_norm": 0.02953258971053008, "learning_rate": 9.932384206421924e-06, "loss": 0.0023, "step": 44740 }, { "epoch": 0.2944020841693914, "grad_norm": 0.07676359187216787, "learning_rate": 9.932290076713456e-06, "loss": 0.0034, "step": 44750 }, { "epoch": 0.2944678723446248, "grad_norm": 0.026569030444631297, "learning_rate": 9.932195881977001e-06, "loss": 0.0042, "step": 44760 }, { "epoch": 0.2945336605198582, "grad_norm": 0.03129294643491755, "learning_rate": 9.932101622213803e-06, "loss": 0.0014, "step": 44770 }, { "epoch": 0.2945994486950915, "grad_norm": 0.062465784948927516, "learning_rate": 9.932007297425106e-06, "loss": 0.0033, "step": 44780 }, { "epoch": 0.2946652368703249, "grad_norm": 0.256023008749213, "learning_rate": 9.93191290761215e-06, "loss": 0.0044, "step": 44790 }, { "epoch": 0.2947310250455583, "grad_norm": 0.06646691071686586, "learning_rate": 9.931818452776185e-06, "loss": 0.0022, "step": 44800 }, { "epoch": 0.2947968132207917, "grad_norm": 0.12760562156539376, "learning_rate": 9.93172393291845e-06, "loss": 0.0041, "step": 44810 }, { "epoch": 0.2948626013960251, "grad_norm": 0.11131730574796952, "learning_rate": 9.931629348040198e-06, "loss": 0.0025, "step": 44820 }, { "epoch": 0.29492838957125844, "grad_norm": 0.049954682735341975, "learning_rate": 9.931534698142669e-06, "loss": 0.0017, "step": 44830 }, { "epoch": 0.29499417774649184, "grad_norm": 0.15552263532755434, "learning_rate": 9.931439983227115e-06, "loss": 0.0028, "step": 44840 }, { "epoch": 0.29505996592172523, "grad_norm": 0.12829458218007445, "learning_rate": 9.931345203294784e-06, "loss": 0.0031, "step": 44850 }, { "epoch": 0.2951257540969586, "grad_norm": 0.10107730775986758, "learning_rate": 9.931250358346925e-06, "loss": 0.0026, "step": 44860 }, { "epoch": 0.295191542272192, "grad_norm": 0.2427447667954062, "learning_rate": 9.931155448384791e-06, "loss": 0.0022, "step": 44870 }, { "epoch": 0.29525733044742536, "grad_norm": 0.17309659640390235, "learning_rate": 9.931060473409629e-06, "loss": 0.0039, "step": 44880 }, { "epoch": 0.29532311862265875, "grad_norm": 0.1407267415851866, "learning_rate": 9.930965433422695e-06, "loss": 0.0017, "step": 44890 }, { "epoch": 0.29538890679789215, "grad_norm": 0.05661168277544848, "learning_rate": 9.930870328425238e-06, "loss": 0.0031, "step": 44900 }, { "epoch": 0.29545469497312554, "grad_norm": 0.04604051947877417, "learning_rate": 9.930775158418517e-06, "loss": 0.0038, "step": 44910 }, { "epoch": 0.29552048314835894, "grad_norm": 0.16102250905956267, "learning_rate": 9.930679923403783e-06, "loss": 0.0021, "step": 44920 }, { "epoch": 0.2955862713235923, "grad_norm": 0.07561346695432831, "learning_rate": 9.93058462338229e-06, "loss": 0.0029, "step": 44930 }, { "epoch": 0.29565205949882567, "grad_norm": 0.1347632202423851, "learning_rate": 9.930489258355302e-06, "loss": 0.003, "step": 44940 }, { "epoch": 0.29571784767405906, "grad_norm": 0.07961494866626646, "learning_rate": 9.930393828324068e-06, "loss": 0.006, "step": 44950 }, { "epoch": 0.29578363584929246, "grad_norm": 0.07801162671228605, "learning_rate": 9.93029833328985e-06, "loss": 0.0019, "step": 44960 }, { "epoch": 0.29584942402452585, "grad_norm": 0.09596809422819606, "learning_rate": 9.930202773253908e-06, "loss": 0.0021, "step": 44970 }, { "epoch": 0.2959152121997592, "grad_norm": 0.13365368689803697, "learning_rate": 9.930107148217499e-06, "loss": 0.0034, "step": 44980 }, { "epoch": 0.2959810003749926, "grad_norm": 0.10747834100808952, "learning_rate": 9.930011458181885e-06, "loss": 0.0022, "step": 44990 }, { "epoch": 0.296046788550226, "grad_norm": 0.1502135312506894, "learning_rate": 9.92991570314833e-06, "loss": 0.0024, "step": 45000 }, { "epoch": 0.2961125767254594, "grad_norm": 0.044821774722470176, "learning_rate": 9.929819883118092e-06, "loss": 0.0015, "step": 45010 }, { "epoch": 0.29617836490069277, "grad_norm": 0.06475181121636908, "learning_rate": 9.929723998092438e-06, "loss": 0.002, "step": 45020 }, { "epoch": 0.2962441530759261, "grad_norm": 0.09618567871506552, "learning_rate": 9.929628048072629e-06, "loss": 0.0012, "step": 45030 }, { "epoch": 0.2963099412511595, "grad_norm": 0.07055981395622302, "learning_rate": 9.929532033059933e-06, "loss": 0.0022, "step": 45040 }, { "epoch": 0.2963757294263929, "grad_norm": 0.12128378421476844, "learning_rate": 9.929435953055613e-06, "loss": 0.0021, "step": 45050 }, { "epoch": 0.2964415176016263, "grad_norm": 0.151644928839741, "learning_rate": 9.929339808060938e-06, "loss": 0.0027, "step": 45060 }, { "epoch": 0.2965073057768597, "grad_norm": 0.14659087231993828, "learning_rate": 9.929243598077176e-06, "loss": 0.0022, "step": 45070 }, { "epoch": 0.296573093952093, "grad_norm": 0.03697486597141652, "learning_rate": 9.929147323105595e-06, "loss": 0.0011, "step": 45080 }, { "epoch": 0.2966388821273264, "grad_norm": 0.12278488269475334, "learning_rate": 9.929050983147462e-06, "loss": 0.0023, "step": 45090 }, { "epoch": 0.2967046703025598, "grad_norm": 0.04513372466104049, "learning_rate": 9.928954578204048e-06, "loss": 0.0015, "step": 45100 }, { "epoch": 0.2967704584777932, "grad_norm": 0.16203983894975107, "learning_rate": 9.928858108276627e-06, "loss": 0.0023, "step": 45110 }, { "epoch": 0.2968362466530266, "grad_norm": 0.4186414129319213, "learning_rate": 9.928761573366467e-06, "loss": 0.0023, "step": 45120 }, { "epoch": 0.29690203482825994, "grad_norm": 0.08506916030719745, "learning_rate": 9.928664973474843e-06, "loss": 0.0043, "step": 45130 }, { "epoch": 0.29696782300349334, "grad_norm": 0.21263184840550312, "learning_rate": 9.928568308603026e-06, "loss": 0.0022, "step": 45140 }, { "epoch": 0.29703361117872673, "grad_norm": 0.04975270476621701, "learning_rate": 9.928471578752296e-06, "loss": 0.002, "step": 45150 }, { "epoch": 0.2970993993539601, "grad_norm": 0.047583794415308904, "learning_rate": 9.928374783923923e-06, "loss": 0.0039, "step": 45160 }, { "epoch": 0.2971651875291935, "grad_norm": 0.06092774369917869, "learning_rate": 9.928277924119185e-06, "loss": 0.004, "step": 45170 }, { "epoch": 0.2972309757044269, "grad_norm": 0.10654753908718374, "learning_rate": 9.928180999339359e-06, "loss": 0.0041, "step": 45180 }, { "epoch": 0.29729676387966025, "grad_norm": 0.19482012911537283, "learning_rate": 9.928084009585723e-06, "loss": 0.0047, "step": 45190 }, { "epoch": 0.29736255205489365, "grad_norm": 0.10053827182352133, "learning_rate": 9.927986954859554e-06, "loss": 0.0036, "step": 45200 }, { "epoch": 0.29742834023012704, "grad_norm": 0.038746021062124325, "learning_rate": 9.927889835162135e-06, "loss": 0.0032, "step": 45210 }, { "epoch": 0.29749412840536044, "grad_norm": 0.07548669364986618, "learning_rate": 9.927792650494744e-06, "loss": 0.0016, "step": 45220 }, { "epoch": 0.29755991658059383, "grad_norm": 0.32177325784839705, "learning_rate": 9.927695400858662e-06, "loss": 0.005, "step": 45230 }, { "epoch": 0.29762570475582717, "grad_norm": 0.10905124504148637, "learning_rate": 9.927598086255172e-06, "loss": 0.0036, "step": 45240 }, { "epoch": 0.29769149293106056, "grad_norm": 0.09229202248500765, "learning_rate": 9.927500706685559e-06, "loss": 0.0029, "step": 45250 }, { "epoch": 0.29775728110629396, "grad_norm": 0.04744039426099556, "learning_rate": 9.927403262151102e-06, "loss": 0.0022, "step": 45260 }, { "epoch": 0.29782306928152735, "grad_norm": 0.08714698186191973, "learning_rate": 9.927305752653092e-06, "loss": 0.0022, "step": 45270 }, { "epoch": 0.29788885745676075, "grad_norm": 0.2217412318890901, "learning_rate": 9.927208178192809e-06, "loss": 0.0022, "step": 45280 }, { "epoch": 0.2979546456319941, "grad_norm": 0.09768266382466308, "learning_rate": 9.927110538771541e-06, "loss": 0.002, "step": 45290 }, { "epoch": 0.2980204338072275, "grad_norm": 0.02691287069342285, "learning_rate": 9.927012834390577e-06, "loss": 0.0034, "step": 45300 }, { "epoch": 0.2980862219824609, "grad_norm": 0.2499338923316316, "learning_rate": 9.926915065051203e-06, "loss": 0.0032, "step": 45310 }, { "epoch": 0.29815201015769427, "grad_norm": 0.05430592942618084, "learning_rate": 9.92681723075471e-06, "loss": 0.0023, "step": 45320 }, { "epoch": 0.29821779833292766, "grad_norm": 0.033097685685125465, "learning_rate": 9.926719331502386e-06, "loss": 0.0014, "step": 45330 }, { "epoch": 0.298283586508161, "grad_norm": 0.006431104115196558, "learning_rate": 9.926621367295524e-06, "loss": 0.0017, "step": 45340 }, { "epoch": 0.2983493746833944, "grad_norm": 0.09870964784613515, "learning_rate": 9.926523338135414e-06, "loss": 0.0034, "step": 45350 }, { "epoch": 0.2984151628586278, "grad_norm": 0.03144595962226421, "learning_rate": 9.926425244023349e-06, "loss": 0.0032, "step": 45360 }, { "epoch": 0.2984809510338612, "grad_norm": 0.03861668080276058, "learning_rate": 9.926327084960619e-06, "loss": 0.0019, "step": 45370 }, { "epoch": 0.2985467392090946, "grad_norm": 0.1498563048512507, "learning_rate": 9.926228860948523e-06, "loss": 0.0042, "step": 45380 }, { "epoch": 0.2986125273843279, "grad_norm": 0.3470378546101081, "learning_rate": 9.926130571988354e-06, "loss": 0.0036, "step": 45390 }, { "epoch": 0.2986783155595613, "grad_norm": 0.1673290096881188, "learning_rate": 9.926032218081408e-06, "loss": 0.0018, "step": 45400 }, { "epoch": 0.2987441037347947, "grad_norm": 0.11392979071018244, "learning_rate": 9.925933799228982e-06, "loss": 0.0029, "step": 45410 }, { "epoch": 0.2988098919100281, "grad_norm": 0.1543400207057572, "learning_rate": 9.925835315432372e-06, "loss": 0.0039, "step": 45420 }, { "epoch": 0.2988756800852615, "grad_norm": 0.5226563948399084, "learning_rate": 9.92573676669288e-06, "loss": 0.0025, "step": 45430 }, { "epoch": 0.29894146826049484, "grad_norm": 0.14660691514270494, "learning_rate": 9.9256381530118e-06, "loss": 0.002, "step": 45440 }, { "epoch": 0.29900725643572823, "grad_norm": 0.12154112475606005, "learning_rate": 9.925539474390435e-06, "loss": 0.0017, "step": 45450 }, { "epoch": 0.2990730446109616, "grad_norm": 0.06768115202279398, "learning_rate": 9.925440730830087e-06, "loss": 0.0019, "step": 45460 }, { "epoch": 0.299138832786195, "grad_norm": 0.03392483397783663, "learning_rate": 9.925341922332057e-06, "loss": 0.002, "step": 45470 }, { "epoch": 0.2992046209614284, "grad_norm": 0.04096380696736629, "learning_rate": 9.925243048897649e-06, "loss": 0.0021, "step": 45480 }, { "epoch": 0.29927040913666175, "grad_norm": 0.0612589356242445, "learning_rate": 9.925144110528162e-06, "loss": 0.0031, "step": 45490 }, { "epoch": 0.29933619731189515, "grad_norm": 0.022340133552416635, "learning_rate": 9.925045107224907e-06, "loss": 0.0032, "step": 45500 }, { "epoch": 0.29940198548712854, "grad_norm": 0.06527866007841085, "learning_rate": 9.924946038989182e-06, "loss": 0.0016, "step": 45510 }, { "epoch": 0.29946777366236194, "grad_norm": 0.07952348187229323, "learning_rate": 9.9248469058223e-06, "loss": 0.002, "step": 45520 }, { "epoch": 0.29953356183759533, "grad_norm": 0.041578204382440025, "learning_rate": 9.924747707725564e-06, "loss": 0.0025, "step": 45530 }, { "epoch": 0.29959935001282867, "grad_norm": 0.057489924318292265, "learning_rate": 9.924648444700283e-06, "loss": 0.0036, "step": 45540 }, { "epoch": 0.29966513818806206, "grad_norm": 0.033577372370473924, "learning_rate": 9.924549116747764e-06, "loss": 0.0031, "step": 45550 }, { "epoch": 0.29973092636329546, "grad_norm": 0.031113104767436412, "learning_rate": 9.924449723869319e-06, "loss": 0.0015, "step": 45560 }, { "epoch": 0.29979671453852885, "grad_norm": 0.07677037308739057, "learning_rate": 9.924350266066258e-06, "loss": 0.0019, "step": 45570 }, { "epoch": 0.29986250271376225, "grad_norm": 0.008299438520721195, "learning_rate": 9.924250743339891e-06, "loss": 0.0024, "step": 45580 }, { "epoch": 0.2999282908889956, "grad_norm": 0.08358682384895538, "learning_rate": 9.924151155691532e-06, "loss": 0.0018, "step": 45590 }, { "epoch": 0.299994079064229, "grad_norm": 0.12474071615513226, "learning_rate": 9.924051503122491e-06, "loss": 0.002, "step": 45600 }, { "epoch": 0.3000598672394624, "grad_norm": 0.0881190062214195, "learning_rate": 9.923951785634085e-06, "loss": 0.0024, "step": 45610 }, { "epoch": 0.30012565541469577, "grad_norm": 0.09685789982564602, "learning_rate": 9.923852003227626e-06, "loss": 0.0011, "step": 45620 }, { "epoch": 0.30019144358992916, "grad_norm": 0.11587584739923927, "learning_rate": 9.923752155904432e-06, "loss": 0.0017, "step": 45630 }, { "epoch": 0.3002572317651625, "grad_norm": 0.09343116426774195, "learning_rate": 9.923652243665817e-06, "loss": 0.0032, "step": 45640 }, { "epoch": 0.3003230199403959, "grad_norm": 0.09557462668449822, "learning_rate": 9.9235522665131e-06, "loss": 0.0022, "step": 45650 }, { "epoch": 0.3003888081156293, "grad_norm": 0.03564070473953574, "learning_rate": 9.923452224447601e-06, "loss": 0.0016, "step": 45660 }, { "epoch": 0.3004545962908627, "grad_norm": 0.16198659481677655, "learning_rate": 9.923352117470633e-06, "loss": 0.0031, "step": 45670 }, { "epoch": 0.3005203844660961, "grad_norm": 0.18155413670621023, "learning_rate": 9.923251945583521e-06, "loss": 0.0033, "step": 45680 }, { "epoch": 0.3005861726413295, "grad_norm": 0.16159614768286698, "learning_rate": 9.923151708787585e-06, "loss": 0.0029, "step": 45690 }, { "epoch": 0.3006519608165628, "grad_norm": 0.07165726334661723, "learning_rate": 9.923051407084144e-06, "loss": 0.0058, "step": 45700 }, { "epoch": 0.3007177489917962, "grad_norm": 0.053320699785663485, "learning_rate": 9.922951040474523e-06, "loss": 0.0017, "step": 45710 }, { "epoch": 0.3007835371670296, "grad_norm": 0.17572003290034535, "learning_rate": 9.922850608960044e-06, "loss": 0.0028, "step": 45720 }, { "epoch": 0.300849325342263, "grad_norm": 0.061921359002729784, "learning_rate": 9.922750112542033e-06, "loss": 0.0013, "step": 45730 }, { "epoch": 0.3009151135174964, "grad_norm": 0.2317224099790327, "learning_rate": 9.92264955122181e-06, "loss": 0.0017, "step": 45740 }, { "epoch": 0.30098090169272973, "grad_norm": 0.2016565796964563, "learning_rate": 9.922548925000705e-06, "loss": 0.0019, "step": 45750 }, { "epoch": 0.3010466898679631, "grad_norm": 0.057576991414853784, "learning_rate": 9.922448233880047e-06, "loss": 0.002, "step": 45760 }, { "epoch": 0.3011124780431965, "grad_norm": 0.1424272090887001, "learning_rate": 9.922347477861156e-06, "loss": 0.0022, "step": 45770 }, { "epoch": 0.3011782662184299, "grad_norm": 0.005735806334483476, "learning_rate": 9.922246656945366e-06, "loss": 0.0021, "step": 45780 }, { "epoch": 0.3012440543936633, "grad_norm": 0.04459541460108854, "learning_rate": 9.922145771134007e-06, "loss": 0.0014, "step": 45790 }, { "epoch": 0.30130984256889665, "grad_norm": 0.5397696470387765, "learning_rate": 9.922044820428406e-06, "loss": 0.0032, "step": 45800 }, { "epoch": 0.30137563074413004, "grad_norm": 0.12099807432981367, "learning_rate": 9.921943804829895e-06, "loss": 0.002, "step": 45810 }, { "epoch": 0.30144141891936344, "grad_norm": 0.06605037641902026, "learning_rate": 9.921842724339805e-06, "loss": 0.0021, "step": 45820 }, { "epoch": 0.30150720709459683, "grad_norm": 0.03782915349913632, "learning_rate": 9.92174157895947e-06, "loss": 0.0035, "step": 45830 }, { "epoch": 0.3015729952698302, "grad_norm": 0.028266916937483406, "learning_rate": 9.921640368690224e-06, "loss": 0.0025, "step": 45840 }, { "epoch": 0.30163878344506356, "grad_norm": 0.056822377098971213, "learning_rate": 9.9215390935334e-06, "loss": 0.0025, "step": 45850 }, { "epoch": 0.30170457162029696, "grad_norm": 0.12161415939561611, "learning_rate": 9.921437753490331e-06, "loss": 0.0033, "step": 45860 }, { "epoch": 0.30177035979553035, "grad_norm": 0.08883088153246069, "learning_rate": 9.921336348562358e-06, "loss": 0.0026, "step": 45870 }, { "epoch": 0.30183614797076375, "grad_norm": 0.07212415249892322, "learning_rate": 9.921234878750815e-06, "loss": 0.0039, "step": 45880 }, { "epoch": 0.30190193614599714, "grad_norm": 0.13940293191163372, "learning_rate": 9.92113334405704e-06, "loss": 0.0039, "step": 45890 }, { "epoch": 0.3019677243212305, "grad_norm": 0.07325189777846682, "learning_rate": 9.921031744482374e-06, "loss": 0.0049, "step": 45900 }, { "epoch": 0.3020335124964639, "grad_norm": 0.0863562052905986, "learning_rate": 9.920930080028153e-06, "loss": 0.0027, "step": 45910 }, { "epoch": 0.30209930067169727, "grad_norm": 0.04092926465382644, "learning_rate": 9.920828350695718e-06, "loss": 0.0021, "step": 45920 }, { "epoch": 0.30216508884693066, "grad_norm": 0.06013193490050524, "learning_rate": 9.920726556486414e-06, "loss": 0.0031, "step": 45930 }, { "epoch": 0.30223087702216406, "grad_norm": 0.12922259935703598, "learning_rate": 9.920624697401577e-06, "loss": 0.0037, "step": 45940 }, { "epoch": 0.3022966651973974, "grad_norm": 0.08683986115331031, "learning_rate": 9.920522773442553e-06, "loss": 0.0039, "step": 45950 }, { "epoch": 0.3023624533726308, "grad_norm": 0.06145064567296216, "learning_rate": 9.920420784610689e-06, "loss": 0.0019, "step": 45960 }, { "epoch": 0.3024282415478642, "grad_norm": 0.0663581860108396, "learning_rate": 9.920318730907324e-06, "loss": 0.0046, "step": 45970 }, { "epoch": 0.3024940297230976, "grad_norm": 0.10503401806272379, "learning_rate": 9.920216612333806e-06, "loss": 0.0031, "step": 45980 }, { "epoch": 0.302559817898331, "grad_norm": 0.13228805580975486, "learning_rate": 9.920114428891482e-06, "loss": 0.0018, "step": 45990 }, { "epoch": 0.3026256060735643, "grad_norm": 0.1155903004000084, "learning_rate": 9.9200121805817e-06, "loss": 0.0033, "step": 46000 }, { "epoch": 0.3026913942487977, "grad_norm": 0.2487504470490734, "learning_rate": 9.919909867405805e-06, "loss": 0.0029, "step": 46010 }, { "epoch": 0.3027571824240311, "grad_norm": 0.42847932674680966, "learning_rate": 9.919807489365147e-06, "loss": 0.0026, "step": 46020 }, { "epoch": 0.3028229705992645, "grad_norm": 0.10409285155573393, "learning_rate": 9.919705046461076e-06, "loss": 0.0044, "step": 46030 }, { "epoch": 0.3028887587744979, "grad_norm": 0.06730534198953672, "learning_rate": 9.919602538694946e-06, "loss": 0.0028, "step": 46040 }, { "epoch": 0.30295454694973123, "grad_norm": 0.070454703143832, "learning_rate": 9.919499966068103e-06, "loss": 0.0032, "step": 46050 }, { "epoch": 0.3030203351249646, "grad_norm": 0.08465089688092702, "learning_rate": 9.9193973285819e-06, "loss": 0.0041, "step": 46060 }, { "epoch": 0.303086123300198, "grad_norm": 0.5031909551254274, "learning_rate": 9.919294626237696e-06, "loss": 0.0024, "step": 46070 }, { "epoch": 0.3031519114754314, "grad_norm": 0.10447602866280867, "learning_rate": 9.91919185903684e-06, "loss": 0.0031, "step": 46080 }, { "epoch": 0.3032176996506648, "grad_norm": 0.431798315396869, "learning_rate": 9.919089026980684e-06, "loss": 0.0035, "step": 46090 }, { "epoch": 0.30328348782589815, "grad_norm": 0.11266031650975111, "learning_rate": 9.91898613007059e-06, "loss": 0.0021, "step": 46100 }, { "epoch": 0.30334927600113154, "grad_norm": 0.0823578717715735, "learning_rate": 9.918883168307914e-06, "loss": 0.0023, "step": 46110 }, { "epoch": 0.30341506417636493, "grad_norm": 0.042729480079213976, "learning_rate": 9.918780141694008e-06, "loss": 0.0033, "step": 46120 }, { "epoch": 0.30348085235159833, "grad_norm": 0.27062990956133665, "learning_rate": 9.918677050230237e-06, "loss": 0.0042, "step": 46130 }, { "epoch": 0.3035466405268317, "grad_norm": 0.42976067214685537, "learning_rate": 9.918573893917957e-06, "loss": 0.0024, "step": 46140 }, { "epoch": 0.3036124287020651, "grad_norm": 0.05602260431180005, "learning_rate": 9.918470672758527e-06, "loss": 0.0039, "step": 46150 }, { "epoch": 0.30367821687729846, "grad_norm": 0.08940598505394376, "learning_rate": 9.918367386753309e-06, "loss": 0.0028, "step": 46160 }, { "epoch": 0.30374400505253185, "grad_norm": 0.04611890907270545, "learning_rate": 9.918264035903666e-06, "loss": 0.0025, "step": 46170 }, { "epoch": 0.30380979322776525, "grad_norm": 0.09199142494085613, "learning_rate": 9.918160620210958e-06, "loss": 0.0055, "step": 46180 }, { "epoch": 0.30387558140299864, "grad_norm": 0.04727676113057849, "learning_rate": 9.91805713967655e-06, "loss": 0.0038, "step": 46190 }, { "epoch": 0.30394136957823203, "grad_norm": 0.2836139825158083, "learning_rate": 9.917953594301806e-06, "loss": 0.0028, "step": 46200 }, { "epoch": 0.3040071577534654, "grad_norm": 0.22587781079401098, "learning_rate": 9.917849984088093e-06, "loss": 0.0021, "step": 46210 }, { "epoch": 0.30407294592869877, "grad_norm": 0.2051192066540403, "learning_rate": 9.917746309036773e-06, "loss": 0.0037, "step": 46220 }, { "epoch": 0.30413873410393216, "grad_norm": 0.14877652738352726, "learning_rate": 9.917642569149218e-06, "loss": 0.0022, "step": 46230 }, { "epoch": 0.30420452227916556, "grad_norm": 0.03952673339462249, "learning_rate": 9.917538764426791e-06, "loss": 0.0031, "step": 46240 }, { "epoch": 0.30427031045439895, "grad_norm": 0.04593183310778741, "learning_rate": 9.917434894870862e-06, "loss": 0.0022, "step": 46250 }, { "epoch": 0.3043360986296323, "grad_norm": 0.1633371095085318, "learning_rate": 9.9173309604828e-06, "loss": 0.0024, "step": 46260 }, { "epoch": 0.3044018868048657, "grad_norm": 0.1755241180829995, "learning_rate": 9.91722696126398e-06, "loss": 0.0022, "step": 46270 }, { "epoch": 0.3044676749800991, "grad_norm": 0.03962310054049892, "learning_rate": 9.917122897215765e-06, "loss": 0.0017, "step": 46280 }, { "epoch": 0.3045334631553325, "grad_norm": 0.0780974470514856, "learning_rate": 9.917018768339534e-06, "loss": 0.005, "step": 46290 }, { "epoch": 0.30459925133056587, "grad_norm": 0.04571983064285236, "learning_rate": 9.916914574636655e-06, "loss": 0.008, "step": 46300 }, { "epoch": 0.3046650395057992, "grad_norm": 0.06315415270760931, "learning_rate": 9.916810316108507e-06, "loss": 0.0031, "step": 46310 }, { "epoch": 0.3047308276810326, "grad_norm": 0.049995565526964575, "learning_rate": 9.91670599275646e-06, "loss": 0.0029, "step": 46320 }, { "epoch": 0.304796615856266, "grad_norm": 0.07246895119191915, "learning_rate": 9.91660160458189e-06, "loss": 0.0022, "step": 46330 }, { "epoch": 0.3048624040314994, "grad_norm": 0.09359443003845028, "learning_rate": 9.916497151586174e-06, "loss": 0.0014, "step": 46340 }, { "epoch": 0.3049281922067328, "grad_norm": 0.0910129575100637, "learning_rate": 9.91639263377069e-06, "loss": 0.0029, "step": 46350 }, { "epoch": 0.3049939803819661, "grad_norm": 0.10223113131414469, "learning_rate": 9.916288051136815e-06, "loss": 0.0012, "step": 46360 }, { "epoch": 0.3050597685571995, "grad_norm": 0.058059359379138184, "learning_rate": 9.916183403685928e-06, "loss": 0.0026, "step": 46370 }, { "epoch": 0.3051255567324329, "grad_norm": 0.028448290659506957, "learning_rate": 9.916078691419408e-06, "loss": 0.0036, "step": 46380 }, { "epoch": 0.3051913449076663, "grad_norm": 0.058306790954025176, "learning_rate": 9.915973914338638e-06, "loss": 0.0031, "step": 46390 }, { "epoch": 0.3052571330828997, "grad_norm": 0.11143001990347788, "learning_rate": 9.915869072444996e-06, "loss": 0.0026, "step": 46400 }, { "epoch": 0.30532292125813304, "grad_norm": 0.038547115776013725, "learning_rate": 9.915764165739868e-06, "loss": 0.0033, "step": 46410 }, { "epoch": 0.30538870943336643, "grad_norm": 0.05130474612870171, "learning_rate": 9.915659194224633e-06, "loss": 0.0038, "step": 46420 }, { "epoch": 0.30545449760859983, "grad_norm": 0.17452455803885958, "learning_rate": 9.915554157900678e-06, "loss": 0.0031, "step": 46430 }, { "epoch": 0.3055202857838332, "grad_norm": 0.11027644246058833, "learning_rate": 9.915449056769385e-06, "loss": 0.0025, "step": 46440 }, { "epoch": 0.3055860739590666, "grad_norm": 0.0957427668005177, "learning_rate": 9.915343890832142e-06, "loss": 0.0023, "step": 46450 }, { "epoch": 0.30565186213429996, "grad_norm": 0.11857687855292812, "learning_rate": 9.915238660090336e-06, "loss": 0.0034, "step": 46460 }, { "epoch": 0.30571765030953335, "grad_norm": 0.14440177597386952, "learning_rate": 9.915133364545353e-06, "loss": 0.0029, "step": 46470 }, { "epoch": 0.30578343848476675, "grad_norm": 0.07196493613257264, "learning_rate": 9.915028004198582e-06, "loss": 0.0029, "step": 46480 }, { "epoch": 0.30584922666000014, "grad_norm": 0.09758008074070496, "learning_rate": 9.914922579051412e-06, "loss": 0.0027, "step": 46490 }, { "epoch": 0.30591501483523353, "grad_norm": 0.14036270497330045, "learning_rate": 9.914817089105232e-06, "loss": 0.0031, "step": 46500 }, { "epoch": 0.3059808030104669, "grad_norm": 0.06088105529698062, "learning_rate": 9.914711534361433e-06, "loss": 0.0025, "step": 46510 }, { "epoch": 0.30604659118570027, "grad_norm": 0.14227952462230825, "learning_rate": 9.914605914821408e-06, "loss": 0.0025, "step": 46520 }, { "epoch": 0.30611237936093366, "grad_norm": 0.2704707099059688, "learning_rate": 9.914500230486548e-06, "loss": 0.0025, "step": 46530 }, { "epoch": 0.30617816753616706, "grad_norm": 0.03448149318489987, "learning_rate": 9.914394481358246e-06, "loss": 0.0025, "step": 46540 }, { "epoch": 0.30624395571140045, "grad_norm": 0.317517462771904, "learning_rate": 9.914288667437898e-06, "loss": 0.0033, "step": 46550 }, { "epoch": 0.3063097438866338, "grad_norm": 0.10179316007273707, "learning_rate": 9.914182788726899e-06, "loss": 0.0032, "step": 46560 }, { "epoch": 0.3063755320618672, "grad_norm": 0.15538981298695145, "learning_rate": 9.914076845226643e-06, "loss": 0.002, "step": 46570 }, { "epoch": 0.3064413202371006, "grad_norm": 0.1825970260657408, "learning_rate": 9.913970836938527e-06, "loss": 0.0033, "step": 46580 }, { "epoch": 0.306507108412334, "grad_norm": 0.12689338027985522, "learning_rate": 9.913864763863951e-06, "loss": 0.0038, "step": 46590 }, { "epoch": 0.30657289658756737, "grad_norm": 0.26195460062823805, "learning_rate": 9.913758626004311e-06, "loss": 0.0036, "step": 46600 }, { "epoch": 0.30663868476280076, "grad_norm": 0.02541817892750638, "learning_rate": 9.91365242336101e-06, "loss": 0.0028, "step": 46610 }, { "epoch": 0.3067044729380341, "grad_norm": 0.05957779039079585, "learning_rate": 9.913546155935442e-06, "loss": 0.0058, "step": 46620 }, { "epoch": 0.3067702611132675, "grad_norm": 0.07574645207640289, "learning_rate": 9.913439823729012e-06, "loss": 0.0028, "step": 46630 }, { "epoch": 0.3068360492885009, "grad_norm": 0.1835823164949097, "learning_rate": 9.913333426743124e-06, "loss": 0.0021, "step": 46640 }, { "epoch": 0.3069018374637343, "grad_norm": 0.09762742358703244, "learning_rate": 9.913226964979176e-06, "loss": 0.0032, "step": 46650 }, { "epoch": 0.3069676256389677, "grad_norm": 0.019554581035640974, "learning_rate": 9.913120438438573e-06, "loss": 0.0024, "step": 46660 }, { "epoch": 0.307033413814201, "grad_norm": 0.0913223333663146, "learning_rate": 9.913013847122723e-06, "loss": 0.0026, "step": 46670 }, { "epoch": 0.3070992019894344, "grad_norm": 0.02858413199913048, "learning_rate": 9.912907191033027e-06, "loss": 0.003, "step": 46680 }, { "epoch": 0.3071649901646678, "grad_norm": 0.03646472865035002, "learning_rate": 9.912800470170892e-06, "loss": 0.0011, "step": 46690 }, { "epoch": 0.3072307783399012, "grad_norm": 0.18897841427749174, "learning_rate": 9.912693684537728e-06, "loss": 0.0037, "step": 46700 }, { "epoch": 0.3072965665151346, "grad_norm": 0.14939814742918295, "learning_rate": 9.912586834134938e-06, "loss": 0.0043, "step": 46710 }, { "epoch": 0.30736235469036793, "grad_norm": 0.10433564553975061, "learning_rate": 9.912479918963935e-06, "loss": 0.0015, "step": 46720 }, { "epoch": 0.30742814286560133, "grad_norm": 0.07141443090192924, "learning_rate": 9.912372939026126e-06, "loss": 0.0045, "step": 46730 }, { "epoch": 0.3074939310408347, "grad_norm": 0.017976167738335148, "learning_rate": 9.912265894322925e-06, "loss": 0.0023, "step": 46740 }, { "epoch": 0.3075597192160681, "grad_norm": 0.08780736044369006, "learning_rate": 9.912158784855738e-06, "loss": 0.0015, "step": 46750 }, { "epoch": 0.3076255073913015, "grad_norm": 0.15256819449343825, "learning_rate": 9.912051610625981e-06, "loss": 0.0022, "step": 46760 }, { "epoch": 0.30769129556653485, "grad_norm": 0.1454049530066538, "learning_rate": 9.911944371635065e-06, "loss": 0.0024, "step": 46770 }, { "epoch": 0.30775708374176824, "grad_norm": 0.09652506529915972, "learning_rate": 9.911837067884405e-06, "loss": 0.0016, "step": 46780 }, { "epoch": 0.30782287191700164, "grad_norm": 0.13888316229941763, "learning_rate": 9.911729699375416e-06, "loss": 0.0025, "step": 46790 }, { "epoch": 0.30788866009223503, "grad_norm": 0.641992811439434, "learning_rate": 9.911622266109512e-06, "loss": 0.0118, "step": 46800 }, { "epoch": 0.30795444826746843, "grad_norm": 0.041190459318113964, "learning_rate": 9.911514768088113e-06, "loss": 0.0017, "step": 46810 }, { "epoch": 0.30802023644270177, "grad_norm": 0.12185856886975328, "learning_rate": 9.911407205312632e-06, "loss": 0.005, "step": 46820 }, { "epoch": 0.30808602461793516, "grad_norm": 0.13703200334710208, "learning_rate": 9.911299577784487e-06, "loss": 0.0022, "step": 46830 }, { "epoch": 0.30815181279316856, "grad_norm": 0.44109417889973357, "learning_rate": 9.9111918855051e-06, "loss": 0.0036, "step": 46840 }, { "epoch": 0.30821760096840195, "grad_norm": 0.15052131888665826, "learning_rate": 9.91108412847589e-06, "loss": 0.002, "step": 46850 }, { "epoch": 0.30828338914363534, "grad_norm": 0.12447641529005926, "learning_rate": 9.910976306698277e-06, "loss": 0.0052, "step": 46860 }, { "epoch": 0.3083491773188687, "grad_norm": 0.07051003205596107, "learning_rate": 9.910868420173683e-06, "loss": 0.0018, "step": 46870 }, { "epoch": 0.3084149654941021, "grad_norm": 0.038777699691674765, "learning_rate": 9.910760468903529e-06, "loss": 0.0029, "step": 46880 }, { "epoch": 0.3084807536693355, "grad_norm": 0.047350582532835396, "learning_rate": 9.91065245288924e-06, "loss": 0.0023, "step": 46890 }, { "epoch": 0.30854654184456887, "grad_norm": 0.021081469530213677, "learning_rate": 9.91054437213224e-06, "loss": 0.0024, "step": 46900 }, { "epoch": 0.30861233001980226, "grad_norm": 0.04481055929025106, "learning_rate": 9.910436226633952e-06, "loss": 0.0015, "step": 46910 }, { "epoch": 0.3086781181950356, "grad_norm": 0.013283633323752535, "learning_rate": 9.910328016395806e-06, "loss": 0.0029, "step": 46920 }, { "epoch": 0.308743906370269, "grad_norm": 0.07935473032909725, "learning_rate": 9.910219741419225e-06, "loss": 0.0015, "step": 46930 }, { "epoch": 0.3088096945455024, "grad_norm": 0.16020506001993257, "learning_rate": 9.910111401705636e-06, "loss": 0.0016, "step": 46940 }, { "epoch": 0.3088754827207358, "grad_norm": 0.3430271251517056, "learning_rate": 9.91000299725647e-06, "loss": 0.0058, "step": 46950 }, { "epoch": 0.3089412708959692, "grad_norm": 0.14344411131078855, "learning_rate": 9.909894528073155e-06, "loss": 0.0026, "step": 46960 }, { "epoch": 0.3090070590712025, "grad_norm": 0.009604178949248377, "learning_rate": 9.909785994157122e-06, "loss": 0.0026, "step": 46970 }, { "epoch": 0.3090728472464359, "grad_norm": 0.040765580528671685, "learning_rate": 9.9096773955098e-06, "loss": 0.0015, "step": 46980 }, { "epoch": 0.3091386354216693, "grad_norm": 0.10533185518926416, "learning_rate": 9.909568732132621e-06, "loss": 0.002, "step": 46990 }, { "epoch": 0.3092044235969027, "grad_norm": 0.18957131826320922, "learning_rate": 9.909460004027018e-06, "loss": 0.0029, "step": 47000 }, { "epoch": 0.3092702117721361, "grad_norm": 0.04930222675432839, "learning_rate": 9.909351211194428e-06, "loss": 0.0024, "step": 47010 }, { "epoch": 0.30933599994736943, "grad_norm": 0.12386709564719145, "learning_rate": 9.909242353636279e-06, "loss": 0.0015, "step": 47020 }, { "epoch": 0.3094017881226028, "grad_norm": 0.03594329167782134, "learning_rate": 9.909133431354012e-06, "loss": 0.0024, "step": 47030 }, { "epoch": 0.3094675762978362, "grad_norm": 0.21307462251367645, "learning_rate": 9.90902444434906e-06, "loss": 0.0026, "step": 47040 }, { "epoch": 0.3095333644730696, "grad_norm": 0.04793576122061521, "learning_rate": 9.908915392622858e-06, "loss": 0.0034, "step": 47050 }, { "epoch": 0.309599152648303, "grad_norm": 0.1413832599066292, "learning_rate": 9.90880627617685e-06, "loss": 0.0046, "step": 47060 }, { "epoch": 0.30966494082353635, "grad_norm": 0.09277995214906788, "learning_rate": 9.908697095012467e-06, "loss": 0.0031, "step": 47070 }, { "epoch": 0.30973072899876974, "grad_norm": 0.06024770981340338, "learning_rate": 9.908587849131154e-06, "loss": 0.002, "step": 47080 }, { "epoch": 0.30979651717400314, "grad_norm": 0.10030693403604643, "learning_rate": 9.908478538534349e-06, "loss": 0.0033, "step": 47090 }, { "epoch": 0.30986230534923653, "grad_norm": 0.13225914533200123, "learning_rate": 9.908369163223496e-06, "loss": 0.0043, "step": 47100 }, { "epoch": 0.3099280935244699, "grad_norm": 0.09411810884217724, "learning_rate": 9.908259723200033e-06, "loss": 0.0025, "step": 47110 }, { "epoch": 0.3099938816997033, "grad_norm": 0.6444107391797963, "learning_rate": 9.908150218465404e-06, "loss": 0.0055, "step": 47120 }, { "epoch": 0.31005966987493666, "grad_norm": 0.023345511447641597, "learning_rate": 9.908040649021054e-06, "loss": 0.002, "step": 47130 }, { "epoch": 0.31012545805017006, "grad_norm": 0.03800658153200313, "learning_rate": 9.907931014868427e-06, "loss": 0.0031, "step": 47140 }, { "epoch": 0.31019124622540345, "grad_norm": 0.29369843472706075, "learning_rate": 9.907821316008968e-06, "loss": 0.0028, "step": 47150 }, { "epoch": 0.31025703440063684, "grad_norm": 0.2498850195276778, "learning_rate": 9.907711552444124e-06, "loss": 0.0021, "step": 47160 }, { "epoch": 0.31032282257587024, "grad_norm": 0.034098794875814874, "learning_rate": 9.907601724175342e-06, "loss": 0.0022, "step": 47170 }, { "epoch": 0.3103886107511036, "grad_norm": 0.07075571165689248, "learning_rate": 9.90749183120407e-06, "loss": 0.0022, "step": 47180 }, { "epoch": 0.31045439892633697, "grad_norm": 0.1713753671677234, "learning_rate": 9.907381873531754e-06, "loss": 0.0034, "step": 47190 }, { "epoch": 0.31052018710157037, "grad_norm": 0.2708576155358331, "learning_rate": 9.907271851159849e-06, "loss": 0.0033, "step": 47200 }, { "epoch": 0.31058597527680376, "grad_norm": 0.11924084889691958, "learning_rate": 9.907161764089803e-06, "loss": 0.0029, "step": 47210 }, { "epoch": 0.31065176345203716, "grad_norm": 0.07861636526395838, "learning_rate": 9.907051612323065e-06, "loss": 0.0032, "step": 47220 }, { "epoch": 0.3107175516272705, "grad_norm": 0.07071008094829623, "learning_rate": 9.906941395861092e-06, "loss": 0.0034, "step": 47230 }, { "epoch": 0.3107833398025039, "grad_norm": 0.23519227296090373, "learning_rate": 9.906831114705333e-06, "loss": 0.0018, "step": 47240 }, { "epoch": 0.3108491279777373, "grad_norm": 0.17807082644700606, "learning_rate": 9.906720768857243e-06, "loss": 0.0032, "step": 47250 }, { "epoch": 0.3109149161529707, "grad_norm": 0.028229627890959574, "learning_rate": 9.906610358318278e-06, "loss": 0.0015, "step": 47260 }, { "epoch": 0.31098070432820407, "grad_norm": 0.019194348195701746, "learning_rate": 9.906499883089894e-06, "loss": 0.0038, "step": 47270 }, { "epoch": 0.3110464925034374, "grad_norm": 0.0811055217631956, "learning_rate": 9.906389343173546e-06, "loss": 0.0022, "step": 47280 }, { "epoch": 0.3111122806786708, "grad_norm": 0.024650242102967485, "learning_rate": 9.906278738570692e-06, "loss": 0.0011, "step": 47290 }, { "epoch": 0.3111780688539042, "grad_norm": 0.07679205900293574, "learning_rate": 9.90616806928279e-06, "loss": 0.004, "step": 47300 }, { "epoch": 0.3112438570291376, "grad_norm": 0.04698833065000821, "learning_rate": 9.906057335311298e-06, "loss": 0.0025, "step": 47310 }, { "epoch": 0.311309645204371, "grad_norm": 0.14098094755334203, "learning_rate": 9.90594653665768e-06, "loss": 0.002, "step": 47320 }, { "epoch": 0.3113754333796043, "grad_norm": 0.05239623519456225, "learning_rate": 9.905835673323391e-06, "loss": 0.0016, "step": 47330 }, { "epoch": 0.3114412215548377, "grad_norm": 0.0952113264180125, "learning_rate": 9.905724745309897e-06, "loss": 0.0023, "step": 47340 }, { "epoch": 0.3115070097300711, "grad_norm": 0.03559396974334064, "learning_rate": 9.90561375261866e-06, "loss": 0.0022, "step": 47350 }, { "epoch": 0.3115727979053045, "grad_norm": 0.03893914996345903, "learning_rate": 9.90550269525114e-06, "loss": 0.0022, "step": 47360 }, { "epoch": 0.3116385860805379, "grad_norm": 0.07511703209899813, "learning_rate": 9.905391573208807e-06, "loss": 0.0021, "step": 47370 }, { "epoch": 0.31170437425577124, "grad_norm": 0.021966608076077847, "learning_rate": 9.905280386493121e-06, "loss": 0.0038, "step": 47380 }, { "epoch": 0.31177016243100464, "grad_norm": 0.11588615829020733, "learning_rate": 9.90516913510555e-06, "loss": 0.0042, "step": 47390 }, { "epoch": 0.31183595060623803, "grad_norm": 0.05839775033067665, "learning_rate": 9.905057819047558e-06, "loss": 0.002, "step": 47400 }, { "epoch": 0.3119017387814714, "grad_norm": 0.1091918608341405, "learning_rate": 9.904946438320619e-06, "loss": 0.0035, "step": 47410 }, { "epoch": 0.3119675269567048, "grad_norm": 0.12829741499003264, "learning_rate": 9.904834992926195e-06, "loss": 0.0023, "step": 47420 }, { "epoch": 0.31203331513193816, "grad_norm": 0.1614054039485076, "learning_rate": 9.904723482865756e-06, "loss": 0.0041, "step": 47430 }, { "epoch": 0.31209910330717155, "grad_norm": 0.23299431217569488, "learning_rate": 9.904611908140777e-06, "loss": 0.0027, "step": 47440 }, { "epoch": 0.31216489148240495, "grad_norm": 0.0954719186525637, "learning_rate": 9.904500268752725e-06, "loss": 0.0029, "step": 47450 }, { "epoch": 0.31223067965763834, "grad_norm": 0.0452203999950053, "learning_rate": 9.904388564703071e-06, "loss": 0.0015, "step": 47460 }, { "epoch": 0.31229646783287174, "grad_norm": 0.06397106094894539, "learning_rate": 9.904276795993292e-06, "loss": 0.0039, "step": 47470 }, { "epoch": 0.3123622560081051, "grad_norm": 0.1718166856498394, "learning_rate": 9.904164962624858e-06, "loss": 0.0027, "step": 47480 }, { "epoch": 0.31242804418333847, "grad_norm": 0.09884053582124568, "learning_rate": 9.904053064599244e-06, "loss": 0.0026, "step": 47490 }, { "epoch": 0.31249383235857187, "grad_norm": 0.23113219741748064, "learning_rate": 9.903941101917927e-06, "loss": 0.0042, "step": 47500 }, { "epoch": 0.31255962053380526, "grad_norm": 0.08453189381808322, "learning_rate": 9.90382907458238e-06, "loss": 0.0027, "step": 47510 }, { "epoch": 0.31262540870903865, "grad_norm": 0.07696289061684827, "learning_rate": 9.903716982594084e-06, "loss": 0.0023, "step": 47520 }, { "epoch": 0.312691196884272, "grad_norm": 0.11675451689022338, "learning_rate": 9.903604825954514e-06, "loss": 0.0031, "step": 47530 }, { "epoch": 0.3127569850595054, "grad_norm": 0.2859487302792516, "learning_rate": 9.903492604665148e-06, "loss": 0.0022, "step": 47540 }, { "epoch": 0.3128227732347388, "grad_norm": 0.11966265659907649, "learning_rate": 9.903380318727469e-06, "loss": 0.0041, "step": 47550 }, { "epoch": 0.3128885614099722, "grad_norm": 0.13089184827505643, "learning_rate": 9.903267968142953e-06, "loss": 0.0029, "step": 47560 }, { "epoch": 0.31295434958520557, "grad_norm": 0.18178859858535304, "learning_rate": 9.903155552913086e-06, "loss": 0.0032, "step": 47570 }, { "epoch": 0.31302013776043897, "grad_norm": 0.20066029830096638, "learning_rate": 9.903043073039347e-06, "loss": 0.0023, "step": 47580 }, { "epoch": 0.3130859259356723, "grad_norm": 0.0818753495138074, "learning_rate": 9.902930528523218e-06, "loss": 0.0027, "step": 47590 }, { "epoch": 0.3131517141109057, "grad_norm": 0.20316520457598905, "learning_rate": 9.902817919366186e-06, "loss": 0.0016, "step": 47600 }, { "epoch": 0.3132175022861391, "grad_norm": 0.06454887678190982, "learning_rate": 9.902705245569733e-06, "loss": 0.0026, "step": 47610 }, { "epoch": 0.3132832904613725, "grad_norm": 0.05919210706169634, "learning_rate": 9.902592507135346e-06, "loss": 0.0011, "step": 47620 }, { "epoch": 0.3133490786366059, "grad_norm": 0.015764051438717154, "learning_rate": 9.902479704064512e-06, "loss": 0.003, "step": 47630 }, { "epoch": 0.3134148668118392, "grad_norm": 0.10321843149882703, "learning_rate": 9.902366836358717e-06, "loss": 0.0035, "step": 47640 }, { "epoch": 0.3134806549870726, "grad_norm": 0.06344695462499633, "learning_rate": 9.902253904019448e-06, "loss": 0.0035, "step": 47650 }, { "epoch": 0.313546443162306, "grad_norm": 0.06831587986083244, "learning_rate": 9.902140907048197e-06, "loss": 0.0018, "step": 47660 }, { "epoch": 0.3136122313375394, "grad_norm": 0.2480729175755959, "learning_rate": 9.902027845446451e-06, "loss": 0.0058, "step": 47670 }, { "epoch": 0.3136780195127728, "grad_norm": 0.2308668955983607, "learning_rate": 9.9019147192157e-06, "loss": 0.0038, "step": 47680 }, { "epoch": 0.31374380768800614, "grad_norm": 0.6047110430844436, "learning_rate": 9.901801528357438e-06, "loss": 0.0098, "step": 47690 }, { "epoch": 0.31380959586323953, "grad_norm": 0.23981073309241124, "learning_rate": 9.901688272873157e-06, "loss": 0.0026, "step": 47700 }, { "epoch": 0.3138753840384729, "grad_norm": 0.05662012598039378, "learning_rate": 9.901574952764349e-06, "loss": 0.0022, "step": 47710 }, { "epoch": 0.3139411722137063, "grad_norm": 0.12618965750278968, "learning_rate": 9.901461568032509e-06, "loss": 0.0025, "step": 47720 }, { "epoch": 0.3140069603889397, "grad_norm": 0.3255725353312789, "learning_rate": 9.90134811867913e-06, "loss": 0.0029, "step": 47730 }, { "epoch": 0.31407274856417305, "grad_norm": 0.10409126504818962, "learning_rate": 9.90123460470571e-06, "loss": 0.002, "step": 47740 }, { "epoch": 0.31413853673940645, "grad_norm": 0.14922273461237182, "learning_rate": 9.901121026113746e-06, "loss": 0.0062, "step": 47750 }, { "epoch": 0.31420432491463984, "grad_norm": 0.05470608447368228, "learning_rate": 9.901007382904733e-06, "loss": 0.0015, "step": 47760 }, { "epoch": 0.31427011308987324, "grad_norm": 0.06808431454573957, "learning_rate": 9.90089367508017e-06, "loss": 0.0027, "step": 47770 }, { "epoch": 0.31433590126510663, "grad_norm": 0.10454731101226068, "learning_rate": 9.900779902641557e-06, "loss": 0.0023, "step": 47780 }, { "epoch": 0.31440168944033997, "grad_norm": 0.0265336564499254, "learning_rate": 9.900666065590395e-06, "loss": 0.0034, "step": 47790 }, { "epoch": 0.31446747761557337, "grad_norm": 0.16524380224550753, "learning_rate": 9.90055216392818e-06, "loss": 0.0045, "step": 47800 }, { "epoch": 0.31453326579080676, "grad_norm": 0.1298092613469491, "learning_rate": 9.900438197656419e-06, "loss": 0.0021, "step": 47810 }, { "epoch": 0.31459905396604015, "grad_norm": 0.015385355436027549, "learning_rate": 9.900324166776613e-06, "loss": 0.0027, "step": 47820 }, { "epoch": 0.31466484214127355, "grad_norm": 0.09160549255957576, "learning_rate": 9.900210071290267e-06, "loss": 0.0035, "step": 47830 }, { "epoch": 0.3147306303165069, "grad_norm": 0.09861785087550749, "learning_rate": 9.90009591119888e-06, "loss": 0.0025, "step": 47840 }, { "epoch": 0.3147964184917403, "grad_norm": 0.07961061759317446, "learning_rate": 9.89998168650396e-06, "loss": 0.0034, "step": 47850 }, { "epoch": 0.3148622066669737, "grad_norm": 0.14510168653313044, "learning_rate": 9.899867397207017e-06, "loss": 0.0062, "step": 47860 }, { "epoch": 0.31492799484220707, "grad_norm": 0.15379451176503983, "learning_rate": 9.899753043309551e-06, "loss": 0.0024, "step": 47870 }, { "epoch": 0.31499378301744047, "grad_norm": 0.19392484758402115, "learning_rate": 9.899638624813074e-06, "loss": 0.0053, "step": 47880 }, { "epoch": 0.3150595711926738, "grad_norm": 0.08391565942968443, "learning_rate": 9.899524141719093e-06, "loss": 0.0032, "step": 47890 }, { "epoch": 0.3151253593679072, "grad_norm": 0.058907392386710083, "learning_rate": 9.899409594029119e-06, "loss": 0.002, "step": 47900 }, { "epoch": 0.3151911475431406, "grad_norm": 0.07129137835155831, "learning_rate": 9.89929498174466e-06, "loss": 0.003, "step": 47910 }, { "epoch": 0.315256935718374, "grad_norm": 0.03130082917048551, "learning_rate": 9.899180304867226e-06, "loss": 0.0017, "step": 47920 }, { "epoch": 0.3153227238936074, "grad_norm": 0.12934811349063474, "learning_rate": 9.899065563398333e-06, "loss": 0.002, "step": 47930 }, { "epoch": 0.3153885120688407, "grad_norm": 0.2665771327157871, "learning_rate": 9.898950757339491e-06, "loss": 0.0026, "step": 47940 }, { "epoch": 0.3154543002440741, "grad_norm": 0.09635028074370305, "learning_rate": 9.898835886692216e-06, "loss": 0.0025, "step": 47950 }, { "epoch": 0.3155200884193075, "grad_norm": 0.08151573645257072, "learning_rate": 9.898720951458019e-06, "loss": 0.0027, "step": 47960 }, { "epoch": 0.3155858765945409, "grad_norm": 0.0232027579906718, "learning_rate": 9.898605951638419e-06, "loss": 0.0012, "step": 47970 }, { "epoch": 0.3156516647697743, "grad_norm": 0.03686251641239001, "learning_rate": 9.898490887234928e-06, "loss": 0.0032, "step": 47980 }, { "epoch": 0.31571745294500764, "grad_norm": 0.15592636522582687, "learning_rate": 9.898375758249066e-06, "loss": 0.0021, "step": 47990 }, { "epoch": 0.31578324112024103, "grad_norm": 0.0554510597654615, "learning_rate": 9.89826056468235e-06, "loss": 0.0025, "step": 48000 }, { "epoch": 0.3158490292954744, "grad_norm": 0.1805074812523767, "learning_rate": 9.898145306536298e-06, "loss": 0.0034, "step": 48010 }, { "epoch": 0.3159148174707078, "grad_norm": 0.08133106625681724, "learning_rate": 9.898029983812432e-06, "loss": 0.0028, "step": 48020 }, { "epoch": 0.3159806056459412, "grad_norm": 0.07355062788839807, "learning_rate": 9.89791459651227e-06, "loss": 0.0017, "step": 48030 }, { "epoch": 0.3160463938211746, "grad_norm": 0.17888221818287778, "learning_rate": 9.897799144637334e-06, "loss": 0.0023, "step": 48040 }, { "epoch": 0.31611218199640795, "grad_norm": 0.029545010412647267, "learning_rate": 9.897683628189148e-06, "loss": 0.0024, "step": 48050 }, { "epoch": 0.31617797017164134, "grad_norm": 0.03664427469683022, "learning_rate": 9.897568047169231e-06, "loss": 0.0021, "step": 48060 }, { "epoch": 0.31624375834687474, "grad_norm": 0.1605715965115024, "learning_rate": 9.897452401579111e-06, "loss": 0.0017, "step": 48070 }, { "epoch": 0.31630954652210813, "grad_norm": 0.13118424950624683, "learning_rate": 9.897336691420309e-06, "loss": 0.0029, "step": 48080 }, { "epoch": 0.3163753346973415, "grad_norm": 0.03445172691860861, "learning_rate": 9.897220916694355e-06, "loss": 0.0021, "step": 48090 }, { "epoch": 0.31644112287257486, "grad_norm": 0.14764344876208615, "learning_rate": 9.89710507740277e-06, "loss": 0.0025, "step": 48100 }, { "epoch": 0.31650691104780826, "grad_norm": 0.005374435837604715, "learning_rate": 9.896989173547085e-06, "loss": 0.0012, "step": 48110 }, { "epoch": 0.31657269922304165, "grad_norm": 0.07754860694043957, "learning_rate": 9.896873205128828e-06, "loss": 0.0026, "step": 48120 }, { "epoch": 0.31663848739827505, "grad_norm": 0.014114474819998193, "learning_rate": 9.896757172149525e-06, "loss": 0.0037, "step": 48130 }, { "epoch": 0.31670427557350844, "grad_norm": 0.06372859273415796, "learning_rate": 9.896641074610709e-06, "loss": 0.0022, "step": 48140 }, { "epoch": 0.3167700637487418, "grad_norm": 0.10481744883946534, "learning_rate": 9.89652491251391e-06, "loss": 0.0074, "step": 48150 }, { "epoch": 0.3168358519239752, "grad_norm": 0.023159443158943638, "learning_rate": 9.896408685860657e-06, "loss": 0.0013, "step": 48160 }, { "epoch": 0.31690164009920857, "grad_norm": 0.7743063211821304, "learning_rate": 9.896292394652486e-06, "loss": 0.0043, "step": 48170 }, { "epoch": 0.31696742827444196, "grad_norm": 0.06368183649211039, "learning_rate": 9.896176038890928e-06, "loss": 0.0029, "step": 48180 }, { "epoch": 0.31703321644967536, "grad_norm": 0.151420747122595, "learning_rate": 9.896059618577519e-06, "loss": 0.0027, "step": 48190 }, { "epoch": 0.3170990046249087, "grad_norm": 0.13663164232825126, "learning_rate": 9.89594313371379e-06, "loss": 0.0025, "step": 48200 }, { "epoch": 0.3171647928001421, "grad_norm": 0.08097004188904695, "learning_rate": 9.89582658430128e-06, "loss": 0.0021, "step": 48210 }, { "epoch": 0.3172305809753755, "grad_norm": 0.017548948897902584, "learning_rate": 9.895709970341526e-06, "loss": 0.0019, "step": 48220 }, { "epoch": 0.3172963691506089, "grad_norm": 0.030262379320228742, "learning_rate": 9.895593291836062e-06, "loss": 0.0032, "step": 48230 }, { "epoch": 0.3173621573258423, "grad_norm": 0.14131836830488154, "learning_rate": 9.89547654878643e-06, "loss": 0.0021, "step": 48240 }, { "epoch": 0.3174279455010756, "grad_norm": 0.1123167236073681, "learning_rate": 9.895359741194168e-06, "loss": 0.0019, "step": 48250 }, { "epoch": 0.317493733676309, "grad_norm": 0.13065525029870953, "learning_rate": 9.895242869060815e-06, "loss": 0.0028, "step": 48260 }, { "epoch": 0.3175595218515424, "grad_norm": 0.05562507032924043, "learning_rate": 9.895125932387913e-06, "loss": 0.0037, "step": 48270 }, { "epoch": 0.3176253100267758, "grad_norm": 0.20390319005231639, "learning_rate": 9.895008931177003e-06, "loss": 0.0021, "step": 48280 }, { "epoch": 0.3176910982020092, "grad_norm": 0.17412981130289598, "learning_rate": 9.894891865429628e-06, "loss": 0.0031, "step": 48290 }, { "epoch": 0.31775688637724253, "grad_norm": 0.009090826028231698, "learning_rate": 9.894774735147331e-06, "loss": 0.0025, "step": 48300 }, { "epoch": 0.3178226745524759, "grad_norm": 0.18108240331879843, "learning_rate": 9.894657540331657e-06, "loss": 0.0033, "step": 48310 }, { "epoch": 0.3178884627277093, "grad_norm": 0.07697857481675083, "learning_rate": 9.89454028098415e-06, "loss": 0.0038, "step": 48320 }, { "epoch": 0.3179542509029427, "grad_norm": 0.028635406492283317, "learning_rate": 9.894422957106358e-06, "loss": 0.0029, "step": 48330 }, { "epoch": 0.3180200390781761, "grad_norm": 0.11077376823687686, "learning_rate": 9.894305568699825e-06, "loss": 0.003, "step": 48340 }, { "epoch": 0.31808582725340945, "grad_norm": 0.11977843848960093, "learning_rate": 9.894188115766102e-06, "loss": 0.0038, "step": 48350 }, { "epoch": 0.31815161542864284, "grad_norm": 0.04304268052028586, "learning_rate": 9.894070598306734e-06, "loss": 0.0016, "step": 48360 }, { "epoch": 0.31821740360387624, "grad_norm": 0.26841013058998026, "learning_rate": 9.89395301632327e-06, "loss": 0.0042, "step": 48370 }, { "epoch": 0.31828319177910963, "grad_norm": 0.07035491288394334, "learning_rate": 9.893835369817266e-06, "loss": 0.0042, "step": 48380 }, { "epoch": 0.318348979954343, "grad_norm": 0.10834620891717119, "learning_rate": 9.893717658790266e-06, "loss": 0.004, "step": 48390 }, { "epoch": 0.31841476812957636, "grad_norm": 0.06770752707192629, "learning_rate": 9.893599883243828e-06, "loss": 0.002, "step": 48400 }, { "epoch": 0.31848055630480976, "grad_norm": 0.06355001396333039, "learning_rate": 9.8934820431795e-06, "loss": 0.0036, "step": 48410 }, { "epoch": 0.31854634448004315, "grad_norm": 0.043278880329915795, "learning_rate": 9.893364138598838e-06, "loss": 0.0019, "step": 48420 }, { "epoch": 0.31861213265527655, "grad_norm": 0.13060235003930998, "learning_rate": 9.893246169503396e-06, "loss": 0.004, "step": 48430 }, { "epoch": 0.31867792083050994, "grad_norm": 0.11383857883839259, "learning_rate": 9.893128135894729e-06, "loss": 0.0019, "step": 48440 }, { "epoch": 0.3187437090057433, "grad_norm": 0.08548196772669435, "learning_rate": 9.893010037774394e-06, "loss": 0.0028, "step": 48450 }, { "epoch": 0.3188094971809767, "grad_norm": 0.14285302287154997, "learning_rate": 9.892891875143948e-06, "loss": 0.0025, "step": 48460 }, { "epoch": 0.31887528535621007, "grad_norm": 0.1786309967660769, "learning_rate": 9.892773648004947e-06, "loss": 0.0025, "step": 48470 }, { "epoch": 0.31894107353144346, "grad_norm": 0.22828619610611936, "learning_rate": 9.892655356358953e-06, "loss": 0.0037, "step": 48480 }, { "epoch": 0.31900686170667686, "grad_norm": 0.04092129223434248, "learning_rate": 9.892537000207523e-06, "loss": 0.0012, "step": 48490 }, { "epoch": 0.31907264988191025, "grad_norm": 0.03966334996938687, "learning_rate": 9.892418579552217e-06, "loss": 0.0055, "step": 48500 }, { "epoch": 0.3191384380571436, "grad_norm": 0.06956416134859932, "learning_rate": 9.892300094394599e-06, "loss": 0.0018, "step": 48510 }, { "epoch": 0.319204226232377, "grad_norm": 0.10778708895969363, "learning_rate": 9.892181544736227e-06, "loss": 0.002, "step": 48520 }, { "epoch": 0.3192700144076104, "grad_norm": 0.3252510099626808, "learning_rate": 9.892062930578669e-06, "loss": 0.0022, "step": 48530 }, { "epoch": 0.3193358025828438, "grad_norm": 0.11885612131085331, "learning_rate": 9.891944251923487e-06, "loss": 0.0034, "step": 48540 }, { "epoch": 0.31940159075807717, "grad_norm": 0.02811096906501253, "learning_rate": 9.891825508772243e-06, "loss": 0.0023, "step": 48550 }, { "epoch": 0.3194673789333105, "grad_norm": 0.12298722230022625, "learning_rate": 9.891706701126505e-06, "loss": 0.0029, "step": 48560 }, { "epoch": 0.3195331671085439, "grad_norm": 0.07829115489039296, "learning_rate": 9.891587828987838e-06, "loss": 0.0022, "step": 48570 }, { "epoch": 0.3195989552837773, "grad_norm": 0.0694955693212719, "learning_rate": 9.891468892357812e-06, "loss": 0.0019, "step": 48580 }, { "epoch": 0.3196647434590107, "grad_norm": 0.03760190081363733, "learning_rate": 9.891349891237993e-06, "loss": 0.0027, "step": 48590 }, { "epoch": 0.3197305316342441, "grad_norm": 0.10234322009124142, "learning_rate": 9.89123082562995e-06, "loss": 0.0024, "step": 48600 }, { "epoch": 0.3197963198094774, "grad_norm": 0.06393554733207643, "learning_rate": 9.891111695535251e-06, "loss": 0.0045, "step": 48610 }, { "epoch": 0.3198621079847108, "grad_norm": 0.01854682108308274, "learning_rate": 9.89099250095547e-06, "loss": 0.004, "step": 48620 }, { "epoch": 0.3199278961599442, "grad_norm": 0.04144250388879282, "learning_rate": 9.890873241892178e-06, "loss": 0.0019, "step": 48630 }, { "epoch": 0.3199936843351776, "grad_norm": 0.016696233538302876, "learning_rate": 9.890753918346945e-06, "loss": 0.0044, "step": 48640 }, { "epoch": 0.320059472510411, "grad_norm": 0.1685367732062804, "learning_rate": 9.890634530321346e-06, "loss": 0.0037, "step": 48650 }, { "epoch": 0.32012526068564434, "grad_norm": 0.007086936573889325, "learning_rate": 9.890515077816953e-06, "loss": 0.0023, "step": 48660 }, { "epoch": 0.32019104886087774, "grad_norm": 0.11200610900385555, "learning_rate": 9.890395560835345e-06, "loss": 0.0027, "step": 48670 }, { "epoch": 0.32025683703611113, "grad_norm": 0.030567870559724805, "learning_rate": 9.890275979378094e-06, "loss": 0.0044, "step": 48680 }, { "epoch": 0.3203226252113445, "grad_norm": 0.1364003773977538, "learning_rate": 9.890156333446777e-06, "loss": 0.0045, "step": 48690 }, { "epoch": 0.3203884133865779, "grad_norm": 0.0991753229156541, "learning_rate": 9.890036623042972e-06, "loss": 0.0042, "step": 48700 }, { "epoch": 0.32045420156181126, "grad_norm": 0.08301761927627353, "learning_rate": 9.88991684816826e-06, "loss": 0.0028, "step": 48710 }, { "epoch": 0.32051998973704465, "grad_norm": 0.13451751075926555, "learning_rate": 9.889797008824215e-06, "loss": 0.0037, "step": 48720 }, { "epoch": 0.32058577791227805, "grad_norm": 0.03479182604652776, "learning_rate": 9.88967710501242e-06, "loss": 0.0069, "step": 48730 }, { "epoch": 0.32065156608751144, "grad_norm": 0.08765661591371311, "learning_rate": 9.889557136734455e-06, "loss": 0.0039, "step": 48740 }, { "epoch": 0.32071735426274484, "grad_norm": 0.046549694189628614, "learning_rate": 9.889437103991904e-06, "loss": 0.0052, "step": 48750 }, { "epoch": 0.3207831424379782, "grad_norm": 0.04354013827094156, "learning_rate": 9.889317006786345e-06, "loss": 0.0026, "step": 48760 }, { "epoch": 0.32084893061321157, "grad_norm": 0.024641041119445612, "learning_rate": 9.889196845119367e-06, "loss": 0.0019, "step": 48770 }, { "epoch": 0.32091471878844496, "grad_norm": 0.3834491265835456, "learning_rate": 9.889076618992548e-06, "loss": 0.0041, "step": 48780 }, { "epoch": 0.32098050696367836, "grad_norm": 0.03293911322045692, "learning_rate": 9.888956328407478e-06, "loss": 0.0024, "step": 48790 }, { "epoch": 0.32104629513891175, "grad_norm": 0.25082036310822975, "learning_rate": 9.888835973365741e-06, "loss": 0.0039, "step": 48800 }, { "epoch": 0.3211120833141451, "grad_norm": 0.12248942720699545, "learning_rate": 9.888715553868924e-06, "loss": 0.0023, "step": 48810 }, { "epoch": 0.3211778714893785, "grad_norm": 0.21409699964486306, "learning_rate": 9.888595069918614e-06, "loss": 0.0034, "step": 48820 }, { "epoch": 0.3212436596646119, "grad_norm": 0.09008880132559105, "learning_rate": 9.888474521516401e-06, "loss": 0.003, "step": 48830 }, { "epoch": 0.3213094478398453, "grad_norm": 0.2360496270349424, "learning_rate": 9.888353908663872e-06, "loss": 0.0027, "step": 48840 }, { "epoch": 0.32137523601507867, "grad_norm": 0.08919356726117995, "learning_rate": 9.888233231362621e-06, "loss": 0.0017, "step": 48850 }, { "epoch": 0.321441024190312, "grad_norm": 0.04345143522491655, "learning_rate": 9.888112489614235e-06, "loss": 0.0036, "step": 48860 }, { "epoch": 0.3215068123655454, "grad_norm": 0.11910956283392643, "learning_rate": 9.887991683420308e-06, "loss": 0.0023, "step": 48870 }, { "epoch": 0.3215726005407788, "grad_norm": 0.29284907580333625, "learning_rate": 9.887870812782432e-06, "loss": 0.0032, "step": 48880 }, { "epoch": 0.3216383887160122, "grad_norm": 0.10041730336211065, "learning_rate": 9.8877498777022e-06, "loss": 0.0022, "step": 48890 }, { "epoch": 0.3217041768912456, "grad_norm": 0.04727521778683958, "learning_rate": 9.887628878181208e-06, "loss": 0.0037, "step": 48900 }, { "epoch": 0.3217699650664789, "grad_norm": 0.12175000379783656, "learning_rate": 9.88750781422105e-06, "loss": 0.0018, "step": 48910 }, { "epoch": 0.3218357532417123, "grad_norm": 0.45105844527213595, "learning_rate": 9.887386685823324e-06, "loss": 0.0066, "step": 48920 }, { "epoch": 0.3219015414169457, "grad_norm": 0.022394402913661404, "learning_rate": 9.887265492989623e-06, "loss": 0.0028, "step": 48930 }, { "epoch": 0.3219673295921791, "grad_norm": 0.019864911025320876, "learning_rate": 9.887144235721549e-06, "loss": 0.0046, "step": 48940 }, { "epoch": 0.3220331177674125, "grad_norm": 0.10748498661263371, "learning_rate": 9.8870229140207e-06, "loss": 0.0026, "step": 48950 }, { "epoch": 0.32209890594264584, "grad_norm": 0.012898963782321436, "learning_rate": 9.886901527888673e-06, "loss": 0.0032, "step": 48960 }, { "epoch": 0.32216469411787924, "grad_norm": 0.11236630380478499, "learning_rate": 9.88678007732707e-06, "loss": 0.0033, "step": 48970 }, { "epoch": 0.32223048229311263, "grad_norm": 0.04330098536307177, "learning_rate": 9.886658562337493e-06, "loss": 0.0024, "step": 48980 }, { "epoch": 0.322296270468346, "grad_norm": 0.18116017047769106, "learning_rate": 9.886536982921541e-06, "loss": 0.0028, "step": 48990 }, { "epoch": 0.3223620586435794, "grad_norm": 0.1265869838755358, "learning_rate": 9.886415339080823e-06, "loss": 0.0027, "step": 49000 }, { "epoch": 0.3224278468188128, "grad_norm": 0.2167267578156157, "learning_rate": 9.886293630816937e-06, "loss": 0.0028, "step": 49010 }, { "epoch": 0.32249363499404615, "grad_norm": 0.11854482608368204, "learning_rate": 9.886171858131489e-06, "loss": 0.0018, "step": 49020 }, { "epoch": 0.32255942316927955, "grad_norm": 0.08004394076815523, "learning_rate": 9.886050021026085e-06, "loss": 0.0025, "step": 49030 }, { "epoch": 0.32262521134451294, "grad_norm": 0.026250505657566103, "learning_rate": 9.885928119502333e-06, "loss": 0.0031, "step": 49040 }, { "epoch": 0.32269099951974634, "grad_norm": 0.09614082711132063, "learning_rate": 9.885806153561836e-06, "loss": 0.0033, "step": 49050 }, { "epoch": 0.32275678769497973, "grad_norm": 0.04289131743548292, "learning_rate": 9.885684123206208e-06, "loss": 0.0016, "step": 49060 }, { "epoch": 0.32282257587021307, "grad_norm": 0.12361058001092917, "learning_rate": 9.885562028437051e-06, "loss": 0.0029, "step": 49070 }, { "epoch": 0.32288836404544646, "grad_norm": 0.03307054931806825, "learning_rate": 9.88543986925598e-06, "loss": 0.0042, "step": 49080 }, { "epoch": 0.32295415222067986, "grad_norm": 0.0706525464951872, "learning_rate": 9.885317645664603e-06, "loss": 0.0055, "step": 49090 }, { "epoch": 0.32301994039591325, "grad_norm": 0.015191968358617159, "learning_rate": 9.885195357664532e-06, "loss": 0.0007, "step": 49100 }, { "epoch": 0.32308572857114665, "grad_norm": 0.025295540061535, "learning_rate": 9.88507300525738e-06, "loss": 0.0024, "step": 49110 }, { "epoch": 0.32315151674638, "grad_norm": 0.11088480610894182, "learning_rate": 9.884950588444758e-06, "loss": 0.0025, "step": 49120 }, { "epoch": 0.3232173049216134, "grad_norm": 0.04375773575295405, "learning_rate": 9.884828107228283e-06, "loss": 0.0022, "step": 49130 }, { "epoch": 0.3232830930968468, "grad_norm": 0.05086740019336339, "learning_rate": 9.884705561609567e-06, "loss": 0.0015, "step": 49140 }, { "epoch": 0.32334888127208017, "grad_norm": 0.1508753279802573, "learning_rate": 9.884582951590228e-06, "loss": 0.0018, "step": 49150 }, { "epoch": 0.32341466944731356, "grad_norm": 0.023771967080111558, "learning_rate": 9.884460277171881e-06, "loss": 0.0016, "step": 49160 }, { "epoch": 0.3234804576225469, "grad_norm": 0.09175641881179572, "learning_rate": 9.884337538356144e-06, "loss": 0.0035, "step": 49170 }, { "epoch": 0.3235462457977803, "grad_norm": 0.24003054659834128, "learning_rate": 9.884214735144634e-06, "loss": 0.0035, "step": 49180 }, { "epoch": 0.3236120339730137, "grad_norm": 0.044852596699746915, "learning_rate": 9.884091867538972e-06, "loss": 0.002, "step": 49190 }, { "epoch": 0.3236778221482471, "grad_norm": 0.03863870605867666, "learning_rate": 9.883968935540775e-06, "loss": 0.0024, "step": 49200 }, { "epoch": 0.3237436103234805, "grad_norm": 0.0727802347972928, "learning_rate": 9.883845939151668e-06, "loss": 0.0024, "step": 49210 }, { "epoch": 0.3238093984987138, "grad_norm": 0.24883211764167978, "learning_rate": 9.88372287837327e-06, "loss": 0.0019, "step": 49220 }, { "epoch": 0.3238751866739472, "grad_norm": 0.10300289168632114, "learning_rate": 9.883599753207203e-06, "loss": 0.0028, "step": 49230 }, { "epoch": 0.3239409748491806, "grad_norm": 0.0757338506575712, "learning_rate": 9.88347656365509e-06, "loss": 0.0027, "step": 49240 }, { "epoch": 0.324006763024414, "grad_norm": 0.012326375682427608, "learning_rate": 9.883353309718557e-06, "loss": 0.002, "step": 49250 }, { "epoch": 0.3240725511996474, "grad_norm": 0.05490071483845238, "learning_rate": 9.883229991399228e-06, "loss": 0.0018, "step": 49260 }, { "epoch": 0.32413833937488074, "grad_norm": 0.05323012587463112, "learning_rate": 9.88310660869873e-06, "loss": 0.0045, "step": 49270 }, { "epoch": 0.32420412755011413, "grad_norm": 0.12996514631748307, "learning_rate": 9.88298316161869e-06, "loss": 0.0026, "step": 49280 }, { "epoch": 0.3242699157253475, "grad_norm": 0.10124259382187488, "learning_rate": 9.88285965016073e-06, "loss": 0.0029, "step": 49290 }, { "epoch": 0.3243357039005809, "grad_norm": 0.05188803802538859, "learning_rate": 9.882736074326485e-06, "loss": 0.0035, "step": 49300 }, { "epoch": 0.3244014920758143, "grad_norm": 0.09743626464949061, "learning_rate": 9.882612434117583e-06, "loss": 0.0015, "step": 49310 }, { "epoch": 0.32446728025104765, "grad_norm": 0.31026680441383175, "learning_rate": 9.882488729535652e-06, "loss": 0.0029, "step": 49320 }, { "epoch": 0.32453306842628105, "grad_norm": 0.035210638066898316, "learning_rate": 9.882364960582322e-06, "loss": 0.0017, "step": 49330 }, { "epoch": 0.32459885660151444, "grad_norm": 0.004236868860288836, "learning_rate": 9.882241127259228e-06, "loss": 0.002, "step": 49340 }, { "epoch": 0.32466464477674783, "grad_norm": 0.14606638327246815, "learning_rate": 9.882117229568002e-06, "loss": 0.0027, "step": 49350 }, { "epoch": 0.32473043295198123, "grad_norm": 0.017896438075059744, "learning_rate": 9.881993267510276e-06, "loss": 0.0018, "step": 49360 }, { "epoch": 0.32479622112721457, "grad_norm": 0.20141765692411628, "learning_rate": 9.881869241087687e-06, "loss": 0.0019, "step": 49370 }, { "epoch": 0.32486200930244796, "grad_norm": 0.05910315861913697, "learning_rate": 9.881745150301867e-06, "loss": 0.0032, "step": 49380 }, { "epoch": 0.32492779747768136, "grad_norm": 0.16330474421445065, "learning_rate": 9.881620995154453e-06, "loss": 0.004, "step": 49390 }, { "epoch": 0.32499358565291475, "grad_norm": 0.1398138453652658, "learning_rate": 9.881496775647082e-06, "loss": 0.0027, "step": 49400 }, { "epoch": 0.32505937382814815, "grad_norm": 0.04764038055315209, "learning_rate": 9.881372491781393e-06, "loss": 0.0026, "step": 49410 }, { "epoch": 0.3251251620033815, "grad_norm": 0.0403098274324599, "learning_rate": 9.881248143559022e-06, "loss": 0.003, "step": 49420 }, { "epoch": 0.3251909501786149, "grad_norm": 0.058555437233321586, "learning_rate": 9.881123730981613e-06, "loss": 0.0035, "step": 49430 }, { "epoch": 0.3252567383538483, "grad_norm": 0.4944603612986769, "learning_rate": 9.8809992540508e-06, "loss": 0.0026, "step": 49440 }, { "epoch": 0.32532252652908167, "grad_norm": 0.1626072288519918, "learning_rate": 9.880874712768228e-06, "loss": 0.0054, "step": 49450 }, { "epoch": 0.32538831470431506, "grad_norm": 0.1339421881223442, "learning_rate": 9.88075010713554e-06, "loss": 0.0022, "step": 49460 }, { "epoch": 0.32545410287954846, "grad_norm": 0.0742284323482645, "learning_rate": 9.880625437154375e-06, "loss": 0.0031, "step": 49470 }, { "epoch": 0.3255198910547818, "grad_norm": 0.04682365918862436, "learning_rate": 9.880500702826379e-06, "loss": 0.0026, "step": 49480 }, { "epoch": 0.3255856792300152, "grad_norm": 0.04502866468649149, "learning_rate": 9.880375904153196e-06, "loss": 0.0023, "step": 49490 }, { "epoch": 0.3256514674052486, "grad_norm": 0.05087496889083154, "learning_rate": 9.880251041136474e-06, "loss": 0.0023, "step": 49500 }, { "epoch": 0.325717255580482, "grad_norm": 0.13657234479274805, "learning_rate": 9.880126113777855e-06, "loss": 0.0028, "step": 49510 }, { "epoch": 0.3257830437557154, "grad_norm": 0.0830925336292051, "learning_rate": 9.880001122078987e-06, "loss": 0.0023, "step": 49520 }, { "epoch": 0.3258488319309487, "grad_norm": 0.6347349091078316, "learning_rate": 9.879876066041524e-06, "loss": 0.002, "step": 49530 }, { "epoch": 0.3259146201061821, "grad_norm": 0.2300800875534505, "learning_rate": 9.879750945667103e-06, "loss": 0.0038, "step": 49540 }, { "epoch": 0.3259804082814155, "grad_norm": 0.08968840133504312, "learning_rate": 9.879625760957384e-06, "loss": 0.0017, "step": 49550 }, { "epoch": 0.3260461964566489, "grad_norm": 0.11270252081775858, "learning_rate": 9.879500511914014e-06, "loss": 0.0039, "step": 49560 }, { "epoch": 0.3261119846318823, "grad_norm": 0.1829692354028916, "learning_rate": 9.879375198538642e-06, "loss": 0.0035, "step": 49570 }, { "epoch": 0.32617777280711563, "grad_norm": 0.07798396742498395, "learning_rate": 9.879249820832923e-06, "loss": 0.0024, "step": 49580 }, { "epoch": 0.326243560982349, "grad_norm": 0.11941635458958012, "learning_rate": 9.87912437879851e-06, "loss": 0.0035, "step": 49590 }, { "epoch": 0.3263093491575824, "grad_norm": 0.060604243015570027, "learning_rate": 9.878998872437054e-06, "loss": 0.0022, "step": 49600 }, { "epoch": 0.3263751373328158, "grad_norm": 0.22780994776513888, "learning_rate": 9.878873301750213e-06, "loss": 0.0038, "step": 49610 }, { "epoch": 0.3264409255080492, "grad_norm": 0.5513579575969123, "learning_rate": 9.878747666739641e-06, "loss": 0.0016, "step": 49620 }, { "epoch": 0.32650671368328255, "grad_norm": 0.3781460855858608, "learning_rate": 9.878621967406995e-06, "loss": 0.0026, "step": 49630 }, { "epoch": 0.32657250185851594, "grad_norm": 0.014205190775234465, "learning_rate": 9.87849620375393e-06, "loss": 0.0057, "step": 49640 }, { "epoch": 0.32663829003374933, "grad_norm": 0.08640885307846231, "learning_rate": 9.878370375782108e-06, "loss": 0.0013, "step": 49650 }, { "epoch": 0.32670407820898273, "grad_norm": 0.014842871775462766, "learning_rate": 9.878244483493184e-06, "loss": 0.0026, "step": 49660 }, { "epoch": 0.3267698663842161, "grad_norm": 0.028234603831854006, "learning_rate": 9.87811852688882e-06, "loss": 0.0034, "step": 49670 }, { "epoch": 0.32683565455944946, "grad_norm": 0.29370251447154083, "learning_rate": 9.877992505970676e-06, "loss": 0.0023, "step": 49680 }, { "epoch": 0.32690144273468286, "grad_norm": 0.04923060761870478, "learning_rate": 9.877866420740416e-06, "loss": 0.0022, "step": 49690 }, { "epoch": 0.32696723090991625, "grad_norm": 0.038565037684287255, "learning_rate": 9.877740271199699e-06, "loss": 0.002, "step": 49700 }, { "epoch": 0.32703301908514965, "grad_norm": 0.04698668813463127, "learning_rate": 9.877614057350189e-06, "loss": 0.0033, "step": 49710 }, { "epoch": 0.32709880726038304, "grad_norm": 0.0750646445152711, "learning_rate": 9.87748777919355e-06, "loss": 0.0023, "step": 49720 }, { "epoch": 0.3271645954356164, "grad_norm": 0.04997391237419517, "learning_rate": 9.877361436731446e-06, "loss": 0.0024, "step": 49730 }, { "epoch": 0.3272303836108498, "grad_norm": 0.11453722211022638, "learning_rate": 9.877235029965544e-06, "loss": 0.0021, "step": 49740 }, { "epoch": 0.32729617178608317, "grad_norm": 0.09425166165920902, "learning_rate": 9.877108558897512e-06, "loss": 0.0028, "step": 49750 }, { "epoch": 0.32736195996131656, "grad_norm": 0.062400748317496334, "learning_rate": 9.876982023529016e-06, "loss": 0.0014, "step": 49760 }, { "epoch": 0.32742774813654996, "grad_norm": 0.03608913598660298, "learning_rate": 9.876855423861721e-06, "loss": 0.0023, "step": 49770 }, { "epoch": 0.3274935363117833, "grad_norm": 0.050424886866630984, "learning_rate": 9.876728759897302e-06, "loss": 0.0028, "step": 49780 }, { "epoch": 0.3275593244870167, "grad_norm": 0.010891762936319513, "learning_rate": 9.876602031637426e-06, "loss": 0.0032, "step": 49790 }, { "epoch": 0.3276251126622501, "grad_norm": 0.28897959855051664, "learning_rate": 9.876475239083763e-06, "loss": 0.0027, "step": 49800 }, { "epoch": 0.3276909008374835, "grad_norm": 0.0665003987330646, "learning_rate": 9.876348382237985e-06, "loss": 0.0041, "step": 49810 }, { "epoch": 0.3277566890127169, "grad_norm": 0.15797309448776695, "learning_rate": 9.876221461101768e-06, "loss": 0.0044, "step": 49820 }, { "epoch": 0.3278224771879502, "grad_norm": 0.09603713095859491, "learning_rate": 9.87609447567678e-06, "loss": 0.0014, "step": 49830 }, { "epoch": 0.3278882653631836, "grad_norm": 0.11870034159916643, "learning_rate": 9.875967425964698e-06, "loss": 0.0019, "step": 49840 }, { "epoch": 0.327954053538417, "grad_norm": 0.04916161484687681, "learning_rate": 9.875840311967197e-06, "loss": 0.004, "step": 49850 }, { "epoch": 0.3280198417136504, "grad_norm": 0.019260239162423132, "learning_rate": 9.875713133685951e-06, "loss": 0.0044, "step": 49860 }, { "epoch": 0.3280856298888838, "grad_norm": 0.07123806325221606, "learning_rate": 9.87558589112264e-06, "loss": 0.0034, "step": 49870 }, { "epoch": 0.32815141806411713, "grad_norm": 0.04585220769468555, "learning_rate": 9.87545858427894e-06, "loss": 0.0018, "step": 49880 }, { "epoch": 0.3282172062393505, "grad_norm": 0.11490945827850522, "learning_rate": 9.875331213156529e-06, "loss": 0.0016, "step": 49890 }, { "epoch": 0.3282829944145839, "grad_norm": 0.06711706635971265, "learning_rate": 9.875203777757085e-06, "loss": 0.002, "step": 49900 }, { "epoch": 0.3283487825898173, "grad_norm": 0.36497618334696175, "learning_rate": 9.875076278082291e-06, "loss": 0.0042, "step": 49910 }, { "epoch": 0.3284145707650507, "grad_norm": 0.03536853856083498, "learning_rate": 9.874948714133827e-06, "loss": 0.0026, "step": 49920 }, { "epoch": 0.3284803589402841, "grad_norm": 0.04354326464104106, "learning_rate": 9.874821085913373e-06, "loss": 0.002, "step": 49930 }, { "epoch": 0.32854614711551744, "grad_norm": 0.08444414796814714, "learning_rate": 9.874693393422614e-06, "loss": 0.0029, "step": 49940 }, { "epoch": 0.32861193529075083, "grad_norm": 0.30474018641930356, "learning_rate": 9.874565636663234e-06, "loss": 0.0018, "step": 49950 }, { "epoch": 0.32867772346598423, "grad_norm": 0.03987917640156798, "learning_rate": 9.874437815636915e-06, "loss": 0.0021, "step": 49960 }, { "epoch": 0.3287435116412176, "grad_norm": 0.07241627739841255, "learning_rate": 9.874309930345342e-06, "loss": 0.0027, "step": 49970 }, { "epoch": 0.328809299816451, "grad_norm": 0.09198583727360334, "learning_rate": 9.874181980790204e-06, "loss": 0.0027, "step": 49980 }, { "epoch": 0.32887508799168436, "grad_norm": 0.05620805509174686, "learning_rate": 9.874053966973184e-06, "loss": 0.0025, "step": 49990 }, { "epoch": 0.32894087616691775, "grad_norm": 0.030389854518645152, "learning_rate": 9.873925888895976e-06, "loss": 0.003, "step": 50000 }, { "epoch": 0.32894087616691775, "eval_loss": 0.0017323493957519531, "eval_runtime": 13.0969, "eval_samples_per_second": 15.271, "eval_steps_per_second": 7.635, "step": 50000 }, { "epoch": 0.32900666434215114, "grad_norm": 0.08454182534130184, "learning_rate": 9.873797746560261e-06, "loss": 0.0037, "step": 50010 }, { "epoch": 0.32907245251738454, "grad_norm": 0.06819460261610433, "learning_rate": 9.873669539967734e-06, "loss": 0.0029, "step": 50020 }, { "epoch": 0.32913824069261793, "grad_norm": 0.004303592941869637, "learning_rate": 9.873541269120082e-06, "loss": 0.0027, "step": 50030 }, { "epoch": 0.3292040288678513, "grad_norm": 0.10443671557699197, "learning_rate": 9.873412934018998e-06, "loss": 0.0016, "step": 50040 }, { "epoch": 0.32926981704308467, "grad_norm": 0.028252775775518435, "learning_rate": 9.873284534666174e-06, "loss": 0.0049, "step": 50050 }, { "epoch": 0.32933560521831806, "grad_norm": 0.12269400017241239, "learning_rate": 9.873156071063301e-06, "loss": 0.003, "step": 50060 }, { "epoch": 0.32940139339355146, "grad_norm": 0.037697576313566766, "learning_rate": 9.873027543212074e-06, "loss": 0.0038, "step": 50070 }, { "epoch": 0.32946718156878485, "grad_norm": 0.20417728533995938, "learning_rate": 9.872898951114189e-06, "loss": 0.0038, "step": 50080 }, { "epoch": 0.3295329697440182, "grad_norm": 0.06110623709816239, "learning_rate": 9.872770294771338e-06, "loss": 0.0017, "step": 50090 }, { "epoch": 0.3295987579192516, "grad_norm": 0.18687216659597106, "learning_rate": 9.87264157418522e-06, "loss": 0.0026, "step": 50100 }, { "epoch": 0.329664546094485, "grad_norm": 0.10576997192829296, "learning_rate": 9.87251278935753e-06, "loss": 0.0042, "step": 50110 }, { "epoch": 0.3297303342697184, "grad_norm": 0.07515144390656571, "learning_rate": 9.87238394028997e-06, "loss": 0.0022, "step": 50120 }, { "epoch": 0.32979612244495177, "grad_norm": 0.1266238407343625, "learning_rate": 9.872255026984232e-06, "loss": 0.0025, "step": 50130 }, { "epoch": 0.3298619106201851, "grad_norm": 0.12165412181680679, "learning_rate": 9.87212604944202e-06, "loss": 0.002, "step": 50140 }, { "epoch": 0.3299276987954185, "grad_norm": 0.16904223460505555, "learning_rate": 9.871997007665038e-06, "loss": 0.0021, "step": 50150 }, { "epoch": 0.3299934869706519, "grad_norm": 0.09463176411705466, "learning_rate": 9.871867901654979e-06, "loss": 0.0027, "step": 50160 }, { "epoch": 0.3300592751458853, "grad_norm": 0.08479174072489218, "learning_rate": 9.871738731413551e-06, "loss": 0.0024, "step": 50170 }, { "epoch": 0.3301250633211187, "grad_norm": 0.10886703343844259, "learning_rate": 9.871609496942455e-06, "loss": 0.0026, "step": 50180 }, { "epoch": 0.330190851496352, "grad_norm": 0.006035898788253833, "learning_rate": 9.871480198243395e-06, "loss": 0.0031, "step": 50190 }, { "epoch": 0.3302566396715854, "grad_norm": 0.06295695377174074, "learning_rate": 9.871350835318077e-06, "loss": 0.0028, "step": 50200 }, { "epoch": 0.3303224278468188, "grad_norm": 0.014391969701646171, "learning_rate": 9.871221408168203e-06, "loss": 0.0028, "step": 50210 }, { "epoch": 0.3303882160220522, "grad_norm": 0.047020259650434076, "learning_rate": 9.871091916795484e-06, "loss": 0.0029, "step": 50220 }, { "epoch": 0.3304540041972856, "grad_norm": 0.050838241589277876, "learning_rate": 9.870962361201624e-06, "loss": 0.001, "step": 50230 }, { "epoch": 0.33051979237251894, "grad_norm": 0.07257644095507679, "learning_rate": 9.870832741388331e-06, "loss": 0.0023, "step": 50240 }, { "epoch": 0.33058558054775233, "grad_norm": 0.08394359141339526, "learning_rate": 9.870703057357319e-06, "loss": 0.0025, "step": 50250 }, { "epoch": 0.33065136872298573, "grad_norm": 0.028391060701980865, "learning_rate": 9.87057330911029e-06, "loss": 0.002, "step": 50260 }, { "epoch": 0.3307171568982191, "grad_norm": 0.24099268424504, "learning_rate": 9.87044349664896e-06, "loss": 0.0023, "step": 50270 }, { "epoch": 0.3307829450734525, "grad_norm": 0.03273871375036158, "learning_rate": 9.870313619975039e-06, "loss": 0.0014, "step": 50280 }, { "epoch": 0.33084873324868586, "grad_norm": 0.061105665586091526, "learning_rate": 9.870183679090238e-06, "loss": 0.0022, "step": 50290 }, { "epoch": 0.33091452142391925, "grad_norm": 0.053744988356622045, "learning_rate": 9.870053673996271e-06, "loss": 0.0022, "step": 50300 }, { "epoch": 0.33098030959915264, "grad_norm": 0.09360170396224837, "learning_rate": 9.869923604694856e-06, "loss": 0.0021, "step": 50310 }, { "epoch": 0.33104609777438604, "grad_norm": 0.31583503980701433, "learning_rate": 9.8697934711877e-06, "loss": 0.003, "step": 50320 }, { "epoch": 0.33111188594961943, "grad_norm": 0.04939690333655309, "learning_rate": 9.869663273476524e-06, "loss": 0.0029, "step": 50330 }, { "epoch": 0.33117767412485277, "grad_norm": 0.10211250156186902, "learning_rate": 9.869533011563045e-06, "loss": 0.0026, "step": 50340 }, { "epoch": 0.33124346230008617, "grad_norm": 0.09452577551982008, "learning_rate": 9.869402685448978e-06, "loss": 0.0034, "step": 50350 }, { "epoch": 0.33130925047531956, "grad_norm": 0.0665943204778061, "learning_rate": 9.869272295136042e-06, "loss": 0.0039, "step": 50360 }, { "epoch": 0.33137503865055296, "grad_norm": 0.18393810265655752, "learning_rate": 9.869141840625955e-06, "loss": 0.0021, "step": 50370 }, { "epoch": 0.33144082682578635, "grad_norm": 0.13070584350763137, "learning_rate": 9.869011321920442e-06, "loss": 0.0034, "step": 50380 }, { "epoch": 0.33150661500101974, "grad_norm": 0.03880850843818679, "learning_rate": 9.868880739021215e-06, "loss": 0.0019, "step": 50390 }, { "epoch": 0.3315724031762531, "grad_norm": 0.46407888025334704, "learning_rate": 9.868750091930003e-06, "loss": 0.0029, "step": 50400 }, { "epoch": 0.3316381913514865, "grad_norm": 0.09500610108041703, "learning_rate": 9.868619380648525e-06, "loss": 0.0024, "step": 50410 }, { "epoch": 0.33170397952671987, "grad_norm": 0.06773656395217946, "learning_rate": 9.868488605178506e-06, "loss": 0.0014, "step": 50420 }, { "epoch": 0.33176976770195327, "grad_norm": 0.047558879115692024, "learning_rate": 9.86835776552167e-06, "loss": 0.0064, "step": 50430 }, { "epoch": 0.33183555587718666, "grad_norm": 0.06324544118179098, "learning_rate": 9.868226861679739e-06, "loss": 0.0036, "step": 50440 }, { "epoch": 0.33190134405242, "grad_norm": 0.06375134690640938, "learning_rate": 9.868095893654443e-06, "loss": 0.0027, "step": 50450 }, { "epoch": 0.3319671322276534, "grad_norm": 0.05709161177790047, "learning_rate": 9.867964861447508e-06, "loss": 0.0024, "step": 50460 }, { "epoch": 0.3320329204028868, "grad_norm": 0.05422628869129621, "learning_rate": 9.867833765060659e-06, "loss": 0.0028, "step": 50470 }, { "epoch": 0.3320987085781202, "grad_norm": 0.08955590710704169, "learning_rate": 9.867702604495628e-06, "loss": 0.0037, "step": 50480 }, { "epoch": 0.3321644967533536, "grad_norm": 0.09874930859116575, "learning_rate": 9.86757137975414e-06, "loss": 0.0018, "step": 50490 }, { "epoch": 0.3322302849285869, "grad_norm": 0.037269489763259854, "learning_rate": 9.867440090837928e-06, "loss": 0.0031, "step": 50500 }, { "epoch": 0.3322960731038203, "grad_norm": 0.04366543051510979, "learning_rate": 9.867308737748722e-06, "loss": 0.0029, "step": 50510 }, { "epoch": 0.3323618612790537, "grad_norm": 0.044682789445393785, "learning_rate": 9.867177320488254e-06, "loss": 0.0032, "step": 50520 }, { "epoch": 0.3324276494542871, "grad_norm": 0.09971206721472507, "learning_rate": 9.867045839058258e-06, "loss": 0.0026, "step": 50530 }, { "epoch": 0.3324934376295205, "grad_norm": 0.054305621433573675, "learning_rate": 9.866914293460464e-06, "loss": 0.0018, "step": 50540 }, { "epoch": 0.33255922580475383, "grad_norm": 0.06496978693675776, "learning_rate": 9.86678268369661e-06, "loss": 0.0021, "step": 50550 }, { "epoch": 0.3326250139799872, "grad_norm": 0.05468197875265843, "learning_rate": 9.866651009768429e-06, "loss": 0.0031, "step": 50560 }, { "epoch": 0.3326908021552206, "grad_norm": 0.16899826174778157, "learning_rate": 9.866519271677657e-06, "loss": 0.0031, "step": 50570 }, { "epoch": 0.332756590330454, "grad_norm": 0.14807548179906624, "learning_rate": 9.86638746942603e-06, "loss": 0.0042, "step": 50580 }, { "epoch": 0.3328223785056874, "grad_norm": 0.15694521949283094, "learning_rate": 9.86625560301529e-06, "loss": 0.003, "step": 50590 }, { "epoch": 0.33288816668092075, "grad_norm": 0.1555989441328529, "learning_rate": 9.866123672447171e-06, "loss": 0.0052, "step": 50600 }, { "epoch": 0.33295395485615414, "grad_norm": 0.07953864659740814, "learning_rate": 9.865991677723415e-06, "loss": 0.0018, "step": 50610 }, { "epoch": 0.33301974303138754, "grad_norm": 0.05643752301995013, "learning_rate": 9.86585961884576e-06, "loss": 0.0019, "step": 50620 }, { "epoch": 0.33308553120662093, "grad_norm": 0.021041791995647926, "learning_rate": 9.86572749581595e-06, "loss": 0.0024, "step": 50630 }, { "epoch": 0.3331513193818543, "grad_norm": 0.07777457748936191, "learning_rate": 9.865595308635725e-06, "loss": 0.003, "step": 50640 }, { "epoch": 0.33321710755708767, "grad_norm": 0.04535145386050218, "learning_rate": 9.865463057306827e-06, "loss": 0.0022, "step": 50650 }, { "epoch": 0.33328289573232106, "grad_norm": 0.03459315304210319, "learning_rate": 9.865330741831e-06, "loss": 0.0022, "step": 50660 }, { "epoch": 0.33334868390755445, "grad_norm": 0.1582980325969045, "learning_rate": 9.865198362209992e-06, "loss": 0.0038, "step": 50670 }, { "epoch": 0.33341447208278785, "grad_norm": 0.06980028794319641, "learning_rate": 9.865065918445542e-06, "loss": 0.0022, "step": 50680 }, { "epoch": 0.33348026025802124, "grad_norm": 0.24758643258041865, "learning_rate": 9.864933410539402e-06, "loss": 0.0039, "step": 50690 }, { "epoch": 0.3335460484332546, "grad_norm": 0.1341393502559332, "learning_rate": 9.864800838493316e-06, "loss": 0.0037, "step": 50700 }, { "epoch": 0.333611836608488, "grad_norm": 0.1192170832402524, "learning_rate": 9.864668202309033e-06, "loss": 0.0019, "step": 50710 }, { "epoch": 0.33367762478372137, "grad_norm": 0.03499066953739295, "learning_rate": 9.8645355019883e-06, "loss": 0.0036, "step": 50720 }, { "epoch": 0.33374341295895477, "grad_norm": 0.11841982221108258, "learning_rate": 9.864402737532868e-06, "loss": 0.0104, "step": 50730 }, { "epoch": 0.33380920113418816, "grad_norm": 0.09080870265263484, "learning_rate": 9.864269908944488e-06, "loss": 0.0046, "step": 50740 }, { "epoch": 0.3338749893094215, "grad_norm": 0.04540871634522118, "learning_rate": 9.86413701622491e-06, "loss": 0.0039, "step": 50750 }, { "epoch": 0.3339407774846549, "grad_norm": 0.03947461402458857, "learning_rate": 9.864004059375886e-06, "loss": 0.009, "step": 50760 }, { "epoch": 0.3340065656598883, "grad_norm": 0.09687834798976698, "learning_rate": 9.86387103839917e-06, "loss": 0.0026, "step": 50770 }, { "epoch": 0.3340723538351217, "grad_norm": 0.11676230363889475, "learning_rate": 9.863737953296513e-06, "loss": 0.0014, "step": 50780 }, { "epoch": 0.3341381420103551, "grad_norm": 0.18360530873761247, "learning_rate": 9.863604804069673e-06, "loss": 0.0021, "step": 50790 }, { "epoch": 0.3342039301855884, "grad_norm": 0.06050295182307123, "learning_rate": 9.863471590720405e-06, "loss": 0.0028, "step": 50800 }, { "epoch": 0.3342697183608218, "grad_norm": 0.042504041496851844, "learning_rate": 9.863338313250463e-06, "loss": 0.0039, "step": 50810 }, { "epoch": 0.3343355065360552, "grad_norm": 0.02968650101692584, "learning_rate": 9.863204971661606e-06, "loss": 0.0022, "step": 50820 }, { "epoch": 0.3344012947112886, "grad_norm": 0.16998037389260856, "learning_rate": 9.863071565955593e-06, "loss": 0.0037, "step": 50830 }, { "epoch": 0.334467082886522, "grad_norm": 0.18996635893855318, "learning_rate": 9.86293809613418e-06, "loss": 0.0046, "step": 50840 }, { "epoch": 0.33453287106175533, "grad_norm": 0.012204764699303587, "learning_rate": 9.862804562199129e-06, "loss": 0.0017, "step": 50850 }, { "epoch": 0.3345986592369887, "grad_norm": 0.050196333243772456, "learning_rate": 9.862670964152199e-06, "loss": 0.0015, "step": 50860 }, { "epoch": 0.3346644474122221, "grad_norm": 0.07817279193875455, "learning_rate": 9.862537301995152e-06, "loss": 0.002, "step": 50870 }, { "epoch": 0.3347302355874555, "grad_norm": 0.17411791739861338, "learning_rate": 9.86240357572975e-06, "loss": 0.0027, "step": 50880 }, { "epoch": 0.3347960237626889, "grad_norm": 0.06034479718965879, "learning_rate": 9.862269785357757e-06, "loss": 0.003, "step": 50890 }, { "epoch": 0.3348618119379223, "grad_norm": 0.09083438733684632, "learning_rate": 9.862135930880936e-06, "loss": 0.0023, "step": 50900 }, { "epoch": 0.33492760011315564, "grad_norm": 0.21895909595650423, "learning_rate": 9.862002012301051e-06, "loss": 0.003, "step": 50910 }, { "epoch": 0.33499338828838904, "grad_norm": 0.06263344211585542, "learning_rate": 9.86186802961987e-06, "loss": 0.0016, "step": 50920 }, { "epoch": 0.33505917646362243, "grad_norm": 0.10748607109827123, "learning_rate": 9.861733982839157e-06, "loss": 0.0029, "step": 50930 }, { "epoch": 0.3351249646388558, "grad_norm": 0.12392758346471175, "learning_rate": 9.861599871960681e-06, "loss": 0.0022, "step": 50940 }, { "epoch": 0.3351907528140892, "grad_norm": 0.21204261450123907, "learning_rate": 9.861465696986209e-06, "loss": 0.0035, "step": 50950 }, { "epoch": 0.33525654098932256, "grad_norm": 0.08470641982103921, "learning_rate": 9.86133145791751e-06, "loss": 0.0029, "step": 50960 }, { "epoch": 0.33532232916455595, "grad_norm": 0.07226099251258626, "learning_rate": 9.861197154756355e-06, "loss": 0.0039, "step": 50970 }, { "epoch": 0.33538811733978935, "grad_norm": 0.22395655423858676, "learning_rate": 9.861062787504514e-06, "loss": 0.0024, "step": 50980 }, { "epoch": 0.33545390551502274, "grad_norm": 0.05195338612416526, "learning_rate": 9.860928356163758e-06, "loss": 0.0019, "step": 50990 }, { "epoch": 0.33551969369025614, "grad_norm": 0.07268436280860212, "learning_rate": 9.86079386073586e-06, "loss": 0.0022, "step": 51000 }, { "epoch": 0.3355854818654895, "grad_norm": 0.20324009613439806, "learning_rate": 9.860659301222593e-06, "loss": 0.0038, "step": 51010 }, { "epoch": 0.33565127004072287, "grad_norm": 0.09770328867334022, "learning_rate": 9.860524677625732e-06, "loss": 0.0029, "step": 51020 }, { "epoch": 0.33571705821595627, "grad_norm": 0.06502991486931908, "learning_rate": 9.86038998994705e-06, "loss": 0.0029, "step": 51030 }, { "epoch": 0.33578284639118966, "grad_norm": 0.010184574094948745, "learning_rate": 9.860255238188322e-06, "loss": 0.0018, "step": 51040 }, { "epoch": 0.33584863456642305, "grad_norm": 0.18099406287021075, "learning_rate": 9.86012042235133e-06, "loss": 0.0017, "step": 51050 }, { "epoch": 0.3359144227416564, "grad_norm": 0.10675693026744003, "learning_rate": 9.859985542437844e-06, "loss": 0.0028, "step": 51060 }, { "epoch": 0.3359802109168898, "grad_norm": 0.049467810994873115, "learning_rate": 9.859850598449647e-06, "loss": 0.0037, "step": 51070 }, { "epoch": 0.3360459990921232, "grad_norm": 0.03512760424513996, "learning_rate": 9.85971559038852e-06, "loss": 0.0014, "step": 51080 }, { "epoch": 0.3361117872673566, "grad_norm": 0.04344984770096312, "learning_rate": 9.859580518256236e-06, "loss": 0.0024, "step": 51090 }, { "epoch": 0.33617757544258997, "grad_norm": 0.19620985531517132, "learning_rate": 9.859445382054582e-06, "loss": 0.0018, "step": 51100 }, { "epoch": 0.3362433636178233, "grad_norm": 0.08511912314537097, "learning_rate": 9.859310181785337e-06, "loss": 0.0035, "step": 51110 }, { "epoch": 0.3363091517930567, "grad_norm": 0.03872057468668168, "learning_rate": 9.859174917450285e-06, "loss": 0.003, "step": 51120 }, { "epoch": 0.3363749399682901, "grad_norm": 0.10204889441732828, "learning_rate": 9.859039589051208e-06, "loss": 0.0018, "step": 51130 }, { "epoch": 0.3364407281435235, "grad_norm": 0.06316577178490228, "learning_rate": 9.858904196589888e-06, "loss": 0.0059, "step": 51140 }, { "epoch": 0.3365065163187569, "grad_norm": 0.048920900895693044, "learning_rate": 9.858768740068116e-06, "loss": 0.0026, "step": 51150 }, { "epoch": 0.3365723044939902, "grad_norm": 0.09682003909075582, "learning_rate": 9.858633219487673e-06, "loss": 0.0027, "step": 51160 }, { "epoch": 0.3366380926692236, "grad_norm": 0.07660922670074226, "learning_rate": 9.858497634850348e-06, "loss": 0.0018, "step": 51170 }, { "epoch": 0.336703880844457, "grad_norm": 0.03763205796604782, "learning_rate": 9.858361986157928e-06, "loss": 0.0028, "step": 51180 }, { "epoch": 0.3367696690196904, "grad_norm": 0.07928766319309156, "learning_rate": 9.858226273412199e-06, "loss": 0.0025, "step": 51190 }, { "epoch": 0.3368354571949238, "grad_norm": 0.1563447335041536, "learning_rate": 9.858090496614953e-06, "loss": 0.0016, "step": 51200 }, { "epoch": 0.33690124537015714, "grad_norm": 0.007591294912370506, "learning_rate": 9.85795465576798e-06, "loss": 0.0023, "step": 51210 }, { "epoch": 0.33696703354539054, "grad_norm": 0.007450246427115076, "learning_rate": 9.857818750873071e-06, "loss": 0.0014, "step": 51220 }, { "epoch": 0.33703282172062393, "grad_norm": 0.03777751734449994, "learning_rate": 9.857682781932017e-06, "loss": 0.0033, "step": 51230 }, { "epoch": 0.3370986098958573, "grad_norm": 0.0431486189984561, "learning_rate": 9.85754674894661e-06, "loss": 0.0022, "step": 51240 }, { "epoch": 0.3371643980710907, "grad_norm": 0.2508541798674848, "learning_rate": 9.857410651918646e-06, "loss": 0.0014, "step": 51250 }, { "epoch": 0.33723018624632406, "grad_norm": 0.04338314343829664, "learning_rate": 9.857274490849918e-06, "loss": 0.002, "step": 51260 }, { "epoch": 0.33729597442155745, "grad_norm": 0.07220913421911006, "learning_rate": 9.857138265742218e-06, "loss": 0.0041, "step": 51270 }, { "epoch": 0.33736176259679085, "grad_norm": 0.09706214920617684, "learning_rate": 9.857001976597348e-06, "loss": 0.0028, "step": 51280 }, { "epoch": 0.33742755077202424, "grad_norm": 0.17177185698928896, "learning_rate": 9.8568656234171e-06, "loss": 0.0013, "step": 51290 }, { "epoch": 0.33749333894725764, "grad_norm": 0.12373664899837933, "learning_rate": 9.856729206203275e-06, "loss": 0.0037, "step": 51300 }, { "epoch": 0.337559127122491, "grad_norm": 0.29138942700440096, "learning_rate": 9.856592724957667e-06, "loss": 0.0027, "step": 51310 }, { "epoch": 0.33762491529772437, "grad_norm": 0.009574550197072111, "learning_rate": 9.856456179682081e-06, "loss": 0.003, "step": 51320 }, { "epoch": 0.33769070347295777, "grad_norm": 0.02154599766037637, "learning_rate": 9.856319570378315e-06, "loss": 0.0019, "step": 51330 }, { "epoch": 0.33775649164819116, "grad_norm": 0.012871864113763197, "learning_rate": 9.856182897048167e-06, "loss": 0.0016, "step": 51340 }, { "epoch": 0.33782227982342455, "grad_norm": 0.018369856879012457, "learning_rate": 9.856046159693443e-06, "loss": 0.0024, "step": 51350 }, { "epoch": 0.33788806799865795, "grad_norm": 0.22195508705235464, "learning_rate": 9.855909358315944e-06, "loss": 0.0031, "step": 51360 }, { "epoch": 0.3379538561738913, "grad_norm": 0.11617719542487759, "learning_rate": 9.855772492917474e-06, "loss": 0.0029, "step": 51370 }, { "epoch": 0.3380196443491247, "grad_norm": 0.0732453378580902, "learning_rate": 9.855635563499837e-06, "loss": 0.0032, "step": 51380 }, { "epoch": 0.3380854325243581, "grad_norm": 0.11315501730291073, "learning_rate": 9.85549857006484e-06, "loss": 0.0023, "step": 51390 }, { "epoch": 0.33815122069959147, "grad_norm": 0.20802204975530642, "learning_rate": 9.855361512614288e-06, "loss": 0.0027, "step": 51400 }, { "epoch": 0.33821700887482486, "grad_norm": 0.035553359924481075, "learning_rate": 9.855224391149987e-06, "loss": 0.0009, "step": 51410 }, { "epoch": 0.3382827970500582, "grad_norm": 0.08035743339313793, "learning_rate": 9.855087205673746e-06, "loss": 0.0019, "step": 51420 }, { "epoch": 0.3383485852252916, "grad_norm": 0.02214350667433623, "learning_rate": 9.854949956187371e-06, "loss": 0.0019, "step": 51430 }, { "epoch": 0.338414373400525, "grad_norm": 0.06490172800892734, "learning_rate": 9.854812642692676e-06, "loss": 0.003, "step": 51440 }, { "epoch": 0.3384801615757584, "grad_norm": 0.15147315534848013, "learning_rate": 9.85467526519147e-06, "loss": 0.0027, "step": 51450 }, { "epoch": 0.3385459497509918, "grad_norm": 0.16645732595664525, "learning_rate": 9.854537823685561e-06, "loss": 0.002, "step": 51460 }, { "epoch": 0.3386117379262251, "grad_norm": 0.1119412300969635, "learning_rate": 9.854400318176766e-06, "loss": 0.0022, "step": 51470 }, { "epoch": 0.3386775261014585, "grad_norm": 0.13056498280911266, "learning_rate": 9.854262748666893e-06, "loss": 0.0031, "step": 51480 }, { "epoch": 0.3387433142766919, "grad_norm": 0.09333987347787064, "learning_rate": 9.85412511515776e-06, "loss": 0.0031, "step": 51490 }, { "epoch": 0.3388091024519253, "grad_norm": 0.09443184980868513, "learning_rate": 9.853987417651179e-06, "loss": 0.0029, "step": 51500 }, { "epoch": 0.3388748906271587, "grad_norm": 0.1690291669025534, "learning_rate": 9.853849656148966e-06, "loss": 0.0033, "step": 51510 }, { "epoch": 0.33894067880239204, "grad_norm": 0.1335293363308689, "learning_rate": 9.853711830652936e-06, "loss": 0.0025, "step": 51520 }, { "epoch": 0.33900646697762543, "grad_norm": 0.040563298824157885, "learning_rate": 9.85357394116491e-06, "loss": 0.0029, "step": 51530 }, { "epoch": 0.3390722551528588, "grad_norm": 0.07640475337446002, "learning_rate": 9.853435987686702e-06, "loss": 0.0026, "step": 51540 }, { "epoch": 0.3391380433280922, "grad_norm": 0.12444593113902686, "learning_rate": 9.853297970220133e-06, "loss": 0.0023, "step": 51550 }, { "epoch": 0.3392038315033256, "grad_norm": 0.02094760388260266, "learning_rate": 9.853159888767022e-06, "loss": 0.0019, "step": 51560 }, { "epoch": 0.33926961967855895, "grad_norm": 0.021109358783894076, "learning_rate": 9.853021743329187e-06, "loss": 0.0019, "step": 51570 }, { "epoch": 0.33933540785379235, "grad_norm": 0.08811266958332732, "learning_rate": 9.852883533908453e-06, "loss": 0.0027, "step": 51580 }, { "epoch": 0.33940119602902574, "grad_norm": 0.04457604702244054, "learning_rate": 9.852745260506643e-06, "loss": 0.0034, "step": 51590 }, { "epoch": 0.33946698420425914, "grad_norm": 0.25575781827384014, "learning_rate": 9.852606923125574e-06, "loss": 0.0043, "step": 51600 }, { "epoch": 0.33953277237949253, "grad_norm": 0.05371695876757008, "learning_rate": 9.852468521767076e-06, "loss": 0.0027, "step": 51610 }, { "epoch": 0.33959856055472587, "grad_norm": 0.038946334808403184, "learning_rate": 9.85233005643297e-06, "loss": 0.0017, "step": 51620 }, { "epoch": 0.33966434872995926, "grad_norm": 0.10589794630432947, "learning_rate": 9.852191527125085e-06, "loss": 0.0044, "step": 51630 }, { "epoch": 0.33973013690519266, "grad_norm": 0.03451707080430067, "learning_rate": 9.852052933845244e-06, "loss": 0.0027, "step": 51640 }, { "epoch": 0.33979592508042605, "grad_norm": 0.04236418440590052, "learning_rate": 9.851914276595276e-06, "loss": 0.0019, "step": 51650 }, { "epoch": 0.33986171325565945, "grad_norm": 0.15163158907304577, "learning_rate": 9.851775555377009e-06, "loss": 0.0028, "step": 51660 }, { "epoch": 0.3399275014308928, "grad_norm": 0.05926474910764259, "learning_rate": 9.85163677019227e-06, "loss": 0.0032, "step": 51670 }, { "epoch": 0.3399932896061262, "grad_norm": 0.09095554512138032, "learning_rate": 9.851497921042892e-06, "loss": 0.0028, "step": 51680 }, { "epoch": 0.3400590777813596, "grad_norm": 0.024512964762389337, "learning_rate": 9.851359007930703e-06, "loss": 0.0015, "step": 51690 }, { "epoch": 0.34012486595659297, "grad_norm": 0.01696990846368285, "learning_rate": 9.851220030857535e-06, "loss": 0.0012, "step": 51700 }, { "epoch": 0.34019065413182636, "grad_norm": 0.0886476889781678, "learning_rate": 9.851080989825223e-06, "loss": 0.003, "step": 51710 }, { "epoch": 0.3402564423070597, "grad_norm": 0.06337057555077848, "learning_rate": 9.850941884835597e-06, "loss": 0.003, "step": 51720 }, { "epoch": 0.3403222304822931, "grad_norm": 0.15717495667856027, "learning_rate": 9.850802715890491e-06, "loss": 0.0026, "step": 51730 }, { "epoch": 0.3403880186575265, "grad_norm": 0.07038440072630611, "learning_rate": 9.85066348299174e-06, "loss": 0.0021, "step": 51740 }, { "epoch": 0.3404538068327599, "grad_norm": 0.05568412153374424, "learning_rate": 9.850524186141181e-06, "loss": 0.0018, "step": 51750 }, { "epoch": 0.3405195950079933, "grad_norm": 0.13978625067885334, "learning_rate": 9.850384825340648e-06, "loss": 0.0018, "step": 51760 }, { "epoch": 0.3405853831832266, "grad_norm": 0.06875591935512242, "learning_rate": 9.850245400591982e-06, "loss": 0.0036, "step": 51770 }, { "epoch": 0.34065117135846, "grad_norm": 0.05397748434254311, "learning_rate": 9.85010591189702e-06, "loss": 0.0024, "step": 51780 }, { "epoch": 0.3407169595336934, "grad_norm": 0.19415562974889206, "learning_rate": 9.8499663592576e-06, "loss": 0.0033, "step": 51790 }, { "epoch": 0.3407827477089268, "grad_norm": 0.03869768537033699, "learning_rate": 9.84982674267556e-06, "loss": 0.0027, "step": 51800 }, { "epoch": 0.3408485358841602, "grad_norm": 0.30185396860865604, "learning_rate": 9.849687062152746e-06, "loss": 0.0025, "step": 51810 }, { "epoch": 0.3409143240593936, "grad_norm": 0.07113044034938393, "learning_rate": 9.849547317690996e-06, "loss": 0.0019, "step": 51820 }, { "epoch": 0.34098011223462693, "grad_norm": 0.14838888908921846, "learning_rate": 9.849407509292153e-06, "loss": 0.0026, "step": 51830 }, { "epoch": 0.3410459004098603, "grad_norm": 0.03379906392688423, "learning_rate": 9.849267636958058e-06, "loss": 0.003, "step": 51840 }, { "epoch": 0.3411116885850937, "grad_norm": 0.04074795472490884, "learning_rate": 9.84912770069056e-06, "loss": 0.0025, "step": 51850 }, { "epoch": 0.3411774767603271, "grad_norm": 0.14141354374909654, "learning_rate": 9.848987700491498e-06, "loss": 0.0021, "step": 51860 }, { "epoch": 0.3412432649355605, "grad_norm": 0.2710849860641066, "learning_rate": 9.848847636362726e-06, "loss": 0.0038, "step": 51870 }, { "epoch": 0.34130905311079385, "grad_norm": 0.05176666080207937, "learning_rate": 9.848707508306081e-06, "loss": 0.0023, "step": 51880 }, { "epoch": 0.34137484128602724, "grad_norm": 0.11827628975292673, "learning_rate": 9.848567316323418e-06, "loss": 0.0033, "step": 51890 }, { "epoch": 0.34144062946126064, "grad_norm": 0.07683824981343504, "learning_rate": 9.848427060416581e-06, "loss": 0.002, "step": 51900 }, { "epoch": 0.34150641763649403, "grad_norm": 0.07799161069739416, "learning_rate": 9.84828674058742e-06, "loss": 0.0028, "step": 51910 }, { "epoch": 0.3415722058117274, "grad_norm": 0.2471811508042172, "learning_rate": 9.848146356837787e-06, "loss": 0.0026, "step": 51920 }, { "epoch": 0.34163799398696076, "grad_norm": 0.2413770203112555, "learning_rate": 9.84800590916953e-06, "loss": 0.0029, "step": 51930 }, { "epoch": 0.34170378216219416, "grad_norm": 0.10457425468410801, "learning_rate": 9.847865397584504e-06, "loss": 0.002, "step": 51940 }, { "epoch": 0.34176957033742755, "grad_norm": 0.17604850589667514, "learning_rate": 9.847724822084559e-06, "loss": 0.0027, "step": 51950 }, { "epoch": 0.34183535851266095, "grad_norm": 0.03410704710429869, "learning_rate": 9.847584182671547e-06, "loss": 0.0046, "step": 51960 }, { "epoch": 0.34190114668789434, "grad_norm": 0.04821469582138628, "learning_rate": 9.847443479347327e-06, "loss": 0.0018, "step": 51970 }, { "epoch": 0.3419669348631277, "grad_norm": 0.02953358090197298, "learning_rate": 9.847302712113752e-06, "loss": 0.0015, "step": 51980 }, { "epoch": 0.3420327230383611, "grad_norm": 0.16491315964849565, "learning_rate": 9.847161880972673e-06, "loss": 0.0022, "step": 51990 }, { "epoch": 0.34209851121359447, "grad_norm": 0.0604866905749365, "learning_rate": 9.847020985925953e-06, "loss": 0.0022, "step": 52000 }, { "epoch": 0.34216429938882786, "grad_norm": 0.07051141214702387, "learning_rate": 9.846880026975449e-06, "loss": 0.0018, "step": 52010 }, { "epoch": 0.34223008756406126, "grad_norm": 0.07509001425774163, "learning_rate": 9.846739004123016e-06, "loss": 0.0034, "step": 52020 }, { "epoch": 0.3422958757392946, "grad_norm": 0.06159816379283048, "learning_rate": 9.846597917370516e-06, "loss": 0.0026, "step": 52030 }, { "epoch": 0.342361663914528, "grad_norm": 0.12165830972893733, "learning_rate": 9.846456766719807e-06, "loss": 0.0014, "step": 52040 }, { "epoch": 0.3424274520897614, "grad_norm": 0.16390957263765252, "learning_rate": 9.846315552172752e-06, "loss": 0.0034, "step": 52050 }, { "epoch": 0.3424932402649948, "grad_norm": 0.10003411524925679, "learning_rate": 9.846174273731212e-06, "loss": 0.002, "step": 52060 }, { "epoch": 0.3425590284402282, "grad_norm": 0.05219707539416686, "learning_rate": 9.846032931397049e-06, "loss": 0.0034, "step": 52070 }, { "epoch": 0.3426248166154615, "grad_norm": 0.04859506170384905, "learning_rate": 9.845891525172129e-06, "loss": 0.0036, "step": 52080 }, { "epoch": 0.3426906047906949, "grad_norm": 0.1675900998750281, "learning_rate": 9.845750055058314e-06, "loss": 0.0034, "step": 52090 }, { "epoch": 0.3427563929659283, "grad_norm": 0.02463020531745815, "learning_rate": 9.845608521057469e-06, "loss": 0.0022, "step": 52100 }, { "epoch": 0.3428221811411617, "grad_norm": 0.050916544045636516, "learning_rate": 9.84546692317146e-06, "loss": 0.0028, "step": 52110 }, { "epoch": 0.3428879693163951, "grad_norm": 0.13427411961792377, "learning_rate": 9.845325261402154e-06, "loss": 0.003, "step": 52120 }, { "epoch": 0.34295375749162843, "grad_norm": 0.07591528318734585, "learning_rate": 9.845183535751421e-06, "loss": 0.0014, "step": 52130 }, { "epoch": 0.3430195456668618, "grad_norm": 0.022820560865441583, "learning_rate": 9.845041746221126e-06, "loss": 0.0026, "step": 52140 }, { "epoch": 0.3430853338420952, "grad_norm": 0.4719066126093035, "learning_rate": 9.84489989281314e-06, "loss": 0.0048, "step": 52150 }, { "epoch": 0.3431511220173286, "grad_norm": 0.05338328185831782, "learning_rate": 9.844757975529334e-06, "loss": 0.0012, "step": 52160 }, { "epoch": 0.343216910192562, "grad_norm": 0.2492120986565244, "learning_rate": 9.844615994371577e-06, "loss": 0.0021, "step": 52170 }, { "epoch": 0.34328269836779535, "grad_norm": 0.07074681204052431, "learning_rate": 9.844473949341744e-06, "loss": 0.0027, "step": 52180 }, { "epoch": 0.34334848654302874, "grad_norm": 0.24626729193479813, "learning_rate": 9.844331840441706e-06, "loss": 0.003, "step": 52190 }, { "epoch": 0.34341427471826214, "grad_norm": 0.0556716494460178, "learning_rate": 9.844189667673336e-06, "loss": 0.0017, "step": 52200 }, { "epoch": 0.34348006289349553, "grad_norm": 0.0846058802023846, "learning_rate": 9.844047431038509e-06, "loss": 0.0017, "step": 52210 }, { "epoch": 0.3435458510687289, "grad_norm": 0.08288697257790728, "learning_rate": 9.8439051305391e-06, "loss": 0.0028, "step": 52220 }, { "epoch": 0.34361163924396226, "grad_norm": 0.0674427619726527, "learning_rate": 9.843762766176984e-06, "loss": 0.0033, "step": 52230 }, { "epoch": 0.34367742741919566, "grad_norm": 0.13100339814009393, "learning_rate": 9.843620337954042e-06, "loss": 0.0026, "step": 52240 }, { "epoch": 0.34374321559442905, "grad_norm": 0.03622322203361419, "learning_rate": 9.843477845872146e-06, "loss": 0.0012, "step": 52250 }, { "epoch": 0.34380900376966245, "grad_norm": 0.17229072453135524, "learning_rate": 9.84333528993318e-06, "loss": 0.0045, "step": 52260 }, { "epoch": 0.34387479194489584, "grad_norm": 0.09569591892809423, "learning_rate": 9.843192670139021e-06, "loss": 0.0026, "step": 52270 }, { "epoch": 0.34394058012012924, "grad_norm": 0.3071707187531286, "learning_rate": 9.84304998649155e-06, "loss": 0.0027, "step": 52280 }, { "epoch": 0.3440063682953626, "grad_norm": 0.04857342794349844, "learning_rate": 9.842907238992646e-06, "loss": 0.0023, "step": 52290 }, { "epoch": 0.34407215647059597, "grad_norm": 0.08702962530190626, "learning_rate": 9.842764427644194e-06, "loss": 0.0018, "step": 52300 }, { "epoch": 0.34413794464582936, "grad_norm": 0.14654116477492546, "learning_rate": 9.842621552448075e-06, "loss": 0.0037, "step": 52310 }, { "epoch": 0.34420373282106276, "grad_norm": 0.06365390241406688, "learning_rate": 9.842478613406172e-06, "loss": 0.0024, "step": 52320 }, { "epoch": 0.34426952099629615, "grad_norm": 0.07579949356199747, "learning_rate": 9.842335610520372e-06, "loss": 0.0017, "step": 52330 }, { "epoch": 0.3443353091715295, "grad_norm": 0.0503078236428124, "learning_rate": 9.842192543792559e-06, "loss": 0.0022, "step": 52340 }, { "epoch": 0.3444010973467629, "grad_norm": 0.08089163814606622, "learning_rate": 9.842049413224619e-06, "loss": 0.0027, "step": 52350 }, { "epoch": 0.3444668855219963, "grad_norm": 0.002114581908466182, "learning_rate": 9.84190621881844e-06, "loss": 0.0025, "step": 52360 }, { "epoch": 0.3445326736972297, "grad_norm": 0.03224570468639902, "learning_rate": 9.841762960575909e-06, "loss": 0.0007, "step": 52370 }, { "epoch": 0.34459846187246307, "grad_norm": 0.05674370995681629, "learning_rate": 9.841619638498914e-06, "loss": 0.0015, "step": 52380 }, { "epoch": 0.3446642500476964, "grad_norm": 0.10722847912355746, "learning_rate": 9.841476252589346e-06, "loss": 0.0054, "step": 52390 }, { "epoch": 0.3447300382229298, "grad_norm": 0.1388585029998502, "learning_rate": 9.841332802849096e-06, "loss": 0.0031, "step": 52400 }, { "epoch": 0.3447958263981632, "grad_norm": 0.09425809266808155, "learning_rate": 9.841189289280052e-06, "loss": 0.0024, "step": 52410 }, { "epoch": 0.3448616145733966, "grad_norm": 0.05051114204762876, "learning_rate": 9.841045711884112e-06, "loss": 0.0018, "step": 52420 }, { "epoch": 0.34492740274863, "grad_norm": 0.14042418661522027, "learning_rate": 9.840902070663163e-06, "loss": 0.0033, "step": 52430 }, { "epoch": 0.3449931909238633, "grad_norm": 0.15037953721258307, "learning_rate": 9.840758365619101e-06, "loss": 0.0028, "step": 52440 }, { "epoch": 0.3450589790990967, "grad_norm": 0.012135103980212439, "learning_rate": 9.84061459675382e-06, "loss": 0.0041, "step": 52450 }, { "epoch": 0.3451247672743301, "grad_norm": 0.07972538191146661, "learning_rate": 9.840470764069219e-06, "loss": 0.0016, "step": 52460 }, { "epoch": 0.3451905554495635, "grad_norm": 0.22025871248053627, "learning_rate": 9.84032686756719e-06, "loss": 0.0024, "step": 52470 }, { "epoch": 0.3452563436247969, "grad_norm": 0.0659690313435091, "learning_rate": 9.840182907249631e-06, "loss": 0.0016, "step": 52480 }, { "epoch": 0.34532213180003024, "grad_norm": 0.05411818271539869, "learning_rate": 9.840038883118441e-06, "loss": 0.0015, "step": 52490 }, { "epoch": 0.34538791997526364, "grad_norm": 0.03752668448341468, "learning_rate": 9.839894795175517e-06, "loss": 0.0027, "step": 52500 }, { "epoch": 0.34545370815049703, "grad_norm": 0.10018259985138041, "learning_rate": 9.839750643422763e-06, "loss": 0.0037, "step": 52510 }, { "epoch": 0.3455194963257304, "grad_norm": 0.05075444050410642, "learning_rate": 9.839606427862074e-06, "loss": 0.004, "step": 52520 }, { "epoch": 0.3455852845009638, "grad_norm": 0.372223557793772, "learning_rate": 9.839462148495356e-06, "loss": 0.0028, "step": 52530 }, { "epoch": 0.34565107267619716, "grad_norm": 0.07345100283637568, "learning_rate": 9.839317805324507e-06, "loss": 0.003, "step": 52540 }, { "epoch": 0.34571686085143055, "grad_norm": 0.08903290404106136, "learning_rate": 9.839173398351433e-06, "loss": 0.0011, "step": 52550 }, { "epoch": 0.34578264902666395, "grad_norm": 0.014706806218335424, "learning_rate": 9.839028927578039e-06, "loss": 0.0027, "step": 52560 }, { "epoch": 0.34584843720189734, "grad_norm": 0.19716600325375266, "learning_rate": 9.838884393006228e-06, "loss": 0.0054, "step": 52570 }, { "epoch": 0.34591422537713074, "grad_norm": 0.22927362307059235, "learning_rate": 9.838739794637903e-06, "loss": 0.0018, "step": 52580 }, { "epoch": 0.3459800135523641, "grad_norm": 0.027543490410921218, "learning_rate": 9.838595132474973e-06, "loss": 0.0025, "step": 52590 }, { "epoch": 0.34604580172759747, "grad_norm": 0.18484350145243525, "learning_rate": 9.838450406519347e-06, "loss": 0.0026, "step": 52600 }, { "epoch": 0.34611158990283086, "grad_norm": 0.12044493272516464, "learning_rate": 9.83830561677293e-06, "loss": 0.003, "step": 52610 }, { "epoch": 0.34617737807806426, "grad_norm": 0.027335008208733882, "learning_rate": 9.838160763237632e-06, "loss": 0.0016, "step": 52620 }, { "epoch": 0.34624316625329765, "grad_norm": 0.1748913150880353, "learning_rate": 9.838015845915363e-06, "loss": 0.0024, "step": 52630 }, { "epoch": 0.346308954428531, "grad_norm": 0.1328675478732809, "learning_rate": 9.837870864808033e-06, "loss": 0.0028, "step": 52640 }, { "epoch": 0.3463747426037644, "grad_norm": 0.10442163942021379, "learning_rate": 9.837725819917555e-06, "loss": 0.0027, "step": 52650 }, { "epoch": 0.3464405307789978, "grad_norm": 0.10773871317044342, "learning_rate": 9.837580711245838e-06, "loss": 0.0023, "step": 52660 }, { "epoch": 0.3465063189542312, "grad_norm": 0.07273447621509088, "learning_rate": 9.837435538794799e-06, "loss": 0.0048, "step": 52670 }, { "epoch": 0.34657210712946457, "grad_norm": 0.2174678637511761, "learning_rate": 9.83729030256635e-06, "loss": 0.0053, "step": 52680 }, { "epoch": 0.3466378953046979, "grad_norm": 0.4758314984596935, "learning_rate": 9.837145002562408e-06, "loss": 0.0039, "step": 52690 }, { "epoch": 0.3467036834799313, "grad_norm": 0.07724788991645468, "learning_rate": 9.836999638784883e-06, "loss": 0.004, "step": 52700 }, { "epoch": 0.3467694716551647, "grad_norm": 0.07348563000645639, "learning_rate": 9.836854211235699e-06, "loss": 0.0057, "step": 52710 }, { "epoch": 0.3468352598303981, "grad_norm": 0.06427618575610956, "learning_rate": 9.836708719916767e-06, "loss": 0.0017, "step": 52720 }, { "epoch": 0.3469010480056315, "grad_norm": 0.09178470278161036, "learning_rate": 9.836563164830008e-06, "loss": 0.0025, "step": 52730 }, { "epoch": 0.3469668361808648, "grad_norm": 0.12139321176122836, "learning_rate": 9.836417545977342e-06, "loss": 0.0019, "step": 52740 }, { "epoch": 0.3470326243560982, "grad_norm": 0.22242224564916344, "learning_rate": 9.836271863360686e-06, "loss": 0.0024, "step": 52750 }, { "epoch": 0.3470984125313316, "grad_norm": 0.0008422462917532557, "learning_rate": 9.836126116981964e-06, "loss": 0.0019, "step": 52760 }, { "epoch": 0.347164200706565, "grad_norm": 0.04491176382215499, "learning_rate": 9.835980306843097e-06, "loss": 0.0014, "step": 52770 }, { "epoch": 0.3472299888817984, "grad_norm": 0.18912208447588869, "learning_rate": 9.835834432946003e-06, "loss": 0.004, "step": 52780 }, { "epoch": 0.3472957770570318, "grad_norm": 0.16016915552298044, "learning_rate": 9.835688495292611e-06, "loss": 0.0018, "step": 52790 }, { "epoch": 0.34736156523226513, "grad_norm": 1.1710882519424548, "learning_rate": 9.835542493884843e-06, "loss": 0.005, "step": 52800 }, { "epoch": 0.34742735340749853, "grad_norm": 0.0009449936936848569, "learning_rate": 9.83539642872462e-06, "loss": 0.0014, "step": 52810 }, { "epoch": 0.3474931415827319, "grad_norm": 0.11391698158090696, "learning_rate": 9.835250299813875e-06, "loss": 0.0018, "step": 52820 }, { "epoch": 0.3475589297579653, "grad_norm": 0.04939035414420409, "learning_rate": 9.835104107154527e-06, "loss": 0.0012, "step": 52830 }, { "epoch": 0.3476247179331987, "grad_norm": 0.0508632414642176, "learning_rate": 9.83495785074851e-06, "loss": 0.0018, "step": 52840 }, { "epoch": 0.34769050610843205, "grad_norm": 0.06415456981081682, "learning_rate": 9.83481153059775e-06, "loss": 0.0028, "step": 52850 }, { "epoch": 0.34775629428366545, "grad_norm": 0.09067634268194519, "learning_rate": 9.834665146704174e-06, "loss": 0.0047, "step": 52860 }, { "epoch": 0.34782208245889884, "grad_norm": 0.07279226849525137, "learning_rate": 9.834518699069713e-06, "loss": 0.0023, "step": 52870 }, { "epoch": 0.34788787063413223, "grad_norm": 0.025562915846605114, "learning_rate": 9.8343721876963e-06, "loss": 0.0019, "step": 52880 }, { "epoch": 0.34795365880936563, "grad_norm": 0.21033018595428543, "learning_rate": 9.834225612585864e-06, "loss": 0.0018, "step": 52890 }, { "epoch": 0.34801944698459897, "grad_norm": 0.057683976456209284, "learning_rate": 9.83407897374034e-06, "loss": 0.0028, "step": 52900 }, { "epoch": 0.34808523515983236, "grad_norm": 0.10829588272556111, "learning_rate": 9.833932271161657e-06, "loss": 0.0024, "step": 52910 }, { "epoch": 0.34815102333506576, "grad_norm": 0.1037519760546481, "learning_rate": 9.833785504851753e-06, "loss": 0.0023, "step": 52920 }, { "epoch": 0.34821681151029915, "grad_norm": 0.14523120203490564, "learning_rate": 9.833638674812563e-06, "loss": 0.0031, "step": 52930 }, { "epoch": 0.34828259968553255, "grad_norm": 0.03738460287263662, "learning_rate": 9.83349178104602e-06, "loss": 0.0037, "step": 52940 }, { "epoch": 0.3483483878607659, "grad_norm": 0.08509126619236435, "learning_rate": 9.833344823554065e-06, "loss": 0.0019, "step": 52950 }, { "epoch": 0.3484141760359993, "grad_norm": 0.08327074170170523, "learning_rate": 9.83319780233863e-06, "loss": 0.0055, "step": 52960 }, { "epoch": 0.3484799642112327, "grad_norm": 0.06732740347739771, "learning_rate": 9.833050717401657e-06, "loss": 0.003, "step": 52970 }, { "epoch": 0.34854575238646607, "grad_norm": 0.14648306267625905, "learning_rate": 9.832903568745085e-06, "loss": 0.0035, "step": 52980 }, { "epoch": 0.34861154056169946, "grad_norm": 0.018535327935585127, "learning_rate": 9.832756356370852e-06, "loss": 0.0018, "step": 52990 }, { "epoch": 0.3486773287369328, "grad_norm": 0.05800427271735093, "learning_rate": 9.832609080280901e-06, "loss": 0.0017, "step": 53000 }, { "epoch": 0.3487431169121662, "grad_norm": 0.026431430344412057, "learning_rate": 9.832461740477174e-06, "loss": 0.0017, "step": 53010 }, { "epoch": 0.3488089050873996, "grad_norm": 0.1765442072292481, "learning_rate": 9.832314336961613e-06, "loss": 0.0025, "step": 53020 }, { "epoch": 0.348874693262633, "grad_norm": 0.04772583928747844, "learning_rate": 9.832166869736159e-06, "loss": 0.0025, "step": 53030 }, { "epoch": 0.3489404814378664, "grad_norm": 0.14961230027652478, "learning_rate": 9.832019338802759e-06, "loss": 0.0028, "step": 53040 }, { "epoch": 0.3490062696130997, "grad_norm": 0.03341938346938574, "learning_rate": 9.831871744163358e-06, "loss": 0.0021, "step": 53050 }, { "epoch": 0.3490720577883331, "grad_norm": 0.06450535060965672, "learning_rate": 9.831724085819898e-06, "loss": 0.0024, "step": 53060 }, { "epoch": 0.3491378459635665, "grad_norm": 0.042228287545588536, "learning_rate": 9.83157636377433e-06, "loss": 0.0016, "step": 53070 }, { "epoch": 0.3492036341387999, "grad_norm": 0.01718352740049158, "learning_rate": 9.831428578028604e-06, "loss": 0.0021, "step": 53080 }, { "epoch": 0.3492694223140333, "grad_norm": 0.10992126689212187, "learning_rate": 9.831280728584662e-06, "loss": 0.0026, "step": 53090 }, { "epoch": 0.34933521048926663, "grad_norm": 0.11991883679709617, "learning_rate": 9.831132815444457e-06, "loss": 0.0029, "step": 53100 }, { "epoch": 0.34940099866450003, "grad_norm": 0.041343539320876393, "learning_rate": 9.830984838609937e-06, "loss": 0.0013, "step": 53110 }, { "epoch": 0.3494667868397334, "grad_norm": 0.08511000837369738, "learning_rate": 9.830836798083055e-06, "loss": 0.0027, "step": 53120 }, { "epoch": 0.3495325750149668, "grad_norm": 0.1852789365777099, "learning_rate": 9.830688693865763e-06, "loss": 0.002, "step": 53130 }, { "epoch": 0.3495983631902002, "grad_norm": 0.178815623653656, "learning_rate": 9.83054052596001e-06, "loss": 0.0025, "step": 53140 }, { "epoch": 0.34966415136543355, "grad_norm": 0.07663083761431136, "learning_rate": 9.830392294367756e-06, "loss": 0.0032, "step": 53150 }, { "epoch": 0.34972993954066695, "grad_norm": 0.053680383847687994, "learning_rate": 9.830243999090949e-06, "loss": 0.003, "step": 53160 }, { "epoch": 0.34979572771590034, "grad_norm": 0.13540403739590298, "learning_rate": 9.830095640131545e-06, "loss": 0.0044, "step": 53170 }, { "epoch": 0.34986151589113373, "grad_norm": 0.1339753092584364, "learning_rate": 9.829947217491504e-06, "loss": 0.0027, "step": 53180 }, { "epoch": 0.34992730406636713, "grad_norm": 0.08237460246894342, "learning_rate": 9.829798731172779e-06, "loss": 0.0017, "step": 53190 }, { "epoch": 0.34999309224160047, "grad_norm": 0.2778921589348618, "learning_rate": 9.82965018117733e-06, "loss": 0.0035, "step": 53200 }, { "epoch": 0.35005888041683386, "grad_norm": 0.1638557779817386, "learning_rate": 9.829501567507115e-06, "loss": 0.006, "step": 53210 }, { "epoch": 0.35012466859206726, "grad_norm": 0.06592647756306294, "learning_rate": 9.829352890164092e-06, "loss": 0.0022, "step": 53220 }, { "epoch": 0.35019045676730065, "grad_norm": 0.08679607970625708, "learning_rate": 9.829204149150222e-06, "loss": 0.0034, "step": 53230 }, { "epoch": 0.35025624494253405, "grad_norm": 0.0748666942200947, "learning_rate": 9.829055344467467e-06, "loss": 0.0016, "step": 53240 }, { "epoch": 0.35032203311776744, "grad_norm": 0.03665646306972698, "learning_rate": 9.828906476117787e-06, "loss": 0.0023, "step": 53250 }, { "epoch": 0.3503878212930008, "grad_norm": 0.09717579933592747, "learning_rate": 9.828757544103144e-06, "loss": 0.0033, "step": 53260 }, { "epoch": 0.3504536094682342, "grad_norm": 0.008251557562886118, "learning_rate": 9.828608548425506e-06, "loss": 0.0044, "step": 53270 }, { "epoch": 0.35051939764346757, "grad_norm": 0.0779453284900064, "learning_rate": 9.828459489086833e-06, "loss": 0.0018, "step": 53280 }, { "epoch": 0.35058518581870096, "grad_norm": 0.10258743318461894, "learning_rate": 9.828310366089093e-06, "loss": 0.002, "step": 53290 }, { "epoch": 0.35065097399393436, "grad_norm": 0.12439934740810382, "learning_rate": 9.82816117943425e-06, "loss": 0.0023, "step": 53300 }, { "epoch": 0.3507167621691677, "grad_norm": 0.08435578023512616, "learning_rate": 9.828011929124272e-06, "loss": 0.0029, "step": 53310 }, { "epoch": 0.3507825503444011, "grad_norm": 0.007682895133155692, "learning_rate": 9.827862615161126e-06, "loss": 0.0011, "step": 53320 }, { "epoch": 0.3508483385196345, "grad_norm": 0.0017870748992519256, "learning_rate": 9.827713237546781e-06, "loss": 0.0054, "step": 53330 }, { "epoch": 0.3509141266948679, "grad_norm": 0.07657817332374209, "learning_rate": 9.827563796283206e-06, "loss": 0.0013, "step": 53340 }, { "epoch": 0.3509799148701013, "grad_norm": 0.17430838490614514, "learning_rate": 9.827414291372373e-06, "loss": 0.0039, "step": 53350 }, { "epoch": 0.3510457030453346, "grad_norm": 0.12584856420020393, "learning_rate": 9.827264722816251e-06, "loss": 0.0025, "step": 53360 }, { "epoch": 0.351111491220568, "grad_norm": 0.024919721482881484, "learning_rate": 9.827115090616812e-06, "loss": 0.0011, "step": 53370 }, { "epoch": 0.3511772793958014, "grad_norm": 0.21775512638376718, "learning_rate": 9.826965394776031e-06, "loss": 0.0025, "step": 53380 }, { "epoch": 0.3512430675710348, "grad_norm": 0.19991264422067956, "learning_rate": 9.82681563529588e-06, "loss": 0.0034, "step": 53390 }, { "epoch": 0.3513088557462682, "grad_norm": 0.014841512791258983, "learning_rate": 9.826665812178334e-06, "loss": 0.0026, "step": 53400 }, { "epoch": 0.35137464392150153, "grad_norm": 0.2437539917031869, "learning_rate": 9.826515925425366e-06, "loss": 0.0036, "step": 53410 }, { "epoch": 0.3514404320967349, "grad_norm": 0.06977137928048903, "learning_rate": 9.826365975038955e-06, "loss": 0.0031, "step": 53420 }, { "epoch": 0.3515062202719683, "grad_norm": 0.05194426128352336, "learning_rate": 9.826215961021077e-06, "loss": 0.0038, "step": 53430 }, { "epoch": 0.3515720084472017, "grad_norm": 0.035251190383956535, "learning_rate": 9.82606588337371e-06, "loss": 0.0013, "step": 53440 }, { "epoch": 0.3516377966224351, "grad_norm": 0.021080148729119567, "learning_rate": 9.825915742098831e-06, "loss": 0.0028, "step": 53450 }, { "epoch": 0.35170358479766844, "grad_norm": 0.11323298309767318, "learning_rate": 9.825765537198422e-06, "loss": 0.0029, "step": 53460 }, { "epoch": 0.35176937297290184, "grad_norm": 0.06851351238630163, "learning_rate": 9.825615268674462e-06, "loss": 0.0031, "step": 53470 }, { "epoch": 0.35183516114813523, "grad_norm": 0.05875948107436786, "learning_rate": 9.825464936528931e-06, "loss": 0.0044, "step": 53480 }, { "epoch": 0.35190094932336863, "grad_norm": 0.09611263654647084, "learning_rate": 9.825314540763815e-06, "loss": 0.0018, "step": 53490 }, { "epoch": 0.351966737498602, "grad_norm": 0.012194576952525215, "learning_rate": 9.825164081381093e-06, "loss": 0.0043, "step": 53500 }, { "epoch": 0.35203252567383536, "grad_norm": 0.019686502181380426, "learning_rate": 9.82501355838275e-06, "loss": 0.0022, "step": 53510 }, { "epoch": 0.35209831384906876, "grad_norm": 0.11261345282837265, "learning_rate": 9.82486297177077e-06, "loss": 0.0018, "step": 53520 }, { "epoch": 0.35216410202430215, "grad_norm": 0.07847445789450765, "learning_rate": 9.82471232154714e-06, "loss": 0.0041, "step": 53530 }, { "epoch": 0.35222989019953554, "grad_norm": 0.19610062348753676, "learning_rate": 9.824561607713843e-06, "loss": 0.0032, "step": 53540 }, { "epoch": 0.35229567837476894, "grad_norm": 0.06767979738822226, "learning_rate": 9.824410830272868e-06, "loss": 0.0027, "step": 53550 }, { "epoch": 0.3523614665500023, "grad_norm": 0.07495090454532849, "learning_rate": 9.824259989226206e-06, "loss": 0.0024, "step": 53560 }, { "epoch": 0.3524272547252357, "grad_norm": 0.01946197041526212, "learning_rate": 9.82410908457584e-06, "loss": 0.0015, "step": 53570 }, { "epoch": 0.35249304290046907, "grad_norm": 0.048900501847185945, "learning_rate": 9.823958116323763e-06, "loss": 0.0021, "step": 53580 }, { "epoch": 0.35255883107570246, "grad_norm": 0.4861036024732884, "learning_rate": 9.823807084471963e-06, "loss": 0.0015, "step": 53590 }, { "epoch": 0.35262461925093586, "grad_norm": 0.2146737219871639, "learning_rate": 9.823655989022434e-06, "loss": 0.0049, "step": 53600 }, { "epoch": 0.3526904074261692, "grad_norm": 0.20255399958746198, "learning_rate": 9.823504829977166e-06, "loss": 0.003, "step": 53610 }, { "epoch": 0.3527561956014026, "grad_norm": 0.023395723987365037, "learning_rate": 9.823353607338152e-06, "loss": 0.0019, "step": 53620 }, { "epoch": 0.352821983776636, "grad_norm": 0.09483380620745456, "learning_rate": 9.823202321107388e-06, "loss": 0.002, "step": 53630 }, { "epoch": 0.3528877719518694, "grad_norm": 0.11323959251858307, "learning_rate": 9.823050971286866e-06, "loss": 0.0023, "step": 53640 }, { "epoch": 0.3529535601271028, "grad_norm": 0.04697942693717127, "learning_rate": 9.822899557878582e-06, "loss": 0.0018, "step": 53650 }, { "epoch": 0.3530193483023361, "grad_norm": 0.0846194438479311, "learning_rate": 9.822748080884534e-06, "loss": 0.003, "step": 53660 }, { "epoch": 0.3530851364775695, "grad_norm": 0.08953193688174721, "learning_rate": 9.822596540306717e-06, "loss": 0.0023, "step": 53670 }, { "epoch": 0.3531509246528029, "grad_norm": 0.06870346912527021, "learning_rate": 9.82244493614713e-06, "loss": 0.0019, "step": 53680 }, { "epoch": 0.3532167128280363, "grad_norm": 0.1696972659819099, "learning_rate": 9.822293268407772e-06, "loss": 0.0034, "step": 53690 }, { "epoch": 0.3532825010032697, "grad_norm": 0.20960392034556086, "learning_rate": 9.822141537090642e-06, "loss": 0.0036, "step": 53700 }, { "epoch": 0.3533482891785031, "grad_norm": 0.018911662259947, "learning_rate": 9.82198974219774e-06, "loss": 0.0016, "step": 53710 }, { "epoch": 0.3534140773537364, "grad_norm": 0.1191023789888118, "learning_rate": 9.821837883731067e-06, "loss": 0.0014, "step": 53720 }, { "epoch": 0.3534798655289698, "grad_norm": 0.025197412472908298, "learning_rate": 9.821685961692626e-06, "loss": 0.0023, "step": 53730 }, { "epoch": 0.3535456537042032, "grad_norm": 0.01772123607372011, "learning_rate": 9.821533976084422e-06, "loss": 0.0021, "step": 53740 }, { "epoch": 0.3536114418794366, "grad_norm": 0.034114444593312275, "learning_rate": 9.821381926908453e-06, "loss": 0.0009, "step": 53750 }, { "epoch": 0.35367723005467, "grad_norm": 0.03996876695336014, "learning_rate": 9.82122981416673e-06, "loss": 0.0017, "step": 53760 }, { "epoch": 0.35374301822990334, "grad_norm": 0.07461441676594699, "learning_rate": 9.821077637861256e-06, "loss": 0.0027, "step": 53770 }, { "epoch": 0.35380880640513673, "grad_norm": 0.04657523412293579, "learning_rate": 9.820925397994035e-06, "loss": 0.0021, "step": 53780 }, { "epoch": 0.3538745945803701, "grad_norm": 0.18228670625372234, "learning_rate": 9.820773094567077e-06, "loss": 0.0033, "step": 53790 }, { "epoch": 0.3539403827556035, "grad_norm": 0.062389427103676474, "learning_rate": 9.820620727582391e-06, "loss": 0.0012, "step": 53800 }, { "epoch": 0.3540061709308369, "grad_norm": 0.01645540042636606, "learning_rate": 9.820468297041982e-06, "loss": 0.0015, "step": 53810 }, { "epoch": 0.35407195910607026, "grad_norm": 0.14646398727425708, "learning_rate": 9.820315802947865e-06, "loss": 0.003, "step": 53820 }, { "epoch": 0.35413774728130365, "grad_norm": 0.0976786664818674, "learning_rate": 9.820163245302043e-06, "loss": 0.0016, "step": 53830 }, { "epoch": 0.35420353545653704, "grad_norm": 0.08136636627413091, "learning_rate": 9.820010624106533e-06, "loss": 0.004, "step": 53840 }, { "epoch": 0.35426932363177044, "grad_norm": 0.05059787924993224, "learning_rate": 9.819857939363346e-06, "loss": 0.0025, "step": 53850 }, { "epoch": 0.35433511180700383, "grad_norm": 0.008223507951509118, "learning_rate": 9.819705191074496e-06, "loss": 0.0017, "step": 53860 }, { "epoch": 0.35440089998223717, "grad_norm": 0.2905806923129157, "learning_rate": 9.819552379241994e-06, "loss": 0.0027, "step": 53870 }, { "epoch": 0.35446668815747057, "grad_norm": 0.08330196784290152, "learning_rate": 9.819399503867857e-06, "loss": 0.002, "step": 53880 }, { "epoch": 0.35453247633270396, "grad_norm": 0.14188870781558943, "learning_rate": 9.8192465649541e-06, "loss": 0.003, "step": 53890 }, { "epoch": 0.35459826450793736, "grad_norm": 0.09311746234488277, "learning_rate": 9.81909356250274e-06, "loss": 0.0017, "step": 53900 }, { "epoch": 0.35466405268317075, "grad_norm": 0.051335199424904616, "learning_rate": 9.818940496515792e-06, "loss": 0.0022, "step": 53910 }, { "epoch": 0.3547298408584041, "grad_norm": 0.08254081637125969, "learning_rate": 9.818787366995276e-06, "loss": 0.0019, "step": 53920 }, { "epoch": 0.3547956290336375, "grad_norm": 0.18500653038436585, "learning_rate": 9.818634173943209e-06, "loss": 0.0025, "step": 53930 }, { "epoch": 0.3548614172088709, "grad_norm": 0.09556690004272109, "learning_rate": 9.818480917361615e-06, "loss": 0.0022, "step": 53940 }, { "epoch": 0.35492720538410427, "grad_norm": 0.17448665864787805, "learning_rate": 9.818327597252509e-06, "loss": 0.0022, "step": 53950 }, { "epoch": 0.35499299355933767, "grad_norm": 0.041893713726390405, "learning_rate": 9.818174213617915e-06, "loss": 0.0017, "step": 53960 }, { "epoch": 0.355058781734571, "grad_norm": 0.023951647903035594, "learning_rate": 9.818020766459858e-06, "loss": 0.0023, "step": 53970 }, { "epoch": 0.3551245699098044, "grad_norm": 0.0034963193507846395, "learning_rate": 9.817867255780356e-06, "loss": 0.0038, "step": 53980 }, { "epoch": 0.3551903580850378, "grad_norm": 0.05978710336897698, "learning_rate": 9.817713681581436e-06, "loss": 0.0021, "step": 53990 }, { "epoch": 0.3552561462602712, "grad_norm": 0.007568128069948854, "learning_rate": 9.81756004386512e-06, "loss": 0.0017, "step": 54000 }, { "epoch": 0.3553219344355046, "grad_norm": 0.027136862378961672, "learning_rate": 9.817406342633438e-06, "loss": 0.002, "step": 54010 }, { "epoch": 0.3553877226107379, "grad_norm": 0.08475123975903567, "learning_rate": 9.817252577888412e-06, "loss": 0.0027, "step": 54020 }, { "epoch": 0.3554535107859713, "grad_norm": 0.0771125233123233, "learning_rate": 9.817098749632072e-06, "loss": 0.0011, "step": 54030 }, { "epoch": 0.3555192989612047, "grad_norm": 0.12671091584607963, "learning_rate": 9.816944857866446e-06, "loss": 0.0032, "step": 54040 }, { "epoch": 0.3555850871364381, "grad_norm": 0.11607775054456639, "learning_rate": 9.816790902593563e-06, "loss": 0.0023, "step": 54050 }, { "epoch": 0.3556508753116715, "grad_norm": 0.08590077752299101, "learning_rate": 9.81663688381545e-06, "loss": 0.0014, "step": 54060 }, { "epoch": 0.35571666348690484, "grad_norm": 0.0681398589254211, "learning_rate": 9.816482801534141e-06, "loss": 0.002, "step": 54070 }, { "epoch": 0.35578245166213823, "grad_norm": 0.08978353896595397, "learning_rate": 9.816328655751665e-06, "loss": 0.0034, "step": 54080 }, { "epoch": 0.3558482398373716, "grad_norm": 0.10558553379036402, "learning_rate": 9.816174446470057e-06, "loss": 0.0015, "step": 54090 }, { "epoch": 0.355914028012605, "grad_norm": 0.09923749309170753, "learning_rate": 9.816020173691347e-06, "loss": 0.0019, "step": 54100 }, { "epoch": 0.3559798161878384, "grad_norm": 0.08152326328747027, "learning_rate": 9.815865837417571e-06, "loss": 0.0028, "step": 54110 }, { "epoch": 0.35604560436307175, "grad_norm": 0.06148500042273246, "learning_rate": 9.815711437650762e-06, "loss": 0.0027, "step": 54120 }, { "epoch": 0.35611139253830515, "grad_norm": 0.012738350667087527, "learning_rate": 9.815556974392956e-06, "loss": 0.0016, "step": 54130 }, { "epoch": 0.35617718071353854, "grad_norm": 0.0634240646793384, "learning_rate": 9.815402447646194e-06, "loss": 0.0022, "step": 54140 }, { "epoch": 0.35624296888877194, "grad_norm": 0.07507238868411019, "learning_rate": 9.815247857412507e-06, "loss": 0.003, "step": 54150 }, { "epoch": 0.35630875706400533, "grad_norm": 0.05771074316152239, "learning_rate": 9.815093203693935e-06, "loss": 0.0016, "step": 54160 }, { "epoch": 0.3563745452392387, "grad_norm": 0.225351446161527, "learning_rate": 9.81493848649252e-06, "loss": 0.0034, "step": 54170 }, { "epoch": 0.35644033341447207, "grad_norm": 0.11100620491716535, "learning_rate": 9.8147837058103e-06, "loss": 0.0027, "step": 54180 }, { "epoch": 0.35650612158970546, "grad_norm": 0.06507645136148626, "learning_rate": 9.814628861649313e-06, "loss": 0.0023, "step": 54190 }, { "epoch": 0.35657190976493885, "grad_norm": 0.06944149406648736, "learning_rate": 9.814473954011603e-06, "loss": 0.0027, "step": 54200 }, { "epoch": 0.35663769794017225, "grad_norm": 0.05088995997163959, "learning_rate": 9.814318982899212e-06, "loss": 0.0038, "step": 54210 }, { "epoch": 0.35670348611540564, "grad_norm": 0.23129950127080143, "learning_rate": 9.814163948314183e-06, "loss": 0.0034, "step": 54220 }, { "epoch": 0.356769274290639, "grad_norm": 0.1329145092411907, "learning_rate": 9.81400885025856e-06, "loss": 0.0016, "step": 54230 }, { "epoch": 0.3568350624658724, "grad_norm": 0.1548803530416055, "learning_rate": 9.81385368873439e-06, "loss": 0.0018, "step": 54240 }, { "epoch": 0.35690085064110577, "grad_norm": 0.044006756024792495, "learning_rate": 9.813698463743715e-06, "loss": 0.0019, "step": 54250 }, { "epoch": 0.35696663881633917, "grad_norm": 0.17594356564760347, "learning_rate": 9.813543175288582e-06, "loss": 0.0017, "step": 54260 }, { "epoch": 0.35703242699157256, "grad_norm": 0.13218864869514174, "learning_rate": 9.81338782337104e-06, "loss": 0.0011, "step": 54270 }, { "epoch": 0.3570982151668059, "grad_norm": 0.17081236093113894, "learning_rate": 9.813232407993138e-06, "loss": 0.0016, "step": 54280 }, { "epoch": 0.3571640033420393, "grad_norm": 0.05319990462590746, "learning_rate": 9.813076929156924e-06, "loss": 0.0024, "step": 54290 }, { "epoch": 0.3572297915172727, "grad_norm": 0.16394982715701673, "learning_rate": 9.812921386864446e-06, "loss": 0.0016, "step": 54300 }, { "epoch": 0.3572955796925061, "grad_norm": 0.0923487967019849, "learning_rate": 9.812765781117756e-06, "loss": 0.0012, "step": 54310 }, { "epoch": 0.3573613678677395, "grad_norm": 0.0477635058064609, "learning_rate": 9.812610111918908e-06, "loss": 0.0024, "step": 54320 }, { "epoch": 0.3574271560429728, "grad_norm": 0.07530209034345287, "learning_rate": 9.81245437926995e-06, "loss": 0.002, "step": 54330 }, { "epoch": 0.3574929442182062, "grad_norm": 0.3203598875826221, "learning_rate": 9.812298583172938e-06, "loss": 0.0024, "step": 54340 }, { "epoch": 0.3575587323934396, "grad_norm": 0.20338285743496218, "learning_rate": 9.812142723629925e-06, "loss": 0.004, "step": 54350 }, { "epoch": 0.357624520568673, "grad_norm": 0.05124629821611268, "learning_rate": 9.811986800642967e-06, "loss": 0.002, "step": 54360 }, { "epoch": 0.3576903087439064, "grad_norm": 0.1576740807429318, "learning_rate": 9.811830814214119e-06, "loss": 0.0024, "step": 54370 }, { "epoch": 0.35775609691913973, "grad_norm": 0.05165530310353644, "learning_rate": 9.811674764345437e-06, "loss": 0.0019, "step": 54380 }, { "epoch": 0.3578218850943731, "grad_norm": 0.2356549991987253, "learning_rate": 9.811518651038978e-06, "loss": 0.0025, "step": 54390 }, { "epoch": 0.3578876732696065, "grad_norm": 0.18224898008450952, "learning_rate": 9.811362474296804e-06, "loss": 0.0018, "step": 54400 }, { "epoch": 0.3579534614448399, "grad_norm": 0.3049717085230612, "learning_rate": 9.811206234120967e-06, "loss": 0.0019, "step": 54410 }, { "epoch": 0.3580192496200733, "grad_norm": 0.09383095617838126, "learning_rate": 9.811049930513533e-06, "loss": 0.0029, "step": 54420 }, { "epoch": 0.35808503779530665, "grad_norm": 0.08202175203791819, "learning_rate": 9.810893563476563e-06, "loss": 0.0024, "step": 54430 }, { "epoch": 0.35815082597054004, "grad_norm": 0.12929300087025286, "learning_rate": 9.810737133012113e-06, "loss": 0.0015, "step": 54440 }, { "epoch": 0.35821661414577344, "grad_norm": 0.05769197381256482, "learning_rate": 9.810580639122248e-06, "loss": 0.0025, "step": 54450 }, { "epoch": 0.35828240232100683, "grad_norm": 0.19134971077918048, "learning_rate": 9.810424081809036e-06, "loss": 0.004, "step": 54460 }, { "epoch": 0.3583481904962402, "grad_norm": 0.11320968247776232, "learning_rate": 9.810267461074534e-06, "loss": 0.0019, "step": 54470 }, { "epoch": 0.35841397867147357, "grad_norm": 0.12982960362838095, "learning_rate": 9.810110776920812e-06, "loss": 0.0034, "step": 54480 }, { "epoch": 0.35847976684670696, "grad_norm": 0.14118850540907815, "learning_rate": 9.809954029349932e-06, "loss": 0.0039, "step": 54490 }, { "epoch": 0.35854555502194035, "grad_norm": 0.0040387581855827635, "learning_rate": 9.809797218363963e-06, "loss": 0.0015, "step": 54500 }, { "epoch": 0.35861134319717375, "grad_norm": 0.09183653168699817, "learning_rate": 9.809640343964972e-06, "loss": 0.0025, "step": 54510 }, { "epoch": 0.35867713137240714, "grad_norm": 0.11449888150198986, "learning_rate": 9.809483406155026e-06, "loss": 0.0015, "step": 54520 }, { "epoch": 0.3587429195476405, "grad_norm": 0.05922628461045736, "learning_rate": 9.809326404936195e-06, "loss": 0.0025, "step": 54530 }, { "epoch": 0.3588087077228739, "grad_norm": 0.3919149150196372, "learning_rate": 9.80916934031055e-06, "loss": 0.0036, "step": 54540 }, { "epoch": 0.35887449589810727, "grad_norm": 0.1179130075139655, "learning_rate": 9.80901221228016e-06, "loss": 0.0027, "step": 54550 }, { "epoch": 0.35894028407334067, "grad_norm": 0.024939775426986506, "learning_rate": 9.808855020847097e-06, "loss": 0.0016, "step": 54560 }, { "epoch": 0.35900607224857406, "grad_norm": 0.12253876316421872, "learning_rate": 9.808697766013436e-06, "loss": 0.0034, "step": 54570 }, { "epoch": 0.3590718604238074, "grad_norm": 0.04603914677055427, "learning_rate": 9.808540447781246e-06, "loss": 0.0017, "step": 54580 }, { "epoch": 0.3591376485990408, "grad_norm": 0.2595278277427899, "learning_rate": 9.808383066152603e-06, "loss": 0.0026, "step": 54590 }, { "epoch": 0.3592034367742742, "grad_norm": 0.2820351173139383, "learning_rate": 9.80822562112958e-06, "loss": 0.0024, "step": 54600 }, { "epoch": 0.3592692249495076, "grad_norm": 0.024835038375438965, "learning_rate": 9.80806811271426e-06, "loss": 0.0052, "step": 54610 }, { "epoch": 0.359335013124741, "grad_norm": 0.17555272476348643, "learning_rate": 9.80791054090871e-06, "loss": 0.0033, "step": 54620 }, { "epoch": 0.3594008012999743, "grad_norm": 0.08288841606169318, "learning_rate": 9.807752905715015e-06, "loss": 0.0023, "step": 54630 }, { "epoch": 0.3594665894752077, "grad_norm": 0.210431484761351, "learning_rate": 9.807595207135248e-06, "loss": 0.0032, "step": 54640 }, { "epoch": 0.3595323776504411, "grad_norm": 0.12482234202202397, "learning_rate": 9.80743744517149e-06, "loss": 0.0021, "step": 54650 }, { "epoch": 0.3595981658256745, "grad_norm": 0.12410467583265355, "learning_rate": 9.807279619825823e-06, "loss": 0.0027, "step": 54660 }, { "epoch": 0.3596639540009079, "grad_norm": 0.08175074206346879, "learning_rate": 9.807121731100326e-06, "loss": 0.0016, "step": 54670 }, { "epoch": 0.3597297421761413, "grad_norm": 1.3904705455235296, "learning_rate": 9.80696377899708e-06, "loss": 0.0021, "step": 54680 }, { "epoch": 0.3597955303513746, "grad_norm": 0.08858267230297946, "learning_rate": 9.806805763518168e-06, "loss": 0.0016, "step": 54690 }, { "epoch": 0.359861318526608, "grad_norm": 0.19605511346948745, "learning_rate": 9.806647684665675e-06, "loss": 0.0041, "step": 54700 }, { "epoch": 0.3599271067018414, "grad_norm": 0.09843143933180257, "learning_rate": 9.806489542441682e-06, "loss": 0.0023, "step": 54710 }, { "epoch": 0.3599928948770748, "grad_norm": 0.17300482768974074, "learning_rate": 9.806331336848275e-06, "loss": 0.0039, "step": 54720 }, { "epoch": 0.3600586830523082, "grad_norm": 0.20667435047862923, "learning_rate": 9.806173067887542e-06, "loss": 0.0015, "step": 54730 }, { "epoch": 0.36012447122754154, "grad_norm": 0.07193801005532935, "learning_rate": 9.806014735561567e-06, "loss": 0.0024, "step": 54740 }, { "epoch": 0.36019025940277494, "grad_norm": 0.15819726377504117, "learning_rate": 9.805856339872438e-06, "loss": 0.0033, "step": 54750 }, { "epoch": 0.36025604757800833, "grad_norm": 0.06362793644162708, "learning_rate": 9.805697880822245e-06, "loss": 0.003, "step": 54760 }, { "epoch": 0.3603218357532417, "grad_norm": 0.052616857089463606, "learning_rate": 9.805539358413074e-06, "loss": 0.0025, "step": 54770 }, { "epoch": 0.3603876239284751, "grad_norm": 0.131670246840292, "learning_rate": 9.80538077264702e-06, "loss": 0.0023, "step": 54780 }, { "epoch": 0.36045341210370846, "grad_norm": 0.20619383416725456, "learning_rate": 9.805222123526168e-06, "loss": 0.0023, "step": 54790 }, { "epoch": 0.36051920027894185, "grad_norm": 0.016232885692595365, "learning_rate": 9.805063411052613e-06, "loss": 0.0176, "step": 54800 }, { "epoch": 0.36058498845417525, "grad_norm": 0.07433546457998949, "learning_rate": 9.804904635228446e-06, "loss": 0.0014, "step": 54810 }, { "epoch": 0.36065077662940864, "grad_norm": 0.20653903912085472, "learning_rate": 9.804745796055763e-06, "loss": 0.0019, "step": 54820 }, { "epoch": 0.36071656480464204, "grad_norm": 0.04474347621869161, "learning_rate": 9.804586893536656e-06, "loss": 0.0024, "step": 54830 }, { "epoch": 0.3607823529798754, "grad_norm": 0.1395603445433095, "learning_rate": 9.80442792767322e-06, "loss": 0.002, "step": 54840 }, { "epoch": 0.36084814115510877, "grad_norm": 0.019177838057792804, "learning_rate": 9.80426889846755e-06, "loss": 0.0017, "step": 54850 }, { "epoch": 0.36091392933034216, "grad_norm": 0.1136881178826424, "learning_rate": 9.804109805921747e-06, "loss": 0.0029, "step": 54860 }, { "epoch": 0.36097971750557556, "grad_norm": 0.059166048400902685, "learning_rate": 9.803950650037903e-06, "loss": 0.0022, "step": 54870 }, { "epoch": 0.36104550568080895, "grad_norm": 0.018862502030514386, "learning_rate": 9.803791430818117e-06, "loss": 0.0068, "step": 54880 }, { "epoch": 0.3611112938560423, "grad_norm": 0.321223764471458, "learning_rate": 9.803632148264493e-06, "loss": 0.0028, "step": 54890 }, { "epoch": 0.3611770820312757, "grad_norm": 0.07397631386627745, "learning_rate": 9.803472802379126e-06, "loss": 0.0054, "step": 54900 }, { "epoch": 0.3612428702065091, "grad_norm": 0.10243569900229173, "learning_rate": 9.803313393164119e-06, "loss": 0.0036, "step": 54910 }, { "epoch": 0.3613086583817425, "grad_norm": 0.01869898978673998, "learning_rate": 9.803153920621573e-06, "loss": 0.0021, "step": 54920 }, { "epoch": 0.36137444655697587, "grad_norm": 0.1850329940874569, "learning_rate": 9.802994384753592e-06, "loss": 0.0043, "step": 54930 }, { "epoch": 0.3614402347322092, "grad_norm": 0.22271294635704858, "learning_rate": 9.802834785562276e-06, "loss": 0.0029, "step": 54940 }, { "epoch": 0.3615060229074426, "grad_norm": 0.011622366495113202, "learning_rate": 9.802675123049733e-06, "loss": 0.0016, "step": 54950 }, { "epoch": 0.361571811082676, "grad_norm": 0.10198329109068063, "learning_rate": 9.802515397218067e-06, "loss": 0.0026, "step": 54960 }, { "epoch": 0.3616375992579094, "grad_norm": 0.15018189795072992, "learning_rate": 9.802355608069382e-06, "loss": 0.0019, "step": 54970 }, { "epoch": 0.3617033874331428, "grad_norm": 0.06840354163814365, "learning_rate": 9.802195755605787e-06, "loss": 0.0039, "step": 54980 }, { "epoch": 0.3617691756083761, "grad_norm": 0.17168656474311836, "learning_rate": 9.802035839829387e-06, "loss": 0.002, "step": 54990 }, { "epoch": 0.3618349637836095, "grad_norm": 0.09342476371279088, "learning_rate": 9.801875860742292e-06, "loss": 0.0024, "step": 55000 }, { "epoch": 0.3619007519588429, "grad_norm": 0.04893850696030958, "learning_rate": 9.801715818346612e-06, "loss": 0.0028, "step": 55010 }, { "epoch": 0.3619665401340763, "grad_norm": 0.1193964059528409, "learning_rate": 9.801555712644455e-06, "loss": 0.0023, "step": 55020 }, { "epoch": 0.3620323283093097, "grad_norm": 0.13184756697944922, "learning_rate": 9.801395543637933e-06, "loss": 0.0025, "step": 55030 }, { "epoch": 0.36209811648454304, "grad_norm": 0.12270895373379272, "learning_rate": 9.801235311329158e-06, "loss": 0.003, "step": 55040 }, { "epoch": 0.36216390465977644, "grad_norm": 0.03899781070290948, "learning_rate": 9.801075015720242e-06, "loss": 0.0021, "step": 55050 }, { "epoch": 0.36222969283500983, "grad_norm": 0.04386518191599091, "learning_rate": 9.8009146568133e-06, "loss": 0.0016, "step": 55060 }, { "epoch": 0.3622954810102432, "grad_norm": 0.021556622020518692, "learning_rate": 9.80075423461044e-06, "loss": 0.0021, "step": 55070 }, { "epoch": 0.3623612691854766, "grad_norm": 0.13003223324736782, "learning_rate": 9.800593749113785e-06, "loss": 0.0029, "step": 55080 }, { "epoch": 0.36242705736070996, "grad_norm": 0.04752528044190828, "learning_rate": 9.800433200325447e-06, "loss": 0.0015, "step": 55090 }, { "epoch": 0.36249284553594335, "grad_norm": 0.056428357828504555, "learning_rate": 9.800272588247543e-06, "loss": 0.0013, "step": 55100 }, { "epoch": 0.36255863371117675, "grad_norm": 0.2902562354649512, "learning_rate": 9.800111912882191e-06, "loss": 0.0016, "step": 55110 }, { "epoch": 0.36262442188641014, "grad_norm": 0.022562427882547736, "learning_rate": 9.799951174231508e-06, "loss": 0.0016, "step": 55120 }, { "epoch": 0.36269021006164354, "grad_norm": 0.0637276516886279, "learning_rate": 9.799790372297614e-06, "loss": 0.0018, "step": 55130 }, { "epoch": 0.36275599823687693, "grad_norm": 0.2931488017287005, "learning_rate": 9.799629507082631e-06, "loss": 0.0019, "step": 55140 }, { "epoch": 0.36282178641211027, "grad_norm": 0.05051621862455846, "learning_rate": 9.799468578588678e-06, "loss": 0.007, "step": 55150 }, { "epoch": 0.36288757458734366, "grad_norm": 0.05207970774275412, "learning_rate": 9.799307586817877e-06, "loss": 0.0027, "step": 55160 }, { "epoch": 0.36295336276257706, "grad_norm": 0.07430468850455625, "learning_rate": 9.79914653177235e-06, "loss": 0.0041, "step": 55170 }, { "epoch": 0.36301915093781045, "grad_norm": 0.05308026965072037, "learning_rate": 9.798985413454221e-06, "loss": 0.0019, "step": 55180 }, { "epoch": 0.36308493911304385, "grad_norm": 0.02757059665473684, "learning_rate": 9.798824231865614e-06, "loss": 0.0029, "step": 55190 }, { "epoch": 0.3631507272882772, "grad_norm": 0.049539303502488925, "learning_rate": 9.798662987008653e-06, "loss": 0.0019, "step": 55200 }, { "epoch": 0.3632165154635106, "grad_norm": 0.3337500618344105, "learning_rate": 9.798501678885466e-06, "loss": 0.0031, "step": 55210 }, { "epoch": 0.363282303638744, "grad_norm": 0.20524650435389893, "learning_rate": 9.798340307498178e-06, "loss": 0.0028, "step": 55220 }, { "epoch": 0.36334809181397737, "grad_norm": 0.04829353730905589, "learning_rate": 9.798178872848916e-06, "loss": 0.0019, "step": 55230 }, { "epoch": 0.36341387998921076, "grad_norm": 0.0019836136902686096, "learning_rate": 9.798017374939812e-06, "loss": 0.0011, "step": 55240 }, { "epoch": 0.3634796681644441, "grad_norm": 0.10941090102111142, "learning_rate": 9.797855813772992e-06, "loss": 0.0026, "step": 55250 }, { "epoch": 0.3635454563396775, "grad_norm": 0.08938761929586884, "learning_rate": 9.797694189350586e-06, "loss": 0.0012, "step": 55260 }, { "epoch": 0.3636112445149109, "grad_norm": 0.0870215892033957, "learning_rate": 9.797532501674726e-06, "loss": 0.002, "step": 55270 }, { "epoch": 0.3636770326901443, "grad_norm": 0.18970524455929905, "learning_rate": 9.797370750747544e-06, "loss": 0.0037, "step": 55280 }, { "epoch": 0.3637428208653777, "grad_norm": 0.05463703742573444, "learning_rate": 9.79720893657117e-06, "loss": 0.0014, "step": 55290 }, { "epoch": 0.363808609040611, "grad_norm": 0.09355149977418278, "learning_rate": 9.797047059147741e-06, "loss": 0.0019, "step": 55300 }, { "epoch": 0.3638743972158444, "grad_norm": 0.042558943365242435, "learning_rate": 9.79688511847939e-06, "loss": 0.0015, "step": 55310 }, { "epoch": 0.3639401853910778, "grad_norm": 0.058043711736345066, "learning_rate": 9.79672311456825e-06, "loss": 0.0018, "step": 55320 }, { "epoch": 0.3640059735663112, "grad_norm": 0.24588180741991486, "learning_rate": 9.79656104741646e-06, "loss": 0.0021, "step": 55330 }, { "epoch": 0.3640717617415446, "grad_norm": 0.10456052689529145, "learning_rate": 9.796398917026154e-06, "loss": 0.0022, "step": 55340 }, { "epoch": 0.36413754991677794, "grad_norm": 0.01071654352588185, "learning_rate": 9.79623672339947e-06, "loss": 0.002, "step": 55350 }, { "epoch": 0.36420333809201133, "grad_norm": 0.031222270648085578, "learning_rate": 9.796074466538549e-06, "loss": 0.0014, "step": 55360 }, { "epoch": 0.3642691262672447, "grad_norm": 0.14309398014853353, "learning_rate": 9.795912146445529e-06, "loss": 0.0022, "step": 55370 }, { "epoch": 0.3643349144424781, "grad_norm": 0.06973375281679038, "learning_rate": 9.795749763122549e-06, "loss": 0.0038, "step": 55380 }, { "epoch": 0.3644007026177115, "grad_norm": 0.05679694465218973, "learning_rate": 9.795587316571748e-06, "loss": 0.003, "step": 55390 }, { "epoch": 0.36446649079294485, "grad_norm": 0.15370470788234858, "learning_rate": 9.79542480679527e-06, "loss": 0.004, "step": 55400 }, { "epoch": 0.36453227896817825, "grad_norm": 0.15936066832397894, "learning_rate": 9.79526223379526e-06, "loss": 0.0035, "step": 55410 }, { "epoch": 0.36459806714341164, "grad_norm": 0.05236036901871452, "learning_rate": 9.795099597573858e-06, "loss": 0.0029, "step": 55420 }, { "epoch": 0.36466385531864504, "grad_norm": 0.1056100299648354, "learning_rate": 9.794936898133208e-06, "loss": 0.0019, "step": 55430 }, { "epoch": 0.36472964349387843, "grad_norm": 0.09639169546846162, "learning_rate": 9.794774135475458e-06, "loss": 0.0019, "step": 55440 }, { "epoch": 0.36479543166911177, "grad_norm": 0.10435774066986223, "learning_rate": 9.79461130960275e-06, "loss": 0.0018, "step": 55450 }, { "epoch": 0.36486121984434516, "grad_norm": 0.07408045006150484, "learning_rate": 9.794448420517234e-06, "loss": 0.0032, "step": 55460 }, { "epoch": 0.36492700801957856, "grad_norm": 0.04517765008705908, "learning_rate": 9.794285468221056e-06, "loss": 0.0012, "step": 55470 }, { "epoch": 0.36499279619481195, "grad_norm": 0.10639530654079277, "learning_rate": 9.794122452716364e-06, "loss": 0.0053, "step": 55480 }, { "epoch": 0.36505858437004535, "grad_norm": 0.010784894666836523, "learning_rate": 9.79395937400531e-06, "loss": 0.0017, "step": 55490 }, { "epoch": 0.3651243725452787, "grad_norm": 0.12352930210022063, "learning_rate": 9.79379623209004e-06, "loss": 0.0025, "step": 55500 }, { "epoch": 0.3651901607205121, "grad_norm": 0.10874978187849837, "learning_rate": 9.793633026972708e-06, "loss": 0.0015, "step": 55510 }, { "epoch": 0.3652559488957455, "grad_norm": 0.05012134083288549, "learning_rate": 9.793469758655465e-06, "loss": 0.0017, "step": 55520 }, { "epoch": 0.36532173707097887, "grad_norm": 0.01953967556422253, "learning_rate": 9.793306427140461e-06, "loss": 0.0035, "step": 55530 }, { "epoch": 0.36538752524621226, "grad_norm": 0.09906891362589733, "learning_rate": 9.793143032429855e-06, "loss": 0.0022, "step": 55540 }, { "epoch": 0.3654533134214456, "grad_norm": 0.04669622134537196, "learning_rate": 9.792979574525794e-06, "loss": 0.0024, "step": 55550 }, { "epoch": 0.365519101596679, "grad_norm": 0.08583079791316212, "learning_rate": 9.792816053430438e-06, "loss": 0.0022, "step": 55560 }, { "epoch": 0.3655848897719124, "grad_norm": 0.19652077505509175, "learning_rate": 9.792652469145943e-06, "loss": 0.0036, "step": 55570 }, { "epoch": 0.3656506779471458, "grad_norm": 0.048600774175569725, "learning_rate": 9.792488821674463e-06, "loss": 0.0013, "step": 55580 }, { "epoch": 0.3657164661223792, "grad_norm": 0.16667947953380444, "learning_rate": 9.792325111018157e-06, "loss": 0.0032, "step": 55590 }, { "epoch": 0.3657822542976126, "grad_norm": 0.13634914530004597, "learning_rate": 9.792161337179185e-06, "loss": 0.0031, "step": 55600 }, { "epoch": 0.3658480424728459, "grad_norm": 0.022192139529752703, "learning_rate": 9.791997500159703e-06, "loss": 0.0026, "step": 55610 }, { "epoch": 0.3659138306480793, "grad_norm": 0.1324642218706153, "learning_rate": 9.791833599961873e-06, "loss": 0.0014, "step": 55620 }, { "epoch": 0.3659796188233127, "grad_norm": 0.07524441776357492, "learning_rate": 9.791669636587855e-06, "loss": 0.002, "step": 55630 }, { "epoch": 0.3660454069985461, "grad_norm": 0.10789261172025484, "learning_rate": 9.791505610039811e-06, "loss": 0.0022, "step": 55640 }, { "epoch": 0.3661111951737795, "grad_norm": 0.05414561343300326, "learning_rate": 9.791341520319906e-06, "loss": 0.0038, "step": 55650 }, { "epoch": 0.36617698334901283, "grad_norm": 0.26312017611738625, "learning_rate": 9.7911773674303e-06, "loss": 0.0012, "step": 55660 }, { "epoch": 0.3662427715242462, "grad_norm": 0.1017071593609333, "learning_rate": 9.791013151373157e-06, "loss": 0.0025, "step": 55670 }, { "epoch": 0.3663085596994796, "grad_norm": 0.158898095500027, "learning_rate": 9.790848872150645e-06, "loss": 0.0012, "step": 55680 }, { "epoch": 0.366374347874713, "grad_norm": 0.058263594200307606, "learning_rate": 9.790684529764926e-06, "loss": 0.0024, "step": 55690 }, { "epoch": 0.3664401360499464, "grad_norm": 0.24693750278804782, "learning_rate": 9.790520124218171e-06, "loss": 0.0036, "step": 55700 }, { "epoch": 0.36650592422517975, "grad_norm": 0.12200462562162871, "learning_rate": 9.790355655512544e-06, "loss": 0.003, "step": 55710 }, { "epoch": 0.36657171240041314, "grad_norm": 0.19352337852362123, "learning_rate": 9.790191123650217e-06, "loss": 0.0022, "step": 55720 }, { "epoch": 0.36663750057564654, "grad_norm": 0.24612956342096196, "learning_rate": 9.790026528633356e-06, "loss": 0.0017, "step": 55730 }, { "epoch": 0.36670328875087993, "grad_norm": 0.18503407082132886, "learning_rate": 9.789861870464133e-06, "loss": 0.0019, "step": 55740 }, { "epoch": 0.3667690769261133, "grad_norm": 0.09155872485869303, "learning_rate": 9.789697149144717e-06, "loss": 0.0041, "step": 55750 }, { "epoch": 0.36683486510134666, "grad_norm": 0.12633521084198984, "learning_rate": 9.789532364677282e-06, "loss": 0.0022, "step": 55760 }, { "epoch": 0.36690065327658006, "grad_norm": 0.06406683966597526, "learning_rate": 9.789367517063996e-06, "loss": 0.0049, "step": 55770 }, { "epoch": 0.36696644145181345, "grad_norm": 0.06301138667154231, "learning_rate": 9.789202606307039e-06, "loss": 0.0026, "step": 55780 }, { "epoch": 0.36703222962704685, "grad_norm": 0.010447364956334772, "learning_rate": 9.78903763240858e-06, "loss": 0.0056, "step": 55790 }, { "epoch": 0.36709801780228024, "grad_norm": 0.06552202338777696, "learning_rate": 9.788872595370797e-06, "loss": 0.0017, "step": 55800 }, { "epoch": 0.3671638059775136, "grad_norm": 0.04771497341037907, "learning_rate": 9.788707495195865e-06, "loss": 0.0028, "step": 55810 }, { "epoch": 0.367229594152747, "grad_norm": 0.06539336667835362, "learning_rate": 9.78854233188596e-06, "loss": 0.0012, "step": 55820 }, { "epoch": 0.36729538232798037, "grad_norm": 0.15130610482693105, "learning_rate": 9.78837710544326e-06, "loss": 0.0039, "step": 55830 }, { "epoch": 0.36736117050321376, "grad_norm": 0.10239553549901625, "learning_rate": 9.788211815869943e-06, "loss": 0.0023, "step": 55840 }, { "epoch": 0.36742695867844716, "grad_norm": 0.03596746259596638, "learning_rate": 9.788046463168188e-06, "loss": 0.0013, "step": 55850 }, { "epoch": 0.3674927468536805, "grad_norm": 0.10808925754405432, "learning_rate": 9.787881047340178e-06, "loss": 0.0023, "step": 55860 }, { "epoch": 0.3675585350289139, "grad_norm": 0.08590791303880314, "learning_rate": 9.78771556838809e-06, "loss": 0.0022, "step": 55870 }, { "epoch": 0.3676243232041473, "grad_norm": 0.14227482892305893, "learning_rate": 9.787550026314106e-06, "loss": 0.0023, "step": 55880 }, { "epoch": 0.3676901113793807, "grad_norm": 0.23674792211951676, "learning_rate": 9.787384421120412e-06, "loss": 0.0022, "step": 55890 }, { "epoch": 0.3677558995546141, "grad_norm": 0.02347960415446799, "learning_rate": 9.787218752809186e-06, "loss": 0.0018, "step": 55900 }, { "epoch": 0.3678216877298474, "grad_norm": 0.0831025963950356, "learning_rate": 9.787053021382618e-06, "loss": 0.0031, "step": 55910 }, { "epoch": 0.3678874759050808, "grad_norm": 0.042545838183498136, "learning_rate": 9.78688722684289e-06, "loss": 0.002, "step": 55920 }, { "epoch": 0.3679532640803142, "grad_norm": 0.07959177754127372, "learning_rate": 9.786721369192186e-06, "loss": 0.003, "step": 55930 }, { "epoch": 0.3680190522555476, "grad_norm": 0.058287053591677095, "learning_rate": 9.786555448432697e-06, "loss": 0.0021, "step": 55940 }, { "epoch": 0.368084840430781, "grad_norm": 0.060639545830505084, "learning_rate": 9.786389464566607e-06, "loss": 0.0024, "step": 55950 }, { "epoch": 0.36815062860601433, "grad_norm": 0.15617013515570324, "learning_rate": 9.786223417596107e-06, "loss": 0.0033, "step": 55960 }, { "epoch": 0.3682164167812477, "grad_norm": 0.09937634256160711, "learning_rate": 9.786057307523385e-06, "loss": 0.0018, "step": 55970 }, { "epoch": 0.3682822049564811, "grad_norm": 0.0318401774782665, "learning_rate": 9.785891134350631e-06, "loss": 0.0026, "step": 55980 }, { "epoch": 0.3683479931317145, "grad_norm": 0.10149508593900654, "learning_rate": 9.785724898080036e-06, "loss": 0.0011, "step": 55990 }, { "epoch": 0.3684137813069479, "grad_norm": 0.061526705067192225, "learning_rate": 9.785558598713789e-06, "loss": 0.0019, "step": 56000 }, { "epoch": 0.36847956948218125, "grad_norm": 0.12963893226254133, "learning_rate": 9.785392236254088e-06, "loss": 0.0023, "step": 56010 }, { "epoch": 0.36854535765741464, "grad_norm": 0.023865476121262805, "learning_rate": 9.78522581070312e-06, "loss": 0.0024, "step": 56020 }, { "epoch": 0.36861114583264804, "grad_norm": 0.0028391475918642913, "learning_rate": 9.785059322063085e-06, "loss": 0.0018, "step": 56030 }, { "epoch": 0.36867693400788143, "grad_norm": 0.06213942015863913, "learning_rate": 9.784892770336174e-06, "loss": 0.0017, "step": 56040 }, { "epoch": 0.3687427221831148, "grad_norm": 0.10366513722884978, "learning_rate": 9.784726155524586e-06, "loss": 0.0017, "step": 56050 }, { "epoch": 0.36880851035834816, "grad_norm": 0.06481048910884923, "learning_rate": 9.784559477630516e-06, "loss": 0.0011, "step": 56060 }, { "epoch": 0.36887429853358156, "grad_norm": 0.019011963849548324, "learning_rate": 9.784392736656159e-06, "loss": 0.0016, "step": 56070 }, { "epoch": 0.36894008670881495, "grad_norm": 0.005663529497318376, "learning_rate": 9.784225932603718e-06, "loss": 0.0024, "step": 56080 }, { "epoch": 0.36900587488404835, "grad_norm": 0.03286511124535849, "learning_rate": 9.784059065475389e-06, "loss": 0.0015, "step": 56090 }, { "epoch": 0.36907166305928174, "grad_norm": 0.05270714952469, "learning_rate": 9.783892135273374e-06, "loss": 0.002, "step": 56100 }, { "epoch": 0.36913745123451513, "grad_norm": 0.10085217075625617, "learning_rate": 9.783725141999872e-06, "loss": 0.0033, "step": 56110 }, { "epoch": 0.3692032394097485, "grad_norm": 0.13024535211787716, "learning_rate": 9.783558085657085e-06, "loss": 0.0031, "step": 56120 }, { "epoch": 0.36926902758498187, "grad_norm": 0.18082689443654845, "learning_rate": 9.783390966247217e-06, "loss": 0.0017, "step": 56130 }, { "epoch": 0.36933481576021526, "grad_norm": 0.1525652217215608, "learning_rate": 9.78322378377247e-06, "loss": 0.0029, "step": 56140 }, { "epoch": 0.36940060393544866, "grad_norm": 0.05090210142193525, "learning_rate": 9.78305653823505e-06, "loss": 0.0029, "step": 56150 }, { "epoch": 0.36946639211068205, "grad_norm": 0.003430059797781568, "learning_rate": 9.782889229637158e-06, "loss": 0.0015, "step": 56160 }, { "epoch": 0.3695321802859154, "grad_norm": 0.09153627623331222, "learning_rate": 9.782721857981003e-06, "loss": 0.0014, "step": 56170 }, { "epoch": 0.3695979684611488, "grad_norm": 0.06296665914627324, "learning_rate": 9.78255442326879e-06, "loss": 0.0025, "step": 56180 }, { "epoch": 0.3696637566363822, "grad_norm": 0.052006044615916146, "learning_rate": 9.78238692550273e-06, "loss": 0.0016, "step": 56190 }, { "epoch": 0.3697295448116156, "grad_norm": 0.15487017318070084, "learning_rate": 9.782219364685027e-06, "loss": 0.0018, "step": 56200 }, { "epoch": 0.36979533298684897, "grad_norm": 0.12648261673469352, "learning_rate": 9.782051740817892e-06, "loss": 0.0014, "step": 56210 }, { "epoch": 0.3698611211620823, "grad_norm": 0.016550651232689124, "learning_rate": 9.781884053903535e-06, "loss": 0.0052, "step": 56220 }, { "epoch": 0.3699269093373157, "grad_norm": 0.06899915870732755, "learning_rate": 9.781716303944167e-06, "loss": 0.0029, "step": 56230 }, { "epoch": 0.3699926975125491, "grad_norm": 0.0923399907416517, "learning_rate": 9.781548490942e-06, "loss": 0.0027, "step": 56240 }, { "epoch": 0.3700584856877825, "grad_norm": 0.10208760034558567, "learning_rate": 9.781380614899243e-06, "loss": 0.002, "step": 56250 }, { "epoch": 0.3701242738630159, "grad_norm": 0.03951810728633675, "learning_rate": 9.781212675818115e-06, "loss": 0.002, "step": 56260 }, { "epoch": 0.3701900620382492, "grad_norm": 0.08288025190785112, "learning_rate": 9.781044673700826e-06, "loss": 0.002, "step": 56270 }, { "epoch": 0.3702558502134826, "grad_norm": 0.038589674608866135, "learning_rate": 9.780876608549592e-06, "loss": 0.0031, "step": 56280 }, { "epoch": 0.370321638388716, "grad_norm": 0.022830355945132576, "learning_rate": 9.78070848036663e-06, "loss": 0.0018, "step": 56290 }, { "epoch": 0.3703874265639494, "grad_norm": 0.1297670502006142, "learning_rate": 9.780540289154153e-06, "loss": 0.0029, "step": 56300 }, { "epoch": 0.3704532147391828, "grad_norm": 0.0901443012689881, "learning_rate": 9.780372034914383e-06, "loss": 0.0032, "step": 56310 }, { "epoch": 0.37051900291441614, "grad_norm": 0.058145644185082494, "learning_rate": 9.780203717649537e-06, "loss": 0.0026, "step": 56320 }, { "epoch": 0.37058479108964953, "grad_norm": 0.12226808169546069, "learning_rate": 9.780035337361832e-06, "loss": 0.0037, "step": 56330 }, { "epoch": 0.37065057926488293, "grad_norm": 0.12818236341642644, "learning_rate": 9.779866894053489e-06, "loss": 0.0019, "step": 56340 }, { "epoch": 0.3707163674401163, "grad_norm": 0.08487002502974758, "learning_rate": 9.77969838772673e-06, "loss": 0.0021, "step": 56350 }, { "epoch": 0.3707821556153497, "grad_norm": 0.03179018363194542, "learning_rate": 9.779529818383777e-06, "loss": 0.0012, "step": 56360 }, { "epoch": 0.37084794379058306, "grad_norm": 0.15010934280274832, "learning_rate": 9.77936118602685e-06, "loss": 0.0023, "step": 56370 }, { "epoch": 0.37091373196581645, "grad_norm": 0.20347364812381882, "learning_rate": 9.779192490658174e-06, "loss": 0.0027, "step": 56380 }, { "epoch": 0.37097952014104985, "grad_norm": 0.06726192620650219, "learning_rate": 9.779023732279971e-06, "loss": 0.0026, "step": 56390 }, { "epoch": 0.37104530831628324, "grad_norm": 0.1716107381516121, "learning_rate": 9.778854910894469e-06, "loss": 0.0028, "step": 56400 }, { "epoch": 0.37111109649151663, "grad_norm": 0.198594565092621, "learning_rate": 9.778686026503891e-06, "loss": 0.0035, "step": 56410 }, { "epoch": 0.37117688466675, "grad_norm": 0.0507070861591518, "learning_rate": 9.778517079110466e-06, "loss": 0.0032, "step": 56420 }, { "epoch": 0.37124267284198337, "grad_norm": 0.04222407364741629, "learning_rate": 9.77834806871642e-06, "loss": 0.0018, "step": 56430 }, { "epoch": 0.37130846101721676, "grad_norm": 0.33338160835390673, "learning_rate": 9.778178995323982e-06, "loss": 0.0035, "step": 56440 }, { "epoch": 0.37137424919245016, "grad_norm": 0.05395368187126144, "learning_rate": 9.77800985893538e-06, "loss": 0.0034, "step": 56450 }, { "epoch": 0.37144003736768355, "grad_norm": 0.21396729179067706, "learning_rate": 9.777840659552846e-06, "loss": 0.0025, "step": 56460 }, { "epoch": 0.3715058255429169, "grad_norm": 0.07912325285586765, "learning_rate": 9.777671397178608e-06, "loss": 0.0012, "step": 56470 }, { "epoch": 0.3715716137181503, "grad_norm": 0.12757610819964926, "learning_rate": 9.777502071814899e-06, "loss": 0.0019, "step": 56480 }, { "epoch": 0.3716374018933837, "grad_norm": 0.04577887839065021, "learning_rate": 9.777332683463951e-06, "loss": 0.002, "step": 56490 }, { "epoch": 0.3717031900686171, "grad_norm": 0.06392103914572365, "learning_rate": 9.777163232127997e-06, "loss": 0.0019, "step": 56500 }, { "epoch": 0.37176897824385047, "grad_norm": 0.10558545461199494, "learning_rate": 9.776993717809273e-06, "loss": 0.0017, "step": 56510 }, { "epoch": 0.3718347664190838, "grad_norm": 0.26093756519369116, "learning_rate": 9.776824140510013e-06, "loss": 0.0016, "step": 56520 }, { "epoch": 0.3719005545943172, "grad_norm": 0.1391940009750583, "learning_rate": 9.776654500232451e-06, "loss": 0.0015, "step": 56530 }, { "epoch": 0.3719663427695506, "grad_norm": 0.05536798092839159, "learning_rate": 9.776484796978824e-06, "loss": 0.0019, "step": 56540 }, { "epoch": 0.372032130944784, "grad_norm": 0.1317191833519002, "learning_rate": 9.776315030751373e-06, "loss": 0.0022, "step": 56550 }, { "epoch": 0.3720979191200174, "grad_norm": 0.36683190575478725, "learning_rate": 9.776145201552331e-06, "loss": 0.0038, "step": 56560 }, { "epoch": 0.3721637072952508, "grad_norm": 0.2383510202634865, "learning_rate": 9.77597530938394e-06, "loss": 0.0034, "step": 56570 }, { "epoch": 0.3722294954704841, "grad_norm": 0.05983700271956146, "learning_rate": 9.77580535424844e-06, "loss": 0.004, "step": 56580 }, { "epoch": 0.3722952836457175, "grad_norm": 0.08433741253616048, "learning_rate": 9.77563533614807e-06, "loss": 0.0059, "step": 56590 }, { "epoch": 0.3723610718209509, "grad_norm": 0.005394074698754221, "learning_rate": 9.775465255085074e-06, "loss": 0.002, "step": 56600 }, { "epoch": 0.3724268599961843, "grad_norm": 0.13446869250011437, "learning_rate": 9.775295111061692e-06, "loss": 0.0046, "step": 56610 }, { "epoch": 0.3724926481714177, "grad_norm": 0.02832567910437748, "learning_rate": 9.775124904080169e-06, "loss": 0.0017, "step": 56620 }, { "epoch": 0.37255843634665103, "grad_norm": 0.04281833087900615, "learning_rate": 9.774954634142747e-06, "loss": 0.002, "step": 56630 }, { "epoch": 0.37262422452188443, "grad_norm": 0.02735476368846317, "learning_rate": 9.774784301251672e-06, "loss": 0.0025, "step": 56640 }, { "epoch": 0.3726900126971178, "grad_norm": 0.03534491898272837, "learning_rate": 9.774613905409191e-06, "loss": 0.0049, "step": 56650 }, { "epoch": 0.3727558008723512, "grad_norm": 0.01713037367140173, "learning_rate": 9.774443446617549e-06, "loss": 0.0034, "step": 56660 }, { "epoch": 0.3728215890475846, "grad_norm": 0.032485064694341745, "learning_rate": 9.774272924878994e-06, "loss": 0.0013, "step": 56670 }, { "epoch": 0.37288737722281795, "grad_norm": 0.31083874452822835, "learning_rate": 9.774102340195773e-06, "loss": 0.0029, "step": 56680 }, { "epoch": 0.37295316539805135, "grad_norm": 0.11228065086103428, "learning_rate": 9.773931692570136e-06, "loss": 0.0021, "step": 56690 }, { "epoch": 0.37301895357328474, "grad_norm": 0.21736473689233668, "learning_rate": 9.773760982004332e-06, "loss": 0.0014, "step": 56700 }, { "epoch": 0.37308474174851813, "grad_norm": 0.09999673668803524, "learning_rate": 9.773590208500613e-06, "loss": 0.0026, "step": 56710 }, { "epoch": 0.37315052992375153, "grad_norm": 0.0810652814006212, "learning_rate": 9.77341937206123e-06, "loss": 0.0017, "step": 56720 }, { "epoch": 0.37321631809898487, "grad_norm": 0.152865501404689, "learning_rate": 9.773248472688435e-06, "loss": 0.002, "step": 56730 }, { "epoch": 0.37328210627421826, "grad_norm": 0.02997190248436131, "learning_rate": 9.773077510384481e-06, "loss": 0.0022, "step": 56740 }, { "epoch": 0.37334789444945166, "grad_norm": 0.05695416323177018, "learning_rate": 9.772906485151623e-06, "loss": 0.0009, "step": 56750 }, { "epoch": 0.37341368262468505, "grad_norm": 0.06752115074919951, "learning_rate": 9.772735396992114e-06, "loss": 0.0025, "step": 56760 }, { "epoch": 0.37347947079991844, "grad_norm": 0.17160062988139196, "learning_rate": 9.772564245908212e-06, "loss": 0.0042, "step": 56770 }, { "epoch": 0.3735452589751518, "grad_norm": 0.03701496445887843, "learning_rate": 9.772393031902172e-06, "loss": 0.004, "step": 56780 }, { "epoch": 0.3736110471503852, "grad_norm": 0.11793479183840247, "learning_rate": 9.772221754976251e-06, "loss": 0.0018, "step": 56790 }, { "epoch": 0.3736768353256186, "grad_norm": 0.1355745815997037, "learning_rate": 9.772050415132708e-06, "loss": 0.0021, "step": 56800 }, { "epoch": 0.37374262350085197, "grad_norm": 0.057163273529578874, "learning_rate": 9.7718790123738e-06, "loss": 0.0029, "step": 56810 }, { "epoch": 0.37380841167608536, "grad_norm": 0.031130979693083035, "learning_rate": 9.771707546701792e-06, "loss": 0.0032, "step": 56820 }, { "epoch": 0.3738741998513187, "grad_norm": 0.04587976384771032, "learning_rate": 9.77153601811894e-06, "loss": 0.0017, "step": 56830 }, { "epoch": 0.3739399880265521, "grad_norm": 0.07929733208125449, "learning_rate": 9.771364426627506e-06, "loss": 0.0015, "step": 56840 }, { "epoch": 0.3740057762017855, "grad_norm": 0.11080761467149121, "learning_rate": 9.771192772229754e-06, "loss": 0.0015, "step": 56850 }, { "epoch": 0.3740715643770189, "grad_norm": 0.0733281957542529, "learning_rate": 9.771021054927945e-06, "loss": 0.0041, "step": 56860 }, { "epoch": 0.3741373525522523, "grad_norm": 0.04736544650633704, "learning_rate": 9.770849274724344e-06, "loss": 0.003, "step": 56870 }, { "epoch": 0.3742031407274856, "grad_norm": 0.139013145136021, "learning_rate": 9.770677431621213e-06, "loss": 0.0066, "step": 56880 }, { "epoch": 0.374268928902719, "grad_norm": 0.0939950347031161, "learning_rate": 9.770505525620823e-06, "loss": 0.002, "step": 56890 }, { "epoch": 0.3743347170779524, "grad_norm": 0.07645332014876921, "learning_rate": 9.77033355672544e-06, "loss": 0.0023, "step": 56900 }, { "epoch": 0.3744005052531858, "grad_norm": 0.040437649370702704, "learning_rate": 9.770161524937326e-06, "loss": 0.002, "step": 56910 }, { "epoch": 0.3744662934284192, "grad_norm": 0.19249042891362272, "learning_rate": 9.769989430258752e-06, "loss": 0.0029, "step": 56920 }, { "epoch": 0.37453208160365253, "grad_norm": 0.04600528774042336, "learning_rate": 9.769817272691987e-06, "loss": 0.0033, "step": 56930 }, { "epoch": 0.37459786977888593, "grad_norm": 0.00923049834152819, "learning_rate": 9.769645052239302e-06, "loss": 0.0028, "step": 56940 }, { "epoch": 0.3746636579541193, "grad_norm": 0.02320864290876605, "learning_rate": 9.769472768902966e-06, "loss": 0.0019, "step": 56950 }, { "epoch": 0.3747294461293527, "grad_norm": 0.061854348866559196, "learning_rate": 9.76930042268525e-06, "loss": 0.0012, "step": 56960 }, { "epoch": 0.3747952343045861, "grad_norm": 0.1201954371881394, "learning_rate": 9.769128013588427e-06, "loss": 0.0038, "step": 56970 }, { "epoch": 0.37486102247981945, "grad_norm": 0.0531782485492515, "learning_rate": 9.76895554161477e-06, "loss": 0.0022, "step": 56980 }, { "epoch": 0.37492681065505284, "grad_norm": 0.06021848465598847, "learning_rate": 9.768783006766554e-06, "loss": 0.002, "step": 56990 }, { "epoch": 0.37499259883028624, "grad_norm": 0.21339901254609236, "learning_rate": 9.768610409046051e-06, "loss": 0.0029, "step": 57000 }, { "epoch": 0.37505838700551963, "grad_norm": 0.08102270540562626, "learning_rate": 9.76843774845554e-06, "loss": 0.0032, "step": 57010 }, { "epoch": 0.37512417518075303, "grad_norm": 0.10524206519509222, "learning_rate": 9.768265024997294e-06, "loss": 0.0028, "step": 57020 }, { "epoch": 0.3751899633559864, "grad_norm": 0.050034712671669965, "learning_rate": 9.768092238673592e-06, "loss": 0.0014, "step": 57030 }, { "epoch": 0.37525575153121976, "grad_norm": 0.04574887408678057, "learning_rate": 9.767919389486714e-06, "loss": 0.0011, "step": 57040 }, { "epoch": 0.37532153970645316, "grad_norm": 0.040563839440295894, "learning_rate": 9.767746477438934e-06, "loss": 0.0011, "step": 57050 }, { "epoch": 0.37538732788168655, "grad_norm": 0.05050923149129661, "learning_rate": 9.767573502532534e-06, "loss": 0.0016, "step": 57060 }, { "epoch": 0.37545311605691994, "grad_norm": 0.055927375762098996, "learning_rate": 9.767400464769798e-06, "loss": 0.002, "step": 57070 }, { "epoch": 0.37551890423215334, "grad_norm": 0.050991306817526824, "learning_rate": 9.767227364153002e-06, "loss": 0.0024, "step": 57080 }, { "epoch": 0.3755846924073867, "grad_norm": 0.09323790434841062, "learning_rate": 9.76705420068443e-06, "loss": 0.0027, "step": 57090 }, { "epoch": 0.37565048058262007, "grad_norm": 0.14961448373737551, "learning_rate": 9.766880974366367e-06, "loss": 0.0035, "step": 57100 }, { "epoch": 0.37571626875785347, "grad_norm": 0.06293404433519595, "learning_rate": 9.766707685201096e-06, "loss": 0.0022, "step": 57110 }, { "epoch": 0.37578205693308686, "grad_norm": 0.04142222603672003, "learning_rate": 9.766534333190898e-06, "loss": 0.0012, "step": 57120 }, { "epoch": 0.37584784510832026, "grad_norm": 0.08144613845860417, "learning_rate": 9.766360918338064e-06, "loss": 0.0033, "step": 57130 }, { "epoch": 0.3759136332835536, "grad_norm": 0.048446093799509246, "learning_rate": 9.766187440644877e-06, "loss": 0.0019, "step": 57140 }, { "epoch": 0.375979421458787, "grad_norm": 0.05015383949815469, "learning_rate": 9.766013900113624e-06, "loss": 0.0033, "step": 57150 }, { "epoch": 0.3760452096340204, "grad_norm": 0.06782350070397176, "learning_rate": 9.765840296746595e-06, "loss": 0.0031, "step": 57160 }, { "epoch": 0.3761109978092538, "grad_norm": 0.07647146341252861, "learning_rate": 9.765666630546079e-06, "loss": 0.0018, "step": 57170 }, { "epoch": 0.37617678598448717, "grad_norm": 0.02271864745267687, "learning_rate": 9.765492901514362e-06, "loss": 0.0022, "step": 57180 }, { "epoch": 0.3762425741597205, "grad_norm": 0.07336775447225433, "learning_rate": 9.765319109653738e-06, "loss": 0.0024, "step": 57190 }, { "epoch": 0.3763083623349539, "grad_norm": 0.05290148896115566, "learning_rate": 9.765145254966498e-06, "loss": 0.0018, "step": 57200 }, { "epoch": 0.3763741505101873, "grad_norm": 0.018562755178749123, "learning_rate": 9.764971337454932e-06, "loss": 0.0019, "step": 57210 }, { "epoch": 0.3764399386854207, "grad_norm": 0.07350167034718494, "learning_rate": 9.764797357121334e-06, "loss": 0.0022, "step": 57220 }, { "epoch": 0.3765057268606541, "grad_norm": 0.07192317426324801, "learning_rate": 9.764623313967998e-06, "loss": 0.0024, "step": 57230 }, { "epoch": 0.3765715150358874, "grad_norm": 0.03589060129572861, "learning_rate": 9.764449207997219e-06, "loss": 0.0025, "step": 57240 }, { "epoch": 0.3766373032111208, "grad_norm": 0.035727131588046995, "learning_rate": 9.764275039211292e-06, "loss": 0.0026, "step": 57250 }, { "epoch": 0.3767030913863542, "grad_norm": 0.0744835815555461, "learning_rate": 9.764100807612514e-06, "loss": 0.0021, "step": 57260 }, { "epoch": 0.3767688795615876, "grad_norm": 0.014586679195806381, "learning_rate": 9.763926513203181e-06, "loss": 0.0019, "step": 57270 }, { "epoch": 0.376834667736821, "grad_norm": 0.08117033171634905, "learning_rate": 9.763752155985591e-06, "loss": 0.0019, "step": 57280 }, { "epoch": 0.37690045591205434, "grad_norm": 0.1385471790365447, "learning_rate": 9.763577735962042e-06, "loss": 0.0057, "step": 57290 }, { "epoch": 0.37696624408728774, "grad_norm": 0.07063120858830757, "learning_rate": 9.763403253134836e-06, "loss": 0.0019, "step": 57300 }, { "epoch": 0.37703203226252113, "grad_norm": 0.035636592809350456, "learning_rate": 9.763228707506272e-06, "loss": 0.0028, "step": 57310 }, { "epoch": 0.3770978204377545, "grad_norm": 0.09986396966253015, "learning_rate": 9.76305409907865e-06, "loss": 0.0021, "step": 57320 }, { "epoch": 0.3771636086129879, "grad_norm": 0.08330332831974717, "learning_rate": 9.762879427854275e-06, "loss": 0.0023, "step": 57330 }, { "epoch": 0.37722939678822126, "grad_norm": 0.17126941020579486, "learning_rate": 9.762704693835448e-06, "loss": 0.0026, "step": 57340 }, { "epoch": 0.37729518496345466, "grad_norm": 0.23229627767699293, "learning_rate": 9.762529897024474e-06, "loss": 0.0043, "step": 57350 }, { "epoch": 0.37736097313868805, "grad_norm": 0.150894639455051, "learning_rate": 9.762355037423654e-06, "loss": 0.0018, "step": 57360 }, { "epoch": 0.37742676131392144, "grad_norm": 0.023121278544648904, "learning_rate": 9.762180115035299e-06, "loss": 0.0019, "step": 57370 }, { "epoch": 0.37749254948915484, "grad_norm": 0.02185614143933271, "learning_rate": 9.762005129861709e-06, "loss": 0.0044, "step": 57380 }, { "epoch": 0.3775583376643882, "grad_norm": 0.00271963142261104, "learning_rate": 9.761830081905195e-06, "loss": 0.003, "step": 57390 }, { "epoch": 0.37762412583962157, "grad_norm": 0.01654941545170305, "learning_rate": 9.761654971168064e-06, "loss": 0.002, "step": 57400 }, { "epoch": 0.37768991401485497, "grad_norm": 0.0918039602233061, "learning_rate": 9.761479797652624e-06, "loss": 0.0015, "step": 57410 }, { "epoch": 0.37775570219008836, "grad_norm": 0.07382157529807275, "learning_rate": 9.761304561361188e-06, "loss": 0.0018, "step": 57420 }, { "epoch": 0.37782149036532175, "grad_norm": 0.472807561210671, "learning_rate": 9.761129262296061e-06, "loss": 0.0018, "step": 57430 }, { "epoch": 0.3778872785405551, "grad_norm": 0.018428092425667314, "learning_rate": 9.760953900459555e-06, "loss": 0.0025, "step": 57440 }, { "epoch": 0.3779530667157885, "grad_norm": 0.05936766598988743, "learning_rate": 9.760778475853987e-06, "loss": 0.0018, "step": 57450 }, { "epoch": 0.3780188548910219, "grad_norm": 0.01524551495397518, "learning_rate": 9.760602988481664e-06, "loss": 0.0015, "step": 57460 }, { "epoch": 0.3780846430662553, "grad_norm": 0.0335179777372221, "learning_rate": 9.760427438344905e-06, "loss": 0.0025, "step": 57470 }, { "epoch": 0.37815043124148867, "grad_norm": 0.1062678312878353, "learning_rate": 9.76025182544602e-06, "loss": 0.0021, "step": 57480 }, { "epoch": 0.37821621941672207, "grad_norm": 0.10805968690736256, "learning_rate": 9.760076149787325e-06, "loss": 0.0018, "step": 57490 }, { "epoch": 0.3782820075919554, "grad_norm": 0.08231533165588921, "learning_rate": 9.759900411371139e-06, "loss": 0.0015, "step": 57500 }, { "epoch": 0.3783477957671888, "grad_norm": 0.04594666161549857, "learning_rate": 9.759724610199775e-06, "loss": 0.0031, "step": 57510 }, { "epoch": 0.3784135839424222, "grad_norm": 0.08773430288702375, "learning_rate": 9.759548746275554e-06, "loss": 0.003, "step": 57520 }, { "epoch": 0.3784793721176556, "grad_norm": 0.08489235758033908, "learning_rate": 9.759372819600793e-06, "loss": 0.0018, "step": 57530 }, { "epoch": 0.378545160292889, "grad_norm": 0.09395472895370831, "learning_rate": 9.759196830177812e-06, "loss": 0.0017, "step": 57540 }, { "epoch": 0.3786109484681223, "grad_norm": 0.09606728265530849, "learning_rate": 9.759020778008931e-06, "loss": 0.0016, "step": 57550 }, { "epoch": 0.3786767366433557, "grad_norm": 0.05579894245887483, "learning_rate": 9.75884466309647e-06, "loss": 0.0011, "step": 57560 }, { "epoch": 0.3787425248185891, "grad_norm": 0.026552349111500086, "learning_rate": 9.758668485442754e-06, "loss": 0.0011, "step": 57570 }, { "epoch": 0.3788083129938225, "grad_norm": 0.07096715036925633, "learning_rate": 9.758492245050104e-06, "loss": 0.0023, "step": 57580 }, { "epoch": 0.3788741011690559, "grad_norm": 0.10419578970439139, "learning_rate": 9.758315941920841e-06, "loss": 0.0015, "step": 57590 }, { "epoch": 0.37893988934428924, "grad_norm": 0.0872129430457712, "learning_rate": 9.758139576057294e-06, "loss": 0.0036, "step": 57600 }, { "epoch": 0.37900567751952263, "grad_norm": 0.041431989169999316, "learning_rate": 9.757963147461786e-06, "loss": 0.0013, "step": 57610 }, { "epoch": 0.379071465694756, "grad_norm": 0.08679766277305273, "learning_rate": 9.757786656136642e-06, "loss": 0.0051, "step": 57620 }, { "epoch": 0.3791372538699894, "grad_norm": 0.057643643574995365, "learning_rate": 9.757610102084193e-06, "loss": 0.0029, "step": 57630 }, { "epoch": 0.3792030420452228, "grad_norm": 0.00485978416836387, "learning_rate": 9.757433485306759e-06, "loss": 0.0019, "step": 57640 }, { "epoch": 0.37926883022045615, "grad_norm": 0.15074812300551915, "learning_rate": 9.757256805806677e-06, "loss": 0.0026, "step": 57650 }, { "epoch": 0.37933461839568955, "grad_norm": 0.011081075232927675, "learning_rate": 9.757080063586273e-06, "loss": 0.0042, "step": 57660 }, { "epoch": 0.37940040657092294, "grad_norm": 0.1109859353581008, "learning_rate": 9.756903258647875e-06, "loss": 0.0015, "step": 57670 }, { "epoch": 0.37946619474615634, "grad_norm": 0.14413525281200684, "learning_rate": 9.756726390993816e-06, "loss": 0.0022, "step": 57680 }, { "epoch": 0.37953198292138973, "grad_norm": 0.09982020953168268, "learning_rate": 9.756549460626427e-06, "loss": 0.0048, "step": 57690 }, { "epoch": 0.37959777109662307, "grad_norm": 0.06619015768011934, "learning_rate": 9.756372467548042e-06, "loss": 0.0044, "step": 57700 }, { "epoch": 0.37966355927185647, "grad_norm": 0.03228629926985058, "learning_rate": 9.756195411760994e-06, "loss": 0.0034, "step": 57710 }, { "epoch": 0.37972934744708986, "grad_norm": 0.028620044357369177, "learning_rate": 9.756018293267618e-06, "loss": 0.0021, "step": 57720 }, { "epoch": 0.37979513562232325, "grad_norm": 0.14702827618449824, "learning_rate": 9.755841112070248e-06, "loss": 0.0023, "step": 57730 }, { "epoch": 0.37986092379755665, "grad_norm": 0.26519862383060816, "learning_rate": 9.75566386817122e-06, "loss": 0.0027, "step": 57740 }, { "epoch": 0.37992671197279, "grad_norm": 0.14834135036544224, "learning_rate": 9.755486561572871e-06, "loss": 0.0018, "step": 57750 }, { "epoch": 0.3799925001480234, "grad_norm": 0.013382090528434667, "learning_rate": 9.755309192277539e-06, "loss": 0.0015, "step": 57760 }, { "epoch": 0.3800582883232568, "grad_norm": 0.06284302825029191, "learning_rate": 9.755131760287562e-06, "loss": 0.0011, "step": 57770 }, { "epoch": 0.38012407649849017, "grad_norm": 0.05933054465155157, "learning_rate": 9.75495426560528e-06, "loss": 0.0012, "step": 57780 }, { "epoch": 0.38018986467372357, "grad_norm": 0.030794371439530704, "learning_rate": 9.754776708233032e-06, "loss": 0.0015, "step": 57790 }, { "epoch": 0.3802556528489569, "grad_norm": 0.08255249587086355, "learning_rate": 9.75459908817316e-06, "loss": 0.0026, "step": 57800 }, { "epoch": 0.3803214410241903, "grad_norm": 0.052083727032416455, "learning_rate": 9.754421405428006e-06, "loss": 0.0011, "step": 57810 }, { "epoch": 0.3803872291994237, "grad_norm": 0.15663178707188097, "learning_rate": 9.754243659999912e-06, "loss": 0.0025, "step": 57820 }, { "epoch": 0.3804530173746571, "grad_norm": 0.04552927421907448, "learning_rate": 9.75406585189122e-06, "loss": 0.0019, "step": 57830 }, { "epoch": 0.3805188055498905, "grad_norm": 0.05952452976302099, "learning_rate": 9.753887981104276e-06, "loss": 0.0015, "step": 57840 }, { "epoch": 0.3805845937251238, "grad_norm": 0.06661971313017852, "learning_rate": 9.753710047641424e-06, "loss": 0.0015, "step": 57850 }, { "epoch": 0.3806503819003572, "grad_norm": 0.08491515590160989, "learning_rate": 9.753532051505012e-06, "loss": 0.0023, "step": 57860 }, { "epoch": 0.3807161700755906, "grad_norm": 0.10824236987879822, "learning_rate": 9.753353992697386e-06, "loss": 0.0017, "step": 57870 }, { "epoch": 0.380781958250824, "grad_norm": 0.09192890564256948, "learning_rate": 9.753175871220893e-06, "loss": 0.0023, "step": 57880 }, { "epoch": 0.3808477464260574, "grad_norm": 0.17132618098747715, "learning_rate": 9.752997687077878e-06, "loss": 0.0025, "step": 57890 }, { "epoch": 0.38091353460129074, "grad_norm": 0.021768204617180497, "learning_rate": 9.752819440270698e-06, "loss": 0.0027, "step": 57900 }, { "epoch": 0.38097932277652413, "grad_norm": 0.17973725171219074, "learning_rate": 9.752641130801695e-06, "loss": 0.0034, "step": 57910 }, { "epoch": 0.3810451109517575, "grad_norm": 0.22789402393108515, "learning_rate": 9.752462758673224e-06, "loss": 0.0021, "step": 57920 }, { "epoch": 0.3811108991269909, "grad_norm": 0.02672793514627443, "learning_rate": 9.752284323887638e-06, "loss": 0.0012, "step": 57930 }, { "epoch": 0.3811766873022243, "grad_norm": 0.17378036191568236, "learning_rate": 9.752105826447285e-06, "loss": 0.0034, "step": 57940 }, { "epoch": 0.38124247547745765, "grad_norm": 0.10684926207720996, "learning_rate": 9.751927266354522e-06, "loss": 0.002, "step": 57950 }, { "epoch": 0.38130826365269105, "grad_norm": 0.054709764218731596, "learning_rate": 9.751748643611701e-06, "loss": 0.0021, "step": 57960 }, { "epoch": 0.38137405182792444, "grad_norm": 0.0763751785939203, "learning_rate": 9.751569958221179e-06, "loss": 0.003, "step": 57970 }, { "epoch": 0.38143984000315784, "grad_norm": 0.018338535167009194, "learning_rate": 9.751391210185312e-06, "loss": 0.0028, "step": 57980 }, { "epoch": 0.38150562817839123, "grad_norm": 0.04936094715565829, "learning_rate": 9.751212399506453e-06, "loss": 0.0025, "step": 57990 }, { "epoch": 0.3815714163536246, "grad_norm": 0.06495810343851233, "learning_rate": 9.751033526186964e-06, "loss": 0.0025, "step": 58000 }, { "epoch": 0.38163720452885797, "grad_norm": 0.3015916130291499, "learning_rate": 9.7508545902292e-06, "loss": 0.0025, "step": 58010 }, { "epoch": 0.38170299270409136, "grad_norm": 0.15210264886512806, "learning_rate": 9.750675591635522e-06, "loss": 0.0031, "step": 58020 }, { "epoch": 0.38176878087932475, "grad_norm": 0.08930416430017656, "learning_rate": 9.75049653040829e-06, "loss": 0.0035, "step": 58030 }, { "epoch": 0.38183456905455815, "grad_norm": 0.09875449239114839, "learning_rate": 9.75031740654986e-06, "loss": 0.0028, "step": 58040 }, { "epoch": 0.38190035722979154, "grad_norm": 0.14256511110760256, "learning_rate": 9.750138220062601e-06, "loss": 0.0025, "step": 58050 }, { "epoch": 0.3819661454050249, "grad_norm": 0.09270268041857962, "learning_rate": 9.749958970948872e-06, "loss": 0.0022, "step": 58060 }, { "epoch": 0.3820319335802583, "grad_norm": 0.05180456471042304, "learning_rate": 9.749779659211037e-06, "loss": 0.0024, "step": 58070 }, { "epoch": 0.38209772175549167, "grad_norm": 0.09716141688803782, "learning_rate": 9.749600284851457e-06, "loss": 0.0031, "step": 58080 }, { "epoch": 0.38216350993072506, "grad_norm": 0.05169618736989479, "learning_rate": 9.7494208478725e-06, "loss": 0.0014, "step": 58090 }, { "epoch": 0.38222929810595846, "grad_norm": 0.03918756089188938, "learning_rate": 9.749241348276533e-06, "loss": 0.0016, "step": 58100 }, { "epoch": 0.3822950862811918, "grad_norm": 0.023229924754789053, "learning_rate": 9.749061786065919e-06, "loss": 0.003, "step": 58110 }, { "epoch": 0.3823608744564252, "grad_norm": 0.08815470477916738, "learning_rate": 9.748882161243028e-06, "loss": 0.0047, "step": 58120 }, { "epoch": 0.3824266626316586, "grad_norm": 0.04246673324627817, "learning_rate": 9.748702473810225e-06, "loss": 0.0023, "step": 58130 }, { "epoch": 0.382492450806892, "grad_norm": 0.1200767842884016, "learning_rate": 9.748522723769882e-06, "loss": 0.0031, "step": 58140 }, { "epoch": 0.3825582389821254, "grad_norm": 0.06708456946808594, "learning_rate": 9.748342911124369e-06, "loss": 0.002, "step": 58150 }, { "epoch": 0.3826240271573587, "grad_norm": 0.13345544465899697, "learning_rate": 9.748163035876055e-06, "loss": 0.0029, "step": 58160 }, { "epoch": 0.3826898153325921, "grad_norm": 0.06802923477567002, "learning_rate": 9.747983098027312e-06, "loss": 0.0018, "step": 58170 }, { "epoch": 0.3827556035078255, "grad_norm": 0.31040096726936994, "learning_rate": 9.747803097580513e-06, "loss": 0.0034, "step": 58180 }, { "epoch": 0.3828213916830589, "grad_norm": 0.03963406772890336, "learning_rate": 9.74762303453803e-06, "loss": 0.002, "step": 58190 }, { "epoch": 0.3828871798582923, "grad_norm": 0.03443374910147443, "learning_rate": 9.747442908902237e-06, "loss": 0.0026, "step": 58200 }, { "epoch": 0.38295296803352563, "grad_norm": 0.09354938474488805, "learning_rate": 9.74726272067551e-06, "loss": 0.0049, "step": 58210 }, { "epoch": 0.383018756208759, "grad_norm": 0.01751225928568085, "learning_rate": 9.747082469860226e-06, "loss": 0.001, "step": 58220 }, { "epoch": 0.3830845443839924, "grad_norm": 0.09990543787340764, "learning_rate": 9.746902156458756e-06, "loss": 0.0029, "step": 58230 }, { "epoch": 0.3831503325592258, "grad_norm": 0.048217489262100635, "learning_rate": 9.746721780473483e-06, "loss": 0.0036, "step": 58240 }, { "epoch": 0.3832161207344592, "grad_norm": 0.14153120495564575, "learning_rate": 9.746541341906782e-06, "loss": 0.003, "step": 58250 }, { "epoch": 0.38328190890969255, "grad_norm": 0.18746332640931188, "learning_rate": 9.746360840761033e-06, "loss": 0.0037, "step": 58260 }, { "epoch": 0.38334769708492594, "grad_norm": 0.03395984060461639, "learning_rate": 9.746180277038616e-06, "loss": 0.0024, "step": 58270 }, { "epoch": 0.38341348526015934, "grad_norm": 0.13698587262622386, "learning_rate": 9.74599965074191e-06, "loss": 0.0031, "step": 58280 }, { "epoch": 0.38347927343539273, "grad_norm": 0.09799118539824435, "learning_rate": 9.745818961873298e-06, "loss": 0.0014, "step": 58290 }, { "epoch": 0.3835450616106261, "grad_norm": 0.06771084805332646, "learning_rate": 9.745638210435164e-06, "loss": 0.0012, "step": 58300 }, { "epoch": 0.38361084978585946, "grad_norm": 0.08098736763461722, "learning_rate": 9.745457396429885e-06, "loss": 0.0022, "step": 58310 }, { "epoch": 0.38367663796109286, "grad_norm": 0.05727796051456784, "learning_rate": 9.745276519859852e-06, "loss": 0.0018, "step": 58320 }, { "epoch": 0.38374242613632625, "grad_norm": 0.059509802052449164, "learning_rate": 9.745095580727444e-06, "loss": 0.0017, "step": 58330 }, { "epoch": 0.38380821431155965, "grad_norm": 0.02903371763844834, "learning_rate": 9.74491457903505e-06, "loss": 0.0041, "step": 58340 }, { "epoch": 0.38387400248679304, "grad_norm": 0.043366526664327415, "learning_rate": 9.744733514785053e-06, "loss": 0.0017, "step": 58350 }, { "epoch": 0.3839397906620264, "grad_norm": 0.11037309622883991, "learning_rate": 9.744552387979847e-06, "loss": 0.0009, "step": 58360 }, { "epoch": 0.3840055788372598, "grad_norm": 0.5239519894791438, "learning_rate": 9.744371198621811e-06, "loss": 0.0031, "step": 58370 }, { "epoch": 0.38407136701249317, "grad_norm": 0.1270852928254179, "learning_rate": 9.744189946713342e-06, "loss": 0.002, "step": 58380 }, { "epoch": 0.38413715518772656, "grad_norm": 0.05547845451033962, "learning_rate": 9.744008632256822e-06, "loss": 0.0025, "step": 58390 }, { "epoch": 0.38420294336295996, "grad_norm": 0.08475512539858646, "learning_rate": 9.74382725525465e-06, "loss": 0.003, "step": 58400 }, { "epoch": 0.3842687315381933, "grad_norm": 0.10614148626631836, "learning_rate": 9.743645815709209e-06, "loss": 0.0033, "step": 58410 }, { "epoch": 0.3843345197134267, "grad_norm": 0.41739471310878246, "learning_rate": 9.743464313622896e-06, "loss": 0.0023, "step": 58420 }, { "epoch": 0.3844003078886601, "grad_norm": 0.11637161543909551, "learning_rate": 9.743282748998102e-06, "loss": 0.0029, "step": 58430 }, { "epoch": 0.3844660960638935, "grad_norm": 0.05741670541853299, "learning_rate": 9.743101121837224e-06, "loss": 0.0022, "step": 58440 }, { "epoch": 0.3845318842391269, "grad_norm": 0.1531495491437679, "learning_rate": 9.74291943214265e-06, "loss": 0.0022, "step": 58450 }, { "epoch": 0.38459767241436027, "grad_norm": 0.026970694468357127, "learning_rate": 9.742737679916783e-06, "loss": 0.0015, "step": 58460 }, { "epoch": 0.3846634605895936, "grad_norm": 0.04038084741868354, "learning_rate": 9.742555865162015e-06, "loss": 0.0017, "step": 58470 }, { "epoch": 0.384729248764827, "grad_norm": 0.0343662768624619, "learning_rate": 9.742373987880741e-06, "loss": 0.002, "step": 58480 }, { "epoch": 0.3847950369400604, "grad_norm": 0.01929725676994019, "learning_rate": 9.742192048075367e-06, "loss": 0.0012, "step": 58490 }, { "epoch": 0.3848608251152938, "grad_norm": 0.03132569662243113, "learning_rate": 9.742010045748283e-06, "loss": 0.0016, "step": 58500 }, { "epoch": 0.3849266132905272, "grad_norm": 0.10406550263456733, "learning_rate": 9.741827980901893e-06, "loss": 0.0022, "step": 58510 }, { "epoch": 0.3849924014657605, "grad_norm": 0.13373166482266233, "learning_rate": 9.741645853538595e-06, "loss": 0.0026, "step": 58520 }, { "epoch": 0.3850581896409939, "grad_norm": 0.13996741687938546, "learning_rate": 9.741463663660792e-06, "loss": 0.0027, "step": 58530 }, { "epoch": 0.3851239778162273, "grad_norm": 0.04688724389197585, "learning_rate": 9.741281411270887e-06, "loss": 0.0016, "step": 58540 }, { "epoch": 0.3851897659914607, "grad_norm": 0.03176456574254529, "learning_rate": 9.74109909637128e-06, "loss": 0.0019, "step": 58550 }, { "epoch": 0.3852555541666941, "grad_norm": 0.7485691573356724, "learning_rate": 9.740916718964377e-06, "loss": 0.0021, "step": 58560 }, { "epoch": 0.38532134234192744, "grad_norm": 0.1042182798357786, "learning_rate": 9.740734279052581e-06, "loss": 0.002, "step": 58570 }, { "epoch": 0.38538713051716084, "grad_norm": 0.1876814199805139, "learning_rate": 9.7405517766383e-06, "loss": 0.0033, "step": 58580 }, { "epoch": 0.38545291869239423, "grad_norm": 0.0942303275534602, "learning_rate": 9.740369211723936e-06, "loss": 0.0051, "step": 58590 }, { "epoch": 0.3855187068676276, "grad_norm": 0.07729786754185008, "learning_rate": 9.740186584311898e-06, "loss": 0.0028, "step": 58600 }, { "epoch": 0.385584495042861, "grad_norm": 0.03509653050342239, "learning_rate": 9.740003894404594e-06, "loss": 0.0013, "step": 58610 }, { "epoch": 0.38565028321809436, "grad_norm": 0.021041374695784487, "learning_rate": 9.739821142004434e-06, "loss": 0.006, "step": 58620 }, { "epoch": 0.38571607139332775, "grad_norm": 0.1130975108188768, "learning_rate": 9.739638327113822e-06, "loss": 0.002, "step": 58630 }, { "epoch": 0.38578185956856115, "grad_norm": 0.08174942768794986, "learning_rate": 9.739455449735176e-06, "loss": 0.0038, "step": 58640 }, { "epoch": 0.38584764774379454, "grad_norm": 0.2193418486968196, "learning_rate": 9.7392725098709e-06, "loss": 0.0022, "step": 58650 }, { "epoch": 0.38591343591902794, "grad_norm": 0.12045741082920097, "learning_rate": 9.739089507523412e-06, "loss": 0.0055, "step": 58660 }, { "epoch": 0.3859792240942613, "grad_norm": 0.15364722951347556, "learning_rate": 9.73890644269512e-06, "loss": 0.004, "step": 58670 }, { "epoch": 0.38604501226949467, "grad_norm": 0.22916667350094752, "learning_rate": 9.738723315388439e-06, "loss": 0.0062, "step": 58680 }, { "epoch": 0.38611080044472806, "grad_norm": 0.025659028455333393, "learning_rate": 9.738540125605785e-06, "loss": 0.0012, "step": 58690 }, { "epoch": 0.38617658861996146, "grad_norm": 0.03471126109827874, "learning_rate": 9.738356873349571e-06, "loss": 0.002, "step": 58700 }, { "epoch": 0.38624237679519485, "grad_norm": 0.018582973797064556, "learning_rate": 9.738173558622215e-06, "loss": 0.0026, "step": 58710 }, { "epoch": 0.3863081649704282, "grad_norm": 0.042584465334730946, "learning_rate": 9.737990181426131e-06, "loss": 0.0015, "step": 58720 }, { "epoch": 0.3863739531456616, "grad_norm": 0.12243451509409127, "learning_rate": 9.73780674176374e-06, "loss": 0.002, "step": 58730 }, { "epoch": 0.386439741320895, "grad_norm": 0.12405198413379805, "learning_rate": 9.737623239637458e-06, "loss": 0.0017, "step": 58740 }, { "epoch": 0.3865055294961284, "grad_norm": 0.03647469865676409, "learning_rate": 9.737439675049707e-06, "loss": 0.0014, "step": 58750 }, { "epoch": 0.38657131767136177, "grad_norm": 0.12895880942441001, "learning_rate": 9.737256048002903e-06, "loss": 0.0022, "step": 58760 }, { "epoch": 0.3866371058465951, "grad_norm": 0.13424197378201236, "learning_rate": 9.73707235849947e-06, "loss": 0.0016, "step": 58770 }, { "epoch": 0.3867028940218285, "grad_norm": 0.1206538518540227, "learning_rate": 9.73688860654183e-06, "loss": 0.0022, "step": 58780 }, { "epoch": 0.3867686821970619, "grad_norm": 0.03183376272953167, "learning_rate": 9.736704792132406e-06, "loss": 0.0025, "step": 58790 }, { "epoch": 0.3868344703722953, "grad_norm": 0.038369970227114934, "learning_rate": 9.736520915273617e-06, "loss": 0.0014, "step": 58800 }, { "epoch": 0.3869002585475287, "grad_norm": 0.1414658642243639, "learning_rate": 9.736336975967893e-06, "loss": 0.0023, "step": 58810 }, { "epoch": 0.386966046722762, "grad_norm": 0.057583218253651064, "learning_rate": 9.736152974217655e-06, "loss": 0.0024, "step": 58820 }, { "epoch": 0.3870318348979954, "grad_norm": 0.08135091743468104, "learning_rate": 9.735968910025331e-06, "loss": 0.0016, "step": 58830 }, { "epoch": 0.3870976230732288, "grad_norm": 0.054559265298169735, "learning_rate": 9.735784783393346e-06, "loss": 0.0025, "step": 58840 }, { "epoch": 0.3871634112484622, "grad_norm": 0.036929000046883154, "learning_rate": 9.73560059432413e-06, "loss": 0.0029, "step": 58850 }, { "epoch": 0.3872291994236956, "grad_norm": 0.03618456068810122, "learning_rate": 9.735416342820113e-06, "loss": 0.0024, "step": 58860 }, { "epoch": 0.38729498759892894, "grad_norm": 0.02663105424689158, "learning_rate": 9.735232028883717e-06, "loss": 0.0015, "step": 58870 }, { "epoch": 0.38736077577416234, "grad_norm": 0.10518298062066836, "learning_rate": 9.735047652517378e-06, "loss": 0.0023, "step": 58880 }, { "epoch": 0.38742656394939573, "grad_norm": 0.06253749292437231, "learning_rate": 9.734863213723524e-06, "loss": 0.0029, "step": 58890 }, { "epoch": 0.3874923521246291, "grad_norm": 0.03630902028813155, "learning_rate": 9.734678712504588e-06, "loss": 0.0014, "step": 58900 }, { "epoch": 0.3875581402998625, "grad_norm": 0.05386078349928609, "learning_rate": 9.734494148863005e-06, "loss": 0.0021, "step": 58910 }, { "epoch": 0.3876239284750959, "grad_norm": 0.009179899993911769, "learning_rate": 9.734309522801202e-06, "loss": 0.0016, "step": 58920 }, { "epoch": 0.38768971665032925, "grad_norm": 0.0603338057352067, "learning_rate": 9.734124834321618e-06, "loss": 0.0033, "step": 58930 }, { "epoch": 0.38775550482556265, "grad_norm": 0.08127283769306343, "learning_rate": 9.733940083426687e-06, "loss": 0.0073, "step": 58940 }, { "epoch": 0.38782129300079604, "grad_norm": 1.1453546951118483, "learning_rate": 9.733755270118846e-06, "loss": 0.0021, "step": 58950 }, { "epoch": 0.38788708117602944, "grad_norm": 0.10467676297130797, "learning_rate": 9.733570394400528e-06, "loss": 0.0024, "step": 58960 }, { "epoch": 0.38795286935126283, "grad_norm": 0.20985221817917166, "learning_rate": 9.733385456274173e-06, "loss": 0.0023, "step": 58970 }, { "epoch": 0.38801865752649617, "grad_norm": 0.08417473321565928, "learning_rate": 9.733200455742218e-06, "loss": 0.0024, "step": 58980 }, { "epoch": 0.38808444570172956, "grad_norm": 0.03399947355422506, "learning_rate": 9.733015392807105e-06, "loss": 0.0029, "step": 58990 }, { "epoch": 0.38815023387696296, "grad_norm": 0.02824046225474605, "learning_rate": 9.73283026747127e-06, "loss": 0.0025, "step": 59000 }, { "epoch": 0.38821602205219635, "grad_norm": 0.048817270901485875, "learning_rate": 9.732645079737156e-06, "loss": 0.0021, "step": 59010 }, { "epoch": 0.38828181022742975, "grad_norm": 0.08932428690430168, "learning_rate": 9.732459829607204e-06, "loss": 0.0017, "step": 59020 }, { "epoch": 0.3883475984026631, "grad_norm": 0.061993584577636145, "learning_rate": 9.732274517083857e-06, "loss": 0.0031, "step": 59030 }, { "epoch": 0.3884133865778965, "grad_norm": 0.03286554694405493, "learning_rate": 9.732089142169557e-06, "loss": 0.0017, "step": 59040 }, { "epoch": 0.3884791747531299, "grad_norm": 0.08597392537764059, "learning_rate": 9.731903704866748e-06, "loss": 0.003, "step": 59050 }, { "epoch": 0.38854496292836327, "grad_norm": 0.04255754628519837, "learning_rate": 9.731718205177876e-06, "loss": 0.0018, "step": 59060 }, { "epoch": 0.38861075110359666, "grad_norm": 0.18122670236915114, "learning_rate": 9.731532643105385e-06, "loss": 0.003, "step": 59070 }, { "epoch": 0.38867653927883, "grad_norm": 0.03882413665477849, "learning_rate": 9.731347018651725e-06, "loss": 0.0034, "step": 59080 }, { "epoch": 0.3887423274540634, "grad_norm": 0.03751083234527228, "learning_rate": 9.731161331819339e-06, "loss": 0.0029, "step": 59090 }, { "epoch": 0.3888081156292968, "grad_norm": 0.03839862390423648, "learning_rate": 9.730975582610678e-06, "loss": 0.0026, "step": 59100 }, { "epoch": 0.3888739038045302, "grad_norm": 0.39836672836805687, "learning_rate": 9.730789771028188e-06, "loss": 0.0022, "step": 59110 }, { "epoch": 0.3889396919797636, "grad_norm": 0.26311281706443435, "learning_rate": 9.730603897074322e-06, "loss": 0.0052, "step": 59120 }, { "epoch": 0.3890054801549969, "grad_norm": 0.06566678280512904, "learning_rate": 9.730417960751527e-06, "loss": 0.0019, "step": 59130 }, { "epoch": 0.3890712683302303, "grad_norm": 0.18851701175570276, "learning_rate": 9.73023196206226e-06, "loss": 0.002, "step": 59140 }, { "epoch": 0.3891370565054637, "grad_norm": 0.04289102383545391, "learning_rate": 9.730045901008968e-06, "loss": 0.0032, "step": 59150 }, { "epoch": 0.3892028446806971, "grad_norm": 0.1428972246154119, "learning_rate": 9.729859777594107e-06, "loss": 0.0019, "step": 59160 }, { "epoch": 0.3892686328559305, "grad_norm": 0.11627738753615159, "learning_rate": 9.72967359182013e-06, "loss": 0.0025, "step": 59170 }, { "epoch": 0.38933442103116384, "grad_norm": 0.14001079089699214, "learning_rate": 9.72948734368949e-06, "loss": 0.0023, "step": 59180 }, { "epoch": 0.38940020920639723, "grad_norm": 0.01843199904628912, "learning_rate": 9.729301033204645e-06, "loss": 0.0021, "step": 59190 }, { "epoch": 0.3894659973816306, "grad_norm": 0.04367051585782413, "learning_rate": 9.729114660368051e-06, "loss": 0.0048, "step": 59200 }, { "epoch": 0.389531785556864, "grad_norm": 0.08212300295996838, "learning_rate": 9.728928225182163e-06, "loss": 0.0036, "step": 59210 }, { "epoch": 0.3895975737320974, "grad_norm": 0.09629836074356, "learning_rate": 9.728741727649441e-06, "loss": 0.0034, "step": 59220 }, { "epoch": 0.38966336190733075, "grad_norm": 0.04184879419193803, "learning_rate": 9.728555167772343e-06, "loss": 0.0023, "step": 59230 }, { "epoch": 0.38972915008256415, "grad_norm": 0.0795660176116195, "learning_rate": 9.72836854555333e-06, "loss": 0.0014, "step": 59240 }, { "epoch": 0.38979493825779754, "grad_norm": 0.052565929660700166, "learning_rate": 9.728181860994862e-06, "loss": 0.0034, "step": 59250 }, { "epoch": 0.38986072643303094, "grad_norm": 0.09920318134248868, "learning_rate": 9.727995114099397e-06, "loss": 0.0039, "step": 59260 }, { "epoch": 0.38992651460826433, "grad_norm": 0.07317908922361561, "learning_rate": 9.727808304869401e-06, "loss": 0.002, "step": 59270 }, { "epoch": 0.38999230278349767, "grad_norm": 0.05228749769164897, "learning_rate": 9.727621433307337e-06, "loss": 0.0026, "step": 59280 }, { "epoch": 0.39005809095873106, "grad_norm": 0.03159493163373976, "learning_rate": 9.727434499415666e-06, "loss": 0.0016, "step": 59290 }, { "epoch": 0.39012387913396446, "grad_norm": 0.03929031742885827, "learning_rate": 9.727247503196855e-06, "loss": 0.0018, "step": 59300 }, { "epoch": 0.39018966730919785, "grad_norm": 0.08116982310794212, "learning_rate": 9.727060444653369e-06, "loss": 0.0013, "step": 59310 }, { "epoch": 0.39025545548443125, "grad_norm": 0.11028777474871741, "learning_rate": 9.726873323787671e-06, "loss": 0.0027, "step": 59320 }, { "epoch": 0.3903212436596646, "grad_norm": 0.25873920907754216, "learning_rate": 9.726686140602232e-06, "loss": 0.0027, "step": 59330 }, { "epoch": 0.390387031834898, "grad_norm": 0.004748315705373184, "learning_rate": 9.72649889509952e-06, "loss": 0.0041, "step": 59340 }, { "epoch": 0.3904528200101314, "grad_norm": 0.14091785270356716, "learning_rate": 9.726311587282e-06, "loss": 0.003, "step": 59350 }, { "epoch": 0.39051860818536477, "grad_norm": 0.1715087974011642, "learning_rate": 9.726124217152145e-06, "loss": 0.0018, "step": 59360 }, { "epoch": 0.39058439636059816, "grad_norm": 0.06374946829690839, "learning_rate": 9.725936784712422e-06, "loss": 0.0018, "step": 59370 }, { "epoch": 0.39065018453583156, "grad_norm": 0.2341710726487357, "learning_rate": 9.725749289965305e-06, "loss": 0.0044, "step": 59380 }, { "epoch": 0.3907159727110649, "grad_norm": 0.12294536284107782, "learning_rate": 9.725561732913266e-06, "loss": 0.0028, "step": 59390 }, { "epoch": 0.3907817608862983, "grad_norm": 0.08068846217701257, "learning_rate": 9.725374113558777e-06, "loss": 0.0013, "step": 59400 }, { "epoch": 0.3908475490615317, "grad_norm": 0.026545150443519523, "learning_rate": 9.72518643190431e-06, "loss": 0.0012, "step": 59410 }, { "epoch": 0.3909133372367651, "grad_norm": 0.12170938421622159, "learning_rate": 9.72499868795234e-06, "loss": 0.002, "step": 59420 }, { "epoch": 0.3909791254119985, "grad_norm": 0.05537990151187751, "learning_rate": 9.724810881705346e-06, "loss": 0.0033, "step": 59430 }, { "epoch": 0.3910449135872318, "grad_norm": 0.1715587034888436, "learning_rate": 9.724623013165799e-06, "loss": 0.0033, "step": 59440 }, { "epoch": 0.3911107017624652, "grad_norm": 0.12024894750430457, "learning_rate": 9.724435082336178e-06, "loss": 0.0022, "step": 59450 }, { "epoch": 0.3911764899376986, "grad_norm": 0.05848245482017721, "learning_rate": 9.724247089218964e-06, "loss": 0.0025, "step": 59460 }, { "epoch": 0.391242278112932, "grad_norm": 0.15849240163382214, "learning_rate": 9.724059033816628e-06, "loss": 0.003, "step": 59470 }, { "epoch": 0.3913080662881654, "grad_norm": 0.4017879474203577, "learning_rate": 9.723870916131655e-06, "loss": 0.0047, "step": 59480 }, { "epoch": 0.39137385446339873, "grad_norm": 0.04949338132229026, "learning_rate": 9.723682736166525e-06, "loss": 0.002, "step": 59490 }, { "epoch": 0.3914396426386321, "grad_norm": 0.07294063800235293, "learning_rate": 9.723494493923717e-06, "loss": 0.0019, "step": 59500 }, { "epoch": 0.3915054308138655, "grad_norm": 0.10201545327911976, "learning_rate": 9.723306189405712e-06, "loss": 0.0025, "step": 59510 }, { "epoch": 0.3915712189890989, "grad_norm": 0.16796835603834112, "learning_rate": 9.723117822614997e-06, "loss": 0.0012, "step": 59520 }, { "epoch": 0.3916370071643323, "grad_norm": 0.06510957575340999, "learning_rate": 9.722929393554051e-06, "loss": 0.0016, "step": 59530 }, { "epoch": 0.39170279533956565, "grad_norm": 0.08194848547245544, "learning_rate": 9.72274090222536e-06, "loss": 0.0027, "step": 59540 }, { "epoch": 0.39176858351479904, "grad_norm": 0.2425933242225362, "learning_rate": 9.722552348631408e-06, "loss": 0.0035, "step": 59550 }, { "epoch": 0.39183437169003243, "grad_norm": 0.015527793379791427, "learning_rate": 9.722363732774685e-06, "loss": 0.0015, "step": 59560 }, { "epoch": 0.39190015986526583, "grad_norm": 0.16507762496644782, "learning_rate": 9.722175054657672e-06, "loss": 0.0025, "step": 59570 }, { "epoch": 0.3919659480404992, "grad_norm": 0.14469122857378425, "learning_rate": 9.72198631428286e-06, "loss": 0.0026, "step": 59580 }, { "epoch": 0.39203173621573256, "grad_norm": 0.05497185117075783, "learning_rate": 9.721797511652738e-06, "loss": 0.0025, "step": 59590 }, { "epoch": 0.39209752439096596, "grad_norm": 0.07703976489175025, "learning_rate": 9.721608646769792e-06, "loss": 0.0028, "step": 59600 }, { "epoch": 0.39216331256619935, "grad_norm": 0.07137467832336135, "learning_rate": 9.721419719636514e-06, "loss": 0.0014, "step": 59610 }, { "epoch": 0.39222910074143275, "grad_norm": 0.08823448400352281, "learning_rate": 9.721230730255396e-06, "loss": 0.0022, "step": 59620 }, { "epoch": 0.39229488891666614, "grad_norm": 0.03549148330159865, "learning_rate": 9.721041678628926e-06, "loss": 0.0025, "step": 59630 }, { "epoch": 0.3923606770918995, "grad_norm": 0.16444970781555385, "learning_rate": 9.720852564759601e-06, "loss": 0.0034, "step": 59640 }, { "epoch": 0.3924264652671329, "grad_norm": 0.331352130237136, "learning_rate": 9.720663388649911e-06, "loss": 0.006, "step": 59650 }, { "epoch": 0.39249225344236627, "grad_norm": 0.10719384968064066, "learning_rate": 9.720474150302353e-06, "loss": 0.0024, "step": 59660 }, { "epoch": 0.39255804161759966, "grad_norm": 0.027554810990509073, "learning_rate": 9.720284849719418e-06, "loss": 0.0017, "step": 59670 }, { "epoch": 0.39262382979283306, "grad_norm": 0.12160333338559787, "learning_rate": 9.720095486903605e-06, "loss": 0.0019, "step": 59680 }, { "epoch": 0.3926896179680664, "grad_norm": 0.0847422217944451, "learning_rate": 9.71990606185741e-06, "loss": 0.0023, "step": 59690 }, { "epoch": 0.3927554061432998, "grad_norm": 0.12384356012626206, "learning_rate": 9.71971657458333e-06, "loss": 0.0029, "step": 59700 }, { "epoch": 0.3928211943185332, "grad_norm": 0.07959931657477218, "learning_rate": 9.719527025083862e-06, "loss": 0.0009, "step": 59710 }, { "epoch": 0.3928869824937666, "grad_norm": 0.04177676525874176, "learning_rate": 9.719337413361506e-06, "loss": 0.0025, "step": 59720 }, { "epoch": 0.392952770669, "grad_norm": 0.1633473010754156, "learning_rate": 9.719147739418762e-06, "loss": 0.0024, "step": 59730 }, { "epoch": 0.3930185588442333, "grad_norm": 0.11813169011157898, "learning_rate": 9.718958003258132e-06, "loss": 0.0021, "step": 59740 }, { "epoch": 0.3930843470194667, "grad_norm": 0.029428102765637627, "learning_rate": 9.718768204882115e-06, "loss": 0.003, "step": 59750 }, { "epoch": 0.3931501351947001, "grad_norm": 0.06526798304826702, "learning_rate": 9.718578344293216e-06, "loss": 0.0027, "step": 59760 }, { "epoch": 0.3932159233699335, "grad_norm": 0.07875319553867563, "learning_rate": 9.718388421493936e-06, "loss": 0.0025, "step": 59770 }, { "epoch": 0.3932817115451669, "grad_norm": 0.06665842893978213, "learning_rate": 9.71819843648678e-06, "loss": 0.0016, "step": 59780 }, { "epoch": 0.39334749972040023, "grad_norm": 0.28299292214436295, "learning_rate": 9.718008389274253e-06, "loss": 0.0012, "step": 59790 }, { "epoch": 0.3934132878956336, "grad_norm": 0.050420920102429864, "learning_rate": 9.717818279858859e-06, "loss": 0.0035, "step": 59800 }, { "epoch": 0.393479076070867, "grad_norm": 0.031931183962000356, "learning_rate": 9.717628108243108e-06, "loss": 0.0013, "step": 59810 }, { "epoch": 0.3935448642461004, "grad_norm": 0.13813003376994643, "learning_rate": 9.7174378744295e-06, "loss": 0.002, "step": 59820 }, { "epoch": 0.3936106524213338, "grad_norm": 0.0968253449852054, "learning_rate": 9.717247578420552e-06, "loss": 0.0026, "step": 59830 }, { "epoch": 0.39367644059656715, "grad_norm": 0.13113274540984088, "learning_rate": 9.717057220218767e-06, "loss": 0.0034, "step": 59840 }, { "epoch": 0.39374222877180054, "grad_norm": 0.5639452558390732, "learning_rate": 9.716866799826658e-06, "loss": 0.0019, "step": 59850 }, { "epoch": 0.39380801694703393, "grad_norm": 0.13436861850727944, "learning_rate": 9.716676317246733e-06, "loss": 0.0027, "step": 59860 }, { "epoch": 0.39387380512226733, "grad_norm": 0.061660839759311734, "learning_rate": 9.716485772481505e-06, "loss": 0.0026, "step": 59870 }, { "epoch": 0.3939395932975007, "grad_norm": 0.11359406365489493, "learning_rate": 9.716295165533484e-06, "loss": 0.0023, "step": 59880 }, { "epoch": 0.3940053814727341, "grad_norm": 0.3126816126612243, "learning_rate": 9.716104496405186e-06, "loss": 0.0018, "step": 59890 }, { "epoch": 0.39407116964796746, "grad_norm": 0.12685379663051669, "learning_rate": 9.715913765099122e-06, "loss": 0.0021, "step": 59900 }, { "epoch": 0.39413695782320085, "grad_norm": 0.14064408569723752, "learning_rate": 9.715722971617808e-06, "loss": 0.0044, "step": 59910 }, { "epoch": 0.39420274599843425, "grad_norm": 0.016777473581770078, "learning_rate": 9.71553211596376e-06, "loss": 0.0028, "step": 59920 }, { "epoch": 0.39426853417366764, "grad_norm": 0.2653813449689342, "learning_rate": 9.715341198139494e-06, "loss": 0.0045, "step": 59930 }, { "epoch": 0.39433432234890103, "grad_norm": 0.06997870606907756, "learning_rate": 9.715150218147526e-06, "loss": 0.0029, "step": 59940 }, { "epoch": 0.3944001105241344, "grad_norm": 0.20538589420878575, "learning_rate": 9.714959175990375e-06, "loss": 0.0135, "step": 59950 }, { "epoch": 0.39446589869936777, "grad_norm": 0.1399591105781602, "learning_rate": 9.71476807167056e-06, "loss": 0.0024, "step": 59960 }, { "epoch": 0.39453168687460116, "grad_norm": 0.011050198085913539, "learning_rate": 9.7145769051906e-06, "loss": 0.0011, "step": 59970 }, { "epoch": 0.39459747504983456, "grad_norm": 0.05823424908317193, "learning_rate": 9.714385676553015e-06, "loss": 0.0038, "step": 59980 }, { "epoch": 0.39466326322506795, "grad_norm": 0.2779173443034739, "learning_rate": 9.714194385760325e-06, "loss": 0.0017, "step": 59990 }, { "epoch": 0.3947290514003013, "grad_norm": 0.19015342588103454, "learning_rate": 9.714003032815055e-06, "loss": 0.0017, "step": 60000 }, { "epoch": 0.3947948395755347, "grad_norm": 0.11337503417261566, "learning_rate": 9.713811617719726e-06, "loss": 0.0021, "step": 60010 }, { "epoch": 0.3948606277507681, "grad_norm": 0.03649386158926155, "learning_rate": 9.713620140476861e-06, "loss": 0.0034, "step": 60020 }, { "epoch": 0.3949264159260015, "grad_norm": 0.02423259847543351, "learning_rate": 9.713428601088987e-06, "loss": 0.0014, "step": 60030 }, { "epoch": 0.39499220410123487, "grad_norm": 0.005127959527620505, "learning_rate": 9.713236999558626e-06, "loss": 0.002, "step": 60040 }, { "epoch": 0.3950579922764682, "grad_norm": 0.05740445290232333, "learning_rate": 9.713045335888308e-06, "loss": 0.0027, "step": 60050 }, { "epoch": 0.3951237804517016, "grad_norm": 0.119774627685531, "learning_rate": 9.712853610080555e-06, "loss": 0.0027, "step": 60060 }, { "epoch": 0.395189568626935, "grad_norm": 0.06610584775053199, "learning_rate": 9.7126618221379e-06, "loss": 0.0011, "step": 60070 }, { "epoch": 0.3952553568021684, "grad_norm": 0.08808981942698495, "learning_rate": 9.712469972062868e-06, "loss": 0.0018, "step": 60080 }, { "epoch": 0.3953211449774018, "grad_norm": 0.06010564644339203, "learning_rate": 9.712278059857989e-06, "loss": 0.0027, "step": 60090 }, { "epoch": 0.3953869331526351, "grad_norm": 0.1312051025864794, "learning_rate": 9.712086085525794e-06, "loss": 0.0018, "step": 60100 }, { "epoch": 0.3954527213278685, "grad_norm": 0.122435653377361, "learning_rate": 9.711894049068813e-06, "loss": 0.0021, "step": 60110 }, { "epoch": 0.3955185095031019, "grad_norm": 0.03322081368520661, "learning_rate": 9.711701950489578e-06, "loss": 0.0036, "step": 60120 }, { "epoch": 0.3955842976783353, "grad_norm": 0.20499089005352458, "learning_rate": 9.711509789790623e-06, "loss": 0.0034, "step": 60130 }, { "epoch": 0.3956500858535687, "grad_norm": 0.01799841465804664, "learning_rate": 9.711317566974481e-06, "loss": 0.0013, "step": 60140 }, { "epoch": 0.39571587402880204, "grad_norm": 0.0685435799672151, "learning_rate": 9.711125282043683e-06, "loss": 0.0027, "step": 60150 }, { "epoch": 0.39578166220403543, "grad_norm": 0.264112277776439, "learning_rate": 9.710932935000771e-06, "loss": 0.002, "step": 60160 }, { "epoch": 0.39584745037926883, "grad_norm": 0.07462800557195597, "learning_rate": 9.710740525848275e-06, "loss": 0.0032, "step": 60170 }, { "epoch": 0.3959132385545022, "grad_norm": 0.07054939873894725, "learning_rate": 9.710548054588733e-06, "loss": 0.0016, "step": 60180 }, { "epoch": 0.3959790267297356, "grad_norm": 0.0010417625889441074, "learning_rate": 9.710355521224683e-06, "loss": 0.003, "step": 60190 }, { "epoch": 0.39604481490496896, "grad_norm": 0.09749757642458, "learning_rate": 9.710162925758666e-06, "loss": 0.0043, "step": 60200 }, { "epoch": 0.39611060308020235, "grad_norm": 0.39455791336851215, "learning_rate": 9.709970268193216e-06, "loss": 0.0024, "step": 60210 }, { "epoch": 0.39617639125543574, "grad_norm": 0.006359398541603156, "learning_rate": 9.709777548530878e-06, "loss": 0.0024, "step": 60220 }, { "epoch": 0.39624217943066914, "grad_norm": 0.2747259695732659, "learning_rate": 9.709584766774189e-06, "loss": 0.0028, "step": 60230 }, { "epoch": 0.39630796760590253, "grad_norm": 0.11033405617336313, "learning_rate": 9.709391922925693e-06, "loss": 0.0025, "step": 60240 }, { "epoch": 0.3963737557811359, "grad_norm": 0.06479289817455273, "learning_rate": 9.709199016987933e-06, "loss": 0.0021, "step": 60250 }, { "epoch": 0.39643954395636927, "grad_norm": 0.6111180271425718, "learning_rate": 9.70900604896345e-06, "loss": 0.0024, "step": 60260 }, { "epoch": 0.39650533213160266, "grad_norm": 0.0648956493099615, "learning_rate": 9.708813018854787e-06, "loss": 0.002, "step": 60270 }, { "epoch": 0.39657112030683606, "grad_norm": 0.10220409082100601, "learning_rate": 9.708619926664494e-06, "loss": 0.0025, "step": 60280 }, { "epoch": 0.39663690848206945, "grad_norm": 0.32033436119530573, "learning_rate": 9.708426772395114e-06, "loss": 0.0034, "step": 60290 }, { "epoch": 0.3967026966573028, "grad_norm": 0.639587777768593, "learning_rate": 9.708233556049191e-06, "loss": 0.0018, "step": 60300 }, { "epoch": 0.3967684848325362, "grad_norm": 0.011527943168392096, "learning_rate": 9.708040277629279e-06, "loss": 0.0024, "step": 60310 }, { "epoch": 0.3968342730077696, "grad_norm": 0.04324087017692721, "learning_rate": 9.707846937137918e-06, "loss": 0.002, "step": 60320 }, { "epoch": 0.396900061183003, "grad_norm": 0.054975692342459914, "learning_rate": 9.707653534577663e-06, "loss": 0.0014, "step": 60330 }, { "epoch": 0.39696584935823637, "grad_norm": 0.11869841219943876, "learning_rate": 9.707460069951061e-06, "loss": 0.0032, "step": 60340 }, { "epoch": 0.39703163753346976, "grad_norm": 0.04354966860628715, "learning_rate": 9.707266543260666e-06, "loss": 0.0025, "step": 60350 }, { "epoch": 0.3970974257087031, "grad_norm": 0.05243048144875416, "learning_rate": 9.707072954509023e-06, "loss": 0.0015, "step": 60360 }, { "epoch": 0.3971632138839365, "grad_norm": 0.35572568210557065, "learning_rate": 9.70687930369869e-06, "loss": 0.0029, "step": 60370 }, { "epoch": 0.3972290020591699, "grad_norm": 0.011204005172804687, "learning_rate": 9.70668559083222e-06, "loss": 0.0022, "step": 60380 }, { "epoch": 0.3972947902344033, "grad_norm": 0.17041011315475027, "learning_rate": 9.706491815912165e-06, "loss": 0.0027, "step": 60390 }, { "epoch": 0.3973605784096367, "grad_norm": 0.058043585317104636, "learning_rate": 9.706297978941079e-06, "loss": 0.0013, "step": 60400 }, { "epoch": 0.39742636658487, "grad_norm": 0.11251439691348845, "learning_rate": 9.70610407992152e-06, "loss": 0.0015, "step": 60410 }, { "epoch": 0.3974921547601034, "grad_norm": 0.3005915878713511, "learning_rate": 9.705910118856043e-06, "loss": 0.0038, "step": 60420 }, { "epoch": 0.3975579429353368, "grad_norm": 0.04124235106600235, "learning_rate": 9.705716095747205e-06, "loss": 0.0009, "step": 60430 }, { "epoch": 0.3976237311105702, "grad_norm": 0.1793601843076536, "learning_rate": 9.705522010597565e-06, "loss": 0.0023, "step": 60440 }, { "epoch": 0.3976895192858036, "grad_norm": 0.05711964176366064, "learning_rate": 9.705327863409681e-06, "loss": 0.0012, "step": 60450 }, { "epoch": 0.39775530746103693, "grad_norm": 0.009792032379647701, "learning_rate": 9.705133654186112e-06, "loss": 0.0021, "step": 60460 }, { "epoch": 0.3978210956362703, "grad_norm": 0.09835789842210912, "learning_rate": 9.70493938292942e-06, "loss": 0.0039, "step": 60470 }, { "epoch": 0.3978868838115037, "grad_norm": 0.03419231210283659, "learning_rate": 9.704745049642165e-06, "loss": 0.0019, "step": 60480 }, { "epoch": 0.3979526719867371, "grad_norm": 0.10835326353880238, "learning_rate": 9.704550654326913e-06, "loss": 0.0031, "step": 60490 }, { "epoch": 0.3980184601619705, "grad_norm": 0.1398520063456098, "learning_rate": 9.70435619698622e-06, "loss": 0.003, "step": 60500 }, { "epoch": 0.39808424833720385, "grad_norm": 0.04208837053998916, "learning_rate": 9.704161677622654e-06, "loss": 0.003, "step": 60510 }, { "epoch": 0.39815003651243724, "grad_norm": 0.0541282369696137, "learning_rate": 9.70396709623878e-06, "loss": 0.0009, "step": 60520 }, { "epoch": 0.39821582468767064, "grad_norm": 0.03790979851595527, "learning_rate": 9.703772452837163e-06, "loss": 0.0028, "step": 60530 }, { "epoch": 0.39828161286290403, "grad_norm": 0.1453308698183942, "learning_rate": 9.703577747420367e-06, "loss": 0.002, "step": 60540 }, { "epoch": 0.3983474010381374, "grad_norm": 0.10509231169195296, "learning_rate": 9.703382979990964e-06, "loss": 0.0021, "step": 60550 }, { "epoch": 0.39841318921337077, "grad_norm": 0.11785874630694829, "learning_rate": 9.703188150551515e-06, "loss": 0.0018, "step": 60560 }, { "epoch": 0.39847897738860416, "grad_norm": 0.0325267868966857, "learning_rate": 9.702993259104593e-06, "loss": 0.0031, "step": 60570 }, { "epoch": 0.39854476556383756, "grad_norm": 0.1152407056141346, "learning_rate": 9.702798305652767e-06, "loss": 0.0027, "step": 60580 }, { "epoch": 0.39861055373907095, "grad_norm": 0.04979408894149604, "learning_rate": 9.702603290198606e-06, "loss": 0.0026, "step": 60590 }, { "epoch": 0.39867634191430434, "grad_norm": 0.0980994578931813, "learning_rate": 9.702408212744684e-06, "loss": 0.003, "step": 60600 }, { "epoch": 0.3987421300895377, "grad_norm": 0.2322249467530351, "learning_rate": 9.70221307329357e-06, "loss": 0.0011, "step": 60610 }, { "epoch": 0.3988079182647711, "grad_norm": 0.08574896065579629, "learning_rate": 9.702017871847837e-06, "loss": 0.0041, "step": 60620 }, { "epoch": 0.39887370644000447, "grad_norm": 0.008727179945483478, "learning_rate": 9.70182260841006e-06, "loss": 0.0026, "step": 60630 }, { "epoch": 0.39893949461523787, "grad_norm": 0.1842443245586913, "learning_rate": 9.701627282982812e-06, "loss": 0.0016, "step": 60640 }, { "epoch": 0.39900528279047126, "grad_norm": 0.04067795180369295, "learning_rate": 9.701431895568668e-06, "loss": 0.0017, "step": 60650 }, { "epoch": 0.3990710709657046, "grad_norm": 0.04740951695233215, "learning_rate": 9.701236446170209e-06, "loss": 0.0038, "step": 60660 }, { "epoch": 0.399136859140938, "grad_norm": 0.021946157918491326, "learning_rate": 9.701040934790004e-06, "loss": 0.0018, "step": 60670 }, { "epoch": 0.3992026473161714, "grad_norm": 0.039203986795713304, "learning_rate": 9.700845361430636e-06, "loss": 0.0027, "step": 60680 }, { "epoch": 0.3992684354914048, "grad_norm": 0.29942085006391816, "learning_rate": 9.700649726094681e-06, "loss": 0.0036, "step": 60690 }, { "epoch": 0.3993342236666382, "grad_norm": 0.14804790788854594, "learning_rate": 9.70045402878472e-06, "loss": 0.0028, "step": 60700 }, { "epoch": 0.3994000118418715, "grad_norm": 0.22098894592289564, "learning_rate": 9.70025826950333e-06, "loss": 0.0028, "step": 60710 }, { "epoch": 0.3994658000171049, "grad_norm": 0.006893821910023809, "learning_rate": 9.700062448253097e-06, "loss": 0.0016, "step": 60720 }, { "epoch": 0.3995315881923383, "grad_norm": 0.05204958766319102, "learning_rate": 9.699866565036598e-06, "loss": 0.0018, "step": 60730 }, { "epoch": 0.3995973763675717, "grad_norm": 0.033942804815149094, "learning_rate": 9.699670619856418e-06, "loss": 0.0022, "step": 60740 }, { "epoch": 0.3996631645428051, "grad_norm": 0.07805069008750519, "learning_rate": 9.699474612715141e-06, "loss": 0.0028, "step": 60750 }, { "epoch": 0.39972895271803843, "grad_norm": 0.01403475063792247, "learning_rate": 9.699278543615348e-06, "loss": 0.002, "step": 60760 }, { "epoch": 0.3997947408932718, "grad_norm": 0.059758778966458946, "learning_rate": 9.699082412559626e-06, "loss": 0.0019, "step": 60770 }, { "epoch": 0.3998605290685052, "grad_norm": 0.06876871348847297, "learning_rate": 9.698886219550562e-06, "loss": 0.0028, "step": 60780 }, { "epoch": 0.3999263172437386, "grad_norm": 0.07613248144376115, "learning_rate": 9.69868996459074e-06, "loss": 0.0021, "step": 60790 }, { "epoch": 0.399992105418972, "grad_norm": 0.08848536624927146, "learning_rate": 9.69849364768275e-06, "loss": 0.0025, "step": 60800 }, { "epoch": 0.4000578935942054, "grad_norm": 0.024926029736740185, "learning_rate": 9.69829726882918e-06, "loss": 0.003, "step": 60810 }, { "epoch": 0.40012368176943874, "grad_norm": 0.15270709404800425, "learning_rate": 9.698100828032615e-06, "loss": 0.0027, "step": 60820 }, { "epoch": 0.40018946994467214, "grad_norm": 0.016483828033170236, "learning_rate": 9.697904325295651e-06, "loss": 0.0016, "step": 60830 }, { "epoch": 0.40025525811990553, "grad_norm": 0.024209957973583645, "learning_rate": 9.697707760620877e-06, "loss": 0.0016, "step": 60840 }, { "epoch": 0.4003210462951389, "grad_norm": 0.22903644693806033, "learning_rate": 9.69751113401088e-06, "loss": 0.002, "step": 60850 }, { "epoch": 0.4003868344703723, "grad_norm": 0.08751556655148894, "learning_rate": 9.697314445468259e-06, "loss": 0.0029, "step": 60860 }, { "epoch": 0.40045262264560566, "grad_norm": 0.24855753718776535, "learning_rate": 9.6971176949956e-06, "loss": 0.0034, "step": 60870 }, { "epoch": 0.40051841082083905, "grad_norm": 0.10033125205027384, "learning_rate": 9.696920882595504e-06, "loss": 0.002, "step": 60880 }, { "epoch": 0.40058419899607245, "grad_norm": 0.06512977331398083, "learning_rate": 9.696724008270562e-06, "loss": 0.0034, "step": 60890 }, { "epoch": 0.40064998717130584, "grad_norm": 0.014680392088374324, "learning_rate": 9.696527072023369e-06, "loss": 0.0019, "step": 60900 }, { "epoch": 0.40071577534653924, "grad_norm": 0.19165430277581588, "learning_rate": 9.696330073856524e-06, "loss": 0.003, "step": 60910 }, { "epoch": 0.4007815635217726, "grad_norm": 0.5917284082949034, "learning_rate": 9.696133013772622e-06, "loss": 0.0036, "step": 60920 }, { "epoch": 0.40084735169700597, "grad_norm": 0.06411465383566127, "learning_rate": 9.695935891774263e-06, "loss": 0.0039, "step": 60930 }, { "epoch": 0.40091313987223937, "grad_norm": 0.055711240539353356, "learning_rate": 9.695738707864045e-06, "loss": 0.0013, "step": 60940 }, { "epoch": 0.40097892804747276, "grad_norm": 0.43434995255131226, "learning_rate": 9.695541462044566e-06, "loss": 0.0034, "step": 60950 }, { "epoch": 0.40104471622270615, "grad_norm": 0.03939016294217984, "learning_rate": 9.695344154318431e-06, "loss": 0.0023, "step": 60960 }, { "epoch": 0.4011105043979395, "grad_norm": 0.07793224366055854, "learning_rate": 9.695146784688236e-06, "loss": 0.0013, "step": 60970 }, { "epoch": 0.4011762925731729, "grad_norm": 0.26810590132410755, "learning_rate": 9.694949353156587e-06, "loss": 0.0025, "step": 60980 }, { "epoch": 0.4012420807484063, "grad_norm": 0.02894648923909, "learning_rate": 9.694751859726087e-06, "loss": 0.0015, "step": 60990 }, { "epoch": 0.4013078689236397, "grad_norm": 0.2982463802507638, "learning_rate": 9.694554304399334e-06, "loss": 0.0026, "step": 61000 }, { "epoch": 0.40137365709887307, "grad_norm": 0.09156778952212168, "learning_rate": 9.69435668717894e-06, "loss": 0.0015, "step": 61010 }, { "epoch": 0.4014394452741064, "grad_norm": 0.02921874997399314, "learning_rate": 9.694159008067507e-06, "loss": 0.0018, "step": 61020 }, { "epoch": 0.4015052334493398, "grad_norm": 0.09977707387792092, "learning_rate": 9.693961267067642e-06, "loss": 0.0011, "step": 61030 }, { "epoch": 0.4015710216245732, "grad_norm": 0.11367939376486412, "learning_rate": 9.693763464181952e-06, "loss": 0.0017, "step": 61040 }, { "epoch": 0.4016368097998066, "grad_norm": 0.018979291147241745, "learning_rate": 9.693565599413044e-06, "loss": 0.0025, "step": 61050 }, { "epoch": 0.40170259797504, "grad_norm": 0.01264954583961362, "learning_rate": 9.693367672763527e-06, "loss": 0.0019, "step": 61060 }, { "epoch": 0.4017683861502733, "grad_norm": 0.024967657714563676, "learning_rate": 9.69316968423601e-06, "loss": 0.0015, "step": 61070 }, { "epoch": 0.4018341743255067, "grad_norm": 0.20202119824792836, "learning_rate": 9.692971633833105e-06, "loss": 0.0024, "step": 61080 }, { "epoch": 0.4018999625007401, "grad_norm": 0.060033533811453425, "learning_rate": 9.692773521557421e-06, "loss": 0.0016, "step": 61090 }, { "epoch": 0.4019657506759735, "grad_norm": 0.09300897625382042, "learning_rate": 9.692575347411572e-06, "loss": 0.0021, "step": 61100 }, { "epoch": 0.4020315388512069, "grad_norm": 0.033904310909383846, "learning_rate": 9.692377111398171e-06, "loss": 0.0017, "step": 61110 }, { "epoch": 0.40209732702644024, "grad_norm": 0.18010291467824502, "learning_rate": 9.692178813519828e-06, "loss": 0.0041, "step": 61120 }, { "epoch": 0.40216311520167364, "grad_norm": 0.04796075145831409, "learning_rate": 9.691980453779162e-06, "loss": 0.0012, "step": 61130 }, { "epoch": 0.40222890337690703, "grad_norm": 0.09889248091779983, "learning_rate": 9.691782032178785e-06, "loss": 0.0025, "step": 61140 }, { "epoch": 0.4022946915521404, "grad_norm": 0.10468835522681412, "learning_rate": 9.691583548721313e-06, "loss": 0.0016, "step": 61150 }, { "epoch": 0.4023604797273738, "grad_norm": 0.03544361497560892, "learning_rate": 9.691385003409364e-06, "loss": 0.0012, "step": 61160 }, { "epoch": 0.40242626790260716, "grad_norm": 0.10788159750654472, "learning_rate": 9.691186396245556e-06, "loss": 0.0026, "step": 61170 }, { "epoch": 0.40249205607784055, "grad_norm": 0.13577770191707741, "learning_rate": 9.690987727232509e-06, "loss": 0.002, "step": 61180 }, { "epoch": 0.40255784425307395, "grad_norm": 0.0628220639405478, "learning_rate": 9.690788996372838e-06, "loss": 0.0019, "step": 61190 }, { "epoch": 0.40262363242830734, "grad_norm": 0.12163510306014316, "learning_rate": 9.690590203669166e-06, "loss": 0.0021, "step": 61200 }, { "epoch": 0.40268942060354074, "grad_norm": 0.08786769251116043, "learning_rate": 9.690391349124111e-06, "loss": 0.0016, "step": 61210 }, { "epoch": 0.4027552087787741, "grad_norm": 0.07094206107595923, "learning_rate": 9.690192432740299e-06, "loss": 0.0016, "step": 61220 }, { "epoch": 0.40282099695400747, "grad_norm": 0.1943545375135208, "learning_rate": 9.68999345452035e-06, "loss": 0.0026, "step": 61230 }, { "epoch": 0.40288678512924087, "grad_norm": 0.04316399298792144, "learning_rate": 9.689794414466889e-06, "loss": 0.0033, "step": 61240 }, { "epoch": 0.40295257330447426, "grad_norm": 0.056970106563925774, "learning_rate": 9.689595312582538e-06, "loss": 0.0032, "step": 61250 }, { "epoch": 0.40301836147970765, "grad_norm": 0.13486012585752863, "learning_rate": 9.689396148869923e-06, "loss": 0.0023, "step": 61260 }, { "epoch": 0.40308414965494105, "grad_norm": 0.03903964126007481, "learning_rate": 9.68919692333167e-06, "loss": 0.0016, "step": 61270 }, { "epoch": 0.4031499378301744, "grad_norm": 0.04301961358930253, "learning_rate": 9.688997635970406e-06, "loss": 0.0019, "step": 61280 }, { "epoch": 0.4032157260054078, "grad_norm": 0.008400868291503319, "learning_rate": 9.688798286788756e-06, "loss": 0.0021, "step": 61290 }, { "epoch": 0.4032815141806412, "grad_norm": 0.024783261113196835, "learning_rate": 9.688598875789352e-06, "loss": 0.0024, "step": 61300 }, { "epoch": 0.40334730235587457, "grad_norm": 0.07179034719588326, "learning_rate": 9.68839940297482e-06, "loss": 0.0016, "step": 61310 }, { "epoch": 0.40341309053110797, "grad_norm": 0.028967546806820694, "learning_rate": 9.688199868347792e-06, "loss": 0.0013, "step": 61320 }, { "epoch": 0.4034788787063413, "grad_norm": 0.11021247843911572, "learning_rate": 9.688000271910898e-06, "loss": 0.0016, "step": 61330 }, { "epoch": 0.4035446668815747, "grad_norm": 0.1069609325670732, "learning_rate": 9.687800613666767e-06, "loss": 0.0021, "step": 61340 }, { "epoch": 0.4036104550568081, "grad_norm": 0.2267493486992102, "learning_rate": 9.687600893618035e-06, "loss": 0.0018, "step": 61350 }, { "epoch": 0.4036762432320415, "grad_norm": 0.03491464840051276, "learning_rate": 9.687401111767333e-06, "loss": 0.0013, "step": 61360 }, { "epoch": 0.4037420314072749, "grad_norm": 0.039297696978102314, "learning_rate": 9.687201268117297e-06, "loss": 0.0028, "step": 61370 }, { "epoch": 0.4038078195825082, "grad_norm": 0.128817847340894, "learning_rate": 9.68700136267056e-06, "loss": 0.0019, "step": 61380 }, { "epoch": 0.4038736077577416, "grad_norm": 0.05753893931925207, "learning_rate": 9.686801395429759e-06, "loss": 0.004, "step": 61390 }, { "epoch": 0.403939395932975, "grad_norm": 0.05148764180129196, "learning_rate": 9.686601366397528e-06, "loss": 0.0016, "step": 61400 }, { "epoch": 0.4040051841082084, "grad_norm": 0.024231628185174287, "learning_rate": 9.686401275576505e-06, "loss": 0.0017, "step": 61410 }, { "epoch": 0.4040709722834418, "grad_norm": 0.13856370715808194, "learning_rate": 9.68620112296933e-06, "loss": 0.0027, "step": 61420 }, { "epoch": 0.40413676045867514, "grad_norm": 0.09443689232997886, "learning_rate": 9.686000908578639e-06, "loss": 0.0015, "step": 61430 }, { "epoch": 0.40420254863390853, "grad_norm": 0.24764267144335073, "learning_rate": 9.685800632407075e-06, "loss": 0.0037, "step": 61440 }, { "epoch": 0.4042683368091419, "grad_norm": 0.1947786689496539, "learning_rate": 9.685600294457276e-06, "loss": 0.0037, "step": 61450 }, { "epoch": 0.4043341249843753, "grad_norm": 0.14964666050840827, "learning_rate": 9.685399894731883e-06, "loss": 0.0024, "step": 61460 }, { "epoch": 0.4043999131596087, "grad_norm": 0.13204306501465626, "learning_rate": 9.685199433233538e-06, "loss": 0.0028, "step": 61470 }, { "epoch": 0.40446570133484205, "grad_norm": 0.09342910667375573, "learning_rate": 9.684998909964887e-06, "loss": 0.0013, "step": 61480 }, { "epoch": 0.40453148951007545, "grad_norm": 0.04981202701310778, "learning_rate": 9.684798324928571e-06, "loss": 0.0022, "step": 61490 }, { "epoch": 0.40459727768530884, "grad_norm": 0.12791328849902514, "learning_rate": 9.684597678127234e-06, "loss": 0.0022, "step": 61500 }, { "epoch": 0.40466306586054224, "grad_norm": 0.03928282487821865, "learning_rate": 9.684396969563525e-06, "loss": 0.0015, "step": 61510 }, { "epoch": 0.40472885403577563, "grad_norm": 0.03611466337960123, "learning_rate": 9.684196199240086e-06, "loss": 0.0027, "step": 61520 }, { "epoch": 0.40479464221100897, "grad_norm": 0.29373455204180837, "learning_rate": 9.683995367159566e-06, "loss": 0.0029, "step": 61530 }, { "epoch": 0.40486043038624236, "grad_norm": 0.08389732502060954, "learning_rate": 9.683794473324612e-06, "loss": 0.0033, "step": 61540 }, { "epoch": 0.40492621856147576, "grad_norm": 0.06429180267469176, "learning_rate": 9.683593517737873e-06, "loss": 0.0013, "step": 61550 }, { "epoch": 0.40499200673670915, "grad_norm": 0.12288901522207366, "learning_rate": 9.683392500401999e-06, "loss": 0.0022, "step": 61560 }, { "epoch": 0.40505779491194255, "grad_norm": 0.07173170569954372, "learning_rate": 9.68319142131964e-06, "loss": 0.0013, "step": 61570 }, { "epoch": 0.4051235830871759, "grad_norm": 0.04128794470568336, "learning_rate": 9.682990280493446e-06, "loss": 0.0026, "step": 61580 }, { "epoch": 0.4051893712624093, "grad_norm": 0.04578394219916399, "learning_rate": 9.682789077926069e-06, "loss": 0.002, "step": 61590 }, { "epoch": 0.4052551594376427, "grad_norm": 0.021375277675216783, "learning_rate": 9.682587813620163e-06, "loss": 0.0015, "step": 61600 }, { "epoch": 0.40532094761287607, "grad_norm": 0.07830978471680804, "learning_rate": 9.682386487578383e-06, "loss": 0.002, "step": 61610 }, { "epoch": 0.40538673578810946, "grad_norm": 0.024159626494096587, "learning_rate": 9.682185099803379e-06, "loss": 0.0019, "step": 61620 }, { "epoch": 0.4054525239633428, "grad_norm": 0.05907691693775466, "learning_rate": 9.681983650297809e-06, "loss": 0.0029, "step": 61630 }, { "epoch": 0.4055183121385762, "grad_norm": 0.07046384202983293, "learning_rate": 9.681782139064326e-06, "loss": 0.0029, "step": 61640 }, { "epoch": 0.4055841003138096, "grad_norm": 0.05534752806589088, "learning_rate": 9.681580566105591e-06, "loss": 0.0023, "step": 61650 }, { "epoch": 0.405649888489043, "grad_norm": 0.13925652072973296, "learning_rate": 9.68137893142426e-06, "loss": 0.0027, "step": 61660 }, { "epoch": 0.4057156766642764, "grad_norm": 0.07014654385487905, "learning_rate": 9.68117723502299e-06, "loss": 0.0021, "step": 61670 }, { "epoch": 0.4057814648395097, "grad_norm": 0.026326835803399114, "learning_rate": 9.680975476904441e-06, "loss": 0.0024, "step": 61680 }, { "epoch": 0.4058472530147431, "grad_norm": 0.10581429468447286, "learning_rate": 9.680773657071274e-06, "loss": 0.0015, "step": 61690 }, { "epoch": 0.4059130411899765, "grad_norm": 0.11617079190918758, "learning_rate": 9.680571775526147e-06, "loss": 0.0018, "step": 61700 }, { "epoch": 0.4059788293652099, "grad_norm": 0.033924360941605655, "learning_rate": 9.680369832271726e-06, "loss": 0.0027, "step": 61710 }, { "epoch": 0.4060446175404433, "grad_norm": 0.04180892691619961, "learning_rate": 9.680167827310668e-06, "loss": 0.001, "step": 61720 }, { "epoch": 0.40611040571567664, "grad_norm": 0.14983095566315416, "learning_rate": 9.679965760645642e-06, "loss": 0.0023, "step": 61730 }, { "epoch": 0.40617619389091003, "grad_norm": 0.08890351925568536, "learning_rate": 9.679763632279308e-06, "loss": 0.002, "step": 61740 }, { "epoch": 0.4062419820661434, "grad_norm": 0.10435840640830842, "learning_rate": 9.679561442214333e-06, "loss": 0.0017, "step": 61750 }, { "epoch": 0.4063077702413768, "grad_norm": 0.09378877688282697, "learning_rate": 9.679359190453382e-06, "loss": 0.0022, "step": 61760 }, { "epoch": 0.4063735584166102, "grad_norm": 0.039382714383277485, "learning_rate": 9.67915687699912e-06, "loss": 0.0033, "step": 61770 }, { "epoch": 0.4064393465918436, "grad_norm": 0.11311518358636966, "learning_rate": 9.678954501854217e-06, "loss": 0.0025, "step": 61780 }, { "epoch": 0.40650513476707695, "grad_norm": 0.03647367699681267, "learning_rate": 9.678752065021339e-06, "loss": 0.0009, "step": 61790 }, { "epoch": 0.40657092294231034, "grad_norm": 0.11580772326785826, "learning_rate": 9.678549566503156e-06, "loss": 0.0024, "step": 61800 }, { "epoch": 0.40663671111754374, "grad_norm": 0.038840777309324226, "learning_rate": 9.678347006302337e-06, "loss": 0.0022, "step": 61810 }, { "epoch": 0.40670249929277713, "grad_norm": 0.13745599794890417, "learning_rate": 9.678144384421555e-06, "loss": 0.0024, "step": 61820 }, { "epoch": 0.4067682874680105, "grad_norm": 0.2410555272816705, "learning_rate": 9.677941700863478e-06, "loss": 0.002, "step": 61830 }, { "epoch": 0.40683407564324386, "grad_norm": 0.0895319337573399, "learning_rate": 9.677738955630782e-06, "loss": 0.0018, "step": 61840 }, { "epoch": 0.40689986381847726, "grad_norm": 0.11342732094161898, "learning_rate": 9.677536148726136e-06, "loss": 0.0036, "step": 61850 }, { "epoch": 0.40696565199371065, "grad_norm": 0.0886419745065125, "learning_rate": 9.677333280152217e-06, "loss": 0.0017, "step": 61860 }, { "epoch": 0.40703144016894405, "grad_norm": 0.22223936878828976, "learning_rate": 9.677130349911697e-06, "loss": 0.0048, "step": 61870 }, { "epoch": 0.40709722834417744, "grad_norm": 0.022951701672289167, "learning_rate": 9.676927358007253e-06, "loss": 0.0022, "step": 61880 }, { "epoch": 0.4071630165194108, "grad_norm": 0.17133779148308811, "learning_rate": 9.67672430444156e-06, "loss": 0.0021, "step": 61890 }, { "epoch": 0.4072288046946442, "grad_norm": 0.21160914905107447, "learning_rate": 9.676521189217298e-06, "loss": 0.0028, "step": 61900 }, { "epoch": 0.40729459286987757, "grad_norm": 0.09812262690346321, "learning_rate": 9.676318012337142e-06, "loss": 0.0029, "step": 61910 }, { "epoch": 0.40736038104511096, "grad_norm": 0.08714312131936376, "learning_rate": 9.676114773803773e-06, "loss": 0.002, "step": 61920 }, { "epoch": 0.40742616922034436, "grad_norm": 0.1086090705895593, "learning_rate": 9.675911473619868e-06, "loss": 0.0022, "step": 61930 }, { "epoch": 0.4074919573955777, "grad_norm": 0.07605913139315225, "learning_rate": 9.67570811178811e-06, "loss": 0.0025, "step": 61940 }, { "epoch": 0.4075577455708111, "grad_norm": 0.09733403189589394, "learning_rate": 9.675504688311177e-06, "loss": 0.0011, "step": 61950 }, { "epoch": 0.4076235337460445, "grad_norm": 0.10332216490003067, "learning_rate": 9.675301203191754e-06, "loss": 0.0017, "step": 61960 }, { "epoch": 0.4076893219212779, "grad_norm": 0.042249805487290654, "learning_rate": 9.675097656432521e-06, "loss": 0.0018, "step": 61970 }, { "epoch": 0.4077551100965113, "grad_norm": 0.25259589933161314, "learning_rate": 9.674894048036165e-06, "loss": 0.0033, "step": 61980 }, { "epoch": 0.4078208982717446, "grad_norm": 0.09419929819194675, "learning_rate": 9.674690378005366e-06, "loss": 0.0028, "step": 61990 }, { "epoch": 0.407886686446978, "grad_norm": 0.06405895794973147, "learning_rate": 9.674486646342814e-06, "loss": 0.0019, "step": 62000 }, { "epoch": 0.4079524746222114, "grad_norm": 0.07984182209369957, "learning_rate": 9.674282853051192e-06, "loss": 0.0017, "step": 62010 }, { "epoch": 0.4080182627974448, "grad_norm": 0.0361212367599844, "learning_rate": 9.674078998133187e-06, "loss": 0.0028, "step": 62020 }, { "epoch": 0.4080840509726782, "grad_norm": 0.012766589866081618, "learning_rate": 9.673875081591488e-06, "loss": 0.0039, "step": 62030 }, { "epoch": 0.40814983914791153, "grad_norm": 0.01073590750591825, "learning_rate": 9.673671103428782e-06, "loss": 0.0015, "step": 62040 }, { "epoch": 0.4082156273231449, "grad_norm": 0.093744330831153, "learning_rate": 9.67346706364776e-06, "loss": 0.002, "step": 62050 }, { "epoch": 0.4082814154983783, "grad_norm": 0.05455178806213105, "learning_rate": 9.67326296225111e-06, "loss": 0.0032, "step": 62060 }, { "epoch": 0.4083472036736117, "grad_norm": 0.05964376605129662, "learning_rate": 9.673058799241523e-06, "loss": 0.0014, "step": 62070 }, { "epoch": 0.4084129918488451, "grad_norm": 0.04123604662168443, "learning_rate": 9.672854574621694e-06, "loss": 0.0014, "step": 62080 }, { "epoch": 0.40847878002407845, "grad_norm": 0.03905891952860521, "learning_rate": 9.672650288394311e-06, "loss": 0.0018, "step": 62090 }, { "epoch": 0.40854456819931184, "grad_norm": 0.054578556287111994, "learning_rate": 9.672445940562072e-06, "loss": 0.0007, "step": 62100 }, { "epoch": 0.40861035637454524, "grad_norm": 0.041700870357441674, "learning_rate": 9.672241531127667e-06, "loss": 0.0011, "step": 62110 }, { "epoch": 0.40867614454977863, "grad_norm": 0.05793731218084911, "learning_rate": 9.672037060093794e-06, "loss": 0.0021, "step": 62120 }, { "epoch": 0.408741932725012, "grad_norm": 0.02887178351763624, "learning_rate": 9.671832527463146e-06, "loss": 0.0023, "step": 62130 }, { "epoch": 0.40880772090024536, "grad_norm": 0.03854279362622037, "learning_rate": 9.67162793323842e-06, "loss": 0.0037, "step": 62140 }, { "epoch": 0.40887350907547876, "grad_norm": 0.04412736883739794, "learning_rate": 9.671423277422316e-06, "loss": 0.0021, "step": 62150 }, { "epoch": 0.40893929725071215, "grad_norm": 0.0384786863357091, "learning_rate": 9.67121856001753e-06, "loss": 0.0016, "step": 62160 }, { "epoch": 0.40900508542594555, "grad_norm": 0.11040330167136357, "learning_rate": 9.671013781026763e-06, "loss": 0.0019, "step": 62170 }, { "epoch": 0.40907087360117894, "grad_norm": 0.14170425116567723, "learning_rate": 9.670808940452712e-06, "loss": 0.0024, "step": 62180 }, { "epoch": 0.4091366617764123, "grad_norm": 0.09762357652937968, "learning_rate": 9.67060403829808e-06, "loss": 0.0023, "step": 62190 }, { "epoch": 0.4092024499516457, "grad_norm": 0.05027489877342767, "learning_rate": 9.670399074565566e-06, "loss": 0.002, "step": 62200 }, { "epoch": 0.40926823812687907, "grad_norm": 0.07379973516048577, "learning_rate": 9.670194049257874e-06, "loss": 0.0017, "step": 62210 }, { "epoch": 0.40933402630211246, "grad_norm": 0.20198463802071143, "learning_rate": 9.669988962377709e-06, "loss": 0.0023, "step": 62220 }, { "epoch": 0.40939981447734586, "grad_norm": 0.031591511327059375, "learning_rate": 9.669783813927771e-06, "loss": 0.0023, "step": 62230 }, { "epoch": 0.40946560265257925, "grad_norm": 0.011211851877711275, "learning_rate": 9.669578603910767e-06, "loss": 0.0015, "step": 62240 }, { "epoch": 0.4095313908278126, "grad_norm": 0.08155722567279305, "learning_rate": 9.669373332329401e-06, "loss": 0.0022, "step": 62250 }, { "epoch": 0.409597179003046, "grad_norm": 0.04259326857755096, "learning_rate": 9.669167999186382e-06, "loss": 0.0022, "step": 62260 }, { "epoch": 0.4096629671782794, "grad_norm": 0.010264273351730078, "learning_rate": 9.668962604484415e-06, "loss": 0.0017, "step": 62270 }, { "epoch": 0.4097287553535128, "grad_norm": 0.2977046803843716, "learning_rate": 9.668757148226209e-06, "loss": 0.0027, "step": 62280 }, { "epoch": 0.40979454352874617, "grad_norm": 0.0891035133240562, "learning_rate": 9.668551630414471e-06, "loss": 0.0029, "step": 62290 }, { "epoch": 0.4098603317039795, "grad_norm": 0.04222013683623349, "learning_rate": 9.668346051051912e-06, "loss": 0.0013, "step": 62300 }, { "epoch": 0.4099261198792129, "grad_norm": 0.10241257106889567, "learning_rate": 9.668140410141242e-06, "loss": 0.0031, "step": 62310 }, { "epoch": 0.4099919080544463, "grad_norm": 0.14476468003746423, "learning_rate": 9.667934707685174e-06, "loss": 0.0014, "step": 62320 }, { "epoch": 0.4100576962296797, "grad_norm": 0.05863295405021002, "learning_rate": 9.667728943686417e-06, "loss": 0.0017, "step": 62330 }, { "epoch": 0.4101234844049131, "grad_norm": 0.0011586851797094977, "learning_rate": 9.667523118147686e-06, "loss": 0.0013, "step": 62340 }, { "epoch": 0.4101892725801464, "grad_norm": 0.06343614415039042, "learning_rate": 9.667317231071692e-06, "loss": 0.0009, "step": 62350 }, { "epoch": 0.4102550607553798, "grad_norm": 0.07325659498258608, "learning_rate": 9.667111282461152e-06, "loss": 0.002, "step": 62360 }, { "epoch": 0.4103208489306132, "grad_norm": 0.16672187464343433, "learning_rate": 9.666905272318781e-06, "loss": 0.0022, "step": 62370 }, { "epoch": 0.4103866371058466, "grad_norm": 0.013918929037916994, "learning_rate": 9.666699200647296e-06, "loss": 0.003, "step": 62380 }, { "epoch": 0.41045242528108, "grad_norm": 0.0886414583529007, "learning_rate": 9.66649306744941e-06, "loss": 0.0013, "step": 62390 }, { "epoch": 0.41051821345631334, "grad_norm": 0.10473305907950498, "learning_rate": 9.666286872727845e-06, "loss": 0.0016, "step": 62400 }, { "epoch": 0.41058400163154674, "grad_norm": 0.014583447616158703, "learning_rate": 9.666080616485316e-06, "loss": 0.0023, "step": 62410 }, { "epoch": 0.41064978980678013, "grad_norm": 0.03813088546926762, "learning_rate": 9.665874298724545e-06, "loss": 0.0017, "step": 62420 }, { "epoch": 0.4107155779820135, "grad_norm": 0.08389079098986849, "learning_rate": 9.665667919448251e-06, "loss": 0.002, "step": 62430 }, { "epoch": 0.4107813661572469, "grad_norm": 0.049841474010012383, "learning_rate": 9.665461478659155e-06, "loss": 0.002, "step": 62440 }, { "epoch": 0.41084715433248026, "grad_norm": 0.06964667917711988, "learning_rate": 9.665254976359979e-06, "loss": 0.0028, "step": 62450 }, { "epoch": 0.41091294250771365, "grad_norm": 0.07477054231652015, "learning_rate": 9.665048412553444e-06, "loss": 0.0049, "step": 62460 }, { "epoch": 0.41097873068294705, "grad_norm": 0.11253903902310299, "learning_rate": 9.664841787242275e-06, "loss": 0.0023, "step": 62470 }, { "epoch": 0.41104451885818044, "grad_norm": 0.10533614520770751, "learning_rate": 9.664635100429198e-06, "loss": 0.0023, "step": 62480 }, { "epoch": 0.41111030703341384, "grad_norm": 0.04089937115367534, "learning_rate": 9.664428352116934e-06, "loss": 0.0021, "step": 62490 }, { "epoch": 0.4111760952086472, "grad_norm": 0.010112736911753874, "learning_rate": 9.66422154230821e-06, "loss": 0.0014, "step": 62500 }, { "epoch": 0.41124188338388057, "grad_norm": 0.10168941276234665, "learning_rate": 9.664014671005754e-06, "loss": 0.002, "step": 62510 }, { "epoch": 0.41130767155911396, "grad_norm": 0.07662321006836868, "learning_rate": 9.663807738212295e-06, "loss": 0.0017, "step": 62520 }, { "epoch": 0.41137345973434736, "grad_norm": 0.03905833662280514, "learning_rate": 9.663600743930555e-06, "loss": 0.0012, "step": 62530 }, { "epoch": 0.41143924790958075, "grad_norm": 0.05017778950477031, "learning_rate": 9.66339368816327e-06, "loss": 0.0025, "step": 62540 }, { "epoch": 0.4115050360848141, "grad_norm": 0.011601931225397153, "learning_rate": 9.663186570913165e-06, "loss": 0.0014, "step": 62550 }, { "epoch": 0.4115708242600475, "grad_norm": 0.09466092379983629, "learning_rate": 9.662979392182973e-06, "loss": 0.0022, "step": 62560 }, { "epoch": 0.4116366124352809, "grad_norm": 0.05408147140684596, "learning_rate": 9.662772151975426e-06, "loss": 0.0018, "step": 62570 }, { "epoch": 0.4117024006105143, "grad_norm": 0.2633732933754755, "learning_rate": 9.662564850293253e-06, "loss": 0.0029, "step": 62580 }, { "epoch": 0.41176818878574767, "grad_norm": 0.002806143982200611, "learning_rate": 9.662357487139192e-06, "loss": 0.0018, "step": 62590 }, { "epoch": 0.411833976960981, "grad_norm": 0.03796194567795519, "learning_rate": 9.662150062515972e-06, "loss": 0.002, "step": 62600 }, { "epoch": 0.4118997651362144, "grad_norm": 0.019349665754771095, "learning_rate": 9.66194257642633e-06, "loss": 0.0022, "step": 62610 }, { "epoch": 0.4119655533114478, "grad_norm": 0.10013215676618314, "learning_rate": 9.661735028873002e-06, "loss": 0.0027, "step": 62620 }, { "epoch": 0.4120313414866812, "grad_norm": 0.008028927494108079, "learning_rate": 9.661527419858724e-06, "loss": 0.0026, "step": 62630 }, { "epoch": 0.4120971296619146, "grad_norm": 0.07831089337873634, "learning_rate": 9.661319749386234e-06, "loss": 0.003, "step": 62640 }, { "epoch": 0.4121629178371479, "grad_norm": 0.05106765931714427, "learning_rate": 9.661112017458266e-06, "loss": 0.0028, "step": 62650 }, { "epoch": 0.4122287060123813, "grad_norm": 0.016969010481839564, "learning_rate": 9.660904224077564e-06, "loss": 0.0017, "step": 62660 }, { "epoch": 0.4122944941876147, "grad_norm": 0.00939272885177676, "learning_rate": 9.660696369246864e-06, "loss": 0.0025, "step": 62670 }, { "epoch": 0.4123602823628481, "grad_norm": 0.05859740167306576, "learning_rate": 9.660488452968908e-06, "loss": 0.0021, "step": 62680 }, { "epoch": 0.4124260705380815, "grad_norm": 0.05950845918852477, "learning_rate": 9.660280475246438e-06, "loss": 0.0017, "step": 62690 }, { "epoch": 0.4124918587133149, "grad_norm": 0.06893823520616507, "learning_rate": 9.660072436082193e-06, "loss": 0.0015, "step": 62700 }, { "epoch": 0.41255764688854824, "grad_norm": 0.24597839585225162, "learning_rate": 9.659864335478919e-06, "loss": 0.0032, "step": 62710 }, { "epoch": 0.41262343506378163, "grad_norm": 0.025170062124417, "learning_rate": 9.659656173439355e-06, "loss": 0.0015, "step": 62720 }, { "epoch": 0.412689223239015, "grad_norm": 0.21428087484785113, "learning_rate": 9.659447949966252e-06, "loss": 0.003, "step": 62730 }, { "epoch": 0.4127550114142484, "grad_norm": 0.20997174767653695, "learning_rate": 9.659239665062353e-06, "loss": 0.0018, "step": 62740 }, { "epoch": 0.4128207995894818, "grad_norm": 0.03850269996139066, "learning_rate": 9.6590313187304e-06, "loss": 0.0038, "step": 62750 }, { "epoch": 0.41288658776471515, "grad_norm": 0.07497804416173777, "learning_rate": 9.658822910973145e-06, "loss": 0.0027, "step": 62760 }, { "epoch": 0.41295237593994855, "grad_norm": 0.11027279565139557, "learning_rate": 9.658614441793333e-06, "loss": 0.0035, "step": 62770 }, { "epoch": 0.41301816411518194, "grad_norm": 0.015605430257186675, "learning_rate": 9.658405911193712e-06, "loss": 0.0017, "step": 62780 }, { "epoch": 0.41308395229041533, "grad_norm": 0.07537588001182813, "learning_rate": 9.658197319177033e-06, "loss": 0.0024, "step": 62790 }, { "epoch": 0.41314974046564873, "grad_norm": 0.01945651032523927, "learning_rate": 9.657988665746045e-06, "loss": 0.0016, "step": 62800 }, { "epoch": 0.41321552864088207, "grad_norm": 0.08812344747177812, "learning_rate": 9.6577799509035e-06, "loss": 0.0036, "step": 62810 }, { "epoch": 0.41328131681611546, "grad_norm": 0.07704816966802802, "learning_rate": 9.657571174652149e-06, "loss": 0.0018, "step": 62820 }, { "epoch": 0.41334710499134886, "grad_norm": 0.010775963379458268, "learning_rate": 9.657362336994744e-06, "loss": 0.0043, "step": 62830 }, { "epoch": 0.41341289316658225, "grad_norm": 0.10394429535296243, "learning_rate": 9.65715343793404e-06, "loss": 0.0023, "step": 62840 }, { "epoch": 0.41347868134181565, "grad_norm": 0.04143064814822288, "learning_rate": 9.65694447747279e-06, "loss": 0.0021, "step": 62850 }, { "epoch": 0.413544469517049, "grad_norm": 0.07281751459251341, "learning_rate": 9.656735455613748e-06, "loss": 0.0026, "step": 62860 }, { "epoch": 0.4136102576922824, "grad_norm": 0.09016044876245415, "learning_rate": 9.65652637235967e-06, "loss": 0.0019, "step": 62870 }, { "epoch": 0.4136760458675158, "grad_norm": 0.14835072223801127, "learning_rate": 9.656317227713316e-06, "loss": 0.0024, "step": 62880 }, { "epoch": 0.41374183404274917, "grad_norm": 0.0572146155645462, "learning_rate": 9.656108021677439e-06, "loss": 0.0019, "step": 62890 }, { "epoch": 0.41380762221798256, "grad_norm": 0.071355145714515, "learning_rate": 9.6558987542548e-06, "loss": 0.0023, "step": 62900 }, { "epoch": 0.4138734103932159, "grad_norm": 0.04973023225414765, "learning_rate": 9.655689425448156e-06, "loss": 0.0023, "step": 62910 }, { "epoch": 0.4139391985684493, "grad_norm": 0.11729913212542398, "learning_rate": 9.65548003526027e-06, "loss": 0.003, "step": 62920 }, { "epoch": 0.4140049867436827, "grad_norm": 0.031854251231554054, "learning_rate": 9.655270583693899e-06, "loss": 0.0013, "step": 62930 }, { "epoch": 0.4140707749189161, "grad_norm": 0.15670998412347048, "learning_rate": 9.655061070751805e-06, "loss": 0.0026, "step": 62940 }, { "epoch": 0.4141365630941495, "grad_norm": 0.07997182439243336, "learning_rate": 9.654851496436753e-06, "loss": 0.0032, "step": 62950 }, { "epoch": 0.4142023512693828, "grad_norm": 0.08721671915450946, "learning_rate": 9.654641860751503e-06, "loss": 0.0024, "step": 62960 }, { "epoch": 0.4142681394446162, "grad_norm": 0.0443484245012758, "learning_rate": 9.654432163698818e-06, "loss": 0.003, "step": 62970 }, { "epoch": 0.4143339276198496, "grad_norm": 0.16209143081840258, "learning_rate": 9.654222405281469e-06, "loss": 0.0019, "step": 62980 }, { "epoch": 0.414399715795083, "grad_norm": 0.02740785250965332, "learning_rate": 9.654012585502215e-06, "loss": 0.0016, "step": 62990 }, { "epoch": 0.4144655039703164, "grad_norm": 0.13149244661228665, "learning_rate": 9.653802704363825e-06, "loss": 0.006, "step": 63000 }, { "epoch": 0.41453129214554973, "grad_norm": 0.34990103070639594, "learning_rate": 9.653592761869066e-06, "loss": 0.0043, "step": 63010 }, { "epoch": 0.41459708032078313, "grad_norm": 0.06829075136003895, "learning_rate": 9.653382758020703e-06, "loss": 0.0016, "step": 63020 }, { "epoch": 0.4146628684960165, "grad_norm": 0.11539788177314352, "learning_rate": 9.653172692821512e-06, "loss": 0.0029, "step": 63030 }, { "epoch": 0.4147286566712499, "grad_norm": 0.15914319925661383, "learning_rate": 9.652962566274253e-06, "loss": 0.0021, "step": 63040 }, { "epoch": 0.4147944448464833, "grad_norm": 0.10961162391779411, "learning_rate": 9.652752378381705e-06, "loss": 0.0018, "step": 63050 }, { "epoch": 0.41486023302171665, "grad_norm": 0.018360190788838264, "learning_rate": 9.652542129146633e-06, "loss": 0.0014, "step": 63060 }, { "epoch": 0.41492602119695005, "grad_norm": 0.07329979467005858, "learning_rate": 9.652331818571812e-06, "loss": 0.002, "step": 63070 }, { "epoch": 0.41499180937218344, "grad_norm": 0.024265202367987425, "learning_rate": 9.652121446660012e-06, "loss": 0.0025, "step": 63080 }, { "epoch": 0.41505759754741683, "grad_norm": 0.04583641790302253, "learning_rate": 9.65191101341401e-06, "loss": 0.0015, "step": 63090 }, { "epoch": 0.41512338572265023, "grad_norm": 0.135327011815956, "learning_rate": 9.65170051883658e-06, "loss": 0.0024, "step": 63100 }, { "epoch": 0.41518917389788357, "grad_norm": 0.14235010915533933, "learning_rate": 9.651489962930496e-06, "loss": 0.0026, "step": 63110 }, { "epoch": 0.41525496207311696, "grad_norm": 0.03353725323060354, "learning_rate": 9.651279345698533e-06, "loss": 0.0019, "step": 63120 }, { "epoch": 0.41532075024835036, "grad_norm": 0.10051455580261892, "learning_rate": 9.65106866714347e-06, "loss": 0.0025, "step": 63130 }, { "epoch": 0.41538653842358375, "grad_norm": 0.316164579224113, "learning_rate": 9.650857927268082e-06, "loss": 0.0018, "step": 63140 }, { "epoch": 0.41545232659881715, "grad_norm": 0.05951801165388868, "learning_rate": 9.65064712607515e-06, "loss": 0.0021, "step": 63150 }, { "epoch": 0.41551811477405054, "grad_norm": 0.039498800290566266, "learning_rate": 9.650436263567453e-06, "loss": 0.0031, "step": 63160 }, { "epoch": 0.4155839029492839, "grad_norm": 0.2681673282928968, "learning_rate": 9.65022533974777e-06, "loss": 0.0034, "step": 63170 }, { "epoch": 0.4156496911245173, "grad_norm": 0.03148306297063813, "learning_rate": 9.650014354618879e-06, "loss": 0.0019, "step": 63180 }, { "epoch": 0.41571547929975067, "grad_norm": 0.10490152277230796, "learning_rate": 9.649803308183568e-06, "loss": 0.0041, "step": 63190 }, { "epoch": 0.41578126747498406, "grad_norm": 0.09244644016220427, "learning_rate": 9.649592200444616e-06, "loss": 0.0017, "step": 63200 }, { "epoch": 0.41584705565021746, "grad_norm": 0.03654372652964212, "learning_rate": 9.649381031404804e-06, "loss": 0.0021, "step": 63210 }, { "epoch": 0.4159128438254508, "grad_norm": 0.05384374358998058, "learning_rate": 9.649169801066921e-06, "loss": 0.0025, "step": 63220 }, { "epoch": 0.4159786320006842, "grad_norm": 0.11062159137294336, "learning_rate": 9.64895850943375e-06, "loss": 0.0018, "step": 63230 }, { "epoch": 0.4160444201759176, "grad_norm": 0.06853867447058347, "learning_rate": 9.648747156508075e-06, "loss": 0.0012, "step": 63240 }, { "epoch": 0.416110208351151, "grad_norm": 0.03722430533637018, "learning_rate": 9.648535742292683e-06, "loss": 0.0019, "step": 63250 }, { "epoch": 0.4161759965263844, "grad_norm": 0.07952996471234106, "learning_rate": 9.648324266790363e-06, "loss": 0.0017, "step": 63260 }, { "epoch": 0.4162417847016177, "grad_norm": 0.07905417112088894, "learning_rate": 9.648112730003902e-06, "loss": 0.0023, "step": 63270 }, { "epoch": 0.4163075728768511, "grad_norm": 0.016688324560656862, "learning_rate": 9.647901131936089e-06, "loss": 0.0008, "step": 63280 }, { "epoch": 0.4163733610520845, "grad_norm": 0.5963541947354618, "learning_rate": 9.647689472589714e-06, "loss": 0.0022, "step": 63290 }, { "epoch": 0.4164391492273179, "grad_norm": 0.09998538763342427, "learning_rate": 9.647477751967567e-06, "loss": 0.0016, "step": 63300 }, { "epoch": 0.4165049374025513, "grad_norm": 0.15191175242135357, "learning_rate": 9.647265970072439e-06, "loss": 0.0026, "step": 63310 }, { "epoch": 0.41657072557778463, "grad_norm": 0.10379303575824786, "learning_rate": 9.647054126907123e-06, "loss": 0.0021, "step": 63320 }, { "epoch": 0.416636513753018, "grad_norm": 0.1806094924310811, "learning_rate": 9.646842222474414e-06, "loss": 0.0026, "step": 63330 }, { "epoch": 0.4167023019282514, "grad_norm": 0.11374022537121242, "learning_rate": 9.646630256777102e-06, "loss": 0.002, "step": 63340 }, { "epoch": 0.4167680901034848, "grad_norm": 0.03953346356939194, "learning_rate": 9.646418229817983e-06, "loss": 0.002, "step": 63350 }, { "epoch": 0.4168338782787182, "grad_norm": 0.04284580663129251, "learning_rate": 9.64620614159985e-06, "loss": 0.0022, "step": 63360 }, { "epoch": 0.41689966645395155, "grad_norm": 0.07280757994639443, "learning_rate": 9.645993992125505e-06, "loss": 0.0053, "step": 63370 }, { "epoch": 0.41696545462918494, "grad_norm": 0.09348464001539289, "learning_rate": 9.645781781397741e-06, "loss": 0.0015, "step": 63380 }, { "epoch": 0.41703124280441833, "grad_norm": 0.0902651668366474, "learning_rate": 9.645569509419357e-06, "loss": 0.0015, "step": 63390 }, { "epoch": 0.41709703097965173, "grad_norm": 0.030243810825382536, "learning_rate": 9.645357176193151e-06, "loss": 0.0015, "step": 63400 }, { "epoch": 0.4171628191548851, "grad_norm": 0.013551039183184392, "learning_rate": 9.645144781721922e-06, "loss": 0.0028, "step": 63410 }, { "epoch": 0.41722860733011846, "grad_norm": 0.04991102039956996, "learning_rate": 9.64493232600847e-06, "loss": 0.0024, "step": 63420 }, { "epoch": 0.41729439550535186, "grad_norm": 0.13402868163792137, "learning_rate": 9.6447198090556e-06, "loss": 0.0024, "step": 63430 }, { "epoch": 0.41736018368058525, "grad_norm": 0.07362237450659209, "learning_rate": 9.644507230866109e-06, "loss": 0.0034, "step": 63440 }, { "epoch": 0.41742597185581864, "grad_norm": 0.025528455302809352, "learning_rate": 9.6442945914428e-06, "loss": 0.0022, "step": 63450 }, { "epoch": 0.41749176003105204, "grad_norm": 0.10346832770278383, "learning_rate": 9.64408189078848e-06, "loss": 0.0015, "step": 63460 }, { "epoch": 0.4175575482062854, "grad_norm": 0.10723079358147014, "learning_rate": 9.643869128905949e-06, "loss": 0.0028, "step": 63470 }, { "epoch": 0.4176233363815188, "grad_norm": 0.10023644757184204, "learning_rate": 9.643656305798017e-06, "loss": 0.0018, "step": 63480 }, { "epoch": 0.41768912455675217, "grad_norm": 0.2294778931578746, "learning_rate": 9.643443421467485e-06, "loss": 0.0038, "step": 63490 }, { "epoch": 0.41775491273198556, "grad_norm": 0.09380603067192761, "learning_rate": 9.643230475917162e-06, "loss": 0.0024, "step": 63500 }, { "epoch": 0.41782070090721896, "grad_norm": 0.1328078782226859, "learning_rate": 9.643017469149857e-06, "loss": 0.0017, "step": 63510 }, { "epoch": 0.4178864890824523, "grad_norm": 0.027733453698287523, "learning_rate": 9.642804401168374e-06, "loss": 0.0018, "step": 63520 }, { "epoch": 0.4179522772576857, "grad_norm": 0.23976199166077086, "learning_rate": 9.642591271975527e-06, "loss": 0.0031, "step": 63530 }, { "epoch": 0.4180180654329191, "grad_norm": 0.09957985537892657, "learning_rate": 9.642378081574124e-06, "loss": 0.0019, "step": 63540 }, { "epoch": 0.4180838536081525, "grad_norm": 0.08426793680585959, "learning_rate": 9.642164829966975e-06, "loss": 0.0036, "step": 63550 }, { "epoch": 0.4181496417833859, "grad_norm": 0.07486001542543927, "learning_rate": 9.64195151715689e-06, "loss": 0.0022, "step": 63560 }, { "epoch": 0.4182154299586192, "grad_norm": 0.2537656809070543, "learning_rate": 9.641738143146686e-06, "loss": 0.003, "step": 63570 }, { "epoch": 0.4182812181338526, "grad_norm": 0.07435319467235202, "learning_rate": 9.641524707939171e-06, "loss": 0.0017, "step": 63580 }, { "epoch": 0.418347006309086, "grad_norm": 0.05538889643518127, "learning_rate": 9.641311211537163e-06, "loss": 0.0017, "step": 63590 }, { "epoch": 0.4184127944843194, "grad_norm": 0.23835000252977775, "learning_rate": 9.641097653943474e-06, "loss": 0.0036, "step": 63600 }, { "epoch": 0.4184785826595528, "grad_norm": 0.03923638997553522, "learning_rate": 9.640884035160923e-06, "loss": 0.0019, "step": 63610 }, { "epoch": 0.41854437083478613, "grad_norm": 0.07596489769962304, "learning_rate": 9.640670355192321e-06, "loss": 0.0019, "step": 63620 }, { "epoch": 0.4186101590100195, "grad_norm": 0.07138852042705598, "learning_rate": 9.64045661404049e-06, "loss": 0.0022, "step": 63630 }, { "epoch": 0.4186759471852529, "grad_norm": 0.06969285004253357, "learning_rate": 9.640242811708247e-06, "loss": 0.0027, "step": 63640 }, { "epoch": 0.4187417353604863, "grad_norm": 0.0328438001516701, "learning_rate": 9.64002894819841e-06, "loss": 0.0026, "step": 63650 }, { "epoch": 0.4188075235357197, "grad_norm": 0.05499010013044857, "learning_rate": 9.639815023513797e-06, "loss": 0.0018, "step": 63660 }, { "epoch": 0.4188733117109531, "grad_norm": 0.20880238239822113, "learning_rate": 9.639601037657231e-06, "loss": 0.0024, "step": 63670 }, { "epoch": 0.41893909988618644, "grad_norm": 0.018462283326204403, "learning_rate": 9.639386990631533e-06, "loss": 0.0021, "step": 63680 }, { "epoch": 0.41900488806141983, "grad_norm": 0.04475993467871905, "learning_rate": 9.639172882439523e-06, "loss": 0.0029, "step": 63690 }, { "epoch": 0.41907067623665323, "grad_norm": 0.04543543304382611, "learning_rate": 9.638958713084027e-06, "loss": 0.002, "step": 63700 }, { "epoch": 0.4191364644118866, "grad_norm": 0.07952458312934546, "learning_rate": 9.638744482567865e-06, "loss": 0.003, "step": 63710 }, { "epoch": 0.41920225258712, "grad_norm": 0.006187009641269426, "learning_rate": 9.638530190893865e-06, "loss": 0.0013, "step": 63720 }, { "epoch": 0.41926804076235336, "grad_norm": 0.07198152371138632, "learning_rate": 9.638315838064849e-06, "loss": 0.0019, "step": 63730 }, { "epoch": 0.41933382893758675, "grad_norm": 0.11669321250280906, "learning_rate": 9.638101424083646e-06, "loss": 0.0021, "step": 63740 }, { "epoch": 0.41939961711282014, "grad_norm": 0.10609903145717925, "learning_rate": 9.63788694895308e-06, "loss": 0.0023, "step": 63750 }, { "epoch": 0.41946540528805354, "grad_norm": 0.04791418449880836, "learning_rate": 9.637672412675981e-06, "loss": 0.0018, "step": 63760 }, { "epoch": 0.41953119346328693, "grad_norm": 0.22113617771270944, "learning_rate": 9.637457815255176e-06, "loss": 0.0045, "step": 63770 }, { "epoch": 0.41959698163852027, "grad_norm": 0.045640841884008757, "learning_rate": 9.637243156693496e-06, "loss": 0.0011, "step": 63780 }, { "epoch": 0.41966276981375367, "grad_norm": 0.016081606363606154, "learning_rate": 9.63702843699377e-06, "loss": 0.0006, "step": 63790 }, { "epoch": 0.41972855798898706, "grad_norm": 0.0050281081364424605, "learning_rate": 9.636813656158827e-06, "loss": 0.0018, "step": 63800 }, { "epoch": 0.41979434616422046, "grad_norm": 0.07010712512316006, "learning_rate": 9.636598814191503e-06, "loss": 0.0036, "step": 63810 }, { "epoch": 0.41986013433945385, "grad_norm": 0.027703737302294182, "learning_rate": 9.636383911094625e-06, "loss": 0.0017, "step": 63820 }, { "epoch": 0.4199259225146872, "grad_norm": 0.03608401110927954, "learning_rate": 9.636168946871032e-06, "loss": 0.0019, "step": 63830 }, { "epoch": 0.4199917106899206, "grad_norm": 0.10473672102202766, "learning_rate": 9.635953921523554e-06, "loss": 0.002, "step": 63840 }, { "epoch": 0.420057498865154, "grad_norm": 0.048118805910618324, "learning_rate": 9.635738835055028e-06, "loss": 0.0031, "step": 63850 }, { "epoch": 0.42012328704038737, "grad_norm": 0.048674977701445396, "learning_rate": 9.635523687468288e-06, "loss": 0.0015, "step": 63860 }, { "epoch": 0.42018907521562077, "grad_norm": 0.003244109888443465, "learning_rate": 9.635308478766174e-06, "loss": 0.0011, "step": 63870 }, { "epoch": 0.4202548633908541, "grad_norm": 0.06558169916549005, "learning_rate": 9.63509320895152e-06, "loss": 0.0022, "step": 63880 }, { "epoch": 0.4203206515660875, "grad_norm": 0.008768610445636737, "learning_rate": 9.634877878027164e-06, "loss": 0.0022, "step": 63890 }, { "epoch": 0.4203864397413209, "grad_norm": 0.015367938827400259, "learning_rate": 9.634662485995949e-06, "loss": 0.0012, "step": 63900 }, { "epoch": 0.4204522279165543, "grad_norm": 0.0530732169745951, "learning_rate": 9.63444703286071e-06, "loss": 0.0027, "step": 63910 }, { "epoch": 0.4205180160917877, "grad_norm": 0.08487105869575812, "learning_rate": 9.634231518624289e-06, "loss": 0.0027, "step": 63920 }, { "epoch": 0.420583804267021, "grad_norm": 0.029828120989753088, "learning_rate": 9.634015943289528e-06, "loss": 0.0015, "step": 63930 }, { "epoch": 0.4206495924422544, "grad_norm": 0.017114806826170864, "learning_rate": 9.633800306859268e-06, "loss": 0.0026, "step": 63940 }, { "epoch": 0.4207153806174878, "grad_norm": 0.19735473343903348, "learning_rate": 9.633584609336354e-06, "loss": 0.0009, "step": 63950 }, { "epoch": 0.4207811687927212, "grad_norm": 0.07702770678491648, "learning_rate": 9.63336885072363e-06, "loss": 0.0016, "step": 63960 }, { "epoch": 0.4208469569679546, "grad_norm": 0.12539805130223325, "learning_rate": 9.633153031023938e-06, "loss": 0.0016, "step": 63970 }, { "epoch": 0.42091274514318794, "grad_norm": 0.02946248447445889, "learning_rate": 9.632937150240126e-06, "loss": 0.0013, "step": 63980 }, { "epoch": 0.42097853331842133, "grad_norm": 0.24723162726764983, "learning_rate": 9.632721208375037e-06, "loss": 0.0042, "step": 63990 }, { "epoch": 0.4210443214936547, "grad_norm": 0.03147447093196186, "learning_rate": 9.63250520543152e-06, "loss": 0.001, "step": 64000 }, { "epoch": 0.4211101096688881, "grad_norm": 0.05405337959394601, "learning_rate": 9.632289141412426e-06, "loss": 0.0015, "step": 64010 }, { "epoch": 0.4211758978441215, "grad_norm": 0.10877322331299726, "learning_rate": 9.632073016320599e-06, "loss": 0.0015, "step": 64020 }, { "epoch": 0.42124168601935486, "grad_norm": 0.032624125408263954, "learning_rate": 9.631856830158889e-06, "loss": 0.0027, "step": 64030 }, { "epoch": 0.42130747419458825, "grad_norm": 0.2997236906982161, "learning_rate": 9.631640582930147e-06, "loss": 0.0014, "step": 64040 }, { "epoch": 0.42137326236982164, "grad_norm": 0.04097166128846874, "learning_rate": 9.631424274637222e-06, "loss": 0.0024, "step": 64050 }, { "epoch": 0.42143905054505504, "grad_norm": 0.033295487277837314, "learning_rate": 9.63120790528297e-06, "loss": 0.0033, "step": 64060 }, { "epoch": 0.42150483872028843, "grad_norm": 0.0931212270777652, "learning_rate": 9.630991474870242e-06, "loss": 0.0019, "step": 64070 }, { "epoch": 0.42157062689552177, "grad_norm": 0.19258097559173443, "learning_rate": 9.63077498340189e-06, "loss": 0.0051, "step": 64080 }, { "epoch": 0.42163641507075517, "grad_norm": 0.26424962847507805, "learning_rate": 9.63055843088077e-06, "loss": 0.0021, "step": 64090 }, { "epoch": 0.42170220324598856, "grad_norm": 0.1014727625338054, "learning_rate": 9.630341817309737e-06, "loss": 0.0024, "step": 64100 }, { "epoch": 0.42176799142122195, "grad_norm": 0.10142317684205672, "learning_rate": 9.630125142691644e-06, "loss": 0.0015, "step": 64110 }, { "epoch": 0.42183377959645535, "grad_norm": 0.11588043100117487, "learning_rate": 9.629908407029351e-06, "loss": 0.0018, "step": 64120 }, { "epoch": 0.42189956777168874, "grad_norm": 0.030084840062726975, "learning_rate": 9.629691610325715e-06, "loss": 0.0043, "step": 64130 }, { "epoch": 0.4219653559469221, "grad_norm": 0.06321808240780642, "learning_rate": 9.629474752583592e-06, "loss": 0.0015, "step": 64140 }, { "epoch": 0.4220311441221555, "grad_norm": 0.024831631026715827, "learning_rate": 9.629257833805844e-06, "loss": 0.0018, "step": 64150 }, { "epoch": 0.42209693229738887, "grad_norm": 0.04361902896380559, "learning_rate": 9.629040853995328e-06, "loss": 0.0019, "step": 64160 }, { "epoch": 0.42216272047262227, "grad_norm": 0.01637264519158295, "learning_rate": 9.628823813154909e-06, "loss": 0.0037, "step": 64170 }, { "epoch": 0.42222850864785566, "grad_norm": 0.09164328618038545, "learning_rate": 9.628606711287443e-06, "loss": 0.0031, "step": 64180 }, { "epoch": 0.422294296823089, "grad_norm": 0.19664805225443743, "learning_rate": 9.628389548395795e-06, "loss": 0.0011, "step": 64190 }, { "epoch": 0.4223600849983224, "grad_norm": 0.05315101815809665, "learning_rate": 9.62817232448283e-06, "loss": 0.0019, "step": 64200 }, { "epoch": 0.4224258731735558, "grad_norm": 0.19517787826240626, "learning_rate": 9.62795503955141e-06, "loss": 0.0027, "step": 64210 }, { "epoch": 0.4224916613487892, "grad_norm": 0.02631248922612493, "learning_rate": 9.627737693604398e-06, "loss": 0.001, "step": 64220 }, { "epoch": 0.4225574495240226, "grad_norm": 0.059514143213151786, "learning_rate": 9.627520286644662e-06, "loss": 0.0014, "step": 64230 }, { "epoch": 0.4226232376992559, "grad_norm": 0.1093542405552706, "learning_rate": 9.627302818675069e-06, "loss": 0.0017, "step": 64240 }, { "epoch": 0.4226890258744893, "grad_norm": 0.17224089483268126, "learning_rate": 9.627085289698484e-06, "loss": 0.003, "step": 64250 }, { "epoch": 0.4227548140497227, "grad_norm": 0.1816014043628193, "learning_rate": 9.626867699717774e-06, "loss": 0.0032, "step": 64260 }, { "epoch": 0.4228206022249561, "grad_norm": 0.06942594555247442, "learning_rate": 9.626650048735811e-06, "loss": 0.0025, "step": 64270 }, { "epoch": 0.4228863904001895, "grad_norm": 0.14961285103968067, "learning_rate": 9.626432336755464e-06, "loss": 0.003, "step": 64280 }, { "epoch": 0.42295217857542283, "grad_norm": 0.19896314321156292, "learning_rate": 9.626214563779601e-06, "loss": 0.0035, "step": 64290 }, { "epoch": 0.4230179667506562, "grad_norm": 0.06350854041413428, "learning_rate": 9.625996729811093e-06, "loss": 0.0019, "step": 64300 }, { "epoch": 0.4230837549258896, "grad_norm": 0.03311403425126497, "learning_rate": 9.625778834852815e-06, "loss": 0.0023, "step": 64310 }, { "epoch": 0.423149543101123, "grad_norm": 0.2480203529562255, "learning_rate": 9.625560878907638e-06, "loss": 0.0036, "step": 64320 }, { "epoch": 0.4232153312763564, "grad_norm": 0.1326860257709707, "learning_rate": 9.625342861978438e-06, "loss": 0.0028, "step": 64330 }, { "epoch": 0.42328111945158975, "grad_norm": 0.010566788763927429, "learning_rate": 9.625124784068083e-06, "loss": 0.0014, "step": 64340 }, { "epoch": 0.42334690762682314, "grad_norm": 0.09122388502282322, "learning_rate": 9.624906645179454e-06, "loss": 0.0027, "step": 64350 }, { "epoch": 0.42341269580205654, "grad_norm": 0.1219847954273057, "learning_rate": 9.624688445315426e-06, "loss": 0.0018, "step": 64360 }, { "epoch": 0.42347848397728993, "grad_norm": 0.006305778120812874, "learning_rate": 9.624470184478874e-06, "loss": 0.0008, "step": 64370 }, { "epoch": 0.4235442721525233, "grad_norm": 0.27104387663269186, "learning_rate": 9.624251862672675e-06, "loss": 0.0048, "step": 64380 }, { "epoch": 0.42361006032775667, "grad_norm": 0.16234875495669274, "learning_rate": 9.624033479899713e-06, "loss": 0.0019, "step": 64390 }, { "epoch": 0.42367584850299006, "grad_norm": 0.02485037173924506, "learning_rate": 9.62381503616286e-06, "loss": 0.0013, "step": 64400 }, { "epoch": 0.42374163667822345, "grad_norm": 0.015172221155524654, "learning_rate": 9.623596531465e-06, "loss": 0.0032, "step": 64410 }, { "epoch": 0.42380742485345685, "grad_norm": 0.04677019032170218, "learning_rate": 9.623377965809014e-06, "loss": 0.0021, "step": 64420 }, { "epoch": 0.42387321302869024, "grad_norm": 0.030820195485729843, "learning_rate": 9.62315933919778e-06, "loss": 0.0055, "step": 64430 }, { "epoch": 0.4239390012039236, "grad_norm": 0.045202105091300016, "learning_rate": 9.622940651634185e-06, "loss": 0.0027, "step": 64440 }, { "epoch": 0.424004789379157, "grad_norm": 0.13904856355809153, "learning_rate": 9.622721903121108e-06, "loss": 0.0019, "step": 64450 }, { "epoch": 0.42407057755439037, "grad_norm": 0.004222675641602416, "learning_rate": 9.622503093661437e-06, "loss": 0.0013, "step": 64460 }, { "epoch": 0.42413636572962377, "grad_norm": 0.08098080088083777, "learning_rate": 9.622284223258056e-06, "loss": 0.0028, "step": 64470 }, { "epoch": 0.42420215390485716, "grad_norm": 0.02193553796994573, "learning_rate": 9.622065291913847e-06, "loss": 0.0013, "step": 64480 }, { "epoch": 0.4242679420800905, "grad_norm": 0.04743329041969691, "learning_rate": 9.6218462996317e-06, "loss": 0.0026, "step": 64490 }, { "epoch": 0.4243337302553239, "grad_norm": 0.08446543206794796, "learning_rate": 9.621627246414502e-06, "loss": 0.0017, "step": 64500 }, { "epoch": 0.4243995184305573, "grad_norm": 0.07240353064665336, "learning_rate": 9.62140813226514e-06, "loss": 0.0018, "step": 64510 }, { "epoch": 0.4244653066057907, "grad_norm": 0.17729164399114666, "learning_rate": 9.621188957186503e-06, "loss": 0.0031, "step": 64520 }, { "epoch": 0.4245310947810241, "grad_norm": 0.09806237539344619, "learning_rate": 9.62096972118148e-06, "loss": 0.0012, "step": 64530 }, { "epoch": 0.4245968829562574, "grad_norm": 0.06184984434657449, "learning_rate": 9.620750424252963e-06, "loss": 0.001, "step": 64540 }, { "epoch": 0.4246626711314908, "grad_norm": 0.0520843942334336, "learning_rate": 9.620531066403843e-06, "loss": 0.0019, "step": 64550 }, { "epoch": 0.4247284593067242, "grad_norm": 0.026793451799125713, "learning_rate": 9.620311647637009e-06, "loss": 0.0021, "step": 64560 }, { "epoch": 0.4247942474819576, "grad_norm": 0.0300404539386389, "learning_rate": 9.620092167955359e-06, "loss": 0.0023, "step": 64570 }, { "epoch": 0.424860035657191, "grad_norm": 0.11391770666564692, "learning_rate": 9.619872627361782e-06, "loss": 0.0018, "step": 64580 }, { "epoch": 0.4249258238324244, "grad_norm": 0.02620206712946585, "learning_rate": 9.619653025859174e-06, "loss": 0.0041, "step": 64590 }, { "epoch": 0.4249916120076577, "grad_norm": 0.04463616626145178, "learning_rate": 9.619433363450432e-06, "loss": 0.0037, "step": 64600 }, { "epoch": 0.4250574001828911, "grad_norm": 0.07760618158043041, "learning_rate": 9.619213640138451e-06, "loss": 0.003, "step": 64610 }, { "epoch": 0.4251231883581245, "grad_norm": 0.31634484058688245, "learning_rate": 9.618993855926127e-06, "loss": 0.0034, "step": 64620 }, { "epoch": 0.4251889765333579, "grad_norm": 0.042022196919299606, "learning_rate": 9.618774010816359e-06, "loss": 0.0029, "step": 64630 }, { "epoch": 0.4252547647085913, "grad_norm": 0.03574764928462866, "learning_rate": 9.618554104812044e-06, "loss": 0.0033, "step": 64640 }, { "epoch": 0.42532055288382464, "grad_norm": 0.10964266402617644, "learning_rate": 9.618334137916082e-06, "loss": 0.0019, "step": 64650 }, { "epoch": 0.42538634105905804, "grad_norm": 0.2545036309909349, "learning_rate": 9.618114110131373e-06, "loss": 0.0018, "step": 64660 }, { "epoch": 0.42545212923429143, "grad_norm": 0.07738303364792529, "learning_rate": 9.617894021460818e-06, "loss": 0.0021, "step": 64670 }, { "epoch": 0.4255179174095248, "grad_norm": 0.07367521434634394, "learning_rate": 9.617673871907319e-06, "loss": 0.0035, "step": 64680 }, { "epoch": 0.4255837055847582, "grad_norm": 0.08594604940098113, "learning_rate": 9.617453661473777e-06, "loss": 0.0032, "step": 64690 }, { "epoch": 0.42564949375999156, "grad_norm": 0.10192853433907735, "learning_rate": 9.617233390163096e-06, "loss": 0.0026, "step": 64700 }, { "epoch": 0.42571528193522495, "grad_norm": 0.07028737601415225, "learning_rate": 9.617013057978182e-06, "loss": 0.0014, "step": 64710 }, { "epoch": 0.42578107011045835, "grad_norm": 0.14649653844327912, "learning_rate": 9.616792664921938e-06, "loss": 0.0023, "step": 64720 }, { "epoch": 0.42584685828569174, "grad_norm": 0.035151000679197415, "learning_rate": 9.61657221099727e-06, "loss": 0.0047, "step": 64730 }, { "epoch": 0.42591264646092514, "grad_norm": 0.10908436492268281, "learning_rate": 9.616351696207085e-06, "loss": 0.0036, "step": 64740 }, { "epoch": 0.4259784346361585, "grad_norm": 0.06021432264295575, "learning_rate": 9.616131120554289e-06, "loss": 0.004, "step": 64750 }, { "epoch": 0.42604422281139187, "grad_norm": 0.004092348076617836, "learning_rate": 9.61591048404179e-06, "loss": 0.0028, "step": 64760 }, { "epoch": 0.42611001098662526, "grad_norm": 0.13130354354784335, "learning_rate": 9.6156897866725e-06, "loss": 0.0023, "step": 64770 }, { "epoch": 0.42617579916185866, "grad_norm": 0.09563445417972788, "learning_rate": 9.615469028449324e-06, "loss": 0.0037, "step": 64780 }, { "epoch": 0.42624158733709205, "grad_norm": 0.20580930464515776, "learning_rate": 9.615248209375176e-06, "loss": 0.004, "step": 64790 }, { "epoch": 0.4263073755123254, "grad_norm": 0.04572450995815952, "learning_rate": 9.615027329452968e-06, "loss": 0.0036, "step": 64800 }, { "epoch": 0.4263731636875588, "grad_norm": 0.08393184817014021, "learning_rate": 9.61480638868561e-06, "loss": 0.0016, "step": 64810 }, { "epoch": 0.4264389518627922, "grad_norm": 0.1683728047972824, "learning_rate": 9.614585387076013e-06, "loss": 0.001, "step": 64820 }, { "epoch": 0.4265047400380256, "grad_norm": 0.10544982391730498, "learning_rate": 9.614364324627095e-06, "loss": 0.0017, "step": 64830 }, { "epoch": 0.42657052821325897, "grad_norm": 0.07157630992265514, "learning_rate": 9.614143201341769e-06, "loss": 0.0015, "step": 64840 }, { "epoch": 0.4266363163884923, "grad_norm": 0.026861446377373924, "learning_rate": 9.61392201722295e-06, "loss": 0.0026, "step": 64850 }, { "epoch": 0.4267021045637257, "grad_norm": 0.09301057481545104, "learning_rate": 9.613700772273552e-06, "loss": 0.0024, "step": 64860 }, { "epoch": 0.4267678927389591, "grad_norm": 0.008140643616916035, "learning_rate": 9.613479466496495e-06, "loss": 0.0023, "step": 64870 }, { "epoch": 0.4268336809141925, "grad_norm": 0.18170264773018469, "learning_rate": 9.613258099894697e-06, "loss": 0.0044, "step": 64880 }, { "epoch": 0.4268994690894259, "grad_norm": 0.11261494624892789, "learning_rate": 9.613036672471074e-06, "loss": 0.0025, "step": 64890 }, { "epoch": 0.4269652572646592, "grad_norm": 0.05936367404478823, "learning_rate": 9.612815184228548e-06, "loss": 0.0025, "step": 64900 }, { "epoch": 0.4270310454398926, "grad_norm": 0.13737979022878205, "learning_rate": 9.612593635170035e-06, "loss": 0.0037, "step": 64910 }, { "epoch": 0.427096833615126, "grad_norm": 0.08598390996995899, "learning_rate": 9.61237202529846e-06, "loss": 0.0019, "step": 64920 }, { "epoch": 0.4271626217903594, "grad_norm": 0.059048092613005254, "learning_rate": 9.612150354616744e-06, "loss": 0.003, "step": 64930 }, { "epoch": 0.4272284099655928, "grad_norm": 0.09028005364215945, "learning_rate": 9.61192862312781e-06, "loss": 0.0021, "step": 64940 }, { "epoch": 0.42729419814082614, "grad_norm": 0.05813339187468323, "learning_rate": 9.611706830834577e-06, "loss": 0.0012, "step": 64950 }, { "epoch": 0.42735998631605954, "grad_norm": 0.03252958619457243, "learning_rate": 9.611484977739972e-06, "loss": 0.0025, "step": 64960 }, { "epoch": 0.42742577449129293, "grad_norm": 0.005893693896990203, "learning_rate": 9.611263063846924e-06, "loss": 0.0024, "step": 64970 }, { "epoch": 0.4274915626665263, "grad_norm": 0.08180193686415774, "learning_rate": 9.611041089158353e-06, "loss": 0.0014, "step": 64980 }, { "epoch": 0.4275573508417597, "grad_norm": 0.08959880041436603, "learning_rate": 9.610819053677188e-06, "loss": 0.0019, "step": 64990 }, { "epoch": 0.42762313901699306, "grad_norm": 0.11740328625302156, "learning_rate": 9.610596957406356e-06, "loss": 0.0014, "step": 65000 }, { "epoch": 0.42768892719222645, "grad_norm": 0.15074320384446477, "learning_rate": 9.610374800348785e-06, "loss": 0.0021, "step": 65010 }, { "epoch": 0.42775471536745985, "grad_norm": 0.03636867877157791, "learning_rate": 9.610152582507404e-06, "loss": 0.002, "step": 65020 }, { "epoch": 0.42782050354269324, "grad_norm": 0.060609340013340784, "learning_rate": 9.609930303885141e-06, "loss": 0.0016, "step": 65030 }, { "epoch": 0.42788629171792664, "grad_norm": 0.05318607372134851, "learning_rate": 9.60970796448493e-06, "loss": 0.0014, "step": 65040 }, { "epoch": 0.42795207989316003, "grad_norm": 0.2172841878165417, "learning_rate": 9.609485564309701e-06, "loss": 0.0032, "step": 65050 }, { "epoch": 0.42801786806839337, "grad_norm": 0.13634534009328322, "learning_rate": 9.609263103362387e-06, "loss": 0.0029, "step": 65060 }, { "epoch": 0.42808365624362676, "grad_norm": 0.03140868921905068, "learning_rate": 9.609040581645919e-06, "loss": 0.0017, "step": 65070 }, { "epoch": 0.42814944441886016, "grad_norm": 0.13876119766465814, "learning_rate": 9.608817999163231e-06, "loss": 0.0026, "step": 65080 }, { "epoch": 0.42821523259409355, "grad_norm": 0.04476232256240213, "learning_rate": 9.608595355917257e-06, "loss": 0.0019, "step": 65090 }, { "epoch": 0.42828102076932695, "grad_norm": 0.012890139242185576, "learning_rate": 9.608372651910935e-06, "loss": 0.0019, "step": 65100 }, { "epoch": 0.4283468089445603, "grad_norm": 0.17111554034667656, "learning_rate": 9.6081498871472e-06, "loss": 0.0018, "step": 65110 }, { "epoch": 0.4284125971197937, "grad_norm": 0.09734759357037207, "learning_rate": 9.607927061628988e-06, "loss": 0.0019, "step": 65120 }, { "epoch": 0.4284783852950271, "grad_norm": 0.05206668540683622, "learning_rate": 9.607704175359236e-06, "loss": 0.0017, "step": 65130 }, { "epoch": 0.42854417347026047, "grad_norm": 0.03514930082247765, "learning_rate": 9.607481228340884e-06, "loss": 0.0016, "step": 65140 }, { "epoch": 0.42860996164549386, "grad_norm": 0.16099717539341687, "learning_rate": 9.607258220576873e-06, "loss": 0.0024, "step": 65150 }, { "epoch": 0.4286757498207272, "grad_norm": 0.0895593497456098, "learning_rate": 9.607035152070141e-06, "loss": 0.001, "step": 65160 }, { "epoch": 0.4287415379959606, "grad_norm": 0.035695431697143856, "learning_rate": 9.60681202282363e-06, "loss": 0.003, "step": 65170 }, { "epoch": 0.428807326171194, "grad_norm": 0.023913920841123634, "learning_rate": 9.60658883284028e-06, "loss": 0.0017, "step": 65180 }, { "epoch": 0.4288731143464274, "grad_norm": 0.12261463725784927, "learning_rate": 9.606365582123035e-06, "loss": 0.0021, "step": 65190 }, { "epoch": 0.4289389025216608, "grad_norm": 0.09131404275574667, "learning_rate": 9.60614227067484e-06, "loss": 0.0021, "step": 65200 }, { "epoch": 0.4290046906968941, "grad_norm": 0.04303049977402392, "learning_rate": 9.605918898498633e-06, "loss": 0.0029, "step": 65210 }, { "epoch": 0.4290704788721275, "grad_norm": 0.04736676740053262, "learning_rate": 9.605695465597367e-06, "loss": 0.0015, "step": 65220 }, { "epoch": 0.4291362670473609, "grad_norm": 0.04220440708958109, "learning_rate": 9.605471971973984e-06, "loss": 0.002, "step": 65230 }, { "epoch": 0.4292020552225943, "grad_norm": 0.08285738933566025, "learning_rate": 9.605248417631428e-06, "loss": 0.0017, "step": 65240 }, { "epoch": 0.4292678433978277, "grad_norm": 0.10786143861306305, "learning_rate": 9.605024802572651e-06, "loss": 0.0035, "step": 65250 }, { "epoch": 0.42933363157306104, "grad_norm": 0.11595279298304442, "learning_rate": 9.604801126800598e-06, "loss": 0.0019, "step": 65260 }, { "epoch": 0.42939941974829443, "grad_norm": 0.03810828669126058, "learning_rate": 9.60457739031822e-06, "loss": 0.0024, "step": 65270 }, { "epoch": 0.4294652079235278, "grad_norm": 0.1067959120377414, "learning_rate": 9.604353593128466e-06, "loss": 0.0012, "step": 65280 }, { "epoch": 0.4295309960987612, "grad_norm": 0.044790446689329785, "learning_rate": 9.604129735234286e-06, "loss": 0.0013, "step": 65290 }, { "epoch": 0.4295967842739946, "grad_norm": 0.10077886698259678, "learning_rate": 9.603905816638632e-06, "loss": 0.0032, "step": 65300 }, { "epoch": 0.42966257244922795, "grad_norm": 0.07115868311429714, "learning_rate": 9.603681837344455e-06, "loss": 0.004, "step": 65310 }, { "epoch": 0.42972836062446135, "grad_norm": 0.09697232904809024, "learning_rate": 9.60345779735471e-06, "loss": 0.0024, "step": 65320 }, { "epoch": 0.42979414879969474, "grad_norm": 0.03936349321149416, "learning_rate": 9.60323369667235e-06, "loss": 0.0026, "step": 65330 }, { "epoch": 0.42985993697492814, "grad_norm": 0.12143061180281162, "learning_rate": 9.603009535300329e-06, "loss": 0.0021, "step": 65340 }, { "epoch": 0.42992572515016153, "grad_norm": 0.03898482049164498, "learning_rate": 9.602785313241601e-06, "loss": 0.0018, "step": 65350 }, { "epoch": 0.42999151332539487, "grad_norm": 0.0009655453128549049, "learning_rate": 9.602561030499127e-06, "loss": 0.0014, "step": 65360 }, { "epoch": 0.43005730150062826, "grad_norm": 0.02999571433588557, "learning_rate": 9.602336687075859e-06, "loss": 0.0019, "step": 65370 }, { "epoch": 0.43012308967586166, "grad_norm": 0.24524741454734367, "learning_rate": 9.602112282974756e-06, "loss": 0.0013, "step": 65380 }, { "epoch": 0.43018887785109505, "grad_norm": 0.006750146208759565, "learning_rate": 9.601887818198778e-06, "loss": 0.0013, "step": 65390 }, { "epoch": 0.43025466602632845, "grad_norm": 0.04596909594447533, "learning_rate": 9.601663292750884e-06, "loss": 0.002, "step": 65400 }, { "epoch": 0.4303204542015618, "grad_norm": 0.052446998223441406, "learning_rate": 9.601438706634033e-06, "loss": 0.0023, "step": 65410 }, { "epoch": 0.4303862423767952, "grad_norm": 0.06259060223600753, "learning_rate": 9.601214059851188e-06, "loss": 0.0024, "step": 65420 }, { "epoch": 0.4304520305520286, "grad_norm": 0.06555433321305836, "learning_rate": 9.600989352405308e-06, "loss": 0.0021, "step": 65430 }, { "epoch": 0.43051781872726197, "grad_norm": 0.09214455025321716, "learning_rate": 9.600764584299357e-06, "loss": 0.0023, "step": 65440 }, { "epoch": 0.43058360690249536, "grad_norm": 0.06588191749244336, "learning_rate": 9.6005397555363e-06, "loss": 0.0015, "step": 65450 }, { "epoch": 0.4306493950777287, "grad_norm": 0.0390546513021913, "learning_rate": 9.600314866119098e-06, "loss": 0.0038, "step": 65460 }, { "epoch": 0.4307151832529621, "grad_norm": 0.10213638196901938, "learning_rate": 9.60008991605072e-06, "loss": 0.0032, "step": 65470 }, { "epoch": 0.4307809714281955, "grad_norm": 0.2994169879857362, "learning_rate": 9.599864905334126e-06, "loss": 0.003, "step": 65480 }, { "epoch": 0.4308467596034289, "grad_norm": 0.2256909419591845, "learning_rate": 9.599639833972287e-06, "loss": 0.0031, "step": 65490 }, { "epoch": 0.4309125477786623, "grad_norm": 0.076284853071343, "learning_rate": 9.59941470196817e-06, "loss": 0.0036, "step": 65500 }, { "epoch": 0.4309783359538956, "grad_norm": 0.02523610426207533, "learning_rate": 9.599189509324743e-06, "loss": 0.0014, "step": 65510 }, { "epoch": 0.431044124129129, "grad_norm": 0.06101321728286597, "learning_rate": 9.598964256044974e-06, "loss": 0.0013, "step": 65520 }, { "epoch": 0.4311099123043624, "grad_norm": 0.031093502970349116, "learning_rate": 9.598738942131834e-06, "loss": 0.0035, "step": 65530 }, { "epoch": 0.4311757004795958, "grad_norm": 0.11155366820221564, "learning_rate": 9.598513567588291e-06, "loss": 0.0016, "step": 65540 }, { "epoch": 0.4312414886548292, "grad_norm": 0.058394895652812466, "learning_rate": 9.59828813241732e-06, "loss": 0.0028, "step": 65550 }, { "epoch": 0.4313072768300626, "grad_norm": 0.09012203106571029, "learning_rate": 9.598062636621889e-06, "loss": 0.0027, "step": 65560 }, { "epoch": 0.43137306500529593, "grad_norm": 0.05745876707816943, "learning_rate": 9.597837080204975e-06, "loss": 0.0008, "step": 65570 }, { "epoch": 0.4314388531805293, "grad_norm": 0.005557065974748572, "learning_rate": 9.59761146316955e-06, "loss": 0.0029, "step": 65580 }, { "epoch": 0.4315046413557627, "grad_norm": 0.04031114965035131, "learning_rate": 9.597385785518587e-06, "loss": 0.0012, "step": 65590 }, { "epoch": 0.4315704295309961, "grad_norm": 0.06602603469923908, "learning_rate": 9.597160047255064e-06, "loss": 0.0023, "step": 65600 }, { "epoch": 0.4316362177062295, "grad_norm": 0.18428306346769216, "learning_rate": 9.596934248381957e-06, "loss": 0.0024, "step": 65610 }, { "epoch": 0.43170200588146285, "grad_norm": 0.12110109068547144, "learning_rate": 9.596708388902241e-06, "loss": 0.0015, "step": 65620 }, { "epoch": 0.43176779405669624, "grad_norm": 0.4998035280331614, "learning_rate": 9.596482468818895e-06, "loss": 0.004, "step": 65630 }, { "epoch": 0.43183358223192964, "grad_norm": 0.05284624727251149, "learning_rate": 9.596256488134899e-06, "loss": 0.0035, "step": 65640 }, { "epoch": 0.43189937040716303, "grad_norm": 0.17603891059406118, "learning_rate": 9.596030446853229e-06, "loss": 0.003, "step": 65650 }, { "epoch": 0.4319651585823964, "grad_norm": 0.25667539871816974, "learning_rate": 9.595804344976866e-06, "loss": 0.0024, "step": 65660 }, { "epoch": 0.43203094675762976, "grad_norm": 0.05903029743278189, "learning_rate": 9.595578182508795e-06, "loss": 0.0018, "step": 65670 }, { "epoch": 0.43209673493286316, "grad_norm": 0.069280860424273, "learning_rate": 9.595351959451992e-06, "loss": 0.0023, "step": 65680 }, { "epoch": 0.43216252310809655, "grad_norm": 0.051619068735063244, "learning_rate": 9.595125675809444e-06, "loss": 0.0026, "step": 65690 }, { "epoch": 0.43222831128332995, "grad_norm": 0.016743970965694525, "learning_rate": 9.594899331584131e-06, "loss": 0.0014, "step": 65700 }, { "epoch": 0.43229409945856334, "grad_norm": 0.12371074800242977, "learning_rate": 9.59467292677904e-06, "loss": 0.0027, "step": 65710 }, { "epoch": 0.4323598876337967, "grad_norm": 0.14936851984029867, "learning_rate": 9.594446461397154e-06, "loss": 0.0026, "step": 65720 }, { "epoch": 0.4324256758090301, "grad_norm": 0.04657088128567086, "learning_rate": 9.59421993544146e-06, "loss": 0.0012, "step": 65730 }, { "epoch": 0.43249146398426347, "grad_norm": 0.027058149084067163, "learning_rate": 9.593993348914942e-06, "loss": 0.0026, "step": 65740 }, { "epoch": 0.43255725215949686, "grad_norm": 0.045339009456945446, "learning_rate": 9.593766701820593e-06, "loss": 0.0011, "step": 65750 }, { "epoch": 0.43262304033473026, "grad_norm": 0.05407313804061301, "learning_rate": 9.593539994161395e-06, "loss": 0.0012, "step": 65760 }, { "epoch": 0.4326888285099636, "grad_norm": 0.03799856312643619, "learning_rate": 9.593313225940339e-06, "loss": 0.0032, "step": 65770 }, { "epoch": 0.432754616685197, "grad_norm": 0.003944700058119788, "learning_rate": 9.593086397160416e-06, "loss": 0.0022, "step": 65780 }, { "epoch": 0.4328204048604304, "grad_norm": 0.10035146647300683, "learning_rate": 9.592859507824616e-06, "loss": 0.0021, "step": 65790 }, { "epoch": 0.4328861930356638, "grad_norm": 0.22305529055646983, "learning_rate": 9.59263255793593e-06, "loss": 0.0025, "step": 65800 }, { "epoch": 0.4329519812108972, "grad_norm": 0.034523205421621385, "learning_rate": 9.59240554749735e-06, "loss": 0.0033, "step": 65810 }, { "epoch": 0.4330177693861305, "grad_norm": 0.11156693990539097, "learning_rate": 9.592178476511868e-06, "loss": 0.0032, "step": 65820 }, { "epoch": 0.4330835575613639, "grad_norm": 0.022053363189003517, "learning_rate": 9.59195134498248e-06, "loss": 0.0022, "step": 65830 }, { "epoch": 0.4331493457365973, "grad_norm": 0.14061856045135465, "learning_rate": 9.59172415291218e-06, "loss": 0.0033, "step": 65840 }, { "epoch": 0.4332151339118307, "grad_norm": 0.08821238886682167, "learning_rate": 9.591496900303963e-06, "loss": 0.0023, "step": 65850 }, { "epoch": 0.4332809220870641, "grad_norm": 0.1190212710291426, "learning_rate": 9.591269587160823e-06, "loss": 0.0015, "step": 65860 }, { "epoch": 0.43334671026229743, "grad_norm": 0.04148377694401424, "learning_rate": 9.591042213485759e-06, "loss": 0.0034, "step": 65870 }, { "epoch": 0.4334124984375308, "grad_norm": 0.10766929122498131, "learning_rate": 9.59081477928177e-06, "loss": 0.0019, "step": 65880 }, { "epoch": 0.4334782866127642, "grad_norm": 0.1941711201710614, "learning_rate": 9.590587284551852e-06, "loss": 0.0035, "step": 65890 }, { "epoch": 0.4335440747879976, "grad_norm": 0.09320673448491064, "learning_rate": 9.590359729299006e-06, "loss": 0.0024, "step": 65900 }, { "epoch": 0.433609862963231, "grad_norm": 0.04680708390563473, "learning_rate": 9.590132113526231e-06, "loss": 0.0015, "step": 65910 }, { "epoch": 0.43367565113846435, "grad_norm": 0.0030728973004236055, "learning_rate": 9.589904437236527e-06, "loss": 0.002, "step": 65920 }, { "epoch": 0.43374143931369774, "grad_norm": 0.11683651990405108, "learning_rate": 9.5896767004329e-06, "loss": 0.0016, "step": 65930 }, { "epoch": 0.43380722748893114, "grad_norm": 0.158673357372992, "learning_rate": 9.589448903118348e-06, "loss": 0.0016, "step": 65940 }, { "epoch": 0.43387301566416453, "grad_norm": 0.07741975765573472, "learning_rate": 9.589221045295875e-06, "loss": 0.0031, "step": 65950 }, { "epoch": 0.4339388038393979, "grad_norm": 0.011935801341683832, "learning_rate": 9.588993126968487e-06, "loss": 0.0012, "step": 65960 }, { "epoch": 0.43400459201463126, "grad_norm": 0.14427778630746588, "learning_rate": 9.588765148139188e-06, "loss": 0.0046, "step": 65970 }, { "epoch": 0.43407038018986466, "grad_norm": 0.044970114599792804, "learning_rate": 9.588537108810983e-06, "loss": 0.0019, "step": 65980 }, { "epoch": 0.43413616836509805, "grad_norm": 0.08077987252388256, "learning_rate": 9.58830900898688e-06, "loss": 0.0035, "step": 65990 }, { "epoch": 0.43420195654033145, "grad_norm": 0.484784428070887, "learning_rate": 9.588080848669885e-06, "loss": 0.0012, "step": 66000 }, { "epoch": 0.43426774471556484, "grad_norm": 0.0464388281371957, "learning_rate": 9.587852627863008e-06, "loss": 0.0015, "step": 66010 }, { "epoch": 0.43433353289079824, "grad_norm": 0.058519232808160855, "learning_rate": 9.587624346569255e-06, "loss": 0.0012, "step": 66020 }, { "epoch": 0.4343993210660316, "grad_norm": 0.05209676198216927, "learning_rate": 9.587396004791636e-06, "loss": 0.0018, "step": 66030 }, { "epoch": 0.43446510924126497, "grad_norm": 0.01198352102388928, "learning_rate": 9.587167602533165e-06, "loss": 0.0015, "step": 66040 }, { "epoch": 0.43453089741649836, "grad_norm": 0.043346434163912984, "learning_rate": 9.58693913979685e-06, "loss": 0.0023, "step": 66050 }, { "epoch": 0.43459668559173176, "grad_norm": 0.03797954873069289, "learning_rate": 9.586710616585704e-06, "loss": 0.0012, "step": 66060 }, { "epoch": 0.43466247376696515, "grad_norm": 0.05640176533428298, "learning_rate": 9.586482032902739e-06, "loss": 0.0024, "step": 66070 }, { "epoch": 0.4347282619421985, "grad_norm": 0.05878646144810438, "learning_rate": 9.58625338875097e-06, "loss": 0.0017, "step": 66080 }, { "epoch": 0.4347940501174319, "grad_norm": 0.2808298515141012, "learning_rate": 9.58602468413341e-06, "loss": 0.0035, "step": 66090 }, { "epoch": 0.4348598382926653, "grad_norm": 0.0946609552332817, "learning_rate": 9.585795919053078e-06, "loss": 0.0033, "step": 66100 }, { "epoch": 0.4349256264678987, "grad_norm": 0.1052616571034463, "learning_rate": 9.585567093512984e-06, "loss": 0.0017, "step": 66110 }, { "epoch": 0.43499141464313207, "grad_norm": 0.08032101395547211, "learning_rate": 9.58533820751615e-06, "loss": 0.0011, "step": 66120 }, { "epoch": 0.4350572028183654, "grad_norm": 0.06378110225380487, "learning_rate": 9.585109261065592e-06, "loss": 0.003, "step": 66130 }, { "epoch": 0.4351229909935988, "grad_norm": 0.03660663006425482, "learning_rate": 9.584880254164329e-06, "loss": 0.0015, "step": 66140 }, { "epoch": 0.4351887791688322, "grad_norm": 0.03265877591108816, "learning_rate": 9.584651186815379e-06, "loss": 0.0011, "step": 66150 }, { "epoch": 0.4352545673440656, "grad_norm": 0.07697649225538496, "learning_rate": 9.584422059021763e-06, "loss": 0.0043, "step": 66160 }, { "epoch": 0.435320355519299, "grad_norm": 0.0349027074221733, "learning_rate": 9.5841928707865e-06, "loss": 0.0018, "step": 66170 }, { "epoch": 0.4353861436945323, "grad_norm": 0.0997030391390158, "learning_rate": 9.583963622112615e-06, "loss": 0.0022, "step": 66180 }, { "epoch": 0.4354519318697657, "grad_norm": 0.09371945145076475, "learning_rate": 9.583734313003129e-06, "loss": 0.0022, "step": 66190 }, { "epoch": 0.4355177200449991, "grad_norm": 0.04792797119936632, "learning_rate": 9.583504943461063e-06, "loss": 0.0015, "step": 66200 }, { "epoch": 0.4355835082202325, "grad_norm": 0.07484346950199584, "learning_rate": 9.583275513489443e-06, "loss": 0.0014, "step": 66210 }, { "epoch": 0.4356492963954659, "grad_norm": 0.028471961237725697, "learning_rate": 9.583046023091294e-06, "loss": 0.0019, "step": 66220 }, { "epoch": 0.43571508457069924, "grad_norm": 0.09001261378603004, "learning_rate": 9.582816472269643e-06, "loss": 0.0017, "step": 66230 }, { "epoch": 0.43578087274593263, "grad_norm": 0.13508440163646634, "learning_rate": 9.582586861027513e-06, "loss": 0.0025, "step": 66240 }, { "epoch": 0.43584666092116603, "grad_norm": 0.06918483027213085, "learning_rate": 9.582357189367933e-06, "loss": 0.0018, "step": 66250 }, { "epoch": 0.4359124490963994, "grad_norm": 0.0875316415513471, "learning_rate": 9.582127457293932e-06, "loss": 0.0019, "step": 66260 }, { "epoch": 0.4359782372716328, "grad_norm": 0.14462180224642163, "learning_rate": 9.581897664808538e-06, "loss": 0.0023, "step": 66270 }, { "epoch": 0.43604402544686616, "grad_norm": 0.10165942213947228, "learning_rate": 9.581667811914778e-06, "loss": 0.0018, "step": 66280 }, { "epoch": 0.43610981362209955, "grad_norm": 0.2092496061121443, "learning_rate": 9.581437898615687e-06, "loss": 0.0016, "step": 66290 }, { "epoch": 0.43617560179733295, "grad_norm": 0.16580724078550546, "learning_rate": 9.581207924914296e-06, "loss": 0.0031, "step": 66300 }, { "epoch": 0.43624138997256634, "grad_norm": 0.04783770039833391, "learning_rate": 9.580977890813631e-06, "loss": 0.002, "step": 66310 }, { "epoch": 0.43630717814779973, "grad_norm": 0.05966621906769507, "learning_rate": 9.580747796316732e-06, "loss": 0.0022, "step": 66320 }, { "epoch": 0.4363729663230331, "grad_norm": 0.15087405257575462, "learning_rate": 9.580517641426628e-06, "loss": 0.0028, "step": 66330 }, { "epoch": 0.43643875449826647, "grad_norm": 0.048500347846493055, "learning_rate": 9.580287426146355e-06, "loss": 0.0027, "step": 66340 }, { "epoch": 0.43650454267349986, "grad_norm": 0.10637552322911309, "learning_rate": 9.580057150478948e-06, "loss": 0.0023, "step": 66350 }, { "epoch": 0.43657033084873326, "grad_norm": 0.03180278841176591, "learning_rate": 9.579826814427444e-06, "loss": 0.0025, "step": 66360 }, { "epoch": 0.43663611902396665, "grad_norm": 0.07715690889110176, "learning_rate": 9.579596417994877e-06, "loss": 0.0019, "step": 66370 }, { "epoch": 0.4367019071992, "grad_norm": 0.14612114460996287, "learning_rate": 9.579365961184289e-06, "loss": 0.0017, "step": 66380 }, { "epoch": 0.4367676953744334, "grad_norm": 0.10897455082925235, "learning_rate": 9.579135443998713e-06, "loss": 0.0035, "step": 66390 }, { "epoch": 0.4368334835496668, "grad_norm": 0.013774827694080375, "learning_rate": 9.578904866441193e-06, "loss": 0.0014, "step": 66400 }, { "epoch": 0.4368992717249002, "grad_norm": 0.06065546851246945, "learning_rate": 9.578674228514765e-06, "loss": 0.002, "step": 66410 }, { "epoch": 0.43696505990013357, "grad_norm": 0.03475198690477698, "learning_rate": 9.578443530222475e-06, "loss": 0.0009, "step": 66420 }, { "epoch": 0.4370308480753669, "grad_norm": 0.05723612158287991, "learning_rate": 9.578212771567359e-06, "loss": 0.0019, "step": 66430 }, { "epoch": 0.4370966362506003, "grad_norm": 0.03782009038310287, "learning_rate": 9.577981952552461e-06, "loss": 0.0017, "step": 66440 }, { "epoch": 0.4371624244258337, "grad_norm": 0.049911087716078155, "learning_rate": 9.577751073180827e-06, "loss": 0.0031, "step": 66450 }, { "epoch": 0.4372282126010671, "grad_norm": 0.061939718698252745, "learning_rate": 9.577520133455498e-06, "loss": 0.0046, "step": 66460 }, { "epoch": 0.4372940007763005, "grad_norm": 0.07383229967603827, "learning_rate": 9.577289133379517e-06, "loss": 0.0015, "step": 66470 }, { "epoch": 0.4373597889515339, "grad_norm": 0.0627635128610348, "learning_rate": 9.577058072955934e-06, "loss": 0.0018, "step": 66480 }, { "epoch": 0.4374255771267672, "grad_norm": 0.1515939864041144, "learning_rate": 9.576826952187794e-06, "loss": 0.0021, "step": 66490 }, { "epoch": 0.4374913653020006, "grad_norm": 0.0987117944295397, "learning_rate": 9.576595771078143e-06, "loss": 0.0012, "step": 66500 }, { "epoch": 0.437557153477234, "grad_norm": 0.17040314242956908, "learning_rate": 9.576364529630029e-06, "loss": 0.0021, "step": 66510 }, { "epoch": 0.4376229416524674, "grad_norm": 0.06579364353963052, "learning_rate": 9.576133227846501e-06, "loss": 0.0014, "step": 66520 }, { "epoch": 0.4376887298277008, "grad_norm": 0.16976875502254318, "learning_rate": 9.575901865730609e-06, "loss": 0.0015, "step": 66530 }, { "epoch": 0.43775451800293413, "grad_norm": 0.11675641144613609, "learning_rate": 9.575670443285403e-06, "loss": 0.001, "step": 66540 }, { "epoch": 0.43782030617816753, "grad_norm": 0.11431042229987862, "learning_rate": 9.575438960513933e-06, "loss": 0.0038, "step": 66550 }, { "epoch": 0.4378860943534009, "grad_norm": 0.04803708689112525, "learning_rate": 9.575207417419253e-06, "loss": 0.0023, "step": 66560 }, { "epoch": 0.4379518825286343, "grad_norm": 0.023738327135567762, "learning_rate": 9.574975814004413e-06, "loss": 0.0018, "step": 66570 }, { "epoch": 0.4380176707038677, "grad_norm": 0.09050099664005568, "learning_rate": 9.57474415027247e-06, "loss": 0.0019, "step": 66580 }, { "epoch": 0.43808345887910105, "grad_norm": 0.1357456219939311, "learning_rate": 9.574512426226475e-06, "loss": 0.0036, "step": 66590 }, { "epoch": 0.43814924705433445, "grad_norm": 0.1220964692641408, "learning_rate": 9.574280641869485e-06, "loss": 0.0028, "step": 66600 }, { "epoch": 0.43821503522956784, "grad_norm": 0.05937599428566346, "learning_rate": 9.574048797204555e-06, "loss": 0.0029, "step": 66610 }, { "epoch": 0.43828082340480123, "grad_norm": 0.15380279234424446, "learning_rate": 9.573816892234742e-06, "loss": 0.0016, "step": 66620 }, { "epoch": 0.43834661158003463, "grad_norm": 0.04952436774448871, "learning_rate": 9.573584926963103e-06, "loss": 0.0027, "step": 66630 }, { "epoch": 0.43841239975526797, "grad_norm": 0.1187475826993716, "learning_rate": 9.573352901392699e-06, "loss": 0.0033, "step": 66640 }, { "epoch": 0.43847818793050136, "grad_norm": 0.17900486979725233, "learning_rate": 9.573120815526586e-06, "loss": 0.0019, "step": 66650 }, { "epoch": 0.43854397610573476, "grad_norm": 0.09954742798189149, "learning_rate": 9.572888669367824e-06, "loss": 0.0017, "step": 66660 }, { "epoch": 0.43860976428096815, "grad_norm": 0.026440248801708102, "learning_rate": 9.572656462919474e-06, "loss": 0.0017, "step": 66670 }, { "epoch": 0.43867555245620155, "grad_norm": 0.02047045273036792, "learning_rate": 9.572424196184598e-06, "loss": 0.0027, "step": 66680 }, { "epoch": 0.4387413406314349, "grad_norm": 0.15439451111484143, "learning_rate": 9.572191869166259e-06, "loss": 0.0023, "step": 66690 }, { "epoch": 0.4388071288066683, "grad_norm": 0.16358132770984718, "learning_rate": 9.571959481867517e-06, "loss": 0.0028, "step": 66700 }, { "epoch": 0.4388729169819017, "grad_norm": 0.08904572354002634, "learning_rate": 9.571727034291438e-06, "loss": 0.002, "step": 66710 }, { "epoch": 0.43893870515713507, "grad_norm": 0.10635244633182465, "learning_rate": 9.571494526441089e-06, "loss": 0.0044, "step": 66720 }, { "epoch": 0.43900449333236846, "grad_norm": 0.4585295232571708, "learning_rate": 9.571261958319532e-06, "loss": 0.002, "step": 66730 }, { "epoch": 0.4390702815076018, "grad_norm": 0.03723113643457704, "learning_rate": 9.571029329929832e-06, "loss": 0.0028, "step": 66740 }, { "epoch": 0.4391360696828352, "grad_norm": 0.09418566696179284, "learning_rate": 9.57079664127506e-06, "loss": 0.0018, "step": 66750 }, { "epoch": 0.4392018578580686, "grad_norm": 0.14663698204000533, "learning_rate": 9.570563892358282e-06, "loss": 0.0027, "step": 66760 }, { "epoch": 0.439267646033302, "grad_norm": 0.051820589629920165, "learning_rate": 9.570331083182564e-06, "loss": 0.0019, "step": 66770 }, { "epoch": 0.4393334342085354, "grad_norm": 0.07026775102429351, "learning_rate": 9.57009821375098e-06, "loss": 0.003, "step": 66780 }, { "epoch": 0.4393992223837687, "grad_norm": 0.005391109085980213, "learning_rate": 9.569865284066597e-06, "loss": 0.0016, "step": 66790 }, { "epoch": 0.4394650105590021, "grad_norm": 0.01370444779374196, "learning_rate": 9.569632294132489e-06, "loss": 0.0023, "step": 66800 }, { "epoch": 0.4395307987342355, "grad_norm": 0.05284458212878541, "learning_rate": 9.569399243951726e-06, "loss": 0.0033, "step": 66810 }, { "epoch": 0.4395965869094689, "grad_norm": 0.017428276398276724, "learning_rate": 9.569166133527377e-06, "loss": 0.0014, "step": 66820 }, { "epoch": 0.4396623750847023, "grad_norm": 0.03748741925707204, "learning_rate": 9.568932962862521e-06, "loss": 0.0019, "step": 66830 }, { "epoch": 0.43972816325993563, "grad_norm": 0.05728992187986889, "learning_rate": 9.568699731960229e-06, "loss": 0.0025, "step": 66840 }, { "epoch": 0.43979395143516903, "grad_norm": 0.04387383228152961, "learning_rate": 9.568466440823578e-06, "loss": 0.0011, "step": 66850 }, { "epoch": 0.4398597396104024, "grad_norm": 0.16485714034459803, "learning_rate": 9.56823308945564e-06, "loss": 0.0017, "step": 66860 }, { "epoch": 0.4399255277856358, "grad_norm": 0.10807716209817232, "learning_rate": 9.567999677859496e-06, "loss": 0.0017, "step": 66870 }, { "epoch": 0.4399913159608692, "grad_norm": 0.1324154256730623, "learning_rate": 9.567766206038222e-06, "loss": 0.0024, "step": 66880 }, { "epoch": 0.44005710413610255, "grad_norm": 0.11026249454862169, "learning_rate": 9.567532673994894e-06, "loss": 0.0027, "step": 66890 }, { "epoch": 0.44012289231133594, "grad_norm": 0.448123875914078, "learning_rate": 9.567299081732596e-06, "loss": 0.0025, "step": 66900 }, { "epoch": 0.44018868048656934, "grad_norm": 0.3258626606394224, "learning_rate": 9.567065429254401e-06, "loss": 0.005, "step": 66910 }, { "epoch": 0.44025446866180273, "grad_norm": 0.025406403710257854, "learning_rate": 9.566831716563393e-06, "loss": 0.0011, "step": 66920 }, { "epoch": 0.44032025683703613, "grad_norm": 0.2250476496992893, "learning_rate": 9.566597943662653e-06, "loss": 0.002, "step": 66930 }, { "epoch": 0.44038604501226947, "grad_norm": 0.07880499498785787, "learning_rate": 9.566364110555264e-06, "loss": 0.0014, "step": 66940 }, { "epoch": 0.44045183318750286, "grad_norm": 0.13571428145606734, "learning_rate": 9.566130217244307e-06, "loss": 0.002, "step": 66950 }, { "epoch": 0.44051762136273626, "grad_norm": 0.06948703876440383, "learning_rate": 9.565896263732866e-06, "loss": 0.0014, "step": 66960 }, { "epoch": 0.44058340953796965, "grad_norm": 0.031955161914282645, "learning_rate": 9.565662250024028e-06, "loss": 0.0018, "step": 66970 }, { "epoch": 0.44064919771320304, "grad_norm": 0.13101571030367, "learning_rate": 9.565428176120875e-06, "loss": 0.002, "step": 66980 }, { "epoch": 0.44071498588843644, "grad_norm": 0.08081525422750818, "learning_rate": 9.565194042026495e-06, "loss": 0.0013, "step": 66990 }, { "epoch": 0.4407807740636698, "grad_norm": 0.014167822092141936, "learning_rate": 9.564959847743976e-06, "loss": 0.0018, "step": 67000 }, { "epoch": 0.4408465622389032, "grad_norm": 0.10552634149202016, "learning_rate": 9.564725593276401e-06, "loss": 0.0022, "step": 67010 }, { "epoch": 0.44091235041413657, "grad_norm": 0.10244135170244822, "learning_rate": 9.564491278626863e-06, "loss": 0.0019, "step": 67020 }, { "epoch": 0.44097813858936996, "grad_norm": 0.14298626086848115, "learning_rate": 9.56425690379845e-06, "loss": 0.0031, "step": 67030 }, { "epoch": 0.44104392676460336, "grad_norm": 0.24657579099387752, "learning_rate": 9.56402246879425e-06, "loss": 0.0027, "step": 67040 }, { "epoch": 0.4411097149398367, "grad_norm": 0.056292744301928106, "learning_rate": 9.563787973617357e-06, "loss": 0.0027, "step": 67050 }, { "epoch": 0.4411755031150701, "grad_norm": 0.06325480349782897, "learning_rate": 9.56355341827086e-06, "loss": 0.0036, "step": 67060 }, { "epoch": 0.4412412912903035, "grad_norm": 0.04694248787934623, "learning_rate": 9.563318802757852e-06, "loss": 0.0029, "step": 67070 }, { "epoch": 0.4413070794655369, "grad_norm": 0.04278332847957484, "learning_rate": 9.563084127081428e-06, "loss": 0.0025, "step": 67080 }, { "epoch": 0.44137286764077027, "grad_norm": 0.05313985261737059, "learning_rate": 9.562849391244681e-06, "loss": 0.0011, "step": 67090 }, { "epoch": 0.4414386558160036, "grad_norm": 0.15676160693568517, "learning_rate": 9.562614595250705e-06, "loss": 0.0041, "step": 67100 }, { "epoch": 0.441504443991237, "grad_norm": 0.047436031798197345, "learning_rate": 9.562379739102596e-06, "loss": 0.0021, "step": 67110 }, { "epoch": 0.4415702321664704, "grad_norm": 0.11264575321606823, "learning_rate": 9.562144822803453e-06, "loss": 0.0019, "step": 67120 }, { "epoch": 0.4416360203417038, "grad_norm": 0.013247585254616692, "learning_rate": 9.561909846356366e-06, "loss": 0.0042, "step": 67130 }, { "epoch": 0.4417018085169372, "grad_norm": 0.0022454538924005962, "learning_rate": 9.56167480976444e-06, "loss": 0.0016, "step": 67140 }, { "epoch": 0.4417675966921705, "grad_norm": 0.03665399124213182, "learning_rate": 9.561439713030771e-06, "loss": 0.0021, "step": 67150 }, { "epoch": 0.4418333848674039, "grad_norm": 0.05137682659531436, "learning_rate": 9.56120455615846e-06, "loss": 0.0022, "step": 67160 }, { "epoch": 0.4418991730426373, "grad_norm": 0.018957692305322283, "learning_rate": 9.560969339150605e-06, "loss": 0.0014, "step": 67170 }, { "epoch": 0.4419649612178707, "grad_norm": 0.06132148770558805, "learning_rate": 9.560734062010309e-06, "loss": 0.001, "step": 67180 }, { "epoch": 0.4420307493931041, "grad_norm": 0.0727215836232255, "learning_rate": 9.560498724740672e-06, "loss": 0.0019, "step": 67190 }, { "epoch": 0.44209653756833744, "grad_norm": 0.1671432332955407, "learning_rate": 9.560263327344801e-06, "loss": 0.0018, "step": 67200 }, { "epoch": 0.44216232574357084, "grad_norm": 0.09605812758507118, "learning_rate": 9.560027869825795e-06, "loss": 0.0019, "step": 67210 }, { "epoch": 0.44222811391880423, "grad_norm": 0.1179429479846787, "learning_rate": 9.559792352186758e-06, "loss": 0.0021, "step": 67220 }, { "epoch": 0.4422939020940376, "grad_norm": 0.09643609817189028, "learning_rate": 9.559556774430798e-06, "loss": 0.0013, "step": 67230 }, { "epoch": 0.442359690269271, "grad_norm": 0.018560891267733665, "learning_rate": 9.559321136561021e-06, "loss": 0.0015, "step": 67240 }, { "epoch": 0.44242547844450436, "grad_norm": 0.045338500172745246, "learning_rate": 9.559085438580531e-06, "loss": 0.0016, "step": 67250 }, { "epoch": 0.44249126661973776, "grad_norm": 0.008973014090796066, "learning_rate": 9.558849680492438e-06, "loss": 0.0014, "step": 67260 }, { "epoch": 0.44255705479497115, "grad_norm": 0.15862474543883706, "learning_rate": 9.558613862299848e-06, "loss": 0.006, "step": 67270 }, { "epoch": 0.44262284297020454, "grad_norm": 0.050411054617115376, "learning_rate": 9.558377984005873e-06, "loss": 0.0035, "step": 67280 }, { "epoch": 0.44268863114543794, "grad_norm": 0.1908652956210443, "learning_rate": 9.558142045613621e-06, "loss": 0.0019, "step": 67290 }, { "epoch": 0.4427544193206713, "grad_norm": 0.07556425515510534, "learning_rate": 9.557906047126201e-06, "loss": 0.0032, "step": 67300 }, { "epoch": 0.44282020749590467, "grad_norm": 0.07455582343140117, "learning_rate": 9.557669988546727e-06, "loss": 0.003, "step": 67310 }, { "epoch": 0.44288599567113807, "grad_norm": 0.04591540403577187, "learning_rate": 9.557433869878311e-06, "loss": 0.0024, "step": 67320 }, { "epoch": 0.44295178384637146, "grad_norm": 0.08368896634527581, "learning_rate": 9.557197691124066e-06, "loss": 0.0016, "step": 67330 }, { "epoch": 0.44301757202160486, "grad_norm": 0.07807117082378615, "learning_rate": 9.556961452287105e-06, "loss": 0.0033, "step": 67340 }, { "epoch": 0.4430833601968382, "grad_norm": 0.03486796441136918, "learning_rate": 9.556725153370543e-06, "loss": 0.0018, "step": 67350 }, { "epoch": 0.4431491483720716, "grad_norm": 0.16378339661127514, "learning_rate": 9.556488794377494e-06, "loss": 0.0014, "step": 67360 }, { "epoch": 0.443214936547305, "grad_norm": 0.049099459833679734, "learning_rate": 9.556252375311077e-06, "loss": 0.0017, "step": 67370 }, { "epoch": 0.4432807247225384, "grad_norm": 0.07828553369889708, "learning_rate": 9.556015896174406e-06, "loss": 0.0017, "step": 67380 }, { "epoch": 0.44334651289777177, "grad_norm": 0.019210818999881068, "learning_rate": 9.5557793569706e-06, "loss": 0.0021, "step": 67390 }, { "epoch": 0.4434123010730051, "grad_norm": 0.06826873768084932, "learning_rate": 9.555542757702781e-06, "loss": 0.0014, "step": 67400 }, { "epoch": 0.4434780892482385, "grad_norm": 0.09091239985761497, "learning_rate": 9.555306098374062e-06, "loss": 0.0018, "step": 67410 }, { "epoch": 0.4435438774234719, "grad_norm": 0.022105930695985116, "learning_rate": 9.555069378987567e-06, "loss": 0.0055, "step": 67420 }, { "epoch": 0.4436096655987053, "grad_norm": 0.05688054118755604, "learning_rate": 9.554832599546418e-06, "loss": 0.0021, "step": 67430 }, { "epoch": 0.4436754537739387, "grad_norm": 0.035409452950001356, "learning_rate": 9.554595760053734e-06, "loss": 0.0022, "step": 67440 }, { "epoch": 0.4437412419491721, "grad_norm": 0.0140141099081741, "learning_rate": 9.554358860512636e-06, "loss": 0.0015, "step": 67450 }, { "epoch": 0.4438070301244054, "grad_norm": 0.02875305456372381, "learning_rate": 9.554121900926252e-06, "loss": 0.0028, "step": 67460 }, { "epoch": 0.4438728182996388, "grad_norm": 0.03631447786170256, "learning_rate": 9.553884881297703e-06, "loss": 0.0012, "step": 67470 }, { "epoch": 0.4439386064748722, "grad_norm": 0.05068290803034916, "learning_rate": 9.553647801630118e-06, "loss": 0.0031, "step": 67480 }, { "epoch": 0.4440043946501056, "grad_norm": 0.11004037882247826, "learning_rate": 9.553410661926617e-06, "loss": 0.002, "step": 67490 }, { "epoch": 0.444070182825339, "grad_norm": 0.0667826795295666, "learning_rate": 9.553173462190329e-06, "loss": 0.0012, "step": 67500 }, { "epoch": 0.44413597100057234, "grad_norm": 0.15648655644488108, "learning_rate": 9.55293620242438e-06, "loss": 0.0039, "step": 67510 }, { "epoch": 0.44420175917580573, "grad_norm": 0.10285962627357148, "learning_rate": 9.552698882631901e-06, "loss": 0.0022, "step": 67520 }, { "epoch": 0.4442675473510391, "grad_norm": 0.008605748091259878, "learning_rate": 9.552461502816019e-06, "loss": 0.0012, "step": 67530 }, { "epoch": 0.4443333355262725, "grad_norm": 0.07594634094410589, "learning_rate": 9.552224062979865e-06, "loss": 0.0023, "step": 67540 }, { "epoch": 0.4443991237015059, "grad_norm": 0.03910220528295549, "learning_rate": 9.551986563126566e-06, "loss": 0.0016, "step": 67550 }, { "epoch": 0.44446491187673925, "grad_norm": 0.06658386150359594, "learning_rate": 9.551749003259257e-06, "loss": 0.0021, "step": 67560 }, { "epoch": 0.44453070005197265, "grad_norm": 0.026257411642246473, "learning_rate": 9.551511383381068e-06, "loss": 0.003, "step": 67570 }, { "epoch": 0.44459648822720604, "grad_norm": 0.023729318270017764, "learning_rate": 9.551273703495133e-06, "loss": 0.0027, "step": 67580 }, { "epoch": 0.44466227640243944, "grad_norm": 0.03339704302872821, "learning_rate": 9.551035963604584e-06, "loss": 0.0024, "step": 67590 }, { "epoch": 0.44472806457767283, "grad_norm": 0.051616769164656594, "learning_rate": 9.550798163712557e-06, "loss": 0.0012, "step": 67600 }, { "epoch": 0.44479385275290617, "grad_norm": 0.006407867743943877, "learning_rate": 9.550560303822187e-06, "loss": 0.0022, "step": 67610 }, { "epoch": 0.44485964092813957, "grad_norm": 0.13580907557808422, "learning_rate": 9.550322383936607e-06, "loss": 0.0023, "step": 67620 }, { "epoch": 0.44492542910337296, "grad_norm": 0.0663108579691957, "learning_rate": 9.55008440405896e-06, "loss": 0.0019, "step": 67630 }, { "epoch": 0.44499121727860635, "grad_norm": 0.04097388091014878, "learning_rate": 9.549846364192377e-06, "loss": 0.0021, "step": 67640 }, { "epoch": 0.44505700545383975, "grad_norm": 0.07994019904359799, "learning_rate": 9.54960826434e-06, "loss": 0.0011, "step": 67650 }, { "epoch": 0.4451227936290731, "grad_norm": 0.05171496593542397, "learning_rate": 9.549370104504968e-06, "loss": 0.0016, "step": 67660 }, { "epoch": 0.4451885818043065, "grad_norm": 0.04623515818762153, "learning_rate": 9.54913188469042e-06, "loss": 0.0014, "step": 67670 }, { "epoch": 0.4452543699795399, "grad_norm": 0.04238541408804988, "learning_rate": 9.548893604899498e-06, "loss": 0.002, "step": 67680 }, { "epoch": 0.44532015815477327, "grad_norm": 0.06885725251357022, "learning_rate": 9.54865526513534e-06, "loss": 0.0013, "step": 67690 }, { "epoch": 0.44538594633000667, "grad_norm": 0.030455014620379577, "learning_rate": 9.548416865401093e-06, "loss": 0.0012, "step": 67700 }, { "epoch": 0.44545173450524, "grad_norm": 0.18115183713347005, "learning_rate": 9.548178405699897e-06, "loss": 0.0016, "step": 67710 }, { "epoch": 0.4455175226804734, "grad_norm": 0.035359691831441775, "learning_rate": 9.547939886034897e-06, "loss": 0.0021, "step": 67720 }, { "epoch": 0.4455833108557068, "grad_norm": 0.12005165794710854, "learning_rate": 9.547701306409237e-06, "loss": 0.0031, "step": 67730 }, { "epoch": 0.4456490990309402, "grad_norm": 0.05735792595241765, "learning_rate": 9.547462666826063e-06, "loss": 0.0027, "step": 67740 }, { "epoch": 0.4457148872061736, "grad_norm": 0.15348513657091437, "learning_rate": 9.54722396728852e-06, "loss": 0.0029, "step": 67750 }, { "epoch": 0.4457806753814069, "grad_norm": 0.1196325838609502, "learning_rate": 9.54698520779976e-06, "loss": 0.0019, "step": 67760 }, { "epoch": 0.4458464635566403, "grad_norm": 0.0350069819271661, "learning_rate": 9.546746388362925e-06, "loss": 0.0025, "step": 67770 }, { "epoch": 0.4459122517318737, "grad_norm": 0.05381014810533231, "learning_rate": 9.546507508981165e-06, "loss": 0.0017, "step": 67780 }, { "epoch": 0.4459780399071071, "grad_norm": 0.3711540105411636, "learning_rate": 9.546268569657629e-06, "loss": 0.0014, "step": 67790 }, { "epoch": 0.4460438280823405, "grad_norm": 0.03590587726165643, "learning_rate": 9.54602957039547e-06, "loss": 0.004, "step": 67800 }, { "epoch": 0.44610961625757384, "grad_norm": 0.07364897359378529, "learning_rate": 9.545790511197837e-06, "loss": 0.0033, "step": 67810 }, { "epoch": 0.44617540443280723, "grad_norm": 0.08594283823041024, "learning_rate": 9.545551392067882e-06, "loss": 0.0016, "step": 67820 }, { "epoch": 0.4462411926080406, "grad_norm": 0.08963407914956406, "learning_rate": 9.545312213008758e-06, "loss": 0.0023, "step": 67830 }, { "epoch": 0.446306980783274, "grad_norm": 0.03557578255886975, "learning_rate": 9.545072974023618e-06, "loss": 0.0026, "step": 67840 }, { "epoch": 0.4463727689585074, "grad_norm": 0.013407148291019468, "learning_rate": 9.544833675115617e-06, "loss": 0.0019, "step": 67850 }, { "epoch": 0.44643855713374075, "grad_norm": 0.07841268238677716, "learning_rate": 9.544594316287907e-06, "loss": 0.0014, "step": 67860 }, { "epoch": 0.44650434530897415, "grad_norm": 0.14348979544417723, "learning_rate": 9.544354897543648e-06, "loss": 0.0022, "step": 67870 }, { "epoch": 0.44657013348420754, "grad_norm": 0.08919702502949585, "learning_rate": 9.544115418885994e-06, "loss": 0.0022, "step": 67880 }, { "epoch": 0.44663592165944094, "grad_norm": 0.06539823120396351, "learning_rate": 9.543875880318103e-06, "loss": 0.0022, "step": 67890 }, { "epoch": 0.44670170983467433, "grad_norm": 0.14244761836034986, "learning_rate": 9.543636281843132e-06, "loss": 0.0016, "step": 67900 }, { "epoch": 0.4467674980099077, "grad_norm": 0.15512534764620772, "learning_rate": 9.54339662346424e-06, "loss": 0.0013, "step": 67910 }, { "epoch": 0.44683328618514107, "grad_norm": 0.43959420106124586, "learning_rate": 9.54315690518459e-06, "loss": 0.001, "step": 67920 }, { "epoch": 0.44689907436037446, "grad_norm": 0.25354267485061227, "learning_rate": 9.542917127007338e-06, "loss": 0.0034, "step": 67930 }, { "epoch": 0.44696486253560785, "grad_norm": 0.15404060974447872, "learning_rate": 9.542677288935649e-06, "loss": 0.0057, "step": 67940 }, { "epoch": 0.44703065071084125, "grad_norm": 0.5699071298912816, "learning_rate": 9.542437390972682e-06, "loss": 0.0033, "step": 67950 }, { "epoch": 0.44709643888607464, "grad_norm": 0.11893576952892516, "learning_rate": 9.5421974331216e-06, "loss": 0.0015, "step": 67960 }, { "epoch": 0.447162227061308, "grad_norm": 0.07156379530802433, "learning_rate": 9.541957415385569e-06, "loss": 0.0018, "step": 67970 }, { "epoch": 0.4472280152365414, "grad_norm": 0.10551087704284574, "learning_rate": 9.54171733776775e-06, "loss": 0.0015, "step": 67980 }, { "epoch": 0.44729380341177477, "grad_norm": 0.2132111390952546, "learning_rate": 9.541477200271313e-06, "loss": 0.0025, "step": 67990 }, { "epoch": 0.44735959158700817, "grad_norm": 0.043726278472778914, "learning_rate": 9.541237002899421e-06, "loss": 0.0012, "step": 68000 }, { "epoch": 0.44742537976224156, "grad_norm": 0.2247062804677952, "learning_rate": 9.540996745655243e-06, "loss": 0.0029, "step": 68010 }, { "epoch": 0.4474911679374749, "grad_norm": 0.03191226349961825, "learning_rate": 9.540756428541942e-06, "loss": 0.0022, "step": 68020 }, { "epoch": 0.4475569561127083, "grad_norm": 0.03629163486136588, "learning_rate": 9.540516051562688e-06, "loss": 0.0026, "step": 68030 }, { "epoch": 0.4476227442879417, "grad_norm": 0.09408042021541652, "learning_rate": 9.540275614720654e-06, "loss": 0.002, "step": 68040 }, { "epoch": 0.4476885324631751, "grad_norm": 0.010385366381979849, "learning_rate": 9.540035118019007e-06, "loss": 0.0021, "step": 68050 }, { "epoch": 0.4477543206384085, "grad_norm": 0.01681266824493082, "learning_rate": 9.539794561460917e-06, "loss": 0.0031, "step": 68060 }, { "epoch": 0.4478201088136418, "grad_norm": 0.0907594604395546, "learning_rate": 9.539553945049558e-06, "loss": 0.0022, "step": 68070 }, { "epoch": 0.4478858969888752, "grad_norm": 0.056825363443089374, "learning_rate": 9.5393132687881e-06, "loss": 0.0019, "step": 68080 }, { "epoch": 0.4479516851641086, "grad_norm": 0.027923808069734598, "learning_rate": 9.539072532679717e-06, "loss": 0.0011, "step": 68090 }, { "epoch": 0.448017473339342, "grad_norm": 0.07164426097317321, "learning_rate": 9.538831736727581e-06, "loss": 0.0017, "step": 68100 }, { "epoch": 0.4480832615145754, "grad_norm": 0.017132392812210306, "learning_rate": 9.53859088093487e-06, "loss": 0.0025, "step": 68110 }, { "epoch": 0.44814904968980873, "grad_norm": 0.010173703137939286, "learning_rate": 9.538349965304758e-06, "loss": 0.0034, "step": 68120 }, { "epoch": 0.4482148378650421, "grad_norm": 0.09888635686408973, "learning_rate": 9.538108989840422e-06, "loss": 0.002, "step": 68130 }, { "epoch": 0.4482806260402755, "grad_norm": 0.02611819311578178, "learning_rate": 9.537867954545039e-06, "loss": 0.0018, "step": 68140 }, { "epoch": 0.4483464142155089, "grad_norm": 0.022899456866029937, "learning_rate": 9.537626859421784e-06, "loss": 0.0023, "step": 68150 }, { "epoch": 0.4484122023907423, "grad_norm": 0.1298318146811805, "learning_rate": 9.537385704473838e-06, "loss": 0.0025, "step": 68160 }, { "epoch": 0.44847799056597565, "grad_norm": 0.15288598198655484, "learning_rate": 9.537144489704381e-06, "loss": 0.0034, "step": 68170 }, { "epoch": 0.44854377874120904, "grad_norm": 0.08248329079816812, "learning_rate": 9.536903215116594e-06, "loss": 0.0024, "step": 68180 }, { "epoch": 0.44860956691644244, "grad_norm": 0.25385371454868605, "learning_rate": 9.536661880713654e-06, "loss": 0.0025, "step": 68190 }, { "epoch": 0.44867535509167583, "grad_norm": 0.01349348545585881, "learning_rate": 9.536420486498746e-06, "loss": 0.0013, "step": 68200 }, { "epoch": 0.4487411432669092, "grad_norm": 0.07890883762965309, "learning_rate": 9.536179032475053e-06, "loss": 0.0015, "step": 68210 }, { "epoch": 0.44880693144214256, "grad_norm": 0.17816550255221514, "learning_rate": 9.535937518645755e-06, "loss": 0.0015, "step": 68220 }, { "epoch": 0.44887271961737596, "grad_norm": 0.0523768850934503, "learning_rate": 9.535695945014038e-06, "loss": 0.0046, "step": 68230 }, { "epoch": 0.44893850779260935, "grad_norm": 0.05148971527754055, "learning_rate": 9.53545431158309e-06, "loss": 0.0021, "step": 68240 }, { "epoch": 0.44900429596784275, "grad_norm": 0.10002858223148502, "learning_rate": 9.535212618356092e-06, "loss": 0.0013, "step": 68250 }, { "epoch": 0.44907008414307614, "grad_norm": 0.045760175879019906, "learning_rate": 9.534970865336234e-06, "loss": 0.0014, "step": 68260 }, { "epoch": 0.4491358723183095, "grad_norm": 0.0874019403404685, "learning_rate": 9.534729052526701e-06, "loss": 0.0035, "step": 68270 }, { "epoch": 0.4492016604935429, "grad_norm": 0.1333147718425304, "learning_rate": 9.53448717993068e-06, "loss": 0.0016, "step": 68280 }, { "epoch": 0.44926744866877627, "grad_norm": 0.026860263318962203, "learning_rate": 9.534245247551365e-06, "loss": 0.0012, "step": 68290 }, { "epoch": 0.44933323684400966, "grad_norm": 0.045542825504708624, "learning_rate": 9.534003255391942e-06, "loss": 0.0018, "step": 68300 }, { "epoch": 0.44939902501924306, "grad_norm": 0.026508636746100128, "learning_rate": 9.5337612034556e-06, "loss": 0.002, "step": 68310 }, { "epoch": 0.4494648131944764, "grad_norm": 0.7465826612988316, "learning_rate": 9.533519091745533e-06, "loss": 0.0019, "step": 68320 }, { "epoch": 0.4495306013697098, "grad_norm": 0.022059572677354276, "learning_rate": 9.53327692026493e-06, "loss": 0.002, "step": 68330 }, { "epoch": 0.4495963895449432, "grad_norm": 0.0710885978766826, "learning_rate": 9.53303468901699e-06, "loss": 0.002, "step": 68340 }, { "epoch": 0.4496621777201766, "grad_norm": 0.12922626375465163, "learning_rate": 9.532792398004901e-06, "loss": 0.0022, "step": 68350 }, { "epoch": 0.44972796589541, "grad_norm": 0.03915442762235547, "learning_rate": 9.532550047231859e-06, "loss": 0.004, "step": 68360 }, { "epoch": 0.44979375407064337, "grad_norm": 0.0593956250062097, "learning_rate": 9.532307636701059e-06, "loss": 0.0012, "step": 68370 }, { "epoch": 0.4498595422458767, "grad_norm": 0.04066477262614825, "learning_rate": 9.532065166415696e-06, "loss": 0.0017, "step": 68380 }, { "epoch": 0.4499253304211101, "grad_norm": 0.11866160438043433, "learning_rate": 9.53182263637897e-06, "loss": 0.0036, "step": 68390 }, { "epoch": 0.4499911185963435, "grad_norm": 0.08964532591764791, "learning_rate": 9.531580046594075e-06, "loss": 0.0027, "step": 68400 }, { "epoch": 0.4500569067715769, "grad_norm": 0.05310086532080124, "learning_rate": 9.531337397064212e-06, "loss": 0.0007, "step": 68410 }, { "epoch": 0.4501226949468103, "grad_norm": 0.04673417342440102, "learning_rate": 9.531094687792579e-06, "loss": 0.0019, "step": 68420 }, { "epoch": 0.4501884831220436, "grad_norm": 0.07997413016308494, "learning_rate": 9.530851918782375e-06, "loss": 0.0011, "step": 68430 }, { "epoch": 0.450254271297277, "grad_norm": 0.009106850674639963, "learning_rate": 9.530609090036801e-06, "loss": 0.0026, "step": 68440 }, { "epoch": 0.4503200594725104, "grad_norm": 0.07237319431600395, "learning_rate": 9.53036620155906e-06, "loss": 0.0034, "step": 68450 }, { "epoch": 0.4503858476477438, "grad_norm": 0.09273195070349838, "learning_rate": 9.530123253352354e-06, "loss": 0.0019, "step": 68460 }, { "epoch": 0.4504516358229772, "grad_norm": 0.18190805863396164, "learning_rate": 9.529880245419884e-06, "loss": 0.0019, "step": 68470 }, { "epoch": 0.45051742399821054, "grad_norm": 0.059211886750581706, "learning_rate": 9.529637177764855e-06, "loss": 0.0028, "step": 68480 }, { "epoch": 0.45058321217344394, "grad_norm": 0.20589835456826888, "learning_rate": 9.529394050390472e-06, "loss": 0.0017, "step": 68490 }, { "epoch": 0.45064900034867733, "grad_norm": 0.05684264863277865, "learning_rate": 9.529150863299942e-06, "loss": 0.0019, "step": 68500 }, { "epoch": 0.4507147885239107, "grad_norm": 0.03698504375422173, "learning_rate": 9.528907616496468e-06, "loss": 0.0029, "step": 68510 }, { "epoch": 0.4507805766991441, "grad_norm": 0.027991353389413613, "learning_rate": 9.52866430998326e-06, "loss": 0.0017, "step": 68520 }, { "epoch": 0.45084636487437746, "grad_norm": 0.03332742072607553, "learning_rate": 9.528420943763522e-06, "loss": 0.0006, "step": 68530 }, { "epoch": 0.45091215304961085, "grad_norm": 0.19485656774569432, "learning_rate": 9.528177517840467e-06, "loss": 0.0029, "step": 68540 }, { "epoch": 0.45097794122484425, "grad_norm": 0.09860253374504317, "learning_rate": 9.5279340322173e-06, "loss": 0.003, "step": 68550 }, { "epoch": 0.45104372940007764, "grad_norm": 0.009646245524467856, "learning_rate": 9.527690486897235e-06, "loss": 0.0015, "step": 68560 }, { "epoch": 0.45110951757531104, "grad_norm": 0.04117494499465269, "learning_rate": 9.527446881883481e-06, "loss": 0.0015, "step": 68570 }, { "epoch": 0.4511753057505444, "grad_norm": 0.13442342348567207, "learning_rate": 9.52720321717925e-06, "loss": 0.0026, "step": 68580 }, { "epoch": 0.45124109392577777, "grad_norm": 0.027332893523384572, "learning_rate": 9.526959492787755e-06, "loss": 0.002, "step": 68590 }, { "epoch": 0.45130688210101116, "grad_norm": 0.06230552737675487, "learning_rate": 9.526715708712207e-06, "loss": 0.0009, "step": 68600 }, { "epoch": 0.45137267027624456, "grad_norm": 0.15011571606284077, "learning_rate": 9.526471864955824e-06, "loss": 0.003, "step": 68610 }, { "epoch": 0.45143845845147795, "grad_norm": 0.03855035924221991, "learning_rate": 9.526227961521816e-06, "loss": 0.0039, "step": 68620 }, { "epoch": 0.4515042466267113, "grad_norm": 0.07510539587372526, "learning_rate": 9.525983998413403e-06, "loss": 0.0018, "step": 68630 }, { "epoch": 0.4515700348019447, "grad_norm": 0.021232183922394676, "learning_rate": 9.525739975633802e-06, "loss": 0.0017, "step": 68640 }, { "epoch": 0.4516358229771781, "grad_norm": 0.043812371965063865, "learning_rate": 9.525495893186226e-06, "loss": 0.0019, "step": 68650 }, { "epoch": 0.4517016111524115, "grad_norm": 0.0615942127911295, "learning_rate": 9.525251751073897e-06, "loss": 0.0021, "step": 68660 }, { "epoch": 0.45176739932764487, "grad_norm": 0.03925624328404877, "learning_rate": 9.52500754930003e-06, "loss": 0.0012, "step": 68670 }, { "epoch": 0.4518331875028782, "grad_norm": 0.1028690731645392, "learning_rate": 9.524763287867846e-06, "loss": 0.0022, "step": 68680 }, { "epoch": 0.4518989756781116, "grad_norm": 0.13073473819200448, "learning_rate": 9.524518966780568e-06, "loss": 0.0012, "step": 68690 }, { "epoch": 0.451964763853345, "grad_norm": 0.029294233598925513, "learning_rate": 9.524274586041414e-06, "loss": 0.0029, "step": 68700 }, { "epoch": 0.4520305520285784, "grad_norm": 0.027118430010386604, "learning_rate": 9.524030145653605e-06, "loss": 0.0026, "step": 68710 }, { "epoch": 0.4520963402038118, "grad_norm": 0.0038740231203471228, "learning_rate": 9.523785645620368e-06, "loss": 0.0036, "step": 68720 }, { "epoch": 0.4521621283790451, "grad_norm": 0.08227447953421313, "learning_rate": 9.523541085944925e-06, "loss": 0.0026, "step": 68730 }, { "epoch": 0.4522279165542785, "grad_norm": 0.0335275014765651, "learning_rate": 9.523296466630498e-06, "loss": 0.002, "step": 68740 }, { "epoch": 0.4522937047295119, "grad_norm": 0.01865101079967224, "learning_rate": 9.523051787680313e-06, "loss": 0.0017, "step": 68750 }, { "epoch": 0.4523594929047453, "grad_norm": 0.061894360251977566, "learning_rate": 9.522807049097598e-06, "loss": 0.0016, "step": 68760 }, { "epoch": 0.4524252810799787, "grad_norm": 0.009151803942695672, "learning_rate": 9.522562250885579e-06, "loss": 0.0016, "step": 68770 }, { "epoch": 0.45249106925521204, "grad_norm": 0.09860879571908586, "learning_rate": 9.52231739304748e-06, "loss": 0.0022, "step": 68780 }, { "epoch": 0.45255685743044544, "grad_norm": 0.02457768554123869, "learning_rate": 9.522072475586535e-06, "loss": 0.0013, "step": 68790 }, { "epoch": 0.45262264560567883, "grad_norm": 0.03165599642262333, "learning_rate": 9.521827498505967e-06, "loss": 0.0022, "step": 68800 }, { "epoch": 0.4526884337809122, "grad_norm": 0.03908470119700895, "learning_rate": 9.52158246180901e-06, "loss": 0.0009, "step": 68810 }, { "epoch": 0.4527542219561456, "grad_norm": 0.08117201678179431, "learning_rate": 9.521337365498894e-06, "loss": 0.0039, "step": 68820 }, { "epoch": 0.45282001013137896, "grad_norm": 0.10495171454181834, "learning_rate": 9.521092209578849e-06, "loss": 0.0015, "step": 68830 }, { "epoch": 0.45288579830661235, "grad_norm": 0.02966513660638548, "learning_rate": 9.520846994052108e-06, "loss": 0.0019, "step": 68840 }, { "epoch": 0.45295158648184575, "grad_norm": 0.04835580502908908, "learning_rate": 9.520601718921904e-06, "loss": 0.0019, "step": 68850 }, { "epoch": 0.45301737465707914, "grad_norm": 0.0725921004834644, "learning_rate": 9.520356384191469e-06, "loss": 0.0015, "step": 68860 }, { "epoch": 0.45308316283231254, "grad_norm": 0.059911620227359935, "learning_rate": 9.520110989864042e-06, "loss": 0.0023, "step": 68870 }, { "epoch": 0.45314895100754593, "grad_norm": 0.27188976611901694, "learning_rate": 9.519865535942855e-06, "loss": 0.0009, "step": 68880 }, { "epoch": 0.45321473918277927, "grad_norm": 0.10724885229405816, "learning_rate": 9.519620022431143e-06, "loss": 0.002, "step": 68890 }, { "epoch": 0.45328052735801266, "grad_norm": 0.04190745103979157, "learning_rate": 9.519374449332143e-06, "loss": 0.003, "step": 68900 }, { "epoch": 0.45334631553324606, "grad_norm": 0.06739394783851776, "learning_rate": 9.519128816649096e-06, "loss": 0.0015, "step": 68910 }, { "epoch": 0.45341210370847945, "grad_norm": 0.02373509650576971, "learning_rate": 9.518883124385238e-06, "loss": 0.0006, "step": 68920 }, { "epoch": 0.45347789188371285, "grad_norm": 0.06554376759217774, "learning_rate": 9.518637372543812e-06, "loss": 0.0015, "step": 68930 }, { "epoch": 0.4535436800589462, "grad_norm": 0.01758838893963159, "learning_rate": 9.51839156112805e-06, "loss": 0.0023, "step": 68940 }, { "epoch": 0.4536094682341796, "grad_norm": 0.030394315432271477, "learning_rate": 9.518145690141201e-06, "loss": 0.002, "step": 68950 }, { "epoch": 0.453675256409413, "grad_norm": 0.1702318104342654, "learning_rate": 9.517899759586502e-06, "loss": 0.0018, "step": 68960 }, { "epoch": 0.45374104458464637, "grad_norm": 0.024092318828371925, "learning_rate": 9.517653769467196e-06, "loss": 0.0026, "step": 68970 }, { "epoch": 0.45380683275987976, "grad_norm": 0.05102683441413813, "learning_rate": 9.517407719786527e-06, "loss": 0.0014, "step": 68980 }, { "epoch": 0.4538726209351131, "grad_norm": 0.12254562666038617, "learning_rate": 9.517161610547738e-06, "loss": 0.0027, "step": 68990 }, { "epoch": 0.4539384091103465, "grad_norm": 0.05236839632799052, "learning_rate": 9.516915441754075e-06, "loss": 0.002, "step": 69000 }, { "epoch": 0.4540041972855799, "grad_norm": 0.06443164109174036, "learning_rate": 9.516669213408784e-06, "loss": 0.0017, "step": 69010 }, { "epoch": 0.4540699854608133, "grad_norm": 0.06193145416337977, "learning_rate": 9.51642292551511e-06, "loss": 0.0016, "step": 69020 }, { "epoch": 0.4541357736360467, "grad_norm": 0.04551217278479747, "learning_rate": 9.5161765780763e-06, "loss": 0.002, "step": 69030 }, { "epoch": 0.45420156181128, "grad_norm": 0.10018728626472993, "learning_rate": 9.515930171095602e-06, "loss": 0.0017, "step": 69040 }, { "epoch": 0.4542673499865134, "grad_norm": 0.001863602951493131, "learning_rate": 9.515683704576266e-06, "loss": 0.0011, "step": 69050 }, { "epoch": 0.4543331381617468, "grad_norm": 0.05132505828280613, "learning_rate": 9.515437178521538e-06, "loss": 0.0023, "step": 69060 }, { "epoch": 0.4543989263369802, "grad_norm": 0.0422411868320261, "learning_rate": 9.515190592934673e-06, "loss": 0.0017, "step": 69070 }, { "epoch": 0.4544647145122136, "grad_norm": 0.08254285966786495, "learning_rate": 9.514943947818918e-06, "loss": 0.0022, "step": 69080 }, { "epoch": 0.45453050268744694, "grad_norm": 0.3464927798428846, "learning_rate": 9.514697243177527e-06, "loss": 0.0035, "step": 69090 }, { "epoch": 0.45459629086268033, "grad_norm": 0.02987302620077673, "learning_rate": 9.514450479013752e-06, "loss": 0.0031, "step": 69100 }, { "epoch": 0.4546620790379137, "grad_norm": 0.06482215683229602, "learning_rate": 9.514203655330847e-06, "loss": 0.0013, "step": 69110 }, { "epoch": 0.4547278672131471, "grad_norm": 0.034915097151230624, "learning_rate": 9.513956772132065e-06, "loss": 0.0017, "step": 69120 }, { "epoch": 0.4547936553883805, "grad_norm": 0.034083005524767264, "learning_rate": 9.51370982942066e-06, "loss": 0.0024, "step": 69130 }, { "epoch": 0.45485944356361385, "grad_norm": 0.0024992418150444344, "learning_rate": 9.51346282719989e-06, "loss": 0.003, "step": 69140 }, { "epoch": 0.45492523173884725, "grad_norm": 0.08642753305991532, "learning_rate": 9.513215765473012e-06, "loss": 0.0011, "step": 69150 }, { "epoch": 0.45499101991408064, "grad_norm": 0.04038291428413213, "learning_rate": 9.512968644243283e-06, "loss": 0.0014, "step": 69160 }, { "epoch": 0.45505680808931404, "grad_norm": 0.0013744832968036998, "learning_rate": 9.512721463513958e-06, "loss": 0.0016, "step": 69170 }, { "epoch": 0.45512259626454743, "grad_norm": 0.1468122465709776, "learning_rate": 9.512474223288298e-06, "loss": 0.0018, "step": 69180 }, { "epoch": 0.45518838443978077, "grad_norm": 0.0525011516146293, "learning_rate": 9.512226923569563e-06, "loss": 0.0023, "step": 69190 }, { "epoch": 0.45525417261501416, "grad_norm": 0.17680176144188742, "learning_rate": 9.511979564361013e-06, "loss": 0.0019, "step": 69200 }, { "epoch": 0.45531996079024756, "grad_norm": 0.14576466803034038, "learning_rate": 9.511732145665908e-06, "loss": 0.0022, "step": 69210 }, { "epoch": 0.45538574896548095, "grad_norm": 0.11679102801579257, "learning_rate": 9.511484667487514e-06, "loss": 0.0028, "step": 69220 }, { "epoch": 0.45545153714071435, "grad_norm": 0.023661459162481975, "learning_rate": 9.51123712982909e-06, "loss": 0.0022, "step": 69230 }, { "epoch": 0.4555173253159477, "grad_norm": 0.07726758523562331, "learning_rate": 9.5109895326939e-06, "loss": 0.0017, "step": 69240 }, { "epoch": 0.4555831134911811, "grad_norm": 0.03988937470276148, "learning_rate": 9.510741876085207e-06, "loss": 0.0017, "step": 69250 }, { "epoch": 0.4556489016664145, "grad_norm": 0.0029730852343716626, "learning_rate": 9.51049416000628e-06, "loss": 0.0019, "step": 69260 }, { "epoch": 0.45571468984164787, "grad_norm": 0.3504252529463463, "learning_rate": 9.510246384460384e-06, "loss": 0.0023, "step": 69270 }, { "epoch": 0.45578047801688126, "grad_norm": 0.03847905049789741, "learning_rate": 9.509998549450784e-06, "loss": 0.0023, "step": 69280 }, { "epoch": 0.4558462661921146, "grad_norm": 0.004434753046099827, "learning_rate": 9.509750654980747e-06, "loss": 0.0015, "step": 69290 }, { "epoch": 0.455912054367348, "grad_norm": 0.10986996281220056, "learning_rate": 9.509502701053544e-06, "loss": 0.0027, "step": 69300 }, { "epoch": 0.4559778425425814, "grad_norm": 0.23792620702867906, "learning_rate": 9.509254687672443e-06, "loss": 0.004, "step": 69310 }, { "epoch": 0.4560436307178148, "grad_norm": 0.11031396699595204, "learning_rate": 9.509006614840713e-06, "loss": 0.0024, "step": 69320 }, { "epoch": 0.4561094188930482, "grad_norm": 0.030843050987888806, "learning_rate": 9.508758482561624e-06, "loss": 0.0013, "step": 69330 }, { "epoch": 0.4561752070682816, "grad_norm": 0.19550489510067476, "learning_rate": 9.508510290838449e-06, "loss": 0.0023, "step": 69340 }, { "epoch": 0.4562409952435149, "grad_norm": 0.015962886478145782, "learning_rate": 9.508262039674459e-06, "loss": 0.0008, "step": 69350 }, { "epoch": 0.4563067834187483, "grad_norm": 0.06592442286460788, "learning_rate": 9.508013729072928e-06, "loss": 0.0016, "step": 69360 }, { "epoch": 0.4563725715939817, "grad_norm": 0.20455656083568421, "learning_rate": 9.50776535903713e-06, "loss": 0.0021, "step": 69370 }, { "epoch": 0.4564383597692151, "grad_norm": 0.0741362335922428, "learning_rate": 9.507516929570339e-06, "loss": 0.0029, "step": 69380 }, { "epoch": 0.4565041479444485, "grad_norm": 0.09531959855480984, "learning_rate": 9.50726844067583e-06, "loss": 0.0013, "step": 69390 }, { "epoch": 0.45656993611968183, "grad_norm": 0.13804080332453728, "learning_rate": 9.507019892356878e-06, "loss": 0.0021, "step": 69400 }, { "epoch": 0.4566357242949152, "grad_norm": 0.0832835017961367, "learning_rate": 9.506771284616763e-06, "loss": 0.0015, "step": 69410 }, { "epoch": 0.4567015124701486, "grad_norm": 0.0513401021032348, "learning_rate": 9.50652261745876e-06, "loss": 0.002, "step": 69420 }, { "epoch": 0.456767300645382, "grad_norm": 0.0011511753437228594, "learning_rate": 9.506273890886149e-06, "loss": 0.0011, "step": 69430 }, { "epoch": 0.4568330888206154, "grad_norm": 0.044958885253885715, "learning_rate": 9.506025104902208e-06, "loss": 0.0015, "step": 69440 }, { "epoch": 0.45689887699584875, "grad_norm": 0.08187333055295294, "learning_rate": 9.505776259510218e-06, "loss": 0.0037, "step": 69450 }, { "epoch": 0.45696466517108214, "grad_norm": 0.2954702324239876, "learning_rate": 9.505527354713459e-06, "loss": 0.0055, "step": 69460 }, { "epoch": 0.45703045334631553, "grad_norm": 0.20510112837726166, "learning_rate": 9.505278390515213e-06, "loss": 0.0011, "step": 69470 }, { "epoch": 0.45709624152154893, "grad_norm": 0.12354526051372088, "learning_rate": 9.505029366918763e-06, "loss": 0.0027, "step": 69480 }, { "epoch": 0.4571620296967823, "grad_norm": 0.015896583813927802, "learning_rate": 9.50478028392739e-06, "loss": 0.0012, "step": 69490 }, { "epoch": 0.45722781787201566, "grad_norm": 0.01838691314707945, "learning_rate": 9.504531141544383e-06, "loss": 0.0013, "step": 69500 }, { "epoch": 0.45729360604724906, "grad_norm": 0.011194683600148386, "learning_rate": 9.504281939773019e-06, "loss": 0.0047, "step": 69510 }, { "epoch": 0.45735939422248245, "grad_norm": 0.09831093973274313, "learning_rate": 9.50403267861659e-06, "loss": 0.0078, "step": 69520 }, { "epoch": 0.45742518239771585, "grad_norm": 0.11330703591986818, "learning_rate": 9.50378335807838e-06, "loss": 0.002, "step": 69530 }, { "epoch": 0.45749097057294924, "grad_norm": 0.11653228082251926, "learning_rate": 9.503533978161676e-06, "loss": 0.0022, "step": 69540 }, { "epoch": 0.4575567587481826, "grad_norm": 0.05984019672599051, "learning_rate": 9.503284538869766e-06, "loss": 0.003, "step": 69550 }, { "epoch": 0.457622546923416, "grad_norm": 0.04724230586925662, "learning_rate": 9.50303504020594e-06, "loss": 0.0019, "step": 69560 }, { "epoch": 0.45768833509864937, "grad_norm": 0.015733648259203103, "learning_rate": 9.502785482173484e-06, "loss": 0.0011, "step": 69570 }, { "epoch": 0.45775412327388276, "grad_norm": 0.16200347490614678, "learning_rate": 9.502535864775692e-06, "loss": 0.0028, "step": 69580 }, { "epoch": 0.45781991144911616, "grad_norm": 0.019574622981280154, "learning_rate": 9.502286188015853e-06, "loss": 0.0014, "step": 69590 }, { "epoch": 0.4578856996243495, "grad_norm": 0.029884874451792526, "learning_rate": 9.502036451897258e-06, "loss": 0.0025, "step": 69600 }, { "epoch": 0.4579514877995829, "grad_norm": 0.1431370055117117, "learning_rate": 9.5017866564232e-06, "loss": 0.0024, "step": 69610 }, { "epoch": 0.4580172759748163, "grad_norm": 0.06525395872867645, "learning_rate": 9.501536801596974e-06, "loss": 0.0015, "step": 69620 }, { "epoch": 0.4580830641500497, "grad_norm": 0.013325206434676945, "learning_rate": 9.501286887421874e-06, "loss": 0.0018, "step": 69630 }, { "epoch": 0.4581488523252831, "grad_norm": 0.14311074850684005, "learning_rate": 9.501036913901192e-06, "loss": 0.0027, "step": 69640 }, { "epoch": 0.4582146405005164, "grad_norm": 0.6252251852848308, "learning_rate": 9.500786881038226e-06, "loss": 0.0017, "step": 69650 }, { "epoch": 0.4582804286757498, "grad_norm": 0.0614789661461126, "learning_rate": 9.500536788836275e-06, "loss": 0.0023, "step": 69660 }, { "epoch": 0.4583462168509832, "grad_norm": 0.08653191112724196, "learning_rate": 9.50028663729863e-06, "loss": 0.0021, "step": 69670 }, { "epoch": 0.4584120050262166, "grad_norm": 0.05957275481294435, "learning_rate": 9.500036426428594e-06, "loss": 0.0012, "step": 69680 }, { "epoch": 0.45847779320145, "grad_norm": 0.03749605190523954, "learning_rate": 9.499786156229462e-06, "loss": 0.0025, "step": 69690 }, { "epoch": 0.45854358137668333, "grad_norm": 0.0776344015118646, "learning_rate": 9.499535826704538e-06, "loss": 0.0008, "step": 69700 }, { "epoch": 0.4586093695519167, "grad_norm": 0.06776265929546972, "learning_rate": 9.49928543785712e-06, "loss": 0.0022, "step": 69710 }, { "epoch": 0.4586751577271501, "grad_norm": 0.02660903881653944, "learning_rate": 9.49903498969051e-06, "loss": 0.0017, "step": 69720 }, { "epoch": 0.4587409459023835, "grad_norm": 0.055618060340927894, "learning_rate": 9.498784482208007e-06, "loss": 0.0018, "step": 69730 }, { "epoch": 0.4588067340776169, "grad_norm": 0.04624028531438066, "learning_rate": 9.498533915412917e-06, "loss": 0.0023, "step": 69740 }, { "epoch": 0.45887252225285025, "grad_norm": 0.028279277961944443, "learning_rate": 9.498283289308544e-06, "loss": 0.0013, "step": 69750 }, { "epoch": 0.45893831042808364, "grad_norm": 0.00823458027396209, "learning_rate": 9.498032603898188e-06, "loss": 0.0011, "step": 69760 }, { "epoch": 0.45900409860331703, "grad_norm": 0.19267209303848018, "learning_rate": 9.49778185918516e-06, "loss": 0.002, "step": 69770 }, { "epoch": 0.45906988677855043, "grad_norm": 0.029590814611622326, "learning_rate": 9.49753105517276e-06, "loss": 0.0038, "step": 69780 }, { "epoch": 0.4591356749537838, "grad_norm": 0.12192311850679205, "learning_rate": 9.4972801918643e-06, "loss": 0.0017, "step": 69790 }, { "epoch": 0.4592014631290172, "grad_norm": 0.0788190189915474, "learning_rate": 9.497029269263083e-06, "loss": 0.0019, "step": 69800 }, { "epoch": 0.45926725130425056, "grad_norm": 0.027103149114243927, "learning_rate": 9.49677828737242e-06, "loss": 0.0019, "step": 69810 }, { "epoch": 0.45933303947948395, "grad_norm": 0.024884657440027113, "learning_rate": 9.496527246195617e-06, "loss": 0.0016, "step": 69820 }, { "epoch": 0.45939882765471735, "grad_norm": 0.04826599280357352, "learning_rate": 9.496276145735988e-06, "loss": 0.0015, "step": 69830 }, { "epoch": 0.45946461582995074, "grad_norm": 0.0734691919662178, "learning_rate": 9.49602498599684e-06, "loss": 0.002, "step": 69840 }, { "epoch": 0.45953040400518413, "grad_norm": 0.048358324468014184, "learning_rate": 9.495773766981486e-06, "loss": 0.0013, "step": 69850 }, { "epoch": 0.4595961921804175, "grad_norm": 0.08482114613214145, "learning_rate": 9.495522488693236e-06, "loss": 0.002, "step": 69860 }, { "epoch": 0.45966198035565087, "grad_norm": 0.06488619855878927, "learning_rate": 9.495271151135407e-06, "loss": 0.0016, "step": 69870 }, { "epoch": 0.45972776853088426, "grad_norm": 0.11551263352530074, "learning_rate": 9.49501975431131e-06, "loss": 0.0028, "step": 69880 }, { "epoch": 0.45979355670611766, "grad_norm": 0.12104074983701306, "learning_rate": 9.494768298224258e-06, "loss": 0.0017, "step": 69890 }, { "epoch": 0.45985934488135105, "grad_norm": 0.02584464400511618, "learning_rate": 9.49451678287757e-06, "loss": 0.0033, "step": 69900 }, { "epoch": 0.4599251330565844, "grad_norm": 0.10130654867887522, "learning_rate": 9.494265208274557e-06, "loss": 0.0022, "step": 69910 }, { "epoch": 0.4599909212318178, "grad_norm": 0.05352180392417205, "learning_rate": 9.49401357441854e-06, "loss": 0.0007, "step": 69920 }, { "epoch": 0.4600567094070512, "grad_norm": 0.03465857041848351, "learning_rate": 9.493761881312837e-06, "loss": 0.0016, "step": 69930 }, { "epoch": 0.4601224975822846, "grad_norm": 0.009062141766550534, "learning_rate": 9.493510128960763e-06, "loss": 0.0019, "step": 69940 }, { "epoch": 0.46018828575751797, "grad_norm": 0.07582493780894285, "learning_rate": 9.493258317365642e-06, "loss": 0.002, "step": 69950 }, { "epoch": 0.4602540739327513, "grad_norm": 0.07295215211918056, "learning_rate": 9.493006446530786e-06, "loss": 0.0026, "step": 69960 }, { "epoch": 0.4603198621079847, "grad_norm": 0.0905711635135335, "learning_rate": 9.492754516459524e-06, "loss": 0.0026, "step": 69970 }, { "epoch": 0.4603856502832181, "grad_norm": 0.024314204314794405, "learning_rate": 9.492502527155171e-06, "loss": 0.0036, "step": 69980 }, { "epoch": 0.4604514384584515, "grad_norm": 0.04688185278714349, "learning_rate": 9.492250478621055e-06, "loss": 0.0014, "step": 69990 }, { "epoch": 0.4605172266336849, "grad_norm": 0.1922582662991144, "learning_rate": 9.491998370860494e-06, "loss": 0.0016, "step": 70000 }, { "epoch": 0.4605830148089182, "grad_norm": 0.035053822509660854, "learning_rate": 9.491746203876816e-06, "loss": 0.0018, "step": 70010 }, { "epoch": 0.4606488029841516, "grad_norm": 0.0720021415453095, "learning_rate": 9.49149397767334e-06, "loss": 0.0015, "step": 70020 }, { "epoch": 0.460714591159385, "grad_norm": 0.021995805324438074, "learning_rate": 9.491241692253399e-06, "loss": 0.003, "step": 70030 }, { "epoch": 0.4607803793346184, "grad_norm": 0.502043559042918, "learning_rate": 9.490989347620313e-06, "loss": 0.0027, "step": 70040 }, { "epoch": 0.4608461675098518, "grad_norm": 0.12742244199735286, "learning_rate": 9.490736943777412e-06, "loss": 0.0008, "step": 70050 }, { "epoch": 0.46091195568508514, "grad_norm": 0.033691636680344576, "learning_rate": 9.490484480728021e-06, "loss": 0.0017, "step": 70060 }, { "epoch": 0.46097774386031853, "grad_norm": 0.10351372092225106, "learning_rate": 9.490231958475474e-06, "loss": 0.0021, "step": 70070 }, { "epoch": 0.46104353203555193, "grad_norm": 0.4614544240573182, "learning_rate": 9.489979377023092e-06, "loss": 0.0016, "step": 70080 }, { "epoch": 0.4611093202107853, "grad_norm": 0.06025097201729872, "learning_rate": 9.489726736374214e-06, "loss": 0.0032, "step": 70090 }, { "epoch": 0.4611751083860187, "grad_norm": 0.0490836294893018, "learning_rate": 9.489474036532165e-06, "loss": 0.0028, "step": 70100 }, { "epoch": 0.46124089656125206, "grad_norm": 0.07122300965796911, "learning_rate": 9.489221277500279e-06, "loss": 0.0024, "step": 70110 }, { "epoch": 0.46130668473648545, "grad_norm": 0.047507229032801315, "learning_rate": 9.488968459281885e-06, "loss": 0.0023, "step": 70120 }, { "epoch": 0.46137247291171885, "grad_norm": 0.020716047181859022, "learning_rate": 9.48871558188032e-06, "loss": 0.0018, "step": 70130 }, { "epoch": 0.46143826108695224, "grad_norm": 0.017204444336241936, "learning_rate": 9.488462645298919e-06, "loss": 0.0016, "step": 70140 }, { "epoch": 0.46150404926218563, "grad_norm": 0.15389555231332222, "learning_rate": 9.488209649541012e-06, "loss": 0.0026, "step": 70150 }, { "epoch": 0.461569837437419, "grad_norm": 0.04542403769911311, "learning_rate": 9.487956594609937e-06, "loss": 0.0031, "step": 70160 }, { "epoch": 0.46163562561265237, "grad_norm": 0.056297918942532915, "learning_rate": 9.487703480509032e-06, "loss": 0.0012, "step": 70170 }, { "epoch": 0.46170141378788576, "grad_norm": 0.05794730342617834, "learning_rate": 9.487450307241632e-06, "loss": 0.0018, "step": 70180 }, { "epoch": 0.46176720196311916, "grad_norm": 0.023491099427133025, "learning_rate": 9.487197074811074e-06, "loss": 0.0024, "step": 70190 }, { "epoch": 0.46183299013835255, "grad_norm": 0.09128394953586533, "learning_rate": 9.486943783220699e-06, "loss": 0.0013, "step": 70200 }, { "epoch": 0.4618987783135859, "grad_norm": 0.014389886322464247, "learning_rate": 9.486690432473845e-06, "loss": 0.0039, "step": 70210 }, { "epoch": 0.4619645664888193, "grad_norm": 0.0349145533186782, "learning_rate": 9.486437022573854e-06, "loss": 0.0024, "step": 70220 }, { "epoch": 0.4620303546640527, "grad_norm": 0.048998677801328924, "learning_rate": 9.486183553524065e-06, "loss": 0.0031, "step": 70230 }, { "epoch": 0.4620961428392861, "grad_norm": 0.010326711499146764, "learning_rate": 9.485930025327819e-06, "loss": 0.0014, "step": 70240 }, { "epoch": 0.46216193101451947, "grad_norm": 0.006042642797568722, "learning_rate": 9.48567643798846e-06, "loss": 0.0015, "step": 70250 }, { "epoch": 0.46222771918975286, "grad_norm": 0.05090184448828973, "learning_rate": 9.485422791509332e-06, "loss": 0.0028, "step": 70260 }, { "epoch": 0.4622935073649862, "grad_norm": 0.014447810530770475, "learning_rate": 9.485169085893778e-06, "loss": 0.0016, "step": 70270 }, { "epoch": 0.4623592955402196, "grad_norm": 0.0152954702186925, "learning_rate": 9.484915321145145e-06, "loss": 0.0015, "step": 70280 }, { "epoch": 0.462425083715453, "grad_norm": 0.07027872679167003, "learning_rate": 9.484661497266774e-06, "loss": 0.0023, "step": 70290 }, { "epoch": 0.4624908718906864, "grad_norm": 0.07652715202382245, "learning_rate": 9.484407614262016e-06, "loss": 0.0008, "step": 70300 }, { "epoch": 0.4625566600659198, "grad_norm": 0.13206896746368385, "learning_rate": 9.484153672134218e-06, "loss": 0.0022, "step": 70310 }, { "epoch": 0.4626224482411531, "grad_norm": 0.16656102434314385, "learning_rate": 9.483899670886723e-06, "loss": 0.0014, "step": 70320 }, { "epoch": 0.4626882364163865, "grad_norm": 0.3941618381798595, "learning_rate": 9.483645610522886e-06, "loss": 0.0027, "step": 70330 }, { "epoch": 0.4627540245916199, "grad_norm": 0.11257456899928227, "learning_rate": 9.483391491046055e-06, "loss": 0.0027, "step": 70340 }, { "epoch": 0.4628198127668533, "grad_norm": 0.01630995230727584, "learning_rate": 9.483137312459575e-06, "loss": 0.0015, "step": 70350 }, { "epoch": 0.4628856009420867, "grad_norm": 0.043347122809501905, "learning_rate": 9.482883074766806e-06, "loss": 0.0033, "step": 70360 }, { "epoch": 0.46295138911732003, "grad_norm": 0.11273289458346811, "learning_rate": 9.482628777971093e-06, "loss": 0.0008, "step": 70370 }, { "epoch": 0.46301717729255343, "grad_norm": 0.053136507854034225, "learning_rate": 9.48237442207579e-06, "loss": 0.0031, "step": 70380 }, { "epoch": 0.4630829654677868, "grad_norm": 0.08318529108859264, "learning_rate": 9.482120007084255e-06, "loss": 0.0012, "step": 70390 }, { "epoch": 0.4631487536430202, "grad_norm": 0.04392640748131355, "learning_rate": 9.481865532999837e-06, "loss": 0.0016, "step": 70400 }, { "epoch": 0.4632145418182536, "grad_norm": 0.05473531662892245, "learning_rate": 9.481610999825892e-06, "loss": 0.0014, "step": 70410 }, { "epoch": 0.46328032999348695, "grad_norm": 0.04925676235005721, "learning_rate": 9.481356407565776e-06, "loss": 0.0012, "step": 70420 }, { "epoch": 0.46334611816872034, "grad_norm": 0.2830659046350954, "learning_rate": 9.481101756222847e-06, "loss": 0.0025, "step": 70430 }, { "epoch": 0.46341190634395374, "grad_norm": 0.10492865276953067, "learning_rate": 9.480847045800462e-06, "loss": 0.0015, "step": 70440 }, { "epoch": 0.46347769451918713, "grad_norm": 0.10564938154633642, "learning_rate": 9.480592276301978e-06, "loss": 0.0041, "step": 70450 }, { "epoch": 0.46354348269442053, "grad_norm": 0.1680283184869667, "learning_rate": 9.480337447730754e-06, "loss": 0.0027, "step": 70460 }, { "epoch": 0.46360927086965387, "grad_norm": 0.08262101697157999, "learning_rate": 9.480082560090152e-06, "loss": 0.0011, "step": 70470 }, { "epoch": 0.46367505904488726, "grad_norm": 0.012591843310478414, "learning_rate": 9.47982761338353e-06, "loss": 0.0026, "step": 70480 }, { "epoch": 0.46374084722012066, "grad_norm": 0.0012028870322620647, "learning_rate": 9.47957260761425e-06, "loss": 0.0039, "step": 70490 }, { "epoch": 0.46380663539535405, "grad_norm": 0.06878281356095496, "learning_rate": 9.479317542785673e-06, "loss": 0.0022, "step": 70500 }, { "epoch": 0.46387242357058744, "grad_norm": 0.011651697129767595, "learning_rate": 9.479062418901162e-06, "loss": 0.0047, "step": 70510 }, { "epoch": 0.4639382117458208, "grad_norm": 0.06626153192177438, "learning_rate": 9.478807235964083e-06, "loss": 0.0028, "step": 70520 }, { "epoch": 0.4640039999210542, "grad_norm": 0.08366694726285487, "learning_rate": 9.478551993977799e-06, "loss": 0.0021, "step": 70530 }, { "epoch": 0.46406978809628757, "grad_norm": 0.1679426354603067, "learning_rate": 9.478296692945675e-06, "loss": 0.0031, "step": 70540 }, { "epoch": 0.46413557627152097, "grad_norm": 0.09957804150407365, "learning_rate": 9.478041332871075e-06, "loss": 0.0013, "step": 70550 }, { "epoch": 0.46420136444675436, "grad_norm": 0.08214602009824033, "learning_rate": 9.47778591375737e-06, "loss": 0.0011, "step": 70560 }, { "epoch": 0.4642671526219877, "grad_norm": 0.1380048199399672, "learning_rate": 9.477530435607922e-06, "loss": 0.0036, "step": 70570 }, { "epoch": 0.4643329407972211, "grad_norm": 0.10031938252187884, "learning_rate": 9.477274898426104e-06, "loss": 0.0026, "step": 70580 }, { "epoch": 0.4643987289724545, "grad_norm": 0.06978302668067589, "learning_rate": 9.477019302215284e-06, "loss": 0.0015, "step": 70590 }, { "epoch": 0.4644645171476879, "grad_norm": 0.020087321053601087, "learning_rate": 9.47676364697883e-06, "loss": 0.001, "step": 70600 }, { "epoch": 0.4645303053229213, "grad_norm": 0.05811297762072472, "learning_rate": 9.476507932720114e-06, "loss": 0.0016, "step": 70610 }, { "epoch": 0.4645960934981546, "grad_norm": 0.06751156255360007, "learning_rate": 9.476252159442507e-06, "loss": 0.0019, "step": 70620 }, { "epoch": 0.464661881673388, "grad_norm": 0.017208651725218082, "learning_rate": 9.475996327149382e-06, "loss": 0.0013, "step": 70630 }, { "epoch": 0.4647276698486214, "grad_norm": 0.05645984679535856, "learning_rate": 9.475740435844109e-06, "loss": 0.0032, "step": 70640 }, { "epoch": 0.4647934580238548, "grad_norm": 0.12889406698896158, "learning_rate": 9.475484485530067e-06, "loss": 0.0019, "step": 70650 }, { "epoch": 0.4648592461990882, "grad_norm": 0.07880084066150743, "learning_rate": 9.475228476210626e-06, "loss": 0.0036, "step": 70660 }, { "epoch": 0.46492503437432153, "grad_norm": 0.05718107953246561, "learning_rate": 9.47497240788916e-06, "loss": 0.0018, "step": 70670 }, { "epoch": 0.4649908225495549, "grad_norm": 0.05580289516894135, "learning_rate": 9.474716280569052e-06, "loss": 0.0019, "step": 70680 }, { "epoch": 0.4650566107247883, "grad_norm": 0.024624206322089157, "learning_rate": 9.474460094253672e-06, "loss": 0.0018, "step": 70690 }, { "epoch": 0.4651223989000217, "grad_norm": 0.17142870035719374, "learning_rate": 9.4742038489464e-06, "loss": 0.0021, "step": 70700 }, { "epoch": 0.4651881870752551, "grad_norm": 0.016053072308926355, "learning_rate": 9.473947544650614e-06, "loss": 0.0018, "step": 70710 }, { "epoch": 0.46525397525048845, "grad_norm": 0.022652744238239306, "learning_rate": 9.473691181369696e-06, "loss": 0.0012, "step": 70720 }, { "epoch": 0.46531976342572184, "grad_norm": 0.05834851055012624, "learning_rate": 9.47343475910702e-06, "loss": 0.0014, "step": 70730 }, { "epoch": 0.46538555160095524, "grad_norm": 0.03367275910050229, "learning_rate": 9.473178277865973e-06, "loss": 0.0014, "step": 70740 }, { "epoch": 0.46545133977618863, "grad_norm": 0.05830765876126138, "learning_rate": 9.472921737649933e-06, "loss": 0.0026, "step": 70750 }, { "epoch": 0.465517127951422, "grad_norm": 0.13144842289720116, "learning_rate": 9.472665138462283e-06, "loss": 0.0025, "step": 70760 }, { "epoch": 0.4655829161266554, "grad_norm": 0.3118905444785838, "learning_rate": 9.472408480306405e-06, "loss": 0.0012, "step": 70770 }, { "epoch": 0.46564870430188876, "grad_norm": 0.040414689310933644, "learning_rate": 9.472151763185686e-06, "loss": 0.0013, "step": 70780 }, { "epoch": 0.46571449247712216, "grad_norm": 0.09844576956989877, "learning_rate": 9.471894987103506e-06, "loss": 0.0018, "step": 70790 }, { "epoch": 0.46578028065235555, "grad_norm": 0.1223172598383847, "learning_rate": 9.471638152063254e-06, "loss": 0.0029, "step": 70800 }, { "epoch": 0.46584606882758894, "grad_norm": 0.09774915602046434, "learning_rate": 9.471381258068315e-06, "loss": 0.0018, "step": 70810 }, { "epoch": 0.46591185700282234, "grad_norm": 0.10011867138123079, "learning_rate": 9.471124305122075e-06, "loss": 0.0026, "step": 70820 }, { "epoch": 0.4659776451780557, "grad_norm": 0.06162777981767935, "learning_rate": 9.470867293227923e-06, "loss": 0.0022, "step": 70830 }, { "epoch": 0.46604343335328907, "grad_norm": 0.10218154635109261, "learning_rate": 9.470610222389247e-06, "loss": 0.0017, "step": 70840 }, { "epoch": 0.46610922152852247, "grad_norm": 0.07215472987689582, "learning_rate": 9.470353092609437e-06, "loss": 0.0017, "step": 70850 }, { "epoch": 0.46617500970375586, "grad_norm": 0.01824115746859436, "learning_rate": 9.470095903891881e-06, "loss": 0.0021, "step": 70860 }, { "epoch": 0.46624079787898925, "grad_norm": 0.17068536451594357, "learning_rate": 9.469838656239973e-06, "loss": 0.0017, "step": 70870 }, { "epoch": 0.4663065860542226, "grad_norm": 0.05231036496999403, "learning_rate": 9.469581349657101e-06, "loss": 0.0028, "step": 70880 }, { "epoch": 0.466372374229456, "grad_norm": 0.06212301571102705, "learning_rate": 9.46932398414666e-06, "loss": 0.0019, "step": 70890 }, { "epoch": 0.4664381624046894, "grad_norm": 0.11223978312949123, "learning_rate": 9.46906655971204e-06, "loss": 0.003, "step": 70900 }, { "epoch": 0.4665039505799228, "grad_norm": 0.29214633399111206, "learning_rate": 9.468809076356642e-06, "loss": 0.0025, "step": 70910 }, { "epoch": 0.46656973875515617, "grad_norm": 0.020297175940533128, "learning_rate": 9.468551534083852e-06, "loss": 0.0017, "step": 70920 }, { "epoch": 0.4666355269303895, "grad_norm": 0.09715519072414017, "learning_rate": 9.46829393289707e-06, "loss": 0.0025, "step": 70930 }, { "epoch": 0.4667013151056229, "grad_norm": 0.12715490870463417, "learning_rate": 9.468036272799692e-06, "loss": 0.0017, "step": 70940 }, { "epoch": 0.4667671032808563, "grad_norm": 0.1511512608023703, "learning_rate": 9.467778553795116e-06, "loss": 0.0021, "step": 70950 }, { "epoch": 0.4668328914560897, "grad_norm": 0.052718621639011365, "learning_rate": 9.467520775886735e-06, "loss": 0.002, "step": 70960 }, { "epoch": 0.4668986796313231, "grad_norm": 0.08087864981412837, "learning_rate": 9.467262939077952e-06, "loss": 0.0008, "step": 70970 }, { "epoch": 0.4669644678065564, "grad_norm": 0.055066844779003324, "learning_rate": 9.467005043372166e-06, "loss": 0.0026, "step": 70980 }, { "epoch": 0.4670302559817898, "grad_norm": 0.08091328281616754, "learning_rate": 9.466747088772777e-06, "loss": 0.0066, "step": 70990 }, { "epoch": 0.4670960441570232, "grad_norm": 0.036462414365995, "learning_rate": 9.466489075283185e-06, "loss": 0.0009, "step": 71000 }, { "epoch": 0.4671618323322566, "grad_norm": 0.04880068115287342, "learning_rate": 9.46623100290679e-06, "loss": 0.0037, "step": 71010 }, { "epoch": 0.46722762050749, "grad_norm": 0.0055592793715982915, "learning_rate": 9.465972871647e-06, "loss": 0.002, "step": 71020 }, { "epoch": 0.46729340868272334, "grad_norm": 0.1702397455616593, "learning_rate": 9.465714681507214e-06, "loss": 0.0016, "step": 71030 }, { "epoch": 0.46735919685795674, "grad_norm": 0.03608979999359349, "learning_rate": 9.465456432490836e-06, "loss": 0.0018, "step": 71040 }, { "epoch": 0.46742498503319013, "grad_norm": 0.06768732454733677, "learning_rate": 9.46519812460127e-06, "loss": 0.0013, "step": 71050 }, { "epoch": 0.4674907732084235, "grad_norm": 0.05913108040630358, "learning_rate": 9.464939757841926e-06, "loss": 0.0017, "step": 71060 }, { "epoch": 0.4675565613836569, "grad_norm": 0.045671736248380675, "learning_rate": 9.464681332216206e-06, "loss": 0.0015, "step": 71070 }, { "epoch": 0.46762234955889026, "grad_norm": 0.034010864871714945, "learning_rate": 9.464422847727518e-06, "loss": 0.0032, "step": 71080 }, { "epoch": 0.46768813773412365, "grad_norm": 0.12337890878346489, "learning_rate": 9.464164304379273e-06, "loss": 0.0012, "step": 71090 }, { "epoch": 0.46775392590935705, "grad_norm": 0.179637849713407, "learning_rate": 9.463905702174875e-06, "loss": 0.0019, "step": 71100 }, { "epoch": 0.46781971408459044, "grad_norm": 0.028718514035781663, "learning_rate": 9.463647041117736e-06, "loss": 0.0035, "step": 71110 }, { "epoch": 0.46788550225982384, "grad_norm": 0.038254413769591926, "learning_rate": 9.463388321211265e-06, "loss": 0.0018, "step": 71120 }, { "epoch": 0.4679512904350572, "grad_norm": 0.03587119513661077, "learning_rate": 9.463129542458875e-06, "loss": 0.0024, "step": 71130 }, { "epoch": 0.46801707861029057, "grad_norm": 0.07415815849829478, "learning_rate": 9.462870704863976e-06, "loss": 0.0037, "step": 71140 }, { "epoch": 0.46808286678552397, "grad_norm": 0.1849018920891588, "learning_rate": 9.462611808429983e-06, "loss": 0.0019, "step": 71150 }, { "epoch": 0.46814865496075736, "grad_norm": 0.05384868504787667, "learning_rate": 9.462352853160305e-06, "loss": 0.0025, "step": 71160 }, { "epoch": 0.46821444313599075, "grad_norm": 0.03206040686440291, "learning_rate": 9.462093839058359e-06, "loss": 0.0007, "step": 71170 }, { "epoch": 0.4682802313112241, "grad_norm": 0.05537369834035708, "learning_rate": 9.46183476612756e-06, "loss": 0.0023, "step": 71180 }, { "epoch": 0.4683460194864575, "grad_norm": 0.06603857453842886, "learning_rate": 9.461575634371322e-06, "loss": 0.0035, "step": 71190 }, { "epoch": 0.4684118076616909, "grad_norm": 0.06969303930322751, "learning_rate": 9.461316443793064e-06, "loss": 0.001, "step": 71200 }, { "epoch": 0.4684775958369243, "grad_norm": 0.046536681447108735, "learning_rate": 9.461057194396202e-06, "loss": 0.0016, "step": 71210 }, { "epoch": 0.46854338401215767, "grad_norm": 0.03027138312792676, "learning_rate": 9.460797886184153e-06, "loss": 0.0011, "step": 71220 }, { "epoch": 0.46860917218739107, "grad_norm": 0.04868763808032448, "learning_rate": 9.460538519160337e-06, "loss": 0.0009, "step": 71230 }, { "epoch": 0.4686749603626244, "grad_norm": 0.014208447872521778, "learning_rate": 9.460279093328171e-06, "loss": 0.0024, "step": 71240 }, { "epoch": 0.4687407485378578, "grad_norm": 0.2255723842133259, "learning_rate": 9.460019608691079e-06, "loss": 0.0037, "step": 71250 }, { "epoch": 0.4688065367130912, "grad_norm": 0.05940640583505578, "learning_rate": 9.45976006525248e-06, "loss": 0.002, "step": 71260 }, { "epoch": 0.4688723248883246, "grad_norm": 0.08532118726229367, "learning_rate": 9.459500463015797e-06, "loss": 0.002, "step": 71270 }, { "epoch": 0.468938113063558, "grad_norm": 0.17009548303769625, "learning_rate": 9.45924080198445e-06, "loss": 0.0053, "step": 71280 }, { "epoch": 0.4690039012387913, "grad_norm": 0.11542317732579593, "learning_rate": 9.458981082161867e-06, "loss": 0.0026, "step": 71290 }, { "epoch": 0.4690696894140247, "grad_norm": 0.11329871997533925, "learning_rate": 9.458721303551467e-06, "loss": 0.003, "step": 71300 }, { "epoch": 0.4691354775892581, "grad_norm": 0.05787324197500077, "learning_rate": 9.458461466156679e-06, "loss": 0.0026, "step": 71310 }, { "epoch": 0.4692012657644915, "grad_norm": 0.04992844393706924, "learning_rate": 9.458201569980926e-06, "loss": 0.0018, "step": 71320 }, { "epoch": 0.4692670539397249, "grad_norm": 0.04957793774422078, "learning_rate": 9.457941615027637e-06, "loss": 0.0024, "step": 71330 }, { "epoch": 0.46933284211495824, "grad_norm": 0.06653494000304243, "learning_rate": 9.457681601300236e-06, "loss": 0.0014, "step": 71340 }, { "epoch": 0.46939863029019163, "grad_norm": 0.06995883898662239, "learning_rate": 9.457421528802155e-06, "loss": 0.0024, "step": 71350 }, { "epoch": 0.469464418465425, "grad_norm": 0.027207388061138624, "learning_rate": 9.45716139753682e-06, "loss": 0.0015, "step": 71360 }, { "epoch": 0.4695302066406584, "grad_norm": 0.0052906784898641805, "learning_rate": 9.45690120750766e-06, "loss": 0.0014, "step": 71370 }, { "epoch": 0.4695959948158918, "grad_norm": 0.022864392942195406, "learning_rate": 9.456640958718109e-06, "loss": 0.0016, "step": 71380 }, { "epoch": 0.46966178299112515, "grad_norm": 0.10176538514096492, "learning_rate": 9.456380651171594e-06, "loss": 0.0027, "step": 71390 }, { "epoch": 0.46972757116635855, "grad_norm": 0.030124732992982236, "learning_rate": 9.45612028487155e-06, "loss": 0.0014, "step": 71400 }, { "epoch": 0.46979335934159194, "grad_norm": 0.03910371722206177, "learning_rate": 9.45585985982141e-06, "loss": 0.0006, "step": 71410 }, { "epoch": 0.46985914751682534, "grad_norm": 0.02706417307263897, "learning_rate": 9.455599376024604e-06, "loss": 0.0013, "step": 71420 }, { "epoch": 0.46992493569205873, "grad_norm": 0.07048262805027887, "learning_rate": 9.455338833484569e-06, "loss": 0.0018, "step": 71430 }, { "epoch": 0.46999072386729207, "grad_norm": 0.0571805978315384, "learning_rate": 9.45507823220474e-06, "loss": 0.0019, "step": 71440 }, { "epoch": 0.47005651204252547, "grad_norm": 0.01438787742234686, "learning_rate": 9.45481757218855e-06, "loss": 0.0018, "step": 71450 }, { "epoch": 0.47012230021775886, "grad_norm": 0.017025778961197707, "learning_rate": 9.454556853439441e-06, "loss": 0.0038, "step": 71460 }, { "epoch": 0.47018808839299225, "grad_norm": 0.03354491302102579, "learning_rate": 9.454296075960845e-06, "loss": 0.0017, "step": 71470 }, { "epoch": 0.47025387656822565, "grad_norm": 0.02789693794298126, "learning_rate": 9.454035239756203e-06, "loss": 0.0014, "step": 71480 }, { "epoch": 0.470319664743459, "grad_norm": 0.03898284546488568, "learning_rate": 9.453774344828952e-06, "loss": 0.001, "step": 71490 }, { "epoch": 0.4703854529186924, "grad_norm": 0.12793533379168986, "learning_rate": 9.453513391182533e-06, "loss": 0.0012, "step": 71500 }, { "epoch": 0.4704512410939258, "grad_norm": 0.0038871686224760753, "learning_rate": 9.453252378820388e-06, "loss": 0.0036, "step": 71510 }, { "epoch": 0.47051702926915917, "grad_norm": 0.0733797407599925, "learning_rate": 9.452991307745955e-06, "loss": 0.0068, "step": 71520 }, { "epoch": 0.47058281744439256, "grad_norm": 0.03883133584336794, "learning_rate": 9.452730177962676e-06, "loss": 0.0016, "step": 71530 }, { "epoch": 0.4706486056196259, "grad_norm": 0.04344063737418088, "learning_rate": 9.452468989473998e-06, "loss": 0.0024, "step": 71540 }, { "epoch": 0.4707143937948593, "grad_norm": 0.15994606145808024, "learning_rate": 9.45220774228336e-06, "loss": 0.0023, "step": 71550 }, { "epoch": 0.4707801819700927, "grad_norm": 0.03000213463481515, "learning_rate": 9.451946436394211e-06, "loss": 0.0024, "step": 71560 }, { "epoch": 0.4708459701453261, "grad_norm": 0.04869749430078707, "learning_rate": 9.45168507180999e-06, "loss": 0.0023, "step": 71570 }, { "epoch": 0.4709117583205595, "grad_norm": 0.07112819581165117, "learning_rate": 9.451423648534146e-06, "loss": 0.0013, "step": 71580 }, { "epoch": 0.4709775464957928, "grad_norm": 0.07447715716296513, "learning_rate": 9.451162166570126e-06, "loss": 0.002, "step": 71590 }, { "epoch": 0.4710433346710262, "grad_norm": 0.07603443656310091, "learning_rate": 9.45090062592138e-06, "loss": 0.0011, "step": 71600 }, { "epoch": 0.4711091228462596, "grad_norm": 0.07109499580726633, "learning_rate": 9.45063902659135e-06, "loss": 0.0038, "step": 71610 }, { "epoch": 0.471174911021493, "grad_norm": 0.09639065222405398, "learning_rate": 9.450377368583489e-06, "loss": 0.0027, "step": 71620 }, { "epoch": 0.4712406991967264, "grad_norm": 0.05743855637640574, "learning_rate": 9.450115651901248e-06, "loss": 0.0012, "step": 71630 }, { "epoch": 0.47130648737195974, "grad_norm": 0.01984759893599592, "learning_rate": 9.449853876548073e-06, "loss": 0.002, "step": 71640 }, { "epoch": 0.47137227554719313, "grad_norm": 0.05059215000401124, "learning_rate": 9.449592042527418e-06, "loss": 0.0032, "step": 71650 }, { "epoch": 0.4714380637224265, "grad_norm": 0.0676664418404798, "learning_rate": 9.449330149842737e-06, "loss": 0.0036, "step": 71660 }, { "epoch": 0.4715038518976599, "grad_norm": 0.09764104131568978, "learning_rate": 9.449068198497478e-06, "loss": 0.0011, "step": 71670 }, { "epoch": 0.4715696400728933, "grad_norm": 0.050048830247268215, "learning_rate": 9.448806188495098e-06, "loss": 0.0021, "step": 71680 }, { "epoch": 0.4716354282481267, "grad_norm": 0.09948756607928949, "learning_rate": 9.448544119839051e-06, "loss": 0.0012, "step": 71690 }, { "epoch": 0.47170121642336005, "grad_norm": 0.09711353413057226, "learning_rate": 9.448281992532793e-06, "loss": 0.003, "step": 71700 }, { "epoch": 0.47176700459859344, "grad_norm": 0.055644585532667566, "learning_rate": 9.448019806579777e-06, "loss": 0.0018, "step": 71710 }, { "epoch": 0.47183279277382684, "grad_norm": 0.10882216065232281, "learning_rate": 9.447757561983462e-06, "loss": 0.0015, "step": 71720 }, { "epoch": 0.47189858094906023, "grad_norm": 0.04744888318330209, "learning_rate": 9.447495258747305e-06, "loss": 0.0021, "step": 71730 }, { "epoch": 0.4719643691242936, "grad_norm": 0.004688306995042755, "learning_rate": 9.447232896874765e-06, "loss": 0.0025, "step": 71740 }, { "epoch": 0.47203015729952696, "grad_norm": 0.12792007543773912, "learning_rate": 9.446970476369299e-06, "loss": 0.001, "step": 71750 }, { "epoch": 0.47209594547476036, "grad_norm": 0.1087932323733012, "learning_rate": 9.446707997234367e-06, "loss": 0.0013, "step": 71760 }, { "epoch": 0.47216173364999375, "grad_norm": 0.09360502961876237, "learning_rate": 9.446445459473432e-06, "loss": 0.0014, "step": 71770 }, { "epoch": 0.47222752182522715, "grad_norm": 0.00423538606125901, "learning_rate": 9.446182863089955e-06, "loss": 0.0011, "step": 71780 }, { "epoch": 0.47229331000046054, "grad_norm": 0.06863740652411267, "learning_rate": 9.445920208087393e-06, "loss": 0.0031, "step": 71790 }, { "epoch": 0.4723590981756939, "grad_norm": 0.13074728357952206, "learning_rate": 9.445657494469216e-06, "loss": 0.0023, "step": 71800 }, { "epoch": 0.4724248863509273, "grad_norm": 0.5124610363350113, "learning_rate": 9.445394722238883e-06, "loss": 0.0051, "step": 71810 }, { "epoch": 0.47249067452616067, "grad_norm": 0.07217709816393286, "learning_rate": 9.44513189139986e-06, "loss": 0.0019, "step": 71820 }, { "epoch": 0.47255646270139406, "grad_norm": 0.09819823784129705, "learning_rate": 9.444869001955615e-06, "loss": 0.001, "step": 71830 }, { "epoch": 0.47262225087662746, "grad_norm": 0.02990315274844072, "learning_rate": 9.444606053909608e-06, "loss": 0.0016, "step": 71840 }, { "epoch": 0.4726880390518608, "grad_norm": 0.09883895022729386, "learning_rate": 9.444343047265309e-06, "loss": 0.0013, "step": 71850 }, { "epoch": 0.4727538272270942, "grad_norm": 0.04765403164664816, "learning_rate": 9.444079982026185e-06, "loss": 0.0013, "step": 71860 }, { "epoch": 0.4728196154023276, "grad_norm": 0.11783970429625676, "learning_rate": 9.443816858195706e-06, "loss": 0.0024, "step": 71870 }, { "epoch": 0.472885403577561, "grad_norm": 0.0572047859039584, "learning_rate": 9.44355367577734e-06, "loss": 0.0027, "step": 71880 }, { "epoch": 0.4729511917527944, "grad_norm": 0.1135690275875699, "learning_rate": 9.443290434774555e-06, "loss": 0.0018, "step": 71890 }, { "epoch": 0.4730169799280277, "grad_norm": 0.15140461810620073, "learning_rate": 9.443027135190824e-06, "loss": 0.0027, "step": 71900 }, { "epoch": 0.4730827681032611, "grad_norm": 0.019072594289718123, "learning_rate": 9.442763777029615e-06, "loss": 0.0012, "step": 71910 }, { "epoch": 0.4731485562784945, "grad_norm": 0.10391605870366441, "learning_rate": 9.442500360294405e-06, "loss": 0.001, "step": 71920 }, { "epoch": 0.4732143444537279, "grad_norm": 0.004194001317914089, "learning_rate": 9.442236884988664e-06, "loss": 0.0022, "step": 71930 }, { "epoch": 0.4732801326289613, "grad_norm": 0.1280409273719919, "learning_rate": 9.441973351115868e-06, "loss": 0.0012, "step": 71940 }, { "epoch": 0.47334592080419463, "grad_norm": 0.06851159390168723, "learning_rate": 9.441709758679487e-06, "loss": 0.0018, "step": 71950 }, { "epoch": 0.473411708979428, "grad_norm": 0.008059232209180266, "learning_rate": 9.441446107683e-06, "loss": 0.0048, "step": 71960 }, { "epoch": 0.4734774971546614, "grad_norm": 0.036905816809321444, "learning_rate": 9.441182398129883e-06, "loss": 0.0025, "step": 71970 }, { "epoch": 0.4735432853298948, "grad_norm": 0.09036856156392777, "learning_rate": 9.44091863002361e-06, "loss": 0.0053, "step": 71980 }, { "epoch": 0.4736090735051282, "grad_norm": 0.02388311543936669, "learning_rate": 9.44065480336766e-06, "loss": 0.0018, "step": 71990 }, { "epoch": 0.47367486168036155, "grad_norm": 0.010574402512798085, "learning_rate": 9.440390918165513e-06, "loss": 0.0024, "step": 72000 }, { "epoch": 0.47374064985559494, "grad_norm": 0.19533836594815834, "learning_rate": 9.440126974420647e-06, "loss": 0.0014, "step": 72010 }, { "epoch": 0.47380643803082834, "grad_norm": 0.14535155402879485, "learning_rate": 9.439862972136539e-06, "loss": 0.0022, "step": 72020 }, { "epoch": 0.47387222620606173, "grad_norm": 0.10167897354624522, "learning_rate": 9.439598911316676e-06, "loss": 0.0021, "step": 72030 }, { "epoch": 0.4739380143812951, "grad_norm": 0.05776752481964568, "learning_rate": 9.439334791964534e-06, "loss": 0.0029, "step": 72040 }, { "epoch": 0.47400380255652846, "grad_norm": 0.20414550553828298, "learning_rate": 9.439070614083597e-06, "loss": 0.0026, "step": 72050 }, { "epoch": 0.47406959073176186, "grad_norm": 0.076103127010892, "learning_rate": 9.438806377677346e-06, "loss": 0.0022, "step": 72060 }, { "epoch": 0.47413537890699525, "grad_norm": 0.04079750926485275, "learning_rate": 9.438542082749269e-06, "loss": 0.0014, "step": 72070 }, { "epoch": 0.47420116708222865, "grad_norm": 0.09826916145488465, "learning_rate": 9.438277729302846e-06, "loss": 0.0019, "step": 72080 }, { "epoch": 0.47426695525746204, "grad_norm": 0.14169144769760345, "learning_rate": 9.438013317341565e-06, "loss": 0.0022, "step": 72090 }, { "epoch": 0.4743327434326954, "grad_norm": 0.02835303542820064, "learning_rate": 9.437748846868913e-06, "loss": 0.0028, "step": 72100 }, { "epoch": 0.4743985316079288, "grad_norm": 0.21513305954915565, "learning_rate": 9.437484317888372e-06, "loss": 0.0024, "step": 72110 }, { "epoch": 0.47446431978316217, "grad_norm": 0.08123154561121136, "learning_rate": 9.437219730403434e-06, "loss": 0.0014, "step": 72120 }, { "epoch": 0.47453010795839556, "grad_norm": 0.04559625119921824, "learning_rate": 9.436955084417587e-06, "loss": 0.0017, "step": 72130 }, { "epoch": 0.47459589613362896, "grad_norm": 0.09012949498807757, "learning_rate": 9.43669037993432e-06, "loss": 0.0016, "step": 72140 }, { "epoch": 0.47466168430886235, "grad_norm": 0.16685095217806944, "learning_rate": 9.436425616957122e-06, "loss": 0.0012, "step": 72150 }, { "epoch": 0.4747274724840957, "grad_norm": 0.0042223030068880354, "learning_rate": 9.436160795489481e-06, "loss": 0.0016, "step": 72160 }, { "epoch": 0.4747932606593291, "grad_norm": 0.29025677319237847, "learning_rate": 9.435895915534894e-06, "loss": 0.0028, "step": 72170 }, { "epoch": 0.4748590488345625, "grad_norm": 0.6302111812213913, "learning_rate": 9.435630977096847e-06, "loss": 0.0016, "step": 72180 }, { "epoch": 0.4749248370097959, "grad_norm": 0.018040123771800337, "learning_rate": 9.435365980178841e-06, "loss": 0.0016, "step": 72190 }, { "epoch": 0.47499062518502927, "grad_norm": 0.09901187962758895, "learning_rate": 9.435100924784362e-06, "loss": 0.0026, "step": 72200 }, { "epoch": 0.4750564133602626, "grad_norm": 0.06039632195954357, "learning_rate": 9.43483581091691e-06, "loss": 0.0037, "step": 72210 }, { "epoch": 0.475122201535496, "grad_norm": 0.1250676160828225, "learning_rate": 9.434570638579976e-06, "loss": 0.004, "step": 72220 }, { "epoch": 0.4751879897107294, "grad_norm": 0.06124446952191947, "learning_rate": 9.434305407777057e-06, "loss": 0.0025, "step": 72230 }, { "epoch": 0.4752537778859628, "grad_norm": 0.03982109676828781, "learning_rate": 9.434040118511653e-06, "loss": 0.0012, "step": 72240 }, { "epoch": 0.4753195660611962, "grad_norm": 0.14115715503492143, "learning_rate": 9.433774770787261e-06, "loss": 0.0041, "step": 72250 }, { "epoch": 0.4753853542364295, "grad_norm": 0.06786739642566286, "learning_rate": 9.433509364607377e-06, "loss": 0.0018, "step": 72260 }, { "epoch": 0.4754511424116629, "grad_norm": 0.17449392536033076, "learning_rate": 9.433243899975499e-06, "loss": 0.0027, "step": 72270 }, { "epoch": 0.4755169305868963, "grad_norm": 0.24924735202031784, "learning_rate": 9.43297837689513e-06, "loss": 0.0033, "step": 72280 }, { "epoch": 0.4755827187621297, "grad_norm": 0.10084453706427926, "learning_rate": 9.43271279536977e-06, "loss": 0.0019, "step": 72290 }, { "epoch": 0.4756485069373631, "grad_norm": 0.11794213483696142, "learning_rate": 9.432447155402919e-06, "loss": 0.0013, "step": 72300 }, { "epoch": 0.47571429511259644, "grad_norm": 0.06357407804149093, "learning_rate": 9.432181456998082e-06, "loss": 0.0021, "step": 72310 }, { "epoch": 0.47578008328782984, "grad_norm": 0.08978763167171033, "learning_rate": 9.43191570015876e-06, "loss": 0.0018, "step": 72320 }, { "epoch": 0.47584587146306323, "grad_norm": 0.04572911196348702, "learning_rate": 9.431649884888456e-06, "loss": 0.0013, "step": 72330 }, { "epoch": 0.4759116596382966, "grad_norm": 0.024069772187170147, "learning_rate": 9.431384011190676e-06, "loss": 0.0008, "step": 72340 }, { "epoch": 0.47597744781353, "grad_norm": 0.028420193903527898, "learning_rate": 9.431118079068926e-06, "loss": 0.001, "step": 72350 }, { "epoch": 0.47604323598876336, "grad_norm": 0.06982626828521951, "learning_rate": 9.430852088526712e-06, "loss": 0.0022, "step": 72360 }, { "epoch": 0.47610902416399675, "grad_norm": 0.07424713418013593, "learning_rate": 9.430586039567537e-06, "loss": 0.0022, "step": 72370 }, { "epoch": 0.47617481233923015, "grad_norm": 0.019305486461644893, "learning_rate": 9.430319932194914e-06, "loss": 0.0024, "step": 72380 }, { "epoch": 0.47624060051446354, "grad_norm": 0.051467960866544316, "learning_rate": 9.430053766412347e-06, "loss": 0.0022, "step": 72390 }, { "epoch": 0.47630638868969694, "grad_norm": 0.024287442345973007, "learning_rate": 9.42978754222335e-06, "loss": 0.0016, "step": 72400 }, { "epoch": 0.4763721768649303, "grad_norm": 0.24042541011307905, "learning_rate": 9.429521259631427e-06, "loss": 0.0022, "step": 72410 }, { "epoch": 0.47643796504016367, "grad_norm": 0.14064463039560873, "learning_rate": 9.429254918640093e-06, "loss": 0.0032, "step": 72420 }, { "epoch": 0.47650375321539706, "grad_norm": 0.052700632560753254, "learning_rate": 9.428988519252858e-06, "loss": 0.0018, "step": 72430 }, { "epoch": 0.47656954139063046, "grad_norm": 0.12468450042791479, "learning_rate": 9.428722061473235e-06, "loss": 0.0017, "step": 72440 }, { "epoch": 0.47663532956586385, "grad_norm": 0.07811453191721102, "learning_rate": 9.428455545304737e-06, "loss": 0.0039, "step": 72450 }, { "epoch": 0.4767011177410972, "grad_norm": 0.09912023673944209, "learning_rate": 9.428188970750877e-06, "loss": 0.0021, "step": 72460 }, { "epoch": 0.4767669059163306, "grad_norm": 0.1869926580097408, "learning_rate": 9.42792233781517e-06, "loss": 0.0022, "step": 72470 }, { "epoch": 0.476832694091564, "grad_norm": 0.08652636704531874, "learning_rate": 9.42765564650113e-06, "loss": 0.0018, "step": 72480 }, { "epoch": 0.4768984822667974, "grad_norm": 0.05735750158660875, "learning_rate": 9.427388896812275e-06, "loss": 0.0027, "step": 72490 }, { "epoch": 0.47696427044203077, "grad_norm": 0.09469645960464844, "learning_rate": 9.42712208875212e-06, "loss": 0.0028, "step": 72500 }, { "epoch": 0.4770300586172641, "grad_norm": 0.12618883542122902, "learning_rate": 9.426855222324185e-06, "loss": 0.0015, "step": 72510 }, { "epoch": 0.4770958467924975, "grad_norm": 0.014743390158205978, "learning_rate": 9.426588297531987e-06, "loss": 0.0015, "step": 72520 }, { "epoch": 0.4771616349677309, "grad_norm": 0.020959525421203523, "learning_rate": 9.426321314379045e-06, "loss": 0.0016, "step": 72530 }, { "epoch": 0.4772274231429643, "grad_norm": 0.06926935974467514, "learning_rate": 9.426054272868881e-06, "loss": 0.0016, "step": 72540 }, { "epoch": 0.4772932113181977, "grad_norm": 0.05814849142972202, "learning_rate": 9.425787173005012e-06, "loss": 0.0019, "step": 72550 }, { "epoch": 0.477358999493431, "grad_norm": 0.14301496240502226, "learning_rate": 9.425520014790963e-06, "loss": 0.0032, "step": 72560 }, { "epoch": 0.4774247876686644, "grad_norm": 0.03189504222232977, "learning_rate": 9.425252798230253e-06, "loss": 0.0048, "step": 72570 }, { "epoch": 0.4774905758438978, "grad_norm": 0.07816099518708247, "learning_rate": 9.424985523326409e-06, "loss": 0.0022, "step": 72580 }, { "epoch": 0.4775563640191312, "grad_norm": 0.0340372590913239, "learning_rate": 9.42471819008295e-06, "loss": 0.0039, "step": 72590 }, { "epoch": 0.4776221521943646, "grad_norm": 0.12008906802992678, "learning_rate": 9.424450798503403e-06, "loss": 0.0023, "step": 72600 }, { "epoch": 0.47768794036959794, "grad_norm": 0.13269499023416115, "learning_rate": 9.424183348591296e-06, "loss": 0.0024, "step": 72610 }, { "epoch": 0.47775372854483134, "grad_norm": 0.04240340763285351, "learning_rate": 9.423915840350151e-06, "loss": 0.0021, "step": 72620 }, { "epoch": 0.47781951672006473, "grad_norm": 0.029394122307541026, "learning_rate": 9.423648273783497e-06, "loss": 0.0018, "step": 72630 }, { "epoch": 0.4778853048952981, "grad_norm": 0.00534878466948678, "learning_rate": 9.42338064889486e-06, "loss": 0.0015, "step": 72640 }, { "epoch": 0.4779510930705315, "grad_norm": 0.0440309982827608, "learning_rate": 9.423112965687771e-06, "loss": 0.0018, "step": 72650 }, { "epoch": 0.4780168812457649, "grad_norm": 0.10760180347280085, "learning_rate": 9.422845224165757e-06, "loss": 0.0019, "step": 72660 }, { "epoch": 0.47808266942099825, "grad_norm": 0.09887378463201099, "learning_rate": 9.422577424332348e-06, "loss": 0.0016, "step": 72670 }, { "epoch": 0.47814845759623165, "grad_norm": 0.026309988537512135, "learning_rate": 9.422309566191075e-06, "loss": 0.0048, "step": 72680 }, { "epoch": 0.47821424577146504, "grad_norm": 0.43713177311601076, "learning_rate": 9.42204164974547e-06, "loss": 0.0038, "step": 72690 }, { "epoch": 0.47828003394669844, "grad_norm": 0.02288734257065723, "learning_rate": 9.421773674999066e-06, "loss": 0.0007, "step": 72700 }, { "epoch": 0.47834582212193183, "grad_norm": 0.029757303792309994, "learning_rate": 9.421505641955395e-06, "loss": 0.0022, "step": 72710 }, { "epoch": 0.47841161029716517, "grad_norm": 0.11814495012742672, "learning_rate": 9.421237550617989e-06, "loss": 0.0013, "step": 72720 }, { "epoch": 0.47847739847239856, "grad_norm": 0.4408852091944588, "learning_rate": 9.420969400990385e-06, "loss": 0.003, "step": 72730 }, { "epoch": 0.47854318664763196, "grad_norm": 0.09045570210906459, "learning_rate": 9.420701193076118e-06, "loss": 0.0024, "step": 72740 }, { "epoch": 0.47860897482286535, "grad_norm": 0.08416816728395055, "learning_rate": 9.420432926878725e-06, "loss": 0.003, "step": 72750 }, { "epoch": 0.47867476299809875, "grad_norm": 0.09677531707269912, "learning_rate": 9.42016460240174e-06, "loss": 0.0019, "step": 72760 }, { "epoch": 0.4787405511733321, "grad_norm": 0.08071597284962852, "learning_rate": 9.419896219648701e-06, "loss": 0.0012, "step": 72770 }, { "epoch": 0.4788063393485655, "grad_norm": 0.08637321605554366, "learning_rate": 9.41962777862315e-06, "loss": 0.0026, "step": 72780 }, { "epoch": 0.4788721275237989, "grad_norm": 0.01681216309577081, "learning_rate": 9.419359279328623e-06, "loss": 0.0019, "step": 72790 }, { "epoch": 0.47893791569903227, "grad_norm": 0.0325547130785147, "learning_rate": 9.41909072176866e-06, "loss": 0.0009, "step": 72800 }, { "epoch": 0.47900370387426566, "grad_norm": 0.014027280324883878, "learning_rate": 9.418822105946804e-06, "loss": 0.0014, "step": 72810 }, { "epoch": 0.479069492049499, "grad_norm": 0.07211794711308671, "learning_rate": 9.418553431866594e-06, "loss": 0.0014, "step": 72820 }, { "epoch": 0.4791352802247324, "grad_norm": 0.15317854983485923, "learning_rate": 9.418284699531572e-06, "loss": 0.0024, "step": 72830 }, { "epoch": 0.4792010683999658, "grad_norm": 0.012841534645691419, "learning_rate": 9.418015908945284e-06, "loss": 0.0022, "step": 72840 }, { "epoch": 0.4792668565751992, "grad_norm": 0.07629472870912499, "learning_rate": 9.41774706011127e-06, "loss": 0.0015, "step": 72850 }, { "epoch": 0.4793326447504326, "grad_norm": 0.005433229181667546, "learning_rate": 9.417478153033077e-06, "loss": 0.0018, "step": 72860 }, { "epoch": 0.4793984329256659, "grad_norm": 0.18294037152141757, "learning_rate": 9.417209187714251e-06, "loss": 0.0015, "step": 72870 }, { "epoch": 0.4794642211008993, "grad_norm": 0.03356516932200717, "learning_rate": 9.416940164158335e-06, "loss": 0.0039, "step": 72880 }, { "epoch": 0.4795300092761327, "grad_norm": 0.02032285193922555, "learning_rate": 9.416671082368879e-06, "loss": 0.0021, "step": 72890 }, { "epoch": 0.4795957974513661, "grad_norm": 0.05223300685280732, "learning_rate": 9.416401942349429e-06, "loss": 0.002, "step": 72900 }, { "epoch": 0.4796615856265995, "grad_norm": 0.18330632417121948, "learning_rate": 9.416132744103533e-06, "loss": 0.0024, "step": 72910 }, { "epoch": 0.47972737380183283, "grad_norm": 0.04639401764717566, "learning_rate": 9.415863487634743e-06, "loss": 0.0016, "step": 72920 }, { "epoch": 0.47979316197706623, "grad_norm": 0.3357821337876305, "learning_rate": 9.415594172946603e-06, "loss": 0.0019, "step": 72930 }, { "epoch": 0.4798589501522996, "grad_norm": 0.18190302894914687, "learning_rate": 9.41532480004267e-06, "loss": 0.0024, "step": 72940 }, { "epoch": 0.479924738327533, "grad_norm": 0.05879190735706249, "learning_rate": 9.415055368926492e-06, "loss": 0.0024, "step": 72950 }, { "epoch": 0.4799905265027664, "grad_norm": 0.06861495628862595, "learning_rate": 9.414785879601622e-06, "loss": 0.0031, "step": 72960 }, { "epoch": 0.48005631467799975, "grad_norm": 0.1288393923996567, "learning_rate": 9.414516332071615e-06, "loss": 0.0023, "step": 72970 }, { "epoch": 0.48012210285323315, "grad_norm": 0.010433363583224742, "learning_rate": 9.41424672634002e-06, "loss": 0.0026, "step": 72980 }, { "epoch": 0.48018789102846654, "grad_norm": 0.05595326386309709, "learning_rate": 9.413977062410396e-06, "loss": 0.0018, "step": 72990 }, { "epoch": 0.48025367920369993, "grad_norm": 0.08592643915839297, "learning_rate": 9.413707340286295e-06, "loss": 0.0015, "step": 73000 }, { "epoch": 0.48031946737893333, "grad_norm": 0.15219819764607845, "learning_rate": 9.413437559971277e-06, "loss": 0.0019, "step": 73010 }, { "epoch": 0.48038525555416667, "grad_norm": 0.03536633720338264, "learning_rate": 9.413167721468895e-06, "loss": 0.0011, "step": 73020 }, { "epoch": 0.48045104372940006, "grad_norm": 0.14863623495476672, "learning_rate": 9.412897824782707e-06, "loss": 0.0019, "step": 73030 }, { "epoch": 0.48051683190463346, "grad_norm": 0.0027301432605142772, "learning_rate": 9.412627869916273e-06, "loss": 0.0013, "step": 73040 }, { "epoch": 0.48058262007986685, "grad_norm": 0.07209121482797853, "learning_rate": 9.412357856873153e-06, "loss": 0.0012, "step": 73050 }, { "epoch": 0.48064840825510025, "grad_norm": 0.05132446188699398, "learning_rate": 9.412087785656905e-06, "loss": 0.0007, "step": 73060 }, { "epoch": 0.4807141964303336, "grad_norm": 0.1239893822971224, "learning_rate": 9.41181765627109e-06, "loss": 0.0014, "step": 73070 }, { "epoch": 0.480779984605567, "grad_norm": 0.021331448950674337, "learning_rate": 9.41154746871927e-06, "loss": 0.0031, "step": 73080 }, { "epoch": 0.4808457727808004, "grad_norm": 0.03278365496615623, "learning_rate": 9.411277223005004e-06, "loss": 0.002, "step": 73090 }, { "epoch": 0.48091156095603377, "grad_norm": 0.12001039767866985, "learning_rate": 9.41100691913186e-06, "loss": 0.0017, "step": 73100 }, { "epoch": 0.48097734913126716, "grad_norm": 0.039446309604101566, "learning_rate": 9.410736557103398e-06, "loss": 0.0036, "step": 73110 }, { "epoch": 0.48104313730650056, "grad_norm": 0.07460340489510253, "learning_rate": 9.410466136923184e-06, "loss": 0.0023, "step": 73120 }, { "epoch": 0.4811089254817339, "grad_norm": 0.080801425595255, "learning_rate": 9.410195658594786e-06, "loss": 0.002, "step": 73130 }, { "epoch": 0.4811747136569673, "grad_norm": 0.06778994093803946, "learning_rate": 9.409925122121764e-06, "loss": 0.0022, "step": 73140 }, { "epoch": 0.4812405018322007, "grad_norm": 0.16084465287468883, "learning_rate": 9.40965452750769e-06, "loss": 0.004, "step": 73150 }, { "epoch": 0.4813062900074341, "grad_norm": 0.09337507238093133, "learning_rate": 9.409383874756127e-06, "loss": 0.0034, "step": 73160 }, { "epoch": 0.4813720781826675, "grad_norm": 0.1802981088228711, "learning_rate": 9.409113163870646e-06, "loss": 0.0036, "step": 73170 }, { "epoch": 0.4814378663579008, "grad_norm": 0.0037713336078931486, "learning_rate": 9.408842394854819e-06, "loss": 0.0014, "step": 73180 }, { "epoch": 0.4815036545331342, "grad_norm": 0.05300596697423923, "learning_rate": 9.40857156771221e-06, "loss": 0.0008, "step": 73190 }, { "epoch": 0.4815694427083676, "grad_norm": 0.00707956504418119, "learning_rate": 9.408300682446393e-06, "loss": 0.0021, "step": 73200 }, { "epoch": 0.481635230883601, "grad_norm": 0.0485293607584429, "learning_rate": 9.408029739060939e-06, "loss": 0.0025, "step": 73210 }, { "epoch": 0.4817010190588344, "grad_norm": 0.13950543604631319, "learning_rate": 9.407758737559421e-06, "loss": 0.0026, "step": 73220 }, { "epoch": 0.48176680723406773, "grad_norm": 0.09041326212732703, "learning_rate": 9.407487677945408e-06, "loss": 0.0014, "step": 73230 }, { "epoch": 0.4818325954093011, "grad_norm": 0.05625704737572948, "learning_rate": 9.407216560222478e-06, "loss": 0.0027, "step": 73240 }, { "epoch": 0.4818983835845345, "grad_norm": 0.017600837766485296, "learning_rate": 9.406945384394205e-06, "loss": 0.0011, "step": 73250 }, { "epoch": 0.4819641717597679, "grad_norm": 0.16547591472891243, "learning_rate": 9.406674150464161e-06, "loss": 0.0012, "step": 73260 }, { "epoch": 0.4820299599350013, "grad_norm": 0.12571429487984148, "learning_rate": 9.406402858435926e-06, "loss": 0.0022, "step": 73270 }, { "epoch": 0.48209574811023465, "grad_norm": 0.03163112821223533, "learning_rate": 9.406131508313075e-06, "loss": 0.0016, "step": 73280 }, { "epoch": 0.48216153628546804, "grad_norm": 0.06731383531584874, "learning_rate": 9.405860100099184e-06, "loss": 0.0049, "step": 73290 }, { "epoch": 0.48222732446070143, "grad_norm": 0.06616169874288007, "learning_rate": 9.405588633797835e-06, "loss": 0.0018, "step": 73300 }, { "epoch": 0.48229311263593483, "grad_norm": 0.016029230682208247, "learning_rate": 9.405317109412602e-06, "loss": 0.0016, "step": 73310 }, { "epoch": 0.4823589008111682, "grad_norm": 0.03865627505906652, "learning_rate": 9.40504552694707e-06, "loss": 0.0012, "step": 73320 }, { "epoch": 0.48242468898640156, "grad_norm": 0.0648798835060121, "learning_rate": 9.404773886404817e-06, "loss": 0.0014, "step": 73330 }, { "epoch": 0.48249047716163496, "grad_norm": 0.07270212608122878, "learning_rate": 9.404502187789422e-06, "loss": 0.0037, "step": 73340 }, { "epoch": 0.48255626533686835, "grad_norm": 0.11384502866790458, "learning_rate": 9.404230431104472e-06, "loss": 0.0029, "step": 73350 }, { "epoch": 0.48262205351210175, "grad_norm": 0.03756735849289004, "learning_rate": 9.403958616353546e-06, "loss": 0.0015, "step": 73360 }, { "epoch": 0.48268784168733514, "grad_norm": 0.017321307075150585, "learning_rate": 9.40368674354023e-06, "loss": 0.0014, "step": 73370 }, { "epoch": 0.4827536298625685, "grad_norm": 0.0036098176365394153, "learning_rate": 9.403414812668108e-06, "loss": 0.0016, "step": 73380 }, { "epoch": 0.4828194180378019, "grad_norm": 0.16409087016810742, "learning_rate": 9.403142823740766e-06, "loss": 0.0019, "step": 73390 }, { "epoch": 0.48288520621303527, "grad_norm": 0.015909504598290318, "learning_rate": 9.402870776761785e-06, "loss": 0.0023, "step": 73400 }, { "epoch": 0.48295099438826866, "grad_norm": 0.1016440327167746, "learning_rate": 9.402598671734757e-06, "loss": 0.001, "step": 73410 }, { "epoch": 0.48301678256350206, "grad_norm": 0.06773135789067483, "learning_rate": 9.402326508663269e-06, "loss": 0.0021, "step": 73420 }, { "epoch": 0.4830825707387354, "grad_norm": 0.028427278203835425, "learning_rate": 9.402054287550908e-06, "loss": 0.0021, "step": 73430 }, { "epoch": 0.4831483589139688, "grad_norm": 0.05591302740209288, "learning_rate": 9.401782008401262e-06, "loss": 0.0012, "step": 73440 }, { "epoch": 0.4832141470892022, "grad_norm": 0.13277604874096444, "learning_rate": 9.401509671217921e-06, "loss": 0.0015, "step": 73450 }, { "epoch": 0.4832799352644356, "grad_norm": 0.027465960792581186, "learning_rate": 9.401237276004478e-06, "loss": 0.0014, "step": 73460 }, { "epoch": 0.483345723439669, "grad_norm": 0.057443686566629834, "learning_rate": 9.400964822764522e-06, "loss": 0.002, "step": 73470 }, { "epoch": 0.4834115116149023, "grad_norm": 0.119205051427111, "learning_rate": 9.400692311501646e-06, "loss": 0.0013, "step": 73480 }, { "epoch": 0.4834772997901357, "grad_norm": 0.007437637611202505, "learning_rate": 9.400419742219441e-06, "loss": 0.0018, "step": 73490 }, { "epoch": 0.4835430879653691, "grad_norm": 0.07986764490313757, "learning_rate": 9.400147114921505e-06, "loss": 0.0024, "step": 73500 }, { "epoch": 0.4836088761406025, "grad_norm": 0.006980295907424496, "learning_rate": 9.399874429611428e-06, "loss": 0.001, "step": 73510 }, { "epoch": 0.4836746643158359, "grad_norm": 0.03996887996999726, "learning_rate": 9.399601686292808e-06, "loss": 0.0025, "step": 73520 }, { "epoch": 0.48374045249106923, "grad_norm": 0.005982172060976151, "learning_rate": 9.399328884969237e-06, "loss": 0.0009, "step": 73530 }, { "epoch": 0.4838062406663026, "grad_norm": 0.041248274148557536, "learning_rate": 9.399056025644316e-06, "loss": 0.0032, "step": 73540 }, { "epoch": 0.483872028841536, "grad_norm": 0.08848690909627416, "learning_rate": 9.39878310832164e-06, "loss": 0.0026, "step": 73550 }, { "epoch": 0.4839378170167694, "grad_norm": 0.01700676147956197, "learning_rate": 9.398510133004807e-06, "loss": 0.0014, "step": 73560 }, { "epoch": 0.4840036051920028, "grad_norm": 0.01902032697529916, "learning_rate": 9.398237099697418e-06, "loss": 0.0013, "step": 73570 }, { "epoch": 0.4840693933672362, "grad_norm": 0.023561716192421207, "learning_rate": 9.39796400840307e-06, "loss": 0.0027, "step": 73580 }, { "epoch": 0.48413518154246954, "grad_norm": 0.027899337814650146, "learning_rate": 9.397690859125368e-06, "loss": 0.0019, "step": 73590 }, { "epoch": 0.48420096971770293, "grad_norm": 0.07392308187314993, "learning_rate": 9.397417651867907e-06, "loss": 0.0014, "step": 73600 }, { "epoch": 0.48426675789293633, "grad_norm": 0.14904286231902977, "learning_rate": 9.397144386634293e-06, "loss": 0.0015, "step": 73610 }, { "epoch": 0.4843325460681697, "grad_norm": 0.06397974883731322, "learning_rate": 9.396871063428128e-06, "loss": 0.0024, "step": 73620 }, { "epoch": 0.4843983342434031, "grad_norm": 0.011272004058452264, "learning_rate": 9.396597682253015e-06, "loss": 0.001, "step": 73630 }, { "epoch": 0.48446412241863646, "grad_norm": 0.06497858199573957, "learning_rate": 9.39632424311256e-06, "loss": 0.0024, "step": 73640 }, { "epoch": 0.48452991059386985, "grad_norm": 0.6216483914560608, "learning_rate": 9.396050746010366e-06, "loss": 0.004, "step": 73650 }, { "epoch": 0.48459569876910324, "grad_norm": 0.08522954134783206, "learning_rate": 9.395777190950038e-06, "loss": 0.0019, "step": 73660 }, { "epoch": 0.48466148694433664, "grad_norm": 0.032728715483748415, "learning_rate": 9.395503577935186e-06, "loss": 0.0031, "step": 73670 }, { "epoch": 0.48472727511957003, "grad_norm": 0.07462195938140757, "learning_rate": 9.395229906969416e-06, "loss": 0.0028, "step": 73680 }, { "epoch": 0.4847930632948034, "grad_norm": 0.12025211046232241, "learning_rate": 9.394956178056334e-06, "loss": 0.003, "step": 73690 }, { "epoch": 0.48485885147003677, "grad_norm": 0.04607204753632287, "learning_rate": 9.394682391199552e-06, "loss": 0.0011, "step": 73700 }, { "epoch": 0.48492463964527016, "grad_norm": 0.046176835400920484, "learning_rate": 9.394408546402677e-06, "loss": 0.0017, "step": 73710 }, { "epoch": 0.48499042782050356, "grad_norm": 0.06833958068889662, "learning_rate": 9.394134643669322e-06, "loss": 0.0027, "step": 73720 }, { "epoch": 0.48505621599573695, "grad_norm": 0.02452246034237243, "learning_rate": 9.393860683003095e-06, "loss": 0.0024, "step": 73730 }, { "epoch": 0.4851220041709703, "grad_norm": 0.10929095547532641, "learning_rate": 9.39358666440761e-06, "loss": 0.0029, "step": 73740 }, { "epoch": 0.4851877923462037, "grad_norm": 0.02680985330395545, "learning_rate": 9.39331258788648e-06, "loss": 0.0025, "step": 73750 }, { "epoch": 0.4852535805214371, "grad_norm": 0.0654753973175115, "learning_rate": 9.393038453443317e-06, "loss": 0.0015, "step": 73760 }, { "epoch": 0.4853193686966705, "grad_norm": 0.01509283738586327, "learning_rate": 9.392764261081737e-06, "loss": 0.0024, "step": 73770 }, { "epoch": 0.48538515687190387, "grad_norm": 0.160289463068921, "learning_rate": 9.392490010805353e-06, "loss": 0.002, "step": 73780 }, { "epoch": 0.4854509450471372, "grad_norm": 0.017902890208183555, "learning_rate": 9.392215702617784e-06, "loss": 0.0008, "step": 73790 }, { "epoch": 0.4855167332223706, "grad_norm": 0.03658706261053762, "learning_rate": 9.391941336522644e-06, "loss": 0.001, "step": 73800 }, { "epoch": 0.485582521397604, "grad_norm": 0.036571457047654854, "learning_rate": 9.391666912523546e-06, "loss": 0.0012, "step": 73810 }, { "epoch": 0.4856483095728374, "grad_norm": 0.03607053145158252, "learning_rate": 9.391392430624118e-06, "loss": 0.0025, "step": 73820 }, { "epoch": 0.4857140977480708, "grad_norm": 0.10669036811166728, "learning_rate": 9.391117890827971e-06, "loss": 0.0027, "step": 73830 }, { "epoch": 0.4857798859233041, "grad_norm": 0.07822014574048651, "learning_rate": 9.390843293138729e-06, "loss": 0.0014, "step": 73840 }, { "epoch": 0.4858456740985375, "grad_norm": 0.054734743912021544, "learning_rate": 9.390568637560009e-06, "loss": 0.002, "step": 73850 }, { "epoch": 0.4859114622737709, "grad_norm": 0.06354356146964506, "learning_rate": 9.390293924095432e-06, "loss": 0.0014, "step": 73860 }, { "epoch": 0.4859772504490043, "grad_norm": 0.041176577036856926, "learning_rate": 9.390019152748625e-06, "loss": 0.0031, "step": 73870 }, { "epoch": 0.4860430386242377, "grad_norm": 0.15043261624333573, "learning_rate": 9.389744323523204e-06, "loss": 0.0019, "step": 73880 }, { "epoch": 0.48610882679947104, "grad_norm": 0.0035969657354787857, "learning_rate": 9.389469436422795e-06, "loss": 0.0016, "step": 73890 }, { "epoch": 0.48617461497470443, "grad_norm": 0.08716402163378463, "learning_rate": 9.389194491451023e-06, "loss": 0.0018, "step": 73900 }, { "epoch": 0.4862404031499378, "grad_norm": 0.06688267751287177, "learning_rate": 9.388919488611514e-06, "loss": 0.002, "step": 73910 }, { "epoch": 0.4863061913251712, "grad_norm": 0.07056825434909723, "learning_rate": 9.388644427907891e-06, "loss": 0.0023, "step": 73920 }, { "epoch": 0.4863719795004046, "grad_norm": 0.15014697620361447, "learning_rate": 9.388369309343781e-06, "loss": 0.0087, "step": 73930 }, { "epoch": 0.48643776767563796, "grad_norm": 0.17136228499970418, "learning_rate": 9.388094132922813e-06, "loss": 0.0021, "step": 73940 }, { "epoch": 0.48650355585087135, "grad_norm": 0.043528555020981054, "learning_rate": 9.387818898648614e-06, "loss": 0.0017, "step": 73950 }, { "epoch": 0.48656934402610474, "grad_norm": 0.06266638848704728, "learning_rate": 9.38754360652481e-06, "loss": 0.002, "step": 73960 }, { "epoch": 0.48663513220133814, "grad_norm": 0.015310258684375553, "learning_rate": 9.387268256555033e-06, "loss": 0.0025, "step": 73970 }, { "epoch": 0.48670092037657153, "grad_norm": 0.04854503179209698, "learning_rate": 9.386992848742915e-06, "loss": 0.0064, "step": 73980 }, { "epoch": 0.48676670855180487, "grad_norm": 0.07184889695635423, "learning_rate": 9.386717383092085e-06, "loss": 0.0014, "step": 73990 }, { "epoch": 0.48683249672703827, "grad_norm": 0.023060343300538393, "learning_rate": 9.386441859606173e-06, "loss": 0.0018, "step": 74000 }, { "epoch": 0.48689828490227166, "grad_norm": 0.20521354984319823, "learning_rate": 9.386166278288814e-06, "loss": 0.0023, "step": 74010 }, { "epoch": 0.48696407307750506, "grad_norm": 0.0328382688676574, "learning_rate": 9.385890639143642e-06, "loss": 0.0018, "step": 74020 }, { "epoch": 0.48702986125273845, "grad_norm": 0.3055997230063391, "learning_rate": 9.38561494217429e-06, "loss": 0.002, "step": 74030 }, { "epoch": 0.48709564942797184, "grad_norm": 0.07923061760323359, "learning_rate": 9.385339187384393e-06, "loss": 0.0014, "step": 74040 }, { "epoch": 0.4871614376032052, "grad_norm": 0.0146813266087005, "learning_rate": 9.385063374777584e-06, "loss": 0.002, "step": 74050 }, { "epoch": 0.4872272257784386, "grad_norm": 0.0672537415009703, "learning_rate": 9.384787504357503e-06, "loss": 0.0021, "step": 74060 }, { "epoch": 0.48729301395367197, "grad_norm": 0.058792662648214186, "learning_rate": 9.384511576127785e-06, "loss": 0.0013, "step": 74070 }, { "epoch": 0.48735880212890537, "grad_norm": 0.041347370676979786, "learning_rate": 9.38423559009207e-06, "loss": 0.0018, "step": 74080 }, { "epoch": 0.48742459030413876, "grad_norm": 0.3489559798354231, "learning_rate": 9.383959546253994e-06, "loss": 0.0033, "step": 74090 }, { "epoch": 0.4874903784793721, "grad_norm": 0.07883327211436396, "learning_rate": 9.383683444617197e-06, "loss": 0.0024, "step": 74100 }, { "epoch": 0.4875561666546055, "grad_norm": 0.1762719512806657, "learning_rate": 9.383407285185322e-06, "loss": 0.0022, "step": 74110 }, { "epoch": 0.4876219548298389, "grad_norm": 0.05231673260030047, "learning_rate": 9.383131067962007e-06, "loss": 0.0019, "step": 74120 }, { "epoch": 0.4876877430050723, "grad_norm": 0.050402686634824306, "learning_rate": 9.382854792950893e-06, "loss": 0.0019, "step": 74130 }, { "epoch": 0.4877535311803057, "grad_norm": 0.04879285945268547, "learning_rate": 9.382578460155624e-06, "loss": 0.0018, "step": 74140 }, { "epoch": 0.487819319355539, "grad_norm": 0.09383311275299577, "learning_rate": 9.382302069579845e-06, "loss": 0.002, "step": 74150 }, { "epoch": 0.4878851075307724, "grad_norm": 0.07337120790331572, "learning_rate": 9.382025621227197e-06, "loss": 0.002, "step": 74160 }, { "epoch": 0.4879508957060058, "grad_norm": 0.07240355276920706, "learning_rate": 9.381749115101324e-06, "loss": 0.0013, "step": 74170 }, { "epoch": 0.4880166838812392, "grad_norm": 0.1336357659945027, "learning_rate": 9.381472551205876e-06, "loss": 0.0016, "step": 74180 }, { "epoch": 0.4880824720564726, "grad_norm": 0.10684996787982964, "learning_rate": 9.381195929544496e-06, "loss": 0.0023, "step": 74190 }, { "epoch": 0.48814826023170593, "grad_norm": 0.009649247838615904, "learning_rate": 9.38091925012083e-06, "loss": 0.0016, "step": 74200 }, { "epoch": 0.4882140484069393, "grad_norm": 0.06428379671531363, "learning_rate": 9.380642512938527e-06, "loss": 0.0019, "step": 74210 }, { "epoch": 0.4882798365821727, "grad_norm": 0.08781009020177223, "learning_rate": 9.380365718001237e-06, "loss": 0.0019, "step": 74220 }, { "epoch": 0.4883456247574061, "grad_norm": 0.038703841378559214, "learning_rate": 9.380088865312608e-06, "loss": 0.0034, "step": 74230 }, { "epoch": 0.4884114129326395, "grad_norm": 0.049487855021517826, "learning_rate": 9.37981195487629e-06, "loss": 0.0013, "step": 74240 }, { "epoch": 0.48847720110787285, "grad_norm": 0.12286101712855829, "learning_rate": 9.379534986695934e-06, "loss": 0.0023, "step": 74250 }, { "epoch": 0.48854298928310624, "grad_norm": 0.09010098351797866, "learning_rate": 9.379257960775194e-06, "loss": 0.0015, "step": 74260 }, { "epoch": 0.48860877745833964, "grad_norm": 0.028765324745069494, "learning_rate": 9.378980877117716e-06, "loss": 0.0029, "step": 74270 }, { "epoch": 0.48867456563357303, "grad_norm": 0.06972053853986049, "learning_rate": 9.378703735727158e-06, "loss": 0.0015, "step": 74280 }, { "epoch": 0.4887403538088064, "grad_norm": 0.04543216763618251, "learning_rate": 9.378426536607176e-06, "loss": 0.0013, "step": 74290 }, { "epoch": 0.48880614198403977, "grad_norm": 0.06865183322610315, "learning_rate": 9.37814927976142e-06, "loss": 0.0022, "step": 74300 }, { "epoch": 0.48887193015927316, "grad_norm": 0.07119892484051793, "learning_rate": 9.377871965193546e-06, "loss": 0.0014, "step": 74310 }, { "epoch": 0.48893771833450655, "grad_norm": 0.039544200946821234, "learning_rate": 9.377594592907212e-06, "loss": 0.0013, "step": 74320 }, { "epoch": 0.48900350650973995, "grad_norm": 0.006037089930298784, "learning_rate": 9.377317162906073e-06, "loss": 0.0018, "step": 74330 }, { "epoch": 0.48906929468497334, "grad_norm": 0.06945643399077116, "learning_rate": 9.377039675193788e-06, "loss": 0.0026, "step": 74340 }, { "epoch": 0.4891350828602067, "grad_norm": 0.10065881462224253, "learning_rate": 9.376762129774015e-06, "loss": 0.0012, "step": 74350 }, { "epoch": 0.4892008710354401, "grad_norm": 0.051351084037840916, "learning_rate": 9.376484526650413e-06, "loss": 0.0014, "step": 74360 }, { "epoch": 0.48926665921067347, "grad_norm": 0.1411397172271311, "learning_rate": 9.376206865826643e-06, "loss": 0.0015, "step": 74370 }, { "epoch": 0.48933244738590687, "grad_norm": 0.07954229948331294, "learning_rate": 9.375929147306367e-06, "loss": 0.0021, "step": 74380 }, { "epoch": 0.48939823556114026, "grad_norm": 0.07358680922933986, "learning_rate": 9.375651371093242e-06, "loss": 0.0025, "step": 74390 }, { "epoch": 0.4894640237363736, "grad_norm": 0.030204833578674158, "learning_rate": 9.375373537190932e-06, "loss": 0.002, "step": 74400 }, { "epoch": 0.489529811911607, "grad_norm": 0.01725271053261682, "learning_rate": 9.375095645603102e-06, "loss": 0.0016, "step": 74410 }, { "epoch": 0.4895956000868404, "grad_norm": 0.05489724400848274, "learning_rate": 9.374817696333414e-06, "loss": 0.0012, "step": 74420 }, { "epoch": 0.4896613882620738, "grad_norm": 0.03978539217643112, "learning_rate": 9.374539689385533e-06, "loss": 0.0012, "step": 74430 }, { "epoch": 0.4897271764373072, "grad_norm": 0.03410644082626013, "learning_rate": 9.374261624763126e-06, "loss": 0.0019, "step": 74440 }, { "epoch": 0.4897929646125405, "grad_norm": 0.013248464866791097, "learning_rate": 9.373983502469857e-06, "loss": 0.0019, "step": 74450 }, { "epoch": 0.4898587527877739, "grad_norm": 0.02767407079734986, "learning_rate": 9.37370532250939e-06, "loss": 0.0015, "step": 74460 }, { "epoch": 0.4899245409630073, "grad_norm": 0.08999140016306682, "learning_rate": 9.3734270848854e-06, "loss": 0.002, "step": 74470 }, { "epoch": 0.4899903291382407, "grad_norm": 0.15393059609486043, "learning_rate": 9.373148789601548e-06, "loss": 0.0022, "step": 74480 }, { "epoch": 0.4900561173134741, "grad_norm": 0.037404795583347895, "learning_rate": 9.372870436661508e-06, "loss": 0.0019, "step": 74490 }, { "epoch": 0.49012190548870743, "grad_norm": 0.014411182011632984, "learning_rate": 9.372592026068945e-06, "loss": 0.0026, "step": 74500 }, { "epoch": 0.4901876936639408, "grad_norm": 0.029803307377685016, "learning_rate": 9.372313557827536e-06, "loss": 0.0024, "step": 74510 }, { "epoch": 0.4902534818391742, "grad_norm": 0.22317150876910144, "learning_rate": 9.372035031940947e-06, "loss": 0.0021, "step": 74520 }, { "epoch": 0.4903192700144076, "grad_norm": 0.10599626801308386, "learning_rate": 9.371756448412853e-06, "loss": 0.0012, "step": 74530 }, { "epoch": 0.490385058189641, "grad_norm": 0.06851413674225974, "learning_rate": 9.371477807246924e-06, "loss": 0.0024, "step": 74540 }, { "epoch": 0.4904508463648744, "grad_norm": 0.06891609800715603, "learning_rate": 9.371199108446838e-06, "loss": 0.0022, "step": 74550 }, { "epoch": 0.49051663454010774, "grad_norm": 0.085645013249547, "learning_rate": 9.370920352016266e-06, "loss": 0.0013, "step": 74560 }, { "epoch": 0.49058242271534114, "grad_norm": 0.11214649363253912, "learning_rate": 9.370641537958885e-06, "loss": 0.0016, "step": 74570 }, { "epoch": 0.49064821089057453, "grad_norm": 0.11096538560970717, "learning_rate": 9.37036266627837e-06, "loss": 0.0024, "step": 74580 }, { "epoch": 0.4907139990658079, "grad_norm": 0.05861371905098193, "learning_rate": 9.370083736978396e-06, "loss": 0.0022, "step": 74590 }, { "epoch": 0.4907797872410413, "grad_norm": 0.014324720113432405, "learning_rate": 9.369804750062642e-06, "loss": 0.0014, "step": 74600 }, { "epoch": 0.49084557541627466, "grad_norm": 0.10442899146322701, "learning_rate": 9.369525705534788e-06, "loss": 0.0013, "step": 74610 }, { "epoch": 0.49091136359150805, "grad_norm": 0.11096448843732551, "learning_rate": 9.369246603398512e-06, "loss": 0.0013, "step": 74620 }, { "epoch": 0.49097715176674145, "grad_norm": 0.1700005046795116, "learning_rate": 9.36896744365749e-06, "loss": 0.0024, "step": 74630 }, { "epoch": 0.49104293994197484, "grad_norm": 0.24342947337352192, "learning_rate": 9.368688226315409e-06, "loss": 0.003, "step": 74640 }, { "epoch": 0.49110872811720824, "grad_norm": 0.07363644793328201, "learning_rate": 9.368408951375944e-06, "loss": 0.0016, "step": 74650 }, { "epoch": 0.4911745162924416, "grad_norm": 0.1590079257011821, "learning_rate": 9.368129618842781e-06, "loss": 0.0028, "step": 74660 }, { "epoch": 0.49124030446767497, "grad_norm": 0.00602487763408142, "learning_rate": 9.367850228719603e-06, "loss": 0.0013, "step": 74670 }, { "epoch": 0.49130609264290837, "grad_norm": 0.036542283529384705, "learning_rate": 9.36757078101009e-06, "loss": 0.0022, "step": 74680 }, { "epoch": 0.49137188081814176, "grad_norm": 0.1396789196563386, "learning_rate": 9.36729127571793e-06, "loss": 0.0047, "step": 74690 }, { "epoch": 0.49143766899337515, "grad_norm": 0.0788026966344834, "learning_rate": 9.367011712846805e-06, "loss": 0.0031, "step": 74700 }, { "epoch": 0.4915034571686085, "grad_norm": 0.04157139485867293, "learning_rate": 9.3667320924004e-06, "loss": 0.0015, "step": 74710 }, { "epoch": 0.4915692453438419, "grad_norm": 0.06742268833076213, "learning_rate": 9.366452414382407e-06, "loss": 0.0024, "step": 74720 }, { "epoch": 0.4916350335190753, "grad_norm": 0.19952206280213447, "learning_rate": 9.36617267879651e-06, "loss": 0.0065, "step": 74730 }, { "epoch": 0.4917008216943087, "grad_norm": 0.0856402663923174, "learning_rate": 9.365892885646397e-06, "loss": 0.0041, "step": 74740 }, { "epoch": 0.49176660986954207, "grad_norm": 0.07154133192841859, "learning_rate": 9.365613034935755e-06, "loss": 0.001, "step": 74750 }, { "epoch": 0.4918323980447754, "grad_norm": 0.11303510898637038, "learning_rate": 9.365333126668277e-06, "loss": 0.0021, "step": 74760 }, { "epoch": 0.4918981862200088, "grad_norm": 0.01605052328305114, "learning_rate": 9.365053160847653e-06, "loss": 0.0013, "step": 74770 }, { "epoch": 0.4919639743952422, "grad_norm": 0.09018029689481326, "learning_rate": 9.36477313747757e-06, "loss": 0.0021, "step": 74780 }, { "epoch": 0.4920297625704756, "grad_norm": 0.02420089454175216, "learning_rate": 9.364493056561725e-06, "loss": 0.0017, "step": 74790 }, { "epoch": 0.492095550745709, "grad_norm": 0.07626809368221978, "learning_rate": 9.364212918103808e-06, "loss": 0.002, "step": 74800 }, { "epoch": 0.4921613389209423, "grad_norm": 0.06742062016385375, "learning_rate": 9.363932722107513e-06, "loss": 0.0017, "step": 74810 }, { "epoch": 0.4922271270961757, "grad_norm": 0.10068048996742096, "learning_rate": 9.363652468576534e-06, "loss": 0.0026, "step": 74820 }, { "epoch": 0.4922929152714091, "grad_norm": 0.17387571982608707, "learning_rate": 9.363372157514565e-06, "loss": 0.0054, "step": 74830 }, { "epoch": 0.4923587034466425, "grad_norm": 0.009858131232803224, "learning_rate": 9.363091788925303e-06, "loss": 0.0018, "step": 74840 }, { "epoch": 0.4924244916218759, "grad_norm": 0.12568399398349409, "learning_rate": 9.362811362812444e-06, "loss": 0.0016, "step": 74850 }, { "epoch": 0.49249027979710924, "grad_norm": 0.013840212445252796, "learning_rate": 9.362530879179687e-06, "loss": 0.0028, "step": 74860 }, { "epoch": 0.49255606797234264, "grad_norm": 0.09374876574870236, "learning_rate": 9.362250338030725e-06, "loss": 0.0023, "step": 74870 }, { "epoch": 0.49262185614757603, "grad_norm": 0.017639920197133323, "learning_rate": 9.361969739369262e-06, "loss": 0.0017, "step": 74880 }, { "epoch": 0.4926876443228094, "grad_norm": 0.05668413059493221, "learning_rate": 9.361689083198995e-06, "loss": 0.0015, "step": 74890 }, { "epoch": 0.4927534324980428, "grad_norm": 0.11475071096121539, "learning_rate": 9.361408369523624e-06, "loss": 0.0015, "step": 74900 }, { "epoch": 0.49281922067327616, "grad_norm": 0.09654923058878381, "learning_rate": 9.36112759834685e-06, "loss": 0.0045, "step": 74910 }, { "epoch": 0.49288500884850955, "grad_norm": 0.053635381170084005, "learning_rate": 9.360846769672376e-06, "loss": 0.0016, "step": 74920 }, { "epoch": 0.49295079702374295, "grad_norm": 0.04517341042111644, "learning_rate": 9.360565883503903e-06, "loss": 0.002, "step": 74930 }, { "epoch": 0.49301658519897634, "grad_norm": 0.03680098282555504, "learning_rate": 9.360284939845134e-06, "loss": 0.0012, "step": 74940 }, { "epoch": 0.49308237337420974, "grad_norm": 0.0019202228990306209, "learning_rate": 9.360003938699776e-06, "loss": 0.0028, "step": 74950 }, { "epoch": 0.4931481615494431, "grad_norm": 0.31638832756424257, "learning_rate": 9.35972288007153e-06, "loss": 0.0011, "step": 74960 }, { "epoch": 0.49321394972467647, "grad_norm": 0.041922883130717646, "learning_rate": 9.359441763964104e-06, "loss": 0.002, "step": 74970 }, { "epoch": 0.49327973789990986, "grad_norm": 0.056247240778541756, "learning_rate": 9.359160590381203e-06, "loss": 0.0014, "step": 74980 }, { "epoch": 0.49334552607514326, "grad_norm": 0.14967849992651808, "learning_rate": 9.358879359326533e-06, "loss": 0.0022, "step": 74990 }, { "epoch": 0.49341131425037665, "grad_norm": 0.05740316975156564, "learning_rate": 9.358598070803804e-06, "loss": 0.002, "step": 75000 }, { "epoch": 0.49341131425037665, "eval_loss": 0.0012473494280129671, "eval_runtime": 13.0868, "eval_samples_per_second": 15.283, "eval_steps_per_second": 7.641, "step": 75000 }, { "epoch": 0.49347710242561005, "grad_norm": 0.10351230990612614, "learning_rate": 9.358316724816725e-06, "loss": 0.002, "step": 75010 }, { "epoch": 0.4935428906008434, "grad_norm": 0.04494017316450875, "learning_rate": 9.358035321369003e-06, "loss": 0.0022, "step": 75020 }, { "epoch": 0.4936086787760768, "grad_norm": 0.0342019748699663, "learning_rate": 9.35775386046435e-06, "loss": 0.0019, "step": 75030 }, { "epoch": 0.4936744669513102, "grad_norm": 0.04700462555417048, "learning_rate": 9.357472342106475e-06, "loss": 0.0025, "step": 75040 }, { "epoch": 0.49374025512654357, "grad_norm": 0.06673440803490537, "learning_rate": 9.357190766299091e-06, "loss": 0.0017, "step": 75050 }, { "epoch": 0.49380604330177696, "grad_norm": 0.019395544288951662, "learning_rate": 9.356909133045911e-06, "loss": 0.0008, "step": 75060 }, { "epoch": 0.4938718314770103, "grad_norm": 0.09495240578769329, "learning_rate": 9.356627442350646e-06, "loss": 0.0027, "step": 75070 }, { "epoch": 0.4939376196522437, "grad_norm": 0.23880049699660616, "learning_rate": 9.356345694217011e-06, "loss": 0.003, "step": 75080 }, { "epoch": 0.4940034078274771, "grad_norm": 0.22400314474430796, "learning_rate": 9.356063888648718e-06, "loss": 0.0021, "step": 75090 }, { "epoch": 0.4940691960027105, "grad_norm": 0.01622530473196208, "learning_rate": 9.355782025649488e-06, "loss": 0.0014, "step": 75100 }, { "epoch": 0.4941349841779439, "grad_norm": 0.12559024795090873, "learning_rate": 9.355500105223033e-06, "loss": 0.0017, "step": 75110 }, { "epoch": 0.4942007723531772, "grad_norm": 0.020301781406138704, "learning_rate": 9.35521812737307e-06, "loss": 0.0006, "step": 75120 }, { "epoch": 0.4942665605284106, "grad_norm": 0.000797901597646693, "learning_rate": 9.354936092103318e-06, "loss": 0.002, "step": 75130 }, { "epoch": 0.494332348703644, "grad_norm": 0.07820969891056415, "learning_rate": 9.354653999417495e-06, "loss": 0.0015, "step": 75140 }, { "epoch": 0.4943981368788774, "grad_norm": 0.0010208669272568928, "learning_rate": 9.35437184931932e-06, "loss": 0.0042, "step": 75150 }, { "epoch": 0.4944639250541108, "grad_norm": 0.040036964052390954, "learning_rate": 9.354089641812511e-06, "loss": 0.0024, "step": 75160 }, { "epoch": 0.49452971322934414, "grad_norm": 0.04435498222820449, "learning_rate": 9.353807376900794e-06, "loss": 0.0011, "step": 75170 }, { "epoch": 0.49459550140457753, "grad_norm": 0.06846597124891028, "learning_rate": 9.353525054587885e-06, "loss": 0.0018, "step": 75180 }, { "epoch": 0.4946612895798109, "grad_norm": 0.02742981281602704, "learning_rate": 9.353242674877508e-06, "loss": 0.0018, "step": 75190 }, { "epoch": 0.4947270777550443, "grad_norm": 0.0688780896309575, "learning_rate": 9.352960237773386e-06, "loss": 0.0054, "step": 75200 }, { "epoch": 0.4947928659302777, "grad_norm": 0.08689554745690597, "learning_rate": 9.352677743279241e-06, "loss": 0.0025, "step": 75210 }, { "epoch": 0.49485865410551105, "grad_norm": 0.09092235255405344, "learning_rate": 9.352395191398802e-06, "loss": 0.0016, "step": 75220 }, { "epoch": 0.49492444228074445, "grad_norm": 0.03157985776414139, "learning_rate": 9.352112582135791e-06, "loss": 0.0017, "step": 75230 }, { "epoch": 0.49499023045597784, "grad_norm": 0.09023969675254963, "learning_rate": 9.351829915493933e-06, "loss": 0.0036, "step": 75240 }, { "epoch": 0.49505601863121124, "grad_norm": 0.07875153339398928, "learning_rate": 9.351547191476957e-06, "loss": 0.0022, "step": 75250 }, { "epoch": 0.49512180680644463, "grad_norm": 0.1018198867016555, "learning_rate": 9.35126441008859e-06, "loss": 0.0024, "step": 75260 }, { "epoch": 0.49518759498167797, "grad_norm": 0.12061510430265059, "learning_rate": 9.350981571332558e-06, "loss": 0.0017, "step": 75270 }, { "epoch": 0.49525338315691136, "grad_norm": 0.019749759777349184, "learning_rate": 9.350698675212593e-06, "loss": 0.0012, "step": 75280 }, { "epoch": 0.49531917133214476, "grad_norm": 0.01964328843317285, "learning_rate": 9.350415721732423e-06, "loss": 0.0009, "step": 75290 }, { "epoch": 0.49538495950737815, "grad_norm": 0.06531737791790912, "learning_rate": 9.350132710895779e-06, "loss": 0.0012, "step": 75300 }, { "epoch": 0.49545074768261155, "grad_norm": 0.030304677827519663, "learning_rate": 9.349849642706392e-06, "loss": 0.0021, "step": 75310 }, { "epoch": 0.4955165358578449, "grad_norm": 0.05977952804076355, "learning_rate": 9.349566517167997e-06, "loss": 0.0033, "step": 75320 }, { "epoch": 0.4955823240330783, "grad_norm": 0.15591543567776178, "learning_rate": 9.34928333428432e-06, "loss": 0.0052, "step": 75330 }, { "epoch": 0.4956481122083117, "grad_norm": 0.0029623758341252283, "learning_rate": 9.3490000940591e-06, "loss": 0.0015, "step": 75340 }, { "epoch": 0.49571390038354507, "grad_norm": 0.1367257661875268, "learning_rate": 9.34871679649607e-06, "loss": 0.0042, "step": 75350 }, { "epoch": 0.49577968855877846, "grad_norm": 0.06818642741627703, "learning_rate": 9.348433441598966e-06, "loss": 0.003, "step": 75360 }, { "epoch": 0.4958454767340118, "grad_norm": 0.16614821763225238, "learning_rate": 9.348150029371521e-06, "loss": 0.0029, "step": 75370 }, { "epoch": 0.4959112649092452, "grad_norm": 0.13601357148027202, "learning_rate": 9.347866559817473e-06, "loss": 0.0022, "step": 75380 }, { "epoch": 0.4959770530844786, "grad_norm": 0.0741521426454875, "learning_rate": 9.34758303294056e-06, "loss": 0.001, "step": 75390 }, { "epoch": 0.496042841259712, "grad_norm": 0.0026412238829253733, "learning_rate": 9.34729944874452e-06, "loss": 0.0016, "step": 75400 }, { "epoch": 0.4961086294349454, "grad_norm": 0.1270253877198521, "learning_rate": 9.347015807233091e-06, "loss": 0.0018, "step": 75410 }, { "epoch": 0.4961744176101787, "grad_norm": 0.021424308942991004, "learning_rate": 9.346732108410013e-06, "loss": 0.0016, "step": 75420 }, { "epoch": 0.4962402057854121, "grad_norm": 0.09024661645524033, "learning_rate": 9.346448352279025e-06, "loss": 0.0032, "step": 75430 }, { "epoch": 0.4963059939606455, "grad_norm": 0.16121334823278716, "learning_rate": 9.346164538843872e-06, "loss": 0.0028, "step": 75440 }, { "epoch": 0.4963717821358789, "grad_norm": 0.1467281112925701, "learning_rate": 9.345880668108291e-06, "loss": 0.0013, "step": 75450 }, { "epoch": 0.4964375703111123, "grad_norm": 0.05129872305563786, "learning_rate": 9.345596740076027e-06, "loss": 0.0021, "step": 75460 }, { "epoch": 0.4965033584863457, "grad_norm": 0.0923355856233973, "learning_rate": 9.345312754750823e-06, "loss": 0.0023, "step": 75470 }, { "epoch": 0.49656914666157903, "grad_norm": 0.07692837712665847, "learning_rate": 9.345028712136424e-06, "loss": 0.0017, "step": 75480 }, { "epoch": 0.4966349348368124, "grad_norm": 0.053438861987731, "learning_rate": 9.344744612236574e-06, "loss": 0.0024, "step": 75490 }, { "epoch": 0.4967007230120458, "grad_norm": 0.15497007091823367, "learning_rate": 9.344460455055018e-06, "loss": 0.0012, "step": 75500 }, { "epoch": 0.4967665111872792, "grad_norm": 0.008995272254751338, "learning_rate": 9.344176240595504e-06, "loss": 0.0012, "step": 75510 }, { "epoch": 0.4968322993625126, "grad_norm": 0.3870590198816611, "learning_rate": 9.343891968861777e-06, "loss": 0.0027, "step": 75520 }, { "epoch": 0.49689808753774595, "grad_norm": 0.6068118651204182, "learning_rate": 9.343607639857586e-06, "loss": 0.0023, "step": 75530 }, { "epoch": 0.49696387571297934, "grad_norm": 0.05424660502019822, "learning_rate": 9.34332325358668e-06, "loss": 0.0019, "step": 75540 }, { "epoch": 0.49702966388821274, "grad_norm": 0.04802337476067777, "learning_rate": 9.34303881005281e-06, "loss": 0.0012, "step": 75550 }, { "epoch": 0.49709545206344613, "grad_norm": 0.06970175751332382, "learning_rate": 9.34275430925972e-06, "loss": 0.0021, "step": 75560 }, { "epoch": 0.4971612402386795, "grad_norm": 0.04214343099863509, "learning_rate": 9.342469751211169e-06, "loss": 0.0036, "step": 75570 }, { "epoch": 0.49722702841391286, "grad_norm": 0.09658352343943848, "learning_rate": 9.342185135910901e-06, "loss": 0.0017, "step": 75580 }, { "epoch": 0.49729281658914626, "grad_norm": 0.042396163883324664, "learning_rate": 9.341900463362677e-06, "loss": 0.0017, "step": 75590 }, { "epoch": 0.49735860476437965, "grad_norm": 0.05715399838612025, "learning_rate": 9.341615733570241e-06, "loss": 0.0014, "step": 75600 }, { "epoch": 0.49742439293961305, "grad_norm": 0.09399564457881135, "learning_rate": 9.341330946537352e-06, "loss": 0.0018, "step": 75610 }, { "epoch": 0.49749018111484644, "grad_norm": 0.11117862366817383, "learning_rate": 9.341046102267766e-06, "loss": 0.0016, "step": 75620 }, { "epoch": 0.4975559692900798, "grad_norm": 0.08031042447876359, "learning_rate": 9.340761200765235e-06, "loss": 0.0018, "step": 75630 }, { "epoch": 0.4976217574653132, "grad_norm": 0.13178635684470702, "learning_rate": 9.340476242033516e-06, "loss": 0.0031, "step": 75640 }, { "epoch": 0.49768754564054657, "grad_norm": 0.011340976034100446, "learning_rate": 9.340191226076368e-06, "loss": 0.0014, "step": 75650 }, { "epoch": 0.49775333381577996, "grad_norm": 0.005615258704478037, "learning_rate": 9.339906152897546e-06, "loss": 0.002, "step": 75660 }, { "epoch": 0.49781912199101336, "grad_norm": 0.11094515040569333, "learning_rate": 9.339621022500809e-06, "loss": 0.0031, "step": 75670 }, { "epoch": 0.4978849101662467, "grad_norm": 0.006276917764380673, "learning_rate": 9.339335834889918e-06, "loss": 0.0031, "step": 75680 }, { "epoch": 0.4979506983414801, "grad_norm": 0.02663807380400989, "learning_rate": 9.33905059006863e-06, "loss": 0.0012, "step": 75690 }, { "epoch": 0.4980164865167135, "grad_norm": 0.05619635665319344, "learning_rate": 9.33876528804071e-06, "loss": 0.0019, "step": 75700 }, { "epoch": 0.4980822746919469, "grad_norm": 0.05042389372922761, "learning_rate": 9.338479928809914e-06, "loss": 0.001, "step": 75710 }, { "epoch": 0.4981480628671803, "grad_norm": 0.01142681345684592, "learning_rate": 9.338194512380008e-06, "loss": 0.0043, "step": 75720 }, { "epoch": 0.4982138510424136, "grad_norm": 0.056535141115185435, "learning_rate": 9.337909038754756e-06, "loss": 0.0015, "step": 75730 }, { "epoch": 0.498279639217647, "grad_norm": 0.027214222058159657, "learning_rate": 9.337623507937918e-06, "loss": 0.0006, "step": 75740 }, { "epoch": 0.4983454273928804, "grad_norm": 0.17341097387190813, "learning_rate": 9.33733791993326e-06, "loss": 0.0019, "step": 75750 }, { "epoch": 0.4984112155681138, "grad_norm": 0.0439376148405483, "learning_rate": 9.33705227474455e-06, "loss": 0.0017, "step": 75760 }, { "epoch": 0.4984770037433472, "grad_norm": 0.08302782317146765, "learning_rate": 9.336766572375549e-06, "loss": 0.0008, "step": 75770 }, { "epoch": 0.49854279191858053, "grad_norm": 0.13495945018418118, "learning_rate": 9.336480812830026e-06, "loss": 0.0017, "step": 75780 }, { "epoch": 0.4986085800938139, "grad_norm": 0.08116761862833276, "learning_rate": 9.33619499611175e-06, "loss": 0.001, "step": 75790 }, { "epoch": 0.4986743682690473, "grad_norm": 0.08362116330254166, "learning_rate": 9.335909122224487e-06, "loss": 0.0028, "step": 75800 }, { "epoch": 0.4987401564442807, "grad_norm": 0.2031585106598176, "learning_rate": 9.335623191172008e-06, "loss": 0.0012, "step": 75810 }, { "epoch": 0.4988059446195141, "grad_norm": 0.08169140224489262, "learning_rate": 9.335337202958081e-06, "loss": 0.0023, "step": 75820 }, { "epoch": 0.49887173279474745, "grad_norm": 0.1463443330496368, "learning_rate": 9.335051157586476e-06, "loss": 0.0023, "step": 75830 }, { "epoch": 0.49893752096998084, "grad_norm": 0.10914749088587176, "learning_rate": 9.334765055060969e-06, "loss": 0.0021, "step": 75840 }, { "epoch": 0.49900330914521424, "grad_norm": 0.04535654193988026, "learning_rate": 9.334478895385325e-06, "loss": 0.0013, "step": 75850 }, { "epoch": 0.49906909732044763, "grad_norm": 0.006241999768720547, "learning_rate": 9.334192678563321e-06, "loss": 0.0012, "step": 75860 }, { "epoch": 0.499134885495681, "grad_norm": 0.11754838967157877, "learning_rate": 9.333906404598732e-06, "loss": 0.0012, "step": 75870 }, { "epoch": 0.49920067367091436, "grad_norm": 0.09155499642728752, "learning_rate": 9.333620073495327e-06, "loss": 0.0023, "step": 75880 }, { "epoch": 0.49926646184614776, "grad_norm": 0.03417196542219648, "learning_rate": 9.333333685256887e-06, "loss": 0.0014, "step": 75890 }, { "epoch": 0.49933225002138115, "grad_norm": 0.009866051489497504, "learning_rate": 9.333047239887183e-06, "loss": 0.0013, "step": 75900 }, { "epoch": 0.49939803819661455, "grad_norm": 0.239545363342283, "learning_rate": 9.332760737389993e-06, "loss": 0.0028, "step": 75910 }, { "epoch": 0.49946382637184794, "grad_norm": 0.19630423528577437, "learning_rate": 9.332474177769095e-06, "loss": 0.0019, "step": 75920 }, { "epoch": 0.4995296145470813, "grad_norm": 0.07625924413505415, "learning_rate": 9.332187561028267e-06, "loss": 0.0017, "step": 75930 }, { "epoch": 0.4995954027223147, "grad_norm": 0.06332085614440804, "learning_rate": 9.331900887171289e-06, "loss": 0.0025, "step": 75940 }, { "epoch": 0.49966119089754807, "grad_norm": 0.12861439151473494, "learning_rate": 9.331614156201938e-06, "loss": 0.0018, "step": 75950 }, { "epoch": 0.49972697907278146, "grad_norm": 0.003329620787762566, "learning_rate": 9.331327368123994e-06, "loss": 0.0013, "step": 75960 }, { "epoch": 0.49979276724801486, "grad_norm": 0.04549195890218772, "learning_rate": 9.331040522941242e-06, "loss": 0.0027, "step": 75970 }, { "epoch": 0.49985855542324825, "grad_norm": 0.1859410515308922, "learning_rate": 9.33075362065746e-06, "loss": 0.0013, "step": 75980 }, { "epoch": 0.4999243435984816, "grad_norm": 0.012303355419631427, "learning_rate": 9.330466661276432e-06, "loss": 0.0016, "step": 75990 }, { "epoch": 0.499990131773715, "grad_norm": 0.26772022214132374, "learning_rate": 9.33017964480194e-06, "loss": 0.0018, "step": 76000 }, { "epoch": 0.5000559199489484, "grad_norm": 0.05009712662977754, "learning_rate": 9.329892571237771e-06, "loss": 0.0014, "step": 76010 }, { "epoch": 0.5001217081241818, "grad_norm": 0.028985164238808186, "learning_rate": 9.329605440587706e-06, "loss": 0.0012, "step": 76020 }, { "epoch": 0.5001874962994152, "grad_norm": 0.11764777436466281, "learning_rate": 9.329318252855533e-06, "loss": 0.0024, "step": 76030 }, { "epoch": 0.5002532844746486, "grad_norm": 0.062260301498172896, "learning_rate": 9.329031008045039e-06, "loss": 0.0013, "step": 76040 }, { "epoch": 0.500319072649882, "grad_norm": 0.08394323826356047, "learning_rate": 9.32874370616001e-06, "loss": 0.0016, "step": 76050 }, { "epoch": 0.5003848608251152, "grad_norm": 0.14780940008330137, "learning_rate": 9.328456347204233e-06, "loss": 0.0032, "step": 76060 }, { "epoch": 0.5004506490003486, "grad_norm": 0.23122587578001205, "learning_rate": 9.328168931181497e-06, "loss": 0.0022, "step": 76070 }, { "epoch": 0.500516437175582, "grad_norm": 0.09302701007427702, "learning_rate": 9.327881458095593e-06, "loss": 0.0016, "step": 76080 }, { "epoch": 0.5005822253508154, "grad_norm": 0.14405105426463685, "learning_rate": 9.327593927950307e-06, "loss": 0.0012, "step": 76090 }, { "epoch": 0.5006480135260488, "grad_norm": 0.05999876453836681, "learning_rate": 9.327306340749436e-06, "loss": 0.0022, "step": 76100 }, { "epoch": 0.5007138017012822, "grad_norm": 0.08017727692639443, "learning_rate": 9.327018696496766e-06, "loss": 0.0014, "step": 76110 }, { "epoch": 0.5007795898765156, "grad_norm": 0.08095601241890443, "learning_rate": 9.326730995196093e-06, "loss": 0.0031, "step": 76120 }, { "epoch": 0.500845378051749, "grad_norm": 0.0785893690373991, "learning_rate": 9.326443236851209e-06, "loss": 0.0018, "step": 76130 }, { "epoch": 0.5009111662269824, "grad_norm": 0.04030471676220692, "learning_rate": 9.326155421465906e-06, "loss": 0.0015, "step": 76140 }, { "epoch": 0.5009769544022158, "grad_norm": 0.014344289298899339, "learning_rate": 9.325867549043981e-06, "loss": 0.0017, "step": 76150 }, { "epoch": 0.5010427425774491, "grad_norm": 0.011476822952747295, "learning_rate": 9.325579619589227e-06, "loss": 0.0024, "step": 76160 }, { "epoch": 0.5011085307526825, "grad_norm": 0.11059232417274015, "learning_rate": 9.325291633105443e-06, "loss": 0.0019, "step": 76170 }, { "epoch": 0.5011743189279159, "grad_norm": 0.1098017528648875, "learning_rate": 9.325003589596424e-06, "loss": 0.0068, "step": 76180 }, { "epoch": 0.5012401071031493, "grad_norm": 0.06563622565206231, "learning_rate": 9.324715489065968e-06, "loss": 0.0017, "step": 76190 }, { "epoch": 0.5013058952783827, "grad_norm": 0.32376269695970444, "learning_rate": 9.324427331517873e-06, "loss": 0.0029, "step": 76200 }, { "epoch": 0.501371683453616, "grad_norm": 0.03241926053037136, "learning_rate": 9.324139116955939e-06, "loss": 0.0028, "step": 76210 }, { "epoch": 0.5014374716288494, "grad_norm": 0.05833209909724237, "learning_rate": 9.323850845383964e-06, "loss": 0.002, "step": 76220 }, { "epoch": 0.5015032598040828, "grad_norm": 0.0542300208925242, "learning_rate": 9.323562516805751e-06, "loss": 0.0012, "step": 76230 }, { "epoch": 0.5015690479793162, "grad_norm": 0.06049300317038945, "learning_rate": 9.3232741312251e-06, "loss": 0.0014, "step": 76240 }, { "epoch": 0.5016348361545496, "grad_norm": 0.04933281250747778, "learning_rate": 9.322985688645814e-06, "loss": 0.0028, "step": 76250 }, { "epoch": 0.501700624329783, "grad_norm": 0.09783766843561349, "learning_rate": 9.322697189071694e-06, "loss": 0.0038, "step": 76260 }, { "epoch": 0.5017664125050163, "grad_norm": 0.24741943254878904, "learning_rate": 9.322408632506547e-06, "loss": 0.002, "step": 76270 }, { "epoch": 0.5018322006802497, "grad_norm": 0.06017543776728961, "learning_rate": 9.322120018954175e-06, "loss": 0.0026, "step": 76280 }, { "epoch": 0.5018979888554831, "grad_norm": 0.05181237591267125, "learning_rate": 9.321831348418381e-06, "loss": 0.0032, "step": 76290 }, { "epoch": 0.5019637770307165, "grad_norm": 0.11170515100854267, "learning_rate": 9.321542620902974e-06, "loss": 0.0018, "step": 76300 }, { "epoch": 0.5020295652059499, "grad_norm": 0.04134784434455558, "learning_rate": 9.32125383641176e-06, "loss": 0.0015, "step": 76310 }, { "epoch": 0.5020953533811833, "grad_norm": 0.04268953718564541, "learning_rate": 9.320964994948548e-06, "loss": 0.0016, "step": 76320 }, { "epoch": 0.5021611415564167, "grad_norm": 0.019684925088502173, "learning_rate": 9.320676096517142e-06, "loss": 0.0013, "step": 76330 }, { "epoch": 0.5022269297316501, "grad_norm": 0.10388482806238125, "learning_rate": 9.320387141121357e-06, "loss": 0.0019, "step": 76340 }, { "epoch": 0.5022927179068835, "grad_norm": 0.09128676259400455, "learning_rate": 9.320098128764995e-06, "loss": 0.0019, "step": 76350 }, { "epoch": 0.5023585060821169, "grad_norm": 0.013289117006593855, "learning_rate": 9.319809059451872e-06, "loss": 0.0017, "step": 76360 }, { "epoch": 0.5024242942573501, "grad_norm": 0.045493446328504515, "learning_rate": 9.319519933185796e-06, "loss": 0.0016, "step": 76370 }, { "epoch": 0.5024900824325835, "grad_norm": 0.054242396523556356, "learning_rate": 9.319230749970582e-06, "loss": 0.001, "step": 76380 }, { "epoch": 0.5025558706078169, "grad_norm": 0.09567449180166444, "learning_rate": 9.31894150981004e-06, "loss": 0.0029, "step": 76390 }, { "epoch": 0.5026216587830503, "grad_norm": 0.20955986143610428, "learning_rate": 9.318652212707984e-06, "loss": 0.0022, "step": 76400 }, { "epoch": 0.5026874469582837, "grad_norm": 0.0533683120280716, "learning_rate": 9.318362858668229e-06, "loss": 0.0011, "step": 76410 }, { "epoch": 0.5027532351335171, "grad_norm": 0.06059423503637028, "learning_rate": 9.31807344769459e-06, "loss": 0.0017, "step": 76420 }, { "epoch": 0.5028190233087505, "grad_norm": 0.010453283647475779, "learning_rate": 9.31778397979088e-06, "loss": 0.0026, "step": 76430 }, { "epoch": 0.5028848114839839, "grad_norm": 0.04834249798027902, "learning_rate": 9.317494454960919e-06, "loss": 0.0011, "step": 76440 }, { "epoch": 0.5029505996592173, "grad_norm": 0.06558229830071448, "learning_rate": 9.31720487320852e-06, "loss": 0.0011, "step": 76450 }, { "epoch": 0.5030163878344507, "grad_norm": 0.07637828767420907, "learning_rate": 9.316915234537506e-06, "loss": 0.0016, "step": 76460 }, { "epoch": 0.503082176009684, "grad_norm": 0.028825306114298125, "learning_rate": 9.31662553895169e-06, "loss": 0.0045, "step": 76470 }, { "epoch": 0.5031479641849174, "grad_norm": 0.011997160141989588, "learning_rate": 9.316335786454897e-06, "loss": 0.0016, "step": 76480 }, { "epoch": 0.5032137523601508, "grad_norm": 0.02342117096110845, "learning_rate": 9.316045977050944e-06, "loss": 0.0029, "step": 76490 }, { "epoch": 0.5032795405353842, "grad_norm": 0.03187499964116415, "learning_rate": 9.315756110743653e-06, "loss": 0.0017, "step": 76500 }, { "epoch": 0.5033453287106175, "grad_norm": 0.008666032629050606, "learning_rate": 9.315466187536843e-06, "loss": 0.0015, "step": 76510 }, { "epoch": 0.5034111168858509, "grad_norm": 0.08588947212662124, "learning_rate": 9.315176207434342e-06, "loss": 0.0009, "step": 76520 }, { "epoch": 0.5034769050610843, "grad_norm": 0.22209176328743283, "learning_rate": 9.314886170439966e-06, "loss": 0.0018, "step": 76530 }, { "epoch": 0.5035426932363177, "grad_norm": 0.033590688590543365, "learning_rate": 9.314596076557545e-06, "loss": 0.0013, "step": 76540 }, { "epoch": 0.5036084814115511, "grad_norm": 0.032010547586319396, "learning_rate": 9.3143059257909e-06, "loss": 0.0012, "step": 76550 }, { "epoch": 0.5036742695867845, "grad_norm": 0.0861576341119876, "learning_rate": 9.314015718143856e-06, "loss": 0.0015, "step": 76560 }, { "epoch": 0.5037400577620178, "grad_norm": 0.07429817656349948, "learning_rate": 9.313725453620242e-06, "loss": 0.0024, "step": 76570 }, { "epoch": 0.5038058459372512, "grad_norm": 0.029687527250934244, "learning_rate": 9.313435132223883e-06, "loss": 0.0009, "step": 76580 }, { "epoch": 0.5038716341124846, "grad_norm": 0.06867597411537282, "learning_rate": 9.313144753958607e-06, "loss": 0.0017, "step": 76590 }, { "epoch": 0.503937422287718, "grad_norm": 0.05898686838470627, "learning_rate": 9.312854318828242e-06, "loss": 0.0017, "step": 76600 }, { "epoch": 0.5040032104629514, "grad_norm": 0.13129250847311522, "learning_rate": 9.312563826836617e-06, "loss": 0.0017, "step": 76610 }, { "epoch": 0.5040689986381848, "grad_norm": 0.055982579729745206, "learning_rate": 9.312273277987565e-06, "loss": 0.0015, "step": 76620 }, { "epoch": 0.5041347868134182, "grad_norm": 0.05071276565385404, "learning_rate": 9.311982672284915e-06, "loss": 0.0016, "step": 76630 }, { "epoch": 0.5042005749886516, "grad_norm": 0.09517406761063583, "learning_rate": 9.311692009732494e-06, "loss": 0.0018, "step": 76640 }, { "epoch": 0.504266363163885, "grad_norm": 0.12372080415274558, "learning_rate": 9.311401290334139e-06, "loss": 0.0021, "step": 76650 }, { "epoch": 0.5043321513391184, "grad_norm": 0.052029993784719085, "learning_rate": 9.311110514093681e-06, "loss": 0.0015, "step": 76660 }, { "epoch": 0.5043979395143517, "grad_norm": 0.0106419822297727, "learning_rate": 9.310819681014955e-06, "loss": 0.0024, "step": 76670 }, { "epoch": 0.504463727689585, "grad_norm": 0.05483247488233353, "learning_rate": 9.310528791101796e-06, "loss": 0.0023, "step": 76680 }, { "epoch": 0.5045295158648184, "grad_norm": 0.010598895930671117, "learning_rate": 9.310237844358036e-06, "loss": 0.0017, "step": 76690 }, { "epoch": 0.5045953040400518, "grad_norm": 0.07056002819670702, "learning_rate": 9.309946840787512e-06, "loss": 0.0024, "step": 76700 }, { "epoch": 0.5046610922152852, "grad_norm": 0.0048885607210138555, "learning_rate": 9.309655780394062e-06, "loss": 0.001, "step": 76710 }, { "epoch": 0.5047268803905186, "grad_norm": 0.04542712742537216, "learning_rate": 9.309364663181523e-06, "loss": 0.0022, "step": 76720 }, { "epoch": 0.504792668565752, "grad_norm": 1.1985141285927787, "learning_rate": 9.309073489153732e-06, "loss": 0.0029, "step": 76730 }, { "epoch": 0.5048584567409854, "grad_norm": 0.13193280476293004, "learning_rate": 9.30878225831453e-06, "loss": 0.0029, "step": 76740 }, { "epoch": 0.5049242449162188, "grad_norm": 0.25920206516584415, "learning_rate": 9.308490970667753e-06, "loss": 0.0028, "step": 76750 }, { "epoch": 0.5049900330914522, "grad_norm": 0.034285677494345954, "learning_rate": 9.308199626217247e-06, "loss": 0.0028, "step": 76760 }, { "epoch": 0.5050558212666856, "grad_norm": 0.024267768182438306, "learning_rate": 9.307908224966846e-06, "loss": 0.0012, "step": 76770 }, { "epoch": 0.5051216094419189, "grad_norm": 0.05422498265990552, "learning_rate": 9.307616766920398e-06, "loss": 0.0019, "step": 76780 }, { "epoch": 0.5051873976171523, "grad_norm": 0.0651091268693641, "learning_rate": 9.307325252081743e-06, "loss": 0.0012, "step": 76790 }, { "epoch": 0.5052531857923857, "grad_norm": 0.060247342790495734, "learning_rate": 9.307033680454724e-06, "loss": 0.0025, "step": 76800 }, { "epoch": 0.505318973967619, "grad_norm": 0.01077220946661635, "learning_rate": 9.306742052043185e-06, "loss": 0.003, "step": 76810 }, { "epoch": 0.5053847621428524, "grad_norm": 0.17549067673844287, "learning_rate": 9.306450366850972e-06, "loss": 0.0022, "step": 76820 }, { "epoch": 0.5054505503180858, "grad_norm": 0.20654245581135486, "learning_rate": 9.306158624881932e-06, "loss": 0.0028, "step": 76830 }, { "epoch": 0.5055163384933192, "grad_norm": 0.07441245019034531, "learning_rate": 9.305866826139906e-06, "loss": 0.0019, "step": 76840 }, { "epoch": 0.5055821266685526, "grad_norm": 0.012497233676254018, "learning_rate": 9.305574970628747e-06, "loss": 0.0023, "step": 76850 }, { "epoch": 0.505647914843786, "grad_norm": 0.008197299135183371, "learning_rate": 9.305283058352302e-06, "loss": 0.002, "step": 76860 }, { "epoch": 0.5057137030190194, "grad_norm": 0.03148555797164189, "learning_rate": 9.304991089314415e-06, "loss": 0.0014, "step": 76870 }, { "epoch": 0.5057794911942527, "grad_norm": 0.037308708722467054, "learning_rate": 9.304699063518941e-06, "loss": 0.0035, "step": 76880 }, { "epoch": 0.5058452793694861, "grad_norm": 0.013479934275229132, "learning_rate": 9.304406980969727e-06, "loss": 0.0014, "step": 76890 }, { "epoch": 0.5059110675447195, "grad_norm": 0.10639522979539294, "learning_rate": 9.304114841670623e-06, "loss": 0.0025, "step": 76900 }, { "epoch": 0.5059768557199529, "grad_norm": 0.07077949387799387, "learning_rate": 9.303822645625484e-06, "loss": 0.0022, "step": 76910 }, { "epoch": 0.5060426438951863, "grad_norm": 0.026845399804505836, "learning_rate": 9.30353039283816e-06, "loss": 0.0017, "step": 76920 }, { "epoch": 0.5061084320704197, "grad_norm": 0.09612172765710252, "learning_rate": 9.303238083312504e-06, "loss": 0.0022, "step": 76930 }, { "epoch": 0.5061742202456531, "grad_norm": 0.10697034676443067, "learning_rate": 9.30294571705237e-06, "loss": 0.0031, "step": 76940 }, { "epoch": 0.5062400084208865, "grad_norm": 0.047187099343874515, "learning_rate": 9.302653294061614e-06, "loss": 0.0014, "step": 76950 }, { "epoch": 0.5063057965961199, "grad_norm": 0.19217880535443568, "learning_rate": 9.30236081434409e-06, "loss": 0.006, "step": 76960 }, { "epoch": 0.5063715847713532, "grad_norm": 0.10994526741090531, "learning_rate": 9.302068277903656e-06, "loss": 0.0026, "step": 76970 }, { "epoch": 0.5064373729465865, "grad_norm": 0.13915876145377196, "learning_rate": 9.301775684744165e-06, "loss": 0.0015, "step": 76980 }, { "epoch": 0.5065031611218199, "grad_norm": 0.031507384783888225, "learning_rate": 9.301483034869476e-06, "loss": 0.002, "step": 76990 }, { "epoch": 0.5065689492970533, "grad_norm": 0.009986718790358493, "learning_rate": 9.30119032828345e-06, "loss": 0.0019, "step": 77000 }, { "epoch": 0.5066347374722867, "grad_norm": 0.07633878519687291, "learning_rate": 9.300897564989944e-06, "loss": 0.0013, "step": 77010 }, { "epoch": 0.5067005256475201, "grad_norm": 0.11989639600709, "learning_rate": 9.300604744992819e-06, "loss": 0.0032, "step": 77020 }, { "epoch": 0.5067663138227535, "grad_norm": 0.0530403129623688, "learning_rate": 9.300311868295932e-06, "loss": 0.001, "step": 77030 }, { "epoch": 0.5068321019979869, "grad_norm": 0.06245899085856569, "learning_rate": 9.30001893490315e-06, "loss": 0.0022, "step": 77040 }, { "epoch": 0.5068978901732203, "grad_norm": 0.061848488101888685, "learning_rate": 9.29972594481833e-06, "loss": 0.0014, "step": 77050 }, { "epoch": 0.5069636783484537, "grad_norm": 0.0010141274515688228, "learning_rate": 9.299432898045339e-06, "loss": 0.0016, "step": 77060 }, { "epoch": 0.5070294665236871, "grad_norm": 0.018700590924978815, "learning_rate": 9.299139794588037e-06, "loss": 0.001, "step": 77070 }, { "epoch": 0.5070952546989204, "grad_norm": 0.030475057964451012, "learning_rate": 9.298846634450289e-06, "loss": 0.0021, "step": 77080 }, { "epoch": 0.5071610428741538, "grad_norm": 0.14280614615186846, "learning_rate": 9.298553417635963e-06, "loss": 0.0024, "step": 77090 }, { "epoch": 0.5072268310493872, "grad_norm": 0.049861888431812995, "learning_rate": 9.29826014414892e-06, "loss": 0.0016, "step": 77100 }, { "epoch": 0.5072926192246205, "grad_norm": 0.10605157248100752, "learning_rate": 9.297966813993031e-06, "loss": 0.0024, "step": 77110 }, { "epoch": 0.5073584073998539, "grad_norm": 0.4432863432510455, "learning_rate": 9.297673427172163e-06, "loss": 0.0032, "step": 77120 }, { "epoch": 0.5074241955750873, "grad_norm": 0.04882776663097314, "learning_rate": 9.29737998369018e-06, "loss": 0.0008, "step": 77130 }, { "epoch": 0.5074899837503207, "grad_norm": 0.06879561447092451, "learning_rate": 9.297086483550955e-06, "loss": 0.0016, "step": 77140 }, { "epoch": 0.5075557719255541, "grad_norm": 0.07184105949564991, "learning_rate": 9.296792926758353e-06, "loss": 0.0012, "step": 77150 }, { "epoch": 0.5076215601007875, "grad_norm": 0.05558378199866645, "learning_rate": 9.29649931331625e-06, "loss": 0.0015, "step": 77160 }, { "epoch": 0.5076873482760209, "grad_norm": 0.10211065725857289, "learning_rate": 9.296205643228514e-06, "loss": 0.0029, "step": 77170 }, { "epoch": 0.5077531364512543, "grad_norm": 0.11486733832444408, "learning_rate": 9.295911916499016e-06, "loss": 0.0038, "step": 77180 }, { "epoch": 0.5078189246264876, "grad_norm": 0.049302286614888, "learning_rate": 9.295618133131631e-06, "loss": 0.0023, "step": 77190 }, { "epoch": 0.507884712801721, "grad_norm": 0.10859936394419226, "learning_rate": 9.29532429313023e-06, "loss": 0.0014, "step": 77200 }, { "epoch": 0.5079505009769544, "grad_norm": 0.11294477125597734, "learning_rate": 9.295030396498688e-06, "loss": 0.0013, "step": 77210 }, { "epoch": 0.5080162891521878, "grad_norm": 0.07248725948062482, "learning_rate": 9.294736443240879e-06, "loss": 0.0023, "step": 77220 }, { "epoch": 0.5080820773274212, "grad_norm": 0.22715212439498683, "learning_rate": 9.29444243336068e-06, "loss": 0.004, "step": 77230 }, { "epoch": 0.5081478655026546, "grad_norm": 0.025935176980274855, "learning_rate": 9.294148366861966e-06, "loss": 0.0019, "step": 77240 }, { "epoch": 0.508213653677888, "grad_norm": 0.14788403869003874, "learning_rate": 9.293854243748615e-06, "loss": 0.0024, "step": 77250 }, { "epoch": 0.5082794418531213, "grad_norm": 0.0401013057694553, "learning_rate": 9.293560064024504e-06, "loss": 0.0015, "step": 77260 }, { "epoch": 0.5083452300283547, "grad_norm": 0.10500678365323167, "learning_rate": 9.293265827693512e-06, "loss": 0.0031, "step": 77270 }, { "epoch": 0.5084110182035881, "grad_norm": 0.12085973645329638, "learning_rate": 9.292971534759516e-06, "loss": 0.0014, "step": 77280 }, { "epoch": 0.5084768063788214, "grad_norm": 0.03926198871320873, "learning_rate": 9.292677185226399e-06, "loss": 0.0032, "step": 77290 }, { "epoch": 0.5085425945540548, "grad_norm": 0.05677067700565706, "learning_rate": 9.292382779098042e-06, "loss": 0.0024, "step": 77300 }, { "epoch": 0.5086083827292882, "grad_norm": 0.07784952386858109, "learning_rate": 9.292088316378325e-06, "loss": 0.0034, "step": 77310 }, { "epoch": 0.5086741709045216, "grad_norm": 0.0769399324819082, "learning_rate": 9.29179379707113e-06, "loss": 0.0028, "step": 77320 }, { "epoch": 0.508739959079755, "grad_norm": 0.11691790339809253, "learning_rate": 9.29149922118034e-06, "loss": 0.0023, "step": 77330 }, { "epoch": 0.5088057472549884, "grad_norm": 0.10112170891749772, "learning_rate": 9.29120458870984e-06, "loss": 0.0021, "step": 77340 }, { "epoch": 0.5088715354302218, "grad_norm": 0.03362911179877116, "learning_rate": 9.290909899663513e-06, "loss": 0.003, "step": 77350 }, { "epoch": 0.5089373236054552, "grad_norm": 0.07726200913722343, "learning_rate": 9.290615154045244e-06, "loss": 0.0021, "step": 77360 }, { "epoch": 0.5090031117806886, "grad_norm": 0.07370917133574834, "learning_rate": 9.29032035185892e-06, "loss": 0.002, "step": 77370 }, { "epoch": 0.509068899955922, "grad_norm": 0.04532797748050992, "learning_rate": 9.29002549310843e-06, "loss": 0.0017, "step": 77380 }, { "epoch": 0.5091346881311553, "grad_norm": 0.01993382029438785, "learning_rate": 9.289730577797657e-06, "loss": 0.001, "step": 77390 }, { "epoch": 0.5092004763063886, "grad_norm": 0.1807554617479341, "learning_rate": 9.289435605930491e-06, "loss": 0.0013, "step": 77400 }, { "epoch": 0.509266264481622, "grad_norm": 0.05521565839104069, "learning_rate": 9.289140577510823e-06, "loss": 0.0022, "step": 77410 }, { "epoch": 0.5093320526568554, "grad_norm": 0.06827148826730867, "learning_rate": 9.288845492542538e-06, "loss": 0.0025, "step": 77420 }, { "epoch": 0.5093978408320888, "grad_norm": 0.032244118774028746, "learning_rate": 9.288550351029531e-06, "loss": 0.0014, "step": 77430 }, { "epoch": 0.5094636290073222, "grad_norm": 0.13459488991763197, "learning_rate": 9.288255152975691e-06, "loss": 0.0014, "step": 77440 }, { "epoch": 0.5095294171825556, "grad_norm": 0.5449637136230869, "learning_rate": 9.28795989838491e-06, "loss": 0.0029, "step": 77450 }, { "epoch": 0.509595205357789, "grad_norm": 0.04626275673750874, "learning_rate": 9.287664587261082e-06, "loss": 0.0049, "step": 77460 }, { "epoch": 0.5096609935330224, "grad_norm": 0.1184667039096374, "learning_rate": 9.287369219608098e-06, "loss": 0.0017, "step": 77470 }, { "epoch": 0.5097267817082558, "grad_norm": 0.059792494757681634, "learning_rate": 9.287073795429854e-06, "loss": 0.0009, "step": 77480 }, { "epoch": 0.5097925698834891, "grad_norm": 0.14962262782448807, "learning_rate": 9.286778314730247e-06, "loss": 0.0018, "step": 77490 }, { "epoch": 0.5098583580587225, "grad_norm": 0.05494155782959239, "learning_rate": 9.286482777513168e-06, "loss": 0.001, "step": 77500 }, { "epoch": 0.5099241462339559, "grad_norm": 0.037283765488136465, "learning_rate": 9.286187183782516e-06, "loss": 0.0024, "step": 77510 }, { "epoch": 0.5099899344091893, "grad_norm": 0.08015566622939323, "learning_rate": 9.285891533542188e-06, "loss": 0.0012, "step": 77520 }, { "epoch": 0.5100557225844227, "grad_norm": 0.029487002964095732, "learning_rate": 9.285595826796083e-06, "loss": 0.0016, "step": 77530 }, { "epoch": 0.5101215107596561, "grad_norm": 0.017819825986836485, "learning_rate": 9.285300063548095e-06, "loss": 0.0009, "step": 77540 }, { "epoch": 0.5101872989348895, "grad_norm": 0.07454007608664917, "learning_rate": 9.28500424380213e-06, "loss": 0.0016, "step": 77550 }, { "epoch": 0.5102530871101228, "grad_norm": 0.030277637588589368, "learning_rate": 9.284708367562083e-06, "loss": 0.0016, "step": 77560 }, { "epoch": 0.5103188752853562, "grad_norm": 0.06580484169868693, "learning_rate": 9.284412434831856e-06, "loss": 0.0013, "step": 77570 }, { "epoch": 0.5103846634605896, "grad_norm": 0.001473592176310579, "learning_rate": 9.284116445615353e-06, "loss": 0.0025, "step": 77580 }, { "epoch": 0.510450451635823, "grad_norm": 0.07431103069771845, "learning_rate": 9.283820399916474e-06, "loss": 0.0011, "step": 77590 }, { "epoch": 0.5105162398110563, "grad_norm": 0.11424473679277475, "learning_rate": 9.283524297739122e-06, "loss": 0.0017, "step": 77600 }, { "epoch": 0.5105820279862897, "grad_norm": 0.019222482036845377, "learning_rate": 9.283228139087204e-06, "loss": 0.0015, "step": 77610 }, { "epoch": 0.5106478161615231, "grad_norm": 0.0956607379895116, "learning_rate": 9.28293192396462e-06, "loss": 0.0013, "step": 77620 }, { "epoch": 0.5107136043367565, "grad_norm": 0.013613240211167811, "learning_rate": 9.282635652375278e-06, "loss": 0.0014, "step": 77630 }, { "epoch": 0.5107793925119899, "grad_norm": 0.06740807204552811, "learning_rate": 9.282339324323084e-06, "loss": 0.0008, "step": 77640 }, { "epoch": 0.5108451806872233, "grad_norm": 0.0996023810701718, "learning_rate": 9.282042939811944e-06, "loss": 0.0019, "step": 77650 }, { "epoch": 0.5109109688624567, "grad_norm": 0.08611402456190921, "learning_rate": 9.281746498845767e-06, "loss": 0.0018, "step": 77660 }, { "epoch": 0.5109767570376901, "grad_norm": 0.08328149712054826, "learning_rate": 9.28145000142846e-06, "loss": 0.0008, "step": 77670 }, { "epoch": 0.5110425452129235, "grad_norm": 0.04848143689776534, "learning_rate": 9.281153447563932e-06, "loss": 0.0008, "step": 77680 }, { "epoch": 0.5111083333881569, "grad_norm": 0.14299207196058633, "learning_rate": 9.280856837256094e-06, "loss": 0.0027, "step": 77690 }, { "epoch": 0.5111741215633901, "grad_norm": 0.0847791689938115, "learning_rate": 9.280560170508855e-06, "loss": 0.0021, "step": 77700 }, { "epoch": 0.5112399097386235, "grad_norm": 0.017188652386135886, "learning_rate": 9.280263447326128e-06, "loss": 0.0028, "step": 77710 }, { "epoch": 0.5113056979138569, "grad_norm": 0.10277121361196558, "learning_rate": 9.279966667711823e-06, "loss": 0.0013, "step": 77720 }, { "epoch": 0.5113714860890903, "grad_norm": 0.12902096397543206, "learning_rate": 9.279669831669854e-06, "loss": 0.0029, "step": 77730 }, { "epoch": 0.5114372742643237, "grad_norm": 0.07513140568368479, "learning_rate": 9.279372939204135e-06, "loss": 0.0023, "step": 77740 }, { "epoch": 0.5115030624395571, "grad_norm": 0.05610255813385991, "learning_rate": 9.27907599031858e-06, "loss": 0.0015, "step": 77750 }, { "epoch": 0.5115688506147905, "grad_norm": 0.028352969377455146, "learning_rate": 9.278778985017103e-06, "loss": 0.0011, "step": 77760 }, { "epoch": 0.5116346387900239, "grad_norm": 0.047318019448153975, "learning_rate": 9.27848192330362e-06, "loss": 0.0015, "step": 77770 }, { "epoch": 0.5117004269652573, "grad_norm": 0.20182081124823306, "learning_rate": 9.278184805182049e-06, "loss": 0.0014, "step": 77780 }, { "epoch": 0.5117662151404907, "grad_norm": 0.07513538749168956, "learning_rate": 9.277887630656306e-06, "loss": 0.0067, "step": 77790 }, { "epoch": 0.511832003315724, "grad_norm": 0.01384156569188805, "learning_rate": 9.277590399730308e-06, "loss": 0.0025, "step": 77800 }, { "epoch": 0.5118977914909574, "grad_norm": 0.09657063858306854, "learning_rate": 9.277293112407976e-06, "loss": 0.0022, "step": 77810 }, { "epoch": 0.5119635796661908, "grad_norm": 0.14736890306181655, "learning_rate": 9.276995768693229e-06, "loss": 0.002, "step": 77820 }, { "epoch": 0.5120293678414242, "grad_norm": 0.02958877722217888, "learning_rate": 9.276698368589987e-06, "loss": 0.0022, "step": 77830 }, { "epoch": 0.5120951560166576, "grad_norm": 0.02154155158881731, "learning_rate": 9.276400912102169e-06, "loss": 0.003, "step": 77840 }, { "epoch": 0.512160944191891, "grad_norm": 0.08699852539407439, "learning_rate": 9.276103399233698e-06, "loss": 0.002, "step": 77850 }, { "epoch": 0.5122267323671243, "grad_norm": 0.10054072229197367, "learning_rate": 9.275805829988499e-06, "loss": 0.003, "step": 77860 }, { "epoch": 0.5122925205423577, "grad_norm": 0.019679486739765262, "learning_rate": 9.275508204370491e-06, "loss": 0.0009, "step": 77870 }, { "epoch": 0.5123583087175911, "grad_norm": 0.03338050285007669, "learning_rate": 9.275210522383602e-06, "loss": 0.0011, "step": 77880 }, { "epoch": 0.5124240968928245, "grad_norm": 0.015033980845489115, "learning_rate": 9.274912784031752e-06, "loss": 0.0012, "step": 77890 }, { "epoch": 0.5124898850680578, "grad_norm": 0.12995155724065735, "learning_rate": 9.27461498931887e-06, "loss": 0.0026, "step": 77900 }, { "epoch": 0.5125556732432912, "grad_norm": 0.1789074163267857, "learning_rate": 9.274317138248882e-06, "loss": 0.0028, "step": 77910 }, { "epoch": 0.5126214614185246, "grad_norm": 0.044263772189534374, "learning_rate": 9.274019230825714e-06, "loss": 0.0012, "step": 77920 }, { "epoch": 0.512687249593758, "grad_norm": 0.03554981412957293, "learning_rate": 9.273721267053293e-06, "loss": 0.0013, "step": 77930 }, { "epoch": 0.5127530377689914, "grad_norm": 0.03376151913055326, "learning_rate": 9.273423246935549e-06, "loss": 0.0016, "step": 77940 }, { "epoch": 0.5128188259442248, "grad_norm": 0.09497927915180841, "learning_rate": 9.273125170476408e-06, "loss": 0.0009, "step": 77950 }, { "epoch": 0.5128846141194582, "grad_norm": 0.06251187866710557, "learning_rate": 9.272827037679806e-06, "loss": 0.0031, "step": 77960 }, { "epoch": 0.5129504022946916, "grad_norm": 0.028382187339501354, "learning_rate": 9.272528848549666e-06, "loss": 0.0012, "step": 77970 }, { "epoch": 0.513016190469925, "grad_norm": 0.1683766877927912, "learning_rate": 9.272230603089926e-06, "loss": 0.0022, "step": 77980 }, { "epoch": 0.5130819786451584, "grad_norm": 0.02239503647730946, "learning_rate": 9.271932301304513e-06, "loss": 0.0015, "step": 77990 }, { "epoch": 0.5131477668203916, "grad_norm": 0.12473313119074023, "learning_rate": 9.271633943197362e-06, "loss": 0.002, "step": 78000 }, { "epoch": 0.513213554995625, "grad_norm": 0.26368419296621587, "learning_rate": 9.271335528772409e-06, "loss": 0.0015, "step": 78010 }, { "epoch": 0.5132793431708584, "grad_norm": 0.029061659770924748, "learning_rate": 9.271037058033586e-06, "loss": 0.0005, "step": 78020 }, { "epoch": 0.5133451313460918, "grad_norm": 0.1045931734062506, "learning_rate": 9.270738530984826e-06, "loss": 0.0016, "step": 78030 }, { "epoch": 0.5134109195213252, "grad_norm": 0.11425013177428629, "learning_rate": 9.270439947630068e-06, "loss": 0.0019, "step": 78040 }, { "epoch": 0.5134767076965586, "grad_norm": 0.024856273860500124, "learning_rate": 9.270141307973246e-06, "loss": 0.0014, "step": 78050 }, { "epoch": 0.513542495871792, "grad_norm": 0.06202969344433716, "learning_rate": 9.2698426120183e-06, "loss": 0.0015, "step": 78060 }, { "epoch": 0.5136082840470254, "grad_norm": 0.08855074510540785, "learning_rate": 9.269543859769166e-06, "loss": 0.002, "step": 78070 }, { "epoch": 0.5136740722222588, "grad_norm": 0.06220296787933964, "learning_rate": 9.269245051229785e-06, "loss": 0.0005, "step": 78080 }, { "epoch": 0.5137398603974922, "grad_norm": 0.013357632744241774, "learning_rate": 9.268946186404096e-06, "loss": 0.0017, "step": 78090 }, { "epoch": 0.5138056485727256, "grad_norm": 0.019903903541959086, "learning_rate": 9.268647265296035e-06, "loss": 0.0033, "step": 78100 }, { "epoch": 0.5138714367479589, "grad_norm": 0.09458909592752307, "learning_rate": 9.26834828790955e-06, "loss": 0.0015, "step": 78110 }, { "epoch": 0.5139372249231923, "grad_norm": 0.09561089089770226, "learning_rate": 9.268049254248576e-06, "loss": 0.002, "step": 78120 }, { "epoch": 0.5140030130984257, "grad_norm": 0.052718529929922216, "learning_rate": 9.267750164317063e-06, "loss": 0.004, "step": 78130 }, { "epoch": 0.5140688012736591, "grad_norm": 0.1731125062044452, "learning_rate": 9.267451018118946e-06, "loss": 0.0024, "step": 78140 }, { "epoch": 0.5141345894488925, "grad_norm": 0.009067041321787492, "learning_rate": 9.267151815658174e-06, "loss": 0.0014, "step": 78150 }, { "epoch": 0.5142003776241258, "grad_norm": 0.023855045903875608, "learning_rate": 9.26685255693869e-06, "loss": 0.0035, "step": 78160 }, { "epoch": 0.5142661657993592, "grad_norm": 0.107310956541927, "learning_rate": 9.266553241964441e-06, "loss": 0.0036, "step": 78170 }, { "epoch": 0.5143319539745926, "grad_norm": 0.09153185215379592, "learning_rate": 9.266253870739373e-06, "loss": 0.0018, "step": 78180 }, { "epoch": 0.514397742149826, "grad_norm": 0.025106797082922763, "learning_rate": 9.265954443267432e-06, "loss": 0.0019, "step": 78190 }, { "epoch": 0.5144635303250594, "grad_norm": 0.044117350306820007, "learning_rate": 9.265654959552566e-06, "loss": 0.0029, "step": 78200 }, { "epoch": 0.5145293185002927, "grad_norm": 0.17601435767442786, "learning_rate": 9.265355419598722e-06, "loss": 0.0011, "step": 78210 }, { "epoch": 0.5145951066755261, "grad_norm": 0.022381175987259905, "learning_rate": 9.265055823409851e-06, "loss": 0.0021, "step": 78220 }, { "epoch": 0.5146608948507595, "grad_norm": 0.004513960721012943, "learning_rate": 9.264756170989905e-06, "loss": 0.0019, "step": 78230 }, { "epoch": 0.5147266830259929, "grad_norm": 0.038934296886301914, "learning_rate": 9.26445646234283e-06, "loss": 0.0013, "step": 78240 }, { "epoch": 0.5147924712012263, "grad_norm": 0.02192760922746702, "learning_rate": 9.26415669747258e-06, "loss": 0.0017, "step": 78250 }, { "epoch": 0.5148582593764597, "grad_norm": 0.07859964810471691, "learning_rate": 9.263856876383107e-06, "loss": 0.0009, "step": 78260 }, { "epoch": 0.5149240475516931, "grad_norm": 0.1281930593383661, "learning_rate": 9.263556999078363e-06, "loss": 0.0028, "step": 78270 }, { "epoch": 0.5149898357269265, "grad_norm": 0.04435349398032107, "learning_rate": 9.263257065562304e-06, "loss": 0.0024, "step": 78280 }, { "epoch": 0.5150556239021599, "grad_norm": 0.060368655506186954, "learning_rate": 9.26295707583888e-06, "loss": 0.0021, "step": 78290 }, { "epoch": 0.5151214120773933, "grad_norm": 0.08497263560394318, "learning_rate": 9.26265702991205e-06, "loss": 0.0026, "step": 78300 }, { "epoch": 0.5151872002526265, "grad_norm": 0.03479334432284593, "learning_rate": 9.262356927785768e-06, "loss": 0.0013, "step": 78310 }, { "epoch": 0.5152529884278599, "grad_norm": 0.008647652796317976, "learning_rate": 9.262056769463991e-06, "loss": 0.0016, "step": 78320 }, { "epoch": 0.5153187766030933, "grad_norm": 0.073307317933442, "learning_rate": 9.261756554950677e-06, "loss": 0.0019, "step": 78330 }, { "epoch": 0.5153845647783267, "grad_norm": 0.29036112668242964, "learning_rate": 9.261456284249784e-06, "loss": 0.0021, "step": 78340 }, { "epoch": 0.5154503529535601, "grad_norm": 0.011076667439635023, "learning_rate": 9.261155957365269e-06, "loss": 0.0014, "step": 78350 }, { "epoch": 0.5155161411287935, "grad_norm": 0.010996427556176273, "learning_rate": 9.260855574301095e-06, "loss": 0.0012, "step": 78360 }, { "epoch": 0.5155819293040269, "grad_norm": 0.07918259981232505, "learning_rate": 9.260555135061217e-06, "loss": 0.0017, "step": 78370 }, { "epoch": 0.5156477174792603, "grad_norm": 0.010021833150354945, "learning_rate": 9.260254639649601e-06, "loss": 0.0012, "step": 78380 }, { "epoch": 0.5157135056544937, "grad_norm": 0.00846718956735469, "learning_rate": 9.259954088070207e-06, "loss": 0.0025, "step": 78390 }, { "epoch": 0.5157792938297271, "grad_norm": 0.11429236346744298, "learning_rate": 9.259653480326997e-06, "loss": 0.0013, "step": 78400 }, { "epoch": 0.5158450820049604, "grad_norm": 0.07248776169748847, "learning_rate": 9.259352816423936e-06, "loss": 0.0018, "step": 78410 }, { "epoch": 0.5159108701801938, "grad_norm": 0.008408168586581019, "learning_rate": 9.259052096364986e-06, "loss": 0.002, "step": 78420 }, { "epoch": 0.5159766583554272, "grad_norm": 0.07482815421601129, "learning_rate": 9.258751320154113e-06, "loss": 0.001, "step": 78430 }, { "epoch": 0.5160424465306606, "grad_norm": 0.15887873707886063, "learning_rate": 9.258450487795281e-06, "loss": 0.0041, "step": 78440 }, { "epoch": 0.516108234705894, "grad_norm": 0.017324649802464336, "learning_rate": 9.258149599292457e-06, "loss": 0.001, "step": 78450 }, { "epoch": 0.5161740228811273, "grad_norm": 0.009970938707622917, "learning_rate": 9.257848654649608e-06, "loss": 0.0014, "step": 78460 }, { "epoch": 0.5162398110563607, "grad_norm": 0.09219522510313939, "learning_rate": 9.257547653870701e-06, "loss": 0.0014, "step": 78470 }, { "epoch": 0.5163055992315941, "grad_norm": 0.04044964750428796, "learning_rate": 9.257246596959708e-06, "loss": 0.0023, "step": 78480 }, { "epoch": 0.5163713874068275, "grad_norm": 0.1523042946003011, "learning_rate": 9.256945483920595e-06, "loss": 0.0015, "step": 78490 }, { "epoch": 0.5164371755820609, "grad_norm": 0.17367832569662775, "learning_rate": 9.25664431475733e-06, "loss": 0.0044, "step": 78500 }, { "epoch": 0.5165029637572942, "grad_norm": 0.03741223373150343, "learning_rate": 9.256343089473888e-06, "loss": 0.0023, "step": 78510 }, { "epoch": 0.5165687519325276, "grad_norm": 0.06828313647010943, "learning_rate": 9.256041808074239e-06, "loss": 0.001, "step": 78520 }, { "epoch": 0.516634540107761, "grad_norm": 0.15915376382555568, "learning_rate": 9.255740470562352e-06, "loss": 0.0024, "step": 78530 }, { "epoch": 0.5167003282829944, "grad_norm": 0.10371171801917108, "learning_rate": 9.255439076942202e-06, "loss": 0.0009, "step": 78540 }, { "epoch": 0.5167661164582278, "grad_norm": 0.23499870200979914, "learning_rate": 9.255137627217767e-06, "loss": 0.002, "step": 78550 }, { "epoch": 0.5168319046334612, "grad_norm": 0.13419435730663568, "learning_rate": 9.254836121393013e-06, "loss": 0.003, "step": 78560 }, { "epoch": 0.5168976928086946, "grad_norm": 0.051087822315000524, "learning_rate": 9.254534559471923e-06, "loss": 0.0025, "step": 78570 }, { "epoch": 0.516963480983928, "grad_norm": 0.03423534184216087, "learning_rate": 9.254232941458467e-06, "loss": 0.0016, "step": 78580 }, { "epoch": 0.5170292691591614, "grad_norm": 0.020301149428111856, "learning_rate": 9.253931267356627e-06, "loss": 0.0015, "step": 78590 }, { "epoch": 0.5170950573343948, "grad_norm": 0.11156106172848827, "learning_rate": 9.253629537170374e-06, "loss": 0.0028, "step": 78600 }, { "epoch": 0.5171608455096282, "grad_norm": 0.06404639595288307, "learning_rate": 9.253327750903689e-06, "loss": 0.0012, "step": 78610 }, { "epoch": 0.5172266336848614, "grad_norm": 0.027412576306744905, "learning_rate": 9.253025908560553e-06, "loss": 0.0017, "step": 78620 }, { "epoch": 0.5172924218600948, "grad_norm": 0.07101201774645056, "learning_rate": 9.252724010144942e-06, "loss": 0.0015, "step": 78630 }, { "epoch": 0.5173582100353282, "grad_norm": 0.03182758171497745, "learning_rate": 9.252422055660839e-06, "loss": 0.0019, "step": 78640 }, { "epoch": 0.5174239982105616, "grad_norm": 0.0053569099684846995, "learning_rate": 9.252120045112223e-06, "loss": 0.0011, "step": 78650 }, { "epoch": 0.517489786385795, "grad_norm": 0.11023013314322158, "learning_rate": 9.251817978503076e-06, "loss": 0.0014, "step": 78660 }, { "epoch": 0.5175555745610284, "grad_norm": 0.015246174257089441, "learning_rate": 9.251515855837383e-06, "loss": 0.0013, "step": 78670 }, { "epoch": 0.5176213627362618, "grad_norm": 0.025207326160091026, "learning_rate": 9.251213677119123e-06, "loss": 0.003, "step": 78680 }, { "epoch": 0.5176871509114952, "grad_norm": 0.1783474935514775, "learning_rate": 9.250911442352284e-06, "loss": 0.0025, "step": 78690 }, { "epoch": 0.5177529390867286, "grad_norm": 0.01463313328196941, "learning_rate": 9.250609151540847e-06, "loss": 0.0018, "step": 78700 }, { "epoch": 0.517818727261962, "grad_norm": 0.003292855577226246, "learning_rate": 9.250306804688803e-06, "loss": 0.0021, "step": 78710 }, { "epoch": 0.5178845154371953, "grad_norm": 0.04389770655654172, "learning_rate": 9.250004401800132e-06, "loss": 0.0028, "step": 78720 }, { "epoch": 0.5179503036124287, "grad_norm": 0.046226323986374274, "learning_rate": 9.249701942878823e-06, "loss": 0.0031, "step": 78730 }, { "epoch": 0.5180160917876621, "grad_norm": 0.058720706704838116, "learning_rate": 9.249399427928865e-06, "loss": 0.002, "step": 78740 }, { "epoch": 0.5180818799628955, "grad_norm": 0.07516109657455156, "learning_rate": 9.249096856954244e-06, "loss": 0.002, "step": 78750 }, { "epoch": 0.5181476681381288, "grad_norm": 0.08353759228978512, "learning_rate": 9.248794229958954e-06, "loss": 0.0026, "step": 78760 }, { "epoch": 0.5182134563133622, "grad_norm": 0.03176222911466668, "learning_rate": 9.24849154694698e-06, "loss": 0.0008, "step": 78770 }, { "epoch": 0.5182792444885956, "grad_norm": 0.0076773942065655005, "learning_rate": 9.248188807922313e-06, "loss": 0.0024, "step": 78780 }, { "epoch": 0.518345032663829, "grad_norm": 0.09668704907353706, "learning_rate": 9.247886012888946e-06, "loss": 0.0016, "step": 78790 }, { "epoch": 0.5184108208390624, "grad_norm": 0.009346087817804298, "learning_rate": 9.247583161850872e-06, "loss": 0.0013, "step": 78800 }, { "epoch": 0.5184766090142958, "grad_norm": 0.037547091964529516, "learning_rate": 9.247280254812084e-06, "loss": 0.0018, "step": 78810 }, { "epoch": 0.5185423971895291, "grad_norm": 0.20721764176943913, "learning_rate": 9.24697729177657e-06, "loss": 0.0046, "step": 78820 }, { "epoch": 0.5186081853647625, "grad_norm": 0.06044029160458628, "learning_rate": 9.24667427274833e-06, "loss": 0.0014, "step": 78830 }, { "epoch": 0.5186739735399959, "grad_norm": 0.09351683890328587, "learning_rate": 9.246371197731358e-06, "loss": 0.002, "step": 78840 }, { "epoch": 0.5187397617152293, "grad_norm": 0.07410939531806372, "learning_rate": 9.246068066729649e-06, "loss": 0.0032, "step": 78850 }, { "epoch": 0.5188055498904627, "grad_norm": 0.05980700973881295, "learning_rate": 9.245764879747198e-06, "loss": 0.0022, "step": 78860 }, { "epoch": 0.5188713380656961, "grad_norm": 0.06163969775043742, "learning_rate": 9.245461636788007e-06, "loss": 0.0014, "step": 78870 }, { "epoch": 0.5189371262409295, "grad_norm": 0.14800488838094678, "learning_rate": 9.245158337856068e-06, "loss": 0.002, "step": 78880 }, { "epoch": 0.5190029144161629, "grad_norm": 0.07724132138449104, "learning_rate": 9.244854982955385e-06, "loss": 0.0023, "step": 78890 }, { "epoch": 0.5190687025913963, "grad_norm": 0.0005431156225138688, "learning_rate": 9.244551572089953e-06, "loss": 0.0012, "step": 78900 }, { "epoch": 0.5191344907666297, "grad_norm": 0.05951166428834875, "learning_rate": 9.244248105263776e-06, "loss": 0.002, "step": 78910 }, { "epoch": 0.5192002789418629, "grad_norm": 0.05803402768719075, "learning_rate": 9.243944582480853e-06, "loss": 0.0021, "step": 78920 }, { "epoch": 0.5192660671170963, "grad_norm": 0.2375023765547611, "learning_rate": 9.243641003745185e-06, "loss": 0.0024, "step": 78930 }, { "epoch": 0.5193318552923297, "grad_norm": 0.3122652500051987, "learning_rate": 9.243337369060776e-06, "loss": 0.0044, "step": 78940 }, { "epoch": 0.5193976434675631, "grad_norm": 0.006033612374611105, "learning_rate": 9.243033678431628e-06, "loss": 0.0014, "step": 78950 }, { "epoch": 0.5194634316427965, "grad_norm": 0.12761950792794657, "learning_rate": 9.242729931861747e-06, "loss": 0.0026, "step": 78960 }, { "epoch": 0.5195292198180299, "grad_norm": 0.07781776169049595, "learning_rate": 9.242426129355136e-06, "loss": 0.002, "step": 78970 }, { "epoch": 0.5195950079932633, "grad_norm": 0.009993834889180308, "learning_rate": 9.242122270915798e-06, "loss": 0.0011, "step": 78980 }, { "epoch": 0.5196607961684967, "grad_norm": 0.06855450900820126, "learning_rate": 9.241818356547744e-06, "loss": 0.001, "step": 78990 }, { "epoch": 0.5197265843437301, "grad_norm": 0.07772695739156714, "learning_rate": 9.241514386254979e-06, "loss": 0.0022, "step": 79000 }, { "epoch": 0.5197923725189635, "grad_norm": 0.045808372771886194, "learning_rate": 9.241210360041508e-06, "loss": 0.0025, "step": 79010 }, { "epoch": 0.5198581606941969, "grad_norm": 0.14751187021906634, "learning_rate": 9.240906277911342e-06, "loss": 0.0021, "step": 79020 }, { "epoch": 0.5199239488694302, "grad_norm": 0.02483973824821159, "learning_rate": 9.24060213986849e-06, "loss": 0.0022, "step": 79030 }, { "epoch": 0.5199897370446636, "grad_norm": 0.04804612494714416, "learning_rate": 9.240297945916961e-06, "loss": 0.0016, "step": 79040 }, { "epoch": 0.520055525219897, "grad_norm": 0.03799369508672318, "learning_rate": 9.239993696060765e-06, "loss": 0.0018, "step": 79050 }, { "epoch": 0.5201213133951303, "grad_norm": 0.0608475567132277, "learning_rate": 9.239689390303914e-06, "loss": 0.0017, "step": 79060 }, { "epoch": 0.5201871015703637, "grad_norm": 0.017227547647896177, "learning_rate": 9.23938502865042e-06, "loss": 0.0015, "step": 79070 }, { "epoch": 0.5202528897455971, "grad_norm": 0.1740343233103119, "learning_rate": 9.239080611104296e-06, "loss": 0.0044, "step": 79080 }, { "epoch": 0.5203186779208305, "grad_norm": 0.0882854264419463, "learning_rate": 9.238776137669556e-06, "loss": 0.002, "step": 79090 }, { "epoch": 0.5203844660960639, "grad_norm": 0.16593762381749333, "learning_rate": 9.238471608350215e-06, "loss": 0.0007, "step": 79100 }, { "epoch": 0.5204502542712973, "grad_norm": 0.12666283631101985, "learning_rate": 9.238167023150283e-06, "loss": 0.001, "step": 79110 }, { "epoch": 0.5205160424465307, "grad_norm": 0.04914994609924246, "learning_rate": 9.237862382073782e-06, "loss": 0.0005, "step": 79120 }, { "epoch": 0.520581830621764, "grad_norm": 0.15332714497424393, "learning_rate": 9.237557685124722e-06, "loss": 0.0021, "step": 79130 }, { "epoch": 0.5206476187969974, "grad_norm": 0.06943509523303483, "learning_rate": 9.237252932307126e-06, "loss": 0.0035, "step": 79140 }, { "epoch": 0.5207134069722308, "grad_norm": 0.11265502095123145, "learning_rate": 9.23694812362501e-06, "loss": 0.0019, "step": 79150 }, { "epoch": 0.5207791951474642, "grad_norm": 0.004676289192519101, "learning_rate": 9.236643259082391e-06, "loss": 0.0012, "step": 79160 }, { "epoch": 0.5208449833226976, "grad_norm": 0.028601090497901407, "learning_rate": 9.23633833868329e-06, "loss": 0.0022, "step": 79170 }, { "epoch": 0.520910771497931, "grad_norm": 0.03957210119620281, "learning_rate": 9.236033362431728e-06, "loss": 0.0017, "step": 79180 }, { "epoch": 0.5209765596731644, "grad_norm": 0.06890814119298823, "learning_rate": 9.235728330331723e-06, "loss": 0.0021, "step": 79190 }, { "epoch": 0.5210423478483978, "grad_norm": 0.041903571485541496, "learning_rate": 9.2354232423873e-06, "loss": 0.0025, "step": 79200 }, { "epoch": 0.5211081360236312, "grad_norm": 0.22331452513397115, "learning_rate": 9.235118098602479e-06, "loss": 0.0031, "step": 79210 }, { "epoch": 0.5211739241988645, "grad_norm": 0.019217878353579857, "learning_rate": 9.234812898981283e-06, "loss": 0.0013, "step": 79220 }, { "epoch": 0.5212397123740978, "grad_norm": 0.21155962171850898, "learning_rate": 9.234507643527736e-06, "loss": 0.0045, "step": 79230 }, { "epoch": 0.5213055005493312, "grad_norm": 0.12351951636732633, "learning_rate": 9.234202332245862e-06, "loss": 0.0021, "step": 79240 }, { "epoch": 0.5213712887245646, "grad_norm": 0.07000458093183347, "learning_rate": 9.23389696513969e-06, "loss": 0.0015, "step": 79250 }, { "epoch": 0.521437076899798, "grad_norm": 0.020884365445415887, "learning_rate": 9.23359154221324e-06, "loss": 0.0014, "step": 79260 }, { "epoch": 0.5215028650750314, "grad_norm": 0.01757439472830231, "learning_rate": 9.233286063470544e-06, "loss": 0.0008, "step": 79270 }, { "epoch": 0.5215686532502648, "grad_norm": 0.1540563208502278, "learning_rate": 9.232980528915627e-06, "loss": 0.0012, "step": 79280 }, { "epoch": 0.5216344414254982, "grad_norm": 0.03470784385942262, "learning_rate": 9.232674938552517e-06, "loss": 0.001, "step": 79290 }, { "epoch": 0.5217002296007316, "grad_norm": 0.004644063346635989, "learning_rate": 9.232369292385243e-06, "loss": 0.0035, "step": 79300 }, { "epoch": 0.521766017775965, "grad_norm": 0.01257675245972217, "learning_rate": 9.232063590417837e-06, "loss": 0.0042, "step": 79310 }, { "epoch": 0.5218318059511984, "grad_norm": 0.0700077174074083, "learning_rate": 9.231757832654327e-06, "loss": 0.0017, "step": 79320 }, { "epoch": 0.5218975941264317, "grad_norm": 0.0594418065875501, "learning_rate": 9.231452019098743e-06, "loss": 0.0031, "step": 79330 }, { "epoch": 0.5219633823016651, "grad_norm": 0.062079332392101165, "learning_rate": 9.231146149755121e-06, "loss": 0.002, "step": 79340 }, { "epoch": 0.5220291704768985, "grad_norm": 0.22504831092225092, "learning_rate": 9.23084022462749e-06, "loss": 0.0025, "step": 79350 }, { "epoch": 0.5220949586521318, "grad_norm": 0.10791285438260348, "learning_rate": 9.230534243719885e-06, "loss": 0.0017, "step": 79360 }, { "epoch": 0.5221607468273652, "grad_norm": 0.05942031960239992, "learning_rate": 9.23022820703634e-06, "loss": 0.002, "step": 79370 }, { "epoch": 0.5222265350025986, "grad_norm": 0.23999663425706674, "learning_rate": 9.229922114580889e-06, "loss": 0.0029, "step": 79380 }, { "epoch": 0.522292323177832, "grad_norm": 0.002170265211868838, "learning_rate": 9.229615966357568e-06, "loss": 0.0008, "step": 79390 }, { "epoch": 0.5223581113530654, "grad_norm": 0.03925693135643959, "learning_rate": 9.229309762370415e-06, "loss": 0.0014, "step": 79400 }, { "epoch": 0.5224238995282988, "grad_norm": 0.0873517008351162, "learning_rate": 9.229003502623463e-06, "loss": 0.0018, "step": 79410 }, { "epoch": 0.5224896877035322, "grad_norm": 0.010153908674480351, "learning_rate": 9.228697187120754e-06, "loss": 0.0025, "step": 79420 }, { "epoch": 0.5225554758787655, "grad_norm": 0.16957807958249402, "learning_rate": 9.228390815866323e-06, "loss": 0.0018, "step": 79430 }, { "epoch": 0.5226212640539989, "grad_norm": 0.10429434949810362, "learning_rate": 9.228084388864214e-06, "loss": 0.0018, "step": 79440 }, { "epoch": 0.5226870522292323, "grad_norm": 0.07590743868981865, "learning_rate": 9.227777906118461e-06, "loss": 0.0028, "step": 79450 }, { "epoch": 0.5227528404044657, "grad_norm": 0.1100303516782568, "learning_rate": 9.227471367633109e-06, "loss": 0.0033, "step": 79460 }, { "epoch": 0.5228186285796991, "grad_norm": 0.0050810212358864655, "learning_rate": 9.227164773412198e-06, "loss": 0.0012, "step": 79470 }, { "epoch": 0.5228844167549325, "grad_norm": 0.0913765508206469, "learning_rate": 9.22685812345977e-06, "loss": 0.0012, "step": 79480 }, { "epoch": 0.5229502049301659, "grad_norm": 0.04566941871432471, "learning_rate": 9.226551417779868e-06, "loss": 0.0036, "step": 79490 }, { "epoch": 0.5230159931053993, "grad_norm": 0.09847790890599523, "learning_rate": 9.226244656376536e-06, "loss": 0.0013, "step": 79500 }, { "epoch": 0.5230817812806327, "grad_norm": 0.06450041953399097, "learning_rate": 9.225937839253817e-06, "loss": 0.0015, "step": 79510 }, { "epoch": 0.523147569455866, "grad_norm": 0.13185808716815658, "learning_rate": 9.22563096641576e-06, "loss": 0.0036, "step": 79520 }, { "epoch": 0.5232133576310994, "grad_norm": 0.17821232054164562, "learning_rate": 9.225324037866408e-06, "loss": 0.0085, "step": 79530 }, { "epoch": 0.5232791458063327, "grad_norm": 0.003808678423750006, "learning_rate": 9.225017053609807e-06, "loss": 0.0014, "step": 79540 }, { "epoch": 0.5233449339815661, "grad_norm": 0.020839829580643007, "learning_rate": 9.224710013650003e-06, "loss": 0.0024, "step": 79550 }, { "epoch": 0.5234107221567995, "grad_norm": 0.046904043976736025, "learning_rate": 9.224402917991048e-06, "loss": 0.0025, "step": 79560 }, { "epoch": 0.5234765103320329, "grad_norm": 0.048607620628521246, "learning_rate": 9.22409576663699e-06, "loss": 0.0013, "step": 79570 }, { "epoch": 0.5235422985072663, "grad_norm": 0.11596460946250281, "learning_rate": 9.223788559591875e-06, "loss": 0.0017, "step": 79580 }, { "epoch": 0.5236080866824997, "grad_norm": 0.03177750542712235, "learning_rate": 9.223481296859758e-06, "loss": 0.0013, "step": 79590 }, { "epoch": 0.5236738748577331, "grad_norm": 0.06170692773618573, "learning_rate": 9.223173978444686e-06, "loss": 0.0019, "step": 79600 }, { "epoch": 0.5237396630329665, "grad_norm": 0.04895807219450888, "learning_rate": 9.222866604350715e-06, "loss": 0.0071, "step": 79610 }, { "epoch": 0.5238054512081999, "grad_norm": 0.026038722530625974, "learning_rate": 9.222559174581893e-06, "loss": 0.0021, "step": 79620 }, { "epoch": 0.5238712393834333, "grad_norm": 0.12337713918287757, "learning_rate": 9.222251689142275e-06, "loss": 0.0024, "step": 79630 }, { "epoch": 0.5239370275586666, "grad_norm": 0.028624197357579845, "learning_rate": 9.221944148035915e-06, "loss": 0.0027, "step": 79640 }, { "epoch": 0.5240028157339, "grad_norm": 0.005438322260190585, "learning_rate": 9.221636551266869e-06, "loss": 0.0015, "step": 79650 }, { "epoch": 0.5240686039091333, "grad_norm": 0.01811479474410437, "learning_rate": 9.22132889883919e-06, "loss": 0.0012, "step": 79660 }, { "epoch": 0.5241343920843667, "grad_norm": 0.025073577619638826, "learning_rate": 9.221021190756935e-06, "loss": 0.0012, "step": 79670 }, { "epoch": 0.5242001802596001, "grad_norm": 0.17313704903056337, "learning_rate": 9.220713427024161e-06, "loss": 0.0027, "step": 79680 }, { "epoch": 0.5242659684348335, "grad_norm": 0.06218703866067507, "learning_rate": 9.220405607644927e-06, "loss": 0.0016, "step": 79690 }, { "epoch": 0.5243317566100669, "grad_norm": 0.11039408821142066, "learning_rate": 9.220097732623289e-06, "loss": 0.001, "step": 79700 }, { "epoch": 0.5243975447853003, "grad_norm": 0.04051933101111625, "learning_rate": 9.219789801963308e-06, "loss": 0.0024, "step": 79710 }, { "epoch": 0.5244633329605337, "grad_norm": 0.07604252472751281, "learning_rate": 9.219481815669042e-06, "loss": 0.001, "step": 79720 }, { "epoch": 0.5245291211357671, "grad_norm": 0.28018021456798475, "learning_rate": 9.219173773744554e-06, "loss": 0.0034, "step": 79730 }, { "epoch": 0.5245949093110004, "grad_norm": 0.018473260513841433, "learning_rate": 9.218865676193901e-06, "loss": 0.0018, "step": 79740 }, { "epoch": 0.5246606974862338, "grad_norm": 0.05859639132112305, "learning_rate": 9.21855752302115e-06, "loss": 0.0022, "step": 79750 }, { "epoch": 0.5247264856614672, "grad_norm": 1.1577037368617078, "learning_rate": 9.218249314230362e-06, "loss": 0.0024, "step": 79760 }, { "epoch": 0.5247922738367006, "grad_norm": 0.05783083997967338, "learning_rate": 9.217941049825598e-06, "loss": 0.002, "step": 79770 }, { "epoch": 0.524858062011934, "grad_norm": 0.16849137739607395, "learning_rate": 9.217632729810927e-06, "loss": 0.0045, "step": 79780 }, { "epoch": 0.5249238501871674, "grad_norm": 0.06052166535150413, "learning_rate": 9.217324354190406e-06, "loss": 0.0028, "step": 79790 }, { "epoch": 0.5249896383624008, "grad_norm": 0.04165638184878494, "learning_rate": 9.217015922968109e-06, "loss": 0.0011, "step": 79800 }, { "epoch": 0.5250554265376342, "grad_norm": 0.02953067403932642, "learning_rate": 9.216707436148098e-06, "loss": 0.0014, "step": 79810 }, { "epoch": 0.5251212147128675, "grad_norm": 0.07452516274484833, "learning_rate": 9.216398893734442e-06, "loss": 0.0034, "step": 79820 }, { "epoch": 0.5251870028881009, "grad_norm": 0.018425769998422846, "learning_rate": 9.216090295731209e-06, "loss": 0.0011, "step": 79830 }, { "epoch": 0.5252527910633342, "grad_norm": 0.1044604417851315, "learning_rate": 9.215781642142466e-06, "loss": 0.0013, "step": 79840 }, { "epoch": 0.5253185792385676, "grad_norm": 0.20512148082010914, "learning_rate": 9.215472932972282e-06, "loss": 0.0018, "step": 79850 }, { "epoch": 0.525384367413801, "grad_norm": 0.05498493940704403, "learning_rate": 9.215164168224729e-06, "loss": 0.0017, "step": 79860 }, { "epoch": 0.5254501555890344, "grad_norm": 0.0668040445776884, "learning_rate": 9.214855347903878e-06, "loss": 0.0009, "step": 79870 }, { "epoch": 0.5255159437642678, "grad_norm": 0.10379663473281635, "learning_rate": 9.214546472013796e-06, "loss": 0.0018, "step": 79880 }, { "epoch": 0.5255817319395012, "grad_norm": 0.3593877380911085, "learning_rate": 9.214237540558562e-06, "loss": 0.0019, "step": 79890 }, { "epoch": 0.5256475201147346, "grad_norm": 0.022458210316673007, "learning_rate": 9.213928553542245e-06, "loss": 0.0011, "step": 79900 }, { "epoch": 0.525713308289968, "grad_norm": 0.011897276981860229, "learning_rate": 9.213619510968918e-06, "loss": 0.0018, "step": 79910 }, { "epoch": 0.5257790964652014, "grad_norm": 0.09821815482101297, "learning_rate": 9.213310412842657e-06, "loss": 0.0014, "step": 79920 }, { "epoch": 0.5258448846404348, "grad_norm": 0.018366735787027112, "learning_rate": 9.21300125916754e-06, "loss": 0.0023, "step": 79930 }, { "epoch": 0.5259106728156681, "grad_norm": 0.016588703309041046, "learning_rate": 9.212692049947636e-06, "loss": 0.0018, "step": 79940 }, { "epoch": 0.5259764609909015, "grad_norm": 0.03828121369536563, "learning_rate": 9.212382785187029e-06, "loss": 0.0014, "step": 79950 }, { "epoch": 0.5260422491661348, "grad_norm": 0.07758695390068837, "learning_rate": 9.21207346488979e-06, "loss": 0.0016, "step": 79960 }, { "epoch": 0.5261080373413682, "grad_norm": 0.17351530689300548, "learning_rate": 9.211764089060002e-06, "loss": 0.0028, "step": 79970 }, { "epoch": 0.5261738255166016, "grad_norm": 0.09868615406390341, "learning_rate": 9.211454657701741e-06, "loss": 0.0015, "step": 79980 }, { "epoch": 0.526239613691835, "grad_norm": 0.038332479641876686, "learning_rate": 9.211145170819088e-06, "loss": 0.0032, "step": 79990 }, { "epoch": 0.5263054018670684, "grad_norm": 0.021644550769202995, "learning_rate": 9.210835628416124e-06, "loss": 0.0012, "step": 80000 }, { "epoch": 0.5263711900423018, "grad_norm": 0.11049159041846617, "learning_rate": 9.210526030496929e-06, "loss": 0.0017, "step": 80010 }, { "epoch": 0.5264369782175352, "grad_norm": 0.044692911920547765, "learning_rate": 9.210216377065584e-06, "loss": 0.0021, "step": 80020 }, { "epoch": 0.5265027663927686, "grad_norm": 0.035210845704344074, "learning_rate": 9.209906668126171e-06, "loss": 0.0008, "step": 80030 }, { "epoch": 0.526568554568002, "grad_norm": 0.04350482079914762, "learning_rate": 9.209596903682777e-06, "loss": 0.0019, "step": 80040 }, { "epoch": 0.5266343427432353, "grad_norm": 0.0802242574513624, "learning_rate": 9.209287083739483e-06, "loss": 0.0024, "step": 80050 }, { "epoch": 0.5267001309184687, "grad_norm": 0.01009035491261604, "learning_rate": 9.208977208300372e-06, "loss": 0.0007, "step": 80060 }, { "epoch": 0.5267659190937021, "grad_norm": 0.08153284064573511, "learning_rate": 9.208667277369531e-06, "loss": 0.0015, "step": 80070 }, { "epoch": 0.5268317072689355, "grad_norm": 0.023334399076673916, "learning_rate": 9.208357290951049e-06, "loss": 0.0012, "step": 80080 }, { "epoch": 0.5268974954441689, "grad_norm": 0.04026140013988629, "learning_rate": 9.20804724904901e-06, "loss": 0.0026, "step": 80090 }, { "epoch": 0.5269632836194023, "grad_norm": 0.06271234418614413, "learning_rate": 9.207737151667505e-06, "loss": 0.0015, "step": 80100 }, { "epoch": 0.5270290717946357, "grad_norm": 0.0819746696707652, "learning_rate": 9.207426998810616e-06, "loss": 0.0012, "step": 80110 }, { "epoch": 0.527094859969869, "grad_norm": 0.06881657086006064, "learning_rate": 9.207116790482438e-06, "loss": 0.0025, "step": 80120 }, { "epoch": 0.5271606481451024, "grad_norm": 0.03738336351419337, "learning_rate": 9.206806526687058e-06, "loss": 0.001, "step": 80130 }, { "epoch": 0.5272264363203358, "grad_norm": 0.027449152846692856, "learning_rate": 9.206496207428565e-06, "loss": 0.0016, "step": 80140 }, { "epoch": 0.5272922244955691, "grad_norm": 0.0351396536907057, "learning_rate": 9.206185832711056e-06, "loss": 0.0027, "step": 80150 }, { "epoch": 0.5273580126708025, "grad_norm": 0.07757851578119575, "learning_rate": 9.205875402538618e-06, "loss": 0.0014, "step": 80160 }, { "epoch": 0.5274238008460359, "grad_norm": 0.059275582727513265, "learning_rate": 9.205564916915344e-06, "loss": 0.0032, "step": 80170 }, { "epoch": 0.5274895890212693, "grad_norm": 0.4299696121749974, "learning_rate": 9.205254375845331e-06, "loss": 0.0052, "step": 80180 }, { "epoch": 0.5275553771965027, "grad_norm": 0.020308761326020205, "learning_rate": 9.204943779332668e-06, "loss": 0.0022, "step": 80190 }, { "epoch": 0.5276211653717361, "grad_norm": 0.03429307480707372, "learning_rate": 9.204633127381457e-06, "loss": 0.0037, "step": 80200 }, { "epoch": 0.5276869535469695, "grad_norm": 0.03963474312259051, "learning_rate": 9.204322419995786e-06, "loss": 0.0011, "step": 80210 }, { "epoch": 0.5277527417222029, "grad_norm": 0.28472362457399697, "learning_rate": 9.204011657179757e-06, "loss": 0.0036, "step": 80220 }, { "epoch": 0.5278185298974363, "grad_norm": 0.08457635435517853, "learning_rate": 9.203700838937463e-06, "loss": 0.0024, "step": 80230 }, { "epoch": 0.5278843180726697, "grad_norm": 0.29989560761018647, "learning_rate": 9.203389965273006e-06, "loss": 0.0025, "step": 80240 }, { "epoch": 0.527950106247903, "grad_norm": 0.11275839250882644, "learning_rate": 9.203079036190483e-06, "loss": 0.0017, "step": 80250 }, { "epoch": 0.5280158944231363, "grad_norm": 0.05428552133203248, "learning_rate": 9.202768051693992e-06, "loss": 0.0029, "step": 80260 }, { "epoch": 0.5280816825983697, "grad_norm": 0.05185779380480577, "learning_rate": 9.202457011787633e-06, "loss": 0.0014, "step": 80270 }, { "epoch": 0.5281474707736031, "grad_norm": 0.05660695702045874, "learning_rate": 9.20214591647551e-06, "loss": 0.0016, "step": 80280 }, { "epoch": 0.5282132589488365, "grad_norm": 0.06974236386253964, "learning_rate": 9.201834765761722e-06, "loss": 0.0026, "step": 80290 }, { "epoch": 0.5282790471240699, "grad_norm": 0.01620841562266283, "learning_rate": 9.20152355965037e-06, "loss": 0.0009, "step": 80300 }, { "epoch": 0.5283448352993033, "grad_norm": 0.03661348692519375, "learning_rate": 9.201212298145559e-06, "loss": 0.0018, "step": 80310 }, { "epoch": 0.5284106234745367, "grad_norm": 0.17662517188468715, "learning_rate": 9.200900981251391e-06, "loss": 0.0019, "step": 80320 }, { "epoch": 0.5284764116497701, "grad_norm": 0.06272526179094645, "learning_rate": 9.200589608971974e-06, "loss": 0.0025, "step": 80330 }, { "epoch": 0.5285421998250035, "grad_norm": 0.03313318285688238, "learning_rate": 9.200278181311411e-06, "loss": 0.0011, "step": 80340 }, { "epoch": 0.5286079880002368, "grad_norm": 0.12503824030200866, "learning_rate": 9.199966698273806e-06, "loss": 0.0017, "step": 80350 }, { "epoch": 0.5286737761754702, "grad_norm": 0.44275814871794533, "learning_rate": 9.199655159863267e-06, "loss": 0.0036, "step": 80360 }, { "epoch": 0.5287395643507036, "grad_norm": 0.6221832152747522, "learning_rate": 9.199343566083904e-06, "loss": 0.0036, "step": 80370 }, { "epoch": 0.528805352525937, "grad_norm": 0.07315971398027507, "learning_rate": 9.19903191693982e-06, "loss": 0.0011, "step": 80380 }, { "epoch": 0.5288711407011704, "grad_norm": 0.35164662733151597, "learning_rate": 9.19872021243513e-06, "loss": 0.0023, "step": 80390 }, { "epoch": 0.5289369288764038, "grad_norm": 0.13799318112805387, "learning_rate": 9.198408452573939e-06, "loss": 0.0021, "step": 80400 }, { "epoch": 0.5290027170516372, "grad_norm": 0.019526338761084904, "learning_rate": 9.198096637360358e-06, "loss": 0.0007, "step": 80410 }, { "epoch": 0.5290685052268705, "grad_norm": 0.04932003745836081, "learning_rate": 9.197784766798498e-06, "loss": 0.002, "step": 80420 }, { "epoch": 0.5291342934021039, "grad_norm": 0.010361823291673343, "learning_rate": 9.197472840892471e-06, "loss": 0.0015, "step": 80430 }, { "epoch": 0.5292000815773373, "grad_norm": 0.047624452430036726, "learning_rate": 9.197160859646392e-06, "loss": 0.0012, "step": 80440 }, { "epoch": 0.5292658697525707, "grad_norm": 0.03914116443421378, "learning_rate": 9.196848823064372e-06, "loss": 0.0025, "step": 80450 }, { "epoch": 0.529331657927804, "grad_norm": 0.41951774115609497, "learning_rate": 9.196536731150523e-06, "loss": 0.0059, "step": 80460 }, { "epoch": 0.5293974461030374, "grad_norm": 0.04637920243209315, "learning_rate": 9.196224583908963e-06, "loss": 0.0041, "step": 80470 }, { "epoch": 0.5294632342782708, "grad_norm": 0.04933347147623399, "learning_rate": 9.195912381343805e-06, "loss": 0.0014, "step": 80480 }, { "epoch": 0.5295290224535042, "grad_norm": 0.04011697993018571, "learning_rate": 9.195600123459166e-06, "loss": 0.0019, "step": 80490 }, { "epoch": 0.5295948106287376, "grad_norm": 0.013782791939249896, "learning_rate": 9.195287810259164e-06, "loss": 0.002, "step": 80500 }, { "epoch": 0.529660598803971, "grad_norm": 0.009716921901527084, "learning_rate": 9.194975441747915e-06, "loss": 0.0019, "step": 80510 }, { "epoch": 0.5297263869792044, "grad_norm": 0.05659446657671855, "learning_rate": 9.194663017929537e-06, "loss": 0.0011, "step": 80520 }, { "epoch": 0.5297921751544378, "grad_norm": 0.13989745238358647, "learning_rate": 9.19435053880815e-06, "loss": 0.0055, "step": 80530 }, { "epoch": 0.5298579633296712, "grad_norm": 0.08252522027390814, "learning_rate": 9.194038004387874e-06, "loss": 0.0024, "step": 80540 }, { "epoch": 0.5299237515049046, "grad_norm": 0.06805178447218896, "learning_rate": 9.19372541467283e-06, "loss": 0.0011, "step": 80550 }, { "epoch": 0.5299895396801378, "grad_norm": 0.13715070671188986, "learning_rate": 9.193412769667137e-06, "loss": 0.0017, "step": 80560 }, { "epoch": 0.5300553278553712, "grad_norm": 0.12968223482878924, "learning_rate": 9.193100069374919e-06, "loss": 0.0015, "step": 80570 }, { "epoch": 0.5301211160306046, "grad_norm": 0.04033261827889311, "learning_rate": 9.192787313800298e-06, "loss": 0.0013, "step": 80580 }, { "epoch": 0.530186904205838, "grad_norm": 0.2700637190685335, "learning_rate": 9.192474502947398e-06, "loss": 0.0047, "step": 80590 }, { "epoch": 0.5302526923810714, "grad_norm": 0.0329275798624794, "learning_rate": 9.19216163682034e-06, "loss": 0.0038, "step": 80600 }, { "epoch": 0.5303184805563048, "grad_norm": 0.05993771956964769, "learning_rate": 9.191848715423253e-06, "loss": 0.0025, "step": 80610 }, { "epoch": 0.5303842687315382, "grad_norm": 0.039586694560042594, "learning_rate": 9.191535738760262e-06, "loss": 0.004, "step": 80620 }, { "epoch": 0.5304500569067716, "grad_norm": 0.18282990443523187, "learning_rate": 9.191222706835492e-06, "loss": 0.002, "step": 80630 }, { "epoch": 0.530515845082005, "grad_norm": 0.07201018199499927, "learning_rate": 9.19090961965307e-06, "loss": 0.0015, "step": 80640 }, { "epoch": 0.5305816332572384, "grad_norm": 0.07069357155770906, "learning_rate": 9.190596477217123e-06, "loss": 0.0017, "step": 80650 }, { "epoch": 0.5306474214324717, "grad_norm": 0.11850699439324601, "learning_rate": 9.190283279531782e-06, "loss": 0.0019, "step": 80660 }, { "epoch": 0.5307132096077051, "grad_norm": 0.08694414816690557, "learning_rate": 9.189970026601174e-06, "loss": 0.0016, "step": 80670 }, { "epoch": 0.5307789977829385, "grad_norm": 0.003763490195916282, "learning_rate": 9.189656718429432e-06, "loss": 0.0016, "step": 80680 }, { "epoch": 0.5308447859581719, "grad_norm": 0.06759334055351152, "learning_rate": 9.189343355020683e-06, "loss": 0.0026, "step": 80690 }, { "epoch": 0.5309105741334053, "grad_norm": 0.011003173411309047, "learning_rate": 9.189029936379059e-06, "loss": 0.0014, "step": 80700 }, { "epoch": 0.5309763623086386, "grad_norm": 0.014212863598545157, "learning_rate": 9.188716462508696e-06, "loss": 0.0016, "step": 80710 }, { "epoch": 0.531042150483872, "grad_norm": 0.19713777595403387, "learning_rate": 9.188402933413722e-06, "loss": 0.0023, "step": 80720 }, { "epoch": 0.5311079386591054, "grad_norm": 0.0047874322518211875, "learning_rate": 9.188089349098272e-06, "loss": 0.0026, "step": 80730 }, { "epoch": 0.5311737268343388, "grad_norm": 0.012653626794846062, "learning_rate": 9.187775709566481e-06, "loss": 0.0013, "step": 80740 }, { "epoch": 0.5312395150095722, "grad_norm": 0.44397135944594385, "learning_rate": 9.187462014822486e-06, "loss": 0.0035, "step": 80750 }, { "epoch": 0.5313053031848055, "grad_norm": 0.07360699343813822, "learning_rate": 9.18714826487042e-06, "loss": 0.0012, "step": 80760 }, { "epoch": 0.5313710913600389, "grad_norm": 0.09192943893079364, "learning_rate": 9.186834459714421e-06, "loss": 0.0008, "step": 80770 }, { "epoch": 0.5314368795352723, "grad_norm": 0.020923039937985586, "learning_rate": 9.186520599358625e-06, "loss": 0.0012, "step": 80780 }, { "epoch": 0.5315026677105057, "grad_norm": 0.04055677096073741, "learning_rate": 9.18620668380717e-06, "loss": 0.0014, "step": 80790 }, { "epoch": 0.5315684558857391, "grad_norm": 0.011420424839464972, "learning_rate": 9.185892713064198e-06, "loss": 0.0016, "step": 80800 }, { "epoch": 0.5316342440609725, "grad_norm": 0.030817339931716283, "learning_rate": 9.185578687133842e-06, "loss": 0.0014, "step": 80810 }, { "epoch": 0.5317000322362059, "grad_norm": 0.01227838123688928, "learning_rate": 9.185264606020247e-06, "loss": 0.0015, "step": 80820 }, { "epoch": 0.5317658204114393, "grad_norm": 0.018471634867977798, "learning_rate": 9.184950469727555e-06, "loss": 0.0015, "step": 80830 }, { "epoch": 0.5318316085866727, "grad_norm": 0.020413975793046036, "learning_rate": 9.184636278259903e-06, "loss": 0.001, "step": 80840 }, { "epoch": 0.5318973967619061, "grad_norm": 0.026913740326794996, "learning_rate": 9.184322031621437e-06, "loss": 0.002, "step": 80850 }, { "epoch": 0.5319631849371393, "grad_norm": 0.10435926981153076, "learning_rate": 9.1840077298163e-06, "loss": 0.0023, "step": 80860 }, { "epoch": 0.5320289731123727, "grad_norm": 0.050687910391412384, "learning_rate": 9.183693372848632e-06, "loss": 0.0009, "step": 80870 }, { "epoch": 0.5320947612876061, "grad_norm": 0.06156203530933544, "learning_rate": 9.183378960722581e-06, "loss": 0.0012, "step": 80880 }, { "epoch": 0.5321605494628395, "grad_norm": 0.060659852134797895, "learning_rate": 9.183064493442291e-06, "loss": 0.0027, "step": 80890 }, { "epoch": 0.5322263376380729, "grad_norm": 0.07701248031343039, "learning_rate": 9.18274997101191e-06, "loss": 0.0031, "step": 80900 }, { "epoch": 0.5322921258133063, "grad_norm": 0.04910888315428879, "learning_rate": 9.182435393435582e-06, "loss": 0.0021, "step": 80910 }, { "epoch": 0.5323579139885397, "grad_norm": 0.07228538030123473, "learning_rate": 9.182120760717455e-06, "loss": 0.0032, "step": 80920 }, { "epoch": 0.5324237021637731, "grad_norm": 0.152207058625437, "learning_rate": 9.181806072861678e-06, "loss": 0.0017, "step": 80930 }, { "epoch": 0.5324894903390065, "grad_norm": 0.020245615396162805, "learning_rate": 9.181491329872401e-06, "loss": 0.0015, "step": 80940 }, { "epoch": 0.5325552785142399, "grad_norm": 0.04505818277053944, "learning_rate": 9.181176531753771e-06, "loss": 0.0019, "step": 80950 }, { "epoch": 0.5326210666894733, "grad_norm": 0.04906389791249498, "learning_rate": 9.180861678509939e-06, "loss": 0.0017, "step": 80960 }, { "epoch": 0.5326868548647066, "grad_norm": 0.04578943627444174, "learning_rate": 9.180546770145057e-06, "loss": 0.0015, "step": 80970 }, { "epoch": 0.53275264303994, "grad_norm": 0.0662514745657511, "learning_rate": 9.180231806663276e-06, "loss": 0.0021, "step": 80980 }, { "epoch": 0.5328184312151734, "grad_norm": 0.15823467047118636, "learning_rate": 9.17991678806875e-06, "loss": 0.0016, "step": 80990 }, { "epoch": 0.5328842193904068, "grad_norm": 0.04362069410704725, "learning_rate": 9.179601714365631e-06, "loss": 0.0013, "step": 81000 }, { "epoch": 0.5329500075656401, "grad_norm": 0.03737373858916176, "learning_rate": 9.179286585558073e-06, "loss": 0.002, "step": 81010 }, { "epoch": 0.5330157957408735, "grad_norm": 0.08407636295219682, "learning_rate": 9.17897140165023e-06, "loss": 0.0031, "step": 81020 }, { "epoch": 0.5330815839161069, "grad_norm": 0.16239979050780157, "learning_rate": 9.178656162646259e-06, "loss": 0.0013, "step": 81030 }, { "epoch": 0.5331473720913403, "grad_norm": 0.1630093831233566, "learning_rate": 9.178340868550314e-06, "loss": 0.0029, "step": 81040 }, { "epoch": 0.5332131602665737, "grad_norm": 0.05880563758240571, "learning_rate": 9.178025519366555e-06, "loss": 0.0024, "step": 81050 }, { "epoch": 0.5332789484418071, "grad_norm": 0.08097581529367462, "learning_rate": 9.177710115099139e-06, "loss": 0.0018, "step": 81060 }, { "epoch": 0.5333447366170404, "grad_norm": 0.07401566132292042, "learning_rate": 9.177394655752221e-06, "loss": 0.0011, "step": 81070 }, { "epoch": 0.5334105247922738, "grad_norm": 0.0155261946142836, "learning_rate": 9.177079141329963e-06, "loss": 0.0017, "step": 81080 }, { "epoch": 0.5334763129675072, "grad_norm": 0.18082071032422717, "learning_rate": 9.176763571836524e-06, "loss": 0.002, "step": 81090 }, { "epoch": 0.5335421011427406, "grad_norm": 0.3234666365270632, "learning_rate": 9.176447947276065e-06, "loss": 0.0051, "step": 81100 }, { "epoch": 0.533607889317974, "grad_norm": 0.039375577571532684, "learning_rate": 9.176132267652746e-06, "loss": 0.0008, "step": 81110 }, { "epoch": 0.5336736774932074, "grad_norm": 0.0734557256804741, "learning_rate": 9.175816532970732e-06, "loss": 0.0018, "step": 81120 }, { "epoch": 0.5337394656684408, "grad_norm": 0.08792527829875686, "learning_rate": 9.17550074323418e-06, "loss": 0.0038, "step": 81130 }, { "epoch": 0.5338052538436742, "grad_norm": 0.048775251550302566, "learning_rate": 9.17518489844726e-06, "loss": 0.0014, "step": 81140 }, { "epoch": 0.5338710420189076, "grad_norm": 0.012013633254484806, "learning_rate": 9.174868998614132e-06, "loss": 0.0017, "step": 81150 }, { "epoch": 0.533936830194141, "grad_norm": 0.030535837312247165, "learning_rate": 9.17455304373896e-06, "loss": 0.001, "step": 81160 }, { "epoch": 0.5340026183693742, "grad_norm": 0.0335025237558226, "learning_rate": 9.174237033825914e-06, "loss": 0.002, "step": 81170 }, { "epoch": 0.5340684065446076, "grad_norm": 0.1803028932335642, "learning_rate": 9.173920968879157e-06, "loss": 0.0017, "step": 81180 }, { "epoch": 0.534134194719841, "grad_norm": 0.06445706283606434, "learning_rate": 9.173604848902858e-06, "loss": 0.0013, "step": 81190 }, { "epoch": 0.5341999828950744, "grad_norm": 0.06003547293330186, "learning_rate": 9.173288673901181e-06, "loss": 0.0012, "step": 81200 }, { "epoch": 0.5342657710703078, "grad_norm": 0.09274952705670163, "learning_rate": 9.172972443878299e-06, "loss": 0.0025, "step": 81210 }, { "epoch": 0.5343315592455412, "grad_norm": 0.06633527321039476, "learning_rate": 9.17265615883838e-06, "loss": 0.0008, "step": 81220 }, { "epoch": 0.5343973474207746, "grad_norm": 0.008278482248291067, "learning_rate": 9.172339818785591e-06, "loss": 0.0005, "step": 81230 }, { "epoch": 0.534463135596008, "grad_norm": 0.08009610575893812, "learning_rate": 9.172023423724108e-06, "loss": 0.0025, "step": 81240 }, { "epoch": 0.5345289237712414, "grad_norm": 0.10183282546590035, "learning_rate": 9.171706973658097e-06, "loss": 0.0017, "step": 81250 }, { "epoch": 0.5345947119464748, "grad_norm": 0.04082000567812398, "learning_rate": 9.171390468591733e-06, "loss": 0.0016, "step": 81260 }, { "epoch": 0.5346605001217081, "grad_norm": 0.21244205767262342, "learning_rate": 9.171073908529188e-06, "loss": 0.0042, "step": 81270 }, { "epoch": 0.5347262882969415, "grad_norm": 0.03927821988489202, "learning_rate": 9.170757293474637e-06, "loss": 0.0014, "step": 81280 }, { "epoch": 0.5347920764721749, "grad_norm": 0.08274194771191358, "learning_rate": 9.170440623432252e-06, "loss": 0.0013, "step": 81290 }, { "epoch": 0.5348578646474083, "grad_norm": 0.018045352136129716, "learning_rate": 9.170123898406209e-06, "loss": 0.0013, "step": 81300 }, { "epoch": 0.5349236528226416, "grad_norm": 0.019741577290580358, "learning_rate": 9.169807118400683e-06, "loss": 0.001, "step": 81310 }, { "epoch": 0.534989440997875, "grad_norm": 0.034875873359280087, "learning_rate": 9.169490283419855e-06, "loss": 0.0011, "step": 81320 }, { "epoch": 0.5350552291731084, "grad_norm": 0.09884567761119878, "learning_rate": 9.169173393467897e-06, "loss": 0.0019, "step": 81330 }, { "epoch": 0.5351210173483418, "grad_norm": 0.0097438308914364, "learning_rate": 9.168856448548988e-06, "loss": 0.0044, "step": 81340 }, { "epoch": 0.5351868055235752, "grad_norm": 0.15639266540857233, "learning_rate": 9.168539448667306e-06, "loss": 0.0019, "step": 81350 }, { "epoch": 0.5352525936988086, "grad_norm": 0.06607275861536004, "learning_rate": 9.168222393827034e-06, "loss": 0.001, "step": 81360 }, { "epoch": 0.5353183818740419, "grad_norm": 0.03349285666889815, "learning_rate": 9.167905284032347e-06, "loss": 0.0031, "step": 81370 }, { "epoch": 0.5353841700492753, "grad_norm": 0.03044607995659288, "learning_rate": 9.167588119287432e-06, "loss": 0.0023, "step": 81380 }, { "epoch": 0.5354499582245087, "grad_norm": 0.07180604283749319, "learning_rate": 9.167270899596465e-06, "loss": 0.0016, "step": 81390 }, { "epoch": 0.5355157463997421, "grad_norm": 0.0407454117734474, "learning_rate": 9.16695362496363e-06, "loss": 0.0028, "step": 81400 }, { "epoch": 0.5355815345749755, "grad_norm": 0.02918649145505074, "learning_rate": 9.166636295393111e-06, "loss": 0.0021, "step": 81410 }, { "epoch": 0.5356473227502089, "grad_norm": 0.1715178612686379, "learning_rate": 9.166318910889089e-06, "loss": 0.0028, "step": 81420 }, { "epoch": 0.5357131109254423, "grad_norm": 0.11716536275593464, "learning_rate": 9.166001471455753e-06, "loss": 0.0024, "step": 81430 }, { "epoch": 0.5357788991006757, "grad_norm": 0.02674673912884726, "learning_rate": 9.165683977097285e-06, "loss": 0.0027, "step": 81440 }, { "epoch": 0.5358446872759091, "grad_norm": 0.041257276605644225, "learning_rate": 9.16536642781787e-06, "loss": 0.0012, "step": 81450 }, { "epoch": 0.5359104754511425, "grad_norm": 0.11248935452435566, "learning_rate": 9.165048823621698e-06, "loss": 0.0012, "step": 81460 }, { "epoch": 0.5359762636263758, "grad_norm": 0.06440495323314671, "learning_rate": 9.164731164512955e-06, "loss": 0.0012, "step": 81470 }, { "epoch": 0.5360420518016091, "grad_norm": 0.014469033412530495, "learning_rate": 9.164413450495825e-06, "loss": 0.0016, "step": 81480 }, { "epoch": 0.5361078399768425, "grad_norm": 0.09582213911941195, "learning_rate": 9.164095681574503e-06, "loss": 0.0013, "step": 81490 }, { "epoch": 0.5361736281520759, "grad_norm": 0.03389522051864375, "learning_rate": 9.163777857753175e-06, "loss": 0.0015, "step": 81500 }, { "epoch": 0.5362394163273093, "grad_norm": 0.03887271920486785, "learning_rate": 9.163459979036033e-06, "loss": 0.0028, "step": 81510 }, { "epoch": 0.5363052045025427, "grad_norm": 0.041305282003931056, "learning_rate": 9.163142045427265e-06, "loss": 0.0009, "step": 81520 }, { "epoch": 0.5363709926777761, "grad_norm": 0.2258847420915349, "learning_rate": 9.162824056931067e-06, "loss": 0.0014, "step": 81530 }, { "epoch": 0.5364367808530095, "grad_norm": 0.0534639648972524, "learning_rate": 9.162506013551628e-06, "loss": 0.0025, "step": 81540 }, { "epoch": 0.5365025690282429, "grad_norm": 0.009162279707630006, "learning_rate": 9.162187915293142e-06, "loss": 0.0019, "step": 81550 }, { "epoch": 0.5365683572034763, "grad_norm": 0.0002046871222635474, "learning_rate": 9.161869762159803e-06, "loss": 0.0012, "step": 81560 }, { "epoch": 0.5366341453787097, "grad_norm": 0.07404441421281942, "learning_rate": 9.161551554155805e-06, "loss": 0.0014, "step": 81570 }, { "epoch": 0.536699933553943, "grad_norm": 0.0568261840752268, "learning_rate": 9.161233291285345e-06, "loss": 0.0011, "step": 81580 }, { "epoch": 0.5367657217291764, "grad_norm": 0.08751575384250694, "learning_rate": 9.160914973552618e-06, "loss": 0.0016, "step": 81590 }, { "epoch": 0.5368315099044098, "grad_norm": 0.1609361745474329, "learning_rate": 9.16059660096182e-06, "loss": 0.0014, "step": 81600 }, { "epoch": 0.5368972980796431, "grad_norm": 0.07731854608942068, "learning_rate": 9.16027817351715e-06, "loss": 0.0014, "step": 81610 }, { "epoch": 0.5369630862548765, "grad_norm": 0.17216762312005662, "learning_rate": 9.159959691222804e-06, "loss": 0.003, "step": 81620 }, { "epoch": 0.5370288744301099, "grad_norm": 0.00403515994401583, "learning_rate": 9.159641154082982e-06, "loss": 0.0013, "step": 81630 }, { "epoch": 0.5370946626053433, "grad_norm": 0.03537815269983068, "learning_rate": 9.159322562101886e-06, "loss": 0.0009, "step": 81640 }, { "epoch": 0.5371604507805767, "grad_norm": 0.1294514803448041, "learning_rate": 9.159003915283713e-06, "loss": 0.0013, "step": 81650 }, { "epoch": 0.5372262389558101, "grad_norm": 0.11503101089464479, "learning_rate": 9.158685213632665e-06, "loss": 0.0025, "step": 81660 }, { "epoch": 0.5372920271310435, "grad_norm": 0.14444368178381733, "learning_rate": 9.158366457152945e-06, "loss": 0.0019, "step": 81670 }, { "epoch": 0.5373578153062768, "grad_norm": 0.1668974548107301, "learning_rate": 9.158047645848754e-06, "loss": 0.0019, "step": 81680 }, { "epoch": 0.5374236034815102, "grad_norm": 0.08830048587489371, "learning_rate": 9.157728779724296e-06, "loss": 0.0026, "step": 81690 }, { "epoch": 0.5374893916567436, "grad_norm": 0.024231752066534273, "learning_rate": 9.157409858783774e-06, "loss": 0.0014, "step": 81700 }, { "epoch": 0.537555179831977, "grad_norm": 0.031027043821703848, "learning_rate": 9.157090883031397e-06, "loss": 0.0013, "step": 81710 }, { "epoch": 0.5376209680072104, "grad_norm": 0.04251306370443697, "learning_rate": 9.156771852471364e-06, "loss": 0.0023, "step": 81720 }, { "epoch": 0.5376867561824438, "grad_norm": 0.04660043167979942, "learning_rate": 9.156452767107884e-06, "loss": 0.0022, "step": 81730 }, { "epoch": 0.5377525443576772, "grad_norm": 0.019231526375786137, "learning_rate": 9.156133626945165e-06, "loss": 0.0015, "step": 81740 }, { "epoch": 0.5378183325329106, "grad_norm": 0.04106986566836317, "learning_rate": 9.155814431987413e-06, "loss": 0.0027, "step": 81750 }, { "epoch": 0.537884120708144, "grad_norm": 0.016404474934379833, "learning_rate": 9.155495182238839e-06, "loss": 0.0052, "step": 81760 }, { "epoch": 0.5379499088833773, "grad_norm": 0.003668226110187592, "learning_rate": 9.155175877703648e-06, "loss": 0.0018, "step": 81770 }, { "epoch": 0.5380156970586106, "grad_norm": 0.07616858087477338, "learning_rate": 9.15485651838605e-06, "loss": 0.0014, "step": 81780 }, { "epoch": 0.538081485233844, "grad_norm": 0.0871624894295688, "learning_rate": 9.15453710429026e-06, "loss": 0.0023, "step": 81790 }, { "epoch": 0.5381472734090774, "grad_norm": 0.1093087739863513, "learning_rate": 9.154217635420486e-06, "loss": 0.0024, "step": 81800 }, { "epoch": 0.5382130615843108, "grad_norm": 0.06683552271046854, "learning_rate": 9.153898111780939e-06, "loss": 0.0011, "step": 81810 }, { "epoch": 0.5382788497595442, "grad_norm": 0.0908256362742608, "learning_rate": 9.153578533375833e-06, "loss": 0.0018, "step": 81820 }, { "epoch": 0.5383446379347776, "grad_norm": 0.07469961422312643, "learning_rate": 9.153258900209384e-06, "loss": 0.0029, "step": 81830 }, { "epoch": 0.538410426110011, "grad_norm": 0.027751089076467766, "learning_rate": 9.152939212285801e-06, "loss": 0.0013, "step": 81840 }, { "epoch": 0.5384762142852444, "grad_norm": 0.036378450876572724, "learning_rate": 9.152619469609301e-06, "loss": 0.0011, "step": 81850 }, { "epoch": 0.5385420024604778, "grad_norm": 0.04343328686537473, "learning_rate": 9.152299672184101e-06, "loss": 0.0013, "step": 81860 }, { "epoch": 0.5386077906357112, "grad_norm": 0.09278369727487316, "learning_rate": 9.151979820014415e-06, "loss": 0.0026, "step": 81870 }, { "epoch": 0.5386735788109446, "grad_norm": 0.020324479620252602, "learning_rate": 9.15165991310446e-06, "loss": 0.0013, "step": 81880 }, { "epoch": 0.5387393669861779, "grad_norm": 0.24175270142056132, "learning_rate": 9.151339951458457e-06, "loss": 0.0025, "step": 81890 }, { "epoch": 0.5388051551614113, "grad_norm": 0.0657419011485787, "learning_rate": 9.15101993508062e-06, "loss": 0.0011, "step": 81900 }, { "epoch": 0.5388709433366446, "grad_norm": 0.029826288164445134, "learning_rate": 9.150699863975172e-06, "loss": 0.0011, "step": 81910 }, { "epoch": 0.538936731511878, "grad_norm": 0.09047524372021414, "learning_rate": 9.15037973814633e-06, "loss": 0.0014, "step": 81920 }, { "epoch": 0.5390025196871114, "grad_norm": 0.017900502957127504, "learning_rate": 9.150059557598317e-06, "loss": 0.0018, "step": 81930 }, { "epoch": 0.5390683078623448, "grad_norm": 0.18119090847514677, "learning_rate": 9.149739322335352e-06, "loss": 0.0019, "step": 81940 }, { "epoch": 0.5391340960375782, "grad_norm": 0.04336598796111567, "learning_rate": 9.149419032361657e-06, "loss": 0.004, "step": 81950 }, { "epoch": 0.5391998842128116, "grad_norm": 0.05673580029053118, "learning_rate": 9.149098687681457e-06, "loss": 0.0022, "step": 81960 }, { "epoch": 0.539265672388045, "grad_norm": 0.10601160163289292, "learning_rate": 9.148778288298974e-06, "loss": 0.0019, "step": 81970 }, { "epoch": 0.5393314605632784, "grad_norm": 0.13447131264536977, "learning_rate": 9.148457834218431e-06, "loss": 0.0018, "step": 81980 }, { "epoch": 0.5393972487385117, "grad_norm": 0.045196266052190126, "learning_rate": 9.148137325444056e-06, "loss": 0.0012, "step": 81990 }, { "epoch": 0.5394630369137451, "grad_norm": 0.10421960401993964, "learning_rate": 9.147816761980073e-06, "loss": 0.0023, "step": 82000 }, { "epoch": 0.5395288250889785, "grad_norm": 0.11123577617920934, "learning_rate": 9.147496143830708e-06, "loss": 0.0017, "step": 82010 }, { "epoch": 0.5395946132642119, "grad_norm": 0.1160139591841061, "learning_rate": 9.147175471000186e-06, "loss": 0.0031, "step": 82020 }, { "epoch": 0.5396604014394453, "grad_norm": 0.10401602472494069, "learning_rate": 9.14685474349274e-06, "loss": 0.0019, "step": 82030 }, { "epoch": 0.5397261896146787, "grad_norm": 0.25446624442471816, "learning_rate": 9.146533961312596e-06, "loss": 0.0033, "step": 82040 }, { "epoch": 0.5397919777899121, "grad_norm": 0.07645454536403762, "learning_rate": 9.146213124463981e-06, "loss": 0.002, "step": 82050 }, { "epoch": 0.5398577659651455, "grad_norm": 0.022845800698908174, "learning_rate": 9.145892232951128e-06, "loss": 0.0021, "step": 82060 }, { "epoch": 0.5399235541403788, "grad_norm": 0.003010954459852543, "learning_rate": 9.145571286778266e-06, "loss": 0.0031, "step": 82070 }, { "epoch": 0.5399893423156122, "grad_norm": 0.07677210348337567, "learning_rate": 9.145250285949628e-06, "loss": 0.0018, "step": 82080 }, { "epoch": 0.5400551304908455, "grad_norm": 0.024558057685762573, "learning_rate": 9.144929230469444e-06, "loss": 0.0021, "step": 82090 }, { "epoch": 0.5401209186660789, "grad_norm": 0.013331445342038877, "learning_rate": 9.144608120341948e-06, "loss": 0.0064, "step": 82100 }, { "epoch": 0.5401867068413123, "grad_norm": 0.1291066026538861, "learning_rate": 9.144286955571373e-06, "loss": 0.0023, "step": 82110 }, { "epoch": 0.5402524950165457, "grad_norm": 0.0433661446622194, "learning_rate": 9.143965736161955e-06, "loss": 0.0023, "step": 82120 }, { "epoch": 0.5403182831917791, "grad_norm": 0.1223284057144456, "learning_rate": 9.143644462117927e-06, "loss": 0.0009, "step": 82130 }, { "epoch": 0.5403840713670125, "grad_norm": 0.04127995442266815, "learning_rate": 9.143323133443524e-06, "loss": 0.0037, "step": 82140 }, { "epoch": 0.5404498595422459, "grad_norm": 0.10851552339401442, "learning_rate": 9.143001750142986e-06, "loss": 0.0026, "step": 82150 }, { "epoch": 0.5405156477174793, "grad_norm": 0.002983801840716046, "learning_rate": 9.142680312220547e-06, "loss": 0.0016, "step": 82160 }, { "epoch": 0.5405814358927127, "grad_norm": 0.12694255810683228, "learning_rate": 9.142358819680445e-06, "loss": 0.0015, "step": 82170 }, { "epoch": 0.5406472240679461, "grad_norm": 0.04504683144654725, "learning_rate": 9.142037272526923e-06, "loss": 0.0034, "step": 82180 }, { "epoch": 0.5407130122431794, "grad_norm": 0.05812137684583336, "learning_rate": 9.141715670764214e-06, "loss": 0.0021, "step": 82190 }, { "epoch": 0.5407788004184128, "grad_norm": 0.12695732527335649, "learning_rate": 9.141394014396562e-06, "loss": 0.0015, "step": 82200 }, { "epoch": 0.5408445885936461, "grad_norm": 0.19053237488048214, "learning_rate": 9.141072303428206e-06, "loss": 0.0026, "step": 82210 }, { "epoch": 0.5409103767688795, "grad_norm": 0.05023289993273504, "learning_rate": 9.140750537863388e-06, "loss": 0.0028, "step": 82220 }, { "epoch": 0.5409761649441129, "grad_norm": 0.19625587512095405, "learning_rate": 9.14042871770635e-06, "loss": 0.0019, "step": 82230 }, { "epoch": 0.5410419531193463, "grad_norm": 0.1460272474262308, "learning_rate": 9.140106842961337e-06, "loss": 0.0023, "step": 82240 }, { "epoch": 0.5411077412945797, "grad_norm": 0.004150760275258226, "learning_rate": 9.13978491363259e-06, "loss": 0.0018, "step": 82250 }, { "epoch": 0.5411735294698131, "grad_norm": 0.35837240202936393, "learning_rate": 9.139462929724355e-06, "loss": 0.0011, "step": 82260 }, { "epoch": 0.5412393176450465, "grad_norm": 0.07856211004424378, "learning_rate": 9.139140891240874e-06, "loss": 0.0023, "step": 82270 }, { "epoch": 0.5413051058202799, "grad_norm": 0.020816396802461234, "learning_rate": 9.138818798186398e-06, "loss": 0.0017, "step": 82280 }, { "epoch": 0.5413708939955132, "grad_norm": 0.10816846219820875, "learning_rate": 9.13849665056517e-06, "loss": 0.0033, "step": 82290 }, { "epoch": 0.5414366821707466, "grad_norm": 0.06087990421782631, "learning_rate": 9.138174448381436e-06, "loss": 0.0018, "step": 82300 }, { "epoch": 0.54150247034598, "grad_norm": 0.09094564386827023, "learning_rate": 9.137852191639447e-06, "loss": 0.0041, "step": 82310 }, { "epoch": 0.5415682585212134, "grad_norm": 0.012603737379820621, "learning_rate": 9.13752988034345e-06, "loss": 0.0012, "step": 82320 }, { "epoch": 0.5416340466964468, "grad_norm": 0.039483664122022186, "learning_rate": 9.137207514497697e-06, "loss": 0.0014, "step": 82330 }, { "epoch": 0.5416998348716802, "grad_norm": 0.09961437767903378, "learning_rate": 9.136885094106434e-06, "loss": 0.0013, "step": 82340 }, { "epoch": 0.5417656230469136, "grad_norm": 0.06113435520984428, "learning_rate": 9.136562619173914e-06, "loss": 0.0013, "step": 82350 }, { "epoch": 0.541831411222147, "grad_norm": 0.08216977080796553, "learning_rate": 9.136240089704389e-06, "loss": 0.0022, "step": 82360 }, { "epoch": 0.5418971993973803, "grad_norm": 0.03307003377646642, "learning_rate": 9.13591750570211e-06, "loss": 0.0014, "step": 82370 }, { "epoch": 0.5419629875726137, "grad_norm": 0.001303006902008055, "learning_rate": 9.135594867171333e-06, "loss": 0.0018, "step": 82380 }, { "epoch": 0.5420287757478471, "grad_norm": 0.118895062892403, "learning_rate": 9.135272174116308e-06, "loss": 0.0017, "step": 82390 }, { "epoch": 0.5420945639230804, "grad_norm": 0.040890880248909425, "learning_rate": 9.13494942654129e-06, "loss": 0.0015, "step": 82400 }, { "epoch": 0.5421603520983138, "grad_norm": 0.11046932189109471, "learning_rate": 9.134626624450536e-06, "loss": 0.0018, "step": 82410 }, { "epoch": 0.5422261402735472, "grad_norm": 0.046721327033288844, "learning_rate": 9.1343037678483e-06, "loss": 0.0025, "step": 82420 }, { "epoch": 0.5422919284487806, "grad_norm": 0.2236898930444662, "learning_rate": 9.13398085673884e-06, "loss": 0.0022, "step": 82430 }, { "epoch": 0.542357716624014, "grad_norm": 0.13745439370091825, "learning_rate": 9.133657891126412e-06, "loss": 0.0023, "step": 82440 }, { "epoch": 0.5424235047992474, "grad_norm": 0.08034666244184166, "learning_rate": 9.133334871015276e-06, "loss": 0.0022, "step": 82450 }, { "epoch": 0.5424892929744808, "grad_norm": 0.03505615702883543, "learning_rate": 9.133011796409688e-06, "loss": 0.0016, "step": 82460 }, { "epoch": 0.5425550811497142, "grad_norm": 0.032721022669474055, "learning_rate": 9.13268866731391e-06, "loss": 0.0011, "step": 82470 }, { "epoch": 0.5426208693249476, "grad_norm": 0.058101446976896835, "learning_rate": 9.132365483732202e-06, "loss": 0.0018, "step": 82480 }, { "epoch": 0.542686657500181, "grad_norm": 0.06328774545247834, "learning_rate": 9.132042245668821e-06, "loss": 0.002, "step": 82490 }, { "epoch": 0.5427524456754143, "grad_norm": 0.05675936438490669, "learning_rate": 9.131718953128036e-06, "loss": 0.0028, "step": 82500 }, { "epoch": 0.5428182338506476, "grad_norm": 0.018278033864535432, "learning_rate": 9.131395606114102e-06, "loss": 0.0011, "step": 82510 }, { "epoch": 0.542884022025881, "grad_norm": 0.06850089932226243, "learning_rate": 9.131072204631286e-06, "loss": 0.0012, "step": 82520 }, { "epoch": 0.5429498102011144, "grad_norm": 0.06484483583407907, "learning_rate": 9.13074874868385e-06, "loss": 0.0018, "step": 82530 }, { "epoch": 0.5430155983763478, "grad_norm": 0.11476472119256192, "learning_rate": 9.13042523827606e-06, "loss": 0.0026, "step": 82540 }, { "epoch": 0.5430813865515812, "grad_norm": 0.09758225612856847, "learning_rate": 9.13010167341218e-06, "loss": 0.0029, "step": 82550 }, { "epoch": 0.5431471747268146, "grad_norm": 0.0977970710835086, "learning_rate": 9.129778054096476e-06, "loss": 0.002, "step": 82560 }, { "epoch": 0.543212962902048, "grad_norm": 0.09063329273050032, "learning_rate": 9.129454380333217e-06, "loss": 0.0014, "step": 82570 }, { "epoch": 0.5432787510772814, "grad_norm": 0.0237819242314103, "learning_rate": 9.129130652126667e-06, "loss": 0.0017, "step": 82580 }, { "epoch": 0.5433445392525148, "grad_norm": 0.18082394558520484, "learning_rate": 9.128806869481095e-06, "loss": 0.0044, "step": 82590 }, { "epoch": 0.5434103274277481, "grad_norm": 0.010740058266605559, "learning_rate": 9.128483032400773e-06, "loss": 0.0024, "step": 82600 }, { "epoch": 0.5434761156029815, "grad_norm": 0.0413179940219949, "learning_rate": 9.128159140889965e-06, "loss": 0.0017, "step": 82610 }, { "epoch": 0.5435419037782149, "grad_norm": 0.005670702410669936, "learning_rate": 9.127835194952946e-06, "loss": 0.0031, "step": 82620 }, { "epoch": 0.5436076919534483, "grad_norm": 0.030775179901292273, "learning_rate": 9.127511194593984e-06, "loss": 0.0017, "step": 82630 }, { "epoch": 0.5436734801286817, "grad_norm": 0.030322684018406966, "learning_rate": 9.127187139817351e-06, "loss": 0.0016, "step": 82640 }, { "epoch": 0.5437392683039151, "grad_norm": 0.1612323092886953, "learning_rate": 9.12686303062732e-06, "loss": 0.0033, "step": 82650 }, { "epoch": 0.5438050564791485, "grad_norm": 0.13448868166632624, "learning_rate": 9.126538867028165e-06, "loss": 0.0008, "step": 82660 }, { "epoch": 0.5438708446543818, "grad_norm": 0.03855819841052449, "learning_rate": 9.126214649024158e-06, "loss": 0.0014, "step": 82670 }, { "epoch": 0.5439366328296152, "grad_norm": 0.07486762770980784, "learning_rate": 9.125890376619576e-06, "loss": 0.0019, "step": 82680 }, { "epoch": 0.5440024210048486, "grad_norm": 0.02999683809091433, "learning_rate": 9.12556604981869e-06, "loss": 0.0015, "step": 82690 }, { "epoch": 0.5440682091800819, "grad_norm": 0.06646854072811331, "learning_rate": 9.125241668625781e-06, "loss": 0.0012, "step": 82700 }, { "epoch": 0.5441339973553153, "grad_norm": 0.010868915649565434, "learning_rate": 9.124917233045123e-06, "loss": 0.0017, "step": 82710 }, { "epoch": 0.5441997855305487, "grad_norm": 0.04156105121632294, "learning_rate": 9.124592743080994e-06, "loss": 0.0009, "step": 82720 }, { "epoch": 0.5442655737057821, "grad_norm": 0.050023700029324145, "learning_rate": 9.124268198737671e-06, "loss": 0.0013, "step": 82730 }, { "epoch": 0.5443313618810155, "grad_norm": 0.08403114260116744, "learning_rate": 9.123943600019436e-06, "loss": 0.0017, "step": 82740 }, { "epoch": 0.5443971500562489, "grad_norm": 0.03503088010188893, "learning_rate": 9.123618946930564e-06, "loss": 0.0007, "step": 82750 }, { "epoch": 0.5444629382314823, "grad_norm": 0.00915659528623383, "learning_rate": 9.123294239475338e-06, "loss": 0.0016, "step": 82760 }, { "epoch": 0.5445287264067157, "grad_norm": 0.01418192029399432, "learning_rate": 9.12296947765804e-06, "loss": 0.0012, "step": 82770 }, { "epoch": 0.5445945145819491, "grad_norm": 0.18460983639876816, "learning_rate": 9.122644661482948e-06, "loss": 0.0048, "step": 82780 }, { "epoch": 0.5446603027571825, "grad_norm": 0.03285309006474961, "learning_rate": 9.122319790954348e-06, "loss": 0.002, "step": 82790 }, { "epoch": 0.5447260909324159, "grad_norm": 0.016453911657690053, "learning_rate": 9.121994866076522e-06, "loss": 0.0019, "step": 82800 }, { "epoch": 0.5447918791076491, "grad_norm": 0.03694715886716159, "learning_rate": 9.121669886853753e-06, "loss": 0.001, "step": 82810 }, { "epoch": 0.5448576672828825, "grad_norm": 0.08472303888432664, "learning_rate": 9.121344853290328e-06, "loss": 0.0016, "step": 82820 }, { "epoch": 0.5449234554581159, "grad_norm": 0.07604152997363543, "learning_rate": 9.121019765390528e-06, "loss": 0.0012, "step": 82830 }, { "epoch": 0.5449892436333493, "grad_norm": 0.0599595639392156, "learning_rate": 9.120694623158644e-06, "loss": 0.0024, "step": 82840 }, { "epoch": 0.5450550318085827, "grad_norm": 0.07915071406634669, "learning_rate": 9.120369426598959e-06, "loss": 0.002, "step": 82850 }, { "epoch": 0.5451208199838161, "grad_norm": 0.04994524586870053, "learning_rate": 9.12004417571576e-06, "loss": 0.0012, "step": 82860 }, { "epoch": 0.5451866081590495, "grad_norm": 0.06346553491311699, "learning_rate": 9.119718870513339e-06, "loss": 0.002, "step": 82870 }, { "epoch": 0.5452523963342829, "grad_norm": 0.0034715729930742006, "learning_rate": 9.119393510995982e-06, "loss": 0.0018, "step": 82880 }, { "epoch": 0.5453181845095163, "grad_norm": 0.027642578597399048, "learning_rate": 9.119068097167979e-06, "loss": 0.0036, "step": 82890 }, { "epoch": 0.5453839726847497, "grad_norm": 0.09672894909465399, "learning_rate": 9.11874262903362e-06, "loss": 0.0009, "step": 82900 }, { "epoch": 0.545449760859983, "grad_norm": 0.03225211965603114, "learning_rate": 9.1184171065972e-06, "loss": 0.0014, "step": 82910 }, { "epoch": 0.5455155490352164, "grad_norm": 0.0054723413161296315, "learning_rate": 9.118091529863004e-06, "loss": 0.0019, "step": 82920 }, { "epoch": 0.5455813372104498, "grad_norm": 0.0707583875800677, "learning_rate": 9.11776589883533e-06, "loss": 0.0019, "step": 82930 }, { "epoch": 0.5456471253856832, "grad_norm": 0.06308091081799728, "learning_rate": 9.117440213518467e-06, "loss": 0.0016, "step": 82940 }, { "epoch": 0.5457129135609166, "grad_norm": 0.002248210924054801, "learning_rate": 9.117114473916712e-06, "loss": 0.0011, "step": 82950 }, { "epoch": 0.54577870173615, "grad_norm": 0.5049094357432851, "learning_rate": 9.116788680034357e-06, "loss": 0.0009, "step": 82960 }, { "epoch": 0.5458444899113833, "grad_norm": 0.08575117803101447, "learning_rate": 9.1164628318757e-06, "loss": 0.0015, "step": 82970 }, { "epoch": 0.5459102780866167, "grad_norm": 0.02904346226165919, "learning_rate": 9.116136929445036e-06, "loss": 0.0017, "step": 82980 }, { "epoch": 0.5459760662618501, "grad_norm": 0.04200950552812726, "learning_rate": 9.11581097274666e-06, "loss": 0.0015, "step": 82990 }, { "epoch": 0.5460418544370835, "grad_norm": 0.04607078021401529, "learning_rate": 9.115484961784874e-06, "loss": 0.0015, "step": 83000 }, { "epoch": 0.5461076426123168, "grad_norm": 0.015220317526392066, "learning_rate": 9.11515889656397e-06, "loss": 0.0021, "step": 83010 }, { "epoch": 0.5461734307875502, "grad_norm": 0.1445578954213589, "learning_rate": 9.114832777088252e-06, "loss": 0.0024, "step": 83020 }, { "epoch": 0.5462392189627836, "grad_norm": 0.018116682418438036, "learning_rate": 9.114506603362016e-06, "loss": 0.0009, "step": 83030 }, { "epoch": 0.546305007138017, "grad_norm": 0.08279326240294822, "learning_rate": 9.114180375389564e-06, "loss": 0.0015, "step": 83040 }, { "epoch": 0.5463707953132504, "grad_norm": 0.12594866279898653, "learning_rate": 9.113854093175198e-06, "loss": 0.0018, "step": 83050 }, { "epoch": 0.5464365834884838, "grad_norm": 0.07194919673624534, "learning_rate": 9.11352775672322e-06, "loss": 0.0053, "step": 83060 }, { "epoch": 0.5465023716637172, "grad_norm": 0.02664067084572378, "learning_rate": 9.11320136603793e-06, "loss": 0.0016, "step": 83070 }, { "epoch": 0.5465681598389506, "grad_norm": 0.04125847841367935, "learning_rate": 9.112874921123632e-06, "loss": 0.0025, "step": 83080 }, { "epoch": 0.546633948014184, "grad_norm": 0.0609533692768641, "learning_rate": 9.11254842198463e-06, "loss": 0.0015, "step": 83090 }, { "epoch": 0.5466997361894174, "grad_norm": 0.09601413045600285, "learning_rate": 9.11222186862523e-06, "loss": 0.0037, "step": 83100 }, { "epoch": 0.5467655243646506, "grad_norm": 0.07301543419915546, "learning_rate": 9.111895261049735e-06, "loss": 0.0017, "step": 83110 }, { "epoch": 0.546831312539884, "grad_norm": 0.13911293367590213, "learning_rate": 9.111568599262453e-06, "loss": 0.0013, "step": 83120 }, { "epoch": 0.5468971007151174, "grad_norm": 0.09849218650980043, "learning_rate": 9.11124188326769e-06, "loss": 0.0021, "step": 83130 }, { "epoch": 0.5469628888903508, "grad_norm": 0.06035253332350558, "learning_rate": 9.110915113069753e-06, "loss": 0.0009, "step": 83140 }, { "epoch": 0.5470286770655842, "grad_norm": 0.13223471806105766, "learning_rate": 9.11058828867295e-06, "loss": 0.0031, "step": 83150 }, { "epoch": 0.5470944652408176, "grad_norm": 0.014375407981397921, "learning_rate": 9.11026141008159e-06, "loss": 0.0048, "step": 83160 }, { "epoch": 0.547160253416051, "grad_norm": 0.05099121002756416, "learning_rate": 9.109934477299986e-06, "loss": 0.0016, "step": 83170 }, { "epoch": 0.5472260415912844, "grad_norm": 0.03536953107950694, "learning_rate": 9.109607490332444e-06, "loss": 0.0019, "step": 83180 }, { "epoch": 0.5472918297665178, "grad_norm": 0.02525242203052039, "learning_rate": 9.109280449183277e-06, "loss": 0.0029, "step": 83190 }, { "epoch": 0.5473576179417512, "grad_norm": 0.06491104862694268, "learning_rate": 9.108953353856796e-06, "loss": 0.0012, "step": 83200 }, { "epoch": 0.5474234061169845, "grad_norm": 0.04982532865215151, "learning_rate": 9.108626204357314e-06, "loss": 0.0014, "step": 83210 }, { "epoch": 0.5474891942922179, "grad_norm": 0.02059880407494009, "learning_rate": 9.108299000689142e-06, "loss": 0.0009, "step": 83220 }, { "epoch": 0.5475549824674513, "grad_norm": 0.04623777578497544, "learning_rate": 9.107971742856598e-06, "loss": 0.0025, "step": 83230 }, { "epoch": 0.5476207706426847, "grad_norm": 0.08222765388942742, "learning_rate": 9.107644430863994e-06, "loss": 0.0018, "step": 83240 }, { "epoch": 0.5476865588179181, "grad_norm": 0.059730664222632804, "learning_rate": 9.107317064715646e-06, "loss": 0.0018, "step": 83250 }, { "epoch": 0.5477523469931515, "grad_norm": 0.06453113669020952, "learning_rate": 9.106989644415869e-06, "loss": 0.0017, "step": 83260 }, { "epoch": 0.5478181351683848, "grad_norm": 0.030011435513705175, "learning_rate": 9.106662169968982e-06, "loss": 0.0016, "step": 83270 }, { "epoch": 0.5478839233436182, "grad_norm": 0.007269292103568847, "learning_rate": 9.1063346413793e-06, "loss": 0.0027, "step": 83280 }, { "epoch": 0.5479497115188516, "grad_norm": 0.07114706427739578, "learning_rate": 9.106007058651141e-06, "loss": 0.0023, "step": 83290 }, { "epoch": 0.548015499694085, "grad_norm": 0.0376057601726072, "learning_rate": 9.105679421788827e-06, "loss": 0.0012, "step": 83300 }, { "epoch": 0.5480812878693184, "grad_norm": 0.12992420185544779, "learning_rate": 9.105351730796676e-06, "loss": 0.0011, "step": 83310 }, { "epoch": 0.5481470760445517, "grad_norm": 0.15932293892439223, "learning_rate": 9.105023985679006e-06, "loss": 0.0011, "step": 83320 }, { "epoch": 0.5482128642197851, "grad_norm": 0.07462303689091632, "learning_rate": 9.104696186440141e-06, "loss": 0.0019, "step": 83330 }, { "epoch": 0.5482786523950185, "grad_norm": 0.01820608043564686, "learning_rate": 9.104368333084403e-06, "loss": 0.0014, "step": 83340 }, { "epoch": 0.5483444405702519, "grad_norm": 0.03614387599592569, "learning_rate": 9.104040425616111e-06, "loss": 0.0015, "step": 83350 }, { "epoch": 0.5484102287454853, "grad_norm": 0.08128086790903086, "learning_rate": 9.103712464039594e-06, "loss": 0.0024, "step": 83360 }, { "epoch": 0.5484760169207187, "grad_norm": 0.11216242458580074, "learning_rate": 9.103384448359169e-06, "loss": 0.0025, "step": 83370 }, { "epoch": 0.5485418050959521, "grad_norm": 0.05549842443529602, "learning_rate": 9.103056378579166e-06, "loss": 0.0017, "step": 83380 }, { "epoch": 0.5486075932711855, "grad_norm": 0.04757476216945756, "learning_rate": 9.102728254703907e-06, "loss": 0.0012, "step": 83390 }, { "epoch": 0.5486733814464189, "grad_norm": 0.2083213940507285, "learning_rate": 9.10240007673772e-06, "loss": 0.003, "step": 83400 }, { "epoch": 0.5487391696216523, "grad_norm": 0.034478234655508595, "learning_rate": 9.102071844684929e-06, "loss": 0.0015, "step": 83410 }, { "epoch": 0.5488049577968855, "grad_norm": 0.03210903870246672, "learning_rate": 9.101743558549867e-06, "loss": 0.0019, "step": 83420 }, { "epoch": 0.5488707459721189, "grad_norm": 0.18925755308163938, "learning_rate": 9.101415218336857e-06, "loss": 0.0031, "step": 83430 }, { "epoch": 0.5489365341473523, "grad_norm": 0.027624531487382493, "learning_rate": 9.10108682405023e-06, "loss": 0.004, "step": 83440 }, { "epoch": 0.5490023223225857, "grad_norm": 0.06902974976082449, "learning_rate": 9.100758375694314e-06, "loss": 0.0021, "step": 83450 }, { "epoch": 0.5490681104978191, "grad_norm": 0.03728795933861175, "learning_rate": 9.100429873273441e-06, "loss": 0.0026, "step": 83460 }, { "epoch": 0.5491338986730525, "grad_norm": 0.09082336564044721, "learning_rate": 9.10010131679194e-06, "loss": 0.0017, "step": 83470 }, { "epoch": 0.5491996868482859, "grad_norm": 0.17261890202672994, "learning_rate": 9.099772706254148e-06, "loss": 0.0048, "step": 83480 }, { "epoch": 0.5492654750235193, "grad_norm": 0.07682843901179391, "learning_rate": 9.099444041664392e-06, "loss": 0.0021, "step": 83490 }, { "epoch": 0.5493312631987527, "grad_norm": 0.0237821017800097, "learning_rate": 9.099115323027006e-06, "loss": 0.0015, "step": 83500 }, { "epoch": 0.5493970513739861, "grad_norm": 0.007162231834413, "learning_rate": 9.098786550346324e-06, "loss": 0.0026, "step": 83510 }, { "epoch": 0.5494628395492194, "grad_norm": 0.10415277305727334, "learning_rate": 9.098457723626683e-06, "loss": 0.0023, "step": 83520 }, { "epoch": 0.5495286277244528, "grad_norm": 0.04997698670979215, "learning_rate": 9.098128842872416e-06, "loss": 0.0013, "step": 83530 }, { "epoch": 0.5495944158996862, "grad_norm": 0.15914992083837473, "learning_rate": 9.097799908087858e-06, "loss": 0.0015, "step": 83540 }, { "epoch": 0.5496602040749196, "grad_norm": 0.04918529346934396, "learning_rate": 9.09747091927735e-06, "loss": 0.0014, "step": 83550 }, { "epoch": 0.549725992250153, "grad_norm": 0.031861602941760006, "learning_rate": 9.097141876445226e-06, "loss": 0.0013, "step": 83560 }, { "epoch": 0.5497917804253863, "grad_norm": 0.014438477279718816, "learning_rate": 9.096812779595823e-06, "loss": 0.0022, "step": 83570 }, { "epoch": 0.5498575686006197, "grad_norm": 0.04397476640577944, "learning_rate": 9.096483628733483e-06, "loss": 0.0009, "step": 83580 }, { "epoch": 0.5499233567758531, "grad_norm": 0.06267106814126715, "learning_rate": 9.096154423862544e-06, "loss": 0.0014, "step": 83590 }, { "epoch": 0.5499891449510865, "grad_norm": 0.03174580458164157, "learning_rate": 9.095825164987348e-06, "loss": 0.0017, "step": 83600 }, { "epoch": 0.5500549331263199, "grad_norm": 0.07584019870023681, "learning_rate": 9.095495852112234e-06, "loss": 0.0022, "step": 83610 }, { "epoch": 0.5501207213015532, "grad_norm": 0.023908658431718473, "learning_rate": 9.095166485241544e-06, "loss": 0.0021, "step": 83620 }, { "epoch": 0.5501865094767866, "grad_norm": 0.1502205410851485, "learning_rate": 9.094837064379621e-06, "loss": 0.0027, "step": 83630 }, { "epoch": 0.55025229765202, "grad_norm": 0.06864404586591463, "learning_rate": 9.094507589530808e-06, "loss": 0.0021, "step": 83640 }, { "epoch": 0.5503180858272534, "grad_norm": 0.10685246057105721, "learning_rate": 9.094178060699448e-06, "loss": 0.0022, "step": 83650 }, { "epoch": 0.5503838740024868, "grad_norm": 0.060080301321033994, "learning_rate": 9.093848477889886e-06, "loss": 0.0019, "step": 83660 }, { "epoch": 0.5504496621777202, "grad_norm": 0.03535343829134338, "learning_rate": 9.093518841106467e-06, "loss": 0.0016, "step": 83670 }, { "epoch": 0.5505154503529536, "grad_norm": 0.026194751640753206, "learning_rate": 9.093189150353538e-06, "loss": 0.0011, "step": 83680 }, { "epoch": 0.550581238528187, "grad_norm": 0.01976071355992875, "learning_rate": 9.092859405635446e-06, "loss": 0.0025, "step": 83690 }, { "epoch": 0.5506470267034204, "grad_norm": 0.07616929325255516, "learning_rate": 9.092529606956538e-06, "loss": 0.0009, "step": 83700 }, { "epoch": 0.5507128148786538, "grad_norm": 0.03695509624989934, "learning_rate": 9.09219975432116e-06, "loss": 0.001, "step": 83710 }, { "epoch": 0.550778603053887, "grad_norm": 0.05762715767180655, "learning_rate": 9.091869847733662e-06, "loss": 0.002, "step": 83720 }, { "epoch": 0.5508443912291204, "grad_norm": 0.2954423297408277, "learning_rate": 9.091539887198397e-06, "loss": 0.004, "step": 83730 }, { "epoch": 0.5509101794043538, "grad_norm": 0.07342361234071952, "learning_rate": 9.091209872719711e-06, "loss": 0.0016, "step": 83740 }, { "epoch": 0.5509759675795872, "grad_norm": 0.09824538944085295, "learning_rate": 9.090879804301955e-06, "loss": 0.0022, "step": 83750 }, { "epoch": 0.5510417557548206, "grad_norm": 0.07310282203236092, "learning_rate": 9.090549681949481e-06, "loss": 0.0012, "step": 83760 }, { "epoch": 0.551107543930054, "grad_norm": 0.351743041882213, "learning_rate": 9.090219505666645e-06, "loss": 0.0027, "step": 83770 }, { "epoch": 0.5511733321052874, "grad_norm": 0.03211845292607376, "learning_rate": 9.089889275457797e-06, "loss": 0.0019, "step": 83780 }, { "epoch": 0.5512391202805208, "grad_norm": 0.11095694153443166, "learning_rate": 9.08955899132729e-06, "loss": 0.0019, "step": 83790 }, { "epoch": 0.5513049084557542, "grad_norm": 0.14509356374225466, "learning_rate": 9.089228653279481e-06, "loss": 0.0015, "step": 83800 }, { "epoch": 0.5513706966309876, "grad_norm": 0.08063534912618808, "learning_rate": 9.088898261318722e-06, "loss": 0.0017, "step": 83810 }, { "epoch": 0.551436484806221, "grad_norm": 0.03074285506431211, "learning_rate": 9.088567815449372e-06, "loss": 0.001, "step": 83820 }, { "epoch": 0.5515022729814543, "grad_norm": 0.05762269411075173, "learning_rate": 9.088237315675786e-06, "loss": 0.0014, "step": 83830 }, { "epoch": 0.5515680611566877, "grad_norm": 0.05783705161968796, "learning_rate": 9.087906762002323e-06, "loss": 0.0014, "step": 83840 }, { "epoch": 0.5516338493319211, "grad_norm": 0.04473045294030793, "learning_rate": 9.08757615443334e-06, "loss": 0.0013, "step": 83850 }, { "epoch": 0.5516996375071545, "grad_norm": 0.08305171455815173, "learning_rate": 9.087245492973194e-06, "loss": 0.0023, "step": 83860 }, { "epoch": 0.5517654256823878, "grad_norm": 0.05308701115902039, "learning_rate": 9.086914777626245e-06, "loss": 0.0011, "step": 83870 }, { "epoch": 0.5518312138576212, "grad_norm": 0.06078574169416605, "learning_rate": 9.086584008396858e-06, "loss": 0.0026, "step": 83880 }, { "epoch": 0.5518970020328546, "grad_norm": 0.01644122616601443, "learning_rate": 9.086253185289388e-06, "loss": 0.0021, "step": 83890 }, { "epoch": 0.551962790208088, "grad_norm": 0.015781907960367957, "learning_rate": 9.0859223083082e-06, "loss": 0.0017, "step": 83900 }, { "epoch": 0.5520285783833214, "grad_norm": 0.029065799990031362, "learning_rate": 9.085591377457655e-06, "loss": 0.0018, "step": 83910 }, { "epoch": 0.5520943665585548, "grad_norm": 0.2558642397015587, "learning_rate": 9.085260392742115e-06, "loss": 0.0013, "step": 83920 }, { "epoch": 0.5521601547337881, "grad_norm": 0.11577124491728386, "learning_rate": 9.084929354165945e-06, "loss": 0.0011, "step": 83930 }, { "epoch": 0.5522259429090215, "grad_norm": 0.10875115563905498, "learning_rate": 9.08459826173351e-06, "loss": 0.0023, "step": 83940 }, { "epoch": 0.5522917310842549, "grad_norm": 0.00855318860199668, "learning_rate": 9.084267115449175e-06, "loss": 0.0057, "step": 83950 }, { "epoch": 0.5523575192594883, "grad_norm": 0.03385836497650213, "learning_rate": 9.083935915317306e-06, "loss": 0.0018, "step": 83960 }, { "epoch": 0.5524233074347217, "grad_norm": 0.09602217123588641, "learning_rate": 9.083604661342268e-06, "loss": 0.0017, "step": 83970 }, { "epoch": 0.5524890956099551, "grad_norm": 0.05570147544464812, "learning_rate": 9.08327335352843e-06, "loss": 0.002, "step": 83980 }, { "epoch": 0.5525548837851885, "grad_norm": 0.012885426663480528, "learning_rate": 9.082941991880158e-06, "loss": 0.0011, "step": 83990 }, { "epoch": 0.5526206719604219, "grad_norm": 0.024429759270799375, "learning_rate": 9.082610576401823e-06, "loss": 0.0025, "step": 84000 }, { "epoch": 0.5526864601356553, "grad_norm": 0.06921432361858472, "learning_rate": 9.082279107097794e-06, "loss": 0.0028, "step": 84010 }, { "epoch": 0.5527522483108886, "grad_norm": 0.011948054484479475, "learning_rate": 9.081947583972441e-06, "loss": 0.0013, "step": 84020 }, { "epoch": 0.5528180364861219, "grad_norm": 0.07070249763505834, "learning_rate": 9.081616007030135e-06, "loss": 0.0025, "step": 84030 }, { "epoch": 0.5528838246613553, "grad_norm": 0.07978692772356709, "learning_rate": 9.081284376275246e-06, "loss": 0.0023, "step": 84040 }, { "epoch": 0.5529496128365887, "grad_norm": 0.046636018728062036, "learning_rate": 9.080952691712147e-06, "loss": 0.003, "step": 84050 }, { "epoch": 0.5530154010118221, "grad_norm": 0.06470727187169838, "learning_rate": 9.080620953345212e-06, "loss": 0.0022, "step": 84060 }, { "epoch": 0.5530811891870555, "grad_norm": 0.08142947878066549, "learning_rate": 9.080289161178813e-06, "loss": 0.0015, "step": 84070 }, { "epoch": 0.5531469773622889, "grad_norm": 0.07865051452423931, "learning_rate": 9.079957315217325e-06, "loss": 0.0015, "step": 84080 }, { "epoch": 0.5532127655375223, "grad_norm": 0.003930780299497404, "learning_rate": 9.079625415465126e-06, "loss": 0.0012, "step": 84090 }, { "epoch": 0.5532785537127557, "grad_norm": 0.08669041318786896, "learning_rate": 9.079293461926586e-06, "loss": 0.0007, "step": 84100 }, { "epoch": 0.5533443418879891, "grad_norm": 0.04516219351431131, "learning_rate": 9.078961454606087e-06, "loss": 0.0036, "step": 84110 }, { "epoch": 0.5534101300632225, "grad_norm": 0.0020765127899666637, "learning_rate": 9.078629393508003e-06, "loss": 0.0012, "step": 84120 }, { "epoch": 0.5534759182384558, "grad_norm": 0.04288001526111897, "learning_rate": 9.078297278636713e-06, "loss": 0.0006, "step": 84130 }, { "epoch": 0.5535417064136892, "grad_norm": 0.0478444059490145, "learning_rate": 9.077965109996597e-06, "loss": 0.0027, "step": 84140 }, { "epoch": 0.5536074945889226, "grad_norm": 0.025158518682557637, "learning_rate": 9.07763288759203e-06, "loss": 0.0012, "step": 84150 }, { "epoch": 0.553673282764156, "grad_norm": 0.0444168702280438, "learning_rate": 9.0773006114274e-06, "loss": 0.0021, "step": 84160 }, { "epoch": 0.5537390709393893, "grad_norm": 0.10047068729365533, "learning_rate": 9.076968281507079e-06, "loss": 0.0026, "step": 84170 }, { "epoch": 0.5538048591146227, "grad_norm": 0.04955202378570913, "learning_rate": 9.076635897835454e-06, "loss": 0.0012, "step": 84180 }, { "epoch": 0.5538706472898561, "grad_norm": 0.14482695766616785, "learning_rate": 9.076303460416905e-06, "loss": 0.0016, "step": 84190 }, { "epoch": 0.5539364354650895, "grad_norm": 0.05576465818657006, "learning_rate": 9.075970969255816e-06, "loss": 0.0024, "step": 84200 }, { "epoch": 0.5540022236403229, "grad_norm": 0.142594677125471, "learning_rate": 9.075638424356568e-06, "loss": 0.0009, "step": 84210 }, { "epoch": 0.5540680118155563, "grad_norm": 0.23332835389638706, "learning_rate": 9.07530582572355e-06, "loss": 0.0025, "step": 84220 }, { "epoch": 0.5541337999907897, "grad_norm": 0.0642589624262974, "learning_rate": 9.074973173361142e-06, "loss": 0.0019, "step": 84230 }, { "epoch": 0.554199588166023, "grad_norm": 0.1276847048411522, "learning_rate": 9.074640467273734e-06, "loss": 0.0016, "step": 84240 }, { "epoch": 0.5542653763412564, "grad_norm": 0.10580785928867195, "learning_rate": 9.074307707465709e-06, "loss": 0.0018, "step": 84250 }, { "epoch": 0.5543311645164898, "grad_norm": 0.08701754348799134, "learning_rate": 9.073974893941454e-06, "loss": 0.0018, "step": 84260 }, { "epoch": 0.5543969526917232, "grad_norm": 0.039079445191012224, "learning_rate": 9.073642026705362e-06, "loss": 0.0017, "step": 84270 }, { "epoch": 0.5544627408669566, "grad_norm": 0.12627652297388905, "learning_rate": 9.073309105761818e-06, "loss": 0.0016, "step": 84280 }, { "epoch": 0.55452852904219, "grad_norm": 0.23082271284102374, "learning_rate": 9.07297613111521e-06, "loss": 0.0014, "step": 84290 }, { "epoch": 0.5545943172174234, "grad_norm": 0.05846516287136486, "learning_rate": 9.07264310276993e-06, "loss": 0.0029, "step": 84300 }, { "epoch": 0.5546601053926568, "grad_norm": 0.04411804941743398, "learning_rate": 9.072310020730368e-06, "loss": 0.0017, "step": 84310 }, { "epoch": 0.5547258935678901, "grad_norm": 0.10836657626297923, "learning_rate": 9.071976885000914e-06, "loss": 0.0022, "step": 84320 }, { "epoch": 0.5547916817431235, "grad_norm": 0.027629739113811114, "learning_rate": 9.071643695585962e-06, "loss": 0.0012, "step": 84330 }, { "epoch": 0.5548574699183568, "grad_norm": 0.08567810717218204, "learning_rate": 9.071310452489906e-06, "loss": 0.0011, "step": 84340 }, { "epoch": 0.5549232580935902, "grad_norm": 0.009433320197226256, "learning_rate": 9.070977155717138e-06, "loss": 0.0025, "step": 84350 }, { "epoch": 0.5549890462688236, "grad_norm": 0.044617148809675086, "learning_rate": 9.07064380527205e-06, "loss": 0.0021, "step": 84360 }, { "epoch": 0.555054834444057, "grad_norm": 0.06864500791414602, "learning_rate": 9.07031040115904e-06, "loss": 0.0025, "step": 84370 }, { "epoch": 0.5551206226192904, "grad_norm": 0.024573316281374383, "learning_rate": 9.069976943382503e-06, "loss": 0.0012, "step": 84380 }, { "epoch": 0.5551864107945238, "grad_norm": 0.08154204543249197, "learning_rate": 9.069643431946835e-06, "loss": 0.0011, "step": 84390 }, { "epoch": 0.5552521989697572, "grad_norm": 0.09645296773438629, "learning_rate": 9.069309866856433e-06, "loss": 0.0012, "step": 84400 }, { "epoch": 0.5553179871449906, "grad_norm": 0.04235608268779916, "learning_rate": 9.068976248115695e-06, "loss": 0.0008, "step": 84410 }, { "epoch": 0.555383775320224, "grad_norm": 0.05199190927751422, "learning_rate": 9.068642575729019e-06, "loss": 0.0014, "step": 84420 }, { "epoch": 0.5554495634954574, "grad_norm": 0.17807377983840547, "learning_rate": 9.068308849700804e-06, "loss": 0.0014, "step": 84430 }, { "epoch": 0.5555153516706907, "grad_norm": 0.01809577756670504, "learning_rate": 9.067975070035452e-06, "loss": 0.0011, "step": 84440 }, { "epoch": 0.555581139845924, "grad_norm": 0.18083108320083388, "learning_rate": 9.067641236737362e-06, "loss": 0.0022, "step": 84450 }, { "epoch": 0.5556469280211574, "grad_norm": 0.07257953998090219, "learning_rate": 9.067307349810934e-06, "loss": 0.0016, "step": 84460 }, { "epoch": 0.5557127161963908, "grad_norm": 0.02151625604793801, "learning_rate": 9.066973409260573e-06, "loss": 0.0011, "step": 84470 }, { "epoch": 0.5557785043716242, "grad_norm": 0.07980229069941229, "learning_rate": 9.066639415090678e-06, "loss": 0.0013, "step": 84480 }, { "epoch": 0.5558442925468576, "grad_norm": 0.03406022357560045, "learning_rate": 9.066305367305657e-06, "loss": 0.0008, "step": 84490 }, { "epoch": 0.555910080722091, "grad_norm": 0.03820804248747158, "learning_rate": 9.06597126590991e-06, "loss": 0.001, "step": 84500 }, { "epoch": 0.5559758688973244, "grad_norm": 0.06335958487177026, "learning_rate": 9.065637110907844e-06, "loss": 0.0016, "step": 84510 }, { "epoch": 0.5560416570725578, "grad_norm": 0.09588929852911943, "learning_rate": 9.065302902303863e-06, "loss": 0.0024, "step": 84520 }, { "epoch": 0.5561074452477912, "grad_norm": 0.18297874878058937, "learning_rate": 9.064968640102374e-06, "loss": 0.0021, "step": 84530 }, { "epoch": 0.5561732334230245, "grad_norm": 0.014775265884504332, "learning_rate": 9.064634324307786e-06, "loss": 0.0028, "step": 84540 }, { "epoch": 0.5562390215982579, "grad_norm": 0.050915680533202515, "learning_rate": 9.064299954924505e-06, "loss": 0.0015, "step": 84550 }, { "epoch": 0.5563048097734913, "grad_norm": 0.07367899250463071, "learning_rate": 9.063965531956938e-06, "loss": 0.0016, "step": 84560 }, { "epoch": 0.5563705979487247, "grad_norm": 0.1540555349378389, "learning_rate": 9.063631055409496e-06, "loss": 0.0037, "step": 84570 }, { "epoch": 0.5564363861239581, "grad_norm": 0.031145158535837637, "learning_rate": 9.063296525286587e-06, "loss": 0.002, "step": 84580 }, { "epoch": 0.5565021742991915, "grad_norm": 0.04507400809110883, "learning_rate": 9.062961941592623e-06, "loss": 0.0008, "step": 84590 }, { "epoch": 0.5565679624744249, "grad_norm": 0.02777712572438289, "learning_rate": 9.062627304332015e-06, "loss": 0.0012, "step": 84600 }, { "epoch": 0.5566337506496583, "grad_norm": 0.032592937105266125, "learning_rate": 9.062292613509177e-06, "loss": 0.0017, "step": 84610 }, { "epoch": 0.5566995388248916, "grad_norm": 0.07901893068229447, "learning_rate": 9.061957869128515e-06, "loss": 0.0015, "step": 84620 }, { "epoch": 0.556765327000125, "grad_norm": 0.0515826194099607, "learning_rate": 9.06162307119445e-06, "loss": 0.0017, "step": 84630 }, { "epoch": 0.5568311151753583, "grad_norm": 0.14393980221271768, "learning_rate": 9.061288219711391e-06, "loss": 0.0036, "step": 84640 }, { "epoch": 0.5568969033505917, "grad_norm": 0.1280696294836463, "learning_rate": 9.060953314683753e-06, "loss": 0.0028, "step": 84650 }, { "epoch": 0.5569626915258251, "grad_norm": 0.07154247843419427, "learning_rate": 9.060618356115955e-06, "loss": 0.0011, "step": 84660 }, { "epoch": 0.5570284797010585, "grad_norm": 0.11559865605780921, "learning_rate": 9.060283344012412e-06, "loss": 0.003, "step": 84670 }, { "epoch": 0.5570942678762919, "grad_norm": 0.0188866938066161, "learning_rate": 9.059948278377538e-06, "loss": 0.0015, "step": 84680 }, { "epoch": 0.5571600560515253, "grad_norm": 0.08943284142143963, "learning_rate": 9.059613159215752e-06, "loss": 0.0017, "step": 84690 }, { "epoch": 0.5572258442267587, "grad_norm": 0.06473077960308182, "learning_rate": 9.059277986531474e-06, "loss": 0.0015, "step": 84700 }, { "epoch": 0.5572916324019921, "grad_norm": 0.1736445275294251, "learning_rate": 9.058942760329122e-06, "loss": 0.0022, "step": 84710 }, { "epoch": 0.5573574205772255, "grad_norm": 0.007702276523466937, "learning_rate": 9.058607480613114e-06, "loss": 0.0034, "step": 84720 }, { "epoch": 0.5574232087524589, "grad_norm": 0.02785161979520554, "learning_rate": 9.05827214738787e-06, "loss": 0.0015, "step": 84730 }, { "epoch": 0.5574889969276923, "grad_norm": 0.034542722476393325, "learning_rate": 9.057936760657815e-06, "loss": 0.0027, "step": 84740 }, { "epoch": 0.5575547851029256, "grad_norm": 0.03405942336560089, "learning_rate": 9.057601320427369e-06, "loss": 0.0019, "step": 84750 }, { "epoch": 0.557620573278159, "grad_norm": 0.057403025152115844, "learning_rate": 9.057265826700951e-06, "loss": 0.0012, "step": 84760 }, { "epoch": 0.5576863614533923, "grad_norm": 0.023280228148470635, "learning_rate": 9.05693027948299e-06, "loss": 0.0009, "step": 84770 }, { "epoch": 0.5577521496286257, "grad_norm": 0.030997204245013094, "learning_rate": 9.056594678777905e-06, "loss": 0.0045, "step": 84780 }, { "epoch": 0.5578179378038591, "grad_norm": 0.09123801379224863, "learning_rate": 9.056259024590127e-06, "loss": 0.0014, "step": 84790 }, { "epoch": 0.5578837259790925, "grad_norm": 0.043583203629673245, "learning_rate": 9.055923316924073e-06, "loss": 0.0012, "step": 84800 }, { "epoch": 0.5579495141543259, "grad_norm": 0.04386006620619399, "learning_rate": 9.055587555784173e-06, "loss": 0.0019, "step": 84810 }, { "epoch": 0.5580153023295593, "grad_norm": 0.029546077813883406, "learning_rate": 9.055251741174854e-06, "loss": 0.0024, "step": 84820 }, { "epoch": 0.5580810905047927, "grad_norm": 0.05941952517120084, "learning_rate": 9.054915873100545e-06, "loss": 0.0029, "step": 84830 }, { "epoch": 0.5581468786800261, "grad_norm": 0.022670491151092636, "learning_rate": 9.05457995156567e-06, "loss": 0.0022, "step": 84840 }, { "epoch": 0.5582126668552594, "grad_norm": 0.03778629233731034, "learning_rate": 9.054243976574661e-06, "loss": 0.0013, "step": 84850 }, { "epoch": 0.5582784550304928, "grad_norm": 0.022307558926334844, "learning_rate": 9.053907948131946e-06, "loss": 0.0021, "step": 84860 }, { "epoch": 0.5583442432057262, "grad_norm": 0.14762779582808364, "learning_rate": 9.053571866241957e-06, "loss": 0.0022, "step": 84870 }, { "epoch": 0.5584100313809596, "grad_norm": 0.07355715993614907, "learning_rate": 9.053235730909124e-06, "loss": 0.0023, "step": 84880 }, { "epoch": 0.558475819556193, "grad_norm": 0.10032005918232545, "learning_rate": 9.052899542137878e-06, "loss": 0.0016, "step": 84890 }, { "epoch": 0.5585416077314264, "grad_norm": 0.13372327220989602, "learning_rate": 9.052563299932651e-06, "loss": 0.002, "step": 84900 }, { "epoch": 0.5586073959066598, "grad_norm": 0.08835665346042586, "learning_rate": 9.052227004297877e-06, "loss": 0.0019, "step": 84910 }, { "epoch": 0.5586731840818931, "grad_norm": 0.11353840897163164, "learning_rate": 9.051890655237991e-06, "loss": 0.002, "step": 84920 }, { "epoch": 0.5587389722571265, "grad_norm": 0.009922901320763825, "learning_rate": 9.051554252757426e-06, "loss": 0.0016, "step": 84930 }, { "epoch": 0.5588047604323599, "grad_norm": 0.06344304184176121, "learning_rate": 9.051217796860616e-06, "loss": 0.0031, "step": 84940 }, { "epoch": 0.5588705486075932, "grad_norm": 0.02426252848490209, "learning_rate": 9.050881287552e-06, "loss": 0.0014, "step": 84950 }, { "epoch": 0.5589363367828266, "grad_norm": 0.04477150951617983, "learning_rate": 9.05054472483601e-06, "loss": 0.0013, "step": 84960 }, { "epoch": 0.55900212495806, "grad_norm": 0.01375637285827515, "learning_rate": 9.050208108717089e-06, "loss": 0.0015, "step": 84970 }, { "epoch": 0.5590679131332934, "grad_norm": 0.007507799487905911, "learning_rate": 9.049871439199672e-06, "loss": 0.0013, "step": 84980 }, { "epoch": 0.5591337013085268, "grad_norm": 0.02308477022394417, "learning_rate": 9.049534716288196e-06, "loss": 0.0041, "step": 84990 }, { "epoch": 0.5591994894837602, "grad_norm": 0.08699019172238026, "learning_rate": 9.049197939987102e-06, "loss": 0.0015, "step": 85000 }, { "epoch": 0.5592652776589936, "grad_norm": 0.006586878201941806, "learning_rate": 9.048861110300831e-06, "loss": 0.0011, "step": 85010 }, { "epoch": 0.559331065834227, "grad_norm": 0.011419673335557429, "learning_rate": 9.048524227233824e-06, "loss": 0.0024, "step": 85020 }, { "epoch": 0.5593968540094604, "grad_norm": 0.106469507389277, "learning_rate": 9.04818729079052e-06, "loss": 0.0017, "step": 85030 }, { "epoch": 0.5594626421846938, "grad_norm": 0.10336594516506384, "learning_rate": 9.047850300975365e-06, "loss": 0.0013, "step": 85040 }, { "epoch": 0.559528430359927, "grad_norm": 0.0421729164087826, "learning_rate": 9.047513257792797e-06, "loss": 0.0016, "step": 85050 }, { "epoch": 0.5595942185351604, "grad_norm": 0.030754554784756555, "learning_rate": 9.047176161247265e-06, "loss": 0.0021, "step": 85060 }, { "epoch": 0.5596600067103938, "grad_norm": 0.12120168307034905, "learning_rate": 9.046839011343209e-06, "loss": 0.0021, "step": 85070 }, { "epoch": 0.5597257948856272, "grad_norm": 0.002121051089647578, "learning_rate": 9.046501808085075e-06, "loss": 0.0024, "step": 85080 }, { "epoch": 0.5597915830608606, "grad_norm": 0.05628651211560145, "learning_rate": 9.046164551477312e-06, "loss": 0.0033, "step": 85090 }, { "epoch": 0.559857371236094, "grad_norm": 0.1064605293511976, "learning_rate": 9.045827241524362e-06, "loss": 0.0024, "step": 85100 }, { "epoch": 0.5599231594113274, "grad_norm": 0.012920269479767038, "learning_rate": 9.045489878230675e-06, "loss": 0.0016, "step": 85110 }, { "epoch": 0.5599889475865608, "grad_norm": 0.1761855208831612, "learning_rate": 9.045152461600698e-06, "loss": 0.0013, "step": 85120 }, { "epoch": 0.5600547357617942, "grad_norm": 0.008353729790829718, "learning_rate": 9.044814991638878e-06, "loss": 0.001, "step": 85130 }, { "epoch": 0.5601205239370276, "grad_norm": 0.025708485416008807, "learning_rate": 9.044477468349667e-06, "loss": 0.0018, "step": 85140 }, { "epoch": 0.5601863121122609, "grad_norm": 0.2295029472355909, "learning_rate": 9.044139891737512e-06, "loss": 0.0014, "step": 85150 }, { "epoch": 0.5602521002874943, "grad_norm": 0.07633190499288385, "learning_rate": 9.043802261806866e-06, "loss": 0.003, "step": 85160 }, { "epoch": 0.5603178884627277, "grad_norm": 0.041530975156812006, "learning_rate": 9.04346457856218e-06, "loss": 0.0009, "step": 85170 }, { "epoch": 0.5603836766379611, "grad_norm": 0.07574433021736166, "learning_rate": 9.043126842007905e-06, "loss": 0.0018, "step": 85180 }, { "epoch": 0.5604494648131945, "grad_norm": 0.022144836395771757, "learning_rate": 9.042789052148496e-06, "loss": 0.002, "step": 85190 }, { "epoch": 0.5605152529884279, "grad_norm": 0.05898122265049342, "learning_rate": 9.042451208988405e-06, "loss": 0.0017, "step": 85200 }, { "epoch": 0.5605810411636613, "grad_norm": 0.029684161578932996, "learning_rate": 9.042113312532084e-06, "loss": 0.0012, "step": 85210 }, { "epoch": 0.5606468293388946, "grad_norm": 0.01802811049049469, "learning_rate": 9.041775362783991e-06, "loss": 0.0019, "step": 85220 }, { "epoch": 0.560712617514128, "grad_norm": 0.1334695534955986, "learning_rate": 9.041437359748582e-06, "loss": 0.0033, "step": 85230 }, { "epoch": 0.5607784056893614, "grad_norm": 0.12295350949280093, "learning_rate": 9.04109930343031e-06, "loss": 0.0016, "step": 85240 }, { "epoch": 0.5608441938645948, "grad_norm": 0.09613940600483974, "learning_rate": 9.040761193833634e-06, "loss": 0.0022, "step": 85250 }, { "epoch": 0.5609099820398281, "grad_norm": 0.020632542074944665, "learning_rate": 9.040423030963012e-06, "loss": 0.0016, "step": 85260 }, { "epoch": 0.5609757702150615, "grad_norm": 0.09883538734634757, "learning_rate": 9.040084814822901e-06, "loss": 0.0019, "step": 85270 }, { "epoch": 0.5610415583902949, "grad_norm": 0.15841036646227577, "learning_rate": 9.039746545417762e-06, "loss": 0.0017, "step": 85280 }, { "epoch": 0.5611073465655283, "grad_norm": 0.03222050086485589, "learning_rate": 9.039408222752052e-06, "loss": 0.0008, "step": 85290 }, { "epoch": 0.5611731347407617, "grad_norm": 0.10070482774856947, "learning_rate": 9.039069846830237e-06, "loss": 0.0017, "step": 85300 }, { "epoch": 0.5612389229159951, "grad_norm": 0.06258290739899448, "learning_rate": 9.038731417656773e-06, "loss": 0.0012, "step": 85310 }, { "epoch": 0.5613047110912285, "grad_norm": 0.006321903193946971, "learning_rate": 9.03839293523612e-06, "loss": 0.0028, "step": 85320 }, { "epoch": 0.5613704992664619, "grad_norm": 0.13175803708460196, "learning_rate": 9.038054399572746e-06, "loss": 0.0033, "step": 85330 }, { "epoch": 0.5614362874416953, "grad_norm": 0.03944890469572403, "learning_rate": 9.037715810671115e-06, "loss": 0.0007, "step": 85340 }, { "epoch": 0.5615020756169287, "grad_norm": 0.017813360547238397, "learning_rate": 9.037377168535685e-06, "loss": 0.0022, "step": 85350 }, { "epoch": 0.561567863792162, "grad_norm": 0.059169443727303865, "learning_rate": 9.037038473170925e-06, "loss": 0.0006, "step": 85360 }, { "epoch": 0.5616336519673953, "grad_norm": 0.04903527037281202, "learning_rate": 9.036699724581299e-06, "loss": 0.001, "step": 85370 }, { "epoch": 0.5616994401426287, "grad_norm": 0.19201055673295983, "learning_rate": 9.036360922771272e-06, "loss": 0.0013, "step": 85380 }, { "epoch": 0.5617652283178621, "grad_norm": 0.08975369215988856, "learning_rate": 9.036022067745315e-06, "loss": 0.0016, "step": 85390 }, { "epoch": 0.5618310164930955, "grad_norm": 0.03265900524887951, "learning_rate": 9.03568315950789e-06, "loss": 0.002, "step": 85400 }, { "epoch": 0.5618968046683289, "grad_norm": 0.058558147234357184, "learning_rate": 9.035344198063471e-06, "loss": 0.0013, "step": 85410 }, { "epoch": 0.5619625928435623, "grad_norm": 0.07636458117236852, "learning_rate": 9.035005183416522e-06, "loss": 0.0021, "step": 85420 }, { "epoch": 0.5620283810187957, "grad_norm": 0.03720171167710396, "learning_rate": 9.034666115571515e-06, "loss": 0.003, "step": 85430 }, { "epoch": 0.5620941691940291, "grad_norm": 0.02266569979571492, "learning_rate": 9.03432699453292e-06, "loss": 0.0019, "step": 85440 }, { "epoch": 0.5621599573692625, "grad_norm": 0.09243264088521319, "learning_rate": 9.033987820305209e-06, "loss": 0.0025, "step": 85450 }, { "epoch": 0.5622257455444958, "grad_norm": 0.003187057819353007, "learning_rate": 9.03364859289285e-06, "loss": 0.0011, "step": 85460 }, { "epoch": 0.5622915337197292, "grad_norm": 0.016623162016624227, "learning_rate": 9.033309312300321e-06, "loss": 0.0018, "step": 85470 }, { "epoch": 0.5623573218949626, "grad_norm": 0.00280233398619982, "learning_rate": 9.032969978532088e-06, "loss": 0.0016, "step": 85480 }, { "epoch": 0.562423110070196, "grad_norm": 0.08880345743900409, "learning_rate": 9.032630591592633e-06, "loss": 0.0014, "step": 85490 }, { "epoch": 0.5624888982454294, "grad_norm": 0.02553491569885432, "learning_rate": 9.032291151486424e-06, "loss": 0.0015, "step": 85500 }, { "epoch": 0.5625546864206628, "grad_norm": 0.0740416492929273, "learning_rate": 9.03195165821794e-06, "loss": 0.0014, "step": 85510 }, { "epoch": 0.5626204745958961, "grad_norm": 0.005390558536757284, "learning_rate": 9.031612111791655e-06, "loss": 0.0018, "step": 85520 }, { "epoch": 0.5626862627711295, "grad_norm": 0.0271638580041662, "learning_rate": 9.031272512212046e-06, "loss": 0.0014, "step": 85530 }, { "epoch": 0.5627520509463629, "grad_norm": 0.14844050191273542, "learning_rate": 9.03093285948359e-06, "loss": 0.0022, "step": 85540 }, { "epoch": 0.5628178391215963, "grad_norm": 0.0399637546613999, "learning_rate": 9.030593153610767e-06, "loss": 0.0021, "step": 85550 }, { "epoch": 0.5628836272968296, "grad_norm": 0.026686584126258555, "learning_rate": 9.030253394598054e-06, "loss": 0.0006, "step": 85560 }, { "epoch": 0.562949415472063, "grad_norm": 0.03906292755561246, "learning_rate": 9.02991358244993e-06, "loss": 0.0019, "step": 85570 }, { "epoch": 0.5630152036472964, "grad_norm": 0.07739071225664904, "learning_rate": 9.029573717170877e-06, "loss": 0.0011, "step": 85580 }, { "epoch": 0.5630809918225298, "grad_norm": 0.0755396985653913, "learning_rate": 9.029233798765373e-06, "loss": 0.0024, "step": 85590 }, { "epoch": 0.5631467799977632, "grad_norm": 0.2089696982219718, "learning_rate": 9.028893827237904e-06, "loss": 0.0017, "step": 85600 }, { "epoch": 0.5632125681729966, "grad_norm": 0.09642786436985896, "learning_rate": 9.028553802592947e-06, "loss": 0.0012, "step": 85610 }, { "epoch": 0.56327835634823, "grad_norm": 0.05627321492182703, "learning_rate": 9.028213724834987e-06, "loss": 0.0006, "step": 85620 }, { "epoch": 0.5633441445234634, "grad_norm": 0.07901174242213307, "learning_rate": 9.02787359396851e-06, "loss": 0.0013, "step": 85630 }, { "epoch": 0.5634099326986968, "grad_norm": 0.01666250569486461, "learning_rate": 9.027533409997997e-06, "loss": 0.0012, "step": 85640 }, { "epoch": 0.5634757208739302, "grad_norm": 0.040964676517525576, "learning_rate": 9.027193172927936e-06, "loss": 0.0009, "step": 85650 }, { "epoch": 0.5635415090491636, "grad_norm": 0.009072067199207421, "learning_rate": 9.026852882762808e-06, "loss": 0.0019, "step": 85660 }, { "epoch": 0.5636072972243968, "grad_norm": 0.019231320571795982, "learning_rate": 9.026512539507106e-06, "loss": 0.0015, "step": 85670 }, { "epoch": 0.5636730853996302, "grad_norm": 0.032497041668171744, "learning_rate": 9.026172143165312e-06, "loss": 0.0008, "step": 85680 }, { "epoch": 0.5637388735748636, "grad_norm": 0.14568888445351275, "learning_rate": 9.025831693741916e-06, "loss": 0.0025, "step": 85690 }, { "epoch": 0.563804661750097, "grad_norm": 0.07099032361459419, "learning_rate": 9.025491191241407e-06, "loss": 0.0016, "step": 85700 }, { "epoch": 0.5638704499253304, "grad_norm": 0.029395320498732846, "learning_rate": 9.025150635668272e-06, "loss": 0.0041, "step": 85710 }, { "epoch": 0.5639362381005638, "grad_norm": 0.03315368931035807, "learning_rate": 9.024810027027003e-06, "loss": 0.0008, "step": 85720 }, { "epoch": 0.5640020262757972, "grad_norm": 0.028383398282425327, "learning_rate": 9.024469365322088e-06, "loss": 0.0006, "step": 85730 }, { "epoch": 0.5640678144510306, "grad_norm": 0.13439886365216788, "learning_rate": 9.024128650558021e-06, "loss": 0.0014, "step": 85740 }, { "epoch": 0.564133602626264, "grad_norm": 0.024054089019104063, "learning_rate": 9.023787882739295e-06, "loss": 0.0014, "step": 85750 }, { "epoch": 0.5641993908014974, "grad_norm": 0.03519947984531602, "learning_rate": 9.023447061870398e-06, "loss": 0.0019, "step": 85760 }, { "epoch": 0.5642651789767307, "grad_norm": 0.033013908807986846, "learning_rate": 9.023106187955828e-06, "loss": 0.0037, "step": 85770 }, { "epoch": 0.5643309671519641, "grad_norm": 0.11140896738927114, "learning_rate": 9.022765261000077e-06, "loss": 0.002, "step": 85780 }, { "epoch": 0.5643967553271975, "grad_norm": 0.10002319795461638, "learning_rate": 9.022424281007642e-06, "loss": 0.0014, "step": 85790 }, { "epoch": 0.5644625435024309, "grad_norm": 0.008817172816598664, "learning_rate": 9.022083247983015e-06, "loss": 0.0017, "step": 85800 }, { "epoch": 0.5645283316776643, "grad_norm": 0.07249139867763087, "learning_rate": 9.021742161930694e-06, "loss": 0.0021, "step": 85810 }, { "epoch": 0.5645941198528976, "grad_norm": 0.000480934491399102, "learning_rate": 9.021401022855177e-06, "loss": 0.001, "step": 85820 }, { "epoch": 0.564659908028131, "grad_norm": 0.2547827449939102, "learning_rate": 9.02105983076096e-06, "loss": 0.0036, "step": 85830 }, { "epoch": 0.5647256962033644, "grad_norm": 0.022027225462880665, "learning_rate": 9.020718585652543e-06, "loss": 0.0016, "step": 85840 }, { "epoch": 0.5647914843785978, "grad_norm": 0.0901720906041522, "learning_rate": 9.020377287534422e-06, "loss": 0.0011, "step": 85850 }, { "epoch": 0.5648572725538312, "grad_norm": 0.09006233094583618, "learning_rate": 9.0200359364111e-06, "loss": 0.0029, "step": 85860 }, { "epoch": 0.5649230607290645, "grad_norm": 0.03314922360797537, "learning_rate": 9.019694532287078e-06, "loss": 0.0015, "step": 85870 }, { "epoch": 0.5649888489042979, "grad_norm": 0.011879688899068118, "learning_rate": 9.019353075166852e-06, "loss": 0.0014, "step": 85880 }, { "epoch": 0.5650546370795313, "grad_norm": 0.2870430703784006, "learning_rate": 9.01901156505493e-06, "loss": 0.002, "step": 85890 }, { "epoch": 0.5651204252547647, "grad_norm": 0.023162015021494557, "learning_rate": 9.01867000195581e-06, "loss": 0.0025, "step": 85900 }, { "epoch": 0.5651862134299981, "grad_norm": 0.0027022863187417985, "learning_rate": 9.018328385873995e-06, "loss": 0.0031, "step": 85910 }, { "epoch": 0.5652520016052315, "grad_norm": 0.008627459578417129, "learning_rate": 9.017986716813992e-06, "loss": 0.0022, "step": 85920 }, { "epoch": 0.5653177897804649, "grad_norm": 0.027591200331929577, "learning_rate": 9.017644994780306e-06, "loss": 0.0009, "step": 85930 }, { "epoch": 0.5653835779556983, "grad_norm": 0.05362382192918125, "learning_rate": 9.017303219777437e-06, "loss": 0.0016, "step": 85940 }, { "epoch": 0.5654493661309317, "grad_norm": 0.02417416247585998, "learning_rate": 9.016961391809898e-06, "loss": 0.0026, "step": 85950 }, { "epoch": 0.5655151543061651, "grad_norm": 0.03950660832604168, "learning_rate": 9.016619510882192e-06, "loss": 0.0007, "step": 85960 }, { "epoch": 0.5655809424813983, "grad_norm": 0.09564019579511188, "learning_rate": 9.016277576998826e-06, "loss": 0.002, "step": 85970 }, { "epoch": 0.5656467306566317, "grad_norm": 0.05548875771617618, "learning_rate": 9.01593559016431e-06, "loss": 0.0015, "step": 85980 }, { "epoch": 0.5657125188318651, "grad_norm": 0.027747491331768903, "learning_rate": 9.015593550383149e-06, "loss": 0.0005, "step": 85990 }, { "epoch": 0.5657783070070985, "grad_norm": 0.023356162796319933, "learning_rate": 9.015251457659854e-06, "loss": 0.0015, "step": 86000 }, { "epoch": 0.5658440951823319, "grad_norm": 0.003312470299508355, "learning_rate": 9.01490931199894e-06, "loss": 0.0017, "step": 86010 }, { "epoch": 0.5659098833575653, "grad_norm": 0.17828327043639258, "learning_rate": 9.014567113404913e-06, "loss": 0.0015, "step": 86020 }, { "epoch": 0.5659756715327987, "grad_norm": 0.36099614077108666, "learning_rate": 9.014224861882285e-06, "loss": 0.0019, "step": 86030 }, { "epoch": 0.5660414597080321, "grad_norm": 0.03973439272836404, "learning_rate": 9.013882557435572e-06, "loss": 0.0018, "step": 86040 }, { "epoch": 0.5661072478832655, "grad_norm": 0.04842975335489502, "learning_rate": 9.013540200069283e-06, "loss": 0.002, "step": 86050 }, { "epoch": 0.5661730360584989, "grad_norm": 0.09916496746861529, "learning_rate": 9.01319778978793e-06, "loss": 0.0021, "step": 86060 }, { "epoch": 0.5662388242337322, "grad_norm": 0.12782403613206544, "learning_rate": 9.012855326596033e-06, "loss": 0.0021, "step": 86070 }, { "epoch": 0.5663046124089656, "grad_norm": 0.30683341848059664, "learning_rate": 9.012512810498102e-06, "loss": 0.002, "step": 86080 }, { "epoch": 0.566370400584199, "grad_norm": 0.000752057029081226, "learning_rate": 9.012170241498657e-06, "loss": 0.0015, "step": 86090 }, { "epoch": 0.5664361887594324, "grad_norm": 0.14162830144897762, "learning_rate": 9.011827619602212e-06, "loss": 0.0016, "step": 86100 }, { "epoch": 0.5665019769346658, "grad_norm": 0.05755372407464669, "learning_rate": 9.011484944813285e-06, "loss": 0.0017, "step": 86110 }, { "epoch": 0.5665677651098991, "grad_norm": 0.08648774879657264, "learning_rate": 9.011142217136392e-06, "loss": 0.0023, "step": 86120 }, { "epoch": 0.5666335532851325, "grad_norm": 0.1393089283283489, "learning_rate": 9.010799436576054e-06, "loss": 0.0017, "step": 86130 }, { "epoch": 0.5666993414603659, "grad_norm": 0.2221262106560605, "learning_rate": 9.01045660313679e-06, "loss": 0.0021, "step": 86140 }, { "epoch": 0.5667651296355993, "grad_norm": 0.0031673844600127784, "learning_rate": 9.010113716823118e-06, "loss": 0.0018, "step": 86150 }, { "epoch": 0.5668309178108327, "grad_norm": 0.047158814481570104, "learning_rate": 9.009770777639562e-06, "loss": 0.0009, "step": 86160 }, { "epoch": 0.5668967059860661, "grad_norm": 0.021764302308314876, "learning_rate": 9.009427785590639e-06, "loss": 0.0019, "step": 86170 }, { "epoch": 0.5669624941612994, "grad_norm": 0.04554630604861876, "learning_rate": 9.009084740680876e-06, "loss": 0.0019, "step": 86180 }, { "epoch": 0.5670282823365328, "grad_norm": 0.048955714933526844, "learning_rate": 9.008741642914791e-06, "loss": 0.0012, "step": 86190 }, { "epoch": 0.5670940705117662, "grad_norm": 0.032588859238617825, "learning_rate": 9.00839849229691e-06, "loss": 0.0025, "step": 86200 }, { "epoch": 0.5671598586869996, "grad_norm": 0.028584627695268258, "learning_rate": 9.008055288831758e-06, "loss": 0.0018, "step": 86210 }, { "epoch": 0.567225646862233, "grad_norm": 0.07681362610347658, "learning_rate": 9.007712032523856e-06, "loss": 0.0053, "step": 86220 }, { "epoch": 0.5672914350374664, "grad_norm": 0.023004147248231556, "learning_rate": 9.007368723377735e-06, "loss": 0.0027, "step": 86230 }, { "epoch": 0.5673572232126998, "grad_norm": 0.01618785510208525, "learning_rate": 9.007025361397917e-06, "loss": 0.0014, "step": 86240 }, { "epoch": 0.5674230113879332, "grad_norm": 0.1449779087214474, "learning_rate": 9.00668194658893e-06, "loss": 0.0034, "step": 86250 }, { "epoch": 0.5674887995631666, "grad_norm": 0.04808252025726622, "learning_rate": 9.006338478955303e-06, "loss": 0.0016, "step": 86260 }, { "epoch": 0.5675545877384, "grad_norm": 0.021183505774114632, "learning_rate": 9.005994958501563e-06, "loss": 0.0014, "step": 86270 }, { "epoch": 0.5676203759136332, "grad_norm": 0.051029909863638895, "learning_rate": 9.00565138523224e-06, "loss": 0.0023, "step": 86280 }, { "epoch": 0.5676861640888666, "grad_norm": 0.02874327478999398, "learning_rate": 9.005307759151862e-06, "loss": 0.0016, "step": 86290 }, { "epoch": 0.5677519522641, "grad_norm": 0.035733146299112145, "learning_rate": 9.00496408026496e-06, "loss": 0.0011, "step": 86300 }, { "epoch": 0.5678177404393334, "grad_norm": 0.05796266184129174, "learning_rate": 9.004620348576065e-06, "loss": 0.0017, "step": 86310 }, { "epoch": 0.5678835286145668, "grad_norm": 0.012142977611524938, "learning_rate": 9.004276564089711e-06, "loss": 0.001, "step": 86320 }, { "epoch": 0.5679493167898002, "grad_norm": 0.12161809622336141, "learning_rate": 9.003932726810427e-06, "loss": 0.0033, "step": 86330 }, { "epoch": 0.5680151049650336, "grad_norm": 0.012081884111150637, "learning_rate": 9.003588836742749e-06, "loss": 0.0012, "step": 86340 }, { "epoch": 0.568080893140267, "grad_norm": 0.03726735881250147, "learning_rate": 9.00324489389121e-06, "loss": 0.0042, "step": 86350 }, { "epoch": 0.5681466813155004, "grad_norm": 0.12235182889875927, "learning_rate": 9.002900898260344e-06, "loss": 0.0026, "step": 86360 }, { "epoch": 0.5682124694907338, "grad_norm": 0.1192174559186706, "learning_rate": 9.002556849854689e-06, "loss": 0.0027, "step": 86370 }, { "epoch": 0.5682782576659671, "grad_norm": 0.06942771246671613, "learning_rate": 9.002212748678778e-06, "loss": 0.0023, "step": 86380 }, { "epoch": 0.5683440458412005, "grad_norm": 0.030796790277504364, "learning_rate": 9.001868594737147e-06, "loss": 0.0018, "step": 86390 }, { "epoch": 0.5684098340164339, "grad_norm": 0.06451281998092125, "learning_rate": 9.001524388034335e-06, "loss": 0.0009, "step": 86400 }, { "epoch": 0.5684756221916673, "grad_norm": 0.17878070578021002, "learning_rate": 9.00118012857488e-06, "loss": 0.0013, "step": 86410 }, { "epoch": 0.5685414103669006, "grad_norm": 0.07992963031197371, "learning_rate": 9.000835816363322e-06, "loss": 0.0024, "step": 86420 }, { "epoch": 0.568607198542134, "grad_norm": 1.1723288857444814, "learning_rate": 9.000491451404198e-06, "loss": 0.0018, "step": 86430 }, { "epoch": 0.5686729867173674, "grad_norm": 0.05011848083506467, "learning_rate": 9.000147033702049e-06, "loss": 0.0024, "step": 86440 }, { "epoch": 0.5687387748926008, "grad_norm": 0.07844232417989033, "learning_rate": 8.999802563261415e-06, "loss": 0.0019, "step": 86450 }, { "epoch": 0.5688045630678342, "grad_norm": 0.015119203962754695, "learning_rate": 8.999458040086841e-06, "loss": 0.0006, "step": 86460 }, { "epoch": 0.5688703512430676, "grad_norm": 0.052782278265775695, "learning_rate": 8.999113464182866e-06, "loss": 0.0017, "step": 86470 }, { "epoch": 0.5689361394183009, "grad_norm": 0.22326323112765986, "learning_rate": 8.998768835554033e-06, "loss": 0.0019, "step": 86480 }, { "epoch": 0.5690019275935343, "grad_norm": 0.01907218619222915, "learning_rate": 8.998424154204888e-06, "loss": 0.0014, "step": 86490 }, { "epoch": 0.5690677157687677, "grad_norm": 0.06503691495367092, "learning_rate": 8.998079420139974e-06, "loss": 0.0026, "step": 86500 }, { "epoch": 0.5691335039440011, "grad_norm": 0.027379185164763665, "learning_rate": 8.997734633363833e-06, "loss": 0.0015, "step": 86510 }, { "epoch": 0.5691992921192345, "grad_norm": 0.04449372716899968, "learning_rate": 8.997389793881016e-06, "loss": 0.0014, "step": 86520 }, { "epoch": 0.5692650802944679, "grad_norm": 0.00417600947098854, "learning_rate": 8.997044901696065e-06, "loss": 0.0018, "step": 86530 }, { "epoch": 0.5693308684697013, "grad_norm": 0.09110168702008592, "learning_rate": 8.996699956813532e-06, "loss": 0.0021, "step": 86540 }, { "epoch": 0.5693966566449347, "grad_norm": 0.012174726215415708, "learning_rate": 8.99635495923796e-06, "loss": 0.0024, "step": 86550 }, { "epoch": 0.5694624448201681, "grad_norm": 0.062405570752253, "learning_rate": 8.9960099089739e-06, "loss": 0.0025, "step": 86560 }, { "epoch": 0.5695282329954015, "grad_norm": 0.0651005335131293, "learning_rate": 8.9956648060259e-06, "loss": 0.0022, "step": 86570 }, { "epoch": 0.5695940211706348, "grad_norm": 0.29727806636477555, "learning_rate": 8.99531965039851e-06, "loss": 0.002, "step": 86580 }, { "epoch": 0.5696598093458681, "grad_norm": 0.012751178938362865, "learning_rate": 8.994974442096282e-06, "loss": 0.0009, "step": 86590 }, { "epoch": 0.5697255975211015, "grad_norm": 0.01427080619860831, "learning_rate": 8.994629181123765e-06, "loss": 0.0016, "step": 86600 }, { "epoch": 0.5697913856963349, "grad_norm": 0.18087979004800664, "learning_rate": 8.994283867485513e-06, "loss": 0.0027, "step": 86610 }, { "epoch": 0.5698571738715683, "grad_norm": 0.06145325961405303, "learning_rate": 8.993938501186077e-06, "loss": 0.0013, "step": 86620 }, { "epoch": 0.5699229620468017, "grad_norm": 0.05584164871854337, "learning_rate": 8.993593082230013e-06, "loss": 0.002, "step": 86630 }, { "epoch": 0.5699887502220351, "grad_norm": 0.010522216113331257, "learning_rate": 8.993247610621873e-06, "loss": 0.0022, "step": 86640 }, { "epoch": 0.5700545383972685, "grad_norm": 0.0186303324408601, "learning_rate": 8.99290208636621e-06, "loss": 0.0017, "step": 86650 }, { "epoch": 0.5701203265725019, "grad_norm": 0.18817424473033173, "learning_rate": 8.992556509467584e-06, "loss": 0.0015, "step": 86660 }, { "epoch": 0.5701861147477353, "grad_norm": 0.050534905533227495, "learning_rate": 8.992210879930547e-06, "loss": 0.0014, "step": 86670 }, { "epoch": 0.5702519029229687, "grad_norm": 0.031005455658262925, "learning_rate": 8.991865197759659e-06, "loss": 0.0019, "step": 86680 }, { "epoch": 0.570317691098202, "grad_norm": 0.0019199598251801735, "learning_rate": 8.991519462959475e-06, "loss": 0.0017, "step": 86690 }, { "epoch": 0.5703834792734354, "grad_norm": 0.09384915490897856, "learning_rate": 8.991173675534555e-06, "loss": 0.0025, "step": 86700 }, { "epoch": 0.5704492674486688, "grad_norm": 0.007142433869062804, "learning_rate": 8.990827835489454e-06, "loss": 0.001, "step": 86710 }, { "epoch": 0.5705150556239021, "grad_norm": 0.021405457968674144, "learning_rate": 8.990481942828738e-06, "loss": 0.0023, "step": 86720 }, { "epoch": 0.5705808437991355, "grad_norm": 0.05644010975216418, "learning_rate": 8.990135997556963e-06, "loss": 0.0011, "step": 86730 }, { "epoch": 0.5706466319743689, "grad_norm": 0.012931949668875743, "learning_rate": 8.98978999967869e-06, "loss": 0.001, "step": 86740 }, { "epoch": 0.5707124201496023, "grad_norm": 0.012377530480980756, "learning_rate": 8.989443949198483e-06, "loss": 0.0025, "step": 86750 }, { "epoch": 0.5707782083248357, "grad_norm": 0.017945976735666787, "learning_rate": 8.989097846120903e-06, "loss": 0.0019, "step": 86760 }, { "epoch": 0.5708439965000691, "grad_norm": 0.06710533002076076, "learning_rate": 8.988751690450512e-06, "loss": 0.0028, "step": 86770 }, { "epoch": 0.5709097846753025, "grad_norm": 0.05875678978934809, "learning_rate": 8.988405482191876e-06, "loss": 0.0016, "step": 86780 }, { "epoch": 0.5709755728505358, "grad_norm": 0.12402996067997604, "learning_rate": 8.98805922134956e-06, "loss": 0.0011, "step": 86790 }, { "epoch": 0.5710413610257692, "grad_norm": 0.0659353441900121, "learning_rate": 8.987712907928123e-06, "loss": 0.0027, "step": 86800 }, { "epoch": 0.5711071492010026, "grad_norm": 0.015524186453030693, "learning_rate": 8.987366541932139e-06, "loss": 0.0014, "step": 86810 }, { "epoch": 0.571172937376236, "grad_norm": 0.033865542888516294, "learning_rate": 8.98702012336617e-06, "loss": 0.0026, "step": 86820 }, { "epoch": 0.5712387255514694, "grad_norm": 0.04345741737299945, "learning_rate": 8.986673652234783e-06, "loss": 0.0017, "step": 86830 }, { "epoch": 0.5713045137267028, "grad_norm": 0.0026385168106628897, "learning_rate": 8.986327128542547e-06, "loss": 0.0014, "step": 86840 }, { "epoch": 0.5713703019019362, "grad_norm": 0.32003032661177955, "learning_rate": 8.985980552294032e-06, "loss": 0.0028, "step": 86850 }, { "epoch": 0.5714360900771696, "grad_norm": 0.013555677918543643, "learning_rate": 8.985633923493804e-06, "loss": 0.002, "step": 86860 }, { "epoch": 0.571501878252403, "grad_norm": 0.0887107168493831, "learning_rate": 8.985287242146436e-06, "loss": 0.0017, "step": 86870 }, { "epoch": 0.5715676664276363, "grad_norm": 0.08498534092800425, "learning_rate": 8.9849405082565e-06, "loss": 0.0037, "step": 86880 }, { "epoch": 0.5716334546028696, "grad_norm": 0.04532764104397678, "learning_rate": 8.984593721828563e-06, "loss": 0.0016, "step": 86890 }, { "epoch": 0.571699242778103, "grad_norm": 0.05540889458711427, "learning_rate": 8.984246882867198e-06, "loss": 0.0012, "step": 86900 }, { "epoch": 0.5717650309533364, "grad_norm": 0.14003301398229595, "learning_rate": 8.983899991376981e-06, "loss": 0.0017, "step": 86910 }, { "epoch": 0.5718308191285698, "grad_norm": 0.0011480775442735596, "learning_rate": 8.98355304736248e-06, "loss": 0.001, "step": 86920 }, { "epoch": 0.5718966073038032, "grad_norm": 0.009059630254631369, "learning_rate": 8.983206050828276e-06, "loss": 0.0009, "step": 86930 }, { "epoch": 0.5719623954790366, "grad_norm": 0.05317776253736076, "learning_rate": 8.982859001778938e-06, "loss": 0.0016, "step": 86940 }, { "epoch": 0.57202818365427, "grad_norm": 0.5116608250008098, "learning_rate": 8.982511900219046e-06, "loss": 0.0024, "step": 86950 }, { "epoch": 0.5720939718295034, "grad_norm": 0.14658357429275745, "learning_rate": 8.982164746153173e-06, "loss": 0.0037, "step": 86960 }, { "epoch": 0.5721597600047368, "grad_norm": 0.027882528232192373, "learning_rate": 8.981817539585897e-06, "loss": 0.0013, "step": 86970 }, { "epoch": 0.5722255481799702, "grad_norm": 0.1553445458130445, "learning_rate": 8.981470280521796e-06, "loss": 0.0012, "step": 86980 }, { "epoch": 0.5722913363552035, "grad_norm": 0.0987780806672622, "learning_rate": 8.981122968965447e-06, "loss": 0.0028, "step": 86990 }, { "epoch": 0.5723571245304369, "grad_norm": 0.04367098454365866, "learning_rate": 8.980775604921431e-06, "loss": 0.0014, "step": 87000 }, { "epoch": 0.5724229127056703, "grad_norm": 0.01730872672344937, "learning_rate": 8.980428188394325e-06, "loss": 0.0021, "step": 87010 }, { "epoch": 0.5724887008809036, "grad_norm": 0.07470464348567547, "learning_rate": 8.980080719388713e-06, "loss": 0.0019, "step": 87020 }, { "epoch": 0.572554489056137, "grad_norm": 0.05171900252684076, "learning_rate": 8.979733197909174e-06, "loss": 0.0011, "step": 87030 }, { "epoch": 0.5726202772313704, "grad_norm": 0.10913174766031601, "learning_rate": 8.97938562396029e-06, "loss": 0.0025, "step": 87040 }, { "epoch": 0.5726860654066038, "grad_norm": 0.1532379901034397, "learning_rate": 8.979037997546642e-06, "loss": 0.0019, "step": 87050 }, { "epoch": 0.5727518535818372, "grad_norm": 0.09007516729602134, "learning_rate": 8.978690318672816e-06, "loss": 0.0043, "step": 87060 }, { "epoch": 0.5728176417570706, "grad_norm": 0.1260001121516705, "learning_rate": 8.978342587343394e-06, "loss": 0.0016, "step": 87070 }, { "epoch": 0.572883429932304, "grad_norm": 0.039695355155551684, "learning_rate": 8.977994803562959e-06, "loss": 0.0025, "step": 87080 }, { "epoch": 0.5729492181075374, "grad_norm": 0.024916385037688032, "learning_rate": 8.9776469673361e-06, "loss": 0.0016, "step": 87090 }, { "epoch": 0.5730150062827707, "grad_norm": 0.10870043299948852, "learning_rate": 8.9772990786674e-06, "loss": 0.0024, "step": 87100 }, { "epoch": 0.5730807944580041, "grad_norm": 0.04485545179202879, "learning_rate": 8.976951137561449e-06, "loss": 0.0021, "step": 87110 }, { "epoch": 0.5731465826332375, "grad_norm": 0.04944173714052543, "learning_rate": 8.976603144022831e-06, "loss": 0.0015, "step": 87120 }, { "epoch": 0.5732123708084709, "grad_norm": 0.02309237036864213, "learning_rate": 8.976255098056135e-06, "loss": 0.0024, "step": 87130 }, { "epoch": 0.5732781589837043, "grad_norm": 0.0576314978051758, "learning_rate": 8.975906999665948e-06, "loss": 0.0017, "step": 87140 }, { "epoch": 0.5733439471589377, "grad_norm": 0.139677107280492, "learning_rate": 8.975558848856863e-06, "loss": 0.0041, "step": 87150 }, { "epoch": 0.5734097353341711, "grad_norm": 0.11762056063885667, "learning_rate": 8.975210645633467e-06, "loss": 0.0025, "step": 87160 }, { "epoch": 0.5734755235094045, "grad_norm": 0.12524510985562246, "learning_rate": 8.974862390000351e-06, "loss": 0.0037, "step": 87170 }, { "epoch": 0.5735413116846378, "grad_norm": 0.0040609248537003375, "learning_rate": 8.974514081962108e-06, "loss": 0.0022, "step": 87180 }, { "epoch": 0.5736070998598712, "grad_norm": 0.027523999728247824, "learning_rate": 8.974165721523331e-06, "loss": 0.0009, "step": 87190 }, { "epoch": 0.5736728880351045, "grad_norm": 0.1808030756337251, "learning_rate": 8.97381730868861e-06, "loss": 0.0027, "step": 87200 }, { "epoch": 0.5737386762103379, "grad_norm": 0.1637601626801989, "learning_rate": 8.97346884346254e-06, "loss": 0.0026, "step": 87210 }, { "epoch": 0.5738044643855713, "grad_norm": 0.01836716709017763, "learning_rate": 8.973120325849714e-06, "loss": 0.0009, "step": 87220 }, { "epoch": 0.5738702525608047, "grad_norm": 0.3375123560185118, "learning_rate": 8.972771755854728e-06, "loss": 0.0043, "step": 87230 }, { "epoch": 0.5739360407360381, "grad_norm": 0.040464676018696535, "learning_rate": 8.97242313348218e-06, "loss": 0.0011, "step": 87240 }, { "epoch": 0.5740018289112715, "grad_norm": 0.07363744522921087, "learning_rate": 8.972074458736662e-06, "loss": 0.0009, "step": 87250 }, { "epoch": 0.5740676170865049, "grad_norm": 0.04583833680061569, "learning_rate": 8.971725731622773e-06, "loss": 0.0028, "step": 87260 }, { "epoch": 0.5741334052617383, "grad_norm": 0.09159032955387213, "learning_rate": 8.97137695214511e-06, "loss": 0.0026, "step": 87270 }, { "epoch": 0.5741991934369717, "grad_norm": 0.14007273163677705, "learning_rate": 8.971028120308271e-06, "loss": 0.0026, "step": 87280 }, { "epoch": 0.5742649816122051, "grad_norm": 0.14859750678625205, "learning_rate": 8.970679236116858e-06, "loss": 0.0029, "step": 87290 }, { "epoch": 0.5743307697874384, "grad_norm": 0.10311345643901466, "learning_rate": 8.970330299575467e-06, "loss": 0.0026, "step": 87300 }, { "epoch": 0.5743965579626717, "grad_norm": 0.03183813358918576, "learning_rate": 8.9699813106887e-06, "loss": 0.0018, "step": 87310 }, { "epoch": 0.5744623461379051, "grad_norm": 0.048721234766906735, "learning_rate": 8.96963226946116e-06, "loss": 0.0026, "step": 87320 }, { "epoch": 0.5745281343131385, "grad_norm": 0.06124328079322069, "learning_rate": 8.969283175897444e-06, "loss": 0.0009, "step": 87330 }, { "epoch": 0.5745939224883719, "grad_norm": 0.2224178225257154, "learning_rate": 8.96893403000216e-06, "loss": 0.0018, "step": 87340 }, { "epoch": 0.5746597106636053, "grad_norm": 0.018998617475436522, "learning_rate": 8.968584831779907e-06, "loss": 0.0009, "step": 87350 }, { "epoch": 0.5747254988388387, "grad_norm": 0.09229205170154131, "learning_rate": 8.968235581235292e-06, "loss": 0.0014, "step": 87360 }, { "epoch": 0.5747912870140721, "grad_norm": 0.009434265054884861, "learning_rate": 8.967886278372917e-06, "loss": 0.0013, "step": 87370 }, { "epoch": 0.5748570751893055, "grad_norm": 0.11366951025297262, "learning_rate": 8.967536923197388e-06, "loss": 0.0022, "step": 87380 }, { "epoch": 0.5749228633645389, "grad_norm": 0.03014713577861462, "learning_rate": 8.967187515713312e-06, "loss": 0.0011, "step": 87390 }, { "epoch": 0.5749886515397722, "grad_norm": 0.06061439030463442, "learning_rate": 8.966838055925294e-06, "loss": 0.006, "step": 87400 }, { "epoch": 0.5750544397150056, "grad_norm": 0.011538441772313918, "learning_rate": 8.966488543837943e-06, "loss": 0.0018, "step": 87410 }, { "epoch": 0.575120227890239, "grad_norm": 0.015113977563281407, "learning_rate": 8.966138979455867e-06, "loss": 0.0022, "step": 87420 }, { "epoch": 0.5751860160654724, "grad_norm": 0.022825720021830073, "learning_rate": 8.965789362783672e-06, "loss": 0.002, "step": 87430 }, { "epoch": 0.5752518042407058, "grad_norm": 0.12022686110735518, "learning_rate": 8.96543969382597e-06, "loss": 0.0012, "step": 87440 }, { "epoch": 0.5753175924159392, "grad_norm": 0.06063103729678557, "learning_rate": 8.96508997258737e-06, "loss": 0.0016, "step": 87450 }, { "epoch": 0.5753833805911726, "grad_norm": 0.2826368715606634, "learning_rate": 8.964740199072485e-06, "loss": 0.0011, "step": 87460 }, { "epoch": 0.575449168766406, "grad_norm": 0.16038917108881748, "learning_rate": 8.964390373285923e-06, "loss": 0.0024, "step": 87470 }, { "epoch": 0.5755149569416393, "grad_norm": 0.002134200470857605, "learning_rate": 8.964040495232299e-06, "loss": 0.0015, "step": 87480 }, { "epoch": 0.5755807451168727, "grad_norm": 0.09318732429127914, "learning_rate": 8.963690564916223e-06, "loss": 0.0014, "step": 87490 }, { "epoch": 0.575646533292106, "grad_norm": 0.15194534614677568, "learning_rate": 8.963340582342313e-06, "loss": 0.0021, "step": 87500 }, { "epoch": 0.5757123214673394, "grad_norm": 0.039949437356106204, "learning_rate": 8.962990547515178e-06, "loss": 0.0012, "step": 87510 }, { "epoch": 0.5757781096425728, "grad_norm": 0.04607935488844359, "learning_rate": 8.962640460439435e-06, "loss": 0.0012, "step": 87520 }, { "epoch": 0.5758438978178062, "grad_norm": 0.01669942290948563, "learning_rate": 8.9622903211197e-06, "loss": 0.0011, "step": 87530 }, { "epoch": 0.5759096859930396, "grad_norm": 0.014657869892450028, "learning_rate": 8.961940129560592e-06, "loss": 0.0016, "step": 87540 }, { "epoch": 0.575975474168273, "grad_norm": 0.058781696483509886, "learning_rate": 8.961589885766721e-06, "loss": 0.0026, "step": 87550 }, { "epoch": 0.5760412623435064, "grad_norm": 0.06355274655881091, "learning_rate": 8.961239589742712e-06, "loss": 0.0016, "step": 87560 }, { "epoch": 0.5761070505187398, "grad_norm": 0.014322584790429544, "learning_rate": 8.960889241493178e-06, "loss": 0.0021, "step": 87570 }, { "epoch": 0.5761728386939732, "grad_norm": 0.1435350845437525, "learning_rate": 8.960538841022743e-06, "loss": 0.0019, "step": 87580 }, { "epoch": 0.5762386268692066, "grad_norm": 0.07410596041875875, "learning_rate": 8.960188388336022e-06, "loss": 0.0017, "step": 87590 }, { "epoch": 0.57630441504444, "grad_norm": 0.018813119559923003, "learning_rate": 8.959837883437637e-06, "loss": 0.0025, "step": 87600 }, { "epoch": 0.5763702032196732, "grad_norm": 0.19862921929919375, "learning_rate": 8.95948732633221e-06, "loss": 0.0036, "step": 87610 }, { "epoch": 0.5764359913949066, "grad_norm": 0.004263033715851734, "learning_rate": 8.959136717024361e-06, "loss": 0.0018, "step": 87620 }, { "epoch": 0.57650177957014, "grad_norm": 0.09349740030488786, "learning_rate": 8.958786055518715e-06, "loss": 0.0015, "step": 87630 }, { "epoch": 0.5765675677453734, "grad_norm": 0.037850571644422376, "learning_rate": 8.958435341819895e-06, "loss": 0.0015, "step": 87640 }, { "epoch": 0.5766333559206068, "grad_norm": 0.0850593302667323, "learning_rate": 8.958084575932522e-06, "loss": 0.002, "step": 87650 }, { "epoch": 0.5766991440958402, "grad_norm": 0.09969417181443137, "learning_rate": 8.957733757861223e-06, "loss": 0.0016, "step": 87660 }, { "epoch": 0.5767649322710736, "grad_norm": 0.2517180451075414, "learning_rate": 8.957382887610622e-06, "loss": 0.0018, "step": 87670 }, { "epoch": 0.576830720446307, "grad_norm": 0.0797464545089616, "learning_rate": 8.957031965185345e-06, "loss": 0.0013, "step": 87680 }, { "epoch": 0.5768965086215404, "grad_norm": 0.021032266643395872, "learning_rate": 8.956680990590021e-06, "loss": 0.0024, "step": 87690 }, { "epoch": 0.5769622967967738, "grad_norm": 0.004932738681256724, "learning_rate": 8.956329963829274e-06, "loss": 0.0009, "step": 87700 }, { "epoch": 0.5770280849720071, "grad_norm": 0.043134170812431004, "learning_rate": 8.955978884907733e-06, "loss": 0.0037, "step": 87710 }, { "epoch": 0.5770938731472405, "grad_norm": 0.0719473091809497, "learning_rate": 8.95562775383003e-06, "loss": 0.0015, "step": 87720 }, { "epoch": 0.5771596613224739, "grad_norm": 0.040433914103220996, "learning_rate": 8.955276570600787e-06, "loss": 0.0012, "step": 87730 }, { "epoch": 0.5772254494977073, "grad_norm": 0.058601182666034826, "learning_rate": 8.95492533522464e-06, "loss": 0.0023, "step": 87740 }, { "epoch": 0.5772912376729407, "grad_norm": 0.03053585009075746, "learning_rate": 8.95457404770622e-06, "loss": 0.0013, "step": 87750 }, { "epoch": 0.577357025848174, "grad_norm": 0.048440523044444815, "learning_rate": 8.954222708050155e-06, "loss": 0.0018, "step": 87760 }, { "epoch": 0.5774228140234074, "grad_norm": 0.0018327373437225453, "learning_rate": 8.953871316261077e-06, "loss": 0.0015, "step": 87770 }, { "epoch": 0.5774886021986408, "grad_norm": 0.060761068050878435, "learning_rate": 8.953519872343623e-06, "loss": 0.0017, "step": 87780 }, { "epoch": 0.5775543903738742, "grad_norm": 0.0564576182236436, "learning_rate": 8.953168376302424e-06, "loss": 0.002, "step": 87790 }, { "epoch": 0.5776201785491076, "grad_norm": 0.06434560418916936, "learning_rate": 8.952816828142112e-06, "loss": 0.002, "step": 87800 }, { "epoch": 0.5776859667243409, "grad_norm": 0.05248588161034524, "learning_rate": 8.952465227867327e-06, "loss": 0.0012, "step": 87810 }, { "epoch": 0.5777517548995743, "grad_norm": 0.060194398652783546, "learning_rate": 8.9521135754827e-06, "loss": 0.0031, "step": 87820 }, { "epoch": 0.5778175430748077, "grad_norm": 0.052908044431529834, "learning_rate": 8.951761870992868e-06, "loss": 0.0038, "step": 87830 }, { "epoch": 0.5778833312500411, "grad_norm": 0.10435545291904345, "learning_rate": 8.951410114402468e-06, "loss": 0.0015, "step": 87840 }, { "epoch": 0.5779491194252745, "grad_norm": 0.04665496431255806, "learning_rate": 8.95105830571614e-06, "loss": 0.0014, "step": 87850 }, { "epoch": 0.5780149076005079, "grad_norm": 0.018196053496006503, "learning_rate": 8.95070644493852e-06, "loss": 0.0013, "step": 87860 }, { "epoch": 0.5780806957757413, "grad_norm": 0.11167027942129296, "learning_rate": 8.950354532074248e-06, "loss": 0.0019, "step": 87870 }, { "epoch": 0.5781464839509747, "grad_norm": 0.11789014278115101, "learning_rate": 8.950002567127965e-06, "loss": 0.003, "step": 87880 }, { "epoch": 0.5782122721262081, "grad_norm": 0.014905384625127294, "learning_rate": 8.949650550104306e-06, "loss": 0.0045, "step": 87890 }, { "epoch": 0.5782780603014415, "grad_norm": 0.15716125958698346, "learning_rate": 8.949298481007917e-06, "loss": 0.0017, "step": 87900 }, { "epoch": 0.5783438484766747, "grad_norm": 0.07914672958369974, "learning_rate": 8.94894635984344e-06, "loss": 0.0016, "step": 87910 }, { "epoch": 0.5784096366519081, "grad_norm": 0.04716007311122893, "learning_rate": 8.948594186615513e-06, "loss": 0.0018, "step": 87920 }, { "epoch": 0.5784754248271415, "grad_norm": 0.018376011359661656, "learning_rate": 8.948241961328784e-06, "loss": 0.0011, "step": 87930 }, { "epoch": 0.5785412130023749, "grad_norm": 0.02455549448454669, "learning_rate": 8.947889683987893e-06, "loss": 0.0017, "step": 87940 }, { "epoch": 0.5786070011776083, "grad_norm": 0.024975035649053574, "learning_rate": 8.947537354597489e-06, "loss": 0.0015, "step": 87950 }, { "epoch": 0.5786727893528417, "grad_norm": 0.07270961790806797, "learning_rate": 8.94718497316221e-06, "loss": 0.0016, "step": 87960 }, { "epoch": 0.5787385775280751, "grad_norm": 0.0725062867564817, "learning_rate": 8.94683253968671e-06, "loss": 0.0069, "step": 87970 }, { "epoch": 0.5788043657033085, "grad_norm": 0.08955681702652278, "learning_rate": 8.946480054175631e-06, "loss": 0.0013, "step": 87980 }, { "epoch": 0.5788701538785419, "grad_norm": 0.2562278422613094, "learning_rate": 8.946127516633619e-06, "loss": 0.002, "step": 87990 }, { "epoch": 0.5789359420537753, "grad_norm": 0.005776299764932039, "learning_rate": 8.945774927065325e-06, "loss": 0.0025, "step": 88000 }, { "epoch": 0.5790017302290087, "grad_norm": 0.08871302475804231, "learning_rate": 8.945422285475397e-06, "loss": 0.0028, "step": 88010 }, { "epoch": 0.579067518404242, "grad_norm": 0.11849787576556328, "learning_rate": 8.945069591868482e-06, "loss": 0.0019, "step": 88020 }, { "epoch": 0.5791333065794754, "grad_norm": 0.1347383860426566, "learning_rate": 8.944716846249233e-06, "loss": 0.0017, "step": 88030 }, { "epoch": 0.5791990947547088, "grad_norm": 0.06026550556080508, "learning_rate": 8.944364048622298e-06, "loss": 0.0012, "step": 88040 }, { "epoch": 0.5792648829299422, "grad_norm": 0.0908764466204406, "learning_rate": 8.94401119899233e-06, "loss": 0.0022, "step": 88050 }, { "epoch": 0.5793306711051756, "grad_norm": 0.0682613082247057, "learning_rate": 8.943658297363981e-06, "loss": 0.0013, "step": 88060 }, { "epoch": 0.579396459280409, "grad_norm": 0.05434224326624053, "learning_rate": 8.943305343741904e-06, "loss": 0.001, "step": 88070 }, { "epoch": 0.5794622474556423, "grad_norm": 0.13681990040545974, "learning_rate": 8.942952338130753e-06, "loss": 0.0009, "step": 88080 }, { "epoch": 0.5795280356308757, "grad_norm": 0.08858176839815421, "learning_rate": 8.942599280535178e-06, "loss": 0.0018, "step": 88090 }, { "epoch": 0.5795938238061091, "grad_norm": 0.031936895356864434, "learning_rate": 8.942246170959836e-06, "loss": 0.0021, "step": 88100 }, { "epoch": 0.5796596119813425, "grad_norm": 0.09332442745481317, "learning_rate": 8.941893009409387e-06, "loss": 0.0023, "step": 88110 }, { "epoch": 0.5797254001565758, "grad_norm": 0.028890097862714435, "learning_rate": 8.941539795888479e-06, "loss": 0.0021, "step": 88120 }, { "epoch": 0.5797911883318092, "grad_norm": 0.041678981041372826, "learning_rate": 8.941186530401774e-06, "loss": 0.0021, "step": 88130 }, { "epoch": 0.5798569765070426, "grad_norm": 0.1414120265806596, "learning_rate": 8.94083321295393e-06, "loss": 0.0018, "step": 88140 }, { "epoch": 0.579922764682276, "grad_norm": 0.062452659916589603, "learning_rate": 8.940479843549602e-06, "loss": 0.0011, "step": 88150 }, { "epoch": 0.5799885528575094, "grad_norm": 0.07776147691935122, "learning_rate": 8.940126422193451e-06, "loss": 0.0011, "step": 88160 }, { "epoch": 0.5800543410327428, "grad_norm": 0.09726906747423748, "learning_rate": 8.939772948890135e-06, "loss": 0.0023, "step": 88170 }, { "epoch": 0.5801201292079762, "grad_norm": 0.0010237285501471661, "learning_rate": 8.939419423644316e-06, "loss": 0.0009, "step": 88180 }, { "epoch": 0.5801859173832096, "grad_norm": 0.048356085631321544, "learning_rate": 8.939065846460654e-06, "loss": 0.0017, "step": 88190 }, { "epoch": 0.580251705558443, "grad_norm": 0.10741707498653406, "learning_rate": 8.938712217343812e-06, "loss": 0.0017, "step": 88200 }, { "epoch": 0.5803174937336764, "grad_norm": 0.07723916128653516, "learning_rate": 8.93835853629845e-06, "loss": 0.0013, "step": 88210 }, { "epoch": 0.5803832819089096, "grad_norm": 0.2632983231372904, "learning_rate": 8.938004803329233e-06, "loss": 0.0029, "step": 88220 }, { "epoch": 0.580449070084143, "grad_norm": 0.07403643424246295, "learning_rate": 8.937651018440822e-06, "loss": 0.0012, "step": 88230 }, { "epoch": 0.5805148582593764, "grad_norm": 0.02592310828432788, "learning_rate": 8.937297181637886e-06, "loss": 0.0017, "step": 88240 }, { "epoch": 0.5805806464346098, "grad_norm": 0.09492022114191942, "learning_rate": 8.936943292925084e-06, "loss": 0.0008, "step": 88250 }, { "epoch": 0.5806464346098432, "grad_norm": 0.044522498196360505, "learning_rate": 8.936589352307088e-06, "loss": 0.0014, "step": 88260 }, { "epoch": 0.5807122227850766, "grad_norm": 0.6348885482272688, "learning_rate": 8.936235359788559e-06, "loss": 0.0017, "step": 88270 }, { "epoch": 0.58077801096031, "grad_norm": 0.06209825410048625, "learning_rate": 8.935881315374166e-06, "loss": 0.0018, "step": 88280 }, { "epoch": 0.5808437991355434, "grad_norm": 0.05244476034238095, "learning_rate": 8.935527219068578e-06, "loss": 0.0012, "step": 88290 }, { "epoch": 0.5809095873107768, "grad_norm": 0.10120640682523155, "learning_rate": 8.935173070876464e-06, "loss": 0.0017, "step": 88300 }, { "epoch": 0.5809753754860102, "grad_norm": 0.01870468204864613, "learning_rate": 8.93481887080249e-06, "loss": 0.0018, "step": 88310 }, { "epoch": 0.5810411636612435, "grad_norm": 0.17795914974022956, "learning_rate": 8.934464618851327e-06, "loss": 0.0024, "step": 88320 }, { "epoch": 0.5811069518364769, "grad_norm": 0.0426149443724034, "learning_rate": 8.934110315027647e-06, "loss": 0.0061, "step": 88330 }, { "epoch": 0.5811727400117103, "grad_norm": 0.029631465932442296, "learning_rate": 8.93375595933612e-06, "loss": 0.0017, "step": 88340 }, { "epoch": 0.5812385281869437, "grad_norm": 0.09520842183263954, "learning_rate": 8.933401551781418e-06, "loss": 0.0018, "step": 88350 }, { "epoch": 0.581304316362177, "grad_norm": 0.10169500033652362, "learning_rate": 8.933047092368214e-06, "loss": 0.0012, "step": 88360 }, { "epoch": 0.5813701045374104, "grad_norm": 0.05659088316319161, "learning_rate": 8.932692581101182e-06, "loss": 0.0014, "step": 88370 }, { "epoch": 0.5814358927126438, "grad_norm": 0.11214246741174713, "learning_rate": 8.932338017984991e-06, "loss": 0.0006, "step": 88380 }, { "epoch": 0.5815016808878772, "grad_norm": 0.07925354040005199, "learning_rate": 8.931983403024323e-06, "loss": 0.0014, "step": 88390 }, { "epoch": 0.5815674690631106, "grad_norm": 0.019125613753127144, "learning_rate": 8.931628736223849e-06, "loss": 0.0017, "step": 88400 }, { "epoch": 0.581633257238344, "grad_norm": 0.016429801806472515, "learning_rate": 8.931274017588245e-06, "loss": 0.0014, "step": 88410 }, { "epoch": 0.5816990454135773, "grad_norm": 0.05111693217890731, "learning_rate": 8.930919247122189e-06, "loss": 0.0017, "step": 88420 }, { "epoch": 0.5817648335888107, "grad_norm": 0.08557596512467755, "learning_rate": 8.930564424830358e-06, "loss": 0.0029, "step": 88430 }, { "epoch": 0.5818306217640441, "grad_norm": 0.13232424789290956, "learning_rate": 8.930209550717428e-06, "loss": 0.0021, "step": 88440 }, { "epoch": 0.5818964099392775, "grad_norm": 0.013129297910178625, "learning_rate": 8.92985462478808e-06, "loss": 0.0014, "step": 88450 }, { "epoch": 0.5819621981145109, "grad_norm": 0.13305768916396798, "learning_rate": 8.929499647046993e-06, "loss": 0.0014, "step": 88460 }, { "epoch": 0.5820279862897443, "grad_norm": 0.06287721942923817, "learning_rate": 8.929144617498848e-06, "loss": 0.0017, "step": 88470 }, { "epoch": 0.5820937744649777, "grad_norm": 0.043327073743467184, "learning_rate": 8.928789536148323e-06, "loss": 0.0018, "step": 88480 }, { "epoch": 0.5821595626402111, "grad_norm": 0.13410251527187642, "learning_rate": 8.928434403000103e-06, "loss": 0.0021, "step": 88490 }, { "epoch": 0.5822253508154445, "grad_norm": 0.033424353351092576, "learning_rate": 8.928079218058867e-06, "loss": 0.0017, "step": 88500 }, { "epoch": 0.5822911389906779, "grad_norm": 0.10055778264088121, "learning_rate": 8.9277239813293e-06, "loss": 0.0022, "step": 88510 }, { "epoch": 0.5823569271659113, "grad_norm": 0.06768326838230652, "learning_rate": 8.927368692816082e-06, "loss": 0.0021, "step": 88520 }, { "epoch": 0.5824227153411445, "grad_norm": 0.055619413515731166, "learning_rate": 8.927013352523901e-06, "loss": 0.0033, "step": 88530 }, { "epoch": 0.5824885035163779, "grad_norm": 0.06642839142743405, "learning_rate": 8.926657960457441e-06, "loss": 0.0013, "step": 88540 }, { "epoch": 0.5825542916916113, "grad_norm": 0.11034578201588097, "learning_rate": 8.926302516621388e-06, "loss": 0.0015, "step": 88550 }, { "epoch": 0.5826200798668447, "grad_norm": 0.034582745205153934, "learning_rate": 8.925947021020426e-06, "loss": 0.0033, "step": 88560 }, { "epoch": 0.5826858680420781, "grad_norm": 0.11803207270941263, "learning_rate": 8.925591473659244e-06, "loss": 0.0041, "step": 88570 }, { "epoch": 0.5827516562173115, "grad_norm": 0.03260203134519867, "learning_rate": 8.925235874542527e-06, "loss": 0.0015, "step": 88580 }, { "epoch": 0.5828174443925449, "grad_norm": 0.05269129560823881, "learning_rate": 8.924880223674967e-06, "loss": 0.0023, "step": 88590 }, { "epoch": 0.5828832325677783, "grad_norm": 0.23295612216815947, "learning_rate": 8.92452452106125e-06, "loss": 0.0083, "step": 88600 }, { "epoch": 0.5829490207430117, "grad_norm": 0.022411225251006146, "learning_rate": 8.924168766706068e-06, "loss": 0.0039, "step": 88610 }, { "epoch": 0.5830148089182451, "grad_norm": 0.03695452518882334, "learning_rate": 8.923812960614108e-06, "loss": 0.0012, "step": 88620 }, { "epoch": 0.5830805970934784, "grad_norm": 0.019566152179561647, "learning_rate": 8.923457102790063e-06, "loss": 0.001, "step": 88630 }, { "epoch": 0.5831463852687118, "grad_norm": 0.06945393335420913, "learning_rate": 8.923101193238626e-06, "loss": 0.0017, "step": 88640 }, { "epoch": 0.5832121734439452, "grad_norm": 0.06155624256175486, "learning_rate": 8.922745231964487e-06, "loss": 0.0034, "step": 88650 }, { "epoch": 0.5832779616191786, "grad_norm": 0.04180514451825158, "learning_rate": 8.922389218972341e-06, "loss": 0.0027, "step": 88660 }, { "epoch": 0.583343749794412, "grad_norm": 0.16241537896560426, "learning_rate": 8.92203315426688e-06, "loss": 0.0016, "step": 88670 }, { "epoch": 0.5834095379696453, "grad_norm": 0.044320122769994696, "learning_rate": 8.921677037852798e-06, "loss": 0.0015, "step": 88680 }, { "epoch": 0.5834753261448787, "grad_norm": 0.051682957161585344, "learning_rate": 8.921320869734792e-06, "loss": 0.003, "step": 88690 }, { "epoch": 0.5835411143201121, "grad_norm": 0.02852204735515762, "learning_rate": 8.920964649917558e-06, "loss": 0.002, "step": 88700 }, { "epoch": 0.5836069024953455, "grad_norm": 0.05886707270970665, "learning_rate": 8.920608378405792e-06, "loss": 0.0021, "step": 88710 }, { "epoch": 0.5836726906705789, "grad_norm": 0.019832068906709216, "learning_rate": 8.92025205520419e-06, "loss": 0.0029, "step": 88720 }, { "epoch": 0.5837384788458122, "grad_norm": 0.03404503956265, "learning_rate": 8.91989568031745e-06, "loss": 0.0009, "step": 88730 }, { "epoch": 0.5838042670210456, "grad_norm": 0.0659247888705279, "learning_rate": 8.919539253750271e-06, "loss": 0.0018, "step": 88740 }, { "epoch": 0.583870055196279, "grad_norm": 0.05617117173389785, "learning_rate": 8.91918277550735e-06, "loss": 0.0015, "step": 88750 }, { "epoch": 0.5839358433715124, "grad_norm": 0.013520137070170576, "learning_rate": 8.918826245593393e-06, "loss": 0.0032, "step": 88760 }, { "epoch": 0.5840016315467458, "grad_norm": 0.09047011823541895, "learning_rate": 8.918469664013095e-06, "loss": 0.002, "step": 88770 }, { "epoch": 0.5840674197219792, "grad_norm": 0.18223447197765147, "learning_rate": 8.918113030771158e-06, "loss": 0.0026, "step": 88780 }, { "epoch": 0.5841332078972126, "grad_norm": 0.006807966862853973, "learning_rate": 8.917756345872285e-06, "loss": 0.0021, "step": 88790 }, { "epoch": 0.584198996072446, "grad_norm": 0.10762243582477629, "learning_rate": 8.917399609321176e-06, "loss": 0.0027, "step": 88800 }, { "epoch": 0.5842647842476794, "grad_norm": 0.07142312118515504, "learning_rate": 8.917042821122539e-06, "loss": 0.0014, "step": 88810 }, { "epoch": 0.5843305724229128, "grad_norm": 0.032622278320602334, "learning_rate": 8.916685981281074e-06, "loss": 0.0043, "step": 88820 }, { "epoch": 0.584396360598146, "grad_norm": 0.2208502549576518, "learning_rate": 8.916329089801487e-06, "loss": 0.0031, "step": 88830 }, { "epoch": 0.5844621487733794, "grad_norm": 0.04765829101666393, "learning_rate": 8.915972146688486e-06, "loss": 0.0023, "step": 88840 }, { "epoch": 0.5845279369486128, "grad_norm": 0.3188596421462302, "learning_rate": 8.91561515194677e-06, "loss": 0.0012, "step": 88850 }, { "epoch": 0.5845937251238462, "grad_norm": 0.07451015103594545, "learning_rate": 8.915258105581054e-06, "loss": 0.002, "step": 88860 }, { "epoch": 0.5846595132990796, "grad_norm": 0.08067767105169694, "learning_rate": 8.91490100759604e-06, "loss": 0.0008, "step": 88870 }, { "epoch": 0.584725301474313, "grad_norm": 0.04442103884719448, "learning_rate": 8.914543857996435e-06, "loss": 0.0015, "step": 88880 }, { "epoch": 0.5847910896495464, "grad_norm": 0.06033430059620866, "learning_rate": 8.914186656786952e-06, "loss": 0.0025, "step": 88890 }, { "epoch": 0.5848568778247798, "grad_norm": 0.07078080954502819, "learning_rate": 8.9138294039723e-06, "loss": 0.0011, "step": 88900 }, { "epoch": 0.5849226660000132, "grad_norm": 0.26304804084072086, "learning_rate": 8.913472099557185e-06, "loss": 0.0018, "step": 88910 }, { "epoch": 0.5849884541752466, "grad_norm": 0.08316518189690064, "learning_rate": 8.91311474354632e-06, "loss": 0.0017, "step": 88920 }, { "epoch": 0.5850542423504799, "grad_norm": 0.013521620878404321, "learning_rate": 8.91275733594442e-06, "loss": 0.0024, "step": 88930 }, { "epoch": 0.5851200305257133, "grad_norm": 0.02735472586404547, "learning_rate": 8.91239987675619e-06, "loss": 0.0012, "step": 88940 }, { "epoch": 0.5851858187009467, "grad_norm": 0.23196356034666402, "learning_rate": 8.912042365986348e-06, "loss": 0.0008, "step": 88950 }, { "epoch": 0.58525160687618, "grad_norm": 0.20921581407676235, "learning_rate": 8.911684803639608e-06, "loss": 0.0034, "step": 88960 }, { "epoch": 0.5853173950514134, "grad_norm": 0.5701954619483641, "learning_rate": 8.91132718972068e-06, "loss": 0.0015, "step": 88970 }, { "epoch": 0.5853831832266468, "grad_norm": 0.04900756468152941, "learning_rate": 8.910969524234282e-06, "loss": 0.002, "step": 88980 }, { "epoch": 0.5854489714018802, "grad_norm": 0.03174258353081803, "learning_rate": 8.910611807185128e-06, "loss": 0.0012, "step": 88990 }, { "epoch": 0.5855147595771136, "grad_norm": 0.026754281011107984, "learning_rate": 8.910254038577935e-06, "loss": 0.0016, "step": 89000 }, { "epoch": 0.585580547752347, "grad_norm": 0.017673591250475858, "learning_rate": 8.90989621841742e-06, "loss": 0.0029, "step": 89010 }, { "epoch": 0.5856463359275804, "grad_norm": 0.007577017324693394, "learning_rate": 8.909538346708299e-06, "loss": 0.0013, "step": 89020 }, { "epoch": 0.5857121241028138, "grad_norm": 0.14245260422185996, "learning_rate": 8.909180423455292e-06, "loss": 0.0024, "step": 89030 }, { "epoch": 0.5857779122780471, "grad_norm": 0.03992818092638973, "learning_rate": 8.908822448663117e-06, "loss": 0.0015, "step": 89040 }, { "epoch": 0.5858437004532805, "grad_norm": 0.057312806523581145, "learning_rate": 8.908464422336495e-06, "loss": 0.0028, "step": 89050 }, { "epoch": 0.5859094886285139, "grad_norm": 0.10564634717141207, "learning_rate": 8.908106344480144e-06, "loss": 0.0034, "step": 89060 }, { "epoch": 0.5859752768037473, "grad_norm": 0.1044950547371466, "learning_rate": 8.907748215098786e-06, "loss": 0.0023, "step": 89070 }, { "epoch": 0.5860410649789807, "grad_norm": 0.11745284966793111, "learning_rate": 8.907390034197142e-06, "loss": 0.002, "step": 89080 }, { "epoch": 0.5861068531542141, "grad_norm": 0.15323420195841544, "learning_rate": 8.907031801779939e-06, "loss": 0.0026, "step": 89090 }, { "epoch": 0.5861726413294475, "grad_norm": 0.06516194530583956, "learning_rate": 8.906673517851893e-06, "loss": 0.0014, "step": 89100 }, { "epoch": 0.5862384295046809, "grad_norm": 0.0536503557495156, "learning_rate": 8.906315182417731e-06, "loss": 0.0022, "step": 89110 }, { "epoch": 0.5863042176799143, "grad_norm": 0.022520490375290463, "learning_rate": 8.905956795482177e-06, "loss": 0.0016, "step": 89120 }, { "epoch": 0.5863700058551476, "grad_norm": 0.07100010612746062, "learning_rate": 8.905598357049955e-06, "loss": 0.0009, "step": 89130 }, { "epoch": 0.5864357940303809, "grad_norm": 0.03987580018335449, "learning_rate": 8.905239867125794e-06, "loss": 0.0035, "step": 89140 }, { "epoch": 0.5865015822056143, "grad_norm": 0.041031149446340544, "learning_rate": 8.904881325714416e-06, "loss": 0.0016, "step": 89150 }, { "epoch": 0.5865673703808477, "grad_norm": 0.08526187774366928, "learning_rate": 8.904522732820554e-06, "loss": 0.0014, "step": 89160 }, { "epoch": 0.5866331585560811, "grad_norm": 0.15076588262336243, "learning_rate": 8.904164088448927e-06, "loss": 0.0016, "step": 89170 }, { "epoch": 0.5866989467313145, "grad_norm": 0.06821826996960627, "learning_rate": 8.90380539260427e-06, "loss": 0.0015, "step": 89180 }, { "epoch": 0.5867647349065479, "grad_norm": 0.04501353597854564, "learning_rate": 8.903446645291311e-06, "loss": 0.0007, "step": 89190 }, { "epoch": 0.5868305230817813, "grad_norm": 0.09884923150388097, "learning_rate": 8.903087846514779e-06, "loss": 0.002, "step": 89200 }, { "epoch": 0.5868963112570147, "grad_norm": 0.025946552914703727, "learning_rate": 8.902728996279406e-06, "loss": 0.0019, "step": 89210 }, { "epoch": 0.5869620994322481, "grad_norm": 0.017597733003412715, "learning_rate": 8.90237009458992e-06, "loss": 0.002, "step": 89220 }, { "epoch": 0.5870278876074815, "grad_norm": 0.07423028690491093, "learning_rate": 8.902011141451055e-06, "loss": 0.0015, "step": 89230 }, { "epoch": 0.5870936757827148, "grad_norm": 0.021941227083651524, "learning_rate": 8.901652136867545e-06, "loss": 0.0019, "step": 89240 }, { "epoch": 0.5871594639579482, "grad_norm": 0.06494704904612075, "learning_rate": 8.90129308084412e-06, "loss": 0.0013, "step": 89250 }, { "epoch": 0.5872252521331816, "grad_norm": 0.08306675236623545, "learning_rate": 8.900933973385514e-06, "loss": 0.0021, "step": 89260 }, { "epoch": 0.587291040308415, "grad_norm": 0.011863573759848884, "learning_rate": 8.900574814496463e-06, "loss": 0.0005, "step": 89270 }, { "epoch": 0.5873568284836483, "grad_norm": 0.0068373168241156156, "learning_rate": 8.900215604181702e-06, "loss": 0.0013, "step": 89280 }, { "epoch": 0.5874226166588817, "grad_norm": 0.03272228629139394, "learning_rate": 8.899856342445967e-06, "loss": 0.0012, "step": 89290 }, { "epoch": 0.5874884048341151, "grad_norm": 0.03511971202743044, "learning_rate": 8.899497029293994e-06, "loss": 0.0024, "step": 89300 }, { "epoch": 0.5875541930093485, "grad_norm": 0.05075356650175283, "learning_rate": 8.899137664730522e-06, "loss": 0.0012, "step": 89310 }, { "epoch": 0.5876199811845819, "grad_norm": 0.06336543822500605, "learning_rate": 8.898778248760285e-06, "loss": 0.0014, "step": 89320 }, { "epoch": 0.5876857693598153, "grad_norm": 0.1605274662129067, "learning_rate": 8.898418781388025e-06, "loss": 0.002, "step": 89330 }, { "epoch": 0.5877515575350486, "grad_norm": 0.013828948738764222, "learning_rate": 8.89805926261848e-06, "loss": 0.0009, "step": 89340 }, { "epoch": 0.587817345710282, "grad_norm": 0.18328650238790178, "learning_rate": 8.89769969245639e-06, "loss": 0.0026, "step": 89350 }, { "epoch": 0.5878831338855154, "grad_norm": 0.0054072153828189275, "learning_rate": 8.897340070906497e-06, "loss": 0.0027, "step": 89360 }, { "epoch": 0.5879489220607488, "grad_norm": 0.2734925738879754, "learning_rate": 8.89698039797354e-06, "loss": 0.0017, "step": 89370 }, { "epoch": 0.5880147102359822, "grad_norm": 0.034478053300323414, "learning_rate": 8.896620673662264e-06, "loss": 0.0015, "step": 89380 }, { "epoch": 0.5880804984112156, "grad_norm": 0.013860494619907472, "learning_rate": 8.896260897977407e-06, "loss": 0.0015, "step": 89390 }, { "epoch": 0.588146286586449, "grad_norm": 0.0162035727158316, "learning_rate": 8.895901070923717e-06, "loss": 0.0022, "step": 89400 }, { "epoch": 0.5882120747616824, "grad_norm": 0.09448885088790285, "learning_rate": 8.895541192505937e-06, "loss": 0.0018, "step": 89410 }, { "epoch": 0.5882778629369158, "grad_norm": 0.01062243013983482, "learning_rate": 8.895181262728807e-06, "loss": 0.0038, "step": 89420 }, { "epoch": 0.5883436511121491, "grad_norm": 0.011817581622457357, "learning_rate": 8.89482128159708e-06, "loss": 0.0018, "step": 89430 }, { "epoch": 0.5884094392873825, "grad_norm": 0.08669197162252923, "learning_rate": 8.894461249115496e-06, "loss": 0.0031, "step": 89440 }, { "epoch": 0.5884752274626158, "grad_norm": 0.09653268338170762, "learning_rate": 8.894101165288804e-06, "loss": 0.0021, "step": 89450 }, { "epoch": 0.5885410156378492, "grad_norm": 0.0663032612702215, "learning_rate": 8.893741030121752e-06, "loss": 0.0015, "step": 89460 }, { "epoch": 0.5886068038130826, "grad_norm": 0.03305391464248971, "learning_rate": 8.893380843619088e-06, "loss": 0.0019, "step": 89470 }, { "epoch": 0.588672591988316, "grad_norm": 0.03563543706941251, "learning_rate": 8.893020605785559e-06, "loss": 0.0014, "step": 89480 }, { "epoch": 0.5887383801635494, "grad_norm": 0.025630168114456574, "learning_rate": 8.892660316625916e-06, "loss": 0.0016, "step": 89490 }, { "epoch": 0.5888041683387828, "grad_norm": 0.043827636436277455, "learning_rate": 8.892299976144909e-06, "loss": 0.0024, "step": 89500 }, { "epoch": 0.5888699565140162, "grad_norm": 0.10566478059427875, "learning_rate": 8.891939584347289e-06, "loss": 0.0026, "step": 89510 }, { "epoch": 0.5889357446892496, "grad_norm": 0.009791041374329882, "learning_rate": 8.891579141237805e-06, "loss": 0.0025, "step": 89520 }, { "epoch": 0.589001532864483, "grad_norm": 0.11555017386722247, "learning_rate": 8.891218646821211e-06, "loss": 0.0038, "step": 89530 }, { "epoch": 0.5890673210397164, "grad_norm": 0.016468634353155737, "learning_rate": 8.89085810110226e-06, "loss": 0.0007, "step": 89540 }, { "epoch": 0.5891331092149497, "grad_norm": 0.028823580353911877, "learning_rate": 8.890497504085706e-06, "loss": 0.0016, "step": 89550 }, { "epoch": 0.589198897390183, "grad_norm": 0.035900009628917054, "learning_rate": 8.890136855776304e-06, "loss": 0.0019, "step": 89560 }, { "epoch": 0.5892646855654164, "grad_norm": 0.03148147206841473, "learning_rate": 8.889776156178805e-06, "loss": 0.001, "step": 89570 }, { "epoch": 0.5893304737406498, "grad_norm": 0.050513063941885726, "learning_rate": 8.889415405297968e-06, "loss": 0.0013, "step": 89580 }, { "epoch": 0.5893962619158832, "grad_norm": 0.028506997927745817, "learning_rate": 8.889054603138547e-06, "loss": 0.0013, "step": 89590 }, { "epoch": 0.5894620500911166, "grad_norm": 0.0074387959678231495, "learning_rate": 8.8886937497053e-06, "loss": 0.0024, "step": 89600 }, { "epoch": 0.58952783826635, "grad_norm": 0.09862749029743582, "learning_rate": 8.888332845002985e-06, "loss": 0.0018, "step": 89610 }, { "epoch": 0.5895936264415834, "grad_norm": 0.06853484682855732, "learning_rate": 8.88797188903636e-06, "loss": 0.0023, "step": 89620 }, { "epoch": 0.5896594146168168, "grad_norm": 0.01657693988566404, "learning_rate": 8.887610881810182e-06, "loss": 0.0016, "step": 89630 }, { "epoch": 0.5897252027920502, "grad_norm": 0.0231438442033608, "learning_rate": 8.887249823329214e-06, "loss": 0.001, "step": 89640 }, { "epoch": 0.5897909909672835, "grad_norm": 0.01135754071978912, "learning_rate": 8.886888713598214e-06, "loss": 0.0013, "step": 89650 }, { "epoch": 0.5898567791425169, "grad_norm": 0.02448055236645589, "learning_rate": 8.886527552621941e-06, "loss": 0.002, "step": 89660 }, { "epoch": 0.5899225673177503, "grad_norm": 0.04611806614637625, "learning_rate": 8.886166340405162e-06, "loss": 0.0018, "step": 89670 }, { "epoch": 0.5899883554929837, "grad_norm": 0.10419339508181139, "learning_rate": 8.885805076952634e-06, "loss": 0.0022, "step": 89680 }, { "epoch": 0.5900541436682171, "grad_norm": 0.02468763126139495, "learning_rate": 8.885443762269123e-06, "loss": 0.0014, "step": 89690 }, { "epoch": 0.5901199318434505, "grad_norm": 0.10096339610318607, "learning_rate": 8.885082396359391e-06, "loss": 0.0025, "step": 89700 }, { "epoch": 0.5901857200186839, "grad_norm": 0.7808072564160647, "learning_rate": 8.884720979228203e-06, "loss": 0.0025, "step": 89710 }, { "epoch": 0.5902515081939173, "grad_norm": 0.029560836970120158, "learning_rate": 8.884359510880325e-06, "loss": 0.0012, "step": 89720 }, { "epoch": 0.5903172963691506, "grad_norm": 0.2333053073035379, "learning_rate": 8.88399799132052e-06, "loss": 0.0021, "step": 89730 }, { "epoch": 0.590383084544384, "grad_norm": 0.00964670539048662, "learning_rate": 8.883636420553558e-06, "loss": 0.0018, "step": 89740 }, { "epoch": 0.5904488727196173, "grad_norm": 0.04342401028338094, "learning_rate": 8.883274798584203e-06, "loss": 0.0011, "step": 89750 }, { "epoch": 0.5905146608948507, "grad_norm": 0.07345689527791761, "learning_rate": 8.882913125417222e-06, "loss": 0.0013, "step": 89760 }, { "epoch": 0.5905804490700841, "grad_norm": 0.07097182664765082, "learning_rate": 8.882551401057386e-06, "loss": 0.0024, "step": 89770 }, { "epoch": 0.5906462372453175, "grad_norm": 0.02396638180557029, "learning_rate": 8.882189625509463e-06, "loss": 0.0014, "step": 89780 }, { "epoch": 0.5907120254205509, "grad_norm": 0.017568545375728228, "learning_rate": 8.881827798778225e-06, "loss": 0.0009, "step": 89790 }, { "epoch": 0.5907778135957843, "grad_norm": 0.03369621950136205, "learning_rate": 8.881465920868437e-06, "loss": 0.0035, "step": 89800 }, { "epoch": 0.5908436017710177, "grad_norm": 0.0017569175753288275, "learning_rate": 8.881103991784874e-06, "loss": 0.0012, "step": 89810 }, { "epoch": 0.5909093899462511, "grad_norm": 0.03912145085640021, "learning_rate": 8.880742011532306e-06, "loss": 0.0014, "step": 89820 }, { "epoch": 0.5909751781214845, "grad_norm": 0.04913037196934326, "learning_rate": 8.880379980115508e-06, "loss": 0.0011, "step": 89830 }, { "epoch": 0.5910409662967179, "grad_norm": 0.02332430334125259, "learning_rate": 8.88001789753925e-06, "loss": 0.0013, "step": 89840 }, { "epoch": 0.5911067544719512, "grad_norm": 0.06265572052868966, "learning_rate": 8.879655763808309e-06, "loss": 0.0029, "step": 89850 }, { "epoch": 0.5911725426471846, "grad_norm": 0.059522511322679786, "learning_rate": 8.879293578927455e-06, "loss": 0.0013, "step": 89860 }, { "epoch": 0.591238330822418, "grad_norm": 0.1971392741889432, "learning_rate": 8.878931342901465e-06, "loss": 0.0024, "step": 89870 }, { "epoch": 0.5913041189976513, "grad_norm": 0.025620918197425593, "learning_rate": 8.878569055735117e-06, "loss": 0.0015, "step": 89880 }, { "epoch": 0.5913699071728847, "grad_norm": 0.10493050519131941, "learning_rate": 8.878206717433185e-06, "loss": 0.0027, "step": 89890 }, { "epoch": 0.5914356953481181, "grad_norm": 0.4027192178230908, "learning_rate": 8.877844328000448e-06, "loss": 0.0025, "step": 89900 }, { "epoch": 0.5915014835233515, "grad_norm": 0.10393525876555985, "learning_rate": 8.877481887441681e-06, "loss": 0.0007, "step": 89910 }, { "epoch": 0.5915672716985849, "grad_norm": 0.003750827089595303, "learning_rate": 8.877119395761666e-06, "loss": 0.0023, "step": 89920 }, { "epoch": 0.5916330598738183, "grad_norm": 0.02007301368472531, "learning_rate": 8.876756852965179e-06, "loss": 0.0013, "step": 89930 }, { "epoch": 0.5916988480490517, "grad_norm": 0.05853958644742775, "learning_rate": 8.876394259057003e-06, "loss": 0.0021, "step": 89940 }, { "epoch": 0.5917646362242851, "grad_norm": 0.2260583900816392, "learning_rate": 8.876031614041913e-06, "loss": 0.0014, "step": 89950 }, { "epoch": 0.5918304243995184, "grad_norm": 0.05597354373057565, "learning_rate": 8.875668917924695e-06, "loss": 0.0013, "step": 89960 }, { "epoch": 0.5918962125747518, "grad_norm": 0.0562790872316986, "learning_rate": 8.875306170710132e-06, "loss": 0.0029, "step": 89970 }, { "epoch": 0.5919620007499852, "grad_norm": 0.05182713913612503, "learning_rate": 8.874943372403002e-06, "loss": 0.0012, "step": 89980 }, { "epoch": 0.5920277889252186, "grad_norm": 0.03884254924888952, "learning_rate": 8.87458052300809e-06, "loss": 0.0012, "step": 89990 }, { "epoch": 0.592093577100452, "grad_norm": 0.12172592364511901, "learning_rate": 8.874217622530182e-06, "loss": 0.0019, "step": 90000 }, { "epoch": 0.5921593652756854, "grad_norm": 0.0451370009539883, "learning_rate": 8.873854670974058e-06, "loss": 0.0036, "step": 90010 }, { "epoch": 0.5922251534509188, "grad_norm": 0.0889018557599292, "learning_rate": 8.873491668344507e-06, "loss": 0.0014, "step": 90020 }, { "epoch": 0.5922909416261521, "grad_norm": 0.04451559222464453, "learning_rate": 8.873128614646314e-06, "loss": 0.0016, "step": 90030 }, { "epoch": 0.5923567298013855, "grad_norm": 0.03814580296013155, "learning_rate": 8.872765509884264e-06, "loss": 0.0013, "step": 90040 }, { "epoch": 0.5924225179766189, "grad_norm": 0.15717839149043666, "learning_rate": 8.872402354063147e-06, "loss": 0.0042, "step": 90050 }, { "epoch": 0.5924883061518522, "grad_norm": 0.05700307812499898, "learning_rate": 8.87203914718775e-06, "loss": 0.0021, "step": 90060 }, { "epoch": 0.5925540943270856, "grad_norm": 0.09665907711138941, "learning_rate": 8.87167588926286e-06, "loss": 0.0014, "step": 90070 }, { "epoch": 0.592619882502319, "grad_norm": 0.02260469369938893, "learning_rate": 8.871312580293267e-06, "loss": 0.0011, "step": 90080 }, { "epoch": 0.5926856706775524, "grad_norm": 0.05068615541069927, "learning_rate": 8.870949220283761e-06, "loss": 0.0032, "step": 90090 }, { "epoch": 0.5927514588527858, "grad_norm": 0.06131622750322075, "learning_rate": 8.870585809239133e-06, "loss": 0.0021, "step": 90100 }, { "epoch": 0.5928172470280192, "grad_norm": 0.1471638627918429, "learning_rate": 8.870222347164175e-06, "loss": 0.0025, "step": 90110 }, { "epoch": 0.5928830352032526, "grad_norm": 0.04313902693204869, "learning_rate": 8.869858834063676e-06, "loss": 0.0009, "step": 90120 }, { "epoch": 0.592948823378486, "grad_norm": 0.04329705230680051, "learning_rate": 8.869495269942433e-06, "loss": 0.002, "step": 90130 }, { "epoch": 0.5930146115537194, "grad_norm": 0.14636363960448381, "learning_rate": 8.869131654805235e-06, "loss": 0.0015, "step": 90140 }, { "epoch": 0.5930803997289528, "grad_norm": 0.06545226267503189, "learning_rate": 8.868767988656877e-06, "loss": 0.0007, "step": 90150 }, { "epoch": 0.593146187904186, "grad_norm": 0.17728612216611297, "learning_rate": 8.868404271502156e-06, "loss": 0.0013, "step": 90160 }, { "epoch": 0.5932119760794194, "grad_norm": 0.17161146616444312, "learning_rate": 8.868040503345864e-06, "loss": 0.0021, "step": 90170 }, { "epoch": 0.5932777642546528, "grad_norm": 0.02099215587485191, "learning_rate": 8.867676684192799e-06, "loss": 0.0011, "step": 90180 }, { "epoch": 0.5933435524298862, "grad_norm": 0.20874026432971743, "learning_rate": 8.86731281404776e-06, "loss": 0.0021, "step": 90190 }, { "epoch": 0.5934093406051196, "grad_norm": 0.15231310981812535, "learning_rate": 8.866948892915537e-06, "loss": 0.0035, "step": 90200 }, { "epoch": 0.593475128780353, "grad_norm": 0.0244790496389659, "learning_rate": 8.866584920800935e-06, "loss": 0.0016, "step": 90210 }, { "epoch": 0.5935409169555864, "grad_norm": 0.0030693120210890735, "learning_rate": 8.866220897708751e-06, "loss": 0.0008, "step": 90220 }, { "epoch": 0.5936067051308198, "grad_norm": 0.06674639178024734, "learning_rate": 8.865856823643783e-06, "loss": 0.0014, "step": 90230 }, { "epoch": 0.5936724933060532, "grad_norm": 0.07586839494897767, "learning_rate": 8.865492698610832e-06, "loss": 0.0009, "step": 90240 }, { "epoch": 0.5937382814812866, "grad_norm": 0.06490798578238915, "learning_rate": 8.865128522614696e-06, "loss": 0.0012, "step": 90250 }, { "epoch": 0.5938040696565199, "grad_norm": 0.030879994811448742, "learning_rate": 8.86476429566018e-06, "loss": 0.0019, "step": 90260 }, { "epoch": 0.5938698578317533, "grad_norm": 0.03139254112674276, "learning_rate": 8.864400017752084e-06, "loss": 0.0017, "step": 90270 }, { "epoch": 0.5939356460069867, "grad_norm": 0.04215529025426994, "learning_rate": 8.864035688895212e-06, "loss": 0.0018, "step": 90280 }, { "epoch": 0.5940014341822201, "grad_norm": 0.06083789136652894, "learning_rate": 8.863671309094367e-06, "loss": 0.003, "step": 90290 }, { "epoch": 0.5940672223574535, "grad_norm": 0.01065425161324611, "learning_rate": 8.863306878354352e-06, "loss": 0.0019, "step": 90300 }, { "epoch": 0.5941330105326869, "grad_norm": 0.004267073757947819, "learning_rate": 8.862942396679974e-06, "loss": 0.0014, "step": 90310 }, { "epoch": 0.5941987987079203, "grad_norm": 0.04759117112084682, "learning_rate": 8.862577864076034e-06, "loss": 0.0025, "step": 90320 }, { "epoch": 0.5942645868831536, "grad_norm": 0.021694327824892567, "learning_rate": 8.862213280547343e-06, "loss": 0.0021, "step": 90330 }, { "epoch": 0.594330375058387, "grad_norm": 0.06356830993241006, "learning_rate": 8.861848646098704e-06, "loss": 0.0021, "step": 90340 }, { "epoch": 0.5943961632336204, "grad_norm": 0.05523570516282504, "learning_rate": 8.861483960734926e-06, "loss": 0.002, "step": 90350 }, { "epoch": 0.5944619514088538, "grad_norm": 0.03628362453543539, "learning_rate": 8.861119224460818e-06, "loss": 0.001, "step": 90360 }, { "epoch": 0.5945277395840871, "grad_norm": 0.010898418467969403, "learning_rate": 8.860754437281187e-06, "loss": 0.0009, "step": 90370 }, { "epoch": 0.5945935277593205, "grad_norm": 0.10206496837239544, "learning_rate": 8.860389599200845e-06, "loss": 0.0018, "step": 90380 }, { "epoch": 0.5946593159345539, "grad_norm": 0.20321277721399894, "learning_rate": 8.860024710224597e-06, "loss": 0.0022, "step": 90390 }, { "epoch": 0.5947251041097873, "grad_norm": 0.07594313587021599, "learning_rate": 8.859659770357259e-06, "loss": 0.0028, "step": 90400 }, { "epoch": 0.5947908922850207, "grad_norm": 0.0668354432268223, "learning_rate": 8.85929477960364e-06, "loss": 0.0032, "step": 90410 }, { "epoch": 0.5948566804602541, "grad_norm": 0.057196265986853435, "learning_rate": 8.858929737968553e-06, "loss": 0.0014, "step": 90420 }, { "epoch": 0.5949224686354875, "grad_norm": 0.024429325190032335, "learning_rate": 8.858564645456809e-06, "loss": 0.0012, "step": 90430 }, { "epoch": 0.5949882568107209, "grad_norm": 0.100214318339499, "learning_rate": 8.858199502073223e-06, "loss": 0.0013, "step": 90440 }, { "epoch": 0.5950540449859543, "grad_norm": 0.10170293984054236, "learning_rate": 8.857834307822608e-06, "loss": 0.0043, "step": 90450 }, { "epoch": 0.5951198331611877, "grad_norm": 0.10638246586542871, "learning_rate": 8.85746906270978e-06, "loss": 0.0058, "step": 90460 }, { "epoch": 0.595185621336421, "grad_norm": 0.07588452055111373, "learning_rate": 8.857103766739554e-06, "loss": 0.0013, "step": 90470 }, { "epoch": 0.5952514095116543, "grad_norm": 0.09296545628549807, "learning_rate": 8.856738419916748e-06, "loss": 0.0023, "step": 90480 }, { "epoch": 0.5953171976868877, "grad_norm": 0.05129387738067505, "learning_rate": 8.856373022246173e-06, "loss": 0.0013, "step": 90490 }, { "epoch": 0.5953829858621211, "grad_norm": 0.12081911782874902, "learning_rate": 8.856007573732655e-06, "loss": 0.0016, "step": 90500 }, { "epoch": 0.5954487740373545, "grad_norm": 0.051479436056603115, "learning_rate": 8.855642074381004e-06, "loss": 0.002, "step": 90510 }, { "epoch": 0.5955145622125879, "grad_norm": 0.04517929142421364, "learning_rate": 8.855276524196042e-06, "loss": 0.0027, "step": 90520 }, { "epoch": 0.5955803503878213, "grad_norm": 0.07249455083519733, "learning_rate": 8.85491092318259e-06, "loss": 0.0009, "step": 90530 }, { "epoch": 0.5956461385630547, "grad_norm": 0.04261557349779772, "learning_rate": 8.854545271345468e-06, "loss": 0.0019, "step": 90540 }, { "epoch": 0.5957119267382881, "grad_norm": 0.013670750222323995, "learning_rate": 8.854179568689494e-06, "loss": 0.0017, "step": 90550 }, { "epoch": 0.5957777149135215, "grad_norm": 0.13706676655533886, "learning_rate": 8.85381381521949e-06, "loss": 0.0037, "step": 90560 }, { "epoch": 0.5958435030887548, "grad_norm": 0.01561782217724283, "learning_rate": 8.85344801094028e-06, "loss": 0.0017, "step": 90570 }, { "epoch": 0.5959092912639882, "grad_norm": 0.2463960211970969, "learning_rate": 8.853082155856687e-06, "loss": 0.0016, "step": 90580 }, { "epoch": 0.5959750794392216, "grad_norm": 0.00011396055431156648, "learning_rate": 8.852716249973532e-06, "loss": 0.0016, "step": 90590 }, { "epoch": 0.596040867614455, "grad_norm": 0.07521026872486299, "learning_rate": 8.852350293295643e-06, "loss": 0.0019, "step": 90600 }, { "epoch": 0.5961066557896884, "grad_norm": 0.07670571040980574, "learning_rate": 8.85198428582784e-06, "loss": 0.0016, "step": 90610 }, { "epoch": 0.5961724439649217, "grad_norm": 0.06132940011742531, "learning_rate": 8.851618227574952e-06, "loss": 0.0019, "step": 90620 }, { "epoch": 0.5962382321401551, "grad_norm": 0.04949939236116249, "learning_rate": 8.851252118541805e-06, "loss": 0.0024, "step": 90630 }, { "epoch": 0.5963040203153885, "grad_norm": 0.0553888674670994, "learning_rate": 8.850885958733224e-06, "loss": 0.0019, "step": 90640 }, { "epoch": 0.5963698084906219, "grad_norm": 0.012930410128020278, "learning_rate": 8.850519748154037e-06, "loss": 0.0016, "step": 90650 }, { "epoch": 0.5964355966658553, "grad_norm": 0.01521289894913737, "learning_rate": 8.850153486809074e-06, "loss": 0.0016, "step": 90660 }, { "epoch": 0.5965013848410886, "grad_norm": 0.055069039442263636, "learning_rate": 8.849787174703162e-06, "loss": 0.0017, "step": 90670 }, { "epoch": 0.596567173016322, "grad_norm": 0.0811427446030108, "learning_rate": 8.84942081184113e-06, "loss": 0.0024, "step": 90680 }, { "epoch": 0.5966329611915554, "grad_norm": 0.057544282674294636, "learning_rate": 8.84905439822781e-06, "loss": 0.0005, "step": 90690 }, { "epoch": 0.5966987493667888, "grad_norm": 0.05570178830183729, "learning_rate": 8.848687933868032e-06, "loss": 0.0016, "step": 90700 }, { "epoch": 0.5967645375420222, "grad_norm": 0.09102844828418076, "learning_rate": 8.848321418766628e-06, "loss": 0.002, "step": 90710 }, { "epoch": 0.5968303257172556, "grad_norm": 0.0005392636713250765, "learning_rate": 8.847954852928429e-06, "loss": 0.0016, "step": 90720 }, { "epoch": 0.596896113892489, "grad_norm": 0.05239784432520485, "learning_rate": 8.84758823635827e-06, "loss": 0.001, "step": 90730 }, { "epoch": 0.5969619020677224, "grad_norm": 0.12360526984325496, "learning_rate": 8.84722156906098e-06, "loss": 0.0016, "step": 90740 }, { "epoch": 0.5970276902429558, "grad_norm": 0.028924551678179446, "learning_rate": 8.8468548510414e-06, "loss": 0.0015, "step": 90750 }, { "epoch": 0.5970934784181892, "grad_norm": 0.1174818288852287, "learning_rate": 8.846488082304358e-06, "loss": 0.0016, "step": 90760 }, { "epoch": 0.5971592665934224, "grad_norm": 0.017523535559747793, "learning_rate": 8.846121262854694e-06, "loss": 0.0009, "step": 90770 }, { "epoch": 0.5972250547686558, "grad_norm": 0.04057283939602031, "learning_rate": 8.845754392697243e-06, "loss": 0.0021, "step": 90780 }, { "epoch": 0.5972908429438892, "grad_norm": 0.06332404729939904, "learning_rate": 8.845387471836841e-06, "loss": 0.0019, "step": 90790 }, { "epoch": 0.5973566311191226, "grad_norm": 0.06190501899440406, "learning_rate": 8.845020500278329e-06, "loss": 0.002, "step": 90800 }, { "epoch": 0.597422419294356, "grad_norm": 0.020690352106808838, "learning_rate": 8.844653478026539e-06, "loss": 0.0012, "step": 90810 }, { "epoch": 0.5974882074695894, "grad_norm": 0.0211120590160728, "learning_rate": 8.844286405086315e-06, "loss": 0.0011, "step": 90820 }, { "epoch": 0.5975539956448228, "grad_norm": 0.05043389405441221, "learning_rate": 8.843919281462494e-06, "loss": 0.0025, "step": 90830 }, { "epoch": 0.5976197838200562, "grad_norm": 0.062709118360947, "learning_rate": 8.843552107159918e-06, "loss": 0.0016, "step": 90840 }, { "epoch": 0.5976855719952896, "grad_norm": 0.03242661100145462, "learning_rate": 8.843184882183427e-06, "loss": 0.0013, "step": 90850 }, { "epoch": 0.597751360170523, "grad_norm": 0.04623928594686961, "learning_rate": 8.842817606537863e-06, "loss": 0.0032, "step": 90860 }, { "epoch": 0.5978171483457564, "grad_norm": 0.059589164733591, "learning_rate": 8.842450280228068e-06, "loss": 0.0021, "step": 90870 }, { "epoch": 0.5978829365209897, "grad_norm": 0.08480545409868703, "learning_rate": 8.842082903258881e-06, "loss": 0.0019, "step": 90880 }, { "epoch": 0.5979487246962231, "grad_norm": 0.012536791806364309, "learning_rate": 8.841715475635153e-06, "loss": 0.0019, "step": 90890 }, { "epoch": 0.5980145128714565, "grad_norm": 0.068625528710476, "learning_rate": 8.841347997361724e-06, "loss": 0.0032, "step": 90900 }, { "epoch": 0.5980803010466899, "grad_norm": 0.2384492211951482, "learning_rate": 8.840980468443437e-06, "loss": 0.0015, "step": 90910 }, { "epoch": 0.5981460892219232, "grad_norm": 0.07983754028469313, "learning_rate": 8.840612888885143e-06, "loss": 0.0026, "step": 90920 }, { "epoch": 0.5982118773971566, "grad_norm": 0.12039395334411823, "learning_rate": 8.840245258691682e-06, "loss": 0.0018, "step": 90930 }, { "epoch": 0.59827766557239, "grad_norm": 0.023588161556558622, "learning_rate": 8.839877577867907e-06, "loss": 0.0008, "step": 90940 }, { "epoch": 0.5983434537476234, "grad_norm": 0.07051573372807562, "learning_rate": 8.839509846418659e-06, "loss": 0.0022, "step": 90950 }, { "epoch": 0.5984092419228568, "grad_norm": 0.07184910121950527, "learning_rate": 8.839142064348791e-06, "loss": 0.0017, "step": 90960 }, { "epoch": 0.5984750300980902, "grad_norm": 0.04262984958968317, "learning_rate": 8.838774231663152e-06, "loss": 0.0018, "step": 90970 }, { "epoch": 0.5985408182733235, "grad_norm": 0.05552564308415751, "learning_rate": 8.838406348366589e-06, "loss": 0.0009, "step": 90980 }, { "epoch": 0.5986066064485569, "grad_norm": 0.07525939931568064, "learning_rate": 8.838038414463953e-06, "loss": 0.001, "step": 90990 }, { "epoch": 0.5986723946237903, "grad_norm": 0.20111932485783254, "learning_rate": 8.837670429960095e-06, "loss": 0.0024, "step": 91000 }, { "epoch": 0.5987381827990237, "grad_norm": 0.05243575327925156, "learning_rate": 8.837302394859868e-06, "loss": 0.0018, "step": 91010 }, { "epoch": 0.5988039709742571, "grad_norm": 0.026673679184524995, "learning_rate": 8.836934309168122e-06, "loss": 0.0023, "step": 91020 }, { "epoch": 0.5988697591494905, "grad_norm": 0.011240498560268694, "learning_rate": 8.83656617288971e-06, "loss": 0.0011, "step": 91030 }, { "epoch": 0.5989355473247239, "grad_norm": 0.05016037121746426, "learning_rate": 8.836197986029489e-06, "loss": 0.0022, "step": 91040 }, { "epoch": 0.5990013354999573, "grad_norm": 0.4101269508703122, "learning_rate": 8.83582974859231e-06, "loss": 0.0024, "step": 91050 }, { "epoch": 0.5990671236751907, "grad_norm": 0.0529011529772968, "learning_rate": 8.835461460583029e-06, "loss": 0.0017, "step": 91060 }, { "epoch": 0.599132911850424, "grad_norm": 0.07236133056022674, "learning_rate": 8.835093122006499e-06, "loss": 0.0022, "step": 91070 }, { "epoch": 0.5991987000256573, "grad_norm": 0.06934791456861837, "learning_rate": 8.834724732867581e-06, "loss": 0.001, "step": 91080 }, { "epoch": 0.5992644882008907, "grad_norm": 0.029604659474094932, "learning_rate": 8.834356293171126e-06, "loss": 0.0012, "step": 91090 }, { "epoch": 0.5993302763761241, "grad_norm": 0.04120140081191482, "learning_rate": 8.833987802921999e-06, "loss": 0.0009, "step": 91100 }, { "epoch": 0.5993960645513575, "grad_norm": 0.036391139267028264, "learning_rate": 8.83361926212505e-06, "loss": 0.0015, "step": 91110 }, { "epoch": 0.5994618527265909, "grad_norm": 0.0862590710630756, "learning_rate": 8.833250670785144e-06, "loss": 0.0029, "step": 91120 }, { "epoch": 0.5995276409018243, "grad_norm": 0.049845199443623596, "learning_rate": 8.83288202890714e-06, "loss": 0.0027, "step": 91130 }, { "epoch": 0.5995934290770577, "grad_norm": 0.0665734257817885, "learning_rate": 8.832513336495894e-06, "loss": 0.0014, "step": 91140 }, { "epoch": 0.5996592172522911, "grad_norm": 0.020360744752179073, "learning_rate": 8.832144593556273e-06, "loss": 0.002, "step": 91150 }, { "epoch": 0.5997250054275245, "grad_norm": 0.21353569979651807, "learning_rate": 8.831775800093131e-06, "loss": 0.0029, "step": 91160 }, { "epoch": 0.5997907936027579, "grad_norm": 0.07508091288206244, "learning_rate": 8.831406956111337e-06, "loss": 0.002, "step": 91170 }, { "epoch": 0.5998565817779912, "grad_norm": 0.042776353606178125, "learning_rate": 8.831038061615751e-06, "loss": 0.0014, "step": 91180 }, { "epoch": 0.5999223699532246, "grad_norm": 0.2502481445358088, "learning_rate": 8.830669116611236e-06, "loss": 0.0017, "step": 91190 }, { "epoch": 0.599988158128458, "grad_norm": 0.023381281144994003, "learning_rate": 8.830300121102658e-06, "loss": 0.0011, "step": 91200 }, { "epoch": 0.6000539463036914, "grad_norm": 0.10877640746977285, "learning_rate": 8.829931075094881e-06, "loss": 0.0018, "step": 91210 }, { "epoch": 0.6001197344789247, "grad_norm": 0.006892790275670512, "learning_rate": 8.82956197859277e-06, "loss": 0.0046, "step": 91220 }, { "epoch": 0.6001855226541581, "grad_norm": 0.07395984414254679, "learning_rate": 8.829192831601193e-06, "loss": 0.0021, "step": 91230 }, { "epoch": 0.6002513108293915, "grad_norm": 0.026005667771230718, "learning_rate": 8.828823634125013e-06, "loss": 0.0009, "step": 91240 }, { "epoch": 0.6003170990046249, "grad_norm": 0.15430951900862774, "learning_rate": 8.828454386169103e-06, "loss": 0.002, "step": 91250 }, { "epoch": 0.6003828871798583, "grad_norm": 0.03350737638444675, "learning_rate": 8.828085087738326e-06, "loss": 0.0017, "step": 91260 }, { "epoch": 0.6004486753550917, "grad_norm": 0.09559629158539938, "learning_rate": 8.827715738837554e-06, "loss": 0.0012, "step": 91270 }, { "epoch": 0.600514463530325, "grad_norm": 0.17276214114122512, "learning_rate": 8.827346339471656e-06, "loss": 0.0041, "step": 91280 }, { "epoch": 0.6005802517055584, "grad_norm": 0.0575534976561503, "learning_rate": 8.826976889645502e-06, "loss": 0.0032, "step": 91290 }, { "epoch": 0.6006460398807918, "grad_norm": 0.038200753776513795, "learning_rate": 8.826607389363963e-06, "loss": 0.0029, "step": 91300 }, { "epoch": 0.6007118280560252, "grad_norm": 0.04649081274791908, "learning_rate": 8.826237838631912e-06, "loss": 0.0014, "step": 91310 }, { "epoch": 0.6007776162312586, "grad_norm": 0.13304902080293807, "learning_rate": 8.825868237454218e-06, "loss": 0.0026, "step": 91320 }, { "epoch": 0.600843404406492, "grad_norm": 0.05251675280757379, "learning_rate": 8.825498585835755e-06, "loss": 0.0024, "step": 91330 }, { "epoch": 0.6009091925817254, "grad_norm": 0.046873813746191116, "learning_rate": 8.825128883781398e-06, "loss": 0.0013, "step": 91340 }, { "epoch": 0.6009749807569588, "grad_norm": 0.14427133534469855, "learning_rate": 8.824759131296021e-06, "loss": 0.0021, "step": 91350 }, { "epoch": 0.6010407689321922, "grad_norm": 0.07261839189714069, "learning_rate": 8.824389328384495e-06, "loss": 0.0017, "step": 91360 }, { "epoch": 0.6011065571074256, "grad_norm": 0.050010344744381, "learning_rate": 8.824019475051702e-06, "loss": 0.0016, "step": 91370 }, { "epoch": 0.601172345282659, "grad_norm": 0.066925795971544, "learning_rate": 8.823649571302514e-06, "loss": 0.0011, "step": 91380 }, { "epoch": 0.6012381334578922, "grad_norm": 0.14625757531967865, "learning_rate": 8.823279617141809e-06, "loss": 0.003, "step": 91390 }, { "epoch": 0.6013039216331256, "grad_norm": 0.05737618281371924, "learning_rate": 8.822909612574462e-06, "loss": 0.0021, "step": 91400 }, { "epoch": 0.601369709808359, "grad_norm": 0.0210271249356327, "learning_rate": 8.822539557605356e-06, "loss": 0.0018, "step": 91410 }, { "epoch": 0.6014354979835924, "grad_norm": 0.2789416877180013, "learning_rate": 8.822169452239366e-06, "loss": 0.0024, "step": 91420 }, { "epoch": 0.6015012861588258, "grad_norm": 0.008906695108632854, "learning_rate": 8.821799296481374e-06, "loss": 0.0017, "step": 91430 }, { "epoch": 0.6015670743340592, "grad_norm": 0.07711790397209965, "learning_rate": 8.821429090336258e-06, "loss": 0.0022, "step": 91440 }, { "epoch": 0.6016328625092926, "grad_norm": 0.05459300073063028, "learning_rate": 8.8210588338089e-06, "loss": 0.0017, "step": 91450 }, { "epoch": 0.601698650684526, "grad_norm": 0.09581676117935181, "learning_rate": 8.820688526904182e-06, "loss": 0.0012, "step": 91460 }, { "epoch": 0.6017644388597594, "grad_norm": 0.056691604976595554, "learning_rate": 8.820318169626984e-06, "loss": 0.001, "step": 91470 }, { "epoch": 0.6018302270349928, "grad_norm": 0.07955124625243452, "learning_rate": 8.819947761982191e-06, "loss": 0.0015, "step": 91480 }, { "epoch": 0.6018960152102261, "grad_norm": 0.05389269261875786, "learning_rate": 8.819577303974686e-06, "loss": 0.001, "step": 91490 }, { "epoch": 0.6019618033854595, "grad_norm": 0.05547287260489931, "learning_rate": 8.819206795609353e-06, "loss": 0.0016, "step": 91500 }, { "epoch": 0.6020275915606929, "grad_norm": 0.0639147290216564, "learning_rate": 8.818836236891078e-06, "loss": 0.0015, "step": 91510 }, { "epoch": 0.6020933797359262, "grad_norm": 0.16278170769439046, "learning_rate": 8.818465627824745e-06, "loss": 0.0019, "step": 91520 }, { "epoch": 0.6021591679111596, "grad_norm": 0.05172703833369719, "learning_rate": 8.81809496841524e-06, "loss": 0.0025, "step": 91530 }, { "epoch": 0.602224956086393, "grad_norm": 0.08175562157224538, "learning_rate": 8.817724258667451e-06, "loss": 0.0015, "step": 91540 }, { "epoch": 0.6022907442616264, "grad_norm": 0.023158939999111627, "learning_rate": 8.817353498586264e-06, "loss": 0.0017, "step": 91550 }, { "epoch": 0.6023565324368598, "grad_norm": 0.022440699377333846, "learning_rate": 8.816982688176568e-06, "loss": 0.002, "step": 91560 }, { "epoch": 0.6024223206120932, "grad_norm": 0.21344303289291824, "learning_rate": 8.816611827443252e-06, "loss": 0.0015, "step": 91570 }, { "epoch": 0.6024881087873266, "grad_norm": 0.10222813171949953, "learning_rate": 8.816240916391205e-06, "loss": 0.0023, "step": 91580 }, { "epoch": 0.6025538969625599, "grad_norm": 0.02378058275079379, "learning_rate": 8.815869955025318e-06, "loss": 0.0017, "step": 91590 }, { "epoch": 0.6026196851377933, "grad_norm": 0.012971445735660192, "learning_rate": 8.815498943350482e-06, "loss": 0.0023, "step": 91600 }, { "epoch": 0.6026854733130267, "grad_norm": 0.05835279206262352, "learning_rate": 8.815127881371585e-06, "loss": 0.0015, "step": 91610 }, { "epoch": 0.6027512614882601, "grad_norm": 0.03556844740463847, "learning_rate": 8.814756769093524e-06, "loss": 0.0015, "step": 91620 }, { "epoch": 0.6028170496634935, "grad_norm": 0.05225027710550439, "learning_rate": 8.81438560652119e-06, "loss": 0.0014, "step": 91630 }, { "epoch": 0.6028828378387269, "grad_norm": 0.0790509150811988, "learning_rate": 8.814014393659476e-06, "loss": 0.0019, "step": 91640 }, { "epoch": 0.6029486260139603, "grad_norm": 0.1674028933535483, "learning_rate": 8.813643130513275e-06, "loss": 0.002, "step": 91650 }, { "epoch": 0.6030144141891937, "grad_norm": 0.08497050572557358, "learning_rate": 8.813271817087483e-06, "loss": 0.0014, "step": 91660 }, { "epoch": 0.603080202364427, "grad_norm": 0.044443782837658985, "learning_rate": 8.812900453386997e-06, "loss": 0.0009, "step": 91670 }, { "epoch": 0.6031459905396604, "grad_norm": 0.05675744278730545, "learning_rate": 8.81252903941671e-06, "loss": 0.0027, "step": 91680 }, { "epoch": 0.6032117787148937, "grad_norm": 0.025545653370679884, "learning_rate": 8.812157575181522e-06, "loss": 0.0012, "step": 91690 }, { "epoch": 0.6032775668901271, "grad_norm": 0.07859835939965304, "learning_rate": 8.811786060686327e-06, "loss": 0.0013, "step": 91700 }, { "epoch": 0.6033433550653605, "grad_norm": 0.008573313621930022, "learning_rate": 8.811414495936025e-06, "loss": 0.0011, "step": 91710 }, { "epoch": 0.6034091432405939, "grad_norm": 0.07332790483261314, "learning_rate": 8.811042880935516e-06, "loss": 0.0015, "step": 91720 }, { "epoch": 0.6034749314158273, "grad_norm": 0.07253567586215934, "learning_rate": 8.810671215689697e-06, "loss": 0.0028, "step": 91730 }, { "epoch": 0.6035407195910607, "grad_norm": 0.20593688212687197, "learning_rate": 8.81029950020347e-06, "loss": 0.0016, "step": 91740 }, { "epoch": 0.6036065077662941, "grad_norm": 0.017439369925854882, "learning_rate": 8.809927734481734e-06, "loss": 0.0012, "step": 91750 }, { "epoch": 0.6036722959415275, "grad_norm": 0.03678867246305054, "learning_rate": 8.80955591852939e-06, "loss": 0.003, "step": 91760 }, { "epoch": 0.6037380841167609, "grad_norm": 0.03684811098248104, "learning_rate": 8.809184052351344e-06, "loss": 0.0013, "step": 91770 }, { "epoch": 0.6038038722919943, "grad_norm": 0.07196281328321195, "learning_rate": 8.808812135952495e-06, "loss": 0.0021, "step": 91780 }, { "epoch": 0.6038696604672277, "grad_norm": 0.025550287418601613, "learning_rate": 8.808440169337747e-06, "loss": 0.0029, "step": 91790 }, { "epoch": 0.603935448642461, "grad_norm": 0.10402220280541183, "learning_rate": 8.808068152512006e-06, "loss": 0.0019, "step": 91800 }, { "epoch": 0.6040012368176944, "grad_norm": 1.0126632448775432, "learning_rate": 8.807696085480172e-06, "loss": 0.0054, "step": 91810 }, { "epoch": 0.6040670249929277, "grad_norm": 0.0798506654034454, "learning_rate": 8.807323968247157e-06, "loss": 0.0024, "step": 91820 }, { "epoch": 0.6041328131681611, "grad_norm": 0.015761708453597645, "learning_rate": 8.806951800817864e-06, "loss": 0.0011, "step": 91830 }, { "epoch": 0.6041986013433945, "grad_norm": 0.045678953113943606, "learning_rate": 8.806579583197198e-06, "loss": 0.0016, "step": 91840 }, { "epoch": 0.6042643895186279, "grad_norm": 0.04162108093789857, "learning_rate": 8.806207315390068e-06, "loss": 0.0022, "step": 91850 }, { "epoch": 0.6043301776938613, "grad_norm": 0.06982606796696234, "learning_rate": 8.805834997401381e-06, "loss": 0.0017, "step": 91860 }, { "epoch": 0.6043959658690947, "grad_norm": 0.03972084967410362, "learning_rate": 8.805462629236047e-06, "loss": 0.0014, "step": 91870 }, { "epoch": 0.6044617540443281, "grad_norm": 0.052875017912453255, "learning_rate": 8.805090210898975e-06, "loss": 0.0017, "step": 91880 }, { "epoch": 0.6045275422195615, "grad_norm": 0.0058121046543670426, "learning_rate": 8.804717742395075e-06, "loss": 0.001, "step": 91890 }, { "epoch": 0.6045933303947948, "grad_norm": 0.03717689456329028, "learning_rate": 8.804345223729256e-06, "loss": 0.0014, "step": 91900 }, { "epoch": 0.6046591185700282, "grad_norm": 0.03899402722153509, "learning_rate": 8.803972654906435e-06, "loss": 0.0018, "step": 91910 }, { "epoch": 0.6047249067452616, "grad_norm": 0.03444448622460015, "learning_rate": 8.803600035931515e-06, "loss": 0.0015, "step": 91920 }, { "epoch": 0.604790694920495, "grad_norm": 0.006917892199248401, "learning_rate": 8.803227366809416e-06, "loss": 0.0013, "step": 91930 }, { "epoch": 0.6048564830957284, "grad_norm": 0.004202547888713653, "learning_rate": 8.802854647545049e-06, "loss": 0.0062, "step": 91940 }, { "epoch": 0.6049222712709618, "grad_norm": 0.053717041654394566, "learning_rate": 8.802481878143328e-06, "loss": 0.0023, "step": 91950 }, { "epoch": 0.6049880594461952, "grad_norm": 0.0860549618017637, "learning_rate": 8.802109058609167e-06, "loss": 0.0017, "step": 91960 }, { "epoch": 0.6050538476214286, "grad_norm": 0.027575920387808407, "learning_rate": 8.801736188947481e-06, "loss": 0.0012, "step": 91970 }, { "epoch": 0.605119635796662, "grad_norm": 0.015456390252401806, "learning_rate": 8.801363269163187e-06, "loss": 0.0014, "step": 91980 }, { "epoch": 0.6051854239718953, "grad_norm": 0.06603617103201848, "learning_rate": 8.800990299261201e-06, "loss": 0.0022, "step": 91990 }, { "epoch": 0.6052512121471286, "grad_norm": 0.053028301575942884, "learning_rate": 8.800617279246442e-06, "loss": 0.0015, "step": 92000 }, { "epoch": 0.605317000322362, "grad_norm": 0.027372068927613125, "learning_rate": 8.800244209123827e-06, "loss": 0.0016, "step": 92010 }, { "epoch": 0.6053827884975954, "grad_norm": 0.08255655027467937, "learning_rate": 8.799871088898272e-06, "loss": 0.0022, "step": 92020 }, { "epoch": 0.6054485766728288, "grad_norm": 0.12033046323058522, "learning_rate": 8.7994979185747e-06, "loss": 0.002, "step": 92030 }, { "epoch": 0.6055143648480622, "grad_norm": 0.02094468664759898, "learning_rate": 8.799124698158027e-06, "loss": 0.0025, "step": 92040 }, { "epoch": 0.6055801530232956, "grad_norm": 0.03277697691012693, "learning_rate": 8.79875142765318e-06, "loss": 0.0011, "step": 92050 }, { "epoch": 0.605645941198529, "grad_norm": 0.0015103104252380958, "learning_rate": 8.798378107065073e-06, "loss": 0.0007, "step": 92060 }, { "epoch": 0.6057117293737624, "grad_norm": 0.10407520861372938, "learning_rate": 8.798004736398634e-06, "loss": 0.0013, "step": 92070 }, { "epoch": 0.6057775175489958, "grad_norm": 0.05398380758039977, "learning_rate": 8.797631315658781e-06, "loss": 0.0013, "step": 92080 }, { "epoch": 0.6058433057242292, "grad_norm": 0.010334094767982211, "learning_rate": 8.797257844850438e-06, "loss": 0.0013, "step": 92090 }, { "epoch": 0.6059090938994625, "grad_norm": 0.1521708610521541, "learning_rate": 8.796884323978532e-06, "loss": 0.001, "step": 92100 }, { "epoch": 0.6059748820746959, "grad_norm": 0.035572923195417085, "learning_rate": 8.796510753047984e-06, "loss": 0.0015, "step": 92110 }, { "epoch": 0.6060406702499292, "grad_norm": 0.06746349720361151, "learning_rate": 8.79613713206372e-06, "loss": 0.0021, "step": 92120 }, { "epoch": 0.6061064584251626, "grad_norm": 0.028229880511418254, "learning_rate": 8.795763461030669e-06, "loss": 0.0004, "step": 92130 }, { "epoch": 0.606172246600396, "grad_norm": 0.035802591355286656, "learning_rate": 8.795389739953753e-06, "loss": 0.0017, "step": 92140 }, { "epoch": 0.6062380347756294, "grad_norm": 0.1361008706236675, "learning_rate": 8.795015968837902e-06, "loss": 0.0022, "step": 92150 }, { "epoch": 0.6063038229508628, "grad_norm": 0.04197476555742889, "learning_rate": 8.794642147688042e-06, "loss": 0.0021, "step": 92160 }, { "epoch": 0.6063696111260962, "grad_norm": 0.012983158501020328, "learning_rate": 8.794268276509104e-06, "loss": 0.0011, "step": 92170 }, { "epoch": 0.6064353993013296, "grad_norm": 0.10368613285037408, "learning_rate": 8.793894355306014e-06, "loss": 0.0029, "step": 92180 }, { "epoch": 0.606501187476563, "grad_norm": 0.10387725204162161, "learning_rate": 8.793520384083705e-06, "loss": 0.0012, "step": 92190 }, { "epoch": 0.6065669756517963, "grad_norm": 0.006032362014627867, "learning_rate": 8.793146362847105e-06, "loss": 0.0009, "step": 92200 }, { "epoch": 0.6066327638270297, "grad_norm": 0.03684236253878031, "learning_rate": 8.792772291601146e-06, "loss": 0.0032, "step": 92210 }, { "epoch": 0.6066985520022631, "grad_norm": 0.04864302310691738, "learning_rate": 8.792398170350762e-06, "loss": 0.0032, "step": 92220 }, { "epoch": 0.6067643401774965, "grad_norm": 0.05824604926804929, "learning_rate": 8.792023999100882e-06, "loss": 0.0016, "step": 92230 }, { "epoch": 0.6068301283527299, "grad_norm": 0.013659684218471593, "learning_rate": 8.791649777856442e-06, "loss": 0.0011, "step": 92240 }, { "epoch": 0.6068959165279633, "grad_norm": 0.054079089047103936, "learning_rate": 8.791275506622373e-06, "loss": 0.0042, "step": 92250 }, { "epoch": 0.6069617047031967, "grad_norm": 0.0009048578377190218, "learning_rate": 8.790901185403608e-06, "loss": 0.0025, "step": 92260 }, { "epoch": 0.60702749287843, "grad_norm": 0.012930374332376078, "learning_rate": 8.790526814205089e-06, "loss": 0.0019, "step": 92270 }, { "epoch": 0.6070932810536634, "grad_norm": 0.06708478334147894, "learning_rate": 8.790152393031747e-06, "loss": 0.0017, "step": 92280 }, { "epoch": 0.6071590692288968, "grad_norm": 0.0438102777309634, "learning_rate": 8.789777921888519e-06, "loss": 0.0012, "step": 92290 }, { "epoch": 0.6072248574041302, "grad_norm": 0.005072105801336316, "learning_rate": 8.789403400780342e-06, "loss": 0.0025, "step": 92300 }, { "epoch": 0.6072906455793635, "grad_norm": 0.004880561116484026, "learning_rate": 8.789028829712152e-06, "loss": 0.0016, "step": 92310 }, { "epoch": 0.6073564337545969, "grad_norm": 0.06372866919626793, "learning_rate": 8.788654208688891e-06, "loss": 0.0012, "step": 92320 }, { "epoch": 0.6074222219298303, "grad_norm": 0.14911932615873327, "learning_rate": 8.788279537715496e-06, "loss": 0.0022, "step": 92330 }, { "epoch": 0.6074880101050637, "grad_norm": 0.07116157394897318, "learning_rate": 8.787904816796909e-06, "loss": 0.0009, "step": 92340 }, { "epoch": 0.6075537982802971, "grad_norm": 0.09404407979659116, "learning_rate": 8.787530045938065e-06, "loss": 0.0028, "step": 92350 }, { "epoch": 0.6076195864555305, "grad_norm": 0.008769517364631826, "learning_rate": 8.78715522514391e-06, "loss": 0.0012, "step": 92360 }, { "epoch": 0.6076853746307639, "grad_norm": 0.07471699440009207, "learning_rate": 8.786780354419385e-06, "loss": 0.0027, "step": 92370 }, { "epoch": 0.6077511628059973, "grad_norm": 0.0672183676445284, "learning_rate": 8.786405433769431e-06, "loss": 0.0014, "step": 92380 }, { "epoch": 0.6078169509812307, "grad_norm": 0.006608114961597257, "learning_rate": 8.78603046319899e-06, "loss": 0.0008, "step": 92390 }, { "epoch": 0.6078827391564641, "grad_norm": 0.024846572550030636, "learning_rate": 8.78565544271301e-06, "loss": 0.0016, "step": 92400 }, { "epoch": 0.6079485273316974, "grad_norm": 0.04153794561812344, "learning_rate": 8.785280372316431e-06, "loss": 0.0018, "step": 92410 }, { "epoch": 0.6080143155069307, "grad_norm": 0.09935144180338457, "learning_rate": 8.7849052520142e-06, "loss": 0.0012, "step": 92420 }, { "epoch": 0.6080801036821641, "grad_norm": 0.05254715834554173, "learning_rate": 8.784530081811262e-06, "loss": 0.0035, "step": 92430 }, { "epoch": 0.6081458918573975, "grad_norm": 0.04581819602366883, "learning_rate": 8.784154861712562e-06, "loss": 0.001, "step": 92440 }, { "epoch": 0.6082116800326309, "grad_norm": 0.09816434668505013, "learning_rate": 8.78377959172305e-06, "loss": 0.0018, "step": 92450 }, { "epoch": 0.6082774682078643, "grad_norm": 0.1260490697364626, "learning_rate": 8.783404271847671e-06, "loss": 0.0027, "step": 92460 }, { "epoch": 0.6083432563830977, "grad_norm": 0.07700145084509366, "learning_rate": 8.783028902091375e-06, "loss": 0.0019, "step": 92470 }, { "epoch": 0.6084090445583311, "grad_norm": 0.03742124340836585, "learning_rate": 8.782653482459109e-06, "loss": 0.0014, "step": 92480 }, { "epoch": 0.6084748327335645, "grad_norm": 0.0980507773182491, "learning_rate": 8.782278012955825e-06, "loss": 0.0012, "step": 92490 }, { "epoch": 0.6085406209087979, "grad_norm": 0.04864653990946584, "learning_rate": 8.781902493586471e-06, "loss": 0.0023, "step": 92500 }, { "epoch": 0.6086064090840312, "grad_norm": 0.07951475063910665, "learning_rate": 8.781526924356e-06, "loss": 0.0017, "step": 92510 }, { "epoch": 0.6086721972592646, "grad_norm": 0.2684533813020184, "learning_rate": 8.781151305269362e-06, "loss": 0.002, "step": 92520 }, { "epoch": 0.608737985434498, "grad_norm": 0.058282022959707545, "learning_rate": 8.780775636331508e-06, "loss": 0.0008, "step": 92530 }, { "epoch": 0.6088037736097314, "grad_norm": 0.05772310455162596, "learning_rate": 8.780399917547395e-06, "loss": 0.0013, "step": 92540 }, { "epoch": 0.6088695617849648, "grad_norm": 0.15329497774822778, "learning_rate": 8.780024148921973e-06, "loss": 0.0025, "step": 92550 }, { "epoch": 0.6089353499601982, "grad_norm": 0.021859428361591694, "learning_rate": 8.779648330460197e-06, "loss": 0.0021, "step": 92560 }, { "epoch": 0.6090011381354316, "grad_norm": 0.028071310390496367, "learning_rate": 8.779272462167023e-06, "loss": 0.0013, "step": 92570 }, { "epoch": 0.609066926310665, "grad_norm": 0.0020617007811874945, "learning_rate": 8.778896544047406e-06, "loss": 0.002, "step": 92580 }, { "epoch": 0.6091327144858983, "grad_norm": 0.042300898388934444, "learning_rate": 8.7785205761063e-06, "loss": 0.0021, "step": 92590 }, { "epoch": 0.6091985026611317, "grad_norm": 0.06818618509108754, "learning_rate": 8.778144558348664e-06, "loss": 0.0018, "step": 92600 }, { "epoch": 0.609264290836365, "grad_norm": 0.023221181570198455, "learning_rate": 8.777768490779457e-06, "loss": 0.0017, "step": 92610 }, { "epoch": 0.6093300790115984, "grad_norm": 0.0045289268033623905, "learning_rate": 8.777392373403633e-06, "loss": 0.0011, "step": 92620 }, { "epoch": 0.6093958671868318, "grad_norm": 0.02133801516788622, "learning_rate": 8.777016206226152e-06, "loss": 0.0013, "step": 92630 }, { "epoch": 0.6094616553620652, "grad_norm": 0.1139693038806121, "learning_rate": 8.776639989251976e-06, "loss": 0.0014, "step": 92640 }, { "epoch": 0.6095274435372986, "grad_norm": 0.01412264268761485, "learning_rate": 8.776263722486064e-06, "loss": 0.0017, "step": 92650 }, { "epoch": 0.609593231712532, "grad_norm": 0.05734274473558635, "learning_rate": 8.775887405933374e-06, "loss": 0.0022, "step": 92660 }, { "epoch": 0.6096590198877654, "grad_norm": 0.14045170691421927, "learning_rate": 8.775511039598873e-06, "loss": 0.0019, "step": 92670 }, { "epoch": 0.6097248080629988, "grad_norm": 0.054979246764503706, "learning_rate": 8.775134623487517e-06, "loss": 0.0019, "step": 92680 }, { "epoch": 0.6097905962382322, "grad_norm": 0.059288013349395485, "learning_rate": 8.774758157604272e-06, "loss": 0.0019, "step": 92690 }, { "epoch": 0.6098563844134656, "grad_norm": 0.06563136555133735, "learning_rate": 8.7743816419541e-06, "loss": 0.0016, "step": 92700 }, { "epoch": 0.6099221725886989, "grad_norm": 0.09651075588492222, "learning_rate": 8.774005076541968e-06, "loss": 0.0034, "step": 92710 }, { "epoch": 0.6099879607639322, "grad_norm": 0.013908564460827246, "learning_rate": 8.773628461372836e-06, "loss": 0.0012, "step": 92720 }, { "epoch": 0.6100537489391656, "grad_norm": 0.051373612085580944, "learning_rate": 8.773251796451674e-06, "loss": 0.0022, "step": 92730 }, { "epoch": 0.610119537114399, "grad_norm": 0.08443608189011942, "learning_rate": 8.772875081783445e-06, "loss": 0.0011, "step": 92740 }, { "epoch": 0.6101853252896324, "grad_norm": 0.059413163871379425, "learning_rate": 8.772498317373115e-06, "loss": 0.0017, "step": 92750 }, { "epoch": 0.6102511134648658, "grad_norm": 0.03384288171029877, "learning_rate": 8.772121503225656e-06, "loss": 0.0015, "step": 92760 }, { "epoch": 0.6103169016400992, "grad_norm": 0.15552635177065408, "learning_rate": 8.77174463934603e-06, "loss": 0.0028, "step": 92770 }, { "epoch": 0.6103826898153326, "grad_norm": 0.06950477309031187, "learning_rate": 8.77136772573921e-06, "loss": 0.0017, "step": 92780 }, { "epoch": 0.610448477990566, "grad_norm": 0.03738523783835558, "learning_rate": 8.770990762410164e-06, "loss": 0.0014, "step": 92790 }, { "epoch": 0.6105142661657994, "grad_norm": 0.7206723429121858, "learning_rate": 8.77061374936386e-06, "loss": 0.0018, "step": 92800 }, { "epoch": 0.6105800543410328, "grad_norm": 0.04415294665681544, "learning_rate": 8.770236686605271e-06, "loss": 0.0016, "step": 92810 }, { "epoch": 0.6106458425162661, "grad_norm": 0.005508739319183663, "learning_rate": 8.769859574139367e-06, "loss": 0.0015, "step": 92820 }, { "epoch": 0.6107116306914995, "grad_norm": 0.04397679832256811, "learning_rate": 8.769482411971122e-06, "loss": 0.0018, "step": 92830 }, { "epoch": 0.6107774188667329, "grad_norm": 0.037377435261087257, "learning_rate": 8.769105200105505e-06, "loss": 0.0016, "step": 92840 }, { "epoch": 0.6108432070419663, "grad_norm": 0.07564256334507294, "learning_rate": 8.768727938547492e-06, "loss": 0.0012, "step": 92850 }, { "epoch": 0.6109089952171997, "grad_norm": 0.03660173830612556, "learning_rate": 8.768350627302056e-06, "loss": 0.0013, "step": 92860 }, { "epoch": 0.610974783392433, "grad_norm": 0.08442338403748031, "learning_rate": 8.767973266374172e-06, "loss": 0.0011, "step": 92870 }, { "epoch": 0.6110405715676664, "grad_norm": 0.059413612995536075, "learning_rate": 8.767595855768815e-06, "loss": 0.0017, "step": 92880 }, { "epoch": 0.6111063597428998, "grad_norm": 0.02096248997970458, "learning_rate": 8.76721839549096e-06, "loss": 0.0016, "step": 92890 }, { "epoch": 0.6111721479181332, "grad_norm": 0.015407709936028616, "learning_rate": 8.766840885545584e-06, "loss": 0.002, "step": 92900 }, { "epoch": 0.6112379360933666, "grad_norm": 0.03367215326918713, "learning_rate": 8.766463325937663e-06, "loss": 0.0026, "step": 92910 }, { "epoch": 0.6113037242685999, "grad_norm": 0.01768924605031239, "learning_rate": 8.766085716672177e-06, "loss": 0.001, "step": 92920 }, { "epoch": 0.6113695124438333, "grad_norm": 0.08270048398540912, "learning_rate": 8.765708057754103e-06, "loss": 0.0013, "step": 92930 }, { "epoch": 0.6114353006190667, "grad_norm": 0.05350666216108394, "learning_rate": 8.76533034918842e-06, "loss": 0.003, "step": 92940 }, { "epoch": 0.6115010887943001, "grad_norm": 0.0432399980312697, "learning_rate": 8.764952590980112e-06, "loss": 0.0015, "step": 92950 }, { "epoch": 0.6115668769695335, "grad_norm": 0.02258482006820547, "learning_rate": 8.764574783134152e-06, "loss": 0.0014, "step": 92960 }, { "epoch": 0.6116326651447669, "grad_norm": 0.03223778552988039, "learning_rate": 8.764196925655525e-06, "loss": 0.0017, "step": 92970 }, { "epoch": 0.6116984533200003, "grad_norm": 0.1439136434079595, "learning_rate": 8.763819018549214e-06, "loss": 0.0015, "step": 92980 }, { "epoch": 0.6117642414952337, "grad_norm": 0.0895828070015965, "learning_rate": 8.763441061820199e-06, "loss": 0.0025, "step": 92990 }, { "epoch": 0.6118300296704671, "grad_norm": 0.06430785367075878, "learning_rate": 8.763063055473465e-06, "loss": 0.002, "step": 93000 }, { "epoch": 0.6118958178457005, "grad_norm": 0.02912020347708452, "learning_rate": 8.762684999513993e-06, "loss": 0.002, "step": 93010 }, { "epoch": 0.6119616060209337, "grad_norm": 0.04601740119462693, "learning_rate": 8.76230689394677e-06, "loss": 0.0014, "step": 93020 }, { "epoch": 0.6120273941961671, "grad_norm": 0.0310958841961947, "learning_rate": 8.76192873877678e-06, "loss": 0.001, "step": 93030 }, { "epoch": 0.6120931823714005, "grad_norm": 0.11719852343747152, "learning_rate": 8.761550534009009e-06, "loss": 0.0014, "step": 93040 }, { "epoch": 0.6121589705466339, "grad_norm": 0.10146212484186877, "learning_rate": 8.761172279648441e-06, "loss": 0.0029, "step": 93050 }, { "epoch": 0.6122247587218673, "grad_norm": 0.0785527167569829, "learning_rate": 8.760793975700066e-06, "loss": 0.0014, "step": 93060 }, { "epoch": 0.6122905468971007, "grad_norm": 0.07504056394631965, "learning_rate": 8.76041562216887e-06, "loss": 0.0014, "step": 93070 }, { "epoch": 0.6123563350723341, "grad_norm": 0.06385648773637674, "learning_rate": 8.760037219059842e-06, "loss": 0.0016, "step": 93080 }, { "epoch": 0.6124221232475675, "grad_norm": 0.026079125066253945, "learning_rate": 8.759658766377972e-06, "loss": 0.0016, "step": 93090 }, { "epoch": 0.6124879114228009, "grad_norm": 0.045376028599633855, "learning_rate": 8.759280264128247e-06, "loss": 0.0027, "step": 93100 }, { "epoch": 0.6125536995980343, "grad_norm": 0.03427859697118333, "learning_rate": 8.758901712315658e-06, "loss": 0.0012, "step": 93110 }, { "epoch": 0.6126194877732676, "grad_norm": 0.11093542245508359, "learning_rate": 8.758523110945196e-06, "loss": 0.0013, "step": 93120 }, { "epoch": 0.612685275948501, "grad_norm": 0.12963292633962503, "learning_rate": 8.758144460021854e-06, "loss": 0.0015, "step": 93130 }, { "epoch": 0.6127510641237344, "grad_norm": 0.020554480564330953, "learning_rate": 8.757765759550622e-06, "loss": 0.0018, "step": 93140 }, { "epoch": 0.6128168522989678, "grad_norm": 0.04620910158977991, "learning_rate": 8.757387009536496e-06, "loss": 0.0013, "step": 93150 }, { "epoch": 0.6128826404742012, "grad_norm": 0.04077329716676392, "learning_rate": 8.757008209984466e-06, "loss": 0.0028, "step": 93160 }, { "epoch": 0.6129484286494346, "grad_norm": 0.032375569767107905, "learning_rate": 8.756629360899527e-06, "loss": 0.001, "step": 93170 }, { "epoch": 0.613014216824668, "grad_norm": 0.20993114887265882, "learning_rate": 8.756250462286675e-06, "loss": 0.0025, "step": 93180 }, { "epoch": 0.6130800049999013, "grad_norm": 0.007580914358805155, "learning_rate": 8.755871514150904e-06, "loss": 0.001, "step": 93190 }, { "epoch": 0.6131457931751347, "grad_norm": 0.00946025053822958, "learning_rate": 8.755492516497212e-06, "loss": 0.001, "step": 93200 }, { "epoch": 0.6132115813503681, "grad_norm": 0.007353428682694441, "learning_rate": 8.755113469330594e-06, "loss": 0.0024, "step": 93210 }, { "epoch": 0.6132773695256015, "grad_norm": 0.09870967943478973, "learning_rate": 8.754734372656049e-06, "loss": 0.0013, "step": 93220 }, { "epoch": 0.6133431577008348, "grad_norm": 0.06835072398881958, "learning_rate": 8.754355226478572e-06, "loss": 0.0044, "step": 93230 }, { "epoch": 0.6134089458760682, "grad_norm": 0.0010129650476025356, "learning_rate": 8.753976030803165e-06, "loss": 0.001, "step": 93240 }, { "epoch": 0.6134747340513016, "grad_norm": 0.02225992399630469, "learning_rate": 8.753596785634827e-06, "loss": 0.0025, "step": 93250 }, { "epoch": 0.613540522226535, "grad_norm": 0.05426078032664477, "learning_rate": 8.753217490978556e-06, "loss": 0.0017, "step": 93260 }, { "epoch": 0.6136063104017684, "grad_norm": 0.06991432098557811, "learning_rate": 8.752838146839352e-06, "loss": 0.0013, "step": 93270 }, { "epoch": 0.6136720985770018, "grad_norm": 0.029967189223882574, "learning_rate": 8.752458753222222e-06, "loss": 0.0012, "step": 93280 }, { "epoch": 0.6137378867522352, "grad_norm": 0.03626651440660852, "learning_rate": 8.752079310132162e-06, "loss": 0.001, "step": 93290 }, { "epoch": 0.6138036749274686, "grad_norm": 0.07586284720673891, "learning_rate": 8.751699817574178e-06, "loss": 0.0018, "step": 93300 }, { "epoch": 0.613869463102702, "grad_norm": 0.09790679588848475, "learning_rate": 8.751320275553272e-06, "loss": 0.0019, "step": 93310 }, { "epoch": 0.6139352512779354, "grad_norm": 0.009662904464195364, "learning_rate": 8.750940684074448e-06, "loss": 0.0029, "step": 93320 }, { "epoch": 0.6140010394531686, "grad_norm": 0.024491927609828133, "learning_rate": 8.750561043142709e-06, "loss": 0.0019, "step": 93330 }, { "epoch": 0.614066827628402, "grad_norm": 0.17607175463215552, "learning_rate": 8.750181352763064e-06, "loss": 0.0029, "step": 93340 }, { "epoch": 0.6141326158036354, "grad_norm": 0.022681512543140065, "learning_rate": 8.749801612940515e-06, "loss": 0.0019, "step": 93350 }, { "epoch": 0.6141984039788688, "grad_norm": 0.006137844408975053, "learning_rate": 8.749421823680071e-06, "loss": 0.0015, "step": 93360 }, { "epoch": 0.6142641921541022, "grad_norm": 0.07048464280732966, "learning_rate": 8.74904198498674e-06, "loss": 0.0019, "step": 93370 }, { "epoch": 0.6143299803293356, "grad_norm": 0.0797168261229325, "learning_rate": 8.748662096865525e-06, "loss": 0.001, "step": 93380 }, { "epoch": 0.614395768504569, "grad_norm": 0.03568746282139918, "learning_rate": 8.748282159321439e-06, "loss": 0.0013, "step": 93390 }, { "epoch": 0.6144615566798024, "grad_norm": 0.016571486352094753, "learning_rate": 8.747902172359492e-06, "loss": 0.0012, "step": 93400 }, { "epoch": 0.6145273448550358, "grad_norm": 0.07516244989619723, "learning_rate": 8.74752213598469e-06, "loss": 0.0023, "step": 93410 }, { "epoch": 0.6145931330302692, "grad_norm": 0.04657632305417666, "learning_rate": 8.747142050202045e-06, "loss": 0.0014, "step": 93420 }, { "epoch": 0.6146589212055025, "grad_norm": 0.01918800552755645, "learning_rate": 8.746761915016568e-06, "loss": 0.0009, "step": 93430 }, { "epoch": 0.6147247093807359, "grad_norm": 0.05982887431484331, "learning_rate": 8.746381730433273e-06, "loss": 0.0013, "step": 93440 }, { "epoch": 0.6147904975559693, "grad_norm": 0.10491916358967111, "learning_rate": 8.746001496457168e-06, "loss": 0.0019, "step": 93450 }, { "epoch": 0.6148562857312027, "grad_norm": 0.047919317083238766, "learning_rate": 8.74562121309327e-06, "loss": 0.0013, "step": 93460 }, { "epoch": 0.614922073906436, "grad_norm": 0.02726854353651343, "learning_rate": 8.745240880346592e-06, "loss": 0.002, "step": 93470 }, { "epoch": 0.6149878620816694, "grad_norm": 0.09051233153935473, "learning_rate": 8.744860498222146e-06, "loss": 0.0013, "step": 93480 }, { "epoch": 0.6150536502569028, "grad_norm": 0.048612068042535914, "learning_rate": 8.74448006672495e-06, "loss": 0.0017, "step": 93490 }, { "epoch": 0.6151194384321362, "grad_norm": 0.031115535678654518, "learning_rate": 8.744099585860018e-06, "loss": 0.0007, "step": 93500 }, { "epoch": 0.6151852266073696, "grad_norm": 0.18305841597183045, "learning_rate": 8.743719055632364e-06, "loss": 0.0013, "step": 93510 }, { "epoch": 0.615251014782603, "grad_norm": 0.14350263982453884, "learning_rate": 8.743338476047011e-06, "loss": 0.0025, "step": 93520 }, { "epoch": 0.6153168029578363, "grad_norm": 0.052837588614633835, "learning_rate": 8.742957847108973e-06, "loss": 0.0029, "step": 93530 }, { "epoch": 0.6153825911330697, "grad_norm": 0.12372569978554303, "learning_rate": 8.742577168823267e-06, "loss": 0.0023, "step": 93540 }, { "epoch": 0.6154483793083031, "grad_norm": 0.10569870891774244, "learning_rate": 8.742196441194912e-06, "loss": 0.0016, "step": 93550 }, { "epoch": 0.6155141674835365, "grad_norm": 0.11944032086290436, "learning_rate": 8.741815664228932e-06, "loss": 0.0032, "step": 93560 }, { "epoch": 0.6155799556587699, "grad_norm": 0.01850691727439788, "learning_rate": 8.741434837930342e-06, "loss": 0.002, "step": 93570 }, { "epoch": 0.6156457438340033, "grad_norm": 0.030941357999623376, "learning_rate": 8.741053962304165e-06, "loss": 0.0012, "step": 93580 }, { "epoch": 0.6157115320092367, "grad_norm": 0.17502987170017725, "learning_rate": 8.740673037355424e-06, "loss": 0.0017, "step": 93590 }, { "epoch": 0.6157773201844701, "grad_norm": 0.10570338339518848, "learning_rate": 8.740292063089138e-06, "loss": 0.0016, "step": 93600 }, { "epoch": 0.6158431083597035, "grad_norm": 0.041215182198029686, "learning_rate": 8.739911039510332e-06, "loss": 0.0009, "step": 93610 }, { "epoch": 0.6159088965349369, "grad_norm": 0.02652286791508167, "learning_rate": 8.739529966624029e-06, "loss": 0.0017, "step": 93620 }, { "epoch": 0.6159746847101701, "grad_norm": 0.08534546110126294, "learning_rate": 8.739148844435253e-06, "loss": 0.0017, "step": 93630 }, { "epoch": 0.6160404728854035, "grad_norm": 0.0039133657367974, "learning_rate": 8.738767672949028e-06, "loss": 0.0016, "step": 93640 }, { "epoch": 0.6161062610606369, "grad_norm": 0.012255999207250708, "learning_rate": 8.738386452170383e-06, "loss": 0.0023, "step": 93650 }, { "epoch": 0.6161720492358703, "grad_norm": 0.1566108374687578, "learning_rate": 8.738005182104339e-06, "loss": 0.0012, "step": 93660 }, { "epoch": 0.6162378374111037, "grad_norm": 0.003179461837463341, "learning_rate": 8.737623862755926e-06, "loss": 0.0028, "step": 93670 }, { "epoch": 0.6163036255863371, "grad_norm": 0.07018912834409084, "learning_rate": 8.73724249413017e-06, "loss": 0.0007, "step": 93680 }, { "epoch": 0.6163694137615705, "grad_norm": 0.031171504155343772, "learning_rate": 8.736861076232099e-06, "loss": 0.0021, "step": 93690 }, { "epoch": 0.6164352019368039, "grad_norm": 0.12825568818971264, "learning_rate": 8.736479609066743e-06, "loss": 0.0018, "step": 93700 }, { "epoch": 0.6165009901120373, "grad_norm": 0.11802822269732662, "learning_rate": 8.73609809263913e-06, "loss": 0.0018, "step": 93710 }, { "epoch": 0.6165667782872707, "grad_norm": 0.04122945349615558, "learning_rate": 8.73571652695429e-06, "loss": 0.0014, "step": 93720 }, { "epoch": 0.6166325664625041, "grad_norm": 0.005839593900303796, "learning_rate": 8.735334912017255e-06, "loss": 0.0022, "step": 93730 }, { "epoch": 0.6166983546377374, "grad_norm": 0.014940465997734588, "learning_rate": 8.734953247833053e-06, "loss": 0.002, "step": 93740 }, { "epoch": 0.6167641428129708, "grad_norm": 0.03863360725612594, "learning_rate": 8.73457153440672e-06, "loss": 0.0011, "step": 93750 }, { "epoch": 0.6168299309882042, "grad_norm": 0.000438819869319137, "learning_rate": 8.734189771743287e-06, "loss": 0.0017, "step": 93760 }, { "epoch": 0.6168957191634376, "grad_norm": 0.09099744458673466, "learning_rate": 8.733807959847785e-06, "loss": 0.0022, "step": 93770 }, { "epoch": 0.616961507338671, "grad_norm": 0.05082641683946039, "learning_rate": 8.73342609872525e-06, "loss": 0.0014, "step": 93780 }, { "epoch": 0.6170272955139043, "grad_norm": 0.009909788165056011, "learning_rate": 8.733044188380718e-06, "loss": 0.001, "step": 93790 }, { "epoch": 0.6170930836891377, "grad_norm": 0.07072842510888233, "learning_rate": 8.732662228819222e-06, "loss": 0.002, "step": 93800 }, { "epoch": 0.6171588718643711, "grad_norm": 0.04770635029119433, "learning_rate": 8.732280220045798e-06, "loss": 0.0009, "step": 93810 }, { "epoch": 0.6172246600396045, "grad_norm": 0.0011350797748321839, "learning_rate": 8.731898162065483e-06, "loss": 0.0014, "step": 93820 }, { "epoch": 0.6172904482148379, "grad_norm": 0.050724133658274106, "learning_rate": 8.731516054883313e-06, "loss": 0.0021, "step": 93830 }, { "epoch": 0.6173562363900712, "grad_norm": 0.02943083092538657, "learning_rate": 8.731133898504328e-06, "loss": 0.0046, "step": 93840 }, { "epoch": 0.6174220245653046, "grad_norm": 0.07865430502633346, "learning_rate": 8.730751692933563e-06, "loss": 0.0009, "step": 93850 }, { "epoch": 0.617487812740538, "grad_norm": 0.12296236081613793, "learning_rate": 8.73036943817606e-06, "loss": 0.0029, "step": 93860 }, { "epoch": 0.6175536009157714, "grad_norm": 0.08709188380323156, "learning_rate": 8.729987134236859e-06, "loss": 0.0023, "step": 93870 }, { "epoch": 0.6176193890910048, "grad_norm": 0.14090044907801255, "learning_rate": 8.729604781120997e-06, "loss": 0.0015, "step": 93880 }, { "epoch": 0.6176851772662382, "grad_norm": 0.010141745904743508, "learning_rate": 8.729222378833517e-06, "loss": 0.0017, "step": 93890 }, { "epoch": 0.6177509654414716, "grad_norm": 0.08746056237573624, "learning_rate": 8.728839927379462e-06, "loss": 0.0017, "step": 93900 }, { "epoch": 0.617816753616705, "grad_norm": 0.027814558912038777, "learning_rate": 8.728457426763873e-06, "loss": 0.0023, "step": 93910 }, { "epoch": 0.6178825417919384, "grad_norm": 0.1776375647151599, "learning_rate": 8.728074876991792e-06, "loss": 0.0016, "step": 93920 }, { "epoch": 0.6179483299671717, "grad_norm": 0.1146675966826633, "learning_rate": 8.727692278068265e-06, "loss": 0.0016, "step": 93930 }, { "epoch": 0.618014118142405, "grad_norm": 0.020932172908359876, "learning_rate": 8.727309629998332e-06, "loss": 0.0019, "step": 93940 }, { "epoch": 0.6180799063176384, "grad_norm": 0.030149276613054023, "learning_rate": 8.726926932787043e-06, "loss": 0.0008, "step": 93950 }, { "epoch": 0.6181456944928718, "grad_norm": 0.036196251599433626, "learning_rate": 8.72654418643944e-06, "loss": 0.0006, "step": 93960 }, { "epoch": 0.6182114826681052, "grad_norm": 0.09226838095445536, "learning_rate": 8.726161390960572e-06, "loss": 0.0016, "step": 93970 }, { "epoch": 0.6182772708433386, "grad_norm": 0.03877485842427629, "learning_rate": 8.725778546355484e-06, "loss": 0.001, "step": 93980 }, { "epoch": 0.618343059018572, "grad_norm": 0.05057294550882576, "learning_rate": 8.725395652629222e-06, "loss": 0.0017, "step": 93990 }, { "epoch": 0.6184088471938054, "grad_norm": 0.08565516127940144, "learning_rate": 8.725012709786837e-06, "loss": 0.003, "step": 94000 }, { "epoch": 0.6184746353690388, "grad_norm": 0.08737881532980286, "learning_rate": 8.724629717833376e-06, "loss": 0.0017, "step": 94010 }, { "epoch": 0.6185404235442722, "grad_norm": 0.026663575820915997, "learning_rate": 8.724246676773888e-06, "loss": 0.0021, "step": 94020 }, { "epoch": 0.6186062117195056, "grad_norm": 0.038721149322767726, "learning_rate": 8.723863586613427e-06, "loss": 0.001, "step": 94030 }, { "epoch": 0.6186719998947389, "grad_norm": 0.025843619431967085, "learning_rate": 8.723480447357038e-06, "loss": 0.0016, "step": 94040 }, { "epoch": 0.6187377880699723, "grad_norm": 0.01476304348030395, "learning_rate": 8.723097259009775e-06, "loss": 0.0023, "step": 94050 }, { "epoch": 0.6188035762452057, "grad_norm": 0.04131446633863458, "learning_rate": 8.72271402157669e-06, "loss": 0.0015, "step": 94060 }, { "epoch": 0.618869364420439, "grad_norm": 0.07269266290776243, "learning_rate": 8.722330735062839e-06, "loss": 0.0011, "step": 94070 }, { "epoch": 0.6189351525956724, "grad_norm": 0.0069491613458890844, "learning_rate": 8.72194739947327e-06, "loss": 0.0011, "step": 94080 }, { "epoch": 0.6190009407709058, "grad_norm": 0.14400231283086018, "learning_rate": 8.721564014813037e-06, "loss": 0.0016, "step": 94090 }, { "epoch": 0.6190667289461392, "grad_norm": 0.011812629968224743, "learning_rate": 8.7211805810872e-06, "loss": 0.0017, "step": 94100 }, { "epoch": 0.6191325171213726, "grad_norm": 0.09720170050877926, "learning_rate": 8.720797098300808e-06, "loss": 0.0011, "step": 94110 }, { "epoch": 0.619198305296606, "grad_norm": 0.07350383837910478, "learning_rate": 8.720413566458922e-06, "loss": 0.0021, "step": 94120 }, { "epoch": 0.6192640934718394, "grad_norm": 0.3263713936256163, "learning_rate": 8.720029985566596e-06, "loss": 0.0013, "step": 94130 }, { "epoch": 0.6193298816470727, "grad_norm": 0.02892519585540262, "learning_rate": 8.719646355628886e-06, "loss": 0.0014, "step": 94140 }, { "epoch": 0.6193956698223061, "grad_norm": 0.34037456764384605, "learning_rate": 8.719262676650853e-06, "loss": 0.0019, "step": 94150 }, { "epoch": 0.6194614579975395, "grad_norm": 0.03652717800743169, "learning_rate": 8.718878948637553e-06, "loss": 0.0011, "step": 94160 }, { "epoch": 0.6195272461727729, "grad_norm": 0.031574000546992956, "learning_rate": 8.718495171594046e-06, "loss": 0.0022, "step": 94170 }, { "epoch": 0.6195930343480063, "grad_norm": 0.05444700161881884, "learning_rate": 8.71811134552539e-06, "loss": 0.0016, "step": 94180 }, { "epoch": 0.6196588225232397, "grad_norm": 0.03353348965331651, "learning_rate": 8.71772747043665e-06, "loss": 0.0007, "step": 94190 }, { "epoch": 0.6197246106984731, "grad_norm": 0.03141951025517186, "learning_rate": 8.717343546332885e-06, "loss": 0.0015, "step": 94200 }, { "epoch": 0.6197903988737065, "grad_norm": 0.1470338513494538, "learning_rate": 8.716959573219151e-06, "loss": 0.0024, "step": 94210 }, { "epoch": 0.6198561870489399, "grad_norm": 0.013464122883985184, "learning_rate": 8.716575551100519e-06, "loss": 0.0009, "step": 94220 }, { "epoch": 0.6199219752241732, "grad_norm": 0.024552226540219504, "learning_rate": 8.716191479982046e-06, "loss": 0.0012, "step": 94230 }, { "epoch": 0.6199877633994066, "grad_norm": 0.021780431165995144, "learning_rate": 8.7158073598688e-06, "loss": 0.002, "step": 94240 }, { "epoch": 0.6200535515746399, "grad_norm": 0.07462130145369378, "learning_rate": 8.715423190765842e-06, "loss": 0.0013, "step": 94250 }, { "epoch": 0.6201193397498733, "grad_norm": 0.08194728538748278, "learning_rate": 8.715038972678238e-06, "loss": 0.0014, "step": 94260 }, { "epoch": 0.6201851279251067, "grad_norm": 0.03097999392491201, "learning_rate": 8.714654705611053e-06, "loss": 0.0015, "step": 94270 }, { "epoch": 0.6202509161003401, "grad_norm": 0.04526221329260746, "learning_rate": 8.714270389569355e-06, "loss": 0.0019, "step": 94280 }, { "epoch": 0.6203167042755735, "grad_norm": 0.14875230000201617, "learning_rate": 8.713886024558209e-06, "loss": 0.0017, "step": 94290 }, { "epoch": 0.6203824924508069, "grad_norm": 0.015573244452666255, "learning_rate": 8.713501610582683e-06, "loss": 0.0015, "step": 94300 }, { "epoch": 0.6204482806260403, "grad_norm": 0.02155737800026826, "learning_rate": 8.713117147647847e-06, "loss": 0.0015, "step": 94310 }, { "epoch": 0.6205140688012737, "grad_norm": 0.05076462350329203, "learning_rate": 8.712732635758767e-06, "loss": 0.0017, "step": 94320 }, { "epoch": 0.6205798569765071, "grad_norm": 0.11015138420632561, "learning_rate": 8.712348074920514e-06, "loss": 0.0024, "step": 94330 }, { "epoch": 0.6206456451517405, "grad_norm": 0.10466170535784708, "learning_rate": 8.711963465138158e-06, "loss": 0.0025, "step": 94340 }, { "epoch": 0.6207114333269738, "grad_norm": 0.09538428745543646, "learning_rate": 8.71157880641677e-06, "loss": 0.0021, "step": 94350 }, { "epoch": 0.6207772215022072, "grad_norm": 0.4262719198227068, "learning_rate": 8.71119409876142e-06, "loss": 0.0015, "step": 94360 }, { "epoch": 0.6208430096774405, "grad_norm": 0.0512133058527236, "learning_rate": 8.710809342177181e-06, "loss": 0.001, "step": 94370 }, { "epoch": 0.6209087978526739, "grad_norm": 0.04151164134414081, "learning_rate": 8.710424536669128e-06, "loss": 0.0031, "step": 94380 }, { "epoch": 0.6209745860279073, "grad_norm": 0.051153284565042784, "learning_rate": 8.710039682242331e-06, "loss": 0.0006, "step": 94390 }, { "epoch": 0.6210403742031407, "grad_norm": 0.02292366626081593, "learning_rate": 8.709654778901862e-06, "loss": 0.0013, "step": 94400 }, { "epoch": 0.6211061623783741, "grad_norm": 0.16458927184592567, "learning_rate": 8.709269826652803e-06, "loss": 0.0017, "step": 94410 }, { "epoch": 0.6211719505536075, "grad_norm": 0.07869316832043878, "learning_rate": 8.708884825500221e-06, "loss": 0.0012, "step": 94420 }, { "epoch": 0.6212377387288409, "grad_norm": 0.01848358371990827, "learning_rate": 8.708499775449199e-06, "loss": 0.0012, "step": 94430 }, { "epoch": 0.6213035269040743, "grad_norm": 0.007309280672199551, "learning_rate": 8.708114676504809e-06, "loss": 0.0014, "step": 94440 }, { "epoch": 0.6213693150793076, "grad_norm": 0.030100773536711617, "learning_rate": 8.707729528672128e-06, "loss": 0.003, "step": 94450 }, { "epoch": 0.621435103254541, "grad_norm": 0.24683055172352536, "learning_rate": 8.707344331956238e-06, "loss": 0.0031, "step": 94460 }, { "epoch": 0.6215008914297744, "grad_norm": 0.10817835997507759, "learning_rate": 8.706959086362211e-06, "loss": 0.0015, "step": 94470 }, { "epoch": 0.6215666796050078, "grad_norm": 0.04159225224001745, "learning_rate": 8.706573791895134e-06, "loss": 0.0014, "step": 94480 }, { "epoch": 0.6216324677802412, "grad_norm": 0.08391792816100291, "learning_rate": 8.706188448560082e-06, "loss": 0.0019, "step": 94490 }, { "epoch": 0.6216982559554746, "grad_norm": 0.12071659002527922, "learning_rate": 8.705803056362133e-06, "loss": 0.0019, "step": 94500 }, { "epoch": 0.621764044130708, "grad_norm": 0.0755528227340664, "learning_rate": 8.705417615306376e-06, "loss": 0.0011, "step": 94510 }, { "epoch": 0.6218298323059414, "grad_norm": 0.09076198927984398, "learning_rate": 8.705032125397886e-06, "loss": 0.0013, "step": 94520 }, { "epoch": 0.6218956204811747, "grad_norm": 0.05822602944668608, "learning_rate": 8.704646586641747e-06, "loss": 0.0013, "step": 94530 }, { "epoch": 0.6219614086564081, "grad_norm": 0.02963784251505198, "learning_rate": 8.704260999043041e-06, "loss": 0.0014, "step": 94540 }, { "epoch": 0.6220271968316414, "grad_norm": 0.024799511732985603, "learning_rate": 8.703875362606856e-06, "loss": 0.0016, "step": 94550 }, { "epoch": 0.6220929850068748, "grad_norm": 0.0076572300194457385, "learning_rate": 8.70348967733827e-06, "loss": 0.0023, "step": 94560 }, { "epoch": 0.6221587731821082, "grad_norm": 0.008475586325421457, "learning_rate": 8.703103943242373e-06, "loss": 0.002, "step": 94570 }, { "epoch": 0.6222245613573416, "grad_norm": 0.14523647472140952, "learning_rate": 8.702718160324248e-06, "loss": 0.0012, "step": 94580 }, { "epoch": 0.622290349532575, "grad_norm": 0.15202913088906944, "learning_rate": 8.702332328588982e-06, "loss": 0.0023, "step": 94590 }, { "epoch": 0.6223561377078084, "grad_norm": 0.0357085862567364, "learning_rate": 8.701946448041663e-06, "loss": 0.0018, "step": 94600 }, { "epoch": 0.6224219258830418, "grad_norm": 0.03504440031073131, "learning_rate": 8.701560518687375e-06, "loss": 0.001, "step": 94610 }, { "epoch": 0.6224877140582752, "grad_norm": 0.038372947035907365, "learning_rate": 8.70117454053121e-06, "loss": 0.0012, "step": 94620 }, { "epoch": 0.6225535022335086, "grad_norm": 0.0939852613970364, "learning_rate": 8.700788513578255e-06, "loss": 0.0017, "step": 94630 }, { "epoch": 0.622619290408742, "grad_norm": 0.12265443349525586, "learning_rate": 8.7004024378336e-06, "loss": 0.0018, "step": 94640 }, { "epoch": 0.6226850785839754, "grad_norm": 0.028830312995675525, "learning_rate": 8.700016313302336e-06, "loss": 0.0009, "step": 94650 }, { "epoch": 0.6227508667592087, "grad_norm": 0.04643614664471653, "learning_rate": 8.69963013998955e-06, "loss": 0.0013, "step": 94660 }, { "epoch": 0.622816654934442, "grad_norm": 0.026679346974503308, "learning_rate": 8.699243917900335e-06, "loss": 0.0022, "step": 94670 }, { "epoch": 0.6228824431096754, "grad_norm": 0.002738016679347658, "learning_rate": 8.698857647039786e-06, "loss": 0.0018, "step": 94680 }, { "epoch": 0.6229482312849088, "grad_norm": 0.042580455990736, "learning_rate": 8.698471327412993e-06, "loss": 0.003, "step": 94690 }, { "epoch": 0.6230140194601422, "grad_norm": 0.029303129848170933, "learning_rate": 8.69808495902505e-06, "loss": 0.0015, "step": 94700 }, { "epoch": 0.6230798076353756, "grad_norm": 0.024240013745408816, "learning_rate": 8.697698541881052e-06, "loss": 0.0008, "step": 94710 }, { "epoch": 0.623145595810609, "grad_norm": 0.03240191930964989, "learning_rate": 8.697312075986089e-06, "loss": 0.0013, "step": 94720 }, { "epoch": 0.6232113839858424, "grad_norm": 0.06745498225032284, "learning_rate": 8.696925561345261e-06, "loss": 0.0011, "step": 94730 }, { "epoch": 0.6232771721610758, "grad_norm": 0.015366390331864262, "learning_rate": 8.696538997963664e-06, "loss": 0.0012, "step": 94740 }, { "epoch": 0.6233429603363092, "grad_norm": 0.07898015439870651, "learning_rate": 8.696152385846391e-06, "loss": 0.0022, "step": 94750 }, { "epoch": 0.6234087485115425, "grad_norm": 0.02514610238091578, "learning_rate": 8.69576572499854e-06, "loss": 0.0019, "step": 94760 }, { "epoch": 0.6234745366867759, "grad_norm": 0.26655693988929763, "learning_rate": 8.695379015425211e-06, "loss": 0.0052, "step": 94770 }, { "epoch": 0.6235403248620093, "grad_norm": 0.013709701318786143, "learning_rate": 8.694992257131502e-06, "loss": 0.0015, "step": 94780 }, { "epoch": 0.6236061130372427, "grad_norm": 0.07486453912247658, "learning_rate": 8.69460545012251e-06, "loss": 0.0019, "step": 94790 }, { "epoch": 0.6236719012124761, "grad_norm": 0.028425126389343795, "learning_rate": 8.694218594403338e-06, "loss": 0.0012, "step": 94800 }, { "epoch": 0.6237376893877095, "grad_norm": 0.03824516927131769, "learning_rate": 8.693831689979083e-06, "loss": 0.0027, "step": 94810 }, { "epoch": 0.6238034775629429, "grad_norm": 0.04382244073477562, "learning_rate": 8.693444736854846e-06, "loss": 0.0026, "step": 94820 }, { "epoch": 0.6238692657381762, "grad_norm": 0.01880884543091112, "learning_rate": 8.693057735035732e-06, "loss": 0.0035, "step": 94830 }, { "epoch": 0.6239350539134096, "grad_norm": 0.018936986744890142, "learning_rate": 8.69267068452684e-06, "loss": 0.0021, "step": 94840 }, { "epoch": 0.624000842088643, "grad_norm": 0.11315423642143999, "learning_rate": 8.692283585333275e-06, "loss": 0.0024, "step": 94850 }, { "epoch": 0.6240666302638763, "grad_norm": 0.005207538171900327, "learning_rate": 8.691896437460142e-06, "loss": 0.0018, "step": 94860 }, { "epoch": 0.6241324184391097, "grad_norm": 0.0645147783098664, "learning_rate": 8.69150924091254e-06, "loss": 0.0016, "step": 94870 }, { "epoch": 0.6241982066143431, "grad_norm": 0.027752523979186542, "learning_rate": 8.691121995695577e-06, "loss": 0.0023, "step": 94880 }, { "epoch": 0.6242639947895765, "grad_norm": 0.06657482226240943, "learning_rate": 8.690734701814359e-06, "loss": 0.0012, "step": 94890 }, { "epoch": 0.6243297829648099, "grad_norm": 0.04164009518002008, "learning_rate": 8.690347359273992e-06, "loss": 0.002, "step": 94900 }, { "epoch": 0.6243955711400433, "grad_norm": 0.0408327636040812, "learning_rate": 8.68995996807958e-06, "loss": 0.0057, "step": 94910 }, { "epoch": 0.6244613593152767, "grad_norm": 0.00918735647953522, "learning_rate": 8.689572528236235e-06, "loss": 0.0009, "step": 94920 }, { "epoch": 0.6245271474905101, "grad_norm": 0.04093913694120169, "learning_rate": 8.689185039749061e-06, "loss": 0.0005, "step": 94930 }, { "epoch": 0.6245929356657435, "grad_norm": 0.1351710026329368, "learning_rate": 8.68879750262317e-06, "loss": 0.0015, "step": 94940 }, { "epoch": 0.6246587238409769, "grad_norm": 0.13319698526344834, "learning_rate": 8.688409916863671e-06, "loss": 0.0016, "step": 94950 }, { "epoch": 0.6247245120162102, "grad_norm": 0.1252975607183452, "learning_rate": 8.68802228247567e-06, "loss": 0.0014, "step": 94960 }, { "epoch": 0.6247903001914435, "grad_norm": 0.008817106484753128, "learning_rate": 8.687634599464281e-06, "loss": 0.0022, "step": 94970 }, { "epoch": 0.6248560883666769, "grad_norm": 0.003400658954961763, "learning_rate": 8.687246867834616e-06, "loss": 0.001, "step": 94980 }, { "epoch": 0.6249218765419103, "grad_norm": 0.006809342590949, "learning_rate": 8.686859087591785e-06, "loss": 0.0019, "step": 94990 }, { "epoch": 0.6249876647171437, "grad_norm": 0.08359889033852455, "learning_rate": 8.686471258740901e-06, "loss": 0.002, "step": 95000 }, { "epoch": 0.6250534528923771, "grad_norm": 0.023611084105601088, "learning_rate": 8.686083381287077e-06, "loss": 0.002, "step": 95010 }, { "epoch": 0.6251192410676105, "grad_norm": 0.068553255208004, "learning_rate": 8.685695455235427e-06, "loss": 0.0021, "step": 95020 }, { "epoch": 0.6251850292428439, "grad_norm": 0.010451518788937403, "learning_rate": 8.685307480591065e-06, "loss": 0.0019, "step": 95030 }, { "epoch": 0.6252508174180773, "grad_norm": 0.011579322760892548, "learning_rate": 8.684919457359108e-06, "loss": 0.0013, "step": 95040 }, { "epoch": 0.6253166055933107, "grad_norm": 0.018910443504888786, "learning_rate": 8.68453138554467e-06, "loss": 0.0009, "step": 95050 }, { "epoch": 0.625382393768544, "grad_norm": 0.1417669042784636, "learning_rate": 8.684143265152868e-06, "loss": 0.0015, "step": 95060 }, { "epoch": 0.6254481819437774, "grad_norm": 0.014226594147178386, "learning_rate": 8.68375509618882e-06, "loss": 0.0013, "step": 95070 }, { "epoch": 0.6255139701190108, "grad_norm": 0.07829027770876082, "learning_rate": 8.68336687865764e-06, "loss": 0.0022, "step": 95080 }, { "epoch": 0.6255797582942442, "grad_norm": 0.036919959369983935, "learning_rate": 8.68297861256445e-06, "loss": 0.0015, "step": 95090 }, { "epoch": 0.6256455464694776, "grad_norm": 0.0072704248600797185, "learning_rate": 8.682590297914369e-06, "loss": 0.0028, "step": 95100 }, { "epoch": 0.625711334644711, "grad_norm": 0.03154710080996263, "learning_rate": 8.682201934712513e-06, "loss": 0.0017, "step": 95110 }, { "epoch": 0.6257771228199444, "grad_norm": 0.034793137623903604, "learning_rate": 8.681813522964006e-06, "loss": 0.0027, "step": 95120 }, { "epoch": 0.6258429109951777, "grad_norm": 0.08688666254716981, "learning_rate": 8.681425062673966e-06, "loss": 0.0017, "step": 95130 }, { "epoch": 0.6259086991704111, "grad_norm": 0.07549073608751031, "learning_rate": 8.68103655384752e-06, "loss": 0.003, "step": 95140 }, { "epoch": 0.6259744873456445, "grad_norm": 0.15043352123524037, "learning_rate": 8.680647996489785e-06, "loss": 0.0028, "step": 95150 }, { "epoch": 0.6260402755208779, "grad_norm": 0.06351767545246027, "learning_rate": 8.680259390605883e-06, "loss": 0.0018, "step": 95160 }, { "epoch": 0.6261060636961112, "grad_norm": 0.0012073619158027957, "learning_rate": 8.679870736200938e-06, "loss": 0.0021, "step": 95170 }, { "epoch": 0.6261718518713446, "grad_norm": 0.1902188767298189, "learning_rate": 8.679482033280078e-06, "loss": 0.0017, "step": 95180 }, { "epoch": 0.626237640046578, "grad_norm": 0.06073044278453272, "learning_rate": 8.679093281848425e-06, "loss": 0.0014, "step": 95190 }, { "epoch": 0.6263034282218114, "grad_norm": 0.07067683028214324, "learning_rate": 8.678704481911104e-06, "loss": 0.0022, "step": 95200 }, { "epoch": 0.6263692163970448, "grad_norm": 0.05787865173987077, "learning_rate": 8.678315633473241e-06, "loss": 0.0023, "step": 95210 }, { "epoch": 0.6264350045722782, "grad_norm": 0.025027088045976545, "learning_rate": 8.677926736539966e-06, "loss": 0.0009, "step": 95220 }, { "epoch": 0.6265007927475116, "grad_norm": 0.05938774378070011, "learning_rate": 8.677537791116402e-06, "loss": 0.0008, "step": 95230 }, { "epoch": 0.626566580922745, "grad_norm": 0.07281500811307502, "learning_rate": 8.677148797207677e-06, "loss": 0.0024, "step": 95240 }, { "epoch": 0.6266323690979784, "grad_norm": 0.05884208374590829, "learning_rate": 8.676759754818922e-06, "loss": 0.0009, "step": 95250 }, { "epoch": 0.6266981572732118, "grad_norm": 0.06546553624207922, "learning_rate": 8.676370663955265e-06, "loss": 0.0019, "step": 95260 }, { "epoch": 0.626763945448445, "grad_norm": 0.020519518788646735, "learning_rate": 8.675981524621836e-06, "loss": 0.0012, "step": 95270 }, { "epoch": 0.6268297336236784, "grad_norm": 0.09924279325578289, "learning_rate": 8.675592336823766e-06, "loss": 0.0016, "step": 95280 }, { "epoch": 0.6268955217989118, "grad_norm": 0.09734295495396923, "learning_rate": 8.675203100566183e-06, "loss": 0.002, "step": 95290 }, { "epoch": 0.6269613099741452, "grad_norm": 0.052058143972355454, "learning_rate": 8.674813815854224e-06, "loss": 0.0013, "step": 95300 }, { "epoch": 0.6270270981493786, "grad_norm": 0.12275205975099411, "learning_rate": 8.674424482693018e-06, "loss": 0.0015, "step": 95310 }, { "epoch": 0.627092886324612, "grad_norm": 0.02772120763516827, "learning_rate": 8.674035101087698e-06, "loss": 0.0014, "step": 95320 }, { "epoch": 0.6271586744998454, "grad_norm": 0.19735422605975841, "learning_rate": 8.673645671043398e-06, "loss": 0.0018, "step": 95330 }, { "epoch": 0.6272244626750788, "grad_norm": 0.08512290483165999, "learning_rate": 8.673256192565253e-06, "loss": 0.0012, "step": 95340 }, { "epoch": 0.6272902508503122, "grad_norm": 0.16868159170620362, "learning_rate": 8.672866665658397e-06, "loss": 0.0011, "step": 95350 }, { "epoch": 0.6273560390255456, "grad_norm": 0.02762333008925231, "learning_rate": 8.672477090327967e-06, "loss": 0.0012, "step": 95360 }, { "epoch": 0.6274218272007789, "grad_norm": 0.055845860073719805, "learning_rate": 8.672087466579097e-06, "loss": 0.0006, "step": 95370 }, { "epoch": 0.6274876153760123, "grad_norm": 0.05536073487413245, "learning_rate": 8.671697794416928e-06, "loss": 0.0021, "step": 95380 }, { "epoch": 0.6275534035512457, "grad_norm": 0.032231585575848036, "learning_rate": 8.671308073846592e-06, "loss": 0.0014, "step": 95390 }, { "epoch": 0.6276191917264791, "grad_norm": 0.06905479594354408, "learning_rate": 8.670918304873232e-06, "loss": 0.0022, "step": 95400 }, { "epoch": 0.6276849799017125, "grad_norm": 0.03699145160234243, "learning_rate": 8.670528487501984e-06, "loss": 0.0026, "step": 95410 }, { "epoch": 0.6277507680769459, "grad_norm": 0.04344609155599653, "learning_rate": 8.670138621737986e-06, "loss": 0.0015, "step": 95420 }, { "epoch": 0.6278165562521792, "grad_norm": 0.05122159677883846, "learning_rate": 8.669748707586381e-06, "loss": 0.002, "step": 95430 }, { "epoch": 0.6278823444274126, "grad_norm": 0.12940373993673768, "learning_rate": 8.66935874505231e-06, "loss": 0.0014, "step": 95440 }, { "epoch": 0.627948132602646, "grad_norm": 0.02803712635786709, "learning_rate": 8.668968734140911e-06, "loss": 0.0013, "step": 95450 }, { "epoch": 0.6280139207778794, "grad_norm": 0.0664594637156765, "learning_rate": 8.66857867485733e-06, "loss": 0.0011, "step": 95460 }, { "epoch": 0.6280797089531127, "grad_norm": 0.025705482453094626, "learning_rate": 8.668188567206706e-06, "loss": 0.0022, "step": 95470 }, { "epoch": 0.6281454971283461, "grad_norm": 0.03802292125232241, "learning_rate": 8.667798411194184e-06, "loss": 0.0019, "step": 95480 }, { "epoch": 0.6282112853035795, "grad_norm": 0.2133319717185615, "learning_rate": 8.667408206824908e-06, "loss": 0.0018, "step": 95490 }, { "epoch": 0.6282770734788129, "grad_norm": 0.08484951760238965, "learning_rate": 8.667017954104022e-06, "loss": 0.0017, "step": 95500 }, { "epoch": 0.6283428616540463, "grad_norm": 0.01680609428962942, "learning_rate": 8.666627653036671e-06, "loss": 0.0014, "step": 95510 }, { "epoch": 0.6284086498292797, "grad_norm": 0.14785035141216146, "learning_rate": 8.666237303628001e-06, "loss": 0.0013, "step": 95520 }, { "epoch": 0.6284744380045131, "grad_norm": 0.1883493643462571, "learning_rate": 8.66584690588316e-06, "loss": 0.0024, "step": 95530 }, { "epoch": 0.6285402261797465, "grad_norm": 0.1521516811073406, "learning_rate": 8.665456459807292e-06, "loss": 0.0011, "step": 95540 }, { "epoch": 0.6286060143549799, "grad_norm": 0.08097531215555236, "learning_rate": 8.665065965405548e-06, "loss": 0.0024, "step": 95550 }, { "epoch": 0.6286718025302133, "grad_norm": 0.042156147657816744, "learning_rate": 8.664675422683073e-06, "loss": 0.0011, "step": 95560 }, { "epoch": 0.6287375907054467, "grad_norm": 0.041527945276973184, "learning_rate": 8.664284831645017e-06, "loss": 0.0025, "step": 95570 }, { "epoch": 0.6288033788806799, "grad_norm": 0.008195882849421932, "learning_rate": 8.663894192296532e-06, "loss": 0.0026, "step": 95580 }, { "epoch": 0.6288691670559133, "grad_norm": 0.04275602049219066, "learning_rate": 8.663503504642764e-06, "loss": 0.0014, "step": 95590 }, { "epoch": 0.6289349552311467, "grad_norm": 0.0028505927819537746, "learning_rate": 8.663112768688869e-06, "loss": 0.0012, "step": 95600 }, { "epoch": 0.6290007434063801, "grad_norm": 0.03714054405748365, "learning_rate": 8.662721984439994e-06, "loss": 0.0013, "step": 95610 }, { "epoch": 0.6290665315816135, "grad_norm": 0.016593400850607405, "learning_rate": 8.662331151901295e-06, "loss": 0.0015, "step": 95620 }, { "epoch": 0.6291323197568469, "grad_norm": 0.047058201199168384, "learning_rate": 8.661940271077923e-06, "loss": 0.0017, "step": 95630 }, { "epoch": 0.6291981079320803, "grad_norm": 0.025499986745973306, "learning_rate": 8.661549341975028e-06, "loss": 0.0009, "step": 95640 }, { "epoch": 0.6292638961073137, "grad_norm": 0.05091904779139037, "learning_rate": 8.661158364597768e-06, "loss": 0.0016, "step": 95650 }, { "epoch": 0.6293296842825471, "grad_norm": 0.056910077355153875, "learning_rate": 8.6607673389513e-06, "loss": 0.0017, "step": 95660 }, { "epoch": 0.6293954724577805, "grad_norm": 0.00211729636391669, "learning_rate": 8.660376265040774e-06, "loss": 0.0006, "step": 95670 }, { "epoch": 0.6294612606330138, "grad_norm": 0.007890405595855311, "learning_rate": 8.659985142871348e-06, "loss": 0.0014, "step": 95680 }, { "epoch": 0.6295270488082472, "grad_norm": 0.07981251506090023, "learning_rate": 8.65959397244818e-06, "loss": 0.0011, "step": 95690 }, { "epoch": 0.6295928369834806, "grad_norm": 0.09710407047757687, "learning_rate": 8.659202753776424e-06, "loss": 0.001, "step": 95700 }, { "epoch": 0.629658625158714, "grad_norm": 0.09661421594232872, "learning_rate": 8.658811486861242e-06, "loss": 0.0014, "step": 95710 }, { "epoch": 0.6297244133339474, "grad_norm": 0.010472755878640369, "learning_rate": 8.658420171707788e-06, "loss": 0.0017, "step": 95720 }, { "epoch": 0.6297902015091807, "grad_norm": 0.02894517777583503, "learning_rate": 8.658028808321226e-06, "loss": 0.001, "step": 95730 }, { "epoch": 0.6298559896844141, "grad_norm": 0.08533112916308273, "learning_rate": 8.657637396706715e-06, "loss": 0.0014, "step": 95740 }, { "epoch": 0.6299217778596475, "grad_norm": 0.12878647080928463, "learning_rate": 8.657245936869411e-06, "loss": 0.0017, "step": 95750 }, { "epoch": 0.6299875660348809, "grad_norm": 0.00955998480379486, "learning_rate": 8.65685442881448e-06, "loss": 0.0009, "step": 95760 }, { "epoch": 0.6300533542101143, "grad_norm": 0.05043846537901213, "learning_rate": 8.656462872547079e-06, "loss": 0.0012, "step": 95770 }, { "epoch": 0.6301191423853476, "grad_norm": 0.046213552645725485, "learning_rate": 8.656071268072374e-06, "loss": 0.0012, "step": 95780 }, { "epoch": 0.630184930560581, "grad_norm": 0.15167165553843745, "learning_rate": 8.655679615395527e-06, "loss": 0.0017, "step": 95790 }, { "epoch": 0.6302507187358144, "grad_norm": 0.005856498490933502, "learning_rate": 8.655287914521701e-06, "loss": 0.0025, "step": 95800 }, { "epoch": 0.6303165069110478, "grad_norm": 0.10560573971616591, "learning_rate": 8.654896165456063e-06, "loss": 0.0016, "step": 95810 }, { "epoch": 0.6303822950862812, "grad_norm": 0.05478360226380714, "learning_rate": 8.654504368203773e-06, "loss": 0.0027, "step": 95820 }, { "epoch": 0.6304480832615146, "grad_norm": 0.03195005432801229, "learning_rate": 8.654112522770002e-06, "loss": 0.0017, "step": 95830 }, { "epoch": 0.630513871436748, "grad_norm": 0.008283894568870192, "learning_rate": 8.653720629159911e-06, "loss": 0.0016, "step": 95840 }, { "epoch": 0.6305796596119814, "grad_norm": 0.16137745120069916, "learning_rate": 8.653328687378669e-06, "loss": 0.0014, "step": 95850 }, { "epoch": 0.6306454477872148, "grad_norm": 0.12909855387881794, "learning_rate": 8.652936697431445e-06, "loss": 0.0016, "step": 95860 }, { "epoch": 0.6307112359624482, "grad_norm": 0.09388049711258853, "learning_rate": 8.652544659323405e-06, "loss": 0.001, "step": 95870 }, { "epoch": 0.6307770241376814, "grad_norm": 0.042014501113074795, "learning_rate": 8.652152573059717e-06, "loss": 0.0024, "step": 95880 }, { "epoch": 0.6308428123129148, "grad_norm": 0.09291005079704057, "learning_rate": 8.651760438645554e-06, "loss": 0.0021, "step": 95890 }, { "epoch": 0.6309086004881482, "grad_norm": 0.0007622005634055085, "learning_rate": 8.65136825608608e-06, "loss": 0.0016, "step": 95900 }, { "epoch": 0.6309743886633816, "grad_norm": 0.012033450955160067, "learning_rate": 8.650976025386472e-06, "loss": 0.0018, "step": 95910 }, { "epoch": 0.631040176838615, "grad_norm": 0.013241526472534558, "learning_rate": 8.650583746551897e-06, "loss": 0.0011, "step": 95920 }, { "epoch": 0.6311059650138484, "grad_norm": 0.05893667227016251, "learning_rate": 8.650191419587528e-06, "loss": 0.0011, "step": 95930 }, { "epoch": 0.6311717531890818, "grad_norm": 0.040144619905630674, "learning_rate": 8.649799044498539e-06, "loss": 0.0026, "step": 95940 }, { "epoch": 0.6312375413643152, "grad_norm": 0.13845964118750922, "learning_rate": 8.6494066212901e-06, "loss": 0.0013, "step": 95950 }, { "epoch": 0.6313033295395486, "grad_norm": 0.07938966279261937, "learning_rate": 8.649014149967388e-06, "loss": 0.0016, "step": 95960 }, { "epoch": 0.631369117714782, "grad_norm": 0.001431224559178126, "learning_rate": 8.648621630535574e-06, "loss": 0.0013, "step": 95970 }, { "epoch": 0.6314349058900153, "grad_norm": 0.09296224174404003, "learning_rate": 8.648229062999838e-06, "loss": 0.0015, "step": 95980 }, { "epoch": 0.6315006940652487, "grad_norm": 0.10243523418153883, "learning_rate": 8.64783644736535e-06, "loss": 0.0008, "step": 95990 }, { "epoch": 0.6315664822404821, "grad_norm": 0.01045033661279998, "learning_rate": 8.64744378363729e-06, "loss": 0.0012, "step": 96000 }, { "epoch": 0.6316322704157155, "grad_norm": 0.05401842296658033, "learning_rate": 8.647051071820833e-06, "loss": 0.0012, "step": 96010 }, { "epoch": 0.6316980585909489, "grad_norm": 0.017368132258395477, "learning_rate": 8.646658311921158e-06, "loss": 0.0017, "step": 96020 }, { "epoch": 0.6317638467661822, "grad_norm": 0.020150783735993494, "learning_rate": 8.646265503943443e-06, "loss": 0.0015, "step": 96030 }, { "epoch": 0.6318296349414156, "grad_norm": 0.1291844238154169, "learning_rate": 8.645872647892865e-06, "loss": 0.0016, "step": 96040 }, { "epoch": 0.631895423116649, "grad_norm": 0.12883362657069974, "learning_rate": 8.645479743774608e-06, "loss": 0.0011, "step": 96050 }, { "epoch": 0.6319612112918824, "grad_norm": 0.05172605845095895, "learning_rate": 8.645086791593846e-06, "loss": 0.0016, "step": 96060 }, { "epoch": 0.6320269994671158, "grad_norm": 0.081242061036509, "learning_rate": 8.644693791355765e-06, "loss": 0.0008, "step": 96070 }, { "epoch": 0.6320927876423492, "grad_norm": 0.07068083476464641, "learning_rate": 8.644300743065543e-06, "loss": 0.0019, "step": 96080 }, { "epoch": 0.6321585758175825, "grad_norm": 0.06312728962000916, "learning_rate": 8.643907646728362e-06, "loss": 0.0024, "step": 96090 }, { "epoch": 0.6322243639928159, "grad_norm": 0.1189735281807863, "learning_rate": 8.643514502349408e-06, "loss": 0.0014, "step": 96100 }, { "epoch": 0.6322901521680493, "grad_norm": 0.0922238671393944, "learning_rate": 8.64312130993386e-06, "loss": 0.002, "step": 96110 }, { "epoch": 0.6323559403432827, "grad_norm": 0.08890606847364539, "learning_rate": 8.642728069486905e-06, "loss": 0.0027, "step": 96120 }, { "epoch": 0.6324217285185161, "grad_norm": 0.0644987068021449, "learning_rate": 8.642334781013725e-06, "loss": 0.0026, "step": 96130 }, { "epoch": 0.6324875166937495, "grad_norm": 0.0796059714983819, "learning_rate": 8.641941444519508e-06, "loss": 0.0017, "step": 96140 }, { "epoch": 0.6325533048689829, "grad_norm": 0.09663489484480621, "learning_rate": 8.64154806000944e-06, "loss": 0.0017, "step": 96150 }, { "epoch": 0.6326190930442163, "grad_norm": 0.05174045259491552, "learning_rate": 8.641154627488704e-06, "loss": 0.0009, "step": 96160 }, { "epoch": 0.6326848812194497, "grad_norm": 0.004652977360122576, "learning_rate": 8.640761146962488e-06, "loss": 0.0018, "step": 96170 }, { "epoch": 0.632750669394683, "grad_norm": 0.020481150409890722, "learning_rate": 8.640367618435983e-06, "loss": 0.001, "step": 96180 }, { "epoch": 0.6328164575699163, "grad_norm": 0.03098976035392344, "learning_rate": 8.639974041914374e-06, "loss": 0.0018, "step": 96190 }, { "epoch": 0.6328822457451497, "grad_norm": 0.01149676922949202, "learning_rate": 8.63958041740285e-06, "loss": 0.0011, "step": 96200 }, { "epoch": 0.6329480339203831, "grad_norm": 0.05714689095026183, "learning_rate": 8.639186744906602e-06, "loss": 0.0016, "step": 96210 }, { "epoch": 0.6330138220956165, "grad_norm": 0.026944229635117608, "learning_rate": 8.63879302443082e-06, "loss": 0.0011, "step": 96220 }, { "epoch": 0.6330796102708499, "grad_norm": 0.07166558977572904, "learning_rate": 8.638399255980694e-06, "loss": 0.0017, "step": 96230 }, { "epoch": 0.6331453984460833, "grad_norm": 0.2461819673168261, "learning_rate": 8.638005439561417e-06, "loss": 0.0024, "step": 96240 }, { "epoch": 0.6332111866213167, "grad_norm": 0.02295263591322084, "learning_rate": 8.637611575178182e-06, "loss": 0.0023, "step": 96250 }, { "epoch": 0.6332769747965501, "grad_norm": 0.10258641039441474, "learning_rate": 8.637217662836177e-06, "loss": 0.001, "step": 96260 }, { "epoch": 0.6333427629717835, "grad_norm": 0.05930652102136519, "learning_rate": 8.6368237025406e-06, "loss": 0.0008, "step": 96270 }, { "epoch": 0.6334085511470169, "grad_norm": 0.01743918666450793, "learning_rate": 8.636429694296642e-06, "loss": 0.0025, "step": 96280 }, { "epoch": 0.6334743393222502, "grad_norm": 0.03746940355721529, "learning_rate": 8.636035638109502e-06, "loss": 0.0017, "step": 96290 }, { "epoch": 0.6335401274974836, "grad_norm": 0.012975437750336407, "learning_rate": 8.635641533984369e-06, "loss": 0.0012, "step": 96300 }, { "epoch": 0.633605915672717, "grad_norm": 0.017566015594235557, "learning_rate": 8.635247381926445e-06, "loss": 0.0012, "step": 96310 }, { "epoch": 0.6336717038479504, "grad_norm": 0.10775423487375577, "learning_rate": 8.634853181940923e-06, "loss": 0.0025, "step": 96320 }, { "epoch": 0.6337374920231837, "grad_norm": 0.05490012733702605, "learning_rate": 8.634458934032999e-06, "loss": 0.0006, "step": 96330 }, { "epoch": 0.6338032801984171, "grad_norm": 0.0755927470525296, "learning_rate": 8.634064638207874e-06, "loss": 0.0017, "step": 96340 }, { "epoch": 0.6338690683736505, "grad_norm": 0.0017112052301462223, "learning_rate": 8.633670294470748e-06, "loss": 0.0014, "step": 96350 }, { "epoch": 0.6339348565488839, "grad_norm": 0.09424311842203748, "learning_rate": 8.633275902826813e-06, "loss": 0.0011, "step": 96360 }, { "epoch": 0.6340006447241173, "grad_norm": 0.18623707379188012, "learning_rate": 8.632881463281277e-06, "loss": 0.0013, "step": 96370 }, { "epoch": 0.6340664328993507, "grad_norm": 0.021848841611601628, "learning_rate": 8.632486975839336e-06, "loss": 0.0011, "step": 96380 }, { "epoch": 0.634132221074584, "grad_norm": 0.029307352272001163, "learning_rate": 8.63209244050619e-06, "loss": 0.0019, "step": 96390 }, { "epoch": 0.6341980092498174, "grad_norm": 0.0987595330845422, "learning_rate": 8.631697857287043e-06, "loss": 0.0026, "step": 96400 }, { "epoch": 0.6342637974250508, "grad_norm": 0.05296707683426351, "learning_rate": 8.631303226187096e-06, "loss": 0.0015, "step": 96410 }, { "epoch": 0.6343295856002842, "grad_norm": 0.02467889002431228, "learning_rate": 8.63090854721155e-06, "loss": 0.0013, "step": 96420 }, { "epoch": 0.6343953737755176, "grad_norm": 0.11367704045028763, "learning_rate": 8.630513820365613e-06, "loss": 0.0017, "step": 96430 }, { "epoch": 0.634461161950751, "grad_norm": 0.14089891549205352, "learning_rate": 8.630119045654487e-06, "loss": 0.0015, "step": 96440 }, { "epoch": 0.6345269501259844, "grad_norm": 0.05412682855506702, "learning_rate": 8.629724223083376e-06, "loss": 0.0012, "step": 96450 }, { "epoch": 0.6345927383012178, "grad_norm": 0.1712330698153167, "learning_rate": 8.629329352657485e-06, "loss": 0.0037, "step": 96460 }, { "epoch": 0.6346585264764512, "grad_norm": 0.05289900393239038, "learning_rate": 8.628934434382022e-06, "loss": 0.002, "step": 96470 }, { "epoch": 0.6347243146516846, "grad_norm": 0.10657536588800455, "learning_rate": 8.628539468262192e-06, "loss": 0.0046, "step": 96480 }, { "epoch": 0.6347901028269178, "grad_norm": 0.06217837549193676, "learning_rate": 8.628144454303204e-06, "loss": 0.002, "step": 96490 }, { "epoch": 0.6348558910021512, "grad_norm": 0.00022387929221061117, "learning_rate": 8.627749392510265e-06, "loss": 0.0022, "step": 96500 }, { "epoch": 0.6349216791773846, "grad_norm": 0.02596261049985573, "learning_rate": 8.627354282888583e-06, "loss": 0.0017, "step": 96510 }, { "epoch": 0.634987467352618, "grad_norm": 0.03235771503852226, "learning_rate": 8.626959125443365e-06, "loss": 0.0014, "step": 96520 }, { "epoch": 0.6350532555278514, "grad_norm": 0.030215018718074214, "learning_rate": 8.626563920179826e-06, "loss": 0.0011, "step": 96530 }, { "epoch": 0.6351190437030848, "grad_norm": 0.049016264761436265, "learning_rate": 8.626168667103174e-06, "loss": 0.0011, "step": 96540 }, { "epoch": 0.6351848318783182, "grad_norm": 0.11759313935836549, "learning_rate": 8.625773366218619e-06, "loss": 0.0017, "step": 96550 }, { "epoch": 0.6352506200535516, "grad_norm": 0.07941883543707971, "learning_rate": 8.625378017531373e-06, "loss": 0.002, "step": 96560 }, { "epoch": 0.635316408228785, "grad_norm": 0.03659954365561745, "learning_rate": 8.624982621046651e-06, "loss": 0.002, "step": 96570 }, { "epoch": 0.6353821964040184, "grad_norm": 0.0149523811757161, "learning_rate": 8.624587176769664e-06, "loss": 0.0011, "step": 96580 }, { "epoch": 0.6354479845792518, "grad_norm": 0.040386325690936725, "learning_rate": 8.624191684705624e-06, "loss": 0.0017, "step": 96590 }, { "epoch": 0.6355137727544851, "grad_norm": 0.15805916466001668, "learning_rate": 8.623796144859747e-06, "loss": 0.0016, "step": 96600 }, { "epoch": 0.6355795609297185, "grad_norm": 0.0293859542678482, "learning_rate": 8.623400557237248e-06, "loss": 0.0009, "step": 96610 }, { "epoch": 0.6356453491049519, "grad_norm": 0.0524274695494786, "learning_rate": 8.62300492184334e-06, "loss": 0.0028, "step": 96620 }, { "epoch": 0.6357111372801852, "grad_norm": 0.002731469982406882, "learning_rate": 8.622609238683243e-06, "loss": 0.001, "step": 96630 }, { "epoch": 0.6357769254554186, "grad_norm": 0.04453464909767992, "learning_rate": 8.622213507762171e-06, "loss": 0.003, "step": 96640 }, { "epoch": 0.635842713630652, "grad_norm": 0.05747751430571358, "learning_rate": 8.621817729085342e-06, "loss": 0.0007, "step": 96650 }, { "epoch": 0.6359085018058854, "grad_norm": 0.061471134765726035, "learning_rate": 8.621421902657976e-06, "loss": 0.0017, "step": 96660 }, { "epoch": 0.6359742899811188, "grad_norm": 0.05991893970954149, "learning_rate": 8.621026028485288e-06, "loss": 0.0009, "step": 96670 }, { "epoch": 0.6360400781563522, "grad_norm": 0.09750036576849058, "learning_rate": 8.6206301065725e-06, "loss": 0.0018, "step": 96680 }, { "epoch": 0.6361058663315856, "grad_norm": 0.10610052743070517, "learning_rate": 8.620234136924831e-06, "loss": 0.0025, "step": 96690 }, { "epoch": 0.6361716545068189, "grad_norm": 0.005456238597366089, "learning_rate": 8.619838119547502e-06, "loss": 0.0011, "step": 96700 }, { "epoch": 0.6362374426820523, "grad_norm": 0.03945151920803906, "learning_rate": 8.619442054445732e-06, "loss": 0.002, "step": 96710 }, { "epoch": 0.6363032308572857, "grad_norm": 0.048931250123867454, "learning_rate": 8.619045941624747e-06, "loss": 0.0009, "step": 96720 }, { "epoch": 0.6363690190325191, "grad_norm": 0.16643479771384442, "learning_rate": 8.618649781089765e-06, "loss": 0.0031, "step": 96730 }, { "epoch": 0.6364348072077525, "grad_norm": 0.049245669518334104, "learning_rate": 8.618253572846012e-06, "loss": 0.0012, "step": 96740 }, { "epoch": 0.6365005953829859, "grad_norm": 0.05633608971059817, "learning_rate": 8.61785731689871e-06, "loss": 0.0013, "step": 96750 }, { "epoch": 0.6365663835582193, "grad_norm": 0.039154596135231694, "learning_rate": 8.617461013253084e-06, "loss": 0.0015, "step": 96760 }, { "epoch": 0.6366321717334527, "grad_norm": 0.0482078228433754, "learning_rate": 8.617064661914358e-06, "loss": 0.0024, "step": 96770 }, { "epoch": 0.636697959908686, "grad_norm": 0.05025034472447353, "learning_rate": 8.61666826288776e-06, "loss": 0.0012, "step": 96780 }, { "epoch": 0.6367637480839194, "grad_norm": 0.051129146992283415, "learning_rate": 8.616271816178513e-06, "loss": 0.0022, "step": 96790 }, { "epoch": 0.6368295362591527, "grad_norm": 0.0020949215143580627, "learning_rate": 8.615875321791847e-06, "loss": 0.0016, "step": 96800 }, { "epoch": 0.6368953244343861, "grad_norm": 0.02129467295729483, "learning_rate": 8.615478779732987e-06, "loss": 0.0014, "step": 96810 }, { "epoch": 0.6369611126096195, "grad_norm": 0.06320141366048261, "learning_rate": 8.61508219000716e-06, "loss": 0.0017, "step": 96820 }, { "epoch": 0.6370269007848529, "grad_norm": 0.08693993713421432, "learning_rate": 8.614685552619599e-06, "loss": 0.0019, "step": 96830 }, { "epoch": 0.6370926889600863, "grad_norm": 0.15321529077019372, "learning_rate": 8.61428886757553e-06, "loss": 0.0027, "step": 96840 }, { "epoch": 0.6371584771353197, "grad_norm": 0.07054086778958286, "learning_rate": 8.613892134880184e-06, "loss": 0.0029, "step": 96850 }, { "epoch": 0.6372242653105531, "grad_norm": 0.2133503944464841, "learning_rate": 8.61349535453879e-06, "loss": 0.0032, "step": 96860 }, { "epoch": 0.6372900534857865, "grad_norm": 0.032331456844639166, "learning_rate": 8.61309852655658e-06, "loss": 0.0013, "step": 96870 }, { "epoch": 0.6373558416610199, "grad_norm": 0.3856666773037703, "learning_rate": 8.612701650938788e-06, "loss": 0.0024, "step": 96880 }, { "epoch": 0.6374216298362533, "grad_norm": 0.14123423970083906, "learning_rate": 8.612304727690647e-06, "loss": 0.0006, "step": 96890 }, { "epoch": 0.6374874180114866, "grad_norm": 0.11388677165404824, "learning_rate": 8.611907756817383e-06, "loss": 0.0018, "step": 96900 }, { "epoch": 0.63755320618672, "grad_norm": 0.022637437866415304, "learning_rate": 8.611510738324239e-06, "loss": 0.0014, "step": 96910 }, { "epoch": 0.6376189943619534, "grad_norm": 0.06502286444013128, "learning_rate": 8.611113672216443e-06, "loss": 0.0033, "step": 96920 }, { "epoch": 0.6376847825371867, "grad_norm": 0.08049102395340328, "learning_rate": 8.610716558499233e-06, "loss": 0.0014, "step": 96930 }, { "epoch": 0.6377505707124201, "grad_norm": 0.05788682140334235, "learning_rate": 8.610319397177842e-06, "loss": 0.0012, "step": 96940 }, { "epoch": 0.6378163588876535, "grad_norm": 0.021333456575792345, "learning_rate": 8.60992218825751e-06, "loss": 0.001, "step": 96950 }, { "epoch": 0.6378821470628869, "grad_norm": 0.04562644593899427, "learning_rate": 8.60952493174347e-06, "loss": 0.0013, "step": 96960 }, { "epoch": 0.6379479352381203, "grad_norm": 0.0762273200734701, "learning_rate": 8.609127627640962e-06, "loss": 0.0014, "step": 96970 }, { "epoch": 0.6380137234133537, "grad_norm": 0.13077454366685665, "learning_rate": 8.608730275955223e-06, "loss": 0.0053, "step": 96980 }, { "epoch": 0.6380795115885871, "grad_norm": 0.06802457498980168, "learning_rate": 8.608332876691492e-06, "loss": 0.0012, "step": 96990 }, { "epoch": 0.6381452997638205, "grad_norm": 0.05071180535341055, "learning_rate": 8.607935429855008e-06, "loss": 0.001, "step": 97000 }, { "epoch": 0.6382110879390538, "grad_norm": 0.43551807982140517, "learning_rate": 8.607537935451011e-06, "loss": 0.0013, "step": 97010 }, { "epoch": 0.6382768761142872, "grad_norm": 0.00620564749712428, "learning_rate": 8.607140393484743e-06, "loss": 0.0032, "step": 97020 }, { "epoch": 0.6383426642895206, "grad_norm": 0.11463758944176458, "learning_rate": 8.606742803961444e-06, "loss": 0.0025, "step": 97030 }, { "epoch": 0.638408452464754, "grad_norm": 0.004571359796570299, "learning_rate": 8.606345166886355e-06, "loss": 0.0013, "step": 97040 }, { "epoch": 0.6384742406399874, "grad_norm": 0.06693456846337618, "learning_rate": 8.605947482264722e-06, "loss": 0.002, "step": 97050 }, { "epoch": 0.6385400288152208, "grad_norm": 0.06599916375107458, "learning_rate": 8.605549750101785e-06, "loss": 0.0015, "step": 97060 }, { "epoch": 0.6386058169904542, "grad_norm": 0.053199444209927824, "learning_rate": 8.605151970402787e-06, "loss": 0.0013, "step": 97070 }, { "epoch": 0.6386716051656876, "grad_norm": 0.025643519061985254, "learning_rate": 8.604754143172975e-06, "loss": 0.0017, "step": 97080 }, { "epoch": 0.638737393340921, "grad_norm": 0.06850521790111945, "learning_rate": 8.604356268417594e-06, "loss": 0.0008, "step": 97090 }, { "epoch": 0.6388031815161543, "grad_norm": 0.025788107893017123, "learning_rate": 8.603958346141887e-06, "loss": 0.001, "step": 97100 }, { "epoch": 0.6388689696913876, "grad_norm": 0.15704385611455532, "learning_rate": 8.603560376351101e-06, "loss": 0.001, "step": 97110 }, { "epoch": 0.638934757866621, "grad_norm": 0.006825991665510941, "learning_rate": 8.603162359050485e-06, "loss": 0.0013, "step": 97120 }, { "epoch": 0.6390005460418544, "grad_norm": 0.05913052036022253, "learning_rate": 8.602764294245285e-06, "loss": 0.0023, "step": 97130 }, { "epoch": 0.6390663342170878, "grad_norm": 0.01743723983886808, "learning_rate": 8.602366181940749e-06, "loss": 0.001, "step": 97140 }, { "epoch": 0.6391321223923212, "grad_norm": 0.05872935377864085, "learning_rate": 8.601968022142126e-06, "loss": 0.0025, "step": 97150 }, { "epoch": 0.6391979105675546, "grad_norm": 0.09154604422306455, "learning_rate": 8.601569814854666e-06, "loss": 0.0018, "step": 97160 }, { "epoch": 0.639263698742788, "grad_norm": 0.16696141601341205, "learning_rate": 8.601171560083618e-06, "loss": 0.0015, "step": 97170 }, { "epoch": 0.6393294869180214, "grad_norm": 0.14021900554269498, "learning_rate": 8.600773257834234e-06, "loss": 0.0029, "step": 97180 }, { "epoch": 0.6393952750932548, "grad_norm": 0.024762946476127878, "learning_rate": 8.600374908111763e-06, "loss": 0.0017, "step": 97190 }, { "epoch": 0.6394610632684882, "grad_norm": 0.06852870184460644, "learning_rate": 8.599976510921461e-06, "loss": 0.0011, "step": 97200 }, { "epoch": 0.6395268514437215, "grad_norm": 0.06915496541196553, "learning_rate": 8.599578066268576e-06, "loss": 0.0011, "step": 97210 }, { "epoch": 0.6395926396189549, "grad_norm": 0.027425190518367684, "learning_rate": 8.599179574158363e-06, "loss": 0.0019, "step": 97220 }, { "epoch": 0.6396584277941882, "grad_norm": 0.0006819057692411236, "learning_rate": 8.598781034596076e-06, "loss": 0.002, "step": 97230 }, { "epoch": 0.6397242159694216, "grad_norm": 0.020388821880051134, "learning_rate": 8.598382447586968e-06, "loss": 0.0018, "step": 97240 }, { "epoch": 0.639790004144655, "grad_norm": 0.04409569556992166, "learning_rate": 8.597983813136296e-06, "loss": 0.0016, "step": 97250 }, { "epoch": 0.6398557923198884, "grad_norm": 0.13269523199740113, "learning_rate": 8.597585131249315e-06, "loss": 0.0016, "step": 97260 }, { "epoch": 0.6399215804951218, "grad_norm": 0.03662589051802548, "learning_rate": 8.597186401931282e-06, "loss": 0.0012, "step": 97270 }, { "epoch": 0.6399873686703552, "grad_norm": 0.051131003659121606, "learning_rate": 8.596787625187453e-06, "loss": 0.0023, "step": 97280 }, { "epoch": 0.6400531568455886, "grad_norm": 0.11800074805464812, "learning_rate": 8.596388801023083e-06, "loss": 0.0027, "step": 97290 }, { "epoch": 0.640118945020822, "grad_norm": 0.09310584864217591, "learning_rate": 8.595989929443436e-06, "loss": 0.0027, "step": 97300 }, { "epoch": 0.6401847331960553, "grad_norm": 0.001265952938989482, "learning_rate": 8.595591010453765e-06, "loss": 0.0016, "step": 97310 }, { "epoch": 0.6402505213712887, "grad_norm": 0.09579059084261313, "learning_rate": 8.595192044059334e-06, "loss": 0.0014, "step": 97320 }, { "epoch": 0.6403163095465221, "grad_norm": 0.03621406183213854, "learning_rate": 8.594793030265399e-06, "loss": 0.0028, "step": 97330 }, { "epoch": 0.6403820977217555, "grad_norm": 0.04262332338446606, "learning_rate": 8.594393969077223e-06, "loss": 0.0008, "step": 97340 }, { "epoch": 0.6404478858969889, "grad_norm": 0.036525341186374474, "learning_rate": 8.593994860500068e-06, "loss": 0.0012, "step": 97350 }, { "epoch": 0.6405136740722223, "grad_norm": 0.23174185667537542, "learning_rate": 8.593595704539193e-06, "loss": 0.0017, "step": 97360 }, { "epoch": 0.6405794622474557, "grad_norm": 0.013098080320182219, "learning_rate": 8.593196501199863e-06, "loss": 0.0005, "step": 97370 }, { "epoch": 0.640645250422689, "grad_norm": 0.07217089535396533, "learning_rate": 8.592797250487341e-06, "loss": 0.0024, "step": 97380 }, { "epoch": 0.6407110385979224, "grad_norm": 0.022709798171992923, "learning_rate": 8.59239795240689e-06, "loss": 0.0011, "step": 97390 }, { "epoch": 0.6407768267731558, "grad_norm": 0.07383822956510822, "learning_rate": 8.591998606963774e-06, "loss": 0.0019, "step": 97400 }, { "epoch": 0.6408426149483891, "grad_norm": 0.15337817007285676, "learning_rate": 8.59159921416326e-06, "loss": 0.0021, "step": 97410 }, { "epoch": 0.6409084031236225, "grad_norm": 0.024979162229326203, "learning_rate": 8.59119977401061e-06, "loss": 0.0025, "step": 97420 }, { "epoch": 0.6409741912988559, "grad_norm": 0.04772369837087863, "learning_rate": 8.590800286511094e-06, "loss": 0.0021, "step": 97430 }, { "epoch": 0.6410399794740893, "grad_norm": 0.021574051498619178, "learning_rate": 8.590400751669978e-06, "loss": 0.0011, "step": 97440 }, { "epoch": 0.6411057676493227, "grad_norm": 0.011170926662422295, "learning_rate": 8.590001169492528e-06, "loss": 0.001, "step": 97450 }, { "epoch": 0.6411715558245561, "grad_norm": 0.033854709239353256, "learning_rate": 8.589601539984013e-06, "loss": 0.002, "step": 97460 }, { "epoch": 0.6412373439997895, "grad_norm": 0.00922897424463594, "learning_rate": 8.589201863149702e-06, "loss": 0.0011, "step": 97470 }, { "epoch": 0.6413031321750229, "grad_norm": 0.05026578294302038, "learning_rate": 8.588802138994865e-06, "loss": 0.0009, "step": 97480 }, { "epoch": 0.6413689203502563, "grad_norm": 0.1732332633801434, "learning_rate": 8.588402367524771e-06, "loss": 0.0012, "step": 97490 }, { "epoch": 0.6414347085254897, "grad_norm": 0.022544645556397323, "learning_rate": 8.58800254874469e-06, "loss": 0.0026, "step": 97500 }, { "epoch": 0.6415004967007231, "grad_norm": 0.139102532505378, "learning_rate": 8.587602682659895e-06, "loss": 0.0009, "step": 97510 }, { "epoch": 0.6415662848759563, "grad_norm": 0.008047435160662326, "learning_rate": 8.587202769275657e-06, "loss": 0.0005, "step": 97520 }, { "epoch": 0.6416320730511897, "grad_norm": 0.01647672224774159, "learning_rate": 8.586802808597248e-06, "loss": 0.0032, "step": 97530 }, { "epoch": 0.6416978612264231, "grad_norm": 0.07944358366394198, "learning_rate": 8.586402800629943e-06, "loss": 0.0015, "step": 97540 }, { "epoch": 0.6417636494016565, "grad_norm": 0.00617170406497807, "learning_rate": 8.586002745379015e-06, "loss": 0.0013, "step": 97550 }, { "epoch": 0.6418294375768899, "grad_norm": 0.02435905489776554, "learning_rate": 8.585602642849735e-06, "loss": 0.0012, "step": 97560 }, { "epoch": 0.6418952257521233, "grad_norm": 0.13572476042465217, "learning_rate": 8.585202493047381e-06, "loss": 0.0013, "step": 97570 }, { "epoch": 0.6419610139273567, "grad_norm": 0.03409257938597336, "learning_rate": 8.584802295977232e-06, "loss": 0.0024, "step": 97580 }, { "epoch": 0.6420268021025901, "grad_norm": 0.042232265158080616, "learning_rate": 8.584402051644558e-06, "loss": 0.0014, "step": 97590 }, { "epoch": 0.6420925902778235, "grad_norm": 0.10569163647438022, "learning_rate": 8.58400176005464e-06, "loss": 0.0021, "step": 97600 }, { "epoch": 0.6421583784530569, "grad_norm": 0.017974534995846735, "learning_rate": 8.583601421212752e-06, "loss": 0.0014, "step": 97610 }, { "epoch": 0.6422241666282902, "grad_norm": 0.08523930568200958, "learning_rate": 8.583201035124178e-06, "loss": 0.0019, "step": 97620 }, { "epoch": 0.6422899548035236, "grad_norm": 0.06170928293664857, "learning_rate": 8.58280060179419e-06, "loss": 0.0013, "step": 97630 }, { "epoch": 0.642355742978757, "grad_norm": 0.10578991338750143, "learning_rate": 8.582400121228071e-06, "loss": 0.0017, "step": 97640 }, { "epoch": 0.6424215311539904, "grad_norm": 0.10517104529176857, "learning_rate": 8.5819995934311e-06, "loss": 0.0015, "step": 97650 }, { "epoch": 0.6424873193292238, "grad_norm": 0.06386398806034317, "learning_rate": 8.58159901840856e-06, "loss": 0.0019, "step": 97660 }, { "epoch": 0.6425531075044572, "grad_norm": 0.023694182449966978, "learning_rate": 8.581198396165729e-06, "loss": 0.0011, "step": 97670 }, { "epoch": 0.6426188956796905, "grad_norm": 0.05645413303398922, "learning_rate": 8.58079772670789e-06, "loss": 0.0034, "step": 97680 }, { "epoch": 0.6426846838549239, "grad_norm": 0.06515024149247507, "learning_rate": 8.580397010040324e-06, "loss": 0.0021, "step": 97690 }, { "epoch": 0.6427504720301573, "grad_norm": 0.02422600535044063, "learning_rate": 8.579996246168318e-06, "loss": 0.0009, "step": 97700 }, { "epoch": 0.6428162602053907, "grad_norm": 0.013644989166045616, "learning_rate": 8.579595435097152e-06, "loss": 0.001, "step": 97710 }, { "epoch": 0.642882048380624, "grad_norm": 0.012303270659910347, "learning_rate": 8.579194576832113e-06, "loss": 0.0012, "step": 97720 }, { "epoch": 0.6429478365558574, "grad_norm": 0.019708439348483976, "learning_rate": 8.578793671378484e-06, "loss": 0.0028, "step": 97730 }, { "epoch": 0.6430136247310908, "grad_norm": 0.013496105950071953, "learning_rate": 8.578392718741551e-06, "loss": 0.0018, "step": 97740 }, { "epoch": 0.6430794129063242, "grad_norm": 0.1787881677629108, "learning_rate": 8.5779917189266e-06, "loss": 0.0032, "step": 97750 }, { "epoch": 0.6431452010815576, "grad_norm": 0.05663327131357061, "learning_rate": 8.57759067193892e-06, "loss": 0.0011, "step": 97760 }, { "epoch": 0.643210989256791, "grad_norm": 0.11608607034119707, "learning_rate": 8.577189577783796e-06, "loss": 0.0015, "step": 97770 }, { "epoch": 0.6432767774320244, "grad_norm": 0.0405198427890466, "learning_rate": 8.576788436466516e-06, "loss": 0.0012, "step": 97780 }, { "epoch": 0.6433425656072578, "grad_norm": 0.0888090894305233, "learning_rate": 8.57638724799237e-06, "loss": 0.0017, "step": 97790 }, { "epoch": 0.6434083537824912, "grad_norm": 0.0199538464609325, "learning_rate": 8.575986012366648e-06, "loss": 0.0024, "step": 97800 }, { "epoch": 0.6434741419577246, "grad_norm": 0.04469531029099032, "learning_rate": 8.575584729594637e-06, "loss": 0.0035, "step": 97810 }, { "epoch": 0.6435399301329578, "grad_norm": 0.04565449145869079, "learning_rate": 8.575183399681631e-06, "loss": 0.0034, "step": 97820 }, { "epoch": 0.6436057183081912, "grad_norm": 0.057006828332942024, "learning_rate": 8.574782022632919e-06, "loss": 0.0013, "step": 97830 }, { "epoch": 0.6436715064834246, "grad_norm": 0.09256225208140485, "learning_rate": 8.574380598453793e-06, "loss": 0.0013, "step": 97840 }, { "epoch": 0.643737294658658, "grad_norm": 0.035534239773040675, "learning_rate": 8.573979127149548e-06, "loss": 0.0043, "step": 97850 }, { "epoch": 0.6438030828338914, "grad_norm": 0.07678820080148203, "learning_rate": 8.573577608725471e-06, "loss": 0.0023, "step": 97860 }, { "epoch": 0.6438688710091248, "grad_norm": 0.03773683962776949, "learning_rate": 8.573176043186863e-06, "loss": 0.0009, "step": 97870 }, { "epoch": 0.6439346591843582, "grad_norm": 0.07678674044100647, "learning_rate": 8.572774430539013e-06, "loss": 0.0018, "step": 97880 }, { "epoch": 0.6440004473595916, "grad_norm": 0.031046092669130266, "learning_rate": 8.57237277078722e-06, "loss": 0.0012, "step": 97890 }, { "epoch": 0.644066235534825, "grad_norm": 0.0237576452532853, "learning_rate": 8.571971063936775e-06, "loss": 0.0018, "step": 97900 }, { "epoch": 0.6441320237100584, "grad_norm": 0.03731225938682225, "learning_rate": 8.571569309992977e-06, "loss": 0.0009, "step": 97910 }, { "epoch": 0.6441978118852917, "grad_norm": 0.018711105317008366, "learning_rate": 8.571167508961122e-06, "loss": 0.0006, "step": 97920 }, { "epoch": 0.6442636000605251, "grad_norm": 0.29692559087808257, "learning_rate": 8.570765660846508e-06, "loss": 0.0017, "step": 97930 }, { "epoch": 0.6443293882357585, "grad_norm": 0.08066885038466304, "learning_rate": 8.570363765654434e-06, "loss": 0.0028, "step": 97940 }, { "epoch": 0.6443951764109919, "grad_norm": 0.030091764228633355, "learning_rate": 8.569961823390195e-06, "loss": 0.0034, "step": 97950 }, { "epoch": 0.6444609645862253, "grad_norm": 0.012776785115404723, "learning_rate": 8.569559834059094e-06, "loss": 0.0009, "step": 97960 }, { "epoch": 0.6445267527614587, "grad_norm": 0.025138067581286158, "learning_rate": 8.56915779766643e-06, "loss": 0.0008, "step": 97970 }, { "epoch": 0.644592540936692, "grad_norm": 0.04385518011996617, "learning_rate": 8.568755714217502e-06, "loss": 0.002, "step": 97980 }, { "epoch": 0.6446583291119254, "grad_norm": 0.021413922445259, "learning_rate": 8.568353583717613e-06, "loss": 0.0014, "step": 97990 }, { "epoch": 0.6447241172871588, "grad_norm": 0.01994370357383862, "learning_rate": 8.567951406172062e-06, "loss": 0.0014, "step": 98000 }, { "epoch": 0.6447899054623922, "grad_norm": 0.024217225601400744, "learning_rate": 8.567549181586154e-06, "loss": 0.0006, "step": 98010 }, { "epoch": 0.6448556936376256, "grad_norm": 0.003053463301578152, "learning_rate": 8.567146909965192e-06, "loss": 0.0016, "step": 98020 }, { "epoch": 0.6449214818128589, "grad_norm": 0.02277872670896475, "learning_rate": 8.566744591314477e-06, "loss": 0.0015, "step": 98030 }, { "epoch": 0.6449872699880923, "grad_norm": 0.18257371712028705, "learning_rate": 8.566342225639317e-06, "loss": 0.002, "step": 98040 }, { "epoch": 0.6450530581633257, "grad_norm": 0.046155220801466924, "learning_rate": 8.565939812945015e-06, "loss": 0.0019, "step": 98050 }, { "epoch": 0.6451188463385591, "grad_norm": 0.05741602556980444, "learning_rate": 8.565537353236875e-06, "loss": 0.001, "step": 98060 }, { "epoch": 0.6451846345137925, "grad_norm": 0.1544684576883545, "learning_rate": 8.565134846520204e-06, "loss": 0.0014, "step": 98070 }, { "epoch": 0.6452504226890259, "grad_norm": 0.047759237033093635, "learning_rate": 8.56473229280031e-06, "loss": 0.0012, "step": 98080 }, { "epoch": 0.6453162108642593, "grad_norm": 0.0017099214012235681, "learning_rate": 8.564329692082501e-06, "loss": 0.0006, "step": 98090 }, { "epoch": 0.6453819990394927, "grad_norm": 0.021146677640683023, "learning_rate": 8.56392704437208e-06, "loss": 0.0013, "step": 98100 }, { "epoch": 0.6454477872147261, "grad_norm": 0.04433321232202762, "learning_rate": 8.563524349674361e-06, "loss": 0.0011, "step": 98110 }, { "epoch": 0.6455135753899595, "grad_norm": 0.1637909007844512, "learning_rate": 8.56312160799465e-06, "loss": 0.0048, "step": 98120 }, { "epoch": 0.6455793635651927, "grad_norm": 0.11496330786214544, "learning_rate": 8.56271881933826e-06, "loss": 0.0017, "step": 98130 }, { "epoch": 0.6456451517404261, "grad_norm": 0.008549762984031808, "learning_rate": 8.562315983710497e-06, "loss": 0.0019, "step": 98140 }, { "epoch": 0.6457109399156595, "grad_norm": 0.01093542484735846, "learning_rate": 8.561913101116676e-06, "loss": 0.0017, "step": 98150 }, { "epoch": 0.6457767280908929, "grad_norm": 0.013890484434972749, "learning_rate": 8.561510171562107e-06, "loss": 0.0017, "step": 98160 }, { "epoch": 0.6458425162661263, "grad_norm": 0.1272736908606467, "learning_rate": 8.5611071950521e-06, "loss": 0.0016, "step": 98170 }, { "epoch": 0.6459083044413597, "grad_norm": 0.1688037381099171, "learning_rate": 8.560704171591974e-06, "loss": 0.0022, "step": 98180 }, { "epoch": 0.6459740926165931, "grad_norm": 0.18506667857712597, "learning_rate": 8.560301101187035e-06, "loss": 0.0009, "step": 98190 }, { "epoch": 0.6460398807918265, "grad_norm": 0.05390409397013649, "learning_rate": 8.559897983842603e-06, "loss": 0.0011, "step": 98200 }, { "epoch": 0.6461056689670599, "grad_norm": 0.08704318705243227, "learning_rate": 8.559494819563991e-06, "loss": 0.0027, "step": 98210 }, { "epoch": 0.6461714571422933, "grad_norm": 0.010768570030722896, "learning_rate": 8.559091608356511e-06, "loss": 0.0008, "step": 98220 }, { "epoch": 0.6462372453175266, "grad_norm": 0.09509709845347385, "learning_rate": 8.558688350225486e-06, "loss": 0.0016, "step": 98230 }, { "epoch": 0.64630303349276, "grad_norm": 0.056428293879887396, "learning_rate": 8.558285045176227e-06, "loss": 0.0009, "step": 98240 }, { "epoch": 0.6463688216679934, "grad_norm": 0.016884710762628104, "learning_rate": 8.557881693214052e-06, "loss": 0.0023, "step": 98250 }, { "epoch": 0.6464346098432268, "grad_norm": 0.04663197936941848, "learning_rate": 8.557478294344278e-06, "loss": 0.0011, "step": 98260 }, { "epoch": 0.6465003980184602, "grad_norm": 0.11283390586208678, "learning_rate": 8.557074848572228e-06, "loss": 0.0018, "step": 98270 }, { "epoch": 0.6465661861936935, "grad_norm": 0.00032190835602304376, "learning_rate": 8.556671355903218e-06, "loss": 0.0013, "step": 98280 }, { "epoch": 0.6466319743689269, "grad_norm": 0.03141747483026364, "learning_rate": 8.556267816342568e-06, "loss": 0.0014, "step": 98290 }, { "epoch": 0.6466977625441603, "grad_norm": 0.07745979382979377, "learning_rate": 8.555864229895598e-06, "loss": 0.0016, "step": 98300 }, { "epoch": 0.6467635507193937, "grad_norm": 0.03169740372867711, "learning_rate": 8.55546059656763e-06, "loss": 0.0021, "step": 98310 }, { "epoch": 0.6468293388946271, "grad_norm": 0.04818832226910234, "learning_rate": 8.555056916363982e-06, "loss": 0.0011, "step": 98320 }, { "epoch": 0.6468951270698604, "grad_norm": 0.024422506457771387, "learning_rate": 8.55465318928998e-06, "loss": 0.0014, "step": 98330 }, { "epoch": 0.6469609152450938, "grad_norm": 0.028829389689334756, "learning_rate": 8.554249415350946e-06, "loss": 0.0018, "step": 98340 }, { "epoch": 0.6470267034203272, "grad_norm": 0.055114744362557096, "learning_rate": 8.553845594552203e-06, "loss": 0.0016, "step": 98350 }, { "epoch": 0.6470924915955606, "grad_norm": 0.025924814807734405, "learning_rate": 8.553441726899076e-06, "loss": 0.0012, "step": 98360 }, { "epoch": 0.647158279770794, "grad_norm": 0.19731472102917968, "learning_rate": 8.553037812396888e-06, "loss": 0.002, "step": 98370 }, { "epoch": 0.6472240679460274, "grad_norm": 0.05138380639531345, "learning_rate": 8.552633851050964e-06, "loss": 0.0017, "step": 98380 }, { "epoch": 0.6472898561212608, "grad_norm": 0.12967825187494847, "learning_rate": 8.552229842866633e-06, "loss": 0.0017, "step": 98390 }, { "epoch": 0.6473556442964942, "grad_norm": 0.040829835680461515, "learning_rate": 8.551825787849219e-06, "loss": 0.0009, "step": 98400 }, { "epoch": 0.6474214324717276, "grad_norm": 0.003308506584388016, "learning_rate": 8.551421686004049e-06, "loss": 0.0013, "step": 98410 }, { "epoch": 0.647487220646961, "grad_norm": 0.0497378307676423, "learning_rate": 8.551017537336451e-06, "loss": 0.0006, "step": 98420 }, { "epoch": 0.6475530088221944, "grad_norm": 0.06890572373024631, "learning_rate": 8.550613341851755e-06, "loss": 0.0012, "step": 98430 }, { "epoch": 0.6476187969974276, "grad_norm": 0.014631003633943745, "learning_rate": 8.550209099555287e-06, "loss": 0.0008, "step": 98440 }, { "epoch": 0.647684585172661, "grad_norm": 0.052351367827220226, "learning_rate": 8.549804810452378e-06, "loss": 0.003, "step": 98450 }, { "epoch": 0.6477503733478944, "grad_norm": 0.45864560319045133, "learning_rate": 8.549400474548359e-06, "loss": 0.0028, "step": 98460 }, { "epoch": 0.6478161615231278, "grad_norm": 0.061512641660810406, "learning_rate": 8.548996091848561e-06, "loss": 0.0007, "step": 98470 }, { "epoch": 0.6478819496983612, "grad_norm": 0.06855445029572035, "learning_rate": 8.548591662358315e-06, "loss": 0.0006, "step": 98480 }, { "epoch": 0.6479477378735946, "grad_norm": 0.06570346634704397, "learning_rate": 8.54818718608295e-06, "loss": 0.0016, "step": 98490 }, { "epoch": 0.648013526048828, "grad_norm": 0.04156991615196163, "learning_rate": 8.547782663027803e-06, "loss": 0.002, "step": 98500 }, { "epoch": 0.6480793142240614, "grad_norm": 0.15491623458986115, "learning_rate": 8.547378093198207e-06, "loss": 0.0014, "step": 98510 }, { "epoch": 0.6481451023992948, "grad_norm": 0.03887225509706216, "learning_rate": 8.546973476599494e-06, "loss": 0.0023, "step": 98520 }, { "epoch": 0.6482108905745282, "grad_norm": 0.019190959258213996, "learning_rate": 8.546568813236998e-06, "loss": 0.0012, "step": 98530 }, { "epoch": 0.6482766787497615, "grad_norm": 0.2535817915924524, "learning_rate": 8.546164103116054e-06, "loss": 0.0012, "step": 98540 }, { "epoch": 0.6483424669249949, "grad_norm": 0.03706769621514846, "learning_rate": 8.545759346242002e-06, "loss": 0.0014, "step": 98550 }, { "epoch": 0.6484082551002283, "grad_norm": 0.05970029292089877, "learning_rate": 8.545354542620174e-06, "loss": 0.0012, "step": 98560 }, { "epoch": 0.6484740432754617, "grad_norm": 0.033109630562265284, "learning_rate": 8.544949692255907e-06, "loss": 0.0014, "step": 98570 }, { "epoch": 0.648539831450695, "grad_norm": 0.0907011150463179, "learning_rate": 8.544544795154542e-06, "loss": 0.002, "step": 98580 }, { "epoch": 0.6486056196259284, "grad_norm": 0.018844870959910342, "learning_rate": 8.544139851321414e-06, "loss": 0.001, "step": 98590 }, { "epoch": 0.6486714078011618, "grad_norm": 0.033233336028201776, "learning_rate": 8.543734860761863e-06, "loss": 0.0014, "step": 98600 }, { "epoch": 0.6487371959763952, "grad_norm": 0.18121765999200964, "learning_rate": 8.54332982348123e-06, "loss": 0.0016, "step": 98610 }, { "epoch": 0.6488029841516286, "grad_norm": 0.08220479646113119, "learning_rate": 8.542924739484852e-06, "loss": 0.0013, "step": 98620 }, { "epoch": 0.648868772326862, "grad_norm": 0.21783283819565538, "learning_rate": 8.54251960877807e-06, "loss": 0.0009, "step": 98630 }, { "epoch": 0.6489345605020953, "grad_norm": 0.10850484433492527, "learning_rate": 8.542114431366229e-06, "loss": 0.0006, "step": 98640 }, { "epoch": 0.6490003486773287, "grad_norm": 0.05581960101135131, "learning_rate": 8.541709207254667e-06, "loss": 0.001, "step": 98650 }, { "epoch": 0.6490661368525621, "grad_norm": 0.042183514814155824, "learning_rate": 8.541303936448729e-06, "loss": 0.0052, "step": 98660 }, { "epoch": 0.6491319250277955, "grad_norm": 0.07108411074163515, "learning_rate": 8.540898618953755e-06, "loss": 0.0012, "step": 98670 }, { "epoch": 0.6491977132030289, "grad_norm": 0.1294227803542725, "learning_rate": 8.540493254775093e-06, "loss": 0.0017, "step": 98680 }, { "epoch": 0.6492635013782623, "grad_norm": 0.07402578692051867, "learning_rate": 8.540087843918083e-06, "loss": 0.0017, "step": 98690 }, { "epoch": 0.6493292895534957, "grad_norm": 0.07507468316900018, "learning_rate": 8.539682386388073e-06, "loss": 0.001, "step": 98700 }, { "epoch": 0.6493950777287291, "grad_norm": 0.01803354847105784, "learning_rate": 8.539276882190409e-06, "loss": 0.0007, "step": 98710 }, { "epoch": 0.6494608659039625, "grad_norm": 0.0056369927683511765, "learning_rate": 8.538871331330437e-06, "loss": 0.0019, "step": 98720 }, { "epoch": 0.6495266540791959, "grad_norm": 0.055209093293125046, "learning_rate": 8.5384657338135e-06, "loss": 0.0024, "step": 98730 }, { "epoch": 0.6495924422544291, "grad_norm": 0.07296196052399401, "learning_rate": 8.538060089644949e-06, "loss": 0.0013, "step": 98740 }, { "epoch": 0.6496582304296625, "grad_norm": 0.01463589899338959, "learning_rate": 8.537654398830132e-06, "loss": 0.0012, "step": 98750 }, { "epoch": 0.6497240186048959, "grad_norm": 0.016149844870035096, "learning_rate": 8.537248661374397e-06, "loss": 0.0012, "step": 98760 }, { "epoch": 0.6497898067801293, "grad_norm": 0.05959840008705231, "learning_rate": 8.536842877283093e-06, "loss": 0.0071, "step": 98770 }, { "epoch": 0.6498555949553627, "grad_norm": 0.01342791807734102, "learning_rate": 8.536437046561572e-06, "loss": 0.0026, "step": 98780 }, { "epoch": 0.6499213831305961, "grad_norm": 0.13036666309983352, "learning_rate": 8.53603116921518e-06, "loss": 0.0011, "step": 98790 }, { "epoch": 0.6499871713058295, "grad_norm": 0.08889691765766357, "learning_rate": 8.535625245249273e-06, "loss": 0.0011, "step": 98800 }, { "epoch": 0.6500529594810629, "grad_norm": 0.030503508707515548, "learning_rate": 8.5352192746692e-06, "loss": 0.0008, "step": 98810 }, { "epoch": 0.6501187476562963, "grad_norm": 0.020651939001150924, "learning_rate": 8.534813257480315e-06, "loss": 0.001, "step": 98820 }, { "epoch": 0.6501845358315297, "grad_norm": 0.07166889209197162, "learning_rate": 8.534407193687968e-06, "loss": 0.0014, "step": 98830 }, { "epoch": 0.650250324006763, "grad_norm": 0.0674290606291296, "learning_rate": 8.534001083297516e-06, "loss": 0.0026, "step": 98840 }, { "epoch": 0.6503161121819964, "grad_norm": 0.0443686465933081, "learning_rate": 8.533594926314312e-06, "loss": 0.0011, "step": 98850 }, { "epoch": 0.6503819003572298, "grad_norm": 0.05940761096900089, "learning_rate": 8.53318872274371e-06, "loss": 0.0011, "step": 98860 }, { "epoch": 0.6504476885324632, "grad_norm": 0.04072292908490117, "learning_rate": 8.532782472591067e-06, "loss": 0.0005, "step": 98870 }, { "epoch": 0.6505134767076965, "grad_norm": 0.13187799617638876, "learning_rate": 8.532376175861739e-06, "loss": 0.0034, "step": 98880 }, { "epoch": 0.6505792648829299, "grad_norm": 0.0651045040087562, "learning_rate": 8.53196983256108e-06, "loss": 0.0012, "step": 98890 }, { "epoch": 0.6506450530581633, "grad_norm": 0.07972601939496153, "learning_rate": 8.53156344269445e-06, "loss": 0.0021, "step": 98900 }, { "epoch": 0.6507108412333967, "grad_norm": 0.012586181227290317, "learning_rate": 8.531157006267205e-06, "loss": 0.0015, "step": 98910 }, { "epoch": 0.6507766294086301, "grad_norm": 0.015946209865679708, "learning_rate": 8.530750523284705e-06, "loss": 0.0015, "step": 98920 }, { "epoch": 0.6508424175838635, "grad_norm": 0.05152102628592896, "learning_rate": 8.530343993752311e-06, "loss": 0.0014, "step": 98930 }, { "epoch": 0.6509082057590969, "grad_norm": 0.019417061011808845, "learning_rate": 8.529937417675377e-06, "loss": 0.0009, "step": 98940 }, { "epoch": 0.6509739939343302, "grad_norm": 0.057933990971360774, "learning_rate": 8.529530795059269e-06, "loss": 0.0009, "step": 98950 }, { "epoch": 0.6510397821095636, "grad_norm": 0.04053727692885296, "learning_rate": 8.529124125909345e-06, "loss": 0.0013, "step": 98960 }, { "epoch": 0.651105570284797, "grad_norm": 0.15743205378280992, "learning_rate": 8.528717410230969e-06, "loss": 0.0007, "step": 98970 }, { "epoch": 0.6511713584600304, "grad_norm": 0.028730746226396302, "learning_rate": 8.528310648029499e-06, "loss": 0.0012, "step": 98980 }, { "epoch": 0.6512371466352638, "grad_norm": 0.10747684911053894, "learning_rate": 8.5279038393103e-06, "loss": 0.0016, "step": 98990 }, { "epoch": 0.6513029348104972, "grad_norm": 0.12438300294382218, "learning_rate": 8.52749698407874e-06, "loss": 0.0011, "step": 99000 }, { "epoch": 0.6513687229857306, "grad_norm": 0.1444064693663938, "learning_rate": 8.527090082340176e-06, "loss": 0.0012, "step": 99010 }, { "epoch": 0.651434511160964, "grad_norm": 0.1670995693908285, "learning_rate": 8.526683134099975e-06, "loss": 0.0016, "step": 99020 }, { "epoch": 0.6515002993361974, "grad_norm": 0.013280565296397574, "learning_rate": 8.526276139363502e-06, "loss": 0.0013, "step": 99030 }, { "epoch": 0.6515660875114307, "grad_norm": 0.07894181249083675, "learning_rate": 8.525869098136125e-06, "loss": 0.0029, "step": 99040 }, { "epoch": 0.651631875686664, "grad_norm": 0.02204754763359685, "learning_rate": 8.525462010423209e-06, "loss": 0.0015, "step": 99050 }, { "epoch": 0.6516976638618974, "grad_norm": 0.011256068060594596, "learning_rate": 8.52505487623012e-06, "loss": 0.0006, "step": 99060 }, { "epoch": 0.6517634520371308, "grad_norm": 0.03636358780304354, "learning_rate": 8.52464769556223e-06, "loss": 0.0013, "step": 99070 }, { "epoch": 0.6518292402123642, "grad_norm": 0.10255726654887533, "learning_rate": 8.524240468424902e-06, "loss": 0.0015, "step": 99080 }, { "epoch": 0.6518950283875976, "grad_norm": 0.016299061228433286, "learning_rate": 8.523833194823507e-06, "loss": 0.0012, "step": 99090 }, { "epoch": 0.651960816562831, "grad_norm": 0.006194513132208358, "learning_rate": 8.523425874763416e-06, "loss": 0.0017, "step": 99100 }, { "epoch": 0.6520266047380644, "grad_norm": 0.03796268279102582, "learning_rate": 8.523018508249996e-06, "loss": 0.0024, "step": 99110 }, { "epoch": 0.6520923929132978, "grad_norm": 0.06640487524456327, "learning_rate": 8.52261109528862e-06, "loss": 0.0017, "step": 99120 }, { "epoch": 0.6521581810885312, "grad_norm": 0.07208207279957814, "learning_rate": 8.52220363588466e-06, "loss": 0.0035, "step": 99130 }, { "epoch": 0.6522239692637646, "grad_norm": 0.028039160946545245, "learning_rate": 8.521796130043487e-06, "loss": 0.0018, "step": 99140 }, { "epoch": 0.6522897574389979, "grad_norm": 0.025717580579604243, "learning_rate": 8.521388577770474e-06, "loss": 0.0012, "step": 99150 }, { "epoch": 0.6523555456142313, "grad_norm": 0.0424275286741134, "learning_rate": 8.520980979070991e-06, "loss": 0.001, "step": 99160 }, { "epoch": 0.6524213337894647, "grad_norm": 0.07098985069297149, "learning_rate": 8.52057333395042e-06, "loss": 0.0013, "step": 99170 }, { "epoch": 0.652487121964698, "grad_norm": 0.09648497303573605, "learning_rate": 8.520165642414125e-06, "loss": 0.0013, "step": 99180 }, { "epoch": 0.6525529101399314, "grad_norm": 0.02826339317441945, "learning_rate": 8.51975790446749e-06, "loss": 0.0008, "step": 99190 }, { "epoch": 0.6526186983151648, "grad_norm": 0.22042280439779324, "learning_rate": 8.519350120115884e-06, "loss": 0.0018, "step": 99200 }, { "epoch": 0.6526844864903982, "grad_norm": 0.018123024724681067, "learning_rate": 8.518942289364689e-06, "loss": 0.0005, "step": 99210 }, { "epoch": 0.6527502746656316, "grad_norm": 0.01987398337760507, "learning_rate": 8.518534412219276e-06, "loss": 0.0013, "step": 99220 }, { "epoch": 0.652816062840865, "grad_norm": 0.06621466511239357, "learning_rate": 8.518126488685027e-06, "loss": 0.0105, "step": 99230 }, { "epoch": 0.6528818510160984, "grad_norm": 0.02067860044531272, "learning_rate": 8.51771851876732e-06, "loss": 0.0012, "step": 99240 }, { "epoch": 0.6529476391913317, "grad_norm": 0.05818106849053515, "learning_rate": 8.51731050247153e-06, "loss": 0.002, "step": 99250 }, { "epoch": 0.6530134273665651, "grad_norm": 0.040211075420887296, "learning_rate": 8.51690243980304e-06, "loss": 0.0024, "step": 99260 }, { "epoch": 0.6530792155417985, "grad_norm": 0.2941283519661503, "learning_rate": 8.516494330767229e-06, "loss": 0.0036, "step": 99270 }, { "epoch": 0.6531450037170319, "grad_norm": 0.10082008951972339, "learning_rate": 8.516086175369476e-06, "loss": 0.0012, "step": 99280 }, { "epoch": 0.6532107918922653, "grad_norm": 0.004558522139736486, "learning_rate": 8.515677973615165e-06, "loss": 0.0014, "step": 99290 }, { "epoch": 0.6532765800674987, "grad_norm": 0.13601236803140318, "learning_rate": 8.515269725509676e-06, "loss": 0.0021, "step": 99300 }, { "epoch": 0.6533423682427321, "grad_norm": 0.03742309433818166, "learning_rate": 8.51486143105839e-06, "loss": 0.0017, "step": 99310 }, { "epoch": 0.6534081564179655, "grad_norm": 0.04114771639921986, "learning_rate": 8.514453090266693e-06, "loss": 0.0011, "step": 99320 }, { "epoch": 0.6534739445931989, "grad_norm": 0.050832278706106765, "learning_rate": 8.514044703139968e-06, "loss": 0.0015, "step": 99330 }, { "epoch": 0.6535397327684322, "grad_norm": 0.07170794256143737, "learning_rate": 8.513636269683599e-06, "loss": 0.0023, "step": 99340 }, { "epoch": 0.6536055209436656, "grad_norm": 0.08105006331490201, "learning_rate": 8.513227789902968e-06, "loss": 0.0009, "step": 99350 }, { "epoch": 0.6536713091188989, "grad_norm": 0.018072636090915365, "learning_rate": 8.512819263803464e-06, "loss": 0.0011, "step": 99360 }, { "epoch": 0.6537370972941323, "grad_norm": 0.005057819370740073, "learning_rate": 8.512410691390471e-06, "loss": 0.0011, "step": 99370 }, { "epoch": 0.6538028854693657, "grad_norm": 0.006441988706901528, "learning_rate": 8.512002072669377e-06, "loss": 0.0013, "step": 99380 }, { "epoch": 0.6538686736445991, "grad_norm": 0.034639655818905865, "learning_rate": 8.511593407645569e-06, "loss": 0.0016, "step": 99390 }, { "epoch": 0.6539344618198325, "grad_norm": 0.009595987166489253, "learning_rate": 8.511184696324436e-06, "loss": 0.0023, "step": 99400 }, { "epoch": 0.6540002499950659, "grad_norm": 0.10717508798026573, "learning_rate": 8.510775938711365e-06, "loss": 0.0011, "step": 99410 }, { "epoch": 0.6540660381702993, "grad_norm": 0.11640506593224613, "learning_rate": 8.510367134811745e-06, "loss": 0.002, "step": 99420 }, { "epoch": 0.6541318263455327, "grad_norm": 0.06134631969857098, "learning_rate": 8.509958284630965e-06, "loss": 0.0017, "step": 99430 }, { "epoch": 0.6541976145207661, "grad_norm": 0.021634349954673594, "learning_rate": 8.509549388174416e-06, "loss": 0.0015, "step": 99440 }, { "epoch": 0.6542634026959995, "grad_norm": 0.03720079300450216, "learning_rate": 8.50914044544749e-06, "loss": 0.0042, "step": 99450 }, { "epoch": 0.6543291908712328, "grad_norm": 0.0468415905346575, "learning_rate": 8.50873145645558e-06, "loss": 0.0009, "step": 99460 }, { "epoch": 0.6543949790464662, "grad_norm": 0.03128819801393831, "learning_rate": 8.508322421204073e-06, "loss": 0.0014, "step": 99470 }, { "epoch": 0.6544607672216995, "grad_norm": 0.13674515176154525, "learning_rate": 8.507913339698366e-06, "loss": 0.003, "step": 99480 }, { "epoch": 0.6545265553969329, "grad_norm": 0.027051084535958465, "learning_rate": 8.507504211943853e-06, "loss": 0.0036, "step": 99490 }, { "epoch": 0.6545923435721663, "grad_norm": 0.08633811558671103, "learning_rate": 8.507095037945926e-06, "loss": 0.0018, "step": 99500 }, { "epoch": 0.6546581317473997, "grad_norm": 0.024028015441362714, "learning_rate": 8.506685817709976e-06, "loss": 0.0017, "step": 99510 }, { "epoch": 0.6547239199226331, "grad_norm": 0.07204480161071965, "learning_rate": 8.506276551241406e-06, "loss": 0.0014, "step": 99520 }, { "epoch": 0.6547897080978665, "grad_norm": 0.027685521161724163, "learning_rate": 8.505867238545607e-06, "loss": 0.0018, "step": 99530 }, { "epoch": 0.6548554962730999, "grad_norm": 0.055491355945562475, "learning_rate": 8.505457879627974e-06, "loss": 0.0019, "step": 99540 }, { "epoch": 0.6549212844483333, "grad_norm": 0.02120229000937546, "learning_rate": 8.505048474493908e-06, "loss": 0.0021, "step": 99550 }, { "epoch": 0.6549870726235666, "grad_norm": 0.03888288581808831, "learning_rate": 8.504639023148806e-06, "loss": 0.0013, "step": 99560 }, { "epoch": 0.6550528607988, "grad_norm": 0.3745621835772519, "learning_rate": 8.504229525598064e-06, "loss": 0.0016, "step": 99570 }, { "epoch": 0.6551186489740334, "grad_norm": 0.029344799602003237, "learning_rate": 8.503819981847084e-06, "loss": 0.0013, "step": 99580 }, { "epoch": 0.6551844371492668, "grad_norm": 0.24719689969375222, "learning_rate": 8.503410391901262e-06, "loss": 0.0019, "step": 99590 }, { "epoch": 0.6552502253245002, "grad_norm": 0.09503874397857885, "learning_rate": 8.503000755766e-06, "loss": 0.0011, "step": 99600 }, { "epoch": 0.6553160134997336, "grad_norm": 0.06687660382359849, "learning_rate": 8.502591073446699e-06, "loss": 0.0015, "step": 99610 }, { "epoch": 0.655381801674967, "grad_norm": 0.09154727272668946, "learning_rate": 8.50218134494876e-06, "loss": 0.0016, "step": 99620 }, { "epoch": 0.6554475898502004, "grad_norm": 0.032533865109536236, "learning_rate": 8.501771570277583e-06, "loss": 0.0013, "step": 99630 }, { "epoch": 0.6555133780254337, "grad_norm": 0.06856487186260483, "learning_rate": 8.501361749438573e-06, "loss": 0.0016, "step": 99640 }, { "epoch": 0.6555791662006671, "grad_norm": 0.07242531649254791, "learning_rate": 8.500951882437132e-06, "loss": 0.0017, "step": 99650 }, { "epoch": 0.6556449543759004, "grad_norm": 0.07953581143263978, "learning_rate": 8.500541969278663e-06, "loss": 0.0021, "step": 99660 }, { "epoch": 0.6557107425511338, "grad_norm": 0.06618080654575104, "learning_rate": 8.500132009968574e-06, "loss": 0.001, "step": 99670 }, { "epoch": 0.6557765307263672, "grad_norm": 0.10186664546622226, "learning_rate": 8.499722004512266e-06, "loss": 0.0014, "step": 99680 }, { "epoch": 0.6558423189016006, "grad_norm": 0.04807330006989541, "learning_rate": 8.499311952915146e-06, "loss": 0.0019, "step": 99690 }, { "epoch": 0.655908107076834, "grad_norm": 0.018840792544359296, "learning_rate": 8.49890185518262e-06, "loss": 0.0011, "step": 99700 }, { "epoch": 0.6559738952520674, "grad_norm": 0.03554448685520829, "learning_rate": 8.498491711320096e-06, "loss": 0.002, "step": 99710 }, { "epoch": 0.6560396834273008, "grad_norm": 0.026746908441060927, "learning_rate": 8.498081521332979e-06, "loss": 0.0013, "step": 99720 }, { "epoch": 0.6561054716025342, "grad_norm": 0.05267065099694868, "learning_rate": 8.497671285226679e-06, "loss": 0.0013, "step": 99730 }, { "epoch": 0.6561712597777676, "grad_norm": 0.01910386358759411, "learning_rate": 8.497261003006604e-06, "loss": 0.0021, "step": 99740 }, { "epoch": 0.656237047953001, "grad_norm": 0.025975290334539828, "learning_rate": 8.496850674678161e-06, "loss": 0.0017, "step": 99750 }, { "epoch": 0.6563028361282343, "grad_norm": 0.05671664609929805, "learning_rate": 8.496440300246764e-06, "loss": 0.0012, "step": 99760 }, { "epoch": 0.6563686243034677, "grad_norm": 0.04251139640456356, "learning_rate": 8.496029879717822e-06, "loss": 0.0015, "step": 99770 }, { "epoch": 0.656434412478701, "grad_norm": 0.05497832376860341, "learning_rate": 8.495619413096744e-06, "loss": 0.0005, "step": 99780 }, { "epoch": 0.6565002006539344, "grad_norm": 0.049576967240513845, "learning_rate": 8.495208900388943e-06, "loss": 0.0011, "step": 99790 }, { "epoch": 0.6565659888291678, "grad_norm": 0.004628899998058905, "learning_rate": 8.494798341599833e-06, "loss": 0.0015, "step": 99800 }, { "epoch": 0.6566317770044012, "grad_norm": 0.0037893482398256425, "learning_rate": 8.494387736734823e-06, "loss": 0.001, "step": 99810 }, { "epoch": 0.6566975651796346, "grad_norm": 0.024900879555268284, "learning_rate": 8.493977085799329e-06, "loss": 0.0013, "step": 99820 }, { "epoch": 0.656763353354868, "grad_norm": 0.06867515245695782, "learning_rate": 8.493566388798765e-06, "loss": 0.002, "step": 99830 }, { "epoch": 0.6568291415301014, "grad_norm": 0.06443502739630613, "learning_rate": 8.493155645738546e-06, "loss": 0.0013, "step": 99840 }, { "epoch": 0.6568949297053348, "grad_norm": 0.1268923583589755, "learning_rate": 8.492744856624087e-06, "loss": 0.0017, "step": 99850 }, { "epoch": 0.6569607178805682, "grad_norm": 0.13411013738784236, "learning_rate": 8.492334021460802e-06, "loss": 0.0016, "step": 99860 }, { "epoch": 0.6570265060558015, "grad_norm": 0.08020681300922008, "learning_rate": 8.49192314025411e-06, "loss": 0.0021, "step": 99870 }, { "epoch": 0.6570922942310349, "grad_norm": 0.03922800293047183, "learning_rate": 8.491512213009428e-06, "loss": 0.0015, "step": 99880 }, { "epoch": 0.6571580824062683, "grad_norm": 0.02082361806556158, "learning_rate": 8.491101239732171e-06, "loss": 0.0016, "step": 99890 }, { "epoch": 0.6572238705815017, "grad_norm": 0.026166899243047194, "learning_rate": 8.490690220427762e-06, "loss": 0.002, "step": 99900 }, { "epoch": 0.6572896587567351, "grad_norm": 0.17951425050105263, "learning_rate": 8.490279155101616e-06, "loss": 0.0015, "step": 99910 }, { "epoch": 0.6573554469319685, "grad_norm": 0.010359900775133562, "learning_rate": 8.489868043759152e-06, "loss": 0.0012, "step": 99920 }, { "epoch": 0.6574212351072019, "grad_norm": 0.0585174593364267, "learning_rate": 8.489456886405793e-06, "loss": 0.0016, "step": 99930 }, { "epoch": 0.6574870232824352, "grad_norm": 0.20471980692671338, "learning_rate": 8.48904568304696e-06, "loss": 0.0021, "step": 99940 }, { "epoch": 0.6575528114576686, "grad_norm": 0.04946662942050281, "learning_rate": 8.48863443368807e-06, "loss": 0.0011, "step": 99950 }, { "epoch": 0.657618599632902, "grad_norm": 0.02261995306229859, "learning_rate": 8.488223138334552e-06, "loss": 0.001, "step": 99960 }, { "epoch": 0.6576843878081353, "grad_norm": 0.019100967188313573, "learning_rate": 8.487811796991823e-06, "loss": 0.0015, "step": 99970 }, { "epoch": 0.6577501759833687, "grad_norm": 0.0536172340503797, "learning_rate": 8.487400409665306e-06, "loss": 0.0013, "step": 99980 }, { "epoch": 0.6578159641586021, "grad_norm": 0.04721073421839362, "learning_rate": 8.486988976360429e-06, "loss": 0.0008, "step": 99990 }, { "epoch": 0.6578817523338355, "grad_norm": 0.0032673483196855964, "learning_rate": 8.486577497082611e-06, "loss": 0.0002, "step": 100000 }, { "epoch": 0.6578817523338355, "eval_loss": 0.001114123617298901, "eval_runtime": 13.0874, "eval_samples_per_second": 15.282, "eval_steps_per_second": 7.641, "step": 100000 }, { "epoch": 0.6579475405090689, "grad_norm": 0.04557131831678843, "learning_rate": 8.486165971837282e-06, "loss": 0.0012, "step": 100010 }, { "epoch": 0.6580133286843023, "grad_norm": 0.023124155784441724, "learning_rate": 8.485754400629864e-06, "loss": 0.0022, "step": 100020 }, { "epoch": 0.6580791168595357, "grad_norm": 0.03799473202362458, "learning_rate": 8.485342783465786e-06, "loss": 0.0013, "step": 100030 }, { "epoch": 0.6581449050347691, "grad_norm": 0.13379672354831967, "learning_rate": 8.484931120350475e-06, "loss": 0.0014, "step": 100040 }, { "epoch": 0.6582106932100025, "grad_norm": 0.0873687690382406, "learning_rate": 8.484519411289353e-06, "loss": 0.0014, "step": 100050 }, { "epoch": 0.6582764813852359, "grad_norm": 0.034262870078708284, "learning_rate": 8.484107656287856e-06, "loss": 0.0015, "step": 100060 }, { "epoch": 0.6583422695604692, "grad_norm": 0.012155142193083477, "learning_rate": 8.483695855351407e-06, "loss": 0.0012, "step": 100070 }, { "epoch": 0.6584080577357025, "grad_norm": 0.10521423133600723, "learning_rate": 8.483284008485436e-06, "loss": 0.001, "step": 100080 }, { "epoch": 0.6584738459109359, "grad_norm": 0.04156774599928556, "learning_rate": 8.482872115695375e-06, "loss": 0.0024, "step": 100090 }, { "epoch": 0.6585396340861693, "grad_norm": 0.0622213756836367, "learning_rate": 8.482460176986653e-06, "loss": 0.0014, "step": 100100 }, { "epoch": 0.6586054222614027, "grad_norm": 0.028254275225551982, "learning_rate": 8.4820481923647e-06, "loss": 0.001, "step": 100110 }, { "epoch": 0.6586712104366361, "grad_norm": 0.09128711610203016, "learning_rate": 8.48163616183495e-06, "loss": 0.0015, "step": 100120 }, { "epoch": 0.6587369986118695, "grad_norm": 0.03306459771355937, "learning_rate": 8.481224085402834e-06, "loss": 0.0013, "step": 100130 }, { "epoch": 0.6588027867871029, "grad_norm": 0.028734145263835397, "learning_rate": 8.480811963073785e-06, "loss": 0.0008, "step": 100140 }, { "epoch": 0.6588685749623363, "grad_norm": 0.0475226227832197, "learning_rate": 8.480399794853236e-06, "loss": 0.0041, "step": 100150 }, { "epoch": 0.6589343631375697, "grad_norm": 0.008497535700878954, "learning_rate": 8.479987580746622e-06, "loss": 0.001, "step": 100160 }, { "epoch": 0.659000151312803, "grad_norm": 0.007012715348035577, "learning_rate": 8.479575320759377e-06, "loss": 0.0018, "step": 100170 }, { "epoch": 0.6590659394880364, "grad_norm": 0.044757575050601676, "learning_rate": 8.479163014896937e-06, "loss": 0.0013, "step": 100180 }, { "epoch": 0.6591317276632698, "grad_norm": 0.005619212766575057, "learning_rate": 8.478750663164736e-06, "loss": 0.0015, "step": 100190 }, { "epoch": 0.6591975158385032, "grad_norm": 0.06895680496448962, "learning_rate": 8.478338265568213e-06, "loss": 0.0022, "step": 100200 }, { "epoch": 0.6592633040137366, "grad_norm": 0.0116929304947474, "learning_rate": 8.477925822112805e-06, "loss": 0.0013, "step": 100210 }, { "epoch": 0.65932909218897, "grad_norm": 0.010375613486916815, "learning_rate": 8.477513332803948e-06, "loss": 0.0004, "step": 100220 }, { "epoch": 0.6593948803642034, "grad_norm": 0.038192383983286456, "learning_rate": 8.477100797647081e-06, "loss": 0.0016, "step": 100230 }, { "epoch": 0.6594606685394367, "grad_norm": 0.000210614836991977, "learning_rate": 8.476688216647643e-06, "loss": 0.0012, "step": 100240 }, { "epoch": 0.6595264567146701, "grad_norm": 0.008202489164567537, "learning_rate": 8.476275589811075e-06, "loss": 0.0018, "step": 100250 }, { "epoch": 0.6595922448899035, "grad_norm": 0.08819847692028787, "learning_rate": 8.475862917142814e-06, "loss": 0.0016, "step": 100260 }, { "epoch": 0.6596580330651368, "grad_norm": 0.22278735640065936, "learning_rate": 8.475450198648302e-06, "loss": 0.0018, "step": 100270 }, { "epoch": 0.6597238212403702, "grad_norm": 0.05772147815585324, "learning_rate": 8.475037434332982e-06, "loss": 0.0009, "step": 100280 }, { "epoch": 0.6597896094156036, "grad_norm": 0.01806841621660489, "learning_rate": 8.474624624202293e-06, "loss": 0.0012, "step": 100290 }, { "epoch": 0.659855397590837, "grad_norm": 0.06259973831460577, "learning_rate": 8.47421176826168e-06, "loss": 0.0013, "step": 100300 }, { "epoch": 0.6599211857660704, "grad_norm": 0.010518718023239993, "learning_rate": 8.473798866516587e-06, "loss": 0.001, "step": 100310 }, { "epoch": 0.6599869739413038, "grad_norm": 0.35525688243113923, "learning_rate": 8.473385918972455e-06, "loss": 0.0024, "step": 100320 }, { "epoch": 0.6600527621165372, "grad_norm": 0.08933252677458162, "learning_rate": 8.472972925634728e-06, "loss": 0.0021, "step": 100330 }, { "epoch": 0.6601185502917706, "grad_norm": 0.16805272737113752, "learning_rate": 8.472559886508854e-06, "loss": 0.0019, "step": 100340 }, { "epoch": 0.660184338467004, "grad_norm": 0.017377333232424564, "learning_rate": 8.472146801600276e-06, "loss": 0.0012, "step": 100350 }, { "epoch": 0.6602501266422374, "grad_norm": 0.09685170165631461, "learning_rate": 8.47173367091444e-06, "loss": 0.0013, "step": 100360 }, { "epoch": 0.6603159148174708, "grad_norm": 0.1172401554251712, "learning_rate": 8.471320494456799e-06, "loss": 0.0032, "step": 100370 }, { "epoch": 0.660381702992704, "grad_norm": 0.027556194452950274, "learning_rate": 8.47090727223279e-06, "loss": 0.0013, "step": 100380 }, { "epoch": 0.6604474911679374, "grad_norm": 0.08568340491866082, "learning_rate": 8.470494004247868e-06, "loss": 0.0008, "step": 100390 }, { "epoch": 0.6605132793431708, "grad_norm": 0.04730048464528924, "learning_rate": 8.470080690507482e-06, "loss": 0.001, "step": 100400 }, { "epoch": 0.6605790675184042, "grad_norm": 0.05923689241868239, "learning_rate": 8.469667331017077e-06, "loss": 0.0033, "step": 100410 }, { "epoch": 0.6606448556936376, "grad_norm": 0.044375770230744065, "learning_rate": 8.469253925782105e-06, "loss": 0.001, "step": 100420 }, { "epoch": 0.660710643868871, "grad_norm": 0.08590329724696973, "learning_rate": 8.468840474808015e-06, "loss": 0.0016, "step": 100430 }, { "epoch": 0.6607764320441044, "grad_norm": 0.0013822540371691487, "learning_rate": 8.468426978100262e-06, "loss": 0.0013, "step": 100440 }, { "epoch": 0.6608422202193378, "grad_norm": 0.09731502097347142, "learning_rate": 8.468013435664293e-06, "loss": 0.0012, "step": 100450 }, { "epoch": 0.6609080083945712, "grad_norm": 0.03190303312836081, "learning_rate": 8.467599847505563e-06, "loss": 0.0013, "step": 100460 }, { "epoch": 0.6609737965698046, "grad_norm": 0.05688060547276583, "learning_rate": 8.467186213629523e-06, "loss": 0.0021, "step": 100470 }, { "epoch": 0.6610395847450379, "grad_norm": 0.005312406411490304, "learning_rate": 8.466772534041627e-06, "loss": 0.0015, "step": 100480 }, { "epoch": 0.6611053729202713, "grad_norm": 0.02750554754150462, "learning_rate": 8.46635880874733e-06, "loss": 0.0032, "step": 100490 }, { "epoch": 0.6611711610955047, "grad_norm": 0.035688467059066725, "learning_rate": 8.465945037752085e-06, "loss": 0.0016, "step": 100500 }, { "epoch": 0.6612369492707381, "grad_norm": 0.051077204323162276, "learning_rate": 8.465531221061349e-06, "loss": 0.0013, "step": 100510 }, { "epoch": 0.6613027374459715, "grad_norm": 0.2084206582199078, "learning_rate": 8.465117358680577e-06, "loss": 0.0017, "step": 100520 }, { "epoch": 0.6613685256212049, "grad_norm": 0.0517178198427275, "learning_rate": 8.464703450615223e-06, "loss": 0.0013, "step": 100530 }, { "epoch": 0.6614343137964382, "grad_norm": 0.06407285288552876, "learning_rate": 8.46428949687075e-06, "loss": 0.0013, "step": 100540 }, { "epoch": 0.6615001019716716, "grad_norm": 0.07240491734596917, "learning_rate": 8.46387549745261e-06, "loss": 0.0021, "step": 100550 }, { "epoch": 0.661565890146905, "grad_norm": 0.050600897411171214, "learning_rate": 8.463461452366261e-06, "loss": 0.0013, "step": 100560 }, { "epoch": 0.6616316783221384, "grad_norm": 0.0353602784433751, "learning_rate": 8.463047361617166e-06, "loss": 0.0021, "step": 100570 }, { "epoch": 0.6616974664973717, "grad_norm": 0.014452048151984897, "learning_rate": 8.462633225210785e-06, "loss": 0.0012, "step": 100580 }, { "epoch": 0.6617632546726051, "grad_norm": 0.06228460804730047, "learning_rate": 8.462219043152572e-06, "loss": 0.0017, "step": 100590 }, { "epoch": 0.6618290428478385, "grad_norm": 0.11928936740763327, "learning_rate": 8.461804815447993e-06, "loss": 0.0013, "step": 100600 }, { "epoch": 0.6618948310230719, "grad_norm": 0.11797842006084436, "learning_rate": 8.461390542102505e-06, "loss": 0.0008, "step": 100610 }, { "epoch": 0.6619606191983053, "grad_norm": 0.03463044368656275, "learning_rate": 8.460976223121574e-06, "loss": 0.0006, "step": 100620 }, { "epoch": 0.6620264073735387, "grad_norm": 0.010269499998331166, "learning_rate": 8.46056185851066e-06, "loss": 0.0014, "step": 100630 }, { "epoch": 0.6620921955487721, "grad_norm": 0.04785328505304435, "learning_rate": 8.460147448275227e-06, "loss": 0.0015, "step": 100640 }, { "epoch": 0.6621579837240055, "grad_norm": 0.08202195310026626, "learning_rate": 8.459732992420739e-06, "loss": 0.002, "step": 100650 }, { "epoch": 0.6622237718992389, "grad_norm": 0.10645317250466012, "learning_rate": 8.459318490952658e-06, "loss": 0.0034, "step": 100660 }, { "epoch": 0.6622895600744723, "grad_norm": 0.1534081679742794, "learning_rate": 8.458903943876451e-06, "loss": 0.0017, "step": 100670 }, { "epoch": 0.6623553482497055, "grad_norm": 0.02605470832512179, "learning_rate": 8.458489351197584e-06, "loss": 0.0016, "step": 100680 }, { "epoch": 0.6624211364249389, "grad_norm": 0.08241252549364017, "learning_rate": 8.45807471292152e-06, "loss": 0.0016, "step": 100690 }, { "epoch": 0.6624869246001723, "grad_norm": 0.045948656701322775, "learning_rate": 8.457660029053727e-06, "loss": 0.001, "step": 100700 }, { "epoch": 0.6625527127754057, "grad_norm": 0.24341640969368714, "learning_rate": 8.457245299599674e-06, "loss": 0.0017, "step": 100710 }, { "epoch": 0.6626185009506391, "grad_norm": 0.07447705071602882, "learning_rate": 8.456830524564828e-06, "loss": 0.0037, "step": 100720 }, { "epoch": 0.6626842891258725, "grad_norm": 0.02361125089793352, "learning_rate": 8.456415703954656e-06, "loss": 0.0033, "step": 100730 }, { "epoch": 0.6627500773011059, "grad_norm": 0.021702567089015287, "learning_rate": 8.45600083777463e-06, "loss": 0.0009, "step": 100740 }, { "epoch": 0.6628158654763393, "grad_norm": 0.08298957768605655, "learning_rate": 8.455585926030216e-06, "loss": 0.0017, "step": 100750 }, { "epoch": 0.6628816536515727, "grad_norm": 0.08075426090825762, "learning_rate": 8.455170968726887e-06, "loss": 0.0006, "step": 100760 }, { "epoch": 0.6629474418268061, "grad_norm": 0.021968237326808575, "learning_rate": 8.454755965870112e-06, "loss": 0.002, "step": 100770 }, { "epoch": 0.6630132300020395, "grad_norm": 0.0171262880363305, "learning_rate": 8.454340917465362e-06, "loss": 0.0008, "step": 100780 }, { "epoch": 0.6630790181772728, "grad_norm": 0.17120711056626364, "learning_rate": 8.453925823518112e-06, "loss": 0.0016, "step": 100790 }, { "epoch": 0.6631448063525062, "grad_norm": 0.06361673916810291, "learning_rate": 8.453510684033834e-06, "loss": 0.002, "step": 100800 }, { "epoch": 0.6632105945277396, "grad_norm": 0.05102837901371725, "learning_rate": 8.453095499017999e-06, "loss": 0.0011, "step": 100810 }, { "epoch": 0.663276382702973, "grad_norm": 0.05930884777918528, "learning_rate": 8.452680268476083e-06, "loss": 0.0017, "step": 100820 }, { "epoch": 0.6633421708782063, "grad_norm": 0.0518795412927979, "learning_rate": 8.452264992413557e-06, "loss": 0.0022, "step": 100830 }, { "epoch": 0.6634079590534397, "grad_norm": 0.1503843391581842, "learning_rate": 8.451849670835901e-06, "loss": 0.002, "step": 100840 }, { "epoch": 0.6634737472286731, "grad_norm": 0.0625604255783429, "learning_rate": 8.45143430374859e-06, "loss": 0.0009, "step": 100850 }, { "epoch": 0.6635395354039065, "grad_norm": 0.015553775084351825, "learning_rate": 8.451018891157094e-06, "loss": 0.0016, "step": 100860 }, { "epoch": 0.6636053235791399, "grad_norm": 0.0035009547479531054, "learning_rate": 8.450603433066897e-06, "loss": 0.0026, "step": 100870 }, { "epoch": 0.6636711117543733, "grad_norm": 0.02015928944601608, "learning_rate": 8.450187929483475e-06, "loss": 0.0018, "step": 100880 }, { "epoch": 0.6637368999296066, "grad_norm": 0.059596566738958724, "learning_rate": 8.449772380412304e-06, "loss": 0.0035, "step": 100890 }, { "epoch": 0.66380268810484, "grad_norm": 0.040270852343702146, "learning_rate": 8.449356785858863e-06, "loss": 0.0009, "step": 100900 }, { "epoch": 0.6638684762800734, "grad_norm": 0.0193104809790846, "learning_rate": 8.448941145828631e-06, "loss": 0.001, "step": 100910 }, { "epoch": 0.6639342644553068, "grad_norm": 0.007332530433575911, "learning_rate": 8.448525460327091e-06, "loss": 0.0008, "step": 100920 }, { "epoch": 0.6640000526305402, "grad_norm": 0.010701140217520335, "learning_rate": 8.44810972935972e-06, "loss": 0.0019, "step": 100930 }, { "epoch": 0.6640658408057736, "grad_norm": 0.08400937859125084, "learning_rate": 8.447693952932e-06, "loss": 0.0009, "step": 100940 }, { "epoch": 0.664131628981007, "grad_norm": 0.020175627584536258, "learning_rate": 8.447278131049412e-06, "loss": 0.002, "step": 100950 }, { "epoch": 0.6641974171562404, "grad_norm": 0.04346817511773031, "learning_rate": 8.446862263717442e-06, "loss": 0.0033, "step": 100960 }, { "epoch": 0.6642632053314738, "grad_norm": 0.08908792250181007, "learning_rate": 8.446446350941566e-06, "loss": 0.0012, "step": 100970 }, { "epoch": 0.6643289935067072, "grad_norm": 0.05888115276140766, "learning_rate": 8.446030392727274e-06, "loss": 0.0014, "step": 100980 }, { "epoch": 0.6643947816819404, "grad_norm": 0.06769799620367076, "learning_rate": 8.445614389080047e-06, "loss": 0.0008, "step": 100990 }, { "epoch": 0.6644605698571738, "grad_norm": 0.06486993755593518, "learning_rate": 8.445198340005369e-06, "loss": 0.0022, "step": 101000 }, { "epoch": 0.6645263580324072, "grad_norm": 0.0451241600783666, "learning_rate": 8.444782245508729e-06, "loss": 0.0011, "step": 101010 }, { "epoch": 0.6645921462076406, "grad_norm": 0.05349107101386884, "learning_rate": 8.444366105595607e-06, "loss": 0.0011, "step": 101020 }, { "epoch": 0.664657934382874, "grad_norm": 0.09624753150630302, "learning_rate": 8.443949920271495e-06, "loss": 0.0016, "step": 101030 }, { "epoch": 0.6647237225581074, "grad_norm": 0.07410452515188763, "learning_rate": 8.443533689541877e-06, "loss": 0.0018, "step": 101040 }, { "epoch": 0.6647895107333408, "grad_norm": 0.0298871369657345, "learning_rate": 8.44311741341224e-06, "loss": 0.0008, "step": 101050 }, { "epoch": 0.6648552989085742, "grad_norm": 0.032128316547087886, "learning_rate": 8.442701091888076e-06, "loss": 0.0022, "step": 101060 }, { "epoch": 0.6649210870838076, "grad_norm": 0.0762830680031309, "learning_rate": 8.44228472497487e-06, "loss": 0.0018, "step": 101070 }, { "epoch": 0.664986875259041, "grad_norm": 0.036391709721273695, "learning_rate": 8.441868312678115e-06, "loss": 0.0012, "step": 101080 }, { "epoch": 0.6650526634342743, "grad_norm": 0.031045812280677695, "learning_rate": 8.441451855003295e-06, "loss": 0.0008, "step": 101090 }, { "epoch": 0.6651184516095077, "grad_norm": 0.03162895479473418, "learning_rate": 8.441035351955908e-06, "loss": 0.001, "step": 101100 }, { "epoch": 0.6651842397847411, "grad_norm": 0.03545070626654849, "learning_rate": 8.440618803541439e-06, "loss": 0.001, "step": 101110 }, { "epoch": 0.6652500279599745, "grad_norm": 0.18736996773829237, "learning_rate": 8.440202209765385e-06, "loss": 0.0033, "step": 101120 }, { "epoch": 0.6653158161352078, "grad_norm": 0.023545150140156835, "learning_rate": 8.439785570633234e-06, "loss": 0.0028, "step": 101130 }, { "epoch": 0.6653816043104412, "grad_norm": 0.06973640703779056, "learning_rate": 8.439368886150482e-06, "loss": 0.0012, "step": 101140 }, { "epoch": 0.6654473924856746, "grad_norm": 0.0075930049179931194, "learning_rate": 8.438952156322621e-06, "loss": 0.0012, "step": 101150 }, { "epoch": 0.665513180660908, "grad_norm": 0.008010192111838376, "learning_rate": 8.438535381155148e-06, "loss": 0.0012, "step": 101160 }, { "epoch": 0.6655789688361414, "grad_norm": 0.05349592504759207, "learning_rate": 8.438118560653553e-06, "loss": 0.0014, "step": 101170 }, { "epoch": 0.6656447570113748, "grad_norm": 0.06755542170439795, "learning_rate": 8.437701694823336e-06, "loss": 0.002, "step": 101180 }, { "epoch": 0.6657105451866081, "grad_norm": 0.04530592421240766, "learning_rate": 8.43728478366999e-06, "loss": 0.0011, "step": 101190 }, { "epoch": 0.6657763333618415, "grad_norm": 0.03882613553498316, "learning_rate": 8.436867827199014e-06, "loss": 0.0012, "step": 101200 }, { "epoch": 0.6658421215370749, "grad_norm": 0.008228015530456174, "learning_rate": 8.436450825415904e-06, "loss": 0.0024, "step": 101210 }, { "epoch": 0.6659079097123083, "grad_norm": 0.011177148833523306, "learning_rate": 8.436033778326157e-06, "loss": 0.0013, "step": 101220 }, { "epoch": 0.6659736978875417, "grad_norm": 0.025726512926951647, "learning_rate": 8.435616685935271e-06, "loss": 0.0012, "step": 101230 }, { "epoch": 0.6660394860627751, "grad_norm": 0.03295687634834612, "learning_rate": 8.435199548248747e-06, "loss": 0.0008, "step": 101240 }, { "epoch": 0.6661052742380085, "grad_norm": 0.056356217475010364, "learning_rate": 8.434782365272084e-06, "loss": 0.0018, "step": 101250 }, { "epoch": 0.6661710624132419, "grad_norm": 0.038972270642742224, "learning_rate": 8.434365137010782e-06, "loss": 0.0021, "step": 101260 }, { "epoch": 0.6662368505884753, "grad_norm": 0.03798470319949241, "learning_rate": 8.433947863470342e-06, "loss": 0.0011, "step": 101270 }, { "epoch": 0.6663026387637087, "grad_norm": 0.09051650372124559, "learning_rate": 8.433530544656265e-06, "loss": 0.0009, "step": 101280 }, { "epoch": 0.666368426938942, "grad_norm": 0.08151140967058723, "learning_rate": 8.433113180574053e-06, "loss": 0.0011, "step": 101290 }, { "epoch": 0.6664342151141753, "grad_norm": 0.05626295080494896, "learning_rate": 8.432695771229209e-06, "loss": 0.0017, "step": 101300 }, { "epoch": 0.6665000032894087, "grad_norm": 0.04838792699237338, "learning_rate": 8.432278316627237e-06, "loss": 0.0011, "step": 101310 }, { "epoch": 0.6665657914646421, "grad_norm": 0.024980965729879536, "learning_rate": 8.431860816773637e-06, "loss": 0.0011, "step": 101320 }, { "epoch": 0.6666315796398755, "grad_norm": 0.32204517368989244, "learning_rate": 8.43144327167392e-06, "loss": 0.0041, "step": 101330 }, { "epoch": 0.6666973678151089, "grad_norm": 0.08737809938446361, "learning_rate": 8.431025681333585e-06, "loss": 0.0014, "step": 101340 }, { "epoch": 0.6667631559903423, "grad_norm": 0.10748878359029419, "learning_rate": 8.430608045758138e-06, "loss": 0.0016, "step": 101350 }, { "epoch": 0.6668289441655757, "grad_norm": 0.028809753734700256, "learning_rate": 8.430190364953089e-06, "loss": 0.001, "step": 101360 }, { "epoch": 0.6668947323408091, "grad_norm": 0.09740463734180536, "learning_rate": 8.42977263892394e-06, "loss": 0.0017, "step": 101370 }, { "epoch": 0.6669605205160425, "grad_norm": 0.03523125289097718, "learning_rate": 8.429354867676204e-06, "loss": 0.0023, "step": 101380 }, { "epoch": 0.6670263086912759, "grad_norm": 0.08098256079523497, "learning_rate": 8.428937051215384e-06, "loss": 0.001, "step": 101390 }, { "epoch": 0.6670920968665092, "grad_norm": 0.08454053806927406, "learning_rate": 8.428519189546991e-06, "loss": 0.0019, "step": 101400 }, { "epoch": 0.6671578850417426, "grad_norm": 0.06156864808990494, "learning_rate": 8.428101282676534e-06, "loss": 0.0014, "step": 101410 }, { "epoch": 0.667223673216976, "grad_norm": 0.1007742891704252, "learning_rate": 8.427683330609522e-06, "loss": 0.0007, "step": 101420 }, { "epoch": 0.6672894613922093, "grad_norm": 0.0393897034709617, "learning_rate": 8.427265333351464e-06, "loss": 0.0018, "step": 101430 }, { "epoch": 0.6673552495674427, "grad_norm": 0.020267102654102104, "learning_rate": 8.426847290907874e-06, "loss": 0.0005, "step": 101440 }, { "epoch": 0.6674210377426761, "grad_norm": 0.0669770723973603, "learning_rate": 8.426429203284262e-06, "loss": 0.0017, "step": 101450 }, { "epoch": 0.6674868259179095, "grad_norm": 0.02858900320673713, "learning_rate": 8.42601107048614e-06, "loss": 0.0007, "step": 101460 }, { "epoch": 0.6675526140931429, "grad_norm": 0.08933819227640045, "learning_rate": 8.42559289251902e-06, "loss": 0.0014, "step": 101470 }, { "epoch": 0.6676184022683763, "grad_norm": 0.05556044122717225, "learning_rate": 8.425174669388418e-06, "loss": 0.0008, "step": 101480 }, { "epoch": 0.6676841904436097, "grad_norm": 0.04644559385018169, "learning_rate": 8.424756401099846e-06, "loss": 0.0012, "step": 101490 }, { "epoch": 0.667749978618843, "grad_norm": 0.023832129641898153, "learning_rate": 8.424338087658819e-06, "loss": 0.0019, "step": 101500 }, { "epoch": 0.6678157667940764, "grad_norm": 0.02432468440437518, "learning_rate": 8.42391972907085e-06, "loss": 0.0008, "step": 101510 }, { "epoch": 0.6678815549693098, "grad_norm": 0.06258464602619607, "learning_rate": 8.423501325341458e-06, "loss": 0.0014, "step": 101520 }, { "epoch": 0.6679473431445432, "grad_norm": 0.018443819958861123, "learning_rate": 8.423082876476158e-06, "loss": 0.0009, "step": 101530 }, { "epoch": 0.6680131313197766, "grad_norm": 0.07079996366594256, "learning_rate": 8.422664382480465e-06, "loss": 0.0012, "step": 101540 }, { "epoch": 0.66807891949501, "grad_norm": 0.006794252434397616, "learning_rate": 8.4222458433599e-06, "loss": 0.0013, "step": 101550 }, { "epoch": 0.6681447076702434, "grad_norm": 0.04748231323120435, "learning_rate": 8.421827259119977e-06, "loss": 0.0008, "step": 101560 }, { "epoch": 0.6682104958454768, "grad_norm": 0.032740981198218264, "learning_rate": 8.421408629766219e-06, "loss": 0.0008, "step": 101570 }, { "epoch": 0.6682762840207102, "grad_norm": 0.04644194610968773, "learning_rate": 8.420989955304142e-06, "loss": 0.0021, "step": 101580 }, { "epoch": 0.6683420721959435, "grad_norm": 0.06789632460024508, "learning_rate": 8.420571235739265e-06, "loss": 0.0015, "step": 101590 }, { "epoch": 0.6684078603711768, "grad_norm": 0.024094051223810625, "learning_rate": 8.420152471077114e-06, "loss": 0.002, "step": 101600 }, { "epoch": 0.6684736485464102, "grad_norm": 0.05849028798512989, "learning_rate": 8.419733661323205e-06, "loss": 0.0012, "step": 101610 }, { "epoch": 0.6685394367216436, "grad_norm": 0.01793062362625909, "learning_rate": 8.419314806483062e-06, "loss": 0.0014, "step": 101620 }, { "epoch": 0.668605224896877, "grad_norm": 0.037406209923324635, "learning_rate": 8.418895906562206e-06, "loss": 0.001, "step": 101630 }, { "epoch": 0.6686710130721104, "grad_norm": 0.059078065024207746, "learning_rate": 8.418476961566159e-06, "loss": 0.0011, "step": 101640 }, { "epoch": 0.6687368012473438, "grad_norm": 0.015222956472959156, "learning_rate": 8.418057971500447e-06, "loss": 0.0008, "step": 101650 }, { "epoch": 0.6688025894225772, "grad_norm": 0.06844644111200722, "learning_rate": 8.41763893637059e-06, "loss": 0.0018, "step": 101660 }, { "epoch": 0.6688683775978106, "grad_norm": 0.03728548526448192, "learning_rate": 8.41721985618212e-06, "loss": 0.0014, "step": 101670 }, { "epoch": 0.668934165773044, "grad_norm": 0.05244311945562172, "learning_rate": 8.416800730940553e-06, "loss": 0.0006, "step": 101680 }, { "epoch": 0.6689999539482774, "grad_norm": 0.11141755718124893, "learning_rate": 8.416381560651422e-06, "loss": 0.0025, "step": 101690 }, { "epoch": 0.6690657421235107, "grad_norm": 0.05160078857253352, "learning_rate": 8.41596234532025e-06, "loss": 0.0027, "step": 101700 }, { "epoch": 0.6691315302987441, "grad_norm": 0.015561168803588642, "learning_rate": 8.415543084952565e-06, "loss": 0.0014, "step": 101710 }, { "epoch": 0.6691973184739775, "grad_norm": 0.037648643500839386, "learning_rate": 8.415123779553893e-06, "loss": 0.0022, "step": 101720 }, { "epoch": 0.6692631066492108, "grad_norm": 0.04474307583980124, "learning_rate": 8.414704429129765e-06, "loss": 0.0019, "step": 101730 }, { "epoch": 0.6693288948244442, "grad_norm": 0.10076970807490583, "learning_rate": 8.414285033685708e-06, "loss": 0.001, "step": 101740 }, { "epoch": 0.6693946829996776, "grad_norm": 0.013745273827281855, "learning_rate": 8.413865593227251e-06, "loss": 0.0009, "step": 101750 }, { "epoch": 0.669460471174911, "grad_norm": 0.04537876394625759, "learning_rate": 8.413446107759925e-06, "loss": 0.0013, "step": 101760 }, { "epoch": 0.6695262593501444, "grad_norm": 0.07598373680887435, "learning_rate": 8.41302657728926e-06, "loss": 0.0011, "step": 101770 }, { "epoch": 0.6695920475253778, "grad_norm": 0.06176871382090253, "learning_rate": 8.412607001820788e-06, "loss": 0.0025, "step": 101780 }, { "epoch": 0.6696578357006112, "grad_norm": 0.024148112496925993, "learning_rate": 8.412187381360039e-06, "loss": 0.0022, "step": 101790 }, { "epoch": 0.6697236238758446, "grad_norm": 0.15478117488512128, "learning_rate": 8.411767715912546e-06, "loss": 0.0017, "step": 101800 }, { "epoch": 0.6697894120510779, "grad_norm": 0.02582580846115848, "learning_rate": 8.411348005483842e-06, "loss": 0.0014, "step": 101810 }, { "epoch": 0.6698552002263113, "grad_norm": 0.12582137408607838, "learning_rate": 8.410928250079462e-06, "loss": 0.0014, "step": 101820 }, { "epoch": 0.6699209884015447, "grad_norm": 0.06465357791044607, "learning_rate": 8.410508449704939e-06, "loss": 0.001, "step": 101830 }, { "epoch": 0.6699867765767781, "grad_norm": 0.02070886017165697, "learning_rate": 8.410088604365807e-06, "loss": 0.0013, "step": 101840 }, { "epoch": 0.6700525647520115, "grad_norm": 0.0010231194006084948, "learning_rate": 8.409668714067603e-06, "loss": 0.0013, "step": 101850 }, { "epoch": 0.6701183529272449, "grad_norm": 0.03566562529642525, "learning_rate": 8.40924877881586e-06, "loss": 0.001, "step": 101860 }, { "epoch": 0.6701841411024783, "grad_norm": 0.015052315652472616, "learning_rate": 8.408828798616117e-06, "loss": 0.002, "step": 101870 }, { "epoch": 0.6702499292777117, "grad_norm": 0.03998892446326396, "learning_rate": 8.40840877347391e-06, "loss": 0.0014, "step": 101880 }, { "epoch": 0.670315717452945, "grad_norm": 0.08396059830117968, "learning_rate": 8.407988703394777e-06, "loss": 0.0021, "step": 101890 }, { "epoch": 0.6703815056281784, "grad_norm": 0.07556089653884927, "learning_rate": 8.407568588384257e-06, "loss": 0.0015, "step": 101900 }, { "epoch": 0.6704472938034117, "grad_norm": 0.0221792267739236, "learning_rate": 8.407148428447888e-06, "loss": 0.0007, "step": 101910 }, { "epoch": 0.6705130819786451, "grad_norm": 0.006748168959724184, "learning_rate": 8.40672822359121e-06, "loss": 0.0013, "step": 101920 }, { "epoch": 0.6705788701538785, "grad_norm": 0.045284072241511354, "learning_rate": 8.406307973819761e-06, "loss": 0.001, "step": 101930 }, { "epoch": 0.6706446583291119, "grad_norm": 0.06711714493703505, "learning_rate": 8.405887679139084e-06, "loss": 0.0011, "step": 101940 }, { "epoch": 0.6707104465043453, "grad_norm": 0.06773657121159488, "learning_rate": 8.40546733955472e-06, "loss": 0.0024, "step": 101950 }, { "epoch": 0.6707762346795787, "grad_norm": 0.07390107230193849, "learning_rate": 8.40504695507221e-06, "loss": 0.0028, "step": 101960 }, { "epoch": 0.6708420228548121, "grad_norm": 0.043483761344557476, "learning_rate": 8.404626525697097e-06, "loss": 0.0012, "step": 101970 }, { "epoch": 0.6709078110300455, "grad_norm": 0.05978322809389642, "learning_rate": 8.404206051434923e-06, "loss": 0.0013, "step": 101980 }, { "epoch": 0.6709735992052789, "grad_norm": 0.018099753417182805, "learning_rate": 8.403785532291233e-06, "loss": 0.0014, "step": 101990 }, { "epoch": 0.6710393873805123, "grad_norm": 0.020408580428321218, "learning_rate": 8.40336496827157e-06, "loss": 0.0029, "step": 102000 }, { "epoch": 0.6711051755557456, "grad_norm": 0.03644064869789221, "learning_rate": 8.40294435938148e-06, "loss": 0.0017, "step": 102010 }, { "epoch": 0.671170963730979, "grad_norm": 0.018144279302857988, "learning_rate": 8.402523705626507e-06, "loss": 0.0012, "step": 102020 }, { "epoch": 0.6712367519062123, "grad_norm": 0.031143633406454683, "learning_rate": 8.402103007012196e-06, "loss": 0.002, "step": 102030 }, { "epoch": 0.6713025400814457, "grad_norm": 0.10452704478615764, "learning_rate": 8.401682263544097e-06, "loss": 0.0009, "step": 102040 }, { "epoch": 0.6713683282566791, "grad_norm": 0.08980143887492688, "learning_rate": 8.401261475227756e-06, "loss": 0.0019, "step": 102050 }, { "epoch": 0.6714341164319125, "grad_norm": 0.0818152455514214, "learning_rate": 8.400840642068718e-06, "loss": 0.0015, "step": 102060 }, { "epoch": 0.6714999046071459, "grad_norm": 0.07242812247535074, "learning_rate": 8.400419764072533e-06, "loss": 0.0016, "step": 102070 }, { "epoch": 0.6715656927823793, "grad_norm": 0.04298450744231155, "learning_rate": 8.399998841244751e-06, "loss": 0.001, "step": 102080 }, { "epoch": 0.6716314809576127, "grad_norm": 0.011774366791604674, "learning_rate": 8.399577873590922e-06, "loss": 0.0008, "step": 102090 }, { "epoch": 0.6716972691328461, "grad_norm": 0.02675945981981922, "learning_rate": 8.399156861116593e-06, "loss": 0.0023, "step": 102100 }, { "epoch": 0.6717630573080794, "grad_norm": 0.016872910961007306, "learning_rate": 8.398735803827318e-06, "loss": 0.0019, "step": 102110 }, { "epoch": 0.6718288454833128, "grad_norm": 0.06908006938791691, "learning_rate": 8.398314701728647e-06, "loss": 0.0012, "step": 102120 }, { "epoch": 0.6718946336585462, "grad_norm": 0.008231828665519534, "learning_rate": 8.39789355482613e-06, "loss": 0.0008, "step": 102130 }, { "epoch": 0.6719604218337796, "grad_norm": 0.07300127177018424, "learning_rate": 8.397472363125322e-06, "loss": 0.0023, "step": 102140 }, { "epoch": 0.672026210009013, "grad_norm": 0.019376950534004417, "learning_rate": 8.397051126631775e-06, "loss": 0.0035, "step": 102150 }, { "epoch": 0.6720919981842464, "grad_norm": 0.0840759154360369, "learning_rate": 8.396629845351043e-06, "loss": 0.0017, "step": 102160 }, { "epoch": 0.6721577863594798, "grad_norm": 0.3563916160559209, "learning_rate": 8.39620851928868e-06, "loss": 0.0029, "step": 102170 }, { "epoch": 0.6722235745347132, "grad_norm": 0.0038435520480033128, "learning_rate": 8.39578714845024e-06, "loss": 0.0017, "step": 102180 }, { "epoch": 0.6722893627099465, "grad_norm": 0.02723689343256983, "learning_rate": 8.39536573284128e-06, "loss": 0.0017, "step": 102190 }, { "epoch": 0.6723551508851799, "grad_norm": 0.043782359758033504, "learning_rate": 8.394944272467353e-06, "loss": 0.0019, "step": 102200 }, { "epoch": 0.6724209390604133, "grad_norm": 0.026823090396396524, "learning_rate": 8.39452276733402e-06, "loss": 0.0013, "step": 102210 }, { "epoch": 0.6724867272356466, "grad_norm": 0.03024861128638956, "learning_rate": 8.394101217446836e-06, "loss": 0.0009, "step": 102220 }, { "epoch": 0.67255251541088, "grad_norm": 0.14688082195588795, "learning_rate": 8.393679622811359e-06, "loss": 0.0009, "step": 102230 }, { "epoch": 0.6726183035861134, "grad_norm": 0.08907617475407514, "learning_rate": 8.393257983433147e-06, "loss": 0.0017, "step": 102240 }, { "epoch": 0.6726840917613468, "grad_norm": 0.162950556521387, "learning_rate": 8.39283629931776e-06, "loss": 0.0028, "step": 102250 }, { "epoch": 0.6727498799365802, "grad_norm": 0.029686476562205754, "learning_rate": 8.392414570470755e-06, "loss": 0.0016, "step": 102260 }, { "epoch": 0.6728156681118136, "grad_norm": 0.10794805032845903, "learning_rate": 8.391992796897694e-06, "loss": 0.0018, "step": 102270 }, { "epoch": 0.672881456287047, "grad_norm": 0.012339318468336566, "learning_rate": 8.39157097860414e-06, "loss": 0.001, "step": 102280 }, { "epoch": 0.6729472444622804, "grad_norm": 0.15520647083492314, "learning_rate": 8.391149115595648e-06, "loss": 0.0012, "step": 102290 }, { "epoch": 0.6730130326375138, "grad_norm": 0.04903096563540799, "learning_rate": 8.390727207877785e-06, "loss": 0.001, "step": 102300 }, { "epoch": 0.6730788208127472, "grad_norm": 0.01278941108529647, "learning_rate": 8.390305255456113e-06, "loss": 0.002, "step": 102310 }, { "epoch": 0.6731446089879805, "grad_norm": 0.31348553757016095, "learning_rate": 8.389883258336193e-06, "loss": 0.0023, "step": 102320 }, { "epoch": 0.6732103971632138, "grad_norm": 0.021373169687544068, "learning_rate": 8.389461216523592e-06, "loss": 0.0014, "step": 102330 }, { "epoch": 0.6732761853384472, "grad_norm": 0.02238863561281127, "learning_rate": 8.389039130023871e-06, "loss": 0.0011, "step": 102340 }, { "epoch": 0.6733419735136806, "grad_norm": 0.04028005318506777, "learning_rate": 8.388616998842597e-06, "loss": 0.0011, "step": 102350 }, { "epoch": 0.673407761688914, "grad_norm": 0.13355612289676694, "learning_rate": 8.388194822985332e-06, "loss": 0.0021, "step": 102360 }, { "epoch": 0.6734735498641474, "grad_norm": 0.12177737456201133, "learning_rate": 8.387772602457647e-06, "loss": 0.0008, "step": 102370 }, { "epoch": 0.6735393380393808, "grad_norm": 0.020278008545003393, "learning_rate": 8.387350337265103e-06, "loss": 0.0012, "step": 102380 }, { "epoch": 0.6736051262146142, "grad_norm": 0.02819117534263057, "learning_rate": 8.386928027413272e-06, "loss": 0.0007, "step": 102390 }, { "epoch": 0.6736709143898476, "grad_norm": 0.06282577006217421, "learning_rate": 8.38650567290772e-06, "loss": 0.0007, "step": 102400 }, { "epoch": 0.673736702565081, "grad_norm": 0.036155557823516565, "learning_rate": 8.386083273754018e-06, "loss": 0.0015, "step": 102410 }, { "epoch": 0.6738024907403143, "grad_norm": 0.05215843573962011, "learning_rate": 8.38566082995773e-06, "loss": 0.0012, "step": 102420 }, { "epoch": 0.6738682789155477, "grad_norm": 0.0372893412109578, "learning_rate": 8.38523834152443e-06, "loss": 0.0018, "step": 102430 }, { "epoch": 0.6739340670907811, "grad_norm": 0.05188882873200079, "learning_rate": 8.384815808459683e-06, "loss": 0.0017, "step": 102440 }, { "epoch": 0.6739998552660145, "grad_norm": 0.38319432928057295, "learning_rate": 8.384393230769063e-06, "loss": 0.0028, "step": 102450 }, { "epoch": 0.6740656434412479, "grad_norm": 0.036361319078503214, "learning_rate": 8.383970608458143e-06, "loss": 0.003, "step": 102460 }, { "epoch": 0.6741314316164813, "grad_norm": 0.1008642844933487, "learning_rate": 8.383547941532492e-06, "loss": 0.0021, "step": 102470 }, { "epoch": 0.6741972197917147, "grad_norm": 0.12278749795880521, "learning_rate": 8.383125229997685e-06, "loss": 0.0015, "step": 102480 }, { "epoch": 0.674263007966948, "grad_norm": 0.10609607613331057, "learning_rate": 8.382702473859293e-06, "loss": 0.0017, "step": 102490 }, { "epoch": 0.6743287961421814, "grad_norm": 0.0808492082922933, "learning_rate": 8.38227967312289e-06, "loss": 0.0018, "step": 102500 }, { "epoch": 0.6743945843174148, "grad_norm": 0.08505744185866579, "learning_rate": 8.381856827794052e-06, "loss": 0.0017, "step": 102510 }, { "epoch": 0.6744603724926481, "grad_norm": 0.06432487621293417, "learning_rate": 8.381433937878352e-06, "loss": 0.0035, "step": 102520 }, { "epoch": 0.6745261606678815, "grad_norm": 0.11593108162298824, "learning_rate": 8.381011003381366e-06, "loss": 0.0021, "step": 102530 }, { "epoch": 0.6745919488431149, "grad_norm": 0.0556163525181557, "learning_rate": 8.380588024308668e-06, "loss": 0.0013, "step": 102540 }, { "epoch": 0.6746577370183483, "grad_norm": 0.051139616002686045, "learning_rate": 8.380165000665838e-06, "loss": 0.0017, "step": 102550 }, { "epoch": 0.6747235251935817, "grad_norm": 0.14180652771353158, "learning_rate": 8.379741932458452e-06, "loss": 0.002, "step": 102560 }, { "epoch": 0.6747893133688151, "grad_norm": 0.020092611584110857, "learning_rate": 8.379318819692088e-06, "loss": 0.0013, "step": 102570 }, { "epoch": 0.6748551015440485, "grad_norm": 0.08583324476715211, "learning_rate": 8.378895662372321e-06, "loss": 0.0017, "step": 102580 }, { "epoch": 0.6749208897192819, "grad_norm": 0.02032368087337619, "learning_rate": 8.378472460504737e-06, "loss": 0.0017, "step": 102590 }, { "epoch": 0.6749866778945153, "grad_norm": 0.024591450440814684, "learning_rate": 8.37804921409491e-06, "loss": 0.0025, "step": 102600 }, { "epoch": 0.6750524660697487, "grad_norm": 0.06954617140761332, "learning_rate": 8.37762592314842e-06, "loss": 0.0014, "step": 102610 }, { "epoch": 0.675118254244982, "grad_norm": 0.05616993668786023, "learning_rate": 8.377202587670851e-06, "loss": 0.0009, "step": 102620 }, { "epoch": 0.6751840424202153, "grad_norm": 0.07177913863372698, "learning_rate": 8.376779207667783e-06, "loss": 0.0018, "step": 102630 }, { "epoch": 0.6752498305954487, "grad_norm": 0.04538434715521294, "learning_rate": 8.376355783144795e-06, "loss": 0.0029, "step": 102640 }, { "epoch": 0.6753156187706821, "grad_norm": 0.03160357610024469, "learning_rate": 8.375932314107474e-06, "loss": 0.0013, "step": 102650 }, { "epoch": 0.6753814069459155, "grad_norm": 0.10038112845310154, "learning_rate": 8.375508800561401e-06, "loss": 0.0013, "step": 102660 }, { "epoch": 0.6754471951211489, "grad_norm": 0.263551123492693, "learning_rate": 8.37508524251216e-06, "loss": 0.0029, "step": 102670 }, { "epoch": 0.6755129832963823, "grad_norm": 0.054849184516016024, "learning_rate": 8.374661639965333e-06, "loss": 0.0012, "step": 102680 }, { "epoch": 0.6755787714716157, "grad_norm": 0.08726945355905154, "learning_rate": 8.374237992926508e-06, "loss": 0.003, "step": 102690 }, { "epoch": 0.6756445596468491, "grad_norm": 0.016798095451534916, "learning_rate": 8.373814301401271e-06, "loss": 0.0009, "step": 102700 }, { "epoch": 0.6757103478220825, "grad_norm": 0.005751000094498173, "learning_rate": 8.373390565395205e-06, "loss": 0.0017, "step": 102710 }, { "epoch": 0.6757761359973159, "grad_norm": 0.017925277778795854, "learning_rate": 8.372966784913897e-06, "loss": 0.0018, "step": 102720 }, { "epoch": 0.6758419241725492, "grad_norm": 0.042769312225584, "learning_rate": 8.372542959962935e-06, "loss": 0.0016, "step": 102730 }, { "epoch": 0.6759077123477826, "grad_norm": 0.06716390243842055, "learning_rate": 8.372119090547908e-06, "loss": 0.0034, "step": 102740 }, { "epoch": 0.675973500523016, "grad_norm": 0.004461592709486658, "learning_rate": 8.371695176674403e-06, "loss": 0.0012, "step": 102750 }, { "epoch": 0.6760392886982494, "grad_norm": 0.021821770714902117, "learning_rate": 8.371271218348009e-06, "loss": 0.0018, "step": 102760 }, { "epoch": 0.6761050768734828, "grad_norm": 0.014446789313823677, "learning_rate": 8.370847215574313e-06, "loss": 0.0012, "step": 102770 }, { "epoch": 0.6761708650487162, "grad_norm": 0.08572200809503483, "learning_rate": 8.370423168358911e-06, "loss": 0.0015, "step": 102780 }, { "epoch": 0.6762366532239495, "grad_norm": 0.0444376677374331, "learning_rate": 8.369999076707388e-06, "loss": 0.0011, "step": 102790 }, { "epoch": 0.6763024413991829, "grad_norm": 0.03343260531458739, "learning_rate": 8.36957494062534e-06, "loss": 0.0009, "step": 102800 }, { "epoch": 0.6763682295744163, "grad_norm": 0.03922882612977189, "learning_rate": 8.369150760118357e-06, "loss": 0.0022, "step": 102810 }, { "epoch": 0.6764340177496497, "grad_norm": 0.08281767202393021, "learning_rate": 8.36872653519203e-06, "loss": 0.0014, "step": 102820 }, { "epoch": 0.676499805924883, "grad_norm": 0.016446831081463005, "learning_rate": 8.368302265851953e-06, "loss": 0.0007, "step": 102830 }, { "epoch": 0.6765655941001164, "grad_norm": 0.002157890279191424, "learning_rate": 8.367877952103717e-06, "loss": 0.0012, "step": 102840 }, { "epoch": 0.6766313822753498, "grad_norm": 0.06454772011101341, "learning_rate": 8.367453593952922e-06, "loss": 0.0028, "step": 102850 }, { "epoch": 0.6766971704505832, "grad_norm": 0.09255510446727631, "learning_rate": 8.367029191405159e-06, "loss": 0.0016, "step": 102860 }, { "epoch": 0.6767629586258166, "grad_norm": 0.08790733514805711, "learning_rate": 8.366604744466026e-06, "loss": 0.0009, "step": 102870 }, { "epoch": 0.67682874680105, "grad_norm": 0.10584664633893737, "learning_rate": 8.366180253141114e-06, "loss": 0.0053, "step": 102880 }, { "epoch": 0.6768945349762834, "grad_norm": 0.06892009741913374, "learning_rate": 8.365755717436024e-06, "loss": 0.0014, "step": 102890 }, { "epoch": 0.6769603231515168, "grad_norm": 0.12575598062124377, "learning_rate": 8.365331137356351e-06, "loss": 0.0012, "step": 102900 }, { "epoch": 0.6770261113267502, "grad_norm": 0.10187858422360382, "learning_rate": 8.364906512907694e-06, "loss": 0.0017, "step": 102910 }, { "epoch": 0.6770918995019836, "grad_norm": 0.005970661485163562, "learning_rate": 8.36448184409565e-06, "loss": 0.0013, "step": 102920 }, { "epoch": 0.6771576876772168, "grad_norm": 0.045799980621399415, "learning_rate": 8.364057130925819e-06, "loss": 0.0013, "step": 102930 }, { "epoch": 0.6772234758524502, "grad_norm": 0.1396773370320485, "learning_rate": 8.3636323734038e-06, "loss": 0.0017, "step": 102940 }, { "epoch": 0.6772892640276836, "grad_norm": 0.037813737564372674, "learning_rate": 8.363207571535194e-06, "loss": 0.0011, "step": 102950 }, { "epoch": 0.677355052202917, "grad_norm": 0.04932263535230235, "learning_rate": 8.3627827253256e-06, "loss": 0.0027, "step": 102960 }, { "epoch": 0.6774208403781504, "grad_norm": 0.1759890818888971, "learning_rate": 8.362357834780621e-06, "loss": 0.0018, "step": 102970 }, { "epoch": 0.6774866285533838, "grad_norm": 0.14453344351973432, "learning_rate": 8.361932899905856e-06, "loss": 0.0016, "step": 102980 }, { "epoch": 0.6775524167286172, "grad_norm": 0.04389030136992421, "learning_rate": 8.361507920706911e-06, "loss": 0.0012, "step": 102990 }, { "epoch": 0.6776182049038506, "grad_norm": 0.0872521118343269, "learning_rate": 8.361082897189388e-06, "loss": 0.0015, "step": 103000 }, { "epoch": 0.677683993079084, "grad_norm": 0.00888064244471623, "learning_rate": 8.360657829358888e-06, "loss": 0.001, "step": 103010 }, { "epoch": 0.6777497812543174, "grad_norm": 0.022329574065066957, "learning_rate": 8.360232717221017e-06, "loss": 0.0009, "step": 103020 }, { "epoch": 0.6778155694295507, "grad_norm": 0.018118357760619506, "learning_rate": 8.359807560781381e-06, "loss": 0.0009, "step": 103030 }, { "epoch": 0.6778813576047841, "grad_norm": 0.14372777063451117, "learning_rate": 8.359382360045582e-06, "loss": 0.0017, "step": 103040 }, { "epoch": 0.6779471457800175, "grad_norm": 0.01668280061858679, "learning_rate": 8.35895711501923e-06, "loss": 0.0022, "step": 103050 }, { "epoch": 0.6780129339552509, "grad_norm": 0.05614558849354816, "learning_rate": 8.35853182570793e-06, "loss": 0.0009, "step": 103060 }, { "epoch": 0.6780787221304843, "grad_norm": 0.1557204791577254, "learning_rate": 8.358106492117286e-06, "loss": 0.0015, "step": 103070 }, { "epoch": 0.6781445103057177, "grad_norm": 0.013178458200135936, "learning_rate": 8.357681114252909e-06, "loss": 0.0009, "step": 103080 }, { "epoch": 0.678210298480951, "grad_norm": 0.03377370072941364, "learning_rate": 8.357255692120407e-06, "loss": 0.0012, "step": 103090 }, { "epoch": 0.6782760866561844, "grad_norm": 0.0641153999011148, "learning_rate": 8.356830225725388e-06, "loss": 0.0018, "step": 103100 }, { "epoch": 0.6783418748314178, "grad_norm": 0.033747599372391184, "learning_rate": 8.356404715073464e-06, "loss": 0.0007, "step": 103110 }, { "epoch": 0.6784076630066512, "grad_norm": 0.054885977361516854, "learning_rate": 8.355979160170237e-06, "loss": 0.0022, "step": 103120 }, { "epoch": 0.6784734511818845, "grad_norm": 0.04477530064307144, "learning_rate": 8.355553561021327e-06, "loss": 0.002, "step": 103130 }, { "epoch": 0.6785392393571179, "grad_norm": 0.07859415390114861, "learning_rate": 8.355127917632342e-06, "loss": 0.0008, "step": 103140 }, { "epoch": 0.6786050275323513, "grad_norm": 0.039080710567625826, "learning_rate": 8.35470223000889e-06, "loss": 0.0012, "step": 103150 }, { "epoch": 0.6786708157075847, "grad_norm": 0.028342588424451462, "learning_rate": 8.35427649815659e-06, "loss": 0.0006, "step": 103160 }, { "epoch": 0.6787366038828181, "grad_norm": 0.02883421885759579, "learning_rate": 8.353850722081049e-06, "loss": 0.0007, "step": 103170 }, { "epoch": 0.6788023920580515, "grad_norm": 0.07983402892337195, "learning_rate": 8.353424901787884e-06, "loss": 0.0024, "step": 103180 }, { "epoch": 0.6788681802332849, "grad_norm": 0.1275414208621448, "learning_rate": 8.352999037282706e-06, "loss": 0.0013, "step": 103190 }, { "epoch": 0.6789339684085183, "grad_norm": 0.14125552084049803, "learning_rate": 8.352573128571133e-06, "loss": 0.0019, "step": 103200 }, { "epoch": 0.6789997565837517, "grad_norm": 0.12716909467456627, "learning_rate": 8.35214717565878e-06, "loss": 0.0016, "step": 103210 }, { "epoch": 0.6790655447589851, "grad_norm": 0.04044941448380012, "learning_rate": 8.35172117855126e-06, "loss": 0.0029, "step": 103220 }, { "epoch": 0.6791313329342185, "grad_norm": 0.002938808773793235, "learning_rate": 8.35129513725419e-06, "loss": 0.0014, "step": 103230 }, { "epoch": 0.6791971211094517, "grad_norm": 0.05017891951501262, "learning_rate": 8.35086905177319e-06, "loss": 0.0021, "step": 103240 }, { "epoch": 0.6792629092846851, "grad_norm": 0.029327213828144735, "learning_rate": 8.350442922113874e-06, "loss": 0.0013, "step": 103250 }, { "epoch": 0.6793286974599185, "grad_norm": 0.029696516982494127, "learning_rate": 8.350016748281864e-06, "loss": 0.0014, "step": 103260 }, { "epoch": 0.6793944856351519, "grad_norm": 0.028603486395811544, "learning_rate": 8.349590530282774e-06, "loss": 0.0014, "step": 103270 }, { "epoch": 0.6794602738103853, "grad_norm": 0.031411049610793156, "learning_rate": 8.349164268122224e-06, "loss": 0.001, "step": 103280 }, { "epoch": 0.6795260619856187, "grad_norm": 0.09931214949639856, "learning_rate": 8.34873796180584e-06, "loss": 0.0029, "step": 103290 }, { "epoch": 0.6795918501608521, "grad_norm": 0.037589864747440305, "learning_rate": 8.348311611339238e-06, "loss": 0.0011, "step": 103300 }, { "epoch": 0.6796576383360855, "grad_norm": 0.054365284967105694, "learning_rate": 8.347885216728036e-06, "loss": 0.0012, "step": 103310 }, { "epoch": 0.6797234265113189, "grad_norm": 0.06506250041556794, "learning_rate": 8.34745877797786e-06, "loss": 0.002, "step": 103320 }, { "epoch": 0.6797892146865523, "grad_norm": 0.030299176631819923, "learning_rate": 8.347032295094331e-06, "loss": 0.0014, "step": 103330 }, { "epoch": 0.6798550028617856, "grad_norm": 0.04655469725010242, "learning_rate": 8.346605768083072e-06, "loss": 0.0026, "step": 103340 }, { "epoch": 0.679920791037019, "grad_norm": 0.06695340722140611, "learning_rate": 8.346179196949708e-06, "loss": 0.0014, "step": 103350 }, { "epoch": 0.6799865792122524, "grad_norm": 0.10566877213439951, "learning_rate": 8.345752581699859e-06, "loss": 0.0013, "step": 103360 }, { "epoch": 0.6800523673874858, "grad_norm": 0.13832816357646857, "learning_rate": 8.345325922339153e-06, "loss": 0.0009, "step": 103370 }, { "epoch": 0.6801181555627192, "grad_norm": 0.09799453026912161, "learning_rate": 8.344899218873213e-06, "loss": 0.001, "step": 103380 }, { "epoch": 0.6801839437379525, "grad_norm": 0.030540520920022986, "learning_rate": 8.344472471307666e-06, "loss": 0.002, "step": 103390 }, { "epoch": 0.6802497319131859, "grad_norm": 0.01752971941275064, "learning_rate": 8.344045679648139e-06, "loss": 0.0017, "step": 103400 }, { "epoch": 0.6803155200884193, "grad_norm": 0.009392310646336176, "learning_rate": 8.343618843900257e-06, "loss": 0.0006, "step": 103410 }, { "epoch": 0.6803813082636527, "grad_norm": 0.01917176642974138, "learning_rate": 8.343191964069648e-06, "loss": 0.002, "step": 103420 }, { "epoch": 0.6804470964388861, "grad_norm": 0.038278816367676154, "learning_rate": 8.34276504016194e-06, "loss": 0.0027, "step": 103430 }, { "epoch": 0.6805128846141194, "grad_norm": 0.052250010903030684, "learning_rate": 8.342338072182762e-06, "loss": 0.0011, "step": 103440 }, { "epoch": 0.6805786727893528, "grad_norm": 0.049566285517481684, "learning_rate": 8.341911060137744e-06, "loss": 0.0011, "step": 103450 }, { "epoch": 0.6806444609645862, "grad_norm": 0.07677826238348966, "learning_rate": 8.341484004032515e-06, "loss": 0.0015, "step": 103460 }, { "epoch": 0.6807102491398196, "grad_norm": 0.13011843180301014, "learning_rate": 8.341056903872704e-06, "loss": 0.0017, "step": 103470 }, { "epoch": 0.680776037315053, "grad_norm": 0.036208736009200146, "learning_rate": 8.340629759663945e-06, "loss": 0.0033, "step": 103480 }, { "epoch": 0.6808418254902864, "grad_norm": 0.013724307573327408, "learning_rate": 8.340202571411866e-06, "loss": 0.0011, "step": 103490 }, { "epoch": 0.6809076136655198, "grad_norm": 0.06069626518691989, "learning_rate": 8.339775339122103e-06, "loss": 0.0021, "step": 103500 }, { "epoch": 0.6809734018407532, "grad_norm": 0.1877361957253163, "learning_rate": 8.339348062800286e-06, "loss": 0.0015, "step": 103510 }, { "epoch": 0.6810391900159866, "grad_norm": 0.04299205460951142, "learning_rate": 8.338920742452046e-06, "loss": 0.0015, "step": 103520 }, { "epoch": 0.68110497819122, "grad_norm": 0.09350087009259603, "learning_rate": 8.338493378083023e-06, "loss": 0.0023, "step": 103530 }, { "epoch": 0.6811707663664532, "grad_norm": 0.15815313607216644, "learning_rate": 8.338065969698847e-06, "loss": 0.0024, "step": 103540 }, { "epoch": 0.6812365545416866, "grad_norm": 0.1263332083401223, "learning_rate": 8.337638517305154e-06, "loss": 0.0018, "step": 103550 }, { "epoch": 0.68130234271692, "grad_norm": 0.08043648485451012, "learning_rate": 8.33721102090758e-06, "loss": 0.0035, "step": 103560 }, { "epoch": 0.6813681308921534, "grad_norm": 0.06636557416745471, "learning_rate": 8.33678348051176e-06, "loss": 0.0016, "step": 103570 }, { "epoch": 0.6814339190673868, "grad_norm": 0.09871969637582714, "learning_rate": 8.336355896123334e-06, "loss": 0.0018, "step": 103580 }, { "epoch": 0.6814997072426202, "grad_norm": 0.043447870960137615, "learning_rate": 8.335928267747935e-06, "loss": 0.0018, "step": 103590 }, { "epoch": 0.6815654954178536, "grad_norm": 0.08811130434070724, "learning_rate": 8.335500595391202e-06, "loss": 0.0011, "step": 103600 }, { "epoch": 0.681631283593087, "grad_norm": 0.017202717357076527, "learning_rate": 8.335072879058776e-06, "loss": 0.0024, "step": 103610 }, { "epoch": 0.6816970717683204, "grad_norm": 0.011606525676529842, "learning_rate": 8.334645118756296e-06, "loss": 0.0017, "step": 103620 }, { "epoch": 0.6817628599435538, "grad_norm": 0.023174629258600955, "learning_rate": 8.334217314489397e-06, "loss": 0.0015, "step": 103630 }, { "epoch": 0.6818286481187872, "grad_norm": 0.034681096555643894, "learning_rate": 8.333789466263722e-06, "loss": 0.001, "step": 103640 }, { "epoch": 0.6818944362940205, "grad_norm": 0.018208950279332684, "learning_rate": 8.333361574084915e-06, "loss": 0.0008, "step": 103650 }, { "epoch": 0.6819602244692539, "grad_norm": 0.05097578731068277, "learning_rate": 8.332933637958614e-06, "loss": 0.0022, "step": 103660 }, { "epoch": 0.6820260126444873, "grad_norm": 0.07662831648691007, "learning_rate": 8.33250565789046e-06, "loss": 0.0028, "step": 103670 }, { "epoch": 0.6820918008197207, "grad_norm": 0.000749632655075351, "learning_rate": 8.332077633886098e-06, "loss": 0.0015, "step": 103680 }, { "epoch": 0.682157588994954, "grad_norm": 0.268429221395572, "learning_rate": 8.33164956595117e-06, "loss": 0.0019, "step": 103690 }, { "epoch": 0.6822233771701874, "grad_norm": 0.07579014307909689, "learning_rate": 8.331221454091319e-06, "loss": 0.0009, "step": 103700 }, { "epoch": 0.6822891653454208, "grad_norm": 0.02607145729660887, "learning_rate": 8.33079329831219e-06, "loss": 0.0009, "step": 103710 }, { "epoch": 0.6823549535206542, "grad_norm": 0.009550047510751817, "learning_rate": 8.33036509861943e-06, "loss": 0.0015, "step": 103720 }, { "epoch": 0.6824207416958876, "grad_norm": 0.003046996929494993, "learning_rate": 8.32993685501868e-06, "loss": 0.0008, "step": 103730 }, { "epoch": 0.682486529871121, "grad_norm": 0.029482920699409148, "learning_rate": 8.32950856751559e-06, "loss": 0.0015, "step": 103740 }, { "epoch": 0.6825523180463543, "grad_norm": 0.03476484856144971, "learning_rate": 8.329080236115806e-06, "loss": 0.0011, "step": 103750 }, { "epoch": 0.6826181062215877, "grad_norm": 0.13138738856233292, "learning_rate": 8.328651860824973e-06, "loss": 0.0012, "step": 103760 }, { "epoch": 0.6826838943968211, "grad_norm": 0.020607710269027163, "learning_rate": 8.328223441648739e-06, "loss": 0.0013, "step": 103770 }, { "epoch": 0.6827496825720545, "grad_norm": 0.03375207133537554, "learning_rate": 8.327794978592757e-06, "loss": 0.0014, "step": 103780 }, { "epoch": 0.6828154707472879, "grad_norm": 0.05494578150026559, "learning_rate": 8.32736647166267e-06, "loss": 0.0009, "step": 103790 }, { "epoch": 0.6828812589225213, "grad_norm": 0.1424818631980359, "learning_rate": 8.326937920864128e-06, "loss": 0.0011, "step": 103800 }, { "epoch": 0.6829470470977547, "grad_norm": 0.012838426354591719, "learning_rate": 8.326509326202785e-06, "loss": 0.0023, "step": 103810 }, { "epoch": 0.6830128352729881, "grad_norm": 0.047383918313239994, "learning_rate": 8.326080687684288e-06, "loss": 0.0019, "step": 103820 }, { "epoch": 0.6830786234482215, "grad_norm": 0.1306810660960552, "learning_rate": 8.32565200531429e-06, "loss": 0.0019, "step": 103830 }, { "epoch": 0.6831444116234549, "grad_norm": 0.02486056279362268, "learning_rate": 8.325223279098444e-06, "loss": 0.0008, "step": 103840 }, { "epoch": 0.6832101997986881, "grad_norm": 0.03195517288695396, "learning_rate": 8.3247945090424e-06, "loss": 0.0029, "step": 103850 }, { "epoch": 0.6832759879739215, "grad_norm": 0.1072655489760303, "learning_rate": 8.324365695151812e-06, "loss": 0.001, "step": 103860 }, { "epoch": 0.6833417761491549, "grad_norm": 0.0436598925371955, "learning_rate": 8.323936837432334e-06, "loss": 0.0017, "step": 103870 }, { "epoch": 0.6834075643243883, "grad_norm": 0.07368486291553031, "learning_rate": 8.32350793588962e-06, "loss": 0.0018, "step": 103880 }, { "epoch": 0.6834733524996217, "grad_norm": 0.12457575584910162, "learning_rate": 8.323078990529324e-06, "loss": 0.0023, "step": 103890 }, { "epoch": 0.6835391406748551, "grad_norm": 0.010466911048898896, "learning_rate": 8.3226500013571e-06, "loss": 0.0008, "step": 103900 }, { "epoch": 0.6836049288500885, "grad_norm": 0.01549277349821762, "learning_rate": 8.322220968378605e-06, "loss": 0.0009, "step": 103910 }, { "epoch": 0.6836707170253219, "grad_norm": 0.09523117375905178, "learning_rate": 8.321791891599497e-06, "loss": 0.0019, "step": 103920 }, { "epoch": 0.6837365052005553, "grad_norm": 0.05621071990248971, "learning_rate": 8.321362771025432e-06, "loss": 0.0011, "step": 103930 }, { "epoch": 0.6838022933757887, "grad_norm": 0.06451682624758098, "learning_rate": 8.320933606662067e-06, "loss": 0.0007, "step": 103940 }, { "epoch": 0.683868081551022, "grad_norm": 0.020855136681832597, "learning_rate": 8.320504398515062e-06, "loss": 0.0018, "step": 103950 }, { "epoch": 0.6839338697262554, "grad_norm": 0.17811461683364507, "learning_rate": 8.320075146590072e-06, "loss": 0.0016, "step": 103960 }, { "epoch": 0.6839996579014888, "grad_norm": 0.13714706197044163, "learning_rate": 8.319645850892758e-06, "loss": 0.0028, "step": 103970 }, { "epoch": 0.6840654460767222, "grad_norm": 0.14425052718073766, "learning_rate": 8.319216511428784e-06, "loss": 0.0012, "step": 103980 }, { "epoch": 0.6841312342519555, "grad_norm": 0.03610314750826485, "learning_rate": 8.318787128203805e-06, "loss": 0.0013, "step": 103990 }, { "epoch": 0.6841970224271889, "grad_norm": 0.05543099928678702, "learning_rate": 8.318357701223483e-06, "loss": 0.0022, "step": 104000 }, { "epoch": 0.6842628106024223, "grad_norm": 0.07563010107407725, "learning_rate": 8.317928230493481e-06, "loss": 0.0012, "step": 104010 }, { "epoch": 0.6843285987776557, "grad_norm": 0.019861820617853625, "learning_rate": 8.317498716019464e-06, "loss": 0.0009, "step": 104020 }, { "epoch": 0.6843943869528891, "grad_norm": 0.00617415922924373, "learning_rate": 8.317069157807087e-06, "loss": 0.0023, "step": 104030 }, { "epoch": 0.6844601751281225, "grad_norm": 0.022171126725541387, "learning_rate": 8.31663955586202e-06, "loss": 0.0013, "step": 104040 }, { "epoch": 0.6845259633033558, "grad_norm": 0.02703505109790045, "learning_rate": 8.316209910189926e-06, "loss": 0.0011, "step": 104050 }, { "epoch": 0.6845917514785892, "grad_norm": 0.2611274479313011, "learning_rate": 8.315780220796466e-06, "loss": 0.0021, "step": 104060 }, { "epoch": 0.6846575396538226, "grad_norm": 0.02309161394971672, "learning_rate": 8.315350487687308e-06, "loss": 0.0011, "step": 104070 }, { "epoch": 0.684723327829056, "grad_norm": 0.024052749026272666, "learning_rate": 8.31492071086812e-06, "loss": 0.0016, "step": 104080 }, { "epoch": 0.6847891160042894, "grad_norm": 0.019191634827951116, "learning_rate": 8.314490890344562e-06, "loss": 0.0008, "step": 104090 }, { "epoch": 0.6848549041795228, "grad_norm": 0.1210304891588382, "learning_rate": 8.314061026122307e-06, "loss": 0.0015, "step": 104100 }, { "epoch": 0.6849206923547562, "grad_norm": 0.0052153823151136385, "learning_rate": 8.313631118207016e-06, "loss": 0.0013, "step": 104110 }, { "epoch": 0.6849864805299896, "grad_norm": 0.001205085826966936, "learning_rate": 8.313201166604364e-06, "loss": 0.0011, "step": 104120 }, { "epoch": 0.685052268705223, "grad_norm": 0.06279633823881442, "learning_rate": 8.312771171320015e-06, "loss": 0.0018, "step": 104130 }, { "epoch": 0.6851180568804563, "grad_norm": 0.02360906504973473, "learning_rate": 8.312341132359639e-06, "loss": 0.0004, "step": 104140 }, { "epoch": 0.6851838450556897, "grad_norm": 0.02447013510333855, "learning_rate": 8.311911049728904e-06, "loss": 0.0015, "step": 104150 }, { "epoch": 0.685249633230923, "grad_norm": 0.1403363838170234, "learning_rate": 8.311480923433486e-06, "loss": 0.0021, "step": 104160 }, { "epoch": 0.6853154214061564, "grad_norm": 0.02387796939506978, "learning_rate": 8.31105075347905e-06, "loss": 0.0009, "step": 104170 }, { "epoch": 0.6853812095813898, "grad_norm": 0.07862613158868273, "learning_rate": 8.31062053987127e-06, "loss": 0.0045, "step": 104180 }, { "epoch": 0.6854469977566232, "grad_norm": 0.008797041183137367, "learning_rate": 8.310190282615816e-06, "loss": 0.0015, "step": 104190 }, { "epoch": 0.6855127859318566, "grad_norm": 0.03143899473167734, "learning_rate": 8.309759981718363e-06, "loss": 0.0017, "step": 104200 }, { "epoch": 0.68557857410709, "grad_norm": 0.009818373241608287, "learning_rate": 8.309329637184583e-06, "loss": 0.0016, "step": 104210 }, { "epoch": 0.6856443622823234, "grad_norm": 0.11061284938820956, "learning_rate": 8.30889924902015e-06, "loss": 0.0018, "step": 104220 }, { "epoch": 0.6857101504575568, "grad_norm": 0.008256896513465312, "learning_rate": 8.308468817230738e-06, "loss": 0.0009, "step": 104230 }, { "epoch": 0.6857759386327902, "grad_norm": 0.06168215985276372, "learning_rate": 8.308038341822021e-06, "loss": 0.0016, "step": 104240 }, { "epoch": 0.6858417268080236, "grad_norm": 0.024505469241808787, "learning_rate": 8.307607822799678e-06, "loss": 0.0006, "step": 104250 }, { "epoch": 0.6859075149832569, "grad_norm": 0.1042018084921054, "learning_rate": 8.30717726016938e-06, "loss": 0.0011, "step": 104260 }, { "epoch": 0.6859733031584903, "grad_norm": 0.08098169401107158, "learning_rate": 8.306746653936807e-06, "loss": 0.0018, "step": 104270 }, { "epoch": 0.6860390913337236, "grad_norm": 0.0799605503529577, "learning_rate": 8.306316004107635e-06, "loss": 0.0029, "step": 104280 }, { "epoch": 0.686104879508957, "grad_norm": 0.3735124968930019, "learning_rate": 8.305885310687543e-06, "loss": 0.0011, "step": 104290 }, { "epoch": 0.6861706676841904, "grad_norm": 0.045719285023063494, "learning_rate": 8.305454573682206e-06, "loss": 0.0029, "step": 104300 }, { "epoch": 0.6862364558594238, "grad_norm": 0.06726460889883797, "learning_rate": 8.305023793097308e-06, "loss": 0.0014, "step": 104310 }, { "epoch": 0.6863022440346572, "grad_norm": 0.049477074053090805, "learning_rate": 8.304592968938525e-06, "loss": 0.0032, "step": 104320 }, { "epoch": 0.6863680322098906, "grad_norm": 0.013547580838416498, "learning_rate": 8.304162101211537e-06, "loss": 0.0021, "step": 104330 }, { "epoch": 0.686433820385124, "grad_norm": 0.044306204023811975, "learning_rate": 8.303731189922025e-06, "loss": 0.0024, "step": 104340 }, { "epoch": 0.6864996085603574, "grad_norm": 0.11437244930908733, "learning_rate": 8.30330023507567e-06, "loss": 0.0018, "step": 104350 }, { "epoch": 0.6865653967355907, "grad_norm": 0.021196846995081054, "learning_rate": 8.302869236678158e-06, "loss": 0.0011, "step": 104360 }, { "epoch": 0.6866311849108241, "grad_norm": 0.0872053776055337, "learning_rate": 8.302438194735164e-06, "loss": 0.002, "step": 104370 }, { "epoch": 0.6866969730860575, "grad_norm": 0.07096013712959873, "learning_rate": 8.302007109252377e-06, "loss": 0.0016, "step": 104380 }, { "epoch": 0.6867627612612909, "grad_norm": 0.017536143137978105, "learning_rate": 8.301575980235477e-06, "loss": 0.0016, "step": 104390 }, { "epoch": 0.6868285494365243, "grad_norm": 0.07660108351394944, "learning_rate": 8.301144807690147e-06, "loss": 0.0022, "step": 104400 }, { "epoch": 0.6868943376117577, "grad_norm": 0.026600534997651137, "learning_rate": 8.300713591622075e-06, "loss": 0.0011, "step": 104410 }, { "epoch": 0.6869601257869911, "grad_norm": 0.011673234175525063, "learning_rate": 8.300282332036947e-06, "loss": 0.002, "step": 104420 }, { "epoch": 0.6870259139622245, "grad_norm": 0.13205694687800681, "learning_rate": 8.299851028940445e-06, "loss": 0.0015, "step": 104430 }, { "epoch": 0.6870917021374578, "grad_norm": 0.05227980559098313, "learning_rate": 8.299419682338259e-06, "loss": 0.0029, "step": 104440 }, { "epoch": 0.6871574903126912, "grad_norm": 0.03771581241467305, "learning_rate": 8.298988292236072e-06, "loss": 0.0009, "step": 104450 }, { "epoch": 0.6872232784879245, "grad_norm": 0.06614457058440977, "learning_rate": 8.298556858639576e-06, "loss": 0.0011, "step": 104460 }, { "epoch": 0.6872890666631579, "grad_norm": 0.026000530401583247, "learning_rate": 8.298125381554452e-06, "loss": 0.0036, "step": 104470 }, { "epoch": 0.6873548548383913, "grad_norm": 0.0961951488146961, "learning_rate": 8.297693860986398e-06, "loss": 0.0019, "step": 104480 }, { "epoch": 0.6874206430136247, "grad_norm": 0.015796985880382586, "learning_rate": 8.297262296941095e-06, "loss": 0.0028, "step": 104490 }, { "epoch": 0.6874864311888581, "grad_norm": 0.0061840011681408924, "learning_rate": 8.296830689424238e-06, "loss": 0.0014, "step": 104500 }, { "epoch": 0.6875522193640915, "grad_norm": 0.06643869478944875, "learning_rate": 8.296399038441516e-06, "loss": 0.0012, "step": 104510 }, { "epoch": 0.6876180075393249, "grad_norm": 0.023301574945680928, "learning_rate": 8.29596734399862e-06, "loss": 0.0019, "step": 104520 }, { "epoch": 0.6876837957145583, "grad_norm": 0.09619262088941559, "learning_rate": 8.29553560610124e-06, "loss": 0.0027, "step": 104530 }, { "epoch": 0.6877495838897917, "grad_norm": 0.046638566492305134, "learning_rate": 8.29510382475507e-06, "loss": 0.0015, "step": 104540 }, { "epoch": 0.6878153720650251, "grad_norm": 0.0591363204272299, "learning_rate": 8.2946719999658e-06, "loss": 0.0008, "step": 104550 }, { "epoch": 0.6878811602402585, "grad_norm": 0.02932047602424201, "learning_rate": 8.294240131739128e-06, "loss": 0.0012, "step": 104560 }, { "epoch": 0.6879469484154918, "grad_norm": 0.09472592746514333, "learning_rate": 8.293808220080744e-06, "loss": 0.0034, "step": 104570 }, { "epoch": 0.6880127365907251, "grad_norm": 0.032621849877797995, "learning_rate": 8.293376264996342e-06, "loss": 0.0014, "step": 104580 }, { "epoch": 0.6880785247659585, "grad_norm": 0.051492507346116316, "learning_rate": 8.29294426649162e-06, "loss": 0.0033, "step": 104590 }, { "epoch": 0.6881443129411919, "grad_norm": 0.1901891342611514, "learning_rate": 8.292512224572272e-06, "loss": 0.0023, "step": 104600 }, { "epoch": 0.6882101011164253, "grad_norm": 0.05733758609422096, "learning_rate": 8.292080139243992e-06, "loss": 0.0029, "step": 104610 }, { "epoch": 0.6882758892916587, "grad_norm": 0.009590218399897227, "learning_rate": 8.29164801051248e-06, "loss": 0.0013, "step": 104620 }, { "epoch": 0.6883416774668921, "grad_norm": 0.14446999384601616, "learning_rate": 8.29121583838343e-06, "loss": 0.0012, "step": 104630 }, { "epoch": 0.6884074656421255, "grad_norm": 0.07870676927675971, "learning_rate": 8.290783622862544e-06, "loss": 0.0014, "step": 104640 }, { "epoch": 0.6884732538173589, "grad_norm": 0.011202338063586616, "learning_rate": 8.290351363955518e-06, "loss": 0.0034, "step": 104650 }, { "epoch": 0.6885390419925923, "grad_norm": 0.005196916180087235, "learning_rate": 8.28991906166805e-06, "loss": 0.0016, "step": 104660 }, { "epoch": 0.6886048301678256, "grad_norm": 0.1405580776452399, "learning_rate": 8.28948671600584e-06, "loss": 0.0024, "step": 104670 }, { "epoch": 0.688670618343059, "grad_norm": 0.008164021539098374, "learning_rate": 8.28905432697459e-06, "loss": 0.0027, "step": 104680 }, { "epoch": 0.6887364065182924, "grad_norm": 0.05527644496187761, "learning_rate": 8.288621894579998e-06, "loss": 0.0011, "step": 104690 }, { "epoch": 0.6888021946935258, "grad_norm": 0.00015673583683454035, "learning_rate": 8.288189418827768e-06, "loss": 0.0016, "step": 104700 }, { "epoch": 0.6888679828687592, "grad_norm": 0.024619990706876302, "learning_rate": 8.2877568997236e-06, "loss": 0.0017, "step": 104710 }, { "epoch": 0.6889337710439926, "grad_norm": 0.006427967123087539, "learning_rate": 8.287324337273195e-06, "loss": 0.0012, "step": 104720 }, { "epoch": 0.688999559219226, "grad_norm": 0.004013967791595081, "learning_rate": 8.28689173148226e-06, "loss": 0.0008, "step": 104730 }, { "epoch": 0.6890653473944593, "grad_norm": 0.03555354144930054, "learning_rate": 8.286459082356496e-06, "loss": 0.001, "step": 104740 }, { "epoch": 0.6891311355696927, "grad_norm": 0.09598729958156065, "learning_rate": 8.286026389901605e-06, "loss": 0.0009, "step": 104750 }, { "epoch": 0.6891969237449261, "grad_norm": 0.025276463647947296, "learning_rate": 8.285593654123296e-06, "loss": 0.002, "step": 104760 }, { "epoch": 0.6892627119201594, "grad_norm": 0.11726587521902988, "learning_rate": 8.285160875027271e-06, "loss": 0.0008, "step": 104770 }, { "epoch": 0.6893285000953928, "grad_norm": 0.05581832004367506, "learning_rate": 8.28472805261924e-06, "loss": 0.0018, "step": 104780 }, { "epoch": 0.6893942882706262, "grad_norm": 0.04952562254160439, "learning_rate": 8.284295186904904e-06, "loss": 0.001, "step": 104790 }, { "epoch": 0.6894600764458596, "grad_norm": 0.0072004248190880235, "learning_rate": 8.283862277889973e-06, "loss": 0.0012, "step": 104800 }, { "epoch": 0.689525864621093, "grad_norm": 0.008534054962165487, "learning_rate": 8.283429325580155e-06, "loss": 0.0018, "step": 104810 }, { "epoch": 0.6895916527963264, "grad_norm": 0.09012389719637125, "learning_rate": 8.282996329981155e-06, "loss": 0.0031, "step": 104820 }, { "epoch": 0.6896574409715598, "grad_norm": 0.24082282254800225, "learning_rate": 8.282563291098684e-06, "loss": 0.001, "step": 104830 }, { "epoch": 0.6897232291467932, "grad_norm": 0.009579843417029329, "learning_rate": 8.282130208938454e-06, "loss": 0.0013, "step": 104840 }, { "epoch": 0.6897890173220266, "grad_norm": 0.07614148763510623, "learning_rate": 8.281697083506169e-06, "loss": 0.001, "step": 104850 }, { "epoch": 0.68985480549726, "grad_norm": 0.053253388383363395, "learning_rate": 8.281263914807543e-06, "loss": 0.002, "step": 104860 }, { "epoch": 0.6899205936724933, "grad_norm": 0.026039759404776614, "learning_rate": 8.280830702848286e-06, "loss": 0.001, "step": 104870 }, { "epoch": 0.6899863818477266, "grad_norm": 0.06151848333442372, "learning_rate": 8.28039744763411e-06, "loss": 0.0007, "step": 104880 }, { "epoch": 0.69005217002296, "grad_norm": 0.11593750432295281, "learning_rate": 8.279964149170727e-06, "loss": 0.0016, "step": 104890 }, { "epoch": 0.6901179581981934, "grad_norm": 0.07428498533899554, "learning_rate": 8.279530807463849e-06, "loss": 0.0024, "step": 104900 }, { "epoch": 0.6901837463734268, "grad_norm": 0.020980952618832163, "learning_rate": 8.27909742251919e-06, "loss": 0.0009, "step": 104910 }, { "epoch": 0.6902495345486602, "grad_norm": 0.07420285212818575, "learning_rate": 8.278663994342464e-06, "loss": 0.0021, "step": 104920 }, { "epoch": 0.6903153227238936, "grad_norm": 0.05090582207446297, "learning_rate": 8.278230522939383e-06, "loss": 0.0014, "step": 104930 }, { "epoch": 0.690381110899127, "grad_norm": 0.014317586586103832, "learning_rate": 8.277797008315667e-06, "loss": 0.0014, "step": 104940 }, { "epoch": 0.6904468990743604, "grad_norm": 0.08265601386031392, "learning_rate": 8.277363450477024e-06, "loss": 0.0011, "step": 104950 }, { "epoch": 0.6905126872495938, "grad_norm": 0.015820281241981766, "learning_rate": 8.276929849429178e-06, "loss": 0.0008, "step": 104960 }, { "epoch": 0.6905784754248271, "grad_norm": 0.005882303481393005, "learning_rate": 8.276496205177841e-06, "loss": 0.0022, "step": 104970 }, { "epoch": 0.6906442636000605, "grad_norm": 0.032078139863538815, "learning_rate": 8.276062517728731e-06, "loss": 0.0018, "step": 104980 }, { "epoch": 0.6907100517752939, "grad_norm": 0.02749763096826392, "learning_rate": 8.275628787087566e-06, "loss": 0.0012, "step": 104990 }, { "epoch": 0.6907758399505273, "grad_norm": 0.029749594171655805, "learning_rate": 8.275195013260065e-06, "loss": 0.0019, "step": 105000 }, { "epoch": 0.6908416281257607, "grad_norm": 0.07126208335430272, "learning_rate": 8.274761196251946e-06, "loss": 0.0015, "step": 105010 }, { "epoch": 0.6909074163009941, "grad_norm": 0.03607811783572115, "learning_rate": 8.27432733606893e-06, "loss": 0.0011, "step": 105020 }, { "epoch": 0.6909732044762275, "grad_norm": 0.0494578934630607, "learning_rate": 8.273893432716733e-06, "loss": 0.0013, "step": 105030 }, { "epoch": 0.6910389926514608, "grad_norm": 0.04133009320965977, "learning_rate": 8.273459486201082e-06, "loss": 0.0025, "step": 105040 }, { "epoch": 0.6911047808266942, "grad_norm": 0.029232408692019058, "learning_rate": 8.273025496527693e-06, "loss": 0.0015, "step": 105050 }, { "epoch": 0.6911705690019276, "grad_norm": 0.002467926037829495, "learning_rate": 8.272591463702291e-06, "loss": 0.002, "step": 105060 }, { "epoch": 0.691236357177161, "grad_norm": 0.010936892420647609, "learning_rate": 8.272157387730595e-06, "loss": 0.001, "step": 105070 }, { "epoch": 0.6913021453523943, "grad_norm": 0.02987347590123436, "learning_rate": 8.27172326861833e-06, "loss": 0.0021, "step": 105080 }, { "epoch": 0.6913679335276277, "grad_norm": 0.03235367601021213, "learning_rate": 8.27128910637122e-06, "loss": 0.0012, "step": 105090 }, { "epoch": 0.6914337217028611, "grad_norm": 0.08822215165677877, "learning_rate": 8.270854900994992e-06, "loss": 0.0011, "step": 105100 }, { "epoch": 0.6914995098780945, "grad_norm": 0.05833636828592479, "learning_rate": 8.270420652495362e-06, "loss": 0.0007, "step": 105110 }, { "epoch": 0.6915652980533279, "grad_norm": 0.08758081325815656, "learning_rate": 8.269986360878062e-06, "loss": 0.0018, "step": 105120 }, { "epoch": 0.6916310862285613, "grad_norm": 0.03522817121140025, "learning_rate": 8.269552026148817e-06, "loss": 0.001, "step": 105130 }, { "epoch": 0.6916968744037947, "grad_norm": 0.015059135329231265, "learning_rate": 8.26911764831335e-06, "loss": 0.001, "step": 105140 }, { "epoch": 0.6917626625790281, "grad_norm": 0.044831866873492214, "learning_rate": 8.268683227377392e-06, "loss": 0.003, "step": 105150 }, { "epoch": 0.6918284507542615, "grad_norm": 0.04163246408097947, "learning_rate": 8.268248763346668e-06, "loss": 0.0015, "step": 105160 }, { "epoch": 0.6918942389294949, "grad_norm": 0.02200017299916731, "learning_rate": 8.267814256226906e-06, "loss": 0.0008, "step": 105170 }, { "epoch": 0.6919600271047281, "grad_norm": 0.04666156710866075, "learning_rate": 8.267379706023836e-06, "loss": 0.0029, "step": 105180 }, { "epoch": 0.6920258152799615, "grad_norm": 0.0649421391624563, "learning_rate": 8.266945112743187e-06, "loss": 0.0019, "step": 105190 }, { "epoch": 0.6920916034551949, "grad_norm": 0.009830179881233726, "learning_rate": 8.266510476390686e-06, "loss": 0.0029, "step": 105200 }, { "epoch": 0.6921573916304283, "grad_norm": 0.01719268845565459, "learning_rate": 8.266075796972069e-06, "loss": 0.0033, "step": 105210 }, { "epoch": 0.6922231798056617, "grad_norm": 0.06035837545118309, "learning_rate": 8.265641074493061e-06, "loss": 0.0011, "step": 105220 }, { "epoch": 0.6922889679808951, "grad_norm": 0.0826556024878363, "learning_rate": 8.265206308959396e-06, "loss": 0.0028, "step": 105230 }, { "epoch": 0.6923547561561285, "grad_norm": 0.04363725402151917, "learning_rate": 8.264771500376807e-06, "loss": 0.0011, "step": 105240 }, { "epoch": 0.6924205443313619, "grad_norm": 0.005373036059509258, "learning_rate": 8.264336648751023e-06, "loss": 0.0015, "step": 105250 }, { "epoch": 0.6924863325065953, "grad_norm": 0.006689311800278663, "learning_rate": 8.263901754087781e-06, "loss": 0.0012, "step": 105260 }, { "epoch": 0.6925521206818287, "grad_norm": 0.18478509567683601, "learning_rate": 8.263466816392815e-06, "loss": 0.0014, "step": 105270 }, { "epoch": 0.692617908857062, "grad_norm": 0.015080831503507615, "learning_rate": 8.263031835671855e-06, "loss": 0.0009, "step": 105280 }, { "epoch": 0.6926836970322954, "grad_norm": 0.016528920838529873, "learning_rate": 8.26259681193064e-06, "loss": 0.0024, "step": 105290 }, { "epoch": 0.6927494852075288, "grad_norm": 0.0500445485013182, "learning_rate": 8.262161745174903e-06, "loss": 0.0034, "step": 105300 }, { "epoch": 0.6928152733827622, "grad_norm": 0.06733405637988349, "learning_rate": 8.261726635410382e-06, "loss": 0.0009, "step": 105310 }, { "epoch": 0.6928810615579956, "grad_norm": 0.01722434403109834, "learning_rate": 8.261291482642811e-06, "loss": 0.0012, "step": 105320 }, { "epoch": 0.692946849733229, "grad_norm": 0.2334385313045624, "learning_rate": 8.260856286877927e-06, "loss": 0.0017, "step": 105330 }, { "epoch": 0.6930126379084623, "grad_norm": 0.005212743978907192, "learning_rate": 8.260421048121472e-06, "loss": 0.0013, "step": 105340 }, { "epoch": 0.6930784260836957, "grad_norm": 0.11438993287120572, "learning_rate": 8.25998576637918e-06, "loss": 0.0021, "step": 105350 }, { "epoch": 0.6931442142589291, "grad_norm": 0.06565897580549968, "learning_rate": 8.259550441656792e-06, "loss": 0.0018, "step": 105360 }, { "epoch": 0.6932100024341625, "grad_norm": 0.10523053976537237, "learning_rate": 8.259115073960045e-06, "loss": 0.0026, "step": 105370 }, { "epoch": 0.6932757906093958, "grad_norm": 0.056631741236866956, "learning_rate": 8.258679663294683e-06, "loss": 0.0016, "step": 105380 }, { "epoch": 0.6933415787846292, "grad_norm": 0.23637445382594288, "learning_rate": 8.258244209666442e-06, "loss": 0.0044, "step": 105390 }, { "epoch": 0.6934073669598626, "grad_norm": 0.07068097939950253, "learning_rate": 8.257808713081064e-06, "loss": 0.002, "step": 105400 }, { "epoch": 0.693473155135096, "grad_norm": 0.022075941974974818, "learning_rate": 8.257373173544294e-06, "loss": 0.0016, "step": 105410 }, { "epoch": 0.6935389433103294, "grad_norm": 0.1170118497982531, "learning_rate": 8.256937591061871e-06, "loss": 0.0013, "step": 105420 }, { "epoch": 0.6936047314855628, "grad_norm": 0.007751130795357637, "learning_rate": 8.256501965639538e-06, "loss": 0.0017, "step": 105430 }, { "epoch": 0.6936705196607962, "grad_norm": 0.004293265667628154, "learning_rate": 8.25606629728304e-06, "loss": 0.0015, "step": 105440 }, { "epoch": 0.6937363078360296, "grad_norm": 0.04211355372215725, "learning_rate": 8.255630585998121e-06, "loss": 0.0013, "step": 105450 }, { "epoch": 0.693802096011263, "grad_norm": 0.03182323794005743, "learning_rate": 8.255194831790522e-06, "loss": 0.0013, "step": 105460 }, { "epoch": 0.6938678841864964, "grad_norm": 0.03197834957951415, "learning_rate": 8.254759034665993e-06, "loss": 0.0013, "step": 105470 }, { "epoch": 0.6939336723617296, "grad_norm": 0.10561743439051746, "learning_rate": 8.254323194630276e-06, "loss": 0.0014, "step": 105480 }, { "epoch": 0.693999460536963, "grad_norm": 0.08081401151774806, "learning_rate": 8.253887311689119e-06, "loss": 0.0011, "step": 105490 }, { "epoch": 0.6940652487121964, "grad_norm": 0.05056511053812843, "learning_rate": 8.253451385848267e-06, "loss": 0.0012, "step": 105500 }, { "epoch": 0.6941310368874298, "grad_norm": 0.06030312061739942, "learning_rate": 8.25301541711347e-06, "loss": 0.0018, "step": 105510 }, { "epoch": 0.6941968250626632, "grad_norm": 0.08049193152897983, "learning_rate": 8.252579405490473e-06, "loss": 0.0027, "step": 105520 }, { "epoch": 0.6942626132378966, "grad_norm": 0.13735410623914351, "learning_rate": 8.252143350985025e-06, "loss": 0.0018, "step": 105530 }, { "epoch": 0.69432840141313, "grad_norm": 0.04077619266982014, "learning_rate": 8.251707253602876e-06, "loss": 0.0022, "step": 105540 }, { "epoch": 0.6943941895883634, "grad_norm": 0.14770204949726187, "learning_rate": 8.251271113349777e-06, "loss": 0.0019, "step": 105550 }, { "epoch": 0.6944599777635968, "grad_norm": 0.08421997100015792, "learning_rate": 8.250834930231475e-06, "loss": 0.0009, "step": 105560 }, { "epoch": 0.6945257659388302, "grad_norm": 0.06771809487887745, "learning_rate": 8.250398704253722e-06, "loss": 0.0007, "step": 105570 }, { "epoch": 0.6945915541140636, "grad_norm": 0.061245150763903924, "learning_rate": 8.249962435422269e-06, "loss": 0.0009, "step": 105580 }, { "epoch": 0.6946573422892969, "grad_norm": 0.026358506548695645, "learning_rate": 8.249526123742868e-06, "loss": 0.0014, "step": 105590 }, { "epoch": 0.6947231304645303, "grad_norm": 0.061072642754187695, "learning_rate": 8.249089769221272e-06, "loss": 0.0008, "step": 105600 }, { "epoch": 0.6947889186397637, "grad_norm": 0.08077736143206982, "learning_rate": 8.248653371863233e-06, "loss": 0.0021, "step": 105610 }, { "epoch": 0.6948547068149971, "grad_norm": 0.03603552377609939, "learning_rate": 8.248216931674506e-06, "loss": 0.0035, "step": 105620 }, { "epoch": 0.6949204949902305, "grad_norm": 0.021467474103908202, "learning_rate": 8.247780448660843e-06, "loss": 0.0007, "step": 105630 }, { "epoch": 0.6949862831654638, "grad_norm": 0.07836355933981227, "learning_rate": 8.247343922828e-06, "loss": 0.0008, "step": 105640 }, { "epoch": 0.6950520713406972, "grad_norm": 0.1955700039613043, "learning_rate": 8.246907354181733e-06, "loss": 0.0016, "step": 105650 }, { "epoch": 0.6951178595159306, "grad_norm": 0.04628013261057177, "learning_rate": 8.246470742727795e-06, "loss": 0.0026, "step": 105660 }, { "epoch": 0.695183647691164, "grad_norm": 0.015912661739033353, "learning_rate": 8.246034088471946e-06, "loss": 0.0039, "step": 105670 }, { "epoch": 0.6952494358663974, "grad_norm": 0.029403938248158505, "learning_rate": 8.24559739141994e-06, "loss": 0.0015, "step": 105680 }, { "epoch": 0.6953152240416307, "grad_norm": 0.035476280585192733, "learning_rate": 8.245160651577536e-06, "loss": 0.0047, "step": 105690 }, { "epoch": 0.6953810122168641, "grad_norm": 0.04643093799666359, "learning_rate": 8.244723868950491e-06, "loss": 0.0021, "step": 105700 }, { "epoch": 0.6954468003920975, "grad_norm": 0.041476797348095194, "learning_rate": 8.244287043544564e-06, "loss": 0.0027, "step": 105710 }, { "epoch": 0.6955125885673309, "grad_norm": 0.13238935494679882, "learning_rate": 8.243850175365516e-06, "loss": 0.0011, "step": 105720 }, { "epoch": 0.6955783767425643, "grad_norm": 0.06976091898604103, "learning_rate": 8.243413264419102e-06, "loss": 0.0023, "step": 105730 }, { "epoch": 0.6956441649177977, "grad_norm": 0.022552686992599574, "learning_rate": 8.242976310711087e-06, "loss": 0.001, "step": 105740 }, { "epoch": 0.6957099530930311, "grad_norm": 0.009004473719233788, "learning_rate": 8.24253931424723e-06, "loss": 0.0009, "step": 105750 }, { "epoch": 0.6957757412682645, "grad_norm": 0.00039846201195375876, "learning_rate": 8.242102275033293e-06, "loss": 0.0021, "step": 105760 }, { "epoch": 0.6958415294434979, "grad_norm": 0.01139985153428055, "learning_rate": 8.241665193075037e-06, "loss": 0.0025, "step": 105770 }, { "epoch": 0.6959073176187313, "grad_norm": 0.08874736349646418, "learning_rate": 8.241228068378225e-06, "loss": 0.002, "step": 105780 }, { "epoch": 0.6959731057939645, "grad_norm": 0.28464594330506854, "learning_rate": 8.24079090094862e-06, "loss": 0.0017, "step": 105790 }, { "epoch": 0.6960388939691979, "grad_norm": 0.034534021973267874, "learning_rate": 8.240353690791987e-06, "loss": 0.002, "step": 105800 }, { "epoch": 0.6961046821444313, "grad_norm": 0.08263161427492995, "learning_rate": 8.239916437914088e-06, "loss": 0.0028, "step": 105810 }, { "epoch": 0.6961704703196647, "grad_norm": 0.0013945037932511395, "learning_rate": 8.23947914232069e-06, "loss": 0.0014, "step": 105820 }, { "epoch": 0.6962362584948981, "grad_norm": 0.1176054841774549, "learning_rate": 8.239041804017555e-06, "loss": 0.0015, "step": 105830 }, { "epoch": 0.6963020466701315, "grad_norm": 0.042279586708241675, "learning_rate": 8.238604423010455e-06, "loss": 0.0017, "step": 105840 }, { "epoch": 0.6963678348453649, "grad_norm": 0.0409883080871217, "learning_rate": 8.23816699930515e-06, "loss": 0.001, "step": 105850 }, { "epoch": 0.6964336230205983, "grad_norm": 0.0011914044377991549, "learning_rate": 8.237729532907411e-06, "loss": 0.0016, "step": 105860 }, { "epoch": 0.6964994111958317, "grad_norm": 0.020962723487076444, "learning_rate": 8.237292023823003e-06, "loss": 0.0018, "step": 105870 }, { "epoch": 0.6965651993710651, "grad_norm": 0.05399903913778256, "learning_rate": 8.236854472057697e-06, "loss": 0.0009, "step": 105880 }, { "epoch": 0.6966309875462984, "grad_norm": 0.1926185504881172, "learning_rate": 8.236416877617259e-06, "loss": 0.002, "step": 105890 }, { "epoch": 0.6966967757215318, "grad_norm": 0.09693284425672913, "learning_rate": 8.235979240507461e-06, "loss": 0.0012, "step": 105900 }, { "epoch": 0.6967625638967652, "grad_norm": 0.08224691033040187, "learning_rate": 8.235541560734071e-06, "loss": 0.0013, "step": 105910 }, { "epoch": 0.6968283520719986, "grad_norm": 0.024561245754626964, "learning_rate": 8.23510383830286e-06, "loss": 0.0022, "step": 105920 }, { "epoch": 0.696894140247232, "grad_norm": 0.020423350756954953, "learning_rate": 8.2346660732196e-06, "loss": 0.0012, "step": 105930 }, { "epoch": 0.6969599284224653, "grad_norm": 0.09597846434245647, "learning_rate": 8.234228265490058e-06, "loss": 0.0008, "step": 105940 }, { "epoch": 0.6970257165976987, "grad_norm": 0.04480172151562728, "learning_rate": 8.233790415120014e-06, "loss": 0.0007, "step": 105950 }, { "epoch": 0.6970915047729321, "grad_norm": 0.04287664794866968, "learning_rate": 8.233352522115232e-06, "loss": 0.0017, "step": 105960 }, { "epoch": 0.6971572929481655, "grad_norm": 0.068216750082351, "learning_rate": 8.232914586481493e-06, "loss": 0.0014, "step": 105970 }, { "epoch": 0.6972230811233989, "grad_norm": 0.07592365243944159, "learning_rate": 8.232476608224565e-06, "loss": 0.0012, "step": 105980 }, { "epoch": 0.6972888692986323, "grad_norm": 0.004912858107136084, "learning_rate": 8.232038587350226e-06, "loss": 0.0016, "step": 105990 }, { "epoch": 0.6973546574738656, "grad_norm": 0.0775658495455737, "learning_rate": 8.23160052386425e-06, "loss": 0.0018, "step": 106000 }, { "epoch": 0.697420445649099, "grad_norm": 0.05939379854254611, "learning_rate": 8.231162417772409e-06, "loss": 0.0008, "step": 106010 }, { "epoch": 0.6974862338243324, "grad_norm": 0.020153786446616386, "learning_rate": 8.230724269080486e-06, "loss": 0.001, "step": 106020 }, { "epoch": 0.6975520219995658, "grad_norm": 0.029201183375684284, "learning_rate": 8.230286077794252e-06, "loss": 0.0012, "step": 106030 }, { "epoch": 0.6976178101747992, "grad_norm": 0.12668921094459867, "learning_rate": 8.229847843919485e-06, "loss": 0.0027, "step": 106040 }, { "epoch": 0.6976835983500326, "grad_norm": 0.027332637731003237, "learning_rate": 8.229409567461965e-06, "loss": 0.0024, "step": 106050 }, { "epoch": 0.697749386525266, "grad_norm": 0.1427996669710046, "learning_rate": 8.228971248427468e-06, "loss": 0.0015, "step": 106060 }, { "epoch": 0.6978151747004994, "grad_norm": 0.008587840064224143, "learning_rate": 8.228532886821776e-06, "loss": 0.0014, "step": 106070 }, { "epoch": 0.6978809628757328, "grad_norm": 0.0064456898975939056, "learning_rate": 8.228094482650663e-06, "loss": 0.0013, "step": 106080 }, { "epoch": 0.6979467510509662, "grad_norm": 0.08328667570582345, "learning_rate": 8.227656035919915e-06, "loss": 0.0012, "step": 106090 }, { "epoch": 0.6980125392261994, "grad_norm": 0.017505183788577308, "learning_rate": 8.227217546635309e-06, "loss": 0.0019, "step": 106100 }, { "epoch": 0.6980783274014328, "grad_norm": 0.014927809706945362, "learning_rate": 8.226779014802626e-06, "loss": 0.0012, "step": 106110 }, { "epoch": 0.6981441155766662, "grad_norm": 0.0031682232319124523, "learning_rate": 8.22634044042765e-06, "loss": 0.0012, "step": 106120 }, { "epoch": 0.6982099037518996, "grad_norm": 0.050399479603944, "learning_rate": 8.22590182351616e-06, "loss": 0.0018, "step": 106130 }, { "epoch": 0.698275691927133, "grad_norm": 0.10027753624110712, "learning_rate": 8.225463164073941e-06, "loss": 0.0021, "step": 106140 }, { "epoch": 0.6983414801023664, "grad_norm": 0.08475469179715836, "learning_rate": 8.225024462106777e-06, "loss": 0.0016, "step": 106150 }, { "epoch": 0.6984072682775998, "grad_norm": 0.019665655131973426, "learning_rate": 8.22458571762045e-06, "loss": 0.0009, "step": 106160 }, { "epoch": 0.6984730564528332, "grad_norm": 0.02553903655942844, "learning_rate": 8.224146930620744e-06, "loss": 0.0014, "step": 106170 }, { "epoch": 0.6985388446280666, "grad_norm": 0.19406997881736512, "learning_rate": 8.223708101113448e-06, "loss": 0.0013, "step": 106180 }, { "epoch": 0.6986046328033, "grad_norm": 0.004026667496687187, "learning_rate": 8.223269229104343e-06, "loss": 0.0018, "step": 106190 }, { "epoch": 0.6986704209785333, "grad_norm": 0.039929931277097326, "learning_rate": 8.222830314599216e-06, "loss": 0.0011, "step": 106200 }, { "epoch": 0.6987362091537667, "grad_norm": 0.09526402685638512, "learning_rate": 8.222391357603856e-06, "loss": 0.0014, "step": 106210 }, { "epoch": 0.6988019973290001, "grad_norm": 0.08267310430115832, "learning_rate": 8.22195235812405e-06, "loss": 0.0018, "step": 106220 }, { "epoch": 0.6988677855042335, "grad_norm": 0.00939140571263771, "learning_rate": 8.221513316165584e-06, "loss": 0.0012, "step": 106230 }, { "epoch": 0.6989335736794668, "grad_norm": 0.027031042951704252, "learning_rate": 8.221074231734246e-06, "loss": 0.0015, "step": 106240 }, { "epoch": 0.6989993618547002, "grad_norm": 0.13879410562864644, "learning_rate": 8.220635104835828e-06, "loss": 0.0029, "step": 106250 }, { "epoch": 0.6990651500299336, "grad_norm": 0.08989526992129956, "learning_rate": 8.220195935476117e-06, "loss": 0.0016, "step": 106260 }, { "epoch": 0.699130938205167, "grad_norm": 0.07126108528980789, "learning_rate": 8.219756723660903e-06, "loss": 0.0032, "step": 106270 }, { "epoch": 0.6991967263804004, "grad_norm": 0.04939459604739646, "learning_rate": 8.219317469395977e-06, "loss": 0.0024, "step": 106280 }, { "epoch": 0.6992625145556338, "grad_norm": 0.0944951275663528, "learning_rate": 8.218878172687133e-06, "loss": 0.0011, "step": 106290 }, { "epoch": 0.6993283027308671, "grad_norm": 0.061270056079268324, "learning_rate": 8.21843883354016e-06, "loss": 0.0007, "step": 106300 }, { "epoch": 0.6993940909061005, "grad_norm": 0.054953180092524964, "learning_rate": 8.217999451960847e-06, "loss": 0.0006, "step": 106310 }, { "epoch": 0.6994598790813339, "grad_norm": 0.05965888320544775, "learning_rate": 8.217560027954994e-06, "loss": 0.0016, "step": 106320 }, { "epoch": 0.6995256672565673, "grad_norm": 0.035719595566795534, "learning_rate": 8.21712056152839e-06, "loss": 0.0021, "step": 106330 }, { "epoch": 0.6995914554318007, "grad_norm": 0.0869721204072301, "learning_rate": 8.216681052686828e-06, "loss": 0.0015, "step": 106340 }, { "epoch": 0.6996572436070341, "grad_norm": 0.07884550315442065, "learning_rate": 8.216241501436107e-06, "loss": 0.0013, "step": 106350 }, { "epoch": 0.6997230317822675, "grad_norm": 0.05880134867830082, "learning_rate": 8.215801907782018e-06, "loss": 0.002, "step": 106360 }, { "epoch": 0.6997888199575009, "grad_norm": 0.1423608381634699, "learning_rate": 8.215362271730359e-06, "loss": 0.0023, "step": 106370 }, { "epoch": 0.6998546081327343, "grad_norm": 0.039036086637195114, "learning_rate": 8.214922593286926e-06, "loss": 0.0012, "step": 106380 }, { "epoch": 0.6999203963079677, "grad_norm": 0.03713201474958656, "learning_rate": 8.214482872457515e-06, "loss": 0.0023, "step": 106390 }, { "epoch": 0.6999861844832009, "grad_norm": 0.055131101951884046, "learning_rate": 8.214043109247922e-06, "loss": 0.0012, "step": 106400 }, { "epoch": 0.7000519726584343, "grad_norm": 0.18825359312258477, "learning_rate": 8.213603303663948e-06, "loss": 0.0018, "step": 106410 }, { "epoch": 0.7001177608336677, "grad_norm": 0.26404336409128104, "learning_rate": 8.213163455711388e-06, "loss": 0.0021, "step": 106420 }, { "epoch": 0.7001835490089011, "grad_norm": 0.05991125299536566, "learning_rate": 8.212723565396045e-06, "loss": 0.0026, "step": 106430 }, { "epoch": 0.7002493371841345, "grad_norm": 0.23052790725401504, "learning_rate": 8.212283632723716e-06, "loss": 0.0013, "step": 106440 }, { "epoch": 0.7003151253593679, "grad_norm": 0.037295733696478726, "learning_rate": 8.2118436577002e-06, "loss": 0.0011, "step": 106450 }, { "epoch": 0.7003809135346013, "grad_norm": 0.04380934620981688, "learning_rate": 8.211403640331301e-06, "loss": 0.0016, "step": 106460 }, { "epoch": 0.7004467017098347, "grad_norm": 0.001559115935934906, "learning_rate": 8.21096358062282e-06, "loss": 0.0008, "step": 106470 }, { "epoch": 0.7005124898850681, "grad_norm": 0.06702226340460057, "learning_rate": 8.210523478580554e-06, "loss": 0.0022, "step": 106480 }, { "epoch": 0.7005782780603015, "grad_norm": 0.06073296678799634, "learning_rate": 8.21008333421031e-06, "loss": 0.0009, "step": 106490 }, { "epoch": 0.7006440662355349, "grad_norm": 0.03623001452217812, "learning_rate": 8.209643147517889e-06, "loss": 0.0007, "step": 106500 }, { "epoch": 0.7007098544107682, "grad_norm": 0.09691018038403168, "learning_rate": 8.209202918509097e-06, "loss": 0.0011, "step": 106510 }, { "epoch": 0.7007756425860016, "grad_norm": 0.058010116345086765, "learning_rate": 8.208762647189736e-06, "loss": 0.0014, "step": 106520 }, { "epoch": 0.700841430761235, "grad_norm": 0.02098031454181914, "learning_rate": 8.208322333565609e-06, "loss": 0.0018, "step": 106530 }, { "epoch": 0.7009072189364683, "grad_norm": 0.009922009438004687, "learning_rate": 8.207881977642525e-06, "loss": 0.0021, "step": 106540 }, { "epoch": 0.7009730071117017, "grad_norm": 0.018109384011474005, "learning_rate": 8.207441579426285e-06, "loss": 0.0012, "step": 106550 }, { "epoch": 0.7010387952869351, "grad_norm": 0.07949205811333922, "learning_rate": 8.2070011389227e-06, "loss": 0.0008, "step": 106560 }, { "epoch": 0.7011045834621685, "grad_norm": 0.008149062337791757, "learning_rate": 8.206560656137575e-06, "loss": 0.001, "step": 106570 }, { "epoch": 0.7011703716374019, "grad_norm": 0.06545396882176853, "learning_rate": 8.206120131076716e-06, "loss": 0.0011, "step": 106580 }, { "epoch": 0.7012361598126353, "grad_norm": 0.030668361417747085, "learning_rate": 8.205679563745932e-06, "loss": 0.001, "step": 106590 }, { "epoch": 0.7013019479878687, "grad_norm": 0.030217271487229146, "learning_rate": 8.20523895415103e-06, "loss": 0.0021, "step": 106600 }, { "epoch": 0.701367736163102, "grad_norm": 0.01590939841745613, "learning_rate": 8.204798302297825e-06, "loss": 0.0006, "step": 106610 }, { "epoch": 0.7014335243383354, "grad_norm": 0.21887940845142276, "learning_rate": 8.204357608192119e-06, "loss": 0.002, "step": 106620 }, { "epoch": 0.7014993125135688, "grad_norm": 0.08881365576430982, "learning_rate": 8.203916871839725e-06, "loss": 0.0011, "step": 106630 }, { "epoch": 0.7015651006888022, "grad_norm": 0.054272551247355846, "learning_rate": 8.203476093246457e-06, "loss": 0.0015, "step": 106640 }, { "epoch": 0.7016308888640356, "grad_norm": 0.016574401990384006, "learning_rate": 8.20303527241812e-06, "loss": 0.001, "step": 106650 }, { "epoch": 0.701696677039269, "grad_norm": 0.0670671268398361, "learning_rate": 8.20259440936053e-06, "loss": 0.0016, "step": 106660 }, { "epoch": 0.7017624652145024, "grad_norm": 0.05758356123360926, "learning_rate": 8.202153504079502e-06, "loss": 0.0011, "step": 106670 }, { "epoch": 0.7018282533897358, "grad_norm": 0.028804355485147648, "learning_rate": 8.201712556580843e-06, "loss": 0.0025, "step": 106680 }, { "epoch": 0.7018940415649692, "grad_norm": 0.05277132873585087, "learning_rate": 8.201271566870367e-06, "loss": 0.0017, "step": 106690 }, { "epoch": 0.7019598297402025, "grad_norm": 0.04345532181049832, "learning_rate": 8.200830534953895e-06, "loss": 0.001, "step": 106700 }, { "epoch": 0.7020256179154358, "grad_norm": 0.030702436494080963, "learning_rate": 8.200389460837233e-06, "loss": 0.0009, "step": 106710 }, { "epoch": 0.7020914060906692, "grad_norm": 0.04378220476308606, "learning_rate": 8.199948344526202e-06, "loss": 0.0017, "step": 106720 }, { "epoch": 0.7021571942659026, "grad_norm": 0.015305040158840365, "learning_rate": 8.199507186026613e-06, "loss": 0.0011, "step": 106730 }, { "epoch": 0.702222982441136, "grad_norm": 0.030595167675233452, "learning_rate": 8.199065985344288e-06, "loss": 0.0018, "step": 106740 }, { "epoch": 0.7022887706163694, "grad_norm": 0.0021500191820645305, "learning_rate": 8.198624742485041e-06, "loss": 0.0026, "step": 106750 }, { "epoch": 0.7023545587916028, "grad_norm": 0.004872058480214781, "learning_rate": 8.198183457454687e-06, "loss": 0.0012, "step": 106760 }, { "epoch": 0.7024203469668362, "grad_norm": 0.04940462335752169, "learning_rate": 8.197742130259049e-06, "loss": 0.0016, "step": 106770 }, { "epoch": 0.7024861351420696, "grad_norm": 0.026208183788011295, "learning_rate": 8.19730076090394e-06, "loss": 0.0025, "step": 106780 }, { "epoch": 0.702551923317303, "grad_norm": 0.015782552790838695, "learning_rate": 8.196859349395182e-06, "loss": 0.0008, "step": 106790 }, { "epoch": 0.7026177114925364, "grad_norm": 0.04571108860127908, "learning_rate": 8.196417895738597e-06, "loss": 0.0015, "step": 106800 }, { "epoch": 0.7026834996677697, "grad_norm": 0.0015566492634818817, "learning_rate": 8.195976399940001e-06, "loss": 0.0015, "step": 106810 }, { "epoch": 0.7027492878430031, "grad_norm": 0.0014762312604661666, "learning_rate": 8.195534862005216e-06, "loss": 0.0013, "step": 106820 }, { "epoch": 0.7028150760182365, "grad_norm": 0.043287830832134486, "learning_rate": 8.195093281940064e-06, "loss": 0.0009, "step": 106830 }, { "epoch": 0.7028808641934698, "grad_norm": 0.1448144869535862, "learning_rate": 8.194651659750368e-06, "loss": 0.0018, "step": 106840 }, { "epoch": 0.7029466523687032, "grad_norm": 0.06775902511516674, "learning_rate": 8.194209995441947e-06, "loss": 0.0021, "step": 106850 }, { "epoch": 0.7030124405439366, "grad_norm": 0.08573564443786227, "learning_rate": 8.193768289020627e-06, "loss": 0.0019, "step": 106860 }, { "epoch": 0.70307822871917, "grad_norm": 0.02672132869511751, "learning_rate": 8.19332654049223e-06, "loss": 0.0012, "step": 106870 }, { "epoch": 0.7031440168944034, "grad_norm": 0.05861346931375567, "learning_rate": 8.192884749862582e-06, "loss": 0.0012, "step": 106880 }, { "epoch": 0.7032098050696368, "grad_norm": 0.04735999476276466, "learning_rate": 8.192442917137503e-06, "loss": 0.0024, "step": 106890 }, { "epoch": 0.7032755932448702, "grad_norm": 0.04365884734052754, "learning_rate": 8.192001042322823e-06, "loss": 0.0016, "step": 106900 }, { "epoch": 0.7033413814201035, "grad_norm": 0.03647504140784874, "learning_rate": 8.191559125424367e-06, "loss": 0.0011, "step": 106910 }, { "epoch": 0.7034071695953369, "grad_norm": 0.006448689408826781, "learning_rate": 8.19111716644796e-06, "loss": 0.0012, "step": 106920 }, { "epoch": 0.7034729577705703, "grad_norm": 0.0719031111722738, "learning_rate": 8.190675165399428e-06, "loss": 0.0008, "step": 106930 }, { "epoch": 0.7035387459458037, "grad_norm": 0.04666658317922413, "learning_rate": 8.1902331222846e-06, "loss": 0.0028, "step": 106940 }, { "epoch": 0.7036045341210371, "grad_norm": 0.06142820982253835, "learning_rate": 8.189791037109304e-06, "loss": 0.001, "step": 106950 }, { "epoch": 0.7036703222962705, "grad_norm": 0.030993809395052748, "learning_rate": 8.189348909879368e-06, "loss": 0.0014, "step": 106960 }, { "epoch": 0.7037361104715039, "grad_norm": 0.054470387476084, "learning_rate": 8.18890674060062e-06, "loss": 0.0016, "step": 106970 }, { "epoch": 0.7038018986467373, "grad_norm": 0.06442314419972502, "learning_rate": 8.188464529278892e-06, "loss": 0.0016, "step": 106980 }, { "epoch": 0.7038676868219707, "grad_norm": 0.15026202308726042, "learning_rate": 8.188022275920014e-06, "loss": 0.0009, "step": 106990 }, { "epoch": 0.703933474997204, "grad_norm": 0.011572711515824407, "learning_rate": 8.187579980529814e-06, "loss": 0.0013, "step": 107000 }, { "epoch": 0.7039992631724374, "grad_norm": 0.09195890424593954, "learning_rate": 8.187137643114125e-06, "loss": 0.0012, "step": 107010 }, { "epoch": 0.7040650513476707, "grad_norm": 0.10680167985405743, "learning_rate": 8.186695263678779e-06, "loss": 0.0025, "step": 107020 }, { "epoch": 0.7041308395229041, "grad_norm": 0.03339223847066527, "learning_rate": 8.186252842229607e-06, "loss": 0.0009, "step": 107030 }, { "epoch": 0.7041966276981375, "grad_norm": 0.020879657456964025, "learning_rate": 8.185810378772446e-06, "loss": 0.0009, "step": 107040 }, { "epoch": 0.7042624158733709, "grad_norm": 0.020820599577332743, "learning_rate": 8.185367873313123e-06, "loss": 0.0012, "step": 107050 }, { "epoch": 0.7043282040486043, "grad_norm": 0.012755550987236441, "learning_rate": 8.184925325857478e-06, "loss": 0.0014, "step": 107060 }, { "epoch": 0.7043939922238377, "grad_norm": 0.04167304825972845, "learning_rate": 8.184482736411344e-06, "loss": 0.0014, "step": 107070 }, { "epoch": 0.7044597803990711, "grad_norm": 0.0217616020882024, "learning_rate": 8.184040104980553e-06, "loss": 0.0012, "step": 107080 }, { "epoch": 0.7045255685743045, "grad_norm": 0.0010195047384094198, "learning_rate": 8.183597431570945e-06, "loss": 0.0008, "step": 107090 }, { "epoch": 0.7045913567495379, "grad_norm": 0.04407741550842076, "learning_rate": 8.183154716188353e-06, "loss": 0.001, "step": 107100 }, { "epoch": 0.7046571449247713, "grad_norm": 0.09832694853024698, "learning_rate": 8.182711958838617e-06, "loss": 0.0016, "step": 107110 }, { "epoch": 0.7047229331000046, "grad_norm": 0.06109404570400975, "learning_rate": 8.182269159527571e-06, "loss": 0.0011, "step": 107120 }, { "epoch": 0.704788721275238, "grad_norm": 0.0684697882811027, "learning_rate": 8.181826318261054e-06, "loss": 0.085, "step": 107130 }, { "epoch": 0.7048545094504713, "grad_norm": 0.020079751856681944, "learning_rate": 8.181383435044907e-06, "loss": 0.0019, "step": 107140 }, { "epoch": 0.7049202976257047, "grad_norm": 0.029282128941097916, "learning_rate": 8.180940509884967e-06, "loss": 0.0011, "step": 107150 }, { "epoch": 0.7049860858009381, "grad_norm": 0.03553110353606864, "learning_rate": 8.180497542787072e-06, "loss": 0.0007, "step": 107160 }, { "epoch": 0.7050518739761715, "grad_norm": 0.02610003759297607, "learning_rate": 8.180054533757064e-06, "loss": 0.0012, "step": 107170 }, { "epoch": 0.7051176621514049, "grad_norm": 0.020480904413702938, "learning_rate": 8.179611482800784e-06, "loss": 0.0014, "step": 107180 }, { "epoch": 0.7051834503266383, "grad_norm": 0.03090813640311067, "learning_rate": 8.179168389924072e-06, "loss": 0.0017, "step": 107190 }, { "epoch": 0.7052492385018717, "grad_norm": 0.016107581946951475, "learning_rate": 8.178725255132772e-06, "loss": 0.001, "step": 107200 }, { "epoch": 0.7053150266771051, "grad_norm": 0.026620735972073616, "learning_rate": 8.178282078432723e-06, "loss": 0.0013, "step": 107210 }, { "epoch": 0.7053808148523384, "grad_norm": 0.03391875041155837, "learning_rate": 8.177838859829769e-06, "loss": 0.0015, "step": 107220 }, { "epoch": 0.7054466030275718, "grad_norm": 0.03634606867976191, "learning_rate": 8.177395599329756e-06, "loss": 0.0013, "step": 107230 }, { "epoch": 0.7055123912028052, "grad_norm": 0.06430957145717682, "learning_rate": 8.176952296938528e-06, "loss": 0.0026, "step": 107240 }, { "epoch": 0.7055781793780386, "grad_norm": 0.05059092844247105, "learning_rate": 8.176508952661926e-06, "loss": 0.0008, "step": 107250 }, { "epoch": 0.705643967553272, "grad_norm": 0.19484798003940793, "learning_rate": 8.176065566505795e-06, "loss": 0.0009, "step": 107260 }, { "epoch": 0.7057097557285054, "grad_norm": 0.06272820558456267, "learning_rate": 8.175622138475986e-06, "loss": 0.0018, "step": 107270 }, { "epoch": 0.7057755439037388, "grad_norm": 0.014633452350096594, "learning_rate": 8.175178668578339e-06, "loss": 0.0016, "step": 107280 }, { "epoch": 0.7058413320789722, "grad_norm": 0.008240246789471505, "learning_rate": 8.174735156818705e-06, "loss": 0.0009, "step": 107290 }, { "epoch": 0.7059071202542055, "grad_norm": 0.08627389833802873, "learning_rate": 8.174291603202929e-06, "loss": 0.0019, "step": 107300 }, { "epoch": 0.7059729084294389, "grad_norm": 0.021484944523904054, "learning_rate": 8.173848007736859e-06, "loss": 0.0013, "step": 107310 }, { "epoch": 0.7060386966046722, "grad_norm": 0.04154815573969382, "learning_rate": 8.173404370426345e-06, "loss": 0.0011, "step": 107320 }, { "epoch": 0.7061044847799056, "grad_norm": 0.07908440771575793, "learning_rate": 8.172960691277236e-06, "loss": 0.0011, "step": 107330 }, { "epoch": 0.706170272955139, "grad_norm": 0.07698297798322162, "learning_rate": 8.17251697029538e-06, "loss": 0.001, "step": 107340 }, { "epoch": 0.7062360611303724, "grad_norm": 0.23312324853095465, "learning_rate": 8.172073207486626e-06, "loss": 0.0021, "step": 107350 }, { "epoch": 0.7063018493056058, "grad_norm": 0.058089905339744966, "learning_rate": 8.17162940285683e-06, "loss": 0.0012, "step": 107360 }, { "epoch": 0.7063676374808392, "grad_norm": 0.0253780172290305, "learning_rate": 8.171185556411836e-06, "loss": 0.0018, "step": 107370 }, { "epoch": 0.7064334256560726, "grad_norm": 0.028084034100484642, "learning_rate": 8.170741668157502e-06, "loss": 0.001, "step": 107380 }, { "epoch": 0.706499213831306, "grad_norm": 0.006906879807753125, "learning_rate": 8.170297738099676e-06, "loss": 0.0009, "step": 107390 }, { "epoch": 0.7065650020065394, "grad_norm": 0.05482665658191038, "learning_rate": 8.169853766244213e-06, "loss": 0.0022, "step": 107400 }, { "epoch": 0.7066307901817728, "grad_norm": 0.054591407517126, "learning_rate": 8.169409752596966e-06, "loss": 0.0028, "step": 107410 }, { "epoch": 0.7066965783570062, "grad_norm": 0.08407369874727512, "learning_rate": 8.168965697163786e-06, "loss": 0.001, "step": 107420 }, { "epoch": 0.7067623665322394, "grad_norm": 0.047823824097623875, "learning_rate": 8.168521599950534e-06, "loss": 0.0014, "step": 107430 }, { "epoch": 0.7068281547074728, "grad_norm": 0.031676221298227704, "learning_rate": 8.16807746096306e-06, "loss": 0.0017, "step": 107440 }, { "epoch": 0.7068939428827062, "grad_norm": 0.004091638683286521, "learning_rate": 8.167633280207218e-06, "loss": 0.0012, "step": 107450 }, { "epoch": 0.7069597310579396, "grad_norm": 0.03987201174564566, "learning_rate": 8.16718905768887e-06, "loss": 0.0012, "step": 107460 }, { "epoch": 0.707025519233173, "grad_norm": 0.10199567283295674, "learning_rate": 8.166744793413869e-06, "loss": 0.0013, "step": 107470 }, { "epoch": 0.7070913074084064, "grad_norm": 0.0246534124087839, "learning_rate": 8.166300487388072e-06, "loss": 0.0019, "step": 107480 }, { "epoch": 0.7071570955836398, "grad_norm": 0.07079345106354606, "learning_rate": 8.165856139617339e-06, "loss": 0.0015, "step": 107490 }, { "epoch": 0.7072228837588732, "grad_norm": 0.06128056758988496, "learning_rate": 8.165411750107526e-06, "loss": 0.002, "step": 107500 }, { "epoch": 0.7072886719341066, "grad_norm": 0.06029596512581564, "learning_rate": 8.164967318864495e-06, "loss": 0.0009, "step": 107510 }, { "epoch": 0.70735446010934, "grad_norm": 0.0033343499016520767, "learning_rate": 8.164522845894101e-06, "loss": 0.001, "step": 107520 }, { "epoch": 0.7074202482845733, "grad_norm": 0.02565530183540139, "learning_rate": 8.164078331202207e-06, "loss": 0.001, "step": 107530 }, { "epoch": 0.7074860364598067, "grad_norm": 0.046450651503741884, "learning_rate": 8.163633774794672e-06, "loss": 0.0016, "step": 107540 }, { "epoch": 0.7075518246350401, "grad_norm": 0.0020563555105876643, "learning_rate": 8.16318917667736e-06, "loss": 0.0014, "step": 107550 }, { "epoch": 0.7076176128102735, "grad_norm": 0.09302835277435968, "learning_rate": 8.162744536856129e-06, "loss": 0.0013, "step": 107560 }, { "epoch": 0.7076834009855069, "grad_norm": 0.026479668778320833, "learning_rate": 8.162299855336842e-06, "loss": 0.0008, "step": 107570 }, { "epoch": 0.7077491891607403, "grad_norm": 0.09879486935756791, "learning_rate": 8.161855132125365e-06, "loss": 0.0016, "step": 107580 }, { "epoch": 0.7078149773359736, "grad_norm": 0.022940920510881615, "learning_rate": 8.161410367227558e-06, "loss": 0.0007, "step": 107590 }, { "epoch": 0.707880765511207, "grad_norm": 0.016719876627011965, "learning_rate": 8.160965560649285e-06, "loss": 0.0031, "step": 107600 }, { "epoch": 0.7079465536864404, "grad_norm": 0.0634911224170572, "learning_rate": 8.160520712396412e-06, "loss": 0.0016, "step": 107610 }, { "epoch": 0.7080123418616738, "grad_norm": 0.0147123727819195, "learning_rate": 8.1600758224748e-06, "loss": 0.0012, "step": 107620 }, { "epoch": 0.7080781300369071, "grad_norm": 0.16000736498797286, "learning_rate": 8.159630890890322e-06, "loss": 0.0008, "step": 107630 }, { "epoch": 0.7081439182121405, "grad_norm": 0.03532978222699983, "learning_rate": 8.159185917648838e-06, "loss": 0.001, "step": 107640 }, { "epoch": 0.7082097063873739, "grad_norm": 0.14488649739263754, "learning_rate": 8.158740902756216e-06, "loss": 0.0018, "step": 107650 }, { "epoch": 0.7082754945626073, "grad_norm": 0.018251221149193853, "learning_rate": 8.158295846218321e-06, "loss": 0.0016, "step": 107660 }, { "epoch": 0.7083412827378407, "grad_norm": 0.014597278772060839, "learning_rate": 8.157850748041025e-06, "loss": 0.0009, "step": 107670 }, { "epoch": 0.7084070709130741, "grad_norm": 0.034783420698369254, "learning_rate": 8.157405608230193e-06, "loss": 0.0024, "step": 107680 }, { "epoch": 0.7084728590883075, "grad_norm": 0.11696481174028785, "learning_rate": 8.156960426791697e-06, "loss": 0.0017, "step": 107690 }, { "epoch": 0.7085386472635409, "grad_norm": 0.024536001968782126, "learning_rate": 8.156515203731404e-06, "loss": 0.0013, "step": 107700 }, { "epoch": 0.7086044354387743, "grad_norm": 0.020284838003461005, "learning_rate": 8.156069939055183e-06, "loss": 0.0016, "step": 107710 }, { "epoch": 0.7086702236140077, "grad_norm": 0.036268322139596944, "learning_rate": 8.155624632768907e-06, "loss": 0.0009, "step": 107720 }, { "epoch": 0.708736011789241, "grad_norm": 0.050248976420779023, "learning_rate": 8.155179284878443e-06, "loss": 0.0012, "step": 107730 }, { "epoch": 0.7088017999644743, "grad_norm": 0.037640095529283206, "learning_rate": 8.154733895389667e-06, "loss": 0.0011, "step": 107740 }, { "epoch": 0.7088675881397077, "grad_norm": 0.03509586453084174, "learning_rate": 8.154288464308448e-06, "loss": 0.0013, "step": 107750 }, { "epoch": 0.7089333763149411, "grad_norm": 0.02590490413450462, "learning_rate": 8.153842991640661e-06, "loss": 0.0009, "step": 107760 }, { "epoch": 0.7089991644901745, "grad_norm": 0.030540735365712796, "learning_rate": 8.15339747739218e-06, "loss": 0.0011, "step": 107770 }, { "epoch": 0.7090649526654079, "grad_norm": 0.02814613471409935, "learning_rate": 8.152951921568873e-06, "loss": 0.002, "step": 107780 }, { "epoch": 0.7091307408406413, "grad_norm": 0.07951729850513076, "learning_rate": 8.152506324176619e-06, "loss": 0.0015, "step": 107790 }, { "epoch": 0.7091965290158747, "grad_norm": 0.04006312138515444, "learning_rate": 8.152060685221292e-06, "loss": 0.0008, "step": 107800 }, { "epoch": 0.7092623171911081, "grad_norm": 0.13714421717794398, "learning_rate": 8.151615004708769e-06, "loss": 0.0014, "step": 107810 }, { "epoch": 0.7093281053663415, "grad_norm": 0.02421693202018786, "learning_rate": 8.151169282644923e-06, "loss": 0.0013, "step": 107820 }, { "epoch": 0.7093938935415748, "grad_norm": 0.04220946685201353, "learning_rate": 8.15072351903563e-06, "loss": 0.0014, "step": 107830 }, { "epoch": 0.7094596817168082, "grad_norm": 0.008268006809480385, "learning_rate": 8.15027771388677e-06, "loss": 0.0009, "step": 107840 }, { "epoch": 0.7095254698920416, "grad_norm": 0.035933003811155176, "learning_rate": 8.149831867204218e-06, "loss": 0.0027, "step": 107850 }, { "epoch": 0.709591258067275, "grad_norm": 0.04025402477251703, "learning_rate": 8.149385978993853e-06, "loss": 0.0018, "step": 107860 }, { "epoch": 0.7096570462425084, "grad_norm": 0.05158202430627272, "learning_rate": 8.148940049261556e-06, "loss": 0.0012, "step": 107870 }, { "epoch": 0.7097228344177418, "grad_norm": 0.12121858257872842, "learning_rate": 8.148494078013203e-06, "loss": 0.0021, "step": 107880 }, { "epoch": 0.7097886225929751, "grad_norm": 0.21638345132132608, "learning_rate": 8.148048065254675e-06, "loss": 0.0019, "step": 107890 }, { "epoch": 0.7098544107682085, "grad_norm": 0.049953616396655826, "learning_rate": 8.14760201099185e-06, "loss": 0.0025, "step": 107900 }, { "epoch": 0.7099201989434419, "grad_norm": 0.011846242798499438, "learning_rate": 8.147155915230613e-06, "loss": 0.002, "step": 107910 }, { "epoch": 0.7099859871186753, "grad_norm": 0.02249560984296165, "learning_rate": 8.146709777976844e-06, "loss": 0.0029, "step": 107920 }, { "epoch": 0.7100517752939087, "grad_norm": 0.1533247743228312, "learning_rate": 8.146263599236422e-06, "loss": 0.0013, "step": 107930 }, { "epoch": 0.710117563469142, "grad_norm": 0.006907296095034312, "learning_rate": 8.145817379015235e-06, "loss": 0.0008, "step": 107940 }, { "epoch": 0.7101833516443754, "grad_norm": 0.0013080743693804262, "learning_rate": 8.145371117319161e-06, "loss": 0.0019, "step": 107950 }, { "epoch": 0.7102491398196088, "grad_norm": 0.03424576024263458, "learning_rate": 8.144924814154084e-06, "loss": 0.0011, "step": 107960 }, { "epoch": 0.7103149279948422, "grad_norm": 0.028331652893574203, "learning_rate": 8.14447846952589e-06, "loss": 0.0014, "step": 107970 }, { "epoch": 0.7103807161700756, "grad_norm": 0.05071310964494352, "learning_rate": 8.144032083440465e-06, "loss": 0.0008, "step": 107980 }, { "epoch": 0.710446504345309, "grad_norm": 0.052974492347726124, "learning_rate": 8.14358565590369e-06, "loss": 0.0023, "step": 107990 }, { "epoch": 0.7105122925205424, "grad_norm": 0.0077346940580841515, "learning_rate": 8.143139186921454e-06, "loss": 0.0018, "step": 108000 }, { "epoch": 0.7105780806957758, "grad_norm": 0.04278062912010009, "learning_rate": 8.142692676499642e-06, "loss": 0.0021, "step": 108010 }, { "epoch": 0.7106438688710092, "grad_norm": 0.09196007296107982, "learning_rate": 8.142246124644141e-06, "loss": 0.0013, "step": 108020 }, { "epoch": 0.7107096570462426, "grad_norm": 0.04851587198321403, "learning_rate": 8.141799531360842e-06, "loss": 0.0025, "step": 108030 }, { "epoch": 0.7107754452214758, "grad_norm": 0.0289682193919463, "learning_rate": 8.141352896655625e-06, "loss": 0.0011, "step": 108040 }, { "epoch": 0.7108412333967092, "grad_norm": 0.03571179181622519, "learning_rate": 8.140906220534386e-06, "loss": 0.0033, "step": 108050 }, { "epoch": 0.7109070215719426, "grad_norm": 0.05702111896570132, "learning_rate": 8.140459503003012e-06, "loss": 0.0017, "step": 108060 }, { "epoch": 0.710972809747176, "grad_norm": 0.016424141668046243, "learning_rate": 8.140012744067388e-06, "loss": 0.0014, "step": 108070 }, { "epoch": 0.7110385979224094, "grad_norm": 0.01181887251701417, "learning_rate": 8.13956594373341e-06, "loss": 0.0025, "step": 108080 }, { "epoch": 0.7111043860976428, "grad_norm": 0.012487872182700607, "learning_rate": 8.139119102006967e-06, "loss": 0.001, "step": 108090 }, { "epoch": 0.7111701742728762, "grad_norm": 0.0527409603159115, "learning_rate": 8.138672218893948e-06, "loss": 0.001, "step": 108100 }, { "epoch": 0.7112359624481096, "grad_norm": 0.039099580868755264, "learning_rate": 8.138225294400248e-06, "loss": 0.0007, "step": 108110 }, { "epoch": 0.711301750623343, "grad_norm": 0.019889443297046398, "learning_rate": 8.137778328531758e-06, "loss": 0.0008, "step": 108120 }, { "epoch": 0.7113675387985764, "grad_norm": 0.02243182648542777, "learning_rate": 8.13733132129437e-06, "loss": 0.0004, "step": 108130 }, { "epoch": 0.7114333269738097, "grad_norm": 0.02797859171831655, "learning_rate": 8.13688427269398e-06, "loss": 0.0011, "step": 108140 }, { "epoch": 0.7114991151490431, "grad_norm": 0.0520866765251425, "learning_rate": 8.136437182736476e-06, "loss": 0.0012, "step": 108150 }, { "epoch": 0.7115649033242765, "grad_norm": 0.1268113480391182, "learning_rate": 8.13599005142776e-06, "loss": 0.002, "step": 108160 }, { "epoch": 0.7116306914995099, "grad_norm": 0.11106884717475504, "learning_rate": 8.135542878773723e-06, "loss": 0.0014, "step": 108170 }, { "epoch": 0.7116964796747433, "grad_norm": 0.031010096094479415, "learning_rate": 8.135095664780259e-06, "loss": 0.0014, "step": 108180 }, { "epoch": 0.7117622678499766, "grad_norm": 0.025218846031348458, "learning_rate": 8.13464840945327e-06, "loss": 0.0015, "step": 108190 }, { "epoch": 0.71182805602521, "grad_norm": 0.11287662581651958, "learning_rate": 8.134201112798646e-06, "loss": 0.0029, "step": 108200 }, { "epoch": 0.7118938442004434, "grad_norm": 0.009502324770804508, "learning_rate": 8.133753774822289e-06, "loss": 0.0005, "step": 108210 }, { "epoch": 0.7119596323756768, "grad_norm": 0.04035846398922051, "learning_rate": 8.133306395530094e-06, "loss": 0.0029, "step": 108220 }, { "epoch": 0.7120254205509102, "grad_norm": 0.10455273081136988, "learning_rate": 8.13285897492796e-06, "loss": 0.0009, "step": 108230 }, { "epoch": 0.7120912087261435, "grad_norm": 0.02938596069303176, "learning_rate": 8.132411513021787e-06, "loss": 0.0019, "step": 108240 }, { "epoch": 0.7121569969013769, "grad_norm": 0.1444272075072479, "learning_rate": 8.131964009817472e-06, "loss": 0.0015, "step": 108250 }, { "epoch": 0.7122227850766103, "grad_norm": 0.0893836476673368, "learning_rate": 8.13151646532092e-06, "loss": 0.0016, "step": 108260 }, { "epoch": 0.7122885732518437, "grad_norm": 0.03862853570643121, "learning_rate": 8.131068879538025e-06, "loss": 0.0009, "step": 108270 }, { "epoch": 0.7123543614270771, "grad_norm": 0.17773888476314298, "learning_rate": 8.130621252474691e-06, "loss": 0.0009, "step": 108280 }, { "epoch": 0.7124201496023105, "grad_norm": 0.08186576544912476, "learning_rate": 8.13017358413682e-06, "loss": 0.0011, "step": 108290 }, { "epoch": 0.7124859377775439, "grad_norm": 0.0477352722650578, "learning_rate": 8.129725874530315e-06, "loss": 0.0012, "step": 108300 }, { "epoch": 0.7125517259527773, "grad_norm": 0.22118648043562963, "learning_rate": 8.129278123661074e-06, "loss": 0.003, "step": 108310 }, { "epoch": 0.7126175141280107, "grad_norm": 0.007555490433996255, "learning_rate": 8.128830331535006e-06, "loss": 0.001, "step": 108320 }, { "epoch": 0.7126833023032441, "grad_norm": 0.04214972948681905, "learning_rate": 8.128382498158012e-06, "loss": 0.0022, "step": 108330 }, { "epoch": 0.7127490904784775, "grad_norm": 0.01829646003745864, "learning_rate": 8.127934623535996e-06, "loss": 0.0017, "step": 108340 }, { "epoch": 0.7128148786537107, "grad_norm": 0.04356524548263068, "learning_rate": 8.127486707674865e-06, "loss": 0.0011, "step": 108350 }, { "epoch": 0.7128806668289441, "grad_norm": 0.05034099214835348, "learning_rate": 8.127038750580522e-06, "loss": 0.0015, "step": 108360 }, { "epoch": 0.7129464550041775, "grad_norm": 0.044646631662135505, "learning_rate": 8.126590752258873e-06, "loss": 0.001, "step": 108370 }, { "epoch": 0.7130122431794109, "grad_norm": 0.0051119212792403515, "learning_rate": 8.126142712715826e-06, "loss": 0.0014, "step": 108380 }, { "epoch": 0.7130780313546443, "grad_norm": 0.006690708957166274, "learning_rate": 8.125694631957288e-06, "loss": 0.0011, "step": 108390 }, { "epoch": 0.7131438195298777, "grad_norm": 0.0380469336702256, "learning_rate": 8.125246509989165e-06, "loss": 0.0011, "step": 108400 }, { "epoch": 0.7132096077051111, "grad_norm": 0.02795827685978135, "learning_rate": 8.124798346817366e-06, "loss": 0.0021, "step": 108410 }, { "epoch": 0.7132753958803445, "grad_norm": 0.06771127226339513, "learning_rate": 8.124350142447799e-06, "loss": 0.0014, "step": 108420 }, { "epoch": 0.7133411840555779, "grad_norm": 0.035360636344350244, "learning_rate": 8.123901896886375e-06, "loss": 0.0016, "step": 108430 }, { "epoch": 0.7134069722308113, "grad_norm": 0.011161502887966189, "learning_rate": 8.123453610139e-06, "loss": 0.0009, "step": 108440 }, { "epoch": 0.7134727604060446, "grad_norm": 0.04959052597924514, "learning_rate": 8.123005282211588e-06, "loss": 0.0011, "step": 108450 }, { "epoch": 0.713538548581278, "grad_norm": 0.0032691371435514855, "learning_rate": 8.12255691311005e-06, "loss": 0.0012, "step": 108460 }, { "epoch": 0.7136043367565114, "grad_norm": 0.025090493531317187, "learning_rate": 8.122108502840296e-06, "loss": 0.0037, "step": 108470 }, { "epoch": 0.7136701249317448, "grad_norm": 0.04932900560102159, "learning_rate": 8.121660051408235e-06, "loss": 0.0013, "step": 108480 }, { "epoch": 0.7137359131069781, "grad_norm": 0.020115811991913726, "learning_rate": 8.121211558819783e-06, "loss": 0.0027, "step": 108490 }, { "epoch": 0.7138017012822115, "grad_norm": 0.034901040728961094, "learning_rate": 8.120763025080855e-06, "loss": 0.001, "step": 108500 }, { "epoch": 0.7138674894574449, "grad_norm": 0.2735026268582708, "learning_rate": 8.12031445019736e-06, "loss": 0.0048, "step": 108510 }, { "epoch": 0.7139332776326783, "grad_norm": 0.02553750843382375, "learning_rate": 8.119865834175213e-06, "loss": 0.0011, "step": 108520 }, { "epoch": 0.7139990658079117, "grad_norm": 0.05716404173760628, "learning_rate": 8.11941717702033e-06, "loss": 0.0016, "step": 108530 }, { "epoch": 0.7140648539831451, "grad_norm": 0.002192985283092195, "learning_rate": 8.118968478738626e-06, "loss": 0.0022, "step": 108540 }, { "epoch": 0.7141306421583784, "grad_norm": 0.21115666015933557, "learning_rate": 8.118519739336017e-06, "loss": 0.0023, "step": 108550 }, { "epoch": 0.7141964303336118, "grad_norm": 0.04456601867560065, "learning_rate": 8.118070958818419e-06, "loss": 0.0008, "step": 108560 }, { "epoch": 0.7142622185088452, "grad_norm": 0.02709737561926678, "learning_rate": 8.117622137191745e-06, "loss": 0.0011, "step": 108570 }, { "epoch": 0.7143280066840786, "grad_norm": 0.06614359876068208, "learning_rate": 8.117173274461918e-06, "loss": 0.0023, "step": 108580 }, { "epoch": 0.714393794859312, "grad_norm": 0.07750085449367022, "learning_rate": 8.116724370634854e-06, "loss": 0.0008, "step": 108590 }, { "epoch": 0.7144595830345454, "grad_norm": 0.08281640166896169, "learning_rate": 8.11627542571647e-06, "loss": 0.0009, "step": 108600 }, { "epoch": 0.7145253712097788, "grad_norm": 0.07752470109046669, "learning_rate": 8.115826439712686e-06, "loss": 0.0032, "step": 108610 }, { "epoch": 0.7145911593850122, "grad_norm": 0.02777630013482529, "learning_rate": 8.115377412629421e-06, "loss": 0.0015, "step": 108620 }, { "epoch": 0.7146569475602456, "grad_norm": 0.12277651606693409, "learning_rate": 8.114928344472596e-06, "loss": 0.0022, "step": 108630 }, { "epoch": 0.714722735735479, "grad_norm": 0.03397747056890913, "learning_rate": 8.114479235248132e-06, "loss": 0.001, "step": 108640 }, { "epoch": 0.7147885239107122, "grad_norm": 0.06748749648687606, "learning_rate": 8.114030084961947e-06, "loss": 0.0015, "step": 108650 }, { "epoch": 0.7148543120859456, "grad_norm": 0.049435668978845125, "learning_rate": 8.113580893619968e-06, "loss": 0.0008, "step": 108660 }, { "epoch": 0.714920100261179, "grad_norm": 0.019403778141078958, "learning_rate": 8.11313166122811e-06, "loss": 0.0021, "step": 108670 }, { "epoch": 0.7149858884364124, "grad_norm": 0.0436722453667285, "learning_rate": 8.112682387792304e-06, "loss": 0.0049, "step": 108680 }, { "epoch": 0.7150516766116458, "grad_norm": 0.0739085068350437, "learning_rate": 8.112233073318466e-06, "loss": 0.0029, "step": 108690 }, { "epoch": 0.7151174647868792, "grad_norm": 0.1842982042047596, "learning_rate": 8.111783717812522e-06, "loss": 0.002, "step": 108700 }, { "epoch": 0.7151832529621126, "grad_norm": 0.04259726591712364, "learning_rate": 8.1113343212804e-06, "loss": 0.0028, "step": 108710 }, { "epoch": 0.715249041137346, "grad_norm": 0.03817372366885372, "learning_rate": 8.11088488372802e-06, "loss": 0.0006, "step": 108720 }, { "epoch": 0.7153148293125794, "grad_norm": 0.05411271279422485, "learning_rate": 8.11043540516131e-06, "loss": 0.0019, "step": 108730 }, { "epoch": 0.7153806174878128, "grad_norm": 0.05299012504424196, "learning_rate": 8.109985885586195e-06, "loss": 0.001, "step": 108740 }, { "epoch": 0.7154464056630461, "grad_norm": 0.16299177494353628, "learning_rate": 8.109536325008605e-06, "loss": 0.0018, "step": 108750 }, { "epoch": 0.7155121938382795, "grad_norm": 0.006501083645229807, "learning_rate": 8.109086723434462e-06, "loss": 0.0014, "step": 108760 }, { "epoch": 0.7155779820135129, "grad_norm": 0.02845851362962206, "learning_rate": 8.108637080869695e-06, "loss": 0.0013, "step": 108770 }, { "epoch": 0.7156437701887463, "grad_norm": 0.030684442790638842, "learning_rate": 8.108187397320234e-06, "loss": 0.0013, "step": 108780 }, { "epoch": 0.7157095583639796, "grad_norm": 0.04827415718425067, "learning_rate": 8.107737672792007e-06, "loss": 0.001, "step": 108790 }, { "epoch": 0.715775346539213, "grad_norm": 0.03659955344096252, "learning_rate": 8.107287907290941e-06, "loss": 0.002, "step": 108800 }, { "epoch": 0.7158411347144464, "grad_norm": 0.13025029241081743, "learning_rate": 8.10683810082297e-06, "loss": 0.0018, "step": 108810 }, { "epoch": 0.7159069228896798, "grad_norm": 0.16858655620824953, "learning_rate": 8.106388253394019e-06, "loss": 0.0016, "step": 108820 }, { "epoch": 0.7159727110649132, "grad_norm": 0.003736793662185768, "learning_rate": 8.105938365010025e-06, "loss": 0.0009, "step": 108830 }, { "epoch": 0.7160384992401466, "grad_norm": 0.03358925668382752, "learning_rate": 8.105488435676913e-06, "loss": 0.0025, "step": 108840 }, { "epoch": 0.71610428741538, "grad_norm": 0.024567187963299056, "learning_rate": 8.105038465400618e-06, "loss": 0.0013, "step": 108850 }, { "epoch": 0.7161700755906133, "grad_norm": 0.024234396109921806, "learning_rate": 8.104588454187075e-06, "loss": 0.0018, "step": 108860 }, { "epoch": 0.7162358637658467, "grad_norm": 0.09229402738169103, "learning_rate": 8.104138402042215e-06, "loss": 0.0012, "step": 108870 }, { "epoch": 0.7163016519410801, "grad_norm": 0.01930804632149405, "learning_rate": 8.103688308971968e-06, "loss": 0.001, "step": 108880 }, { "epoch": 0.7163674401163135, "grad_norm": 0.00918361477339522, "learning_rate": 8.103238174982273e-06, "loss": 0.0009, "step": 108890 }, { "epoch": 0.7164332282915469, "grad_norm": 0.05917269587435932, "learning_rate": 8.102788000079062e-06, "loss": 0.0011, "step": 108900 }, { "epoch": 0.7164990164667803, "grad_norm": 0.12026179738515388, "learning_rate": 8.102337784268273e-06, "loss": 0.0021, "step": 108910 }, { "epoch": 0.7165648046420137, "grad_norm": 0.03267971636790797, "learning_rate": 8.101887527555837e-06, "loss": 0.0016, "step": 108920 }, { "epoch": 0.7166305928172471, "grad_norm": 0.052264789194475046, "learning_rate": 8.101437229947695e-06, "loss": 0.0012, "step": 108930 }, { "epoch": 0.7166963809924805, "grad_norm": 0.01974717136441518, "learning_rate": 8.10098689144978e-06, "loss": 0.0011, "step": 108940 }, { "epoch": 0.7167621691677138, "grad_norm": 0.05567849865523519, "learning_rate": 8.100536512068032e-06, "loss": 0.0011, "step": 108950 }, { "epoch": 0.7168279573429471, "grad_norm": 0.06970357893326048, "learning_rate": 8.100086091808387e-06, "loss": 0.0016, "step": 108960 }, { "epoch": 0.7168937455181805, "grad_norm": 0.017658859613980282, "learning_rate": 8.099635630676785e-06, "loss": 0.0012, "step": 108970 }, { "epoch": 0.7169595336934139, "grad_norm": 0.019603062408385497, "learning_rate": 8.099185128679164e-06, "loss": 0.0019, "step": 108980 }, { "epoch": 0.7170253218686473, "grad_norm": 0.046979119223026465, "learning_rate": 8.098734585821463e-06, "loss": 0.0016, "step": 108990 }, { "epoch": 0.7170911100438807, "grad_norm": 0.025523637710258215, "learning_rate": 8.098284002109625e-06, "loss": 0.001, "step": 109000 }, { "epoch": 0.7171568982191141, "grad_norm": 0.05802044818123921, "learning_rate": 8.097833377549587e-06, "loss": 0.001, "step": 109010 }, { "epoch": 0.7172226863943475, "grad_norm": 0.05106652473981279, "learning_rate": 8.097382712147291e-06, "loss": 0.0012, "step": 109020 }, { "epoch": 0.7172884745695809, "grad_norm": 0.05271742044112403, "learning_rate": 8.09693200590868e-06, "loss": 0.0008, "step": 109030 }, { "epoch": 0.7173542627448143, "grad_norm": 0.0727920890280939, "learning_rate": 8.096481258839695e-06, "loss": 0.0026, "step": 109040 }, { "epoch": 0.7174200509200477, "grad_norm": 0.07533416415776199, "learning_rate": 8.096030470946277e-06, "loss": 0.0017, "step": 109050 }, { "epoch": 0.717485839095281, "grad_norm": 0.02654768190538857, "learning_rate": 8.095579642234372e-06, "loss": 0.0013, "step": 109060 }, { "epoch": 0.7175516272705144, "grad_norm": 0.02955529833740042, "learning_rate": 8.095128772709924e-06, "loss": 0.0073, "step": 109070 }, { "epoch": 0.7176174154457478, "grad_norm": 0.07741429310212582, "learning_rate": 8.094677862378876e-06, "loss": 0.001, "step": 109080 }, { "epoch": 0.7176832036209811, "grad_norm": 0.14049217323391575, "learning_rate": 8.094226911247175e-06, "loss": 0.0019, "step": 109090 }, { "epoch": 0.7177489917962145, "grad_norm": 0.09857657193589096, "learning_rate": 8.09377591932076e-06, "loss": 0.0014, "step": 109100 }, { "epoch": 0.7178147799714479, "grad_norm": 0.06449480319791617, "learning_rate": 8.093324886605584e-06, "loss": 0.0019, "step": 109110 }, { "epoch": 0.7178805681466813, "grad_norm": 0.06624889928713032, "learning_rate": 8.092873813107592e-06, "loss": 0.0013, "step": 109120 }, { "epoch": 0.7179463563219147, "grad_norm": 0.005372906882281906, "learning_rate": 8.092422698832726e-06, "loss": 0.001, "step": 109130 }, { "epoch": 0.7180121444971481, "grad_norm": 0.06849401775657071, "learning_rate": 8.091971543786943e-06, "loss": 0.002, "step": 109140 }, { "epoch": 0.7180779326723815, "grad_norm": 0.1530966669088114, "learning_rate": 8.091520347976182e-06, "loss": 0.0025, "step": 109150 }, { "epoch": 0.7181437208476148, "grad_norm": 0.04199592575179588, "learning_rate": 8.091069111406396e-06, "loss": 0.0014, "step": 109160 }, { "epoch": 0.7182095090228482, "grad_norm": 0.08441281790682846, "learning_rate": 8.090617834083535e-06, "loss": 0.0019, "step": 109170 }, { "epoch": 0.7182752971980816, "grad_norm": 0.12995015535821483, "learning_rate": 8.090166516013543e-06, "loss": 0.0015, "step": 109180 }, { "epoch": 0.718341085373315, "grad_norm": 0.05042001863376073, "learning_rate": 8.089715157202379e-06, "loss": 0.0012, "step": 109190 }, { "epoch": 0.7184068735485484, "grad_norm": 0.029370228926428734, "learning_rate": 8.089263757655985e-06, "loss": 0.0014, "step": 109200 }, { "epoch": 0.7184726617237818, "grad_norm": 0.00870146749246011, "learning_rate": 8.088812317380319e-06, "loss": 0.0027, "step": 109210 }, { "epoch": 0.7185384498990152, "grad_norm": 0.0009705039680055863, "learning_rate": 8.088360836381331e-06, "loss": 0.0018, "step": 109220 }, { "epoch": 0.7186042380742486, "grad_norm": 0.032131510281087995, "learning_rate": 8.08790931466497e-06, "loss": 0.0007, "step": 109230 }, { "epoch": 0.718670026249482, "grad_norm": 0.04451799994574734, "learning_rate": 8.087457752237193e-06, "loss": 0.0012, "step": 109240 }, { "epoch": 0.7187358144247153, "grad_norm": 0.1644335028719053, "learning_rate": 8.08700614910395e-06, "loss": 0.0024, "step": 109250 }, { "epoch": 0.7188016025999486, "grad_norm": 0.0757605001096957, "learning_rate": 8.0865545052712e-06, "loss": 0.0019, "step": 109260 }, { "epoch": 0.718867390775182, "grad_norm": 0.04464972702869075, "learning_rate": 8.086102820744894e-06, "loss": 0.0011, "step": 109270 }, { "epoch": 0.7189331789504154, "grad_norm": 0.02210710202408504, "learning_rate": 8.085651095530987e-06, "loss": 0.0009, "step": 109280 }, { "epoch": 0.7189989671256488, "grad_norm": 0.043902609374319244, "learning_rate": 8.085199329635435e-06, "loss": 0.0009, "step": 109290 }, { "epoch": 0.7190647553008822, "grad_norm": 0.06558025006148983, "learning_rate": 8.084747523064196e-06, "loss": 0.0018, "step": 109300 }, { "epoch": 0.7191305434761156, "grad_norm": 0.06759168911961394, "learning_rate": 8.084295675823224e-06, "loss": 0.0025, "step": 109310 }, { "epoch": 0.719196331651349, "grad_norm": 0.03160464016835465, "learning_rate": 8.083843787918477e-06, "loss": 0.0012, "step": 109320 }, { "epoch": 0.7192621198265824, "grad_norm": 0.044615470260645704, "learning_rate": 8.083391859355913e-06, "loss": 0.0011, "step": 109330 }, { "epoch": 0.7193279080018158, "grad_norm": 0.09590983025821113, "learning_rate": 8.08293989014149e-06, "loss": 0.0024, "step": 109340 }, { "epoch": 0.7193936961770492, "grad_norm": 0.03140143990118985, "learning_rate": 8.082487880281168e-06, "loss": 0.0016, "step": 109350 }, { "epoch": 0.7194594843522826, "grad_norm": 0.14417866129582968, "learning_rate": 8.082035829780906e-06, "loss": 0.0021, "step": 109360 }, { "epoch": 0.7195252725275159, "grad_norm": 0.0394200692314092, "learning_rate": 8.081583738646661e-06, "loss": 0.0015, "step": 109370 }, { "epoch": 0.7195910607027493, "grad_norm": 0.06756622433635988, "learning_rate": 8.081131606884398e-06, "loss": 0.0018, "step": 109380 }, { "epoch": 0.7196568488779826, "grad_norm": 0.04923759804472803, "learning_rate": 8.080679434500077e-06, "loss": 0.0009, "step": 109390 }, { "epoch": 0.719722637053216, "grad_norm": 0.016952142035166127, "learning_rate": 8.080227221499654e-06, "loss": 0.0034, "step": 109400 }, { "epoch": 0.7197884252284494, "grad_norm": 0.05756736012597884, "learning_rate": 8.079774967889098e-06, "loss": 0.001, "step": 109410 }, { "epoch": 0.7198542134036828, "grad_norm": 0.10986958381206513, "learning_rate": 8.07932267367437e-06, "loss": 0.0022, "step": 109420 }, { "epoch": 0.7199200015789162, "grad_norm": 0.05900380696117488, "learning_rate": 8.07887033886143e-06, "loss": 0.0013, "step": 109430 }, { "epoch": 0.7199857897541496, "grad_norm": 0.0673374044741968, "learning_rate": 8.078417963456244e-06, "loss": 0.0009, "step": 109440 }, { "epoch": 0.720051577929383, "grad_norm": 0.07384897283062987, "learning_rate": 8.077965547464777e-06, "loss": 0.0019, "step": 109450 }, { "epoch": 0.7201173661046164, "grad_norm": 0.06692222090964554, "learning_rate": 8.077513090892992e-06, "loss": 0.002, "step": 109460 }, { "epoch": 0.7201831542798497, "grad_norm": 0.04889991466099656, "learning_rate": 8.077060593746855e-06, "loss": 0.0034, "step": 109470 }, { "epoch": 0.7202489424550831, "grad_norm": 0.0722647818342168, "learning_rate": 8.07660805603233e-06, "loss": 0.0017, "step": 109480 }, { "epoch": 0.7203147306303165, "grad_norm": 0.06004351540865688, "learning_rate": 8.076155477755387e-06, "loss": 0.001, "step": 109490 }, { "epoch": 0.7203805188055499, "grad_norm": 0.03432632636786021, "learning_rate": 8.07570285892199e-06, "loss": 0.0012, "step": 109500 }, { "epoch": 0.7204463069807833, "grad_norm": 0.014600042466752032, "learning_rate": 8.075250199538106e-06, "loss": 0.0019, "step": 109510 }, { "epoch": 0.7205120951560167, "grad_norm": 0.02469476094543338, "learning_rate": 8.074797499609707e-06, "loss": 0.0013, "step": 109520 }, { "epoch": 0.7205778833312501, "grad_norm": 0.10235438191557822, "learning_rate": 8.074344759142756e-06, "loss": 0.0015, "step": 109530 }, { "epoch": 0.7206436715064835, "grad_norm": 0.1144264694366571, "learning_rate": 8.073891978143225e-06, "loss": 0.0015, "step": 109540 }, { "epoch": 0.7207094596817168, "grad_norm": 0.00369017837023814, "learning_rate": 8.073439156617084e-06, "loss": 0.0018, "step": 109550 }, { "epoch": 0.7207752478569502, "grad_norm": 0.0016814069987912638, "learning_rate": 8.072986294570302e-06, "loss": 0.0015, "step": 109560 }, { "epoch": 0.7208410360321835, "grad_norm": 0.04533204643645863, "learning_rate": 8.072533392008848e-06, "loss": 0.0013, "step": 109570 }, { "epoch": 0.7209068242074169, "grad_norm": 0.046688324208297424, "learning_rate": 8.072080448938698e-06, "loss": 0.0014, "step": 109580 }, { "epoch": 0.7209726123826503, "grad_norm": 0.03944713160043594, "learning_rate": 8.071627465365817e-06, "loss": 0.0015, "step": 109590 }, { "epoch": 0.7210384005578837, "grad_norm": 0.0424161097044619, "learning_rate": 8.071174441296182e-06, "loss": 0.001, "step": 109600 }, { "epoch": 0.7211041887331171, "grad_norm": 0.09762284478199246, "learning_rate": 8.070721376735765e-06, "loss": 0.0007, "step": 109610 }, { "epoch": 0.7211699769083505, "grad_norm": 0.08213552242186874, "learning_rate": 8.070268271690538e-06, "loss": 0.002, "step": 109620 }, { "epoch": 0.7212357650835839, "grad_norm": 0.025600748071206495, "learning_rate": 8.069815126166475e-06, "loss": 0.0024, "step": 109630 }, { "epoch": 0.7213015532588173, "grad_norm": 0.012479237542521622, "learning_rate": 8.069361940169554e-06, "loss": 0.0014, "step": 109640 }, { "epoch": 0.7213673414340507, "grad_norm": 0.061131237279058885, "learning_rate": 8.068908713705743e-06, "loss": 0.0019, "step": 109650 }, { "epoch": 0.7214331296092841, "grad_norm": 0.07357203728249878, "learning_rate": 8.068455446781022e-06, "loss": 0.0081, "step": 109660 }, { "epoch": 0.7214989177845174, "grad_norm": 0.08589551298124204, "learning_rate": 8.068002139401367e-06, "loss": 0.0021, "step": 109670 }, { "epoch": 0.7215647059597508, "grad_norm": 0.061294858469455364, "learning_rate": 8.067548791572752e-06, "loss": 0.0014, "step": 109680 }, { "epoch": 0.7216304941349841, "grad_norm": 0.07534040480591186, "learning_rate": 8.067095403301157e-06, "loss": 0.002, "step": 109690 }, { "epoch": 0.7216962823102175, "grad_norm": 0.0957556732980684, "learning_rate": 8.066641974592557e-06, "loss": 0.0014, "step": 109700 }, { "epoch": 0.7217620704854509, "grad_norm": 0.05610916278857897, "learning_rate": 8.066188505452931e-06, "loss": 0.0009, "step": 109710 }, { "epoch": 0.7218278586606843, "grad_norm": 0.059689656934954635, "learning_rate": 8.065734995888259e-06, "loss": 0.0011, "step": 109720 }, { "epoch": 0.7218936468359177, "grad_norm": 0.05462193866927284, "learning_rate": 8.065281445904519e-06, "loss": 0.0032, "step": 109730 }, { "epoch": 0.7219594350111511, "grad_norm": 0.060885973597809534, "learning_rate": 8.064827855507688e-06, "loss": 0.0019, "step": 109740 }, { "epoch": 0.7220252231863845, "grad_norm": 0.04518378212651259, "learning_rate": 8.064374224703752e-06, "loss": 0.0025, "step": 109750 }, { "epoch": 0.7220910113616179, "grad_norm": 0.0686488955910729, "learning_rate": 8.063920553498686e-06, "loss": 0.0012, "step": 109760 }, { "epoch": 0.7221567995368513, "grad_norm": 0.13681656888934068, "learning_rate": 8.063466841898474e-06, "loss": 0.0018, "step": 109770 }, { "epoch": 0.7222225877120846, "grad_norm": 0.04577923979324873, "learning_rate": 8.0630130899091e-06, "loss": 0.003, "step": 109780 }, { "epoch": 0.722288375887318, "grad_norm": 0.07601113926424292, "learning_rate": 8.062559297536541e-06, "loss": 0.0014, "step": 109790 }, { "epoch": 0.7223541640625514, "grad_norm": 0.10629572987157551, "learning_rate": 8.062105464786782e-06, "loss": 0.001, "step": 109800 }, { "epoch": 0.7224199522377848, "grad_norm": 0.02689512774608059, "learning_rate": 8.061651591665809e-06, "loss": 0.0014, "step": 109810 }, { "epoch": 0.7224857404130182, "grad_norm": 0.055928387121932994, "learning_rate": 8.061197678179603e-06, "loss": 0.0009, "step": 109820 }, { "epoch": 0.7225515285882516, "grad_norm": 0.0500086772321108, "learning_rate": 8.06074372433415e-06, "loss": 0.0021, "step": 109830 }, { "epoch": 0.722617316763485, "grad_norm": 0.17079619608334182, "learning_rate": 8.060289730135436e-06, "loss": 0.0026, "step": 109840 }, { "epoch": 0.7226831049387183, "grad_norm": 0.09093023606952513, "learning_rate": 8.059835695589442e-06, "loss": 0.0011, "step": 109850 }, { "epoch": 0.7227488931139517, "grad_norm": 0.06427254176146398, "learning_rate": 8.059381620702158e-06, "loss": 0.0009, "step": 109860 }, { "epoch": 0.7228146812891851, "grad_norm": 0.03894017495505987, "learning_rate": 8.05892750547957e-06, "loss": 0.0039, "step": 109870 }, { "epoch": 0.7228804694644184, "grad_norm": 0.10620176004649776, "learning_rate": 8.058473349927664e-06, "loss": 0.0015, "step": 109880 }, { "epoch": 0.7229462576396518, "grad_norm": 0.13635760109718897, "learning_rate": 8.058019154052428e-06, "loss": 0.0022, "step": 109890 }, { "epoch": 0.7230120458148852, "grad_norm": 1.0408550177389457, "learning_rate": 8.057564917859852e-06, "loss": 0.0028, "step": 109900 }, { "epoch": 0.7230778339901186, "grad_norm": 0.043063454303632884, "learning_rate": 8.057110641355923e-06, "loss": 0.0016, "step": 109910 }, { "epoch": 0.723143622165352, "grad_norm": 0.00912989729653917, "learning_rate": 8.05665632454663e-06, "loss": 0.0023, "step": 109920 }, { "epoch": 0.7232094103405854, "grad_norm": 0.008108590791497174, "learning_rate": 8.056201967437963e-06, "loss": 0.0022, "step": 109930 }, { "epoch": 0.7232751985158188, "grad_norm": 0.0020959177271455314, "learning_rate": 8.055747570035913e-06, "loss": 0.0017, "step": 109940 }, { "epoch": 0.7233409866910522, "grad_norm": 0.07245534298209154, "learning_rate": 8.05529313234647e-06, "loss": 0.0017, "step": 109950 }, { "epoch": 0.7234067748662856, "grad_norm": 0.04118986709422952, "learning_rate": 8.054838654375627e-06, "loss": 0.0014, "step": 109960 }, { "epoch": 0.723472563041519, "grad_norm": 0.01544931540516388, "learning_rate": 8.054384136129374e-06, "loss": 0.0012, "step": 109970 }, { "epoch": 0.7235383512167523, "grad_norm": 0.2115573231022379, "learning_rate": 8.053929577613702e-06, "loss": 0.0014, "step": 109980 }, { "epoch": 0.7236041393919856, "grad_norm": 0.04904765914670897, "learning_rate": 8.053474978834608e-06, "loss": 0.001, "step": 109990 }, { "epoch": 0.723669927567219, "grad_norm": 0.062101897881158274, "learning_rate": 8.053020339798084e-06, "loss": 0.0011, "step": 110000 }, { "epoch": 0.7237357157424524, "grad_norm": 0.01439886149371149, "learning_rate": 8.052565660510123e-06, "loss": 0.0021, "step": 110010 }, { "epoch": 0.7238015039176858, "grad_norm": 0.03058241777801135, "learning_rate": 8.05211094097672e-06, "loss": 0.001, "step": 110020 }, { "epoch": 0.7238672920929192, "grad_norm": 0.05675621182618913, "learning_rate": 8.05165618120387e-06, "loss": 0.0012, "step": 110030 }, { "epoch": 0.7239330802681526, "grad_norm": 0.03838692957779977, "learning_rate": 8.051201381197569e-06, "loss": 0.0006, "step": 110040 }, { "epoch": 0.723998868443386, "grad_norm": 0.020574159631534892, "learning_rate": 8.050746540963814e-06, "loss": 0.0011, "step": 110050 }, { "epoch": 0.7240646566186194, "grad_norm": 0.09852089701436229, "learning_rate": 8.0502916605086e-06, "loss": 0.0025, "step": 110060 }, { "epoch": 0.7241304447938528, "grad_norm": 0.15288526866953786, "learning_rate": 8.049836739837925e-06, "loss": 0.0024, "step": 110070 }, { "epoch": 0.7241962329690861, "grad_norm": 0.018231216089475883, "learning_rate": 8.049381778957786e-06, "loss": 0.0019, "step": 110080 }, { "epoch": 0.7242620211443195, "grad_norm": 0.06976611868436361, "learning_rate": 8.048926777874182e-06, "loss": 0.0035, "step": 110090 }, { "epoch": 0.7243278093195529, "grad_norm": 0.04584589327079277, "learning_rate": 8.04847173659311e-06, "loss": 0.0012, "step": 110100 }, { "epoch": 0.7243935974947863, "grad_norm": 0.10221896082920005, "learning_rate": 8.048016655120574e-06, "loss": 0.0015, "step": 110110 }, { "epoch": 0.7244593856700197, "grad_norm": 0.03250520646788371, "learning_rate": 8.04756153346257e-06, "loss": 0.0015, "step": 110120 }, { "epoch": 0.7245251738452531, "grad_norm": 0.19183475851940776, "learning_rate": 8.0471063716251e-06, "loss": 0.0035, "step": 110130 }, { "epoch": 0.7245909620204865, "grad_norm": 0.14445263683641407, "learning_rate": 8.046651169614161e-06, "loss": 0.0021, "step": 110140 }, { "epoch": 0.7246567501957198, "grad_norm": 0.05069329686666102, "learning_rate": 8.04619592743576e-06, "loss": 0.0014, "step": 110150 }, { "epoch": 0.7247225383709532, "grad_norm": 0.17806523903156318, "learning_rate": 8.045740645095896e-06, "loss": 0.0019, "step": 110160 }, { "epoch": 0.7247883265461866, "grad_norm": 0.0795214334118404, "learning_rate": 8.045285322600573e-06, "loss": 0.0016, "step": 110170 }, { "epoch": 0.7248541147214199, "grad_norm": 0.07622399492123533, "learning_rate": 8.044829959955792e-06, "loss": 0.0013, "step": 110180 }, { "epoch": 0.7249199028966533, "grad_norm": 0.04319577913878496, "learning_rate": 8.044374557167559e-06, "loss": 0.0014, "step": 110190 }, { "epoch": 0.7249856910718867, "grad_norm": 0.051540853510754964, "learning_rate": 8.043919114241874e-06, "loss": 0.0017, "step": 110200 }, { "epoch": 0.7250514792471201, "grad_norm": 0.028033134900909223, "learning_rate": 8.043463631184746e-06, "loss": 0.0007, "step": 110210 }, { "epoch": 0.7251172674223535, "grad_norm": 0.033938932385591265, "learning_rate": 8.043008108002179e-06, "loss": 0.0007, "step": 110220 }, { "epoch": 0.7251830555975869, "grad_norm": 0.16280421055361885, "learning_rate": 8.042552544700178e-06, "loss": 0.0013, "step": 110230 }, { "epoch": 0.7252488437728203, "grad_norm": 0.03241714672796392, "learning_rate": 8.042096941284749e-06, "loss": 0.0029, "step": 110240 }, { "epoch": 0.7253146319480537, "grad_norm": 0.02632410009632188, "learning_rate": 8.0416412977619e-06, "loss": 0.0013, "step": 110250 }, { "epoch": 0.7253804201232871, "grad_norm": 0.11129963308086943, "learning_rate": 8.041185614137636e-06, "loss": 0.0008, "step": 110260 }, { "epoch": 0.7254462082985205, "grad_norm": 0.04140138249191298, "learning_rate": 8.040729890417968e-06, "loss": 0.0011, "step": 110270 }, { "epoch": 0.7255119964737539, "grad_norm": 0.0739275743926214, "learning_rate": 8.040274126608902e-06, "loss": 0.0013, "step": 110280 }, { "epoch": 0.7255777846489871, "grad_norm": 0.02680724200605477, "learning_rate": 8.039818322716447e-06, "loss": 0.0011, "step": 110290 }, { "epoch": 0.7256435728242205, "grad_norm": 0.055588393507329005, "learning_rate": 8.039362478746612e-06, "loss": 0.0009, "step": 110300 }, { "epoch": 0.7257093609994539, "grad_norm": 0.07900328478669971, "learning_rate": 8.038906594705408e-06, "loss": 0.0014, "step": 110310 }, { "epoch": 0.7257751491746873, "grad_norm": 0.5430516923802919, "learning_rate": 8.038450670598848e-06, "loss": 0.0014, "step": 110320 }, { "epoch": 0.7258409373499207, "grad_norm": 0.08892018429180633, "learning_rate": 8.037994706432937e-06, "loss": 0.0028, "step": 110330 }, { "epoch": 0.7259067255251541, "grad_norm": 0.024632471733432297, "learning_rate": 8.03753870221369e-06, "loss": 0.0014, "step": 110340 }, { "epoch": 0.7259725137003875, "grad_norm": 0.052843584888761286, "learning_rate": 8.037082657947118e-06, "loss": 0.0018, "step": 110350 }, { "epoch": 0.7260383018756209, "grad_norm": 0.11548054909721096, "learning_rate": 8.036626573639236e-06, "loss": 0.0012, "step": 110360 }, { "epoch": 0.7261040900508543, "grad_norm": 0.46950137543737275, "learning_rate": 8.036170449296054e-06, "loss": 0.0016, "step": 110370 }, { "epoch": 0.7261698782260877, "grad_norm": 0.013996218929281788, "learning_rate": 8.035714284923587e-06, "loss": 0.0016, "step": 110380 }, { "epoch": 0.726235666401321, "grad_norm": 0.08889344963457121, "learning_rate": 8.035258080527848e-06, "loss": 0.0024, "step": 110390 }, { "epoch": 0.7263014545765544, "grad_norm": 0.0699485441761941, "learning_rate": 8.034801836114854e-06, "loss": 0.0018, "step": 110400 }, { "epoch": 0.7263672427517878, "grad_norm": 0.04168210921056078, "learning_rate": 8.034345551690619e-06, "loss": 0.0025, "step": 110410 }, { "epoch": 0.7264330309270212, "grad_norm": 0.1484212531928277, "learning_rate": 8.033889227261158e-06, "loss": 0.0017, "step": 110420 }, { "epoch": 0.7264988191022546, "grad_norm": 0.013210297899471575, "learning_rate": 8.033432862832488e-06, "loss": 0.0018, "step": 110430 }, { "epoch": 0.726564607277488, "grad_norm": 0.143642339669195, "learning_rate": 8.032976458410625e-06, "loss": 0.0021, "step": 110440 }, { "epoch": 0.7266303954527213, "grad_norm": 0.0026993845198503437, "learning_rate": 8.032520014001588e-06, "loss": 0.0007, "step": 110450 }, { "epoch": 0.7266961836279547, "grad_norm": 0.0477656873628908, "learning_rate": 8.032063529611392e-06, "loss": 0.0013, "step": 110460 }, { "epoch": 0.7267619718031881, "grad_norm": 0.07000451703251168, "learning_rate": 8.031607005246058e-06, "loss": 0.0015, "step": 110470 }, { "epoch": 0.7268277599784215, "grad_norm": 0.1084618175584579, "learning_rate": 8.031150440911603e-06, "loss": 0.0012, "step": 110480 }, { "epoch": 0.7268935481536548, "grad_norm": 0.04996922743685582, "learning_rate": 8.030693836614049e-06, "loss": 0.0014, "step": 110490 }, { "epoch": 0.7269593363288882, "grad_norm": 0.05000495965382026, "learning_rate": 8.030237192359413e-06, "loss": 0.0043, "step": 110500 }, { "epoch": 0.7270251245041216, "grad_norm": 0.05658049097544468, "learning_rate": 8.029780508153718e-06, "loss": 0.002, "step": 110510 }, { "epoch": 0.727090912679355, "grad_norm": 0.007065045715890751, "learning_rate": 8.029323784002982e-06, "loss": 0.0016, "step": 110520 }, { "epoch": 0.7271567008545884, "grad_norm": 0.0572686146793725, "learning_rate": 8.02886701991323e-06, "loss": 0.0015, "step": 110530 }, { "epoch": 0.7272224890298218, "grad_norm": 0.17251725210559557, "learning_rate": 8.02841021589048e-06, "loss": 0.001, "step": 110540 }, { "epoch": 0.7272882772050552, "grad_norm": 0.074390134968673, "learning_rate": 8.02795337194076e-06, "loss": 0.0021, "step": 110550 }, { "epoch": 0.7273540653802886, "grad_norm": 0.06258941706341055, "learning_rate": 8.027496488070086e-06, "loss": 0.0014, "step": 110560 }, { "epoch": 0.727419853555522, "grad_norm": 0.008292095843938897, "learning_rate": 8.027039564284487e-06, "loss": 0.0026, "step": 110570 }, { "epoch": 0.7274856417307554, "grad_norm": 0.03265341443496709, "learning_rate": 8.026582600589986e-06, "loss": 0.0006, "step": 110580 }, { "epoch": 0.7275514299059886, "grad_norm": 0.010874809055944446, "learning_rate": 8.026125596992608e-06, "loss": 0.0022, "step": 110590 }, { "epoch": 0.727617218081222, "grad_norm": 0.056123222939621735, "learning_rate": 8.025668553498376e-06, "loss": 0.0026, "step": 110600 }, { "epoch": 0.7276830062564554, "grad_norm": 0.08220346276861698, "learning_rate": 8.025211470113319e-06, "loss": 0.0017, "step": 110610 }, { "epoch": 0.7277487944316888, "grad_norm": 0.011098861591502767, "learning_rate": 8.024754346843462e-06, "loss": 0.0007, "step": 110620 }, { "epoch": 0.7278145826069222, "grad_norm": 0.060825878157129504, "learning_rate": 8.02429718369483e-06, "loss": 0.0029, "step": 110630 }, { "epoch": 0.7278803707821556, "grad_norm": 0.04769537083449169, "learning_rate": 8.023839980673451e-06, "loss": 0.0021, "step": 110640 }, { "epoch": 0.727946158957389, "grad_norm": 0.04026168919818992, "learning_rate": 8.023382737785354e-06, "loss": 0.0022, "step": 110650 }, { "epoch": 0.7280119471326224, "grad_norm": 0.05988221694504896, "learning_rate": 8.022925455036566e-06, "loss": 0.0016, "step": 110660 }, { "epoch": 0.7280777353078558, "grad_norm": 0.06379903000338868, "learning_rate": 8.022468132433119e-06, "loss": 0.0012, "step": 110670 }, { "epoch": 0.7281435234830892, "grad_norm": 0.07681178452128479, "learning_rate": 8.022010769981038e-06, "loss": 0.0016, "step": 110680 }, { "epoch": 0.7282093116583225, "grad_norm": 0.07096663200544902, "learning_rate": 8.021553367686355e-06, "loss": 0.0016, "step": 110690 }, { "epoch": 0.7282750998335559, "grad_norm": 0.10414322615371245, "learning_rate": 8.0210959255551e-06, "loss": 0.0015, "step": 110700 }, { "epoch": 0.7283408880087893, "grad_norm": 0.5008624649058214, "learning_rate": 8.020638443593306e-06, "loss": 0.0015, "step": 110710 }, { "epoch": 0.7284066761840227, "grad_norm": 0.03994217344981409, "learning_rate": 8.020180921807002e-06, "loss": 0.0025, "step": 110720 }, { "epoch": 0.7284724643592561, "grad_norm": 0.001883162371707197, "learning_rate": 8.019723360202221e-06, "loss": 0.0021, "step": 110730 }, { "epoch": 0.7285382525344894, "grad_norm": 0.013965489873603599, "learning_rate": 8.019265758784996e-06, "loss": 0.0012, "step": 110740 }, { "epoch": 0.7286040407097228, "grad_norm": 0.10742903409598323, "learning_rate": 8.01880811756136e-06, "loss": 0.0022, "step": 110750 }, { "epoch": 0.7286698288849562, "grad_norm": 0.03680722086672519, "learning_rate": 8.018350436537346e-06, "loss": 0.0009, "step": 110760 }, { "epoch": 0.7287356170601896, "grad_norm": 0.14733857154823266, "learning_rate": 8.017892715718986e-06, "loss": 0.0015, "step": 110770 }, { "epoch": 0.728801405235423, "grad_norm": 0.07747936088485784, "learning_rate": 8.01743495511232e-06, "loss": 0.0029, "step": 110780 }, { "epoch": 0.7288671934106564, "grad_norm": 0.018297370707137512, "learning_rate": 8.016977154723378e-06, "loss": 0.001, "step": 110790 }, { "epoch": 0.7289329815858897, "grad_norm": 0.04856139458222062, "learning_rate": 8.0165193145582e-06, "loss": 0.001, "step": 110800 }, { "epoch": 0.7289987697611231, "grad_norm": 0.08352047722015615, "learning_rate": 8.016061434622818e-06, "loss": 0.0011, "step": 110810 }, { "epoch": 0.7290645579363565, "grad_norm": 0.008890443619102564, "learning_rate": 8.015603514923272e-06, "loss": 0.002, "step": 110820 }, { "epoch": 0.7291303461115899, "grad_norm": 0.0430508706351958, "learning_rate": 8.015145555465598e-06, "loss": 0.0015, "step": 110830 }, { "epoch": 0.7291961342868233, "grad_norm": 0.08820711673177044, "learning_rate": 8.014687556255834e-06, "loss": 0.0013, "step": 110840 }, { "epoch": 0.7292619224620567, "grad_norm": 0.24074057272592622, "learning_rate": 8.014229517300015e-06, "loss": 0.0024, "step": 110850 }, { "epoch": 0.7293277106372901, "grad_norm": 0.04425244805663542, "learning_rate": 8.013771438604187e-06, "loss": 0.0008, "step": 110860 }, { "epoch": 0.7293934988125235, "grad_norm": 0.04293619581957982, "learning_rate": 8.013313320174385e-06, "loss": 0.0008, "step": 110870 }, { "epoch": 0.7294592869877569, "grad_norm": 0.06525819921145219, "learning_rate": 8.012855162016648e-06, "loss": 0.0012, "step": 110880 }, { "epoch": 0.7295250751629903, "grad_norm": 0.1148750879727014, "learning_rate": 8.01239696413702e-06, "loss": 0.0014, "step": 110890 }, { "epoch": 0.7295908633382235, "grad_norm": 0.000434333664788901, "learning_rate": 8.011938726541537e-06, "loss": 0.0012, "step": 110900 }, { "epoch": 0.7296566515134569, "grad_norm": 0.03801654795108337, "learning_rate": 8.011480449236244e-06, "loss": 0.0017, "step": 110910 }, { "epoch": 0.7297224396886903, "grad_norm": 0.036305862691768415, "learning_rate": 8.011022132227185e-06, "loss": 0.001, "step": 110920 }, { "epoch": 0.7297882278639237, "grad_norm": 0.03132209648541517, "learning_rate": 8.010563775520396e-06, "loss": 0.0013, "step": 110930 }, { "epoch": 0.7298540160391571, "grad_norm": 0.04566631358080385, "learning_rate": 8.010105379121924e-06, "loss": 0.0006, "step": 110940 }, { "epoch": 0.7299198042143905, "grad_norm": 0.051090958567303474, "learning_rate": 8.009646943037812e-06, "loss": 0.0028, "step": 110950 }, { "epoch": 0.7299855923896239, "grad_norm": 0.048217739901947936, "learning_rate": 8.009188467274108e-06, "loss": 0.001, "step": 110960 }, { "epoch": 0.7300513805648573, "grad_norm": 0.06753233201839833, "learning_rate": 8.008729951836848e-06, "loss": 0.0016, "step": 110970 }, { "epoch": 0.7301171687400907, "grad_norm": 0.05838892896966952, "learning_rate": 8.008271396732086e-06, "loss": 0.0006, "step": 110980 }, { "epoch": 0.7301829569153241, "grad_norm": 0.04162229714912323, "learning_rate": 8.007812801965862e-06, "loss": 0.0007, "step": 110990 }, { "epoch": 0.7302487450905574, "grad_norm": 0.029741408729653758, "learning_rate": 8.007354167544225e-06, "loss": 0.0028, "step": 111000 }, { "epoch": 0.7303145332657908, "grad_norm": 0.04792184291016807, "learning_rate": 8.006895493473219e-06, "loss": 0.0026, "step": 111010 }, { "epoch": 0.7303803214410242, "grad_norm": 0.06183255712931007, "learning_rate": 8.006436779758896e-06, "loss": 0.0011, "step": 111020 }, { "epoch": 0.7304461096162576, "grad_norm": 0.12507064415124894, "learning_rate": 8.005978026407296e-06, "loss": 0.0013, "step": 111030 }, { "epoch": 0.730511897791491, "grad_norm": 0.053283385342061514, "learning_rate": 8.005519233424474e-06, "loss": 0.0007, "step": 111040 }, { "epoch": 0.7305776859667243, "grad_norm": 0.0009561536514363675, "learning_rate": 8.005060400816478e-06, "loss": 0.0012, "step": 111050 }, { "epoch": 0.7306434741419577, "grad_norm": 0.02998770793272984, "learning_rate": 8.004601528589355e-06, "loss": 0.001, "step": 111060 }, { "epoch": 0.7307092623171911, "grad_norm": 0.005244408597123339, "learning_rate": 8.004142616749155e-06, "loss": 0.0005, "step": 111070 }, { "epoch": 0.7307750504924245, "grad_norm": 0.10784535810674391, "learning_rate": 8.00368366530193e-06, "loss": 0.0021, "step": 111080 }, { "epoch": 0.7308408386676579, "grad_norm": 0.07419283510343458, "learning_rate": 8.003224674253729e-06, "loss": 0.0012, "step": 111090 }, { "epoch": 0.7309066268428912, "grad_norm": 0.03962213801579518, "learning_rate": 8.002765643610606e-06, "loss": 0.0008, "step": 111100 }, { "epoch": 0.7309724150181246, "grad_norm": 0.10325997137105992, "learning_rate": 8.002306573378609e-06, "loss": 0.0012, "step": 111110 }, { "epoch": 0.731038203193358, "grad_norm": 0.09240526913631172, "learning_rate": 8.001847463563794e-06, "loss": 0.001, "step": 111120 }, { "epoch": 0.7311039913685914, "grad_norm": 0.004663156561451518, "learning_rate": 8.001388314172214e-06, "loss": 0.0012, "step": 111130 }, { "epoch": 0.7311697795438248, "grad_norm": 0.0792240911320094, "learning_rate": 8.000929125209919e-06, "loss": 0.0017, "step": 111140 }, { "epoch": 0.7312355677190582, "grad_norm": 0.10538013942656572, "learning_rate": 8.000469896682967e-06, "loss": 0.003, "step": 111150 }, { "epoch": 0.7313013558942916, "grad_norm": 0.016135609497784944, "learning_rate": 8.000010628597411e-06, "loss": 0.0025, "step": 111160 }, { "epoch": 0.731367144069525, "grad_norm": 0.012151908322149274, "learning_rate": 7.999551320959305e-06, "loss": 0.001, "step": 111170 }, { "epoch": 0.7314329322447584, "grad_norm": 0.13641701089773575, "learning_rate": 7.999091973774705e-06, "loss": 0.0014, "step": 111180 }, { "epoch": 0.7314987204199918, "grad_norm": 0.07121856679298251, "learning_rate": 7.998632587049668e-06, "loss": 0.0011, "step": 111190 }, { "epoch": 0.7315645085952251, "grad_norm": 0.03367156678741338, "learning_rate": 7.99817316079025e-06, "loss": 0.0011, "step": 111200 }, { "epoch": 0.7316302967704584, "grad_norm": 0.005081278573993789, "learning_rate": 7.997713695002509e-06, "loss": 0.0011, "step": 111210 }, { "epoch": 0.7316960849456918, "grad_norm": 0.07767343859968864, "learning_rate": 7.997254189692501e-06, "loss": 0.0017, "step": 111220 }, { "epoch": 0.7317618731209252, "grad_norm": 0.051294356021243846, "learning_rate": 7.996794644866286e-06, "loss": 0.0013, "step": 111230 }, { "epoch": 0.7318276612961586, "grad_norm": 0.034607477427318886, "learning_rate": 7.99633506052992e-06, "loss": 0.0011, "step": 111240 }, { "epoch": 0.731893449471392, "grad_norm": 0.007488860627423611, "learning_rate": 7.995875436689466e-06, "loss": 0.0007, "step": 111250 }, { "epoch": 0.7319592376466254, "grad_norm": 0.04508825220767637, "learning_rate": 7.995415773350982e-06, "loss": 0.0014, "step": 111260 }, { "epoch": 0.7320250258218588, "grad_norm": 0.08642978220060821, "learning_rate": 7.994956070520528e-06, "loss": 0.0013, "step": 111270 }, { "epoch": 0.7320908139970922, "grad_norm": 0.05615589526680587, "learning_rate": 7.994496328204163e-06, "loss": 0.001, "step": 111280 }, { "epoch": 0.7321566021723256, "grad_norm": 0.055446952160112015, "learning_rate": 7.99403654640795e-06, "loss": 0.0009, "step": 111290 }, { "epoch": 0.732222390347559, "grad_norm": 0.22747627408996152, "learning_rate": 7.993576725137952e-06, "loss": 0.0015, "step": 111300 }, { "epoch": 0.7322881785227923, "grad_norm": 0.004093395237270132, "learning_rate": 7.993116864400231e-06, "loss": 0.0014, "step": 111310 }, { "epoch": 0.7323539666980257, "grad_norm": 0.06354238064038607, "learning_rate": 7.992656964200848e-06, "loss": 0.0022, "step": 111320 }, { "epoch": 0.732419754873259, "grad_norm": 0.010072046774276268, "learning_rate": 7.99219702454587e-06, "loss": 0.0016, "step": 111330 }, { "epoch": 0.7324855430484924, "grad_norm": 0.040616740550472714, "learning_rate": 7.991737045441356e-06, "loss": 0.0007, "step": 111340 }, { "epoch": 0.7325513312237258, "grad_norm": 0.03193119258111681, "learning_rate": 7.991277026893374e-06, "loss": 0.0013, "step": 111350 }, { "epoch": 0.7326171193989592, "grad_norm": 0.020280534979635455, "learning_rate": 7.990816968907987e-06, "loss": 0.0013, "step": 111360 }, { "epoch": 0.7326829075741926, "grad_norm": 0.039862580211921336, "learning_rate": 7.990356871491261e-06, "loss": 0.0025, "step": 111370 }, { "epoch": 0.732748695749426, "grad_norm": 0.013723086667777778, "learning_rate": 7.989896734649263e-06, "loss": 0.0019, "step": 111380 }, { "epoch": 0.7328144839246594, "grad_norm": 0.02915233268902942, "learning_rate": 7.989436558388058e-06, "loss": 0.0012, "step": 111390 }, { "epoch": 0.7328802720998928, "grad_norm": 0.013543495462788614, "learning_rate": 7.988976342713715e-06, "loss": 0.0012, "step": 111400 }, { "epoch": 0.7329460602751261, "grad_norm": 0.06696931906990909, "learning_rate": 7.9885160876323e-06, "loss": 0.003, "step": 111410 }, { "epoch": 0.7330118484503595, "grad_norm": 0.13414229286598728, "learning_rate": 7.98805579314988e-06, "loss": 0.0017, "step": 111420 }, { "epoch": 0.7330776366255929, "grad_norm": 0.02673300988646207, "learning_rate": 7.987595459272529e-06, "loss": 0.0016, "step": 111430 }, { "epoch": 0.7331434248008263, "grad_norm": 0.03695902650757251, "learning_rate": 7.987135086006308e-06, "loss": 0.0017, "step": 111440 }, { "epoch": 0.7332092129760597, "grad_norm": 0.059774792812483185, "learning_rate": 7.986674673357291e-06, "loss": 0.001, "step": 111450 }, { "epoch": 0.7332750011512931, "grad_norm": 0.062009187929283116, "learning_rate": 7.98621422133155e-06, "loss": 0.0011, "step": 111460 }, { "epoch": 0.7333407893265265, "grad_norm": 0.06302656424009315, "learning_rate": 7.985753729935152e-06, "loss": 0.0006, "step": 111470 }, { "epoch": 0.7334065775017599, "grad_norm": 0.21982215386353499, "learning_rate": 7.98529319917417e-06, "loss": 0.0019, "step": 111480 }, { "epoch": 0.7334723656769933, "grad_norm": 0.05771232680348224, "learning_rate": 7.984832629054678e-06, "loss": 0.0009, "step": 111490 }, { "epoch": 0.7335381538522266, "grad_norm": 0.08020053261406716, "learning_rate": 7.984372019582742e-06, "loss": 0.0019, "step": 111500 }, { "epoch": 0.7336039420274599, "grad_norm": 0.1768342551347376, "learning_rate": 7.98391137076444e-06, "loss": 0.0013, "step": 111510 }, { "epoch": 0.7336697302026933, "grad_norm": 0.01803205772328233, "learning_rate": 7.983450682605845e-06, "loss": 0.001, "step": 111520 }, { "epoch": 0.7337355183779267, "grad_norm": 0.5542198395119204, "learning_rate": 7.982989955113028e-06, "loss": 0.0014, "step": 111530 }, { "epoch": 0.7338013065531601, "grad_norm": 0.01785999886800021, "learning_rate": 7.982529188292063e-06, "loss": 0.0019, "step": 111540 }, { "epoch": 0.7338670947283935, "grad_norm": 0.047121732555943756, "learning_rate": 7.982068382149028e-06, "loss": 0.0006, "step": 111550 }, { "epoch": 0.7339328829036269, "grad_norm": 0.12653140712692237, "learning_rate": 7.981607536689997e-06, "loss": 0.001, "step": 111560 }, { "epoch": 0.7339986710788603, "grad_norm": 0.008774070574101699, "learning_rate": 7.981146651921045e-06, "loss": 0.0013, "step": 111570 }, { "epoch": 0.7340644592540937, "grad_norm": 0.05533306788204825, "learning_rate": 7.980685727848251e-06, "loss": 0.0025, "step": 111580 }, { "epoch": 0.7341302474293271, "grad_norm": 0.0055644689217172316, "learning_rate": 7.980224764477688e-06, "loss": 0.0011, "step": 111590 }, { "epoch": 0.7341960356045605, "grad_norm": 0.03538837840848345, "learning_rate": 7.979763761815437e-06, "loss": 0.0007, "step": 111600 }, { "epoch": 0.7342618237797938, "grad_norm": 0.03074894435812508, "learning_rate": 7.979302719867575e-06, "loss": 0.0012, "step": 111610 }, { "epoch": 0.7343276119550272, "grad_norm": 0.039675532489754424, "learning_rate": 7.978841638640177e-06, "loss": 0.0009, "step": 111620 }, { "epoch": 0.7343934001302606, "grad_norm": 0.021782466607372795, "learning_rate": 7.978380518139325e-06, "loss": 0.001, "step": 111630 }, { "epoch": 0.734459188305494, "grad_norm": 0.03359234617026612, "learning_rate": 7.9779193583711e-06, "loss": 0.0008, "step": 111640 }, { "epoch": 0.7345249764807273, "grad_norm": 0.0018527996129287262, "learning_rate": 7.97745815934158e-06, "loss": 0.0013, "step": 111650 }, { "epoch": 0.7345907646559607, "grad_norm": 0.07662048407192956, "learning_rate": 7.976996921056843e-06, "loss": 0.0024, "step": 111660 }, { "epoch": 0.7346565528311941, "grad_norm": 0.08327612642025076, "learning_rate": 7.976535643522976e-06, "loss": 0.0019, "step": 111670 }, { "epoch": 0.7347223410064275, "grad_norm": 0.0871773132743369, "learning_rate": 7.976074326746055e-06, "loss": 0.0029, "step": 111680 }, { "epoch": 0.7347881291816609, "grad_norm": 0.11800739757548806, "learning_rate": 7.975612970732165e-06, "loss": 0.0069, "step": 111690 }, { "epoch": 0.7348539173568943, "grad_norm": 0.050567596691393775, "learning_rate": 7.975151575487388e-06, "loss": 0.0021, "step": 111700 }, { "epoch": 0.7349197055321277, "grad_norm": 0.004966657527237683, "learning_rate": 7.974690141017809e-06, "loss": 0.0016, "step": 111710 }, { "epoch": 0.734985493707361, "grad_norm": 0.24722752704257642, "learning_rate": 7.974228667329507e-06, "loss": 0.0028, "step": 111720 }, { "epoch": 0.7350512818825944, "grad_norm": 0.013651282097319509, "learning_rate": 7.973767154428568e-06, "loss": 0.0027, "step": 111730 }, { "epoch": 0.7351170700578278, "grad_norm": 0.053592338708629395, "learning_rate": 7.973305602321079e-06, "loss": 0.0009, "step": 111740 }, { "epoch": 0.7351828582330612, "grad_norm": 0.0539201161141238, "learning_rate": 7.972844011013124e-06, "loss": 0.0025, "step": 111750 }, { "epoch": 0.7352486464082946, "grad_norm": 0.13631031989802683, "learning_rate": 7.972382380510787e-06, "loss": 0.0017, "step": 111760 }, { "epoch": 0.735314434583528, "grad_norm": 0.03420408425218041, "learning_rate": 7.971920710820156e-06, "loss": 0.0011, "step": 111770 }, { "epoch": 0.7353802227587614, "grad_norm": 0.06474267843499311, "learning_rate": 7.971459001947318e-06, "loss": 0.0012, "step": 111780 }, { "epoch": 0.7354460109339948, "grad_norm": 0.16852909520834844, "learning_rate": 7.970997253898359e-06, "loss": 0.0028, "step": 111790 }, { "epoch": 0.7355117991092281, "grad_norm": 0.04436394944917556, "learning_rate": 7.970535466679365e-06, "loss": 0.0013, "step": 111800 }, { "epoch": 0.7355775872844615, "grad_norm": 0.09529684035502169, "learning_rate": 7.97007364029643e-06, "loss": 0.0007, "step": 111810 }, { "epoch": 0.7356433754596948, "grad_norm": 0.0178686306078933, "learning_rate": 7.969611774755637e-06, "loss": 0.001, "step": 111820 }, { "epoch": 0.7357091636349282, "grad_norm": 0.015483339617088253, "learning_rate": 7.969149870063077e-06, "loss": 0.0005, "step": 111830 }, { "epoch": 0.7357749518101616, "grad_norm": 0.010329664204216962, "learning_rate": 7.96868792622484e-06, "loss": 0.0008, "step": 111840 }, { "epoch": 0.735840739985395, "grad_norm": 0.04997539297870549, "learning_rate": 7.96822594324702e-06, "loss": 0.0026, "step": 111850 }, { "epoch": 0.7359065281606284, "grad_norm": 0.07806065971235954, "learning_rate": 7.9677639211357e-06, "loss": 0.0005, "step": 111860 }, { "epoch": 0.7359723163358618, "grad_norm": 0.044282692233428574, "learning_rate": 7.967301859896979e-06, "loss": 0.0011, "step": 111870 }, { "epoch": 0.7360381045110952, "grad_norm": 0.12499530948555576, "learning_rate": 7.966839759536944e-06, "loss": 0.0014, "step": 111880 }, { "epoch": 0.7361038926863286, "grad_norm": 0.021511379420839184, "learning_rate": 7.96637762006169e-06, "loss": 0.0031, "step": 111890 }, { "epoch": 0.736169680861562, "grad_norm": 0.04550769354532235, "learning_rate": 7.965915441477308e-06, "loss": 0.0012, "step": 111900 }, { "epoch": 0.7362354690367954, "grad_norm": 0.011450579184013343, "learning_rate": 7.965453223789894e-06, "loss": 0.0027, "step": 111910 }, { "epoch": 0.7363012572120287, "grad_norm": 0.011870075289337489, "learning_rate": 7.964990967005539e-06, "loss": 0.0009, "step": 111920 }, { "epoch": 0.736367045387262, "grad_norm": 0.015071728105714924, "learning_rate": 7.96452867113034e-06, "loss": 0.0012, "step": 111930 }, { "epoch": 0.7364328335624954, "grad_norm": 0.040565108944702284, "learning_rate": 7.964066336170388e-06, "loss": 0.0024, "step": 111940 }, { "epoch": 0.7364986217377288, "grad_norm": 0.03254593693850824, "learning_rate": 7.963603962131785e-06, "loss": 0.0009, "step": 111950 }, { "epoch": 0.7365644099129622, "grad_norm": 0.026862468481606408, "learning_rate": 7.963141549020621e-06, "loss": 0.0011, "step": 111960 }, { "epoch": 0.7366301980881956, "grad_norm": 0.0003410934138338389, "learning_rate": 7.962679096842996e-06, "loss": 0.001, "step": 111970 }, { "epoch": 0.736695986263429, "grad_norm": 0.05386923980682002, "learning_rate": 7.962216605605005e-06, "loss": 0.0005, "step": 111980 }, { "epoch": 0.7367617744386624, "grad_norm": 0.02751443035096086, "learning_rate": 7.961754075312747e-06, "loss": 0.0009, "step": 111990 }, { "epoch": 0.7368275626138958, "grad_norm": 0.003772241208001221, "learning_rate": 7.961291505972319e-06, "loss": 0.0008, "step": 112000 }, { "epoch": 0.7368933507891292, "grad_norm": 0.08890796558263986, "learning_rate": 7.960828897589821e-06, "loss": 0.0018, "step": 112010 }, { "epoch": 0.7369591389643625, "grad_norm": 0.04101441293604031, "learning_rate": 7.96036625017135e-06, "loss": 0.002, "step": 112020 }, { "epoch": 0.7370249271395959, "grad_norm": 0.049160672530891444, "learning_rate": 7.959903563723006e-06, "loss": 0.0025, "step": 112030 }, { "epoch": 0.7370907153148293, "grad_norm": 0.026628302333636104, "learning_rate": 7.95944083825089e-06, "loss": 0.0012, "step": 112040 }, { "epoch": 0.7371565034900627, "grad_norm": 0.08476599973755115, "learning_rate": 7.958978073761106e-06, "loss": 0.0011, "step": 112050 }, { "epoch": 0.7372222916652961, "grad_norm": 0.0059160794768054826, "learning_rate": 7.958515270259748e-06, "loss": 0.001, "step": 112060 }, { "epoch": 0.7372880798405295, "grad_norm": 0.20565211126645558, "learning_rate": 7.958052427752922e-06, "loss": 0.0031, "step": 112070 }, { "epoch": 0.7373538680157629, "grad_norm": 0.051252945411675604, "learning_rate": 7.957589546246731e-06, "loss": 0.003, "step": 112080 }, { "epoch": 0.7374196561909963, "grad_norm": 0.012535033367977021, "learning_rate": 7.957126625747275e-06, "loss": 0.0013, "step": 112090 }, { "epoch": 0.7374854443662296, "grad_norm": 0.046052720345579445, "learning_rate": 7.956663666260658e-06, "loss": 0.0011, "step": 112100 }, { "epoch": 0.737551232541463, "grad_norm": 0.08550989164137895, "learning_rate": 7.956200667792985e-06, "loss": 0.0007, "step": 112110 }, { "epoch": 0.7376170207166963, "grad_norm": 0.11541017915812381, "learning_rate": 7.95573763035036e-06, "loss": 0.0024, "step": 112120 }, { "epoch": 0.7376828088919297, "grad_norm": 0.20818046316199545, "learning_rate": 7.955274553938884e-06, "loss": 0.0017, "step": 112130 }, { "epoch": 0.7377485970671631, "grad_norm": 0.0974848874365866, "learning_rate": 7.954811438564668e-06, "loss": 0.0019, "step": 112140 }, { "epoch": 0.7378143852423965, "grad_norm": 0.14492593894440003, "learning_rate": 7.954348284233815e-06, "loss": 0.0014, "step": 112150 }, { "epoch": 0.7378801734176299, "grad_norm": 0.026182487681038587, "learning_rate": 7.95388509095243e-06, "loss": 0.0011, "step": 112160 }, { "epoch": 0.7379459615928633, "grad_norm": 0.10106555224455786, "learning_rate": 7.953421858726622e-06, "loss": 0.0007, "step": 112170 }, { "epoch": 0.7380117497680967, "grad_norm": 0.021476889384876165, "learning_rate": 7.952958587562498e-06, "loss": 0.0013, "step": 112180 }, { "epoch": 0.7380775379433301, "grad_norm": 0.09525481897730997, "learning_rate": 7.952495277466166e-06, "loss": 0.0019, "step": 112190 }, { "epoch": 0.7381433261185635, "grad_norm": 0.026702165091395294, "learning_rate": 7.952031928443731e-06, "loss": 0.0014, "step": 112200 }, { "epoch": 0.7382091142937969, "grad_norm": 0.11938614608761013, "learning_rate": 7.951568540501306e-06, "loss": 0.0015, "step": 112210 }, { "epoch": 0.7382749024690303, "grad_norm": 0.05189183144193795, "learning_rate": 7.951105113645e-06, "loss": 0.0011, "step": 112220 }, { "epoch": 0.7383406906442636, "grad_norm": 0.02444489211458525, "learning_rate": 7.95064164788092e-06, "loss": 0.0011, "step": 112230 }, { "epoch": 0.738406478819497, "grad_norm": 0.07033797415354898, "learning_rate": 7.950178143215179e-06, "loss": 0.0019, "step": 112240 }, { "epoch": 0.7384722669947303, "grad_norm": 0.07293889891105361, "learning_rate": 7.949714599653888e-06, "loss": 0.0018, "step": 112250 }, { "epoch": 0.7385380551699637, "grad_norm": 0.11535803680053787, "learning_rate": 7.949251017203157e-06, "loss": 0.0065, "step": 112260 }, { "epoch": 0.7386038433451971, "grad_norm": 0.19496821527242378, "learning_rate": 7.948787395869098e-06, "loss": 0.0102, "step": 112270 }, { "epoch": 0.7386696315204305, "grad_norm": 0.05357994501201249, "learning_rate": 7.948323735657824e-06, "loss": 0.0012, "step": 112280 }, { "epoch": 0.7387354196956639, "grad_norm": 0.044536894375966564, "learning_rate": 7.947860036575448e-06, "loss": 0.0012, "step": 112290 }, { "epoch": 0.7388012078708973, "grad_norm": 0.14441332756868777, "learning_rate": 7.947396298628085e-06, "loss": 0.002, "step": 112300 }, { "epoch": 0.7388669960461307, "grad_norm": 0.0956214071499385, "learning_rate": 7.946932521821845e-06, "loss": 0.0012, "step": 112310 }, { "epoch": 0.7389327842213641, "grad_norm": 0.02856313406634979, "learning_rate": 7.946468706162847e-06, "loss": 0.001, "step": 112320 }, { "epoch": 0.7389985723965974, "grad_norm": 0.09938085277810764, "learning_rate": 7.9460048516572e-06, "loss": 0.0047, "step": 112330 }, { "epoch": 0.7390643605718308, "grad_norm": 0.020260617736421266, "learning_rate": 7.945540958311026e-06, "loss": 0.0014, "step": 112340 }, { "epoch": 0.7391301487470642, "grad_norm": 0.026189136916543676, "learning_rate": 7.94507702613044e-06, "loss": 0.0015, "step": 112350 }, { "epoch": 0.7391959369222976, "grad_norm": 0.10091263486865058, "learning_rate": 7.944613055121556e-06, "loss": 0.0029, "step": 112360 }, { "epoch": 0.739261725097531, "grad_norm": 0.11263510396185972, "learning_rate": 7.94414904529049e-06, "loss": 0.0015, "step": 112370 }, { "epoch": 0.7393275132727644, "grad_norm": 0.003336915100804221, "learning_rate": 7.943684996643363e-06, "loss": 0.0014, "step": 112380 }, { "epoch": 0.7393933014479978, "grad_norm": 0.008213405689185658, "learning_rate": 7.94322090918629e-06, "loss": 0.0008, "step": 112390 }, { "epoch": 0.7394590896232311, "grad_norm": 0.0011042561322494415, "learning_rate": 7.942756782925393e-06, "loss": 0.0012, "step": 112400 }, { "epoch": 0.7395248777984645, "grad_norm": 0.025764931327142252, "learning_rate": 7.94229261786679e-06, "loss": 0.001, "step": 112410 }, { "epoch": 0.7395906659736979, "grad_norm": 0.0367475574562075, "learning_rate": 7.941828414016598e-06, "loss": 0.0013, "step": 112420 }, { "epoch": 0.7396564541489312, "grad_norm": 0.062012448665958946, "learning_rate": 7.941364171380942e-06, "loss": 0.0012, "step": 112430 }, { "epoch": 0.7397222423241646, "grad_norm": 0.02895223538064951, "learning_rate": 7.940899889965938e-06, "loss": 0.001, "step": 112440 }, { "epoch": 0.739788030499398, "grad_norm": 0.04143269218941842, "learning_rate": 7.940435569777708e-06, "loss": 0.0006, "step": 112450 }, { "epoch": 0.7398538186746314, "grad_norm": 0.020334975054862165, "learning_rate": 7.939971210822375e-06, "loss": 0.0024, "step": 112460 }, { "epoch": 0.7399196068498648, "grad_norm": 0.03607051768979666, "learning_rate": 7.939506813106061e-06, "loss": 0.0021, "step": 112470 }, { "epoch": 0.7399853950250982, "grad_norm": 0.1782350463555464, "learning_rate": 7.939042376634889e-06, "loss": 0.0015, "step": 112480 }, { "epoch": 0.7400511832003316, "grad_norm": 0.018496648730378654, "learning_rate": 7.93857790141498e-06, "loss": 0.0033, "step": 112490 }, { "epoch": 0.740116971375565, "grad_norm": 0.02345432641687881, "learning_rate": 7.93811338745246e-06, "loss": 0.0018, "step": 112500 }, { "epoch": 0.7401827595507984, "grad_norm": 0.00895689423649889, "learning_rate": 7.937648834753453e-06, "loss": 0.0033, "step": 112510 }, { "epoch": 0.7402485477260318, "grad_norm": 0.05195305942108502, "learning_rate": 7.937184243324084e-06, "loss": 0.0017, "step": 112520 }, { "epoch": 0.740314335901265, "grad_norm": 0.10777615653012693, "learning_rate": 7.936719613170476e-06, "loss": 0.0011, "step": 112530 }, { "epoch": 0.7403801240764984, "grad_norm": 0.03615054384352429, "learning_rate": 7.936254944298757e-06, "loss": 0.003, "step": 112540 }, { "epoch": 0.7404459122517318, "grad_norm": 0.07200117231890953, "learning_rate": 7.93579023671505e-06, "loss": 0.0015, "step": 112550 }, { "epoch": 0.7405117004269652, "grad_norm": 0.04880998697444805, "learning_rate": 7.935325490425487e-06, "loss": 0.0023, "step": 112560 }, { "epoch": 0.7405774886021986, "grad_norm": 0.021713346803358673, "learning_rate": 7.934860705436194e-06, "loss": 0.0011, "step": 112570 }, { "epoch": 0.740643276777432, "grad_norm": 0.042933281682640746, "learning_rate": 7.934395881753295e-06, "loss": 0.0007, "step": 112580 }, { "epoch": 0.7407090649526654, "grad_norm": 0.06474371258344638, "learning_rate": 7.93393101938292e-06, "loss": 0.0016, "step": 112590 }, { "epoch": 0.7407748531278988, "grad_norm": 0.08856966542386636, "learning_rate": 7.933466118331199e-06, "loss": 0.0011, "step": 112600 }, { "epoch": 0.7408406413031322, "grad_norm": 0.05360011719303223, "learning_rate": 7.933001178604262e-06, "loss": 0.0018, "step": 112610 }, { "epoch": 0.7409064294783656, "grad_norm": 0.03373925967879749, "learning_rate": 7.932536200208237e-06, "loss": 0.0012, "step": 112620 }, { "epoch": 0.740972217653599, "grad_norm": 0.026956176245144032, "learning_rate": 7.932071183149252e-06, "loss": 0.001, "step": 112630 }, { "epoch": 0.7410380058288323, "grad_norm": 0.031761460045237284, "learning_rate": 7.931606127433444e-06, "loss": 0.0016, "step": 112640 }, { "epoch": 0.7411037940040657, "grad_norm": 0.07047677374365043, "learning_rate": 7.93114103306694e-06, "loss": 0.0013, "step": 112650 }, { "epoch": 0.7411695821792991, "grad_norm": 0.07988880942387376, "learning_rate": 7.930675900055875e-06, "loss": 0.0012, "step": 112660 }, { "epoch": 0.7412353703545325, "grad_norm": 0.010684187387547128, "learning_rate": 7.930210728406377e-06, "loss": 0.0013, "step": 112670 }, { "epoch": 0.7413011585297659, "grad_norm": 0.03921855260405153, "learning_rate": 7.929745518124583e-06, "loss": 0.009, "step": 112680 }, { "epoch": 0.7413669467049993, "grad_norm": 0.7890881863928728, "learning_rate": 7.929280269216624e-06, "loss": 0.0017, "step": 112690 }, { "epoch": 0.7414327348802326, "grad_norm": 0.07000380741594713, "learning_rate": 7.928814981688632e-06, "loss": 0.0013, "step": 112700 }, { "epoch": 0.741498523055466, "grad_norm": 0.029491457642508344, "learning_rate": 7.928349655546747e-06, "loss": 0.0009, "step": 112710 }, { "epoch": 0.7415643112306994, "grad_norm": 0.06361844325044692, "learning_rate": 7.9278842907971e-06, "loss": 0.0007, "step": 112720 }, { "epoch": 0.7416300994059328, "grad_norm": 0.04906219803748319, "learning_rate": 7.927418887445827e-06, "loss": 0.0012, "step": 112730 }, { "epoch": 0.7416958875811661, "grad_norm": 0.0005724536772158844, "learning_rate": 7.926953445499064e-06, "loss": 0.001, "step": 112740 }, { "epoch": 0.7417616757563995, "grad_norm": 0.01827115860275571, "learning_rate": 7.92648796496295e-06, "loss": 0.0007, "step": 112750 }, { "epoch": 0.7418274639316329, "grad_norm": 0.08075391438826326, "learning_rate": 7.926022445843617e-06, "loss": 0.0015, "step": 112760 }, { "epoch": 0.7418932521068663, "grad_norm": 0.05716593341266755, "learning_rate": 7.925556888147205e-06, "loss": 0.0011, "step": 112770 }, { "epoch": 0.7419590402820997, "grad_norm": 0.14950633705786878, "learning_rate": 7.925091291879854e-06, "loss": 0.0013, "step": 112780 }, { "epoch": 0.7420248284573331, "grad_norm": 0.00920000764340111, "learning_rate": 7.9246256570477e-06, "loss": 0.0011, "step": 112790 }, { "epoch": 0.7420906166325665, "grad_norm": 0.17156973750493804, "learning_rate": 7.924159983656882e-06, "loss": 0.0025, "step": 112800 }, { "epoch": 0.7421564048077999, "grad_norm": 0.05885746907893517, "learning_rate": 7.92369427171354e-06, "loss": 0.0019, "step": 112810 }, { "epoch": 0.7422221929830333, "grad_norm": 0.05437006470226237, "learning_rate": 7.923228521223815e-06, "loss": 0.0008, "step": 112820 }, { "epoch": 0.7422879811582667, "grad_norm": 0.002187204644316173, "learning_rate": 7.922762732193846e-06, "loss": 0.001, "step": 112830 }, { "epoch": 0.7423537693335, "grad_norm": 0.012693901554475065, "learning_rate": 7.922296904629774e-06, "loss": 0.0008, "step": 112840 }, { "epoch": 0.7424195575087333, "grad_norm": 0.11988623266044615, "learning_rate": 7.921831038537742e-06, "loss": 0.0014, "step": 112850 }, { "epoch": 0.7424853456839667, "grad_norm": 0.005356639126400966, "learning_rate": 7.92136513392389e-06, "loss": 0.0007, "step": 112860 }, { "epoch": 0.7425511338592001, "grad_norm": 0.23032346406423282, "learning_rate": 7.920899190794364e-06, "loss": 0.0028, "step": 112870 }, { "epoch": 0.7426169220344335, "grad_norm": 0.04542395090964114, "learning_rate": 7.920433209155304e-06, "loss": 0.0012, "step": 112880 }, { "epoch": 0.7426827102096669, "grad_norm": 0.021864099713683312, "learning_rate": 7.919967189012854e-06, "loss": 0.0023, "step": 112890 }, { "epoch": 0.7427484983849003, "grad_norm": 0.009749399189822107, "learning_rate": 7.919501130373157e-06, "loss": 0.0007, "step": 112900 }, { "epoch": 0.7428142865601337, "grad_norm": 0.23200666342914775, "learning_rate": 7.919035033242361e-06, "loss": 0.001, "step": 112910 }, { "epoch": 0.7428800747353671, "grad_norm": 0.019194048735149266, "learning_rate": 7.91856889762661e-06, "loss": 0.0015, "step": 112920 }, { "epoch": 0.7429458629106005, "grad_norm": 0.08640548886275189, "learning_rate": 7.918102723532045e-06, "loss": 0.0011, "step": 112930 }, { "epoch": 0.7430116510858338, "grad_norm": 0.09159593521655525, "learning_rate": 7.91763651096482e-06, "loss": 0.0009, "step": 112940 }, { "epoch": 0.7430774392610672, "grad_norm": 0.06295992974362723, "learning_rate": 7.917170259931075e-06, "loss": 0.0019, "step": 112950 }, { "epoch": 0.7431432274363006, "grad_norm": 0.02224082032115617, "learning_rate": 7.91670397043696e-06, "loss": 0.0009, "step": 112960 }, { "epoch": 0.743209015611534, "grad_norm": 0.056674727457451536, "learning_rate": 7.916237642488624e-06, "loss": 0.001, "step": 112970 }, { "epoch": 0.7432748037867674, "grad_norm": 0.03769124678682129, "learning_rate": 7.91577127609221e-06, "loss": 0.0012, "step": 112980 }, { "epoch": 0.7433405919620008, "grad_norm": 0.06102867965453598, "learning_rate": 7.915304871253873e-06, "loss": 0.0013, "step": 112990 }, { "epoch": 0.7434063801372341, "grad_norm": 0.0608522183379988, "learning_rate": 7.914838427979758e-06, "loss": 0.0013, "step": 113000 }, { "epoch": 0.7434721683124675, "grad_norm": 0.02610406904696412, "learning_rate": 7.914371946276015e-06, "loss": 0.0013, "step": 113010 }, { "epoch": 0.7435379564877009, "grad_norm": 0.07194144316280471, "learning_rate": 7.913905426148795e-06, "loss": 0.0017, "step": 113020 }, { "epoch": 0.7436037446629343, "grad_norm": 0.07420155127041732, "learning_rate": 7.913438867604249e-06, "loss": 0.0018, "step": 113030 }, { "epoch": 0.7436695328381676, "grad_norm": 0.18820748413896557, "learning_rate": 7.91297227064853e-06, "loss": 0.0028, "step": 113040 }, { "epoch": 0.743735321013401, "grad_norm": 0.329045684099634, "learning_rate": 7.912505635287783e-06, "loss": 0.001, "step": 113050 }, { "epoch": 0.7438011091886344, "grad_norm": 0.01628516955529594, "learning_rate": 7.912038961528167e-06, "loss": 0.0021, "step": 113060 }, { "epoch": 0.7438668973638678, "grad_norm": 0.08698005393899318, "learning_rate": 7.911572249375831e-06, "loss": 0.001, "step": 113070 }, { "epoch": 0.7439326855391012, "grad_norm": 0.04667823206186361, "learning_rate": 7.911105498836932e-06, "loss": 0.001, "step": 113080 }, { "epoch": 0.7439984737143346, "grad_norm": 0.020065491635058445, "learning_rate": 7.91063870991762e-06, "loss": 0.0015, "step": 113090 }, { "epoch": 0.744064261889568, "grad_norm": 0.1521269931862249, "learning_rate": 7.910171882624049e-06, "loss": 0.0016, "step": 113100 }, { "epoch": 0.7441300500648014, "grad_norm": 0.2419954158520955, "learning_rate": 7.909705016962377e-06, "loss": 0.0016, "step": 113110 }, { "epoch": 0.7441958382400348, "grad_norm": 0.020916642730919095, "learning_rate": 7.909238112938758e-06, "loss": 0.0015, "step": 113120 }, { "epoch": 0.7442616264152682, "grad_norm": 0.2627542737113135, "learning_rate": 7.908771170559344e-06, "loss": 0.0013, "step": 113130 }, { "epoch": 0.7443274145905016, "grad_norm": 0.21234044578462788, "learning_rate": 7.908304189830296e-06, "loss": 0.0037, "step": 113140 }, { "epoch": 0.7443932027657348, "grad_norm": 0.02445405230551492, "learning_rate": 7.90783717075777e-06, "loss": 0.002, "step": 113150 }, { "epoch": 0.7444589909409682, "grad_norm": 0.06128763240688577, "learning_rate": 7.90737011334792e-06, "loss": 0.0088, "step": 113160 }, { "epoch": 0.7445247791162016, "grad_norm": 0.031452046863368055, "learning_rate": 7.906903017606908e-06, "loss": 0.0012, "step": 113170 }, { "epoch": 0.744590567291435, "grad_norm": 0.06347630355227485, "learning_rate": 7.906435883540889e-06, "loss": 0.0033, "step": 113180 }, { "epoch": 0.7446563554666684, "grad_norm": 0.11916800103791407, "learning_rate": 7.905968711156024e-06, "loss": 0.0017, "step": 113190 }, { "epoch": 0.7447221436419018, "grad_norm": 0.0673160561800219, "learning_rate": 7.90550150045847e-06, "loss": 0.0022, "step": 113200 }, { "epoch": 0.7447879318171352, "grad_norm": 0.028035826895900864, "learning_rate": 7.90503425145439e-06, "loss": 0.0015, "step": 113210 }, { "epoch": 0.7448537199923686, "grad_norm": 0.09043861855342121, "learning_rate": 7.90456696414994e-06, "loss": 0.0025, "step": 113220 }, { "epoch": 0.744919508167602, "grad_norm": 0.12613314490167105, "learning_rate": 7.904099638551286e-06, "loss": 0.0018, "step": 113230 }, { "epoch": 0.7449852963428354, "grad_norm": 0.05703776639451222, "learning_rate": 7.903632274664585e-06, "loss": 0.0016, "step": 113240 }, { "epoch": 0.7450510845180687, "grad_norm": 0.009772435297210865, "learning_rate": 7.903164872496e-06, "loss": 0.0012, "step": 113250 }, { "epoch": 0.7451168726933021, "grad_norm": 0.0006593807387788071, "learning_rate": 7.902697432051692e-06, "loss": 0.0013, "step": 113260 }, { "epoch": 0.7451826608685355, "grad_norm": 0.145689394110058, "learning_rate": 7.902229953337828e-06, "loss": 0.0039, "step": 113270 }, { "epoch": 0.7452484490437689, "grad_norm": 0.010818650773688007, "learning_rate": 7.901762436360567e-06, "loss": 0.0036, "step": 113280 }, { "epoch": 0.7453142372190023, "grad_norm": 0.020570834944065114, "learning_rate": 7.901294881126075e-06, "loss": 0.0024, "step": 113290 }, { "epoch": 0.7453800253942356, "grad_norm": 0.10779386091793167, "learning_rate": 7.900827287640516e-06, "loss": 0.0027, "step": 113300 }, { "epoch": 0.745445813569469, "grad_norm": 0.04714356713678083, "learning_rate": 7.900359655910054e-06, "loss": 0.0008, "step": 113310 }, { "epoch": 0.7455116017447024, "grad_norm": 0.016839465306878475, "learning_rate": 7.899891985940856e-06, "loss": 0.0016, "step": 113320 }, { "epoch": 0.7455773899199358, "grad_norm": 0.035630153788827425, "learning_rate": 7.899424277739085e-06, "loss": 0.0023, "step": 113330 }, { "epoch": 0.7456431780951692, "grad_norm": 0.045223484394823446, "learning_rate": 7.898956531310908e-06, "loss": 0.0011, "step": 113340 }, { "epoch": 0.7457089662704025, "grad_norm": 0.002862301434015096, "learning_rate": 7.898488746662495e-06, "loss": 0.0014, "step": 113350 }, { "epoch": 0.7457747544456359, "grad_norm": 0.07097648313692423, "learning_rate": 7.89802092380001e-06, "loss": 0.0021, "step": 113360 }, { "epoch": 0.7458405426208693, "grad_norm": 0.06141900372109058, "learning_rate": 7.897553062729624e-06, "loss": 0.002, "step": 113370 }, { "epoch": 0.7459063307961027, "grad_norm": 0.06017591389504623, "learning_rate": 7.897085163457503e-06, "loss": 0.0012, "step": 113380 }, { "epoch": 0.7459721189713361, "grad_norm": 0.18946964230561683, "learning_rate": 7.896617225989813e-06, "loss": 0.0017, "step": 113390 }, { "epoch": 0.7460379071465695, "grad_norm": 0.04766471439988206, "learning_rate": 7.89614925033273e-06, "loss": 0.0021, "step": 113400 }, { "epoch": 0.7461036953218029, "grad_norm": 0.05906308525566715, "learning_rate": 7.895681236492418e-06, "loss": 0.0009, "step": 113410 }, { "epoch": 0.7461694834970363, "grad_norm": 0.022243518904578374, "learning_rate": 7.89521318447505e-06, "loss": 0.0012, "step": 113420 }, { "epoch": 0.7462352716722697, "grad_norm": 0.06133766441175411, "learning_rate": 7.8947450942868e-06, "loss": 0.0009, "step": 113430 }, { "epoch": 0.7463010598475031, "grad_norm": 0.14645871500209812, "learning_rate": 7.894276965933833e-06, "loss": 0.0043, "step": 113440 }, { "epoch": 0.7463668480227363, "grad_norm": 0.03599030933094364, "learning_rate": 7.893808799422324e-06, "loss": 0.0008, "step": 113450 }, { "epoch": 0.7464326361979697, "grad_norm": 0.03349957312841673, "learning_rate": 7.893340594758445e-06, "loss": 0.0006, "step": 113460 }, { "epoch": 0.7464984243732031, "grad_norm": 0.004960983061533839, "learning_rate": 7.892872351948372e-06, "loss": 0.0013, "step": 113470 }, { "epoch": 0.7465642125484365, "grad_norm": 0.056831260743372566, "learning_rate": 7.892404070998271e-06, "loss": 0.0021, "step": 113480 }, { "epoch": 0.7466300007236699, "grad_norm": 0.01370556802230949, "learning_rate": 7.891935751914324e-06, "loss": 0.001, "step": 113490 }, { "epoch": 0.7466957888989033, "grad_norm": 0.01366233065386489, "learning_rate": 7.8914673947027e-06, "loss": 0.0011, "step": 113500 }, { "epoch": 0.7467615770741367, "grad_norm": 0.003843214261474415, "learning_rate": 7.890998999369577e-06, "loss": 0.0023, "step": 113510 }, { "epoch": 0.7468273652493701, "grad_norm": 0.08564942820330061, "learning_rate": 7.890530565921125e-06, "loss": 0.0009, "step": 113520 }, { "epoch": 0.7468931534246035, "grad_norm": 0.057776909447388836, "learning_rate": 7.890062094363528e-06, "loss": 0.001, "step": 113530 }, { "epoch": 0.7469589415998369, "grad_norm": 0.044633835764155916, "learning_rate": 7.88959358470296e-06, "loss": 0.0011, "step": 113540 }, { "epoch": 0.7470247297750703, "grad_norm": 0.08689509016297195, "learning_rate": 7.88912503694559e-06, "loss": 0.0009, "step": 113550 }, { "epoch": 0.7470905179503036, "grad_norm": 0.14827127658719175, "learning_rate": 7.888656451097607e-06, "loss": 0.0011, "step": 113560 }, { "epoch": 0.747156306125537, "grad_norm": 0.00981452675166354, "learning_rate": 7.888187827165181e-06, "loss": 0.0009, "step": 113570 }, { "epoch": 0.7472220943007704, "grad_norm": 0.12555470259276416, "learning_rate": 7.887719165154494e-06, "loss": 0.0015, "step": 113580 }, { "epoch": 0.7472878824760038, "grad_norm": 0.021444468883606533, "learning_rate": 7.887250465071723e-06, "loss": 0.0022, "step": 113590 }, { "epoch": 0.7473536706512371, "grad_norm": 0.11108170796607383, "learning_rate": 7.886781726923049e-06, "loss": 0.0029, "step": 113600 }, { "epoch": 0.7474194588264705, "grad_norm": 0.0848205932351646, "learning_rate": 7.88631295071465e-06, "loss": 0.0011, "step": 113610 }, { "epoch": 0.7474852470017039, "grad_norm": 0.02495491761395305, "learning_rate": 7.885844136452709e-06, "loss": 0.0008, "step": 113620 }, { "epoch": 0.7475510351769373, "grad_norm": 0.014178948739938598, "learning_rate": 7.885375284143403e-06, "loss": 0.0013, "step": 113630 }, { "epoch": 0.7476168233521707, "grad_norm": 0.023844268752624513, "learning_rate": 7.88490639379292e-06, "loss": 0.003, "step": 113640 }, { "epoch": 0.7476826115274041, "grad_norm": 0.04625942524761644, "learning_rate": 7.884437465407435e-06, "loss": 0.0008, "step": 113650 }, { "epoch": 0.7477483997026374, "grad_norm": 0.10584015890583542, "learning_rate": 7.883968498993134e-06, "loss": 0.0021, "step": 113660 }, { "epoch": 0.7478141878778708, "grad_norm": 0.02062056393501444, "learning_rate": 7.883499494556197e-06, "loss": 0.0005, "step": 113670 }, { "epoch": 0.7478799760531042, "grad_norm": 0.065322319521798, "learning_rate": 7.883030452102812e-06, "loss": 0.0015, "step": 113680 }, { "epoch": 0.7479457642283376, "grad_norm": 0.04430644036733797, "learning_rate": 7.88256137163916e-06, "loss": 0.0007, "step": 113690 }, { "epoch": 0.748011552403571, "grad_norm": 0.0072001471393475834, "learning_rate": 7.882092253171424e-06, "loss": 0.002, "step": 113700 }, { "epoch": 0.7480773405788044, "grad_norm": 0.013787014816925614, "learning_rate": 7.881623096705791e-06, "loss": 0.002, "step": 113710 }, { "epoch": 0.7481431287540378, "grad_norm": 0.03279540927013477, "learning_rate": 7.881153902248448e-06, "loss": 0.0017, "step": 113720 }, { "epoch": 0.7482089169292712, "grad_norm": 0.03036451306846756, "learning_rate": 7.880684669805579e-06, "loss": 0.0012, "step": 113730 }, { "epoch": 0.7482747051045046, "grad_norm": 0.01937274167418575, "learning_rate": 7.880215399383369e-06, "loss": 0.0014, "step": 113740 }, { "epoch": 0.748340493279738, "grad_norm": 0.021165175881201596, "learning_rate": 7.879746090988007e-06, "loss": 0.0011, "step": 113750 }, { "epoch": 0.7484062814549712, "grad_norm": 0.045914059013716015, "learning_rate": 7.879276744625682e-06, "loss": 0.001, "step": 113760 }, { "epoch": 0.7484720696302046, "grad_norm": 0.005209158302296648, "learning_rate": 7.878807360302577e-06, "loss": 0.001, "step": 113770 }, { "epoch": 0.748537857805438, "grad_norm": 0.043346100989638314, "learning_rate": 7.878337938024886e-06, "loss": 0.0015, "step": 113780 }, { "epoch": 0.7486036459806714, "grad_norm": 0.022786939587157214, "learning_rate": 7.877868477798792e-06, "loss": 0.0023, "step": 113790 }, { "epoch": 0.7486694341559048, "grad_norm": 0.06377351244563717, "learning_rate": 7.87739897963049e-06, "loss": 0.0016, "step": 113800 }, { "epoch": 0.7487352223311382, "grad_norm": 0.016983754634074442, "learning_rate": 7.876929443526167e-06, "loss": 0.0013, "step": 113810 }, { "epoch": 0.7488010105063716, "grad_norm": 0.037339878632223183, "learning_rate": 7.876459869492012e-06, "loss": 0.0012, "step": 113820 }, { "epoch": 0.748866798681605, "grad_norm": 0.0451636521860065, "learning_rate": 7.875990257534221e-06, "loss": 0.0012, "step": 113830 }, { "epoch": 0.7489325868568384, "grad_norm": 0.03603681101723742, "learning_rate": 7.87552060765898e-06, "loss": 0.0015, "step": 113840 }, { "epoch": 0.7489983750320718, "grad_norm": 0.057085117841965675, "learning_rate": 7.875050919872485e-06, "loss": 0.0013, "step": 113850 }, { "epoch": 0.7490641632073051, "grad_norm": 0.054157691366303794, "learning_rate": 7.874581194180926e-06, "loss": 0.0012, "step": 113860 }, { "epoch": 0.7491299513825385, "grad_norm": 0.05436823528513685, "learning_rate": 7.874111430590496e-06, "loss": 0.0021, "step": 113870 }, { "epoch": 0.7491957395577719, "grad_norm": 0.08857571857475736, "learning_rate": 7.87364162910739e-06, "loss": 0.0012, "step": 113880 }, { "epoch": 0.7492615277330053, "grad_norm": 0.05144483791387707, "learning_rate": 7.873171789737799e-06, "loss": 0.002, "step": 113890 }, { "epoch": 0.7493273159082386, "grad_norm": 0.03898097267938895, "learning_rate": 7.87270191248792e-06, "loss": 0.0022, "step": 113900 }, { "epoch": 0.749393104083472, "grad_norm": 0.03380630355866997, "learning_rate": 7.87223199736395e-06, "loss": 0.0009, "step": 113910 }, { "epoch": 0.7494588922587054, "grad_norm": 0.05070898550153458, "learning_rate": 7.87176204437208e-06, "loss": 0.0025, "step": 113920 }, { "epoch": 0.7495246804339388, "grad_norm": 0.054005697222871386, "learning_rate": 7.871292053518506e-06, "loss": 0.001, "step": 113930 }, { "epoch": 0.7495904686091722, "grad_norm": 0.1489314908177482, "learning_rate": 7.870822024809428e-06, "loss": 0.0009, "step": 113940 }, { "epoch": 0.7496562567844056, "grad_norm": 0.016999422448480066, "learning_rate": 7.87035195825104e-06, "loss": 0.0023, "step": 113950 }, { "epoch": 0.7497220449596389, "grad_norm": 0.12206802357744187, "learning_rate": 7.869881853849542e-06, "loss": 0.0011, "step": 113960 }, { "epoch": 0.7497878331348723, "grad_norm": 0.05917837364005042, "learning_rate": 7.86941171161113e-06, "loss": 0.0015, "step": 113970 }, { "epoch": 0.7498536213101057, "grad_norm": 0.10211570232554618, "learning_rate": 7.868941531542003e-06, "loss": 0.0019, "step": 113980 }, { "epoch": 0.7499194094853391, "grad_norm": 0.048185881411406395, "learning_rate": 7.868471313648359e-06, "loss": 0.0015, "step": 113990 }, { "epoch": 0.7499851976605725, "grad_norm": 0.06608721926785012, "learning_rate": 7.868001057936396e-06, "loss": 0.0007, "step": 114000 }, { "epoch": 0.7500509858358059, "grad_norm": 0.07528610301720004, "learning_rate": 7.86753076441232e-06, "loss": 0.0015, "step": 114010 }, { "epoch": 0.7501167740110393, "grad_norm": 0.051206422406366504, "learning_rate": 7.867060433082324e-06, "loss": 0.0014, "step": 114020 }, { "epoch": 0.7501825621862727, "grad_norm": 0.12810472441630505, "learning_rate": 7.866590063952615e-06, "loss": 0.0015, "step": 114030 }, { "epoch": 0.7502483503615061, "grad_norm": 0.03328457849832261, "learning_rate": 7.866119657029389e-06, "loss": 0.0005, "step": 114040 }, { "epoch": 0.7503141385367395, "grad_norm": 0.03015442633704035, "learning_rate": 7.865649212318853e-06, "loss": 0.001, "step": 114050 }, { "epoch": 0.7503799267119728, "grad_norm": 0.1211558706521593, "learning_rate": 7.865178729827205e-06, "loss": 0.0015, "step": 114060 }, { "epoch": 0.7504457148872061, "grad_norm": 0.05264368889658619, "learning_rate": 7.864708209560652e-06, "loss": 0.0009, "step": 114070 }, { "epoch": 0.7505115030624395, "grad_norm": 0.05643309918357155, "learning_rate": 7.864237651525395e-06, "loss": 0.0011, "step": 114080 }, { "epoch": 0.7505772912376729, "grad_norm": 0.046540526843948556, "learning_rate": 7.863767055727638e-06, "loss": 0.0009, "step": 114090 }, { "epoch": 0.7506430794129063, "grad_norm": 0.14248059886068123, "learning_rate": 7.863296422173585e-06, "loss": 0.0014, "step": 114100 }, { "epoch": 0.7507088675881397, "grad_norm": 0.016377071365037032, "learning_rate": 7.862825750869443e-06, "loss": 0.001, "step": 114110 }, { "epoch": 0.7507746557633731, "grad_norm": 0.018593402816471455, "learning_rate": 7.862355041821415e-06, "loss": 0.0013, "step": 114120 }, { "epoch": 0.7508404439386065, "grad_norm": 0.0598174404532731, "learning_rate": 7.861884295035707e-06, "loss": 0.0007, "step": 114130 }, { "epoch": 0.7509062321138399, "grad_norm": 0.04638420349350927, "learning_rate": 7.861413510518527e-06, "loss": 0.0022, "step": 114140 }, { "epoch": 0.7509720202890733, "grad_norm": 0.014473404722788389, "learning_rate": 7.860942688276084e-06, "loss": 0.0008, "step": 114150 }, { "epoch": 0.7510378084643067, "grad_norm": 0.042103172946298534, "learning_rate": 7.860471828314577e-06, "loss": 0.0012, "step": 114160 }, { "epoch": 0.75110359663954, "grad_norm": 0.022696595225150464, "learning_rate": 7.860000930640223e-06, "loss": 0.0008, "step": 114170 }, { "epoch": 0.7511693848147734, "grad_norm": 0.04296554708741935, "learning_rate": 7.859529995259228e-06, "loss": 0.0013, "step": 114180 }, { "epoch": 0.7512351729900067, "grad_norm": 0.05821474445834657, "learning_rate": 7.859059022177798e-06, "loss": 0.0015, "step": 114190 }, { "epoch": 0.7513009611652401, "grad_norm": 0.05987295492691131, "learning_rate": 7.858588011402141e-06, "loss": 0.0011, "step": 114200 }, { "epoch": 0.7513667493404735, "grad_norm": 0.031943013463767565, "learning_rate": 7.858116962938475e-06, "loss": 0.0025, "step": 114210 }, { "epoch": 0.7514325375157069, "grad_norm": 0.2059497189563009, "learning_rate": 7.857645876793001e-06, "loss": 0.0014, "step": 114220 }, { "epoch": 0.7514983256909403, "grad_norm": 0.044156768270592624, "learning_rate": 7.857174752971936e-06, "loss": 0.0016, "step": 114230 }, { "epoch": 0.7515641138661737, "grad_norm": 0.036885401851899445, "learning_rate": 7.856703591481491e-06, "loss": 0.0028, "step": 114240 }, { "epoch": 0.7516299020414071, "grad_norm": 0.009904455632179534, "learning_rate": 7.856232392327875e-06, "loss": 0.0015, "step": 114250 }, { "epoch": 0.7516956902166405, "grad_norm": 0.2610375603817623, "learning_rate": 7.855761155517303e-06, "loss": 0.0007, "step": 114260 }, { "epoch": 0.7517614783918738, "grad_norm": 0.012592977466000119, "learning_rate": 7.855289881055985e-06, "loss": 0.0009, "step": 114270 }, { "epoch": 0.7518272665671072, "grad_norm": 0.007703214171818324, "learning_rate": 7.854818568950136e-06, "loss": 0.0011, "step": 114280 }, { "epoch": 0.7518930547423406, "grad_norm": 0.03206483166586755, "learning_rate": 7.85434721920597e-06, "loss": 0.0008, "step": 114290 }, { "epoch": 0.751958842917574, "grad_norm": 0.024572530651029156, "learning_rate": 7.8538758318297e-06, "loss": 0.001, "step": 114300 }, { "epoch": 0.7520246310928074, "grad_norm": 0.12004701885999428, "learning_rate": 7.853404406827542e-06, "loss": 0.0025, "step": 114310 }, { "epoch": 0.7520904192680408, "grad_norm": 0.005823768431192293, "learning_rate": 7.852932944205714e-06, "loss": 0.0006, "step": 114320 }, { "epoch": 0.7521562074432742, "grad_norm": 0.06826918798573157, "learning_rate": 7.852461443970427e-06, "loss": 0.0013, "step": 114330 }, { "epoch": 0.7522219956185076, "grad_norm": 0.15220085788554452, "learning_rate": 7.851989906127899e-06, "loss": 0.0029, "step": 114340 }, { "epoch": 0.752287783793741, "grad_norm": 0.04499054927232534, "learning_rate": 7.851518330684348e-06, "loss": 0.0032, "step": 114350 }, { "epoch": 0.7523535719689743, "grad_norm": 0.035794188751426266, "learning_rate": 7.851046717645992e-06, "loss": 0.0014, "step": 114360 }, { "epoch": 0.7524193601442076, "grad_norm": 0.03183470406367869, "learning_rate": 7.850575067019043e-06, "loss": 0.0013, "step": 114370 }, { "epoch": 0.752485148319441, "grad_norm": 0.05856785139483161, "learning_rate": 7.850103378809729e-06, "loss": 0.0013, "step": 114380 }, { "epoch": 0.7525509364946744, "grad_norm": 0.02057521317968509, "learning_rate": 7.84963165302426e-06, "loss": 0.001, "step": 114390 }, { "epoch": 0.7526167246699078, "grad_norm": 0.043763370400566215, "learning_rate": 7.849159889668861e-06, "loss": 0.001, "step": 114400 }, { "epoch": 0.7526825128451412, "grad_norm": 0.030983307883034446, "learning_rate": 7.848688088749748e-06, "loss": 0.0016, "step": 114410 }, { "epoch": 0.7527483010203746, "grad_norm": 0.00915988508266335, "learning_rate": 7.848216250273141e-06, "loss": 0.0008, "step": 114420 }, { "epoch": 0.752814089195608, "grad_norm": 0.11605813900968857, "learning_rate": 7.847744374245265e-06, "loss": 0.0016, "step": 114430 }, { "epoch": 0.7528798773708414, "grad_norm": 0.24467286661553617, "learning_rate": 7.84727246067234e-06, "loss": 0.0036, "step": 114440 }, { "epoch": 0.7529456655460748, "grad_norm": 0.04721979128923165, "learning_rate": 7.846800509560583e-06, "loss": 0.0017, "step": 114450 }, { "epoch": 0.7530114537213082, "grad_norm": 0.07175868329604353, "learning_rate": 7.846328520916224e-06, "loss": 0.0011, "step": 114460 }, { "epoch": 0.7530772418965415, "grad_norm": 0.06381420568176316, "learning_rate": 7.84585649474548e-06, "loss": 0.0015, "step": 114470 }, { "epoch": 0.7531430300717749, "grad_norm": 0.02891718311739597, "learning_rate": 7.845384431054575e-06, "loss": 0.001, "step": 114480 }, { "epoch": 0.7532088182470082, "grad_norm": 0.07542092918437093, "learning_rate": 7.844912329849735e-06, "loss": 0.0012, "step": 114490 }, { "epoch": 0.7532746064222416, "grad_norm": 0.06380512523570177, "learning_rate": 7.844440191137183e-06, "loss": 0.0013, "step": 114500 }, { "epoch": 0.753340394597475, "grad_norm": 0.14262624032414387, "learning_rate": 7.843968014923142e-06, "loss": 0.0013, "step": 114510 }, { "epoch": 0.7534061827727084, "grad_norm": 0.0248224370676556, "learning_rate": 7.84349580121384e-06, "loss": 0.0008, "step": 114520 }, { "epoch": 0.7534719709479418, "grad_norm": 0.06427209170679786, "learning_rate": 7.843023550015503e-06, "loss": 0.0013, "step": 114530 }, { "epoch": 0.7535377591231752, "grad_norm": 0.1244378999878985, "learning_rate": 7.842551261334355e-06, "loss": 0.0017, "step": 114540 }, { "epoch": 0.7536035472984086, "grad_norm": 0.19641604871456717, "learning_rate": 7.842078935176623e-06, "loss": 0.0008, "step": 114550 }, { "epoch": 0.753669335473642, "grad_norm": 0.09292042409664944, "learning_rate": 7.841606571548535e-06, "loss": 0.001, "step": 114560 }, { "epoch": 0.7537351236488754, "grad_norm": 0.04463253834840153, "learning_rate": 7.841134170456319e-06, "loss": 0.0011, "step": 114570 }, { "epoch": 0.7538009118241087, "grad_norm": 0.2224655874052647, "learning_rate": 7.840661731906204e-06, "loss": 0.0008, "step": 114580 }, { "epoch": 0.7538666999993421, "grad_norm": 0.03653604490225314, "learning_rate": 7.840189255904415e-06, "loss": 0.0017, "step": 114590 }, { "epoch": 0.7539324881745755, "grad_norm": 0.06533710609688734, "learning_rate": 7.839716742457185e-06, "loss": 0.0006, "step": 114600 }, { "epoch": 0.7539982763498089, "grad_norm": 0.041694396548120384, "learning_rate": 7.839244191570741e-06, "loss": 0.0015, "step": 114610 }, { "epoch": 0.7540640645250423, "grad_norm": 0.11063006525804893, "learning_rate": 7.838771603251316e-06, "loss": 0.0011, "step": 114620 }, { "epoch": 0.7541298527002757, "grad_norm": 0.044122201578422164, "learning_rate": 7.838298977505137e-06, "loss": 0.0009, "step": 114630 }, { "epoch": 0.754195640875509, "grad_norm": 0.02824467412203337, "learning_rate": 7.837826314338439e-06, "loss": 0.0014, "step": 114640 }, { "epoch": 0.7542614290507424, "grad_norm": 0.09013950468370302, "learning_rate": 7.837353613757453e-06, "loss": 0.0015, "step": 114650 }, { "epoch": 0.7543272172259758, "grad_norm": 0.05946000245243306, "learning_rate": 7.836880875768406e-06, "loss": 0.002, "step": 114660 }, { "epoch": 0.7543930054012092, "grad_norm": 0.08939170713142315, "learning_rate": 7.83640810037754e-06, "loss": 0.0009, "step": 114670 }, { "epoch": 0.7544587935764425, "grad_norm": 0.004721485923889014, "learning_rate": 7.835935287591079e-06, "loss": 0.0005, "step": 114680 }, { "epoch": 0.7545245817516759, "grad_norm": 0.017672994734341134, "learning_rate": 7.835462437415262e-06, "loss": 0.0024, "step": 114690 }, { "epoch": 0.7545903699269093, "grad_norm": 0.0019615195456326876, "learning_rate": 7.834989549856322e-06, "loss": 0.0018, "step": 114700 }, { "epoch": 0.7546561581021427, "grad_norm": 0.007613217543577423, "learning_rate": 7.834516624920491e-06, "loss": 0.0011, "step": 114710 }, { "epoch": 0.7547219462773761, "grad_norm": 0.24077601624497144, "learning_rate": 7.834043662614007e-06, "loss": 0.0011, "step": 114720 }, { "epoch": 0.7547877344526095, "grad_norm": 0.03243185606013134, "learning_rate": 7.833570662943108e-06, "loss": 0.0011, "step": 114730 }, { "epoch": 0.7548535226278429, "grad_norm": 0.0596780222810833, "learning_rate": 7.833097625914025e-06, "loss": 0.0018, "step": 114740 }, { "epoch": 0.7549193108030763, "grad_norm": 0.028898054734230422, "learning_rate": 7.832624551532997e-06, "loss": 0.0009, "step": 114750 }, { "epoch": 0.7549850989783097, "grad_norm": 0.06485561761510233, "learning_rate": 7.83215143980626e-06, "loss": 0.0028, "step": 114760 }, { "epoch": 0.7550508871535431, "grad_norm": 0.07783763306389106, "learning_rate": 7.831678290740052e-06, "loss": 0.0019, "step": 114770 }, { "epoch": 0.7551166753287764, "grad_norm": 0.057131316238578575, "learning_rate": 7.831205104340613e-06, "loss": 0.0019, "step": 114780 }, { "epoch": 0.7551824635040097, "grad_norm": 0.012171073153950834, "learning_rate": 7.83073188061418e-06, "loss": 0.0012, "step": 114790 }, { "epoch": 0.7552482516792431, "grad_norm": 0.1027047296679339, "learning_rate": 7.83025861956699e-06, "loss": 0.0012, "step": 114800 }, { "epoch": 0.7553140398544765, "grad_norm": 0.012407559937594015, "learning_rate": 7.829785321205285e-06, "loss": 0.0008, "step": 114810 }, { "epoch": 0.7553798280297099, "grad_norm": 0.05499695482832932, "learning_rate": 7.829311985535304e-06, "loss": 0.0012, "step": 114820 }, { "epoch": 0.7554456162049433, "grad_norm": 0.19041790815661044, "learning_rate": 7.82883861256329e-06, "loss": 0.0034, "step": 114830 }, { "epoch": 0.7555114043801767, "grad_norm": 0.14242257639566533, "learning_rate": 7.82836520229548e-06, "loss": 0.001, "step": 114840 }, { "epoch": 0.7555771925554101, "grad_norm": 0.02506789612664212, "learning_rate": 7.827891754738118e-06, "loss": 0.0041, "step": 114850 }, { "epoch": 0.7556429807306435, "grad_norm": 0.037078894494968234, "learning_rate": 7.827418269897447e-06, "loss": 0.001, "step": 114860 }, { "epoch": 0.7557087689058769, "grad_norm": 0.05655588490509265, "learning_rate": 7.826944747779705e-06, "loss": 0.0026, "step": 114870 }, { "epoch": 0.7557745570811102, "grad_norm": 0.03393399859724049, "learning_rate": 7.826471188391142e-06, "loss": 0.0013, "step": 114880 }, { "epoch": 0.7558403452563436, "grad_norm": 0.013209274993124819, "learning_rate": 7.825997591737996e-06, "loss": 0.0019, "step": 114890 }, { "epoch": 0.755906133431577, "grad_norm": 0.22270804213783946, "learning_rate": 7.825523957826511e-06, "loss": 0.0041, "step": 114900 }, { "epoch": 0.7559719216068104, "grad_norm": 0.0753481607297614, "learning_rate": 7.825050286662935e-06, "loss": 0.0018, "step": 114910 }, { "epoch": 0.7560377097820438, "grad_norm": 0.06419346702610101, "learning_rate": 7.824576578253508e-06, "loss": 0.0026, "step": 114920 }, { "epoch": 0.7561034979572772, "grad_norm": 0.1773797380731407, "learning_rate": 7.82410283260448e-06, "loss": 0.0011, "step": 114930 }, { "epoch": 0.7561692861325106, "grad_norm": 0.05578901799114261, "learning_rate": 7.823629049722094e-06, "loss": 0.0015, "step": 114940 }, { "epoch": 0.756235074307744, "grad_norm": 0.00748975245569981, "learning_rate": 7.8231552296126e-06, "loss": 0.0011, "step": 114950 }, { "epoch": 0.7563008624829773, "grad_norm": 0.043791795040320575, "learning_rate": 7.822681372282241e-06, "loss": 0.0028, "step": 114960 }, { "epoch": 0.7563666506582107, "grad_norm": 0.03587203414234234, "learning_rate": 7.822207477737265e-06, "loss": 0.0015, "step": 114970 }, { "epoch": 0.7564324388334441, "grad_norm": 0.03368254714871955, "learning_rate": 7.821733545983923e-06, "loss": 0.0021, "step": 114980 }, { "epoch": 0.7564982270086774, "grad_norm": 0.0937078659992088, "learning_rate": 7.82125957702846e-06, "loss": 0.0014, "step": 114990 }, { "epoch": 0.7565640151839108, "grad_norm": 0.06380169438521105, "learning_rate": 7.820785570877127e-06, "loss": 0.0014, "step": 115000 }, { "epoch": 0.7566298033591442, "grad_norm": 0.027149940168442986, "learning_rate": 7.82031152753617e-06, "loss": 0.0011, "step": 115010 }, { "epoch": 0.7566955915343776, "grad_norm": 0.06891012042915305, "learning_rate": 7.819837447011845e-06, "loss": 0.0024, "step": 115020 }, { "epoch": 0.756761379709611, "grad_norm": 0.027717782594519645, "learning_rate": 7.819363329310396e-06, "loss": 0.0011, "step": 115030 }, { "epoch": 0.7568271678848444, "grad_norm": 0.005707946838649907, "learning_rate": 7.818889174438078e-06, "loss": 0.0009, "step": 115040 }, { "epoch": 0.7568929560600778, "grad_norm": 0.07594074674734458, "learning_rate": 7.81841498240114e-06, "loss": 0.0014, "step": 115050 }, { "epoch": 0.7569587442353112, "grad_norm": 0.008022472596974374, "learning_rate": 7.817940753205833e-06, "loss": 0.0013, "step": 115060 }, { "epoch": 0.7570245324105446, "grad_norm": 0.1242202403227962, "learning_rate": 7.817466486858414e-06, "loss": 0.001, "step": 115070 }, { "epoch": 0.757090320585778, "grad_norm": 0.08408394783996659, "learning_rate": 7.81699218336513e-06, "loss": 0.0008, "step": 115080 }, { "epoch": 0.7571561087610112, "grad_norm": 0.15499907887682426, "learning_rate": 7.81651784273224e-06, "loss": 0.0014, "step": 115090 }, { "epoch": 0.7572218969362446, "grad_norm": 0.03645179649783436, "learning_rate": 7.81604346496599e-06, "loss": 0.0015, "step": 115100 }, { "epoch": 0.757287685111478, "grad_norm": 0.042815109882482855, "learning_rate": 7.815569050072643e-06, "loss": 0.0019, "step": 115110 }, { "epoch": 0.7573534732867114, "grad_norm": 0.1034541031485114, "learning_rate": 7.815094598058449e-06, "loss": 0.0016, "step": 115120 }, { "epoch": 0.7574192614619448, "grad_norm": 0.04328176150691842, "learning_rate": 7.814620108929663e-06, "loss": 0.0009, "step": 115130 }, { "epoch": 0.7574850496371782, "grad_norm": 0.10656772791955443, "learning_rate": 7.814145582692543e-06, "loss": 0.0015, "step": 115140 }, { "epoch": 0.7575508378124116, "grad_norm": 0.07933507953541107, "learning_rate": 7.813671019353341e-06, "loss": 0.0015, "step": 115150 }, { "epoch": 0.757616625987645, "grad_norm": 0.011644510663425573, "learning_rate": 7.81319641891832e-06, "loss": 0.0008, "step": 115160 }, { "epoch": 0.7576824141628784, "grad_norm": 0.016551414496007694, "learning_rate": 7.812721781393733e-06, "loss": 0.0008, "step": 115170 }, { "epoch": 0.7577482023381118, "grad_norm": 0.03456115973802804, "learning_rate": 7.812247106785838e-06, "loss": 0.0024, "step": 115180 }, { "epoch": 0.7578139905133451, "grad_norm": 0.06879308820277659, "learning_rate": 7.811772395100894e-06, "loss": 0.0017, "step": 115190 }, { "epoch": 0.7578797786885785, "grad_norm": 0.03071319110091107, "learning_rate": 7.81129764634516e-06, "loss": 0.0012, "step": 115200 }, { "epoch": 0.7579455668638119, "grad_norm": 0.14622314071250364, "learning_rate": 7.810822860524893e-06, "loss": 0.0022, "step": 115210 }, { "epoch": 0.7580113550390453, "grad_norm": 0.012770860327008728, "learning_rate": 7.810348037646356e-06, "loss": 0.0016, "step": 115220 }, { "epoch": 0.7580771432142787, "grad_norm": 0.10887354221517688, "learning_rate": 7.809873177715807e-06, "loss": 0.001, "step": 115230 }, { "epoch": 0.758142931389512, "grad_norm": 0.037261084481588214, "learning_rate": 7.809398280739505e-06, "loss": 0.0007, "step": 115240 }, { "epoch": 0.7582087195647454, "grad_norm": 0.04687203699454744, "learning_rate": 7.808923346723716e-06, "loss": 0.0025, "step": 115250 }, { "epoch": 0.7582745077399788, "grad_norm": 0.008951130225358308, "learning_rate": 7.808448375674695e-06, "loss": 0.001, "step": 115260 }, { "epoch": 0.7583402959152122, "grad_norm": 0.07003874068441482, "learning_rate": 7.807973367598711e-06, "loss": 0.0013, "step": 115270 }, { "epoch": 0.7584060840904456, "grad_norm": 0.08442801079122482, "learning_rate": 7.80749832250202e-06, "loss": 0.0011, "step": 115280 }, { "epoch": 0.7584718722656789, "grad_norm": 0.08273663602629446, "learning_rate": 7.807023240390893e-06, "loss": 0.0013, "step": 115290 }, { "epoch": 0.7585376604409123, "grad_norm": 0.281662152000496, "learning_rate": 7.806548121271584e-06, "loss": 0.0011, "step": 115300 }, { "epoch": 0.7586034486161457, "grad_norm": 0.0788408571859122, "learning_rate": 7.806072965150363e-06, "loss": 0.0012, "step": 115310 }, { "epoch": 0.7586692367913791, "grad_norm": 0.07702977228089172, "learning_rate": 7.805597772033496e-06, "loss": 0.0009, "step": 115320 }, { "epoch": 0.7587350249666125, "grad_norm": 0.02114615420706686, "learning_rate": 7.805122541927244e-06, "loss": 0.0007, "step": 115330 }, { "epoch": 0.7588008131418459, "grad_norm": 0.0475270746012365, "learning_rate": 7.804647274837875e-06, "loss": 0.0012, "step": 115340 }, { "epoch": 0.7588666013170793, "grad_norm": 0.02094129260484091, "learning_rate": 7.804171970771652e-06, "loss": 0.0006, "step": 115350 }, { "epoch": 0.7589323894923127, "grad_norm": 0.09126929743161208, "learning_rate": 7.803696629734844e-06, "loss": 0.0022, "step": 115360 }, { "epoch": 0.7589981776675461, "grad_norm": 0.29283135609906646, "learning_rate": 7.803221251733719e-06, "loss": 0.0006, "step": 115370 }, { "epoch": 0.7590639658427795, "grad_norm": 0.10088937617111966, "learning_rate": 7.802745836774542e-06, "loss": 0.0013, "step": 115380 }, { "epoch": 0.7591297540180127, "grad_norm": 0.08453481178208813, "learning_rate": 7.80227038486358e-06, "loss": 0.0009, "step": 115390 }, { "epoch": 0.7591955421932461, "grad_norm": 0.023130179339139773, "learning_rate": 7.801794896007104e-06, "loss": 0.0006, "step": 115400 }, { "epoch": 0.7592613303684795, "grad_norm": 0.00401082715711729, "learning_rate": 7.801319370211382e-06, "loss": 0.0004, "step": 115410 }, { "epoch": 0.7593271185437129, "grad_norm": 0.009606588495956187, "learning_rate": 7.800843807482684e-06, "loss": 0.0021, "step": 115420 }, { "epoch": 0.7593929067189463, "grad_norm": 0.04913003216318472, "learning_rate": 7.80036820782728e-06, "loss": 0.0017, "step": 115430 }, { "epoch": 0.7594586948941797, "grad_norm": 0.07200673831057128, "learning_rate": 7.799892571251438e-06, "loss": 0.0015, "step": 115440 }, { "epoch": 0.7595244830694131, "grad_norm": 0.03242821628551409, "learning_rate": 7.799416897761431e-06, "loss": 0.0014, "step": 115450 }, { "epoch": 0.7595902712446465, "grad_norm": 0.1069521738363762, "learning_rate": 7.798941187363532e-06, "loss": 0.0023, "step": 115460 }, { "epoch": 0.7596560594198799, "grad_norm": 0.011272688979839978, "learning_rate": 7.798465440064009e-06, "loss": 0.0009, "step": 115470 }, { "epoch": 0.7597218475951133, "grad_norm": 0.09039705684125843, "learning_rate": 7.797989655869136e-06, "loss": 0.001, "step": 115480 }, { "epoch": 0.7597876357703467, "grad_norm": 0.045315192019823015, "learning_rate": 7.797513834785186e-06, "loss": 0.0025, "step": 115490 }, { "epoch": 0.75985342394558, "grad_norm": 0.029658949424592223, "learning_rate": 7.797037976818433e-06, "loss": 0.0009, "step": 115500 }, { "epoch": 0.7599192121208134, "grad_norm": 0.002884578124554664, "learning_rate": 7.79656208197515e-06, "loss": 0.002, "step": 115510 }, { "epoch": 0.7599850002960468, "grad_norm": 0.08214099770154859, "learning_rate": 7.79608615026161e-06, "loss": 0.0027, "step": 115520 }, { "epoch": 0.7600507884712802, "grad_norm": 0.17803427288509321, "learning_rate": 7.79561018168409e-06, "loss": 0.003, "step": 115530 }, { "epoch": 0.7601165766465136, "grad_norm": 0.01645514254939033, "learning_rate": 7.795134176248865e-06, "loss": 0.0013, "step": 115540 }, { "epoch": 0.760182364821747, "grad_norm": 0.028866550137371935, "learning_rate": 7.794658133962208e-06, "loss": 0.0017, "step": 115550 }, { "epoch": 0.7602481529969803, "grad_norm": 0.025298547599484644, "learning_rate": 7.794182054830397e-06, "loss": 0.0022, "step": 115560 }, { "epoch": 0.7603139411722137, "grad_norm": 0.03551116359476462, "learning_rate": 7.793705938859711e-06, "loss": 0.0011, "step": 115570 }, { "epoch": 0.7603797293474471, "grad_norm": 0.025087594172220267, "learning_rate": 7.793229786056424e-06, "loss": 0.001, "step": 115580 }, { "epoch": 0.7604455175226805, "grad_norm": 0.02481768766758506, "learning_rate": 7.792753596426814e-06, "loss": 0.0036, "step": 115590 }, { "epoch": 0.7605113056979138, "grad_norm": 0.005728241007060508, "learning_rate": 7.79227736997716e-06, "loss": 0.0006, "step": 115600 }, { "epoch": 0.7605770938731472, "grad_norm": 0.05043021933461935, "learning_rate": 7.79180110671374e-06, "loss": 0.0018, "step": 115610 }, { "epoch": 0.7606428820483806, "grad_norm": 0.00175178875385398, "learning_rate": 7.791324806642835e-06, "loss": 0.0015, "step": 115620 }, { "epoch": 0.760708670223614, "grad_norm": 0.04547987196863385, "learning_rate": 7.790848469770722e-06, "loss": 0.0016, "step": 115630 }, { "epoch": 0.7607744583988474, "grad_norm": 0.027393883992983013, "learning_rate": 7.790372096103681e-06, "loss": 0.0017, "step": 115640 }, { "epoch": 0.7608402465740808, "grad_norm": 0.01109157606970377, "learning_rate": 7.789895685647996e-06, "loss": 0.0018, "step": 115650 }, { "epoch": 0.7609060347493142, "grad_norm": 0.2013448309988382, "learning_rate": 7.789419238409946e-06, "loss": 0.0016, "step": 115660 }, { "epoch": 0.7609718229245476, "grad_norm": 0.004309036125463948, "learning_rate": 7.788942754395813e-06, "loss": 0.0023, "step": 115670 }, { "epoch": 0.761037611099781, "grad_norm": 0.04316969153748988, "learning_rate": 7.788466233611877e-06, "loss": 0.0007, "step": 115680 }, { "epoch": 0.7611033992750144, "grad_norm": 0.011596498874517754, "learning_rate": 7.787989676064421e-06, "loss": 0.0012, "step": 115690 }, { "epoch": 0.7611691874502476, "grad_norm": 0.10784173843278061, "learning_rate": 7.78751308175973e-06, "loss": 0.0017, "step": 115700 }, { "epoch": 0.761234975625481, "grad_norm": 0.014147828113732623, "learning_rate": 7.787036450704087e-06, "loss": 0.0015, "step": 115710 }, { "epoch": 0.7613007638007144, "grad_norm": 0.0131577013919635, "learning_rate": 7.786559782903775e-06, "loss": 0.0012, "step": 115720 }, { "epoch": 0.7613665519759478, "grad_norm": 0.06498391184873636, "learning_rate": 7.78608307836508e-06, "loss": 0.0074, "step": 115730 }, { "epoch": 0.7614323401511812, "grad_norm": 0.04394062902934885, "learning_rate": 7.785606337094285e-06, "loss": 0.0017, "step": 115740 }, { "epoch": 0.7614981283264146, "grad_norm": 0.0007403817266052735, "learning_rate": 7.785129559097675e-06, "loss": 0.0006, "step": 115750 }, { "epoch": 0.761563916501648, "grad_norm": 0.03361700722414409, "learning_rate": 7.784652744381538e-06, "loss": 0.0019, "step": 115760 }, { "epoch": 0.7616297046768814, "grad_norm": 0.04815740114717721, "learning_rate": 7.784175892952161e-06, "loss": 0.0013, "step": 115770 }, { "epoch": 0.7616954928521148, "grad_norm": 0.019879238097510295, "learning_rate": 7.783699004815828e-06, "loss": 0.0011, "step": 115780 }, { "epoch": 0.7617612810273482, "grad_norm": 0.09647209779689207, "learning_rate": 7.783222079978827e-06, "loss": 0.0022, "step": 115790 }, { "epoch": 0.7618270692025815, "grad_norm": 0.048764014072238154, "learning_rate": 7.782745118447448e-06, "loss": 0.0006, "step": 115800 }, { "epoch": 0.7618928573778149, "grad_norm": 0.04052467488202088, "learning_rate": 7.782268120227978e-06, "loss": 0.0021, "step": 115810 }, { "epoch": 0.7619586455530483, "grad_norm": 0.04002665046715692, "learning_rate": 7.781791085326705e-06, "loss": 0.0011, "step": 115820 }, { "epoch": 0.7620244337282817, "grad_norm": 0.012475762322996327, "learning_rate": 7.78131401374992e-06, "loss": 0.0013, "step": 115830 }, { "epoch": 0.762090221903515, "grad_norm": 0.015772918384055517, "learning_rate": 7.780836905503911e-06, "loss": 0.0006, "step": 115840 }, { "epoch": 0.7621560100787484, "grad_norm": 0.08748399674405861, "learning_rate": 7.780359760594968e-06, "loss": 0.0026, "step": 115850 }, { "epoch": 0.7622217982539818, "grad_norm": 0.0012558915397313942, "learning_rate": 7.779882579029386e-06, "loss": 0.0033, "step": 115860 }, { "epoch": 0.7622875864292152, "grad_norm": 0.028394896253461484, "learning_rate": 7.77940536081345e-06, "loss": 0.0018, "step": 115870 }, { "epoch": 0.7623533746044486, "grad_norm": 0.03846763356651168, "learning_rate": 7.778928105953457e-06, "loss": 0.0009, "step": 115880 }, { "epoch": 0.762419162779682, "grad_norm": 0.07981353935345316, "learning_rate": 7.778450814455696e-06, "loss": 0.0019, "step": 115890 }, { "epoch": 0.7624849509549153, "grad_norm": 0.02185362352614754, "learning_rate": 7.77797348632646e-06, "loss": 0.0011, "step": 115900 }, { "epoch": 0.7625507391301487, "grad_norm": 0.024407049902069697, "learning_rate": 7.777496121572044e-06, "loss": 0.0024, "step": 115910 }, { "epoch": 0.7626165273053821, "grad_norm": 0.023019411651719363, "learning_rate": 7.77701872019874e-06, "loss": 0.0012, "step": 115920 }, { "epoch": 0.7626823154806155, "grad_norm": 0.08984339750343083, "learning_rate": 7.776541282212841e-06, "loss": 0.0008, "step": 115930 }, { "epoch": 0.7627481036558489, "grad_norm": 0.08272668508161589, "learning_rate": 7.776063807620647e-06, "loss": 0.0009, "step": 115940 }, { "epoch": 0.7628138918310823, "grad_norm": 0.028861131657629754, "learning_rate": 7.775586296428445e-06, "loss": 0.0007, "step": 115950 }, { "epoch": 0.7628796800063157, "grad_norm": 0.006502122777547374, "learning_rate": 7.775108748642537e-06, "loss": 0.0007, "step": 115960 }, { "epoch": 0.7629454681815491, "grad_norm": 0.005717848302685165, "learning_rate": 7.774631164269216e-06, "loss": 0.0011, "step": 115970 }, { "epoch": 0.7630112563567825, "grad_norm": 0.13258011942272027, "learning_rate": 7.77415354331478e-06, "loss": 0.0008, "step": 115980 }, { "epoch": 0.7630770445320159, "grad_norm": 0.06013355647124942, "learning_rate": 7.773675885785526e-06, "loss": 0.0011, "step": 115990 }, { "epoch": 0.7631428327072493, "grad_norm": 0.02122013749283246, "learning_rate": 7.77319819168775e-06, "loss": 0.0014, "step": 116000 }, { "epoch": 0.7632086208824825, "grad_norm": 0.06269269701599359, "learning_rate": 7.772720461027752e-06, "loss": 0.0022, "step": 116010 }, { "epoch": 0.7632744090577159, "grad_norm": 0.006474165930300046, "learning_rate": 7.772242693811827e-06, "loss": 0.0009, "step": 116020 }, { "epoch": 0.7633401972329493, "grad_norm": 0.0649370542382146, "learning_rate": 7.771764890046279e-06, "loss": 0.0009, "step": 116030 }, { "epoch": 0.7634059854081827, "grad_norm": 0.1553916504966202, "learning_rate": 7.771287049737404e-06, "loss": 0.0013, "step": 116040 }, { "epoch": 0.7634717735834161, "grad_norm": 0.05349111005479484, "learning_rate": 7.770809172891503e-06, "loss": 0.0014, "step": 116050 }, { "epoch": 0.7635375617586495, "grad_norm": 0.034683553145864784, "learning_rate": 7.770331259514875e-06, "loss": 0.0021, "step": 116060 }, { "epoch": 0.7636033499338829, "grad_norm": 0.0032044434437465716, "learning_rate": 7.769853309613823e-06, "loss": 0.0003, "step": 116070 }, { "epoch": 0.7636691381091163, "grad_norm": 0.060431052563967386, "learning_rate": 7.769375323194646e-06, "loss": 0.0011, "step": 116080 }, { "epoch": 0.7637349262843497, "grad_norm": 0.023240811506444553, "learning_rate": 7.768897300263649e-06, "loss": 0.0007, "step": 116090 }, { "epoch": 0.7638007144595831, "grad_norm": 0.035463553783038955, "learning_rate": 7.76841924082713e-06, "loss": 0.0008, "step": 116100 }, { "epoch": 0.7638665026348164, "grad_norm": 0.07858783356514036, "learning_rate": 7.767941144891397e-06, "loss": 0.0007, "step": 116110 }, { "epoch": 0.7639322908100498, "grad_norm": 0.02141379977683437, "learning_rate": 7.767463012462748e-06, "loss": 0.0009, "step": 116120 }, { "epoch": 0.7639980789852832, "grad_norm": 0.03604243659526705, "learning_rate": 7.766984843547491e-06, "loss": 0.0008, "step": 116130 }, { "epoch": 0.7640638671605166, "grad_norm": 0.008368931809299953, "learning_rate": 7.766506638151928e-06, "loss": 0.0006, "step": 116140 }, { "epoch": 0.76412965533575, "grad_norm": 0.030485270089501578, "learning_rate": 7.766028396282365e-06, "loss": 0.0012, "step": 116150 }, { "epoch": 0.7641954435109833, "grad_norm": 0.04991382459636528, "learning_rate": 7.765550117945107e-06, "loss": 0.0007, "step": 116160 }, { "epoch": 0.7642612316862167, "grad_norm": 0.008562728289524658, "learning_rate": 7.765071803146458e-06, "loss": 0.0017, "step": 116170 }, { "epoch": 0.7643270198614501, "grad_norm": 0.07139973176950314, "learning_rate": 7.764593451892724e-06, "loss": 0.0009, "step": 116180 }, { "epoch": 0.7643928080366835, "grad_norm": 0.5538000492898304, "learning_rate": 7.764115064190215e-06, "loss": 0.0027, "step": 116190 }, { "epoch": 0.7644585962119169, "grad_norm": 0.03118108178110361, "learning_rate": 7.763636640045235e-06, "loss": 0.0008, "step": 116200 }, { "epoch": 0.7645243843871502, "grad_norm": 0.04183910242732482, "learning_rate": 7.763158179464095e-06, "loss": 0.0012, "step": 116210 }, { "epoch": 0.7645901725623836, "grad_norm": 0.08085277354002687, "learning_rate": 7.762679682453097e-06, "loss": 0.0017, "step": 116220 }, { "epoch": 0.764655960737617, "grad_norm": 0.057775310697109575, "learning_rate": 7.762201149018558e-06, "loss": 0.0016, "step": 116230 }, { "epoch": 0.7647217489128504, "grad_norm": 0.004447255313007042, "learning_rate": 7.76172257916678e-06, "loss": 0.0013, "step": 116240 }, { "epoch": 0.7647875370880838, "grad_norm": 0.04196842262326735, "learning_rate": 7.761243972904073e-06, "loss": 0.0015, "step": 116250 }, { "epoch": 0.7648533252633172, "grad_norm": 0.03394257984219217, "learning_rate": 7.760765330236753e-06, "loss": 0.0011, "step": 116260 }, { "epoch": 0.7649191134385506, "grad_norm": 0.025900315502581234, "learning_rate": 7.760286651171123e-06, "loss": 0.0015, "step": 116270 }, { "epoch": 0.764984901613784, "grad_norm": 0.022731565135035044, "learning_rate": 7.7598079357135e-06, "loss": 0.0015, "step": 116280 }, { "epoch": 0.7650506897890174, "grad_norm": 0.08988669840583444, "learning_rate": 7.759329183870192e-06, "loss": 0.0007, "step": 116290 }, { "epoch": 0.7651164779642508, "grad_norm": 0.20409982215138184, "learning_rate": 7.75885039564751e-06, "loss": 0.0021, "step": 116300 }, { "epoch": 0.765182266139484, "grad_norm": 0.006804093879964251, "learning_rate": 7.75837157105177e-06, "loss": 0.001, "step": 116310 }, { "epoch": 0.7652480543147174, "grad_norm": 0.03473157616741068, "learning_rate": 7.757892710089284e-06, "loss": 0.0012, "step": 116320 }, { "epoch": 0.7653138424899508, "grad_norm": 0.06964796928060948, "learning_rate": 7.757413812766363e-06, "loss": 0.0019, "step": 116330 }, { "epoch": 0.7653796306651842, "grad_norm": 0.0856230080589193, "learning_rate": 7.756934879089322e-06, "loss": 0.0011, "step": 116340 }, { "epoch": 0.7654454188404176, "grad_norm": 0.01631917958509448, "learning_rate": 7.756455909064478e-06, "loss": 0.0011, "step": 116350 }, { "epoch": 0.765511207015651, "grad_norm": 0.01512942422618425, "learning_rate": 7.755976902698141e-06, "loss": 0.0019, "step": 116360 }, { "epoch": 0.7655769951908844, "grad_norm": 0.062033734195213346, "learning_rate": 7.75549785999663e-06, "loss": 0.0016, "step": 116370 }, { "epoch": 0.7656427833661178, "grad_norm": 0.01312561162004995, "learning_rate": 7.75501878096626e-06, "loss": 0.0023, "step": 116380 }, { "epoch": 0.7657085715413512, "grad_norm": 0.017448255356624146, "learning_rate": 7.754539665613345e-06, "loss": 0.0011, "step": 116390 }, { "epoch": 0.7657743597165846, "grad_norm": 0.045302979740641416, "learning_rate": 7.754060513944203e-06, "loss": 0.0016, "step": 116400 }, { "epoch": 0.765840147891818, "grad_norm": 0.00555458690169222, "learning_rate": 7.753581325965154e-06, "loss": 0.0011, "step": 116410 }, { "epoch": 0.7659059360670513, "grad_norm": 0.010419839337438987, "learning_rate": 7.753102101682513e-06, "loss": 0.0023, "step": 116420 }, { "epoch": 0.7659717242422847, "grad_norm": 0.04632792408821343, "learning_rate": 7.752622841102598e-06, "loss": 0.0017, "step": 116430 }, { "epoch": 0.766037512417518, "grad_norm": 0.05692728750331429, "learning_rate": 7.752143544231729e-06, "loss": 0.0015, "step": 116440 }, { "epoch": 0.7661033005927514, "grad_norm": 0.01947986232905589, "learning_rate": 7.751664211076223e-06, "loss": 0.0009, "step": 116450 }, { "epoch": 0.7661690887679848, "grad_norm": 0.018475732705926416, "learning_rate": 7.751184841642402e-06, "loss": 0.001, "step": 116460 }, { "epoch": 0.7662348769432182, "grad_norm": 0.021862206224652365, "learning_rate": 7.750705435936584e-06, "loss": 0.0009, "step": 116470 }, { "epoch": 0.7663006651184516, "grad_norm": 0.0350142551503179, "learning_rate": 7.75022599396509e-06, "loss": 0.0014, "step": 116480 }, { "epoch": 0.766366453293685, "grad_norm": 0.11272901387628595, "learning_rate": 7.749746515734244e-06, "loss": 0.0009, "step": 116490 }, { "epoch": 0.7664322414689184, "grad_norm": 0.09859023196588983, "learning_rate": 7.749267001250363e-06, "loss": 0.0008, "step": 116500 }, { "epoch": 0.7664980296441518, "grad_norm": 0.0192037447779218, "learning_rate": 7.748787450519771e-06, "loss": 0.0015, "step": 116510 }, { "epoch": 0.7665638178193851, "grad_norm": 0.03290331296158138, "learning_rate": 7.74830786354879e-06, "loss": 0.0011, "step": 116520 }, { "epoch": 0.7666296059946185, "grad_norm": 0.06906868523580598, "learning_rate": 7.747828240343744e-06, "loss": 0.0022, "step": 116530 }, { "epoch": 0.7666953941698519, "grad_norm": 0.4540107008332457, "learning_rate": 7.747348580910958e-06, "loss": 0.0019, "step": 116540 }, { "epoch": 0.7667611823450853, "grad_norm": 0.015271940501281849, "learning_rate": 7.74686888525675e-06, "loss": 0.0019, "step": 116550 }, { "epoch": 0.7668269705203187, "grad_norm": 0.02880244248596735, "learning_rate": 7.74638915338745e-06, "loss": 0.0015, "step": 116560 }, { "epoch": 0.7668927586955521, "grad_norm": 0.00905082893832495, "learning_rate": 7.745909385309382e-06, "loss": 0.0013, "step": 116570 }, { "epoch": 0.7669585468707855, "grad_norm": 0.022767272927005415, "learning_rate": 7.745429581028869e-06, "loss": 0.0005, "step": 116580 }, { "epoch": 0.7670243350460189, "grad_norm": 0.006759916092319406, "learning_rate": 7.744949740552237e-06, "loss": 0.0007, "step": 116590 }, { "epoch": 0.7670901232212523, "grad_norm": 0.14399023284761728, "learning_rate": 7.744469863885815e-06, "loss": 0.0013, "step": 116600 }, { "epoch": 0.7671559113964856, "grad_norm": 0.1626098141172188, "learning_rate": 7.743989951035929e-06, "loss": 0.0011, "step": 116610 }, { "epoch": 0.7672216995717189, "grad_norm": 0.012061065020455116, "learning_rate": 7.743510002008903e-06, "loss": 0.0017, "step": 116620 }, { "epoch": 0.7672874877469523, "grad_norm": 0.04060298765372934, "learning_rate": 7.743030016811069e-06, "loss": 0.0008, "step": 116630 }, { "epoch": 0.7673532759221857, "grad_norm": 0.052969071003335776, "learning_rate": 7.742549995448752e-06, "loss": 0.0029, "step": 116640 }, { "epoch": 0.7674190640974191, "grad_norm": 0.032989498857183115, "learning_rate": 7.742069937928282e-06, "loss": 0.0011, "step": 116650 }, { "epoch": 0.7674848522726525, "grad_norm": 0.04283496815371497, "learning_rate": 7.741589844255987e-06, "loss": 0.0027, "step": 116660 }, { "epoch": 0.7675506404478859, "grad_norm": 0.04836802689385228, "learning_rate": 7.7411097144382e-06, "loss": 0.0007, "step": 116670 }, { "epoch": 0.7676164286231193, "grad_norm": 0.009567085182235318, "learning_rate": 7.740629548481248e-06, "loss": 0.0012, "step": 116680 }, { "epoch": 0.7676822167983527, "grad_norm": 0.1629958166104666, "learning_rate": 7.74014934639146e-06, "loss": 0.0033, "step": 116690 }, { "epoch": 0.7677480049735861, "grad_norm": 0.010010073104048927, "learning_rate": 7.739669108175174e-06, "loss": 0.001, "step": 116700 }, { "epoch": 0.7678137931488195, "grad_norm": 0.07296759939086792, "learning_rate": 7.739188833838713e-06, "loss": 0.0009, "step": 116710 }, { "epoch": 0.7678795813240528, "grad_norm": 0.2528206767433268, "learning_rate": 7.738708523388417e-06, "loss": 0.0013, "step": 116720 }, { "epoch": 0.7679453694992862, "grad_norm": 0.006237085336261997, "learning_rate": 7.738228176830611e-06, "loss": 0.0016, "step": 116730 }, { "epoch": 0.7680111576745196, "grad_norm": 0.019457245966790854, "learning_rate": 7.737747794171633e-06, "loss": 0.0012, "step": 116740 }, { "epoch": 0.768076945849753, "grad_norm": 0.061640970581780516, "learning_rate": 7.737267375417814e-06, "loss": 0.0015, "step": 116750 }, { "epoch": 0.7681427340249863, "grad_norm": 0.008511662049739372, "learning_rate": 7.736786920575487e-06, "loss": 0.0011, "step": 116760 }, { "epoch": 0.7682085222002197, "grad_norm": 0.03695900561122622, "learning_rate": 7.73630642965099e-06, "loss": 0.0017, "step": 116770 }, { "epoch": 0.7682743103754531, "grad_norm": 0.0044759876789435, "learning_rate": 7.735825902650657e-06, "loss": 0.0007, "step": 116780 }, { "epoch": 0.7683400985506865, "grad_norm": 0.08465446470350159, "learning_rate": 7.735345339580821e-06, "loss": 0.0021, "step": 116790 }, { "epoch": 0.7684058867259199, "grad_norm": 0.08865687043534297, "learning_rate": 7.73486474044782e-06, "loss": 0.0009, "step": 116800 }, { "epoch": 0.7684716749011533, "grad_norm": 0.018975487235646814, "learning_rate": 7.734384105257989e-06, "loss": 0.0028, "step": 116810 }, { "epoch": 0.7685374630763866, "grad_norm": 0.043498028928586736, "learning_rate": 7.733903434017665e-06, "loss": 0.0018, "step": 116820 }, { "epoch": 0.76860325125162, "grad_norm": 0.020070352963829613, "learning_rate": 7.733422726733184e-06, "loss": 0.0012, "step": 116830 }, { "epoch": 0.7686690394268534, "grad_norm": 0.0454415639941819, "learning_rate": 7.732941983410887e-06, "loss": 0.0007, "step": 116840 }, { "epoch": 0.7687348276020868, "grad_norm": 0.10144381143445466, "learning_rate": 7.732461204057109e-06, "loss": 0.0018, "step": 116850 }, { "epoch": 0.7688006157773202, "grad_norm": 0.046990205328345164, "learning_rate": 7.731980388678192e-06, "loss": 0.0013, "step": 116860 }, { "epoch": 0.7688664039525536, "grad_norm": 0.05329000248305604, "learning_rate": 7.731499537280469e-06, "loss": 0.0013, "step": 116870 }, { "epoch": 0.768932192127787, "grad_norm": 0.06131974728013423, "learning_rate": 7.731018649870286e-06, "loss": 0.0014, "step": 116880 }, { "epoch": 0.7689979803030204, "grad_norm": 0.07435548924107599, "learning_rate": 7.730537726453981e-06, "loss": 0.001, "step": 116890 }, { "epoch": 0.7690637684782538, "grad_norm": 0.026383994091433008, "learning_rate": 7.730056767037894e-06, "loss": 0.0013, "step": 116900 }, { "epoch": 0.7691295566534871, "grad_norm": 0.009797077117933338, "learning_rate": 7.729575771628368e-06, "loss": 0.001, "step": 116910 }, { "epoch": 0.7691953448287205, "grad_norm": 0.05498742556643837, "learning_rate": 7.729094740231743e-06, "loss": 0.0005, "step": 116920 }, { "epoch": 0.7692611330039538, "grad_norm": 0.22978778873971342, "learning_rate": 7.728613672854357e-06, "loss": 0.0015, "step": 116930 }, { "epoch": 0.7693269211791872, "grad_norm": 0.010638752528395884, "learning_rate": 7.72813256950256e-06, "loss": 0.0009, "step": 116940 }, { "epoch": 0.7693927093544206, "grad_norm": 0.039610483258492805, "learning_rate": 7.72765143018269e-06, "loss": 0.0012, "step": 116950 }, { "epoch": 0.769458497529654, "grad_norm": 0.038344553285036054, "learning_rate": 7.72717025490109e-06, "loss": 0.0021, "step": 116960 }, { "epoch": 0.7695242857048874, "grad_norm": 0.0322783103596287, "learning_rate": 7.726689043664108e-06, "loss": 0.0009, "step": 116970 }, { "epoch": 0.7695900738801208, "grad_norm": 0.051479088069816256, "learning_rate": 7.726207796478085e-06, "loss": 0.0025, "step": 116980 }, { "epoch": 0.7696558620553542, "grad_norm": 0.03130965449909677, "learning_rate": 7.725726513349367e-06, "loss": 0.0019, "step": 116990 }, { "epoch": 0.7697216502305876, "grad_norm": 0.016382556450519746, "learning_rate": 7.7252451942843e-06, "loss": 0.0019, "step": 117000 }, { "epoch": 0.769787438405821, "grad_norm": 0.031046093761967118, "learning_rate": 7.724763839289226e-06, "loss": 0.0014, "step": 117010 }, { "epoch": 0.7698532265810544, "grad_norm": 0.046887406589106465, "learning_rate": 7.724282448370494e-06, "loss": 0.0019, "step": 117020 }, { "epoch": 0.7699190147562877, "grad_norm": 0.06441926574345086, "learning_rate": 7.723801021534454e-06, "loss": 0.0023, "step": 117030 }, { "epoch": 0.769984802931521, "grad_norm": 0.1592178803082758, "learning_rate": 7.723319558787447e-06, "loss": 0.0026, "step": 117040 }, { "epoch": 0.7700505911067544, "grad_norm": 0.10216661770130682, "learning_rate": 7.722838060135825e-06, "loss": 0.001, "step": 117050 }, { "epoch": 0.7701163792819878, "grad_norm": 0.15155419602336048, "learning_rate": 7.722356525585936e-06, "loss": 0.0018, "step": 117060 }, { "epoch": 0.7701821674572212, "grad_norm": 0.08054758866398813, "learning_rate": 7.721874955144125e-06, "loss": 0.0031, "step": 117070 }, { "epoch": 0.7702479556324546, "grad_norm": 0.15094845209956845, "learning_rate": 7.721393348816744e-06, "loss": 0.0013, "step": 117080 }, { "epoch": 0.770313743807688, "grad_norm": 0.06984801455937376, "learning_rate": 7.720911706610144e-06, "loss": 0.0009, "step": 117090 }, { "epoch": 0.7703795319829214, "grad_norm": 0.029797976157464795, "learning_rate": 7.72043002853067e-06, "loss": 0.0014, "step": 117100 }, { "epoch": 0.7704453201581548, "grad_norm": 0.03202450988110476, "learning_rate": 7.719948314584677e-06, "loss": 0.0025, "step": 117110 }, { "epoch": 0.7705111083333882, "grad_norm": 0.07278347035816725, "learning_rate": 7.719466564778516e-06, "loss": 0.0036, "step": 117120 }, { "epoch": 0.7705768965086215, "grad_norm": 0.06284739199708943, "learning_rate": 7.718984779118536e-06, "loss": 0.0013, "step": 117130 }, { "epoch": 0.7706426846838549, "grad_norm": 0.08554553712683496, "learning_rate": 7.718502957611089e-06, "loss": 0.0042, "step": 117140 }, { "epoch": 0.7707084728590883, "grad_norm": 0.013175782682336159, "learning_rate": 7.71802110026253e-06, "loss": 0.0007, "step": 117150 }, { "epoch": 0.7707742610343217, "grad_norm": 0.1467795029018005, "learning_rate": 7.71753920707921e-06, "loss": 0.0021, "step": 117160 }, { "epoch": 0.7708400492095551, "grad_norm": 0.017025297972287256, "learning_rate": 7.717057278067481e-06, "loss": 0.0013, "step": 117170 }, { "epoch": 0.7709058373847885, "grad_norm": 0.04387546261505267, "learning_rate": 7.7165753132337e-06, "loss": 0.0018, "step": 117180 }, { "epoch": 0.7709716255600219, "grad_norm": 0.03679739442155827, "learning_rate": 7.716093312584219e-06, "loss": 0.0022, "step": 117190 }, { "epoch": 0.7710374137352553, "grad_norm": 0.027711022890292685, "learning_rate": 7.715611276125393e-06, "loss": 0.0018, "step": 117200 }, { "epoch": 0.7711032019104886, "grad_norm": 0.14341759600908438, "learning_rate": 7.715129203863579e-06, "loss": 0.0025, "step": 117210 }, { "epoch": 0.771168990085722, "grad_norm": 0.007084957743362926, "learning_rate": 7.71464709580513e-06, "loss": 0.0017, "step": 117220 }, { "epoch": 0.7712347782609553, "grad_norm": 0.039885316255758405, "learning_rate": 7.714164951956404e-06, "loss": 0.0014, "step": 117230 }, { "epoch": 0.7713005664361887, "grad_norm": 0.06720758263216264, "learning_rate": 7.713682772323756e-06, "loss": 0.0022, "step": 117240 }, { "epoch": 0.7713663546114221, "grad_norm": 0.32536169247737645, "learning_rate": 7.713200556913547e-06, "loss": 0.0047, "step": 117250 }, { "epoch": 0.7714321427866555, "grad_norm": 0.050025545625725705, "learning_rate": 7.71271830573213e-06, "loss": 0.0017, "step": 117260 }, { "epoch": 0.7714979309618889, "grad_norm": 0.04454870401667706, "learning_rate": 7.712236018785867e-06, "loss": 0.0021, "step": 117270 }, { "epoch": 0.7715637191371223, "grad_norm": 0.030476031958702684, "learning_rate": 7.711753696081113e-06, "loss": 0.0009, "step": 117280 }, { "epoch": 0.7716295073123557, "grad_norm": 0.03614267983181109, "learning_rate": 7.711271337624228e-06, "loss": 0.0015, "step": 117290 }, { "epoch": 0.7716952954875891, "grad_norm": 0.01574366874080174, "learning_rate": 7.710788943421573e-06, "loss": 0.002, "step": 117300 }, { "epoch": 0.7717610836628225, "grad_norm": 0.03292952755631189, "learning_rate": 7.710306513479507e-06, "loss": 0.0031, "step": 117310 }, { "epoch": 0.7718268718380559, "grad_norm": 0.0277184990066175, "learning_rate": 7.709824047804389e-06, "loss": 0.0024, "step": 117320 }, { "epoch": 0.7718926600132893, "grad_norm": 0.06261262261615215, "learning_rate": 7.709341546402582e-06, "loss": 0.0008, "step": 117330 }, { "epoch": 0.7719584481885226, "grad_norm": 0.08476508670148591, "learning_rate": 7.708859009280447e-06, "loss": 0.0031, "step": 117340 }, { "epoch": 0.772024236363756, "grad_norm": 0.002860360083918454, "learning_rate": 7.708376436444348e-06, "loss": 0.0009, "step": 117350 }, { "epoch": 0.7720900245389893, "grad_norm": 0.012764055939594253, "learning_rate": 7.707893827900642e-06, "loss": 0.0008, "step": 117360 }, { "epoch": 0.7721558127142227, "grad_norm": 0.015395705387524617, "learning_rate": 7.707411183655693e-06, "loss": 0.0011, "step": 117370 }, { "epoch": 0.7722216008894561, "grad_norm": 0.07390914790613533, "learning_rate": 7.706928503715868e-06, "loss": 0.0009, "step": 117380 }, { "epoch": 0.7722873890646895, "grad_norm": 0.041583574276833415, "learning_rate": 7.706445788087529e-06, "loss": 0.0013, "step": 117390 }, { "epoch": 0.7723531772399229, "grad_norm": 0.02279183861423567, "learning_rate": 7.705963036777038e-06, "loss": 0.0013, "step": 117400 }, { "epoch": 0.7724189654151563, "grad_norm": 0.038331727931897416, "learning_rate": 7.705480249790763e-06, "loss": 0.0018, "step": 117410 }, { "epoch": 0.7724847535903897, "grad_norm": 0.06404182127580359, "learning_rate": 7.704997427135066e-06, "loss": 0.0007, "step": 117420 }, { "epoch": 0.7725505417656231, "grad_norm": 0.1540141170807067, "learning_rate": 7.704514568816316e-06, "loss": 0.0012, "step": 117430 }, { "epoch": 0.7726163299408564, "grad_norm": 0.13118981269052984, "learning_rate": 7.704031674840876e-06, "loss": 0.0015, "step": 117440 }, { "epoch": 0.7726821181160898, "grad_norm": 0.08725542986783714, "learning_rate": 7.703548745215114e-06, "loss": 0.0017, "step": 117450 }, { "epoch": 0.7727479062913232, "grad_norm": 0.04415520447423732, "learning_rate": 7.703065779945397e-06, "loss": 0.0013, "step": 117460 }, { "epoch": 0.7728136944665566, "grad_norm": 0.03337250515333841, "learning_rate": 7.702582779038091e-06, "loss": 0.001, "step": 117470 }, { "epoch": 0.77287948264179, "grad_norm": 0.003808051621750039, "learning_rate": 7.702099742499565e-06, "loss": 0.0016, "step": 117480 }, { "epoch": 0.7729452708170234, "grad_norm": 0.05009892274933636, "learning_rate": 7.701616670336189e-06, "loss": 0.0021, "step": 117490 }, { "epoch": 0.7730110589922567, "grad_norm": 0.04154732760698936, "learning_rate": 7.70113356255433e-06, "loss": 0.003, "step": 117500 }, { "epoch": 0.7730768471674901, "grad_norm": 0.01710941882754599, "learning_rate": 7.700650419160358e-06, "loss": 0.001, "step": 117510 }, { "epoch": 0.7731426353427235, "grad_norm": 0.00268761867876414, "learning_rate": 7.700167240160643e-06, "loss": 0.0011, "step": 117520 }, { "epoch": 0.7732084235179569, "grad_norm": 0.024551002033625004, "learning_rate": 7.699684025561555e-06, "loss": 0.0008, "step": 117530 }, { "epoch": 0.7732742116931902, "grad_norm": 0.02110771010570841, "learning_rate": 7.699200775369465e-06, "loss": 0.0011, "step": 117540 }, { "epoch": 0.7733399998684236, "grad_norm": 0.039741559491625805, "learning_rate": 7.698717489590742e-06, "loss": 0.0009, "step": 117550 }, { "epoch": 0.773405788043657, "grad_norm": 0.00047190457073173736, "learning_rate": 7.69823416823176e-06, "loss": 0.0018, "step": 117560 }, { "epoch": 0.7734715762188904, "grad_norm": 0.02419319144425854, "learning_rate": 7.697750811298891e-06, "loss": 0.0011, "step": 117570 }, { "epoch": 0.7735373643941238, "grad_norm": 0.040258826658534023, "learning_rate": 7.697267418798508e-06, "loss": 0.0011, "step": 117580 }, { "epoch": 0.7736031525693572, "grad_norm": 0.0027501639508516022, "learning_rate": 7.696783990736985e-06, "loss": 0.0012, "step": 117590 }, { "epoch": 0.7736689407445906, "grad_norm": 0.03364813126875745, "learning_rate": 7.696300527120693e-06, "loss": 0.0016, "step": 117600 }, { "epoch": 0.773734728919824, "grad_norm": 0.0031585532914011694, "learning_rate": 7.695817027956007e-06, "loss": 0.0011, "step": 117610 }, { "epoch": 0.7738005170950574, "grad_norm": 0.025799249519749233, "learning_rate": 7.695333493249302e-06, "loss": 0.0008, "step": 117620 }, { "epoch": 0.7738663052702908, "grad_norm": 0.02735022627833855, "learning_rate": 7.694849923006951e-06, "loss": 0.0009, "step": 117630 }, { "epoch": 0.773932093445524, "grad_norm": 0.10899582474466708, "learning_rate": 7.694366317235334e-06, "loss": 0.0015, "step": 117640 }, { "epoch": 0.7739978816207574, "grad_norm": 0.017465326161443244, "learning_rate": 7.693882675940823e-06, "loss": 0.0009, "step": 117650 }, { "epoch": 0.7740636697959908, "grad_norm": 0.04718500052819118, "learning_rate": 7.693398999129794e-06, "loss": 0.0014, "step": 117660 }, { "epoch": 0.7741294579712242, "grad_norm": 0.03490620669361524, "learning_rate": 7.692915286808629e-06, "loss": 0.0009, "step": 117670 }, { "epoch": 0.7741952461464576, "grad_norm": 0.08636210758476627, "learning_rate": 7.6924315389837e-06, "loss": 0.0012, "step": 117680 }, { "epoch": 0.774261034321691, "grad_norm": 0.041804274066080296, "learning_rate": 7.691947755661384e-06, "loss": 0.0026, "step": 117690 }, { "epoch": 0.7743268224969244, "grad_norm": 0.056184333902743105, "learning_rate": 7.691463936848063e-06, "loss": 0.0012, "step": 117700 }, { "epoch": 0.7743926106721578, "grad_norm": 0.022931658601753818, "learning_rate": 7.690980082550117e-06, "loss": 0.0014, "step": 117710 }, { "epoch": 0.7744583988473912, "grad_norm": 0.004107817242351777, "learning_rate": 7.69049619277392e-06, "loss": 0.0021, "step": 117720 }, { "epoch": 0.7745241870226246, "grad_norm": 0.20443994472405558, "learning_rate": 7.690012267525855e-06, "loss": 0.0016, "step": 117730 }, { "epoch": 0.7745899751978579, "grad_norm": 0.02348358233569859, "learning_rate": 7.689528306812303e-06, "loss": 0.0013, "step": 117740 }, { "epoch": 0.7746557633730913, "grad_norm": 0.31888437884145826, "learning_rate": 7.689044310639642e-06, "loss": 0.0027, "step": 117750 }, { "epoch": 0.7747215515483247, "grad_norm": 0.022507074608573194, "learning_rate": 7.688560279014252e-06, "loss": 0.0014, "step": 117760 }, { "epoch": 0.7747873397235581, "grad_norm": 0.049382147944765566, "learning_rate": 7.688076211942518e-06, "loss": 0.0009, "step": 117770 }, { "epoch": 0.7748531278987915, "grad_norm": 0.0005359783382497564, "learning_rate": 7.687592109430823e-06, "loss": 0.0018, "step": 117780 }, { "epoch": 0.7749189160740249, "grad_norm": 0.15777101794888404, "learning_rate": 7.687107971485545e-06, "loss": 0.0021, "step": 117790 }, { "epoch": 0.7749847042492582, "grad_norm": 0.011642785851310613, "learning_rate": 7.68662379811307e-06, "loss": 0.0009, "step": 117800 }, { "epoch": 0.7750504924244916, "grad_norm": 0.003578294577186074, "learning_rate": 7.68613958931978e-06, "loss": 0.0005, "step": 117810 }, { "epoch": 0.775116280599725, "grad_norm": 0.020293558372468382, "learning_rate": 7.68565534511206e-06, "loss": 0.0007, "step": 117820 }, { "epoch": 0.7751820687749584, "grad_norm": 0.08657220433860553, "learning_rate": 7.685171065496294e-06, "loss": 0.0008, "step": 117830 }, { "epoch": 0.7752478569501918, "grad_norm": 0.031188967545499054, "learning_rate": 7.684686750478867e-06, "loss": 0.0007, "step": 117840 }, { "epoch": 0.7753136451254251, "grad_norm": 0.0007032421815204061, "learning_rate": 7.684202400066164e-06, "loss": 0.0012, "step": 117850 }, { "epoch": 0.7753794333006585, "grad_norm": 0.020635776284745493, "learning_rate": 7.683718014264567e-06, "loss": 0.0015, "step": 117860 }, { "epoch": 0.7754452214758919, "grad_norm": 0.012330469471518272, "learning_rate": 7.68323359308047e-06, "loss": 0.0008, "step": 117870 }, { "epoch": 0.7755110096511253, "grad_norm": 0.050949370726341585, "learning_rate": 7.682749136520253e-06, "loss": 0.0018, "step": 117880 }, { "epoch": 0.7755767978263587, "grad_norm": 0.06382052620640781, "learning_rate": 7.682264644590307e-06, "loss": 0.0015, "step": 117890 }, { "epoch": 0.7756425860015921, "grad_norm": 0.07863204926319713, "learning_rate": 7.681780117297018e-06, "loss": 0.0009, "step": 117900 }, { "epoch": 0.7757083741768255, "grad_norm": 0.02967489775998954, "learning_rate": 7.681295554646773e-06, "loss": 0.0007, "step": 117910 }, { "epoch": 0.7757741623520589, "grad_norm": 0.030873937986660924, "learning_rate": 7.680810956645963e-06, "loss": 0.0008, "step": 117920 }, { "epoch": 0.7758399505272923, "grad_norm": 0.14129097127572976, "learning_rate": 7.680326323300974e-06, "loss": 0.0015, "step": 117930 }, { "epoch": 0.7759057387025257, "grad_norm": 0.012212661524304443, "learning_rate": 7.679841654618201e-06, "loss": 0.0007, "step": 117940 }, { "epoch": 0.7759715268777589, "grad_norm": 0.009663576081504399, "learning_rate": 7.679356950604027e-06, "loss": 0.0017, "step": 117950 }, { "epoch": 0.7760373150529923, "grad_norm": 0.015159907875503977, "learning_rate": 7.678872211264847e-06, "loss": 0.0019, "step": 117960 }, { "epoch": 0.7761031032282257, "grad_norm": 0.009673382350468067, "learning_rate": 7.67838743660705e-06, "loss": 0.0018, "step": 117970 }, { "epoch": 0.7761688914034591, "grad_norm": 0.31703071661779597, "learning_rate": 7.677902626637028e-06, "loss": 0.0013, "step": 117980 }, { "epoch": 0.7762346795786925, "grad_norm": 0.05074209401991658, "learning_rate": 7.677417781361174e-06, "loss": 0.0027, "step": 117990 }, { "epoch": 0.7763004677539259, "grad_norm": 0.05413421636807346, "learning_rate": 7.676932900785877e-06, "loss": 0.002, "step": 118000 }, { "epoch": 0.7763662559291593, "grad_norm": 0.00940771196104757, "learning_rate": 7.676447984917533e-06, "loss": 0.0022, "step": 118010 }, { "epoch": 0.7764320441043927, "grad_norm": 0.04269616570853953, "learning_rate": 7.675963033762533e-06, "loss": 0.0014, "step": 118020 }, { "epoch": 0.7764978322796261, "grad_norm": 0.053511978195228155, "learning_rate": 7.675478047327272e-06, "loss": 0.0011, "step": 118030 }, { "epoch": 0.7765636204548595, "grad_norm": 0.04625617080753083, "learning_rate": 7.674993025618143e-06, "loss": 0.0016, "step": 118040 }, { "epoch": 0.7766294086300928, "grad_norm": 0.1268207128345256, "learning_rate": 7.674507968641542e-06, "loss": 0.0016, "step": 118050 }, { "epoch": 0.7766951968053262, "grad_norm": 0.030251948790909203, "learning_rate": 7.674022876403862e-06, "loss": 0.0009, "step": 118060 }, { "epoch": 0.7767609849805596, "grad_norm": 0.11409808189753669, "learning_rate": 7.673537748911502e-06, "loss": 0.0016, "step": 118070 }, { "epoch": 0.776826773155793, "grad_norm": 0.012720281791419458, "learning_rate": 7.673052586170853e-06, "loss": 0.0015, "step": 118080 }, { "epoch": 0.7768925613310264, "grad_norm": 0.04382227169019862, "learning_rate": 7.672567388188315e-06, "loss": 0.0017, "step": 118090 }, { "epoch": 0.7769583495062597, "grad_norm": 0.00035234624732177343, "learning_rate": 7.672082154970286e-06, "loss": 0.0023, "step": 118100 }, { "epoch": 0.7770241376814931, "grad_norm": 0.04087946301448027, "learning_rate": 7.67159688652316e-06, "loss": 0.0013, "step": 118110 }, { "epoch": 0.7770899258567265, "grad_norm": 0.0992376284710398, "learning_rate": 7.671111582853337e-06, "loss": 0.0014, "step": 118120 }, { "epoch": 0.7771557140319599, "grad_norm": 0.07944159439473829, "learning_rate": 7.670626243967215e-06, "loss": 0.0007, "step": 118130 }, { "epoch": 0.7772215022071933, "grad_norm": 0.06257649765321037, "learning_rate": 7.670140869871192e-06, "loss": 0.0016, "step": 118140 }, { "epoch": 0.7772872903824266, "grad_norm": 0.019967489475929634, "learning_rate": 7.669655460571667e-06, "loss": 0.0006, "step": 118150 }, { "epoch": 0.77735307855766, "grad_norm": 0.040315131873659905, "learning_rate": 7.669170016075041e-06, "loss": 0.0017, "step": 118160 }, { "epoch": 0.7774188667328934, "grad_norm": 0.048825167252081664, "learning_rate": 7.668684536387713e-06, "loss": 0.0016, "step": 118170 }, { "epoch": 0.7774846549081268, "grad_norm": 0.08705792951297311, "learning_rate": 7.668199021516086e-06, "loss": 0.0025, "step": 118180 }, { "epoch": 0.7775504430833602, "grad_norm": 0.018762988579504813, "learning_rate": 7.66771347146656e-06, "loss": 0.0008, "step": 118190 }, { "epoch": 0.7776162312585936, "grad_norm": 0.032783860095559225, "learning_rate": 7.667227886245533e-06, "loss": 0.0012, "step": 118200 }, { "epoch": 0.777682019433827, "grad_norm": 0.011067818267664623, "learning_rate": 7.66674226585941e-06, "loss": 0.0023, "step": 118210 }, { "epoch": 0.7777478076090604, "grad_norm": 0.03161551092527213, "learning_rate": 7.666256610314595e-06, "loss": 0.0011, "step": 118220 }, { "epoch": 0.7778135957842938, "grad_norm": 0.035762557712573015, "learning_rate": 7.665770919617489e-06, "loss": 0.0018, "step": 118230 }, { "epoch": 0.7778793839595272, "grad_norm": 0.059436727642903, "learning_rate": 7.665285193774496e-06, "loss": 0.0013, "step": 118240 }, { "epoch": 0.7779451721347604, "grad_norm": 0.1743053294022844, "learning_rate": 7.664799432792019e-06, "loss": 0.001, "step": 118250 }, { "epoch": 0.7780109603099938, "grad_norm": 0.0033798180095286528, "learning_rate": 7.664313636676463e-06, "loss": 0.001, "step": 118260 }, { "epoch": 0.7780767484852272, "grad_norm": 0.1063570271846579, "learning_rate": 7.663827805434232e-06, "loss": 0.0015, "step": 118270 }, { "epoch": 0.7781425366604606, "grad_norm": 0.026112695325583187, "learning_rate": 7.663341939071734e-06, "loss": 0.0015, "step": 118280 }, { "epoch": 0.778208324835694, "grad_norm": 0.041273213733219745, "learning_rate": 7.66285603759537e-06, "loss": 0.0018, "step": 118290 }, { "epoch": 0.7782741130109274, "grad_norm": 0.033236110396113655, "learning_rate": 7.662370101011551e-06, "loss": 0.0008, "step": 118300 }, { "epoch": 0.7783399011861608, "grad_norm": 0.09546199701060509, "learning_rate": 7.661884129326681e-06, "loss": 0.0026, "step": 118310 }, { "epoch": 0.7784056893613942, "grad_norm": 0.016583340491405748, "learning_rate": 7.661398122547169e-06, "loss": 0.001, "step": 118320 }, { "epoch": 0.7784714775366276, "grad_norm": 0.04875222413537477, "learning_rate": 7.660912080679418e-06, "loss": 0.001, "step": 118330 }, { "epoch": 0.778537265711861, "grad_norm": 0.04869939813184747, "learning_rate": 7.660426003729842e-06, "loss": 0.0013, "step": 118340 }, { "epoch": 0.7786030538870944, "grad_norm": 0.26364906597763543, "learning_rate": 7.659939891704846e-06, "loss": 0.0007, "step": 118350 }, { "epoch": 0.7786688420623277, "grad_norm": 0.0028806055803376023, "learning_rate": 7.659453744610841e-06, "loss": 0.0008, "step": 118360 }, { "epoch": 0.7787346302375611, "grad_norm": 0.03113351091357111, "learning_rate": 7.658967562454235e-06, "loss": 0.0009, "step": 118370 }, { "epoch": 0.7788004184127945, "grad_norm": 0.09115780013092588, "learning_rate": 7.658481345241438e-06, "loss": 0.0017, "step": 118380 }, { "epoch": 0.7788662065880279, "grad_norm": 0.11246646694309026, "learning_rate": 7.657995092978859e-06, "loss": 0.0009, "step": 118390 }, { "epoch": 0.7789319947632612, "grad_norm": 0.03165299980694926, "learning_rate": 7.65750880567291e-06, "loss": 0.0013, "step": 118400 }, { "epoch": 0.7789977829384946, "grad_norm": 0.06329145286616708, "learning_rate": 7.657022483330005e-06, "loss": 0.0013, "step": 118410 }, { "epoch": 0.779063571113728, "grad_norm": 0.01669630420316362, "learning_rate": 7.656536125956553e-06, "loss": 0.0026, "step": 118420 }, { "epoch": 0.7791293592889614, "grad_norm": 0.038904398090211255, "learning_rate": 7.656049733558964e-06, "loss": 0.0011, "step": 118430 }, { "epoch": 0.7791951474641948, "grad_norm": 0.14974877765319336, "learning_rate": 7.655563306143656e-06, "loss": 0.0019, "step": 118440 }, { "epoch": 0.7792609356394282, "grad_norm": 0.06895912956321096, "learning_rate": 7.655076843717037e-06, "loss": 0.0018, "step": 118450 }, { "epoch": 0.7793267238146615, "grad_norm": 0.06154377750662878, "learning_rate": 7.654590346285525e-06, "loss": 0.0013, "step": 118460 }, { "epoch": 0.7793925119898949, "grad_norm": 0.0024205711348388496, "learning_rate": 7.654103813855529e-06, "loss": 0.001, "step": 118470 }, { "epoch": 0.7794583001651283, "grad_norm": 0.020860737987756336, "learning_rate": 7.653617246433469e-06, "loss": 0.001, "step": 118480 }, { "epoch": 0.7795240883403617, "grad_norm": 0.11881878392092703, "learning_rate": 7.653130644025757e-06, "loss": 0.0012, "step": 118490 }, { "epoch": 0.7795898765155951, "grad_norm": 0.011355591387456307, "learning_rate": 7.65264400663881e-06, "loss": 0.0009, "step": 118500 }, { "epoch": 0.7796556646908285, "grad_norm": 0.02131775232388846, "learning_rate": 7.65215733427904e-06, "loss": 0.0015, "step": 118510 }, { "epoch": 0.7797214528660619, "grad_norm": 0.048680922291347295, "learning_rate": 7.651670626952867e-06, "loss": 0.001, "step": 118520 }, { "epoch": 0.7797872410412953, "grad_norm": 0.012011303579788295, "learning_rate": 7.651183884666708e-06, "loss": 0.0032, "step": 118530 }, { "epoch": 0.7798530292165287, "grad_norm": 0.10747161956975412, "learning_rate": 7.65069710742698e-06, "loss": 0.0027, "step": 118540 }, { "epoch": 0.779918817391762, "grad_norm": 0.017556373014190864, "learning_rate": 7.650210295240097e-06, "loss": 0.0014, "step": 118550 }, { "epoch": 0.7799846055669953, "grad_norm": 0.09796766802196082, "learning_rate": 7.649723448112481e-06, "loss": 0.0016, "step": 118560 }, { "epoch": 0.7800503937422287, "grad_norm": 0.05835587635808526, "learning_rate": 7.649236566050551e-06, "loss": 0.0007, "step": 118570 }, { "epoch": 0.7801161819174621, "grad_norm": 0.030577212146941195, "learning_rate": 7.648749649060725e-06, "loss": 0.0007, "step": 118580 }, { "epoch": 0.7801819700926955, "grad_norm": 0.035260692292683415, "learning_rate": 7.64826269714942e-06, "loss": 0.0008, "step": 118590 }, { "epoch": 0.7802477582679289, "grad_norm": 0.02831461059050762, "learning_rate": 7.647775710323061e-06, "loss": 0.0011, "step": 118600 }, { "epoch": 0.7803135464431623, "grad_norm": 0.013482607796346103, "learning_rate": 7.647288688588066e-06, "loss": 0.002, "step": 118610 }, { "epoch": 0.7803793346183957, "grad_norm": 0.037676039477866886, "learning_rate": 7.646801631950855e-06, "loss": 0.0008, "step": 118620 }, { "epoch": 0.7804451227936291, "grad_norm": 0.0038707716054694288, "learning_rate": 7.64631454041785e-06, "loss": 0.0009, "step": 118630 }, { "epoch": 0.7805109109688625, "grad_norm": 0.027385970297221788, "learning_rate": 7.645827413995475e-06, "loss": 0.0008, "step": 118640 }, { "epoch": 0.7805766991440959, "grad_norm": 0.12521610915018327, "learning_rate": 7.645340252690149e-06, "loss": 0.0014, "step": 118650 }, { "epoch": 0.7806424873193292, "grad_norm": 0.022322946521645078, "learning_rate": 7.644853056508297e-06, "loss": 0.0016, "step": 118660 }, { "epoch": 0.7807082754945626, "grad_norm": 0.028326609519487424, "learning_rate": 7.644365825456342e-06, "loss": 0.0011, "step": 118670 }, { "epoch": 0.780774063669796, "grad_norm": 0.02402132182289952, "learning_rate": 7.643878559540706e-06, "loss": 0.0015, "step": 118680 }, { "epoch": 0.7808398518450294, "grad_norm": 0.013334526217340147, "learning_rate": 7.643391258767817e-06, "loss": 0.0008, "step": 118690 }, { "epoch": 0.7809056400202627, "grad_norm": 0.01582272188621048, "learning_rate": 7.642903923144096e-06, "loss": 0.001, "step": 118700 }, { "epoch": 0.7809714281954961, "grad_norm": 0.004622096451893078, "learning_rate": 7.642416552675971e-06, "loss": 0.0018, "step": 118710 }, { "epoch": 0.7810372163707295, "grad_norm": 0.07788690492671588, "learning_rate": 7.641929147369862e-06, "loss": 0.0012, "step": 118720 }, { "epoch": 0.7811030045459629, "grad_norm": 0.04140106452360846, "learning_rate": 7.641441707232202e-06, "loss": 0.0008, "step": 118730 }, { "epoch": 0.7811687927211963, "grad_norm": 0.02794660042608552, "learning_rate": 7.640954232269413e-06, "loss": 0.001, "step": 118740 }, { "epoch": 0.7812345808964297, "grad_norm": 0.03079895008234, "learning_rate": 7.640466722487924e-06, "loss": 0.0014, "step": 118750 }, { "epoch": 0.7813003690716631, "grad_norm": 0.10210485662170389, "learning_rate": 7.639979177894162e-06, "loss": 0.001, "step": 118760 }, { "epoch": 0.7813661572468964, "grad_norm": 0.07943785493562858, "learning_rate": 7.639491598494555e-06, "loss": 0.0016, "step": 118770 }, { "epoch": 0.7814319454221298, "grad_norm": 0.07546621567946843, "learning_rate": 7.639003984295528e-06, "loss": 0.001, "step": 118780 }, { "epoch": 0.7814977335973632, "grad_norm": 0.08316093087213611, "learning_rate": 7.638516335303517e-06, "loss": 0.0037, "step": 118790 }, { "epoch": 0.7815635217725966, "grad_norm": 0.02392729180145219, "learning_rate": 7.638028651524943e-06, "loss": 0.0011, "step": 118800 }, { "epoch": 0.78162930994783, "grad_norm": 0.0221436638871826, "learning_rate": 7.637540932966243e-06, "loss": 0.0014, "step": 118810 }, { "epoch": 0.7816950981230634, "grad_norm": 0.01296705242351883, "learning_rate": 7.637053179633841e-06, "loss": 0.0012, "step": 118820 }, { "epoch": 0.7817608862982968, "grad_norm": 0.04037578713735681, "learning_rate": 7.636565391534171e-06, "loss": 0.0019, "step": 118830 }, { "epoch": 0.7818266744735302, "grad_norm": 0.02790719731715996, "learning_rate": 7.636077568673663e-06, "loss": 0.0019, "step": 118840 }, { "epoch": 0.7818924626487636, "grad_norm": 0.10083585281207534, "learning_rate": 7.63558971105875e-06, "loss": 0.0011, "step": 118850 }, { "epoch": 0.781958250823997, "grad_norm": 0.06993553598713342, "learning_rate": 7.635101818695862e-06, "loss": 0.0009, "step": 118860 }, { "epoch": 0.7820240389992302, "grad_norm": 0.06158573017342976, "learning_rate": 7.634613891591434e-06, "loss": 0.0024, "step": 118870 }, { "epoch": 0.7820898271744636, "grad_norm": 0.024839832038031175, "learning_rate": 7.634125929751897e-06, "loss": 0.0009, "step": 118880 }, { "epoch": 0.782155615349697, "grad_norm": 0.059558539551739564, "learning_rate": 7.633637933183683e-06, "loss": 0.0016, "step": 118890 }, { "epoch": 0.7822214035249304, "grad_norm": 0.020081706219603118, "learning_rate": 7.633149901893229e-06, "loss": 0.001, "step": 118900 }, { "epoch": 0.7822871917001638, "grad_norm": 0.049554722324139905, "learning_rate": 7.632661835886965e-06, "loss": 0.0023, "step": 118910 }, { "epoch": 0.7823529798753972, "grad_norm": 0.0787322076481972, "learning_rate": 7.63217373517133e-06, "loss": 0.0019, "step": 118920 }, { "epoch": 0.7824187680506306, "grad_norm": 0.11978303870328402, "learning_rate": 7.631685599752758e-06, "loss": 0.0013, "step": 118930 }, { "epoch": 0.782484556225864, "grad_norm": 0.056001437405547125, "learning_rate": 7.631197429637685e-06, "loss": 0.0011, "step": 118940 }, { "epoch": 0.7825503444010974, "grad_norm": 0.0710076615316238, "learning_rate": 7.630709224832544e-06, "loss": 0.0016, "step": 118950 }, { "epoch": 0.7826161325763308, "grad_norm": 0.031097366926198883, "learning_rate": 7.630220985343776e-06, "loss": 0.0025, "step": 118960 }, { "epoch": 0.7826819207515641, "grad_norm": 0.022898243455252362, "learning_rate": 7.629732711177816e-06, "loss": 0.0017, "step": 118970 }, { "epoch": 0.7827477089267975, "grad_norm": 0.10078874150509455, "learning_rate": 7.6292444023411e-06, "loss": 0.0016, "step": 118980 }, { "epoch": 0.7828134971020309, "grad_norm": 0.06430406286687074, "learning_rate": 7.628756058840068e-06, "loss": 0.0012, "step": 118990 }, { "epoch": 0.7828792852772642, "grad_norm": 0.04653957676953153, "learning_rate": 7.628267680681158e-06, "loss": 0.0026, "step": 119000 }, { "epoch": 0.7829450734524976, "grad_norm": 0.02266667039297959, "learning_rate": 7.627779267870807e-06, "loss": 0.0017, "step": 119010 }, { "epoch": 0.783010861627731, "grad_norm": 0.044005176465004646, "learning_rate": 7.627290820415459e-06, "loss": 0.0022, "step": 119020 }, { "epoch": 0.7830766498029644, "grad_norm": 0.03503253032501184, "learning_rate": 7.6268023383215475e-06, "loss": 0.0016, "step": 119030 }, { "epoch": 0.7831424379781978, "grad_norm": 0.05198713739349881, "learning_rate": 7.626313821595516e-06, "loss": 0.0009, "step": 119040 }, { "epoch": 0.7832082261534312, "grad_norm": 0.07028054713333917, "learning_rate": 7.625825270243805e-06, "loss": 0.0016, "step": 119050 }, { "epoch": 0.7832740143286646, "grad_norm": 0.06976988466431631, "learning_rate": 7.6253366842728574e-06, "loss": 0.0011, "step": 119060 }, { "epoch": 0.7833398025038979, "grad_norm": 0.030193644033838164, "learning_rate": 7.624848063689112e-06, "loss": 0.0006, "step": 119070 }, { "epoch": 0.7834055906791313, "grad_norm": 0.17950477858846295, "learning_rate": 7.624359408499013e-06, "loss": 0.0014, "step": 119080 }, { "epoch": 0.7834713788543647, "grad_norm": 0.00708518929985696, "learning_rate": 7.623870718709001e-06, "loss": 0.0029, "step": 119090 }, { "epoch": 0.7835371670295981, "grad_norm": 0.06275535575687699, "learning_rate": 7.623381994325518e-06, "loss": 0.0027, "step": 119100 }, { "epoch": 0.7836029552048315, "grad_norm": 0.1276475585524983, "learning_rate": 7.6228932353550114e-06, "loss": 0.0009, "step": 119110 }, { "epoch": 0.7836687433800649, "grad_norm": 0.12466628607738518, "learning_rate": 7.622404441803921e-06, "loss": 0.0017, "step": 119120 }, { "epoch": 0.7837345315552983, "grad_norm": 0.0697691693629294, "learning_rate": 7.621915613678694e-06, "loss": 0.0011, "step": 119130 }, { "epoch": 0.7838003197305317, "grad_norm": 0.30338600292470436, "learning_rate": 7.621426750985774e-06, "loss": 0.0027, "step": 119140 }, { "epoch": 0.783866107905765, "grad_norm": 0.0010843743193279202, "learning_rate": 7.620937853731607e-06, "loss": 0.0009, "step": 119150 }, { "epoch": 0.7839318960809984, "grad_norm": 0.022975578752933684, "learning_rate": 7.620448921922636e-06, "loss": 0.0022, "step": 119160 }, { "epoch": 0.7839976842562317, "grad_norm": 0.05694827006619187, "learning_rate": 7.61995995556531e-06, "loss": 0.0017, "step": 119170 }, { "epoch": 0.7840634724314651, "grad_norm": 0.044000885216967725, "learning_rate": 7.619470954666075e-06, "loss": 0.0025, "step": 119180 }, { "epoch": 0.7841292606066985, "grad_norm": 0.02726451786273288, "learning_rate": 7.6189819192313785e-06, "loss": 0.0008, "step": 119190 }, { "epoch": 0.7841950487819319, "grad_norm": 0.13582245389650854, "learning_rate": 7.618492849267667e-06, "loss": 0.0019, "step": 119200 }, { "epoch": 0.7842608369571653, "grad_norm": 0.07711161402714971, "learning_rate": 7.6180037447813885e-06, "loss": 0.0022, "step": 119210 }, { "epoch": 0.7843266251323987, "grad_norm": 0.07026666001243624, "learning_rate": 7.6175146057789915e-06, "loss": 0.0015, "step": 119220 }, { "epoch": 0.7843924133076321, "grad_norm": 0.04107549043133419, "learning_rate": 7.617025432266925e-06, "loss": 0.0015, "step": 119230 }, { "epoch": 0.7844582014828655, "grad_norm": 0.07009034707383954, "learning_rate": 7.616536224251638e-06, "loss": 0.0015, "step": 119240 }, { "epoch": 0.7845239896580989, "grad_norm": 0.006681867389968604, "learning_rate": 7.616046981739582e-06, "loss": 0.0022, "step": 119250 }, { "epoch": 0.7845897778333323, "grad_norm": 0.08276605836925534, "learning_rate": 7.615557704737204e-06, "loss": 0.0011, "step": 119260 }, { "epoch": 0.7846555660085657, "grad_norm": 0.08485445084066252, "learning_rate": 7.615068393250958e-06, "loss": 0.0014, "step": 119270 }, { "epoch": 0.784721354183799, "grad_norm": 0.018238744433423708, "learning_rate": 7.614579047287294e-06, "loss": 0.0015, "step": 119280 }, { "epoch": 0.7847871423590324, "grad_norm": 0.021906105913519623, "learning_rate": 7.614089666852663e-06, "loss": 0.0012, "step": 119290 }, { "epoch": 0.7848529305342657, "grad_norm": 0.06566711846593518, "learning_rate": 7.6136002519535164e-06, "loss": 0.001, "step": 119300 }, { "epoch": 0.7849187187094991, "grad_norm": 0.1514643083427232, "learning_rate": 7.6131108025963095e-06, "loss": 0.0016, "step": 119310 }, { "epoch": 0.7849845068847325, "grad_norm": 0.03370897740744996, "learning_rate": 7.612621318787492e-06, "loss": 0.0019, "step": 119320 }, { "epoch": 0.7850502950599659, "grad_norm": 0.07213851835095052, "learning_rate": 7.6121318005335195e-06, "loss": 0.0012, "step": 119330 }, { "epoch": 0.7851160832351993, "grad_norm": 0.037610710845793635, "learning_rate": 7.611642247840846e-06, "loss": 0.0007, "step": 119340 }, { "epoch": 0.7851818714104327, "grad_norm": 0.0006736652961924347, "learning_rate": 7.611152660715924e-06, "loss": 0.0007, "step": 119350 }, { "epoch": 0.7852476595856661, "grad_norm": 0.2920609720707656, "learning_rate": 7.610663039165209e-06, "loss": 0.0017, "step": 119360 }, { "epoch": 0.7853134477608995, "grad_norm": 0.09468821754032745, "learning_rate": 7.6101733831951565e-06, "loss": 0.0013, "step": 119370 }, { "epoch": 0.7853792359361328, "grad_norm": 0.11336153892679743, "learning_rate": 7.609683692812223e-06, "loss": 0.0013, "step": 119380 }, { "epoch": 0.7854450241113662, "grad_norm": 0.05015236953968705, "learning_rate": 7.609193968022863e-06, "loss": 0.0017, "step": 119390 }, { "epoch": 0.7855108122865996, "grad_norm": 0.027441823657218225, "learning_rate": 7.608704208833535e-06, "loss": 0.0031, "step": 119400 }, { "epoch": 0.785576600461833, "grad_norm": 0.014480923335048352, "learning_rate": 7.608214415250695e-06, "loss": 0.0005, "step": 119410 }, { "epoch": 0.7856423886370664, "grad_norm": 0.09259143400520384, "learning_rate": 7.6077245872808e-06, "loss": 0.0008, "step": 119420 }, { "epoch": 0.7857081768122998, "grad_norm": 0.0554195953270224, "learning_rate": 7.607234724930308e-06, "loss": 0.0013, "step": 119430 }, { "epoch": 0.7857739649875332, "grad_norm": 0.0688981982629581, "learning_rate": 7.606744828205679e-06, "loss": 0.0011, "step": 119440 }, { "epoch": 0.7858397531627666, "grad_norm": 0.035241622070767205, "learning_rate": 7.60625489711337e-06, "loss": 0.0009, "step": 119450 }, { "epoch": 0.785905541338, "grad_norm": 0.10481317344417158, "learning_rate": 7.605764931659843e-06, "loss": 0.0012, "step": 119460 }, { "epoch": 0.7859713295132333, "grad_norm": 0.0840142154979839, "learning_rate": 7.605274931851554e-06, "loss": 0.0019, "step": 119470 }, { "epoch": 0.7860371176884666, "grad_norm": 0.08376091980625547, "learning_rate": 7.6047848976949645e-06, "loss": 0.0016, "step": 119480 }, { "epoch": 0.7861029058637, "grad_norm": 0.00958303222176461, "learning_rate": 7.604294829196535e-06, "loss": 0.001, "step": 119490 }, { "epoch": 0.7861686940389334, "grad_norm": 0.01656738692111493, "learning_rate": 7.60380472636273e-06, "loss": 0.001, "step": 119500 }, { "epoch": 0.7862344822141668, "grad_norm": 0.03019602945648854, "learning_rate": 7.603314589200008e-06, "loss": 0.0011, "step": 119510 }, { "epoch": 0.7863002703894002, "grad_norm": 0.12251956087671259, "learning_rate": 7.602824417714832e-06, "loss": 0.0017, "step": 119520 }, { "epoch": 0.7863660585646336, "grad_norm": 0.02234621966181764, "learning_rate": 7.602334211913662e-06, "loss": 0.0012, "step": 119530 }, { "epoch": 0.786431846739867, "grad_norm": 0.07010934200069815, "learning_rate": 7.6018439718029645e-06, "loss": 0.0008, "step": 119540 }, { "epoch": 0.7864976349151004, "grad_norm": 0.02980388773533308, "learning_rate": 7.6013536973892e-06, "loss": 0.0016, "step": 119550 }, { "epoch": 0.7865634230903338, "grad_norm": 0.06960186915197764, "learning_rate": 7.600863388678834e-06, "loss": 0.0027, "step": 119560 }, { "epoch": 0.7866292112655672, "grad_norm": 0.0015620101876531345, "learning_rate": 7.600373045678331e-06, "loss": 0.0015, "step": 119570 }, { "epoch": 0.7866949994408005, "grad_norm": 0.058301269539154155, "learning_rate": 7.599882668394155e-06, "loss": 0.0012, "step": 119580 }, { "epoch": 0.7867607876160339, "grad_norm": 0.1055669561058573, "learning_rate": 7.599392256832772e-06, "loss": 0.0016, "step": 119590 }, { "epoch": 0.7868265757912672, "grad_norm": 0.04025792079236067, "learning_rate": 7.598901811000646e-06, "loss": 0.0008, "step": 119600 }, { "epoch": 0.7868923639665006, "grad_norm": 0.04559450895550704, "learning_rate": 7.598411330904245e-06, "loss": 0.0008, "step": 119610 }, { "epoch": 0.786958152141734, "grad_norm": 0.08669439890502739, "learning_rate": 7.597920816550034e-06, "loss": 0.0014, "step": 119620 }, { "epoch": 0.7870239403169674, "grad_norm": 0.08873657342819898, "learning_rate": 7.597430267944481e-06, "loss": 0.0017, "step": 119630 }, { "epoch": 0.7870897284922008, "grad_norm": 0.04470562422292083, "learning_rate": 7.596939685094054e-06, "loss": 0.0007, "step": 119640 }, { "epoch": 0.7871555166674342, "grad_norm": 0.05521388924707934, "learning_rate": 7.596449068005221e-06, "loss": 0.0037, "step": 119650 }, { "epoch": 0.7872213048426676, "grad_norm": 0.058298725587334344, "learning_rate": 7.595958416684447e-06, "loss": 0.0011, "step": 119660 }, { "epoch": 0.787287093017901, "grad_norm": 0.17653851350648725, "learning_rate": 7.595467731138204e-06, "loss": 0.0014, "step": 119670 }, { "epoch": 0.7873528811931343, "grad_norm": 0.14392711278035222, "learning_rate": 7.59497701137296e-06, "loss": 0.0008, "step": 119680 }, { "epoch": 0.7874186693683677, "grad_norm": 0.024322094186626543, "learning_rate": 7.594486257395187e-06, "loss": 0.0008, "step": 119690 }, { "epoch": 0.7874844575436011, "grad_norm": 0.021125897168164015, "learning_rate": 7.5939954692113525e-06, "loss": 0.0017, "step": 119700 }, { "epoch": 0.7875502457188345, "grad_norm": 0.024636726096307116, "learning_rate": 7.5935046468279285e-06, "loss": 0.0014, "step": 119710 }, { "epoch": 0.7876160338940679, "grad_norm": 0.015249272972842037, "learning_rate": 7.593013790251385e-06, "loss": 0.0019, "step": 119720 }, { "epoch": 0.7876818220693013, "grad_norm": 0.0589868045489224, "learning_rate": 7.592522899488195e-06, "loss": 0.0008, "step": 119730 }, { "epoch": 0.7877476102445347, "grad_norm": 0.03091143910644181, "learning_rate": 7.592031974544829e-06, "loss": 0.001, "step": 119740 }, { "epoch": 0.787813398419768, "grad_norm": 0.029322374480385098, "learning_rate": 7.591541015427759e-06, "loss": 0.0017, "step": 119750 }, { "epoch": 0.7878791865950014, "grad_norm": 0.011013362081019978, "learning_rate": 7.5910500221434605e-06, "loss": 0.0021, "step": 119760 }, { "epoch": 0.7879449747702348, "grad_norm": 0.07599807928857479, "learning_rate": 7.590558994698405e-06, "loss": 0.0008, "step": 119770 }, { "epoch": 0.7880107629454682, "grad_norm": 0.017441880704963976, "learning_rate": 7.5900679330990664e-06, "loss": 0.0021, "step": 119780 }, { "epoch": 0.7880765511207015, "grad_norm": 0.007407382001720837, "learning_rate": 7.58957683735192e-06, "loss": 0.0016, "step": 119790 }, { "epoch": 0.7881423392959349, "grad_norm": 0.06524367157490332, "learning_rate": 7.589085707463437e-06, "loss": 0.0007, "step": 119800 }, { "epoch": 0.7882081274711683, "grad_norm": 0.0005365421553530087, "learning_rate": 7.588594543440096e-06, "loss": 0.0013, "step": 119810 }, { "epoch": 0.7882739156464017, "grad_norm": 0.06651097692013376, "learning_rate": 7.588103345288372e-06, "loss": 0.001, "step": 119820 }, { "epoch": 0.7883397038216351, "grad_norm": 0.030840643939351865, "learning_rate": 7.587612113014742e-06, "loss": 0.0013, "step": 119830 }, { "epoch": 0.7884054919968685, "grad_norm": 0.0279079941084088, "learning_rate": 7.58712084662568e-06, "loss": 0.0023, "step": 119840 }, { "epoch": 0.7884712801721019, "grad_norm": 0.060214961826457754, "learning_rate": 7.586629546127664e-06, "loss": 0.0009, "step": 119850 }, { "epoch": 0.7885370683473353, "grad_norm": 0.015965849933268373, "learning_rate": 7.5861382115271695e-06, "loss": 0.0015, "step": 119860 }, { "epoch": 0.7886028565225687, "grad_norm": 0.0387091029152122, "learning_rate": 7.585646842830679e-06, "loss": 0.0011, "step": 119870 }, { "epoch": 0.7886686446978021, "grad_norm": 0.12836598486796028, "learning_rate": 7.585155440044667e-06, "loss": 0.0041, "step": 119880 }, { "epoch": 0.7887344328730354, "grad_norm": 0.030881997471072096, "learning_rate": 7.584664003175612e-06, "loss": 0.0013, "step": 119890 }, { "epoch": 0.7888002210482687, "grad_norm": 0.10756392205399787, "learning_rate": 7.584172532229997e-06, "loss": 0.0028, "step": 119900 }, { "epoch": 0.7888660092235021, "grad_norm": 0.20385032110997386, "learning_rate": 7.583681027214297e-06, "loss": 0.0016, "step": 119910 }, { "epoch": 0.7889317973987355, "grad_norm": 0.09091995544639636, "learning_rate": 7.583189488134995e-06, "loss": 0.0009, "step": 119920 }, { "epoch": 0.7889975855739689, "grad_norm": 0.12437123979966792, "learning_rate": 7.582697914998569e-06, "loss": 0.0015, "step": 119930 }, { "epoch": 0.7890633737492023, "grad_norm": 0.0734669636476027, "learning_rate": 7.582206307811503e-06, "loss": 0.0019, "step": 119940 }, { "epoch": 0.7891291619244357, "grad_norm": 0.06087634499906915, "learning_rate": 7.581714666580275e-06, "loss": 0.0017, "step": 119950 }, { "epoch": 0.7891949500996691, "grad_norm": 0.030395870856835518, "learning_rate": 7.581222991311372e-06, "loss": 0.0012, "step": 119960 }, { "epoch": 0.7892607382749025, "grad_norm": 0.142178868804003, "learning_rate": 7.580731282011271e-06, "loss": 0.0012, "step": 119970 }, { "epoch": 0.7893265264501359, "grad_norm": 0.01843720711822062, "learning_rate": 7.580239538686458e-06, "loss": 0.0012, "step": 119980 }, { "epoch": 0.7893923146253692, "grad_norm": 0.04593081275494385, "learning_rate": 7.579747761343415e-06, "loss": 0.0016, "step": 119990 }, { "epoch": 0.7894581028006026, "grad_norm": 0.0036544177024163227, "learning_rate": 7.579255949988626e-06, "loss": 0.0012, "step": 120000 }, { "epoch": 0.789523890975836, "grad_norm": 0.026775694698707914, "learning_rate": 7.578764104628573e-06, "loss": 0.0013, "step": 120010 }, { "epoch": 0.7895896791510694, "grad_norm": 0.026449273862727333, "learning_rate": 7.578272225269745e-06, "loss": 0.0016, "step": 120020 }, { "epoch": 0.7896554673263028, "grad_norm": 0.021374706810247997, "learning_rate": 7.577780311918623e-06, "loss": 0.0012, "step": 120030 }, { "epoch": 0.7897212555015362, "grad_norm": 0.022621141818497663, "learning_rate": 7.577288364581694e-06, "loss": 0.0006, "step": 120040 }, { "epoch": 0.7897870436767696, "grad_norm": 0.015529883448441608, "learning_rate": 7.576796383265443e-06, "loss": 0.0008, "step": 120050 }, { "epoch": 0.789852831852003, "grad_norm": 0.02145596496718941, "learning_rate": 7.576304367976359e-06, "loss": 0.0005, "step": 120060 }, { "epoch": 0.7899186200272363, "grad_norm": 0.038273331999050614, "learning_rate": 7.575812318720925e-06, "loss": 0.0009, "step": 120070 }, { "epoch": 0.7899844082024697, "grad_norm": 0.02516465012483401, "learning_rate": 7.575320235505632e-06, "loss": 0.0022, "step": 120080 }, { "epoch": 0.790050196377703, "grad_norm": 0.016247307821006435, "learning_rate": 7.574828118336965e-06, "loss": 0.0008, "step": 120090 }, { "epoch": 0.7901159845529364, "grad_norm": 0.06472378610501328, "learning_rate": 7.574335967221414e-06, "loss": 0.0008, "step": 120100 }, { "epoch": 0.7901817727281698, "grad_norm": 0.04798758278919516, "learning_rate": 7.573843782165465e-06, "loss": 0.0018, "step": 120110 }, { "epoch": 0.7902475609034032, "grad_norm": 0.01960491723595814, "learning_rate": 7.57335156317561e-06, "loss": 0.001, "step": 120120 }, { "epoch": 0.7903133490786366, "grad_norm": 0.061923607419727154, "learning_rate": 7.572859310258336e-06, "loss": 0.001, "step": 120130 }, { "epoch": 0.79037913725387, "grad_norm": 0.030701710632779985, "learning_rate": 7.572367023420135e-06, "loss": 0.0015, "step": 120140 }, { "epoch": 0.7904449254291034, "grad_norm": 0.007976745340265075, "learning_rate": 7.571874702667495e-06, "loss": 0.0008, "step": 120150 }, { "epoch": 0.7905107136043368, "grad_norm": 0.022274131627056964, "learning_rate": 7.57138234800691e-06, "loss": 0.0016, "step": 120160 }, { "epoch": 0.7905765017795702, "grad_norm": 0.16388871070376296, "learning_rate": 7.57088995944487e-06, "loss": 0.0017, "step": 120170 }, { "epoch": 0.7906422899548036, "grad_norm": 0.07334486079140362, "learning_rate": 7.570397536987865e-06, "loss": 0.0017, "step": 120180 }, { "epoch": 0.790708078130037, "grad_norm": 0.031303760294532024, "learning_rate": 7.569905080642388e-06, "loss": 0.0011, "step": 120190 }, { "epoch": 0.7907738663052702, "grad_norm": 0.06858690122029083, "learning_rate": 7.569412590414932e-06, "loss": 0.0018, "step": 120200 }, { "epoch": 0.7908396544805036, "grad_norm": 0.023655438570259125, "learning_rate": 7.568920066311991e-06, "loss": 0.0006, "step": 120210 }, { "epoch": 0.790905442655737, "grad_norm": 0.07996350444874355, "learning_rate": 7.568427508340057e-06, "loss": 0.0016, "step": 120220 }, { "epoch": 0.7909712308309704, "grad_norm": 0.03329802190306965, "learning_rate": 7.5679349165056245e-06, "loss": 0.0019, "step": 120230 }, { "epoch": 0.7910370190062038, "grad_norm": 0.05942723479528849, "learning_rate": 7.5674422908151875e-06, "loss": 0.0009, "step": 120240 }, { "epoch": 0.7911028071814372, "grad_norm": 0.0040872159849940585, "learning_rate": 7.566949631275243e-06, "loss": 0.0039, "step": 120250 }, { "epoch": 0.7911685953566706, "grad_norm": 0.02179094519145733, "learning_rate": 7.566456937892283e-06, "loss": 0.0007, "step": 120260 }, { "epoch": 0.791234383531904, "grad_norm": 0.03733334306692613, "learning_rate": 7.565964210672807e-06, "loss": 0.003, "step": 120270 }, { "epoch": 0.7913001717071374, "grad_norm": 0.10440038091408302, "learning_rate": 7.565471449623307e-06, "loss": 0.0007, "step": 120280 }, { "epoch": 0.7913659598823708, "grad_norm": 0.034195651172375656, "learning_rate": 7.564978654750283e-06, "loss": 0.0015, "step": 120290 }, { "epoch": 0.7914317480576041, "grad_norm": 0.005343735790262877, "learning_rate": 7.5644858260602285e-06, "loss": 0.0014, "step": 120300 }, { "epoch": 0.7914975362328375, "grad_norm": 0.14003405251240073, "learning_rate": 7.563992963559646e-06, "loss": 0.0011, "step": 120310 }, { "epoch": 0.7915633244080709, "grad_norm": 0.1552240495736243, "learning_rate": 7.563500067255027e-06, "loss": 0.0013, "step": 120320 }, { "epoch": 0.7916291125833043, "grad_norm": 0.09627778985429428, "learning_rate": 7.563007137152876e-06, "loss": 0.0018, "step": 120330 }, { "epoch": 0.7916949007585377, "grad_norm": 0.02285651692708313, "learning_rate": 7.56251417325969e-06, "loss": 0.0014, "step": 120340 }, { "epoch": 0.791760688933771, "grad_norm": 0.09935604500620313, "learning_rate": 7.562021175581966e-06, "loss": 0.0018, "step": 120350 }, { "epoch": 0.7918264771090044, "grad_norm": 0.03074068839440757, "learning_rate": 7.561528144126207e-06, "loss": 0.001, "step": 120360 }, { "epoch": 0.7918922652842378, "grad_norm": 0.0469065560290318, "learning_rate": 7.561035078898911e-06, "loss": 0.0018, "step": 120370 }, { "epoch": 0.7919580534594712, "grad_norm": 0.0035772910799257746, "learning_rate": 7.560541979906579e-06, "loss": 0.0007, "step": 120380 }, { "epoch": 0.7920238416347046, "grad_norm": 0.055534811304590974, "learning_rate": 7.560048847155713e-06, "loss": 0.001, "step": 120390 }, { "epoch": 0.7920896298099379, "grad_norm": 0.04036099244284844, "learning_rate": 7.559555680652815e-06, "loss": 0.0008, "step": 120400 }, { "epoch": 0.7921554179851713, "grad_norm": 0.10978257910193208, "learning_rate": 7.559062480404384e-06, "loss": 0.0013, "step": 120410 }, { "epoch": 0.7922212061604047, "grad_norm": 0.1194842890724164, "learning_rate": 7.558569246416924e-06, "loss": 0.0016, "step": 120420 }, { "epoch": 0.7922869943356381, "grad_norm": 0.08534029990082749, "learning_rate": 7.558075978696941e-06, "loss": 0.0014, "step": 120430 }, { "epoch": 0.7923527825108715, "grad_norm": 0.06470614107362367, "learning_rate": 7.557582677250934e-06, "loss": 0.0011, "step": 120440 }, { "epoch": 0.7924185706861049, "grad_norm": 0.05029697919619168, "learning_rate": 7.557089342085406e-06, "loss": 0.0033, "step": 120450 }, { "epoch": 0.7924843588613383, "grad_norm": 0.012302651372485904, "learning_rate": 7.556595973206866e-06, "loss": 0.0017, "step": 120460 }, { "epoch": 0.7925501470365717, "grad_norm": 0.013591839277014671, "learning_rate": 7.556102570621816e-06, "loss": 0.0014, "step": 120470 }, { "epoch": 0.7926159352118051, "grad_norm": 0.027422872397569246, "learning_rate": 7.555609134336761e-06, "loss": 0.0019, "step": 120480 }, { "epoch": 0.7926817233870385, "grad_norm": 0.011573899114102705, "learning_rate": 7.555115664358207e-06, "loss": 0.0017, "step": 120490 }, { "epoch": 0.7927475115622717, "grad_norm": 0.024245482408774784, "learning_rate": 7.55462216069266e-06, "loss": 0.001, "step": 120500 }, { "epoch": 0.7928132997375051, "grad_norm": 0.025944925926487474, "learning_rate": 7.554128623346624e-06, "loss": 0.0012, "step": 120510 }, { "epoch": 0.7928790879127385, "grad_norm": 0.3017930265050713, "learning_rate": 7.55363505232661e-06, "loss": 0.0017, "step": 120520 }, { "epoch": 0.7929448760879719, "grad_norm": 0.05894982442686249, "learning_rate": 7.553141447639123e-06, "loss": 0.0008, "step": 120530 }, { "epoch": 0.7930106642632053, "grad_norm": 0.03723849964428774, "learning_rate": 7.552647809290671e-06, "loss": 0.0012, "step": 120540 }, { "epoch": 0.7930764524384387, "grad_norm": 0.04432272905615432, "learning_rate": 7.552154137287762e-06, "loss": 0.0006, "step": 120550 }, { "epoch": 0.7931422406136721, "grad_norm": 0.08975840204126234, "learning_rate": 7.551660431636905e-06, "loss": 0.0015, "step": 120560 }, { "epoch": 0.7932080287889055, "grad_norm": 0.037739421250377804, "learning_rate": 7.551166692344609e-06, "loss": 0.0008, "step": 120570 }, { "epoch": 0.7932738169641389, "grad_norm": 0.001451539350376569, "learning_rate": 7.550672919417382e-06, "loss": 0.0006, "step": 120580 }, { "epoch": 0.7933396051393723, "grad_norm": 0.046967528966866214, "learning_rate": 7.550179112861737e-06, "loss": 0.0021, "step": 120590 }, { "epoch": 0.7934053933146056, "grad_norm": 0.025466447220459404, "learning_rate": 7.549685272684184e-06, "loss": 0.001, "step": 120600 }, { "epoch": 0.793471181489839, "grad_norm": 0.055231250498611384, "learning_rate": 7.549191398891231e-06, "loss": 0.0012, "step": 120610 }, { "epoch": 0.7935369696650724, "grad_norm": 0.03958817500464607, "learning_rate": 7.548697491489392e-06, "loss": 0.001, "step": 120620 }, { "epoch": 0.7936027578403058, "grad_norm": 0.01908231973957947, "learning_rate": 7.548203550485177e-06, "loss": 0.001, "step": 120630 }, { "epoch": 0.7936685460155392, "grad_norm": 0.01092123612261625, "learning_rate": 7.547709575885098e-06, "loss": 0.0008, "step": 120640 }, { "epoch": 0.7937343341907726, "grad_norm": 0.19719266262825463, "learning_rate": 7.5472155676956704e-06, "loss": 0.0021, "step": 120650 }, { "epoch": 0.793800122366006, "grad_norm": 0.13521199094029446, "learning_rate": 7.546721525923405e-06, "loss": 0.0014, "step": 120660 }, { "epoch": 0.7938659105412393, "grad_norm": 0.13575774620332437, "learning_rate": 7.546227450574815e-06, "loss": 0.0008, "step": 120670 }, { "epoch": 0.7939316987164727, "grad_norm": 0.029338838496591278, "learning_rate": 7.545733341656416e-06, "loss": 0.0018, "step": 120680 }, { "epoch": 0.7939974868917061, "grad_norm": 0.008404534191589397, "learning_rate": 7.545239199174722e-06, "loss": 0.0009, "step": 120690 }, { "epoch": 0.7940632750669395, "grad_norm": 0.005360605946043678, "learning_rate": 7.544745023136246e-06, "loss": 0.0013, "step": 120700 }, { "epoch": 0.7941290632421728, "grad_norm": 0.037379206193727456, "learning_rate": 7.544250813547505e-06, "loss": 0.0009, "step": 120710 }, { "epoch": 0.7941948514174062, "grad_norm": 0.01672313635674732, "learning_rate": 7.543756570415015e-06, "loss": 0.0017, "step": 120720 }, { "epoch": 0.7942606395926396, "grad_norm": 0.008636649468051719, "learning_rate": 7.543262293745291e-06, "loss": 0.0013, "step": 120730 }, { "epoch": 0.794326427767873, "grad_norm": 0.2445491108588707, "learning_rate": 7.542767983544852e-06, "loss": 0.0045, "step": 120740 }, { "epoch": 0.7943922159431064, "grad_norm": 0.03101744161774794, "learning_rate": 7.542273639820211e-06, "loss": 0.0008, "step": 120750 }, { "epoch": 0.7944580041183398, "grad_norm": 0.06651512203446183, "learning_rate": 7.541779262577888e-06, "loss": 0.0021, "step": 120760 }, { "epoch": 0.7945237922935732, "grad_norm": 0.14512911639188747, "learning_rate": 7.5412848518244006e-06, "loss": 0.002, "step": 120770 }, { "epoch": 0.7945895804688066, "grad_norm": 0.035247867021498264, "learning_rate": 7.540790407566267e-06, "loss": 0.0008, "step": 120780 }, { "epoch": 0.79465536864404, "grad_norm": 0.0056291718134931735, "learning_rate": 7.540295929810009e-06, "loss": 0.0009, "step": 120790 }, { "epoch": 0.7947211568192734, "grad_norm": 0.0007752238806470912, "learning_rate": 7.53980141856214e-06, "loss": 0.0009, "step": 120800 }, { "epoch": 0.7947869449945066, "grad_norm": 0.0828202721304986, "learning_rate": 7.539306873829184e-06, "loss": 0.0009, "step": 120810 }, { "epoch": 0.79485273316974, "grad_norm": 0.0337693794396876, "learning_rate": 7.538812295617659e-06, "loss": 0.0009, "step": 120820 }, { "epoch": 0.7949185213449734, "grad_norm": 0.03219478254213839, "learning_rate": 7.538317683934089e-06, "loss": 0.0012, "step": 120830 }, { "epoch": 0.7949843095202068, "grad_norm": 0.01847268875680915, "learning_rate": 7.5378230387849895e-06, "loss": 0.0011, "step": 120840 }, { "epoch": 0.7950500976954402, "grad_norm": 0.08167595049033688, "learning_rate": 7.537328360176887e-06, "loss": 0.0016, "step": 120850 }, { "epoch": 0.7951158858706736, "grad_norm": 0.03131907319271528, "learning_rate": 7.5368336481163e-06, "loss": 0.001, "step": 120860 }, { "epoch": 0.795181674045907, "grad_norm": 0.03177681176932404, "learning_rate": 7.5363389026097545e-06, "loss": 0.0011, "step": 120870 }, { "epoch": 0.7952474622211404, "grad_norm": 0.2782694000247453, "learning_rate": 7.535844123663769e-06, "loss": 0.0026, "step": 120880 }, { "epoch": 0.7953132503963738, "grad_norm": 0.02667703707581718, "learning_rate": 7.535349311284869e-06, "loss": 0.002, "step": 120890 }, { "epoch": 0.7953790385716072, "grad_norm": 0.019128070201719746, "learning_rate": 7.534854465479578e-06, "loss": 0.0013, "step": 120900 }, { "epoch": 0.7954448267468405, "grad_norm": 0.09652026721971174, "learning_rate": 7.5343595862544215e-06, "loss": 0.0009, "step": 120910 }, { "epoch": 0.7955106149220739, "grad_norm": 0.04623708199604265, "learning_rate": 7.533864673615923e-06, "loss": 0.0012, "step": 120920 }, { "epoch": 0.7955764030973073, "grad_norm": 0.02942408181524548, "learning_rate": 7.533369727570605e-06, "loss": 0.0013, "step": 120930 }, { "epoch": 0.7956421912725407, "grad_norm": 0.029090219090189367, "learning_rate": 7.5328747481249964e-06, "loss": 0.0018, "step": 120940 }, { "epoch": 0.795707979447774, "grad_norm": 0.03681307652836779, "learning_rate": 7.532379735285622e-06, "loss": 0.0023, "step": 120950 }, { "epoch": 0.7957737676230074, "grad_norm": 0.0038266135317089117, "learning_rate": 7.5318846890590065e-06, "loss": 0.0012, "step": 120960 }, { "epoch": 0.7958395557982408, "grad_norm": 0.042249518819914546, "learning_rate": 7.531389609451678e-06, "loss": 0.004, "step": 120970 }, { "epoch": 0.7959053439734742, "grad_norm": 0.018037619520800505, "learning_rate": 7.530894496470166e-06, "loss": 0.001, "step": 120980 }, { "epoch": 0.7959711321487076, "grad_norm": 0.01117125481833696, "learning_rate": 7.530399350120994e-06, "loss": 0.0011, "step": 120990 }, { "epoch": 0.796036920323941, "grad_norm": 0.0010985017220457034, "learning_rate": 7.5299041704106934e-06, "loss": 0.0011, "step": 121000 }, { "epoch": 0.7961027084991743, "grad_norm": 0.1651424847288304, "learning_rate": 7.52940895734579e-06, "loss": 0.0009, "step": 121010 }, { "epoch": 0.7961684966744077, "grad_norm": 0.0560883153432493, "learning_rate": 7.5289137109328145e-06, "loss": 0.0013, "step": 121020 }, { "epoch": 0.7962342848496411, "grad_norm": 0.0448229782525424, "learning_rate": 7.528418431178295e-06, "loss": 0.0007, "step": 121030 }, { "epoch": 0.7963000730248745, "grad_norm": 0.03963879302146717, "learning_rate": 7.527923118088764e-06, "loss": 0.0018, "step": 121040 }, { "epoch": 0.7963658612001079, "grad_norm": 0.053739161258471356, "learning_rate": 7.527427771670749e-06, "loss": 0.0015, "step": 121050 }, { "epoch": 0.7964316493753413, "grad_norm": 0.016054166824553955, "learning_rate": 7.526932391930781e-06, "loss": 0.0015, "step": 121060 }, { "epoch": 0.7964974375505747, "grad_norm": 0.06278200936782369, "learning_rate": 7.526436978875393e-06, "loss": 0.0019, "step": 121070 }, { "epoch": 0.7965632257258081, "grad_norm": 0.028408805324222533, "learning_rate": 7.5259415325111155e-06, "loss": 0.0012, "step": 121080 }, { "epoch": 0.7966290139010415, "grad_norm": 0.016676171776924556, "learning_rate": 7.52544605284448e-06, "loss": 0.0014, "step": 121090 }, { "epoch": 0.7966948020762749, "grad_norm": 0.2536956151412732, "learning_rate": 7.524950539882018e-06, "loss": 0.0029, "step": 121100 }, { "epoch": 0.7967605902515081, "grad_norm": 0.09473134343933973, "learning_rate": 7.524454993630265e-06, "loss": 0.001, "step": 121110 }, { "epoch": 0.7968263784267415, "grad_norm": 0.022234729233969932, "learning_rate": 7.5239594140957535e-06, "loss": 0.0015, "step": 121120 }, { "epoch": 0.7968921666019749, "grad_norm": 0.00478441354896499, "learning_rate": 7.523463801285016e-06, "loss": 0.0024, "step": 121130 }, { "epoch": 0.7969579547772083, "grad_norm": 0.18309098887484263, "learning_rate": 7.5229681552045885e-06, "loss": 0.0015, "step": 121140 }, { "epoch": 0.7970237429524417, "grad_norm": 0.10975842678127995, "learning_rate": 7.5224724758610045e-06, "loss": 0.0013, "step": 121150 }, { "epoch": 0.7970895311276751, "grad_norm": 0.002431323938743523, "learning_rate": 7.5219767632607996e-06, "loss": 0.0017, "step": 121160 }, { "epoch": 0.7971553193029085, "grad_norm": 0.05379867209691859, "learning_rate": 7.521481017410509e-06, "loss": 0.0008, "step": 121170 }, { "epoch": 0.7972211074781419, "grad_norm": 0.09299074368234878, "learning_rate": 7.520985238316669e-06, "loss": 0.0023, "step": 121180 }, { "epoch": 0.7972868956533753, "grad_norm": 0.022663446424612356, "learning_rate": 7.520489425985817e-06, "loss": 0.0015, "step": 121190 }, { "epoch": 0.7973526838286087, "grad_norm": 0.03254819298416422, "learning_rate": 7.519993580424488e-06, "loss": 0.0012, "step": 121200 }, { "epoch": 0.7974184720038421, "grad_norm": 0.05957520870164151, "learning_rate": 7.51949770163922e-06, "loss": 0.0006, "step": 121210 }, { "epoch": 0.7974842601790754, "grad_norm": 0.10059480975291442, "learning_rate": 7.519001789636551e-06, "loss": 0.0016, "step": 121220 }, { "epoch": 0.7975500483543088, "grad_norm": 0.05077977368644569, "learning_rate": 7.518505844423019e-06, "loss": 0.0023, "step": 121230 }, { "epoch": 0.7976158365295422, "grad_norm": 0.033774874237047274, "learning_rate": 7.518009866005162e-06, "loss": 0.0012, "step": 121240 }, { "epoch": 0.7976816247047755, "grad_norm": 0.013997632937804122, "learning_rate": 7.51751385438952e-06, "loss": 0.0009, "step": 121250 }, { "epoch": 0.7977474128800089, "grad_norm": 0.007731194493037539, "learning_rate": 7.5170178095826315e-06, "loss": 0.001, "step": 121260 }, { "epoch": 0.7978132010552423, "grad_norm": 0.09437352575456263, "learning_rate": 7.516521731591038e-06, "loss": 0.0013, "step": 121270 }, { "epoch": 0.7978789892304757, "grad_norm": 0.3766306950675294, "learning_rate": 7.516025620421277e-06, "loss": 0.001, "step": 121280 }, { "epoch": 0.7979447774057091, "grad_norm": 0.007118778332205781, "learning_rate": 7.515529476079892e-06, "loss": 0.003, "step": 121290 }, { "epoch": 0.7980105655809425, "grad_norm": 0.02419497174689228, "learning_rate": 7.515033298573424e-06, "loss": 0.0022, "step": 121300 }, { "epoch": 0.7980763537561759, "grad_norm": 0.09300833432882039, "learning_rate": 7.514537087908413e-06, "loss": 0.0012, "step": 121310 }, { "epoch": 0.7981421419314092, "grad_norm": 0.08905376163642648, "learning_rate": 7.514040844091404e-06, "loss": 0.0012, "step": 121320 }, { "epoch": 0.7982079301066426, "grad_norm": 0.2313255970893167, "learning_rate": 7.513544567128937e-06, "loss": 0.0024, "step": 121330 }, { "epoch": 0.798273718281876, "grad_norm": 0.047817048471675276, "learning_rate": 7.513048257027556e-06, "loss": 0.0018, "step": 121340 }, { "epoch": 0.7983395064571094, "grad_norm": 0.03184535023533915, "learning_rate": 7.512551913793802e-06, "loss": 0.0012, "step": 121350 }, { "epoch": 0.7984052946323428, "grad_norm": 0.06339613457292471, "learning_rate": 7.512055537434224e-06, "loss": 0.0013, "step": 121360 }, { "epoch": 0.7984710828075762, "grad_norm": 0.01234623423560138, "learning_rate": 7.5115591279553615e-06, "loss": 0.0009, "step": 121370 }, { "epoch": 0.7985368709828096, "grad_norm": 0.019614303303095222, "learning_rate": 7.511062685363762e-06, "loss": 0.0011, "step": 121380 }, { "epoch": 0.798602659158043, "grad_norm": 0.048612782947834214, "learning_rate": 7.510566209665969e-06, "loss": 0.0015, "step": 121390 }, { "epoch": 0.7986684473332764, "grad_norm": 0.054108331532425294, "learning_rate": 7.51006970086853e-06, "loss": 0.0011, "step": 121400 }, { "epoch": 0.7987342355085097, "grad_norm": 0.0593188269622174, "learning_rate": 7.509573158977987e-06, "loss": 0.0011, "step": 121410 }, { "epoch": 0.798800023683743, "grad_norm": 0.06633829515884389, "learning_rate": 7.5090765840008915e-06, "loss": 0.0015, "step": 121420 }, { "epoch": 0.7988658118589764, "grad_norm": 0.01766021592419194, "learning_rate": 7.508579975943788e-06, "loss": 0.0005, "step": 121430 }, { "epoch": 0.7989316000342098, "grad_norm": 0.014383384867673508, "learning_rate": 7.508083334813223e-06, "loss": 0.0014, "step": 121440 }, { "epoch": 0.7989973882094432, "grad_norm": 0.08332148200080444, "learning_rate": 7.507586660615747e-06, "loss": 0.001, "step": 121450 }, { "epoch": 0.7990631763846766, "grad_norm": 0.03171364970813051, "learning_rate": 7.507089953357907e-06, "loss": 0.0019, "step": 121460 }, { "epoch": 0.79912896455991, "grad_norm": 0.014542162156188365, "learning_rate": 7.506593213046249e-06, "loss": 0.001, "step": 121470 }, { "epoch": 0.7991947527351434, "grad_norm": 0.019398586199225045, "learning_rate": 7.506096439687325e-06, "loss": 0.0016, "step": 121480 }, { "epoch": 0.7992605409103768, "grad_norm": 0.00965897243456696, "learning_rate": 7.505599633287684e-06, "loss": 0.0007, "step": 121490 }, { "epoch": 0.7993263290856102, "grad_norm": 0.026471240927745294, "learning_rate": 7.505102793853877e-06, "loss": 0.0005, "step": 121500 }, { "epoch": 0.7993921172608436, "grad_norm": 0.07460783235172108, "learning_rate": 7.5046059213924514e-06, "loss": 0.0016, "step": 121510 }, { "epoch": 0.7994579054360769, "grad_norm": 0.013131673521434222, "learning_rate": 7.504109015909962e-06, "loss": 0.001, "step": 121520 }, { "epoch": 0.7995236936113103, "grad_norm": 0.03574424063602248, "learning_rate": 7.503612077412956e-06, "loss": 0.0011, "step": 121530 }, { "epoch": 0.7995894817865437, "grad_norm": 0.03893269319274647, "learning_rate": 7.503115105907987e-06, "loss": 0.0014, "step": 121540 }, { "epoch": 0.799655269961777, "grad_norm": 0.020201162210133825, "learning_rate": 7.502618101401608e-06, "loss": 0.0007, "step": 121550 }, { "epoch": 0.7997210581370104, "grad_norm": 0.006819143618035797, "learning_rate": 7.50212106390037e-06, "loss": 0.0016, "step": 121560 }, { "epoch": 0.7997868463122438, "grad_norm": 0.015842053740961835, "learning_rate": 7.501623993410828e-06, "loss": 0.0017, "step": 121570 }, { "epoch": 0.7998526344874772, "grad_norm": 0.008646699120232469, "learning_rate": 7.501126889939534e-06, "loss": 0.0008, "step": 121580 }, { "epoch": 0.7999184226627106, "grad_norm": 0.06067698766557649, "learning_rate": 7.5006297534930406e-06, "loss": 0.0011, "step": 121590 }, { "epoch": 0.799984210837944, "grad_norm": 0.04492531117179125, "learning_rate": 7.500132584077904e-06, "loss": 0.0009, "step": 121600 }, { "epoch": 0.8000499990131774, "grad_norm": 0.05598835139371754, "learning_rate": 7.49963538170068e-06, "loss": 0.0014, "step": 121610 }, { "epoch": 0.8001157871884108, "grad_norm": 0.018149920053098765, "learning_rate": 7.4991381463679215e-06, "loss": 0.0012, "step": 121620 }, { "epoch": 0.8001815753636441, "grad_norm": 0.013824265180147887, "learning_rate": 7.4986408780861855e-06, "loss": 0.0017, "step": 121630 }, { "epoch": 0.8002473635388775, "grad_norm": 0.08016849575092189, "learning_rate": 7.498143576862026e-06, "loss": 0.0008, "step": 121640 }, { "epoch": 0.8003131517141109, "grad_norm": 0.017800887412830375, "learning_rate": 7.497646242702001e-06, "loss": 0.0012, "step": 121650 }, { "epoch": 0.8003789398893443, "grad_norm": 0.05200861351428135, "learning_rate": 7.49714887561267e-06, "loss": 0.0016, "step": 121660 }, { "epoch": 0.8004447280645777, "grad_norm": 0.011818598727348837, "learning_rate": 7.4966514756005845e-06, "loss": 0.0006, "step": 121670 }, { "epoch": 0.8005105162398111, "grad_norm": 0.04826008402849774, "learning_rate": 7.496154042672306e-06, "loss": 0.001, "step": 121680 }, { "epoch": 0.8005763044150445, "grad_norm": 0.019521226298306757, "learning_rate": 7.4956565768343945e-06, "loss": 0.0011, "step": 121690 }, { "epoch": 0.8006420925902779, "grad_norm": 0.045361403033736716, "learning_rate": 7.4951590780934055e-06, "loss": 0.0012, "step": 121700 }, { "epoch": 0.8007078807655112, "grad_norm": 0.12573105395181608, "learning_rate": 7.494661546455899e-06, "loss": 0.0013, "step": 121710 }, { "epoch": 0.8007736689407446, "grad_norm": 0.16369207946082662, "learning_rate": 7.494163981928434e-06, "loss": 0.0014, "step": 121720 }, { "epoch": 0.8008394571159779, "grad_norm": 0.04850599622780548, "learning_rate": 7.493666384517571e-06, "loss": 0.0008, "step": 121730 }, { "epoch": 0.8009052452912113, "grad_norm": 0.04863615674879375, "learning_rate": 7.49316875422987e-06, "loss": 0.0017, "step": 121740 }, { "epoch": 0.8009710334664447, "grad_norm": 0.02377989158943035, "learning_rate": 7.4926710910718925e-06, "loss": 0.0018, "step": 121750 }, { "epoch": 0.8010368216416781, "grad_norm": 0.005341496176698597, "learning_rate": 7.4921733950502004e-06, "loss": 0.0018, "step": 121760 }, { "epoch": 0.8011026098169115, "grad_norm": 0.07198262478143137, "learning_rate": 7.491675666171354e-06, "loss": 0.0026, "step": 121770 }, { "epoch": 0.8011683979921449, "grad_norm": 0.028042068548239412, "learning_rate": 7.491177904441916e-06, "loss": 0.0017, "step": 121780 }, { "epoch": 0.8012341861673783, "grad_norm": 0.04455758661469705, "learning_rate": 7.490680109868449e-06, "loss": 0.0029, "step": 121790 }, { "epoch": 0.8012999743426117, "grad_norm": 0.015743129765064284, "learning_rate": 7.4901822824575145e-06, "loss": 0.0008, "step": 121800 }, { "epoch": 0.8013657625178451, "grad_norm": 0.04141567935291234, "learning_rate": 7.489684422215678e-06, "loss": 0.001, "step": 121810 }, { "epoch": 0.8014315506930785, "grad_norm": 0.004106856004562255, "learning_rate": 7.489186529149504e-06, "loss": 0.0019, "step": 121820 }, { "epoch": 0.8014973388683118, "grad_norm": 0.028194213098092863, "learning_rate": 7.4886886032655535e-06, "loss": 0.0012, "step": 121830 }, { "epoch": 0.8015631270435452, "grad_norm": 0.01392759340521709, "learning_rate": 7.488190644570395e-06, "loss": 0.0015, "step": 121840 }, { "epoch": 0.8016289152187785, "grad_norm": 0.0013337603573923124, "learning_rate": 7.487692653070591e-06, "loss": 0.0014, "step": 121850 }, { "epoch": 0.8016947033940119, "grad_norm": 0.1149067636233918, "learning_rate": 7.487194628772706e-06, "loss": 0.001, "step": 121860 }, { "epoch": 0.8017604915692453, "grad_norm": 0.04133784397965659, "learning_rate": 7.486696571683309e-06, "loss": 0.0013, "step": 121870 }, { "epoch": 0.8018262797444787, "grad_norm": 0.02003766010180005, "learning_rate": 7.486198481808966e-06, "loss": 0.0015, "step": 121880 }, { "epoch": 0.8018920679197121, "grad_norm": 0.026635096993238565, "learning_rate": 7.485700359156244e-06, "loss": 0.001, "step": 121890 }, { "epoch": 0.8019578560949455, "grad_norm": 0.028068099690079944, "learning_rate": 7.485202203731708e-06, "loss": 0.0025, "step": 121900 }, { "epoch": 0.8020236442701789, "grad_norm": 0.13302508190588444, "learning_rate": 7.484704015541928e-06, "loss": 0.0015, "step": 121910 }, { "epoch": 0.8020894324454123, "grad_norm": 0.01534436781229484, "learning_rate": 7.484205794593472e-06, "loss": 0.0013, "step": 121920 }, { "epoch": 0.8021552206206456, "grad_norm": 0.027953778746426444, "learning_rate": 7.483707540892907e-06, "loss": 0.0011, "step": 121930 }, { "epoch": 0.802221008795879, "grad_norm": 0.01275273487682293, "learning_rate": 7.483209254446804e-06, "loss": 0.001, "step": 121940 }, { "epoch": 0.8022867969711124, "grad_norm": 0.0017514417706757234, "learning_rate": 7.482710935261731e-06, "loss": 0.0013, "step": 121950 }, { "epoch": 0.8023525851463458, "grad_norm": 0.03398136134846266, "learning_rate": 7.482212583344259e-06, "loss": 0.0015, "step": 121960 }, { "epoch": 0.8024183733215792, "grad_norm": 0.04889601440794712, "learning_rate": 7.481714198700958e-06, "loss": 0.0014, "step": 121970 }, { "epoch": 0.8024841614968126, "grad_norm": 0.08046046733843795, "learning_rate": 7.481215781338398e-06, "loss": 0.0012, "step": 121980 }, { "epoch": 0.802549949672046, "grad_norm": 0.005732898526544777, "learning_rate": 7.480717331263151e-06, "loss": 0.001, "step": 121990 }, { "epoch": 0.8026157378472794, "grad_norm": 0.06563092644585128, "learning_rate": 7.480218848481789e-06, "loss": 0.0023, "step": 122000 }, { "epoch": 0.8026815260225127, "grad_norm": 0.033459094038625375, "learning_rate": 7.479720333000883e-06, "loss": 0.0012, "step": 122010 }, { "epoch": 0.8027473141977461, "grad_norm": 0.07232916671176039, "learning_rate": 7.479221784827006e-06, "loss": 0.0018, "step": 122020 }, { "epoch": 0.8028131023729794, "grad_norm": 0.06292923239564391, "learning_rate": 7.478723203966731e-06, "loss": 0.0011, "step": 122030 }, { "epoch": 0.8028788905482128, "grad_norm": 0.1687077791986891, "learning_rate": 7.478224590426633e-06, "loss": 0.002, "step": 122040 }, { "epoch": 0.8029446787234462, "grad_norm": 0.047394120512548474, "learning_rate": 7.477725944213282e-06, "loss": 0.0011, "step": 122050 }, { "epoch": 0.8030104668986796, "grad_norm": 0.011112792508723664, "learning_rate": 7.477227265333254e-06, "loss": 0.0018, "step": 122060 }, { "epoch": 0.803076255073913, "grad_norm": 0.09230973379045389, "learning_rate": 7.476728553793126e-06, "loss": 0.0009, "step": 122070 }, { "epoch": 0.8031420432491464, "grad_norm": 0.010613835767962402, "learning_rate": 7.476229809599469e-06, "loss": 0.0009, "step": 122080 }, { "epoch": 0.8032078314243798, "grad_norm": 0.01648535345423147, "learning_rate": 7.475731032758862e-06, "loss": 0.0015, "step": 122090 }, { "epoch": 0.8032736195996132, "grad_norm": 0.05189250753722968, "learning_rate": 7.47523222327788e-06, "loss": 0.0006, "step": 122100 }, { "epoch": 0.8033394077748466, "grad_norm": 0.04413254398448773, "learning_rate": 7.4747333811630975e-06, "loss": 0.0005, "step": 122110 }, { "epoch": 0.80340519595008, "grad_norm": 0.052688472184014026, "learning_rate": 7.474234506421093e-06, "loss": 0.0012, "step": 122120 }, { "epoch": 0.8034709841253134, "grad_norm": 0.0035998620227179743, "learning_rate": 7.473735599058443e-06, "loss": 0.0007, "step": 122130 }, { "epoch": 0.8035367723005467, "grad_norm": 0.0645906863339116, "learning_rate": 7.473236659081728e-06, "loss": 0.0012, "step": 122140 }, { "epoch": 0.80360256047578, "grad_norm": 0.11132387853660082, "learning_rate": 7.472737686497521e-06, "loss": 0.0016, "step": 122150 }, { "epoch": 0.8036683486510134, "grad_norm": 0.09390494638364942, "learning_rate": 7.472238681312405e-06, "loss": 0.0021, "step": 122160 }, { "epoch": 0.8037341368262468, "grad_norm": 0.13290366806380519, "learning_rate": 7.4717396435329556e-06, "loss": 0.0025, "step": 122170 }, { "epoch": 0.8037999250014802, "grad_norm": 0.04861609435577693, "learning_rate": 7.471240573165754e-06, "loss": 0.0018, "step": 122180 }, { "epoch": 0.8038657131767136, "grad_norm": 0.021953837462736103, "learning_rate": 7.470741470217379e-06, "loss": 0.0017, "step": 122190 }, { "epoch": 0.803931501351947, "grad_norm": 0.034809499926647024, "learning_rate": 7.470242334694413e-06, "loss": 0.0008, "step": 122200 }, { "epoch": 0.8039972895271804, "grad_norm": 0.045465523774791995, "learning_rate": 7.4697431666034355e-06, "loss": 0.0011, "step": 122210 }, { "epoch": 0.8040630777024138, "grad_norm": 0.18553204036825433, "learning_rate": 7.469243965951027e-06, "loss": 0.0012, "step": 122220 }, { "epoch": 0.8041288658776472, "grad_norm": 0.1557415470640934, "learning_rate": 7.468744732743769e-06, "loss": 0.0024, "step": 122230 }, { "epoch": 0.8041946540528805, "grad_norm": 0.04430904923847805, "learning_rate": 7.468245466988243e-06, "loss": 0.0014, "step": 122240 }, { "epoch": 0.8042604422281139, "grad_norm": 0.04469069738634408, "learning_rate": 7.4677461686910335e-06, "loss": 0.0018, "step": 122250 }, { "epoch": 0.8043262304033473, "grad_norm": 0.10212356287022442, "learning_rate": 7.467246837858723e-06, "loss": 0.0009, "step": 122260 }, { "epoch": 0.8043920185785807, "grad_norm": 0.0418255228402229, "learning_rate": 7.466747474497892e-06, "loss": 0.0013, "step": 122270 }, { "epoch": 0.8044578067538141, "grad_norm": 0.029469679381542782, "learning_rate": 7.466248078615127e-06, "loss": 0.0009, "step": 122280 }, { "epoch": 0.8045235949290475, "grad_norm": 0.0735225035501408, "learning_rate": 7.46574865021701e-06, "loss": 0.0018, "step": 122290 }, { "epoch": 0.8045893831042809, "grad_norm": 0.023671198070807687, "learning_rate": 7.465249189310127e-06, "loss": 0.0012, "step": 122300 }, { "epoch": 0.8046551712795142, "grad_norm": 0.017064021311340036, "learning_rate": 7.464749695901064e-06, "loss": 0.0007, "step": 122310 }, { "epoch": 0.8047209594547476, "grad_norm": 0.2873372517525403, "learning_rate": 7.464250169996403e-06, "loss": 0.0021, "step": 122320 }, { "epoch": 0.804786747629981, "grad_norm": 0.12587895192937182, "learning_rate": 7.463750611602733e-06, "loss": 0.0018, "step": 122330 }, { "epoch": 0.8048525358052143, "grad_norm": 0.037804087348729344, "learning_rate": 7.463251020726638e-06, "loss": 0.0009, "step": 122340 }, { "epoch": 0.8049183239804477, "grad_norm": 0.05648941830716555, "learning_rate": 7.462751397374707e-06, "loss": 0.0012, "step": 122350 }, { "epoch": 0.8049841121556811, "grad_norm": 0.042316941605016374, "learning_rate": 7.462251741553525e-06, "loss": 0.0006, "step": 122360 }, { "epoch": 0.8050499003309145, "grad_norm": 0.016186231254659508, "learning_rate": 7.46175205326968e-06, "loss": 0.0025, "step": 122370 }, { "epoch": 0.8051156885061479, "grad_norm": 0.30257968449372774, "learning_rate": 7.46125233252976e-06, "loss": 0.0036, "step": 122380 }, { "epoch": 0.8051814766813813, "grad_norm": 0.0263848608386675, "learning_rate": 7.460752579340355e-06, "loss": 0.0007, "step": 122390 }, { "epoch": 0.8052472648566147, "grad_norm": 0.09491497800480832, "learning_rate": 7.4602527937080515e-06, "loss": 0.0016, "step": 122400 }, { "epoch": 0.8053130530318481, "grad_norm": 0.09234318025535641, "learning_rate": 7.459752975639441e-06, "loss": 0.001, "step": 122410 }, { "epoch": 0.8053788412070815, "grad_norm": 0.07384898243229654, "learning_rate": 7.459253125141111e-06, "loss": 0.0012, "step": 122420 }, { "epoch": 0.8054446293823149, "grad_norm": 0.0763491825066212, "learning_rate": 7.458753242219652e-06, "loss": 0.0018, "step": 122430 }, { "epoch": 0.8055104175575482, "grad_norm": 0.031090456192184823, "learning_rate": 7.458253326881655e-06, "loss": 0.0017, "step": 122440 }, { "epoch": 0.8055762057327815, "grad_norm": 0.012336993301104196, "learning_rate": 7.457753379133711e-06, "loss": 0.0011, "step": 122450 }, { "epoch": 0.8056419939080149, "grad_norm": 0.20580639791374408, "learning_rate": 7.457253398982411e-06, "loss": 0.0009, "step": 122460 }, { "epoch": 0.8057077820832483, "grad_norm": 0.03868255429237003, "learning_rate": 7.456753386434349e-06, "loss": 0.0007, "step": 122470 }, { "epoch": 0.8057735702584817, "grad_norm": 0.016184368416133688, "learning_rate": 7.456253341496115e-06, "loss": 0.001, "step": 122480 }, { "epoch": 0.8058393584337151, "grad_norm": 0.05243567177041303, "learning_rate": 7.455753264174301e-06, "loss": 0.0016, "step": 122490 }, { "epoch": 0.8059051466089485, "grad_norm": 0.003554884599497107, "learning_rate": 7.455253154475501e-06, "loss": 0.0019, "step": 122500 }, { "epoch": 0.8059709347841819, "grad_norm": 0.09974215385214956, "learning_rate": 7.454753012406309e-06, "loss": 0.0016, "step": 122510 }, { "epoch": 0.8060367229594153, "grad_norm": 0.03728340694001773, "learning_rate": 7.4542528379733195e-06, "loss": 0.001, "step": 122520 }, { "epoch": 0.8061025111346487, "grad_norm": 0.02128766988824569, "learning_rate": 7.453752631183125e-06, "loss": 0.0011, "step": 122530 }, { "epoch": 0.8061682993098821, "grad_norm": 0.02677854081405156, "learning_rate": 7.4532523920423215e-06, "loss": 0.0005, "step": 122540 }, { "epoch": 0.8062340874851154, "grad_norm": 0.03551322754693762, "learning_rate": 7.4527521205575035e-06, "loss": 0.0006, "step": 122550 }, { "epoch": 0.8062998756603488, "grad_norm": 0.1198045443054747, "learning_rate": 7.452251816735269e-06, "loss": 0.0017, "step": 122560 }, { "epoch": 0.8063656638355822, "grad_norm": 0.02061267315480052, "learning_rate": 7.451751480582209e-06, "loss": 0.0012, "step": 122570 }, { "epoch": 0.8064314520108156, "grad_norm": 0.024629211570843516, "learning_rate": 7.451251112104926e-06, "loss": 0.002, "step": 122580 }, { "epoch": 0.806497240186049, "grad_norm": 0.046854263192664135, "learning_rate": 7.450750711310013e-06, "loss": 0.0012, "step": 122590 }, { "epoch": 0.8065630283612824, "grad_norm": 0.05974176252443556, "learning_rate": 7.450250278204069e-06, "loss": 0.0009, "step": 122600 }, { "epoch": 0.8066288165365157, "grad_norm": 0.02696594554224983, "learning_rate": 7.449749812793691e-06, "loss": 0.0019, "step": 122610 }, { "epoch": 0.8066946047117491, "grad_norm": 0.02148131421176359, "learning_rate": 7.4492493150854775e-06, "loss": 0.0021, "step": 122620 }, { "epoch": 0.8067603928869825, "grad_norm": 0.03460170437424757, "learning_rate": 7.448748785086025e-06, "loss": 0.0013, "step": 122630 }, { "epoch": 0.8068261810622159, "grad_norm": 0.03766150042001916, "learning_rate": 7.448248222801937e-06, "loss": 0.0013, "step": 122640 }, { "epoch": 0.8068919692374492, "grad_norm": 0.03919035825925163, "learning_rate": 7.44774762823981e-06, "loss": 0.0011, "step": 122650 }, { "epoch": 0.8069577574126826, "grad_norm": 0.010417723111653902, "learning_rate": 7.447247001406246e-06, "loss": 0.0016, "step": 122660 }, { "epoch": 0.807023545587916, "grad_norm": 0.01832684331749719, "learning_rate": 7.446746342307842e-06, "loss": 0.0014, "step": 122670 }, { "epoch": 0.8070893337631494, "grad_norm": 0.09364973311128544, "learning_rate": 7.446245650951201e-06, "loss": 0.0013, "step": 122680 }, { "epoch": 0.8071551219383828, "grad_norm": 0.09395275522789445, "learning_rate": 7.4457449273429236e-06, "loss": 0.0024, "step": 122690 }, { "epoch": 0.8072209101136162, "grad_norm": 0.02702478180985196, "learning_rate": 7.445244171489612e-06, "loss": 0.0006, "step": 122700 }, { "epoch": 0.8072866982888496, "grad_norm": 0.11492501511386727, "learning_rate": 7.444743383397868e-06, "loss": 0.0015, "step": 122710 }, { "epoch": 0.807352486464083, "grad_norm": 0.0030638465559876724, "learning_rate": 7.444242563074294e-06, "loss": 0.0007, "step": 122720 }, { "epoch": 0.8074182746393164, "grad_norm": 0.04042896018770578, "learning_rate": 7.443741710525493e-06, "loss": 0.0023, "step": 122730 }, { "epoch": 0.8074840628145498, "grad_norm": 0.053476908541863974, "learning_rate": 7.443240825758068e-06, "loss": 0.0014, "step": 122740 }, { "epoch": 0.807549850989783, "grad_norm": 0.005163493901567496, "learning_rate": 7.442739908778623e-06, "loss": 0.0007, "step": 122750 }, { "epoch": 0.8076156391650164, "grad_norm": 0.04299369312058248, "learning_rate": 7.442238959593759e-06, "loss": 0.0014, "step": 122760 }, { "epoch": 0.8076814273402498, "grad_norm": 0.026904133229734412, "learning_rate": 7.441737978210088e-06, "loss": 0.0017, "step": 122770 }, { "epoch": 0.8077472155154832, "grad_norm": 0.058912105097875075, "learning_rate": 7.441236964634209e-06, "loss": 0.0013, "step": 122780 }, { "epoch": 0.8078130036907166, "grad_norm": 0.01979675292328955, "learning_rate": 7.440735918872728e-06, "loss": 0.001, "step": 122790 }, { "epoch": 0.80787879186595, "grad_norm": 0.0681196044348807, "learning_rate": 7.440234840932253e-06, "loss": 0.0019, "step": 122800 }, { "epoch": 0.8079445800411834, "grad_norm": 0.01400653130015817, "learning_rate": 7.439733730819389e-06, "loss": 0.0008, "step": 122810 }, { "epoch": 0.8080103682164168, "grad_norm": 0.024011459393645327, "learning_rate": 7.4392325885407415e-06, "loss": 0.0012, "step": 122820 }, { "epoch": 0.8080761563916502, "grad_norm": 0.027211558704916324, "learning_rate": 7.43873141410292e-06, "loss": 0.0014, "step": 122830 }, { "epoch": 0.8081419445668836, "grad_norm": 0.005353888608055072, "learning_rate": 7.438230207512531e-06, "loss": 0.0014, "step": 122840 }, { "epoch": 0.8082077327421169, "grad_norm": 0.038039031531737696, "learning_rate": 7.437728968776182e-06, "loss": 0.0013, "step": 122850 }, { "epoch": 0.8082735209173503, "grad_norm": 0.032177419881067496, "learning_rate": 7.437227697900483e-06, "loss": 0.0025, "step": 122860 }, { "epoch": 0.8083393090925837, "grad_norm": 0.04068265015128954, "learning_rate": 7.436726394892042e-06, "loss": 0.0005, "step": 122870 }, { "epoch": 0.8084050972678171, "grad_norm": 0.007969040893345877, "learning_rate": 7.436225059757467e-06, "loss": 0.0008, "step": 122880 }, { "epoch": 0.8084708854430505, "grad_norm": 0.01696663869763851, "learning_rate": 7.435723692503367e-06, "loss": 0.0016, "step": 122890 }, { "epoch": 0.8085366736182839, "grad_norm": 0.03345348151839068, "learning_rate": 7.435222293136357e-06, "loss": 0.0009, "step": 122900 }, { "epoch": 0.8086024617935172, "grad_norm": 0.009001305757839313, "learning_rate": 7.4347208616630425e-06, "loss": 0.0008, "step": 122910 }, { "epoch": 0.8086682499687506, "grad_norm": 0.017994405782189134, "learning_rate": 7.434219398090036e-06, "loss": 0.0009, "step": 122920 }, { "epoch": 0.808734038143984, "grad_norm": 0.013298951742700507, "learning_rate": 7.433717902423951e-06, "loss": 0.001, "step": 122930 }, { "epoch": 0.8087998263192174, "grad_norm": 0.03152057470329743, "learning_rate": 7.4332163746713945e-06, "loss": 0.0008, "step": 122940 }, { "epoch": 0.8088656144944507, "grad_norm": 0.007687925932831934, "learning_rate": 7.432714814838984e-06, "loss": 0.0013, "step": 122950 }, { "epoch": 0.8089314026696841, "grad_norm": 0.012133327402157647, "learning_rate": 7.4322132229333275e-06, "loss": 0.0006, "step": 122960 }, { "epoch": 0.8089971908449175, "grad_norm": 0.022320854007532018, "learning_rate": 7.431711598961042e-06, "loss": 0.0016, "step": 122970 }, { "epoch": 0.8090629790201509, "grad_norm": 0.03616389829381254, "learning_rate": 7.4312099429287385e-06, "loss": 0.0011, "step": 122980 }, { "epoch": 0.8091287671953843, "grad_norm": 0.14536845106798518, "learning_rate": 7.430708254843032e-06, "loss": 0.0014, "step": 122990 }, { "epoch": 0.8091945553706177, "grad_norm": 0.04729396648805421, "learning_rate": 7.430206534710537e-06, "loss": 0.0013, "step": 123000 }, { "epoch": 0.8092603435458511, "grad_norm": 0.03928974924367276, "learning_rate": 7.429704782537867e-06, "loss": 0.0009, "step": 123010 }, { "epoch": 0.8093261317210845, "grad_norm": 0.05838942356273529, "learning_rate": 7.429202998331637e-06, "loss": 0.0023, "step": 123020 }, { "epoch": 0.8093919198963179, "grad_norm": 0.017316195012035816, "learning_rate": 7.428701182098464e-06, "loss": 0.0011, "step": 123030 }, { "epoch": 0.8094577080715513, "grad_norm": 0.07817346540586968, "learning_rate": 7.428199333844964e-06, "loss": 0.001, "step": 123040 }, { "epoch": 0.8095234962467847, "grad_norm": 0.02271120231088103, "learning_rate": 7.4276974535777536e-06, "loss": 0.001, "step": 123050 }, { "epoch": 0.8095892844220179, "grad_norm": 0.06267719052918004, "learning_rate": 7.427195541303448e-06, "loss": 0.0006, "step": 123060 }, { "epoch": 0.8096550725972513, "grad_norm": 0.09136577496220578, "learning_rate": 7.426693597028666e-06, "loss": 0.0021, "step": 123070 }, { "epoch": 0.8097208607724847, "grad_norm": 0.057316858071973564, "learning_rate": 7.426191620760024e-06, "loss": 0.0005, "step": 123080 }, { "epoch": 0.8097866489477181, "grad_norm": 0.0992845524483582, "learning_rate": 7.425689612504141e-06, "loss": 0.0013, "step": 123090 }, { "epoch": 0.8098524371229515, "grad_norm": 0.015113932092910778, "learning_rate": 7.425187572267636e-06, "loss": 0.0012, "step": 123100 }, { "epoch": 0.8099182252981849, "grad_norm": 0.08466855186295387, "learning_rate": 7.424685500057127e-06, "loss": 0.0011, "step": 123110 }, { "epoch": 0.8099840134734183, "grad_norm": 0.02923334961916481, "learning_rate": 7.424183395879235e-06, "loss": 0.0036, "step": 123120 }, { "epoch": 0.8100498016486517, "grad_norm": 0.04409644797358728, "learning_rate": 7.423681259740576e-06, "loss": 0.0012, "step": 123130 }, { "epoch": 0.8101155898238851, "grad_norm": 0.09023794208061009, "learning_rate": 7.423179091647774e-06, "loss": 0.001, "step": 123140 }, { "epoch": 0.8101813779991185, "grad_norm": 0.17394402877632517, "learning_rate": 7.422676891607449e-06, "loss": 0.0011, "step": 123150 }, { "epoch": 0.8102471661743518, "grad_norm": 0.07401988271291685, "learning_rate": 7.42217465962622e-06, "loss": 0.0011, "step": 123160 }, { "epoch": 0.8103129543495852, "grad_norm": 0.06381067053575341, "learning_rate": 7.421672395710712e-06, "loss": 0.0035, "step": 123170 }, { "epoch": 0.8103787425248186, "grad_norm": 0.051186815513406536, "learning_rate": 7.421170099867544e-06, "loss": 0.0015, "step": 123180 }, { "epoch": 0.810444530700052, "grad_norm": 0.046539432016675236, "learning_rate": 7.42066777210334e-06, "loss": 0.0019, "step": 123190 }, { "epoch": 0.8105103188752854, "grad_norm": 0.262947243103269, "learning_rate": 7.42016541242472e-06, "loss": 0.0015, "step": 123200 }, { "epoch": 0.8105761070505187, "grad_norm": 0.0031156583973349053, "learning_rate": 7.4196630208383115e-06, "loss": 0.0008, "step": 123210 }, { "epoch": 0.8106418952257521, "grad_norm": 0.047666383655231875, "learning_rate": 7.419160597350735e-06, "loss": 0.0012, "step": 123220 }, { "epoch": 0.8107076834009855, "grad_norm": 0.06296734252971489, "learning_rate": 7.4186581419686155e-06, "loss": 0.0011, "step": 123230 }, { "epoch": 0.8107734715762189, "grad_norm": 0.041603904153226845, "learning_rate": 7.4181556546985776e-06, "loss": 0.0018, "step": 123240 }, { "epoch": 0.8108392597514523, "grad_norm": 0.17855202837501952, "learning_rate": 7.417653135547246e-06, "loss": 0.0024, "step": 123250 }, { "epoch": 0.8109050479266856, "grad_norm": 0.111116286537081, "learning_rate": 7.417150584521244e-06, "loss": 0.0015, "step": 123260 }, { "epoch": 0.810970836101919, "grad_norm": 0.014706688893579959, "learning_rate": 7.4166480016272e-06, "loss": 0.0011, "step": 123270 }, { "epoch": 0.8110366242771524, "grad_norm": 0.022987889162973306, "learning_rate": 7.41614538687174e-06, "loss": 0.0017, "step": 123280 }, { "epoch": 0.8111024124523858, "grad_norm": 0.014668162741548595, "learning_rate": 7.41564274026149e-06, "loss": 0.0016, "step": 123290 }, { "epoch": 0.8111682006276192, "grad_norm": 0.05516979578906562, "learning_rate": 7.415140061803077e-06, "loss": 0.0059, "step": 123300 }, { "epoch": 0.8112339888028526, "grad_norm": 0.0036256015746894865, "learning_rate": 7.414637351503126e-06, "loss": 0.0012, "step": 123310 }, { "epoch": 0.811299776978086, "grad_norm": 0.09380644724988628, "learning_rate": 7.414134609368268e-06, "loss": 0.0013, "step": 123320 }, { "epoch": 0.8113655651533194, "grad_norm": 0.11546179716344304, "learning_rate": 7.413631835405131e-06, "loss": 0.0017, "step": 123330 }, { "epoch": 0.8114313533285528, "grad_norm": 0.05477687705001316, "learning_rate": 7.413129029620343e-06, "loss": 0.0008, "step": 123340 }, { "epoch": 0.8114971415037862, "grad_norm": 0.054250568687690244, "learning_rate": 7.412626192020531e-06, "loss": 0.0022, "step": 123350 }, { "epoch": 0.8115629296790194, "grad_norm": 0.04492625595729937, "learning_rate": 7.412123322612328e-06, "loss": 0.0008, "step": 123360 }, { "epoch": 0.8116287178542528, "grad_norm": 0.11320272654398782, "learning_rate": 7.411620421402362e-06, "loss": 0.001, "step": 123370 }, { "epoch": 0.8116945060294862, "grad_norm": 0.027590851010561915, "learning_rate": 7.4111174883972635e-06, "loss": 0.001, "step": 123380 }, { "epoch": 0.8117602942047196, "grad_norm": 0.10604188106798668, "learning_rate": 7.410614523603663e-06, "loss": 0.0014, "step": 123390 }, { "epoch": 0.811826082379953, "grad_norm": 0.058874617666268676, "learning_rate": 7.4101115270281905e-06, "loss": 0.0016, "step": 123400 }, { "epoch": 0.8118918705551864, "grad_norm": 0.07807538807526761, "learning_rate": 7.409608498677481e-06, "loss": 0.0019, "step": 123410 }, { "epoch": 0.8119576587304198, "grad_norm": 0.05521928868485467, "learning_rate": 7.409105438558164e-06, "loss": 0.0014, "step": 123420 }, { "epoch": 0.8120234469056532, "grad_norm": 0.06759824842202221, "learning_rate": 7.408602346676873e-06, "loss": 0.0019, "step": 123430 }, { "epoch": 0.8120892350808866, "grad_norm": 0.05388239682491312, "learning_rate": 7.40809922304024e-06, "loss": 0.0018, "step": 123440 }, { "epoch": 0.81215502325612, "grad_norm": 0.011794586459420886, "learning_rate": 7.407596067654898e-06, "loss": 0.0016, "step": 123450 }, { "epoch": 0.8122208114313533, "grad_norm": 0.015332475058381268, "learning_rate": 7.4070928805274815e-06, "loss": 0.0007, "step": 123460 }, { "epoch": 0.8122865996065867, "grad_norm": 0.07076556809655478, "learning_rate": 7.406589661664624e-06, "loss": 0.0012, "step": 123470 }, { "epoch": 0.8123523877818201, "grad_norm": 0.04593618103341573, "learning_rate": 7.40608641107296e-06, "loss": 0.0011, "step": 123480 }, { "epoch": 0.8124181759570535, "grad_norm": 0.05723520338436025, "learning_rate": 7.405583128759126e-06, "loss": 0.0012, "step": 123490 }, { "epoch": 0.8124839641322869, "grad_norm": 0.025909922958110707, "learning_rate": 7.405079814729754e-06, "loss": 0.0014, "step": 123500 }, { "epoch": 0.8125497523075202, "grad_norm": 0.05317768270110162, "learning_rate": 7.404576468991484e-06, "loss": 0.0015, "step": 123510 }, { "epoch": 0.8126155404827536, "grad_norm": 0.03193422949787353, "learning_rate": 7.40407309155095e-06, "loss": 0.0008, "step": 123520 }, { "epoch": 0.812681328657987, "grad_norm": 0.04225048478516951, "learning_rate": 7.403569682414787e-06, "loss": 0.001, "step": 123530 }, { "epoch": 0.8127471168332204, "grad_norm": 0.014719622111462924, "learning_rate": 7.403066241589633e-06, "loss": 0.0012, "step": 123540 }, { "epoch": 0.8128129050084538, "grad_norm": 0.06571966486456252, "learning_rate": 7.4025627690821275e-06, "loss": 0.0016, "step": 123550 }, { "epoch": 0.8128786931836872, "grad_norm": 0.007323051127743239, "learning_rate": 7.402059264898906e-06, "loss": 0.0011, "step": 123560 }, { "epoch": 0.8129444813589205, "grad_norm": 0.005609017391561003, "learning_rate": 7.401555729046609e-06, "loss": 0.0011, "step": 123570 }, { "epoch": 0.8130102695341539, "grad_norm": 0.019916476220262023, "learning_rate": 7.401052161531873e-06, "loss": 0.0008, "step": 123580 }, { "epoch": 0.8130760577093873, "grad_norm": 0.11565283728400837, "learning_rate": 7.400548562361339e-06, "loss": 0.0017, "step": 123590 }, { "epoch": 0.8131418458846207, "grad_norm": 0.07781867952276511, "learning_rate": 7.400044931541644e-06, "loss": 0.0014, "step": 123600 }, { "epoch": 0.8132076340598541, "grad_norm": 0.22894559997152336, "learning_rate": 7.3995412690794285e-06, "loss": 0.002, "step": 123610 }, { "epoch": 0.8132734222350875, "grad_norm": 0.03665409096165001, "learning_rate": 7.399037574981336e-06, "loss": 0.001, "step": 123620 }, { "epoch": 0.8133392104103209, "grad_norm": 0.07542747309782365, "learning_rate": 7.398533849254004e-06, "loss": 0.0017, "step": 123630 }, { "epoch": 0.8134049985855543, "grad_norm": 0.007908910165976705, "learning_rate": 7.398030091904075e-06, "loss": 0.0018, "step": 123640 }, { "epoch": 0.8134707867607877, "grad_norm": 0.020672943366703982, "learning_rate": 7.3975263029381904e-06, "loss": 0.0013, "step": 123650 }, { "epoch": 0.813536574936021, "grad_norm": 0.07377061745750585, "learning_rate": 7.397022482362991e-06, "loss": 0.002, "step": 123660 }, { "epoch": 0.8136023631112543, "grad_norm": 0.010617469100592232, "learning_rate": 7.396518630185121e-06, "loss": 0.0013, "step": 123670 }, { "epoch": 0.8136681512864877, "grad_norm": 0.008561085010278417, "learning_rate": 7.396014746411223e-06, "loss": 0.0012, "step": 123680 }, { "epoch": 0.8137339394617211, "grad_norm": 0.11464659797568784, "learning_rate": 7.395510831047941e-06, "loss": 0.0014, "step": 123690 }, { "epoch": 0.8137997276369545, "grad_norm": 0.04448896489464944, "learning_rate": 7.395006884101916e-06, "loss": 0.0005, "step": 123700 }, { "epoch": 0.8138655158121879, "grad_norm": 0.01776403792207682, "learning_rate": 7.394502905579795e-06, "loss": 0.001, "step": 123710 }, { "epoch": 0.8139313039874213, "grad_norm": 0.003198288704128079, "learning_rate": 7.393998895488219e-06, "loss": 0.0026, "step": 123720 }, { "epoch": 0.8139970921626547, "grad_norm": 0.014608893075543236, "learning_rate": 7.393494853833836e-06, "loss": 0.0019, "step": 123730 }, { "epoch": 0.8140628803378881, "grad_norm": 0.03159052942034768, "learning_rate": 7.392990780623291e-06, "loss": 0.0005, "step": 123740 }, { "epoch": 0.8141286685131215, "grad_norm": 0.013003347933508668, "learning_rate": 7.39248667586323e-06, "loss": 0.0009, "step": 123750 }, { "epoch": 0.8141944566883549, "grad_norm": 0.046920799558365464, "learning_rate": 7.391982539560297e-06, "loss": 0.0008, "step": 123760 }, { "epoch": 0.8142602448635882, "grad_norm": 0.05047096117920603, "learning_rate": 7.391478371721142e-06, "loss": 0.0018, "step": 123770 }, { "epoch": 0.8143260330388216, "grad_norm": 0.010006467663944925, "learning_rate": 7.390974172352407e-06, "loss": 0.002, "step": 123780 }, { "epoch": 0.814391821214055, "grad_norm": 0.01859576914255316, "learning_rate": 7.390469941460745e-06, "loss": 0.0012, "step": 123790 }, { "epoch": 0.8144576093892884, "grad_norm": 0.005621133071237839, "learning_rate": 7.389965679052801e-06, "loss": 0.0006, "step": 123800 }, { "epoch": 0.8145233975645217, "grad_norm": 0.00684242268703851, "learning_rate": 7.389461385135224e-06, "loss": 0.0013, "step": 123810 }, { "epoch": 0.8145891857397551, "grad_norm": 0.013692583258188604, "learning_rate": 7.388957059714661e-06, "loss": 0.0017, "step": 123820 }, { "epoch": 0.8146549739149885, "grad_norm": 0.021110528767158878, "learning_rate": 7.388452702797764e-06, "loss": 0.0007, "step": 123830 }, { "epoch": 0.8147207620902219, "grad_norm": 0.03561726809693696, "learning_rate": 7.387948314391179e-06, "loss": 0.0012, "step": 123840 }, { "epoch": 0.8147865502654553, "grad_norm": 0.1111479175113836, "learning_rate": 7.387443894501558e-06, "loss": 0.0033, "step": 123850 }, { "epoch": 0.8148523384406887, "grad_norm": 0.07553434381612828, "learning_rate": 7.386939443135552e-06, "loss": 0.0012, "step": 123860 }, { "epoch": 0.814918126615922, "grad_norm": 0.005234037703629473, "learning_rate": 7.386434960299811e-06, "loss": 0.0019, "step": 123870 }, { "epoch": 0.8149839147911554, "grad_norm": 0.08743903222506863, "learning_rate": 7.385930446000986e-06, "loss": 0.0017, "step": 123880 }, { "epoch": 0.8150497029663888, "grad_norm": 0.10583184042003799, "learning_rate": 7.385425900245729e-06, "loss": 0.0015, "step": 123890 }, { "epoch": 0.8151154911416222, "grad_norm": 0.1554006997826174, "learning_rate": 7.384921323040692e-06, "loss": 0.0018, "step": 123900 }, { "epoch": 0.8151812793168556, "grad_norm": 0.10869752238673472, "learning_rate": 7.384416714392527e-06, "loss": 0.001, "step": 123910 }, { "epoch": 0.815247067492089, "grad_norm": 0.06222993354517922, "learning_rate": 7.383912074307885e-06, "loss": 0.002, "step": 123920 }, { "epoch": 0.8153128556673224, "grad_norm": 0.03159041856479958, "learning_rate": 7.383407402793423e-06, "loss": 0.0011, "step": 123930 }, { "epoch": 0.8153786438425558, "grad_norm": 0.12837682196797326, "learning_rate": 7.382902699855793e-06, "loss": 0.0009, "step": 123940 }, { "epoch": 0.8154444320177892, "grad_norm": 0.015055284353371959, "learning_rate": 7.3823979655016484e-06, "loss": 0.0009, "step": 123950 }, { "epoch": 0.8155102201930226, "grad_norm": 0.06029398602433234, "learning_rate": 7.3818931997376446e-06, "loss": 0.0022, "step": 123960 }, { "epoch": 0.815576008368256, "grad_norm": 0.0511865092880556, "learning_rate": 7.3813884025704354e-06, "loss": 0.0014, "step": 123970 }, { "epoch": 0.8156417965434892, "grad_norm": 0.12054342747374158, "learning_rate": 7.380883574006676e-06, "loss": 0.001, "step": 123980 }, { "epoch": 0.8157075847187226, "grad_norm": 0.013610709326267175, "learning_rate": 7.3803787140530244e-06, "loss": 0.0018, "step": 123990 }, { "epoch": 0.815773372893956, "grad_norm": 0.04020045590157819, "learning_rate": 7.379873822716136e-06, "loss": 0.0018, "step": 124000 }, { "epoch": 0.8158391610691894, "grad_norm": 0.10197275010501186, "learning_rate": 7.379368900002666e-06, "loss": 0.0017, "step": 124010 }, { "epoch": 0.8159049492444228, "grad_norm": 0.05249165454621936, "learning_rate": 7.378863945919271e-06, "loss": 0.0012, "step": 124020 }, { "epoch": 0.8159707374196562, "grad_norm": 0.00033963420458959213, "learning_rate": 7.37835896047261e-06, "loss": 0.0014, "step": 124030 }, { "epoch": 0.8160365255948896, "grad_norm": 0.024827957172304776, "learning_rate": 7.377853943669341e-06, "loss": 0.0013, "step": 124040 }, { "epoch": 0.816102313770123, "grad_norm": 0.15625626514849886, "learning_rate": 7.377348895516121e-06, "loss": 0.0015, "step": 124050 }, { "epoch": 0.8161681019453564, "grad_norm": 0.26880525932901356, "learning_rate": 7.376843816019608e-06, "loss": 0.001, "step": 124060 }, { "epoch": 0.8162338901205898, "grad_norm": 0.020275814869431603, "learning_rate": 7.376338705186463e-06, "loss": 0.0012, "step": 124070 }, { "epoch": 0.8162996782958231, "grad_norm": 0.02572649959166669, "learning_rate": 7.375833563023345e-06, "loss": 0.0012, "step": 124080 }, { "epoch": 0.8163654664710565, "grad_norm": 0.016395268837156415, "learning_rate": 7.375328389536912e-06, "loss": 0.0011, "step": 124090 }, { "epoch": 0.8164312546462899, "grad_norm": 0.14701190506021705, "learning_rate": 7.374823184733826e-06, "loss": 0.001, "step": 124100 }, { "epoch": 0.8164970428215232, "grad_norm": 0.021690554078524358, "learning_rate": 7.374317948620747e-06, "loss": 0.0011, "step": 124110 }, { "epoch": 0.8165628309967566, "grad_norm": 0.009241315796446832, "learning_rate": 7.3738126812043376e-06, "loss": 0.0016, "step": 124120 }, { "epoch": 0.81662861917199, "grad_norm": 0.040790532755946636, "learning_rate": 7.373307382491257e-06, "loss": 0.0008, "step": 124130 }, { "epoch": 0.8166944073472234, "grad_norm": 0.0470192241862857, "learning_rate": 7.372802052488169e-06, "loss": 0.0006, "step": 124140 }, { "epoch": 0.8167601955224568, "grad_norm": 0.05470263734679829, "learning_rate": 7.372296691201736e-06, "loss": 0.0023, "step": 124150 }, { "epoch": 0.8168259836976902, "grad_norm": 0.0875777579482852, "learning_rate": 7.371791298638619e-06, "loss": 0.0008, "step": 124160 }, { "epoch": 0.8168917718729236, "grad_norm": 0.05096194182413989, "learning_rate": 7.371285874805482e-06, "loss": 0.0015, "step": 124170 }, { "epoch": 0.8169575600481569, "grad_norm": 0.040733035327695906, "learning_rate": 7.370780419708989e-06, "loss": 0.001, "step": 124180 }, { "epoch": 0.8170233482233903, "grad_norm": 0.15412857884572723, "learning_rate": 7.370274933355803e-06, "loss": 0.0018, "step": 124190 }, { "epoch": 0.8170891363986237, "grad_norm": 0.012246242275308471, "learning_rate": 7.36976941575259e-06, "loss": 0.0007, "step": 124200 }, { "epoch": 0.8171549245738571, "grad_norm": 0.046831170025672525, "learning_rate": 7.3692638669060134e-06, "loss": 0.0009, "step": 124210 }, { "epoch": 0.8172207127490905, "grad_norm": 0.02583627317355692, "learning_rate": 7.368758286822739e-06, "loss": 0.0015, "step": 124220 }, { "epoch": 0.8172865009243239, "grad_norm": 0.10248155676572242, "learning_rate": 7.368252675509433e-06, "loss": 0.0008, "step": 124230 }, { "epoch": 0.8173522890995573, "grad_norm": 0.09738278844387968, "learning_rate": 7.36774703297276e-06, "loss": 0.0031, "step": 124240 }, { "epoch": 0.8174180772747907, "grad_norm": 0.03922306515981235, "learning_rate": 7.3672413592193885e-06, "loss": 0.0015, "step": 124250 }, { "epoch": 0.817483865450024, "grad_norm": 0.07311263990890438, "learning_rate": 7.366735654255983e-06, "loss": 0.0011, "step": 124260 }, { "epoch": 0.8175496536252574, "grad_norm": 0.009411062096053073, "learning_rate": 7.366229918089211e-06, "loss": 0.0052, "step": 124270 }, { "epoch": 0.8176154418004907, "grad_norm": 0.019674401786419125, "learning_rate": 7.365724150725744e-06, "loss": 0.0008, "step": 124280 }, { "epoch": 0.8176812299757241, "grad_norm": 0.017069219616295782, "learning_rate": 7.365218352172246e-06, "loss": 0.0018, "step": 124290 }, { "epoch": 0.8177470181509575, "grad_norm": 0.01723045373999721, "learning_rate": 7.364712522435385e-06, "loss": 0.0009, "step": 124300 }, { "epoch": 0.8178128063261909, "grad_norm": 0.03495062809918709, "learning_rate": 7.364206661521833e-06, "loss": 0.0011, "step": 124310 }, { "epoch": 0.8178785945014243, "grad_norm": 0.11460093167927078, "learning_rate": 7.363700769438259e-06, "loss": 0.004, "step": 124320 }, { "epoch": 0.8179443826766577, "grad_norm": 0.03492534546556275, "learning_rate": 7.3631948461913306e-06, "loss": 0.0026, "step": 124330 }, { "epoch": 0.8180101708518911, "grad_norm": 0.011267004711519032, "learning_rate": 7.362688891787721e-06, "loss": 0.0008, "step": 124340 }, { "epoch": 0.8180759590271245, "grad_norm": 0.07290286458911155, "learning_rate": 7.362182906234096e-06, "loss": 0.0011, "step": 124350 }, { "epoch": 0.8181417472023579, "grad_norm": 0.011337472810963097, "learning_rate": 7.36167688953713e-06, "loss": 0.0009, "step": 124360 }, { "epoch": 0.8182075353775913, "grad_norm": 0.14023926186783162, "learning_rate": 7.361170841703495e-06, "loss": 0.0016, "step": 124370 }, { "epoch": 0.8182733235528246, "grad_norm": 0.06533566666080423, "learning_rate": 7.360664762739862e-06, "loss": 0.0013, "step": 124380 }, { "epoch": 0.818339111728058, "grad_norm": 0.07510018391838282, "learning_rate": 7.360158652652902e-06, "loss": 0.0013, "step": 124390 }, { "epoch": 0.8184048999032913, "grad_norm": 0.027762183445819818, "learning_rate": 7.35965251144929e-06, "loss": 0.0006, "step": 124400 }, { "epoch": 0.8184706880785247, "grad_norm": 0.09950386646282115, "learning_rate": 7.359146339135696e-06, "loss": 0.0013, "step": 124410 }, { "epoch": 0.8185364762537581, "grad_norm": 0.1886875364195584, "learning_rate": 7.358640135718796e-06, "loss": 0.0012, "step": 124420 }, { "epoch": 0.8186022644289915, "grad_norm": 0.18536154476393354, "learning_rate": 7.358133901205262e-06, "loss": 0.0016, "step": 124430 }, { "epoch": 0.8186680526042249, "grad_norm": 0.013500529935325687, "learning_rate": 7.3576276356017695e-06, "loss": 0.0015, "step": 124440 }, { "epoch": 0.8187338407794583, "grad_norm": 0.09735433893809385, "learning_rate": 7.357121338914992e-06, "loss": 0.0013, "step": 124450 }, { "epoch": 0.8187996289546917, "grad_norm": 0.03302581144770722, "learning_rate": 7.3566150111516065e-06, "loss": 0.0018, "step": 124460 }, { "epoch": 0.8188654171299251, "grad_norm": 0.0249047264723881, "learning_rate": 7.356108652318288e-06, "loss": 0.0008, "step": 124470 }, { "epoch": 0.8189312053051585, "grad_norm": 0.007675593511955885, "learning_rate": 7.355602262421711e-06, "loss": 0.0013, "step": 124480 }, { "epoch": 0.8189969934803918, "grad_norm": 0.2991414371532148, "learning_rate": 7.355095841468551e-06, "loss": 0.0022, "step": 124490 }, { "epoch": 0.8190627816556252, "grad_norm": 0.034922076535301154, "learning_rate": 7.354589389465489e-06, "loss": 0.0013, "step": 124500 }, { "epoch": 0.8191285698308586, "grad_norm": 0.1949622002152039, "learning_rate": 7.3540829064191975e-06, "loss": 0.002, "step": 124510 }, { "epoch": 0.819194358006092, "grad_norm": 0.03403811897567194, "learning_rate": 7.353576392336357e-06, "loss": 0.0015, "step": 124520 }, { "epoch": 0.8192601461813254, "grad_norm": 0.06658342484429028, "learning_rate": 7.353069847223644e-06, "loss": 0.0017, "step": 124530 }, { "epoch": 0.8193259343565588, "grad_norm": 0.011417871818820393, "learning_rate": 7.352563271087738e-06, "loss": 0.0008, "step": 124540 }, { "epoch": 0.8193917225317922, "grad_norm": 0.008604498130055573, "learning_rate": 7.352056663935316e-06, "loss": 0.0014, "step": 124550 }, { "epoch": 0.8194575107070255, "grad_norm": 0.027108264743215895, "learning_rate": 7.3515500257730585e-06, "loss": 0.0032, "step": 124560 }, { "epoch": 0.8195232988822589, "grad_norm": 0.03408122552826953, "learning_rate": 7.351043356607646e-06, "loss": 0.0016, "step": 124570 }, { "epoch": 0.8195890870574923, "grad_norm": 0.052836028926923066, "learning_rate": 7.3505366564457555e-06, "loss": 0.0013, "step": 124580 }, { "epoch": 0.8196548752327256, "grad_norm": 0.052633011224504825, "learning_rate": 7.350029925294071e-06, "loss": 0.001, "step": 124590 }, { "epoch": 0.819720663407959, "grad_norm": 0.0002435958250367975, "learning_rate": 7.349523163159271e-06, "loss": 0.0011, "step": 124600 }, { "epoch": 0.8197864515831924, "grad_norm": 0.008409420520716155, "learning_rate": 7.349016370048038e-06, "loss": 0.0007, "step": 124610 }, { "epoch": 0.8198522397584258, "grad_norm": 0.0469978607773065, "learning_rate": 7.348509545967051e-06, "loss": 0.0007, "step": 124620 }, { "epoch": 0.8199180279336592, "grad_norm": 0.0241966930474291, "learning_rate": 7.348002690922995e-06, "loss": 0.001, "step": 124630 }, { "epoch": 0.8199838161088926, "grad_norm": 0.032941951875573014, "learning_rate": 7.347495804922551e-06, "loss": 0.0008, "step": 124640 }, { "epoch": 0.820049604284126, "grad_norm": 0.0030474602160591615, "learning_rate": 7.3469888879724025e-06, "loss": 0.0013, "step": 124650 }, { "epoch": 0.8201153924593594, "grad_norm": 0.005513531406845063, "learning_rate": 7.346481940079233e-06, "loss": 0.0008, "step": 124660 }, { "epoch": 0.8201811806345928, "grad_norm": 0.05482463076883686, "learning_rate": 7.345974961249725e-06, "loss": 0.0013, "step": 124670 }, { "epoch": 0.8202469688098262, "grad_norm": 0.04227372489770805, "learning_rate": 7.345467951490564e-06, "loss": 0.0014, "step": 124680 }, { "epoch": 0.8203127569850595, "grad_norm": 0.016183208884944136, "learning_rate": 7.344960910808432e-06, "loss": 0.0012, "step": 124690 }, { "epoch": 0.8203785451602928, "grad_norm": 0.0024901156003118144, "learning_rate": 7.344453839210017e-06, "loss": 0.0009, "step": 124700 }, { "epoch": 0.8204443333355262, "grad_norm": 0.11356867965938641, "learning_rate": 7.343946736702003e-06, "loss": 0.0024, "step": 124710 }, { "epoch": 0.8205101215107596, "grad_norm": 0.06491811444463975, "learning_rate": 7.343439603291076e-06, "loss": 0.0007, "step": 124720 }, { "epoch": 0.820575909685993, "grad_norm": 0.02094825469969348, "learning_rate": 7.342932438983921e-06, "loss": 0.0011, "step": 124730 }, { "epoch": 0.8206416978612264, "grad_norm": 0.010448708760238335, "learning_rate": 7.342425243787226e-06, "loss": 0.0006, "step": 124740 }, { "epoch": 0.8207074860364598, "grad_norm": 0.07996109356917061, "learning_rate": 7.341918017707675e-06, "loss": 0.0015, "step": 124750 }, { "epoch": 0.8207732742116932, "grad_norm": 0.0768936527742012, "learning_rate": 7.34141076075196e-06, "loss": 0.0016, "step": 124760 }, { "epoch": 0.8208390623869266, "grad_norm": 0.027980295015336883, "learning_rate": 7.340903472926766e-06, "loss": 0.001, "step": 124770 }, { "epoch": 0.82090485056216, "grad_norm": 0.03654411856387503, "learning_rate": 7.3403961542387806e-06, "loss": 0.0011, "step": 124780 }, { "epoch": 0.8209706387373933, "grad_norm": 0.09174509047877454, "learning_rate": 7.3398888046946935e-06, "loss": 0.0026, "step": 124790 }, { "epoch": 0.8210364269126267, "grad_norm": 0.0011532426961746427, "learning_rate": 7.339381424301194e-06, "loss": 0.002, "step": 124800 }, { "epoch": 0.8211022150878601, "grad_norm": 0.034296971902994126, "learning_rate": 7.338874013064971e-06, "loss": 0.0014, "step": 124810 }, { "epoch": 0.8211680032630935, "grad_norm": 0.009795262140708435, "learning_rate": 7.338366570992712e-06, "loss": 0.0013, "step": 124820 }, { "epoch": 0.8212337914383269, "grad_norm": 0.03278687003344145, "learning_rate": 7.337859098091111e-06, "loss": 0.0007, "step": 124830 }, { "epoch": 0.8212995796135603, "grad_norm": 0.035136369203685625, "learning_rate": 7.337351594366857e-06, "loss": 0.0012, "step": 124840 }, { "epoch": 0.8213653677887937, "grad_norm": 0.028748030338475755, "learning_rate": 7.33684405982664e-06, "loss": 0.0011, "step": 124850 }, { "epoch": 0.821431155964027, "grad_norm": 0.033687167457854855, "learning_rate": 7.336336494477153e-06, "loss": 0.001, "step": 124860 }, { "epoch": 0.8214969441392604, "grad_norm": 0.02157545705408428, "learning_rate": 7.335828898325087e-06, "loss": 0.0008, "step": 124870 }, { "epoch": 0.8215627323144938, "grad_norm": 0.08230569165599902, "learning_rate": 7.3353212713771334e-06, "loss": 0.0011, "step": 124880 }, { "epoch": 0.8216285204897271, "grad_norm": 0.07869035762119914, "learning_rate": 7.334813613639987e-06, "loss": 0.0014, "step": 124890 }, { "epoch": 0.8216943086649605, "grad_norm": 0.076391062633265, "learning_rate": 7.334305925120339e-06, "loss": 0.0009, "step": 124900 }, { "epoch": 0.8217600968401939, "grad_norm": 0.038845744627555134, "learning_rate": 7.333798205824885e-06, "loss": 0.0019, "step": 124910 }, { "epoch": 0.8218258850154273, "grad_norm": 0.03222053879579205, "learning_rate": 7.333290455760316e-06, "loss": 0.0008, "step": 124920 }, { "epoch": 0.8218916731906607, "grad_norm": 0.006505374834867635, "learning_rate": 7.332782674933326e-06, "loss": 0.0009, "step": 124930 }, { "epoch": 0.8219574613658941, "grad_norm": 0.028734885547056038, "learning_rate": 7.332274863350613e-06, "loss": 0.0024, "step": 124940 }, { "epoch": 0.8220232495411275, "grad_norm": 0.03338705092860957, "learning_rate": 7.33176702101887e-06, "loss": 0.0018, "step": 124950 }, { "epoch": 0.8220890377163609, "grad_norm": 0.1156984453920841, "learning_rate": 7.331259147944791e-06, "loss": 0.0015, "step": 124960 }, { "epoch": 0.8221548258915943, "grad_norm": 0.023685190005416676, "learning_rate": 7.330751244135076e-06, "loss": 0.0013, "step": 124970 }, { "epoch": 0.8222206140668277, "grad_norm": 0.012125791849926414, "learning_rate": 7.330243309596419e-06, "loss": 0.0013, "step": 124980 }, { "epoch": 0.8222864022420611, "grad_norm": 0.004883358280431778, "learning_rate": 7.3297353443355155e-06, "loss": 0.0014, "step": 124990 }, { "epoch": 0.8223521904172943, "grad_norm": 0.183790349362008, "learning_rate": 7.329227348359063e-06, "loss": 0.0011, "step": 125000 }, { "epoch": 0.8223521904172943, "eval_loss": 0.0008855647756718099, "eval_runtime": 13.0929, "eval_samples_per_second": 15.275, "eval_steps_per_second": 7.638, "step": 125000 }, { "epoch": 0.8224179785925277, "grad_norm": 0.004664681916647502, "learning_rate": 7.3287193216737596e-06, "loss": 0.0015, "step": 125010 }, { "epoch": 0.8224837667677611, "grad_norm": 0.0200841677064191, "learning_rate": 7.328211264286304e-06, "loss": 0.0011, "step": 125020 }, { "epoch": 0.8225495549429945, "grad_norm": 0.0609858318625686, "learning_rate": 7.327703176203392e-06, "loss": 0.0007, "step": 125030 }, { "epoch": 0.8226153431182279, "grad_norm": 0.04856138681826322, "learning_rate": 7.327195057431725e-06, "loss": 0.0011, "step": 125040 }, { "epoch": 0.8226811312934613, "grad_norm": 0.01901013574681311, "learning_rate": 7.326686907978003e-06, "loss": 0.0012, "step": 125050 }, { "epoch": 0.8227469194686947, "grad_norm": 0.02005202931183454, "learning_rate": 7.326178727848919e-06, "loss": 0.0008, "step": 125060 }, { "epoch": 0.8228127076439281, "grad_norm": 0.03052824629944537, "learning_rate": 7.325670517051181e-06, "loss": 0.0022, "step": 125070 }, { "epoch": 0.8228784958191615, "grad_norm": 0.12880904778128296, "learning_rate": 7.325162275591485e-06, "loss": 0.002, "step": 125080 }, { "epoch": 0.8229442839943949, "grad_norm": 0.023473142368454724, "learning_rate": 7.324654003476533e-06, "loss": 0.0014, "step": 125090 }, { "epoch": 0.8230100721696282, "grad_norm": 0.005618180124282961, "learning_rate": 7.324145700713026e-06, "loss": 0.0014, "step": 125100 }, { "epoch": 0.8230758603448616, "grad_norm": 0.012753943553971445, "learning_rate": 7.323637367307663e-06, "loss": 0.0011, "step": 125110 }, { "epoch": 0.823141648520095, "grad_norm": 0.06665914341429077, "learning_rate": 7.323129003267151e-06, "loss": 0.0021, "step": 125120 }, { "epoch": 0.8232074366953284, "grad_norm": 0.07965642470720147, "learning_rate": 7.322620608598187e-06, "loss": 0.0008, "step": 125130 }, { "epoch": 0.8232732248705618, "grad_norm": 0.0008840723814331438, "learning_rate": 7.322112183307477e-06, "loss": 0.0016, "step": 125140 }, { "epoch": 0.8233390130457952, "grad_norm": 0.15802970423823187, "learning_rate": 7.321603727401724e-06, "loss": 0.0019, "step": 125150 }, { "epoch": 0.8234048012210285, "grad_norm": 0.006544438397832193, "learning_rate": 7.32109524088763e-06, "loss": 0.001, "step": 125160 }, { "epoch": 0.8234705893962619, "grad_norm": 0.18365732378594288, "learning_rate": 7.320586723771901e-06, "loss": 0.0016, "step": 125170 }, { "epoch": 0.8235363775714953, "grad_norm": 0.025283182398302132, "learning_rate": 7.3200781760612405e-06, "loss": 0.001, "step": 125180 }, { "epoch": 0.8236021657467287, "grad_norm": 0.023535049428769203, "learning_rate": 7.319569597762351e-06, "loss": 0.0014, "step": 125190 }, { "epoch": 0.823667953921962, "grad_norm": 0.07599137317809515, "learning_rate": 7.319060988881941e-06, "loss": 0.0009, "step": 125200 }, { "epoch": 0.8237337420971954, "grad_norm": 0.058668664837880365, "learning_rate": 7.318552349426714e-06, "loss": 0.0016, "step": 125210 }, { "epoch": 0.8237995302724288, "grad_norm": 0.057219179682045214, "learning_rate": 7.318043679403378e-06, "loss": 0.0018, "step": 125220 }, { "epoch": 0.8238653184476622, "grad_norm": 0.06025292875361759, "learning_rate": 7.3175349788186365e-06, "loss": 0.0012, "step": 125230 }, { "epoch": 0.8239311066228956, "grad_norm": 0.09744653121644833, "learning_rate": 7.3170262476792e-06, "loss": 0.0022, "step": 125240 }, { "epoch": 0.823996894798129, "grad_norm": 0.023785753002589338, "learning_rate": 7.316517485991772e-06, "loss": 0.0015, "step": 125250 }, { "epoch": 0.8240626829733624, "grad_norm": 0.06834665747432053, "learning_rate": 7.3160086937630615e-06, "loss": 0.0007, "step": 125260 }, { "epoch": 0.8241284711485958, "grad_norm": 0.11884505859138758, "learning_rate": 7.315499870999777e-06, "loss": 0.0019, "step": 125270 }, { "epoch": 0.8241942593238292, "grad_norm": 0.005951485469198949, "learning_rate": 7.314991017708626e-06, "loss": 0.0011, "step": 125280 }, { "epoch": 0.8242600474990626, "grad_norm": 0.1034280867835816, "learning_rate": 7.314482133896319e-06, "loss": 0.0013, "step": 125290 }, { "epoch": 0.8243258356742958, "grad_norm": 0.038771137620899676, "learning_rate": 7.313973219569564e-06, "loss": 0.0015, "step": 125300 }, { "epoch": 0.8243916238495292, "grad_norm": 0.02272298483289852, "learning_rate": 7.313464274735069e-06, "loss": 0.0012, "step": 125310 }, { "epoch": 0.8244574120247626, "grad_norm": 0.01182978123308351, "learning_rate": 7.312955299399545e-06, "loss": 0.0009, "step": 125320 }, { "epoch": 0.824523200199996, "grad_norm": 0.08771293696199978, "learning_rate": 7.312446293569705e-06, "loss": 0.0036, "step": 125330 }, { "epoch": 0.8245889883752294, "grad_norm": 0.028005386212646755, "learning_rate": 7.311937257252258e-06, "loss": 0.0008, "step": 125340 }, { "epoch": 0.8246547765504628, "grad_norm": 0.035577232636258, "learning_rate": 7.311428190453914e-06, "loss": 0.0008, "step": 125350 }, { "epoch": 0.8247205647256962, "grad_norm": 0.056665021772433294, "learning_rate": 7.310919093181386e-06, "loss": 0.0016, "step": 125360 }, { "epoch": 0.8247863529009296, "grad_norm": 0.033642682983257656, "learning_rate": 7.3104099654413854e-06, "loss": 0.0007, "step": 125370 }, { "epoch": 0.824852141076163, "grad_norm": 0.010536705545248774, "learning_rate": 7.3099008072406244e-06, "loss": 0.001, "step": 125380 }, { "epoch": 0.8249179292513964, "grad_norm": 0.21468328162328068, "learning_rate": 7.309391618585816e-06, "loss": 0.0021, "step": 125390 }, { "epoch": 0.8249837174266298, "grad_norm": 0.09181747213303543, "learning_rate": 7.308882399483674e-06, "loss": 0.0012, "step": 125400 }, { "epoch": 0.8250495056018631, "grad_norm": 0.03766367527861545, "learning_rate": 7.308373149940913e-06, "loss": 0.0005, "step": 125410 }, { "epoch": 0.8251152937770965, "grad_norm": 0.007757128299583739, "learning_rate": 7.307863869964244e-06, "loss": 0.0008, "step": 125420 }, { "epoch": 0.8251810819523299, "grad_norm": 0.06449218386030328, "learning_rate": 7.307354559560383e-06, "loss": 0.0011, "step": 125430 }, { "epoch": 0.8252468701275633, "grad_norm": 0.09668239810403602, "learning_rate": 7.306845218736046e-06, "loss": 0.001, "step": 125440 }, { "epoch": 0.8253126583027967, "grad_norm": 0.0897303773858843, "learning_rate": 7.306335847497945e-06, "loss": 0.0019, "step": 125450 }, { "epoch": 0.82537844647803, "grad_norm": 0.032063243629259004, "learning_rate": 7.305826445852801e-06, "loss": 0.0005, "step": 125460 }, { "epoch": 0.8254442346532634, "grad_norm": 0.0015791797884448968, "learning_rate": 7.305317013807325e-06, "loss": 0.002, "step": 125470 }, { "epoch": 0.8255100228284968, "grad_norm": 0.11127790983195195, "learning_rate": 7.304807551368235e-06, "loss": 0.0011, "step": 125480 }, { "epoch": 0.8255758110037302, "grad_norm": 0.055592206079325565, "learning_rate": 7.3042980585422485e-06, "loss": 0.0007, "step": 125490 }, { "epoch": 0.8256415991789636, "grad_norm": 0.047105371972173196, "learning_rate": 7.303788535336081e-06, "loss": 0.0009, "step": 125500 }, { "epoch": 0.8257073873541969, "grad_norm": 0.041869656449594096, "learning_rate": 7.303278981756453e-06, "loss": 0.0005, "step": 125510 }, { "epoch": 0.8257731755294303, "grad_norm": 0.06478881512435411, "learning_rate": 7.302769397810079e-06, "loss": 0.0009, "step": 125520 }, { "epoch": 0.8258389637046637, "grad_norm": 0.026311663757067685, "learning_rate": 7.302259783503681e-06, "loss": 0.0008, "step": 125530 }, { "epoch": 0.8259047518798971, "grad_norm": 0.12132612673535027, "learning_rate": 7.301750138843976e-06, "loss": 0.0013, "step": 125540 }, { "epoch": 0.8259705400551305, "grad_norm": 0.059450300333073955, "learning_rate": 7.301240463837682e-06, "loss": 0.0019, "step": 125550 }, { "epoch": 0.8260363282303639, "grad_norm": 0.060326256235159956, "learning_rate": 7.3007307584915224e-06, "loss": 0.0016, "step": 125560 }, { "epoch": 0.8261021164055973, "grad_norm": 0.01459891833975349, "learning_rate": 7.3002210228122125e-06, "loss": 0.0012, "step": 125570 }, { "epoch": 0.8261679045808307, "grad_norm": 0.04587465268351486, "learning_rate": 7.299711256806475e-06, "loss": 0.0007, "step": 125580 }, { "epoch": 0.8262336927560641, "grad_norm": 0.11412795850472311, "learning_rate": 7.299201460481031e-06, "loss": 0.0014, "step": 125590 }, { "epoch": 0.8262994809312975, "grad_norm": 0.10244313091343933, "learning_rate": 7.298691633842603e-06, "loss": 0.0008, "step": 125600 }, { "epoch": 0.8263652691065307, "grad_norm": 0.14261780935323076, "learning_rate": 7.29818177689791e-06, "loss": 0.0022, "step": 125610 }, { "epoch": 0.8264310572817641, "grad_norm": 0.005484497699326148, "learning_rate": 7.297671889653676e-06, "loss": 0.0008, "step": 125620 }, { "epoch": 0.8264968454569975, "grad_norm": 0.030580146867567735, "learning_rate": 7.297161972116622e-06, "loss": 0.0004, "step": 125630 }, { "epoch": 0.8265626336322309, "grad_norm": 0.03820690892852529, "learning_rate": 7.29665202429347e-06, "loss": 0.0009, "step": 125640 }, { "epoch": 0.8266284218074643, "grad_norm": 0.048151372623102984, "learning_rate": 7.296142046190946e-06, "loss": 0.0007, "step": 125650 }, { "epoch": 0.8266942099826977, "grad_norm": 0.006504727196369945, "learning_rate": 7.295632037815772e-06, "loss": 0.0014, "step": 125660 }, { "epoch": 0.8267599981579311, "grad_norm": 0.07602346375316389, "learning_rate": 7.2951219991746714e-06, "loss": 0.0012, "step": 125670 }, { "epoch": 0.8268257863331645, "grad_norm": 0.026422826413728097, "learning_rate": 7.29461193027437e-06, "loss": 0.0015, "step": 125680 }, { "epoch": 0.8268915745083979, "grad_norm": 0.018702840378390574, "learning_rate": 7.294101831121593e-06, "loss": 0.0013, "step": 125690 }, { "epoch": 0.8269573626836313, "grad_norm": 0.11232911742596208, "learning_rate": 7.293591701723064e-06, "loss": 0.0009, "step": 125700 }, { "epoch": 0.8270231508588646, "grad_norm": 0.14421813926805502, "learning_rate": 7.293081542085509e-06, "loss": 0.0015, "step": 125710 }, { "epoch": 0.827088939034098, "grad_norm": 0.048543046758428056, "learning_rate": 7.2925713522156534e-06, "loss": 0.0021, "step": 125720 }, { "epoch": 0.8271547272093314, "grad_norm": 0.05211071724491841, "learning_rate": 7.292061132120226e-06, "loss": 0.0012, "step": 125730 }, { "epoch": 0.8272205153845648, "grad_norm": 0.044383040833536856, "learning_rate": 7.291550881805951e-06, "loss": 0.0018, "step": 125740 }, { "epoch": 0.8272863035597982, "grad_norm": 0.002242489725814615, "learning_rate": 7.291040601279558e-06, "loss": 0.0016, "step": 125750 }, { "epoch": 0.8273520917350315, "grad_norm": 0.016589607124495278, "learning_rate": 7.290530290547772e-06, "loss": 0.0022, "step": 125760 }, { "epoch": 0.8274178799102649, "grad_norm": 0.016002780557031884, "learning_rate": 7.290019949617324e-06, "loss": 0.0014, "step": 125770 }, { "epoch": 0.8274836680854983, "grad_norm": 0.01192416040334697, "learning_rate": 7.28950957849494e-06, "loss": 0.0013, "step": 125780 }, { "epoch": 0.8275494562607317, "grad_norm": 0.017516069142098675, "learning_rate": 7.288999177187349e-06, "loss": 0.0012, "step": 125790 }, { "epoch": 0.8276152444359651, "grad_norm": 0.17440905224218858, "learning_rate": 7.288488745701281e-06, "loss": 0.0013, "step": 125800 }, { "epoch": 0.8276810326111984, "grad_norm": 0.0026828243228541413, "learning_rate": 7.2879782840434654e-06, "loss": 0.0013, "step": 125810 }, { "epoch": 0.8277468207864318, "grad_norm": 0.05057418133774914, "learning_rate": 7.287467792220633e-06, "loss": 0.001, "step": 125820 }, { "epoch": 0.8278126089616652, "grad_norm": 0.030654924145350895, "learning_rate": 7.286957270239512e-06, "loss": 0.0007, "step": 125830 }, { "epoch": 0.8278783971368986, "grad_norm": 0.2517387478075604, "learning_rate": 7.286446718106834e-06, "loss": 0.0008, "step": 125840 }, { "epoch": 0.827944185312132, "grad_norm": 0.014381394749181896, "learning_rate": 7.285936135829332e-06, "loss": 0.0018, "step": 125850 }, { "epoch": 0.8280099734873654, "grad_norm": 0.010803427706593692, "learning_rate": 7.285425523413738e-06, "loss": 0.001, "step": 125860 }, { "epoch": 0.8280757616625988, "grad_norm": 0.04645157559430055, "learning_rate": 7.28491488086678e-06, "loss": 0.0007, "step": 125870 }, { "epoch": 0.8281415498378322, "grad_norm": 0.07044742599246495, "learning_rate": 7.284404208195193e-06, "loss": 0.0009, "step": 125880 }, { "epoch": 0.8282073380130656, "grad_norm": 0.01038029740046218, "learning_rate": 7.2838935054057105e-06, "loss": 0.0011, "step": 125890 }, { "epoch": 0.828273126188299, "grad_norm": 0.010354592517360947, "learning_rate": 7.283382772505063e-06, "loss": 0.0043, "step": 125900 }, { "epoch": 0.8283389143635324, "grad_norm": 0.039266831466510006, "learning_rate": 7.282872009499986e-06, "loss": 0.0012, "step": 125910 }, { "epoch": 0.8284047025387656, "grad_norm": 0.06417478057333093, "learning_rate": 7.282361216397213e-06, "loss": 0.0017, "step": 125920 }, { "epoch": 0.828470490713999, "grad_norm": 0.1283569903076453, "learning_rate": 7.2818503932034815e-06, "loss": 0.0016, "step": 125930 }, { "epoch": 0.8285362788892324, "grad_norm": 0.15906177304967178, "learning_rate": 7.28133953992552e-06, "loss": 0.0023, "step": 125940 }, { "epoch": 0.8286020670644658, "grad_norm": 0.03531690981928371, "learning_rate": 7.280828656570069e-06, "loss": 0.0012, "step": 125950 }, { "epoch": 0.8286678552396992, "grad_norm": 0.03270915954734233, "learning_rate": 7.28031774314386e-06, "loss": 0.0021, "step": 125960 }, { "epoch": 0.8287336434149326, "grad_norm": 0.09649807227695927, "learning_rate": 7.2798067996536316e-06, "loss": 0.0011, "step": 125970 }, { "epoch": 0.828799431590166, "grad_norm": 0.10277463048094879, "learning_rate": 7.279295826106121e-06, "loss": 0.0012, "step": 125980 }, { "epoch": 0.8288652197653994, "grad_norm": 0.026368106591511824, "learning_rate": 7.278784822508061e-06, "loss": 0.0022, "step": 125990 }, { "epoch": 0.8289310079406328, "grad_norm": 0.05873476235455423, "learning_rate": 7.278273788866193e-06, "loss": 0.0017, "step": 126000 }, { "epoch": 0.8289967961158662, "grad_norm": 0.05887047498519989, "learning_rate": 7.277762725187253e-06, "loss": 0.0022, "step": 126010 }, { "epoch": 0.8290625842910995, "grad_norm": 0.14416562819201245, "learning_rate": 7.2772516314779775e-06, "loss": 0.0022, "step": 126020 }, { "epoch": 0.8291283724663329, "grad_norm": 0.04654022635551116, "learning_rate": 7.276740507745106e-06, "loss": 0.0016, "step": 126030 }, { "epoch": 0.8291941606415663, "grad_norm": 0.12426757899264124, "learning_rate": 7.276229353995378e-06, "loss": 0.0008, "step": 126040 }, { "epoch": 0.8292599488167997, "grad_norm": 0.01807221254819285, "learning_rate": 7.27571817023553e-06, "loss": 0.0009, "step": 126050 }, { "epoch": 0.829325736992033, "grad_norm": 0.1862117203069186, "learning_rate": 7.275206956472306e-06, "loss": 0.0011, "step": 126060 }, { "epoch": 0.8293915251672664, "grad_norm": 0.03692773045814557, "learning_rate": 7.2746957127124425e-06, "loss": 0.0017, "step": 126070 }, { "epoch": 0.8294573133424998, "grad_norm": 0.015483022469465736, "learning_rate": 7.27418443896268e-06, "loss": 0.0007, "step": 126080 }, { "epoch": 0.8295231015177332, "grad_norm": 0.037317396956116355, "learning_rate": 7.273673135229759e-06, "loss": 0.0057, "step": 126090 }, { "epoch": 0.8295888896929666, "grad_norm": 0.0036671967751716744, "learning_rate": 7.273161801520423e-06, "loss": 0.0022, "step": 126100 }, { "epoch": 0.8296546778682, "grad_norm": 0.09744239935420118, "learning_rate": 7.272650437841411e-06, "loss": 0.0018, "step": 126110 }, { "epoch": 0.8297204660434333, "grad_norm": 0.09052264488385425, "learning_rate": 7.272139044199467e-06, "loss": 0.002, "step": 126120 }, { "epoch": 0.8297862542186667, "grad_norm": 0.04062887082253203, "learning_rate": 7.2716276206013305e-06, "loss": 0.0007, "step": 126130 }, { "epoch": 0.8298520423939001, "grad_norm": 0.08522601758138933, "learning_rate": 7.271116167053746e-06, "loss": 0.0021, "step": 126140 }, { "epoch": 0.8299178305691335, "grad_norm": 0.053671992451281875, "learning_rate": 7.270604683563456e-06, "loss": 0.0008, "step": 126150 }, { "epoch": 0.8299836187443669, "grad_norm": 0.05070572792772074, "learning_rate": 7.2700931701372054e-06, "loss": 0.0007, "step": 126160 }, { "epoch": 0.8300494069196003, "grad_norm": 0.09290145591339515, "learning_rate": 7.269581626781737e-06, "loss": 0.0011, "step": 126170 }, { "epoch": 0.8301151950948337, "grad_norm": 0.06668947481548253, "learning_rate": 7.269070053503794e-06, "loss": 0.0008, "step": 126180 }, { "epoch": 0.8301809832700671, "grad_norm": 0.07100827245226324, "learning_rate": 7.2685584503101225e-06, "loss": 0.0013, "step": 126190 }, { "epoch": 0.8302467714453005, "grad_norm": 0.03788856113258715, "learning_rate": 7.268046817207467e-06, "loss": 0.0007, "step": 126200 }, { "epoch": 0.8303125596205339, "grad_norm": 0.040296109281046294, "learning_rate": 7.267535154202574e-06, "loss": 0.0024, "step": 126210 }, { "epoch": 0.8303783477957671, "grad_norm": 0.04914221861693428, "learning_rate": 7.267023461302186e-06, "loss": 0.0015, "step": 126220 }, { "epoch": 0.8304441359710005, "grad_norm": 0.04513958231181573, "learning_rate": 7.266511738513054e-06, "loss": 0.0016, "step": 126230 }, { "epoch": 0.8305099241462339, "grad_norm": 0.13112187419853358, "learning_rate": 7.265999985841921e-06, "loss": 0.0017, "step": 126240 }, { "epoch": 0.8305757123214673, "grad_norm": 0.020068571243187103, "learning_rate": 7.2654882032955355e-06, "loss": 0.0009, "step": 126250 }, { "epoch": 0.8306415004967007, "grad_norm": 0.031836325806840024, "learning_rate": 7.264976390880645e-06, "loss": 0.0006, "step": 126260 }, { "epoch": 0.8307072886719341, "grad_norm": 0.10875449140636165, "learning_rate": 7.264464548603996e-06, "loss": 0.0009, "step": 126270 }, { "epoch": 0.8307730768471675, "grad_norm": 0.052334414397012516, "learning_rate": 7.263952676472338e-06, "loss": 0.0006, "step": 126280 }, { "epoch": 0.8308388650224009, "grad_norm": 0.03493931036716775, "learning_rate": 7.263440774492419e-06, "loss": 0.0013, "step": 126290 }, { "epoch": 0.8309046531976343, "grad_norm": 0.08978988650735303, "learning_rate": 7.262928842670989e-06, "loss": 0.002, "step": 126300 }, { "epoch": 0.8309704413728677, "grad_norm": 0.034471685597432265, "learning_rate": 7.262416881014797e-06, "loss": 0.0009, "step": 126310 }, { "epoch": 0.8310362295481011, "grad_norm": 0.016284705802121203, "learning_rate": 7.261904889530591e-06, "loss": 0.0008, "step": 126320 }, { "epoch": 0.8311020177233344, "grad_norm": 0.014301608332004632, "learning_rate": 7.261392868225123e-06, "loss": 0.0014, "step": 126330 }, { "epoch": 0.8311678058985678, "grad_norm": 0.008219179605434626, "learning_rate": 7.2608808171051435e-06, "loss": 0.0049, "step": 126340 }, { "epoch": 0.8312335940738012, "grad_norm": 0.050478878244843484, "learning_rate": 7.260368736177403e-06, "loss": 0.0006, "step": 126350 }, { "epoch": 0.8312993822490345, "grad_norm": 0.040874018029739057, "learning_rate": 7.2598566254486524e-06, "loss": 0.001, "step": 126360 }, { "epoch": 0.8313651704242679, "grad_norm": 0.04467090713159496, "learning_rate": 7.2593444849256445e-06, "loss": 0.0015, "step": 126370 }, { "epoch": 0.8314309585995013, "grad_norm": 0.05784343697168438, "learning_rate": 7.2588323146151305e-06, "loss": 0.001, "step": 126380 }, { "epoch": 0.8314967467747347, "grad_norm": 0.04401334555429794, "learning_rate": 7.258320114523863e-06, "loss": 0.0012, "step": 126390 }, { "epoch": 0.8315625349499681, "grad_norm": 0.016547357742064193, "learning_rate": 7.2578078846585965e-06, "loss": 0.0015, "step": 126400 }, { "epoch": 0.8316283231252015, "grad_norm": 0.09008768933572202, "learning_rate": 7.257295625026081e-06, "loss": 0.0011, "step": 126410 }, { "epoch": 0.8316941113004349, "grad_norm": 0.08114567995919642, "learning_rate": 7.256783335633073e-06, "loss": 0.0008, "step": 126420 }, { "epoch": 0.8317598994756682, "grad_norm": 0.020423204087834133, "learning_rate": 7.256271016486327e-06, "loss": 0.0034, "step": 126430 }, { "epoch": 0.8318256876509016, "grad_norm": 0.04005702870013945, "learning_rate": 7.255758667592595e-06, "loss": 0.001, "step": 126440 }, { "epoch": 0.831891475826135, "grad_norm": 0.05583377445789716, "learning_rate": 7.2552462889586336e-06, "loss": 0.0016, "step": 126450 }, { "epoch": 0.8319572640013684, "grad_norm": 0.014391142032064909, "learning_rate": 7.254733880591198e-06, "loss": 0.0019, "step": 126460 }, { "epoch": 0.8320230521766018, "grad_norm": 0.0048134171182336195, "learning_rate": 7.254221442497044e-06, "loss": 0.0035, "step": 126470 }, { "epoch": 0.8320888403518352, "grad_norm": 0.05897101357800487, "learning_rate": 7.253708974682925e-06, "loss": 0.0005, "step": 126480 }, { "epoch": 0.8321546285270686, "grad_norm": 0.015698439578722197, "learning_rate": 7.253196477155601e-06, "loss": 0.0015, "step": 126490 }, { "epoch": 0.832220416702302, "grad_norm": 0.028044703693898197, "learning_rate": 7.252683949921827e-06, "loss": 0.0008, "step": 126500 }, { "epoch": 0.8322862048775354, "grad_norm": 0.49416508288412964, "learning_rate": 7.252171392988361e-06, "loss": 0.0026, "step": 126510 }, { "epoch": 0.8323519930527687, "grad_norm": 0.04792008706550454, "learning_rate": 7.251658806361961e-06, "loss": 0.0015, "step": 126520 }, { "epoch": 0.832417781228002, "grad_norm": 0.025744227824443442, "learning_rate": 7.251146190049383e-06, "loss": 0.0005, "step": 126530 }, { "epoch": 0.8324835694032354, "grad_norm": 0.0189636425909344, "learning_rate": 7.2506335440573875e-06, "loss": 0.0009, "step": 126540 }, { "epoch": 0.8325493575784688, "grad_norm": 0.01510471656757981, "learning_rate": 7.250120868392731e-06, "loss": 0.0009, "step": 126550 }, { "epoch": 0.8326151457537022, "grad_norm": 0.02482263197792215, "learning_rate": 7.2496081630621765e-06, "loss": 0.0008, "step": 126560 }, { "epoch": 0.8326809339289356, "grad_norm": 0.038746103038807655, "learning_rate": 7.24909542807248e-06, "loss": 0.0034, "step": 126570 }, { "epoch": 0.832746722104169, "grad_norm": 0.017127174976210833, "learning_rate": 7.2485826634304035e-06, "loss": 0.0019, "step": 126580 }, { "epoch": 0.8328125102794024, "grad_norm": 0.04610471413845103, "learning_rate": 7.248069869142706e-06, "loss": 0.0011, "step": 126590 }, { "epoch": 0.8328782984546358, "grad_norm": 0.06763169350178881, "learning_rate": 7.247557045216149e-06, "loss": 0.0019, "step": 126600 }, { "epoch": 0.8329440866298692, "grad_norm": 0.010138620169705678, "learning_rate": 7.247044191657493e-06, "loss": 0.0011, "step": 126610 }, { "epoch": 0.8330098748051026, "grad_norm": 0.027138907320759423, "learning_rate": 7.2465313084735e-06, "loss": 0.001, "step": 126620 }, { "epoch": 0.8330756629803359, "grad_norm": 0.04590784947070671, "learning_rate": 7.246018395670933e-06, "loss": 0.0008, "step": 126630 }, { "epoch": 0.8331414511555693, "grad_norm": 0.076055678112824, "learning_rate": 7.2455054532565516e-06, "loss": 0.001, "step": 126640 }, { "epoch": 0.8332072393308027, "grad_norm": 0.010634985310736199, "learning_rate": 7.244992481237122e-06, "loss": 0.0017, "step": 126650 }, { "epoch": 0.833273027506036, "grad_norm": 0.08569337407519716, "learning_rate": 7.244479479619404e-06, "loss": 0.0022, "step": 126660 }, { "epoch": 0.8333388156812694, "grad_norm": 0.0673030229023636, "learning_rate": 7.243966448410164e-06, "loss": 0.0009, "step": 126670 }, { "epoch": 0.8334046038565028, "grad_norm": 0.03278518820690521, "learning_rate": 7.243453387616162e-06, "loss": 0.0012, "step": 126680 }, { "epoch": 0.8334703920317362, "grad_norm": 0.02838322328314302, "learning_rate": 7.242940297244166e-06, "loss": 0.0008, "step": 126690 }, { "epoch": 0.8335361802069696, "grad_norm": 0.053954751039478915, "learning_rate": 7.24242717730094e-06, "loss": 0.0012, "step": 126700 }, { "epoch": 0.833601968382203, "grad_norm": 0.03513913613252009, "learning_rate": 7.2419140277932474e-06, "loss": 0.0009, "step": 126710 }, { "epoch": 0.8336677565574364, "grad_norm": 0.12933798667714919, "learning_rate": 7.241400848727855e-06, "loss": 0.0011, "step": 126720 }, { "epoch": 0.8337335447326697, "grad_norm": 0.004107716853697332, "learning_rate": 7.240887640111529e-06, "loss": 0.0005, "step": 126730 }, { "epoch": 0.8337993329079031, "grad_norm": 0.01667366627876451, "learning_rate": 7.2403744019510325e-06, "loss": 0.0008, "step": 126740 }, { "epoch": 0.8338651210831365, "grad_norm": 0.04704813277373462, "learning_rate": 7.239861134253136e-06, "loss": 0.0015, "step": 126750 }, { "epoch": 0.8339309092583699, "grad_norm": 0.015380012746503963, "learning_rate": 7.239347837024605e-06, "loss": 0.0016, "step": 126760 }, { "epoch": 0.8339966974336033, "grad_norm": 0.0005266009623152542, "learning_rate": 7.238834510272207e-06, "loss": 0.0006, "step": 126770 }, { "epoch": 0.8340624856088367, "grad_norm": 0.018522809657484107, "learning_rate": 7.23832115400271e-06, "loss": 0.0008, "step": 126780 }, { "epoch": 0.8341282737840701, "grad_norm": 0.24141223839534873, "learning_rate": 7.2378077682228796e-06, "loss": 0.0019, "step": 126790 }, { "epoch": 0.8341940619593035, "grad_norm": 0.07485719671960274, "learning_rate": 7.237294352939488e-06, "loss": 0.0014, "step": 126800 }, { "epoch": 0.8342598501345369, "grad_norm": 0.41020729139734835, "learning_rate": 7.236780908159302e-06, "loss": 0.0029, "step": 126810 }, { "epoch": 0.8343256383097702, "grad_norm": 0.07035473672406407, "learning_rate": 7.236267433889093e-06, "loss": 0.0015, "step": 126820 }, { "epoch": 0.8343914264850036, "grad_norm": 0.044918815866985436, "learning_rate": 7.2357539301356275e-06, "loss": 0.0017, "step": 126830 }, { "epoch": 0.8344572146602369, "grad_norm": 0.055421945404244034, "learning_rate": 7.235240396905678e-06, "loss": 0.0011, "step": 126840 }, { "epoch": 0.8345230028354703, "grad_norm": 0.0368094570032865, "learning_rate": 7.234726834206014e-06, "loss": 0.0011, "step": 126850 }, { "epoch": 0.8345887910107037, "grad_norm": 0.1399754699947038, "learning_rate": 7.2342132420434076e-06, "loss": 0.0019, "step": 126860 }, { "epoch": 0.8346545791859371, "grad_norm": 0.16095239503700515, "learning_rate": 7.2336996204246275e-06, "loss": 0.0019, "step": 126870 }, { "epoch": 0.8347203673611705, "grad_norm": 0.07146652532252072, "learning_rate": 7.23318596935645e-06, "loss": 0.0011, "step": 126880 }, { "epoch": 0.8347861555364039, "grad_norm": 0.0615408635352947, "learning_rate": 7.232672288845643e-06, "loss": 0.001, "step": 126890 }, { "epoch": 0.8348519437116373, "grad_norm": 0.037740540270814346, "learning_rate": 7.232158578898979e-06, "loss": 0.0023, "step": 126900 }, { "epoch": 0.8349177318868707, "grad_norm": 0.11760736855757954, "learning_rate": 7.231644839523233e-06, "loss": 0.0019, "step": 126910 }, { "epoch": 0.8349835200621041, "grad_norm": 0.012999978138222143, "learning_rate": 7.231131070725176e-06, "loss": 0.001, "step": 126920 }, { "epoch": 0.8350493082373375, "grad_norm": 0.09774392219489651, "learning_rate": 7.230617272511583e-06, "loss": 0.0022, "step": 126930 }, { "epoch": 0.8351150964125708, "grad_norm": 0.055310503530340754, "learning_rate": 7.2301034448892286e-06, "loss": 0.0014, "step": 126940 }, { "epoch": 0.8351808845878042, "grad_norm": 0.0011134608358428658, "learning_rate": 7.229589587864886e-06, "loss": 0.0011, "step": 126950 }, { "epoch": 0.8352466727630375, "grad_norm": 0.09765991582393194, "learning_rate": 7.229075701445331e-06, "loss": 0.0013, "step": 126960 }, { "epoch": 0.8353124609382709, "grad_norm": 0.08121188282771818, "learning_rate": 7.228561785637337e-06, "loss": 0.0013, "step": 126970 }, { "epoch": 0.8353782491135043, "grad_norm": 0.02490942208583605, "learning_rate": 7.228047840447681e-06, "loss": 0.0012, "step": 126980 }, { "epoch": 0.8354440372887377, "grad_norm": 0.016332584150006345, "learning_rate": 7.227533865883137e-06, "loss": 0.0007, "step": 126990 }, { "epoch": 0.8355098254639711, "grad_norm": 0.022526356085254256, "learning_rate": 7.227019861950484e-06, "loss": 0.0014, "step": 127000 }, { "epoch": 0.8355756136392045, "grad_norm": 0.0713043751490617, "learning_rate": 7.2265058286564985e-06, "loss": 0.002, "step": 127010 }, { "epoch": 0.8356414018144379, "grad_norm": 0.02789985297155041, "learning_rate": 7.225991766007956e-06, "loss": 0.0011, "step": 127020 }, { "epoch": 0.8357071899896713, "grad_norm": 0.0657533730038996, "learning_rate": 7.225477674011634e-06, "loss": 0.0011, "step": 127030 }, { "epoch": 0.8357729781649046, "grad_norm": 0.03585901906134064, "learning_rate": 7.224963552674311e-06, "loss": 0.0018, "step": 127040 }, { "epoch": 0.835838766340138, "grad_norm": 0.009584566940366369, "learning_rate": 7.224449402002765e-06, "loss": 0.0015, "step": 127050 }, { "epoch": 0.8359045545153714, "grad_norm": 0.0660730589537076, "learning_rate": 7.223935222003775e-06, "loss": 0.0008, "step": 127060 }, { "epoch": 0.8359703426906048, "grad_norm": 0.04344779285001621, "learning_rate": 7.2234210126841194e-06, "loss": 0.0012, "step": 127070 }, { "epoch": 0.8360361308658382, "grad_norm": 0.035273188073597195, "learning_rate": 7.222906774050579e-06, "loss": 0.0018, "step": 127080 }, { "epoch": 0.8361019190410716, "grad_norm": 0.040523472227423606, "learning_rate": 7.222392506109932e-06, "loss": 0.001, "step": 127090 }, { "epoch": 0.836167707216305, "grad_norm": 0.00534442804550321, "learning_rate": 7.22187820886896e-06, "loss": 0.0008, "step": 127100 }, { "epoch": 0.8362334953915384, "grad_norm": 0.03287527320256187, "learning_rate": 7.2213638823344426e-06, "loss": 0.0014, "step": 127110 }, { "epoch": 0.8362992835667717, "grad_norm": 0.025740395889058982, "learning_rate": 7.2208495265131595e-06, "loss": 0.0011, "step": 127120 }, { "epoch": 0.8363650717420051, "grad_norm": 0.001333362244729946, "learning_rate": 7.220335141411894e-06, "loss": 0.0007, "step": 127130 }, { "epoch": 0.8364308599172384, "grad_norm": 0.037595896184006265, "learning_rate": 7.219820727037428e-06, "loss": 0.0029, "step": 127140 }, { "epoch": 0.8364966480924718, "grad_norm": 0.006079728991771803, "learning_rate": 7.219306283396542e-06, "loss": 0.0021, "step": 127150 }, { "epoch": 0.8365624362677052, "grad_norm": 0.20768030242065585, "learning_rate": 7.218791810496021e-06, "loss": 0.0021, "step": 127160 }, { "epoch": 0.8366282244429386, "grad_norm": 0.02977684424732908, "learning_rate": 7.2182773083426446e-06, "loss": 0.0009, "step": 127170 }, { "epoch": 0.836694012618172, "grad_norm": 0.0039596779817651796, "learning_rate": 7.217762776943198e-06, "loss": 0.0014, "step": 127180 }, { "epoch": 0.8367598007934054, "grad_norm": 0.020147785789898266, "learning_rate": 7.217248216304464e-06, "loss": 0.002, "step": 127190 }, { "epoch": 0.8368255889686388, "grad_norm": 0.06295546257323376, "learning_rate": 7.216733626433229e-06, "loss": 0.0012, "step": 127200 }, { "epoch": 0.8368913771438722, "grad_norm": 0.021976565722369792, "learning_rate": 7.216219007336275e-06, "loss": 0.0008, "step": 127210 }, { "epoch": 0.8369571653191056, "grad_norm": 0.03825464991559374, "learning_rate": 7.215704359020387e-06, "loss": 0.002, "step": 127220 }, { "epoch": 0.837022953494339, "grad_norm": 0.0751290014239241, "learning_rate": 7.215189681492351e-06, "loss": 0.0016, "step": 127230 }, { "epoch": 0.8370887416695723, "grad_norm": 0.0481152759636594, "learning_rate": 7.2146749747589515e-06, "loss": 0.0017, "step": 127240 }, { "epoch": 0.8371545298448057, "grad_norm": 0.043835101629059306, "learning_rate": 7.214160238826975e-06, "loss": 0.0008, "step": 127250 }, { "epoch": 0.837220318020039, "grad_norm": 0.004632175147791619, "learning_rate": 7.213645473703209e-06, "loss": 0.0015, "step": 127260 }, { "epoch": 0.8372861061952724, "grad_norm": 0.007062231160819445, "learning_rate": 7.213130679394439e-06, "loss": 0.0015, "step": 127270 }, { "epoch": 0.8373518943705058, "grad_norm": 0.02723980583287795, "learning_rate": 7.2126158559074524e-06, "loss": 0.0007, "step": 127280 }, { "epoch": 0.8374176825457392, "grad_norm": 0.21225423734596774, "learning_rate": 7.2121010032490365e-06, "loss": 0.0023, "step": 127290 }, { "epoch": 0.8374834707209726, "grad_norm": 0.0475763327936632, "learning_rate": 7.211586121425979e-06, "loss": 0.0012, "step": 127300 }, { "epoch": 0.837549258896206, "grad_norm": 0.026849405076141846, "learning_rate": 7.211071210445068e-06, "loss": 0.0014, "step": 127310 }, { "epoch": 0.8376150470714394, "grad_norm": 0.02095644511812087, "learning_rate": 7.210556270313095e-06, "loss": 0.001, "step": 127320 }, { "epoch": 0.8376808352466728, "grad_norm": 0.013653641845846434, "learning_rate": 7.210041301036845e-06, "loss": 0.0014, "step": 127330 }, { "epoch": 0.8377466234219062, "grad_norm": 0.0537833959911595, "learning_rate": 7.20952630262311e-06, "loss": 0.0016, "step": 127340 }, { "epoch": 0.8378124115971395, "grad_norm": 0.03134743668248966, "learning_rate": 7.209011275078679e-06, "loss": 0.0008, "step": 127350 }, { "epoch": 0.8378781997723729, "grad_norm": 0.03228698013977275, "learning_rate": 7.208496218410341e-06, "loss": 0.0011, "step": 127360 }, { "epoch": 0.8379439879476063, "grad_norm": 0.008764599262928824, "learning_rate": 7.207981132624888e-06, "loss": 0.0013, "step": 127370 }, { "epoch": 0.8380097761228397, "grad_norm": 0.11055582241947277, "learning_rate": 7.2074660177291115e-06, "loss": 0.0018, "step": 127380 }, { "epoch": 0.8380755642980731, "grad_norm": 0.005568803705651317, "learning_rate": 7.206950873729801e-06, "loss": 0.0005, "step": 127390 }, { "epoch": 0.8381413524733065, "grad_norm": 0.05452472937549365, "learning_rate": 7.2064357006337505e-06, "loss": 0.0012, "step": 127400 }, { "epoch": 0.8382071406485399, "grad_norm": 0.1285187080518066, "learning_rate": 7.2059204984477504e-06, "loss": 0.0013, "step": 127410 }, { "epoch": 0.8382729288237732, "grad_norm": 0.023488047034307905, "learning_rate": 7.205405267178594e-06, "loss": 0.0008, "step": 127420 }, { "epoch": 0.8383387169990066, "grad_norm": 0.039157945955972336, "learning_rate": 7.204890006833074e-06, "loss": 0.0016, "step": 127430 }, { "epoch": 0.83840450517424, "grad_norm": 0.054101683040056955, "learning_rate": 7.204374717417982e-06, "loss": 0.0009, "step": 127440 }, { "epoch": 0.8384702933494733, "grad_norm": 0.06691910280096146, "learning_rate": 7.203859398940114e-06, "loss": 0.0024, "step": 127450 }, { "epoch": 0.8385360815247067, "grad_norm": 0.030717229664022107, "learning_rate": 7.203344051406263e-06, "loss": 0.0005, "step": 127460 }, { "epoch": 0.8386018696999401, "grad_norm": 0.020473484023991156, "learning_rate": 7.202828674823225e-06, "loss": 0.0015, "step": 127470 }, { "epoch": 0.8386676578751735, "grad_norm": 0.014862410538428588, "learning_rate": 7.202313269197793e-06, "loss": 0.0005, "step": 127480 }, { "epoch": 0.8387334460504069, "grad_norm": 0.04393788590004088, "learning_rate": 7.201797834536761e-06, "loss": 0.0043, "step": 127490 }, { "epoch": 0.8387992342256403, "grad_norm": 0.05104042616930888, "learning_rate": 7.201282370846928e-06, "loss": 0.0005, "step": 127500 }, { "epoch": 0.8388650224008737, "grad_norm": 0.0003986100406788203, "learning_rate": 7.200766878135086e-06, "loss": 0.0008, "step": 127510 }, { "epoch": 0.8389308105761071, "grad_norm": 0.12638809214393176, "learning_rate": 7.200251356408036e-06, "loss": 0.002, "step": 127520 }, { "epoch": 0.8389965987513405, "grad_norm": 0.025673155040744904, "learning_rate": 7.19973580567257e-06, "loss": 0.0012, "step": 127530 }, { "epoch": 0.8390623869265739, "grad_norm": 0.033640865989430065, "learning_rate": 7.199220225935488e-06, "loss": 0.001, "step": 127540 }, { "epoch": 0.8391281751018071, "grad_norm": 0.055986087400413005, "learning_rate": 7.198704617203586e-06, "loss": 0.0016, "step": 127550 }, { "epoch": 0.8391939632770405, "grad_norm": 0.06682346643164987, "learning_rate": 7.198188979483664e-06, "loss": 0.0008, "step": 127560 }, { "epoch": 0.8392597514522739, "grad_norm": 0.07049761985994249, "learning_rate": 7.197673312782518e-06, "loss": 0.002, "step": 127570 }, { "epoch": 0.8393255396275073, "grad_norm": 0.029628346156044257, "learning_rate": 7.197157617106947e-06, "loss": 0.0006, "step": 127580 }, { "epoch": 0.8393913278027407, "grad_norm": 0.0495264968722095, "learning_rate": 7.196641892463751e-06, "loss": 0.0014, "step": 127590 }, { "epoch": 0.8394571159779741, "grad_norm": 0.020090465689805853, "learning_rate": 7.196126138859729e-06, "loss": 0.0014, "step": 127600 }, { "epoch": 0.8395229041532075, "grad_norm": 0.019175303998552325, "learning_rate": 7.195610356301681e-06, "loss": 0.001, "step": 127610 }, { "epoch": 0.8395886923284409, "grad_norm": 0.02993622803627593, "learning_rate": 7.195094544796406e-06, "loss": 0.0016, "step": 127620 }, { "epoch": 0.8396544805036743, "grad_norm": 0.35943282894427836, "learning_rate": 7.194578704350705e-06, "loss": 0.001, "step": 127630 }, { "epoch": 0.8397202686789077, "grad_norm": 0.02056765105523733, "learning_rate": 7.194062834971378e-06, "loss": 0.0019, "step": 127640 }, { "epoch": 0.839786056854141, "grad_norm": 0.017010314137788515, "learning_rate": 7.19354693666523e-06, "loss": 0.0005, "step": 127650 }, { "epoch": 0.8398518450293744, "grad_norm": 0.02463542853705139, "learning_rate": 7.193031009439058e-06, "loss": 0.0012, "step": 127660 }, { "epoch": 0.8399176332046078, "grad_norm": 0.013710758515718359, "learning_rate": 7.192515053299668e-06, "loss": 0.0012, "step": 127670 }, { "epoch": 0.8399834213798412, "grad_norm": 0.04372452966473406, "learning_rate": 7.191999068253859e-06, "loss": 0.0014, "step": 127680 }, { "epoch": 0.8400492095550746, "grad_norm": 0.16695849328036935, "learning_rate": 7.191483054308437e-06, "loss": 0.0017, "step": 127690 }, { "epoch": 0.840114997730308, "grad_norm": 0.05401163474126999, "learning_rate": 7.190967011470202e-06, "loss": 0.001, "step": 127700 }, { "epoch": 0.8401807859055413, "grad_norm": 0.08088324152790714, "learning_rate": 7.190450939745959e-06, "loss": 0.0017, "step": 127710 }, { "epoch": 0.8402465740807747, "grad_norm": 0.0252884237331338, "learning_rate": 7.1899348391425135e-06, "loss": 0.0014, "step": 127720 }, { "epoch": 0.8403123622560081, "grad_norm": 0.04632621389200346, "learning_rate": 7.189418709666668e-06, "loss": 0.0007, "step": 127730 }, { "epoch": 0.8403781504312415, "grad_norm": 0.019565276446060844, "learning_rate": 7.188902551325227e-06, "loss": 0.0011, "step": 127740 }, { "epoch": 0.8404439386064749, "grad_norm": 0.01709531012756815, "learning_rate": 7.188386364124998e-06, "loss": 0.001, "step": 127750 }, { "epoch": 0.8405097267817082, "grad_norm": 0.002134693506733729, "learning_rate": 7.187870148072784e-06, "loss": 0.0008, "step": 127760 }, { "epoch": 0.8405755149569416, "grad_norm": 0.023723039530326586, "learning_rate": 7.187353903175391e-06, "loss": 0.0025, "step": 127770 }, { "epoch": 0.840641303132175, "grad_norm": 0.05176412646689755, "learning_rate": 7.186837629439627e-06, "loss": 0.0019, "step": 127780 }, { "epoch": 0.8407070913074084, "grad_norm": 0.0028508389916208137, "learning_rate": 7.186321326872296e-06, "loss": 0.0023, "step": 127790 }, { "epoch": 0.8407728794826418, "grad_norm": 0.04727659940463894, "learning_rate": 7.185804995480208e-06, "loss": 0.001, "step": 127800 }, { "epoch": 0.8408386676578752, "grad_norm": 0.036420548210596315, "learning_rate": 7.185288635270169e-06, "loss": 0.0007, "step": 127810 }, { "epoch": 0.8409044558331086, "grad_norm": 0.04392011419905869, "learning_rate": 7.184772246248986e-06, "loss": 0.0006, "step": 127820 }, { "epoch": 0.840970244008342, "grad_norm": 0.15924890121681673, "learning_rate": 7.184255828423468e-06, "loss": 0.0022, "step": 127830 }, { "epoch": 0.8410360321835754, "grad_norm": 0.03325526811913283, "learning_rate": 7.183739381800423e-06, "loss": 0.0009, "step": 127840 }, { "epoch": 0.8411018203588088, "grad_norm": 0.04497612078894063, "learning_rate": 7.183222906386661e-06, "loss": 0.0007, "step": 127850 }, { "epoch": 0.841167608534042, "grad_norm": 0.004243563238841624, "learning_rate": 7.18270640218899e-06, "loss": 0.0017, "step": 127860 }, { "epoch": 0.8412333967092754, "grad_norm": 0.003542745188905984, "learning_rate": 7.1821898692142214e-06, "loss": 0.0005, "step": 127870 }, { "epoch": 0.8412991848845088, "grad_norm": 0.061631557805279465, "learning_rate": 7.1816733074691614e-06, "loss": 0.0017, "step": 127880 }, { "epoch": 0.8413649730597422, "grad_norm": 0.020566016399941, "learning_rate": 7.181156716960624e-06, "loss": 0.0012, "step": 127890 }, { "epoch": 0.8414307612349756, "grad_norm": 0.01312574683371375, "learning_rate": 7.18064009769542e-06, "loss": 0.002, "step": 127900 }, { "epoch": 0.841496549410209, "grad_norm": 0.03691156242684742, "learning_rate": 7.18012344968036e-06, "loss": 0.0008, "step": 127910 }, { "epoch": 0.8415623375854424, "grad_norm": 0.01714721058349739, "learning_rate": 7.179606772922254e-06, "loss": 0.0007, "step": 127920 }, { "epoch": 0.8416281257606758, "grad_norm": 0.04950357579280253, "learning_rate": 7.179090067427916e-06, "loss": 0.0015, "step": 127930 }, { "epoch": 0.8416939139359092, "grad_norm": 0.020407998552211364, "learning_rate": 7.178573333204158e-06, "loss": 0.0007, "step": 127940 }, { "epoch": 0.8417597021111426, "grad_norm": 0.009203100818475963, "learning_rate": 7.17805657025779e-06, "loss": 0.0037, "step": 127950 }, { "epoch": 0.8418254902863759, "grad_norm": 0.015892582662792514, "learning_rate": 7.177539778595628e-06, "loss": 0.0009, "step": 127960 }, { "epoch": 0.8418912784616093, "grad_norm": 0.0263119814198643, "learning_rate": 7.1770229582244855e-06, "loss": 0.0016, "step": 127970 }, { "epoch": 0.8419570666368427, "grad_norm": 0.04254877113661952, "learning_rate": 7.176506109151176e-06, "loss": 0.0009, "step": 127980 }, { "epoch": 0.8420228548120761, "grad_norm": 0.03716087969054859, "learning_rate": 7.175989231382512e-06, "loss": 0.0007, "step": 127990 }, { "epoch": 0.8420886429873095, "grad_norm": 0.15783225983972896, "learning_rate": 7.175472324925309e-06, "loss": 0.0013, "step": 128000 }, { "epoch": 0.8421544311625428, "grad_norm": 0.00988281709241792, "learning_rate": 7.174955389786382e-06, "loss": 0.0004, "step": 128010 }, { "epoch": 0.8422202193377762, "grad_norm": 0.015135995371463653, "learning_rate": 7.174438425972547e-06, "loss": 0.0012, "step": 128020 }, { "epoch": 0.8422860075130096, "grad_norm": 0.03321816965105564, "learning_rate": 7.17392143349062e-06, "loss": 0.0009, "step": 128030 }, { "epoch": 0.842351795688243, "grad_norm": 0.08252160505511584, "learning_rate": 7.173404412347417e-06, "loss": 0.0012, "step": 128040 }, { "epoch": 0.8424175838634764, "grad_norm": 0.018817330919505573, "learning_rate": 7.172887362549753e-06, "loss": 0.0012, "step": 128050 }, { "epoch": 0.8424833720387097, "grad_norm": 0.06813353095000083, "learning_rate": 7.172370284104446e-06, "loss": 0.0012, "step": 128060 }, { "epoch": 0.8425491602139431, "grad_norm": 0.06047745357802308, "learning_rate": 7.1718531770183135e-06, "loss": 0.001, "step": 128070 }, { "epoch": 0.8426149483891765, "grad_norm": 0.08295116111717925, "learning_rate": 7.17133604129817e-06, "loss": 0.001, "step": 128080 }, { "epoch": 0.8426807365644099, "grad_norm": 0.031047387063688706, "learning_rate": 7.170818876950838e-06, "loss": 0.0014, "step": 128090 }, { "epoch": 0.8427465247396433, "grad_norm": 0.01844869225203994, "learning_rate": 7.170301683983134e-06, "loss": 0.0006, "step": 128100 }, { "epoch": 0.8428123129148767, "grad_norm": 0.015997738586416815, "learning_rate": 7.169784462401877e-06, "loss": 0.0011, "step": 128110 }, { "epoch": 0.8428781010901101, "grad_norm": 0.13328542506427732, "learning_rate": 7.169267212213885e-06, "loss": 0.0015, "step": 128120 }, { "epoch": 0.8429438892653435, "grad_norm": 0.054792567587105226, "learning_rate": 7.168749933425979e-06, "loss": 0.0005, "step": 128130 }, { "epoch": 0.8430096774405769, "grad_norm": 0.10275614466868116, "learning_rate": 7.168232626044977e-06, "loss": 0.0013, "step": 128140 }, { "epoch": 0.8430754656158103, "grad_norm": 0.1398289121424552, "learning_rate": 7.167715290077701e-06, "loss": 0.0011, "step": 128150 }, { "epoch": 0.8431412537910435, "grad_norm": 0.04041691820065605, "learning_rate": 7.167197925530972e-06, "loss": 0.0015, "step": 128160 }, { "epoch": 0.8432070419662769, "grad_norm": 0.01962246367041101, "learning_rate": 7.166680532411609e-06, "loss": 0.0007, "step": 128170 }, { "epoch": 0.8432728301415103, "grad_norm": 0.07200971525039451, "learning_rate": 7.166163110726436e-06, "loss": 0.0009, "step": 128180 }, { "epoch": 0.8433386183167437, "grad_norm": 0.04435530153742981, "learning_rate": 7.165645660482272e-06, "loss": 0.0009, "step": 128190 }, { "epoch": 0.8434044064919771, "grad_norm": 0.07303410983321772, "learning_rate": 7.165128181685941e-06, "loss": 0.001, "step": 128200 }, { "epoch": 0.8434701946672105, "grad_norm": 0.027202848346561916, "learning_rate": 7.164610674344263e-06, "loss": 0.0004, "step": 128210 }, { "epoch": 0.8435359828424439, "grad_norm": 0.191734545068901, "learning_rate": 7.164093138464064e-06, "loss": 0.0016, "step": 128220 }, { "epoch": 0.8436017710176773, "grad_norm": 0.06169098553633502, "learning_rate": 7.163575574052166e-06, "loss": 0.0011, "step": 128230 }, { "epoch": 0.8436675591929107, "grad_norm": 0.07844983224445393, "learning_rate": 7.163057981115393e-06, "loss": 0.0019, "step": 128240 }, { "epoch": 0.8437333473681441, "grad_norm": 0.10519382254650489, "learning_rate": 7.162540359660569e-06, "loss": 0.0021, "step": 128250 }, { "epoch": 0.8437991355433775, "grad_norm": 0.11124558054008753, "learning_rate": 7.162022709694517e-06, "loss": 0.0016, "step": 128260 }, { "epoch": 0.8438649237186108, "grad_norm": 0.004083450231927765, "learning_rate": 7.161505031224063e-06, "loss": 0.0007, "step": 128270 }, { "epoch": 0.8439307118938442, "grad_norm": 0.008493301536630917, "learning_rate": 7.1609873242560325e-06, "loss": 0.0005, "step": 128280 }, { "epoch": 0.8439965000690776, "grad_norm": 0.052270959532938276, "learning_rate": 7.16046958879725e-06, "loss": 0.001, "step": 128290 }, { "epoch": 0.844062288244311, "grad_norm": 0.08799619736896522, "learning_rate": 7.159951824854542e-06, "loss": 0.0013, "step": 128300 }, { "epoch": 0.8441280764195443, "grad_norm": 0.14674626417024866, "learning_rate": 7.159434032434735e-06, "loss": 0.0018, "step": 128310 }, { "epoch": 0.8441938645947777, "grad_norm": 0.013395366533463788, "learning_rate": 7.1589162115446555e-06, "loss": 0.0014, "step": 128320 }, { "epoch": 0.8442596527700111, "grad_norm": 0.03893916775132759, "learning_rate": 7.158398362191129e-06, "loss": 0.0013, "step": 128330 }, { "epoch": 0.8443254409452445, "grad_norm": 0.10388109270454789, "learning_rate": 7.157880484380985e-06, "loss": 0.0022, "step": 128340 }, { "epoch": 0.8443912291204779, "grad_norm": 0.12822618919533751, "learning_rate": 7.15736257812105e-06, "loss": 0.0011, "step": 128350 }, { "epoch": 0.8444570172957113, "grad_norm": 0.05864454457664747, "learning_rate": 7.156844643418152e-06, "loss": 0.0009, "step": 128360 }, { "epoch": 0.8445228054709446, "grad_norm": 0.034019160706158326, "learning_rate": 7.156326680279123e-06, "loss": 0.0015, "step": 128370 }, { "epoch": 0.844588593646178, "grad_norm": 0.04150210639797458, "learning_rate": 7.155808688710787e-06, "loss": 0.0017, "step": 128380 }, { "epoch": 0.8446543818214114, "grad_norm": 0.05267644473629111, "learning_rate": 7.155290668719977e-06, "loss": 0.0011, "step": 128390 }, { "epoch": 0.8447201699966448, "grad_norm": 0.05810961651833481, "learning_rate": 7.154772620313517e-06, "loss": 0.002, "step": 128400 }, { "epoch": 0.8447859581718782, "grad_norm": 0.018852321797919618, "learning_rate": 7.1542545434982445e-06, "loss": 0.0012, "step": 128410 }, { "epoch": 0.8448517463471116, "grad_norm": 0.02761705838638914, "learning_rate": 7.153736438280985e-06, "loss": 0.001, "step": 128420 }, { "epoch": 0.844917534522345, "grad_norm": 0.0006071597854370548, "learning_rate": 7.153218304668572e-06, "loss": 0.0027, "step": 128430 }, { "epoch": 0.8449833226975784, "grad_norm": 0.05966732434601274, "learning_rate": 7.152700142667835e-06, "loss": 0.0009, "step": 128440 }, { "epoch": 0.8450491108728118, "grad_norm": 0.08997250137030643, "learning_rate": 7.152181952285604e-06, "loss": 0.001, "step": 128450 }, { "epoch": 0.8451148990480452, "grad_norm": 0.06120695090711646, "learning_rate": 7.151663733528714e-06, "loss": 0.0015, "step": 128460 }, { "epoch": 0.8451806872232784, "grad_norm": 0.0073518568282468924, "learning_rate": 7.151145486403995e-06, "loss": 0.0007, "step": 128470 }, { "epoch": 0.8452464753985118, "grad_norm": 0.024220619985003144, "learning_rate": 7.150627210918282e-06, "loss": 0.0011, "step": 128480 }, { "epoch": 0.8453122635737452, "grad_norm": 0.02314929732460828, "learning_rate": 7.150108907078405e-06, "loss": 0.0012, "step": 128490 }, { "epoch": 0.8453780517489786, "grad_norm": 0.05628678328403007, "learning_rate": 7.149590574891202e-06, "loss": 0.0012, "step": 128500 }, { "epoch": 0.845443839924212, "grad_norm": 0.1226304747828272, "learning_rate": 7.149072214363502e-06, "loss": 0.0014, "step": 128510 }, { "epoch": 0.8455096280994454, "grad_norm": 0.0006065345020874099, "learning_rate": 7.14855382550214e-06, "loss": 0.0011, "step": 128520 }, { "epoch": 0.8455754162746788, "grad_norm": 0.011184601780713437, "learning_rate": 7.148035408313952e-06, "loss": 0.0007, "step": 128530 }, { "epoch": 0.8456412044499122, "grad_norm": 0.051556156575777064, "learning_rate": 7.147516962805772e-06, "loss": 0.0009, "step": 128540 }, { "epoch": 0.8457069926251456, "grad_norm": 0.009049994095256735, "learning_rate": 7.146998488984437e-06, "loss": 0.0012, "step": 128550 }, { "epoch": 0.845772780800379, "grad_norm": 0.025159421571968325, "learning_rate": 7.14647998685678e-06, "loss": 0.0011, "step": 128560 }, { "epoch": 0.8458385689756123, "grad_norm": 0.5301292373408998, "learning_rate": 7.145961456429639e-06, "loss": 0.0023, "step": 128570 }, { "epoch": 0.8459043571508457, "grad_norm": 0.04866533646549373, "learning_rate": 7.145442897709849e-06, "loss": 0.0012, "step": 128580 }, { "epoch": 0.8459701453260791, "grad_norm": 0.01959532970078137, "learning_rate": 7.144924310704248e-06, "loss": 0.0017, "step": 128590 }, { "epoch": 0.8460359335013125, "grad_norm": 0.023633578520394632, "learning_rate": 7.144405695419671e-06, "loss": 0.0023, "step": 128600 }, { "epoch": 0.8461017216765458, "grad_norm": 0.14933225373047024, "learning_rate": 7.143887051862958e-06, "loss": 0.0012, "step": 128610 }, { "epoch": 0.8461675098517792, "grad_norm": 0.04926877840946562, "learning_rate": 7.143368380040945e-06, "loss": 0.0007, "step": 128620 }, { "epoch": 0.8462332980270126, "grad_norm": 0.11525478412902483, "learning_rate": 7.142849679960472e-06, "loss": 0.0012, "step": 128630 }, { "epoch": 0.846299086202246, "grad_norm": 0.039791210543799416, "learning_rate": 7.142330951628377e-06, "loss": 0.0009, "step": 128640 }, { "epoch": 0.8463648743774794, "grad_norm": 0.04822428382078908, "learning_rate": 7.141812195051497e-06, "loss": 0.0015, "step": 128650 }, { "epoch": 0.8464306625527128, "grad_norm": 0.00527041926165344, "learning_rate": 7.141293410236674e-06, "loss": 0.0007, "step": 128660 }, { "epoch": 0.8464964507279461, "grad_norm": 0.03282284890686918, "learning_rate": 7.140774597190746e-06, "loss": 0.002, "step": 128670 }, { "epoch": 0.8465622389031795, "grad_norm": 0.013060150045075446, "learning_rate": 7.140255755920555e-06, "loss": 0.0011, "step": 128680 }, { "epoch": 0.8466280270784129, "grad_norm": 0.0298964586595534, "learning_rate": 7.1397368864329395e-06, "loss": 0.0012, "step": 128690 }, { "epoch": 0.8466938152536463, "grad_norm": 0.24313296510035343, "learning_rate": 7.139217988734742e-06, "loss": 0.0015, "step": 128700 }, { "epoch": 0.8467596034288797, "grad_norm": 0.01652087261701624, "learning_rate": 7.138699062832802e-06, "loss": 0.0034, "step": 128710 }, { "epoch": 0.8468253916041131, "grad_norm": 0.036902017982747365, "learning_rate": 7.138180108733963e-06, "loss": 0.001, "step": 128720 }, { "epoch": 0.8468911797793465, "grad_norm": 0.17238110143516935, "learning_rate": 7.137661126445063e-06, "loss": 0.0013, "step": 128730 }, { "epoch": 0.8469569679545799, "grad_norm": 0.05866354881772074, "learning_rate": 7.1371421159729506e-06, "loss": 0.0011, "step": 128740 }, { "epoch": 0.8470227561298133, "grad_norm": 0.02513237672596413, "learning_rate": 7.136623077324464e-06, "loss": 0.0015, "step": 128750 }, { "epoch": 0.8470885443050467, "grad_norm": 0.10147003091102755, "learning_rate": 7.136104010506448e-06, "loss": 0.001, "step": 128760 }, { "epoch": 0.84715433248028, "grad_norm": 0.01252194970356034, "learning_rate": 7.135584915525746e-06, "loss": 0.003, "step": 128770 }, { "epoch": 0.8472201206555133, "grad_norm": 0.07498744047656544, "learning_rate": 7.1350657923892e-06, "loss": 0.0008, "step": 128780 }, { "epoch": 0.8472859088307467, "grad_norm": 0.04403393607377973, "learning_rate": 7.134546641103655e-06, "loss": 0.0012, "step": 128790 }, { "epoch": 0.8473516970059801, "grad_norm": 0.01521794979418363, "learning_rate": 7.134027461675956e-06, "loss": 0.0012, "step": 128800 }, { "epoch": 0.8474174851812135, "grad_norm": 0.1973680896598156, "learning_rate": 7.133508254112949e-06, "loss": 0.0044, "step": 128810 }, { "epoch": 0.8474832733564469, "grad_norm": 0.0800205065163092, "learning_rate": 7.132989018421479e-06, "loss": 0.0012, "step": 128820 }, { "epoch": 0.8475490615316803, "grad_norm": 0.0034048873667353852, "learning_rate": 7.13246975460839e-06, "loss": 0.0009, "step": 128830 }, { "epoch": 0.8476148497069137, "grad_norm": 0.00023894266579811346, "learning_rate": 7.131950462680529e-06, "loss": 0.001, "step": 128840 }, { "epoch": 0.8476806378821471, "grad_norm": 0.08588076902271347, "learning_rate": 7.131431142644742e-06, "loss": 0.0012, "step": 128850 }, { "epoch": 0.8477464260573805, "grad_norm": 0.22815565123033069, "learning_rate": 7.130911794507877e-06, "loss": 0.0007, "step": 128860 }, { "epoch": 0.8478122142326139, "grad_norm": 0.11768328461994904, "learning_rate": 7.130392418276779e-06, "loss": 0.0018, "step": 128870 }, { "epoch": 0.8478780024078472, "grad_norm": 0.008682282727903666, "learning_rate": 7.129873013958299e-06, "loss": 0.0015, "step": 128880 }, { "epoch": 0.8479437905830806, "grad_norm": 0.028207054317638265, "learning_rate": 7.129353581559281e-06, "loss": 0.0009, "step": 128890 }, { "epoch": 0.848009578758314, "grad_norm": 0.12924940029855303, "learning_rate": 7.128834121086577e-06, "loss": 0.0021, "step": 128900 }, { "epoch": 0.8480753669335473, "grad_norm": 0.06484254848678446, "learning_rate": 7.1283146325470305e-06, "loss": 0.0012, "step": 128910 }, { "epoch": 0.8481411551087807, "grad_norm": 0.004920873456739674, "learning_rate": 7.127795115947495e-06, "loss": 0.0005, "step": 128920 }, { "epoch": 0.8482069432840141, "grad_norm": 0.0502900499084958, "learning_rate": 7.127275571294821e-06, "loss": 0.0008, "step": 128930 }, { "epoch": 0.8482727314592475, "grad_norm": 0.05224740098810826, "learning_rate": 7.126755998595854e-06, "loss": 0.0012, "step": 128940 }, { "epoch": 0.8483385196344809, "grad_norm": 0.031110176008954684, "learning_rate": 7.126236397857446e-06, "loss": 0.0015, "step": 128950 }, { "epoch": 0.8484043078097143, "grad_norm": 0.024434024520076737, "learning_rate": 7.125716769086447e-06, "loss": 0.0005, "step": 128960 }, { "epoch": 0.8484700959849477, "grad_norm": 0.07338574624947328, "learning_rate": 7.125197112289709e-06, "loss": 0.0013, "step": 128970 }, { "epoch": 0.848535884160181, "grad_norm": 0.01470450144487789, "learning_rate": 7.12467742747408e-06, "loss": 0.0013, "step": 128980 }, { "epoch": 0.8486016723354144, "grad_norm": 0.132980022235079, "learning_rate": 7.124157714646418e-06, "loss": 0.0031, "step": 128990 }, { "epoch": 0.8486674605106478, "grad_norm": 0.21239229591530154, "learning_rate": 7.123637973813569e-06, "loss": 0.0016, "step": 129000 }, { "epoch": 0.8487332486858812, "grad_norm": 0.16003269085867897, "learning_rate": 7.123118204982388e-06, "loss": 0.0014, "step": 129010 }, { "epoch": 0.8487990368611146, "grad_norm": 0.01588544805921743, "learning_rate": 7.122598408159726e-06, "loss": 0.0014, "step": 129020 }, { "epoch": 0.848864825036348, "grad_norm": 0.010970640385607914, "learning_rate": 7.1220785833524384e-06, "loss": 0.0016, "step": 129030 }, { "epoch": 0.8489306132115814, "grad_norm": 0.06699134714517645, "learning_rate": 7.121558730567377e-06, "loss": 0.001, "step": 129040 }, { "epoch": 0.8489964013868148, "grad_norm": 0.03374451410567828, "learning_rate": 7.121038849811397e-06, "loss": 0.0017, "step": 129050 }, { "epoch": 0.8490621895620482, "grad_norm": 0.010716532751618376, "learning_rate": 7.12051894109135e-06, "loss": 0.0007, "step": 129060 }, { "epoch": 0.8491279777372815, "grad_norm": 0.03792146713593564, "learning_rate": 7.119999004414093e-06, "loss": 0.0011, "step": 129070 }, { "epoch": 0.8491937659125148, "grad_norm": 0.09756277282683723, "learning_rate": 7.119479039786481e-06, "loss": 0.0014, "step": 129080 }, { "epoch": 0.8492595540877482, "grad_norm": 0.0767691273840166, "learning_rate": 7.118959047215367e-06, "loss": 0.0012, "step": 129090 }, { "epoch": 0.8493253422629816, "grad_norm": 0.017647192193374718, "learning_rate": 7.118439026707609e-06, "loss": 0.0008, "step": 129100 }, { "epoch": 0.849391130438215, "grad_norm": 0.038247192585654385, "learning_rate": 7.11791897827006e-06, "loss": 0.0019, "step": 129110 }, { "epoch": 0.8494569186134484, "grad_norm": 0.0633631343394751, "learning_rate": 7.11739890190958e-06, "loss": 0.0012, "step": 129120 }, { "epoch": 0.8495227067886818, "grad_norm": 0.17027586196642383, "learning_rate": 7.116878797633025e-06, "loss": 0.0013, "step": 129130 }, { "epoch": 0.8495884949639152, "grad_norm": 0.0330911603629297, "learning_rate": 7.116358665447249e-06, "loss": 0.0015, "step": 129140 }, { "epoch": 0.8496542831391486, "grad_norm": 0.042472610186338106, "learning_rate": 7.115838505359114e-06, "loss": 0.0018, "step": 129150 }, { "epoch": 0.849720071314382, "grad_norm": 0.0049337178237774705, "learning_rate": 7.115318317375475e-06, "loss": 0.0007, "step": 129160 }, { "epoch": 0.8497858594896154, "grad_norm": 0.07317088838611474, "learning_rate": 7.11479810150319e-06, "loss": 0.0018, "step": 129170 }, { "epoch": 0.8498516476648488, "grad_norm": 0.04047861841835797, "learning_rate": 7.114277857749119e-06, "loss": 0.0018, "step": 129180 }, { "epoch": 0.8499174358400821, "grad_norm": 0.01850454669548987, "learning_rate": 7.11375758612012e-06, "loss": 0.0017, "step": 129190 }, { "epoch": 0.8499832240153155, "grad_norm": 0.017756305705541712, "learning_rate": 7.1132372866230544e-06, "loss": 0.0013, "step": 129200 }, { "epoch": 0.8500490121905488, "grad_norm": 0.033384576419938874, "learning_rate": 7.112716959264779e-06, "loss": 0.0006, "step": 129210 }, { "epoch": 0.8501148003657822, "grad_norm": 0.006372337848398979, "learning_rate": 7.112196604052156e-06, "loss": 0.0013, "step": 129220 }, { "epoch": 0.8501805885410156, "grad_norm": 0.04113173869002757, "learning_rate": 7.111676220992044e-06, "loss": 0.0012, "step": 129230 }, { "epoch": 0.850246376716249, "grad_norm": 0.009739148929692486, "learning_rate": 7.1111558100913044e-06, "loss": 0.0017, "step": 129240 }, { "epoch": 0.8503121648914824, "grad_norm": 0.011737090088120817, "learning_rate": 7.1106353713568e-06, "loss": 0.0007, "step": 129250 }, { "epoch": 0.8503779530667158, "grad_norm": 0.053486293909753936, "learning_rate": 7.110114904795391e-06, "loss": 0.0006, "step": 129260 }, { "epoch": 0.8504437412419492, "grad_norm": 0.062494014130932654, "learning_rate": 7.109594410413939e-06, "loss": 0.0008, "step": 129270 }, { "epoch": 0.8505095294171826, "grad_norm": 0.04233475354955541, "learning_rate": 7.1090738882193066e-06, "loss": 0.0019, "step": 129280 }, { "epoch": 0.8505753175924159, "grad_norm": 0.00731144891218705, "learning_rate": 7.108553338218358e-06, "loss": 0.0008, "step": 129290 }, { "epoch": 0.8506411057676493, "grad_norm": 0.02761348655378921, "learning_rate": 7.108032760417952e-06, "loss": 0.0014, "step": 129300 }, { "epoch": 0.8507068939428827, "grad_norm": 0.025073158426967515, "learning_rate": 7.107512154824956e-06, "loss": 0.0015, "step": 129310 }, { "epoch": 0.8507726821181161, "grad_norm": 0.0007335112947389946, "learning_rate": 7.106991521446233e-06, "loss": 0.0011, "step": 129320 }, { "epoch": 0.8508384702933495, "grad_norm": 0.03677404367814131, "learning_rate": 7.106470860288645e-06, "loss": 0.0008, "step": 129330 }, { "epoch": 0.8509042584685829, "grad_norm": 0.03885451207134475, "learning_rate": 7.10595017135906e-06, "loss": 0.0015, "step": 129340 }, { "epoch": 0.8509700466438163, "grad_norm": 0.025890969050052243, "learning_rate": 7.10542945466434e-06, "loss": 0.0016, "step": 129350 }, { "epoch": 0.8510358348190497, "grad_norm": 0.059278784048261354, "learning_rate": 7.104908710211351e-06, "loss": 0.0009, "step": 129360 }, { "epoch": 0.851101622994283, "grad_norm": 0.04899253383321028, "learning_rate": 7.104387938006957e-06, "loss": 0.0012, "step": 129370 }, { "epoch": 0.8511674111695164, "grad_norm": 0.02471293438300841, "learning_rate": 7.103867138058027e-06, "loss": 0.0016, "step": 129380 }, { "epoch": 0.8512331993447497, "grad_norm": 0.03295071888834453, "learning_rate": 7.103346310371426e-06, "loss": 0.0007, "step": 129390 }, { "epoch": 0.8512989875199831, "grad_norm": 0.07052752256650564, "learning_rate": 7.10282545495402e-06, "loss": 0.0021, "step": 129400 }, { "epoch": 0.8513647756952165, "grad_norm": 0.0938841007632115, "learning_rate": 7.102304571812677e-06, "loss": 0.0008, "step": 129410 }, { "epoch": 0.8514305638704499, "grad_norm": 0.05978558831815475, "learning_rate": 7.101783660954263e-06, "loss": 0.0006, "step": 129420 }, { "epoch": 0.8514963520456833, "grad_norm": 0.03440137094552293, "learning_rate": 7.101262722385647e-06, "loss": 0.0012, "step": 129430 }, { "epoch": 0.8515621402209167, "grad_norm": 0.031075031844554863, "learning_rate": 7.100741756113696e-06, "loss": 0.0019, "step": 129440 }, { "epoch": 0.8516279283961501, "grad_norm": 0.03335446643335372, "learning_rate": 7.100220762145281e-06, "loss": 0.0022, "step": 129450 }, { "epoch": 0.8516937165713835, "grad_norm": 0.03103001435397472, "learning_rate": 7.099699740487268e-06, "loss": 0.0012, "step": 129460 }, { "epoch": 0.8517595047466169, "grad_norm": 0.04584619567023294, "learning_rate": 7.099178691146528e-06, "loss": 0.0016, "step": 129470 }, { "epoch": 0.8518252929218503, "grad_norm": 0.04816593956924758, "learning_rate": 7.09865761412993e-06, "loss": 0.0007, "step": 129480 }, { "epoch": 0.8518910810970836, "grad_norm": 0.12052573505872094, "learning_rate": 7.098136509444343e-06, "loss": 0.0008, "step": 129490 }, { "epoch": 0.851956869272317, "grad_norm": 0.09821654080678827, "learning_rate": 7.097615377096639e-06, "loss": 0.0012, "step": 129500 }, { "epoch": 0.8520226574475503, "grad_norm": 0.0754157988003785, "learning_rate": 7.097094217093688e-06, "loss": 0.0011, "step": 129510 }, { "epoch": 0.8520884456227837, "grad_norm": 0.02112741380256157, "learning_rate": 7.09657302944236e-06, "loss": 0.0011, "step": 129520 }, { "epoch": 0.8521542337980171, "grad_norm": 0.05042076833933038, "learning_rate": 7.096051814149527e-06, "loss": 0.0017, "step": 129530 }, { "epoch": 0.8522200219732505, "grad_norm": 0.019076448446378326, "learning_rate": 7.0955305712220625e-06, "loss": 0.0009, "step": 129540 }, { "epoch": 0.8522858101484839, "grad_norm": 0.001614039543631321, "learning_rate": 7.095009300666836e-06, "loss": 0.0015, "step": 129550 }, { "epoch": 0.8523515983237173, "grad_norm": 0.013758914299181857, "learning_rate": 7.094488002490722e-06, "loss": 0.0015, "step": 129560 }, { "epoch": 0.8524173864989507, "grad_norm": 0.02991195779489813, "learning_rate": 7.093966676700592e-06, "loss": 0.0007, "step": 129570 }, { "epoch": 0.8524831746741841, "grad_norm": 0.06753672723434125, "learning_rate": 7.093445323303321e-06, "loss": 0.0017, "step": 129580 }, { "epoch": 0.8525489628494174, "grad_norm": 0.09479719281117188, "learning_rate": 7.092923942305781e-06, "loss": 0.0029, "step": 129590 }, { "epoch": 0.8526147510246508, "grad_norm": 0.03435530550439197, "learning_rate": 7.092402533714845e-06, "loss": 0.0009, "step": 129600 }, { "epoch": 0.8526805391998842, "grad_norm": 0.015940933609194404, "learning_rate": 7.09188109753739e-06, "loss": 0.0012, "step": 129610 }, { "epoch": 0.8527463273751176, "grad_norm": 0.15448404034935626, "learning_rate": 7.091359633780289e-06, "loss": 0.0014, "step": 129620 }, { "epoch": 0.852812115550351, "grad_norm": 0.08678946122093775, "learning_rate": 7.090838142450417e-06, "loss": 0.0014, "step": 129630 }, { "epoch": 0.8528779037255844, "grad_norm": 0.022388254167157538, "learning_rate": 7.09031662355465e-06, "loss": 0.0006, "step": 129640 }, { "epoch": 0.8529436919008178, "grad_norm": 0.03007654613655034, "learning_rate": 7.0897950770998644e-06, "loss": 0.0017, "step": 129650 }, { "epoch": 0.8530094800760512, "grad_norm": 0.053632500079842216, "learning_rate": 7.089273503092936e-06, "loss": 0.0015, "step": 129660 }, { "epoch": 0.8530752682512845, "grad_norm": 0.025786914859692527, "learning_rate": 7.0887519015407404e-06, "loss": 0.0021, "step": 129670 }, { "epoch": 0.8531410564265179, "grad_norm": 0.045159451027424226, "learning_rate": 7.088230272450155e-06, "loss": 0.0016, "step": 129680 }, { "epoch": 0.8532068446017513, "grad_norm": 0.04591724463260198, "learning_rate": 7.087708615828055e-06, "loss": 0.0015, "step": 129690 }, { "epoch": 0.8532726327769846, "grad_norm": 0.0018059276294874764, "learning_rate": 7.087186931681323e-06, "loss": 0.0012, "step": 129700 }, { "epoch": 0.853338420952218, "grad_norm": 0.034505957345442484, "learning_rate": 7.086665220016832e-06, "loss": 0.0014, "step": 129710 }, { "epoch": 0.8534042091274514, "grad_norm": 0.014443107905658118, "learning_rate": 7.0861434808414645e-06, "loss": 0.0016, "step": 129720 }, { "epoch": 0.8534699973026848, "grad_norm": 0.22610327730359192, "learning_rate": 7.085621714162094e-06, "loss": 0.0021, "step": 129730 }, { "epoch": 0.8535357854779182, "grad_norm": 0.05242255416103026, "learning_rate": 7.085099919985603e-06, "loss": 0.0008, "step": 129740 }, { "epoch": 0.8536015736531516, "grad_norm": 0.04765388323329357, "learning_rate": 7.084578098318871e-06, "loss": 0.0016, "step": 129750 }, { "epoch": 0.853667361828385, "grad_norm": 0.07166394268095819, "learning_rate": 7.084056249168778e-06, "loss": 0.0012, "step": 129760 }, { "epoch": 0.8537331500036184, "grad_norm": 0.06651180487823775, "learning_rate": 7.0835343725422025e-06, "loss": 0.001, "step": 129770 }, { "epoch": 0.8537989381788518, "grad_norm": 0.0034684728390634895, "learning_rate": 7.083012468446025e-06, "loss": 0.0013, "step": 129780 }, { "epoch": 0.8538647263540852, "grad_norm": 0.580659953762247, "learning_rate": 7.082490536887129e-06, "loss": 0.0017, "step": 129790 }, { "epoch": 0.8539305145293185, "grad_norm": 0.09845170133503142, "learning_rate": 7.081968577872392e-06, "loss": 0.0018, "step": 129800 }, { "epoch": 0.8539963027045518, "grad_norm": 0.02498516029613608, "learning_rate": 7.081446591408698e-06, "loss": 0.001, "step": 129810 }, { "epoch": 0.8540620908797852, "grad_norm": 0.05335414278035858, "learning_rate": 7.0809245775029274e-06, "loss": 0.0005, "step": 129820 }, { "epoch": 0.8541278790550186, "grad_norm": 0.005480098204535605, "learning_rate": 7.080402536161965e-06, "loss": 0.0011, "step": 129830 }, { "epoch": 0.854193667230252, "grad_norm": 0.052859756534700736, "learning_rate": 7.079880467392692e-06, "loss": 0.0012, "step": 129840 }, { "epoch": 0.8542594554054854, "grad_norm": 0.07517145515340833, "learning_rate": 7.0793583712019905e-06, "loss": 0.0019, "step": 129850 }, { "epoch": 0.8543252435807188, "grad_norm": 0.14356088695224756, "learning_rate": 7.0788362475967455e-06, "loss": 0.0021, "step": 129860 }, { "epoch": 0.8543910317559522, "grad_norm": 0.09859237484192329, "learning_rate": 7.078314096583839e-06, "loss": 0.0012, "step": 129870 }, { "epoch": 0.8544568199311856, "grad_norm": 0.13844241046923625, "learning_rate": 7.077791918170156e-06, "loss": 0.0011, "step": 129880 }, { "epoch": 0.854522608106419, "grad_norm": 0.08692752921566636, "learning_rate": 7.077269712362582e-06, "loss": 0.0007, "step": 129890 }, { "epoch": 0.8545883962816523, "grad_norm": 0.0035528665271515494, "learning_rate": 7.076747479168e-06, "loss": 0.0009, "step": 129900 }, { "epoch": 0.8546541844568857, "grad_norm": 0.09101044822163963, "learning_rate": 7.0762252185932965e-06, "loss": 0.0013, "step": 129910 }, { "epoch": 0.8547199726321191, "grad_norm": 0.06081403337842026, "learning_rate": 7.075702930645357e-06, "loss": 0.0009, "step": 129920 }, { "epoch": 0.8547857608073525, "grad_norm": 0.034820896226116935, "learning_rate": 7.075180615331066e-06, "loss": 0.0009, "step": 129930 }, { "epoch": 0.8548515489825859, "grad_norm": 0.08753408670448652, "learning_rate": 7.074658272657311e-06, "loss": 0.0013, "step": 129940 }, { "epoch": 0.8549173371578193, "grad_norm": 0.011758588024264103, "learning_rate": 7.07413590263098e-06, "loss": 0.0013, "step": 129950 }, { "epoch": 0.8549831253330527, "grad_norm": 0.09367459206745538, "learning_rate": 7.073613505258957e-06, "loss": 0.0024, "step": 129960 }, { "epoch": 0.855048913508286, "grad_norm": 0.07040060748145911, "learning_rate": 7.073091080548131e-06, "loss": 0.0012, "step": 129970 }, { "epoch": 0.8551147016835194, "grad_norm": 0.033894523771747294, "learning_rate": 7.072568628505391e-06, "loss": 0.0011, "step": 129980 }, { "epoch": 0.8551804898587528, "grad_norm": 0.0654948988094786, "learning_rate": 7.072046149137622e-06, "loss": 0.001, "step": 129990 }, { "epoch": 0.8552462780339861, "grad_norm": 0.06969251560616052, "learning_rate": 7.071523642451714e-06, "loss": 0.0005, "step": 130000 }, { "epoch": 0.8553120662092195, "grad_norm": 0.010179848402416702, "learning_rate": 7.071001108454556e-06, "loss": 0.0006, "step": 130010 }, { "epoch": 0.8553778543844529, "grad_norm": 0.19195055179129963, "learning_rate": 7.070478547153038e-06, "loss": 0.0017, "step": 130020 }, { "epoch": 0.8554436425596863, "grad_norm": 0.06708034426784036, "learning_rate": 7.069955958554047e-06, "loss": 0.001, "step": 130030 }, { "epoch": 0.8555094307349197, "grad_norm": 0.01802954734504686, "learning_rate": 7.069433342664476e-06, "loss": 0.0016, "step": 130040 }, { "epoch": 0.8555752189101531, "grad_norm": 0.028822860935573028, "learning_rate": 7.068910699491214e-06, "loss": 0.0007, "step": 130050 }, { "epoch": 0.8556410070853865, "grad_norm": 0.11385275681311441, "learning_rate": 7.06838802904115e-06, "loss": 0.0026, "step": 130060 }, { "epoch": 0.8557067952606199, "grad_norm": 0.0203594487466442, "learning_rate": 7.067865331321176e-06, "loss": 0.0013, "step": 130070 }, { "epoch": 0.8557725834358533, "grad_norm": 0.010554472802444155, "learning_rate": 7.067342606338184e-06, "loss": 0.0014, "step": 130080 }, { "epoch": 0.8558383716110867, "grad_norm": 0.03207403147448345, "learning_rate": 7.066819854099065e-06, "loss": 0.0006, "step": 130090 }, { "epoch": 0.8559041597863201, "grad_norm": 0.0022039771342756836, "learning_rate": 7.066297074610712e-06, "loss": 0.0006, "step": 130100 }, { "epoch": 0.8559699479615533, "grad_norm": 0.028500968406072146, "learning_rate": 7.065774267880016e-06, "loss": 0.002, "step": 130110 }, { "epoch": 0.8560357361367867, "grad_norm": 0.022424236653094228, "learning_rate": 7.06525143391387e-06, "loss": 0.001, "step": 130120 }, { "epoch": 0.8561015243120201, "grad_norm": 0.006633959554468973, "learning_rate": 7.064728572719168e-06, "loss": 0.0004, "step": 130130 }, { "epoch": 0.8561673124872535, "grad_norm": 0.060417517218177307, "learning_rate": 7.064205684302803e-06, "loss": 0.0012, "step": 130140 }, { "epoch": 0.8562331006624869, "grad_norm": 0.01870760050072225, "learning_rate": 7.063682768671669e-06, "loss": 0.0014, "step": 130150 }, { "epoch": 0.8562988888377203, "grad_norm": 0.0267962650411933, "learning_rate": 7.0631598258326595e-06, "loss": 0.0009, "step": 130160 }, { "epoch": 0.8563646770129537, "grad_norm": 0.03718559372495701, "learning_rate": 7.062636855792671e-06, "loss": 0.0016, "step": 130170 }, { "epoch": 0.8564304651881871, "grad_norm": 0.026891308021635498, "learning_rate": 7.062113858558594e-06, "loss": 0.0009, "step": 130180 }, { "epoch": 0.8564962533634205, "grad_norm": 0.027023700157292364, "learning_rate": 7.061590834137329e-06, "loss": 0.0008, "step": 130190 }, { "epoch": 0.8565620415386539, "grad_norm": 0.011613903295030412, "learning_rate": 7.061067782535769e-06, "loss": 0.0006, "step": 130200 }, { "epoch": 0.8566278297138872, "grad_norm": 0.06419815422794478, "learning_rate": 7.06054470376081e-06, "loss": 0.0014, "step": 130210 }, { "epoch": 0.8566936178891206, "grad_norm": 0.05460705844012208, "learning_rate": 7.060021597819349e-06, "loss": 0.0013, "step": 130220 }, { "epoch": 0.856759406064354, "grad_norm": 0.00700272881385371, "learning_rate": 7.0594984647182825e-06, "loss": 0.0022, "step": 130230 }, { "epoch": 0.8568251942395874, "grad_norm": 0.020249605040634354, "learning_rate": 7.0589753044645085e-06, "loss": 0.0013, "step": 130240 }, { "epoch": 0.8568909824148208, "grad_norm": 0.01034714537114662, "learning_rate": 7.058452117064921e-06, "loss": 0.0013, "step": 130250 }, { "epoch": 0.8569567705900542, "grad_norm": 0.05009078063972286, "learning_rate": 7.057928902526421e-06, "loss": 0.0008, "step": 130260 }, { "epoch": 0.8570225587652875, "grad_norm": 0.03794055257377523, "learning_rate": 7.057405660855906e-06, "loss": 0.0013, "step": 130270 }, { "epoch": 0.8570883469405209, "grad_norm": 0.05239367708977839, "learning_rate": 7.056882392060275e-06, "loss": 0.0008, "step": 130280 }, { "epoch": 0.8571541351157543, "grad_norm": 0.026308179420956777, "learning_rate": 7.056359096146425e-06, "loss": 0.0011, "step": 130290 }, { "epoch": 0.8572199232909877, "grad_norm": 0.07928214438349065, "learning_rate": 7.0558357731212565e-06, "loss": 0.0032, "step": 130300 }, { "epoch": 0.857285711466221, "grad_norm": 0.011862655591764531, "learning_rate": 7.055312422991669e-06, "loss": 0.0009, "step": 130310 }, { "epoch": 0.8573514996414544, "grad_norm": 0.0899117528069509, "learning_rate": 7.054789045764563e-06, "loss": 0.0013, "step": 130320 }, { "epoch": 0.8574172878166878, "grad_norm": 0.013690647479954896, "learning_rate": 7.054265641446836e-06, "loss": 0.0013, "step": 130330 }, { "epoch": 0.8574830759919212, "grad_norm": 0.04718748293561685, "learning_rate": 7.053742210045392e-06, "loss": 0.0022, "step": 130340 }, { "epoch": 0.8575488641671546, "grad_norm": 0.031017833473444993, "learning_rate": 7.053218751567131e-06, "loss": 0.0006, "step": 130350 }, { "epoch": 0.857614652342388, "grad_norm": 0.008299862861261874, "learning_rate": 7.052695266018954e-06, "loss": 0.0015, "step": 130360 }, { "epoch": 0.8576804405176214, "grad_norm": 0.0032723736073688066, "learning_rate": 7.052171753407764e-06, "loss": 0.0007, "step": 130370 }, { "epoch": 0.8577462286928548, "grad_norm": 0.10270005372215701, "learning_rate": 7.05164821374046e-06, "loss": 0.0015, "step": 130380 }, { "epoch": 0.8578120168680882, "grad_norm": 0.07856797229989183, "learning_rate": 7.051124647023946e-06, "loss": 0.0015, "step": 130390 }, { "epoch": 0.8578778050433216, "grad_norm": 0.05859777851055297, "learning_rate": 7.050601053265125e-06, "loss": 0.001, "step": 130400 }, { "epoch": 0.8579435932185548, "grad_norm": 0.037267264138001244, "learning_rate": 7.050077432470901e-06, "loss": 0.001, "step": 130410 }, { "epoch": 0.8580093813937882, "grad_norm": 0.17697995884778506, "learning_rate": 7.049553784648176e-06, "loss": 0.0012, "step": 130420 }, { "epoch": 0.8580751695690216, "grad_norm": 0.04730305581665289, "learning_rate": 7.049030109803856e-06, "loss": 0.0008, "step": 130430 }, { "epoch": 0.858140957744255, "grad_norm": 0.005517369361384281, "learning_rate": 7.048506407944842e-06, "loss": 0.0008, "step": 130440 }, { "epoch": 0.8582067459194884, "grad_norm": 0.010332748747876751, "learning_rate": 7.04798267907804e-06, "loss": 0.0006, "step": 130450 }, { "epoch": 0.8582725340947218, "grad_norm": 0.1989088666378748, "learning_rate": 7.047458923210356e-06, "loss": 0.0011, "step": 130460 }, { "epoch": 0.8583383222699552, "grad_norm": 0.061497134921150186, "learning_rate": 7.046935140348694e-06, "loss": 0.0017, "step": 130470 }, { "epoch": 0.8584041104451886, "grad_norm": 0.055340073766790975, "learning_rate": 7.0464113304999606e-06, "loss": 0.0008, "step": 130480 }, { "epoch": 0.858469898620422, "grad_norm": 0.04374446449817152, "learning_rate": 7.045887493671061e-06, "loss": 0.002, "step": 130490 }, { "epoch": 0.8585356867956554, "grad_norm": 0.08092220158422182, "learning_rate": 7.045363629868902e-06, "loss": 0.0016, "step": 130500 }, { "epoch": 0.8586014749708887, "grad_norm": 0.003479067683860428, "learning_rate": 7.044839739100389e-06, "loss": 0.0014, "step": 130510 }, { "epoch": 0.8586672631461221, "grad_norm": 0.11510421011118425, "learning_rate": 7.0443158213724295e-06, "loss": 0.0014, "step": 130520 }, { "epoch": 0.8587330513213555, "grad_norm": 0.02047998452983251, "learning_rate": 7.0437918766919336e-06, "loss": 0.0007, "step": 130530 }, { "epoch": 0.8587988394965889, "grad_norm": 0.02282524770703879, "learning_rate": 7.043267905065806e-06, "loss": 0.0011, "step": 130540 }, { "epoch": 0.8588646276718223, "grad_norm": 0.017212432268267536, "learning_rate": 7.042743906500955e-06, "loss": 0.0009, "step": 130550 }, { "epoch": 0.8589304158470557, "grad_norm": 0.002733791343829766, "learning_rate": 7.042219881004291e-06, "loss": 0.0012, "step": 130560 }, { "epoch": 0.858996204022289, "grad_norm": 0.042090922536094366, "learning_rate": 7.041695828582722e-06, "loss": 0.0012, "step": 130570 }, { "epoch": 0.8590619921975224, "grad_norm": 0.037475147428656694, "learning_rate": 7.041171749243156e-06, "loss": 0.0011, "step": 130580 }, { "epoch": 0.8591277803727558, "grad_norm": 0.015321013522702608, "learning_rate": 7.040647642992502e-06, "loss": 0.002, "step": 130590 }, { "epoch": 0.8591935685479892, "grad_norm": 0.0826069904044954, "learning_rate": 7.040123509837673e-06, "loss": 0.0023, "step": 130600 }, { "epoch": 0.8592593567232226, "grad_norm": 0.0410119595687501, "learning_rate": 7.039599349785578e-06, "loss": 0.0012, "step": 130610 }, { "epoch": 0.8593251448984559, "grad_norm": 0.04666915089767826, "learning_rate": 7.039075162843125e-06, "loss": 0.0013, "step": 130620 }, { "epoch": 0.8593909330736893, "grad_norm": 0.02101369322012259, "learning_rate": 7.038550949017229e-06, "loss": 0.0006, "step": 130630 }, { "epoch": 0.8594567212489227, "grad_norm": 0.088914557806491, "learning_rate": 7.038026708314799e-06, "loss": 0.0008, "step": 130640 }, { "epoch": 0.8595225094241561, "grad_norm": 0.035897922424071645, "learning_rate": 7.037502440742746e-06, "loss": 0.0007, "step": 130650 }, { "epoch": 0.8595882975993895, "grad_norm": 0.019168397384146364, "learning_rate": 7.036978146307984e-06, "loss": 0.001, "step": 130660 }, { "epoch": 0.8596540857746229, "grad_norm": 0.03431410680305547, "learning_rate": 7.036453825017424e-06, "loss": 0.001, "step": 130670 }, { "epoch": 0.8597198739498563, "grad_norm": 0.09499995565136168, "learning_rate": 7.035929476877981e-06, "loss": 0.0019, "step": 130680 }, { "epoch": 0.8597856621250897, "grad_norm": 0.00487079457543668, "learning_rate": 7.035405101896564e-06, "loss": 0.001, "step": 130690 }, { "epoch": 0.8598514503003231, "grad_norm": 0.19586388611034508, "learning_rate": 7.034880700080088e-06, "loss": 0.0008, "step": 130700 }, { "epoch": 0.8599172384755565, "grad_norm": 0.025099495780134682, "learning_rate": 7.0343562714354675e-06, "loss": 0.0024, "step": 130710 }, { "epoch": 0.8599830266507897, "grad_norm": 0.00777834457395162, "learning_rate": 7.033831815969618e-06, "loss": 0.0014, "step": 130720 }, { "epoch": 0.8600488148260231, "grad_norm": 0.0022267297401787896, "learning_rate": 7.033307333689452e-06, "loss": 0.0009, "step": 130730 }, { "epoch": 0.8601146030012565, "grad_norm": 0.05490713873301383, "learning_rate": 7.032782824601885e-06, "loss": 0.0019, "step": 130740 }, { "epoch": 0.8601803911764899, "grad_norm": 0.17054412641411626, "learning_rate": 7.032258288713833e-06, "loss": 0.0015, "step": 130750 }, { "epoch": 0.8602461793517233, "grad_norm": 0.016933020447950208, "learning_rate": 7.031733726032208e-06, "loss": 0.0016, "step": 130760 }, { "epoch": 0.8603119675269567, "grad_norm": 0.0017736785554733044, "learning_rate": 7.03120913656393e-06, "loss": 0.0018, "step": 130770 }, { "epoch": 0.8603777557021901, "grad_norm": 0.15024785430617027, "learning_rate": 7.030684520315914e-06, "loss": 0.0022, "step": 130780 }, { "epoch": 0.8604435438774235, "grad_norm": 0.07350361811747902, "learning_rate": 7.030159877295077e-06, "loss": 0.0009, "step": 130790 }, { "epoch": 0.8605093320526569, "grad_norm": 0.03632384674091758, "learning_rate": 7.029635207508334e-06, "loss": 0.0011, "step": 130800 }, { "epoch": 0.8605751202278903, "grad_norm": 0.038376723760298076, "learning_rate": 7.029110510962604e-06, "loss": 0.0009, "step": 130810 }, { "epoch": 0.8606409084031236, "grad_norm": 0.021631629868674188, "learning_rate": 7.028585787664806e-06, "loss": 0.0007, "step": 130820 }, { "epoch": 0.860706696578357, "grad_norm": 0.04251346470995136, "learning_rate": 7.028061037621854e-06, "loss": 0.001, "step": 130830 }, { "epoch": 0.8607724847535904, "grad_norm": 0.001781603445730297, "learning_rate": 7.027536260840671e-06, "loss": 0.0006, "step": 130840 }, { "epoch": 0.8608382729288238, "grad_norm": 0.03207033838775201, "learning_rate": 7.027011457328172e-06, "loss": 0.0017, "step": 130850 }, { "epoch": 0.8609040611040572, "grad_norm": 0.11583681313571303, "learning_rate": 7.026486627091278e-06, "loss": 0.0013, "step": 130860 }, { "epoch": 0.8609698492792905, "grad_norm": 0.06345186630085956, "learning_rate": 7.025961770136908e-06, "loss": 0.0007, "step": 130870 }, { "epoch": 0.8610356374545239, "grad_norm": 0.008454463800504509, "learning_rate": 7.025436886471981e-06, "loss": 0.0009, "step": 130880 }, { "epoch": 0.8611014256297573, "grad_norm": 0.030473050973292216, "learning_rate": 7.024911976103419e-06, "loss": 0.0022, "step": 130890 }, { "epoch": 0.8611672138049907, "grad_norm": 0.045648389793708104, "learning_rate": 7.024387039038141e-06, "loss": 0.001, "step": 130900 }, { "epoch": 0.8612330019802241, "grad_norm": 0.0003178414719509957, "learning_rate": 7.023862075283069e-06, "loss": 0.0006, "step": 130910 }, { "epoch": 0.8612987901554574, "grad_norm": 0.020356821648694526, "learning_rate": 7.0233370848451225e-06, "loss": 0.0008, "step": 130920 }, { "epoch": 0.8613645783306908, "grad_norm": 0.05049481760747052, "learning_rate": 7.022812067731224e-06, "loss": 0.0006, "step": 130930 }, { "epoch": 0.8614303665059242, "grad_norm": 0.03493485020778445, "learning_rate": 7.022287023948296e-06, "loss": 0.002, "step": 130940 }, { "epoch": 0.8614961546811576, "grad_norm": 0.025388591332882567, "learning_rate": 7.021761953503261e-06, "loss": 0.0014, "step": 130950 }, { "epoch": 0.861561942856391, "grad_norm": 0.009761110509571033, "learning_rate": 7.021236856403038e-06, "loss": 0.0008, "step": 130960 }, { "epoch": 0.8616277310316244, "grad_norm": 0.252664592389649, "learning_rate": 7.020711732654555e-06, "loss": 0.0012, "step": 130970 }, { "epoch": 0.8616935192068578, "grad_norm": 0.08006514961544521, "learning_rate": 7.020186582264732e-06, "loss": 0.001, "step": 130980 }, { "epoch": 0.8617593073820912, "grad_norm": 0.04225133338209465, "learning_rate": 7.019661405240494e-06, "loss": 0.0011, "step": 130990 }, { "epoch": 0.8618250955573246, "grad_norm": 0.025469058245591276, "learning_rate": 7.019136201588766e-06, "loss": 0.0014, "step": 131000 }, { "epoch": 0.861890883732558, "grad_norm": 0.02564347276812712, "learning_rate": 7.0186109713164686e-06, "loss": 0.0017, "step": 131010 }, { "epoch": 0.8619566719077912, "grad_norm": 0.06793357305693651, "learning_rate": 7.018085714430532e-06, "loss": 0.0007, "step": 131020 }, { "epoch": 0.8620224600830246, "grad_norm": 0.02391171978214762, "learning_rate": 7.017560430937874e-06, "loss": 0.0011, "step": 131030 }, { "epoch": 0.862088248258258, "grad_norm": 0.07869438817167446, "learning_rate": 7.017035120845427e-06, "loss": 0.0014, "step": 131040 }, { "epoch": 0.8621540364334914, "grad_norm": 0.02185025845901896, "learning_rate": 7.0165097841601125e-06, "loss": 0.0018, "step": 131050 }, { "epoch": 0.8622198246087248, "grad_norm": 0.02131461005258984, "learning_rate": 7.0159844208888595e-06, "loss": 0.0011, "step": 131060 }, { "epoch": 0.8622856127839582, "grad_norm": 0.03520684448829617, "learning_rate": 7.015459031038593e-06, "loss": 0.0015, "step": 131070 }, { "epoch": 0.8623514009591916, "grad_norm": 0.042437946427650614, "learning_rate": 7.014933614616239e-06, "loss": 0.0012, "step": 131080 }, { "epoch": 0.862417189134425, "grad_norm": 0.015142233112781189, "learning_rate": 7.014408171628726e-06, "loss": 0.0013, "step": 131090 }, { "epoch": 0.8624829773096584, "grad_norm": 0.1194906189382798, "learning_rate": 7.0138827020829805e-06, "loss": 0.0015, "step": 131100 }, { "epoch": 0.8625487654848918, "grad_norm": 0.00080726280096673, "learning_rate": 7.013357205985931e-06, "loss": 0.0006, "step": 131110 }, { "epoch": 0.8626145536601252, "grad_norm": 0.04318433637850733, "learning_rate": 7.012831683344507e-06, "loss": 0.0013, "step": 131120 }, { "epoch": 0.8626803418353585, "grad_norm": 0.004817916808754397, "learning_rate": 7.012306134165634e-06, "loss": 0.0008, "step": 131130 }, { "epoch": 0.8627461300105919, "grad_norm": 0.12523989192459414, "learning_rate": 7.011780558456243e-06, "loss": 0.001, "step": 131140 }, { "epoch": 0.8628119181858253, "grad_norm": 0.06401570131944072, "learning_rate": 7.011254956223263e-06, "loss": 0.0009, "step": 131150 }, { "epoch": 0.8628777063610586, "grad_norm": 0.02331012216909507, "learning_rate": 7.0107293274736225e-06, "loss": 0.0026, "step": 131160 }, { "epoch": 0.862943494536292, "grad_norm": 0.023526659373052476, "learning_rate": 7.010203672214253e-06, "loss": 0.0051, "step": 131170 }, { "epoch": 0.8630092827115254, "grad_norm": 0.06336021356028992, "learning_rate": 7.009677990452085e-06, "loss": 0.0013, "step": 131180 }, { "epoch": 0.8630750708867588, "grad_norm": 0.07401411271292696, "learning_rate": 7.009152282194047e-06, "loss": 0.0025, "step": 131190 }, { "epoch": 0.8631408590619922, "grad_norm": 0.06627595088129933, "learning_rate": 7.008626547447072e-06, "loss": 0.0013, "step": 131200 }, { "epoch": 0.8632066472372256, "grad_norm": 0.003462026403141995, "learning_rate": 7.0081007862180915e-06, "loss": 0.0012, "step": 131210 }, { "epoch": 0.863272435412459, "grad_norm": 0.03359784895956565, "learning_rate": 7.007574998514035e-06, "loss": 0.0011, "step": 131220 }, { "epoch": 0.8633382235876923, "grad_norm": 0.045321780506616914, "learning_rate": 7.007049184341838e-06, "loss": 0.0007, "step": 131230 }, { "epoch": 0.8634040117629257, "grad_norm": 0.007173598449049675, "learning_rate": 7.006523343708429e-06, "loss": 0.0011, "step": 131240 }, { "epoch": 0.8634697999381591, "grad_norm": 0.04114879700436027, "learning_rate": 7.005997476620744e-06, "loss": 0.001, "step": 131250 }, { "epoch": 0.8635355881133925, "grad_norm": 0.03235031363984942, "learning_rate": 7.005471583085715e-06, "loss": 0.001, "step": 131260 }, { "epoch": 0.8636013762886259, "grad_norm": 0.25029409896456295, "learning_rate": 7.004945663110274e-06, "loss": 0.0011, "step": 131270 }, { "epoch": 0.8636671644638593, "grad_norm": 0.045859440142593244, "learning_rate": 7.004419716701356e-06, "loss": 0.0014, "step": 131280 }, { "epoch": 0.8637329526390927, "grad_norm": 0.031557879456358344, "learning_rate": 7.003893743865895e-06, "loss": 0.0008, "step": 131290 }, { "epoch": 0.8637987408143261, "grad_norm": 0.034872909995273045, "learning_rate": 7.003367744610826e-06, "loss": 0.0012, "step": 131300 }, { "epoch": 0.8638645289895595, "grad_norm": 0.08937829157232813, "learning_rate": 7.002841718943084e-06, "loss": 0.0008, "step": 131310 }, { "epoch": 0.8639303171647928, "grad_norm": 0.030178557710095387, "learning_rate": 7.0023156668696036e-06, "loss": 0.0015, "step": 131320 }, { "epoch": 0.8639961053400261, "grad_norm": 0.09144032793687522, "learning_rate": 7.001789588397321e-06, "loss": 0.0017, "step": 131330 }, { "epoch": 0.8640618935152595, "grad_norm": 0.11967824897297549, "learning_rate": 7.001263483533171e-06, "loss": 0.0013, "step": 131340 }, { "epoch": 0.8641276816904929, "grad_norm": 0.10302510599160719, "learning_rate": 7.0007373522840895e-06, "loss": 0.002, "step": 131350 }, { "epoch": 0.8641934698657263, "grad_norm": 0.042775172409060716, "learning_rate": 7.000211194657014e-06, "loss": 0.0018, "step": 131360 }, { "epoch": 0.8642592580409597, "grad_norm": 0.024712467653886686, "learning_rate": 6.999685010658882e-06, "loss": 0.0007, "step": 131370 }, { "epoch": 0.8643250462161931, "grad_norm": 0.009029640043572311, "learning_rate": 6.99915880029663e-06, "loss": 0.0015, "step": 131380 }, { "epoch": 0.8643908343914265, "grad_norm": 0.030849596755780473, "learning_rate": 6.998632563577197e-06, "loss": 0.0008, "step": 131390 }, { "epoch": 0.8644566225666599, "grad_norm": 0.0018623794924866497, "learning_rate": 6.998106300507519e-06, "loss": 0.0019, "step": 131400 }, { "epoch": 0.8645224107418933, "grad_norm": 0.141707619727845, "learning_rate": 6.997580011094535e-06, "loss": 0.0015, "step": 131410 }, { "epoch": 0.8645881989171267, "grad_norm": 0.03438187924154019, "learning_rate": 6.997053695345182e-06, "loss": 0.0031, "step": 131420 }, { "epoch": 0.86465398709236, "grad_norm": 0.08015500906176427, "learning_rate": 6.996527353266402e-06, "loss": 0.0014, "step": 131430 }, { "epoch": 0.8647197752675934, "grad_norm": 0.005983224822256636, "learning_rate": 6.996000984865135e-06, "loss": 0.0011, "step": 131440 }, { "epoch": 0.8647855634428268, "grad_norm": 0.022747263616482875, "learning_rate": 6.995474590148317e-06, "loss": 0.0005, "step": 131450 }, { "epoch": 0.8648513516180601, "grad_norm": 0.0883465537041816, "learning_rate": 6.99494816912289e-06, "loss": 0.0012, "step": 131460 }, { "epoch": 0.8649171397932935, "grad_norm": 0.0658033092543092, "learning_rate": 6.994421721795796e-06, "loss": 0.0007, "step": 131470 }, { "epoch": 0.8649829279685269, "grad_norm": 0.08850701005091079, "learning_rate": 6.993895248173973e-06, "loss": 0.0018, "step": 131480 }, { "epoch": 0.8650487161437603, "grad_norm": 0.06866886520369091, "learning_rate": 6.993368748264362e-06, "loss": 0.0011, "step": 131490 }, { "epoch": 0.8651145043189937, "grad_norm": 0.020440570741737208, "learning_rate": 6.9928422220739064e-06, "loss": 0.002, "step": 131500 }, { "epoch": 0.8651802924942271, "grad_norm": 0.09552491283176905, "learning_rate": 6.9923156696095486e-06, "loss": 0.0013, "step": 131510 }, { "epoch": 0.8652460806694605, "grad_norm": 0.13319885520073158, "learning_rate": 6.991789090878228e-06, "loss": 0.0018, "step": 131520 }, { "epoch": 0.8653118688446939, "grad_norm": 0.010817637072894402, "learning_rate": 6.99126248588689e-06, "loss": 0.0008, "step": 131530 }, { "epoch": 0.8653776570199272, "grad_norm": 0.04053880956941295, "learning_rate": 6.990735854642474e-06, "loss": 0.001, "step": 131540 }, { "epoch": 0.8654434451951606, "grad_norm": 0.029047890381460995, "learning_rate": 6.9902091971519245e-06, "loss": 0.0012, "step": 131550 }, { "epoch": 0.865509233370394, "grad_norm": 0.025416381311348532, "learning_rate": 6.989682513422187e-06, "loss": 0.0011, "step": 131560 }, { "epoch": 0.8655750215456274, "grad_norm": 0.016757178686717594, "learning_rate": 6.989155803460204e-06, "loss": 0.0009, "step": 131570 }, { "epoch": 0.8656408097208608, "grad_norm": 0.012057682999716986, "learning_rate": 6.98862906727292e-06, "loss": 0.0012, "step": 131580 }, { "epoch": 0.8657065978960942, "grad_norm": 0.15851103165773342, "learning_rate": 6.988102304867278e-06, "loss": 0.0009, "step": 131590 }, { "epoch": 0.8657723860713276, "grad_norm": 0.026278262085403854, "learning_rate": 6.987575516250225e-06, "loss": 0.001, "step": 131600 }, { "epoch": 0.865838174246561, "grad_norm": 0.0016118202760028505, "learning_rate": 6.987048701428706e-06, "loss": 0.0008, "step": 131610 }, { "epoch": 0.8659039624217943, "grad_norm": 0.017780914657775808, "learning_rate": 6.986521860409665e-06, "loss": 0.0007, "step": 131620 }, { "epoch": 0.8659697505970277, "grad_norm": 0.03143667852040073, "learning_rate": 6.985994993200049e-06, "loss": 0.001, "step": 131630 }, { "epoch": 0.866035538772261, "grad_norm": 0.0008943359864535823, "learning_rate": 6.985468099806805e-06, "loss": 0.0011, "step": 131640 }, { "epoch": 0.8661013269474944, "grad_norm": 0.12735271391451392, "learning_rate": 6.984941180236878e-06, "loss": 0.0011, "step": 131650 }, { "epoch": 0.8661671151227278, "grad_norm": 0.016909709076519126, "learning_rate": 6.984414234497217e-06, "loss": 0.0019, "step": 131660 }, { "epoch": 0.8662329032979612, "grad_norm": 0.000691137539737456, "learning_rate": 6.983887262594766e-06, "loss": 0.0014, "step": 131670 }, { "epoch": 0.8662986914731946, "grad_norm": 0.023896712857614102, "learning_rate": 6.9833602645364766e-06, "loss": 0.0017, "step": 131680 }, { "epoch": 0.866364479648428, "grad_norm": 0.025851490907310017, "learning_rate": 6.982833240329294e-06, "loss": 0.0036, "step": 131690 }, { "epoch": 0.8664302678236614, "grad_norm": 0.13773292320614972, "learning_rate": 6.982306189980169e-06, "loss": 0.0013, "step": 131700 }, { "epoch": 0.8664960559988948, "grad_norm": 0.010533170723845253, "learning_rate": 6.981779113496048e-06, "loss": 0.001, "step": 131710 }, { "epoch": 0.8665618441741282, "grad_norm": 0.024991853431007318, "learning_rate": 6.981252010883881e-06, "loss": 0.0017, "step": 131720 }, { "epoch": 0.8666276323493616, "grad_norm": 0.027618215083848135, "learning_rate": 6.980724882150617e-06, "loss": 0.0007, "step": 131730 }, { "epoch": 0.8666934205245949, "grad_norm": 0.02517411939094998, "learning_rate": 6.980197727303206e-06, "loss": 0.0007, "step": 131740 }, { "epoch": 0.8667592086998283, "grad_norm": 0.08294145578487634, "learning_rate": 6.979670546348599e-06, "loss": 0.0008, "step": 131750 }, { "epoch": 0.8668249968750616, "grad_norm": 0.18291091176110774, "learning_rate": 6.9791433392937455e-06, "loss": 0.0015, "step": 131760 }, { "epoch": 0.866890785050295, "grad_norm": 0.0594966381755362, "learning_rate": 6.978616106145597e-06, "loss": 0.0005, "step": 131770 }, { "epoch": 0.8669565732255284, "grad_norm": 0.09642964473130745, "learning_rate": 6.978088846911103e-06, "loss": 0.0015, "step": 131780 }, { "epoch": 0.8670223614007618, "grad_norm": 0.025624978047151453, "learning_rate": 6.977561561597217e-06, "loss": 0.0009, "step": 131790 }, { "epoch": 0.8670881495759952, "grad_norm": 0.026540200245899957, "learning_rate": 6.977034250210888e-06, "loss": 0.0008, "step": 131800 }, { "epoch": 0.8671539377512286, "grad_norm": 0.1821766146078908, "learning_rate": 6.97650691275907e-06, "loss": 0.0012, "step": 131810 }, { "epoch": 0.867219725926462, "grad_norm": 0.0329222101769544, "learning_rate": 6.975979549248717e-06, "loss": 0.0007, "step": 131820 }, { "epoch": 0.8672855141016954, "grad_norm": 0.0853799586583414, "learning_rate": 6.9754521596867796e-06, "loss": 0.0011, "step": 131830 }, { "epoch": 0.8673513022769287, "grad_norm": 0.08616274594665223, "learning_rate": 6.974924744080211e-06, "loss": 0.0008, "step": 131840 }, { "epoch": 0.8674170904521621, "grad_norm": 0.035791554191122295, "learning_rate": 6.9743973024359656e-06, "loss": 0.0014, "step": 131850 }, { "epoch": 0.8674828786273955, "grad_norm": 0.08176799916385738, "learning_rate": 6.9738698347609955e-06, "loss": 0.0013, "step": 131860 }, { "epoch": 0.8675486668026289, "grad_norm": 0.009830831018608025, "learning_rate": 6.9733423410622594e-06, "loss": 0.001, "step": 131870 }, { "epoch": 0.8676144549778623, "grad_norm": 0.1696711692137586, "learning_rate": 6.972814821346707e-06, "loss": 0.0019, "step": 131880 }, { "epoch": 0.8676802431530957, "grad_norm": 0.2528457742532965, "learning_rate": 6.972287275621296e-06, "loss": 0.0008, "step": 131890 }, { "epoch": 0.8677460313283291, "grad_norm": 0.016529042642452006, "learning_rate": 6.971759703892979e-06, "loss": 0.0013, "step": 131900 }, { "epoch": 0.8678118195035625, "grad_norm": 0.022202578902232168, "learning_rate": 6.971232106168715e-06, "loss": 0.0007, "step": 131910 }, { "epoch": 0.8678776076787958, "grad_norm": 0.05444758653407547, "learning_rate": 6.970704482455457e-06, "loss": 0.0008, "step": 131920 }, { "epoch": 0.8679433958540292, "grad_norm": 0.04094888234047622, "learning_rate": 6.9701768327601625e-06, "loss": 0.0011, "step": 131930 }, { "epoch": 0.8680091840292625, "grad_norm": 0.06132215854506042, "learning_rate": 6.9696491570897885e-06, "loss": 0.0013, "step": 131940 }, { "epoch": 0.8680749722044959, "grad_norm": 0.13852234841582925, "learning_rate": 6.969121455451291e-06, "loss": 0.0014, "step": 131950 }, { "epoch": 0.8681407603797293, "grad_norm": 0.022738121956526164, "learning_rate": 6.968593727851628e-06, "loss": 0.0011, "step": 131960 }, { "epoch": 0.8682065485549627, "grad_norm": 0.042871150274929146, "learning_rate": 6.968065974297757e-06, "loss": 0.0012, "step": 131970 }, { "epoch": 0.8682723367301961, "grad_norm": 0.10140254468444211, "learning_rate": 6.967538194796636e-06, "loss": 0.0009, "step": 131980 }, { "epoch": 0.8683381249054295, "grad_norm": 0.00516339555673941, "learning_rate": 6.967010389355222e-06, "loss": 0.0015, "step": 131990 }, { "epoch": 0.8684039130806629, "grad_norm": 0.055634984787578605, "learning_rate": 6.9664825579804765e-06, "loss": 0.0005, "step": 132000 }, { "epoch": 0.8684697012558963, "grad_norm": 0.08337927900096258, "learning_rate": 6.965954700679356e-06, "loss": 0.0011, "step": 132010 }, { "epoch": 0.8685354894311297, "grad_norm": 0.03416875196889365, "learning_rate": 6.96542681745882e-06, "loss": 0.0013, "step": 132020 }, { "epoch": 0.8686012776063631, "grad_norm": 0.05708546173095846, "learning_rate": 6.96489890832583e-06, "loss": 0.0019, "step": 132030 }, { "epoch": 0.8686670657815965, "grad_norm": 0.032281986776024535, "learning_rate": 6.964370973287344e-06, "loss": 0.0008, "step": 132040 }, { "epoch": 0.8687328539568298, "grad_norm": 0.14414631502984168, "learning_rate": 6.963843012350324e-06, "loss": 0.0013, "step": 132050 }, { "epoch": 0.8687986421320631, "grad_norm": 0.08871714313649133, "learning_rate": 6.96331502552173e-06, "loss": 0.0015, "step": 132060 }, { "epoch": 0.8688644303072965, "grad_norm": 0.011990192710983777, "learning_rate": 6.962787012808523e-06, "loss": 0.0005, "step": 132070 }, { "epoch": 0.8689302184825299, "grad_norm": 0.048999493994700855, "learning_rate": 6.962258974217664e-06, "loss": 0.0007, "step": 132080 }, { "epoch": 0.8689960066577633, "grad_norm": 0.16197150377906996, "learning_rate": 6.961730909756115e-06, "loss": 0.0009, "step": 132090 }, { "epoch": 0.8690617948329967, "grad_norm": 0.16631162194695384, "learning_rate": 6.961202819430839e-06, "loss": 0.0026, "step": 132100 }, { "epoch": 0.8691275830082301, "grad_norm": 0.01758840078611762, "learning_rate": 6.960674703248798e-06, "loss": 0.0011, "step": 132110 }, { "epoch": 0.8691933711834635, "grad_norm": 0.045950645850626186, "learning_rate": 6.960146561216952e-06, "loss": 0.001, "step": 132120 }, { "epoch": 0.8692591593586969, "grad_norm": 0.04236866443836029, "learning_rate": 6.959618393342269e-06, "loss": 0.0009, "step": 132130 }, { "epoch": 0.8693249475339303, "grad_norm": 0.056334094942993136, "learning_rate": 6.959090199631708e-06, "loss": 0.0014, "step": 132140 }, { "epoch": 0.8693907357091636, "grad_norm": 0.004216223214418276, "learning_rate": 6.958561980092236e-06, "loss": 0.0006, "step": 132150 }, { "epoch": 0.869456523884397, "grad_norm": 0.04185960140617627, "learning_rate": 6.958033734730816e-06, "loss": 0.0016, "step": 132160 }, { "epoch": 0.8695223120596304, "grad_norm": 0.05494738006445617, "learning_rate": 6.957505463554412e-06, "loss": 0.0006, "step": 132170 }, { "epoch": 0.8695881002348638, "grad_norm": 0.08239690877626557, "learning_rate": 6.956977166569989e-06, "loss": 0.002, "step": 132180 }, { "epoch": 0.8696538884100972, "grad_norm": 0.028922836233904417, "learning_rate": 6.956448843784511e-06, "loss": 0.0023, "step": 132190 }, { "epoch": 0.8697196765853306, "grad_norm": 0.01480104244861706, "learning_rate": 6.9559204952049465e-06, "loss": 0.0024, "step": 132200 }, { "epoch": 0.869785464760564, "grad_norm": 0.07415175292076272, "learning_rate": 6.955392120838258e-06, "loss": 0.0023, "step": 132210 }, { "epoch": 0.8698512529357973, "grad_norm": 0.015947906292586377, "learning_rate": 6.954863720691414e-06, "loss": 0.0014, "step": 132220 }, { "epoch": 0.8699170411110307, "grad_norm": 0.014486641104876258, "learning_rate": 6.95433529477138e-06, "loss": 0.0012, "step": 132230 }, { "epoch": 0.8699828292862641, "grad_norm": 0.3221589697582401, "learning_rate": 6.953806843085123e-06, "loss": 0.0012, "step": 132240 }, { "epoch": 0.8700486174614974, "grad_norm": 0.003949790688563214, "learning_rate": 6.953278365639611e-06, "loss": 0.0008, "step": 132250 }, { "epoch": 0.8701144056367308, "grad_norm": 0.06828487154621543, "learning_rate": 6.95274986244181e-06, "loss": 0.0017, "step": 132260 }, { "epoch": 0.8701801938119642, "grad_norm": 0.0651746090091188, "learning_rate": 6.9522213334986876e-06, "loss": 0.0019, "step": 132270 }, { "epoch": 0.8702459819871976, "grad_norm": 0.03560142694427664, "learning_rate": 6.951692778817214e-06, "loss": 0.001, "step": 132280 }, { "epoch": 0.870311770162431, "grad_norm": 0.14289668425301727, "learning_rate": 6.951164198404358e-06, "loss": 0.0017, "step": 132290 }, { "epoch": 0.8703775583376644, "grad_norm": 0.07293265662534332, "learning_rate": 6.950635592267085e-06, "loss": 0.0013, "step": 132300 }, { "epoch": 0.8704433465128978, "grad_norm": 0.15876826155626705, "learning_rate": 6.950106960412368e-06, "loss": 0.0016, "step": 132310 }, { "epoch": 0.8705091346881312, "grad_norm": 0.008743560760449161, "learning_rate": 6.949578302847173e-06, "loss": 0.0009, "step": 132320 }, { "epoch": 0.8705749228633646, "grad_norm": 0.013174592770666058, "learning_rate": 6.949049619578473e-06, "loss": 0.0007, "step": 132330 }, { "epoch": 0.870640711038598, "grad_norm": 0.008900958574371927, "learning_rate": 6.948520910613238e-06, "loss": 0.0012, "step": 132340 }, { "epoch": 0.8707064992138313, "grad_norm": 0.011977495510186021, "learning_rate": 6.947992175958437e-06, "loss": 0.0013, "step": 132350 }, { "epoch": 0.8707722873890646, "grad_norm": 0.0613396867398974, "learning_rate": 6.947463415621041e-06, "loss": 0.001, "step": 132360 }, { "epoch": 0.870838075564298, "grad_norm": 0.01933152180205152, "learning_rate": 6.946934629608022e-06, "loss": 0.0018, "step": 132370 }, { "epoch": 0.8709038637395314, "grad_norm": 0.04614906016984427, "learning_rate": 6.94640581792635e-06, "loss": 0.0021, "step": 132380 }, { "epoch": 0.8709696519147648, "grad_norm": 0.05118482942484355, "learning_rate": 6.945876980583001e-06, "loss": 0.0021, "step": 132390 }, { "epoch": 0.8710354400899982, "grad_norm": 0.041231134522302045, "learning_rate": 6.945348117584943e-06, "loss": 0.0007, "step": 132400 }, { "epoch": 0.8711012282652316, "grad_norm": 0.016839487928229525, "learning_rate": 6.94481922893915e-06, "loss": 0.0007, "step": 132410 }, { "epoch": 0.871167016440465, "grad_norm": 0.012743268483439584, "learning_rate": 6.9442903146525965e-06, "loss": 0.0007, "step": 132420 }, { "epoch": 0.8712328046156984, "grad_norm": 0.06151610108943103, "learning_rate": 6.943761374732254e-06, "loss": 0.0014, "step": 132430 }, { "epoch": 0.8712985927909318, "grad_norm": 0.02655951676609086, "learning_rate": 6.943232409185097e-06, "loss": 0.0016, "step": 132440 }, { "epoch": 0.8713643809661651, "grad_norm": 0.01064193284098795, "learning_rate": 6.942703418018096e-06, "loss": 0.0008, "step": 132450 }, { "epoch": 0.8714301691413985, "grad_norm": 0.24141943254532305, "learning_rate": 6.9421744012382306e-06, "loss": 0.0016, "step": 132460 }, { "epoch": 0.8714959573166319, "grad_norm": 0.17344253388899422, "learning_rate": 6.941645358852474e-06, "loss": 0.0013, "step": 132470 }, { "epoch": 0.8715617454918653, "grad_norm": 0.022011870366657225, "learning_rate": 6.941116290867799e-06, "loss": 0.0007, "step": 132480 }, { "epoch": 0.8716275336670987, "grad_norm": 0.060382074187233396, "learning_rate": 6.940587197291182e-06, "loss": 0.0027, "step": 132490 }, { "epoch": 0.8716933218423321, "grad_norm": 0.023645224971453847, "learning_rate": 6.940058078129599e-06, "loss": 0.001, "step": 132500 }, { "epoch": 0.8717591100175655, "grad_norm": 0.05570277651564355, "learning_rate": 6.939528933390025e-06, "loss": 0.0017, "step": 132510 }, { "epoch": 0.8718248981927988, "grad_norm": 0.0803106780405304, "learning_rate": 6.938999763079438e-06, "loss": 0.0018, "step": 132520 }, { "epoch": 0.8718906863680322, "grad_norm": 0.03938292835110547, "learning_rate": 6.938470567204814e-06, "loss": 0.0015, "step": 132530 }, { "epoch": 0.8719564745432656, "grad_norm": 0.0365590001766985, "learning_rate": 6.937941345773129e-06, "loss": 0.0006, "step": 132540 }, { "epoch": 0.872022262718499, "grad_norm": 0.010999554713545464, "learning_rate": 6.937412098791363e-06, "loss": 0.0012, "step": 132550 }, { "epoch": 0.8720880508937323, "grad_norm": 0.00048689227677887234, "learning_rate": 6.93688282626649e-06, "loss": 0.0004, "step": 132560 }, { "epoch": 0.8721538390689657, "grad_norm": 0.0013064521972130185, "learning_rate": 6.93635352820549e-06, "loss": 0.0016, "step": 132570 }, { "epoch": 0.8722196272441991, "grad_norm": 0.05189722681050606, "learning_rate": 6.935824204615341e-06, "loss": 0.0006, "step": 132580 }, { "epoch": 0.8722854154194325, "grad_norm": 0.04846749018315444, "learning_rate": 6.935294855503022e-06, "loss": 0.0005, "step": 132590 }, { "epoch": 0.8723512035946659, "grad_norm": 0.05472298702373587, "learning_rate": 6.934765480875511e-06, "loss": 0.0024, "step": 132600 }, { "epoch": 0.8724169917698993, "grad_norm": 0.0021182227397477968, "learning_rate": 6.93423608073979e-06, "loss": 0.0015, "step": 132610 }, { "epoch": 0.8724827799451327, "grad_norm": 0.20383405496255866, "learning_rate": 6.933706655102835e-06, "loss": 0.0026, "step": 132620 }, { "epoch": 0.8725485681203661, "grad_norm": 0.004422549616063878, "learning_rate": 6.9331772039716285e-06, "loss": 0.0009, "step": 132630 }, { "epoch": 0.8726143562955995, "grad_norm": 0.09624700854603459, "learning_rate": 6.93264772735315e-06, "loss": 0.0014, "step": 132640 }, { "epoch": 0.8726801444708329, "grad_norm": 0.030075243769631242, "learning_rate": 6.93211822525438e-06, "loss": 0.0008, "step": 132650 }, { "epoch": 0.8727459326460661, "grad_norm": 0.041191686805871784, "learning_rate": 6.9315886976823e-06, "loss": 0.0008, "step": 132660 }, { "epoch": 0.8728117208212995, "grad_norm": 0.030062619842272608, "learning_rate": 6.931059144643892e-06, "loss": 0.0023, "step": 132670 }, { "epoch": 0.8728775089965329, "grad_norm": 0.06548894009616162, "learning_rate": 6.930529566146136e-06, "loss": 0.001, "step": 132680 }, { "epoch": 0.8729432971717663, "grad_norm": 0.048794907505568204, "learning_rate": 6.929999962196015e-06, "loss": 0.001, "step": 132690 }, { "epoch": 0.8730090853469997, "grad_norm": 0.004226615562573587, "learning_rate": 6.929470332800512e-06, "loss": 0.0012, "step": 132700 }, { "epoch": 0.8730748735222331, "grad_norm": 0.16610302777517746, "learning_rate": 6.928940677966609e-06, "loss": 0.0022, "step": 132710 }, { "epoch": 0.8731406616974665, "grad_norm": 0.054394905617489234, "learning_rate": 6.9284109977012895e-06, "loss": 0.0015, "step": 132720 }, { "epoch": 0.8732064498726999, "grad_norm": 0.045395917764831865, "learning_rate": 6.927881292011535e-06, "loss": 0.0008, "step": 132730 }, { "epoch": 0.8732722380479333, "grad_norm": 0.05556629892293743, "learning_rate": 6.927351560904332e-06, "loss": 0.0011, "step": 132740 }, { "epoch": 0.8733380262231667, "grad_norm": 0.02799800602890078, "learning_rate": 6.926821804386663e-06, "loss": 0.001, "step": 132750 }, { "epoch": 0.8734038143984, "grad_norm": 0.005707156992200766, "learning_rate": 6.926292022465512e-06, "loss": 0.0009, "step": 132760 }, { "epoch": 0.8734696025736334, "grad_norm": 0.12415816900090015, "learning_rate": 6.9257622151478645e-06, "loss": 0.0046, "step": 132770 }, { "epoch": 0.8735353907488668, "grad_norm": 0.050547300933019, "learning_rate": 6.9252323824407055e-06, "loss": 0.0006, "step": 132780 }, { "epoch": 0.8736011789241002, "grad_norm": 0.08854034053241402, "learning_rate": 6.924702524351022e-06, "loss": 0.0017, "step": 132790 }, { "epoch": 0.8736669670993336, "grad_norm": 0.03229502111409312, "learning_rate": 6.924172640885795e-06, "loss": 0.0007, "step": 132800 }, { "epoch": 0.873732755274567, "grad_norm": 0.002296910722552937, "learning_rate": 6.923642732052016e-06, "loss": 0.0012, "step": 132810 }, { "epoch": 0.8737985434498003, "grad_norm": 0.04721128924368402, "learning_rate": 6.923112797856667e-06, "loss": 0.0031, "step": 132820 }, { "epoch": 0.8738643316250337, "grad_norm": 0.021652091578126417, "learning_rate": 6.922582838306737e-06, "loss": 0.0011, "step": 132830 }, { "epoch": 0.8739301198002671, "grad_norm": 0.0006418278688131667, "learning_rate": 6.922052853409214e-06, "loss": 0.0015, "step": 132840 }, { "epoch": 0.8739959079755005, "grad_norm": 0.1534771110560948, "learning_rate": 6.921522843171083e-06, "loss": 0.002, "step": 132850 }, { "epoch": 0.8740616961507338, "grad_norm": 0.021877334011805156, "learning_rate": 6.9209928075993336e-06, "loss": 0.0019, "step": 132860 }, { "epoch": 0.8741274843259672, "grad_norm": 0.05190736718994161, "learning_rate": 6.920462746700953e-06, "loss": 0.0017, "step": 132870 }, { "epoch": 0.8741932725012006, "grad_norm": 0.005819817466341844, "learning_rate": 6.91993266048293e-06, "loss": 0.0009, "step": 132880 }, { "epoch": 0.874259060676434, "grad_norm": 0.18251042832800915, "learning_rate": 6.919402548952252e-06, "loss": 0.0012, "step": 132890 }, { "epoch": 0.8743248488516674, "grad_norm": 0.08067522033909841, "learning_rate": 6.918872412115909e-06, "loss": 0.0013, "step": 132900 }, { "epoch": 0.8743906370269008, "grad_norm": 0.06216744270872307, "learning_rate": 6.918342249980892e-06, "loss": 0.0006, "step": 132910 }, { "epoch": 0.8744564252021342, "grad_norm": 0.029335414236214958, "learning_rate": 6.917812062554187e-06, "loss": 0.0019, "step": 132920 }, { "epoch": 0.8745222133773676, "grad_norm": 0.007935741024253918, "learning_rate": 6.91728184984279e-06, "loss": 0.0011, "step": 132930 }, { "epoch": 0.874588001552601, "grad_norm": 0.013882261398513973, "learning_rate": 6.916751611853685e-06, "loss": 0.0019, "step": 132940 }, { "epoch": 0.8746537897278344, "grad_norm": 0.03938215261791415, "learning_rate": 6.916221348593865e-06, "loss": 0.0014, "step": 132950 }, { "epoch": 0.8747195779030678, "grad_norm": 0.05374802150468731, "learning_rate": 6.915691060070322e-06, "loss": 0.0023, "step": 132960 }, { "epoch": 0.874785366078301, "grad_norm": 0.013752118198998596, "learning_rate": 6.915160746290047e-06, "loss": 0.0014, "step": 132970 }, { "epoch": 0.8748511542535344, "grad_norm": 0.05282974143155948, "learning_rate": 6.914630407260032e-06, "loss": 0.0011, "step": 132980 }, { "epoch": 0.8749169424287678, "grad_norm": 0.06688121469762395, "learning_rate": 6.914100042987268e-06, "loss": 0.0013, "step": 132990 }, { "epoch": 0.8749827306040012, "grad_norm": 0.10233939185430439, "learning_rate": 6.913569653478749e-06, "loss": 0.0012, "step": 133000 }, { "epoch": 0.8750485187792346, "grad_norm": 0.05036565808569199, "learning_rate": 6.9130392387414655e-06, "loss": 0.0011, "step": 133010 }, { "epoch": 0.875114306954468, "grad_norm": 0.09377575275802069, "learning_rate": 6.912508798782412e-06, "loss": 0.001, "step": 133020 }, { "epoch": 0.8751800951297014, "grad_norm": 0.055681614287229356, "learning_rate": 6.911978333608583e-06, "loss": 0.0008, "step": 133030 }, { "epoch": 0.8752458833049348, "grad_norm": 0.08285169128575194, "learning_rate": 6.9114478432269706e-06, "loss": 0.0013, "step": 133040 }, { "epoch": 0.8753116714801682, "grad_norm": 0.013627227566048472, "learning_rate": 6.910917327644568e-06, "loss": 0.0012, "step": 133050 }, { "epoch": 0.8753774596554016, "grad_norm": 0.04432901927299004, "learning_rate": 6.910386786868372e-06, "loss": 0.0008, "step": 133060 }, { "epoch": 0.8754432478306349, "grad_norm": 0.023657401858526748, "learning_rate": 6.909856220905376e-06, "loss": 0.001, "step": 133070 }, { "epoch": 0.8755090360058683, "grad_norm": 0.09415306583656999, "learning_rate": 6.909325629762574e-06, "loss": 0.0011, "step": 133080 }, { "epoch": 0.8755748241811017, "grad_norm": 0.026028239084232026, "learning_rate": 6.908795013446965e-06, "loss": 0.0007, "step": 133090 }, { "epoch": 0.8756406123563351, "grad_norm": 0.08990820793909984, "learning_rate": 6.9082643719655405e-06, "loss": 0.0013, "step": 133100 }, { "epoch": 0.8757064005315685, "grad_norm": 0.03434898730231807, "learning_rate": 6.907733705325299e-06, "loss": 0.0014, "step": 133110 }, { "epoch": 0.8757721887068018, "grad_norm": 0.08229676645954151, "learning_rate": 6.9072030135332356e-06, "loss": 0.0021, "step": 133120 }, { "epoch": 0.8758379768820352, "grad_norm": 0.01731053986751025, "learning_rate": 6.906672296596348e-06, "loss": 0.002, "step": 133130 }, { "epoch": 0.8759037650572686, "grad_norm": 0.0007083281996027434, "learning_rate": 6.906141554521634e-06, "loss": 0.0004, "step": 133140 }, { "epoch": 0.875969553232502, "grad_norm": 0.07939492559846972, "learning_rate": 6.905610787316087e-06, "loss": 0.0012, "step": 133150 }, { "epoch": 0.8760353414077354, "grad_norm": 0.0174832330587843, "learning_rate": 6.90507999498671e-06, "loss": 0.002, "step": 133160 }, { "epoch": 0.8761011295829687, "grad_norm": 0.09546881409494648, "learning_rate": 6.904549177540498e-06, "loss": 0.0015, "step": 133170 }, { "epoch": 0.8761669177582021, "grad_norm": 0.029611146335997523, "learning_rate": 6.904018334984449e-06, "loss": 0.0019, "step": 133180 }, { "epoch": 0.8762327059334355, "grad_norm": 0.008606354124556459, "learning_rate": 6.903487467325564e-06, "loss": 0.0008, "step": 133190 }, { "epoch": 0.8762984941086689, "grad_norm": 0.019347746144547626, "learning_rate": 6.90295657457084e-06, "loss": 0.0008, "step": 133200 }, { "epoch": 0.8763642822839023, "grad_norm": 0.039071191407016356, "learning_rate": 6.902425656727277e-06, "loss": 0.0012, "step": 133210 }, { "epoch": 0.8764300704591357, "grad_norm": 0.04421979322742448, "learning_rate": 6.901894713801876e-06, "loss": 0.0012, "step": 133220 }, { "epoch": 0.8764958586343691, "grad_norm": 0.024145493616797985, "learning_rate": 6.901363745801634e-06, "loss": 0.0012, "step": 133230 }, { "epoch": 0.8765616468096025, "grad_norm": 0.01315904117254934, "learning_rate": 6.900832752733554e-06, "loss": 0.0009, "step": 133240 }, { "epoch": 0.8766274349848359, "grad_norm": 0.034516441205938775, "learning_rate": 6.900301734604636e-06, "loss": 0.0012, "step": 133250 }, { "epoch": 0.8766932231600693, "grad_norm": 0.07298145507826989, "learning_rate": 6.899770691421881e-06, "loss": 0.0009, "step": 133260 }, { "epoch": 0.8767590113353025, "grad_norm": 0.0030619767041573, "learning_rate": 6.899239623192289e-06, "loss": 0.0012, "step": 133270 }, { "epoch": 0.8768247995105359, "grad_norm": 0.03907965528938835, "learning_rate": 6.898708529922864e-06, "loss": 0.0024, "step": 133280 }, { "epoch": 0.8768905876857693, "grad_norm": 0.016470581659743386, "learning_rate": 6.8981774116206056e-06, "loss": 0.0011, "step": 133290 }, { "epoch": 0.8769563758610027, "grad_norm": 0.05596153523621081, "learning_rate": 6.897646268292519e-06, "loss": 0.0017, "step": 133300 }, { "epoch": 0.8770221640362361, "grad_norm": 0.007942893059058858, "learning_rate": 6.897115099945605e-06, "loss": 0.0009, "step": 133310 }, { "epoch": 0.8770879522114695, "grad_norm": 0.07329388801864282, "learning_rate": 6.896583906586867e-06, "loss": 0.001, "step": 133320 }, { "epoch": 0.8771537403867029, "grad_norm": 0.05784733540171916, "learning_rate": 6.896052688223307e-06, "loss": 0.0007, "step": 133330 }, { "epoch": 0.8772195285619363, "grad_norm": 0.04427700083641988, "learning_rate": 6.89552144486193e-06, "loss": 0.0017, "step": 133340 }, { "epoch": 0.8772853167371697, "grad_norm": 0.1457334761878043, "learning_rate": 6.89499017650974e-06, "loss": 0.0022, "step": 133350 }, { "epoch": 0.8773511049124031, "grad_norm": 0.06429252234538353, "learning_rate": 6.894458883173742e-06, "loss": 0.0007, "step": 133360 }, { "epoch": 0.8774168930876364, "grad_norm": 0.034502636034420525, "learning_rate": 6.893927564860939e-06, "loss": 0.0007, "step": 133370 }, { "epoch": 0.8774826812628698, "grad_norm": 0.027527246398844123, "learning_rate": 6.893396221578338e-06, "loss": 0.0009, "step": 133380 }, { "epoch": 0.8775484694381032, "grad_norm": 0.056197134331135344, "learning_rate": 6.8928648533329415e-06, "loss": 0.0022, "step": 133390 }, { "epoch": 0.8776142576133366, "grad_norm": 0.0638830654037974, "learning_rate": 6.892333460131757e-06, "loss": 0.0016, "step": 133400 }, { "epoch": 0.87768004578857, "grad_norm": 0.054492642684064084, "learning_rate": 6.89180204198179e-06, "loss": 0.0017, "step": 133410 }, { "epoch": 0.8777458339638033, "grad_norm": 0.12263947556530157, "learning_rate": 6.891270598890048e-06, "loss": 0.0007, "step": 133420 }, { "epoch": 0.8778116221390367, "grad_norm": 0.059113947154212275, "learning_rate": 6.890739130863535e-06, "loss": 0.0011, "step": 133430 }, { "epoch": 0.8778774103142701, "grad_norm": 0.09856281022957962, "learning_rate": 6.890207637909261e-06, "loss": 0.001, "step": 133440 }, { "epoch": 0.8779431984895035, "grad_norm": 0.037641984269517696, "learning_rate": 6.889676120034232e-06, "loss": 0.0015, "step": 133450 }, { "epoch": 0.8780089866647369, "grad_norm": 0.07520287010952136, "learning_rate": 6.889144577245455e-06, "loss": 0.0011, "step": 133460 }, { "epoch": 0.8780747748399703, "grad_norm": 0.004067344828057352, "learning_rate": 6.888613009549936e-06, "loss": 0.0019, "step": 133470 }, { "epoch": 0.8781405630152036, "grad_norm": 0.07707163812210584, "learning_rate": 6.888081416954688e-06, "loss": 0.0013, "step": 133480 }, { "epoch": 0.878206351190437, "grad_norm": 0.000755906799715088, "learning_rate": 6.887549799466717e-06, "loss": 0.0006, "step": 133490 }, { "epoch": 0.8782721393656704, "grad_norm": 0.04804872601153945, "learning_rate": 6.887018157093033e-06, "loss": 0.0007, "step": 133500 }, { "epoch": 0.8783379275409038, "grad_norm": 0.012316188068542669, "learning_rate": 6.8864864898406426e-06, "loss": 0.0016, "step": 133510 }, { "epoch": 0.8784037157161372, "grad_norm": 0.07442154990271743, "learning_rate": 6.885954797716559e-06, "loss": 0.0014, "step": 133520 }, { "epoch": 0.8784695038913706, "grad_norm": 0.05865250945626629, "learning_rate": 6.885423080727787e-06, "loss": 0.0016, "step": 133530 }, { "epoch": 0.878535292066604, "grad_norm": 0.019119209007845278, "learning_rate": 6.8848913388813425e-06, "loss": 0.0013, "step": 133540 }, { "epoch": 0.8786010802418374, "grad_norm": 0.09130989876355367, "learning_rate": 6.884359572184233e-06, "loss": 0.0009, "step": 133550 }, { "epoch": 0.8786668684170708, "grad_norm": 0.044819732374354784, "learning_rate": 6.883827780643469e-06, "loss": 0.0013, "step": 133560 }, { "epoch": 0.8787326565923042, "grad_norm": 0.03869077215236865, "learning_rate": 6.883295964266065e-06, "loss": 0.0042, "step": 133570 }, { "epoch": 0.8787984447675374, "grad_norm": 0.01731097937324163, "learning_rate": 6.882764123059029e-06, "loss": 0.0012, "step": 133580 }, { "epoch": 0.8788642329427708, "grad_norm": 0.05972679023938219, "learning_rate": 6.882232257029373e-06, "loss": 0.0014, "step": 133590 }, { "epoch": 0.8789300211180042, "grad_norm": 0.12646735057430025, "learning_rate": 6.881700366184111e-06, "loss": 0.0027, "step": 133600 }, { "epoch": 0.8789958092932376, "grad_norm": 0.004721795557313591, "learning_rate": 6.881168450530255e-06, "loss": 0.0008, "step": 133610 }, { "epoch": 0.879061597468471, "grad_norm": 0.055345800577357086, "learning_rate": 6.880636510074819e-06, "loss": 0.0011, "step": 133620 }, { "epoch": 0.8791273856437044, "grad_norm": 0.12667680005407703, "learning_rate": 6.880104544824813e-06, "loss": 0.0013, "step": 133630 }, { "epoch": 0.8791931738189378, "grad_norm": 0.04732844724966943, "learning_rate": 6.879572554787253e-06, "loss": 0.0011, "step": 133640 }, { "epoch": 0.8792589619941712, "grad_norm": 0.0014633330900269404, "learning_rate": 6.879040539969152e-06, "loss": 0.0008, "step": 133650 }, { "epoch": 0.8793247501694046, "grad_norm": 0.0970803417756735, "learning_rate": 6.878508500377523e-06, "loss": 0.0013, "step": 133660 }, { "epoch": 0.879390538344638, "grad_norm": 0.13970538160425452, "learning_rate": 6.877976436019385e-06, "loss": 0.0017, "step": 133670 }, { "epoch": 0.8794563265198713, "grad_norm": 0.06656114665158831, "learning_rate": 6.877444346901747e-06, "loss": 0.0015, "step": 133680 }, { "epoch": 0.8795221146951047, "grad_norm": 0.04055228058100541, "learning_rate": 6.876912233031627e-06, "loss": 0.0026, "step": 133690 }, { "epoch": 0.8795879028703381, "grad_norm": 0.0370180417311959, "learning_rate": 6.876380094416042e-06, "loss": 0.001, "step": 133700 }, { "epoch": 0.8796536910455715, "grad_norm": 0.06733643726552835, "learning_rate": 6.875847931062005e-06, "loss": 0.0011, "step": 133710 }, { "epoch": 0.8797194792208048, "grad_norm": 0.03265824297859201, "learning_rate": 6.875315742976531e-06, "loss": 0.0007, "step": 133720 }, { "epoch": 0.8797852673960382, "grad_norm": 0.020583228928064568, "learning_rate": 6.874783530166641e-06, "loss": 0.0007, "step": 133730 }, { "epoch": 0.8798510555712716, "grad_norm": 0.011586764474777826, "learning_rate": 6.874251292639349e-06, "loss": 0.0008, "step": 133740 }, { "epoch": 0.879916843746505, "grad_norm": 0.0030829649264125614, "learning_rate": 6.873719030401673e-06, "loss": 0.0029, "step": 133750 }, { "epoch": 0.8799826319217384, "grad_norm": 0.08742952780091894, "learning_rate": 6.873186743460629e-06, "loss": 0.0014, "step": 133760 }, { "epoch": 0.8800484200969718, "grad_norm": 0.02303588409504844, "learning_rate": 6.872654431823235e-06, "loss": 0.0014, "step": 133770 }, { "epoch": 0.8801142082722051, "grad_norm": 0.07674356286439266, "learning_rate": 6.87212209549651e-06, "loss": 0.0016, "step": 133780 }, { "epoch": 0.8801799964474385, "grad_norm": 0.04051766692305306, "learning_rate": 6.871589734487472e-06, "loss": 0.0013, "step": 133790 }, { "epoch": 0.8802457846226719, "grad_norm": 0.019249982807617753, "learning_rate": 6.8710573488031406e-06, "loss": 0.0013, "step": 133800 }, { "epoch": 0.8803115727979053, "grad_norm": 0.02971453683569043, "learning_rate": 6.870524938450534e-06, "loss": 0.0027, "step": 133810 }, { "epoch": 0.8803773609731387, "grad_norm": 0.0668385911646142, "learning_rate": 6.869992503436671e-06, "loss": 0.0007, "step": 133820 }, { "epoch": 0.8804431491483721, "grad_norm": 1.2656232946858308, "learning_rate": 6.869460043768572e-06, "loss": 0.0016, "step": 133830 }, { "epoch": 0.8805089373236055, "grad_norm": 0.05104445042524346, "learning_rate": 6.868927559453257e-06, "loss": 0.0015, "step": 133840 }, { "epoch": 0.8805747254988389, "grad_norm": 0.03008593079193142, "learning_rate": 6.868395050497745e-06, "loss": 0.0011, "step": 133850 }, { "epoch": 0.8806405136740723, "grad_norm": 0.029407168116910874, "learning_rate": 6.867862516909059e-06, "loss": 0.0019, "step": 133860 }, { "epoch": 0.8807063018493057, "grad_norm": 0.08463177749250526, "learning_rate": 6.86732995869422e-06, "loss": 0.0018, "step": 133870 }, { "epoch": 0.8807720900245389, "grad_norm": 0.6435506008403409, "learning_rate": 6.866797375860247e-06, "loss": 0.0015, "step": 133880 }, { "epoch": 0.8808378781997723, "grad_norm": 0.08490840910151018, "learning_rate": 6.866264768414163e-06, "loss": 0.0015, "step": 133890 }, { "epoch": 0.8809036663750057, "grad_norm": 0.021292092360658984, "learning_rate": 6.865732136362989e-06, "loss": 0.0007, "step": 133900 }, { "epoch": 0.8809694545502391, "grad_norm": 0.006230754712549052, "learning_rate": 6.86519947971375e-06, "loss": 0.0009, "step": 133910 }, { "epoch": 0.8810352427254725, "grad_norm": 0.11289473259922818, "learning_rate": 6.864666798473465e-06, "loss": 0.0012, "step": 133920 }, { "epoch": 0.8811010309007059, "grad_norm": 0.18978693564281668, "learning_rate": 6.86413409264916e-06, "loss": 0.0019, "step": 133930 }, { "epoch": 0.8811668190759393, "grad_norm": 0.10829845977030793, "learning_rate": 6.863601362247856e-06, "loss": 0.0012, "step": 133940 }, { "epoch": 0.8812326072511727, "grad_norm": 0.014699010393247123, "learning_rate": 6.863068607276577e-06, "loss": 0.0009, "step": 133950 }, { "epoch": 0.8812983954264061, "grad_norm": 0.02761149686737048, "learning_rate": 6.862535827742349e-06, "loss": 0.0011, "step": 133960 }, { "epoch": 0.8813641836016395, "grad_norm": 0.05041173716711823, "learning_rate": 6.862003023652195e-06, "loss": 0.0009, "step": 133970 }, { "epoch": 0.8814299717768729, "grad_norm": 0.03521516928491419, "learning_rate": 6.8614701950131365e-06, "loss": 0.001, "step": 133980 }, { "epoch": 0.8814957599521062, "grad_norm": 0.05535171472064654, "learning_rate": 6.860937341832204e-06, "loss": 0.0016, "step": 133990 }, { "epoch": 0.8815615481273396, "grad_norm": 0.009534188722208521, "learning_rate": 6.860404464116419e-06, "loss": 0.001, "step": 134000 }, { "epoch": 0.881627336302573, "grad_norm": 0.09368042852794421, "learning_rate": 6.859871561872807e-06, "loss": 0.0022, "step": 134010 }, { "epoch": 0.8816931244778063, "grad_norm": 0.040555905353106186, "learning_rate": 6.859338635108396e-06, "loss": 0.0009, "step": 134020 }, { "epoch": 0.8817589126530397, "grad_norm": 0.09825875055871794, "learning_rate": 6.858805683830211e-06, "loss": 0.0014, "step": 134030 }, { "epoch": 0.8818247008282731, "grad_norm": 0.022858200877413654, "learning_rate": 6.858272708045277e-06, "loss": 0.0024, "step": 134040 }, { "epoch": 0.8818904890035065, "grad_norm": 0.036370172203996305, "learning_rate": 6.857739707760623e-06, "loss": 0.0017, "step": 134050 }, { "epoch": 0.8819562771787399, "grad_norm": 0.03372641783904535, "learning_rate": 6.857206682983275e-06, "loss": 0.0006, "step": 134060 }, { "epoch": 0.8820220653539733, "grad_norm": 0.10099983245486498, "learning_rate": 6.856673633720262e-06, "loss": 0.0014, "step": 134070 }, { "epoch": 0.8820878535292067, "grad_norm": 0.06657275652522156, "learning_rate": 6.85614055997861e-06, "loss": 0.0015, "step": 134080 }, { "epoch": 0.88215364170444, "grad_norm": 0.006202728974926936, "learning_rate": 6.8556074617653475e-06, "loss": 0.0012, "step": 134090 }, { "epoch": 0.8822194298796734, "grad_norm": 0.05532380797151573, "learning_rate": 6.8550743390875044e-06, "loss": 0.0022, "step": 134100 }, { "epoch": 0.8822852180549068, "grad_norm": 0.11744269725328958, "learning_rate": 6.854541191952107e-06, "loss": 0.002, "step": 134110 }, { "epoch": 0.8823510062301402, "grad_norm": 0.017680300232986897, "learning_rate": 6.854008020366185e-06, "loss": 0.0008, "step": 134120 }, { "epoch": 0.8824167944053736, "grad_norm": 0.03253284158663127, "learning_rate": 6.85347482433677e-06, "loss": 0.001, "step": 134130 }, { "epoch": 0.882482582580607, "grad_norm": 0.01620649247569095, "learning_rate": 6.852941603870891e-06, "loss": 0.0012, "step": 134140 }, { "epoch": 0.8825483707558404, "grad_norm": 0.040906422117085416, "learning_rate": 6.852408358975575e-06, "loss": 0.0012, "step": 134150 }, { "epoch": 0.8826141589310738, "grad_norm": 0.13139962323581544, "learning_rate": 6.851875089657857e-06, "loss": 0.0009, "step": 134160 }, { "epoch": 0.8826799471063072, "grad_norm": 0.049496727944611005, "learning_rate": 6.851341795924764e-06, "loss": 0.0019, "step": 134170 }, { "epoch": 0.8827457352815405, "grad_norm": 0.25179909264870054, "learning_rate": 6.8508084777833274e-06, "loss": 0.0016, "step": 134180 }, { "epoch": 0.8828115234567738, "grad_norm": 0.07024236117213652, "learning_rate": 6.850275135240581e-06, "loss": 0.0007, "step": 134190 }, { "epoch": 0.8828773116320072, "grad_norm": 0.05511198207441218, "learning_rate": 6.8497417683035535e-06, "loss": 0.0007, "step": 134200 }, { "epoch": 0.8829430998072406, "grad_norm": 0.012068579251049746, "learning_rate": 6.84920837697928e-06, "loss": 0.0009, "step": 134210 }, { "epoch": 0.883008887982474, "grad_norm": 0.02217727654372188, "learning_rate": 6.8486749612747905e-06, "loss": 0.0014, "step": 134220 }, { "epoch": 0.8830746761577074, "grad_norm": 0.050163748545845294, "learning_rate": 6.848141521197118e-06, "loss": 0.0031, "step": 134230 }, { "epoch": 0.8831404643329408, "grad_norm": 0.03783325545443043, "learning_rate": 6.847608056753294e-06, "loss": 0.0011, "step": 134240 }, { "epoch": 0.8832062525081742, "grad_norm": 0.08297174911614137, "learning_rate": 6.8470745679503555e-06, "loss": 0.0022, "step": 134250 }, { "epoch": 0.8832720406834076, "grad_norm": 0.17419495773471724, "learning_rate": 6.846541054795335e-06, "loss": 0.001, "step": 134260 }, { "epoch": 0.883337828858641, "grad_norm": 0.07428757498943706, "learning_rate": 6.846007517295263e-06, "loss": 0.001, "step": 134270 }, { "epoch": 0.8834036170338744, "grad_norm": 0.02538537207871804, "learning_rate": 6.845473955457178e-06, "loss": 0.0006, "step": 134280 }, { "epoch": 0.8834694052091077, "grad_norm": 0.056589039098243005, "learning_rate": 6.844940369288114e-06, "loss": 0.0007, "step": 134290 }, { "epoch": 0.883535193384341, "grad_norm": 0.11489212384045872, "learning_rate": 6.8444067587951024e-06, "loss": 0.0016, "step": 134300 }, { "epoch": 0.8836009815595744, "grad_norm": 0.025481364269300037, "learning_rate": 6.84387312398518e-06, "loss": 0.0016, "step": 134310 }, { "epoch": 0.8836667697348078, "grad_norm": 0.040534254928145566, "learning_rate": 6.843339464865384e-06, "loss": 0.0009, "step": 134320 }, { "epoch": 0.8837325579100412, "grad_norm": 0.07105856108382415, "learning_rate": 6.84280578144275e-06, "loss": 0.001, "step": 134330 }, { "epoch": 0.8837983460852746, "grad_norm": 0.005835507983276565, "learning_rate": 6.842272073724312e-06, "loss": 0.0007, "step": 134340 }, { "epoch": 0.883864134260508, "grad_norm": 0.01672135661834707, "learning_rate": 6.8417383417171075e-06, "loss": 0.0011, "step": 134350 }, { "epoch": 0.8839299224357414, "grad_norm": 0.02791831154482254, "learning_rate": 6.841204585428176e-06, "loss": 0.0018, "step": 134360 }, { "epoch": 0.8839957106109748, "grad_norm": 0.1068451348002691, "learning_rate": 6.840670804864549e-06, "loss": 0.0019, "step": 134370 }, { "epoch": 0.8840614987862082, "grad_norm": 0.027347668755436715, "learning_rate": 6.840137000033269e-06, "loss": 0.0006, "step": 134380 }, { "epoch": 0.8841272869614416, "grad_norm": 0.06392392765209029, "learning_rate": 6.839603170941371e-06, "loss": 0.0017, "step": 134390 }, { "epoch": 0.8841930751366749, "grad_norm": 0.012119861227085329, "learning_rate": 6.839069317595895e-06, "loss": 0.0009, "step": 134400 }, { "epoch": 0.8842588633119083, "grad_norm": 0.0029472563459684792, "learning_rate": 6.83853544000388e-06, "loss": 0.0008, "step": 134410 }, { "epoch": 0.8843246514871417, "grad_norm": 0.04058762700765208, "learning_rate": 6.838001538172361e-06, "loss": 0.0012, "step": 134420 }, { "epoch": 0.8843904396623751, "grad_norm": 0.0176692813842904, "learning_rate": 6.837467612108378e-06, "loss": 0.0007, "step": 134430 }, { "epoch": 0.8844562278376085, "grad_norm": 0.04429734435595704, "learning_rate": 6.836933661818972e-06, "loss": 0.0026, "step": 134440 }, { "epoch": 0.8845220160128419, "grad_norm": 0.09734081752175555, "learning_rate": 6.8363996873111835e-06, "loss": 0.0023, "step": 134450 }, { "epoch": 0.8845878041880753, "grad_norm": 0.033162757684948266, "learning_rate": 6.8358656885920516e-06, "loss": 0.0005, "step": 134460 }, { "epoch": 0.8846535923633086, "grad_norm": 0.04486125330842938, "learning_rate": 6.835331665668615e-06, "loss": 0.002, "step": 134470 }, { "epoch": 0.884719380538542, "grad_norm": 0.03207222028138521, "learning_rate": 6.834797618547916e-06, "loss": 0.0013, "step": 134480 }, { "epoch": 0.8847851687137754, "grad_norm": 0.04269353164303592, "learning_rate": 6.834263547236993e-06, "loss": 0.0012, "step": 134490 }, { "epoch": 0.8848509568890087, "grad_norm": 0.061444624184721144, "learning_rate": 6.833729451742892e-06, "loss": 0.0012, "step": 134500 }, { "epoch": 0.8849167450642421, "grad_norm": 0.08085678139546401, "learning_rate": 6.8331953320726505e-06, "loss": 0.0013, "step": 134510 }, { "epoch": 0.8849825332394755, "grad_norm": 0.050361187439538754, "learning_rate": 6.832661188233312e-06, "loss": 0.0006, "step": 134520 }, { "epoch": 0.8850483214147089, "grad_norm": 0.10204712841107301, "learning_rate": 6.832127020231919e-06, "loss": 0.001, "step": 134530 }, { "epoch": 0.8851141095899423, "grad_norm": 0.07806214731276519, "learning_rate": 6.831592828075513e-06, "loss": 0.0008, "step": 134540 }, { "epoch": 0.8851798977651757, "grad_norm": 0.010042837915529962, "learning_rate": 6.831058611771137e-06, "loss": 0.0006, "step": 134550 }, { "epoch": 0.8852456859404091, "grad_norm": 0.028279650595482635, "learning_rate": 6.8305243713258354e-06, "loss": 0.0022, "step": 134560 }, { "epoch": 0.8853114741156425, "grad_norm": 0.0110128735465747, "learning_rate": 6.82999010674665e-06, "loss": 0.0006, "step": 134570 }, { "epoch": 0.8853772622908759, "grad_norm": 0.07591687025008331, "learning_rate": 6.829455818040627e-06, "loss": 0.0076, "step": 134580 }, { "epoch": 0.8854430504661093, "grad_norm": 0.13325928692335967, "learning_rate": 6.828921505214809e-06, "loss": 0.0013, "step": 134590 }, { "epoch": 0.8855088386413426, "grad_norm": 0.04053083510758725, "learning_rate": 6.828387168276239e-06, "loss": 0.0008, "step": 134600 }, { "epoch": 0.885574626816576, "grad_norm": 0.0031102301076345405, "learning_rate": 6.827852807231965e-06, "loss": 0.0006, "step": 134610 }, { "epoch": 0.8856404149918093, "grad_norm": 0.019680524279953867, "learning_rate": 6.827318422089027e-06, "loss": 0.0011, "step": 134620 }, { "epoch": 0.8857062031670427, "grad_norm": 0.013460644749303651, "learning_rate": 6.826784012854477e-06, "loss": 0.0005, "step": 134630 }, { "epoch": 0.8857719913422761, "grad_norm": 0.043292637157252095, "learning_rate": 6.826249579535358e-06, "loss": 0.0021, "step": 134640 }, { "epoch": 0.8858377795175095, "grad_norm": 0.10237649280579635, "learning_rate": 6.8257151221387145e-06, "loss": 0.0018, "step": 134650 }, { "epoch": 0.8859035676927429, "grad_norm": 0.021542314857105605, "learning_rate": 6.8251806406715925e-06, "loss": 0.0011, "step": 134660 }, { "epoch": 0.8859693558679763, "grad_norm": 0.0158990267406065, "learning_rate": 6.824646135141042e-06, "loss": 0.0023, "step": 134670 }, { "epoch": 0.8860351440432097, "grad_norm": 0.03196865725857321, "learning_rate": 6.824111605554107e-06, "loss": 0.0023, "step": 134680 }, { "epoch": 0.8861009322184431, "grad_norm": 0.02409507994429544, "learning_rate": 6.823577051917836e-06, "loss": 0.0009, "step": 134690 }, { "epoch": 0.8861667203936764, "grad_norm": 0.023722051997599036, "learning_rate": 6.823042474239276e-06, "loss": 0.0012, "step": 134700 }, { "epoch": 0.8862325085689098, "grad_norm": 0.004238258721760209, "learning_rate": 6.8225078725254765e-06, "loss": 0.0017, "step": 134710 }, { "epoch": 0.8862982967441432, "grad_norm": 0.031356495123423915, "learning_rate": 6.821973246783484e-06, "loss": 0.0012, "step": 134720 }, { "epoch": 0.8863640849193766, "grad_norm": 0.008367882840996198, "learning_rate": 6.821438597020349e-06, "loss": 0.001, "step": 134730 }, { "epoch": 0.88642987309461, "grad_norm": 0.048933636434355436, "learning_rate": 6.820903923243118e-06, "loss": 0.0018, "step": 134740 }, { "epoch": 0.8864956612698434, "grad_norm": 0.03569156288604286, "learning_rate": 6.820369225458842e-06, "loss": 0.0011, "step": 134750 }, { "epoch": 0.8865614494450768, "grad_norm": 0.012087961333577634, "learning_rate": 6.819834503674569e-06, "loss": 0.0005, "step": 134760 }, { "epoch": 0.8866272376203101, "grad_norm": 0.11093356946373041, "learning_rate": 6.819299757897351e-06, "loss": 0.0019, "step": 134770 }, { "epoch": 0.8866930257955435, "grad_norm": 0.0757375081051316, "learning_rate": 6.818764988134236e-06, "loss": 0.0011, "step": 134780 }, { "epoch": 0.8867588139707769, "grad_norm": 0.0732428073229898, "learning_rate": 6.8182301943922765e-06, "loss": 0.0011, "step": 134790 }, { "epoch": 0.8868246021460102, "grad_norm": 0.05002240737616326, "learning_rate": 6.817695376678523e-06, "loss": 0.0007, "step": 134800 }, { "epoch": 0.8868903903212436, "grad_norm": 0.015105506101557194, "learning_rate": 6.817160535000022e-06, "loss": 0.0011, "step": 134810 }, { "epoch": 0.886956178496477, "grad_norm": 0.04046060035940385, "learning_rate": 6.816625669363832e-06, "loss": 0.0017, "step": 134820 }, { "epoch": 0.8870219666717104, "grad_norm": 0.012698156481440057, "learning_rate": 6.816090779777001e-06, "loss": 0.0009, "step": 134830 }, { "epoch": 0.8870877548469438, "grad_norm": 0.0035189582287617023, "learning_rate": 6.815555866246582e-06, "loss": 0.0009, "step": 134840 }, { "epoch": 0.8871535430221772, "grad_norm": 0.04964120081865478, "learning_rate": 6.815020928779626e-06, "loss": 0.0012, "step": 134850 }, { "epoch": 0.8872193311974106, "grad_norm": 0.10951002979848838, "learning_rate": 6.814485967383187e-06, "loss": 0.0029, "step": 134860 }, { "epoch": 0.887285119372644, "grad_norm": 0.04170527991267045, "learning_rate": 6.813950982064317e-06, "loss": 0.001, "step": 134870 }, { "epoch": 0.8873509075478774, "grad_norm": 0.030702878885587783, "learning_rate": 6.81341597283007e-06, "loss": 0.0015, "step": 134880 }, { "epoch": 0.8874166957231108, "grad_norm": 0.04196129793637601, "learning_rate": 6.812880939687499e-06, "loss": 0.0011, "step": 134890 }, { "epoch": 0.8874824838983442, "grad_norm": 0.014418472336126633, "learning_rate": 6.8123458826436605e-06, "loss": 0.0018, "step": 134900 }, { "epoch": 0.8875482720735774, "grad_norm": 0.06137265214288274, "learning_rate": 6.811810801705605e-06, "loss": 0.001, "step": 134910 }, { "epoch": 0.8876140602488108, "grad_norm": 0.039447271485581664, "learning_rate": 6.81127569688039e-06, "loss": 0.0009, "step": 134920 }, { "epoch": 0.8876798484240442, "grad_norm": 0.09662963661720685, "learning_rate": 6.8107405681750695e-06, "loss": 0.0019, "step": 134930 }, { "epoch": 0.8877456365992776, "grad_norm": 0.01195682936059223, "learning_rate": 6.810205415596696e-06, "loss": 0.0016, "step": 134940 }, { "epoch": 0.887811424774511, "grad_norm": 0.085026475871298, "learning_rate": 6.80967023915233e-06, "loss": 0.0015, "step": 134950 }, { "epoch": 0.8878772129497444, "grad_norm": 0.13623395603144287, "learning_rate": 6.809135038849024e-06, "loss": 0.0012, "step": 134960 }, { "epoch": 0.8879430011249778, "grad_norm": 0.12389593974359113, "learning_rate": 6.808599814693835e-06, "loss": 0.0006, "step": 134970 }, { "epoch": 0.8880087893002112, "grad_norm": 0.05075099641809487, "learning_rate": 6.80806456669382e-06, "loss": 0.0014, "step": 134980 }, { "epoch": 0.8880745774754446, "grad_norm": 0.029932447380214208, "learning_rate": 6.807529294856035e-06, "loss": 0.001, "step": 134990 }, { "epoch": 0.888140365650678, "grad_norm": 0.025973636884193657, "learning_rate": 6.806993999187537e-06, "loss": 0.0005, "step": 135000 }, { "epoch": 0.8882061538259113, "grad_norm": 0.2212518663664218, "learning_rate": 6.806458679695382e-06, "loss": 0.0029, "step": 135010 }, { "epoch": 0.8882719420011447, "grad_norm": 0.0017237138047811551, "learning_rate": 6.805923336386633e-06, "loss": 0.001, "step": 135020 }, { "epoch": 0.8883377301763781, "grad_norm": 0.13537122132010546, "learning_rate": 6.8053879692683426e-06, "loss": 0.0013, "step": 135030 }, { "epoch": 0.8884035183516115, "grad_norm": 0.03506661912985135, "learning_rate": 6.804852578347572e-06, "loss": 0.0012, "step": 135040 }, { "epoch": 0.8884693065268449, "grad_norm": 0.027595552135260652, "learning_rate": 6.804317163631377e-06, "loss": 0.0008, "step": 135050 }, { "epoch": 0.8885350947020783, "grad_norm": 0.002833591385813513, "learning_rate": 6.80378172512682e-06, "loss": 0.0008, "step": 135060 }, { "epoch": 0.8886008828773116, "grad_norm": 0.046235266812120486, "learning_rate": 6.803246262840958e-06, "loss": 0.0008, "step": 135070 }, { "epoch": 0.888666671052545, "grad_norm": 0.04442708547621203, "learning_rate": 6.802710776780852e-06, "loss": 0.0034, "step": 135080 }, { "epoch": 0.8887324592277784, "grad_norm": 0.030948956736507573, "learning_rate": 6.8021752669535614e-06, "loss": 0.0005, "step": 135090 }, { "epoch": 0.8887982474030118, "grad_norm": 0.004816488258205778, "learning_rate": 6.8016397333661455e-06, "loss": 0.0008, "step": 135100 }, { "epoch": 0.8888640355782451, "grad_norm": 0.011720745431171457, "learning_rate": 6.801104176025667e-06, "loss": 0.0015, "step": 135110 }, { "epoch": 0.8889298237534785, "grad_norm": 0.08739750395698791, "learning_rate": 6.800568594939185e-06, "loss": 0.0008, "step": 135120 }, { "epoch": 0.8889956119287119, "grad_norm": 0.0030901794898276053, "learning_rate": 6.8000329901137605e-06, "loss": 0.0008, "step": 135130 }, { "epoch": 0.8890614001039453, "grad_norm": 0.23577264071423476, "learning_rate": 6.7994973615564565e-06, "loss": 0.0012, "step": 135140 }, { "epoch": 0.8891271882791787, "grad_norm": 0.04471402039471367, "learning_rate": 6.798961709274333e-06, "loss": 0.0012, "step": 135150 }, { "epoch": 0.8891929764544121, "grad_norm": 0.10798521928474072, "learning_rate": 6.798426033274455e-06, "loss": 0.0014, "step": 135160 }, { "epoch": 0.8892587646296455, "grad_norm": 0.007467330400152407, "learning_rate": 6.797890333563881e-06, "loss": 0.0006, "step": 135170 }, { "epoch": 0.8893245528048789, "grad_norm": 0.12411649826887554, "learning_rate": 6.7973546101496765e-06, "loss": 0.0012, "step": 135180 }, { "epoch": 0.8893903409801123, "grad_norm": 0.03072379459096956, "learning_rate": 6.796818863038903e-06, "loss": 0.0011, "step": 135190 }, { "epoch": 0.8894561291553457, "grad_norm": 0.07661750059312544, "learning_rate": 6.796283092238625e-06, "loss": 0.0008, "step": 135200 }, { "epoch": 0.889521917330579, "grad_norm": 0.0005312113841170837, "learning_rate": 6.795747297755906e-06, "loss": 0.0006, "step": 135210 }, { "epoch": 0.8895877055058123, "grad_norm": 0.017455071006998227, "learning_rate": 6.79521147959781e-06, "loss": 0.001, "step": 135220 }, { "epoch": 0.8896534936810457, "grad_norm": 0.07532464248618212, "learning_rate": 6.794675637771402e-06, "loss": 0.0017, "step": 135230 }, { "epoch": 0.8897192818562791, "grad_norm": 0.16013265979847716, "learning_rate": 6.794139772283744e-06, "loss": 0.0013, "step": 135240 }, { "epoch": 0.8897850700315125, "grad_norm": 0.11847634332855268, "learning_rate": 6.793603883141904e-06, "loss": 0.0017, "step": 135250 }, { "epoch": 0.8898508582067459, "grad_norm": 0.08180739649114423, "learning_rate": 6.793067970352944e-06, "loss": 0.0017, "step": 135260 }, { "epoch": 0.8899166463819793, "grad_norm": 0.0014359893269245876, "learning_rate": 6.792532033923932e-06, "loss": 0.0016, "step": 135270 }, { "epoch": 0.8899824345572127, "grad_norm": 0.05651722662588343, "learning_rate": 6.791996073861934e-06, "loss": 0.0009, "step": 135280 }, { "epoch": 0.8900482227324461, "grad_norm": 0.001734817424613051, "learning_rate": 6.791460090174016e-06, "loss": 0.0007, "step": 135290 }, { "epoch": 0.8901140109076795, "grad_norm": 0.10091728402519287, "learning_rate": 6.790924082867243e-06, "loss": 0.0019, "step": 135300 }, { "epoch": 0.8901797990829129, "grad_norm": 0.2827968571668841, "learning_rate": 6.790388051948682e-06, "loss": 0.0013, "step": 135310 }, { "epoch": 0.8902455872581462, "grad_norm": 0.0019125436433154396, "learning_rate": 6.789851997425403e-06, "loss": 0.0005, "step": 135320 }, { "epoch": 0.8903113754333796, "grad_norm": 0.013637896481346999, "learning_rate": 6.789315919304469e-06, "loss": 0.0012, "step": 135330 }, { "epoch": 0.890377163608613, "grad_norm": 0.016174309199606936, "learning_rate": 6.788779817592949e-06, "loss": 0.0008, "step": 135340 }, { "epoch": 0.8904429517838464, "grad_norm": 0.004585783705226809, "learning_rate": 6.788243692297915e-06, "loss": 0.0012, "step": 135350 }, { "epoch": 0.8905087399590798, "grad_norm": 0.0840448982255563, "learning_rate": 6.787707543426431e-06, "loss": 0.0026, "step": 135360 }, { "epoch": 0.8905745281343131, "grad_norm": 0.14635038785924967, "learning_rate": 6.787171370985568e-06, "loss": 0.0017, "step": 135370 }, { "epoch": 0.8906403163095465, "grad_norm": 0.0658937932482124, "learning_rate": 6.786635174982393e-06, "loss": 0.001, "step": 135380 }, { "epoch": 0.8907061044847799, "grad_norm": 0.03301003819507073, "learning_rate": 6.786098955423976e-06, "loss": 0.0013, "step": 135390 }, { "epoch": 0.8907718926600133, "grad_norm": 0.02092603276464589, "learning_rate": 6.7855627123173885e-06, "loss": 0.0009, "step": 135400 }, { "epoch": 0.8908376808352467, "grad_norm": 0.060922297236681806, "learning_rate": 6.785026445669698e-06, "loss": 0.0011, "step": 135410 }, { "epoch": 0.89090346901048, "grad_norm": 0.04392397182110914, "learning_rate": 6.784490155487975e-06, "loss": 0.0011, "step": 135420 }, { "epoch": 0.8909692571857134, "grad_norm": 0.09030757607294496, "learning_rate": 6.783953841779289e-06, "loss": 0.0008, "step": 135430 }, { "epoch": 0.8910350453609468, "grad_norm": 0.026308348779501164, "learning_rate": 6.7834175045507155e-06, "loss": 0.0008, "step": 135440 }, { "epoch": 0.8911008335361802, "grad_norm": 0.02064644421263257, "learning_rate": 6.782881143809319e-06, "loss": 0.0009, "step": 135450 }, { "epoch": 0.8911666217114136, "grad_norm": 0.033205039452345333, "learning_rate": 6.782344759562176e-06, "loss": 0.0018, "step": 135460 }, { "epoch": 0.891232409886647, "grad_norm": 0.04997924022047459, "learning_rate": 6.781808351816357e-06, "loss": 0.0014, "step": 135470 }, { "epoch": 0.8912981980618804, "grad_norm": 0.02866282104473229, "learning_rate": 6.781271920578935e-06, "loss": 0.0015, "step": 135480 }, { "epoch": 0.8913639862371138, "grad_norm": 0.09206371413236525, "learning_rate": 6.780735465856979e-06, "loss": 0.0016, "step": 135490 }, { "epoch": 0.8914297744123472, "grad_norm": 0.026284244989986577, "learning_rate": 6.780198987657565e-06, "loss": 0.0013, "step": 135500 }, { "epoch": 0.8914955625875806, "grad_norm": 0.048797345419093904, "learning_rate": 6.779662485987764e-06, "loss": 0.001, "step": 135510 }, { "epoch": 0.8915613507628138, "grad_norm": 0.031238058208934095, "learning_rate": 6.77912596085465e-06, "loss": 0.0006, "step": 135520 }, { "epoch": 0.8916271389380472, "grad_norm": 0.09475150933816408, "learning_rate": 6.778589412265297e-06, "loss": 0.0014, "step": 135530 }, { "epoch": 0.8916929271132806, "grad_norm": 0.01166807779693006, "learning_rate": 6.77805284022678e-06, "loss": 0.0014, "step": 135540 }, { "epoch": 0.891758715288514, "grad_norm": 0.011023880913600294, "learning_rate": 6.777516244746171e-06, "loss": 0.0011, "step": 135550 }, { "epoch": 0.8918245034637474, "grad_norm": 0.007899068979157731, "learning_rate": 6.776979625830545e-06, "loss": 0.0016, "step": 135560 }, { "epoch": 0.8918902916389808, "grad_norm": 0.05379392770998734, "learning_rate": 6.776442983486977e-06, "loss": 0.0006, "step": 135570 }, { "epoch": 0.8919560798142142, "grad_norm": 0.020329724662417364, "learning_rate": 6.7759063177225425e-06, "loss": 0.0018, "step": 135580 }, { "epoch": 0.8920218679894476, "grad_norm": 0.022725604340095518, "learning_rate": 6.7753696285443185e-06, "loss": 0.0011, "step": 135590 }, { "epoch": 0.892087656164681, "grad_norm": 0.014017685684231368, "learning_rate": 6.774832915959378e-06, "loss": 0.0012, "step": 135600 }, { "epoch": 0.8921534443399144, "grad_norm": 0.02644899605416667, "learning_rate": 6.7742961799748e-06, "loss": 0.0005, "step": 135610 }, { "epoch": 0.8922192325151477, "grad_norm": 0.04324924690450634, "learning_rate": 6.773759420597657e-06, "loss": 0.0016, "step": 135620 }, { "epoch": 0.8922850206903811, "grad_norm": 0.10448162980673338, "learning_rate": 6.773222637835029e-06, "loss": 0.0021, "step": 135630 }, { "epoch": 0.8923508088656145, "grad_norm": 0.08012691810151364, "learning_rate": 6.772685831693991e-06, "loss": 0.0009, "step": 135640 }, { "epoch": 0.8924165970408479, "grad_norm": 0.02538542272908831, "learning_rate": 6.772149002181623e-06, "loss": 0.0006, "step": 135650 }, { "epoch": 0.8924823852160813, "grad_norm": 0.01920525223244314, "learning_rate": 6.771612149305e-06, "loss": 0.001, "step": 135660 }, { "epoch": 0.8925481733913146, "grad_norm": 0.042326092399213776, "learning_rate": 6.771075273071201e-06, "loss": 0.0012, "step": 135670 }, { "epoch": 0.892613961566548, "grad_norm": 0.05883548221221418, "learning_rate": 6.770538373487306e-06, "loss": 0.0013, "step": 135680 }, { "epoch": 0.8926797497417814, "grad_norm": 0.04059639992224358, "learning_rate": 6.77000145056039e-06, "loss": 0.001, "step": 135690 }, { "epoch": 0.8927455379170148, "grad_norm": 0.07112864608187933, "learning_rate": 6.769464504297532e-06, "loss": 0.0009, "step": 135700 }, { "epoch": 0.8928113260922482, "grad_norm": 0.038414587838724715, "learning_rate": 6.768927534705814e-06, "loss": 0.0048, "step": 135710 }, { "epoch": 0.8928771142674815, "grad_norm": 0.006351843450456139, "learning_rate": 6.768390541792314e-06, "loss": 0.0019, "step": 135720 }, { "epoch": 0.8929429024427149, "grad_norm": 0.021615385018637197, "learning_rate": 6.767853525564113e-06, "loss": 0.0014, "step": 135730 }, { "epoch": 0.8930086906179483, "grad_norm": 0.0649251424947644, "learning_rate": 6.7673164860282895e-06, "loss": 0.001, "step": 135740 }, { "epoch": 0.8930744787931817, "grad_norm": 0.001162874194212221, "learning_rate": 6.766779423191925e-06, "loss": 0.001, "step": 135750 }, { "epoch": 0.8931402669684151, "grad_norm": 0.04083509712300262, "learning_rate": 6.766242337062099e-06, "loss": 0.0012, "step": 135760 }, { "epoch": 0.8932060551436485, "grad_norm": 0.017913361235098978, "learning_rate": 6.765705227645892e-06, "loss": 0.0008, "step": 135770 }, { "epoch": 0.8932718433188819, "grad_norm": 0.00019850346390644559, "learning_rate": 6.7651680949503885e-06, "loss": 0.001, "step": 135780 }, { "epoch": 0.8933376314941153, "grad_norm": 0.001996575985252853, "learning_rate": 6.764630938982667e-06, "loss": 0.0041, "step": 135790 }, { "epoch": 0.8934034196693487, "grad_norm": 0.039094147214507775, "learning_rate": 6.7640937597498114e-06, "loss": 0.001, "step": 135800 }, { "epoch": 0.8934692078445821, "grad_norm": 0.021334120705827596, "learning_rate": 6.763556557258904e-06, "loss": 0.0018, "step": 135810 }, { "epoch": 0.8935349960198155, "grad_norm": 0.02977435199661204, "learning_rate": 6.763019331517025e-06, "loss": 0.0013, "step": 135820 }, { "epoch": 0.8936007841950487, "grad_norm": 0.03469838305040618, "learning_rate": 6.762482082531259e-06, "loss": 0.0016, "step": 135830 }, { "epoch": 0.8936665723702821, "grad_norm": 0.02118634295260956, "learning_rate": 6.761944810308689e-06, "loss": 0.001, "step": 135840 }, { "epoch": 0.8937323605455155, "grad_norm": 0.021722292861190654, "learning_rate": 6.761407514856397e-06, "loss": 0.0007, "step": 135850 }, { "epoch": 0.8937981487207489, "grad_norm": 0.02257084070655641, "learning_rate": 6.760870196181471e-06, "loss": 0.0008, "step": 135860 }, { "epoch": 0.8938639368959823, "grad_norm": 0.055175416727225604, "learning_rate": 6.760332854290991e-06, "loss": 0.0014, "step": 135870 }, { "epoch": 0.8939297250712157, "grad_norm": 0.05582968184331611, "learning_rate": 6.759795489192043e-06, "loss": 0.0011, "step": 135880 }, { "epoch": 0.8939955132464491, "grad_norm": 0.01523451125049534, "learning_rate": 6.75925810089171e-06, "loss": 0.0009, "step": 135890 }, { "epoch": 0.8940613014216825, "grad_norm": 0.031094170765474518, "learning_rate": 6.758720689397079e-06, "loss": 0.0004, "step": 135900 }, { "epoch": 0.8941270895969159, "grad_norm": 0.08100015403469239, "learning_rate": 6.758183254715233e-06, "loss": 0.0008, "step": 135910 }, { "epoch": 0.8941928777721493, "grad_norm": 0.039615369212676436, "learning_rate": 6.757645796853263e-06, "loss": 0.0004, "step": 135920 }, { "epoch": 0.8942586659473826, "grad_norm": 0.007060985684139045, "learning_rate": 6.757108315818248e-06, "loss": 0.002, "step": 135930 }, { "epoch": 0.894324454122616, "grad_norm": 0.05039059832604465, "learning_rate": 6.75657081161728e-06, "loss": 0.001, "step": 135940 }, { "epoch": 0.8943902422978494, "grad_norm": 0.027159353976367314, "learning_rate": 6.7560332842574415e-06, "loss": 0.0009, "step": 135950 }, { "epoch": 0.8944560304730828, "grad_norm": 0.030838617594387673, "learning_rate": 6.755495733745821e-06, "loss": 0.0008, "step": 135960 }, { "epoch": 0.8945218186483161, "grad_norm": 0.02154780456890233, "learning_rate": 6.754958160089504e-06, "loss": 0.001, "step": 135970 }, { "epoch": 0.8945876068235495, "grad_norm": 0.06994656168518734, "learning_rate": 6.754420563295581e-06, "loss": 0.0013, "step": 135980 }, { "epoch": 0.8946533949987829, "grad_norm": 0.16094520343479402, "learning_rate": 6.753882943371137e-06, "loss": 0.0022, "step": 135990 }, { "epoch": 0.8947191831740163, "grad_norm": 0.059447439697684394, "learning_rate": 6.7533453003232614e-06, "loss": 0.0013, "step": 136000 }, { "epoch": 0.8947849713492497, "grad_norm": 0.04067456511983641, "learning_rate": 6.752807634159044e-06, "loss": 0.0019, "step": 136010 }, { "epoch": 0.8948507595244831, "grad_norm": 0.07593271601519863, "learning_rate": 6.75226994488557e-06, "loss": 0.0011, "step": 136020 }, { "epoch": 0.8949165476997164, "grad_norm": 0.007984029682454258, "learning_rate": 6.751732232509931e-06, "loss": 0.001, "step": 136030 }, { "epoch": 0.8949823358749498, "grad_norm": 0.00019244687375393962, "learning_rate": 6.751194497039214e-06, "loss": 0.0014, "step": 136040 }, { "epoch": 0.8950481240501832, "grad_norm": 0.03192742770607777, "learning_rate": 6.750656738480511e-06, "loss": 0.0011, "step": 136050 }, { "epoch": 0.8951139122254166, "grad_norm": 0.04561570719278395, "learning_rate": 6.750118956840912e-06, "loss": 0.0007, "step": 136060 }, { "epoch": 0.89517970040065, "grad_norm": 0.016601096773654934, "learning_rate": 6.749581152127505e-06, "loss": 0.0008, "step": 136070 }, { "epoch": 0.8952454885758834, "grad_norm": 0.042670812135033036, "learning_rate": 6.749043324347381e-06, "loss": 0.0009, "step": 136080 }, { "epoch": 0.8953112767511168, "grad_norm": 0.022443674835907902, "learning_rate": 6.748505473507632e-06, "loss": 0.0021, "step": 136090 }, { "epoch": 0.8953770649263502, "grad_norm": 0.07964615830842892, "learning_rate": 6.747967599615347e-06, "loss": 0.0006, "step": 136100 }, { "epoch": 0.8954428531015836, "grad_norm": 0.028233263776216527, "learning_rate": 6.74742970267762e-06, "loss": 0.0005, "step": 136110 }, { "epoch": 0.895508641276817, "grad_norm": 0.048033404794717176, "learning_rate": 6.746891782701542e-06, "loss": 0.0015, "step": 136120 }, { "epoch": 0.8955744294520502, "grad_norm": 0.003322076674767633, "learning_rate": 6.746353839694204e-06, "loss": 0.0009, "step": 136130 }, { "epoch": 0.8956402176272836, "grad_norm": 0.2442151264633814, "learning_rate": 6.7458158736627e-06, "loss": 0.0013, "step": 136140 }, { "epoch": 0.895706005802517, "grad_norm": 0.09363221390819233, "learning_rate": 6.745277884614121e-06, "loss": 0.0015, "step": 136150 }, { "epoch": 0.8957717939777504, "grad_norm": 0.06638046830380477, "learning_rate": 6.744739872555559e-06, "loss": 0.0006, "step": 136160 }, { "epoch": 0.8958375821529838, "grad_norm": 0.1126571807598098, "learning_rate": 6.7442018374941086e-06, "loss": 0.0007, "step": 136170 }, { "epoch": 0.8959033703282172, "grad_norm": 0.06760237236314029, "learning_rate": 6.743663779436865e-06, "loss": 0.0014, "step": 136180 }, { "epoch": 0.8959691585034506, "grad_norm": 0.00803911783480331, "learning_rate": 6.74312569839092e-06, "loss": 0.0006, "step": 136190 }, { "epoch": 0.896034946678684, "grad_norm": 0.031214761591606254, "learning_rate": 6.742587594363368e-06, "loss": 0.0006, "step": 136200 }, { "epoch": 0.8961007348539174, "grad_norm": 0.025521358236908914, "learning_rate": 6.7420494673613036e-06, "loss": 0.0038, "step": 136210 }, { "epoch": 0.8961665230291508, "grad_norm": 0.0677423662665541, "learning_rate": 6.741511317391822e-06, "loss": 0.0011, "step": 136220 }, { "epoch": 0.8962323112043841, "grad_norm": 0.023036277167879843, "learning_rate": 6.740973144462015e-06, "loss": 0.003, "step": 136230 }, { "epoch": 0.8962980993796175, "grad_norm": 0.12185278829145765, "learning_rate": 6.740434948578984e-06, "loss": 0.0031, "step": 136240 }, { "epoch": 0.8963638875548509, "grad_norm": 0.02097795848808492, "learning_rate": 6.73989672974982e-06, "loss": 0.0013, "step": 136250 }, { "epoch": 0.8964296757300843, "grad_norm": 0.010637193697565881, "learning_rate": 6.739358487981621e-06, "loss": 0.0007, "step": 136260 }, { "epoch": 0.8964954639053176, "grad_norm": 0.020186016201345528, "learning_rate": 6.738820223281482e-06, "loss": 0.0015, "step": 136270 }, { "epoch": 0.896561252080551, "grad_norm": 0.010580530968856545, "learning_rate": 6.7382819356565e-06, "loss": 0.0009, "step": 136280 }, { "epoch": 0.8966270402557844, "grad_norm": 0.06727325220720945, "learning_rate": 6.737743625113772e-06, "loss": 0.0021, "step": 136290 }, { "epoch": 0.8966928284310178, "grad_norm": 0.0005429949279867182, "learning_rate": 6.737205291660395e-06, "loss": 0.0006, "step": 136300 }, { "epoch": 0.8967586166062512, "grad_norm": 0.012697215420729746, "learning_rate": 6.736666935303467e-06, "loss": 0.0009, "step": 136310 }, { "epoch": 0.8968244047814846, "grad_norm": 0.1106780821566153, "learning_rate": 6.736128556050085e-06, "loss": 0.0015, "step": 136320 }, { "epoch": 0.896890192956718, "grad_norm": 0.003663334038505446, "learning_rate": 6.735590153907347e-06, "loss": 0.0013, "step": 136330 }, { "epoch": 0.8969559811319513, "grad_norm": 0.019941265797294952, "learning_rate": 6.735051728882353e-06, "loss": 0.0009, "step": 136340 }, { "epoch": 0.8970217693071847, "grad_norm": 0.035267726882882766, "learning_rate": 6.734513280982199e-06, "loss": 0.0012, "step": 136350 }, { "epoch": 0.8970875574824181, "grad_norm": 0.02964558059358115, "learning_rate": 6.733974810213987e-06, "loss": 0.0011, "step": 136360 }, { "epoch": 0.8971533456576515, "grad_norm": 0.043562755299803635, "learning_rate": 6.733436316584813e-06, "loss": 0.0011, "step": 136370 }, { "epoch": 0.8972191338328849, "grad_norm": 0.10258111893939632, "learning_rate": 6.7328978001017795e-06, "loss": 0.0019, "step": 136380 }, { "epoch": 0.8972849220081183, "grad_norm": 0.05985966460385008, "learning_rate": 6.732359260771984e-06, "loss": 0.0009, "step": 136390 }, { "epoch": 0.8973507101833517, "grad_norm": 0.024491575768773553, "learning_rate": 6.731820698602529e-06, "loss": 0.0007, "step": 136400 }, { "epoch": 0.8974164983585851, "grad_norm": 0.020530320998632213, "learning_rate": 6.7312821136005125e-06, "loss": 0.0014, "step": 136410 }, { "epoch": 0.8974822865338185, "grad_norm": 0.0428407449471349, "learning_rate": 6.730743505773038e-06, "loss": 0.0007, "step": 136420 }, { "epoch": 0.8975480747090518, "grad_norm": 0.02207374817365525, "learning_rate": 6.730204875127204e-06, "loss": 0.001, "step": 136430 }, { "epoch": 0.8976138628842851, "grad_norm": 0.04698828042812121, "learning_rate": 6.729666221670114e-06, "loss": 0.0007, "step": 136440 }, { "epoch": 0.8976796510595185, "grad_norm": 0.0157793449306594, "learning_rate": 6.729127545408869e-06, "loss": 0.0028, "step": 136450 }, { "epoch": 0.8977454392347519, "grad_norm": 0.039871320685879126, "learning_rate": 6.72858884635057e-06, "loss": 0.002, "step": 136460 }, { "epoch": 0.8978112274099853, "grad_norm": 0.016726325357669845, "learning_rate": 6.72805012450232e-06, "loss": 0.0005, "step": 136470 }, { "epoch": 0.8978770155852187, "grad_norm": 0.007902277232539105, "learning_rate": 6.72751137987122e-06, "loss": 0.0008, "step": 136480 }, { "epoch": 0.8979428037604521, "grad_norm": 0.0046434064373486765, "learning_rate": 6.726972612464376e-06, "loss": 0.0011, "step": 136490 }, { "epoch": 0.8980085919356855, "grad_norm": 0.0663406024393924, "learning_rate": 6.726433822288889e-06, "loss": 0.0006, "step": 136500 }, { "epoch": 0.8980743801109189, "grad_norm": 0.04239501242172305, "learning_rate": 6.7258950093518645e-06, "loss": 0.0008, "step": 136510 }, { "epoch": 0.8981401682861523, "grad_norm": 0.05269451207336671, "learning_rate": 6.725356173660405e-06, "loss": 0.0015, "step": 136520 }, { "epoch": 0.8982059564613857, "grad_norm": 0.025539690411319527, "learning_rate": 6.724817315221612e-06, "loss": 0.0018, "step": 136530 }, { "epoch": 0.898271744636619, "grad_norm": 0.04627651057667584, "learning_rate": 6.7242784340425935e-06, "loss": 0.0013, "step": 136540 }, { "epoch": 0.8983375328118524, "grad_norm": 0.03530334246765502, "learning_rate": 6.723739530130453e-06, "loss": 0.0006, "step": 136550 }, { "epoch": 0.8984033209870858, "grad_norm": 0.06314853863243744, "learning_rate": 6.723200603492297e-06, "loss": 0.0021, "step": 136560 }, { "epoch": 0.8984691091623191, "grad_norm": 0.03370235944458524, "learning_rate": 6.722661654135227e-06, "loss": 0.0011, "step": 136570 }, { "epoch": 0.8985348973375525, "grad_norm": 0.08100448957074594, "learning_rate": 6.722122682066352e-06, "loss": 0.0014, "step": 136580 }, { "epoch": 0.8986006855127859, "grad_norm": 0.005360899760740695, "learning_rate": 6.721583687292778e-06, "loss": 0.0018, "step": 136590 }, { "epoch": 0.8986664736880193, "grad_norm": 0.040898684128631985, "learning_rate": 6.721044669821608e-06, "loss": 0.0006, "step": 136600 }, { "epoch": 0.8987322618632527, "grad_norm": 0.04417249595355523, "learning_rate": 6.72050562965995e-06, "loss": 0.0014, "step": 136610 }, { "epoch": 0.8987980500384861, "grad_norm": 0.05468492584111083, "learning_rate": 6.719966566814913e-06, "loss": 0.0005, "step": 136620 }, { "epoch": 0.8988638382137195, "grad_norm": 0.07083429381388258, "learning_rate": 6.719427481293602e-06, "loss": 0.0023, "step": 136630 }, { "epoch": 0.8989296263889528, "grad_norm": 0.01617701238751714, "learning_rate": 6.718888373103124e-06, "loss": 0.0013, "step": 136640 }, { "epoch": 0.8989954145641862, "grad_norm": 0.029079638031006664, "learning_rate": 6.718349242250588e-06, "loss": 0.0016, "step": 136650 }, { "epoch": 0.8990612027394196, "grad_norm": 0.1734500989212414, "learning_rate": 6.717810088743101e-06, "loss": 0.0011, "step": 136660 }, { "epoch": 0.899126990914653, "grad_norm": 0.0630065606672792, "learning_rate": 6.71727091258777e-06, "loss": 0.0021, "step": 136670 }, { "epoch": 0.8991927790898864, "grad_norm": 0.13569930604849317, "learning_rate": 6.716731713791708e-06, "loss": 0.0007, "step": 136680 }, { "epoch": 0.8992585672651198, "grad_norm": 0.017650447976211025, "learning_rate": 6.716192492362019e-06, "loss": 0.0021, "step": 136690 }, { "epoch": 0.8993243554403532, "grad_norm": 0.021120220890725402, "learning_rate": 6.715653248305816e-06, "loss": 0.0009, "step": 136700 }, { "epoch": 0.8993901436155866, "grad_norm": 0.020307034908739214, "learning_rate": 6.7151139816302045e-06, "loss": 0.001, "step": 136710 }, { "epoch": 0.89945593179082, "grad_norm": 0.07111915022693625, "learning_rate": 6.714574692342297e-06, "loss": 0.0009, "step": 136720 }, { "epoch": 0.8995217199660533, "grad_norm": 0.08045395003769253, "learning_rate": 6.714035380449204e-06, "loss": 0.0013, "step": 136730 }, { "epoch": 0.8995875081412867, "grad_norm": 0.028991507132630493, "learning_rate": 6.713496045958033e-06, "loss": 0.0012, "step": 136740 }, { "epoch": 0.89965329631652, "grad_norm": 0.01655607493131916, "learning_rate": 6.712956688875898e-06, "loss": 0.0009, "step": 136750 }, { "epoch": 0.8997190844917534, "grad_norm": 0.015501624174984987, "learning_rate": 6.712417309209909e-06, "loss": 0.0006, "step": 136760 }, { "epoch": 0.8997848726669868, "grad_norm": 0.015964727096528412, "learning_rate": 6.7118779069671754e-06, "loss": 0.0018, "step": 136770 }, { "epoch": 0.8998506608422202, "grad_norm": 0.004572730789746079, "learning_rate": 6.7113384821548114e-06, "loss": 0.0009, "step": 136780 }, { "epoch": 0.8999164490174536, "grad_norm": 0.02706832931063188, "learning_rate": 6.710799034779927e-06, "loss": 0.0008, "step": 136790 }, { "epoch": 0.899982237192687, "grad_norm": 0.0068903418906100635, "learning_rate": 6.710259564849633e-06, "loss": 0.0013, "step": 136800 }, { "epoch": 0.9000480253679204, "grad_norm": 0.0318300203606873, "learning_rate": 6.709720072371046e-06, "loss": 0.0009, "step": 136810 }, { "epoch": 0.9001138135431538, "grad_norm": 0.06620131557546038, "learning_rate": 6.709180557351276e-06, "loss": 0.0015, "step": 136820 }, { "epoch": 0.9001796017183872, "grad_norm": 0.02366838171998905, "learning_rate": 6.708641019797436e-06, "loss": 0.0008, "step": 136830 }, { "epoch": 0.9002453898936206, "grad_norm": 0.08077529736701104, "learning_rate": 6.70810145971664e-06, "loss": 0.0011, "step": 136840 }, { "epoch": 0.9003111780688539, "grad_norm": 0.0025947736903803456, "learning_rate": 6.7075618771160025e-06, "loss": 0.0011, "step": 136850 }, { "epoch": 0.9003769662440873, "grad_norm": 0.18964297994514343, "learning_rate": 6.707022272002635e-06, "loss": 0.0018, "step": 136860 }, { "epoch": 0.9004427544193206, "grad_norm": 0.013926318171661007, "learning_rate": 6.706482644383652e-06, "loss": 0.0012, "step": 136870 }, { "epoch": 0.900508542594554, "grad_norm": 0.027305866698370464, "learning_rate": 6.705942994266171e-06, "loss": 0.0009, "step": 136880 }, { "epoch": 0.9005743307697874, "grad_norm": 0.02202235052186469, "learning_rate": 6.705403321657304e-06, "loss": 0.0007, "step": 136890 }, { "epoch": 0.9006401189450208, "grad_norm": 0.011922198467305092, "learning_rate": 6.704863626564169e-06, "loss": 0.0013, "step": 136900 }, { "epoch": 0.9007059071202542, "grad_norm": 0.03648652269358521, "learning_rate": 6.704323908993878e-06, "loss": 0.0007, "step": 136910 }, { "epoch": 0.9007716952954876, "grad_norm": 0.03975809416872136, "learning_rate": 6.7037841689535484e-06, "loss": 0.0013, "step": 136920 }, { "epoch": 0.900837483470721, "grad_norm": 0.020358581822417038, "learning_rate": 6.703244406450295e-06, "loss": 0.0006, "step": 136930 }, { "epoch": 0.9009032716459544, "grad_norm": 0.05321237460845685, "learning_rate": 6.7027046214912375e-06, "loss": 0.0008, "step": 136940 }, { "epoch": 0.9009690598211877, "grad_norm": 0.06951852692397935, "learning_rate": 6.702164814083488e-06, "loss": 0.0012, "step": 136950 }, { "epoch": 0.9010348479964211, "grad_norm": 0.011374880789246046, "learning_rate": 6.701624984234168e-06, "loss": 0.0012, "step": 136960 }, { "epoch": 0.9011006361716545, "grad_norm": 0.06865676330856363, "learning_rate": 6.70108513195039e-06, "loss": 0.0011, "step": 136970 }, { "epoch": 0.9011664243468879, "grad_norm": 0.03448314700810404, "learning_rate": 6.700545257239274e-06, "loss": 0.001, "step": 136980 }, { "epoch": 0.9012322125221213, "grad_norm": 0.06270704582313989, "learning_rate": 6.700005360107938e-06, "loss": 0.0009, "step": 136990 }, { "epoch": 0.9012980006973547, "grad_norm": 0.04384305434370325, "learning_rate": 6.699465440563499e-06, "loss": 0.001, "step": 137000 }, { "epoch": 0.9013637888725881, "grad_norm": 0.04138700508552371, "learning_rate": 6.698925498613076e-06, "loss": 0.0013, "step": 137010 }, { "epoch": 0.9014295770478215, "grad_norm": 0.02586417067141702, "learning_rate": 6.698385534263787e-06, "loss": 0.0014, "step": 137020 }, { "epoch": 0.9014953652230548, "grad_norm": 0.05336757585273068, "learning_rate": 6.697845547522753e-06, "loss": 0.0017, "step": 137030 }, { "epoch": 0.9015611533982882, "grad_norm": 0.033505140686257845, "learning_rate": 6.6973055383970905e-06, "loss": 0.0011, "step": 137040 }, { "epoch": 0.9016269415735215, "grad_norm": 0.03746669231974243, "learning_rate": 6.696765506893922e-06, "loss": 0.0007, "step": 137050 }, { "epoch": 0.9016927297487549, "grad_norm": 0.020036518108418163, "learning_rate": 6.696225453020364e-06, "loss": 0.0007, "step": 137060 }, { "epoch": 0.9017585179239883, "grad_norm": 0.011061850984736549, "learning_rate": 6.695685376783538e-06, "loss": 0.0011, "step": 137070 }, { "epoch": 0.9018243060992217, "grad_norm": 0.006438137592630855, "learning_rate": 6.695145278190567e-06, "loss": 0.0007, "step": 137080 }, { "epoch": 0.9018900942744551, "grad_norm": 0.0564755549041346, "learning_rate": 6.694605157248568e-06, "loss": 0.0013, "step": 137090 }, { "epoch": 0.9019558824496885, "grad_norm": 0.0340315061089965, "learning_rate": 6.694065013964665e-06, "loss": 0.0021, "step": 137100 }, { "epoch": 0.9020216706249219, "grad_norm": 0.042105026417693134, "learning_rate": 6.6935248483459756e-06, "loss": 0.0021, "step": 137110 }, { "epoch": 0.9020874588001553, "grad_norm": 0.042592865198962625, "learning_rate": 6.692984660399626e-06, "loss": 0.0011, "step": 137120 }, { "epoch": 0.9021532469753887, "grad_norm": 0.1612582237141932, "learning_rate": 6.692444450132733e-06, "loss": 0.0015, "step": 137130 }, { "epoch": 0.9022190351506221, "grad_norm": 0.08152088790302715, "learning_rate": 6.691904217552424e-06, "loss": 0.0024, "step": 137140 }, { "epoch": 0.9022848233258554, "grad_norm": 0.039619141664052976, "learning_rate": 6.691363962665819e-06, "loss": 0.0016, "step": 137150 }, { "epoch": 0.9023506115010888, "grad_norm": 0.017336032052244592, "learning_rate": 6.6908236854800405e-06, "loss": 0.0014, "step": 137160 }, { "epoch": 0.9024163996763221, "grad_norm": 0.154335711286522, "learning_rate": 6.690283386002212e-06, "loss": 0.0022, "step": 137170 }, { "epoch": 0.9024821878515555, "grad_norm": 0.0247386390402817, "learning_rate": 6.689743064239457e-06, "loss": 0.0014, "step": 137180 }, { "epoch": 0.9025479760267889, "grad_norm": 0.02600414150076703, "learning_rate": 6.689202720198898e-06, "loss": 0.001, "step": 137190 }, { "epoch": 0.9026137642020223, "grad_norm": 0.05539747486732519, "learning_rate": 6.68866235388766e-06, "loss": 0.0018, "step": 137200 }, { "epoch": 0.9026795523772557, "grad_norm": 0.10477409811371738, "learning_rate": 6.688121965312868e-06, "loss": 0.0009, "step": 137210 }, { "epoch": 0.9027453405524891, "grad_norm": 0.011731822617487675, "learning_rate": 6.687581554481647e-06, "loss": 0.0015, "step": 137220 }, { "epoch": 0.9028111287277225, "grad_norm": 0.024797739823269806, "learning_rate": 6.68704112140112e-06, "loss": 0.0008, "step": 137230 }, { "epoch": 0.9028769169029559, "grad_norm": 0.01342519228357478, "learning_rate": 6.686500666078412e-06, "loss": 0.0008, "step": 137240 }, { "epoch": 0.9029427050781893, "grad_norm": 0.02965530002626634, "learning_rate": 6.685960188520651e-06, "loss": 0.0009, "step": 137250 }, { "epoch": 0.9030084932534226, "grad_norm": 0.007224705201478226, "learning_rate": 6.685419688734958e-06, "loss": 0.0017, "step": 137260 }, { "epoch": 0.903074281428656, "grad_norm": 0.00666857856902141, "learning_rate": 6.684879166728464e-06, "loss": 0.0005, "step": 137270 }, { "epoch": 0.9031400696038894, "grad_norm": 0.016124168400762043, "learning_rate": 6.684338622508293e-06, "loss": 0.0006, "step": 137280 }, { "epoch": 0.9032058577791228, "grad_norm": 0.022064140144181836, "learning_rate": 6.683798056081574e-06, "loss": 0.0014, "step": 137290 }, { "epoch": 0.9032716459543562, "grad_norm": 0.11164646405838141, "learning_rate": 6.683257467455429e-06, "loss": 0.0012, "step": 137300 }, { "epoch": 0.9033374341295896, "grad_norm": 0.024351757977074437, "learning_rate": 6.682716856636988e-06, "loss": 0.0007, "step": 137310 }, { "epoch": 0.903403222304823, "grad_norm": 0.02744521562951784, "learning_rate": 6.682176223633381e-06, "loss": 0.0022, "step": 137320 }, { "epoch": 0.9034690104800563, "grad_norm": 0.0035079506915608473, "learning_rate": 6.681635568451732e-06, "loss": 0.0012, "step": 137330 }, { "epoch": 0.9035347986552897, "grad_norm": 0.07516276462851981, "learning_rate": 6.68109489109917e-06, "loss": 0.0008, "step": 137340 }, { "epoch": 0.9036005868305231, "grad_norm": 0.08113213197528366, "learning_rate": 6.680554191582824e-06, "loss": 0.001, "step": 137350 }, { "epoch": 0.9036663750057564, "grad_norm": 0.03955274088551709, "learning_rate": 6.680013469909823e-06, "loss": 0.0009, "step": 137360 }, { "epoch": 0.9037321631809898, "grad_norm": 0.012240062830353934, "learning_rate": 6.679472726087294e-06, "loss": 0.0008, "step": 137370 }, { "epoch": 0.9037979513562232, "grad_norm": 0.03839845263798328, "learning_rate": 6.678931960122368e-06, "loss": 0.0009, "step": 137380 }, { "epoch": 0.9038637395314566, "grad_norm": 0.043385049041344756, "learning_rate": 6.678391172022174e-06, "loss": 0.001, "step": 137390 }, { "epoch": 0.90392952770669, "grad_norm": 0.025213301932532014, "learning_rate": 6.6778503617938425e-06, "loss": 0.0009, "step": 137400 }, { "epoch": 0.9039953158819234, "grad_norm": 0.07863348867566533, "learning_rate": 6.677309529444503e-06, "loss": 0.0013, "step": 137410 }, { "epoch": 0.9040611040571568, "grad_norm": 0.0822098645157326, "learning_rate": 6.676768674981287e-06, "loss": 0.0004, "step": 137420 }, { "epoch": 0.9041268922323902, "grad_norm": 0.027084347795215385, "learning_rate": 6.676227798411323e-06, "loss": 0.0008, "step": 137430 }, { "epoch": 0.9041926804076236, "grad_norm": 0.0288103248378588, "learning_rate": 6.675686899741742e-06, "loss": 0.0018, "step": 137440 }, { "epoch": 0.904258468582857, "grad_norm": 0.04052335927591791, "learning_rate": 6.675145978979678e-06, "loss": 0.0006, "step": 137450 }, { "epoch": 0.9043242567580903, "grad_norm": 0.035008298791429505, "learning_rate": 6.674605036132261e-06, "loss": 0.0006, "step": 137460 }, { "epoch": 0.9043900449333236, "grad_norm": 0.009146718803273609, "learning_rate": 6.674064071206623e-06, "loss": 0.0005, "step": 137470 }, { "epoch": 0.904455833108557, "grad_norm": 0.011643434504318739, "learning_rate": 6.6735230842098965e-06, "loss": 0.0015, "step": 137480 }, { "epoch": 0.9045216212837904, "grad_norm": 0.009007485792745353, "learning_rate": 6.672982075149212e-06, "loss": 0.0015, "step": 137490 }, { "epoch": 0.9045874094590238, "grad_norm": 0.01564467657486067, "learning_rate": 6.672441044031704e-06, "loss": 0.0006, "step": 137500 }, { "epoch": 0.9046531976342572, "grad_norm": 0.03946760951931515, "learning_rate": 6.671899990864505e-06, "loss": 0.0008, "step": 137510 }, { "epoch": 0.9047189858094906, "grad_norm": 0.023007540568506436, "learning_rate": 6.67135891565475e-06, "loss": 0.0008, "step": 137520 }, { "epoch": 0.904784773984724, "grad_norm": 0.04067906346142635, "learning_rate": 6.67081781840957e-06, "loss": 0.0013, "step": 137530 }, { "epoch": 0.9048505621599574, "grad_norm": 0.08007806082999015, "learning_rate": 6.670276699136101e-06, "loss": 0.0008, "step": 137540 }, { "epoch": 0.9049163503351908, "grad_norm": 0.016560756138638173, "learning_rate": 6.6697355578414755e-06, "loss": 0.0008, "step": 137550 }, { "epoch": 0.9049821385104241, "grad_norm": 0.061220243749837956, "learning_rate": 6.66919439453283e-06, "loss": 0.0012, "step": 137560 }, { "epoch": 0.9050479266856575, "grad_norm": 0.08752828819294384, "learning_rate": 6.6686532092172955e-06, "loss": 0.0009, "step": 137570 }, { "epoch": 0.9051137148608909, "grad_norm": 0.05819984995915615, "learning_rate": 6.668112001902011e-06, "loss": 0.0006, "step": 137580 }, { "epoch": 0.9051795030361243, "grad_norm": 0.10050931575265326, "learning_rate": 6.66757077259411e-06, "loss": 0.0033, "step": 137590 }, { "epoch": 0.9052452912113577, "grad_norm": 0.03948714021572237, "learning_rate": 6.6670295213007294e-06, "loss": 0.0013, "step": 137600 }, { "epoch": 0.905311079386591, "grad_norm": 0.07247927820795541, "learning_rate": 6.666488248029005e-06, "loss": 0.0034, "step": 137610 }, { "epoch": 0.9053768675618244, "grad_norm": 0.09564647439122465, "learning_rate": 6.665946952786071e-06, "loss": 0.0015, "step": 137620 }, { "epoch": 0.9054426557370578, "grad_norm": 0.02416170231918815, "learning_rate": 6.6654056355790656e-06, "loss": 0.0006, "step": 137630 }, { "epoch": 0.9055084439122912, "grad_norm": 0.17717644900858096, "learning_rate": 6.6648642964151245e-06, "loss": 0.0022, "step": 137640 }, { "epoch": 0.9055742320875246, "grad_norm": 0.009896218075407168, "learning_rate": 6.664322935301386e-06, "loss": 0.0009, "step": 137650 }, { "epoch": 0.9056400202627579, "grad_norm": 0.007109432575157516, "learning_rate": 6.663781552244988e-06, "loss": 0.001, "step": 137660 }, { "epoch": 0.9057058084379913, "grad_norm": 0.05014722361415921, "learning_rate": 6.663240147253066e-06, "loss": 0.0009, "step": 137670 }, { "epoch": 0.9057715966132247, "grad_norm": 0.051961269328366456, "learning_rate": 6.66269872033276e-06, "loss": 0.0025, "step": 137680 }, { "epoch": 0.9058373847884581, "grad_norm": 0.029032666894350984, "learning_rate": 6.662157271491206e-06, "loss": 0.0006, "step": 137690 }, { "epoch": 0.9059031729636915, "grad_norm": 0.020896741517355117, "learning_rate": 6.661615800735544e-06, "loss": 0.0014, "step": 137700 }, { "epoch": 0.9059689611389249, "grad_norm": 0.09150166690187127, "learning_rate": 6.661074308072914e-06, "loss": 0.0016, "step": 137710 }, { "epoch": 0.9060347493141583, "grad_norm": 0.03761780754351526, "learning_rate": 6.660532793510453e-06, "loss": 0.0012, "step": 137720 }, { "epoch": 0.9061005374893917, "grad_norm": 0.016436000463575767, "learning_rate": 6.6599912570553025e-06, "loss": 0.0019, "step": 137730 }, { "epoch": 0.9061663256646251, "grad_norm": 0.0498067147083601, "learning_rate": 6.6594496987146e-06, "loss": 0.0006, "step": 137740 }, { "epoch": 0.9062321138398585, "grad_norm": 0.024538572145600773, "learning_rate": 6.658908118495486e-06, "loss": 0.0012, "step": 137750 }, { "epoch": 0.9062979020150919, "grad_norm": 0.014110130414190566, "learning_rate": 6.658366516405102e-06, "loss": 0.001, "step": 137760 }, { "epoch": 0.9063636901903251, "grad_norm": 0.018063465940027227, "learning_rate": 6.657824892450586e-06, "loss": 0.0007, "step": 137770 }, { "epoch": 0.9064294783655585, "grad_norm": 0.05236479890347112, "learning_rate": 6.657283246639083e-06, "loss": 0.0009, "step": 137780 }, { "epoch": 0.9064952665407919, "grad_norm": 0.03512931729615675, "learning_rate": 6.65674157897773e-06, "loss": 0.001, "step": 137790 }, { "epoch": 0.9065610547160253, "grad_norm": 0.03519464234571805, "learning_rate": 6.656199889473672e-06, "loss": 0.0017, "step": 137800 }, { "epoch": 0.9066268428912587, "grad_norm": 0.003857498465480812, "learning_rate": 6.655658178134048e-06, "loss": 0.0013, "step": 137810 }, { "epoch": 0.9066926310664921, "grad_norm": 0.06945696088617084, "learning_rate": 6.6551164449659995e-06, "loss": 0.0012, "step": 137820 }, { "epoch": 0.9067584192417255, "grad_norm": 0.12713298288351021, "learning_rate": 6.65457468997667e-06, "loss": 0.0013, "step": 137830 }, { "epoch": 0.9068242074169589, "grad_norm": 0.048590814668467555, "learning_rate": 6.654032913173204e-06, "loss": 0.0008, "step": 137840 }, { "epoch": 0.9068899955921923, "grad_norm": 0.18012147736437525, "learning_rate": 6.653491114562742e-06, "loss": 0.0011, "step": 137850 }, { "epoch": 0.9069557837674257, "grad_norm": 0.016126395223889978, "learning_rate": 6.6529492941524276e-06, "loss": 0.0024, "step": 137860 }, { "epoch": 0.907021571942659, "grad_norm": 0.012110572517864487, "learning_rate": 6.652407451949405e-06, "loss": 0.0015, "step": 137870 }, { "epoch": 0.9070873601178924, "grad_norm": 0.02155817883492866, "learning_rate": 6.651865587960816e-06, "loss": 0.0008, "step": 137880 }, { "epoch": 0.9071531482931258, "grad_norm": 0.024058976439693423, "learning_rate": 6.651323702193806e-06, "loss": 0.0005, "step": 137890 }, { "epoch": 0.9072189364683592, "grad_norm": 0.08365916255764752, "learning_rate": 6.650781794655519e-06, "loss": 0.0011, "step": 137900 }, { "epoch": 0.9072847246435926, "grad_norm": 0.061473704619467086, "learning_rate": 6.6502398653531005e-06, "loss": 0.001, "step": 137910 }, { "epoch": 0.907350512818826, "grad_norm": 0.09378290642106805, "learning_rate": 6.649697914293696e-06, "loss": 0.0009, "step": 137920 }, { "epoch": 0.9074163009940593, "grad_norm": 0.04395178958171197, "learning_rate": 6.649155941484448e-06, "loss": 0.001, "step": 137930 }, { "epoch": 0.9074820891692927, "grad_norm": 0.001449609428806698, "learning_rate": 6.6486139469325025e-06, "loss": 0.0016, "step": 137940 }, { "epoch": 0.9075478773445261, "grad_norm": 0.08592146676813982, "learning_rate": 6.648071930645006e-06, "loss": 0.0013, "step": 137950 }, { "epoch": 0.9076136655197595, "grad_norm": 0.027192012189468494, "learning_rate": 6.647529892629104e-06, "loss": 0.0009, "step": 137960 }, { "epoch": 0.9076794536949928, "grad_norm": 0.050511085028594895, "learning_rate": 6.646987832891944e-06, "loss": 0.0016, "step": 137970 }, { "epoch": 0.9077452418702262, "grad_norm": 0.05920107620068968, "learning_rate": 6.646445751440673e-06, "loss": 0.0012, "step": 137980 }, { "epoch": 0.9078110300454596, "grad_norm": 0.03647845893686397, "learning_rate": 6.645903648282435e-06, "loss": 0.0009, "step": 137990 }, { "epoch": 0.907876818220693, "grad_norm": 0.08463191491983418, "learning_rate": 6.645361523424381e-06, "loss": 0.001, "step": 138000 }, { "epoch": 0.9079426063959264, "grad_norm": 0.017036981150431143, "learning_rate": 6.644819376873655e-06, "loss": 0.0007, "step": 138010 }, { "epoch": 0.9080083945711598, "grad_norm": 0.11411236739803249, "learning_rate": 6.644277208637404e-06, "loss": 0.0011, "step": 138020 }, { "epoch": 0.9080741827463932, "grad_norm": 0.08136089654056113, "learning_rate": 6.643735018722781e-06, "loss": 0.0012, "step": 138030 }, { "epoch": 0.9081399709216266, "grad_norm": 0.03770967961290882, "learning_rate": 6.64319280713693e-06, "loss": 0.0019, "step": 138040 }, { "epoch": 0.90820575909686, "grad_norm": 0.055033238022887086, "learning_rate": 6.642650573887002e-06, "loss": 0.0009, "step": 138050 }, { "epoch": 0.9082715472720934, "grad_norm": 0.2563296276424796, "learning_rate": 6.642108318980145e-06, "loss": 0.0011, "step": 138060 }, { "epoch": 0.9083373354473266, "grad_norm": 0.016030266012470085, "learning_rate": 6.6415660424235075e-06, "loss": 0.0025, "step": 138070 }, { "epoch": 0.90840312362256, "grad_norm": 0.044789333639858014, "learning_rate": 6.6410237442242395e-06, "loss": 0.001, "step": 138080 }, { "epoch": 0.9084689117977934, "grad_norm": 0.04024311415747103, "learning_rate": 6.640481424389489e-06, "loss": 0.0011, "step": 138090 }, { "epoch": 0.9085346999730268, "grad_norm": 0.010352360817319262, "learning_rate": 6.63993908292641e-06, "loss": 0.0022, "step": 138100 }, { "epoch": 0.9086004881482602, "grad_norm": 0.039538811321369785, "learning_rate": 6.63939671984215e-06, "loss": 0.0011, "step": 138110 }, { "epoch": 0.9086662763234936, "grad_norm": 0.09959585635375859, "learning_rate": 6.63885433514386e-06, "loss": 0.0027, "step": 138120 }, { "epoch": 0.908732064498727, "grad_norm": 0.0806950015079178, "learning_rate": 6.638311928838691e-06, "loss": 0.0009, "step": 138130 }, { "epoch": 0.9087978526739604, "grad_norm": 0.05515279713919441, "learning_rate": 6.637769500933794e-06, "loss": 0.0011, "step": 138140 }, { "epoch": 0.9088636408491938, "grad_norm": 0.0335256093260376, "learning_rate": 6.63722705143632e-06, "loss": 0.0008, "step": 138150 }, { "epoch": 0.9089294290244272, "grad_norm": 0.03589450839493884, "learning_rate": 6.636684580353423e-06, "loss": 0.0013, "step": 138160 }, { "epoch": 0.9089952171996606, "grad_norm": 0.12278871917342885, "learning_rate": 6.636142087692252e-06, "loss": 0.0016, "step": 138170 }, { "epoch": 0.9090610053748939, "grad_norm": 0.022003607824794512, "learning_rate": 6.6355995734599606e-06, "loss": 0.001, "step": 138180 }, { "epoch": 0.9091267935501273, "grad_norm": 0.06618633297961335, "learning_rate": 6.635057037663702e-06, "loss": 0.0019, "step": 138190 }, { "epoch": 0.9091925817253607, "grad_norm": 0.03029077518561294, "learning_rate": 6.634514480310629e-06, "loss": 0.0013, "step": 138200 }, { "epoch": 0.909258369900594, "grad_norm": 0.058561270283919054, "learning_rate": 6.633971901407894e-06, "loss": 0.0019, "step": 138210 }, { "epoch": 0.9093241580758274, "grad_norm": 0.044145057920353566, "learning_rate": 6.633429300962649e-06, "loss": 0.001, "step": 138220 }, { "epoch": 0.9093899462510608, "grad_norm": 0.02625990957599945, "learning_rate": 6.6328866789820515e-06, "loss": 0.0011, "step": 138230 }, { "epoch": 0.9094557344262942, "grad_norm": 0.022991473082989208, "learning_rate": 6.632344035473252e-06, "loss": 0.0006, "step": 138240 }, { "epoch": 0.9095215226015276, "grad_norm": 0.06500351860144994, "learning_rate": 6.6318013704434085e-06, "loss": 0.001, "step": 138250 }, { "epoch": 0.909587310776761, "grad_norm": 0.07320105077179792, "learning_rate": 6.631258683899671e-06, "loss": 0.0016, "step": 138260 }, { "epoch": 0.9096530989519944, "grad_norm": 0.047118333972373784, "learning_rate": 6.630715975849195e-06, "loss": 0.0005, "step": 138270 }, { "epoch": 0.9097188871272277, "grad_norm": 0.017535761316813555, "learning_rate": 6.630173246299141e-06, "loss": 0.0007, "step": 138280 }, { "epoch": 0.9097846753024611, "grad_norm": 0.020476762853131675, "learning_rate": 6.62963049525666e-06, "loss": 0.0004, "step": 138290 }, { "epoch": 0.9098504634776945, "grad_norm": 0.0023023813844309173, "learning_rate": 6.6290877227289065e-06, "loss": 0.0011, "step": 138300 }, { "epoch": 0.9099162516529279, "grad_norm": 0.15270955957339713, "learning_rate": 6.62854492872304e-06, "loss": 0.0012, "step": 138310 }, { "epoch": 0.9099820398281613, "grad_norm": 0.0037551863297263627, "learning_rate": 6.628002113246213e-06, "loss": 0.001, "step": 138320 }, { "epoch": 0.9100478280033947, "grad_norm": 0.014451558610295444, "learning_rate": 6.6274592763055855e-06, "loss": 0.0007, "step": 138330 }, { "epoch": 0.9101136161786281, "grad_norm": 0.056753120656972675, "learning_rate": 6.626916417908313e-06, "loss": 0.0012, "step": 138340 }, { "epoch": 0.9101794043538615, "grad_norm": 0.041008622243663945, "learning_rate": 6.626373538061552e-06, "loss": 0.0018, "step": 138350 }, { "epoch": 0.9102451925290949, "grad_norm": 0.10575120159171778, "learning_rate": 6.625830636772461e-06, "loss": 0.0016, "step": 138360 }, { "epoch": 0.9103109807043283, "grad_norm": 0.026010377235102213, "learning_rate": 6.625287714048199e-06, "loss": 0.0009, "step": 138370 }, { "epoch": 0.9103767688795615, "grad_norm": 0.0012027108735512336, "learning_rate": 6.62474476989592e-06, "loss": 0.001, "step": 138380 }, { "epoch": 0.9104425570547949, "grad_norm": 0.03666458708839768, "learning_rate": 6.624201804322785e-06, "loss": 0.0012, "step": 138390 }, { "epoch": 0.9105083452300283, "grad_norm": 0.03211686753007093, "learning_rate": 6.623658817335951e-06, "loss": 0.0012, "step": 138400 }, { "epoch": 0.9105741334052617, "grad_norm": 0.049869295591239135, "learning_rate": 6.623115808942578e-06, "loss": 0.0022, "step": 138410 }, { "epoch": 0.9106399215804951, "grad_norm": 0.06312603899707145, "learning_rate": 6.622572779149826e-06, "loss": 0.0011, "step": 138420 }, { "epoch": 0.9107057097557285, "grad_norm": 0.13265384817294307, "learning_rate": 6.622029727964854e-06, "loss": 0.0028, "step": 138430 }, { "epoch": 0.9107714979309619, "grad_norm": 0.06940158941863434, "learning_rate": 6.6214866553948194e-06, "loss": 0.0014, "step": 138440 }, { "epoch": 0.9108372861061953, "grad_norm": 0.04691179182926039, "learning_rate": 6.620943561446884e-06, "loss": 0.0008, "step": 138450 }, { "epoch": 0.9109030742814287, "grad_norm": 0.031313312099559024, "learning_rate": 6.620400446128208e-06, "loss": 0.001, "step": 138460 }, { "epoch": 0.9109688624566621, "grad_norm": 0.05110392722034852, "learning_rate": 6.619857309445951e-06, "loss": 0.0014, "step": 138470 }, { "epoch": 0.9110346506318954, "grad_norm": 0.11191101389960188, "learning_rate": 6.619314151407274e-06, "loss": 0.0018, "step": 138480 }, { "epoch": 0.9111004388071288, "grad_norm": 0.0353849610003886, "learning_rate": 6.61877097201934e-06, "loss": 0.0012, "step": 138490 }, { "epoch": 0.9111662269823622, "grad_norm": 0.0855936486386542, "learning_rate": 6.618227771289307e-06, "loss": 0.002, "step": 138500 }, { "epoch": 0.9112320151575956, "grad_norm": 0.03645807638387115, "learning_rate": 6.617684549224339e-06, "loss": 0.0016, "step": 138510 }, { "epoch": 0.911297803332829, "grad_norm": 0.025635136358126604, "learning_rate": 6.617141305831598e-06, "loss": 0.0011, "step": 138520 }, { "epoch": 0.9113635915080623, "grad_norm": 0.03290492184872691, "learning_rate": 6.616598041118245e-06, "loss": 0.001, "step": 138530 }, { "epoch": 0.9114293796832957, "grad_norm": 0.02989650790123386, "learning_rate": 6.6160547550914435e-06, "loss": 0.0017, "step": 138540 }, { "epoch": 0.9114951678585291, "grad_norm": 0.039952181810859896, "learning_rate": 6.615511447758355e-06, "loss": 0.0011, "step": 138550 }, { "epoch": 0.9115609560337625, "grad_norm": 0.06320106537139841, "learning_rate": 6.614968119126144e-06, "loss": 0.0012, "step": 138560 }, { "epoch": 0.9116267442089959, "grad_norm": 0.0006524014726126372, "learning_rate": 6.6144247692019726e-06, "loss": 0.0006, "step": 138570 }, { "epoch": 0.9116925323842292, "grad_norm": 0.012353699726803637, "learning_rate": 6.613881397993006e-06, "loss": 0.0024, "step": 138580 }, { "epoch": 0.9117583205594626, "grad_norm": 0.04859407544321121, "learning_rate": 6.613338005506407e-06, "loss": 0.0014, "step": 138590 }, { "epoch": 0.911824108734696, "grad_norm": 0.03753730035214026, "learning_rate": 6.612794591749338e-06, "loss": 0.0009, "step": 138600 }, { "epoch": 0.9118898969099294, "grad_norm": 0.12484461020370753, "learning_rate": 6.612251156728966e-06, "loss": 0.0013, "step": 138610 }, { "epoch": 0.9119556850851628, "grad_norm": 0.07000554269964789, "learning_rate": 6.611707700452455e-06, "loss": 0.001, "step": 138620 }, { "epoch": 0.9120214732603962, "grad_norm": 0.05157052258421817, "learning_rate": 6.61116422292697e-06, "loss": 0.0012, "step": 138630 }, { "epoch": 0.9120872614356296, "grad_norm": 0.17942672761610995, "learning_rate": 6.610620724159677e-06, "loss": 0.0008, "step": 138640 }, { "epoch": 0.912153049610863, "grad_norm": 0.028159106367510532, "learning_rate": 6.61007720415774e-06, "loss": 0.001, "step": 138650 }, { "epoch": 0.9122188377860964, "grad_norm": 0.019960703088782544, "learning_rate": 6.609533662928325e-06, "loss": 0.0011, "step": 138660 }, { "epoch": 0.9122846259613298, "grad_norm": 0.02376284100570654, "learning_rate": 6.6089901004786e-06, "loss": 0.0005, "step": 138670 }, { "epoch": 0.9123504141365631, "grad_norm": 0.029418232087090275, "learning_rate": 6.608446516815728e-06, "loss": 0.0013, "step": 138680 }, { "epoch": 0.9124162023117964, "grad_norm": 0.062089882646157, "learning_rate": 6.60790291194688e-06, "loss": 0.0016, "step": 138690 }, { "epoch": 0.9124819904870298, "grad_norm": 0.037766940779795484, "learning_rate": 6.60735928587922e-06, "loss": 0.001, "step": 138700 }, { "epoch": 0.9125477786622632, "grad_norm": 0.018679871432245896, "learning_rate": 6.6068156386199165e-06, "loss": 0.0006, "step": 138710 }, { "epoch": 0.9126135668374966, "grad_norm": 0.07325411334386053, "learning_rate": 6.606271970176137e-06, "loss": 0.0007, "step": 138720 }, { "epoch": 0.91267935501273, "grad_norm": 0.03174744425383589, "learning_rate": 6.605728280555047e-06, "loss": 0.0013, "step": 138730 }, { "epoch": 0.9127451431879634, "grad_norm": 0.004105010092897969, "learning_rate": 6.605184569763817e-06, "loss": 0.0022, "step": 138740 }, { "epoch": 0.9128109313631968, "grad_norm": 0.026658392056305623, "learning_rate": 6.604640837809616e-06, "loss": 0.001, "step": 138750 }, { "epoch": 0.9128767195384302, "grad_norm": 0.022113042817697468, "learning_rate": 6.604097084699611e-06, "loss": 0.001, "step": 138760 }, { "epoch": 0.9129425077136636, "grad_norm": 0.017404824941177285, "learning_rate": 6.603553310440971e-06, "loss": 0.0006, "step": 138770 }, { "epoch": 0.913008295888897, "grad_norm": 0.0920740253551142, "learning_rate": 6.603009515040866e-06, "loss": 0.0009, "step": 138780 }, { "epoch": 0.9130740840641303, "grad_norm": 0.023012242308026815, "learning_rate": 6.602465698506463e-06, "loss": 0.0014, "step": 138790 }, { "epoch": 0.9131398722393637, "grad_norm": 0.048557184151472886, "learning_rate": 6.601921860844935e-06, "loss": 0.001, "step": 138800 }, { "epoch": 0.913205660414597, "grad_norm": 0.022453344433014256, "learning_rate": 6.6013780020634526e-06, "loss": 0.0011, "step": 138810 }, { "epoch": 0.9132714485898304, "grad_norm": 0.029414740230234857, "learning_rate": 6.6008341221691816e-06, "loss": 0.0009, "step": 138820 }, { "epoch": 0.9133372367650638, "grad_norm": 0.097894656003267, "learning_rate": 6.600290221169297e-06, "loss": 0.0009, "step": 138830 }, { "epoch": 0.9134030249402972, "grad_norm": 0.00795410169412942, "learning_rate": 6.599746299070967e-06, "loss": 0.001, "step": 138840 }, { "epoch": 0.9134688131155306, "grad_norm": 0.06450771692054705, "learning_rate": 6.5992023558813645e-06, "loss": 0.0005, "step": 138850 }, { "epoch": 0.913534601290764, "grad_norm": 0.008703888019949587, "learning_rate": 6.5986583916076584e-06, "loss": 0.0021, "step": 138860 }, { "epoch": 0.9136003894659974, "grad_norm": 0.12575407048135406, "learning_rate": 6.598114406257023e-06, "loss": 0.0007, "step": 138870 }, { "epoch": 0.9136661776412308, "grad_norm": 0.007234946067338529, "learning_rate": 6.5975703998366306e-06, "loss": 0.0018, "step": 138880 }, { "epoch": 0.9137319658164641, "grad_norm": 0.059597760978688775, "learning_rate": 6.597026372353652e-06, "loss": 0.0013, "step": 138890 }, { "epoch": 0.9137977539916975, "grad_norm": 0.024346202073251715, "learning_rate": 6.596482323815259e-06, "loss": 0.0014, "step": 138900 }, { "epoch": 0.9138635421669309, "grad_norm": 0.0344457580906228, "learning_rate": 6.595938254228625e-06, "loss": 0.0011, "step": 138910 }, { "epoch": 0.9139293303421643, "grad_norm": 0.18895147208162447, "learning_rate": 6.595394163600923e-06, "loss": 0.0018, "step": 138920 }, { "epoch": 0.9139951185173977, "grad_norm": 0.019713040221905336, "learning_rate": 6.5948500519393285e-06, "loss": 0.0012, "step": 138930 }, { "epoch": 0.9140609066926311, "grad_norm": 0.009768474490382028, "learning_rate": 6.594305919251014e-06, "loss": 0.001, "step": 138940 }, { "epoch": 0.9141266948678645, "grad_norm": 0.09169688013854196, "learning_rate": 6.593761765543152e-06, "loss": 0.0008, "step": 138950 }, { "epoch": 0.9141924830430979, "grad_norm": 0.004448623102237934, "learning_rate": 6.5932175908229165e-06, "loss": 0.0013, "step": 138960 }, { "epoch": 0.9142582712183313, "grad_norm": 0.1709043116236182, "learning_rate": 6.592673395097484e-06, "loss": 0.0035, "step": 138970 }, { "epoch": 0.9143240593935646, "grad_norm": 0.020837579069906274, "learning_rate": 6.592129178374029e-06, "loss": 0.0008, "step": 138980 }, { "epoch": 0.9143898475687979, "grad_norm": 0.015226379412932797, "learning_rate": 6.591584940659723e-06, "loss": 0.0016, "step": 138990 }, { "epoch": 0.9144556357440313, "grad_norm": 0.19032948486824472, "learning_rate": 6.591040681961746e-06, "loss": 0.0027, "step": 139000 }, { "epoch": 0.9145214239192647, "grad_norm": 0.045255929994936876, "learning_rate": 6.590496402287273e-06, "loss": 0.001, "step": 139010 }, { "epoch": 0.9145872120944981, "grad_norm": 0.0012781338327051085, "learning_rate": 6.589952101643478e-06, "loss": 0.0019, "step": 139020 }, { "epoch": 0.9146530002697315, "grad_norm": 0.03240955748433346, "learning_rate": 6.589407780037538e-06, "loss": 0.0022, "step": 139030 }, { "epoch": 0.9147187884449649, "grad_norm": 0.020577855003951297, "learning_rate": 6.588863437476628e-06, "loss": 0.0009, "step": 139040 }, { "epoch": 0.9147845766201983, "grad_norm": 0.020181438604117, "learning_rate": 6.588319073967925e-06, "loss": 0.001, "step": 139050 }, { "epoch": 0.9148503647954317, "grad_norm": 0.046078420191540105, "learning_rate": 6.587774689518608e-06, "loss": 0.001, "step": 139060 }, { "epoch": 0.9149161529706651, "grad_norm": 0.10130828412121876, "learning_rate": 6.587230284135853e-06, "loss": 0.0011, "step": 139070 }, { "epoch": 0.9149819411458985, "grad_norm": 0.35860374316813065, "learning_rate": 6.586685857826838e-06, "loss": 0.0005, "step": 139080 }, { "epoch": 0.9150477293211319, "grad_norm": 0.021305245617038553, "learning_rate": 6.58614141059874e-06, "loss": 0.0009, "step": 139090 }, { "epoch": 0.9151135174963652, "grad_norm": 0.015811780239360477, "learning_rate": 6.585596942458736e-06, "loss": 0.0012, "step": 139100 }, { "epoch": 0.9151793056715986, "grad_norm": 0.012737383169752224, "learning_rate": 6.585052453414008e-06, "loss": 0.0008, "step": 139110 }, { "epoch": 0.915245093846832, "grad_norm": 0.06935708338930996, "learning_rate": 6.5845079434717295e-06, "loss": 0.0015, "step": 139120 }, { "epoch": 0.9153108820220653, "grad_norm": 0.007415189350643279, "learning_rate": 6.583963412639085e-06, "loss": 0.0012, "step": 139130 }, { "epoch": 0.9153766701972987, "grad_norm": 0.052964932982774285, "learning_rate": 6.5834188609232496e-06, "loss": 0.0009, "step": 139140 }, { "epoch": 0.9154424583725321, "grad_norm": 0.029858541878247918, "learning_rate": 6.582874288331404e-06, "loss": 0.0008, "step": 139150 }, { "epoch": 0.9155082465477655, "grad_norm": 0.013066326300633837, "learning_rate": 6.5823296948707295e-06, "loss": 0.0005, "step": 139160 }, { "epoch": 0.9155740347229989, "grad_norm": 0.1676253592535289, "learning_rate": 6.581785080548403e-06, "loss": 0.0017, "step": 139170 }, { "epoch": 0.9156398228982323, "grad_norm": 0.03761075897382461, "learning_rate": 6.581240445371606e-06, "loss": 0.0014, "step": 139180 }, { "epoch": 0.9157056110734657, "grad_norm": 0.018587278136643272, "learning_rate": 6.580695789347521e-06, "loss": 0.0009, "step": 139190 }, { "epoch": 0.915771399248699, "grad_norm": 0.03298980850049987, "learning_rate": 6.580151112483326e-06, "loss": 0.0045, "step": 139200 }, { "epoch": 0.9158371874239324, "grad_norm": 0.058327175899781085, "learning_rate": 6.5796064147862035e-06, "loss": 0.0017, "step": 139210 }, { "epoch": 0.9159029755991658, "grad_norm": 0.10580116926357655, "learning_rate": 6.579061696263336e-06, "loss": 0.0011, "step": 139220 }, { "epoch": 0.9159687637743992, "grad_norm": 0.3718839890763086, "learning_rate": 6.578516956921901e-06, "loss": 0.004, "step": 139230 }, { "epoch": 0.9160345519496326, "grad_norm": 0.0894850431096581, "learning_rate": 6.577972196769085e-06, "loss": 0.0013, "step": 139240 }, { "epoch": 0.916100340124866, "grad_norm": 0.010745200403238916, "learning_rate": 6.577427415812067e-06, "loss": 0.0012, "step": 139250 }, { "epoch": 0.9161661283000994, "grad_norm": 0.0311559501001535, "learning_rate": 6.576882614058033e-06, "loss": 0.0013, "step": 139260 }, { "epoch": 0.9162319164753328, "grad_norm": 0.050898258795249854, "learning_rate": 6.576337791514162e-06, "loss": 0.0019, "step": 139270 }, { "epoch": 0.9162977046505661, "grad_norm": 0.04662805699631417, "learning_rate": 6.575792948187641e-06, "loss": 0.0021, "step": 139280 }, { "epoch": 0.9163634928257995, "grad_norm": 0.017078572245741178, "learning_rate": 6.575248084085648e-06, "loss": 0.0019, "step": 139290 }, { "epoch": 0.9164292810010328, "grad_norm": 0.037927827352571546, "learning_rate": 6.574703199215371e-06, "loss": 0.0016, "step": 139300 }, { "epoch": 0.9164950691762662, "grad_norm": 0.02398228110244589, "learning_rate": 6.574158293583992e-06, "loss": 0.001, "step": 139310 }, { "epoch": 0.9165608573514996, "grad_norm": 0.021777651714656343, "learning_rate": 6.573613367198694e-06, "loss": 0.0021, "step": 139320 }, { "epoch": 0.916626645526733, "grad_norm": 0.042101368835715844, "learning_rate": 6.573068420066666e-06, "loss": 0.0008, "step": 139330 }, { "epoch": 0.9166924337019664, "grad_norm": 0.004951691640736318, "learning_rate": 6.572523452195088e-06, "loss": 0.0005, "step": 139340 }, { "epoch": 0.9167582218771998, "grad_norm": 0.05863538855598134, "learning_rate": 6.571978463591145e-06, "loss": 0.0008, "step": 139350 }, { "epoch": 0.9168240100524332, "grad_norm": 0.1296903990783678, "learning_rate": 6.571433454262024e-06, "loss": 0.0012, "step": 139360 }, { "epoch": 0.9168897982276666, "grad_norm": 0.021845963738693307, "learning_rate": 6.570888424214912e-06, "loss": 0.0008, "step": 139370 }, { "epoch": 0.9169555864029, "grad_norm": 0.03626018527004084, "learning_rate": 6.570343373456991e-06, "loss": 0.001, "step": 139380 }, { "epoch": 0.9170213745781334, "grad_norm": 0.002687162694752884, "learning_rate": 6.569798301995449e-06, "loss": 0.0008, "step": 139390 }, { "epoch": 0.9170871627533667, "grad_norm": 0.05189623956262463, "learning_rate": 6.569253209837474e-06, "loss": 0.0008, "step": 139400 }, { "epoch": 0.9171529509286, "grad_norm": 0.09615468796685017, "learning_rate": 6.5687080969902485e-06, "loss": 0.0006, "step": 139410 }, { "epoch": 0.9172187391038334, "grad_norm": 0.05290786845816975, "learning_rate": 6.5681629634609635e-06, "loss": 0.0017, "step": 139420 }, { "epoch": 0.9172845272790668, "grad_norm": 0.11456629719928278, "learning_rate": 6.567617809256802e-06, "loss": 0.0011, "step": 139430 }, { "epoch": 0.9173503154543002, "grad_norm": 0.0210805647785232, "learning_rate": 6.567072634384954e-06, "loss": 0.0033, "step": 139440 }, { "epoch": 0.9174161036295336, "grad_norm": 0.07626219561609905, "learning_rate": 6.566527438852608e-06, "loss": 0.002, "step": 139450 }, { "epoch": 0.917481891804767, "grad_norm": 0.0117506904138002, "learning_rate": 6.56598222266695e-06, "loss": 0.0029, "step": 139460 }, { "epoch": 0.9175476799800004, "grad_norm": 0.03850112092966161, "learning_rate": 6.56543698583517e-06, "loss": 0.0008, "step": 139470 }, { "epoch": 0.9176134681552338, "grad_norm": 0.017445218334421836, "learning_rate": 6.564891728364454e-06, "loss": 0.002, "step": 139480 }, { "epoch": 0.9176792563304672, "grad_norm": 0.05527307133425547, "learning_rate": 6.564346450261991e-06, "loss": 0.0016, "step": 139490 }, { "epoch": 0.9177450445057005, "grad_norm": 0.06225192063178405, "learning_rate": 6.5638011515349735e-06, "loss": 0.0015, "step": 139500 }, { "epoch": 0.9178108326809339, "grad_norm": 0.0022224505001649263, "learning_rate": 6.563255832190587e-06, "loss": 0.0004, "step": 139510 }, { "epoch": 0.9178766208561673, "grad_norm": 0.038867978437694115, "learning_rate": 6.5627104922360226e-06, "loss": 0.0017, "step": 139520 }, { "epoch": 0.9179424090314007, "grad_norm": 0.07241758533078566, "learning_rate": 6.56216513167847e-06, "loss": 0.0014, "step": 139530 }, { "epoch": 0.9180081972066341, "grad_norm": 0.0014363699905536064, "learning_rate": 6.561619750525121e-06, "loss": 0.0015, "step": 139540 }, { "epoch": 0.9180739853818675, "grad_norm": 0.01531684697026082, "learning_rate": 6.561074348783163e-06, "loss": 0.0016, "step": 139550 }, { "epoch": 0.9181397735571009, "grad_norm": 0.0080218906573003, "learning_rate": 6.560528926459786e-06, "loss": 0.001, "step": 139560 }, { "epoch": 0.9182055617323343, "grad_norm": 0.0868360015522889, "learning_rate": 6.5599834835621844e-06, "loss": 0.001, "step": 139570 }, { "epoch": 0.9182713499075676, "grad_norm": 0.017148418290469605, "learning_rate": 6.559438020097549e-06, "loss": 0.0006, "step": 139580 }, { "epoch": 0.918337138082801, "grad_norm": 0.051973003764195336, "learning_rate": 6.558892536073069e-06, "loss": 0.0011, "step": 139590 }, { "epoch": 0.9184029262580344, "grad_norm": 0.10613959681772746, "learning_rate": 6.558347031495937e-06, "loss": 0.0018, "step": 139600 }, { "epoch": 0.9184687144332677, "grad_norm": 0.0669802282539251, "learning_rate": 6.557801506373347e-06, "loss": 0.0007, "step": 139610 }, { "epoch": 0.9185345026085011, "grad_norm": 0.0019878745748885484, "learning_rate": 6.557255960712488e-06, "loss": 0.0008, "step": 139620 }, { "epoch": 0.9186002907837345, "grad_norm": 0.00748193613933744, "learning_rate": 6.556710394520553e-06, "loss": 0.0008, "step": 139630 }, { "epoch": 0.9186660789589679, "grad_norm": 0.07131850220578495, "learning_rate": 6.556164807804739e-06, "loss": 0.0024, "step": 139640 }, { "epoch": 0.9187318671342013, "grad_norm": 0.05044019304005551, "learning_rate": 6.555619200572234e-06, "loss": 0.001, "step": 139650 }, { "epoch": 0.9187976553094347, "grad_norm": 0.08251373102269215, "learning_rate": 6.555073572830234e-06, "loss": 0.001, "step": 139660 }, { "epoch": 0.9188634434846681, "grad_norm": 0.018389296120293108, "learning_rate": 6.554527924585931e-06, "loss": 0.0007, "step": 139670 }, { "epoch": 0.9189292316599015, "grad_norm": 0.08834737362570691, "learning_rate": 6.553982255846521e-06, "loss": 0.0011, "step": 139680 }, { "epoch": 0.9189950198351349, "grad_norm": 0.12011110648321494, "learning_rate": 6.553436566619196e-06, "loss": 0.0018, "step": 139690 }, { "epoch": 0.9190608080103683, "grad_norm": 0.0011316401679435468, "learning_rate": 6.552890856911153e-06, "loss": 0.0008, "step": 139700 }, { "epoch": 0.9191265961856016, "grad_norm": 0.0278146726465491, "learning_rate": 6.552345126729584e-06, "loss": 0.0023, "step": 139710 }, { "epoch": 0.919192384360835, "grad_norm": 0.0017745692496656607, "learning_rate": 6.551799376081685e-06, "loss": 0.0016, "step": 139720 }, { "epoch": 0.9192581725360683, "grad_norm": 0.041742849608737864, "learning_rate": 6.551253604974652e-06, "loss": 0.0033, "step": 139730 }, { "epoch": 0.9193239607113017, "grad_norm": 0.0485929228193972, "learning_rate": 6.550707813415681e-06, "loss": 0.0006, "step": 139740 }, { "epoch": 0.9193897488865351, "grad_norm": 0.15672518218590675, "learning_rate": 6.550162001411965e-06, "loss": 0.0016, "step": 139750 }, { "epoch": 0.9194555370617685, "grad_norm": 0.010564960468394722, "learning_rate": 6.5496161689707024e-06, "loss": 0.0012, "step": 139760 }, { "epoch": 0.9195213252370019, "grad_norm": 0.06913697050372099, "learning_rate": 6.5490703160990885e-06, "loss": 0.0013, "step": 139770 }, { "epoch": 0.9195871134122353, "grad_norm": 0.03958828782389048, "learning_rate": 6.548524442804322e-06, "loss": 0.001, "step": 139780 }, { "epoch": 0.9196529015874687, "grad_norm": 0.015360876466880355, "learning_rate": 6.547978549093596e-06, "loss": 0.0023, "step": 139790 }, { "epoch": 0.9197186897627021, "grad_norm": 0.03858547074522718, "learning_rate": 6.547432634974111e-06, "loss": 0.0016, "step": 139800 }, { "epoch": 0.9197844779379354, "grad_norm": 0.021830900240834672, "learning_rate": 6.546886700453063e-06, "loss": 0.0054, "step": 139810 }, { "epoch": 0.9198502661131688, "grad_norm": 0.003486609046981579, "learning_rate": 6.546340745537649e-06, "loss": 0.0004, "step": 139820 }, { "epoch": 0.9199160542884022, "grad_norm": 0.08966267540556672, "learning_rate": 6.54579477023507e-06, "loss": 0.0009, "step": 139830 }, { "epoch": 0.9199818424636356, "grad_norm": 0.014453344803146174, "learning_rate": 6.545248774552521e-06, "loss": 0.0007, "step": 139840 }, { "epoch": 0.920047630638869, "grad_norm": 0.08945297452269103, "learning_rate": 6.5447027584972e-06, "loss": 0.0012, "step": 139850 }, { "epoch": 0.9201134188141024, "grad_norm": 0.057430073013642585, "learning_rate": 6.5441567220763095e-06, "loss": 0.0016, "step": 139860 }, { "epoch": 0.9201792069893358, "grad_norm": 0.0736974757818947, "learning_rate": 6.543610665297045e-06, "loss": 0.0007, "step": 139870 }, { "epoch": 0.9202449951645691, "grad_norm": 0.09267436777428825, "learning_rate": 6.543064588166605e-06, "loss": 0.002, "step": 139880 }, { "epoch": 0.9203107833398025, "grad_norm": 0.04110725257158572, "learning_rate": 6.542518490692193e-06, "loss": 0.0011, "step": 139890 }, { "epoch": 0.9203765715150359, "grad_norm": 0.10184280607135293, "learning_rate": 6.541972372881008e-06, "loss": 0.0012, "step": 139900 }, { "epoch": 0.9204423596902692, "grad_norm": 0.04480503011650637, "learning_rate": 6.541426234740248e-06, "loss": 0.0008, "step": 139910 }, { "epoch": 0.9205081478655026, "grad_norm": 0.028085452513761933, "learning_rate": 6.540880076277114e-06, "loss": 0.0019, "step": 139920 }, { "epoch": 0.920573936040736, "grad_norm": 0.058301029737427304, "learning_rate": 6.540333897498809e-06, "loss": 0.0016, "step": 139930 }, { "epoch": 0.9206397242159694, "grad_norm": 0.10071623798008389, "learning_rate": 6.539787698412529e-06, "loss": 0.0014, "step": 139940 }, { "epoch": 0.9207055123912028, "grad_norm": 0.014125345349773732, "learning_rate": 6.539241479025479e-06, "loss": 0.0016, "step": 139950 }, { "epoch": 0.9207713005664362, "grad_norm": 0.00017572442541562475, "learning_rate": 6.53869523934486e-06, "loss": 0.0018, "step": 139960 }, { "epoch": 0.9208370887416696, "grad_norm": 0.1368181235478304, "learning_rate": 6.538148979377873e-06, "loss": 0.001, "step": 139970 }, { "epoch": 0.920902876916903, "grad_norm": 0.06574731133871234, "learning_rate": 6.53760269913172e-06, "loss": 0.0009, "step": 139980 }, { "epoch": 0.9209686650921364, "grad_norm": 0.04017326923715302, "learning_rate": 6.5370563986136034e-06, "loss": 0.0008, "step": 139990 }, { "epoch": 0.9210344532673698, "grad_norm": 0.032576740828860244, "learning_rate": 6.536510077830727e-06, "loss": 0.0012, "step": 140000 }, { "epoch": 0.921100241442603, "grad_norm": 0.026603429586243392, "learning_rate": 6.53596373679029e-06, "loss": 0.0007, "step": 140010 }, { "epoch": 0.9211660296178364, "grad_norm": 0.014960745523037229, "learning_rate": 6.535417375499497e-06, "loss": 0.0006, "step": 140020 }, { "epoch": 0.9212318177930698, "grad_norm": 0.04668085182443453, "learning_rate": 6.534870993965554e-06, "loss": 0.0017, "step": 140030 }, { "epoch": 0.9212976059683032, "grad_norm": 0.016952477210024894, "learning_rate": 6.5343245921956635e-06, "loss": 0.0007, "step": 140040 }, { "epoch": 0.9213633941435366, "grad_norm": 0.12425105627214315, "learning_rate": 6.533778170197027e-06, "loss": 0.0013, "step": 140050 }, { "epoch": 0.92142918231877, "grad_norm": 0.027776887190436948, "learning_rate": 6.53323172797685e-06, "loss": 0.0012, "step": 140060 }, { "epoch": 0.9214949704940034, "grad_norm": 0.08636912246066267, "learning_rate": 6.532685265542337e-06, "loss": 0.0015, "step": 140070 }, { "epoch": 0.9215607586692368, "grad_norm": 0.11815227837352515, "learning_rate": 6.532138782900691e-06, "loss": 0.0012, "step": 140080 }, { "epoch": 0.9216265468444702, "grad_norm": 0.03136554047909512, "learning_rate": 6.531592280059121e-06, "loss": 0.001, "step": 140090 }, { "epoch": 0.9216923350197036, "grad_norm": 0.03821888289787405, "learning_rate": 6.531045757024828e-06, "loss": 0.0026, "step": 140100 }, { "epoch": 0.921758123194937, "grad_norm": 0.06626367040984028, "learning_rate": 6.530499213805019e-06, "loss": 0.0009, "step": 140110 }, { "epoch": 0.9218239113701703, "grad_norm": 0.06490095810942904, "learning_rate": 6.5299526504069e-06, "loss": 0.001, "step": 140120 }, { "epoch": 0.9218896995454037, "grad_norm": 0.060409856110838184, "learning_rate": 6.529406066837677e-06, "loss": 0.0028, "step": 140130 }, { "epoch": 0.9219554877206371, "grad_norm": 0.22026229037952774, "learning_rate": 6.5288594631045555e-06, "loss": 0.0018, "step": 140140 }, { "epoch": 0.9220212758958705, "grad_norm": 0.12256538094936135, "learning_rate": 6.5283128392147435e-06, "loss": 0.0009, "step": 140150 }, { "epoch": 0.9220870640711039, "grad_norm": 0.0007754179094448186, "learning_rate": 6.527766195175444e-06, "loss": 0.0009, "step": 140160 }, { "epoch": 0.9221528522463373, "grad_norm": 0.018257353470457687, "learning_rate": 6.527219530993869e-06, "loss": 0.0007, "step": 140170 }, { "epoch": 0.9222186404215706, "grad_norm": 0.052819936320199656, "learning_rate": 6.526672846677224e-06, "loss": 0.001, "step": 140180 }, { "epoch": 0.922284428596804, "grad_norm": 0.06664888667707226, "learning_rate": 6.526126142232714e-06, "loss": 0.0016, "step": 140190 }, { "epoch": 0.9223502167720374, "grad_norm": 0.15125220524896338, "learning_rate": 6.525579417667549e-06, "loss": 0.0021, "step": 140200 }, { "epoch": 0.9224160049472708, "grad_norm": 0.027002440217289498, "learning_rate": 6.525032672988938e-06, "loss": 0.0005, "step": 140210 }, { "epoch": 0.9224817931225041, "grad_norm": 0.03399978572110265, "learning_rate": 6.524485908204088e-06, "loss": 0.0007, "step": 140220 }, { "epoch": 0.9225475812977375, "grad_norm": 0.04753396892014007, "learning_rate": 6.523939123320207e-06, "loss": 0.0011, "step": 140230 }, { "epoch": 0.9226133694729709, "grad_norm": 0.010501105483656977, "learning_rate": 6.523392318344507e-06, "loss": 0.0005, "step": 140240 }, { "epoch": 0.9226791576482043, "grad_norm": 0.037937773593429966, "learning_rate": 6.522845493284193e-06, "loss": 0.0007, "step": 140250 }, { "epoch": 0.9227449458234377, "grad_norm": 0.02747255670745556, "learning_rate": 6.522298648146477e-06, "loss": 0.0008, "step": 140260 }, { "epoch": 0.9228107339986711, "grad_norm": 0.1308977482593697, "learning_rate": 6.521751782938568e-06, "loss": 0.0024, "step": 140270 }, { "epoch": 0.9228765221739045, "grad_norm": 0.02112062786138294, "learning_rate": 6.521204897667675e-06, "loss": 0.0018, "step": 140280 }, { "epoch": 0.9229423103491379, "grad_norm": 0.03560649973298155, "learning_rate": 6.520657992341011e-06, "loss": 0.0015, "step": 140290 }, { "epoch": 0.9230080985243713, "grad_norm": 0.07081748741069945, "learning_rate": 6.520111066965784e-06, "loss": 0.0012, "step": 140300 }, { "epoch": 0.9230738866996047, "grad_norm": 0.04439378745575361, "learning_rate": 6.519564121549207e-06, "loss": 0.0015, "step": 140310 }, { "epoch": 0.923139674874838, "grad_norm": 0.026461811054622916, "learning_rate": 6.519017156098486e-06, "loss": 0.0009, "step": 140320 }, { "epoch": 0.9232054630500713, "grad_norm": 0.01397699203029111, "learning_rate": 6.518470170620837e-06, "loss": 0.0009, "step": 140330 }, { "epoch": 0.9232712512253047, "grad_norm": 0.052296705728019206, "learning_rate": 6.517923165123472e-06, "loss": 0.0013, "step": 140340 }, { "epoch": 0.9233370394005381, "grad_norm": 0.018462682228932513, "learning_rate": 6.5173761396136e-06, "loss": 0.0011, "step": 140350 }, { "epoch": 0.9234028275757715, "grad_norm": 0.1361451018188759, "learning_rate": 6.516829094098434e-06, "loss": 0.0016, "step": 140360 }, { "epoch": 0.9234686157510049, "grad_norm": 0.08326745939818889, "learning_rate": 6.5162820285851865e-06, "loss": 0.0016, "step": 140370 }, { "epoch": 0.9235344039262383, "grad_norm": 0.124837883449603, "learning_rate": 6.51573494308107e-06, "loss": 0.001, "step": 140380 }, { "epoch": 0.9236001921014717, "grad_norm": 0.07912639519844135, "learning_rate": 6.515187837593296e-06, "loss": 0.0009, "step": 140390 }, { "epoch": 0.9236659802767051, "grad_norm": 0.034884520884218326, "learning_rate": 6.514640712129081e-06, "loss": 0.0009, "step": 140400 }, { "epoch": 0.9237317684519385, "grad_norm": 0.04180787807185878, "learning_rate": 6.514093566695635e-06, "loss": 0.0034, "step": 140410 }, { "epoch": 0.9237975566271718, "grad_norm": 0.039915659135457054, "learning_rate": 6.513546401300174e-06, "loss": 0.001, "step": 140420 }, { "epoch": 0.9238633448024052, "grad_norm": 0.0027173477562273335, "learning_rate": 6.512999215949911e-06, "loss": 0.0035, "step": 140430 }, { "epoch": 0.9239291329776386, "grad_norm": 0.0011104523667007865, "learning_rate": 6.51245201065206e-06, "loss": 0.0017, "step": 140440 }, { "epoch": 0.923994921152872, "grad_norm": 0.005719367473512774, "learning_rate": 6.5119047854138325e-06, "loss": 0.0018, "step": 140450 }, { "epoch": 0.9240607093281054, "grad_norm": 0.0006266162770847532, "learning_rate": 6.5113575402424485e-06, "loss": 0.0015, "step": 140460 }, { "epoch": 0.9241264975033388, "grad_norm": 0.027662238879363105, "learning_rate": 6.510810275145122e-06, "loss": 0.0011, "step": 140470 }, { "epoch": 0.9241922856785721, "grad_norm": 0.05650970078674137, "learning_rate": 6.510262990129066e-06, "loss": 0.0028, "step": 140480 }, { "epoch": 0.9242580738538055, "grad_norm": 0.007769412472712946, "learning_rate": 6.509715685201496e-06, "loss": 0.0006, "step": 140490 }, { "epoch": 0.9243238620290389, "grad_norm": 0.09033181985353425, "learning_rate": 6.509168360369629e-06, "loss": 0.0016, "step": 140500 }, { "epoch": 0.9243896502042723, "grad_norm": 0.12571287769991696, "learning_rate": 6.50862101564068e-06, "loss": 0.0016, "step": 140510 }, { "epoch": 0.9244554383795057, "grad_norm": 0.113368811516394, "learning_rate": 6.508073651021866e-06, "loss": 0.0012, "step": 140520 }, { "epoch": 0.924521226554739, "grad_norm": 0.05090804773684995, "learning_rate": 6.5075262665204045e-06, "loss": 0.0015, "step": 140530 }, { "epoch": 0.9245870147299724, "grad_norm": 0.01172991279677656, "learning_rate": 6.506978862143509e-06, "loss": 0.0009, "step": 140540 }, { "epoch": 0.9246528029052058, "grad_norm": 0.03382719148246593, "learning_rate": 6.506431437898402e-06, "loss": 0.0009, "step": 140550 }, { "epoch": 0.9247185910804392, "grad_norm": 0.07904024932520128, "learning_rate": 6.505883993792295e-06, "loss": 0.001, "step": 140560 }, { "epoch": 0.9247843792556726, "grad_norm": 0.051831217981293254, "learning_rate": 6.505336529832409e-06, "loss": 0.0004, "step": 140570 }, { "epoch": 0.924850167430906, "grad_norm": 0.1354469827379509, "learning_rate": 6.504789046025961e-06, "loss": 0.001, "step": 140580 }, { "epoch": 0.9249159556061394, "grad_norm": 0.022031341652176028, "learning_rate": 6.5042415423801675e-06, "loss": 0.0015, "step": 140590 }, { "epoch": 0.9249817437813728, "grad_norm": 0.00031566027983453537, "learning_rate": 6.503694018902249e-06, "loss": 0.0007, "step": 140600 }, { "epoch": 0.9250475319566062, "grad_norm": 0.016827544772563582, "learning_rate": 6.503146475599425e-06, "loss": 0.0005, "step": 140610 }, { "epoch": 0.9251133201318396, "grad_norm": 0.07219392360967068, "learning_rate": 6.502598912478912e-06, "loss": 0.0013, "step": 140620 }, { "epoch": 0.9251791083070728, "grad_norm": 0.030279549753263562, "learning_rate": 6.50205132954793e-06, "loss": 0.0013, "step": 140630 }, { "epoch": 0.9252448964823062, "grad_norm": 0.04526058697391315, "learning_rate": 6.501503726813699e-06, "loss": 0.003, "step": 140640 }, { "epoch": 0.9253106846575396, "grad_norm": 0.0029186188949524897, "learning_rate": 6.500956104283438e-06, "loss": 0.0007, "step": 140650 }, { "epoch": 0.925376472832773, "grad_norm": 0.06710361383557374, "learning_rate": 6.500408461964366e-06, "loss": 0.0011, "step": 140660 }, { "epoch": 0.9254422610080064, "grad_norm": 0.028114841988964665, "learning_rate": 6.499860799863705e-06, "loss": 0.0035, "step": 140670 }, { "epoch": 0.9255080491832398, "grad_norm": 0.04246515507861087, "learning_rate": 6.499313117988676e-06, "loss": 0.001, "step": 140680 }, { "epoch": 0.9255738373584732, "grad_norm": 0.02746997613803672, "learning_rate": 6.498765416346498e-06, "loss": 0.0011, "step": 140690 }, { "epoch": 0.9256396255337066, "grad_norm": 0.025609972821733613, "learning_rate": 6.498217694944391e-06, "loss": 0.0009, "step": 140700 }, { "epoch": 0.92570541370894, "grad_norm": 0.0804552074062252, "learning_rate": 6.497669953789578e-06, "loss": 0.0014, "step": 140710 }, { "epoch": 0.9257712018841734, "grad_norm": 0.026704476303169376, "learning_rate": 6.4971221928892805e-06, "loss": 0.0012, "step": 140720 }, { "epoch": 0.9258369900594067, "grad_norm": 0.02254570000927415, "learning_rate": 6.496574412250719e-06, "loss": 0.0016, "step": 140730 }, { "epoch": 0.9259027782346401, "grad_norm": 0.014357525101060156, "learning_rate": 6.496026611881118e-06, "loss": 0.0011, "step": 140740 }, { "epoch": 0.9259685664098735, "grad_norm": 0.052909332395609625, "learning_rate": 6.495478791787697e-06, "loss": 0.0006, "step": 140750 }, { "epoch": 0.9260343545851069, "grad_norm": 0.002509278087167564, "learning_rate": 6.494930951977681e-06, "loss": 0.0006, "step": 140760 }, { "epoch": 0.9261001427603403, "grad_norm": 0.08101340685355371, "learning_rate": 6.494383092458291e-06, "loss": 0.0009, "step": 140770 }, { "epoch": 0.9261659309355736, "grad_norm": 0.10236117708149844, "learning_rate": 6.493835213236749e-06, "loss": 0.0015, "step": 140780 }, { "epoch": 0.926231719110807, "grad_norm": 0.025901732244979818, "learning_rate": 6.4932873143202815e-06, "loss": 0.0014, "step": 140790 }, { "epoch": 0.9262975072860404, "grad_norm": 0.06162360754105766, "learning_rate": 6.49273939571611e-06, "loss": 0.0017, "step": 140800 }, { "epoch": 0.9263632954612738, "grad_norm": 0.03168965859756165, "learning_rate": 6.492191457431458e-06, "loss": 0.0013, "step": 140810 }, { "epoch": 0.9264290836365072, "grad_norm": 0.0022248311411807707, "learning_rate": 6.4916434994735526e-06, "loss": 0.0007, "step": 140820 }, { "epoch": 0.9264948718117405, "grad_norm": 0.02219623593398693, "learning_rate": 6.4910955218496144e-06, "loss": 0.001, "step": 140830 }, { "epoch": 0.9265606599869739, "grad_norm": 0.023374420437190287, "learning_rate": 6.490547524566869e-06, "loss": 0.0011, "step": 140840 }, { "epoch": 0.9266264481622073, "grad_norm": 0.06818650663490272, "learning_rate": 6.489999507632542e-06, "loss": 0.0017, "step": 140850 }, { "epoch": 0.9266922363374407, "grad_norm": 0.026422253248070713, "learning_rate": 6.489451471053858e-06, "loss": 0.0014, "step": 140860 }, { "epoch": 0.9267580245126741, "grad_norm": 0.07619332708053565, "learning_rate": 6.488903414838044e-06, "loss": 0.001, "step": 140870 }, { "epoch": 0.9268238126879075, "grad_norm": 0.07637962258106329, "learning_rate": 6.4883553389923245e-06, "loss": 0.0014, "step": 140880 }, { "epoch": 0.9268896008631409, "grad_norm": 0.02909554908284404, "learning_rate": 6.487807243523925e-06, "loss": 0.0007, "step": 140890 }, { "epoch": 0.9269553890383743, "grad_norm": 0.0784925705845873, "learning_rate": 6.487259128440071e-06, "loss": 0.0006, "step": 140900 }, { "epoch": 0.9270211772136077, "grad_norm": 0.0006454972626701303, "learning_rate": 6.48671099374799e-06, "loss": 0.0009, "step": 140910 }, { "epoch": 0.9270869653888411, "grad_norm": 0.032169030767155334, "learning_rate": 6.4861628394549094e-06, "loss": 0.0014, "step": 140920 }, { "epoch": 0.9271527535640743, "grad_norm": 0.029168214569609132, "learning_rate": 6.485614665568055e-06, "loss": 0.0016, "step": 140930 }, { "epoch": 0.9272185417393077, "grad_norm": 0.1155737486702031, "learning_rate": 6.4850664720946546e-06, "loss": 0.0021, "step": 140940 }, { "epoch": 0.9272843299145411, "grad_norm": 0.019236443304294915, "learning_rate": 6.484518259041934e-06, "loss": 0.0014, "step": 140950 }, { "epoch": 0.9273501180897745, "grad_norm": 0.003337546127512383, "learning_rate": 6.483970026417124e-06, "loss": 0.0016, "step": 140960 }, { "epoch": 0.9274159062650079, "grad_norm": 0.04142116638047084, "learning_rate": 6.48342177422745e-06, "loss": 0.0012, "step": 140970 }, { "epoch": 0.9274816944402413, "grad_norm": 0.019223627646007903, "learning_rate": 6.482873502480141e-06, "loss": 0.0012, "step": 140980 }, { "epoch": 0.9275474826154747, "grad_norm": 0.0004543169661778011, "learning_rate": 6.482325211182425e-06, "loss": 0.0012, "step": 140990 }, { "epoch": 0.9276132707907081, "grad_norm": 0.06082606369647573, "learning_rate": 6.481776900341533e-06, "loss": 0.001, "step": 141000 }, { "epoch": 0.9276790589659415, "grad_norm": 0.026155166207975724, "learning_rate": 6.481228569964691e-06, "loss": 0.0011, "step": 141010 }, { "epoch": 0.9277448471411749, "grad_norm": 0.063927039441174, "learning_rate": 6.48068022005913e-06, "loss": 0.0008, "step": 141020 }, { "epoch": 0.9278106353164083, "grad_norm": 0.021004508035018653, "learning_rate": 6.48013185063208e-06, "loss": 0.0025, "step": 141030 }, { "epoch": 0.9278764234916416, "grad_norm": 0.05543223168131496, "learning_rate": 6.479583461690768e-06, "loss": 0.0017, "step": 141040 }, { "epoch": 0.927942211666875, "grad_norm": 0.13270344744319973, "learning_rate": 6.479035053242426e-06, "loss": 0.0009, "step": 141050 }, { "epoch": 0.9280079998421084, "grad_norm": 0.06568720049177097, "learning_rate": 6.4784866252942855e-06, "loss": 0.0017, "step": 141060 }, { "epoch": 0.9280737880173417, "grad_norm": 0.0854510387807459, "learning_rate": 6.477938177853576e-06, "loss": 0.0012, "step": 141070 }, { "epoch": 0.9281395761925751, "grad_norm": 0.07960256626392215, "learning_rate": 6.477389710927528e-06, "loss": 0.0015, "step": 141080 }, { "epoch": 0.9282053643678085, "grad_norm": 0.03212095425339351, "learning_rate": 6.4768412245233715e-06, "loss": 0.0014, "step": 141090 }, { "epoch": 0.9282711525430419, "grad_norm": 0.018971212351314683, "learning_rate": 6.4762927186483395e-06, "loss": 0.0008, "step": 141100 }, { "epoch": 0.9283369407182753, "grad_norm": 0.05605451758334485, "learning_rate": 6.475744193309664e-06, "loss": 0.0009, "step": 141110 }, { "epoch": 0.9284027288935087, "grad_norm": 0.10068431322862204, "learning_rate": 6.475195648514574e-06, "loss": 0.0015, "step": 141120 }, { "epoch": 0.9284685170687421, "grad_norm": 0.03529893852403432, "learning_rate": 6.474647084270304e-06, "loss": 0.0016, "step": 141130 }, { "epoch": 0.9285343052439754, "grad_norm": 0.00383579933869266, "learning_rate": 6.4740985005840864e-06, "loss": 0.0012, "step": 141140 }, { "epoch": 0.9286000934192088, "grad_norm": 0.20190778559868033, "learning_rate": 6.4735498974631525e-06, "loss": 0.0012, "step": 141150 }, { "epoch": 0.9286658815944422, "grad_norm": 0.07453726495169279, "learning_rate": 6.473001274914735e-06, "loss": 0.0016, "step": 141160 }, { "epoch": 0.9287316697696756, "grad_norm": 0.04306598039672234, "learning_rate": 6.472452632946069e-06, "loss": 0.0009, "step": 141170 }, { "epoch": 0.928797457944909, "grad_norm": 0.11762478982057324, "learning_rate": 6.471903971564387e-06, "loss": 0.0016, "step": 141180 }, { "epoch": 0.9288632461201424, "grad_norm": 0.14355641452513815, "learning_rate": 6.471355290776923e-06, "loss": 0.0016, "step": 141190 }, { "epoch": 0.9289290342953758, "grad_norm": 0.012247710107545613, "learning_rate": 6.470806590590909e-06, "loss": 0.0011, "step": 141200 }, { "epoch": 0.9289948224706092, "grad_norm": 0.02733518929031061, "learning_rate": 6.470257871013581e-06, "loss": 0.0006, "step": 141210 }, { "epoch": 0.9290606106458426, "grad_norm": 0.07853498987065233, "learning_rate": 6.469709132052171e-06, "loss": 0.0014, "step": 141220 }, { "epoch": 0.929126398821076, "grad_norm": 0.043555744183098134, "learning_rate": 6.4691603737139175e-06, "loss": 0.0011, "step": 141230 }, { "epoch": 0.9291921869963092, "grad_norm": 0.04179640324218061, "learning_rate": 6.468611596006052e-06, "loss": 0.0009, "step": 141240 }, { "epoch": 0.9292579751715426, "grad_norm": 0.03896002448631836, "learning_rate": 6.468062798935812e-06, "loss": 0.0011, "step": 141250 }, { "epoch": 0.929323763346776, "grad_norm": 0.05254883375507877, "learning_rate": 6.467513982510432e-06, "loss": 0.0009, "step": 141260 }, { "epoch": 0.9293895515220094, "grad_norm": 0.025765228257305633, "learning_rate": 6.466965146737148e-06, "loss": 0.0006, "step": 141270 }, { "epoch": 0.9294553396972428, "grad_norm": 0.05780792836882059, "learning_rate": 6.466416291623195e-06, "loss": 0.0017, "step": 141280 }, { "epoch": 0.9295211278724762, "grad_norm": 0.018916838109713403, "learning_rate": 6.465867417175808e-06, "loss": 0.0015, "step": 141290 }, { "epoch": 0.9295869160477096, "grad_norm": 0.005508053236871393, "learning_rate": 6.4653185234022264e-06, "loss": 0.0005, "step": 141300 }, { "epoch": 0.929652704222943, "grad_norm": 0.07768875700345354, "learning_rate": 6.464769610309685e-06, "loss": 0.0011, "step": 141310 }, { "epoch": 0.9297184923981764, "grad_norm": 0.01048048884105234, "learning_rate": 6.4642206779054226e-06, "loss": 0.0018, "step": 141320 }, { "epoch": 0.9297842805734098, "grad_norm": 0.0003578373840833467, "learning_rate": 6.463671726196674e-06, "loss": 0.0011, "step": 141330 }, { "epoch": 0.9298500687486431, "grad_norm": 0.035938932675190424, "learning_rate": 6.463122755190679e-06, "loss": 0.0008, "step": 141340 }, { "epoch": 0.9299158569238765, "grad_norm": 0.048997001391133735, "learning_rate": 6.4625737648946725e-06, "loss": 0.0012, "step": 141350 }, { "epoch": 0.9299816450991099, "grad_norm": 0.06660845645262892, "learning_rate": 6.462024755315894e-06, "loss": 0.0013, "step": 141360 }, { "epoch": 0.9300474332743432, "grad_norm": 0.014091429253640292, "learning_rate": 6.461475726461583e-06, "loss": 0.0007, "step": 141370 }, { "epoch": 0.9301132214495766, "grad_norm": 0.05310063361682931, "learning_rate": 6.460926678338978e-06, "loss": 0.0021, "step": 141380 }, { "epoch": 0.93017900962481, "grad_norm": 0.019589995930992945, "learning_rate": 6.460377610955315e-06, "loss": 0.0007, "step": 141390 }, { "epoch": 0.9302447978000434, "grad_norm": 0.05462436627057313, "learning_rate": 6.459828524317835e-06, "loss": 0.0014, "step": 141400 }, { "epoch": 0.9303105859752768, "grad_norm": 0.02558117208577825, "learning_rate": 6.4592794184337755e-06, "loss": 0.0006, "step": 141410 }, { "epoch": 0.9303763741505102, "grad_norm": 0.011963233644435538, "learning_rate": 6.458730293310377e-06, "loss": 0.0009, "step": 141420 }, { "epoch": 0.9304421623257436, "grad_norm": 0.06125073540339336, "learning_rate": 6.458181148954881e-06, "loss": 0.0016, "step": 141430 }, { "epoch": 0.9305079505009769, "grad_norm": 9.051217487654311e-05, "learning_rate": 6.457631985374525e-06, "loss": 0.0008, "step": 141440 }, { "epoch": 0.9305737386762103, "grad_norm": 0.05534751329019683, "learning_rate": 6.457082802576551e-06, "loss": 0.0008, "step": 141450 }, { "epoch": 0.9306395268514437, "grad_norm": 0.001713485029684285, "learning_rate": 6.456533600568198e-06, "loss": 0.0007, "step": 141460 }, { "epoch": 0.9307053150266771, "grad_norm": 0.04734953331767104, "learning_rate": 6.455984379356707e-06, "loss": 0.0009, "step": 141470 }, { "epoch": 0.9307711032019105, "grad_norm": 0.04258909414935982, "learning_rate": 6.455435138949318e-06, "loss": 0.0027, "step": 141480 }, { "epoch": 0.9308368913771439, "grad_norm": 0.01563432263453375, "learning_rate": 6.454885879353276e-06, "loss": 0.0011, "step": 141490 }, { "epoch": 0.9309026795523773, "grad_norm": 0.011872436684169952, "learning_rate": 6.4543366005758214e-06, "loss": 0.002, "step": 141500 }, { "epoch": 0.9309684677276107, "grad_norm": 0.06093983801572539, "learning_rate": 6.453787302624193e-06, "loss": 0.0011, "step": 141510 }, { "epoch": 0.931034255902844, "grad_norm": 0.07152202361349325, "learning_rate": 6.453237985505634e-06, "loss": 0.0009, "step": 141520 }, { "epoch": 0.9311000440780774, "grad_norm": 0.03907977994182948, "learning_rate": 6.452688649227388e-06, "loss": 0.0007, "step": 141530 }, { "epoch": 0.9311658322533108, "grad_norm": 0.17393843585461943, "learning_rate": 6.452139293796698e-06, "loss": 0.0012, "step": 141540 }, { "epoch": 0.9312316204285441, "grad_norm": 0.0768091242056232, "learning_rate": 6.451589919220802e-06, "loss": 0.0014, "step": 141550 }, { "epoch": 0.9312974086037775, "grad_norm": 0.05658456333099164, "learning_rate": 6.451040525506949e-06, "loss": 0.0009, "step": 141560 }, { "epoch": 0.9313631967790109, "grad_norm": 0.022053072633973842, "learning_rate": 6.45049111266238e-06, "loss": 0.0008, "step": 141570 }, { "epoch": 0.9314289849542443, "grad_norm": 0.051374804192598086, "learning_rate": 6.4499416806943394e-06, "loss": 0.0006, "step": 141580 }, { "epoch": 0.9314947731294777, "grad_norm": 0.06306442063297851, "learning_rate": 6.449392229610069e-06, "loss": 0.0012, "step": 141590 }, { "epoch": 0.9315605613047111, "grad_norm": 0.00844156257559792, "learning_rate": 6.448842759416814e-06, "loss": 0.0006, "step": 141600 }, { "epoch": 0.9316263494799445, "grad_norm": 0.005555701352813135, "learning_rate": 6.448293270121817e-06, "loss": 0.0012, "step": 141610 }, { "epoch": 0.9316921376551779, "grad_norm": 0.05144566672149934, "learning_rate": 6.447743761732324e-06, "loss": 0.0008, "step": 141620 }, { "epoch": 0.9317579258304113, "grad_norm": 0.004904276820096326, "learning_rate": 6.447194234255583e-06, "loss": 0.0013, "step": 141630 }, { "epoch": 0.9318237140056447, "grad_norm": 0.026522905955313224, "learning_rate": 6.446644687698833e-06, "loss": 0.0013, "step": 141640 }, { "epoch": 0.931889502180878, "grad_norm": 0.07815561713328815, "learning_rate": 6.446095122069324e-06, "loss": 0.0013, "step": 141650 }, { "epoch": 0.9319552903561114, "grad_norm": 0.0009819192689780668, "learning_rate": 6.445545537374298e-06, "loss": 0.0012, "step": 141660 }, { "epoch": 0.9320210785313447, "grad_norm": 0.013526537181569341, "learning_rate": 6.444995933621004e-06, "loss": 0.0007, "step": 141670 }, { "epoch": 0.9320868667065781, "grad_norm": 0.06636230620838932, "learning_rate": 6.444446310816686e-06, "loss": 0.0026, "step": 141680 }, { "epoch": 0.9321526548818115, "grad_norm": 0.03252828767812446, "learning_rate": 6.44389666896859e-06, "loss": 0.0007, "step": 141690 }, { "epoch": 0.9322184430570449, "grad_norm": 0.010498749003592673, "learning_rate": 6.443347008083965e-06, "loss": 0.0008, "step": 141700 }, { "epoch": 0.9322842312322783, "grad_norm": 0.019674436128641098, "learning_rate": 6.4427973281700554e-06, "loss": 0.0016, "step": 141710 }, { "epoch": 0.9323500194075117, "grad_norm": 0.0055313539712065814, "learning_rate": 6.4422476292341095e-06, "loss": 0.0006, "step": 141720 }, { "epoch": 0.9324158075827451, "grad_norm": 0.11229796529048897, "learning_rate": 6.441697911283374e-06, "loss": 0.0008, "step": 141730 }, { "epoch": 0.9324815957579785, "grad_norm": 0.054962906343633336, "learning_rate": 6.441148174325096e-06, "loss": 0.0031, "step": 141740 }, { "epoch": 0.9325473839332118, "grad_norm": 0.01665407874620319, "learning_rate": 6.4405984183665245e-06, "loss": 0.001, "step": 141750 }, { "epoch": 0.9326131721084452, "grad_norm": 0.018237860617567894, "learning_rate": 6.440048643414908e-06, "loss": 0.0009, "step": 141760 }, { "epoch": 0.9326789602836786, "grad_norm": 0.04233712085552978, "learning_rate": 6.439498849477492e-06, "loss": 0.0011, "step": 141770 }, { "epoch": 0.932744748458912, "grad_norm": 0.033904609326247476, "learning_rate": 6.438949036561529e-06, "loss": 0.0023, "step": 141780 }, { "epoch": 0.9328105366341454, "grad_norm": 0.009496615684472647, "learning_rate": 6.438399204674265e-06, "loss": 0.0009, "step": 141790 }, { "epoch": 0.9328763248093788, "grad_norm": 0.09939945531378218, "learning_rate": 6.4378493538229495e-06, "loss": 0.0017, "step": 141800 }, { "epoch": 0.9329421129846122, "grad_norm": 0.06723859432774931, "learning_rate": 6.437299484014832e-06, "loss": 0.001, "step": 141810 }, { "epoch": 0.9330079011598456, "grad_norm": 0.027773711325846245, "learning_rate": 6.436749595257161e-06, "loss": 0.0007, "step": 141820 }, { "epoch": 0.933073689335079, "grad_norm": 0.14152053504795806, "learning_rate": 6.43619968755719e-06, "loss": 0.0018, "step": 141830 }, { "epoch": 0.9331394775103123, "grad_norm": 0.009094353229954619, "learning_rate": 6.435649760922165e-06, "loss": 0.0013, "step": 141840 }, { "epoch": 0.9332052656855456, "grad_norm": 0.025206922132938164, "learning_rate": 6.435099815359338e-06, "loss": 0.0013, "step": 141850 }, { "epoch": 0.933271053860779, "grad_norm": 0.01849830320323031, "learning_rate": 6.434549850875959e-06, "loss": 0.0007, "step": 141860 }, { "epoch": 0.9333368420360124, "grad_norm": 0.02642325929510674, "learning_rate": 6.433999867479279e-06, "loss": 0.0025, "step": 141870 }, { "epoch": 0.9334026302112458, "grad_norm": 0.058157919483857186, "learning_rate": 6.43344986517655e-06, "loss": 0.0012, "step": 141880 }, { "epoch": 0.9334684183864792, "grad_norm": 0.015650001386175366, "learning_rate": 6.432899843975022e-06, "loss": 0.0004, "step": 141890 }, { "epoch": 0.9335342065617126, "grad_norm": 0.10239765854808883, "learning_rate": 6.432349803881947e-06, "loss": 0.0007, "step": 141900 }, { "epoch": 0.933599994736946, "grad_norm": 0.008858239263640799, "learning_rate": 6.431799744904578e-06, "loss": 0.0009, "step": 141910 }, { "epoch": 0.9336657829121794, "grad_norm": 0.02935091932974863, "learning_rate": 6.431249667050166e-06, "loss": 0.0009, "step": 141920 }, { "epoch": 0.9337315710874128, "grad_norm": 0.06805129381659225, "learning_rate": 6.430699570325962e-06, "loss": 0.0006, "step": 141930 }, { "epoch": 0.9337973592626462, "grad_norm": 0.019787973837741314, "learning_rate": 6.430149454739219e-06, "loss": 0.0028, "step": 141940 }, { "epoch": 0.9338631474378796, "grad_norm": 0.029052419144898624, "learning_rate": 6.429599320297192e-06, "loss": 0.0004, "step": 141950 }, { "epoch": 0.9339289356131129, "grad_norm": 0.12406639191201867, "learning_rate": 6.429049167007133e-06, "loss": 0.0011, "step": 141960 }, { "epoch": 0.9339947237883462, "grad_norm": 0.06807737680924793, "learning_rate": 6.4284989948762944e-06, "loss": 0.001, "step": 141970 }, { "epoch": 0.9340605119635796, "grad_norm": 0.011888012479605118, "learning_rate": 6.42794880391193e-06, "loss": 0.0006, "step": 141980 }, { "epoch": 0.934126300138813, "grad_norm": 0.0079949835070961, "learning_rate": 6.4273985941212945e-06, "loss": 0.0006, "step": 141990 }, { "epoch": 0.9341920883140464, "grad_norm": 0.007205405031349209, "learning_rate": 6.42684836551164e-06, "loss": 0.0014, "step": 142000 }, { "epoch": 0.9342578764892798, "grad_norm": 0.026087208408834266, "learning_rate": 6.426298118090224e-06, "loss": 0.0008, "step": 142010 }, { "epoch": 0.9343236646645132, "grad_norm": 0.012042382708349992, "learning_rate": 6.425747851864298e-06, "loss": 0.0013, "step": 142020 }, { "epoch": 0.9343894528397466, "grad_norm": 0.05571654944277279, "learning_rate": 6.4251975668411196e-06, "loss": 0.0011, "step": 142030 }, { "epoch": 0.93445524101498, "grad_norm": 0.027574633160790993, "learning_rate": 6.424647263027941e-06, "loss": 0.0007, "step": 142040 }, { "epoch": 0.9345210291902134, "grad_norm": 0.09500630825164356, "learning_rate": 6.424096940432016e-06, "loss": 0.0009, "step": 142050 }, { "epoch": 0.9345868173654467, "grad_norm": 0.06666469331245113, "learning_rate": 6.423546599060606e-06, "loss": 0.0016, "step": 142060 }, { "epoch": 0.9346526055406801, "grad_norm": 0.05485840688936985, "learning_rate": 6.422996238920963e-06, "loss": 0.0018, "step": 142070 }, { "epoch": 0.9347183937159135, "grad_norm": 0.008660645912248468, "learning_rate": 6.422445860020343e-06, "loss": 0.0009, "step": 142080 }, { "epoch": 0.9347841818911469, "grad_norm": 0.047805051447102585, "learning_rate": 6.421895462366003e-06, "loss": 0.0011, "step": 142090 }, { "epoch": 0.9348499700663803, "grad_norm": 0.03519216002163057, "learning_rate": 6.4213450459652005e-06, "loss": 0.0014, "step": 142100 }, { "epoch": 0.9349157582416137, "grad_norm": 0.032910030948875166, "learning_rate": 6.42079461082519e-06, "loss": 0.001, "step": 142110 }, { "epoch": 0.934981546416847, "grad_norm": 0.06280316389751983, "learning_rate": 6.4202441569532285e-06, "loss": 0.0009, "step": 142120 }, { "epoch": 0.9350473345920804, "grad_norm": 0.10124446441569505, "learning_rate": 6.419693684356576e-06, "loss": 0.0015, "step": 142130 }, { "epoch": 0.9351131227673138, "grad_norm": 0.06421566540715756, "learning_rate": 6.419143193042488e-06, "loss": 0.0011, "step": 142140 }, { "epoch": 0.9351789109425472, "grad_norm": 0.04167952267243667, "learning_rate": 6.4185926830182236e-06, "loss": 0.0004, "step": 142150 }, { "epoch": 0.9352446991177805, "grad_norm": 0.023404013191206576, "learning_rate": 6.4180421542910384e-06, "loss": 0.0013, "step": 142160 }, { "epoch": 0.9353104872930139, "grad_norm": 0.041818421935020944, "learning_rate": 6.417491606868193e-06, "loss": 0.0011, "step": 142170 }, { "epoch": 0.9353762754682473, "grad_norm": 0.02649056971546892, "learning_rate": 6.4169410407569435e-06, "loss": 0.0017, "step": 142180 }, { "epoch": 0.9354420636434807, "grad_norm": 0.07030497498891644, "learning_rate": 6.416390455964552e-06, "loss": 0.0016, "step": 142190 }, { "epoch": 0.9355078518187141, "grad_norm": 0.03131123195559479, "learning_rate": 6.4158398524982745e-06, "loss": 0.0007, "step": 142200 }, { "epoch": 0.9355736399939475, "grad_norm": 0.01625190556263536, "learning_rate": 6.415289230365371e-06, "loss": 0.0012, "step": 142210 }, { "epoch": 0.9356394281691809, "grad_norm": 0.3438745218659624, "learning_rate": 6.414738589573103e-06, "loss": 0.0013, "step": 142220 }, { "epoch": 0.9357052163444143, "grad_norm": 0.006605316847917251, "learning_rate": 6.414187930128728e-06, "loss": 0.0012, "step": 142230 }, { "epoch": 0.9357710045196477, "grad_norm": 0.025997463982665496, "learning_rate": 6.413637252039506e-06, "loss": 0.0019, "step": 142240 }, { "epoch": 0.9358367926948811, "grad_norm": 0.10932798466412447, "learning_rate": 6.413086555312697e-06, "loss": 0.0022, "step": 142250 }, { "epoch": 0.9359025808701144, "grad_norm": 0.044973272276760734, "learning_rate": 6.4125358399555625e-06, "loss": 0.002, "step": 142260 }, { "epoch": 0.9359683690453477, "grad_norm": 0.0626492626011895, "learning_rate": 6.411985105975364e-06, "loss": 0.0013, "step": 142270 }, { "epoch": 0.9360341572205811, "grad_norm": 0.02650819010284124, "learning_rate": 6.411434353379361e-06, "loss": 0.0023, "step": 142280 }, { "epoch": 0.9360999453958145, "grad_norm": 0.045333000592422644, "learning_rate": 6.410883582174815e-06, "loss": 0.0013, "step": 142290 }, { "epoch": 0.9361657335710479, "grad_norm": 0.0781351312125969, "learning_rate": 6.410332792368988e-06, "loss": 0.0016, "step": 142300 }, { "epoch": 0.9362315217462813, "grad_norm": 0.02883373429309768, "learning_rate": 6.409781983969139e-06, "loss": 0.0013, "step": 142310 }, { "epoch": 0.9362973099215147, "grad_norm": 0.10532719653713513, "learning_rate": 6.409231156982534e-06, "loss": 0.0012, "step": 142320 }, { "epoch": 0.9363630980967481, "grad_norm": 0.10302743214285943, "learning_rate": 6.408680311416432e-06, "loss": 0.0011, "step": 142330 }, { "epoch": 0.9364288862719815, "grad_norm": 0.02355008946603955, "learning_rate": 6.408129447278097e-06, "loss": 0.0016, "step": 142340 }, { "epoch": 0.9364946744472149, "grad_norm": 0.021258973949442227, "learning_rate": 6.407578564574792e-06, "loss": 0.0019, "step": 142350 }, { "epoch": 0.9365604626224482, "grad_norm": 0.01774401860267347, "learning_rate": 6.407027663313781e-06, "loss": 0.001, "step": 142360 }, { "epoch": 0.9366262507976816, "grad_norm": 0.00528379124071494, "learning_rate": 6.406476743502323e-06, "loss": 0.0012, "step": 142370 }, { "epoch": 0.936692038972915, "grad_norm": 0.06983541973504664, "learning_rate": 6.405925805147684e-06, "loss": 0.0011, "step": 142380 }, { "epoch": 0.9367578271481484, "grad_norm": 0.06759828015139052, "learning_rate": 6.405374848257128e-06, "loss": 0.0013, "step": 142390 }, { "epoch": 0.9368236153233818, "grad_norm": 0.06925403928641405, "learning_rate": 6.404823872837918e-06, "loss": 0.0014, "step": 142400 }, { "epoch": 0.9368894034986152, "grad_norm": 0.0032516966754586704, "learning_rate": 6.4042728788973195e-06, "loss": 0.0016, "step": 142410 }, { "epoch": 0.9369551916738486, "grad_norm": 0.03759791155580963, "learning_rate": 6.403721866442596e-06, "loss": 0.0012, "step": 142420 }, { "epoch": 0.937020979849082, "grad_norm": 0.018303751260816504, "learning_rate": 6.403170835481011e-06, "loss": 0.0022, "step": 142430 }, { "epoch": 0.9370867680243153, "grad_norm": 0.07303988378507116, "learning_rate": 6.402619786019831e-06, "loss": 0.0015, "step": 142440 }, { "epoch": 0.9371525561995487, "grad_norm": 0.021128196762457536, "learning_rate": 6.402068718066319e-06, "loss": 0.0019, "step": 142450 }, { "epoch": 0.9372183443747821, "grad_norm": 0.06152208294098822, "learning_rate": 6.401517631627743e-06, "loss": 0.0006, "step": 142460 }, { "epoch": 0.9372841325500154, "grad_norm": 0.016063157210520495, "learning_rate": 6.400966526711367e-06, "loss": 0.0007, "step": 142470 }, { "epoch": 0.9373499207252488, "grad_norm": 0.013594966957863091, "learning_rate": 6.400415403324459e-06, "loss": 0.0006, "step": 142480 }, { "epoch": 0.9374157089004822, "grad_norm": 0.041699276234085904, "learning_rate": 6.399864261474282e-06, "loss": 0.0015, "step": 142490 }, { "epoch": 0.9374814970757156, "grad_norm": 0.009501023317522173, "learning_rate": 6.3993131011681046e-06, "loss": 0.0007, "step": 142500 }, { "epoch": 0.937547285250949, "grad_norm": 0.012135412969586071, "learning_rate": 6.3987619224131905e-06, "loss": 0.0005, "step": 142510 }, { "epoch": 0.9376130734261824, "grad_norm": 0.12709051519984874, "learning_rate": 6.39821072521681e-06, "loss": 0.0013, "step": 142520 }, { "epoch": 0.9376788616014158, "grad_norm": 0.06658140304717312, "learning_rate": 6.397659509586229e-06, "loss": 0.0017, "step": 142530 }, { "epoch": 0.9377446497766492, "grad_norm": 0.003771942317543448, "learning_rate": 6.397108275528714e-06, "loss": 0.0006, "step": 142540 }, { "epoch": 0.9378104379518826, "grad_norm": 0.052845144267837034, "learning_rate": 6.396557023051533e-06, "loss": 0.0012, "step": 142550 }, { "epoch": 0.937876226127116, "grad_norm": 0.03897512548796769, "learning_rate": 6.396005752161954e-06, "loss": 0.0003, "step": 142560 }, { "epoch": 0.9379420143023492, "grad_norm": 0.014525798312420762, "learning_rate": 6.395454462867243e-06, "loss": 0.0018, "step": 142570 }, { "epoch": 0.9380078024775826, "grad_norm": 0.08662780857630958, "learning_rate": 6.394903155174672e-06, "loss": 0.0011, "step": 142580 }, { "epoch": 0.938073590652816, "grad_norm": 0.009512851995378671, "learning_rate": 6.394351829091507e-06, "loss": 0.0012, "step": 142590 }, { "epoch": 0.9381393788280494, "grad_norm": 0.08735691376698078, "learning_rate": 6.393800484625019e-06, "loss": 0.0012, "step": 142600 }, { "epoch": 0.9382051670032828, "grad_norm": 0.0350738991021941, "learning_rate": 6.3932491217824745e-06, "loss": 0.0018, "step": 142610 }, { "epoch": 0.9382709551785162, "grad_norm": 0.018087108618405814, "learning_rate": 6.392697740571143e-06, "loss": 0.0014, "step": 142620 }, { "epoch": 0.9383367433537496, "grad_norm": 0.09325324410212375, "learning_rate": 6.392146340998295e-06, "loss": 0.0022, "step": 142630 }, { "epoch": 0.938402531528983, "grad_norm": 0.013902468090293899, "learning_rate": 6.3915949230711985e-06, "loss": 0.0007, "step": 142640 }, { "epoch": 0.9384683197042164, "grad_norm": 0.02423525699818554, "learning_rate": 6.391043486797126e-06, "loss": 0.0032, "step": 142650 }, { "epoch": 0.9385341078794498, "grad_norm": 0.047216631677905865, "learning_rate": 6.390492032183346e-06, "loss": 0.0012, "step": 142660 }, { "epoch": 0.9385998960546831, "grad_norm": 0.049923566102500955, "learning_rate": 6.389940559237131e-06, "loss": 0.0014, "step": 142670 }, { "epoch": 0.9386656842299165, "grad_norm": 0.07002447887789262, "learning_rate": 6.389389067965749e-06, "loss": 0.0007, "step": 142680 }, { "epoch": 0.9387314724051499, "grad_norm": 0.09482322596601354, "learning_rate": 6.388837558376473e-06, "loss": 0.0011, "step": 142690 }, { "epoch": 0.9387972605803833, "grad_norm": 0.0019084649294602988, "learning_rate": 6.38828603047657e-06, "loss": 0.0012, "step": 142700 }, { "epoch": 0.9388630487556167, "grad_norm": 0.019774734848630012, "learning_rate": 6.387734484273318e-06, "loss": 0.0007, "step": 142710 }, { "epoch": 0.93892883693085, "grad_norm": 0.02964832800721183, "learning_rate": 6.387182919773984e-06, "loss": 0.001, "step": 142720 }, { "epoch": 0.9389946251060834, "grad_norm": 0.010756578385602723, "learning_rate": 6.386631336985842e-06, "loss": 0.0014, "step": 142730 }, { "epoch": 0.9390604132813168, "grad_norm": 0.11344896404870014, "learning_rate": 6.386079735916164e-06, "loss": 0.001, "step": 142740 }, { "epoch": 0.9391262014565502, "grad_norm": 0.015490179622989146, "learning_rate": 6.385528116572221e-06, "loss": 0.001, "step": 142750 }, { "epoch": 0.9391919896317836, "grad_norm": 0.030458506478449734, "learning_rate": 6.384976478961287e-06, "loss": 0.0009, "step": 142760 }, { "epoch": 0.9392577778070169, "grad_norm": 0.03766003773092496, "learning_rate": 6.384424823090631e-06, "loss": 0.0011, "step": 142770 }, { "epoch": 0.9393235659822503, "grad_norm": 0.027344173365964473, "learning_rate": 6.383873148967533e-06, "loss": 0.0012, "step": 142780 }, { "epoch": 0.9393893541574837, "grad_norm": 0.043123189721165946, "learning_rate": 6.383321456599262e-06, "loss": 0.0017, "step": 142790 }, { "epoch": 0.9394551423327171, "grad_norm": 0.03083686506375144, "learning_rate": 6.382769745993092e-06, "loss": 0.0011, "step": 142800 }, { "epoch": 0.9395209305079505, "grad_norm": 0.00016876426494993548, "learning_rate": 6.382218017156296e-06, "loss": 0.0004, "step": 142810 }, { "epoch": 0.9395867186831839, "grad_norm": 0.12140227757680883, "learning_rate": 6.381666270096152e-06, "loss": 0.0009, "step": 142820 }, { "epoch": 0.9396525068584173, "grad_norm": 0.0022600327892521787, "learning_rate": 6.381114504819929e-06, "loss": 0.0019, "step": 142830 }, { "epoch": 0.9397182950336507, "grad_norm": 0.03412392296307245, "learning_rate": 6.380562721334904e-06, "loss": 0.0011, "step": 142840 }, { "epoch": 0.9397840832088841, "grad_norm": 0.07393893787479054, "learning_rate": 6.380010919648353e-06, "loss": 0.0012, "step": 142850 }, { "epoch": 0.9398498713841175, "grad_norm": 0.04503891690731778, "learning_rate": 6.3794590997675495e-06, "loss": 0.0007, "step": 142860 }, { "epoch": 0.9399156595593507, "grad_norm": 0.021443308084654947, "learning_rate": 6.3789072616997695e-06, "loss": 0.0008, "step": 142870 }, { "epoch": 0.9399814477345841, "grad_norm": 0.09027311433903755, "learning_rate": 6.378355405452288e-06, "loss": 0.0024, "step": 142880 }, { "epoch": 0.9400472359098175, "grad_norm": 0.04583021110733806, "learning_rate": 6.377803531032382e-06, "loss": 0.0007, "step": 142890 }, { "epoch": 0.9401130240850509, "grad_norm": 0.11497223408707964, "learning_rate": 6.377251638447324e-06, "loss": 0.0011, "step": 142900 }, { "epoch": 0.9401788122602843, "grad_norm": 0.02415842155899587, "learning_rate": 6.376699727704394e-06, "loss": 0.0011, "step": 142910 }, { "epoch": 0.9402446004355177, "grad_norm": 0.01241034045610539, "learning_rate": 6.376147798810867e-06, "loss": 0.0011, "step": 142920 }, { "epoch": 0.9403103886107511, "grad_norm": 0.11725576691903311, "learning_rate": 6.3755958517740194e-06, "loss": 0.0006, "step": 142930 }, { "epoch": 0.9403761767859845, "grad_norm": 0.051317434084490465, "learning_rate": 6.375043886601129e-06, "loss": 0.0009, "step": 142940 }, { "epoch": 0.9404419649612179, "grad_norm": 0.022091061088076222, "learning_rate": 6.374491903299472e-06, "loss": 0.0017, "step": 142950 }, { "epoch": 0.9405077531364513, "grad_norm": 0.07898424843985158, "learning_rate": 6.3739399018763275e-06, "loss": 0.0006, "step": 142960 }, { "epoch": 0.9405735413116847, "grad_norm": 0.0015758074691238104, "learning_rate": 6.373387882338971e-06, "loss": 0.001, "step": 142970 }, { "epoch": 0.940639329486918, "grad_norm": 0.07428834457000263, "learning_rate": 6.3728358446946824e-06, "loss": 0.0005, "step": 142980 }, { "epoch": 0.9407051176621514, "grad_norm": 0.05218682417070812, "learning_rate": 6.372283788950739e-06, "loss": 0.0011, "step": 142990 }, { "epoch": 0.9407709058373848, "grad_norm": 0.03832734287680931, "learning_rate": 6.3717317151144175e-06, "loss": 0.0014, "step": 143000 }, { "epoch": 0.9408366940126182, "grad_norm": 0.05914389050064746, "learning_rate": 6.371179623192998e-06, "loss": 0.0013, "step": 143010 }, { "epoch": 0.9409024821878516, "grad_norm": 0.058284659137120824, "learning_rate": 6.370627513193759e-06, "loss": 0.001, "step": 143020 }, { "epoch": 0.940968270363085, "grad_norm": 0.07477307902920997, "learning_rate": 6.370075385123981e-06, "loss": 0.002, "step": 143030 }, { "epoch": 0.9410340585383183, "grad_norm": 0.06617259011157861, "learning_rate": 6.369523238990943e-06, "loss": 0.0016, "step": 143040 }, { "epoch": 0.9410998467135517, "grad_norm": 0.03805259434361894, "learning_rate": 6.368971074801924e-06, "loss": 0.0013, "step": 143050 }, { "epoch": 0.9411656348887851, "grad_norm": 0.017098879600353483, "learning_rate": 6.368418892564203e-06, "loss": 0.0013, "step": 143060 }, { "epoch": 0.9412314230640185, "grad_norm": 0.031048214558333467, "learning_rate": 6.367866692285061e-06, "loss": 0.0011, "step": 143070 }, { "epoch": 0.9412972112392518, "grad_norm": 0.086783198627093, "learning_rate": 6.367314473971777e-06, "loss": 0.0012, "step": 143080 }, { "epoch": 0.9413629994144852, "grad_norm": 0.021770598421803466, "learning_rate": 6.366762237631633e-06, "loss": 0.0008, "step": 143090 }, { "epoch": 0.9414287875897186, "grad_norm": 0.023844506824758416, "learning_rate": 6.366209983271909e-06, "loss": 0.0012, "step": 143100 }, { "epoch": 0.941494575764952, "grad_norm": 0.017632032206752198, "learning_rate": 6.365657710899887e-06, "loss": 0.0004, "step": 143110 }, { "epoch": 0.9415603639401854, "grad_norm": 0.00905044793187806, "learning_rate": 6.365105420522848e-06, "loss": 0.0009, "step": 143120 }, { "epoch": 0.9416261521154188, "grad_norm": 0.023511543644602055, "learning_rate": 6.364553112148073e-06, "loss": 0.0017, "step": 143130 }, { "epoch": 0.9416919402906522, "grad_norm": 0.051650283708271454, "learning_rate": 6.364000785782842e-06, "loss": 0.0015, "step": 143140 }, { "epoch": 0.9417577284658856, "grad_norm": 0.013125884201675665, "learning_rate": 6.36344844143444e-06, "loss": 0.0006, "step": 143150 }, { "epoch": 0.941823516641119, "grad_norm": 0.0697228692360951, "learning_rate": 6.362896079110148e-06, "loss": 0.0012, "step": 143160 }, { "epoch": 0.9418893048163524, "grad_norm": 0.01602233156765148, "learning_rate": 6.362343698817248e-06, "loss": 0.0015, "step": 143170 }, { "epoch": 0.9419550929915856, "grad_norm": 0.22428849465781, "learning_rate": 6.361791300563023e-06, "loss": 0.0017, "step": 143180 }, { "epoch": 0.942020881166819, "grad_norm": 0.06916070837412996, "learning_rate": 6.361238884354755e-06, "loss": 0.0011, "step": 143190 }, { "epoch": 0.9420866693420524, "grad_norm": 0.021146175019795085, "learning_rate": 6.360686450199729e-06, "loss": 0.0007, "step": 143200 }, { "epoch": 0.9421524575172858, "grad_norm": 0.018140296126826154, "learning_rate": 6.360133998105227e-06, "loss": 0.0039, "step": 143210 }, { "epoch": 0.9422182456925192, "grad_norm": 0.08837963610873216, "learning_rate": 6.359581528078533e-06, "loss": 0.0006, "step": 143220 }, { "epoch": 0.9422840338677526, "grad_norm": 0.03199057174080207, "learning_rate": 6.3590290401269295e-06, "loss": 0.0004, "step": 143230 }, { "epoch": 0.942349822042986, "grad_norm": 0.012002816601998567, "learning_rate": 6.358476534257704e-06, "loss": 0.0016, "step": 143240 }, { "epoch": 0.9424156102182194, "grad_norm": 0.03642421533958007, "learning_rate": 6.3579240104781385e-06, "loss": 0.0011, "step": 143250 }, { "epoch": 0.9424813983934528, "grad_norm": 0.03862433500819765, "learning_rate": 6.357371468795515e-06, "loss": 0.0042, "step": 143260 }, { "epoch": 0.9425471865686862, "grad_norm": 0.041878571584185786, "learning_rate": 6.356818909217124e-06, "loss": 0.0009, "step": 143270 }, { "epoch": 0.9426129747439195, "grad_norm": 0.13833384562498435, "learning_rate": 6.356266331750245e-06, "loss": 0.0023, "step": 143280 }, { "epoch": 0.9426787629191529, "grad_norm": 0.08285300535240991, "learning_rate": 6.355713736402167e-06, "loss": 0.0014, "step": 143290 }, { "epoch": 0.9427445510943863, "grad_norm": 0.1131427771539596, "learning_rate": 6.355161123180176e-06, "loss": 0.001, "step": 143300 }, { "epoch": 0.9428103392696197, "grad_norm": 0.051204909633891536, "learning_rate": 6.354608492091554e-06, "loss": 0.0006, "step": 143310 }, { "epoch": 0.942876127444853, "grad_norm": 0.2541234731203703, "learning_rate": 6.35405584314359e-06, "loss": 0.0019, "step": 143320 }, { "epoch": 0.9429419156200864, "grad_norm": 0.010138986158826125, "learning_rate": 6.3535031763435675e-06, "loss": 0.0009, "step": 143330 }, { "epoch": 0.9430077037953198, "grad_norm": 0.017375174570882827, "learning_rate": 6.352950491698775e-06, "loss": 0.0017, "step": 143340 }, { "epoch": 0.9430734919705532, "grad_norm": 0.008953442803919293, "learning_rate": 6.352397789216499e-06, "loss": 0.0008, "step": 143350 }, { "epoch": 0.9431392801457866, "grad_norm": 0.03808706767549846, "learning_rate": 6.351845068904027e-06, "loss": 0.0047, "step": 143360 }, { "epoch": 0.94320506832102, "grad_norm": 0.022881779215196966, "learning_rate": 6.351292330768645e-06, "loss": 0.0023, "step": 143370 }, { "epoch": 0.9432708564962534, "grad_norm": 0.023483157353706003, "learning_rate": 6.350739574817641e-06, "loss": 0.001, "step": 143380 }, { "epoch": 0.9433366446714867, "grad_norm": 0.1634480357565297, "learning_rate": 6.350186801058301e-06, "loss": 0.0011, "step": 143390 }, { "epoch": 0.9434024328467201, "grad_norm": 0.08821985688559608, "learning_rate": 6.349634009497915e-06, "loss": 0.0011, "step": 143400 }, { "epoch": 0.9434682210219535, "grad_norm": 0.04911397806018959, "learning_rate": 6.34908120014377e-06, "loss": 0.002, "step": 143410 }, { "epoch": 0.9435340091971869, "grad_norm": 0.09163669474306106, "learning_rate": 6.348528373003153e-06, "loss": 0.0008, "step": 143420 }, { "epoch": 0.9435997973724203, "grad_norm": 0.058924404195601104, "learning_rate": 6.347975528083357e-06, "loss": 0.0013, "step": 143430 }, { "epoch": 0.9436655855476537, "grad_norm": 0.0006423619722405583, "learning_rate": 6.347422665391666e-06, "loss": 0.0003, "step": 143440 }, { "epoch": 0.9437313737228871, "grad_norm": 0.025570664618992586, "learning_rate": 6.346869784935371e-06, "loss": 0.0008, "step": 143450 }, { "epoch": 0.9437971618981205, "grad_norm": 0.059888119769088796, "learning_rate": 6.346316886721762e-06, "loss": 0.001, "step": 143460 }, { "epoch": 0.9438629500733539, "grad_norm": 0.05055534272978929, "learning_rate": 6.3457639707581265e-06, "loss": 0.0009, "step": 143470 }, { "epoch": 0.9439287382485873, "grad_norm": 0.03163158002790047, "learning_rate": 6.345211037051755e-06, "loss": 0.0008, "step": 143480 }, { "epoch": 0.9439945264238205, "grad_norm": 0.0062736719397838295, "learning_rate": 6.344658085609939e-06, "loss": 0.0018, "step": 143490 }, { "epoch": 0.9440603145990539, "grad_norm": 0.08413814686970288, "learning_rate": 6.344105116439968e-06, "loss": 0.0009, "step": 143500 }, { "epoch": 0.9441261027742873, "grad_norm": 0.017193911310834968, "learning_rate": 6.343552129549131e-06, "loss": 0.0012, "step": 143510 }, { "epoch": 0.9441918909495207, "grad_norm": 0.18191423699034862, "learning_rate": 6.3429991249447194e-06, "loss": 0.0013, "step": 143520 }, { "epoch": 0.9442576791247541, "grad_norm": 0.08110488901107923, "learning_rate": 6.342446102634025e-06, "loss": 0.001, "step": 143530 }, { "epoch": 0.9443234672999875, "grad_norm": 0.14368533044797735, "learning_rate": 6.341893062624336e-06, "loss": 0.0018, "step": 143540 }, { "epoch": 0.9443892554752209, "grad_norm": 0.006006887868134974, "learning_rate": 6.341340004922948e-06, "loss": 0.0013, "step": 143550 }, { "epoch": 0.9444550436504543, "grad_norm": 0.08695376668078043, "learning_rate": 6.340786929537149e-06, "loss": 0.0007, "step": 143560 }, { "epoch": 0.9445208318256877, "grad_norm": 0.5730699023158583, "learning_rate": 6.3402338364742345e-06, "loss": 0.0012, "step": 143570 }, { "epoch": 0.9445866200009211, "grad_norm": 0.03370935958436434, "learning_rate": 6.3396807257414925e-06, "loss": 0.0018, "step": 143580 }, { "epoch": 0.9446524081761544, "grad_norm": 0.07011515731173476, "learning_rate": 6.339127597346219e-06, "loss": 0.0029, "step": 143590 }, { "epoch": 0.9447181963513878, "grad_norm": 0.011321937202355829, "learning_rate": 6.338574451295701e-06, "loss": 0.0011, "step": 143600 }, { "epoch": 0.9447839845266212, "grad_norm": 0.03462901196797634, "learning_rate": 6.338021287597238e-06, "loss": 0.0011, "step": 143610 }, { "epoch": 0.9448497727018546, "grad_norm": 0.21250393245274982, "learning_rate": 6.33746810625812e-06, "loss": 0.0012, "step": 143620 }, { "epoch": 0.944915560877088, "grad_norm": 0.049812577691357725, "learning_rate": 6.336914907285638e-06, "loss": 0.0036, "step": 143630 }, { "epoch": 0.9449813490523213, "grad_norm": 0.0319913071342855, "learning_rate": 6.336361690687089e-06, "loss": 0.0019, "step": 143640 }, { "epoch": 0.9450471372275547, "grad_norm": 0.02172707076586284, "learning_rate": 6.335808456469767e-06, "loss": 0.0012, "step": 143650 }, { "epoch": 0.9451129254027881, "grad_norm": 0.038269474178749194, "learning_rate": 6.335255204640962e-06, "loss": 0.0008, "step": 143660 }, { "epoch": 0.9451787135780215, "grad_norm": 0.014233443984823941, "learning_rate": 6.33470193520797e-06, "loss": 0.001, "step": 143670 }, { "epoch": 0.9452445017532549, "grad_norm": 0.02362244207275356, "learning_rate": 6.3341486481780865e-06, "loss": 0.0009, "step": 143680 }, { "epoch": 0.9453102899284882, "grad_norm": 0.2025475267311974, "learning_rate": 6.333595343558605e-06, "loss": 0.0007, "step": 143690 }, { "epoch": 0.9453760781037216, "grad_norm": 0.0020931708400961572, "learning_rate": 6.333042021356821e-06, "loss": 0.0007, "step": 143700 }, { "epoch": 0.945441866278955, "grad_norm": 0.0013398836319541728, "learning_rate": 6.33248868158003e-06, "loss": 0.002, "step": 143710 }, { "epoch": 0.9455076544541884, "grad_norm": 0.10051522061053755, "learning_rate": 6.331935324235525e-06, "loss": 0.0007, "step": 143720 }, { "epoch": 0.9455734426294218, "grad_norm": 0.0041512859543739065, "learning_rate": 6.331381949330604e-06, "loss": 0.0006, "step": 143730 }, { "epoch": 0.9456392308046552, "grad_norm": 0.050924025941393905, "learning_rate": 6.330828556872561e-06, "loss": 0.0021, "step": 143740 }, { "epoch": 0.9457050189798886, "grad_norm": 0.0153287604358072, "learning_rate": 6.330275146868694e-06, "loss": 0.0011, "step": 143750 }, { "epoch": 0.945770807155122, "grad_norm": 0.03796886652588266, "learning_rate": 6.3297217193262985e-06, "loss": 0.0012, "step": 143760 }, { "epoch": 0.9458365953303554, "grad_norm": 0.04869839433307397, "learning_rate": 6.329168274252669e-06, "loss": 0.0007, "step": 143770 }, { "epoch": 0.9459023835055888, "grad_norm": 0.06411844125886022, "learning_rate": 6.328614811655105e-06, "loss": 0.0017, "step": 143780 }, { "epoch": 0.945968171680822, "grad_norm": 0.09608742104846875, "learning_rate": 6.328061331540901e-06, "loss": 0.0011, "step": 143790 }, { "epoch": 0.9460339598560554, "grad_norm": 0.06305481519669066, "learning_rate": 6.327507833917356e-06, "loss": 0.0009, "step": 143800 }, { "epoch": 0.9460997480312888, "grad_norm": 0.058318828917507406, "learning_rate": 6.326954318791766e-06, "loss": 0.0009, "step": 143810 }, { "epoch": 0.9461655362065222, "grad_norm": 0.05603873165103164, "learning_rate": 6.32640078617143e-06, "loss": 0.0006, "step": 143820 }, { "epoch": 0.9462313243817556, "grad_norm": 0.018945511584684566, "learning_rate": 6.3258472360636455e-06, "loss": 0.0016, "step": 143830 }, { "epoch": 0.946297112556989, "grad_norm": 0.07829051406092243, "learning_rate": 6.32529366847571e-06, "loss": 0.0014, "step": 143840 }, { "epoch": 0.9463629007322224, "grad_norm": 0.07457565465917494, "learning_rate": 6.324740083414922e-06, "loss": 0.0015, "step": 143850 }, { "epoch": 0.9464286889074558, "grad_norm": 0.1414624175135228, "learning_rate": 6.324186480888579e-06, "loss": 0.0008, "step": 143860 }, { "epoch": 0.9464944770826892, "grad_norm": 0.021198344401014778, "learning_rate": 6.323632860903983e-06, "loss": 0.0076, "step": 143870 }, { "epoch": 0.9465602652579226, "grad_norm": 0.0695085896643636, "learning_rate": 6.323079223468429e-06, "loss": 0.0013, "step": 143880 }, { "epoch": 0.946626053433156, "grad_norm": 0.023774470844422798, "learning_rate": 6.3225255685892194e-06, "loss": 0.0033, "step": 143890 }, { "epoch": 0.9466918416083893, "grad_norm": 0.04409121626220544, "learning_rate": 6.321971896273653e-06, "loss": 0.0019, "step": 143900 }, { "epoch": 0.9467576297836227, "grad_norm": 0.44727600445213034, "learning_rate": 6.3214182065290275e-06, "loss": 0.0011, "step": 143910 }, { "epoch": 0.946823417958856, "grad_norm": 0.046633502652924014, "learning_rate": 6.3208644993626444e-06, "loss": 0.0012, "step": 143920 }, { "epoch": 0.9468892061340894, "grad_norm": 0.08757447180933046, "learning_rate": 6.320310774781805e-06, "loss": 0.0007, "step": 143930 }, { "epoch": 0.9469549943093228, "grad_norm": 0.038360366229592664, "learning_rate": 6.319757032793807e-06, "loss": 0.001, "step": 143940 }, { "epoch": 0.9470207824845562, "grad_norm": 0.010453935117950147, "learning_rate": 6.319203273405953e-06, "loss": 0.001, "step": 143950 }, { "epoch": 0.9470865706597896, "grad_norm": 0.04065214136155624, "learning_rate": 6.318649496625542e-06, "loss": 0.001, "step": 143960 }, { "epoch": 0.947152358835023, "grad_norm": 0.004515497225734441, "learning_rate": 6.3180957024598775e-06, "loss": 0.0004, "step": 143970 }, { "epoch": 0.9472181470102564, "grad_norm": 0.02114442258620374, "learning_rate": 6.317541890916258e-06, "loss": 0.0012, "step": 143980 }, { "epoch": 0.9472839351854898, "grad_norm": 0.0849080011469677, "learning_rate": 6.316988062001987e-06, "loss": 0.0009, "step": 143990 }, { "epoch": 0.9473497233607231, "grad_norm": 0.09642160859371757, "learning_rate": 6.316434215724368e-06, "loss": 0.0012, "step": 144000 }, { "epoch": 0.9474155115359565, "grad_norm": 0.015261916762784974, "learning_rate": 6.3158803520906984e-06, "loss": 0.0006, "step": 144010 }, { "epoch": 0.9474812997111899, "grad_norm": 0.01176281410565062, "learning_rate": 6.315326471108284e-06, "loss": 0.001, "step": 144020 }, { "epoch": 0.9475470878864233, "grad_norm": 0.04878232666742353, "learning_rate": 6.3147725727844255e-06, "loss": 0.0005, "step": 144030 }, { "epoch": 0.9476128760616567, "grad_norm": 0.04298123737081329, "learning_rate": 6.3142186571264254e-06, "loss": 0.0009, "step": 144040 }, { "epoch": 0.9476786642368901, "grad_norm": 0.0004982178243122181, "learning_rate": 6.313664724141588e-06, "loss": 0.0017, "step": 144050 }, { "epoch": 0.9477444524121235, "grad_norm": 0.11517185472180735, "learning_rate": 6.3131107738372164e-06, "loss": 0.0013, "step": 144060 }, { "epoch": 0.9478102405873569, "grad_norm": 0.0236506859324519, "learning_rate": 6.312556806220614e-06, "loss": 0.0009, "step": 144070 }, { "epoch": 0.9478760287625903, "grad_norm": 0.03215809739178613, "learning_rate": 6.312002821299082e-06, "loss": 0.0026, "step": 144080 }, { "epoch": 0.9479418169378236, "grad_norm": 0.0640819589105888, "learning_rate": 6.311448819079927e-06, "loss": 0.0007, "step": 144090 }, { "epoch": 0.9480076051130569, "grad_norm": 0.020319150144794663, "learning_rate": 6.310894799570452e-06, "loss": 0.0009, "step": 144100 }, { "epoch": 0.9480733932882903, "grad_norm": 0.014260809503165033, "learning_rate": 6.31034076277796e-06, "loss": 0.001, "step": 144110 }, { "epoch": 0.9481391814635237, "grad_norm": 0.00795354034459764, "learning_rate": 6.3097867087097575e-06, "loss": 0.0017, "step": 144120 }, { "epoch": 0.9482049696387571, "grad_norm": 0.0010471225785757994, "learning_rate": 6.309232637373148e-06, "loss": 0.0006, "step": 144130 }, { "epoch": 0.9482707578139905, "grad_norm": 0.06827122185265333, "learning_rate": 6.3086785487754374e-06, "loss": 0.001, "step": 144140 }, { "epoch": 0.9483365459892239, "grad_norm": 0.30029740422895523, "learning_rate": 6.308124442923931e-06, "loss": 0.0009, "step": 144150 }, { "epoch": 0.9484023341644573, "grad_norm": 0.058651737305156895, "learning_rate": 6.307570319825933e-06, "loss": 0.0007, "step": 144160 }, { "epoch": 0.9484681223396907, "grad_norm": 0.027640705416642705, "learning_rate": 6.3070161794887484e-06, "loss": 0.0008, "step": 144170 }, { "epoch": 0.9485339105149241, "grad_norm": 0.05186534545715869, "learning_rate": 6.306462021919686e-06, "loss": 0.0009, "step": 144180 }, { "epoch": 0.9485996986901575, "grad_norm": 0.01804854089477178, "learning_rate": 6.305907847126049e-06, "loss": 0.0008, "step": 144190 }, { "epoch": 0.9486654868653908, "grad_norm": 0.049120323856273615, "learning_rate": 6.3053536551151464e-06, "loss": 0.0013, "step": 144200 }, { "epoch": 0.9487312750406242, "grad_norm": 0.03885490798738463, "learning_rate": 6.304799445894283e-06, "loss": 0.0011, "step": 144210 }, { "epoch": 0.9487970632158576, "grad_norm": 0.047512163570359256, "learning_rate": 6.304245219470765e-06, "loss": 0.0015, "step": 144220 }, { "epoch": 0.948862851391091, "grad_norm": 0.008192083612461862, "learning_rate": 6.3036909758519e-06, "loss": 0.0011, "step": 144230 }, { "epoch": 0.9489286395663243, "grad_norm": 0.1303577780691975, "learning_rate": 6.303136715044995e-06, "loss": 0.0015, "step": 144240 }, { "epoch": 0.9489944277415577, "grad_norm": 0.04492328258674846, "learning_rate": 6.302582437057359e-06, "loss": 0.0019, "step": 144250 }, { "epoch": 0.9490602159167911, "grad_norm": 0.0746741645522733, "learning_rate": 6.302028141896299e-06, "loss": 0.0006, "step": 144260 }, { "epoch": 0.9491260040920245, "grad_norm": 0.024334393748735885, "learning_rate": 6.301473829569121e-06, "loss": 0.002, "step": 144270 }, { "epoch": 0.9491917922672579, "grad_norm": 0.004250312167967404, "learning_rate": 6.3009195000831355e-06, "loss": 0.0018, "step": 144280 }, { "epoch": 0.9492575804424913, "grad_norm": 0.05713488882940247, "learning_rate": 6.30036515344565e-06, "loss": 0.0014, "step": 144290 }, { "epoch": 0.9493233686177247, "grad_norm": 0.051351005138166435, "learning_rate": 6.299810789663972e-06, "loss": 0.001, "step": 144300 }, { "epoch": 0.949389156792958, "grad_norm": 0.0012981443915214023, "learning_rate": 6.299256408745412e-06, "loss": 0.0003, "step": 144310 }, { "epoch": 0.9494549449681914, "grad_norm": 0.048353702048370396, "learning_rate": 6.298702010697278e-06, "loss": 0.0014, "step": 144320 }, { "epoch": 0.9495207331434248, "grad_norm": 0.04246491981383534, "learning_rate": 6.29814759552688e-06, "loss": 0.0011, "step": 144330 }, { "epoch": 0.9495865213186582, "grad_norm": 0.05495586787939114, "learning_rate": 6.2975931632415276e-06, "loss": 0.0012, "step": 144340 }, { "epoch": 0.9496523094938916, "grad_norm": 0.051962865845501, "learning_rate": 6.29703871384853e-06, "loss": 0.0013, "step": 144350 }, { "epoch": 0.949718097669125, "grad_norm": 0.12033743493653111, "learning_rate": 6.296484247355196e-06, "loss": 0.0015, "step": 144360 }, { "epoch": 0.9497838858443584, "grad_norm": 0.004384070343705738, "learning_rate": 6.295929763768838e-06, "loss": 0.0007, "step": 144370 }, { "epoch": 0.9498496740195917, "grad_norm": 0.004230795704081912, "learning_rate": 6.295375263096765e-06, "loss": 0.0006, "step": 144380 }, { "epoch": 0.9499154621948251, "grad_norm": 0.03437368267914061, "learning_rate": 6.294820745346287e-06, "loss": 0.0011, "step": 144390 }, { "epoch": 0.9499812503700585, "grad_norm": 0.08677489455691587, "learning_rate": 6.2942662105247175e-06, "loss": 0.0015, "step": 144400 }, { "epoch": 0.9500470385452918, "grad_norm": 0.047389341927826276, "learning_rate": 6.293711658639365e-06, "loss": 0.001, "step": 144410 }, { "epoch": 0.9501128267205252, "grad_norm": 0.027507409698685814, "learning_rate": 6.293157089697542e-06, "loss": 0.0011, "step": 144420 }, { "epoch": 0.9501786148957586, "grad_norm": 0.06087036957520832, "learning_rate": 6.292602503706558e-06, "loss": 0.001, "step": 144430 }, { "epoch": 0.950244403070992, "grad_norm": 0.06130566850584314, "learning_rate": 6.292047900673728e-06, "loss": 0.0006, "step": 144440 }, { "epoch": 0.9503101912462254, "grad_norm": 0.13673499687195734, "learning_rate": 6.291493280606361e-06, "loss": 0.0007, "step": 144450 }, { "epoch": 0.9503759794214588, "grad_norm": 0.006314053902069462, "learning_rate": 6.290938643511771e-06, "loss": 0.002, "step": 144460 }, { "epoch": 0.9504417675966922, "grad_norm": 0.04089570269955361, "learning_rate": 6.2903839893972705e-06, "loss": 0.001, "step": 144470 }, { "epoch": 0.9505075557719256, "grad_norm": 0.07326620778256018, "learning_rate": 6.289829318270172e-06, "loss": 0.0019, "step": 144480 }, { "epoch": 0.950573343947159, "grad_norm": 0.042961372033181705, "learning_rate": 6.2892746301377874e-06, "loss": 0.0009, "step": 144490 }, { "epoch": 0.9506391321223924, "grad_norm": 0.04202565356954392, "learning_rate": 6.288719925007429e-06, "loss": 0.0012, "step": 144500 }, { "epoch": 0.9507049202976257, "grad_norm": 0.010438608882869122, "learning_rate": 6.288165202886412e-06, "loss": 0.0011, "step": 144510 }, { "epoch": 0.950770708472859, "grad_norm": 0.07657372900142237, "learning_rate": 6.28761046378205e-06, "loss": 0.0011, "step": 144520 }, { "epoch": 0.9508364966480924, "grad_norm": 0.135606597061676, "learning_rate": 6.287055707701657e-06, "loss": 0.0012, "step": 144530 }, { "epoch": 0.9509022848233258, "grad_norm": 0.01971036591113155, "learning_rate": 6.286500934652544e-06, "loss": 0.0011, "step": 144540 }, { "epoch": 0.9509680729985592, "grad_norm": 0.0018836145385029545, "learning_rate": 6.285946144642029e-06, "loss": 0.0015, "step": 144550 }, { "epoch": 0.9510338611737926, "grad_norm": 0.014083871851565772, "learning_rate": 6.285391337677423e-06, "loss": 0.0008, "step": 144560 }, { "epoch": 0.951099649349026, "grad_norm": 0.03131766022826059, "learning_rate": 6.284836513766044e-06, "loss": 0.0007, "step": 144570 }, { "epoch": 0.9511654375242594, "grad_norm": 0.07861197491701499, "learning_rate": 6.284281672915204e-06, "loss": 0.0013, "step": 144580 }, { "epoch": 0.9512312256994928, "grad_norm": 0.0029884722977618953, "learning_rate": 6.28372681513222e-06, "loss": 0.0011, "step": 144590 }, { "epoch": 0.9512970138747262, "grad_norm": 0.044591969293638255, "learning_rate": 6.283171940424406e-06, "loss": 0.0008, "step": 144600 }, { "epoch": 0.9513628020499595, "grad_norm": 0.018442243595503303, "learning_rate": 6.28261704879908e-06, "loss": 0.0011, "step": 144610 }, { "epoch": 0.9514285902251929, "grad_norm": 0.04921628205957075, "learning_rate": 6.282062140263554e-06, "loss": 0.0009, "step": 144620 }, { "epoch": 0.9514943784004263, "grad_norm": 0.012804106662520048, "learning_rate": 6.281507214825146e-06, "loss": 0.0006, "step": 144630 }, { "epoch": 0.9515601665756597, "grad_norm": 0.07634133008243521, "learning_rate": 6.280952272491172e-06, "loss": 0.0011, "step": 144640 }, { "epoch": 0.9516259547508931, "grad_norm": 0.08067839376922864, "learning_rate": 6.28039731326895e-06, "loss": 0.0009, "step": 144650 }, { "epoch": 0.9516917429261265, "grad_norm": 0.0394474460232599, "learning_rate": 6.279842337165795e-06, "loss": 0.0009, "step": 144660 }, { "epoch": 0.9517575311013599, "grad_norm": 0.005585404353656245, "learning_rate": 6.279287344189023e-06, "loss": 0.0009, "step": 144670 }, { "epoch": 0.9518233192765932, "grad_norm": 0.03628035664291903, "learning_rate": 6.278732334345954e-06, "loss": 0.0011, "step": 144680 }, { "epoch": 0.9518891074518266, "grad_norm": 0.06456917111169162, "learning_rate": 6.278177307643901e-06, "loss": 0.0009, "step": 144690 }, { "epoch": 0.95195489562706, "grad_norm": 0.009707722064183555, "learning_rate": 6.277622264090186e-06, "loss": 0.0011, "step": 144700 }, { "epoch": 0.9520206838022933, "grad_norm": 0.006041919307762464, "learning_rate": 6.277067203692124e-06, "loss": 0.0012, "step": 144710 }, { "epoch": 0.9520864719775267, "grad_norm": 0.023275404926422324, "learning_rate": 6.276512126457035e-06, "loss": 0.0005, "step": 144720 }, { "epoch": 0.9521522601527601, "grad_norm": 0.05994379401235853, "learning_rate": 6.275957032392234e-06, "loss": 0.0006, "step": 144730 }, { "epoch": 0.9522180483279935, "grad_norm": 0.003979291147227012, "learning_rate": 6.275401921505044e-06, "loss": 0.0038, "step": 144740 }, { "epoch": 0.9522838365032269, "grad_norm": 0.07231292661320024, "learning_rate": 6.27484679380278e-06, "loss": 0.0023, "step": 144750 }, { "epoch": 0.9523496246784603, "grad_norm": 0.2286784914104799, "learning_rate": 6.274291649292762e-06, "loss": 0.0015, "step": 144760 }, { "epoch": 0.9524154128536937, "grad_norm": 0.0148341524422537, "learning_rate": 6.27373648798231e-06, "loss": 0.001, "step": 144770 }, { "epoch": 0.9524812010289271, "grad_norm": 0.008008353748988407, "learning_rate": 6.273181309878741e-06, "loss": 0.0016, "step": 144780 }, { "epoch": 0.9525469892041605, "grad_norm": 0.13878086270482862, "learning_rate": 6.272626114989377e-06, "loss": 0.0024, "step": 144790 }, { "epoch": 0.9526127773793939, "grad_norm": 0.07685776525110544, "learning_rate": 6.272070903321537e-06, "loss": 0.001, "step": 144800 }, { "epoch": 0.9526785655546273, "grad_norm": 0.004459651504934727, "learning_rate": 6.2715156748825405e-06, "loss": 0.0012, "step": 144810 }, { "epoch": 0.9527443537298605, "grad_norm": 0.011199593609794252, "learning_rate": 6.270960429679707e-06, "loss": 0.0009, "step": 144820 }, { "epoch": 0.9528101419050939, "grad_norm": 0.07828389190458698, "learning_rate": 6.270405167720358e-06, "loss": 0.0009, "step": 144830 }, { "epoch": 0.9528759300803273, "grad_norm": 0.09900085643911778, "learning_rate": 6.269849889011815e-06, "loss": 0.0019, "step": 144840 }, { "epoch": 0.9529417182555607, "grad_norm": 0.023457971773373113, "learning_rate": 6.269294593561398e-06, "loss": 0.0007, "step": 144850 }, { "epoch": 0.9530075064307941, "grad_norm": 0.04149997090881856, "learning_rate": 6.268739281376428e-06, "loss": 0.0019, "step": 144860 }, { "epoch": 0.9530732946060275, "grad_norm": 0.039361505713995895, "learning_rate": 6.268183952464226e-06, "loss": 0.001, "step": 144870 }, { "epoch": 0.9531390827812609, "grad_norm": 0.07777225899841626, "learning_rate": 6.267628606832115e-06, "loss": 0.0011, "step": 144880 }, { "epoch": 0.9532048709564943, "grad_norm": 0.0743855406979396, "learning_rate": 6.267073244487414e-06, "loss": 0.0016, "step": 144890 }, { "epoch": 0.9532706591317277, "grad_norm": 0.013034078128702812, "learning_rate": 6.266517865437447e-06, "loss": 0.0004, "step": 144900 }, { "epoch": 0.9533364473069611, "grad_norm": 0.5272427564817105, "learning_rate": 6.265962469689536e-06, "loss": 0.0012, "step": 144910 }, { "epoch": 0.9534022354821944, "grad_norm": 0.031512373737472225, "learning_rate": 6.265407057251003e-06, "loss": 0.0018, "step": 144920 }, { "epoch": 0.9534680236574278, "grad_norm": 0.032522641427512614, "learning_rate": 6.264851628129172e-06, "loss": 0.0015, "step": 144930 }, { "epoch": 0.9535338118326612, "grad_norm": 0.2917856106676169, "learning_rate": 6.264296182331362e-06, "loss": 0.0023, "step": 144940 }, { "epoch": 0.9535996000078946, "grad_norm": 0.053670127702086276, "learning_rate": 6.2637407198649e-06, "loss": 0.0007, "step": 144950 }, { "epoch": 0.953665388183128, "grad_norm": 0.11584329412086172, "learning_rate": 6.263185240737109e-06, "loss": 0.0012, "step": 144960 }, { "epoch": 0.9537311763583614, "grad_norm": 0.06927008604364367, "learning_rate": 6.262629744955311e-06, "loss": 0.0011, "step": 144970 }, { "epoch": 0.9537969645335947, "grad_norm": 0.032145559170562846, "learning_rate": 6.26207423252683e-06, "loss": 0.0012, "step": 144980 }, { "epoch": 0.9538627527088281, "grad_norm": 0.04274638044623103, "learning_rate": 6.261518703458991e-06, "loss": 0.0014, "step": 144990 }, { "epoch": 0.9539285408840615, "grad_norm": 0.04253304758102245, "learning_rate": 6.260963157759116e-06, "loss": 0.0012, "step": 145000 }, { "epoch": 0.9539943290592949, "grad_norm": 0.15632040864519373, "learning_rate": 6.260407595434532e-06, "loss": 0.0005, "step": 145010 }, { "epoch": 0.9540601172345282, "grad_norm": 0.07142636033238084, "learning_rate": 6.259852016492563e-06, "loss": 0.0014, "step": 145020 }, { "epoch": 0.9541259054097616, "grad_norm": 0.04526441132413858, "learning_rate": 6.2592964209405315e-06, "loss": 0.001, "step": 145030 }, { "epoch": 0.954191693584995, "grad_norm": 0.09245516090942307, "learning_rate": 6.258740808785766e-06, "loss": 0.0012, "step": 145040 }, { "epoch": 0.9542574817602284, "grad_norm": 0.02900744983116513, "learning_rate": 6.258185180035589e-06, "loss": 0.0006, "step": 145050 }, { "epoch": 0.9543232699354618, "grad_norm": 0.08709729459941572, "learning_rate": 6.257629534697327e-06, "loss": 0.0012, "step": 145060 }, { "epoch": 0.9543890581106952, "grad_norm": 0.0024121970725471177, "learning_rate": 6.257073872778305e-06, "loss": 0.0005, "step": 145070 }, { "epoch": 0.9544548462859286, "grad_norm": 0.026104955128980298, "learning_rate": 6.2565181942858515e-06, "loss": 0.0007, "step": 145080 }, { "epoch": 0.954520634461162, "grad_norm": 0.001438409420844108, "learning_rate": 6.25596249922729e-06, "loss": 0.0014, "step": 145090 }, { "epoch": 0.9545864226363954, "grad_norm": 0.11805243036010668, "learning_rate": 6.255406787609948e-06, "loss": 0.0009, "step": 145100 }, { "epoch": 0.9546522108116288, "grad_norm": 0.034824118212059864, "learning_rate": 6.2548510594411505e-06, "loss": 0.0011, "step": 145110 }, { "epoch": 0.954717998986862, "grad_norm": 0.006704049820736728, "learning_rate": 6.254295314728227e-06, "loss": 0.0005, "step": 145120 }, { "epoch": 0.9547837871620954, "grad_norm": 0.06300214447002282, "learning_rate": 6.253739553478503e-06, "loss": 0.0014, "step": 145130 }, { "epoch": 0.9548495753373288, "grad_norm": 0.010917474165020017, "learning_rate": 6.253183775699304e-06, "loss": 0.0014, "step": 145140 }, { "epoch": 0.9549153635125622, "grad_norm": 0.009123693178617543, "learning_rate": 6.2526279813979605e-06, "loss": 0.0012, "step": 145150 }, { "epoch": 0.9549811516877956, "grad_norm": 0.12290940554670832, "learning_rate": 6.2520721705817984e-06, "loss": 0.0018, "step": 145160 }, { "epoch": 0.955046939863029, "grad_norm": 0.019293497445195363, "learning_rate": 6.2515163432581476e-06, "loss": 0.001, "step": 145170 }, { "epoch": 0.9551127280382624, "grad_norm": 0.0526456177605101, "learning_rate": 6.250960499434333e-06, "loss": 0.0009, "step": 145180 }, { "epoch": 0.9551785162134958, "grad_norm": 0.02228241112828366, "learning_rate": 6.250404639117687e-06, "loss": 0.0011, "step": 145190 }, { "epoch": 0.9552443043887292, "grad_norm": 0.006819966466331413, "learning_rate": 6.249848762315532e-06, "loss": 0.0008, "step": 145200 }, { "epoch": 0.9553100925639626, "grad_norm": 0.03594866171364517, "learning_rate": 6.249292869035203e-06, "loss": 0.0003, "step": 145210 }, { "epoch": 0.9553758807391959, "grad_norm": 0.09705847596160068, "learning_rate": 6.248736959284026e-06, "loss": 0.0021, "step": 145220 }, { "epoch": 0.9554416689144293, "grad_norm": 0.028026347687739206, "learning_rate": 6.24818103306933e-06, "loss": 0.001, "step": 145230 }, { "epoch": 0.9555074570896627, "grad_norm": 0.03815111256752695, "learning_rate": 6.247625090398447e-06, "loss": 0.0012, "step": 145240 }, { "epoch": 0.9555732452648961, "grad_norm": 0.024185255692369026, "learning_rate": 6.247069131278703e-06, "loss": 0.0015, "step": 145250 }, { "epoch": 0.9556390334401295, "grad_norm": 0.014282278886639249, "learning_rate": 6.246513155717431e-06, "loss": 0.0012, "step": 145260 }, { "epoch": 0.9557048216153629, "grad_norm": 0.003130658385828578, "learning_rate": 6.245957163721957e-06, "loss": 0.0009, "step": 145270 }, { "epoch": 0.9557706097905962, "grad_norm": 0.07041243078809029, "learning_rate": 6.245401155299616e-06, "loss": 0.0011, "step": 145280 }, { "epoch": 0.9558363979658296, "grad_norm": 0.035436334344913854, "learning_rate": 6.2448451304577365e-06, "loss": 0.0006, "step": 145290 }, { "epoch": 0.955902186141063, "grad_norm": 0.12534187325127427, "learning_rate": 6.244289089203648e-06, "loss": 0.0009, "step": 145300 }, { "epoch": 0.9559679743162964, "grad_norm": 0.01303404095003028, "learning_rate": 6.243733031544683e-06, "loss": 0.0006, "step": 145310 }, { "epoch": 0.9560337624915298, "grad_norm": 0.01023290238444319, "learning_rate": 6.243176957488173e-06, "loss": 0.0015, "step": 145320 }, { "epoch": 0.9560995506667631, "grad_norm": 0.012810732882725972, "learning_rate": 6.242620867041447e-06, "loss": 0.001, "step": 145330 }, { "epoch": 0.9561653388419965, "grad_norm": 0.07403072867000635, "learning_rate": 6.242064760211838e-06, "loss": 0.0006, "step": 145340 }, { "epoch": 0.9562311270172299, "grad_norm": 0.023128963298047964, "learning_rate": 6.241508637006679e-06, "loss": 0.0014, "step": 145350 }, { "epoch": 0.9562969151924633, "grad_norm": 0.0387169424883677, "learning_rate": 6.2409524974333005e-06, "loss": 0.0005, "step": 145360 }, { "epoch": 0.9563627033676967, "grad_norm": 0.023584829452913347, "learning_rate": 6.240396341499034e-06, "loss": 0.0007, "step": 145370 }, { "epoch": 0.9564284915429301, "grad_norm": 0.018213440423236603, "learning_rate": 6.239840169211216e-06, "loss": 0.0015, "step": 145380 }, { "epoch": 0.9564942797181635, "grad_norm": 0.09011547558578081, "learning_rate": 6.239283980577173e-06, "loss": 0.0016, "step": 145390 }, { "epoch": 0.9565600678933969, "grad_norm": 0.015319919494400317, "learning_rate": 6.238727775604241e-06, "loss": 0.0011, "step": 145400 }, { "epoch": 0.9566258560686303, "grad_norm": 0.04443584516856455, "learning_rate": 6.2381715542997546e-06, "loss": 0.0019, "step": 145410 }, { "epoch": 0.9566916442438637, "grad_norm": 0.001409642563449718, "learning_rate": 6.237615316671045e-06, "loss": 0.0026, "step": 145420 }, { "epoch": 0.9567574324190969, "grad_norm": 0.013759161014581264, "learning_rate": 6.237059062725448e-06, "loss": 0.0011, "step": 145430 }, { "epoch": 0.9568232205943303, "grad_norm": 0.057296848227414474, "learning_rate": 6.236502792470293e-06, "loss": 0.0108, "step": 145440 }, { "epoch": 0.9568890087695637, "grad_norm": 0.08015298620661528, "learning_rate": 6.235946505912919e-06, "loss": 0.0014, "step": 145450 }, { "epoch": 0.9569547969447971, "grad_norm": 0.027824723865529435, "learning_rate": 6.235390203060655e-06, "loss": 0.0006, "step": 145460 }, { "epoch": 0.9570205851200305, "grad_norm": 0.03018223801550484, "learning_rate": 6.23483388392084e-06, "loss": 0.001, "step": 145470 }, { "epoch": 0.9570863732952639, "grad_norm": 0.05946116188551915, "learning_rate": 6.234277548500807e-06, "loss": 0.0011, "step": 145480 }, { "epoch": 0.9571521614704973, "grad_norm": 0.03267153842780904, "learning_rate": 6.23372119680789e-06, "loss": 0.0018, "step": 145490 }, { "epoch": 0.9572179496457307, "grad_norm": 0.031153691247894653, "learning_rate": 6.233164828849425e-06, "loss": 0.0009, "step": 145500 }, { "epoch": 0.9572837378209641, "grad_norm": 0.031049672845076263, "learning_rate": 6.232608444632746e-06, "loss": 0.0005, "step": 145510 }, { "epoch": 0.9573495259961975, "grad_norm": 0.09199015482015316, "learning_rate": 6.23205204416519e-06, "loss": 0.0012, "step": 145520 }, { "epoch": 0.9574153141714308, "grad_norm": 0.19223215333183533, "learning_rate": 6.231495627454089e-06, "loss": 0.001, "step": 145530 }, { "epoch": 0.9574811023466642, "grad_norm": 0.02510918604694275, "learning_rate": 6.230939194506785e-06, "loss": 0.001, "step": 145540 }, { "epoch": 0.9575468905218976, "grad_norm": 0.010683562850832204, "learning_rate": 6.23038274533061e-06, "loss": 0.0012, "step": 145550 }, { "epoch": 0.957612678697131, "grad_norm": 0.04403979452148706, "learning_rate": 6.229826279932901e-06, "loss": 0.0014, "step": 145560 }, { "epoch": 0.9576784668723644, "grad_norm": 0.0195855582241798, "learning_rate": 6.229269798320995e-06, "loss": 0.0011, "step": 145570 }, { "epoch": 0.9577442550475977, "grad_norm": 0.038710955974938906, "learning_rate": 6.2287133005022284e-06, "loss": 0.0007, "step": 145580 }, { "epoch": 0.9578100432228311, "grad_norm": 0.013668779193203818, "learning_rate": 6.228156786483937e-06, "loss": 0.0012, "step": 145590 }, { "epoch": 0.9578758313980645, "grad_norm": 0.04061299771622564, "learning_rate": 6.227600256273459e-06, "loss": 0.0009, "step": 145600 }, { "epoch": 0.9579416195732979, "grad_norm": 0.02315725445542356, "learning_rate": 6.227043709878133e-06, "loss": 0.0005, "step": 145610 }, { "epoch": 0.9580074077485313, "grad_norm": 0.04869470321365069, "learning_rate": 6.226487147305296e-06, "loss": 0.0007, "step": 145620 }, { "epoch": 0.9580731959237646, "grad_norm": 0.07710762185436869, "learning_rate": 6.225930568562284e-06, "loss": 0.0006, "step": 145630 }, { "epoch": 0.958138984098998, "grad_norm": 0.020512661715164145, "learning_rate": 6.225373973656437e-06, "loss": 0.0011, "step": 145640 }, { "epoch": 0.9582047722742314, "grad_norm": 0.013211492658164278, "learning_rate": 6.2248173625950925e-06, "loss": 0.0004, "step": 145650 }, { "epoch": 0.9582705604494648, "grad_norm": 0.16200740248304873, "learning_rate": 6.2242607353855874e-06, "loss": 0.001, "step": 145660 }, { "epoch": 0.9583363486246982, "grad_norm": 0.005765467780352124, "learning_rate": 6.223704092035263e-06, "loss": 0.002, "step": 145670 }, { "epoch": 0.9584021367999316, "grad_norm": 0.036102956401536435, "learning_rate": 6.223147432551457e-06, "loss": 0.0006, "step": 145680 }, { "epoch": 0.958467924975165, "grad_norm": 0.008108835455087265, "learning_rate": 6.222590756941509e-06, "loss": 0.0008, "step": 145690 }, { "epoch": 0.9585337131503984, "grad_norm": 0.03758816474798227, "learning_rate": 6.222034065212756e-06, "loss": 0.0015, "step": 145700 }, { "epoch": 0.9585995013256318, "grad_norm": 0.07814546859465196, "learning_rate": 6.221477357372542e-06, "loss": 0.0011, "step": 145710 }, { "epoch": 0.9586652895008652, "grad_norm": 0.0555825379348324, "learning_rate": 6.220920633428201e-06, "loss": 0.0008, "step": 145720 }, { "epoch": 0.9587310776760986, "grad_norm": 0.27031424927247305, "learning_rate": 6.220363893387077e-06, "loss": 0.0013, "step": 145730 }, { "epoch": 0.9587968658513318, "grad_norm": 0.012721871401616151, "learning_rate": 6.21980713725651e-06, "loss": 0.0013, "step": 145740 }, { "epoch": 0.9588626540265652, "grad_norm": 0.0011641175339574827, "learning_rate": 6.219250365043838e-06, "loss": 0.0011, "step": 145750 }, { "epoch": 0.9589284422017986, "grad_norm": 0.01974057504862527, "learning_rate": 6.218693576756405e-06, "loss": 0.0006, "step": 145760 }, { "epoch": 0.958994230377032, "grad_norm": 0.14793827018614092, "learning_rate": 6.2181367724015485e-06, "loss": 0.0011, "step": 145770 }, { "epoch": 0.9590600185522654, "grad_norm": 0.05622758387107343, "learning_rate": 6.21757995198661e-06, "loss": 0.0009, "step": 145780 }, { "epoch": 0.9591258067274988, "grad_norm": 0.028747549516476182, "learning_rate": 6.2170231155189305e-06, "loss": 0.0009, "step": 145790 }, { "epoch": 0.9591915949027322, "grad_norm": 0.025516702435841046, "learning_rate": 6.2164662630058545e-06, "loss": 0.0007, "step": 145800 }, { "epoch": 0.9592573830779656, "grad_norm": 0.012060436366367654, "learning_rate": 6.215909394454722e-06, "loss": 0.0006, "step": 145810 }, { "epoch": 0.959323171253199, "grad_norm": 0.0716289473007679, "learning_rate": 6.215352509872874e-06, "loss": 0.0013, "step": 145820 }, { "epoch": 0.9593889594284324, "grad_norm": 0.019966826652705685, "learning_rate": 6.214795609267651e-06, "loss": 0.0015, "step": 145830 }, { "epoch": 0.9594547476036657, "grad_norm": 0.2075150630579549, "learning_rate": 6.214238692646397e-06, "loss": 0.0012, "step": 145840 }, { "epoch": 0.9595205357788991, "grad_norm": 0.030469161497864695, "learning_rate": 6.2136817600164555e-06, "loss": 0.0008, "step": 145850 }, { "epoch": 0.9595863239541325, "grad_norm": 0.12625890633029235, "learning_rate": 6.213124811385168e-06, "loss": 0.0016, "step": 145860 }, { "epoch": 0.9596521121293659, "grad_norm": 0.2115672722066494, "learning_rate": 6.212567846759878e-06, "loss": 0.0043, "step": 145870 }, { "epoch": 0.9597179003045992, "grad_norm": 0.04663356895871063, "learning_rate": 6.212010866147929e-06, "loss": 0.0008, "step": 145880 }, { "epoch": 0.9597836884798326, "grad_norm": 0.008396094627680605, "learning_rate": 6.211453869556662e-06, "loss": 0.0009, "step": 145890 }, { "epoch": 0.959849476655066, "grad_norm": 0.005012203867316998, "learning_rate": 6.210896856993421e-06, "loss": 0.0011, "step": 145900 }, { "epoch": 0.9599152648302994, "grad_norm": 0.08956712635598135, "learning_rate": 6.210339828465553e-06, "loss": 0.001, "step": 145910 }, { "epoch": 0.9599810530055328, "grad_norm": 0.052332831745692844, "learning_rate": 6.2097827839803984e-06, "loss": 0.0011, "step": 145920 }, { "epoch": 0.9600468411807662, "grad_norm": 0.03340695044992586, "learning_rate": 6.209225723545304e-06, "loss": 0.0009, "step": 145930 }, { "epoch": 0.9601126293559995, "grad_norm": 0.060382046421141554, "learning_rate": 6.2086686471676116e-06, "loss": 0.0012, "step": 145940 }, { "epoch": 0.9601784175312329, "grad_norm": 0.0014097864646868878, "learning_rate": 6.2081115548546665e-06, "loss": 0.0013, "step": 145950 }, { "epoch": 0.9602442057064663, "grad_norm": 0.08102077417724116, "learning_rate": 6.207554446613815e-06, "loss": 0.0011, "step": 145960 }, { "epoch": 0.9603099938816997, "grad_norm": 0.06131931617406237, "learning_rate": 6.2069973224524e-06, "loss": 0.0012, "step": 145970 }, { "epoch": 0.9603757820569331, "grad_norm": 0.06633551753892812, "learning_rate": 6.206440182377769e-06, "loss": 0.0018, "step": 145980 }, { "epoch": 0.9604415702321665, "grad_norm": 0.022500349617959586, "learning_rate": 6.205883026397265e-06, "loss": 0.0005, "step": 145990 }, { "epoch": 0.9605073584073999, "grad_norm": 0.007412994732269417, "learning_rate": 6.205325854518236e-06, "loss": 0.0005, "step": 146000 }, { "epoch": 0.9605731465826333, "grad_norm": 0.05046197073731932, "learning_rate": 6.204768666748026e-06, "loss": 0.0004, "step": 146010 }, { "epoch": 0.9606389347578667, "grad_norm": 0.01850552538850196, "learning_rate": 6.204211463093981e-06, "loss": 0.0009, "step": 146020 }, { "epoch": 0.9607047229331, "grad_norm": 0.12215619236971284, "learning_rate": 6.203654243563447e-06, "loss": 0.0023, "step": 146030 }, { "epoch": 0.9607705111083333, "grad_norm": 0.04995510848942789, "learning_rate": 6.203097008163773e-06, "loss": 0.0007, "step": 146040 }, { "epoch": 0.9608362992835667, "grad_norm": 0.02050193369623411, "learning_rate": 6.2025397569023045e-06, "loss": 0.0012, "step": 146050 }, { "epoch": 0.9609020874588001, "grad_norm": 0.04080579580785044, "learning_rate": 6.201982489786387e-06, "loss": 0.0013, "step": 146060 }, { "epoch": 0.9609678756340335, "grad_norm": 0.035005511490358174, "learning_rate": 6.201425206823369e-06, "loss": 0.001, "step": 146070 }, { "epoch": 0.9610336638092669, "grad_norm": 0.06759908187084272, "learning_rate": 6.200867908020597e-06, "loss": 0.0014, "step": 146080 }, { "epoch": 0.9610994519845003, "grad_norm": 0.09903921203017281, "learning_rate": 6.200310593385419e-06, "loss": 0.0009, "step": 146090 }, { "epoch": 0.9611652401597337, "grad_norm": 0.05759958146241524, "learning_rate": 6.1997532629251815e-06, "loss": 0.0011, "step": 146100 }, { "epoch": 0.9612310283349671, "grad_norm": 0.0310262355919355, "learning_rate": 6.199195916647234e-06, "loss": 0.0021, "step": 146110 }, { "epoch": 0.9612968165102005, "grad_norm": 0.052991263778123306, "learning_rate": 6.198638554558924e-06, "loss": 0.0013, "step": 146120 }, { "epoch": 0.9613626046854339, "grad_norm": 0.008117517928135115, "learning_rate": 6.1980811766675995e-06, "loss": 0.0008, "step": 146130 }, { "epoch": 0.9614283928606672, "grad_norm": 0.02128826682048231, "learning_rate": 6.19752378298061e-06, "loss": 0.0013, "step": 146140 }, { "epoch": 0.9614941810359006, "grad_norm": 0.09935985851622697, "learning_rate": 6.196966373505304e-06, "loss": 0.0031, "step": 146150 }, { "epoch": 0.961559969211134, "grad_norm": 0.0013773540909234533, "learning_rate": 6.196408948249029e-06, "loss": 0.0013, "step": 146160 }, { "epoch": 0.9616257573863674, "grad_norm": 0.006645116654203498, "learning_rate": 6.195851507219136e-06, "loss": 0.0008, "step": 146170 }, { "epoch": 0.9616915455616007, "grad_norm": 0.013635781290551808, "learning_rate": 6.195294050422974e-06, "loss": 0.0024, "step": 146180 }, { "epoch": 0.9617573337368341, "grad_norm": 0.0386811472340301, "learning_rate": 6.194736577867891e-06, "loss": 0.0011, "step": 146190 }, { "epoch": 0.9618231219120675, "grad_norm": 0.03348823548803145, "learning_rate": 6.1941790895612396e-06, "loss": 0.0017, "step": 146200 }, { "epoch": 0.9618889100873009, "grad_norm": 0.0325775866810395, "learning_rate": 6.193621585510368e-06, "loss": 0.0006, "step": 146210 }, { "epoch": 0.9619546982625343, "grad_norm": 0.0058070819324786424, "learning_rate": 6.193064065722627e-06, "loss": 0.0005, "step": 146220 }, { "epoch": 0.9620204864377677, "grad_norm": 0.011508309885762139, "learning_rate": 6.192506530205364e-06, "loss": 0.0008, "step": 146230 }, { "epoch": 0.9620862746130011, "grad_norm": 0.01688585379251041, "learning_rate": 6.191948978965935e-06, "loss": 0.001, "step": 146240 }, { "epoch": 0.9621520627882344, "grad_norm": 0.06456169467536227, "learning_rate": 6.191391412011688e-06, "loss": 0.0014, "step": 146250 }, { "epoch": 0.9622178509634678, "grad_norm": 0.055845186915308084, "learning_rate": 6.1908338293499736e-06, "loss": 0.0007, "step": 146260 }, { "epoch": 0.9622836391387012, "grad_norm": 0.02516697009141097, "learning_rate": 6.190276230988144e-06, "loss": 0.0015, "step": 146270 }, { "epoch": 0.9623494273139346, "grad_norm": 0.042887122550469226, "learning_rate": 6.18971861693355e-06, "loss": 0.0006, "step": 146280 }, { "epoch": 0.962415215489168, "grad_norm": 0.6102046569757676, "learning_rate": 6.189160987193542e-06, "loss": 0.0026, "step": 146290 }, { "epoch": 0.9624810036644014, "grad_norm": 0.04011860335200288, "learning_rate": 6.188603341775475e-06, "loss": 0.0012, "step": 146300 }, { "epoch": 0.9625467918396348, "grad_norm": 0.03525223790475285, "learning_rate": 6.188045680686701e-06, "loss": 0.0014, "step": 146310 }, { "epoch": 0.9626125800148682, "grad_norm": 0.042737752224780295, "learning_rate": 6.187488003934569e-06, "loss": 0.0016, "step": 146320 }, { "epoch": 0.9626783681901016, "grad_norm": 0.015282078956819281, "learning_rate": 6.186930311526433e-06, "loss": 0.0011, "step": 146330 }, { "epoch": 0.962744156365335, "grad_norm": 0.04093781362601654, "learning_rate": 6.186372603469647e-06, "loss": 0.0005, "step": 146340 }, { "epoch": 0.9628099445405682, "grad_norm": 0.06212614203626954, "learning_rate": 6.185814879771562e-06, "loss": 0.0012, "step": 146350 }, { "epoch": 0.9628757327158016, "grad_norm": 0.008970428341619487, "learning_rate": 6.185257140439531e-06, "loss": 0.0013, "step": 146360 }, { "epoch": 0.962941520891035, "grad_norm": 0.04351506022512131, "learning_rate": 6.18469938548091e-06, "loss": 0.001, "step": 146370 }, { "epoch": 0.9630073090662684, "grad_norm": 0.183271191067122, "learning_rate": 6.18414161490305e-06, "loss": 0.002, "step": 146380 }, { "epoch": 0.9630730972415018, "grad_norm": 0.0017359429814521625, "learning_rate": 6.183583828713306e-06, "loss": 0.0018, "step": 146390 }, { "epoch": 0.9631388854167352, "grad_norm": 0.010778713508024762, "learning_rate": 6.183026026919031e-06, "loss": 0.0009, "step": 146400 }, { "epoch": 0.9632046735919686, "grad_norm": 0.02773311132086416, "learning_rate": 6.18246820952758e-06, "loss": 0.001, "step": 146410 }, { "epoch": 0.963270461767202, "grad_norm": 0.03555733443250705, "learning_rate": 6.181910376546305e-06, "loss": 0.0008, "step": 146420 }, { "epoch": 0.9633362499424354, "grad_norm": 0.023038365354058208, "learning_rate": 6.181352527982565e-06, "loss": 0.0007, "step": 146430 }, { "epoch": 0.9634020381176688, "grad_norm": 0.028763722015860943, "learning_rate": 6.180794663843711e-06, "loss": 0.0016, "step": 146440 }, { "epoch": 0.9634678262929021, "grad_norm": 0.0068750612735813165, "learning_rate": 6.180236784137099e-06, "loss": 0.0013, "step": 146450 }, { "epoch": 0.9635336144681355, "grad_norm": 0.09250037094677148, "learning_rate": 6.179678888870086e-06, "loss": 0.0024, "step": 146460 }, { "epoch": 0.9635994026433689, "grad_norm": 0.04139884415999048, "learning_rate": 6.1791209780500236e-06, "loss": 0.0006, "step": 146470 }, { "epoch": 0.9636651908186022, "grad_norm": 0.017507220775597656, "learning_rate": 6.178563051684271e-06, "loss": 0.0005, "step": 146480 }, { "epoch": 0.9637309789938356, "grad_norm": 0.0230043402684815, "learning_rate": 6.178005109780181e-06, "loss": 0.0009, "step": 146490 }, { "epoch": 0.963796767169069, "grad_norm": 0.06561217772207299, "learning_rate": 6.177447152345112e-06, "loss": 0.0007, "step": 146500 }, { "epoch": 0.9638625553443024, "grad_norm": 0.017449337418222917, "learning_rate": 6.176889179386418e-06, "loss": 0.001, "step": 146510 }, { "epoch": 0.9639283435195358, "grad_norm": 0.09833956799778978, "learning_rate": 6.176331190911458e-06, "loss": 0.0011, "step": 146520 }, { "epoch": 0.9639941316947692, "grad_norm": 0.060000271891544686, "learning_rate": 6.175773186927587e-06, "loss": 0.0014, "step": 146530 }, { "epoch": 0.9640599198700026, "grad_norm": 0.10949134265527326, "learning_rate": 6.1752151674421625e-06, "loss": 0.0011, "step": 146540 }, { "epoch": 0.9641257080452359, "grad_norm": 0.02465212090084664, "learning_rate": 6.174657132462538e-06, "loss": 0.0007, "step": 146550 }, { "epoch": 0.9641914962204693, "grad_norm": 0.035395974900318934, "learning_rate": 6.174099081996076e-06, "loss": 0.0018, "step": 146560 }, { "epoch": 0.9642572843957027, "grad_norm": 1.6505896096381087, "learning_rate": 6.173541016050132e-06, "loss": 0.0018, "step": 146570 }, { "epoch": 0.9643230725709361, "grad_norm": 0.0425367723525389, "learning_rate": 6.172982934632064e-06, "loss": 0.0008, "step": 146580 }, { "epoch": 0.9643888607461695, "grad_norm": 0.000280744825670839, "learning_rate": 6.172424837749227e-06, "loss": 0.0007, "step": 146590 }, { "epoch": 0.9644546489214029, "grad_norm": 0.016125551535997453, "learning_rate": 6.171866725408983e-06, "loss": 0.0009, "step": 146600 }, { "epoch": 0.9645204370966363, "grad_norm": 0.04465621139676148, "learning_rate": 6.171308597618687e-06, "loss": 0.001, "step": 146610 }, { "epoch": 0.9645862252718697, "grad_norm": 0.02857596000491946, "learning_rate": 6.170750454385698e-06, "loss": 0.0014, "step": 146620 }, { "epoch": 0.964652013447103, "grad_norm": 0.02558366231173886, "learning_rate": 6.170192295717376e-06, "loss": 0.0009, "step": 146630 }, { "epoch": 0.9647178016223364, "grad_norm": 0.04387786816788813, "learning_rate": 6.16963412162108e-06, "loss": 0.0014, "step": 146640 }, { "epoch": 0.9647835897975697, "grad_norm": 0.17707393217630893, "learning_rate": 6.169075932104168e-06, "loss": 0.001, "step": 146650 }, { "epoch": 0.9648493779728031, "grad_norm": 0.003155447385897514, "learning_rate": 6.168517727173999e-06, "loss": 0.0008, "step": 146660 }, { "epoch": 0.9649151661480365, "grad_norm": 0.02511723994887998, "learning_rate": 6.167959506837934e-06, "loss": 0.0009, "step": 146670 }, { "epoch": 0.9649809543232699, "grad_norm": 0.038295380764647294, "learning_rate": 6.16740127110333e-06, "loss": 0.0026, "step": 146680 }, { "epoch": 0.9650467424985033, "grad_norm": 0.17195454508868505, "learning_rate": 6.16684301997755e-06, "loss": 0.0009, "step": 146690 }, { "epoch": 0.9651125306737367, "grad_norm": 0.04170880825439345, "learning_rate": 6.166284753467952e-06, "loss": 0.0024, "step": 146700 }, { "epoch": 0.9651783188489701, "grad_norm": 0.12477327620917637, "learning_rate": 6.1657264715818965e-06, "loss": 0.0023, "step": 146710 }, { "epoch": 0.9652441070242035, "grad_norm": 0.02902857962259948, "learning_rate": 6.165168174326745e-06, "loss": 0.0005, "step": 146720 }, { "epoch": 0.9653098951994369, "grad_norm": 0.006872138198967851, "learning_rate": 6.164609861709857e-06, "loss": 0.0011, "step": 146730 }, { "epoch": 0.9653756833746703, "grad_norm": 0.015454159162616632, "learning_rate": 6.164051533738594e-06, "loss": 0.0014, "step": 146740 }, { "epoch": 0.9654414715499037, "grad_norm": 0.01084693468171003, "learning_rate": 6.163493190420315e-06, "loss": 0.001, "step": 146750 }, { "epoch": 0.965507259725137, "grad_norm": 0.042557875912686303, "learning_rate": 6.162934831762386e-06, "loss": 0.0011, "step": 146760 }, { "epoch": 0.9655730479003704, "grad_norm": 0.005375756146296281, "learning_rate": 6.1623764577721655e-06, "loss": 0.0014, "step": 146770 }, { "epoch": 0.9656388360756037, "grad_norm": 0.0923446064225304, "learning_rate": 6.161818068457014e-06, "loss": 0.0013, "step": 146780 }, { "epoch": 0.9657046242508371, "grad_norm": 0.06105913068942907, "learning_rate": 6.161259663824295e-06, "loss": 0.0006, "step": 146790 }, { "epoch": 0.9657704124260705, "grad_norm": 0.07095743746111545, "learning_rate": 6.16070124388137e-06, "loss": 0.0009, "step": 146800 }, { "epoch": 0.9658362006013039, "grad_norm": 0.09882950087689267, "learning_rate": 6.160142808635601e-06, "loss": 0.0007, "step": 146810 }, { "epoch": 0.9659019887765373, "grad_norm": 0.08193069682269606, "learning_rate": 6.1595843580943515e-06, "loss": 0.0037, "step": 146820 }, { "epoch": 0.9659677769517707, "grad_norm": 0.05301731500193387, "learning_rate": 6.159025892264985e-06, "loss": 0.0007, "step": 146830 }, { "epoch": 0.9660335651270041, "grad_norm": 0.04913982699661102, "learning_rate": 6.158467411154861e-06, "loss": 0.0011, "step": 146840 }, { "epoch": 0.9660993533022375, "grad_norm": 0.0207905209968527, "learning_rate": 6.157908914771347e-06, "loss": 0.0014, "step": 146850 }, { "epoch": 0.9661651414774708, "grad_norm": 0.007761630412386493, "learning_rate": 6.157350403121801e-06, "loss": 0.0008, "step": 146860 }, { "epoch": 0.9662309296527042, "grad_norm": 0.09537372076273057, "learning_rate": 6.156791876213591e-06, "loss": 0.0012, "step": 146870 }, { "epoch": 0.9662967178279376, "grad_norm": 0.09902597560574977, "learning_rate": 6.15623333405408e-06, "loss": 0.001, "step": 146880 }, { "epoch": 0.966362506003171, "grad_norm": 0.022683293090144066, "learning_rate": 6.155674776650631e-06, "loss": 0.001, "step": 146890 }, { "epoch": 0.9664282941784044, "grad_norm": 0.23448973019753364, "learning_rate": 6.155116204010607e-06, "loss": 0.0009, "step": 146900 }, { "epoch": 0.9664940823536378, "grad_norm": 0.05454747783223091, "learning_rate": 6.154557616141375e-06, "loss": 0.0006, "step": 146910 }, { "epoch": 0.9665598705288712, "grad_norm": 0.026267595239865882, "learning_rate": 6.153999013050297e-06, "loss": 0.0011, "step": 146920 }, { "epoch": 0.9666256587041046, "grad_norm": 0.1416927825385016, "learning_rate": 6.1534403947447385e-06, "loss": 0.001, "step": 146930 }, { "epoch": 0.966691446879338, "grad_norm": 0.017951147157758245, "learning_rate": 6.152881761232064e-06, "loss": 0.0009, "step": 146940 }, { "epoch": 0.9667572350545713, "grad_norm": 0.00825893492800434, "learning_rate": 6.152323112519641e-06, "loss": 0.0009, "step": 146950 }, { "epoch": 0.9668230232298046, "grad_norm": 0.012217243794055562, "learning_rate": 6.151764448614832e-06, "loss": 0.001, "step": 146960 }, { "epoch": 0.966888811405038, "grad_norm": 0.014228012849815931, "learning_rate": 6.1512057695250034e-06, "loss": 0.0013, "step": 146970 }, { "epoch": 0.9669545995802714, "grad_norm": 0.013020610517305726, "learning_rate": 6.150647075257521e-06, "loss": 0.0007, "step": 146980 }, { "epoch": 0.9670203877555048, "grad_norm": 0.06073328459555808, "learning_rate": 6.15008836581975e-06, "loss": 0.001, "step": 146990 }, { "epoch": 0.9670861759307382, "grad_norm": 0.07188922665206124, "learning_rate": 6.1495296412190575e-06, "loss": 0.0007, "step": 147000 }, { "epoch": 0.9671519641059716, "grad_norm": 0.05117647358187646, "learning_rate": 6.1489709014628105e-06, "loss": 0.0009, "step": 147010 }, { "epoch": 0.967217752281205, "grad_norm": 0.040511467677119446, "learning_rate": 6.1484121465583735e-06, "loss": 0.0011, "step": 147020 }, { "epoch": 0.9672835404564384, "grad_norm": 0.047689437177788306, "learning_rate": 6.147853376513115e-06, "loss": 0.0006, "step": 147030 }, { "epoch": 0.9673493286316718, "grad_norm": 0.015203442957963368, "learning_rate": 6.147294591334401e-06, "loss": 0.0011, "step": 147040 }, { "epoch": 0.9674151168069052, "grad_norm": 0.023207943743584678, "learning_rate": 6.146735791029599e-06, "loss": 0.0017, "step": 147050 }, { "epoch": 0.9674809049821385, "grad_norm": 0.07288252016557696, "learning_rate": 6.146176975606074e-06, "loss": 0.002, "step": 147060 }, { "epoch": 0.9675466931573719, "grad_norm": 0.03474387424152993, "learning_rate": 6.1456181450711975e-06, "loss": 0.0011, "step": 147070 }, { "epoch": 0.9676124813326052, "grad_norm": 0.03562564586330221, "learning_rate": 6.145059299432333e-06, "loss": 0.0011, "step": 147080 }, { "epoch": 0.9676782695078386, "grad_norm": 0.30792426535113576, "learning_rate": 6.1445004386968535e-06, "loss": 0.0023, "step": 147090 }, { "epoch": 0.967744057683072, "grad_norm": 0.04016232157664955, "learning_rate": 6.1439415628721224e-06, "loss": 0.0014, "step": 147100 }, { "epoch": 0.9678098458583054, "grad_norm": 0.06353503945624162, "learning_rate": 6.14338267196551e-06, "loss": 0.0012, "step": 147110 }, { "epoch": 0.9678756340335388, "grad_norm": 0.195375835648982, "learning_rate": 6.142823765984384e-06, "loss": 0.0007, "step": 147120 }, { "epoch": 0.9679414222087722, "grad_norm": 0.052142833280180684, "learning_rate": 6.142264844936114e-06, "loss": 0.0022, "step": 147130 }, { "epoch": 0.9680072103840056, "grad_norm": 0.0612905207584531, "learning_rate": 6.1417059088280686e-06, "loss": 0.0012, "step": 147140 }, { "epoch": 0.968072998559239, "grad_norm": 0.06225813922607219, "learning_rate": 6.141146957667617e-06, "loss": 0.0014, "step": 147150 }, { "epoch": 0.9681387867344724, "grad_norm": 0.024969655723507282, "learning_rate": 6.140587991462129e-06, "loss": 0.0003, "step": 147160 }, { "epoch": 0.9682045749097057, "grad_norm": 0.030292174029468433, "learning_rate": 6.140029010218973e-06, "loss": 0.001, "step": 147170 }, { "epoch": 0.9682703630849391, "grad_norm": 0.004166044326333133, "learning_rate": 6.139470013945519e-06, "loss": 0.0007, "step": 147180 }, { "epoch": 0.9683361512601725, "grad_norm": 0.011760031894969155, "learning_rate": 6.138911002649136e-06, "loss": 0.0009, "step": 147190 }, { "epoch": 0.9684019394354059, "grad_norm": 0.0025074496367230334, "learning_rate": 6.138351976337196e-06, "loss": 0.0006, "step": 147200 }, { "epoch": 0.9684677276106393, "grad_norm": 0.012209714944365221, "learning_rate": 6.137792935017069e-06, "loss": 0.0017, "step": 147210 }, { "epoch": 0.9685335157858727, "grad_norm": 0.029048530137445065, "learning_rate": 6.137233878696124e-06, "loss": 0.0011, "step": 147220 }, { "epoch": 0.968599303961106, "grad_norm": 0.01983701885161568, "learning_rate": 6.136674807381733e-06, "loss": 0.0005, "step": 147230 }, { "epoch": 0.9686650921363394, "grad_norm": 0.039081774745859284, "learning_rate": 6.136115721081265e-06, "loss": 0.0015, "step": 147240 }, { "epoch": 0.9687308803115728, "grad_norm": 0.015990068018255546, "learning_rate": 6.135556619802092e-06, "loss": 0.0009, "step": 147250 }, { "epoch": 0.9687966684868062, "grad_norm": 0.13737464768656407, "learning_rate": 6.1349975035515875e-06, "loss": 0.0019, "step": 147260 }, { "epoch": 0.9688624566620395, "grad_norm": 0.15218781343969134, "learning_rate": 6.134438372337122e-06, "loss": 0.001, "step": 147270 }, { "epoch": 0.9689282448372729, "grad_norm": 0.013954207387328017, "learning_rate": 6.133879226166064e-06, "loss": 0.0004, "step": 147280 }, { "epoch": 0.9689940330125063, "grad_norm": 0.020342283456792436, "learning_rate": 6.133320065045789e-06, "loss": 0.0012, "step": 147290 }, { "epoch": 0.9690598211877397, "grad_norm": 0.11185446947849864, "learning_rate": 6.132760888983667e-06, "loss": 0.0013, "step": 147300 }, { "epoch": 0.9691256093629731, "grad_norm": 0.07531318332589129, "learning_rate": 6.132201697987072e-06, "loss": 0.0007, "step": 147310 }, { "epoch": 0.9691913975382065, "grad_norm": 0.07065325546535225, "learning_rate": 6.1316424920633744e-06, "loss": 0.001, "step": 147320 }, { "epoch": 0.9692571857134399, "grad_norm": 0.0277987133445728, "learning_rate": 6.131083271219949e-06, "loss": 0.0012, "step": 147330 }, { "epoch": 0.9693229738886733, "grad_norm": 0.0013555126374269828, "learning_rate": 6.130524035464167e-06, "loss": 0.0019, "step": 147340 }, { "epoch": 0.9693887620639067, "grad_norm": 0.029671287056067606, "learning_rate": 6.129964784803401e-06, "loss": 0.0008, "step": 147350 }, { "epoch": 0.9694545502391401, "grad_norm": 0.026936388798802204, "learning_rate": 6.129405519245027e-06, "loss": 0.0014, "step": 147360 }, { "epoch": 0.9695203384143734, "grad_norm": 0.058718772749937864, "learning_rate": 6.128846238796415e-06, "loss": 0.0011, "step": 147370 }, { "epoch": 0.9695861265896067, "grad_norm": 0.06350386831562962, "learning_rate": 6.128286943464941e-06, "loss": 0.0014, "step": 147380 }, { "epoch": 0.9696519147648401, "grad_norm": 0.042482827276930694, "learning_rate": 6.127727633257978e-06, "loss": 0.0008, "step": 147390 }, { "epoch": 0.9697177029400735, "grad_norm": 0.04124559193688553, "learning_rate": 6.1271683081829e-06, "loss": 0.0009, "step": 147400 }, { "epoch": 0.9697834911153069, "grad_norm": 0.0326739857308781, "learning_rate": 6.126608968247082e-06, "loss": 0.0007, "step": 147410 }, { "epoch": 0.9698492792905403, "grad_norm": 0.04324423257375666, "learning_rate": 6.126049613457897e-06, "loss": 0.0007, "step": 147420 }, { "epoch": 0.9699150674657737, "grad_norm": 0.06037508101109854, "learning_rate": 6.125490243822722e-06, "loss": 0.002, "step": 147430 }, { "epoch": 0.9699808556410071, "grad_norm": 0.017314019336569862, "learning_rate": 6.124930859348929e-06, "loss": 0.0003, "step": 147440 }, { "epoch": 0.9700466438162405, "grad_norm": 0.14790002135868213, "learning_rate": 6.124371460043893e-06, "loss": 0.0026, "step": 147450 }, { "epoch": 0.9701124319914739, "grad_norm": 0.020183623195298196, "learning_rate": 6.123812045914992e-06, "loss": 0.0015, "step": 147460 }, { "epoch": 0.9701782201667072, "grad_norm": 0.04633232804717589, "learning_rate": 6.123252616969598e-06, "loss": 0.0013, "step": 147470 }, { "epoch": 0.9702440083419406, "grad_norm": 0.03573978043491237, "learning_rate": 6.122693173215091e-06, "loss": 0.0005, "step": 147480 }, { "epoch": 0.970309796517174, "grad_norm": 0.04854205663250876, "learning_rate": 6.122133714658842e-06, "loss": 0.001, "step": 147490 }, { "epoch": 0.9703755846924074, "grad_norm": 0.09868952732425039, "learning_rate": 6.12157424130823e-06, "loss": 0.0007, "step": 147500 }, { "epoch": 0.9704413728676408, "grad_norm": 0.005186369725236239, "learning_rate": 6.121014753170629e-06, "loss": 0.0012, "step": 147510 }, { "epoch": 0.9705071610428742, "grad_norm": 0.09842428604878076, "learning_rate": 6.120455250253419e-06, "loss": 0.0005, "step": 147520 }, { "epoch": 0.9705729492181076, "grad_norm": 0.08853869344489194, "learning_rate": 6.119895732563972e-06, "loss": 0.0014, "step": 147530 }, { "epoch": 0.970638737393341, "grad_norm": 0.02654988431743578, "learning_rate": 6.119336200109669e-06, "loss": 0.0006, "step": 147540 }, { "epoch": 0.9707045255685743, "grad_norm": 0.0060712117009061465, "learning_rate": 6.118776652897883e-06, "loss": 0.0015, "step": 147550 }, { "epoch": 0.9707703137438077, "grad_norm": 0.019300554254484282, "learning_rate": 6.1182170909359935e-06, "loss": 0.0016, "step": 147560 }, { "epoch": 0.970836101919041, "grad_norm": 0.11869900948647692, "learning_rate": 6.117657514231379e-06, "loss": 0.0014, "step": 147570 }, { "epoch": 0.9709018900942744, "grad_norm": 0.027893099463802758, "learning_rate": 6.117097922791413e-06, "loss": 0.0015, "step": 147580 }, { "epoch": 0.9709676782695078, "grad_norm": 0.016888720696774398, "learning_rate": 6.116538316623477e-06, "loss": 0.0009, "step": 147590 }, { "epoch": 0.9710334664447412, "grad_norm": 0.0453484604127605, "learning_rate": 6.1159786957349475e-06, "loss": 0.0009, "step": 147600 }, { "epoch": 0.9710992546199746, "grad_norm": 0.002731321780217389, "learning_rate": 6.115419060133203e-06, "loss": 0.0014, "step": 147610 }, { "epoch": 0.971165042795208, "grad_norm": 0.02151076694507775, "learning_rate": 6.114859409825622e-06, "loss": 0.001, "step": 147620 }, { "epoch": 0.9712308309704414, "grad_norm": 0.04337647112118943, "learning_rate": 6.1142997448195814e-06, "loss": 0.001, "step": 147630 }, { "epoch": 0.9712966191456748, "grad_norm": 0.07571478232710886, "learning_rate": 6.113740065122461e-06, "loss": 0.0015, "step": 147640 }, { "epoch": 0.9713624073209082, "grad_norm": 0.05129238802547974, "learning_rate": 6.113180370741641e-06, "loss": 0.0009, "step": 147650 }, { "epoch": 0.9714281954961416, "grad_norm": 0.040711446126659064, "learning_rate": 6.1126206616844975e-06, "loss": 0.0015, "step": 147660 }, { "epoch": 0.971493983671375, "grad_norm": 0.02111196149591463, "learning_rate": 6.112060937958413e-06, "loss": 0.001, "step": 147670 }, { "epoch": 0.9715597718466082, "grad_norm": 0.03105333594597513, "learning_rate": 6.111501199570765e-06, "loss": 0.0012, "step": 147680 }, { "epoch": 0.9716255600218416, "grad_norm": 0.0009221156579904226, "learning_rate": 6.110941446528934e-06, "loss": 0.0011, "step": 147690 }, { "epoch": 0.971691348197075, "grad_norm": 0.05382633986045286, "learning_rate": 6.110381678840299e-06, "loss": 0.0011, "step": 147700 }, { "epoch": 0.9717571363723084, "grad_norm": 0.014894223162251988, "learning_rate": 6.10982189651224e-06, "loss": 0.0006, "step": 147710 }, { "epoch": 0.9718229245475418, "grad_norm": 0.020305147937241266, "learning_rate": 6.109262099552139e-06, "loss": 0.0009, "step": 147720 }, { "epoch": 0.9718887127227752, "grad_norm": 0.2756013798003922, "learning_rate": 6.108702287967375e-06, "loss": 0.0037, "step": 147730 }, { "epoch": 0.9719545008980086, "grad_norm": 0.039312633567145136, "learning_rate": 6.108142461765329e-06, "loss": 0.0008, "step": 147740 }, { "epoch": 0.972020289073242, "grad_norm": 0.09733506468941233, "learning_rate": 6.10758262095338e-06, "loss": 0.0012, "step": 147750 }, { "epoch": 0.9720860772484754, "grad_norm": 0.06131981133615908, "learning_rate": 6.107022765538912e-06, "loss": 0.001, "step": 147760 }, { "epoch": 0.9721518654237088, "grad_norm": 0.03798707564789501, "learning_rate": 6.106462895529305e-06, "loss": 0.0006, "step": 147770 }, { "epoch": 0.9722176535989421, "grad_norm": 0.04102783713542039, "learning_rate": 6.1059030109319395e-06, "loss": 0.0004, "step": 147780 }, { "epoch": 0.9722834417741755, "grad_norm": 0.03696847933511012, "learning_rate": 6.1053431117542e-06, "loss": 0.001, "step": 147790 }, { "epoch": 0.9723492299494089, "grad_norm": 0.08971366282429558, "learning_rate": 6.104783198003464e-06, "loss": 0.0017, "step": 147800 }, { "epoch": 0.9724150181246423, "grad_norm": 0.015019269023805088, "learning_rate": 6.104223269687117e-06, "loss": 0.0006, "step": 147810 }, { "epoch": 0.9724808062998757, "grad_norm": 0.0588739277012549, "learning_rate": 6.103663326812539e-06, "loss": 0.0007, "step": 147820 }, { "epoch": 0.972546594475109, "grad_norm": 0.050425449867858775, "learning_rate": 6.1031033693871134e-06, "loss": 0.0008, "step": 147830 }, { "epoch": 0.9726123826503424, "grad_norm": 0.003467764109522768, "learning_rate": 6.102543397418223e-06, "loss": 0.0011, "step": 147840 }, { "epoch": 0.9726781708255758, "grad_norm": 0.10697660236743327, "learning_rate": 6.101983410913249e-06, "loss": 0.0007, "step": 147850 }, { "epoch": 0.9727439590008092, "grad_norm": 0.04488225211868954, "learning_rate": 6.101423409879576e-06, "loss": 0.0019, "step": 147860 }, { "epoch": 0.9728097471760426, "grad_norm": 0.005657416572388903, "learning_rate": 6.1008633943245876e-06, "loss": 0.001, "step": 147870 }, { "epoch": 0.9728755353512759, "grad_norm": 0.668245240480934, "learning_rate": 6.100303364255666e-06, "loss": 0.0012, "step": 147880 }, { "epoch": 0.9729413235265093, "grad_norm": 0.009662367497528104, "learning_rate": 6.099743319680193e-06, "loss": 0.0012, "step": 147890 }, { "epoch": 0.9730071117017427, "grad_norm": 0.042721180764364014, "learning_rate": 6.0991832606055555e-06, "loss": 0.0007, "step": 147900 }, { "epoch": 0.9730728998769761, "grad_norm": 0.03418163783583728, "learning_rate": 6.098623187039136e-06, "loss": 0.0008, "step": 147910 }, { "epoch": 0.9731386880522095, "grad_norm": 0.04107993137826488, "learning_rate": 6.098063098988318e-06, "loss": 0.0012, "step": 147920 }, { "epoch": 0.9732044762274429, "grad_norm": 0.0829707772521288, "learning_rate": 6.0975029964604885e-06, "loss": 0.0012, "step": 147930 }, { "epoch": 0.9732702644026763, "grad_norm": 0.006715906219621545, "learning_rate": 6.096942879463028e-06, "loss": 0.0009, "step": 147940 }, { "epoch": 0.9733360525779097, "grad_norm": 0.031920377244585754, "learning_rate": 6.096382748003323e-06, "loss": 0.0016, "step": 147950 }, { "epoch": 0.9734018407531431, "grad_norm": 0.03595300160630581, "learning_rate": 6.095822602088758e-06, "loss": 0.0009, "step": 147960 }, { "epoch": 0.9734676289283765, "grad_norm": 0.03097980781193514, "learning_rate": 6.095262441726719e-06, "loss": 0.0008, "step": 147970 }, { "epoch": 0.9735334171036097, "grad_norm": 0.011996373943105066, "learning_rate": 6.0947022669245905e-06, "loss": 0.0012, "step": 147980 }, { "epoch": 0.9735992052788431, "grad_norm": 0.03607664737421169, "learning_rate": 6.094142077689758e-06, "loss": 0.0017, "step": 147990 }, { "epoch": 0.9736649934540765, "grad_norm": 0.01768910361297162, "learning_rate": 6.0935818740296085e-06, "loss": 0.0005, "step": 148000 }, { "epoch": 0.9737307816293099, "grad_norm": 0.008208534009090688, "learning_rate": 6.093021655951526e-06, "loss": 0.001, "step": 148010 }, { "epoch": 0.9737965698045433, "grad_norm": 0.05951444423896728, "learning_rate": 6.092461423462894e-06, "loss": 0.001, "step": 148020 }, { "epoch": 0.9738623579797767, "grad_norm": 0.06760693417290876, "learning_rate": 6.0919011765711044e-06, "loss": 0.0011, "step": 148030 }, { "epoch": 0.9739281461550101, "grad_norm": 0.01745257493244351, "learning_rate": 6.091340915283542e-06, "loss": 0.0009, "step": 148040 }, { "epoch": 0.9739939343302435, "grad_norm": 0.03309256041966732, "learning_rate": 6.090780639607589e-06, "loss": 0.0007, "step": 148050 }, { "epoch": 0.9740597225054769, "grad_norm": 0.034355549134910804, "learning_rate": 6.090220349550637e-06, "loss": 0.0015, "step": 148060 }, { "epoch": 0.9741255106807103, "grad_norm": 0.09833887817910195, "learning_rate": 6.089660045120071e-06, "loss": 0.0013, "step": 148070 }, { "epoch": 0.9741912988559437, "grad_norm": 0.0723846224188737, "learning_rate": 6.08909972632328e-06, "loss": 0.0012, "step": 148080 }, { "epoch": 0.974257087031177, "grad_norm": 0.07843776053862928, "learning_rate": 6.088539393167648e-06, "loss": 0.0008, "step": 148090 }, { "epoch": 0.9743228752064104, "grad_norm": 0.06618788440019006, "learning_rate": 6.087979045660564e-06, "loss": 0.0016, "step": 148100 }, { "epoch": 0.9743886633816438, "grad_norm": 0.0004206259361963421, "learning_rate": 6.087418683809416e-06, "loss": 0.0007, "step": 148110 }, { "epoch": 0.9744544515568772, "grad_norm": 0.0022570723203388355, "learning_rate": 6.086858307621592e-06, "loss": 0.001, "step": 148120 }, { "epoch": 0.9745202397321105, "grad_norm": 0.027081153054333222, "learning_rate": 6.086297917104481e-06, "loss": 0.0013, "step": 148130 }, { "epoch": 0.9745860279073439, "grad_norm": 0.015036828897244416, "learning_rate": 6.085737512265469e-06, "loss": 0.0013, "step": 148140 }, { "epoch": 0.9746518160825773, "grad_norm": 0.11627216697126275, "learning_rate": 6.085177093111944e-06, "loss": 0.0018, "step": 148150 }, { "epoch": 0.9747176042578107, "grad_norm": 0.032219049974722955, "learning_rate": 6.084616659651299e-06, "loss": 0.0008, "step": 148160 }, { "epoch": 0.9747833924330441, "grad_norm": 0.0603432797039031, "learning_rate": 6.08405621189092e-06, "loss": 0.0008, "step": 148170 }, { "epoch": 0.9748491806082775, "grad_norm": 0.18433816039194248, "learning_rate": 6.083495749838195e-06, "loss": 0.0021, "step": 148180 }, { "epoch": 0.9749149687835108, "grad_norm": 0.04580676024270635, "learning_rate": 6.082935273500515e-06, "loss": 0.0009, "step": 148190 }, { "epoch": 0.9749807569587442, "grad_norm": 0.02156044095391422, "learning_rate": 6.082374782885269e-06, "loss": 0.0012, "step": 148200 }, { "epoch": 0.9750465451339776, "grad_norm": 0.01903763005394577, "learning_rate": 6.081814277999845e-06, "loss": 0.0014, "step": 148210 }, { "epoch": 0.975112333309211, "grad_norm": 0.1198165461565474, "learning_rate": 6.081253758851634e-06, "loss": 0.0006, "step": 148220 }, { "epoch": 0.9751781214844444, "grad_norm": 0.011662379880952086, "learning_rate": 6.080693225448027e-06, "loss": 0.001, "step": 148230 }, { "epoch": 0.9752439096596778, "grad_norm": 0.031172394716405653, "learning_rate": 6.080132677796413e-06, "loss": 0.0009, "step": 148240 }, { "epoch": 0.9753096978349112, "grad_norm": 0.16853332515075475, "learning_rate": 6.079572115904182e-06, "loss": 0.0015, "step": 148250 }, { "epoch": 0.9753754860101446, "grad_norm": 0.1354040853749548, "learning_rate": 6.079011539778727e-06, "loss": 0.0018, "step": 148260 }, { "epoch": 0.975441274185378, "grad_norm": 0.044403085466053836, "learning_rate": 6.078450949427435e-06, "loss": 0.0006, "step": 148270 }, { "epoch": 0.9755070623606114, "grad_norm": 0.019471390118927127, "learning_rate": 6.0778903448576975e-06, "loss": 0.0008, "step": 148280 }, { "epoch": 0.9755728505358446, "grad_norm": 0.08541380858566644, "learning_rate": 6.077329726076908e-06, "loss": 0.0013, "step": 148290 }, { "epoch": 0.975638638711078, "grad_norm": 0.11398740620510449, "learning_rate": 6.076769093092456e-06, "loss": 0.0016, "step": 148300 }, { "epoch": 0.9757044268863114, "grad_norm": 0.11192954343069854, "learning_rate": 6.076208445911734e-06, "loss": 0.0018, "step": 148310 }, { "epoch": 0.9757702150615448, "grad_norm": 0.02921375879695225, "learning_rate": 6.075647784542134e-06, "loss": 0.0015, "step": 148320 }, { "epoch": 0.9758360032367782, "grad_norm": 0.003682548338951044, "learning_rate": 6.075087108991045e-06, "loss": 0.002, "step": 148330 }, { "epoch": 0.9759017914120116, "grad_norm": 0.006254639227432021, "learning_rate": 6.074526419265863e-06, "loss": 0.0022, "step": 148340 }, { "epoch": 0.975967579587245, "grad_norm": 0.07048606186905579, "learning_rate": 6.073965715373976e-06, "loss": 0.0015, "step": 148350 }, { "epoch": 0.9760333677624784, "grad_norm": 0.06362737051745271, "learning_rate": 6.07340499732278e-06, "loss": 0.0009, "step": 148360 }, { "epoch": 0.9760991559377118, "grad_norm": 0.04290660356801325, "learning_rate": 6.072844265119666e-06, "loss": 0.0006, "step": 148370 }, { "epoch": 0.9761649441129452, "grad_norm": 0.04462512817991533, "learning_rate": 6.072283518772027e-06, "loss": 0.0021, "step": 148380 }, { "epoch": 0.9762307322881785, "grad_norm": 0.24198803749217895, "learning_rate": 6.071722758287256e-06, "loss": 0.0024, "step": 148390 }, { "epoch": 0.9762965204634119, "grad_norm": 0.1905328053686723, "learning_rate": 6.071161983672747e-06, "loss": 0.0017, "step": 148400 }, { "epoch": 0.9763623086386453, "grad_norm": 0.016756725239868573, "learning_rate": 6.07060119493589e-06, "loss": 0.0008, "step": 148410 }, { "epoch": 0.9764280968138787, "grad_norm": 0.062029521233115906, "learning_rate": 6.070040392084082e-06, "loss": 0.0019, "step": 148420 }, { "epoch": 0.976493884989112, "grad_norm": 0.032614016098862116, "learning_rate": 6.069479575124715e-06, "loss": 0.0011, "step": 148430 }, { "epoch": 0.9765596731643454, "grad_norm": 0.060346498208279746, "learning_rate": 6.068918744065184e-06, "loss": 0.0006, "step": 148440 }, { "epoch": 0.9766254613395788, "grad_norm": 0.02547864205670299, "learning_rate": 6.0683578989128836e-06, "loss": 0.0018, "step": 148450 }, { "epoch": 0.9766912495148122, "grad_norm": 0.09702331118854628, "learning_rate": 6.067797039675206e-06, "loss": 0.0008, "step": 148460 }, { "epoch": 0.9767570376900456, "grad_norm": 0.013714813293768028, "learning_rate": 6.067236166359547e-06, "loss": 0.0007, "step": 148470 }, { "epoch": 0.976822825865279, "grad_norm": 0.0017421979624008353, "learning_rate": 6.0666752789732995e-06, "loss": 0.0005, "step": 148480 }, { "epoch": 0.9768886140405123, "grad_norm": 0.03136274197423552, "learning_rate": 6.0661143775238604e-06, "loss": 0.0009, "step": 148490 }, { "epoch": 0.9769544022157457, "grad_norm": 0.014216014738972466, "learning_rate": 6.065553462018625e-06, "loss": 0.0007, "step": 148500 }, { "epoch": 0.9770201903909791, "grad_norm": 0.12029834866644855, "learning_rate": 6.064992532464987e-06, "loss": 0.0008, "step": 148510 }, { "epoch": 0.9770859785662125, "grad_norm": 0.04148531826540674, "learning_rate": 6.064431588870342e-06, "loss": 0.0007, "step": 148520 }, { "epoch": 0.9771517667414459, "grad_norm": 0.047599629040556764, "learning_rate": 6.063870631242086e-06, "loss": 0.0007, "step": 148530 }, { "epoch": 0.9772175549166793, "grad_norm": 0.018758744765569532, "learning_rate": 6.063309659587613e-06, "loss": 0.0008, "step": 148540 }, { "epoch": 0.9772833430919127, "grad_norm": 0.035597598125716785, "learning_rate": 6.062748673914322e-06, "loss": 0.0013, "step": 148550 }, { "epoch": 0.9773491312671461, "grad_norm": 0.054385764335021805, "learning_rate": 6.062187674229608e-06, "loss": 0.0006, "step": 148560 }, { "epoch": 0.9774149194423795, "grad_norm": 0.08084142093452527, "learning_rate": 6.061626660540867e-06, "loss": 0.0016, "step": 148570 }, { "epoch": 0.9774807076176129, "grad_norm": 0.03504555203769041, "learning_rate": 6.061065632855494e-06, "loss": 0.0007, "step": 148580 }, { "epoch": 0.9775464957928462, "grad_norm": 0.06969605816407963, "learning_rate": 6.060504591180888e-06, "loss": 0.0015, "step": 148590 }, { "epoch": 0.9776122839680795, "grad_norm": 0.07629978183580419, "learning_rate": 6.059943535524445e-06, "loss": 0.0006, "step": 148600 }, { "epoch": 0.9776780721433129, "grad_norm": 0.028054980488469064, "learning_rate": 6.059382465893561e-06, "loss": 0.0007, "step": 148610 }, { "epoch": 0.9777438603185463, "grad_norm": 0.006594259738786207, "learning_rate": 6.058821382295634e-06, "loss": 0.0013, "step": 148620 }, { "epoch": 0.9778096484937797, "grad_norm": 0.09396978570073186, "learning_rate": 6.058260284738063e-06, "loss": 0.0013, "step": 148630 }, { "epoch": 0.9778754366690131, "grad_norm": 0.04756356245206657, "learning_rate": 6.057699173228243e-06, "loss": 0.0014, "step": 148640 }, { "epoch": 0.9779412248442465, "grad_norm": 0.019239527066774995, "learning_rate": 6.0571380477735745e-06, "loss": 0.0006, "step": 148650 }, { "epoch": 0.9780070130194799, "grad_norm": 0.03915990736632607, "learning_rate": 6.0565769083814505e-06, "loss": 0.001, "step": 148660 }, { "epoch": 0.9780728011947133, "grad_norm": 0.13971691315302462, "learning_rate": 6.056015755059276e-06, "loss": 0.0019, "step": 148670 }, { "epoch": 0.9781385893699467, "grad_norm": 0.07478243597999235, "learning_rate": 6.055454587814445e-06, "loss": 0.001, "step": 148680 }, { "epoch": 0.9782043775451801, "grad_norm": 0.027607509367973525, "learning_rate": 6.054893406654355e-06, "loss": 0.001, "step": 148690 }, { "epoch": 0.9782701657204134, "grad_norm": 0.047916934370992086, "learning_rate": 6.0543322115864075e-06, "loss": 0.0012, "step": 148700 }, { "epoch": 0.9783359538956468, "grad_norm": 0.022578342146150995, "learning_rate": 6.053771002618001e-06, "loss": 0.0006, "step": 148710 }, { "epoch": 0.9784017420708802, "grad_norm": 0.08306429594671356, "learning_rate": 6.0532097797565325e-06, "loss": 0.0007, "step": 148720 }, { "epoch": 0.9784675302461135, "grad_norm": 0.015655468947646644, "learning_rate": 6.052648543009404e-06, "loss": 0.0004, "step": 148730 }, { "epoch": 0.9785333184213469, "grad_norm": 0.1324810047567792, "learning_rate": 6.0520872923840135e-06, "loss": 0.0011, "step": 148740 }, { "epoch": 0.9785991065965803, "grad_norm": 0.15431829245296755, "learning_rate": 6.05152602788776e-06, "loss": 0.0011, "step": 148750 }, { "epoch": 0.9786648947718137, "grad_norm": 0.08873959089020397, "learning_rate": 6.050964749528045e-06, "loss": 0.002, "step": 148760 }, { "epoch": 0.9787306829470471, "grad_norm": 0.0019602602356766615, "learning_rate": 6.050403457312266e-06, "loss": 0.001, "step": 148770 }, { "epoch": 0.9787964711222805, "grad_norm": 0.03764859909171684, "learning_rate": 6.049842151247826e-06, "loss": 0.0007, "step": 148780 }, { "epoch": 0.9788622592975139, "grad_norm": 0.03910642568104559, "learning_rate": 6.049280831342123e-06, "loss": 0.001, "step": 148790 }, { "epoch": 0.9789280474727472, "grad_norm": 0.10791696454520099, "learning_rate": 6.048719497602559e-06, "loss": 0.0015, "step": 148800 }, { "epoch": 0.9789938356479806, "grad_norm": 0.01968315220786576, "learning_rate": 6.048158150036534e-06, "loss": 0.0012, "step": 148810 }, { "epoch": 0.979059623823214, "grad_norm": 0.06476339497716539, "learning_rate": 6.047596788651449e-06, "loss": 0.0016, "step": 148820 }, { "epoch": 0.9791254119984474, "grad_norm": 0.0444574566721001, "learning_rate": 6.0470354134547046e-06, "loss": 0.0012, "step": 148830 }, { "epoch": 0.9791912001736808, "grad_norm": 0.07943798233055953, "learning_rate": 6.046474024453703e-06, "loss": 0.001, "step": 148840 }, { "epoch": 0.9792569883489142, "grad_norm": 0.02061840996604177, "learning_rate": 6.045912621655845e-06, "loss": 0.0034, "step": 148850 }, { "epoch": 0.9793227765241476, "grad_norm": 0.028641792467856794, "learning_rate": 6.045351205068532e-06, "loss": 0.0009, "step": 148860 }, { "epoch": 0.979388564699381, "grad_norm": 0.03175313491387364, "learning_rate": 6.044789774699167e-06, "loss": 0.0004, "step": 148870 }, { "epoch": 0.9794543528746144, "grad_norm": 0.0006594454534538075, "learning_rate": 6.044228330555151e-06, "loss": 0.0008, "step": 148880 }, { "epoch": 0.9795201410498477, "grad_norm": 0.02672450141871719, "learning_rate": 6.043666872643886e-06, "loss": 0.0007, "step": 148890 }, { "epoch": 0.979585929225081, "grad_norm": 0.0322963618250292, "learning_rate": 6.043105400972775e-06, "loss": 0.0004, "step": 148900 }, { "epoch": 0.9796517174003144, "grad_norm": 0.020993676510320733, "learning_rate": 6.042543915549222e-06, "loss": 0.0014, "step": 148910 }, { "epoch": 0.9797175055755478, "grad_norm": 0.044107219293870945, "learning_rate": 6.041982416380624e-06, "loss": 0.0008, "step": 148920 }, { "epoch": 0.9797832937507812, "grad_norm": 0.043919810026953505, "learning_rate": 6.04142090347439e-06, "loss": 0.0014, "step": 148930 }, { "epoch": 0.9798490819260146, "grad_norm": 0.04844365742202227, "learning_rate": 6.040859376837921e-06, "loss": 0.0018, "step": 148940 }, { "epoch": 0.979914870101248, "grad_norm": 0.06570675821016898, "learning_rate": 6.040297836478619e-06, "loss": 0.0014, "step": 148950 }, { "epoch": 0.9799806582764814, "grad_norm": 0.01891909162649208, "learning_rate": 6.03973628240389e-06, "loss": 0.0011, "step": 148960 }, { "epoch": 0.9800464464517148, "grad_norm": 0.04000454342951456, "learning_rate": 6.0391747146211344e-06, "loss": 0.0005, "step": 148970 }, { "epoch": 0.9801122346269482, "grad_norm": 0.02042054657804605, "learning_rate": 6.038613133137758e-06, "loss": 0.0007, "step": 148980 }, { "epoch": 0.9801780228021816, "grad_norm": 0.01588233256325463, "learning_rate": 6.038051537961165e-06, "loss": 0.0009, "step": 148990 }, { "epoch": 0.9802438109774149, "grad_norm": 0.011961247296625442, "learning_rate": 6.0374899290987585e-06, "loss": 0.0006, "step": 149000 }, { "epoch": 0.9803095991526483, "grad_norm": 0.2747015208107174, "learning_rate": 6.0369283065579446e-06, "loss": 0.0027, "step": 149010 }, { "epoch": 0.9803753873278817, "grad_norm": 0.06336046548802139, "learning_rate": 6.0363666703461255e-06, "loss": 0.0012, "step": 149020 }, { "epoch": 0.980441175503115, "grad_norm": 0.016415927818855317, "learning_rate": 6.035805020470707e-06, "loss": 0.0006, "step": 149030 }, { "epoch": 0.9805069636783484, "grad_norm": 0.04081616679787174, "learning_rate": 6.035243356939095e-06, "loss": 0.0011, "step": 149040 }, { "epoch": 0.9805727518535818, "grad_norm": 0.08270524127273375, "learning_rate": 6.03468167975869e-06, "loss": 0.0009, "step": 149050 }, { "epoch": 0.9806385400288152, "grad_norm": 0.029137184292929212, "learning_rate": 6.034119988936904e-06, "loss": 0.0008, "step": 149060 }, { "epoch": 0.9807043282040486, "grad_norm": 0.0025982042640146615, "learning_rate": 6.033558284481138e-06, "loss": 0.0006, "step": 149070 }, { "epoch": 0.980770116379282, "grad_norm": 0.05364594010034511, "learning_rate": 6.0329965663987985e-06, "loss": 0.0006, "step": 149080 }, { "epoch": 0.9808359045545154, "grad_norm": 0.20356143139373245, "learning_rate": 6.032434834697293e-06, "loss": 0.0013, "step": 149090 }, { "epoch": 0.9809016927297488, "grad_norm": 0.14649147506154592, "learning_rate": 6.0318730893840235e-06, "loss": 0.0012, "step": 149100 }, { "epoch": 0.9809674809049821, "grad_norm": 0.020630238370192334, "learning_rate": 6.0313113304664e-06, "loss": 0.0008, "step": 149110 }, { "epoch": 0.9810332690802155, "grad_norm": 0.005489898262734958, "learning_rate": 6.030749557951826e-06, "loss": 0.0009, "step": 149120 }, { "epoch": 0.9810990572554489, "grad_norm": 0.0023838725043977918, "learning_rate": 6.030187771847711e-06, "loss": 0.0011, "step": 149130 }, { "epoch": 0.9811648454306823, "grad_norm": 0.04962633254269766, "learning_rate": 6.029625972161459e-06, "loss": 0.0009, "step": 149140 }, { "epoch": 0.9812306336059157, "grad_norm": 0.07369215372309522, "learning_rate": 6.029064158900478e-06, "loss": 0.0007, "step": 149150 }, { "epoch": 0.9812964217811491, "grad_norm": 0.06098749384481914, "learning_rate": 6.028502332072174e-06, "loss": 0.0012, "step": 149160 }, { "epoch": 0.9813622099563825, "grad_norm": 0.028132892121014712, "learning_rate": 6.027940491683957e-06, "loss": 0.0007, "step": 149170 }, { "epoch": 0.9814279981316159, "grad_norm": 0.017986379912760544, "learning_rate": 6.0273786377432295e-06, "loss": 0.0017, "step": 149180 }, { "epoch": 0.9814937863068492, "grad_norm": 0.01465758750985186, "learning_rate": 6.026816770257404e-06, "loss": 0.001, "step": 149190 }, { "epoch": 0.9815595744820826, "grad_norm": 0.05230097295401693, "learning_rate": 6.0262548892338865e-06, "loss": 0.0009, "step": 149200 }, { "epoch": 0.9816253626573159, "grad_norm": 0.03915648232450159, "learning_rate": 6.025692994680083e-06, "loss": 0.0006, "step": 149210 }, { "epoch": 0.9816911508325493, "grad_norm": 0.03839843684845494, "learning_rate": 6.0251310866034055e-06, "loss": 0.0015, "step": 149220 }, { "epoch": 0.9817569390077827, "grad_norm": 0.059805375520875934, "learning_rate": 6.0245691650112585e-06, "loss": 0.002, "step": 149230 }, { "epoch": 0.9818227271830161, "grad_norm": 0.11501857149629822, "learning_rate": 6.024007229911051e-06, "loss": 0.0012, "step": 149240 }, { "epoch": 0.9818885153582495, "grad_norm": 0.024640920205744474, "learning_rate": 6.023445281310194e-06, "loss": 0.0016, "step": 149250 }, { "epoch": 0.9819543035334829, "grad_norm": 0.04324387842823576, "learning_rate": 6.022883319216095e-06, "loss": 0.0009, "step": 149260 }, { "epoch": 0.9820200917087163, "grad_norm": 0.001138086056935315, "learning_rate": 6.022321343636162e-06, "loss": 0.0009, "step": 149270 }, { "epoch": 0.9820858798839497, "grad_norm": 0.00669861079127943, "learning_rate": 6.021759354577806e-06, "loss": 0.0007, "step": 149280 }, { "epoch": 0.9821516680591831, "grad_norm": 0.00157083749565895, "learning_rate": 6.021197352048435e-06, "loss": 0.0006, "step": 149290 }, { "epoch": 0.9822174562344165, "grad_norm": 0.07677897308188034, "learning_rate": 6.0206353360554585e-06, "loss": 0.0021, "step": 149300 }, { "epoch": 0.9822832444096498, "grad_norm": 0.058904272824045874, "learning_rate": 6.0200733066062856e-06, "loss": 0.0015, "step": 149310 }, { "epoch": 0.9823490325848832, "grad_norm": 0.025745675916407185, "learning_rate": 6.019511263708328e-06, "loss": 0.001, "step": 149320 }, { "epoch": 0.9824148207601165, "grad_norm": 0.0020172931394223234, "learning_rate": 6.018949207368996e-06, "loss": 0.0013, "step": 149330 }, { "epoch": 0.9824806089353499, "grad_norm": 0.08094166307424262, "learning_rate": 6.018387137595698e-06, "loss": 0.0013, "step": 149340 }, { "epoch": 0.9825463971105833, "grad_norm": 0.020006013426331303, "learning_rate": 6.017825054395845e-06, "loss": 0.0008, "step": 149350 }, { "epoch": 0.9826121852858167, "grad_norm": 0.024044734590324777, "learning_rate": 6.017262957776848e-06, "loss": 0.0012, "step": 149360 }, { "epoch": 0.9826779734610501, "grad_norm": 0.002755643388814702, "learning_rate": 6.016700847746116e-06, "loss": 0.0009, "step": 149370 }, { "epoch": 0.9827437616362835, "grad_norm": 0.10824019602828691, "learning_rate": 6.016138724311063e-06, "loss": 0.001, "step": 149380 }, { "epoch": 0.9828095498115169, "grad_norm": 0.019978369499776136, "learning_rate": 6.015576587479098e-06, "loss": 0.001, "step": 149390 }, { "epoch": 0.9828753379867503, "grad_norm": 0.028388227077255235, "learning_rate": 6.015014437257633e-06, "loss": 0.0005, "step": 149400 }, { "epoch": 0.9829411261619836, "grad_norm": 0.047358226862407884, "learning_rate": 6.014452273654079e-06, "loss": 0.0008, "step": 149410 }, { "epoch": 0.983006914337217, "grad_norm": 0.0019394319663698753, "learning_rate": 6.013890096675848e-06, "loss": 0.0009, "step": 149420 }, { "epoch": 0.9830727025124504, "grad_norm": 0.04139202305342001, "learning_rate": 6.013327906330353e-06, "loss": 0.0014, "step": 149430 }, { "epoch": 0.9831384906876838, "grad_norm": 0.0020977071536705514, "learning_rate": 6.012765702625002e-06, "loss": 0.0007, "step": 149440 }, { "epoch": 0.9832042788629172, "grad_norm": 0.020641012446054135, "learning_rate": 6.012203485567212e-06, "loss": 0.0016, "step": 149450 }, { "epoch": 0.9832700670381506, "grad_norm": 0.08562004225206547, "learning_rate": 6.0116412551643934e-06, "loss": 0.0007, "step": 149460 }, { "epoch": 0.983335855213384, "grad_norm": 0.16441740169292132, "learning_rate": 6.011079011423957e-06, "loss": 0.0011, "step": 149470 }, { "epoch": 0.9834016433886174, "grad_norm": 0.013076149356323306, "learning_rate": 6.0105167543533185e-06, "loss": 0.0013, "step": 149480 }, { "epoch": 0.9834674315638507, "grad_norm": 0.015194402061318403, "learning_rate": 6.009954483959889e-06, "loss": 0.0021, "step": 149490 }, { "epoch": 0.9835332197390841, "grad_norm": 0.06664548805471036, "learning_rate": 6.009392200251081e-06, "loss": 0.0009, "step": 149500 }, { "epoch": 0.9835990079143175, "grad_norm": 0.0187911730230955, "learning_rate": 6.008829903234309e-06, "loss": 0.0005, "step": 149510 }, { "epoch": 0.9836647960895508, "grad_norm": 0.03777421708085607, "learning_rate": 6.008267592916985e-06, "loss": 0.0008, "step": 149520 }, { "epoch": 0.9837305842647842, "grad_norm": 0.016946300366935597, "learning_rate": 6.0077052693065264e-06, "loss": 0.0014, "step": 149530 }, { "epoch": 0.9837963724400176, "grad_norm": 0.03780276592572312, "learning_rate": 6.0071429324103425e-06, "loss": 0.0009, "step": 149540 }, { "epoch": 0.983862160615251, "grad_norm": 0.01609150945646767, "learning_rate": 6.006580582235849e-06, "loss": 0.0011, "step": 149550 }, { "epoch": 0.9839279487904844, "grad_norm": 0.04964789086955419, "learning_rate": 6.006018218790459e-06, "loss": 0.0007, "step": 149560 }, { "epoch": 0.9839937369657178, "grad_norm": 0.0062065549481363155, "learning_rate": 6.005455842081588e-06, "loss": 0.0007, "step": 149570 }, { "epoch": 0.9840595251409512, "grad_norm": 0.06316109319901832, "learning_rate": 6.00489345211665e-06, "loss": 0.0009, "step": 149580 }, { "epoch": 0.9841253133161846, "grad_norm": 0.0005734697354839335, "learning_rate": 6.00433104890306e-06, "loss": 0.0005, "step": 149590 }, { "epoch": 0.984191101491418, "grad_norm": 0.02170128504209381, "learning_rate": 6.003768632448234e-06, "loss": 0.001, "step": 149600 }, { "epoch": 0.9842568896666514, "grad_norm": 0.021613101792351853, "learning_rate": 6.003206202759585e-06, "loss": 0.0013, "step": 149610 }, { "epoch": 0.9843226778418847, "grad_norm": 0.025519120869414304, "learning_rate": 6.002643759844527e-06, "loss": 0.0011, "step": 149620 }, { "epoch": 0.984388466017118, "grad_norm": 0.08002533509899297, "learning_rate": 6.002081303710479e-06, "loss": 0.0007, "step": 149630 }, { "epoch": 0.9844542541923514, "grad_norm": 0.019591952472169393, "learning_rate": 6.001518834364852e-06, "loss": 0.0007, "step": 149640 }, { "epoch": 0.9845200423675848, "grad_norm": 0.003940516463489467, "learning_rate": 6.000956351815066e-06, "loss": 0.0013, "step": 149650 }, { "epoch": 0.9845858305428182, "grad_norm": 0.03211194963088119, "learning_rate": 6.000393856068535e-06, "loss": 0.0014, "step": 149660 }, { "epoch": 0.9846516187180516, "grad_norm": 0.0068844386478081, "learning_rate": 5.999831347132674e-06, "loss": 0.0007, "step": 149670 }, { "epoch": 0.984717406893285, "grad_norm": 0.0194627399093129, "learning_rate": 5.9992688250149e-06, "loss": 0.0008, "step": 149680 }, { "epoch": 0.9847831950685184, "grad_norm": 0.1499393087066127, "learning_rate": 5.99870628972263e-06, "loss": 0.0013, "step": 149690 }, { "epoch": 0.9848489832437518, "grad_norm": 0.06681142403949177, "learning_rate": 5.99814374126328e-06, "loss": 0.0013, "step": 149700 }, { "epoch": 0.9849147714189852, "grad_norm": 0.16891539187131593, "learning_rate": 5.997581179644268e-06, "loss": 0.0013, "step": 149710 }, { "epoch": 0.9849805595942185, "grad_norm": 0.06608645930221707, "learning_rate": 5.9970186048730086e-06, "loss": 0.0016, "step": 149720 }, { "epoch": 0.9850463477694519, "grad_norm": 0.048611640665259165, "learning_rate": 5.996456016956921e-06, "loss": 0.0014, "step": 149730 }, { "epoch": 0.9851121359446853, "grad_norm": 0.06918055962266192, "learning_rate": 5.99589341590342e-06, "loss": 0.0015, "step": 149740 }, { "epoch": 0.9851779241199187, "grad_norm": 0.001722026132167904, "learning_rate": 5.995330801719924e-06, "loss": 0.0008, "step": 149750 }, { "epoch": 0.9852437122951521, "grad_norm": 0.0578789510839966, "learning_rate": 5.994768174413853e-06, "loss": 0.0006, "step": 149760 }, { "epoch": 0.9853095004703855, "grad_norm": 0.04160413773693874, "learning_rate": 5.994205533992622e-06, "loss": 0.0011, "step": 149770 }, { "epoch": 0.9853752886456189, "grad_norm": 0.030299890284784657, "learning_rate": 5.993642880463649e-06, "loss": 0.0007, "step": 149780 }, { "epoch": 0.9854410768208522, "grad_norm": 0.0393850684211156, "learning_rate": 5.993080213834353e-06, "loss": 0.0008, "step": 149790 }, { "epoch": 0.9855068649960856, "grad_norm": 0.013318322396314811, "learning_rate": 5.992517534112153e-06, "loss": 0.0012, "step": 149800 }, { "epoch": 0.985572653171319, "grad_norm": 0.013855092537556018, "learning_rate": 5.9919548413044655e-06, "loss": 0.0007, "step": 149810 }, { "epoch": 0.9856384413465523, "grad_norm": 0.07498006135742484, "learning_rate": 5.99139213541871e-06, "loss": 0.0019, "step": 149820 }, { "epoch": 0.9857042295217857, "grad_norm": 0.019517310754652125, "learning_rate": 5.990829416462306e-06, "loss": 0.0018, "step": 149830 }, { "epoch": 0.9857700176970191, "grad_norm": 0.013664440562580735, "learning_rate": 5.990266684442671e-06, "loss": 0.0012, "step": 149840 }, { "epoch": 0.9858358058722525, "grad_norm": 0.11858737922337732, "learning_rate": 5.989703939367227e-06, "loss": 0.0009, "step": 149850 }, { "epoch": 0.9859015940474859, "grad_norm": 0.06526145092912174, "learning_rate": 5.98914118124339e-06, "loss": 0.0004, "step": 149860 }, { "epoch": 0.9859673822227193, "grad_norm": 0.035840211378152666, "learning_rate": 5.988578410078582e-06, "loss": 0.0017, "step": 149870 }, { "epoch": 0.9860331703979527, "grad_norm": 0.00872506985465383, "learning_rate": 5.988015625880219e-06, "loss": 0.0003, "step": 149880 }, { "epoch": 0.9860989585731861, "grad_norm": 0.02409932005937151, "learning_rate": 5.987452828655724e-06, "loss": 0.0008, "step": 149890 }, { "epoch": 0.9861647467484195, "grad_norm": 0.03530945174789386, "learning_rate": 5.986890018412518e-06, "loss": 0.0005, "step": 149900 }, { "epoch": 0.9862305349236529, "grad_norm": 0.017424277799980525, "learning_rate": 5.9863271951580185e-06, "loss": 0.0012, "step": 149910 }, { "epoch": 0.9862963230988862, "grad_norm": 0.04389665098175548, "learning_rate": 5.9857643588996464e-06, "loss": 0.0007, "step": 149920 }, { "epoch": 0.9863621112741195, "grad_norm": 0.0475756410279884, "learning_rate": 5.985201509644823e-06, "loss": 0.0011, "step": 149930 }, { "epoch": 0.9864278994493529, "grad_norm": 0.001998290001061678, "learning_rate": 5.984638647400969e-06, "loss": 0.0014, "step": 149940 }, { "epoch": 0.9864936876245863, "grad_norm": 0.06958201016842147, "learning_rate": 5.9840757721755025e-06, "loss": 0.0014, "step": 149950 }, { "epoch": 0.9865594757998197, "grad_norm": 0.0464501309317992, "learning_rate": 5.9835128839758495e-06, "loss": 0.0003, "step": 149960 }, { "epoch": 0.9866252639750531, "grad_norm": 0.08634154542442556, "learning_rate": 5.982949982809427e-06, "loss": 0.0016, "step": 149970 }, { "epoch": 0.9866910521502865, "grad_norm": 0.012957747457845748, "learning_rate": 5.9823870686836595e-06, "loss": 0.0006, "step": 149980 }, { "epoch": 0.9867568403255199, "grad_norm": 0.04970846320376538, "learning_rate": 5.981824141605965e-06, "loss": 0.001, "step": 149990 }, { "epoch": 0.9868226285007533, "grad_norm": 0.0505089348214212, "learning_rate": 5.981261201583769e-06, "loss": 0.001, "step": 150000 }, { "epoch": 0.9868226285007533, "eval_loss": 0.0007096146000549197, "eval_runtime": 13.0959, "eval_samples_per_second": 15.272, "eval_steps_per_second": 7.636, "step": 150000 }, { "epoch": 0.9868884166759867, "grad_norm": 0.11204326889038478, "learning_rate": 5.980698248624489e-06, "loss": 0.001, "step": 150010 }, { "epoch": 0.9869542048512201, "grad_norm": 0.020696736079273973, "learning_rate": 5.980135282735552e-06, "loss": 0.0028, "step": 150020 }, { "epoch": 0.9870199930264534, "grad_norm": 0.20826040606416146, "learning_rate": 5.979572303924377e-06, "loss": 0.0009, "step": 150030 }, { "epoch": 0.9870857812016868, "grad_norm": 0.039860482338878934, "learning_rate": 5.9790093121983876e-06, "loss": 0.001, "step": 150040 }, { "epoch": 0.9871515693769202, "grad_norm": 0.013956568726111812, "learning_rate": 5.9784463075650055e-06, "loss": 0.0019, "step": 150050 }, { "epoch": 0.9872173575521536, "grad_norm": 0.03485580348784299, "learning_rate": 5.977883290031654e-06, "loss": 0.0008, "step": 150060 }, { "epoch": 0.987283145727387, "grad_norm": 0.04422707794164683, "learning_rate": 5.977320259605756e-06, "loss": 0.0006, "step": 150070 }, { "epoch": 0.9873489339026204, "grad_norm": 0.057772054658321324, "learning_rate": 5.976757216294733e-06, "loss": 0.0018, "step": 150080 }, { "epoch": 0.9874147220778537, "grad_norm": 0.03618007008200256, "learning_rate": 5.976194160106012e-06, "loss": 0.0011, "step": 150090 }, { "epoch": 0.9874805102530871, "grad_norm": 0.014381682809589558, "learning_rate": 5.975631091047013e-06, "loss": 0.0004, "step": 150100 }, { "epoch": 0.9875462984283205, "grad_norm": 0.0744773525940724, "learning_rate": 5.97506800912516e-06, "loss": 0.0012, "step": 150110 }, { "epoch": 0.9876120866035539, "grad_norm": 0.02543530418552563, "learning_rate": 5.974504914347878e-06, "loss": 0.0006, "step": 150120 }, { "epoch": 0.9876778747787872, "grad_norm": 0.008446261545162384, "learning_rate": 5.973941806722592e-06, "loss": 0.0007, "step": 150130 }, { "epoch": 0.9877436629540206, "grad_norm": 0.06796785997416765, "learning_rate": 5.9733786862567215e-06, "loss": 0.0008, "step": 150140 }, { "epoch": 0.987809451129254, "grad_norm": 0.009688571421883783, "learning_rate": 5.972815552957695e-06, "loss": 0.0022, "step": 150150 }, { "epoch": 0.9878752393044874, "grad_norm": 0.12742518705492378, "learning_rate": 5.972252406832936e-06, "loss": 0.0011, "step": 150160 }, { "epoch": 0.9879410274797208, "grad_norm": 0.046286578985212254, "learning_rate": 5.971689247889868e-06, "loss": 0.0013, "step": 150170 }, { "epoch": 0.9880068156549542, "grad_norm": 0.021686436132758915, "learning_rate": 5.971126076135917e-06, "loss": 0.0009, "step": 150180 }, { "epoch": 0.9880726038301876, "grad_norm": 0.0018979301610633085, "learning_rate": 5.970562891578508e-06, "loss": 0.002, "step": 150190 }, { "epoch": 0.988138392005421, "grad_norm": 0.011126929014736405, "learning_rate": 5.969999694225065e-06, "loss": 0.0006, "step": 150200 }, { "epoch": 0.9882041801806544, "grad_norm": 0.0770999546983208, "learning_rate": 5.969436484083012e-06, "loss": 0.001, "step": 150210 }, { "epoch": 0.9882699683558878, "grad_norm": 0.0638528612688433, "learning_rate": 5.9688732611597775e-06, "loss": 0.0012, "step": 150220 }, { "epoch": 0.988335756531121, "grad_norm": 0.09942311891108374, "learning_rate": 5.968310025462787e-06, "loss": 0.0016, "step": 150230 }, { "epoch": 0.9884015447063544, "grad_norm": 0.07051370674722122, "learning_rate": 5.967746776999465e-06, "loss": 0.0009, "step": 150240 }, { "epoch": 0.9884673328815878, "grad_norm": 0.014753933538024391, "learning_rate": 5.967183515777236e-06, "loss": 0.0019, "step": 150250 }, { "epoch": 0.9885331210568212, "grad_norm": 0.0009456336400821158, "learning_rate": 5.966620241803529e-06, "loss": 0.0006, "step": 150260 }, { "epoch": 0.9885989092320546, "grad_norm": 0.031109287912291072, "learning_rate": 5.966056955085767e-06, "loss": 0.0006, "step": 150270 }, { "epoch": 0.988664697407288, "grad_norm": 0.035074444718595756, "learning_rate": 5.96549365563138e-06, "loss": 0.0009, "step": 150280 }, { "epoch": 0.9887304855825214, "grad_norm": 0.06176072641102215, "learning_rate": 5.964930343447793e-06, "loss": 0.0008, "step": 150290 }, { "epoch": 0.9887962737577548, "grad_norm": 0.14599503532949845, "learning_rate": 5.964367018542433e-06, "loss": 0.0022, "step": 150300 }, { "epoch": 0.9888620619329882, "grad_norm": 0.01757113273849502, "learning_rate": 5.9638036809227265e-06, "loss": 0.0006, "step": 150310 }, { "epoch": 0.9889278501082216, "grad_norm": 0.00570876535593316, "learning_rate": 5.963240330596101e-06, "loss": 0.0012, "step": 150320 }, { "epoch": 0.9889936382834549, "grad_norm": 0.09417864589166444, "learning_rate": 5.9626769675699825e-06, "loss": 0.0012, "step": 150330 }, { "epoch": 0.9890594264586883, "grad_norm": 0.011818971055949975, "learning_rate": 5.9621135918518e-06, "loss": 0.0004, "step": 150340 }, { "epoch": 0.9891252146339217, "grad_norm": 0.07255388319547564, "learning_rate": 5.961550203448981e-06, "loss": 0.0012, "step": 150350 }, { "epoch": 0.9891910028091551, "grad_norm": 0.07674541068771033, "learning_rate": 5.9609868023689535e-06, "loss": 0.0009, "step": 150360 }, { "epoch": 0.9892567909843885, "grad_norm": 0.06820381582039266, "learning_rate": 5.960423388619144e-06, "loss": 0.0016, "step": 150370 }, { "epoch": 0.9893225791596219, "grad_norm": 0.03686466114177442, "learning_rate": 5.959859962206983e-06, "loss": 0.001, "step": 150380 }, { "epoch": 0.9893883673348552, "grad_norm": 0.008761469958940619, "learning_rate": 5.959296523139897e-06, "loss": 0.0016, "step": 150390 }, { "epoch": 0.9894541555100886, "grad_norm": 0.0003772801334458959, "learning_rate": 5.9587330714253135e-06, "loss": 0.0005, "step": 150400 }, { "epoch": 0.989519943685322, "grad_norm": 0.0018161434380566444, "learning_rate": 5.958169607070663e-06, "loss": 0.0006, "step": 150410 }, { "epoch": 0.9895857318605554, "grad_norm": 0.05618526124490584, "learning_rate": 5.957606130083375e-06, "loss": 0.001, "step": 150420 }, { "epoch": 0.9896515200357887, "grad_norm": 0.1341988703490467, "learning_rate": 5.957042640470876e-06, "loss": 0.0012, "step": 150430 }, { "epoch": 0.9897173082110221, "grad_norm": 0.04321271098413266, "learning_rate": 5.956479138240597e-06, "loss": 0.0008, "step": 150440 }, { "epoch": 0.9897830963862555, "grad_norm": 0.16550603304015055, "learning_rate": 5.955915623399967e-06, "loss": 0.0012, "step": 150450 }, { "epoch": 0.9898488845614889, "grad_norm": 0.03947897328019137, "learning_rate": 5.955352095956415e-06, "loss": 0.0013, "step": 150460 }, { "epoch": 0.9899146727367223, "grad_norm": 0.18168971045998883, "learning_rate": 5.954788555917368e-06, "loss": 0.0017, "step": 150470 }, { "epoch": 0.9899804609119557, "grad_norm": 0.03204932835661022, "learning_rate": 5.954225003290262e-06, "loss": 0.0009, "step": 150480 }, { "epoch": 0.9900462490871891, "grad_norm": 0.05246857719680601, "learning_rate": 5.953661438082522e-06, "loss": 0.0014, "step": 150490 }, { "epoch": 0.9901120372624225, "grad_norm": 0.07950317410709914, "learning_rate": 5.95309786030158e-06, "loss": 0.0015, "step": 150500 }, { "epoch": 0.9901778254376559, "grad_norm": 0.024669247750314264, "learning_rate": 5.952534269954865e-06, "loss": 0.0017, "step": 150510 }, { "epoch": 0.9902436136128893, "grad_norm": 0.06833728428608866, "learning_rate": 5.951970667049809e-06, "loss": 0.002, "step": 150520 }, { "epoch": 0.9903094017881227, "grad_norm": 0.039120886662691194, "learning_rate": 5.95140705159384e-06, "loss": 0.001, "step": 150530 }, { "epoch": 0.9903751899633559, "grad_norm": 0.03499753220660108, "learning_rate": 5.950843423594394e-06, "loss": 0.0007, "step": 150540 }, { "epoch": 0.9904409781385893, "grad_norm": 0.01879255990075291, "learning_rate": 5.950279783058897e-06, "loss": 0.0005, "step": 150550 }, { "epoch": 0.9905067663138227, "grad_norm": 0.016604651760491407, "learning_rate": 5.949716129994784e-06, "loss": 0.0015, "step": 150560 }, { "epoch": 0.9905725544890561, "grad_norm": 0.11085368775723506, "learning_rate": 5.949152464409481e-06, "loss": 0.0019, "step": 150570 }, { "epoch": 0.9906383426642895, "grad_norm": 0.006250053483661672, "learning_rate": 5.948588786310423e-06, "loss": 0.001, "step": 150580 }, { "epoch": 0.9907041308395229, "grad_norm": 0.032275318350176406, "learning_rate": 5.948025095705041e-06, "loss": 0.0007, "step": 150590 }, { "epoch": 0.9907699190147563, "grad_norm": 0.027738381467231756, "learning_rate": 5.947461392600768e-06, "loss": 0.001, "step": 150600 }, { "epoch": 0.9908357071899897, "grad_norm": 0.03511744304009156, "learning_rate": 5.9468976770050345e-06, "loss": 0.0011, "step": 150610 }, { "epoch": 0.9909014953652231, "grad_norm": 0.03246560127259682, "learning_rate": 5.9463339489252734e-06, "loss": 0.0016, "step": 150620 }, { "epoch": 0.9909672835404565, "grad_norm": 0.009285416376201873, "learning_rate": 5.945770208368916e-06, "loss": 0.0004, "step": 150630 }, { "epoch": 0.9910330717156898, "grad_norm": 0.26907942737509055, "learning_rate": 5.945206455343395e-06, "loss": 0.0025, "step": 150640 }, { "epoch": 0.9910988598909232, "grad_norm": 0.0304290940941273, "learning_rate": 5.944642689856143e-06, "loss": 0.001, "step": 150650 }, { "epoch": 0.9911646480661566, "grad_norm": 0.034659036052047947, "learning_rate": 5.944078911914592e-06, "loss": 0.0017, "step": 150660 }, { "epoch": 0.99123043624139, "grad_norm": 0.046780570240577385, "learning_rate": 5.943515121526178e-06, "loss": 0.0016, "step": 150670 }, { "epoch": 0.9912962244166234, "grad_norm": 0.16805190326930708, "learning_rate": 5.9429513186983316e-06, "loss": 0.0017, "step": 150680 }, { "epoch": 0.9913620125918567, "grad_norm": 0.10581575387405129, "learning_rate": 5.942387503438486e-06, "loss": 0.0015, "step": 150690 }, { "epoch": 0.9914278007670901, "grad_norm": 0.02002742819105592, "learning_rate": 5.9418236757540745e-06, "loss": 0.0009, "step": 150700 }, { "epoch": 0.9914935889423235, "grad_norm": 0.04587384540880996, "learning_rate": 5.941259835652531e-06, "loss": 0.0013, "step": 150710 }, { "epoch": 0.9915593771175569, "grad_norm": 0.01070717072806077, "learning_rate": 5.940695983141291e-06, "loss": 0.001, "step": 150720 }, { "epoch": 0.9916251652927903, "grad_norm": 0.0033995089176526204, "learning_rate": 5.940132118227786e-06, "loss": 0.0008, "step": 150730 }, { "epoch": 0.9916909534680236, "grad_norm": 0.016263692025589967, "learning_rate": 5.939568240919451e-06, "loss": 0.0006, "step": 150740 }, { "epoch": 0.991756741643257, "grad_norm": 0.023145594211851726, "learning_rate": 5.9390043512237205e-06, "loss": 0.0011, "step": 150750 }, { "epoch": 0.9918225298184904, "grad_norm": 0.01925478164127124, "learning_rate": 5.9384404491480294e-06, "loss": 0.0013, "step": 150760 }, { "epoch": 0.9918883179937238, "grad_norm": 0.01348792661512682, "learning_rate": 5.93787653469981e-06, "loss": 0.0003, "step": 150770 }, { "epoch": 0.9919541061689572, "grad_norm": 0.04904816059101131, "learning_rate": 5.937312607886498e-06, "loss": 0.0006, "step": 150780 }, { "epoch": 0.9920198943441906, "grad_norm": 0.027089300600823873, "learning_rate": 5.9367486687155304e-06, "loss": 0.0012, "step": 150790 }, { "epoch": 0.992085682519424, "grad_norm": 0.014052957450269977, "learning_rate": 5.93618471719434e-06, "loss": 0.0013, "step": 150800 }, { "epoch": 0.9921514706946574, "grad_norm": 0.045893131034137544, "learning_rate": 5.935620753330363e-06, "loss": 0.0007, "step": 150810 }, { "epoch": 0.9922172588698908, "grad_norm": 0.03376785374178372, "learning_rate": 5.935056777131034e-06, "loss": 0.0006, "step": 150820 }, { "epoch": 0.9922830470451242, "grad_norm": 0.006344750758925149, "learning_rate": 5.93449278860379e-06, "loss": 0.0004, "step": 150830 }, { "epoch": 0.9923488352203574, "grad_norm": 0.011724410829091484, "learning_rate": 5.933928787756064e-06, "loss": 0.0009, "step": 150840 }, { "epoch": 0.9924146233955908, "grad_norm": 0.07099679985624799, "learning_rate": 5.933364774595294e-06, "loss": 0.0013, "step": 150850 }, { "epoch": 0.9924804115708242, "grad_norm": 0.01520624098593395, "learning_rate": 5.932800749128916e-06, "loss": 0.0006, "step": 150860 }, { "epoch": 0.9925461997460576, "grad_norm": 0.048335132941008156, "learning_rate": 5.932236711364365e-06, "loss": 0.0014, "step": 150870 }, { "epoch": 0.992611987921291, "grad_norm": 0.015762629178372352, "learning_rate": 5.931672661309079e-06, "loss": 0.0017, "step": 150880 }, { "epoch": 0.9926777760965244, "grad_norm": 0.1083838288847813, "learning_rate": 5.931108598970494e-06, "loss": 0.0015, "step": 150890 }, { "epoch": 0.9927435642717578, "grad_norm": 0.014024465546206706, "learning_rate": 5.930544524356046e-06, "loss": 0.0016, "step": 150900 }, { "epoch": 0.9928093524469912, "grad_norm": 0.007889010672745668, "learning_rate": 5.9299804374731704e-06, "loss": 0.0009, "step": 150910 }, { "epoch": 0.9928751406222246, "grad_norm": 0.030707171314620484, "learning_rate": 5.929416338329308e-06, "loss": 0.0009, "step": 150920 }, { "epoch": 0.992940928797458, "grad_norm": 0.07784283454009429, "learning_rate": 5.928852226931892e-06, "loss": 0.0016, "step": 150930 }, { "epoch": 0.9930067169726914, "grad_norm": 0.07971882038828872, "learning_rate": 5.928288103288363e-06, "loss": 0.0014, "step": 150940 }, { "epoch": 0.9930725051479247, "grad_norm": 0.06307796462409075, "learning_rate": 5.927723967406157e-06, "loss": 0.0012, "step": 150950 }, { "epoch": 0.9931382933231581, "grad_norm": 0.02774090187233673, "learning_rate": 5.9271598192927115e-06, "loss": 0.0013, "step": 150960 }, { "epoch": 0.9932040814983915, "grad_norm": 0.015970696917310245, "learning_rate": 5.9265956589554645e-06, "loss": 0.001, "step": 150970 }, { "epoch": 0.9932698696736249, "grad_norm": 0.045912218375564376, "learning_rate": 5.926031486401853e-06, "loss": 0.0008, "step": 150980 }, { "epoch": 0.9933356578488582, "grad_norm": 0.004514561991708215, "learning_rate": 5.925467301639319e-06, "loss": 0.0007, "step": 150990 }, { "epoch": 0.9934014460240916, "grad_norm": 0.03854421056823652, "learning_rate": 5.9249031046752945e-06, "loss": 0.0019, "step": 151000 }, { "epoch": 0.993467234199325, "grad_norm": 0.07437366942982565, "learning_rate": 5.924338895517223e-06, "loss": 0.0015, "step": 151010 }, { "epoch": 0.9935330223745584, "grad_norm": 0.017144231442772176, "learning_rate": 5.9237746741725425e-06, "loss": 0.0046, "step": 151020 }, { "epoch": 0.9935988105497918, "grad_norm": 0.02269433972212841, "learning_rate": 5.92321044064869e-06, "loss": 0.0009, "step": 151030 }, { "epoch": 0.9936645987250252, "grad_norm": 0.029348130813424297, "learning_rate": 5.922646194953103e-06, "loss": 0.0008, "step": 151040 }, { "epoch": 0.9937303869002585, "grad_norm": 0.010380504266250281, "learning_rate": 5.922081937093225e-06, "loss": 0.001, "step": 151050 }, { "epoch": 0.9937961750754919, "grad_norm": 0.03691874411020103, "learning_rate": 5.921517667076493e-06, "loss": 0.0015, "step": 151060 }, { "epoch": 0.9938619632507253, "grad_norm": 0.1045519687971741, "learning_rate": 5.920953384910346e-06, "loss": 0.0008, "step": 151070 }, { "epoch": 0.9939277514259587, "grad_norm": 0.0576580959432977, "learning_rate": 5.9203890906022245e-06, "loss": 0.0026, "step": 151080 }, { "epoch": 0.9939935396011921, "grad_norm": 0.027988764980980237, "learning_rate": 5.919824784159567e-06, "loss": 0.001, "step": 151090 }, { "epoch": 0.9940593277764255, "grad_norm": 0.05540608138492022, "learning_rate": 5.919260465589813e-06, "loss": 0.0008, "step": 151100 }, { "epoch": 0.9941251159516589, "grad_norm": 0.050327568214692024, "learning_rate": 5.918696134900406e-06, "loss": 0.0014, "step": 151110 }, { "epoch": 0.9941909041268923, "grad_norm": 0.057710929408813436, "learning_rate": 5.918131792098782e-06, "loss": 0.0013, "step": 151120 }, { "epoch": 0.9942566923021257, "grad_norm": 0.0008709629458892345, "learning_rate": 5.917567437192385e-06, "loss": 0.0006, "step": 151130 }, { "epoch": 0.994322480477359, "grad_norm": 0.0721088288935568, "learning_rate": 5.9170030701886535e-06, "loss": 0.0033, "step": 151140 }, { "epoch": 0.9943882686525923, "grad_norm": 0.05601136995622883, "learning_rate": 5.9164386910950274e-06, "loss": 0.0008, "step": 151150 }, { "epoch": 0.9944540568278257, "grad_norm": 0.06995133651055303, "learning_rate": 5.91587429991895e-06, "loss": 0.0006, "step": 151160 }, { "epoch": 0.9945198450030591, "grad_norm": 0.1206174952764591, "learning_rate": 5.91530989666786e-06, "loss": 0.0014, "step": 151170 }, { "epoch": 0.9945856331782925, "grad_norm": 0.1616169036577152, "learning_rate": 5.9147454813492e-06, "loss": 0.0012, "step": 151180 }, { "epoch": 0.9946514213535259, "grad_norm": 0.020776439491135515, "learning_rate": 5.91418105397041e-06, "loss": 0.0022, "step": 151190 }, { "epoch": 0.9947172095287593, "grad_norm": 0.039648025669612966, "learning_rate": 5.913616614538934e-06, "loss": 0.0009, "step": 151200 }, { "epoch": 0.9947829977039927, "grad_norm": 0.008860700430828556, "learning_rate": 5.913052163062211e-06, "loss": 0.0017, "step": 151210 }, { "epoch": 0.9948487858792261, "grad_norm": 0.06702377101585263, "learning_rate": 5.912487699547683e-06, "loss": 0.0013, "step": 151220 }, { "epoch": 0.9949145740544595, "grad_norm": 0.06768443467968041, "learning_rate": 5.911923224002793e-06, "loss": 0.001, "step": 151230 }, { "epoch": 0.9949803622296929, "grad_norm": 0.005199931632665758, "learning_rate": 5.911358736434984e-06, "loss": 0.0007, "step": 151240 }, { "epoch": 0.9950461504049262, "grad_norm": 0.01815786508843248, "learning_rate": 5.910794236851697e-06, "loss": 0.0009, "step": 151250 }, { "epoch": 0.9951119385801596, "grad_norm": 0.01718969929023371, "learning_rate": 5.910229725260374e-06, "loss": 0.0011, "step": 151260 }, { "epoch": 0.995177726755393, "grad_norm": 0.04710566715331001, "learning_rate": 5.909665201668459e-06, "loss": 0.0009, "step": 151270 }, { "epoch": 0.9952435149306263, "grad_norm": 0.007739319596808023, "learning_rate": 5.9091006660833925e-06, "loss": 0.0007, "step": 151280 }, { "epoch": 0.9953093031058597, "grad_norm": 0.05105378819182339, "learning_rate": 5.90853611851262e-06, "loss": 0.0011, "step": 151290 }, { "epoch": 0.9953750912810931, "grad_norm": 0.01118374975862889, "learning_rate": 5.9079715589635825e-06, "loss": 0.0005, "step": 151300 }, { "epoch": 0.9954408794563265, "grad_norm": 0.05778901181024675, "learning_rate": 5.9074069874437246e-06, "loss": 0.0003, "step": 151310 }, { "epoch": 0.9955066676315599, "grad_norm": 0.020846082471481685, "learning_rate": 5.90684240396049e-06, "loss": 0.0005, "step": 151320 }, { "epoch": 0.9955724558067933, "grad_norm": 0.031722105755949276, "learning_rate": 5.906277808521321e-06, "loss": 0.0005, "step": 151330 }, { "epoch": 0.9956382439820267, "grad_norm": 0.07114023889993447, "learning_rate": 5.905713201133662e-06, "loss": 0.0007, "step": 151340 }, { "epoch": 0.99570403215726, "grad_norm": 0.064769281268409, "learning_rate": 5.905148581804956e-06, "loss": 0.0012, "step": 151350 }, { "epoch": 0.9957698203324934, "grad_norm": 0.036166960321796456, "learning_rate": 5.904583950542647e-06, "loss": 0.0024, "step": 151360 }, { "epoch": 0.9958356085077268, "grad_norm": 0.025930378176869627, "learning_rate": 5.904019307354182e-06, "loss": 0.001, "step": 151370 }, { "epoch": 0.9959013966829602, "grad_norm": 0.11537296359353369, "learning_rate": 5.903454652247001e-06, "loss": 0.0011, "step": 151380 }, { "epoch": 0.9959671848581936, "grad_norm": 0.03690770237647177, "learning_rate": 5.902889985228553e-06, "loss": 0.0008, "step": 151390 }, { "epoch": 0.996032973033427, "grad_norm": 0.031807892512489465, "learning_rate": 5.902325306306279e-06, "loss": 0.0009, "step": 151400 }, { "epoch": 0.9960987612086604, "grad_norm": 0.05379049103854773, "learning_rate": 5.901760615487626e-06, "loss": 0.0013, "step": 151410 }, { "epoch": 0.9961645493838938, "grad_norm": 0.0002330046566767093, "learning_rate": 5.901195912780038e-06, "loss": 0.0014, "step": 151420 }, { "epoch": 0.9962303375591272, "grad_norm": 0.008505808128579657, "learning_rate": 5.900631198190958e-06, "loss": 0.0006, "step": 151430 }, { "epoch": 0.9962961257343605, "grad_norm": 0.0634342802621661, "learning_rate": 5.900066471727836e-06, "loss": 0.0011, "step": 151440 }, { "epoch": 0.9963619139095939, "grad_norm": 0.016902284211496584, "learning_rate": 5.899501733398114e-06, "loss": 0.001, "step": 151450 }, { "epoch": 0.9964277020848272, "grad_norm": 0.04234760851937986, "learning_rate": 5.898936983209238e-06, "loss": 0.0031, "step": 151460 }, { "epoch": 0.9964934902600606, "grad_norm": 0.000923886778217873, "learning_rate": 5.898372221168655e-06, "loss": 0.0005, "step": 151470 }, { "epoch": 0.996559278435294, "grad_norm": 0.015498783925908123, "learning_rate": 5.8978074472838095e-06, "loss": 0.0006, "step": 151480 }, { "epoch": 0.9966250666105274, "grad_norm": 0.01599402915002757, "learning_rate": 5.897242661562149e-06, "loss": 0.0009, "step": 151490 }, { "epoch": 0.9966908547857608, "grad_norm": 0.05542293235295157, "learning_rate": 5.89667786401112e-06, "loss": 0.0014, "step": 151500 }, { "epoch": 0.9967566429609942, "grad_norm": 0.029257893116802314, "learning_rate": 5.896113054638166e-06, "loss": 0.0008, "step": 151510 }, { "epoch": 0.9968224311362276, "grad_norm": 0.010993022898579697, "learning_rate": 5.8955482334507355e-06, "loss": 0.001, "step": 151520 }, { "epoch": 0.996888219311461, "grad_norm": 0.07545818826381057, "learning_rate": 5.894983400456276e-06, "loss": 0.0019, "step": 151530 }, { "epoch": 0.9969540074866944, "grad_norm": 0.031227515635963248, "learning_rate": 5.894418555662231e-06, "loss": 0.0007, "step": 151540 }, { "epoch": 0.9970197956619278, "grad_norm": 0.009948423502253004, "learning_rate": 5.893853699076051e-06, "loss": 0.0029, "step": 151550 }, { "epoch": 0.9970855838371611, "grad_norm": 0.02343566460254393, "learning_rate": 5.893288830705182e-06, "loss": 0.0009, "step": 151560 }, { "epoch": 0.9971513720123945, "grad_norm": 0.0030933868286806485, "learning_rate": 5.892723950557072e-06, "loss": 0.0006, "step": 151570 }, { "epoch": 0.9972171601876278, "grad_norm": 0.0046322108342240036, "learning_rate": 5.892159058639167e-06, "loss": 0.0012, "step": 151580 }, { "epoch": 0.9972829483628612, "grad_norm": 0.016269969521048682, "learning_rate": 5.891594154958916e-06, "loss": 0.0008, "step": 151590 }, { "epoch": 0.9973487365380946, "grad_norm": 0.04780500142603286, "learning_rate": 5.891029239523767e-06, "loss": 0.0007, "step": 151600 }, { "epoch": 0.997414524713328, "grad_norm": 0.01770468764851282, "learning_rate": 5.8904643123411644e-06, "loss": 0.0009, "step": 151610 }, { "epoch": 0.9974803128885614, "grad_norm": 0.016281412978858663, "learning_rate": 5.889899373418561e-06, "loss": 0.0007, "step": 151620 }, { "epoch": 0.9975461010637948, "grad_norm": 0.06089420943035112, "learning_rate": 5.889334422763402e-06, "loss": 0.0009, "step": 151630 }, { "epoch": 0.9976118892390282, "grad_norm": 0.021739306470147417, "learning_rate": 5.888769460383137e-06, "loss": 0.0011, "step": 151640 }, { "epoch": 0.9976776774142616, "grad_norm": 0.0035569011297613452, "learning_rate": 5.888204486285214e-06, "loss": 0.0009, "step": 151650 }, { "epoch": 0.9977434655894949, "grad_norm": 0.04528194416475341, "learning_rate": 5.887639500477081e-06, "loss": 0.0008, "step": 151660 }, { "epoch": 0.9978092537647283, "grad_norm": 0.0015318934077753754, "learning_rate": 5.887074502966189e-06, "loss": 0.0007, "step": 151670 }, { "epoch": 0.9978750419399617, "grad_norm": 0.0234863992985171, "learning_rate": 5.886509493759985e-06, "loss": 0.001, "step": 151680 }, { "epoch": 0.9979408301151951, "grad_norm": 0.0005798786971997005, "learning_rate": 5.8859444728659195e-06, "loss": 0.0004, "step": 151690 }, { "epoch": 0.9980066182904285, "grad_norm": 0.04139380535419651, "learning_rate": 5.885379440291442e-06, "loss": 0.001, "step": 151700 }, { "epoch": 0.9980724064656619, "grad_norm": 0.028738184251771357, "learning_rate": 5.884814396044001e-06, "loss": 0.0015, "step": 151710 }, { "epoch": 0.9981381946408953, "grad_norm": 0.04923825907418125, "learning_rate": 5.884249340131046e-06, "loss": 0.0007, "step": 151720 }, { "epoch": 0.9982039828161287, "grad_norm": 0.007846187562308286, "learning_rate": 5.8836842725600275e-06, "loss": 0.0009, "step": 151730 }, { "epoch": 0.998269770991362, "grad_norm": 0.051680408746667815, "learning_rate": 5.883119193338393e-06, "loss": 0.0015, "step": 151740 }, { "epoch": 0.9983355591665954, "grad_norm": 0.06639745954845565, "learning_rate": 5.882554102473596e-06, "loss": 0.0007, "step": 151750 }, { "epoch": 0.9984013473418287, "grad_norm": 0.045692956785321405, "learning_rate": 5.881988999973087e-06, "loss": 0.001, "step": 151760 }, { "epoch": 0.9984671355170621, "grad_norm": 0.0074168092961900195, "learning_rate": 5.881423885844314e-06, "loss": 0.0016, "step": 151770 }, { "epoch": 0.9985329236922955, "grad_norm": 0.04427959431422592, "learning_rate": 5.880858760094727e-06, "loss": 0.001, "step": 151780 }, { "epoch": 0.9985987118675289, "grad_norm": 0.025977585430074786, "learning_rate": 5.88029362273178e-06, "loss": 0.0007, "step": 151790 }, { "epoch": 0.9986645000427623, "grad_norm": 0.027437723140047558, "learning_rate": 5.879728473762919e-06, "loss": 0.001, "step": 151800 }, { "epoch": 0.9987302882179957, "grad_norm": 0.06308328240647851, "learning_rate": 5.879163313195599e-06, "loss": 0.0019, "step": 151810 }, { "epoch": 0.9987960763932291, "grad_norm": 0.003987388301180326, "learning_rate": 5.878598141037272e-06, "loss": 0.0025, "step": 151820 }, { "epoch": 0.9988618645684625, "grad_norm": 0.10341905729024273, "learning_rate": 5.878032957295385e-06, "loss": 0.0008, "step": 151830 }, { "epoch": 0.9989276527436959, "grad_norm": 0.03610760231572761, "learning_rate": 5.877467761977393e-06, "loss": 0.0014, "step": 151840 }, { "epoch": 0.9989934409189293, "grad_norm": 0.013465267019669547, "learning_rate": 5.8769025550907465e-06, "loss": 0.0008, "step": 151850 }, { "epoch": 0.9990592290941626, "grad_norm": 0.055609303401753345, "learning_rate": 5.876337336642897e-06, "loss": 0.004, "step": 151860 }, { "epoch": 0.999125017269396, "grad_norm": 0.021350772618138284, "learning_rate": 5.875772106641295e-06, "loss": 0.0005, "step": 151870 }, { "epoch": 0.9991908054446293, "grad_norm": 0.04263910375500955, "learning_rate": 5.8752068650933954e-06, "loss": 0.0013, "step": 151880 }, { "epoch": 0.9992565936198627, "grad_norm": 0.025920305478035695, "learning_rate": 5.8746416120066494e-06, "loss": 0.0011, "step": 151890 }, { "epoch": 0.9993223817950961, "grad_norm": 0.03423612846665298, "learning_rate": 5.874076347388509e-06, "loss": 0.0017, "step": 151900 }, { "epoch": 0.9993881699703295, "grad_norm": 0.029463865519282144, "learning_rate": 5.873511071246427e-06, "loss": 0.0008, "step": 151910 }, { "epoch": 0.9994539581455629, "grad_norm": 0.0391377464598126, "learning_rate": 5.872945783587856e-06, "loss": 0.0008, "step": 151920 }, { "epoch": 0.9995197463207963, "grad_norm": 0.05453145216396577, "learning_rate": 5.8723804844202484e-06, "loss": 0.0013, "step": 151930 }, { "epoch": 0.9995855344960297, "grad_norm": 0.009105567350104466, "learning_rate": 5.871815173751057e-06, "loss": 0.0005, "step": 151940 }, { "epoch": 0.9996513226712631, "grad_norm": 0.012660956248034047, "learning_rate": 5.871249851587736e-06, "loss": 0.0009, "step": 151950 }, { "epoch": 0.9997171108464965, "grad_norm": 0.10309119388124761, "learning_rate": 5.870684517937738e-06, "loss": 0.001, "step": 151960 }, { "epoch": 0.9997828990217298, "grad_norm": 0.10957867460266546, "learning_rate": 5.870119172808517e-06, "loss": 0.0018, "step": 151970 }, { "epoch": 0.9998486871969632, "grad_norm": 0.01254365876470964, "learning_rate": 5.869553816207527e-06, "loss": 0.0005, "step": 151980 }, { "epoch": 0.9999144753721966, "grad_norm": 0.030462315118851717, "learning_rate": 5.86898844814222e-06, "loss": 0.0012, "step": 151990 }, { "epoch": 0.99998026354743, "grad_norm": 0.036944214428365596, "learning_rate": 5.868423068620049e-06, "loss": 0.001, "step": 152000 }, { "epoch": 1.0000460517226635, "grad_norm": 0.04163607716463599, "learning_rate": 5.867857677648472e-06, "loss": 0.0013, "step": 152010 }, { "epoch": 1.0001118398978968, "grad_norm": 0.0556560840277051, "learning_rate": 5.867292275234941e-06, "loss": 0.0007, "step": 152020 }, { "epoch": 1.00017762807313, "grad_norm": 0.07963270590740816, "learning_rate": 5.866726861386911e-06, "loss": 0.0008, "step": 152030 }, { "epoch": 1.0002434162483635, "grad_norm": 0.023210142507264275, "learning_rate": 5.866161436111835e-06, "loss": 0.0015, "step": 152040 }, { "epoch": 1.0003092044235968, "grad_norm": 0.07048965089783249, "learning_rate": 5.865595999417169e-06, "loss": 0.0004, "step": 152050 }, { "epoch": 1.0003749925988303, "grad_norm": 0.012791909064030871, "learning_rate": 5.865030551310366e-06, "loss": 0.0004, "step": 152060 }, { "epoch": 1.0004407807740636, "grad_norm": 0.029148106184686228, "learning_rate": 5.8644650917988825e-06, "loss": 0.001, "step": 152070 }, { "epoch": 1.0005065689492971, "grad_norm": 0.03182981175603506, "learning_rate": 5.863899620890175e-06, "loss": 0.0002, "step": 152080 }, { "epoch": 1.0005723571245304, "grad_norm": 0.010808294885127617, "learning_rate": 5.863334138591695e-06, "loss": 0.0006, "step": 152090 }, { "epoch": 1.000638145299764, "grad_norm": 0.03237614342700379, "learning_rate": 5.862768644910901e-06, "loss": 0.0006, "step": 152100 }, { "epoch": 1.0007039334749972, "grad_norm": 0.015100220110741006, "learning_rate": 5.862203139855248e-06, "loss": 0.0021, "step": 152110 }, { "epoch": 1.0007697216502305, "grad_norm": 0.11870911913175554, "learning_rate": 5.8616376234321915e-06, "loss": 0.0011, "step": 152120 }, { "epoch": 1.000835509825464, "grad_norm": 0.02414935523178231, "learning_rate": 5.861072095649185e-06, "loss": 0.001, "step": 152130 }, { "epoch": 1.0009012980006973, "grad_norm": 0.06325445498802551, "learning_rate": 5.860506556513689e-06, "loss": 0.0009, "step": 152140 }, { "epoch": 1.0009670861759308, "grad_norm": 0.008238252074315184, "learning_rate": 5.8599410060331566e-06, "loss": 0.0007, "step": 152150 }, { "epoch": 1.001032874351164, "grad_norm": 0.01702075878083972, "learning_rate": 5.859375444215044e-06, "loss": 0.0005, "step": 152160 }, { "epoch": 1.0010986625263976, "grad_norm": 0.004220157134258863, "learning_rate": 5.858809871066809e-06, "loss": 0.0005, "step": 152170 }, { "epoch": 1.0011644507016308, "grad_norm": 0.0005515705289005171, "learning_rate": 5.858244286595908e-06, "loss": 0.0006, "step": 152180 }, { "epoch": 1.0012302388768644, "grad_norm": 0.023694701443139368, "learning_rate": 5.857678690809796e-06, "loss": 0.0016, "step": 152190 }, { "epoch": 1.0012960270520976, "grad_norm": 0.10722334797765244, "learning_rate": 5.857113083715932e-06, "loss": 0.001, "step": 152200 }, { "epoch": 1.0013618152273311, "grad_norm": 0.03114594707491946, "learning_rate": 5.856547465321771e-06, "loss": 0.0008, "step": 152210 }, { "epoch": 1.0014276034025644, "grad_norm": 0.054332040540470763, "learning_rate": 5.855981835634773e-06, "loss": 0.0011, "step": 152220 }, { "epoch": 1.0014933915777977, "grad_norm": 0.13913509972208185, "learning_rate": 5.855416194662394e-06, "loss": 0.0009, "step": 152230 }, { "epoch": 1.0015591797530312, "grad_norm": 0.07622035755389185, "learning_rate": 5.854850542412091e-06, "loss": 0.0011, "step": 152240 }, { "epoch": 1.0016249679282645, "grad_norm": 0.04976558273643714, "learning_rate": 5.854284878891321e-06, "loss": 0.0007, "step": 152250 }, { "epoch": 1.001690756103498, "grad_norm": 0.023389503421932672, "learning_rate": 5.853719204107543e-06, "loss": 0.001, "step": 152260 }, { "epoch": 1.0017565442787313, "grad_norm": 0.0185535476662283, "learning_rate": 5.853153518068214e-06, "loss": 0.0007, "step": 152270 }, { "epoch": 1.0018223324539648, "grad_norm": 0.02428599058574973, "learning_rate": 5.852587820780794e-06, "loss": 0.0013, "step": 152280 }, { "epoch": 1.001888120629198, "grad_norm": 0.00868477595415399, "learning_rate": 5.85202211225274e-06, "loss": 0.0011, "step": 152290 }, { "epoch": 1.0019539088044316, "grad_norm": 0.030863224364983288, "learning_rate": 5.85145639249151e-06, "loss": 0.0008, "step": 152300 }, { "epoch": 1.0020196969796649, "grad_norm": 0.08486860565616101, "learning_rate": 5.850890661504562e-06, "loss": 0.0007, "step": 152310 }, { "epoch": 1.0020854851548981, "grad_norm": 0.02842256935091608, "learning_rate": 5.850324919299355e-06, "loss": 0.0011, "step": 152320 }, { "epoch": 1.0021512733301317, "grad_norm": 0.027040242066128106, "learning_rate": 5.84975916588335e-06, "loss": 0.0007, "step": 152330 }, { "epoch": 1.002217061505365, "grad_norm": 0.13808967014471368, "learning_rate": 5.849193401264004e-06, "loss": 0.0011, "step": 152340 }, { "epoch": 1.0022828496805984, "grad_norm": 0.05923527600756648, "learning_rate": 5.848627625448776e-06, "loss": 0.0006, "step": 152350 }, { "epoch": 1.0023486378558317, "grad_norm": 0.010668482170172012, "learning_rate": 5.848061838445126e-06, "loss": 0.0006, "step": 152360 }, { "epoch": 1.0024144260310652, "grad_norm": 0.06521260119578094, "learning_rate": 5.847496040260513e-06, "loss": 0.001, "step": 152370 }, { "epoch": 1.0024802142062985, "grad_norm": 0.0023155737645498995, "learning_rate": 5.846930230902396e-06, "loss": 0.0009, "step": 152380 }, { "epoch": 1.002546002381532, "grad_norm": 0.03372734177985097, "learning_rate": 5.846364410378236e-06, "loss": 0.0006, "step": 152390 }, { "epoch": 1.0026117905567653, "grad_norm": 0.06295091212531309, "learning_rate": 5.845798578695492e-06, "loss": 0.0009, "step": 152400 }, { "epoch": 1.0026775787319988, "grad_norm": 0.07235881962758325, "learning_rate": 5.845232735861624e-06, "loss": 0.0011, "step": 152410 }, { "epoch": 1.002743366907232, "grad_norm": 0.0567625214965537, "learning_rate": 5.844666881884094e-06, "loss": 0.001, "step": 152420 }, { "epoch": 1.0028091550824654, "grad_norm": 0.1601118496505654, "learning_rate": 5.844101016770359e-06, "loss": 0.0007, "step": 152430 }, { "epoch": 1.0028749432576989, "grad_norm": 0.010573223202982027, "learning_rate": 5.843535140527882e-06, "loss": 0.0006, "step": 152440 }, { "epoch": 1.0029407314329322, "grad_norm": 0.04863667306559252, "learning_rate": 5.842969253164121e-06, "loss": 0.0016, "step": 152450 }, { "epoch": 1.0030065196081657, "grad_norm": 0.08757807366865496, "learning_rate": 5.84240335468654e-06, "loss": 0.0026, "step": 152460 }, { "epoch": 1.003072307783399, "grad_norm": 0.045891377763918745, "learning_rate": 5.8418374451025985e-06, "loss": 0.0007, "step": 152470 }, { "epoch": 1.0031380959586325, "grad_norm": 0.011326719125558526, "learning_rate": 5.841271524419757e-06, "loss": 0.0012, "step": 152480 }, { "epoch": 1.0032038841338657, "grad_norm": 0.09024735789623098, "learning_rate": 5.840705592645477e-06, "loss": 0.0018, "step": 152490 }, { "epoch": 1.0032696723090992, "grad_norm": 0.08147975421558082, "learning_rate": 5.84013964978722e-06, "loss": 0.0009, "step": 152500 }, { "epoch": 1.0033354604843325, "grad_norm": 0.01823419110348836, "learning_rate": 5.839573695852445e-06, "loss": 0.001, "step": 152510 }, { "epoch": 1.003401248659566, "grad_norm": 0.0482648063928891, "learning_rate": 5.839007730848618e-06, "loss": 0.0015, "step": 152520 }, { "epoch": 1.0034670368347993, "grad_norm": 0.016116523491129183, "learning_rate": 5.838441754783199e-06, "loss": 0.0012, "step": 152530 }, { "epoch": 1.0035328250100326, "grad_norm": 0.01887981893429516, "learning_rate": 5.83787576766365e-06, "loss": 0.0006, "step": 152540 }, { "epoch": 1.003598613185266, "grad_norm": 0.02655408128186566, "learning_rate": 5.837309769497431e-06, "loss": 0.0023, "step": 152550 }, { "epoch": 1.0036644013604994, "grad_norm": 0.01819404466487187, "learning_rate": 5.836743760292008e-06, "loss": 0.0009, "step": 152560 }, { "epoch": 1.003730189535733, "grad_norm": 0.03728419672612027, "learning_rate": 5.836177740054838e-06, "loss": 0.0016, "step": 152570 }, { "epoch": 1.0037959777109662, "grad_norm": 0.02177634132255158, "learning_rate": 5.835611708793388e-06, "loss": 0.0007, "step": 152580 }, { "epoch": 1.0038617658861997, "grad_norm": 0.002352843230649433, "learning_rate": 5.83504566651512e-06, "loss": 0.0027, "step": 152590 }, { "epoch": 1.003927554061433, "grad_norm": 0.10431710964305269, "learning_rate": 5.834479613227496e-06, "loss": 0.0009, "step": 152600 }, { "epoch": 1.0039933422366665, "grad_norm": 0.005064287489737138, "learning_rate": 5.8339135489379795e-06, "loss": 0.0006, "step": 152610 }, { "epoch": 1.0040591304118998, "grad_norm": 0.025645516838383824, "learning_rate": 5.833347473654032e-06, "loss": 0.0013, "step": 152620 }, { "epoch": 1.004124918587133, "grad_norm": 0.026906791704727623, "learning_rate": 5.832781387383119e-06, "loss": 0.0008, "step": 152630 }, { "epoch": 1.0041907067623665, "grad_norm": 0.11458853902082208, "learning_rate": 5.832215290132701e-06, "loss": 0.0015, "step": 152640 }, { "epoch": 1.0042564949375998, "grad_norm": 0.04017481937248461, "learning_rate": 5.831649181910244e-06, "loss": 0.0007, "step": 152650 }, { "epoch": 1.0043222831128333, "grad_norm": 0.014581368412616187, "learning_rate": 5.831083062723211e-06, "loss": 0.0007, "step": 152660 }, { "epoch": 1.0043880712880666, "grad_norm": 0.1428236469507475, "learning_rate": 5.830516932579066e-06, "loss": 0.0009, "step": 152670 }, { "epoch": 1.0044538594633001, "grad_norm": 0.09460172510165447, "learning_rate": 5.829950791485272e-06, "loss": 0.0011, "step": 152680 }, { "epoch": 1.0045196476385334, "grad_norm": 0.003219951267548285, "learning_rate": 5.829384639449293e-06, "loss": 0.0013, "step": 152690 }, { "epoch": 1.004585435813767, "grad_norm": 0.019596268097997374, "learning_rate": 5.828818476478593e-06, "loss": 0.0009, "step": 152700 }, { "epoch": 1.0046512239890002, "grad_norm": 0.03566430362651741, "learning_rate": 5.828252302580639e-06, "loss": 0.001, "step": 152710 }, { "epoch": 1.0047170121642337, "grad_norm": 0.02743280256876114, "learning_rate": 5.8276861177628926e-06, "loss": 0.001, "step": 152720 }, { "epoch": 1.004782800339467, "grad_norm": 0.06569037569776365, "learning_rate": 5.827119922032821e-06, "loss": 0.0007, "step": 152730 }, { "epoch": 1.0048485885147003, "grad_norm": 0.029249929110537514, "learning_rate": 5.826553715397887e-06, "loss": 0.0014, "step": 152740 }, { "epoch": 1.0049143766899338, "grad_norm": 0.024485928489668934, "learning_rate": 5.825987497865556e-06, "loss": 0.0008, "step": 152750 }, { "epoch": 1.004980164865167, "grad_norm": 0.01789563497927328, "learning_rate": 5.825421269443293e-06, "loss": 0.0012, "step": 152760 }, { "epoch": 1.0050459530404006, "grad_norm": 0.12720709306193753, "learning_rate": 5.824855030138563e-06, "loss": 0.0019, "step": 152770 }, { "epoch": 1.0051117412156338, "grad_norm": 0.0016401196818677732, "learning_rate": 5.824288779958832e-06, "loss": 0.0008, "step": 152780 }, { "epoch": 1.0051775293908674, "grad_norm": 0.021493557702533205, "learning_rate": 5.823722518911566e-06, "loss": 0.0009, "step": 152790 }, { "epoch": 1.0052433175661006, "grad_norm": 0.023787503794265193, "learning_rate": 5.82315624700423e-06, "loss": 0.0007, "step": 152800 }, { "epoch": 1.0053091057413341, "grad_norm": 0.010755202705462055, "learning_rate": 5.82258996424429e-06, "loss": 0.0015, "step": 152810 }, { "epoch": 1.0053748939165674, "grad_norm": 0.020613538473945175, "learning_rate": 5.822023670639212e-06, "loss": 0.0013, "step": 152820 }, { "epoch": 1.005440682091801, "grad_norm": 0.03938798091131021, "learning_rate": 5.821457366196459e-06, "loss": 0.0005, "step": 152830 }, { "epoch": 1.0055064702670342, "grad_norm": 0.04756196084733997, "learning_rate": 5.820891050923503e-06, "loss": 0.001, "step": 152840 }, { "epoch": 1.0055722584422675, "grad_norm": 0.02005685697506282, "learning_rate": 5.820324724827807e-06, "loss": 0.0009, "step": 152850 }, { "epoch": 1.005638046617501, "grad_norm": 0.07239864100268832, "learning_rate": 5.8197583879168374e-06, "loss": 0.0007, "step": 152860 }, { "epoch": 1.0057038347927343, "grad_norm": 0.030482980221447926, "learning_rate": 5.819192040198062e-06, "loss": 0.0009, "step": 152870 }, { "epoch": 1.0057696229679678, "grad_norm": 0.02144324540600294, "learning_rate": 5.818625681678947e-06, "loss": 0.0011, "step": 152880 }, { "epoch": 1.005835411143201, "grad_norm": 0.021736537362020653, "learning_rate": 5.818059312366959e-06, "loss": 0.0009, "step": 152890 }, { "epoch": 1.0059011993184346, "grad_norm": 0.022487422561169146, "learning_rate": 5.817492932269565e-06, "loss": 0.0013, "step": 152900 }, { "epoch": 1.0059669874936679, "grad_norm": 0.03493565911317501, "learning_rate": 5.8169265413942335e-06, "loss": 0.0007, "step": 152910 }, { "epoch": 1.0060327756689014, "grad_norm": 0.03571595626424287, "learning_rate": 5.816360139748432e-06, "loss": 0.0014, "step": 152920 }, { "epoch": 1.0060985638441347, "grad_norm": 0.012424855230725918, "learning_rate": 5.8157937273396256e-06, "loss": 0.001, "step": 152930 }, { "epoch": 1.006164352019368, "grad_norm": 0.04894344767051011, "learning_rate": 5.815227304175284e-06, "loss": 0.0008, "step": 152940 }, { "epoch": 1.0062301401946014, "grad_norm": 0.057672063513852186, "learning_rate": 5.814660870262875e-06, "loss": 0.0011, "step": 152950 }, { "epoch": 1.0062959283698347, "grad_norm": 0.08770067587646198, "learning_rate": 5.814094425609864e-06, "loss": 0.0012, "step": 152960 }, { "epoch": 1.0063617165450682, "grad_norm": 0.009071715894781519, "learning_rate": 5.8135279702237225e-06, "loss": 0.001, "step": 152970 }, { "epoch": 1.0064275047203015, "grad_norm": 0.02369505864885192, "learning_rate": 5.812961504111918e-06, "loss": 0.0007, "step": 152980 }, { "epoch": 1.006493292895535, "grad_norm": 0.026081464333473012, "learning_rate": 5.8123950272819174e-06, "loss": 0.0011, "step": 152990 }, { "epoch": 1.0065590810707683, "grad_norm": 0.021326666008816084, "learning_rate": 5.81182853974119e-06, "loss": 0.0004, "step": 153000 }, { "epoch": 1.0066248692460018, "grad_norm": 0.024212388814450436, "learning_rate": 5.811262041497205e-06, "loss": 0.0007, "step": 153010 }, { "epoch": 1.006690657421235, "grad_norm": 0.03627960744715486, "learning_rate": 5.81069553255743e-06, "loss": 0.0004, "step": 153020 }, { "epoch": 1.0067564455964686, "grad_norm": 0.0680429976004182, "learning_rate": 5.810129012929334e-06, "loss": 0.0007, "step": 153030 }, { "epoch": 1.0068222337717019, "grad_norm": 0.05521011962677724, "learning_rate": 5.8095624826203876e-06, "loss": 0.0007, "step": 153040 }, { "epoch": 1.0068880219469352, "grad_norm": 0.0027038157586237762, "learning_rate": 5.808995941638059e-06, "loss": 0.001, "step": 153050 }, { "epoch": 1.0069538101221687, "grad_norm": 0.012968070478753951, "learning_rate": 5.808429389989818e-06, "loss": 0.0004, "step": 153060 }, { "epoch": 1.007019598297402, "grad_norm": 0.024262205352500666, "learning_rate": 5.807862827683133e-06, "loss": 0.0013, "step": 153070 }, { "epoch": 1.0070853864726355, "grad_norm": 0.14955145555793825, "learning_rate": 5.807296254725475e-06, "loss": 0.001, "step": 153080 }, { "epoch": 1.0071511746478687, "grad_norm": 0.012443352075199067, "learning_rate": 5.806729671124312e-06, "loss": 0.0008, "step": 153090 }, { "epoch": 1.0072169628231022, "grad_norm": 0.04020610976755493, "learning_rate": 5.8061630768871144e-06, "loss": 0.0005, "step": 153100 }, { "epoch": 1.0072827509983355, "grad_norm": 0.02875216446719251, "learning_rate": 5.805596472021355e-06, "loss": 0.0008, "step": 153110 }, { "epoch": 1.007348539173569, "grad_norm": 0.05002366708320544, "learning_rate": 5.805029856534501e-06, "loss": 0.0015, "step": 153120 }, { "epoch": 1.0074143273488023, "grad_norm": 0.01790144890354043, "learning_rate": 5.804463230434025e-06, "loss": 0.001, "step": 153130 }, { "epoch": 1.0074801155240356, "grad_norm": 0.10801356334170467, "learning_rate": 5.803896593727394e-06, "loss": 0.001, "step": 153140 }, { "epoch": 1.007545903699269, "grad_norm": 0.0036316728160202187, "learning_rate": 5.803329946422082e-06, "loss": 0.0006, "step": 153150 }, { "epoch": 1.0076116918745024, "grad_norm": 0.06535519814360209, "learning_rate": 5.8027632885255555e-06, "loss": 0.0005, "step": 153160 }, { "epoch": 1.007677480049736, "grad_norm": 0.010394482744545366, "learning_rate": 5.802196620045292e-06, "loss": 0.0004, "step": 153170 }, { "epoch": 1.0077432682249692, "grad_norm": 0.032748639472453614, "learning_rate": 5.801629940988758e-06, "loss": 0.0008, "step": 153180 }, { "epoch": 1.0078090564002027, "grad_norm": 0.006911432959674511, "learning_rate": 5.801063251363425e-06, "loss": 0.0006, "step": 153190 }, { "epoch": 1.007874844575436, "grad_norm": 0.047730185802830456, "learning_rate": 5.800496551176765e-06, "loss": 0.0004, "step": 153200 }, { "epoch": 1.0079406327506695, "grad_norm": 0.012350068444545241, "learning_rate": 5.799929840436249e-06, "loss": 0.0017, "step": 153210 }, { "epoch": 1.0080064209259028, "grad_norm": 0.08145899014194027, "learning_rate": 5.7993631191493485e-06, "loss": 0.0018, "step": 153220 }, { "epoch": 1.0080722091011363, "grad_norm": 0.165385679010209, "learning_rate": 5.798796387323536e-06, "loss": 0.0013, "step": 153230 }, { "epoch": 1.0081379972763695, "grad_norm": 0.07072012847840042, "learning_rate": 5.798229644966284e-06, "loss": 0.0014, "step": 153240 }, { "epoch": 1.0082037854516028, "grad_norm": 0.016542483493577502, "learning_rate": 5.797662892085063e-06, "loss": 0.0008, "step": 153250 }, { "epoch": 1.0082695736268363, "grad_norm": 0.10308967554799457, "learning_rate": 5.797096128687346e-06, "loss": 0.0007, "step": 153260 }, { "epoch": 1.0083353618020696, "grad_norm": 0.009834028812936423, "learning_rate": 5.796529354780604e-06, "loss": 0.0006, "step": 153270 }, { "epoch": 1.0084011499773031, "grad_norm": 0.1719051087995066, "learning_rate": 5.795962570372312e-06, "loss": 0.0008, "step": 153280 }, { "epoch": 1.0084669381525364, "grad_norm": 0.024414600924480622, "learning_rate": 5.795395775469938e-06, "loss": 0.0005, "step": 153290 }, { "epoch": 1.00853272632777, "grad_norm": 0.16820700155443646, "learning_rate": 5.794828970080961e-06, "loss": 0.0008, "step": 153300 }, { "epoch": 1.0085985145030032, "grad_norm": 0.05839173258196158, "learning_rate": 5.794262154212849e-06, "loss": 0.0008, "step": 153310 }, { "epoch": 1.0086643026782367, "grad_norm": 0.060655973942335316, "learning_rate": 5.7936953278730765e-06, "loss": 0.0011, "step": 153320 }, { "epoch": 1.00873009085347, "grad_norm": 0.005178192552799959, "learning_rate": 5.793128491069117e-06, "loss": 0.0005, "step": 153330 }, { "epoch": 1.0087958790287035, "grad_norm": 0.0550137609468396, "learning_rate": 5.792561643808443e-06, "loss": 0.0006, "step": 153340 }, { "epoch": 1.0088616672039368, "grad_norm": 0.010402259856154634, "learning_rate": 5.791994786098527e-06, "loss": 0.0018, "step": 153350 }, { "epoch": 1.00892745537917, "grad_norm": 0.014200218204839552, "learning_rate": 5.791427917946846e-06, "loss": 0.0008, "step": 153360 }, { "epoch": 1.0089932435544036, "grad_norm": 0.011603408885533009, "learning_rate": 5.79086103936087e-06, "loss": 0.002, "step": 153370 }, { "epoch": 1.0090590317296368, "grad_norm": 0.019141018961408473, "learning_rate": 5.790294150348075e-06, "loss": 0.0013, "step": 153380 }, { "epoch": 1.0091248199048704, "grad_norm": 0.011504453542050328, "learning_rate": 5.789727250915934e-06, "loss": 0.0011, "step": 153390 }, { "epoch": 1.0091906080801036, "grad_norm": 0.008365184922565199, "learning_rate": 5.789160341071921e-06, "loss": 0.0015, "step": 153400 }, { "epoch": 1.0092563962553371, "grad_norm": 0.0891763142640609, "learning_rate": 5.78859342082351e-06, "loss": 0.0008, "step": 153410 }, { "epoch": 1.0093221844305704, "grad_norm": 0.007296236104780364, "learning_rate": 5.788026490178177e-06, "loss": 0.001, "step": 153420 }, { "epoch": 1.009387972605804, "grad_norm": 0.039080621999000385, "learning_rate": 5.787459549143395e-06, "loss": 0.0006, "step": 153430 }, { "epoch": 1.0094537607810372, "grad_norm": 0.011742139108546499, "learning_rate": 5.786892597726639e-06, "loss": 0.001, "step": 153440 }, { "epoch": 1.0095195489562705, "grad_norm": 0.017670656981877326, "learning_rate": 5.786325635935383e-06, "loss": 0.001, "step": 153450 }, { "epoch": 1.009585337131504, "grad_norm": 0.03251855745610555, "learning_rate": 5.785758663777102e-06, "loss": 0.0007, "step": 153460 }, { "epoch": 1.0096511253067373, "grad_norm": 0.06816317808375955, "learning_rate": 5.7851916812592724e-06, "loss": 0.0008, "step": 153470 }, { "epoch": 1.0097169134819708, "grad_norm": 0.02206752240568516, "learning_rate": 5.784624688389369e-06, "loss": 0.0008, "step": 153480 }, { "epoch": 1.009782701657204, "grad_norm": 0.03744904816004813, "learning_rate": 5.7840576851748665e-06, "loss": 0.0009, "step": 153490 }, { "epoch": 1.0098484898324376, "grad_norm": 0.03232521976215335, "learning_rate": 5.783490671623241e-06, "loss": 0.0038, "step": 153500 }, { "epoch": 1.0099142780076709, "grad_norm": 0.07667682246359785, "learning_rate": 5.782923647741967e-06, "loss": 0.0009, "step": 153510 }, { "epoch": 1.0099800661829044, "grad_norm": 0.022766450848037478, "learning_rate": 5.7823566135385215e-06, "loss": 0.0004, "step": 153520 }, { "epoch": 1.0100458543581377, "grad_norm": 0.061631802002494455, "learning_rate": 5.781789569020377e-06, "loss": 0.0009, "step": 153530 }, { "epoch": 1.0101116425333712, "grad_norm": 0.08213626401555275, "learning_rate": 5.781222514195015e-06, "loss": 0.0011, "step": 153540 }, { "epoch": 1.0101774307086044, "grad_norm": 0.03519427488711092, "learning_rate": 5.78065544906991e-06, "loss": 0.0009, "step": 153550 }, { "epoch": 1.0102432188838377, "grad_norm": 0.11618030204106122, "learning_rate": 5.780088373652535e-06, "loss": 0.0015, "step": 153560 }, { "epoch": 1.0103090070590712, "grad_norm": 0.05333154001563428, "learning_rate": 5.779521287950369e-06, "loss": 0.001, "step": 153570 }, { "epoch": 1.0103747952343045, "grad_norm": 0.04393223449046844, "learning_rate": 5.778954191970888e-06, "loss": 0.0013, "step": 153580 }, { "epoch": 1.010440583409538, "grad_norm": 0.004720031799048187, "learning_rate": 5.77838708572157e-06, "loss": 0.0013, "step": 153590 }, { "epoch": 1.0105063715847713, "grad_norm": 0.011561372884818389, "learning_rate": 5.777819969209888e-06, "loss": 0.0011, "step": 153600 }, { "epoch": 1.0105721597600048, "grad_norm": 0.07843348448883945, "learning_rate": 5.777252842443323e-06, "loss": 0.0011, "step": 153610 }, { "epoch": 1.010637947935238, "grad_norm": 0.10070620096572584, "learning_rate": 5.77668570542935e-06, "loss": 0.0007, "step": 153620 }, { "epoch": 1.0107037361104716, "grad_norm": 0.009139194319882317, "learning_rate": 5.776118558175448e-06, "loss": 0.0006, "step": 153630 }, { "epoch": 1.0107695242857049, "grad_norm": 0.03301462311319159, "learning_rate": 5.775551400689092e-06, "loss": 0.0004, "step": 153640 }, { "epoch": 1.0108353124609382, "grad_norm": 0.01063055841141268, "learning_rate": 5.774984232977761e-06, "loss": 0.0011, "step": 153650 }, { "epoch": 1.0109011006361717, "grad_norm": 0.051037872188216274, "learning_rate": 5.774417055048931e-06, "loss": 0.001, "step": 153660 }, { "epoch": 1.010966888811405, "grad_norm": 0.07157844946692102, "learning_rate": 5.773849866910082e-06, "loss": 0.0007, "step": 153670 }, { "epoch": 1.0110326769866385, "grad_norm": 0.03253926955724588, "learning_rate": 5.773282668568692e-06, "loss": 0.0007, "step": 153680 }, { "epoch": 1.0110984651618717, "grad_norm": 0.0012967426465022435, "learning_rate": 5.772715460032235e-06, "loss": 0.0008, "step": 153690 }, { "epoch": 1.0111642533371052, "grad_norm": 0.05238913084367448, "learning_rate": 5.772148241308196e-06, "loss": 0.0008, "step": 153700 }, { "epoch": 1.0112300415123385, "grad_norm": 0.0045845364147384824, "learning_rate": 5.771581012404046e-06, "loss": 0.0015, "step": 153710 }, { "epoch": 1.011295829687572, "grad_norm": 0.07138853796820689, "learning_rate": 5.7710137733272675e-06, "loss": 0.0012, "step": 153720 }, { "epoch": 1.0113616178628053, "grad_norm": 0.09892161139813811, "learning_rate": 5.770446524085338e-06, "loss": 0.0007, "step": 153730 }, { "epoch": 1.0114274060380388, "grad_norm": 0.060181858762751804, "learning_rate": 5.769879264685737e-06, "loss": 0.0014, "step": 153740 }, { "epoch": 1.011493194213272, "grad_norm": 0.05045154619544648, "learning_rate": 5.769311995135943e-06, "loss": 0.0009, "step": 153750 }, { "epoch": 1.0115589823885054, "grad_norm": 0.03284368145864443, "learning_rate": 5.768744715443435e-06, "loss": 0.0004, "step": 153760 }, { "epoch": 1.011624770563739, "grad_norm": 0.04622294562615322, "learning_rate": 5.768177425615692e-06, "loss": 0.0006, "step": 153770 }, { "epoch": 1.0116905587389722, "grad_norm": 0.10292054667050367, "learning_rate": 5.7676101256601926e-06, "loss": 0.0011, "step": 153780 }, { "epoch": 1.0117563469142057, "grad_norm": 0.1423775933599275, "learning_rate": 5.767042815584416e-06, "loss": 0.0011, "step": 153790 }, { "epoch": 1.011822135089439, "grad_norm": 0.007356290437940563, "learning_rate": 5.7664754953958426e-06, "loss": 0.0011, "step": 153800 }, { "epoch": 1.0118879232646725, "grad_norm": 0.0382250477145951, "learning_rate": 5.765908165101952e-06, "loss": 0.0011, "step": 153810 }, { "epoch": 1.0119537114399058, "grad_norm": 0.008148027882532695, "learning_rate": 5.765340824710224e-06, "loss": 0.0014, "step": 153820 }, { "epoch": 1.0120194996151393, "grad_norm": 0.0030155511605966545, "learning_rate": 5.764773474228138e-06, "loss": 0.0005, "step": 153830 }, { "epoch": 1.0120852877903725, "grad_norm": 0.03267371565376411, "learning_rate": 5.764206113663175e-06, "loss": 0.0008, "step": 153840 }, { "epoch": 1.012151075965606, "grad_norm": 0.015843268143542266, "learning_rate": 5.763638743022814e-06, "loss": 0.0009, "step": 153850 }, { "epoch": 1.0122168641408393, "grad_norm": 0.031712716705744895, "learning_rate": 5.763071362314534e-06, "loss": 0.0006, "step": 153860 }, { "epoch": 1.0122826523160726, "grad_norm": 0.08162904189276424, "learning_rate": 5.762503971545818e-06, "loss": 0.0007, "step": 153870 }, { "epoch": 1.0123484404913061, "grad_norm": 0.03238851838902694, "learning_rate": 5.761936570724147e-06, "loss": 0.0006, "step": 153880 }, { "epoch": 1.0124142286665394, "grad_norm": 0.0670551329438624, "learning_rate": 5.761369159857e-06, "loss": 0.0007, "step": 153890 }, { "epoch": 1.012480016841773, "grad_norm": 0.13525779638273921, "learning_rate": 5.760801738951858e-06, "loss": 0.0007, "step": 153900 }, { "epoch": 1.0125458050170062, "grad_norm": 0.02345108320189989, "learning_rate": 5.760234308016202e-06, "loss": 0.0008, "step": 153910 }, { "epoch": 1.0126115931922397, "grad_norm": 0.02269807470840289, "learning_rate": 5.759666867057512e-06, "loss": 0.0008, "step": 153920 }, { "epoch": 1.012677381367473, "grad_norm": 0.057429622085260645, "learning_rate": 5.759099416083271e-06, "loss": 0.0014, "step": 153930 }, { "epoch": 1.0127431695427065, "grad_norm": 0.003146925099108367, "learning_rate": 5.758531955100961e-06, "loss": 0.0009, "step": 153940 }, { "epoch": 1.0128089577179398, "grad_norm": 0.023047157622964592, "learning_rate": 5.757964484118063e-06, "loss": 0.0009, "step": 153950 }, { "epoch": 1.012874745893173, "grad_norm": 0.027945454903350878, "learning_rate": 5.7573970031420554e-06, "loss": 0.001, "step": 153960 }, { "epoch": 1.0129405340684066, "grad_norm": 0.0314289086075175, "learning_rate": 5.756829512180424e-06, "loss": 0.0006, "step": 153970 }, { "epoch": 1.0130063222436398, "grad_norm": 0.03575746384193326, "learning_rate": 5.7562620112406495e-06, "loss": 0.0011, "step": 153980 }, { "epoch": 1.0130721104188734, "grad_norm": 0.0035003743355547998, "learning_rate": 5.755694500330211e-06, "loss": 0.0008, "step": 153990 }, { "epoch": 1.0131378985941066, "grad_norm": 0.041973811830569266, "learning_rate": 5.7551269794565955e-06, "loss": 0.0004, "step": 154000 }, { "epoch": 1.0132036867693401, "grad_norm": 0.03290848216423662, "learning_rate": 5.754559448627282e-06, "loss": 0.0012, "step": 154010 }, { "epoch": 1.0132694749445734, "grad_norm": 0.11745106537197066, "learning_rate": 5.753991907849755e-06, "loss": 0.0009, "step": 154020 }, { "epoch": 1.013335263119807, "grad_norm": 0.05231343245313049, "learning_rate": 5.753424357131495e-06, "loss": 0.0014, "step": 154030 }, { "epoch": 1.0134010512950402, "grad_norm": 0.022252831364996308, "learning_rate": 5.7528567964799865e-06, "loss": 0.0012, "step": 154040 }, { "epoch": 1.0134668394702737, "grad_norm": 0.006702617811244386, "learning_rate": 5.7522892259027094e-06, "loss": 0.0013, "step": 154050 }, { "epoch": 1.013532627645507, "grad_norm": 0.021268451107249255, "learning_rate": 5.75172164540715e-06, "loss": 0.0008, "step": 154060 }, { "epoch": 1.0135984158207403, "grad_norm": 0.03178900074380756, "learning_rate": 5.751154055000789e-06, "loss": 0.0005, "step": 154070 }, { "epoch": 1.0136642039959738, "grad_norm": 0.03664450257652462, "learning_rate": 5.750586454691112e-06, "loss": 0.0005, "step": 154080 }, { "epoch": 1.013729992171207, "grad_norm": 0.06492804927526144, "learning_rate": 5.7500188444856e-06, "loss": 0.0016, "step": 154090 }, { "epoch": 1.0137957803464406, "grad_norm": 0.07454460740998754, "learning_rate": 5.749451224391737e-06, "loss": 0.0006, "step": 154100 }, { "epoch": 1.0138615685216739, "grad_norm": 0.027197890251930413, "learning_rate": 5.7488835944170076e-06, "loss": 0.0019, "step": 154110 }, { "epoch": 1.0139273566969074, "grad_norm": 0.012572591169145032, "learning_rate": 5.748315954568894e-06, "loss": 0.0005, "step": 154120 }, { "epoch": 1.0139931448721407, "grad_norm": 0.010252414585451322, "learning_rate": 5.747748304854881e-06, "loss": 0.0003, "step": 154130 }, { "epoch": 1.0140589330473742, "grad_norm": 0.00030794289165332763, "learning_rate": 5.747180645282453e-06, "loss": 0.0008, "step": 154140 }, { "epoch": 1.0141247212226074, "grad_norm": 0.07808875587769602, "learning_rate": 5.7466129758590925e-06, "loss": 0.0009, "step": 154150 }, { "epoch": 1.0141905093978407, "grad_norm": 0.07122331064445024, "learning_rate": 5.746045296592285e-06, "loss": 0.0012, "step": 154160 }, { "epoch": 1.0142562975730742, "grad_norm": 0.017883442617515358, "learning_rate": 5.745477607489516e-06, "loss": 0.005, "step": 154170 }, { "epoch": 1.0143220857483075, "grad_norm": 0.0649624056678609, "learning_rate": 5.744909908558266e-06, "loss": 0.001, "step": 154180 }, { "epoch": 1.014387873923541, "grad_norm": 0.06627764493390757, "learning_rate": 5.744342199806024e-06, "loss": 0.0008, "step": 154190 }, { "epoch": 1.0144536620987743, "grad_norm": 0.038805706618427055, "learning_rate": 5.743774481240273e-06, "loss": 0.001, "step": 154200 }, { "epoch": 1.0145194502740078, "grad_norm": 0.0016042184208731524, "learning_rate": 5.743206752868498e-06, "loss": 0.0007, "step": 154210 }, { "epoch": 1.014585238449241, "grad_norm": 0.032271165907895284, "learning_rate": 5.742639014698182e-06, "loss": 0.0013, "step": 154220 }, { "epoch": 1.0146510266244746, "grad_norm": 0.008373714686208102, "learning_rate": 5.742071266736814e-06, "loss": 0.0007, "step": 154230 }, { "epoch": 1.0147168147997079, "grad_norm": 0.03575805619601837, "learning_rate": 5.741503508991877e-06, "loss": 0.0009, "step": 154240 }, { "epoch": 1.0147826029749414, "grad_norm": 0.04482188136390957, "learning_rate": 5.740935741470856e-06, "loss": 0.001, "step": 154250 }, { "epoch": 1.0148483911501747, "grad_norm": 0.014035360932003867, "learning_rate": 5.7403679641812375e-06, "loss": 0.001, "step": 154260 }, { "epoch": 1.014914179325408, "grad_norm": 0.04992173271102525, "learning_rate": 5.7398001771305055e-06, "loss": 0.0004, "step": 154270 }, { "epoch": 1.0149799675006415, "grad_norm": 0.031222673358297374, "learning_rate": 5.7392323803261485e-06, "loss": 0.0006, "step": 154280 }, { "epoch": 1.0150457556758747, "grad_norm": 0.030375005217197815, "learning_rate": 5.738664573775652e-06, "loss": 0.001, "step": 154290 }, { "epoch": 1.0151115438511082, "grad_norm": 0.053399467107600816, "learning_rate": 5.7380967574864995e-06, "loss": 0.0005, "step": 154300 }, { "epoch": 1.0151773320263415, "grad_norm": 0.03369441434031384, "learning_rate": 5.737528931466178e-06, "loss": 0.0006, "step": 154310 }, { "epoch": 1.015243120201575, "grad_norm": 0.039703719643236164, "learning_rate": 5.7369610957221755e-06, "loss": 0.0011, "step": 154320 }, { "epoch": 1.0153089083768083, "grad_norm": 0.0006501552456082591, "learning_rate": 5.736393250261979e-06, "loss": 0.0018, "step": 154330 }, { "epoch": 1.0153746965520418, "grad_norm": 0.10455821111669718, "learning_rate": 5.735825395093072e-06, "loss": 0.001, "step": 154340 }, { "epoch": 1.015440484727275, "grad_norm": 0.01859483530698723, "learning_rate": 5.735257530222943e-06, "loss": 0.0012, "step": 154350 }, { "epoch": 1.0155062729025086, "grad_norm": 0.07966469097404243, "learning_rate": 5.734689655659078e-06, "loss": 0.0018, "step": 154360 }, { "epoch": 1.015572061077742, "grad_norm": 0.06570581545962441, "learning_rate": 5.734121771408964e-06, "loss": 0.0009, "step": 154370 }, { "epoch": 1.0156378492529752, "grad_norm": 0.045310165114641275, "learning_rate": 5.733553877480088e-06, "loss": 0.0012, "step": 154380 }, { "epoch": 1.0157036374282087, "grad_norm": 0.01012656976432341, "learning_rate": 5.732985973879938e-06, "loss": 0.001, "step": 154390 }, { "epoch": 1.015769425603442, "grad_norm": 0.01881107526416677, "learning_rate": 5.732418060616003e-06, "loss": 0.0007, "step": 154400 }, { "epoch": 1.0158352137786755, "grad_norm": 0.05039465398499142, "learning_rate": 5.731850137695767e-06, "loss": 0.002, "step": 154410 }, { "epoch": 1.0159010019539088, "grad_norm": 0.044862867004168534, "learning_rate": 5.731282205126719e-06, "loss": 0.0008, "step": 154420 }, { "epoch": 1.0159667901291423, "grad_norm": 0.034902766812523855, "learning_rate": 5.730714262916346e-06, "loss": 0.0007, "step": 154430 }, { "epoch": 1.0160325783043755, "grad_norm": 0.017527120898531503, "learning_rate": 5.730146311072138e-06, "loss": 0.001, "step": 154440 }, { "epoch": 1.016098366479609, "grad_norm": 0.0436387635370447, "learning_rate": 5.72957834960158e-06, "loss": 0.0012, "step": 154450 }, { "epoch": 1.0161641546548423, "grad_norm": 0.05021407217009258, "learning_rate": 5.729010378512163e-06, "loss": 0.0012, "step": 154460 }, { "epoch": 1.0162299428300756, "grad_norm": 0.01753967479928158, "learning_rate": 5.728442397811373e-06, "loss": 0.0008, "step": 154470 }, { "epoch": 1.0162957310053091, "grad_norm": 0.09735650108674813, "learning_rate": 5.727874407506699e-06, "loss": 0.0017, "step": 154480 }, { "epoch": 1.0163615191805424, "grad_norm": 0.019083493403010476, "learning_rate": 5.7273064076056305e-06, "loss": 0.0004, "step": 154490 }, { "epoch": 1.016427307355776, "grad_norm": 0.028960260835598557, "learning_rate": 5.726738398115653e-06, "loss": 0.0006, "step": 154500 }, { "epoch": 1.0164930955310092, "grad_norm": 0.008287542697238904, "learning_rate": 5.726170379044259e-06, "loss": 0.0005, "step": 154510 }, { "epoch": 1.0165588837062427, "grad_norm": 0.14589373352115384, "learning_rate": 5.725602350398936e-06, "loss": 0.0008, "step": 154520 }, { "epoch": 1.016624671881476, "grad_norm": 0.000532998187863628, "learning_rate": 5.7250343121871725e-06, "loss": 0.0005, "step": 154530 }, { "epoch": 1.0166904600567095, "grad_norm": 0.01845378200789092, "learning_rate": 5.724466264416457e-06, "loss": 0.0011, "step": 154540 }, { "epoch": 1.0167562482319428, "grad_norm": 0.017030593287240683, "learning_rate": 5.72389820709428e-06, "loss": 0.0009, "step": 154550 }, { "epoch": 1.0168220364071763, "grad_norm": 0.021815587878991814, "learning_rate": 5.723330140228129e-06, "loss": 0.0007, "step": 154560 }, { "epoch": 1.0168878245824096, "grad_norm": 0.035219582390194515, "learning_rate": 5.722762063825498e-06, "loss": 0.0004, "step": 154570 }, { "epoch": 1.0169536127576428, "grad_norm": 0.0006331761135154339, "learning_rate": 5.722193977893871e-06, "loss": 0.0007, "step": 154580 }, { "epoch": 1.0170194009328763, "grad_norm": 0.06694709898058035, "learning_rate": 5.72162588244074e-06, "loss": 0.0027, "step": 154590 }, { "epoch": 1.0170851891081096, "grad_norm": 0.018409304958319977, "learning_rate": 5.721057777473596e-06, "loss": 0.0007, "step": 154600 }, { "epoch": 1.0171509772833431, "grad_norm": 0.033020153698168034, "learning_rate": 5.720489662999929e-06, "loss": 0.0006, "step": 154610 }, { "epoch": 1.0172167654585764, "grad_norm": 0.008381881855737685, "learning_rate": 5.719921539027226e-06, "loss": 0.0014, "step": 154620 }, { "epoch": 1.01728255363381, "grad_norm": 0.04890773702978211, "learning_rate": 5.719353405562979e-06, "loss": 0.0007, "step": 154630 }, { "epoch": 1.0173483418090432, "grad_norm": 0.06839864419200747, "learning_rate": 5.7187852626146786e-06, "loss": 0.0009, "step": 154640 }, { "epoch": 1.0174141299842767, "grad_norm": 0.139428928631702, "learning_rate": 5.718217110189818e-06, "loss": 0.0013, "step": 154650 }, { "epoch": 1.01747991815951, "grad_norm": 0.04317851359778587, "learning_rate": 5.717648948295883e-06, "loss": 0.0007, "step": 154660 }, { "epoch": 1.0175457063347433, "grad_norm": 0.15815601446521865, "learning_rate": 5.717080776940367e-06, "loss": 0.0008, "step": 154670 }, { "epoch": 1.0176114945099768, "grad_norm": 0.11789012459206773, "learning_rate": 5.71651259613076e-06, "loss": 0.0017, "step": 154680 }, { "epoch": 1.01767728268521, "grad_norm": 0.08128143717951701, "learning_rate": 5.715944405874552e-06, "loss": 0.0009, "step": 154690 }, { "epoch": 1.0177430708604436, "grad_norm": 0.02804884996253366, "learning_rate": 5.715376206179237e-06, "loss": 0.0006, "step": 154700 }, { "epoch": 1.0178088590356769, "grad_norm": 0.014529483392756446, "learning_rate": 5.714807997052304e-06, "loss": 0.0009, "step": 154710 }, { "epoch": 1.0178746472109104, "grad_norm": 0.00713684692017079, "learning_rate": 5.714239778501245e-06, "loss": 0.0005, "step": 154720 }, { "epoch": 1.0179404353861436, "grad_norm": 0.08386294797036895, "learning_rate": 5.7136715505335515e-06, "loss": 0.0014, "step": 154730 }, { "epoch": 1.0180062235613772, "grad_norm": 0.0005733326807458639, "learning_rate": 5.713103313156716e-06, "loss": 0.0004, "step": 154740 }, { "epoch": 1.0180720117366104, "grad_norm": 0.10087082980919991, "learning_rate": 5.712535066378227e-06, "loss": 0.0023, "step": 154750 }, { "epoch": 1.018137799911844, "grad_norm": 0.04600787740827237, "learning_rate": 5.711966810205579e-06, "loss": 0.0009, "step": 154760 }, { "epoch": 1.0182035880870772, "grad_norm": 0.2607198959124586, "learning_rate": 5.711398544646265e-06, "loss": 0.0019, "step": 154770 }, { "epoch": 1.0182693762623105, "grad_norm": 0.019496682535709967, "learning_rate": 5.710830269707775e-06, "loss": 0.0005, "step": 154780 }, { "epoch": 1.018335164437544, "grad_norm": 0.054421192084247595, "learning_rate": 5.710261985397601e-06, "loss": 0.0007, "step": 154790 }, { "epoch": 1.0184009526127773, "grad_norm": 0.09368999949226368, "learning_rate": 5.709693691723237e-06, "loss": 0.0008, "step": 154800 }, { "epoch": 1.0184667407880108, "grad_norm": 0.02505893198797646, "learning_rate": 5.709125388692174e-06, "loss": 0.0013, "step": 154810 }, { "epoch": 1.018532528963244, "grad_norm": 0.07770143062985756, "learning_rate": 5.708557076311905e-06, "loss": 0.0026, "step": 154820 }, { "epoch": 1.0185983171384776, "grad_norm": 0.007486546714982372, "learning_rate": 5.707988754589924e-06, "loss": 0.0008, "step": 154830 }, { "epoch": 1.0186641053137109, "grad_norm": 0.023430203795375727, "learning_rate": 5.707420423533721e-06, "loss": 0.0013, "step": 154840 }, { "epoch": 1.0187298934889444, "grad_norm": 0.1619718696824415, "learning_rate": 5.7068520831507925e-06, "loss": 0.0017, "step": 154850 }, { "epoch": 1.0187956816641777, "grad_norm": 0.01623537610285852, "learning_rate": 5.70628373344863e-06, "loss": 0.0024, "step": 154860 }, { "epoch": 1.0188614698394112, "grad_norm": 0.0191684999251106, "learning_rate": 5.705715374434725e-06, "loss": 0.0006, "step": 154870 }, { "epoch": 1.0189272580146445, "grad_norm": 0.09583256606635646, "learning_rate": 5.705147006116572e-06, "loss": 0.0007, "step": 154880 }, { "epoch": 1.0189930461898777, "grad_norm": 0.07822767732496974, "learning_rate": 5.704578628501666e-06, "loss": 0.0013, "step": 154890 }, { "epoch": 1.0190588343651112, "grad_norm": 0.029856079546992997, "learning_rate": 5.704010241597499e-06, "loss": 0.001, "step": 154900 }, { "epoch": 1.0191246225403445, "grad_norm": 0.04113656834576709, "learning_rate": 5.703441845411564e-06, "loss": 0.0007, "step": 154910 }, { "epoch": 1.019190410715578, "grad_norm": 0.05152954717827589, "learning_rate": 5.702873439951358e-06, "loss": 0.0015, "step": 154920 }, { "epoch": 1.0192561988908113, "grad_norm": 0.04665517823176933, "learning_rate": 5.702305025224371e-06, "loss": 0.0014, "step": 154930 }, { "epoch": 1.0193219870660448, "grad_norm": 0.001542767517783361, "learning_rate": 5.701736601238099e-06, "loss": 0.0004, "step": 154940 }, { "epoch": 1.019387775241278, "grad_norm": 0.015333176550041502, "learning_rate": 5.701168168000035e-06, "loss": 0.0008, "step": 154950 }, { "epoch": 1.0194535634165116, "grad_norm": 0.04749382872326191, "learning_rate": 5.700599725517676e-06, "loss": 0.0008, "step": 154960 }, { "epoch": 1.019519351591745, "grad_norm": 0.038271450160207235, "learning_rate": 5.700031273798514e-06, "loss": 0.0006, "step": 154970 }, { "epoch": 1.0195851397669782, "grad_norm": 0.03986585780367311, "learning_rate": 5.699462812850044e-06, "loss": 0.0009, "step": 154980 }, { "epoch": 1.0196509279422117, "grad_norm": 0.13092867983642384, "learning_rate": 5.6988943426797615e-06, "loss": 0.0013, "step": 154990 }, { "epoch": 1.019716716117445, "grad_norm": 0.18303000050943458, "learning_rate": 5.698325863295161e-06, "loss": 0.0012, "step": 155000 }, { "epoch": 1.0197825042926785, "grad_norm": 0.02699107430137085, "learning_rate": 5.697757374703737e-06, "loss": 0.0008, "step": 155010 }, { "epoch": 1.0198482924679118, "grad_norm": 0.004068352942521231, "learning_rate": 5.697188876912982e-06, "loss": 0.0005, "step": 155020 }, { "epoch": 1.0199140806431453, "grad_norm": 0.0594260795173971, "learning_rate": 5.6966203699303966e-06, "loss": 0.0007, "step": 155030 }, { "epoch": 1.0199798688183785, "grad_norm": 0.09875713478832472, "learning_rate": 5.6960518537634724e-06, "loss": 0.0007, "step": 155040 }, { "epoch": 1.020045656993612, "grad_norm": 0.05462522905836223, "learning_rate": 5.695483328419706e-06, "loss": 0.0008, "step": 155050 }, { "epoch": 1.0201114451688453, "grad_norm": 0.034069068476588374, "learning_rate": 5.694914793906593e-06, "loss": 0.0008, "step": 155060 }, { "epoch": 1.0201772333440788, "grad_norm": 0.02280144317552448, "learning_rate": 5.6943462502316285e-06, "loss": 0.0006, "step": 155070 }, { "epoch": 1.0202430215193121, "grad_norm": 0.092597363860083, "learning_rate": 5.693777697402307e-06, "loss": 0.0006, "step": 155080 }, { "epoch": 1.0203088096945454, "grad_norm": 0.08756641710204574, "learning_rate": 5.693209135426127e-06, "loss": 0.0016, "step": 155090 }, { "epoch": 1.020374597869779, "grad_norm": 0.00011053480709245768, "learning_rate": 5.692640564310583e-06, "loss": 0.0005, "step": 155100 }, { "epoch": 1.0204403860450122, "grad_norm": 0.023579695557485632, "learning_rate": 5.692071984063171e-06, "loss": 0.0005, "step": 155110 }, { "epoch": 1.0205061742202457, "grad_norm": 0.002116742201241469, "learning_rate": 5.691503394691388e-06, "loss": 0.0012, "step": 155120 }, { "epoch": 1.020571962395479, "grad_norm": 0.025988117222309422, "learning_rate": 5.690934796202729e-06, "loss": 0.001, "step": 155130 }, { "epoch": 1.0206377505707125, "grad_norm": 0.059231915771025094, "learning_rate": 5.690366188604694e-06, "loss": 0.0005, "step": 155140 }, { "epoch": 1.0207035387459458, "grad_norm": 0.031179840265457417, "learning_rate": 5.689797571904773e-06, "loss": 0.0006, "step": 155150 }, { "epoch": 1.0207693269211793, "grad_norm": 0.008022704512333601, "learning_rate": 5.68922894611047e-06, "loss": 0.001, "step": 155160 }, { "epoch": 1.0208351150964126, "grad_norm": 0.08356705780325573, "learning_rate": 5.688660311229278e-06, "loss": 0.0012, "step": 155170 }, { "epoch": 1.020900903271646, "grad_norm": 0.04603778296848186, "learning_rate": 5.688091667268695e-06, "loss": 0.0019, "step": 155180 }, { "epoch": 1.0209666914468793, "grad_norm": 0.003270618593761303, "learning_rate": 5.6875230142362155e-06, "loss": 0.0009, "step": 155190 }, { "epoch": 1.0210324796221126, "grad_norm": 0.027323849036538686, "learning_rate": 5.6869543521393415e-06, "loss": 0.0006, "step": 155200 }, { "epoch": 1.0210982677973461, "grad_norm": 0.0537322154750643, "learning_rate": 5.686385680985565e-06, "loss": 0.0008, "step": 155210 }, { "epoch": 1.0211640559725794, "grad_norm": 0.020651985506904395, "learning_rate": 5.685817000782388e-06, "loss": 0.0004, "step": 155220 }, { "epoch": 1.021229844147813, "grad_norm": 0.055660495243195815, "learning_rate": 5.685248311537306e-06, "loss": 0.0012, "step": 155230 }, { "epoch": 1.0212956323230462, "grad_norm": 0.00998126594963777, "learning_rate": 5.684679613257816e-06, "loss": 0.0012, "step": 155240 }, { "epoch": 1.0213614204982797, "grad_norm": 0.006225815191385568, "learning_rate": 5.6841109059514175e-06, "loss": 0.0015, "step": 155250 }, { "epoch": 1.021427208673513, "grad_norm": 0.003388483118038565, "learning_rate": 5.683542189625607e-06, "loss": 0.0005, "step": 155260 }, { "epoch": 1.0214929968487465, "grad_norm": 0.03560234295384347, "learning_rate": 5.682973464287884e-06, "loss": 0.001, "step": 155270 }, { "epoch": 1.0215587850239798, "grad_norm": 0.047333603785621535, "learning_rate": 5.682404729945745e-06, "loss": 0.0004, "step": 155280 }, { "epoch": 1.021624573199213, "grad_norm": 0.10685643606269524, "learning_rate": 5.6818359866066885e-06, "loss": 0.0024, "step": 155290 }, { "epoch": 1.0216903613744466, "grad_norm": 0.24305686217620795, "learning_rate": 5.6812672342782136e-06, "loss": 0.0009, "step": 155300 }, { "epoch": 1.0217561495496799, "grad_norm": 0.0411344089749237, "learning_rate": 5.680698472967819e-06, "loss": 0.0024, "step": 155310 }, { "epoch": 1.0218219377249134, "grad_norm": 0.05836396115706946, "learning_rate": 5.680129702683004e-06, "loss": 0.0008, "step": 155320 }, { "epoch": 1.0218877259001466, "grad_norm": 0.005022324328823385, "learning_rate": 5.679560923431264e-06, "loss": 0.0014, "step": 155330 }, { "epoch": 1.0219535140753802, "grad_norm": 0.02575771443343458, "learning_rate": 5.678992135220101e-06, "loss": 0.0005, "step": 155340 }, { "epoch": 1.0220193022506134, "grad_norm": 0.11372522494404592, "learning_rate": 5.678423338057013e-06, "loss": 0.0019, "step": 155350 }, { "epoch": 1.022085090425847, "grad_norm": 0.0119770693945029, "learning_rate": 5.6778545319495e-06, "loss": 0.001, "step": 155360 }, { "epoch": 1.0221508786010802, "grad_norm": 0.06276329803055962, "learning_rate": 5.67728571690506e-06, "loss": 0.0006, "step": 155370 }, { "epoch": 1.0222166667763137, "grad_norm": 0.008793540437664567, "learning_rate": 5.676716892931193e-06, "loss": 0.0007, "step": 155380 }, { "epoch": 1.022282454951547, "grad_norm": 0.005622702280410134, "learning_rate": 5.676148060035397e-06, "loss": 0.0009, "step": 155390 }, { "epoch": 1.0223482431267803, "grad_norm": 0.01645412718075184, "learning_rate": 5.675579218225174e-06, "loss": 0.0006, "step": 155400 }, { "epoch": 1.0224140313020138, "grad_norm": 0.051321982848980506, "learning_rate": 5.675010367508022e-06, "loss": 0.0011, "step": 155410 }, { "epoch": 1.022479819477247, "grad_norm": 0.031409934930431496, "learning_rate": 5.674441507891441e-06, "loss": 0.0011, "step": 155420 }, { "epoch": 1.0225456076524806, "grad_norm": 0.004417053787514584, "learning_rate": 5.673872639382932e-06, "loss": 0.0005, "step": 155430 }, { "epoch": 1.0226113958277139, "grad_norm": 0.0021318011301288465, "learning_rate": 5.673303761989994e-06, "loss": 0.0008, "step": 155440 }, { "epoch": 1.0226771840029474, "grad_norm": 0.004190753494311162, "learning_rate": 5.672734875720128e-06, "loss": 0.0006, "step": 155450 }, { "epoch": 1.0227429721781807, "grad_norm": 0.019242231352944682, "learning_rate": 5.672165980580833e-06, "loss": 0.0022, "step": 155460 }, { "epoch": 1.0228087603534142, "grad_norm": 0.05037126652402104, "learning_rate": 5.6715970765796094e-06, "loss": 0.0008, "step": 155470 }, { "epoch": 1.0228745485286475, "grad_norm": 0.047199279686740696, "learning_rate": 5.671028163723959e-06, "loss": 0.0013, "step": 155480 }, { "epoch": 1.0229403367038807, "grad_norm": 0.27598051283617797, "learning_rate": 5.670459242021382e-06, "loss": 0.0004, "step": 155490 }, { "epoch": 1.0230061248791142, "grad_norm": 0.09902428882608509, "learning_rate": 5.669890311479379e-06, "loss": 0.0017, "step": 155500 }, { "epoch": 1.0230719130543475, "grad_norm": 0.002886286106303575, "learning_rate": 5.669321372105451e-06, "loss": 0.0009, "step": 155510 }, { "epoch": 1.023137701229581, "grad_norm": 0.061328699016912167, "learning_rate": 5.668752423907097e-06, "loss": 0.0017, "step": 155520 }, { "epoch": 1.0232034894048143, "grad_norm": 0.4343351691460033, "learning_rate": 5.668183466891821e-06, "loss": 0.0008, "step": 155530 }, { "epoch": 1.0232692775800478, "grad_norm": 0.02600177557371948, "learning_rate": 5.667614501067124e-06, "loss": 0.0009, "step": 155540 }, { "epoch": 1.023335065755281, "grad_norm": 0.0694176151035675, "learning_rate": 5.667045526440506e-06, "loss": 0.0006, "step": 155550 }, { "epoch": 1.0234008539305146, "grad_norm": 0.06400688982512388, "learning_rate": 5.666476543019468e-06, "loss": 0.0005, "step": 155560 }, { "epoch": 1.023466642105748, "grad_norm": 0.12225743835810667, "learning_rate": 5.665907550811513e-06, "loss": 0.0006, "step": 155570 }, { "epoch": 1.0235324302809814, "grad_norm": 0.03431016473405325, "learning_rate": 5.665338549824142e-06, "loss": 0.0021, "step": 155580 }, { "epoch": 1.0235982184562147, "grad_norm": 0.08292384082593861, "learning_rate": 5.664769540064856e-06, "loss": 0.001, "step": 155590 }, { "epoch": 1.023664006631448, "grad_norm": 0.18280253844749736, "learning_rate": 5.664200521541159e-06, "loss": 0.0016, "step": 155600 }, { "epoch": 1.0237297948066815, "grad_norm": 0.19642621504585145, "learning_rate": 5.663631494260551e-06, "loss": 0.0006, "step": 155610 }, { "epoch": 1.0237955829819148, "grad_norm": 0.00098063568897653, "learning_rate": 5.663062458230535e-06, "loss": 0.0004, "step": 155620 }, { "epoch": 1.0238613711571483, "grad_norm": 0.07102457526251674, "learning_rate": 5.662493413458614e-06, "loss": 0.0007, "step": 155630 }, { "epoch": 1.0239271593323815, "grad_norm": 0.013234864566551197, "learning_rate": 5.6619243599522886e-06, "loss": 0.0014, "step": 155640 }, { "epoch": 1.023992947507615, "grad_norm": 0.04864961262449166, "learning_rate": 5.661355297719062e-06, "loss": 0.0006, "step": 155650 }, { "epoch": 1.0240587356828483, "grad_norm": 0.07876023945858945, "learning_rate": 5.660786226766437e-06, "loss": 0.001, "step": 155660 }, { "epoch": 1.0241245238580818, "grad_norm": 0.03203457439136646, "learning_rate": 5.660217147101918e-06, "loss": 0.001, "step": 155670 }, { "epoch": 1.0241903120333151, "grad_norm": 0.006651803472599365, "learning_rate": 5.659648058733004e-06, "loss": 0.0008, "step": 155680 }, { "epoch": 1.0242561002085484, "grad_norm": 0.09682660132938618, "learning_rate": 5.659078961667202e-06, "loss": 0.0014, "step": 155690 }, { "epoch": 1.024321888383782, "grad_norm": 0.05351891903917995, "learning_rate": 5.658509855912011e-06, "loss": 0.001, "step": 155700 }, { "epoch": 1.0243876765590152, "grad_norm": 0.02702077870928861, "learning_rate": 5.657940741474937e-06, "loss": 0.0009, "step": 155710 }, { "epoch": 1.0244534647342487, "grad_norm": 0.0049461387923283315, "learning_rate": 5.6573716183634834e-06, "loss": 0.0013, "step": 155720 }, { "epoch": 1.024519252909482, "grad_norm": 0.00124501362528885, "learning_rate": 5.656802486585151e-06, "loss": 0.0005, "step": 155730 }, { "epoch": 1.0245850410847155, "grad_norm": 0.0474929587828353, "learning_rate": 5.656233346147447e-06, "loss": 0.0008, "step": 155740 }, { "epoch": 1.0246508292599488, "grad_norm": 0.03803448420341266, "learning_rate": 5.655664197057873e-06, "loss": 0.0016, "step": 155750 }, { "epoch": 1.0247166174351823, "grad_norm": 0.016534392834045442, "learning_rate": 5.655095039323931e-06, "loss": 0.0023, "step": 155760 }, { "epoch": 1.0247824056104156, "grad_norm": 0.06038083190532664, "learning_rate": 5.6545258729531286e-06, "loss": 0.0016, "step": 155770 }, { "epoch": 1.024848193785649, "grad_norm": 0.010447322950298944, "learning_rate": 5.653956697952966e-06, "loss": 0.0008, "step": 155780 }, { "epoch": 1.0249139819608823, "grad_norm": 0.04237862046906, "learning_rate": 5.65338751433095e-06, "loss": 0.0011, "step": 155790 }, { "epoch": 1.0249797701361156, "grad_norm": 0.009153496547676164, "learning_rate": 5.652818322094585e-06, "loss": 0.0003, "step": 155800 }, { "epoch": 1.0250455583113491, "grad_norm": 0.044099993486311745, "learning_rate": 5.652249121251373e-06, "loss": 0.0006, "step": 155810 }, { "epoch": 1.0251113464865824, "grad_norm": 0.025956080411635895, "learning_rate": 5.651679911808821e-06, "loss": 0.0007, "step": 155820 }, { "epoch": 1.025177134661816, "grad_norm": 0.05232613773832663, "learning_rate": 5.651110693774431e-06, "loss": 0.0009, "step": 155830 }, { "epoch": 1.0252429228370492, "grad_norm": 0.02233868415910841, "learning_rate": 5.650541467155709e-06, "loss": 0.0006, "step": 155840 }, { "epoch": 1.0253087110122827, "grad_norm": 0.002782124210337632, "learning_rate": 5.649972231960159e-06, "loss": 0.0004, "step": 155850 }, { "epoch": 1.025374499187516, "grad_norm": 0.014517477334650395, "learning_rate": 5.649402988195288e-06, "loss": 0.0004, "step": 155860 }, { "epoch": 1.0254402873627495, "grad_norm": 0.08679802294397718, "learning_rate": 5.648833735868598e-06, "loss": 0.0008, "step": 155870 }, { "epoch": 1.0255060755379828, "grad_norm": 0.02388688235333938, "learning_rate": 5.648264474987596e-06, "loss": 0.0004, "step": 155880 }, { "epoch": 1.0255718637132163, "grad_norm": 0.026750228399018645, "learning_rate": 5.6476952055597876e-06, "loss": 0.0009, "step": 155890 }, { "epoch": 1.0256376518884496, "grad_norm": 0.056849665298373965, "learning_rate": 5.647125927592676e-06, "loss": 0.0005, "step": 155900 }, { "epoch": 1.0257034400636829, "grad_norm": 0.020795096754440537, "learning_rate": 5.646556641093767e-06, "loss": 0.0009, "step": 155910 }, { "epoch": 1.0257692282389164, "grad_norm": 0.02492679380389715, "learning_rate": 5.645987346070569e-06, "loss": 0.0005, "step": 155920 }, { "epoch": 1.0258350164141496, "grad_norm": 0.018886094705592923, "learning_rate": 5.645418042530586e-06, "loss": 0.0009, "step": 155930 }, { "epoch": 1.0259008045893832, "grad_norm": 0.031019678795101382, "learning_rate": 5.644848730481324e-06, "loss": 0.0008, "step": 155940 }, { "epoch": 1.0259665927646164, "grad_norm": 0.0456285685418438, "learning_rate": 5.644279409930287e-06, "loss": 0.0007, "step": 155950 }, { "epoch": 1.02603238093985, "grad_norm": 0.13315388011009519, "learning_rate": 5.643710080884983e-06, "loss": 0.0011, "step": 155960 }, { "epoch": 1.0260981691150832, "grad_norm": 0.028359512226824316, "learning_rate": 5.6431407433529174e-06, "loss": 0.0015, "step": 155970 }, { "epoch": 1.0261639572903167, "grad_norm": 0.10175831524389467, "learning_rate": 5.642571397341596e-06, "loss": 0.0014, "step": 155980 }, { "epoch": 1.02622974546555, "grad_norm": 0.09423975481648769, "learning_rate": 5.642002042858526e-06, "loss": 0.0014, "step": 155990 }, { "epoch": 1.0262955336407833, "grad_norm": 0.05139667665911468, "learning_rate": 5.641432679911214e-06, "loss": 0.0007, "step": 156000 }, { "epoch": 1.0263613218160168, "grad_norm": 0.029236128045065365, "learning_rate": 5.640863308507166e-06, "loss": 0.0011, "step": 156010 }, { "epoch": 1.02642710999125, "grad_norm": 0.10986703005665262, "learning_rate": 5.640293928653888e-06, "loss": 0.0008, "step": 156020 }, { "epoch": 1.0264928981664836, "grad_norm": 0.02279282014215231, "learning_rate": 5.639724540358889e-06, "loss": 0.0009, "step": 156030 }, { "epoch": 1.0265586863417169, "grad_norm": 0.04933079712348946, "learning_rate": 5.639155143629672e-06, "loss": 0.0008, "step": 156040 }, { "epoch": 1.0266244745169504, "grad_norm": 0.06351983939702749, "learning_rate": 5.638585738473748e-06, "loss": 0.0006, "step": 156050 }, { "epoch": 1.0266902626921837, "grad_norm": 0.0156879153508049, "learning_rate": 5.638016324898623e-06, "loss": 0.0007, "step": 156060 }, { "epoch": 1.0267560508674172, "grad_norm": 0.04161873671430575, "learning_rate": 5.6374469029118035e-06, "loss": 0.0014, "step": 156070 }, { "epoch": 1.0268218390426505, "grad_norm": 0.06581838652882245, "learning_rate": 5.636877472520797e-06, "loss": 0.001, "step": 156080 }, { "epoch": 1.026887627217884, "grad_norm": 0.011735260612105457, "learning_rate": 5.636308033733111e-06, "loss": 0.0019, "step": 156090 }, { "epoch": 1.0269534153931172, "grad_norm": 0.023427663357777337, "learning_rate": 5.635738586556254e-06, "loss": 0.0008, "step": 156100 }, { "epoch": 1.0270192035683505, "grad_norm": 0.09288499261418152, "learning_rate": 5.635169130997731e-06, "loss": 0.0009, "step": 156110 }, { "epoch": 1.027084991743584, "grad_norm": 0.05041434594904608, "learning_rate": 5.634599667065053e-06, "loss": 0.0009, "step": 156120 }, { "epoch": 1.0271507799188173, "grad_norm": 0.03403713465887208, "learning_rate": 5.634030194765725e-06, "loss": 0.0008, "step": 156130 }, { "epoch": 1.0272165680940508, "grad_norm": 0.00344612290659981, "learning_rate": 5.633460714107257e-06, "loss": 0.0007, "step": 156140 }, { "epoch": 1.027282356269284, "grad_norm": 0.07751076984546765, "learning_rate": 5.632891225097158e-06, "loss": 0.0018, "step": 156150 }, { "epoch": 1.0273481444445176, "grad_norm": 0.0014262437513435854, "learning_rate": 5.632321727742934e-06, "loss": 0.0005, "step": 156160 }, { "epoch": 1.027413932619751, "grad_norm": 0.004580848344069226, "learning_rate": 5.631752222052092e-06, "loss": 0.0005, "step": 156170 }, { "epoch": 1.0274797207949844, "grad_norm": 0.00989064419292789, "learning_rate": 5.631182708032146e-06, "loss": 0.0007, "step": 156180 }, { "epoch": 1.0275455089702177, "grad_norm": 0.04899883532472822, "learning_rate": 5.6306131856905985e-06, "loss": 0.0009, "step": 156190 }, { "epoch": 1.0276112971454512, "grad_norm": 0.009927704570795748, "learning_rate": 5.630043655034962e-06, "loss": 0.0008, "step": 156200 }, { "epoch": 1.0276770853206845, "grad_norm": 0.05456871196786702, "learning_rate": 5.6294741160727455e-06, "loss": 0.0008, "step": 156210 }, { "epoch": 1.0277428734959178, "grad_norm": 0.04284632533211511, "learning_rate": 5.628904568811454e-06, "loss": 0.0006, "step": 156220 }, { "epoch": 1.0278086616711513, "grad_norm": 0.012863809531043654, "learning_rate": 5.6283350132586e-06, "loss": 0.001, "step": 156230 }, { "epoch": 1.0278744498463845, "grad_norm": 0.09814057071344136, "learning_rate": 5.627765449421691e-06, "loss": 0.0008, "step": 156240 }, { "epoch": 1.027940238021618, "grad_norm": 0.04342046771900657, "learning_rate": 5.627195877308238e-06, "loss": 0.001, "step": 156250 }, { "epoch": 1.0280060261968513, "grad_norm": 0.059280285044234445, "learning_rate": 5.626626296925748e-06, "loss": 0.002, "step": 156260 }, { "epoch": 1.0280718143720848, "grad_norm": 0.02008295291589806, "learning_rate": 5.626056708281732e-06, "loss": 0.0012, "step": 156270 }, { "epoch": 1.0281376025473181, "grad_norm": 0.02219081375977362, "learning_rate": 5.625487111383698e-06, "loss": 0.0006, "step": 156280 }, { "epoch": 1.0282033907225516, "grad_norm": 0.0452359757864984, "learning_rate": 5.624917506239157e-06, "loss": 0.0006, "step": 156290 }, { "epoch": 1.028269178897785, "grad_norm": 0.00166815534169275, "learning_rate": 5.624347892855619e-06, "loss": 0.0009, "step": 156300 }, { "epoch": 1.0283349670730182, "grad_norm": 0.00862614054589106, "learning_rate": 5.623778271240593e-06, "loss": 0.0013, "step": 156310 }, { "epoch": 1.0284007552482517, "grad_norm": 0.05073271191047531, "learning_rate": 5.623208641401589e-06, "loss": 0.001, "step": 156320 }, { "epoch": 1.028466543423485, "grad_norm": 0.02641792307682215, "learning_rate": 5.6226390033461175e-06, "loss": 0.0006, "step": 156330 }, { "epoch": 1.0285323315987185, "grad_norm": 0.02286302054255837, "learning_rate": 5.622069357081688e-06, "loss": 0.0011, "step": 156340 }, { "epoch": 1.0285981197739518, "grad_norm": 0.026366717968243282, "learning_rate": 5.621499702615811e-06, "loss": 0.0012, "step": 156350 }, { "epoch": 1.0286639079491853, "grad_norm": 0.007642973511364207, "learning_rate": 5.620930039955999e-06, "loss": 0.0008, "step": 156360 }, { "epoch": 1.0287296961244186, "grad_norm": 0.011733310844264573, "learning_rate": 5.620360369109759e-06, "loss": 0.0012, "step": 156370 }, { "epoch": 1.028795484299652, "grad_norm": 0.030990357863160525, "learning_rate": 5.619790690084603e-06, "loss": 0.0011, "step": 156380 }, { "epoch": 1.0288612724748853, "grad_norm": 0.027537275920218852, "learning_rate": 5.619221002888043e-06, "loss": 0.0008, "step": 156390 }, { "epoch": 1.0289270606501189, "grad_norm": 0.0557695201280936, "learning_rate": 5.6186513075275895e-06, "loss": 0.0007, "step": 156400 }, { "epoch": 1.0289928488253521, "grad_norm": 0.07783952643285044, "learning_rate": 5.6180816040107525e-06, "loss": 0.0008, "step": 156410 }, { "epoch": 1.0290586370005854, "grad_norm": 0.04384628781241226, "learning_rate": 5.617511892345041e-06, "loss": 0.0005, "step": 156420 }, { "epoch": 1.029124425175819, "grad_norm": 0.021214141282576014, "learning_rate": 5.6169421725379714e-06, "loss": 0.0007, "step": 156430 }, { "epoch": 1.0291902133510522, "grad_norm": 0.028470276316464996, "learning_rate": 5.616372444597051e-06, "loss": 0.0004, "step": 156440 }, { "epoch": 1.0292560015262857, "grad_norm": 0.02745120173373043, "learning_rate": 5.615802708529793e-06, "loss": 0.0012, "step": 156450 }, { "epoch": 1.029321789701519, "grad_norm": 0.08451863561083693, "learning_rate": 5.615232964343708e-06, "loss": 0.0009, "step": 156460 }, { "epoch": 1.0293875778767525, "grad_norm": 0.005234670562426797, "learning_rate": 5.614663212046306e-06, "loss": 0.0007, "step": 156470 }, { "epoch": 1.0294533660519858, "grad_norm": 0.0017732280720602696, "learning_rate": 5.6140934516451005e-06, "loss": 0.0009, "step": 156480 }, { "epoch": 1.0295191542272193, "grad_norm": 0.1620633484681943, "learning_rate": 5.613523683147605e-06, "loss": 0.0012, "step": 156490 }, { "epoch": 1.0295849424024526, "grad_norm": 0.08049145821649303, "learning_rate": 5.612953906561329e-06, "loss": 0.001, "step": 156500 }, { "epoch": 1.0296507305776859, "grad_norm": 0.012249550931297423, "learning_rate": 5.612384121893786e-06, "loss": 0.0004, "step": 156510 }, { "epoch": 1.0297165187529194, "grad_norm": 0.06248082165880199, "learning_rate": 5.611814329152485e-06, "loss": 0.0004, "step": 156520 }, { "epoch": 1.0297823069281526, "grad_norm": 0.0027697239327250872, "learning_rate": 5.611244528344943e-06, "loss": 0.0013, "step": 156530 }, { "epoch": 1.0298480951033862, "grad_norm": 0.05051521990514588, "learning_rate": 5.610674719478668e-06, "loss": 0.0012, "step": 156540 }, { "epoch": 1.0299138832786194, "grad_norm": 0.0009249649862807713, "learning_rate": 5.610104902561175e-06, "loss": 0.0009, "step": 156550 }, { "epoch": 1.029979671453853, "grad_norm": 0.011148449525029847, "learning_rate": 5.609535077599975e-06, "loss": 0.0026, "step": 156560 }, { "epoch": 1.0300454596290862, "grad_norm": 0.10893247183941616, "learning_rate": 5.608965244602582e-06, "loss": 0.0009, "step": 156570 }, { "epoch": 1.0301112478043197, "grad_norm": 0.018960213411305528, "learning_rate": 5.608395403576508e-06, "loss": 0.0006, "step": 156580 }, { "epoch": 1.030177035979553, "grad_norm": 0.08549420215110966, "learning_rate": 5.607825554529267e-06, "loss": 0.001, "step": 156590 }, { "epoch": 1.0302428241547865, "grad_norm": 0.09157501231393153, "learning_rate": 5.6072556974683705e-06, "loss": 0.0007, "step": 156600 }, { "epoch": 1.0303086123300198, "grad_norm": 0.011881706056082445, "learning_rate": 5.6066858324013305e-06, "loss": 0.0004, "step": 156610 }, { "epoch": 1.030374400505253, "grad_norm": 0.048266012578335425, "learning_rate": 5.606115959335663e-06, "loss": 0.0006, "step": 156620 }, { "epoch": 1.0304401886804866, "grad_norm": 0.10154246648381325, "learning_rate": 5.605546078278881e-06, "loss": 0.0006, "step": 156630 }, { "epoch": 1.0305059768557199, "grad_norm": 0.07167575897449688, "learning_rate": 5.604976189238496e-06, "loss": 0.0007, "step": 156640 }, { "epoch": 1.0305717650309534, "grad_norm": 0.0006271807841939529, "learning_rate": 5.604406292222022e-06, "loss": 0.0007, "step": 156650 }, { "epoch": 1.0306375532061867, "grad_norm": 0.05802544504845286, "learning_rate": 5.603836387236974e-06, "loss": 0.0006, "step": 156660 }, { "epoch": 1.0307033413814202, "grad_norm": 0.06165935749041692, "learning_rate": 5.603266474290865e-06, "loss": 0.0008, "step": 156670 }, { "epoch": 1.0307691295566535, "grad_norm": 0.25416457037091095, "learning_rate": 5.602696553391207e-06, "loss": 0.0015, "step": 156680 }, { "epoch": 1.030834917731887, "grad_norm": 0.028206553983708405, "learning_rate": 5.6021266245455165e-06, "loss": 0.0005, "step": 156690 }, { "epoch": 1.0309007059071202, "grad_norm": 0.017835458774047873, "learning_rate": 5.6015566877613056e-06, "loss": 0.0006, "step": 156700 }, { "epoch": 1.0309664940823537, "grad_norm": 0.007037355114185512, "learning_rate": 5.6009867430460906e-06, "loss": 0.0009, "step": 156710 }, { "epoch": 1.031032282257587, "grad_norm": 0.02453668698186302, "learning_rate": 5.600416790407384e-06, "loss": 0.0009, "step": 156720 }, { "epoch": 1.0310980704328203, "grad_norm": 0.013392065578799421, "learning_rate": 5.599846829852701e-06, "loss": 0.0008, "step": 156730 }, { "epoch": 1.0311638586080538, "grad_norm": 0.03964946914064627, "learning_rate": 5.5992768613895536e-06, "loss": 0.0015, "step": 156740 }, { "epoch": 1.031229646783287, "grad_norm": 0.01671211180621878, "learning_rate": 5.59870688502546e-06, "loss": 0.001, "step": 156750 }, { "epoch": 1.0312954349585206, "grad_norm": 0.019466201051467086, "learning_rate": 5.598136900767933e-06, "loss": 0.0014, "step": 156760 }, { "epoch": 1.031361223133754, "grad_norm": 0.07048807174005692, "learning_rate": 5.597566908624488e-06, "loss": 0.0011, "step": 156770 }, { "epoch": 1.0314270113089874, "grad_norm": 0.021031221142069345, "learning_rate": 5.596996908602639e-06, "loss": 0.0007, "step": 156780 }, { "epoch": 1.0314927994842207, "grad_norm": 0.014164042110852273, "learning_rate": 5.596426900709902e-06, "loss": 0.001, "step": 156790 }, { "epoch": 1.0315585876594542, "grad_norm": 0.002065811194725169, "learning_rate": 5.5958568849537895e-06, "loss": 0.0005, "step": 156800 }, { "epoch": 1.0316243758346875, "grad_norm": 0.05752587448290276, "learning_rate": 5.59528686134182e-06, "loss": 0.0009, "step": 156810 }, { "epoch": 1.0316901640099208, "grad_norm": 0.03622384565575681, "learning_rate": 5.594716829881507e-06, "loss": 0.0013, "step": 156820 }, { "epoch": 1.0317559521851543, "grad_norm": 0.07448886996700281, "learning_rate": 5.594146790580367e-06, "loss": 0.0013, "step": 156830 }, { "epoch": 1.0318217403603875, "grad_norm": 0.008776290303364527, "learning_rate": 5.593576743445915e-06, "loss": 0.0004, "step": 156840 }, { "epoch": 1.031887528535621, "grad_norm": 0.012775926913310786, "learning_rate": 5.593006688485665e-06, "loss": 0.0008, "step": 156850 }, { "epoch": 1.0319533167108543, "grad_norm": 0.00304653181963416, "learning_rate": 5.592436625707134e-06, "loss": 0.0011, "step": 156860 }, { "epoch": 1.0320191048860878, "grad_norm": 0.20013126239426063, "learning_rate": 5.591866555117837e-06, "loss": 0.0006, "step": 156870 }, { "epoch": 1.0320848930613211, "grad_norm": 0.0007055760204245254, "learning_rate": 5.591296476725292e-06, "loss": 0.0006, "step": 156880 }, { "epoch": 1.0321506812365546, "grad_norm": 0.005262349473086376, "learning_rate": 5.590726390537014e-06, "loss": 0.0008, "step": 156890 }, { "epoch": 1.032216469411788, "grad_norm": 0.06292752384063478, "learning_rate": 5.590156296560518e-06, "loss": 0.0007, "step": 156900 }, { "epoch": 1.0322822575870214, "grad_norm": 0.019921094515168748, "learning_rate": 5.5895861948033205e-06, "loss": 0.0011, "step": 156910 }, { "epoch": 1.0323480457622547, "grad_norm": 0.0038971658660575887, "learning_rate": 5.589016085272939e-06, "loss": 0.001, "step": 156920 }, { "epoch": 1.032413833937488, "grad_norm": 0.043686012981444096, "learning_rate": 5.58844596797689e-06, "loss": 0.0007, "step": 156930 }, { "epoch": 1.0324796221127215, "grad_norm": 0.0998862767852493, "learning_rate": 5.587875842922686e-06, "loss": 0.0019, "step": 156940 }, { "epoch": 1.0325454102879548, "grad_norm": 0.05284146172358616, "learning_rate": 5.58730571011785e-06, "loss": 0.0006, "step": 156950 }, { "epoch": 1.0326111984631883, "grad_norm": 0.06790317752276114, "learning_rate": 5.586735569569893e-06, "loss": 0.0009, "step": 156960 }, { "epoch": 1.0326769866384216, "grad_norm": 0.2476541619390527, "learning_rate": 5.586165421286336e-06, "loss": 0.0012, "step": 156970 }, { "epoch": 1.032742774813655, "grad_norm": 0.0029003238428965492, "learning_rate": 5.585595265274693e-06, "loss": 0.0008, "step": 156980 }, { "epoch": 1.0328085629888883, "grad_norm": 0.0379114070320446, "learning_rate": 5.585025101542484e-06, "loss": 0.001, "step": 156990 }, { "epoch": 1.0328743511641219, "grad_norm": 0.017079108096721484, "learning_rate": 5.584454930097222e-06, "loss": 0.0015, "step": 157000 }, { "epoch": 1.0329401393393551, "grad_norm": 0.01122579770024596, "learning_rate": 5.583884750946428e-06, "loss": 0.0008, "step": 157010 }, { "epoch": 1.0330059275145884, "grad_norm": 0.028511265339257616, "learning_rate": 5.5833145640976174e-06, "loss": 0.0007, "step": 157020 }, { "epoch": 1.033071715689822, "grad_norm": 0.06044083646742025, "learning_rate": 5.5827443695583085e-06, "loss": 0.0005, "step": 157030 }, { "epoch": 1.0331375038650552, "grad_norm": 0.0014905232226457414, "learning_rate": 5.582174167336018e-06, "loss": 0.0029, "step": 157040 }, { "epoch": 1.0332032920402887, "grad_norm": 0.022451357375532555, "learning_rate": 5.581603957438264e-06, "loss": 0.0016, "step": 157050 }, { "epoch": 1.033269080215522, "grad_norm": 0.023224925538390284, "learning_rate": 5.5810337398725635e-06, "loss": 0.0014, "step": 157060 }, { "epoch": 1.0333348683907555, "grad_norm": 0.10291070115035969, "learning_rate": 5.580463514646435e-06, "loss": 0.0016, "step": 157070 }, { "epoch": 1.0334006565659888, "grad_norm": 0.14678178826442967, "learning_rate": 5.579893281767397e-06, "loss": 0.0012, "step": 157080 }, { "epoch": 1.0334664447412223, "grad_norm": 0.0034944634344577434, "learning_rate": 5.5793230412429665e-06, "loss": 0.0014, "step": 157090 }, { "epoch": 1.0335322329164556, "grad_norm": 0.022721503595163613, "learning_rate": 5.578752793080663e-06, "loss": 0.0009, "step": 157100 }, { "epoch": 1.033598021091689, "grad_norm": 0.008711318092718846, "learning_rate": 5.578182537288003e-06, "loss": 0.0005, "step": 157110 }, { "epoch": 1.0336638092669224, "grad_norm": 0.06167448269955586, "learning_rate": 5.5776122738725055e-06, "loss": 0.0006, "step": 157120 }, { "epoch": 1.0337295974421556, "grad_norm": 0.24444260413391908, "learning_rate": 5.577042002841688e-06, "loss": 0.001, "step": 157130 }, { "epoch": 1.0337953856173892, "grad_norm": 0.08922995670741889, "learning_rate": 5.576471724203072e-06, "loss": 0.001, "step": 157140 }, { "epoch": 1.0338611737926224, "grad_norm": 0.026216690063214587, "learning_rate": 5.575901437964173e-06, "loss": 0.0008, "step": 157150 }, { "epoch": 1.033926961967856, "grad_norm": 0.02225040494466802, "learning_rate": 5.575331144132512e-06, "loss": 0.0012, "step": 157160 }, { "epoch": 1.0339927501430892, "grad_norm": 0.03982214025636863, "learning_rate": 5.574760842715608e-06, "loss": 0.0023, "step": 157170 }, { "epoch": 1.0340585383183227, "grad_norm": 0.11335018302409863, "learning_rate": 5.574190533720976e-06, "loss": 0.0008, "step": 157180 }, { "epoch": 1.034124326493556, "grad_norm": 0.03129312444168166, "learning_rate": 5.573620217156137e-06, "loss": 0.0018, "step": 157190 }, { "epoch": 1.0341901146687895, "grad_norm": 0.0015334526564751687, "learning_rate": 5.573049893028612e-06, "loss": 0.0008, "step": 157200 }, { "epoch": 1.0342559028440228, "grad_norm": 0.034951606707389386, "learning_rate": 5.57247956134592e-06, "loss": 0.0014, "step": 157210 }, { "epoch": 1.0343216910192563, "grad_norm": 0.027236379463823263, "learning_rate": 5.571909222115579e-06, "loss": 0.0005, "step": 157220 }, { "epoch": 1.0343874791944896, "grad_norm": 0.04437053692762852, "learning_rate": 5.571338875345109e-06, "loss": 0.0029, "step": 157230 }, { "epoch": 1.0344532673697229, "grad_norm": 0.039709034022079426, "learning_rate": 5.5707685210420295e-06, "loss": 0.0011, "step": 157240 }, { "epoch": 1.0345190555449564, "grad_norm": 0.02364545587525322, "learning_rate": 5.570198159213859e-06, "loss": 0.0004, "step": 157250 }, { "epoch": 1.0345848437201897, "grad_norm": 0.021859911646942978, "learning_rate": 5.569627789868118e-06, "loss": 0.0008, "step": 157260 }, { "epoch": 1.0346506318954232, "grad_norm": 0.02954171330207753, "learning_rate": 5.569057413012328e-06, "loss": 0.001, "step": 157270 }, { "epoch": 1.0347164200706565, "grad_norm": 0.005159958159013583, "learning_rate": 5.568487028654007e-06, "loss": 0.0005, "step": 157280 }, { "epoch": 1.03478220824589, "grad_norm": 0.00203821321346579, "learning_rate": 5.567916636800675e-06, "loss": 0.0004, "step": 157290 }, { "epoch": 1.0348479964211232, "grad_norm": 0.03722149341025241, "learning_rate": 5.567346237459853e-06, "loss": 0.0004, "step": 157300 }, { "epoch": 1.0349137845963567, "grad_norm": 0.026170589164545198, "learning_rate": 5.566775830639062e-06, "loss": 0.0009, "step": 157310 }, { "epoch": 1.03497957277159, "grad_norm": 0.025314724265895563, "learning_rate": 5.566205416345818e-06, "loss": 0.0006, "step": 157320 }, { "epoch": 1.0350453609468233, "grad_norm": 0.00695894209571979, "learning_rate": 5.565634994587648e-06, "loss": 0.0006, "step": 157330 }, { "epoch": 1.0351111491220568, "grad_norm": 0.05795922408563274, "learning_rate": 5.565064565372067e-06, "loss": 0.0012, "step": 157340 }, { "epoch": 1.03517693729729, "grad_norm": 0.014149866755778291, "learning_rate": 5.564494128706598e-06, "loss": 0.0004, "step": 157350 }, { "epoch": 1.0352427254725236, "grad_norm": 0.004904840501269912, "learning_rate": 5.563923684598762e-06, "loss": 0.0008, "step": 157360 }, { "epoch": 1.035308513647757, "grad_norm": 0.02501386792290137, "learning_rate": 5.563353233056079e-06, "loss": 0.0011, "step": 157370 }, { "epoch": 1.0353743018229904, "grad_norm": 0.004133380410369509, "learning_rate": 5.5627827740860695e-06, "loss": 0.001, "step": 157380 }, { "epoch": 1.0354400899982237, "grad_norm": 0.06707093209098405, "learning_rate": 5.5622123076962566e-06, "loss": 0.0012, "step": 157390 }, { "epoch": 1.0355058781734572, "grad_norm": 0.023619444542948474, "learning_rate": 5.561641833894159e-06, "loss": 0.0007, "step": 157400 }, { "epoch": 1.0355716663486905, "grad_norm": 0.024197816308866894, "learning_rate": 5.5610713526873e-06, "loss": 0.0007, "step": 157410 }, { "epoch": 1.035637454523924, "grad_norm": 0.022423535843118417, "learning_rate": 5.560500864083198e-06, "loss": 0.0007, "step": 157420 }, { "epoch": 1.0357032426991573, "grad_norm": 0.00635404668142572, "learning_rate": 5.559930368089378e-06, "loss": 0.0005, "step": 157430 }, { "epoch": 1.0357690308743905, "grad_norm": 0.01250079276636109, "learning_rate": 5.559359864713358e-06, "loss": 0.0005, "step": 157440 }, { "epoch": 1.035834819049624, "grad_norm": 0.01231820401988262, "learning_rate": 5.558789353962662e-06, "loss": 0.0011, "step": 157450 }, { "epoch": 1.0359006072248573, "grad_norm": 0.02813113555007869, "learning_rate": 5.5582188358448106e-06, "loss": 0.0012, "step": 157460 }, { "epoch": 1.0359663954000908, "grad_norm": 0.11179740359540304, "learning_rate": 5.557648310367327e-06, "loss": 0.0008, "step": 157470 }, { "epoch": 1.0360321835753241, "grad_norm": 0.04948929718272177, "learning_rate": 5.557077777537732e-06, "loss": 0.0006, "step": 157480 }, { "epoch": 1.0360979717505576, "grad_norm": 0.023295332697007825, "learning_rate": 5.556507237363547e-06, "loss": 0.0009, "step": 157490 }, { "epoch": 1.036163759925791, "grad_norm": 0.0028753006668985593, "learning_rate": 5.555936689852295e-06, "loss": 0.0009, "step": 157500 }, { "epoch": 1.0362295481010244, "grad_norm": 0.032431802500810986, "learning_rate": 5.5553661350114975e-06, "loss": 0.0007, "step": 157510 }, { "epoch": 1.0362953362762577, "grad_norm": 0.04618228064341182, "learning_rate": 5.554795572848677e-06, "loss": 0.0009, "step": 157520 }, { "epoch": 1.0363611244514912, "grad_norm": 0.03825828680148319, "learning_rate": 5.5542250033713565e-06, "loss": 0.0007, "step": 157530 }, { "epoch": 1.0364269126267245, "grad_norm": 0.07257276762872633, "learning_rate": 5.5536544265870586e-06, "loss": 0.0007, "step": 157540 }, { "epoch": 1.0364927008019578, "grad_norm": 0.008248583053765048, "learning_rate": 5.553083842503304e-06, "loss": 0.0014, "step": 157550 }, { "epoch": 1.0365584889771913, "grad_norm": 0.0069731093540537515, "learning_rate": 5.552513251127617e-06, "loss": 0.0003, "step": 157560 }, { "epoch": 1.0366242771524246, "grad_norm": 0.247809611812706, "learning_rate": 5.55194265246752e-06, "loss": 0.0009, "step": 157570 }, { "epoch": 1.036690065327658, "grad_norm": 0.030681171670165607, "learning_rate": 5.551372046530535e-06, "loss": 0.0002, "step": 157580 }, { "epoch": 1.0367558535028913, "grad_norm": 0.05464676139125426, "learning_rate": 5.550801433324187e-06, "loss": 0.0004, "step": 157590 }, { "epoch": 1.0368216416781249, "grad_norm": 0.02421827104736333, "learning_rate": 5.550230812855998e-06, "loss": 0.0014, "step": 157600 }, { "epoch": 1.0368874298533581, "grad_norm": 0.08614956584166528, "learning_rate": 5.549660185133491e-06, "loss": 0.0011, "step": 157610 }, { "epoch": 1.0369532180285916, "grad_norm": 0.03805443101504887, "learning_rate": 5.549089550164188e-06, "loss": 0.0007, "step": 157620 }, { "epoch": 1.037019006203825, "grad_norm": 0.05626611931354888, "learning_rate": 5.5485189079556145e-06, "loss": 0.0007, "step": 157630 }, { "epoch": 1.0370847943790582, "grad_norm": 0.12028904942021192, "learning_rate": 5.547948258515292e-06, "loss": 0.001, "step": 157640 }, { "epoch": 1.0371505825542917, "grad_norm": 0.01741213879423735, "learning_rate": 5.5473776018507455e-06, "loss": 0.0043, "step": 157650 }, { "epoch": 1.037216370729525, "grad_norm": 0.16485284723913096, "learning_rate": 5.546806937969498e-06, "loss": 0.0007, "step": 157660 }, { "epoch": 1.0372821589047585, "grad_norm": 0.02097943053590623, "learning_rate": 5.546236266879074e-06, "loss": 0.0008, "step": 157670 }, { "epoch": 1.0373479470799918, "grad_norm": 0.0567334485995254, "learning_rate": 5.545665588586996e-06, "loss": 0.0011, "step": 157680 }, { "epoch": 1.0374137352552253, "grad_norm": 0.02087463301434574, "learning_rate": 5.545094903100789e-06, "loss": 0.0012, "step": 157690 }, { "epoch": 1.0374795234304586, "grad_norm": 0.013701150042348268, "learning_rate": 5.544524210427975e-06, "loss": 0.0012, "step": 157700 }, { "epoch": 1.037545311605692, "grad_norm": 0.03580424265017633, "learning_rate": 5.54395351057608e-06, "loss": 0.0003, "step": 157710 }, { "epoch": 1.0376110997809254, "grad_norm": 0.026058557252403528, "learning_rate": 5.5433828035526286e-06, "loss": 0.0003, "step": 157720 }, { "epoch": 1.0376768879561589, "grad_norm": 0.023431948421785986, "learning_rate": 5.542812089365144e-06, "loss": 0.0009, "step": 157730 }, { "epoch": 1.0377426761313921, "grad_norm": 0.061132487597922135, "learning_rate": 5.542241368021151e-06, "loss": 0.0014, "step": 157740 }, { "epoch": 1.0378084643066254, "grad_norm": 0.061921068705826574, "learning_rate": 5.541670639528173e-06, "loss": 0.0011, "step": 157750 }, { "epoch": 1.037874252481859, "grad_norm": 0.17718310678925112, "learning_rate": 5.541099903893736e-06, "loss": 0.0018, "step": 157760 }, { "epoch": 1.0379400406570922, "grad_norm": 0.048674907492818255, "learning_rate": 5.540529161125363e-06, "loss": 0.001, "step": 157770 }, { "epoch": 1.0380058288323257, "grad_norm": 0.03982815736293967, "learning_rate": 5.539958411230581e-06, "loss": 0.0007, "step": 157780 }, { "epoch": 1.038071617007559, "grad_norm": 0.001305988289823656, "learning_rate": 5.5393876542169135e-06, "loss": 0.0008, "step": 157790 }, { "epoch": 1.0381374051827925, "grad_norm": 0.002182987468547268, "learning_rate": 5.538816890091886e-06, "loss": 0.0011, "step": 157800 }, { "epoch": 1.0382031933580258, "grad_norm": 0.0429098430565978, "learning_rate": 5.538246118863023e-06, "loss": 0.0015, "step": 157810 }, { "epoch": 1.0382689815332593, "grad_norm": 0.015602978638692654, "learning_rate": 5.53767534053785e-06, "loss": 0.0012, "step": 157820 }, { "epoch": 1.0383347697084926, "grad_norm": 0.0879347486780155, "learning_rate": 5.5371045551238925e-06, "loss": 0.0008, "step": 157830 }, { "epoch": 1.0384005578837259, "grad_norm": 0.008448701105487016, "learning_rate": 5.536533762628673e-06, "loss": 0.001, "step": 157840 }, { "epoch": 1.0384663460589594, "grad_norm": 0.014342710910176434, "learning_rate": 5.53596296305972e-06, "loss": 0.0013, "step": 157850 }, { "epoch": 1.0385321342341927, "grad_norm": 0.0216146570048049, "learning_rate": 5.5353921564245595e-06, "loss": 0.0035, "step": 157860 }, { "epoch": 1.0385979224094262, "grad_norm": 0.07133111523298835, "learning_rate": 5.5348213427307155e-06, "loss": 0.0009, "step": 157870 }, { "epoch": 1.0386637105846594, "grad_norm": 0.08645098809563571, "learning_rate": 5.5342505219857135e-06, "loss": 0.0006, "step": 157880 }, { "epoch": 1.038729498759893, "grad_norm": 0.09791686247941613, "learning_rate": 5.53367969419708e-06, "loss": 0.0019, "step": 157890 }, { "epoch": 1.0387952869351262, "grad_norm": 0.0025646829920178992, "learning_rate": 5.53310885937234e-06, "loss": 0.0009, "step": 157900 }, { "epoch": 1.0388610751103597, "grad_norm": 0.052162986300372034, "learning_rate": 5.532538017519021e-06, "loss": 0.001, "step": 157910 }, { "epoch": 1.038926863285593, "grad_norm": 0.07452222133211091, "learning_rate": 5.531967168644647e-06, "loss": 0.0009, "step": 157920 }, { "epoch": 1.0389926514608265, "grad_norm": 0.013292893490514178, "learning_rate": 5.531396312756747e-06, "loss": 0.0016, "step": 157930 }, { "epoch": 1.0390584396360598, "grad_norm": 0.029793733647103488, "learning_rate": 5.530825449862845e-06, "loss": 0.0017, "step": 157940 }, { "epoch": 1.039124227811293, "grad_norm": 0.014460654922792892, "learning_rate": 5.5302545799704676e-06, "loss": 0.0004, "step": 157950 }, { "epoch": 1.0391900159865266, "grad_norm": 0.020857098684475637, "learning_rate": 5.529683703087142e-06, "loss": 0.0014, "step": 157960 }, { "epoch": 1.0392558041617599, "grad_norm": 0.019463907468348658, "learning_rate": 5.529112819220394e-06, "loss": 0.0009, "step": 157970 }, { "epoch": 1.0393215923369934, "grad_norm": 0.017856967407885612, "learning_rate": 5.52854192837775e-06, "loss": 0.001, "step": 157980 }, { "epoch": 1.0393873805122267, "grad_norm": 0.018448692366323835, "learning_rate": 5.527971030566737e-06, "loss": 0.001, "step": 157990 }, { "epoch": 1.0394531686874602, "grad_norm": 0.07242315857306182, "learning_rate": 5.527400125794883e-06, "loss": 0.0004, "step": 158000 }, { "epoch": 1.0395189568626935, "grad_norm": 0.016524941016678615, "learning_rate": 5.526829214069713e-06, "loss": 0.0007, "step": 158010 }, { "epoch": 1.039584745037927, "grad_norm": 0.052347345038241774, "learning_rate": 5.526258295398755e-06, "loss": 0.0009, "step": 158020 }, { "epoch": 1.0396505332131603, "grad_norm": 0.1601198247121409, "learning_rate": 5.525687369789535e-06, "loss": 0.0009, "step": 158030 }, { "epoch": 1.0397163213883935, "grad_norm": 0.035674088257363484, "learning_rate": 5.5251164372495826e-06, "loss": 0.0008, "step": 158040 }, { "epoch": 1.039782109563627, "grad_norm": 0.003535737794481767, "learning_rate": 5.524545497786422e-06, "loss": 0.0023, "step": 158050 }, { "epoch": 1.0398478977388603, "grad_norm": 0.03022658208598668, "learning_rate": 5.5239745514075834e-06, "loss": 0.0017, "step": 158060 }, { "epoch": 1.0399136859140938, "grad_norm": 0.14882916109424507, "learning_rate": 5.523403598120592e-06, "loss": 0.0009, "step": 158070 }, { "epoch": 1.0399794740893271, "grad_norm": 0.028946692583757067, "learning_rate": 5.5228326379329776e-06, "loss": 0.0007, "step": 158080 }, { "epoch": 1.0400452622645606, "grad_norm": 0.025191420801544174, "learning_rate": 5.522261670852266e-06, "loss": 0.0003, "step": 158090 }, { "epoch": 1.040111050439794, "grad_norm": 0.009022394548728099, "learning_rate": 5.521690696885983e-06, "loss": 0.0008, "step": 158100 }, { "epoch": 1.0401768386150274, "grad_norm": 0.04786089655112855, "learning_rate": 5.521119716041661e-06, "loss": 0.0006, "step": 158110 }, { "epoch": 1.0402426267902607, "grad_norm": 0.005038468165362725, "learning_rate": 5.5205487283268265e-06, "loss": 0.0003, "step": 158120 }, { "epoch": 1.0403084149654942, "grad_norm": 0.0035577866704796404, "learning_rate": 5.519977733749004e-06, "loss": 0.0006, "step": 158130 }, { "epoch": 1.0403742031407275, "grad_norm": 0.018954268508723986, "learning_rate": 5.519406732315726e-06, "loss": 0.0013, "step": 158140 }, { "epoch": 1.0404399913159608, "grad_norm": 0.0036887967663632422, "learning_rate": 5.518835724034517e-06, "loss": 0.0004, "step": 158150 }, { "epoch": 1.0405057794911943, "grad_norm": 0.04180411515363088, "learning_rate": 5.51826470891291e-06, "loss": 0.0005, "step": 158160 }, { "epoch": 1.0405715676664276, "grad_norm": 0.05726956514353466, "learning_rate": 5.517693686958428e-06, "loss": 0.0014, "step": 158170 }, { "epoch": 1.040637355841661, "grad_norm": 0.06591456120249388, "learning_rate": 5.517122658178602e-06, "loss": 0.001, "step": 158180 }, { "epoch": 1.0407031440168943, "grad_norm": 0.008760551749341814, "learning_rate": 5.516551622580961e-06, "loss": 0.0007, "step": 158190 }, { "epoch": 1.0407689321921278, "grad_norm": 0.10129732538500373, "learning_rate": 5.515980580173032e-06, "loss": 0.001, "step": 158200 }, { "epoch": 1.0408347203673611, "grad_norm": 0.009880386704698589, "learning_rate": 5.515409530962346e-06, "loss": 0.0009, "step": 158210 }, { "epoch": 1.0409005085425946, "grad_norm": 0.012423319348619316, "learning_rate": 5.5148384749564295e-06, "loss": 0.0007, "step": 158220 }, { "epoch": 1.040966296717828, "grad_norm": 0.01910350038424999, "learning_rate": 5.514267412162814e-06, "loss": 0.0007, "step": 158230 }, { "epoch": 1.0410320848930614, "grad_norm": 0.015127656976737361, "learning_rate": 5.513696342589026e-06, "loss": 0.0004, "step": 158240 }, { "epoch": 1.0410978730682947, "grad_norm": 0.13289212317645213, "learning_rate": 5.513125266242594e-06, "loss": 0.001, "step": 158250 }, { "epoch": 1.041163661243528, "grad_norm": 0.11058125024512601, "learning_rate": 5.51255418313105e-06, "loss": 0.0008, "step": 158260 }, { "epoch": 1.0412294494187615, "grad_norm": 0.0036770384163174893, "learning_rate": 5.5119830932619215e-06, "loss": 0.0007, "step": 158270 }, { "epoch": 1.0412952375939948, "grad_norm": 0.029577012644060506, "learning_rate": 5.511411996642737e-06, "loss": 0.0007, "step": 158280 }, { "epoch": 1.0413610257692283, "grad_norm": 0.019660975382204415, "learning_rate": 5.5108408932810276e-06, "loss": 0.0007, "step": 158290 }, { "epoch": 1.0414268139444616, "grad_norm": 0.0050939799641414995, "learning_rate": 5.5102697831843234e-06, "loss": 0.0003, "step": 158300 }, { "epoch": 1.041492602119695, "grad_norm": 0.02361501162846083, "learning_rate": 5.509698666360152e-06, "loss": 0.0004, "step": 158310 }, { "epoch": 1.0415583902949284, "grad_norm": 0.09261461528114058, "learning_rate": 5.509127542816044e-06, "loss": 0.0015, "step": 158320 }, { "epoch": 1.0416241784701619, "grad_norm": 0.06893526703693155, "learning_rate": 5.508556412559528e-06, "loss": 0.0007, "step": 158330 }, { "epoch": 1.0416899666453951, "grad_norm": 0.06948614080577857, "learning_rate": 5.507985275598135e-06, "loss": 0.0009, "step": 158340 }, { "epoch": 1.0417557548206284, "grad_norm": 0.031168375827534355, "learning_rate": 5.507414131939396e-06, "loss": 0.0009, "step": 158350 }, { "epoch": 1.041821542995862, "grad_norm": 0.01711915440659896, "learning_rate": 5.50684298159084e-06, "loss": 0.0008, "step": 158360 }, { "epoch": 1.0418873311710952, "grad_norm": 0.0022267829943547063, "learning_rate": 5.506271824559997e-06, "loss": 0.0012, "step": 158370 }, { "epoch": 1.0419531193463287, "grad_norm": 0.031191844017058174, "learning_rate": 5.505700660854396e-06, "loss": 0.001, "step": 158380 }, { "epoch": 1.042018907521562, "grad_norm": 0.007387129103946123, "learning_rate": 5.50512949048157e-06, "loss": 0.0009, "step": 158390 }, { "epoch": 1.0420846956967955, "grad_norm": 0.02573976083234785, "learning_rate": 5.504558313449047e-06, "loss": 0.0004, "step": 158400 }, { "epoch": 1.0421504838720288, "grad_norm": 0.049390957702198104, "learning_rate": 5.503987129764359e-06, "loss": 0.0011, "step": 158410 }, { "epoch": 1.0422162720472623, "grad_norm": 0.013182501786172415, "learning_rate": 5.503415939435035e-06, "loss": 0.0007, "step": 158420 }, { "epoch": 1.0422820602224956, "grad_norm": 0.7985301576300248, "learning_rate": 5.502844742468607e-06, "loss": 0.0009, "step": 158430 }, { "epoch": 1.042347848397729, "grad_norm": 0.03882595497695202, "learning_rate": 5.502273538872607e-06, "loss": 0.0007, "step": 158440 }, { "epoch": 1.0424136365729624, "grad_norm": 0.01276565960654029, "learning_rate": 5.501702328654563e-06, "loss": 0.0005, "step": 158450 }, { "epoch": 1.0424794247481957, "grad_norm": 0.02081138720093956, "learning_rate": 5.501131111822007e-06, "loss": 0.0013, "step": 158460 }, { "epoch": 1.0425452129234292, "grad_norm": 0.015698814121773978, "learning_rate": 5.500559888382468e-06, "loss": 0.0008, "step": 158470 }, { "epoch": 1.0426110010986624, "grad_norm": 0.023444950375958718, "learning_rate": 5.499988658343481e-06, "loss": 0.0003, "step": 158480 }, { "epoch": 1.042676789273896, "grad_norm": 0.005111198347990552, "learning_rate": 5.4994174217125764e-06, "loss": 0.0003, "step": 158490 }, { "epoch": 1.0427425774491292, "grad_norm": 0.005333088885712008, "learning_rate": 5.498846178497284e-06, "loss": 0.0018, "step": 158500 }, { "epoch": 1.0428083656243627, "grad_norm": 0.02758336366579032, "learning_rate": 5.4982749287051355e-06, "loss": 0.0011, "step": 158510 }, { "epoch": 1.042874153799596, "grad_norm": 0.06798530818995226, "learning_rate": 5.497703672343661e-06, "loss": 0.0012, "step": 158520 }, { "epoch": 1.0429399419748295, "grad_norm": 0.04020280074737258, "learning_rate": 5.497132409420395e-06, "loss": 0.0004, "step": 158530 }, { "epoch": 1.0430057301500628, "grad_norm": 0.05971733497279705, "learning_rate": 5.496561139942866e-06, "loss": 0.0007, "step": 158540 }, { "epoch": 1.0430715183252963, "grad_norm": 0.021336209069603353, "learning_rate": 5.495989863918608e-06, "loss": 0.0008, "step": 158550 }, { "epoch": 1.0431373065005296, "grad_norm": 0.040435937092177715, "learning_rate": 5.495418581355153e-06, "loss": 0.0008, "step": 158560 }, { "epoch": 1.0432030946757629, "grad_norm": 0.03607386304158096, "learning_rate": 5.494847292260032e-06, "loss": 0.001, "step": 158570 }, { "epoch": 1.0432688828509964, "grad_norm": 0.028051749574104674, "learning_rate": 5.494275996640775e-06, "loss": 0.0003, "step": 158580 }, { "epoch": 1.0433346710262297, "grad_norm": 0.22076189672565427, "learning_rate": 5.493704694504918e-06, "loss": 0.0011, "step": 158590 }, { "epoch": 1.0434004592014632, "grad_norm": 0.034127648614304756, "learning_rate": 5.493133385859988e-06, "loss": 0.0004, "step": 158600 }, { "epoch": 1.0434662473766965, "grad_norm": 0.00959383947889099, "learning_rate": 5.4925620707135215e-06, "loss": 0.0012, "step": 158610 }, { "epoch": 1.04353203555193, "grad_norm": 0.19966748829522984, "learning_rate": 5.491990749073051e-06, "loss": 0.0017, "step": 158620 }, { "epoch": 1.0435978237271633, "grad_norm": 0.0005632515047050093, "learning_rate": 5.491419420946107e-06, "loss": 0.0007, "step": 158630 }, { "epoch": 1.0436636119023968, "grad_norm": 0.007864997121408654, "learning_rate": 5.490848086340221e-06, "loss": 0.0009, "step": 158640 }, { "epoch": 1.04372940007763, "grad_norm": 0.06783968236775492, "learning_rate": 5.490276745262928e-06, "loss": 0.0006, "step": 158650 }, { "epoch": 1.0437951882528633, "grad_norm": 0.01630278578428838, "learning_rate": 5.4897053977217594e-06, "loss": 0.0005, "step": 158660 }, { "epoch": 1.0438609764280968, "grad_norm": 0.017690498008726102, "learning_rate": 5.489134043724247e-06, "loss": 0.0006, "step": 158670 }, { "epoch": 1.0439267646033301, "grad_norm": 0.01685641162256568, "learning_rate": 5.488562683277926e-06, "loss": 0.0003, "step": 158680 }, { "epoch": 1.0439925527785636, "grad_norm": 0.15597341666071146, "learning_rate": 5.487991316390327e-06, "loss": 0.0008, "step": 158690 }, { "epoch": 1.044058340953797, "grad_norm": 0.01569102931293296, "learning_rate": 5.487419943068984e-06, "loss": 0.0006, "step": 158700 }, { "epoch": 1.0441241291290304, "grad_norm": 0.09580482099414532, "learning_rate": 5.486848563321432e-06, "loss": 0.0014, "step": 158710 }, { "epoch": 1.0441899173042637, "grad_norm": 0.0066826832117880515, "learning_rate": 5.4862771771552e-06, "loss": 0.0006, "step": 158720 }, { "epoch": 1.0442557054794972, "grad_norm": 0.014144579560713208, "learning_rate": 5.4857057845778235e-06, "loss": 0.0004, "step": 158730 }, { "epoch": 1.0443214936547305, "grad_norm": 0.06448349684895906, "learning_rate": 5.485134385596836e-06, "loss": 0.0008, "step": 158740 }, { "epoch": 1.044387281829964, "grad_norm": 0.1181931583617462, "learning_rate": 5.484562980219771e-06, "loss": 0.0015, "step": 158750 }, { "epoch": 1.0444530700051973, "grad_norm": 0.05615068238347325, "learning_rate": 5.483991568454162e-06, "loss": 0.0005, "step": 158760 }, { "epoch": 1.0445188581804306, "grad_norm": 0.004890862285418931, "learning_rate": 5.483420150307541e-06, "loss": 0.0008, "step": 158770 }, { "epoch": 1.044584646355664, "grad_norm": 0.03681313979058139, "learning_rate": 5.482848725787444e-06, "loss": 0.0006, "step": 158780 }, { "epoch": 1.0446504345308973, "grad_norm": 0.015009599930780224, "learning_rate": 5.482277294901404e-06, "loss": 0.003, "step": 158790 }, { "epoch": 1.0447162227061308, "grad_norm": 0.0038670947532995228, "learning_rate": 5.481705857656952e-06, "loss": 0.0006, "step": 158800 }, { "epoch": 1.0447820108813641, "grad_norm": 0.020981339206055893, "learning_rate": 5.481134414061626e-06, "loss": 0.001, "step": 158810 }, { "epoch": 1.0448477990565976, "grad_norm": 0.023206916497460223, "learning_rate": 5.480562964122957e-06, "loss": 0.0007, "step": 158820 }, { "epoch": 1.044913587231831, "grad_norm": 0.0392831254202927, "learning_rate": 5.479991507848482e-06, "loss": 0.0007, "step": 158830 }, { "epoch": 1.0449793754070644, "grad_norm": 0.015466675911806416, "learning_rate": 5.479420045245734e-06, "loss": 0.001, "step": 158840 }, { "epoch": 1.0450451635822977, "grad_norm": 0.022177898276500126, "learning_rate": 5.478848576322245e-06, "loss": 0.0005, "step": 158850 }, { "epoch": 1.0451109517575312, "grad_norm": 0.03719226223347226, "learning_rate": 5.478277101085551e-06, "loss": 0.0011, "step": 158860 }, { "epoch": 1.0451767399327645, "grad_norm": 0.043828016850664724, "learning_rate": 5.477705619543188e-06, "loss": 0.0006, "step": 158870 }, { "epoch": 1.0452425281079978, "grad_norm": 0.004266367720576164, "learning_rate": 5.477134131702688e-06, "loss": 0.0006, "step": 158880 }, { "epoch": 1.0453083162832313, "grad_norm": 0.06277445059534764, "learning_rate": 5.476562637571586e-06, "loss": 0.001, "step": 158890 }, { "epoch": 1.0453741044584646, "grad_norm": 0.03220474406591658, "learning_rate": 5.4759911371574195e-06, "loss": 0.0008, "step": 158900 }, { "epoch": 1.045439892633698, "grad_norm": 0.0019707024022467628, "learning_rate": 5.4754196304677184e-06, "loss": 0.0008, "step": 158910 }, { "epoch": 1.0455056808089314, "grad_norm": 0.025036712158259748, "learning_rate": 5.474848117510021e-06, "loss": 0.0006, "step": 158920 }, { "epoch": 1.0455714689841649, "grad_norm": 0.00428131468317209, "learning_rate": 5.474276598291861e-06, "loss": 0.0002, "step": 158930 }, { "epoch": 1.0456372571593981, "grad_norm": 0.02335881991056318, "learning_rate": 5.4737050728207745e-06, "loss": 0.0006, "step": 158940 }, { "epoch": 1.0457030453346317, "grad_norm": 0.031822135027859244, "learning_rate": 5.473133541104295e-06, "loss": 0.0018, "step": 158950 }, { "epoch": 1.045768833509865, "grad_norm": 0.03542421203938147, "learning_rate": 5.472562003149959e-06, "loss": 0.0009, "step": 158960 }, { "epoch": 1.0458346216850982, "grad_norm": 0.10519017920555229, "learning_rate": 5.471990458965301e-06, "loss": 0.0015, "step": 158970 }, { "epoch": 1.0459004098603317, "grad_norm": 0.007298359930158283, "learning_rate": 5.471418908557856e-06, "loss": 0.001, "step": 158980 }, { "epoch": 1.045966198035565, "grad_norm": 0.06443125664745682, "learning_rate": 5.470847351935159e-06, "loss": 0.0007, "step": 158990 }, { "epoch": 1.0460319862107985, "grad_norm": 0.10123733573855788, "learning_rate": 5.470275789104748e-06, "loss": 0.0009, "step": 159000 }, { "epoch": 1.0460977743860318, "grad_norm": 0.04591310788618319, "learning_rate": 5.469704220074156e-06, "loss": 0.0006, "step": 159010 }, { "epoch": 1.0461635625612653, "grad_norm": 0.021528172624654424, "learning_rate": 5.469132644850919e-06, "loss": 0.0008, "step": 159020 }, { "epoch": 1.0462293507364986, "grad_norm": 0.013426193024304795, "learning_rate": 5.468561063442574e-06, "loss": 0.0007, "step": 159030 }, { "epoch": 1.046295138911732, "grad_norm": 0.004177394109638001, "learning_rate": 5.467989475856656e-06, "loss": 0.0006, "step": 159040 }, { "epoch": 1.0463609270869654, "grad_norm": 0.078199868777837, "learning_rate": 5.467417882100702e-06, "loss": 0.001, "step": 159050 }, { "epoch": 1.0464267152621987, "grad_norm": 0.008587526562744155, "learning_rate": 5.466846282182246e-06, "loss": 0.0005, "step": 159060 }, { "epoch": 1.0464925034374322, "grad_norm": 0.042913105028548275, "learning_rate": 5.466274676108825e-06, "loss": 0.0011, "step": 159070 }, { "epoch": 1.0465582916126654, "grad_norm": 0.007426493974526073, "learning_rate": 5.465703063887975e-06, "loss": 0.0007, "step": 159080 }, { "epoch": 1.046624079787899, "grad_norm": 0.02303709128862677, "learning_rate": 5.465131445527233e-06, "loss": 0.0006, "step": 159090 }, { "epoch": 1.0466898679631322, "grad_norm": 0.002027300229430436, "learning_rate": 5.464559821034133e-06, "loss": 0.0012, "step": 159100 }, { "epoch": 1.0467556561383657, "grad_norm": 0.026422319038817154, "learning_rate": 5.463988190416214e-06, "loss": 0.0009, "step": 159110 }, { "epoch": 1.046821444313599, "grad_norm": 0.0007941548242316564, "learning_rate": 5.463416553681011e-06, "loss": 0.0005, "step": 159120 }, { "epoch": 1.0468872324888325, "grad_norm": 0.08054012132167192, "learning_rate": 5.4628449108360614e-06, "loss": 0.0007, "step": 159130 }, { "epoch": 1.0469530206640658, "grad_norm": 0.22305792944877745, "learning_rate": 5.4622732618889014e-06, "loss": 0.0013, "step": 159140 }, { "epoch": 1.0470188088392993, "grad_norm": 0.06562183287645651, "learning_rate": 5.4617016068470675e-06, "loss": 0.0018, "step": 159150 }, { "epoch": 1.0470845970145326, "grad_norm": 0.02043034839470262, "learning_rate": 5.4611299457180965e-06, "loss": 0.0009, "step": 159160 }, { "epoch": 1.0471503851897659, "grad_norm": 0.2533983654846709, "learning_rate": 5.460558278509527e-06, "loss": 0.0011, "step": 159170 }, { "epoch": 1.0472161733649994, "grad_norm": 0.0005652171701030171, "learning_rate": 5.459986605228891e-06, "loss": 0.0007, "step": 159180 }, { "epoch": 1.0472819615402327, "grad_norm": 0.01187490383878535, "learning_rate": 5.459414925883731e-06, "loss": 0.0006, "step": 159190 }, { "epoch": 1.0473477497154662, "grad_norm": 0.006836124583014703, "learning_rate": 5.458843240481583e-06, "loss": 0.0006, "step": 159200 }, { "epoch": 1.0474135378906995, "grad_norm": 0.017795809611811216, "learning_rate": 5.458271549029981e-06, "loss": 0.0004, "step": 159210 }, { "epoch": 1.047479326065933, "grad_norm": 0.02629897431238191, "learning_rate": 5.4576998515364665e-06, "loss": 0.0004, "step": 159220 }, { "epoch": 1.0475451142411663, "grad_norm": 0.02512401872108417, "learning_rate": 5.4571281480085735e-06, "loss": 0.0011, "step": 159230 }, { "epoch": 1.0476109024163998, "grad_norm": 0.051977827389115314, "learning_rate": 5.4565564384538396e-06, "loss": 0.0013, "step": 159240 }, { "epoch": 1.047676690591633, "grad_norm": 0.010627956215110099, "learning_rate": 5.455984722879805e-06, "loss": 0.0008, "step": 159250 }, { "epoch": 1.0477424787668665, "grad_norm": 0.020535685154729217, "learning_rate": 5.4554130012940055e-06, "loss": 0.0003, "step": 159260 }, { "epoch": 1.0478082669420998, "grad_norm": 0.04132395230712358, "learning_rate": 5.454841273703979e-06, "loss": 0.0003, "step": 159270 }, { "epoch": 1.0478740551173331, "grad_norm": 0.01790808468484772, "learning_rate": 5.454269540117263e-06, "loss": 0.001, "step": 159280 }, { "epoch": 1.0479398432925666, "grad_norm": 0.05855607630358122, "learning_rate": 5.453697800541395e-06, "loss": 0.0007, "step": 159290 }, { "epoch": 1.0480056314678, "grad_norm": 0.18949023633916, "learning_rate": 5.453126054983914e-06, "loss": 0.003, "step": 159300 }, { "epoch": 1.0480714196430334, "grad_norm": 0.006837334199900449, "learning_rate": 5.4525543034523554e-06, "loss": 0.0007, "step": 159310 }, { "epoch": 1.0481372078182667, "grad_norm": 0.029791146809619872, "learning_rate": 5.451982545954261e-06, "loss": 0.0003, "step": 159320 }, { "epoch": 1.0482029959935002, "grad_norm": 0.03617784906314869, "learning_rate": 5.451410782497166e-06, "loss": 0.0007, "step": 159330 }, { "epoch": 1.0482687841687335, "grad_norm": 0.03195658260342474, "learning_rate": 5.450839013088611e-06, "loss": 0.0021, "step": 159340 }, { "epoch": 1.048334572343967, "grad_norm": 0.01821687665278068, "learning_rate": 5.450267237736133e-06, "loss": 0.0006, "step": 159350 }, { "epoch": 1.0484003605192003, "grad_norm": 0.05180937281328816, "learning_rate": 5.449695456447269e-06, "loss": 0.0005, "step": 159360 }, { "epoch": 1.0484661486944336, "grad_norm": 0.00012519666868565809, "learning_rate": 5.449123669229559e-06, "loss": 0.0009, "step": 159370 }, { "epoch": 1.048531936869667, "grad_norm": 0.010121178508026787, "learning_rate": 5.448551876090542e-06, "loss": 0.002, "step": 159380 }, { "epoch": 1.0485977250449003, "grad_norm": 0.11784252833332415, "learning_rate": 5.447980077037756e-06, "loss": 0.0009, "step": 159390 }, { "epoch": 1.0486635132201338, "grad_norm": 0.03272045925808334, "learning_rate": 5.44740827207874e-06, "loss": 0.001, "step": 159400 }, { "epoch": 1.0487293013953671, "grad_norm": 0.004640949027448878, "learning_rate": 5.446836461221031e-06, "loss": 0.0023, "step": 159410 }, { "epoch": 1.0487950895706006, "grad_norm": 0.01654151862938899, "learning_rate": 5.44626464447217e-06, "loss": 0.0012, "step": 159420 }, { "epoch": 1.048860877745834, "grad_norm": 0.03810113484337805, "learning_rate": 5.4456928218396954e-06, "loss": 0.0013, "step": 159430 }, { "epoch": 1.0489266659210674, "grad_norm": 0.028004068898223114, "learning_rate": 5.445120993331144e-06, "loss": 0.0008, "step": 159440 }, { "epoch": 1.0489924540963007, "grad_norm": 0.025232166890333568, "learning_rate": 5.444549158954059e-06, "loss": 0.001, "step": 159450 }, { "epoch": 1.0490582422715342, "grad_norm": 0.015404977999996253, "learning_rate": 5.443977318715976e-06, "loss": 0.0007, "step": 159460 }, { "epoch": 1.0491240304467675, "grad_norm": 0.04328993262691134, "learning_rate": 5.443405472624437e-06, "loss": 0.0005, "step": 159470 }, { "epoch": 1.0491898186220008, "grad_norm": 0.00268295242767107, "learning_rate": 5.442833620686978e-06, "loss": 0.0008, "step": 159480 }, { "epoch": 1.0492556067972343, "grad_norm": 0.02675768827885412, "learning_rate": 5.442261762911142e-06, "loss": 0.0007, "step": 159490 }, { "epoch": 1.0493213949724676, "grad_norm": 0.008007897739563676, "learning_rate": 5.441689899304464e-06, "loss": 0.0007, "step": 159500 }, { "epoch": 1.049387183147701, "grad_norm": 0.023986185535293008, "learning_rate": 5.441118029874488e-06, "loss": 0.0014, "step": 159510 }, { "epoch": 1.0494529713229344, "grad_norm": 0.035346483517963864, "learning_rate": 5.440546154628752e-06, "loss": 0.0009, "step": 159520 }, { "epoch": 1.0495187594981679, "grad_norm": 0.05187837327683933, "learning_rate": 5.439974273574794e-06, "loss": 0.0012, "step": 159530 }, { "epoch": 1.0495845476734011, "grad_norm": 0.045738265003815316, "learning_rate": 5.439402386720156e-06, "loss": 0.0005, "step": 159540 }, { "epoch": 1.0496503358486347, "grad_norm": 0.05840628358497801, "learning_rate": 5.438830494072378e-06, "loss": 0.001, "step": 159550 }, { "epoch": 1.049716124023868, "grad_norm": 0.06424033194397345, "learning_rate": 5.438258595638997e-06, "loss": 0.0016, "step": 159560 }, { "epoch": 1.0497819121991014, "grad_norm": 0.054598092947070034, "learning_rate": 5.437686691427556e-06, "loss": 0.0007, "step": 159570 }, { "epoch": 1.0498477003743347, "grad_norm": 0.037514520482913004, "learning_rate": 5.437114781445594e-06, "loss": 0.0008, "step": 159580 }, { "epoch": 1.049913488549568, "grad_norm": 0.014350616058020838, "learning_rate": 5.436542865700652e-06, "loss": 0.0005, "step": 159590 }, { "epoch": 1.0499792767248015, "grad_norm": 0.08029891625521417, "learning_rate": 5.435970944200267e-06, "loss": 0.0008, "step": 159600 }, { "epoch": 1.0500450649000348, "grad_norm": 0.013337920859169673, "learning_rate": 5.435399016951983e-06, "loss": 0.0008, "step": 159610 }, { "epoch": 1.0501108530752683, "grad_norm": 0.06867246545636697, "learning_rate": 5.43482708396334e-06, "loss": 0.0022, "step": 159620 }, { "epoch": 1.0501766412505016, "grad_norm": 0.012888473498100041, "learning_rate": 5.4342551452418745e-06, "loss": 0.0007, "step": 159630 }, { "epoch": 1.050242429425735, "grad_norm": 0.028680277941908926, "learning_rate": 5.433683200795132e-06, "loss": 0.0008, "step": 159640 }, { "epoch": 1.0503082176009684, "grad_norm": 0.034451385627052564, "learning_rate": 5.433111250630651e-06, "loss": 0.0011, "step": 159650 }, { "epoch": 1.0503740057762019, "grad_norm": 0.05040688751560058, "learning_rate": 5.432539294755973e-06, "loss": 0.0014, "step": 159660 }, { "epoch": 1.0504397939514352, "grad_norm": 0.021638076300143565, "learning_rate": 5.431967333178637e-06, "loss": 0.0005, "step": 159670 }, { "epoch": 1.0505055821266684, "grad_norm": 0.03074704756297293, "learning_rate": 5.431395365906185e-06, "loss": 0.001, "step": 159680 }, { "epoch": 1.050571370301902, "grad_norm": 0.023983843357351563, "learning_rate": 5.430823392946158e-06, "loss": 0.0005, "step": 159690 }, { "epoch": 1.0506371584771352, "grad_norm": 0.0008025264044236274, "learning_rate": 5.4302514143060945e-06, "loss": 0.0007, "step": 159700 }, { "epoch": 1.0507029466523687, "grad_norm": 0.102305935940917, "learning_rate": 5.42967942999354e-06, "loss": 0.0018, "step": 159710 }, { "epoch": 1.050768734827602, "grad_norm": 0.014298137461805556, "learning_rate": 5.429107440016033e-06, "loss": 0.0008, "step": 159720 }, { "epoch": 1.0508345230028355, "grad_norm": 0.01890980425207106, "learning_rate": 5.428535444381115e-06, "loss": 0.0009, "step": 159730 }, { "epoch": 1.0509003111780688, "grad_norm": 0.059898400898376634, "learning_rate": 5.427963443096327e-06, "loss": 0.0029, "step": 159740 }, { "epoch": 1.0509660993533023, "grad_norm": 0.01213774305913114, "learning_rate": 5.427391436169211e-06, "loss": 0.0016, "step": 159750 }, { "epoch": 1.0510318875285356, "grad_norm": 0.007118181180486053, "learning_rate": 5.426819423607307e-06, "loss": 0.0016, "step": 159760 }, { "epoch": 1.051097675703769, "grad_norm": 0.013034128763468284, "learning_rate": 5.426247405418158e-06, "loss": 0.0005, "step": 159770 }, { "epoch": 1.0511634638790024, "grad_norm": 0.0130560484621055, "learning_rate": 5.4256753816093055e-06, "loss": 0.0007, "step": 159780 }, { "epoch": 1.0512292520542357, "grad_norm": 0.002365796867165586, "learning_rate": 5.42510335218829e-06, "loss": 0.0012, "step": 159790 }, { "epoch": 1.0512950402294692, "grad_norm": 0.03706642680165309, "learning_rate": 5.424531317162654e-06, "loss": 0.001, "step": 159800 }, { "epoch": 1.0513608284047025, "grad_norm": 0.021464983292254543, "learning_rate": 5.4239592765399405e-06, "loss": 0.0009, "step": 159810 }, { "epoch": 1.051426616579936, "grad_norm": 0.04624517533678664, "learning_rate": 5.423387230327689e-06, "loss": 0.0011, "step": 159820 }, { "epoch": 1.0514924047551693, "grad_norm": 0.013131141044655507, "learning_rate": 5.4228151785334425e-06, "loss": 0.0004, "step": 159830 }, { "epoch": 1.0515581929304028, "grad_norm": 0.024304578675597998, "learning_rate": 5.422243121164743e-06, "loss": 0.0023, "step": 159840 }, { "epoch": 1.051623981105636, "grad_norm": 0.013805651922398802, "learning_rate": 5.421671058229133e-06, "loss": 0.0005, "step": 159850 }, { "epoch": 1.0516897692808695, "grad_norm": 0.10942613822426871, "learning_rate": 5.421098989734155e-06, "loss": 0.001, "step": 159860 }, { "epoch": 1.0517555574561028, "grad_norm": 0.026066638614992334, "learning_rate": 5.42052691568735e-06, "loss": 0.0012, "step": 159870 }, { "epoch": 1.0518213456313363, "grad_norm": 0.00866140359151877, "learning_rate": 5.419954836096261e-06, "loss": 0.001, "step": 159880 }, { "epoch": 1.0518871338065696, "grad_norm": 0.006296806093041296, "learning_rate": 5.4193827509684285e-06, "loss": 0.0006, "step": 159890 }, { "epoch": 1.051952921981803, "grad_norm": 0.014136259326384286, "learning_rate": 5.418810660311398e-06, "loss": 0.0006, "step": 159900 }, { "epoch": 1.0520187101570364, "grad_norm": 0.06128845433135941, "learning_rate": 5.4182385641327095e-06, "loss": 0.0013, "step": 159910 }, { "epoch": 1.0520844983322697, "grad_norm": 0.04704947469645336, "learning_rate": 5.417666462439909e-06, "loss": 0.0035, "step": 159920 }, { "epoch": 1.0521502865075032, "grad_norm": 0.09609821637126868, "learning_rate": 5.4170943552405355e-06, "loss": 0.0003, "step": 159930 }, { "epoch": 1.0522160746827365, "grad_norm": 0.08328379931902107, "learning_rate": 5.416522242542134e-06, "loss": 0.0012, "step": 159940 }, { "epoch": 1.05228186285797, "grad_norm": 0.02026695621108798, "learning_rate": 5.415950124352245e-06, "loss": 0.001, "step": 159950 }, { "epoch": 1.0523476510332033, "grad_norm": 0.1431982758999851, "learning_rate": 5.415378000678413e-06, "loss": 0.0011, "step": 159960 }, { "epoch": 1.0524134392084368, "grad_norm": 0.0013790583996380142, "learning_rate": 5.414805871528182e-06, "loss": 0.0006, "step": 159970 }, { "epoch": 1.05247922738367, "grad_norm": 0.07305036822379676, "learning_rate": 5.414233736909093e-06, "loss": 0.0018, "step": 159980 }, { "epoch": 1.0525450155589033, "grad_norm": 0.024056543534917468, "learning_rate": 5.41366159682869e-06, "loss": 0.0011, "step": 159990 }, { "epoch": 1.0526108037341368, "grad_norm": 0.04809087453324477, "learning_rate": 5.413089451294517e-06, "loss": 0.0008, "step": 160000 }, { "epoch": 1.0526765919093701, "grad_norm": 0.0008652280970472512, "learning_rate": 5.412517300314115e-06, "loss": 0.0006, "step": 160010 }, { "epoch": 1.0527423800846036, "grad_norm": 0.03651219998233354, "learning_rate": 5.41194514389503e-06, "loss": 0.0005, "step": 160020 }, { "epoch": 1.052808168259837, "grad_norm": 0.07031812429827984, "learning_rate": 5.411372982044804e-06, "loss": 0.0016, "step": 160030 }, { "epoch": 1.0528739564350704, "grad_norm": 0.09743973485714533, "learning_rate": 5.410800814770979e-06, "loss": 0.0011, "step": 160040 }, { "epoch": 1.0529397446103037, "grad_norm": 0.007771888861237091, "learning_rate": 5.4102286420811015e-06, "loss": 0.001, "step": 160050 }, { "epoch": 1.0530055327855372, "grad_norm": 0.017497852534407433, "learning_rate": 5.4096564639827134e-06, "loss": 0.0005, "step": 160060 }, { "epoch": 1.0530713209607705, "grad_norm": 0.006415087102175489, "learning_rate": 5.409084280483358e-06, "loss": 0.0012, "step": 160070 }, { "epoch": 1.053137109136004, "grad_norm": 0.03980087929004221, "learning_rate": 5.40851209159058e-06, "loss": 0.0007, "step": 160080 }, { "epoch": 1.0532028973112373, "grad_norm": 0.0009634264338020149, "learning_rate": 5.407939897311922e-06, "loss": 0.0005, "step": 160090 }, { "epoch": 1.0532686854864706, "grad_norm": 0.02683946183820022, "learning_rate": 5.40736769765493e-06, "loss": 0.0003, "step": 160100 }, { "epoch": 1.053334473661704, "grad_norm": 0.047683983313134566, "learning_rate": 5.406795492627147e-06, "loss": 0.001, "step": 160110 }, { "epoch": 1.0534002618369374, "grad_norm": 0.014404246713761178, "learning_rate": 5.406223282236116e-06, "loss": 0.0006, "step": 160120 }, { "epoch": 1.0534660500121709, "grad_norm": 0.07801794820217703, "learning_rate": 5.4056510664893825e-06, "loss": 0.0011, "step": 160130 }, { "epoch": 1.0535318381874041, "grad_norm": 0.15662916232673268, "learning_rate": 5.405078845394488e-06, "loss": 0.0011, "step": 160140 }, { "epoch": 1.0535976263626377, "grad_norm": 0.005688414662742397, "learning_rate": 5.40450661895898e-06, "loss": 0.0008, "step": 160150 }, { "epoch": 1.053663414537871, "grad_norm": 0.027859206688163416, "learning_rate": 5.403934387190402e-06, "loss": 0.0005, "step": 160160 }, { "epoch": 1.0537292027131044, "grad_norm": 0.011088921199920125, "learning_rate": 5.403362150096298e-06, "loss": 0.0006, "step": 160170 }, { "epoch": 1.0537949908883377, "grad_norm": 0.008804773824078594, "learning_rate": 5.402789907684213e-06, "loss": 0.0004, "step": 160180 }, { "epoch": 1.053860779063571, "grad_norm": 0.03887786641685826, "learning_rate": 5.40221765996169e-06, "loss": 0.0005, "step": 160190 }, { "epoch": 1.0539265672388045, "grad_norm": 0.07039343396911502, "learning_rate": 5.4016454069362745e-06, "loss": 0.0006, "step": 160200 }, { "epoch": 1.0539923554140378, "grad_norm": 0.09702868861935857, "learning_rate": 5.401073148615511e-06, "loss": 0.0011, "step": 160210 }, { "epoch": 1.0540581435892713, "grad_norm": 0.11217195242159637, "learning_rate": 5.400500885006945e-06, "loss": 0.0012, "step": 160220 }, { "epoch": 1.0541239317645046, "grad_norm": 0.02658457873120795, "learning_rate": 5.399928616118121e-06, "loss": 0.0005, "step": 160230 }, { "epoch": 1.054189719939738, "grad_norm": 0.0027696301269977807, "learning_rate": 5.399356341956583e-06, "loss": 0.0009, "step": 160240 }, { "epoch": 1.0542555081149714, "grad_norm": 0.11161100871895888, "learning_rate": 5.3987840625298774e-06, "loss": 0.0006, "step": 160250 }, { "epoch": 1.0543212962902049, "grad_norm": 0.0007047488587190075, "learning_rate": 5.398211777845548e-06, "loss": 0.0006, "step": 160260 }, { "epoch": 1.0543870844654382, "grad_norm": 0.053497641827181384, "learning_rate": 5.3976394879111385e-06, "loss": 0.0012, "step": 160270 }, { "epoch": 1.0544528726406717, "grad_norm": 0.03746863615902983, "learning_rate": 5.397067192734198e-06, "loss": 0.0006, "step": 160280 }, { "epoch": 1.054518660815905, "grad_norm": 0.15594690805360778, "learning_rate": 5.3964948923222685e-06, "loss": 0.0006, "step": 160290 }, { "epoch": 1.0545844489911382, "grad_norm": 0.015429028982386853, "learning_rate": 5.3959225866828964e-06, "loss": 0.0006, "step": 160300 }, { "epoch": 1.0546502371663717, "grad_norm": 0.003026733264892661, "learning_rate": 5.395350275823628e-06, "loss": 0.0006, "step": 160310 }, { "epoch": 1.054716025341605, "grad_norm": 0.039585979299735244, "learning_rate": 5.394777959752006e-06, "loss": 0.0016, "step": 160320 }, { "epoch": 1.0547818135168385, "grad_norm": 0.012627246352425809, "learning_rate": 5.394205638475577e-06, "loss": 0.001, "step": 160330 }, { "epoch": 1.0548476016920718, "grad_norm": 0.00015863240309231606, "learning_rate": 5.393633312001889e-06, "loss": 0.0007, "step": 160340 }, { "epoch": 1.0549133898673053, "grad_norm": 0.02092375221167231, "learning_rate": 5.393060980338485e-06, "loss": 0.0004, "step": 160350 }, { "epoch": 1.0549791780425386, "grad_norm": 0.006772102752258413, "learning_rate": 5.392488643492911e-06, "loss": 0.0008, "step": 160360 }, { "epoch": 1.055044966217772, "grad_norm": 0.2860533771260064, "learning_rate": 5.3919163014727136e-06, "loss": 0.0012, "step": 160370 }, { "epoch": 1.0551107543930054, "grad_norm": 0.0048092961516789925, "learning_rate": 5.3913439542854395e-06, "loss": 0.0008, "step": 160380 }, { "epoch": 1.0551765425682387, "grad_norm": 0.00300083159722411, "learning_rate": 5.390771601938633e-06, "loss": 0.0007, "step": 160390 }, { "epoch": 1.0552423307434722, "grad_norm": 0.015389184791748304, "learning_rate": 5.390199244439838e-06, "loss": 0.0005, "step": 160400 }, { "epoch": 1.0553081189187055, "grad_norm": 0.017655268873019492, "learning_rate": 5.3896268817966044e-06, "loss": 0.0016, "step": 160410 }, { "epoch": 1.055373907093939, "grad_norm": 0.05842462642404037, "learning_rate": 5.389054514016477e-06, "loss": 0.0008, "step": 160420 }, { "epoch": 1.0554396952691723, "grad_norm": 0.025483854646341512, "learning_rate": 5.388482141107002e-06, "loss": 0.0007, "step": 160430 }, { "epoch": 1.0555054834444058, "grad_norm": 0.075532915318732, "learning_rate": 5.387909763075726e-06, "loss": 0.0008, "step": 160440 }, { "epoch": 1.055571271619639, "grad_norm": 0.025306538610811537, "learning_rate": 5.387337379930194e-06, "loss": 0.0006, "step": 160450 }, { "epoch": 1.0556370597948725, "grad_norm": 0.01618066125569774, "learning_rate": 5.386764991677953e-06, "loss": 0.0039, "step": 160460 }, { "epoch": 1.0557028479701058, "grad_norm": 0.033717888754565925, "learning_rate": 5.38619259832655e-06, "loss": 0.0021, "step": 160470 }, { "epoch": 1.0557686361453393, "grad_norm": 0.04782484460887559, "learning_rate": 5.3856201998835314e-06, "loss": 0.0012, "step": 160480 }, { "epoch": 1.0558344243205726, "grad_norm": 0.034156957821711395, "learning_rate": 5.385047796356443e-06, "loss": 0.0008, "step": 160490 }, { "epoch": 1.055900212495806, "grad_norm": 0.08895577846864369, "learning_rate": 5.384475387752832e-06, "loss": 0.0029, "step": 160500 }, { "epoch": 1.0559660006710394, "grad_norm": 0.04820834031941563, "learning_rate": 5.383902974080245e-06, "loss": 0.0016, "step": 160510 }, { "epoch": 1.0560317888462727, "grad_norm": 0.049497888886276244, "learning_rate": 5.38333055534623e-06, "loss": 0.0009, "step": 160520 }, { "epoch": 1.0560975770215062, "grad_norm": 0.020586752843375522, "learning_rate": 5.382758131558331e-06, "loss": 0.0007, "step": 160530 }, { "epoch": 1.0561633651967395, "grad_norm": 0.12207566940895229, "learning_rate": 5.382185702724098e-06, "loss": 0.0011, "step": 160540 }, { "epoch": 1.056229153371973, "grad_norm": 0.10837256656839551, "learning_rate": 5.3816132688510755e-06, "loss": 0.0012, "step": 160550 }, { "epoch": 1.0562949415472063, "grad_norm": 0.05561892093578148, "learning_rate": 5.381040829946812e-06, "loss": 0.0014, "step": 160560 }, { "epoch": 1.0563607297224398, "grad_norm": 0.0026686750367721414, "learning_rate": 5.380468386018855e-06, "loss": 0.0004, "step": 160570 }, { "epoch": 1.056426517897673, "grad_norm": 0.020079900296704596, "learning_rate": 5.37989593707475e-06, "loss": 0.0008, "step": 160580 }, { "epoch": 1.0564923060729066, "grad_norm": 0.039323180970912565, "learning_rate": 5.379323483122044e-06, "loss": 0.0007, "step": 160590 }, { "epoch": 1.0565580942481398, "grad_norm": 0.03620423886637662, "learning_rate": 5.378751024168287e-06, "loss": 0.0007, "step": 160600 }, { "epoch": 1.0566238824233731, "grad_norm": 0.05732392303539378, "learning_rate": 5.378178560221025e-06, "loss": 0.0006, "step": 160610 }, { "epoch": 1.0566896705986066, "grad_norm": 0.009484448555217097, "learning_rate": 5.377606091287806e-06, "loss": 0.0005, "step": 160620 }, { "epoch": 1.05675545877384, "grad_norm": 0.03332574399141102, "learning_rate": 5.377033617376175e-06, "loss": 0.0008, "step": 160630 }, { "epoch": 1.0568212469490734, "grad_norm": 0.01604879615832925, "learning_rate": 5.3764611384936825e-06, "loss": 0.0005, "step": 160640 }, { "epoch": 1.0568870351243067, "grad_norm": 0.03929298838956361, "learning_rate": 5.375888654647875e-06, "loss": 0.0009, "step": 160650 }, { "epoch": 1.0569528232995402, "grad_norm": 0.042738411087082255, "learning_rate": 5.3753161658462995e-06, "loss": 0.0006, "step": 160660 }, { "epoch": 1.0570186114747735, "grad_norm": 0.010452772358605573, "learning_rate": 5.3747436720965054e-06, "loss": 0.0013, "step": 160670 }, { "epoch": 1.057084399650007, "grad_norm": 0.03357803417265253, "learning_rate": 5.3741711734060385e-06, "loss": 0.0005, "step": 160680 }, { "epoch": 1.0571501878252403, "grad_norm": 0.002547966802095441, "learning_rate": 5.373598669782449e-06, "loss": 0.0007, "step": 160690 }, { "epoch": 1.0572159760004736, "grad_norm": 0.045611727016564886, "learning_rate": 5.3730261612332834e-06, "loss": 0.0017, "step": 160700 }, { "epoch": 1.057281764175707, "grad_norm": 0.16940827348539808, "learning_rate": 5.37245364776609e-06, "loss": 0.0014, "step": 160710 }, { "epoch": 1.0573475523509404, "grad_norm": 0.0063331007942740245, "learning_rate": 5.371881129388417e-06, "loss": 0.0002, "step": 160720 }, { "epoch": 1.0574133405261739, "grad_norm": 0.016148923390351837, "learning_rate": 5.371308606107811e-06, "loss": 0.0008, "step": 160730 }, { "epoch": 1.0574791287014071, "grad_norm": 0.05249648862213935, "learning_rate": 5.370736077931824e-06, "loss": 0.0027, "step": 160740 }, { "epoch": 1.0575449168766407, "grad_norm": 0.0035461113895420802, "learning_rate": 5.370163544868001e-06, "loss": 0.0006, "step": 160750 }, { "epoch": 1.057610705051874, "grad_norm": 0.022031518889209886, "learning_rate": 5.369591006923892e-06, "loss": 0.001, "step": 160760 }, { "epoch": 1.0576764932271074, "grad_norm": 0.005304925664700363, "learning_rate": 5.3690184641070445e-06, "loss": 0.0016, "step": 160770 }, { "epoch": 1.0577422814023407, "grad_norm": 0.10371937385516436, "learning_rate": 5.368445916425007e-06, "loss": 0.0008, "step": 160780 }, { "epoch": 1.0578080695775742, "grad_norm": 0.044320127840938674, "learning_rate": 5.367873363885326e-06, "loss": 0.0007, "step": 160790 }, { "epoch": 1.0578738577528075, "grad_norm": 0.08003929654114601, "learning_rate": 5.367300806495556e-06, "loss": 0.0011, "step": 160800 }, { "epoch": 1.0579396459280408, "grad_norm": 0.026662570996681685, "learning_rate": 5.366728244263241e-06, "loss": 0.0008, "step": 160810 }, { "epoch": 1.0580054341032743, "grad_norm": 0.11777204404437398, "learning_rate": 5.36615567719593e-06, "loss": 0.0019, "step": 160820 }, { "epoch": 1.0580712222785076, "grad_norm": 0.0024922444914106316, "learning_rate": 5.3655831053011735e-06, "loss": 0.0008, "step": 160830 }, { "epoch": 1.058137010453741, "grad_norm": 0.062148592701871314, "learning_rate": 5.36501052858652e-06, "loss": 0.0007, "step": 160840 }, { "epoch": 1.0582027986289744, "grad_norm": 0.008474135358401651, "learning_rate": 5.3644379470595154e-06, "loss": 0.0004, "step": 160850 }, { "epoch": 1.0582685868042079, "grad_norm": 0.04246130773695295, "learning_rate": 5.363865360727713e-06, "loss": 0.0007, "step": 160860 }, { "epoch": 1.0583343749794412, "grad_norm": 0.005875131220695099, "learning_rate": 5.36329276959866e-06, "loss": 0.0008, "step": 160870 }, { "epoch": 1.0584001631546747, "grad_norm": 0.00014195737336721774, "learning_rate": 5.362720173679906e-06, "loss": 0.0006, "step": 160880 }, { "epoch": 1.058465951329908, "grad_norm": 0.05920882208950876, "learning_rate": 5.362147572979e-06, "loss": 0.0012, "step": 160890 }, { "epoch": 1.0585317395051415, "grad_norm": 0.017052841258035824, "learning_rate": 5.36157496750349e-06, "loss": 0.0008, "step": 160900 }, { "epoch": 1.0585975276803747, "grad_norm": 0.05330636080596786, "learning_rate": 5.361002357260927e-06, "loss": 0.0006, "step": 160910 }, { "epoch": 1.058663315855608, "grad_norm": 0.13221591949943395, "learning_rate": 5.360429742258858e-06, "loss": 0.0005, "step": 160920 }, { "epoch": 1.0587291040308415, "grad_norm": 0.015570400178569813, "learning_rate": 5.359857122504836e-06, "loss": 0.0005, "step": 160930 }, { "epoch": 1.0587948922060748, "grad_norm": 0.1053230096678483, "learning_rate": 5.359284498006409e-06, "loss": 0.0015, "step": 160940 }, { "epoch": 1.0588606803813083, "grad_norm": 0.07176130959277341, "learning_rate": 5.358711868771125e-06, "loss": 0.0014, "step": 160950 }, { "epoch": 1.0589264685565416, "grad_norm": 0.022301787259344945, "learning_rate": 5.3581392348065365e-06, "loss": 0.0006, "step": 160960 }, { "epoch": 1.058992256731775, "grad_norm": 0.03803564802506006, "learning_rate": 5.357566596120189e-06, "loss": 0.0007, "step": 160970 }, { "epoch": 1.0590580449070084, "grad_norm": 0.07090945908012233, "learning_rate": 5.356993952719636e-06, "loss": 0.0009, "step": 160980 }, { "epoch": 1.059123833082242, "grad_norm": 0.023409569265956845, "learning_rate": 5.3564213046124254e-06, "loss": 0.0014, "step": 160990 }, { "epoch": 1.0591896212574752, "grad_norm": 0.09961445658758139, "learning_rate": 5.355848651806109e-06, "loss": 0.0009, "step": 161000 }, { "epoch": 1.0592554094327085, "grad_norm": 0.027130636031011287, "learning_rate": 5.3552759943082354e-06, "loss": 0.0006, "step": 161010 }, { "epoch": 1.059321197607942, "grad_norm": 0.013790632272544191, "learning_rate": 5.354703332126354e-06, "loss": 0.0012, "step": 161020 }, { "epoch": 1.0593869857831753, "grad_norm": 0.024640715169943893, "learning_rate": 5.354130665268015e-06, "loss": 0.0008, "step": 161030 }, { "epoch": 1.0594527739584088, "grad_norm": 0.03254441009692342, "learning_rate": 5.353557993740769e-06, "loss": 0.001, "step": 161040 }, { "epoch": 1.059518562133642, "grad_norm": 0.04629237371824398, "learning_rate": 5.3529853175521675e-06, "loss": 0.0008, "step": 161050 }, { "epoch": 1.0595843503088755, "grad_norm": 0.0023266147072901486, "learning_rate": 5.352412636709758e-06, "loss": 0.0007, "step": 161060 }, { "epoch": 1.0596501384841088, "grad_norm": 0.045729758772130334, "learning_rate": 5.351839951221094e-06, "loss": 0.0011, "step": 161070 }, { "epoch": 1.0597159266593423, "grad_norm": 0.004713528839088787, "learning_rate": 5.351267261093722e-06, "loss": 0.0011, "step": 161080 }, { "epoch": 1.0597817148345756, "grad_norm": 0.04969250166988579, "learning_rate": 5.350694566335196e-06, "loss": 0.0009, "step": 161090 }, { "epoch": 1.0598475030098091, "grad_norm": 0.002492255716884045, "learning_rate": 5.350121866953064e-06, "loss": 0.0016, "step": 161100 }, { "epoch": 1.0599132911850424, "grad_norm": 0.0017313914208781608, "learning_rate": 5.349549162954878e-06, "loss": 0.001, "step": 161110 }, { "epoch": 1.0599790793602757, "grad_norm": 0.04319811926961559, "learning_rate": 5.348976454348189e-06, "loss": 0.0009, "step": 161120 }, { "epoch": 1.0600448675355092, "grad_norm": 0.033121846901693695, "learning_rate": 5.348403741140547e-06, "loss": 0.0004, "step": 161130 }, { "epoch": 1.0601106557107425, "grad_norm": 0.001797521329217435, "learning_rate": 5.347831023339501e-06, "loss": 0.0005, "step": 161140 }, { "epoch": 1.060176443885976, "grad_norm": 0.05139679600514779, "learning_rate": 5.347258300952605e-06, "loss": 0.0008, "step": 161150 }, { "epoch": 1.0602422320612093, "grad_norm": 0.02041240491816817, "learning_rate": 5.346685573987406e-06, "loss": 0.0007, "step": 161160 }, { "epoch": 1.0603080202364428, "grad_norm": 0.03393337061465339, "learning_rate": 5.3461128424514585e-06, "loss": 0.0007, "step": 161170 }, { "epoch": 1.060373808411676, "grad_norm": 0.002717839996609562, "learning_rate": 5.345540106352312e-06, "loss": 0.0003, "step": 161180 }, { "epoch": 1.0604395965869096, "grad_norm": 0.016444736456611788, "learning_rate": 5.344967365697518e-06, "loss": 0.0008, "step": 161190 }, { "epoch": 1.0605053847621428, "grad_norm": 0.027518779182903533, "learning_rate": 5.344394620494627e-06, "loss": 0.0005, "step": 161200 }, { "epoch": 1.0605711729373763, "grad_norm": 0.005506483017612217, "learning_rate": 5.34382187075119e-06, "loss": 0.0012, "step": 161210 }, { "epoch": 1.0606369611126096, "grad_norm": 0.038207876384524796, "learning_rate": 5.343249116474759e-06, "loss": 0.0014, "step": 161220 }, { "epoch": 1.060702749287843, "grad_norm": 0.0023959084633371963, "learning_rate": 5.342676357672885e-06, "loss": 0.0007, "step": 161230 }, { "epoch": 1.0607685374630764, "grad_norm": 0.04139387235884214, "learning_rate": 5.342103594353119e-06, "loss": 0.0009, "step": 161240 }, { "epoch": 1.0608343256383097, "grad_norm": 0.10437308572373954, "learning_rate": 5.341530826523012e-06, "loss": 0.0006, "step": 161250 }, { "epoch": 1.0609001138135432, "grad_norm": 0.04228050482710745, "learning_rate": 5.340958054190116e-06, "loss": 0.0007, "step": 161260 }, { "epoch": 1.0609659019887765, "grad_norm": 0.028050544168412565, "learning_rate": 5.340385277361983e-06, "loss": 0.0003, "step": 161270 }, { "epoch": 1.06103169016401, "grad_norm": 0.06556106213104675, "learning_rate": 5.339812496046165e-06, "loss": 0.0007, "step": 161280 }, { "epoch": 1.0610974783392433, "grad_norm": 0.018282854999920276, "learning_rate": 5.33923971025021e-06, "loss": 0.0014, "step": 161290 }, { "epoch": 1.0611632665144768, "grad_norm": 0.0045202828619330055, "learning_rate": 5.338666919981676e-06, "loss": 0.0006, "step": 161300 }, { "epoch": 1.06122905468971, "grad_norm": 0.0447578833343679, "learning_rate": 5.338094125248109e-06, "loss": 0.0013, "step": 161310 }, { "epoch": 1.0612948428649434, "grad_norm": 0.05424765075066864, "learning_rate": 5.3375213260570635e-06, "loss": 0.0008, "step": 161320 }, { "epoch": 1.0613606310401769, "grad_norm": 0.025997921433468456, "learning_rate": 5.33694852241609e-06, "loss": 0.0013, "step": 161330 }, { "epoch": 1.0614264192154101, "grad_norm": 0.05268295832538347, "learning_rate": 5.3363757143327425e-06, "loss": 0.0007, "step": 161340 }, { "epoch": 1.0614922073906436, "grad_norm": 0.016832228447433862, "learning_rate": 5.3358029018145705e-06, "loss": 0.0012, "step": 161350 }, { "epoch": 1.061557995565877, "grad_norm": 0.008932174088377995, "learning_rate": 5.335230084869127e-06, "loss": 0.0012, "step": 161360 }, { "epoch": 1.0616237837411104, "grad_norm": 0.028914084271373607, "learning_rate": 5.334657263503966e-06, "loss": 0.0003, "step": 161370 }, { "epoch": 1.0616895719163437, "grad_norm": 0.017460680325924523, "learning_rate": 5.334084437726638e-06, "loss": 0.0012, "step": 161380 }, { "epoch": 1.0617553600915772, "grad_norm": 0.017096601493742133, "learning_rate": 5.333511607544693e-06, "loss": 0.0008, "step": 161390 }, { "epoch": 1.0618211482668105, "grad_norm": 0.04341534728753167, "learning_rate": 5.332938772965688e-06, "loss": 0.0008, "step": 161400 }, { "epoch": 1.0618869364420438, "grad_norm": 0.020306678116320052, "learning_rate": 5.332365933997172e-06, "loss": 0.0009, "step": 161410 }, { "epoch": 1.0619527246172773, "grad_norm": 0.028792787451510465, "learning_rate": 5.3317930906466974e-06, "loss": 0.0015, "step": 161420 }, { "epoch": 1.0620185127925106, "grad_norm": 0.36110655674658504, "learning_rate": 5.331220242921818e-06, "loss": 0.0007, "step": 161430 }, { "epoch": 1.062084300967744, "grad_norm": 0.04794796416344471, "learning_rate": 5.330647390830086e-06, "loss": 0.0006, "step": 161440 }, { "epoch": 1.0621500891429774, "grad_norm": 0.014102955641519147, "learning_rate": 5.330074534379054e-06, "loss": 0.0013, "step": 161450 }, { "epoch": 1.0622158773182109, "grad_norm": 0.18039006526252582, "learning_rate": 5.329501673576274e-06, "loss": 0.0012, "step": 161460 }, { "epoch": 1.0622816654934442, "grad_norm": 0.0040736342749729355, "learning_rate": 5.3289288084292986e-06, "loss": 0.0007, "step": 161470 }, { "epoch": 1.0623474536686777, "grad_norm": 0.019837265990066563, "learning_rate": 5.328355938945681e-06, "loss": 0.0004, "step": 161480 }, { "epoch": 1.062413241843911, "grad_norm": 0.021810473944759996, "learning_rate": 5.327783065132973e-06, "loss": 0.0003, "step": 161490 }, { "epoch": 1.0624790300191445, "grad_norm": 0.03206272313715295, "learning_rate": 5.3272101869987305e-06, "loss": 0.0006, "step": 161500 }, { "epoch": 1.0625448181943777, "grad_norm": 0.06473985036519506, "learning_rate": 5.3266373045505026e-06, "loss": 0.0007, "step": 161510 }, { "epoch": 1.0626106063696112, "grad_norm": 0.0198113150983781, "learning_rate": 5.326064417795846e-06, "loss": 0.0011, "step": 161520 }, { "epoch": 1.0626763945448445, "grad_norm": 0.14898137105626968, "learning_rate": 5.32549152674231e-06, "loss": 0.0005, "step": 161530 }, { "epoch": 1.0627421827200778, "grad_norm": 0.05027213341001536, "learning_rate": 5.324918631397449e-06, "loss": 0.0009, "step": 161540 }, { "epoch": 1.0628079708953113, "grad_norm": 0.04016423079269001, "learning_rate": 5.324345731768816e-06, "loss": 0.0009, "step": 161550 }, { "epoch": 1.0628737590705446, "grad_norm": 0.10740838078296018, "learning_rate": 5.323772827863966e-06, "loss": 0.0013, "step": 161560 }, { "epoch": 1.062939547245778, "grad_norm": 0.018010893084678046, "learning_rate": 5.323199919690452e-06, "loss": 0.0005, "step": 161570 }, { "epoch": 1.0630053354210114, "grad_norm": 0.03494445860382675, "learning_rate": 5.322627007255824e-06, "loss": 0.0025, "step": 161580 }, { "epoch": 1.063071123596245, "grad_norm": 0.0434988387136088, "learning_rate": 5.322054090567639e-06, "loss": 0.0009, "step": 161590 }, { "epoch": 1.0631369117714782, "grad_norm": 0.014277971787374204, "learning_rate": 5.321481169633448e-06, "loss": 0.0005, "step": 161600 }, { "epoch": 1.0632026999467117, "grad_norm": 0.03862271292757852, "learning_rate": 5.320908244460806e-06, "loss": 0.0016, "step": 161610 }, { "epoch": 1.063268488121945, "grad_norm": 0.029777937487007446, "learning_rate": 5.3203353150572645e-06, "loss": 0.0022, "step": 161620 }, { "epoch": 1.0633342762971782, "grad_norm": 0.07402042585351827, "learning_rate": 5.31976238143038e-06, "loss": 0.001, "step": 161630 }, { "epoch": 1.0634000644724118, "grad_norm": 0.004096504395869302, "learning_rate": 5.319189443587704e-06, "loss": 0.0007, "step": 161640 }, { "epoch": 1.063465852647645, "grad_norm": 0.1308548518605542, "learning_rate": 5.318616501536792e-06, "loss": 0.0011, "step": 161650 }, { "epoch": 1.0635316408228785, "grad_norm": 0.012464574510583535, "learning_rate": 5.318043555285196e-06, "loss": 0.0011, "step": 161660 }, { "epoch": 1.0635974289981118, "grad_norm": 0.01303839374449123, "learning_rate": 5.317470604840471e-06, "loss": 0.0011, "step": 161670 }, { "epoch": 1.0636632171733453, "grad_norm": 0.021876305823349384, "learning_rate": 5.316897650210168e-06, "loss": 0.0006, "step": 161680 }, { "epoch": 1.0637290053485786, "grad_norm": 0.019107510933200896, "learning_rate": 5.316324691401845e-06, "loss": 0.0007, "step": 161690 }, { "epoch": 1.0637947935238121, "grad_norm": 0.04524132254691398, "learning_rate": 5.315751728423054e-06, "loss": 0.0008, "step": 161700 }, { "epoch": 1.0638605816990454, "grad_norm": 0.056600139826229336, "learning_rate": 5.315178761281349e-06, "loss": 0.0007, "step": 161710 }, { "epoch": 1.0639263698742787, "grad_norm": 0.050587282812505664, "learning_rate": 5.314605789984285e-06, "loss": 0.0013, "step": 161720 }, { "epoch": 1.0639921580495122, "grad_norm": 0.002392868787208497, "learning_rate": 5.3140328145394135e-06, "loss": 0.0006, "step": 161730 }, { "epoch": 1.0640579462247455, "grad_norm": 0.02002068423056601, "learning_rate": 5.313459834954293e-06, "loss": 0.0005, "step": 161740 }, { "epoch": 1.064123734399979, "grad_norm": 0.03256197913625445, "learning_rate": 5.312886851236473e-06, "loss": 0.0007, "step": 161750 }, { "epoch": 1.0641895225752123, "grad_norm": 0.12483608734298036, "learning_rate": 5.312313863393511e-06, "loss": 0.0009, "step": 161760 }, { "epoch": 1.0642553107504458, "grad_norm": 0.009899044489816343, "learning_rate": 5.311740871432961e-06, "loss": 0.0013, "step": 161770 }, { "epoch": 1.064321098925679, "grad_norm": 0.02760542760055954, "learning_rate": 5.3111678753623765e-06, "loss": 0.0009, "step": 161780 }, { "epoch": 1.0643868871009126, "grad_norm": 0.00026855503424248143, "learning_rate": 5.3105948751893125e-06, "loss": 0.0013, "step": 161790 }, { "epoch": 1.0644526752761458, "grad_norm": 0.019736647318631527, "learning_rate": 5.310021870921323e-06, "loss": 0.0011, "step": 161800 }, { "epoch": 1.0645184634513793, "grad_norm": 0.11493373148439254, "learning_rate": 5.309448862565962e-06, "loss": 0.0006, "step": 161810 }, { "epoch": 1.0645842516266126, "grad_norm": 0.10823282967750963, "learning_rate": 5.308875850130786e-06, "loss": 0.0007, "step": 161820 }, { "epoch": 1.064650039801846, "grad_norm": 0.01896209053516669, "learning_rate": 5.308302833623348e-06, "loss": 0.0015, "step": 161830 }, { "epoch": 1.0647158279770794, "grad_norm": 0.01830364768885456, "learning_rate": 5.3077298130512045e-06, "loss": 0.0005, "step": 161840 }, { "epoch": 1.0647816161523127, "grad_norm": 0.05289281290472177, "learning_rate": 5.307156788421909e-06, "loss": 0.0009, "step": 161850 }, { "epoch": 1.0648474043275462, "grad_norm": 0.045071067990130734, "learning_rate": 5.306583759743016e-06, "loss": 0.0004, "step": 161860 }, { "epoch": 1.0649131925027795, "grad_norm": 0.06273531361425284, "learning_rate": 5.306010727022081e-06, "loss": 0.0008, "step": 161870 }, { "epoch": 1.064978980678013, "grad_norm": 0.0537837296766046, "learning_rate": 5.305437690266658e-06, "loss": 0.0003, "step": 161880 }, { "epoch": 1.0650447688532463, "grad_norm": 0.006365402010100897, "learning_rate": 5.304864649484304e-06, "loss": 0.0007, "step": 161890 }, { "epoch": 1.0651105570284798, "grad_norm": 0.07232481327539766, "learning_rate": 5.304291604682573e-06, "loss": 0.0008, "step": 161900 }, { "epoch": 1.065176345203713, "grad_norm": 0.008069087479100507, "learning_rate": 5.3037185558690204e-06, "loss": 0.0003, "step": 161910 }, { "epoch": 1.0652421333789466, "grad_norm": 0.014084117368898799, "learning_rate": 5.3031455030512e-06, "loss": 0.0008, "step": 161920 }, { "epoch": 1.0653079215541799, "grad_norm": 0.038156723010377085, "learning_rate": 5.302572446236668e-06, "loss": 0.0008, "step": 161930 }, { "epoch": 1.0653737097294131, "grad_norm": 0.03440285841654051, "learning_rate": 5.3019993854329795e-06, "loss": 0.0007, "step": 161940 }, { "epoch": 1.0654394979046466, "grad_norm": 0.04113078652086553, "learning_rate": 5.30142632064769e-06, "loss": 0.001, "step": 161950 }, { "epoch": 1.06550528607988, "grad_norm": 0.04296451751527866, "learning_rate": 5.300853251888356e-06, "loss": 0.0013, "step": 161960 }, { "epoch": 1.0655710742551134, "grad_norm": 0.0031076480803276696, "learning_rate": 5.3002801791625305e-06, "loss": 0.0007, "step": 161970 }, { "epoch": 1.0656368624303467, "grad_norm": 0.00901027703636552, "learning_rate": 5.299707102477771e-06, "loss": 0.0014, "step": 161980 }, { "epoch": 1.0657026506055802, "grad_norm": 0.02688412115893336, "learning_rate": 5.299134021841631e-06, "loss": 0.0006, "step": 161990 }, { "epoch": 1.0657684387808135, "grad_norm": 0.04766390386330669, "learning_rate": 5.298560937261668e-06, "loss": 0.0013, "step": 162000 }, { "epoch": 1.065834226956047, "grad_norm": 0.01219333129684923, "learning_rate": 5.297987848745436e-06, "loss": 0.0016, "step": 162010 }, { "epoch": 1.0659000151312803, "grad_norm": 0.009760657225361059, "learning_rate": 5.297414756300493e-06, "loss": 0.0016, "step": 162020 }, { "epoch": 1.0659658033065136, "grad_norm": 0.013840102052476296, "learning_rate": 5.296841659934392e-06, "loss": 0.0011, "step": 162030 }, { "epoch": 1.066031591481747, "grad_norm": 0.05105754564806035, "learning_rate": 5.296268559654692e-06, "loss": 0.0012, "step": 162040 }, { "epoch": 1.0660973796569804, "grad_norm": 0.032952650908670306, "learning_rate": 5.2956954554689455e-06, "loss": 0.0011, "step": 162050 }, { "epoch": 1.0661631678322139, "grad_norm": 0.053808600510492154, "learning_rate": 5.295122347384708e-06, "loss": 0.0004, "step": 162060 }, { "epoch": 1.0662289560074472, "grad_norm": 0.006579149448166952, "learning_rate": 5.2945492354095394e-06, "loss": 0.0019, "step": 162070 }, { "epoch": 1.0662947441826807, "grad_norm": 0.03326042318394717, "learning_rate": 5.293976119550994e-06, "loss": 0.0007, "step": 162080 }, { "epoch": 1.066360532357914, "grad_norm": 0.030993247289328636, "learning_rate": 5.2934029998166266e-06, "loss": 0.002, "step": 162090 }, { "epoch": 1.0664263205331475, "grad_norm": 0.028425217391951373, "learning_rate": 5.292829876213994e-06, "loss": 0.0006, "step": 162100 }, { "epoch": 1.0664921087083807, "grad_norm": 0.021275568222489216, "learning_rate": 5.292256748750653e-06, "loss": 0.0017, "step": 162110 }, { "epoch": 1.0665578968836142, "grad_norm": 0.05397803380263886, "learning_rate": 5.291683617434159e-06, "loss": 0.0009, "step": 162120 }, { "epoch": 1.0666236850588475, "grad_norm": 0.008531448818233419, "learning_rate": 5.291110482272067e-06, "loss": 0.0006, "step": 162130 }, { "epoch": 1.0666894732340808, "grad_norm": 0.006336161048878259, "learning_rate": 5.290537343271936e-06, "loss": 0.0007, "step": 162140 }, { "epoch": 1.0667552614093143, "grad_norm": 0.006326922491347474, "learning_rate": 5.289964200441321e-06, "loss": 0.0009, "step": 162150 }, { "epoch": 1.0668210495845476, "grad_norm": 0.0957637133517743, "learning_rate": 5.289391053787778e-06, "loss": 0.002, "step": 162160 }, { "epoch": 1.066886837759781, "grad_norm": 0.04730844823881888, "learning_rate": 5.288817903318865e-06, "loss": 0.0008, "step": 162170 }, { "epoch": 1.0669526259350144, "grad_norm": 0.0349266248863429, "learning_rate": 5.2882447490421366e-06, "loss": 0.001, "step": 162180 }, { "epoch": 1.067018414110248, "grad_norm": 0.02835732732846677, "learning_rate": 5.287671590965149e-06, "loss": 0.0009, "step": 162190 }, { "epoch": 1.0670842022854812, "grad_norm": 0.014177406712631988, "learning_rate": 5.2870984290954605e-06, "loss": 0.0005, "step": 162200 }, { "epoch": 1.0671499904607147, "grad_norm": 0.041863240857796635, "learning_rate": 5.286525263440628e-06, "loss": 0.0006, "step": 162210 }, { "epoch": 1.067215778635948, "grad_norm": 0.06485955265821289, "learning_rate": 5.285952094008208e-06, "loss": 0.0004, "step": 162220 }, { "epoch": 1.0672815668111815, "grad_norm": 0.023678887583890963, "learning_rate": 5.2853789208057545e-06, "loss": 0.0008, "step": 162230 }, { "epoch": 1.0673473549864148, "grad_norm": 0.025651281178582532, "learning_rate": 5.284805743840828e-06, "loss": 0.0006, "step": 162240 }, { "epoch": 1.067413143161648, "grad_norm": 0.005856233546228178, "learning_rate": 5.284232563120983e-06, "loss": 0.0008, "step": 162250 }, { "epoch": 1.0674789313368815, "grad_norm": 0.11713433305183966, "learning_rate": 5.283659378653777e-06, "loss": 0.0011, "step": 162260 }, { "epoch": 1.0675447195121148, "grad_norm": 0.006220308801152751, "learning_rate": 5.283086190446768e-06, "loss": 0.0002, "step": 162270 }, { "epoch": 1.0676105076873483, "grad_norm": 0.05433564912373384, "learning_rate": 5.282512998507512e-06, "loss": 0.0008, "step": 162280 }, { "epoch": 1.0676762958625816, "grad_norm": 0.038837241762048925, "learning_rate": 5.2819398028435656e-06, "loss": 0.0004, "step": 162290 }, { "epoch": 1.0677420840378151, "grad_norm": 0.08436033422161901, "learning_rate": 5.281366603462487e-06, "loss": 0.0008, "step": 162300 }, { "epoch": 1.0678078722130484, "grad_norm": 0.050660130894253294, "learning_rate": 5.280793400371832e-06, "loss": 0.0013, "step": 162310 }, { "epoch": 1.067873660388282, "grad_norm": 0.03374127842977237, "learning_rate": 5.280220193579158e-06, "loss": 0.0005, "step": 162320 }, { "epoch": 1.0679394485635152, "grad_norm": 0.022510358744956504, "learning_rate": 5.279646983092023e-06, "loss": 0.0009, "step": 162330 }, { "epoch": 1.0680052367387485, "grad_norm": 0.09046920379445629, "learning_rate": 5.279073768917985e-06, "loss": 0.0006, "step": 162340 }, { "epoch": 1.068071024913982, "grad_norm": 0.02133130335260429, "learning_rate": 5.278500551064599e-06, "loss": 0.002, "step": 162350 }, { "epoch": 1.0681368130892153, "grad_norm": 0.0810849678424068, "learning_rate": 5.277927329539425e-06, "loss": 0.0011, "step": 162360 }, { "epoch": 1.0682026012644488, "grad_norm": 0.012558826903612423, "learning_rate": 5.2773541043500195e-06, "loss": 0.0013, "step": 162370 }, { "epoch": 1.068268389439682, "grad_norm": 0.01515731599173145, "learning_rate": 5.27678087550394e-06, "loss": 0.0011, "step": 162380 }, { "epoch": 1.0683341776149156, "grad_norm": 0.09240521423305824, "learning_rate": 5.276207643008741e-06, "loss": 0.0014, "step": 162390 }, { "epoch": 1.0683999657901488, "grad_norm": 0.016580669100361243, "learning_rate": 5.2756344068719844e-06, "loss": 0.0005, "step": 162400 }, { "epoch": 1.0684657539653823, "grad_norm": 0.0996645171965567, "learning_rate": 5.275061167101226e-06, "loss": 0.0008, "step": 162410 }, { "epoch": 1.0685315421406156, "grad_norm": 0.03369586018043958, "learning_rate": 5.2744879237040245e-06, "loss": 0.0008, "step": 162420 }, { "epoch": 1.068597330315849, "grad_norm": 0.008735902753973818, "learning_rate": 5.273914676687937e-06, "loss": 0.0008, "step": 162430 }, { "epoch": 1.0686631184910824, "grad_norm": 0.07166970943721432, "learning_rate": 5.273341426060521e-06, "loss": 0.0009, "step": 162440 }, { "epoch": 1.0687289066663157, "grad_norm": 0.008099200191789575, "learning_rate": 5.272768171829332e-06, "loss": 0.0005, "step": 162450 }, { "epoch": 1.0687946948415492, "grad_norm": 0.0051159242413902645, "learning_rate": 5.272194914001932e-06, "loss": 0.0008, "step": 162460 }, { "epoch": 1.0688604830167825, "grad_norm": 0.012026818865578598, "learning_rate": 5.271621652585877e-06, "loss": 0.0004, "step": 162470 }, { "epoch": 1.068926271192016, "grad_norm": 0.006014599961198271, "learning_rate": 5.2710483875887255e-06, "loss": 0.0004, "step": 162480 }, { "epoch": 1.0689920593672493, "grad_norm": 0.0189748628629943, "learning_rate": 5.270475119018035e-06, "loss": 0.001, "step": 162490 }, { "epoch": 1.0690578475424828, "grad_norm": 0.03552978114887488, "learning_rate": 5.2699018468813635e-06, "loss": 0.0008, "step": 162500 }, { "epoch": 1.069123635717716, "grad_norm": 0.006403208935558259, "learning_rate": 5.269328571186269e-06, "loss": 0.0003, "step": 162510 }, { "epoch": 1.0691894238929496, "grad_norm": 0.07035347508896225, "learning_rate": 5.26875529194031e-06, "loss": 0.0012, "step": 162520 }, { "epoch": 1.0692552120681829, "grad_norm": 0.04127780586450799, "learning_rate": 5.268182009151045e-06, "loss": 0.0011, "step": 162530 }, { "epoch": 1.0693210002434164, "grad_norm": 0.011273101448342237, "learning_rate": 5.267608722826031e-06, "loss": 0.0011, "step": 162540 }, { "epoch": 1.0693867884186496, "grad_norm": 0.06982927793010593, "learning_rate": 5.267035432972828e-06, "loss": 0.0006, "step": 162550 }, { "epoch": 1.069452576593883, "grad_norm": 0.025195083710075096, "learning_rate": 5.266462139598993e-06, "loss": 0.0005, "step": 162560 }, { "epoch": 1.0695183647691164, "grad_norm": 0.012930803439684875, "learning_rate": 5.265888842712084e-06, "loss": 0.0005, "step": 162570 }, { "epoch": 1.0695841529443497, "grad_norm": 0.0012208051723331364, "learning_rate": 5.265315542319661e-06, "loss": 0.0007, "step": 162580 }, { "epoch": 1.0696499411195832, "grad_norm": 0.0851974689432814, "learning_rate": 5.264742238429282e-06, "loss": 0.0021, "step": 162590 }, { "epoch": 1.0697157292948165, "grad_norm": 0.01854900838007193, "learning_rate": 5.264168931048506e-06, "loss": 0.0007, "step": 162600 }, { "epoch": 1.06978151747005, "grad_norm": 0.0008712311033416705, "learning_rate": 5.263595620184889e-06, "loss": 0.0005, "step": 162610 }, { "epoch": 1.0698473056452833, "grad_norm": 0.0876395239019343, "learning_rate": 5.263022305845993e-06, "loss": 0.0007, "step": 162620 }, { "epoch": 1.0699130938205168, "grad_norm": 0.06977084788005107, "learning_rate": 5.262448988039374e-06, "loss": 0.0006, "step": 162630 }, { "epoch": 1.06997888199575, "grad_norm": 0.05013333219693627, "learning_rate": 5.261875666772592e-06, "loss": 0.0007, "step": 162640 }, { "epoch": 1.0700446701709834, "grad_norm": 0.41551054013254923, "learning_rate": 5.261302342053205e-06, "loss": 0.0016, "step": 162650 }, { "epoch": 1.0701104583462169, "grad_norm": 0.07630951088081082, "learning_rate": 5.2607290138887734e-06, "loss": 0.0006, "step": 162660 }, { "epoch": 1.0701762465214502, "grad_norm": 0.0375109521234081, "learning_rate": 5.2601556822868545e-06, "loss": 0.0011, "step": 162670 }, { "epoch": 1.0702420346966837, "grad_norm": 0.025782747999107434, "learning_rate": 5.259582347255008e-06, "loss": 0.0009, "step": 162680 }, { "epoch": 1.070307822871917, "grad_norm": 0.006389748173293388, "learning_rate": 5.259009008800791e-06, "loss": 0.0012, "step": 162690 }, { "epoch": 1.0703736110471505, "grad_norm": 0.034788146138863746, "learning_rate": 5.258435666931766e-06, "loss": 0.0007, "step": 162700 }, { "epoch": 1.0704393992223837, "grad_norm": 0.07577261072196245, "learning_rate": 5.257862321655488e-06, "loss": 0.0009, "step": 162710 }, { "epoch": 1.0705051873976172, "grad_norm": 0.015459342734924985, "learning_rate": 5.257288972979518e-06, "loss": 0.001, "step": 162720 }, { "epoch": 1.0705709755728505, "grad_norm": 0.011313724686985377, "learning_rate": 5.256715620911417e-06, "loss": 0.0013, "step": 162730 }, { "epoch": 1.0706367637480838, "grad_norm": 0.011233983685240924, "learning_rate": 5.256142265458741e-06, "loss": 0.0009, "step": 162740 }, { "epoch": 1.0707025519233173, "grad_norm": 0.24832826374068673, "learning_rate": 5.2555689066290505e-06, "loss": 0.0011, "step": 162750 }, { "epoch": 1.0707683400985506, "grad_norm": 0.26725509358682964, "learning_rate": 5.254995544429905e-06, "loss": 0.003, "step": 162760 }, { "epoch": 1.070834128273784, "grad_norm": 0.006902240937537352, "learning_rate": 5.254422178868863e-06, "loss": 0.0004, "step": 162770 }, { "epoch": 1.0708999164490174, "grad_norm": 0.0026347129443604045, "learning_rate": 5.2538488099534846e-06, "loss": 0.0007, "step": 162780 }, { "epoch": 1.070965704624251, "grad_norm": 0.046020753840731544, "learning_rate": 5.253275437691329e-06, "loss": 0.0016, "step": 162790 }, { "epoch": 1.0710314927994842, "grad_norm": 0.001459756136995386, "learning_rate": 5.252702062089955e-06, "loss": 0.0006, "step": 162800 }, { "epoch": 1.0710972809747177, "grad_norm": 0.07202751620637877, "learning_rate": 5.252128683156923e-06, "loss": 0.0008, "step": 162810 }, { "epoch": 1.071163069149951, "grad_norm": 0.04139526878971479, "learning_rate": 5.251555300899792e-06, "loss": 0.0009, "step": 162820 }, { "epoch": 1.0712288573251845, "grad_norm": 0.02655075122541, "learning_rate": 5.250981915326122e-06, "loss": 0.0008, "step": 162830 }, { "epoch": 1.0712946455004178, "grad_norm": 0.05995418749380157, "learning_rate": 5.250408526443471e-06, "loss": 0.0012, "step": 162840 }, { "epoch": 1.071360433675651, "grad_norm": 0.013037929802445468, "learning_rate": 5.249835134259402e-06, "loss": 0.0005, "step": 162850 }, { "epoch": 1.0714262218508845, "grad_norm": 0.09048077070163643, "learning_rate": 5.2492617387814715e-06, "loss": 0.0008, "step": 162860 }, { "epoch": 1.0714920100261178, "grad_norm": 0.027541559103089436, "learning_rate": 5.2486883400172395e-06, "loss": 0.0007, "step": 162870 }, { "epoch": 1.0715577982013513, "grad_norm": 0.006165733302200902, "learning_rate": 5.248114937974267e-06, "loss": 0.001, "step": 162880 }, { "epoch": 1.0716235863765846, "grad_norm": 0.03539747649419113, "learning_rate": 5.247541532660112e-06, "loss": 0.0019, "step": 162890 }, { "epoch": 1.0716893745518181, "grad_norm": 0.11622781367672202, "learning_rate": 5.2469681240823374e-06, "loss": 0.0007, "step": 162900 }, { "epoch": 1.0717551627270514, "grad_norm": 0.0047180570188388735, "learning_rate": 5.246394712248501e-06, "loss": 0.0005, "step": 162910 }, { "epoch": 1.071820950902285, "grad_norm": 0.017864404682890536, "learning_rate": 5.245821297166164e-06, "loss": 0.0011, "step": 162920 }, { "epoch": 1.0718867390775182, "grad_norm": 0.052001185126308115, "learning_rate": 5.245247878842884e-06, "loss": 0.0004, "step": 162930 }, { "epoch": 1.0719525272527517, "grad_norm": 0.07499654192838802, "learning_rate": 5.2446744572862225e-06, "loss": 0.0009, "step": 162940 }, { "epoch": 1.072018315427985, "grad_norm": 0.052168397621819206, "learning_rate": 5.244101032503741e-06, "loss": 0.0009, "step": 162950 }, { "epoch": 1.0720841036032183, "grad_norm": 0.01570799588046628, "learning_rate": 5.2435276045029974e-06, "loss": 0.0009, "step": 162960 }, { "epoch": 1.0721498917784518, "grad_norm": 0.05328547543015235, "learning_rate": 5.242954173291552e-06, "loss": 0.0014, "step": 162970 }, { "epoch": 1.072215679953685, "grad_norm": 0.00858280904048889, "learning_rate": 5.2423807388769664e-06, "loss": 0.0007, "step": 162980 }, { "epoch": 1.0722814681289186, "grad_norm": 0.025924894870555975, "learning_rate": 5.241807301266801e-06, "loss": 0.0006, "step": 162990 }, { "epoch": 1.0723472563041518, "grad_norm": 0.039406162399897304, "learning_rate": 5.241233860468613e-06, "loss": 0.0009, "step": 163000 }, { "epoch": 1.0724130444793853, "grad_norm": 0.06356156182560362, "learning_rate": 5.240660416489966e-06, "loss": 0.0007, "step": 163010 }, { "epoch": 1.0724788326546186, "grad_norm": 0.0012733841553935177, "learning_rate": 5.240086969338419e-06, "loss": 0.0009, "step": 163020 }, { "epoch": 1.0725446208298521, "grad_norm": 0.01636208633781228, "learning_rate": 5.239513519021533e-06, "loss": 0.0011, "step": 163030 }, { "epoch": 1.0726104090050854, "grad_norm": 0.07945426432566674, "learning_rate": 5.238940065546868e-06, "loss": 0.0014, "step": 163040 }, { "epoch": 1.0726761971803187, "grad_norm": 0.029767463761424456, "learning_rate": 5.238366608921986e-06, "loss": 0.0021, "step": 163050 }, { "epoch": 1.0727419853555522, "grad_norm": 0.01489483195068979, "learning_rate": 5.237793149154444e-06, "loss": 0.0005, "step": 163060 }, { "epoch": 1.0728077735307855, "grad_norm": 0.011594463816834436, "learning_rate": 5.237219686251805e-06, "loss": 0.0007, "step": 163070 }, { "epoch": 1.072873561706019, "grad_norm": 0.057035093775109576, "learning_rate": 5.236646220221631e-06, "loss": 0.0012, "step": 163080 }, { "epoch": 1.0729393498812523, "grad_norm": 0.050051428310122756, "learning_rate": 5.236072751071479e-06, "loss": 0.001, "step": 163090 }, { "epoch": 1.0730051380564858, "grad_norm": 0.05925512385748477, "learning_rate": 5.2354992788089125e-06, "loss": 0.0012, "step": 163100 }, { "epoch": 1.073070926231719, "grad_norm": 0.003898436755714744, "learning_rate": 5.2349258034414915e-06, "loss": 0.0006, "step": 163110 }, { "epoch": 1.0731367144069526, "grad_norm": 0.03931839370578632, "learning_rate": 5.234352324976777e-06, "loss": 0.001, "step": 163120 }, { "epoch": 1.0732025025821859, "grad_norm": 0.05204800868179757, "learning_rate": 5.233778843422329e-06, "loss": 0.0012, "step": 163130 }, { "epoch": 1.0732682907574194, "grad_norm": 0.02531457324319712, "learning_rate": 5.233205358785709e-06, "loss": 0.0012, "step": 163140 }, { "epoch": 1.0733340789326526, "grad_norm": 0.00946240380631874, "learning_rate": 5.232631871074476e-06, "loss": 0.0005, "step": 163150 }, { "epoch": 1.073399867107886, "grad_norm": 0.04081677037335288, "learning_rate": 5.232058380296194e-06, "loss": 0.0008, "step": 163160 }, { "epoch": 1.0734656552831194, "grad_norm": 0.021927333911062722, "learning_rate": 5.231484886458425e-06, "loss": 0.0022, "step": 163170 }, { "epoch": 1.0735314434583527, "grad_norm": 0.0008755086308460849, "learning_rate": 5.230911389568725e-06, "loss": 0.0009, "step": 163180 }, { "epoch": 1.0735972316335862, "grad_norm": 0.019131496789979847, "learning_rate": 5.230337889634658e-06, "loss": 0.0006, "step": 163190 }, { "epoch": 1.0736630198088195, "grad_norm": 0.022065356412926847, "learning_rate": 5.229764386663786e-06, "loss": 0.0008, "step": 163200 }, { "epoch": 1.073728807984053, "grad_norm": 0.019441757975209212, "learning_rate": 5.229190880663668e-06, "loss": 0.0004, "step": 163210 }, { "epoch": 1.0737945961592863, "grad_norm": 0.016634337352069123, "learning_rate": 5.228617371641865e-06, "loss": 0.0014, "step": 163220 }, { "epoch": 1.0738603843345198, "grad_norm": 0.05532352883467246, "learning_rate": 5.2280438596059425e-06, "loss": 0.0007, "step": 163230 }, { "epoch": 1.073926172509753, "grad_norm": 0.0217212103905082, "learning_rate": 5.227470344563457e-06, "loss": 0.0006, "step": 163240 }, { "epoch": 1.0739919606849866, "grad_norm": 0.001808355228630032, "learning_rate": 5.226896826521971e-06, "loss": 0.0008, "step": 163250 }, { "epoch": 1.0740577488602199, "grad_norm": 0.010403526924719816, "learning_rate": 5.226323305489048e-06, "loss": 0.0012, "step": 163260 }, { "epoch": 1.0741235370354532, "grad_norm": 0.05514547779921974, "learning_rate": 5.225749781472246e-06, "loss": 0.0013, "step": 163270 }, { "epoch": 1.0741893252106867, "grad_norm": 0.021468152628020567, "learning_rate": 5.225176254479128e-06, "loss": 0.0012, "step": 163280 }, { "epoch": 1.07425511338592, "grad_norm": 0.017154460482931674, "learning_rate": 5.224602724517256e-06, "loss": 0.0005, "step": 163290 }, { "epoch": 1.0743209015611535, "grad_norm": 0.02635539919490175, "learning_rate": 5.224029191594191e-06, "loss": 0.0012, "step": 163300 }, { "epoch": 1.0743866897363867, "grad_norm": 0.003011589605524495, "learning_rate": 5.223455655717495e-06, "loss": 0.0005, "step": 163310 }, { "epoch": 1.0744524779116202, "grad_norm": 0.0183050343905959, "learning_rate": 5.22288211689473e-06, "loss": 0.0008, "step": 163320 }, { "epoch": 1.0745182660868535, "grad_norm": 0.013087641227999646, "learning_rate": 5.222308575133455e-06, "loss": 0.0008, "step": 163330 }, { "epoch": 1.074584054262087, "grad_norm": 0.03727940074439768, "learning_rate": 5.221735030441235e-06, "loss": 0.0009, "step": 163340 }, { "epoch": 1.0746498424373203, "grad_norm": 0.029188326916911738, "learning_rate": 5.2211614828256275e-06, "loss": 0.0006, "step": 163350 }, { "epoch": 1.0747156306125536, "grad_norm": 0.0015776610447131808, "learning_rate": 5.2205879322941986e-06, "loss": 0.0018, "step": 163360 }, { "epoch": 1.074781418787787, "grad_norm": 0.010380572370042206, "learning_rate": 5.220014378854509e-06, "loss": 0.0006, "step": 163370 }, { "epoch": 1.0748472069630204, "grad_norm": 0.020088951923763632, "learning_rate": 5.219440822514119e-06, "loss": 0.0006, "step": 163380 }, { "epoch": 1.074912995138254, "grad_norm": 0.039248001068672565, "learning_rate": 5.218867263280593e-06, "loss": 0.0006, "step": 163390 }, { "epoch": 1.0749787833134872, "grad_norm": 0.015081691307105266, "learning_rate": 5.218293701161489e-06, "loss": 0.0006, "step": 163400 }, { "epoch": 1.0750445714887207, "grad_norm": 0.19956636926945887, "learning_rate": 5.21772013616437e-06, "loss": 0.0011, "step": 163410 }, { "epoch": 1.075110359663954, "grad_norm": 0.03592100415444498, "learning_rate": 5.217146568296801e-06, "loss": 0.0005, "step": 163420 }, { "epoch": 1.0751761478391875, "grad_norm": 0.0017825864255462205, "learning_rate": 5.216572997566343e-06, "loss": 0.0012, "step": 163430 }, { "epoch": 1.0752419360144208, "grad_norm": 0.05597617357275539, "learning_rate": 5.215999423980556e-06, "loss": 0.0011, "step": 163440 }, { "epoch": 1.075307724189654, "grad_norm": 0.06407902779387056, "learning_rate": 5.215425847547003e-06, "loss": 0.0009, "step": 163450 }, { "epoch": 1.0753735123648875, "grad_norm": 0.010429617075387386, "learning_rate": 5.214852268273246e-06, "loss": 0.001, "step": 163460 }, { "epoch": 1.0754393005401208, "grad_norm": 0.013779356160469136, "learning_rate": 5.214278686166848e-06, "loss": 0.0006, "step": 163470 }, { "epoch": 1.0755050887153543, "grad_norm": 0.014193218290686769, "learning_rate": 5.2137051012353705e-06, "loss": 0.002, "step": 163480 }, { "epoch": 1.0755708768905876, "grad_norm": 0.0589759788117845, "learning_rate": 5.213131513486374e-06, "loss": 0.0013, "step": 163490 }, { "epoch": 1.0756366650658211, "grad_norm": 0.08556514039988156, "learning_rate": 5.212557922927425e-06, "loss": 0.0011, "step": 163500 }, { "epoch": 1.0757024532410544, "grad_norm": 0.03513744550201754, "learning_rate": 5.211984329566084e-06, "loss": 0.0009, "step": 163510 }, { "epoch": 1.075768241416288, "grad_norm": 0.0023800650706586038, "learning_rate": 5.211410733409911e-06, "loss": 0.0035, "step": 163520 }, { "epoch": 1.0758340295915212, "grad_norm": 0.05464601417031175, "learning_rate": 5.21083713446647e-06, "loss": 0.0013, "step": 163530 }, { "epoch": 1.0758998177667547, "grad_norm": 0.02903067536632002, "learning_rate": 5.210263532743322e-06, "loss": 0.0005, "step": 163540 }, { "epoch": 1.075965605941988, "grad_norm": 0.004737354021179665, "learning_rate": 5.209689928248033e-06, "loss": 0.0005, "step": 163550 }, { "epoch": 1.0760313941172215, "grad_norm": 0.003184666350320033, "learning_rate": 5.209116320988164e-06, "loss": 0.0009, "step": 163560 }, { "epoch": 1.0760971822924548, "grad_norm": 0.03068515558539541, "learning_rate": 5.208542710971276e-06, "loss": 0.0011, "step": 163570 }, { "epoch": 1.076162970467688, "grad_norm": 0.061471512579584824, "learning_rate": 5.207969098204932e-06, "loss": 0.0009, "step": 163580 }, { "epoch": 1.0762287586429216, "grad_norm": 0.037959795430618413, "learning_rate": 5.2073954826966954e-06, "loss": 0.0007, "step": 163590 }, { "epoch": 1.0762945468181548, "grad_norm": 0.08041390467939581, "learning_rate": 5.2068218644541295e-06, "loss": 0.001, "step": 163600 }, { "epoch": 1.0763603349933883, "grad_norm": 0.028660158336119883, "learning_rate": 5.206248243484794e-06, "loss": 0.0007, "step": 163610 }, { "epoch": 1.0764261231686216, "grad_norm": 0.015391145473163956, "learning_rate": 5.2056746197962545e-06, "loss": 0.0016, "step": 163620 }, { "epoch": 1.0764919113438551, "grad_norm": 0.013260302230332597, "learning_rate": 5.205100993396073e-06, "loss": 0.0008, "step": 163630 }, { "epoch": 1.0765576995190884, "grad_norm": 0.0145291144668298, "learning_rate": 5.204527364291813e-06, "loss": 0.0008, "step": 163640 }, { "epoch": 1.076623487694322, "grad_norm": 0.0019365365947340652, "learning_rate": 5.203953732491035e-06, "loss": 0.0006, "step": 163650 }, { "epoch": 1.0766892758695552, "grad_norm": 0.04091362620527502, "learning_rate": 5.203380098001302e-06, "loss": 0.0007, "step": 163660 }, { "epoch": 1.0767550640447885, "grad_norm": 0.018133992146861993, "learning_rate": 5.20280646083018e-06, "loss": 0.0008, "step": 163670 }, { "epoch": 1.076820852220022, "grad_norm": 0.06850024349276423, "learning_rate": 5.202232820985229e-06, "loss": 0.0011, "step": 163680 }, { "epoch": 1.0768866403952553, "grad_norm": 0.02165036543022357, "learning_rate": 5.201659178474014e-06, "loss": 0.0007, "step": 163690 }, { "epoch": 1.0769524285704888, "grad_norm": 0.02633652271679468, "learning_rate": 5.201085533304096e-06, "loss": 0.0009, "step": 163700 }, { "epoch": 1.077018216745722, "grad_norm": 0.07540677113936958, "learning_rate": 5.200511885483039e-06, "loss": 0.0006, "step": 163710 }, { "epoch": 1.0770840049209556, "grad_norm": 0.019775717689925865, "learning_rate": 5.199938235018407e-06, "loss": 0.0021, "step": 163720 }, { "epoch": 1.0771497930961889, "grad_norm": 0.04412576380073528, "learning_rate": 5.199364581917761e-06, "loss": 0.0006, "step": 163730 }, { "epoch": 1.0772155812714224, "grad_norm": 0.14959044808945357, "learning_rate": 5.1987909261886635e-06, "loss": 0.0016, "step": 163740 }, { "epoch": 1.0772813694466556, "grad_norm": 0.04828685726503872, "learning_rate": 5.198217267838683e-06, "loss": 0.0006, "step": 163750 }, { "epoch": 1.077347157621889, "grad_norm": 0.04091450382270709, "learning_rate": 5.197643606875376e-06, "loss": 0.0006, "step": 163760 }, { "epoch": 1.0774129457971224, "grad_norm": 0.08913446221195112, "learning_rate": 5.19706994330631e-06, "loss": 0.0005, "step": 163770 }, { "epoch": 1.0774787339723557, "grad_norm": 0.07216038301413041, "learning_rate": 5.196496277139047e-06, "loss": 0.0021, "step": 163780 }, { "epoch": 1.0775445221475892, "grad_norm": 0.010066603037031564, "learning_rate": 5.195922608381151e-06, "loss": 0.001, "step": 163790 }, { "epoch": 1.0776103103228225, "grad_norm": 0.0751596796310961, "learning_rate": 5.195348937040183e-06, "loss": 0.001, "step": 163800 }, { "epoch": 1.077676098498056, "grad_norm": 0.05692089358939624, "learning_rate": 5.19477526312371e-06, "loss": 0.0008, "step": 163810 }, { "epoch": 1.0777418866732893, "grad_norm": 0.06463828892142362, "learning_rate": 5.194201586639293e-06, "loss": 0.0006, "step": 163820 }, { "epoch": 1.0778076748485228, "grad_norm": 0.10009477507730113, "learning_rate": 5.193627907594494e-06, "loss": 0.0008, "step": 163830 }, { "epoch": 1.077873463023756, "grad_norm": 0.0477332375129619, "learning_rate": 5.19305422599688e-06, "loss": 0.0012, "step": 163840 }, { "epoch": 1.0779392511989896, "grad_norm": 0.0023786743461468264, "learning_rate": 5.192480541854011e-06, "loss": 0.0006, "step": 163850 }, { "epoch": 1.0780050393742229, "grad_norm": 0.10207885276283413, "learning_rate": 5.191906855173453e-06, "loss": 0.0009, "step": 163860 }, { "epoch": 1.0780708275494564, "grad_norm": 0.0005599943047700922, "learning_rate": 5.1913331659627695e-06, "loss": 0.001, "step": 163870 }, { "epoch": 1.0781366157246897, "grad_norm": 0.012984018174726717, "learning_rate": 5.190759474229523e-06, "loss": 0.0004, "step": 163880 }, { "epoch": 1.078202403899923, "grad_norm": 0.10441683383284082, "learning_rate": 5.190185779981278e-06, "loss": 0.001, "step": 163890 }, { "epoch": 1.0782681920751565, "grad_norm": 0.0016926336417427577, "learning_rate": 5.189612083225598e-06, "loss": 0.001, "step": 163900 }, { "epoch": 1.0783339802503897, "grad_norm": 0.0401624381876567, "learning_rate": 5.189038383970046e-06, "loss": 0.0005, "step": 163910 }, { "epoch": 1.0783997684256232, "grad_norm": 0.008392715482785869, "learning_rate": 5.188464682222185e-06, "loss": 0.0006, "step": 163920 }, { "epoch": 1.0784655566008565, "grad_norm": 0.08301131905756977, "learning_rate": 5.187890977989581e-06, "loss": 0.0003, "step": 163930 }, { "epoch": 1.07853134477609, "grad_norm": 0.027443570178759726, "learning_rate": 5.187317271279797e-06, "loss": 0.0012, "step": 163940 }, { "epoch": 1.0785971329513233, "grad_norm": 0.03651240840098988, "learning_rate": 5.1867435621003955e-06, "loss": 0.0009, "step": 163950 }, { "epoch": 1.0786629211265568, "grad_norm": 0.02260360614599594, "learning_rate": 5.186169850458943e-06, "loss": 0.0008, "step": 163960 }, { "epoch": 1.07872870930179, "grad_norm": 0.032147590914470156, "learning_rate": 5.185596136363001e-06, "loss": 0.0011, "step": 163970 }, { "epoch": 1.0787944974770234, "grad_norm": 0.009968986175238429, "learning_rate": 5.185022419820135e-06, "loss": 0.0018, "step": 163980 }, { "epoch": 1.078860285652257, "grad_norm": 0.07816367491630069, "learning_rate": 5.184448700837906e-06, "loss": 0.0011, "step": 163990 }, { "epoch": 1.0789260738274902, "grad_norm": 0.0858649375445773, "learning_rate": 5.183874979423882e-06, "loss": 0.0009, "step": 164000 }, { "epoch": 1.0789918620027237, "grad_norm": 0.06647603532168853, "learning_rate": 5.183301255585624e-06, "loss": 0.0006, "step": 164010 }, { "epoch": 1.079057650177957, "grad_norm": 0.07843118294794345, "learning_rate": 5.182727529330699e-06, "loss": 0.0007, "step": 164020 }, { "epoch": 1.0791234383531905, "grad_norm": 0.009938342493406457, "learning_rate": 5.1821538006666685e-06, "loss": 0.0008, "step": 164030 }, { "epoch": 1.0791892265284238, "grad_norm": 0.00951896159428454, "learning_rate": 5.181580069601098e-06, "loss": 0.001, "step": 164040 }, { "epoch": 1.0792550147036573, "grad_norm": 0.028895529982793475, "learning_rate": 5.181006336141549e-06, "loss": 0.0007, "step": 164050 }, { "epoch": 1.0793208028788905, "grad_norm": 0.03328114627159852, "learning_rate": 5.180432600295589e-06, "loss": 0.0011, "step": 164060 }, { "epoch": 1.0793865910541238, "grad_norm": 0.014826403197010595, "learning_rate": 5.179858862070781e-06, "loss": 0.0005, "step": 164070 }, { "epoch": 1.0794523792293573, "grad_norm": 0.015687206268981173, "learning_rate": 5.179285121474689e-06, "loss": 0.0014, "step": 164080 }, { "epoch": 1.0795181674045906, "grad_norm": 0.008884003996328275, "learning_rate": 5.178711378514879e-06, "loss": 0.0007, "step": 164090 }, { "epoch": 1.0795839555798241, "grad_norm": 0.030951050552459607, "learning_rate": 5.178137633198913e-06, "loss": 0.0004, "step": 164100 }, { "epoch": 1.0796497437550574, "grad_norm": 0.1549239224754585, "learning_rate": 5.177563885534357e-06, "loss": 0.0005, "step": 164110 }, { "epoch": 1.079715531930291, "grad_norm": 0.0350913962959884, "learning_rate": 5.176990135528772e-06, "loss": 0.0006, "step": 164120 }, { "epoch": 1.0797813201055242, "grad_norm": 0.0313000622992266, "learning_rate": 5.176416383189726e-06, "loss": 0.0008, "step": 164130 }, { "epoch": 1.0798471082807577, "grad_norm": 0.004753092720566955, "learning_rate": 5.175842628524783e-06, "loss": 0.0004, "step": 164140 }, { "epoch": 1.079912896455991, "grad_norm": 0.038616862325837005, "learning_rate": 5.175268871541507e-06, "loss": 0.0007, "step": 164150 }, { "epoch": 1.0799786846312245, "grad_norm": 0.01371488385247804, "learning_rate": 5.174695112247462e-06, "loss": 0.0006, "step": 164160 }, { "epoch": 1.0800444728064578, "grad_norm": 0.05074042341073687, "learning_rate": 5.174121350650213e-06, "loss": 0.0009, "step": 164170 }, { "epoch": 1.080110260981691, "grad_norm": 0.001754668350005322, "learning_rate": 5.173547586757324e-06, "loss": 0.0002, "step": 164180 }, { "epoch": 1.0801760491569246, "grad_norm": 0.05581485866797598, "learning_rate": 5.172973820576359e-06, "loss": 0.0005, "step": 164190 }, { "epoch": 1.0802418373321578, "grad_norm": 0.04153560162979648, "learning_rate": 5.172400052114885e-06, "loss": 0.0006, "step": 164200 }, { "epoch": 1.0803076255073913, "grad_norm": 0.019634967619023544, "learning_rate": 5.171826281380465e-06, "loss": 0.0006, "step": 164210 }, { "epoch": 1.0803734136826246, "grad_norm": 0.10716606716189057, "learning_rate": 5.171252508380664e-06, "loss": 0.0009, "step": 164220 }, { "epoch": 1.0804392018578581, "grad_norm": 0.0864971626945556, "learning_rate": 5.170678733123047e-06, "loss": 0.0005, "step": 164230 }, { "epoch": 1.0805049900330914, "grad_norm": 0.07104946112986602, "learning_rate": 5.170104955615177e-06, "loss": 0.0011, "step": 164240 }, { "epoch": 1.080570778208325, "grad_norm": 0.04279827304865527, "learning_rate": 5.169531175864619e-06, "loss": 0.0009, "step": 164250 }, { "epoch": 1.0806365663835582, "grad_norm": 0.011778595226689894, "learning_rate": 5.168957393878941e-06, "loss": 0.0009, "step": 164260 }, { "epoch": 1.0807023545587917, "grad_norm": 0.00013740086416110534, "learning_rate": 5.168383609665706e-06, "loss": 0.0006, "step": 164270 }, { "epoch": 1.080768142734025, "grad_norm": 0.01063013750665076, "learning_rate": 5.167809823232477e-06, "loss": 0.001, "step": 164280 }, { "epoch": 1.0808339309092583, "grad_norm": 0.039572358658525805, "learning_rate": 5.167236034586822e-06, "loss": 0.0008, "step": 164290 }, { "epoch": 1.0808997190844918, "grad_norm": 0.0987937151729066, "learning_rate": 5.166662243736303e-06, "loss": 0.0007, "step": 164300 }, { "epoch": 1.080965507259725, "grad_norm": 0.025735063050679418, "learning_rate": 5.166088450688486e-06, "loss": 0.0007, "step": 164310 }, { "epoch": 1.0810312954349586, "grad_norm": 0.005041230573920746, "learning_rate": 5.165514655450937e-06, "loss": 0.0007, "step": 164320 }, { "epoch": 1.0810970836101919, "grad_norm": 0.025671557646179035, "learning_rate": 5.16494085803122e-06, "loss": 0.0008, "step": 164330 }, { "epoch": 1.0811628717854254, "grad_norm": 0.08051280031928534, "learning_rate": 5.164367058436901e-06, "loss": 0.0012, "step": 164340 }, { "epoch": 1.0812286599606586, "grad_norm": 0.030722986985028514, "learning_rate": 5.163793256675543e-06, "loss": 0.0011, "step": 164350 }, { "epoch": 1.0812944481358922, "grad_norm": 0.045615380200458265, "learning_rate": 5.163219452754714e-06, "loss": 0.0006, "step": 164360 }, { "epoch": 1.0813602363111254, "grad_norm": 0.0025410724753647303, "learning_rate": 5.162645646681976e-06, "loss": 0.0007, "step": 164370 }, { "epoch": 1.0814260244863587, "grad_norm": 0.07159775196180614, "learning_rate": 5.162071838464896e-06, "loss": 0.0007, "step": 164380 }, { "epoch": 1.0814918126615922, "grad_norm": 0.0654688579404023, "learning_rate": 5.16149802811104e-06, "loss": 0.0006, "step": 164390 }, { "epoch": 1.0815576008368255, "grad_norm": 0.020634087272891052, "learning_rate": 5.16092421562797e-06, "loss": 0.0008, "step": 164400 }, { "epoch": 1.081623389012059, "grad_norm": 0.03149954268772766, "learning_rate": 5.160350401023254e-06, "loss": 0.0008, "step": 164410 }, { "epoch": 1.0816891771872923, "grad_norm": 0.020651538132109012, "learning_rate": 5.1597765843044566e-06, "loss": 0.0005, "step": 164420 }, { "epoch": 1.0817549653625258, "grad_norm": 0.015561750857887639, "learning_rate": 5.159202765479143e-06, "loss": 0.0006, "step": 164430 }, { "epoch": 1.081820753537759, "grad_norm": 0.0005176926099727697, "learning_rate": 5.158628944554878e-06, "loss": 0.0006, "step": 164440 }, { "epoch": 1.0818865417129926, "grad_norm": 0.061705760082453695, "learning_rate": 5.158055121539227e-06, "loss": 0.0022, "step": 164450 }, { "epoch": 1.0819523298882259, "grad_norm": 0.0031633928564311136, "learning_rate": 5.157481296439757e-06, "loss": 0.0012, "step": 164460 }, { "epoch": 1.0820181180634594, "grad_norm": 0.0008289766246505631, "learning_rate": 5.156907469264032e-06, "loss": 0.0006, "step": 164470 }, { "epoch": 1.0820839062386927, "grad_norm": 0.010892894206938954, "learning_rate": 5.156333640019617e-06, "loss": 0.0018, "step": 164480 }, { "epoch": 1.082149694413926, "grad_norm": 0.06517404263166203, "learning_rate": 5.155759808714078e-06, "loss": 0.0017, "step": 164490 }, { "epoch": 1.0822154825891594, "grad_norm": 0.036414230468040786, "learning_rate": 5.15518597535498e-06, "loss": 0.001, "step": 164500 }, { "epoch": 1.0822812707643927, "grad_norm": 0.16333281859056858, "learning_rate": 5.15461213994989e-06, "loss": 0.0017, "step": 164510 }, { "epoch": 1.0823470589396262, "grad_norm": 0.03287966163268218, "learning_rate": 5.154038302506371e-06, "loss": 0.0011, "step": 164520 }, { "epoch": 1.0824128471148595, "grad_norm": 0.0007733100290989065, "learning_rate": 5.153464463031992e-06, "loss": 0.0008, "step": 164530 }, { "epoch": 1.082478635290093, "grad_norm": 0.015456284060356436, "learning_rate": 5.152890621534315e-06, "loss": 0.0004, "step": 164540 }, { "epoch": 1.0825444234653263, "grad_norm": 0.016411258191666243, "learning_rate": 5.1523167780209085e-06, "loss": 0.0014, "step": 164550 }, { "epoch": 1.0826102116405598, "grad_norm": 0.015356040697522569, "learning_rate": 5.151742932499335e-06, "loss": 0.0012, "step": 164560 }, { "epoch": 1.082675999815793, "grad_norm": 0.03278156179624854, "learning_rate": 5.151169084977163e-06, "loss": 0.0003, "step": 164570 }, { "epoch": 1.0827417879910266, "grad_norm": 0.025359180574758767, "learning_rate": 5.150595235461957e-06, "loss": 0.0005, "step": 164580 }, { "epoch": 1.0828075761662599, "grad_norm": 0.03965823162579665, "learning_rate": 5.150021383961283e-06, "loss": 0.0012, "step": 164590 }, { "epoch": 1.0828733643414932, "grad_norm": 0.2906732950491834, "learning_rate": 5.149447530482708e-06, "loss": 0.0009, "step": 164600 }, { "epoch": 1.0829391525167267, "grad_norm": 0.015968299596136184, "learning_rate": 5.148873675033794e-06, "loss": 0.0012, "step": 164610 }, { "epoch": 1.08300494069196, "grad_norm": 0.04912880300376133, "learning_rate": 5.148299817622111e-06, "loss": 0.0009, "step": 164620 }, { "epoch": 1.0830707288671935, "grad_norm": 0.02746943244747312, "learning_rate": 5.147725958255222e-06, "loss": 0.0011, "step": 164630 }, { "epoch": 1.0831365170424267, "grad_norm": 0.08262153530351317, "learning_rate": 5.147152096940693e-06, "loss": 0.0008, "step": 164640 }, { "epoch": 1.0832023052176603, "grad_norm": 0.0018572048113110603, "learning_rate": 5.146578233686092e-06, "loss": 0.0005, "step": 164650 }, { "epoch": 1.0832680933928935, "grad_norm": 0.02404733915461315, "learning_rate": 5.146004368498984e-06, "loss": 0.0006, "step": 164660 }, { "epoch": 1.083333881568127, "grad_norm": 0.0013553187284364966, "learning_rate": 5.1454305013869334e-06, "loss": 0.0011, "step": 164670 }, { "epoch": 1.0833996697433603, "grad_norm": 0.1375740097673517, "learning_rate": 5.144856632357508e-06, "loss": 0.0011, "step": 164680 }, { "epoch": 1.0834654579185936, "grad_norm": 0.00850983200812231, "learning_rate": 5.144282761418272e-06, "loss": 0.0007, "step": 164690 }, { "epoch": 1.0835312460938271, "grad_norm": 0.010027694608170398, "learning_rate": 5.143708888576793e-06, "loss": 0.0011, "step": 164700 }, { "epoch": 1.0835970342690604, "grad_norm": 0.03268892302815872, "learning_rate": 5.143135013840637e-06, "loss": 0.0022, "step": 164710 }, { "epoch": 1.083662822444294, "grad_norm": 0.02514127843726163, "learning_rate": 5.142561137217369e-06, "loss": 0.001, "step": 164720 }, { "epoch": 1.0837286106195272, "grad_norm": 0.011979508426966531, "learning_rate": 5.141987258714555e-06, "loss": 0.0007, "step": 164730 }, { "epoch": 1.0837943987947607, "grad_norm": 0.03948371936119242, "learning_rate": 5.141413378339762e-06, "loss": 0.0007, "step": 164740 }, { "epoch": 1.083860186969994, "grad_norm": 0.02388098576924765, "learning_rate": 5.140839496100556e-06, "loss": 0.0006, "step": 164750 }, { "epoch": 1.0839259751452275, "grad_norm": 0.013331499083344302, "learning_rate": 5.140265612004504e-06, "loss": 0.0015, "step": 164760 }, { "epoch": 1.0839917633204608, "grad_norm": 0.0738936914941083, "learning_rate": 5.1396917260591694e-06, "loss": 0.001, "step": 164770 }, { "epoch": 1.084057551495694, "grad_norm": 0.041109672024925555, "learning_rate": 5.1391178382721205e-06, "loss": 0.0013, "step": 164780 }, { "epoch": 1.0841233396709276, "grad_norm": 0.10186370384021223, "learning_rate": 5.1385439486509226e-06, "loss": 0.0006, "step": 164790 }, { "epoch": 1.0841891278461608, "grad_norm": 0.008588670185422684, "learning_rate": 5.137970057203143e-06, "loss": 0.0005, "step": 164800 }, { "epoch": 1.0842549160213943, "grad_norm": 0.0012811227146973267, "learning_rate": 5.137396163936348e-06, "loss": 0.0008, "step": 164810 }, { "epoch": 1.0843207041966276, "grad_norm": 0.02750667611934708, "learning_rate": 5.1368222688581015e-06, "loss": 0.0002, "step": 164820 }, { "epoch": 1.0843864923718611, "grad_norm": 0.05430825903667609, "learning_rate": 5.136248371975973e-06, "loss": 0.0008, "step": 164830 }, { "epoch": 1.0844522805470944, "grad_norm": 0.08513284305967322, "learning_rate": 5.135674473297526e-06, "loss": 0.0006, "step": 164840 }, { "epoch": 1.084518068722328, "grad_norm": 0.09784514983265513, "learning_rate": 5.13510057283033e-06, "loss": 0.0008, "step": 164850 }, { "epoch": 1.0845838568975612, "grad_norm": 0.0013339191830443044, "learning_rate": 5.134526670581948e-06, "loss": 0.0014, "step": 164860 }, { "epoch": 1.0846496450727947, "grad_norm": 0.009854878823694284, "learning_rate": 5.133952766559948e-06, "loss": 0.0008, "step": 164870 }, { "epoch": 1.084715433248028, "grad_norm": 0.004924156478794955, "learning_rate": 5.133378860771897e-06, "loss": 0.0005, "step": 164880 }, { "epoch": 1.0847812214232615, "grad_norm": 0.0693606213541128, "learning_rate": 5.132804953225361e-06, "loss": 0.0005, "step": 164890 }, { "epoch": 1.0848470095984948, "grad_norm": 0.08433140176853858, "learning_rate": 5.132231043927907e-06, "loss": 0.0008, "step": 164900 }, { "epoch": 1.084912797773728, "grad_norm": 0.04800025896331953, "learning_rate": 5.1316571328871004e-06, "loss": 0.0007, "step": 164910 }, { "epoch": 1.0849785859489616, "grad_norm": 0.042199588755037785, "learning_rate": 5.1310832201105086e-06, "loss": 0.0004, "step": 164920 }, { "epoch": 1.0850443741241949, "grad_norm": 0.07558515778312427, "learning_rate": 5.130509305605698e-06, "loss": 0.0009, "step": 164930 }, { "epoch": 1.0851101622994284, "grad_norm": 0.018535293827166188, "learning_rate": 5.129935389380234e-06, "loss": 0.0004, "step": 164940 }, { "epoch": 1.0851759504746616, "grad_norm": 0.0074316443821933155, "learning_rate": 5.129361471441683e-06, "loss": 0.0004, "step": 164950 }, { "epoch": 1.0852417386498951, "grad_norm": 0.04685613833327229, "learning_rate": 5.128787551797614e-06, "loss": 0.0006, "step": 164960 }, { "epoch": 1.0853075268251284, "grad_norm": 0.04797524989636296, "learning_rate": 5.128213630455593e-06, "loss": 0.0004, "step": 164970 }, { "epoch": 1.085373315000362, "grad_norm": 0.01669814286530076, "learning_rate": 5.127639707423185e-06, "loss": 0.0008, "step": 164980 }, { "epoch": 1.0854391031755952, "grad_norm": 0.038135902422444475, "learning_rate": 5.127065782707959e-06, "loss": 0.0006, "step": 164990 }, { "epoch": 1.0855048913508285, "grad_norm": 0.030933094739036035, "learning_rate": 5.126491856317479e-06, "loss": 0.0007, "step": 165000 }, { "epoch": 1.085570679526062, "grad_norm": 0.04847098759988038, "learning_rate": 5.125917928259313e-06, "loss": 0.0005, "step": 165010 }, { "epoch": 1.0856364677012953, "grad_norm": 0.10023992325747995, "learning_rate": 5.125343998541029e-06, "loss": 0.0016, "step": 165020 }, { "epoch": 1.0857022558765288, "grad_norm": 0.1303520906152932, "learning_rate": 5.124770067170191e-06, "loss": 0.0009, "step": 165030 }, { "epoch": 1.085768044051762, "grad_norm": 0.02505117851022972, "learning_rate": 5.12419613415437e-06, "loss": 0.0007, "step": 165040 }, { "epoch": 1.0858338322269956, "grad_norm": 0.06768729479764916, "learning_rate": 5.123622199501129e-06, "loss": 0.0008, "step": 165050 }, { "epoch": 1.0858996204022289, "grad_norm": 0.020292154905428725, "learning_rate": 5.123048263218036e-06, "loss": 0.0006, "step": 165060 }, { "epoch": 1.0859654085774624, "grad_norm": 0.004587964528958148, "learning_rate": 5.122474325312657e-06, "loss": 0.0012, "step": 165070 }, { "epoch": 1.0860311967526957, "grad_norm": 0.07658416954621394, "learning_rate": 5.121900385792561e-06, "loss": 0.0012, "step": 165080 }, { "epoch": 1.086096984927929, "grad_norm": 0.029237368523996013, "learning_rate": 5.121326444665313e-06, "loss": 0.002, "step": 165090 }, { "epoch": 1.0861627731031624, "grad_norm": 0.15463064819363107, "learning_rate": 5.120752501938481e-06, "loss": 0.0013, "step": 165100 }, { "epoch": 1.0862285612783957, "grad_norm": 0.03990319615453527, "learning_rate": 5.1201785576196314e-06, "loss": 0.0011, "step": 165110 }, { "epoch": 1.0862943494536292, "grad_norm": 0.04799209628913956, "learning_rate": 5.1196046117163315e-06, "loss": 0.0007, "step": 165120 }, { "epoch": 1.0863601376288625, "grad_norm": 0.013559828434167224, "learning_rate": 5.1190306642361484e-06, "loss": 0.0007, "step": 165130 }, { "epoch": 1.086425925804096, "grad_norm": 0.023107888944488536, "learning_rate": 5.118456715186647e-06, "loss": 0.0004, "step": 165140 }, { "epoch": 1.0864917139793293, "grad_norm": 0.05655956323187836, "learning_rate": 5.117882764575398e-06, "loss": 0.0006, "step": 165150 }, { "epoch": 1.0865575021545628, "grad_norm": 0.026757998124716083, "learning_rate": 5.117308812409966e-06, "loss": 0.0013, "step": 165160 }, { "epoch": 1.086623290329796, "grad_norm": 0.07852462687232772, "learning_rate": 5.116734858697919e-06, "loss": 0.0014, "step": 165170 }, { "epoch": 1.0866890785050296, "grad_norm": 0.009024566101175123, "learning_rate": 5.116160903446823e-06, "loss": 0.0005, "step": 165180 }, { "epoch": 1.0867548666802629, "grad_norm": 0.033069821601495734, "learning_rate": 5.115586946664246e-06, "loss": 0.0005, "step": 165190 }, { "epoch": 1.0868206548554962, "grad_norm": 0.0008247412382433517, "learning_rate": 5.115012988357756e-06, "loss": 0.0013, "step": 165200 }, { "epoch": 1.0868864430307297, "grad_norm": 0.013797061061250065, "learning_rate": 5.114439028534917e-06, "loss": 0.001, "step": 165210 }, { "epoch": 1.086952231205963, "grad_norm": 0.014650727490784759, "learning_rate": 5.1138650672033e-06, "loss": 0.0014, "step": 165220 }, { "epoch": 1.0870180193811965, "grad_norm": 0.00942562959791894, "learning_rate": 5.113291104370469e-06, "loss": 0.0019, "step": 165230 }, { "epoch": 1.0870838075564297, "grad_norm": 0.021402879576683024, "learning_rate": 5.112717140043994e-06, "loss": 0.0003, "step": 165240 }, { "epoch": 1.0871495957316633, "grad_norm": 0.02260967669669706, "learning_rate": 5.112143174231441e-06, "loss": 0.0008, "step": 165250 }, { "epoch": 1.0872153839068965, "grad_norm": 0.01588473990174132, "learning_rate": 5.111569206940377e-06, "loss": 0.0007, "step": 165260 }, { "epoch": 1.08728117208213, "grad_norm": 0.018487126831043584, "learning_rate": 5.110995238178367e-06, "loss": 0.0006, "step": 165270 }, { "epoch": 1.0873469602573633, "grad_norm": 0.08074222893139346, "learning_rate": 5.110421267952983e-06, "loss": 0.0007, "step": 165280 }, { "epoch": 1.0874127484325968, "grad_norm": 0.13405570564023053, "learning_rate": 5.109847296271789e-06, "loss": 0.0008, "step": 165290 }, { "epoch": 1.0874785366078301, "grad_norm": 0.01972828841326523, "learning_rate": 5.109273323142354e-06, "loss": 0.0013, "step": 165300 }, { "epoch": 1.0875443247830634, "grad_norm": 0.0287854662281636, "learning_rate": 5.108699348572245e-06, "loss": 0.0004, "step": 165310 }, { "epoch": 1.087610112958297, "grad_norm": 0.03091642717356266, "learning_rate": 5.108125372569029e-06, "loss": 0.0007, "step": 165320 }, { "epoch": 1.0876759011335302, "grad_norm": 0.0019133737609735907, "learning_rate": 5.107551395140272e-06, "loss": 0.0007, "step": 165330 }, { "epoch": 1.0877416893087637, "grad_norm": 0.01674132200628554, "learning_rate": 5.1069774162935424e-06, "loss": 0.0009, "step": 165340 }, { "epoch": 1.087807477483997, "grad_norm": 0.005250979372791481, "learning_rate": 5.106403436036409e-06, "loss": 0.0019, "step": 165350 }, { "epoch": 1.0878732656592305, "grad_norm": 0.08709743519686824, "learning_rate": 5.105829454376439e-06, "loss": 0.0003, "step": 165360 }, { "epoch": 1.0879390538344638, "grad_norm": 0.04189065334789588, "learning_rate": 5.1052554713211974e-06, "loss": 0.0004, "step": 165370 }, { "epoch": 1.0880048420096973, "grad_norm": 0.0047838491472670655, "learning_rate": 5.104681486878255e-06, "loss": 0.0012, "step": 165380 }, { "epoch": 1.0880706301849306, "grad_norm": 0.02033441849816831, "learning_rate": 5.104107501055176e-06, "loss": 0.0015, "step": 165390 }, { "epoch": 1.0881364183601638, "grad_norm": 0.011913802913128125, "learning_rate": 5.103533513859529e-06, "loss": 0.0006, "step": 165400 }, { "epoch": 1.0882022065353973, "grad_norm": 0.0007151522494535362, "learning_rate": 5.102959525298884e-06, "loss": 0.0005, "step": 165410 }, { "epoch": 1.0882679947106306, "grad_norm": 0.05425078176106377, "learning_rate": 5.102385535380805e-06, "loss": 0.0014, "step": 165420 }, { "epoch": 1.0883337828858641, "grad_norm": 0.05862060026297816, "learning_rate": 5.1018115441128624e-06, "loss": 0.0009, "step": 165430 }, { "epoch": 1.0883995710610974, "grad_norm": 0.06675815936242051, "learning_rate": 5.1012375515026225e-06, "loss": 0.0015, "step": 165440 }, { "epoch": 1.088465359236331, "grad_norm": 0.011308037886297225, "learning_rate": 5.100663557557653e-06, "loss": 0.001, "step": 165450 }, { "epoch": 1.0885311474115642, "grad_norm": 0.1241741115271564, "learning_rate": 5.100089562285521e-06, "loss": 0.0005, "step": 165460 }, { "epoch": 1.0885969355867977, "grad_norm": 0.0009569599060492837, "learning_rate": 5.099515565693793e-06, "loss": 0.0011, "step": 165470 }, { "epoch": 1.088662723762031, "grad_norm": 0.07903780813349941, "learning_rate": 5.09894156779004e-06, "loss": 0.0006, "step": 165480 }, { "epoch": 1.0887285119372645, "grad_norm": 0.054563372491071634, "learning_rate": 5.098367568581828e-06, "loss": 0.0006, "step": 165490 }, { "epoch": 1.0887943001124978, "grad_norm": 0.022683010548524566, "learning_rate": 5.097793568076724e-06, "loss": 0.0008, "step": 165500 }, { "epoch": 1.088860088287731, "grad_norm": 0.017285215345328685, "learning_rate": 5.097219566282297e-06, "loss": 0.0005, "step": 165510 }, { "epoch": 1.0889258764629646, "grad_norm": 0.001163125282485762, "learning_rate": 5.096645563206113e-06, "loss": 0.0003, "step": 165520 }, { "epoch": 1.0889916646381979, "grad_norm": 0.005731654508035198, "learning_rate": 5.096071558855739e-06, "loss": 0.0005, "step": 165530 }, { "epoch": 1.0890574528134314, "grad_norm": 0.013508997927216694, "learning_rate": 5.095497553238748e-06, "loss": 0.0035, "step": 165540 }, { "epoch": 1.0891232409886646, "grad_norm": 0.0012137157265358339, "learning_rate": 5.094923546362703e-06, "loss": 0.0006, "step": 165550 }, { "epoch": 1.0891890291638981, "grad_norm": 0.0384045737734498, "learning_rate": 5.094349538235172e-06, "loss": 0.0006, "step": 165560 }, { "epoch": 1.0892548173391314, "grad_norm": 0.0551286502450439, "learning_rate": 5.093775528863726e-06, "loss": 0.0008, "step": 165570 }, { "epoch": 1.089320605514365, "grad_norm": 0.011393491372740212, "learning_rate": 5.093201518255929e-06, "loss": 0.0011, "step": 165580 }, { "epoch": 1.0893863936895982, "grad_norm": 0.155538260958155, "learning_rate": 5.0926275064193505e-06, "loss": 0.0011, "step": 165590 }, { "epoch": 1.0894521818648317, "grad_norm": 0.0018375172026670003, "learning_rate": 5.092053493361557e-06, "loss": 0.0012, "step": 165600 }, { "epoch": 1.089517970040065, "grad_norm": 0.016949420655285624, "learning_rate": 5.09147947909012e-06, "loss": 0.0007, "step": 165610 }, { "epoch": 1.0895837582152983, "grad_norm": 0.02930350032464892, "learning_rate": 5.0909054636126046e-06, "loss": 0.0011, "step": 165620 }, { "epoch": 1.0896495463905318, "grad_norm": 0.047527484885108336, "learning_rate": 5.09033144693658e-06, "loss": 0.0008, "step": 165630 }, { "epoch": 1.089715334565765, "grad_norm": 0.03683256976087969, "learning_rate": 5.089757429069611e-06, "loss": 0.0008, "step": 165640 }, { "epoch": 1.0897811227409986, "grad_norm": 0.048369659923206125, "learning_rate": 5.089183410019269e-06, "loss": 0.0012, "step": 165650 }, { "epoch": 1.0898469109162319, "grad_norm": 0.04919295807039075, "learning_rate": 5.088609389793121e-06, "loss": 0.0007, "step": 165660 }, { "epoch": 1.0899126990914654, "grad_norm": 0.03279098965966211, "learning_rate": 5.088035368398734e-06, "loss": 0.0012, "step": 165670 }, { "epoch": 1.0899784872666987, "grad_norm": 0.02651853840304339, "learning_rate": 5.087461345843677e-06, "loss": 0.0008, "step": 165680 }, { "epoch": 1.0900442754419322, "grad_norm": 0.009147189972977011, "learning_rate": 5.08688732213552e-06, "loss": 0.0011, "step": 165690 }, { "epoch": 1.0901100636171654, "grad_norm": 0.09288631658095038, "learning_rate": 5.086313297281827e-06, "loss": 0.0009, "step": 165700 }, { "epoch": 1.0901758517923987, "grad_norm": 0.19900310052167136, "learning_rate": 5.085739271290166e-06, "loss": 0.001, "step": 165710 }, { "epoch": 1.0902416399676322, "grad_norm": 0.19121256948215049, "learning_rate": 5.085165244168108e-06, "loss": 0.0007, "step": 165720 }, { "epoch": 1.0903074281428655, "grad_norm": 0.10949830247515448, "learning_rate": 5.084591215923221e-06, "loss": 0.0013, "step": 165730 }, { "epoch": 1.090373216318099, "grad_norm": 0.09592789578465931, "learning_rate": 5.0840171865630705e-06, "loss": 0.0008, "step": 165740 }, { "epoch": 1.0904390044933323, "grad_norm": 0.0244915513741031, "learning_rate": 5.083443156095226e-06, "loss": 0.001, "step": 165750 }, { "epoch": 1.0905047926685658, "grad_norm": 0.05045589677191625, "learning_rate": 5.0828691245272554e-06, "loss": 0.0008, "step": 165760 }, { "epoch": 1.090570580843799, "grad_norm": 0.17495955958794432, "learning_rate": 5.082295091866728e-06, "loss": 0.0008, "step": 165770 }, { "epoch": 1.0906363690190326, "grad_norm": 0.03488649991143941, "learning_rate": 5.0817210581212095e-06, "loss": 0.0009, "step": 165780 }, { "epoch": 1.0907021571942659, "grad_norm": 0.06552291513130905, "learning_rate": 5.0811470232982695e-06, "loss": 0.0009, "step": 165790 }, { "epoch": 1.0907679453694992, "grad_norm": 0.024790350872707815, "learning_rate": 5.080572987405478e-06, "loss": 0.0005, "step": 165800 }, { "epoch": 1.0908337335447327, "grad_norm": 0.02536943152684348, "learning_rate": 5.0799989504504e-06, "loss": 0.0007, "step": 165810 }, { "epoch": 1.090899521719966, "grad_norm": 0.008154712859452278, "learning_rate": 5.079424912440605e-06, "loss": 0.0006, "step": 165820 }, { "epoch": 1.0909653098951995, "grad_norm": 0.02464137730772029, "learning_rate": 5.078850873383661e-06, "loss": 0.0004, "step": 165830 }, { "epoch": 1.0910310980704327, "grad_norm": 0.2554499558844301, "learning_rate": 5.078276833287136e-06, "loss": 0.0004, "step": 165840 }, { "epoch": 1.0910968862456663, "grad_norm": 0.14273763701474404, "learning_rate": 5.077702792158599e-06, "loss": 0.0019, "step": 165850 }, { "epoch": 1.0911626744208995, "grad_norm": 0.013033697397588567, "learning_rate": 5.077128750005618e-06, "loss": 0.0041, "step": 165860 }, { "epoch": 1.091228462596133, "grad_norm": 0.009195313890401145, "learning_rate": 5.07655470683576e-06, "loss": 0.0005, "step": 165870 }, { "epoch": 1.0912942507713663, "grad_norm": 0.0002536785189425491, "learning_rate": 5.075980662656595e-06, "loss": 0.001, "step": 165880 }, { "epoch": 1.0913600389465998, "grad_norm": 0.03614310288385073, "learning_rate": 5.0754066174756915e-06, "loss": 0.0011, "step": 165890 }, { "epoch": 1.0914258271218331, "grad_norm": 0.03918478011753072, "learning_rate": 5.074832571300616e-06, "loss": 0.0008, "step": 165900 }, { "epoch": 1.0914916152970666, "grad_norm": 0.018778604430479656, "learning_rate": 5.074258524138936e-06, "loss": 0.0012, "step": 165910 }, { "epoch": 1.0915574034723, "grad_norm": 0.06500425231263392, "learning_rate": 5.073684475998224e-06, "loss": 0.0011, "step": 165920 }, { "epoch": 1.0916231916475332, "grad_norm": 0.008950096234247137, "learning_rate": 5.073110426886046e-06, "loss": 0.0006, "step": 165930 }, { "epoch": 1.0916889798227667, "grad_norm": 0.01061565254753875, "learning_rate": 5.072536376809969e-06, "loss": 0.0006, "step": 165940 }, { "epoch": 1.091754767998, "grad_norm": 0.10105730208504332, "learning_rate": 5.071962325777562e-06, "loss": 0.001, "step": 165950 }, { "epoch": 1.0918205561732335, "grad_norm": 0.012789771459032718, "learning_rate": 5.071388273796395e-06, "loss": 0.0006, "step": 165960 }, { "epoch": 1.0918863443484668, "grad_norm": 0.014924426958273836, "learning_rate": 5.070814220874034e-06, "loss": 0.0002, "step": 165970 }, { "epoch": 1.0919521325237003, "grad_norm": 0.007060769066988353, "learning_rate": 5.07024016701805e-06, "loss": 0.0019, "step": 165980 }, { "epoch": 1.0920179206989336, "grad_norm": 0.080222450349983, "learning_rate": 5.0696661122360095e-06, "loss": 0.002, "step": 165990 }, { "epoch": 1.092083708874167, "grad_norm": 0.011712453460402304, "learning_rate": 5.069092056535482e-06, "loss": 0.0005, "step": 166000 }, { "epoch": 1.0921494970494003, "grad_norm": 0.015539143101346372, "learning_rate": 5.068517999924034e-06, "loss": 0.0011, "step": 166010 }, { "epoch": 1.0922152852246336, "grad_norm": 0.10785219821359061, "learning_rate": 5.067943942409237e-06, "loss": 0.002, "step": 166020 }, { "epoch": 1.0922810733998671, "grad_norm": 0.04917872280525129, "learning_rate": 5.067369883998657e-06, "loss": 0.0005, "step": 166030 }, { "epoch": 1.0923468615751004, "grad_norm": 0.24123340417254321, "learning_rate": 5.066795824699863e-06, "loss": 0.0027, "step": 166040 }, { "epoch": 1.092412649750334, "grad_norm": 0.0132983395946621, "learning_rate": 5.0662217645204234e-06, "loss": 0.0012, "step": 166050 }, { "epoch": 1.0924784379255672, "grad_norm": 0.018382139920070987, "learning_rate": 5.065647703467909e-06, "loss": 0.0004, "step": 166060 }, { "epoch": 1.0925442261008007, "grad_norm": 0.018453302233895925, "learning_rate": 5.065073641549885e-06, "loss": 0.0005, "step": 166070 }, { "epoch": 1.092610014276034, "grad_norm": 0.002462784452890664, "learning_rate": 5.064499578773922e-06, "loss": 0.0008, "step": 166080 }, { "epoch": 1.0926758024512675, "grad_norm": 0.011505452071562651, "learning_rate": 5.0639255151475864e-06, "loss": 0.0008, "step": 166090 }, { "epoch": 1.0927415906265008, "grad_norm": 0.03508312894410055, "learning_rate": 5.0633514506784475e-06, "loss": 0.0006, "step": 166100 }, { "epoch": 1.092807378801734, "grad_norm": 0.01726580126144615, "learning_rate": 5.062777385374076e-06, "loss": 0.0011, "step": 166110 }, { "epoch": 1.0928731669769676, "grad_norm": 0.05344476125846256, "learning_rate": 5.062203319242039e-06, "loss": 0.0015, "step": 166120 }, { "epoch": 1.0929389551522009, "grad_norm": 0.02884824746332408, "learning_rate": 5.061629252289904e-06, "loss": 0.0004, "step": 166130 }, { "epoch": 1.0930047433274344, "grad_norm": 0.04671040611001436, "learning_rate": 5.061055184525242e-06, "loss": 0.0012, "step": 166140 }, { "epoch": 1.0930705315026676, "grad_norm": 0.009673696643867833, "learning_rate": 5.060481115955619e-06, "loss": 0.0008, "step": 166150 }, { "epoch": 1.0931363196779011, "grad_norm": 0.019397466203344396, "learning_rate": 5.059907046588606e-06, "loss": 0.0004, "step": 166160 }, { "epoch": 1.0932021078531344, "grad_norm": 0.013964102381561448, "learning_rate": 5.059332976431768e-06, "loss": 0.0005, "step": 166170 }, { "epoch": 1.093267896028368, "grad_norm": 0.03481247927198402, "learning_rate": 5.058758905492677e-06, "loss": 0.0011, "step": 166180 }, { "epoch": 1.0933336842036012, "grad_norm": 0.010019724782648249, "learning_rate": 5.058184833778901e-06, "loss": 0.0006, "step": 166190 }, { "epoch": 1.0933994723788347, "grad_norm": 0.034146858441318685, "learning_rate": 5.057610761298008e-06, "loss": 0.0008, "step": 166200 }, { "epoch": 1.093465260554068, "grad_norm": 0.017900585834367932, "learning_rate": 5.057036688057567e-06, "loss": 0.0004, "step": 166210 }, { "epoch": 1.0935310487293013, "grad_norm": 0.0189713257977723, "learning_rate": 5.056462614065146e-06, "loss": 0.0007, "step": 166220 }, { "epoch": 1.0935968369045348, "grad_norm": 0.012215082051582012, "learning_rate": 5.055888539328314e-06, "loss": 0.0008, "step": 166230 }, { "epoch": 1.093662625079768, "grad_norm": 0.027210350830436245, "learning_rate": 5.05531446385464e-06, "loss": 0.0014, "step": 166240 }, { "epoch": 1.0937284132550016, "grad_norm": 0.0334302083779285, "learning_rate": 5.054740387651695e-06, "loss": 0.0011, "step": 166250 }, { "epoch": 1.0937942014302349, "grad_norm": 0.004088205636774341, "learning_rate": 5.054166310727042e-06, "loss": 0.0008, "step": 166260 }, { "epoch": 1.0938599896054684, "grad_norm": 0.0064171452347571755, "learning_rate": 5.053592233088254e-06, "loss": 0.0004, "step": 166270 }, { "epoch": 1.0939257777807017, "grad_norm": 0.07567684857293067, "learning_rate": 5.0530181547429006e-06, "loss": 0.0007, "step": 166280 }, { "epoch": 1.0939915659559352, "grad_norm": 0.047203417077484155, "learning_rate": 5.052444075698546e-06, "loss": 0.0006, "step": 166290 }, { "epoch": 1.0940573541311684, "grad_norm": 0.027987634086192554, "learning_rate": 5.051869995962763e-06, "loss": 0.0012, "step": 166300 }, { "epoch": 1.094123142306402, "grad_norm": 0.027574723519151283, "learning_rate": 5.0512959155431175e-06, "loss": 0.0008, "step": 166310 }, { "epoch": 1.0941889304816352, "grad_norm": 0.02272466413674289, "learning_rate": 5.050721834447181e-06, "loss": 0.0012, "step": 166320 }, { "epoch": 1.0942547186568685, "grad_norm": 0.04140267077691216, "learning_rate": 5.05014775268252e-06, "loss": 0.0016, "step": 166330 }, { "epoch": 1.094320506832102, "grad_norm": 0.02772750362567355, "learning_rate": 5.0495736702567055e-06, "loss": 0.0005, "step": 166340 }, { "epoch": 1.0943862950073353, "grad_norm": 0.05264351231587327, "learning_rate": 5.048999587177305e-06, "loss": 0.0013, "step": 166350 }, { "epoch": 1.0944520831825688, "grad_norm": 0.07925724613695553, "learning_rate": 5.048425503451885e-06, "loss": 0.0009, "step": 166360 }, { "epoch": 1.094517871357802, "grad_norm": 0.029052771252553646, "learning_rate": 5.047851419088018e-06, "loss": 0.0006, "step": 166370 }, { "epoch": 1.0945836595330356, "grad_norm": 0.03226849634834762, "learning_rate": 5.047277334093271e-06, "loss": 0.0008, "step": 166380 }, { "epoch": 1.0946494477082689, "grad_norm": 0.047675758399178654, "learning_rate": 5.0467032484752145e-06, "loss": 0.0007, "step": 166390 }, { "epoch": 1.0947152358835024, "grad_norm": 0.031035276804252804, "learning_rate": 5.046129162241416e-06, "loss": 0.001, "step": 166400 }, { "epoch": 1.0947810240587357, "grad_norm": 0.04359736613090978, "learning_rate": 5.045555075399442e-06, "loss": 0.0009, "step": 166410 }, { "epoch": 1.094846812233969, "grad_norm": 0.017004712926465798, "learning_rate": 5.044980987956866e-06, "loss": 0.0006, "step": 166420 }, { "epoch": 1.0949126004092025, "grad_norm": 0.0120061943434434, "learning_rate": 5.044406899921253e-06, "loss": 0.0006, "step": 166430 }, { "epoch": 1.0949783885844357, "grad_norm": 0.110310154650143, "learning_rate": 5.043832811300174e-06, "loss": 0.0013, "step": 166440 }, { "epoch": 1.0950441767596693, "grad_norm": 0.06620690659358133, "learning_rate": 5.043258722101196e-06, "loss": 0.0005, "step": 166450 }, { "epoch": 1.0951099649349025, "grad_norm": 0.01838446988433446, "learning_rate": 5.042684632331891e-06, "loss": 0.0015, "step": 166460 }, { "epoch": 1.095175753110136, "grad_norm": 0.04802410406342466, "learning_rate": 5.042110541999826e-06, "loss": 0.0008, "step": 166470 }, { "epoch": 1.0952415412853693, "grad_norm": 0.15734114093246118, "learning_rate": 5.041536451112568e-06, "loss": 0.0006, "step": 166480 }, { "epoch": 1.0953073294606028, "grad_norm": 0.06528890265756153, "learning_rate": 5.04096235967769e-06, "loss": 0.0007, "step": 166490 }, { "epoch": 1.0953731176358361, "grad_norm": 0.031856641246632726, "learning_rate": 5.040388267702757e-06, "loss": 0.0007, "step": 166500 }, { "epoch": 1.0954389058110696, "grad_norm": 0.004649614698616662, "learning_rate": 5.03981417519534e-06, "loss": 0.001, "step": 166510 }, { "epoch": 1.095504693986303, "grad_norm": 0.02576318012518542, "learning_rate": 5.039240082163008e-06, "loss": 0.0016, "step": 166520 }, { "epoch": 1.0955704821615362, "grad_norm": 0.0032106993180770287, "learning_rate": 5.038665988613329e-06, "loss": 0.0008, "step": 166530 }, { "epoch": 1.0956362703367697, "grad_norm": 0.014887231017974515, "learning_rate": 5.038091894553873e-06, "loss": 0.0006, "step": 166540 }, { "epoch": 1.095702058512003, "grad_norm": 0.05037479637039265, "learning_rate": 5.037517799992208e-06, "loss": 0.0006, "step": 166550 }, { "epoch": 1.0957678466872365, "grad_norm": 0.022895798506045176, "learning_rate": 5.036943704935901e-06, "loss": 0.0013, "step": 166560 }, { "epoch": 1.0958336348624698, "grad_norm": 0.05865645375207926, "learning_rate": 5.036369609392525e-06, "loss": 0.0006, "step": 166570 }, { "epoch": 1.0958994230377033, "grad_norm": 0.06889508029726624, "learning_rate": 5.035795513369648e-06, "loss": 0.001, "step": 166580 }, { "epoch": 1.0959652112129366, "grad_norm": 0.022352254635783556, "learning_rate": 5.035221416874837e-06, "loss": 0.0004, "step": 166590 }, { "epoch": 1.09603099938817, "grad_norm": 0.04027118825257634, "learning_rate": 5.034647319915663e-06, "loss": 0.001, "step": 166600 }, { "epoch": 1.0960967875634033, "grad_norm": 0.0275334268558394, "learning_rate": 5.034073222499694e-06, "loss": 0.0004, "step": 166610 }, { "epoch": 1.0961625757386368, "grad_norm": 0.0025812083050672304, "learning_rate": 5.033499124634498e-06, "loss": 0.0009, "step": 166620 }, { "epoch": 1.0962283639138701, "grad_norm": 0.10152640477556697, "learning_rate": 5.032925026327644e-06, "loss": 0.0012, "step": 166630 }, { "epoch": 1.0962941520891034, "grad_norm": 0.009893821953684675, "learning_rate": 5.032350927586704e-06, "loss": 0.0004, "step": 166640 }, { "epoch": 1.096359940264337, "grad_norm": 0.03431204711837156, "learning_rate": 5.031776828419245e-06, "loss": 0.001, "step": 166650 }, { "epoch": 1.0964257284395702, "grad_norm": 0.028603099695549698, "learning_rate": 5.031202728832835e-06, "loss": 0.0009, "step": 166660 }, { "epoch": 1.0964915166148037, "grad_norm": 0.010113771763619736, "learning_rate": 5.030628628835044e-06, "loss": 0.0006, "step": 166670 }, { "epoch": 1.096557304790037, "grad_norm": 0.03203490625269162, "learning_rate": 5.03005452843344e-06, "loss": 0.0007, "step": 166680 }, { "epoch": 1.0966230929652705, "grad_norm": 0.15094054501164425, "learning_rate": 5.029480427635595e-06, "loss": 0.0008, "step": 166690 }, { "epoch": 1.0966888811405038, "grad_norm": 0.030341079813278795, "learning_rate": 5.0289063264490745e-06, "loss": 0.0017, "step": 166700 }, { "epoch": 1.0967546693157373, "grad_norm": 0.012856352655079738, "learning_rate": 5.02833222488145e-06, "loss": 0.0009, "step": 166710 }, { "epoch": 1.0968204574909706, "grad_norm": 0.032757751475958406, "learning_rate": 5.0277581229402895e-06, "loss": 0.0016, "step": 166720 }, { "epoch": 1.0968862456662039, "grad_norm": 0.09031865362353664, "learning_rate": 5.027184020633162e-06, "loss": 0.0009, "step": 166730 }, { "epoch": 1.0969520338414374, "grad_norm": 0.09833292646142151, "learning_rate": 5.026609917967635e-06, "loss": 0.0009, "step": 166740 }, { "epoch": 1.0970178220166706, "grad_norm": 0.0016702111169785271, "learning_rate": 5.026035814951281e-06, "loss": 0.0011, "step": 166750 }, { "epoch": 1.0970836101919041, "grad_norm": 0.04860799975937568, "learning_rate": 5.025461711591667e-06, "loss": 0.0006, "step": 166760 }, { "epoch": 1.0971493983671374, "grad_norm": 0.01903913535579876, "learning_rate": 5.024887607896363e-06, "loss": 0.001, "step": 166770 }, { "epoch": 1.097215186542371, "grad_norm": 0.0863944044708805, "learning_rate": 5.024313503872936e-06, "loss": 0.0013, "step": 166780 }, { "epoch": 1.0972809747176042, "grad_norm": 0.031589267688133166, "learning_rate": 5.023739399528959e-06, "loss": 0.0008, "step": 166790 }, { "epoch": 1.0973467628928377, "grad_norm": 0.060629512640273414, "learning_rate": 5.023165294871995e-06, "loss": 0.0006, "step": 166800 }, { "epoch": 1.097412551068071, "grad_norm": 0.023993668218840507, "learning_rate": 5.022591189909618e-06, "loss": 0.0004, "step": 166810 }, { "epoch": 1.0974783392433043, "grad_norm": 0.027197815104398063, "learning_rate": 5.0220170846493965e-06, "loss": 0.0005, "step": 166820 }, { "epoch": 1.0975441274185378, "grad_norm": 0.009750351009578368, "learning_rate": 5.021442979098898e-06, "loss": 0.001, "step": 166830 }, { "epoch": 1.097609915593771, "grad_norm": 0.03457978203793227, "learning_rate": 5.020868873265693e-06, "loss": 0.001, "step": 166840 }, { "epoch": 1.0976757037690046, "grad_norm": 0.024518217686106424, "learning_rate": 5.02029476715735e-06, "loss": 0.0006, "step": 166850 }, { "epoch": 1.0977414919442379, "grad_norm": 0.003014379548534118, "learning_rate": 5.019720660781437e-06, "loss": 0.0007, "step": 166860 }, { "epoch": 1.0978072801194714, "grad_norm": 0.03837633588147216, "learning_rate": 5.019146554145523e-06, "loss": 0.0018, "step": 166870 }, { "epoch": 1.0978730682947047, "grad_norm": 0.08000669449434682, "learning_rate": 5.018572447257181e-06, "loss": 0.0007, "step": 166880 }, { "epoch": 1.0979388564699382, "grad_norm": 0.03593847364992801, "learning_rate": 5.017998340123976e-06, "loss": 0.0007, "step": 166890 }, { "epoch": 1.0980046446451714, "grad_norm": 0.05538352566391687, "learning_rate": 5.017424232753478e-06, "loss": 0.0021, "step": 166900 }, { "epoch": 1.098070432820405, "grad_norm": 0.0010123341915190983, "learning_rate": 5.0168501251532575e-06, "loss": 0.0004, "step": 166910 }, { "epoch": 1.0981362209956382, "grad_norm": 0.09952312417939249, "learning_rate": 5.0162760173308835e-06, "loss": 0.0011, "step": 166920 }, { "epoch": 1.0982020091708717, "grad_norm": 0.017614275294092267, "learning_rate": 5.015701909293923e-06, "loss": 0.0008, "step": 166930 }, { "epoch": 1.098267797346105, "grad_norm": 0.11346953591846605, "learning_rate": 5.015127801049945e-06, "loss": 0.001, "step": 166940 }, { "epoch": 1.0983335855213383, "grad_norm": 0.07328457356675012, "learning_rate": 5.014553692606522e-06, "loss": 0.0005, "step": 166950 }, { "epoch": 1.0983993736965718, "grad_norm": 0.09847722153117755, "learning_rate": 5.013979583971221e-06, "loss": 0.0012, "step": 166960 }, { "epoch": 1.098465161871805, "grad_norm": 0.03366361550743908, "learning_rate": 5.01340547515161e-06, "loss": 0.0008, "step": 166970 }, { "epoch": 1.0985309500470386, "grad_norm": 0.03932976045993766, "learning_rate": 5.0128313661552605e-06, "loss": 0.001, "step": 166980 }, { "epoch": 1.0985967382222719, "grad_norm": 0.05374163127916907, "learning_rate": 5.012257256989741e-06, "loss": 0.001, "step": 166990 }, { "epoch": 1.0986625263975054, "grad_norm": 0.023146460882213303, "learning_rate": 5.011683147662618e-06, "loss": 0.0026, "step": 167000 }, { "epoch": 1.0987283145727387, "grad_norm": 0.020446620555240425, "learning_rate": 5.011109038181464e-06, "loss": 0.0006, "step": 167010 }, { "epoch": 1.0987941027479722, "grad_norm": 0.009292596303328814, "learning_rate": 5.010534928553847e-06, "loss": 0.0004, "step": 167020 }, { "epoch": 1.0988598909232055, "grad_norm": 0.06600761378433019, "learning_rate": 5.009960818787337e-06, "loss": 0.0008, "step": 167030 }, { "epoch": 1.0989256790984387, "grad_norm": 0.015226723797440955, "learning_rate": 5.009386708889501e-06, "loss": 0.0006, "step": 167040 }, { "epoch": 1.0989914672736723, "grad_norm": 0.036762173454744536, "learning_rate": 5.00881259886791e-06, "loss": 0.0013, "step": 167050 }, { "epoch": 1.0990572554489055, "grad_norm": 0.03545128271278643, "learning_rate": 5.008238488730133e-06, "loss": 0.0007, "step": 167060 }, { "epoch": 1.099123043624139, "grad_norm": 0.06809314733802573, "learning_rate": 5.007664378483737e-06, "loss": 0.0007, "step": 167070 }, { "epoch": 1.0991888317993723, "grad_norm": 0.021768265134595805, "learning_rate": 5.007090268136293e-06, "loss": 0.001, "step": 167080 }, { "epoch": 1.0992546199746058, "grad_norm": 0.03161379809121886, "learning_rate": 5.006516157695371e-06, "loss": 0.0006, "step": 167090 }, { "epoch": 1.0993204081498391, "grad_norm": 0.09073490780832401, "learning_rate": 5.005942047168538e-06, "loss": 0.0004, "step": 167100 }, { "epoch": 1.0993861963250726, "grad_norm": 0.10389415612175976, "learning_rate": 5.005367936563366e-06, "loss": 0.0008, "step": 167110 }, { "epoch": 1.099451984500306, "grad_norm": 0.016491272243522058, "learning_rate": 5.004793825887421e-06, "loss": 0.0011, "step": 167120 }, { "epoch": 1.0995177726755392, "grad_norm": 0.0007025785860458524, "learning_rate": 5.004219715148273e-06, "loss": 0.0005, "step": 167130 }, { "epoch": 1.0995835608507727, "grad_norm": 0.017095261897719014, "learning_rate": 5.003645604353493e-06, "loss": 0.0004, "step": 167140 }, { "epoch": 1.099649349026006, "grad_norm": 0.006182892666509917, "learning_rate": 5.0030714935106485e-06, "loss": 0.0005, "step": 167150 }, { "epoch": 1.0997151372012395, "grad_norm": 0.06790823333857335, "learning_rate": 5.002497382627309e-06, "loss": 0.002, "step": 167160 }, { "epoch": 1.0997809253764728, "grad_norm": 0.02974015862959445, "learning_rate": 5.001923271711043e-06, "loss": 0.0008, "step": 167170 }, { "epoch": 1.0998467135517063, "grad_norm": 0.026903994572671522, "learning_rate": 5.001349160769422e-06, "loss": 0.0013, "step": 167180 }, { "epoch": 1.0999125017269396, "grad_norm": 0.023824587659700237, "learning_rate": 5.000775049810013e-06, "loss": 0.0007, "step": 167190 }, { "epoch": 1.099978289902173, "grad_norm": 0.01818597932963461, "learning_rate": 5.0002009388403835e-06, "loss": 0.0016, "step": 167200 }, { "epoch": 1.1000440780774063, "grad_norm": 0.04797384363363945, "learning_rate": 4.9996268278681075e-06, "loss": 0.0005, "step": 167210 }, { "epoch": 1.1001098662526398, "grad_norm": 0.09720730877540289, "learning_rate": 4.99905271690075e-06, "loss": 0.0012, "step": 167220 }, { "epoch": 1.1001756544278731, "grad_norm": 0.040777759192510414, "learning_rate": 4.998478605945882e-06, "loss": 0.0012, "step": 167230 }, { "epoch": 1.1002414426031066, "grad_norm": 0.011836951198871307, "learning_rate": 4.997904495011073e-06, "loss": 0.001, "step": 167240 }, { "epoch": 1.10030723077834, "grad_norm": 0.030121290724787378, "learning_rate": 4.997330384103889e-06, "loss": 0.0013, "step": 167250 }, { "epoch": 1.1003730189535732, "grad_norm": 0.045808240428135805, "learning_rate": 4.9967562732319035e-06, "loss": 0.0011, "step": 167260 }, { "epoch": 1.1004388071288067, "grad_norm": 0.014527608639617495, "learning_rate": 4.996182162402684e-06, "loss": 0.0006, "step": 167270 }, { "epoch": 1.10050459530404, "grad_norm": 0.03852040288322043, "learning_rate": 4.995608051623799e-06, "loss": 0.0005, "step": 167280 }, { "epoch": 1.1005703834792735, "grad_norm": 0.06251644087695894, "learning_rate": 4.995033940902817e-06, "loss": 0.0003, "step": 167290 }, { "epoch": 1.1006361716545068, "grad_norm": 0.011020500362366844, "learning_rate": 4.9944598302473094e-06, "loss": 0.0008, "step": 167300 }, { "epoch": 1.1007019598297403, "grad_norm": 0.016991012929188053, "learning_rate": 4.993885719664843e-06, "loss": 0.0013, "step": 167310 }, { "epoch": 1.1007677480049736, "grad_norm": 0.03377888296455904, "learning_rate": 4.993311609162989e-06, "loss": 0.0004, "step": 167320 }, { "epoch": 1.100833536180207, "grad_norm": 0.08161996307323884, "learning_rate": 4.992737498749316e-06, "loss": 0.0014, "step": 167330 }, { "epoch": 1.1008993243554404, "grad_norm": 0.007988770473466144, "learning_rate": 4.992163388431392e-06, "loss": 0.0008, "step": 167340 }, { "epoch": 1.1009651125306736, "grad_norm": 0.007291019725287095, "learning_rate": 4.991589278216788e-06, "loss": 0.0009, "step": 167350 }, { "epoch": 1.1010309007059071, "grad_norm": 0.06404546607371363, "learning_rate": 4.991015168113071e-06, "loss": 0.0006, "step": 167360 }, { "epoch": 1.1010966888811404, "grad_norm": 0.06930709733926414, "learning_rate": 4.990441058127812e-06, "loss": 0.0009, "step": 167370 }, { "epoch": 1.101162477056374, "grad_norm": 0.023131189970860215, "learning_rate": 4.989866948268578e-06, "loss": 0.0004, "step": 167380 }, { "epoch": 1.1012282652316072, "grad_norm": 0.05827172603179214, "learning_rate": 4.989292838542941e-06, "loss": 0.0008, "step": 167390 }, { "epoch": 1.1012940534068407, "grad_norm": 0.003998913447108428, "learning_rate": 4.988718728958468e-06, "loss": 0.0015, "step": 167400 }, { "epoch": 1.101359841582074, "grad_norm": 0.002753542837265131, "learning_rate": 4.988144619522729e-06, "loss": 0.0013, "step": 167410 }, { "epoch": 1.1014256297573075, "grad_norm": 0.018407111946051727, "learning_rate": 4.9875705102432936e-06, "loss": 0.0019, "step": 167420 }, { "epoch": 1.1014914179325408, "grad_norm": 0.0016717272188260677, "learning_rate": 4.98699640112773e-06, "loss": 0.0008, "step": 167430 }, { "epoch": 1.101557206107774, "grad_norm": 0.026703712715675315, "learning_rate": 4.9864222921836065e-06, "loss": 0.0007, "step": 167440 }, { "epoch": 1.1016229942830076, "grad_norm": 0.006952672799757927, "learning_rate": 4.985848183418494e-06, "loss": 0.0013, "step": 167450 }, { "epoch": 1.1016887824582409, "grad_norm": 0.029140714923677948, "learning_rate": 4.985274074839961e-06, "loss": 0.0038, "step": 167460 }, { "epoch": 1.1017545706334744, "grad_norm": 0.02345089728328479, "learning_rate": 4.9846999664555775e-06, "loss": 0.0009, "step": 167470 }, { "epoch": 1.1018203588087077, "grad_norm": 0.01988531566057695, "learning_rate": 4.984125858272911e-06, "loss": 0.001, "step": 167480 }, { "epoch": 1.1018861469839412, "grad_norm": 0.05969572956936327, "learning_rate": 4.983551750299532e-06, "loss": 0.0009, "step": 167490 }, { "epoch": 1.1019519351591744, "grad_norm": 0.08271589691381383, "learning_rate": 4.982977642543008e-06, "loss": 0.0008, "step": 167500 }, { "epoch": 1.102017723334408, "grad_norm": 0.01725521335081569, "learning_rate": 4.982403535010909e-06, "loss": 0.0004, "step": 167510 }, { "epoch": 1.1020835115096412, "grad_norm": 0.08808010279869799, "learning_rate": 4.981829427710804e-06, "loss": 0.0018, "step": 167520 }, { "epoch": 1.1021492996848747, "grad_norm": 0.02835315306435267, "learning_rate": 4.9812553206502636e-06, "loss": 0.0004, "step": 167530 }, { "epoch": 1.102215087860108, "grad_norm": 0.004020580348480153, "learning_rate": 4.980681213836855e-06, "loss": 0.0009, "step": 167540 }, { "epoch": 1.1022808760353413, "grad_norm": 0.0010906205080951322, "learning_rate": 4.9801071072781474e-06, "loss": 0.0006, "step": 167550 }, { "epoch": 1.1023466642105748, "grad_norm": 0.0068879587335165925, "learning_rate": 4.979533000981711e-06, "loss": 0.0007, "step": 167560 }, { "epoch": 1.102412452385808, "grad_norm": 0.026269395054170946, "learning_rate": 4.978958894955113e-06, "loss": 0.0006, "step": 167570 }, { "epoch": 1.1024782405610416, "grad_norm": 0.04437842040272145, "learning_rate": 4.978384789205926e-06, "loss": 0.0007, "step": 167580 }, { "epoch": 1.1025440287362749, "grad_norm": 0.0015001555512153275, "learning_rate": 4.977810683741715e-06, "loss": 0.0009, "step": 167590 }, { "epoch": 1.1026098169115084, "grad_norm": 0.03624503538655156, "learning_rate": 4.977236578570052e-06, "loss": 0.0014, "step": 167600 }, { "epoch": 1.1026756050867417, "grad_norm": 0.0416306910476534, "learning_rate": 4.9766624736985056e-06, "loss": 0.0019, "step": 167610 }, { "epoch": 1.1027413932619752, "grad_norm": 0.09942645124188061, "learning_rate": 4.976088369134644e-06, "loss": 0.0012, "step": 167620 }, { "epoch": 1.1028071814372085, "grad_norm": 0.07997330925242743, "learning_rate": 4.975514264886036e-06, "loss": 0.0006, "step": 167630 }, { "epoch": 1.102872969612442, "grad_norm": 0.01905818593770449, "learning_rate": 4.97494016096025e-06, "loss": 0.0005, "step": 167640 }, { "epoch": 1.1029387577876753, "grad_norm": 0.04532869893383573, "learning_rate": 4.974366057364858e-06, "loss": 0.0008, "step": 167650 }, { "epoch": 1.1030045459629085, "grad_norm": 0.02031811957457606, "learning_rate": 4.9737919541074275e-06, "loss": 0.001, "step": 167660 }, { "epoch": 1.103070334138142, "grad_norm": 0.015129305156843223, "learning_rate": 4.973217851195527e-06, "loss": 0.0016, "step": 167670 }, { "epoch": 1.1031361223133753, "grad_norm": 0.04694252257386928, "learning_rate": 4.972643748636726e-06, "loss": 0.0007, "step": 167680 }, { "epoch": 1.1032019104886088, "grad_norm": 0.02201661878572962, "learning_rate": 4.972069646438594e-06, "loss": 0.0017, "step": 167690 }, { "epoch": 1.1032676986638421, "grad_norm": 0.024357217854923716, "learning_rate": 4.971495544608699e-06, "loss": 0.0008, "step": 167700 }, { "epoch": 1.1033334868390756, "grad_norm": 0.01770654569358221, "learning_rate": 4.970921443154611e-06, "loss": 0.0007, "step": 167710 }, { "epoch": 1.103399275014309, "grad_norm": 0.06058747389030985, "learning_rate": 4.970347342083898e-06, "loss": 0.0015, "step": 167720 }, { "epoch": 1.1034650631895424, "grad_norm": 0.023434378827530668, "learning_rate": 4.9697732414041305e-06, "loss": 0.0006, "step": 167730 }, { "epoch": 1.1035308513647757, "grad_norm": 0.04284269632578619, "learning_rate": 4.9691991411228765e-06, "loss": 0.0005, "step": 167740 }, { "epoch": 1.103596639540009, "grad_norm": 0.0007348348268338545, "learning_rate": 4.968625041247705e-06, "loss": 0.0005, "step": 167750 }, { "epoch": 1.1036624277152425, "grad_norm": 0.12890547140927952, "learning_rate": 4.968050941786186e-06, "loss": 0.0011, "step": 167760 }, { "epoch": 1.1037282158904758, "grad_norm": 0.01913239549175573, "learning_rate": 4.967476842745886e-06, "loss": 0.0005, "step": 167770 }, { "epoch": 1.1037940040657093, "grad_norm": 0.010628520943568006, "learning_rate": 4.966902744134377e-06, "loss": 0.0003, "step": 167780 }, { "epoch": 1.1038597922409426, "grad_norm": 0.015418508266393046, "learning_rate": 4.9663286459592265e-06, "loss": 0.0014, "step": 167790 }, { "epoch": 1.103925580416176, "grad_norm": 0.017487878031281906, "learning_rate": 4.965754548228004e-06, "loss": 0.0007, "step": 167800 }, { "epoch": 1.1039913685914093, "grad_norm": 0.02077800415512372, "learning_rate": 4.965180450948278e-06, "loss": 0.0009, "step": 167810 }, { "epoch": 1.1040571567666428, "grad_norm": 0.06937873955841053, "learning_rate": 4.964606354127619e-06, "loss": 0.0009, "step": 167820 }, { "epoch": 1.1041229449418761, "grad_norm": 0.03779308287831642, "learning_rate": 4.9640322577735925e-06, "loss": 0.0006, "step": 167830 }, { "epoch": 1.1041887331171096, "grad_norm": 0.004371346321390229, "learning_rate": 4.96345816189377e-06, "loss": 0.0009, "step": 167840 }, { "epoch": 1.104254521292343, "grad_norm": 0.018207862550970906, "learning_rate": 4.962884066495721e-06, "loss": 0.0005, "step": 167850 }, { "epoch": 1.1043203094675762, "grad_norm": 0.045846351103046024, "learning_rate": 4.962309971587013e-06, "loss": 0.0011, "step": 167860 }, { "epoch": 1.1043860976428097, "grad_norm": 0.09804458309597995, "learning_rate": 4.961735877175215e-06, "loss": 0.0008, "step": 167870 }, { "epoch": 1.104451885818043, "grad_norm": 0.10600454093741908, "learning_rate": 4.961161783267898e-06, "loss": 0.0011, "step": 167880 }, { "epoch": 1.1045176739932765, "grad_norm": 0.006645034656614036, "learning_rate": 4.960587689872629e-06, "loss": 0.0006, "step": 167890 }, { "epoch": 1.1045834621685098, "grad_norm": 0.14591990499269275, "learning_rate": 4.9600135969969744e-06, "loss": 0.001, "step": 167900 }, { "epoch": 1.1046492503437433, "grad_norm": 0.04054913429533774, "learning_rate": 4.959439504648509e-06, "loss": 0.0006, "step": 167910 }, { "epoch": 1.1047150385189766, "grad_norm": 0.031835634942799444, "learning_rate": 4.958865412834798e-06, "loss": 0.0009, "step": 167920 }, { "epoch": 1.10478082669421, "grad_norm": 0.023227069608563064, "learning_rate": 4.958291321563411e-06, "loss": 0.0005, "step": 167930 }, { "epoch": 1.1048466148694434, "grad_norm": 0.03218690019512321, "learning_rate": 4.957717230841917e-06, "loss": 0.0011, "step": 167940 }, { "epoch": 1.1049124030446769, "grad_norm": 0.09556143330695248, "learning_rate": 4.9571431406778844e-06, "loss": 0.0011, "step": 167950 }, { "epoch": 1.1049781912199101, "grad_norm": 0.005815499269552954, "learning_rate": 4.956569051078881e-06, "loss": 0.0003, "step": 167960 }, { "epoch": 1.1050439793951434, "grad_norm": 0.09424551782992709, "learning_rate": 4.95599496205248e-06, "loss": 0.0009, "step": 167970 }, { "epoch": 1.105109767570377, "grad_norm": 0.0007860996225863362, "learning_rate": 4.955420873606246e-06, "loss": 0.0009, "step": 167980 }, { "epoch": 1.1051755557456102, "grad_norm": 0.06608798483756195, "learning_rate": 4.95484678574775e-06, "loss": 0.0006, "step": 167990 }, { "epoch": 1.1052413439208437, "grad_norm": 0.07351743136248483, "learning_rate": 4.95427269848456e-06, "loss": 0.0013, "step": 168000 }, { "epoch": 1.105307132096077, "grad_norm": 0.06797004977978122, "learning_rate": 4.953698611824244e-06, "loss": 0.0009, "step": 168010 }, { "epoch": 1.1053729202713105, "grad_norm": 0.053728049080256496, "learning_rate": 4.953124525774374e-06, "loss": 0.002, "step": 168020 }, { "epoch": 1.1054387084465438, "grad_norm": 0.013117416586459185, "learning_rate": 4.952550440342513e-06, "loss": 0.0008, "step": 168030 }, { "epoch": 1.1055044966217773, "grad_norm": 0.06613753925278604, "learning_rate": 4.951976355536236e-06, "loss": 0.0015, "step": 168040 }, { "epoch": 1.1055702847970106, "grad_norm": 0.0036326872046339173, "learning_rate": 4.951402271363109e-06, "loss": 0.0003, "step": 168050 }, { "epoch": 1.1056360729722439, "grad_norm": 0.057703662331100636, "learning_rate": 4.9508281878307e-06, "loss": 0.0009, "step": 168060 }, { "epoch": 1.1057018611474774, "grad_norm": 0.03653034728655338, "learning_rate": 4.950254104946581e-06, "loss": 0.0006, "step": 168070 }, { "epoch": 1.1057676493227107, "grad_norm": 0.053291529913855856, "learning_rate": 4.949680022718317e-06, "loss": 0.0008, "step": 168080 }, { "epoch": 1.1058334374979442, "grad_norm": 0.006149358223337795, "learning_rate": 4.949105941153478e-06, "loss": 0.0006, "step": 168090 }, { "epoch": 1.1058992256731774, "grad_norm": 0.0006573152335905923, "learning_rate": 4.948531860259634e-06, "loss": 0.0005, "step": 168100 }, { "epoch": 1.105965013848411, "grad_norm": 0.013223308478753224, "learning_rate": 4.947957780044354e-06, "loss": 0.0009, "step": 168110 }, { "epoch": 1.1060308020236442, "grad_norm": 0.07218354413348788, "learning_rate": 4.947383700515204e-06, "loss": 0.0007, "step": 168120 }, { "epoch": 1.1060965901988777, "grad_norm": 0.011199333918504209, "learning_rate": 4.9468096216797565e-06, "loss": 0.0011, "step": 168130 }, { "epoch": 1.106162378374111, "grad_norm": 0.09940054194722156, "learning_rate": 4.9462355435455765e-06, "loss": 0.0009, "step": 168140 }, { "epoch": 1.1062281665493443, "grad_norm": 0.013491280849136216, "learning_rate": 4.945661466120235e-06, "loss": 0.0008, "step": 168150 }, { "epoch": 1.1062939547245778, "grad_norm": 0.025521861928412833, "learning_rate": 4.945087389411299e-06, "loss": 0.0006, "step": 168160 }, { "epoch": 1.106359742899811, "grad_norm": 0.012402819563146476, "learning_rate": 4.94451331342634e-06, "loss": 0.0007, "step": 168170 }, { "epoch": 1.1064255310750446, "grad_norm": 0.06399743518180542, "learning_rate": 4.943939238172924e-06, "loss": 0.0013, "step": 168180 }, { "epoch": 1.1064913192502779, "grad_norm": 0.05481583776275052, "learning_rate": 4.943365163658621e-06, "loss": 0.0012, "step": 168190 }, { "epoch": 1.1065571074255114, "grad_norm": 0.018260212367274334, "learning_rate": 4.942791089891e-06, "loss": 0.0009, "step": 168200 }, { "epoch": 1.1066228956007447, "grad_norm": 0.01829707705799617, "learning_rate": 4.942217016877629e-06, "loss": 0.0006, "step": 168210 }, { "epoch": 1.1066886837759782, "grad_norm": 0.27621021594933387, "learning_rate": 4.941642944626075e-06, "loss": 0.0021, "step": 168220 }, { "epoch": 1.1067544719512115, "grad_norm": 0.02104750310620553, "learning_rate": 4.94106887314391e-06, "loss": 0.001, "step": 168230 }, { "epoch": 1.106820260126445, "grad_norm": 0.015608405784016876, "learning_rate": 4.940494802438701e-06, "loss": 0.0006, "step": 168240 }, { "epoch": 1.1068860483016782, "grad_norm": 0.005374831692685502, "learning_rate": 4.939920732518017e-06, "loss": 0.0007, "step": 168250 }, { "epoch": 1.1069518364769118, "grad_norm": 0.07677457444720616, "learning_rate": 4.939346663389425e-06, "loss": 0.0008, "step": 168260 }, { "epoch": 1.107017624652145, "grad_norm": 0.08561162263990818, "learning_rate": 4.938772595060496e-06, "loss": 0.0007, "step": 168270 }, { "epoch": 1.1070834128273783, "grad_norm": 0.10424346534488506, "learning_rate": 4.938198527538797e-06, "loss": 0.0006, "step": 168280 }, { "epoch": 1.1071492010026118, "grad_norm": 0.012014407123919163, "learning_rate": 4.937624460831896e-06, "loss": 0.0013, "step": 168290 }, { "epoch": 1.107214989177845, "grad_norm": 0.09004213963358147, "learning_rate": 4.937050394947364e-06, "loss": 0.0007, "step": 168300 }, { "epoch": 1.1072807773530786, "grad_norm": 0.0004810282291339527, "learning_rate": 4.936476329892768e-06, "loss": 0.0003, "step": 168310 }, { "epoch": 1.107346565528312, "grad_norm": 0.0720552166685596, "learning_rate": 4.935902265675676e-06, "loss": 0.0009, "step": 168320 }, { "epoch": 1.1074123537035454, "grad_norm": 0.0013896229122857971, "learning_rate": 4.935328202303658e-06, "loss": 0.0005, "step": 168330 }, { "epoch": 1.1074781418787787, "grad_norm": 0.022257235319026708, "learning_rate": 4.934754139784282e-06, "loss": 0.0011, "step": 168340 }, { "epoch": 1.1075439300540122, "grad_norm": 0.08508011371373322, "learning_rate": 4.9341800781251145e-06, "loss": 0.001, "step": 168350 }, { "epoch": 1.1076097182292455, "grad_norm": 0.005562547722817931, "learning_rate": 4.933606017333727e-06, "loss": 0.0011, "step": 168360 }, { "epoch": 1.1076755064044788, "grad_norm": 0.06755899500993895, "learning_rate": 4.933031957417688e-06, "loss": 0.0005, "step": 168370 }, { "epoch": 1.1077412945797123, "grad_norm": 0.0019452569343090655, "learning_rate": 4.932457898384563e-06, "loss": 0.001, "step": 168380 }, { "epoch": 1.1078070827549455, "grad_norm": 0.023463943525133354, "learning_rate": 4.931883840241924e-06, "loss": 0.0011, "step": 168390 }, { "epoch": 1.107872870930179, "grad_norm": 0.0008633919412710788, "learning_rate": 4.931309782997337e-06, "loss": 0.0008, "step": 168400 }, { "epoch": 1.1079386591054123, "grad_norm": 0.03380815528191126, "learning_rate": 4.930735726658371e-06, "loss": 0.0008, "step": 168410 }, { "epoch": 1.1080044472806458, "grad_norm": 0.00041899876297345944, "learning_rate": 4.930161671232594e-06, "loss": 0.0005, "step": 168420 }, { "epoch": 1.1080702354558791, "grad_norm": 0.0034318657352480275, "learning_rate": 4.929587616727575e-06, "loss": 0.0006, "step": 168430 }, { "epoch": 1.1081360236311126, "grad_norm": 0.03400656693572473, "learning_rate": 4.929013563150884e-06, "loss": 0.0017, "step": 168440 }, { "epoch": 1.108201811806346, "grad_norm": 0.0016220592698121994, "learning_rate": 4.928439510510087e-06, "loss": 0.0019, "step": 168450 }, { "epoch": 1.1082675999815792, "grad_norm": 0.018095815144152583, "learning_rate": 4.927865458812753e-06, "loss": 0.0009, "step": 168460 }, { "epoch": 1.1083333881568127, "grad_norm": 0.022918451938147334, "learning_rate": 4.927291408066451e-06, "loss": 0.0014, "step": 168470 }, { "epoch": 1.108399176332046, "grad_norm": 0.10423904728986814, "learning_rate": 4.926717358278748e-06, "loss": 0.0016, "step": 168480 }, { "epoch": 1.1084649645072795, "grad_norm": 0.02850968013836715, "learning_rate": 4.9261433094572144e-06, "loss": 0.0008, "step": 168490 }, { "epoch": 1.1085307526825128, "grad_norm": 0.02786617707542136, "learning_rate": 4.9255692616094174e-06, "loss": 0.0005, "step": 168500 }, { "epoch": 1.1085965408577463, "grad_norm": 0.001695026274444772, "learning_rate": 4.924995214742925e-06, "loss": 0.0011, "step": 168510 }, { "epoch": 1.1086623290329796, "grad_norm": 0.04217486198443695, "learning_rate": 4.924421168865307e-06, "loss": 0.0004, "step": 168520 }, { "epoch": 1.108728117208213, "grad_norm": 0.02767774535268395, "learning_rate": 4.9238471239841304e-06, "loss": 0.0006, "step": 168530 }, { "epoch": 1.1087939053834464, "grad_norm": 0.000229839503651217, "learning_rate": 4.923273080106963e-06, "loss": 0.0006, "step": 168540 }, { "epoch": 1.1088596935586799, "grad_norm": 0.08064319757868448, "learning_rate": 4.922699037241372e-06, "loss": 0.0008, "step": 168550 }, { "epoch": 1.1089254817339131, "grad_norm": 0.007021390654720327, "learning_rate": 4.92212499539493e-06, "loss": 0.0003, "step": 168560 }, { "epoch": 1.1089912699091464, "grad_norm": 0.044911405958717836, "learning_rate": 4.9215509545752025e-06, "loss": 0.0011, "step": 168570 }, { "epoch": 1.10905705808438, "grad_norm": 0.0023239883779837284, "learning_rate": 4.920976914789757e-06, "loss": 0.0022, "step": 168580 }, { "epoch": 1.1091228462596132, "grad_norm": 0.01771850020284338, "learning_rate": 4.9204028760461635e-06, "loss": 0.0003, "step": 168590 }, { "epoch": 1.1091886344348467, "grad_norm": 0.07192806389906822, "learning_rate": 4.919828838351989e-06, "loss": 0.0009, "step": 168600 }, { "epoch": 1.10925442261008, "grad_norm": 0.053392651994643506, "learning_rate": 4.919254801714801e-06, "loss": 0.0004, "step": 168610 }, { "epoch": 1.1093202107853135, "grad_norm": 0.020262787558655716, "learning_rate": 4.91868076614217e-06, "loss": 0.0011, "step": 168620 }, { "epoch": 1.1093859989605468, "grad_norm": 0.018739687242000928, "learning_rate": 4.9181067316416625e-06, "loss": 0.0005, "step": 168630 }, { "epoch": 1.1094517871357803, "grad_norm": 0.0010146231200889773, "learning_rate": 4.9175326982208475e-06, "loss": 0.0018, "step": 168640 }, { "epoch": 1.1095175753110136, "grad_norm": 0.01947587257230876, "learning_rate": 4.916958665887292e-06, "loss": 0.0008, "step": 168650 }, { "epoch": 1.109583363486247, "grad_norm": 0.020846765504297725, "learning_rate": 4.916384634648565e-06, "loss": 0.0011, "step": 168660 }, { "epoch": 1.1096491516614804, "grad_norm": 0.06347768297033486, "learning_rate": 4.915810604512234e-06, "loss": 0.0012, "step": 168670 }, { "epoch": 1.1097149398367137, "grad_norm": 0.023887874713871517, "learning_rate": 4.915236575485868e-06, "loss": 0.0009, "step": 168680 }, { "epoch": 1.1097807280119472, "grad_norm": 0.003684022685021351, "learning_rate": 4.914662547577034e-06, "loss": 0.0008, "step": 168690 }, { "epoch": 1.1098465161871804, "grad_norm": 0.06922703355848524, "learning_rate": 4.9140885207933005e-06, "loss": 0.0009, "step": 168700 }, { "epoch": 1.109912304362414, "grad_norm": 0.010073621392564316, "learning_rate": 4.9135144951422365e-06, "loss": 0.0004, "step": 168710 }, { "epoch": 1.1099780925376472, "grad_norm": 0.03866903645336077, "learning_rate": 4.9129404706314085e-06, "loss": 0.001, "step": 168720 }, { "epoch": 1.1100438807128807, "grad_norm": 0.0089440525537869, "learning_rate": 4.9123664472683855e-06, "loss": 0.0004, "step": 168730 }, { "epoch": 1.110109668888114, "grad_norm": 0.06004314487009153, "learning_rate": 4.911792425060735e-06, "loss": 0.0012, "step": 168740 }, { "epoch": 1.1101754570633475, "grad_norm": 0.06650269162466062, "learning_rate": 4.911218404016025e-06, "loss": 0.0007, "step": 168750 }, { "epoch": 1.1102412452385808, "grad_norm": 0.006859677566137218, "learning_rate": 4.910644384141825e-06, "loss": 0.0011, "step": 168760 }, { "epoch": 1.110307033413814, "grad_norm": 0.02612212861784873, "learning_rate": 4.9100703654457e-06, "loss": 0.0007, "step": 168770 }, { "epoch": 1.1103728215890476, "grad_norm": 0.02659241888292053, "learning_rate": 4.909496347935221e-06, "loss": 0.0005, "step": 168780 }, { "epoch": 1.1104386097642809, "grad_norm": 0.06002829559145163, "learning_rate": 4.908922331617953e-06, "loss": 0.0003, "step": 168790 }, { "epoch": 1.1105043979395144, "grad_norm": 0.0002203084048634581, "learning_rate": 4.908348316501467e-06, "loss": 0.0009, "step": 168800 }, { "epoch": 1.1105701861147477, "grad_norm": 0.10553194020480591, "learning_rate": 4.907774302593327e-06, "loss": 0.0009, "step": 168810 }, { "epoch": 1.1106359742899812, "grad_norm": 0.07865508871453654, "learning_rate": 4.907200289901105e-06, "loss": 0.0011, "step": 168820 }, { "epoch": 1.1107017624652145, "grad_norm": 0.041564107357793714, "learning_rate": 4.906626278432367e-06, "loss": 0.0012, "step": 168830 }, { "epoch": 1.110767550640448, "grad_norm": 0.04061758706747282, "learning_rate": 4.90605226819468e-06, "loss": 0.0009, "step": 168840 }, { "epoch": 1.1108333388156812, "grad_norm": 0.043379126679932184, "learning_rate": 4.905478259195614e-06, "loss": 0.0005, "step": 168850 }, { "epoch": 1.1108991269909148, "grad_norm": 0.00048433377777340524, "learning_rate": 4.9049042514427345e-06, "loss": 0.0009, "step": 168860 }, { "epoch": 1.110964915166148, "grad_norm": 0.04358296921618936, "learning_rate": 4.904330244943609e-06, "loss": 0.0044, "step": 168870 }, { "epoch": 1.1110307033413813, "grad_norm": 0.06135160851093065, "learning_rate": 4.9037562397058086e-06, "loss": 0.0008, "step": 168880 }, { "epoch": 1.1110964915166148, "grad_norm": 0.08136082916364216, "learning_rate": 4.903182235736898e-06, "loss": 0.001, "step": 168890 }, { "epoch": 1.111162279691848, "grad_norm": 0.01984360009655761, "learning_rate": 4.902608233044447e-06, "loss": 0.0007, "step": 168900 }, { "epoch": 1.1112280678670816, "grad_norm": 0.05538877253377515, "learning_rate": 4.902034231636023e-06, "loss": 0.0009, "step": 168910 }, { "epoch": 1.111293856042315, "grad_norm": 0.05239092955463626, "learning_rate": 4.901460231519191e-06, "loss": 0.0007, "step": 168920 }, { "epoch": 1.1113596442175484, "grad_norm": 0.060716038316633365, "learning_rate": 4.900886232701522e-06, "loss": 0.0011, "step": 168930 }, { "epoch": 1.1114254323927817, "grad_norm": 0.02480896923791124, "learning_rate": 4.9003122351905815e-06, "loss": 0.0006, "step": 168940 }, { "epoch": 1.1114912205680152, "grad_norm": 0.007446831655770774, "learning_rate": 4.899738238993939e-06, "loss": 0.001, "step": 168950 }, { "epoch": 1.1115570087432485, "grad_norm": 0.12798387627734695, "learning_rate": 4.8991642441191605e-06, "loss": 0.0009, "step": 168960 }, { "epoch": 1.111622796918482, "grad_norm": 0.016903585915760726, "learning_rate": 4.898590250573816e-06, "loss": 0.0006, "step": 168970 }, { "epoch": 1.1116885850937153, "grad_norm": 0.07438256544019185, "learning_rate": 4.898016258365469e-06, "loss": 0.0003, "step": 168980 }, { "epoch": 1.1117543732689485, "grad_norm": 0.026652137227408185, "learning_rate": 4.897442267501692e-06, "loss": 0.0011, "step": 168990 }, { "epoch": 1.111820161444182, "grad_norm": 0.05306111910637828, "learning_rate": 4.896868277990047e-06, "loss": 0.0011, "step": 169000 }, { "epoch": 1.1118859496194153, "grad_norm": 0.05330946650215452, "learning_rate": 4.896294289838109e-06, "loss": 0.0012, "step": 169010 }, { "epoch": 1.1119517377946488, "grad_norm": 0.023454375414694733, "learning_rate": 4.895720303053439e-06, "loss": 0.0004, "step": 169020 }, { "epoch": 1.1120175259698821, "grad_norm": 0.10117938640414505, "learning_rate": 4.895146317643607e-06, "loss": 0.0008, "step": 169030 }, { "epoch": 1.1120833141451156, "grad_norm": 0.026034257582567936, "learning_rate": 4.894572333616178e-06, "loss": 0.0011, "step": 169040 }, { "epoch": 1.112149102320349, "grad_norm": 0.11923171793949516, "learning_rate": 4.893998350978725e-06, "loss": 0.0007, "step": 169050 }, { "epoch": 1.1122148904955824, "grad_norm": 0.08889354511522378, "learning_rate": 4.89342436973881e-06, "loss": 0.0007, "step": 169060 }, { "epoch": 1.1122806786708157, "grad_norm": 0.011737674205633824, "learning_rate": 4.892850389904004e-06, "loss": 0.0009, "step": 169070 }, { "epoch": 1.112346466846049, "grad_norm": 0.10521353860428108, "learning_rate": 4.892276411481873e-06, "loss": 0.0007, "step": 169080 }, { "epoch": 1.1124122550212825, "grad_norm": 0.06697323262571357, "learning_rate": 4.891702434479983e-06, "loss": 0.0008, "step": 169090 }, { "epoch": 1.1124780431965158, "grad_norm": 0.012716470295599868, "learning_rate": 4.891128458905905e-06, "loss": 0.0011, "step": 169100 }, { "epoch": 1.1125438313717493, "grad_norm": 0.013968045121145627, "learning_rate": 4.890554484767202e-06, "loss": 0.0008, "step": 169110 }, { "epoch": 1.1126096195469826, "grad_norm": 0.030567751686952128, "learning_rate": 4.889980512071444e-06, "loss": 0.0012, "step": 169120 }, { "epoch": 1.112675407722216, "grad_norm": 0.05040329070320866, "learning_rate": 4.889406540826199e-06, "loss": 0.0005, "step": 169130 }, { "epoch": 1.1127411958974494, "grad_norm": 0.0019080255727664613, "learning_rate": 4.888832571039033e-06, "loss": 0.0013, "step": 169140 }, { "epoch": 1.1128069840726829, "grad_norm": 0.013102739724316992, "learning_rate": 4.888258602717514e-06, "loss": 0.0008, "step": 169150 }, { "epoch": 1.1128727722479161, "grad_norm": 0.005691634830894523, "learning_rate": 4.887684635869208e-06, "loss": 0.0005, "step": 169160 }, { "epoch": 1.1129385604231494, "grad_norm": 0.020228908871074945, "learning_rate": 4.8871106705016825e-06, "loss": 0.0006, "step": 169170 }, { "epoch": 1.113004348598383, "grad_norm": 0.0008025453651420288, "learning_rate": 4.886536706622506e-06, "loss": 0.0009, "step": 169180 }, { "epoch": 1.1130701367736162, "grad_norm": 0.07496719174042424, "learning_rate": 4.885962744239245e-06, "loss": 0.0013, "step": 169190 }, { "epoch": 1.1131359249488497, "grad_norm": 0.007704220552543012, "learning_rate": 4.885388783359467e-06, "loss": 0.0006, "step": 169200 }, { "epoch": 1.113201713124083, "grad_norm": 0.023656682512351403, "learning_rate": 4.884814823990739e-06, "loss": 0.0005, "step": 169210 }, { "epoch": 1.1132675012993165, "grad_norm": 0.007341615815673693, "learning_rate": 4.884240866140627e-06, "loss": 0.0005, "step": 169220 }, { "epoch": 1.1133332894745498, "grad_norm": 0.0027992958108945414, "learning_rate": 4.883666909816701e-06, "loss": 0.0011, "step": 169230 }, { "epoch": 1.1133990776497833, "grad_norm": 0.04104706775390637, "learning_rate": 4.883092955026525e-06, "loss": 0.0003, "step": 169240 }, { "epoch": 1.1134648658250166, "grad_norm": 0.0005713749908948816, "learning_rate": 4.882519001777668e-06, "loss": 0.0008, "step": 169250 }, { "epoch": 1.11353065400025, "grad_norm": 0.0011097672260283191, "learning_rate": 4.881945050077695e-06, "loss": 0.0025, "step": 169260 }, { "epoch": 1.1135964421754834, "grad_norm": 0.06272154814668696, "learning_rate": 4.881371099934176e-06, "loss": 0.0008, "step": 169270 }, { "epoch": 1.1136622303507169, "grad_norm": 0.0017605395848866044, "learning_rate": 4.880797151354677e-06, "loss": 0.0008, "step": 169280 }, { "epoch": 1.1137280185259502, "grad_norm": 0.07725455906995797, "learning_rate": 4.880223204346764e-06, "loss": 0.0021, "step": 169290 }, { "epoch": 1.1137938067011834, "grad_norm": 0.02282420610278919, "learning_rate": 4.879649258918004e-06, "loss": 0.0004, "step": 169300 }, { "epoch": 1.113859594876417, "grad_norm": 0.0008145177438508515, "learning_rate": 4.879075315075965e-06, "loss": 0.0008, "step": 169310 }, { "epoch": 1.1139253830516502, "grad_norm": 0.030817753704363914, "learning_rate": 4.878501372828214e-06, "loss": 0.0008, "step": 169320 }, { "epoch": 1.1139911712268837, "grad_norm": 0.03950441359500377, "learning_rate": 4.877927432182317e-06, "loss": 0.0006, "step": 169330 }, { "epoch": 1.114056959402117, "grad_norm": 0.04489157680546916, "learning_rate": 4.877353493145842e-06, "loss": 0.0011, "step": 169340 }, { "epoch": 1.1141227475773505, "grad_norm": 0.05904686724368147, "learning_rate": 4.8767795557263546e-06, "loss": 0.0007, "step": 169350 }, { "epoch": 1.1141885357525838, "grad_norm": 0.05683932127743057, "learning_rate": 4.876205619931423e-06, "loss": 0.0016, "step": 169360 }, { "epoch": 1.1142543239278173, "grad_norm": 0.006509756646529739, "learning_rate": 4.875631685768612e-06, "loss": 0.0006, "step": 169370 }, { "epoch": 1.1143201121030506, "grad_norm": 0.014429411521516002, "learning_rate": 4.875057753245491e-06, "loss": 0.0005, "step": 169380 }, { "epoch": 1.1143859002782839, "grad_norm": 0.0005218232476345129, "learning_rate": 4.874483822369625e-06, "loss": 0.0005, "step": 169390 }, { "epoch": 1.1144516884535174, "grad_norm": 0.1235193485164768, "learning_rate": 4.8739098931485825e-06, "loss": 0.0009, "step": 169400 }, { "epoch": 1.1145174766287507, "grad_norm": 0.10677248448136431, "learning_rate": 4.873335965589928e-06, "loss": 0.0008, "step": 169410 }, { "epoch": 1.1145832648039842, "grad_norm": 0.01228683678761711, "learning_rate": 4.87276203970123e-06, "loss": 0.0005, "step": 169420 }, { "epoch": 1.1146490529792175, "grad_norm": 0.006646751136857095, "learning_rate": 4.872188115490053e-06, "loss": 0.0011, "step": 169430 }, { "epoch": 1.114714841154451, "grad_norm": 0.004316770132920476, "learning_rate": 4.8716141929639674e-06, "loss": 0.0008, "step": 169440 }, { "epoch": 1.1147806293296842, "grad_norm": 0.04586544869260268, "learning_rate": 4.871040272130537e-06, "loss": 0.0005, "step": 169450 }, { "epoch": 1.1148464175049178, "grad_norm": 0.048053120317255275, "learning_rate": 4.87046635299733e-06, "loss": 0.0006, "step": 169460 }, { "epoch": 1.114912205680151, "grad_norm": 0.014290532218525434, "learning_rate": 4.869892435571912e-06, "loss": 0.0006, "step": 169470 }, { "epoch": 1.1149779938553843, "grad_norm": 0.011937043164531612, "learning_rate": 4.869318519861849e-06, "loss": 0.0011, "step": 169480 }, { "epoch": 1.1150437820306178, "grad_norm": 0.022206397342976653, "learning_rate": 4.868744605874709e-06, "loss": 0.0007, "step": 169490 }, { "epoch": 1.115109570205851, "grad_norm": 0.01343142768266586, "learning_rate": 4.868170693618058e-06, "loss": 0.0011, "step": 169500 }, { "epoch": 1.1151753583810846, "grad_norm": 0.03591985258556253, "learning_rate": 4.867596783099462e-06, "loss": 0.0007, "step": 169510 }, { "epoch": 1.115241146556318, "grad_norm": 0.00041815280679205214, "learning_rate": 4.867022874326489e-06, "loss": 0.0003, "step": 169520 }, { "epoch": 1.1153069347315514, "grad_norm": 0.01228718935094312, "learning_rate": 4.866448967306704e-06, "loss": 0.0007, "step": 169530 }, { "epoch": 1.1153727229067847, "grad_norm": 0.04657686729998327, "learning_rate": 4.865875062047675e-06, "loss": 0.0012, "step": 169540 }, { "epoch": 1.1154385110820182, "grad_norm": 0.14073993344547386, "learning_rate": 4.8653011585569665e-06, "loss": 0.0014, "step": 169550 }, { "epoch": 1.1155042992572515, "grad_norm": 0.07889914344771469, "learning_rate": 4.864727256842145e-06, "loss": 0.0014, "step": 169560 }, { "epoch": 1.115570087432485, "grad_norm": 0.01568110522659132, "learning_rate": 4.864153356910779e-06, "loss": 0.0004, "step": 169570 }, { "epoch": 1.1156358756077183, "grad_norm": 0.025029772008991587, "learning_rate": 4.863579458770433e-06, "loss": 0.0007, "step": 169580 }, { "epoch": 1.1157016637829518, "grad_norm": 0.04506521647486248, "learning_rate": 4.863005562428675e-06, "loss": 0.0013, "step": 169590 }, { "epoch": 1.115767451958185, "grad_norm": 0.09817419576820301, "learning_rate": 4.86243166789307e-06, "loss": 0.0016, "step": 169600 }, { "epoch": 1.1158332401334183, "grad_norm": 0.027994309058319816, "learning_rate": 4.861857775171184e-06, "loss": 0.001, "step": 169610 }, { "epoch": 1.1158990283086518, "grad_norm": 0.041753046651354986, "learning_rate": 4.8612838842705854e-06, "loss": 0.0007, "step": 169620 }, { "epoch": 1.1159648164838851, "grad_norm": 0.030296640102223896, "learning_rate": 4.860709995198837e-06, "loss": 0.0015, "step": 169630 }, { "epoch": 1.1160306046591186, "grad_norm": 0.09458281506825982, "learning_rate": 4.860136107963508e-06, "loss": 0.0008, "step": 169640 }, { "epoch": 1.116096392834352, "grad_norm": 0.022683214673067914, "learning_rate": 4.8595622225721635e-06, "loss": 0.0006, "step": 169650 }, { "epoch": 1.1161621810095854, "grad_norm": 0.1396259940340442, "learning_rate": 4.85898833903237e-06, "loss": 0.0017, "step": 169660 }, { "epoch": 1.1162279691848187, "grad_norm": 0.01244563794509533, "learning_rate": 4.858414457351694e-06, "loss": 0.0011, "step": 169670 }, { "epoch": 1.1162937573600522, "grad_norm": 0.011656378283727684, "learning_rate": 4.8578405775377e-06, "loss": 0.0004, "step": 169680 }, { "epoch": 1.1163595455352855, "grad_norm": 0.011373442280465126, "learning_rate": 4.857266699597955e-06, "loss": 0.0005, "step": 169690 }, { "epoch": 1.1164253337105188, "grad_norm": 0.12861048732338, "learning_rate": 4.856692823540026e-06, "loss": 0.0006, "step": 169700 }, { "epoch": 1.1164911218857523, "grad_norm": 0.04628210152811438, "learning_rate": 4.856118949371479e-06, "loss": 0.0004, "step": 169710 }, { "epoch": 1.1165569100609856, "grad_norm": 7.269239661305232e-05, "learning_rate": 4.855545077099878e-06, "loss": 0.0005, "step": 169720 }, { "epoch": 1.116622698236219, "grad_norm": 0.02235400177407108, "learning_rate": 4.854971206732792e-06, "loss": 0.0006, "step": 169730 }, { "epoch": 1.1166884864114524, "grad_norm": 0.02312042368087368, "learning_rate": 4.854397338277784e-06, "loss": 0.0014, "step": 169740 }, { "epoch": 1.1167542745866859, "grad_norm": 0.0002948916756558423, "learning_rate": 4.853823471742423e-06, "loss": 0.0005, "step": 169750 }, { "epoch": 1.1168200627619191, "grad_norm": 0.08087112468591438, "learning_rate": 4.85324960713427e-06, "loss": 0.0009, "step": 169760 }, { "epoch": 1.1168858509371526, "grad_norm": 0.036380493617702604, "learning_rate": 4.852675744460897e-06, "loss": 0.0008, "step": 169770 }, { "epoch": 1.116951639112386, "grad_norm": 0.0019269354090811132, "learning_rate": 4.852101883729867e-06, "loss": 0.0008, "step": 169780 }, { "epoch": 1.1170174272876192, "grad_norm": 0.05046567469318648, "learning_rate": 4.851528024948745e-06, "loss": 0.0006, "step": 169790 }, { "epoch": 1.1170832154628527, "grad_norm": 0.06339976736819752, "learning_rate": 4.850954168125098e-06, "loss": 0.001, "step": 169800 }, { "epoch": 1.117149003638086, "grad_norm": 0.010119911655874605, "learning_rate": 4.850380313266492e-06, "loss": 0.0005, "step": 169810 }, { "epoch": 1.1172147918133195, "grad_norm": 0.03823437651796919, "learning_rate": 4.849806460380491e-06, "loss": 0.0008, "step": 169820 }, { "epoch": 1.1172805799885528, "grad_norm": 0.030354125839099536, "learning_rate": 4.849232609474663e-06, "loss": 0.0003, "step": 169830 }, { "epoch": 1.1173463681637863, "grad_norm": 0.002375797598169485, "learning_rate": 4.848658760556574e-06, "loss": 0.001, "step": 169840 }, { "epoch": 1.1174121563390196, "grad_norm": 0.00629181898814353, "learning_rate": 4.848084913633787e-06, "loss": 0.0006, "step": 169850 }, { "epoch": 1.117477944514253, "grad_norm": 0.015281334336295097, "learning_rate": 4.84751106871387e-06, "loss": 0.002, "step": 169860 }, { "epoch": 1.1175437326894864, "grad_norm": 0.003698693298074135, "learning_rate": 4.846937225804388e-06, "loss": 0.0016, "step": 169870 }, { "epoch": 1.1176095208647199, "grad_norm": 0.036752623509038324, "learning_rate": 4.846363384912906e-06, "loss": 0.0004, "step": 169880 }, { "epoch": 1.1176753090399532, "grad_norm": 0.034385150238549156, "learning_rate": 4.8457895460469885e-06, "loss": 0.0059, "step": 169890 }, { "epoch": 1.1177410972151864, "grad_norm": 0.14179685300861444, "learning_rate": 4.845215709214205e-06, "loss": 0.0007, "step": 169900 }, { "epoch": 1.11780688539042, "grad_norm": 0.0636789954648594, "learning_rate": 4.844641874422118e-06, "loss": 0.001, "step": 169910 }, { "epoch": 1.1178726735656532, "grad_norm": 0.03088507737916538, "learning_rate": 4.844068041678293e-06, "loss": 0.0006, "step": 169920 }, { "epoch": 1.1179384617408867, "grad_norm": 0.04241799872537933, "learning_rate": 4.843494210990299e-06, "loss": 0.0006, "step": 169930 }, { "epoch": 1.11800424991612, "grad_norm": 0.00961046296703481, "learning_rate": 4.842920382365696e-06, "loss": 0.0012, "step": 169940 }, { "epoch": 1.1180700380913535, "grad_norm": 0.07866093624272691, "learning_rate": 4.842346555812051e-06, "loss": 0.0023, "step": 169950 }, { "epoch": 1.1181358262665868, "grad_norm": 0.004449834424609354, "learning_rate": 4.841772731336933e-06, "loss": 0.0007, "step": 169960 }, { "epoch": 1.1182016144418203, "grad_norm": 0.003026863213485133, "learning_rate": 4.841198908947904e-06, "loss": 0.0003, "step": 169970 }, { "epoch": 1.1182674026170536, "grad_norm": 0.053638603614012986, "learning_rate": 4.84062508865253e-06, "loss": 0.0005, "step": 169980 }, { "epoch": 1.118333190792287, "grad_norm": 0.023657120895304286, "learning_rate": 4.840051270458377e-06, "loss": 0.0007, "step": 169990 }, { "epoch": 1.1183989789675204, "grad_norm": 0.025593162539019526, "learning_rate": 4.83947745437301e-06, "loss": 0.001, "step": 170000 }, { "epoch": 1.1184647671427537, "grad_norm": 0.053396278297223355, "learning_rate": 4.838903640403994e-06, "loss": 0.0009, "step": 170010 }, { "epoch": 1.1185305553179872, "grad_norm": 0.023854636151466657, "learning_rate": 4.838329828558893e-06, "loss": 0.0008, "step": 170020 }, { "epoch": 1.1185963434932205, "grad_norm": 0.04849208591385066, "learning_rate": 4.8377560188452745e-06, "loss": 0.0005, "step": 170030 }, { "epoch": 1.118662131668454, "grad_norm": 0.09682512640183681, "learning_rate": 4.837182211270703e-06, "loss": 0.0007, "step": 170040 }, { "epoch": 1.1187279198436872, "grad_norm": 0.0042449297142801955, "learning_rate": 4.836608405842744e-06, "loss": 0.0006, "step": 170050 }, { "epoch": 1.1187937080189208, "grad_norm": 0.0031159387688542285, "learning_rate": 4.836034602568962e-06, "loss": 0.0007, "step": 170060 }, { "epoch": 1.118859496194154, "grad_norm": 0.019363241040449144, "learning_rate": 4.835460801456922e-06, "loss": 0.0009, "step": 170070 }, { "epoch": 1.1189252843693875, "grad_norm": 0.07463949526936675, "learning_rate": 4.834887002514187e-06, "loss": 0.0013, "step": 170080 }, { "epoch": 1.1189910725446208, "grad_norm": 0.0769918071249026, "learning_rate": 4.8343132057483265e-06, "loss": 0.001, "step": 170090 }, { "epoch": 1.119056860719854, "grad_norm": 0.022680125043272364, "learning_rate": 4.8337394111669035e-06, "loss": 0.0008, "step": 170100 }, { "epoch": 1.1191226488950876, "grad_norm": 0.044783508477875565, "learning_rate": 4.833165618777482e-06, "loss": 0.0014, "step": 170110 }, { "epoch": 1.119188437070321, "grad_norm": 0.05246086868248202, "learning_rate": 4.832591828587628e-06, "loss": 0.0005, "step": 170120 }, { "epoch": 1.1192542252455544, "grad_norm": 0.04836848785509337, "learning_rate": 4.832018040604907e-06, "loss": 0.0012, "step": 170130 }, { "epoch": 1.1193200134207877, "grad_norm": 0.05641647999369876, "learning_rate": 4.831444254836882e-06, "loss": 0.0004, "step": 170140 }, { "epoch": 1.1193858015960212, "grad_norm": 0.09584913905130499, "learning_rate": 4.830870471291118e-06, "loss": 0.0011, "step": 170150 }, { "epoch": 1.1194515897712545, "grad_norm": 0.04941656387147279, "learning_rate": 4.830296689975182e-06, "loss": 0.0008, "step": 170160 }, { "epoch": 1.119517377946488, "grad_norm": 0.07625238792765311, "learning_rate": 4.829722910896638e-06, "loss": 0.0007, "step": 170170 }, { "epoch": 1.1195831661217213, "grad_norm": 0.022313674229645693, "learning_rate": 4.8291491340630495e-06, "loss": 0.0004, "step": 170180 }, { "epoch": 1.1196489542969548, "grad_norm": 0.03706514762634671, "learning_rate": 4.828575359481982e-06, "loss": 0.0004, "step": 170190 }, { "epoch": 1.119714742472188, "grad_norm": 0.0011723517652440704, "learning_rate": 4.828001587161002e-06, "loss": 0.0005, "step": 170200 }, { "epoch": 1.1197805306474213, "grad_norm": 0.02620412591477314, "learning_rate": 4.82742781710767e-06, "loss": 0.0015, "step": 170210 }, { "epoch": 1.1198463188226548, "grad_norm": 0.011011062943203085, "learning_rate": 4.826854049329554e-06, "loss": 0.0014, "step": 170220 }, { "epoch": 1.1199121069978881, "grad_norm": 0.014292352688093037, "learning_rate": 4.826280283834219e-06, "loss": 0.0005, "step": 170230 }, { "epoch": 1.1199778951731216, "grad_norm": 0.029483777155069564, "learning_rate": 4.825706520629228e-06, "loss": 0.0015, "step": 170240 }, { "epoch": 1.120043683348355, "grad_norm": 0.014233181259289521, "learning_rate": 4.825132759722145e-06, "loss": 0.0009, "step": 170250 }, { "epoch": 1.1201094715235884, "grad_norm": 0.07906957672589941, "learning_rate": 4.824559001120536e-06, "loss": 0.0007, "step": 170260 }, { "epoch": 1.1201752596988217, "grad_norm": 0.10976601068466126, "learning_rate": 4.823985244831966e-06, "loss": 0.0012, "step": 170270 }, { "epoch": 1.1202410478740552, "grad_norm": 0.002510285412360963, "learning_rate": 4.823411490863997e-06, "loss": 0.0008, "step": 170280 }, { "epoch": 1.1203068360492885, "grad_norm": 0.09656227312277307, "learning_rate": 4.822837739224196e-06, "loss": 0.0011, "step": 170290 }, { "epoch": 1.120372624224522, "grad_norm": 0.12756504121355647, "learning_rate": 4.822263989920126e-06, "loss": 0.0009, "step": 170300 }, { "epoch": 1.1204384123997553, "grad_norm": 0.04867085012207391, "learning_rate": 4.821690242959352e-06, "loss": 0.0005, "step": 170310 }, { "epoch": 1.1205042005749886, "grad_norm": 0.039901864144217314, "learning_rate": 4.821116498349438e-06, "loss": 0.0006, "step": 170320 }, { "epoch": 1.120569988750222, "grad_norm": 0.13634625792018548, "learning_rate": 4.8205427560979496e-06, "loss": 0.0006, "step": 170330 }, { "epoch": 1.1206357769254554, "grad_norm": 0.028490078187852375, "learning_rate": 4.8199690162124476e-06, "loss": 0.0006, "step": 170340 }, { "epoch": 1.1207015651006889, "grad_norm": 0.145734827443931, "learning_rate": 4.8193952787005005e-06, "loss": 0.0009, "step": 170350 }, { "epoch": 1.1207673532759221, "grad_norm": 0.0031316578903888615, "learning_rate": 4.81882154356967e-06, "loss": 0.0008, "step": 170360 }, { "epoch": 1.1208331414511556, "grad_norm": 0.11829093117183212, "learning_rate": 4.818247810827521e-06, "loss": 0.0007, "step": 170370 }, { "epoch": 1.120898929626389, "grad_norm": 0.052724909188087296, "learning_rate": 4.817674080481618e-06, "loss": 0.0014, "step": 170380 }, { "epoch": 1.1209647178016224, "grad_norm": 0.0351015686865451, "learning_rate": 4.817100352539525e-06, "loss": 0.0007, "step": 170390 }, { "epoch": 1.1210305059768557, "grad_norm": 0.018406633980579068, "learning_rate": 4.8165266270088054e-06, "loss": 0.0009, "step": 170400 }, { "epoch": 1.121096294152089, "grad_norm": 0.02780693533469095, "learning_rate": 4.815952903897023e-06, "loss": 0.001, "step": 170410 }, { "epoch": 1.1211620823273225, "grad_norm": 0.03625899807495158, "learning_rate": 4.815379183211744e-06, "loss": 0.001, "step": 170420 }, { "epoch": 1.1212278705025558, "grad_norm": 0.03906559835690602, "learning_rate": 4.814805464960531e-06, "loss": 0.0006, "step": 170430 }, { "epoch": 1.1212936586777893, "grad_norm": 0.10302743638950526, "learning_rate": 4.814231749150947e-06, "loss": 0.0009, "step": 170440 }, { "epoch": 1.1213594468530226, "grad_norm": 0.006756625932515889, "learning_rate": 4.8136580357905575e-06, "loss": 0.0012, "step": 170450 }, { "epoch": 1.121425235028256, "grad_norm": 0.014212821035627392, "learning_rate": 4.813084324886926e-06, "loss": 0.0018, "step": 170460 }, { "epoch": 1.1214910232034894, "grad_norm": 0.04936557644350197, "learning_rate": 4.812510616447615e-06, "loss": 0.0007, "step": 170470 }, { "epoch": 1.1215568113787229, "grad_norm": 0.015069400526159337, "learning_rate": 4.8119369104801905e-06, "loss": 0.0008, "step": 170480 }, { "epoch": 1.1216225995539562, "grad_norm": 0.10281719210339053, "learning_rate": 4.811363206992215e-06, "loss": 0.0008, "step": 170490 }, { "epoch": 1.1216883877291894, "grad_norm": 0.0050855212616060345, "learning_rate": 4.810789505991254e-06, "loss": 0.0009, "step": 170500 }, { "epoch": 1.121754175904423, "grad_norm": 0.026748479805149602, "learning_rate": 4.810215807484869e-06, "loss": 0.0008, "step": 170510 }, { "epoch": 1.1218199640796562, "grad_norm": 0.09980792995751055, "learning_rate": 4.809642111480626e-06, "loss": 0.0024, "step": 170520 }, { "epoch": 1.1218857522548897, "grad_norm": 0.06836947468220857, "learning_rate": 4.809068417986086e-06, "loss": 0.0013, "step": 170530 }, { "epoch": 1.121951540430123, "grad_norm": 0.004495223707296298, "learning_rate": 4.808494727008813e-06, "loss": 0.0011, "step": 170540 }, { "epoch": 1.1220173286053565, "grad_norm": 0.05502379269398063, "learning_rate": 4.807921038556373e-06, "loss": 0.0005, "step": 170550 }, { "epoch": 1.1220831167805898, "grad_norm": 0.0006547498433615011, "learning_rate": 4.807347352636328e-06, "loss": 0.0008, "step": 170560 }, { "epoch": 1.1221489049558233, "grad_norm": 0.012165921491998797, "learning_rate": 4.806773669256242e-06, "loss": 0.0012, "step": 170570 }, { "epoch": 1.1222146931310566, "grad_norm": 0.01119511796006218, "learning_rate": 4.8061999884236785e-06, "loss": 0.0006, "step": 170580 }, { "epoch": 1.12228048130629, "grad_norm": 0.057378066172873984, "learning_rate": 4.8056263101462006e-06, "loss": 0.0007, "step": 170590 }, { "epoch": 1.1223462694815234, "grad_norm": 0.09573785783409791, "learning_rate": 4.80505263443137e-06, "loss": 0.0012, "step": 170600 }, { "epoch": 1.122412057656757, "grad_norm": 0.03455072754186554, "learning_rate": 4.804478961286754e-06, "loss": 0.0005, "step": 170610 }, { "epoch": 1.1224778458319902, "grad_norm": 0.05616604254267565, "learning_rate": 4.803905290719914e-06, "loss": 0.0009, "step": 170620 }, { "epoch": 1.1225436340072235, "grad_norm": 0.024215426896930153, "learning_rate": 4.803331622738414e-06, "loss": 0.0009, "step": 170630 }, { "epoch": 1.122609422182457, "grad_norm": 0.04029989269495753, "learning_rate": 4.802757957349816e-06, "loss": 0.0008, "step": 170640 }, { "epoch": 1.1226752103576902, "grad_norm": 0.0035107575585986896, "learning_rate": 4.802184294561684e-06, "loss": 0.0009, "step": 170650 }, { "epoch": 1.1227409985329238, "grad_norm": 0.0033039281034698124, "learning_rate": 4.801610634381581e-06, "loss": 0.0005, "step": 170660 }, { "epoch": 1.122806786708157, "grad_norm": 0.02472160454538992, "learning_rate": 4.80103697681707e-06, "loss": 0.001, "step": 170670 }, { "epoch": 1.1228725748833905, "grad_norm": 0.00035382128701700146, "learning_rate": 4.800463321875715e-06, "loss": 0.0006, "step": 170680 }, { "epoch": 1.1229383630586238, "grad_norm": 0.016387691658324998, "learning_rate": 4.799889669565079e-06, "loss": 0.0008, "step": 170690 }, { "epoch": 1.1230041512338573, "grad_norm": 0.028704959574296834, "learning_rate": 4.799316019892725e-06, "loss": 0.0007, "step": 170700 }, { "epoch": 1.1230699394090906, "grad_norm": 0.09348308629374368, "learning_rate": 4.798742372866216e-06, "loss": 0.0011, "step": 170710 }, { "epoch": 1.123135727584324, "grad_norm": 0.016120573742010157, "learning_rate": 4.798168728493115e-06, "loss": 0.0008, "step": 170720 }, { "epoch": 1.1232015157595574, "grad_norm": 0.007935281679457158, "learning_rate": 4.797595086780984e-06, "loss": 0.0008, "step": 170730 }, { "epoch": 1.1232673039347907, "grad_norm": 0.020189152844530524, "learning_rate": 4.7970214477373875e-06, "loss": 0.0008, "step": 170740 }, { "epoch": 1.1233330921100242, "grad_norm": 0.0005320580939626908, "learning_rate": 4.7964478113698885e-06, "loss": 0.0013, "step": 170750 }, { "epoch": 1.1233988802852575, "grad_norm": 0.09602118187737797, "learning_rate": 4.795874177686048e-06, "loss": 0.0004, "step": 170760 }, { "epoch": 1.123464668460491, "grad_norm": 0.0518107131666403, "learning_rate": 4.795300546693432e-06, "loss": 0.001, "step": 170770 }, { "epoch": 1.1235304566357243, "grad_norm": 0.024842436239086267, "learning_rate": 4.7947269183996e-06, "loss": 0.0011, "step": 170780 }, { "epoch": 1.1235962448109578, "grad_norm": 0.019177610165875197, "learning_rate": 4.7941532928121176e-06, "loss": 0.0009, "step": 170790 }, { "epoch": 1.123662032986191, "grad_norm": 0.028126121932199936, "learning_rate": 4.793579669938545e-06, "loss": 0.0012, "step": 170800 }, { "epoch": 1.1237278211614243, "grad_norm": 0.06381575211381463, "learning_rate": 4.793006049786446e-06, "loss": 0.0006, "step": 170810 }, { "epoch": 1.1237936093366578, "grad_norm": 0.0169138870935943, "learning_rate": 4.792432432363385e-06, "loss": 0.0008, "step": 170820 }, { "epoch": 1.1238593975118911, "grad_norm": 0.026682963990039565, "learning_rate": 4.791858817676922e-06, "loss": 0.0007, "step": 170830 }, { "epoch": 1.1239251856871246, "grad_norm": 0.013404107043455758, "learning_rate": 4.791285205734621e-06, "loss": 0.0007, "step": 170840 }, { "epoch": 1.123990973862358, "grad_norm": 0.01308230618290717, "learning_rate": 4.790711596544045e-06, "loss": 0.0008, "step": 170850 }, { "epoch": 1.1240567620375914, "grad_norm": 0.06870569709498506, "learning_rate": 4.7901379901127545e-06, "loss": 0.0012, "step": 170860 }, { "epoch": 1.1241225502128247, "grad_norm": 0.019383351383715636, "learning_rate": 4.789564386448313e-06, "loss": 0.0012, "step": 170870 }, { "epoch": 1.1241883383880582, "grad_norm": 0.003404697850656014, "learning_rate": 4.7889907855582855e-06, "loss": 0.0035, "step": 170880 }, { "epoch": 1.1242541265632915, "grad_norm": 0.038556684682648244, "learning_rate": 4.788417187450231e-06, "loss": 0.0009, "step": 170890 }, { "epoch": 1.124319914738525, "grad_norm": 0.021854721094563132, "learning_rate": 4.787843592131714e-06, "loss": 0.0007, "step": 170900 }, { "epoch": 1.1243857029137583, "grad_norm": 0.033763603174734126, "learning_rate": 4.7872699996102966e-06, "loss": 0.0005, "step": 170910 }, { "epoch": 1.1244514910889916, "grad_norm": 0.06831778218217671, "learning_rate": 4.786696409893539e-06, "loss": 0.0006, "step": 170920 }, { "epoch": 1.124517279264225, "grad_norm": 0.021908475645236812, "learning_rate": 4.786122822989006e-06, "loss": 0.0009, "step": 170930 }, { "epoch": 1.1245830674394584, "grad_norm": 0.04905975530191773, "learning_rate": 4.785549238904257e-06, "loss": 0.0008, "step": 170940 }, { "epoch": 1.1246488556146919, "grad_norm": 0.012699819150867639, "learning_rate": 4.784975657646858e-06, "loss": 0.0004, "step": 170950 }, { "epoch": 1.1247146437899251, "grad_norm": 0.008030394941882612, "learning_rate": 4.784402079224369e-06, "loss": 0.0003, "step": 170960 }, { "epoch": 1.1247804319651586, "grad_norm": 0.038044509243983275, "learning_rate": 4.78382850364435e-06, "loss": 0.0006, "step": 170970 }, { "epoch": 1.124846220140392, "grad_norm": 0.06770312234416082, "learning_rate": 4.783254930914368e-06, "loss": 0.0006, "step": 170980 }, { "epoch": 1.1249120083156254, "grad_norm": 0.025306608453462415, "learning_rate": 4.782681361041981e-06, "loss": 0.0008, "step": 170990 }, { "epoch": 1.1249777964908587, "grad_norm": 0.010552484267567096, "learning_rate": 4.782107794034754e-06, "loss": 0.0004, "step": 171000 }, { "epoch": 1.1250435846660922, "grad_norm": 0.0925921401477701, "learning_rate": 4.781534229900247e-06, "loss": 0.0012, "step": 171010 }, { "epoch": 1.1251093728413255, "grad_norm": 0.043202427263608664, "learning_rate": 4.780960668646023e-06, "loss": 0.0006, "step": 171020 }, { "epoch": 1.1251751610165588, "grad_norm": 0.03170778798712204, "learning_rate": 4.780387110279642e-06, "loss": 0.0004, "step": 171030 }, { "epoch": 1.1252409491917923, "grad_norm": 0.055207099032074075, "learning_rate": 4.779813554808668e-06, "loss": 0.0012, "step": 171040 }, { "epoch": 1.1253067373670256, "grad_norm": 0.013136629497603807, "learning_rate": 4.779240002240662e-06, "loss": 0.0008, "step": 171050 }, { "epoch": 1.125372525542259, "grad_norm": 0.005526725321336701, "learning_rate": 4.778666452583187e-06, "loss": 0.0006, "step": 171060 }, { "epoch": 1.1254383137174924, "grad_norm": 0.012188879985918536, "learning_rate": 4.778092905843802e-06, "loss": 0.0005, "step": 171070 }, { "epoch": 1.1255041018927259, "grad_norm": 0.03203989132540941, "learning_rate": 4.777519362030072e-06, "loss": 0.0008, "step": 171080 }, { "epoch": 1.1255698900679592, "grad_norm": 0.05626521498636181, "learning_rate": 4.776945821149556e-06, "loss": 0.0006, "step": 171090 }, { "epoch": 1.1256356782431927, "grad_norm": 0.004183610082006725, "learning_rate": 4.776372283209816e-06, "loss": 0.0008, "step": 171100 }, { "epoch": 1.125701466418426, "grad_norm": 0.008277986414308623, "learning_rate": 4.775798748218416e-06, "loss": 0.0004, "step": 171110 }, { "epoch": 1.1257672545936592, "grad_norm": 0.012943347321882986, "learning_rate": 4.775225216182915e-06, "loss": 0.0005, "step": 171120 }, { "epoch": 1.1258330427688927, "grad_norm": 0.0060006222614587545, "learning_rate": 4.774651687110876e-06, "loss": 0.001, "step": 171130 }, { "epoch": 1.125898830944126, "grad_norm": 0.02586786859769408, "learning_rate": 4.774078161009859e-06, "loss": 0.0006, "step": 171140 }, { "epoch": 1.1259646191193595, "grad_norm": 0.0012393466536137535, "learning_rate": 4.773504637887427e-06, "loss": 0.0006, "step": 171150 }, { "epoch": 1.1260304072945928, "grad_norm": 0.016478825294820306, "learning_rate": 4.772931117751139e-06, "loss": 0.0002, "step": 171160 }, { "epoch": 1.1260961954698263, "grad_norm": 0.005613865122631704, "learning_rate": 4.772357600608561e-06, "loss": 0.0007, "step": 171170 }, { "epoch": 1.1261619836450596, "grad_norm": 0.004806770420913275, "learning_rate": 4.7717840864672495e-06, "loss": 0.0005, "step": 171180 }, { "epoch": 1.126227771820293, "grad_norm": 0.021158524280421573, "learning_rate": 4.771210575334769e-06, "loss": 0.0008, "step": 171190 }, { "epoch": 1.1262935599955264, "grad_norm": 0.030915734376491574, "learning_rate": 4.770637067218679e-06, "loss": 0.0006, "step": 171200 }, { "epoch": 1.1263593481707597, "grad_norm": 0.021564665375321666, "learning_rate": 4.77006356212654e-06, "loss": 0.0011, "step": 171210 }, { "epoch": 1.1264251363459932, "grad_norm": 0.005842502697868122, "learning_rate": 4.769490060065916e-06, "loss": 0.001, "step": 171220 }, { "epoch": 1.1264909245212267, "grad_norm": 0.006124827592082013, "learning_rate": 4.768916561044365e-06, "loss": 0.0006, "step": 171230 }, { "epoch": 1.12655671269646, "grad_norm": 0.04131594911409434, "learning_rate": 4.76834306506945e-06, "loss": 0.0007, "step": 171240 }, { "epoch": 1.1266225008716932, "grad_norm": 0.008729924776584077, "learning_rate": 4.767769572148732e-06, "loss": 0.002, "step": 171250 }, { "epoch": 1.1266882890469267, "grad_norm": 0.008229428373158225, "learning_rate": 4.7671960822897715e-06, "loss": 0.0008, "step": 171260 }, { "epoch": 1.12675407722216, "grad_norm": 0.016661286666073447, "learning_rate": 4.76662259550013e-06, "loss": 0.0007, "step": 171270 }, { "epoch": 1.1268198653973935, "grad_norm": 0.0010409799003410097, "learning_rate": 4.766049111787368e-06, "loss": 0.0011, "step": 171280 }, { "epoch": 1.1268856535726268, "grad_norm": 0.0014328891600657954, "learning_rate": 4.765475631159045e-06, "loss": 0.0003, "step": 171290 }, { "epoch": 1.1269514417478603, "grad_norm": 0.09773039052759118, "learning_rate": 4.764902153622724e-06, "loss": 0.0017, "step": 171300 }, { "epoch": 1.1270172299230936, "grad_norm": 0.16573989734862718, "learning_rate": 4.764328679185966e-06, "loss": 0.0008, "step": 171310 }, { "epoch": 1.1270830180983271, "grad_norm": 0.016787494463534662, "learning_rate": 4.76375520785633e-06, "loss": 0.0004, "step": 171320 }, { "epoch": 1.1271488062735604, "grad_norm": 0.04238929217894447, "learning_rate": 4.763181739641378e-06, "loss": 0.0009, "step": 171330 }, { "epoch": 1.1272145944487937, "grad_norm": 0.03766018146818506, "learning_rate": 4.76260827454867e-06, "loss": 0.0011, "step": 171340 }, { "epoch": 1.1272803826240272, "grad_norm": 0.0644556556963166, "learning_rate": 4.7620348125857664e-06, "loss": 0.0009, "step": 171350 }, { "epoch": 1.1273461707992605, "grad_norm": 0.08545463230691053, "learning_rate": 4.761461353760228e-06, "loss": 0.0007, "step": 171360 }, { "epoch": 1.127411958974494, "grad_norm": 0.005135449639762217, "learning_rate": 4.760887898079617e-06, "loss": 0.0006, "step": 171370 }, { "epoch": 1.1274777471497273, "grad_norm": 0.010583735019374929, "learning_rate": 4.760314445551492e-06, "loss": 0.0007, "step": 171380 }, { "epoch": 1.1275435353249608, "grad_norm": 0.015237089042536317, "learning_rate": 4.759740996183414e-06, "loss": 0.0007, "step": 171390 }, { "epoch": 1.127609323500194, "grad_norm": 0.05780261857070951, "learning_rate": 4.759167549982944e-06, "loss": 0.0004, "step": 171400 }, { "epoch": 1.1276751116754276, "grad_norm": 0.07197709882863632, "learning_rate": 4.758594106957641e-06, "loss": 0.001, "step": 171410 }, { "epoch": 1.1277408998506608, "grad_norm": 0.06338455116247997, "learning_rate": 4.758020667115065e-06, "loss": 0.0015, "step": 171420 }, { "epoch": 1.1278066880258941, "grad_norm": 0.038907343245225476, "learning_rate": 4.75744723046278e-06, "loss": 0.0005, "step": 171430 }, { "epoch": 1.1278724762011276, "grad_norm": 0.009147816496644538, "learning_rate": 4.756873797008342e-06, "loss": 0.0014, "step": 171440 }, { "epoch": 1.127938264376361, "grad_norm": 0.04667816425821863, "learning_rate": 4.756300366759315e-06, "loss": 0.0009, "step": 171450 }, { "epoch": 1.1280040525515944, "grad_norm": 0.004343521358655398, "learning_rate": 4.755726939723256e-06, "loss": 0.0007, "step": 171460 }, { "epoch": 1.1280698407268277, "grad_norm": 0.07130121055542007, "learning_rate": 4.755153515907726e-06, "loss": 0.001, "step": 171470 }, { "epoch": 1.1281356289020612, "grad_norm": 0.009573577498076894, "learning_rate": 4.754580095320286e-06, "loss": 0.0004, "step": 171480 }, { "epoch": 1.1282014170772945, "grad_norm": 0.026862879310283437, "learning_rate": 4.754006677968494e-06, "loss": 0.001, "step": 171490 }, { "epoch": 1.128267205252528, "grad_norm": 0.01613809031323279, "learning_rate": 4.753433263859913e-06, "loss": 0.0011, "step": 171500 }, { "epoch": 1.1283329934277613, "grad_norm": 0.004070778850304605, "learning_rate": 4.7528598530021004e-06, "loss": 0.0006, "step": 171510 }, { "epoch": 1.1283987816029946, "grad_norm": 0.02373763601613755, "learning_rate": 4.752286445402619e-06, "loss": 0.001, "step": 171520 }, { "epoch": 1.128464569778228, "grad_norm": 0.06592891525107973, "learning_rate": 4.751713041069026e-06, "loss": 0.0008, "step": 171530 }, { "epoch": 1.1285303579534613, "grad_norm": 0.07358111541495303, "learning_rate": 4.751139640008882e-06, "loss": 0.0007, "step": 171540 }, { "epoch": 1.1285961461286949, "grad_norm": 0.021577317420632618, "learning_rate": 4.750566242229746e-06, "loss": 0.0005, "step": 171550 }, { "epoch": 1.1286619343039281, "grad_norm": 0.02254748821067102, "learning_rate": 4.74999284773918e-06, "loss": 0.0009, "step": 171560 }, { "epoch": 1.1287277224791616, "grad_norm": 0.010921685947641048, "learning_rate": 4.749419456544741e-06, "loss": 0.0008, "step": 171570 }, { "epoch": 1.128793510654395, "grad_norm": 0.05241373564813307, "learning_rate": 4.748846068653992e-06, "loss": 0.0007, "step": 171580 }, { "epoch": 1.1288592988296284, "grad_norm": 0.025918678080847756, "learning_rate": 4.74827268407449e-06, "loss": 0.001, "step": 171590 }, { "epoch": 1.1289250870048617, "grad_norm": 0.05777123230327475, "learning_rate": 4.747699302813795e-06, "loss": 0.0007, "step": 171600 }, { "epoch": 1.1289908751800952, "grad_norm": 0.0083093690709171, "learning_rate": 4.747125924879467e-06, "loss": 0.0008, "step": 171610 }, { "epoch": 1.1290566633553285, "grad_norm": 0.011612649121119364, "learning_rate": 4.746552550279063e-06, "loss": 0.0005, "step": 171620 }, { "epoch": 1.129122451530562, "grad_norm": 0.05874391724952977, "learning_rate": 4.7459791790201475e-06, "loss": 0.0005, "step": 171630 }, { "epoch": 1.1291882397057953, "grad_norm": 0.020954611888662055, "learning_rate": 4.745405811110276e-06, "loss": 0.0014, "step": 171640 }, { "epoch": 1.1292540278810286, "grad_norm": 0.017700182742610523, "learning_rate": 4.74483244655701e-06, "loss": 0.0009, "step": 171650 }, { "epoch": 1.129319816056262, "grad_norm": 0.0009123006500951744, "learning_rate": 4.744259085367907e-06, "loss": 0.0005, "step": 171660 }, { "epoch": 1.1293856042314954, "grad_norm": 0.06425321335614971, "learning_rate": 4.743685727550528e-06, "loss": 0.0005, "step": 171670 }, { "epoch": 1.1294513924067289, "grad_norm": 0.05160252401491622, "learning_rate": 4.743112373112429e-06, "loss": 0.0007, "step": 171680 }, { "epoch": 1.1295171805819622, "grad_norm": 0.008886826790818775, "learning_rate": 4.742539022061173e-06, "loss": 0.0006, "step": 171690 }, { "epoch": 1.1295829687571957, "grad_norm": 0.023012753796642558, "learning_rate": 4.741965674404319e-06, "loss": 0.0006, "step": 171700 }, { "epoch": 1.129648756932429, "grad_norm": 0.00026893250537741103, "learning_rate": 4.741392330149423e-06, "loss": 0.0006, "step": 171710 }, { "epoch": 1.1297145451076624, "grad_norm": 0.0007453731792844054, "learning_rate": 4.7408189893040464e-06, "loss": 0.0007, "step": 171720 }, { "epoch": 1.1297803332828957, "grad_norm": 0.05392758767974159, "learning_rate": 4.740245651875747e-06, "loss": 0.0033, "step": 171730 }, { "epoch": 1.129846121458129, "grad_norm": 0.006441034533456112, "learning_rate": 4.739672317872086e-06, "loss": 0.001, "step": 171740 }, { "epoch": 1.1299119096333625, "grad_norm": 0.019066735821271334, "learning_rate": 4.739098987300618e-06, "loss": 0.0011, "step": 171750 }, { "epoch": 1.1299776978085958, "grad_norm": 0.024997928015639964, "learning_rate": 4.738525660168906e-06, "loss": 0.0007, "step": 171760 }, { "epoch": 1.1300434859838293, "grad_norm": 0.032864974406113225, "learning_rate": 4.737952336484507e-06, "loss": 0.0008, "step": 171770 }, { "epoch": 1.1301092741590626, "grad_norm": 0.016739126238124565, "learning_rate": 4.7373790162549805e-06, "loss": 0.0017, "step": 171780 }, { "epoch": 1.130175062334296, "grad_norm": 0.02195437764953277, "learning_rate": 4.736805699487886e-06, "loss": 0.0007, "step": 171790 }, { "epoch": 1.1302408505095294, "grad_norm": 0.016604098317341713, "learning_rate": 4.736232386190779e-06, "loss": 0.0006, "step": 171800 }, { "epoch": 1.1303066386847629, "grad_norm": 0.04562949753833267, "learning_rate": 4.73565907637122e-06, "loss": 0.0009, "step": 171810 }, { "epoch": 1.1303724268599962, "grad_norm": 0.02316946006382296, "learning_rate": 4.735085770036769e-06, "loss": 0.0007, "step": 171820 }, { "epoch": 1.1304382150352295, "grad_norm": 0.012475148920165527, "learning_rate": 4.734512467194983e-06, "loss": 0.0011, "step": 171830 }, { "epoch": 1.130504003210463, "grad_norm": 0.0029766435160243506, "learning_rate": 4.733939167853421e-06, "loss": 0.0011, "step": 171840 }, { "epoch": 1.1305697913856962, "grad_norm": 0.0015374275186100413, "learning_rate": 4.733365872019641e-06, "loss": 0.0003, "step": 171850 }, { "epoch": 1.1306355795609297, "grad_norm": 0.025607663030569175, "learning_rate": 4.732792579701203e-06, "loss": 0.0011, "step": 171860 }, { "epoch": 1.130701367736163, "grad_norm": 0.027759758899784083, "learning_rate": 4.7322192909056625e-06, "loss": 0.0003, "step": 171870 }, { "epoch": 1.1307671559113965, "grad_norm": 0.07028181770111025, "learning_rate": 4.7316460056405785e-06, "loss": 0.0011, "step": 171880 }, { "epoch": 1.1308329440866298, "grad_norm": 0.01444130741284398, "learning_rate": 4.731072723913512e-06, "loss": 0.0005, "step": 171890 }, { "epoch": 1.1308987322618633, "grad_norm": 0.004526371676421545, "learning_rate": 4.730499445732019e-06, "loss": 0.0005, "step": 171900 }, { "epoch": 1.1309645204370966, "grad_norm": 0.03037592585153197, "learning_rate": 4.729926171103658e-06, "loss": 0.0008, "step": 171910 }, { "epoch": 1.1310303086123301, "grad_norm": 0.02344793627129656, "learning_rate": 4.729352900035987e-06, "loss": 0.0009, "step": 171920 }, { "epoch": 1.1310960967875634, "grad_norm": 0.03655580498066758, "learning_rate": 4.728779632536565e-06, "loss": 0.0021, "step": 171930 }, { "epoch": 1.131161884962797, "grad_norm": 0.03351710634084245, "learning_rate": 4.728206368612948e-06, "loss": 0.0006, "step": 171940 }, { "epoch": 1.1312276731380302, "grad_norm": 0.00019218931792546563, "learning_rate": 4.727633108272696e-06, "loss": 0.0015, "step": 171950 }, { "epoch": 1.1312934613132635, "grad_norm": 0.004108478548927089, "learning_rate": 4.727059851523367e-06, "loss": 0.0007, "step": 171960 }, { "epoch": 1.131359249488497, "grad_norm": 0.016733693397123005, "learning_rate": 4.726486598372517e-06, "loss": 0.0012, "step": 171970 }, { "epoch": 1.1314250376637303, "grad_norm": 0.03381307652879302, "learning_rate": 4.725913348827705e-06, "loss": 0.001, "step": 171980 }, { "epoch": 1.1314908258389638, "grad_norm": 0.0004498367335320948, "learning_rate": 4.725340102896489e-06, "loss": 0.0009, "step": 171990 }, { "epoch": 1.131556614014197, "grad_norm": 0.03937772145311393, "learning_rate": 4.724766860586427e-06, "loss": 0.001, "step": 172000 }, { "epoch": 1.1316224021894306, "grad_norm": 0.047017708319876304, "learning_rate": 4.724193621905075e-06, "loss": 0.0006, "step": 172010 }, { "epoch": 1.1316881903646638, "grad_norm": 0.01723256552620824, "learning_rate": 4.7236203868599926e-06, "loss": 0.0004, "step": 172020 }, { "epoch": 1.1317539785398973, "grad_norm": 0.02831769761957005, "learning_rate": 4.723047155458737e-06, "loss": 0.0006, "step": 172030 }, { "epoch": 1.1318197667151306, "grad_norm": 0.03192299977951158, "learning_rate": 4.722473927708866e-06, "loss": 0.0012, "step": 172040 }, { "epoch": 1.131885554890364, "grad_norm": 0.022674762915895836, "learning_rate": 4.721900703617935e-06, "loss": 0.0013, "step": 172050 }, { "epoch": 1.1319513430655974, "grad_norm": 0.030971895685218093, "learning_rate": 4.7213274831935045e-06, "loss": 0.0017, "step": 172060 }, { "epoch": 1.1320171312408307, "grad_norm": 0.009063679187537208, "learning_rate": 4.720754266443129e-06, "loss": 0.0003, "step": 172070 }, { "epoch": 1.1320829194160642, "grad_norm": 0.15954086279300447, "learning_rate": 4.720181053374369e-06, "loss": 0.0006, "step": 172080 }, { "epoch": 1.1321487075912975, "grad_norm": 0.03413684160898925, "learning_rate": 4.719607843994779e-06, "loss": 0.0012, "step": 172090 }, { "epoch": 1.132214495766531, "grad_norm": 0.020249174783021226, "learning_rate": 4.719034638311918e-06, "loss": 0.0011, "step": 172100 }, { "epoch": 1.1322802839417643, "grad_norm": 0.0007397059370887663, "learning_rate": 4.718461436333342e-06, "loss": 0.0004, "step": 172110 }, { "epoch": 1.1323460721169978, "grad_norm": 0.03694892750435863, "learning_rate": 4.71788823806661e-06, "loss": 0.0005, "step": 172120 }, { "epoch": 1.132411860292231, "grad_norm": 0.12199978272147649, "learning_rate": 4.717315043519277e-06, "loss": 0.0011, "step": 172130 }, { "epoch": 1.1324776484674643, "grad_norm": 0.005532231195110835, "learning_rate": 4.7167418526989e-06, "loss": 0.0003, "step": 172140 }, { "epoch": 1.1325434366426979, "grad_norm": 0.14346053346016296, "learning_rate": 4.716168665613038e-06, "loss": 0.0006, "step": 172150 }, { "epoch": 1.1326092248179311, "grad_norm": 0.02143832133178608, "learning_rate": 4.715595482269248e-06, "loss": 0.0004, "step": 172160 }, { "epoch": 1.1326750129931646, "grad_norm": 0.01812567557398586, "learning_rate": 4.715022302675085e-06, "loss": 0.0003, "step": 172170 }, { "epoch": 1.132740801168398, "grad_norm": 0.11626529498309271, "learning_rate": 4.714449126838107e-06, "loss": 0.0014, "step": 172180 }, { "epoch": 1.1328065893436314, "grad_norm": 0.10146687300261983, "learning_rate": 4.713875954765871e-06, "loss": 0.0009, "step": 172190 }, { "epoch": 1.1328723775188647, "grad_norm": 0.013136095719042782, "learning_rate": 4.713302786465932e-06, "loss": 0.0039, "step": 172200 }, { "epoch": 1.1329381656940982, "grad_norm": 0.009650551210384502, "learning_rate": 4.71272962194585e-06, "loss": 0.001, "step": 172210 }, { "epoch": 1.1330039538693315, "grad_norm": 0.0031890950638899375, "learning_rate": 4.712156461213179e-06, "loss": 0.0007, "step": 172220 }, { "epoch": 1.1330697420445648, "grad_norm": 0.057470124757157084, "learning_rate": 4.711583304275476e-06, "loss": 0.0009, "step": 172230 }, { "epoch": 1.1331355302197983, "grad_norm": 0.022973366279573217, "learning_rate": 4.711010151140299e-06, "loss": 0.0006, "step": 172240 }, { "epoch": 1.1332013183950318, "grad_norm": 0.006719775922047947, "learning_rate": 4.710437001815203e-06, "loss": 0.0003, "step": 172250 }, { "epoch": 1.133267106570265, "grad_norm": 0.004342927180397861, "learning_rate": 4.7098638563077455e-06, "loss": 0.0007, "step": 172260 }, { "epoch": 1.1333328947454984, "grad_norm": 0.015201941948687845, "learning_rate": 4.70929071462548e-06, "loss": 0.0003, "step": 172270 }, { "epoch": 1.1333986829207319, "grad_norm": 0.026834641509270164, "learning_rate": 4.708717576775968e-06, "loss": 0.0008, "step": 172280 }, { "epoch": 1.1334644710959652, "grad_norm": 0.03380312556940065, "learning_rate": 4.708144442766763e-06, "loss": 0.0009, "step": 172290 }, { "epoch": 1.1335302592711987, "grad_norm": 0.04730026033953333, "learning_rate": 4.707571312605422e-06, "loss": 0.0008, "step": 172300 }, { "epoch": 1.133596047446432, "grad_norm": 0.07517656203876542, "learning_rate": 4.7069981862995e-06, "loss": 0.0004, "step": 172310 }, { "epoch": 1.1336618356216654, "grad_norm": 0.01432486108428589, "learning_rate": 4.706425063856553e-06, "loss": 0.0004, "step": 172320 }, { "epoch": 1.1337276237968987, "grad_norm": 0.11938511323494823, "learning_rate": 4.705851945284138e-06, "loss": 0.0028, "step": 172330 }, { "epoch": 1.1337934119721322, "grad_norm": 0.01964269643253622, "learning_rate": 4.705278830589812e-06, "loss": 0.0013, "step": 172340 }, { "epoch": 1.1338592001473655, "grad_norm": 0.04865361173299305, "learning_rate": 4.70470571978113e-06, "loss": 0.0005, "step": 172350 }, { "epoch": 1.1339249883225988, "grad_norm": 0.006323359199912836, "learning_rate": 4.704132612865648e-06, "loss": 0.0007, "step": 172360 }, { "epoch": 1.1339907764978323, "grad_norm": 0.027765030289218753, "learning_rate": 4.703559509850921e-06, "loss": 0.0006, "step": 172370 }, { "epoch": 1.1340565646730656, "grad_norm": 0.04709429797966497, "learning_rate": 4.702986410744507e-06, "loss": 0.0017, "step": 172380 }, { "epoch": 1.134122352848299, "grad_norm": 0.022639919919653306, "learning_rate": 4.702413315553961e-06, "loss": 0.0009, "step": 172390 }, { "epoch": 1.1341881410235324, "grad_norm": 0.001194874224524923, "learning_rate": 4.7018402242868365e-06, "loss": 0.0007, "step": 172400 }, { "epoch": 1.1342539291987659, "grad_norm": 0.008269748696785243, "learning_rate": 4.701267136950692e-06, "loss": 0.0026, "step": 172410 }, { "epoch": 1.1343197173739992, "grad_norm": 0.049488049272435206, "learning_rate": 4.700694053553083e-06, "loss": 0.001, "step": 172420 }, { "epoch": 1.1343855055492327, "grad_norm": 0.018377406624086427, "learning_rate": 4.7001209741015645e-06, "loss": 0.0011, "step": 172430 }, { "epoch": 1.134451293724466, "grad_norm": 0.05583186649929631, "learning_rate": 4.699547898603691e-06, "loss": 0.0004, "step": 172440 }, { "epoch": 1.1345170818996992, "grad_norm": 0.09658505601280472, "learning_rate": 4.69897482706702e-06, "loss": 0.0008, "step": 172450 }, { "epoch": 1.1345828700749327, "grad_norm": 0.02147890485281501, "learning_rate": 4.698401759499105e-06, "loss": 0.0008, "step": 172460 }, { "epoch": 1.134648658250166, "grad_norm": 0.0044921250463188685, "learning_rate": 4.697828695907502e-06, "loss": 0.0009, "step": 172470 }, { "epoch": 1.1347144464253995, "grad_norm": 0.037336820044720914, "learning_rate": 4.697255636299768e-06, "loss": 0.0007, "step": 172480 }, { "epoch": 1.1347802346006328, "grad_norm": 0.013236260126573134, "learning_rate": 4.696682580683457e-06, "loss": 0.0006, "step": 172490 }, { "epoch": 1.1348460227758663, "grad_norm": 0.000794821220770247, "learning_rate": 4.696109529066124e-06, "loss": 0.0004, "step": 172500 }, { "epoch": 1.1349118109510996, "grad_norm": 0.041157443063237854, "learning_rate": 4.695536481455323e-06, "loss": 0.0007, "step": 172510 }, { "epoch": 1.1349775991263331, "grad_norm": 0.02037797711103272, "learning_rate": 4.694963437858612e-06, "loss": 0.0007, "step": 172520 }, { "epoch": 1.1350433873015664, "grad_norm": 0.007182090442506178, "learning_rate": 4.694390398283543e-06, "loss": 0.0005, "step": 172530 }, { "epoch": 1.1351091754767997, "grad_norm": 0.003432406020374247, "learning_rate": 4.693817362737674e-06, "loss": 0.0007, "step": 172540 }, { "epoch": 1.1351749636520332, "grad_norm": 0.03952912024356701, "learning_rate": 4.6932443312285585e-06, "loss": 0.0021, "step": 172550 }, { "epoch": 1.1352407518272667, "grad_norm": 0.03395748598693893, "learning_rate": 4.6926713037637516e-06, "loss": 0.0003, "step": 172560 }, { "epoch": 1.1353065400025, "grad_norm": 0.057397778521718366, "learning_rate": 4.692098280350809e-06, "loss": 0.0009, "step": 172570 }, { "epoch": 1.1353723281777333, "grad_norm": 0.041568092405922054, "learning_rate": 4.6915252609972835e-06, "loss": 0.0011, "step": 172580 }, { "epoch": 1.1354381163529668, "grad_norm": 0.01014233355632384, "learning_rate": 4.69095224571073e-06, "loss": 0.0009, "step": 172590 }, { "epoch": 1.1355039045282, "grad_norm": 0.06027756989147917, "learning_rate": 4.690379234498706e-06, "loss": 0.0008, "step": 172600 }, { "epoch": 1.1355696927034336, "grad_norm": 0.021360997450316853, "learning_rate": 4.689806227368764e-06, "loss": 0.0008, "step": 172610 }, { "epoch": 1.1356354808786668, "grad_norm": 0.01578854320261518, "learning_rate": 4.689233224328458e-06, "loss": 0.0007, "step": 172620 }, { "epoch": 1.1357012690539003, "grad_norm": 0.008347977961576202, "learning_rate": 4.6886602253853455e-06, "loss": 0.0002, "step": 172630 }, { "epoch": 1.1357670572291336, "grad_norm": 0.007778352207026254, "learning_rate": 4.688087230546978e-06, "loss": 0.0005, "step": 172640 }, { "epoch": 1.1358328454043671, "grad_norm": 0.026001827713441793, "learning_rate": 4.6875142398209106e-06, "loss": 0.0009, "step": 172650 }, { "epoch": 1.1358986335796004, "grad_norm": 0.031227995598581835, "learning_rate": 4.686941253214697e-06, "loss": 0.0006, "step": 172660 }, { "epoch": 1.1359644217548337, "grad_norm": 0.024130045674603043, "learning_rate": 4.686368270735895e-06, "loss": 0.0012, "step": 172670 }, { "epoch": 1.1360302099300672, "grad_norm": 0.01731473096107182, "learning_rate": 4.6857952923920555e-06, "loss": 0.0003, "step": 172680 }, { "epoch": 1.1360959981053005, "grad_norm": 0.2022191891200458, "learning_rate": 4.685222318190733e-06, "loss": 0.0073, "step": 172690 }, { "epoch": 1.136161786280534, "grad_norm": 0.0022960502153475614, "learning_rate": 4.684649348139483e-06, "loss": 0.0014, "step": 172700 }, { "epoch": 1.1362275744557673, "grad_norm": 0.03509010958872129, "learning_rate": 4.68407638224586e-06, "loss": 0.0005, "step": 172710 }, { "epoch": 1.1362933626310008, "grad_norm": 0.01710288352059308, "learning_rate": 4.6835034205174146e-06, "loss": 0.001, "step": 172720 }, { "epoch": 1.136359150806234, "grad_norm": 0.04071206062984717, "learning_rate": 4.682930462961705e-06, "loss": 0.0009, "step": 172730 }, { "epoch": 1.1364249389814676, "grad_norm": 0.004760251800363335, "learning_rate": 4.682357509586283e-06, "loss": 0.0009, "step": 172740 }, { "epoch": 1.1364907271567009, "grad_norm": 0.10549888534772225, "learning_rate": 4.681784560398703e-06, "loss": 0.0007, "step": 172750 }, { "epoch": 1.1365565153319341, "grad_norm": 0.012056101294152766, "learning_rate": 4.681211615406519e-06, "loss": 0.0005, "step": 172760 }, { "epoch": 1.1366223035071676, "grad_norm": 0.10749942867693231, "learning_rate": 4.680638674617285e-06, "loss": 0.0007, "step": 172770 }, { "epoch": 1.136688091682401, "grad_norm": 0.01917027975594695, "learning_rate": 4.680065738038553e-06, "loss": 0.0009, "step": 172780 }, { "epoch": 1.1367538798576344, "grad_norm": 0.004619054831419526, "learning_rate": 4.6794928056778795e-06, "loss": 0.0008, "step": 172790 }, { "epoch": 1.1368196680328677, "grad_norm": 0.06302311447270875, "learning_rate": 4.678919877542816e-06, "loss": 0.0006, "step": 172800 }, { "epoch": 1.1368854562081012, "grad_norm": 0.022644726840030813, "learning_rate": 4.678346953640917e-06, "loss": 0.0009, "step": 172810 }, { "epoch": 1.1369512443833345, "grad_norm": 0.012082531609521142, "learning_rate": 4.677774033979734e-06, "loss": 0.0007, "step": 172820 }, { "epoch": 1.137017032558568, "grad_norm": 0.013458554448258973, "learning_rate": 4.677201118566822e-06, "loss": 0.0034, "step": 172830 }, { "epoch": 1.1370828207338013, "grad_norm": 0.012118482130329945, "learning_rate": 4.676628207409736e-06, "loss": 0.0007, "step": 172840 }, { "epoch": 1.1371486089090346, "grad_norm": 0.03068307616105934, "learning_rate": 4.676055300516027e-06, "loss": 0.001, "step": 172850 }, { "epoch": 1.137214397084268, "grad_norm": 0.27947698981866076, "learning_rate": 4.675482397893249e-06, "loss": 0.0011, "step": 172860 }, { "epoch": 1.1372801852595014, "grad_norm": 0.041804198044692674, "learning_rate": 4.674909499548956e-06, "loss": 0.0008, "step": 172870 }, { "epoch": 1.1373459734347349, "grad_norm": 0.016137336503646165, "learning_rate": 4.6743366054907e-06, "loss": 0.0007, "step": 172880 }, { "epoch": 1.1374117616099682, "grad_norm": 0.01690063206889513, "learning_rate": 4.673763715726033e-06, "loss": 0.0016, "step": 172890 }, { "epoch": 1.1374775497852017, "grad_norm": 0.0495011122502366, "learning_rate": 4.673190830262512e-06, "loss": 0.0006, "step": 172900 }, { "epoch": 1.137543337960435, "grad_norm": 0.000348073786838456, "learning_rate": 4.672617949107686e-06, "loss": 0.001, "step": 172910 }, { "epoch": 1.1376091261356684, "grad_norm": 0.03729828756087483, "learning_rate": 4.672045072269111e-06, "loss": 0.0007, "step": 172920 }, { "epoch": 1.1376749143109017, "grad_norm": 0.03861264458501589, "learning_rate": 4.6714721997543385e-06, "loss": 0.0005, "step": 172930 }, { "epoch": 1.1377407024861352, "grad_norm": 0.07874383325181361, "learning_rate": 4.67089933157092e-06, "loss": 0.0019, "step": 172940 }, { "epoch": 1.1378064906613685, "grad_norm": 0.003496449320806778, "learning_rate": 4.670326467726411e-06, "loss": 0.0006, "step": 172950 }, { "epoch": 1.137872278836602, "grad_norm": 0.055354629387452836, "learning_rate": 4.669753608228361e-06, "loss": 0.0011, "step": 172960 }, { "epoch": 1.1379380670118353, "grad_norm": 0.035280655550951674, "learning_rate": 4.669180753084326e-06, "loss": 0.0007, "step": 172970 }, { "epoch": 1.1380038551870686, "grad_norm": 0.0038173865545232523, "learning_rate": 4.668607902301857e-06, "loss": 0.0009, "step": 172980 }, { "epoch": 1.138069643362302, "grad_norm": 0.06150862954952962, "learning_rate": 4.668035055888506e-06, "loss": 0.0011, "step": 172990 }, { "epoch": 1.1381354315375354, "grad_norm": 0.005687315800492563, "learning_rate": 4.667462213851827e-06, "loss": 0.0004, "step": 173000 }, { "epoch": 1.1382012197127689, "grad_norm": 0.004432505458063605, "learning_rate": 4.666889376199371e-06, "loss": 0.0011, "step": 173010 }, { "epoch": 1.1382670078880022, "grad_norm": 0.06872861975096518, "learning_rate": 4.666316542938689e-06, "loss": 0.001, "step": 173020 }, { "epoch": 1.1383327960632357, "grad_norm": 0.00034960441796677403, "learning_rate": 4.6657437140773375e-06, "loss": 0.0013, "step": 173030 }, { "epoch": 1.138398584238469, "grad_norm": 0.1170212079382518, "learning_rate": 4.665170889622866e-06, "loss": 0.0015, "step": 173040 }, { "epoch": 1.1384643724137025, "grad_norm": 0.0010580282210151384, "learning_rate": 4.664598069582827e-06, "loss": 0.0003, "step": 173050 }, { "epoch": 1.1385301605889357, "grad_norm": 0.09812083998708826, "learning_rate": 4.664025253964773e-06, "loss": 0.0006, "step": 173060 }, { "epoch": 1.138595948764169, "grad_norm": 0.019018275683169418, "learning_rate": 4.663452442776255e-06, "loss": 0.0011, "step": 173070 }, { "epoch": 1.1386617369394025, "grad_norm": 0.022482605638332313, "learning_rate": 4.6628796360248275e-06, "loss": 0.0012, "step": 173080 }, { "epoch": 1.1387275251146358, "grad_norm": 0.00019883431647007004, "learning_rate": 4.662306833718039e-06, "loss": 0.0011, "step": 173090 }, { "epoch": 1.1387933132898693, "grad_norm": 0.043904874638018156, "learning_rate": 4.661734035863445e-06, "loss": 0.0007, "step": 173100 }, { "epoch": 1.1388591014651026, "grad_norm": 0.07389525437240177, "learning_rate": 4.661161242468594e-06, "loss": 0.0011, "step": 173110 }, { "epoch": 1.1389248896403361, "grad_norm": 0.13190764106522657, "learning_rate": 4.66058845354104e-06, "loss": 0.0008, "step": 173120 }, { "epoch": 1.1389906778155694, "grad_norm": 0.0140869305811398, "learning_rate": 4.660015669088335e-06, "loss": 0.0016, "step": 173130 }, { "epoch": 1.139056465990803, "grad_norm": 0.032312604144504695, "learning_rate": 4.659442889118029e-06, "loss": 0.0007, "step": 173140 }, { "epoch": 1.1391222541660362, "grad_norm": 0.031391755658618216, "learning_rate": 4.6588701136376725e-06, "loss": 0.0008, "step": 173150 }, { "epoch": 1.1391880423412695, "grad_norm": 0.11083860208312757, "learning_rate": 4.6582973426548215e-06, "loss": 0.0013, "step": 173160 }, { "epoch": 1.139253830516503, "grad_norm": 0.01214213980955267, "learning_rate": 4.657724576177024e-06, "loss": 0.0013, "step": 173170 }, { "epoch": 1.1393196186917363, "grad_norm": 0.047053730394972605, "learning_rate": 4.657151814211833e-06, "loss": 0.0008, "step": 173180 }, { "epoch": 1.1393854068669698, "grad_norm": 0.028724836328820183, "learning_rate": 4.656579056766799e-06, "loss": 0.0005, "step": 173190 }, { "epoch": 1.139451195042203, "grad_norm": 0.004789868645831438, "learning_rate": 4.656006303849474e-06, "loss": 0.0013, "step": 173200 }, { "epoch": 1.1395169832174366, "grad_norm": 0.021811032381461797, "learning_rate": 4.655433555467407e-06, "loss": 0.0007, "step": 173210 }, { "epoch": 1.1395827713926698, "grad_norm": 0.0010601700612290308, "learning_rate": 4.6548608116281515e-06, "loss": 0.0004, "step": 173220 }, { "epoch": 1.1396485595679033, "grad_norm": 0.03303796592719167, "learning_rate": 4.654288072339259e-06, "loss": 0.0008, "step": 173230 }, { "epoch": 1.1397143477431366, "grad_norm": 0.011197495546775805, "learning_rate": 4.65371533760828e-06, "loss": 0.0003, "step": 173240 }, { "epoch": 1.13978013591837, "grad_norm": 0.06140568665445479, "learning_rate": 4.653142607442764e-06, "loss": 0.0007, "step": 173250 }, { "epoch": 1.1398459240936034, "grad_norm": 0.03930562342748633, "learning_rate": 4.652569881850264e-06, "loss": 0.0008, "step": 173260 }, { "epoch": 1.139911712268837, "grad_norm": 0.00021173788554795054, "learning_rate": 4.65199716083833e-06, "loss": 0.0008, "step": 173270 }, { "epoch": 1.1399775004440702, "grad_norm": 0.053917791576722024, "learning_rate": 4.651424444414512e-06, "loss": 0.0008, "step": 173280 }, { "epoch": 1.1400432886193035, "grad_norm": 0.1349972812321696, "learning_rate": 4.650851732586362e-06, "loss": 0.0015, "step": 173290 }, { "epoch": 1.140109076794537, "grad_norm": 0.014420424574329047, "learning_rate": 4.650279025361431e-06, "loss": 0.0004, "step": 173300 }, { "epoch": 1.1401748649697703, "grad_norm": 0.06820138747818201, "learning_rate": 4.649706322747269e-06, "loss": 0.0015, "step": 173310 }, { "epoch": 1.1402406531450038, "grad_norm": 0.11932831911396966, "learning_rate": 4.649133624751426e-06, "loss": 0.001, "step": 173320 }, { "epoch": 1.140306441320237, "grad_norm": 0.02980875225318925, "learning_rate": 4.648560931381454e-06, "loss": 0.0007, "step": 173330 }, { "epoch": 1.1403722294954706, "grad_norm": 0.028411265324919846, "learning_rate": 4.647988242644903e-06, "loss": 0.0016, "step": 173340 }, { "epoch": 1.1404380176707039, "grad_norm": 0.05436228417994122, "learning_rate": 4.64741555854932e-06, "loss": 0.0005, "step": 173350 }, { "epoch": 1.1405038058459374, "grad_norm": 0.024323424023091126, "learning_rate": 4.646842879102261e-06, "loss": 0.0009, "step": 173360 }, { "epoch": 1.1405695940211706, "grad_norm": 0.19444754906267375, "learning_rate": 4.6462702043112725e-06, "loss": 0.001, "step": 173370 }, { "epoch": 1.140635382196404, "grad_norm": 0.03480142522191908, "learning_rate": 4.645697534183907e-06, "loss": 0.0007, "step": 173380 }, { "epoch": 1.1407011703716374, "grad_norm": 0.017588409021085674, "learning_rate": 4.645124868727713e-06, "loss": 0.0009, "step": 173390 }, { "epoch": 1.1407669585468707, "grad_norm": 0.016640599707879584, "learning_rate": 4.644552207950242e-06, "loss": 0.0005, "step": 173400 }, { "epoch": 1.1408327467221042, "grad_norm": 0.02324771834885588, "learning_rate": 4.6439795518590405e-06, "loss": 0.0006, "step": 173410 }, { "epoch": 1.1408985348973375, "grad_norm": 0.008838908192268607, "learning_rate": 4.643406900461663e-06, "loss": 0.0009, "step": 173420 }, { "epoch": 1.140964323072571, "grad_norm": 0.05360106912546593, "learning_rate": 4.642834253765657e-06, "loss": 0.0011, "step": 173430 }, { "epoch": 1.1410301112478043, "grad_norm": 0.03076972559298718, "learning_rate": 4.642261611778573e-06, "loss": 0.001, "step": 173440 }, { "epoch": 1.1410958994230378, "grad_norm": 0.08040267251314018, "learning_rate": 4.641688974507962e-06, "loss": 0.0009, "step": 173450 }, { "epoch": 1.141161687598271, "grad_norm": 0.04877744269112403, "learning_rate": 4.641116341961372e-06, "loss": 0.0009, "step": 173460 }, { "epoch": 1.1412274757735044, "grad_norm": 0.037537235142286504, "learning_rate": 4.640543714146352e-06, "loss": 0.0004, "step": 173470 }, { "epoch": 1.1412932639487379, "grad_norm": 0.002010064765045899, "learning_rate": 4.639971091070452e-06, "loss": 0.0007, "step": 173480 }, { "epoch": 1.1413590521239712, "grad_norm": 0.07586728015560408, "learning_rate": 4.639398472741224e-06, "loss": 0.0006, "step": 173490 }, { "epoch": 1.1414248402992047, "grad_norm": 0.020783084091372056, "learning_rate": 4.638825859166214e-06, "loss": 0.001, "step": 173500 }, { "epoch": 1.141490628474438, "grad_norm": 0.054747749463306086, "learning_rate": 4.6382532503529745e-06, "loss": 0.0014, "step": 173510 }, { "epoch": 1.1415564166496714, "grad_norm": 0.069747236429302, "learning_rate": 4.6376806463090535e-06, "loss": 0.0006, "step": 173520 }, { "epoch": 1.1416222048249047, "grad_norm": 0.0005565939511930721, "learning_rate": 4.637108047042e-06, "loss": 0.0015, "step": 173530 }, { "epoch": 1.1416879930001382, "grad_norm": 0.004523563828734659, "learning_rate": 4.636535452559363e-06, "loss": 0.0004, "step": 173540 }, { "epoch": 1.1417537811753715, "grad_norm": 0.02654063224043911, "learning_rate": 4.635962862868692e-06, "loss": 0.0005, "step": 173550 }, { "epoch": 1.1418195693506048, "grad_norm": 0.02773363127486992, "learning_rate": 4.635390277977537e-06, "loss": 0.0009, "step": 173560 }, { "epoch": 1.1418853575258383, "grad_norm": 0.022824326044450356, "learning_rate": 4.634817697893446e-06, "loss": 0.0008, "step": 173570 }, { "epoch": 1.1419511457010718, "grad_norm": 0.0008149402684163315, "learning_rate": 4.634245122623969e-06, "loss": 0.0006, "step": 173580 }, { "epoch": 1.142016933876305, "grad_norm": 0.21920555267930433, "learning_rate": 4.633672552176653e-06, "loss": 0.0008, "step": 173590 }, { "epoch": 1.1420827220515384, "grad_norm": 0.06518001199183902, "learning_rate": 4.633099986559049e-06, "loss": 0.0009, "step": 173600 }, { "epoch": 1.1421485102267719, "grad_norm": 0.28310609980996637, "learning_rate": 4.632527425778703e-06, "loss": 0.0015, "step": 173610 }, { "epoch": 1.1422142984020052, "grad_norm": 0.010549619183847359, "learning_rate": 4.6319548698431675e-06, "loss": 0.0008, "step": 173620 }, { "epoch": 1.1422800865772387, "grad_norm": 0.015958416849324548, "learning_rate": 4.631382318759988e-06, "loss": 0.0004, "step": 173630 }, { "epoch": 1.142345874752472, "grad_norm": 0.11530293197773255, "learning_rate": 4.630809772536715e-06, "loss": 0.0009, "step": 173640 }, { "epoch": 1.1424116629277055, "grad_norm": 0.08748311153358819, "learning_rate": 4.630237231180896e-06, "loss": 0.0018, "step": 173650 }, { "epoch": 1.1424774511029387, "grad_norm": 0.0065277868666136964, "learning_rate": 4.62966469470008e-06, "loss": 0.0012, "step": 173660 }, { "epoch": 1.1425432392781723, "grad_norm": 0.042681988130776304, "learning_rate": 4.629092163101814e-06, "loss": 0.0005, "step": 173670 }, { "epoch": 1.1426090274534055, "grad_norm": 0.0016945463066655292, "learning_rate": 4.628519636393649e-06, "loss": 0.0007, "step": 173680 }, { "epoch": 1.1426748156286388, "grad_norm": 0.018193171948034405, "learning_rate": 4.627947114583131e-06, "loss": 0.001, "step": 173690 }, { "epoch": 1.1427406038038723, "grad_norm": 0.06883882920849241, "learning_rate": 4.627374597677809e-06, "loss": 0.0012, "step": 173700 }, { "epoch": 1.1428063919791056, "grad_norm": 0.0022065323489322258, "learning_rate": 4.626802085685232e-06, "loss": 0.0003, "step": 173710 }, { "epoch": 1.1428721801543391, "grad_norm": 0.009171431989163342, "learning_rate": 4.626229578612946e-06, "loss": 0.0008, "step": 173720 }, { "epoch": 1.1429379683295724, "grad_norm": 0.025359017235505887, "learning_rate": 4.625657076468502e-06, "loss": 0.001, "step": 173730 }, { "epoch": 1.143003756504806, "grad_norm": 0.003441516067922398, "learning_rate": 4.625084579259444e-06, "loss": 0.0007, "step": 173740 }, { "epoch": 1.1430695446800392, "grad_norm": 0.023959626781054514, "learning_rate": 4.624512086993323e-06, "loss": 0.0004, "step": 173750 }, { "epoch": 1.1431353328552727, "grad_norm": 0.031025764920917957, "learning_rate": 4.623939599677685e-06, "loss": 0.0008, "step": 173760 }, { "epoch": 1.143201121030506, "grad_norm": 0.06335129136832388, "learning_rate": 4.62336711732008e-06, "loss": 0.0011, "step": 173770 }, { "epoch": 1.1432669092057393, "grad_norm": 0.018227815375711335, "learning_rate": 4.622794639928053e-06, "loss": 0.0005, "step": 173780 }, { "epoch": 1.1433326973809728, "grad_norm": 0.04377235114331485, "learning_rate": 4.622222167509154e-06, "loss": 0.0012, "step": 173790 }, { "epoch": 1.143398485556206, "grad_norm": 0.05584353638820203, "learning_rate": 4.621649700070927e-06, "loss": 0.0006, "step": 173800 }, { "epoch": 1.1434642737314396, "grad_norm": 0.02250459244516137, "learning_rate": 4.621077237620924e-06, "loss": 0.0015, "step": 173810 }, { "epoch": 1.1435300619066728, "grad_norm": 0.0422003158262277, "learning_rate": 4.620504780166691e-06, "loss": 0.0006, "step": 173820 }, { "epoch": 1.1435958500819063, "grad_norm": 0.007712781859484879, "learning_rate": 4.619932327715774e-06, "loss": 0.0013, "step": 173830 }, { "epoch": 1.1436616382571396, "grad_norm": 0.0017746136131440197, "learning_rate": 4.6193598802757214e-06, "loss": 0.0007, "step": 173840 }, { "epoch": 1.1437274264323731, "grad_norm": 0.018733657909999188, "learning_rate": 4.618787437854079e-06, "loss": 0.0011, "step": 173850 }, { "epoch": 1.1437932146076064, "grad_norm": 0.02113138026359188, "learning_rate": 4.618215000458396e-06, "loss": 0.0006, "step": 173860 }, { "epoch": 1.1438590027828397, "grad_norm": 0.02846244575288933, "learning_rate": 4.617642568096217e-06, "loss": 0.0008, "step": 173870 }, { "epoch": 1.1439247909580732, "grad_norm": 0.03688549307414877, "learning_rate": 4.617070140775092e-06, "loss": 0.0004, "step": 173880 }, { "epoch": 1.1439905791333065, "grad_norm": 0.0443917756355306, "learning_rate": 4.616497718502566e-06, "loss": 0.0007, "step": 173890 }, { "epoch": 1.14405636730854, "grad_norm": 0.03649533940261431, "learning_rate": 4.6159253012861865e-06, "loss": 0.001, "step": 173900 }, { "epoch": 1.1441221554837733, "grad_norm": 0.01707189041254735, "learning_rate": 4.6153528891335004e-06, "loss": 0.0006, "step": 173910 }, { "epoch": 1.1441879436590068, "grad_norm": 0.05429688375892609, "learning_rate": 4.614780482052054e-06, "loss": 0.0005, "step": 173920 }, { "epoch": 1.14425373183424, "grad_norm": 0.0017449078524825364, "learning_rate": 4.614208080049393e-06, "loss": 0.0005, "step": 173930 }, { "epoch": 1.1443195200094736, "grad_norm": 0.0011767616920989443, "learning_rate": 4.613635683133067e-06, "loss": 0.0006, "step": 173940 }, { "epoch": 1.1443853081847069, "grad_norm": 0.284122637256148, "learning_rate": 4.61306329131062e-06, "loss": 0.0013, "step": 173950 }, { "epoch": 1.1444510963599404, "grad_norm": 0.0074534702410017075, "learning_rate": 4.6124909045896e-06, "loss": 0.0007, "step": 173960 }, { "epoch": 1.1445168845351736, "grad_norm": 0.03746025980981246, "learning_rate": 4.611918522977553e-06, "loss": 0.0006, "step": 173970 }, { "epoch": 1.1445826727104071, "grad_norm": 0.06375719063778122, "learning_rate": 4.611346146482024e-06, "loss": 0.0004, "step": 173980 }, { "epoch": 1.1446484608856404, "grad_norm": 0.045843574052467764, "learning_rate": 4.610773775110562e-06, "loss": 0.0006, "step": 173990 }, { "epoch": 1.1447142490608737, "grad_norm": 0.019154242420689867, "learning_rate": 4.610201408870709e-06, "loss": 0.0019, "step": 174000 }, { "epoch": 1.1447800372361072, "grad_norm": 0.05567689344627429, "learning_rate": 4.609629047770015e-06, "loss": 0.0007, "step": 174010 }, { "epoch": 1.1448458254113405, "grad_norm": 0.026369333263735414, "learning_rate": 4.609056691816026e-06, "loss": 0.0006, "step": 174020 }, { "epoch": 1.144911613586574, "grad_norm": 0.004735056993537719, "learning_rate": 4.608484341016286e-06, "loss": 0.0009, "step": 174030 }, { "epoch": 1.1449774017618073, "grad_norm": 0.08857277992899956, "learning_rate": 4.607911995378342e-06, "loss": 0.0008, "step": 174040 }, { "epoch": 1.1450431899370408, "grad_norm": 0.03235708349791747, "learning_rate": 4.60733965490974e-06, "loss": 0.0011, "step": 174050 }, { "epoch": 1.145108978112274, "grad_norm": 0.021979455609023212, "learning_rate": 4.606767319618023e-06, "loss": 0.0006, "step": 174060 }, { "epoch": 1.1451747662875076, "grad_norm": 0.002613537106284679, "learning_rate": 4.606194989510742e-06, "loss": 0.0004, "step": 174070 }, { "epoch": 1.1452405544627409, "grad_norm": 0.06347582674244925, "learning_rate": 4.605622664595439e-06, "loss": 0.001, "step": 174080 }, { "epoch": 1.1453063426379742, "grad_norm": 0.08053274557086301, "learning_rate": 4.605050344879661e-06, "loss": 0.0006, "step": 174090 }, { "epoch": 1.1453721308132077, "grad_norm": 0.051667069176330994, "learning_rate": 4.604478030370954e-06, "loss": 0.0006, "step": 174100 }, { "epoch": 1.145437918988441, "grad_norm": 0.03278203839450959, "learning_rate": 4.6039057210768614e-06, "loss": 0.0005, "step": 174110 }, { "epoch": 1.1455037071636744, "grad_norm": 0.026670963916941865, "learning_rate": 4.60333341700493e-06, "loss": 0.0007, "step": 174120 }, { "epoch": 1.1455694953389077, "grad_norm": 0.006565780986804205, "learning_rate": 4.602761118162703e-06, "loss": 0.0006, "step": 174130 }, { "epoch": 1.1456352835141412, "grad_norm": 0.05343564535771974, "learning_rate": 4.60218882455773e-06, "loss": 0.0009, "step": 174140 }, { "epoch": 1.1457010716893745, "grad_norm": 0.03358468247124012, "learning_rate": 4.6016165361975535e-06, "loss": 0.0003, "step": 174150 }, { "epoch": 1.145766859864608, "grad_norm": 0.049705500225112755, "learning_rate": 4.6010442530897186e-06, "loss": 0.0012, "step": 174160 }, { "epoch": 1.1458326480398413, "grad_norm": 0.04908576575804195, "learning_rate": 4.600471975241771e-06, "loss": 0.0005, "step": 174170 }, { "epoch": 1.1458984362150746, "grad_norm": 0.030017190960673467, "learning_rate": 4.5998997026612555e-06, "loss": 0.0006, "step": 174180 }, { "epoch": 1.145964224390308, "grad_norm": 0.017246693034356924, "learning_rate": 4.599327435355715e-06, "loss": 0.0007, "step": 174190 }, { "epoch": 1.1460300125655414, "grad_norm": 0.02561255512210047, "learning_rate": 4.5987551733326974e-06, "loss": 0.0005, "step": 174200 }, { "epoch": 1.1460958007407749, "grad_norm": 0.019434807514668962, "learning_rate": 4.598182916599747e-06, "loss": 0.0003, "step": 174210 }, { "epoch": 1.1461615889160082, "grad_norm": 0.004388875181358668, "learning_rate": 4.597610665164407e-06, "loss": 0.0007, "step": 174220 }, { "epoch": 1.1462273770912417, "grad_norm": 0.09102425205710246, "learning_rate": 4.5970384190342234e-06, "loss": 0.0008, "step": 174230 }, { "epoch": 1.146293165266475, "grad_norm": 0.05983264382704738, "learning_rate": 4.59646617821674e-06, "loss": 0.0012, "step": 174240 }, { "epoch": 1.1463589534417085, "grad_norm": 0.06000967544620947, "learning_rate": 4.595893942719502e-06, "loss": 0.001, "step": 174250 }, { "epoch": 1.1464247416169417, "grad_norm": 0.01788459917377876, "learning_rate": 4.595321712550052e-06, "loss": 0.0008, "step": 174260 }, { "epoch": 1.1464905297921753, "grad_norm": 0.13454775989217962, "learning_rate": 4.594749487715937e-06, "loss": 0.0008, "step": 174270 }, { "epoch": 1.1465563179674085, "grad_norm": 0.032739776376297505, "learning_rate": 4.594177268224699e-06, "loss": 0.0058, "step": 174280 }, { "epoch": 1.146622106142642, "grad_norm": 0.09839061815181599, "learning_rate": 4.5936050540838845e-06, "loss": 0.0012, "step": 174290 }, { "epoch": 1.1466878943178753, "grad_norm": 0.027090088381285276, "learning_rate": 4.593032845301036e-06, "loss": 0.0009, "step": 174300 }, { "epoch": 1.1467536824931086, "grad_norm": 0.030316096622734354, "learning_rate": 4.592460641883698e-06, "loss": 0.0006, "step": 174310 }, { "epoch": 1.1468194706683421, "grad_norm": 0.009959819703410326, "learning_rate": 4.591888443839413e-06, "loss": 0.0005, "step": 174320 }, { "epoch": 1.1468852588435754, "grad_norm": 0.037032955005620635, "learning_rate": 4.591316251175727e-06, "loss": 0.0033, "step": 174330 }, { "epoch": 1.146951047018809, "grad_norm": 0.03842204613089505, "learning_rate": 4.5907440639001845e-06, "loss": 0.0003, "step": 174340 }, { "epoch": 1.1470168351940422, "grad_norm": 0.057114294393075735, "learning_rate": 4.590171882020327e-06, "loss": 0.001, "step": 174350 }, { "epoch": 1.1470826233692757, "grad_norm": 0.022363476588147906, "learning_rate": 4.5895997055437e-06, "loss": 0.0009, "step": 174360 }, { "epoch": 1.147148411544509, "grad_norm": 0.02416732637682284, "learning_rate": 4.589027534477847e-06, "loss": 0.0005, "step": 174370 }, { "epoch": 1.1472141997197425, "grad_norm": 0.027663521309863045, "learning_rate": 4.588455368830309e-06, "loss": 0.0006, "step": 174380 }, { "epoch": 1.1472799878949758, "grad_norm": 0.1705033787500344, "learning_rate": 4.587883208608632e-06, "loss": 0.0008, "step": 174390 }, { "epoch": 1.147345776070209, "grad_norm": 0.09852072681579464, "learning_rate": 4.587311053820359e-06, "loss": 0.002, "step": 174400 }, { "epoch": 1.1474115642454426, "grad_norm": 0.015341027458772489, "learning_rate": 4.586738904473035e-06, "loss": 0.0003, "step": 174410 }, { "epoch": 1.1474773524206758, "grad_norm": 0.00456886792895212, "learning_rate": 4.5861667605742e-06, "loss": 0.0008, "step": 174420 }, { "epoch": 1.1475431405959093, "grad_norm": 0.01704380977546482, "learning_rate": 4.5855946221314e-06, "loss": 0.0003, "step": 174430 }, { "epoch": 1.1476089287711426, "grad_norm": 0.0016663012636726766, "learning_rate": 4.585022489152177e-06, "loss": 0.0004, "step": 174440 }, { "epoch": 1.1476747169463761, "grad_norm": 0.06960881657624986, "learning_rate": 4.5844503616440726e-06, "loss": 0.0007, "step": 174450 }, { "epoch": 1.1477405051216094, "grad_norm": 0.09000491243484147, "learning_rate": 4.583878239614632e-06, "loss": 0.001, "step": 174460 }, { "epoch": 1.147806293296843, "grad_norm": 0.6268829902920046, "learning_rate": 4.583306123071397e-06, "loss": 0.0009, "step": 174470 }, { "epoch": 1.1478720814720762, "grad_norm": 0.012196894963627226, "learning_rate": 4.582734012021912e-06, "loss": 0.0006, "step": 174480 }, { "epoch": 1.1479378696473095, "grad_norm": 0.011769225763895379, "learning_rate": 4.582161906473718e-06, "loss": 0.0004, "step": 174490 }, { "epoch": 1.148003657822543, "grad_norm": 0.10675919062304796, "learning_rate": 4.581589806434359e-06, "loss": 0.0017, "step": 174500 }, { "epoch": 1.1480694459977763, "grad_norm": 0.21876005776935134, "learning_rate": 4.581017711911377e-06, "loss": 0.0018, "step": 174510 }, { "epoch": 1.1481352341730098, "grad_norm": 0.036830607061532766, "learning_rate": 4.580445622912313e-06, "loss": 0.0002, "step": 174520 }, { "epoch": 1.148201022348243, "grad_norm": 0.008081887648207846, "learning_rate": 4.5798735394447124e-06, "loss": 0.0006, "step": 174530 }, { "epoch": 1.1482668105234766, "grad_norm": 0.026671954257841628, "learning_rate": 4.579301461516116e-06, "loss": 0.0005, "step": 174540 }, { "epoch": 1.1483325986987098, "grad_norm": 0.00045803724972146416, "learning_rate": 4.578729389134067e-06, "loss": 0.0009, "step": 174550 }, { "epoch": 1.1483983868739434, "grad_norm": 0.13906317287584496, "learning_rate": 4.578157322306107e-06, "loss": 0.0034, "step": 174560 }, { "epoch": 1.1484641750491766, "grad_norm": 0.0032969146458162596, "learning_rate": 4.577585261039779e-06, "loss": 0.0008, "step": 174570 }, { "epoch": 1.14852996322441, "grad_norm": 0.05549564631384714, "learning_rate": 4.577013205342622e-06, "loss": 0.0006, "step": 174580 }, { "epoch": 1.1485957513996434, "grad_norm": 0.019413870785162105, "learning_rate": 4.576441155222183e-06, "loss": 0.0007, "step": 174590 }, { "epoch": 1.148661539574877, "grad_norm": 0.0051237824301652345, "learning_rate": 4.575869110686001e-06, "loss": 0.0003, "step": 174600 }, { "epoch": 1.1487273277501102, "grad_norm": 0.14044971147300223, "learning_rate": 4.575297071741619e-06, "loss": 0.0008, "step": 174610 }, { "epoch": 1.1487931159253435, "grad_norm": 0.07241765339165496, "learning_rate": 4.5747250383965774e-06, "loss": 0.001, "step": 174620 }, { "epoch": 1.148858904100577, "grad_norm": 0.014995736899489446, "learning_rate": 4.57415301065842e-06, "loss": 0.001, "step": 174630 }, { "epoch": 1.1489246922758103, "grad_norm": 0.013361356529074182, "learning_rate": 4.573580988534686e-06, "loss": 0.0005, "step": 174640 }, { "epoch": 1.1489904804510438, "grad_norm": 0.0413659790945424, "learning_rate": 4.573008972032918e-06, "loss": 0.0009, "step": 174650 }, { "epoch": 1.149056268626277, "grad_norm": 0.06402535797027081, "learning_rate": 4.572436961160659e-06, "loss": 0.0012, "step": 174660 }, { "epoch": 1.1491220568015106, "grad_norm": 0.030494845893654438, "learning_rate": 4.57186495592545e-06, "loss": 0.0007, "step": 174670 }, { "epoch": 1.1491878449767439, "grad_norm": 0.05346357251229494, "learning_rate": 4.5712929563348305e-06, "loss": 0.0008, "step": 174680 }, { "epoch": 1.1492536331519774, "grad_norm": 0.020180389146577653, "learning_rate": 4.570720962396346e-06, "loss": 0.0016, "step": 174690 }, { "epoch": 1.1493194213272107, "grad_norm": 0.0017052728705415291, "learning_rate": 4.570148974117532e-06, "loss": 0.0009, "step": 174700 }, { "epoch": 1.149385209502444, "grad_norm": 0.02777041680878749, "learning_rate": 4.569576991505933e-06, "loss": 0.0008, "step": 174710 }, { "epoch": 1.1494509976776774, "grad_norm": 0.0017801179718081551, "learning_rate": 4.56900501456909e-06, "loss": 0.0007, "step": 174720 }, { "epoch": 1.1495167858529107, "grad_norm": 0.007556898845017895, "learning_rate": 4.568433043314544e-06, "loss": 0.0005, "step": 174730 }, { "epoch": 1.1495825740281442, "grad_norm": 0.021385430220738643, "learning_rate": 4.567861077749834e-06, "loss": 0.0005, "step": 174740 }, { "epoch": 1.1496483622033775, "grad_norm": 0.09436330254016236, "learning_rate": 4.567289117882503e-06, "loss": 0.0004, "step": 174750 }, { "epoch": 1.149714150378611, "grad_norm": 0.0478054592231725, "learning_rate": 4.566717163720092e-06, "loss": 0.001, "step": 174760 }, { "epoch": 1.1497799385538443, "grad_norm": 0.021815764504935473, "learning_rate": 4.56614521527014e-06, "loss": 0.0007, "step": 174770 }, { "epoch": 1.1498457267290778, "grad_norm": 0.0024420750885373456, "learning_rate": 4.565573272540191e-06, "loss": 0.0006, "step": 174780 }, { "epoch": 1.149911514904311, "grad_norm": 0.04574212288882944, "learning_rate": 4.565001335537781e-06, "loss": 0.0009, "step": 174790 }, { "epoch": 1.1499773030795444, "grad_norm": 0.05495498308122935, "learning_rate": 4.564429404270454e-06, "loss": 0.0006, "step": 174800 }, { "epoch": 1.1500430912547779, "grad_norm": 0.00198576170509526, "learning_rate": 4.563857478745747e-06, "loss": 0.0005, "step": 174810 }, { "epoch": 1.1501088794300112, "grad_norm": 0.10467753259585436, "learning_rate": 4.563285558971204e-06, "loss": 0.0011, "step": 174820 }, { "epoch": 1.1501746676052447, "grad_norm": 0.09431333944360663, "learning_rate": 4.562713644954363e-06, "loss": 0.0012, "step": 174830 }, { "epoch": 1.150240455780478, "grad_norm": 0.03937223115326021, "learning_rate": 4.562141736702766e-06, "loss": 0.0008, "step": 174840 }, { "epoch": 1.1503062439557115, "grad_norm": 0.004698505495827592, "learning_rate": 4.561569834223951e-06, "loss": 0.0012, "step": 174850 }, { "epoch": 1.1503720321309447, "grad_norm": 0.05999262555949436, "learning_rate": 4.560997937525461e-06, "loss": 0.0008, "step": 174860 }, { "epoch": 1.1504378203061782, "grad_norm": 0.05062156939913671, "learning_rate": 4.560426046614832e-06, "loss": 0.0007, "step": 174870 }, { "epoch": 1.1505036084814115, "grad_norm": 0.0012918615069045082, "learning_rate": 4.559854161499606e-06, "loss": 0.0004, "step": 174880 }, { "epoch": 1.1505693966566448, "grad_norm": 0.032924668709326546, "learning_rate": 4.5592822821873235e-06, "loss": 0.0007, "step": 174890 }, { "epoch": 1.1506351848318783, "grad_norm": 0.03782668602611905, "learning_rate": 4.558710408685523e-06, "loss": 0.0006, "step": 174900 }, { "epoch": 1.1507009730071116, "grad_norm": 0.0046277096318626315, "learning_rate": 4.558138541001745e-06, "loss": 0.0005, "step": 174910 }, { "epoch": 1.150766761182345, "grad_norm": 0.010272808743007754, "learning_rate": 4.557566679143529e-06, "loss": 0.0006, "step": 174920 }, { "epoch": 1.1508325493575784, "grad_norm": 0.04470289100958545, "learning_rate": 4.5569948231184145e-06, "loss": 0.002, "step": 174930 }, { "epoch": 1.150898337532812, "grad_norm": 0.010485261289500157, "learning_rate": 4.5564229729339385e-06, "loss": 0.0004, "step": 174940 }, { "epoch": 1.1509641257080452, "grad_norm": 0.014198285506039871, "learning_rate": 4.555851128597644e-06, "loss": 0.0007, "step": 174950 }, { "epoch": 1.1510299138832787, "grad_norm": 0.0874711006317293, "learning_rate": 4.5552792901170696e-06, "loss": 0.0005, "step": 174960 }, { "epoch": 1.151095702058512, "grad_norm": 0.15561855177663725, "learning_rate": 4.554707457499753e-06, "loss": 0.0011, "step": 174970 }, { "epoch": 1.1511614902337455, "grad_norm": 0.0656558152308677, "learning_rate": 4.554135630753233e-06, "loss": 0.0006, "step": 174980 }, { "epoch": 1.1512272784089788, "grad_norm": 0.0375867210319725, "learning_rate": 4.553563809885051e-06, "loss": 0.0007, "step": 174990 }, { "epoch": 1.1512930665842123, "grad_norm": 0.14207618807328626, "learning_rate": 4.552991994902745e-06, "loss": 0.0013, "step": 175000 }, { "epoch": 1.1512930665842123, "eval_loss": 0.0005478959647007287, "eval_runtime": 13.0853, "eval_samples_per_second": 15.284, "eval_steps_per_second": 7.642, "step": 175000 }, { "epoch": 1.1513588547594455, "grad_norm": 0.021961658075060023, "learning_rate": 4.552420185813851e-06, "loss": 0.0007, "step": 175010 }, { "epoch": 1.1514246429346788, "grad_norm": 0.028498536748963306, "learning_rate": 4.551848382625912e-06, "loss": 0.0005, "step": 175020 }, { "epoch": 1.1514904311099123, "grad_norm": 0.027601531691083814, "learning_rate": 4.551276585346465e-06, "loss": 0.001, "step": 175030 }, { "epoch": 1.1515562192851456, "grad_norm": 0.020862759714357075, "learning_rate": 4.550704793983049e-06, "loss": 0.0009, "step": 175040 }, { "epoch": 1.1516220074603791, "grad_norm": 0.030642839258392472, "learning_rate": 4.550133008543202e-06, "loss": 0.0003, "step": 175050 }, { "epoch": 1.1516877956356124, "grad_norm": 0.023995056539572106, "learning_rate": 4.5495612290344635e-06, "loss": 0.0011, "step": 175060 }, { "epoch": 1.151753583810846, "grad_norm": 0.043821453421344754, "learning_rate": 4.548989455464368e-06, "loss": 0.0009, "step": 175070 }, { "epoch": 1.1518193719860792, "grad_norm": 0.007037197943775509, "learning_rate": 4.54841768784046e-06, "loss": 0.0003, "step": 175080 }, { "epoch": 1.1518851601613127, "grad_norm": 0.004499305133700031, "learning_rate": 4.547845926170275e-06, "loss": 0.0004, "step": 175090 }, { "epoch": 1.151950948336546, "grad_norm": 0.07178504957013974, "learning_rate": 4.54727417046135e-06, "loss": 0.0012, "step": 175100 }, { "epoch": 1.1520167365117793, "grad_norm": 0.009503399338928168, "learning_rate": 4.546702420721224e-06, "loss": 0.0006, "step": 175110 }, { "epoch": 1.1520825246870128, "grad_norm": 0.0164141400624519, "learning_rate": 4.5461306769574355e-06, "loss": 0.0013, "step": 175120 }, { "epoch": 1.152148312862246, "grad_norm": 0.035664156913262544, "learning_rate": 4.5455589391775216e-06, "loss": 0.0007, "step": 175130 }, { "epoch": 1.1522141010374796, "grad_norm": 0.014320487860955246, "learning_rate": 4.54498720738902e-06, "loss": 0.0005, "step": 175140 }, { "epoch": 1.1522798892127128, "grad_norm": 0.02989073492633222, "learning_rate": 4.5444154815994696e-06, "loss": 0.0009, "step": 175150 }, { "epoch": 1.1523456773879464, "grad_norm": 0.027911892808001836, "learning_rate": 4.543843761816408e-06, "loss": 0.0007, "step": 175160 }, { "epoch": 1.1524114655631796, "grad_norm": 0.02147072122219159, "learning_rate": 4.5432720480473725e-06, "loss": 0.001, "step": 175170 }, { "epoch": 1.1524772537384131, "grad_norm": 0.12801483690255094, "learning_rate": 4.5427003402999e-06, "loss": 0.0008, "step": 175180 }, { "epoch": 1.1525430419136464, "grad_norm": 0.008705625120237214, "learning_rate": 4.54212863858153e-06, "loss": 0.0007, "step": 175190 }, { "epoch": 1.1526088300888797, "grad_norm": 0.033065770273506226, "learning_rate": 4.541556942899796e-06, "loss": 0.0007, "step": 175200 }, { "epoch": 1.1526746182641132, "grad_norm": 0.03584930864459483, "learning_rate": 4.540985253262239e-06, "loss": 0.0013, "step": 175210 }, { "epoch": 1.1527404064393465, "grad_norm": 0.07622746613566811, "learning_rate": 4.540413569676396e-06, "loss": 0.0009, "step": 175220 }, { "epoch": 1.15280619461458, "grad_norm": 0.02869995260372387, "learning_rate": 4.539841892149802e-06, "loss": 0.0008, "step": 175230 }, { "epoch": 1.1528719827898133, "grad_norm": 0.03247068876139676, "learning_rate": 4.5392702206899954e-06, "loss": 0.0004, "step": 175240 }, { "epoch": 1.1529377709650468, "grad_norm": 0.03401768635440372, "learning_rate": 4.538698555304514e-06, "loss": 0.0008, "step": 175250 }, { "epoch": 1.15300355914028, "grad_norm": 0.09554005578804035, "learning_rate": 4.538126896000893e-06, "loss": 0.0013, "step": 175260 }, { "epoch": 1.1530693473155136, "grad_norm": 0.07622173274672242, "learning_rate": 4.537555242786669e-06, "loss": 0.0004, "step": 175270 }, { "epoch": 1.1531351354907469, "grad_norm": 0.03610376409160884, "learning_rate": 4.53698359566938e-06, "loss": 0.0006, "step": 175280 }, { "epoch": 1.1532009236659804, "grad_norm": 0.0963066674934892, "learning_rate": 4.5364119546565634e-06, "loss": 0.0008, "step": 175290 }, { "epoch": 1.1532667118412137, "grad_norm": 0.02365239535399228, "learning_rate": 4.535840319755754e-06, "loss": 0.0008, "step": 175300 }, { "epoch": 1.1533325000164472, "grad_norm": 0.010329142245770891, "learning_rate": 4.53526869097449e-06, "loss": 0.0006, "step": 175310 }, { "epoch": 1.1533982881916804, "grad_norm": 0.10297656006741665, "learning_rate": 4.534697068320307e-06, "loss": 0.0011, "step": 175320 }, { "epoch": 1.1534640763669137, "grad_norm": 0.013130381199787856, "learning_rate": 4.53412545180074e-06, "loss": 0.0007, "step": 175330 }, { "epoch": 1.1535298645421472, "grad_norm": 0.004620887823273954, "learning_rate": 4.5335538414233274e-06, "loss": 0.0007, "step": 175340 }, { "epoch": 1.1535956527173805, "grad_norm": 0.010902862332615833, "learning_rate": 4.532982237195605e-06, "loss": 0.0003, "step": 175350 }, { "epoch": 1.153661440892614, "grad_norm": 0.06735061226874944, "learning_rate": 4.532410639125108e-06, "loss": 0.0006, "step": 175360 }, { "epoch": 1.1537272290678473, "grad_norm": 0.13634612713489172, "learning_rate": 4.5318390472193725e-06, "loss": 0.0011, "step": 175370 }, { "epoch": 1.1537930172430808, "grad_norm": 0.042704734200568036, "learning_rate": 4.531267461485935e-06, "loss": 0.0007, "step": 175380 }, { "epoch": 1.153858805418314, "grad_norm": 0.009825162529934936, "learning_rate": 4.530695881932331e-06, "loss": 0.0007, "step": 175390 }, { "epoch": 1.1539245935935476, "grad_norm": 0.07604689365168209, "learning_rate": 4.530124308566096e-06, "loss": 0.0005, "step": 175400 }, { "epoch": 1.1539903817687809, "grad_norm": 0.12364478775242518, "learning_rate": 4.5295527413947675e-06, "loss": 0.001, "step": 175410 }, { "epoch": 1.1540561699440142, "grad_norm": 0.024904069991809056, "learning_rate": 4.52898118042588e-06, "loss": 0.0003, "step": 175420 }, { "epoch": 1.1541219581192477, "grad_norm": 0.06221755988426858, "learning_rate": 4.528409625666968e-06, "loss": 0.0009, "step": 175430 }, { "epoch": 1.154187746294481, "grad_norm": 0.0713508533643825, "learning_rate": 4.5278380771255684e-06, "loss": 0.0017, "step": 175440 }, { "epoch": 1.1542535344697145, "grad_norm": 0.05965952957191919, "learning_rate": 4.527266534809215e-06, "loss": 0.0007, "step": 175450 }, { "epoch": 1.1543193226449477, "grad_norm": 0.04161909782514579, "learning_rate": 4.526694998725444e-06, "loss": 0.0009, "step": 175460 }, { "epoch": 1.1543851108201812, "grad_norm": 0.025752414128973448, "learning_rate": 4.5261234688817915e-06, "loss": 0.0008, "step": 175470 }, { "epoch": 1.1544508989954145, "grad_norm": 0.05228972052003887, "learning_rate": 4.5255519452857914e-06, "loss": 0.001, "step": 175480 }, { "epoch": 1.154516687170648, "grad_norm": 0.0006618671583643532, "learning_rate": 4.52498042794498e-06, "loss": 0.001, "step": 175490 }, { "epoch": 1.1545824753458813, "grad_norm": 0.0188230131062617, "learning_rate": 4.52440891686689e-06, "loss": 0.0008, "step": 175500 }, { "epoch": 1.1546482635211146, "grad_norm": 0.0293350079184499, "learning_rate": 4.523837412059059e-06, "loss": 0.0004, "step": 175510 }, { "epoch": 1.154714051696348, "grad_norm": 0.003797868876745051, "learning_rate": 4.523265913529019e-06, "loss": 0.0005, "step": 175520 }, { "epoch": 1.1547798398715814, "grad_norm": 0.0010926681534352276, "learning_rate": 4.522694421284307e-06, "loss": 0.0004, "step": 175530 }, { "epoch": 1.154845628046815, "grad_norm": 0.0792522957177542, "learning_rate": 4.522122935332457e-06, "loss": 0.0007, "step": 175540 }, { "epoch": 1.1549114162220482, "grad_norm": 0.09292141516622827, "learning_rate": 4.521551455681003e-06, "loss": 0.0013, "step": 175550 }, { "epoch": 1.1549772043972817, "grad_norm": 0.022702340361499294, "learning_rate": 4.520979982337481e-06, "loss": 0.0016, "step": 175560 }, { "epoch": 1.155042992572515, "grad_norm": 0.061664008204590816, "learning_rate": 4.520408515309423e-06, "loss": 0.0005, "step": 175570 }, { "epoch": 1.1551087807477485, "grad_norm": 0.0004119100718667166, "learning_rate": 4.519837054604365e-06, "loss": 0.0022, "step": 175580 }, { "epoch": 1.1551745689229818, "grad_norm": 0.09438943880824945, "learning_rate": 4.51926560022984e-06, "loss": 0.0012, "step": 175590 }, { "epoch": 1.155240357098215, "grad_norm": 0.04071271182833236, "learning_rate": 4.518694152193383e-06, "loss": 0.0003, "step": 175600 }, { "epoch": 1.1553061452734485, "grad_norm": 0.00401372565019911, "learning_rate": 4.518122710502529e-06, "loss": 0.0005, "step": 175610 }, { "epoch": 1.155371933448682, "grad_norm": 0.03640907987410015, "learning_rate": 4.51755127516481e-06, "loss": 0.0004, "step": 175620 }, { "epoch": 1.1554377216239153, "grad_norm": 0.01962033248614818, "learning_rate": 4.516979846187761e-06, "loss": 0.0004, "step": 175630 }, { "epoch": 1.1555035097991486, "grad_norm": 0.07977053334057234, "learning_rate": 4.516408423578916e-06, "loss": 0.0016, "step": 175640 }, { "epoch": 1.1555692979743821, "grad_norm": 0.002353455553804635, "learning_rate": 4.515837007345808e-06, "loss": 0.0008, "step": 175650 }, { "epoch": 1.1556350861496154, "grad_norm": 0.06058640841245432, "learning_rate": 4.515265597495969e-06, "loss": 0.0002, "step": 175660 }, { "epoch": 1.155700874324849, "grad_norm": 0.0145864427786611, "learning_rate": 4.5146941940369375e-06, "loss": 0.0005, "step": 175670 }, { "epoch": 1.1557666625000822, "grad_norm": 0.0344315858563966, "learning_rate": 4.5141227969762425e-06, "loss": 0.0006, "step": 175680 }, { "epoch": 1.1558324506753157, "grad_norm": 0.02407502728908434, "learning_rate": 4.513551406321419e-06, "loss": 0.0013, "step": 175690 }, { "epoch": 1.155898238850549, "grad_norm": 0.005701033884802566, "learning_rate": 4.51298002208e-06, "loss": 0.0003, "step": 175700 }, { "epoch": 1.1559640270257825, "grad_norm": 0.04676400465400145, "learning_rate": 4.51240864425952e-06, "loss": 0.0002, "step": 175710 }, { "epoch": 1.1560298152010158, "grad_norm": 0.04205972475596482, "learning_rate": 4.511837272867509e-06, "loss": 0.0008, "step": 175720 }, { "epoch": 1.156095603376249, "grad_norm": 0.05819926172307204, "learning_rate": 4.511265907911502e-06, "loss": 0.0012, "step": 175730 }, { "epoch": 1.1561613915514826, "grad_norm": 0.00844774282814952, "learning_rate": 4.5106945493990324e-06, "loss": 0.0005, "step": 175740 }, { "epoch": 1.1562271797267158, "grad_norm": 0.002343574018772973, "learning_rate": 4.510123197337633e-06, "loss": 0.0009, "step": 175750 }, { "epoch": 1.1562929679019494, "grad_norm": 0.015952779657704783, "learning_rate": 4.509551851734836e-06, "loss": 0.0006, "step": 175760 }, { "epoch": 1.1563587560771826, "grad_norm": 0.00796683876291376, "learning_rate": 4.508980512598175e-06, "loss": 0.0009, "step": 175770 }, { "epoch": 1.1564245442524161, "grad_norm": 0.025727692106173747, "learning_rate": 4.5084091799351805e-06, "loss": 0.0008, "step": 175780 }, { "epoch": 1.1564903324276494, "grad_norm": 0.07767009704965941, "learning_rate": 4.507837853753386e-06, "loss": 0.0019, "step": 175790 }, { "epoch": 1.156556120602883, "grad_norm": 0.03184501559128239, "learning_rate": 4.507266534060325e-06, "loss": 0.0008, "step": 175800 }, { "epoch": 1.1566219087781162, "grad_norm": 0.03652862677814813, "learning_rate": 4.50669522086353e-06, "loss": 0.0014, "step": 175810 }, { "epoch": 1.1566876969533495, "grad_norm": 0.007826242763944752, "learning_rate": 4.506123914170532e-06, "loss": 0.0008, "step": 175820 }, { "epoch": 1.156753485128583, "grad_norm": 0.053143039198721174, "learning_rate": 4.505552613988864e-06, "loss": 0.001, "step": 175830 }, { "epoch": 1.1568192733038163, "grad_norm": 0.054361882582908964, "learning_rate": 4.504981320326057e-06, "loss": 0.0008, "step": 175840 }, { "epoch": 1.1568850614790498, "grad_norm": 0.00815148989548414, "learning_rate": 4.504410033189645e-06, "loss": 0.0008, "step": 175850 }, { "epoch": 1.156950849654283, "grad_norm": 0.010882294324111176, "learning_rate": 4.503838752587156e-06, "loss": 0.0013, "step": 175860 }, { "epoch": 1.1570166378295166, "grad_norm": 0.015642229078141372, "learning_rate": 4.503267478526127e-06, "loss": 0.0006, "step": 175870 }, { "epoch": 1.1570824260047499, "grad_norm": 0.0722111011626153, "learning_rate": 4.502696211014086e-06, "loss": 0.001, "step": 175880 }, { "epoch": 1.1571482141799834, "grad_norm": 0.07654251432299003, "learning_rate": 4.502124950058567e-06, "loss": 0.001, "step": 175890 }, { "epoch": 1.1572140023552167, "grad_norm": 0.11281407301547061, "learning_rate": 4.5015536956671e-06, "loss": 0.0008, "step": 175900 }, { "epoch": 1.15727979053045, "grad_norm": 0.04074107989408573, "learning_rate": 4.500982447847218e-06, "loss": 0.0005, "step": 175910 }, { "epoch": 1.1573455787056834, "grad_norm": 0.09762583519964897, "learning_rate": 4.500411206606449e-06, "loss": 0.0005, "step": 175920 }, { "epoch": 1.157411366880917, "grad_norm": 0.007636833155349018, "learning_rate": 4.499839971952329e-06, "loss": 0.0007, "step": 175930 }, { "epoch": 1.1574771550561502, "grad_norm": 0.03815440365514323, "learning_rate": 4.4992687438923866e-06, "loss": 0.0007, "step": 175940 }, { "epoch": 1.1575429432313835, "grad_norm": 0.028370267668139562, "learning_rate": 4.498697522434153e-06, "loss": 0.0004, "step": 175950 }, { "epoch": 1.157608731406617, "grad_norm": 0.04881233345783772, "learning_rate": 4.49812630758516e-06, "loss": 0.0005, "step": 175960 }, { "epoch": 1.1576745195818503, "grad_norm": 0.3193451520873012, "learning_rate": 4.497555099352938e-06, "loss": 0.0014, "step": 175970 }, { "epoch": 1.1577403077570838, "grad_norm": 0.0046737241702024136, "learning_rate": 4.496983897745018e-06, "loss": 0.001, "step": 175980 }, { "epoch": 1.157806095932317, "grad_norm": 0.012682955659917198, "learning_rate": 4.49641270276893e-06, "loss": 0.0006, "step": 175990 }, { "epoch": 1.1578718841075506, "grad_norm": 0.03155230882132199, "learning_rate": 4.495841514432207e-06, "loss": 0.0007, "step": 176000 }, { "epoch": 1.1579376722827839, "grad_norm": 0.04720934276426375, "learning_rate": 4.495270332742377e-06, "loss": 0.0005, "step": 176010 }, { "epoch": 1.1580034604580174, "grad_norm": 0.003110528085422082, "learning_rate": 4.494699157706973e-06, "loss": 0.0008, "step": 176020 }, { "epoch": 1.1580692486332507, "grad_norm": 0.07654817297589579, "learning_rate": 4.494127989333524e-06, "loss": 0.0006, "step": 176030 }, { "epoch": 1.158135036808484, "grad_norm": 0.01131933499928008, "learning_rate": 4.49355682762956e-06, "loss": 0.0003, "step": 176040 }, { "epoch": 1.1582008249837175, "grad_norm": 0.030836059687459248, "learning_rate": 4.492985672602611e-06, "loss": 0.0015, "step": 176050 }, { "epoch": 1.1582666131589507, "grad_norm": 0.025145974756716618, "learning_rate": 4.492414524260209e-06, "loss": 0.0005, "step": 176060 }, { "epoch": 1.1583324013341842, "grad_norm": 0.10461779532545198, "learning_rate": 4.491843382609883e-06, "loss": 0.0006, "step": 176070 }, { "epoch": 1.1583981895094175, "grad_norm": 0.018472857126836897, "learning_rate": 4.491272247659164e-06, "loss": 0.0005, "step": 176080 }, { "epoch": 1.158463977684651, "grad_norm": 0.0019033346308653554, "learning_rate": 4.49070111941558e-06, "loss": 0.0006, "step": 176090 }, { "epoch": 1.1585297658598843, "grad_norm": 0.003739713789620014, "learning_rate": 4.490129997886663e-06, "loss": 0.0011, "step": 176100 }, { "epoch": 1.1585955540351178, "grad_norm": 0.0019227322931963525, "learning_rate": 4.489558883079941e-06, "loss": 0.0003, "step": 176110 }, { "epoch": 1.158661342210351, "grad_norm": 0.1439533215571628, "learning_rate": 4.488987775002943e-06, "loss": 0.0006, "step": 176120 }, { "epoch": 1.1587271303855844, "grad_norm": 0.025847782652373173, "learning_rate": 4.488416673663201e-06, "loss": 0.0007, "step": 176130 }, { "epoch": 1.158792918560818, "grad_norm": 0.015195306624141442, "learning_rate": 4.487845579068244e-06, "loss": 0.0007, "step": 176140 }, { "epoch": 1.1588587067360512, "grad_norm": 0.05722590721211179, "learning_rate": 4.4872744912256e-06, "loss": 0.0004, "step": 176150 }, { "epoch": 1.1589244949112847, "grad_norm": 0.04087667121754988, "learning_rate": 4.486703410142799e-06, "loss": 0.0012, "step": 176160 }, { "epoch": 1.158990283086518, "grad_norm": 0.003307919487883337, "learning_rate": 4.48613233582737e-06, "loss": 0.0013, "step": 176170 }, { "epoch": 1.1590560712617515, "grad_norm": 0.04937854648031914, "learning_rate": 4.485561268286841e-06, "loss": 0.0006, "step": 176180 }, { "epoch": 1.1591218594369848, "grad_norm": 0.011165352806306435, "learning_rate": 4.484990207528745e-06, "loss": 0.0006, "step": 176190 }, { "epoch": 1.1591876476122183, "grad_norm": 0.05367413164982835, "learning_rate": 4.484419153560607e-06, "loss": 0.0005, "step": 176200 }, { "epoch": 1.1592534357874515, "grad_norm": 0.03366492331040592, "learning_rate": 4.483848106389958e-06, "loss": 0.0006, "step": 176210 }, { "epoch": 1.1593192239626848, "grad_norm": 0.03235359852378897, "learning_rate": 4.4832770660243255e-06, "loss": 0.0013, "step": 176220 }, { "epoch": 1.1593850121379183, "grad_norm": 0.00022706689218260175, "learning_rate": 4.4827060324712385e-06, "loss": 0.001, "step": 176230 }, { "epoch": 1.1594508003131516, "grad_norm": 0.14659716424413005, "learning_rate": 4.482135005738226e-06, "loss": 0.0006, "step": 176240 }, { "epoch": 1.1595165884883851, "grad_norm": 0.02552441205984271, "learning_rate": 4.481563985832815e-06, "loss": 0.0004, "step": 176250 }, { "epoch": 1.1595823766636184, "grad_norm": 0.004771672652022835, "learning_rate": 4.480992972762536e-06, "loss": 0.0005, "step": 176260 }, { "epoch": 1.159648164838852, "grad_norm": 0.035152903153227985, "learning_rate": 4.480421966534917e-06, "loss": 0.0005, "step": 176270 }, { "epoch": 1.1597139530140852, "grad_norm": 0.0014569538677916092, "learning_rate": 4.479850967157484e-06, "loss": 0.0007, "step": 176280 }, { "epoch": 1.1597797411893187, "grad_norm": 0.029311579374403474, "learning_rate": 4.479279974637769e-06, "loss": 0.0008, "step": 176290 }, { "epoch": 1.159845529364552, "grad_norm": 0.07564169697757611, "learning_rate": 4.478708988983297e-06, "loss": 0.0006, "step": 176300 }, { "epoch": 1.1599113175397855, "grad_norm": 0.06900999940677943, "learning_rate": 4.478138010201595e-06, "loss": 0.0005, "step": 176310 }, { "epoch": 1.1599771057150188, "grad_norm": 0.005247320855646579, "learning_rate": 4.477567038300194e-06, "loss": 0.0007, "step": 176320 }, { "epoch": 1.1600428938902523, "grad_norm": 0.012455449964838468, "learning_rate": 4.476996073286621e-06, "loss": 0.001, "step": 176330 }, { "epoch": 1.1601086820654856, "grad_norm": 0.014236572021482336, "learning_rate": 4.476425115168403e-06, "loss": 0.001, "step": 176340 }, { "epoch": 1.1601744702407188, "grad_norm": 0.03234372894715068, "learning_rate": 4.475854163953067e-06, "loss": 0.0007, "step": 176350 }, { "epoch": 1.1602402584159524, "grad_norm": 0.05045089861155941, "learning_rate": 4.475283219648141e-06, "loss": 0.0008, "step": 176360 }, { "epoch": 1.1603060465911856, "grad_norm": 0.026846792749796008, "learning_rate": 4.474712282261153e-06, "loss": 0.0018, "step": 176370 }, { "epoch": 1.1603718347664191, "grad_norm": 0.03746837994848576, "learning_rate": 4.4741413517996294e-06, "loss": 0.0015, "step": 176380 }, { "epoch": 1.1604376229416524, "grad_norm": 0.05820542256483548, "learning_rate": 4.473570428271098e-06, "loss": 0.0007, "step": 176390 }, { "epoch": 1.160503411116886, "grad_norm": 0.06764843653185808, "learning_rate": 4.472999511683086e-06, "loss": 0.0012, "step": 176400 }, { "epoch": 1.1605691992921192, "grad_norm": 0.07679829370661613, "learning_rate": 4.472428602043121e-06, "loss": 0.0009, "step": 176410 }, { "epoch": 1.1606349874673527, "grad_norm": 0.06941495580451383, "learning_rate": 4.471857699358729e-06, "loss": 0.0008, "step": 176420 }, { "epoch": 1.160700775642586, "grad_norm": 0.1519485515690672, "learning_rate": 4.471286803637437e-06, "loss": 0.0007, "step": 176430 }, { "epoch": 1.1607665638178193, "grad_norm": 0.01451242107854154, "learning_rate": 4.470715914886771e-06, "loss": 0.0002, "step": 176440 }, { "epoch": 1.1608323519930528, "grad_norm": 0.06325013091273635, "learning_rate": 4.470145033114259e-06, "loss": 0.0004, "step": 176450 }, { "epoch": 1.160898140168286, "grad_norm": 0.018605682572493547, "learning_rate": 4.469574158327427e-06, "loss": 0.0009, "step": 176460 }, { "epoch": 1.1609639283435196, "grad_norm": 0.02373279079420374, "learning_rate": 4.4690032905338025e-06, "loss": 0.0005, "step": 176470 }, { "epoch": 1.1610297165187529, "grad_norm": 0.0022369728637709196, "learning_rate": 4.468432429740911e-06, "loss": 0.0006, "step": 176480 }, { "epoch": 1.1610955046939864, "grad_norm": 0.0072086779287303055, "learning_rate": 4.467861575956277e-06, "loss": 0.0006, "step": 176490 }, { "epoch": 1.1611612928692197, "grad_norm": 0.006117768292769377, "learning_rate": 4.467290729187431e-06, "loss": 0.0002, "step": 176500 }, { "epoch": 1.1612270810444532, "grad_norm": 0.012516151222459094, "learning_rate": 4.466719889441894e-06, "loss": 0.0009, "step": 176510 }, { "epoch": 1.1612928692196864, "grad_norm": 0.015385916626976965, "learning_rate": 4.466149056727197e-06, "loss": 0.0013, "step": 176520 }, { "epoch": 1.1613586573949197, "grad_norm": 0.028230423575975397, "learning_rate": 4.465578231050863e-06, "loss": 0.0006, "step": 176530 }, { "epoch": 1.1614244455701532, "grad_norm": 0.1233218932087062, "learning_rate": 4.465007412420418e-06, "loss": 0.0019, "step": 176540 }, { "epoch": 1.1614902337453865, "grad_norm": 0.0003405970957906097, "learning_rate": 4.464436600843389e-06, "loss": 0.0011, "step": 176550 }, { "epoch": 1.16155602192062, "grad_norm": 0.0019795899306999753, "learning_rate": 4.4638657963273e-06, "loss": 0.0005, "step": 176560 }, { "epoch": 1.1616218100958533, "grad_norm": 0.03906299770177264, "learning_rate": 4.463294998879677e-06, "loss": 0.0006, "step": 176570 }, { "epoch": 1.1616875982710868, "grad_norm": 0.021972240410757918, "learning_rate": 4.462724208508049e-06, "loss": 0.0007, "step": 176580 }, { "epoch": 1.16175338644632, "grad_norm": 0.05167223891542068, "learning_rate": 4.4621534252199365e-06, "loss": 0.0019, "step": 176590 }, { "epoch": 1.1618191746215536, "grad_norm": 0.01865774098800016, "learning_rate": 4.4615826490228655e-06, "loss": 0.0006, "step": 176600 }, { "epoch": 1.1618849627967869, "grad_norm": 0.03240088380513882, "learning_rate": 4.461011879924362e-06, "loss": 0.001, "step": 176610 }, { "epoch": 1.1619507509720204, "grad_norm": 0.010571334785187292, "learning_rate": 4.460441117931953e-06, "loss": 0.0009, "step": 176620 }, { "epoch": 1.1620165391472537, "grad_norm": 0.09797328952841453, "learning_rate": 4.45987036305316e-06, "loss": 0.0006, "step": 176630 }, { "epoch": 1.1620823273224872, "grad_norm": 0.006287807990867442, "learning_rate": 4.459299615295512e-06, "loss": 0.0009, "step": 176640 }, { "epoch": 1.1621481154977205, "grad_norm": 0.03659502454160868, "learning_rate": 4.45872887466653e-06, "loss": 0.0005, "step": 176650 }, { "epoch": 1.1622139036729537, "grad_norm": 0.1318775798410661, "learning_rate": 4.458158141173741e-06, "loss": 0.0009, "step": 176660 }, { "epoch": 1.1622796918481872, "grad_norm": 0.02456938080769621, "learning_rate": 4.457587414824668e-06, "loss": 0.0008, "step": 176670 }, { "epoch": 1.1623454800234205, "grad_norm": 0.026639284771034638, "learning_rate": 4.4570166956268365e-06, "loss": 0.0009, "step": 176680 }, { "epoch": 1.162411268198654, "grad_norm": 0.03930351057492614, "learning_rate": 4.456445983587772e-06, "loss": 0.0008, "step": 176690 }, { "epoch": 1.1624770563738873, "grad_norm": 0.022065983180716196, "learning_rate": 4.455875278714998e-06, "loss": 0.0005, "step": 176700 }, { "epoch": 1.1625428445491208, "grad_norm": 0.014641430619800767, "learning_rate": 4.455304581016038e-06, "loss": 0.0009, "step": 176710 }, { "epoch": 1.162608632724354, "grad_norm": 0.04603361539376091, "learning_rate": 4.454733890498416e-06, "loss": 0.0002, "step": 176720 }, { "epoch": 1.1626744208995876, "grad_norm": 0.011032881052830502, "learning_rate": 4.454163207169658e-06, "loss": 0.0008, "step": 176730 }, { "epoch": 1.162740209074821, "grad_norm": 0.018297822109529696, "learning_rate": 4.453592531037285e-06, "loss": 0.0006, "step": 176740 }, { "epoch": 1.1628059972500542, "grad_norm": 0.0033532881993769436, "learning_rate": 4.453021862108824e-06, "loss": 0.0005, "step": 176750 }, { "epoch": 1.1628717854252877, "grad_norm": 0.255188660615803, "learning_rate": 4.452451200391796e-06, "loss": 0.001, "step": 176760 }, { "epoch": 1.162937573600521, "grad_norm": 0.2900969453162855, "learning_rate": 4.451880545893728e-06, "loss": 0.0017, "step": 176770 }, { "epoch": 1.1630033617757545, "grad_norm": 0.03672075906132965, "learning_rate": 4.45130989862214e-06, "loss": 0.0008, "step": 176780 }, { "epoch": 1.1630691499509878, "grad_norm": 0.0008006919473876115, "learning_rate": 4.450739258584558e-06, "loss": 0.0005, "step": 176790 }, { "epoch": 1.1631349381262213, "grad_norm": 0.01849468335760964, "learning_rate": 4.450168625788503e-06, "loss": 0.0007, "step": 176800 }, { "epoch": 1.1632007263014545, "grad_norm": 0.02666183326791526, "learning_rate": 4.449598000241501e-06, "loss": 0.0008, "step": 176810 }, { "epoch": 1.163266514476688, "grad_norm": 0.02776052877737026, "learning_rate": 4.449027381951074e-06, "loss": 0.0007, "step": 176820 }, { "epoch": 1.1633323026519213, "grad_norm": 0.03748444642056255, "learning_rate": 4.448456770924745e-06, "loss": 0.001, "step": 176830 }, { "epoch": 1.1633980908271546, "grad_norm": 0.0272379309410838, "learning_rate": 4.447886167170038e-06, "loss": 0.0008, "step": 176840 }, { "epoch": 1.1634638790023881, "grad_norm": 0.08584639168639205, "learning_rate": 4.447315570694474e-06, "loss": 0.0005, "step": 176850 }, { "epoch": 1.1635296671776214, "grad_norm": 0.07057486799024144, "learning_rate": 4.446744981505578e-06, "loss": 0.0011, "step": 176860 }, { "epoch": 1.163595455352855, "grad_norm": 0.012194486734104533, "learning_rate": 4.446174399610869e-06, "loss": 0.0015, "step": 176870 }, { "epoch": 1.1636612435280882, "grad_norm": 0.014535707867527125, "learning_rate": 4.445603825017874e-06, "loss": 0.0011, "step": 176880 }, { "epoch": 1.1637270317033217, "grad_norm": 0.01980792608383422, "learning_rate": 4.445033257734114e-06, "loss": 0.0013, "step": 176890 }, { "epoch": 1.163792819878555, "grad_norm": 0.035641392482211265, "learning_rate": 4.44446269776711e-06, "loss": 0.0008, "step": 176900 }, { "epoch": 1.1638586080537885, "grad_norm": 0.008056468539479023, "learning_rate": 4.443892145124387e-06, "loss": 0.0008, "step": 176910 }, { "epoch": 1.1639243962290218, "grad_norm": 0.01745225561554214, "learning_rate": 4.443321599813466e-06, "loss": 0.0007, "step": 176920 }, { "epoch": 1.163990184404255, "grad_norm": 0.028791755253778323, "learning_rate": 4.442751061841867e-06, "loss": 0.0004, "step": 176930 }, { "epoch": 1.1640559725794886, "grad_norm": 0.007498491533700522, "learning_rate": 4.442180531217115e-06, "loss": 0.0005, "step": 176940 }, { "epoch": 1.164121760754722, "grad_norm": 0.01879726094470703, "learning_rate": 4.441610007946731e-06, "loss": 0.0006, "step": 176950 }, { "epoch": 1.1641875489299554, "grad_norm": 0.07228552075999656, "learning_rate": 4.441039492038237e-06, "loss": 0.0011, "step": 176960 }, { "epoch": 1.1642533371051886, "grad_norm": 0.03110476987817068, "learning_rate": 4.440468983499155e-06, "loss": 0.0013, "step": 176970 }, { "epoch": 1.1643191252804221, "grad_norm": 0.049660711335065236, "learning_rate": 4.4398984823370054e-06, "loss": 0.0006, "step": 176980 }, { "epoch": 1.1643849134556554, "grad_norm": 0.07087351172682294, "learning_rate": 4.439327988559311e-06, "loss": 0.0006, "step": 176990 }, { "epoch": 1.164450701630889, "grad_norm": 0.0003109167847798076, "learning_rate": 4.438757502173592e-06, "loss": 0.0003, "step": 177000 }, { "epoch": 1.1645164898061222, "grad_norm": 0.07182276737996325, "learning_rate": 4.438187023187371e-06, "loss": 0.001, "step": 177010 }, { "epoch": 1.1645822779813557, "grad_norm": 0.0006709751197823781, "learning_rate": 4.437616551608169e-06, "loss": 0.0008, "step": 177020 }, { "epoch": 1.164648066156589, "grad_norm": 0.0008419423695913013, "learning_rate": 4.437046087443507e-06, "loss": 0.0008, "step": 177030 }, { "epoch": 1.1647138543318225, "grad_norm": 0.031010027775820685, "learning_rate": 4.436475630700907e-06, "loss": 0.001, "step": 177040 }, { "epoch": 1.1647796425070558, "grad_norm": 0.019683384578479842, "learning_rate": 4.435905181387889e-06, "loss": 0.0007, "step": 177050 }, { "epoch": 1.164845430682289, "grad_norm": 0.0016987993456847072, "learning_rate": 4.435334739511973e-06, "loss": 0.0006, "step": 177060 }, { "epoch": 1.1649112188575226, "grad_norm": 0.0533300449523476, "learning_rate": 4.434764305080681e-06, "loss": 0.0005, "step": 177070 }, { "epoch": 1.1649770070327559, "grad_norm": 0.05534205805087116, "learning_rate": 4.434193878101533e-06, "loss": 0.0008, "step": 177080 }, { "epoch": 1.1650427952079894, "grad_norm": 0.002405141867480592, "learning_rate": 4.433623458582051e-06, "loss": 0.0007, "step": 177090 }, { "epoch": 1.1651085833832227, "grad_norm": 0.0016600328629178542, "learning_rate": 4.433053046529755e-06, "loss": 0.0009, "step": 177100 }, { "epoch": 1.1651743715584562, "grad_norm": 0.02088410103319447, "learning_rate": 4.432482641952164e-06, "loss": 0.0006, "step": 177110 }, { "epoch": 1.1652401597336894, "grad_norm": 0.011918467747411525, "learning_rate": 4.431912244856799e-06, "loss": 0.0006, "step": 177120 }, { "epoch": 1.165305947908923, "grad_norm": 0.12418561840868192, "learning_rate": 4.43134185525118e-06, "loss": 0.0013, "step": 177130 }, { "epoch": 1.1653717360841562, "grad_norm": 0.0009107355306482337, "learning_rate": 4.4307714731428286e-06, "loss": 0.0003, "step": 177140 }, { "epoch": 1.1654375242593895, "grad_norm": 0.12873619543536505, "learning_rate": 4.4302010985392634e-06, "loss": 0.0016, "step": 177150 }, { "epoch": 1.165503312434623, "grad_norm": 0.13603561326783184, "learning_rate": 4.4296307314480056e-06, "loss": 0.0009, "step": 177160 }, { "epoch": 1.1655691006098563, "grad_norm": 0.03225837377540889, "learning_rate": 4.429060371876573e-06, "loss": 0.001, "step": 177170 }, { "epoch": 1.1656348887850898, "grad_norm": 0.062024394639823205, "learning_rate": 4.428490019832486e-06, "loss": 0.0015, "step": 177180 }, { "epoch": 1.165700676960323, "grad_norm": 0.03508132173239518, "learning_rate": 4.427919675323264e-06, "loss": 0.0007, "step": 177190 }, { "epoch": 1.1657664651355566, "grad_norm": 0.004480328921606303, "learning_rate": 4.427349338356428e-06, "loss": 0.0006, "step": 177200 }, { "epoch": 1.1658322533107899, "grad_norm": 0.06789019623671622, "learning_rate": 4.4267790089394965e-06, "loss": 0.001, "step": 177210 }, { "epoch": 1.1658980414860234, "grad_norm": 0.005265468359980518, "learning_rate": 4.426208687079989e-06, "loss": 0.0011, "step": 177220 }, { "epoch": 1.1659638296612567, "grad_norm": 0.004469285196374537, "learning_rate": 4.4256383727854235e-06, "loss": 0.0009, "step": 177230 }, { "epoch": 1.16602961783649, "grad_norm": 0.018379707056886022, "learning_rate": 4.42506806606332e-06, "loss": 0.0014, "step": 177240 }, { "epoch": 1.1660954060117235, "grad_norm": 0.05575784063128518, "learning_rate": 4.424497766921199e-06, "loss": 0.0012, "step": 177250 }, { "epoch": 1.1661611941869567, "grad_norm": 0.04383565941407114, "learning_rate": 4.4239274753665755e-06, "loss": 0.0018, "step": 177260 }, { "epoch": 1.1662269823621902, "grad_norm": 0.06185875189797863, "learning_rate": 4.423357191406972e-06, "loss": 0.0016, "step": 177270 }, { "epoch": 1.1662927705374235, "grad_norm": 0.02334477307977984, "learning_rate": 4.422786915049907e-06, "loss": 0.0008, "step": 177280 }, { "epoch": 1.166358558712657, "grad_norm": 0.03558917314617569, "learning_rate": 4.422216646302897e-06, "loss": 0.0006, "step": 177290 }, { "epoch": 1.1664243468878903, "grad_norm": 0.019385922813437814, "learning_rate": 4.421646385173461e-06, "loss": 0.0008, "step": 177300 }, { "epoch": 1.1664901350631238, "grad_norm": 0.053980791489023267, "learning_rate": 4.421076131669119e-06, "loss": 0.0006, "step": 177310 }, { "epoch": 1.166555923238357, "grad_norm": 0.0050218109699310415, "learning_rate": 4.420505885797386e-06, "loss": 0.0008, "step": 177320 }, { "epoch": 1.1666217114135906, "grad_norm": 0.019426195288748047, "learning_rate": 4.419935647565784e-06, "loss": 0.0009, "step": 177330 }, { "epoch": 1.166687499588824, "grad_norm": 0.029257407891519097, "learning_rate": 4.419365416981829e-06, "loss": 0.0002, "step": 177340 }, { "epoch": 1.1667532877640574, "grad_norm": 0.007440360396202246, "learning_rate": 4.418795194053041e-06, "loss": 0.0016, "step": 177350 }, { "epoch": 1.1668190759392907, "grad_norm": 0.03990669752547248, "learning_rate": 4.418224978786935e-06, "loss": 0.0006, "step": 177360 }, { "epoch": 1.166884864114524, "grad_norm": 0.011830139637036668, "learning_rate": 4.417654771191031e-06, "loss": 0.0004, "step": 177370 }, { "epoch": 1.1669506522897575, "grad_norm": 0.10186711765194569, "learning_rate": 4.417084571272846e-06, "loss": 0.0014, "step": 177380 }, { "epoch": 1.1670164404649908, "grad_norm": 0.045004306060240265, "learning_rate": 4.416514379039896e-06, "loss": 0.001, "step": 177390 }, { "epoch": 1.1670822286402243, "grad_norm": 0.08900705919239553, "learning_rate": 4.415944194499701e-06, "loss": 0.0009, "step": 177400 }, { "epoch": 1.1671480168154575, "grad_norm": 0.044064173991149286, "learning_rate": 4.415374017659777e-06, "loss": 0.001, "step": 177410 }, { "epoch": 1.167213804990691, "grad_norm": 0.0806767794024136, "learning_rate": 4.414803848527642e-06, "loss": 0.0008, "step": 177420 }, { "epoch": 1.1672795931659243, "grad_norm": 0.02100806130734973, "learning_rate": 4.414233687110813e-06, "loss": 0.0003, "step": 177430 }, { "epoch": 1.1673453813411578, "grad_norm": 0.03415346824550438, "learning_rate": 4.4136635334168075e-06, "loss": 0.0004, "step": 177440 }, { "epoch": 1.1674111695163911, "grad_norm": 0.0010399188467452001, "learning_rate": 4.41309338745314e-06, "loss": 0.0006, "step": 177450 }, { "epoch": 1.1674769576916244, "grad_norm": 0.027372071602702722, "learning_rate": 4.4125232492273306e-06, "loss": 0.0004, "step": 177460 }, { "epoch": 1.167542745866858, "grad_norm": 0.12896822381704012, "learning_rate": 4.411953118746895e-06, "loss": 0.0011, "step": 177470 }, { "epoch": 1.1676085340420912, "grad_norm": 0.0050508486824861, "learning_rate": 4.411382996019349e-06, "loss": 0.0006, "step": 177480 }, { "epoch": 1.1676743222173247, "grad_norm": 0.03264564522735486, "learning_rate": 4.410812881052211e-06, "loss": 0.0012, "step": 177490 }, { "epoch": 1.167740110392558, "grad_norm": 0.032219594686001024, "learning_rate": 4.410242773852995e-06, "loss": 0.0005, "step": 177500 }, { "epoch": 1.1678058985677915, "grad_norm": 0.04982124114831125, "learning_rate": 4.40967267442922e-06, "loss": 0.0004, "step": 177510 }, { "epoch": 1.1678716867430248, "grad_norm": 0.007635842759193574, "learning_rate": 4.409102582788399e-06, "loss": 0.0003, "step": 177520 }, { "epoch": 1.1679374749182583, "grad_norm": 0.004537882198629023, "learning_rate": 4.408532498938051e-06, "loss": 0.0014, "step": 177530 }, { "epoch": 1.1680032630934916, "grad_norm": 0.0807367659849296, "learning_rate": 4.407962422885692e-06, "loss": 0.0019, "step": 177540 }, { "epoch": 1.1680690512687248, "grad_norm": 0.01901497376227288, "learning_rate": 4.407392354638836e-06, "loss": 0.0007, "step": 177550 }, { "epoch": 1.1681348394439584, "grad_norm": 0.03092649023497017, "learning_rate": 4.406822294205001e-06, "loss": 0.0006, "step": 177560 }, { "epoch": 1.1682006276191916, "grad_norm": 0.015015499103962398, "learning_rate": 4.406252241591701e-06, "loss": 0.0017, "step": 177570 }, { "epoch": 1.1682664157944251, "grad_norm": 0.025160738203311737, "learning_rate": 4.405682196806453e-06, "loss": 0.001, "step": 177580 }, { "epoch": 1.1683322039696584, "grad_norm": 0.011676247716360786, "learning_rate": 4.405112159856771e-06, "loss": 0.0004, "step": 177590 }, { "epoch": 1.168397992144892, "grad_norm": 0.004105348168974209, "learning_rate": 4.404542130750171e-06, "loss": 0.0014, "step": 177600 }, { "epoch": 1.1684637803201252, "grad_norm": 0.004298817164693752, "learning_rate": 4.403972109494171e-06, "loss": 0.0004, "step": 177610 }, { "epoch": 1.1685295684953587, "grad_norm": 0.01620316436824204, "learning_rate": 4.403402096096282e-06, "loss": 0.0006, "step": 177620 }, { "epoch": 1.168595356670592, "grad_norm": 0.05998339879466437, "learning_rate": 4.402832090564023e-06, "loss": 0.0018, "step": 177630 }, { "epoch": 1.1686611448458255, "grad_norm": 0.12946024525485633, "learning_rate": 4.402262092904906e-06, "loss": 0.0013, "step": 177640 }, { "epoch": 1.1687269330210588, "grad_norm": 0.04773895231537418, "learning_rate": 4.401692103126446e-06, "loss": 0.0007, "step": 177650 }, { "epoch": 1.1687927211962923, "grad_norm": 0.03377251454968908, "learning_rate": 4.40112212123616e-06, "loss": 0.0006, "step": 177660 }, { "epoch": 1.1688585093715256, "grad_norm": 0.042429508141533234, "learning_rate": 4.400552147241562e-06, "loss": 0.0006, "step": 177670 }, { "epoch": 1.1689242975467589, "grad_norm": 0.002446708266095827, "learning_rate": 4.399982181150165e-06, "loss": 0.0006, "step": 177680 }, { "epoch": 1.1689900857219924, "grad_norm": 0.00872298469791729, "learning_rate": 4.399412222969486e-06, "loss": 0.0024, "step": 177690 }, { "epoch": 1.1690558738972257, "grad_norm": 0.008433666636394327, "learning_rate": 4.398842272707038e-06, "loss": 0.0004, "step": 177700 }, { "epoch": 1.1691216620724592, "grad_norm": 0.02576914102762433, "learning_rate": 4.398272330370334e-06, "loss": 0.0006, "step": 177710 }, { "epoch": 1.1691874502476924, "grad_norm": 0.039458869742153686, "learning_rate": 4.39770239596689e-06, "loss": 0.0005, "step": 177720 }, { "epoch": 1.169253238422926, "grad_norm": 0.002969683914731612, "learning_rate": 4.3971324695042215e-06, "loss": 0.0015, "step": 177730 }, { "epoch": 1.1693190265981592, "grad_norm": 0.11569301408188924, "learning_rate": 4.39656255098984e-06, "loss": 0.0008, "step": 177740 }, { "epoch": 1.1693848147733927, "grad_norm": 0.0354722605175381, "learning_rate": 4.3959926404312595e-06, "loss": 0.0004, "step": 177750 }, { "epoch": 1.169450602948626, "grad_norm": 0.04552015414024517, "learning_rate": 4.3954227378359945e-06, "loss": 0.0007, "step": 177760 }, { "epoch": 1.1695163911238593, "grad_norm": 0.0010520467745630698, "learning_rate": 4.394852843211559e-06, "loss": 0.0004, "step": 177770 }, { "epoch": 1.1695821792990928, "grad_norm": 0.03965745677468679, "learning_rate": 4.394282956565465e-06, "loss": 0.0008, "step": 177780 }, { "epoch": 1.169647967474326, "grad_norm": 0.013111836011601765, "learning_rate": 4.393713077905228e-06, "loss": 0.0009, "step": 177790 }, { "epoch": 1.1697137556495596, "grad_norm": 0.10931202887850917, "learning_rate": 4.39314320723836e-06, "loss": 0.0013, "step": 177800 }, { "epoch": 1.1697795438247929, "grad_norm": 0.05126063023391021, "learning_rate": 4.392573344572376e-06, "loss": 0.0006, "step": 177810 }, { "epoch": 1.1698453320000264, "grad_norm": 0.009451678960456456, "learning_rate": 4.392003489914787e-06, "loss": 0.0006, "step": 177820 }, { "epoch": 1.1699111201752597, "grad_norm": 0.04223614087165283, "learning_rate": 4.391433643273107e-06, "loss": 0.0005, "step": 177830 }, { "epoch": 1.1699769083504932, "grad_norm": 0.0027565780387283703, "learning_rate": 4.390863804654847e-06, "loss": 0.0009, "step": 177840 }, { "epoch": 1.1700426965257265, "grad_norm": 0.01704614151132488, "learning_rate": 4.3902939740675234e-06, "loss": 0.0007, "step": 177850 }, { "epoch": 1.1701084847009597, "grad_norm": 0.011181466550028545, "learning_rate": 4.389724151518647e-06, "loss": 0.0011, "step": 177860 }, { "epoch": 1.1701742728761932, "grad_norm": 0.10040677534677371, "learning_rate": 4.389154337015731e-06, "loss": 0.0008, "step": 177870 }, { "epoch": 1.1702400610514265, "grad_norm": 0.03375927233157717, "learning_rate": 4.3885845305662875e-06, "loss": 0.0011, "step": 177880 }, { "epoch": 1.17030584922666, "grad_norm": 0.06621557595176891, "learning_rate": 4.3880147321778285e-06, "loss": 0.0009, "step": 177890 }, { "epoch": 1.1703716374018933, "grad_norm": 0.02864320790439273, "learning_rate": 4.3874449418578665e-06, "loss": 0.001, "step": 177900 }, { "epoch": 1.1704374255771268, "grad_norm": 0.13978039435324727, "learning_rate": 4.386875159613914e-06, "loss": 0.0008, "step": 177910 }, { "epoch": 1.17050321375236, "grad_norm": 0.01983564916177381, "learning_rate": 4.386305385453483e-06, "loss": 0.0005, "step": 177920 }, { "epoch": 1.1705690019275936, "grad_norm": 0.010965228328183639, "learning_rate": 4.385735619384085e-06, "loss": 0.0008, "step": 177930 }, { "epoch": 1.170634790102827, "grad_norm": 0.10338090072607131, "learning_rate": 4.385165861413234e-06, "loss": 0.001, "step": 177940 }, { "epoch": 1.1707005782780602, "grad_norm": 0.5636674814960966, "learning_rate": 4.384596111548439e-06, "loss": 0.0009, "step": 177950 }, { "epoch": 1.1707663664532937, "grad_norm": 0.14598147681594867, "learning_rate": 4.384026369797213e-06, "loss": 0.001, "step": 177960 }, { "epoch": 1.1708321546285272, "grad_norm": 0.011733181097061856, "learning_rate": 4.383456636167066e-06, "loss": 0.0006, "step": 177970 }, { "epoch": 1.1708979428037605, "grad_norm": 0.02189349058075854, "learning_rate": 4.382886910665512e-06, "loss": 0.0007, "step": 177980 }, { "epoch": 1.1709637309789938, "grad_norm": 0.0031851400606646213, "learning_rate": 4.382317193300061e-06, "loss": 0.0008, "step": 177990 }, { "epoch": 1.1710295191542273, "grad_norm": 0.055490525531180106, "learning_rate": 4.381747484078225e-06, "loss": 0.0006, "step": 178000 }, { "epoch": 1.1710953073294605, "grad_norm": 0.013106952779698054, "learning_rate": 4.381177783007514e-06, "loss": 0.0011, "step": 178010 }, { "epoch": 1.171161095504694, "grad_norm": 0.018747873044279714, "learning_rate": 4.38060809009544e-06, "loss": 0.0006, "step": 178020 }, { "epoch": 1.1712268836799273, "grad_norm": 0.018732488317585904, "learning_rate": 4.380038405349512e-06, "loss": 0.0005, "step": 178030 }, { "epoch": 1.1712926718551608, "grad_norm": 0.088841947865066, "learning_rate": 4.379468728777242e-06, "loss": 0.0006, "step": 178040 }, { "epoch": 1.1713584600303941, "grad_norm": 0.028835785412104117, "learning_rate": 4.378899060386142e-06, "loss": 0.0006, "step": 178050 }, { "epoch": 1.1714242482056276, "grad_norm": 0.012827772302025791, "learning_rate": 4.3783294001837214e-06, "loss": 0.0011, "step": 178060 }, { "epoch": 1.171490036380861, "grad_norm": 0.08833320024041705, "learning_rate": 4.377759748177491e-06, "loss": 0.0007, "step": 178070 }, { "epoch": 1.1715558245560942, "grad_norm": 0.0656458782831927, "learning_rate": 4.37719010437496e-06, "loss": 0.0009, "step": 178080 }, { "epoch": 1.1716216127313277, "grad_norm": 0.000825307897464796, "learning_rate": 4.376620468783641e-06, "loss": 0.0008, "step": 178090 }, { "epoch": 1.171687400906561, "grad_norm": 0.0023478485364224542, "learning_rate": 4.376050841411041e-06, "loss": 0.0006, "step": 178100 }, { "epoch": 1.1717531890817945, "grad_norm": 0.011185027068742241, "learning_rate": 4.375481222264673e-06, "loss": 0.0015, "step": 178110 }, { "epoch": 1.1718189772570278, "grad_norm": 0.050759216281403056, "learning_rate": 4.374911611352045e-06, "loss": 0.0013, "step": 178120 }, { "epoch": 1.1718847654322613, "grad_norm": 0.030138803484843112, "learning_rate": 4.374342008680668e-06, "loss": 0.0005, "step": 178130 }, { "epoch": 1.1719505536074946, "grad_norm": 0.025628992982143717, "learning_rate": 4.373772414258052e-06, "loss": 0.0003, "step": 178140 }, { "epoch": 1.172016341782728, "grad_norm": 0.007779626925094629, "learning_rate": 4.3732028280917044e-06, "loss": 0.0009, "step": 178150 }, { "epoch": 1.1720821299579613, "grad_norm": 0.08756192296792349, "learning_rate": 4.372633250189137e-06, "loss": 0.0007, "step": 178160 }, { "epoch": 1.1721479181331946, "grad_norm": 0.015742800930048893, "learning_rate": 4.372063680557857e-06, "loss": 0.0003, "step": 178170 }, { "epoch": 1.1722137063084281, "grad_norm": 0.027960118954346542, "learning_rate": 4.371494119205376e-06, "loss": 0.001, "step": 178180 }, { "epoch": 1.1722794944836614, "grad_norm": 0.010681552856007574, "learning_rate": 4.370924566139203e-06, "loss": 0.0007, "step": 178190 }, { "epoch": 1.172345282658895, "grad_norm": 0.0005371308596645156, "learning_rate": 4.3703550213668465e-06, "loss": 0.0012, "step": 178200 }, { "epoch": 1.1724110708341282, "grad_norm": 0.368513228249796, "learning_rate": 4.369785484895814e-06, "loss": 0.0012, "step": 178210 }, { "epoch": 1.1724768590093617, "grad_norm": 0.05750651561894798, "learning_rate": 4.369215956733616e-06, "loss": 0.0008, "step": 178220 }, { "epoch": 1.172542647184595, "grad_norm": 0.027914154625988764, "learning_rate": 4.36864643688776e-06, "loss": 0.0006, "step": 178230 }, { "epoch": 1.1726084353598285, "grad_norm": 0.0023798466795392757, "learning_rate": 4.368076925365757e-06, "loss": 0.0005, "step": 178240 }, { "epoch": 1.1726742235350618, "grad_norm": 0.03734669488336794, "learning_rate": 4.367507422175113e-06, "loss": 0.0012, "step": 178250 }, { "epoch": 1.172740011710295, "grad_norm": 0.13547951478135933, "learning_rate": 4.366937927323338e-06, "loss": 0.001, "step": 178260 }, { "epoch": 1.1728057998855286, "grad_norm": 0.06448732028655926, "learning_rate": 4.36636844081794e-06, "loss": 0.0007, "step": 178270 }, { "epoch": 1.172871588060762, "grad_norm": 0.02826266633371011, "learning_rate": 4.365798962666426e-06, "loss": 0.0004, "step": 178280 }, { "epoch": 1.1729373762359954, "grad_norm": 0.03140455066152308, "learning_rate": 4.365229492876305e-06, "loss": 0.0008, "step": 178290 }, { "epoch": 1.1730031644112286, "grad_norm": 0.06067274828404333, "learning_rate": 4.364660031455085e-06, "loss": 0.0007, "step": 178300 }, { "epoch": 1.1730689525864622, "grad_norm": 0.028957908873212073, "learning_rate": 4.364090578410273e-06, "loss": 0.0009, "step": 178310 }, { "epoch": 1.1731347407616954, "grad_norm": 0.014996458954336581, "learning_rate": 4.363521133749379e-06, "loss": 0.0007, "step": 178320 }, { "epoch": 1.173200528936929, "grad_norm": 0.060794704407885286, "learning_rate": 4.3629516974799085e-06, "loss": 0.001, "step": 178330 }, { "epoch": 1.1732663171121622, "grad_norm": 0.032960823332492915, "learning_rate": 4.36238226960937e-06, "loss": 0.0005, "step": 178340 }, { "epoch": 1.1733321052873957, "grad_norm": 0.00890248229099314, "learning_rate": 4.3618128501452705e-06, "loss": 0.0012, "step": 178350 }, { "epoch": 1.173397893462629, "grad_norm": 0.2975703323998621, "learning_rate": 4.361243439095116e-06, "loss": 0.0009, "step": 178360 }, { "epoch": 1.1734636816378625, "grad_norm": 0.03618412097545291, "learning_rate": 4.360674036466417e-06, "loss": 0.0007, "step": 178370 }, { "epoch": 1.1735294698130958, "grad_norm": 0.06413457893990503, "learning_rate": 4.360104642266678e-06, "loss": 0.0018, "step": 178380 }, { "epoch": 1.173595257988329, "grad_norm": 0.017632097206754873, "learning_rate": 4.3595352565034064e-06, "loss": 0.001, "step": 178390 }, { "epoch": 1.1736610461635626, "grad_norm": 0.006399206550060399, "learning_rate": 4.358965879184109e-06, "loss": 0.0011, "step": 178400 }, { "epoch": 1.1737268343387959, "grad_norm": 0.0012983802890208613, "learning_rate": 4.358396510316294e-06, "loss": 0.0005, "step": 178410 }, { "epoch": 1.1737926225140294, "grad_norm": 0.039858839681048884, "learning_rate": 4.357827149907466e-06, "loss": 0.0007, "step": 178420 }, { "epoch": 1.1738584106892627, "grad_norm": 0.26857400072371396, "learning_rate": 4.357257797965132e-06, "loss": 0.002, "step": 178430 }, { "epoch": 1.1739241988644962, "grad_norm": 0.04513898138083395, "learning_rate": 4.3566884544968e-06, "loss": 0.0004, "step": 178440 }, { "epoch": 1.1739899870397295, "grad_norm": 0.022040795134593828, "learning_rate": 4.356119119509975e-06, "loss": 0.0014, "step": 178450 }, { "epoch": 1.174055775214963, "grad_norm": 0.07946179030877037, "learning_rate": 4.355549793012162e-06, "loss": 0.0017, "step": 178460 }, { "epoch": 1.1741215633901962, "grad_norm": 0.019822524568503946, "learning_rate": 4.354980475010872e-06, "loss": 0.0009, "step": 178470 }, { "epoch": 1.1741873515654295, "grad_norm": 0.1200008050184714, "learning_rate": 4.354411165513604e-06, "loss": 0.0006, "step": 178480 }, { "epoch": 1.174253139740663, "grad_norm": 0.023641004211928624, "learning_rate": 4.353841864527868e-06, "loss": 0.0008, "step": 178490 }, { "epoch": 1.1743189279158963, "grad_norm": 0.0377000398075302, "learning_rate": 4.353272572061169e-06, "loss": 0.0009, "step": 178500 }, { "epoch": 1.1743847160911298, "grad_norm": 0.005462628725538348, "learning_rate": 4.352703288121013e-06, "loss": 0.0008, "step": 178510 }, { "epoch": 1.174450504266363, "grad_norm": 0.019353327636334433, "learning_rate": 4.352134012714905e-06, "loss": 0.0004, "step": 178520 }, { "epoch": 1.1745162924415966, "grad_norm": 0.04839496613548459, "learning_rate": 4.3515647458503485e-06, "loss": 0.0009, "step": 178530 }, { "epoch": 1.17458208061683, "grad_norm": 0.05685528903099324, "learning_rate": 4.350995487534853e-06, "loss": 0.0005, "step": 178540 }, { "epoch": 1.1746478687920634, "grad_norm": 0.06275445172317617, "learning_rate": 4.35042623777592e-06, "loss": 0.0014, "step": 178550 }, { "epoch": 1.1747136569672967, "grad_norm": 0.006421502478943857, "learning_rate": 4.349856996581058e-06, "loss": 0.0005, "step": 178560 }, { "epoch": 1.17477944514253, "grad_norm": 0.07344726957815889, "learning_rate": 4.349287763957769e-06, "loss": 0.001, "step": 178570 }, { "epoch": 1.1748452333177635, "grad_norm": 0.041869259368116675, "learning_rate": 4.348718539913558e-06, "loss": 0.0004, "step": 178580 }, { "epoch": 1.1749110214929968, "grad_norm": 0.04822514127957591, "learning_rate": 4.348149324455932e-06, "loss": 0.0012, "step": 178590 }, { "epoch": 1.1749768096682303, "grad_norm": 0.06397109869142294, "learning_rate": 4.347580117592392e-06, "loss": 0.0005, "step": 178600 }, { "epoch": 1.1750425978434635, "grad_norm": 0.014676686722893321, "learning_rate": 4.3470109193304466e-06, "loss": 0.0006, "step": 178610 }, { "epoch": 1.175108386018697, "grad_norm": 0.08060567242052867, "learning_rate": 4.346441729677598e-06, "loss": 0.0011, "step": 178620 }, { "epoch": 1.1751741741939303, "grad_norm": 0.028534242714316458, "learning_rate": 4.3458725486413515e-06, "loss": 0.0005, "step": 178630 }, { "epoch": 1.1752399623691638, "grad_norm": 0.10948916987529654, "learning_rate": 4.345303376229209e-06, "loss": 0.0009, "step": 178640 }, { "epoch": 1.1753057505443971, "grad_norm": 0.004744398723899204, "learning_rate": 4.344734212448677e-06, "loss": 0.0007, "step": 178650 }, { "epoch": 1.1753715387196306, "grad_norm": 0.006103675994098579, "learning_rate": 4.344165057307257e-06, "loss": 0.0008, "step": 178660 }, { "epoch": 1.175437326894864, "grad_norm": 0.010909785737494633, "learning_rate": 4.343595910812456e-06, "loss": 0.0005, "step": 178670 }, { "epoch": 1.1755031150700974, "grad_norm": 0.04971980730225328, "learning_rate": 4.343026772971775e-06, "loss": 0.001, "step": 178680 }, { "epoch": 1.1755689032453307, "grad_norm": 0.008585413674803553, "learning_rate": 4.342457643792719e-06, "loss": 0.0013, "step": 178690 }, { "epoch": 1.175634691420564, "grad_norm": 0.0054390543063943305, "learning_rate": 4.341888523282792e-06, "loss": 0.0007, "step": 178700 }, { "epoch": 1.1757004795957975, "grad_norm": 0.022936479058416823, "learning_rate": 4.341319411449495e-06, "loss": 0.0023, "step": 178710 }, { "epoch": 1.1757662677710308, "grad_norm": 0.013388391253449419, "learning_rate": 4.340750308300334e-06, "loss": 0.0005, "step": 178720 }, { "epoch": 1.1758320559462643, "grad_norm": 0.10676581045331052, "learning_rate": 4.340181213842809e-06, "loss": 0.0011, "step": 178730 }, { "epoch": 1.1758978441214976, "grad_norm": 0.06980340120841119, "learning_rate": 4.339612128084425e-06, "loss": 0.0008, "step": 178740 }, { "epoch": 1.175963632296731, "grad_norm": 0.055307947908861795, "learning_rate": 4.339043051032686e-06, "loss": 0.0013, "step": 178750 }, { "epoch": 1.1760294204719643, "grad_norm": 0.005792420781867152, "learning_rate": 4.338473982695094e-06, "loss": 0.0003, "step": 178760 }, { "epoch": 1.1760952086471979, "grad_norm": 0.0006246592011470909, "learning_rate": 4.337904923079151e-06, "loss": 0.0007, "step": 178770 }, { "epoch": 1.1761609968224311, "grad_norm": 0.06932597096035988, "learning_rate": 4.337335872192358e-06, "loss": 0.0007, "step": 178780 }, { "epoch": 1.1762267849976644, "grad_norm": 0.036733089849297386, "learning_rate": 4.33676683004222e-06, "loss": 0.0007, "step": 178790 }, { "epoch": 1.176292573172898, "grad_norm": 0.020333314021873557, "learning_rate": 4.336197796636239e-06, "loss": 0.0012, "step": 178800 }, { "epoch": 1.1763583613481312, "grad_norm": 0.0006918821825932203, "learning_rate": 4.335628771981917e-06, "loss": 0.0008, "step": 178810 }, { "epoch": 1.1764241495233647, "grad_norm": 0.0148206074455228, "learning_rate": 4.335059756086756e-06, "loss": 0.0008, "step": 178820 }, { "epoch": 1.176489937698598, "grad_norm": 0.02562268324249543, "learning_rate": 4.3344907489582565e-06, "loss": 0.0012, "step": 178830 }, { "epoch": 1.1765557258738315, "grad_norm": 0.0013730976228195685, "learning_rate": 4.333921750603923e-06, "loss": 0.0032, "step": 178840 }, { "epoch": 1.1766215140490648, "grad_norm": 0.005854710733120001, "learning_rate": 4.333352761031256e-06, "loss": 0.0006, "step": 178850 }, { "epoch": 1.1766873022242983, "grad_norm": 0.05629673567470853, "learning_rate": 4.332783780247756e-06, "loss": 0.0004, "step": 178860 }, { "epoch": 1.1767530903995316, "grad_norm": 0.024222959924646136, "learning_rate": 4.332214808260925e-06, "loss": 0.0011, "step": 178870 }, { "epoch": 1.1768188785747649, "grad_norm": 0.003647637748376795, "learning_rate": 4.331645845078267e-06, "loss": 0.0004, "step": 178880 }, { "epoch": 1.1768846667499984, "grad_norm": 0.02040190246017174, "learning_rate": 4.331076890707281e-06, "loss": 0.0006, "step": 178890 }, { "epoch": 1.1769504549252316, "grad_norm": 0.04499361154537396, "learning_rate": 4.330507945155468e-06, "loss": 0.0108, "step": 178900 }, { "epoch": 1.1770162431004652, "grad_norm": 0.03684623756021, "learning_rate": 4.329939008430329e-06, "loss": 0.0009, "step": 178910 }, { "epoch": 1.1770820312756984, "grad_norm": 0.04717753795213305, "learning_rate": 4.3293700805393655e-06, "loss": 0.0005, "step": 178920 }, { "epoch": 1.177147819450932, "grad_norm": 0.0021128596000748578, "learning_rate": 4.328801161490079e-06, "loss": 0.001, "step": 178930 }, { "epoch": 1.1772136076261652, "grad_norm": 0.0011028500578934056, "learning_rate": 4.3282322512899696e-06, "loss": 0.0007, "step": 178940 }, { "epoch": 1.1772793958013987, "grad_norm": 0.002424476881949874, "learning_rate": 4.327663349946537e-06, "loss": 0.001, "step": 178950 }, { "epoch": 1.177345183976632, "grad_norm": 0.024303411852190582, "learning_rate": 4.327094457467284e-06, "loss": 0.0014, "step": 178960 }, { "epoch": 1.1774109721518655, "grad_norm": 0.005845188867244917, "learning_rate": 4.3265255738597074e-06, "loss": 0.0007, "step": 178970 }, { "epoch": 1.1774767603270988, "grad_norm": 0.011316971155290997, "learning_rate": 4.325956699131312e-06, "loss": 0.0005, "step": 178980 }, { "epoch": 1.1775425485023323, "grad_norm": 0.014505952269583918, "learning_rate": 4.3253878332895924e-06, "loss": 0.0008, "step": 178990 }, { "epoch": 1.1776083366775656, "grad_norm": 0.011526807854404898, "learning_rate": 4.324818976342053e-06, "loss": 0.0005, "step": 179000 }, { "epoch": 1.1776741248527989, "grad_norm": 0.027201328944051105, "learning_rate": 4.324250128296193e-06, "loss": 0.0011, "step": 179010 }, { "epoch": 1.1777399130280324, "grad_norm": 0.016032108996049867, "learning_rate": 4.32368128915951e-06, "loss": 0.0004, "step": 179020 }, { "epoch": 1.1778057012032657, "grad_norm": 0.006713129988275846, "learning_rate": 4.3231124589395055e-06, "loss": 0.0017, "step": 179030 }, { "epoch": 1.1778714893784992, "grad_norm": 0.006546162259555893, "learning_rate": 4.322543637643679e-06, "loss": 0.0006, "step": 179040 }, { "epoch": 1.1779372775537325, "grad_norm": 0.007435733167072514, "learning_rate": 4.321974825279529e-06, "loss": 0.0023, "step": 179050 }, { "epoch": 1.178003065728966, "grad_norm": 0.09537840424948306, "learning_rate": 4.321406021854556e-06, "loss": 0.0007, "step": 179060 }, { "epoch": 1.1780688539041992, "grad_norm": 0.06921545452739424, "learning_rate": 4.320837227376259e-06, "loss": 0.0012, "step": 179070 }, { "epoch": 1.1781346420794327, "grad_norm": 0.030655008231558704, "learning_rate": 4.320268441852136e-06, "loss": 0.0007, "step": 179080 }, { "epoch": 1.178200430254666, "grad_norm": 0.04709661760997424, "learning_rate": 4.319699665289686e-06, "loss": 0.0016, "step": 179090 }, { "epoch": 1.1782662184298993, "grad_norm": 0.03308372976183409, "learning_rate": 4.319130897696409e-06, "loss": 0.0007, "step": 179100 }, { "epoch": 1.1783320066051328, "grad_norm": 0.09224535867495906, "learning_rate": 4.318562139079804e-06, "loss": 0.0009, "step": 179110 }, { "epoch": 1.178397794780366, "grad_norm": 0.0013529830178437225, "learning_rate": 4.317993389447366e-06, "loss": 0.0006, "step": 179120 }, { "epoch": 1.1784635829555996, "grad_norm": 0.0001297655625835422, "learning_rate": 4.317424648806597e-06, "loss": 0.0004, "step": 179130 }, { "epoch": 1.178529371130833, "grad_norm": 0.007924678130408914, "learning_rate": 4.316855917164996e-06, "loss": 0.0007, "step": 179140 }, { "epoch": 1.1785951593060664, "grad_norm": 0.012574876175670996, "learning_rate": 4.316287194530059e-06, "loss": 0.0011, "step": 179150 }, { "epoch": 1.1786609474812997, "grad_norm": 0.0730507523445996, "learning_rate": 4.315718480909285e-06, "loss": 0.0006, "step": 179160 }, { "epoch": 1.1787267356565332, "grad_norm": 0.00973794609383518, "learning_rate": 4.315149776310172e-06, "loss": 0.0008, "step": 179170 }, { "epoch": 1.1787925238317665, "grad_norm": 0.0008576441377042785, "learning_rate": 4.314581080740216e-06, "loss": 0.0009, "step": 179180 }, { "epoch": 1.1788583120069998, "grad_norm": 0.0490103948143558, "learning_rate": 4.314012394206919e-06, "loss": 0.0006, "step": 179190 }, { "epoch": 1.1789241001822333, "grad_norm": 0.014466311206845524, "learning_rate": 4.313443716717775e-06, "loss": 0.0005, "step": 179200 }, { "epoch": 1.1789898883574665, "grad_norm": 0.04113604429644124, "learning_rate": 4.312875048280282e-06, "loss": 0.0008, "step": 179210 }, { "epoch": 1.1790556765327, "grad_norm": 0.004723708938886705, "learning_rate": 4.312306388901939e-06, "loss": 0.0005, "step": 179220 }, { "epoch": 1.1791214647079333, "grad_norm": 0.011096206929198493, "learning_rate": 4.311737738590242e-06, "loss": 0.0018, "step": 179230 }, { "epoch": 1.1791872528831668, "grad_norm": 0.061239166087054035, "learning_rate": 4.311169097352689e-06, "loss": 0.0016, "step": 179240 }, { "epoch": 1.1792530410584001, "grad_norm": 0.0040359429896369104, "learning_rate": 4.310600465196774e-06, "loss": 0.0006, "step": 179250 }, { "epoch": 1.1793188292336336, "grad_norm": 0.009053924322722725, "learning_rate": 4.3100318421299996e-06, "loss": 0.0009, "step": 179260 }, { "epoch": 1.179384617408867, "grad_norm": 0.020839175806668032, "learning_rate": 4.309463228159859e-06, "loss": 0.0006, "step": 179270 }, { "epoch": 1.1794504055841002, "grad_norm": 0.04107833549057177, "learning_rate": 4.3088946232938485e-06, "loss": 0.0006, "step": 179280 }, { "epoch": 1.1795161937593337, "grad_norm": 0.04485285498733409, "learning_rate": 4.3083260275394655e-06, "loss": 0.0012, "step": 179290 }, { "epoch": 1.1795819819345672, "grad_norm": 0.007007276114395292, "learning_rate": 4.307757440904207e-06, "loss": 0.0006, "step": 179300 }, { "epoch": 1.1796477701098005, "grad_norm": 0.0038924155809391867, "learning_rate": 4.307188863395568e-06, "loss": 0.0009, "step": 179310 }, { "epoch": 1.1797135582850338, "grad_norm": 0.00605984509933415, "learning_rate": 4.306620295021045e-06, "loss": 0.0005, "step": 179320 }, { "epoch": 1.1797793464602673, "grad_norm": 0.008805391172961974, "learning_rate": 4.306051735788136e-06, "loss": 0.0004, "step": 179330 }, { "epoch": 1.1798451346355006, "grad_norm": 0.049698968986053395, "learning_rate": 4.305483185704336e-06, "loss": 0.0006, "step": 179340 }, { "epoch": 1.179910922810734, "grad_norm": 0.07745320360675424, "learning_rate": 4.304914644777139e-06, "loss": 0.0006, "step": 179350 }, { "epoch": 1.1799767109859673, "grad_norm": 0.02037888041853974, "learning_rate": 4.304346113014043e-06, "loss": 0.0009, "step": 179360 }, { "epoch": 1.1800424991612009, "grad_norm": 0.017603234342679002, "learning_rate": 4.303777590422542e-06, "loss": 0.0006, "step": 179370 }, { "epoch": 1.1801082873364341, "grad_norm": 0.07208406027936233, "learning_rate": 4.303209077010132e-06, "loss": 0.0009, "step": 179380 }, { "epoch": 1.1801740755116676, "grad_norm": 0.05736031526874017, "learning_rate": 4.302640572784309e-06, "loss": 0.0008, "step": 179390 }, { "epoch": 1.180239863686901, "grad_norm": 0.01046420574048711, "learning_rate": 4.302072077752568e-06, "loss": 0.0016, "step": 179400 }, { "epoch": 1.1803056518621342, "grad_norm": 0.09918893556196624, "learning_rate": 4.301503591922404e-06, "loss": 0.0008, "step": 179410 }, { "epoch": 1.1803714400373677, "grad_norm": 0.014177104348367081, "learning_rate": 4.300935115301311e-06, "loss": 0.0007, "step": 179420 }, { "epoch": 1.180437228212601, "grad_norm": 0.014468228093879586, "learning_rate": 4.300366647896785e-06, "loss": 0.0006, "step": 179430 }, { "epoch": 1.1805030163878345, "grad_norm": 0.06006018099845194, "learning_rate": 4.29979818971632e-06, "loss": 0.0008, "step": 179440 }, { "epoch": 1.1805688045630678, "grad_norm": 0.1307593713231482, "learning_rate": 4.299229740767412e-06, "loss": 0.0008, "step": 179450 }, { "epoch": 1.1806345927383013, "grad_norm": 0.049303911457710965, "learning_rate": 4.298661301057554e-06, "loss": 0.0004, "step": 179460 }, { "epoch": 1.1807003809135346, "grad_norm": 0.043997502271007904, "learning_rate": 4.298092870594242e-06, "loss": 0.0005, "step": 179470 }, { "epoch": 1.180766169088768, "grad_norm": 0.02259127397287202, "learning_rate": 4.297524449384969e-06, "loss": 0.0011, "step": 179480 }, { "epoch": 1.1808319572640014, "grad_norm": 0.0009014597081670306, "learning_rate": 4.29695603743723e-06, "loss": 0.0007, "step": 179490 }, { "epoch": 1.1808977454392346, "grad_norm": 0.006306709276655021, "learning_rate": 4.296387634758518e-06, "loss": 0.0008, "step": 179500 }, { "epoch": 1.1809635336144682, "grad_norm": 0.020582144945641867, "learning_rate": 4.295819241356326e-06, "loss": 0.0006, "step": 179510 }, { "epoch": 1.1810293217897014, "grad_norm": 0.1330468787563152, "learning_rate": 4.29525085723815e-06, "loss": 0.0017, "step": 179520 }, { "epoch": 1.181095109964935, "grad_norm": 0.011605952053636223, "learning_rate": 4.294682482411483e-06, "loss": 0.0006, "step": 179530 }, { "epoch": 1.1811608981401682, "grad_norm": 0.0026902073138893056, "learning_rate": 4.294114116883819e-06, "loss": 0.0007, "step": 179540 }, { "epoch": 1.1812266863154017, "grad_norm": 0.01949674474061129, "learning_rate": 4.29354576066265e-06, "loss": 0.0003, "step": 179550 }, { "epoch": 1.181292474490635, "grad_norm": 0.023442806260685997, "learning_rate": 4.29297741375547e-06, "loss": 0.0011, "step": 179560 }, { "epoch": 1.1813582626658685, "grad_norm": 0.060844240827616955, "learning_rate": 4.292409076169771e-06, "loss": 0.0008, "step": 179570 }, { "epoch": 1.1814240508411018, "grad_norm": 0.07266783178908584, "learning_rate": 4.291840747913049e-06, "loss": 0.0007, "step": 179580 }, { "epoch": 1.181489839016335, "grad_norm": 0.02004984936311061, "learning_rate": 4.291272428992794e-06, "loss": 0.0003, "step": 179590 }, { "epoch": 1.1815556271915686, "grad_norm": 0.2168422452363016, "learning_rate": 4.290704119416501e-06, "loss": 0.0005, "step": 179600 }, { "epoch": 1.1816214153668019, "grad_norm": 0.002253995530360393, "learning_rate": 4.290135819191661e-06, "loss": 0.0005, "step": 179610 }, { "epoch": 1.1816872035420354, "grad_norm": 0.01801346057733952, "learning_rate": 4.2895675283257675e-06, "loss": 0.0008, "step": 179620 }, { "epoch": 1.1817529917172687, "grad_norm": 0.06704477572279334, "learning_rate": 4.288999246826313e-06, "loss": 0.0005, "step": 179630 }, { "epoch": 1.1818187798925022, "grad_norm": 0.03564987026682146, "learning_rate": 4.288430974700787e-06, "loss": 0.0004, "step": 179640 }, { "epoch": 1.1818845680677355, "grad_norm": 0.0010344066049936898, "learning_rate": 4.2878627119566864e-06, "loss": 0.0008, "step": 179650 }, { "epoch": 1.181950356242969, "grad_norm": 0.05159109866450114, "learning_rate": 4.2872944586015e-06, "loss": 0.0005, "step": 179660 }, { "epoch": 1.1820161444182022, "grad_norm": 0.004755976658976771, "learning_rate": 4.286726214642721e-06, "loss": 0.0007, "step": 179670 }, { "epoch": 1.1820819325934357, "grad_norm": 0.09096007166636762, "learning_rate": 4.286157980087841e-06, "loss": 0.0004, "step": 179680 }, { "epoch": 1.182147720768669, "grad_norm": 0.01961960488785035, "learning_rate": 4.285589754944351e-06, "loss": 0.0005, "step": 179690 }, { "epoch": 1.1822135089439025, "grad_norm": 0.11080833393059245, "learning_rate": 4.285021539219742e-06, "loss": 0.0028, "step": 179700 }, { "epoch": 1.1822792971191358, "grad_norm": 0.04468440579587651, "learning_rate": 4.284453332921509e-06, "loss": 0.0007, "step": 179710 }, { "epoch": 1.182345085294369, "grad_norm": 0.02612522604164539, "learning_rate": 4.283885136057139e-06, "loss": 0.0016, "step": 179720 }, { "epoch": 1.1824108734696026, "grad_norm": 0.03257643632446554, "learning_rate": 4.2833169486341255e-06, "loss": 0.0009, "step": 179730 }, { "epoch": 1.182476661644836, "grad_norm": 0.07799978874365145, "learning_rate": 4.28274877065996e-06, "loss": 0.0011, "step": 179740 }, { "epoch": 1.1825424498200694, "grad_norm": 0.0004868583641160472, "learning_rate": 4.282180602142132e-06, "loss": 0.0004, "step": 179750 }, { "epoch": 1.1826082379953027, "grad_norm": 0.004432378118964532, "learning_rate": 4.281612443088132e-06, "loss": 0.0003, "step": 179760 }, { "epoch": 1.1826740261705362, "grad_norm": 0.05727732552437561, "learning_rate": 4.2810442935054515e-06, "loss": 0.0003, "step": 179770 }, { "epoch": 1.1827398143457695, "grad_norm": 0.0011948778211019956, "learning_rate": 4.280476153401582e-06, "loss": 0.0004, "step": 179780 }, { "epoch": 1.182805602521003, "grad_norm": 0.028280291254907632, "learning_rate": 4.279908022784012e-06, "loss": 0.0008, "step": 179790 }, { "epoch": 1.1828713906962363, "grad_norm": 0.015172058522886193, "learning_rate": 4.279339901660234e-06, "loss": 0.0009, "step": 179800 }, { "epoch": 1.1829371788714695, "grad_norm": 0.11877757439976908, "learning_rate": 4.278771790037736e-06, "loss": 0.0015, "step": 179810 }, { "epoch": 1.183002967046703, "grad_norm": 0.0008476514774891126, "learning_rate": 4.27820368792401e-06, "loss": 0.0008, "step": 179820 }, { "epoch": 1.1830687552219363, "grad_norm": 0.008707391696562369, "learning_rate": 4.277635595326543e-06, "loss": 0.0014, "step": 179830 }, { "epoch": 1.1831345433971698, "grad_norm": 0.024349566007293093, "learning_rate": 4.277067512252828e-06, "loss": 0.0009, "step": 179840 }, { "epoch": 1.1832003315724031, "grad_norm": 0.03702738293697862, "learning_rate": 4.276499438710354e-06, "loss": 0.0008, "step": 179850 }, { "epoch": 1.1832661197476366, "grad_norm": 0.017168147380304807, "learning_rate": 4.27593137470661e-06, "loss": 0.0005, "step": 179860 }, { "epoch": 1.18333190792287, "grad_norm": 0.031203849151172275, "learning_rate": 4.275363320249085e-06, "loss": 0.0008, "step": 179870 }, { "epoch": 1.1833976960981034, "grad_norm": 0.02042965851094003, "learning_rate": 4.274795275345269e-06, "loss": 0.0006, "step": 179880 }, { "epoch": 1.1834634842733367, "grad_norm": 0.059340125047375646, "learning_rate": 4.274227240002652e-06, "loss": 0.0036, "step": 179890 }, { "epoch": 1.18352927244857, "grad_norm": 0.01536280895050337, "learning_rate": 4.27365921422872e-06, "loss": 0.0005, "step": 179900 }, { "epoch": 1.1835950606238035, "grad_norm": 0.04653886143284873, "learning_rate": 4.273091198030964e-06, "loss": 0.0005, "step": 179910 }, { "epoch": 1.1836608487990368, "grad_norm": 0.024488599762022393, "learning_rate": 4.272523191416875e-06, "loss": 0.0006, "step": 179920 }, { "epoch": 1.1837266369742703, "grad_norm": 0.012403175088675791, "learning_rate": 4.271955194393938e-06, "loss": 0.0005, "step": 179930 }, { "epoch": 1.1837924251495036, "grad_norm": 0.012134894856403426, "learning_rate": 4.271387206969643e-06, "loss": 0.0004, "step": 179940 }, { "epoch": 1.183858213324737, "grad_norm": 0.028817560981672857, "learning_rate": 4.270819229151479e-06, "loss": 0.0005, "step": 179950 }, { "epoch": 1.1839240014999703, "grad_norm": 0.03225833727674056, "learning_rate": 4.270251260946932e-06, "loss": 0.001, "step": 179960 }, { "epoch": 1.1839897896752039, "grad_norm": 0.004965399224248501, "learning_rate": 4.269683302363493e-06, "loss": 0.0007, "step": 179970 }, { "epoch": 1.1840555778504371, "grad_norm": 0.11796591093848349, "learning_rate": 4.269115353408649e-06, "loss": 0.0004, "step": 179980 }, { "epoch": 1.1841213660256706, "grad_norm": 0.027439910567318218, "learning_rate": 4.268547414089889e-06, "loss": 0.0005, "step": 179990 }, { "epoch": 1.184187154200904, "grad_norm": 0.02401690254331981, "learning_rate": 4.267979484414697e-06, "loss": 0.0008, "step": 180000 }, { "epoch": 1.1842529423761374, "grad_norm": 0.022187565630328473, "learning_rate": 4.267411564390565e-06, "loss": 0.0006, "step": 180010 }, { "epoch": 1.1843187305513707, "grad_norm": 0.01602722557034622, "learning_rate": 4.2668436540249796e-06, "loss": 0.0004, "step": 180020 }, { "epoch": 1.184384518726604, "grad_norm": 0.013758919166303028, "learning_rate": 4.266275753325425e-06, "loss": 0.0006, "step": 180030 }, { "epoch": 1.1844503069018375, "grad_norm": 0.1438253327566861, "learning_rate": 4.265707862299393e-06, "loss": 0.0013, "step": 180040 }, { "epoch": 1.1845160950770708, "grad_norm": 0.06521928355095229, "learning_rate": 4.265139980954368e-06, "loss": 0.0006, "step": 180050 }, { "epoch": 1.1845818832523043, "grad_norm": 0.025165837835614394, "learning_rate": 4.264572109297837e-06, "loss": 0.001, "step": 180060 }, { "epoch": 1.1846476714275376, "grad_norm": 0.0005400145917252035, "learning_rate": 4.2640042473372885e-06, "loss": 0.0007, "step": 180070 }, { "epoch": 1.184713459602771, "grad_norm": 0.012028011241608396, "learning_rate": 4.263436395080209e-06, "loss": 0.0003, "step": 180080 }, { "epoch": 1.1847792477780044, "grad_norm": 0.04575868164162972, "learning_rate": 4.262868552534082e-06, "loss": 0.0027, "step": 180090 }, { "epoch": 1.1848450359532379, "grad_norm": 0.02908229936942768, "learning_rate": 4.262300719706398e-06, "loss": 0.0008, "step": 180100 }, { "epoch": 1.1849108241284712, "grad_norm": 0.03570448826609519, "learning_rate": 4.261732896604642e-06, "loss": 0.0011, "step": 180110 }, { "epoch": 1.1849766123037044, "grad_norm": 0.10775293536147999, "learning_rate": 4.261165083236301e-06, "loss": 0.0008, "step": 180120 }, { "epoch": 1.185042400478938, "grad_norm": 0.003092694638039217, "learning_rate": 4.2605972796088595e-06, "loss": 0.0008, "step": 180130 }, { "epoch": 1.1851081886541712, "grad_norm": 0.004777464475258175, "learning_rate": 4.2600294857298044e-06, "loss": 0.0003, "step": 180140 }, { "epoch": 1.1851739768294047, "grad_norm": 0.01524546954446569, "learning_rate": 4.259461701606621e-06, "loss": 0.0013, "step": 180150 }, { "epoch": 1.185239765004638, "grad_norm": 0.01328906666846071, "learning_rate": 4.258893927246795e-06, "loss": 0.0005, "step": 180160 }, { "epoch": 1.1853055531798715, "grad_norm": 0.003429630363370282, "learning_rate": 4.258326162657813e-06, "loss": 0.0005, "step": 180170 }, { "epoch": 1.1853713413551048, "grad_norm": 0.02160278723808845, "learning_rate": 4.25775840784716e-06, "loss": 0.0006, "step": 180180 }, { "epoch": 1.1854371295303383, "grad_norm": 0.05429185209953095, "learning_rate": 4.257190662822322e-06, "loss": 0.0005, "step": 180190 }, { "epoch": 1.1855029177055716, "grad_norm": 0.05805262558661439, "learning_rate": 4.256622927590783e-06, "loss": 0.0005, "step": 180200 }, { "epoch": 1.1855687058808049, "grad_norm": 0.032114488929255024, "learning_rate": 4.25605520216003e-06, "loss": 0.0008, "step": 180210 }, { "epoch": 1.1856344940560384, "grad_norm": 0.048955401211089516, "learning_rate": 4.255487486537544e-06, "loss": 0.0008, "step": 180220 }, { "epoch": 1.1857002822312717, "grad_norm": 0.01908061102734909, "learning_rate": 4.254919780730814e-06, "loss": 0.0004, "step": 180230 }, { "epoch": 1.1857660704065052, "grad_norm": 0.037259000272145334, "learning_rate": 4.254352084747323e-06, "loss": 0.0005, "step": 180240 }, { "epoch": 1.1858318585817385, "grad_norm": 0.025258988218970196, "learning_rate": 4.253784398594556e-06, "loss": 0.0006, "step": 180250 }, { "epoch": 1.185897646756972, "grad_norm": 0.02779381665448584, "learning_rate": 4.253216722279997e-06, "loss": 0.001, "step": 180260 }, { "epoch": 1.1859634349322052, "grad_norm": 0.007671013382221226, "learning_rate": 4.252649055811131e-06, "loss": 0.0006, "step": 180270 }, { "epoch": 1.1860292231074387, "grad_norm": 0.02317909182747713, "learning_rate": 4.252081399195442e-06, "loss": 0.0003, "step": 180280 }, { "epoch": 1.186095011282672, "grad_norm": 0.029177441734145302, "learning_rate": 4.251513752440412e-06, "loss": 0.001, "step": 180290 }, { "epoch": 1.1861607994579053, "grad_norm": 0.14412939513435222, "learning_rate": 4.250946115553528e-06, "loss": 0.0013, "step": 180300 }, { "epoch": 1.1862265876331388, "grad_norm": 0.049881609525015094, "learning_rate": 4.250378488542273e-06, "loss": 0.0008, "step": 180310 }, { "epoch": 1.1862923758083723, "grad_norm": 0.10373268109907421, "learning_rate": 4.249810871414129e-06, "loss": 0.0007, "step": 180320 }, { "epoch": 1.1863581639836056, "grad_norm": 0.05869477602694588, "learning_rate": 4.249243264176583e-06, "loss": 0.0004, "step": 180330 }, { "epoch": 1.186423952158839, "grad_norm": 0.04790694097865518, "learning_rate": 4.248675666837114e-06, "loss": 0.0003, "step": 180340 }, { "epoch": 1.1864897403340724, "grad_norm": 0.01881257022848776, "learning_rate": 4.248108079403207e-06, "loss": 0.0006, "step": 180350 }, { "epoch": 1.1865555285093057, "grad_norm": 0.028267811161388268, "learning_rate": 4.247540501882347e-06, "loss": 0.0004, "step": 180360 }, { "epoch": 1.1866213166845392, "grad_norm": 0.1928176523870579, "learning_rate": 4.2469729342820175e-06, "loss": 0.0014, "step": 180370 }, { "epoch": 1.1866871048597725, "grad_norm": 0.031340286426351845, "learning_rate": 4.246405376609698e-06, "loss": 0.0005, "step": 180380 }, { "epoch": 1.186752893035006, "grad_norm": 0.08057395950135988, "learning_rate": 4.2458378288728705e-06, "loss": 0.0009, "step": 180390 }, { "epoch": 1.1868186812102393, "grad_norm": 0.024248254159806405, "learning_rate": 4.245270291079022e-06, "loss": 0.0004, "step": 180400 }, { "epoch": 1.1868844693854728, "grad_norm": 0.037859683791250154, "learning_rate": 4.244702763235633e-06, "loss": 0.0004, "step": 180410 }, { "epoch": 1.186950257560706, "grad_norm": 0.026698436776534214, "learning_rate": 4.244135245350185e-06, "loss": 0.0006, "step": 180420 }, { "epoch": 1.1870160457359393, "grad_norm": 0.012806268514187027, "learning_rate": 4.243567737430161e-06, "loss": 0.0007, "step": 180430 }, { "epoch": 1.1870818339111728, "grad_norm": 0.04934235757334652, "learning_rate": 4.243000239483043e-06, "loss": 0.0007, "step": 180440 }, { "epoch": 1.1871476220864061, "grad_norm": 0.027173008450926777, "learning_rate": 4.242432751516314e-06, "loss": 0.0008, "step": 180450 }, { "epoch": 1.1872134102616396, "grad_norm": 0.011047853679253147, "learning_rate": 4.241865273537453e-06, "loss": 0.0004, "step": 180460 }, { "epoch": 1.187279198436873, "grad_norm": 0.13069938452744892, "learning_rate": 4.241297805553944e-06, "loss": 0.0006, "step": 180470 }, { "epoch": 1.1873449866121064, "grad_norm": 0.023461659208460325, "learning_rate": 4.240730347573269e-06, "loss": 0.0005, "step": 180480 }, { "epoch": 1.1874107747873397, "grad_norm": 0.0019107793775012183, "learning_rate": 4.2401628996029094e-06, "loss": 0.0003, "step": 180490 }, { "epoch": 1.1874765629625732, "grad_norm": 0.018546320003647452, "learning_rate": 4.239595461650345e-06, "loss": 0.001, "step": 180500 }, { "epoch": 1.1875423511378065, "grad_norm": 0.005116449176783839, "learning_rate": 4.239028033723058e-06, "loss": 0.0006, "step": 180510 }, { "epoch": 1.1876081393130398, "grad_norm": 0.06333799746776457, "learning_rate": 4.238460615828527e-06, "loss": 0.0006, "step": 180520 }, { "epoch": 1.1876739274882733, "grad_norm": 0.02387063765674977, "learning_rate": 4.237893207974237e-06, "loss": 0.0005, "step": 180530 }, { "epoch": 1.1877397156635066, "grad_norm": 0.07784073822185147, "learning_rate": 4.237325810167667e-06, "loss": 0.0014, "step": 180540 }, { "epoch": 1.18780550383874, "grad_norm": 0.03952754117640841, "learning_rate": 4.236758422416297e-06, "loss": 0.0008, "step": 180550 }, { "epoch": 1.1878712920139733, "grad_norm": 0.0327669196939223, "learning_rate": 4.236191044727609e-06, "loss": 0.0006, "step": 180560 }, { "epoch": 1.1879370801892069, "grad_norm": 0.03225392636141675, "learning_rate": 4.235623677109082e-06, "loss": 0.001, "step": 180570 }, { "epoch": 1.1880028683644401, "grad_norm": 0.031847837256583696, "learning_rate": 4.235056319568196e-06, "loss": 0.0007, "step": 180580 }, { "epoch": 1.1880686565396736, "grad_norm": 0.011442933304768444, "learning_rate": 4.234488972112431e-06, "loss": 0.0016, "step": 180590 }, { "epoch": 1.188134444714907, "grad_norm": 0.024887829524598344, "learning_rate": 4.233921634749269e-06, "loss": 0.0008, "step": 180600 }, { "epoch": 1.1882002328901402, "grad_norm": 0.0037618486044914802, "learning_rate": 4.233354307486188e-06, "loss": 0.0009, "step": 180610 }, { "epoch": 1.1882660210653737, "grad_norm": 0.017026880040832117, "learning_rate": 4.23278699033067e-06, "loss": 0.0006, "step": 180620 }, { "epoch": 1.1883318092406072, "grad_norm": 0.011053346243752446, "learning_rate": 4.232219683290191e-06, "loss": 0.0013, "step": 180630 }, { "epoch": 1.1883975974158405, "grad_norm": 0.001967831580256809, "learning_rate": 4.231652386372233e-06, "loss": 0.0004, "step": 180640 }, { "epoch": 1.1884633855910738, "grad_norm": 0.002433598558527762, "learning_rate": 4.231085099584274e-06, "loss": 0.0005, "step": 180650 }, { "epoch": 1.1885291737663073, "grad_norm": 0.03883057717209475, "learning_rate": 4.2305178229337954e-06, "loss": 0.0005, "step": 180660 }, { "epoch": 1.1885949619415406, "grad_norm": 0.036837737657864, "learning_rate": 4.229950556428273e-06, "loss": 0.0004, "step": 180670 }, { "epoch": 1.188660750116774, "grad_norm": 0.0046819363443403525, "learning_rate": 4.229383300075189e-06, "loss": 0.0007, "step": 180680 }, { "epoch": 1.1887265382920074, "grad_norm": 0.0009850610333072812, "learning_rate": 4.22881605388202e-06, "loss": 0.0013, "step": 180690 }, { "epoch": 1.1887923264672409, "grad_norm": 0.022000068547237114, "learning_rate": 4.228248817856246e-06, "loss": 0.0003, "step": 180700 }, { "epoch": 1.1888581146424742, "grad_norm": 0.03295794264147117, "learning_rate": 4.227681592005343e-06, "loss": 0.0013, "step": 180710 }, { "epoch": 1.1889239028177077, "grad_norm": 0.03670760456156926, "learning_rate": 4.227114376336792e-06, "loss": 0.001, "step": 180720 }, { "epoch": 1.188989690992941, "grad_norm": 0.015187301336793777, "learning_rate": 4.226547170858071e-06, "loss": 0.0009, "step": 180730 }, { "epoch": 1.1890554791681742, "grad_norm": 0.022768896815373887, "learning_rate": 4.225979975576658e-06, "loss": 0.0005, "step": 180740 }, { "epoch": 1.1891212673434077, "grad_norm": 0.044434211961538114, "learning_rate": 4.22541279050003e-06, "loss": 0.0005, "step": 180750 }, { "epoch": 1.189187055518641, "grad_norm": 0.01314040755174765, "learning_rate": 4.224845615635666e-06, "loss": 0.0008, "step": 180760 }, { "epoch": 1.1892528436938745, "grad_norm": 0.014687810592432259, "learning_rate": 4.224278450991042e-06, "loss": 0.0014, "step": 180770 }, { "epoch": 1.1893186318691078, "grad_norm": 0.057545312127368756, "learning_rate": 4.223711296573636e-06, "loss": 0.0014, "step": 180780 }, { "epoch": 1.1893844200443413, "grad_norm": 0.023769436420140365, "learning_rate": 4.223144152390926e-06, "loss": 0.0004, "step": 180790 }, { "epoch": 1.1894502082195746, "grad_norm": 0.010001429897013305, "learning_rate": 4.2225770184503914e-06, "loss": 0.0003, "step": 180800 }, { "epoch": 1.189515996394808, "grad_norm": 0.018292976603777163, "learning_rate": 4.2220098947595066e-06, "loss": 0.0004, "step": 180810 }, { "epoch": 1.1895817845700414, "grad_norm": 0.009585810865453433, "learning_rate": 4.221442781325749e-06, "loss": 0.0006, "step": 180820 }, { "epoch": 1.1896475727452747, "grad_norm": 0.01644689814093085, "learning_rate": 4.2208756781565975e-06, "loss": 0.0011, "step": 180830 }, { "epoch": 1.1897133609205082, "grad_norm": 0.02072893043409561, "learning_rate": 4.220308585259526e-06, "loss": 0.0006, "step": 180840 }, { "epoch": 1.1897791490957415, "grad_norm": 0.05571908771008478, "learning_rate": 4.219741502642012e-06, "loss": 0.0005, "step": 180850 }, { "epoch": 1.189844937270975, "grad_norm": 0.049399517125382014, "learning_rate": 4.219174430311532e-06, "loss": 0.0006, "step": 180860 }, { "epoch": 1.1899107254462082, "grad_norm": 0.015336025136565271, "learning_rate": 4.218607368275564e-06, "loss": 0.0003, "step": 180870 }, { "epoch": 1.1899765136214417, "grad_norm": 0.027205282048089034, "learning_rate": 4.218040316541583e-06, "loss": 0.001, "step": 180880 }, { "epoch": 1.190042301796675, "grad_norm": 0.08042848114497594, "learning_rate": 4.217473275117065e-06, "loss": 0.0008, "step": 180890 }, { "epoch": 1.1901080899719085, "grad_norm": 0.02058730557311713, "learning_rate": 4.2169062440094855e-06, "loss": 0.001, "step": 180900 }, { "epoch": 1.1901738781471418, "grad_norm": 0.01964375435023427, "learning_rate": 4.21633922322632e-06, "loss": 0.001, "step": 180910 }, { "epoch": 1.190239666322375, "grad_norm": 0.01275348476167244, "learning_rate": 4.215772212775047e-06, "loss": 0.0006, "step": 180920 }, { "epoch": 1.1903054544976086, "grad_norm": 0.035090994466511254, "learning_rate": 4.2152052126631384e-06, "loss": 0.001, "step": 180930 }, { "epoch": 1.190371242672842, "grad_norm": 0.011392985449487332, "learning_rate": 4.214638222898073e-06, "loss": 0.001, "step": 180940 }, { "epoch": 1.1904370308480754, "grad_norm": 0.0006184471502442797, "learning_rate": 4.2140712434873235e-06, "loss": 0.0007, "step": 180950 }, { "epoch": 1.1905028190233087, "grad_norm": 0.009448184400569978, "learning_rate": 4.213504274438366e-06, "loss": 0.0011, "step": 180960 }, { "epoch": 1.1905686071985422, "grad_norm": 0.08528890283211499, "learning_rate": 4.2129373157586754e-06, "loss": 0.001, "step": 180970 }, { "epoch": 1.1906343953737755, "grad_norm": 0.03247954921217136, "learning_rate": 4.212370367455725e-06, "loss": 0.0009, "step": 180980 }, { "epoch": 1.190700183549009, "grad_norm": 0.05280979437515965, "learning_rate": 4.211803429536992e-06, "loss": 0.0004, "step": 180990 }, { "epoch": 1.1907659717242423, "grad_norm": 0.01154020074058242, "learning_rate": 4.211236502009951e-06, "loss": 0.0005, "step": 181000 }, { "epoch": 1.1908317598994758, "grad_norm": 0.03229030717299501, "learning_rate": 4.2106695848820755e-06, "loss": 0.0004, "step": 181010 }, { "epoch": 1.190897548074709, "grad_norm": 0.09384264949974462, "learning_rate": 4.210102678160839e-06, "loss": 0.001, "step": 181020 }, { "epoch": 1.1909633362499426, "grad_norm": 0.05461379454447906, "learning_rate": 4.209535781853717e-06, "loss": 0.0006, "step": 181030 }, { "epoch": 1.1910291244251758, "grad_norm": 0.02884667767653098, "learning_rate": 4.208968895968182e-06, "loss": 0.0006, "step": 181040 }, { "epoch": 1.1910949126004091, "grad_norm": 0.038653003704194985, "learning_rate": 4.20840202051171e-06, "loss": 0.0004, "step": 181050 }, { "epoch": 1.1911607007756426, "grad_norm": 0.024179785519790706, "learning_rate": 4.207835155491773e-06, "loss": 0.0009, "step": 181060 }, { "epoch": 1.191226488950876, "grad_norm": 0.01935483278820233, "learning_rate": 4.207268300915846e-06, "loss": 0.0014, "step": 181070 }, { "epoch": 1.1912922771261094, "grad_norm": 0.03496384164394608, "learning_rate": 4.206701456791402e-06, "loss": 0.0008, "step": 181080 }, { "epoch": 1.1913580653013427, "grad_norm": 0.0005100818564931101, "learning_rate": 4.206134623125913e-06, "loss": 0.0011, "step": 181090 }, { "epoch": 1.1914238534765762, "grad_norm": 0.15139136029494932, "learning_rate": 4.2055677999268546e-06, "loss": 0.0008, "step": 181100 }, { "epoch": 1.1914896416518095, "grad_norm": 0.0865836930729692, "learning_rate": 4.205000987201697e-06, "loss": 0.0011, "step": 181110 }, { "epoch": 1.191555429827043, "grad_norm": 0.08527823177254146, "learning_rate": 4.204434184957917e-06, "loss": 0.0013, "step": 181120 }, { "epoch": 1.1916212180022763, "grad_norm": 0.04144895661930683, "learning_rate": 4.203867393202983e-06, "loss": 0.0005, "step": 181130 }, { "epoch": 1.1916870061775096, "grad_norm": 0.10317355367735546, "learning_rate": 4.203300611944371e-06, "loss": 0.0005, "step": 181140 }, { "epoch": 1.191752794352743, "grad_norm": 0.007460075700416699, "learning_rate": 4.202733841189553e-06, "loss": 0.0008, "step": 181150 }, { "epoch": 1.1918185825279763, "grad_norm": 0.01289383583784396, "learning_rate": 4.2021670809460005e-06, "loss": 0.0004, "step": 181160 }, { "epoch": 1.1918843707032099, "grad_norm": 0.019061868687944215, "learning_rate": 4.201600331221184e-06, "loss": 0.0007, "step": 181170 }, { "epoch": 1.1919501588784431, "grad_norm": 0.012478405640655867, "learning_rate": 4.201033592022579e-06, "loss": 0.0005, "step": 181180 }, { "epoch": 1.1920159470536766, "grad_norm": 0.015963358714560796, "learning_rate": 4.200466863357656e-06, "loss": 0.0007, "step": 181190 }, { "epoch": 1.19208173522891, "grad_norm": 0.027802724916930706, "learning_rate": 4.199900145233887e-06, "loss": 0.0005, "step": 181200 }, { "epoch": 1.1921475234041434, "grad_norm": 0.057699138692713745, "learning_rate": 4.199333437658744e-06, "loss": 0.0004, "step": 181210 }, { "epoch": 1.1922133115793767, "grad_norm": 0.03441922628542279, "learning_rate": 4.198766740639698e-06, "loss": 0.0005, "step": 181220 }, { "epoch": 1.19227909975461, "grad_norm": 0.05499694440324609, "learning_rate": 4.198200054184221e-06, "loss": 0.0006, "step": 181230 }, { "epoch": 1.1923448879298435, "grad_norm": 0.019898941567419993, "learning_rate": 4.197633378299781e-06, "loss": 0.001, "step": 181240 }, { "epoch": 1.1924106761050768, "grad_norm": 0.0851745488855439, "learning_rate": 4.197066712993855e-06, "loss": 0.0006, "step": 181250 }, { "epoch": 1.1924764642803103, "grad_norm": 0.09188405836241596, "learning_rate": 4.19650005827391e-06, "loss": 0.0008, "step": 181260 }, { "epoch": 1.1925422524555436, "grad_norm": 0.01245879498671395, "learning_rate": 4.195933414147417e-06, "loss": 0.0005, "step": 181270 }, { "epoch": 1.192608040630777, "grad_norm": 0.05505827552928881, "learning_rate": 4.195366780621848e-06, "loss": 0.0009, "step": 181280 }, { "epoch": 1.1926738288060104, "grad_norm": 0.016513549869837055, "learning_rate": 4.1948001577046735e-06, "loss": 0.0007, "step": 181290 }, { "epoch": 1.1927396169812439, "grad_norm": 0.019844462519534332, "learning_rate": 4.194233545403363e-06, "loss": 0.0004, "step": 181300 }, { "epoch": 1.1928054051564771, "grad_norm": 0.044300888390811366, "learning_rate": 4.193666943725387e-06, "loss": 0.0005, "step": 181310 }, { "epoch": 1.1928711933317107, "grad_norm": 0.016863792590247676, "learning_rate": 4.193100352678217e-06, "loss": 0.0009, "step": 181320 }, { "epoch": 1.192936981506944, "grad_norm": 0.044618788975027315, "learning_rate": 4.192533772269322e-06, "loss": 0.0007, "step": 181330 }, { "epoch": 1.1930027696821774, "grad_norm": 0.00869204918921165, "learning_rate": 4.191967202506171e-06, "loss": 0.0008, "step": 181340 }, { "epoch": 1.1930685578574107, "grad_norm": 0.0072596791423912834, "learning_rate": 4.191400643396235e-06, "loss": 0.0006, "step": 181350 }, { "epoch": 1.193134346032644, "grad_norm": 0.11068786159276335, "learning_rate": 4.190834094946984e-06, "loss": 0.0008, "step": 181360 }, { "epoch": 1.1932001342078775, "grad_norm": 0.013658912558728021, "learning_rate": 4.190267557165884e-06, "loss": 0.0012, "step": 181370 }, { "epoch": 1.1932659223831108, "grad_norm": 0.08088912839948408, "learning_rate": 4.189701030060408e-06, "loss": 0.0011, "step": 181380 }, { "epoch": 1.1933317105583443, "grad_norm": 0.0376746492645664, "learning_rate": 4.1891345136380254e-06, "loss": 0.0006, "step": 181390 }, { "epoch": 1.1933974987335776, "grad_norm": 0.012615840867527187, "learning_rate": 4.188568007906204e-06, "loss": 0.0005, "step": 181400 }, { "epoch": 1.193463286908811, "grad_norm": 0.01938502526157861, "learning_rate": 4.188001512872412e-06, "loss": 0.001, "step": 181410 }, { "epoch": 1.1935290750840444, "grad_norm": 0.04240787902401546, "learning_rate": 4.187435028544118e-06, "loss": 0.001, "step": 181420 }, { "epoch": 1.1935948632592779, "grad_norm": 0.034629539557972246, "learning_rate": 4.186868554928792e-06, "loss": 0.0009, "step": 181430 }, { "epoch": 1.1936606514345112, "grad_norm": 0.10536690374696812, "learning_rate": 4.186302092033901e-06, "loss": 0.0009, "step": 181440 }, { "epoch": 1.1937264396097444, "grad_norm": 0.04423843014169264, "learning_rate": 4.185735639866915e-06, "loss": 0.0016, "step": 181450 }, { "epoch": 1.193792227784978, "grad_norm": 0.007766408692996683, "learning_rate": 4.185169198435302e-06, "loss": 0.0006, "step": 181460 }, { "epoch": 1.1938580159602112, "grad_norm": 0.023647044552730645, "learning_rate": 4.184602767746528e-06, "loss": 0.0006, "step": 181470 }, { "epoch": 1.1939238041354447, "grad_norm": 0.027433349395934892, "learning_rate": 4.184036347808063e-06, "loss": 0.0005, "step": 181480 }, { "epoch": 1.193989592310678, "grad_norm": 0.10249721516707458, "learning_rate": 4.183469938627375e-06, "loss": 0.0007, "step": 181490 }, { "epoch": 1.1940553804859115, "grad_norm": 0.002751427596915198, "learning_rate": 4.182903540211928e-06, "loss": 0.0014, "step": 181500 }, { "epoch": 1.1941211686611448, "grad_norm": 0.025069398549043144, "learning_rate": 4.182337152569194e-06, "loss": 0.0022, "step": 181510 }, { "epoch": 1.1941869568363783, "grad_norm": 0.01671586552454903, "learning_rate": 4.181770775706638e-06, "loss": 0.0006, "step": 181520 }, { "epoch": 1.1942527450116116, "grad_norm": 0.0016673594624006134, "learning_rate": 4.181204409631729e-06, "loss": 0.0003, "step": 181530 }, { "epoch": 1.1943185331868449, "grad_norm": 0.01224062525696821, "learning_rate": 4.180638054351932e-06, "loss": 0.0006, "step": 181540 }, { "epoch": 1.1943843213620784, "grad_norm": 0.019270952407549675, "learning_rate": 4.180071709874714e-06, "loss": 0.0009, "step": 181550 }, { "epoch": 1.1944501095373117, "grad_norm": 0.24060213777609682, "learning_rate": 4.179505376207541e-06, "loss": 0.0014, "step": 181560 }, { "epoch": 1.1945158977125452, "grad_norm": 0.945330917711943, "learning_rate": 4.178939053357883e-06, "loss": 0.001, "step": 181570 }, { "epoch": 1.1945816858877785, "grad_norm": 0.039259385685818333, "learning_rate": 4.178372741333204e-06, "loss": 0.0008, "step": 181580 }, { "epoch": 1.194647474063012, "grad_norm": 0.00723412828809803, "learning_rate": 4.177806440140971e-06, "loss": 0.0004, "step": 181590 }, { "epoch": 1.1947132622382453, "grad_norm": 0.02943868296539372, "learning_rate": 4.177240149788651e-06, "loss": 0.0005, "step": 181600 }, { "epoch": 1.1947790504134788, "grad_norm": 0.005436271331182646, "learning_rate": 4.176673870283707e-06, "loss": 0.001, "step": 181610 }, { "epoch": 1.194844838588712, "grad_norm": 0.025759958299256425, "learning_rate": 4.176107601633608e-06, "loss": 0.0045, "step": 181620 }, { "epoch": 1.1949106267639453, "grad_norm": 0.046811142338627564, "learning_rate": 4.175541343845818e-06, "loss": 0.0005, "step": 181630 }, { "epoch": 1.1949764149391788, "grad_norm": 0.024883280762086266, "learning_rate": 4.1749750969278044e-06, "loss": 0.0011, "step": 181640 }, { "epoch": 1.1950422031144123, "grad_norm": 0.03264531923292582, "learning_rate": 4.174408860887031e-06, "loss": 0.0005, "step": 181650 }, { "epoch": 1.1951079912896456, "grad_norm": 0.0451999621897893, "learning_rate": 4.1738426357309635e-06, "loss": 0.0006, "step": 181660 }, { "epoch": 1.195173779464879, "grad_norm": 0.0879489825265529, "learning_rate": 4.173276421467068e-06, "loss": 0.0013, "step": 181670 }, { "epoch": 1.1952395676401124, "grad_norm": 0.033138420752393906, "learning_rate": 4.172710218102809e-06, "loss": 0.0004, "step": 181680 }, { "epoch": 1.1953053558153457, "grad_norm": 0.06754314559581599, "learning_rate": 4.172144025645649e-06, "loss": 0.0014, "step": 181690 }, { "epoch": 1.1953711439905792, "grad_norm": 0.09950383162389133, "learning_rate": 4.171577844103057e-06, "loss": 0.001, "step": 181700 }, { "epoch": 1.1954369321658125, "grad_norm": 0.010253320105016261, "learning_rate": 4.171011673482496e-06, "loss": 0.0014, "step": 181710 }, { "epoch": 1.195502720341046, "grad_norm": 0.05135710053648185, "learning_rate": 4.1704455137914305e-06, "loss": 0.0008, "step": 181720 }, { "epoch": 1.1955685085162793, "grad_norm": 0.016850941334172108, "learning_rate": 4.169879365037323e-06, "loss": 0.0004, "step": 181730 }, { "epoch": 1.1956342966915128, "grad_norm": 0.044173565197409136, "learning_rate": 4.169313227227641e-06, "loss": 0.0004, "step": 181740 }, { "epoch": 1.195700084866746, "grad_norm": 0.0073969639458542616, "learning_rate": 4.168747100369844e-06, "loss": 0.0003, "step": 181750 }, { "epoch": 1.1957658730419793, "grad_norm": 0.0002663435717355064, "learning_rate": 4.168180984471399e-06, "loss": 0.0006, "step": 181760 }, { "epoch": 1.1958316612172128, "grad_norm": 0.08042745265583978, "learning_rate": 4.167614879539771e-06, "loss": 0.001, "step": 181770 }, { "epoch": 1.1958974493924461, "grad_norm": 0.03190327595332578, "learning_rate": 4.167048785582422e-06, "loss": 0.001, "step": 181780 }, { "epoch": 1.1959632375676796, "grad_norm": 0.02418430479129126, "learning_rate": 4.1664827026068135e-06, "loss": 0.0006, "step": 181790 }, { "epoch": 1.196029025742913, "grad_norm": 0.011493775559492108, "learning_rate": 4.1659166306204124e-06, "loss": 0.0011, "step": 181800 }, { "epoch": 1.1960948139181464, "grad_norm": 0.0066866398881171175, "learning_rate": 4.165350569630679e-06, "loss": 0.001, "step": 181810 }, { "epoch": 1.1961606020933797, "grad_norm": 0.011520558253863928, "learning_rate": 4.164784519645077e-06, "loss": 0.0011, "step": 181820 }, { "epoch": 1.1962263902686132, "grad_norm": 0.0011512662661869756, "learning_rate": 4.164218480671072e-06, "loss": 0.0005, "step": 181830 }, { "epoch": 1.1962921784438465, "grad_norm": 0.001913208957361406, "learning_rate": 4.1636524527161235e-06, "loss": 0.0005, "step": 181840 }, { "epoch": 1.1963579666190798, "grad_norm": 0.048240836355193946, "learning_rate": 4.163086435787694e-06, "loss": 0.0013, "step": 181850 }, { "epoch": 1.1964237547943133, "grad_norm": 0.025309479900284267, "learning_rate": 4.162520429893249e-06, "loss": 0.0004, "step": 181860 }, { "epoch": 1.1964895429695466, "grad_norm": 0.007588487997980448, "learning_rate": 4.161954435040248e-06, "loss": 0.0006, "step": 181870 }, { "epoch": 1.19655533114478, "grad_norm": 0.048067435136243626, "learning_rate": 4.161388451236154e-06, "loss": 0.0013, "step": 181880 }, { "epoch": 1.1966211193200134, "grad_norm": 0.021416901229333873, "learning_rate": 4.160822478488427e-06, "loss": 0.0005, "step": 181890 }, { "epoch": 1.1966869074952469, "grad_norm": 0.02473897856674548, "learning_rate": 4.160256516804533e-06, "loss": 0.0005, "step": 181900 }, { "epoch": 1.1967526956704801, "grad_norm": 0.01773531000097031, "learning_rate": 4.159690566191931e-06, "loss": 0.0005, "step": 181910 }, { "epoch": 1.1968184838457137, "grad_norm": 0.005446784350174355, "learning_rate": 4.1591246266580835e-06, "loss": 0.0008, "step": 181920 }, { "epoch": 1.196884272020947, "grad_norm": 0.018636465074546058, "learning_rate": 4.158558698210452e-06, "loss": 0.0007, "step": 181930 }, { "epoch": 1.1969500601961802, "grad_norm": 0.014364967760305401, "learning_rate": 4.157992780856497e-06, "loss": 0.0003, "step": 181940 }, { "epoch": 1.1970158483714137, "grad_norm": 0.019917297250872542, "learning_rate": 4.157426874603678e-06, "loss": 0.0005, "step": 181950 }, { "epoch": 1.197081636546647, "grad_norm": 0.030499448818687307, "learning_rate": 4.15686097945946e-06, "loss": 0.001, "step": 181960 }, { "epoch": 1.1971474247218805, "grad_norm": 0.03900296680562498, "learning_rate": 4.1562950954313016e-06, "loss": 0.0005, "step": 181970 }, { "epoch": 1.1972132128971138, "grad_norm": 0.01490474195429496, "learning_rate": 4.155729222526664e-06, "loss": 0.0012, "step": 181980 }, { "epoch": 1.1972790010723473, "grad_norm": 0.014882551925977486, "learning_rate": 4.155163360753007e-06, "loss": 0.0005, "step": 181990 }, { "epoch": 1.1973447892475806, "grad_norm": 0.04541600710128953, "learning_rate": 4.1545975101177914e-06, "loss": 0.0013, "step": 182000 }, { "epoch": 1.197410577422814, "grad_norm": 0.04607003441509675, "learning_rate": 4.1540316706284775e-06, "loss": 0.0012, "step": 182010 }, { "epoch": 1.1974763655980474, "grad_norm": 0.000995772203744295, "learning_rate": 4.153465842292525e-06, "loss": 0.001, "step": 182020 }, { "epoch": 1.1975421537732809, "grad_norm": 0.06620812657598325, "learning_rate": 4.1529000251173954e-06, "loss": 0.0005, "step": 182030 }, { "epoch": 1.1976079419485142, "grad_norm": 0.01863499559532075, "learning_rate": 4.152334219110547e-06, "loss": 0.0011, "step": 182040 }, { "epoch": 1.1976737301237477, "grad_norm": 0.02538834955513341, "learning_rate": 4.151768424279438e-06, "loss": 0.0011, "step": 182050 }, { "epoch": 1.197739518298981, "grad_norm": 0.02086116139593723, "learning_rate": 4.151202640631533e-06, "loss": 0.0011, "step": 182060 }, { "epoch": 1.1978053064742142, "grad_norm": 0.014617911160573859, "learning_rate": 4.1506368681742855e-06, "loss": 0.0006, "step": 182070 }, { "epoch": 1.1978710946494477, "grad_norm": 0.026409363087513876, "learning_rate": 4.1500711069151576e-06, "loss": 0.0008, "step": 182080 }, { "epoch": 1.197936882824681, "grad_norm": 0.057325551333680384, "learning_rate": 4.149505356861609e-06, "loss": 0.001, "step": 182090 }, { "epoch": 1.1980026709999145, "grad_norm": 0.06468837480414327, "learning_rate": 4.148939618021098e-06, "loss": 0.0009, "step": 182100 }, { "epoch": 1.1980684591751478, "grad_norm": 0.03746757255577244, "learning_rate": 4.148373890401083e-06, "loss": 0.0016, "step": 182110 }, { "epoch": 1.1981342473503813, "grad_norm": 0.032238005073085295, "learning_rate": 4.1478081740090215e-06, "loss": 0.0003, "step": 182120 }, { "epoch": 1.1982000355256146, "grad_norm": 0.0025742475443294804, "learning_rate": 4.1472424688523745e-06, "loss": 0.0004, "step": 182130 }, { "epoch": 1.198265823700848, "grad_norm": 0.03329102377334977, "learning_rate": 4.146676774938598e-06, "loss": 0.0008, "step": 182140 }, { "epoch": 1.1983316118760814, "grad_norm": 0.07956357268840547, "learning_rate": 4.146111092275151e-06, "loss": 0.0007, "step": 182150 }, { "epoch": 1.1983974000513147, "grad_norm": 0.023869954801811914, "learning_rate": 4.145545420869493e-06, "loss": 0.001, "step": 182160 }, { "epoch": 1.1984631882265482, "grad_norm": 0.003286192060353776, "learning_rate": 4.144979760729081e-06, "loss": 0.0011, "step": 182170 }, { "epoch": 1.1985289764017815, "grad_norm": 0.023517447139444786, "learning_rate": 4.144414111861371e-06, "loss": 0.0004, "step": 182180 }, { "epoch": 1.198594764577015, "grad_norm": 0.012246713233048896, "learning_rate": 4.143848474273823e-06, "loss": 0.0008, "step": 182190 }, { "epoch": 1.1986605527522483, "grad_norm": 0.00853335831834889, "learning_rate": 4.143282847973894e-06, "loss": 0.0007, "step": 182200 }, { "epoch": 1.1987263409274818, "grad_norm": 0.3490071130607331, "learning_rate": 4.142717232969039e-06, "loss": 0.0007, "step": 182210 }, { "epoch": 1.198792129102715, "grad_norm": 0.019537295025810964, "learning_rate": 4.142151629266718e-06, "loss": 0.0003, "step": 182220 }, { "epoch": 1.1988579172779485, "grad_norm": 0.0024238605035256672, "learning_rate": 4.141586036874386e-06, "loss": 0.0006, "step": 182230 }, { "epoch": 1.1989237054531818, "grad_norm": 0.03020170643334417, "learning_rate": 4.141020455799502e-06, "loss": 0.0004, "step": 182240 }, { "epoch": 1.1989894936284151, "grad_norm": 0.05222304480693544, "learning_rate": 4.1404548860495205e-06, "loss": 0.0006, "step": 182250 }, { "epoch": 1.1990552818036486, "grad_norm": 0.07533135155119647, "learning_rate": 4.139889327631901e-06, "loss": 0.0016, "step": 182260 }, { "epoch": 1.199121069978882, "grad_norm": 0.008722703127503168, "learning_rate": 4.139323780554096e-06, "loss": 0.0004, "step": 182270 }, { "epoch": 1.1991868581541154, "grad_norm": 0.015763313077316623, "learning_rate": 4.138758244823563e-06, "loss": 0.0015, "step": 182280 }, { "epoch": 1.1992526463293487, "grad_norm": 0.15807417621243752, "learning_rate": 4.13819272044776e-06, "loss": 0.0012, "step": 182290 }, { "epoch": 1.1993184345045822, "grad_norm": 0.021016174559103765, "learning_rate": 4.137627207434142e-06, "loss": 0.0015, "step": 182300 }, { "epoch": 1.1993842226798155, "grad_norm": 0.00036081195747440163, "learning_rate": 4.137061705790163e-06, "loss": 0.0003, "step": 182310 }, { "epoch": 1.199450010855049, "grad_norm": 0.0049247817817213635, "learning_rate": 4.1364962155232805e-06, "loss": 0.0006, "step": 182320 }, { "epoch": 1.1995157990302823, "grad_norm": 0.005856266141497083, "learning_rate": 4.135930736640949e-06, "loss": 0.0004, "step": 182330 }, { "epoch": 1.1995815872055158, "grad_norm": 0.02892757760987905, "learning_rate": 4.1353652691506265e-06, "loss": 0.0005, "step": 182340 }, { "epoch": 1.199647375380749, "grad_norm": 0.031055334064674464, "learning_rate": 4.134799813059765e-06, "loss": 0.0007, "step": 182350 }, { "epoch": 1.1997131635559826, "grad_norm": 0.13843235575720558, "learning_rate": 4.134234368375821e-06, "loss": 0.0012, "step": 182360 }, { "epoch": 1.1997789517312158, "grad_norm": 0.04510656553279569, "learning_rate": 4.133668935106249e-06, "loss": 0.0007, "step": 182370 }, { "epoch": 1.1998447399064491, "grad_norm": 0.12424679543523565, "learning_rate": 4.133103513258503e-06, "loss": 0.0008, "step": 182380 }, { "epoch": 1.1999105280816826, "grad_norm": 0.008448086230657135, "learning_rate": 4.132538102840039e-06, "loss": 0.0011, "step": 182390 }, { "epoch": 1.199976316256916, "grad_norm": 0.03107072002211757, "learning_rate": 4.131972703858312e-06, "loss": 0.0005, "step": 182400 }, { "epoch": 1.2000421044321494, "grad_norm": 0.05145749922740196, "learning_rate": 4.131407316320774e-06, "loss": 0.0005, "step": 182410 }, { "epoch": 1.2001078926073827, "grad_norm": 0.0029909847571303644, "learning_rate": 4.130841940234881e-06, "loss": 0.0006, "step": 182420 }, { "epoch": 1.2001736807826162, "grad_norm": 0.055227859894321996, "learning_rate": 4.130276575608086e-06, "loss": 0.0006, "step": 182430 }, { "epoch": 1.2002394689578495, "grad_norm": 0.06652364377331017, "learning_rate": 4.129711222447844e-06, "loss": 0.0007, "step": 182440 }, { "epoch": 1.200305257133083, "grad_norm": 0.027604728007483818, "learning_rate": 4.129145880761607e-06, "loss": 0.0007, "step": 182450 }, { "epoch": 1.2003710453083163, "grad_norm": 0.007383292856495705, "learning_rate": 4.128580550556829e-06, "loss": 0.0013, "step": 182460 }, { "epoch": 1.2004368334835496, "grad_norm": 0.037948366445051986, "learning_rate": 4.128015231840965e-06, "loss": 0.001, "step": 182470 }, { "epoch": 1.200502621658783, "grad_norm": 0.0018404675303894376, "learning_rate": 4.127449924621466e-06, "loss": 0.0004, "step": 182480 }, { "epoch": 1.2005684098340164, "grad_norm": 0.07992777822724509, "learning_rate": 4.126884628905787e-06, "loss": 0.0007, "step": 182490 }, { "epoch": 1.2006341980092499, "grad_norm": 0.001350802563240867, "learning_rate": 4.12631934470138e-06, "loss": 0.0007, "step": 182500 }, { "epoch": 1.2006999861844831, "grad_norm": 0.0005414391946429182, "learning_rate": 4.125754072015698e-06, "loss": 0.0003, "step": 182510 }, { "epoch": 1.2007657743597167, "grad_norm": 0.009767863003365952, "learning_rate": 4.1251888108561925e-06, "loss": 0.0006, "step": 182520 }, { "epoch": 1.20083156253495, "grad_norm": 0.20216356034747246, "learning_rate": 4.124623561230318e-06, "loss": 0.0011, "step": 182530 }, { "epoch": 1.2008973507101834, "grad_norm": 0.07339190305566014, "learning_rate": 4.124058323145526e-06, "loss": 0.0009, "step": 182540 }, { "epoch": 1.2009631388854167, "grad_norm": 0.3163095379455425, "learning_rate": 4.123493096609268e-06, "loss": 0.0014, "step": 182550 }, { "epoch": 1.20102892706065, "grad_norm": 0.011329180731335224, "learning_rate": 4.122927881628996e-06, "loss": 0.0006, "step": 182560 }, { "epoch": 1.2010947152358835, "grad_norm": 0.026580234643397534, "learning_rate": 4.122362678212164e-06, "loss": 0.0004, "step": 182570 }, { "epoch": 1.2011605034111168, "grad_norm": 0.0022986755430376674, "learning_rate": 4.12179748636622e-06, "loss": 0.0003, "step": 182580 }, { "epoch": 1.2012262915863503, "grad_norm": 0.03580978669844347, "learning_rate": 4.121232306098619e-06, "loss": 0.0002, "step": 182590 }, { "epoch": 1.2012920797615836, "grad_norm": 0.02407512045392315, "learning_rate": 4.12066713741681e-06, "loss": 0.001, "step": 182600 }, { "epoch": 1.201357867936817, "grad_norm": 0.04348014819349422, "learning_rate": 4.120101980328247e-06, "loss": 0.0005, "step": 182610 }, { "epoch": 1.2014236561120504, "grad_norm": 0.005703641195809745, "learning_rate": 4.119536834840379e-06, "loss": 0.0004, "step": 182620 }, { "epoch": 1.2014894442872839, "grad_norm": 0.046488895553235994, "learning_rate": 4.1189717009606575e-06, "loss": 0.0007, "step": 182630 }, { "epoch": 1.2015552324625172, "grad_norm": 0.04696424889564581, "learning_rate": 4.1184065786965315e-06, "loss": 0.0009, "step": 182640 }, { "epoch": 1.2016210206377504, "grad_norm": 0.003882290280313229, "learning_rate": 4.117841468055455e-06, "loss": 0.0004, "step": 182650 }, { "epoch": 1.201686808812984, "grad_norm": 0.013913200967632912, "learning_rate": 4.117276369044877e-06, "loss": 0.0005, "step": 182660 }, { "epoch": 1.2017525969882175, "grad_norm": 0.0036460294151411054, "learning_rate": 4.116711281672247e-06, "loss": 0.0002, "step": 182670 }, { "epoch": 1.2018183851634507, "grad_norm": 0.030637822624667688, "learning_rate": 4.116146205945016e-06, "loss": 0.0006, "step": 182680 }, { "epoch": 1.201884173338684, "grad_norm": 0.0014153036049537528, "learning_rate": 4.115581141870634e-06, "loss": 0.0013, "step": 182690 }, { "epoch": 1.2019499615139175, "grad_norm": 0.1000969214264461, "learning_rate": 4.115016089456551e-06, "loss": 0.0006, "step": 182700 }, { "epoch": 1.2020157496891508, "grad_norm": 0.06660541416707119, "learning_rate": 4.114451048710216e-06, "loss": 0.0008, "step": 182710 }, { "epoch": 1.2020815378643843, "grad_norm": 0.0027250501706539607, "learning_rate": 4.1138860196390795e-06, "loss": 0.0007, "step": 182720 }, { "epoch": 1.2021473260396176, "grad_norm": 0.05427221826892789, "learning_rate": 4.113321002250591e-06, "loss": 0.0009, "step": 182730 }, { "epoch": 1.202213114214851, "grad_norm": 0.051033504196598474, "learning_rate": 4.112755996552199e-06, "loss": 0.0008, "step": 182740 }, { "epoch": 1.2022789023900844, "grad_norm": 0.039539740117646424, "learning_rate": 4.112191002551353e-06, "loss": 0.001, "step": 182750 }, { "epoch": 1.202344690565318, "grad_norm": 0.0017734045056236722, "learning_rate": 4.111626020255501e-06, "loss": 0.0006, "step": 182760 }, { "epoch": 1.2024104787405512, "grad_norm": 0.04367142209910274, "learning_rate": 4.111061049672093e-06, "loss": 0.0005, "step": 182770 }, { "epoch": 1.2024762669157845, "grad_norm": 0.06546477377484024, "learning_rate": 4.1104960908085775e-06, "loss": 0.0005, "step": 182780 }, { "epoch": 1.202542055091018, "grad_norm": 0.056733010386816364, "learning_rate": 4.109931143672403e-06, "loss": 0.0004, "step": 182790 }, { "epoch": 1.2026078432662513, "grad_norm": 0.036144847022767, "learning_rate": 4.109366208271018e-06, "loss": 0.0002, "step": 182800 }, { "epoch": 1.2026736314414848, "grad_norm": 0.007903388186295187, "learning_rate": 4.10880128461187e-06, "loss": 0.0006, "step": 182810 }, { "epoch": 1.202739419616718, "grad_norm": 0.10993977234434132, "learning_rate": 4.108236372702408e-06, "loss": 0.0009, "step": 182820 }, { "epoch": 1.2028052077919515, "grad_norm": 0.011770347924599267, "learning_rate": 4.107671472550078e-06, "loss": 0.0005, "step": 182830 }, { "epoch": 1.2028709959671848, "grad_norm": 0.14307018061545426, "learning_rate": 4.107106584162329e-06, "loss": 0.0015, "step": 182840 }, { "epoch": 1.2029367841424183, "grad_norm": 0.04250713664871749, "learning_rate": 4.106541707546609e-06, "loss": 0.0005, "step": 182850 }, { "epoch": 1.2030025723176516, "grad_norm": 0.027082188577812742, "learning_rate": 4.105976842710365e-06, "loss": 0.0008, "step": 182860 }, { "epoch": 1.203068360492885, "grad_norm": 0.019707857948171204, "learning_rate": 4.105411989661045e-06, "loss": 0.0006, "step": 182870 }, { "epoch": 1.2031341486681184, "grad_norm": 0.0574469114521256, "learning_rate": 4.104847148406094e-06, "loss": 0.0007, "step": 182880 }, { "epoch": 1.2031999368433517, "grad_norm": 0.028741601927898527, "learning_rate": 4.1042823189529615e-06, "loss": 0.0009, "step": 182890 }, { "epoch": 1.2032657250185852, "grad_norm": 0.029693181739903433, "learning_rate": 4.103717501309091e-06, "loss": 0.0005, "step": 182900 }, { "epoch": 1.2033315131938185, "grad_norm": 0.055105778441201674, "learning_rate": 4.103152695481932e-06, "loss": 0.0014, "step": 182910 }, { "epoch": 1.203397301369052, "grad_norm": 0.04686759253240851, "learning_rate": 4.1025879014789314e-06, "loss": 0.0005, "step": 182920 }, { "epoch": 1.2034630895442853, "grad_norm": 0.010935017605772035, "learning_rate": 4.1020231193075335e-06, "loss": 0.0004, "step": 182930 }, { "epoch": 1.2035288777195188, "grad_norm": 0.07704387830031556, "learning_rate": 4.101458348975185e-06, "loss": 0.0008, "step": 182940 }, { "epoch": 1.203594665894752, "grad_norm": 0.005807237025405832, "learning_rate": 4.1008935904893324e-06, "loss": 0.0005, "step": 182950 }, { "epoch": 1.2036604540699853, "grad_norm": 0.08647976177043824, "learning_rate": 4.100328843857422e-06, "loss": 0.0006, "step": 182960 }, { "epoch": 1.2037262422452188, "grad_norm": 0.04222229419804782, "learning_rate": 4.0997641090868965e-06, "loss": 0.0005, "step": 182970 }, { "epoch": 1.2037920304204524, "grad_norm": 0.015849925683174955, "learning_rate": 4.099199386185205e-06, "loss": 0.0005, "step": 182980 }, { "epoch": 1.2038578185956856, "grad_norm": 0.020076370702577253, "learning_rate": 4.0986346751597925e-06, "loss": 0.0009, "step": 182990 }, { "epoch": 1.203923606770919, "grad_norm": 0.0682899199887007, "learning_rate": 4.098069976018103e-06, "loss": 0.0005, "step": 183000 }, { "epoch": 1.2039893949461524, "grad_norm": 0.0009775524658853553, "learning_rate": 4.097505288767581e-06, "loss": 0.001, "step": 183010 }, { "epoch": 1.2040551831213857, "grad_norm": 0.0821883631408924, "learning_rate": 4.096940613415673e-06, "loss": 0.0006, "step": 183020 }, { "epoch": 1.2041209712966192, "grad_norm": 0.008433793000977364, "learning_rate": 4.096375949969822e-06, "loss": 0.0011, "step": 183030 }, { "epoch": 1.2041867594718525, "grad_norm": 0.025372803133020993, "learning_rate": 4.0958112984374755e-06, "loss": 0.0003, "step": 183040 }, { "epoch": 1.204252547647086, "grad_norm": 0.028061923102717358, "learning_rate": 4.0952466588260755e-06, "loss": 0.001, "step": 183050 }, { "epoch": 1.2043183358223193, "grad_norm": 0.02948414340711193, "learning_rate": 4.094682031143067e-06, "loss": 0.0005, "step": 183060 }, { "epoch": 1.2043841239975528, "grad_norm": 0.044065105253477246, "learning_rate": 4.0941174153958935e-06, "loss": 0.001, "step": 183070 }, { "epoch": 1.204449912172786, "grad_norm": 0.12018335200532929, "learning_rate": 4.093552811592e-06, "loss": 0.001, "step": 183080 }, { "epoch": 1.2045157003480194, "grad_norm": 0.0022884423272913766, "learning_rate": 4.09298821973883e-06, "loss": 0.0005, "step": 183090 }, { "epoch": 1.2045814885232529, "grad_norm": 0.01830475661358061, "learning_rate": 4.092423639843825e-06, "loss": 0.0003, "step": 183100 }, { "epoch": 1.2046472766984861, "grad_norm": 0.021714074568470198, "learning_rate": 4.091859071914432e-06, "loss": 0.0007, "step": 183110 }, { "epoch": 1.2047130648737197, "grad_norm": 0.2113559279720653, "learning_rate": 4.091294515958093e-06, "loss": 0.001, "step": 183120 }, { "epoch": 1.204778853048953, "grad_norm": 0.0021223481460928913, "learning_rate": 4.090729971982252e-06, "loss": 0.0012, "step": 183130 }, { "epoch": 1.2048446412241864, "grad_norm": 0.0021387532867458434, "learning_rate": 4.090165439994349e-06, "loss": 0.001, "step": 183140 }, { "epoch": 1.2049104293994197, "grad_norm": 0.017722318851417677, "learning_rate": 4.08960092000183e-06, "loss": 0.0003, "step": 183150 }, { "epoch": 1.2049762175746532, "grad_norm": 0.20901137922474924, "learning_rate": 4.089036412012136e-06, "loss": 0.001, "step": 183160 }, { "epoch": 1.2050420057498865, "grad_norm": 0.046447237986302355, "learning_rate": 4.08847191603271e-06, "loss": 0.0003, "step": 183170 }, { "epoch": 1.2051077939251198, "grad_norm": 0.010912019706893185, "learning_rate": 4.087907432070996e-06, "loss": 0.0009, "step": 183180 }, { "epoch": 1.2051735821003533, "grad_norm": 0.002299938152618661, "learning_rate": 4.087342960134433e-06, "loss": 0.0006, "step": 183190 }, { "epoch": 1.2052393702755866, "grad_norm": 0.012247971851015317, "learning_rate": 4.086778500230466e-06, "loss": 0.0005, "step": 183200 }, { "epoch": 1.20530515845082, "grad_norm": 0.002804524572977826, "learning_rate": 4.086214052366535e-06, "loss": 0.001, "step": 183210 }, { "epoch": 1.2053709466260534, "grad_norm": 0.02835485446462058, "learning_rate": 4.085649616550083e-06, "loss": 0.0005, "step": 183220 }, { "epoch": 1.2054367348012869, "grad_norm": 0.03257564618720849, "learning_rate": 4.08508519278855e-06, "loss": 0.0004, "step": 183230 }, { "epoch": 1.2055025229765202, "grad_norm": 0.02645271527155776, "learning_rate": 4.084520781089381e-06, "loss": 0.0006, "step": 183240 }, { "epoch": 1.2055683111517537, "grad_norm": 0.07116422125093934, "learning_rate": 4.083956381460013e-06, "loss": 0.0008, "step": 183250 }, { "epoch": 1.205634099326987, "grad_norm": 0.05408961911484619, "learning_rate": 4.0833919939078895e-06, "loss": 0.001, "step": 183260 }, { "epoch": 1.2056998875022202, "grad_norm": 0.02352141981108095, "learning_rate": 4.082827618440451e-06, "loss": 0.0008, "step": 183270 }, { "epoch": 1.2057656756774537, "grad_norm": 0.15013129471801157, "learning_rate": 4.082263255065139e-06, "loss": 0.0012, "step": 183280 }, { "epoch": 1.205831463852687, "grad_norm": 0.034354742773357955, "learning_rate": 4.081698903789391e-06, "loss": 0.0005, "step": 183290 }, { "epoch": 1.2058972520279205, "grad_norm": 0.0027587131780363473, "learning_rate": 4.081134564620651e-06, "loss": 0.0003, "step": 183300 }, { "epoch": 1.2059630402031538, "grad_norm": 0.00883879533203035, "learning_rate": 4.080570237566359e-06, "loss": 0.001, "step": 183310 }, { "epoch": 1.2060288283783873, "grad_norm": 0.024455162519501417, "learning_rate": 4.080005922633954e-06, "loss": 0.0005, "step": 183320 }, { "epoch": 1.2060946165536206, "grad_norm": 0.06176873716203178, "learning_rate": 4.079441619830876e-06, "loss": 0.0005, "step": 183330 }, { "epoch": 1.206160404728854, "grad_norm": 0.026295039991124362, "learning_rate": 4.078877329164565e-06, "loss": 0.0008, "step": 183340 }, { "epoch": 1.2062261929040874, "grad_norm": 0.011445537832673438, "learning_rate": 4.078313050642462e-06, "loss": 0.0005, "step": 183350 }, { "epoch": 1.206291981079321, "grad_norm": 0.15266919688072197, "learning_rate": 4.077748784272003e-06, "loss": 0.0015, "step": 183360 }, { "epoch": 1.2063577692545542, "grad_norm": 0.00015607726340969644, "learning_rate": 4.0771845300606324e-06, "loss": 0.0005, "step": 183370 }, { "epoch": 1.2064235574297877, "grad_norm": 0.04794126310303644, "learning_rate": 4.076620288015785e-06, "loss": 0.0007, "step": 183380 }, { "epoch": 1.206489345605021, "grad_norm": 0.04239770368164665, "learning_rate": 4.076056058144904e-06, "loss": 0.0006, "step": 183390 }, { "epoch": 1.2065551337802543, "grad_norm": 0.012067239864535588, "learning_rate": 4.075491840455424e-06, "loss": 0.0008, "step": 183400 }, { "epoch": 1.2066209219554878, "grad_norm": 0.002927535767621236, "learning_rate": 4.074927634954786e-06, "loss": 0.0006, "step": 183410 }, { "epoch": 1.206686710130721, "grad_norm": 0.008150927668606842, "learning_rate": 4.0743634416504274e-06, "loss": 0.0006, "step": 183420 }, { "epoch": 1.2067524983059545, "grad_norm": 0.019995725725437095, "learning_rate": 4.073799260549788e-06, "loss": 0.0004, "step": 183430 }, { "epoch": 1.2068182864811878, "grad_norm": 0.022817244871632936, "learning_rate": 4.073235091660306e-06, "loss": 0.0007, "step": 183440 }, { "epoch": 1.2068840746564213, "grad_norm": 0.0007194932417656444, "learning_rate": 4.072670934989418e-06, "loss": 0.0003, "step": 183450 }, { "epoch": 1.2069498628316546, "grad_norm": 0.05063295291847289, "learning_rate": 4.072106790544564e-06, "loss": 0.0006, "step": 183460 }, { "epoch": 1.2070156510068881, "grad_norm": 0.012902088782180725, "learning_rate": 4.07154265833318e-06, "loss": 0.0005, "step": 183470 }, { "epoch": 1.2070814391821214, "grad_norm": 0.0012780198801127733, "learning_rate": 4.070978538362704e-06, "loss": 0.0004, "step": 183480 }, { "epoch": 1.2071472273573547, "grad_norm": 0.03759870811614248, "learning_rate": 4.070414430640573e-06, "loss": 0.0011, "step": 183490 }, { "epoch": 1.2072130155325882, "grad_norm": 0.018863760366448556, "learning_rate": 4.069850335174226e-06, "loss": 0.0006, "step": 183500 }, { "epoch": 1.2072788037078215, "grad_norm": 0.027520639877156913, "learning_rate": 4.069286251971098e-06, "loss": 0.0008, "step": 183510 }, { "epoch": 1.207344591883055, "grad_norm": 0.038196355131636085, "learning_rate": 4.068722181038628e-06, "loss": 0.001, "step": 183520 }, { "epoch": 1.2074103800582883, "grad_norm": 0.0420723432742075, "learning_rate": 4.068158122384251e-06, "loss": 0.0007, "step": 183530 }, { "epoch": 1.2074761682335218, "grad_norm": 0.020551803700696262, "learning_rate": 4.067594076015406e-06, "loss": 0.0005, "step": 183540 }, { "epoch": 1.207541956408755, "grad_norm": 0.09636630794750188, "learning_rate": 4.067030041939525e-06, "loss": 0.001, "step": 183550 }, { "epoch": 1.2076077445839886, "grad_norm": 0.09485783399082097, "learning_rate": 4.066466020164048e-06, "loss": 0.001, "step": 183560 }, { "epoch": 1.2076735327592218, "grad_norm": 0.0058352077154556, "learning_rate": 4.065902010696412e-06, "loss": 0.0011, "step": 183570 }, { "epoch": 1.2077393209344551, "grad_norm": 0.009473190368282364, "learning_rate": 4.06533801354405e-06, "loss": 0.0006, "step": 183580 }, { "epoch": 1.2078051091096886, "grad_norm": 0.006900540318695166, "learning_rate": 4.0647740287143984e-06, "loss": 0.0006, "step": 183590 }, { "epoch": 1.207870897284922, "grad_norm": 0.014458741622330573, "learning_rate": 4.064210056214895e-06, "loss": 0.0006, "step": 183600 }, { "epoch": 1.2079366854601554, "grad_norm": 0.037338659636830515, "learning_rate": 4.063646096052973e-06, "loss": 0.0007, "step": 183610 }, { "epoch": 1.2080024736353887, "grad_norm": 0.060786041484080816, "learning_rate": 4.063082148236067e-06, "loss": 0.002, "step": 183620 }, { "epoch": 1.2080682618106222, "grad_norm": 0.052412375631657064, "learning_rate": 4.062518212771617e-06, "loss": 0.0005, "step": 183630 }, { "epoch": 1.2081340499858555, "grad_norm": 0.0007960950256155216, "learning_rate": 4.061954289667053e-06, "loss": 0.0008, "step": 183640 }, { "epoch": 1.208199838161089, "grad_norm": 0.014356065944734377, "learning_rate": 4.061390378929811e-06, "loss": 0.0005, "step": 183650 }, { "epoch": 1.2082656263363223, "grad_norm": 0.05323512356847719, "learning_rate": 4.060826480567327e-06, "loss": 0.0006, "step": 183660 }, { "epoch": 1.2083314145115556, "grad_norm": 0.054553383763599604, "learning_rate": 4.060262594587035e-06, "loss": 0.0007, "step": 183670 }, { "epoch": 1.208397202686789, "grad_norm": 0.01043142416331537, "learning_rate": 4.059698720996368e-06, "loss": 0.0013, "step": 183680 }, { "epoch": 1.2084629908620226, "grad_norm": 0.03315701037025599, "learning_rate": 4.059134859802761e-06, "loss": 0.0017, "step": 183690 }, { "epoch": 1.2085287790372559, "grad_norm": 0.029409355722267116, "learning_rate": 4.05857101101365e-06, "loss": 0.0007, "step": 183700 }, { "epoch": 1.2085945672124891, "grad_norm": 0.00882230694184136, "learning_rate": 4.058007174636466e-06, "loss": 0.001, "step": 183710 }, { "epoch": 1.2086603553877227, "grad_norm": 0.02621252759299621, "learning_rate": 4.057443350678645e-06, "loss": 0.0007, "step": 183720 }, { "epoch": 1.208726143562956, "grad_norm": 0.004247994511263733, "learning_rate": 4.056879539147619e-06, "loss": 0.0008, "step": 183730 }, { "epoch": 1.2087919317381894, "grad_norm": 0.052933913765815264, "learning_rate": 4.056315740050821e-06, "loss": 0.0009, "step": 183740 }, { "epoch": 1.2088577199134227, "grad_norm": 0.01611702117887329, "learning_rate": 4.055751953395684e-06, "loss": 0.0008, "step": 183750 }, { "epoch": 1.2089235080886562, "grad_norm": 0.006055279267802593, "learning_rate": 4.055188179189643e-06, "loss": 0.0002, "step": 183760 }, { "epoch": 1.2089892962638895, "grad_norm": 0.07933197503821285, "learning_rate": 4.05462441744013e-06, "loss": 0.002, "step": 183770 }, { "epoch": 1.209055084439123, "grad_norm": 0.04137263599181519, "learning_rate": 4.054060668154578e-06, "loss": 0.0009, "step": 183780 }, { "epoch": 1.2091208726143563, "grad_norm": 0.004240391034005699, "learning_rate": 4.053496931340419e-06, "loss": 0.0005, "step": 183790 }, { "epoch": 1.2091866607895896, "grad_norm": 0.08621319547408106, "learning_rate": 4.052933207005086e-06, "loss": 0.0009, "step": 183800 }, { "epoch": 1.209252448964823, "grad_norm": 0.021068217457954008, "learning_rate": 4.052369495156009e-06, "loss": 0.0011, "step": 183810 }, { "epoch": 1.2093182371400564, "grad_norm": 0.00011502381144618158, "learning_rate": 4.051805795800623e-06, "loss": 0.0004, "step": 183820 }, { "epoch": 1.2093840253152899, "grad_norm": 0.024921048198254667, "learning_rate": 4.051242108946358e-06, "loss": 0.001, "step": 183830 }, { "epoch": 1.2094498134905232, "grad_norm": 0.016665578361251543, "learning_rate": 4.0506784346006475e-06, "loss": 0.0006, "step": 183840 }, { "epoch": 1.2095156016657567, "grad_norm": 0.072099319311336, "learning_rate": 4.050114772770921e-06, "loss": 0.0008, "step": 183850 }, { "epoch": 1.20958138984099, "grad_norm": 0.004374331646118205, "learning_rate": 4.049551123464611e-06, "loss": 0.0016, "step": 183860 }, { "epoch": 1.2096471780162235, "grad_norm": 0.03267981873610363, "learning_rate": 4.04898748668915e-06, "loss": 0.0019, "step": 183870 }, { "epoch": 1.2097129661914567, "grad_norm": 0.0021073233522947215, "learning_rate": 4.0484238624519645e-06, "loss": 0.0004, "step": 183880 }, { "epoch": 1.20977875436669, "grad_norm": 0.08417564347301408, "learning_rate": 4.047860250760491e-06, "loss": 0.0013, "step": 183890 }, { "epoch": 1.2098445425419235, "grad_norm": 0.03289625538333959, "learning_rate": 4.0472966516221576e-06, "loss": 0.0004, "step": 183900 }, { "epoch": 1.2099103307171568, "grad_norm": 0.005345294175875733, "learning_rate": 4.046733065044394e-06, "loss": 0.0006, "step": 183910 }, { "epoch": 1.2099761188923903, "grad_norm": 0.009468895520550313, "learning_rate": 4.0461694910346325e-06, "loss": 0.0005, "step": 183920 }, { "epoch": 1.2100419070676236, "grad_norm": 0.03969668102213549, "learning_rate": 4.045605929600303e-06, "loss": 0.0006, "step": 183930 }, { "epoch": 1.210107695242857, "grad_norm": 0.015082570757329454, "learning_rate": 4.045042380748832e-06, "loss": 0.0005, "step": 183940 }, { "epoch": 1.2101734834180904, "grad_norm": 0.00975104953390461, "learning_rate": 4.044478844487656e-06, "loss": 0.0007, "step": 183950 }, { "epoch": 1.210239271593324, "grad_norm": 0.028045801672203965, "learning_rate": 4.0439153208241995e-06, "loss": 0.0004, "step": 183960 }, { "epoch": 1.2103050597685572, "grad_norm": 0.06391647154333403, "learning_rate": 4.043351809765895e-06, "loss": 0.0006, "step": 183970 }, { "epoch": 1.2103708479437905, "grad_norm": 0.0432842605595098, "learning_rate": 4.04278831132017e-06, "loss": 0.0006, "step": 183980 }, { "epoch": 1.210436636119024, "grad_norm": 0.028332071199414, "learning_rate": 4.042224825494454e-06, "loss": 0.0009, "step": 183990 }, { "epoch": 1.2105024242942575, "grad_norm": 0.03703609824396462, "learning_rate": 4.041661352296177e-06, "loss": 0.0013, "step": 184000 }, { "epoch": 1.2105682124694908, "grad_norm": 0.003140507860303292, "learning_rate": 4.041097891732767e-06, "loss": 0.0004, "step": 184010 }, { "epoch": 1.210634000644724, "grad_norm": 0.004272132857967036, "learning_rate": 4.040534443811653e-06, "loss": 0.0004, "step": 184020 }, { "epoch": 1.2106997888199575, "grad_norm": 0.0003064946329082787, "learning_rate": 4.039971008540264e-06, "loss": 0.0005, "step": 184030 }, { "epoch": 1.2107655769951908, "grad_norm": 0.09125542912517284, "learning_rate": 4.039407585926029e-06, "loss": 0.0009, "step": 184040 }, { "epoch": 1.2108313651704243, "grad_norm": 0.029243265174553855, "learning_rate": 4.038844175976374e-06, "loss": 0.0006, "step": 184050 }, { "epoch": 1.2108971533456576, "grad_norm": 0.02170513316116597, "learning_rate": 4.038280778698729e-06, "loss": 0.0007, "step": 184060 }, { "epoch": 1.2109629415208911, "grad_norm": 0.006525999928104714, "learning_rate": 4.03771739410052e-06, "loss": 0.0006, "step": 184070 }, { "epoch": 1.2110287296961244, "grad_norm": 0.02486299890609747, "learning_rate": 4.0371540221891776e-06, "loss": 0.0009, "step": 184080 }, { "epoch": 1.211094517871358, "grad_norm": 0.0008526299883474729, "learning_rate": 4.036590662972128e-06, "loss": 0.0009, "step": 184090 }, { "epoch": 1.2111603060465912, "grad_norm": 0.06550870483050261, "learning_rate": 4.0360273164567985e-06, "loss": 0.0011, "step": 184100 }, { "epoch": 1.2112260942218245, "grad_norm": 0.05154042111597538, "learning_rate": 4.035463982650615e-06, "loss": 0.0018, "step": 184110 }, { "epoch": 1.211291882397058, "grad_norm": 0.016759175341256302, "learning_rate": 4.0349006615610076e-06, "loss": 0.0004, "step": 184120 }, { "epoch": 1.2113576705722913, "grad_norm": 0.025738385064118033, "learning_rate": 4.0343373531954e-06, "loss": 0.0005, "step": 184130 }, { "epoch": 1.2114234587475248, "grad_norm": 0.02030103957427237, "learning_rate": 4.033774057561221e-06, "loss": 0.0006, "step": 184140 }, { "epoch": 1.211489246922758, "grad_norm": 0.06618481789109336, "learning_rate": 4.0332107746658984e-06, "loss": 0.0009, "step": 184150 }, { "epoch": 1.2115550350979916, "grad_norm": 0.021215562106111717, "learning_rate": 4.032647504516854e-06, "loss": 0.0008, "step": 184160 }, { "epoch": 1.2116208232732248, "grad_norm": 0.03812685523143618, "learning_rate": 4.032084247121518e-06, "loss": 0.0005, "step": 184170 }, { "epoch": 1.2116866114484584, "grad_norm": 0.036437087486964406, "learning_rate": 4.031521002487315e-06, "loss": 0.0005, "step": 184180 }, { "epoch": 1.2117523996236916, "grad_norm": 0.05532718327458963, "learning_rate": 4.0309577706216705e-06, "loss": 0.0008, "step": 184190 }, { "epoch": 1.211818187798925, "grad_norm": 0.0020149466911043193, "learning_rate": 4.030394551532012e-06, "loss": 0.0005, "step": 184200 }, { "epoch": 1.2118839759741584, "grad_norm": 0.028440438667760384, "learning_rate": 4.0298313452257635e-06, "loss": 0.0006, "step": 184210 }, { "epoch": 1.2119497641493917, "grad_norm": 0.004528508080612499, "learning_rate": 4.0292681517103505e-06, "loss": 0.0006, "step": 184220 }, { "epoch": 1.2120155523246252, "grad_norm": 0.004179333298480839, "learning_rate": 4.028704970993199e-06, "loss": 0.0006, "step": 184230 }, { "epoch": 1.2120813404998585, "grad_norm": 0.0028274857893451056, "learning_rate": 4.0281418030817325e-06, "loss": 0.0005, "step": 184240 }, { "epoch": 1.212147128675092, "grad_norm": 0.05126971507008552, "learning_rate": 4.027578647983378e-06, "loss": 0.0005, "step": 184250 }, { "epoch": 1.2122129168503253, "grad_norm": 0.08057392186440168, "learning_rate": 4.027015505705559e-06, "loss": 0.0007, "step": 184260 }, { "epoch": 1.2122787050255588, "grad_norm": 0.01163694009087135, "learning_rate": 4.026452376255701e-06, "loss": 0.0015, "step": 184270 }, { "epoch": 1.212344493200792, "grad_norm": 0.061489736117607816, "learning_rate": 4.025889259641226e-06, "loss": 0.0005, "step": 184280 }, { "epoch": 1.2124102813760254, "grad_norm": 0.08128487719079865, "learning_rate": 4.025326155869561e-06, "loss": 0.0011, "step": 184290 }, { "epoch": 1.2124760695512589, "grad_norm": 0.04189200533445884, "learning_rate": 4.024763064948128e-06, "loss": 0.0008, "step": 184300 }, { "epoch": 1.2125418577264921, "grad_norm": 0.00948891894225898, "learning_rate": 4.0241999868843525e-06, "loss": 0.0012, "step": 184310 }, { "epoch": 1.2126076459017257, "grad_norm": 0.008406591395494002, "learning_rate": 4.0236369216856565e-06, "loss": 0.0003, "step": 184320 }, { "epoch": 1.212673434076959, "grad_norm": 0.15426358676960586, "learning_rate": 4.023073869359466e-06, "loss": 0.0014, "step": 184330 }, { "epoch": 1.2127392222521924, "grad_norm": 0.06555637612152819, "learning_rate": 4.022510829913202e-06, "loss": 0.0007, "step": 184340 }, { "epoch": 1.2128050104274257, "grad_norm": 0.41827433749942083, "learning_rate": 4.021947803354288e-06, "loss": 0.0007, "step": 184350 }, { "epoch": 1.2128707986026592, "grad_norm": 0.09885535555908308, "learning_rate": 4.021384789690148e-06, "loss": 0.0015, "step": 184360 }, { "epoch": 1.2129365867778925, "grad_norm": 0.004344435949277066, "learning_rate": 4.020821788928204e-06, "loss": 0.0008, "step": 184370 }, { "epoch": 1.213002374953126, "grad_norm": 0.026303786049139213, "learning_rate": 4.020258801075879e-06, "loss": 0.0015, "step": 184380 }, { "epoch": 1.2130681631283593, "grad_norm": 0.015955545332234727, "learning_rate": 4.019695826140596e-06, "loss": 0.0005, "step": 184390 }, { "epoch": 1.2131339513035928, "grad_norm": 0.020118546125909024, "learning_rate": 4.0191328641297764e-06, "loss": 0.001, "step": 184400 }, { "epoch": 1.213199739478826, "grad_norm": 0.02355621592693607, "learning_rate": 4.018569915050844e-06, "loss": 0.0006, "step": 184410 }, { "epoch": 1.2132655276540594, "grad_norm": 0.0784149912214609, "learning_rate": 4.018006978911219e-06, "loss": 0.0006, "step": 184420 }, { "epoch": 1.2133313158292929, "grad_norm": 0.02032657754138889, "learning_rate": 4.017444055718322e-06, "loss": 0.0003, "step": 184430 }, { "epoch": 1.2133971040045262, "grad_norm": 0.08012945088034268, "learning_rate": 4.016881145479578e-06, "loss": 0.0009, "step": 184440 }, { "epoch": 1.2134628921797597, "grad_norm": 0.10557152802601777, "learning_rate": 4.016318248202408e-06, "loss": 0.0013, "step": 184450 }, { "epoch": 1.213528680354993, "grad_norm": 0.02133933914345981, "learning_rate": 4.015755363894232e-06, "loss": 0.0002, "step": 184460 }, { "epoch": 1.2135944685302265, "grad_norm": 0.03953490548058816, "learning_rate": 4.015192492562471e-06, "loss": 0.0005, "step": 184470 }, { "epoch": 1.2136602567054597, "grad_norm": 0.06610985206921365, "learning_rate": 4.014629634214547e-06, "loss": 0.0009, "step": 184480 }, { "epoch": 1.2137260448806932, "grad_norm": 0.0122400358383275, "learning_rate": 4.01406678885788e-06, "loss": 0.0009, "step": 184490 }, { "epoch": 1.2137918330559265, "grad_norm": 0.016551710153234398, "learning_rate": 4.013503956499889e-06, "loss": 0.0007, "step": 184500 }, { "epoch": 1.2138576212311598, "grad_norm": 0.00036902048074006696, "learning_rate": 4.0129411371479984e-06, "loss": 0.0007, "step": 184510 }, { "epoch": 1.2139234094063933, "grad_norm": 0.010645730839624258, "learning_rate": 4.012378330809626e-06, "loss": 0.001, "step": 184520 }, { "epoch": 1.2139891975816266, "grad_norm": 0.03367776476609917, "learning_rate": 4.011815537492193e-06, "loss": 0.0009, "step": 184530 }, { "epoch": 1.21405498575686, "grad_norm": 0.015132276272435172, "learning_rate": 4.011252757203118e-06, "loss": 0.0008, "step": 184540 }, { "epoch": 1.2141207739320934, "grad_norm": 0.014960506919256671, "learning_rate": 4.010689989949822e-06, "loss": 0.0006, "step": 184550 }, { "epoch": 1.214186562107327, "grad_norm": 0.007368704506521379, "learning_rate": 4.010127235739723e-06, "loss": 0.0007, "step": 184560 }, { "epoch": 1.2142523502825602, "grad_norm": 0.01687653028537894, "learning_rate": 4.009564494580241e-06, "loss": 0.001, "step": 184570 }, { "epoch": 1.2143181384577937, "grad_norm": 0.15198113797541493, "learning_rate": 4.009001766478797e-06, "loss": 0.001, "step": 184580 }, { "epoch": 1.214383926633027, "grad_norm": 0.04516537753344205, "learning_rate": 4.008439051442809e-06, "loss": 0.0006, "step": 184590 }, { "epoch": 1.2144497148082603, "grad_norm": 0.0113859515756838, "learning_rate": 4.007876349479696e-06, "loss": 0.0006, "step": 184600 }, { "epoch": 1.2145155029834938, "grad_norm": 0.0012916169975310046, "learning_rate": 4.007313660596875e-06, "loss": 0.0003, "step": 184610 }, { "epoch": 1.214581291158727, "grad_norm": 0.0022497781749711795, "learning_rate": 4.006750984801768e-06, "loss": 0.0012, "step": 184620 }, { "epoch": 1.2146470793339605, "grad_norm": 0.18135658820144357, "learning_rate": 4.006188322101789e-06, "loss": 0.0011, "step": 184630 }, { "epoch": 1.2147128675091938, "grad_norm": 0.07637723237608825, "learning_rate": 4.00562567250436e-06, "loss": 0.0006, "step": 184640 }, { "epoch": 1.2147786556844273, "grad_norm": 0.0031421191684513116, "learning_rate": 4.005063036016898e-06, "loss": 0.0003, "step": 184650 }, { "epoch": 1.2148444438596606, "grad_norm": 0.012619010769312548, "learning_rate": 4.00450041264682e-06, "loss": 0.001, "step": 184660 }, { "epoch": 1.2149102320348941, "grad_norm": 0.022742770385873388, "learning_rate": 4.003937802401545e-06, "loss": 0.0005, "step": 184670 }, { "epoch": 1.2149760202101274, "grad_norm": 0.0211282334467825, "learning_rate": 4.00337520528849e-06, "loss": 0.0008, "step": 184680 }, { "epoch": 1.215041808385361, "grad_norm": 0.004414728826986618, "learning_rate": 4.00281262131507e-06, "loss": 0.0013, "step": 184690 }, { "epoch": 1.2151075965605942, "grad_norm": 0.025156898940050233, "learning_rate": 4.002250050488706e-06, "loss": 0.0005, "step": 184700 }, { "epoch": 1.2151733847358277, "grad_norm": 0.003884341658497343, "learning_rate": 4.0016874928168135e-06, "loss": 0.0006, "step": 184710 }, { "epoch": 1.215239172911061, "grad_norm": 0.021244903338083224, "learning_rate": 4.00112494830681e-06, "loss": 0.0003, "step": 184720 }, { "epoch": 1.2153049610862943, "grad_norm": 0.0233675188206582, "learning_rate": 4.00056241696611e-06, "loss": 0.0009, "step": 184730 }, { "epoch": 1.2153707492615278, "grad_norm": 0.0018486943517509384, "learning_rate": 3.999999898802132e-06, "loss": 0.0006, "step": 184740 }, { "epoch": 1.215436537436761, "grad_norm": 0.01774257705290584, "learning_rate": 3.999437393822292e-06, "loss": 0.0006, "step": 184750 }, { "epoch": 1.2155023256119946, "grad_norm": 0.035968354728514056, "learning_rate": 3.998874902034004e-06, "loss": 0.0006, "step": 184760 }, { "epoch": 1.2155681137872278, "grad_norm": 0.030175442335576563, "learning_rate": 3.9983124234446865e-06, "loss": 0.0006, "step": 184770 }, { "epoch": 1.2156339019624613, "grad_norm": 0.07383109750309168, "learning_rate": 3.9977499580617555e-06, "loss": 0.0008, "step": 184780 }, { "epoch": 1.2156996901376946, "grad_norm": 0.08096653297086577, "learning_rate": 3.997187505892626e-06, "loss": 0.0005, "step": 184790 }, { "epoch": 1.2157654783129281, "grad_norm": 0.027932248418279775, "learning_rate": 3.996625066944713e-06, "loss": 0.0006, "step": 184800 }, { "epoch": 1.2158312664881614, "grad_norm": 0.08973147328275365, "learning_rate": 3.996062641225431e-06, "loss": 0.0008, "step": 184810 }, { "epoch": 1.2158970546633947, "grad_norm": 0.05076720012092903, "learning_rate": 3.995500228742195e-06, "loss": 0.0005, "step": 184820 }, { "epoch": 1.2159628428386282, "grad_norm": 0.09158845009325735, "learning_rate": 3.994937829502423e-06, "loss": 0.001, "step": 184830 }, { "epoch": 1.2160286310138615, "grad_norm": 0.000403050877104869, "learning_rate": 3.994375443513527e-06, "loss": 0.0004, "step": 184840 }, { "epoch": 1.216094419189095, "grad_norm": 0.01735167593997369, "learning_rate": 3.993813070782923e-06, "loss": 0.0005, "step": 184850 }, { "epoch": 1.2161602073643283, "grad_norm": 0.061917491162608626, "learning_rate": 3.993250711318024e-06, "loss": 0.0013, "step": 184860 }, { "epoch": 1.2162259955395618, "grad_norm": 0.059306258313994976, "learning_rate": 3.992688365126244e-06, "loss": 0.0011, "step": 184870 }, { "epoch": 1.216291783714795, "grad_norm": 0.08346657619188938, "learning_rate": 3.992126032215e-06, "loss": 0.0008, "step": 184880 }, { "epoch": 1.2163575718900286, "grad_norm": 0.1183068916092504, "learning_rate": 3.991563712591701e-06, "loss": 0.001, "step": 184890 }, { "epoch": 1.2164233600652619, "grad_norm": 0.030029088143296412, "learning_rate": 3.991001406263765e-06, "loss": 0.0008, "step": 184900 }, { "epoch": 1.2164891482404951, "grad_norm": 0.05804366161093563, "learning_rate": 3.990439113238605e-06, "loss": 0.0006, "step": 184910 }, { "epoch": 1.2165549364157286, "grad_norm": 0.1472952147106056, "learning_rate": 3.989876833523632e-06, "loss": 0.0011, "step": 184920 }, { "epoch": 1.216620724590962, "grad_norm": 0.01877678406463273, "learning_rate": 3.989314567126261e-06, "loss": 0.0005, "step": 184930 }, { "epoch": 1.2166865127661954, "grad_norm": 0.0018886999606405898, "learning_rate": 3.9887523140539055e-06, "loss": 0.0003, "step": 184940 }, { "epoch": 1.2167523009414287, "grad_norm": 0.0009941746870314786, "learning_rate": 3.988190074313976e-06, "loss": 0.0004, "step": 184950 }, { "epoch": 1.2168180891166622, "grad_norm": 0.022171999487464827, "learning_rate": 3.987627847913886e-06, "loss": 0.0004, "step": 184960 }, { "epoch": 1.2168838772918955, "grad_norm": 0.05528648549889959, "learning_rate": 3.98706563486105e-06, "loss": 0.0012, "step": 184970 }, { "epoch": 1.216949665467129, "grad_norm": 0.0367400879688439, "learning_rate": 3.986503435162878e-06, "loss": 0.0006, "step": 184980 }, { "epoch": 1.2170154536423623, "grad_norm": 0.0031540405700114255, "learning_rate": 3.985941248826784e-06, "loss": 0.0008, "step": 184990 }, { "epoch": 1.2170812418175956, "grad_norm": 0.0009260003592950315, "learning_rate": 3.985379075860178e-06, "loss": 0.0006, "step": 185000 }, { "epoch": 1.217147029992829, "grad_norm": 0.01383528679735905, "learning_rate": 3.984816916270472e-06, "loss": 0.0009, "step": 185010 }, { "epoch": 1.2172128181680626, "grad_norm": 0.0027904501883686244, "learning_rate": 3.984254770065078e-06, "loss": 0.0009, "step": 185020 }, { "epoch": 1.2172786063432959, "grad_norm": 0.06352625676473785, "learning_rate": 3.983692637251409e-06, "loss": 0.0015, "step": 185030 }, { "epoch": 1.2173443945185292, "grad_norm": 0.06657451618447205, "learning_rate": 3.983130517836874e-06, "loss": 0.0009, "step": 185040 }, { "epoch": 1.2174101826937627, "grad_norm": 0.04476592156260872, "learning_rate": 3.982568411828886e-06, "loss": 0.0005, "step": 185050 }, { "epoch": 1.217475970868996, "grad_norm": 0.018477453522241317, "learning_rate": 3.982006319234853e-06, "loss": 0.0007, "step": 185060 }, { "epoch": 1.2175417590442295, "grad_norm": 0.009486149203097646, "learning_rate": 3.981444240062189e-06, "loss": 0.0013, "step": 185070 }, { "epoch": 1.2176075472194627, "grad_norm": 0.00046552381695454993, "learning_rate": 3.9808821743183025e-06, "loss": 0.0012, "step": 185080 }, { "epoch": 1.2176733353946962, "grad_norm": 0.004498675213410368, "learning_rate": 3.980320122010603e-06, "loss": 0.0004, "step": 185090 }, { "epoch": 1.2177391235699295, "grad_norm": 0.022078902601765676, "learning_rate": 3.979758083146503e-06, "loss": 0.0005, "step": 185100 }, { "epoch": 1.217804911745163, "grad_norm": 0.03555683774490537, "learning_rate": 3.979196057733412e-06, "loss": 0.0005, "step": 185110 }, { "epoch": 1.2178706999203963, "grad_norm": 0.013800570654905319, "learning_rate": 3.97863404577874e-06, "loss": 0.0003, "step": 185120 }, { "epoch": 1.2179364880956296, "grad_norm": 0.09852617373511698, "learning_rate": 3.9780720472898945e-06, "loss": 0.001, "step": 185130 }, { "epoch": 1.218002276270863, "grad_norm": 0.008297129965206174, "learning_rate": 3.977510062274288e-06, "loss": 0.0014, "step": 185140 }, { "epoch": 1.2180680644460964, "grad_norm": 0.012369110147134469, "learning_rate": 3.9769480907393265e-06, "loss": 0.0011, "step": 185150 }, { "epoch": 1.21813385262133, "grad_norm": 0.07553182011425856, "learning_rate": 3.976386132692422e-06, "loss": 0.0009, "step": 185160 }, { "epoch": 1.2181996407965632, "grad_norm": 0.028508751896986494, "learning_rate": 3.975824188140982e-06, "loss": 0.0005, "step": 185170 }, { "epoch": 1.2182654289717967, "grad_norm": 0.000641323192007189, "learning_rate": 3.975262257092415e-06, "loss": 0.0012, "step": 185180 }, { "epoch": 1.21833121714703, "grad_norm": 0.02682205530601279, "learning_rate": 3.974700339554131e-06, "loss": 0.0005, "step": 185190 }, { "epoch": 1.2183970053222635, "grad_norm": 0.005162530498475633, "learning_rate": 3.974138435533538e-06, "loss": 0.0005, "step": 185200 }, { "epoch": 1.2184627934974968, "grad_norm": 0.08411829512492144, "learning_rate": 3.973576545038042e-06, "loss": 0.0013, "step": 185210 }, { "epoch": 1.21852858167273, "grad_norm": 0.06888827570645689, "learning_rate": 3.973014668075054e-06, "loss": 0.0008, "step": 185220 }, { "epoch": 1.2185943698479635, "grad_norm": 0.009298652671232734, "learning_rate": 3.97245280465198e-06, "loss": 0.0002, "step": 185230 }, { "epoch": 1.2186601580231968, "grad_norm": 0.02636615929768673, "learning_rate": 3.9718909547762295e-06, "loss": 0.0008, "step": 185240 }, { "epoch": 1.2187259461984303, "grad_norm": 0.02173777468357472, "learning_rate": 3.971329118455208e-06, "loss": 0.0004, "step": 185250 }, { "epoch": 1.2187917343736636, "grad_norm": 0.03331558226997516, "learning_rate": 3.9707672956963236e-06, "loss": 0.0006, "step": 185260 }, { "epoch": 1.2188575225488971, "grad_norm": 0.026718984792044365, "learning_rate": 3.9702054865069834e-06, "loss": 0.0007, "step": 185270 }, { "epoch": 1.2189233107241304, "grad_norm": 0.0013727525013472932, "learning_rate": 3.969643690894594e-06, "loss": 0.001, "step": 185280 }, { "epoch": 1.218989098899364, "grad_norm": 0.014661529769482571, "learning_rate": 3.969081908866564e-06, "loss": 0.0006, "step": 185290 }, { "epoch": 1.2190548870745972, "grad_norm": 0.02006783083137524, "learning_rate": 3.968520140430298e-06, "loss": 0.0009, "step": 185300 }, { "epoch": 1.2191206752498305, "grad_norm": 0.0105812590172187, "learning_rate": 3.967958385593203e-06, "loss": 0.0008, "step": 185310 }, { "epoch": 1.219186463425064, "grad_norm": 0.06278868555637374, "learning_rate": 3.967396644362685e-06, "loss": 0.0016, "step": 185320 }, { "epoch": 1.2192522516002973, "grad_norm": 0.011006686804374263, "learning_rate": 3.966834916746151e-06, "loss": 0.0003, "step": 185330 }, { "epoch": 1.2193180397755308, "grad_norm": 0.028311301995878772, "learning_rate": 3.966273202751006e-06, "loss": 0.001, "step": 185340 }, { "epoch": 1.219383827950764, "grad_norm": 0.0221167488586771, "learning_rate": 3.965711502384656e-06, "loss": 0.0008, "step": 185350 }, { "epoch": 1.2194496161259976, "grad_norm": 0.0443971674532511, "learning_rate": 3.965149815654507e-06, "loss": 0.0004, "step": 185360 }, { "epoch": 1.2195154043012308, "grad_norm": 0.010364141372979288, "learning_rate": 3.964588142567963e-06, "loss": 0.0009, "step": 185370 }, { "epoch": 1.2195811924764643, "grad_norm": 0.028807407068043397, "learning_rate": 3.964026483132431e-06, "loss": 0.0005, "step": 185380 }, { "epoch": 1.2196469806516976, "grad_norm": 0.03746825017033677, "learning_rate": 3.9634648373553156e-06, "loss": 0.0005, "step": 185390 }, { "epoch": 1.2197127688269311, "grad_norm": 0.010730283959717136, "learning_rate": 3.96290320524402e-06, "loss": 0.0009, "step": 185400 }, { "epoch": 1.2197785570021644, "grad_norm": 0.014706321853072527, "learning_rate": 3.96234158680595e-06, "loss": 0.0011, "step": 185410 }, { "epoch": 1.219844345177398, "grad_norm": 0.032480258200135316, "learning_rate": 3.96177998204851e-06, "loss": 0.0007, "step": 185420 }, { "epoch": 1.2199101333526312, "grad_norm": 0.3077154315613652, "learning_rate": 3.961218390979105e-06, "loss": 0.0011, "step": 185430 }, { "epoch": 1.2199759215278645, "grad_norm": 0.014001429291840672, "learning_rate": 3.960656813605138e-06, "loss": 0.0012, "step": 185440 }, { "epoch": 1.220041709703098, "grad_norm": 0.16002277562705727, "learning_rate": 3.960095249934014e-06, "loss": 0.0013, "step": 185450 }, { "epoch": 1.2201074978783313, "grad_norm": 0.006541534254550588, "learning_rate": 3.9595336999731345e-06, "loss": 0.0009, "step": 185460 }, { "epoch": 1.2201732860535648, "grad_norm": 0.08722476464196575, "learning_rate": 3.958972163729906e-06, "loss": 0.0011, "step": 185470 }, { "epoch": 1.220239074228798, "grad_norm": 0.037869162805936385, "learning_rate": 3.958410641211729e-06, "loss": 0.0009, "step": 185480 }, { "epoch": 1.2203048624040316, "grad_norm": 0.03876143859951299, "learning_rate": 3.957849132426009e-06, "loss": 0.0005, "step": 185490 }, { "epoch": 1.2203706505792649, "grad_norm": 0.048774425147495984, "learning_rate": 3.9572876373801485e-06, "loss": 0.0006, "step": 185500 }, { "epoch": 1.2204364387544984, "grad_norm": 0.03340478174463277, "learning_rate": 3.956726156081549e-06, "loss": 0.0006, "step": 185510 }, { "epoch": 1.2205022269297316, "grad_norm": 0.02760748245074654, "learning_rate": 3.9561646885376145e-06, "loss": 0.0007, "step": 185520 }, { "epoch": 1.220568015104965, "grad_norm": 0.007360274171394426, "learning_rate": 3.955603234755748e-06, "loss": 0.0013, "step": 185530 }, { "epoch": 1.2206338032801984, "grad_norm": 0.003989008061714521, "learning_rate": 3.9550417947433485e-06, "loss": 0.0004, "step": 185540 }, { "epoch": 1.2206995914554317, "grad_norm": 0.0793413911059787, "learning_rate": 3.9544803685078224e-06, "loss": 0.0005, "step": 185550 }, { "epoch": 1.2207653796306652, "grad_norm": 0.041518315930528925, "learning_rate": 3.953918956056569e-06, "loss": 0.0005, "step": 185560 }, { "epoch": 1.2208311678058985, "grad_norm": 0.00325532555902029, "learning_rate": 3.953357557396991e-06, "loss": 0.0007, "step": 185570 }, { "epoch": 1.220896955981132, "grad_norm": 0.018489483402500364, "learning_rate": 3.95279617253649e-06, "loss": 0.0004, "step": 185580 }, { "epoch": 1.2209627441563653, "grad_norm": 0.02468390619612398, "learning_rate": 3.952234801482467e-06, "loss": 0.0009, "step": 185590 }, { "epoch": 1.2210285323315988, "grad_norm": 0.012394346906398943, "learning_rate": 3.951673444242323e-06, "loss": 0.0002, "step": 185600 }, { "epoch": 1.221094320506832, "grad_norm": 0.015537115555628395, "learning_rate": 3.951112100823458e-06, "loss": 0.0009, "step": 185610 }, { "epoch": 1.2211601086820654, "grad_norm": 0.09136118563175476, "learning_rate": 3.950550771233276e-06, "loss": 0.0011, "step": 185620 }, { "epoch": 1.2212258968572989, "grad_norm": 0.021761565963897948, "learning_rate": 3.949989455479174e-06, "loss": 0.0002, "step": 185630 }, { "epoch": 1.2212916850325322, "grad_norm": 0.03304159558010191, "learning_rate": 3.949428153568555e-06, "loss": 0.0008, "step": 185640 }, { "epoch": 1.2213574732077657, "grad_norm": 0.0024004009058837907, "learning_rate": 3.9488668655088194e-06, "loss": 0.0011, "step": 185650 }, { "epoch": 1.221423261382999, "grad_norm": 0.002384685417226814, "learning_rate": 3.948305591307365e-06, "loss": 0.0006, "step": 185660 }, { "epoch": 1.2214890495582325, "grad_norm": 0.03514839538540361, "learning_rate": 3.9477443309715925e-06, "loss": 0.0009, "step": 185670 }, { "epoch": 1.2215548377334657, "grad_norm": 0.005308674361488982, "learning_rate": 3.947183084508904e-06, "loss": 0.0003, "step": 185680 }, { "epoch": 1.2216206259086992, "grad_norm": 0.06698314540716738, "learning_rate": 3.946621851926697e-06, "loss": 0.0009, "step": 185690 }, { "epoch": 1.2216864140839325, "grad_norm": 0.11235957523069555, "learning_rate": 3.94606063323237e-06, "loss": 0.0013, "step": 185700 }, { "epoch": 1.221752202259166, "grad_norm": 0.1351528019451061, "learning_rate": 3.945499428433325e-06, "loss": 0.0012, "step": 185710 }, { "epoch": 1.2218179904343993, "grad_norm": 0.05600727070078669, "learning_rate": 3.9449382375369586e-06, "loss": 0.001, "step": 185720 }, { "epoch": 1.2218837786096328, "grad_norm": 0.04717266510286601, "learning_rate": 3.94437706055067e-06, "loss": 0.0005, "step": 185730 }, { "epoch": 1.221949566784866, "grad_norm": 0.02812676908020754, "learning_rate": 3.9438158974818565e-06, "loss": 0.0005, "step": 185740 }, { "epoch": 1.2220153549600994, "grad_norm": 0.087532928333154, "learning_rate": 3.94325474833792e-06, "loss": 0.0008, "step": 185750 }, { "epoch": 1.222081143135333, "grad_norm": 0.003314411541348013, "learning_rate": 3.942693613126257e-06, "loss": 0.0008, "step": 185760 }, { "epoch": 1.2221469313105662, "grad_norm": 0.05014769536274234, "learning_rate": 3.942132491854266e-06, "loss": 0.0008, "step": 185770 }, { "epoch": 1.2222127194857997, "grad_norm": 0.004360103653354932, "learning_rate": 3.941571384529344e-06, "loss": 0.0005, "step": 185780 }, { "epoch": 1.222278507661033, "grad_norm": 0.04665662183795137, "learning_rate": 3.941010291158889e-06, "loss": 0.0006, "step": 185790 }, { "epoch": 1.2223442958362665, "grad_norm": 0.0981228658754518, "learning_rate": 3.940449211750298e-06, "loss": 0.0008, "step": 185800 }, { "epoch": 1.2224100840114998, "grad_norm": 0.03311086668035643, "learning_rate": 3.93988814631097e-06, "loss": 0.0009, "step": 185810 }, { "epoch": 1.2224758721867333, "grad_norm": 0.0009893883238603282, "learning_rate": 3.939327094848301e-06, "loss": 0.0005, "step": 185820 }, { "epoch": 1.2225416603619665, "grad_norm": 0.05048210870288754, "learning_rate": 3.938766057369689e-06, "loss": 0.0004, "step": 185830 }, { "epoch": 1.2226074485371998, "grad_norm": 0.012996948814271414, "learning_rate": 3.938205033882529e-06, "loss": 0.0005, "step": 185840 }, { "epoch": 1.2226732367124333, "grad_norm": 0.02514118205354165, "learning_rate": 3.937644024394219e-06, "loss": 0.0009, "step": 185850 }, { "epoch": 1.2227390248876666, "grad_norm": 0.041936252024479016, "learning_rate": 3.937083028912155e-06, "loss": 0.0007, "step": 185860 }, { "epoch": 1.2228048130629001, "grad_norm": 0.01241968804403504, "learning_rate": 3.936522047443732e-06, "loss": 0.0005, "step": 185870 }, { "epoch": 1.2228706012381334, "grad_norm": 0.05861141357825336, "learning_rate": 3.935961079996349e-06, "loss": 0.0008, "step": 185880 }, { "epoch": 1.222936389413367, "grad_norm": 0.029385389408944394, "learning_rate": 3.9354001265774e-06, "loss": 0.0003, "step": 185890 }, { "epoch": 1.2230021775886002, "grad_norm": 0.011572788017049829, "learning_rate": 3.9348391871942805e-06, "loss": 0.0006, "step": 185900 }, { "epoch": 1.2230679657638337, "grad_norm": 0.02479251354231063, "learning_rate": 3.9342782618543865e-06, "loss": 0.0005, "step": 185910 }, { "epoch": 1.223133753939067, "grad_norm": 0.00019553618110031463, "learning_rate": 3.933717350565113e-06, "loss": 0.0005, "step": 185920 }, { "epoch": 1.2231995421143003, "grad_norm": 0.008243286403756158, "learning_rate": 3.933156453333855e-06, "loss": 0.0011, "step": 185930 }, { "epoch": 1.2232653302895338, "grad_norm": 0.006083870265182713, "learning_rate": 3.932595570168008e-06, "loss": 0.0006, "step": 185940 }, { "epoch": 1.223331118464767, "grad_norm": 0.17825766654481695, "learning_rate": 3.932034701074967e-06, "loss": 0.0014, "step": 185950 }, { "epoch": 1.2233969066400006, "grad_norm": 0.001467153622280185, "learning_rate": 3.931473846062126e-06, "loss": 0.0019, "step": 185960 }, { "epoch": 1.2234626948152338, "grad_norm": 0.08158980666018877, "learning_rate": 3.930913005136879e-06, "loss": 0.0011, "step": 185970 }, { "epoch": 1.2235284829904673, "grad_norm": 0.04550518103060078, "learning_rate": 3.930352178306621e-06, "loss": 0.0006, "step": 185980 }, { "epoch": 1.2235942711657006, "grad_norm": 0.002138546790396135, "learning_rate": 3.929791365578747e-06, "loss": 0.0012, "step": 185990 }, { "epoch": 1.2236600593409341, "grad_norm": 0.05596805794486941, "learning_rate": 3.929230566960647e-06, "loss": 0.0002, "step": 186000 }, { "epoch": 1.2237258475161674, "grad_norm": 0.010963919574661389, "learning_rate": 3.928669782459719e-06, "loss": 0.0005, "step": 186010 }, { "epoch": 1.2237916356914007, "grad_norm": 0.018228497446976123, "learning_rate": 3.928109012083355e-06, "loss": 0.0007, "step": 186020 }, { "epoch": 1.2238574238666342, "grad_norm": 0.10420253966588618, "learning_rate": 3.927548255838946e-06, "loss": 0.001, "step": 186030 }, { "epoch": 1.2239232120418677, "grad_norm": 0.08353645412198829, "learning_rate": 3.926987513733889e-06, "loss": 0.001, "step": 186040 }, { "epoch": 1.223989000217101, "grad_norm": 0.019837189524804812, "learning_rate": 3.926426785775574e-06, "loss": 0.0006, "step": 186050 }, { "epoch": 1.2240547883923343, "grad_norm": 0.0005645434187844972, "learning_rate": 3.925866071971396e-06, "loss": 0.0012, "step": 186060 }, { "epoch": 1.2241205765675678, "grad_norm": 0.09112533516948466, "learning_rate": 3.925305372328745e-06, "loss": 0.0017, "step": 186070 }, { "epoch": 1.224186364742801, "grad_norm": 0.05456318649738242, "learning_rate": 3.924744686855014e-06, "loss": 0.0017, "step": 186080 }, { "epoch": 1.2242521529180346, "grad_norm": 0.0002768037995622541, "learning_rate": 3.9241840155575965e-06, "loss": 0.0006, "step": 186090 }, { "epoch": 1.2243179410932679, "grad_norm": 0.0315597750974285, "learning_rate": 3.923623358443882e-06, "loss": 0.0011, "step": 186100 }, { "epoch": 1.2243837292685014, "grad_norm": 0.010670989049149997, "learning_rate": 3.923062715521265e-06, "loss": 0.0008, "step": 186110 }, { "epoch": 1.2244495174437346, "grad_norm": 0.06015853866232626, "learning_rate": 3.922502086797136e-06, "loss": 0.0013, "step": 186120 }, { "epoch": 1.2245153056189682, "grad_norm": 0.014592732716191615, "learning_rate": 3.921941472278887e-06, "loss": 0.0004, "step": 186130 }, { "epoch": 1.2245810937942014, "grad_norm": 0.016635361939150432, "learning_rate": 3.9213808719739075e-06, "loss": 0.0003, "step": 186140 }, { "epoch": 1.2246468819694347, "grad_norm": 0.12795190317642116, "learning_rate": 3.920820285889591e-06, "loss": 0.0013, "step": 186150 }, { "epoch": 1.2247126701446682, "grad_norm": 0.005688472825955197, "learning_rate": 3.920259714033325e-06, "loss": 0.0004, "step": 186160 }, { "epoch": 1.2247784583199015, "grad_norm": 0.040508144748168315, "learning_rate": 3.919699156412504e-06, "loss": 0.0003, "step": 186170 }, { "epoch": 1.224844246495135, "grad_norm": 0.008826788809840395, "learning_rate": 3.919138613034515e-06, "loss": 0.0005, "step": 186180 }, { "epoch": 1.2249100346703683, "grad_norm": 0.05470727704157963, "learning_rate": 3.918578083906752e-06, "loss": 0.0011, "step": 186190 }, { "epoch": 1.2249758228456018, "grad_norm": 0.0688788792438454, "learning_rate": 3.918017569036602e-06, "loss": 0.0006, "step": 186200 }, { "epoch": 1.225041611020835, "grad_norm": 0.02134675409764147, "learning_rate": 3.917457068431456e-06, "loss": 0.0009, "step": 186210 }, { "epoch": 1.2251073991960686, "grad_norm": 0.0983868022103511, "learning_rate": 3.9168965820987024e-06, "loss": 0.0008, "step": 186220 }, { "epoch": 1.2251731873713019, "grad_norm": 0.02766712649331895, "learning_rate": 3.916336110045732e-06, "loss": 0.001, "step": 186230 }, { "epoch": 1.2252389755465352, "grad_norm": 0.10202794433865044, "learning_rate": 3.915775652279935e-06, "loss": 0.0008, "step": 186240 }, { "epoch": 1.2253047637217687, "grad_norm": 0.025595559734567303, "learning_rate": 3.9152152088087e-06, "loss": 0.0008, "step": 186250 }, { "epoch": 1.225370551897002, "grad_norm": 0.03166249511313082, "learning_rate": 3.914654779639415e-06, "loss": 0.0007, "step": 186260 }, { "epoch": 1.2254363400722355, "grad_norm": 0.02436240218148503, "learning_rate": 3.91409436477947e-06, "loss": 0.0009, "step": 186270 }, { "epoch": 1.2255021282474687, "grad_norm": 0.00540487316909036, "learning_rate": 3.913533964236253e-06, "loss": 0.0007, "step": 186280 }, { "epoch": 1.2255679164227022, "grad_norm": 0.03374755104474817, "learning_rate": 3.912973578017151e-06, "loss": 0.0013, "step": 186290 }, { "epoch": 1.2256337045979355, "grad_norm": 0.04041257007489492, "learning_rate": 3.9124132061295535e-06, "loss": 0.0007, "step": 186300 }, { "epoch": 1.225699492773169, "grad_norm": 0.01000720791166829, "learning_rate": 3.91185284858085e-06, "loss": 0.0003, "step": 186310 }, { "epoch": 1.2257652809484023, "grad_norm": 0.05683424579625587, "learning_rate": 3.911292505378427e-06, "loss": 0.0004, "step": 186320 }, { "epoch": 1.2258310691236356, "grad_norm": 0.022830273655288023, "learning_rate": 3.910732176529671e-06, "loss": 0.0003, "step": 186330 }, { "epoch": 1.225896857298869, "grad_norm": 0.03223950475960824, "learning_rate": 3.910171862041971e-06, "loss": 0.0006, "step": 186340 }, { "epoch": 1.2259626454741026, "grad_norm": 0.0006239609146183577, "learning_rate": 3.909611561922713e-06, "loss": 0.0006, "step": 186350 }, { "epoch": 1.226028433649336, "grad_norm": 0.04525070490460655, "learning_rate": 3.909051276179285e-06, "loss": 0.0007, "step": 186360 }, { "epoch": 1.2260942218245692, "grad_norm": 0.023748792850851325, "learning_rate": 3.908491004819075e-06, "loss": 0.0006, "step": 186370 }, { "epoch": 1.2261600099998027, "grad_norm": 0.06375206906101703, "learning_rate": 3.9079307478494675e-06, "loss": 0.0005, "step": 186380 }, { "epoch": 1.226225798175036, "grad_norm": 0.04960472225321022, "learning_rate": 3.90737050527785e-06, "loss": 0.0005, "step": 186390 }, { "epoch": 1.2262915863502695, "grad_norm": 0.07109338346023651, "learning_rate": 3.906810277111609e-06, "loss": 0.0014, "step": 186400 }, { "epoch": 1.2263573745255028, "grad_norm": 0.0153678671005992, "learning_rate": 3.90625006335813e-06, "loss": 0.0003, "step": 186410 }, { "epoch": 1.2264231627007363, "grad_norm": 0.023165012114968134, "learning_rate": 3.905689864024799e-06, "loss": 0.0008, "step": 186420 }, { "epoch": 1.2264889508759695, "grad_norm": 0.03642437236796974, "learning_rate": 3.905129679119002e-06, "loss": 0.0009, "step": 186430 }, { "epoch": 1.226554739051203, "grad_norm": 0.04522681531497602, "learning_rate": 3.904569508648126e-06, "loss": 0.0012, "step": 186440 }, { "epoch": 1.2266205272264363, "grad_norm": 0.0028872909375804497, "learning_rate": 3.904009352619554e-06, "loss": 0.0005, "step": 186450 }, { "epoch": 1.2266863154016696, "grad_norm": 0.152201273487395, "learning_rate": 3.903449211040672e-06, "loss": 0.002, "step": 186460 }, { "epoch": 1.2267521035769031, "grad_norm": 0.012019891034732952, "learning_rate": 3.902889083918865e-06, "loss": 0.0006, "step": 186470 }, { "epoch": 1.2268178917521364, "grad_norm": 0.051588069530749736, "learning_rate": 3.902328971261519e-06, "loss": 0.0007, "step": 186480 }, { "epoch": 1.22688367992737, "grad_norm": 0.015123118671724686, "learning_rate": 3.901768873076015e-06, "loss": 0.0004, "step": 186490 }, { "epoch": 1.2269494681026032, "grad_norm": 0.14242868638303632, "learning_rate": 3.901208789369742e-06, "loss": 0.0017, "step": 186500 }, { "epoch": 1.2270152562778367, "grad_norm": 0.045770246254659176, "learning_rate": 3.900648720150082e-06, "loss": 0.0006, "step": 186510 }, { "epoch": 1.22708104445307, "grad_norm": 0.002864455136698824, "learning_rate": 3.9000886654244186e-06, "loss": 0.0007, "step": 186520 }, { "epoch": 1.2271468326283035, "grad_norm": 0.0007332212915802767, "learning_rate": 3.899528625200137e-06, "loss": 0.0009, "step": 186530 }, { "epoch": 1.2272126208035368, "grad_norm": 0.04515187010054939, "learning_rate": 3.898968599484619e-06, "loss": 0.0012, "step": 186540 }, { "epoch": 1.22727840897877, "grad_norm": 0.016347418243086484, "learning_rate": 3.898408588285249e-06, "loss": 0.0005, "step": 186550 }, { "epoch": 1.2273441971540036, "grad_norm": 0.020790446065773273, "learning_rate": 3.897848591609411e-06, "loss": 0.0008, "step": 186560 }, { "epoch": 1.2274099853292368, "grad_norm": 0.008912341296085351, "learning_rate": 3.897288609464487e-06, "loss": 0.0004, "step": 186570 }, { "epoch": 1.2274757735044703, "grad_norm": 0.017411186255703228, "learning_rate": 3.896728641857862e-06, "loss": 0.0006, "step": 186580 }, { "epoch": 1.2275415616797036, "grad_norm": 0.004948352289035384, "learning_rate": 3.896168688796915e-06, "loss": 0.001, "step": 186590 }, { "epoch": 1.2276073498549371, "grad_norm": 0.009851353243190513, "learning_rate": 3.8956087502890316e-06, "loss": 0.0011, "step": 186600 }, { "epoch": 1.2276731380301704, "grad_norm": 0.0010796808588884438, "learning_rate": 3.895048826341593e-06, "loss": 0.0009, "step": 186610 }, { "epoch": 1.227738926205404, "grad_norm": 0.03701157636791244, "learning_rate": 3.89448891696198e-06, "loss": 0.001, "step": 186620 }, { "epoch": 1.2278047143806372, "grad_norm": 0.014373542952521324, "learning_rate": 3.893929022157577e-06, "loss": 0.0012, "step": 186630 }, { "epoch": 1.2278705025558705, "grad_norm": 0.004297812928529593, "learning_rate": 3.893369141935764e-06, "loss": 0.0004, "step": 186640 }, { "epoch": 1.227936290731104, "grad_norm": 0.03541781910254699, "learning_rate": 3.8928092763039235e-06, "loss": 0.0007, "step": 186650 }, { "epoch": 1.2280020789063373, "grad_norm": 0.03288516664672759, "learning_rate": 3.892249425269437e-06, "loss": 0.0005, "step": 186660 }, { "epoch": 1.2280678670815708, "grad_norm": 0.04056472445896471, "learning_rate": 3.891689588839685e-06, "loss": 0.0004, "step": 186670 }, { "epoch": 1.228133655256804, "grad_norm": 0.006241999481342423, "learning_rate": 3.891129767022047e-06, "loss": 0.0004, "step": 186680 }, { "epoch": 1.2281994434320376, "grad_norm": 0.010293349341638412, "learning_rate": 3.890569959823907e-06, "loss": 0.0004, "step": 186690 }, { "epoch": 1.2282652316072709, "grad_norm": 0.017160014418490452, "learning_rate": 3.890010167252644e-06, "loss": 0.0016, "step": 186700 }, { "epoch": 1.2283310197825044, "grad_norm": 0.036481531509429004, "learning_rate": 3.889450389315637e-06, "loss": 0.0007, "step": 186710 }, { "epoch": 1.2283968079577376, "grad_norm": 0.010682798457814799, "learning_rate": 3.8888906260202685e-06, "loss": 0.0005, "step": 186720 }, { "epoch": 1.2284625961329712, "grad_norm": 0.03055052723405778, "learning_rate": 3.888330877373917e-06, "loss": 0.0026, "step": 186730 }, { "epoch": 1.2285283843082044, "grad_norm": 0.02226756784500937, "learning_rate": 3.887771143383963e-06, "loss": 0.0006, "step": 186740 }, { "epoch": 1.228594172483438, "grad_norm": 0.025398513270066533, "learning_rate": 3.887211424057785e-06, "loss": 0.0006, "step": 186750 }, { "epoch": 1.2286599606586712, "grad_norm": 0.02647527386120776, "learning_rate": 3.8866517194027645e-06, "loss": 0.0009, "step": 186760 }, { "epoch": 1.2287257488339045, "grad_norm": 0.01935961243895915, "learning_rate": 3.886092029426279e-06, "loss": 0.0003, "step": 186770 }, { "epoch": 1.228791537009138, "grad_norm": 0.03850594267745355, "learning_rate": 3.885532354135709e-06, "loss": 0.001, "step": 186780 }, { "epoch": 1.2288573251843713, "grad_norm": 0.00999500158000922, "learning_rate": 3.884972693538431e-06, "loss": 0.0003, "step": 186790 }, { "epoch": 1.2289231133596048, "grad_norm": 0.015938254610216702, "learning_rate": 3.8844130476418265e-06, "loss": 0.0007, "step": 186800 }, { "epoch": 1.228988901534838, "grad_norm": 0.034841611101515256, "learning_rate": 3.883853416453271e-06, "loss": 0.0011, "step": 186810 }, { "epoch": 1.2290546897100716, "grad_norm": 0.021125284879760606, "learning_rate": 3.883293799980145e-06, "loss": 0.001, "step": 186820 }, { "epoch": 1.2291204778853049, "grad_norm": 0.023252765968217144, "learning_rate": 3.882734198229825e-06, "loss": 0.0012, "step": 186830 }, { "epoch": 1.2291862660605384, "grad_norm": 0.032357950805229845, "learning_rate": 3.882174611209691e-06, "loss": 0.0007, "step": 186840 }, { "epoch": 1.2292520542357717, "grad_norm": 0.04148044431121602, "learning_rate": 3.881615038927118e-06, "loss": 0.0007, "step": 186850 }, { "epoch": 1.229317842411005, "grad_norm": 0.15858534859922951, "learning_rate": 3.881055481389487e-06, "loss": 0.0007, "step": 186860 }, { "epoch": 1.2293836305862385, "grad_norm": 0.008990679062829288, "learning_rate": 3.880495938604171e-06, "loss": 0.001, "step": 186870 }, { "epoch": 1.2294494187614717, "grad_norm": 0.2596127079670077, "learning_rate": 3.8799364105785485e-06, "loss": 0.0009, "step": 186880 }, { "epoch": 1.2295152069367052, "grad_norm": 0.0022515102676692504, "learning_rate": 3.879376897319999e-06, "loss": 0.0006, "step": 186890 }, { "epoch": 1.2295809951119385, "grad_norm": 0.06827735761859495, "learning_rate": 3.878817398835896e-06, "loss": 0.0006, "step": 186900 }, { "epoch": 1.229646783287172, "grad_norm": 0.013403453371056473, "learning_rate": 3.878257915133618e-06, "loss": 0.0004, "step": 186910 }, { "epoch": 1.2297125714624053, "grad_norm": 0.11479234884413579, "learning_rate": 3.877698446220542e-06, "loss": 0.0028, "step": 186920 }, { "epoch": 1.2297783596376388, "grad_norm": 0.08061882014737784, "learning_rate": 3.877138992104041e-06, "loss": 0.0012, "step": 186930 }, { "epoch": 1.229844147812872, "grad_norm": 0.05244108378559426, "learning_rate": 3.876579552791491e-06, "loss": 0.0009, "step": 186940 }, { "epoch": 1.2299099359881054, "grad_norm": 0.05150994206989583, "learning_rate": 3.876020128290271e-06, "loss": 0.0012, "step": 186950 }, { "epoch": 1.229975724163339, "grad_norm": 0.9301925992639397, "learning_rate": 3.875460718607756e-06, "loss": 0.0004, "step": 186960 }, { "epoch": 1.2300415123385722, "grad_norm": 0.009355539384349864, "learning_rate": 3.874901323751319e-06, "loss": 0.0005, "step": 186970 }, { "epoch": 1.2301073005138057, "grad_norm": 0.0513245142394063, "learning_rate": 3.874341943728337e-06, "loss": 0.0005, "step": 186980 }, { "epoch": 1.230173088689039, "grad_norm": 0.0038592312096461185, "learning_rate": 3.873782578546184e-06, "loss": 0.0009, "step": 186990 }, { "epoch": 1.2302388768642725, "grad_norm": 0.020902058311700788, "learning_rate": 3.873223228212235e-06, "loss": 0.0006, "step": 187000 }, { "epoch": 1.2303046650395058, "grad_norm": 0.0032470392373279092, "learning_rate": 3.8726638927338635e-06, "loss": 0.0004, "step": 187010 }, { "epoch": 1.2303704532147393, "grad_norm": 0.03463189360126752, "learning_rate": 3.8721045721184455e-06, "loss": 0.0014, "step": 187020 }, { "epoch": 1.2304362413899725, "grad_norm": 0.02172973281826288, "learning_rate": 3.871545266373356e-06, "loss": 0.0015, "step": 187030 }, { "epoch": 1.230502029565206, "grad_norm": 0.021740382032893395, "learning_rate": 3.870985975505966e-06, "loss": 0.0003, "step": 187040 }, { "epoch": 1.2305678177404393, "grad_norm": 0.03030106032845882, "learning_rate": 3.8704266995236525e-06, "loss": 0.0011, "step": 187050 }, { "epoch": 1.2306336059156728, "grad_norm": 0.020925991796548766, "learning_rate": 3.869867438433786e-06, "loss": 0.0006, "step": 187060 }, { "epoch": 1.2306993940909061, "grad_norm": 0.011646891364756051, "learning_rate": 3.869308192243741e-06, "loss": 0.0006, "step": 187070 }, { "epoch": 1.2307651822661394, "grad_norm": 0.024320995282065678, "learning_rate": 3.868748960960892e-06, "loss": 0.0004, "step": 187080 }, { "epoch": 1.230830970441373, "grad_norm": 0.010610211366903908, "learning_rate": 3.86818974459261e-06, "loss": 0.0005, "step": 187090 }, { "epoch": 1.2308967586166062, "grad_norm": 0.02697870957737893, "learning_rate": 3.8676305431462694e-06, "loss": 0.0007, "step": 187100 }, { "epoch": 1.2309625467918397, "grad_norm": 0.023528957399608086, "learning_rate": 3.867071356629243e-06, "loss": 0.0005, "step": 187110 }, { "epoch": 1.231028334967073, "grad_norm": 0.08601199573541955, "learning_rate": 3.866512185048901e-06, "loss": 0.0015, "step": 187120 }, { "epoch": 1.2310941231423065, "grad_norm": 0.05086415023495587, "learning_rate": 3.865953028412617e-06, "loss": 0.0006, "step": 187130 }, { "epoch": 1.2311599113175398, "grad_norm": 0.003127781194796355, "learning_rate": 3.865393886727762e-06, "loss": 0.0005, "step": 187140 }, { "epoch": 1.2312256994927733, "grad_norm": 0.010157821540853073, "learning_rate": 3.86483476000171e-06, "loss": 0.001, "step": 187150 }, { "epoch": 1.2312914876680066, "grad_norm": 0.08792684249976804, "learning_rate": 3.864275648241831e-06, "loss": 0.001, "step": 187160 }, { "epoch": 1.2313572758432398, "grad_norm": 0.037720221453019204, "learning_rate": 3.863716551455497e-06, "loss": 0.0008, "step": 187170 }, { "epoch": 1.2314230640184733, "grad_norm": 0.04373553760178954, "learning_rate": 3.863157469650079e-06, "loss": 0.0008, "step": 187180 }, { "epoch": 1.2314888521937066, "grad_norm": 0.00038506392579298556, "learning_rate": 3.862598402832948e-06, "loss": 0.0009, "step": 187190 }, { "epoch": 1.2315546403689401, "grad_norm": 0.005696293750688185, "learning_rate": 3.862039351011473e-06, "loss": 0.0011, "step": 187200 }, { "epoch": 1.2316204285441734, "grad_norm": 0.009087366212765213, "learning_rate": 3.8614803141930275e-06, "loss": 0.0011, "step": 187210 }, { "epoch": 1.231686216719407, "grad_norm": 0.02452827135177428, "learning_rate": 3.860921292384981e-06, "loss": 0.0011, "step": 187220 }, { "epoch": 1.2317520048946402, "grad_norm": 0.04793665269879226, "learning_rate": 3.860362285594703e-06, "loss": 0.0013, "step": 187230 }, { "epoch": 1.2318177930698737, "grad_norm": 0.014647554911461316, "learning_rate": 3.859803293829565e-06, "loss": 0.0007, "step": 187240 }, { "epoch": 1.231883581245107, "grad_norm": 0.0311704579709879, "learning_rate": 3.859244317096936e-06, "loss": 0.0009, "step": 187250 }, { "epoch": 1.2319493694203403, "grad_norm": 0.03917846476322903, "learning_rate": 3.858685355404185e-06, "loss": 0.0008, "step": 187260 }, { "epoch": 1.2320151575955738, "grad_norm": 0.0944105235296555, "learning_rate": 3.85812640875868e-06, "loss": 0.0006, "step": 187270 }, { "epoch": 1.232080945770807, "grad_norm": 0.04747462017495707, "learning_rate": 3.857567477167794e-06, "loss": 0.0006, "step": 187280 }, { "epoch": 1.2321467339460406, "grad_norm": 0.03660152353034232, "learning_rate": 3.857008560638894e-06, "loss": 0.001, "step": 187290 }, { "epoch": 1.2322125221212739, "grad_norm": 0.013183390538274898, "learning_rate": 3.856449659179349e-06, "loss": 0.0023, "step": 187300 }, { "epoch": 1.2322783102965074, "grad_norm": 0.07152269174362333, "learning_rate": 3.8558907727965275e-06, "loss": 0.001, "step": 187310 }, { "epoch": 1.2323440984717406, "grad_norm": 0.004957232598415888, "learning_rate": 3.855331901497798e-06, "loss": 0.0008, "step": 187320 }, { "epoch": 1.2324098866469742, "grad_norm": 0.0007987940257721694, "learning_rate": 3.854773045290528e-06, "loss": 0.001, "step": 187330 }, { "epoch": 1.2324756748222074, "grad_norm": 0.019168457711015278, "learning_rate": 3.854214204182087e-06, "loss": 0.001, "step": 187340 }, { "epoch": 1.2325414629974407, "grad_norm": 0.01621808022283334, "learning_rate": 3.853655378179843e-06, "loss": 0.0006, "step": 187350 }, { "epoch": 1.2326072511726742, "grad_norm": 0.03722823096430512, "learning_rate": 3.853096567291161e-06, "loss": 0.0005, "step": 187360 }, { "epoch": 1.2326730393479077, "grad_norm": 0.16842445569860479, "learning_rate": 3.852537771523412e-06, "loss": 0.0016, "step": 187370 }, { "epoch": 1.232738827523141, "grad_norm": 0.005776282214873291, "learning_rate": 3.851978990883961e-06, "loss": 0.0006, "step": 187380 }, { "epoch": 1.2328046156983743, "grad_norm": 0.02594405302470401, "learning_rate": 3.8514202253801756e-06, "loss": 0.0002, "step": 187390 }, { "epoch": 1.2328704038736078, "grad_norm": 0.013727007191398325, "learning_rate": 3.850861475019421e-06, "loss": 0.0009, "step": 187400 }, { "epoch": 1.232936192048841, "grad_norm": 0.05062933813389435, "learning_rate": 3.8503027398090665e-06, "loss": 0.0003, "step": 187410 }, { "epoch": 1.2330019802240746, "grad_norm": 0.016814772318620882, "learning_rate": 3.849744019756478e-06, "loss": 0.0012, "step": 187420 }, { "epoch": 1.2330677683993079, "grad_norm": 0.03690016587841464, "learning_rate": 3.8491853148690205e-06, "loss": 0.0008, "step": 187430 }, { "epoch": 1.2331335565745414, "grad_norm": 0.02285594541405007, "learning_rate": 3.848626625154061e-06, "loss": 0.0005, "step": 187440 }, { "epoch": 1.2331993447497747, "grad_norm": 0.021403247879463884, "learning_rate": 3.848067950618965e-06, "loss": 0.0009, "step": 187450 }, { "epoch": 1.2332651329250082, "grad_norm": 0.030109454286162074, "learning_rate": 3.847509291271097e-06, "loss": 0.0004, "step": 187460 }, { "epoch": 1.2333309211002415, "grad_norm": 0.008396690301299766, "learning_rate": 3.846950647117826e-06, "loss": 0.0004, "step": 187470 }, { "epoch": 1.2333967092754747, "grad_norm": 0.02427976277170772, "learning_rate": 3.846392018166513e-06, "loss": 0.0008, "step": 187480 }, { "epoch": 1.2334624974507082, "grad_norm": 0.10082294454176882, "learning_rate": 3.845833404424526e-06, "loss": 0.0008, "step": 187490 }, { "epoch": 1.2335282856259415, "grad_norm": 0.03022714321213958, "learning_rate": 3.845274805899228e-06, "loss": 0.0008, "step": 187500 }, { "epoch": 1.233594073801175, "grad_norm": 0.03961789403322278, "learning_rate": 3.844716222597985e-06, "loss": 0.0004, "step": 187510 }, { "epoch": 1.2336598619764083, "grad_norm": 0.03190240677328116, "learning_rate": 3.844157654528161e-06, "loss": 0.0006, "step": 187520 }, { "epoch": 1.2337256501516418, "grad_norm": 0.004416556358028407, "learning_rate": 3.843599101697118e-06, "loss": 0.0006, "step": 187530 }, { "epoch": 1.233791438326875, "grad_norm": 0.006338225030464686, "learning_rate": 3.843040564112224e-06, "loss": 0.0006, "step": 187540 }, { "epoch": 1.2338572265021086, "grad_norm": 0.014099204513792622, "learning_rate": 3.842482041780841e-06, "loss": 0.0002, "step": 187550 }, { "epoch": 1.233923014677342, "grad_norm": 0.030662054982239038, "learning_rate": 3.8419235347103325e-06, "loss": 0.0005, "step": 187560 }, { "epoch": 1.2339888028525752, "grad_norm": 0.045515625724317506, "learning_rate": 3.841365042908062e-06, "loss": 0.0007, "step": 187570 }, { "epoch": 1.2340545910278087, "grad_norm": 0.007035262846895493, "learning_rate": 3.840806566381393e-06, "loss": 0.0004, "step": 187580 }, { "epoch": 1.234120379203042, "grad_norm": 0.03900419136503709, "learning_rate": 3.8402481051376865e-06, "loss": 0.0005, "step": 187590 }, { "epoch": 1.2341861673782755, "grad_norm": 0.04986872011412271, "learning_rate": 3.839689659184309e-06, "loss": 0.0004, "step": 187600 }, { "epoch": 1.2342519555535088, "grad_norm": 0.01225608106717605, "learning_rate": 3.83913122852862e-06, "loss": 0.0005, "step": 187610 }, { "epoch": 1.2343177437287423, "grad_norm": 0.01574767293098518, "learning_rate": 3.838572813177984e-06, "loss": 0.0004, "step": 187620 }, { "epoch": 1.2343835319039755, "grad_norm": 0.007227790786169507, "learning_rate": 3.838014413139763e-06, "loss": 0.0005, "step": 187630 }, { "epoch": 1.234449320079209, "grad_norm": 0.018825193324909034, "learning_rate": 3.837456028421318e-06, "loss": 0.0005, "step": 187640 }, { "epoch": 1.2345151082544423, "grad_norm": 0.0013059030466744763, "learning_rate": 3.836897659030011e-06, "loss": 0.0002, "step": 187650 }, { "epoch": 1.2345808964296756, "grad_norm": 0.0637863860357773, "learning_rate": 3.836339304973202e-06, "loss": 0.0014, "step": 187660 }, { "epoch": 1.2346466846049091, "grad_norm": 0.005673696543754541, "learning_rate": 3.8357809662582565e-06, "loss": 0.0005, "step": 187670 }, { "epoch": 1.2347124727801424, "grad_norm": 0.002933441886629519, "learning_rate": 3.835222642892533e-06, "loss": 0.0004, "step": 187680 }, { "epoch": 1.234778260955376, "grad_norm": 0.08006449949284263, "learning_rate": 3.834664334883393e-06, "loss": 0.0008, "step": 187690 }, { "epoch": 1.2348440491306092, "grad_norm": 0.004269502815826416, "learning_rate": 3.834106042238197e-06, "loss": 0.0012, "step": 187700 }, { "epoch": 1.2349098373058427, "grad_norm": 0.01917652682348148, "learning_rate": 3.833547764964306e-06, "loss": 0.0006, "step": 187710 }, { "epoch": 1.234975625481076, "grad_norm": 0.029454505827596948, "learning_rate": 3.832989503069081e-06, "loss": 0.0005, "step": 187720 }, { "epoch": 1.2350414136563095, "grad_norm": 0.07228067899171152, "learning_rate": 3.83243125655988e-06, "loss": 0.0004, "step": 187730 }, { "epoch": 1.2351072018315428, "grad_norm": 0.02720672484929403, "learning_rate": 3.831873025444066e-06, "loss": 0.0005, "step": 187740 }, { "epoch": 1.2351729900067763, "grad_norm": 0.01812605540522211, "learning_rate": 3.831314809728996e-06, "loss": 0.0008, "step": 187750 }, { "epoch": 1.2352387781820096, "grad_norm": 0.006196862000425318, "learning_rate": 3.830756609422032e-06, "loss": 0.0005, "step": 187760 }, { "epoch": 1.235304566357243, "grad_norm": 0.0003965438176682717, "learning_rate": 3.830198424530533e-06, "loss": 0.0006, "step": 187770 }, { "epoch": 1.2353703545324763, "grad_norm": 0.012239225492078825, "learning_rate": 3.829640255061855e-06, "loss": 0.0006, "step": 187780 }, { "epoch": 1.2354361427077096, "grad_norm": 0.007707424618574439, "learning_rate": 3.82908210102336e-06, "loss": 0.0007, "step": 187790 }, { "epoch": 1.2355019308829431, "grad_norm": 0.04410544379046653, "learning_rate": 3.828523962422407e-06, "loss": 0.0006, "step": 187800 }, { "epoch": 1.2355677190581764, "grad_norm": 0.003907565654660589, "learning_rate": 3.8279658392663545e-06, "loss": 0.0021, "step": 187810 }, { "epoch": 1.23563350723341, "grad_norm": 0.013856222328524521, "learning_rate": 3.82740773156256e-06, "loss": 0.0015, "step": 187820 }, { "epoch": 1.2356992954086432, "grad_norm": 0.0095080993826505, "learning_rate": 3.826849639318382e-06, "loss": 0.0003, "step": 187830 }, { "epoch": 1.2357650835838767, "grad_norm": 0.02347113874382274, "learning_rate": 3.826291562541177e-06, "loss": 0.0004, "step": 187840 }, { "epoch": 1.23583087175911, "grad_norm": 0.10319141767140347, "learning_rate": 3.825733501238304e-06, "loss": 0.001, "step": 187850 }, { "epoch": 1.2358966599343435, "grad_norm": 0.2799713579673629, "learning_rate": 3.825175455417121e-06, "loss": 0.0013, "step": 187860 }, { "epoch": 1.2359624481095768, "grad_norm": 0.04909187059134466, "learning_rate": 3.824617425084985e-06, "loss": 0.001, "step": 187870 }, { "epoch": 1.23602823628481, "grad_norm": 0.008801652942063623, "learning_rate": 3.824059410249254e-06, "loss": 0.0007, "step": 187880 }, { "epoch": 1.2360940244600436, "grad_norm": 0.0008330587338580127, "learning_rate": 3.823501410917284e-06, "loss": 0.0006, "step": 187890 }, { "epoch": 1.2361598126352769, "grad_norm": 0.026080629607082503, "learning_rate": 3.822943427096432e-06, "loss": 0.001, "step": 187900 }, { "epoch": 1.2362256008105104, "grad_norm": 0.01019063338051558, "learning_rate": 3.822385458794053e-06, "loss": 0.0003, "step": 187910 }, { "epoch": 1.2362913889857436, "grad_norm": 0.017865186008987868, "learning_rate": 3.821827506017505e-06, "loss": 0.0008, "step": 187920 }, { "epoch": 1.2363571771609771, "grad_norm": 0.011607264383545717, "learning_rate": 3.821269568774144e-06, "loss": 0.0007, "step": 187930 }, { "epoch": 1.2364229653362104, "grad_norm": 0.02495595335088708, "learning_rate": 3.820711647071328e-06, "loss": 0.0004, "step": 187940 }, { "epoch": 1.236488753511444, "grad_norm": 0.015433455485912215, "learning_rate": 3.820153740916409e-06, "loss": 0.0009, "step": 187950 }, { "epoch": 1.2365545416866772, "grad_norm": 0.0022222033080489044, "learning_rate": 3.819595850316742e-06, "loss": 0.0005, "step": 187960 }, { "epoch": 1.2366203298619105, "grad_norm": 0.016096380574812816, "learning_rate": 3.819037975279686e-06, "loss": 0.0011, "step": 187970 }, { "epoch": 1.236686118037144, "grad_norm": 0.01615325751570107, "learning_rate": 3.818480115812593e-06, "loss": 0.0004, "step": 187980 }, { "epoch": 1.2367519062123773, "grad_norm": 0.006285769310119607, "learning_rate": 3.817922271922821e-06, "loss": 0.001, "step": 187990 }, { "epoch": 1.2368176943876108, "grad_norm": 0.021802852003277032, "learning_rate": 3.817364443617722e-06, "loss": 0.0003, "step": 188000 }, { "epoch": 1.236883482562844, "grad_norm": 0.02691605304211208, "learning_rate": 3.816806630904652e-06, "loss": 0.0011, "step": 188010 }, { "epoch": 1.2369492707380776, "grad_norm": 0.07650553993030007, "learning_rate": 3.816248833790962e-06, "loss": 0.0015, "step": 188020 }, { "epoch": 1.2370150589133109, "grad_norm": 0.010062306500242656, "learning_rate": 3.815691052284011e-06, "loss": 0.0006, "step": 188030 }, { "epoch": 1.2370808470885444, "grad_norm": 0.016652160178925186, "learning_rate": 3.815133286391151e-06, "loss": 0.0012, "step": 188040 }, { "epoch": 1.2371466352637777, "grad_norm": 0.07800342926864408, "learning_rate": 3.814575536119735e-06, "loss": 0.0013, "step": 188050 }, { "epoch": 1.2372124234390112, "grad_norm": 0.06862259222562642, "learning_rate": 3.8140178014771166e-06, "loss": 0.0009, "step": 188060 }, { "epoch": 1.2372782116142444, "grad_norm": 0.05518430790339251, "learning_rate": 3.813460082470649e-06, "loss": 0.0008, "step": 188070 }, { "epoch": 1.237343999789478, "grad_norm": 8.869964597252872e-05, "learning_rate": 3.8129023791076865e-06, "loss": 0.0005, "step": 188080 }, { "epoch": 1.2374097879647112, "grad_norm": 0.05363426259822904, "learning_rate": 3.8123446913955787e-06, "loss": 0.0005, "step": 188090 }, { "epoch": 1.2374755761399445, "grad_norm": 0.016948961318571914, "learning_rate": 3.8117870193416818e-06, "loss": 0.0015, "step": 188100 }, { "epoch": 1.237541364315178, "grad_norm": 0.0004108692966609689, "learning_rate": 3.811229362953347e-06, "loss": 0.001, "step": 188110 }, { "epoch": 1.2376071524904113, "grad_norm": 0.1677472622654228, "learning_rate": 3.8106717222379264e-06, "loss": 0.0012, "step": 188120 }, { "epoch": 1.2376729406656448, "grad_norm": 0.030198762872071874, "learning_rate": 3.8101140972027716e-06, "loss": 0.0007, "step": 188130 }, { "epoch": 1.237738728840878, "grad_norm": 0.012977307003307919, "learning_rate": 3.8095564878552353e-06, "loss": 0.0004, "step": 188140 }, { "epoch": 1.2378045170161116, "grad_norm": 0.01369763898929107, "learning_rate": 3.8089988942026673e-06, "loss": 0.0005, "step": 188150 }, { "epoch": 1.2378703051913449, "grad_norm": 0.009757008677519302, "learning_rate": 3.8084413162524215e-06, "loss": 0.0005, "step": 188160 }, { "epoch": 1.2379360933665784, "grad_norm": 0.002804172675955961, "learning_rate": 3.8078837540118476e-06, "loss": 0.0003, "step": 188170 }, { "epoch": 1.2380018815418117, "grad_norm": 0.03260065055599391, "learning_rate": 3.8073262074882965e-06, "loss": 0.0011, "step": 188180 }, { "epoch": 1.238067669717045, "grad_norm": 0.05968984079600782, "learning_rate": 3.8067686766891197e-06, "loss": 0.0007, "step": 188190 }, { "epoch": 1.2381334578922785, "grad_norm": 0.07863533486526733, "learning_rate": 3.8062111616216674e-06, "loss": 0.0006, "step": 188200 }, { "epoch": 1.2381992460675117, "grad_norm": 0.02994361941551417, "learning_rate": 3.8056536622932895e-06, "loss": 0.0005, "step": 188210 }, { "epoch": 1.2382650342427453, "grad_norm": 0.04044667376654609, "learning_rate": 3.8050961787113363e-06, "loss": 0.0006, "step": 188220 }, { "epoch": 1.2383308224179785, "grad_norm": 0.04323303135603063, "learning_rate": 3.8045387108831582e-06, "loss": 0.0005, "step": 188230 }, { "epoch": 1.238396610593212, "grad_norm": 0.053305922354309775, "learning_rate": 3.803981258816105e-06, "loss": 0.0006, "step": 188240 }, { "epoch": 1.2384623987684453, "grad_norm": 0.0037303236231395003, "learning_rate": 3.8034238225175262e-06, "loss": 0.0007, "step": 188250 }, { "epoch": 1.2385281869436788, "grad_norm": 0.05765840992853583, "learning_rate": 3.8028664019947703e-06, "loss": 0.0011, "step": 188260 }, { "epoch": 1.2385939751189121, "grad_norm": 0.010973404331639111, "learning_rate": 3.8023089972551872e-06, "loss": 0.0004, "step": 188270 }, { "epoch": 1.2386597632941454, "grad_norm": 0.011124376826290402, "learning_rate": 3.8017516083061253e-06, "loss": 0.0004, "step": 188280 }, { "epoch": 1.238725551469379, "grad_norm": 0.03548966881126681, "learning_rate": 3.8011942351549343e-06, "loss": 0.0007, "step": 188290 }, { "epoch": 1.2387913396446122, "grad_norm": 0.06284225587401127, "learning_rate": 3.8006368778089613e-06, "loss": 0.0006, "step": 188300 }, { "epoch": 1.2388571278198457, "grad_norm": 0.03590508925624189, "learning_rate": 3.8000795362755556e-06, "loss": 0.0008, "step": 188310 }, { "epoch": 1.238922915995079, "grad_norm": 0.10187939758537057, "learning_rate": 3.799522210562065e-06, "loss": 0.0009, "step": 188320 }, { "epoch": 1.2389887041703125, "grad_norm": 0.07886749274141601, "learning_rate": 3.798964900675837e-06, "loss": 0.0012, "step": 188330 }, { "epoch": 1.2390544923455458, "grad_norm": 0.03574342891643986, "learning_rate": 3.79840760662422e-06, "loss": 0.0005, "step": 188340 }, { "epoch": 1.2391202805207793, "grad_norm": 0.04313737476455227, "learning_rate": 3.7978503284145594e-06, "loss": 0.0007, "step": 188350 }, { "epoch": 1.2391860686960126, "grad_norm": 0.05403006171491544, "learning_rate": 3.797293066054205e-06, "loss": 0.001, "step": 188360 }, { "epoch": 1.2392518568712458, "grad_norm": 0.027515353879623783, "learning_rate": 3.7967358195505033e-06, "loss": 0.0014, "step": 188370 }, { "epoch": 1.2393176450464793, "grad_norm": 0.041442185148351324, "learning_rate": 3.7961785889108e-06, "loss": 0.0004, "step": 188380 }, { "epoch": 1.2393834332217128, "grad_norm": 0.02442519969464066, "learning_rate": 3.795621374142443e-06, "loss": 0.0006, "step": 188390 }, { "epoch": 1.2394492213969461, "grad_norm": 0.02376287379829661, "learning_rate": 3.795064175252777e-06, "loss": 0.0009, "step": 188400 }, { "epoch": 1.2395150095721794, "grad_norm": 0.0038357004281483293, "learning_rate": 3.794506992249149e-06, "loss": 0.0006, "step": 188410 }, { "epoch": 1.239580797747413, "grad_norm": 0.014570031878664987, "learning_rate": 3.7939498251389062e-06, "loss": 0.0009, "step": 188420 }, { "epoch": 1.2396465859226462, "grad_norm": 0.033622570613753115, "learning_rate": 3.793392673929393e-06, "loss": 0.0007, "step": 188430 }, { "epoch": 1.2397123740978797, "grad_norm": 0.057429961942117524, "learning_rate": 3.7928355386279554e-06, "loss": 0.0005, "step": 188440 }, { "epoch": 1.239778162273113, "grad_norm": 0.003469879568407521, "learning_rate": 3.7922784192419387e-06, "loss": 0.0005, "step": 188450 }, { "epoch": 1.2398439504483465, "grad_norm": 0.026554652513227812, "learning_rate": 3.791721315778688e-06, "loss": 0.0009, "step": 188460 }, { "epoch": 1.2399097386235798, "grad_norm": 0.08543325769909939, "learning_rate": 3.7911642282455486e-06, "loss": 0.0006, "step": 188470 }, { "epoch": 1.2399755267988133, "grad_norm": 0.00773487740498035, "learning_rate": 3.790607156649863e-06, "loss": 0.0004, "step": 188480 }, { "epoch": 1.2400413149740466, "grad_norm": 0.0008554408807094273, "learning_rate": 3.7900501009989786e-06, "loss": 0.0009, "step": 188490 }, { "epoch": 1.2401071031492799, "grad_norm": 0.018227326386115225, "learning_rate": 3.7894930613002393e-06, "loss": 0.0005, "step": 188500 }, { "epoch": 1.2401728913245134, "grad_norm": 0.03962627889611574, "learning_rate": 3.7889360375609887e-06, "loss": 0.0007, "step": 188510 }, { "epoch": 1.2402386794997466, "grad_norm": 0.06675633559558294, "learning_rate": 3.78837902978857e-06, "loss": 0.0018, "step": 188520 }, { "epoch": 1.2403044676749801, "grad_norm": 0.02237766162453711, "learning_rate": 3.787822037990327e-06, "loss": 0.0008, "step": 188530 }, { "epoch": 1.2403702558502134, "grad_norm": 0.008585573944338334, "learning_rate": 3.787265062173603e-06, "loss": 0.0005, "step": 188540 }, { "epoch": 1.240436044025447, "grad_norm": 0.049781406790013694, "learning_rate": 3.7867081023457424e-06, "loss": 0.0009, "step": 188550 }, { "epoch": 1.2405018322006802, "grad_norm": 0.03044976183089198, "learning_rate": 3.786151158514088e-06, "loss": 0.0013, "step": 188560 }, { "epoch": 1.2405676203759137, "grad_norm": 0.03560950763087765, "learning_rate": 3.785594230685982e-06, "loss": 0.0004, "step": 188570 }, { "epoch": 1.240633408551147, "grad_norm": 0.0756455860035553, "learning_rate": 3.785037318868767e-06, "loss": 0.0007, "step": 188580 }, { "epoch": 1.2406991967263803, "grad_norm": 0.012474237142795773, "learning_rate": 3.7844804230697864e-06, "loss": 0.0005, "step": 188590 }, { "epoch": 1.2407649849016138, "grad_norm": 0.07103812128194982, "learning_rate": 3.783923543296381e-06, "loss": 0.0016, "step": 188600 }, { "epoch": 1.240830773076847, "grad_norm": 0.09959147678448499, "learning_rate": 3.7833666795558925e-06, "loss": 0.0007, "step": 188610 }, { "epoch": 1.2408965612520806, "grad_norm": 0.027302084995067668, "learning_rate": 3.782809831855665e-06, "loss": 0.0007, "step": 188620 }, { "epoch": 1.2409623494273139, "grad_norm": 0.014975685813641866, "learning_rate": 3.7822530002030384e-06, "loss": 0.0007, "step": 188630 }, { "epoch": 1.2410281376025474, "grad_norm": 0.019485533443832977, "learning_rate": 3.781696184605354e-06, "loss": 0.0008, "step": 188640 }, { "epoch": 1.2410939257777807, "grad_norm": 0.03299684187147623, "learning_rate": 3.7811393850699536e-06, "loss": 0.0005, "step": 188650 }, { "epoch": 1.2411597139530142, "grad_norm": 0.04590186424496854, "learning_rate": 3.7805826016041773e-06, "loss": 0.001, "step": 188660 }, { "epoch": 1.2412255021282474, "grad_norm": 0.013892471126865364, "learning_rate": 3.7800258342153655e-06, "loss": 0.0008, "step": 188670 }, { "epoch": 1.2412912903034807, "grad_norm": 0.05529458465818433, "learning_rate": 3.7794690829108605e-06, "loss": 0.0015, "step": 188680 }, { "epoch": 1.2413570784787142, "grad_norm": 0.06575451940058868, "learning_rate": 3.7789123476980017e-06, "loss": 0.0008, "step": 188690 }, { "epoch": 1.2414228666539477, "grad_norm": 0.021257823855769586, "learning_rate": 3.778355628584129e-06, "loss": 0.0008, "step": 188700 }, { "epoch": 1.241488654829181, "grad_norm": 0.011893528736893703, "learning_rate": 3.777798925576582e-06, "loss": 0.0004, "step": 188710 }, { "epoch": 1.2415544430044143, "grad_norm": 0.0243288606146309, "learning_rate": 3.7772422386827002e-06, "loss": 0.0015, "step": 188720 }, { "epoch": 1.2416202311796478, "grad_norm": 0.04450451207442512, "learning_rate": 3.776685567909824e-06, "loss": 0.0006, "step": 188730 }, { "epoch": 1.241686019354881, "grad_norm": 0.05550480750899137, "learning_rate": 3.7761289132652906e-06, "loss": 0.0009, "step": 188740 }, { "epoch": 1.2417518075301146, "grad_norm": 0.052994313009378055, "learning_rate": 3.7755722747564423e-06, "loss": 0.0009, "step": 188750 }, { "epoch": 1.2418175957053479, "grad_norm": 0.02188446874984511, "learning_rate": 3.775015652390615e-06, "loss": 0.0006, "step": 188760 }, { "epoch": 1.2418833838805814, "grad_norm": 0.0023059158288962806, "learning_rate": 3.774459046175149e-06, "loss": 0.0009, "step": 188770 }, { "epoch": 1.2419491720558147, "grad_norm": 0.10539307947783726, "learning_rate": 3.7739024561173822e-06, "loss": 0.001, "step": 188780 }, { "epoch": 1.2420149602310482, "grad_norm": 0.027462518423043845, "learning_rate": 3.7733458822246523e-06, "loss": 0.0005, "step": 188790 }, { "epoch": 1.2420807484062815, "grad_norm": 0.12020998721207413, "learning_rate": 3.7727893245042967e-06, "loss": 0.0009, "step": 188800 }, { "epoch": 1.2421465365815147, "grad_norm": 0.025600223440704138, "learning_rate": 3.7722327829636547e-06, "loss": 0.0006, "step": 188810 }, { "epoch": 1.2422123247567483, "grad_norm": 0.08609145045345526, "learning_rate": 3.7716762576100628e-06, "loss": 0.001, "step": 188820 }, { "epoch": 1.2422781129319815, "grad_norm": 0.0544685293361016, "learning_rate": 3.7711197484508594e-06, "loss": 0.0034, "step": 188830 }, { "epoch": 1.242343901107215, "grad_norm": 0.01519337662039889, "learning_rate": 3.7705632554933803e-06, "loss": 0.0006, "step": 188840 }, { "epoch": 1.2424096892824483, "grad_norm": 0.008496569319980241, "learning_rate": 3.770006778744963e-06, "loss": 0.0011, "step": 188850 }, { "epoch": 1.2424754774576818, "grad_norm": 0.043865416000957044, "learning_rate": 3.7694503182129437e-06, "loss": 0.0007, "step": 188860 }, { "epoch": 1.2425412656329151, "grad_norm": 0.00885441553828904, "learning_rate": 3.768893873904659e-06, "loss": 0.0005, "step": 188870 }, { "epoch": 1.2426070538081486, "grad_norm": 0.009155865264235421, "learning_rate": 3.768337445827446e-06, "loss": 0.0007, "step": 188880 }, { "epoch": 1.242672841983382, "grad_norm": 0.008891586205639582, "learning_rate": 3.7677810339886394e-06, "loss": 0.0008, "step": 188890 }, { "epoch": 1.2427386301586152, "grad_norm": 0.011737828891705795, "learning_rate": 3.767224638395576e-06, "loss": 0.0008, "step": 188900 }, { "epoch": 1.2428044183338487, "grad_norm": 0.006031459436822973, "learning_rate": 3.7666682590555912e-06, "loss": 0.0006, "step": 188910 }, { "epoch": 1.242870206509082, "grad_norm": 0.06711804383989435, "learning_rate": 3.7661118959760203e-06, "loss": 0.0009, "step": 188920 }, { "epoch": 1.2429359946843155, "grad_norm": 0.04690544124146556, "learning_rate": 3.765555549164197e-06, "loss": 0.0011, "step": 188930 }, { "epoch": 1.2430017828595488, "grad_norm": 0.03507270061130351, "learning_rate": 3.764999218627459e-06, "loss": 0.0005, "step": 188940 }, { "epoch": 1.2430675710347823, "grad_norm": 0.13670287749053622, "learning_rate": 3.7644429043731397e-06, "loss": 0.0029, "step": 188950 }, { "epoch": 1.2431333592100156, "grad_norm": 0.007544866589341286, "learning_rate": 3.763886606408573e-06, "loss": 0.0007, "step": 188960 }, { "epoch": 1.243199147385249, "grad_norm": 0.017730773726067846, "learning_rate": 3.7633303247410945e-06, "loss": 0.0005, "step": 188970 }, { "epoch": 1.2432649355604823, "grad_norm": 0.04808240496081698, "learning_rate": 3.762774059378038e-06, "loss": 0.0004, "step": 188980 }, { "epoch": 1.2433307237357156, "grad_norm": 0.007878480940302605, "learning_rate": 3.762217810326736e-06, "loss": 0.0009, "step": 188990 }, { "epoch": 1.2433965119109491, "grad_norm": 0.005804158889511698, "learning_rate": 3.7616615775945233e-06, "loss": 0.0008, "step": 189000 }, { "epoch": 1.2434623000861824, "grad_norm": 0.002289607585298469, "learning_rate": 3.7611053611887326e-06, "loss": 0.0007, "step": 189010 }, { "epoch": 1.243528088261416, "grad_norm": 0.039527756836677023, "learning_rate": 3.760549161116699e-06, "loss": 0.0008, "step": 189020 }, { "epoch": 1.2435938764366492, "grad_norm": 0.02310548155557981, "learning_rate": 3.759992977385754e-06, "loss": 0.0008, "step": 189030 }, { "epoch": 1.2436596646118827, "grad_norm": 0.03190299034148342, "learning_rate": 3.7594368100032304e-06, "loss": 0.0007, "step": 189040 }, { "epoch": 1.243725452787116, "grad_norm": 0.05320872909028555, "learning_rate": 3.7588806589764613e-06, "loss": 0.0007, "step": 189050 }, { "epoch": 1.2437912409623495, "grad_norm": 0.03983809141051166, "learning_rate": 3.7583245243127776e-06, "loss": 0.0006, "step": 189060 }, { "epoch": 1.2438570291375828, "grad_norm": 0.0003825363305260723, "learning_rate": 3.7577684060195137e-06, "loss": 0.0027, "step": 189070 }, { "epoch": 1.2439228173128163, "grad_norm": 0.00618701051317396, "learning_rate": 3.7572123041040004e-06, "loss": 0.0003, "step": 189080 }, { "epoch": 1.2439886054880496, "grad_norm": 0.003090773867575345, "learning_rate": 3.75665621857357e-06, "loss": 0.0008, "step": 189090 }, { "epoch": 1.244054393663283, "grad_norm": 0.021454068801668762, "learning_rate": 3.756100149435553e-06, "loss": 0.0004, "step": 189100 }, { "epoch": 1.2441201818385164, "grad_norm": 0.0012027920470979003, "learning_rate": 3.755544096697281e-06, "loss": 0.0006, "step": 189110 }, { "epoch": 1.2441859700137496, "grad_norm": 0.021678717213902864, "learning_rate": 3.754988060366086e-06, "loss": 0.0007, "step": 189120 }, { "epoch": 1.2442517581889831, "grad_norm": 0.03037633882772484, "learning_rate": 3.7544320404492974e-06, "loss": 0.0011, "step": 189130 }, { "epoch": 1.2443175463642164, "grad_norm": 0.058531829873804495, "learning_rate": 3.7538760369542466e-06, "loss": 0.0016, "step": 189140 }, { "epoch": 1.24438333453945, "grad_norm": 0.00884969356400572, "learning_rate": 3.7533200498882647e-06, "loss": 0.0008, "step": 189150 }, { "epoch": 1.2444491227146832, "grad_norm": 0.03236056992172779, "learning_rate": 3.752764079258681e-06, "loss": 0.0004, "step": 189160 }, { "epoch": 1.2445149108899167, "grad_norm": 0.00322257324219327, "learning_rate": 3.7522081250728258e-06, "loss": 0.0004, "step": 189170 }, { "epoch": 1.24458069906515, "grad_norm": 0.0010689937014975056, "learning_rate": 3.751652187338029e-06, "loss": 0.0007, "step": 189180 }, { "epoch": 1.2446464872403835, "grad_norm": 0.012600316130247102, "learning_rate": 3.7510962660616184e-06, "loss": 0.0006, "step": 189190 }, { "epoch": 1.2447122754156168, "grad_norm": 0.009157445937037312, "learning_rate": 3.750540361250926e-06, "loss": 0.0004, "step": 189200 }, { "epoch": 1.24477806359085, "grad_norm": 0.12254850601035548, "learning_rate": 3.7499844729132808e-06, "loss": 0.0014, "step": 189210 }, { "epoch": 1.2448438517660836, "grad_norm": 0.059584611687950505, "learning_rate": 3.74942860105601e-06, "loss": 0.0008, "step": 189220 }, { "epoch": 1.2449096399413169, "grad_norm": 0.009933908907986972, "learning_rate": 3.7488727456864426e-06, "loss": 0.0005, "step": 189230 }, { "epoch": 1.2449754281165504, "grad_norm": 0.018028814559273472, "learning_rate": 3.7483169068119085e-06, "loss": 0.0008, "step": 189240 }, { "epoch": 1.2450412162917837, "grad_norm": 0.01883323059482805, "learning_rate": 3.7477610844397344e-06, "loss": 0.0014, "step": 189250 }, { "epoch": 1.2451070044670172, "grad_norm": 0.07266174586139124, "learning_rate": 3.7472052785772476e-06, "loss": 0.0006, "step": 189260 }, { "epoch": 1.2451727926422504, "grad_norm": 0.01595784404593349, "learning_rate": 3.7466494892317784e-06, "loss": 0.0007, "step": 189270 }, { "epoch": 1.245238580817484, "grad_norm": 0.1023710297173523, "learning_rate": 3.7460937164106535e-06, "loss": 0.001, "step": 189280 }, { "epoch": 1.2453043689927172, "grad_norm": 0.12530546212283414, "learning_rate": 3.7455379601212007e-06, "loss": 0.0017, "step": 189290 }, { "epoch": 1.2453701571679505, "grad_norm": 0.06458197260820499, "learning_rate": 3.7449822203707455e-06, "loss": 0.0007, "step": 189300 }, { "epoch": 1.245435945343184, "grad_norm": 0.012738089379094918, "learning_rate": 3.7444264971666156e-06, "loss": 0.0004, "step": 189310 }, { "epoch": 1.2455017335184173, "grad_norm": 0.10758791261247942, "learning_rate": 3.7438707905161374e-06, "loss": 0.0009, "step": 189320 }, { "epoch": 1.2455675216936508, "grad_norm": 0.019503924708529616, "learning_rate": 3.743315100426639e-06, "loss": 0.0009, "step": 189330 }, { "epoch": 1.245633309868884, "grad_norm": 0.0003709540503358806, "learning_rate": 3.7427594269054456e-06, "loss": 0.0003, "step": 189340 }, { "epoch": 1.2456990980441176, "grad_norm": 0.02879760272622252, "learning_rate": 3.7422037699598835e-06, "loss": 0.0006, "step": 189350 }, { "epoch": 1.2457648862193509, "grad_norm": 0.18162621617699035, "learning_rate": 3.741648129597278e-06, "loss": 0.0009, "step": 189360 }, { "epoch": 1.2458306743945844, "grad_norm": 0.03446290486523925, "learning_rate": 3.741092505824955e-06, "loss": 0.001, "step": 189370 }, { "epoch": 1.2458964625698177, "grad_norm": 0.07225230755966393, "learning_rate": 3.7405368986502403e-06, "loss": 0.0006, "step": 189380 }, { "epoch": 1.2459622507450512, "grad_norm": 0.01832127440296027, "learning_rate": 3.739981308080458e-06, "loss": 0.0006, "step": 189390 }, { "epoch": 1.2460280389202845, "grad_norm": 0.019559214343158545, "learning_rate": 3.7394257341229347e-06, "loss": 0.0007, "step": 189400 }, { "epoch": 1.246093827095518, "grad_norm": 0.03940013009508423, "learning_rate": 3.7388701767849943e-06, "loss": 0.0003, "step": 189410 }, { "epoch": 1.2461596152707513, "grad_norm": 0.005465567328402506, "learning_rate": 3.738314636073962e-06, "loss": 0.0003, "step": 189420 }, { "epoch": 1.2462254034459845, "grad_norm": 0.06219951297909248, "learning_rate": 3.737759111997161e-06, "loss": 0.0004, "step": 189430 }, { "epoch": 1.246291191621218, "grad_norm": 0.0021917370290967996, "learning_rate": 3.7372036045619165e-06, "loss": 0.0009, "step": 189440 }, { "epoch": 1.2463569797964513, "grad_norm": 0.03065231853820122, "learning_rate": 3.7366481137755504e-06, "loss": 0.0014, "step": 189450 }, { "epoch": 1.2464227679716848, "grad_norm": 0.0009764490736390957, "learning_rate": 3.736092639645389e-06, "loss": 0.0007, "step": 189460 }, { "epoch": 1.2464885561469181, "grad_norm": 0.04163040797646123, "learning_rate": 3.7355371821787545e-06, "loss": 0.001, "step": 189470 }, { "epoch": 1.2465543443221516, "grad_norm": 0.03413655429172934, "learning_rate": 3.7349817413829702e-06, "loss": 0.0007, "step": 189480 }, { "epoch": 1.246620132497385, "grad_norm": 0.0005505348930138357, "learning_rate": 3.7344263172653596e-06, "loss": 0.0008, "step": 189490 }, { "epoch": 1.2466859206726184, "grad_norm": 0.1254765837280658, "learning_rate": 3.733870909833244e-06, "loss": 0.0011, "step": 189500 }, { "epoch": 1.2467517088478517, "grad_norm": 0.0013964065758068874, "learning_rate": 3.7333155190939474e-06, "loss": 0.0004, "step": 189510 }, { "epoch": 1.246817497023085, "grad_norm": 0.01477090853299657, "learning_rate": 3.7327601450547906e-06, "loss": 0.0007, "step": 189520 }, { "epoch": 1.2468832851983185, "grad_norm": 0.030875069294648085, "learning_rate": 3.732204787723098e-06, "loss": 0.0008, "step": 189530 }, { "epoch": 1.2469490733735518, "grad_norm": 0.012185354099639304, "learning_rate": 3.7316494471061903e-06, "loss": 0.0004, "step": 189540 }, { "epoch": 1.2470148615487853, "grad_norm": 0.00821250868306376, "learning_rate": 3.7310941232113895e-06, "loss": 0.0004, "step": 189550 }, { "epoch": 1.2470806497240186, "grad_norm": 0.14657926495463122, "learning_rate": 3.7305388160460166e-06, "loss": 0.0009, "step": 189560 }, { "epoch": 1.247146437899252, "grad_norm": 0.018082658996419507, "learning_rate": 3.7299835256173927e-06, "loss": 0.0006, "step": 189570 }, { "epoch": 1.2472122260744853, "grad_norm": 0.01643155523683957, "learning_rate": 3.7294282519328386e-06, "loss": 0.0002, "step": 189580 }, { "epoch": 1.2472780142497188, "grad_norm": 0.0028868162608159695, "learning_rate": 3.728872994999676e-06, "loss": 0.0008, "step": 189590 }, { "epoch": 1.2473438024249521, "grad_norm": 0.020900050548772933, "learning_rate": 3.728317754825226e-06, "loss": 0.0007, "step": 189600 }, { "epoch": 1.2474095906001854, "grad_norm": 0.04660286373142396, "learning_rate": 3.7277625314168074e-06, "loss": 0.0006, "step": 189610 }, { "epoch": 1.247475378775419, "grad_norm": 0.014364222176552656, "learning_rate": 3.7272073247817415e-06, "loss": 0.0005, "step": 189620 }, { "epoch": 1.2475411669506522, "grad_norm": 0.03319194444701436, "learning_rate": 3.726652134927348e-06, "loss": 0.0006, "step": 189630 }, { "epoch": 1.2476069551258857, "grad_norm": 0.02490901877077554, "learning_rate": 3.7260969618609456e-06, "loss": 0.0003, "step": 189640 }, { "epoch": 1.247672743301119, "grad_norm": 0.009149689919079414, "learning_rate": 3.725541805589854e-06, "loss": 0.0004, "step": 189650 }, { "epoch": 1.2477385314763525, "grad_norm": 0.24128202889693934, "learning_rate": 3.724986666121394e-06, "loss": 0.0014, "step": 189660 }, { "epoch": 1.2478043196515858, "grad_norm": 0.22494619381957526, "learning_rate": 3.7244315434628832e-06, "loss": 0.001, "step": 189670 }, { "epoch": 1.2478701078268193, "grad_norm": 0.033435623244383914, "learning_rate": 3.7238764376216413e-06, "loss": 0.0004, "step": 189680 }, { "epoch": 1.2479358960020526, "grad_norm": 0.015715704344347308, "learning_rate": 3.723321348604987e-06, "loss": 0.0003, "step": 189690 }, { "epoch": 1.2480016841772859, "grad_norm": 0.013419771821636508, "learning_rate": 3.7227662764202366e-06, "loss": 0.0006, "step": 189700 }, { "epoch": 1.2480674723525194, "grad_norm": 0.022494656557461887, "learning_rate": 3.7222112210747096e-06, "loss": 0.0008, "step": 189710 }, { "epoch": 1.2481332605277529, "grad_norm": 0.0008282428500189161, "learning_rate": 3.7216561825757257e-06, "loss": 0.0004, "step": 189720 }, { "epoch": 1.2481990487029861, "grad_norm": 0.045237678568328214, "learning_rate": 3.7211011609306e-06, "loss": 0.0012, "step": 189730 }, { "epoch": 1.2482648368782194, "grad_norm": 0.03927536123311065, "learning_rate": 3.7205461561466514e-06, "loss": 0.0003, "step": 189740 }, { "epoch": 1.248330625053453, "grad_norm": 0.03305094351079115, "learning_rate": 3.719991168231197e-06, "loss": 0.0005, "step": 189750 }, { "epoch": 1.2483964132286862, "grad_norm": 0.0066318568931619404, "learning_rate": 3.7194361971915525e-06, "loss": 0.0007, "step": 189760 }, { "epoch": 1.2484622014039197, "grad_norm": 0.021050916273907892, "learning_rate": 3.718881243035037e-06, "loss": 0.0006, "step": 189770 }, { "epoch": 1.248527989579153, "grad_norm": 0.0007206530300643587, "learning_rate": 3.7183263057689645e-06, "loss": 0.0003, "step": 189780 }, { "epoch": 1.2485937777543865, "grad_norm": 0.061821476241866506, "learning_rate": 3.7177713854006534e-06, "loss": 0.0003, "step": 189790 }, { "epoch": 1.2486595659296198, "grad_norm": 0.1465299910167185, "learning_rate": 3.717216481937419e-06, "loss": 0.0007, "step": 189800 }, { "epoch": 1.2487253541048533, "grad_norm": 0.02307197188897662, "learning_rate": 3.7166615953865783e-06, "loss": 0.0005, "step": 189810 }, { "epoch": 1.2487911422800866, "grad_norm": 0.009288802388870683, "learning_rate": 3.716106725755445e-06, "loss": 0.0014, "step": 189820 }, { "epoch": 1.2488569304553199, "grad_norm": 0.07393874298698117, "learning_rate": 3.7155518730513364e-06, "loss": 0.0013, "step": 189830 }, { "epoch": 1.2489227186305534, "grad_norm": 0.010693563272018779, "learning_rate": 3.714997037281568e-06, "loss": 0.0005, "step": 189840 }, { "epoch": 1.2489885068057867, "grad_norm": 0.05127208239091397, "learning_rate": 3.7144422184534527e-06, "loss": 0.0005, "step": 189850 }, { "epoch": 1.2490542949810202, "grad_norm": 0.05797752807733614, "learning_rate": 3.7138874165743068e-06, "loss": 0.001, "step": 189860 }, { "epoch": 1.2491200831562534, "grad_norm": 0.0807164376187751, "learning_rate": 3.713332631651444e-06, "loss": 0.001, "step": 189870 }, { "epoch": 1.249185871331487, "grad_norm": 0.014980229971709719, "learning_rate": 3.7127778636921785e-06, "loss": 0.0019, "step": 189880 }, { "epoch": 1.2492516595067202, "grad_norm": 0.07870579172028806, "learning_rate": 3.7122231127038266e-06, "loss": 0.0007, "step": 189890 }, { "epoch": 1.2493174476819537, "grad_norm": 0.018343959467601083, "learning_rate": 3.7116683786937003e-06, "loss": 0.0008, "step": 189900 }, { "epoch": 1.249383235857187, "grad_norm": 0.3458619734533905, "learning_rate": 3.7111136616691135e-06, "loss": 0.0015, "step": 189910 }, { "epoch": 1.2494490240324203, "grad_norm": 0.01454429048666749, "learning_rate": 3.71055896163738e-06, "loss": 0.0005, "step": 189920 }, { "epoch": 1.2495148122076538, "grad_norm": 0.0473326446273459, "learning_rate": 3.7100042786058133e-06, "loss": 0.0017, "step": 189930 }, { "epoch": 1.249580600382887, "grad_norm": 0.07397732530634313, "learning_rate": 3.7094496125817257e-06, "loss": 0.0005, "step": 189940 }, { "epoch": 1.2496463885581206, "grad_norm": 0.6453734407574749, "learning_rate": 3.7088949635724293e-06, "loss": 0.0012, "step": 189950 }, { "epoch": 1.2497121767333539, "grad_norm": 0.013758244180997229, "learning_rate": 3.7083403315852396e-06, "loss": 0.0008, "step": 189960 }, { "epoch": 1.2497779649085874, "grad_norm": 0.02540476007004545, "learning_rate": 3.707785716627466e-06, "loss": 0.0005, "step": 189970 }, { "epoch": 1.2498437530838207, "grad_norm": 0.08075859519174683, "learning_rate": 3.7072311187064224e-06, "loss": 0.001, "step": 189980 }, { "epoch": 1.2499095412590542, "grad_norm": 0.045238855375188035, "learning_rate": 3.70667653782942e-06, "loss": 0.0004, "step": 189990 }, { "epoch": 1.2499753294342875, "grad_norm": 0.013653774264066856, "learning_rate": 3.70612197400377e-06, "loss": 0.0005, "step": 190000 }, { "epoch": 1.2500411176095207, "grad_norm": 0.007021632887380136, "learning_rate": 3.7055674272367837e-06, "loss": 0.0003, "step": 190010 }, { "epoch": 1.2501069057847543, "grad_norm": 0.09378037736393119, "learning_rate": 3.7050128975357735e-06, "loss": 0.001, "step": 190020 }, { "epoch": 1.2501726939599878, "grad_norm": 0.06192839177539037, "learning_rate": 3.70445838490805e-06, "loss": 0.0006, "step": 190030 }, { "epoch": 1.250238482135221, "grad_norm": 0.04248937483578819, "learning_rate": 3.7039038893609235e-06, "loss": 0.0007, "step": 190040 }, { "epoch": 1.2503042703104543, "grad_norm": 0.049551687609524456, "learning_rate": 3.7033494109017056e-06, "loss": 0.001, "step": 190050 }, { "epoch": 1.2503700584856878, "grad_norm": 0.03009721160553573, "learning_rate": 3.702794949537706e-06, "loss": 0.0006, "step": 190060 }, { "epoch": 1.2504358466609211, "grad_norm": 0.020648351389925684, "learning_rate": 3.7022405052762333e-06, "loss": 0.0008, "step": 190070 }, { "epoch": 1.2505016348361546, "grad_norm": 0.013957997462537668, "learning_rate": 3.7016860781245987e-06, "loss": 0.0005, "step": 190080 }, { "epoch": 1.250567423011388, "grad_norm": 0.04083663183952909, "learning_rate": 3.7011316680901126e-06, "loss": 0.0009, "step": 190090 }, { "epoch": 1.2506332111866212, "grad_norm": 0.053515488855224495, "learning_rate": 3.700577275180084e-06, "loss": 0.0003, "step": 190100 }, { "epoch": 1.2506989993618547, "grad_norm": 0.030919703193551355, "learning_rate": 3.7000228994018217e-06, "loss": 0.0007, "step": 190110 }, { "epoch": 1.2507647875370882, "grad_norm": 0.03330662147705767, "learning_rate": 3.699468540762634e-06, "loss": 0.0008, "step": 190120 }, { "epoch": 1.2508305757123215, "grad_norm": 0.04981141219803888, "learning_rate": 3.6989141992698315e-06, "loss": 0.0003, "step": 190130 }, { "epoch": 1.2508963638875548, "grad_norm": 0.021583187047862606, "learning_rate": 3.6983598749307198e-06, "loss": 0.0004, "step": 190140 }, { "epoch": 1.2509621520627883, "grad_norm": 0.008059905336255628, "learning_rate": 3.6978055677526103e-06, "loss": 0.0004, "step": 190150 }, { "epoch": 1.2510279402380216, "grad_norm": 0.020352386502335604, "learning_rate": 3.6972512777428093e-06, "loss": 0.0009, "step": 190160 }, { "epoch": 1.251093728413255, "grad_norm": 0.022231630491084533, "learning_rate": 3.696697004908626e-06, "loss": 0.0012, "step": 190170 }, { "epoch": 1.2511595165884883, "grad_norm": 0.008745219404929793, "learning_rate": 3.6961427492573663e-06, "loss": 0.0004, "step": 190180 }, { "epoch": 1.2512253047637218, "grad_norm": 0.0919005950306216, "learning_rate": 3.6955885107963383e-06, "loss": 0.0005, "step": 190190 }, { "epoch": 1.2512910929389551, "grad_norm": 0.0043730366291029775, "learning_rate": 3.6950342895328493e-06, "loss": 0.0003, "step": 190200 }, { "epoch": 1.2513568811141886, "grad_norm": 0.010404529526906441, "learning_rate": 3.694480085474206e-06, "loss": 0.0006, "step": 190210 }, { "epoch": 1.251422669289422, "grad_norm": 0.0404859772603653, "learning_rate": 3.6939258986277154e-06, "loss": 0.0007, "step": 190220 }, { "epoch": 1.2514884574646552, "grad_norm": 0.022054938128876614, "learning_rate": 3.6933717290006843e-06, "loss": 0.0004, "step": 190230 }, { "epoch": 1.2515542456398887, "grad_norm": 0.012290355869529267, "learning_rate": 3.6928175766004183e-06, "loss": 0.0006, "step": 190240 }, { "epoch": 1.251620033815122, "grad_norm": 0.019831660905655888, "learning_rate": 3.692263441434223e-06, "loss": 0.0007, "step": 190250 }, { "epoch": 1.2516858219903555, "grad_norm": 0.0023044374033937753, "learning_rate": 3.6917093235094054e-06, "loss": 0.0008, "step": 190260 }, { "epoch": 1.2517516101655888, "grad_norm": 0.02898323447930708, "learning_rate": 3.6911552228332694e-06, "loss": 0.0008, "step": 190270 }, { "epoch": 1.2518173983408223, "grad_norm": 0.032786168853096376, "learning_rate": 3.6906011394131224e-06, "loss": 0.0007, "step": 190280 }, { "epoch": 1.2518831865160556, "grad_norm": 0.014440699701975512, "learning_rate": 3.6900470732562688e-06, "loss": 0.0016, "step": 190290 }, { "epoch": 1.251948974691289, "grad_norm": 0.05848219700764929, "learning_rate": 3.689493024370012e-06, "loss": 0.0013, "step": 190300 }, { "epoch": 1.2520147628665224, "grad_norm": 0.03387360468202599, "learning_rate": 3.688938992761659e-06, "loss": 0.001, "step": 190310 }, { "epoch": 1.2520805510417556, "grad_norm": 0.01680255017772581, "learning_rate": 3.688384978438513e-06, "loss": 0.0004, "step": 190320 }, { "epoch": 1.2521463392169891, "grad_norm": 0.03260786560629064, "learning_rate": 3.6878309814078783e-06, "loss": 0.0006, "step": 190330 }, { "epoch": 1.2522121273922227, "grad_norm": 0.05766263573072919, "learning_rate": 3.6872770016770577e-06, "loss": 0.0006, "step": 190340 }, { "epoch": 1.252277915567456, "grad_norm": 0.019780521040618348, "learning_rate": 3.6867230392533565e-06, "loss": 0.0006, "step": 190350 }, { "epoch": 1.2523437037426892, "grad_norm": 0.33689492027655144, "learning_rate": 3.6861690941440785e-06, "loss": 0.0004, "step": 190360 }, { "epoch": 1.2524094919179227, "grad_norm": 0.009053797135320842, "learning_rate": 3.685615166356526e-06, "loss": 0.0005, "step": 190370 }, { "epoch": 1.252475280093156, "grad_norm": 0.030643288552305654, "learning_rate": 3.6850612558980026e-06, "loss": 0.0011, "step": 190380 }, { "epoch": 1.2525410682683895, "grad_norm": 0.010316848862889551, "learning_rate": 3.6845073627758105e-06, "loss": 0.0004, "step": 190390 }, { "epoch": 1.2526068564436228, "grad_norm": 0.03548701540628143, "learning_rate": 3.6839534869972515e-06, "loss": 0.001, "step": 190400 }, { "epoch": 1.252672644618856, "grad_norm": 0.00679678411013525, "learning_rate": 3.6833996285696306e-06, "loss": 0.0003, "step": 190410 }, { "epoch": 1.2527384327940896, "grad_norm": 0.0328752684565588, "learning_rate": 3.6828457875002486e-06, "loss": 0.0011, "step": 190420 }, { "epoch": 1.252804220969323, "grad_norm": 0.003906734736081441, "learning_rate": 3.682291963796407e-06, "loss": 0.0005, "step": 190430 }, { "epoch": 1.2528700091445564, "grad_norm": 0.010814704020560725, "learning_rate": 3.6817381574654087e-06, "loss": 0.0009, "step": 190440 }, { "epoch": 1.2529357973197897, "grad_norm": 0.10522552966288427, "learning_rate": 3.681184368514553e-06, "loss": 0.001, "step": 190450 }, { "epoch": 1.2530015854950232, "grad_norm": 0.02831436827313471, "learning_rate": 3.6806305969511434e-06, "loss": 0.0007, "step": 190460 }, { "epoch": 1.2530673736702564, "grad_norm": 0.0017470219235812148, "learning_rate": 3.6800768427824786e-06, "loss": 0.0006, "step": 190470 }, { "epoch": 1.25313316184549, "grad_norm": 0.017904924706749828, "learning_rate": 3.679523106015861e-06, "loss": 0.0014, "step": 190480 }, { "epoch": 1.2531989500207232, "grad_norm": 0.038093067423299605, "learning_rate": 3.678969386658592e-06, "loss": 0.0011, "step": 190490 }, { "epoch": 1.2532647381959567, "grad_norm": 0.025322750407889942, "learning_rate": 3.6784156847179696e-06, "loss": 0.0004, "step": 190500 }, { "epoch": 1.25333052637119, "grad_norm": 0.008139838859397097, "learning_rate": 3.6778620002012955e-06, "loss": 0.0007, "step": 190510 }, { "epoch": 1.2533963145464235, "grad_norm": 0.011407395452665137, "learning_rate": 3.6773083331158686e-06, "loss": 0.0005, "step": 190520 }, { "epoch": 1.2534621027216568, "grad_norm": 0.03899688990181821, "learning_rate": 3.6767546834689887e-06, "loss": 0.0003, "step": 190530 }, { "epoch": 1.25352789089689, "grad_norm": 0.022954376673460352, "learning_rate": 3.6762010512679565e-06, "loss": 0.0003, "step": 190540 }, { "epoch": 1.2535936790721236, "grad_norm": 0.0472215145269183, "learning_rate": 3.67564743652007e-06, "loss": 0.0007, "step": 190550 }, { "epoch": 1.2536594672473569, "grad_norm": 0.05557701745972251, "learning_rate": 3.6750938392326274e-06, "loss": 0.0012, "step": 190560 }, { "epoch": 1.2537252554225904, "grad_norm": 0.012118452407273779, "learning_rate": 3.674540259412929e-06, "loss": 0.0004, "step": 190570 }, { "epoch": 1.2537910435978237, "grad_norm": 0.05219868177133554, "learning_rate": 3.6739866970682724e-06, "loss": 0.0006, "step": 190580 }, { "epoch": 1.2538568317730572, "grad_norm": 0.005051641263389359, "learning_rate": 3.6734331522059563e-06, "loss": 0.001, "step": 190590 }, { "epoch": 1.2539226199482905, "grad_norm": 0.04087254053659506, "learning_rate": 3.672879624833277e-06, "loss": 0.0006, "step": 190600 }, { "epoch": 1.253988408123524, "grad_norm": 0.019587579348332773, "learning_rate": 3.6723261149575352e-06, "loss": 0.0003, "step": 190610 }, { "epoch": 1.2540541962987573, "grad_norm": 0.01927037440338786, "learning_rate": 3.6717726225860262e-06, "loss": 0.0005, "step": 190620 }, { "epoch": 1.2541199844739905, "grad_norm": 0.014609641082574055, "learning_rate": 3.6712191477260485e-06, "loss": 0.0005, "step": 190630 }, { "epoch": 1.254185772649224, "grad_norm": 0.025834946078545185, "learning_rate": 3.6706656903848987e-06, "loss": 0.0004, "step": 190640 }, { "epoch": 1.2542515608244575, "grad_norm": 0.029157667869963703, "learning_rate": 3.670112250569874e-06, "loss": 0.0008, "step": 190650 }, { "epoch": 1.2543173489996908, "grad_norm": 0.016773243645750297, "learning_rate": 3.6695588282882687e-06, "loss": 0.0006, "step": 190660 }, { "epoch": 1.2543831371749241, "grad_norm": 0.011655554334111612, "learning_rate": 3.6690054235473826e-06, "loss": 0.0006, "step": 190670 }, { "epoch": 1.2544489253501576, "grad_norm": 0.028543827853159526, "learning_rate": 3.6684520363545106e-06, "loss": 0.0005, "step": 190680 }, { "epoch": 1.254514713525391, "grad_norm": 0.01249876645521372, "learning_rate": 3.667898666716949e-06, "loss": 0.0003, "step": 190690 }, { "epoch": 1.2545805017006244, "grad_norm": 0.02590963666729062, "learning_rate": 3.6673453146419924e-06, "loss": 0.0008, "step": 190700 }, { "epoch": 1.2546462898758577, "grad_norm": 0.03280765400387198, "learning_rate": 3.666791980136937e-06, "loss": 0.0006, "step": 190710 }, { "epoch": 1.254712078051091, "grad_norm": 0.030624767098342102, "learning_rate": 3.6662386632090785e-06, "loss": 0.0005, "step": 190720 }, { "epoch": 1.2547778662263245, "grad_norm": 0.024128262782719418, "learning_rate": 3.66568536386571e-06, "loss": 0.0005, "step": 190730 }, { "epoch": 1.254843654401558, "grad_norm": 0.00020212714262280324, "learning_rate": 3.665132082114128e-06, "loss": 0.0006, "step": 190740 }, { "epoch": 1.2549094425767913, "grad_norm": 0.02296392289087349, "learning_rate": 3.664578817961627e-06, "loss": 0.0003, "step": 190750 }, { "epoch": 1.2549752307520246, "grad_norm": 0.05101087481503993, "learning_rate": 3.6640255714155003e-06, "loss": 0.0014, "step": 190760 }, { "epoch": 1.255041018927258, "grad_norm": 0.05566904472911044, "learning_rate": 3.663472342483043e-06, "loss": 0.0011, "step": 190770 }, { "epoch": 1.2551068071024913, "grad_norm": 0.029434847571012696, "learning_rate": 3.6629191311715493e-06, "loss": 0.0005, "step": 190780 }, { "epoch": 1.2551725952777248, "grad_norm": 0.028780278598032494, "learning_rate": 3.66236593748831e-06, "loss": 0.0012, "step": 190790 }, { "epoch": 1.2552383834529581, "grad_norm": 0.016921413933781212, "learning_rate": 3.661812761440623e-06, "loss": 0.0007, "step": 190800 }, { "epoch": 1.2553041716281914, "grad_norm": 0.019129793584294206, "learning_rate": 3.661259603035778e-06, "loss": 0.0006, "step": 190810 }, { "epoch": 1.255369959803425, "grad_norm": 0.06266657318690035, "learning_rate": 3.660706462281069e-06, "loss": 0.001, "step": 190820 }, { "epoch": 1.2554357479786584, "grad_norm": 0.022531527391696306, "learning_rate": 3.6601533391837883e-06, "loss": 0.0005, "step": 190830 }, { "epoch": 1.2555015361538917, "grad_norm": 0.003334027778350041, "learning_rate": 3.6596002337512293e-06, "loss": 0.0008, "step": 190840 }, { "epoch": 1.255567324329125, "grad_norm": 0.23931127091395551, "learning_rate": 3.6590471459906837e-06, "loss": 0.0011, "step": 190850 }, { "epoch": 1.2556331125043585, "grad_norm": 0.09309091529860958, "learning_rate": 3.658494075909441e-06, "loss": 0.0007, "step": 190860 }, { "epoch": 1.2556989006795918, "grad_norm": 0.07601982861669118, "learning_rate": 3.657941023514798e-06, "loss": 0.001, "step": 190870 }, { "epoch": 1.2557646888548253, "grad_norm": 0.005880003244917645, "learning_rate": 3.657387988814043e-06, "loss": 0.0007, "step": 190880 }, { "epoch": 1.2558304770300586, "grad_norm": 0.021473227527282212, "learning_rate": 3.656834971814467e-06, "loss": 0.0004, "step": 190890 }, { "epoch": 1.255896265205292, "grad_norm": 0.003773959042199251, "learning_rate": 3.656281972523363e-06, "loss": 0.0003, "step": 190900 }, { "epoch": 1.2559620533805254, "grad_norm": 0.040267287854700975, "learning_rate": 3.65572899094802e-06, "loss": 0.001, "step": 190910 }, { "epoch": 1.2560278415557589, "grad_norm": 0.011357258966844256, "learning_rate": 3.655176027095728e-06, "loss": 0.0005, "step": 190920 }, { "epoch": 1.2560936297309921, "grad_norm": 0.053516521397766435, "learning_rate": 3.6546230809737804e-06, "loss": 0.0005, "step": 190930 }, { "epoch": 1.2561594179062254, "grad_norm": 0.0073449832807901475, "learning_rate": 3.654070152589465e-06, "loss": 0.0005, "step": 190940 }, { "epoch": 1.256225206081459, "grad_norm": 0.011770893352580216, "learning_rate": 3.6535172419500724e-06, "loss": 0.001, "step": 190950 }, { "epoch": 1.2562909942566924, "grad_norm": 0.0007368534763690132, "learning_rate": 3.652964349062892e-06, "loss": 0.0007, "step": 190960 }, { "epoch": 1.2563567824319257, "grad_norm": 0.038369389261175973, "learning_rate": 3.6524114739352133e-06, "loss": 0.0005, "step": 190970 }, { "epoch": 1.256422570607159, "grad_norm": 0.04950982540455523, "learning_rate": 3.651858616574325e-06, "loss": 0.0008, "step": 190980 }, { "epoch": 1.2564883587823925, "grad_norm": 0.005463525107730134, "learning_rate": 3.6513057769875166e-06, "loss": 0.0005, "step": 190990 }, { "epoch": 1.2565541469576258, "grad_norm": 0.028258863901343186, "learning_rate": 3.650752955182077e-06, "loss": 0.0004, "step": 191000 }, { "epoch": 1.2566199351328593, "grad_norm": 0.016121479124646648, "learning_rate": 3.6502001511652947e-06, "loss": 0.0008, "step": 191010 }, { "epoch": 1.2566857233080926, "grad_norm": 0.020300711901368677, "learning_rate": 3.6496473649444574e-06, "loss": 0.0007, "step": 191020 }, { "epoch": 1.2567515114833259, "grad_norm": 0.001234860524823971, "learning_rate": 3.649094596526853e-06, "loss": 0.0011, "step": 191030 }, { "epoch": 1.2568172996585594, "grad_norm": 0.0015888001703466272, "learning_rate": 3.6485418459197706e-06, "loss": 0.0014, "step": 191040 }, { "epoch": 1.2568830878337929, "grad_norm": 0.018084221643733054, "learning_rate": 3.647989113130495e-06, "loss": 0.0011, "step": 191050 }, { "epoch": 1.2569488760090262, "grad_norm": 0.02311351238397594, "learning_rate": 3.6474363981663163e-06, "loss": 0.0006, "step": 191060 }, { "epoch": 1.2570146641842594, "grad_norm": 0.029582616533304042, "learning_rate": 3.646883701034521e-06, "loss": 0.0003, "step": 191070 }, { "epoch": 1.257080452359493, "grad_norm": 0.047418555237049784, "learning_rate": 3.6463310217423955e-06, "loss": 0.0005, "step": 191080 }, { "epoch": 1.2571462405347262, "grad_norm": 0.04041276101849406, "learning_rate": 3.6457783602972257e-06, "loss": 0.0007, "step": 191090 }, { "epoch": 1.2572120287099597, "grad_norm": 0.04029504898296618, "learning_rate": 3.645225716706299e-06, "loss": 0.0016, "step": 191100 }, { "epoch": 1.257277816885193, "grad_norm": 0.017274640842641208, "learning_rate": 3.6446730909769013e-06, "loss": 0.0004, "step": 191110 }, { "epoch": 1.2573436050604263, "grad_norm": 0.06635325135763832, "learning_rate": 3.644120483116317e-06, "loss": 0.0014, "step": 191120 }, { "epoch": 1.2574093932356598, "grad_norm": 0.05046032997926373, "learning_rate": 3.643567893131834e-06, "loss": 0.0006, "step": 191130 }, { "epoch": 1.2574751814108933, "grad_norm": 0.010014367473090141, "learning_rate": 3.6430153210307372e-06, "loss": 0.0005, "step": 191140 }, { "epoch": 1.2575409695861266, "grad_norm": 0.04620692268589292, "learning_rate": 3.6424627668203117e-06, "loss": 0.001, "step": 191150 }, { "epoch": 1.2576067577613599, "grad_norm": 0.012935381597883362, "learning_rate": 3.641910230507842e-06, "loss": 0.0011, "step": 191160 }, { "epoch": 1.2576725459365934, "grad_norm": 0.020973086151683328, "learning_rate": 3.6413577121006126e-06, "loss": 0.0006, "step": 191170 }, { "epoch": 1.2577383341118267, "grad_norm": 0.02450165323534498, "learning_rate": 3.640805211605908e-06, "loss": 0.0004, "step": 191180 }, { "epoch": 1.2578041222870602, "grad_norm": 0.009955096581979728, "learning_rate": 3.640252729031013e-06, "loss": 0.0005, "step": 191190 }, { "epoch": 1.2578699104622935, "grad_norm": 0.020599572692260835, "learning_rate": 3.6397002643832123e-06, "loss": 0.0006, "step": 191200 }, { "epoch": 1.257935698637527, "grad_norm": 0.02420809458109274, "learning_rate": 3.639147817669788e-06, "loss": 0.0009, "step": 191210 }, { "epoch": 1.2580014868127603, "grad_norm": 0.01509325815191619, "learning_rate": 3.638595388898025e-06, "loss": 0.0006, "step": 191220 }, { "epoch": 1.2580672749879938, "grad_norm": 0.025258166808283137, "learning_rate": 3.638042978075206e-06, "loss": 0.0007, "step": 191230 }, { "epoch": 1.258133063163227, "grad_norm": 0.029042300213764017, "learning_rate": 3.6374905852086136e-06, "loss": 0.0017, "step": 191240 }, { "epoch": 1.2581988513384603, "grad_norm": 0.02866866984152145, "learning_rate": 3.63693821030553e-06, "loss": 0.0007, "step": 191250 }, { "epoch": 1.2582646395136938, "grad_norm": 0.005346570894971366, "learning_rate": 3.6363858533732403e-06, "loss": 0.0004, "step": 191260 }, { "epoch": 1.2583304276889271, "grad_norm": 0.004475609246542421, "learning_rate": 3.6358335144190253e-06, "loss": 0.0004, "step": 191270 }, { "epoch": 1.2583962158641606, "grad_norm": 0.015894590418066425, "learning_rate": 3.6352811934501675e-06, "loss": 0.0012, "step": 191280 }, { "epoch": 1.258462004039394, "grad_norm": 0.0065385436553886695, "learning_rate": 3.6347288904739485e-06, "loss": 0.0004, "step": 191290 }, { "epoch": 1.2585277922146274, "grad_norm": 0.004782354535565892, "learning_rate": 3.6341766054976497e-06, "loss": 0.0005, "step": 191300 }, { "epoch": 1.2585935803898607, "grad_norm": 0.03751987778990334, "learning_rate": 3.6336243385285518e-06, "loss": 0.0012, "step": 191310 }, { "epoch": 1.2586593685650942, "grad_norm": 0.020018568610609275, "learning_rate": 3.633072089573938e-06, "loss": 0.0007, "step": 191320 }, { "epoch": 1.2587251567403275, "grad_norm": 0.012521411081334534, "learning_rate": 3.632519858641088e-06, "loss": 0.0002, "step": 191330 }, { "epoch": 1.2587909449155608, "grad_norm": 0.0018054108683942752, "learning_rate": 3.631967645737282e-06, "loss": 0.001, "step": 191340 }, { "epoch": 1.2588567330907943, "grad_norm": 0.015117890654110806, "learning_rate": 3.631415450869802e-06, "loss": 0.0005, "step": 191350 }, { "epoch": 1.2589225212660278, "grad_norm": 0.011790924692332167, "learning_rate": 3.630863274045927e-06, "loss": 0.0008, "step": 191360 }, { "epoch": 1.258988309441261, "grad_norm": 0.006875121824351422, "learning_rate": 3.6303111152729377e-06, "loss": 0.0005, "step": 191370 }, { "epoch": 1.2590540976164943, "grad_norm": 0.06660193863396852, "learning_rate": 3.6297589745581117e-06, "loss": 0.001, "step": 191380 }, { "epoch": 1.2591198857917278, "grad_norm": 0.0002917838081399526, "learning_rate": 3.629206851908732e-06, "loss": 0.0006, "step": 191390 }, { "epoch": 1.2591856739669611, "grad_norm": 0.07304658243509617, "learning_rate": 3.6286547473320754e-06, "loss": 0.0004, "step": 191400 }, { "epoch": 1.2592514621421946, "grad_norm": 0.015193179644412648, "learning_rate": 3.6281026608354218e-06, "loss": 0.0005, "step": 191410 }, { "epoch": 1.259317250317428, "grad_norm": 0.01316678137571392, "learning_rate": 3.62755059242605e-06, "loss": 0.0006, "step": 191420 }, { "epoch": 1.2593830384926612, "grad_norm": 0.016220371483034162, "learning_rate": 3.6269985421112376e-06, "loss": 0.0002, "step": 191430 }, { "epoch": 1.2594488266678947, "grad_norm": 0.04784874703305978, "learning_rate": 3.6264465098982634e-06, "loss": 0.0008, "step": 191440 }, { "epoch": 1.2595146148431282, "grad_norm": 0.04184825129509495, "learning_rate": 3.6258944957944063e-06, "loss": 0.001, "step": 191450 }, { "epoch": 1.2595804030183615, "grad_norm": 0.0398852156495911, "learning_rate": 3.6253424998069437e-06, "loss": 0.0006, "step": 191460 }, { "epoch": 1.2596461911935948, "grad_norm": 0.002498970819057697, "learning_rate": 3.624790521943153e-06, "loss": 0.0003, "step": 191470 }, { "epoch": 1.2597119793688283, "grad_norm": 0.019098473533957695, "learning_rate": 3.6242385622103114e-06, "loss": 0.0009, "step": 191480 }, { "epoch": 1.2597777675440616, "grad_norm": 0.061841453253476134, "learning_rate": 3.6236866206156963e-06, "loss": 0.0004, "step": 191490 }, { "epoch": 1.259843555719295, "grad_norm": 0.05285630411139678, "learning_rate": 3.6231346971665843e-06, "loss": 0.0005, "step": 191500 }, { "epoch": 1.2599093438945284, "grad_norm": 0.009995116561075131, "learning_rate": 3.622582791870251e-06, "loss": 0.0008, "step": 191510 }, { "epoch": 1.2599751320697619, "grad_norm": 0.030975305187248586, "learning_rate": 3.6220309047339753e-06, "loss": 0.001, "step": 191520 }, { "epoch": 1.2600409202449951, "grad_norm": 0.037263389305268485, "learning_rate": 3.621479035765032e-06, "loss": 0.0004, "step": 191530 }, { "epoch": 1.2601067084202286, "grad_norm": 0.030469870262399654, "learning_rate": 3.6209271849706975e-06, "loss": 0.0014, "step": 191540 }, { "epoch": 1.260172496595462, "grad_norm": 0.04050416588514399, "learning_rate": 3.6203753523582463e-06, "loss": 0.0008, "step": 191550 }, { "epoch": 1.2602382847706952, "grad_norm": 0.03610301816885497, "learning_rate": 3.6198235379349543e-06, "loss": 0.0007, "step": 191560 }, { "epoch": 1.2603040729459287, "grad_norm": 0.017972409706067272, "learning_rate": 3.6192717417080968e-06, "loss": 0.0004, "step": 191570 }, { "epoch": 1.260369861121162, "grad_norm": 0.03302451001124023, "learning_rate": 3.6187199636849492e-06, "loss": 0.0004, "step": 191580 }, { "epoch": 1.2604356492963955, "grad_norm": 0.08761000851264909, "learning_rate": 3.618168203872786e-06, "loss": 0.0006, "step": 191590 }, { "epoch": 1.2605014374716288, "grad_norm": 0.036805157756612195, "learning_rate": 3.6176164622788823e-06, "loss": 0.0004, "step": 191600 }, { "epoch": 1.2605672256468623, "grad_norm": 0.16543212512879107, "learning_rate": 3.617064738910511e-06, "loss": 0.0008, "step": 191610 }, { "epoch": 1.2606330138220956, "grad_norm": 0.038764704250511116, "learning_rate": 3.6165130337749477e-06, "loss": 0.0021, "step": 191620 }, { "epoch": 1.260698801997329, "grad_norm": 0.07367211655338327, "learning_rate": 3.615961346879464e-06, "loss": 0.0009, "step": 191630 }, { "epoch": 1.2607645901725624, "grad_norm": 0.005862156604526188, "learning_rate": 3.6154096782313343e-06, "loss": 0.0009, "step": 191640 }, { "epoch": 1.2608303783477957, "grad_norm": 0.11972721894848327, "learning_rate": 3.6148580278378327e-06, "loss": 0.0009, "step": 191650 }, { "epoch": 1.2608961665230292, "grad_norm": 0.0004020000452271695, "learning_rate": 3.614306395706232e-06, "loss": 0.0002, "step": 191660 }, { "epoch": 1.2609619546982627, "grad_norm": 0.011269865335781792, "learning_rate": 3.6137547818438047e-06, "loss": 0.0009, "step": 191670 }, { "epoch": 1.261027742873496, "grad_norm": 0.04380609223194307, "learning_rate": 3.613203186257823e-06, "loss": 0.0005, "step": 191680 }, { "epoch": 1.2610935310487292, "grad_norm": 0.007908380338483208, "learning_rate": 3.6126516089555603e-06, "loss": 0.0008, "step": 191690 }, { "epoch": 1.2611593192239627, "grad_norm": 0.02211955674478742, "learning_rate": 3.612100049944286e-06, "loss": 0.0007, "step": 191700 }, { "epoch": 1.261225107399196, "grad_norm": 0.052124082425330516, "learning_rate": 3.6115485092312757e-06, "loss": 0.0004, "step": 191710 }, { "epoch": 1.2612908955744295, "grad_norm": 0.04231758260553667, "learning_rate": 3.6109969868237994e-06, "loss": 0.0009, "step": 191720 }, { "epoch": 1.2613566837496628, "grad_norm": 0.024432966341491277, "learning_rate": 3.610445482729129e-06, "loss": 0.0008, "step": 191730 }, { "epoch": 1.261422471924896, "grad_norm": 0.0025543035426279824, "learning_rate": 3.609893996954533e-06, "loss": 0.0005, "step": 191740 }, { "epoch": 1.2614882601001296, "grad_norm": 0.014759946375446645, "learning_rate": 3.609342529507285e-06, "loss": 0.002, "step": 191750 }, { "epoch": 1.261554048275363, "grad_norm": 0.09361035533640404, "learning_rate": 3.6087910803946537e-06, "loss": 0.0013, "step": 191760 }, { "epoch": 1.2616198364505964, "grad_norm": 0.00840664570255362, "learning_rate": 3.6082396496239115e-06, "loss": 0.0006, "step": 191770 }, { "epoch": 1.2616856246258297, "grad_norm": 0.0620019173175205, "learning_rate": 3.607688237202327e-06, "loss": 0.0006, "step": 191780 }, { "epoch": 1.2617514128010632, "grad_norm": 0.026909360568194794, "learning_rate": 3.60713684313717e-06, "loss": 0.0009, "step": 191790 }, { "epoch": 1.2618172009762965, "grad_norm": 0.0103138531838569, "learning_rate": 3.6065854674357114e-06, "loss": 0.0007, "step": 191800 }, { "epoch": 1.26188298915153, "grad_norm": 0.01504816734067866, "learning_rate": 3.6060341101052194e-06, "loss": 0.0006, "step": 191810 }, { "epoch": 1.2619487773267632, "grad_norm": 0.046167685034955226, "learning_rate": 3.6054827711529643e-06, "loss": 0.0006, "step": 191820 }, { "epoch": 1.2620145655019965, "grad_norm": 0.058398292535769995, "learning_rate": 3.6049314505862145e-06, "loss": 0.0003, "step": 191830 }, { "epoch": 1.26208035367723, "grad_norm": 0.0588484754182846, "learning_rate": 3.604380148412239e-06, "loss": 0.0012, "step": 191840 }, { "epoch": 1.2621461418524635, "grad_norm": 0.0008893932056601182, "learning_rate": 3.6038288646383056e-06, "loss": 0.0007, "step": 191850 }, { "epoch": 1.2622119300276968, "grad_norm": 0.005602473867120128, "learning_rate": 3.603277599271683e-06, "loss": 0.0007, "step": 191860 }, { "epoch": 1.26227771820293, "grad_norm": 0.009883229393838327, "learning_rate": 3.6027263523196378e-06, "loss": 0.0009, "step": 191870 }, { "epoch": 1.2623435063781636, "grad_norm": 0.01731898962473965, "learning_rate": 3.60217512378944e-06, "loss": 0.0008, "step": 191880 }, { "epoch": 1.262409294553397, "grad_norm": 5.0631513072963384e-05, "learning_rate": 3.601623913688356e-06, "loss": 0.0011, "step": 191890 }, { "epoch": 1.2624750827286304, "grad_norm": 0.04600949583429701, "learning_rate": 3.6010727220236518e-06, "loss": 0.0011, "step": 191900 }, { "epoch": 1.2625408709038637, "grad_norm": 0.030205573104502954, "learning_rate": 3.6005215488025967e-06, "loss": 0.0004, "step": 191910 }, { "epoch": 1.2626066590790972, "grad_norm": 0.01895215658622635, "learning_rate": 3.5999703940324564e-06, "loss": 0.001, "step": 191920 }, { "epoch": 1.2626724472543305, "grad_norm": 0.0007431389436745936, "learning_rate": 3.5994192577204973e-06, "loss": 0.0007, "step": 191930 }, { "epoch": 1.262738235429564, "grad_norm": 0.002359307500125276, "learning_rate": 3.5988681398739844e-06, "loss": 0.0006, "step": 191940 }, { "epoch": 1.2628040236047973, "grad_norm": 0.023453310727815285, "learning_rate": 3.598317040500186e-06, "loss": 0.0004, "step": 191950 }, { "epoch": 1.2628698117800305, "grad_norm": 0.06630379891519589, "learning_rate": 3.5977659596063667e-06, "loss": 0.0009, "step": 191960 }, { "epoch": 1.262935599955264, "grad_norm": 0.02537818032323808, "learning_rate": 3.5972148971997924e-06, "loss": 0.0008, "step": 191970 }, { "epoch": 1.2630013881304976, "grad_norm": 0.018235992724427212, "learning_rate": 3.5966638532877285e-06, "loss": 0.0013, "step": 191980 }, { "epoch": 1.2630671763057308, "grad_norm": 0.025394494721067324, "learning_rate": 3.596112827877439e-06, "loss": 0.0005, "step": 191990 }, { "epoch": 1.2631329644809641, "grad_norm": 0.005461621360288444, "learning_rate": 3.595561820976189e-06, "loss": 0.0013, "step": 192000 }, { "epoch": 1.2631987526561976, "grad_norm": 0.012491153993030085, "learning_rate": 3.5950108325912436e-06, "loss": 0.0007, "step": 192010 }, { "epoch": 1.263264540831431, "grad_norm": 0.0037650652145698443, "learning_rate": 3.594459862729868e-06, "loss": 0.0015, "step": 192020 }, { "epoch": 1.2633303290066644, "grad_norm": 0.00042985039437640747, "learning_rate": 3.593908911399324e-06, "loss": 0.0006, "step": 192030 }, { "epoch": 1.2633961171818977, "grad_norm": 0.03932468366251224, "learning_rate": 3.593357978606878e-06, "loss": 0.0021, "step": 192040 }, { "epoch": 1.263461905357131, "grad_norm": 0.00725760929219793, "learning_rate": 3.592807064359791e-06, "loss": 0.0007, "step": 192050 }, { "epoch": 1.2635276935323645, "grad_norm": 0.0009066271790201817, "learning_rate": 3.5922561686653284e-06, "loss": 0.0004, "step": 192060 }, { "epoch": 1.263593481707598, "grad_norm": 0.055847007761403125, "learning_rate": 3.591705291530751e-06, "loss": 0.0008, "step": 192070 }, { "epoch": 1.2636592698828313, "grad_norm": 0.013529863893693136, "learning_rate": 3.591154432963324e-06, "loss": 0.0007, "step": 192080 }, { "epoch": 1.2637250580580646, "grad_norm": 0.10363775174952287, "learning_rate": 3.590603592970309e-06, "loss": 0.0011, "step": 192090 }, { "epoch": 1.263790846233298, "grad_norm": 0.015619965519381695, "learning_rate": 3.590052771558969e-06, "loss": 0.0005, "step": 192100 }, { "epoch": 1.2638566344085314, "grad_norm": 0.027838068421714917, "learning_rate": 3.589501968736565e-06, "loss": 0.0009, "step": 192110 }, { "epoch": 1.2639224225837649, "grad_norm": 0.005639972673973547, "learning_rate": 3.588951184510359e-06, "loss": 0.0006, "step": 192120 }, { "epoch": 1.2639882107589981, "grad_norm": 0.01111640272818178, "learning_rate": 3.5884004188876124e-06, "loss": 0.0005, "step": 192130 }, { "epoch": 1.2640539989342314, "grad_norm": 0.04614436630164729, "learning_rate": 3.5878496718755886e-06, "loss": 0.0005, "step": 192140 }, { "epoch": 1.264119787109465, "grad_norm": 0.036120550667554145, "learning_rate": 3.5872989434815463e-06, "loss": 0.0004, "step": 192150 }, { "epoch": 1.2641855752846984, "grad_norm": 0.0003259487543927842, "learning_rate": 3.586748233712748e-06, "loss": 0.0003, "step": 192160 }, { "epoch": 1.2642513634599317, "grad_norm": 0.05291565345992261, "learning_rate": 3.586197542576453e-06, "loss": 0.0006, "step": 192170 }, { "epoch": 1.264317151635165, "grad_norm": 0.010208696798366312, "learning_rate": 3.585646870079923e-06, "loss": 0.0006, "step": 192180 }, { "epoch": 1.2643829398103985, "grad_norm": 0.07867420871160667, "learning_rate": 3.5850962162304176e-06, "loss": 0.0005, "step": 192190 }, { "epoch": 1.2644487279856318, "grad_norm": 0.023168694134242173, "learning_rate": 3.5845455810351953e-06, "loss": 0.0004, "step": 192200 }, { "epoch": 1.2645145161608653, "grad_norm": 0.002019120218066766, "learning_rate": 3.5839949645015183e-06, "loss": 0.0006, "step": 192210 }, { "epoch": 1.2645803043360986, "grad_norm": 0.025702056269242365, "learning_rate": 3.583444366636645e-06, "loss": 0.0006, "step": 192220 }, { "epoch": 1.264646092511332, "grad_norm": 0.13447798670558364, "learning_rate": 3.582893787447833e-06, "loss": 0.0014, "step": 192230 }, { "epoch": 1.2647118806865654, "grad_norm": 0.014820727354596967, "learning_rate": 3.5823432269423435e-06, "loss": 0.0007, "step": 192240 }, { "epoch": 1.2647776688617989, "grad_norm": 0.013844424479803432, "learning_rate": 3.581792685127434e-06, "loss": 0.0004, "step": 192250 }, { "epoch": 1.2648434570370322, "grad_norm": 0.030677739952998634, "learning_rate": 3.581242162010362e-06, "loss": 0.0006, "step": 192260 }, { "epoch": 1.2649092452122654, "grad_norm": 0.017268134712540663, "learning_rate": 3.5806916575983885e-06, "loss": 0.0006, "step": 192270 }, { "epoch": 1.264975033387499, "grad_norm": 0.03352372499587991, "learning_rate": 3.580141171898769e-06, "loss": 0.0005, "step": 192280 }, { "epoch": 1.2650408215627322, "grad_norm": 0.041038528079395974, "learning_rate": 3.5795907049187615e-06, "loss": 0.0006, "step": 192290 }, { "epoch": 1.2651066097379657, "grad_norm": 0.010927964059885482, "learning_rate": 3.5790402566656246e-06, "loss": 0.0003, "step": 192300 }, { "epoch": 1.265172397913199, "grad_norm": 0.06298172095662434, "learning_rate": 3.5784898271466144e-06, "loss": 0.0015, "step": 192310 }, { "epoch": 1.2652381860884325, "grad_norm": 0.007947474935646143, "learning_rate": 3.5779394163689886e-06, "loss": 0.0005, "step": 192320 }, { "epoch": 1.2653039742636658, "grad_norm": 0.00232120578353858, "learning_rate": 3.577389024340002e-06, "loss": 0.0007, "step": 192330 }, { "epoch": 1.2653697624388993, "grad_norm": 0.005375386629421427, "learning_rate": 3.576838651066913e-06, "loss": 0.0006, "step": 192340 }, { "epoch": 1.2654355506141326, "grad_norm": 0.03546170621517465, "learning_rate": 3.576288296556978e-06, "loss": 0.0003, "step": 192350 }, { "epoch": 1.2655013387893659, "grad_norm": 0.06291424014197121, "learning_rate": 3.5757379608174516e-06, "loss": 0.0005, "step": 192360 }, { "epoch": 1.2655671269645994, "grad_norm": 0.0191954712225556, "learning_rate": 3.5751876438555904e-06, "loss": 0.0009, "step": 192370 }, { "epoch": 1.265632915139833, "grad_norm": 0.04403144387963964, "learning_rate": 3.57463734567865e-06, "loss": 0.0011, "step": 192380 }, { "epoch": 1.2656987033150662, "grad_norm": 0.07183242786721329, "learning_rate": 3.5740870662938833e-06, "loss": 0.0008, "step": 192390 }, { "epoch": 1.2657644914902995, "grad_norm": 0.07100924272515367, "learning_rate": 3.5735368057085483e-06, "loss": 0.0007, "step": 192400 }, { "epoch": 1.265830279665533, "grad_norm": 0.015955264064883862, "learning_rate": 3.572986563929899e-06, "loss": 0.0008, "step": 192410 }, { "epoch": 1.2658960678407662, "grad_norm": 0.044309309457432666, "learning_rate": 3.57243634096519e-06, "loss": 0.0011, "step": 192420 }, { "epoch": 1.2659618560159998, "grad_norm": 0.006164321291545823, "learning_rate": 3.571886136821674e-06, "loss": 0.0005, "step": 192430 }, { "epoch": 1.266027644191233, "grad_norm": 0.025027703525667327, "learning_rate": 3.571335951506606e-06, "loss": 0.0007, "step": 192440 }, { "epoch": 1.2660934323664663, "grad_norm": 0.02083144162804817, "learning_rate": 3.570785785027239e-06, "loss": 0.0009, "step": 192450 }, { "epoch": 1.2661592205416998, "grad_norm": 0.016733075748972426, "learning_rate": 3.570235637390827e-06, "loss": 0.0009, "step": 192460 }, { "epoch": 1.2662250087169333, "grad_norm": 0.026045120970108356, "learning_rate": 3.569685508604624e-06, "loss": 0.0006, "step": 192470 }, { "epoch": 1.2662907968921666, "grad_norm": 0.028758060845342857, "learning_rate": 3.5691353986758825e-06, "loss": 0.0012, "step": 192480 }, { "epoch": 1.2663565850674, "grad_norm": 0.03509522487135427, "learning_rate": 3.568585307611855e-06, "loss": 0.0008, "step": 192490 }, { "epoch": 1.2664223732426334, "grad_norm": 0.13199454538972774, "learning_rate": 3.5680352354197933e-06, "loss": 0.0006, "step": 192500 }, { "epoch": 1.2664881614178667, "grad_norm": 0.00887573648522044, "learning_rate": 3.567485182106951e-06, "loss": 0.0005, "step": 192510 }, { "epoch": 1.2665539495931002, "grad_norm": 0.12587278762888887, "learning_rate": 3.566935147680578e-06, "loss": 0.001, "step": 192520 }, { "epoch": 1.2666197377683335, "grad_norm": 0.009677284908008366, "learning_rate": 3.566385132147928e-06, "loss": 0.0007, "step": 192530 }, { "epoch": 1.266685525943567, "grad_norm": 0.028432268701575773, "learning_rate": 3.565835135516253e-06, "loss": 0.0018, "step": 192540 }, { "epoch": 1.2667513141188003, "grad_norm": 0.04443327139523745, "learning_rate": 3.5652851577928028e-06, "loss": 0.0005, "step": 192550 }, { "epoch": 1.2668171022940338, "grad_norm": 0.025624982220438727, "learning_rate": 3.5647351989848278e-06, "loss": 0.0009, "step": 192560 }, { "epoch": 1.266882890469267, "grad_norm": 0.005835128419122485, "learning_rate": 3.5641852590995807e-06, "loss": 0.0006, "step": 192570 }, { "epoch": 1.2669486786445003, "grad_norm": 0.08449460976718796, "learning_rate": 3.56363533814431e-06, "loss": 0.0008, "step": 192580 }, { "epoch": 1.2670144668197338, "grad_norm": 0.05481309936356814, "learning_rate": 3.5630854361262667e-06, "loss": 0.001, "step": 192590 }, { "epoch": 1.2670802549949671, "grad_norm": 0.05576291791308469, "learning_rate": 3.562535553052701e-06, "loss": 0.0009, "step": 192600 }, { "epoch": 1.2671460431702006, "grad_norm": 0.06071969448424953, "learning_rate": 3.5619856889308635e-06, "loss": 0.0008, "step": 192610 }, { "epoch": 1.267211831345434, "grad_norm": 0.07797873507241898, "learning_rate": 3.561435843768002e-06, "loss": 0.002, "step": 192620 }, { "epoch": 1.2672776195206674, "grad_norm": 0.030614637364334424, "learning_rate": 3.5608860175713666e-06, "loss": 0.0011, "step": 192630 }, { "epoch": 1.2673434076959007, "grad_norm": 0.0012596704802175585, "learning_rate": 3.5603362103482062e-06, "loss": 0.0003, "step": 192640 }, { "epoch": 1.2674091958711342, "grad_norm": 0.020142177794321845, "learning_rate": 3.559786422105769e-06, "loss": 0.0007, "step": 192650 }, { "epoch": 1.2674749840463675, "grad_norm": 0.021192160472323016, "learning_rate": 3.5592366528513047e-06, "loss": 0.0011, "step": 192660 }, { "epoch": 1.2675407722216008, "grad_norm": 0.025534910914131414, "learning_rate": 3.558686902592061e-06, "loss": 0.0005, "step": 192670 }, { "epoch": 1.2676065603968343, "grad_norm": 0.010296676648242995, "learning_rate": 3.5581371713352854e-06, "loss": 0.0005, "step": 192680 }, { "epoch": 1.2676723485720678, "grad_norm": 0.07793001547104352, "learning_rate": 3.5575874590882252e-06, "loss": 0.0007, "step": 192690 }, { "epoch": 1.267738136747301, "grad_norm": 0.014051344188624774, "learning_rate": 3.55703776585813e-06, "loss": 0.0003, "step": 192700 }, { "epoch": 1.2678039249225344, "grad_norm": 0.010581348311371658, "learning_rate": 3.556488091652245e-06, "loss": 0.0006, "step": 192710 }, { "epoch": 1.2678697130977679, "grad_norm": 0.017100668525166767, "learning_rate": 3.5559384364778166e-06, "loss": 0.0007, "step": 192720 }, { "epoch": 1.2679355012730011, "grad_norm": 0.03248371262284837, "learning_rate": 3.555388800342094e-06, "loss": 0.0008, "step": 192730 }, { "epoch": 1.2680012894482346, "grad_norm": 0.020431381016795546, "learning_rate": 3.554839183252322e-06, "loss": 0.0008, "step": 192740 }, { "epoch": 1.268067077623468, "grad_norm": 0.028745372092079606, "learning_rate": 3.5542895852157476e-06, "loss": 0.0006, "step": 192750 }, { "epoch": 1.2681328657987012, "grad_norm": 0.00019644593345026148, "learning_rate": 3.5537400062396168e-06, "loss": 0.0004, "step": 192760 }, { "epoch": 1.2681986539739347, "grad_norm": 0.009853732690937818, "learning_rate": 3.5531904463311738e-06, "loss": 0.0006, "step": 192770 }, { "epoch": 1.2682644421491682, "grad_norm": 0.0448013072002234, "learning_rate": 3.552640905497665e-06, "loss": 0.0015, "step": 192780 }, { "epoch": 1.2683302303244015, "grad_norm": 0.007593127560244464, "learning_rate": 3.5520913837463373e-06, "loss": 0.0003, "step": 192790 }, { "epoch": 1.2683960184996348, "grad_norm": 0.020291988937094343, "learning_rate": 3.551541881084434e-06, "loss": 0.0006, "step": 192800 }, { "epoch": 1.2684618066748683, "grad_norm": 0.0904022000275484, "learning_rate": 3.5509923975191997e-06, "loss": 0.0009, "step": 192810 }, { "epoch": 1.2685275948501016, "grad_norm": 0.005007091990136682, "learning_rate": 3.5504429330578787e-06, "loss": 0.0006, "step": 192820 }, { "epoch": 1.268593383025335, "grad_norm": 0.027683313457418063, "learning_rate": 3.5498934877077162e-06, "loss": 0.0004, "step": 192830 }, { "epoch": 1.2686591712005684, "grad_norm": 0.04061065375384414, "learning_rate": 3.549344061475956e-06, "loss": 0.0009, "step": 192840 }, { "epoch": 1.2687249593758019, "grad_norm": 0.00775794341592578, "learning_rate": 3.5487946543698405e-06, "loss": 0.001, "step": 192850 }, { "epoch": 1.2687907475510352, "grad_norm": 0.003208337043254465, "learning_rate": 3.5482452663966146e-06, "loss": 0.0012, "step": 192860 }, { "epoch": 1.2688565357262687, "grad_norm": 0.14554293493808176, "learning_rate": 3.547695897563521e-06, "loss": 0.0008, "step": 192870 }, { "epoch": 1.268922323901502, "grad_norm": 0.009785586719241226, "learning_rate": 3.5471465478778034e-06, "loss": 0.0005, "step": 192880 }, { "epoch": 1.2689881120767352, "grad_norm": 0.012559003364592326, "learning_rate": 3.5465972173467033e-06, "loss": 0.0007, "step": 192890 }, { "epoch": 1.2690539002519687, "grad_norm": 0.011396751529944783, "learning_rate": 3.5460479059774634e-06, "loss": 0.0012, "step": 192900 }, { "epoch": 1.269119688427202, "grad_norm": 0.02216363801932496, "learning_rate": 3.5454986137773257e-06, "loss": 0.0005, "step": 192910 }, { "epoch": 1.2691854766024355, "grad_norm": 0.015659713989183307, "learning_rate": 3.5449493407535328e-06, "loss": 0.0005, "step": 192920 }, { "epoch": 1.2692512647776688, "grad_norm": 0.02359201460835472, "learning_rate": 3.5444000869133272e-06, "loss": 0.0014, "step": 192930 }, { "epoch": 1.2693170529529023, "grad_norm": 0.00351442367022732, "learning_rate": 3.543850852263949e-06, "loss": 0.0004, "step": 192940 }, { "epoch": 1.2693828411281356, "grad_norm": 0.017518685029416847, "learning_rate": 3.543301636812639e-06, "loss": 0.0007, "step": 192950 }, { "epoch": 1.269448629303369, "grad_norm": 0.019508892348750317, "learning_rate": 3.54275244056664e-06, "loss": 0.0007, "step": 192960 }, { "epoch": 1.2695144174786024, "grad_norm": 0.02572512862815003, "learning_rate": 3.5422032635331905e-06, "loss": 0.0013, "step": 192970 }, { "epoch": 1.2695802056538357, "grad_norm": 0.13692093780551504, "learning_rate": 3.541654105719532e-06, "loss": 0.0006, "step": 192980 }, { "epoch": 1.2696459938290692, "grad_norm": 0.036774428284546294, "learning_rate": 3.5411049671329043e-06, "loss": 0.001, "step": 192990 }, { "epoch": 1.2697117820043027, "grad_norm": 0.05699586940752347, "learning_rate": 3.5405558477805485e-06, "loss": 0.0011, "step": 193000 }, { "epoch": 1.269777570179536, "grad_norm": 0.010410644022906946, "learning_rate": 3.5400067476697032e-06, "loss": 0.0004, "step": 193010 }, { "epoch": 1.2698433583547692, "grad_norm": 0.025717232458773776, "learning_rate": 3.5394576668076075e-06, "loss": 0.0009, "step": 193020 }, { "epoch": 1.2699091465300028, "grad_norm": 0.003348032915549125, "learning_rate": 3.538908605201502e-06, "loss": 0.0008, "step": 193030 }, { "epoch": 1.269974934705236, "grad_norm": 0.0038048193492493947, "learning_rate": 3.5383595628586233e-06, "loss": 0.0003, "step": 193040 }, { "epoch": 1.2700407228804695, "grad_norm": 0.026055739610354317, "learning_rate": 3.537810539786212e-06, "loss": 0.0006, "step": 193050 }, { "epoch": 1.2701065110557028, "grad_norm": 0.0064860277865563965, "learning_rate": 3.537261535991506e-06, "loss": 0.0005, "step": 193060 }, { "epoch": 1.270172299230936, "grad_norm": 0.0011596445535494242, "learning_rate": 3.5367125514817435e-06, "loss": 0.0005, "step": 193070 }, { "epoch": 1.2702380874061696, "grad_norm": 0.06361459200082073, "learning_rate": 3.5361635862641624e-06, "loss": 0.0006, "step": 193080 }, { "epoch": 1.2703038755814031, "grad_norm": 0.05421161750964715, "learning_rate": 3.535614640346e-06, "loss": 0.0005, "step": 193090 }, { "epoch": 1.2703696637566364, "grad_norm": 0.039174532115739215, "learning_rate": 3.535065713734494e-06, "loss": 0.0006, "step": 193100 }, { "epoch": 1.2704354519318697, "grad_norm": 0.013907317386198005, "learning_rate": 3.53451680643688e-06, "loss": 0.0006, "step": 193110 }, { "epoch": 1.2705012401071032, "grad_norm": 0.007219370436943142, "learning_rate": 3.5339679184603974e-06, "loss": 0.0007, "step": 193120 }, { "epoch": 1.2705670282823365, "grad_norm": 0.027924360413867273, "learning_rate": 3.533419049812282e-06, "loss": 0.0021, "step": 193130 }, { "epoch": 1.27063281645757, "grad_norm": 0.03249157598452677, "learning_rate": 3.5328702004997693e-06, "loss": 0.0013, "step": 193140 }, { "epoch": 1.2706986046328033, "grad_norm": 0.01552057665985389, "learning_rate": 3.5323213705300964e-06, "loss": 0.0006, "step": 193150 }, { "epoch": 1.2707643928080365, "grad_norm": 0.013129092967942731, "learning_rate": 3.531772559910498e-06, "loss": 0.0009, "step": 193160 }, { "epoch": 1.27083018098327, "grad_norm": 0.04075675924275186, "learning_rate": 3.5312237686482097e-06, "loss": 0.0012, "step": 193170 }, { "epoch": 1.2708959691585036, "grad_norm": 0.16385296652478962, "learning_rate": 3.5306749967504685e-06, "loss": 0.0007, "step": 193180 }, { "epoch": 1.2709617573337368, "grad_norm": 0.014871091554368384, "learning_rate": 3.530126244224509e-06, "loss": 0.0002, "step": 193190 }, { "epoch": 1.2710275455089701, "grad_norm": 0.060704265936279256, "learning_rate": 3.529577511077565e-06, "loss": 0.0005, "step": 193200 }, { "epoch": 1.2710933336842036, "grad_norm": 0.12035605283569611, "learning_rate": 3.5290287973168714e-06, "loss": 0.001, "step": 193210 }, { "epoch": 1.271159121859437, "grad_norm": 0.10938909168723303, "learning_rate": 3.528480102949663e-06, "loss": 0.0004, "step": 193220 }, { "epoch": 1.2712249100346704, "grad_norm": 0.12410010117476014, "learning_rate": 3.5279314279831735e-06, "loss": 0.0006, "step": 193230 }, { "epoch": 1.2712906982099037, "grad_norm": 0.03991496468643656, "learning_rate": 3.5273827724246355e-06, "loss": 0.0004, "step": 193240 }, { "epoch": 1.2713564863851372, "grad_norm": 0.22272765063980215, "learning_rate": 3.5268341362812852e-06, "loss": 0.0021, "step": 193250 }, { "epoch": 1.2714222745603705, "grad_norm": 0.1442865915723096, "learning_rate": 3.526285519560354e-06, "loss": 0.001, "step": 193260 }, { "epoch": 1.271488062735604, "grad_norm": 0.024158212785158367, "learning_rate": 3.5257369222690755e-06, "loss": 0.0006, "step": 193270 }, { "epoch": 1.2715538509108373, "grad_norm": 0.01647420812226641, "learning_rate": 3.525188344414683e-06, "loss": 0.0012, "step": 193280 }, { "epoch": 1.2716196390860706, "grad_norm": 0.01420757519717116, "learning_rate": 3.5246397860044078e-06, "loss": 0.0007, "step": 193290 }, { "epoch": 1.271685427261304, "grad_norm": 0.014993240406347341, "learning_rate": 3.5240912470454823e-06, "loss": 0.0009, "step": 193300 }, { "epoch": 1.2717512154365376, "grad_norm": 0.014668429692358608, "learning_rate": 3.5235427275451393e-06, "loss": 0.0002, "step": 193310 }, { "epoch": 1.2718170036117709, "grad_norm": 0.012303546783118444, "learning_rate": 3.522994227510611e-06, "loss": 0.0006, "step": 193320 }, { "epoch": 1.2718827917870041, "grad_norm": 0.0013297175869035726, "learning_rate": 3.5224457469491284e-06, "loss": 0.0005, "step": 193330 }, { "epoch": 1.2719485799622376, "grad_norm": 0.007187866800119842, "learning_rate": 3.5218972858679224e-06, "loss": 0.0009, "step": 193340 }, { "epoch": 1.272014368137471, "grad_norm": 0.008862574359679536, "learning_rate": 3.5213488442742237e-06, "loss": 0.0007, "step": 193350 }, { "epoch": 1.2720801563127044, "grad_norm": 0.005199114306051625, "learning_rate": 3.5208004221752633e-06, "loss": 0.0004, "step": 193360 }, { "epoch": 1.2721459444879377, "grad_norm": 0.1023103337167873, "learning_rate": 3.520252019578271e-06, "loss": 0.0013, "step": 193370 }, { "epoch": 1.272211732663171, "grad_norm": 0.0016785750225912892, "learning_rate": 3.519703636490479e-06, "loss": 0.0007, "step": 193380 }, { "epoch": 1.2722775208384045, "grad_norm": 0.02589861444658201, "learning_rate": 3.5191552729191164e-06, "loss": 0.0012, "step": 193390 }, { "epoch": 1.272343309013638, "grad_norm": 0.06413906217358414, "learning_rate": 3.518606928871412e-06, "loss": 0.0006, "step": 193400 }, { "epoch": 1.2724090971888713, "grad_norm": 0.01785558438231929, "learning_rate": 3.518058604354596e-06, "loss": 0.001, "step": 193410 }, { "epoch": 1.2724748853641046, "grad_norm": 0.01344487033094875, "learning_rate": 3.517510299375898e-06, "loss": 0.0005, "step": 193420 }, { "epoch": 1.272540673539338, "grad_norm": 0.07000701383350649, "learning_rate": 3.516962013942544e-06, "loss": 0.001, "step": 193430 }, { "epoch": 1.2726064617145714, "grad_norm": 0.10704834715100274, "learning_rate": 3.5164137480617676e-06, "loss": 0.0013, "step": 193440 }, { "epoch": 1.2726722498898049, "grad_norm": 0.03977841714812123, "learning_rate": 3.515865501740794e-06, "loss": 0.0005, "step": 193450 }, { "epoch": 1.2727380380650382, "grad_norm": 0.06757423491387574, "learning_rate": 3.5153172749868512e-06, "loss": 0.0008, "step": 193460 }, { "epoch": 1.2728038262402714, "grad_norm": 0.018469784705567042, "learning_rate": 3.514769067807169e-06, "loss": 0.0003, "step": 193470 }, { "epoch": 1.272869614415505, "grad_norm": 0.0022653311644884088, "learning_rate": 3.514220880208973e-06, "loss": 0.0015, "step": 193480 }, { "epoch": 1.2729354025907385, "grad_norm": 0.04560062199647471, "learning_rate": 3.513672712199492e-06, "loss": 0.0005, "step": 193490 }, { "epoch": 1.2730011907659717, "grad_norm": 0.019726092995907725, "learning_rate": 3.5131245637859514e-06, "loss": 0.001, "step": 193500 }, { "epoch": 1.273066978941205, "grad_norm": 0.024064142483777348, "learning_rate": 3.5125764349755807e-06, "loss": 0.0004, "step": 193510 }, { "epoch": 1.2731327671164385, "grad_norm": 0.1309981135205644, "learning_rate": 3.512028325775605e-06, "loss": 0.0006, "step": 193520 }, { "epoch": 1.2731985552916718, "grad_norm": 0.015092172799940374, "learning_rate": 3.5114802361932498e-06, "loss": 0.0004, "step": 193530 }, { "epoch": 1.2732643434669053, "grad_norm": 0.0035873810200140895, "learning_rate": 3.5109321662357434e-06, "loss": 0.0003, "step": 193540 }, { "epoch": 1.2733301316421386, "grad_norm": 0.07332825782815496, "learning_rate": 3.5103841159103092e-06, "loss": 0.0014, "step": 193550 }, { "epoch": 1.273395919817372, "grad_norm": 0.020502018012695546, "learning_rate": 3.509836085224174e-06, "loss": 0.0005, "step": 193560 }, { "epoch": 1.2734617079926054, "grad_norm": 0.004094170682227388, "learning_rate": 3.509288074184563e-06, "loss": 0.0007, "step": 193570 }, { "epoch": 1.273527496167839, "grad_norm": 0.010110209786525963, "learning_rate": 3.5087400827987016e-06, "loss": 0.0013, "step": 193580 }, { "epoch": 1.2735932843430722, "grad_norm": 0.04035361330094759, "learning_rate": 3.5081921110738147e-06, "loss": 0.0004, "step": 193590 }, { "epoch": 1.2736590725183055, "grad_norm": 0.037901248597798524, "learning_rate": 3.507644159017126e-06, "loss": 0.0005, "step": 193600 }, { "epoch": 1.273724860693539, "grad_norm": 0.09634750381162424, "learning_rate": 3.5070962266358604e-06, "loss": 0.0007, "step": 193610 }, { "epoch": 1.2737906488687722, "grad_norm": 0.03414667746902434, "learning_rate": 3.5065483139372424e-06, "loss": 0.0012, "step": 193620 }, { "epoch": 1.2738564370440058, "grad_norm": 0.013403467035751048, "learning_rate": 3.506000420928495e-06, "loss": 0.0005, "step": 193630 }, { "epoch": 1.273922225219239, "grad_norm": 0.011680072681561595, "learning_rate": 3.5054525476168412e-06, "loss": 0.0006, "step": 193640 }, { "epoch": 1.2739880133944725, "grad_norm": 0.025802798208851657, "learning_rate": 3.5049046940095045e-06, "loss": 0.0006, "step": 193650 }, { "epoch": 1.2740538015697058, "grad_norm": 0.007024206505457818, "learning_rate": 3.5043568601137066e-06, "loss": 0.001, "step": 193660 }, { "epoch": 1.2741195897449393, "grad_norm": 0.03488677068913858, "learning_rate": 3.5038090459366735e-06, "loss": 0.0004, "step": 193670 }, { "epoch": 1.2741853779201726, "grad_norm": 0.0185760188150051, "learning_rate": 3.503261251485626e-06, "loss": 0.001, "step": 193680 }, { "epoch": 1.274251166095406, "grad_norm": 0.08351253016373932, "learning_rate": 3.5027134767677864e-06, "loss": 0.0004, "step": 193690 }, { "epoch": 1.2743169542706394, "grad_norm": 0.14571214346450295, "learning_rate": 3.502165721790376e-06, "loss": 0.0015, "step": 193700 }, { "epoch": 1.274382742445873, "grad_norm": 0.05173928243512353, "learning_rate": 3.501617986560617e-06, "loss": 0.0009, "step": 193710 }, { "epoch": 1.2744485306211062, "grad_norm": 0.02593603029855423, "learning_rate": 3.5010702710857312e-06, "loss": 0.0006, "step": 193720 }, { "epoch": 1.2745143187963395, "grad_norm": 0.012922531246177166, "learning_rate": 3.5005225753729376e-06, "loss": 0.0013, "step": 193730 }, { "epoch": 1.274580106971573, "grad_norm": 0.014181139239262917, "learning_rate": 3.49997489942946e-06, "loss": 0.0007, "step": 193740 }, { "epoch": 1.2746458951468063, "grad_norm": 0.033512983953989495, "learning_rate": 3.4994272432625185e-06, "loss": 0.0004, "step": 193750 }, { "epoch": 1.2747116833220398, "grad_norm": 0.0008726647491822178, "learning_rate": 3.4988796068793324e-06, "loss": 0.0005, "step": 193760 }, { "epoch": 1.274777471497273, "grad_norm": 0.06585427934348415, "learning_rate": 3.498331990287123e-06, "loss": 0.0008, "step": 193770 }, { "epoch": 1.2748432596725063, "grad_norm": 0.03717875135039386, "learning_rate": 3.4977843934931077e-06, "loss": 0.0004, "step": 193780 }, { "epoch": 1.2749090478477398, "grad_norm": 0.0437162564593084, "learning_rate": 3.497236816504508e-06, "loss": 0.0001, "step": 193790 }, { "epoch": 1.2749748360229733, "grad_norm": 0.052170111210830296, "learning_rate": 3.4966892593285433e-06, "loss": 0.0006, "step": 193800 }, { "epoch": 1.2750406241982066, "grad_norm": 0.030114494757854476, "learning_rate": 3.4961417219724327e-06, "loss": 0.0007, "step": 193810 }, { "epoch": 1.27510641237344, "grad_norm": 0.028416493315628472, "learning_rate": 3.495594204443395e-06, "loss": 0.0005, "step": 193820 }, { "epoch": 1.2751722005486734, "grad_norm": 0.03969737789592645, "learning_rate": 3.495046706748648e-06, "loss": 0.0007, "step": 193830 }, { "epoch": 1.2752379887239067, "grad_norm": 0.19529514666955228, "learning_rate": 3.4944992288954103e-06, "loss": 0.0008, "step": 193840 }, { "epoch": 1.2753037768991402, "grad_norm": 0.04732068219724761, "learning_rate": 3.493951770890901e-06, "loss": 0.0007, "step": 193850 }, { "epoch": 1.2753695650743735, "grad_norm": 0.14178314640245632, "learning_rate": 3.493404332742335e-06, "loss": 0.0009, "step": 193860 }, { "epoch": 1.275435353249607, "grad_norm": 0.04738698408600134, "learning_rate": 3.4928569144569326e-06, "loss": 0.0003, "step": 193870 }, { "epoch": 1.2755011414248403, "grad_norm": 0.05807442219644717, "learning_rate": 3.4923095160419106e-06, "loss": 0.0008, "step": 193880 }, { "epoch": 1.2755669296000738, "grad_norm": 0.025106270635330444, "learning_rate": 3.491762137504485e-06, "loss": 0.0006, "step": 193890 }, { "epoch": 1.275632717775307, "grad_norm": 0.06866452813171064, "learning_rate": 3.4912147788518735e-06, "loss": 0.0005, "step": 193900 }, { "epoch": 1.2756985059505404, "grad_norm": 0.09910632607703138, "learning_rate": 3.4906674400912923e-06, "loss": 0.0014, "step": 193910 }, { "epoch": 1.2757642941257739, "grad_norm": 0.020418000287351266, "learning_rate": 3.4901201212299553e-06, "loss": 0.0009, "step": 193920 }, { "epoch": 1.2758300823010071, "grad_norm": 0.11244763617532606, "learning_rate": 3.4895728222750823e-06, "loss": 0.0011, "step": 193930 }, { "epoch": 1.2758958704762406, "grad_norm": 0.16224768144079033, "learning_rate": 3.489025543233887e-06, "loss": 0.0014, "step": 193940 }, { "epoch": 1.275961658651474, "grad_norm": 0.2732296825265933, "learning_rate": 3.4884782841135843e-06, "loss": 0.0003, "step": 193950 }, { "epoch": 1.2760274468267074, "grad_norm": 0.0011138706800202763, "learning_rate": 3.487931044921391e-06, "loss": 0.0007, "step": 193960 }, { "epoch": 1.2760932350019407, "grad_norm": 0.01974069039094281, "learning_rate": 3.487383825664521e-06, "loss": 0.0012, "step": 193970 }, { "epoch": 1.2761590231771742, "grad_norm": 0.02291445370902088, "learning_rate": 3.4868366263501883e-06, "loss": 0.0006, "step": 193980 }, { "epoch": 1.2762248113524075, "grad_norm": 0.12440451002905893, "learning_rate": 3.4862894469856067e-06, "loss": 0.0007, "step": 193990 }, { "epoch": 1.2762905995276408, "grad_norm": 0.00025704302347910536, "learning_rate": 3.485742287577993e-06, "loss": 0.0005, "step": 194000 }, { "epoch": 1.2763563877028743, "grad_norm": 0.023403066871169335, "learning_rate": 3.485195148134559e-06, "loss": 0.0007, "step": 194010 }, { "epoch": 1.2764221758781078, "grad_norm": 0.06382308821079666, "learning_rate": 3.484648028662519e-06, "loss": 0.0006, "step": 194020 }, { "epoch": 1.276487964053341, "grad_norm": 0.05173847087398698, "learning_rate": 3.4841009291690853e-06, "loss": 0.0007, "step": 194030 }, { "epoch": 1.2765537522285744, "grad_norm": 0.01133845983871441, "learning_rate": 3.4835538496614728e-06, "loss": 0.0015, "step": 194040 }, { "epoch": 1.2766195404038079, "grad_norm": 0.01126213878640788, "learning_rate": 3.483006790146891e-06, "loss": 0.0011, "step": 194050 }, { "epoch": 1.2766853285790412, "grad_norm": 0.009171170991629432, "learning_rate": 3.482459750632556e-06, "loss": 0.0006, "step": 194060 }, { "epoch": 1.2767511167542747, "grad_norm": 0.015143630301071591, "learning_rate": 3.4819127311256783e-06, "loss": 0.0009, "step": 194070 }, { "epoch": 1.276816904929508, "grad_norm": 0.03244402759280495, "learning_rate": 3.4813657316334702e-06, "loss": 0.0005, "step": 194080 }, { "epoch": 1.2768826931047412, "grad_norm": 0.014995394131371638, "learning_rate": 3.480818752163144e-06, "loss": 0.0006, "step": 194090 }, { "epoch": 1.2769484812799747, "grad_norm": 0.010818437162058226, "learning_rate": 3.4802717927219098e-06, "loss": 0.0007, "step": 194100 }, { "epoch": 1.2770142694552082, "grad_norm": 0.006795092963714021, "learning_rate": 3.4797248533169802e-06, "loss": 0.0004, "step": 194110 }, { "epoch": 1.2770800576304415, "grad_norm": 0.003909236256715874, "learning_rate": 3.4791779339555637e-06, "loss": 0.001, "step": 194120 }, { "epoch": 1.2771458458056748, "grad_norm": 0.011927731269855985, "learning_rate": 3.478631034644874e-06, "loss": 0.0006, "step": 194130 }, { "epoch": 1.2772116339809083, "grad_norm": 0.04468958824707375, "learning_rate": 3.4780841553921195e-06, "loss": 0.0005, "step": 194140 }, { "epoch": 1.2772774221561416, "grad_norm": 0.07885031490980779, "learning_rate": 3.477537296204512e-06, "loss": 0.0004, "step": 194150 }, { "epoch": 1.277343210331375, "grad_norm": 0.09013057823710062, "learning_rate": 3.47699045708926e-06, "loss": 0.0006, "step": 194160 }, { "epoch": 1.2774089985066084, "grad_norm": 0.015036714027460901, "learning_rate": 3.476443638053574e-06, "loss": 0.0016, "step": 194170 }, { "epoch": 1.2774747866818417, "grad_norm": 0.015641534767829767, "learning_rate": 3.4758968391046616e-06, "loss": 0.0003, "step": 194180 }, { "epoch": 1.2775405748570752, "grad_norm": 0.032565067035976294, "learning_rate": 3.4753500602497327e-06, "loss": 0.0005, "step": 194190 }, { "epoch": 1.2776063630323087, "grad_norm": 0.03956921201219648, "learning_rate": 3.4748033014959977e-06, "loss": 0.0008, "step": 194200 }, { "epoch": 1.277672151207542, "grad_norm": 0.10118276179576521, "learning_rate": 3.474256562850663e-06, "loss": 0.0006, "step": 194210 }, { "epoch": 1.2777379393827752, "grad_norm": 0.021968449917897682, "learning_rate": 3.4737098443209387e-06, "loss": 0.0005, "step": 194220 }, { "epoch": 1.2778037275580088, "grad_norm": 0.038810761536218014, "learning_rate": 3.473163145914032e-06, "loss": 0.0004, "step": 194230 }, { "epoch": 1.277869515733242, "grad_norm": 0.0033481504381941327, "learning_rate": 3.4726164676371497e-06, "loss": 0.0006, "step": 194240 }, { "epoch": 1.2779353039084755, "grad_norm": 0.006966792901059554, "learning_rate": 3.4720698094975004e-06, "loss": 0.0004, "step": 194250 }, { "epoch": 1.2780010920837088, "grad_norm": 0.0549370334954379, "learning_rate": 3.4715231715022914e-06, "loss": 0.0012, "step": 194260 }, { "epoch": 1.2780668802589423, "grad_norm": 0.023382042131960164, "learning_rate": 3.4709765536587293e-06, "loss": 0.0005, "step": 194270 }, { "epoch": 1.2781326684341756, "grad_norm": 0.13371586107009473, "learning_rate": 3.4704299559740207e-06, "loss": 0.0011, "step": 194280 }, { "epoch": 1.2781984566094091, "grad_norm": 0.05338020131308254, "learning_rate": 3.469883378455372e-06, "loss": 0.0009, "step": 194290 }, { "epoch": 1.2782642447846424, "grad_norm": 0.005939689260746949, "learning_rate": 3.4693368211099894e-06, "loss": 0.0011, "step": 194300 }, { "epoch": 1.2783300329598757, "grad_norm": 0.03536170620043755, "learning_rate": 3.4687902839450793e-06, "loss": 0.0014, "step": 194310 }, { "epoch": 1.2783958211351092, "grad_norm": 0.07153363169700865, "learning_rate": 3.4682437669678455e-06, "loss": 0.001, "step": 194320 }, { "epoch": 1.2784616093103427, "grad_norm": 0.000327873951760056, "learning_rate": 3.467697270185496e-06, "loss": 0.0006, "step": 194330 }, { "epoch": 1.278527397485576, "grad_norm": 0.01605580664899426, "learning_rate": 3.467150793605234e-06, "loss": 0.0004, "step": 194340 }, { "epoch": 1.2785931856608093, "grad_norm": 0.01975326995188151, "learning_rate": 3.4666043372342655e-06, "loss": 0.0005, "step": 194350 }, { "epoch": 1.2786589738360428, "grad_norm": 0.08153525488945787, "learning_rate": 3.4660579010797945e-06, "loss": 0.0008, "step": 194360 }, { "epoch": 1.278724762011276, "grad_norm": 0.0008874382030043539, "learning_rate": 3.465511485149025e-06, "loss": 0.0009, "step": 194370 }, { "epoch": 1.2787905501865096, "grad_norm": 0.04662349952389306, "learning_rate": 3.4649650894491606e-06, "loss": 0.0006, "step": 194380 }, { "epoch": 1.2788563383617428, "grad_norm": 0.01460950504275791, "learning_rate": 3.4644187139874065e-06, "loss": 0.0005, "step": 194390 }, { "epoch": 1.2789221265369761, "grad_norm": 0.014892194036640449, "learning_rate": 3.4638723587709655e-06, "loss": 0.0003, "step": 194400 }, { "epoch": 1.2789879147122096, "grad_norm": 0.011191422005772635, "learning_rate": 3.463326023807041e-06, "loss": 0.0004, "step": 194410 }, { "epoch": 1.2790537028874431, "grad_norm": 0.08108861201277479, "learning_rate": 3.4627797091028354e-06, "loss": 0.0006, "step": 194420 }, { "epoch": 1.2791194910626764, "grad_norm": 0.08155362023568631, "learning_rate": 3.462233414665552e-06, "loss": 0.0005, "step": 194430 }, { "epoch": 1.2791852792379097, "grad_norm": 0.0007485632234290684, "learning_rate": 3.4616871405023924e-06, "loss": 0.0005, "step": 194440 }, { "epoch": 1.2792510674131432, "grad_norm": 0.14948861636162822, "learning_rate": 3.461140886620559e-06, "loss": 0.0003, "step": 194450 }, { "epoch": 1.2793168555883765, "grad_norm": 0.08393505253243175, "learning_rate": 3.4605946530272536e-06, "loss": 0.0014, "step": 194460 }, { "epoch": 1.27938264376361, "grad_norm": 0.05192099768031557, "learning_rate": 3.4600484397296795e-06, "loss": 0.0007, "step": 194470 }, { "epoch": 1.2794484319388433, "grad_norm": 0.0055120662211431875, "learning_rate": 3.459502246735037e-06, "loss": 0.0009, "step": 194480 }, { "epoch": 1.2795142201140766, "grad_norm": 0.008014629272468649, "learning_rate": 3.458956074050526e-06, "loss": 0.0003, "step": 194490 }, { "epoch": 1.27958000828931, "grad_norm": 0.111359894597523, "learning_rate": 3.458409921683349e-06, "loss": 0.0006, "step": 194500 }, { "epoch": 1.2796457964645436, "grad_norm": 0.014185761209919132, "learning_rate": 3.457863789640704e-06, "loss": 0.0009, "step": 194510 }, { "epoch": 1.2797115846397769, "grad_norm": 0.007651581295916183, "learning_rate": 3.4573176779297944e-06, "loss": 0.0008, "step": 194520 }, { "epoch": 1.2797773728150101, "grad_norm": 0.0557658617947288, "learning_rate": 3.4567715865578188e-06, "loss": 0.0009, "step": 194530 }, { "epoch": 1.2798431609902436, "grad_norm": 0.013829667580174098, "learning_rate": 3.456225515531977e-06, "loss": 0.0009, "step": 194540 }, { "epoch": 1.279908949165477, "grad_norm": 0.03041375852868102, "learning_rate": 3.455679464859468e-06, "loss": 0.0009, "step": 194550 }, { "epoch": 1.2799747373407104, "grad_norm": 0.024124911692925553, "learning_rate": 3.455133434547493e-06, "loss": 0.001, "step": 194560 }, { "epoch": 1.2800405255159437, "grad_norm": 0.009742914571505066, "learning_rate": 3.4545874246032484e-06, "loss": 0.0005, "step": 194570 }, { "epoch": 1.2801063136911772, "grad_norm": 0.041391027540673864, "learning_rate": 3.4540414350339325e-06, "loss": 0.0009, "step": 194580 }, { "epoch": 1.2801721018664105, "grad_norm": 0.02001613681124962, "learning_rate": 3.4534954658467467e-06, "loss": 0.0005, "step": 194590 }, { "epoch": 1.280237890041644, "grad_norm": 0.036346899946901906, "learning_rate": 3.4529495170488873e-06, "loss": 0.0003, "step": 194600 }, { "epoch": 1.2803036782168773, "grad_norm": 0.02561388908120887, "learning_rate": 3.4524035886475525e-06, "loss": 0.0008, "step": 194610 }, { "epoch": 1.2803694663921106, "grad_norm": 0.031378214875257306, "learning_rate": 3.45185768064994e-06, "loss": 0.0006, "step": 194620 }, { "epoch": 1.280435254567344, "grad_norm": 0.04573474646646263, "learning_rate": 3.451311793063247e-06, "loss": 0.0007, "step": 194630 }, { "epoch": 1.2805010427425774, "grad_norm": 0.17418458086218822, "learning_rate": 3.4507659258946693e-06, "loss": 0.0017, "step": 194640 }, { "epoch": 1.2805668309178109, "grad_norm": 0.015450814575742707, "learning_rate": 3.450220079151405e-06, "loss": 0.0012, "step": 194650 }, { "epoch": 1.2806326190930442, "grad_norm": 0.035631288390850696, "learning_rate": 3.4496742528406514e-06, "loss": 0.0004, "step": 194660 }, { "epoch": 1.2806984072682777, "grad_norm": 0.012646111737105456, "learning_rate": 3.449128446969604e-06, "loss": 0.0003, "step": 194670 }, { "epoch": 1.280764195443511, "grad_norm": 0.0920636169856709, "learning_rate": 3.4485826615454583e-06, "loss": 0.0012, "step": 194680 }, { "epoch": 1.2808299836187444, "grad_norm": 0.07333784018270208, "learning_rate": 3.44803689657541e-06, "loss": 0.0011, "step": 194690 }, { "epoch": 1.2808957717939777, "grad_norm": 0.04272601092011013, "learning_rate": 3.4474911520666553e-06, "loss": 0.0009, "step": 194700 }, { "epoch": 1.280961559969211, "grad_norm": 0.055278653947691506, "learning_rate": 3.446945428026388e-06, "loss": 0.0011, "step": 194710 }, { "epoch": 1.2810273481444445, "grad_norm": 0.022566122477843148, "learning_rate": 3.446399724461804e-06, "loss": 0.0011, "step": 194720 }, { "epoch": 1.281093136319678, "grad_norm": 0.018424681378841495, "learning_rate": 3.4458540413800988e-06, "loss": 0.0012, "step": 194730 }, { "epoch": 1.2811589244949113, "grad_norm": 0.0025839846362463548, "learning_rate": 3.4453083787884657e-06, "loss": 0.0007, "step": 194740 }, { "epoch": 1.2812247126701446, "grad_norm": 0.013510657509393767, "learning_rate": 3.444762736694099e-06, "loss": 0.0022, "step": 194750 }, { "epoch": 1.281290500845378, "grad_norm": 0.027697861854855146, "learning_rate": 3.444217115104192e-06, "loss": 0.0026, "step": 194760 }, { "epoch": 1.2813562890206114, "grad_norm": 0.003779110297535491, "learning_rate": 3.4436715140259382e-06, "loss": 0.0005, "step": 194770 }, { "epoch": 1.2814220771958449, "grad_norm": 0.05853134753337224, "learning_rate": 3.4431259334665314e-06, "loss": 0.0011, "step": 194780 }, { "epoch": 1.2814878653710782, "grad_norm": 0.012756179066768864, "learning_rate": 3.442580373433165e-06, "loss": 0.0004, "step": 194790 }, { "epoch": 1.2815536535463115, "grad_norm": 0.10728878006389811, "learning_rate": 3.4420348339330325e-06, "loss": 0.0004, "step": 194800 }, { "epoch": 1.281619441721545, "grad_norm": 0.015117387430185317, "learning_rate": 3.4414893149733234e-06, "loss": 0.0005, "step": 194810 }, { "epoch": 1.2816852298967785, "grad_norm": 0.04800220742457845, "learning_rate": 3.440943816561233e-06, "loss": 0.0009, "step": 194820 }, { "epoch": 1.2817510180720117, "grad_norm": 0.01902994674480373, "learning_rate": 3.4403983387039507e-06, "loss": 0.0013, "step": 194830 }, { "epoch": 1.281816806247245, "grad_norm": 0.06596671972703398, "learning_rate": 3.439852881408669e-06, "loss": 0.0004, "step": 194840 }, { "epoch": 1.2818825944224785, "grad_norm": 0.005802604131568281, "learning_rate": 3.439307444682581e-06, "loss": 0.0006, "step": 194850 }, { "epoch": 1.2819483825977118, "grad_norm": 0.012517540970850205, "learning_rate": 3.438762028532876e-06, "loss": 0.0007, "step": 194860 }, { "epoch": 1.2820141707729453, "grad_norm": 0.007496111305056427, "learning_rate": 3.4382166329667455e-06, "loss": 0.0004, "step": 194870 }, { "epoch": 1.2820799589481786, "grad_norm": 0.0251874964131389, "learning_rate": 3.4376712579913796e-06, "loss": 0.0007, "step": 194880 }, { "epoch": 1.2821457471234121, "grad_norm": 0.025937389283384578, "learning_rate": 3.437125903613968e-06, "loss": 0.0008, "step": 194890 }, { "epoch": 1.2822115352986454, "grad_norm": 0.0014097156866962296, "learning_rate": 3.436580569841702e-06, "loss": 0.0009, "step": 194900 }, { "epoch": 1.282277323473879, "grad_norm": 0.02325602880356893, "learning_rate": 3.436035256681771e-06, "loss": 0.0008, "step": 194910 }, { "epoch": 1.2823431116491122, "grad_norm": 0.009403789967868767, "learning_rate": 3.435489964141365e-06, "loss": 0.0017, "step": 194920 }, { "epoch": 1.2824088998243455, "grad_norm": 0.054244575557136555, "learning_rate": 3.434944692227673e-06, "loss": 0.0016, "step": 194930 }, { "epoch": 1.282474687999579, "grad_norm": 0.047201181082511916, "learning_rate": 3.4343994409478826e-06, "loss": 0.0004, "step": 194940 }, { "epoch": 1.2825404761748123, "grad_norm": 0.04256073346736868, "learning_rate": 3.4338542103091844e-06, "loss": 0.0006, "step": 194950 }, { "epoch": 1.2826062643500458, "grad_norm": 0.03428044948525186, "learning_rate": 3.4333090003187646e-06, "loss": 0.0003, "step": 194960 }, { "epoch": 1.282672052525279, "grad_norm": 0.02192165336831744, "learning_rate": 3.4327638109838125e-06, "loss": 0.0004, "step": 194970 }, { "epoch": 1.2827378407005126, "grad_norm": 0.029125246192357016, "learning_rate": 3.4322186423115167e-06, "loss": 0.0009, "step": 194980 }, { "epoch": 1.2828036288757458, "grad_norm": 0.021201783983031755, "learning_rate": 3.431673494309064e-06, "loss": 0.0006, "step": 194990 }, { "epoch": 1.2828694170509793, "grad_norm": 0.021961186185099835, "learning_rate": 3.4311283669836414e-06, "loss": 0.0005, "step": 195000 }, { "epoch": 1.2829352052262126, "grad_norm": 0.037188884410249166, "learning_rate": 3.4305832603424373e-06, "loss": 0.0003, "step": 195010 }, { "epoch": 1.283000993401446, "grad_norm": 0.034219945005657955, "learning_rate": 3.430038174392637e-06, "loss": 0.0005, "step": 195020 }, { "epoch": 1.2830667815766794, "grad_norm": 0.0009349245361992218, "learning_rate": 3.4294931091414264e-06, "loss": 0.0005, "step": 195030 }, { "epoch": 1.283132569751913, "grad_norm": 0.0003022209615227029, "learning_rate": 3.4289480645959937e-06, "loss": 0.0006, "step": 195040 }, { "epoch": 1.2831983579271462, "grad_norm": 0.002493646940391549, "learning_rate": 3.4284030407635236e-06, "loss": 0.0003, "step": 195050 }, { "epoch": 1.2832641461023795, "grad_norm": 0.010565225588773377, "learning_rate": 3.4278580376512027e-06, "loss": 0.0009, "step": 195060 }, { "epoch": 1.283329934277613, "grad_norm": 0.02647521134928014, "learning_rate": 3.427313055266216e-06, "loss": 0.0008, "step": 195070 }, { "epoch": 1.2833957224528463, "grad_norm": 0.0020071639420348388, "learning_rate": 3.4267680936157478e-06, "loss": 0.0009, "step": 195080 }, { "epoch": 1.2834615106280798, "grad_norm": 0.0010808425840854472, "learning_rate": 3.4262231527069833e-06, "loss": 0.001, "step": 195090 }, { "epoch": 1.283527298803313, "grad_norm": 0.05400935251508281, "learning_rate": 3.4256782325471074e-06, "loss": 0.0006, "step": 195100 }, { "epoch": 1.2835930869785463, "grad_norm": 0.03928894955163576, "learning_rate": 3.4251333331433046e-06, "loss": 0.0012, "step": 195110 }, { "epoch": 1.2836588751537799, "grad_norm": 0.03016770045023334, "learning_rate": 3.4245884545027586e-06, "loss": 0.0007, "step": 195120 }, { "epoch": 1.2837246633290134, "grad_norm": 0.025787250337045015, "learning_rate": 3.4240435966326535e-06, "loss": 0.0004, "step": 195130 }, { "epoch": 1.2837904515042466, "grad_norm": 0.011906380509143498, "learning_rate": 3.4234987595401726e-06, "loss": 0.0012, "step": 195140 }, { "epoch": 1.28385623967948, "grad_norm": 0.012691986701527937, "learning_rate": 3.4229539432324987e-06, "loss": 0.0006, "step": 195150 }, { "epoch": 1.2839220278547134, "grad_norm": 0.01320879287223223, "learning_rate": 3.422409147716814e-06, "loss": 0.0007, "step": 195160 }, { "epoch": 1.2839878160299467, "grad_norm": 0.07383684004281772, "learning_rate": 3.4218643730003033e-06, "loss": 0.0008, "step": 195170 }, { "epoch": 1.2840536042051802, "grad_norm": 0.04933044243075846, "learning_rate": 3.4213196190901475e-06, "loss": 0.0004, "step": 195180 }, { "epoch": 1.2841193923804135, "grad_norm": 0.04872973477348565, "learning_rate": 3.4207748859935296e-06, "loss": 0.0005, "step": 195190 }, { "epoch": 1.284185180555647, "grad_norm": 0.001131732308683055, "learning_rate": 3.420230173717631e-06, "loss": 0.0006, "step": 195200 }, { "epoch": 1.2842509687308803, "grad_norm": 0.04003695397239364, "learning_rate": 3.4196854822696323e-06, "loss": 0.0005, "step": 195210 }, { "epoch": 1.2843167569061138, "grad_norm": 0.057521170583536396, "learning_rate": 3.4191408116567173e-06, "loss": 0.0004, "step": 195220 }, { "epoch": 1.284382545081347, "grad_norm": 0.023966641273052675, "learning_rate": 3.418596161886063e-06, "loss": 0.0008, "step": 195230 }, { "epoch": 1.2844483332565804, "grad_norm": 0.0401977781642534, "learning_rate": 3.4180515329648534e-06, "loss": 0.0006, "step": 195240 }, { "epoch": 1.2845141214318139, "grad_norm": 0.05204960203389674, "learning_rate": 3.417506924900269e-06, "loss": 0.0004, "step": 195250 }, { "epoch": 1.2845799096070472, "grad_norm": 0.0005833431822280252, "learning_rate": 3.416962337699489e-06, "loss": 0.0006, "step": 195260 }, { "epoch": 1.2846456977822807, "grad_norm": 0.001268899876002665, "learning_rate": 3.4164177713696924e-06, "loss": 0.0003, "step": 195270 }, { "epoch": 1.284711485957514, "grad_norm": 0.07720257717631963, "learning_rate": 3.4158732259180604e-06, "loss": 0.0005, "step": 195280 }, { "epoch": 1.2847772741327474, "grad_norm": 0.0038833028469647817, "learning_rate": 3.415328701351771e-06, "loss": 0.0005, "step": 195290 }, { "epoch": 1.2848430623079807, "grad_norm": 0.06550561447894117, "learning_rate": 3.414784197678005e-06, "loss": 0.0003, "step": 195300 }, { "epoch": 1.2849088504832142, "grad_norm": 0.028902405629112768, "learning_rate": 3.4142397149039397e-06, "loss": 0.0009, "step": 195310 }, { "epoch": 1.2849746386584475, "grad_norm": 0.028493432669430153, "learning_rate": 3.413695253036755e-06, "loss": 0.0007, "step": 195320 }, { "epoch": 1.2850404268336808, "grad_norm": 0.026126429550000857, "learning_rate": 3.4131508120836276e-06, "loss": 0.0012, "step": 195330 }, { "epoch": 1.2851062150089143, "grad_norm": 0.00487191578139599, "learning_rate": 3.4126063920517372e-06, "loss": 0.0006, "step": 195340 }, { "epoch": 1.2851720031841478, "grad_norm": 0.04887448726420509, "learning_rate": 3.4120619929482603e-06, "loss": 0.0004, "step": 195350 }, { "epoch": 1.285237791359381, "grad_norm": 0.06260462383586045, "learning_rate": 3.411517614780373e-06, "loss": 0.0005, "step": 195360 }, { "epoch": 1.2853035795346144, "grad_norm": 0.008245135779958767, "learning_rate": 3.410973257555256e-06, "loss": 0.0002, "step": 195370 }, { "epoch": 1.2853693677098479, "grad_norm": 0.04907873515355539, "learning_rate": 3.4104289212800826e-06, "loss": 0.0002, "step": 195380 }, { "epoch": 1.2854351558850812, "grad_norm": 0.016753846719012342, "learning_rate": 3.409884605962032e-06, "loss": 0.0003, "step": 195390 }, { "epoch": 1.2855009440603147, "grad_norm": 0.009336916773189917, "learning_rate": 3.40934031160828e-06, "loss": 0.0005, "step": 195400 }, { "epoch": 1.285566732235548, "grad_norm": 0.07479017085900294, "learning_rate": 3.4087960382260016e-06, "loss": 0.0005, "step": 195410 }, { "epoch": 1.2856325204107812, "grad_norm": 0.0018401486142864433, "learning_rate": 3.4082517858223724e-06, "loss": 0.0002, "step": 195420 }, { "epoch": 1.2856983085860147, "grad_norm": 0.12151247238264667, "learning_rate": 3.4077075544045697e-06, "loss": 0.0006, "step": 195430 }, { "epoch": 1.2857640967612483, "grad_norm": 0.049268130675102856, "learning_rate": 3.407163343979768e-06, "loss": 0.0006, "step": 195440 }, { "epoch": 1.2858298849364815, "grad_norm": 0.009271159711746564, "learning_rate": 3.4066191545551415e-06, "loss": 0.0005, "step": 195450 }, { "epoch": 1.2858956731117148, "grad_norm": 0.006888072578699581, "learning_rate": 3.406074986137865e-06, "loss": 0.0003, "step": 195460 }, { "epoch": 1.2859614612869483, "grad_norm": 0.04167272636759457, "learning_rate": 3.4055308387351137e-06, "loss": 0.0006, "step": 195470 }, { "epoch": 1.2860272494621816, "grad_norm": 0.0380551489002876, "learning_rate": 3.4049867123540616e-06, "loss": 0.0008, "step": 195480 }, { "epoch": 1.2860930376374151, "grad_norm": 0.0022157649146448068, "learning_rate": 3.40444260700188e-06, "loss": 0.0005, "step": 195490 }, { "epoch": 1.2861588258126484, "grad_norm": 0.020046424275349466, "learning_rate": 3.403898522685747e-06, "loss": 0.0019, "step": 195500 }, { "epoch": 1.2862246139878817, "grad_norm": 0.012259457606558336, "learning_rate": 3.403354459412832e-06, "loss": 0.0005, "step": 195510 }, { "epoch": 1.2862904021631152, "grad_norm": 0.0015740479783585316, "learning_rate": 3.402810417190312e-06, "loss": 0.0006, "step": 195520 }, { "epoch": 1.2863561903383487, "grad_norm": 0.02507412346727874, "learning_rate": 3.4022663960253556e-06, "loss": 0.0007, "step": 195530 }, { "epoch": 1.286421978513582, "grad_norm": 0.024093901542517865, "learning_rate": 3.4017223959251366e-06, "loss": 0.0006, "step": 195540 }, { "epoch": 1.2864877666888153, "grad_norm": 0.08135129935478319, "learning_rate": 3.4011784168968277e-06, "loss": 0.0005, "step": 195550 }, { "epoch": 1.2865535548640488, "grad_norm": 0.045381923212894575, "learning_rate": 3.4006344589476003e-06, "loss": 0.0006, "step": 195560 }, { "epoch": 1.286619343039282, "grad_norm": 0.06282018539808579, "learning_rate": 3.4000905220846266e-06, "loss": 0.0011, "step": 195570 }, { "epoch": 1.2866851312145156, "grad_norm": 0.029716264344237478, "learning_rate": 3.3995466063150773e-06, "loss": 0.0009, "step": 195580 }, { "epoch": 1.2867509193897488, "grad_norm": 0.031912791894155614, "learning_rate": 3.399002711646123e-06, "loss": 0.0005, "step": 195590 }, { "epoch": 1.2868167075649823, "grad_norm": 0.013892449766393837, "learning_rate": 3.398458838084936e-06, "loss": 0.0012, "step": 195600 }, { "epoch": 1.2868824957402156, "grad_norm": 0.10196197612477925, "learning_rate": 3.3979149856386863e-06, "loss": 0.0009, "step": 195610 }, { "epoch": 1.2869482839154491, "grad_norm": 0.007657380922598731, "learning_rate": 3.3973711543145436e-06, "loss": 0.0006, "step": 195620 }, { "epoch": 1.2870140720906824, "grad_norm": 0.031067648011236345, "learning_rate": 3.396827344119678e-06, "loss": 0.0011, "step": 195630 }, { "epoch": 1.2870798602659157, "grad_norm": 0.04232662040631176, "learning_rate": 3.3962835550612593e-06, "loss": 0.0007, "step": 195640 }, { "epoch": 1.2871456484411492, "grad_norm": 0.020326132999637744, "learning_rate": 3.3957397871464566e-06, "loss": 0.0002, "step": 195650 }, { "epoch": 1.2872114366163827, "grad_norm": 0.054384857787359346, "learning_rate": 3.3951960403824398e-06, "loss": 0.0005, "step": 195660 }, { "epoch": 1.287277224791616, "grad_norm": 0.012119157382393157, "learning_rate": 3.3946523147763767e-06, "loss": 0.0004, "step": 195670 }, { "epoch": 1.2873430129668493, "grad_norm": 0.0856146412280105, "learning_rate": 3.394108610335437e-06, "loss": 0.0007, "step": 195680 }, { "epoch": 1.2874088011420828, "grad_norm": 0.02007090469482342, "learning_rate": 3.393564927066788e-06, "loss": 0.0005, "step": 195690 }, { "epoch": 1.287474589317316, "grad_norm": 0.05708104720906759, "learning_rate": 3.3930212649775984e-06, "loss": 0.0005, "step": 195700 }, { "epoch": 1.2875403774925496, "grad_norm": 0.007804599178263287, "learning_rate": 3.3924776240750355e-06, "loss": 0.0007, "step": 195710 }, { "epoch": 1.2876061656677829, "grad_norm": 0.03485689937714918, "learning_rate": 3.3919340043662653e-06, "loss": 0.0007, "step": 195720 }, { "epoch": 1.2876719538430161, "grad_norm": 0.2308611799913619, "learning_rate": 3.391390405858458e-06, "loss": 0.0006, "step": 195730 }, { "epoch": 1.2877377420182496, "grad_norm": 0.04620094025665955, "learning_rate": 3.3908468285587786e-06, "loss": 0.0007, "step": 195740 }, { "epoch": 1.2878035301934831, "grad_norm": 0.02859522577218515, "learning_rate": 3.390303272474394e-06, "loss": 0.0005, "step": 195750 }, { "epoch": 1.2878693183687164, "grad_norm": 0.0006390850816014434, "learning_rate": 3.38975973761247e-06, "loss": 0.0013, "step": 195760 }, { "epoch": 1.2879351065439497, "grad_norm": 0.05818505384491825, "learning_rate": 3.389216223980174e-06, "loss": 0.0021, "step": 195770 }, { "epoch": 1.2880008947191832, "grad_norm": 0.058342614868350795, "learning_rate": 3.38867273158467e-06, "loss": 0.0014, "step": 195780 }, { "epoch": 1.2880666828944165, "grad_norm": 0.008799653873317615, "learning_rate": 3.3881292604331253e-06, "loss": 0.001, "step": 195790 }, { "epoch": 1.28813247106965, "grad_norm": 0.03814685253302059, "learning_rate": 3.387585810532704e-06, "loss": 0.0007, "step": 195800 }, { "epoch": 1.2881982592448833, "grad_norm": 0.041862087305234184, "learning_rate": 3.3870423818905707e-06, "loss": 0.0004, "step": 195810 }, { "epoch": 1.2882640474201166, "grad_norm": 0.029114023040743512, "learning_rate": 3.386498974513891e-06, "loss": 0.0006, "step": 195820 }, { "epoch": 1.28832983559535, "grad_norm": 0.0074958536755588565, "learning_rate": 3.3859555884098294e-06, "loss": 0.0008, "step": 195830 }, { "epoch": 1.2883956237705836, "grad_norm": 0.019317681277340387, "learning_rate": 3.3854122235855486e-06, "loss": 0.0007, "step": 195840 }, { "epoch": 1.2884614119458169, "grad_norm": 0.19625720993156048, "learning_rate": 3.384868880048212e-06, "loss": 0.0006, "step": 195850 }, { "epoch": 1.2885272001210502, "grad_norm": 0.0011875820533423366, "learning_rate": 3.384325557804986e-06, "loss": 0.0004, "step": 195860 }, { "epoch": 1.2885929882962837, "grad_norm": 0.021309743416626427, "learning_rate": 3.383782256863032e-06, "loss": 0.0006, "step": 195870 }, { "epoch": 1.288658776471517, "grad_norm": 0.02894759729713808, "learning_rate": 3.383238977229513e-06, "loss": 0.0002, "step": 195880 }, { "epoch": 1.2887245646467504, "grad_norm": 0.017727754135948688, "learning_rate": 3.382695718911592e-06, "loss": 0.0008, "step": 195890 }, { "epoch": 1.2887903528219837, "grad_norm": 0.11863112264212798, "learning_rate": 3.3821524819164307e-06, "loss": 0.0011, "step": 195900 }, { "epoch": 1.2888561409972172, "grad_norm": 0.004516030266785737, "learning_rate": 3.3816092662511905e-06, "loss": 0.0002, "step": 195910 }, { "epoch": 1.2889219291724505, "grad_norm": 0.022196654184819094, "learning_rate": 3.381066071923036e-06, "loss": 0.0004, "step": 195920 }, { "epoch": 1.288987717347684, "grad_norm": 0.017476742754002066, "learning_rate": 3.380522898939127e-06, "loss": 0.0005, "step": 195930 }, { "epoch": 1.2890535055229173, "grad_norm": 0.015341799217044948, "learning_rate": 3.3799797473066244e-06, "loss": 0.0007, "step": 195940 }, { "epoch": 1.2891192936981506, "grad_norm": 0.03364130158889435, "learning_rate": 3.37943661703269e-06, "loss": 0.0004, "step": 195950 }, { "epoch": 1.289185081873384, "grad_norm": 0.039803257886927026, "learning_rate": 3.3788935081244843e-06, "loss": 0.0009, "step": 195960 }, { "epoch": 1.2892508700486174, "grad_norm": 0.007719799404297346, "learning_rate": 3.3783504205891676e-06, "loss": 0.0003, "step": 195970 }, { "epoch": 1.2893166582238509, "grad_norm": 0.037481087565673823, "learning_rate": 3.3778073544338993e-06, "loss": 0.0011, "step": 195980 }, { "epoch": 1.2893824463990842, "grad_norm": 0.005675821136362964, "learning_rate": 3.377264309665841e-06, "loss": 0.0005, "step": 195990 }, { "epoch": 1.2894482345743177, "grad_norm": 0.027952850662315586, "learning_rate": 3.3767212862921507e-06, "loss": 0.0003, "step": 196000 }, { "epoch": 1.289514022749551, "grad_norm": 0.03084431190305859, "learning_rate": 3.3761782843199887e-06, "loss": 0.0006, "step": 196010 }, { "epoch": 1.2895798109247845, "grad_norm": 0.004247657499198449, "learning_rate": 3.3756353037565133e-06, "loss": 0.0007, "step": 196020 }, { "epoch": 1.2896455991000177, "grad_norm": 0.0014413681027981339, "learning_rate": 3.3750923446088836e-06, "loss": 0.0008, "step": 196030 }, { "epoch": 1.289711387275251, "grad_norm": 0.0005439696476276518, "learning_rate": 3.374549406884257e-06, "loss": 0.0005, "step": 196040 }, { "epoch": 1.2897771754504845, "grad_norm": 0.007457863873086119, "learning_rate": 3.374006490589794e-06, "loss": 0.0008, "step": 196050 }, { "epoch": 1.289842963625718, "grad_norm": 0.011117308709832073, "learning_rate": 3.3734635957326506e-06, "loss": 0.0006, "step": 196060 }, { "epoch": 1.2899087518009513, "grad_norm": 0.03626855999024677, "learning_rate": 3.3729207223199857e-06, "loss": 0.0008, "step": 196070 }, { "epoch": 1.2899745399761846, "grad_norm": 0.03703113751631236, "learning_rate": 3.372377870358955e-06, "loss": 0.0005, "step": 196080 }, { "epoch": 1.2900403281514181, "grad_norm": 0.03489274573974763, "learning_rate": 3.371835039856717e-06, "loss": 0.0011, "step": 196090 }, { "epoch": 1.2901061163266514, "grad_norm": 0.016986421822610433, "learning_rate": 3.371292230820427e-06, "loss": 0.0009, "step": 196100 }, { "epoch": 1.290171904501885, "grad_norm": 0.04222756686752665, "learning_rate": 3.370749443257242e-06, "loss": 0.0003, "step": 196110 }, { "epoch": 1.2902376926771182, "grad_norm": 0.05124115290254035, "learning_rate": 3.3702066771743193e-06, "loss": 0.0007, "step": 196120 }, { "epoch": 1.2903034808523515, "grad_norm": 0.03404017613991301, "learning_rate": 3.3696639325788138e-06, "loss": 0.0005, "step": 196130 }, { "epoch": 1.290369269027585, "grad_norm": 0.021450870092796597, "learning_rate": 3.3691212094778814e-06, "loss": 0.0012, "step": 196140 }, { "epoch": 1.2904350572028185, "grad_norm": 0.0188104906605048, "learning_rate": 3.3685785078786776e-06, "loss": 0.0004, "step": 196150 }, { "epoch": 1.2905008453780518, "grad_norm": 0.0269504508868305, "learning_rate": 3.3680358277883567e-06, "loss": 0.001, "step": 196160 }, { "epoch": 1.290566633553285, "grad_norm": 0.21787362111861636, "learning_rate": 3.3674931692140732e-06, "loss": 0.0008, "step": 196170 }, { "epoch": 1.2906324217285186, "grad_norm": 0.00588076057678422, "learning_rate": 3.3669505321629838e-06, "loss": 0.0004, "step": 196180 }, { "epoch": 1.2906982099037518, "grad_norm": 0.012873328587173162, "learning_rate": 3.3664079166422403e-06, "loss": 0.001, "step": 196190 }, { "epoch": 1.2907639980789853, "grad_norm": 0.02729188164852524, "learning_rate": 3.365865322658999e-06, "loss": 0.0008, "step": 196200 }, { "epoch": 1.2908297862542186, "grad_norm": 0.010996550391252026, "learning_rate": 3.3653227502204106e-06, "loss": 0.0005, "step": 196210 }, { "epoch": 1.2908955744294521, "grad_norm": 0.0019420877911677805, "learning_rate": 3.3647801993336304e-06, "loss": 0.0012, "step": 196220 }, { "epoch": 1.2909613626046854, "grad_norm": 0.007481810951480306, "learning_rate": 3.364237670005811e-06, "loss": 0.0005, "step": 196230 }, { "epoch": 1.291027150779919, "grad_norm": 0.038643482908275545, "learning_rate": 3.3636951622441044e-06, "loss": 0.0007, "step": 196240 }, { "epoch": 1.2910929389551522, "grad_norm": 0.0020776554440488553, "learning_rate": 3.363152676055665e-06, "loss": 0.0008, "step": 196250 }, { "epoch": 1.2911587271303855, "grad_norm": 0.07141505184093809, "learning_rate": 3.3626102114476434e-06, "loss": 0.0007, "step": 196260 }, { "epoch": 1.291224515305619, "grad_norm": 0.015739442784459632, "learning_rate": 3.3620677684271925e-06, "loss": 0.0006, "step": 196270 }, { "epoch": 1.2912903034808523, "grad_norm": 0.021389529926597105, "learning_rate": 3.361525347001463e-06, "loss": 0.0009, "step": 196280 }, { "epoch": 1.2913560916560858, "grad_norm": 0.07126521669660138, "learning_rate": 3.360982947177607e-06, "loss": 0.0011, "step": 196290 }, { "epoch": 1.291421879831319, "grad_norm": 0.0339838649070288, "learning_rate": 3.360440568962774e-06, "loss": 0.0006, "step": 196300 }, { "epoch": 1.2914876680065526, "grad_norm": 0.09665241393656679, "learning_rate": 3.3598982123641175e-06, "loss": 0.0008, "step": 196310 }, { "epoch": 1.2915534561817859, "grad_norm": 0.011516084819663206, "learning_rate": 3.3593558773887868e-06, "loss": 0.0008, "step": 196320 }, { "epoch": 1.2916192443570194, "grad_norm": 0.025816385639098712, "learning_rate": 3.3588135640439314e-06, "loss": 0.001, "step": 196330 }, { "epoch": 1.2916850325322526, "grad_norm": 0.019092600650641077, "learning_rate": 3.3582712723367016e-06, "loss": 0.0009, "step": 196340 }, { "epoch": 1.291750820707486, "grad_norm": 0.04294599832587241, "learning_rate": 3.3577290022742477e-06, "loss": 0.0009, "step": 196350 }, { "epoch": 1.2918166088827194, "grad_norm": 0.0030225764172414136, "learning_rate": 3.357186753863718e-06, "loss": 0.0008, "step": 196360 }, { "epoch": 1.291882397057953, "grad_norm": 0.022168529444852703, "learning_rate": 3.3566445271122615e-06, "loss": 0.0011, "step": 196370 }, { "epoch": 1.2919481852331862, "grad_norm": 0.13921873812194077, "learning_rate": 3.3561023220270285e-06, "loss": 0.001, "step": 196380 }, { "epoch": 1.2920139734084195, "grad_norm": 0.40300120012372537, "learning_rate": 3.355560138615167e-06, "loss": 0.0005, "step": 196390 }, { "epoch": 1.292079761583653, "grad_norm": 0.05786268578341213, "learning_rate": 3.3550179768838243e-06, "loss": 0.0008, "step": 196400 }, { "epoch": 1.2921455497588863, "grad_norm": 0.22315433594860418, "learning_rate": 3.3544758368401494e-06, "loss": 0.0009, "step": 196410 }, { "epoch": 1.2922113379341198, "grad_norm": 0.0055604448044461385, "learning_rate": 3.353933718491289e-06, "loss": 0.0008, "step": 196420 }, { "epoch": 1.292277126109353, "grad_norm": 0.014038583244869915, "learning_rate": 3.3533916218443897e-06, "loss": 0.0017, "step": 196430 }, { "epoch": 1.2923429142845864, "grad_norm": 0.018916666578874287, "learning_rate": 3.3528495469066012e-06, "loss": 0.0007, "step": 196440 }, { "epoch": 1.2924087024598199, "grad_norm": 0.0674472373522266, "learning_rate": 3.3523074936850684e-06, "loss": 0.001, "step": 196450 }, { "epoch": 1.2924744906350534, "grad_norm": 0.02612932954023179, "learning_rate": 3.351765462186939e-06, "loss": 0.0041, "step": 196460 }, { "epoch": 1.2925402788102867, "grad_norm": 0.003284152938660822, "learning_rate": 3.3512234524193577e-06, "loss": 0.0004, "step": 196470 }, { "epoch": 1.29260606698552, "grad_norm": 0.047138875589288115, "learning_rate": 3.3506814643894713e-06, "loss": 0.0007, "step": 196480 }, { "epoch": 1.2926718551607534, "grad_norm": 0.03535432099041578, "learning_rate": 3.350139498104426e-06, "loss": 0.0007, "step": 196490 }, { "epoch": 1.2927376433359867, "grad_norm": 0.0064478815534142935, "learning_rate": 3.349597553571365e-06, "loss": 0.0002, "step": 196500 }, { "epoch": 1.2928034315112202, "grad_norm": 0.007701431142607595, "learning_rate": 3.349055630797436e-06, "loss": 0.0008, "step": 196510 }, { "epoch": 1.2928692196864535, "grad_norm": 0.08579054652918441, "learning_rate": 3.3485137297897828e-06, "loss": 0.0011, "step": 196520 }, { "epoch": 1.2929350078616868, "grad_norm": 0.0228442978784652, "learning_rate": 3.347971850555549e-06, "loss": 0.0005, "step": 196530 }, { "epoch": 1.2930007960369203, "grad_norm": 0.0022441057204807885, "learning_rate": 3.3474299931018805e-06, "loss": 0.0007, "step": 196540 }, { "epoch": 1.2930665842121538, "grad_norm": 0.03766507382489899, "learning_rate": 3.3468881574359202e-06, "loss": 0.0004, "step": 196550 }, { "epoch": 1.293132372387387, "grad_norm": 0.06956396205310005, "learning_rate": 3.34634634356481e-06, "loss": 0.0008, "step": 196560 }, { "epoch": 1.2931981605626204, "grad_norm": 0.010227242374681795, "learning_rate": 3.345804551495697e-06, "loss": 0.0003, "step": 196570 }, { "epoch": 1.2932639487378539, "grad_norm": 0.04677352486038718, "learning_rate": 3.345262781235722e-06, "loss": 0.0007, "step": 196580 }, { "epoch": 1.2933297369130872, "grad_norm": 0.0002442474762846716, "learning_rate": 3.3447210327920275e-06, "loss": 0.0006, "step": 196590 }, { "epoch": 1.2933955250883207, "grad_norm": 0.009634346074063386, "learning_rate": 3.3441793061717577e-06, "loss": 0.0006, "step": 196600 }, { "epoch": 1.293461313263554, "grad_norm": 0.025168202585016546, "learning_rate": 3.3436376013820525e-06, "loss": 0.0004, "step": 196610 }, { "epoch": 1.2935271014387875, "grad_norm": 0.023309978097212164, "learning_rate": 3.343095918430056e-06, "loss": 0.0009, "step": 196620 }, { "epoch": 1.2935928896140207, "grad_norm": 0.027347410533051633, "learning_rate": 3.3425542573229076e-06, "loss": 0.0004, "step": 196630 }, { "epoch": 1.2936586777892543, "grad_norm": 0.023144579182830503, "learning_rate": 3.3420126180677505e-06, "loss": 0.0004, "step": 196640 }, { "epoch": 1.2937244659644875, "grad_norm": 0.02096590389888177, "learning_rate": 3.3414710006717254e-06, "loss": 0.0002, "step": 196650 }, { "epoch": 1.2937902541397208, "grad_norm": 0.01908576790040347, "learning_rate": 3.3409294051419728e-06, "loss": 0.0006, "step": 196660 }, { "epoch": 1.2938560423149543, "grad_norm": 0.011621452311960734, "learning_rate": 3.340387831485633e-06, "loss": 0.0004, "step": 196670 }, { "epoch": 1.2939218304901878, "grad_norm": 0.06528087135605307, "learning_rate": 3.3398462797098463e-06, "loss": 0.0008, "step": 196680 }, { "epoch": 1.2939876186654211, "grad_norm": 0.06603600998551377, "learning_rate": 3.3393047498217513e-06, "loss": 0.0004, "step": 196690 }, { "epoch": 1.2940534068406544, "grad_norm": 0.029252616420778223, "learning_rate": 3.3387632418284906e-06, "loss": 0.0004, "step": 196700 }, { "epoch": 1.294119195015888, "grad_norm": 0.06452998821282147, "learning_rate": 3.338221755737201e-06, "loss": 0.0018, "step": 196710 }, { "epoch": 1.2941849831911212, "grad_norm": 0.010718259741014952, "learning_rate": 3.3376802915550234e-06, "loss": 0.0007, "step": 196720 }, { "epoch": 1.2942507713663547, "grad_norm": 0.047874710818850454, "learning_rate": 3.3371388492890945e-06, "loss": 0.0011, "step": 196730 }, { "epoch": 1.294316559541588, "grad_norm": 0.06377535177809293, "learning_rate": 3.336597428946554e-06, "loss": 0.0005, "step": 196740 }, { "epoch": 1.2943823477168213, "grad_norm": 0.023247173404946163, "learning_rate": 3.33605603053454e-06, "loss": 0.0013, "step": 196750 }, { "epoch": 1.2944481358920548, "grad_norm": 0.027643282300475944, "learning_rate": 3.335514654060189e-06, "loss": 0.001, "step": 196760 }, { "epoch": 1.2945139240672883, "grad_norm": 0.07565608944055628, "learning_rate": 3.3349732995306404e-06, "loss": 0.0006, "step": 196770 }, { "epoch": 1.2945797122425216, "grad_norm": 0.04001918400573494, "learning_rate": 3.334431966953031e-06, "loss": 0.0009, "step": 196780 }, { "epoch": 1.2946455004177548, "grad_norm": 0.0038822292288889083, "learning_rate": 3.3338906563344988e-06, "loss": 0.0004, "step": 196790 }, { "epoch": 1.2947112885929883, "grad_norm": 0.12671737124430588, "learning_rate": 3.3333493676821783e-06, "loss": 0.0008, "step": 196800 }, { "epoch": 1.2947770767682216, "grad_norm": 0.021163570018714307, "learning_rate": 3.3328081010032066e-06, "loss": 0.0005, "step": 196810 }, { "epoch": 1.2948428649434551, "grad_norm": 0.1035428726633122, "learning_rate": 3.33226685630472e-06, "loss": 0.0008, "step": 196820 }, { "epoch": 1.2949086531186884, "grad_norm": 0.018194241996431385, "learning_rate": 3.331725633593855e-06, "loss": 0.0006, "step": 196830 }, { "epoch": 1.2949744412939217, "grad_norm": 0.06092481646392698, "learning_rate": 3.3311844328777466e-06, "loss": 0.0015, "step": 196840 }, { "epoch": 1.2950402294691552, "grad_norm": 0.05080115041306599, "learning_rate": 3.3306432541635302e-06, "loss": 0.0005, "step": 196850 }, { "epoch": 1.2951060176443887, "grad_norm": 0.0025670234761621005, "learning_rate": 3.3301020974583414e-06, "loss": 0.0005, "step": 196860 }, { "epoch": 1.295171805819622, "grad_norm": 0.014724043844354518, "learning_rate": 3.3295609627693138e-06, "loss": 0.0003, "step": 196870 }, { "epoch": 1.2952375939948553, "grad_norm": 0.08721604460037681, "learning_rate": 3.3290198501035824e-06, "loss": 0.0008, "step": 196880 }, { "epoch": 1.2953033821700888, "grad_norm": 0.0027439334059871693, "learning_rate": 3.3284787594682797e-06, "loss": 0.0005, "step": 196890 }, { "epoch": 1.295369170345322, "grad_norm": 0.12965969183535525, "learning_rate": 3.327937690870543e-06, "loss": 0.0008, "step": 196900 }, { "epoch": 1.2954349585205556, "grad_norm": 0.0305889043576941, "learning_rate": 3.3273966443175025e-06, "loss": 0.0005, "step": 196910 }, { "epoch": 1.2955007466957889, "grad_norm": 0.020253830908289448, "learning_rate": 3.326855619816293e-06, "loss": 0.0005, "step": 196920 }, { "epoch": 1.2955665348710224, "grad_norm": 0.028179898085889662, "learning_rate": 3.326314617374048e-06, "loss": 0.0005, "step": 196930 }, { "epoch": 1.2956323230462556, "grad_norm": 0.011413999348739061, "learning_rate": 3.325773636997898e-06, "loss": 0.0005, "step": 196940 }, { "epoch": 1.2956981112214891, "grad_norm": 0.014009159092480858, "learning_rate": 3.3252326786949775e-06, "loss": 0.0006, "step": 196950 }, { "epoch": 1.2957638993967224, "grad_norm": 0.10402470464129955, "learning_rate": 3.324691742472417e-06, "loss": 0.0006, "step": 196960 }, { "epoch": 1.2958296875719557, "grad_norm": 0.0020139354738665126, "learning_rate": 3.3241508283373493e-06, "loss": 0.0002, "step": 196970 }, { "epoch": 1.2958954757471892, "grad_norm": 0.05664827751220079, "learning_rate": 3.3236099362969052e-06, "loss": 0.0007, "step": 196980 }, { "epoch": 1.2959612639224225, "grad_norm": 0.04930228392150233, "learning_rate": 3.3230690663582164e-06, "loss": 0.0006, "step": 196990 }, { "epoch": 1.296027052097656, "grad_norm": 0.034110172433186876, "learning_rate": 3.3225282185284148e-06, "loss": 0.0008, "step": 197000 }, { "epoch": 1.2960928402728893, "grad_norm": 0.012841859359592691, "learning_rate": 3.3219873928146286e-06, "loss": 0.0017, "step": 197010 }, { "epoch": 1.2961586284481228, "grad_norm": 0.03494183298616932, "learning_rate": 3.321446589223989e-06, "loss": 0.001, "step": 197020 }, { "epoch": 1.296224416623356, "grad_norm": 0.06727044739985759, "learning_rate": 3.3209058077636276e-06, "loss": 0.0005, "step": 197030 }, { "epoch": 1.2962902047985896, "grad_norm": 0.0706775058666969, "learning_rate": 3.3203650484406726e-06, "loss": 0.0011, "step": 197040 }, { "epoch": 1.2963559929738229, "grad_norm": 0.02649133451304524, "learning_rate": 3.3198243112622543e-06, "loss": 0.0011, "step": 197050 }, { "epoch": 1.2964217811490562, "grad_norm": 0.023520684828335477, "learning_rate": 3.319283596235501e-06, "loss": 0.0007, "step": 197060 }, { "epoch": 1.2964875693242897, "grad_norm": 0.008470709388934534, "learning_rate": 3.318742903367541e-06, "loss": 0.001, "step": 197070 }, { "epoch": 1.2965533574995232, "grad_norm": 0.018361705878707557, "learning_rate": 3.318202232665505e-06, "loss": 0.0005, "step": 197080 }, { "epoch": 1.2966191456747564, "grad_norm": 0.02098881431497016, "learning_rate": 3.317661584136519e-06, "loss": 0.0004, "step": 197090 }, { "epoch": 1.2966849338499897, "grad_norm": 0.0071319513312788725, "learning_rate": 3.3171209577877128e-06, "loss": 0.0003, "step": 197100 }, { "epoch": 1.2967507220252232, "grad_norm": 0.005708766667647769, "learning_rate": 3.316580353626213e-06, "loss": 0.0003, "step": 197110 }, { "epoch": 1.2968165102004565, "grad_norm": 0.023719147775940565, "learning_rate": 3.316039771659148e-06, "loss": 0.0003, "step": 197120 }, { "epoch": 1.29688229837569, "grad_norm": 0.0874879377516872, "learning_rate": 3.3154992118936435e-06, "loss": 0.0017, "step": 197130 }, { "epoch": 1.2969480865509233, "grad_norm": 0.07008557073540689, "learning_rate": 3.314958674336828e-06, "loss": 0.0011, "step": 197140 }, { "epoch": 1.2970138747261566, "grad_norm": 0.020294467858502547, "learning_rate": 3.3144181589958257e-06, "loss": 0.0007, "step": 197150 }, { "epoch": 1.29707966290139, "grad_norm": 0.09186148944219566, "learning_rate": 3.313877665877765e-06, "loss": 0.0009, "step": 197160 }, { "epoch": 1.2971454510766236, "grad_norm": 0.02791199129712277, "learning_rate": 3.313337194989771e-06, "loss": 0.0006, "step": 197170 }, { "epoch": 1.2972112392518569, "grad_norm": 0.009726582286546326, "learning_rate": 3.31279674633897e-06, "loss": 0.0002, "step": 197180 }, { "epoch": 1.2972770274270902, "grad_norm": 0.06267717256207354, "learning_rate": 3.312256319932486e-06, "loss": 0.0037, "step": 197190 }, { "epoch": 1.2973428156023237, "grad_norm": 0.0036649003124847412, "learning_rate": 3.3117159157774448e-06, "loss": 0.0004, "step": 197200 }, { "epoch": 1.297408603777557, "grad_norm": 0.016892762637228397, "learning_rate": 3.3111755338809713e-06, "loss": 0.0013, "step": 197210 }, { "epoch": 1.2974743919527905, "grad_norm": 0.02379120840853053, "learning_rate": 3.310635174250189e-06, "loss": 0.0005, "step": 197220 }, { "epoch": 1.2975401801280237, "grad_norm": 0.04348414796750882, "learning_rate": 3.3100948368922236e-06, "loss": 0.001, "step": 197230 }, { "epoch": 1.2976059683032573, "grad_norm": 0.01871637085043388, "learning_rate": 3.3095545218141987e-06, "loss": 0.0005, "step": 197240 }, { "epoch": 1.2976717564784905, "grad_norm": 0.07275418804604501, "learning_rate": 3.3090142290232373e-06, "loss": 0.0015, "step": 197250 }, { "epoch": 1.297737544653724, "grad_norm": 0.012951063840683955, "learning_rate": 3.308473958526463e-06, "loss": 0.0002, "step": 197260 }, { "epoch": 1.2978033328289573, "grad_norm": 0.02558616764214571, "learning_rate": 3.3079337103309983e-06, "loss": 0.0007, "step": 197270 }, { "epoch": 1.2978691210041906, "grad_norm": 0.01680927319834205, "learning_rate": 3.307393484443966e-06, "loss": 0.0004, "step": 197280 }, { "epoch": 1.2979349091794241, "grad_norm": 0.01516549604334274, "learning_rate": 3.3068532808724884e-06, "loss": 0.0005, "step": 197290 }, { "epoch": 1.2980006973546574, "grad_norm": 0.053026085725790144, "learning_rate": 3.306313099623689e-06, "loss": 0.0007, "step": 197300 }, { "epoch": 1.298066485529891, "grad_norm": 0.03469180144047795, "learning_rate": 3.305772940704689e-06, "loss": 0.0005, "step": 197310 }, { "epoch": 1.2981322737051242, "grad_norm": 0.03760639777359239, "learning_rate": 3.305232804122609e-06, "loss": 0.0005, "step": 197320 }, { "epoch": 1.2981980618803577, "grad_norm": 0.09929734731214147, "learning_rate": 3.3046926898845704e-06, "loss": 0.0015, "step": 197330 }, { "epoch": 1.298263850055591, "grad_norm": 0.00042123531668125766, "learning_rate": 3.304152597997695e-06, "loss": 0.0005, "step": 197340 }, { "epoch": 1.2983296382308245, "grad_norm": 0.039229312504780386, "learning_rate": 3.303612528469102e-06, "loss": 0.0006, "step": 197350 }, { "epoch": 1.2983954264060578, "grad_norm": 0.026169433156767053, "learning_rate": 3.3030724813059134e-06, "loss": 0.0004, "step": 197360 }, { "epoch": 1.298461214581291, "grad_norm": 0.0662104130933118, "learning_rate": 3.302532456515248e-06, "loss": 0.0003, "step": 197370 }, { "epoch": 1.2985270027565246, "grad_norm": 0.00808113326274958, "learning_rate": 3.3019924541042277e-06, "loss": 0.0011, "step": 197380 }, { "epoch": 1.298592790931758, "grad_norm": 0.014934959096474649, "learning_rate": 3.301452474079969e-06, "loss": 0.0006, "step": 197390 }, { "epoch": 1.2986585791069913, "grad_norm": 0.027335617867481166, "learning_rate": 3.3009125164495938e-06, "loss": 0.0006, "step": 197400 }, { "epoch": 1.2987243672822246, "grad_norm": 0.08043546450736215, "learning_rate": 3.3003725812202194e-06, "loss": 0.0007, "step": 197410 }, { "epoch": 1.2987901554574581, "grad_norm": 0.045356236651185454, "learning_rate": 3.2998326683989644e-06, "loss": 0.0006, "step": 197420 }, { "epoch": 1.2988559436326914, "grad_norm": 0.057508393276572055, "learning_rate": 3.299292777992947e-06, "loss": 0.0007, "step": 197430 }, { "epoch": 1.298921731807925, "grad_norm": 0.028516143046308708, "learning_rate": 3.2987529100092853e-06, "loss": 0.0003, "step": 197440 }, { "epoch": 1.2989875199831582, "grad_norm": 0.017780499278290448, "learning_rate": 3.2982130644550964e-06, "loss": 0.0005, "step": 197450 }, { "epoch": 1.2990533081583915, "grad_norm": 0.05426213521383808, "learning_rate": 3.2976732413374994e-06, "loss": 0.0005, "step": 197460 }, { "epoch": 1.299119096333625, "grad_norm": 0.0016615069271223879, "learning_rate": 3.297133440663611e-06, "loss": 0.0005, "step": 197470 }, { "epoch": 1.2991848845088585, "grad_norm": 0.061581982692166304, "learning_rate": 3.296593662440547e-06, "loss": 0.0006, "step": 197480 }, { "epoch": 1.2992506726840918, "grad_norm": 0.1709381095825094, "learning_rate": 3.2960539066754236e-06, "loss": 0.0012, "step": 197490 }, { "epoch": 1.299316460859325, "grad_norm": 0.02201711655855239, "learning_rate": 3.2955141733753594e-06, "loss": 0.0006, "step": 197500 }, { "epoch": 1.2993822490345586, "grad_norm": 0.10821864432884147, "learning_rate": 3.2949744625474667e-06, "loss": 0.0007, "step": 197510 }, { "epoch": 1.2994480372097919, "grad_norm": 0.0062020715169433934, "learning_rate": 3.2944347741988646e-06, "loss": 0.0007, "step": 197520 }, { "epoch": 1.2995138253850254, "grad_norm": 0.03770957065607693, "learning_rate": 3.2938951083366668e-06, "loss": 0.0007, "step": 197530 }, { "epoch": 1.2995796135602586, "grad_norm": 0.006711624499513189, "learning_rate": 3.2933554649679887e-06, "loss": 0.0006, "step": 197540 }, { "epoch": 1.2996454017354921, "grad_norm": 0.02818400263473621, "learning_rate": 3.2928158440999447e-06, "loss": 0.001, "step": 197550 }, { "epoch": 1.2997111899107254, "grad_norm": 0.02602375591678364, "learning_rate": 3.2922762457396494e-06, "loss": 0.0008, "step": 197560 }, { "epoch": 1.299776978085959, "grad_norm": 0.03186001717640031, "learning_rate": 3.2917366698942166e-06, "loss": 0.0017, "step": 197570 }, { "epoch": 1.2998427662611922, "grad_norm": 0.009538261952412353, "learning_rate": 3.29119711657076e-06, "loss": 0.0005, "step": 197580 }, { "epoch": 1.2999085544364255, "grad_norm": 0.00135405270165004, "learning_rate": 3.2906575857763945e-06, "loss": 0.0004, "step": 197590 }, { "epoch": 1.299974342611659, "grad_norm": 0.025560924362169477, "learning_rate": 3.2901180775182316e-06, "loss": 0.0004, "step": 197600 }, { "epoch": 1.3000401307868923, "grad_norm": 0.004028513847975046, "learning_rate": 3.2895785918033864e-06, "loss": 0.0008, "step": 197610 }, { "epoch": 1.3001059189621258, "grad_norm": 0.0004080548222509484, "learning_rate": 3.2890391286389693e-06, "loss": 0.0007, "step": 197620 }, { "epoch": 1.300171707137359, "grad_norm": 0.024589045473766495, "learning_rate": 3.288499688032094e-06, "loss": 0.0005, "step": 197630 }, { "epoch": 1.3002374953125926, "grad_norm": 0.09810785437177424, "learning_rate": 3.287960269989871e-06, "loss": 0.0018, "step": 197640 }, { "epoch": 1.3003032834878259, "grad_norm": 0.0582440314209262, "learning_rate": 3.2874208745194148e-06, "loss": 0.0003, "step": 197650 }, { "epoch": 1.3003690716630594, "grad_norm": 0.006776603186535979, "learning_rate": 3.2868815016278343e-06, "loss": 0.0009, "step": 197660 }, { "epoch": 1.3004348598382927, "grad_norm": 0.019303293589312372, "learning_rate": 3.2863421513222426e-06, "loss": 0.0004, "step": 197670 }, { "epoch": 1.300500648013526, "grad_norm": 0.07056615148588496, "learning_rate": 3.285802823609749e-06, "loss": 0.0008, "step": 197680 }, { "epoch": 1.3005664361887594, "grad_norm": 0.0029508840465494535, "learning_rate": 3.2852635184974647e-06, "loss": 0.0008, "step": 197690 }, { "epoch": 1.300632224363993, "grad_norm": 0.02349296924923276, "learning_rate": 3.2847242359925003e-06, "loss": 0.0006, "step": 197700 }, { "epoch": 1.3006980125392262, "grad_norm": 0.02456020040290789, "learning_rate": 3.284184976101964e-06, "loss": 0.0028, "step": 197710 }, { "epoch": 1.3007638007144595, "grad_norm": 0.1566125779346452, "learning_rate": 3.2836457388329683e-06, "loss": 0.0007, "step": 197720 }, { "epoch": 1.300829588889693, "grad_norm": 0.012528661310651345, "learning_rate": 3.283106524192621e-06, "loss": 0.0005, "step": 197730 }, { "epoch": 1.3008953770649263, "grad_norm": 0.025768971117612623, "learning_rate": 3.2825673321880314e-06, "loss": 0.0004, "step": 197740 }, { "epoch": 1.3009611652401598, "grad_norm": 0.0010155463681621683, "learning_rate": 3.2820281628263086e-06, "loss": 0.0015, "step": 197750 }, { "epoch": 1.301026953415393, "grad_norm": 0.0014192061347255453, "learning_rate": 3.2814890161145606e-06, "loss": 0.0007, "step": 197760 }, { "epoch": 1.3010927415906264, "grad_norm": 0.12344398299895908, "learning_rate": 3.280949892059895e-06, "loss": 0.001, "step": 197770 }, { "epoch": 1.3011585297658599, "grad_norm": 0.02359052312397965, "learning_rate": 3.2804107906694207e-06, "loss": 0.0008, "step": 197780 }, { "epoch": 1.3012243179410934, "grad_norm": 0.008493705008816434, "learning_rate": 3.2798717119502453e-06, "loss": 0.0006, "step": 197790 }, { "epoch": 1.3012901061163267, "grad_norm": 0.019367594996217753, "learning_rate": 3.279332655909475e-06, "loss": 0.001, "step": 197800 }, { "epoch": 1.30135589429156, "grad_norm": 0.08155584210211894, "learning_rate": 3.278793622554219e-06, "loss": 0.0007, "step": 197810 }, { "epoch": 1.3014216824667935, "grad_norm": 0.037501089715028205, "learning_rate": 3.2782546118915816e-06, "loss": 0.0002, "step": 197820 }, { "epoch": 1.3014874706420267, "grad_norm": 0.022068168691309627, "learning_rate": 3.2777156239286704e-06, "loss": 0.0007, "step": 197830 }, { "epoch": 1.3015532588172603, "grad_norm": 0.046704102870281455, "learning_rate": 3.2771766586725907e-06, "loss": 0.0009, "step": 197840 }, { "epoch": 1.3016190469924935, "grad_norm": 0.033674513151898434, "learning_rate": 3.276637716130449e-06, "loss": 0.0005, "step": 197850 }, { "epoch": 1.3016848351677268, "grad_norm": 0.07260502021889702, "learning_rate": 3.276098796309351e-06, "loss": 0.0011, "step": 197860 }, { "epoch": 1.3017506233429603, "grad_norm": 0.06251325947878325, "learning_rate": 3.2755598992164016e-06, "loss": 0.0005, "step": 197870 }, { "epoch": 1.3018164115181938, "grad_norm": 0.0632476790546026, "learning_rate": 3.2750210248587055e-06, "loss": 0.0012, "step": 197880 }, { "epoch": 1.3018821996934271, "grad_norm": 0.019232464772016622, "learning_rate": 3.2744821732433676e-06, "loss": 0.0006, "step": 197890 }, { "epoch": 1.3019479878686604, "grad_norm": 0.02370594099190261, "learning_rate": 3.27394334437749e-06, "loss": 0.0011, "step": 197900 }, { "epoch": 1.302013776043894, "grad_norm": 0.010382377606518566, "learning_rate": 3.2734045382681812e-06, "loss": 0.0006, "step": 197910 }, { "epoch": 1.3020795642191272, "grad_norm": 0.0018099319549249324, "learning_rate": 3.272865754922542e-06, "loss": 0.0003, "step": 197920 }, { "epoch": 1.3021453523943607, "grad_norm": 0.06570751817348226, "learning_rate": 3.2723269943476756e-06, "loss": 0.0004, "step": 197930 }, { "epoch": 1.302211140569594, "grad_norm": 0.006448072945774617, "learning_rate": 3.2717882565506853e-06, "loss": 0.0022, "step": 197940 }, { "epoch": 1.3022769287448275, "grad_norm": 0.042459725309515445, "learning_rate": 3.2712495415386754e-06, "loss": 0.0009, "step": 197950 }, { "epoch": 1.3023427169200608, "grad_norm": 0.0025058910340099556, "learning_rate": 3.2707108493187467e-06, "loss": 0.0005, "step": 197960 }, { "epoch": 1.3024085050952943, "grad_norm": 0.008321883456292771, "learning_rate": 3.2701721798980006e-06, "loss": 0.0007, "step": 197970 }, { "epoch": 1.3024742932705275, "grad_norm": 0.0044615666919918815, "learning_rate": 3.2696335332835417e-06, "loss": 0.0005, "step": 197980 }, { "epoch": 1.3025400814457608, "grad_norm": 0.01164122787999377, "learning_rate": 3.26909490948247e-06, "loss": 0.0007, "step": 197990 }, { "epoch": 1.3026058696209943, "grad_norm": 0.016228139333769313, "learning_rate": 3.268556308501888e-06, "loss": 0.0003, "step": 198000 }, { "epoch": 1.3026716577962278, "grad_norm": 0.048181865141587316, "learning_rate": 3.2680177303488946e-06, "loss": 0.0005, "step": 198010 }, { "epoch": 1.3027374459714611, "grad_norm": 0.020575850679291942, "learning_rate": 3.267479175030592e-06, "loss": 0.0005, "step": 198020 }, { "epoch": 1.3028032341466944, "grad_norm": 0.004806271297946143, "learning_rate": 3.266940642554079e-06, "loss": 0.0003, "step": 198030 }, { "epoch": 1.302869022321928, "grad_norm": 0.002397367097254048, "learning_rate": 3.266402132926458e-06, "loss": 0.0012, "step": 198040 }, { "epoch": 1.3029348104971612, "grad_norm": 0.15153383792389238, "learning_rate": 3.2658636461548274e-06, "loss": 0.0008, "step": 198050 }, { "epoch": 1.3030005986723947, "grad_norm": 0.0029411713933297093, "learning_rate": 3.265325182246287e-06, "loss": 0.0007, "step": 198060 }, { "epoch": 1.303066386847628, "grad_norm": 0.027766302351397786, "learning_rate": 3.2647867412079357e-06, "loss": 0.0007, "step": 198070 }, { "epoch": 1.3031321750228613, "grad_norm": 0.007551147823773915, "learning_rate": 3.2642483230468736e-06, "loss": 0.0007, "step": 198080 }, { "epoch": 1.3031979631980948, "grad_norm": 0.03713557788543529, "learning_rate": 3.263709927770197e-06, "loss": 0.0006, "step": 198090 }, { "epoch": 1.3032637513733283, "grad_norm": 0.04203114839421277, "learning_rate": 3.263171555385005e-06, "loss": 0.0015, "step": 198100 }, { "epoch": 1.3033295395485616, "grad_norm": 0.012726272869712929, "learning_rate": 3.2626332058983974e-06, "loss": 0.0004, "step": 198110 }, { "epoch": 1.3033953277237948, "grad_norm": 0.0035371238049069654, "learning_rate": 3.26209487931747e-06, "loss": 0.0004, "step": 198120 }, { "epoch": 1.3034611158990284, "grad_norm": 0.03567147035738885, "learning_rate": 3.2615565756493206e-06, "loss": 0.0009, "step": 198130 }, { "epoch": 1.3035269040742616, "grad_norm": 0.012903155194037856, "learning_rate": 3.2610182949010473e-06, "loss": 0.0005, "step": 198140 }, { "epoch": 1.3035926922494951, "grad_norm": 0.07368902347382215, "learning_rate": 3.2604800370797446e-06, "loss": 0.0006, "step": 198150 }, { "epoch": 1.3036584804247284, "grad_norm": 0.027376691846231642, "learning_rate": 3.25994180219251e-06, "loss": 0.0005, "step": 198160 }, { "epoch": 1.3037242685999617, "grad_norm": 0.0695711297246893, "learning_rate": 3.2594035902464406e-06, "loss": 0.0011, "step": 198170 }, { "epoch": 1.3037900567751952, "grad_norm": 0.030482377478798103, "learning_rate": 3.2588654012486313e-06, "loss": 0.0005, "step": 198180 }, { "epoch": 1.3038558449504287, "grad_norm": 0.03340438851180897, "learning_rate": 3.258327235206179e-06, "loss": 0.0006, "step": 198190 }, { "epoch": 1.303921633125662, "grad_norm": 0.001321564799578621, "learning_rate": 3.2577890921261768e-06, "loss": 0.0007, "step": 198200 }, { "epoch": 1.3039874213008953, "grad_norm": 0.023939327113480294, "learning_rate": 3.2572509720157215e-06, "loss": 0.0006, "step": 198210 }, { "epoch": 1.3040532094761288, "grad_norm": 0.009593758095197923, "learning_rate": 3.2567128748819066e-06, "loss": 0.0011, "step": 198220 }, { "epoch": 1.304118997651362, "grad_norm": 0.013692712769638939, "learning_rate": 3.256174800731826e-06, "loss": 0.0004, "step": 198230 }, { "epoch": 1.3041847858265956, "grad_norm": 0.04487010630729072, "learning_rate": 3.255636749572575e-06, "loss": 0.0015, "step": 198240 }, { "epoch": 1.3042505740018289, "grad_norm": 0.021125857171776428, "learning_rate": 3.255098721411247e-06, "loss": 0.0009, "step": 198250 }, { "epoch": 1.3043163621770624, "grad_norm": 0.09837467820979941, "learning_rate": 3.2545607162549365e-06, "loss": 0.0008, "step": 198260 }, { "epoch": 1.3043821503522957, "grad_norm": 0.02890878711653429, "learning_rate": 3.2540227341107345e-06, "loss": 0.0006, "step": 198270 }, { "epoch": 1.3044479385275292, "grad_norm": 0.00338298498542586, "learning_rate": 3.2534847749857358e-06, "loss": 0.0003, "step": 198280 }, { "epoch": 1.3045137267027624, "grad_norm": 0.006777872931397818, "learning_rate": 3.25294683888703e-06, "loss": 0.0017, "step": 198290 }, { "epoch": 1.3045795148779957, "grad_norm": 0.007745114113734609, "learning_rate": 3.2524089258217122e-06, "loss": 0.0006, "step": 198300 }, { "epoch": 1.3046453030532292, "grad_norm": 0.015396214324540825, "learning_rate": 3.251871035796874e-06, "loss": 0.0014, "step": 198310 }, { "epoch": 1.3047110912284625, "grad_norm": 0.037258926708698, "learning_rate": 3.2513331688196057e-06, "loss": 0.0007, "step": 198320 }, { "epoch": 1.304776879403696, "grad_norm": 0.05321586149293378, "learning_rate": 3.2507953248970005e-06, "loss": 0.0007, "step": 198330 }, { "epoch": 1.3048426675789293, "grad_norm": 0.004300990534316743, "learning_rate": 3.250257504036147e-06, "loss": 0.0006, "step": 198340 }, { "epoch": 1.3049084557541628, "grad_norm": 0.03891681138427859, "learning_rate": 3.249719706244138e-06, "loss": 0.0009, "step": 198350 }, { "epoch": 1.304974243929396, "grad_norm": 0.0077122206308729525, "learning_rate": 3.2491819315280615e-06, "loss": 0.0011, "step": 198360 }, { "epoch": 1.3050400321046296, "grad_norm": 0.04150062856032386, "learning_rate": 3.24864417989501e-06, "loss": 0.0004, "step": 198370 }, { "epoch": 1.3051058202798629, "grad_norm": 0.0017556838896547332, "learning_rate": 3.248106451352073e-06, "loss": 0.0006, "step": 198380 }, { "epoch": 1.3051716084550962, "grad_norm": 0.005154651959507749, "learning_rate": 3.247568745906339e-06, "loss": 0.0005, "step": 198390 }, { "epoch": 1.3052373966303297, "grad_norm": 0.003355853423167482, "learning_rate": 3.247031063564898e-06, "loss": 0.0004, "step": 198400 }, { "epoch": 1.3053031848055632, "grad_norm": 0.020765994589737536, "learning_rate": 3.2464934043348384e-06, "loss": 0.0007, "step": 198410 }, { "epoch": 1.3053689729807965, "grad_norm": 0.009105091620810319, "learning_rate": 3.245955768223247e-06, "loss": 0.0004, "step": 198420 }, { "epoch": 1.3054347611560297, "grad_norm": 0.04590870365209664, "learning_rate": 3.2454181552372156e-06, "loss": 0.0007, "step": 198430 }, { "epoch": 1.3055005493312632, "grad_norm": 0.003929056416315867, "learning_rate": 3.2448805653838306e-06, "loss": 0.0004, "step": 198440 }, { "epoch": 1.3055663375064965, "grad_norm": 0.025337233451531032, "learning_rate": 3.244342998670179e-06, "loss": 0.001, "step": 198450 }, { "epoch": 1.30563212568173, "grad_norm": 0.007997592824211645, "learning_rate": 3.243805455103349e-06, "loss": 0.0003, "step": 198460 }, { "epoch": 1.3056979138569633, "grad_norm": 0.14282909108418235, "learning_rate": 3.243267934690427e-06, "loss": 0.0008, "step": 198470 }, { "epoch": 1.3057637020321966, "grad_norm": 0.08653330793829354, "learning_rate": 3.2427304374385006e-06, "loss": 0.0009, "step": 198480 }, { "epoch": 1.30582949020743, "grad_norm": 0.017640101071665925, "learning_rate": 3.2421929633546544e-06, "loss": 0.0007, "step": 198490 }, { "epoch": 1.3058952783826636, "grad_norm": 0.010198761064748074, "learning_rate": 3.241655512445977e-06, "loss": 0.0008, "step": 198500 }, { "epoch": 1.305961066557897, "grad_norm": 0.05168872376212697, "learning_rate": 3.2411180847195534e-06, "loss": 0.0004, "step": 198510 }, { "epoch": 1.3060268547331302, "grad_norm": 0.08257738648374574, "learning_rate": 3.240580680182468e-06, "loss": 0.0008, "step": 198520 }, { "epoch": 1.3060926429083637, "grad_norm": 0.04217223588366385, "learning_rate": 3.2400432988418072e-06, "loss": 0.0005, "step": 198530 }, { "epoch": 1.306158431083597, "grad_norm": 0.02222078700058158, "learning_rate": 3.239505940704656e-06, "loss": 0.0008, "step": 198540 }, { "epoch": 1.3062242192588305, "grad_norm": 0.012529328822621654, "learning_rate": 3.2389686057780965e-06, "loss": 0.0003, "step": 198550 }, { "epoch": 1.3062900074340638, "grad_norm": 0.008057213552778459, "learning_rate": 3.2384312940692166e-06, "loss": 0.0003, "step": 198560 }, { "epoch": 1.3063557956092973, "grad_norm": 0.014513976385623678, "learning_rate": 3.2378940055850983e-06, "loss": 0.0004, "step": 198570 }, { "epoch": 1.3064215837845305, "grad_norm": 0.07734237980976559, "learning_rate": 3.2373567403328267e-06, "loss": 0.0006, "step": 198580 }, { "epoch": 1.306487371959764, "grad_norm": 0.026161451850780195, "learning_rate": 3.236819498319483e-06, "loss": 0.0005, "step": 198590 }, { "epoch": 1.3065531601349973, "grad_norm": 0.015689723897259512, "learning_rate": 3.236282279552152e-06, "loss": 0.001, "step": 198600 }, { "epoch": 1.3066189483102306, "grad_norm": 0.027173056167482276, "learning_rate": 3.2357450840379156e-06, "loss": 0.0004, "step": 198610 }, { "epoch": 1.3066847364854641, "grad_norm": 0.06293625297277847, "learning_rate": 3.2352079117838553e-06, "loss": 0.001, "step": 198620 }, { "epoch": 1.3067505246606974, "grad_norm": 0.01583219981450871, "learning_rate": 3.2346707627970565e-06, "loss": 0.0008, "step": 198630 }, { "epoch": 1.306816312835931, "grad_norm": 0.03559503813916819, "learning_rate": 3.2341336370845983e-06, "loss": 0.0006, "step": 198640 }, { "epoch": 1.3068821010111642, "grad_norm": 0.023219410374735298, "learning_rate": 3.233596534653563e-06, "loss": 0.0003, "step": 198650 }, { "epoch": 1.3069478891863977, "grad_norm": 0.03099009974106532, "learning_rate": 3.233059455511032e-06, "loss": 0.0012, "step": 198660 }, { "epoch": 1.307013677361631, "grad_norm": 0.0033695663904772188, "learning_rate": 3.2325223996640865e-06, "loss": 0.0002, "step": 198670 }, { "epoch": 1.3070794655368645, "grad_norm": 0.01843803466843606, "learning_rate": 3.231985367119805e-06, "loss": 0.0003, "step": 198680 }, { "epoch": 1.3071452537120978, "grad_norm": 0.03026682813338275, "learning_rate": 3.2314483578852706e-06, "loss": 0.0004, "step": 198690 }, { "epoch": 1.307211041887331, "grad_norm": 0.0025754767874333656, "learning_rate": 3.230911371967562e-06, "loss": 0.0008, "step": 198700 }, { "epoch": 1.3072768300625646, "grad_norm": 0.09497788515481898, "learning_rate": 3.2303744093737595e-06, "loss": 0.0007, "step": 198710 }, { "epoch": 1.307342618237798, "grad_norm": 0.12054630927169928, "learning_rate": 3.229837470110942e-06, "loss": 0.0008, "step": 198720 }, { "epoch": 1.3074084064130314, "grad_norm": 0.0213170090523336, "learning_rate": 3.2293005541861887e-06, "loss": 0.0012, "step": 198730 }, { "epoch": 1.3074741945882646, "grad_norm": 0.015432082499918466, "learning_rate": 3.2287636616065778e-06, "loss": 0.0006, "step": 198740 }, { "epoch": 1.3075399827634981, "grad_norm": 0.03434154737879932, "learning_rate": 3.2282267923791875e-06, "loss": 0.0007, "step": 198750 }, { "epoch": 1.3076057709387314, "grad_norm": 0.0001514604903750963, "learning_rate": 3.2276899465110978e-06, "loss": 0.0008, "step": 198760 }, { "epoch": 1.307671559113965, "grad_norm": 0.027444664521975193, "learning_rate": 3.2271531240093855e-06, "loss": 0.0003, "step": 198770 }, { "epoch": 1.3077373472891982, "grad_norm": 0.005296525922811845, "learning_rate": 3.2266163248811286e-06, "loss": 0.0022, "step": 198780 }, { "epoch": 1.3078031354644315, "grad_norm": 0.04399593448776123, "learning_rate": 3.2260795491334036e-06, "loss": 0.0004, "step": 198790 }, { "epoch": 1.307868923639665, "grad_norm": 0.02212501195503928, "learning_rate": 3.225542796773288e-06, "loss": 0.0007, "step": 198800 }, { "epoch": 1.3079347118148985, "grad_norm": 0.0007676191992402538, "learning_rate": 3.225006067807856e-06, "loss": 0.0006, "step": 198810 }, { "epoch": 1.3080004999901318, "grad_norm": 0.028709292672201497, "learning_rate": 3.224469362244188e-06, "loss": 0.0009, "step": 198820 }, { "epoch": 1.308066288165365, "grad_norm": 0.051148181646819696, "learning_rate": 3.223932680089357e-06, "loss": 0.0004, "step": 198830 }, { "epoch": 1.3081320763405986, "grad_norm": 0.0655382604583549, "learning_rate": 3.2233960213504406e-06, "loss": 0.0005, "step": 198840 }, { "epoch": 1.3081978645158319, "grad_norm": 0.019127380767698637, "learning_rate": 3.222859386034513e-06, "loss": 0.0003, "step": 198850 }, { "epoch": 1.3082636526910654, "grad_norm": 0.046703289147671835, "learning_rate": 3.2223227741486495e-06, "loss": 0.0007, "step": 198860 }, { "epoch": 1.3083294408662987, "grad_norm": 0.26854971146710244, "learning_rate": 3.2217861856999253e-06, "loss": 0.0013, "step": 198870 }, { "epoch": 1.308395229041532, "grad_norm": 0.023057267580168204, "learning_rate": 3.2212496206954134e-06, "loss": 0.0005, "step": 198880 }, { "epoch": 1.3084610172167654, "grad_norm": 4.9372927198728814e-05, "learning_rate": 3.2207130791421898e-06, "loss": 0.0002, "step": 198890 }, { "epoch": 1.308526805391999, "grad_norm": 0.01091929693471032, "learning_rate": 3.2201765610473272e-06, "loss": 0.001, "step": 198900 }, { "epoch": 1.3085925935672322, "grad_norm": 0.027733993480082195, "learning_rate": 3.2196400664179e-06, "loss": 0.0011, "step": 198910 }, { "epoch": 1.3086583817424655, "grad_norm": 0.03948462943391624, "learning_rate": 3.2191035952609804e-06, "loss": 0.0006, "step": 198920 }, { "epoch": 1.308724169917699, "grad_norm": 0.001252425844376017, "learning_rate": 3.218567147583643e-06, "loss": 0.0005, "step": 198930 }, { "epoch": 1.3087899580929323, "grad_norm": 0.054413895139690964, "learning_rate": 3.218030723392957e-06, "loss": 0.0005, "step": 198940 }, { "epoch": 1.3088557462681658, "grad_norm": 0.056518211699130905, "learning_rate": 3.2174943226959986e-06, "loss": 0.0007, "step": 198950 }, { "epoch": 1.308921534443399, "grad_norm": 0.021771188968064334, "learning_rate": 3.216957945499838e-06, "loss": 0.0006, "step": 198960 }, { "epoch": 1.3089873226186326, "grad_norm": 0.061232291180509026, "learning_rate": 3.216421591811546e-06, "loss": 0.0006, "step": 198970 }, { "epoch": 1.3090531107938659, "grad_norm": 0.02416247853775065, "learning_rate": 3.2158852616381963e-06, "loss": 0.0007, "step": 198980 }, { "epoch": 1.3091188989690994, "grad_norm": 0.02927288915129762, "learning_rate": 3.2153489549868576e-06, "loss": 0.0006, "step": 198990 }, { "epoch": 1.3091846871443327, "grad_norm": 0.020838985400618654, "learning_rate": 3.214812671864602e-06, "loss": 0.001, "step": 199000 }, { "epoch": 1.309250475319566, "grad_norm": 0.012465870314763768, "learning_rate": 3.2142764122784988e-06, "loss": 0.0004, "step": 199010 }, { "epoch": 1.3093162634947995, "grad_norm": 0.1945001052561119, "learning_rate": 3.213740176235619e-06, "loss": 0.0019, "step": 199020 }, { "epoch": 1.309382051670033, "grad_norm": 0.016555900905785646, "learning_rate": 3.213203963743033e-06, "loss": 0.0007, "step": 199030 }, { "epoch": 1.3094478398452662, "grad_norm": 0.026279396310313733, "learning_rate": 3.2126677748078093e-06, "loss": 0.0005, "step": 199040 }, { "epoch": 1.3095136280204995, "grad_norm": 0.0016182194467642878, "learning_rate": 3.212131609437017e-06, "loss": 0.0006, "step": 199050 }, { "epoch": 1.309579416195733, "grad_norm": 0.027229283408035647, "learning_rate": 3.211595467637726e-06, "loss": 0.0014, "step": 199060 }, { "epoch": 1.3096452043709663, "grad_norm": 0.005020272846204585, "learning_rate": 3.2110593494170027e-06, "loss": 0.0003, "step": 199070 }, { "epoch": 1.3097109925461998, "grad_norm": 0.002731181905804228, "learning_rate": 3.210523254781918e-06, "loss": 0.0007, "step": 199080 }, { "epoch": 1.309776780721433, "grad_norm": 0.00015943715083556684, "learning_rate": 3.209987183739539e-06, "loss": 0.0004, "step": 199090 }, { "epoch": 1.3098425688966664, "grad_norm": 0.011622553994702654, "learning_rate": 3.2094511362969326e-06, "loss": 0.0003, "step": 199100 }, { "epoch": 1.3099083570719, "grad_norm": 0.030104493848269107, "learning_rate": 3.208915112461167e-06, "loss": 0.0008, "step": 199110 }, { "epoch": 1.3099741452471334, "grad_norm": 0.017973700162498395, "learning_rate": 3.208379112239308e-06, "loss": 0.0005, "step": 199120 }, { "epoch": 1.3100399334223667, "grad_norm": 0.04965476064008202, "learning_rate": 3.207843135638424e-06, "loss": 0.0004, "step": 199130 }, { "epoch": 1.3101057215976, "grad_norm": 0.003016986301828052, "learning_rate": 3.2073071826655788e-06, "loss": 0.0005, "step": 199140 }, { "epoch": 1.3101715097728335, "grad_norm": 0.08354210173770221, "learning_rate": 3.2067712533278417e-06, "loss": 0.0006, "step": 199150 }, { "epoch": 1.3102372979480668, "grad_norm": 0.004511856823113409, "learning_rate": 3.206235347632276e-06, "loss": 0.0008, "step": 199160 }, { "epoch": 1.3103030861233003, "grad_norm": 0.023855573167439765, "learning_rate": 3.2056994655859487e-06, "loss": 0.0003, "step": 199170 }, { "epoch": 1.3103688742985335, "grad_norm": 0.002521754917984053, "learning_rate": 3.2051636071959243e-06, "loss": 0.001, "step": 199180 }, { "epoch": 1.3104346624737668, "grad_norm": 0.04264839608200502, "learning_rate": 3.2046277724692674e-06, "loss": 0.0006, "step": 199190 }, { "epoch": 1.3105004506490003, "grad_norm": 0.03591828143017313, "learning_rate": 3.2040919614130415e-06, "loss": 0.0003, "step": 199200 }, { "epoch": 1.3105662388242338, "grad_norm": 0.02088985495437691, "learning_rate": 3.2035561740343135e-06, "loss": 0.0004, "step": 199210 }, { "epoch": 1.3106320269994671, "grad_norm": 0.022981172400317766, "learning_rate": 3.2030204103401454e-06, "loss": 0.0008, "step": 199220 }, { "epoch": 1.3106978151747004, "grad_norm": 0.010330281698499007, "learning_rate": 3.202484670337602e-06, "loss": 0.001, "step": 199230 }, { "epoch": 1.310763603349934, "grad_norm": 0.03977265808221487, "learning_rate": 3.201948954033745e-06, "loss": 0.0006, "step": 199240 }, { "epoch": 1.3108293915251672, "grad_norm": 0.048233141014210223, "learning_rate": 3.2014132614356385e-06, "loss": 0.0006, "step": 199250 }, { "epoch": 1.3108951797004007, "grad_norm": 0.02323870525181744, "learning_rate": 3.200877592550344e-06, "loss": 0.0019, "step": 199260 }, { "epoch": 1.310960967875634, "grad_norm": 0.08588257174615797, "learning_rate": 3.200341947384925e-06, "loss": 0.0008, "step": 199270 }, { "epoch": 1.3110267560508675, "grad_norm": 0.033408030884811264, "learning_rate": 3.199806325946443e-06, "loss": 0.0006, "step": 199280 }, { "epoch": 1.3110925442261008, "grad_norm": 0.04010021543067572, "learning_rate": 3.1992707282419606e-06, "loss": 0.0005, "step": 199290 }, { "epoch": 1.3111583324013343, "grad_norm": 0.04743675398581984, "learning_rate": 3.1987351542785407e-06, "loss": 0.0009, "step": 199300 }, { "epoch": 1.3112241205765676, "grad_norm": 0.015496865969085354, "learning_rate": 3.1981996040632378e-06, "loss": 0.0004, "step": 199310 }, { "epoch": 1.3112899087518008, "grad_norm": 0.016688271572732925, "learning_rate": 3.1976640776031187e-06, "loss": 0.001, "step": 199320 }, { "epoch": 1.3113556969270344, "grad_norm": 0.0014679152494982992, "learning_rate": 3.1971285749052427e-06, "loss": 0.001, "step": 199330 }, { "epoch": 1.3114214851022676, "grad_norm": 0.10782025696517826, "learning_rate": 3.196593095976669e-06, "loss": 0.0008, "step": 199340 }, { "epoch": 1.3114872732775011, "grad_norm": 0.060176934591356146, "learning_rate": 3.196057640824457e-06, "loss": 0.0011, "step": 199350 }, { "epoch": 1.3115530614527344, "grad_norm": 0.006450023269267966, "learning_rate": 3.1955222094556683e-06, "loss": 0.0008, "step": 199360 }, { "epoch": 1.311618849627968, "grad_norm": 0.02023900221342254, "learning_rate": 3.194986801877359e-06, "loss": 0.0007, "step": 199370 }, { "epoch": 1.3116846378032012, "grad_norm": 0.00026321402130490034, "learning_rate": 3.194451418096591e-06, "loss": 0.0006, "step": 199380 }, { "epoch": 1.3117504259784347, "grad_norm": 0.017441754705804384, "learning_rate": 3.1939160581204215e-06, "loss": 0.0006, "step": 199390 }, { "epoch": 1.311816214153668, "grad_norm": 0.03669970383624627, "learning_rate": 3.193380721955909e-06, "loss": 0.0004, "step": 199400 }, { "epoch": 1.3118820023289013, "grad_norm": 0.0016202852358222634, "learning_rate": 3.1928454096101115e-06, "loss": 0.0009, "step": 199410 }, { "epoch": 1.3119477905041348, "grad_norm": 0.10078671597355196, "learning_rate": 3.1923101210900863e-06, "loss": 0.0007, "step": 199420 }, { "epoch": 1.3120135786793683, "grad_norm": 0.00585012658640262, "learning_rate": 3.1917748564028916e-06, "loss": 0.0003, "step": 199430 }, { "epoch": 1.3120793668546016, "grad_norm": 0.012833905188007718, "learning_rate": 3.191239615555582e-06, "loss": 0.0005, "step": 199440 }, { "epoch": 1.3121451550298349, "grad_norm": 0.004088143113382547, "learning_rate": 3.1907043985552173e-06, "loss": 0.0012, "step": 199450 }, { "epoch": 1.3122109432050684, "grad_norm": 0.011694022254263503, "learning_rate": 3.190169205408852e-06, "loss": 0.0027, "step": 199460 }, { "epoch": 1.3122767313803017, "grad_norm": 0.020462939770711633, "learning_rate": 3.1896340361235433e-06, "loss": 0.0006, "step": 199470 }, { "epoch": 1.3123425195555352, "grad_norm": 0.003943818033567449, "learning_rate": 3.189098890706346e-06, "loss": 0.001, "step": 199480 }, { "epoch": 1.3124083077307684, "grad_norm": 0.09165636871691576, "learning_rate": 3.1885637691643155e-06, "loss": 0.0005, "step": 199490 }, { "epoch": 1.3124740959060017, "grad_norm": 0.03932750297516264, "learning_rate": 3.1880286715045074e-06, "loss": 0.0004, "step": 199500 }, { "epoch": 1.3125398840812352, "grad_norm": 0.017264421976942584, "learning_rate": 3.187493597733976e-06, "loss": 0.0008, "step": 199510 }, { "epoch": 1.3126056722564687, "grad_norm": 0.022513120484620015, "learning_rate": 3.1869585478597764e-06, "loss": 0.0007, "step": 199520 }, { "epoch": 1.312671460431702, "grad_norm": 0.0043383801662226955, "learning_rate": 3.1864235218889634e-06, "loss": 0.0005, "step": 199530 }, { "epoch": 1.3127372486069353, "grad_norm": 0.0005012981331140405, "learning_rate": 3.1858885198285895e-06, "loss": 0.0008, "step": 199540 }, { "epoch": 1.3128030367821688, "grad_norm": 0.0003009965577422744, "learning_rate": 3.1853535416857083e-06, "loss": 0.0004, "step": 199550 }, { "epoch": 1.312868824957402, "grad_norm": 0.018499501327195883, "learning_rate": 3.184818587467373e-06, "loss": 0.0004, "step": 199560 }, { "epoch": 1.3129346131326356, "grad_norm": 0.07130446693925452, "learning_rate": 3.1842836571806366e-06, "loss": 0.0009, "step": 199570 }, { "epoch": 1.3130004013078689, "grad_norm": 0.007282788872076596, "learning_rate": 3.183748750832554e-06, "loss": 0.0011, "step": 199580 }, { "epoch": 1.3130661894831024, "grad_norm": 0.08583481207320066, "learning_rate": 3.183213868430174e-06, "loss": 0.0007, "step": 199590 }, { "epoch": 1.3131319776583357, "grad_norm": 0.00647504418413408, "learning_rate": 3.1826790099805503e-06, "loss": 0.0003, "step": 199600 }, { "epoch": 1.3131977658335692, "grad_norm": 0.011509802371286178, "learning_rate": 3.182144175490735e-06, "loss": 0.0003, "step": 199610 }, { "epoch": 1.3132635540088025, "grad_norm": 0.008394799118194013, "learning_rate": 3.181609364967778e-06, "loss": 0.0009, "step": 199620 }, { "epoch": 1.3133293421840357, "grad_norm": 0.2628446787659213, "learning_rate": 3.181074578418731e-06, "loss": 0.0007, "step": 199630 }, { "epoch": 1.3133951303592692, "grad_norm": 0.11264220011224411, "learning_rate": 3.180539815850645e-06, "loss": 0.0006, "step": 199640 }, { "epoch": 1.3134609185345025, "grad_norm": 0.04808149933753513, "learning_rate": 3.1800050772705704e-06, "loss": 0.0008, "step": 199650 }, { "epoch": 1.313526706709736, "grad_norm": 0.0952254628693653, "learning_rate": 3.1794703626855568e-06, "loss": 0.0009, "step": 199660 }, { "epoch": 1.3135924948849693, "grad_norm": 0.008376598059201618, "learning_rate": 3.1789356721026544e-06, "loss": 0.0004, "step": 199670 }, { "epoch": 1.3136582830602028, "grad_norm": 0.03815017132501553, "learning_rate": 3.1784010055289116e-06, "loss": 0.0011, "step": 199680 }, { "epoch": 1.313724071235436, "grad_norm": 0.10068820332174251, "learning_rate": 3.177866362971379e-06, "loss": 0.0008, "step": 199690 }, { "epoch": 1.3137898594106696, "grad_norm": 0.016422081499789975, "learning_rate": 3.177331744437104e-06, "loss": 0.001, "step": 199700 }, { "epoch": 1.313855647585903, "grad_norm": 0.02563226025972104, "learning_rate": 3.1767971499331363e-06, "loss": 0.0004, "step": 199710 }, { "epoch": 1.3139214357611362, "grad_norm": 0.046216042612753215, "learning_rate": 3.1762625794665236e-06, "loss": 0.0011, "step": 199720 }, { "epoch": 1.3139872239363697, "grad_norm": 0.11536178042135146, "learning_rate": 3.175728033044314e-06, "loss": 0.0015, "step": 199730 }, { "epoch": 1.3140530121116032, "grad_norm": 0.15682940145216942, "learning_rate": 3.1751935106735543e-06, "loss": 0.0009, "step": 199740 }, { "epoch": 1.3141188002868365, "grad_norm": 0.0713272833107933, "learning_rate": 3.174659012361293e-06, "loss": 0.0007, "step": 199750 }, { "epoch": 1.3141845884620698, "grad_norm": 0.028059743900757526, "learning_rate": 3.174124538114574e-06, "loss": 0.0004, "step": 199760 }, { "epoch": 1.3142503766373033, "grad_norm": 0.004960360954690826, "learning_rate": 3.173590087940448e-06, "loss": 0.0008, "step": 199770 }, { "epoch": 1.3143161648125365, "grad_norm": 0.04663090836516799, "learning_rate": 3.1730556618459586e-06, "loss": 0.0006, "step": 199780 }, { "epoch": 1.31438195298777, "grad_norm": 0.007402275068404234, "learning_rate": 3.172521259838153e-06, "loss": 0.0005, "step": 199790 }, { "epoch": 1.3144477411630033, "grad_norm": 0.08138240291003827, "learning_rate": 3.171986881924076e-06, "loss": 0.0007, "step": 199800 }, { "epoch": 1.3145135293382366, "grad_norm": 0.10698865480325843, "learning_rate": 3.1714525281107735e-06, "loss": 0.0016, "step": 199810 }, { "epoch": 1.3145793175134701, "grad_norm": 0.04751071654359695, "learning_rate": 3.1709181984052893e-06, "loss": 0.0008, "step": 199820 }, { "epoch": 1.3146451056887036, "grad_norm": 0.08480843785078035, "learning_rate": 3.1703838928146693e-06, "loss": 0.0009, "step": 199830 }, { "epoch": 1.314710893863937, "grad_norm": 0.04704307847147239, "learning_rate": 3.1698496113459577e-06, "loss": 0.0011, "step": 199840 }, { "epoch": 1.3147766820391702, "grad_norm": 0.003107866577343849, "learning_rate": 3.1693153540061985e-06, "loss": 0.0006, "step": 199850 }, { "epoch": 1.3148424702144037, "grad_norm": 0.004494879425481834, "learning_rate": 3.1687811208024343e-06, "loss": 0.0005, "step": 199860 }, { "epoch": 1.314908258389637, "grad_norm": 0.013744638664650262, "learning_rate": 3.1682469117417104e-06, "loss": 0.0007, "step": 199870 }, { "epoch": 1.3149740465648705, "grad_norm": 0.05470400377759111, "learning_rate": 3.1677127268310693e-06, "loss": 0.0006, "step": 199880 }, { "epoch": 1.3150398347401038, "grad_norm": 0.09148058513930482, "learning_rate": 3.167178566077552e-06, "loss": 0.0009, "step": 199890 }, { "epoch": 1.3151056229153373, "grad_norm": 0.013538133505660698, "learning_rate": 3.1666444294882035e-06, "loss": 0.0004, "step": 199900 }, { "epoch": 1.3151714110905706, "grad_norm": 0.014059633520623243, "learning_rate": 3.1661103170700646e-06, "loss": 0.0004, "step": 199910 }, { "epoch": 1.315237199265804, "grad_norm": 0.0140972455887298, "learning_rate": 3.1655762288301774e-06, "loss": 0.0005, "step": 199920 }, { "epoch": 1.3153029874410374, "grad_norm": 0.005984457898926181, "learning_rate": 3.1650421647755834e-06, "loss": 0.0027, "step": 199930 }, { "epoch": 1.3153687756162706, "grad_norm": 0.006255177811083395, "learning_rate": 3.1645081249133236e-06, "loss": 0.0003, "step": 199940 }, { "epoch": 1.3154345637915041, "grad_norm": 0.04436156210573207, "learning_rate": 3.1639741092504394e-06, "loss": 0.0005, "step": 199950 }, { "epoch": 1.3155003519667374, "grad_norm": 0.01297408922508822, "learning_rate": 3.1634401177939687e-06, "loss": 0.0015, "step": 199960 }, { "epoch": 1.315566140141971, "grad_norm": 0.03493237185964197, "learning_rate": 3.1629061505509563e-06, "loss": 0.0007, "step": 199970 }, { "epoch": 1.3156319283172042, "grad_norm": 0.02328337584956089, "learning_rate": 3.162372207528439e-06, "loss": 0.0004, "step": 199980 }, { "epoch": 1.3156977164924377, "grad_norm": 0.09533162286168056, "learning_rate": 3.161838288733457e-06, "loss": 0.0012, "step": 199990 }, { "epoch": 1.315763504667671, "grad_norm": 0.016583728596215527, "learning_rate": 3.16130439417305e-06, "loss": 0.0004, "step": 200000 }, { "epoch": 1.315763504667671, "eval_loss": 0.0004762701573781669, "eval_runtime": 13.0967, "eval_samples_per_second": 15.271, "eval_steps_per_second": 7.636, "step": 200000 }, { "epoch": 1.3158292928429045, "grad_norm": 0.005324102034359119, "learning_rate": 3.1607705238542562e-06, "loss": 0.0005, "step": 200010 }, { "epoch": 1.3158950810181378, "grad_norm": 0.0001626338979511114, "learning_rate": 3.160236677784113e-06, "loss": 0.0006, "step": 200020 }, { "epoch": 1.315960869193371, "grad_norm": 0.04547418592035173, "learning_rate": 3.1597028559696624e-06, "loss": 0.0005, "step": 200030 }, { "epoch": 1.3160266573686046, "grad_norm": 0.0381917223137442, "learning_rate": 3.15916905841794e-06, "loss": 0.0004, "step": 200040 }, { "epoch": 1.316092445543838, "grad_norm": 0.021160896840387265, "learning_rate": 3.1586352851359836e-06, "loss": 0.0006, "step": 200050 }, { "epoch": 1.3161582337190714, "grad_norm": 0.0007661779802316068, "learning_rate": 3.1581015361308305e-06, "loss": 0.0003, "step": 200060 }, { "epoch": 1.3162240218943047, "grad_norm": 0.02235371282892607, "learning_rate": 3.1575678114095174e-06, "loss": 0.0007, "step": 200070 }, { "epoch": 1.3162898100695382, "grad_norm": 0.01509113547158038, "learning_rate": 3.1570341109790827e-06, "loss": 0.0004, "step": 200080 }, { "epoch": 1.3163555982447714, "grad_norm": 0.009760582450079896, "learning_rate": 3.15650043484656e-06, "loss": 0.0003, "step": 200090 }, { "epoch": 1.316421386420005, "grad_norm": 0.03906701192059846, "learning_rate": 3.1559667830189878e-06, "loss": 0.001, "step": 200100 }, { "epoch": 1.3164871745952382, "grad_norm": 0.041197848883951387, "learning_rate": 3.1554331555034006e-06, "loss": 0.0005, "step": 200110 }, { "epoch": 1.3165529627704715, "grad_norm": 0.06655643627628133, "learning_rate": 3.154899552306835e-06, "loss": 0.0005, "step": 200120 }, { "epoch": 1.316618750945705, "grad_norm": 0.11029440483083872, "learning_rate": 3.154365973436324e-06, "loss": 0.0008, "step": 200130 }, { "epoch": 1.3166845391209385, "grad_norm": 0.06107885111129934, "learning_rate": 3.1538324188989047e-06, "loss": 0.0007, "step": 200140 }, { "epoch": 1.3167503272961718, "grad_norm": 0.019143840396039443, "learning_rate": 3.153298888701609e-06, "loss": 0.0005, "step": 200150 }, { "epoch": 1.316816115471405, "grad_norm": 0.011572262598792534, "learning_rate": 3.152765382851474e-06, "loss": 0.0004, "step": 200160 }, { "epoch": 1.3168819036466386, "grad_norm": 0.021525772040702936, "learning_rate": 3.152231901355532e-06, "loss": 0.0003, "step": 200170 }, { "epoch": 1.3169476918218719, "grad_norm": 0.041822901893162914, "learning_rate": 3.1516984442208165e-06, "loss": 0.0014, "step": 200180 }, { "epoch": 1.3170134799971054, "grad_norm": 0.04597730042732389, "learning_rate": 3.15116501145436e-06, "loss": 0.001, "step": 200190 }, { "epoch": 1.3170792681723387, "grad_norm": 0.07654830351090278, "learning_rate": 3.150631603063197e-06, "loss": 0.0006, "step": 200200 }, { "epoch": 1.317145056347572, "grad_norm": 0.034289920197552566, "learning_rate": 3.1500982190543593e-06, "loss": 0.0004, "step": 200210 }, { "epoch": 1.3172108445228055, "grad_norm": 0.012058689717585636, "learning_rate": 3.1495648594348773e-06, "loss": 0.0003, "step": 200220 }, { "epoch": 1.317276632698039, "grad_norm": 0.04325317006795779, "learning_rate": 3.149031524211785e-06, "loss": 0.0011, "step": 200230 }, { "epoch": 1.3173424208732722, "grad_norm": 0.3266543567299963, "learning_rate": 3.148498213392114e-06, "loss": 0.0011, "step": 200240 }, { "epoch": 1.3174082090485055, "grad_norm": 0.03905949737725363, "learning_rate": 3.1479649269828956e-06, "loss": 0.0005, "step": 200250 }, { "epoch": 1.317473997223739, "grad_norm": 0.025477033191089727, "learning_rate": 3.1474316649911596e-06, "loss": 0.0004, "step": 200260 }, { "epoch": 1.3175397853989723, "grad_norm": 0.0038709707117562096, "learning_rate": 3.146898427423937e-06, "loss": 0.0008, "step": 200270 }, { "epoch": 1.3176055735742058, "grad_norm": 0.00963140512411295, "learning_rate": 3.146365214288257e-06, "loss": 0.0017, "step": 200280 }, { "epoch": 1.317671361749439, "grad_norm": 0.028934694805388225, "learning_rate": 3.1458320255911524e-06, "loss": 0.0007, "step": 200290 }, { "epoch": 1.3177371499246726, "grad_norm": 0.02426456641781308, "learning_rate": 3.1452988613396507e-06, "loss": 0.0005, "step": 200300 }, { "epoch": 1.317802938099906, "grad_norm": 0.004003894675115712, "learning_rate": 3.144765721540781e-06, "loss": 0.001, "step": 200310 }, { "epoch": 1.3178687262751394, "grad_norm": 0.05279402704703559, "learning_rate": 3.1442326062015737e-06, "loss": 0.0008, "step": 200320 }, { "epoch": 1.3179345144503727, "grad_norm": 0.009758273702297, "learning_rate": 3.143699515329057e-06, "loss": 0.0007, "step": 200330 }, { "epoch": 1.318000302625606, "grad_norm": 0.05803179800224077, "learning_rate": 3.1431664489302587e-06, "loss": 0.0015, "step": 200340 }, { "epoch": 1.3180660908008395, "grad_norm": 0.0008270002481585812, "learning_rate": 3.1426334070122065e-06, "loss": 0.0004, "step": 200350 }, { "epoch": 1.318131878976073, "grad_norm": 0.04892689295412437, "learning_rate": 3.142100389581929e-06, "loss": 0.0008, "step": 200360 }, { "epoch": 1.3181976671513063, "grad_norm": 0.0250407046225793, "learning_rate": 3.141567396646454e-06, "loss": 0.0006, "step": 200370 }, { "epoch": 1.3182634553265395, "grad_norm": 0.01942542917091113, "learning_rate": 3.141034428212807e-06, "loss": 0.0012, "step": 200380 }, { "epoch": 1.318329243501773, "grad_norm": 0.05313152732823037, "learning_rate": 3.1405014842880167e-06, "loss": 0.0005, "step": 200390 }, { "epoch": 1.3183950316770063, "grad_norm": 0.048646872422372776, "learning_rate": 3.1399685648791074e-06, "loss": 0.0005, "step": 200400 }, { "epoch": 1.3184608198522398, "grad_norm": 0.05564307459991999, "learning_rate": 3.139435669993105e-06, "loss": 0.0005, "step": 200410 }, { "epoch": 1.3185266080274731, "grad_norm": 0.017777583787193355, "learning_rate": 3.138902799637038e-06, "loss": 0.0005, "step": 200420 }, { "epoch": 1.3185923962027064, "grad_norm": 0.013434577355039508, "learning_rate": 3.1383699538179302e-06, "loss": 0.0003, "step": 200430 }, { "epoch": 1.31865818437794, "grad_norm": 0.020370851020057285, "learning_rate": 3.1378371325428065e-06, "loss": 0.0009, "step": 200440 }, { "epoch": 1.3187239725531734, "grad_norm": 0.06837488206115093, "learning_rate": 3.137304335818692e-06, "loss": 0.0006, "step": 200450 }, { "epoch": 1.3187897607284067, "grad_norm": 0.047322931309522205, "learning_rate": 3.136771563652611e-06, "loss": 0.0005, "step": 200460 }, { "epoch": 1.31885554890364, "grad_norm": 0.00786544031459838, "learning_rate": 3.1362388160515876e-06, "loss": 0.0005, "step": 200470 }, { "epoch": 1.3189213370788735, "grad_norm": 8.940978710724297e-05, "learning_rate": 3.1357060930226457e-06, "loss": 0.0006, "step": 200480 }, { "epoch": 1.3189871252541068, "grad_norm": 0.004534763484793889, "learning_rate": 3.1351733945728093e-06, "loss": 0.0009, "step": 200490 }, { "epoch": 1.3190529134293403, "grad_norm": 0.029522758759224794, "learning_rate": 3.1346407207091008e-06, "loss": 0.0004, "step": 200500 }, { "epoch": 1.3191187016045736, "grad_norm": 0.00183868647337079, "learning_rate": 3.1341080714385436e-06, "loss": 0.0005, "step": 200510 }, { "epoch": 1.3191844897798068, "grad_norm": 0.060125615169327226, "learning_rate": 3.1335754467681602e-06, "loss": 0.0005, "step": 200520 }, { "epoch": 1.3192502779550404, "grad_norm": 0.02023392555526766, "learning_rate": 3.133042846704972e-06, "loss": 0.0003, "step": 200530 }, { "epoch": 1.3193160661302739, "grad_norm": 0.018051042123944268, "learning_rate": 3.1325102712560006e-06, "loss": 0.0004, "step": 200540 }, { "epoch": 1.3193818543055071, "grad_norm": 0.12016057916231257, "learning_rate": 3.13197772042827e-06, "loss": 0.0005, "step": 200550 }, { "epoch": 1.3194476424807404, "grad_norm": 0.11447675984913205, "learning_rate": 3.1314451942287993e-06, "loss": 0.001, "step": 200560 }, { "epoch": 1.319513430655974, "grad_norm": 0.016995686935733927, "learning_rate": 3.1309126926646107e-06, "loss": 0.0005, "step": 200570 }, { "epoch": 1.3195792188312072, "grad_norm": 0.012809496256227052, "learning_rate": 3.1303802157427234e-06, "loss": 0.0005, "step": 200580 }, { "epoch": 1.3196450070064407, "grad_norm": 0.08856829859443234, "learning_rate": 3.1298477634701573e-06, "loss": 0.0009, "step": 200590 }, { "epoch": 1.319710795181674, "grad_norm": 0.03693502987235695, "learning_rate": 3.1293153358539347e-06, "loss": 0.0004, "step": 200600 }, { "epoch": 1.3197765833569075, "grad_norm": 0.059061599789746864, "learning_rate": 3.1287829329010723e-06, "loss": 0.0005, "step": 200610 }, { "epoch": 1.3198423715321408, "grad_norm": 0.0008396768803149025, "learning_rate": 3.1282505546185913e-06, "loss": 0.0008, "step": 200620 }, { "epoch": 1.3199081597073743, "grad_norm": 0.00046644415063157714, "learning_rate": 3.127718201013511e-06, "loss": 0.0009, "step": 200630 }, { "epoch": 1.3199739478826076, "grad_norm": 0.03609108600786295, "learning_rate": 3.1271858720928487e-06, "loss": 0.0003, "step": 200640 }, { "epoch": 1.3200397360578409, "grad_norm": 0.019345640367163516, "learning_rate": 3.1266535678636232e-06, "loss": 0.0008, "step": 200650 }, { "epoch": 1.3201055242330744, "grad_norm": 0.036169593661158995, "learning_rate": 3.1261212883328528e-06, "loss": 0.0006, "step": 200660 }, { "epoch": 1.3201713124083077, "grad_norm": 0.020642456101984185, "learning_rate": 3.125589033507554e-06, "loss": 0.0003, "step": 200670 }, { "epoch": 1.3202371005835412, "grad_norm": 0.033177160844043416, "learning_rate": 3.1250568033947458e-06, "loss": 0.0007, "step": 200680 }, { "epoch": 1.3203028887587744, "grad_norm": 0.017453463627265995, "learning_rate": 3.1245245980014437e-06, "loss": 0.0006, "step": 200690 }, { "epoch": 1.320368676934008, "grad_norm": 0.004991636124741247, "learning_rate": 3.1239924173346657e-06, "loss": 0.0008, "step": 200700 }, { "epoch": 1.3204344651092412, "grad_norm": 0.05433521867880174, "learning_rate": 3.1234602614014275e-06, "loss": 0.0004, "step": 200710 }, { "epoch": 1.3205002532844747, "grad_norm": 0.018794967286893047, "learning_rate": 3.122928130208745e-06, "loss": 0.0009, "step": 200720 }, { "epoch": 1.320566041459708, "grad_norm": 0.0014577210504469927, "learning_rate": 3.1223960237636337e-06, "loss": 0.0006, "step": 200730 }, { "epoch": 1.3206318296349413, "grad_norm": 0.03453534015500797, "learning_rate": 3.121863942073108e-06, "loss": 0.0013, "step": 200740 }, { "epoch": 1.3206976178101748, "grad_norm": 0.07714657705187121, "learning_rate": 3.1213318851441854e-06, "loss": 0.0007, "step": 200750 }, { "epoch": 1.3207634059854083, "grad_norm": 0.003925250965391842, "learning_rate": 3.12079985298388e-06, "loss": 0.001, "step": 200760 }, { "epoch": 1.3208291941606416, "grad_norm": 0.042134927201482465, "learning_rate": 3.120267845599205e-06, "loss": 0.0006, "step": 200770 }, { "epoch": 1.3208949823358749, "grad_norm": 0.000551005596365666, "learning_rate": 3.119735862997175e-06, "loss": 0.0004, "step": 200780 }, { "epoch": 1.3209607705111084, "grad_norm": 0.01721950102579579, "learning_rate": 3.1192039051848033e-06, "loss": 0.001, "step": 200790 }, { "epoch": 1.3210265586863417, "grad_norm": 0.019237846945672453, "learning_rate": 3.1186719721691032e-06, "loss": 0.0008, "step": 200800 }, { "epoch": 1.3210923468615752, "grad_norm": 0.051852968324584685, "learning_rate": 3.1181400639570892e-06, "loss": 0.0008, "step": 200810 }, { "epoch": 1.3211581350368085, "grad_norm": 0.03022882815080782, "learning_rate": 3.1176081805557735e-06, "loss": 0.0007, "step": 200820 }, { "epoch": 1.3212239232120417, "grad_norm": 0.04500043406523825, "learning_rate": 3.117076321972168e-06, "loss": 0.0011, "step": 200830 }, { "epoch": 1.3212897113872752, "grad_norm": 0.06033801669449433, "learning_rate": 3.116544488213285e-06, "loss": 0.0006, "step": 200840 }, { "epoch": 1.3213554995625088, "grad_norm": 0.04878223343436023, "learning_rate": 3.116012679286137e-06, "loss": 0.001, "step": 200850 }, { "epoch": 1.321421287737742, "grad_norm": 0.033510203266674345, "learning_rate": 3.1154808951977334e-06, "loss": 0.0004, "step": 200860 }, { "epoch": 1.3214870759129753, "grad_norm": 0.026889065820166404, "learning_rate": 3.1149491359550865e-06, "loss": 0.0005, "step": 200870 }, { "epoch": 1.3215528640882088, "grad_norm": 0.025107172557196536, "learning_rate": 3.1144174015652074e-06, "loss": 0.0023, "step": 200880 }, { "epoch": 1.321618652263442, "grad_norm": 0.004467681263537065, "learning_rate": 3.1138856920351067e-06, "loss": 0.0012, "step": 200890 }, { "epoch": 1.3216844404386756, "grad_norm": 0.052967909503528664, "learning_rate": 3.1133540073717948e-06, "loss": 0.0011, "step": 200900 }, { "epoch": 1.321750228613909, "grad_norm": 0.017800901592075622, "learning_rate": 3.1128223475822805e-06, "loss": 0.003, "step": 200910 }, { "epoch": 1.3218160167891424, "grad_norm": 0.0015317479535845318, "learning_rate": 3.1122907126735733e-06, "loss": 0.0005, "step": 200920 }, { "epoch": 1.3218818049643757, "grad_norm": 0.04735502511289913, "learning_rate": 3.111759102652682e-06, "loss": 0.0005, "step": 200930 }, { "epoch": 1.3219475931396092, "grad_norm": 0.02622108972089227, "learning_rate": 3.111227517526617e-06, "loss": 0.001, "step": 200940 }, { "epoch": 1.3220133813148425, "grad_norm": 0.07320137122680233, "learning_rate": 3.1106959573023863e-06, "loss": 0.0006, "step": 200950 }, { "epoch": 1.3220791694900758, "grad_norm": 0.008081036336659703, "learning_rate": 3.1101644219869974e-06, "loss": 0.0003, "step": 200960 }, { "epoch": 1.3221449576653093, "grad_norm": 0.0365165021594104, "learning_rate": 3.109632911587459e-06, "loss": 0.0007, "step": 200970 }, { "epoch": 1.3222107458405425, "grad_norm": 0.011201596611937818, "learning_rate": 3.109101426110778e-06, "loss": 0.0007, "step": 200980 }, { "epoch": 1.322276534015776, "grad_norm": 0.018406417846483836, "learning_rate": 3.1085699655639604e-06, "loss": 0.0011, "step": 200990 }, { "epoch": 1.3223423221910093, "grad_norm": 0.004494291816251885, "learning_rate": 3.108038529954014e-06, "loss": 0.0007, "step": 201000 }, { "epoch": 1.3224081103662428, "grad_norm": 0.0016336564126097424, "learning_rate": 3.1075071192879467e-06, "loss": 0.0009, "step": 201010 }, { "epoch": 1.3224738985414761, "grad_norm": 0.017306829096384067, "learning_rate": 3.1069757335727637e-06, "loss": 0.0008, "step": 201020 }, { "epoch": 1.3225396867167096, "grad_norm": 0.02481397272162344, "learning_rate": 3.1064443728154704e-06, "loss": 0.0002, "step": 201030 }, { "epoch": 1.322605474891943, "grad_norm": 0.04067121419766621, "learning_rate": 3.1059130370230727e-06, "loss": 0.0006, "step": 201040 }, { "epoch": 1.3226712630671762, "grad_norm": 0.009111140050670445, "learning_rate": 3.105381726202576e-06, "loss": 0.0004, "step": 201050 }, { "epoch": 1.3227370512424097, "grad_norm": 0.038891945199878784, "learning_rate": 3.1048504403609838e-06, "loss": 0.0003, "step": 201060 }, { "epoch": 1.3228028394176432, "grad_norm": 0.01812434846818925, "learning_rate": 3.104319179505303e-06, "loss": 0.0006, "step": 201070 }, { "epoch": 1.3228686275928765, "grad_norm": 0.02633863619340767, "learning_rate": 3.103787943642536e-06, "loss": 0.0005, "step": 201080 }, { "epoch": 1.3229344157681098, "grad_norm": 0.015033986311073012, "learning_rate": 3.103256732779688e-06, "loss": 0.0007, "step": 201090 }, { "epoch": 1.3230002039433433, "grad_norm": 0.02502418028289593, "learning_rate": 3.1027255469237615e-06, "loss": 0.0007, "step": 201100 }, { "epoch": 1.3230659921185766, "grad_norm": 0.027324976218593935, "learning_rate": 3.1021943860817597e-06, "loss": 0.0003, "step": 201110 }, { "epoch": 1.32313178029381, "grad_norm": 0.05373527241283311, "learning_rate": 3.1016632502606864e-06, "loss": 0.0005, "step": 201120 }, { "epoch": 1.3231975684690434, "grad_norm": 0.03244261453506318, "learning_rate": 3.101132139467542e-06, "loss": 0.0005, "step": 201130 }, { "epoch": 1.3232633566442766, "grad_norm": 0.023375823384642, "learning_rate": 3.1006010537093324e-06, "loss": 0.0006, "step": 201140 }, { "epoch": 1.3233291448195101, "grad_norm": 0.0002463394855783626, "learning_rate": 3.1000699929930565e-06, "loss": 0.001, "step": 201150 }, { "epoch": 1.3233949329947436, "grad_norm": 0.05347553485649703, "learning_rate": 3.099538957325717e-06, "loss": 0.0004, "step": 201160 }, { "epoch": 1.323460721169977, "grad_norm": 0.008055195005517027, "learning_rate": 3.0990079467143157e-06, "loss": 0.0005, "step": 201170 }, { "epoch": 1.3235265093452102, "grad_norm": 0.07282570192431552, "learning_rate": 3.0984769611658527e-06, "loss": 0.0012, "step": 201180 }, { "epoch": 1.3235922975204437, "grad_norm": 0.05760399909143642, "learning_rate": 3.0979460006873274e-06, "loss": 0.001, "step": 201190 }, { "epoch": 1.323658085695677, "grad_norm": 0.005211700311990863, "learning_rate": 3.097415065285744e-06, "loss": 0.001, "step": 201200 }, { "epoch": 1.3237238738709105, "grad_norm": 0.03661341382938256, "learning_rate": 3.0968841549680984e-06, "loss": 0.0006, "step": 201210 }, { "epoch": 1.3237896620461438, "grad_norm": 0.04702731918539228, "learning_rate": 3.0963532697413913e-06, "loss": 0.0006, "step": 201220 }, { "epoch": 1.323855450221377, "grad_norm": 0.0011398645585618788, "learning_rate": 3.095822409612621e-06, "loss": 0.0009, "step": 201230 }, { "epoch": 1.3239212383966106, "grad_norm": 0.03990696949019766, "learning_rate": 3.095291574588789e-06, "loss": 0.0004, "step": 201240 }, { "epoch": 1.323987026571844, "grad_norm": 0.028764092900554896, "learning_rate": 3.094760764676893e-06, "loss": 0.0005, "step": 201250 }, { "epoch": 1.3240528147470774, "grad_norm": 0.014203893076878703, "learning_rate": 3.0942299798839305e-06, "loss": 0.0005, "step": 201260 }, { "epoch": 1.3241186029223107, "grad_norm": 0.035273543256247666, "learning_rate": 3.093699220216899e-06, "loss": 0.0007, "step": 201270 }, { "epoch": 1.3241843910975442, "grad_norm": 0.010608793634359764, "learning_rate": 3.093168485682798e-06, "loss": 0.0003, "step": 201280 }, { "epoch": 1.3242501792727774, "grad_norm": 0.042506362614301976, "learning_rate": 3.0926377762886227e-06, "loss": 0.0009, "step": 201290 }, { "epoch": 1.324315967448011, "grad_norm": 0.012629853143143977, "learning_rate": 3.092107092041371e-06, "loss": 0.0003, "step": 201300 }, { "epoch": 1.3243817556232442, "grad_norm": 0.008216473001448064, "learning_rate": 3.09157643294804e-06, "loss": 0.0002, "step": 201310 }, { "epoch": 1.3244475437984777, "grad_norm": 0.02126566856391434, "learning_rate": 3.091045799015625e-06, "loss": 0.0002, "step": 201320 }, { "epoch": 1.324513331973711, "grad_norm": 0.02297534629526847, "learning_rate": 3.0905151902511234e-06, "loss": 0.0007, "step": 201330 }, { "epoch": 1.3245791201489445, "grad_norm": 0.03719855591924845, "learning_rate": 3.0899846066615297e-06, "loss": 0.0009, "step": 201340 }, { "epoch": 1.3246449083241778, "grad_norm": 0.013058499443657388, "learning_rate": 3.0894540482538392e-06, "loss": 0.0009, "step": 201350 }, { "epoch": 1.324710696499411, "grad_norm": 0.07949487399888173, "learning_rate": 3.088923515035046e-06, "loss": 0.0005, "step": 201360 }, { "epoch": 1.3247764846746446, "grad_norm": 0.02976487958821802, "learning_rate": 3.0883930070121465e-06, "loss": 0.0004, "step": 201370 }, { "epoch": 1.324842272849878, "grad_norm": 0.033121447877916425, "learning_rate": 3.0878625241921343e-06, "loss": 0.0005, "step": 201380 }, { "epoch": 1.3249080610251114, "grad_norm": 0.0002211146383582504, "learning_rate": 3.087332066582004e-06, "loss": 0.0005, "step": 201390 }, { "epoch": 1.3249738492003447, "grad_norm": 0.025704734953516824, "learning_rate": 3.0868016341887473e-06, "loss": 0.0005, "step": 201400 }, { "epoch": 1.3250396373755782, "grad_norm": 0.03475983390456039, "learning_rate": 3.0862712270193597e-06, "loss": 0.0008, "step": 201410 }, { "epoch": 1.3251054255508115, "grad_norm": 0.01789944737860677, "learning_rate": 3.0857408450808326e-06, "loss": 0.001, "step": 201420 }, { "epoch": 1.325171213726045, "grad_norm": 0.011517616492593667, "learning_rate": 3.085210488380158e-06, "loss": 0.0004, "step": 201430 }, { "epoch": 1.3252370019012782, "grad_norm": 0.001821605346172718, "learning_rate": 3.0846801569243314e-06, "loss": 0.0005, "step": 201440 }, { "epoch": 1.3253027900765115, "grad_norm": 0.0016322628991938951, "learning_rate": 3.0841498507203423e-06, "loss": 0.0006, "step": 201450 }, { "epoch": 1.325368578251745, "grad_norm": 0.04419120105261779, "learning_rate": 3.0836195697751827e-06, "loss": 0.0006, "step": 201460 }, { "epoch": 1.3254343664269785, "grad_norm": 0.0028445497335830958, "learning_rate": 3.083089314095844e-06, "loss": 0.0007, "step": 201470 }, { "epoch": 1.3255001546022118, "grad_norm": 0.025437879705762678, "learning_rate": 3.082559083689317e-06, "loss": 0.0008, "step": 201480 }, { "epoch": 1.325565942777445, "grad_norm": 0.011693177063330243, "learning_rate": 3.082028878562591e-06, "loss": 0.0005, "step": 201490 }, { "epoch": 1.3256317309526786, "grad_norm": 0.007011475245679445, "learning_rate": 3.081498698722659e-06, "loss": 0.0006, "step": 201500 }, { "epoch": 1.325697519127912, "grad_norm": 0.06217956837276296, "learning_rate": 3.0809685441765103e-06, "loss": 0.0006, "step": 201510 }, { "epoch": 1.3257633073031454, "grad_norm": 0.007603966958851941, "learning_rate": 3.080438414931134e-06, "loss": 0.0004, "step": 201520 }, { "epoch": 1.3258290954783787, "grad_norm": 0.018154890562631906, "learning_rate": 3.079908310993519e-06, "loss": 0.0005, "step": 201530 }, { "epoch": 1.325894883653612, "grad_norm": 0.11166252215590923, "learning_rate": 3.0793782323706543e-06, "loss": 0.0011, "step": 201540 }, { "epoch": 1.3259606718288455, "grad_norm": 0.006890745076962402, "learning_rate": 3.0788481790695296e-06, "loss": 0.0005, "step": 201550 }, { "epoch": 1.326026460004079, "grad_norm": 0.1093089426858653, "learning_rate": 3.078318151097131e-06, "loss": 0.0009, "step": 201560 }, { "epoch": 1.3260922481793123, "grad_norm": 0.02871141702167357, "learning_rate": 3.077788148460449e-06, "loss": 0.0006, "step": 201570 }, { "epoch": 1.3261580363545455, "grad_norm": 0.03501741723676291, "learning_rate": 3.07725817116647e-06, "loss": 0.0009, "step": 201580 }, { "epoch": 1.326223824529779, "grad_norm": 0.01782188383893737, "learning_rate": 3.0767282192221815e-06, "loss": 0.0004, "step": 201590 }, { "epoch": 1.3262896127050123, "grad_norm": 0.005565567647611107, "learning_rate": 3.076198292634571e-06, "loss": 0.0006, "step": 201600 }, { "epoch": 1.3263554008802458, "grad_norm": 0.0278648776064393, "learning_rate": 3.075668391410623e-06, "loss": 0.0003, "step": 201610 }, { "epoch": 1.3264211890554791, "grad_norm": 0.06447200301672956, "learning_rate": 3.0751385155573253e-06, "loss": 0.0006, "step": 201620 }, { "epoch": 1.3264869772307126, "grad_norm": 0.12089604627720239, "learning_rate": 3.074608665081665e-06, "loss": 0.0008, "step": 201630 }, { "epoch": 1.326552765405946, "grad_norm": 0.05120987176908442, "learning_rate": 3.074078839990626e-06, "loss": 0.0007, "step": 201640 }, { "epoch": 1.3266185535811794, "grad_norm": 0.035865657606993846, "learning_rate": 3.073549040291195e-06, "loss": 0.0016, "step": 201650 }, { "epoch": 1.3266843417564127, "grad_norm": 0.029492844080464858, "learning_rate": 3.0730192659903555e-06, "loss": 0.0003, "step": 201660 }, { "epoch": 1.326750129931646, "grad_norm": 0.013205109240534469, "learning_rate": 3.0724895170950932e-06, "loss": 0.0007, "step": 201670 }, { "epoch": 1.3268159181068795, "grad_norm": 0.0021340349170435517, "learning_rate": 3.0719597936123914e-06, "loss": 0.0004, "step": 201680 }, { "epoch": 1.3268817062821128, "grad_norm": 0.01212376877663185, "learning_rate": 3.071430095549234e-06, "loss": 0.0006, "step": 201690 }, { "epoch": 1.3269474944573463, "grad_norm": 0.014106332123654789, "learning_rate": 3.0709004229126053e-06, "loss": 0.002, "step": 201700 }, { "epoch": 1.3270132826325796, "grad_norm": 0.0852229951133626, "learning_rate": 3.070370775709489e-06, "loss": 0.0012, "step": 201710 }, { "epoch": 1.327079070807813, "grad_norm": 0.08394043188440872, "learning_rate": 3.0698411539468676e-06, "loss": 0.0005, "step": 201720 }, { "epoch": 1.3271448589830463, "grad_norm": 0.02084694497042877, "learning_rate": 3.0693115576317234e-06, "loss": 0.0005, "step": 201730 }, { "epoch": 1.3272106471582799, "grad_norm": 0.13889544488896602, "learning_rate": 3.068781986771039e-06, "loss": 0.0011, "step": 201740 }, { "epoch": 1.3272764353335131, "grad_norm": 0.007587239042550902, "learning_rate": 3.068252441371795e-06, "loss": 0.0007, "step": 201750 }, { "epoch": 1.3273422235087464, "grad_norm": 0.03512173942028089, "learning_rate": 3.0677229214409754e-06, "loss": 0.0009, "step": 201760 }, { "epoch": 1.32740801168398, "grad_norm": 0.001590877330626502, "learning_rate": 3.0671934269855607e-06, "loss": 0.0003, "step": 201770 }, { "epoch": 1.3274737998592134, "grad_norm": 0.02777729565573913, "learning_rate": 3.0666639580125307e-06, "loss": 0.0005, "step": 201780 }, { "epoch": 1.3275395880344467, "grad_norm": 0.031258807991493304, "learning_rate": 3.066134514528867e-06, "loss": 0.0015, "step": 201790 }, { "epoch": 1.32760537620968, "grad_norm": 0.04399652220442622, "learning_rate": 3.0656050965415494e-06, "loss": 0.0005, "step": 201800 }, { "epoch": 1.3276711643849135, "grad_norm": 0.012255066213271413, "learning_rate": 3.0650757040575572e-06, "loss": 0.0014, "step": 201810 }, { "epoch": 1.3277369525601468, "grad_norm": 0.013514848352379566, "learning_rate": 3.0645463370838714e-06, "loss": 0.0012, "step": 201820 }, { "epoch": 1.3278027407353803, "grad_norm": 0.06706638092450086, "learning_rate": 3.0640169956274703e-06, "loss": 0.0009, "step": 201830 }, { "epoch": 1.3278685289106136, "grad_norm": 0.05190069538815441, "learning_rate": 3.0634876796953334e-06, "loss": 0.0007, "step": 201840 }, { "epoch": 1.3279343170858469, "grad_norm": 0.0742900690395471, "learning_rate": 3.062958389294439e-06, "loss": 0.0006, "step": 201850 }, { "epoch": 1.3280001052610804, "grad_norm": 0.15990972161889508, "learning_rate": 3.062429124431765e-06, "loss": 0.001, "step": 201860 }, { "epoch": 1.3280658934363139, "grad_norm": 0.0012002494653484356, "learning_rate": 3.06189988511429e-06, "loss": 0.0008, "step": 201870 }, { "epoch": 1.3281316816115472, "grad_norm": 0.056597775283018056, "learning_rate": 3.06137067134899e-06, "loss": 0.0002, "step": 201880 }, { "epoch": 1.3281974697867804, "grad_norm": 0.005668926300115359, "learning_rate": 3.0608414831428447e-06, "loss": 0.0003, "step": 201890 }, { "epoch": 1.328263257962014, "grad_norm": 0.034887343739566014, "learning_rate": 3.0603123205028296e-06, "loss": 0.0009, "step": 201900 }, { "epoch": 1.3283290461372472, "grad_norm": 0.027262846062717533, "learning_rate": 3.059783183435921e-06, "loss": 0.0004, "step": 201910 }, { "epoch": 1.3283948343124807, "grad_norm": 0.01864730180974748, "learning_rate": 3.0592540719490958e-06, "loss": 0.0002, "step": 201920 }, { "epoch": 1.328460622487714, "grad_norm": 0.02432929483801923, "learning_rate": 3.0587249860493293e-06, "loss": 0.0013, "step": 201930 }, { "epoch": 1.3285264106629475, "grad_norm": 0.002365695146592532, "learning_rate": 3.0581959257435972e-06, "loss": 0.0012, "step": 201940 }, { "epoch": 1.3285921988381808, "grad_norm": 0.029909383822226323, "learning_rate": 3.057666891038874e-06, "loss": 0.0006, "step": 201950 }, { "epoch": 1.3286579870134143, "grad_norm": 0.03947511043230248, "learning_rate": 3.057137881942136e-06, "loss": 0.0006, "step": 201960 }, { "epoch": 1.3287237751886476, "grad_norm": 0.0026611989201283295, "learning_rate": 3.056608898460357e-06, "loss": 0.0009, "step": 201970 }, { "epoch": 1.3287895633638809, "grad_norm": 0.01294657163992825, "learning_rate": 3.0560799406005116e-06, "loss": 0.0013, "step": 201980 }, { "epoch": 1.3288553515391144, "grad_norm": 0.07750257802058212, "learning_rate": 3.0555510083695734e-06, "loss": 0.0004, "step": 201990 }, { "epoch": 1.3289211397143477, "grad_norm": 0.010557607606675947, "learning_rate": 3.055022101774516e-06, "loss": 0.0006, "step": 202000 }, { "epoch": 1.3289869278895812, "grad_norm": 0.018253537514275515, "learning_rate": 3.054493220822311e-06, "loss": 0.0008, "step": 202010 }, { "epoch": 1.3290527160648145, "grad_norm": 0.09555689110263797, "learning_rate": 3.0539643655199335e-06, "loss": 0.0007, "step": 202020 }, { "epoch": 1.329118504240048, "grad_norm": 0.015236075314595966, "learning_rate": 3.053435535874355e-06, "loss": 0.0004, "step": 202030 }, { "epoch": 1.3291842924152812, "grad_norm": 0.02862787119194192, "learning_rate": 3.0529067318925482e-06, "loss": 0.0004, "step": 202040 }, { "epoch": 1.3292500805905147, "grad_norm": 0.027466670337782054, "learning_rate": 3.0523779535814845e-06, "loss": 0.0007, "step": 202050 }, { "epoch": 1.329315868765748, "grad_norm": 0.01429368172791447, "learning_rate": 3.0518492009481347e-06, "loss": 0.0007, "step": 202060 }, { "epoch": 1.3293816569409813, "grad_norm": 0.0587926210331014, "learning_rate": 3.0513204739994717e-06, "loss": 0.0008, "step": 202070 }, { "epoch": 1.3294474451162148, "grad_norm": 0.021392217860344186, "learning_rate": 3.050791772742464e-06, "loss": 0.0009, "step": 202080 }, { "epoch": 1.3295132332914483, "grad_norm": 0.04644922585237134, "learning_rate": 3.050263097184084e-06, "loss": 0.0009, "step": 202090 }, { "epoch": 1.3295790214666816, "grad_norm": 0.020959975407926615, "learning_rate": 3.0497344473313016e-06, "loss": 0.0008, "step": 202100 }, { "epoch": 1.329644809641915, "grad_norm": 0.04040579110200322, "learning_rate": 3.049205823191086e-06, "loss": 0.0004, "step": 202110 }, { "epoch": 1.3297105978171484, "grad_norm": 0.025425507716441094, "learning_rate": 3.0486772247704066e-06, "loss": 0.0002, "step": 202120 }, { "epoch": 1.3297763859923817, "grad_norm": 0.04604325930170299, "learning_rate": 3.0481486520762325e-06, "loss": 0.0011, "step": 202130 }, { "epoch": 1.3298421741676152, "grad_norm": 0.02031454321371174, "learning_rate": 3.0476201051155317e-06, "loss": 0.0004, "step": 202140 }, { "epoch": 1.3299079623428485, "grad_norm": 0.0011177529624355723, "learning_rate": 3.0470915838952754e-06, "loss": 0.0002, "step": 202150 }, { "epoch": 1.3299737505180818, "grad_norm": 0.031151001506564344, "learning_rate": 3.0465630884224294e-06, "loss": 0.0008, "step": 202160 }, { "epoch": 1.3300395386933153, "grad_norm": 0.05435466285978164, "learning_rate": 3.046034618703962e-06, "loss": 0.0005, "step": 202170 }, { "epoch": 1.3301053268685488, "grad_norm": 0.020833727542785, "learning_rate": 3.0455061747468404e-06, "loss": 0.0011, "step": 202180 }, { "epoch": 1.330171115043782, "grad_norm": 0.0038562142142469077, "learning_rate": 3.044977756558032e-06, "loss": 0.0003, "step": 202190 }, { "epoch": 1.3302369032190153, "grad_norm": 0.011957461307544357, "learning_rate": 3.0444493641445038e-06, "loss": 0.0005, "step": 202200 }, { "epoch": 1.3303026913942488, "grad_norm": 0.13818466037518734, "learning_rate": 3.0439209975132207e-06, "loss": 0.0013, "step": 202210 }, { "epoch": 1.3303684795694821, "grad_norm": 0.0013540184251603718, "learning_rate": 3.0433926566711504e-06, "loss": 0.0002, "step": 202220 }, { "epoch": 1.3304342677447156, "grad_norm": 0.00305372795532723, "learning_rate": 3.0428643416252584e-06, "loss": 0.0006, "step": 202230 }, { "epoch": 1.330500055919949, "grad_norm": 0.001467610962486039, "learning_rate": 3.0423360523825095e-06, "loss": 0.0005, "step": 202240 }, { "epoch": 1.3305658440951824, "grad_norm": 0.0009790535504271929, "learning_rate": 3.0418077889498697e-06, "loss": 0.0004, "step": 202250 }, { "epoch": 1.3306316322704157, "grad_norm": 0.029686784033649275, "learning_rate": 3.041279551334302e-06, "loss": 0.001, "step": 202260 }, { "epoch": 1.3306974204456492, "grad_norm": 0.0022084057770682796, "learning_rate": 3.040751339542771e-06, "loss": 0.0005, "step": 202270 }, { "epoch": 1.3307632086208825, "grad_norm": 0.02696028981827204, "learning_rate": 3.0402231535822434e-06, "loss": 0.0011, "step": 202280 }, { "epoch": 1.3308289967961158, "grad_norm": 0.0019405520862924813, "learning_rate": 3.03969499345968e-06, "loss": 0.0004, "step": 202290 }, { "epoch": 1.3308947849713493, "grad_norm": 0.02104490836683596, "learning_rate": 3.0391668591820454e-06, "loss": 0.0006, "step": 202300 }, { "epoch": 1.3309605731465826, "grad_norm": 0.004326362105463326, "learning_rate": 3.0386387507563023e-06, "loss": 0.0005, "step": 202310 }, { "epoch": 1.331026361321816, "grad_norm": 0.03177856560032333, "learning_rate": 3.0381106681894134e-06, "loss": 0.0004, "step": 202320 }, { "epoch": 1.3310921494970493, "grad_norm": 0.024082836415386697, "learning_rate": 3.0375826114883412e-06, "loss": 0.0006, "step": 202330 }, { "epoch": 1.3311579376722829, "grad_norm": 0.0004250797131223185, "learning_rate": 3.037054580660046e-06, "loss": 0.0004, "step": 202340 }, { "epoch": 1.3312237258475161, "grad_norm": 0.019088529922546327, "learning_rate": 3.0365265757114916e-06, "loss": 0.0002, "step": 202350 }, { "epoch": 1.3312895140227496, "grad_norm": 0.059988919343909756, "learning_rate": 3.0359985966496385e-06, "loss": 0.0009, "step": 202360 }, { "epoch": 1.331355302197983, "grad_norm": 0.00047007468150185523, "learning_rate": 3.0354706434814484e-06, "loss": 0.0007, "step": 202370 }, { "epoch": 1.3314210903732162, "grad_norm": 0.2907926555606094, "learning_rate": 3.034942716213881e-06, "loss": 0.0021, "step": 202380 }, { "epoch": 1.3314868785484497, "grad_norm": 0.0144859591522117, "learning_rate": 3.0344148148538967e-06, "loss": 0.0008, "step": 202390 }, { "epoch": 1.3315526667236832, "grad_norm": 0.055072849380966134, "learning_rate": 3.0338869394084537e-06, "loss": 0.0011, "step": 202400 }, { "epoch": 1.3316184548989165, "grad_norm": 0.004514171953519656, "learning_rate": 3.033359089884515e-06, "loss": 0.0006, "step": 202410 }, { "epoch": 1.3316842430741498, "grad_norm": 0.019364036464056794, "learning_rate": 3.032831266289038e-06, "loss": 0.0004, "step": 202420 }, { "epoch": 1.3317500312493833, "grad_norm": 0.007677052515898128, "learning_rate": 3.032303468628982e-06, "loss": 0.0002, "step": 202430 }, { "epoch": 1.3318158194246166, "grad_norm": 0.06043614305317397, "learning_rate": 3.0317756969113057e-06, "loss": 0.0013, "step": 202440 }, { "epoch": 1.33188160759985, "grad_norm": 0.018775919058897778, "learning_rate": 3.031247951142966e-06, "loss": 0.0016, "step": 202450 }, { "epoch": 1.3319473957750834, "grad_norm": 0.049320101283958764, "learning_rate": 3.0307202313309225e-06, "loss": 0.0009, "step": 202460 }, { "epoch": 1.3320131839503166, "grad_norm": 0.06356054925913342, "learning_rate": 3.0301925374821305e-06, "loss": 0.0009, "step": 202470 }, { "epoch": 1.3320789721255502, "grad_norm": 0.032539590669764, "learning_rate": 3.0296648696035503e-06, "loss": 0.0006, "step": 202480 }, { "epoch": 1.3321447603007837, "grad_norm": 0.0422750205126797, "learning_rate": 3.029137227702137e-06, "loss": 0.0007, "step": 202490 }, { "epoch": 1.332210548476017, "grad_norm": 0.0275227971746954, "learning_rate": 3.0286096117848467e-06, "loss": 0.0005, "step": 202500 }, { "epoch": 1.3322763366512502, "grad_norm": 0.014749536997083663, "learning_rate": 3.0280820218586358e-06, "loss": 0.0012, "step": 202510 }, { "epoch": 1.3323421248264837, "grad_norm": 0.06665223621831387, "learning_rate": 3.0275544579304618e-06, "loss": 0.0005, "step": 202520 }, { "epoch": 1.332407913001717, "grad_norm": 0.024583162452731806, "learning_rate": 3.0270269200072767e-06, "loss": 0.0007, "step": 202530 }, { "epoch": 1.3324737011769505, "grad_norm": 0.001065978933348781, "learning_rate": 3.0264994080960384e-06, "loss": 0.0004, "step": 202540 }, { "epoch": 1.3325394893521838, "grad_norm": 0.036537776185098535, "learning_rate": 3.025971922203702e-06, "loss": 0.0008, "step": 202550 }, { "epoch": 1.332605277527417, "grad_norm": 0.0013626214532515481, "learning_rate": 3.0254444623372203e-06, "loss": 0.001, "step": 202560 }, { "epoch": 1.3326710657026506, "grad_norm": 0.07694412483881409, "learning_rate": 3.024917028503548e-06, "loss": 0.0007, "step": 202570 }, { "epoch": 1.332736853877884, "grad_norm": 0.06363292537802684, "learning_rate": 3.024389620709639e-06, "loss": 0.0007, "step": 202580 }, { "epoch": 1.3328026420531174, "grad_norm": 0.019128324512778783, "learning_rate": 3.0238622389624463e-06, "loss": 0.0007, "step": 202590 }, { "epoch": 1.3328684302283507, "grad_norm": 0.01578309145381847, "learning_rate": 3.0233348832689223e-06, "loss": 0.0004, "step": 202600 }, { "epoch": 1.3329342184035842, "grad_norm": 0.027607154829995758, "learning_rate": 3.0228075536360225e-06, "loss": 0.0007, "step": 202610 }, { "epoch": 1.3330000065788175, "grad_norm": 0.021282702980132736, "learning_rate": 3.0222802500706967e-06, "loss": 0.0005, "step": 202620 }, { "epoch": 1.333065794754051, "grad_norm": 0.08529866387221254, "learning_rate": 3.021752972579898e-06, "loss": 0.0011, "step": 202630 }, { "epoch": 1.3331315829292842, "grad_norm": 0.0011668798800569002, "learning_rate": 3.0212257211705776e-06, "loss": 0.0002, "step": 202640 }, { "epoch": 1.3331973711045177, "grad_norm": 0.07652661179292881, "learning_rate": 3.020698495849688e-06, "loss": 0.0014, "step": 202650 }, { "epoch": 1.333263159279751, "grad_norm": 0.06851221995065021, "learning_rate": 3.020171296624177e-06, "loss": 0.0003, "step": 202660 }, { "epoch": 1.3333289474549845, "grad_norm": 0.023592804273529836, "learning_rate": 3.0196441235009987e-06, "loss": 0.0004, "step": 202670 }, { "epoch": 1.3333947356302178, "grad_norm": 0.0023697545775082183, "learning_rate": 3.019116976487103e-06, "loss": 0.0009, "step": 202680 }, { "epoch": 1.333460523805451, "grad_norm": 0.022998394158031444, "learning_rate": 3.0185898555894387e-06, "loss": 0.0004, "step": 202690 }, { "epoch": 1.3335263119806846, "grad_norm": 0.04180613747987974, "learning_rate": 3.0180627608149558e-06, "loss": 0.0007, "step": 202700 }, { "epoch": 1.333592100155918, "grad_norm": 0.07438877200722903, "learning_rate": 3.0175356921706045e-06, "loss": 0.0004, "step": 202710 }, { "epoch": 1.3336578883311514, "grad_norm": 0.1149949838268866, "learning_rate": 3.0170086496633323e-06, "loss": 0.001, "step": 202720 }, { "epoch": 1.3337236765063847, "grad_norm": 0.02811646220912526, "learning_rate": 3.0164816333000873e-06, "loss": 0.0011, "step": 202730 }, { "epoch": 1.3337894646816182, "grad_norm": 0.03278787437845237, "learning_rate": 3.0159546430878207e-06, "loss": 0.0006, "step": 202740 }, { "epoch": 1.3338552528568515, "grad_norm": 0.021015374002076884, "learning_rate": 3.015427679033478e-06, "loss": 0.0004, "step": 202750 }, { "epoch": 1.333921041032085, "grad_norm": 0.04367806858840639, "learning_rate": 3.014900741144007e-06, "loss": 0.0007, "step": 202760 }, { "epoch": 1.3339868292073183, "grad_norm": 0.05904556420333716, "learning_rate": 3.014373829426356e-06, "loss": 0.0011, "step": 202770 }, { "epoch": 1.3340526173825515, "grad_norm": 0.010604719302496521, "learning_rate": 3.0138469438874707e-06, "loss": 0.0009, "step": 202780 }, { "epoch": 1.334118405557785, "grad_norm": 0.033059811809483905, "learning_rate": 3.013320084534297e-06, "loss": 0.0008, "step": 202790 }, { "epoch": 1.3341841937330186, "grad_norm": 0.21446988278965057, "learning_rate": 3.0127932513737833e-06, "loss": 0.0019, "step": 202800 }, { "epoch": 1.3342499819082518, "grad_norm": 0.029758832266281964, "learning_rate": 3.0122664444128745e-06, "loss": 0.0007, "step": 202810 }, { "epoch": 1.3343157700834851, "grad_norm": 0.02234905760973733, "learning_rate": 3.011739663658516e-06, "loss": 0.0008, "step": 202820 }, { "epoch": 1.3343815582587186, "grad_norm": 9.092183684557293e-05, "learning_rate": 3.0112129091176522e-06, "loss": 0.001, "step": 202830 }, { "epoch": 1.334447346433952, "grad_norm": 0.038294401156572525, "learning_rate": 3.010686180797229e-06, "loss": 0.0018, "step": 202840 }, { "epoch": 1.3345131346091854, "grad_norm": 0.058989884131042364, "learning_rate": 3.010159478704191e-06, "loss": 0.0006, "step": 202850 }, { "epoch": 1.3345789227844187, "grad_norm": 0.09178456515061062, "learning_rate": 3.00963280284548e-06, "loss": 0.0008, "step": 202860 }, { "epoch": 1.334644710959652, "grad_norm": 0.01994708070184208, "learning_rate": 3.0091061532280423e-06, "loss": 0.0006, "step": 202870 }, { "epoch": 1.3347104991348855, "grad_norm": 0.007841331196723594, "learning_rate": 3.008579529858821e-06, "loss": 0.0007, "step": 202880 }, { "epoch": 1.334776287310119, "grad_norm": 0.0017671770073224744, "learning_rate": 3.008052932744759e-06, "loss": 0.0006, "step": 202890 }, { "epoch": 1.3348420754853523, "grad_norm": 0.026730493247140236, "learning_rate": 3.007526361892798e-06, "loss": 0.0007, "step": 202900 }, { "epoch": 1.3349078636605856, "grad_norm": 0.04323644339914938, "learning_rate": 3.0069998173098813e-06, "loss": 0.0005, "step": 202910 }, { "epoch": 1.334973651835819, "grad_norm": 0.11639725379053915, "learning_rate": 3.00647329900295e-06, "loss": 0.0006, "step": 202920 }, { "epoch": 1.3350394400110523, "grad_norm": 0.014046255874725511, "learning_rate": 3.005946806978948e-06, "loss": 0.0003, "step": 202930 }, { "epoch": 1.3351052281862859, "grad_norm": 0.09388996100513369, "learning_rate": 3.005420341244814e-06, "loss": 0.0014, "step": 202940 }, { "epoch": 1.3351710163615191, "grad_norm": 0.012900913512991398, "learning_rate": 3.004893901807491e-06, "loss": 0.0005, "step": 202950 }, { "epoch": 1.3352368045367526, "grad_norm": 0.03463909054780307, "learning_rate": 3.0043674886739183e-06, "loss": 0.0005, "step": 202960 }, { "epoch": 1.335302592711986, "grad_norm": 0.013285980146890397, "learning_rate": 3.003841101851037e-06, "loss": 0.0001, "step": 202970 }, { "epoch": 1.3353683808872194, "grad_norm": 0.042175844434344696, "learning_rate": 3.003314741345787e-06, "loss": 0.0003, "step": 202980 }, { "epoch": 1.3354341690624527, "grad_norm": 0.014922112919408273, "learning_rate": 3.002788407165106e-06, "loss": 0.0005, "step": 202990 }, { "epoch": 1.335499957237686, "grad_norm": 0.009260400008261437, "learning_rate": 3.002262099315937e-06, "loss": 0.0005, "step": 203000 }, { "epoch": 1.3355657454129195, "grad_norm": 0.0022103260428593388, "learning_rate": 3.001735817805216e-06, "loss": 0.0016, "step": 203010 }, { "epoch": 1.3356315335881528, "grad_norm": 0.01756891792672894, "learning_rate": 3.001209562639883e-06, "loss": 0.0004, "step": 203020 }, { "epoch": 1.3356973217633863, "grad_norm": 0.10283117680854392, "learning_rate": 3.0006833338268747e-06, "loss": 0.0005, "step": 203030 }, { "epoch": 1.3357631099386196, "grad_norm": 0.0305818778858152, "learning_rate": 3.000157131373131e-06, "loss": 0.0003, "step": 203040 }, { "epoch": 1.335828898113853, "grad_norm": 0.026017918039157206, "learning_rate": 2.9996309552855867e-06, "loss": 0.0003, "step": 203050 }, { "epoch": 1.3358946862890864, "grad_norm": 0.03186093042919023, "learning_rate": 2.999104805571182e-06, "loss": 0.0003, "step": 203060 }, { "epoch": 1.3359604744643199, "grad_norm": 0.001788005896211726, "learning_rate": 2.9985786822368524e-06, "loss": 0.0007, "step": 203070 }, { "epoch": 1.3360262626395532, "grad_norm": 0.053537379071485106, "learning_rate": 2.9980525852895336e-06, "loss": 0.0006, "step": 203080 }, { "epoch": 1.3360920508147864, "grad_norm": 0.012338494137936917, "learning_rate": 2.9975265147361643e-06, "loss": 0.0005, "step": 203090 }, { "epoch": 1.33615783899002, "grad_norm": 0.056145507040915144, "learning_rate": 2.997000470583677e-06, "loss": 0.001, "step": 203100 }, { "epoch": 1.3362236271652534, "grad_norm": 0.0003882697270193303, "learning_rate": 2.9964744528390087e-06, "loss": 0.0002, "step": 203110 }, { "epoch": 1.3362894153404867, "grad_norm": 0.08577323753123697, "learning_rate": 2.995948461509094e-06, "loss": 0.0005, "step": 203120 }, { "epoch": 1.33635520351572, "grad_norm": 0.005915644897899155, "learning_rate": 2.9954224966008687e-06, "loss": 0.001, "step": 203130 }, { "epoch": 1.3364209916909535, "grad_norm": 5.817825578911085e-05, "learning_rate": 2.9948965581212662e-06, "loss": 0.0004, "step": 203140 }, { "epoch": 1.3364867798661868, "grad_norm": 0.05146775685042596, "learning_rate": 2.9943706460772203e-06, "loss": 0.0007, "step": 203150 }, { "epoch": 1.3365525680414203, "grad_norm": 0.03569611073358588, "learning_rate": 2.9938447604756655e-06, "loss": 0.0001, "step": 203160 }, { "epoch": 1.3366183562166536, "grad_norm": 0.1391598010898497, "learning_rate": 2.9933189013235353e-06, "loss": 0.0007, "step": 203170 }, { "epoch": 1.3366841443918869, "grad_norm": 0.020704709134708274, "learning_rate": 2.992793068627762e-06, "loss": 0.0004, "step": 203180 }, { "epoch": 1.3367499325671204, "grad_norm": 0.011193390489663946, "learning_rate": 2.992267262395279e-06, "loss": 0.0018, "step": 203190 }, { "epoch": 1.3368157207423539, "grad_norm": 0.02469302895524664, "learning_rate": 2.9917414826330182e-06, "loss": 0.0004, "step": 203200 }, { "epoch": 1.3368815089175872, "grad_norm": 0.0350622320961163, "learning_rate": 2.991215729347911e-06, "loss": 0.0007, "step": 203210 }, { "epoch": 1.3369472970928205, "grad_norm": 0.0006727313605247581, "learning_rate": 2.990690002546889e-06, "loss": 0.0008, "step": 203220 }, { "epoch": 1.337013085268054, "grad_norm": 0.009175744968986705, "learning_rate": 2.9901643022368843e-06, "loss": 0.0004, "step": 203230 }, { "epoch": 1.3370788734432872, "grad_norm": 0.013008041171033642, "learning_rate": 2.989638628424828e-06, "loss": 0.0004, "step": 203240 }, { "epoch": 1.3371446616185207, "grad_norm": 0.03585109483619535, "learning_rate": 2.98911298111765e-06, "loss": 0.0004, "step": 203250 }, { "epoch": 1.337210449793754, "grad_norm": 0.03812271349175553, "learning_rate": 2.9885873603222807e-06, "loss": 0.0013, "step": 203260 }, { "epoch": 1.3372762379689875, "grad_norm": 0.058645125657490825, "learning_rate": 2.9880617660456502e-06, "loss": 0.0011, "step": 203270 }, { "epoch": 1.3373420261442208, "grad_norm": 0.048132357465893456, "learning_rate": 2.987536198294686e-06, "loss": 0.0006, "step": 203280 }, { "epoch": 1.3374078143194543, "grad_norm": 0.008129932246331623, "learning_rate": 2.98701065707632e-06, "loss": 0.0003, "step": 203290 }, { "epoch": 1.3374736024946876, "grad_norm": 0.032917300964872526, "learning_rate": 2.9864851423974806e-06, "loss": 0.0007, "step": 203300 }, { "epoch": 1.337539390669921, "grad_norm": 0.016773065122873562, "learning_rate": 2.9859596542650953e-06, "loss": 0.0009, "step": 203310 }, { "epoch": 1.3376051788451544, "grad_norm": 0.0801386331648191, "learning_rate": 2.985434192686093e-06, "loss": 0.0014, "step": 203320 }, { "epoch": 1.3376709670203877, "grad_norm": 0.008423827295086126, "learning_rate": 2.9849087576674e-06, "loss": 0.0008, "step": 203330 }, { "epoch": 1.3377367551956212, "grad_norm": 0.014094489817852057, "learning_rate": 2.9843833492159454e-06, "loss": 0.0006, "step": 203340 }, { "epoch": 1.3378025433708545, "grad_norm": 0.0169493243995552, "learning_rate": 2.9838579673386544e-06, "loss": 0.0006, "step": 203350 }, { "epoch": 1.337868331546088, "grad_norm": 0.04986717957299288, "learning_rate": 2.9833326120424554e-06, "loss": 0.0006, "step": 203360 }, { "epoch": 1.3379341197213213, "grad_norm": 0.0031286594922261815, "learning_rate": 2.9828072833342747e-06, "loss": 0.0007, "step": 203370 }, { "epoch": 1.3379999078965548, "grad_norm": 0.011132691537934534, "learning_rate": 2.9822819812210375e-06, "loss": 0.0002, "step": 203380 }, { "epoch": 1.338065696071788, "grad_norm": 0.002345083854419173, "learning_rate": 2.9817567057096696e-06, "loss": 0.0009, "step": 203390 }, { "epoch": 1.3381314842470213, "grad_norm": 0.023217602535485812, "learning_rate": 2.981231456807097e-06, "loss": 0.0002, "step": 203400 }, { "epoch": 1.3381972724222548, "grad_norm": 0.02533692692929636, "learning_rate": 2.9807062345202443e-06, "loss": 0.0009, "step": 203410 }, { "epoch": 1.3382630605974883, "grad_norm": 0.013587434323220062, "learning_rate": 2.9801810388560343e-06, "loss": 0.0011, "step": 203420 }, { "epoch": 1.3383288487727216, "grad_norm": 0.00401828564765984, "learning_rate": 2.979655869821394e-06, "loss": 0.0004, "step": 203430 }, { "epoch": 1.338394636947955, "grad_norm": 0.03394318525270762, "learning_rate": 2.979130727423246e-06, "loss": 0.0006, "step": 203440 }, { "epoch": 1.3384604251231884, "grad_norm": 0.03314309593424435, "learning_rate": 2.9786056116685147e-06, "loss": 0.0009, "step": 203450 }, { "epoch": 1.3385262132984217, "grad_norm": 0.020699208747266547, "learning_rate": 2.978080522564122e-06, "loss": 0.001, "step": 203460 }, { "epoch": 1.3385920014736552, "grad_norm": 0.00024296551719374146, "learning_rate": 2.9775554601169916e-06, "loss": 0.0005, "step": 203470 }, { "epoch": 1.3386577896488885, "grad_norm": 0.01963426558272174, "learning_rate": 2.9770304243340443e-06, "loss": 0.0004, "step": 203480 }, { "epoch": 1.3387235778241218, "grad_norm": 0.03815513561609523, "learning_rate": 2.976505415222205e-06, "loss": 0.0005, "step": 203490 }, { "epoch": 1.3387893659993553, "grad_norm": 0.11414269608131818, "learning_rate": 2.9759804327883947e-06, "loss": 0.0006, "step": 203500 }, { "epoch": 1.3388551541745888, "grad_norm": 0.025167501852793307, "learning_rate": 2.975455477039534e-06, "loss": 0.001, "step": 203510 }, { "epoch": 1.338920942349822, "grad_norm": 0.0024054753152628113, "learning_rate": 2.9749305479825443e-06, "loss": 0.0004, "step": 203520 }, { "epoch": 1.3389867305250553, "grad_norm": 0.04138502690172966, "learning_rate": 2.9744056456243466e-06, "loss": 0.0011, "step": 203530 }, { "epoch": 1.3390525187002889, "grad_norm": 0.06886105066572772, "learning_rate": 2.9738807699718607e-06, "loss": 0.0012, "step": 203540 }, { "epoch": 1.3391183068755221, "grad_norm": 0.003512313085669715, "learning_rate": 2.9733559210320063e-06, "loss": 0.0005, "step": 203550 }, { "epoch": 1.3391840950507556, "grad_norm": 0.042464700441543046, "learning_rate": 2.9728310988117037e-06, "loss": 0.0005, "step": 203560 }, { "epoch": 1.339249883225989, "grad_norm": 0.018035187537817427, "learning_rate": 2.9723063033178733e-06, "loss": 0.0005, "step": 203570 }, { "epoch": 1.3393156714012222, "grad_norm": 0.023278326090606514, "learning_rate": 2.971781534557433e-06, "loss": 0.0006, "step": 203580 }, { "epoch": 1.3393814595764557, "grad_norm": 0.08815248241887068, "learning_rate": 2.9712567925373016e-06, "loss": 0.0013, "step": 203590 }, { "epoch": 1.3394472477516892, "grad_norm": 0.0003520897594849193, "learning_rate": 2.9707320772643967e-06, "loss": 0.0005, "step": 203600 }, { "epoch": 1.3395130359269225, "grad_norm": 0.02987702497999376, "learning_rate": 2.9702073887456364e-06, "loss": 0.0004, "step": 203610 }, { "epoch": 1.3395788241021558, "grad_norm": 0.051170422320653025, "learning_rate": 2.969682726987939e-06, "loss": 0.0009, "step": 203620 }, { "epoch": 1.3396446122773893, "grad_norm": 0.03946357547782198, "learning_rate": 2.9691580919982216e-06, "loss": 0.0008, "step": 203630 }, { "epoch": 1.3397104004526226, "grad_norm": 0.03080482545594603, "learning_rate": 2.9686334837834007e-06, "loss": 0.0002, "step": 203640 }, { "epoch": 1.339776188627856, "grad_norm": 0.08252797949803166, "learning_rate": 2.9681089023503933e-06, "loss": 0.0009, "step": 203650 }, { "epoch": 1.3398419768030894, "grad_norm": 0.024569975108747262, "learning_rate": 2.967584347706115e-06, "loss": 0.0003, "step": 203660 }, { "epoch": 1.3399077649783229, "grad_norm": 0.009169291912639951, "learning_rate": 2.967059819857482e-06, "loss": 0.0009, "step": 203670 }, { "epoch": 1.3399735531535562, "grad_norm": 0.029132144842919618, "learning_rate": 2.9665353188114075e-06, "loss": 0.0009, "step": 203680 }, { "epoch": 1.3400393413287897, "grad_norm": 0.02347575474322044, "learning_rate": 2.9660108445748105e-06, "loss": 0.0004, "step": 203690 }, { "epoch": 1.340105129504023, "grad_norm": 0.02033618306002581, "learning_rate": 2.9654863971546038e-06, "loss": 0.0019, "step": 203700 }, { "epoch": 1.3401709176792562, "grad_norm": 0.034525498200860955, "learning_rate": 2.964961976557702e-06, "loss": 0.0004, "step": 203710 }, { "epoch": 1.3402367058544897, "grad_norm": 0.03694641353607296, "learning_rate": 2.9644375827910188e-06, "loss": 0.0005, "step": 203720 }, { "epoch": 1.3403024940297232, "grad_norm": 0.015830488875050116, "learning_rate": 2.9639132158614675e-06, "loss": 0.0002, "step": 203730 }, { "epoch": 1.3403682822049565, "grad_norm": 0.05382983769087207, "learning_rate": 2.963388875775961e-06, "loss": 0.0014, "step": 203740 }, { "epoch": 1.3404340703801898, "grad_norm": 0.02113590695787101, "learning_rate": 2.9628645625414154e-06, "loss": 0.0009, "step": 203750 }, { "epoch": 1.3404998585554233, "grad_norm": 0.18263825619195298, "learning_rate": 2.9623402761647403e-06, "loss": 0.0008, "step": 203760 }, { "epoch": 1.3405656467306566, "grad_norm": 0.10845910225064362, "learning_rate": 2.9618160166528488e-06, "loss": 0.0007, "step": 203770 }, { "epoch": 1.34063143490589, "grad_norm": 0.04404221795716998, "learning_rate": 2.961291784012653e-06, "loss": 0.0007, "step": 203780 }, { "epoch": 1.3406972230811234, "grad_norm": 0.0063499255375242996, "learning_rate": 2.9607675782510635e-06, "loss": 0.0006, "step": 203790 }, { "epoch": 1.3407630112563567, "grad_norm": 0.02649504276990509, "learning_rate": 2.9602433993749934e-06, "loss": 0.0008, "step": 203800 }, { "epoch": 1.3408287994315902, "grad_norm": 0.03148209176244509, "learning_rate": 2.959719247391351e-06, "loss": 0.0007, "step": 203810 }, { "epoch": 1.3408945876068237, "grad_norm": 0.035291588864137316, "learning_rate": 2.9591951223070493e-06, "loss": 0.0004, "step": 203820 }, { "epoch": 1.340960375782057, "grad_norm": 0.008408209074493692, "learning_rate": 2.9586710241289973e-06, "loss": 0.0009, "step": 203830 }, { "epoch": 1.3410261639572902, "grad_norm": 0.035034614807609735, "learning_rate": 2.958146952864105e-06, "loss": 0.0011, "step": 203840 }, { "epoch": 1.3410919521325237, "grad_norm": 0.0594741721355199, "learning_rate": 2.9576229085192815e-06, "loss": 0.0005, "step": 203850 }, { "epoch": 1.341157740307757, "grad_norm": 0.04059175310593486, "learning_rate": 2.957098891101436e-06, "loss": 0.0014, "step": 203860 }, { "epoch": 1.3412235284829905, "grad_norm": 0.06588428126448494, "learning_rate": 2.956574900617476e-06, "loss": 0.0008, "step": 203870 }, { "epoch": 1.3412893166582238, "grad_norm": 0.05136049248845707, "learning_rate": 2.956050937074313e-06, "loss": 0.0006, "step": 203880 }, { "epoch": 1.341355104833457, "grad_norm": 0.010784134064759685, "learning_rate": 2.9555270004788523e-06, "loss": 0.0004, "step": 203890 }, { "epoch": 1.3414208930086906, "grad_norm": 0.07529540763567479, "learning_rate": 2.9550030908380024e-06, "loss": 0.0025, "step": 203900 }, { "epoch": 1.3414866811839241, "grad_norm": 0.0011907890889466526, "learning_rate": 2.954479208158671e-06, "loss": 0.0012, "step": 203910 }, { "epoch": 1.3415524693591574, "grad_norm": 0.02744539354681926, "learning_rate": 2.9539553524477643e-06, "loss": 0.001, "step": 203920 }, { "epoch": 1.3416182575343907, "grad_norm": 0.019565196316827897, "learning_rate": 2.953431523712189e-06, "loss": 0.0008, "step": 203930 }, { "epoch": 1.3416840457096242, "grad_norm": 0.052199607241974325, "learning_rate": 2.9529077219588507e-06, "loss": 0.0011, "step": 203940 }, { "epoch": 1.3417498338848575, "grad_norm": 0.029192760221765805, "learning_rate": 2.952383947194658e-06, "loss": 0.0003, "step": 203950 }, { "epoch": 1.341815622060091, "grad_norm": 0.026050479631059293, "learning_rate": 2.951860199426514e-06, "loss": 0.0003, "step": 203960 }, { "epoch": 1.3418814102353243, "grad_norm": 0.04992890760083513, "learning_rate": 2.951336478661324e-06, "loss": 0.0007, "step": 203970 }, { "epoch": 1.3419471984105578, "grad_norm": 0.03855601843119865, "learning_rate": 2.950812784905993e-06, "loss": 0.0003, "step": 203980 }, { "epoch": 1.342012986585791, "grad_norm": 0.029705801613891133, "learning_rate": 2.950289118167426e-06, "loss": 0.0008, "step": 203990 }, { "epoch": 1.3420787747610246, "grad_norm": 0.031764310524292655, "learning_rate": 2.9497654784525253e-06, "loss": 0.0006, "step": 204000 }, { "epoch": 1.3421445629362578, "grad_norm": 0.005813186788646669, "learning_rate": 2.9492418657681977e-06, "loss": 0.0008, "step": 204010 }, { "epoch": 1.3422103511114911, "grad_norm": 0.008132885322130697, "learning_rate": 2.9487182801213447e-06, "loss": 0.0006, "step": 204020 }, { "epoch": 1.3422761392867246, "grad_norm": 0.01404351022935616, "learning_rate": 2.94819472151887e-06, "loss": 0.0004, "step": 204030 }, { "epoch": 1.342341927461958, "grad_norm": 0.02182626597663074, "learning_rate": 2.947671189967675e-06, "loss": 0.0004, "step": 204040 }, { "epoch": 1.3424077156371914, "grad_norm": 0.075585487135284, "learning_rate": 2.9471476854746635e-06, "loss": 0.0006, "step": 204050 }, { "epoch": 1.3424735038124247, "grad_norm": 0.005045552893667307, "learning_rate": 2.9466242080467368e-06, "loss": 0.0005, "step": 204060 }, { "epoch": 1.3425392919876582, "grad_norm": 0.03127295465096946, "learning_rate": 2.9461007576907953e-06, "loss": 0.0006, "step": 204070 }, { "epoch": 1.3426050801628915, "grad_norm": 0.02292382847540863, "learning_rate": 2.945577334413742e-06, "loss": 0.0006, "step": 204080 }, { "epoch": 1.342670868338125, "grad_norm": 0.045994931115952195, "learning_rate": 2.945053938222478e-06, "loss": 0.0008, "step": 204090 }, { "epoch": 1.3427366565133583, "grad_norm": 0.03076151731214129, "learning_rate": 2.9445305691239025e-06, "loss": 0.0004, "step": 204100 }, { "epoch": 1.3428024446885916, "grad_norm": 0.05967172756630313, "learning_rate": 2.9440072271249164e-06, "loss": 0.0008, "step": 204110 }, { "epoch": 1.342868232863825, "grad_norm": 0.11460194765955403, "learning_rate": 2.94348391223242e-06, "loss": 0.0005, "step": 204120 }, { "epoch": 1.3429340210390586, "grad_norm": 0.013178811661018404, "learning_rate": 2.9429606244533104e-06, "loss": 0.0011, "step": 204130 }, { "epoch": 1.3429998092142919, "grad_norm": 0.00346168129493319, "learning_rate": 2.942437363794489e-06, "loss": 0.0003, "step": 204140 }, { "epoch": 1.3430655973895251, "grad_norm": 0.004655671258978292, "learning_rate": 2.9419141302628546e-06, "loss": 0.0003, "step": 204150 }, { "epoch": 1.3431313855647586, "grad_norm": 0.01687228361879727, "learning_rate": 2.941390923865305e-06, "loss": 0.0003, "step": 204160 }, { "epoch": 1.343197173739992, "grad_norm": 0.02830023364722681, "learning_rate": 2.9408677446087376e-06, "loss": 0.0006, "step": 204170 }, { "epoch": 1.3432629619152254, "grad_norm": 0.05112138380644965, "learning_rate": 2.9403445925000506e-06, "loss": 0.0012, "step": 204180 }, { "epoch": 1.3433287500904587, "grad_norm": 0.027013064327664007, "learning_rate": 2.9398214675461417e-06, "loss": 0.0005, "step": 204190 }, { "epoch": 1.343394538265692, "grad_norm": 0.028236376988165373, "learning_rate": 2.939298369753906e-06, "loss": 0.0005, "step": 204200 }, { "epoch": 1.3434603264409255, "grad_norm": 0.02837659640837405, "learning_rate": 2.938775299130243e-06, "loss": 0.0006, "step": 204210 }, { "epoch": 1.343526114616159, "grad_norm": 0.03391137623110731, "learning_rate": 2.938252255682048e-06, "loss": 0.0008, "step": 204220 }, { "epoch": 1.3435919027913923, "grad_norm": 0.0074746350318877115, "learning_rate": 2.9377292394162156e-06, "loss": 0.001, "step": 204230 }, { "epoch": 1.3436576909666256, "grad_norm": 0.03587352253775981, "learning_rate": 2.937206250339642e-06, "loss": 0.0004, "step": 204240 }, { "epoch": 1.343723479141859, "grad_norm": 0.03770599981409245, "learning_rate": 2.936683288459223e-06, "loss": 0.0007, "step": 204250 }, { "epoch": 1.3437892673170924, "grad_norm": 0.007253720934955341, "learning_rate": 2.9361603537818512e-06, "loss": 0.001, "step": 204260 }, { "epoch": 1.3438550554923259, "grad_norm": 0.01571228939935924, "learning_rate": 2.9356374463144246e-06, "loss": 0.0006, "step": 204270 }, { "epoch": 1.3439208436675592, "grad_norm": 0.017900150241399543, "learning_rate": 2.9351145660638343e-06, "loss": 0.0006, "step": 204280 }, { "epoch": 1.3439866318427927, "grad_norm": 0.0015604637230649358, "learning_rate": 2.9345917130369757e-06, "loss": 0.0009, "step": 204290 }, { "epoch": 1.344052420018026, "grad_norm": 0.029831761309251956, "learning_rate": 2.9340688872407415e-06, "loss": 0.0013, "step": 204300 }, { "epoch": 1.3441182081932594, "grad_norm": 0.04827112748543276, "learning_rate": 2.9335460886820244e-06, "loss": 0.0006, "step": 204310 }, { "epoch": 1.3441839963684927, "grad_norm": 0.007070838623209156, "learning_rate": 2.933023317367718e-06, "loss": 0.0004, "step": 204320 }, { "epoch": 1.344249784543726, "grad_norm": 0.003616024577011417, "learning_rate": 2.932500573304713e-06, "loss": 0.0002, "step": 204330 }, { "epoch": 1.3443155727189595, "grad_norm": 0.03315063014000542, "learning_rate": 2.9319778564999025e-06, "loss": 0.0011, "step": 204340 }, { "epoch": 1.3443813608941928, "grad_norm": 0.05539195450510522, "learning_rate": 2.9314551669601788e-06, "loss": 0.0004, "step": 204350 }, { "epoch": 1.3444471490694263, "grad_norm": 0.0076413584403644975, "learning_rate": 2.930932504692432e-06, "loss": 0.0004, "step": 204360 }, { "epoch": 1.3445129372446596, "grad_norm": 0.0543849586472276, "learning_rate": 2.930409869703553e-06, "loss": 0.0006, "step": 204370 }, { "epoch": 1.344578725419893, "grad_norm": 0.05672702983359699, "learning_rate": 2.9298872620004323e-06, "loss": 0.0005, "step": 204380 }, { "epoch": 1.3446445135951264, "grad_norm": 0.03151759340307555, "learning_rate": 2.929364681589959e-06, "loss": 0.0004, "step": 204390 }, { "epoch": 1.3447103017703599, "grad_norm": 0.018324858755285812, "learning_rate": 2.9288421284790257e-06, "loss": 0.0005, "step": 204400 }, { "epoch": 1.3447760899455932, "grad_norm": 0.05701071754762199, "learning_rate": 2.9283196026745196e-06, "loss": 0.0006, "step": 204410 }, { "epoch": 1.3448418781208265, "grad_norm": 0.04599320684599268, "learning_rate": 2.9277971041833308e-06, "loss": 0.0007, "step": 204420 }, { "epoch": 1.34490766629606, "grad_norm": 0.05534925071624757, "learning_rate": 2.9272746330123473e-06, "loss": 0.0004, "step": 204430 }, { "epoch": 1.3449734544712935, "grad_norm": 0.01876272853847335, "learning_rate": 2.926752189168458e-06, "loss": 0.0006, "step": 204440 }, { "epoch": 1.3450392426465267, "grad_norm": 0.060488841219342455, "learning_rate": 2.92622977265855e-06, "loss": 0.0005, "step": 204450 }, { "epoch": 1.34510503082176, "grad_norm": 0.07669547200960534, "learning_rate": 2.9257073834895105e-06, "loss": 0.0005, "step": 204460 }, { "epoch": 1.3451708189969935, "grad_norm": 0.08236840649382517, "learning_rate": 2.9251850216682287e-06, "loss": 0.0006, "step": 204470 }, { "epoch": 1.3452366071722268, "grad_norm": 0.050302105976626044, "learning_rate": 2.9246626872015904e-06, "loss": 0.0008, "step": 204480 }, { "epoch": 1.3453023953474603, "grad_norm": 0.04458795396102054, "learning_rate": 2.924140380096482e-06, "loss": 0.0004, "step": 204490 }, { "epoch": 1.3453681835226936, "grad_norm": 0.04615402185877441, "learning_rate": 2.9236181003597907e-06, "loss": 0.0007, "step": 204500 }, { "epoch": 1.345433971697927, "grad_norm": 0.044014212624905256, "learning_rate": 2.9230958479984006e-06, "loss": 0.0008, "step": 204510 }, { "epoch": 1.3454997598731604, "grad_norm": 0.011369937887361202, "learning_rate": 2.9225736230191975e-06, "loss": 0.0004, "step": 204520 }, { "epoch": 1.345565548048394, "grad_norm": 0.027516864766336385, "learning_rate": 2.9220514254290677e-06, "loss": 0.0006, "step": 204530 }, { "epoch": 1.3456313362236272, "grad_norm": 0.0006158203323148314, "learning_rate": 2.9215292552348956e-06, "loss": 0.0003, "step": 204540 }, { "epoch": 1.3456971243988605, "grad_norm": 0.021707167863963282, "learning_rate": 2.9210071124435654e-06, "loss": 0.0005, "step": 204550 }, { "epoch": 1.345762912574094, "grad_norm": 0.062159208289335445, "learning_rate": 2.92048499706196e-06, "loss": 0.0009, "step": 204560 }, { "epoch": 1.3458287007493273, "grad_norm": 0.0020238853557424685, "learning_rate": 2.9199629090969645e-06, "loss": 0.0008, "step": 204570 }, { "epoch": 1.3458944889245608, "grad_norm": 0.0638844923205418, "learning_rate": 2.9194408485554617e-06, "loss": 0.0005, "step": 204580 }, { "epoch": 1.345960277099794, "grad_norm": 0.05061296575761702, "learning_rate": 2.918918815444334e-06, "loss": 0.0006, "step": 204590 }, { "epoch": 1.3460260652750273, "grad_norm": 0.006491308446514599, "learning_rate": 2.9183968097704646e-06, "loss": 0.0003, "step": 204600 }, { "epoch": 1.3460918534502608, "grad_norm": 0.0162773090903484, "learning_rate": 2.9178748315407356e-06, "loss": 0.0006, "step": 204610 }, { "epoch": 1.3461576416254943, "grad_norm": 0.09556319607474248, "learning_rate": 2.917352880762027e-06, "loss": 0.0008, "step": 204620 }, { "epoch": 1.3462234298007276, "grad_norm": 0.02176640432377795, "learning_rate": 2.916830957441223e-06, "loss": 0.0004, "step": 204630 }, { "epoch": 1.346289217975961, "grad_norm": 0.003294047738804582, "learning_rate": 2.916309061585204e-06, "loss": 0.0003, "step": 204640 }, { "epoch": 1.3463550061511944, "grad_norm": 0.04510820764635111, "learning_rate": 2.9157871932008507e-06, "loss": 0.0007, "step": 204650 }, { "epoch": 1.3464207943264277, "grad_norm": 0.0485112513201799, "learning_rate": 2.9152653522950436e-06, "loss": 0.0004, "step": 204660 }, { "epoch": 1.3464865825016612, "grad_norm": 0.03225574620761628, "learning_rate": 2.9147435388746615e-06, "loss": 0.0014, "step": 204670 }, { "epoch": 1.3465523706768945, "grad_norm": 0.09571194939168265, "learning_rate": 2.9142217529465857e-06, "loss": 0.0006, "step": 204680 }, { "epoch": 1.346618158852128, "grad_norm": 0.001551279303001337, "learning_rate": 2.9136999945176938e-06, "loss": 0.0009, "step": 204690 }, { "epoch": 1.3466839470273613, "grad_norm": 0.02412942871227645, "learning_rate": 2.9131782635948665e-06, "loss": 0.0007, "step": 204700 }, { "epoch": 1.3467497352025948, "grad_norm": 0.03083088702516466, "learning_rate": 2.912656560184981e-06, "loss": 0.0003, "step": 204710 }, { "epoch": 1.346815523377828, "grad_norm": 0.012547891039971351, "learning_rate": 2.912134884294916e-06, "loss": 0.0004, "step": 204720 }, { "epoch": 1.3468813115530613, "grad_norm": 0.028148005164521418, "learning_rate": 2.9116132359315495e-06, "loss": 0.0004, "step": 204730 }, { "epoch": 1.3469470997282948, "grad_norm": 0.01466167614552146, "learning_rate": 2.911091615101759e-06, "loss": 0.0014, "step": 204740 }, { "epoch": 1.3470128879035284, "grad_norm": 0.012934927135124312, "learning_rate": 2.910570021812419e-06, "loss": 0.0009, "step": 204750 }, { "epoch": 1.3470786760787616, "grad_norm": 0.008474521338646806, "learning_rate": 2.910048456070411e-06, "loss": 0.0008, "step": 204760 }, { "epoch": 1.347144464253995, "grad_norm": 0.01010264133898033, "learning_rate": 2.909526917882609e-06, "loss": 0.0004, "step": 204770 }, { "epoch": 1.3472102524292284, "grad_norm": 0.03528281284408002, "learning_rate": 2.9090054072558893e-06, "loss": 0.0007, "step": 204780 }, { "epoch": 1.3472760406044617, "grad_norm": 0.051928652697169744, "learning_rate": 2.9084839241971273e-06, "loss": 0.0007, "step": 204790 }, { "epoch": 1.3473418287796952, "grad_norm": 0.032649357859896144, "learning_rate": 2.907962468713198e-06, "loss": 0.0008, "step": 204800 }, { "epoch": 1.3474076169549285, "grad_norm": 0.01816923384463465, "learning_rate": 2.907441040810978e-06, "loss": 0.0006, "step": 204810 }, { "epoch": 1.3474734051301618, "grad_norm": 0.008221803865078791, "learning_rate": 2.9069196404973394e-06, "loss": 0.0005, "step": 204820 }, { "epoch": 1.3475391933053953, "grad_norm": 0.030475234762323077, "learning_rate": 2.906398267779158e-06, "loss": 0.0007, "step": 204830 }, { "epoch": 1.3476049814806288, "grad_norm": 0.00015088585923464244, "learning_rate": 2.9058769226633076e-06, "loss": 0.0003, "step": 204840 }, { "epoch": 1.347670769655862, "grad_norm": 0.021629748128387506, "learning_rate": 2.9053556051566612e-06, "loss": 0.0004, "step": 204850 }, { "epoch": 1.3477365578310954, "grad_norm": 0.014439683935799343, "learning_rate": 2.9048343152660918e-06, "loss": 0.0003, "step": 204860 }, { "epoch": 1.3478023460063289, "grad_norm": 0.04298112697617363, "learning_rate": 2.9043130529984725e-06, "loss": 0.0011, "step": 204870 }, { "epoch": 1.3478681341815621, "grad_norm": 0.0673960103097239, "learning_rate": 2.903791818360674e-06, "loss": 0.001, "step": 204880 }, { "epoch": 1.3479339223567957, "grad_norm": 0.05803819919526017, "learning_rate": 2.9032706113595714e-06, "loss": 0.0015, "step": 204890 }, { "epoch": 1.347999710532029, "grad_norm": 0.005609815230847007, "learning_rate": 2.9027494320020353e-06, "loss": 0.0007, "step": 204900 }, { "epoch": 1.3480654987072622, "grad_norm": 0.0448478184894264, "learning_rate": 2.9022282802949365e-06, "loss": 0.0003, "step": 204910 }, { "epoch": 1.3481312868824957, "grad_norm": 0.00041217619832838973, "learning_rate": 2.901707156245146e-06, "loss": 0.0002, "step": 204920 }, { "epoch": 1.3481970750577292, "grad_norm": 0.0018948948594336111, "learning_rate": 2.901186059859535e-06, "loss": 0.0006, "step": 204930 }, { "epoch": 1.3482628632329625, "grad_norm": 0.0031339074739207707, "learning_rate": 2.9006649911449724e-06, "loss": 0.0009, "step": 204940 }, { "epoch": 1.3483286514081958, "grad_norm": 0.0035872326709433014, "learning_rate": 2.9001439501083295e-06, "loss": 0.0007, "step": 204950 }, { "epoch": 1.3483944395834293, "grad_norm": 0.020469179070655638, "learning_rate": 2.8996229367564745e-06, "loss": 0.0006, "step": 204960 }, { "epoch": 1.3484602277586626, "grad_norm": 0.02035108446095663, "learning_rate": 2.8991019510962775e-06, "loss": 0.0013, "step": 204970 }, { "epoch": 1.348526015933896, "grad_norm": 0.0025475709705629537, "learning_rate": 2.8985809931346063e-06, "loss": 0.0007, "step": 204980 }, { "epoch": 1.3485918041091294, "grad_norm": 0.01964373730495235, "learning_rate": 2.898060062878331e-06, "loss": 0.0005, "step": 204990 }, { "epoch": 1.3486575922843629, "grad_norm": 0.0005852637226640806, "learning_rate": 2.897539160334317e-06, "loss": 0.0005, "step": 205000 }, { "epoch": 1.3487233804595962, "grad_norm": 0.006299107316912378, "learning_rate": 2.897018285509434e-06, "loss": 0.0008, "step": 205010 }, { "epoch": 1.3487891686348297, "grad_norm": 0.002529296353504138, "learning_rate": 2.8964974384105483e-06, "loss": 0.0004, "step": 205020 }, { "epoch": 1.348854956810063, "grad_norm": 0.0805310918308432, "learning_rate": 2.8959766190445276e-06, "loss": 0.001, "step": 205030 }, { "epoch": 1.3489207449852962, "grad_norm": 0.006062281164069447, "learning_rate": 2.8954558274182378e-06, "loss": 0.0005, "step": 205040 }, { "epoch": 1.3489865331605297, "grad_norm": 0.00848892544003633, "learning_rate": 2.8949350635385455e-06, "loss": 0.001, "step": 205050 }, { "epoch": 1.349052321335763, "grad_norm": 0.035298913689976216, "learning_rate": 2.894414327412316e-06, "loss": 0.0006, "step": 205060 }, { "epoch": 1.3491181095109965, "grad_norm": 0.0034673736583135627, "learning_rate": 2.893893619046415e-06, "loss": 0.0009, "step": 205070 }, { "epoch": 1.3491838976862298, "grad_norm": 0.05033662099167875, "learning_rate": 2.893372938447708e-06, "loss": 0.0008, "step": 205080 }, { "epoch": 1.3492496858614633, "grad_norm": 0.02770900285677816, "learning_rate": 2.89285228562306e-06, "loss": 0.0003, "step": 205090 }, { "epoch": 1.3493154740366966, "grad_norm": 0.10186052390687149, "learning_rate": 2.892331660579334e-06, "loss": 0.0005, "step": 205100 }, { "epoch": 1.34938126221193, "grad_norm": 0.00041054019991994, "learning_rate": 2.891811063323393e-06, "loss": 0.001, "step": 205110 }, { "epoch": 1.3494470503871634, "grad_norm": 0.08801948474019396, "learning_rate": 2.8912904938621045e-06, "loss": 0.0015, "step": 205120 }, { "epoch": 1.3495128385623967, "grad_norm": 0.025250787522947505, "learning_rate": 2.8907699522023293e-06, "loss": 0.0003, "step": 205130 }, { "epoch": 1.3495786267376302, "grad_norm": 0.029082335272216386, "learning_rate": 2.8902494383509307e-06, "loss": 0.0009, "step": 205140 }, { "epoch": 1.3496444149128637, "grad_norm": 0.020251250522792443, "learning_rate": 2.889728952314771e-06, "loss": 0.0007, "step": 205150 }, { "epoch": 1.349710203088097, "grad_norm": 0.017282805221719155, "learning_rate": 2.8892084941007125e-06, "loss": 0.0013, "step": 205160 }, { "epoch": 1.3497759912633303, "grad_norm": 0.03951603757527124, "learning_rate": 2.8886880637156175e-06, "loss": 0.0008, "step": 205170 }, { "epoch": 1.3498417794385638, "grad_norm": 0.0034364504246070363, "learning_rate": 2.8881676611663467e-06, "loss": 0.0005, "step": 205180 }, { "epoch": 1.349907567613797, "grad_norm": 0.0004640376391093707, "learning_rate": 2.887647286459761e-06, "loss": 0.0006, "step": 205190 }, { "epoch": 1.3499733557890305, "grad_norm": 0.019732923880997663, "learning_rate": 2.887126939602722e-06, "loss": 0.0008, "step": 205200 }, { "epoch": 1.3500391439642638, "grad_norm": 0.0104100660788442, "learning_rate": 2.886606620602089e-06, "loss": 0.0005, "step": 205210 }, { "epoch": 1.3501049321394971, "grad_norm": 0.10062911970329366, "learning_rate": 2.8860863294647234e-06, "loss": 0.0007, "step": 205220 }, { "epoch": 1.3501707203147306, "grad_norm": 0.008576701053535866, "learning_rate": 2.8855660661974827e-06, "loss": 0.0003, "step": 205230 }, { "epoch": 1.3502365084899641, "grad_norm": 0.0010275202335637335, "learning_rate": 2.885045830807226e-06, "loss": 0.0002, "step": 205240 }, { "epoch": 1.3503022966651974, "grad_norm": 0.019255371708502028, "learning_rate": 2.8845256233008152e-06, "loss": 0.0006, "step": 205250 }, { "epoch": 1.3503680848404307, "grad_norm": 0.028121032252085353, "learning_rate": 2.884005443685107e-06, "loss": 0.001, "step": 205260 }, { "epoch": 1.3504338730156642, "grad_norm": 0.06400817656789312, "learning_rate": 2.88348529196696e-06, "loss": 0.0009, "step": 205270 }, { "epoch": 1.3504996611908975, "grad_norm": 0.0462247941337689, "learning_rate": 2.8829651681532304e-06, "loss": 0.0008, "step": 205280 }, { "epoch": 1.350565449366131, "grad_norm": 0.025365765492657486, "learning_rate": 2.882445072250778e-06, "loss": 0.0004, "step": 205290 }, { "epoch": 1.3506312375413643, "grad_norm": 0.07548113159589166, "learning_rate": 2.881925004266457e-06, "loss": 0.0005, "step": 205300 }, { "epoch": 1.3506970257165978, "grad_norm": 0.019744865087817958, "learning_rate": 2.881404964207127e-06, "loss": 0.0002, "step": 205310 }, { "epoch": 1.350762813891831, "grad_norm": 0.01885777436158932, "learning_rate": 2.880884952079642e-06, "loss": 0.0004, "step": 205320 }, { "epoch": 1.3508286020670646, "grad_norm": 0.017912452344466045, "learning_rate": 2.8803649678908585e-06, "loss": 0.0004, "step": 205330 }, { "epoch": 1.3508943902422978, "grad_norm": 0.07614800530887211, "learning_rate": 2.8798450116476328e-06, "loss": 0.0008, "step": 205340 }, { "epoch": 1.3509601784175311, "grad_norm": 0.08560089572958933, "learning_rate": 2.87932508335682e-06, "loss": 0.0008, "step": 205350 }, { "epoch": 1.3510259665927646, "grad_norm": 0.028848227880182126, "learning_rate": 2.8788051830252735e-06, "loss": 0.0004, "step": 205360 }, { "epoch": 1.351091754767998, "grad_norm": 0.10888923030191412, "learning_rate": 2.878285310659847e-06, "loss": 0.0008, "step": 205370 }, { "epoch": 1.3511575429432314, "grad_norm": 0.02641236090896636, "learning_rate": 2.8777654662673982e-06, "loss": 0.0002, "step": 205380 }, { "epoch": 1.3512233311184647, "grad_norm": 0.005773422179450137, "learning_rate": 2.8772456498547787e-06, "loss": 0.0007, "step": 205390 }, { "epoch": 1.3512891192936982, "grad_norm": 0.008023688836880056, "learning_rate": 2.876725861428842e-06, "loss": 0.0013, "step": 205400 }, { "epoch": 1.3513549074689315, "grad_norm": 0.06393438022369075, "learning_rate": 2.8762061009964413e-06, "loss": 0.0018, "step": 205410 }, { "epoch": 1.351420695644165, "grad_norm": 0.05167428371554993, "learning_rate": 2.875686368564429e-06, "loss": 0.0005, "step": 205420 }, { "epoch": 1.3514864838193983, "grad_norm": 0.03256709166133566, "learning_rate": 2.875166664139657e-06, "loss": 0.0002, "step": 205430 }, { "epoch": 1.3515522719946316, "grad_norm": 0.04192908368686485, "learning_rate": 2.874646987728977e-06, "loss": 0.0011, "step": 205440 }, { "epoch": 1.351618060169865, "grad_norm": 0.005713026076888693, "learning_rate": 2.874127339339242e-06, "loss": 0.0006, "step": 205450 }, { "epoch": 1.3516838483450986, "grad_norm": 0.02347797109455213, "learning_rate": 2.8736077189773017e-06, "loss": 0.0009, "step": 205460 }, { "epoch": 1.3517496365203319, "grad_norm": 0.026834514272049175, "learning_rate": 2.8730881266500067e-06, "loss": 0.0004, "step": 205470 }, { "epoch": 1.3518154246955651, "grad_norm": 0.009942164697071975, "learning_rate": 2.8725685623642076e-06, "loss": 0.0002, "step": 205480 }, { "epoch": 1.3518812128707987, "grad_norm": 0.022247156096985757, "learning_rate": 2.8720490261267555e-06, "loss": 0.0004, "step": 205490 }, { "epoch": 1.351947001046032, "grad_norm": 0.0011716808505954181, "learning_rate": 2.8715295179444957e-06, "loss": 0.0012, "step": 205500 }, { "epoch": 1.3520127892212654, "grad_norm": 0.04697570017782532, "learning_rate": 2.8710100378242846e-06, "loss": 0.0011, "step": 205510 }, { "epoch": 1.3520785773964987, "grad_norm": 0.03910846521215813, "learning_rate": 2.8704905857729666e-06, "loss": 0.0017, "step": 205520 }, { "epoch": 1.352144365571732, "grad_norm": 0.05291744551066627, "learning_rate": 2.8699711617973913e-06, "loss": 0.0009, "step": 205530 }, { "epoch": 1.3522101537469655, "grad_norm": 0.0003986488436040194, "learning_rate": 2.8694517659044064e-06, "loss": 0.0007, "step": 205540 }, { "epoch": 1.352275941922199, "grad_norm": 0.04781293555226042, "learning_rate": 2.8689323981008598e-06, "loss": 0.0004, "step": 205550 }, { "epoch": 1.3523417300974323, "grad_norm": 0.10324682532891123, "learning_rate": 2.8684130583935993e-06, "loss": 0.0008, "step": 205560 }, { "epoch": 1.3524075182726656, "grad_norm": 0.03238012055405711, "learning_rate": 2.867893746789472e-06, "loss": 0.001, "step": 205570 }, { "epoch": 1.352473306447899, "grad_norm": 0.02228943572222637, "learning_rate": 2.8673744632953236e-06, "loss": 0.0007, "step": 205580 }, { "epoch": 1.3525390946231324, "grad_norm": 0.015221816716404294, "learning_rate": 2.866855207918001e-06, "loss": 0.0011, "step": 205590 }, { "epoch": 1.3526048827983659, "grad_norm": 0.008824864561759866, "learning_rate": 2.866335980664351e-06, "loss": 0.0003, "step": 205600 }, { "epoch": 1.3526706709735992, "grad_norm": 0.00010146062045640532, "learning_rate": 2.8658167815412176e-06, "loss": 0.0006, "step": 205610 }, { "epoch": 1.3527364591488327, "grad_norm": 0.001536901418745849, "learning_rate": 2.865297610555447e-06, "loss": 0.0024, "step": 205620 }, { "epoch": 1.352802247324066, "grad_norm": 0.026316516088863033, "learning_rate": 2.864778467713881e-06, "loss": 0.0008, "step": 205630 }, { "epoch": 1.3528680354992995, "grad_norm": 0.020542360136141274, "learning_rate": 2.8642593530233697e-06, "loss": 0.0007, "step": 205640 }, { "epoch": 1.3529338236745327, "grad_norm": 0.05206641410128585, "learning_rate": 2.8637402664907543e-06, "loss": 0.0013, "step": 205650 }, { "epoch": 1.352999611849766, "grad_norm": 0.005778770025236226, "learning_rate": 2.8632212081228785e-06, "loss": 0.0006, "step": 205660 }, { "epoch": 1.3530654000249995, "grad_norm": 0.007046240232141328, "learning_rate": 2.8627021779265853e-06, "loss": 0.0003, "step": 205670 }, { "epoch": 1.3531311882002328, "grad_norm": 0.013059102585647145, "learning_rate": 2.862183175908718e-06, "loss": 0.0007, "step": 205680 }, { "epoch": 1.3531969763754663, "grad_norm": 0.0031617686440090827, "learning_rate": 2.86166420207612e-06, "loss": 0.0006, "step": 205690 }, { "epoch": 1.3532627645506996, "grad_norm": 0.008819651654943417, "learning_rate": 2.8611452564356314e-06, "loss": 0.0005, "step": 205700 }, { "epoch": 1.353328552725933, "grad_norm": 0.03054473500700656, "learning_rate": 2.860626338994096e-06, "loss": 0.0011, "step": 205710 }, { "epoch": 1.3533943409011664, "grad_norm": 0.025616849376031113, "learning_rate": 2.8601074497583547e-06, "loss": 0.0012, "step": 205720 }, { "epoch": 1.3534601290764, "grad_norm": 0.033871833598753225, "learning_rate": 2.859588588735248e-06, "loss": 0.0005, "step": 205730 }, { "epoch": 1.3535259172516332, "grad_norm": 0.03793642145533934, "learning_rate": 2.8590697559316185e-06, "loss": 0.0011, "step": 205740 }, { "epoch": 1.3535917054268665, "grad_norm": 0.06357870665675817, "learning_rate": 2.8585509513543042e-06, "loss": 0.0019, "step": 205750 }, { "epoch": 1.3536574936021, "grad_norm": 0.004281672746795532, "learning_rate": 2.8580321750101436e-06, "loss": 0.0004, "step": 205760 }, { "epoch": 1.3537232817773335, "grad_norm": 0.0334826654700664, "learning_rate": 2.857513426905982e-06, "loss": 0.0003, "step": 205770 }, { "epoch": 1.3537890699525668, "grad_norm": 0.014511779015357855, "learning_rate": 2.8569947070486547e-06, "loss": 0.0006, "step": 205780 }, { "epoch": 1.3538548581278, "grad_norm": 0.006090924002073468, "learning_rate": 2.8564760154450023e-06, "loss": 0.0006, "step": 205790 }, { "epoch": 1.3539206463030335, "grad_norm": 0.04222434637401347, "learning_rate": 2.8559573521018614e-06, "loss": 0.0006, "step": 205800 }, { "epoch": 1.3539864344782668, "grad_norm": 0.1438945481559732, "learning_rate": 2.855438717026071e-06, "loss": 0.0008, "step": 205810 }, { "epoch": 1.3540522226535003, "grad_norm": 0.06325665451101979, "learning_rate": 2.85492011022447e-06, "loss": 0.0007, "step": 205820 }, { "epoch": 1.3541180108287336, "grad_norm": 0.02547822072000379, "learning_rate": 2.8544015317038944e-06, "loss": 0.0005, "step": 205830 }, { "epoch": 1.354183799003967, "grad_norm": 0.05147952915596495, "learning_rate": 2.853882981471181e-06, "loss": 0.001, "step": 205840 }, { "epoch": 1.3542495871792004, "grad_norm": 0.07979854052900252, "learning_rate": 2.853364459533167e-06, "loss": 0.0014, "step": 205850 }, { "epoch": 1.354315375354434, "grad_norm": 0.020102443374337694, "learning_rate": 2.8528459658966888e-06, "loss": 0.0006, "step": 205860 }, { "epoch": 1.3543811635296672, "grad_norm": 0.011895120746070216, "learning_rate": 2.8523275005685823e-06, "loss": 0.0006, "step": 205870 }, { "epoch": 1.3544469517049005, "grad_norm": 0.0186178777564782, "learning_rate": 2.8518090635556827e-06, "loss": 0.0003, "step": 205880 }, { "epoch": 1.354512739880134, "grad_norm": 0.03696881802524922, "learning_rate": 2.851290654864824e-06, "loss": 0.0006, "step": 205890 }, { "epoch": 1.3545785280553673, "grad_norm": 0.030423080756609898, "learning_rate": 2.850772274502843e-06, "loss": 0.0003, "step": 205900 }, { "epoch": 1.3546443162306008, "grad_norm": 0.0449546243729807, "learning_rate": 2.8502539224765746e-06, "loss": 0.0007, "step": 205910 }, { "epoch": 1.354710104405834, "grad_norm": 0.04571276580511875, "learning_rate": 2.849735598792851e-06, "loss": 0.0009, "step": 205920 }, { "epoch": 1.3547758925810673, "grad_norm": 0.014664515965009941, "learning_rate": 2.8492173034585065e-06, "loss": 0.0004, "step": 205930 }, { "epoch": 1.3548416807563008, "grad_norm": 0.009245955220416852, "learning_rate": 2.8486990364803747e-06, "loss": 0.0004, "step": 205940 }, { "epoch": 1.3549074689315344, "grad_norm": 0.0507875453969735, "learning_rate": 2.8481807978652876e-06, "loss": 0.0004, "step": 205950 }, { "epoch": 1.3549732571067676, "grad_norm": 0.03818928750101434, "learning_rate": 2.8476625876200783e-06, "loss": 0.0004, "step": 205960 }, { "epoch": 1.355039045282001, "grad_norm": 0.008580850873954644, "learning_rate": 2.8471444057515785e-06, "loss": 0.0004, "step": 205970 }, { "epoch": 1.3551048334572344, "grad_norm": 0.016563830062924993, "learning_rate": 2.846626252266621e-06, "loss": 0.0005, "step": 205980 }, { "epoch": 1.3551706216324677, "grad_norm": 0.05370740334703277, "learning_rate": 2.8461081271720363e-06, "loss": 0.0014, "step": 205990 }, { "epoch": 1.3552364098077012, "grad_norm": 0.015135148033245545, "learning_rate": 2.845590030474656e-06, "loss": 0.0006, "step": 206000 }, { "epoch": 1.3553021979829345, "grad_norm": 0.0621772239867786, "learning_rate": 2.8450719621813106e-06, "loss": 0.0014, "step": 206010 }, { "epoch": 1.355367986158168, "grad_norm": 0.07943992728003949, "learning_rate": 2.844553922298828e-06, "loss": 0.0006, "step": 206020 }, { "epoch": 1.3554337743334013, "grad_norm": 0.06882439779511457, "learning_rate": 2.8440359108340422e-06, "loss": 0.0006, "step": 206030 }, { "epoch": 1.3554995625086348, "grad_norm": 0.031455300679902425, "learning_rate": 2.8435179277937807e-06, "loss": 0.0005, "step": 206040 }, { "epoch": 1.355565350683868, "grad_norm": 0.027663024349281598, "learning_rate": 2.8429999731848733e-06, "loss": 0.0009, "step": 206050 }, { "epoch": 1.3556311388591014, "grad_norm": 0.014775438585705404, "learning_rate": 2.8424820470141485e-06, "loss": 0.0006, "step": 206060 }, { "epoch": 1.3556969270343349, "grad_norm": 0.008134787930954929, "learning_rate": 2.841964149288434e-06, "loss": 0.0007, "step": 206070 }, { "epoch": 1.3557627152095684, "grad_norm": 0.016240270598088403, "learning_rate": 2.8414462800145594e-06, "loss": 0.0006, "step": 206080 }, { "epoch": 1.3558285033848017, "grad_norm": 0.022267812805271882, "learning_rate": 2.84092843919935e-06, "loss": 0.0003, "step": 206090 }, { "epoch": 1.355894291560035, "grad_norm": 0.02043704416784322, "learning_rate": 2.8404106268496356e-06, "loss": 0.0007, "step": 206100 }, { "epoch": 1.3559600797352684, "grad_norm": 0.02935319201431916, "learning_rate": 2.839892842972241e-06, "loss": 0.0003, "step": 206110 }, { "epoch": 1.3560258679105017, "grad_norm": 0.001584076877337612, "learning_rate": 2.8393750875739947e-06, "loss": 0.0007, "step": 206120 }, { "epoch": 1.3560916560857352, "grad_norm": 0.03591648160931335, "learning_rate": 2.8388573606617207e-06, "loss": 0.0006, "step": 206130 }, { "epoch": 1.3561574442609685, "grad_norm": 0.09887896591417426, "learning_rate": 2.8383396622422464e-06, "loss": 0.0009, "step": 206140 }, { "epoch": 1.3562232324362018, "grad_norm": 0.0038719859074201985, "learning_rate": 2.837821992322395e-06, "loss": 0.0004, "step": 206150 }, { "epoch": 1.3562890206114353, "grad_norm": 0.07278933824978169, "learning_rate": 2.837304350908995e-06, "loss": 0.0013, "step": 206160 }, { "epoch": 1.3563548087866688, "grad_norm": 0.006796316621306845, "learning_rate": 2.8367867380088693e-06, "loss": 0.0003, "step": 206170 }, { "epoch": 1.356420596961902, "grad_norm": 0.007612137948092972, "learning_rate": 2.8362691536288424e-06, "loss": 0.0009, "step": 206180 }, { "epoch": 1.3564863851371354, "grad_norm": 0.024368792534936155, "learning_rate": 2.835751597775738e-06, "loss": 0.0006, "step": 206190 }, { "epoch": 1.3565521733123689, "grad_norm": 0.0027190826236416824, "learning_rate": 2.835234070456379e-06, "loss": 0.0005, "step": 206200 }, { "epoch": 1.3566179614876022, "grad_norm": 0.07631241287535144, "learning_rate": 2.8347165716775893e-06, "loss": 0.0011, "step": 206210 }, { "epoch": 1.3566837496628357, "grad_norm": 0.004447590372053495, "learning_rate": 2.8341991014461923e-06, "loss": 0.0006, "step": 206220 }, { "epoch": 1.356749537838069, "grad_norm": 0.019860614360133026, "learning_rate": 2.833681659769009e-06, "loss": 0.0006, "step": 206230 }, { "epoch": 1.3568153260133022, "grad_norm": 0.042468672773474235, "learning_rate": 2.833164246652862e-06, "loss": 0.001, "step": 206240 }, { "epoch": 1.3568811141885357, "grad_norm": 0.030577589217426208, "learning_rate": 2.832646862104573e-06, "loss": 0.0005, "step": 206250 }, { "epoch": 1.3569469023637692, "grad_norm": 0.0057274557293103216, "learning_rate": 2.832129506130964e-06, "loss": 0.0005, "step": 206260 }, { "epoch": 1.3570126905390025, "grad_norm": 0.012725960075033509, "learning_rate": 2.831612178738855e-06, "loss": 0.0004, "step": 206270 }, { "epoch": 1.3570784787142358, "grad_norm": 0.02196244238936824, "learning_rate": 2.831094879935064e-06, "loss": 0.0008, "step": 206280 }, { "epoch": 1.3571442668894693, "grad_norm": 0.008620188770491055, "learning_rate": 2.830577609726416e-06, "loss": 0.0004, "step": 206290 }, { "epoch": 1.3572100550647026, "grad_norm": 0.00040226040482316977, "learning_rate": 2.830060368119729e-06, "loss": 0.0007, "step": 206300 }, { "epoch": 1.357275843239936, "grad_norm": 0.01325217218672505, "learning_rate": 2.829543155121822e-06, "loss": 0.0008, "step": 206310 }, { "epoch": 1.3573416314151694, "grad_norm": 0.0034440950500572065, "learning_rate": 2.8290259707395135e-06, "loss": 0.0002, "step": 206320 }, { "epoch": 1.357407419590403, "grad_norm": 0.011599564420172506, "learning_rate": 2.8285088149796225e-06, "loss": 0.0005, "step": 206330 }, { "epoch": 1.3574732077656362, "grad_norm": 0.0015707787766143949, "learning_rate": 2.827991687848968e-06, "loss": 0.0008, "step": 206340 }, { "epoch": 1.3575389959408697, "grad_norm": 0.09233049375432224, "learning_rate": 2.8274745893543665e-06, "loss": 0.0005, "step": 206350 }, { "epoch": 1.357604784116103, "grad_norm": 0.0007655268541507513, "learning_rate": 2.8269575195026365e-06, "loss": 0.0004, "step": 206360 }, { "epoch": 1.3576705722913363, "grad_norm": 0.07845911441339166, "learning_rate": 2.8264404783005957e-06, "loss": 0.0014, "step": 206370 }, { "epoch": 1.3577363604665698, "grad_norm": 0.006852150813668037, "learning_rate": 2.825923465755059e-06, "loss": 0.0005, "step": 206380 }, { "epoch": 1.357802148641803, "grad_norm": 0.003385865465544414, "learning_rate": 2.825406481872844e-06, "loss": 0.0006, "step": 206390 }, { "epoch": 1.3578679368170365, "grad_norm": 0.006123387862557627, "learning_rate": 2.8248895266607663e-06, "loss": 0.0004, "step": 206400 }, { "epoch": 1.3579337249922698, "grad_norm": 0.01694588132569888, "learning_rate": 2.82437260012564e-06, "loss": 0.0005, "step": 206410 }, { "epoch": 1.3579995131675033, "grad_norm": 0.064102881957228, "learning_rate": 2.8238557022742835e-06, "loss": 0.0004, "step": 206420 }, { "epoch": 1.3580653013427366, "grad_norm": 0.09525118052612877, "learning_rate": 2.8233388331135103e-06, "loss": 0.0007, "step": 206430 }, { "epoch": 1.3581310895179701, "grad_norm": 0.03921637516629978, "learning_rate": 2.822821992650135e-06, "loss": 0.0006, "step": 206440 }, { "epoch": 1.3581968776932034, "grad_norm": 0.02680858362248534, "learning_rate": 2.822305180890971e-06, "loss": 0.0012, "step": 206450 }, { "epoch": 1.3582626658684367, "grad_norm": 0.11518986948744933, "learning_rate": 2.821788397842833e-06, "loss": 0.0006, "step": 206460 }, { "epoch": 1.3583284540436702, "grad_norm": 0.031500200479808144, "learning_rate": 2.821271643512533e-06, "loss": 0.0007, "step": 206470 }, { "epoch": 1.3583942422189037, "grad_norm": 0.005320095657608922, "learning_rate": 2.820754917906885e-06, "loss": 0.0005, "step": 206480 }, { "epoch": 1.358460030394137, "grad_norm": 0.001422399883970268, "learning_rate": 2.820238221032701e-06, "loss": 0.0005, "step": 206490 }, { "epoch": 1.3585258185693703, "grad_norm": 0.031735869823396955, "learning_rate": 2.8197215528967937e-06, "loss": 0.0006, "step": 206500 }, { "epoch": 1.3585916067446038, "grad_norm": 0.004544371812463481, "learning_rate": 2.819204913505975e-06, "loss": 0.0006, "step": 206510 }, { "epoch": 1.358657394919837, "grad_norm": 0.00718255983520424, "learning_rate": 2.818688302867056e-06, "loss": 0.0003, "step": 206520 }, { "epoch": 1.3587231830950706, "grad_norm": 0.003306491944466788, "learning_rate": 2.8181717209868475e-06, "loss": 0.0005, "step": 206530 }, { "epoch": 1.3587889712703038, "grad_norm": 0.047226663278345855, "learning_rate": 2.8176551678721587e-06, "loss": 0.0004, "step": 206540 }, { "epoch": 1.3588547594455371, "grad_norm": 0.07481375071164606, "learning_rate": 2.8171386435298036e-06, "loss": 0.0011, "step": 206550 }, { "epoch": 1.3589205476207706, "grad_norm": 0.006364265966362512, "learning_rate": 2.8166221479665906e-06, "loss": 0.0008, "step": 206560 }, { "epoch": 1.3589863357960041, "grad_norm": 0.000663148078216379, "learning_rate": 2.8161056811893286e-06, "loss": 0.0002, "step": 206570 }, { "epoch": 1.3590521239712374, "grad_norm": 0.013570390030888899, "learning_rate": 2.8155892432048272e-06, "loss": 0.0004, "step": 206580 }, { "epoch": 1.3591179121464707, "grad_norm": 0.04252610048565881, "learning_rate": 2.815072834019895e-06, "loss": 0.0012, "step": 206590 }, { "epoch": 1.3591837003217042, "grad_norm": 0.05878189782053292, "learning_rate": 2.8145564536413408e-06, "loss": 0.0009, "step": 206600 }, { "epoch": 1.3592494884969375, "grad_norm": 0.03380874288308974, "learning_rate": 2.8140401020759713e-06, "loss": 0.0008, "step": 206610 }, { "epoch": 1.359315276672171, "grad_norm": 0.025884921777546983, "learning_rate": 2.813523779330596e-06, "loss": 0.0006, "step": 206620 }, { "epoch": 1.3593810648474043, "grad_norm": 0.03874263524413203, "learning_rate": 2.8130074854120205e-06, "loss": 0.0004, "step": 206630 }, { "epoch": 1.3594468530226378, "grad_norm": 0.07113963727360312, "learning_rate": 2.812491220327053e-06, "loss": 0.0006, "step": 206640 }, { "epoch": 1.359512641197871, "grad_norm": 0.0008770721544974572, "learning_rate": 2.8119749840824996e-06, "loss": 0.0002, "step": 206650 }, { "epoch": 1.3595784293731046, "grad_norm": 0.0424867678890143, "learning_rate": 2.8114587766851663e-06, "loss": 0.0005, "step": 206660 }, { "epoch": 1.3596442175483379, "grad_norm": 0.04077273896375743, "learning_rate": 2.810942598141857e-06, "loss": 0.0007, "step": 206670 }, { "epoch": 1.3597100057235711, "grad_norm": 0.01482531662026434, "learning_rate": 2.8104264484593807e-06, "loss": 0.0006, "step": 206680 }, { "epoch": 1.3597757938988047, "grad_norm": 0.00800391263543723, "learning_rate": 2.8099103276445407e-06, "loss": 0.0003, "step": 206690 }, { "epoch": 1.359841582074038, "grad_norm": 0.028195600955214267, "learning_rate": 2.8093942357041416e-06, "loss": 0.0005, "step": 206700 }, { "epoch": 1.3599073702492714, "grad_norm": 0.04335215181203578, "learning_rate": 2.8088781726449876e-06, "loss": 0.0017, "step": 206710 }, { "epoch": 1.3599731584245047, "grad_norm": 0.03342874145342955, "learning_rate": 2.8083621384738825e-06, "loss": 0.0008, "step": 206720 }, { "epoch": 1.3600389465997382, "grad_norm": 0.003891834439396007, "learning_rate": 2.8078461331976297e-06, "loss": 0.0003, "step": 206730 }, { "epoch": 1.3601047347749715, "grad_norm": 0.08799916204172278, "learning_rate": 2.8073301568230326e-06, "loss": 0.0012, "step": 206740 }, { "epoch": 1.360170522950205, "grad_norm": 0.07258930833923993, "learning_rate": 2.8068142093568932e-06, "loss": 0.0005, "step": 206750 }, { "epoch": 1.3602363111254383, "grad_norm": 0.06790925852458575, "learning_rate": 2.806298290806014e-06, "loss": 0.0004, "step": 206760 }, { "epoch": 1.3603020993006716, "grad_norm": 0.09235294127068272, "learning_rate": 2.8057824011771983e-06, "loss": 0.0009, "step": 206770 }, { "epoch": 1.360367887475905, "grad_norm": 0.027123050207598403, "learning_rate": 2.805266540477246e-06, "loss": 0.0005, "step": 206780 }, { "epoch": 1.3604336756511386, "grad_norm": 0.009487617663737846, "learning_rate": 2.804750708712959e-06, "loss": 0.0002, "step": 206790 }, { "epoch": 1.3604994638263719, "grad_norm": 0.024037841342882782, "learning_rate": 2.8042349058911363e-06, "loss": 0.0006, "step": 206800 }, { "epoch": 1.3605652520016052, "grad_norm": 0.0170695300100949, "learning_rate": 2.8037191320185812e-06, "loss": 0.0009, "step": 206810 }, { "epoch": 1.3606310401768387, "grad_norm": 0.0013527818154470623, "learning_rate": 2.803203387102093e-06, "loss": 0.0005, "step": 206820 }, { "epoch": 1.360696828352072, "grad_norm": 0.03094478331796433, "learning_rate": 2.8026876711484706e-06, "loss": 0.0002, "step": 206830 }, { "epoch": 1.3607626165273055, "grad_norm": 0.020964983908977275, "learning_rate": 2.8021719841645138e-06, "loss": 0.0009, "step": 206840 }, { "epoch": 1.3608284047025387, "grad_norm": 0.04434503200594596, "learning_rate": 2.8016563261570217e-06, "loss": 0.0007, "step": 206850 }, { "epoch": 1.360894192877772, "grad_norm": 0.02323785063592121, "learning_rate": 2.8011406971327914e-06, "loss": 0.0006, "step": 206860 }, { "epoch": 1.3609599810530055, "grad_norm": 0.014956889330267648, "learning_rate": 2.800625097098623e-06, "loss": 0.0009, "step": 206870 }, { "epoch": 1.361025769228239, "grad_norm": 0.03836071981660489, "learning_rate": 2.800109526061314e-06, "loss": 0.0009, "step": 206880 }, { "epoch": 1.3610915574034723, "grad_norm": 0.00029973621654166767, "learning_rate": 2.799593984027659e-06, "loss": 0.0007, "step": 206890 }, { "epoch": 1.3611573455787056, "grad_norm": 0.036848222298682336, "learning_rate": 2.799078471004458e-06, "loss": 0.0007, "step": 206900 }, { "epoch": 1.361223133753939, "grad_norm": 0.02097991885729938, "learning_rate": 2.798562986998503e-06, "loss": 0.0005, "step": 206910 }, { "epoch": 1.3612889219291724, "grad_norm": 0.026710092783170332, "learning_rate": 2.7980475320165965e-06, "loss": 0.0004, "step": 206920 }, { "epoch": 1.361354710104406, "grad_norm": 0.014861807224097263, "learning_rate": 2.7975321060655325e-06, "loss": 0.0017, "step": 206930 }, { "epoch": 1.3614204982796392, "grad_norm": 0.013655092481774983, "learning_rate": 2.7970167091521043e-06, "loss": 0.0005, "step": 206940 }, { "epoch": 1.3614862864548725, "grad_norm": 0.013470405895451716, "learning_rate": 2.796501341283109e-06, "loss": 0.0008, "step": 206950 }, { "epoch": 1.361552074630106, "grad_norm": 0.051213315679826184, "learning_rate": 2.7959860024653394e-06, "loss": 0.0013, "step": 206960 }, { "epoch": 1.3616178628053395, "grad_norm": 0.005326842226004832, "learning_rate": 2.7954706927055917e-06, "loss": 0.0004, "step": 206970 }, { "epoch": 1.3616836509805728, "grad_norm": 0.006496982241823685, "learning_rate": 2.7949554120106586e-06, "loss": 0.0002, "step": 206980 }, { "epoch": 1.361749439155806, "grad_norm": 0.027493199868271378, "learning_rate": 2.794440160387334e-06, "loss": 0.0008, "step": 206990 }, { "epoch": 1.3618152273310395, "grad_norm": 0.009019398948717481, "learning_rate": 2.7939249378424117e-06, "loss": 0.001, "step": 207000 }, { "epoch": 1.3618810155062728, "grad_norm": 0.04470907525339279, "learning_rate": 2.793409744382683e-06, "loss": 0.0005, "step": 207010 }, { "epoch": 1.3619468036815063, "grad_norm": 0.01586616859743899, "learning_rate": 2.792894580014941e-06, "loss": 0.0009, "step": 207020 }, { "epoch": 1.3620125918567396, "grad_norm": 0.013868343366071515, "learning_rate": 2.7923794447459787e-06, "loss": 0.0007, "step": 207030 }, { "epoch": 1.3620783800319731, "grad_norm": 0.09414612593328885, "learning_rate": 2.7918643385825837e-06, "loss": 0.0008, "step": 207040 }, { "epoch": 1.3621441682072064, "grad_norm": 0.03425552760389633, "learning_rate": 2.791349261531553e-06, "loss": 0.0008, "step": 207050 }, { "epoch": 1.36220995638244, "grad_norm": 0.09029867563273551, "learning_rate": 2.7908342135996747e-06, "loss": 0.0007, "step": 207060 }, { "epoch": 1.3622757445576732, "grad_norm": 0.00981904260318345, "learning_rate": 2.790319194793739e-06, "loss": 0.0011, "step": 207070 }, { "epoch": 1.3623415327329065, "grad_norm": 0.01678793103244188, "learning_rate": 2.7898042051205365e-06, "loss": 0.0004, "step": 207080 }, { "epoch": 1.36240732090814, "grad_norm": 0.10657850382279747, "learning_rate": 2.789289244586857e-06, "loss": 0.0025, "step": 207090 }, { "epoch": 1.3624731090833735, "grad_norm": 0.0036785488030059103, "learning_rate": 2.788774313199489e-06, "loss": 0.0004, "step": 207100 }, { "epoch": 1.3625388972586068, "grad_norm": 0.026948047096136348, "learning_rate": 2.788259410965223e-06, "loss": 0.0004, "step": 207110 }, { "epoch": 1.36260468543384, "grad_norm": 0.0095432844965953, "learning_rate": 2.7877445378908454e-06, "loss": 0.0002, "step": 207120 }, { "epoch": 1.3626704736090736, "grad_norm": 0.009374044190620401, "learning_rate": 2.7872296939831454e-06, "loss": 0.0008, "step": 207130 }, { "epoch": 1.3627362617843068, "grad_norm": 0.033750999642539005, "learning_rate": 2.7867148792489114e-06, "loss": 0.0009, "step": 207140 }, { "epoch": 1.3628020499595404, "grad_norm": 0.015306049111289684, "learning_rate": 2.7862000936949295e-06, "loss": 0.0005, "step": 207150 }, { "epoch": 1.3628678381347736, "grad_norm": 0.003060064051993852, "learning_rate": 2.785685337327988e-06, "loss": 0.0005, "step": 207160 }, { "epoch": 1.362933626310007, "grad_norm": 0.025218001540578672, "learning_rate": 2.785170610154871e-06, "loss": 0.001, "step": 207170 }, { "epoch": 1.3629994144852404, "grad_norm": 0.0168808093784002, "learning_rate": 2.784655912182368e-06, "loss": 0.0007, "step": 207180 }, { "epoch": 1.363065202660474, "grad_norm": 0.08821941218318143, "learning_rate": 2.7841412434172645e-06, "loss": 0.0007, "step": 207190 }, { "epoch": 1.3631309908357072, "grad_norm": 0.024982303971057374, "learning_rate": 2.783626603866344e-06, "loss": 0.0008, "step": 207200 }, { "epoch": 1.3631967790109405, "grad_norm": 0.0104934550185798, "learning_rate": 2.783111993536393e-06, "loss": 0.0007, "step": 207210 }, { "epoch": 1.363262567186174, "grad_norm": 0.06117591405917275, "learning_rate": 2.782597412434196e-06, "loss": 0.0009, "step": 207220 }, { "epoch": 1.3633283553614073, "grad_norm": 0.0013325082161455507, "learning_rate": 2.7820828605665364e-06, "loss": 0.0005, "step": 207230 }, { "epoch": 1.3633941435366408, "grad_norm": 0.05737235795632817, "learning_rate": 2.7815683379401994e-06, "loss": 0.0004, "step": 207240 }, { "epoch": 1.363459931711874, "grad_norm": 0.041857492466052394, "learning_rate": 2.781053844561967e-06, "loss": 0.0003, "step": 207250 }, { "epoch": 1.3635257198871074, "grad_norm": 0.047378529351783445, "learning_rate": 2.7805393804386237e-06, "loss": 0.0005, "step": 207260 }, { "epoch": 1.3635915080623409, "grad_norm": 0.03148067822409863, "learning_rate": 2.7800249455769525e-06, "loss": 0.0007, "step": 207270 }, { "epoch": 1.3636572962375744, "grad_norm": 0.014366414780546228, "learning_rate": 2.779510539983734e-06, "loss": 0.0004, "step": 207280 }, { "epoch": 1.3637230844128077, "grad_norm": 0.034632425672097275, "learning_rate": 2.778996163665752e-06, "loss": 0.0009, "step": 207290 }, { "epoch": 1.363788872588041, "grad_norm": 0.008159656852089262, "learning_rate": 2.7784818166297854e-06, "loss": 0.0007, "step": 207300 }, { "epoch": 1.3638546607632744, "grad_norm": 0.06847794056689904, "learning_rate": 2.777967498882619e-06, "loss": 0.0004, "step": 207310 }, { "epoch": 1.3639204489385077, "grad_norm": 0.030820999957390405, "learning_rate": 2.7774532104310326e-06, "loss": 0.002, "step": 207320 }, { "epoch": 1.3639862371137412, "grad_norm": 0.01878274497744875, "learning_rate": 2.7769389512818057e-06, "loss": 0.0004, "step": 207330 }, { "epoch": 1.3640520252889745, "grad_norm": 0.018657749194039862, "learning_rate": 2.7764247214417184e-06, "loss": 0.0006, "step": 207340 }, { "epoch": 1.364117813464208, "grad_norm": 0.003483614549797702, "learning_rate": 2.775910520917552e-06, "loss": 0.0009, "step": 207350 }, { "epoch": 1.3641836016394413, "grad_norm": 0.20388262056739817, "learning_rate": 2.775396349716083e-06, "loss": 0.0009, "step": 207360 }, { "epoch": 1.3642493898146748, "grad_norm": 0.06021279748378773, "learning_rate": 2.7748822078440927e-06, "loss": 0.0007, "step": 207370 }, { "epoch": 1.364315177989908, "grad_norm": 0.026824008525679174, "learning_rate": 2.7743680953083586e-06, "loss": 0.001, "step": 207380 }, { "epoch": 1.3643809661651414, "grad_norm": 0.0032128839541617016, "learning_rate": 2.7738540121156592e-06, "loss": 0.0008, "step": 207390 }, { "epoch": 1.3644467543403749, "grad_norm": 0.0119512155197479, "learning_rate": 2.7733399582727714e-06, "loss": 0.0009, "step": 207400 }, { "epoch": 1.3645125425156082, "grad_norm": 0.011835499716961588, "learning_rate": 2.772825933786474e-06, "loss": 0.0006, "step": 207410 }, { "epoch": 1.3645783306908417, "grad_norm": 0.011174113885310996, "learning_rate": 2.7723119386635428e-06, "loss": 0.0002, "step": 207420 }, { "epoch": 1.364644118866075, "grad_norm": 0.0406565380505089, "learning_rate": 2.7717979729107524e-06, "loss": 0.0005, "step": 207430 }, { "epoch": 1.3647099070413085, "grad_norm": 0.09898388696434307, "learning_rate": 2.7712840365348837e-06, "loss": 0.0006, "step": 207440 }, { "epoch": 1.3647756952165417, "grad_norm": 0.23621996332682912, "learning_rate": 2.7707701295427103e-06, "loss": 0.0007, "step": 207450 }, { "epoch": 1.3648414833917752, "grad_norm": 0.01536101660180128, "learning_rate": 2.770256251941007e-06, "loss": 0.0005, "step": 207460 }, { "epoch": 1.3649072715670085, "grad_norm": 0.024494181065950527, "learning_rate": 2.7697424037365495e-06, "loss": 0.0003, "step": 207470 }, { "epoch": 1.3649730597422418, "grad_norm": 0.010863935991159167, "learning_rate": 2.769228584936112e-06, "loss": 0.001, "step": 207480 }, { "epoch": 1.3650388479174753, "grad_norm": 0.07280841435769535, "learning_rate": 2.76871479554647e-06, "loss": 0.0008, "step": 207490 }, { "epoch": 1.3651046360927088, "grad_norm": 0.006179578771004947, "learning_rate": 2.7682010355743947e-06, "loss": 0.0006, "step": 207500 }, { "epoch": 1.365170424267942, "grad_norm": 0.13877144141524825, "learning_rate": 2.7676873050266627e-06, "loss": 0.001, "step": 207510 }, { "epoch": 1.3652362124431754, "grad_norm": 0.06363825436564655, "learning_rate": 2.767173603910045e-06, "loss": 0.0006, "step": 207520 }, { "epoch": 1.365302000618409, "grad_norm": 0.015041756453229662, "learning_rate": 2.766659932231315e-06, "loss": 0.0008, "step": 207530 }, { "epoch": 1.3653677887936422, "grad_norm": 0.015896332030110042, "learning_rate": 2.7661462899972447e-06, "loss": 0.0006, "step": 207540 }, { "epoch": 1.3654335769688757, "grad_norm": 0.03760104590262912, "learning_rate": 2.765632677214607e-06, "loss": 0.0005, "step": 207550 }, { "epoch": 1.365499365144109, "grad_norm": 0.001988138555854355, "learning_rate": 2.7651190938901695e-06, "loss": 0.001, "step": 207560 }, { "epoch": 1.3655651533193423, "grad_norm": 0.026652816419312192, "learning_rate": 2.76460554003071e-06, "loss": 0.0004, "step": 207570 }, { "epoch": 1.3656309414945758, "grad_norm": 0.017203717228968938, "learning_rate": 2.7640920156429953e-06, "loss": 0.0003, "step": 207580 }, { "epoch": 1.3656967296698093, "grad_norm": 0.056113293599675894, "learning_rate": 2.7635785207337958e-06, "loss": 0.0009, "step": 207590 }, { "epoch": 1.3657625178450425, "grad_norm": 0.03462077998195535, "learning_rate": 2.7630650553098826e-06, "loss": 0.0006, "step": 207600 }, { "epoch": 1.3658283060202758, "grad_norm": 0.036762184824984896, "learning_rate": 2.7625516193780244e-06, "loss": 0.0005, "step": 207610 }, { "epoch": 1.3658940941955093, "grad_norm": 0.0025250906591700577, "learning_rate": 2.7620382129449907e-06, "loss": 0.0004, "step": 207620 }, { "epoch": 1.3659598823707426, "grad_norm": 0.010982641725534011, "learning_rate": 2.7615248360175506e-06, "loss": 0.0009, "step": 207630 }, { "epoch": 1.3660256705459761, "grad_norm": 0.031588085636471475, "learning_rate": 2.7610114886024717e-06, "loss": 0.0011, "step": 207640 }, { "epoch": 1.3660914587212094, "grad_norm": 0.11915080177264298, "learning_rate": 2.7604981707065233e-06, "loss": 0.0008, "step": 207650 }, { "epoch": 1.366157246896443, "grad_norm": 0.01885503229106985, "learning_rate": 2.759984882336472e-06, "loss": 0.0005, "step": 207660 }, { "epoch": 1.3662230350716762, "grad_norm": 0.016425301344324558, "learning_rate": 2.7594716234990855e-06, "loss": 0.0004, "step": 207670 }, { "epoch": 1.3662888232469097, "grad_norm": 0.023920770276225096, "learning_rate": 2.75895839420113e-06, "loss": 0.0006, "step": 207680 }, { "epoch": 1.366354611422143, "grad_norm": 0.011687684915214398, "learning_rate": 2.7584451944493717e-06, "loss": 0.0005, "step": 207690 }, { "epoch": 1.3664203995973763, "grad_norm": 0.04838254291712699, "learning_rate": 2.757932024250579e-06, "loss": 0.0002, "step": 207700 }, { "epoch": 1.3664861877726098, "grad_norm": 0.03697982391407741, "learning_rate": 2.7574188836115163e-06, "loss": 0.0005, "step": 207710 }, { "epoch": 1.366551975947843, "grad_norm": 0.013944953518679361, "learning_rate": 2.756905772538948e-06, "loss": 0.0004, "step": 207720 }, { "epoch": 1.3666177641230766, "grad_norm": 0.07204915145380493, "learning_rate": 2.756392691039641e-06, "loss": 0.0011, "step": 207730 }, { "epoch": 1.3666835522983098, "grad_norm": 0.0008055816429124182, "learning_rate": 2.755879639120358e-06, "loss": 0.0008, "step": 207740 }, { "epoch": 1.3667493404735434, "grad_norm": 0.009610870765323786, "learning_rate": 2.7553666167878647e-06, "loss": 0.0003, "step": 207750 }, { "epoch": 1.3668151286487766, "grad_norm": 0.011714402165914729, "learning_rate": 2.7548536240489233e-06, "loss": 0.0002, "step": 207760 }, { "epoch": 1.3668809168240101, "grad_norm": 0.03663365296067752, "learning_rate": 2.7543406609102976e-06, "loss": 0.0017, "step": 207770 }, { "epoch": 1.3669467049992434, "grad_norm": 0.020056704826452682, "learning_rate": 2.7538277273787505e-06, "loss": 0.0002, "step": 207780 }, { "epoch": 1.3670124931744767, "grad_norm": 0.0361249250104796, "learning_rate": 2.7533148234610456e-06, "loss": 0.0009, "step": 207790 }, { "epoch": 1.3670782813497102, "grad_norm": 0.01940116508419645, "learning_rate": 2.7528019491639444e-06, "loss": 0.0003, "step": 207800 }, { "epoch": 1.3671440695249437, "grad_norm": 0.024028396843857176, "learning_rate": 2.752289104494208e-06, "loss": 0.0009, "step": 207810 }, { "epoch": 1.367209857700177, "grad_norm": 0.03345945995256593, "learning_rate": 2.7517762894585975e-06, "loss": 0.0009, "step": 207820 }, { "epoch": 1.3672756458754103, "grad_norm": 0.03588591687821756, "learning_rate": 2.7512635040638764e-06, "loss": 0.0004, "step": 207830 }, { "epoch": 1.3673414340506438, "grad_norm": 0.06675377220375094, "learning_rate": 2.750750748316804e-06, "loss": 0.0008, "step": 207840 }, { "epoch": 1.367407222225877, "grad_norm": 0.015771115211095126, "learning_rate": 2.7502380222241398e-06, "loss": 0.0009, "step": 207850 }, { "epoch": 1.3674730104011106, "grad_norm": 0.02506694802346251, "learning_rate": 2.749725325792645e-06, "loss": 0.0004, "step": 207860 }, { "epoch": 1.3675387985763439, "grad_norm": 0.03729364903096472, "learning_rate": 2.749212659029078e-06, "loss": 0.0004, "step": 207870 }, { "epoch": 1.3676045867515771, "grad_norm": 0.05141743987569719, "learning_rate": 2.748700021940198e-06, "loss": 0.0004, "step": 207880 }, { "epoch": 1.3676703749268107, "grad_norm": 0.053314424918085045, "learning_rate": 2.7481874145327648e-06, "loss": 0.0004, "step": 207890 }, { "epoch": 1.3677361631020442, "grad_norm": 0.10031808266805319, "learning_rate": 2.7476748368135347e-06, "loss": 0.0014, "step": 207900 }, { "epoch": 1.3678019512772774, "grad_norm": 0.04335981868362772, "learning_rate": 2.7471622887892667e-06, "loss": 0.0003, "step": 207910 }, { "epoch": 1.3678677394525107, "grad_norm": 0.005250088070427127, "learning_rate": 2.746649770466719e-06, "loss": 0.0006, "step": 207920 }, { "epoch": 1.3679335276277442, "grad_norm": 0.0007910158166230391, "learning_rate": 2.746137281852648e-06, "loss": 0.0002, "step": 207930 }, { "epoch": 1.3679993158029775, "grad_norm": 0.01745688206630069, "learning_rate": 2.7456248229538096e-06, "loss": 0.0006, "step": 207940 }, { "epoch": 1.368065103978211, "grad_norm": 0.00767238044636308, "learning_rate": 2.7451123937769585e-06, "loss": 0.0004, "step": 207950 }, { "epoch": 1.3681308921534443, "grad_norm": 0.013038745087400392, "learning_rate": 2.744599994328856e-06, "loss": 0.0006, "step": 207960 }, { "epoch": 1.3681966803286778, "grad_norm": 0.04015021053300705, "learning_rate": 2.7440876246162547e-06, "loss": 0.0007, "step": 207970 }, { "epoch": 1.368262468503911, "grad_norm": 0.03852032396940773, "learning_rate": 2.743575284645909e-06, "loss": 0.0006, "step": 207980 }, { "epoch": 1.3683282566791446, "grad_norm": 0.03359422570662247, "learning_rate": 2.743062974424575e-06, "loss": 0.0005, "step": 207990 }, { "epoch": 1.3683940448543779, "grad_norm": 0.017341968360283788, "learning_rate": 2.7425506939590064e-06, "loss": 0.0012, "step": 208000 }, { "epoch": 1.3684598330296112, "grad_norm": 0.033282932949535105, "learning_rate": 2.7420384432559572e-06, "loss": 0.0005, "step": 208010 }, { "epoch": 1.3685256212048447, "grad_norm": 0.014255991593139483, "learning_rate": 2.7415262223221804e-06, "loss": 0.0006, "step": 208020 }, { "epoch": 1.368591409380078, "grad_norm": 0.016839168044766185, "learning_rate": 2.7410140311644306e-06, "loss": 0.0005, "step": 208030 }, { "epoch": 1.3686571975553115, "grad_norm": 0.011291282625217164, "learning_rate": 2.74050186978946e-06, "loss": 0.0008, "step": 208040 }, { "epoch": 1.3687229857305447, "grad_norm": 0.0634641304159214, "learning_rate": 2.7399897382040204e-06, "loss": 0.0003, "step": 208050 }, { "epoch": 1.3687887739057782, "grad_norm": 0.01871179430813207, "learning_rate": 2.739477636414864e-06, "loss": 0.0007, "step": 208060 }, { "epoch": 1.3688545620810115, "grad_norm": 0.03475980720371583, "learning_rate": 2.7389655644287426e-06, "loss": 0.0003, "step": 208070 }, { "epoch": 1.368920350256245, "grad_norm": 0.06821052717692395, "learning_rate": 2.738453522252406e-06, "loss": 0.0005, "step": 208080 }, { "epoch": 1.3689861384314783, "grad_norm": 0.01494043997675609, "learning_rate": 2.737941509892608e-06, "loss": 0.001, "step": 208090 }, { "epoch": 1.3690519266067116, "grad_norm": 0.005784143140153053, "learning_rate": 2.737429527356098e-06, "loss": 0.0003, "step": 208100 }, { "epoch": 1.369117714781945, "grad_norm": 0.010224321896323722, "learning_rate": 2.7369175746496256e-06, "loss": 0.0007, "step": 208110 }, { "epoch": 1.3691835029571786, "grad_norm": 0.08727740505820769, "learning_rate": 2.73640565177994e-06, "loss": 0.0004, "step": 208120 }, { "epoch": 1.369249291132412, "grad_norm": 0.02079526473775625, "learning_rate": 2.7358937587537915e-06, "loss": 0.0008, "step": 208130 }, { "epoch": 1.3693150793076452, "grad_norm": 0.07724838481024482, "learning_rate": 2.735381895577928e-06, "loss": 0.0016, "step": 208140 }, { "epoch": 1.3693808674828787, "grad_norm": 0.021970401309095243, "learning_rate": 2.7348700622590984e-06, "loss": 0.0006, "step": 208150 }, { "epoch": 1.369446655658112, "grad_norm": 0.02006400102350346, "learning_rate": 2.7343582588040507e-06, "loss": 0.0006, "step": 208160 }, { "epoch": 1.3695124438333455, "grad_norm": 0.03404192980112561, "learning_rate": 2.7338464852195322e-06, "loss": 0.0006, "step": 208170 }, { "epoch": 1.3695782320085788, "grad_norm": 0.04977148276299128, "learning_rate": 2.7333347415122913e-06, "loss": 0.0007, "step": 208180 }, { "epoch": 1.369644020183812, "grad_norm": 0.007699273737221353, "learning_rate": 2.7328230276890745e-06, "loss": 0.0006, "step": 208190 }, { "epoch": 1.3697098083590455, "grad_norm": 0.008059318727118531, "learning_rate": 2.7323113437566273e-06, "loss": 0.0005, "step": 208200 }, { "epoch": 1.369775596534279, "grad_norm": 0.001389120472935371, "learning_rate": 2.731799689721695e-06, "loss": 0.0002, "step": 208210 }, { "epoch": 1.3698413847095123, "grad_norm": 0.0667994544008824, "learning_rate": 2.7312880655910264e-06, "loss": 0.0005, "step": 208220 }, { "epoch": 1.3699071728847456, "grad_norm": 0.008245985239843745, "learning_rate": 2.730776471371365e-06, "loss": 0.0009, "step": 208230 }, { "epoch": 1.3699729610599791, "grad_norm": 0.017707500724975178, "learning_rate": 2.7302649070694565e-06, "loss": 0.0004, "step": 208240 }, { "epoch": 1.3700387492352124, "grad_norm": 0.1176529407022991, "learning_rate": 2.7297533726920453e-06, "loss": 0.0007, "step": 208250 }, { "epoch": 1.370104537410446, "grad_norm": 0.08449223065003685, "learning_rate": 2.7292418682458745e-06, "loss": 0.0005, "step": 208260 }, { "epoch": 1.3701703255856792, "grad_norm": 0.00040200622953660283, "learning_rate": 2.7287303937376885e-06, "loss": 0.0004, "step": 208270 }, { "epoch": 1.3702361137609125, "grad_norm": 0.022626922786231872, "learning_rate": 2.7282189491742313e-06, "loss": 0.0011, "step": 208280 }, { "epoch": 1.370301901936146, "grad_norm": 0.05433516612800693, "learning_rate": 2.727707534562245e-06, "loss": 0.0004, "step": 208290 }, { "epoch": 1.3703676901113795, "grad_norm": 0.007399559427649323, "learning_rate": 2.727196149908472e-06, "loss": 0.0004, "step": 208300 }, { "epoch": 1.3704334782866128, "grad_norm": 0.0899204602967519, "learning_rate": 2.7266847952196552e-06, "loss": 0.0004, "step": 208310 }, { "epoch": 1.370499266461846, "grad_norm": 0.1058360169435305, "learning_rate": 2.7261734705025357e-06, "loss": 0.0011, "step": 208320 }, { "epoch": 1.3705650546370796, "grad_norm": 0.016641217197140356, "learning_rate": 2.7256621757638557e-06, "loss": 0.0005, "step": 208330 }, { "epoch": 1.3706308428123128, "grad_norm": 0.028308207589517933, "learning_rate": 2.725150911010353e-06, "loss": 0.0003, "step": 208340 }, { "epoch": 1.3706966309875463, "grad_norm": 0.0524485360764194, "learning_rate": 2.7246396762487732e-06, "loss": 0.0003, "step": 208350 }, { "epoch": 1.3707624191627796, "grad_norm": 0.020920776324114803, "learning_rate": 2.7241284714858542e-06, "loss": 0.0011, "step": 208360 }, { "epoch": 1.3708282073380131, "grad_norm": 0.051725150562625125, "learning_rate": 2.723617296728336e-06, "loss": 0.0006, "step": 208370 }, { "epoch": 1.3708939955132464, "grad_norm": 0.03192736345157393, "learning_rate": 2.723106151982957e-06, "loss": 0.0006, "step": 208380 }, { "epoch": 1.37095978368848, "grad_norm": 0.03947185264214376, "learning_rate": 2.722595037256457e-06, "loss": 0.001, "step": 208390 }, { "epoch": 1.3710255718637132, "grad_norm": 0.05801453490428907, "learning_rate": 2.7220839525555752e-06, "loss": 0.001, "step": 208400 }, { "epoch": 1.3710913600389465, "grad_norm": 0.13723689305860176, "learning_rate": 2.7215728978870486e-06, "loss": 0.0004, "step": 208410 }, { "epoch": 1.37115714821418, "grad_norm": 0.010960725972831487, "learning_rate": 2.7210618732576155e-06, "loss": 0.0006, "step": 208420 }, { "epoch": 1.3712229363894135, "grad_norm": 0.03218002880111029, "learning_rate": 2.7205508786740143e-06, "loss": 0.0006, "step": 208430 }, { "epoch": 1.3712887245646468, "grad_norm": 0.021702623778293965, "learning_rate": 2.72003991414298e-06, "loss": 0.0005, "step": 208440 }, { "epoch": 1.37135451273988, "grad_norm": 0.04702997303965005, "learning_rate": 2.719528979671251e-06, "loss": 0.001, "step": 208450 }, { "epoch": 1.3714203009151136, "grad_norm": 0.026043105991875065, "learning_rate": 2.7190180752655628e-06, "loss": 0.0004, "step": 208460 }, { "epoch": 1.3714860890903469, "grad_norm": 0.18788239662678882, "learning_rate": 2.7185072009326487e-06, "loss": 0.001, "step": 208470 }, { "epoch": 1.3715518772655804, "grad_norm": 0.02916813217724353, "learning_rate": 2.717996356679249e-06, "loss": 0.0005, "step": 208480 }, { "epoch": 1.3716176654408136, "grad_norm": 0.0243411931944859, "learning_rate": 2.717485542512097e-06, "loss": 0.0005, "step": 208490 }, { "epoch": 1.371683453616047, "grad_norm": 0.10539119138080423, "learning_rate": 2.7169747584379263e-06, "loss": 0.0013, "step": 208500 }, { "epoch": 1.3717492417912804, "grad_norm": 0.006002964261047472, "learning_rate": 2.7164640044634715e-06, "loss": 0.0004, "step": 208510 }, { "epoch": 1.371815029966514, "grad_norm": 0.1016311895939292, "learning_rate": 2.715953280595467e-06, "loss": 0.001, "step": 208520 }, { "epoch": 1.3718808181417472, "grad_norm": 0.024314567050846187, "learning_rate": 2.715442586840646e-06, "loss": 0.0006, "step": 208530 }, { "epoch": 1.3719466063169805, "grad_norm": 0.01618645918842364, "learning_rate": 2.7149319232057412e-06, "loss": 0.0011, "step": 208540 }, { "epoch": 1.372012394492214, "grad_norm": 0.04752216273732465, "learning_rate": 2.7144212896974853e-06, "loss": 0.0007, "step": 208550 }, { "epoch": 1.3720781826674473, "grad_norm": 0.05243906422258413, "learning_rate": 2.7139106863226107e-06, "loss": 0.0006, "step": 208560 }, { "epoch": 1.3721439708426808, "grad_norm": 0.06537378790930681, "learning_rate": 2.7134001130878496e-06, "loss": 0.0006, "step": 208570 }, { "epoch": 1.372209759017914, "grad_norm": 0.1074827687207458, "learning_rate": 2.7128895699999335e-06, "loss": 0.0014, "step": 208580 }, { "epoch": 1.3722755471931474, "grad_norm": 0.025317696137422296, "learning_rate": 2.7123790570655928e-06, "loss": 0.0004, "step": 208590 }, { "epoch": 1.3723413353683809, "grad_norm": 0.006209604619623337, "learning_rate": 2.7118685742915563e-06, "loss": 0.0009, "step": 208600 }, { "epoch": 1.3724071235436144, "grad_norm": 0.028338883065681112, "learning_rate": 2.7113581216845585e-06, "loss": 0.0004, "step": 208610 }, { "epoch": 1.3724729117188477, "grad_norm": 0.03201135060696008, "learning_rate": 2.7108476992513277e-06, "loss": 0.0004, "step": 208620 }, { "epoch": 1.372538699894081, "grad_norm": 0.05456548035089837, "learning_rate": 2.7103373069985923e-06, "loss": 0.0005, "step": 208630 }, { "epoch": 1.3726044880693145, "grad_norm": 0.30469746652411994, "learning_rate": 2.709826944933083e-06, "loss": 0.0007, "step": 208640 }, { "epoch": 1.3726702762445477, "grad_norm": 0.052148797658536104, "learning_rate": 2.709316613061526e-06, "loss": 0.0022, "step": 208650 }, { "epoch": 1.3727360644197812, "grad_norm": 0.029735285718450747, "learning_rate": 2.708806311390652e-06, "loss": 0.0006, "step": 208660 }, { "epoch": 1.3728018525950145, "grad_norm": 0.03353999092069258, "learning_rate": 2.7082960399271886e-06, "loss": 0.0005, "step": 208670 }, { "epoch": 1.372867640770248, "grad_norm": 0.03725874546175301, "learning_rate": 2.7077857986778615e-06, "loss": 0.0007, "step": 208680 }, { "epoch": 1.3729334289454813, "grad_norm": 0.009825923716040146, "learning_rate": 2.7072755876493994e-06, "loss": 0.0009, "step": 208690 }, { "epoch": 1.3729992171207148, "grad_norm": 0.01598986372773637, "learning_rate": 2.706765406848528e-06, "loss": 0.0006, "step": 208700 }, { "epoch": 1.373065005295948, "grad_norm": 0.010450855246265715, "learning_rate": 2.7062552562819753e-06, "loss": 0.0006, "step": 208710 }, { "epoch": 1.3731307934711814, "grad_norm": 0.025765899053166834, "learning_rate": 2.7057451359564655e-06, "loss": 0.0006, "step": 208720 }, { "epoch": 1.373196581646415, "grad_norm": 0.008699728150596523, "learning_rate": 2.7052350458787225e-06, "loss": 0.0004, "step": 208730 }, { "epoch": 1.3732623698216482, "grad_norm": 0.04772616237494737, "learning_rate": 2.704724986055476e-06, "loss": 0.0005, "step": 208740 }, { "epoch": 1.3733281579968817, "grad_norm": 0.034499852276463676, "learning_rate": 2.704214956493448e-06, "loss": 0.0006, "step": 208750 }, { "epoch": 1.373393946172115, "grad_norm": 0.01984721858064745, "learning_rate": 2.703704957199363e-06, "loss": 0.0003, "step": 208760 }, { "epoch": 1.3734597343473485, "grad_norm": 0.00029355756186977934, "learning_rate": 2.703194988179947e-06, "loss": 0.0003, "step": 208770 }, { "epoch": 1.3735255225225818, "grad_norm": 0.09226937512555151, "learning_rate": 2.702685049441919e-06, "loss": 0.001, "step": 208780 }, { "epoch": 1.3735913106978153, "grad_norm": 0.0484734995906404, "learning_rate": 2.702175140992005e-06, "loss": 0.0008, "step": 208790 }, { "epoch": 1.3736570988730485, "grad_norm": 0.4679262515200082, "learning_rate": 2.7016652628369265e-06, "loss": 0.0089, "step": 208800 }, { "epoch": 1.3737228870482818, "grad_norm": 0.00020026973737365, "learning_rate": 2.7011554149834073e-06, "loss": 0.0003, "step": 208810 }, { "epoch": 1.3737886752235153, "grad_norm": 0.026264207248028373, "learning_rate": 2.7006455974381686e-06, "loss": 0.0003, "step": 208820 }, { "epoch": 1.3738544633987488, "grad_norm": 0.11076077426056391, "learning_rate": 2.7001358102079295e-06, "loss": 0.0014, "step": 208830 }, { "epoch": 1.3739202515739821, "grad_norm": 0.026937656407553152, "learning_rate": 2.6996260532994155e-06, "loss": 0.0007, "step": 208840 }, { "epoch": 1.3739860397492154, "grad_norm": 0.00037448675672177464, "learning_rate": 2.6991163267193455e-06, "loss": 0.0008, "step": 208850 }, { "epoch": 1.374051827924449, "grad_norm": 0.06106790595070024, "learning_rate": 2.6986066304744392e-06, "loss": 0.0005, "step": 208860 }, { "epoch": 1.3741176160996822, "grad_norm": 0.004385396545930945, "learning_rate": 2.6980969645714165e-06, "loss": 0.0003, "step": 208870 }, { "epoch": 1.3741834042749157, "grad_norm": 0.012637817078102404, "learning_rate": 2.6975873290169977e-06, "loss": 0.0005, "step": 208880 }, { "epoch": 1.374249192450149, "grad_norm": 0.017476269331542595, "learning_rate": 2.697077723817902e-06, "loss": 0.0017, "step": 208890 }, { "epoch": 1.3743149806253823, "grad_norm": 0.07193121387184703, "learning_rate": 2.696568148980847e-06, "loss": 0.0009, "step": 208900 }, { "epoch": 1.3743807688006158, "grad_norm": 0.03298593437866854, "learning_rate": 2.6960586045125516e-06, "loss": 0.0005, "step": 208910 }, { "epoch": 1.3744465569758493, "grad_norm": 0.008724826885959679, "learning_rate": 2.695549090419734e-06, "loss": 0.0002, "step": 208920 }, { "epoch": 1.3745123451510826, "grad_norm": 0.03805270076669246, "learning_rate": 2.6950396067091113e-06, "loss": 0.0008, "step": 208930 }, { "epoch": 1.3745781333263158, "grad_norm": 0.0661284571060711, "learning_rate": 2.6945301533874e-06, "loss": 0.0005, "step": 208940 }, { "epoch": 1.3746439215015493, "grad_norm": 0.001246326607520518, "learning_rate": 2.6940207304613187e-06, "loss": 0.0005, "step": 208950 }, { "epoch": 1.3747097096767826, "grad_norm": 0.0003865414377461251, "learning_rate": 2.69351133793758e-06, "loss": 0.0004, "step": 208960 }, { "epoch": 1.3747754978520161, "grad_norm": 0.02801349316573894, "learning_rate": 2.6930019758229035e-06, "loss": 0.0004, "step": 208970 }, { "epoch": 1.3748412860272494, "grad_norm": 0.026783696145914637, "learning_rate": 2.692492644124004e-06, "loss": 0.0005, "step": 208980 }, { "epoch": 1.374907074202483, "grad_norm": 0.0012257006042019764, "learning_rate": 2.691983342847597e-06, "loss": 0.0006, "step": 208990 }, { "epoch": 1.3749728623777162, "grad_norm": 0.03139276076029133, "learning_rate": 2.691474072000395e-06, "loss": 0.0008, "step": 209000 }, { "epoch": 1.3750386505529497, "grad_norm": 0.03185363733252541, "learning_rate": 2.6909648315891145e-06, "loss": 0.0006, "step": 209010 }, { "epoch": 1.375104438728183, "grad_norm": 0.021102390607159405, "learning_rate": 2.690455621620468e-06, "loss": 0.0005, "step": 209020 }, { "epoch": 1.3751702269034163, "grad_norm": 0.0019862432795671185, "learning_rate": 2.6899464421011696e-06, "loss": 0.0008, "step": 209030 }, { "epoch": 1.3752360150786498, "grad_norm": 0.012466351926812592, "learning_rate": 2.689437293037932e-06, "loss": 0.0004, "step": 209040 }, { "epoch": 1.375301803253883, "grad_norm": 0.0020705056126179974, "learning_rate": 2.6889281744374685e-06, "loss": 0.0005, "step": 209050 }, { "epoch": 1.3753675914291166, "grad_norm": 0.03174104623813409, "learning_rate": 2.6884190863064908e-06, "loss": 0.0005, "step": 209060 }, { "epoch": 1.3754333796043499, "grad_norm": 0.08999766005704649, "learning_rate": 2.687910028651711e-06, "loss": 0.0009, "step": 209070 }, { "epoch": 1.3754991677795834, "grad_norm": 0.0013092140359498078, "learning_rate": 2.6874010014798412e-06, "loss": 0.001, "step": 209080 }, { "epoch": 1.3755649559548166, "grad_norm": 0.060028593525839884, "learning_rate": 2.6868920047975894e-06, "loss": 0.001, "step": 209090 }, { "epoch": 1.3756307441300502, "grad_norm": 0.036547738509930434, "learning_rate": 2.6863830386116708e-06, "loss": 0.0014, "step": 209100 }, { "epoch": 1.3756965323052834, "grad_norm": 0.02088845831608262, "learning_rate": 2.685874102928794e-06, "loss": 0.0007, "step": 209110 }, { "epoch": 1.3757623204805167, "grad_norm": 0.037109621388676434, "learning_rate": 2.685365197755668e-06, "loss": 0.0009, "step": 209120 }, { "epoch": 1.3758281086557502, "grad_norm": 0.024387467782378883, "learning_rate": 2.6848563230990033e-06, "loss": 0.0006, "step": 209130 }, { "epoch": 1.3758938968309837, "grad_norm": 0.0070538383830337295, "learning_rate": 2.6843474789655082e-06, "loss": 0.0005, "step": 209140 }, { "epoch": 1.375959685006217, "grad_norm": 0.0694040078373277, "learning_rate": 2.683838665361892e-06, "loss": 0.0005, "step": 209150 }, { "epoch": 1.3760254731814503, "grad_norm": 0.006330577923504039, "learning_rate": 2.6833298822948623e-06, "loss": 0.0003, "step": 209160 }, { "epoch": 1.3760912613566838, "grad_norm": 0.05782378928180845, "learning_rate": 2.682821129771127e-06, "loss": 0.0005, "step": 209170 }, { "epoch": 1.376157049531917, "grad_norm": 0.0006126069276843703, "learning_rate": 2.682312407797395e-06, "loss": 0.0004, "step": 209180 }, { "epoch": 1.3762228377071506, "grad_norm": 0.027834240249674073, "learning_rate": 2.681803716380371e-06, "loss": 0.0006, "step": 209190 }, { "epoch": 1.3762886258823839, "grad_norm": 0.034405498791461514, "learning_rate": 2.6812950555267635e-06, "loss": 0.0004, "step": 209200 }, { "epoch": 1.3763544140576172, "grad_norm": 0.0013019499463028568, "learning_rate": 2.680786425243278e-06, "loss": 0.0003, "step": 209210 }, { "epoch": 1.3764202022328507, "grad_norm": 0.12026805107275897, "learning_rate": 2.6802778255366186e-06, "loss": 0.0007, "step": 209220 }, { "epoch": 1.3764859904080842, "grad_norm": 0.019984598260778298, "learning_rate": 2.679769256413494e-06, "loss": 0.0006, "step": 209230 }, { "epoch": 1.3765517785833175, "grad_norm": 0.026085405261049775, "learning_rate": 2.6792607178806085e-06, "loss": 0.0005, "step": 209240 }, { "epoch": 1.3766175667585507, "grad_norm": 0.006069428979973729, "learning_rate": 2.678752209944666e-06, "loss": 0.0008, "step": 209250 }, { "epoch": 1.3766833549337842, "grad_norm": 0.00027515797970295493, "learning_rate": 2.6782437326123712e-06, "loss": 0.0003, "step": 209260 }, { "epoch": 1.3767491431090175, "grad_norm": 0.048497830592721385, "learning_rate": 2.677735285890427e-06, "loss": 0.001, "step": 209270 }, { "epoch": 1.376814931284251, "grad_norm": 0.11600697493383472, "learning_rate": 2.6772268697855374e-06, "loss": 0.0007, "step": 209280 }, { "epoch": 1.3768807194594843, "grad_norm": 0.036682473557504584, "learning_rate": 2.6767184843044057e-06, "loss": 0.0004, "step": 209290 }, { "epoch": 1.3769465076347176, "grad_norm": 0.05766600255756039, "learning_rate": 2.6762101294537345e-06, "loss": 0.0004, "step": 209300 }, { "epoch": 1.377012295809951, "grad_norm": 0.015050415990934297, "learning_rate": 2.675701805240225e-06, "loss": 0.0008, "step": 209310 }, { "epoch": 1.3770780839851846, "grad_norm": 0.007184583660464174, "learning_rate": 2.675193511670581e-06, "loss": 0.0003, "step": 209320 }, { "epoch": 1.377143872160418, "grad_norm": 0.0323292704492405, "learning_rate": 2.674685248751502e-06, "loss": 0.0003, "step": 209330 }, { "epoch": 1.3772096603356512, "grad_norm": 0.00602507657838953, "learning_rate": 2.6741770164896894e-06, "loss": 0.0003, "step": 209340 }, { "epoch": 1.3772754485108847, "grad_norm": 0.03271185740903271, "learning_rate": 2.6736688148918428e-06, "loss": 0.0005, "step": 209350 }, { "epoch": 1.377341236686118, "grad_norm": 0.007572180828172562, "learning_rate": 2.6731606439646652e-06, "loss": 0.0004, "step": 209360 }, { "epoch": 1.3774070248613515, "grad_norm": 0.2486876007474868, "learning_rate": 2.6726525037148556e-06, "loss": 0.0018, "step": 209370 }, { "epoch": 1.3774728130365848, "grad_norm": 0.026056005681242618, "learning_rate": 2.672144394149112e-06, "loss": 0.0005, "step": 209380 }, { "epoch": 1.3775386012118183, "grad_norm": 0.024117544441108262, "learning_rate": 2.6716363152741343e-06, "loss": 0.0006, "step": 209390 }, { "epoch": 1.3776043893870515, "grad_norm": 0.004275764400586765, "learning_rate": 2.671128267096621e-06, "loss": 0.0006, "step": 209400 }, { "epoch": 1.377670177562285, "grad_norm": 0.12316906780490501, "learning_rate": 2.6706202496232697e-06, "loss": 0.0019, "step": 209410 }, { "epoch": 1.3777359657375183, "grad_norm": 0.012188325628062682, "learning_rate": 2.670112262860779e-06, "loss": 0.0006, "step": 209420 }, { "epoch": 1.3778017539127516, "grad_norm": 0.14008311090287184, "learning_rate": 2.669604306815846e-06, "loss": 0.001, "step": 209430 }, { "epoch": 1.3778675420879851, "grad_norm": 0.0030733872384681275, "learning_rate": 2.6690963814951677e-06, "loss": 0.0002, "step": 209440 }, { "epoch": 1.3779333302632186, "grad_norm": 0.00814616900024872, "learning_rate": 2.6685884869054402e-06, "loss": 0.0003, "step": 209450 }, { "epoch": 1.377999118438452, "grad_norm": 0.012135346697888738, "learning_rate": 2.6680806230533597e-06, "loss": 0.0006, "step": 209460 }, { "epoch": 1.3780649066136852, "grad_norm": 0.0006677227391562923, "learning_rate": 2.667572789945623e-06, "loss": 0.0007, "step": 209470 }, { "epoch": 1.3781306947889187, "grad_norm": 0.07828294716925716, "learning_rate": 2.6670649875889217e-06, "loss": 0.0008, "step": 209480 }, { "epoch": 1.378196482964152, "grad_norm": 0.0014481581872788714, "learning_rate": 2.666557215989956e-06, "loss": 0.0006, "step": 209490 }, { "epoch": 1.3782622711393855, "grad_norm": 0.026106200059855214, "learning_rate": 2.6660494751554182e-06, "loss": 0.0008, "step": 209500 }, { "epoch": 1.3783280593146188, "grad_norm": 0.038914377794714086, "learning_rate": 2.6655417650920025e-06, "loss": 0.0003, "step": 209510 }, { "epoch": 1.378393847489852, "grad_norm": 0.007600391728772748, "learning_rate": 2.6650340858064027e-06, "loss": 0.0007, "step": 209520 }, { "epoch": 1.3784596356650856, "grad_norm": 0.00556971158353865, "learning_rate": 2.6645264373053115e-06, "loss": 0.0007, "step": 209530 }, { "epoch": 1.378525423840319, "grad_norm": 0.043529347468921537, "learning_rate": 2.664018819595422e-06, "loss": 0.0013, "step": 209540 }, { "epoch": 1.3785912120155523, "grad_norm": 0.038568460442334467, "learning_rate": 2.6635112326834265e-06, "loss": 0.001, "step": 209550 }, { "epoch": 1.3786570001907856, "grad_norm": 0.05767418088566427, "learning_rate": 2.663003676576018e-06, "loss": 0.0003, "step": 209560 }, { "epoch": 1.3787227883660191, "grad_norm": 0.018866040636479824, "learning_rate": 2.6624961512798885e-06, "loss": 0.0005, "step": 209570 }, { "epoch": 1.3787885765412524, "grad_norm": 0.030724240333482058, "learning_rate": 2.6619886568017273e-06, "loss": 0.0005, "step": 209580 }, { "epoch": 1.378854364716486, "grad_norm": 0.019779638918908035, "learning_rate": 2.6614811931482267e-06, "loss": 0.0007, "step": 209590 }, { "epoch": 1.3789201528917192, "grad_norm": 0.009424298674460336, "learning_rate": 2.6609737603260773e-06, "loss": 0.0005, "step": 209600 }, { "epoch": 1.3789859410669525, "grad_norm": 0.019290634108534075, "learning_rate": 2.6604663583419666e-06, "loss": 0.0005, "step": 209610 }, { "epoch": 1.379051729242186, "grad_norm": 0.028689751816504072, "learning_rate": 2.6599589872025878e-06, "loss": 0.0008, "step": 209620 }, { "epoch": 1.3791175174174195, "grad_norm": 0.008044759192540073, "learning_rate": 2.6594516469146294e-06, "loss": 0.0025, "step": 209630 }, { "epoch": 1.3791833055926528, "grad_norm": 0.017400867027380217, "learning_rate": 2.658944337484779e-06, "loss": 0.0004, "step": 209640 }, { "epoch": 1.379249093767886, "grad_norm": 0.0002829714891177993, "learning_rate": 2.658437058919726e-06, "loss": 0.0006, "step": 209650 }, { "epoch": 1.3793148819431196, "grad_norm": 0.04909722548633181, "learning_rate": 2.6579298112261585e-06, "loss": 0.0004, "step": 209660 }, { "epoch": 1.3793806701183529, "grad_norm": 0.019618978859699918, "learning_rate": 2.6574225944107633e-06, "loss": 0.0006, "step": 209670 }, { "epoch": 1.3794464582935864, "grad_norm": 0.02291890994287284, "learning_rate": 2.6569154084802278e-06, "loss": 0.0006, "step": 209680 }, { "epoch": 1.3795122464688196, "grad_norm": 0.09315620200963559, "learning_rate": 2.6564082534412392e-06, "loss": 0.0007, "step": 209690 }, { "epoch": 1.3795780346440532, "grad_norm": 0.05151225794407017, "learning_rate": 2.6559011293004845e-06, "loss": 0.0004, "step": 209700 }, { "epoch": 1.3796438228192864, "grad_norm": 0.01369812191707346, "learning_rate": 2.655394036064648e-06, "loss": 0.0003, "step": 209710 }, { "epoch": 1.37970961099452, "grad_norm": 0.11446159914258575, "learning_rate": 2.654886973740416e-06, "loss": 0.0014, "step": 209720 }, { "epoch": 1.3797753991697532, "grad_norm": 0.026798991243456877, "learning_rate": 2.654379942334474e-06, "loss": 0.0014, "step": 209730 }, { "epoch": 1.3798411873449865, "grad_norm": 0.005830930424421674, "learning_rate": 2.6538729418535052e-06, "loss": 0.0007, "step": 209740 }, { "epoch": 1.37990697552022, "grad_norm": 0.004184497042034018, "learning_rate": 2.653365972304197e-06, "loss": 0.0004, "step": 209750 }, { "epoch": 1.3799727636954533, "grad_norm": 0.008963275903824387, "learning_rate": 2.652859033693232e-06, "loss": 0.0005, "step": 209760 }, { "epoch": 1.3800385518706868, "grad_norm": 0.0202244136615367, "learning_rate": 2.6523521260272935e-06, "loss": 0.0004, "step": 209770 }, { "epoch": 1.38010434004592, "grad_norm": 0.02862898210334574, "learning_rate": 2.651845249313064e-06, "loss": 0.0014, "step": 209780 }, { "epoch": 1.3801701282211536, "grad_norm": 0.0004943965721446376, "learning_rate": 2.6513384035572274e-06, "loss": 0.0005, "step": 209790 }, { "epoch": 1.3802359163963869, "grad_norm": 0.01775585830100511, "learning_rate": 2.650831588766466e-06, "loss": 0.0004, "step": 209800 }, { "epoch": 1.3803017045716204, "grad_norm": 0.04448690844137159, "learning_rate": 2.6503248049474606e-06, "loss": 0.0006, "step": 209810 }, { "epoch": 1.3803674927468537, "grad_norm": 0.0007143466570484709, "learning_rate": 2.6498180521068937e-06, "loss": 0.001, "step": 209820 }, { "epoch": 1.380433280922087, "grad_norm": 0.04395087809561305, "learning_rate": 2.649311330251446e-06, "loss": 0.0003, "step": 209830 }, { "epoch": 1.3804990690973205, "grad_norm": 0.010750578049000287, "learning_rate": 2.648804639387798e-06, "loss": 0.0002, "step": 209840 }, { "epoch": 1.380564857272554, "grad_norm": 0.041349364973210415, "learning_rate": 2.6482979795226304e-06, "loss": 0.0008, "step": 209850 }, { "epoch": 1.3806306454477872, "grad_norm": 0.0021047483457609945, "learning_rate": 2.647791350662623e-06, "loss": 0.0006, "step": 209860 }, { "epoch": 1.3806964336230205, "grad_norm": 0.04731687375436097, "learning_rate": 2.647284752814453e-06, "loss": 0.0006, "step": 209870 }, { "epoch": 1.380762221798254, "grad_norm": 0.03607833262179874, "learning_rate": 2.646778185984803e-06, "loss": 0.0005, "step": 209880 }, { "epoch": 1.3808280099734873, "grad_norm": 0.0365986195120825, "learning_rate": 2.646271650180351e-06, "loss": 0.0006, "step": 209890 }, { "epoch": 1.3808937981487208, "grad_norm": 0.04263190606211213, "learning_rate": 2.6457651454077738e-06, "loss": 0.0003, "step": 209900 }, { "epoch": 1.380959586323954, "grad_norm": 0.05244297083133084, "learning_rate": 2.6452586716737503e-06, "loss": 0.001, "step": 209910 }, { "epoch": 1.3810253744991874, "grad_norm": 0.020813245761886638, "learning_rate": 2.6447522289849583e-06, "loss": 0.0003, "step": 209920 }, { "epoch": 1.381091162674421, "grad_norm": 0.028653602164552955, "learning_rate": 2.644245817348073e-06, "loss": 0.0007, "step": 209930 }, { "epoch": 1.3811569508496544, "grad_norm": 0.008939679927366597, "learning_rate": 2.6437394367697722e-06, "loss": 0.0002, "step": 209940 }, { "epoch": 1.3812227390248877, "grad_norm": 0.08165687024536712, "learning_rate": 2.6432330872567325e-06, "loss": 0.0007, "step": 209950 }, { "epoch": 1.381288527200121, "grad_norm": 0.013971166713291829, "learning_rate": 2.642726768815628e-06, "loss": 0.0005, "step": 209960 }, { "epoch": 1.3813543153753545, "grad_norm": 0.022171693160641708, "learning_rate": 2.642220481453136e-06, "loss": 0.0011, "step": 209970 }, { "epoch": 1.3814201035505878, "grad_norm": 0.0023717665991439835, "learning_rate": 2.6417142251759307e-06, "loss": 0.0003, "step": 209980 }, { "epoch": 1.3814858917258213, "grad_norm": 0.0002934286275736869, "learning_rate": 2.641207999990686e-06, "loss": 0.0003, "step": 209990 }, { "epoch": 1.3815516799010545, "grad_norm": 0.08867272648005142, "learning_rate": 2.6407018059040756e-06, "loss": 0.0009, "step": 210000 }, { "epoch": 1.381617468076288, "grad_norm": 0.023095598443480317, "learning_rate": 2.6401956429227753e-06, "loss": 0.0008, "step": 210010 }, { "epoch": 1.3816832562515213, "grad_norm": 0.001511921069211829, "learning_rate": 2.639689511053458e-06, "loss": 0.0004, "step": 210020 }, { "epoch": 1.3817490444267548, "grad_norm": 0.04309785257387408, "learning_rate": 2.639183410302796e-06, "loss": 0.0007, "step": 210030 }, { "epoch": 1.3818148326019881, "grad_norm": 0.049296384391229485, "learning_rate": 2.6386773406774614e-06, "loss": 0.0008, "step": 210040 }, { "epoch": 1.3818806207772214, "grad_norm": 0.010496758464084979, "learning_rate": 2.6381713021841266e-06, "loss": 0.0013, "step": 210050 }, { "epoch": 1.381946408952455, "grad_norm": 0.03303109505351542, "learning_rate": 2.6376652948294636e-06, "loss": 0.0007, "step": 210060 }, { "epoch": 1.3820121971276882, "grad_norm": 0.010548241093956282, "learning_rate": 2.6371593186201437e-06, "loss": 0.001, "step": 210070 }, { "epoch": 1.3820779853029217, "grad_norm": 0.0602853013982693, "learning_rate": 2.6366533735628367e-06, "loss": 0.0005, "step": 210080 }, { "epoch": 1.382143773478155, "grad_norm": 0.0205451826390059, "learning_rate": 2.6361474596642144e-06, "loss": 0.0002, "step": 210090 }, { "epoch": 1.3822095616533885, "grad_norm": 0.07643674769360254, "learning_rate": 2.6356415769309462e-06, "loss": 0.0005, "step": 210100 }, { "epoch": 1.3822753498286218, "grad_norm": 0.04794682966254942, "learning_rate": 2.6351357253697022e-06, "loss": 0.0003, "step": 210110 }, { "epoch": 1.3823411380038553, "grad_norm": 0.040195336738274495, "learning_rate": 2.634629904987151e-06, "loss": 0.0007, "step": 210120 }, { "epoch": 1.3824069261790886, "grad_norm": 0.00911701137789281, "learning_rate": 2.63412411578996e-06, "loss": 0.0009, "step": 210130 }, { "epoch": 1.3824727143543218, "grad_norm": 0.009056702023898347, "learning_rate": 2.6336183577848007e-06, "loss": 0.0004, "step": 210140 }, { "epoch": 1.3825385025295553, "grad_norm": 0.023160906578309114, "learning_rate": 2.6331126309783393e-06, "loss": 0.0006, "step": 210150 }, { "epoch": 1.3826042907047889, "grad_norm": 0.017537274642131777, "learning_rate": 2.632606935377244e-06, "loss": 0.0009, "step": 210160 }, { "epoch": 1.3826700788800221, "grad_norm": 0.1042207991374028, "learning_rate": 2.632101270988182e-06, "loss": 0.001, "step": 210170 }, { "epoch": 1.3827358670552554, "grad_norm": 0.05410539388699661, "learning_rate": 2.6315956378178197e-06, "loss": 0.0004, "step": 210180 }, { "epoch": 1.382801655230489, "grad_norm": 0.036845117766702254, "learning_rate": 2.631090035872823e-06, "loss": 0.0005, "step": 210190 }, { "epoch": 1.3828674434057222, "grad_norm": 0.006162007562913775, "learning_rate": 2.630584465159858e-06, "loss": 0.0003, "step": 210200 }, { "epoch": 1.3829332315809557, "grad_norm": 0.008365663639730162, "learning_rate": 2.630078925685591e-06, "loss": 0.0005, "step": 210210 }, { "epoch": 1.382999019756189, "grad_norm": 0.0018557942533748999, "learning_rate": 2.6295734174566865e-06, "loss": 0.0003, "step": 210220 }, { "epoch": 1.3830648079314223, "grad_norm": 0.035133169625761206, "learning_rate": 2.629067940479809e-06, "loss": 0.0007, "step": 210230 }, { "epoch": 1.3831305961066558, "grad_norm": 0.0006828176359457676, "learning_rate": 2.628562494761624e-06, "loss": 0.0006, "step": 210240 }, { "epoch": 1.3831963842818893, "grad_norm": 0.025231665110378897, "learning_rate": 2.6280570803087935e-06, "loss": 0.0005, "step": 210250 }, { "epoch": 1.3832621724571226, "grad_norm": 0.03885423650381413, "learning_rate": 2.6275516971279804e-06, "loss": 0.0008, "step": 210260 }, { "epoch": 1.3833279606323559, "grad_norm": 0.029189552902584653, "learning_rate": 2.6270463452258498e-06, "loss": 0.0006, "step": 210270 }, { "epoch": 1.3833937488075894, "grad_norm": 0.030640209231645527, "learning_rate": 2.626541024609065e-06, "loss": 0.0006, "step": 210280 }, { "epoch": 1.3834595369828226, "grad_norm": 0.013847563865146623, "learning_rate": 2.6260357352842868e-06, "loss": 0.0007, "step": 210290 }, { "epoch": 1.3835253251580562, "grad_norm": 0.05658101984780917, "learning_rate": 2.625530477258177e-06, "loss": 0.0004, "step": 210300 }, { "epoch": 1.3835911133332894, "grad_norm": 0.010802371674334984, "learning_rate": 2.6250252505373973e-06, "loss": 0.0003, "step": 210310 }, { "epoch": 1.383656901508523, "grad_norm": 0.00017473810505542767, "learning_rate": 2.624520055128608e-06, "loss": 0.0005, "step": 210320 }, { "epoch": 1.3837226896837562, "grad_norm": 0.03782045456295833, "learning_rate": 2.624014891038471e-06, "loss": 0.0003, "step": 210330 }, { "epoch": 1.3837884778589897, "grad_norm": 0.06811387259504725, "learning_rate": 2.6235097582736446e-06, "loss": 0.0007, "step": 210340 }, { "epoch": 1.383854266034223, "grad_norm": 0.18429796756170114, "learning_rate": 2.6230046568407904e-06, "loss": 0.0011, "step": 210350 }, { "epoch": 1.3839200542094563, "grad_norm": 0.05390686928659986, "learning_rate": 2.6224995867465662e-06, "loss": 0.0005, "step": 210360 }, { "epoch": 1.3839858423846898, "grad_norm": 0.012263980287677572, "learning_rate": 2.6219945479976323e-06, "loss": 0.0006, "step": 210370 }, { "epoch": 1.384051630559923, "grad_norm": 0.001283066069537408, "learning_rate": 2.621489540600646e-06, "loss": 0.0008, "step": 210380 }, { "epoch": 1.3841174187351566, "grad_norm": 0.04458393253143958, "learning_rate": 2.6209845645622646e-06, "loss": 0.0011, "step": 210390 }, { "epoch": 1.3841832069103899, "grad_norm": 0.009327719557039961, "learning_rate": 2.620479619889148e-06, "loss": 0.0002, "step": 210400 }, { "epoch": 1.3842489950856234, "grad_norm": 0.026584002671422018, "learning_rate": 2.6199747065879534e-06, "loss": 0.0006, "step": 210410 }, { "epoch": 1.3843147832608567, "grad_norm": 0.1340163077964899, "learning_rate": 2.6194698246653362e-06, "loss": 0.0009, "step": 210420 }, { "epoch": 1.3843805714360902, "grad_norm": 0.017361387019006056, "learning_rate": 2.6189649741279537e-06, "loss": 0.0005, "step": 210430 }, { "epoch": 1.3844463596113235, "grad_norm": 0.03307758850566169, "learning_rate": 2.6184601549824617e-06, "loss": 0.0008, "step": 210440 }, { "epoch": 1.3845121477865567, "grad_norm": 0.006449680551184055, "learning_rate": 2.617955367235515e-06, "loss": 0.0018, "step": 210450 }, { "epoch": 1.3845779359617902, "grad_norm": 0.01624670093804222, "learning_rate": 2.6174506108937702e-06, "loss": 0.0005, "step": 210460 }, { "epoch": 1.3846437241370237, "grad_norm": 0.042747750333298175, "learning_rate": 2.6169458859638818e-06, "loss": 0.0005, "step": 210470 }, { "epoch": 1.384709512312257, "grad_norm": 0.016387778114200675, "learning_rate": 2.6164411924525036e-06, "loss": 0.0002, "step": 210480 }, { "epoch": 1.3847753004874903, "grad_norm": 0.10237090778257796, "learning_rate": 2.6159365303662888e-06, "loss": 0.0011, "step": 210490 }, { "epoch": 1.3848410886627238, "grad_norm": 0.021316308832874404, "learning_rate": 2.615431899711892e-06, "loss": 0.0005, "step": 210500 }, { "epoch": 1.384906876837957, "grad_norm": 0.28053471848441736, "learning_rate": 2.6149273004959675e-06, "loss": 0.0008, "step": 210510 }, { "epoch": 1.3849726650131906, "grad_norm": 0.06482482782647274, "learning_rate": 2.6144227327251637e-06, "loss": 0.0005, "step": 210520 }, { "epoch": 1.385038453188424, "grad_norm": 0.03375977592496373, "learning_rate": 2.613918196406138e-06, "loss": 0.0005, "step": 210530 }, { "epoch": 1.3851042413636572, "grad_norm": 0.04183444216546651, "learning_rate": 2.6134136915455392e-06, "loss": 0.0005, "step": 210540 }, { "epoch": 1.3851700295388907, "grad_norm": 0.0055603448505320875, "learning_rate": 2.6129092181500203e-06, "loss": 0.0004, "step": 210550 }, { "epoch": 1.3852358177141242, "grad_norm": 0.09041526651825639, "learning_rate": 2.612404776226232e-06, "loss": 0.0008, "step": 210560 }, { "epoch": 1.3853016058893575, "grad_norm": 0.04234756004935775, "learning_rate": 2.611900365780825e-06, "loss": 0.0008, "step": 210570 }, { "epoch": 1.3853673940645908, "grad_norm": 0.003872458803962384, "learning_rate": 2.6113959868204485e-06, "loss": 0.0048, "step": 210580 }, { "epoch": 1.3854331822398243, "grad_norm": 0.10965538187366322, "learning_rate": 2.6108916393517534e-06, "loss": 0.001, "step": 210590 }, { "epoch": 1.3854989704150575, "grad_norm": 0.038979656133146585, "learning_rate": 2.610387323381388e-06, "loss": 0.0003, "step": 210600 }, { "epoch": 1.385564758590291, "grad_norm": 0.008574318148174874, "learning_rate": 2.6098830389160025e-06, "loss": 0.0004, "step": 210610 }, { "epoch": 1.3856305467655243, "grad_norm": 0.027279945697239856, "learning_rate": 2.609378785962245e-06, "loss": 0.0013, "step": 210620 }, { "epoch": 1.3856963349407576, "grad_norm": 0.35120083135160557, "learning_rate": 2.608874564526763e-06, "loss": 0.0007, "step": 210630 }, { "epoch": 1.3857621231159911, "grad_norm": 0.00015725416804034461, "learning_rate": 2.6083703746162052e-06, "loss": 0.0003, "step": 210640 }, { "epoch": 1.3858279112912246, "grad_norm": 0.009104687022845523, "learning_rate": 2.607866216237217e-06, "loss": 0.0005, "step": 210650 }, { "epoch": 1.385893699466458, "grad_norm": 0.032288588872919494, "learning_rate": 2.6073620893964498e-06, "loss": 0.0005, "step": 210660 }, { "epoch": 1.3859594876416912, "grad_norm": 0.07980891425071737, "learning_rate": 2.6068579941005456e-06, "loss": 0.0013, "step": 210670 }, { "epoch": 1.3860252758169247, "grad_norm": 0.008544859009794372, "learning_rate": 2.606353930356151e-06, "loss": 0.0012, "step": 210680 }, { "epoch": 1.386091063992158, "grad_norm": 0.014022161870292722, "learning_rate": 2.605849898169913e-06, "loss": 0.0004, "step": 210690 }, { "epoch": 1.3861568521673915, "grad_norm": 0.015155238253345785, "learning_rate": 2.605345897548477e-06, "loss": 0.0005, "step": 210700 }, { "epoch": 1.3862226403426248, "grad_norm": 0.019191440842695598, "learning_rate": 2.604841928498487e-06, "loss": 0.0004, "step": 210710 }, { "epoch": 1.3862884285178583, "grad_norm": 0.035585043641050376, "learning_rate": 2.604337991026587e-06, "loss": 0.0007, "step": 210720 }, { "epoch": 1.3863542166930916, "grad_norm": 0.014796381420381794, "learning_rate": 2.603834085139422e-06, "loss": 0.0005, "step": 210730 }, { "epoch": 1.386420004868325, "grad_norm": 0.03926358941942667, "learning_rate": 2.6033302108436353e-06, "loss": 0.0005, "step": 210740 }, { "epoch": 1.3864857930435583, "grad_norm": 0.015821417377778757, "learning_rate": 2.6028263681458678e-06, "loss": 0.0008, "step": 210750 }, { "epoch": 1.3865515812187916, "grad_norm": 0.007859205507073086, "learning_rate": 2.6023225570527655e-06, "loss": 0.0004, "step": 210760 }, { "epoch": 1.3866173693940251, "grad_norm": 0.007380196820971892, "learning_rate": 2.60181877757097e-06, "loss": 0.0006, "step": 210770 }, { "epoch": 1.3866831575692586, "grad_norm": 0.033699981591105696, "learning_rate": 2.601315029707123e-06, "loss": 0.0007, "step": 210780 }, { "epoch": 1.386748945744492, "grad_norm": 0.002139091496335801, "learning_rate": 2.600811313467866e-06, "loss": 0.0008, "step": 210790 }, { "epoch": 1.3868147339197252, "grad_norm": 0.02647992812976441, "learning_rate": 2.6003076288598384e-06, "loss": 0.0014, "step": 210800 }, { "epoch": 1.3868805220949587, "grad_norm": 0.014438519476055848, "learning_rate": 2.599803975889683e-06, "loss": 0.0005, "step": 210810 }, { "epoch": 1.386946310270192, "grad_norm": 0.0047337755006735096, "learning_rate": 2.5993003545640393e-06, "loss": 0.0004, "step": 210820 }, { "epoch": 1.3870120984454255, "grad_norm": 0.03838816158040837, "learning_rate": 2.5987967648895473e-06, "loss": 0.0005, "step": 210830 }, { "epoch": 1.3870778866206588, "grad_norm": 0.08458667372773984, "learning_rate": 2.598293206872846e-06, "loss": 0.0008, "step": 210840 }, { "epoch": 1.387143674795892, "grad_norm": 0.027263571859258748, "learning_rate": 2.5977896805205744e-06, "loss": 0.0006, "step": 210850 }, { "epoch": 1.3872094629711256, "grad_norm": 0.010279554821248042, "learning_rate": 2.597286185839371e-06, "loss": 0.0008, "step": 210860 }, { "epoch": 1.387275251146359, "grad_norm": 0.03354519310147476, "learning_rate": 2.596782722835874e-06, "loss": 0.0005, "step": 210870 }, { "epoch": 1.3873410393215924, "grad_norm": 0.04168604352351993, "learning_rate": 2.59627929151672e-06, "loss": 0.0003, "step": 210880 }, { "epoch": 1.3874068274968256, "grad_norm": 0.49241148503971993, "learning_rate": 2.595775891888549e-06, "loss": 0.0013, "step": 210890 }, { "epoch": 1.3874726156720592, "grad_norm": 0.03310077188916375, "learning_rate": 2.5952725239579967e-06, "loss": 0.0009, "step": 210900 }, { "epoch": 1.3875384038472924, "grad_norm": 0.004217414253235807, "learning_rate": 2.5947691877316995e-06, "loss": 0.0003, "step": 210910 }, { "epoch": 1.387604192022526, "grad_norm": 0.1461519656406726, "learning_rate": 2.5942658832162927e-06, "loss": 0.0012, "step": 210920 }, { "epoch": 1.3876699801977592, "grad_norm": 0.0699288570264018, "learning_rate": 2.593762610418412e-06, "loss": 0.0006, "step": 210930 }, { "epoch": 1.3877357683729925, "grad_norm": 0.0003082809918937602, "learning_rate": 2.593259369344694e-06, "loss": 0.0005, "step": 210940 }, { "epoch": 1.387801556548226, "grad_norm": 0.015333134365942473, "learning_rate": 2.5927561600017726e-06, "loss": 0.0007, "step": 210950 }, { "epoch": 1.3878673447234595, "grad_norm": 0.06632842925949274, "learning_rate": 2.5922529823962823e-06, "loss": 0.0008, "step": 210960 }, { "epoch": 1.3879331328986928, "grad_norm": 0.11481842135458802, "learning_rate": 2.5917498365348565e-06, "loss": 0.0009, "step": 210970 }, { "epoch": 1.387998921073926, "grad_norm": 0.026673956659974338, "learning_rate": 2.5912467224241294e-06, "loss": 0.0006, "step": 210980 }, { "epoch": 1.3880647092491596, "grad_norm": 0.04461050866029976, "learning_rate": 2.5907436400707344e-06, "loss": 0.0008, "step": 210990 }, { "epoch": 1.3881304974243929, "grad_norm": 0.009715600064991387, "learning_rate": 2.590240589481303e-06, "loss": 0.0008, "step": 211000 }, { "epoch": 1.3881962855996264, "grad_norm": 0.02267426336260229, "learning_rate": 2.5897375706624662e-06, "loss": 0.0008, "step": 211010 }, { "epoch": 1.3882620737748597, "grad_norm": 0.003365934492003261, "learning_rate": 2.5892345836208607e-06, "loss": 0.0008, "step": 211020 }, { "epoch": 1.3883278619500932, "grad_norm": 0.019055973980758346, "learning_rate": 2.5887316283631144e-06, "loss": 0.0004, "step": 211030 }, { "epoch": 1.3883936501253265, "grad_norm": 0.09560843529905028, "learning_rate": 2.5882287048958593e-06, "loss": 0.0007, "step": 211040 }, { "epoch": 1.38845943830056, "grad_norm": 0.02949689346352039, "learning_rate": 2.5877258132257255e-06, "loss": 0.0006, "step": 211050 }, { "epoch": 1.3885252264757932, "grad_norm": 0.016461036092597484, "learning_rate": 2.587222953359344e-06, "loss": 0.0008, "step": 211060 }, { "epoch": 1.3885910146510265, "grad_norm": 0.06064278019896739, "learning_rate": 2.5867201253033435e-06, "loss": 0.0008, "step": 211070 }, { "epoch": 1.38865680282626, "grad_norm": 0.0016258974776076943, "learning_rate": 2.5862173290643546e-06, "loss": 0.0009, "step": 211080 }, { "epoch": 1.3887225910014933, "grad_norm": 0.07914200265171474, "learning_rate": 2.585714564649005e-06, "loss": 0.0006, "step": 211090 }, { "epoch": 1.3887883791767268, "grad_norm": 0.031613329142217976, "learning_rate": 2.5852118320639243e-06, "loss": 0.0006, "step": 211100 }, { "epoch": 1.38885416735196, "grad_norm": 0.0001785660568269665, "learning_rate": 2.584709131315739e-06, "loss": 0.0004, "step": 211110 }, { "epoch": 1.3889199555271936, "grad_norm": 0.026305418786156533, "learning_rate": 2.5842064624110783e-06, "loss": 0.0003, "step": 211120 }, { "epoch": 1.388985743702427, "grad_norm": 0.0013248815587153723, "learning_rate": 2.5837038253565693e-06, "loss": 0.0006, "step": 211130 }, { "epoch": 1.3890515318776604, "grad_norm": 0.024769660740734726, "learning_rate": 2.5832012201588363e-06, "loss": 0.0007, "step": 211140 }, { "epoch": 1.3891173200528937, "grad_norm": 0.02886917560797794, "learning_rate": 2.5826986468245106e-06, "loss": 0.0004, "step": 211150 }, { "epoch": 1.389183108228127, "grad_norm": 0.033195979812833, "learning_rate": 2.5821961053602145e-06, "loss": 0.001, "step": 211160 }, { "epoch": 1.3892488964033605, "grad_norm": 0.02552840472426584, "learning_rate": 2.5816935957725753e-06, "loss": 0.0006, "step": 211170 }, { "epoch": 1.389314684578594, "grad_norm": 0.006494024199631958, "learning_rate": 2.5811911180682168e-06, "loss": 0.0008, "step": 211180 }, { "epoch": 1.3893804727538273, "grad_norm": 0.000156821117564222, "learning_rate": 2.580688672253765e-06, "loss": 0.0004, "step": 211190 }, { "epoch": 1.3894462609290605, "grad_norm": 0.008930492701795219, "learning_rate": 2.5801862583358433e-06, "loss": 0.0003, "step": 211200 }, { "epoch": 1.389512049104294, "grad_norm": 0.04120653042776075, "learning_rate": 2.5796838763210763e-06, "loss": 0.0008, "step": 211210 }, { "epoch": 1.3895778372795273, "grad_norm": 0.00021854953606246822, "learning_rate": 2.5791815262160865e-06, "loss": 0.0003, "step": 211220 }, { "epoch": 1.3896436254547608, "grad_norm": 0.043895861361513196, "learning_rate": 2.578679208027498e-06, "loss": 0.0003, "step": 211230 }, { "epoch": 1.3897094136299941, "grad_norm": 0.05263521133622966, "learning_rate": 2.578176921761932e-06, "loss": 0.0006, "step": 211240 }, { "epoch": 1.3897752018052274, "grad_norm": 0.0378817092498544, "learning_rate": 2.5776746674260127e-06, "loss": 0.0007, "step": 211250 }, { "epoch": 1.389840989980461, "grad_norm": 0.014804882340095795, "learning_rate": 2.57717244502636e-06, "loss": 0.0006, "step": 211260 }, { "epoch": 1.3899067781556944, "grad_norm": 0.05529582779404518, "learning_rate": 2.5766702545695955e-06, "loss": 0.0004, "step": 211270 }, { "epoch": 1.3899725663309277, "grad_norm": 0.028425887045411595, "learning_rate": 2.5761680960623415e-06, "loss": 0.0006, "step": 211280 }, { "epoch": 1.390038354506161, "grad_norm": 0.044234236808163165, "learning_rate": 2.5756659695112186e-06, "loss": 0.0004, "step": 211290 }, { "epoch": 1.3901041426813945, "grad_norm": 0.03277014767043557, "learning_rate": 2.5751638749228453e-06, "loss": 0.0006, "step": 211300 }, { "epoch": 1.3901699308566278, "grad_norm": 0.002332268545618917, "learning_rate": 2.5746618123038426e-06, "loss": 0.0004, "step": 211310 }, { "epoch": 1.3902357190318613, "grad_norm": 0.009852246405523016, "learning_rate": 2.574159781660829e-06, "loss": 0.0006, "step": 211320 }, { "epoch": 1.3903015072070946, "grad_norm": 0.020550493655222362, "learning_rate": 2.573657783000424e-06, "loss": 0.0003, "step": 211330 }, { "epoch": 1.390367295382328, "grad_norm": 0.043569686633593105, "learning_rate": 2.5731558163292447e-06, "loss": 0.001, "step": 211340 }, { "epoch": 1.3904330835575613, "grad_norm": 0.04481641261978905, "learning_rate": 2.572653881653911e-06, "loss": 0.0005, "step": 211350 }, { "epoch": 1.3904988717327948, "grad_norm": 0.05922652624329457, "learning_rate": 2.572151978981039e-06, "loss": 0.0005, "step": 211360 }, { "epoch": 1.3905646599080281, "grad_norm": 0.044777243139758595, "learning_rate": 2.571650108317246e-06, "loss": 0.0007, "step": 211370 }, { "epoch": 1.3906304480832614, "grad_norm": 0.0258492738298761, "learning_rate": 2.5711482696691504e-06, "loss": 0.001, "step": 211380 }, { "epoch": 1.390696236258495, "grad_norm": 0.021112500665557025, "learning_rate": 2.5706464630433656e-06, "loss": 0.001, "step": 211390 }, { "epoch": 1.3907620244337282, "grad_norm": 0.04747712250898498, "learning_rate": 2.570144688446508e-06, "loss": 0.0008, "step": 211400 }, { "epoch": 1.3908278126089617, "grad_norm": 0.0373520930857703, "learning_rate": 2.5696429458851958e-06, "loss": 0.0004, "step": 211410 }, { "epoch": 1.390893600784195, "grad_norm": 0.012995816172538102, "learning_rate": 2.5691412353660426e-06, "loss": 0.0009, "step": 211420 }, { "epoch": 1.3909593889594285, "grad_norm": 0.00033506102912105447, "learning_rate": 2.568639556895662e-06, "loss": 0.0006, "step": 211430 }, { "epoch": 1.3910251771346618, "grad_norm": 0.02435278461702546, "learning_rate": 2.56813791048067e-06, "loss": 0.0019, "step": 211440 }, { "epoch": 1.3910909653098953, "grad_norm": 0.007684993045690052, "learning_rate": 2.567636296127679e-06, "loss": 0.0006, "step": 211450 }, { "epoch": 1.3911567534851286, "grad_norm": 0.016807594061365876, "learning_rate": 2.567134713843302e-06, "loss": 0.0004, "step": 211460 }, { "epoch": 1.3912225416603619, "grad_norm": 0.06094539688027002, "learning_rate": 2.5666331636341535e-06, "loss": 0.0005, "step": 211470 }, { "epoch": 1.3912883298355954, "grad_norm": 0.010421798278395721, "learning_rate": 2.5661316455068452e-06, "loss": 0.0005, "step": 211480 }, { "epoch": 1.3913541180108289, "grad_norm": 0.02613275193846934, "learning_rate": 2.565630159467989e-06, "loss": 0.0004, "step": 211490 }, { "epoch": 1.3914199061860621, "grad_norm": 0.010987235806790062, "learning_rate": 2.565128705524196e-06, "loss": 0.0015, "step": 211500 }, { "epoch": 1.3914856943612954, "grad_norm": 0.0631480566122449, "learning_rate": 2.564627283682079e-06, "loss": 0.0005, "step": 211510 }, { "epoch": 1.391551482536529, "grad_norm": 0.08285124173969897, "learning_rate": 2.564125893948248e-06, "loss": 0.0012, "step": 211520 }, { "epoch": 1.3916172707117622, "grad_norm": 0.006657133122431877, "learning_rate": 2.563624536329311e-06, "loss": 0.0007, "step": 211530 }, { "epoch": 1.3916830588869957, "grad_norm": 0.04030153572704816, "learning_rate": 2.5631232108318814e-06, "loss": 0.0006, "step": 211540 }, { "epoch": 1.391748847062229, "grad_norm": 0.02314384620025726, "learning_rate": 2.562621917462569e-06, "loss": 0.0004, "step": 211550 }, { "epoch": 1.3918146352374623, "grad_norm": 0.006629118018987998, "learning_rate": 2.562120656227981e-06, "loss": 0.0008, "step": 211560 }, { "epoch": 1.3918804234126958, "grad_norm": 0.0716240868570963, "learning_rate": 2.5616194271347265e-06, "loss": 0.0009, "step": 211570 }, { "epoch": 1.3919462115879293, "grad_norm": 0.003573037958705817, "learning_rate": 2.5611182301894146e-06, "loss": 0.0005, "step": 211580 }, { "epoch": 1.3920119997631626, "grad_norm": 0.0020679865563062033, "learning_rate": 2.560617065398652e-06, "loss": 0.0007, "step": 211590 }, { "epoch": 1.3920777879383959, "grad_norm": 0.04731202015770268, "learning_rate": 2.5601159327690462e-06, "loss": 0.0006, "step": 211600 }, { "epoch": 1.3921435761136294, "grad_norm": 0.012252040528255527, "learning_rate": 2.559614832307205e-06, "loss": 0.0007, "step": 211610 }, { "epoch": 1.3922093642888627, "grad_norm": 0.021230497329936825, "learning_rate": 2.559113764019735e-06, "loss": 0.0003, "step": 211620 }, { "epoch": 1.3922751524640962, "grad_norm": 0.01128135095387182, "learning_rate": 2.558612727913241e-06, "loss": 0.0008, "step": 211630 }, { "epoch": 1.3923409406393294, "grad_norm": 0.0011739463160952918, "learning_rate": 2.5581117239943303e-06, "loss": 0.0005, "step": 211640 }, { "epoch": 1.3924067288145627, "grad_norm": 0.012782500487913401, "learning_rate": 2.5576107522696075e-06, "loss": 0.0004, "step": 211650 }, { "epoch": 1.3924725169897962, "grad_norm": 0.034967960330536764, "learning_rate": 2.5571098127456758e-06, "loss": 0.0004, "step": 211660 }, { "epoch": 1.3925383051650297, "grad_norm": 0.005074400111269418, "learning_rate": 2.556608905429143e-06, "loss": 0.0005, "step": 211670 }, { "epoch": 1.392604093340263, "grad_norm": 0.016772930881368976, "learning_rate": 2.5561080303266118e-06, "loss": 0.0007, "step": 211680 }, { "epoch": 1.3926698815154963, "grad_norm": 0.011636536531234094, "learning_rate": 2.555607187444685e-06, "loss": 0.0003, "step": 211690 }, { "epoch": 1.3927356696907298, "grad_norm": 0.049279070556039216, "learning_rate": 2.5551063767899664e-06, "loss": 0.0006, "step": 211700 }, { "epoch": 1.392801457865963, "grad_norm": 0.06838673191845623, "learning_rate": 2.5546055983690593e-06, "loss": 0.0006, "step": 211710 }, { "epoch": 1.3928672460411966, "grad_norm": 0.04724463022684806, "learning_rate": 2.554104852188565e-06, "loss": 0.0004, "step": 211720 }, { "epoch": 1.3929330342164299, "grad_norm": 0.049093968060857855, "learning_rate": 2.5536041382550856e-06, "loss": 0.0012, "step": 211730 }, { "epoch": 1.3929988223916634, "grad_norm": 0.005509414206265766, "learning_rate": 2.5531034565752235e-06, "loss": 0.0005, "step": 211740 }, { "epoch": 1.3930646105668967, "grad_norm": 0.04279756513402809, "learning_rate": 2.552602807155579e-06, "loss": 0.0007, "step": 211750 }, { "epoch": 1.3931303987421302, "grad_norm": 0.0018680689665718875, "learning_rate": 2.5521021900027522e-06, "loss": 0.0003, "step": 211760 }, { "epoch": 1.3931961869173635, "grad_norm": 0.0006069157598962112, "learning_rate": 2.5516016051233437e-06, "loss": 0.0007, "step": 211770 }, { "epoch": 1.3932619750925967, "grad_norm": 0.005381540218209813, "learning_rate": 2.551101052523954e-06, "loss": 0.0002, "step": 211780 }, { "epoch": 1.3933277632678303, "grad_norm": 0.06283342749800123, "learning_rate": 2.5506005322111793e-06, "loss": 0.0004, "step": 211790 }, { "epoch": 1.3933935514430638, "grad_norm": 0.04016188721385502, "learning_rate": 2.5501000441916235e-06, "loss": 0.0005, "step": 211800 }, { "epoch": 1.393459339618297, "grad_norm": 0.02546216312368603, "learning_rate": 2.5495995884718832e-06, "loss": 0.0007, "step": 211810 }, { "epoch": 1.3935251277935303, "grad_norm": 0.029309057584276228, "learning_rate": 2.5490991650585555e-06, "loss": 0.0004, "step": 211820 }, { "epoch": 1.3935909159687638, "grad_norm": 0.018531233296496925, "learning_rate": 2.5485987739582384e-06, "loss": 0.0013, "step": 211830 }, { "epoch": 1.3936567041439971, "grad_norm": 0.101583267538018, "learning_rate": 2.54809841517753e-06, "loss": 0.001, "step": 211840 }, { "epoch": 1.3937224923192306, "grad_norm": 0.020130093757059913, "learning_rate": 2.5475980887230263e-06, "loss": 0.0005, "step": 211850 }, { "epoch": 1.393788280494464, "grad_norm": 0.0769302967388421, "learning_rate": 2.547097794601323e-06, "loss": 0.0005, "step": 211860 }, { "epoch": 1.3938540686696972, "grad_norm": 0.0034925135555728282, "learning_rate": 2.546597532819017e-06, "loss": 0.0005, "step": 211870 }, { "epoch": 1.3939198568449307, "grad_norm": 0.15108609055765143, "learning_rate": 2.546097303382704e-06, "loss": 0.001, "step": 211880 }, { "epoch": 1.3939856450201642, "grad_norm": 0.005443900497262104, "learning_rate": 2.5455971062989795e-06, "loss": 0.0006, "step": 211890 }, { "epoch": 1.3940514331953975, "grad_norm": 0.06813531587732809, "learning_rate": 2.5450969415744364e-06, "loss": 0.0005, "step": 211900 }, { "epoch": 1.3941172213706308, "grad_norm": 0.03225808892695848, "learning_rate": 2.5445968092156698e-06, "loss": 0.0009, "step": 211910 }, { "epoch": 1.3941830095458643, "grad_norm": 0.010333286307665751, "learning_rate": 2.5440967092292725e-06, "loss": 0.001, "step": 211920 }, { "epoch": 1.3942487977210976, "grad_norm": 0.0009165412863323641, "learning_rate": 2.54359664162184e-06, "loss": 0.0005, "step": 211930 }, { "epoch": 1.394314585896331, "grad_norm": 0.005267128287467327, "learning_rate": 2.5430966063999647e-06, "loss": 0.0006, "step": 211940 }, { "epoch": 1.3943803740715643, "grad_norm": 0.10634255976129911, "learning_rate": 2.5425966035702388e-06, "loss": 0.0011, "step": 211950 }, { "epoch": 1.3944461622467976, "grad_norm": 0.03343039579033483, "learning_rate": 2.5420966331392545e-06, "loss": 0.0004, "step": 211960 }, { "epoch": 1.3945119504220311, "grad_norm": 0.02413175424026959, "learning_rate": 2.5415966951136027e-06, "loss": 0.0007, "step": 211970 }, { "epoch": 1.3945777385972646, "grad_norm": 0.04520377043269317, "learning_rate": 2.5410967894998763e-06, "loss": 0.0008, "step": 211980 }, { "epoch": 1.394643526772498, "grad_norm": 0.037655147445868876, "learning_rate": 2.540596916304664e-06, "loss": 0.0008, "step": 211990 }, { "epoch": 1.3947093149477312, "grad_norm": 0.028091929843863443, "learning_rate": 2.540097075534558e-06, "loss": 0.0004, "step": 212000 }, { "epoch": 1.3947751031229647, "grad_norm": 0.06172568973883028, "learning_rate": 2.5395972671961467e-06, "loss": 0.0004, "step": 212010 }, { "epoch": 1.394840891298198, "grad_norm": 0.0037683628037608725, "learning_rate": 2.5390974912960214e-06, "loss": 0.0006, "step": 212020 }, { "epoch": 1.3949066794734315, "grad_norm": 0.003933264834780577, "learning_rate": 2.5385977478407697e-06, "loss": 0.0003, "step": 212030 }, { "epoch": 1.3949724676486648, "grad_norm": 0.03927208100821841, "learning_rate": 2.538098036836981e-06, "loss": 0.0009, "step": 212040 }, { "epoch": 1.3950382558238983, "grad_norm": 0.048226517783696285, "learning_rate": 2.5375983582912424e-06, "loss": 0.0005, "step": 212050 }, { "epoch": 1.3951040439991316, "grad_norm": 0.012857383667581853, "learning_rate": 2.5370987122101436e-06, "loss": 0.0006, "step": 212060 }, { "epoch": 1.395169832174365, "grad_norm": 0.01998241629738051, "learning_rate": 2.5365990986002714e-06, "loss": 0.0003, "step": 212070 }, { "epoch": 1.3952356203495984, "grad_norm": 0.061662750602348104, "learning_rate": 2.536099517468213e-06, "loss": 0.0004, "step": 212080 }, { "epoch": 1.3953014085248316, "grad_norm": 0.00022047434199629583, "learning_rate": 2.535599968820554e-06, "loss": 0.0003, "step": 212090 }, { "epoch": 1.3953671967000651, "grad_norm": 0.006878118392418421, "learning_rate": 2.535100452663881e-06, "loss": 0.0004, "step": 212100 }, { "epoch": 1.3954329848752984, "grad_norm": 0.002722277000381283, "learning_rate": 2.53460096900478e-06, "loss": 0.0006, "step": 212110 }, { "epoch": 1.395498773050532, "grad_norm": 0.013070056358655942, "learning_rate": 2.534101517849836e-06, "loss": 0.0003, "step": 212120 }, { "epoch": 1.3955645612257652, "grad_norm": 0.08385414117416178, "learning_rate": 2.5336020992056336e-06, "loss": 0.0005, "step": 212130 }, { "epoch": 1.3956303494009987, "grad_norm": 0.03386124384879973, "learning_rate": 2.533102713078758e-06, "loss": 0.0003, "step": 212140 }, { "epoch": 1.395696137576232, "grad_norm": 0.04642248815969788, "learning_rate": 2.5326033594757917e-06, "loss": 0.0006, "step": 212150 }, { "epoch": 1.3957619257514655, "grad_norm": 0.03701276258355845, "learning_rate": 2.5321040384033197e-06, "loss": 0.0009, "step": 212160 }, { "epoch": 1.3958277139266988, "grad_norm": 0.008562988596231213, "learning_rate": 2.531604749867924e-06, "loss": 0.0008, "step": 212170 }, { "epoch": 1.395893502101932, "grad_norm": 0.16159889404488045, "learning_rate": 2.531105493876188e-06, "loss": 0.0004, "step": 212180 }, { "epoch": 1.3959592902771656, "grad_norm": 0.03139423369461625, "learning_rate": 2.5306062704346923e-06, "loss": 0.0005, "step": 212190 }, { "epoch": 1.396025078452399, "grad_norm": 0.043934808800900955, "learning_rate": 2.5301070795500214e-06, "loss": 0.0005, "step": 212200 }, { "epoch": 1.3960908666276324, "grad_norm": 0.004488658552591478, "learning_rate": 2.529607921228756e-06, "loss": 0.0005, "step": 212210 }, { "epoch": 1.3961566548028657, "grad_norm": 0.0003162216304182552, "learning_rate": 2.5291087954774774e-06, "loss": 0.0005, "step": 212220 }, { "epoch": 1.3962224429780992, "grad_norm": 0.018787064612956962, "learning_rate": 2.5286097023027644e-06, "loss": 0.0005, "step": 212230 }, { "epoch": 1.3962882311533324, "grad_norm": 0.009401910711530808, "learning_rate": 2.5281106417111976e-06, "loss": 0.0004, "step": 212240 }, { "epoch": 1.396354019328566, "grad_norm": 0.017364944264446715, "learning_rate": 2.5276116137093577e-06, "loss": 0.0009, "step": 212250 }, { "epoch": 1.3964198075037992, "grad_norm": 0.0003007001514742368, "learning_rate": 2.5271126183038237e-06, "loss": 0.0003, "step": 212260 }, { "epoch": 1.3964855956790325, "grad_norm": 0.01793506862747232, "learning_rate": 2.5266136555011735e-06, "loss": 0.0003, "step": 212270 }, { "epoch": 1.396551383854266, "grad_norm": 0.00043188593713287945, "learning_rate": 2.5261147253079865e-06, "loss": 0.0015, "step": 212280 }, { "epoch": 1.3966171720294995, "grad_norm": 0.04983030767907542, "learning_rate": 2.5256158277308403e-06, "loss": 0.0008, "step": 212290 }, { "epoch": 1.3966829602047328, "grad_norm": 0.042650180006690165, "learning_rate": 2.5251169627763117e-06, "loss": 0.0006, "step": 212300 }, { "epoch": 1.396748748379966, "grad_norm": 0.029683125959935337, "learning_rate": 2.5246181304509792e-06, "loss": 0.0023, "step": 212310 }, { "epoch": 1.3968145365551996, "grad_norm": 0.01080255673831095, "learning_rate": 2.5241193307614163e-06, "loss": 0.0002, "step": 212320 }, { "epoch": 1.3968803247304329, "grad_norm": 0.029921246780175752, "learning_rate": 2.5236205637142042e-06, "loss": 0.0006, "step": 212330 }, { "epoch": 1.3969461129056664, "grad_norm": 0.021194334516076926, "learning_rate": 2.5231218293159165e-06, "loss": 0.0004, "step": 212340 }, { "epoch": 1.3970119010808997, "grad_norm": 0.04941172850572577, "learning_rate": 2.5226231275731274e-06, "loss": 0.0005, "step": 212350 }, { "epoch": 1.3970776892561332, "grad_norm": 0.0029005220404001737, "learning_rate": 2.5221244584924132e-06, "loss": 0.0004, "step": 212360 }, { "epoch": 1.3971434774313665, "grad_norm": 0.018476052303008157, "learning_rate": 2.521625822080348e-06, "loss": 0.0002, "step": 212370 }, { "epoch": 1.3972092656066, "grad_norm": 0.040926882051561204, "learning_rate": 2.5211272183435055e-06, "loss": 0.0008, "step": 212380 }, { "epoch": 1.3972750537818333, "grad_norm": 0.022446501918987036, "learning_rate": 2.52062864728846e-06, "loss": 0.001, "step": 212390 }, { "epoch": 1.3973408419570665, "grad_norm": 0.06804765059407869, "learning_rate": 2.5201301089217846e-06, "loss": 0.0006, "step": 212400 }, { "epoch": 1.3974066301323, "grad_norm": 0.03007489675143368, "learning_rate": 2.519631603250052e-06, "loss": 0.0006, "step": 212410 }, { "epoch": 1.3974724183075333, "grad_norm": 0.0205572070621694, "learning_rate": 2.5191331302798345e-06, "loss": 0.0007, "step": 212420 }, { "epoch": 1.3975382064827668, "grad_norm": 0.0035649289427865864, "learning_rate": 2.518634690017704e-06, "loss": 0.0015, "step": 212430 }, { "epoch": 1.3976039946580001, "grad_norm": 0.02145176557670782, "learning_rate": 2.5181362824702322e-06, "loss": 0.0014, "step": 212440 }, { "epoch": 1.3976697828332336, "grad_norm": 0.04378468917850882, "learning_rate": 2.517637907643988e-06, "loss": 0.0007, "step": 212450 }, { "epoch": 1.397735571008467, "grad_norm": 0.06662542935586722, "learning_rate": 2.5171395655455454e-06, "loss": 0.0008, "step": 212460 }, { "epoch": 1.3978013591837004, "grad_norm": 0.016637876643971827, "learning_rate": 2.516641256181474e-06, "loss": 0.0003, "step": 212470 }, { "epoch": 1.3978671473589337, "grad_norm": 0.018602271552213508, "learning_rate": 2.5161429795583426e-06, "loss": 0.0006, "step": 212480 }, { "epoch": 1.397932935534167, "grad_norm": 0.008137283397875764, "learning_rate": 2.5156447356827207e-06, "loss": 0.0007, "step": 212490 }, { "epoch": 1.3979987237094005, "grad_norm": 0.01211107298013343, "learning_rate": 2.5151465245611784e-06, "loss": 0.0004, "step": 212500 }, { "epoch": 1.398064511884634, "grad_norm": 0.06367381079351249, "learning_rate": 2.514648346200282e-06, "loss": 0.0004, "step": 212510 }, { "epoch": 1.3981303000598673, "grad_norm": 0.007490015385339977, "learning_rate": 2.514150200606601e-06, "loss": 0.0004, "step": 212520 }, { "epoch": 1.3981960882351006, "grad_norm": 0.0003618409511326959, "learning_rate": 2.5136520877867023e-06, "loss": 0.0004, "step": 212530 }, { "epoch": 1.398261876410334, "grad_norm": 0.030197526994405522, "learning_rate": 2.513154007747154e-06, "loss": 0.0007, "step": 212540 }, { "epoch": 1.3983276645855673, "grad_norm": 0.057412875714336946, "learning_rate": 2.5126559604945224e-06, "loss": 0.0007, "step": 212550 }, { "epoch": 1.3983934527608008, "grad_norm": 0.030083236237494, "learning_rate": 2.512157946035373e-06, "loss": 0.0007, "step": 212560 }, { "epoch": 1.3984592409360341, "grad_norm": 0.01746062754562947, "learning_rate": 2.511659964376273e-06, "loss": 0.0012, "step": 212570 }, { "epoch": 1.3985250291112674, "grad_norm": 0.05378740342511109, "learning_rate": 2.511162015523787e-06, "loss": 0.0004, "step": 212580 }, { "epoch": 1.398590817286501, "grad_norm": 0.02380518861568715, "learning_rate": 2.5106640994844807e-06, "loss": 0.0004, "step": 212590 }, { "epoch": 1.3986566054617344, "grad_norm": 0.018469703081177204, "learning_rate": 2.5101662162649176e-06, "loss": 0.0003, "step": 212600 }, { "epoch": 1.3987223936369677, "grad_norm": 0.05522219448026652, "learning_rate": 2.5096683658716636e-06, "loss": 0.0004, "step": 212610 }, { "epoch": 1.398788181812201, "grad_norm": 0.0016526044395427414, "learning_rate": 2.5091705483112803e-06, "loss": 0.0005, "step": 212620 }, { "epoch": 1.3988539699874345, "grad_norm": 0.0015023853325633726, "learning_rate": 2.508672763590333e-06, "loss": 0.0004, "step": 212630 }, { "epoch": 1.3989197581626678, "grad_norm": 0.059668391288530284, "learning_rate": 2.5081750117153838e-06, "loss": 0.001, "step": 212640 }, { "epoch": 1.3989855463379013, "grad_norm": 0.02008537118731377, "learning_rate": 2.5076772926929944e-06, "loss": 0.0004, "step": 212650 }, { "epoch": 1.3990513345131346, "grad_norm": 0.012150103204750795, "learning_rate": 2.507179606529728e-06, "loss": 0.001, "step": 212660 }, { "epoch": 1.399117122688368, "grad_norm": 0.0008798025441667428, "learning_rate": 2.5066819532321448e-06, "loss": 0.0015, "step": 212670 }, { "epoch": 1.3991829108636014, "grad_norm": 0.004164816664329395, "learning_rate": 2.5061843328068046e-06, "loss": 0.0003, "step": 212680 }, { "epoch": 1.3992486990388349, "grad_norm": 0.02371800580989762, "learning_rate": 2.505686745260273e-06, "loss": 0.0005, "step": 212690 }, { "epoch": 1.3993144872140681, "grad_norm": 0.0031861128135599872, "learning_rate": 2.5051891905991065e-06, "loss": 0.0008, "step": 212700 }, { "epoch": 1.3993802753893014, "grad_norm": 0.008945217844308084, "learning_rate": 2.5046916688298667e-06, "loss": 0.0003, "step": 212710 }, { "epoch": 1.399446063564535, "grad_norm": 0.01237055640311666, "learning_rate": 2.5041941799591118e-06, "loss": 0.001, "step": 212720 }, { "epoch": 1.3995118517397682, "grad_norm": 0.037661323350935146, "learning_rate": 2.5036967239934006e-06, "loss": 0.0005, "step": 212730 }, { "epoch": 1.3995776399150017, "grad_norm": 8.798891162486154e-05, "learning_rate": 2.503199300939293e-06, "loss": 0.0006, "step": 212740 }, { "epoch": 1.399643428090235, "grad_norm": 0.06991877521499737, "learning_rate": 2.5027019108033457e-06, "loss": 0.0014, "step": 212750 }, { "epoch": 1.3997092162654685, "grad_norm": 0.07050760982482843, "learning_rate": 2.5022045535921174e-06, "loss": 0.0009, "step": 212760 }, { "epoch": 1.3997750044407018, "grad_norm": 0.00977267673188463, "learning_rate": 2.5017072293121647e-06, "loss": 0.0005, "step": 212770 }, { "epoch": 1.3998407926159353, "grad_norm": 0.029698645946043763, "learning_rate": 2.501209937970044e-06, "loss": 0.0005, "step": 212780 }, { "epoch": 1.3999065807911686, "grad_norm": 0.00022803944991747006, "learning_rate": 2.5007126795723126e-06, "loss": 0.0005, "step": 212790 }, { "epoch": 1.3999723689664019, "grad_norm": 0.05010585005111903, "learning_rate": 2.500215454125526e-06, "loss": 0.0003, "step": 212800 }, { "epoch": 1.4000381571416354, "grad_norm": 0.039967315850853585, "learning_rate": 2.4997182616362374e-06, "loss": 0.0006, "step": 212810 }, { "epoch": 1.4001039453168689, "grad_norm": 0.055527620643584706, "learning_rate": 2.499221102111007e-06, "loss": 0.0007, "step": 212820 }, { "epoch": 1.4001697334921022, "grad_norm": 0.056451470221179696, "learning_rate": 2.4987239755563856e-06, "loss": 0.0005, "step": 212830 }, { "epoch": 1.4002355216673354, "grad_norm": 0.020525243494378658, "learning_rate": 2.4982268819789283e-06, "loss": 0.0005, "step": 212840 }, { "epoch": 1.400301309842569, "grad_norm": 0.004963301513090892, "learning_rate": 2.49772982138519e-06, "loss": 0.0005, "step": 212850 }, { "epoch": 1.4003670980178022, "grad_norm": 0.05890489318057827, "learning_rate": 2.4972327937817215e-06, "loss": 0.0002, "step": 212860 }, { "epoch": 1.4004328861930357, "grad_norm": 0.008326469524139488, "learning_rate": 2.496735799175078e-06, "loss": 0.0007, "step": 212870 }, { "epoch": 1.400498674368269, "grad_norm": 0.02076147511719866, "learning_rate": 2.4962388375718104e-06, "loss": 0.0006, "step": 212880 }, { "epoch": 1.4005644625435023, "grad_norm": 0.000339637491367677, "learning_rate": 2.4957419089784718e-06, "loss": 0.0008, "step": 212890 }, { "epoch": 1.4006302507187358, "grad_norm": 0.050284507405496226, "learning_rate": 2.495245013401613e-06, "loss": 0.0013, "step": 212900 }, { "epoch": 1.4006960388939693, "grad_norm": 0.03672992988078123, "learning_rate": 2.4947481508477855e-06, "loss": 0.0008, "step": 212910 }, { "epoch": 1.4007618270692026, "grad_norm": 0.023812404794550386, "learning_rate": 2.4942513213235402e-06, "loss": 0.0004, "step": 212920 }, { "epoch": 1.4008276152444359, "grad_norm": 0.04873831327127477, "learning_rate": 2.4937545248354273e-06, "loss": 0.0005, "step": 212930 }, { "epoch": 1.4008934034196694, "grad_norm": 0.07203445582968787, "learning_rate": 2.4932577613899938e-06, "loss": 0.0006, "step": 212940 }, { "epoch": 1.4009591915949027, "grad_norm": 0.010488459825186475, "learning_rate": 2.4927610309937945e-06, "loss": 0.0004, "step": 212950 }, { "epoch": 1.4010249797701362, "grad_norm": 0.04867689224096547, "learning_rate": 2.492264333653375e-06, "loss": 0.0003, "step": 212960 }, { "epoch": 1.4010907679453695, "grad_norm": 0.07513157995288347, "learning_rate": 2.4917676693752847e-06, "loss": 0.0006, "step": 212970 }, { "epoch": 1.4011565561206027, "grad_norm": 0.04791232902802122, "learning_rate": 2.4912710381660713e-06, "loss": 0.0005, "step": 212980 }, { "epoch": 1.4012223442958363, "grad_norm": 0.04781967994842765, "learning_rate": 2.4907744400322825e-06, "loss": 0.0005, "step": 212990 }, { "epoch": 1.4012881324710698, "grad_norm": 0.02476433656848358, "learning_rate": 2.4902778749804657e-06, "loss": 0.0007, "step": 213000 }, { "epoch": 1.401353920646303, "grad_norm": 0.10515296772809582, "learning_rate": 2.489781343017168e-06, "loss": 0.0009, "step": 213010 }, { "epoch": 1.4014197088215363, "grad_norm": 0.03927011741058668, "learning_rate": 2.4892848441489348e-06, "loss": 0.0007, "step": 213020 }, { "epoch": 1.4014854969967698, "grad_norm": 0.13103091005012715, "learning_rate": 2.488788378382313e-06, "loss": 0.0009, "step": 213030 }, { "epoch": 1.4015512851720031, "grad_norm": 0.016693190625851888, "learning_rate": 2.488291945723847e-06, "loss": 0.0002, "step": 213040 }, { "epoch": 1.4016170733472366, "grad_norm": 0.041185330625654305, "learning_rate": 2.487795546180083e-06, "loss": 0.0005, "step": 213050 }, { "epoch": 1.40168286152247, "grad_norm": 0.006503134725816485, "learning_rate": 2.4872991797575646e-06, "loss": 0.0005, "step": 213060 }, { "epoch": 1.4017486496977034, "grad_norm": 0.018835802422019477, "learning_rate": 2.4868028464628345e-06, "loss": 0.0005, "step": 213070 }, { "epoch": 1.4018144378729367, "grad_norm": 0.0273489672413554, "learning_rate": 2.4863065463024404e-06, "loss": 0.0006, "step": 213080 }, { "epoch": 1.4018802260481702, "grad_norm": 0.1875021204884835, "learning_rate": 2.4858102792829235e-06, "loss": 0.0012, "step": 213090 }, { "epoch": 1.4019460142234035, "grad_norm": 0.038142544112272275, "learning_rate": 2.4853140454108273e-06, "loss": 0.0007, "step": 213100 }, { "epoch": 1.4020118023986368, "grad_norm": 0.01987033934881287, "learning_rate": 2.484817844692693e-06, "loss": 0.0003, "step": 213110 }, { "epoch": 1.4020775905738703, "grad_norm": 0.051082502016881026, "learning_rate": 2.484321677135063e-06, "loss": 0.0007, "step": 213120 }, { "epoch": 1.4021433787491038, "grad_norm": 0.015849624960796945, "learning_rate": 2.4838255427444792e-06, "loss": 0.0003, "step": 213130 }, { "epoch": 1.402209166924337, "grad_norm": 0.015049062918396038, "learning_rate": 2.483329441527482e-06, "loss": 0.0003, "step": 213140 }, { "epoch": 1.4022749550995703, "grad_norm": 0.02922601061743645, "learning_rate": 2.4828333734906134e-06, "loss": 0.0007, "step": 213150 }, { "epoch": 1.4023407432748038, "grad_norm": 0.037489154751027355, "learning_rate": 2.4823373386404125e-06, "loss": 0.0005, "step": 213160 }, { "epoch": 1.4024065314500371, "grad_norm": 0.16490759693503337, "learning_rate": 2.4818413369834194e-06, "loss": 0.0006, "step": 213170 }, { "epoch": 1.4024723196252706, "grad_norm": 0.02448093022638408, "learning_rate": 2.4813453685261736e-06, "loss": 0.0006, "step": 213180 }, { "epoch": 1.402538107800504, "grad_norm": 0.0453535548106434, "learning_rate": 2.480849433275213e-06, "loss": 0.0007, "step": 213190 }, { "epoch": 1.4026038959757372, "grad_norm": 0.006978751687627392, "learning_rate": 2.4803535312370757e-06, "loss": 0.001, "step": 213200 }, { "epoch": 1.4026696841509707, "grad_norm": 0.10916345008841387, "learning_rate": 2.479857662418303e-06, "loss": 0.0005, "step": 213210 }, { "epoch": 1.4027354723262042, "grad_norm": 0.039061422921667026, "learning_rate": 2.4793618268254304e-06, "loss": 0.0004, "step": 213220 }, { "epoch": 1.4028012605014375, "grad_norm": 0.010252774385831902, "learning_rate": 2.4788660244649954e-06, "loss": 0.0008, "step": 213230 }, { "epoch": 1.4028670486766708, "grad_norm": 0.00212172562630864, "learning_rate": 2.478370255343534e-06, "loss": 0.0002, "step": 213240 }, { "epoch": 1.4029328368519043, "grad_norm": 0.017582077737473837, "learning_rate": 2.4778745194675834e-06, "loss": 0.0004, "step": 213250 }, { "epoch": 1.4029986250271376, "grad_norm": 0.07974832876854249, "learning_rate": 2.4773788168436787e-06, "loss": 0.0006, "step": 213260 }, { "epoch": 1.403064413202371, "grad_norm": 0.0027536762267492605, "learning_rate": 2.4768831474783554e-06, "loss": 0.0002, "step": 213270 }, { "epoch": 1.4031302013776044, "grad_norm": 0.03942204507557916, "learning_rate": 2.476387511378149e-06, "loss": 0.0007, "step": 213280 }, { "epoch": 1.4031959895528376, "grad_norm": 0.039205324992414324, "learning_rate": 2.4758919085495937e-06, "loss": 0.0007, "step": 213290 }, { "epoch": 1.4032617777280711, "grad_norm": 0.028608601781137076, "learning_rate": 2.4753963389992237e-06, "loss": 0.0007, "step": 213300 }, { "epoch": 1.4033275659033047, "grad_norm": 0.014021346101242713, "learning_rate": 2.4749008027335724e-06, "loss": 0.0005, "step": 213310 }, { "epoch": 1.403393354078538, "grad_norm": 0.008707804185890289, "learning_rate": 2.4744052997591734e-06, "loss": 0.0006, "step": 213320 }, { "epoch": 1.4034591422537712, "grad_norm": 0.06216089768774819, "learning_rate": 2.4739098300825578e-06, "loss": 0.0006, "step": 213330 }, { "epoch": 1.4035249304290047, "grad_norm": 0.0165683019222982, "learning_rate": 2.4734143937102608e-06, "loss": 0.0007, "step": 213340 }, { "epoch": 1.403590718604238, "grad_norm": 0.08059837825744938, "learning_rate": 2.4729189906488126e-06, "loss": 0.0007, "step": 213350 }, { "epoch": 1.4036565067794715, "grad_norm": 0.07595627849176331, "learning_rate": 2.4724236209047455e-06, "loss": 0.0004, "step": 213360 }, { "epoch": 1.4037222949547048, "grad_norm": 0.09102371835206648, "learning_rate": 2.4719282844845894e-06, "loss": 0.0007, "step": 213370 }, { "epoch": 1.4037880831299383, "grad_norm": 0.012884448518328583, "learning_rate": 2.471432981394876e-06, "loss": 0.001, "step": 213380 }, { "epoch": 1.4038538713051716, "grad_norm": 0.0012955884844630197, "learning_rate": 2.4709377116421344e-06, "loss": 0.0014, "step": 213390 }, { "epoch": 1.403919659480405, "grad_norm": 0.0033077915296725376, "learning_rate": 2.4704424752328952e-06, "loss": 0.0007, "step": 213400 }, { "epoch": 1.4039854476556384, "grad_norm": 0.010947225042934597, "learning_rate": 2.4699472721736877e-06, "loss": 0.0007, "step": 213410 }, { "epoch": 1.4040512358308717, "grad_norm": 0.0012371973300249555, "learning_rate": 2.46945210247104e-06, "loss": 0.0003, "step": 213420 }, { "epoch": 1.4041170240061052, "grad_norm": 0.049109935538619115, "learning_rate": 2.4689569661314806e-06, "loss": 0.0004, "step": 213430 }, { "epoch": 1.4041828121813384, "grad_norm": 0.03211538877100719, "learning_rate": 2.468461863161538e-06, "loss": 0.0005, "step": 213440 }, { "epoch": 1.404248600356572, "grad_norm": 0.0415770660330947, "learning_rate": 2.467966793567739e-06, "loss": 0.0006, "step": 213450 }, { "epoch": 1.4043143885318052, "grad_norm": 0.017422548880727768, "learning_rate": 2.4674717573566094e-06, "loss": 0.0006, "step": 213460 }, { "epoch": 1.4043801767070387, "grad_norm": 0.043086418310017464, "learning_rate": 2.466976754534679e-06, "loss": 0.0009, "step": 213470 }, { "epoch": 1.404445964882272, "grad_norm": 0.0006044398771633112, "learning_rate": 2.4664817851084726e-06, "loss": 0.0006, "step": 213480 }, { "epoch": 1.4045117530575055, "grad_norm": 0.03617847633500194, "learning_rate": 2.4659868490845163e-06, "loss": 0.0007, "step": 213490 }, { "epoch": 1.4045775412327388, "grad_norm": 0.07341572355826419, "learning_rate": 2.4654919464693343e-06, "loss": 0.0009, "step": 213500 }, { "epoch": 1.404643329407972, "grad_norm": 0.0007293366850529193, "learning_rate": 2.464997077269452e-06, "loss": 0.0006, "step": 213510 }, { "epoch": 1.4047091175832056, "grad_norm": 0.05905838491765266, "learning_rate": 2.4645022414913937e-06, "loss": 0.0004, "step": 213520 }, { "epoch": 1.404774905758439, "grad_norm": 0.008671734664818355, "learning_rate": 2.464007439141684e-06, "loss": 0.0217, "step": 213530 }, { "epoch": 1.4048406939336724, "grad_norm": 0.031229040391480372, "learning_rate": 2.463512670226846e-06, "loss": 0.0008, "step": 213540 }, { "epoch": 1.4049064821089057, "grad_norm": 0.014536584510565996, "learning_rate": 2.4630179347534028e-06, "loss": 0.0006, "step": 213550 }, { "epoch": 1.4049722702841392, "grad_norm": 0.02631712226204439, "learning_rate": 2.462523232727877e-06, "loss": 0.0006, "step": 213560 }, { "epoch": 1.4050380584593725, "grad_norm": 0.12160811119897243, "learning_rate": 2.46202856415679e-06, "loss": 0.0008, "step": 213570 }, { "epoch": 1.405103846634606, "grad_norm": 0.02362009662856136, "learning_rate": 2.4615339290466654e-06, "loss": 0.0008, "step": 213580 }, { "epoch": 1.4051696348098393, "grad_norm": 0.028673728328984387, "learning_rate": 2.461039327404022e-06, "loss": 0.0005, "step": 213590 }, { "epoch": 1.4052354229850725, "grad_norm": 0.03004098018644396, "learning_rate": 2.4605447592353835e-06, "loss": 0.0005, "step": 213600 }, { "epoch": 1.405301211160306, "grad_norm": 0.05442122094108359, "learning_rate": 2.460050224547269e-06, "loss": 0.0003, "step": 213610 }, { "epoch": 1.4053669993355395, "grad_norm": 0.00039291747913018383, "learning_rate": 2.459555723346199e-06, "loss": 0.0005, "step": 213620 }, { "epoch": 1.4054327875107728, "grad_norm": 0.0037258615837504807, "learning_rate": 2.459061255638693e-06, "loss": 0.0004, "step": 213630 }, { "epoch": 1.4054985756860061, "grad_norm": 0.0006249174597733701, "learning_rate": 2.4585668214312692e-06, "loss": 0.0002, "step": 213640 }, { "epoch": 1.4055643638612396, "grad_norm": 0.012858236377437432, "learning_rate": 2.4580724207304475e-06, "loss": 0.0002, "step": 213650 }, { "epoch": 1.405630152036473, "grad_norm": 0.024193365607820302, "learning_rate": 2.457578053542745e-06, "loss": 0.0002, "step": 213660 }, { "epoch": 1.4056959402117064, "grad_norm": 0.005496951143158406, "learning_rate": 2.4570837198746806e-06, "loss": 0.0003, "step": 213670 }, { "epoch": 1.4057617283869397, "grad_norm": 0.03178609875482873, "learning_rate": 2.456589419732771e-06, "loss": 0.0006, "step": 213680 }, { "epoch": 1.4058275165621732, "grad_norm": 0.002363308227375991, "learning_rate": 2.456095153123533e-06, "loss": 0.0003, "step": 213690 }, { "epoch": 1.4058933047374065, "grad_norm": 0.021395742150805003, "learning_rate": 2.4556009200534836e-06, "loss": 0.0004, "step": 213700 }, { "epoch": 1.40595909291264, "grad_norm": 0.04562443241580596, "learning_rate": 2.4551067205291383e-06, "loss": 0.0004, "step": 213710 }, { "epoch": 1.4060248810878733, "grad_norm": 0.06108369447901602, "learning_rate": 2.4546125545570116e-06, "loss": 0.0008, "step": 213720 }, { "epoch": 1.4060906692631066, "grad_norm": 0.008734123068339811, "learning_rate": 2.4541184221436217e-06, "loss": 0.0002, "step": 213730 }, { "epoch": 1.40615645743834, "grad_norm": 0.02137917478430144, "learning_rate": 2.4536243232954814e-06, "loss": 0.0007, "step": 213740 }, { "epoch": 1.4062222456135733, "grad_norm": 0.021282054127273707, "learning_rate": 2.453130258019106e-06, "loss": 0.0005, "step": 213750 }, { "epoch": 1.4062880337888068, "grad_norm": 0.09484156522233989, "learning_rate": 2.4526362263210072e-06, "loss": 0.0006, "step": 213760 }, { "epoch": 1.4063538219640401, "grad_norm": 0.0328566915376754, "learning_rate": 2.4521422282077006e-06, "loss": 0.0002, "step": 213770 }, { "epoch": 1.4064196101392736, "grad_norm": 0.09524204971687923, "learning_rate": 2.451648263685698e-06, "loss": 0.0006, "step": 213780 }, { "epoch": 1.406485398314507, "grad_norm": 0.04510792338485681, "learning_rate": 2.451154332761512e-06, "loss": 0.0006, "step": 213790 }, { "epoch": 1.4065511864897404, "grad_norm": 0.020949442996334975, "learning_rate": 2.450660435441655e-06, "loss": 0.0006, "step": 213800 }, { "epoch": 1.4066169746649737, "grad_norm": 0.0006089341302297416, "learning_rate": 2.450166571732639e-06, "loss": 0.0005, "step": 213810 }, { "epoch": 1.406682762840207, "grad_norm": 0.035783480078238145, "learning_rate": 2.4496727416409737e-06, "loss": 0.0011, "step": 213820 }, { "epoch": 1.4067485510154405, "grad_norm": 0.0164475767886832, "learning_rate": 2.449178945173171e-06, "loss": 0.0009, "step": 213830 }, { "epoch": 1.406814339190674, "grad_norm": 0.019361801169354573, "learning_rate": 2.448685182335741e-06, "loss": 0.0005, "step": 213840 }, { "epoch": 1.4068801273659073, "grad_norm": 0.009136130856599995, "learning_rate": 2.4481914531351915e-06, "loss": 0.0005, "step": 213850 }, { "epoch": 1.4069459155411406, "grad_norm": 0.0035350739219474746, "learning_rate": 2.4476977575780353e-06, "loss": 0.0003, "step": 213860 }, { "epoch": 1.407011703716374, "grad_norm": 0.022664044429730905, "learning_rate": 2.4472040956707804e-06, "loss": 0.0003, "step": 213870 }, { "epoch": 1.4070774918916074, "grad_norm": 0.06422202038782779, "learning_rate": 2.4467104674199346e-06, "loss": 0.0009, "step": 213880 }, { "epoch": 1.4071432800668409, "grad_norm": 0.055795476598574756, "learning_rate": 2.4462168728320055e-06, "loss": 0.0008, "step": 213890 }, { "epoch": 1.4072090682420741, "grad_norm": 0.031082639653609287, "learning_rate": 2.4457233119135023e-06, "loss": 0.0007, "step": 213900 }, { "epoch": 1.4072748564173074, "grad_norm": 0.02339674745316845, "learning_rate": 2.4452297846709305e-06, "loss": 0.0003, "step": 213910 }, { "epoch": 1.407340644592541, "grad_norm": 0.026247143940215302, "learning_rate": 2.444736291110798e-06, "loss": 0.0004, "step": 213920 }, { "epoch": 1.4074064327677744, "grad_norm": 0.03616218780450688, "learning_rate": 2.4442428312396107e-06, "loss": 0.0004, "step": 213930 }, { "epoch": 1.4074722209430077, "grad_norm": 0.01942126456140098, "learning_rate": 2.443749405063874e-06, "loss": 0.0004, "step": 213940 }, { "epoch": 1.407538009118241, "grad_norm": 0.009347724403829982, "learning_rate": 2.443256012590093e-06, "loss": 0.0007, "step": 213950 }, { "epoch": 1.4076037972934745, "grad_norm": 0.024990293019055157, "learning_rate": 2.4427626538247747e-06, "loss": 0.0004, "step": 213960 }, { "epoch": 1.4076695854687078, "grad_norm": 0.006930305675020057, "learning_rate": 2.4422693287744215e-06, "loss": 0.0004, "step": 213970 }, { "epoch": 1.4077353736439413, "grad_norm": 0.00012870735910848867, "learning_rate": 2.4417760374455362e-06, "loss": 0.0022, "step": 213980 }, { "epoch": 1.4078011618191746, "grad_norm": 0.03839115682746753, "learning_rate": 2.441282779844627e-06, "loss": 0.0004, "step": 213990 }, { "epoch": 1.4078669499944079, "grad_norm": 0.011458697614263961, "learning_rate": 2.440789555978194e-06, "loss": 0.0004, "step": 214000 }, { "epoch": 1.4079327381696414, "grad_norm": 0.060235866542600484, "learning_rate": 2.4402963658527403e-06, "loss": 0.0011, "step": 214010 }, { "epoch": 1.4079985263448749, "grad_norm": 0.005261782194423603, "learning_rate": 2.4398032094747688e-06, "loss": 0.0013, "step": 214020 }, { "epoch": 1.4080643145201082, "grad_norm": 0.07208719907436398, "learning_rate": 2.4393100868507803e-06, "loss": 0.0011, "step": 214030 }, { "epoch": 1.4081301026953414, "grad_norm": 0.034614408052353975, "learning_rate": 2.438816997987277e-06, "loss": 0.0007, "step": 214040 }, { "epoch": 1.408195890870575, "grad_norm": 0.15791574701980482, "learning_rate": 2.4383239428907596e-06, "loss": 0.0005, "step": 214050 }, { "epoch": 1.4082616790458082, "grad_norm": 0.01836034346393458, "learning_rate": 2.4378309215677285e-06, "loss": 0.0007, "step": 214060 }, { "epoch": 1.4083274672210417, "grad_norm": 0.040848444900779955, "learning_rate": 2.437337934024684e-06, "loss": 0.0003, "step": 214070 }, { "epoch": 1.408393255396275, "grad_norm": 0.024898943326356504, "learning_rate": 2.4368449802681255e-06, "loss": 0.0005, "step": 214080 }, { "epoch": 1.4084590435715085, "grad_norm": 0.06938184861818122, "learning_rate": 2.4363520603045526e-06, "loss": 0.0006, "step": 214090 }, { "epoch": 1.4085248317467418, "grad_norm": 0.005200738790945358, "learning_rate": 2.4358591741404632e-06, "loss": 0.0008, "step": 214100 }, { "epoch": 1.4085906199219753, "grad_norm": 0.004598820889490401, "learning_rate": 2.435366321782354e-06, "loss": 0.0005, "step": 214110 }, { "epoch": 1.4086564080972086, "grad_norm": 0.09027639377911648, "learning_rate": 2.434873503236727e-06, "loss": 0.0007, "step": 214120 }, { "epoch": 1.4087221962724419, "grad_norm": 0.05411683935621028, "learning_rate": 2.4343807185100782e-06, "loss": 0.0008, "step": 214130 }, { "epoch": 1.4087879844476754, "grad_norm": 0.03377195484378524, "learning_rate": 2.433887967608903e-06, "loss": 0.0002, "step": 214140 }, { "epoch": 1.408853772622909, "grad_norm": 0.015759815701740693, "learning_rate": 2.433395250539699e-06, "loss": 0.0003, "step": 214150 }, { "epoch": 1.4089195607981422, "grad_norm": 0.06609684833492963, "learning_rate": 2.4329025673089618e-06, "loss": 0.0005, "step": 214160 }, { "epoch": 1.4089853489733755, "grad_norm": 0.0020930464158762084, "learning_rate": 2.432409917923187e-06, "loss": 0.0001, "step": 214170 }, { "epoch": 1.409051137148609, "grad_norm": 0.005808030217860028, "learning_rate": 2.4319173023888705e-06, "loss": 0.0005, "step": 214180 }, { "epoch": 1.4091169253238423, "grad_norm": 0.029889472460107588, "learning_rate": 2.431424720712506e-06, "loss": 0.0006, "step": 214190 }, { "epoch": 1.4091827134990758, "grad_norm": 0.0013126882457883701, "learning_rate": 2.430932172900588e-06, "loss": 0.0006, "step": 214200 }, { "epoch": 1.409248501674309, "grad_norm": 0.011786476473236211, "learning_rate": 2.430439658959611e-06, "loss": 0.0006, "step": 214210 }, { "epoch": 1.4093142898495423, "grad_norm": 0.02354939671388363, "learning_rate": 2.4299471788960673e-06, "loss": 0.0007, "step": 214220 }, { "epoch": 1.4093800780247758, "grad_norm": 0.04579641149700544, "learning_rate": 2.429454732716451e-06, "loss": 0.0009, "step": 214230 }, { "epoch": 1.4094458662000093, "grad_norm": 0.027650941534461966, "learning_rate": 2.4289623204272515e-06, "loss": 0.0006, "step": 214240 }, { "epoch": 1.4095116543752426, "grad_norm": 0.07174454513298228, "learning_rate": 2.4284699420349657e-06, "loss": 0.0014, "step": 214250 }, { "epoch": 1.409577442550476, "grad_norm": 0.010836328172382469, "learning_rate": 2.4279775975460824e-06, "loss": 0.0006, "step": 214260 }, { "epoch": 1.4096432307257094, "grad_norm": 0.027327844438788246, "learning_rate": 2.4274852869670933e-06, "loss": 0.0004, "step": 214270 }, { "epoch": 1.4097090189009427, "grad_norm": 0.0009094775822784608, "learning_rate": 2.4269930103044894e-06, "loss": 0.0003, "step": 214280 }, { "epoch": 1.4097748070761762, "grad_norm": 0.006141499528440117, "learning_rate": 2.42650076756476e-06, "loss": 0.0011, "step": 214290 }, { "epoch": 1.4098405952514095, "grad_norm": 0.0077804816400413234, "learning_rate": 2.426008558754395e-06, "loss": 0.0004, "step": 214300 }, { "epoch": 1.4099063834266428, "grad_norm": 0.02361312102189928, "learning_rate": 2.4255163838798844e-06, "loss": 0.0006, "step": 214310 }, { "epoch": 1.4099721716018763, "grad_norm": 0.004741502051586997, "learning_rate": 2.4250242429477173e-06, "loss": 0.0022, "step": 214320 }, { "epoch": 1.4100379597771098, "grad_norm": 0.021497719683323153, "learning_rate": 2.4245321359643813e-06, "loss": 0.0005, "step": 214330 }, { "epoch": 1.410103747952343, "grad_norm": 0.005567567593681795, "learning_rate": 2.4240400629363647e-06, "loss": 0.0002, "step": 214340 }, { "epoch": 1.4101695361275763, "grad_norm": 0.06130104744811915, "learning_rate": 2.4235480238701552e-06, "loss": 0.0004, "step": 214350 }, { "epoch": 1.4102353243028098, "grad_norm": 0.025595536629058755, "learning_rate": 2.4230560187722396e-06, "loss": 0.0014, "step": 214360 }, { "epoch": 1.4103011124780431, "grad_norm": 0.06516801908060467, "learning_rate": 2.422564047649103e-06, "loss": 0.0006, "step": 214370 }, { "epoch": 1.4103669006532766, "grad_norm": 0.02303306746039474, "learning_rate": 2.422072110507236e-06, "loss": 0.0007, "step": 214380 }, { "epoch": 1.41043268882851, "grad_norm": 0.038212275481821545, "learning_rate": 2.421580207353121e-06, "loss": 0.0013, "step": 214390 }, { "epoch": 1.4104984770037434, "grad_norm": 0.001356101678242011, "learning_rate": 2.4210883381932445e-06, "loss": 0.0005, "step": 214400 }, { "epoch": 1.4105642651789767, "grad_norm": 0.1685335875613498, "learning_rate": 2.4205965030340904e-06, "loss": 0.001, "step": 214410 }, { "epoch": 1.4106300533542102, "grad_norm": 0.006733788443722742, "learning_rate": 2.4201047018821437e-06, "loss": 0.0011, "step": 214420 }, { "epoch": 1.4106958415294435, "grad_norm": 0.0014478213850047954, "learning_rate": 2.419612934743889e-06, "loss": 0.0007, "step": 214430 }, { "epoch": 1.4107616297046768, "grad_norm": 0.05309817948384867, "learning_rate": 2.419121201625809e-06, "loss": 0.0005, "step": 214440 }, { "epoch": 1.4108274178799103, "grad_norm": 0.020085883485042966, "learning_rate": 2.4186295025343885e-06, "loss": 0.0004, "step": 214450 }, { "epoch": 1.4108932060551436, "grad_norm": 0.03801284188578775, "learning_rate": 2.418137837476107e-06, "loss": 0.0003, "step": 214460 }, { "epoch": 1.410958994230377, "grad_norm": 0.06530501930494445, "learning_rate": 2.417646206457447e-06, "loss": 0.0007, "step": 214470 }, { "epoch": 1.4110247824056104, "grad_norm": 0.010437601195772129, "learning_rate": 2.4171546094848924e-06, "loss": 0.0006, "step": 214480 }, { "epoch": 1.4110905705808439, "grad_norm": 0.18687969959611178, "learning_rate": 2.4166630465649236e-06, "loss": 0.0013, "step": 214490 }, { "epoch": 1.4111563587560771, "grad_norm": 0.01016042039822084, "learning_rate": 2.4161715177040218e-06, "loss": 0.0009, "step": 214500 }, { "epoch": 1.4112221469313107, "grad_norm": 0.021608515186082238, "learning_rate": 2.415680022908667e-06, "loss": 0.0005, "step": 214510 }, { "epoch": 1.411287935106544, "grad_norm": 0.03771359471210289, "learning_rate": 2.4151885621853393e-06, "loss": 0.0006, "step": 214520 }, { "epoch": 1.4113537232817772, "grad_norm": 0.005393907513571287, "learning_rate": 2.4146971355405174e-06, "loss": 0.0004, "step": 214530 }, { "epoch": 1.4114195114570107, "grad_norm": 0.05033038534372635, "learning_rate": 2.4142057429806816e-06, "loss": 0.0004, "step": 214540 }, { "epoch": 1.4114852996322442, "grad_norm": 0.002983885969507032, "learning_rate": 2.4137143845123096e-06, "loss": 0.0002, "step": 214550 }, { "epoch": 1.4115510878074775, "grad_norm": 0.044252473722841294, "learning_rate": 2.41322306014188e-06, "loss": 0.0005, "step": 214560 }, { "epoch": 1.4116168759827108, "grad_norm": 0.07048680250734722, "learning_rate": 2.4127317698758695e-06, "loss": 0.0011, "step": 214570 }, { "epoch": 1.4116826641579443, "grad_norm": 0.02683534204931293, "learning_rate": 2.4122405137207567e-06, "loss": 0.0006, "step": 214580 }, { "epoch": 1.4117484523331776, "grad_norm": 0.013121201665571246, "learning_rate": 2.4117492916830176e-06, "loss": 0.0004, "step": 214590 }, { "epoch": 1.411814240508411, "grad_norm": 0.013490618646974279, "learning_rate": 2.4112581037691274e-06, "loss": 0.0007, "step": 214600 }, { "epoch": 1.4118800286836444, "grad_norm": 0.02022249877657004, "learning_rate": 2.4107669499855645e-06, "loss": 0.0021, "step": 214610 }, { "epoch": 1.4119458168588777, "grad_norm": 0.017803030887878463, "learning_rate": 2.4102758303388037e-06, "loss": 0.0004, "step": 214620 }, { "epoch": 1.4120116050341112, "grad_norm": 0.01970149944520732, "learning_rate": 2.409784744835319e-06, "loss": 0.0006, "step": 214630 }, { "epoch": 1.4120773932093447, "grad_norm": 0.06379780001013194, "learning_rate": 2.409293693481585e-06, "loss": 0.0003, "step": 214640 }, { "epoch": 1.412143181384578, "grad_norm": 0.025446113023839798, "learning_rate": 2.408802676284077e-06, "loss": 0.0006, "step": 214650 }, { "epoch": 1.4122089695598112, "grad_norm": 0.00567787002126383, "learning_rate": 2.4083116932492677e-06, "loss": 0.0004, "step": 214660 }, { "epoch": 1.4122747577350447, "grad_norm": 0.009067011249500176, "learning_rate": 2.40782074438363e-06, "loss": 0.0004, "step": 214670 }, { "epoch": 1.412340545910278, "grad_norm": 0.04178495773711037, "learning_rate": 2.4073298296936377e-06, "loss": 0.0003, "step": 214680 }, { "epoch": 1.4124063340855115, "grad_norm": 0.03043456416902993, "learning_rate": 2.406838949185762e-06, "loss": 0.0003, "step": 214690 }, { "epoch": 1.4124721222607448, "grad_norm": 0.058302193070676495, "learning_rate": 2.406348102866475e-06, "loss": 0.0003, "step": 214700 }, { "epoch": 1.4125379104359783, "grad_norm": 0.005336407626023544, "learning_rate": 2.405857290742248e-06, "loss": 0.0001, "step": 214710 }, { "epoch": 1.4126036986112116, "grad_norm": 0.03355911354286195, "learning_rate": 2.4053665128195526e-06, "loss": 0.001, "step": 214720 }, { "epoch": 1.412669486786445, "grad_norm": 0.024916792932944153, "learning_rate": 2.404875769104857e-06, "loss": 0.0003, "step": 214730 }, { "epoch": 1.4127352749616784, "grad_norm": 0.002350849148520333, "learning_rate": 2.404385059604635e-06, "loss": 0.0012, "step": 214740 }, { "epoch": 1.4128010631369117, "grad_norm": 0.012743792373336563, "learning_rate": 2.403894384325354e-06, "loss": 0.0005, "step": 214750 }, { "epoch": 1.4128668513121452, "grad_norm": 0.03351811388291035, "learning_rate": 2.4034037432734835e-06, "loss": 0.0008, "step": 214760 }, { "epoch": 1.4129326394873785, "grad_norm": 0.018152557734947068, "learning_rate": 2.402913136455492e-06, "loss": 0.0007, "step": 214770 }, { "epoch": 1.412998427662612, "grad_norm": 0.01658842774935471, "learning_rate": 2.4024225638778483e-06, "loss": 0.0006, "step": 214780 }, { "epoch": 1.4130642158378452, "grad_norm": 0.03747285987551891, "learning_rate": 2.4019320255470192e-06, "loss": 0.0004, "step": 214790 }, { "epoch": 1.4131300040130788, "grad_norm": 0.04761795869974317, "learning_rate": 2.4014415214694724e-06, "loss": 0.0004, "step": 214800 }, { "epoch": 1.413195792188312, "grad_norm": 0.052629706339337404, "learning_rate": 2.4009510516516753e-06, "loss": 0.0007, "step": 214810 }, { "epoch": 1.4132615803635455, "grad_norm": 0.10734848226078976, "learning_rate": 2.400460616100094e-06, "loss": 0.0008, "step": 214820 }, { "epoch": 1.4133273685387788, "grad_norm": 0.04622854357168021, "learning_rate": 2.399970214821194e-06, "loss": 0.0005, "step": 214830 }, { "epoch": 1.4133931567140121, "grad_norm": 0.023774154426461497, "learning_rate": 2.399479847821441e-06, "loss": 0.0007, "step": 214840 }, { "epoch": 1.4134589448892456, "grad_norm": 0.040419386638705423, "learning_rate": 2.3989895151073004e-06, "loss": 0.0006, "step": 214850 }, { "epoch": 1.4135247330644791, "grad_norm": 0.0010991521034396164, "learning_rate": 2.398499216685235e-06, "loss": 0.0003, "step": 214860 }, { "epoch": 1.4135905212397124, "grad_norm": 0.029360133474576236, "learning_rate": 2.3980089525617125e-06, "loss": 0.0003, "step": 214870 }, { "epoch": 1.4136563094149457, "grad_norm": 0.027453598092942125, "learning_rate": 2.3975187227431946e-06, "loss": 0.0004, "step": 214880 }, { "epoch": 1.4137220975901792, "grad_norm": 0.0004781996923187121, "learning_rate": 2.3970285272361442e-06, "loss": 0.0003, "step": 214890 }, { "epoch": 1.4137878857654125, "grad_norm": 0.00018348336894704026, "learning_rate": 2.3965383660470255e-06, "loss": 0.0005, "step": 214900 }, { "epoch": 1.413853673940646, "grad_norm": 0.03403502368290179, "learning_rate": 2.396048239182299e-06, "loss": 0.0007, "step": 214910 }, { "epoch": 1.4139194621158793, "grad_norm": 0.025363678325531516, "learning_rate": 2.3955581466484283e-06, "loss": 0.0008, "step": 214920 }, { "epoch": 1.4139852502911125, "grad_norm": 0.004910897201821566, "learning_rate": 2.395068088451874e-06, "loss": 0.0001, "step": 214930 }, { "epoch": 1.414051038466346, "grad_norm": 9.22072568129296e-05, "learning_rate": 2.3945780645990975e-06, "loss": 0.0008, "step": 214940 }, { "epoch": 1.4141168266415796, "grad_norm": 0.011404561238488204, "learning_rate": 2.3940880750965585e-06, "loss": 0.0004, "step": 214950 }, { "epoch": 1.4141826148168128, "grad_norm": 0.023899270839456716, "learning_rate": 2.3935981199507173e-06, "loss": 0.0008, "step": 214960 }, { "epoch": 1.4142484029920461, "grad_norm": 0.022487210309571785, "learning_rate": 2.393108199168034e-06, "loss": 0.0006, "step": 214970 }, { "epoch": 1.4143141911672796, "grad_norm": 0.06248228571054577, "learning_rate": 2.3926183127549686e-06, "loss": 0.0007, "step": 214980 }, { "epoch": 1.414379979342513, "grad_norm": 0.0033253539303848287, "learning_rate": 2.3921284607179763e-06, "loss": 0.0008, "step": 214990 }, { "epoch": 1.4144457675177464, "grad_norm": 0.02878434462443559, "learning_rate": 2.3916386430635196e-06, "loss": 0.0027, "step": 215000 }, { "epoch": 1.4145115556929797, "grad_norm": 0.021111435172487997, "learning_rate": 2.3911488597980547e-06, "loss": 0.0015, "step": 215010 }, { "epoch": 1.4145773438682132, "grad_norm": 0.00037890943737223237, "learning_rate": 2.3906591109280393e-06, "loss": 0.0006, "step": 215020 }, { "epoch": 1.4146431320434465, "grad_norm": 0.006308006015921706, "learning_rate": 2.39016939645993e-06, "loss": 0.0007, "step": 215030 }, { "epoch": 1.41470892021868, "grad_norm": 0.0024701402688347552, "learning_rate": 2.3896797164001827e-06, "loss": 0.0005, "step": 215040 }, { "epoch": 1.4147747083939133, "grad_norm": 0.04402994624013021, "learning_rate": 2.389190070755254e-06, "loss": 0.0008, "step": 215050 }, { "epoch": 1.4148404965691466, "grad_norm": 0.20226777086110082, "learning_rate": 2.3887004595315997e-06, "loss": 0.0012, "step": 215060 }, { "epoch": 1.41490628474438, "grad_norm": 0.014535553100304995, "learning_rate": 2.388210882735674e-06, "loss": 0.0005, "step": 215070 }, { "epoch": 1.4149720729196134, "grad_norm": 0.016950097439668396, "learning_rate": 2.387721340373933e-06, "loss": 0.0005, "step": 215080 }, { "epoch": 1.4150378610948469, "grad_norm": 0.00029296664448336536, "learning_rate": 2.38723183245283e-06, "loss": 0.0007, "step": 215090 }, { "epoch": 1.4151036492700801, "grad_norm": 0.0027886526003543235, "learning_rate": 2.386742358978818e-06, "loss": 0.0005, "step": 215100 }, { "epoch": 1.4151694374453136, "grad_norm": 0.031729935368640064, "learning_rate": 2.386252919958351e-06, "loss": 0.0004, "step": 215110 }, { "epoch": 1.415235225620547, "grad_norm": 0.014877267022656621, "learning_rate": 2.385763515397881e-06, "loss": 0.0004, "step": 215120 }, { "epoch": 1.4153010137957804, "grad_norm": 0.05435859292042059, "learning_rate": 2.385274145303862e-06, "loss": 0.0006, "step": 215130 }, { "epoch": 1.4153668019710137, "grad_norm": 0.0771706284840041, "learning_rate": 2.3847848096827457e-06, "loss": 0.0011, "step": 215140 }, { "epoch": 1.415432590146247, "grad_norm": 0.01905823345479069, "learning_rate": 2.384295508540983e-06, "loss": 0.0005, "step": 215150 }, { "epoch": 1.4154983783214805, "grad_norm": 0.02891851257557547, "learning_rate": 2.383806241885025e-06, "loss": 0.0004, "step": 215160 }, { "epoch": 1.415564166496714, "grad_norm": 0.028273791679573754, "learning_rate": 2.383317009721322e-06, "loss": 0.0004, "step": 215170 }, { "epoch": 1.4156299546719473, "grad_norm": 0.008043901716582931, "learning_rate": 2.3828278120563247e-06, "loss": 0.0005, "step": 215180 }, { "epoch": 1.4156957428471806, "grad_norm": 0.000490910338758014, "learning_rate": 2.3823386488964824e-06, "loss": 0.0005, "step": 215190 }, { "epoch": 1.415761531022414, "grad_norm": 0.00860463591347274, "learning_rate": 2.381849520248244e-06, "loss": 0.0006, "step": 215200 }, { "epoch": 1.4158273191976474, "grad_norm": 0.012569419346214238, "learning_rate": 2.3813604261180586e-06, "loss": 0.0003, "step": 215210 }, { "epoch": 1.4158931073728809, "grad_norm": 0.1457246564730341, "learning_rate": 2.380871366512374e-06, "loss": 0.0015, "step": 215220 }, { "epoch": 1.4159588955481142, "grad_norm": 0.007796170645861975, "learning_rate": 2.3803823414376385e-06, "loss": 0.0009, "step": 215230 }, { "epoch": 1.4160246837233474, "grad_norm": 0.02389712176226916, "learning_rate": 2.3798933509002996e-06, "loss": 0.0005, "step": 215240 }, { "epoch": 1.416090471898581, "grad_norm": 0.008525077943653996, "learning_rate": 2.379404394906802e-06, "loss": 0.0004, "step": 215250 }, { "epoch": 1.4161562600738145, "grad_norm": 0.011590376589878193, "learning_rate": 2.3789154734635963e-06, "loss": 0.0005, "step": 215260 }, { "epoch": 1.4162220482490477, "grad_norm": 0.0007880522019857389, "learning_rate": 2.378426586577126e-06, "loss": 0.0004, "step": 215270 }, { "epoch": 1.416287836424281, "grad_norm": 0.009929181116260899, "learning_rate": 2.3779377342538367e-06, "loss": 0.0005, "step": 215280 }, { "epoch": 1.4163536245995145, "grad_norm": 0.020153015407330062, "learning_rate": 2.3774489165001745e-06, "loss": 0.0004, "step": 215290 }, { "epoch": 1.4164194127747478, "grad_norm": 0.030254960661275743, "learning_rate": 2.376960133322583e-06, "loss": 0.0004, "step": 215300 }, { "epoch": 1.4164852009499813, "grad_norm": 0.032353613346065825, "learning_rate": 2.3764713847275073e-06, "loss": 0.0005, "step": 215310 }, { "epoch": 1.4165509891252146, "grad_norm": 0.005935701170775488, "learning_rate": 2.3759826707213902e-06, "loss": 0.0014, "step": 215320 }, { "epoch": 1.4166167773004479, "grad_norm": 0.052859358653078, "learning_rate": 2.375493991310675e-06, "loss": 0.0005, "step": 215330 }, { "epoch": 1.4166825654756814, "grad_norm": 0.043511886111444194, "learning_rate": 2.3750053465018054e-06, "loss": 0.0009, "step": 215340 }, { "epoch": 1.416748353650915, "grad_norm": 0.005356473372117561, "learning_rate": 2.3745167363012227e-06, "loss": 0.0005, "step": 215350 }, { "epoch": 1.4168141418261482, "grad_norm": 0.02787914136605225, "learning_rate": 2.3740281607153694e-06, "loss": 0.0006, "step": 215360 }, { "epoch": 1.4168799300013815, "grad_norm": 0.035450244896070494, "learning_rate": 2.3735396197506872e-06, "loss": 0.0005, "step": 215370 }, { "epoch": 1.416945718176615, "grad_norm": 0.018905322065568997, "learning_rate": 2.373051113413615e-06, "loss": 0.0004, "step": 215380 }, { "epoch": 1.4170115063518482, "grad_norm": 0.0007701037253535881, "learning_rate": 2.372562641710596e-06, "loss": 0.0014, "step": 215390 }, { "epoch": 1.4170772945270818, "grad_norm": 0.007298836872152118, "learning_rate": 2.3720742046480706e-06, "loss": 0.0005, "step": 215400 }, { "epoch": 1.417143082702315, "grad_norm": 0.14005318036195707, "learning_rate": 2.371585802232476e-06, "loss": 0.0009, "step": 215410 }, { "epoch": 1.4172088708775485, "grad_norm": 0.028831690382845464, "learning_rate": 2.3710974344702535e-06, "loss": 0.0005, "step": 215420 }, { "epoch": 1.4172746590527818, "grad_norm": 0.04504866110557309, "learning_rate": 2.3706091013678407e-06, "loss": 0.0006, "step": 215430 }, { "epoch": 1.4173404472280153, "grad_norm": 0.05956075220319299, "learning_rate": 2.370120802931676e-06, "loss": 0.0006, "step": 215440 }, { "epoch": 1.4174062354032486, "grad_norm": 0.006512381898718788, "learning_rate": 2.3696325391681967e-06, "loss": 0.0004, "step": 215450 }, { "epoch": 1.417472023578482, "grad_norm": 0.03516357713548765, "learning_rate": 2.369144310083841e-06, "loss": 0.0005, "step": 215460 }, { "epoch": 1.4175378117537154, "grad_norm": 0.011166997414024663, "learning_rate": 2.368656115685045e-06, "loss": 0.0007, "step": 215470 }, { "epoch": 1.4176035999289487, "grad_norm": 0.01950489631473218, "learning_rate": 2.368167955978246e-06, "loss": 0.0007, "step": 215480 }, { "epoch": 1.4176693881041822, "grad_norm": 0.015572412635309247, "learning_rate": 2.3676798309698795e-06, "loss": 0.0004, "step": 215490 }, { "epoch": 1.4177351762794155, "grad_norm": 0.07713573388974958, "learning_rate": 2.367191740666381e-06, "loss": 0.0006, "step": 215500 }, { "epoch": 1.417800964454649, "grad_norm": 0.01925771889600532, "learning_rate": 2.366703685074184e-06, "loss": 0.0003, "step": 215510 }, { "epoch": 1.4178667526298823, "grad_norm": 0.03028200505798504, "learning_rate": 2.3662156641997257e-06, "loss": 0.0006, "step": 215520 }, { "epoch": 1.4179325408051158, "grad_norm": 0.03497614182595879, "learning_rate": 2.36572767804944e-06, "loss": 0.0009, "step": 215530 }, { "epoch": 1.417998328980349, "grad_norm": 0.05981480003961154, "learning_rate": 2.365239726629759e-06, "loss": 0.0007, "step": 215540 }, { "epoch": 1.4180641171555823, "grad_norm": 0.0655280151569934, "learning_rate": 2.3647518099471173e-06, "loss": 0.0004, "step": 215550 }, { "epoch": 1.4181299053308158, "grad_norm": 0.012945439460955646, "learning_rate": 2.3642639280079464e-06, "loss": 0.0004, "step": 215560 }, { "epoch": 1.4181956935060493, "grad_norm": 0.02199149251085001, "learning_rate": 2.36377608081868e-06, "loss": 0.0005, "step": 215570 }, { "epoch": 1.4182614816812826, "grad_norm": 0.008840012338824, "learning_rate": 2.3632882683857482e-06, "loss": 0.0003, "step": 215580 }, { "epoch": 1.418327269856516, "grad_norm": 0.00029526018278133666, "learning_rate": 2.3628004907155844e-06, "loss": 0.0003, "step": 215590 }, { "epoch": 1.4183930580317494, "grad_norm": 0.006795857932698889, "learning_rate": 2.362312747814618e-06, "loss": 0.0005, "step": 215600 }, { "epoch": 1.4184588462069827, "grad_norm": 0.014176895779944137, "learning_rate": 2.36182503968928e-06, "loss": 0.0008, "step": 215610 }, { "epoch": 1.4185246343822162, "grad_norm": 0.01979449221956799, "learning_rate": 2.3613373663460002e-06, "loss": 0.0004, "step": 215620 }, { "epoch": 1.4185904225574495, "grad_norm": 0.11038275612643164, "learning_rate": 2.3608497277912092e-06, "loss": 0.0008, "step": 215630 }, { "epoch": 1.4186562107326828, "grad_norm": 0.00506394937714379, "learning_rate": 2.3603621240313323e-06, "loss": 0.0008, "step": 215640 }, { "epoch": 1.4187219989079163, "grad_norm": 0.013195113779268155, "learning_rate": 2.359874555072803e-06, "loss": 0.0008, "step": 215650 }, { "epoch": 1.4187877870831498, "grad_norm": 0.06045330429712753, "learning_rate": 2.359387020922048e-06, "loss": 0.0005, "step": 215660 }, { "epoch": 1.418853575258383, "grad_norm": 0.024320049177791667, "learning_rate": 2.3588995215854944e-06, "loss": 0.0002, "step": 215670 }, { "epoch": 1.4189193634336164, "grad_norm": 0.015020634935351471, "learning_rate": 2.3584120570695695e-06, "loss": 0.0005, "step": 215680 }, { "epoch": 1.4189851516088499, "grad_norm": 0.0013048814275872045, "learning_rate": 2.3579246273807006e-06, "loss": 0.0004, "step": 215690 }, { "epoch": 1.4190509397840831, "grad_norm": 0.013460729869296575, "learning_rate": 2.357437232525313e-06, "loss": 0.0005, "step": 215700 }, { "epoch": 1.4191167279593166, "grad_norm": 0.022864231694593864, "learning_rate": 2.3569498725098337e-06, "loss": 0.0005, "step": 215710 }, { "epoch": 1.41918251613455, "grad_norm": 0.0005211762968562283, "learning_rate": 2.3564625473406877e-06, "loss": 0.0007, "step": 215720 }, { "epoch": 1.4192483043097834, "grad_norm": 0.002346459563377655, "learning_rate": 2.3559752570242996e-06, "loss": 0.0005, "step": 215730 }, { "epoch": 1.4193140924850167, "grad_norm": 0.002597336400944847, "learning_rate": 2.3554880015670946e-06, "loss": 0.0004, "step": 215740 }, { "epoch": 1.4193798806602502, "grad_norm": 0.021582428936884154, "learning_rate": 2.3550007809754954e-06, "loss": 0.0002, "step": 215750 }, { "epoch": 1.4194456688354835, "grad_norm": 0.08868121462485927, "learning_rate": 2.354513595255927e-06, "loss": 0.0007, "step": 215760 }, { "epoch": 1.4195114570107168, "grad_norm": 0.023591247195873764, "learning_rate": 2.3540264444148107e-06, "loss": 0.0004, "step": 215770 }, { "epoch": 1.4195772451859503, "grad_norm": 0.013717747257317083, "learning_rate": 2.353539328458572e-06, "loss": 0.0005, "step": 215780 }, { "epoch": 1.4196430333611836, "grad_norm": 0.026982489821451624, "learning_rate": 2.353052247393632e-06, "loss": 0.0004, "step": 215790 }, { "epoch": 1.419708821536417, "grad_norm": 0.03670346916329478, "learning_rate": 2.3525652012264115e-06, "loss": 0.0005, "step": 215800 }, { "epoch": 1.4197746097116504, "grad_norm": 0.014628822216517928, "learning_rate": 2.3520781899633326e-06, "loss": 0.0006, "step": 215810 }, { "epoch": 1.4198403978868839, "grad_norm": 0.020177525059564938, "learning_rate": 2.351591213610816e-06, "loss": 0.0008, "step": 215820 }, { "epoch": 1.4199061860621172, "grad_norm": 0.043833949213706945, "learning_rate": 2.351104272175282e-06, "loss": 0.0014, "step": 215830 }, { "epoch": 1.4199719742373507, "grad_norm": 0.004442870400326248, "learning_rate": 2.3506173656631503e-06, "loss": 0.0004, "step": 215840 }, { "epoch": 1.420037762412584, "grad_norm": 0.002786049763468519, "learning_rate": 2.35013049408084e-06, "loss": 0.0007, "step": 215850 }, { "epoch": 1.4201035505878172, "grad_norm": 0.02418637200653831, "learning_rate": 2.349643657434771e-06, "loss": 0.0006, "step": 215860 }, { "epoch": 1.4201693387630507, "grad_norm": 0.0037849117961713565, "learning_rate": 2.3491568557313615e-06, "loss": 0.0003, "step": 215870 }, { "epoch": 1.4202351269382842, "grad_norm": 0.006003638432341094, "learning_rate": 2.348670088977029e-06, "loss": 0.0005, "step": 215880 }, { "epoch": 1.4203009151135175, "grad_norm": 0.02924708202039704, "learning_rate": 2.348183357178192e-06, "loss": 0.0028, "step": 215890 }, { "epoch": 1.4203667032887508, "grad_norm": 0.038370944479859614, "learning_rate": 2.3476966603412655e-06, "loss": 0.0006, "step": 215900 }, { "epoch": 1.4204324914639843, "grad_norm": 0.03207283684748692, "learning_rate": 2.347209998472669e-06, "loss": 0.0005, "step": 215910 }, { "epoch": 1.4204982796392176, "grad_norm": 0.07757078973721572, "learning_rate": 2.346723371578818e-06, "loss": 0.0006, "step": 215920 }, { "epoch": 1.420564067814451, "grad_norm": 0.021838573418154327, "learning_rate": 2.3462367796661277e-06, "loss": 0.0007, "step": 215930 }, { "epoch": 1.4206298559896844, "grad_norm": 0.021291386116763733, "learning_rate": 2.3457502227410138e-06, "loss": 0.0009, "step": 215940 }, { "epoch": 1.4206956441649177, "grad_norm": 0.013020347592056192, "learning_rate": 2.3452637008098904e-06, "loss": 0.0003, "step": 215950 }, { "epoch": 1.4207614323401512, "grad_norm": 0.0010585760383204875, "learning_rate": 2.3447772138791726e-06, "loss": 0.0003, "step": 215960 }, { "epoch": 1.4208272205153847, "grad_norm": 0.034723949493904914, "learning_rate": 2.344290761955274e-06, "loss": 0.0005, "step": 215970 }, { "epoch": 1.420893008690618, "grad_norm": 0.035082666653409304, "learning_rate": 2.343804345044608e-06, "loss": 0.0011, "step": 215980 }, { "epoch": 1.4209587968658512, "grad_norm": 0.02832143635649357, "learning_rate": 2.343317963153588e-06, "loss": 0.0006, "step": 215990 }, { "epoch": 1.4210245850410848, "grad_norm": 0.04190879317252618, "learning_rate": 2.3428316162886262e-06, "loss": 0.0007, "step": 216000 }, { "epoch": 1.421090373216318, "grad_norm": 0.0030720988999089613, "learning_rate": 2.342345304456134e-06, "loss": 0.0005, "step": 216010 }, { "epoch": 1.4211561613915515, "grad_norm": 0.00024673629348395996, "learning_rate": 2.3418590276625243e-06, "loss": 0.0011, "step": 216020 }, { "epoch": 1.4212219495667848, "grad_norm": 0.008963915282213427, "learning_rate": 2.3413727859142056e-06, "loss": 0.0007, "step": 216030 }, { "epoch": 1.4212877377420183, "grad_norm": 0.09031341439815777, "learning_rate": 2.340886579217592e-06, "loss": 0.0003, "step": 216040 }, { "epoch": 1.4213535259172516, "grad_norm": 0.07455399321349229, "learning_rate": 2.3404004075790927e-06, "loss": 0.001, "step": 216050 }, { "epoch": 1.4214193140924851, "grad_norm": 0.023679245399064058, "learning_rate": 2.339914271005117e-06, "loss": 0.0003, "step": 216060 }, { "epoch": 1.4214851022677184, "grad_norm": 0.03117334127338978, "learning_rate": 2.339428169502074e-06, "loss": 0.001, "step": 216070 }, { "epoch": 1.4215508904429517, "grad_norm": 0.002038386273858246, "learning_rate": 2.3389421030763733e-06, "loss": 0.0016, "step": 216080 }, { "epoch": 1.4216166786181852, "grad_norm": 0.05885259614629153, "learning_rate": 2.338456071734422e-06, "loss": 0.0007, "step": 216090 }, { "epoch": 1.4216824667934185, "grad_norm": 0.012914791290009074, "learning_rate": 2.3379700754826295e-06, "loss": 0.0002, "step": 216100 }, { "epoch": 1.421748254968652, "grad_norm": 0.017409996316539514, "learning_rate": 2.3374841143274018e-06, "loss": 0.0003, "step": 216110 }, { "epoch": 1.4218140431438853, "grad_norm": 0.0014143965200840041, "learning_rate": 2.336998188275147e-06, "loss": 0.0007, "step": 216120 }, { "epoch": 1.4218798313191188, "grad_norm": 0.00048290253819971765, "learning_rate": 2.336512297332271e-06, "loss": 0.0009, "step": 216130 }, { "epoch": 1.421945619494352, "grad_norm": 0.020787325110627364, "learning_rate": 2.3360264415051804e-06, "loss": 0.0004, "step": 216140 }, { "epoch": 1.4220114076695856, "grad_norm": 0.03513029546453948, "learning_rate": 2.3355406208002796e-06, "loss": 0.0008, "step": 216150 }, { "epoch": 1.4220771958448188, "grad_norm": 0.05340844210971097, "learning_rate": 2.3350548352239734e-06, "loss": 0.0007, "step": 216160 }, { "epoch": 1.4221429840200521, "grad_norm": 0.0062603750643795775, "learning_rate": 2.334569084782669e-06, "loss": 0.0006, "step": 216170 }, { "epoch": 1.4222087721952856, "grad_norm": 0.014481224378013963, "learning_rate": 2.3340833694827698e-06, "loss": 0.0004, "step": 216180 }, { "epoch": 1.4222745603705191, "grad_norm": 0.004064708186050943, "learning_rate": 2.3335976893306782e-06, "loss": 0.0004, "step": 216190 }, { "epoch": 1.4223403485457524, "grad_norm": 0.02652582451855487, "learning_rate": 2.3331120443327986e-06, "loss": 0.0003, "step": 216200 }, { "epoch": 1.4224061367209857, "grad_norm": 0.005633897408204944, "learning_rate": 2.3326264344955336e-06, "loss": 0.0012, "step": 216210 }, { "epoch": 1.4224719248962192, "grad_norm": 0.00011668873871914212, "learning_rate": 2.3321408598252847e-06, "loss": 0.0006, "step": 216220 }, { "epoch": 1.4225377130714525, "grad_norm": 0.004532858858031648, "learning_rate": 2.3316553203284554e-06, "loss": 0.0006, "step": 216230 }, { "epoch": 1.422603501246686, "grad_norm": 0.0012328078304309367, "learning_rate": 2.3311698160114455e-06, "loss": 0.0013, "step": 216240 }, { "epoch": 1.4226692894219193, "grad_norm": 0.03345054873453737, "learning_rate": 2.3306843468806565e-06, "loss": 0.0009, "step": 216250 }, { "epoch": 1.4227350775971526, "grad_norm": 0.014939483856600056, "learning_rate": 2.3301989129424898e-06, "loss": 0.0007, "step": 216260 }, { "epoch": 1.422800865772386, "grad_norm": 0.008243723099568657, "learning_rate": 2.329713514203344e-06, "loss": 0.0006, "step": 216270 }, { "epoch": 1.4228666539476196, "grad_norm": 0.03463682591485974, "learning_rate": 2.329228150669619e-06, "loss": 0.0016, "step": 216280 }, { "epoch": 1.4229324421228529, "grad_norm": 0.08373267355173801, "learning_rate": 2.3287428223477133e-06, "loss": 0.0004, "step": 216290 }, { "epoch": 1.4229982302980861, "grad_norm": 0.02652256780822106, "learning_rate": 2.3282575292440274e-06, "loss": 0.0003, "step": 216300 }, { "epoch": 1.4230640184733196, "grad_norm": 0.03145962468933634, "learning_rate": 2.327772271364959e-06, "loss": 0.0004, "step": 216310 }, { "epoch": 1.423129806648553, "grad_norm": 0.005891934377034349, "learning_rate": 2.327287048716905e-06, "loss": 0.0001, "step": 216320 }, { "epoch": 1.4231955948237864, "grad_norm": 0.010518418985139666, "learning_rate": 2.326801861306263e-06, "loss": 0.0014, "step": 216330 }, { "epoch": 1.4232613829990197, "grad_norm": 0.028283631184201262, "learning_rate": 2.3263167091394322e-06, "loss": 0.0009, "step": 216340 }, { "epoch": 1.423327171174253, "grad_norm": 0.10183890957098438, "learning_rate": 2.325831592222804e-06, "loss": 0.0006, "step": 216350 }, { "epoch": 1.4233929593494865, "grad_norm": 0.003776985673983131, "learning_rate": 2.325346510562777e-06, "loss": 0.0005, "step": 216360 }, { "epoch": 1.42345874752472, "grad_norm": 0.07853470046290721, "learning_rate": 2.3248614641657457e-06, "loss": 0.0009, "step": 216370 }, { "epoch": 1.4235245356999533, "grad_norm": 0.021025782140126438, "learning_rate": 2.3243764530381065e-06, "loss": 0.0004, "step": 216380 }, { "epoch": 1.4235903238751866, "grad_norm": 0.04626724066586294, "learning_rate": 2.323891477186252e-06, "loss": 0.0005, "step": 216390 }, { "epoch": 1.42365611205042, "grad_norm": 0.00048730508892353455, "learning_rate": 2.323406536616576e-06, "loss": 0.0001, "step": 216400 }, { "epoch": 1.4237219002256534, "grad_norm": 0.03244987252365373, "learning_rate": 2.3229216313354746e-06, "loss": 0.0007, "step": 216410 }, { "epoch": 1.4237876884008869, "grad_norm": 0.011368789723798773, "learning_rate": 2.32243676134934e-06, "loss": 0.0003, "step": 216420 }, { "epoch": 1.4238534765761202, "grad_norm": 0.017017738006711844, "learning_rate": 2.3219519266645637e-06, "loss": 0.0011, "step": 216430 }, { "epoch": 1.4239192647513537, "grad_norm": 0.04210246025336979, "learning_rate": 2.3214671272875385e-06, "loss": 0.0003, "step": 216440 }, { "epoch": 1.423985052926587, "grad_norm": 0.03449131169337741, "learning_rate": 2.320982363224655e-06, "loss": 0.0009, "step": 216450 }, { "epoch": 1.4240508411018205, "grad_norm": 0.0282820257304121, "learning_rate": 2.3204976344823066e-06, "loss": 0.0004, "step": 216460 }, { "epoch": 1.4241166292770537, "grad_norm": 0.0009256147625390882, "learning_rate": 2.320012941066882e-06, "loss": 0.0006, "step": 216470 }, { "epoch": 1.424182417452287, "grad_norm": 0.029370931789067425, "learning_rate": 2.3195282829847727e-06, "loss": 0.0009, "step": 216480 }, { "epoch": 1.4242482056275205, "grad_norm": 0.0406123242818254, "learning_rate": 2.319043660242367e-06, "loss": 0.0005, "step": 216490 }, { "epoch": 1.424313993802754, "grad_norm": 0.0012972673366781296, "learning_rate": 2.318559072846056e-06, "loss": 0.0003, "step": 216500 }, { "epoch": 1.4243797819779873, "grad_norm": 0.0347970572704982, "learning_rate": 2.318074520802228e-06, "loss": 0.0009, "step": 216510 }, { "epoch": 1.4244455701532206, "grad_norm": 0.060206320226901304, "learning_rate": 2.317590004117271e-06, "loss": 0.0005, "step": 216520 }, { "epoch": 1.424511358328454, "grad_norm": 0.08441128168785617, "learning_rate": 2.317105522797571e-06, "loss": 0.0007, "step": 216530 }, { "epoch": 1.4245771465036874, "grad_norm": 0.045775337887559095, "learning_rate": 2.3166210768495196e-06, "loss": 0.0011, "step": 216540 }, { "epoch": 1.424642934678921, "grad_norm": 0.02616041275177538, "learning_rate": 2.3161366662795014e-06, "loss": 0.0005, "step": 216550 }, { "epoch": 1.4247087228541542, "grad_norm": 0.0002242655621282594, "learning_rate": 2.3156522910939037e-06, "loss": 0.0005, "step": 216560 }, { "epoch": 1.4247745110293875, "grad_norm": 0.036756429854859836, "learning_rate": 2.315167951299112e-06, "loss": 0.0008, "step": 216570 }, { "epoch": 1.424840299204621, "grad_norm": 0.04635261537847648, "learning_rate": 2.3146836469015126e-06, "loss": 0.0004, "step": 216580 }, { "epoch": 1.4249060873798545, "grad_norm": 0.06501743544081537, "learning_rate": 2.3141993779074895e-06, "loss": 0.0007, "step": 216590 }, { "epoch": 1.4249718755550878, "grad_norm": 0.05740820583209108, "learning_rate": 2.3137151443234284e-06, "loss": 0.001, "step": 216600 }, { "epoch": 1.425037663730321, "grad_norm": 0.019286866095816358, "learning_rate": 2.313230946155713e-06, "loss": 0.0009, "step": 216610 }, { "epoch": 1.4251034519055545, "grad_norm": 0.005981143985697198, "learning_rate": 2.312746783410727e-06, "loss": 0.0005, "step": 216620 }, { "epoch": 1.4251692400807878, "grad_norm": 0.0012327043799084782, "learning_rate": 2.3122626560948536e-06, "loss": 0.0003, "step": 216630 }, { "epoch": 1.4252350282560213, "grad_norm": 0.029603940058099385, "learning_rate": 2.311778564214476e-06, "loss": 0.0013, "step": 216640 }, { "epoch": 1.4253008164312546, "grad_norm": 0.012012587501086427, "learning_rate": 2.311294507775977e-06, "loss": 0.0007, "step": 216650 }, { "epoch": 1.425366604606488, "grad_norm": 0.009658540007705694, "learning_rate": 2.310810486785735e-06, "loss": 0.0004, "step": 216660 }, { "epoch": 1.4254323927817214, "grad_norm": 0.0231373551988087, "learning_rate": 2.3103265012501365e-06, "loss": 0.0003, "step": 216670 }, { "epoch": 1.425498180956955, "grad_norm": 0.01324406496964313, "learning_rate": 2.30984255117556e-06, "loss": 0.0005, "step": 216680 }, { "epoch": 1.4255639691321882, "grad_norm": 0.024867030655386542, "learning_rate": 2.3093586365683864e-06, "loss": 0.0009, "step": 216690 }, { "epoch": 1.4256297573074215, "grad_norm": 0.022859335190444273, "learning_rate": 2.3088747574349945e-06, "loss": 0.0009, "step": 216700 }, { "epoch": 1.425695545482655, "grad_norm": 0.02037443305004008, "learning_rate": 2.3083909137817654e-06, "loss": 0.0006, "step": 216710 }, { "epoch": 1.4257613336578883, "grad_norm": 0.02132855651098384, "learning_rate": 2.3079071056150774e-06, "loss": 0.0006, "step": 216720 }, { "epoch": 1.4258271218331218, "grad_norm": 0.03690997855797231, "learning_rate": 2.3074233329413095e-06, "loss": 0.001, "step": 216730 }, { "epoch": 1.425892910008355, "grad_norm": 0.06720174722604418, "learning_rate": 2.306939595766839e-06, "loss": 0.001, "step": 216740 }, { "epoch": 1.4259586981835886, "grad_norm": 0.027162449801853956, "learning_rate": 2.3064558940980442e-06, "loss": 0.0005, "step": 216750 }, { "epoch": 1.4260244863588218, "grad_norm": 0.025987424665702117, "learning_rate": 2.305972227941302e-06, "loss": 0.0006, "step": 216760 }, { "epoch": 1.4260902745340553, "grad_norm": 0.06592013519040339, "learning_rate": 2.3054885973029894e-06, "loss": 0.0004, "step": 216770 }, { "epoch": 1.4261560627092886, "grad_norm": 0.02446911731532448, "learning_rate": 2.3050050021894826e-06, "loss": 0.0009, "step": 216780 }, { "epoch": 1.426221850884522, "grad_norm": 0.00227877538530289, "learning_rate": 2.304521442607155e-06, "loss": 0.0003, "step": 216790 }, { "epoch": 1.4262876390597554, "grad_norm": 0.008812983773947397, "learning_rate": 2.3040379185623867e-06, "loss": 0.0002, "step": 216800 }, { "epoch": 1.4263534272349887, "grad_norm": 0.023869087563239914, "learning_rate": 2.3035544300615496e-06, "loss": 0.0003, "step": 216810 }, { "epoch": 1.4264192154102222, "grad_norm": 0.0014727085706913573, "learning_rate": 2.3030709771110186e-06, "loss": 0.0004, "step": 216820 }, { "epoch": 1.4264850035854555, "grad_norm": 0.020219146382269952, "learning_rate": 2.3025875597171676e-06, "loss": 0.0004, "step": 216830 }, { "epoch": 1.426550791760689, "grad_norm": 0.022841289739663408, "learning_rate": 2.30210417788637e-06, "loss": 0.0006, "step": 216840 }, { "epoch": 1.4266165799359223, "grad_norm": 0.010474097570640082, "learning_rate": 2.301620831624999e-06, "loss": 0.0008, "step": 216850 }, { "epoch": 1.4266823681111558, "grad_norm": 0.02373732860342018, "learning_rate": 2.3011375209394265e-06, "loss": 0.0006, "step": 216860 }, { "epoch": 1.426748156286389, "grad_norm": 0.00723026631725599, "learning_rate": 2.3006542458360255e-06, "loss": 0.0009, "step": 216870 }, { "epoch": 1.4268139444616224, "grad_norm": 0.02361983182338636, "learning_rate": 2.300171006321166e-06, "loss": 0.0004, "step": 216880 }, { "epoch": 1.4268797326368559, "grad_norm": 0.09465324454417696, "learning_rate": 2.299687802401221e-06, "loss": 0.0008, "step": 216890 }, { "epoch": 1.4269455208120894, "grad_norm": 0.03397818391664846, "learning_rate": 2.2992046340825595e-06, "loss": 0.0003, "step": 216900 }, { "epoch": 1.4270113089873226, "grad_norm": 0.020920353850793216, "learning_rate": 2.2987215013715524e-06, "loss": 0.0012, "step": 216910 }, { "epoch": 1.427077097162556, "grad_norm": 0.0015032848229438282, "learning_rate": 2.298238404274568e-06, "loss": 0.001, "step": 216920 }, { "epoch": 1.4271428853377894, "grad_norm": 0.012394516645504951, "learning_rate": 2.297755342797978e-06, "loss": 0.0005, "step": 216930 }, { "epoch": 1.4272086735130227, "grad_norm": 0.009665207456451316, "learning_rate": 2.297272316948151e-06, "loss": 0.0006, "step": 216940 }, { "epoch": 1.4272744616882562, "grad_norm": 0.006528619838207256, "learning_rate": 2.2967893267314534e-06, "loss": 0.0004, "step": 216950 }, { "epoch": 1.4273402498634895, "grad_norm": 0.05403707537498262, "learning_rate": 2.2963063721542538e-06, "loss": 0.0004, "step": 216960 }, { "epoch": 1.4274060380387228, "grad_norm": 0.02011218103756175, "learning_rate": 2.29582345322292e-06, "loss": 0.0004, "step": 216970 }, { "epoch": 1.4274718262139563, "grad_norm": 0.05197248977670555, "learning_rate": 2.2953405699438187e-06, "loss": 0.0005, "step": 216980 }, { "epoch": 1.4275376143891898, "grad_norm": 0.023189904372752593, "learning_rate": 2.294857722323316e-06, "loss": 0.001, "step": 216990 }, { "epoch": 1.427603402564423, "grad_norm": 0.02837987027469149, "learning_rate": 2.2943749103677777e-06, "loss": 0.0008, "step": 217000 }, { "epoch": 1.4276691907396564, "grad_norm": 0.07940964921232531, "learning_rate": 2.293892134083569e-06, "loss": 0.0004, "step": 217010 }, { "epoch": 1.4277349789148899, "grad_norm": 0.008197873395702526, "learning_rate": 2.293409393477056e-06, "loss": 0.0003, "step": 217020 }, { "epoch": 1.4278007670901232, "grad_norm": 0.021085174962168253, "learning_rate": 2.2929266885546024e-06, "loss": 0.0007, "step": 217030 }, { "epoch": 1.4278665552653567, "grad_norm": 0.0010122369097221207, "learning_rate": 2.2924440193225726e-06, "loss": 0.0005, "step": 217040 }, { "epoch": 1.42793234344059, "grad_norm": 0.07737802607434414, "learning_rate": 2.291961385787328e-06, "loss": 0.0005, "step": 217050 }, { "epoch": 1.4279981316158235, "grad_norm": 0.029701182523760368, "learning_rate": 2.2914787879552357e-06, "loss": 0.0007, "step": 217060 }, { "epoch": 1.4280639197910567, "grad_norm": 0.03622818975555218, "learning_rate": 2.290996225832656e-06, "loss": 0.0004, "step": 217070 }, { "epoch": 1.4281297079662902, "grad_norm": 0.3406253180077429, "learning_rate": 2.290513699425952e-06, "loss": 0.0008, "step": 217080 }, { "epoch": 1.4281954961415235, "grad_norm": 0.01924582347506679, "learning_rate": 2.2900312087414844e-06, "loss": 0.0007, "step": 217090 }, { "epoch": 1.4282612843167568, "grad_norm": 0.0015817426732956815, "learning_rate": 2.289548753785615e-06, "loss": 0.0004, "step": 217100 }, { "epoch": 1.4283270724919903, "grad_norm": 0.01202142457683081, "learning_rate": 2.289066334564704e-06, "loss": 0.0003, "step": 217110 }, { "epoch": 1.4283928606672236, "grad_norm": 0.04162419819796871, "learning_rate": 2.2885839510851127e-06, "loss": 0.0002, "step": 217120 }, { "epoch": 1.428458648842457, "grad_norm": 0.039718843396515144, "learning_rate": 2.2881016033531998e-06, "loss": 0.0012, "step": 217130 }, { "epoch": 1.4285244370176904, "grad_norm": 0.04320657406560103, "learning_rate": 2.2876192913753255e-06, "loss": 0.0004, "step": 217140 }, { "epoch": 1.428590225192924, "grad_norm": 0.004141351335175742, "learning_rate": 2.2871370151578478e-06, "loss": 0.0007, "step": 217150 }, { "epoch": 1.4286560133681572, "grad_norm": 0.004666968513321088, "learning_rate": 2.2866547747071257e-06, "loss": 0.0003, "step": 217160 }, { "epoch": 1.4287218015433907, "grad_norm": 0.023167031977642506, "learning_rate": 2.2861725700295175e-06, "loss": 0.0006, "step": 217170 }, { "epoch": 1.428787589718624, "grad_norm": 0.03034986331894647, "learning_rate": 2.2856904011313776e-06, "loss": 0.0005, "step": 217180 }, { "epoch": 1.4288533778938572, "grad_norm": 0.0016324634369798544, "learning_rate": 2.2852082680190672e-06, "loss": 0.0002, "step": 217190 }, { "epoch": 1.4289191660690908, "grad_norm": 0.20726916169491916, "learning_rate": 2.284726170698942e-06, "loss": 0.0008, "step": 217200 }, { "epoch": 1.4289849542443243, "grad_norm": 0.009322790020589585, "learning_rate": 2.2842441091773563e-06, "loss": 0.0003, "step": 217210 }, { "epoch": 1.4290507424195575, "grad_norm": 0.07084499567120735, "learning_rate": 2.2837620834606676e-06, "loss": 0.0008, "step": 217220 }, { "epoch": 1.4291165305947908, "grad_norm": 0.023924710311034016, "learning_rate": 2.283280093555229e-06, "loss": 0.0006, "step": 217230 }, { "epoch": 1.4291823187700243, "grad_norm": 0.023738256177174556, "learning_rate": 2.2827981394673963e-06, "loss": 0.0008, "step": 217240 }, { "epoch": 1.4292481069452576, "grad_norm": 0.1229502407353653, "learning_rate": 2.282316221203524e-06, "loss": 0.0009, "step": 217250 }, { "epoch": 1.4293138951204911, "grad_norm": 0.0712002431314019, "learning_rate": 2.281834338769964e-06, "loss": 0.0007, "step": 217260 }, { "epoch": 1.4293796832957244, "grad_norm": 0.02738558072592016, "learning_rate": 2.2813524921730716e-06, "loss": 0.0004, "step": 217270 }, { "epoch": 1.4294454714709577, "grad_norm": 0.012930455218462537, "learning_rate": 2.280870681419199e-06, "loss": 0.0002, "step": 217280 }, { "epoch": 1.4295112596461912, "grad_norm": 0.017269762511494854, "learning_rate": 2.280388906514697e-06, "loss": 0.0005, "step": 217290 }, { "epoch": 1.4295770478214247, "grad_norm": 0.022297491663313747, "learning_rate": 2.279907167465919e-06, "loss": 0.0005, "step": 217300 }, { "epoch": 1.429642835996658, "grad_norm": 0.004006090315957759, "learning_rate": 2.279425464279214e-06, "loss": 0.0005, "step": 217310 }, { "epoch": 1.4297086241718913, "grad_norm": 0.02415657998359266, "learning_rate": 2.2789437969609364e-06, "loss": 0.0004, "step": 217320 }, { "epoch": 1.4297744123471248, "grad_norm": 0.024882532342968264, "learning_rate": 2.2784621655174344e-06, "loss": 0.0005, "step": 217330 }, { "epoch": 1.429840200522358, "grad_norm": 0.10348439607985303, "learning_rate": 2.277980569955058e-06, "loss": 0.0012, "step": 217340 }, { "epoch": 1.4299059886975916, "grad_norm": 0.017702932748147496, "learning_rate": 2.277499010280158e-06, "loss": 0.001, "step": 217350 }, { "epoch": 1.4299717768728248, "grad_norm": 0.028683049541181237, "learning_rate": 2.2770174864990816e-06, "loss": 0.0005, "step": 217360 }, { "epoch": 1.4300375650480581, "grad_norm": 0.08906121862624584, "learning_rate": 2.276535998618178e-06, "loss": 0.0004, "step": 217370 }, { "epoch": 1.4301033532232916, "grad_norm": 0.008719921975895195, "learning_rate": 2.276054546643795e-06, "loss": 0.0004, "step": 217380 }, { "epoch": 1.4301691413985251, "grad_norm": 0.016662315201864137, "learning_rate": 2.2755731305822805e-06, "loss": 0.0004, "step": 217390 }, { "epoch": 1.4302349295737584, "grad_norm": 0.024326034010992148, "learning_rate": 2.275091750439981e-06, "loss": 0.0004, "step": 217400 }, { "epoch": 1.4303007177489917, "grad_norm": 0.010589561698989738, "learning_rate": 2.2746104062232436e-06, "loss": 0.0007, "step": 217410 }, { "epoch": 1.4303665059242252, "grad_norm": 0.0009137303133589151, "learning_rate": 2.2741290979384146e-06, "loss": 0.0006, "step": 217420 }, { "epoch": 1.4304322940994585, "grad_norm": 0.006302256517950094, "learning_rate": 2.2736478255918387e-06, "loss": 0.0019, "step": 217430 }, { "epoch": 1.430498082274692, "grad_norm": 0.02542506803396479, "learning_rate": 2.27316658918986e-06, "loss": 0.0004, "step": 217440 }, { "epoch": 1.4305638704499253, "grad_norm": 0.024767799926838426, "learning_rate": 2.272685388738826e-06, "loss": 0.0008, "step": 217450 }, { "epoch": 1.4306296586251588, "grad_norm": 0.026933889818367585, "learning_rate": 2.27220422424508e-06, "loss": 0.0008, "step": 217460 }, { "epoch": 1.430695446800392, "grad_norm": 0.000863642010827635, "learning_rate": 2.271723095714966e-06, "loss": 0.0003, "step": 217470 }, { "epoch": 1.4307612349756256, "grad_norm": 0.11649219237579926, "learning_rate": 2.271242003154826e-06, "loss": 0.0005, "step": 217480 }, { "epoch": 1.4308270231508589, "grad_norm": 0.03827697190137891, "learning_rate": 2.270760946571004e-06, "loss": 0.0008, "step": 217490 }, { "epoch": 1.4308928113260921, "grad_norm": 0.002798647071712406, "learning_rate": 2.270279925969841e-06, "loss": 0.0009, "step": 217500 }, { "epoch": 1.4309585995013256, "grad_norm": 0.020095071308821683, "learning_rate": 2.2697989413576806e-06, "loss": 0.0019, "step": 217510 }, { "epoch": 1.4310243876765592, "grad_norm": 0.018679641636106303, "learning_rate": 2.2693179927408625e-06, "loss": 0.0003, "step": 217520 }, { "epoch": 1.4310901758517924, "grad_norm": 0.061943805770876846, "learning_rate": 2.2688370801257285e-06, "loss": 0.0008, "step": 217530 }, { "epoch": 1.4311559640270257, "grad_norm": 0.0010259378957462955, "learning_rate": 2.268356203518619e-06, "loss": 0.0007, "step": 217540 }, { "epoch": 1.4312217522022592, "grad_norm": 0.049591608721053594, "learning_rate": 2.2678753629258736e-06, "loss": 0.0004, "step": 217550 }, { "epoch": 1.4312875403774925, "grad_norm": 0.04837726989303885, "learning_rate": 2.267394558353832e-06, "loss": 0.0003, "step": 217560 }, { "epoch": 1.431353328552726, "grad_norm": 0.0060671175433459675, "learning_rate": 2.266913789808831e-06, "loss": 0.0002, "step": 217570 }, { "epoch": 1.4314191167279593, "grad_norm": 0.04013198750149826, "learning_rate": 2.2664330572972137e-06, "loss": 0.0004, "step": 217580 }, { "epoch": 1.4314849049031926, "grad_norm": 0.0374587813485861, "learning_rate": 2.2659523608253146e-06, "loss": 0.0003, "step": 217590 }, { "epoch": 1.431550693078426, "grad_norm": 0.027567110790828325, "learning_rate": 2.2654717003994734e-06, "loss": 0.0008, "step": 217600 }, { "epoch": 1.4316164812536596, "grad_norm": 0.011210541922265059, "learning_rate": 2.2649910760260257e-06, "loss": 0.0003, "step": 217610 }, { "epoch": 1.4316822694288929, "grad_norm": 0.018684885839511115, "learning_rate": 2.2645104877113087e-06, "loss": 0.0004, "step": 217620 }, { "epoch": 1.4317480576041262, "grad_norm": 0.056507088164677935, "learning_rate": 2.2640299354616585e-06, "loss": 0.0008, "step": 217630 }, { "epoch": 1.4318138457793597, "grad_norm": 0.02170790475714222, "learning_rate": 2.2635494192834103e-06, "loss": 0.0003, "step": 217640 }, { "epoch": 1.431879633954593, "grad_norm": 0.002696627559144586, "learning_rate": 2.2630689391829e-06, "loss": 0.0011, "step": 217650 }, { "epoch": 1.4319454221298265, "grad_norm": 0.01031498895103502, "learning_rate": 2.262588495166462e-06, "loss": 0.0005, "step": 217660 }, { "epoch": 1.4320112103050597, "grad_norm": 0.019525316482265475, "learning_rate": 2.26210808724043e-06, "loss": 0.0003, "step": 217670 }, { "epoch": 1.432076998480293, "grad_norm": 0.030631263614629027, "learning_rate": 2.2616277154111383e-06, "loss": 0.0005, "step": 217680 }, { "epoch": 1.4321427866555265, "grad_norm": 0.007585513153228497, "learning_rate": 2.2611473796849204e-06, "loss": 0.001, "step": 217690 }, { "epoch": 1.43220857483076, "grad_norm": 0.08811828420089785, "learning_rate": 2.2606670800681076e-06, "loss": 0.0012, "step": 217700 }, { "epoch": 1.4322743630059933, "grad_norm": 0.022697641414097275, "learning_rate": 2.2601868165670344e-06, "loss": 0.0006, "step": 217710 }, { "epoch": 1.4323401511812266, "grad_norm": 0.019579417408075294, "learning_rate": 2.2597065891880322e-06, "loss": 0.0004, "step": 217720 }, { "epoch": 1.43240593935646, "grad_norm": 0.02287329162509939, "learning_rate": 2.259226397937432e-06, "loss": 0.0005, "step": 217730 }, { "epoch": 1.4324717275316934, "grad_norm": 0.01749991464621862, "learning_rate": 2.2587462428215647e-06, "loss": 0.0004, "step": 217740 }, { "epoch": 1.432537515706927, "grad_norm": 0.12152445665164563, "learning_rate": 2.2582661238467603e-06, "loss": 0.0009, "step": 217750 }, { "epoch": 1.4326033038821602, "grad_norm": 0.00034307582940561106, "learning_rate": 2.257786041019349e-06, "loss": 0.0006, "step": 217760 }, { "epoch": 1.4326690920573937, "grad_norm": 0.0007279224213553936, "learning_rate": 2.257305994345661e-06, "loss": 0.0004, "step": 217770 }, { "epoch": 1.432734880232627, "grad_norm": 0.07646828503808195, "learning_rate": 2.2568259838320244e-06, "loss": 0.0011, "step": 217780 }, { "epoch": 1.4328006684078605, "grad_norm": 0.02856359054962196, "learning_rate": 2.256346009484768e-06, "loss": 0.0005, "step": 217790 }, { "epoch": 1.4328664565830938, "grad_norm": 0.036121253745966524, "learning_rate": 2.2558660713102204e-06, "loss": 0.0006, "step": 217800 }, { "epoch": 1.432932244758327, "grad_norm": 0.017222300891725936, "learning_rate": 2.2553861693147085e-06, "loss": 0.0005, "step": 217810 }, { "epoch": 1.4329980329335605, "grad_norm": 0.004895349687393856, "learning_rate": 2.254906303504559e-06, "loss": 0.0006, "step": 217820 }, { "epoch": 1.4330638211087938, "grad_norm": 0.007080208617699013, "learning_rate": 2.254426473886098e-06, "loss": 0.0012, "step": 217830 }, { "epoch": 1.4331296092840273, "grad_norm": 0.0004176846044687137, "learning_rate": 2.253946680465654e-06, "loss": 0.0006, "step": 217840 }, { "epoch": 1.4331953974592606, "grad_norm": 0.014158856368446606, "learning_rate": 2.2534669232495517e-06, "loss": 0.0003, "step": 217850 }, { "epoch": 1.4332611856344941, "grad_norm": 0.0005530298681882532, "learning_rate": 2.2529872022441156e-06, "loss": 0.0025, "step": 217860 }, { "epoch": 1.4333269738097274, "grad_norm": 0.0429142915766716, "learning_rate": 2.2525075174556708e-06, "loss": 0.0005, "step": 217870 }, { "epoch": 1.433392761984961, "grad_norm": 0.019333181670415606, "learning_rate": 2.2520278688905417e-06, "loss": 0.0004, "step": 217880 }, { "epoch": 1.4334585501601942, "grad_norm": 0.028204932492065238, "learning_rate": 2.2515482565550523e-06, "loss": 0.0004, "step": 217890 }, { "epoch": 1.4335243383354275, "grad_norm": 0.006384622153082916, "learning_rate": 2.251068680455525e-06, "loss": 0.0007, "step": 217900 }, { "epoch": 1.433590126510661, "grad_norm": 0.004829652667682165, "learning_rate": 2.2505891405982828e-06, "loss": 0.0006, "step": 217910 }, { "epoch": 1.4336559146858945, "grad_norm": 0.04676294867660859, "learning_rate": 2.250109636989648e-06, "loss": 0.0004, "step": 217920 }, { "epoch": 1.4337217028611278, "grad_norm": 0.029530191169247624, "learning_rate": 2.249630169635943e-06, "loss": 0.0013, "step": 217930 }, { "epoch": 1.433787491036361, "grad_norm": 0.02167283560009884, "learning_rate": 2.249150738543489e-06, "loss": 0.0003, "step": 217940 }, { "epoch": 1.4338532792115946, "grad_norm": 0.04111297997244278, "learning_rate": 2.2486713437186063e-06, "loss": 0.0009, "step": 217950 }, { "epoch": 1.4339190673868278, "grad_norm": 0.10447095886148906, "learning_rate": 2.2481919851676142e-06, "loss": 0.0008, "step": 217960 }, { "epoch": 1.4339848555620613, "grad_norm": 0.07972820170692915, "learning_rate": 2.247712662896836e-06, "loss": 0.0012, "step": 217970 }, { "epoch": 1.4340506437372946, "grad_norm": 0.009405105037975372, "learning_rate": 2.2472333769125897e-06, "loss": 0.0005, "step": 217980 }, { "epoch": 1.434116431912528, "grad_norm": 0.01151878808676886, "learning_rate": 2.2467541272211933e-06, "loss": 0.0005, "step": 217990 }, { "epoch": 1.4341822200877614, "grad_norm": 0.14580482113162732, "learning_rate": 2.2462749138289665e-06, "loss": 0.0006, "step": 218000 }, { "epoch": 1.434248008262995, "grad_norm": 0.018872007478386583, "learning_rate": 2.2457957367422257e-06, "loss": 0.0004, "step": 218010 }, { "epoch": 1.4343137964382282, "grad_norm": 0.023471719498033512, "learning_rate": 2.24531659596729e-06, "loss": 0.0005, "step": 218020 }, { "epoch": 1.4343795846134615, "grad_norm": 0.025859397689981512, "learning_rate": 2.244837491510476e-06, "loss": 0.0006, "step": 218030 }, { "epoch": 1.434445372788695, "grad_norm": 0.03066943507606016, "learning_rate": 2.2443584233781004e-06, "loss": 0.0005, "step": 218040 }, { "epoch": 1.4345111609639283, "grad_norm": 0.017422065838055868, "learning_rate": 2.2438793915764785e-06, "loss": 0.0007, "step": 218050 }, { "epoch": 1.4345769491391618, "grad_norm": 0.012391763513290268, "learning_rate": 2.2434003961119262e-06, "loss": 0.0004, "step": 218060 }, { "epoch": 1.434642737314395, "grad_norm": 0.024234225649022556, "learning_rate": 2.242921436990759e-06, "loss": 0.0004, "step": 218070 }, { "epoch": 1.4347085254896286, "grad_norm": 0.013855699842600084, "learning_rate": 2.2424425142192924e-06, "loss": 0.0014, "step": 218080 }, { "epoch": 1.4347743136648619, "grad_norm": 0.014059099104266818, "learning_rate": 2.241963627803837e-06, "loss": 0.0008, "step": 218090 }, { "epoch": 1.4348401018400954, "grad_norm": 0.040845738881610874, "learning_rate": 2.2414847777507115e-06, "loss": 0.0006, "step": 218100 }, { "epoch": 1.4349058900153286, "grad_norm": 0.020784278479765876, "learning_rate": 2.2410059640662264e-06, "loss": 0.0005, "step": 218110 }, { "epoch": 1.434971678190562, "grad_norm": 0.04401246058139874, "learning_rate": 2.2405271867566953e-06, "loss": 0.0008, "step": 218120 }, { "epoch": 1.4350374663657954, "grad_norm": 0.014432249648602494, "learning_rate": 2.2400484458284296e-06, "loss": 0.0007, "step": 218130 }, { "epoch": 1.4351032545410287, "grad_norm": 0.05139799266479946, "learning_rate": 2.2395697412877417e-06, "loss": 0.0014, "step": 218140 }, { "epoch": 1.4351690427162622, "grad_norm": 0.006327635556841496, "learning_rate": 2.2390910731409425e-06, "loss": 0.0005, "step": 218150 }, { "epoch": 1.4352348308914955, "grad_norm": 0.018622385134670037, "learning_rate": 2.238612441394343e-06, "loss": 0.0006, "step": 218160 }, { "epoch": 1.435300619066729, "grad_norm": 0.0010068671531830056, "learning_rate": 2.238133846054254e-06, "loss": 0.0013, "step": 218170 }, { "epoch": 1.4353664072419623, "grad_norm": 0.00881962783498555, "learning_rate": 2.2376552871269845e-06, "loss": 0.0008, "step": 218180 }, { "epoch": 1.4354321954171958, "grad_norm": 0.02872492094845031, "learning_rate": 2.2371767646188447e-06, "loss": 0.0007, "step": 218190 }, { "epoch": 1.435497983592429, "grad_norm": 0.006830883492942126, "learning_rate": 2.236698278536143e-06, "loss": 0.0006, "step": 218200 }, { "epoch": 1.4355637717676624, "grad_norm": 0.01731905880956468, "learning_rate": 2.2362198288851878e-06, "loss": 0.0005, "step": 218210 }, { "epoch": 1.4356295599428959, "grad_norm": 0.02208796723849448, "learning_rate": 2.235741415672285e-06, "loss": 0.0004, "step": 218220 }, { "epoch": 1.4356953481181294, "grad_norm": 0.001585535912855442, "learning_rate": 2.2352630389037464e-06, "loss": 0.0006, "step": 218230 }, { "epoch": 1.4357611362933627, "grad_norm": 0.04656000911201922, "learning_rate": 2.234784698585879e-06, "loss": 0.0005, "step": 218240 }, { "epoch": 1.435826924468596, "grad_norm": 0.03255166232204782, "learning_rate": 2.2343063947249853e-06, "loss": 0.0004, "step": 218250 }, { "epoch": 1.4358927126438294, "grad_norm": 0.02967668743937444, "learning_rate": 2.2338281273273725e-06, "loss": 0.0006, "step": 218260 }, { "epoch": 1.4359585008190627, "grad_norm": 0.005333010233861855, "learning_rate": 2.2333498963993477e-06, "loss": 0.0039, "step": 218270 }, { "epoch": 1.4360242889942962, "grad_norm": 0.011686804300481371, "learning_rate": 2.2328717019472145e-06, "loss": 0.0005, "step": 218280 }, { "epoch": 1.4360900771695295, "grad_norm": 0.045310655468993155, "learning_rate": 2.2323935439772786e-06, "loss": 0.0004, "step": 218290 }, { "epoch": 1.4361558653447628, "grad_norm": 0.00021399860132651687, "learning_rate": 2.2319154224958434e-06, "loss": 0.0006, "step": 218300 }, { "epoch": 1.4362216535199963, "grad_norm": 0.005330601037251007, "learning_rate": 2.2314373375092122e-06, "loss": 0.0006, "step": 218310 }, { "epoch": 1.4362874416952298, "grad_norm": 0.11045281921764831, "learning_rate": 2.2309592890236868e-06, "loss": 0.0005, "step": 218320 }, { "epoch": 1.436353229870463, "grad_norm": 0.046434926808387504, "learning_rate": 2.230481277045574e-06, "loss": 0.0014, "step": 218330 }, { "epoch": 1.4364190180456964, "grad_norm": 0.02028301444848414, "learning_rate": 2.2300033015811733e-06, "loss": 0.0003, "step": 218340 }, { "epoch": 1.4364848062209299, "grad_norm": 0.020959492696704107, "learning_rate": 2.229525362636787e-06, "loss": 0.0007, "step": 218350 }, { "epoch": 1.4365505943961632, "grad_norm": 0.010938432827332572, "learning_rate": 2.229047460218716e-06, "loss": 0.0005, "step": 218360 }, { "epoch": 1.4366163825713967, "grad_norm": 0.06188935852843637, "learning_rate": 2.2285695943332606e-06, "loss": 0.0004, "step": 218370 }, { "epoch": 1.43668217074663, "grad_norm": 0.02822249115951382, "learning_rate": 2.2280917649867223e-06, "loss": 0.0004, "step": 218380 }, { "epoch": 1.4367479589218635, "grad_norm": 0.05464135385180403, "learning_rate": 2.2276139721854e-06, "loss": 0.0007, "step": 218390 }, { "epoch": 1.4368137470970967, "grad_norm": 0.040263921775725506, "learning_rate": 2.2271362159355924e-06, "loss": 0.0009, "step": 218400 }, { "epoch": 1.4368795352723303, "grad_norm": 0.03266756644975641, "learning_rate": 2.2266584962436e-06, "loss": 0.0007, "step": 218410 }, { "epoch": 1.4369453234475635, "grad_norm": 0.16440181163165232, "learning_rate": 2.2261808131157192e-06, "loss": 0.0011, "step": 218420 }, { "epoch": 1.4370111116227968, "grad_norm": 0.007511324029278644, "learning_rate": 2.225703166558249e-06, "loss": 0.0005, "step": 218430 }, { "epoch": 1.4370768997980303, "grad_norm": 0.057739237999267806, "learning_rate": 2.2252255565774866e-06, "loss": 0.001, "step": 218440 }, { "epoch": 1.4371426879732636, "grad_norm": 0.008555286936521153, "learning_rate": 2.224747983179727e-06, "loss": 0.0003, "step": 218450 }, { "epoch": 1.4372084761484971, "grad_norm": 0.010532055155951256, "learning_rate": 2.2242704463712704e-06, "loss": 0.0004, "step": 218460 }, { "epoch": 1.4372742643237304, "grad_norm": 0.09471562580514009, "learning_rate": 2.2237929461584102e-06, "loss": 0.0006, "step": 218470 }, { "epoch": 1.437340052498964, "grad_norm": 0.08747234390657448, "learning_rate": 2.223315482547443e-06, "loss": 0.0007, "step": 218480 }, { "epoch": 1.4374058406741972, "grad_norm": 0.054854260408515494, "learning_rate": 2.2228380555446623e-06, "loss": 0.001, "step": 218490 }, { "epoch": 1.4374716288494307, "grad_norm": 0.04636053526584632, "learning_rate": 2.222360665156364e-06, "loss": 0.0009, "step": 218500 }, { "epoch": 1.437537417024664, "grad_norm": 0.00802392398125685, "learning_rate": 2.2218833113888406e-06, "loss": 0.0004, "step": 218510 }, { "epoch": 1.4376032051998973, "grad_norm": 0.0013398232092838903, "learning_rate": 2.221405994248387e-06, "loss": 0.0008, "step": 218520 }, { "epoch": 1.4376689933751308, "grad_norm": 0.013489155942479836, "learning_rate": 2.2209287137412953e-06, "loss": 0.0007, "step": 218530 }, { "epoch": 1.4377347815503643, "grad_norm": 0.006914361887708468, "learning_rate": 2.2204514698738587e-06, "loss": 0.0006, "step": 218540 }, { "epoch": 1.4378005697255976, "grad_norm": 0.012062855614694412, "learning_rate": 2.219974262652369e-06, "loss": 0.0006, "step": 218550 }, { "epoch": 1.4378663579008308, "grad_norm": 0.0007581520553369646, "learning_rate": 2.219497092083117e-06, "loss": 0.0002, "step": 218560 }, { "epoch": 1.4379321460760643, "grad_norm": 0.0363205780842758, "learning_rate": 2.219019958172395e-06, "loss": 0.0006, "step": 218570 }, { "epoch": 1.4379979342512976, "grad_norm": 0.01410566717978293, "learning_rate": 2.218542860926491e-06, "loss": 0.0003, "step": 218580 }, { "epoch": 1.4380637224265311, "grad_norm": 0.10655244071542888, "learning_rate": 2.218065800351699e-06, "loss": 0.0007, "step": 218590 }, { "epoch": 1.4381295106017644, "grad_norm": 0.02609186210103586, "learning_rate": 2.217588776454307e-06, "loss": 0.0003, "step": 218600 }, { "epoch": 1.4381952987769977, "grad_norm": 0.005709892952655516, "learning_rate": 2.2171117892406037e-06, "loss": 0.0002, "step": 218610 }, { "epoch": 1.4382610869522312, "grad_norm": 0.46828938064636816, "learning_rate": 2.216634838716878e-06, "loss": 0.0035, "step": 218620 }, { "epoch": 1.4383268751274647, "grad_norm": 0.018073375353998115, "learning_rate": 2.2161579248894175e-06, "loss": 0.0004, "step": 218630 }, { "epoch": 1.438392663302698, "grad_norm": 0.02032589143169671, "learning_rate": 2.2156810477645114e-06, "loss": 0.0006, "step": 218640 }, { "epoch": 1.4384584514779313, "grad_norm": 0.007993650030451716, "learning_rate": 2.2152042073484457e-06, "loss": 0.0006, "step": 218650 }, { "epoch": 1.4385242396531648, "grad_norm": 0.05006598102506702, "learning_rate": 2.2147274036475077e-06, "loss": 0.0007, "step": 218660 }, { "epoch": 1.438590027828398, "grad_norm": 0.16023275625279035, "learning_rate": 2.2142506366679827e-06, "loss": 0.0009, "step": 218670 }, { "epoch": 1.4386558160036316, "grad_norm": 0.016715013406690606, "learning_rate": 2.2137739064161578e-06, "loss": 0.0002, "step": 218680 }, { "epoch": 1.4387216041788649, "grad_norm": 0.045994831075004454, "learning_rate": 2.213297212898317e-06, "loss": 0.0004, "step": 218690 }, { "epoch": 1.4387873923540981, "grad_norm": 0.011489708545126934, "learning_rate": 2.212820556120746e-06, "loss": 0.0003, "step": 218700 }, { "epoch": 1.4388531805293316, "grad_norm": 0.03421452772601158, "learning_rate": 2.212343936089727e-06, "loss": 0.0002, "step": 218710 }, { "epoch": 1.4389189687045651, "grad_norm": 0.006864410955868417, "learning_rate": 2.2118673528115475e-06, "loss": 0.0002, "step": 218720 }, { "epoch": 1.4389847568797984, "grad_norm": 0.005737172275248387, "learning_rate": 2.211390806292489e-06, "loss": 0.0011, "step": 218730 }, { "epoch": 1.4390505450550317, "grad_norm": 0.016512989295834526, "learning_rate": 2.210914296538834e-06, "loss": 0.0004, "step": 218740 }, { "epoch": 1.4391163332302652, "grad_norm": 0.031621412221650956, "learning_rate": 2.210437823556866e-06, "loss": 0.0005, "step": 218750 }, { "epoch": 1.4391821214054985, "grad_norm": 0.07077488131718446, "learning_rate": 2.209961387352865e-06, "loss": 0.001, "step": 218760 }, { "epoch": 1.439247909580732, "grad_norm": 0.039471981439052166, "learning_rate": 2.2094849879331136e-06, "loss": 0.0004, "step": 218770 }, { "epoch": 1.4393136977559653, "grad_norm": 0.08645377257544071, "learning_rate": 2.209008625303893e-06, "loss": 0.0007, "step": 218780 }, { "epoch": 1.4393794859311988, "grad_norm": 0.015101713502497853, "learning_rate": 2.2085322994714836e-06, "loss": 0.0004, "step": 218790 }, { "epoch": 1.439445274106432, "grad_norm": 0.008900137510251708, "learning_rate": 2.2080560104421643e-06, "loss": 0.0004, "step": 218800 }, { "epoch": 1.4395110622816656, "grad_norm": 0.002632049320017552, "learning_rate": 2.207579758222215e-06, "loss": 0.0003, "step": 218810 }, { "epoch": 1.4395768504568989, "grad_norm": 0.009020243250494895, "learning_rate": 2.2071035428179157e-06, "loss": 0.0003, "step": 218820 }, { "epoch": 1.4396426386321322, "grad_norm": 0.02206779410140354, "learning_rate": 2.2066273642355433e-06, "loss": 0.0008, "step": 218830 }, { "epoch": 1.4397084268073657, "grad_norm": 0.05723346252316226, "learning_rate": 2.206151222481375e-06, "loss": 0.0003, "step": 218840 }, { "epoch": 1.4397742149825992, "grad_norm": 0.026208537430891677, "learning_rate": 2.2056751175616927e-06, "loss": 0.0006, "step": 218850 }, { "epoch": 1.4398400031578324, "grad_norm": 0.008333644149186204, "learning_rate": 2.2051990494827697e-06, "loss": 0.0006, "step": 218860 }, { "epoch": 1.4399057913330657, "grad_norm": 0.06116636373823199, "learning_rate": 2.2047230182508832e-06, "loss": 0.0008, "step": 218870 }, { "epoch": 1.4399715795082992, "grad_norm": 0.007338195752936924, "learning_rate": 2.20424702387231e-06, "loss": 0.0002, "step": 218880 }, { "epoch": 1.4400373676835325, "grad_norm": 0.00017007158900270118, "learning_rate": 2.2037710663533252e-06, "loss": 0.0004, "step": 218890 }, { "epoch": 1.440103155858766, "grad_norm": 0.03750305009196308, "learning_rate": 2.203295145700204e-06, "loss": 0.0004, "step": 218900 }, { "epoch": 1.4401689440339993, "grad_norm": 0.045703071857353415, "learning_rate": 2.2028192619192207e-06, "loss": 0.0008, "step": 218910 }, { "epoch": 1.4402347322092326, "grad_norm": 0.03131102625980145, "learning_rate": 2.2023434150166502e-06, "loss": 0.0013, "step": 218920 }, { "epoch": 1.440300520384466, "grad_norm": 0.029907798594150943, "learning_rate": 2.2018676049987648e-06, "loss": 0.0012, "step": 218930 }, { "epoch": 1.4403663085596996, "grad_norm": 0.0351262467688946, "learning_rate": 2.2013918318718386e-06, "loss": 0.0005, "step": 218940 }, { "epoch": 1.4404320967349329, "grad_norm": 0.04406767726087627, "learning_rate": 2.200916095642144e-06, "loss": 0.0005, "step": 218950 }, { "epoch": 1.4404978849101662, "grad_norm": 0.0193029800904555, "learning_rate": 2.2004403963159536e-06, "loss": 0.0005, "step": 218960 }, { "epoch": 1.4405636730853997, "grad_norm": 0.0002989286522781566, "learning_rate": 2.1999647338995367e-06, "loss": 0.0006, "step": 218970 }, { "epoch": 1.440629461260633, "grad_norm": 0.024877895072461294, "learning_rate": 2.1994891083991683e-06, "loss": 0.0005, "step": 218980 }, { "epoch": 1.4406952494358665, "grad_norm": 0.040349862505157215, "learning_rate": 2.199013519821117e-06, "loss": 0.0005, "step": 218990 }, { "epoch": 1.4407610376110997, "grad_norm": 0.01627310936308662, "learning_rate": 2.198537968171654e-06, "loss": 0.0002, "step": 219000 }, { "epoch": 1.440826825786333, "grad_norm": 0.007644221155939924, "learning_rate": 2.1980624534570482e-06, "loss": 0.0005, "step": 219010 }, { "epoch": 1.4408926139615665, "grad_norm": 0.037533652303039164, "learning_rate": 2.197586975683569e-06, "loss": 0.0017, "step": 219020 }, { "epoch": 1.4409584021368, "grad_norm": 0.019319414088439006, "learning_rate": 2.197111534857485e-06, "loss": 0.0003, "step": 219030 }, { "epoch": 1.4410241903120333, "grad_norm": 0.029286948897696008, "learning_rate": 2.196636130985065e-06, "loss": 0.0003, "step": 219040 }, { "epoch": 1.4410899784872666, "grad_norm": 0.010328050448636383, "learning_rate": 2.196160764072576e-06, "loss": 0.0003, "step": 219050 }, { "epoch": 1.4411557666625001, "grad_norm": 0.037561074042575616, "learning_rate": 2.195685434126286e-06, "loss": 0.0005, "step": 219060 }, { "epoch": 1.4412215548377334, "grad_norm": 0.02910363226621223, "learning_rate": 2.1952101411524617e-06, "loss": 0.0004, "step": 219070 }, { "epoch": 1.441287343012967, "grad_norm": 0.03289208843604597, "learning_rate": 2.194734885157369e-06, "loss": 0.0004, "step": 219080 }, { "epoch": 1.4413531311882002, "grad_norm": 0.08448082006437112, "learning_rate": 2.194259666147275e-06, "loss": 0.0006, "step": 219090 }, { "epoch": 1.4414189193634337, "grad_norm": 0.04583281018828221, "learning_rate": 2.1937844841284412e-06, "loss": 0.0006, "step": 219100 }, { "epoch": 1.441484707538667, "grad_norm": 0.012923910099275671, "learning_rate": 2.1933093391071373e-06, "loss": 0.0013, "step": 219110 }, { "epoch": 1.4415504957139005, "grad_norm": 0.009097273294253298, "learning_rate": 2.192834231089626e-06, "loss": 0.0006, "step": 219120 }, { "epoch": 1.4416162838891338, "grad_norm": 0.025051936490267806, "learning_rate": 2.1923591600821716e-06, "loss": 0.0004, "step": 219130 }, { "epoch": 1.441682072064367, "grad_norm": 0.05961803780152436, "learning_rate": 2.1918841260910357e-06, "loss": 0.0008, "step": 219140 }, { "epoch": 1.4417478602396006, "grad_norm": 0.021309786268515853, "learning_rate": 2.1914091291224834e-06, "loss": 0.0028, "step": 219150 }, { "epoch": 1.4418136484148338, "grad_norm": 0.03759908401399456, "learning_rate": 2.190934169182775e-06, "loss": 0.0007, "step": 219160 }, { "epoch": 1.4418794365900673, "grad_norm": 0.015461626292312555, "learning_rate": 2.1904592462781743e-06, "loss": 0.0004, "step": 219170 }, { "epoch": 1.4419452247653006, "grad_norm": 0.025801883053662673, "learning_rate": 2.1899843604149413e-06, "loss": 0.0009, "step": 219180 }, { "epoch": 1.4420110129405341, "grad_norm": 0.046984644905995875, "learning_rate": 2.189509511599338e-06, "loss": 0.0015, "step": 219190 }, { "epoch": 1.4420768011157674, "grad_norm": 0.06148795842764738, "learning_rate": 2.1890346998376244e-06, "loss": 0.0003, "step": 219200 }, { "epoch": 1.442142589291001, "grad_norm": 0.0031922906415998444, "learning_rate": 2.1885599251360613e-06, "loss": 0.0005, "step": 219210 }, { "epoch": 1.4422083774662342, "grad_norm": 0.00029011187499209133, "learning_rate": 2.188085187500906e-06, "loss": 0.0007, "step": 219220 }, { "epoch": 1.4422741656414675, "grad_norm": 0.004878285546764527, "learning_rate": 2.1876104869384186e-06, "loss": 0.0004, "step": 219230 }, { "epoch": 1.442339953816701, "grad_norm": 0.011017193996026466, "learning_rate": 2.187135823454859e-06, "loss": 0.0005, "step": 219240 }, { "epoch": 1.4424057419919345, "grad_norm": 0.018405237835784048, "learning_rate": 2.1866611970564843e-06, "loss": 0.0008, "step": 219250 }, { "epoch": 1.4424715301671678, "grad_norm": 0.012352720997929818, "learning_rate": 2.1861866077495523e-06, "loss": 0.0007, "step": 219260 }, { "epoch": 1.442537318342401, "grad_norm": 9.140585993325761e-05, "learning_rate": 2.1857120555403192e-06, "loss": 0.0003, "step": 219270 }, { "epoch": 1.4426031065176346, "grad_norm": 0.0022524752550969807, "learning_rate": 2.185237540435043e-06, "loss": 0.0019, "step": 219280 }, { "epoch": 1.4426688946928679, "grad_norm": 0.033164606145535305, "learning_rate": 2.184763062439978e-06, "loss": 0.0004, "step": 219290 }, { "epoch": 1.4427346828681014, "grad_norm": 0.012904589278699913, "learning_rate": 2.1842886215613805e-06, "loss": 0.0005, "step": 219300 }, { "epoch": 1.4428004710433346, "grad_norm": 0.09008940076342276, "learning_rate": 2.1838142178055063e-06, "loss": 0.0007, "step": 219310 }, { "epoch": 1.442866259218568, "grad_norm": 0.03403697035621522, "learning_rate": 2.1833398511786093e-06, "loss": 0.0006, "step": 219320 }, { "epoch": 1.4429320473938014, "grad_norm": 0.029984869941821195, "learning_rate": 2.1828655216869434e-06, "loss": 0.0005, "step": 219330 }, { "epoch": 1.442997835569035, "grad_norm": 0.05639192065548509, "learning_rate": 2.182391229336763e-06, "loss": 0.0009, "step": 219340 }, { "epoch": 1.4430636237442682, "grad_norm": 0.009216659752076575, "learning_rate": 2.1819169741343206e-06, "loss": 0.0006, "step": 219350 }, { "epoch": 1.4431294119195015, "grad_norm": 0.056810236299606666, "learning_rate": 2.1814427560858668e-06, "loss": 0.001, "step": 219360 }, { "epoch": 1.443195200094735, "grad_norm": 0.010485064276737332, "learning_rate": 2.1809685751976585e-06, "loss": 0.0004, "step": 219370 }, { "epoch": 1.4432609882699683, "grad_norm": 0.007624924556122258, "learning_rate": 2.180494431475944e-06, "loss": 0.0002, "step": 219380 }, { "epoch": 1.4433267764452018, "grad_norm": 0.02007630621148507, "learning_rate": 2.1800203249269763e-06, "loss": 0.0007, "step": 219390 }, { "epoch": 1.443392564620435, "grad_norm": 0.05473480151690998, "learning_rate": 2.1795462555570044e-06, "loss": 0.0007, "step": 219400 }, { "epoch": 1.4434583527956686, "grad_norm": 0.0033766589542462097, "learning_rate": 2.17907222337228e-06, "loss": 0.0002, "step": 219410 }, { "epoch": 1.4435241409709019, "grad_norm": 0.0027382197066533978, "learning_rate": 2.178598228379051e-06, "loss": 0.0001, "step": 219420 }, { "epoch": 1.4435899291461354, "grad_norm": 0.03333403511726329, "learning_rate": 2.1781242705835685e-06, "loss": 0.0015, "step": 219430 }, { "epoch": 1.4436557173213687, "grad_norm": 0.025614199419860352, "learning_rate": 2.17765034999208e-06, "loss": 0.0005, "step": 219440 }, { "epoch": 1.443721505496602, "grad_norm": 0.00010155895652973725, "learning_rate": 2.177176466610834e-06, "loss": 0.0003, "step": 219450 }, { "epoch": 1.4437872936718354, "grad_norm": 0.07285026776198382, "learning_rate": 2.176702620446079e-06, "loss": 0.0007, "step": 219460 }, { "epoch": 1.4438530818470687, "grad_norm": 0.005467268282302332, "learning_rate": 2.176228811504061e-06, "loss": 0.0005, "step": 219470 }, { "epoch": 1.4439188700223022, "grad_norm": 0.04868309264896487, "learning_rate": 2.175755039791027e-06, "loss": 0.0007, "step": 219480 }, { "epoch": 1.4439846581975355, "grad_norm": 0.030855025498893796, "learning_rate": 2.1752813053132226e-06, "loss": 0.0003, "step": 219490 }, { "epoch": 1.444050446372769, "grad_norm": 0.022786345075316274, "learning_rate": 2.1748076080768958e-06, "loss": 0.0005, "step": 219500 }, { "epoch": 1.4441162345480023, "grad_norm": 0.0006558332506827132, "learning_rate": 2.174333948088291e-06, "loss": 0.0007, "step": 219510 }, { "epoch": 1.4441820227232358, "grad_norm": 0.0006508691987668423, "learning_rate": 2.173860325353653e-06, "loss": 0.0003, "step": 219520 }, { "epoch": 1.444247810898469, "grad_norm": 0.024832946273719767, "learning_rate": 2.1733867398792254e-06, "loss": 0.0007, "step": 219530 }, { "epoch": 1.4443135990737024, "grad_norm": 0.02495373118322626, "learning_rate": 2.172913191671252e-06, "loss": 0.0004, "step": 219540 }, { "epoch": 1.4443793872489359, "grad_norm": 0.005802369113131023, "learning_rate": 2.1724396807359773e-06, "loss": 0.0003, "step": 219550 }, { "epoch": 1.4444451754241694, "grad_norm": 0.017777170240366427, "learning_rate": 2.1719662070796427e-06, "loss": 0.0008, "step": 219560 }, { "epoch": 1.4445109635994027, "grad_norm": 0.04298173995784723, "learning_rate": 2.1714927707084915e-06, "loss": 0.0005, "step": 219570 }, { "epoch": 1.444576751774636, "grad_norm": 0.02510799433459625, "learning_rate": 2.1710193716287654e-06, "loss": 0.0003, "step": 219580 }, { "epoch": 1.4446425399498695, "grad_norm": 0.216272546016804, "learning_rate": 2.1705460098467057e-06, "loss": 0.0009, "step": 219590 }, { "epoch": 1.4447083281251027, "grad_norm": 0.026079563147617522, "learning_rate": 2.1700726853685526e-06, "loss": 0.0005, "step": 219600 }, { "epoch": 1.4447741163003363, "grad_norm": 0.07018235149950162, "learning_rate": 2.1695993982005474e-06, "loss": 0.0004, "step": 219610 }, { "epoch": 1.4448399044755695, "grad_norm": 0.003577361898418718, "learning_rate": 2.1691261483489286e-06, "loss": 0.0002, "step": 219620 }, { "epoch": 1.4449056926508028, "grad_norm": 0.013554635349087598, "learning_rate": 2.1686529358199378e-06, "loss": 0.001, "step": 219630 }, { "epoch": 1.4449714808260363, "grad_norm": 0.0059650513004905455, "learning_rate": 2.1681797606198122e-06, "loss": 0.0006, "step": 219640 }, { "epoch": 1.4450372690012698, "grad_norm": 0.011941310406654552, "learning_rate": 2.1677066227547916e-06, "loss": 0.0014, "step": 219650 }, { "epoch": 1.4451030571765031, "grad_norm": 0.06957186716960845, "learning_rate": 2.1672335222311125e-06, "loss": 0.0005, "step": 219660 }, { "epoch": 1.4451688453517364, "grad_norm": 0.04198782966665407, "learning_rate": 2.166760459055013e-06, "loss": 0.0003, "step": 219670 }, { "epoch": 1.44523463352697, "grad_norm": 0.04339803575147021, "learning_rate": 2.16628743323273e-06, "loss": 0.0004, "step": 219680 }, { "epoch": 1.4453004217022032, "grad_norm": 0.03506284489171909, "learning_rate": 2.1658144447704997e-06, "loss": 0.0005, "step": 219690 }, { "epoch": 1.4453662098774367, "grad_norm": 0.020798250665242116, "learning_rate": 2.1653414936745583e-06, "loss": 0.0007, "step": 219700 }, { "epoch": 1.44543199805267, "grad_norm": 0.020800628299758116, "learning_rate": 2.1648685799511408e-06, "loss": 0.0004, "step": 219710 }, { "epoch": 1.4454977862279033, "grad_norm": 0.014128335243613982, "learning_rate": 2.164395703606483e-06, "loss": 0.0008, "step": 219720 }, { "epoch": 1.4455635744031368, "grad_norm": 0.028167009877215172, "learning_rate": 2.1639228646468186e-06, "loss": 0.0002, "step": 219730 }, { "epoch": 1.4456293625783703, "grad_norm": 0.06434183306527848, "learning_rate": 2.163450063078382e-06, "loss": 0.0006, "step": 219740 }, { "epoch": 1.4456951507536036, "grad_norm": 0.2534335268347162, "learning_rate": 2.1629772989074043e-06, "loss": 0.003, "step": 219750 }, { "epoch": 1.4457609389288368, "grad_norm": 0.0221487889769197, "learning_rate": 2.1625045721401227e-06, "loss": 0.0005, "step": 219760 }, { "epoch": 1.4458267271040703, "grad_norm": 0.013550285640465488, "learning_rate": 2.162031882782768e-06, "loss": 0.0009, "step": 219770 }, { "epoch": 1.4458925152793036, "grad_norm": 0.0009358621775969265, "learning_rate": 2.1615592308415716e-06, "loss": 0.0004, "step": 219780 }, { "epoch": 1.4459583034545371, "grad_norm": 0.028480365101176627, "learning_rate": 2.161086616322766e-06, "loss": 0.0004, "step": 219790 }, { "epoch": 1.4460240916297704, "grad_norm": 0.029499951240260426, "learning_rate": 2.1606140392325803e-06, "loss": 0.0003, "step": 219800 }, { "epoch": 1.446089879805004, "grad_norm": 0.04439251710011079, "learning_rate": 2.1601414995772468e-06, "loss": 0.0007, "step": 219810 }, { "epoch": 1.4461556679802372, "grad_norm": 0.021877870161218234, "learning_rate": 2.1596689973629955e-06, "loss": 0.0008, "step": 219820 }, { "epoch": 1.4462214561554707, "grad_norm": 0.06505071753119318, "learning_rate": 2.1591965325960547e-06, "loss": 0.0003, "step": 219830 }, { "epoch": 1.446287244330704, "grad_norm": 0.056766274700918815, "learning_rate": 2.158724105282654e-06, "loss": 0.0007, "step": 219840 }, { "epoch": 1.4463530325059373, "grad_norm": 0.016536975488268137, "learning_rate": 2.1582517154290226e-06, "loss": 0.0005, "step": 219850 }, { "epoch": 1.4464188206811708, "grad_norm": 1.1044922579561984, "learning_rate": 2.157779363041388e-06, "loss": 0.0006, "step": 219860 }, { "epoch": 1.4464846088564043, "grad_norm": 0.0004161212368166803, "learning_rate": 2.1573070481259778e-06, "loss": 0.0008, "step": 219870 }, { "epoch": 1.4465503970316376, "grad_norm": 0.14240915263019688, "learning_rate": 2.156834770689017e-06, "loss": 0.0008, "step": 219880 }, { "epoch": 1.4466161852068709, "grad_norm": 0.039604883694643975, "learning_rate": 2.1563625307367357e-06, "loss": 0.0009, "step": 219890 }, { "epoch": 1.4466819733821044, "grad_norm": 0.009237731440307213, "learning_rate": 2.155890328275359e-06, "loss": 0.0003, "step": 219900 }, { "epoch": 1.4467477615573376, "grad_norm": 0.13755661827114865, "learning_rate": 2.155418163311112e-06, "loss": 0.0014, "step": 219910 }, { "epoch": 1.4468135497325711, "grad_norm": 0.0077184264133269035, "learning_rate": 2.15494603585022e-06, "loss": 0.0003, "step": 219920 }, { "epoch": 1.4468793379078044, "grad_norm": 0.03970586365337495, "learning_rate": 2.154473945898907e-06, "loss": 0.0005, "step": 219930 }, { "epoch": 1.4469451260830377, "grad_norm": 0.0017766384732343825, "learning_rate": 2.1540018934633978e-06, "loss": 0.0005, "step": 219940 }, { "epoch": 1.4470109142582712, "grad_norm": 0.02536918950658592, "learning_rate": 2.1535298785499154e-06, "loss": 0.0007, "step": 219950 }, { "epoch": 1.4470767024335047, "grad_norm": 0.07949367718024716, "learning_rate": 2.153057901164683e-06, "loss": 0.0008, "step": 219960 }, { "epoch": 1.447142490608738, "grad_norm": 0.01806465337203853, "learning_rate": 2.1525859613139237e-06, "loss": 0.0007, "step": 219970 }, { "epoch": 1.4472082787839713, "grad_norm": 0.05606617275368211, "learning_rate": 2.152114059003859e-06, "loss": 0.0003, "step": 219980 }, { "epoch": 1.4472740669592048, "grad_norm": 0.025201387779723777, "learning_rate": 2.1516421942407113e-06, "loss": 0.0009, "step": 219990 }, { "epoch": 1.447339855134438, "grad_norm": 0.03204854265492152, "learning_rate": 2.151170367030701e-06, "loss": 0.0006, "step": 220000 }, { "epoch": 1.4474056433096716, "grad_norm": 0.0074992959848098, "learning_rate": 2.150698577380047e-06, "loss": 0.0002, "step": 220010 }, { "epoch": 1.4474714314849049, "grad_norm": 0.0035296563971224924, "learning_rate": 2.1502268252949737e-06, "loss": 0.0005, "step": 220020 }, { "epoch": 1.4475372196601382, "grad_norm": 0.004965597228372284, "learning_rate": 2.1497551107816985e-06, "loss": 0.0003, "step": 220030 }, { "epoch": 1.4476030078353717, "grad_norm": 0.02273201656657477, "learning_rate": 2.14928343384644e-06, "loss": 0.0006, "step": 220040 }, { "epoch": 1.4476687960106052, "grad_norm": 0.0004937440561661361, "learning_rate": 2.148811794495417e-06, "loss": 0.0002, "step": 220050 }, { "epoch": 1.4477345841858384, "grad_norm": 0.010207205492466181, "learning_rate": 2.148340192734849e-06, "loss": 0.0004, "step": 220060 }, { "epoch": 1.4478003723610717, "grad_norm": 0.08291654808369674, "learning_rate": 2.1478686285709526e-06, "loss": 0.001, "step": 220070 }, { "epoch": 1.4478661605363052, "grad_norm": 0.011923658468832716, "learning_rate": 2.147397102009945e-06, "loss": 0.0003, "step": 220080 }, { "epoch": 1.4479319487115385, "grad_norm": 0.012284661106300127, "learning_rate": 2.146925613058043e-06, "loss": 0.0004, "step": 220090 }, { "epoch": 1.447997736886772, "grad_norm": 0.013138756714520836, "learning_rate": 2.146454161721462e-06, "loss": 0.0004, "step": 220100 }, { "epoch": 1.4480635250620053, "grad_norm": 0.01026843705887522, "learning_rate": 2.145982748006419e-06, "loss": 0.0006, "step": 220110 }, { "epoch": 1.4481293132372388, "grad_norm": 0.00031851629091241674, "learning_rate": 2.1455113719191285e-06, "loss": 0.0005, "step": 220120 }, { "epoch": 1.448195101412472, "grad_norm": 0.034244989048743325, "learning_rate": 2.1450400334658055e-06, "loss": 0.0009, "step": 220130 }, { "epoch": 1.4482608895877056, "grad_norm": 0.0645114701527163, "learning_rate": 2.1445687326526637e-06, "loss": 0.0006, "step": 220140 }, { "epoch": 1.4483266777629389, "grad_norm": 0.0476810633234249, "learning_rate": 2.144097469485917e-06, "loss": 0.0005, "step": 220150 }, { "epoch": 1.4483924659381722, "grad_norm": 0.07230172181564572, "learning_rate": 2.1436262439717786e-06, "loss": 0.0005, "step": 220160 }, { "epoch": 1.4484582541134057, "grad_norm": 0.004279115045513487, "learning_rate": 2.1431550561164616e-06, "loss": 0.0003, "step": 220170 }, { "epoch": 1.448524042288639, "grad_norm": 0.008159156951444323, "learning_rate": 2.1426839059261774e-06, "loss": 0.0002, "step": 220180 }, { "epoch": 1.4485898304638725, "grad_norm": 0.026466692298949174, "learning_rate": 2.1422127934071384e-06, "loss": 0.0006, "step": 220190 }, { "epoch": 1.4486556186391057, "grad_norm": 0.020193500246355755, "learning_rate": 2.1417417185655555e-06, "loss": 0.0004, "step": 220200 }, { "epoch": 1.4487214068143393, "grad_norm": 0.09574032756878656, "learning_rate": 2.1412706814076397e-06, "loss": 0.0008, "step": 220210 }, { "epoch": 1.4487871949895725, "grad_norm": 0.0601731174973501, "learning_rate": 2.140799681939601e-06, "loss": 0.0012, "step": 220220 }, { "epoch": 1.448852983164806, "grad_norm": 0.05433124144914009, "learning_rate": 2.1403287201676486e-06, "loss": 0.0024, "step": 220230 }, { "epoch": 1.4489187713400393, "grad_norm": 0.003364188011241611, "learning_rate": 2.139857796097991e-06, "loss": 0.0003, "step": 220240 }, { "epoch": 1.4489845595152726, "grad_norm": 0.03408838101671995, "learning_rate": 2.1393869097368395e-06, "loss": 0.0013, "step": 220250 }, { "epoch": 1.4490503476905061, "grad_norm": 0.014589688080250374, "learning_rate": 2.1389160610904014e-06, "loss": 0.0003, "step": 220260 }, { "epoch": 1.4491161358657396, "grad_norm": 0.03372840138996295, "learning_rate": 2.138445250164884e-06, "loss": 0.001, "step": 220270 }, { "epoch": 1.449181924040973, "grad_norm": 0.03028618495688716, "learning_rate": 2.1379744769664945e-06, "loss": 0.0005, "step": 220280 }, { "epoch": 1.4492477122162062, "grad_norm": 0.012592761998256234, "learning_rate": 2.13750374150144e-06, "loss": 0.0011, "step": 220290 }, { "epoch": 1.4493135003914397, "grad_norm": 0.06866499134025202, "learning_rate": 2.1370330437759264e-06, "loss": 0.0006, "step": 220300 }, { "epoch": 1.449379288566673, "grad_norm": 0.04050958234446366, "learning_rate": 2.136562383796159e-06, "loss": 0.0005, "step": 220310 }, { "epoch": 1.4494450767419065, "grad_norm": 0.011608150540633867, "learning_rate": 2.1360917615683437e-06, "loss": 0.0006, "step": 220320 }, { "epoch": 1.4495108649171398, "grad_norm": 0.012550583987369374, "learning_rate": 2.1356211770986853e-06, "loss": 0.0004, "step": 220330 }, { "epoch": 1.449576653092373, "grad_norm": 0.03313898898368761, "learning_rate": 2.1351506303933877e-06, "loss": 0.0006, "step": 220340 }, { "epoch": 1.4496424412676066, "grad_norm": 0.0031005659624417964, "learning_rate": 2.134680121458655e-06, "loss": 0.0004, "step": 220350 }, { "epoch": 1.44970822944284, "grad_norm": 0.03502520471832647, "learning_rate": 2.13420965030069e-06, "loss": 0.0003, "step": 220360 }, { "epoch": 1.4497740176180733, "grad_norm": 0.056959892982192124, "learning_rate": 2.133739216925694e-06, "loss": 0.0006, "step": 220370 }, { "epoch": 1.4498398057933066, "grad_norm": 0.009181758159355189, "learning_rate": 2.1332688213398727e-06, "loss": 0.0005, "step": 220380 }, { "epoch": 1.4499055939685401, "grad_norm": 0.03078320959739136, "learning_rate": 2.132798463549426e-06, "loss": 0.0013, "step": 220390 }, { "epoch": 1.4499713821437734, "grad_norm": 0.0059386076669488705, "learning_rate": 2.132328143560556e-06, "loss": 0.0004, "step": 220400 }, { "epoch": 1.450037170319007, "grad_norm": 0.1312057663956665, "learning_rate": 2.1318578613794617e-06, "loss": 0.0011, "step": 220410 }, { "epoch": 1.4501029584942402, "grad_norm": 0.0028035021290642027, "learning_rate": 2.131387617012345e-06, "loss": 0.0005, "step": 220420 }, { "epoch": 1.4501687466694737, "grad_norm": 0.09758335797358432, "learning_rate": 2.1309174104654053e-06, "loss": 0.0005, "step": 220430 }, { "epoch": 1.450234534844707, "grad_norm": 0.0027545710942032536, "learning_rate": 2.1304472417448407e-06, "loss": 0.0001, "step": 220440 }, { "epoch": 1.4503003230199405, "grad_norm": 0.011400337902890054, "learning_rate": 2.129977110856852e-06, "loss": 0.0004, "step": 220450 }, { "epoch": 1.4503661111951738, "grad_norm": 0.03376238580280815, "learning_rate": 2.1295070178076354e-06, "loss": 0.0006, "step": 220460 }, { "epoch": 1.450431899370407, "grad_norm": 0.010903956528728102, "learning_rate": 2.1290369626033906e-06, "loss": 0.0005, "step": 220470 }, { "epoch": 1.4504976875456406, "grad_norm": 0.01938371722688796, "learning_rate": 2.128566945250313e-06, "loss": 0.0005, "step": 220480 }, { "epoch": 1.4505634757208739, "grad_norm": 0.007246153998633012, "learning_rate": 2.1280969657546012e-06, "loss": 0.0007, "step": 220490 }, { "epoch": 1.4506292638961074, "grad_norm": 0.03260033554987053, "learning_rate": 2.1276270241224483e-06, "loss": 0.0002, "step": 220500 }, { "epoch": 1.4506950520713406, "grad_norm": 0.009164045217695623, "learning_rate": 2.1271571203600543e-06, "loss": 0.0009, "step": 220510 }, { "epoch": 1.4507608402465741, "grad_norm": 0.02343368849997088, "learning_rate": 2.126687254473612e-06, "loss": 0.0011, "step": 220520 }, { "epoch": 1.4508266284218074, "grad_norm": 0.01230037582679356, "learning_rate": 2.1262174264693176e-06, "loss": 0.0007, "step": 220530 }, { "epoch": 1.450892416597041, "grad_norm": 0.0031395705132488878, "learning_rate": 2.1257476363533638e-06, "loss": 0.0003, "step": 220540 }, { "epoch": 1.4509582047722742, "grad_norm": 0.03895836764885371, "learning_rate": 2.1252778841319453e-06, "loss": 0.0003, "step": 220550 }, { "epoch": 1.4510239929475075, "grad_norm": 0.06013326901589516, "learning_rate": 2.124808169811255e-06, "loss": 0.0006, "step": 220560 }, { "epoch": 1.451089781122741, "grad_norm": 0.014172174510557498, "learning_rate": 2.124338493397486e-06, "loss": 0.0004, "step": 220570 }, { "epoch": 1.4511555692979745, "grad_norm": 0.03614438192681496, "learning_rate": 2.12386885489683e-06, "loss": 0.0003, "step": 220580 }, { "epoch": 1.4512213574732078, "grad_norm": 0.04979484810264426, "learning_rate": 2.12339925431548e-06, "loss": 0.0007, "step": 220590 }, { "epoch": 1.451287145648441, "grad_norm": 0.003102510147341654, "learning_rate": 2.1229296916596255e-06, "loss": 0.0003, "step": 220600 }, { "epoch": 1.4513529338236746, "grad_norm": 0.010960217126071535, "learning_rate": 2.1224601669354582e-06, "loss": 0.0006, "step": 220610 }, { "epoch": 1.4514187219989079, "grad_norm": 0.003394215988974934, "learning_rate": 2.121990680149169e-06, "loss": 0.0003, "step": 220620 }, { "epoch": 1.4514845101741414, "grad_norm": 0.03297757479473296, "learning_rate": 2.121521231306945e-06, "loss": 0.001, "step": 220630 }, { "epoch": 1.4515502983493747, "grad_norm": 0.0006394101985988913, "learning_rate": 2.1210518204149793e-06, "loss": 0.0004, "step": 220640 }, { "epoch": 1.451616086524608, "grad_norm": 0.014357516578614152, "learning_rate": 2.120582447479459e-06, "loss": 0.0007, "step": 220650 }, { "epoch": 1.4516818746998414, "grad_norm": 0.008975140559074465, "learning_rate": 2.1201131125065722e-06, "loss": 0.0004, "step": 220660 }, { "epoch": 1.451747662875075, "grad_norm": 0.022462248188580287, "learning_rate": 2.1196438155025068e-06, "loss": 0.0006, "step": 220670 }, { "epoch": 1.4518134510503082, "grad_norm": 0.0897483168511062, "learning_rate": 2.11917455647345e-06, "loss": 0.0014, "step": 220680 }, { "epoch": 1.4518792392255415, "grad_norm": 0.010752977719712198, "learning_rate": 2.118705335425589e-06, "loss": 0.0005, "step": 220690 }, { "epoch": 1.451945027400775, "grad_norm": 0.033893959917951307, "learning_rate": 2.1182361523651095e-06, "loss": 0.0008, "step": 220700 }, { "epoch": 1.4520108155760083, "grad_norm": 0.03723888438898382, "learning_rate": 2.1177670072981972e-06, "loss": 0.0006, "step": 220710 }, { "epoch": 1.4520766037512418, "grad_norm": 0.010147856068479958, "learning_rate": 2.117297900231038e-06, "loss": 0.0006, "step": 220720 }, { "epoch": 1.452142391926475, "grad_norm": 0.05675605642563829, "learning_rate": 2.116828831169816e-06, "loss": 0.0005, "step": 220730 }, { "epoch": 1.4522081801017086, "grad_norm": 0.0908615339192464, "learning_rate": 2.116359800120716e-06, "loss": 0.0011, "step": 220740 }, { "epoch": 1.4522739682769419, "grad_norm": 0.02936879256729579, "learning_rate": 2.1158908070899216e-06, "loss": 0.0004, "step": 220750 }, { "epoch": 1.4523397564521754, "grad_norm": 0.046636331024876375, "learning_rate": 2.1154218520836146e-06, "loss": 0.0005, "step": 220760 }, { "epoch": 1.4524055446274087, "grad_norm": 0.17802087363202307, "learning_rate": 2.11495293510798e-06, "loss": 0.001, "step": 220770 }, { "epoch": 1.452471332802642, "grad_norm": 0.007426006012558964, "learning_rate": 2.1144840561692e-06, "loss": 0.0001, "step": 220780 }, { "epoch": 1.4525371209778755, "grad_norm": 0.04109949110781802, "learning_rate": 2.1140152152734555e-06, "loss": 0.0002, "step": 220790 }, { "epoch": 1.4526029091531087, "grad_norm": 0.010735788121275161, "learning_rate": 2.113546412426928e-06, "loss": 0.0009, "step": 220800 }, { "epoch": 1.4526686973283423, "grad_norm": 0.0051368791683226896, "learning_rate": 2.113077647635798e-06, "loss": 0.0004, "step": 220810 }, { "epoch": 1.4527344855035755, "grad_norm": 0.07001345067314127, "learning_rate": 2.1126089209062457e-06, "loss": 0.0008, "step": 220820 }, { "epoch": 1.452800273678809, "grad_norm": 0.04501718266144484, "learning_rate": 2.112140232244451e-06, "loss": 0.0005, "step": 220830 }, { "epoch": 1.4528660618540423, "grad_norm": 0.016192375086521767, "learning_rate": 2.1116715816565934e-06, "loss": 0.0006, "step": 220840 }, { "epoch": 1.4529318500292758, "grad_norm": 0.01003139074502564, "learning_rate": 2.1112029691488515e-06, "loss": 0.0005, "step": 220850 }, { "epoch": 1.4529976382045091, "grad_norm": 0.028372101044343335, "learning_rate": 2.110734394727404e-06, "loss": 0.0003, "step": 220860 }, { "epoch": 1.4530634263797424, "grad_norm": 0.002974002851851301, "learning_rate": 2.1102658583984277e-06, "loss": 0.0006, "step": 220870 }, { "epoch": 1.453129214554976, "grad_norm": 0.026530156308070364, "learning_rate": 2.1097973601681003e-06, "loss": 0.0003, "step": 220880 }, { "epoch": 1.4531950027302094, "grad_norm": 0.005825050186285768, "learning_rate": 2.109328900042597e-06, "loss": 0.0005, "step": 220890 }, { "epoch": 1.4532607909054427, "grad_norm": 0.07780611561063905, "learning_rate": 2.1088604780280976e-06, "loss": 0.0007, "step": 220900 }, { "epoch": 1.453326579080676, "grad_norm": 0.015096328429419398, "learning_rate": 2.108392094130775e-06, "loss": 0.0006, "step": 220910 }, { "epoch": 1.4533923672559095, "grad_norm": 0.017376757834011925, "learning_rate": 2.107923748356806e-06, "loss": 0.0003, "step": 220920 }, { "epoch": 1.4534581554311428, "grad_norm": 0.011797031380550372, "learning_rate": 2.1074554407123643e-06, "loss": 0.0005, "step": 220930 }, { "epoch": 1.4535239436063763, "grad_norm": 0.006638751410000386, "learning_rate": 2.1069871712036242e-06, "loss": 0.0003, "step": 220940 }, { "epoch": 1.4535897317816096, "grad_norm": 0.00031674181536369144, "learning_rate": 2.1065189398367604e-06, "loss": 0.0003, "step": 220950 }, { "epoch": 1.4536555199568428, "grad_norm": 0.034600547256367555, "learning_rate": 2.1060507466179447e-06, "loss": 0.0006, "step": 220960 }, { "epoch": 1.4537213081320763, "grad_norm": 0.006704930694106962, "learning_rate": 2.1055825915533512e-06, "loss": 0.0007, "step": 220970 }, { "epoch": 1.4537870963073098, "grad_norm": 0.01805438046014017, "learning_rate": 2.105114474649151e-06, "loss": 0.0014, "step": 220980 }, { "epoch": 1.4538528844825431, "grad_norm": 0.033131849465206205, "learning_rate": 2.1046463959115167e-06, "loss": 0.0007, "step": 220990 }, { "epoch": 1.4539186726577764, "grad_norm": 0.013755793764982345, "learning_rate": 2.1041783553466182e-06, "loss": 0.0008, "step": 221000 }, { "epoch": 1.45398446083301, "grad_norm": 0.01338117000037443, "learning_rate": 2.103710352960628e-06, "loss": 0.0005, "step": 221010 }, { "epoch": 1.4540502490082432, "grad_norm": 0.029821759268358776, "learning_rate": 2.1032423887597132e-06, "loss": 0.0008, "step": 221020 }, { "epoch": 1.4541160371834767, "grad_norm": 0.005840186166180775, "learning_rate": 2.1027744627500476e-06, "loss": 0.0004, "step": 221030 }, { "epoch": 1.45418182535871, "grad_norm": 0.04765749453268152, "learning_rate": 2.102306574937798e-06, "loss": 0.0012, "step": 221040 }, { "epoch": 1.4542476135339433, "grad_norm": 0.04817297821686293, "learning_rate": 2.101838725329134e-06, "loss": 0.0007, "step": 221050 }, { "epoch": 1.4543134017091768, "grad_norm": 0.05346297224022532, "learning_rate": 2.1013709139302236e-06, "loss": 0.0008, "step": 221060 }, { "epoch": 1.4543791898844103, "grad_norm": 0.022285641938189927, "learning_rate": 2.1009031407472343e-06, "loss": 0.0005, "step": 221070 }, { "epoch": 1.4544449780596436, "grad_norm": 0.01029960817137444, "learning_rate": 2.100435405786333e-06, "loss": 0.0003, "step": 221080 }, { "epoch": 1.4545107662348769, "grad_norm": 0.009985603762300368, "learning_rate": 2.099967709053686e-06, "loss": 0.0009, "step": 221090 }, { "epoch": 1.4545765544101104, "grad_norm": 0.021888497860450055, "learning_rate": 2.0995000505554613e-06, "loss": 0.0011, "step": 221100 }, { "epoch": 1.4546423425853436, "grad_norm": 0.01980472584518557, "learning_rate": 2.0990324302978234e-06, "loss": 0.0012, "step": 221110 }, { "epoch": 1.4547081307605771, "grad_norm": 0.008114419626720541, "learning_rate": 2.0985648482869363e-06, "loss": 0.0003, "step": 221120 }, { "epoch": 1.4547739189358104, "grad_norm": 0.03346671444553167, "learning_rate": 2.0980973045289664e-06, "loss": 0.0004, "step": 221130 }, { "epoch": 1.454839707111044, "grad_norm": 0.018994631211102623, "learning_rate": 2.0976297990300775e-06, "loss": 0.0002, "step": 221140 }, { "epoch": 1.4549054952862772, "grad_norm": 0.0013697670350125057, "learning_rate": 2.097162331796431e-06, "loss": 0.0002, "step": 221150 }, { "epoch": 1.4549712834615107, "grad_norm": 0.0732126122740409, "learning_rate": 2.096694902834194e-06, "loss": 0.0009, "step": 221160 }, { "epoch": 1.455037071636744, "grad_norm": 0.042812448271693004, "learning_rate": 2.0962275121495278e-06, "loss": 0.0016, "step": 221170 }, { "epoch": 1.4551028598119773, "grad_norm": 0.01550095387656025, "learning_rate": 2.0957601597485932e-06, "loss": 0.0002, "step": 221180 }, { "epoch": 1.4551686479872108, "grad_norm": 0.0037263698030526415, "learning_rate": 2.095292845637553e-06, "loss": 0.0005, "step": 221190 }, { "epoch": 1.4552344361624443, "grad_norm": 0.07659200431597263, "learning_rate": 2.0948255698225682e-06, "loss": 0.0009, "step": 221200 }, { "epoch": 1.4553002243376776, "grad_norm": 0.006108489896220223, "learning_rate": 2.0943583323097988e-06, "loss": 0.001, "step": 221210 }, { "epoch": 1.4553660125129109, "grad_norm": 0.0666465714544471, "learning_rate": 2.0938911331054055e-06, "loss": 0.0006, "step": 221220 }, { "epoch": 1.4554318006881444, "grad_norm": 0.0013548258221026293, "learning_rate": 2.0934239722155473e-06, "loss": 0.0006, "step": 221230 }, { "epoch": 1.4554975888633777, "grad_norm": 0.00831815986138409, "learning_rate": 2.092956849646384e-06, "loss": 0.0004, "step": 221240 }, { "epoch": 1.4555633770386112, "grad_norm": 0.02078797246557079, "learning_rate": 2.0924897654040744e-06, "loss": 0.0004, "step": 221250 }, { "epoch": 1.4556291652138444, "grad_norm": 0.022788091635905797, "learning_rate": 2.0920227194947756e-06, "loss": 0.0003, "step": 221260 }, { "epoch": 1.4556949533890777, "grad_norm": 0.01646996510733758, "learning_rate": 2.091555711924646e-06, "loss": 0.0004, "step": 221270 }, { "epoch": 1.4557607415643112, "grad_norm": 0.18526044004346212, "learning_rate": 2.0910887426998404e-06, "loss": 0.0019, "step": 221280 }, { "epoch": 1.4558265297395447, "grad_norm": 0.04619881133657292, "learning_rate": 2.09062181182652e-06, "loss": 0.0009, "step": 221290 }, { "epoch": 1.455892317914778, "grad_norm": 0.009826912596260881, "learning_rate": 2.0901549193108373e-06, "loss": 0.0008, "step": 221300 }, { "epoch": 1.4559581060900113, "grad_norm": 0.005204966264703323, "learning_rate": 2.089688065158949e-06, "loss": 0.0009, "step": 221310 }, { "epoch": 1.4560238942652448, "grad_norm": 0.049202564501388935, "learning_rate": 2.0892212493770103e-06, "loss": 0.0008, "step": 221320 }, { "epoch": 1.456089682440478, "grad_norm": 0.06286459932267213, "learning_rate": 2.088754471971176e-06, "loss": 0.0007, "step": 221330 }, { "epoch": 1.4561554706157116, "grad_norm": 0.03584706474418139, "learning_rate": 2.0882877329475994e-06, "loss": 0.0008, "step": 221340 }, { "epoch": 1.4562212587909449, "grad_norm": 0.05937186820952803, "learning_rate": 2.087821032312434e-06, "loss": 0.0005, "step": 221350 }, { "epoch": 1.4562870469661782, "grad_norm": 0.024962632315046782, "learning_rate": 2.087354370071833e-06, "loss": 0.0003, "step": 221360 }, { "epoch": 1.4563528351414117, "grad_norm": 0.032960912021312294, "learning_rate": 2.086887746231949e-06, "loss": 0.0005, "step": 221370 }, { "epoch": 1.4564186233166452, "grad_norm": 0.04301635775308824, "learning_rate": 2.0864211607989348e-06, "loss": 0.0005, "step": 221380 }, { "epoch": 1.4564844114918785, "grad_norm": 0.01696887820160656, "learning_rate": 2.085954613778941e-06, "loss": 0.0004, "step": 221390 }, { "epoch": 1.4565501996671117, "grad_norm": 0.006696509365942298, "learning_rate": 2.085488105178119e-06, "loss": 0.0007, "step": 221400 }, { "epoch": 1.4566159878423453, "grad_norm": 0.0034905576312261564, "learning_rate": 2.085021635002619e-06, "loss": 0.0008, "step": 221410 }, { "epoch": 1.4566817760175785, "grad_norm": 0.023878131762561178, "learning_rate": 2.0845552032585896e-06, "loss": 0.0007, "step": 221420 }, { "epoch": 1.456747564192812, "grad_norm": 0.029321656434832345, "learning_rate": 2.084088809952184e-06, "loss": 0.0004, "step": 221430 }, { "epoch": 1.4568133523680453, "grad_norm": 0.38745392531075307, "learning_rate": 2.0836224550895484e-06, "loss": 0.0003, "step": 221440 }, { "epoch": 1.4568791405432788, "grad_norm": 0.042018502412451454, "learning_rate": 2.0831561386768324e-06, "loss": 0.0004, "step": 221450 }, { "epoch": 1.4569449287185121, "grad_norm": 0.012869667594519369, "learning_rate": 2.0826898607201835e-06, "loss": 0.0003, "step": 221460 }, { "epoch": 1.4570107168937456, "grad_norm": 0.00018275706322644212, "learning_rate": 2.082223621225749e-06, "loss": 0.0009, "step": 221470 }, { "epoch": 1.457076505068979, "grad_norm": 0.003766235003341813, "learning_rate": 2.081757420199676e-06, "loss": 0.0004, "step": 221480 }, { "epoch": 1.4571422932442122, "grad_norm": 0.021600232498269863, "learning_rate": 2.0812912576481113e-06, "loss": 0.0004, "step": 221490 }, { "epoch": 1.4572080814194457, "grad_norm": 0.0009861509185743577, "learning_rate": 2.0808251335772005e-06, "loss": 0.0004, "step": 221500 }, { "epoch": 1.457273869594679, "grad_norm": 0.03536528109540372, "learning_rate": 2.0803590479930896e-06, "loss": 0.0003, "step": 221510 }, { "epoch": 1.4573396577699125, "grad_norm": 0.028800585841515115, "learning_rate": 2.079893000901923e-06, "loss": 0.0003, "step": 221520 }, { "epoch": 1.4574054459451458, "grad_norm": 0.01722875078426912, "learning_rate": 2.0794269923098447e-06, "loss": 0.0003, "step": 221530 }, { "epoch": 1.4574712341203793, "grad_norm": 0.05303007170898469, "learning_rate": 2.0789610222229994e-06, "loss": 0.0004, "step": 221540 }, { "epoch": 1.4575370222956125, "grad_norm": 0.018526861205542044, "learning_rate": 2.0784950906475286e-06, "loss": 0.0014, "step": 221550 }, { "epoch": 1.457602810470846, "grad_norm": 0.01570116581173318, "learning_rate": 2.0780291975895783e-06, "loss": 0.0004, "step": 221560 }, { "epoch": 1.4576685986460793, "grad_norm": 0.04973213476115305, "learning_rate": 2.07756334305529e-06, "loss": 0.0006, "step": 221570 }, { "epoch": 1.4577343868213126, "grad_norm": 0.035480526273097196, "learning_rate": 2.077097527050804e-06, "loss": 0.0005, "step": 221580 }, { "epoch": 1.4578001749965461, "grad_norm": 0.02966973564238251, "learning_rate": 2.076631749582264e-06, "loss": 0.0005, "step": 221590 }, { "epoch": 1.4578659631717796, "grad_norm": 0.08035255122167333, "learning_rate": 2.076166010655809e-06, "loss": 0.0011, "step": 221600 }, { "epoch": 1.457931751347013, "grad_norm": 0.005487155339490803, "learning_rate": 2.07570031027758e-06, "loss": 0.0009, "step": 221610 }, { "epoch": 1.4579975395222462, "grad_norm": 7.528586828960121e-05, "learning_rate": 2.0752346484537165e-06, "loss": 0.0007, "step": 221620 }, { "epoch": 1.4580633276974797, "grad_norm": 0.01858765614458942, "learning_rate": 2.0747690251903586e-06, "loss": 0.0004, "step": 221630 }, { "epoch": 1.458129115872713, "grad_norm": 0.011637661070552977, "learning_rate": 2.0743034404936444e-06, "loss": 0.0008, "step": 221640 }, { "epoch": 1.4581949040479465, "grad_norm": 0.0021020563522430222, "learning_rate": 2.073837894369713e-06, "loss": 0.0004, "step": 221650 }, { "epoch": 1.4582606922231798, "grad_norm": 0.10948161677921904, "learning_rate": 2.0733723868247013e-06, "loss": 0.0008, "step": 221660 }, { "epoch": 1.458326480398413, "grad_norm": 0.01560820159081332, "learning_rate": 2.072906917864747e-06, "loss": 0.001, "step": 221670 }, { "epoch": 1.4583922685736466, "grad_norm": 0.026902312349126687, "learning_rate": 2.0724414874959857e-06, "loss": 0.0004, "step": 221680 }, { "epoch": 1.45845805674888, "grad_norm": 7.085431907063955e-05, "learning_rate": 2.0719760957245567e-06, "loss": 0.0003, "step": 221690 }, { "epoch": 1.4585238449241134, "grad_norm": 0.03599798399012234, "learning_rate": 2.071510742556594e-06, "loss": 0.0005, "step": 221700 }, { "epoch": 1.4585896330993466, "grad_norm": 0.06640832464997908, "learning_rate": 2.071045427998233e-06, "loss": 0.0006, "step": 221710 }, { "epoch": 1.4586554212745801, "grad_norm": 0.0030443077224480706, "learning_rate": 2.070580152055608e-06, "loss": 0.0012, "step": 221720 }, { "epoch": 1.4587212094498134, "grad_norm": 0.0001415698073762757, "learning_rate": 2.0701149147348536e-06, "loss": 0.0006, "step": 221730 }, { "epoch": 1.458786997625047, "grad_norm": 0.027720111615947396, "learning_rate": 2.0696497160421043e-06, "loss": 0.0008, "step": 221740 }, { "epoch": 1.4588527858002802, "grad_norm": 0.03744306277271112, "learning_rate": 2.069184555983492e-06, "loss": 0.0005, "step": 221750 }, { "epoch": 1.4589185739755137, "grad_norm": 0.008228420014191289, "learning_rate": 2.0687194345651507e-06, "loss": 0.0003, "step": 221760 }, { "epoch": 1.458984362150747, "grad_norm": 0.04494199347948834, "learning_rate": 2.068254351793212e-06, "loss": 0.0005, "step": 221770 }, { "epoch": 1.4590501503259805, "grad_norm": 0.047646431941435835, "learning_rate": 2.067789307673807e-06, "loss": 0.0007, "step": 221780 }, { "epoch": 1.4591159385012138, "grad_norm": 0.07165704065762922, "learning_rate": 2.0673243022130685e-06, "loss": 0.0008, "step": 221790 }, { "epoch": 1.459181726676447, "grad_norm": 0.04089382119867793, "learning_rate": 2.0668593354171257e-06, "loss": 0.0009, "step": 221800 }, { "epoch": 1.4592475148516806, "grad_norm": 0.02249631470995839, "learning_rate": 2.0663944072921077e-06, "loss": 0.0004, "step": 221810 }, { "epoch": 1.4593133030269139, "grad_norm": 0.0176943594597222, "learning_rate": 2.0659295178441473e-06, "loss": 0.0005, "step": 221820 }, { "epoch": 1.4593790912021474, "grad_norm": 0.07659238487485241, "learning_rate": 2.0654646670793723e-06, "loss": 0.0009, "step": 221830 }, { "epoch": 1.4594448793773807, "grad_norm": 0.03660979995934056, "learning_rate": 2.0649998550039113e-06, "loss": 0.0004, "step": 221840 }, { "epoch": 1.4595106675526142, "grad_norm": 0.028303568238675712, "learning_rate": 2.0645350816238923e-06, "loss": 0.0011, "step": 221850 }, { "epoch": 1.4595764557278474, "grad_norm": 0.007787522038751289, "learning_rate": 2.064070346945443e-06, "loss": 0.0007, "step": 221860 }, { "epoch": 1.459642243903081, "grad_norm": 0.016293059347110554, "learning_rate": 2.063605650974691e-06, "loss": 0.0005, "step": 221870 }, { "epoch": 1.4597080320783142, "grad_norm": 0.047868129558345705, "learning_rate": 2.0631409937177623e-06, "loss": 0.0004, "step": 221880 }, { "epoch": 1.4597738202535475, "grad_norm": 0.036696261726200864, "learning_rate": 2.062676375180783e-06, "loss": 0.0007, "step": 221890 }, { "epoch": 1.459839608428781, "grad_norm": 0.007795435684772652, "learning_rate": 2.0622117953698786e-06, "loss": 0.0004, "step": 221900 }, { "epoch": 1.4599053966040145, "grad_norm": 0.01630528932041403, "learning_rate": 2.061747254291175e-06, "loss": 0.0006, "step": 221910 }, { "epoch": 1.4599711847792478, "grad_norm": 0.01081882183324743, "learning_rate": 2.0612827519507956e-06, "loss": 0.0004, "step": 221920 }, { "epoch": 1.460036972954481, "grad_norm": 0.01793765684053027, "learning_rate": 2.060818288354865e-06, "loss": 0.0005, "step": 221930 }, { "epoch": 1.4601027611297146, "grad_norm": 0.006371136700724597, "learning_rate": 2.0603538635095054e-06, "loss": 0.0004, "step": 221940 }, { "epoch": 1.4601685493049479, "grad_norm": 0.02183484404111239, "learning_rate": 2.0598894774208435e-06, "loss": 0.001, "step": 221950 }, { "epoch": 1.4602343374801814, "grad_norm": 0.02388992609654286, "learning_rate": 2.059425130094999e-06, "loss": 0.0004, "step": 221960 }, { "epoch": 1.4603001256554147, "grad_norm": 0.08526449551754989, "learning_rate": 2.058960821538095e-06, "loss": 0.0009, "step": 221970 }, { "epoch": 1.460365913830648, "grad_norm": 0.03435213078228218, "learning_rate": 2.0584965517562515e-06, "loss": 0.0007, "step": 221980 }, { "epoch": 1.4604317020058815, "grad_norm": 0.016470004151770076, "learning_rate": 2.0580323207555915e-06, "loss": 0.0003, "step": 221990 }, { "epoch": 1.460497490181115, "grad_norm": 0.009734037295412607, "learning_rate": 2.057568128542234e-06, "loss": 0.001, "step": 222000 }, { "epoch": 1.4605632783563482, "grad_norm": 0.01974318323149152, "learning_rate": 2.0571039751223e-06, "loss": 0.0003, "step": 222010 }, { "epoch": 1.4606290665315815, "grad_norm": 0.05055556398862725, "learning_rate": 2.0566398605019095e-06, "loss": 0.0012, "step": 222020 }, { "epoch": 1.460694854706815, "grad_norm": 0.026849517646897744, "learning_rate": 2.056175784687179e-06, "loss": 0.0007, "step": 222030 }, { "epoch": 1.4607606428820483, "grad_norm": 0.04765691108107478, "learning_rate": 2.0557117476842265e-06, "loss": 0.0007, "step": 222040 }, { "epoch": 1.4608264310572818, "grad_norm": 0.027807015164258025, "learning_rate": 2.0552477494991736e-06, "loss": 0.0005, "step": 222050 }, { "epoch": 1.460892219232515, "grad_norm": 0.08128394413246018, "learning_rate": 2.0547837901381355e-06, "loss": 0.0006, "step": 222060 }, { "epoch": 1.4609580074077484, "grad_norm": 0.024036190481146084, "learning_rate": 2.0543198696072302e-06, "loss": 0.0005, "step": 222070 }, { "epoch": 1.461023795582982, "grad_norm": 0.013769413591496417, "learning_rate": 2.053855987912573e-06, "loss": 0.0002, "step": 222080 }, { "epoch": 1.4610895837582154, "grad_norm": 0.06874580451555612, "learning_rate": 2.0533921450602803e-06, "loss": 0.0008, "step": 222090 }, { "epoch": 1.4611553719334487, "grad_norm": 0.0004906863843847267, "learning_rate": 2.052928341056467e-06, "loss": 0.0008, "step": 222100 }, { "epoch": 1.461221160108682, "grad_norm": 0.05132169027610178, "learning_rate": 2.0524645759072483e-06, "loss": 0.0004, "step": 222110 }, { "epoch": 1.4612869482839155, "grad_norm": 6.0342416474980844e-05, "learning_rate": 2.052000849618739e-06, "loss": 0.0004, "step": 222120 }, { "epoch": 1.4613527364591488, "grad_norm": 0.025552218363030673, "learning_rate": 2.0515371621970513e-06, "loss": 0.0004, "step": 222130 }, { "epoch": 1.4614185246343823, "grad_norm": 0.05471065523090216, "learning_rate": 2.0510735136483005e-06, "loss": 0.0005, "step": 222140 }, { "epoch": 1.4614843128096155, "grad_norm": 0.02018180382957566, "learning_rate": 2.0506099039785987e-06, "loss": 0.0003, "step": 222150 }, { "epoch": 1.461550100984849, "grad_norm": 0.003437402055222902, "learning_rate": 2.050146333194058e-06, "loss": 0.0003, "step": 222160 }, { "epoch": 1.4616158891600823, "grad_norm": 0.017691779130635975, "learning_rate": 2.049682801300788e-06, "loss": 0.0004, "step": 222170 }, { "epoch": 1.4616816773353158, "grad_norm": 0.019346313284873202, "learning_rate": 2.0492193083049043e-06, "loss": 0.0006, "step": 222180 }, { "epoch": 1.4617474655105491, "grad_norm": 0.014430187262383168, "learning_rate": 2.0487558542125154e-06, "loss": 0.0004, "step": 222190 }, { "epoch": 1.4618132536857824, "grad_norm": 0.05680565289108698, "learning_rate": 2.0482924390297323e-06, "loss": 0.0003, "step": 222200 }, { "epoch": 1.461879041861016, "grad_norm": 0.016519704821466866, "learning_rate": 2.0478290627626637e-06, "loss": 0.0008, "step": 222210 }, { "epoch": 1.4619448300362494, "grad_norm": 0.02647619509345248, "learning_rate": 2.047365725417419e-06, "loss": 0.0004, "step": 222220 }, { "epoch": 1.4620106182114827, "grad_norm": 0.11304728178827896, "learning_rate": 2.046902427000107e-06, "loss": 0.0007, "step": 222230 }, { "epoch": 1.462076406386716, "grad_norm": 0.009053271532796796, "learning_rate": 2.0464391675168365e-06, "loss": 0.0002, "step": 222240 }, { "epoch": 1.4621421945619495, "grad_norm": 0.06918211179036299, "learning_rate": 2.0459759469737145e-06, "loss": 0.0004, "step": 222250 }, { "epoch": 1.4622079827371828, "grad_norm": 0.007380526738031981, "learning_rate": 2.0455127653768485e-06, "loss": 0.0012, "step": 222260 }, { "epoch": 1.4622737709124163, "grad_norm": 0.051517727389903165, "learning_rate": 2.045049622732345e-06, "loss": 0.0005, "step": 222270 }, { "epoch": 1.4623395590876496, "grad_norm": 0.023224771469411095, "learning_rate": 2.04458651904631e-06, "loss": 0.0021, "step": 222280 }, { "epoch": 1.4624053472628828, "grad_norm": 0.05395423572663679, "learning_rate": 2.0441234543248496e-06, "loss": 0.0006, "step": 222290 }, { "epoch": 1.4624711354381164, "grad_norm": 0.003710314863858776, "learning_rate": 2.0436604285740665e-06, "loss": 0.0005, "step": 222300 }, { "epoch": 1.4625369236133499, "grad_norm": 0.041681369306781825, "learning_rate": 2.0431974418000693e-06, "loss": 0.0007, "step": 222310 }, { "epoch": 1.4626027117885831, "grad_norm": 0.033131375170478615, "learning_rate": 2.04273449400896e-06, "loss": 0.0008, "step": 222320 }, { "epoch": 1.4626684999638164, "grad_norm": 0.0677300746744077, "learning_rate": 2.0422715852068425e-06, "loss": 0.0004, "step": 222330 }, { "epoch": 1.46273428813905, "grad_norm": 0.0003974352849209974, "learning_rate": 2.04180871539982e-06, "loss": 0.0013, "step": 222340 }, { "epoch": 1.4628000763142832, "grad_norm": 0.04527227591953743, "learning_rate": 2.0413458845939944e-06, "loss": 0.0003, "step": 222350 }, { "epoch": 1.4628658644895167, "grad_norm": 0.03489988587952138, "learning_rate": 2.040883092795468e-06, "loss": 0.0006, "step": 222360 }, { "epoch": 1.46293165266475, "grad_norm": 0.07487615698046222, "learning_rate": 2.0404203400103423e-06, "loss": 0.0003, "step": 222370 }, { "epoch": 1.4629974408399833, "grad_norm": 0.12613533628567092, "learning_rate": 2.0399576262447186e-06, "loss": 0.0009, "step": 222380 }, { "epoch": 1.4630632290152168, "grad_norm": 0.006939433131642419, "learning_rate": 2.039494951504697e-06, "loss": 0.0007, "step": 222390 }, { "epoch": 1.4631290171904503, "grad_norm": 0.028777118272307743, "learning_rate": 2.0390323157963777e-06, "loss": 0.0004, "step": 222400 }, { "epoch": 1.4631948053656836, "grad_norm": 0.03861309604214018, "learning_rate": 2.0385697191258604e-06, "loss": 0.0004, "step": 222410 }, { "epoch": 1.4632605935409169, "grad_norm": 0.010550385211356933, "learning_rate": 2.038107161499243e-06, "loss": 0.0004, "step": 222420 }, { "epoch": 1.4633263817161504, "grad_norm": 0.044906056004453555, "learning_rate": 2.037644642922623e-06, "loss": 0.0004, "step": 222430 }, { "epoch": 1.4633921698913837, "grad_norm": 0.00028639673975979017, "learning_rate": 2.037182163402102e-06, "loss": 0.0006, "step": 222440 }, { "epoch": 1.4634579580666172, "grad_norm": 0.14506899552209054, "learning_rate": 2.0367197229437752e-06, "loss": 0.0006, "step": 222450 }, { "epoch": 1.4635237462418504, "grad_norm": 0.012251732820279101, "learning_rate": 2.0362573215537397e-06, "loss": 0.0005, "step": 222460 }, { "epoch": 1.463589534417084, "grad_norm": 0.03144889341319421, "learning_rate": 2.0357949592380917e-06, "loss": 0.0007, "step": 222470 }, { "epoch": 1.4636553225923172, "grad_norm": 0.05577443402095783, "learning_rate": 2.035332636002927e-06, "loss": 0.0005, "step": 222480 }, { "epoch": 1.4637211107675507, "grad_norm": 0.005827877592562382, "learning_rate": 2.0348703518543406e-06, "loss": 0.0003, "step": 222490 }, { "epoch": 1.463786898942784, "grad_norm": 0.02336679245790266, "learning_rate": 2.0344081067984284e-06, "loss": 0.0007, "step": 222500 }, { "epoch": 1.4638526871180173, "grad_norm": 0.025383620890304312, "learning_rate": 2.033945900841284e-06, "loss": 0.0003, "step": 222510 }, { "epoch": 1.4639184752932508, "grad_norm": 0.07965909944230275, "learning_rate": 2.0334837339890006e-06, "loss": 0.001, "step": 222520 }, { "epoch": 1.463984263468484, "grad_norm": 0.011491473459651291, "learning_rate": 2.0330216062476734e-06, "loss": 0.0005, "step": 222530 }, { "epoch": 1.4640500516437176, "grad_norm": 0.023738602077237173, "learning_rate": 2.0325595176233924e-06, "loss": 0.0004, "step": 222540 }, { "epoch": 1.4641158398189509, "grad_norm": 0.03480204263498832, "learning_rate": 2.032097468122252e-06, "loss": 0.0008, "step": 222550 }, { "epoch": 1.4641816279941844, "grad_norm": 0.041153742769193105, "learning_rate": 2.0316354577503415e-06, "loss": 0.0006, "step": 222560 }, { "epoch": 1.4642474161694177, "grad_norm": 0.05256160526006702, "learning_rate": 2.0311734865137563e-06, "loss": 0.0007, "step": 222570 }, { "epoch": 1.4643132043446512, "grad_norm": 0.03211994574507282, "learning_rate": 2.0307115544185834e-06, "loss": 0.0009, "step": 222580 }, { "epoch": 1.4643789925198845, "grad_norm": 0.004626307763977701, "learning_rate": 2.030249661470915e-06, "loss": 0.0007, "step": 222590 }, { "epoch": 1.4644447806951177, "grad_norm": 0.05406526601089156, "learning_rate": 2.0297878076768394e-06, "loss": 0.0002, "step": 222600 }, { "epoch": 1.4645105688703512, "grad_norm": 0.03173660373884639, "learning_rate": 2.029325993042447e-06, "loss": 0.0006, "step": 222610 }, { "epoch": 1.4645763570455848, "grad_norm": 0.11778087945249292, "learning_rate": 2.028864217573825e-06, "loss": 0.001, "step": 222620 }, { "epoch": 1.464642145220818, "grad_norm": 0.013288282347398013, "learning_rate": 2.0284024812770627e-06, "loss": 0.0004, "step": 222630 }, { "epoch": 1.4647079333960513, "grad_norm": 0.04112967283025059, "learning_rate": 2.027940784158247e-06, "loss": 0.0009, "step": 222640 }, { "epoch": 1.4647737215712848, "grad_norm": 0.052008373432143394, "learning_rate": 2.0274791262234654e-06, "loss": 0.0007, "step": 222650 }, { "epoch": 1.464839509746518, "grad_norm": 0.026262286490624502, "learning_rate": 2.0270175074788043e-06, "loss": 0.0014, "step": 222660 }, { "epoch": 1.4649052979217516, "grad_norm": 0.0177651353927544, "learning_rate": 2.0265559279303498e-06, "loss": 0.0005, "step": 222670 }, { "epoch": 1.464971086096985, "grad_norm": 0.0014340629834987736, "learning_rate": 2.0260943875841872e-06, "loss": 0.0006, "step": 222680 }, { "epoch": 1.4650368742722182, "grad_norm": 0.029518445717347425, "learning_rate": 2.0256328864463997e-06, "loss": 0.0003, "step": 222690 }, { "epoch": 1.4651026624474517, "grad_norm": 0.05848970598398474, "learning_rate": 2.0251714245230757e-06, "loss": 0.0013, "step": 222700 }, { "epoch": 1.4651684506226852, "grad_norm": 0.021352287804120795, "learning_rate": 2.0247100018202977e-06, "loss": 0.0006, "step": 222710 }, { "epoch": 1.4652342387979185, "grad_norm": 0.003082490429520458, "learning_rate": 2.0242486183441486e-06, "loss": 0.0005, "step": 222720 }, { "epoch": 1.4653000269731518, "grad_norm": 0.04219550779820638, "learning_rate": 2.0237872741007107e-06, "loss": 0.0006, "step": 222730 }, { "epoch": 1.4653658151483853, "grad_norm": 0.026690919710225837, "learning_rate": 2.0233259690960678e-06, "loss": 0.0003, "step": 222740 }, { "epoch": 1.4654316033236185, "grad_norm": 0.04089606947389988, "learning_rate": 2.022864703336301e-06, "loss": 0.0003, "step": 222750 }, { "epoch": 1.465497391498852, "grad_norm": 0.018381724252038243, "learning_rate": 2.022403476827492e-06, "loss": 0.0003, "step": 222760 }, { "epoch": 1.4655631796740853, "grad_norm": 0.005221083094879673, "learning_rate": 2.021942289575721e-06, "loss": 0.0009, "step": 222770 }, { "epoch": 1.4656289678493188, "grad_norm": 0.015310063821010989, "learning_rate": 2.0214811415870694e-06, "loss": 0.0006, "step": 222780 }, { "epoch": 1.4656947560245521, "grad_norm": 0.008274239478095606, "learning_rate": 2.0210200328676167e-06, "loss": 0.0005, "step": 222790 }, { "epoch": 1.4657605441997856, "grad_norm": 0.025075385141999005, "learning_rate": 2.020558963423441e-06, "loss": 0.0007, "step": 222800 }, { "epoch": 1.465826332375019, "grad_norm": 0.09638930397400174, "learning_rate": 2.0200979332606235e-06, "loss": 0.0005, "step": 222810 }, { "epoch": 1.4658921205502522, "grad_norm": 0.0806709651899415, "learning_rate": 2.0196369423852387e-06, "loss": 0.0005, "step": 222820 }, { "epoch": 1.4659579087254857, "grad_norm": 0.19142867601973343, "learning_rate": 2.019175990803369e-06, "loss": 0.0005, "step": 222830 }, { "epoch": 1.466023696900719, "grad_norm": 0.013893160504606532, "learning_rate": 2.018715078521089e-06, "loss": 0.0012, "step": 222840 }, { "epoch": 1.4660894850759525, "grad_norm": 0.09479081086945287, "learning_rate": 2.0182542055444758e-06, "loss": 0.0009, "step": 222850 }, { "epoch": 1.4661552732511858, "grad_norm": 0.017056241400063007, "learning_rate": 2.017793371879606e-06, "loss": 0.0006, "step": 222860 }, { "epoch": 1.4662210614264193, "grad_norm": 0.07624693720841771, "learning_rate": 2.0173325775325547e-06, "loss": 0.0006, "step": 222870 }, { "epoch": 1.4662868496016526, "grad_norm": 0.004964559632160204, "learning_rate": 2.0168718225093973e-06, "loss": 0.0011, "step": 222880 }, { "epoch": 1.466352637776886, "grad_norm": 0.031307723291184676, "learning_rate": 2.016411106816209e-06, "loss": 0.001, "step": 222890 }, { "epoch": 1.4664184259521194, "grad_norm": 0.03201156298322851, "learning_rate": 2.0159504304590633e-06, "loss": 0.0004, "step": 222900 }, { "epoch": 1.4664842141273526, "grad_norm": 0.0523646053168654, "learning_rate": 2.015489793444034e-06, "loss": 0.0022, "step": 222910 }, { "epoch": 1.4665500023025861, "grad_norm": 0.01049475322412516, "learning_rate": 2.0150291957771937e-06, "loss": 0.0008, "step": 222920 }, { "epoch": 1.4666157904778196, "grad_norm": 0.014130727597424295, "learning_rate": 2.014568637464616e-06, "loss": 0.0006, "step": 222930 }, { "epoch": 1.466681578653053, "grad_norm": 0.01677610880078584, "learning_rate": 2.0141081185123723e-06, "loss": 0.0006, "step": 222940 }, { "epoch": 1.4667473668282862, "grad_norm": 0.11294769627966329, "learning_rate": 2.0136476389265324e-06, "loss": 0.0006, "step": 222950 }, { "epoch": 1.4668131550035197, "grad_norm": 0.0005244082325574524, "learning_rate": 2.0131871987131712e-06, "loss": 0.0006, "step": 222960 }, { "epoch": 1.466878943178753, "grad_norm": 0.05609718877593293, "learning_rate": 2.012726797878357e-06, "loss": 0.0006, "step": 222970 }, { "epoch": 1.4669447313539865, "grad_norm": 0.004524777550920805, "learning_rate": 2.0122664364281603e-06, "loss": 0.0003, "step": 222980 }, { "epoch": 1.4670105195292198, "grad_norm": 0.014648388185282852, "learning_rate": 2.0118061143686502e-06, "loss": 0.0007, "step": 222990 }, { "epoch": 1.467076307704453, "grad_norm": 0.038187343576090174, "learning_rate": 2.0113458317058953e-06, "loss": 0.0007, "step": 223000 }, { "epoch": 1.4671420958796866, "grad_norm": 0.018751036243714776, "learning_rate": 2.0108855884459644e-06, "loss": 0.0004, "step": 223010 }, { "epoch": 1.46720788405492, "grad_norm": 0.0673336617559847, "learning_rate": 2.0104253845949255e-06, "loss": 0.0007, "step": 223020 }, { "epoch": 1.4672736722301534, "grad_norm": 0.006134715932787455, "learning_rate": 2.0099652201588465e-06, "loss": 0.0007, "step": 223030 }, { "epoch": 1.4673394604053867, "grad_norm": 0.008035315161516867, "learning_rate": 2.0095050951437935e-06, "loss": 0.0008, "step": 223040 }, { "epoch": 1.4674052485806202, "grad_norm": 0.002308716302502476, "learning_rate": 2.0090450095558326e-06, "loss": 0.0003, "step": 223050 }, { "epoch": 1.4674710367558534, "grad_norm": 0.040011583876044435, "learning_rate": 2.0085849634010306e-06, "loss": 0.0004, "step": 223060 }, { "epoch": 1.467536824931087, "grad_norm": 0.05324950249117248, "learning_rate": 2.0081249566854523e-06, "loss": 0.0008, "step": 223070 }, { "epoch": 1.4676026131063202, "grad_norm": 0.010776410090552307, "learning_rate": 2.0076649894151603e-06, "loss": 0.0003, "step": 223080 }, { "epoch": 1.4676684012815537, "grad_norm": 0.03833184481400428, "learning_rate": 2.007205061596223e-06, "loss": 0.0008, "step": 223090 }, { "epoch": 1.467734189456787, "grad_norm": 0.01934594750906824, "learning_rate": 2.0067451732347027e-06, "loss": 0.0004, "step": 223100 }, { "epoch": 1.4677999776320205, "grad_norm": 0.0044027309302843345, "learning_rate": 2.0062853243366616e-06, "loss": 0.0004, "step": 223110 }, { "epoch": 1.4678657658072538, "grad_norm": 0.0060199037286003615, "learning_rate": 2.005825514908163e-06, "loss": 0.0006, "step": 223120 }, { "epoch": 1.467931553982487, "grad_norm": 0.006666464681556435, "learning_rate": 2.0053657449552687e-06, "loss": 0.0006, "step": 223130 }, { "epoch": 1.4679973421577206, "grad_norm": 0.002177184447096944, "learning_rate": 2.0049060144840417e-06, "loss": 0.0008, "step": 223140 }, { "epoch": 1.4680631303329539, "grad_norm": 0.025838675101014553, "learning_rate": 2.0044463235005412e-06, "loss": 0.0004, "step": 223150 }, { "epoch": 1.4681289185081874, "grad_norm": 0.0004004814911904781, "learning_rate": 2.003986672010829e-06, "loss": 0.0007, "step": 223160 }, { "epoch": 1.4681947066834207, "grad_norm": 0.008450676768678285, "learning_rate": 2.0035270600209656e-06, "loss": 0.0006, "step": 223170 }, { "epoch": 1.4682604948586542, "grad_norm": 0.09478524106384753, "learning_rate": 2.00306748753701e-06, "loss": 0.0007, "step": 223180 }, { "epoch": 1.4683262830338875, "grad_norm": 0.12017032990639397, "learning_rate": 2.0026079545650205e-06, "loss": 0.0006, "step": 223190 }, { "epoch": 1.468392071209121, "grad_norm": 0.06220380242723141, "learning_rate": 2.002148461111057e-06, "loss": 0.0007, "step": 223200 }, { "epoch": 1.4684578593843542, "grad_norm": 0.0164087377205937, "learning_rate": 2.0016890071811747e-06, "loss": 0.0005, "step": 223210 }, { "epoch": 1.4685236475595875, "grad_norm": 0.0007184275987769372, "learning_rate": 2.001229592781435e-06, "loss": 0.0006, "step": 223220 }, { "epoch": 1.468589435734821, "grad_norm": 0.002742519678942517, "learning_rate": 2.000770217917894e-06, "loss": 0.0006, "step": 223230 }, { "epoch": 1.4686552239100545, "grad_norm": 0.0008860339092330966, "learning_rate": 2.000310882596607e-06, "loss": 0.0014, "step": 223240 }, { "epoch": 1.4687210120852878, "grad_norm": 0.017149173209714916, "learning_rate": 1.9998515868236303e-06, "loss": 0.0002, "step": 223250 }, { "epoch": 1.468786800260521, "grad_norm": 0.02999756136617145, "learning_rate": 1.999392330605019e-06, "loss": 0.0004, "step": 223260 }, { "epoch": 1.4688525884357546, "grad_norm": 0.014830776444898882, "learning_rate": 1.998933113946828e-06, "loss": 0.0004, "step": 223270 }, { "epoch": 1.468918376610988, "grad_norm": 0.014349088468765248, "learning_rate": 1.998473936855113e-06, "loss": 0.0004, "step": 223280 }, { "epoch": 1.4689841647862214, "grad_norm": 0.021275915754592575, "learning_rate": 1.9980147993359267e-06, "loss": 0.0004, "step": 223290 }, { "epoch": 1.4690499529614547, "grad_norm": 0.018718023365495502, "learning_rate": 1.9975557013953224e-06, "loss": 0.0002, "step": 223300 }, { "epoch": 1.469115741136688, "grad_norm": 0.00831590345085531, "learning_rate": 1.997096643039353e-06, "loss": 0.0005, "step": 223310 }, { "epoch": 1.4691815293119215, "grad_norm": 0.03340041971378646, "learning_rate": 1.9966376242740704e-06, "loss": 0.0004, "step": 223320 }, { "epoch": 1.469247317487155, "grad_norm": 0.10233209890837322, "learning_rate": 1.9961786451055276e-06, "loss": 0.0006, "step": 223330 }, { "epoch": 1.4693131056623883, "grad_norm": 0.0209690020418009, "learning_rate": 1.995719705539773e-06, "loss": 0.0008, "step": 223340 }, { "epoch": 1.4693788938376215, "grad_norm": 0.07307332666039787, "learning_rate": 1.9952608055828614e-06, "loss": 0.0007, "step": 223350 }, { "epoch": 1.469444682012855, "grad_norm": 0.05088126731242019, "learning_rate": 1.994801945240841e-06, "loss": 0.0006, "step": 223360 }, { "epoch": 1.4695104701880883, "grad_norm": 0.04052440027333024, "learning_rate": 1.994343124519761e-06, "loss": 0.0004, "step": 223370 }, { "epoch": 1.4695762583633218, "grad_norm": 0.07556473429630552, "learning_rate": 1.9938843434256715e-06, "loss": 0.0007, "step": 223380 }, { "epoch": 1.4696420465385551, "grad_norm": 0.004914892545726716, "learning_rate": 1.9934256019646205e-06, "loss": 0.0006, "step": 223390 }, { "epoch": 1.4697078347137884, "grad_norm": 0.010100001670972388, "learning_rate": 1.9929669001426565e-06, "loss": 0.0005, "step": 223400 }, { "epoch": 1.469773622889022, "grad_norm": 0.0391967407412463, "learning_rate": 1.992508237965827e-06, "loss": 0.0003, "step": 223410 }, { "epoch": 1.4698394110642554, "grad_norm": 0.027692577866308604, "learning_rate": 1.9920496154401787e-06, "loss": 0.0004, "step": 223420 }, { "epoch": 1.4699051992394887, "grad_norm": 0.015411281720642602, "learning_rate": 1.991591032571758e-06, "loss": 0.0009, "step": 223430 }, { "epoch": 1.469970987414722, "grad_norm": 0.04344326779371164, "learning_rate": 1.991132489366612e-06, "loss": 0.001, "step": 223440 }, { "epoch": 1.4700367755899555, "grad_norm": 0.04914369435277656, "learning_rate": 1.990673985830785e-06, "loss": 0.0005, "step": 223450 }, { "epoch": 1.4701025637651888, "grad_norm": 0.011287071844440588, "learning_rate": 1.9902155219703227e-06, "loss": 0.0004, "step": 223460 }, { "epoch": 1.4701683519404223, "grad_norm": 0.03494461362851526, "learning_rate": 1.9897570977912674e-06, "loss": 0.0006, "step": 223470 }, { "epoch": 1.4702341401156556, "grad_norm": 0.0014028356753684032, "learning_rate": 1.989298713299667e-06, "loss": 0.0005, "step": 223480 }, { "epoch": 1.470299928290889, "grad_norm": 0.0019686518928300227, "learning_rate": 1.9888403685015627e-06, "loss": 0.0005, "step": 223490 }, { "epoch": 1.4703657164661224, "grad_norm": 0.047522137830803134, "learning_rate": 1.9883820634029977e-06, "loss": 0.0002, "step": 223500 }, { "epoch": 1.4704315046413559, "grad_norm": 0.010067665965065515, "learning_rate": 1.9879237980100134e-06, "loss": 0.0006, "step": 223510 }, { "epoch": 1.4704972928165891, "grad_norm": 0.05955349597236299, "learning_rate": 1.9874655723286535e-06, "loss": 0.0004, "step": 223520 }, { "epoch": 1.4705630809918224, "grad_norm": 0.04103081708301313, "learning_rate": 1.9870073863649574e-06, "loss": 0.0006, "step": 223530 }, { "epoch": 1.470628869167056, "grad_norm": 0.01551043261894081, "learning_rate": 1.9865492401249677e-06, "loss": 0.0003, "step": 223540 }, { "epoch": 1.4706946573422894, "grad_norm": 0.0015391454250022526, "learning_rate": 1.9860911336147226e-06, "loss": 0.0007, "step": 223550 }, { "epoch": 1.4707604455175227, "grad_norm": 0.009556629596527713, "learning_rate": 1.9856330668402634e-06, "loss": 0.0005, "step": 223560 }, { "epoch": 1.470826233692756, "grad_norm": 0.009135451519463905, "learning_rate": 1.9851750398076292e-06, "loss": 0.0005, "step": 223570 }, { "epoch": 1.4708920218679895, "grad_norm": 0.0005737438955133646, "learning_rate": 1.9847170525228576e-06, "loss": 0.0006, "step": 223580 }, { "epoch": 1.4709578100432228, "grad_norm": 0.048307706714533105, "learning_rate": 1.984259104991988e-06, "loss": 0.0004, "step": 223590 }, { "epoch": 1.4710235982184563, "grad_norm": 0.0114038242920679, "learning_rate": 1.9838011972210557e-06, "loss": 0.0002, "step": 223600 }, { "epoch": 1.4710893863936896, "grad_norm": 0.0022415994849788903, "learning_rate": 1.9833433292161015e-06, "loss": 0.0011, "step": 223610 }, { "epoch": 1.4711551745689229, "grad_norm": 0.0566001576254281, "learning_rate": 1.98288550098316e-06, "loss": 0.0007, "step": 223620 }, { "epoch": 1.4712209627441564, "grad_norm": 0.008637119804685543, "learning_rate": 1.982427712528267e-06, "loss": 0.0005, "step": 223630 }, { "epoch": 1.4712867509193899, "grad_norm": 0.0036679365993341974, "learning_rate": 1.981969963857459e-06, "loss": 0.0003, "step": 223640 }, { "epoch": 1.4713525390946232, "grad_norm": 0.005077772516960594, "learning_rate": 1.9815122549767702e-06, "loss": 0.0003, "step": 223650 }, { "epoch": 1.4714183272698564, "grad_norm": 0.028197064655130114, "learning_rate": 1.981054585892236e-06, "loss": 0.0005, "step": 223660 }, { "epoch": 1.47148411544509, "grad_norm": 0.03746574268395295, "learning_rate": 1.9805969566098892e-06, "loss": 0.0003, "step": 223670 }, { "epoch": 1.4715499036203232, "grad_norm": 0.023637729725731124, "learning_rate": 1.980139367135764e-06, "loss": 0.0003, "step": 223680 }, { "epoch": 1.4716156917955567, "grad_norm": 0.02049672024404334, "learning_rate": 1.9796818174758926e-06, "loss": 0.0005, "step": 223690 }, { "epoch": 1.47168147997079, "grad_norm": 0.01881379797344125, "learning_rate": 1.9792243076363083e-06, "loss": 0.0004, "step": 223700 }, { "epoch": 1.4717472681460233, "grad_norm": 0.02092837719815462, "learning_rate": 1.978766837623043e-06, "loss": 0.0013, "step": 223710 }, { "epoch": 1.4718130563212568, "grad_norm": 0.0014955791771513302, "learning_rate": 1.978309407442127e-06, "loss": 0.0005, "step": 223720 }, { "epoch": 1.4718788444964903, "grad_norm": 0.012568359598911486, "learning_rate": 1.9778520170995904e-06, "loss": 0.0006, "step": 223730 }, { "epoch": 1.4719446326717236, "grad_norm": 0.06665396137794835, "learning_rate": 1.9773946666014665e-06, "loss": 0.0005, "step": 223740 }, { "epoch": 1.4720104208469569, "grad_norm": 0.08945419869466528, "learning_rate": 1.9769373559537834e-06, "loss": 0.0008, "step": 223750 }, { "epoch": 1.4720762090221904, "grad_norm": 0.056288322688550926, "learning_rate": 1.9764800851625703e-06, "loss": 0.0007, "step": 223760 }, { "epoch": 1.4721419971974237, "grad_norm": 0.04352464330111802, "learning_rate": 1.976022854233856e-06, "loss": 0.0009, "step": 223770 }, { "epoch": 1.4722077853726572, "grad_norm": 0.0033298667989234486, "learning_rate": 1.975565663173669e-06, "loss": 0.0008, "step": 223780 }, { "epoch": 1.4722735735478905, "grad_norm": 0.0667213994783076, "learning_rate": 1.9751085119880364e-06, "loss": 0.0009, "step": 223790 }, { "epoch": 1.472339361723124, "grad_norm": 0.00031738368078303896, "learning_rate": 1.9746514006829853e-06, "loss": 0.0008, "step": 223800 }, { "epoch": 1.4724051498983572, "grad_norm": 0.04119234791830096, "learning_rate": 1.9741943292645434e-06, "loss": 0.0004, "step": 223810 }, { "epoch": 1.4724709380735908, "grad_norm": 0.0013683925115883966, "learning_rate": 1.9737372977387355e-06, "loss": 0.0015, "step": 223820 }, { "epoch": 1.472536726248824, "grad_norm": 0.021182576838816258, "learning_rate": 1.9732803061115874e-06, "loss": 0.0012, "step": 223830 }, { "epoch": 1.4726025144240573, "grad_norm": 0.01676329871774908, "learning_rate": 1.972823354389125e-06, "loss": 0.0002, "step": 223840 }, { "epoch": 1.4726683025992908, "grad_norm": 0.05085453271200924, "learning_rate": 1.972366442577372e-06, "loss": 0.0005, "step": 223850 }, { "epoch": 1.472734090774524, "grad_norm": 0.006961078870451281, "learning_rate": 1.971909570682351e-06, "loss": 0.0004, "step": 223860 }, { "epoch": 1.4727998789497576, "grad_norm": 0.004712030474158331, "learning_rate": 1.9714527387100884e-06, "loss": 0.0003, "step": 223870 }, { "epoch": 1.472865667124991, "grad_norm": 0.009347688893862866, "learning_rate": 1.9709959466666062e-06, "loss": 0.0007, "step": 223880 }, { "epoch": 1.4729314553002244, "grad_norm": 0.10078433240584253, "learning_rate": 1.970539194557926e-06, "loss": 0.0006, "step": 223890 }, { "epoch": 1.4729972434754577, "grad_norm": 0.02281865091866901, "learning_rate": 1.9700824823900703e-06, "loss": 0.0003, "step": 223900 }, { "epoch": 1.4730630316506912, "grad_norm": 0.10749068199099279, "learning_rate": 1.9696258101690606e-06, "loss": 0.0013, "step": 223910 }, { "epoch": 1.4731288198259245, "grad_norm": 0.036214076386658973, "learning_rate": 1.969169177900919e-06, "loss": 0.0006, "step": 223920 }, { "epoch": 1.4731946080011578, "grad_norm": 0.010405577799555097, "learning_rate": 1.968712585591663e-06, "loss": 0.0002, "step": 223930 }, { "epoch": 1.4732603961763913, "grad_norm": 0.02302110850860637, "learning_rate": 1.968256033247313e-06, "loss": 0.0001, "step": 223940 }, { "epoch": 1.4733261843516248, "grad_norm": 0.0009786367989082086, "learning_rate": 1.967799520873888e-06, "loss": 0.0007, "step": 223950 }, { "epoch": 1.473391972526858, "grad_norm": 0.022299804846670292, "learning_rate": 1.967343048477407e-06, "loss": 0.0004, "step": 223960 }, { "epoch": 1.4734577607020913, "grad_norm": 0.037097057081306194, "learning_rate": 1.96688661606389e-06, "loss": 0.0004, "step": 223970 }, { "epoch": 1.4735235488773248, "grad_norm": 0.007011275631713221, "learning_rate": 1.9664302236393536e-06, "loss": 0.0006, "step": 223980 }, { "epoch": 1.4735893370525581, "grad_norm": 0.020263587660326557, "learning_rate": 1.9659738712098144e-06, "loss": 0.0005, "step": 223990 }, { "epoch": 1.4736551252277916, "grad_norm": 0.053605570577815884, "learning_rate": 1.9655175587812892e-06, "loss": 0.0007, "step": 224000 }, { "epoch": 1.473720913403025, "grad_norm": 0.005040471924213994, "learning_rate": 1.9650612863597946e-06, "loss": 0.0006, "step": 224010 }, { "epoch": 1.4737867015782582, "grad_norm": 0.020705395158484553, "learning_rate": 1.964605053951346e-06, "loss": 0.0006, "step": 224020 }, { "epoch": 1.4738524897534917, "grad_norm": 0.0024482905632245205, "learning_rate": 1.964148861561958e-06, "loss": 0.0004, "step": 224030 }, { "epoch": 1.4739182779287252, "grad_norm": 0.03794849887425391, "learning_rate": 1.9636927091976455e-06, "loss": 0.0005, "step": 224040 }, { "epoch": 1.4739840661039585, "grad_norm": 0.01676478396334897, "learning_rate": 1.963236596864422e-06, "loss": 0.0004, "step": 224050 }, { "epoch": 1.4740498542791918, "grad_norm": 0.0015861034511488808, "learning_rate": 1.9627805245683012e-06, "loss": 0.0014, "step": 224060 }, { "epoch": 1.4741156424544253, "grad_norm": 0.021576122214979176, "learning_rate": 1.9623244923152963e-06, "loss": 0.0005, "step": 224070 }, { "epoch": 1.4741814306296586, "grad_norm": 0.10219418681351453, "learning_rate": 1.961868500111419e-06, "loss": 0.0009, "step": 224080 }, { "epoch": 1.474247218804892, "grad_norm": 0.019698263946944163, "learning_rate": 1.9614125479626808e-06, "loss": 0.0008, "step": 224090 }, { "epoch": 1.4743130069801254, "grad_norm": 0.015525768754605545, "learning_rate": 1.9609566358750946e-06, "loss": 0.0006, "step": 224100 }, { "epoch": 1.4743787951553589, "grad_norm": 0.01800727805041582, "learning_rate": 1.9605007638546715e-06, "loss": 0.0009, "step": 224110 }, { "epoch": 1.4744445833305921, "grad_norm": 0.035716945390273716, "learning_rate": 1.96004493190742e-06, "loss": 0.0015, "step": 224120 }, { "epoch": 1.4745103715058256, "grad_norm": 0.0009984141768346016, "learning_rate": 1.959589140039351e-06, "loss": 0.0013, "step": 224130 }, { "epoch": 1.474576159681059, "grad_norm": 0.014231196833104743, "learning_rate": 1.9591333882564727e-06, "loss": 0.0006, "step": 224140 }, { "epoch": 1.4746419478562922, "grad_norm": 0.02008953444880013, "learning_rate": 1.9586776765647947e-06, "loss": 0.0002, "step": 224150 }, { "epoch": 1.4747077360315257, "grad_norm": 0.003753036103322607, "learning_rate": 1.958222004970325e-06, "loss": 0.0007, "step": 224160 }, { "epoch": 1.474773524206759, "grad_norm": 0.021386756968032165, "learning_rate": 1.9577663734790713e-06, "loss": 0.0005, "step": 224170 }, { "epoch": 1.4748393123819925, "grad_norm": 0.007656037931262583, "learning_rate": 1.95731078209704e-06, "loss": 0.0007, "step": 224180 }, { "epoch": 1.4749051005572258, "grad_norm": 0.06917757900008142, "learning_rate": 1.9568552308302387e-06, "loss": 0.0005, "step": 224190 }, { "epoch": 1.4749708887324593, "grad_norm": 0.09023796286758032, "learning_rate": 1.9563997196846728e-06, "loss": 0.0007, "step": 224200 }, { "epoch": 1.4750366769076926, "grad_norm": 0.010318571771521089, "learning_rate": 1.955944248666348e-06, "loss": 0.0005, "step": 224210 }, { "epoch": 1.475102465082926, "grad_norm": 0.03912123861129634, "learning_rate": 1.9554888177812673e-06, "loss": 0.0005, "step": 224220 }, { "epoch": 1.4751682532581594, "grad_norm": 0.024249509277852455, "learning_rate": 1.955033427035439e-06, "loss": 0.0004, "step": 224230 }, { "epoch": 1.4752340414333927, "grad_norm": 0.013059558354192133, "learning_rate": 1.9545780764348654e-06, "loss": 0.0008, "step": 224240 }, { "epoch": 1.4752998296086262, "grad_norm": 0.009073679095526177, "learning_rate": 1.9541227659855495e-06, "loss": 0.0005, "step": 224250 }, { "epoch": 1.4753656177838597, "grad_norm": 0.040793758929484183, "learning_rate": 1.9536674956934942e-06, "loss": 0.0006, "step": 224260 }, { "epoch": 1.475431405959093, "grad_norm": 0.0012673435409268033, "learning_rate": 1.9532122655647017e-06, "loss": 0.0002, "step": 224270 }, { "epoch": 1.4754971941343262, "grad_norm": 0.00507089051828286, "learning_rate": 1.9527570756051745e-06, "loss": 0.0006, "step": 224280 }, { "epoch": 1.4755629823095597, "grad_norm": 0.03517058906358468, "learning_rate": 1.9523019258209134e-06, "loss": 0.0011, "step": 224290 }, { "epoch": 1.475628770484793, "grad_norm": 0.014851520666136552, "learning_rate": 1.9518468162179192e-06, "loss": 0.0004, "step": 224300 }, { "epoch": 1.4756945586600265, "grad_norm": 0.015117406657429654, "learning_rate": 1.9513917468021927e-06, "loss": 0.0007, "step": 224310 }, { "epoch": 1.4757603468352598, "grad_norm": 0.033405063160944645, "learning_rate": 1.950936717579733e-06, "loss": 0.0005, "step": 224320 }, { "epoch": 1.475826135010493, "grad_norm": 0.04960727295885134, "learning_rate": 1.9504817285565388e-06, "loss": 0.0004, "step": 224330 }, { "epoch": 1.4758919231857266, "grad_norm": 0.06483157297869728, "learning_rate": 1.950026779738609e-06, "loss": 0.0006, "step": 224340 }, { "epoch": 1.47595771136096, "grad_norm": 0.04359775782180908, "learning_rate": 1.949571871131941e-06, "loss": 0.0003, "step": 224350 }, { "epoch": 1.4760234995361934, "grad_norm": 0.012493703286164786, "learning_rate": 1.949117002742535e-06, "loss": 0.0002, "step": 224360 }, { "epoch": 1.4760892877114267, "grad_norm": 0.0032414193372092897, "learning_rate": 1.9486621745763863e-06, "loss": 0.0006, "step": 224370 }, { "epoch": 1.4761550758866602, "grad_norm": 0.07075218090584699, "learning_rate": 1.9482073866394914e-06, "loss": 0.0004, "step": 224380 }, { "epoch": 1.4762208640618935, "grad_norm": 0.0011617906863972962, "learning_rate": 1.947752638937846e-06, "loss": 0.0004, "step": 224390 }, { "epoch": 1.476286652237127, "grad_norm": 0.023103141018906072, "learning_rate": 1.9472979314774464e-06, "loss": 0.0003, "step": 224400 }, { "epoch": 1.4763524404123602, "grad_norm": 0.0005168445808178061, "learning_rate": 1.946843264264287e-06, "loss": 0.0007, "step": 224410 }, { "epoch": 1.4764182285875935, "grad_norm": 0.006843923933357792, "learning_rate": 1.946388637304362e-06, "loss": 0.0005, "step": 224420 }, { "epoch": 1.476484016762827, "grad_norm": 0.11888219625461753, "learning_rate": 1.9459340506036663e-06, "loss": 0.0006, "step": 224430 }, { "epoch": 1.4765498049380605, "grad_norm": 0.00338382922017949, "learning_rate": 1.9454795041681924e-06, "loss": 0.0006, "step": 224440 }, { "epoch": 1.4766155931132938, "grad_norm": 0.00019596708464992, "learning_rate": 1.9450249980039326e-06, "loss": 0.0002, "step": 224450 }, { "epoch": 1.476681381288527, "grad_norm": 0.014629988576807501, "learning_rate": 1.94457053211688e-06, "loss": 0.0005, "step": 224460 }, { "epoch": 1.4767471694637606, "grad_norm": 0.031753672970912535, "learning_rate": 1.9441161065130258e-06, "loss": 0.0007, "step": 224470 }, { "epoch": 1.476812957638994, "grad_norm": 0.040418958193718185, "learning_rate": 1.9436617211983604e-06, "loss": 0.0007, "step": 224480 }, { "epoch": 1.4768787458142274, "grad_norm": 0.0001905646313572649, "learning_rate": 1.943207376178877e-06, "loss": 0.0005, "step": 224490 }, { "epoch": 1.4769445339894607, "grad_norm": 0.02851620366154126, "learning_rate": 1.942753071460564e-06, "loss": 0.0005, "step": 224500 }, { "epoch": 1.4770103221646942, "grad_norm": 0.00039711654296308317, "learning_rate": 1.942298807049412e-06, "loss": 0.0006, "step": 224510 }, { "epoch": 1.4770761103399275, "grad_norm": 0.014246976132888365, "learning_rate": 1.941844582951409e-06, "loss": 0.0005, "step": 224520 }, { "epoch": 1.477141898515161, "grad_norm": 0.0020830406351557374, "learning_rate": 1.941390399172544e-06, "loss": 0.0004, "step": 224530 }, { "epoch": 1.4772076866903943, "grad_norm": 0.012696643717127927, "learning_rate": 1.940936255718805e-06, "loss": 0.0009, "step": 224540 }, { "epoch": 1.4772734748656275, "grad_norm": 0.034983750631777735, "learning_rate": 1.94048215259618e-06, "loss": 0.0009, "step": 224550 }, { "epoch": 1.477339263040861, "grad_norm": 0.1956224512019361, "learning_rate": 1.9400280898106544e-06, "loss": 0.0021, "step": 224560 }, { "epoch": 1.4774050512160946, "grad_norm": 0.04435944029897945, "learning_rate": 1.9395740673682163e-06, "loss": 0.0003, "step": 224570 }, { "epoch": 1.4774708393913278, "grad_norm": 0.0011091824233931881, "learning_rate": 1.939120085274851e-06, "loss": 0.0011, "step": 224580 }, { "epoch": 1.4775366275665611, "grad_norm": 0.11250769736472385, "learning_rate": 1.938666143536544e-06, "loss": 0.0012, "step": 224590 }, { "epoch": 1.4776024157417946, "grad_norm": 0.10535409694970428, "learning_rate": 1.9382122421592795e-06, "loss": 0.0012, "step": 224600 }, { "epoch": 1.477668203917028, "grad_norm": 0.010097202896825318, "learning_rate": 1.937758381149041e-06, "loss": 0.0004, "step": 224610 }, { "epoch": 1.4777339920922614, "grad_norm": 0.00345798075032642, "learning_rate": 1.9373045605118147e-06, "loss": 0.0005, "step": 224620 }, { "epoch": 1.4777997802674947, "grad_norm": 0.0540888662533886, "learning_rate": 1.936850780253583e-06, "loss": 0.0005, "step": 224630 }, { "epoch": 1.477865568442728, "grad_norm": 0.04698795945445582, "learning_rate": 1.936397040380328e-06, "loss": 0.001, "step": 224640 }, { "epoch": 1.4779313566179615, "grad_norm": 0.0038272749714477325, "learning_rate": 1.935943340898032e-06, "loss": 0.0003, "step": 224650 }, { "epoch": 1.477997144793195, "grad_norm": 0.012390069711450295, "learning_rate": 1.9354896818126774e-06, "loss": 0.0011, "step": 224660 }, { "epoch": 1.4780629329684283, "grad_norm": 0.00021359618133238184, "learning_rate": 1.9350360631302446e-06, "loss": 0.0003, "step": 224670 }, { "epoch": 1.4781287211436616, "grad_norm": 0.07409997271285677, "learning_rate": 1.9345824848567133e-06, "loss": 0.001, "step": 224680 }, { "epoch": 1.478194509318895, "grad_norm": 0.015168939563463124, "learning_rate": 1.9341289469980657e-06, "loss": 0.0002, "step": 224690 }, { "epoch": 1.4782602974941284, "grad_norm": 0.07592476507625578, "learning_rate": 1.9336754495602793e-06, "loss": 0.0008, "step": 224700 }, { "epoch": 1.4783260856693619, "grad_norm": 0.016029513310650802, "learning_rate": 1.933221992549334e-06, "loss": 0.0008, "step": 224710 }, { "epoch": 1.4783918738445951, "grad_norm": 0.03100772581338683, "learning_rate": 1.9327685759712084e-06, "loss": 0.0003, "step": 224720 }, { "epoch": 1.4784576620198284, "grad_norm": 0.027227285948485235, "learning_rate": 1.9323151998318794e-06, "loss": 0.0008, "step": 224730 }, { "epoch": 1.478523450195062, "grad_norm": 0.019813217972312742, "learning_rate": 1.9318618641373238e-06, "loss": 0.0003, "step": 224740 }, { "epoch": 1.4785892383702954, "grad_norm": 0.024087640621270667, "learning_rate": 1.9314085688935215e-06, "loss": 0.0002, "step": 224750 }, { "epoch": 1.4786550265455287, "grad_norm": 0.03462617027951306, "learning_rate": 1.9309553141064468e-06, "loss": 0.0006, "step": 224760 }, { "epoch": 1.478720814720762, "grad_norm": 0.0674360590146253, "learning_rate": 1.930502099782076e-06, "loss": 0.0005, "step": 224770 }, { "epoch": 1.4787866028959955, "grad_norm": 0.03615853416637487, "learning_rate": 1.930048925926384e-06, "loss": 0.0005, "step": 224780 }, { "epoch": 1.4788523910712288, "grad_norm": 0.04978307317924642, "learning_rate": 1.9295957925453453e-06, "loss": 0.0004, "step": 224790 }, { "epoch": 1.4789181792464623, "grad_norm": 0.008068216350460379, "learning_rate": 1.929142699644934e-06, "loss": 0.0005, "step": 224800 }, { "epoch": 1.4789839674216956, "grad_norm": 0.0060950359375773, "learning_rate": 1.9286896472311245e-06, "loss": 0.0009, "step": 224810 }, { "epoch": 1.479049755596929, "grad_norm": 0.004855401291737349, "learning_rate": 1.928236635309889e-06, "loss": 0.0004, "step": 224820 }, { "epoch": 1.4791155437721624, "grad_norm": 0.01204792014343731, "learning_rate": 1.927783663887201e-06, "loss": 0.0004, "step": 224830 }, { "epoch": 1.4791813319473959, "grad_norm": 0.031976144346255085, "learning_rate": 1.927330732969032e-06, "loss": 0.0002, "step": 224840 }, { "epoch": 1.4792471201226292, "grad_norm": 0.005833180380851247, "learning_rate": 1.9268778425613533e-06, "loss": 0.0005, "step": 224850 }, { "epoch": 1.4793129082978624, "grad_norm": 0.04550390001291188, "learning_rate": 1.926424992670136e-06, "loss": 0.0007, "step": 224860 }, { "epoch": 1.479378696473096, "grad_norm": 0.02058636125610578, "learning_rate": 1.9259721833013496e-06, "loss": 0.0007, "step": 224870 }, { "epoch": 1.4794444846483292, "grad_norm": 0.04134183546522081, "learning_rate": 1.9255194144609663e-06, "loss": 0.0003, "step": 224880 }, { "epoch": 1.4795102728235627, "grad_norm": 0.02635657233474161, "learning_rate": 1.9250666861549546e-06, "loss": 0.0017, "step": 224890 }, { "epoch": 1.479576060998796, "grad_norm": 0.029963487928244645, "learning_rate": 1.9246139983892827e-06, "loss": 0.0006, "step": 224900 }, { "epoch": 1.4796418491740295, "grad_norm": 0.055265537810964695, "learning_rate": 1.924161351169919e-06, "loss": 0.0005, "step": 224910 }, { "epoch": 1.4797076373492628, "grad_norm": 0.0415471563082368, "learning_rate": 1.9237087445028312e-06, "loss": 0.0005, "step": 224920 }, { "epoch": 1.4797734255244963, "grad_norm": 0.06365149399715925, "learning_rate": 1.9232561783939876e-06, "loss": 0.0007, "step": 224930 }, { "epoch": 1.4798392136997296, "grad_norm": 0.004941980044314705, "learning_rate": 1.922803652849353e-06, "loss": 0.0003, "step": 224940 }, { "epoch": 1.4799050018749629, "grad_norm": 0.039081001457650116, "learning_rate": 1.9223511678748953e-06, "loss": 0.0006, "step": 224950 }, { "epoch": 1.4799707900501964, "grad_norm": 0.020285874126339226, "learning_rate": 1.921898723476579e-06, "loss": 0.0002, "step": 224960 }, { "epoch": 1.4800365782254299, "grad_norm": 0.002214939003667222, "learning_rate": 1.92144631966037e-06, "loss": 0.0002, "step": 224970 }, { "epoch": 1.4801023664006632, "grad_norm": 0.021827167144270108, "learning_rate": 1.920993956432232e-06, "loss": 0.0006, "step": 224980 }, { "epoch": 1.4801681545758965, "grad_norm": 0.0006518382260186366, "learning_rate": 1.92054163379813e-06, "loss": 0.0002, "step": 224990 }, { "epoch": 1.48023394275113, "grad_norm": 0.034118279091321084, "learning_rate": 1.920089351764025e-06, "loss": 0.0007, "step": 225000 }, { "epoch": 1.48023394275113, "eval_loss": 0.0003926597419194877, "eval_runtime": 13.1199, "eval_samples_per_second": 15.244, "eval_steps_per_second": 7.622, "step": 225000 }, { "epoch": 1.4802997309263632, "grad_norm": 0.07347021704738875, "learning_rate": 1.919637110335884e-06, "loss": 0.0007, "step": 225010 }, { "epoch": 1.4803655191015967, "grad_norm": 0.001291977647485884, "learning_rate": 1.919184909519667e-06, "loss": 0.0004, "step": 225020 }, { "epoch": 1.48043130727683, "grad_norm": 0.022315153076812866, "learning_rate": 1.918732749321336e-06, "loss": 0.0004, "step": 225030 }, { "epoch": 1.4804970954520633, "grad_norm": 0.03284142418409872, "learning_rate": 1.9182806297468525e-06, "loss": 0.0004, "step": 225040 }, { "epoch": 1.4805628836272968, "grad_norm": 0.0026909493037928062, "learning_rate": 1.9178285508021776e-06, "loss": 0.0006, "step": 225050 }, { "epoch": 1.4806286718025303, "grad_norm": 0.03231966873873158, "learning_rate": 1.9173765124932713e-06, "loss": 0.0008, "step": 225060 }, { "epoch": 1.4806944599777636, "grad_norm": 0.00024306846843574463, "learning_rate": 1.9169245148260935e-06, "loss": 0.0012, "step": 225070 }, { "epoch": 1.480760248152997, "grad_norm": 0.00954519672051498, "learning_rate": 1.916472557806603e-06, "loss": 0.0017, "step": 225080 }, { "epoch": 1.4808260363282304, "grad_norm": 0.010875017967143203, "learning_rate": 1.916020641440759e-06, "loss": 0.0006, "step": 225090 }, { "epoch": 1.4808918245034637, "grad_norm": 0.01424230108417119, "learning_rate": 1.915568765734519e-06, "loss": 0.0006, "step": 225100 }, { "epoch": 1.4809576126786972, "grad_norm": 0.030648364548617385, "learning_rate": 1.915116930693841e-06, "loss": 0.0007, "step": 225110 }, { "epoch": 1.4810234008539305, "grad_norm": 0.006078026942946038, "learning_rate": 1.914665136324682e-06, "loss": 0.0003, "step": 225120 }, { "epoch": 1.481089189029164, "grad_norm": 0.004760040886883626, "learning_rate": 1.9142133826329973e-06, "loss": 0.0002, "step": 225130 }, { "epoch": 1.4811549772043973, "grad_norm": 0.03868091008495257, "learning_rate": 1.9137616696247452e-06, "loss": 0.0006, "step": 225140 }, { "epoch": 1.4812207653796308, "grad_norm": 0.02644572715067572, "learning_rate": 1.9133099973058807e-06, "loss": 0.0004, "step": 225150 }, { "epoch": 1.481286553554864, "grad_norm": 0.05350202723383092, "learning_rate": 1.9128583656823574e-06, "loss": 0.0004, "step": 225160 }, { "epoch": 1.4813523417300973, "grad_norm": 0.01141000517015697, "learning_rate": 1.9124067747601307e-06, "loss": 0.0011, "step": 225170 }, { "epoch": 1.4814181299053308, "grad_norm": 0.008677635983226205, "learning_rate": 1.9119552245451532e-06, "loss": 0.001, "step": 225180 }, { "epoch": 1.4814839180805641, "grad_norm": 0.01359576730146976, "learning_rate": 1.9115037150433802e-06, "loss": 0.0004, "step": 225190 }, { "epoch": 1.4815497062557976, "grad_norm": 0.06649388511385028, "learning_rate": 1.911052246260763e-06, "loss": 0.0006, "step": 225200 }, { "epoch": 1.481615494431031, "grad_norm": 0.02124786664677474, "learning_rate": 1.9106008182032538e-06, "loss": 0.0007, "step": 225210 }, { "epoch": 1.4816812826062644, "grad_norm": 0.049544284289520596, "learning_rate": 1.910149430876804e-06, "loss": 0.0005, "step": 225220 }, { "epoch": 1.4817470707814977, "grad_norm": 0.024927466298184386, "learning_rate": 1.909698084287367e-06, "loss": 0.0004, "step": 225230 }, { "epoch": 1.4818128589567312, "grad_norm": 0.03540475855206238, "learning_rate": 1.9092467784408907e-06, "loss": 0.0009, "step": 225240 }, { "epoch": 1.4818786471319645, "grad_norm": 0.024553117798381714, "learning_rate": 1.9087955133433266e-06, "loss": 0.0006, "step": 225250 }, { "epoch": 1.4819444353071978, "grad_norm": 0.03436666597497617, "learning_rate": 1.908344289000622e-06, "loss": 0.0007, "step": 225260 }, { "epoch": 1.4820102234824313, "grad_norm": 0.014652331514196967, "learning_rate": 1.90789310541873e-06, "loss": 0.0004, "step": 225270 }, { "epoch": 1.4820760116576648, "grad_norm": 0.11621425851202967, "learning_rate": 1.9074419626035962e-06, "loss": 0.0005, "step": 225280 }, { "epoch": 1.482141799832898, "grad_norm": 0.010181255484732423, "learning_rate": 1.9069908605611697e-06, "loss": 0.0003, "step": 225290 }, { "epoch": 1.4822075880081313, "grad_norm": 0.03914422620308275, "learning_rate": 1.9065397992973972e-06, "loss": 0.0005, "step": 225300 }, { "epoch": 1.4822733761833649, "grad_norm": 0.0652233355316798, "learning_rate": 1.9060887788182258e-06, "loss": 0.0007, "step": 225310 }, { "epoch": 1.4823391643585981, "grad_norm": 0.01075731487192244, "learning_rate": 1.9056377991296022e-06, "loss": 0.0004, "step": 225320 }, { "epoch": 1.4824049525338316, "grad_norm": 0.0032937708457614293, "learning_rate": 1.9051868602374712e-06, "loss": 0.0002, "step": 225330 }, { "epoch": 1.482470740709065, "grad_norm": 0.0048966970029113545, "learning_rate": 1.904735962147779e-06, "loss": 0.0005, "step": 225340 }, { "epoch": 1.4825365288842982, "grad_norm": 0.052204826904490964, "learning_rate": 1.9042851048664695e-06, "loss": 0.0005, "step": 225350 }, { "epoch": 1.4826023170595317, "grad_norm": 0.07126590843457568, "learning_rate": 1.9038342883994876e-06, "loss": 0.0002, "step": 225360 }, { "epoch": 1.4826681052347652, "grad_norm": 0.0729696116510304, "learning_rate": 1.9033835127527761e-06, "loss": 0.0006, "step": 225370 }, { "epoch": 1.4827338934099985, "grad_norm": 0.0011399980272661944, "learning_rate": 1.9029327779322792e-06, "loss": 0.0005, "step": 225380 }, { "epoch": 1.4827996815852318, "grad_norm": 0.04102430611481149, "learning_rate": 1.9024820839439368e-06, "loss": 0.0004, "step": 225390 }, { "epoch": 1.4828654697604653, "grad_norm": 0.008851292637878535, "learning_rate": 1.9020314307936943e-06, "loss": 0.0005, "step": 225400 }, { "epoch": 1.4829312579356986, "grad_norm": 0.022113431922107962, "learning_rate": 1.9015808184874923e-06, "loss": 0.0011, "step": 225410 }, { "epoch": 1.482997046110932, "grad_norm": 0.010019431932094557, "learning_rate": 1.901130247031271e-06, "loss": 0.0003, "step": 225420 }, { "epoch": 1.4830628342861654, "grad_norm": 0.032854122410662025, "learning_rate": 1.900679716430971e-06, "loss": 0.0008, "step": 225430 }, { "epoch": 1.4831286224613989, "grad_norm": 0.002871386576152953, "learning_rate": 1.9002292266925326e-06, "loss": 0.0007, "step": 225440 }, { "epoch": 1.4831944106366322, "grad_norm": 0.034442351339567655, "learning_rate": 1.899778777821894e-06, "loss": 0.0005, "step": 225450 }, { "epoch": 1.4832601988118657, "grad_norm": 0.03798918056598853, "learning_rate": 1.8993283698249953e-06, "loss": 0.0003, "step": 225460 }, { "epoch": 1.483325986987099, "grad_norm": 0.02652926694359735, "learning_rate": 1.8988780027077741e-06, "loss": 0.0005, "step": 225470 }, { "epoch": 1.4833917751623322, "grad_norm": 0.001717035176549914, "learning_rate": 1.8984276764761678e-06, "loss": 0.0006, "step": 225480 }, { "epoch": 1.4834575633375657, "grad_norm": 0.003800133805879696, "learning_rate": 1.8979773911361143e-06, "loss": 0.0005, "step": 225490 }, { "epoch": 1.483523351512799, "grad_norm": 0.004124473071483967, "learning_rate": 1.8975271466935496e-06, "loss": 0.0005, "step": 225500 }, { "epoch": 1.4835891396880325, "grad_norm": 0.11906162497278167, "learning_rate": 1.8970769431544096e-06, "loss": 0.0014, "step": 225510 }, { "epoch": 1.4836549278632658, "grad_norm": 0.03403631389167562, "learning_rate": 1.896626780524629e-06, "loss": 0.0007, "step": 225520 }, { "epoch": 1.4837207160384993, "grad_norm": 0.022448885169204024, "learning_rate": 1.8961766588101455e-06, "loss": 0.0013, "step": 225530 }, { "epoch": 1.4837865042137326, "grad_norm": 0.035563058566779916, "learning_rate": 1.8957265780168926e-06, "loss": 0.001, "step": 225540 }, { "epoch": 1.483852292388966, "grad_norm": 0.013570685920492515, "learning_rate": 1.895276538150803e-06, "loss": 0.0014, "step": 225550 }, { "epoch": 1.4839180805641994, "grad_norm": 0.008780877043033519, "learning_rate": 1.8948265392178116e-06, "loss": 0.0002, "step": 225560 }, { "epoch": 1.4839838687394327, "grad_norm": 0.04700562139275436, "learning_rate": 1.8943765812238501e-06, "loss": 0.0002, "step": 225570 }, { "epoch": 1.4840496569146662, "grad_norm": 0.022825410213391517, "learning_rate": 1.893926664174851e-06, "loss": 0.0003, "step": 225580 }, { "epoch": 1.4841154450898997, "grad_norm": 0.016188218145536837, "learning_rate": 1.893476788076747e-06, "loss": 0.002, "step": 225590 }, { "epoch": 1.484181233265133, "grad_norm": 0.0045831735874563365, "learning_rate": 1.8930269529354677e-06, "loss": 0.0004, "step": 225600 }, { "epoch": 1.4842470214403662, "grad_norm": 0.0013603341540055385, "learning_rate": 1.8925771587569453e-06, "loss": 0.0004, "step": 225610 }, { "epoch": 1.4843128096155997, "grad_norm": 0.05899485288079782, "learning_rate": 1.892127405547109e-06, "loss": 0.0006, "step": 225620 }, { "epoch": 1.484378597790833, "grad_norm": 0.007768312807314445, "learning_rate": 1.8916776933118887e-06, "loss": 0.0005, "step": 225630 }, { "epoch": 1.4844443859660665, "grad_norm": 0.027133276616058633, "learning_rate": 1.8912280220572133e-06, "loss": 0.0005, "step": 225640 }, { "epoch": 1.4845101741412998, "grad_norm": 0.08535105787776225, "learning_rate": 1.89077839178901e-06, "loss": 0.0005, "step": 225650 }, { "epoch": 1.484575962316533, "grad_norm": 0.05895698756903153, "learning_rate": 1.8903288025132099e-06, "loss": 0.0007, "step": 225660 }, { "epoch": 1.4846417504917666, "grad_norm": 0.04038834492705479, "learning_rate": 1.8898792542357392e-06, "loss": 0.0006, "step": 225670 }, { "epoch": 1.4847075386670001, "grad_norm": 0.007412613248731309, "learning_rate": 1.8894297469625244e-06, "loss": 0.0005, "step": 225680 }, { "epoch": 1.4847733268422334, "grad_norm": 0.013078925387072234, "learning_rate": 1.8889802806994917e-06, "loss": 0.0006, "step": 225690 }, { "epoch": 1.4848391150174667, "grad_norm": 0.03952202675783415, "learning_rate": 1.8885308554525672e-06, "loss": 0.0005, "step": 225700 }, { "epoch": 1.4849049031927002, "grad_norm": 0.020051282969655505, "learning_rate": 1.8880814712276763e-06, "loss": 0.0005, "step": 225710 }, { "epoch": 1.4849706913679335, "grad_norm": 0.011288320723171244, "learning_rate": 1.8876321280307435e-06, "loss": 0.0008, "step": 225720 }, { "epoch": 1.485036479543167, "grad_norm": 0.0354196619887168, "learning_rate": 1.8871828258676927e-06, "loss": 0.0006, "step": 225730 }, { "epoch": 1.4851022677184003, "grad_norm": 0.03324048773151182, "learning_rate": 1.8867335647444484e-06, "loss": 0.001, "step": 225740 }, { "epoch": 1.4851680558936335, "grad_norm": 0.029935273693236942, "learning_rate": 1.8862843446669332e-06, "loss": 0.0007, "step": 225750 }, { "epoch": 1.485233844068867, "grad_norm": 0.00104877866365967, "learning_rate": 1.8858351656410695e-06, "loss": 0.0006, "step": 225760 }, { "epoch": 1.4852996322441006, "grad_norm": 0.06956329113500914, "learning_rate": 1.8853860276727799e-06, "loss": 0.0006, "step": 225770 }, { "epoch": 1.4853654204193338, "grad_norm": 0.03662242838841083, "learning_rate": 1.8849369307679839e-06, "loss": 0.0005, "step": 225780 }, { "epoch": 1.4854312085945671, "grad_norm": 0.006543593733682357, "learning_rate": 1.884487874932605e-06, "loss": 0.0005, "step": 225790 }, { "epoch": 1.4854969967698006, "grad_norm": 0.05568427218178028, "learning_rate": 1.8840388601725635e-06, "loss": 0.0007, "step": 225800 }, { "epoch": 1.485562784945034, "grad_norm": 0.005346368660096393, "learning_rate": 1.8835898864937802e-06, "loss": 0.0005, "step": 225810 }, { "epoch": 1.4856285731202674, "grad_norm": 0.0730451464153403, "learning_rate": 1.883140953902171e-06, "loss": 0.0004, "step": 225820 }, { "epoch": 1.4856943612955007, "grad_norm": 0.09598222199003577, "learning_rate": 1.8826920624036565e-06, "loss": 0.0007, "step": 225830 }, { "epoch": 1.4857601494707342, "grad_norm": 0.057536327374481305, "learning_rate": 1.8822432120041545e-06, "loss": 0.0007, "step": 225840 }, { "epoch": 1.4858259376459675, "grad_norm": 0.02238318857411624, "learning_rate": 1.881794402709583e-06, "loss": 0.0005, "step": 225850 }, { "epoch": 1.485891725821201, "grad_norm": 0.09215623784374175, "learning_rate": 1.8813456345258597e-06, "loss": 0.0016, "step": 225860 }, { "epoch": 1.4859575139964343, "grad_norm": 0.007624411193584674, "learning_rate": 1.8808969074589006e-06, "loss": 0.0006, "step": 225870 }, { "epoch": 1.4860233021716676, "grad_norm": 0.02820358292820167, "learning_rate": 1.880448221514622e-06, "loss": 0.0002, "step": 225880 }, { "epoch": 1.486089090346901, "grad_norm": 0.032905036155332454, "learning_rate": 1.8799995766989377e-06, "loss": 0.0009, "step": 225890 }, { "epoch": 1.4861548785221346, "grad_norm": 0.0563745201244817, "learning_rate": 1.8795509730177658e-06, "loss": 0.0017, "step": 225900 }, { "epoch": 1.4862206666973679, "grad_norm": 0.11189837730623887, "learning_rate": 1.8791024104770195e-06, "loss": 0.0009, "step": 225910 }, { "epoch": 1.4862864548726011, "grad_norm": 0.045065649491484304, "learning_rate": 1.8786538890826128e-06, "loss": 0.0003, "step": 225920 }, { "epoch": 1.4863522430478346, "grad_norm": 0.013744565180223642, "learning_rate": 1.8782054088404583e-06, "loss": 0.0014, "step": 225930 }, { "epoch": 1.486418031223068, "grad_norm": 0.05623019943212791, "learning_rate": 1.8777569697564696e-06, "loss": 0.0005, "step": 225940 }, { "epoch": 1.4864838193983014, "grad_norm": 0.006905954901984944, "learning_rate": 1.8773085718365586e-06, "loss": 0.0007, "step": 225950 }, { "epoch": 1.4865496075735347, "grad_norm": 0.010314260180587214, "learning_rate": 1.8768602150866373e-06, "loss": 0.0005, "step": 225960 }, { "epoch": 1.486615395748768, "grad_norm": 0.006001850587175351, "learning_rate": 1.8764118995126167e-06, "loss": 0.0004, "step": 225970 }, { "epoch": 1.4866811839240015, "grad_norm": 0.006529648835830871, "learning_rate": 1.8759636251204072e-06, "loss": 0.0007, "step": 225980 }, { "epoch": 1.486746972099235, "grad_norm": 0.0012496594288694384, "learning_rate": 1.8755153919159197e-06, "loss": 0.0002, "step": 225990 }, { "epoch": 1.4868127602744683, "grad_norm": 0.013329072883242128, "learning_rate": 1.875067199905063e-06, "loss": 0.0002, "step": 226000 }, { "epoch": 1.4868785484497016, "grad_norm": 0.0019526767736728727, "learning_rate": 1.8746190490937466e-06, "loss": 0.0003, "step": 226010 }, { "epoch": 1.486944336624935, "grad_norm": 0.01925080862565805, "learning_rate": 1.8741709394878765e-06, "loss": 0.0007, "step": 226020 }, { "epoch": 1.4870101248001684, "grad_norm": 0.023651606887079898, "learning_rate": 1.8737228710933653e-06, "loss": 0.0006, "step": 226030 }, { "epoch": 1.4870759129754019, "grad_norm": 0.013159628452803572, "learning_rate": 1.8732748439161174e-06, "loss": 0.0015, "step": 226040 }, { "epoch": 1.4871417011506352, "grad_norm": 0.0230405580304019, "learning_rate": 1.8728268579620408e-06, "loss": 0.0035, "step": 226050 }, { "epoch": 1.4872074893258684, "grad_norm": 0.012264915740716992, "learning_rate": 1.8723789132370412e-06, "loss": 0.0006, "step": 226060 }, { "epoch": 1.487273277501102, "grad_norm": 0.012752702990490764, "learning_rate": 1.8719310097470244e-06, "loss": 0.0011, "step": 226070 }, { "epoch": 1.4873390656763354, "grad_norm": 0.006897449038070595, "learning_rate": 1.8714831474978957e-06, "loss": 0.0009, "step": 226080 }, { "epoch": 1.4874048538515687, "grad_norm": 0.04098611778809954, "learning_rate": 1.8710353264955594e-06, "loss": 0.0004, "step": 226090 }, { "epoch": 1.487470642026802, "grad_norm": 0.02738795462960243, "learning_rate": 1.87058754674592e-06, "loss": 0.0004, "step": 226100 }, { "epoch": 1.4875364302020355, "grad_norm": 0.04543658693283927, "learning_rate": 1.8701398082548816e-06, "loss": 0.0009, "step": 226110 }, { "epoch": 1.4876022183772688, "grad_norm": 0.007349936733398182, "learning_rate": 1.8696921110283461e-06, "loss": 0.0004, "step": 226120 }, { "epoch": 1.4876680065525023, "grad_norm": 0.03040233113288024, "learning_rate": 1.8692444550722172e-06, "loss": 0.0007, "step": 226130 }, { "epoch": 1.4877337947277356, "grad_norm": 0.008358483999777813, "learning_rate": 1.8687968403923956e-06, "loss": 0.0007, "step": 226140 }, { "epoch": 1.487799582902969, "grad_norm": 0.005491087679416434, "learning_rate": 1.8683492669947823e-06, "loss": 0.0003, "step": 226150 }, { "epoch": 1.4878653710782024, "grad_norm": 0.016637516631001163, "learning_rate": 1.8679017348852808e-06, "loss": 0.001, "step": 226160 }, { "epoch": 1.4879311592534359, "grad_norm": 0.0050568262011374526, "learning_rate": 1.8674542440697895e-06, "loss": 0.0006, "step": 226170 }, { "epoch": 1.4879969474286692, "grad_norm": 0.06303820848404237, "learning_rate": 1.8670067945542087e-06, "loss": 0.0004, "step": 226180 }, { "epoch": 1.4880627356039025, "grad_norm": 0.0011120653501117258, "learning_rate": 1.8665593863444376e-06, "loss": 0.0007, "step": 226190 }, { "epoch": 1.488128523779136, "grad_norm": 0.019267055360731342, "learning_rate": 1.8661120194463744e-06, "loss": 0.0005, "step": 226200 }, { "epoch": 1.4881943119543692, "grad_norm": 0.0325773127142201, "learning_rate": 1.8656646938659185e-06, "loss": 0.0003, "step": 226210 }, { "epoch": 1.4882601001296027, "grad_norm": 0.0069709536933518345, "learning_rate": 1.865217409608966e-06, "loss": 0.0004, "step": 226220 }, { "epoch": 1.488325888304836, "grad_norm": 0.01129231421987848, "learning_rate": 1.8647701666814144e-06, "loss": 0.0006, "step": 226230 }, { "epoch": 1.4883916764800695, "grad_norm": 0.05074366268074158, "learning_rate": 1.8643229650891609e-06, "loss": 0.0007, "step": 226240 }, { "epoch": 1.4884574646553028, "grad_norm": 0.0026968575468957886, "learning_rate": 1.8638758048381005e-06, "loss": 0.0005, "step": 226250 }, { "epoch": 1.4885232528305363, "grad_norm": 0.049688547808224175, "learning_rate": 1.8634286859341293e-06, "loss": 0.0008, "step": 226260 }, { "epoch": 1.4885890410057696, "grad_norm": 0.00364042880442362, "learning_rate": 1.862981608383142e-06, "loss": 0.0008, "step": 226270 }, { "epoch": 1.488654829181003, "grad_norm": 0.04926916802228467, "learning_rate": 1.862534572191031e-06, "loss": 0.0005, "step": 226280 }, { "epoch": 1.4887206173562364, "grad_norm": 0.03419985780218507, "learning_rate": 1.8620875773636938e-06, "loss": 0.0008, "step": 226290 }, { "epoch": 1.48878640553147, "grad_norm": 0.0016462332348110948, "learning_rate": 1.8616406239070217e-06, "loss": 0.0006, "step": 226300 }, { "epoch": 1.4888521937067032, "grad_norm": 0.08838845758658663, "learning_rate": 1.8611937118269075e-06, "loss": 0.0007, "step": 226310 }, { "epoch": 1.4889179818819365, "grad_norm": 0.033062947733555934, "learning_rate": 1.8607468411292435e-06, "loss": 0.0007, "step": 226320 }, { "epoch": 1.48898377005717, "grad_norm": 0.011398413656675495, "learning_rate": 1.8603000118199217e-06, "loss": 0.0004, "step": 226330 }, { "epoch": 1.4890495582324033, "grad_norm": 0.00757455737689507, "learning_rate": 1.8598532239048317e-06, "loss": 0.0005, "step": 226340 }, { "epoch": 1.4891153464076368, "grad_norm": 0.11057482935696475, "learning_rate": 1.8594064773898652e-06, "loss": 0.0006, "step": 226350 }, { "epoch": 1.48918113458287, "grad_norm": 0.04638512168850698, "learning_rate": 1.8589597722809122e-06, "loss": 0.0012, "step": 226360 }, { "epoch": 1.4892469227581033, "grad_norm": 0.0204213575749429, "learning_rate": 1.8585131085838615e-06, "loss": 0.0004, "step": 226370 }, { "epoch": 1.4893127109333368, "grad_norm": 0.017563229636769952, "learning_rate": 1.8580664863046022e-06, "loss": 0.0011, "step": 226380 }, { "epoch": 1.4893784991085703, "grad_norm": 0.02529697660129391, "learning_rate": 1.8576199054490229e-06, "loss": 0.0005, "step": 226390 }, { "epoch": 1.4894442872838036, "grad_norm": 0.007547511232394532, "learning_rate": 1.8571733660230113e-06, "loss": 0.0013, "step": 226400 }, { "epoch": 1.489510075459037, "grad_norm": 0.032848642708337, "learning_rate": 1.856726868032453e-06, "loss": 0.0004, "step": 226410 }, { "epoch": 1.4895758636342704, "grad_norm": 0.017087435484951737, "learning_rate": 1.8562804114832377e-06, "loss": 0.0003, "step": 226420 }, { "epoch": 1.4896416518095037, "grad_norm": 0.06989598057305277, "learning_rate": 1.85583399638125e-06, "loss": 0.0007, "step": 226430 }, { "epoch": 1.4897074399847372, "grad_norm": 0.0015152535020601006, "learning_rate": 1.8553876227323759e-06, "loss": 0.0007, "step": 226440 }, { "epoch": 1.4897732281599705, "grad_norm": 0.003926125916089504, "learning_rate": 1.8549412905424997e-06, "loss": 0.0008, "step": 226450 }, { "epoch": 1.489839016335204, "grad_norm": 0.03053271852938987, "learning_rate": 1.8544949998175066e-06, "loss": 0.0004, "step": 226460 }, { "epoch": 1.4899048045104373, "grad_norm": 0.00045201343833024674, "learning_rate": 1.8540487505632803e-06, "loss": 0.0006, "step": 226470 }, { "epoch": 1.4899705926856708, "grad_norm": 0.0006128083508762127, "learning_rate": 1.853602542785704e-06, "loss": 0.0003, "step": 226480 }, { "epoch": 1.490036380860904, "grad_norm": 0.035765618975483006, "learning_rate": 1.8531563764906612e-06, "loss": 0.0005, "step": 226490 }, { "epoch": 1.4901021690361373, "grad_norm": 0.004300927978152793, "learning_rate": 1.8527102516840334e-06, "loss": 0.0002, "step": 226500 }, { "epoch": 1.4901679572113709, "grad_norm": 0.01642803867589699, "learning_rate": 1.852264168371703e-06, "loss": 0.0005, "step": 226510 }, { "epoch": 1.4902337453866041, "grad_norm": 0.00803557479087661, "learning_rate": 1.851818126559551e-06, "loss": 0.0008, "step": 226520 }, { "epoch": 1.4902995335618376, "grad_norm": 0.05679010654558113, "learning_rate": 1.851372126253458e-06, "loss": 0.0005, "step": 226530 }, { "epoch": 1.490365321737071, "grad_norm": 0.05740400935273973, "learning_rate": 1.850926167459302e-06, "loss": 0.0008, "step": 226540 }, { "epoch": 1.4904311099123044, "grad_norm": 0.04996151174237093, "learning_rate": 1.8504802501829666e-06, "loss": 0.0006, "step": 226550 }, { "epoch": 1.4904968980875377, "grad_norm": 0.0023482931378299766, "learning_rate": 1.8500343744303294e-06, "loss": 0.0003, "step": 226560 }, { "epoch": 1.4905626862627712, "grad_norm": 0.07288946067643774, "learning_rate": 1.8495885402072683e-06, "loss": 0.0005, "step": 226570 }, { "epoch": 1.4906284744380045, "grad_norm": 0.01573344894518753, "learning_rate": 1.8491427475196617e-06, "loss": 0.0004, "step": 226580 }, { "epoch": 1.4906942626132378, "grad_norm": 0.039991232831878644, "learning_rate": 1.8486969963733864e-06, "loss": 0.0004, "step": 226590 }, { "epoch": 1.4907600507884713, "grad_norm": 0.003967136618395269, "learning_rate": 1.8482512867743197e-06, "loss": 0.0003, "step": 226600 }, { "epoch": 1.4908258389637048, "grad_norm": 0.03231147941873967, "learning_rate": 1.8478056187283371e-06, "loss": 0.0037, "step": 226610 }, { "epoch": 1.490891627138938, "grad_norm": 0.00486755343160314, "learning_rate": 1.847359992241316e-06, "loss": 0.0004, "step": 226620 }, { "epoch": 1.4909574153141714, "grad_norm": 0.07123112225983826, "learning_rate": 1.8469144073191304e-06, "loss": 0.0007, "step": 226630 }, { "epoch": 1.4910232034894049, "grad_norm": 0.010749778145254341, "learning_rate": 1.8464688639676547e-06, "loss": 0.0004, "step": 226640 }, { "epoch": 1.4910889916646382, "grad_norm": 0.004061153819383068, "learning_rate": 1.8460233621927638e-06, "loss": 0.0003, "step": 226650 }, { "epoch": 1.4911547798398717, "grad_norm": 0.08017192993498358, "learning_rate": 1.8455779020003306e-06, "loss": 0.0005, "step": 226660 }, { "epoch": 1.491220568015105, "grad_norm": 0.0034670112191811193, "learning_rate": 1.8451324833962269e-06, "loss": 0.0004, "step": 226670 }, { "epoch": 1.4912863561903382, "grad_norm": 0.0008087367454270552, "learning_rate": 1.8446871063863287e-06, "loss": 0.0006, "step": 226680 }, { "epoch": 1.4913521443655717, "grad_norm": 0.05453887545285141, "learning_rate": 1.8442417709765053e-06, "loss": 0.001, "step": 226690 }, { "epoch": 1.4914179325408052, "grad_norm": 0.06731781673833731, "learning_rate": 1.8437964771726292e-06, "loss": 0.0007, "step": 226700 }, { "epoch": 1.4914837207160385, "grad_norm": 0.0014729362843929142, "learning_rate": 1.8433512249805701e-06, "loss": 0.0005, "step": 226710 }, { "epoch": 1.4915495088912718, "grad_norm": 0.010033375576964479, "learning_rate": 1.8429060144061995e-06, "loss": 0.0004, "step": 226720 }, { "epoch": 1.4916152970665053, "grad_norm": 0.00040747201060429357, "learning_rate": 1.842460845455386e-06, "loss": 0.0002, "step": 226730 }, { "epoch": 1.4916810852417386, "grad_norm": 0.0003393796957857974, "learning_rate": 1.8420157181339993e-06, "loss": 0.0004, "step": 226740 }, { "epoch": 1.491746873416972, "grad_norm": 0.002538894352766253, "learning_rate": 1.8415706324479083e-06, "loss": 0.0003, "step": 226750 }, { "epoch": 1.4918126615922054, "grad_norm": 0.008361403320057728, "learning_rate": 1.8411255884029798e-06, "loss": 0.0004, "step": 226760 }, { "epoch": 1.4918784497674387, "grad_norm": 0.06615781542665941, "learning_rate": 1.840680586005083e-06, "loss": 0.0011, "step": 226770 }, { "epoch": 1.4919442379426722, "grad_norm": 0.013833446450129403, "learning_rate": 1.840235625260084e-06, "loss": 0.0003, "step": 226780 }, { "epoch": 1.4920100261179057, "grad_norm": 0.04759539146650266, "learning_rate": 1.8397907061738496e-06, "loss": 0.0006, "step": 226790 }, { "epoch": 1.492075814293139, "grad_norm": 0.008200166877443536, "learning_rate": 1.839345828752243e-06, "loss": 0.0005, "step": 226800 }, { "epoch": 1.4921416024683722, "grad_norm": 0.00901570332253412, "learning_rate": 1.8389009930011342e-06, "loss": 0.0007, "step": 226810 }, { "epoch": 1.4922073906436057, "grad_norm": 0.0013278158519726226, "learning_rate": 1.8384561989263855e-06, "loss": 0.0004, "step": 226820 }, { "epoch": 1.492273178818839, "grad_norm": 0.024071122837685033, "learning_rate": 1.8380114465338616e-06, "loss": 0.0007, "step": 226830 }, { "epoch": 1.4923389669940725, "grad_norm": 0.03171603038496628, "learning_rate": 1.8375667358294258e-06, "loss": 0.0008, "step": 226840 }, { "epoch": 1.4924047551693058, "grad_norm": 0.014348565930567742, "learning_rate": 1.837122066818941e-06, "loss": 0.0006, "step": 226850 }, { "epoch": 1.4924705433445393, "grad_norm": 0.03605251749338102, "learning_rate": 1.8366774395082703e-06, "loss": 0.001, "step": 226860 }, { "epoch": 1.4925363315197726, "grad_norm": 0.0023897514549465603, "learning_rate": 1.8362328539032752e-06, "loss": 0.0003, "step": 226870 }, { "epoch": 1.4926021196950061, "grad_norm": 0.046227233037868735, "learning_rate": 1.8357883100098184e-06, "loss": 0.0009, "step": 226880 }, { "epoch": 1.4926679078702394, "grad_norm": 0.008725527855283726, "learning_rate": 1.8353438078337593e-06, "loss": 0.0006, "step": 226890 }, { "epoch": 1.4927336960454727, "grad_norm": 0.005700920348925029, "learning_rate": 1.834899347380959e-06, "loss": 0.001, "step": 226900 }, { "epoch": 1.4927994842207062, "grad_norm": 0.016176458338974565, "learning_rate": 1.8344549286572772e-06, "loss": 0.0005, "step": 226910 }, { "epoch": 1.4928652723959397, "grad_norm": 0.006381300993325172, "learning_rate": 1.834010551668573e-06, "loss": 0.0004, "step": 226920 }, { "epoch": 1.492931060571173, "grad_norm": 0.02331398338385664, "learning_rate": 1.833566216420704e-06, "loss": 0.0011, "step": 226930 }, { "epoch": 1.4929968487464063, "grad_norm": 0.024109476759211265, "learning_rate": 1.8331219229195313e-06, "loss": 0.0001, "step": 226940 }, { "epoch": 1.4930626369216398, "grad_norm": 0.018220736619300158, "learning_rate": 1.832677671170911e-06, "loss": 0.0004, "step": 226950 }, { "epoch": 1.493128425096873, "grad_norm": 0.03771287508166289, "learning_rate": 1.8322334611807002e-06, "loss": 0.0005, "step": 226960 }, { "epoch": 1.4931942132721066, "grad_norm": 0.00039664905100997224, "learning_rate": 1.8317892929547553e-06, "loss": 0.0013, "step": 226970 }, { "epoch": 1.4932600014473398, "grad_norm": 0.030151969852775094, "learning_rate": 1.8313451664989324e-06, "loss": 0.0007, "step": 226980 }, { "epoch": 1.4933257896225731, "grad_norm": 0.05359459651883793, "learning_rate": 1.8309010818190865e-06, "loss": 0.0005, "step": 226990 }, { "epoch": 1.4933915777978066, "grad_norm": 0.007977789273059515, "learning_rate": 1.8304570389210735e-06, "loss": 0.0003, "step": 227000 }, { "epoch": 1.4934573659730401, "grad_norm": 0.06992993912486455, "learning_rate": 1.830013037810746e-06, "loss": 0.0005, "step": 227010 }, { "epoch": 1.4935231541482734, "grad_norm": 0.013968195196809346, "learning_rate": 1.8295690784939596e-06, "loss": 0.0002, "step": 227020 }, { "epoch": 1.4935889423235067, "grad_norm": 0.043009128544752986, "learning_rate": 1.829125160976567e-06, "loss": 0.0006, "step": 227030 }, { "epoch": 1.4936547304987402, "grad_norm": 0.0434017256016712, "learning_rate": 1.8286812852644197e-06, "loss": 0.0003, "step": 227040 }, { "epoch": 1.4937205186739735, "grad_norm": 0.0035370947470027693, "learning_rate": 1.8282374513633716e-06, "loss": 0.0006, "step": 227050 }, { "epoch": 1.493786306849207, "grad_norm": 0.0447131587604798, "learning_rate": 1.827793659279271e-06, "loss": 0.0009, "step": 227060 }, { "epoch": 1.4938520950244403, "grad_norm": 0.01710414188988833, "learning_rate": 1.8273499090179736e-06, "loss": 0.0005, "step": 227070 }, { "epoch": 1.4939178831996736, "grad_norm": 0.016652914714407097, "learning_rate": 1.8269062005853272e-06, "loss": 0.0006, "step": 227080 }, { "epoch": 1.493983671374907, "grad_norm": 0.002372100710301481, "learning_rate": 1.8264625339871821e-06, "loss": 0.0004, "step": 227090 }, { "epoch": 1.4940494595501406, "grad_norm": 0.03945925271075795, "learning_rate": 1.8260189092293878e-06, "loss": 0.0007, "step": 227100 }, { "epoch": 1.4941152477253739, "grad_norm": 0.012430057902965893, "learning_rate": 1.8255753263177928e-06, "loss": 0.0006, "step": 227110 }, { "epoch": 1.4941810359006071, "grad_norm": 0.09639296410740668, "learning_rate": 1.8251317852582456e-06, "loss": 0.0006, "step": 227120 }, { "epoch": 1.4942468240758406, "grad_norm": 0.0008280170964613784, "learning_rate": 1.824688286056594e-06, "loss": 0.0005, "step": 227130 }, { "epoch": 1.494312612251074, "grad_norm": 0.031998983959381534, "learning_rate": 1.8242448287186842e-06, "loss": 0.0003, "step": 227140 }, { "epoch": 1.4943784004263074, "grad_norm": 0.027919312103906982, "learning_rate": 1.8238014132503645e-06, "loss": 0.001, "step": 227150 }, { "epoch": 1.4944441886015407, "grad_norm": 0.017591569835827967, "learning_rate": 1.8233580396574796e-06, "loss": 0.0004, "step": 227160 }, { "epoch": 1.4945099767767742, "grad_norm": 0.0003269322822250292, "learning_rate": 1.8229147079458754e-06, "loss": 0.0003, "step": 227170 }, { "epoch": 1.4945757649520075, "grad_norm": 0.013383061768586958, "learning_rate": 1.822471418121397e-06, "loss": 0.0005, "step": 227180 }, { "epoch": 1.494641553127241, "grad_norm": 0.07381497788597184, "learning_rate": 1.8220281701898868e-06, "loss": 0.0005, "step": 227190 }, { "epoch": 1.4947073413024743, "grad_norm": 0.0009106799285546723, "learning_rate": 1.8215849641571919e-06, "loss": 0.0006, "step": 227200 }, { "epoch": 1.4947731294777076, "grad_norm": 0.0021282844889338078, "learning_rate": 1.8211418000291547e-06, "loss": 0.0008, "step": 227210 }, { "epoch": 1.494838917652941, "grad_norm": 0.0009230707877039203, "learning_rate": 1.8206986778116176e-06, "loss": 0.0002, "step": 227220 }, { "epoch": 1.4949047058281744, "grad_norm": 0.03648251595990204, "learning_rate": 1.8202555975104224e-06, "loss": 0.0007, "step": 227230 }, { "epoch": 1.4949704940034079, "grad_norm": 0.003733536684689744, "learning_rate": 1.8198125591314109e-06, "loss": 0.0008, "step": 227240 }, { "epoch": 1.4950362821786412, "grad_norm": 0.02513184159324691, "learning_rate": 1.8193695626804241e-06, "loss": 0.0006, "step": 227250 }, { "epoch": 1.4951020703538747, "grad_norm": 0.013400906286729084, "learning_rate": 1.818926608163303e-06, "loss": 0.0002, "step": 227260 }, { "epoch": 1.495167858529108, "grad_norm": 0.0015805292303838626, "learning_rate": 1.8184836955858864e-06, "loss": 0.0005, "step": 227270 }, { "epoch": 1.4952336467043414, "grad_norm": 0.06849172919548459, "learning_rate": 1.818040824954015e-06, "loss": 0.0004, "step": 227280 }, { "epoch": 1.4952994348795747, "grad_norm": 0.0759908101220954, "learning_rate": 1.8175979962735274e-06, "loss": 0.0005, "step": 227290 }, { "epoch": 1.495365223054808, "grad_norm": 0.034378767099972035, "learning_rate": 1.8171552095502614e-06, "loss": 0.0002, "step": 227300 }, { "epoch": 1.4954310112300415, "grad_norm": 0.004250637751051783, "learning_rate": 1.8167124647900552e-06, "loss": 0.0005, "step": 227310 }, { "epoch": 1.495496799405275, "grad_norm": 0.0030161406096773915, "learning_rate": 1.8162697619987446e-06, "loss": 0.0002, "step": 227320 }, { "epoch": 1.4955625875805083, "grad_norm": 0.016006421985558753, "learning_rate": 1.8158271011821688e-06, "loss": 0.0001, "step": 227330 }, { "epoch": 1.4956283757557416, "grad_norm": 0.01669917525096589, "learning_rate": 1.8153844823461625e-06, "loss": 0.0006, "step": 227340 }, { "epoch": 1.495694163930975, "grad_norm": 0.017238517000021276, "learning_rate": 1.8149419054965617e-06, "loss": 0.0011, "step": 227350 }, { "epoch": 1.4957599521062084, "grad_norm": 0.08624266119408033, "learning_rate": 1.8144993706392011e-06, "loss": 0.0008, "step": 227360 }, { "epoch": 1.4958257402814419, "grad_norm": 0.02023373695717321, "learning_rate": 1.814056877779915e-06, "loss": 0.0003, "step": 227370 }, { "epoch": 1.4958915284566752, "grad_norm": 0.020714543264102204, "learning_rate": 1.8136144269245376e-06, "loss": 0.0009, "step": 227380 }, { "epoch": 1.4959573166319085, "grad_norm": 0.0061068309151524345, "learning_rate": 1.8131720180789013e-06, "loss": 0.0006, "step": 227390 }, { "epoch": 1.496023104807142, "grad_norm": 0.13609288453274418, "learning_rate": 1.8127296512488406e-06, "loss": 0.0014, "step": 227400 }, { "epoch": 1.4960888929823755, "grad_norm": 0.03491863527490781, "learning_rate": 1.8122873264401863e-06, "loss": 0.0006, "step": 227410 }, { "epoch": 1.4961546811576087, "grad_norm": 0.021806494018787217, "learning_rate": 1.8118450436587703e-06, "loss": 0.0002, "step": 227420 }, { "epoch": 1.496220469332842, "grad_norm": 0.03340150822026205, "learning_rate": 1.8114028029104242e-06, "loss": 0.0012, "step": 227430 }, { "epoch": 1.4962862575080755, "grad_norm": 0.024593598173255248, "learning_rate": 1.810960604200978e-06, "loss": 0.0005, "step": 227440 }, { "epoch": 1.4963520456833088, "grad_norm": 0.004548200397782443, "learning_rate": 1.8105184475362608e-06, "loss": 0.0008, "step": 227450 }, { "epoch": 1.4964178338585423, "grad_norm": 0.0004512923828313362, "learning_rate": 1.810076332922105e-06, "loss": 0.0008, "step": 227460 }, { "epoch": 1.4964836220337756, "grad_norm": 0.11451699866731065, "learning_rate": 1.8096342603643374e-06, "loss": 0.0005, "step": 227470 }, { "epoch": 1.4965494102090091, "grad_norm": 0.0006867943138445158, "learning_rate": 1.8091922298687865e-06, "loss": 0.0007, "step": 227480 }, { "epoch": 1.4966151983842424, "grad_norm": 0.0049755007007930615, "learning_rate": 1.8087502414412806e-06, "loss": 0.0004, "step": 227490 }, { "epoch": 1.496680986559476, "grad_norm": 0.007325988243746062, "learning_rate": 1.8083082950876463e-06, "loss": 0.0005, "step": 227500 }, { "epoch": 1.4967467747347092, "grad_norm": 0.0004792163317855404, "learning_rate": 1.8078663908137112e-06, "loss": 0.0002, "step": 227510 }, { "epoch": 1.4968125629099425, "grad_norm": 0.004680106997466143, "learning_rate": 1.8074245286253006e-06, "loss": 0.0005, "step": 227520 }, { "epoch": 1.496878351085176, "grad_norm": 0.022458650525142586, "learning_rate": 1.8069827085282398e-06, "loss": 0.0003, "step": 227530 }, { "epoch": 1.4969441392604093, "grad_norm": 0.034500343928781325, "learning_rate": 1.8065409305283548e-06, "loss": 0.0009, "step": 227540 }, { "epoch": 1.4970099274356428, "grad_norm": 0.02067988112013391, "learning_rate": 1.8060991946314694e-06, "loss": 0.0003, "step": 227550 }, { "epoch": 1.497075715610876, "grad_norm": 0.018544211789916866, "learning_rate": 1.805657500843408e-06, "loss": 0.0006, "step": 227560 }, { "epoch": 1.4971415037861096, "grad_norm": 0.04515155708821189, "learning_rate": 1.8052158491699935e-06, "loss": 0.0004, "step": 227570 }, { "epoch": 1.4972072919613428, "grad_norm": 0.01872995641793811, "learning_rate": 1.804774239617047e-06, "loss": 0.0005, "step": 227580 }, { "epoch": 1.4972730801365763, "grad_norm": 0.03251555771465367, "learning_rate": 1.8043326721903941e-06, "loss": 0.0003, "step": 227590 }, { "epoch": 1.4973388683118096, "grad_norm": 0.025331378145260815, "learning_rate": 1.8038911468958554e-06, "loss": 0.0003, "step": 227600 }, { "epoch": 1.497404656487043, "grad_norm": 0.029236311199482497, "learning_rate": 1.8034496637392512e-06, "loss": 0.0004, "step": 227610 }, { "epoch": 1.4974704446622764, "grad_norm": 0.010558517029491525, "learning_rate": 1.8030082227264029e-06, "loss": 0.0006, "step": 227620 }, { "epoch": 1.49753623283751, "grad_norm": 0.03754968324004515, "learning_rate": 1.8025668238631295e-06, "loss": 0.0004, "step": 227630 }, { "epoch": 1.4976020210127432, "grad_norm": 0.017943529831882216, "learning_rate": 1.8021254671552512e-06, "loss": 0.0003, "step": 227640 }, { "epoch": 1.4976678091879765, "grad_norm": 0.03815201812515932, "learning_rate": 1.8016841526085872e-06, "loss": 0.0006, "step": 227650 }, { "epoch": 1.49773359736321, "grad_norm": 0.012649843944302335, "learning_rate": 1.801242880228955e-06, "loss": 0.0004, "step": 227660 }, { "epoch": 1.4977993855384433, "grad_norm": 0.04077314070012419, "learning_rate": 1.800801650022173e-06, "loss": 0.0003, "step": 227670 }, { "epoch": 1.4978651737136768, "grad_norm": 0.027716855178398586, "learning_rate": 1.800360461994058e-06, "loss": 0.001, "step": 227680 }, { "epoch": 1.49793096188891, "grad_norm": 0.007045166115245504, "learning_rate": 1.799919316150427e-06, "loss": 0.0002, "step": 227690 }, { "epoch": 1.4979967500641433, "grad_norm": 0.02695936647531015, "learning_rate": 1.7994782124970962e-06, "loss": 0.001, "step": 227700 }, { "epoch": 1.4980625382393769, "grad_norm": 0.05799945353570744, "learning_rate": 1.7990371510398807e-06, "loss": 0.0003, "step": 227710 }, { "epoch": 1.4981283264146104, "grad_norm": 0.011678074293488789, "learning_rate": 1.7985961317845963e-06, "loss": 0.0006, "step": 227720 }, { "epoch": 1.4981941145898436, "grad_norm": 0.015574420618529665, "learning_rate": 1.7981551547370569e-06, "loss": 0.0007, "step": 227730 }, { "epoch": 1.498259902765077, "grad_norm": 0.0007511243297840131, "learning_rate": 1.7977142199030762e-06, "loss": 0.0002, "step": 227740 }, { "epoch": 1.4983256909403104, "grad_norm": 0.06405167449964502, "learning_rate": 1.797273327288468e-06, "loss": 0.0006, "step": 227750 }, { "epoch": 1.4983914791155437, "grad_norm": 0.03663603950305679, "learning_rate": 1.7968324768990448e-06, "loss": 0.0009, "step": 227760 }, { "epoch": 1.4984572672907772, "grad_norm": 0.06996163440154962, "learning_rate": 1.796391668740619e-06, "loss": 0.0007, "step": 227770 }, { "epoch": 1.4985230554660105, "grad_norm": 0.006075947976315394, "learning_rate": 1.795950902819002e-06, "loss": 0.0009, "step": 227780 }, { "epoch": 1.4985888436412438, "grad_norm": 0.09859450402826662, "learning_rate": 1.7955101791400054e-06, "loss": 0.0011, "step": 227790 }, { "epoch": 1.4986546318164773, "grad_norm": 0.0007715846459185105, "learning_rate": 1.7950694977094396e-06, "loss": 0.0005, "step": 227800 }, { "epoch": 1.4987204199917108, "grad_norm": 0.06490350285262522, "learning_rate": 1.7946288585331128e-06, "loss": 0.0005, "step": 227810 }, { "epoch": 1.498786208166944, "grad_norm": 0.05866308560684122, "learning_rate": 1.7941882616168378e-06, "loss": 0.0005, "step": 227820 }, { "epoch": 1.4988519963421774, "grad_norm": 0.03933030959484832, "learning_rate": 1.7937477069664217e-06, "loss": 0.0007, "step": 227830 }, { "epoch": 1.4989177845174109, "grad_norm": 0.00034903345990356147, "learning_rate": 1.7933071945876735e-06, "loss": 0.0011, "step": 227840 }, { "epoch": 1.4989835726926442, "grad_norm": 0.025057515286816722, "learning_rate": 1.7928667244864e-06, "loss": 0.0004, "step": 227850 }, { "epoch": 1.4990493608678777, "grad_norm": 0.012239165253169064, "learning_rate": 1.7924262966684086e-06, "loss": 0.0002, "step": 227860 }, { "epoch": 1.499115149043111, "grad_norm": 0.09686807703404789, "learning_rate": 1.7919859111395065e-06, "loss": 0.0007, "step": 227870 }, { "epoch": 1.4991809372183444, "grad_norm": 0.02876943857367143, "learning_rate": 1.7915455679055e-06, "loss": 0.0003, "step": 227880 }, { "epoch": 1.4992467253935777, "grad_norm": 0.011183517240307889, "learning_rate": 1.791105266972194e-06, "loss": 0.0006, "step": 227890 }, { "epoch": 1.4993125135688112, "grad_norm": 0.005192527754048089, "learning_rate": 1.7906650083453931e-06, "loss": 0.0003, "step": 227900 }, { "epoch": 1.4993783017440445, "grad_norm": 0.021240515537863897, "learning_rate": 1.790224792030903e-06, "loss": 0.0008, "step": 227910 }, { "epoch": 1.4994440899192778, "grad_norm": 0.003419688982002083, "learning_rate": 1.7897846180345262e-06, "loss": 0.0002, "step": 227920 }, { "epoch": 1.4995098780945113, "grad_norm": 0.010758665568709627, "learning_rate": 1.7893444863620674e-06, "loss": 0.0005, "step": 227930 }, { "epoch": 1.4995756662697448, "grad_norm": 0.021214597869950263, "learning_rate": 1.7889043970193264e-06, "loss": 0.0008, "step": 227940 }, { "epoch": 1.499641454444978, "grad_norm": 0.018850571171333406, "learning_rate": 1.7884643500121096e-06, "loss": 0.0008, "step": 227950 }, { "epoch": 1.4997072426202114, "grad_norm": 0.025072232434359024, "learning_rate": 1.7880243453462164e-06, "loss": 0.0005, "step": 227960 }, { "epoch": 1.4997730307954449, "grad_norm": 0.004579141037534659, "learning_rate": 1.7875843830274486e-06, "loss": 0.0009, "step": 227970 }, { "epoch": 1.4998388189706782, "grad_norm": 0.2634186237966953, "learning_rate": 1.7871444630616064e-06, "loss": 0.0007, "step": 227980 }, { "epoch": 1.4999046071459117, "grad_norm": 0.013974821795426183, "learning_rate": 1.786704585454489e-06, "loss": 0.0003, "step": 227990 }, { "epoch": 1.499970395321145, "grad_norm": 0.005336737003945657, "learning_rate": 1.786264750211897e-06, "loss": 0.0007, "step": 228000 }, { "epoch": 1.5000361834963782, "grad_norm": 0.01413141169309739, "learning_rate": 1.7858249573396286e-06, "loss": 0.0009, "step": 228010 }, { "epoch": 1.5001019716716117, "grad_norm": 0.00319029493355686, "learning_rate": 1.7853852068434818e-06, "loss": 0.0004, "step": 228020 }, { "epoch": 1.5001677598468453, "grad_norm": 0.0482434544076511, "learning_rate": 1.7849454987292553e-06, "loss": 0.0006, "step": 228030 }, { "epoch": 1.5002335480220785, "grad_norm": 0.023726539641792305, "learning_rate": 1.7845058330027453e-06, "loss": 0.0016, "step": 228040 }, { "epoch": 1.5002993361973118, "grad_norm": 0.002893389826723941, "learning_rate": 1.7840662096697487e-06, "loss": 0.0007, "step": 228050 }, { "epoch": 1.5003651243725453, "grad_norm": 0.026754076731661646, "learning_rate": 1.783626628736062e-06, "loss": 0.0007, "step": 228060 }, { "epoch": 1.5004309125477788, "grad_norm": 0.012355406510227025, "learning_rate": 1.7831870902074783e-06, "loss": 0.0004, "step": 228070 }, { "epoch": 1.5004967007230121, "grad_norm": 0.056339317747097224, "learning_rate": 1.7827475940897965e-06, "loss": 0.0005, "step": 228080 }, { "epoch": 1.5005624888982454, "grad_norm": 0.10010377631288431, "learning_rate": 1.7823081403888093e-06, "loss": 0.0007, "step": 228090 }, { "epoch": 1.5006282770734787, "grad_norm": 0.002760924073160756, "learning_rate": 1.7818687291103104e-06, "loss": 0.0012, "step": 228100 }, { "epoch": 1.5006940652487122, "grad_norm": 0.06891742189585902, "learning_rate": 1.7814293602600925e-06, "loss": 0.0009, "step": 228110 }, { "epoch": 1.5007598534239457, "grad_norm": 0.04913669570279264, "learning_rate": 1.780990033843949e-06, "loss": 0.0004, "step": 228120 }, { "epoch": 1.500825641599179, "grad_norm": 0.027765464121203596, "learning_rate": 1.7805507498676717e-06, "loss": 0.0006, "step": 228130 }, { "epoch": 1.5008914297744123, "grad_norm": 0.043611811682322074, "learning_rate": 1.780111508337052e-06, "loss": 0.0003, "step": 228140 }, { "epoch": 1.5009572179496458, "grad_norm": 0.005468929561060923, "learning_rate": 1.7796723092578816e-06, "loss": 0.0006, "step": 228150 }, { "epoch": 1.5010230061248793, "grad_norm": 0.025847932493299888, "learning_rate": 1.7792331526359502e-06, "loss": 0.0004, "step": 228160 }, { "epoch": 1.5010887943001125, "grad_norm": 0.01992229407401331, "learning_rate": 1.778794038477048e-06, "loss": 0.0004, "step": 228170 }, { "epoch": 1.5011545824753458, "grad_norm": 0.0044672364655291424, "learning_rate": 1.7783549667869649e-06, "loss": 0.0007, "step": 228180 }, { "epoch": 1.5012203706505791, "grad_norm": 0.04130320306905794, "learning_rate": 1.7779159375714887e-06, "loss": 0.0013, "step": 228190 }, { "epoch": 1.5012861588258126, "grad_norm": 0.019360901754735132, "learning_rate": 1.7774769508364064e-06, "loss": 0.0018, "step": 228200 }, { "epoch": 1.5013519470010461, "grad_norm": 0.05316976158374563, "learning_rate": 1.7770380065875091e-06, "loss": 0.0006, "step": 228210 }, { "epoch": 1.5014177351762794, "grad_norm": 0.004074164737682271, "learning_rate": 1.7765991048305825e-06, "loss": 0.0005, "step": 228220 }, { "epoch": 1.5014835233515127, "grad_norm": 0.041301465945746095, "learning_rate": 1.7761602455714128e-06, "loss": 0.0004, "step": 228230 }, { "epoch": 1.5015493115267462, "grad_norm": 0.0063197930953006526, "learning_rate": 1.7757214288157853e-06, "loss": 0.0003, "step": 228240 }, { "epoch": 1.5016150997019797, "grad_norm": 0.052889355477479305, "learning_rate": 1.7752826545694869e-06, "loss": 0.0005, "step": 228250 }, { "epoch": 1.501680887877213, "grad_norm": 0.017859712510623293, "learning_rate": 1.7748439228383007e-06, "loss": 0.0003, "step": 228260 }, { "epoch": 1.5017466760524463, "grad_norm": 0.017437996084253785, "learning_rate": 1.7744052336280126e-06, "loss": 0.0005, "step": 228270 }, { "epoch": 1.5018124642276796, "grad_norm": 0.019810510216619923, "learning_rate": 1.7739665869444055e-06, "loss": 0.0002, "step": 228280 }, { "epoch": 1.501878252402913, "grad_norm": 0.035225963351057835, "learning_rate": 1.7735279827932634e-06, "loss": 0.0003, "step": 228290 }, { "epoch": 1.5019440405781466, "grad_norm": 0.019251129758195148, "learning_rate": 1.7730894211803673e-06, "loss": 0.0005, "step": 228300 }, { "epoch": 1.5020098287533798, "grad_norm": 0.00551559201432234, "learning_rate": 1.7726509021115006e-06, "loss": 0.0005, "step": 228310 }, { "epoch": 1.5020756169286131, "grad_norm": 0.029907703910737285, "learning_rate": 1.772212425592445e-06, "loss": 0.0003, "step": 228320 }, { "epoch": 1.5021414051038466, "grad_norm": 0.008735563634239597, "learning_rate": 1.7717739916289784e-06, "loss": 0.0003, "step": 228330 }, { "epoch": 1.5022071932790801, "grad_norm": 0.015364781310654826, "learning_rate": 1.7713356002268855e-06, "loss": 0.0019, "step": 228340 }, { "epoch": 1.5022729814543134, "grad_norm": 0.030687816033780537, "learning_rate": 1.7708972513919447e-06, "loss": 0.0019, "step": 228350 }, { "epoch": 1.5023387696295467, "grad_norm": 0.0012043285681955964, "learning_rate": 1.7704589451299343e-06, "loss": 0.0004, "step": 228360 }, { "epoch": 1.5024045578047802, "grad_norm": 0.007354160272800268, "learning_rate": 1.7700206814466336e-06, "loss": 0.0002, "step": 228370 }, { "epoch": 1.5024703459800135, "grad_norm": 0.0010937377824617783, "learning_rate": 1.7695824603478212e-06, "loss": 0.0004, "step": 228380 }, { "epoch": 1.502536134155247, "grad_norm": 0.009928025176159854, "learning_rate": 1.7691442818392734e-06, "loss": 0.0004, "step": 228390 }, { "epoch": 1.5026019223304803, "grad_norm": 0.053585875792933536, "learning_rate": 1.768706145926768e-06, "loss": 0.0002, "step": 228400 }, { "epoch": 1.5026677105057136, "grad_norm": 0.06215674549864422, "learning_rate": 1.7682680526160816e-06, "loss": 0.0005, "step": 228410 }, { "epoch": 1.502733498680947, "grad_norm": 0.026687397570721944, "learning_rate": 1.7678300019129896e-06, "loss": 0.0003, "step": 228420 }, { "epoch": 1.5027992868561806, "grad_norm": 0.024082647938400804, "learning_rate": 1.7673919938232676e-06, "loss": 0.0003, "step": 228430 }, { "epoch": 1.5028650750314139, "grad_norm": 0.007333037829776225, "learning_rate": 1.7669540283526904e-06, "loss": 0.0009, "step": 228440 }, { "epoch": 1.5029308632066471, "grad_norm": 0.0016914145944949854, "learning_rate": 1.7665161055070318e-06, "loss": 0.001, "step": 228450 }, { "epoch": 1.5029966513818807, "grad_norm": 0.01051935621240701, "learning_rate": 1.7660782252920644e-06, "loss": 0.0005, "step": 228460 }, { "epoch": 1.5030624395571142, "grad_norm": 0.058600253428419236, "learning_rate": 1.7656403877135642e-06, "loss": 0.0013, "step": 228470 }, { "epoch": 1.5031282277323474, "grad_norm": 0.013547085855315341, "learning_rate": 1.765202592777302e-06, "loss": 0.0006, "step": 228480 }, { "epoch": 1.5031940159075807, "grad_norm": 0.015523901203679587, "learning_rate": 1.7647648404890495e-06, "loss": 0.0001, "step": 228490 }, { "epoch": 1.503259804082814, "grad_norm": 0.009524379701139703, "learning_rate": 1.7643271308545789e-06, "loss": 0.0003, "step": 228500 }, { "epoch": 1.5033255922580475, "grad_norm": 0.026795839337310424, "learning_rate": 1.7638894638796606e-06, "loss": 0.0005, "step": 228510 }, { "epoch": 1.503391380433281, "grad_norm": 0.03937781513758707, "learning_rate": 1.7634518395700644e-06, "loss": 0.0004, "step": 228520 }, { "epoch": 1.5034571686085143, "grad_norm": 0.03275754929024641, "learning_rate": 1.7630142579315612e-06, "loss": 0.0006, "step": 228530 }, { "epoch": 1.5035229567837476, "grad_norm": 0.0014979595622853109, "learning_rate": 1.762576718969919e-06, "loss": 0.0008, "step": 228540 }, { "epoch": 1.503588744958981, "grad_norm": 0.041051021768157864, "learning_rate": 1.7621392226909062e-06, "loss": 0.0005, "step": 228550 }, { "epoch": 1.5036545331342146, "grad_norm": 0.1132814338834416, "learning_rate": 1.7617017691002918e-06, "loss": 0.0007, "step": 228560 }, { "epoch": 1.5037203213094479, "grad_norm": 0.0013563694304636208, "learning_rate": 1.761264358203843e-06, "loss": 0.0008, "step": 228570 }, { "epoch": 1.5037861094846812, "grad_norm": 0.009200994898349153, "learning_rate": 1.7608269900073266e-06, "loss": 0.0003, "step": 228580 }, { "epoch": 1.5038518976599144, "grad_norm": 0.0503582218651614, "learning_rate": 1.7603896645165064e-06, "loss": 0.0005, "step": 228590 }, { "epoch": 1.503917685835148, "grad_norm": 0.02731038045821865, "learning_rate": 1.7599523817371527e-06, "loss": 0.0005, "step": 228600 }, { "epoch": 1.5039834740103815, "grad_norm": 0.0072288950209758425, "learning_rate": 1.759515141675029e-06, "loss": 0.0004, "step": 228610 }, { "epoch": 1.5040492621856147, "grad_norm": 0.06961652415248318, "learning_rate": 1.7590779443358986e-06, "loss": 0.0008, "step": 228620 }, { "epoch": 1.504115050360848, "grad_norm": 0.012156675272958972, "learning_rate": 1.758640789725527e-06, "loss": 0.0004, "step": 228630 }, { "epoch": 1.5041808385360815, "grad_norm": 0.04741570841251469, "learning_rate": 1.758203677849677e-06, "loss": 0.0002, "step": 228640 }, { "epoch": 1.504246626711315, "grad_norm": 0.019062103621989856, "learning_rate": 1.7577666087141116e-06, "loss": 0.0003, "step": 228650 }, { "epoch": 1.5043124148865483, "grad_norm": 0.0039032185533484586, "learning_rate": 1.7573295823245934e-06, "loss": 0.0003, "step": 228660 }, { "epoch": 1.5043782030617816, "grad_norm": 0.053158806462277094, "learning_rate": 1.756892598686884e-06, "loss": 0.0004, "step": 228670 }, { "epoch": 1.504443991237015, "grad_norm": 0.04909178031294556, "learning_rate": 1.7564556578067455e-06, "loss": 0.0005, "step": 228680 }, { "epoch": 1.5045097794122484, "grad_norm": 0.003634604871894729, "learning_rate": 1.7560187596899369e-06, "loss": 0.0006, "step": 228690 }, { "epoch": 1.504575567587482, "grad_norm": 0.03483937034427971, "learning_rate": 1.7555819043422195e-06, "loss": 0.001, "step": 228700 }, { "epoch": 1.5046413557627152, "grad_norm": 0.012044224095177195, "learning_rate": 1.755145091769353e-06, "loss": 0.0002, "step": 228710 }, { "epoch": 1.5047071439379485, "grad_norm": 0.043089026412658056, "learning_rate": 1.754708321977094e-06, "loss": 0.0013, "step": 228720 }, { "epoch": 1.504772932113182, "grad_norm": 0.05334974055307015, "learning_rate": 1.7542715949712048e-06, "loss": 0.0003, "step": 228730 }, { "epoch": 1.5048387202884155, "grad_norm": 0.0019249951746356407, "learning_rate": 1.7538349107574408e-06, "loss": 0.0004, "step": 228740 }, { "epoch": 1.5049045084636488, "grad_norm": 0.04317401919281128, "learning_rate": 1.7533982693415607e-06, "loss": 0.0002, "step": 228750 }, { "epoch": 1.504970296638882, "grad_norm": 0.0065484831872931375, "learning_rate": 1.7529616707293202e-06, "loss": 0.0002, "step": 228760 }, { "epoch": 1.5050360848141155, "grad_norm": 0.021025042872503435, "learning_rate": 1.752525114926476e-06, "loss": 0.0002, "step": 228770 }, { "epoch": 1.505101872989349, "grad_norm": 0.02106160826421534, "learning_rate": 1.7520886019387833e-06, "loss": 0.0004, "step": 228780 }, { "epoch": 1.5051676611645823, "grad_norm": 0.11375827119571687, "learning_rate": 1.751652131771997e-06, "loss": 0.001, "step": 228790 }, { "epoch": 1.5052334493398156, "grad_norm": 0.05421268155593943, "learning_rate": 1.7512157044318723e-06, "loss": 0.0008, "step": 228800 }, { "epoch": 1.505299237515049, "grad_norm": 0.03125244822297969, "learning_rate": 1.7507793199241624e-06, "loss": 0.0006, "step": 228810 }, { "epoch": 1.5053650256902824, "grad_norm": 0.029323786868240723, "learning_rate": 1.7503429782546211e-06, "loss": 0.0005, "step": 228820 }, { "epoch": 1.505430813865516, "grad_norm": 0.03522960405678622, "learning_rate": 1.749906679429001e-06, "loss": 0.0003, "step": 228830 }, { "epoch": 1.5054966020407492, "grad_norm": 0.03006013928890718, "learning_rate": 1.7494704234530545e-06, "loss": 0.0005, "step": 228840 }, { "epoch": 1.5055623902159825, "grad_norm": 0.010744853479270617, "learning_rate": 1.7490342103325315e-06, "loss": 0.0006, "step": 228850 }, { "epoch": 1.505628178391216, "grad_norm": 0.032210471720232184, "learning_rate": 1.7485980400731861e-06, "loss": 0.0028, "step": 228860 }, { "epoch": 1.5056939665664495, "grad_norm": 0.055453240070736334, "learning_rate": 1.7481619126807675e-06, "loss": 0.001, "step": 228870 }, { "epoch": 1.5057597547416828, "grad_norm": 0.0038050121400588525, "learning_rate": 1.747725828161026e-06, "loss": 0.0011, "step": 228880 }, { "epoch": 1.505825542916916, "grad_norm": 0.008128647235669455, "learning_rate": 1.7472897865197102e-06, "loss": 0.0006, "step": 228890 }, { "epoch": 1.5058913310921493, "grad_norm": 0.001885306635389607, "learning_rate": 1.7468537877625697e-06, "loss": 0.0005, "step": 228900 }, { "epoch": 1.5059571192673828, "grad_norm": 0.033153932546826914, "learning_rate": 1.7464178318953518e-06, "loss": 0.0012, "step": 228910 }, { "epoch": 1.5060229074426164, "grad_norm": 0.004941038347934327, "learning_rate": 1.7459819189238054e-06, "loss": 0.0004, "step": 228920 }, { "epoch": 1.5060886956178496, "grad_norm": 0.007080144450166286, "learning_rate": 1.7455460488536769e-06, "loss": 0.0007, "step": 228930 }, { "epoch": 1.506154483793083, "grad_norm": 0.005041715285556811, "learning_rate": 1.745110221690713e-06, "loss": 0.0008, "step": 228940 }, { "epoch": 1.5062202719683164, "grad_norm": 0.05284855896609681, "learning_rate": 1.7446744374406593e-06, "loss": 0.0003, "step": 228950 }, { "epoch": 1.50628606014355, "grad_norm": 0.006825781867802735, "learning_rate": 1.7442386961092623e-06, "loss": 0.0002, "step": 228960 }, { "epoch": 1.5063518483187832, "grad_norm": 0.1268748354717381, "learning_rate": 1.7438029977022658e-06, "loss": 0.0009, "step": 228970 }, { "epoch": 1.5064176364940165, "grad_norm": 0.0321582872267001, "learning_rate": 1.7433673422254126e-06, "loss": 0.0009, "step": 228980 }, { "epoch": 1.50648342466925, "grad_norm": 0.0849076832849118, "learning_rate": 1.7429317296844506e-06, "loss": 0.0004, "step": 228990 }, { "epoch": 1.5065492128444833, "grad_norm": 0.03853321704416971, "learning_rate": 1.742496160085121e-06, "loss": 0.0007, "step": 229000 }, { "epoch": 1.5066150010197168, "grad_norm": 0.053457304597102635, "learning_rate": 1.7420606334331657e-06, "loss": 0.0003, "step": 229010 }, { "epoch": 1.50668078919495, "grad_norm": 0.014880621147793559, "learning_rate": 1.7416251497343272e-06, "loss": 0.0005, "step": 229020 }, { "epoch": 1.5067465773701834, "grad_norm": 0.02861535111062618, "learning_rate": 1.741189708994347e-06, "loss": 0.0012, "step": 229030 }, { "epoch": 1.5068123655454169, "grad_norm": 0.004113511326692772, "learning_rate": 1.7407543112189662e-06, "loss": 0.0003, "step": 229040 }, { "epoch": 1.5068781537206504, "grad_norm": 0.0811127864876343, "learning_rate": 1.7403189564139245e-06, "loss": 0.0004, "step": 229050 }, { "epoch": 1.5069439418958837, "grad_norm": 0.10843353526306228, "learning_rate": 1.7398836445849627e-06, "loss": 0.0007, "step": 229060 }, { "epoch": 1.507009730071117, "grad_norm": 0.0016000125257326842, "learning_rate": 1.739448375737819e-06, "loss": 0.0005, "step": 229070 }, { "epoch": 1.5070755182463504, "grad_norm": 0.2279496221782606, "learning_rate": 1.7390131498782325e-06, "loss": 0.0011, "step": 229080 }, { "epoch": 1.507141306421584, "grad_norm": 0.011296876813208746, "learning_rate": 1.7385779670119412e-06, "loss": 0.0006, "step": 229090 }, { "epoch": 1.5072070945968172, "grad_norm": 0.026534304619948483, "learning_rate": 1.7381428271446831e-06, "loss": 0.0004, "step": 229100 }, { "epoch": 1.5072728827720505, "grad_norm": 0.031087237949078736, "learning_rate": 1.7377077302821926e-06, "loss": 0.0005, "step": 229110 }, { "epoch": 1.5073386709472838, "grad_norm": 0.028713708191711818, "learning_rate": 1.73727267643021e-06, "loss": 0.0012, "step": 229120 }, { "epoch": 1.5074044591225173, "grad_norm": 0.021531229601979875, "learning_rate": 1.7368376655944697e-06, "loss": 0.0005, "step": 229130 }, { "epoch": 1.5074702472977508, "grad_norm": 0.02020885899915913, "learning_rate": 1.7364026977807058e-06, "loss": 0.0005, "step": 229140 }, { "epoch": 1.507536035472984, "grad_norm": 0.040222367172877554, "learning_rate": 1.7359677729946538e-06, "loss": 0.0008, "step": 229150 }, { "epoch": 1.5076018236482174, "grad_norm": 0.002577544926259456, "learning_rate": 1.7355328912420483e-06, "loss": 0.001, "step": 229160 }, { "epoch": 1.5076676118234509, "grad_norm": 0.05409089834295043, "learning_rate": 1.7350980525286215e-06, "loss": 0.0009, "step": 229170 }, { "epoch": 1.5077333999986844, "grad_norm": 0.04406483799992689, "learning_rate": 1.734663256860108e-06, "loss": 0.0006, "step": 229180 }, { "epoch": 1.5077991881739177, "grad_norm": 0.0319670192960055, "learning_rate": 1.7342285042422386e-06, "loss": 0.0005, "step": 229190 }, { "epoch": 1.507864976349151, "grad_norm": 0.0011220056343534878, "learning_rate": 1.733793794680746e-06, "loss": 0.0005, "step": 229200 }, { "epoch": 1.5079307645243842, "grad_norm": 0.02735225225060084, "learning_rate": 1.7333591281813616e-06, "loss": 0.0006, "step": 229210 }, { "epoch": 1.5079965526996177, "grad_norm": 0.03529639716819547, "learning_rate": 1.7329245047498155e-06, "loss": 0.0006, "step": 229220 }, { "epoch": 1.5080623408748512, "grad_norm": 0.02877223537023032, "learning_rate": 1.7324899243918385e-06, "loss": 0.0007, "step": 229230 }, { "epoch": 1.5081281290500845, "grad_norm": 0.04430907969431238, "learning_rate": 1.7320553871131578e-06, "loss": 0.0007, "step": 229240 }, { "epoch": 1.5081939172253178, "grad_norm": 0.03995115257900317, "learning_rate": 1.7316208929195066e-06, "loss": 0.0006, "step": 229250 }, { "epoch": 1.5082597054005513, "grad_norm": 0.09597509619059229, "learning_rate": 1.7311864418166108e-06, "loss": 0.0006, "step": 229260 }, { "epoch": 1.5083254935757848, "grad_norm": 0.043915132125385586, "learning_rate": 1.7307520338101984e-06, "loss": 0.0005, "step": 229270 }, { "epoch": 1.508391281751018, "grad_norm": 0.00615234899603245, "learning_rate": 1.7303176689059976e-06, "loss": 0.0002, "step": 229280 }, { "epoch": 1.5084570699262514, "grad_norm": 0.03141829885244819, "learning_rate": 1.7298833471097342e-06, "loss": 0.0008, "step": 229290 }, { "epoch": 1.5085228581014847, "grad_norm": 0.03587217184934905, "learning_rate": 1.7294490684271342e-06, "loss": 0.0005, "step": 229300 }, { "epoch": 1.5085886462767182, "grad_norm": 0.24084971516579481, "learning_rate": 1.729014832863924e-06, "loss": 0.0022, "step": 229310 }, { "epoch": 1.5086544344519517, "grad_norm": 0.08158062732728265, "learning_rate": 1.7285806404258282e-06, "loss": 0.0008, "step": 229320 }, { "epoch": 1.508720222627185, "grad_norm": 0.03196987376616294, "learning_rate": 1.7281464911185713e-06, "loss": 0.0003, "step": 229330 }, { "epoch": 1.5087860108024183, "grad_norm": 0.0018686779115344783, "learning_rate": 1.7277123849478771e-06, "loss": 0.0003, "step": 229340 }, { "epoch": 1.5088517989776518, "grad_norm": 0.008663633746492806, "learning_rate": 1.7272783219194689e-06, "loss": 0.0001, "step": 229350 }, { "epoch": 1.5089175871528853, "grad_norm": 0.008902238124380086, "learning_rate": 1.7268443020390695e-06, "loss": 0.0004, "step": 229360 }, { "epoch": 1.5089833753281185, "grad_norm": 0.09326984553446459, "learning_rate": 1.7264103253123993e-06, "loss": 0.0004, "step": 229370 }, { "epoch": 1.5090491635033518, "grad_norm": 0.04892473477960015, "learning_rate": 1.7259763917451839e-06, "loss": 0.0004, "step": 229380 }, { "epoch": 1.5091149516785853, "grad_norm": 8.169131037485014e-05, "learning_rate": 1.7255425013431415e-06, "loss": 0.0001, "step": 229390 }, { "epoch": 1.5091807398538186, "grad_norm": 0.005543623775793171, "learning_rate": 1.7251086541119937e-06, "loss": 0.0012, "step": 229400 }, { "epoch": 1.5092465280290521, "grad_norm": 0.0052761130250991205, "learning_rate": 1.7246748500574596e-06, "loss": 0.0002, "step": 229410 }, { "epoch": 1.5093123162042854, "grad_norm": 0.019424811430912292, "learning_rate": 1.724241089185259e-06, "loss": 0.0008, "step": 229420 }, { "epoch": 1.5093781043795187, "grad_norm": 0.09486396804353714, "learning_rate": 1.7238073715011105e-06, "loss": 0.001, "step": 229430 }, { "epoch": 1.5094438925547522, "grad_norm": 0.027568019431268263, "learning_rate": 1.7233736970107324e-06, "loss": 0.0004, "step": 229440 }, { "epoch": 1.5095096807299857, "grad_norm": 0.021920288016485635, "learning_rate": 1.7229400657198419e-06, "loss": 0.0006, "step": 229450 }, { "epoch": 1.509575468905219, "grad_norm": 0.017277559984542407, "learning_rate": 1.7225064776341565e-06, "loss": 0.0004, "step": 229460 }, { "epoch": 1.5096412570804523, "grad_norm": 0.010067190215654243, "learning_rate": 1.7220729327593926e-06, "loss": 0.0011, "step": 229470 }, { "epoch": 1.5097070452556858, "grad_norm": 0.008588077972871895, "learning_rate": 1.7216394311012663e-06, "loss": 0.0002, "step": 229480 }, { "epoch": 1.5097728334309193, "grad_norm": 0.05901608520684031, "learning_rate": 1.7212059726654922e-06, "loss": 0.0005, "step": 229490 }, { "epoch": 1.5098386216061526, "grad_norm": 0.004199438942824444, "learning_rate": 1.7207725574577844e-06, "loss": 0.0001, "step": 229500 }, { "epoch": 1.5099044097813858, "grad_norm": 0.04569001579282895, "learning_rate": 1.7203391854838602e-06, "loss": 0.0005, "step": 229510 }, { "epoch": 1.5099701979566191, "grad_norm": 0.007204722113516682, "learning_rate": 1.719905856749431e-06, "loss": 0.0009, "step": 229520 }, { "epoch": 1.5100359861318526, "grad_norm": 0.000509430766723157, "learning_rate": 1.7194725712602101e-06, "loss": 0.0002, "step": 229530 }, { "epoch": 1.5101017743070861, "grad_norm": 0.014956678802190931, "learning_rate": 1.7190393290219098e-06, "loss": 0.0005, "step": 229540 }, { "epoch": 1.5101675624823194, "grad_norm": 0.00689858144874614, "learning_rate": 1.718606130040243e-06, "loss": 0.0005, "step": 229550 }, { "epoch": 1.5102333506575527, "grad_norm": 0.001705290299980786, "learning_rate": 1.7181729743209202e-06, "loss": 0.0005, "step": 229560 }, { "epoch": 1.5102991388327862, "grad_norm": 0.008021474035966768, "learning_rate": 1.7177398618696523e-06, "loss": 0.0005, "step": 229570 }, { "epoch": 1.5103649270080197, "grad_norm": 0.06722905246948191, "learning_rate": 1.7173067926921493e-06, "loss": 0.0002, "step": 229580 }, { "epoch": 1.510430715183253, "grad_norm": 0.028918598747206006, "learning_rate": 1.7168737667941215e-06, "loss": 0.0008, "step": 229590 }, { "epoch": 1.5104965033584863, "grad_norm": 0.00395447692487654, "learning_rate": 1.7164407841812775e-06, "loss": 0.0004, "step": 229600 }, { "epoch": 1.5105622915337196, "grad_norm": 0.012195837566524884, "learning_rate": 1.716007844859326e-06, "loss": 0.0001, "step": 229610 }, { "epoch": 1.510628079708953, "grad_norm": 0.015276409886186133, "learning_rate": 1.7155749488339746e-06, "loss": 0.0005, "step": 229620 }, { "epoch": 1.5106938678841866, "grad_norm": 0.020754492147132556, "learning_rate": 1.7151420961109305e-06, "loss": 0.0006, "step": 229630 }, { "epoch": 1.5107596560594199, "grad_norm": 0.0164089550927082, "learning_rate": 1.7147092866959014e-06, "loss": 0.0004, "step": 229640 }, { "epoch": 1.5108254442346531, "grad_norm": 0.04180717908332969, "learning_rate": 1.7142765205945932e-06, "loss": 0.0006, "step": 229650 }, { "epoch": 1.5108912324098867, "grad_norm": 0.013899180151864648, "learning_rate": 1.713843797812711e-06, "loss": 0.0004, "step": 229660 }, { "epoch": 1.5109570205851202, "grad_norm": 0.013368427095926005, "learning_rate": 1.7134111183559604e-06, "loss": 0.0002, "step": 229670 }, { "epoch": 1.5110228087603534, "grad_norm": 0.04567631888120781, "learning_rate": 1.7129784822300455e-06, "loss": 0.0003, "step": 229680 }, { "epoch": 1.5110885969355867, "grad_norm": 0.0003386532893881908, "learning_rate": 1.7125458894406704e-06, "loss": 0.0003, "step": 229690 }, { "epoch": 1.5111543851108202, "grad_norm": 0.03167779720484914, "learning_rate": 1.7121133399935386e-06, "loss": 0.0013, "step": 229700 }, { "epoch": 1.5112201732860535, "grad_norm": 0.004364419243870334, "learning_rate": 1.711680833894353e-06, "loss": 0.0004, "step": 229710 }, { "epoch": 1.511285961461287, "grad_norm": 0.015461902174350475, "learning_rate": 1.7112483711488153e-06, "loss": 0.0005, "step": 229720 }, { "epoch": 1.5113517496365203, "grad_norm": 0.028368415072886004, "learning_rate": 1.710815951762626e-06, "loss": 0.0005, "step": 229730 }, { "epoch": 1.5114175378117536, "grad_norm": 0.018530351375571717, "learning_rate": 1.7103835757414895e-06, "loss": 0.0017, "step": 229740 }, { "epoch": 1.511483325986987, "grad_norm": 0.10368500379228184, "learning_rate": 1.709951243091104e-06, "loss": 0.0013, "step": 229750 }, { "epoch": 1.5115491141622206, "grad_norm": 0.05027811212231266, "learning_rate": 1.7095189538171703e-06, "loss": 0.0012, "step": 229760 }, { "epoch": 1.5116149023374539, "grad_norm": 0.032888925911760104, "learning_rate": 1.7090867079253876e-06, "loss": 0.0007, "step": 229770 }, { "epoch": 1.5116806905126872, "grad_norm": 0.022140225089609174, "learning_rate": 1.7086545054214538e-06, "loss": 0.0003, "step": 229780 }, { "epoch": 1.5117464786879207, "grad_norm": 0.03878577936729728, "learning_rate": 1.7082223463110682e-06, "loss": 0.0003, "step": 229790 }, { "epoch": 1.5118122668631542, "grad_norm": 0.07050345864718667, "learning_rate": 1.7077902305999277e-06, "loss": 0.0007, "step": 229800 }, { "epoch": 1.5118780550383875, "grad_norm": 0.00031685957680489547, "learning_rate": 1.7073581582937298e-06, "loss": 0.0011, "step": 229810 }, { "epoch": 1.5119438432136207, "grad_norm": 0.04355814109384565, "learning_rate": 1.7069261293981709e-06, "loss": 0.0006, "step": 229820 }, { "epoch": 1.512009631388854, "grad_norm": 0.030374528534911956, "learning_rate": 1.7064941439189464e-06, "loss": 0.0004, "step": 229830 }, { "epoch": 1.5120754195640875, "grad_norm": 0.011728690587792558, "learning_rate": 1.706062201861753e-06, "loss": 0.0003, "step": 229840 }, { "epoch": 1.512141207739321, "grad_norm": 0.002941062541698925, "learning_rate": 1.7056303032322841e-06, "loss": 0.0007, "step": 229850 }, { "epoch": 1.5122069959145543, "grad_norm": 0.03580310879210726, "learning_rate": 1.7051984480362328e-06, "loss": 0.0003, "step": 229860 }, { "epoch": 1.5122727840897876, "grad_norm": 0.04268287577763414, "learning_rate": 1.7047666362792965e-06, "loss": 0.0004, "step": 229870 }, { "epoch": 1.512338572265021, "grad_norm": 0.12306735196877, "learning_rate": 1.7043348679671656e-06, "loss": 0.0009, "step": 229880 }, { "epoch": 1.5124043604402546, "grad_norm": 0.03458633845120789, "learning_rate": 1.703903143105533e-06, "loss": 0.0002, "step": 229890 }, { "epoch": 1.512470148615488, "grad_norm": 0.005555721809096298, "learning_rate": 1.7034714617000914e-06, "loss": 0.0003, "step": 229900 }, { "epoch": 1.5125359367907212, "grad_norm": 0.005096540020396555, "learning_rate": 1.703039823756531e-06, "loss": 0.0005, "step": 229910 }, { "epoch": 1.5126017249659545, "grad_norm": 0.03409680001635709, "learning_rate": 1.7026082292805436e-06, "loss": 0.0002, "step": 229920 }, { "epoch": 1.512667513141188, "grad_norm": 0.0030978893554396386, "learning_rate": 1.7021766782778186e-06, "loss": 0.0004, "step": 229930 }, { "epoch": 1.5127333013164215, "grad_norm": 0.036296230177558425, "learning_rate": 1.7017451707540462e-06, "loss": 0.0003, "step": 229940 }, { "epoch": 1.5127990894916548, "grad_norm": 0.005116731532195114, "learning_rate": 1.7013137067149143e-06, "loss": 0.0005, "step": 229950 }, { "epoch": 1.512864877666888, "grad_norm": 0.00026143963752429586, "learning_rate": 1.700882286166113e-06, "loss": 0.0007, "step": 229960 }, { "epoch": 1.5129306658421215, "grad_norm": 0.00039346370420512454, "learning_rate": 1.7004509091133292e-06, "loss": 0.0003, "step": 229970 }, { "epoch": 1.512996454017355, "grad_norm": 0.05496860186763439, "learning_rate": 1.7000195755622505e-06, "loss": 0.0004, "step": 229980 }, { "epoch": 1.5130622421925883, "grad_norm": 0.0022784004670584244, "learning_rate": 1.6995882855185624e-06, "loss": 0.0009, "step": 229990 }, { "epoch": 1.5131280303678216, "grad_norm": 0.002738106585406448, "learning_rate": 1.6991570389879536e-06, "loss": 0.0002, "step": 230000 }, { "epoch": 1.5131938185430551, "grad_norm": 0.0772345778229762, "learning_rate": 1.6987258359761082e-06, "loss": 0.0003, "step": 230010 }, { "epoch": 1.5132596067182884, "grad_norm": 0.00036939984447696136, "learning_rate": 1.698294676488712e-06, "loss": 0.0006, "step": 230020 }, { "epoch": 1.513325394893522, "grad_norm": 0.03365297698051026, "learning_rate": 1.6978635605314486e-06, "loss": 0.0003, "step": 230030 }, { "epoch": 1.5133911830687552, "grad_norm": 0.011962345173602121, "learning_rate": 1.6974324881100024e-06, "loss": 0.0003, "step": 230040 }, { "epoch": 1.5134569712439885, "grad_norm": 0.03205724379026358, "learning_rate": 1.6970014592300566e-06, "loss": 0.001, "step": 230050 }, { "epoch": 1.513522759419222, "grad_norm": 0.10673161371001748, "learning_rate": 1.6965704738972937e-06, "loss": 0.0006, "step": 230060 }, { "epoch": 1.5135885475944555, "grad_norm": 0.05618331405408448, "learning_rate": 1.6961395321173962e-06, "loss": 0.0006, "step": 230070 }, { "epoch": 1.5136543357696888, "grad_norm": 0.006124249771526499, "learning_rate": 1.6957086338960454e-06, "loss": 0.0005, "step": 230080 }, { "epoch": 1.513720123944922, "grad_norm": 0.0009820775704832562, "learning_rate": 1.695277779238923e-06, "loss": 0.0004, "step": 230090 }, { "epoch": 1.5137859121201556, "grad_norm": 0.01748560062610394, "learning_rate": 1.6948469681517082e-06, "loss": 0.0004, "step": 230100 }, { "epoch": 1.513851700295389, "grad_norm": 0.023759473203242645, "learning_rate": 1.6944162006400822e-06, "loss": 0.0004, "step": 230110 }, { "epoch": 1.5139174884706224, "grad_norm": 0.023306810513116227, "learning_rate": 1.693985476709722e-06, "loss": 0.0007, "step": 230120 }, { "epoch": 1.5139832766458556, "grad_norm": 0.07518654121285105, "learning_rate": 1.6935547963663097e-06, "loss": 0.0005, "step": 230130 }, { "epoch": 1.514049064821089, "grad_norm": 0.03879243119440837, "learning_rate": 1.6931241596155213e-06, "loss": 0.001, "step": 230140 }, { "epoch": 1.5141148529963224, "grad_norm": 0.05591113233541221, "learning_rate": 1.692693566463035e-06, "loss": 0.0007, "step": 230150 }, { "epoch": 1.514180641171556, "grad_norm": 0.0123218301728187, "learning_rate": 1.6922630169145283e-06, "loss": 0.0004, "step": 230160 }, { "epoch": 1.5142464293467892, "grad_norm": 0.0008768158393674872, "learning_rate": 1.691832510975676e-06, "loss": 0.0007, "step": 230170 }, { "epoch": 1.5143122175220225, "grad_norm": 0.016036602790054658, "learning_rate": 1.6914020486521555e-06, "loss": 0.0001, "step": 230180 }, { "epoch": 1.514378005697256, "grad_norm": 0.0450443907638909, "learning_rate": 1.6909716299496416e-06, "loss": 0.001, "step": 230190 }, { "epoch": 1.5144437938724895, "grad_norm": 0.005102157761866099, "learning_rate": 1.6905412548738088e-06, "loss": 0.0008, "step": 230200 }, { "epoch": 1.5145095820477228, "grad_norm": 0.04193407176159735, "learning_rate": 1.690110923430331e-06, "loss": 0.0005, "step": 230210 }, { "epoch": 1.514575370222956, "grad_norm": 0.07227569845797627, "learning_rate": 1.6896806356248823e-06, "loss": 0.0008, "step": 230220 }, { "epoch": 1.5146411583981894, "grad_norm": 0.058286951840825135, "learning_rate": 1.6892503914631354e-06, "loss": 0.0007, "step": 230230 }, { "epoch": 1.5147069465734229, "grad_norm": 0.03646280956753448, "learning_rate": 1.688820190950763e-06, "loss": 0.0005, "step": 230240 }, { "epoch": 1.5147727347486564, "grad_norm": 0.008106049324453695, "learning_rate": 1.6883900340934346e-06, "loss": 0.0003, "step": 230250 }, { "epoch": 1.5148385229238897, "grad_norm": 0.010856797757717864, "learning_rate": 1.687959920896825e-06, "loss": 0.0011, "step": 230260 }, { "epoch": 1.514904311099123, "grad_norm": 0.01885285340471997, "learning_rate": 1.6875298513666038e-06, "loss": 0.0005, "step": 230270 }, { "epoch": 1.5149700992743564, "grad_norm": 0.02437293932575239, "learning_rate": 1.6870998255084409e-06, "loss": 0.0008, "step": 230280 }, { "epoch": 1.51503588744959, "grad_norm": 0.056671673741263, "learning_rate": 1.6866698433280049e-06, "loss": 0.0005, "step": 230290 }, { "epoch": 1.5151016756248232, "grad_norm": 0.05002375613833165, "learning_rate": 1.6862399048309658e-06, "loss": 0.0007, "step": 230300 }, { "epoch": 1.5151674638000565, "grad_norm": 0.017987924379814526, "learning_rate": 1.6858100100229918e-06, "loss": 0.0003, "step": 230310 }, { "epoch": 1.5152332519752898, "grad_norm": 0.037073965078802916, "learning_rate": 1.6853801589097502e-06, "loss": 0.0005, "step": 230320 }, { "epoch": 1.5152990401505233, "grad_norm": 0.06656208035807638, "learning_rate": 1.684950351496909e-06, "loss": 0.0008, "step": 230330 }, { "epoch": 1.5153648283257568, "grad_norm": 0.00946249998728746, "learning_rate": 1.6845205877901339e-06, "loss": 0.0002, "step": 230340 }, { "epoch": 1.51543061650099, "grad_norm": 0.0130258030496539, "learning_rate": 1.6840908677950913e-06, "loss": 0.0004, "step": 230350 }, { "epoch": 1.5154964046762234, "grad_norm": 0.08928005010233173, "learning_rate": 1.6836611915174471e-06, "loss": 0.0008, "step": 230360 }, { "epoch": 1.5155621928514569, "grad_norm": 0.03283212037320308, "learning_rate": 1.6832315589628657e-06, "loss": 0.0006, "step": 230370 }, { "epoch": 1.5156279810266904, "grad_norm": 0.020238604343538528, "learning_rate": 1.6828019701370103e-06, "loss": 0.0006, "step": 230380 }, { "epoch": 1.5156937692019237, "grad_norm": 0.02236181139140738, "learning_rate": 1.6823724250455475e-06, "loss": 0.0007, "step": 230390 }, { "epoch": 1.515759557377157, "grad_norm": 0.016358422242300455, "learning_rate": 1.6819429236941387e-06, "loss": 0.0005, "step": 230400 }, { "epoch": 1.5158253455523905, "grad_norm": 0.012961196936077057, "learning_rate": 1.6815134660884468e-06, "loss": 0.0003, "step": 230410 }, { "epoch": 1.515891133727624, "grad_norm": 0.006979662731894344, "learning_rate": 1.6810840522341344e-06, "loss": 0.0003, "step": 230420 }, { "epoch": 1.5159569219028572, "grad_norm": 0.02816039809058765, "learning_rate": 1.6806546821368619e-06, "loss": 0.0009, "step": 230430 }, { "epoch": 1.5160227100780905, "grad_norm": 0.018876560324607102, "learning_rate": 1.6802253558022908e-06, "loss": 0.0002, "step": 230440 }, { "epoch": 1.5160884982533238, "grad_norm": 0.28268563326265783, "learning_rate": 1.6797960732360813e-06, "loss": 0.0025, "step": 230450 }, { "epoch": 1.5161542864285573, "grad_norm": 0.01598326914821519, "learning_rate": 1.6793668344438928e-06, "loss": 0.0005, "step": 230460 }, { "epoch": 1.5162200746037908, "grad_norm": 0.04132886839919576, "learning_rate": 1.678937639431385e-06, "loss": 0.0006, "step": 230470 }, { "epoch": 1.516285862779024, "grad_norm": 0.024176218919936102, "learning_rate": 1.6785084882042158e-06, "loss": 0.0003, "step": 230480 }, { "epoch": 1.5163516509542574, "grad_norm": 0.015493796164825257, "learning_rate": 1.678079380768044e-06, "loss": 0.001, "step": 230490 }, { "epoch": 1.516417439129491, "grad_norm": 0.05745835392823232, "learning_rate": 1.6776503171285264e-06, "loss": 0.0005, "step": 230500 }, { "epoch": 1.5164832273047244, "grad_norm": 0.01082830073448022, "learning_rate": 1.6772212972913182e-06, "loss": 0.0007, "step": 230510 }, { "epoch": 1.5165490154799577, "grad_norm": 0.06846149228301472, "learning_rate": 1.676792321262079e-06, "loss": 0.0002, "step": 230520 }, { "epoch": 1.516614803655191, "grad_norm": 0.015215090423498969, "learning_rate": 1.6763633890464632e-06, "loss": 0.0002, "step": 230530 }, { "epoch": 1.5166805918304243, "grad_norm": 0.04583215834095795, "learning_rate": 1.6759345006501254e-06, "loss": 0.0005, "step": 230540 }, { "epoch": 1.5167463800056578, "grad_norm": 0.016110272825419124, "learning_rate": 1.6755056560787208e-06, "loss": 0.0006, "step": 230550 }, { "epoch": 1.5168121681808913, "grad_norm": 0.21381139566673188, "learning_rate": 1.6750768553379027e-06, "loss": 0.0006, "step": 230560 }, { "epoch": 1.5168779563561245, "grad_norm": 0.014315715354647854, "learning_rate": 1.6746480984333247e-06, "loss": 0.0007, "step": 230570 }, { "epoch": 1.5169437445313578, "grad_norm": 0.01096594410948237, "learning_rate": 1.674219385370639e-06, "loss": 0.0007, "step": 230580 }, { "epoch": 1.5170095327065913, "grad_norm": 0.06849314736410093, "learning_rate": 1.673790716155499e-06, "loss": 0.0007, "step": 230590 }, { "epoch": 1.5170753208818248, "grad_norm": 0.09923730152624528, "learning_rate": 1.6733620907935556e-06, "loss": 0.0005, "step": 230600 }, { "epoch": 1.5171411090570581, "grad_norm": 0.050675012041023905, "learning_rate": 1.6729335092904597e-06, "loss": 0.0005, "step": 230610 }, { "epoch": 1.5172068972322914, "grad_norm": 0.00030571476731896776, "learning_rate": 1.6725049716518622e-06, "loss": 0.0002, "step": 230620 }, { "epoch": 1.5172726854075247, "grad_norm": 0.026381708281132787, "learning_rate": 1.6720764778834132e-06, "loss": 0.0004, "step": 230630 }, { "epoch": 1.5173384735827582, "grad_norm": 0.11848174658429718, "learning_rate": 1.671648027990761e-06, "loss": 0.0006, "step": 230640 }, { "epoch": 1.5174042617579917, "grad_norm": 0.002454860424430525, "learning_rate": 1.671219621979554e-06, "loss": 0.0004, "step": 230650 }, { "epoch": 1.517470049933225, "grad_norm": 0.015658833994088555, "learning_rate": 1.6707912598554426e-06, "loss": 0.0009, "step": 230660 }, { "epoch": 1.5175358381084583, "grad_norm": 0.030088759321940266, "learning_rate": 1.670362941624073e-06, "loss": 0.0006, "step": 230670 }, { "epoch": 1.5176016262836918, "grad_norm": 0.07407561113089177, "learning_rate": 1.6699346672910926e-06, "loss": 0.0012, "step": 230680 }, { "epoch": 1.5176674144589253, "grad_norm": 0.0017831127467535656, "learning_rate": 1.6695064368621477e-06, "loss": 0.0004, "step": 230690 }, { "epoch": 1.5177332026341586, "grad_norm": 0.01612472044815146, "learning_rate": 1.6690782503428837e-06, "loss": 0.0003, "step": 230700 }, { "epoch": 1.5177989908093918, "grad_norm": 0.10550530995311501, "learning_rate": 1.6686501077389466e-06, "loss": 0.0007, "step": 230710 }, { "epoch": 1.5178647789846254, "grad_norm": 0.00842142355311872, "learning_rate": 1.6682220090559803e-06, "loss": 0.0005, "step": 230720 }, { "epoch": 1.5179305671598586, "grad_norm": 0.06659803353638759, "learning_rate": 1.6677939542996297e-06, "loss": 0.0009, "step": 230730 }, { "epoch": 1.5179963553350921, "grad_norm": 0.006480346982875321, "learning_rate": 1.6673659434755374e-06, "loss": 0.0003, "step": 230740 }, { "epoch": 1.5180621435103254, "grad_norm": 0.034646015829134874, "learning_rate": 1.6669379765893473e-06, "loss": 0.0003, "step": 230750 }, { "epoch": 1.5181279316855587, "grad_norm": 0.014036495449811953, "learning_rate": 1.666510053646701e-06, "loss": 0.0006, "step": 230760 }, { "epoch": 1.5181937198607922, "grad_norm": 0.015056893083347957, "learning_rate": 1.6660821746532413e-06, "loss": 0.0006, "step": 230770 }, { "epoch": 1.5182595080360257, "grad_norm": 0.03508573738304687, "learning_rate": 1.665654339614607e-06, "loss": 0.0004, "step": 230780 }, { "epoch": 1.518325296211259, "grad_norm": 0.07599783923902653, "learning_rate": 1.6652265485364415e-06, "loss": 0.0004, "step": 230790 }, { "epoch": 1.5183910843864923, "grad_norm": 0.007978161811094127, "learning_rate": 1.6647988014243844e-06, "loss": 0.0002, "step": 230800 }, { "epoch": 1.5184568725617258, "grad_norm": 0.02929157766092452, "learning_rate": 1.6643710982840744e-06, "loss": 0.0003, "step": 230810 }, { "epoch": 1.5185226607369593, "grad_norm": 0.09738411577907088, "learning_rate": 1.6639434391211507e-06, "loss": 0.0007, "step": 230820 }, { "epoch": 1.5185884489121926, "grad_norm": 0.0003970625653914604, "learning_rate": 1.6635158239412513e-06, "loss": 0.0002, "step": 230830 }, { "epoch": 1.5186542370874259, "grad_norm": 0.12509109204667232, "learning_rate": 1.6630882527500148e-06, "loss": 0.0006, "step": 230840 }, { "epoch": 1.5187200252626591, "grad_norm": 0.18230507613051164, "learning_rate": 1.6626607255530769e-06, "loss": 0.0007, "step": 230850 }, { "epoch": 1.5187858134378927, "grad_norm": 0.020961724286305886, "learning_rate": 1.6622332423560755e-06, "loss": 0.0003, "step": 230860 }, { "epoch": 1.5188516016131262, "grad_norm": 0.016001312727861268, "learning_rate": 1.6618058031646462e-06, "loss": 0.0003, "step": 230870 }, { "epoch": 1.5189173897883594, "grad_norm": 0.02435607818392665, "learning_rate": 1.6613784079844242e-06, "loss": 0.0005, "step": 230880 }, { "epoch": 1.5189831779635927, "grad_norm": 0.053157488584728615, "learning_rate": 1.6609510568210447e-06, "loss": 0.0004, "step": 230890 }, { "epoch": 1.5190489661388262, "grad_norm": 0.0072672240177368876, "learning_rate": 1.6605237496801413e-06, "loss": 0.0006, "step": 230900 }, { "epoch": 1.5191147543140597, "grad_norm": 0.0010264057444791062, "learning_rate": 1.6600964865673469e-06, "loss": 0.0009, "step": 230910 }, { "epoch": 1.519180542489293, "grad_norm": 0.04615337868233782, "learning_rate": 1.6596692674882969e-06, "loss": 0.0006, "step": 230920 }, { "epoch": 1.5192463306645263, "grad_norm": 0.028291043616498184, "learning_rate": 1.6592420924486229e-06, "loss": 0.0005, "step": 230930 }, { "epoch": 1.5193121188397596, "grad_norm": 0.011155950119386434, "learning_rate": 1.6588149614539572e-06, "loss": 0.0006, "step": 230940 }, { "epoch": 1.519377907014993, "grad_norm": 0.021360199011670584, "learning_rate": 1.6583878745099303e-06, "loss": 0.0006, "step": 230950 }, { "epoch": 1.5194436951902266, "grad_norm": 0.016698130861294718, "learning_rate": 1.657960831622173e-06, "loss": 0.0005, "step": 230960 }, { "epoch": 1.5195094833654599, "grad_norm": 0.03884675575085364, "learning_rate": 1.657533832796316e-06, "loss": 0.0012, "step": 230970 }, { "epoch": 1.5195752715406932, "grad_norm": 0.006724856732588176, "learning_rate": 1.6571068780379885e-06, "loss": 0.0004, "step": 230980 }, { "epoch": 1.5196410597159267, "grad_norm": 0.033207402514895096, "learning_rate": 1.6566799673528195e-06, "loss": 0.0006, "step": 230990 }, { "epoch": 1.5197068478911602, "grad_norm": 0.1618247312898207, "learning_rate": 1.6562531007464383e-06, "loss": 0.001, "step": 231000 }, { "epoch": 1.5197726360663935, "grad_norm": 0.042155261527163194, "learning_rate": 1.6558262782244717e-06, "loss": 0.0006, "step": 231010 }, { "epoch": 1.5198384242416267, "grad_norm": 0.003068333334721458, "learning_rate": 1.6553994997925472e-06, "loss": 0.0007, "step": 231020 }, { "epoch": 1.5199042124168602, "grad_norm": 0.05441028624539843, "learning_rate": 1.654972765456292e-06, "loss": 0.0005, "step": 231030 }, { "epoch": 1.5199700005920935, "grad_norm": 0.0014048232425117582, "learning_rate": 1.6545460752213305e-06, "loss": 0.0004, "step": 231040 }, { "epoch": 1.520035788767327, "grad_norm": 0.01938085493679179, "learning_rate": 1.654119429093291e-06, "loss": 0.0002, "step": 231050 }, { "epoch": 1.5201015769425603, "grad_norm": 0.015745564219268166, "learning_rate": 1.6536928270777969e-06, "loss": 0.0003, "step": 231060 }, { "epoch": 1.5201673651177936, "grad_norm": 0.07390254098787657, "learning_rate": 1.6532662691804736e-06, "loss": 0.0004, "step": 231070 }, { "epoch": 1.520233153293027, "grad_norm": 0.00032024289717531997, "learning_rate": 1.652839755406943e-06, "loss": 0.0005, "step": 231080 }, { "epoch": 1.5202989414682606, "grad_norm": 0.01781055765007466, "learning_rate": 1.6524132857628305e-06, "loss": 0.0015, "step": 231090 }, { "epoch": 1.520364729643494, "grad_norm": 0.0019922719967640783, "learning_rate": 1.6519868602537569e-06, "loss": 0.0003, "step": 231100 }, { "epoch": 1.5204305178187272, "grad_norm": 0.0020808535687713743, "learning_rate": 1.6515604788853457e-06, "loss": 0.0008, "step": 231110 }, { "epoch": 1.5204963059939607, "grad_norm": 0.002695633255890829, "learning_rate": 1.651134141663217e-06, "loss": 0.0004, "step": 231120 }, { "epoch": 1.5205620941691942, "grad_norm": 0.16361177670504898, "learning_rate": 1.6507078485929927e-06, "loss": 0.0004, "step": 231130 }, { "epoch": 1.5206278823444275, "grad_norm": 0.040279860631724985, "learning_rate": 1.650281599680293e-06, "loss": 0.0007, "step": 231140 }, { "epoch": 1.5206936705196608, "grad_norm": 0.004123053499918956, "learning_rate": 1.6498553949307372e-06, "loss": 0.0008, "step": 231150 }, { "epoch": 1.520759458694894, "grad_norm": 0.0801442000857061, "learning_rate": 1.6494292343499447e-06, "loss": 0.0006, "step": 231160 }, { "epoch": 1.5208252468701275, "grad_norm": 0.026745815835294385, "learning_rate": 1.6490031179435318e-06, "loss": 0.0005, "step": 231170 }, { "epoch": 1.520891035045361, "grad_norm": 0.06560534550932767, "learning_rate": 1.6485770457171208e-06, "loss": 0.0006, "step": 231180 }, { "epoch": 1.5209568232205943, "grad_norm": 0.10265989205690461, "learning_rate": 1.648151017676327e-06, "loss": 0.0004, "step": 231190 }, { "epoch": 1.5210226113958276, "grad_norm": 0.08016242312573983, "learning_rate": 1.6477250338267675e-06, "loss": 0.0005, "step": 231200 }, { "epoch": 1.5210883995710611, "grad_norm": 0.010102224172996997, "learning_rate": 1.6472990941740574e-06, "loss": 0.001, "step": 231210 }, { "epoch": 1.5211541877462946, "grad_norm": 0.0005205013659447189, "learning_rate": 1.646873198723814e-06, "loss": 0.0002, "step": 231220 }, { "epoch": 1.521219975921528, "grad_norm": 0.002731028515604171, "learning_rate": 1.646447347481651e-06, "loss": 0.0002, "step": 231230 }, { "epoch": 1.5212857640967612, "grad_norm": 0.03647952509968126, "learning_rate": 1.6460215404531838e-06, "loss": 0.001, "step": 231240 }, { "epoch": 1.5213515522719945, "grad_norm": 0.012168751902274873, "learning_rate": 1.6455957776440256e-06, "loss": 0.0007, "step": 231250 }, { "epoch": 1.521417340447228, "grad_norm": 0.08171333310342213, "learning_rate": 1.6451700590597902e-06, "loss": 0.0013, "step": 231260 }, { "epoch": 1.5214831286224615, "grad_norm": 0.01476105548828093, "learning_rate": 1.6447443847060896e-06, "loss": 0.0003, "step": 231270 }, { "epoch": 1.5215489167976948, "grad_norm": 0.07519040871323072, "learning_rate": 1.644318754588537e-06, "loss": 0.0004, "step": 231280 }, { "epoch": 1.521614704972928, "grad_norm": 0.04080916173601416, "learning_rate": 1.6438931687127435e-06, "loss": 0.0005, "step": 231290 }, { "epoch": 1.5216804931481616, "grad_norm": 0.011180417314160175, "learning_rate": 1.6434676270843185e-06, "loss": 0.0002, "step": 231300 }, { "epoch": 1.521746281323395, "grad_norm": 0.011329166067837198, "learning_rate": 1.643042129708875e-06, "loss": 0.0003, "step": 231310 }, { "epoch": 1.5218120694986284, "grad_norm": 0.014598969720959394, "learning_rate": 1.6426166765920225e-06, "loss": 0.0007, "step": 231320 }, { "epoch": 1.5218778576738616, "grad_norm": 0.024971964466417255, "learning_rate": 1.6421912677393692e-06, "loss": 0.0003, "step": 231330 }, { "epoch": 1.5219436458490951, "grad_norm": 0.025068460040352646, "learning_rate": 1.6417659031565236e-06, "loss": 0.0008, "step": 231340 }, { "epoch": 1.5220094340243284, "grad_norm": 0.05851908915473992, "learning_rate": 1.6413405828490947e-06, "loss": 0.0003, "step": 231350 }, { "epoch": 1.522075222199562, "grad_norm": 0.000983154785419527, "learning_rate": 1.6409153068226897e-06, "loss": 0.0006, "step": 231360 }, { "epoch": 1.5221410103747952, "grad_norm": 0.0032733872050213116, "learning_rate": 1.640490075082915e-06, "loss": 0.0004, "step": 231370 }, { "epoch": 1.5222067985500285, "grad_norm": 0.011728776701600324, "learning_rate": 1.640064887635377e-06, "loss": 0.0007, "step": 231380 }, { "epoch": 1.522272586725262, "grad_norm": 0.009917314250585854, "learning_rate": 1.6396397444856816e-06, "loss": 0.0006, "step": 231390 }, { "epoch": 1.5223383749004955, "grad_norm": 0.0407090903994346, "learning_rate": 1.6392146456394343e-06, "loss": 0.0008, "step": 231400 }, { "epoch": 1.5224041630757288, "grad_norm": 0.007684926239169669, "learning_rate": 1.638789591102239e-06, "loss": 0.0007, "step": 231410 }, { "epoch": 1.522469951250962, "grad_norm": 0.06731063487465425, "learning_rate": 1.6383645808797006e-06, "loss": 0.0006, "step": 231420 }, { "epoch": 1.5225357394261956, "grad_norm": 0.03676096449448229, "learning_rate": 1.6379396149774196e-06, "loss": 0.0003, "step": 231430 }, { "epoch": 1.522601527601429, "grad_norm": 0.0032599149362914064, "learning_rate": 1.6375146934010028e-06, "loss": 0.0004, "step": 231440 }, { "epoch": 1.5226673157766624, "grad_norm": 0.041812058178232504, "learning_rate": 1.637089816156051e-06, "loss": 0.001, "step": 231450 }, { "epoch": 1.5227331039518957, "grad_norm": 0.051168746590600965, "learning_rate": 1.6366649832481657e-06, "loss": 0.0005, "step": 231460 }, { "epoch": 1.522798892127129, "grad_norm": 0.07971534885913598, "learning_rate": 1.6362401946829476e-06, "loss": 0.001, "step": 231470 }, { "epoch": 1.5228646803023624, "grad_norm": 0.0001890559062579641, "learning_rate": 1.6358154504659973e-06, "loss": 0.0009, "step": 231480 }, { "epoch": 1.522930468477596, "grad_norm": 0.006111817944977698, "learning_rate": 1.6353907506029172e-06, "loss": 0.0006, "step": 231490 }, { "epoch": 1.5229962566528292, "grad_norm": 0.02055668802492637, "learning_rate": 1.6349660950993012e-06, "loss": 0.0004, "step": 231500 }, { "epoch": 1.5230620448280625, "grad_norm": 0.010208914375057205, "learning_rate": 1.634541483960752e-06, "loss": 0.0007, "step": 231510 }, { "epoch": 1.523127833003296, "grad_norm": 0.05175431291687012, "learning_rate": 1.6341169171928667e-06, "loss": 0.0011, "step": 231520 }, { "epoch": 1.5231936211785295, "grad_norm": 0.012062806056335706, "learning_rate": 1.6336923948012407e-06, "loss": 0.0007, "step": 231530 }, { "epoch": 1.5232594093537628, "grad_norm": 0.0026499281935420554, "learning_rate": 1.633267916791475e-06, "loss": 0.0002, "step": 231540 }, { "epoch": 1.523325197528996, "grad_norm": 0.006127845194405801, "learning_rate": 1.6328434831691643e-06, "loss": 0.0002, "step": 231550 }, { "epoch": 1.5233909857042294, "grad_norm": 0.028707460647622064, "learning_rate": 1.6324190939399043e-06, "loss": 0.0002, "step": 231560 }, { "epoch": 1.5234567738794629, "grad_norm": 0.020995246348523452, "learning_rate": 1.6319947491092896e-06, "loss": 0.0003, "step": 231570 }, { "epoch": 1.5235225620546964, "grad_norm": 0.0713983302824336, "learning_rate": 1.631570448682916e-06, "loss": 0.0021, "step": 231580 }, { "epoch": 1.5235883502299297, "grad_norm": 0.15214827200511838, "learning_rate": 1.6311461926663763e-06, "loss": 0.0007, "step": 231590 }, { "epoch": 1.523654138405163, "grad_norm": 0.033207489993432975, "learning_rate": 1.6307219810652646e-06, "loss": 0.0009, "step": 231600 }, { "epoch": 1.5237199265803965, "grad_norm": 0.00023935913787056427, "learning_rate": 1.6302978138851739e-06, "loss": 0.0005, "step": 231610 }, { "epoch": 1.52378571475563, "grad_norm": 0.008742366856271428, "learning_rate": 1.6298736911316965e-06, "loss": 0.0008, "step": 231620 }, { "epoch": 1.5238515029308632, "grad_norm": 0.006212338520004672, "learning_rate": 1.6294496128104231e-06, "loss": 0.0004, "step": 231630 }, { "epoch": 1.5239172911060965, "grad_norm": 0.035676886344863025, "learning_rate": 1.629025578926946e-06, "loss": 0.0002, "step": 231640 }, { "epoch": 1.5239830792813298, "grad_norm": 0.019742870100129848, "learning_rate": 1.6286015894868556e-06, "loss": 0.0003, "step": 231650 }, { "epoch": 1.5240488674565633, "grad_norm": 0.058203472011628, "learning_rate": 1.6281776444957393e-06, "loss": 0.0004, "step": 231660 }, { "epoch": 1.5241146556317968, "grad_norm": 0.07338287631598356, "learning_rate": 1.6277537439591906e-06, "loss": 0.0054, "step": 231670 }, { "epoch": 1.52418044380703, "grad_norm": 0.07679176214174012, "learning_rate": 1.6273298878827965e-06, "loss": 0.0007, "step": 231680 }, { "epoch": 1.5242462319822634, "grad_norm": 0.03707973264356356, "learning_rate": 1.6269060762721445e-06, "loss": 0.0008, "step": 231690 }, { "epoch": 1.524312020157497, "grad_norm": 0.04310826580746259, "learning_rate": 1.626482309132823e-06, "loss": 0.0015, "step": 231700 }, { "epoch": 1.5243778083327304, "grad_norm": 0.03199677661265853, "learning_rate": 1.6260585864704187e-06, "loss": 0.0006, "step": 231710 }, { "epoch": 1.5244435965079637, "grad_norm": 0.07473799045047885, "learning_rate": 1.6256349082905183e-06, "loss": 0.0007, "step": 231720 }, { "epoch": 1.524509384683197, "grad_norm": 0.03873803318474985, "learning_rate": 1.625211274598707e-06, "loss": 0.0009, "step": 231730 }, { "epoch": 1.5245751728584305, "grad_norm": 0.049109378230059605, "learning_rate": 1.6247876854005701e-06, "loss": 0.0007, "step": 231740 }, { "epoch": 1.5246409610336638, "grad_norm": 0.01683783816208941, "learning_rate": 1.6243641407016931e-06, "loss": 0.0005, "step": 231750 }, { "epoch": 1.5247067492088973, "grad_norm": 0.001394536491944626, "learning_rate": 1.623940640507659e-06, "loss": 0.0003, "step": 231760 }, { "epoch": 1.5247725373841305, "grad_norm": 0.018341488412817, "learning_rate": 1.6235171848240522e-06, "loss": 0.0004, "step": 231770 }, { "epoch": 1.5248383255593638, "grad_norm": 0.019281374940730775, "learning_rate": 1.6230937736564551e-06, "loss": 0.0002, "step": 231780 }, { "epoch": 1.5249041137345973, "grad_norm": 0.010993249557629108, "learning_rate": 1.6226704070104482e-06, "loss": 0.0004, "step": 231790 }, { "epoch": 1.5249699019098308, "grad_norm": 0.013813048539416355, "learning_rate": 1.6222470848916172e-06, "loss": 0.0005, "step": 231800 }, { "epoch": 1.5250356900850641, "grad_norm": 0.0006466523088908171, "learning_rate": 1.6218238073055408e-06, "loss": 0.0002, "step": 231810 }, { "epoch": 1.5251014782602974, "grad_norm": 0.006553183652829673, "learning_rate": 1.6214005742577998e-06, "loss": 0.0004, "step": 231820 }, { "epoch": 1.525167266435531, "grad_norm": 0.0035731224901323763, "learning_rate": 1.6209773857539746e-06, "loss": 0.0006, "step": 231830 }, { "epoch": 1.5252330546107644, "grad_norm": 0.06074626029484829, "learning_rate": 1.6205542417996445e-06, "loss": 0.0004, "step": 231840 }, { "epoch": 1.5252988427859977, "grad_norm": 0.019869023181233878, "learning_rate": 1.6201311424003874e-06, "loss": 0.0004, "step": 231850 }, { "epoch": 1.525364630961231, "grad_norm": 0.06029057159810463, "learning_rate": 1.6197080875617827e-06, "loss": 0.0006, "step": 231860 }, { "epoch": 1.5254304191364643, "grad_norm": 0.005797692136785279, "learning_rate": 1.6192850772894069e-06, "loss": 0.0003, "step": 231870 }, { "epoch": 1.5254962073116978, "grad_norm": 0.025943947436216527, "learning_rate": 1.6188621115888382e-06, "loss": 0.0006, "step": 231880 }, { "epoch": 1.5255619954869313, "grad_norm": 0.052538251645860644, "learning_rate": 1.618439190465652e-06, "loss": 0.001, "step": 231890 }, { "epoch": 1.5256277836621646, "grad_norm": 0.0332007269804694, "learning_rate": 1.6180163139254246e-06, "loss": 0.0007, "step": 231900 }, { "epoch": 1.5256935718373978, "grad_norm": 0.012445067959321692, "learning_rate": 1.6175934819737317e-06, "loss": 0.001, "step": 231910 }, { "epoch": 1.5257593600126313, "grad_norm": 0.026932845235990557, "learning_rate": 1.6171706946161453e-06, "loss": 0.001, "step": 231920 }, { "epoch": 1.5258251481878649, "grad_norm": 0.006457180064082572, "learning_rate": 1.6167479518582435e-06, "loss": 0.0008, "step": 231930 }, { "epoch": 1.5258909363630981, "grad_norm": 0.0029651072950300787, "learning_rate": 1.6163252537055978e-06, "loss": 0.0005, "step": 231940 }, { "epoch": 1.5259567245383314, "grad_norm": 0.03231933686914296, "learning_rate": 1.6159026001637818e-06, "loss": 0.0004, "step": 231950 }, { "epoch": 1.5260225127135647, "grad_norm": 0.0012239822997736052, "learning_rate": 1.6154799912383667e-06, "loss": 0.0011, "step": 231960 }, { "epoch": 1.5260883008887982, "grad_norm": 0.0055042369975194915, "learning_rate": 1.6150574269349257e-06, "loss": 0.0012, "step": 231970 }, { "epoch": 1.5261540890640317, "grad_norm": 0.07814330473437837, "learning_rate": 1.6146349072590285e-06, "loss": 0.0008, "step": 231980 }, { "epoch": 1.526219877239265, "grad_norm": 0.0005506780715105001, "learning_rate": 1.6142124322162462e-06, "loss": 0.0003, "step": 231990 }, { "epoch": 1.5262856654144983, "grad_norm": 0.026152155099182426, "learning_rate": 1.613790001812149e-06, "loss": 0.0006, "step": 232000 }, { "epoch": 1.5263514535897318, "grad_norm": 0.03496296420669146, "learning_rate": 1.613367616052306e-06, "loss": 0.0008, "step": 232010 }, { "epoch": 1.5264172417649653, "grad_norm": 0.057535570141045454, "learning_rate": 1.6129452749422858e-06, "loss": 0.0003, "step": 232020 }, { "epoch": 1.5264830299401986, "grad_norm": 0.07841982712955411, "learning_rate": 1.6125229784876573e-06, "loss": 0.0006, "step": 232030 }, { "epoch": 1.5265488181154319, "grad_norm": 0.012521952773722707, "learning_rate": 1.6121007266939875e-06, "loss": 0.0004, "step": 232040 }, { "epoch": 1.5266146062906654, "grad_norm": 0.09007583377064197, "learning_rate": 1.6116785195668428e-06, "loss": 0.0006, "step": 232050 }, { "epoch": 1.5266803944658986, "grad_norm": 0.017829307256989668, "learning_rate": 1.611256357111791e-06, "loss": 0.0003, "step": 232060 }, { "epoch": 1.5267461826411322, "grad_norm": 0.05938785296662696, "learning_rate": 1.610834239334398e-06, "loss": 0.0006, "step": 232070 }, { "epoch": 1.5268119708163654, "grad_norm": 0.03582366173347615, "learning_rate": 1.6104121662402282e-06, "loss": 0.0005, "step": 232080 }, { "epoch": 1.5268777589915987, "grad_norm": 0.08930945685283492, "learning_rate": 1.609990137834847e-06, "loss": 0.0006, "step": 232090 }, { "epoch": 1.5269435471668322, "grad_norm": 0.02881093338184773, "learning_rate": 1.609568154123818e-06, "loss": 0.0006, "step": 232100 }, { "epoch": 1.5270093353420657, "grad_norm": 0.023560473108567764, "learning_rate": 1.609146215112704e-06, "loss": 0.0006, "step": 232110 }, { "epoch": 1.527075123517299, "grad_norm": 0.021442300620450744, "learning_rate": 1.6087243208070692e-06, "loss": 0.0005, "step": 232120 }, { "epoch": 1.5271409116925323, "grad_norm": 0.0007148302613388609, "learning_rate": 1.6083024712124751e-06, "loss": 0.0002, "step": 232130 }, { "epoch": 1.5272066998677658, "grad_norm": 0.00974871855050843, "learning_rate": 1.6078806663344836e-06, "loss": 0.0002, "step": 232140 }, { "epoch": 1.5272724880429993, "grad_norm": 0.011690134075819753, "learning_rate": 1.607458906178656e-06, "loss": 0.0004, "step": 232150 }, { "epoch": 1.5273382762182326, "grad_norm": 0.004919788656160947, "learning_rate": 1.6070371907505534e-06, "loss": 0.0008, "step": 232160 }, { "epoch": 1.5274040643934659, "grad_norm": 0.05515218273556467, "learning_rate": 1.6066155200557343e-06, "loss": 0.0004, "step": 232170 }, { "epoch": 1.5274698525686992, "grad_norm": 0.03968170099772496, "learning_rate": 1.6061938940997572e-06, "loss": 0.0004, "step": 232180 }, { "epoch": 1.5275356407439327, "grad_norm": 0.03198958004121431, "learning_rate": 1.6057723128881846e-06, "loss": 0.0014, "step": 232190 }, { "epoch": 1.5276014289191662, "grad_norm": 0.0007565604605685081, "learning_rate": 1.6053507764265724e-06, "loss": 0.0007, "step": 232200 }, { "epoch": 1.5276672170943995, "grad_norm": 0.007688728082715763, "learning_rate": 1.6049292847204784e-06, "loss": 0.0004, "step": 232210 }, { "epoch": 1.5277330052696327, "grad_norm": 0.005201484952068277, "learning_rate": 1.604507837775459e-06, "loss": 0.0003, "step": 232220 }, { "epoch": 1.5277987934448662, "grad_norm": 0.023144792215438286, "learning_rate": 1.604086435597072e-06, "loss": 0.0005, "step": 232230 }, { "epoch": 1.5278645816200997, "grad_norm": 0.011869404371663865, "learning_rate": 1.6036650781908725e-06, "loss": 0.0004, "step": 232240 }, { "epoch": 1.527930369795333, "grad_norm": 0.06479826071951027, "learning_rate": 1.603243765562415e-06, "loss": 0.0006, "step": 232250 }, { "epoch": 1.5279961579705663, "grad_norm": 0.01989665817013662, "learning_rate": 1.6028224977172558e-06, "loss": 0.0008, "step": 232260 }, { "epoch": 1.5280619461457996, "grad_norm": 0.00460734794081378, "learning_rate": 1.6024012746609469e-06, "loss": 0.0004, "step": 232270 }, { "epoch": 1.528127734321033, "grad_norm": 0.035611121449416694, "learning_rate": 1.6019800963990434e-06, "loss": 0.0008, "step": 232280 }, { "epoch": 1.5281935224962666, "grad_norm": 0.010737261989168675, "learning_rate": 1.6015589629370976e-06, "loss": 0.0007, "step": 232290 }, { "epoch": 1.5282593106715, "grad_norm": 0.04351113289694291, "learning_rate": 1.6011378742806616e-06, "loss": 0.0004, "step": 232300 }, { "epoch": 1.5283250988467332, "grad_norm": 0.035778173617733916, "learning_rate": 1.600716830435286e-06, "loss": 0.0007, "step": 232310 }, { "epoch": 1.5283908870219667, "grad_norm": 0.0037621718601867685, "learning_rate": 1.6002958314065247e-06, "loss": 0.0007, "step": 232320 }, { "epoch": 1.5284566751972002, "grad_norm": 0.0055174335763555704, "learning_rate": 1.5998748771999262e-06, "loss": 0.0003, "step": 232330 }, { "epoch": 1.5285224633724335, "grad_norm": 0.005980709379129266, "learning_rate": 1.5994539678210414e-06, "loss": 0.0005, "step": 232340 }, { "epoch": 1.5285882515476668, "grad_norm": 0.0004745629523619372, "learning_rate": 1.599033103275419e-06, "loss": 0.0004, "step": 232350 }, { "epoch": 1.5286540397229003, "grad_norm": 0.0003212522121959602, "learning_rate": 1.598612283568608e-06, "loss": 0.0003, "step": 232360 }, { "epoch": 1.5287198278981335, "grad_norm": 0.06259525969170136, "learning_rate": 1.5981915087061562e-06, "loss": 0.0007, "step": 232370 }, { "epoch": 1.528785616073367, "grad_norm": 0.01527200083868993, "learning_rate": 1.5977707786936114e-06, "loss": 0.0002, "step": 232380 }, { "epoch": 1.5288514042486003, "grad_norm": 0.03194987881247893, "learning_rate": 1.5973500935365205e-06, "loss": 0.0007, "step": 232390 }, { "epoch": 1.5289171924238336, "grad_norm": 0.015235524379973418, "learning_rate": 1.59692945324043e-06, "loss": 0.0005, "step": 232400 }, { "epoch": 1.5289829805990671, "grad_norm": 0.003935806867118153, "learning_rate": 1.5965088578108855e-06, "loss": 0.0003, "step": 232410 }, { "epoch": 1.5290487687743006, "grad_norm": 0.0069995114834668914, "learning_rate": 1.5960883072534328e-06, "loss": 0.0006, "step": 232420 }, { "epoch": 1.529114556949534, "grad_norm": 0.02288689978245026, "learning_rate": 1.5956678015736154e-06, "loss": 0.0008, "step": 232430 }, { "epoch": 1.5291803451247672, "grad_norm": 0.03083545432729595, "learning_rate": 1.5952473407769759e-06, "loss": 0.0006, "step": 232440 }, { "epoch": 1.5292461333000007, "grad_norm": 0.07251604677064648, "learning_rate": 1.5948269248690617e-06, "loss": 0.0015, "step": 232450 }, { "epoch": 1.5293119214752342, "grad_norm": 0.005871028687094536, "learning_rate": 1.5944065538554138e-06, "loss": 0.0003, "step": 232460 }, { "epoch": 1.5293777096504675, "grad_norm": 0.048790860114503704, "learning_rate": 1.593986227741574e-06, "loss": 0.0015, "step": 232470 }, { "epoch": 1.5294434978257008, "grad_norm": 0.02178808227212558, "learning_rate": 1.5935659465330844e-06, "loss": 0.0005, "step": 232480 }, { "epoch": 1.529509286000934, "grad_norm": 0.0510332540245129, "learning_rate": 1.5931457102354852e-06, "loss": 0.0008, "step": 232490 }, { "epoch": 1.5295750741761676, "grad_norm": 0.0002412675073825599, "learning_rate": 1.592725518854318e-06, "loss": 0.0005, "step": 232500 }, { "epoch": 1.529640862351401, "grad_norm": 0.04855101466985365, "learning_rate": 1.592305372395122e-06, "loss": 0.0004, "step": 232510 }, { "epoch": 1.5297066505266343, "grad_norm": 0.011050960655835074, "learning_rate": 1.5918852708634363e-06, "loss": 0.0005, "step": 232520 }, { "epoch": 1.5297724387018676, "grad_norm": 0.016117584423627346, "learning_rate": 1.5914652142648002e-06, "loss": 0.0003, "step": 232530 }, { "epoch": 1.5298382268771011, "grad_norm": 0.05398288289015455, "learning_rate": 1.5910452026047512e-06, "loss": 0.0006, "step": 232540 }, { "epoch": 1.5299040150523346, "grad_norm": 0.011256641966450714, "learning_rate": 1.5906252358888275e-06, "loss": 0.0005, "step": 232550 }, { "epoch": 1.529969803227568, "grad_norm": 0.0019286054292397782, "learning_rate": 1.5902053141225655e-06, "loss": 0.0005, "step": 232560 }, { "epoch": 1.5300355914028012, "grad_norm": 0.02734823067836646, "learning_rate": 1.5897854373114996e-06, "loss": 0.0008, "step": 232570 }, { "epoch": 1.5301013795780345, "grad_norm": 0.03975614806503714, "learning_rate": 1.5893656054611688e-06, "loss": 0.0004, "step": 232580 }, { "epoch": 1.530167167753268, "grad_norm": 0.03420290539479572, "learning_rate": 1.5889458185771072e-06, "loss": 0.0006, "step": 232590 }, { "epoch": 1.5302329559285015, "grad_norm": 0.05560450435475916, "learning_rate": 1.5885260766648491e-06, "loss": 0.0006, "step": 232600 }, { "epoch": 1.5302987441037348, "grad_norm": 0.033590720765397146, "learning_rate": 1.5881063797299284e-06, "loss": 0.0003, "step": 232610 }, { "epoch": 1.530364532278968, "grad_norm": 0.016852914508557224, "learning_rate": 1.5876867277778779e-06, "loss": 0.0005, "step": 232620 }, { "epoch": 1.5304303204542016, "grad_norm": 0.009548477013473711, "learning_rate": 1.5872671208142316e-06, "loss": 0.0009, "step": 232630 }, { "epoch": 1.530496108629435, "grad_norm": 0.04187518739091528, "learning_rate": 1.5868475588445204e-06, "loss": 0.001, "step": 232640 }, { "epoch": 1.5305618968046684, "grad_norm": 0.02784907433572058, "learning_rate": 1.5864280418742762e-06, "loss": 0.0006, "step": 232650 }, { "epoch": 1.5306276849799016, "grad_norm": 0.0007915980139736337, "learning_rate": 1.5860085699090305e-06, "loss": 0.0006, "step": 232660 }, { "epoch": 1.530693473155135, "grad_norm": 0.012891852129185645, "learning_rate": 1.585589142954313e-06, "loss": 0.0011, "step": 232670 }, { "epoch": 1.5307592613303684, "grad_norm": 0.016476383828805223, "learning_rate": 1.5851697610156536e-06, "loss": 0.0003, "step": 232680 }, { "epoch": 1.530825049505602, "grad_norm": 0.0009060369965887722, "learning_rate": 1.5847504240985824e-06, "loss": 0.0003, "step": 232690 }, { "epoch": 1.5308908376808352, "grad_norm": 0.20556861350390446, "learning_rate": 1.5843311322086247e-06, "loss": 0.0004, "step": 232700 }, { "epoch": 1.5309566258560685, "grad_norm": 0.030944164963098618, "learning_rate": 1.583911885351313e-06, "loss": 0.0006, "step": 232710 }, { "epoch": 1.531022414031302, "grad_norm": 0.029318097996416426, "learning_rate": 1.5834926835321728e-06, "loss": 0.0006, "step": 232720 }, { "epoch": 1.5310882022065355, "grad_norm": 0.0418909679512206, "learning_rate": 1.5830735267567314e-06, "loss": 0.0016, "step": 232730 }, { "epoch": 1.5311539903817688, "grad_norm": 0.008291890678785463, "learning_rate": 1.582654415030514e-06, "loss": 0.0011, "step": 232740 }, { "epoch": 1.531219778557002, "grad_norm": 0.006866100723896191, "learning_rate": 1.5822353483590469e-06, "loss": 0.0012, "step": 232750 }, { "epoch": 1.5312855667322356, "grad_norm": 0.06070011556572133, "learning_rate": 1.5818163267478549e-06, "loss": 0.0007, "step": 232760 }, { "epoch": 1.531351354907469, "grad_norm": 0.005051155155294211, "learning_rate": 1.5813973502024622e-06, "loss": 0.0002, "step": 232770 }, { "epoch": 1.5314171430827024, "grad_norm": 0.039017487331681, "learning_rate": 1.5809784187283933e-06, "loss": 0.0003, "step": 232780 }, { "epoch": 1.5314829312579357, "grad_norm": 0.02443537883486004, "learning_rate": 1.5805595323311712e-06, "loss": 0.0002, "step": 232790 }, { "epoch": 1.531548719433169, "grad_norm": 0.01638920421071003, "learning_rate": 1.5801406910163181e-06, "loss": 0.0005, "step": 232800 }, { "epoch": 1.5316145076084025, "grad_norm": 0.03380452798537076, "learning_rate": 1.5797218947893567e-06, "loss": 0.0006, "step": 232810 }, { "epoch": 1.531680295783636, "grad_norm": 0.009291533555341607, "learning_rate": 1.5793031436558076e-06, "loss": 0.0007, "step": 232820 }, { "epoch": 1.5317460839588692, "grad_norm": 0.001416105237104803, "learning_rate": 1.578884437621191e-06, "loss": 0.0006, "step": 232830 }, { "epoch": 1.5318118721341025, "grad_norm": 0.0002954706773783423, "learning_rate": 1.5784657766910306e-06, "loss": 0.0002, "step": 232840 }, { "epoch": 1.531877660309336, "grad_norm": 0.04413508131330104, "learning_rate": 1.578047160870843e-06, "loss": 0.0003, "step": 232850 }, { "epoch": 1.5319434484845695, "grad_norm": 0.02033526108420713, "learning_rate": 1.5776285901661486e-06, "loss": 0.0004, "step": 232860 }, { "epoch": 1.5320092366598028, "grad_norm": 0.015319321461536873, "learning_rate": 1.5772100645824656e-06, "loss": 0.0006, "step": 232870 }, { "epoch": 1.532075024835036, "grad_norm": 0.04559252059515797, "learning_rate": 1.5767915841253112e-06, "loss": 0.0007, "step": 232880 }, { "epoch": 1.5321408130102694, "grad_norm": 0.004701675967626174, "learning_rate": 1.5763731488002037e-06, "loss": 0.0004, "step": 232890 }, { "epoch": 1.532206601185503, "grad_norm": 0.022204655431680397, "learning_rate": 1.5759547586126594e-06, "loss": 0.0005, "step": 232900 }, { "epoch": 1.5322723893607364, "grad_norm": 0.002453264690297265, "learning_rate": 1.575536413568195e-06, "loss": 0.0003, "step": 232910 }, { "epoch": 1.5323381775359697, "grad_norm": 0.0018625617441429586, "learning_rate": 1.575118113672325e-06, "loss": 0.0003, "step": 232920 }, { "epoch": 1.532403965711203, "grad_norm": 0.05947897307549162, "learning_rate": 1.574699858930564e-06, "loss": 0.0007, "step": 232930 }, { "epoch": 1.5324697538864365, "grad_norm": 0.00029588222571666094, "learning_rate": 1.574281649348428e-06, "loss": 0.0002, "step": 232940 }, { "epoch": 1.53253554206167, "grad_norm": 0.009821594301541961, "learning_rate": 1.5738634849314294e-06, "loss": 0.0007, "step": 232950 }, { "epoch": 1.5326013302369033, "grad_norm": 0.028911629314081408, "learning_rate": 1.57344536568508e-06, "loss": 0.0005, "step": 232960 }, { "epoch": 1.5326671184121365, "grad_norm": 0.02612246548453065, "learning_rate": 1.573027291614896e-06, "loss": 0.0005, "step": 232970 }, { "epoch": 1.5327329065873698, "grad_norm": 0.03312279895633009, "learning_rate": 1.5726092627263872e-06, "loss": 0.0007, "step": 232980 }, { "epoch": 1.5327986947626033, "grad_norm": 0.020456482903519178, "learning_rate": 1.5721912790250654e-06, "loss": 0.0002, "step": 232990 }, { "epoch": 1.5328644829378368, "grad_norm": 0.00766921949744412, "learning_rate": 1.5717733405164415e-06, "loss": 0.0005, "step": 233000 }, { "epoch": 1.5329302711130701, "grad_norm": 0.04910390580149865, "learning_rate": 1.5713554472060244e-06, "loss": 0.0005, "step": 233010 }, { "epoch": 1.5329960592883034, "grad_norm": 0.019551364295603585, "learning_rate": 1.5709375990993254e-06, "loss": 0.0004, "step": 233020 }, { "epoch": 1.533061847463537, "grad_norm": 0.0420839830966107, "learning_rate": 1.570519796201852e-06, "loss": 0.0003, "step": 233030 }, { "epoch": 1.5331276356387704, "grad_norm": 0.05777394851871602, "learning_rate": 1.5701020385191135e-06, "loss": 0.0002, "step": 233040 }, { "epoch": 1.5331934238140037, "grad_norm": 0.14400735703316098, "learning_rate": 1.5696843260566175e-06, "loss": 0.0008, "step": 233050 }, { "epoch": 1.533259211989237, "grad_norm": 0.05440777301194827, "learning_rate": 1.5692666588198706e-06, "loss": 0.0009, "step": 233060 }, { "epoch": 1.5333250001644705, "grad_norm": 0.0042660062435765055, "learning_rate": 1.56884903681438e-06, "loss": 0.0002, "step": 233070 }, { "epoch": 1.5333907883397038, "grad_norm": 0.03837997696232994, "learning_rate": 1.5684314600456518e-06, "loss": 0.0006, "step": 233080 }, { "epoch": 1.5334565765149373, "grad_norm": 0.10497444633306892, "learning_rate": 1.5680139285191892e-06, "loss": 0.001, "step": 233090 }, { "epoch": 1.5335223646901706, "grad_norm": 0.019065516837217102, "learning_rate": 1.5675964422405004e-06, "loss": 0.0005, "step": 233100 }, { "epoch": 1.5335881528654038, "grad_norm": 0.04861731255351011, "learning_rate": 1.5671790012150883e-06, "loss": 0.0002, "step": 233110 }, { "epoch": 1.5336539410406373, "grad_norm": 0.022893302739100143, "learning_rate": 1.5667616054484564e-06, "loss": 0.0007, "step": 233120 }, { "epoch": 1.5337197292158709, "grad_norm": 0.10576869900585534, "learning_rate": 1.5663442549461078e-06, "loss": 0.0004, "step": 233130 }, { "epoch": 1.5337855173911041, "grad_norm": 0.024010656660057035, "learning_rate": 1.5659269497135443e-06, "loss": 0.0004, "step": 233140 }, { "epoch": 1.5338513055663374, "grad_norm": 0.007639052506764734, "learning_rate": 1.5655096897562683e-06, "loss": 0.0004, "step": 233150 }, { "epoch": 1.533917093741571, "grad_norm": 0.009883942833741345, "learning_rate": 1.5650924750797803e-06, "loss": 0.0003, "step": 233160 }, { "epoch": 1.5339828819168044, "grad_norm": 0.025475974056986046, "learning_rate": 1.564675305689582e-06, "loss": 0.0002, "step": 233170 }, { "epoch": 1.5340486700920377, "grad_norm": 0.01222120555468622, "learning_rate": 1.564258181591173e-06, "loss": 0.0003, "step": 233180 }, { "epoch": 1.534114458267271, "grad_norm": 0.08547126392525288, "learning_rate": 1.563841102790052e-06, "loss": 0.0009, "step": 233190 }, { "epoch": 1.5341802464425043, "grad_norm": 0.05751594074817177, "learning_rate": 1.5634240692917185e-06, "loss": 0.0008, "step": 233200 }, { "epoch": 1.5342460346177378, "grad_norm": 0.039093891672676906, "learning_rate": 1.5630070811016707e-06, "loss": 0.0004, "step": 233210 }, { "epoch": 1.5343118227929713, "grad_norm": 0.004728219909928018, "learning_rate": 1.5625901382254049e-06, "loss": 0.0008, "step": 233220 }, { "epoch": 1.5343776109682046, "grad_norm": 0.0021309272925449066, "learning_rate": 1.5621732406684203e-06, "loss": 0.0008, "step": 233230 }, { "epoch": 1.5344433991434379, "grad_norm": 0.03827887324898356, "learning_rate": 1.561756388436213e-06, "loss": 0.0006, "step": 233240 }, { "epoch": 1.5345091873186714, "grad_norm": 0.0051033551353872495, "learning_rate": 1.561339581534278e-06, "loss": 0.0002, "step": 233250 }, { "epoch": 1.5345749754939049, "grad_norm": 0.060810548654438606, "learning_rate": 1.5609228199681108e-06, "loss": 0.0003, "step": 233260 }, { "epoch": 1.5346407636691382, "grad_norm": 0.004031126885051685, "learning_rate": 1.5605061037432061e-06, "loss": 0.0013, "step": 233270 }, { "epoch": 1.5347065518443714, "grad_norm": 0.0380370089575721, "learning_rate": 1.5600894328650583e-06, "loss": 0.0004, "step": 233280 }, { "epoch": 1.5347723400196047, "grad_norm": 0.04595600215989431, "learning_rate": 1.5596728073391598e-06, "loss": 0.0011, "step": 233290 }, { "epoch": 1.5348381281948382, "grad_norm": 0.0026088876844292536, "learning_rate": 1.5592562271710043e-06, "loss": 0.0006, "step": 233300 }, { "epoch": 1.5349039163700717, "grad_norm": 0.017539520328212103, "learning_rate": 1.5588396923660842e-06, "loss": 0.0005, "step": 233310 }, { "epoch": 1.534969704545305, "grad_norm": 0.04154139845211422, "learning_rate": 1.5584232029298902e-06, "loss": 0.0011, "step": 233320 }, { "epoch": 1.5350354927205383, "grad_norm": 0.0017361899103785492, "learning_rate": 1.5580067588679143e-06, "loss": 0.0005, "step": 233330 }, { "epoch": 1.5351012808957718, "grad_norm": 0.05742315395908798, "learning_rate": 1.5575903601856462e-06, "loss": 0.0012, "step": 233340 }, { "epoch": 1.5351670690710053, "grad_norm": 0.0030437498859081473, "learning_rate": 1.5571740068885749e-06, "loss": 0.0002, "step": 233350 }, { "epoch": 1.5352328572462386, "grad_norm": 0.04519381834083685, "learning_rate": 1.5567576989821924e-06, "loss": 0.0007, "step": 233360 }, { "epoch": 1.5352986454214719, "grad_norm": 0.02050634161549902, "learning_rate": 1.5563414364719858e-06, "loss": 0.0003, "step": 233370 }, { "epoch": 1.5353644335967054, "grad_norm": 0.01256460751105512, "learning_rate": 1.555925219363445e-06, "loss": 0.0006, "step": 233380 }, { "epoch": 1.5354302217719387, "grad_norm": 0.02739084141903675, "learning_rate": 1.5555090476620537e-06, "loss": 0.0005, "step": 233390 }, { "epoch": 1.5354960099471722, "grad_norm": 0.015060540876594997, "learning_rate": 1.5550929213733012e-06, "loss": 0.0004, "step": 233400 }, { "epoch": 1.5355617981224055, "grad_norm": 0.016619458627077692, "learning_rate": 1.554676840502673e-06, "loss": 0.0002, "step": 233410 }, { "epoch": 1.5356275862976387, "grad_norm": 0.04558642325483081, "learning_rate": 1.554260805055655e-06, "loss": 0.0006, "step": 233420 }, { "epoch": 1.5356933744728722, "grad_norm": 0.003233265401562514, "learning_rate": 1.5538448150377327e-06, "loss": 0.0005, "step": 233430 }, { "epoch": 1.5357591626481057, "grad_norm": 0.01801227508294906, "learning_rate": 1.5534288704543903e-06, "loss": 0.0005, "step": 233440 }, { "epoch": 1.535824950823339, "grad_norm": 0.023105532082988225, "learning_rate": 1.5530129713111096e-06, "loss": 0.0003, "step": 233450 }, { "epoch": 1.5358907389985723, "grad_norm": 0.021643506021156035, "learning_rate": 1.5525971176133774e-06, "loss": 0.001, "step": 233460 }, { "epoch": 1.5359565271738058, "grad_norm": 0.020484831724977243, "learning_rate": 1.5521813093666755e-06, "loss": 0.0005, "step": 233470 }, { "epoch": 1.5360223153490393, "grad_norm": 0.037508708118070305, "learning_rate": 1.5517655465764848e-06, "loss": 0.0005, "step": 233480 }, { "epoch": 1.5360881035242726, "grad_norm": 0.0695530068063598, "learning_rate": 1.551349829248287e-06, "loss": 0.0003, "step": 233490 }, { "epoch": 1.536153891699506, "grad_norm": 0.007556807183954525, "learning_rate": 1.5509341573875635e-06, "loss": 0.0004, "step": 233500 }, { "epoch": 1.5362196798747392, "grad_norm": 0.005959245247077078, "learning_rate": 1.5505185309997944e-06, "loss": 0.0004, "step": 233510 }, { "epoch": 1.5362854680499727, "grad_norm": 0.0316722887816793, "learning_rate": 1.5501029500904596e-06, "loss": 0.0004, "step": 233520 }, { "epoch": 1.5363512562252062, "grad_norm": 0.008789665330479436, "learning_rate": 1.5496874146650376e-06, "loss": 0.0005, "step": 233530 }, { "epoch": 1.5364170444004395, "grad_norm": 0.03351721192930425, "learning_rate": 1.5492719247290073e-06, "loss": 0.0004, "step": 233540 }, { "epoch": 1.5364828325756728, "grad_norm": 0.03931853901463381, "learning_rate": 1.548856480287846e-06, "loss": 0.0006, "step": 233550 }, { "epoch": 1.5365486207509063, "grad_norm": 0.0065546123797665485, "learning_rate": 1.5484410813470319e-06, "loss": 0.0007, "step": 233560 }, { "epoch": 1.5366144089261398, "grad_norm": 0.010021218197770056, "learning_rate": 1.548025727912041e-06, "loss": 0.0003, "step": 233570 }, { "epoch": 1.536680197101373, "grad_norm": 0.04126592811644721, "learning_rate": 1.5476104199883473e-06, "loss": 0.0008, "step": 233580 }, { "epoch": 1.5367459852766063, "grad_norm": 0.0435875028722717, "learning_rate": 1.5471951575814308e-06, "loss": 0.0004, "step": 233590 }, { "epoch": 1.5368117734518396, "grad_norm": 0.035731271784725266, "learning_rate": 1.5467799406967643e-06, "loss": 0.0004, "step": 233600 }, { "epoch": 1.5368775616270731, "grad_norm": 0.004198968488085491, "learning_rate": 1.5463647693398215e-06, "loss": 0.0003, "step": 233610 }, { "epoch": 1.5369433498023066, "grad_norm": 0.07920873598735982, "learning_rate": 1.5459496435160765e-06, "loss": 0.0005, "step": 233620 }, { "epoch": 1.53700913797754, "grad_norm": 0.004685519785250303, "learning_rate": 1.5455345632310026e-06, "loss": 0.0005, "step": 233630 }, { "epoch": 1.5370749261527732, "grad_norm": 0.035302621402066796, "learning_rate": 1.5451195284900716e-06, "loss": 0.0021, "step": 233640 }, { "epoch": 1.5371407143280067, "grad_norm": 0.0038542312116970788, "learning_rate": 1.5447045392987564e-06, "loss": 0.0002, "step": 233650 }, { "epoch": 1.5372065025032402, "grad_norm": 0.0028757210960573478, "learning_rate": 1.544289595662527e-06, "loss": 0.001, "step": 233660 }, { "epoch": 1.5372722906784735, "grad_norm": 0.03329347397047819, "learning_rate": 1.543874697586855e-06, "loss": 0.0001, "step": 233670 }, { "epoch": 1.5373380788537068, "grad_norm": 0.00036463882851029024, "learning_rate": 1.5434598450772103e-06, "loss": 0.0005, "step": 233680 }, { "epoch": 1.53740386702894, "grad_norm": 0.06797572758230062, "learning_rate": 1.543045038139062e-06, "loss": 0.0006, "step": 233690 }, { "epoch": 1.5374696552041736, "grad_norm": 0.0017340451276885052, "learning_rate": 1.54263027677788e-06, "loss": 0.0003, "step": 233700 }, { "epoch": 1.537535443379407, "grad_norm": 0.024465292966826827, "learning_rate": 1.5422155609991295e-06, "loss": 0.0001, "step": 233710 }, { "epoch": 1.5376012315546403, "grad_norm": 0.00037908753908924114, "learning_rate": 1.541800890808282e-06, "loss": 0.001, "step": 233720 }, { "epoch": 1.5376670197298736, "grad_norm": 0.0402059185523269, "learning_rate": 1.5413862662108038e-06, "loss": 0.0003, "step": 233730 }, { "epoch": 1.5377328079051071, "grad_norm": 0.011632279725534793, "learning_rate": 1.54097168721216e-06, "loss": 0.0005, "step": 233740 }, { "epoch": 1.5377985960803406, "grad_norm": 0.02392821028789499, "learning_rate": 1.5405571538178171e-06, "loss": 0.0052, "step": 233750 }, { "epoch": 1.537864384255574, "grad_norm": 0.02290399788929589, "learning_rate": 1.5401426660332408e-06, "loss": 0.0007, "step": 233760 }, { "epoch": 1.5379301724308072, "grad_norm": 0.02795473009718575, "learning_rate": 1.5397282238638949e-06, "loss": 0.0009, "step": 233770 }, { "epoch": 1.5379959606060407, "grad_norm": 0.020965642587816642, "learning_rate": 1.5393138273152442e-06, "loss": 0.0006, "step": 233780 }, { "epoch": 1.5380617487812742, "grad_norm": 0.0009425782573103681, "learning_rate": 1.5388994763927512e-06, "loss": 0.0006, "step": 233790 }, { "epoch": 1.5381275369565075, "grad_norm": 0.05685943398266836, "learning_rate": 1.53848517110188e-06, "loss": 0.0007, "step": 233800 }, { "epoch": 1.5381933251317408, "grad_norm": 0.028234318272644696, "learning_rate": 1.5380709114480925e-06, "loss": 0.0003, "step": 233810 }, { "epoch": 1.538259113306974, "grad_norm": 0.01504608010843909, "learning_rate": 1.5376566974368495e-06, "loss": 0.0005, "step": 233820 }, { "epoch": 1.5383249014822076, "grad_norm": 0.0009079430532799497, "learning_rate": 1.5372425290736125e-06, "loss": 0.0003, "step": 233830 }, { "epoch": 1.538390689657441, "grad_norm": 0.028781351312201428, "learning_rate": 1.536828406363841e-06, "loss": 0.0004, "step": 233840 }, { "epoch": 1.5384564778326744, "grad_norm": 0.019013220610432386, "learning_rate": 1.536414329312998e-06, "loss": 0.0003, "step": 233850 }, { "epoch": 1.5385222660079076, "grad_norm": 0.04252038494528481, "learning_rate": 1.5360002979265398e-06, "loss": 0.0003, "step": 233860 }, { "epoch": 1.5385880541831412, "grad_norm": 0.02092353254862855, "learning_rate": 1.5355863122099262e-06, "loss": 0.0002, "step": 233870 }, { "epoch": 1.5386538423583747, "grad_norm": 0.014265566225690198, "learning_rate": 1.5351723721686158e-06, "loss": 0.0009, "step": 233880 }, { "epoch": 1.538719630533608, "grad_norm": 0.0018479819291658418, "learning_rate": 1.5347584778080643e-06, "loss": 0.0003, "step": 233890 }, { "epoch": 1.5387854187088412, "grad_norm": 0.06882965759270375, "learning_rate": 1.5343446291337298e-06, "loss": 0.0008, "step": 233900 }, { "epoch": 1.5388512068840745, "grad_norm": 0.03033617490255524, "learning_rate": 1.5339308261510682e-06, "loss": 0.0006, "step": 233910 }, { "epoch": 1.538916995059308, "grad_norm": 0.0026617137139541428, "learning_rate": 1.5335170688655354e-06, "loss": 0.0002, "step": 233920 }, { "epoch": 1.5389827832345415, "grad_norm": 0.000522001071078508, "learning_rate": 1.533103357282586e-06, "loss": 0.0002, "step": 233930 }, { "epoch": 1.5390485714097748, "grad_norm": 0.006776188629931021, "learning_rate": 1.5326896914076743e-06, "loss": 0.0004, "step": 233940 }, { "epoch": 1.539114359585008, "grad_norm": 0.006493644807011051, "learning_rate": 1.5322760712462548e-06, "loss": 0.0005, "step": 233950 }, { "epoch": 1.5391801477602416, "grad_norm": 0.02810348864987957, "learning_rate": 1.5318624968037805e-06, "loss": 0.0005, "step": 233960 }, { "epoch": 1.539245935935475, "grad_norm": 0.024499007514164964, "learning_rate": 1.5314489680857026e-06, "loss": 0.0005, "step": 233970 }, { "epoch": 1.5393117241107084, "grad_norm": 0.026248887627055597, "learning_rate": 1.5310354850974752e-06, "loss": 0.0004, "step": 233980 }, { "epoch": 1.5393775122859417, "grad_norm": 0.009978793901837426, "learning_rate": 1.5306220478445493e-06, "loss": 0.0013, "step": 233990 }, { "epoch": 1.539443300461175, "grad_norm": 0.04972008807599068, "learning_rate": 1.5302086563323753e-06, "loss": 0.0008, "step": 234000 }, { "epoch": 1.5395090886364085, "grad_norm": 0.05018824078527456, "learning_rate": 1.5297953105664038e-06, "loss": 0.001, "step": 234010 }, { "epoch": 1.539574876811642, "grad_norm": 0.007060517421954688, "learning_rate": 1.5293820105520835e-06, "loss": 0.0004, "step": 234020 }, { "epoch": 1.5396406649868752, "grad_norm": 0.023342986502604343, "learning_rate": 1.5289687562948647e-06, "loss": 0.0006, "step": 234030 }, { "epoch": 1.5397064531621085, "grad_norm": 0.006623797468194284, "learning_rate": 1.5285555478001946e-06, "loss": 0.0004, "step": 234040 }, { "epoch": 1.539772241337342, "grad_norm": 0.008646048916527297, "learning_rate": 1.5281423850735211e-06, "loss": 0.0009, "step": 234050 }, { "epoch": 1.5398380295125755, "grad_norm": 0.031764732411949034, "learning_rate": 1.5277292681202927e-06, "loss": 0.0008, "step": 234060 }, { "epoch": 1.5399038176878088, "grad_norm": 0.020105894079799555, "learning_rate": 1.5273161969459544e-06, "loss": 0.0006, "step": 234070 }, { "epoch": 1.539969605863042, "grad_norm": 0.010995082531458767, "learning_rate": 1.526903171555953e-06, "loss": 0.0011, "step": 234080 }, { "epoch": 1.5400353940382756, "grad_norm": 0.0002888079318554036, "learning_rate": 1.5264901919557335e-06, "loss": 0.0005, "step": 234090 }, { "epoch": 1.540101182213509, "grad_norm": 0.0037722739655144328, "learning_rate": 1.5260772581507394e-06, "loss": 0.0003, "step": 234100 }, { "epoch": 1.5401669703887424, "grad_norm": 0.01025046010349118, "learning_rate": 1.5256643701464179e-06, "loss": 0.0002, "step": 234110 }, { "epoch": 1.5402327585639757, "grad_norm": 0.15724644883538222, "learning_rate": 1.5252515279482117e-06, "loss": 0.0012, "step": 234120 }, { "epoch": 1.540298546739209, "grad_norm": 0.027413853284449716, "learning_rate": 1.524838731561562e-06, "loss": 0.0005, "step": 234130 }, { "epoch": 1.5403643349144425, "grad_norm": 0.020418234529034417, "learning_rate": 1.5244259809919132e-06, "loss": 0.0005, "step": 234140 }, { "epoch": 1.540430123089676, "grad_norm": 0.03579265090386505, "learning_rate": 1.5240132762447057e-06, "loss": 0.0005, "step": 234150 }, { "epoch": 1.5404959112649093, "grad_norm": 0.02429166834910504, "learning_rate": 1.5236006173253815e-06, "loss": 0.0009, "step": 234160 }, { "epoch": 1.5405616994401425, "grad_norm": 0.006123604428263492, "learning_rate": 1.5231880042393804e-06, "loss": 0.0004, "step": 234170 }, { "epoch": 1.540627487615376, "grad_norm": 0.05753741693522432, "learning_rate": 1.5227754369921427e-06, "loss": 0.0005, "step": 234180 }, { "epoch": 1.5406932757906096, "grad_norm": 0.039102981932963375, "learning_rate": 1.5223629155891079e-06, "loss": 0.0007, "step": 234190 }, { "epoch": 1.5407590639658428, "grad_norm": 0.010048308350427824, "learning_rate": 1.5219504400357143e-06, "loss": 0.0003, "step": 234200 }, { "epoch": 1.5408248521410761, "grad_norm": 0.033971413263123564, "learning_rate": 1.5215380103374005e-06, "loss": 0.0005, "step": 234210 }, { "epoch": 1.5408906403163094, "grad_norm": 0.013093622310706182, "learning_rate": 1.5211256264996039e-06, "loss": 0.0006, "step": 234220 }, { "epoch": 1.540956428491543, "grad_norm": 0.010417652608555358, "learning_rate": 1.5207132885277598e-06, "loss": 0.0005, "step": 234230 }, { "epoch": 1.5410222166667764, "grad_norm": 0.0205354877164307, "learning_rate": 1.5203009964273075e-06, "loss": 0.0005, "step": 234240 }, { "epoch": 1.5410880048420097, "grad_norm": 0.03420617910787725, "learning_rate": 1.5198887502036813e-06, "loss": 0.0002, "step": 234250 }, { "epoch": 1.541153793017243, "grad_norm": 0.011650802844989991, "learning_rate": 1.5194765498623166e-06, "loss": 0.0005, "step": 234260 }, { "epoch": 1.5412195811924765, "grad_norm": 0.0019565267462617764, "learning_rate": 1.5190643954086471e-06, "loss": 0.0002, "step": 234270 }, { "epoch": 1.54128536936771, "grad_norm": 0.07358110249219409, "learning_rate": 1.5186522868481073e-06, "loss": 0.0004, "step": 234280 }, { "epoch": 1.5413511575429433, "grad_norm": 0.07274360286397888, "learning_rate": 1.5182402241861306e-06, "loss": 0.0006, "step": 234290 }, { "epoch": 1.5414169457181766, "grad_norm": 0.07091056484411111, "learning_rate": 1.5178282074281498e-06, "loss": 0.0003, "step": 234300 }, { "epoch": 1.5414827338934098, "grad_norm": 0.04085765647163359, "learning_rate": 1.5174162365795964e-06, "loss": 0.0008, "step": 234310 }, { "epoch": 1.5415485220686433, "grad_norm": 0.013846944007427567, "learning_rate": 1.5170043116459021e-06, "loss": 0.0013, "step": 234320 }, { "epoch": 1.5416143102438769, "grad_norm": 0.018407288500043646, "learning_rate": 1.5165924326324976e-06, "loss": 0.0003, "step": 234330 }, { "epoch": 1.5416800984191101, "grad_norm": 0.04797844819895851, "learning_rate": 1.5161805995448137e-06, "loss": 0.0004, "step": 234340 }, { "epoch": 1.5417458865943434, "grad_norm": 0.011925540500747303, "learning_rate": 1.5157688123882796e-06, "loss": 0.0003, "step": 234350 }, { "epoch": 1.541811674769577, "grad_norm": 0.023866896718226744, "learning_rate": 1.5153570711683236e-06, "loss": 0.0004, "step": 234360 }, { "epoch": 1.5418774629448104, "grad_norm": 0.011418223736076826, "learning_rate": 1.5149453758903759e-06, "loss": 0.0005, "step": 234370 }, { "epoch": 1.5419432511200437, "grad_norm": 0.008193996606339918, "learning_rate": 1.5145337265598636e-06, "loss": 0.0006, "step": 234380 }, { "epoch": 1.542009039295277, "grad_norm": 0.0005772602596885636, "learning_rate": 1.5141221231822145e-06, "loss": 0.0005, "step": 234390 }, { "epoch": 1.5420748274705105, "grad_norm": 0.0016850915209605697, "learning_rate": 1.5137105657628543e-06, "loss": 0.0003, "step": 234400 }, { "epoch": 1.5421406156457438, "grad_norm": 0.0022709780870095184, "learning_rate": 1.513299054307209e-06, "loss": 0.0005, "step": 234410 }, { "epoch": 1.5422064038209773, "grad_norm": 0.04108140283178411, "learning_rate": 1.5128875888207044e-06, "loss": 0.0007, "step": 234420 }, { "epoch": 1.5422721919962106, "grad_norm": 0.006936036560119973, "learning_rate": 1.512476169308766e-06, "loss": 0.0004, "step": 234430 }, { "epoch": 1.5423379801714439, "grad_norm": 0.06466300560614283, "learning_rate": 1.5120647957768164e-06, "loss": 0.0005, "step": 234440 }, { "epoch": 1.5424037683466774, "grad_norm": 0.00017654176035101962, "learning_rate": 1.5116534682302804e-06, "loss": 0.0003, "step": 234450 }, { "epoch": 1.5424695565219109, "grad_norm": 0.030094103862727272, "learning_rate": 1.5112421866745808e-06, "loss": 0.0004, "step": 234460 }, { "epoch": 1.5425353446971442, "grad_norm": 0.014651209628533333, "learning_rate": 1.5108309511151399e-06, "loss": 0.0009, "step": 234470 }, { "epoch": 1.5426011328723774, "grad_norm": 0.01812578222233882, "learning_rate": 1.510419761557379e-06, "loss": 0.0004, "step": 234480 }, { "epoch": 1.542666921047611, "grad_norm": 0.025733122199320183, "learning_rate": 1.5100086180067185e-06, "loss": 0.0005, "step": 234490 }, { "epoch": 1.5427327092228444, "grad_norm": 0.001620015455810735, "learning_rate": 1.509597520468582e-06, "loss": 0.0007, "step": 234500 }, { "epoch": 1.5427984973980777, "grad_norm": 0.051019116008670574, "learning_rate": 1.5091864689483876e-06, "loss": 0.0005, "step": 234510 }, { "epoch": 1.542864285573311, "grad_norm": 0.0001948097539506763, "learning_rate": 1.5087754634515544e-06, "loss": 0.0002, "step": 234520 }, { "epoch": 1.5429300737485443, "grad_norm": 0.008862163585181026, "learning_rate": 1.5083645039835015e-06, "loss": 0.0005, "step": 234530 }, { "epoch": 1.5429958619237778, "grad_norm": 0.009672914081926819, "learning_rate": 1.507953590549648e-06, "loss": 0.0016, "step": 234540 }, { "epoch": 1.5430616500990113, "grad_norm": 0.002682344231568447, "learning_rate": 1.5075427231554096e-06, "loss": 0.0011, "step": 234550 }, { "epoch": 1.5431274382742446, "grad_norm": 0.0066311132768082605, "learning_rate": 1.5071319018062042e-06, "loss": 0.0007, "step": 234560 }, { "epoch": 1.5431932264494779, "grad_norm": 0.021611900633020355, "learning_rate": 1.5067211265074478e-06, "loss": 0.0004, "step": 234570 }, { "epoch": 1.5432590146247114, "grad_norm": 0.032115162307575926, "learning_rate": 1.5063103972645566e-06, "loss": 0.0009, "step": 234580 }, { "epoch": 1.5433248027999449, "grad_norm": 0.036237111098417873, "learning_rate": 1.5058997140829456e-06, "loss": 0.0005, "step": 234590 }, { "epoch": 1.5433905909751782, "grad_norm": 0.04073300607755353, "learning_rate": 1.505489076968029e-06, "loss": 0.0005, "step": 234600 }, { "epoch": 1.5434563791504115, "grad_norm": 0.0007885824626682292, "learning_rate": 1.505078485925221e-06, "loss": 0.0003, "step": 234610 }, { "epoch": 1.5435221673256447, "grad_norm": 0.23509022248337408, "learning_rate": 1.5046679409599335e-06, "loss": 0.0005, "step": 234620 }, { "epoch": 1.5435879555008782, "grad_norm": 0.02326310976958826, "learning_rate": 1.5042574420775814e-06, "loss": 0.0004, "step": 234630 }, { "epoch": 1.5436537436761117, "grad_norm": 0.007837142948436668, "learning_rate": 1.503846989283576e-06, "loss": 0.0004, "step": 234640 }, { "epoch": 1.543719531851345, "grad_norm": 0.039715993732489054, "learning_rate": 1.5034365825833285e-06, "loss": 0.0005, "step": 234650 }, { "epoch": 1.5437853200265783, "grad_norm": 0.011969863069117005, "learning_rate": 1.50302622198225e-06, "loss": 0.0003, "step": 234660 }, { "epoch": 1.5438511082018118, "grad_norm": 0.0004887787711350671, "learning_rate": 1.5026159074857505e-06, "loss": 0.0007, "step": 234670 }, { "epoch": 1.5439168963770453, "grad_norm": 0.06748263204264107, "learning_rate": 1.50220563909924e-06, "loss": 0.0017, "step": 234680 }, { "epoch": 1.5439826845522786, "grad_norm": 0.03799059324788818, "learning_rate": 1.5017954168281268e-06, "loss": 0.0004, "step": 234690 }, { "epoch": 1.544048472727512, "grad_norm": 0.050381441010079284, "learning_rate": 1.50138524067782e-06, "loss": 0.0003, "step": 234700 }, { "epoch": 1.5441142609027454, "grad_norm": 0.022809731242618838, "learning_rate": 1.5009751106537274e-06, "loss": 0.0003, "step": 234710 }, { "epoch": 1.5441800490779787, "grad_norm": 0.022169550436091305, "learning_rate": 1.5005650267612559e-06, "loss": 0.0015, "step": 234720 }, { "epoch": 1.5442458372532122, "grad_norm": 0.000997820818759212, "learning_rate": 1.500154989005812e-06, "loss": 0.0009, "step": 234730 }, { "epoch": 1.5443116254284455, "grad_norm": 0.006367622270997529, "learning_rate": 1.4997449973928019e-06, "loss": 0.0003, "step": 234740 }, { "epoch": 1.5443774136036788, "grad_norm": 0.04571131419110248, "learning_rate": 1.4993350519276296e-06, "loss": 0.0006, "step": 234750 }, { "epoch": 1.5444432017789123, "grad_norm": 0.025423875128315572, "learning_rate": 1.4989251526157029e-06, "loss": 0.0004, "step": 234760 }, { "epoch": 1.5445089899541458, "grad_norm": 0.042567547411004124, "learning_rate": 1.4985152994624241e-06, "loss": 0.0003, "step": 234770 }, { "epoch": 1.544574778129379, "grad_norm": 0.006658569754186864, "learning_rate": 1.498105492473197e-06, "loss": 0.0003, "step": 234780 }, { "epoch": 1.5446405663046123, "grad_norm": 0.0008378759279668378, "learning_rate": 1.4976957316534247e-06, "loss": 0.0005, "step": 234790 }, { "epoch": 1.5447063544798458, "grad_norm": 0.007302104686094341, "learning_rate": 1.4972860170085091e-06, "loss": 0.0002, "step": 234800 }, { "epoch": 1.5447721426550793, "grad_norm": 0.11607762780187052, "learning_rate": 1.4968763485438525e-06, "loss": 0.001, "step": 234810 }, { "epoch": 1.5448379308303126, "grad_norm": 0.007514032508200148, "learning_rate": 1.4964667262648558e-06, "loss": 0.0005, "step": 234820 }, { "epoch": 1.544903719005546, "grad_norm": 0.018474751079793166, "learning_rate": 1.4960571501769195e-06, "loss": 0.0004, "step": 234830 }, { "epoch": 1.5449695071807792, "grad_norm": 0.013915895143576176, "learning_rate": 1.495647620285443e-06, "loss": 0.0002, "step": 234840 }, { "epoch": 1.5450352953560127, "grad_norm": 0.01807326448909497, "learning_rate": 1.4952381365958269e-06, "loss": 0.0006, "step": 234850 }, { "epoch": 1.5451010835312462, "grad_norm": 0.01914064255739611, "learning_rate": 1.4948286991134686e-06, "loss": 0.0007, "step": 234860 }, { "epoch": 1.5451668717064795, "grad_norm": 0.008520302654526026, "learning_rate": 1.4944193078437662e-06, "loss": 0.0007, "step": 234870 }, { "epoch": 1.5452326598817128, "grad_norm": 0.00432765592496745, "learning_rate": 1.4940099627921168e-06, "loss": 0.0005, "step": 234880 }, { "epoch": 1.5452984480569463, "grad_norm": 0.014500954484240481, "learning_rate": 1.4936006639639194e-06, "loss": 0.0004, "step": 234890 }, { "epoch": 1.5453642362321798, "grad_norm": 0.06140562688127839, "learning_rate": 1.4931914113645685e-06, "loss": 0.0005, "step": 234900 }, { "epoch": 1.545430024407413, "grad_norm": 0.03212407927792145, "learning_rate": 1.4927822049994605e-06, "loss": 0.0011, "step": 234910 }, { "epoch": 1.5454958125826463, "grad_norm": 0.00010990221651543817, "learning_rate": 1.4923730448739903e-06, "loss": 0.0005, "step": 234920 }, { "epoch": 1.5455616007578796, "grad_norm": 0.028379456189496333, "learning_rate": 1.491963930993552e-06, "loss": 0.0004, "step": 234930 }, { "epoch": 1.5456273889331131, "grad_norm": 0.0002928129314959178, "learning_rate": 1.491554863363539e-06, "loss": 0.0004, "step": 234940 }, { "epoch": 1.5456931771083466, "grad_norm": 0.040249440199787566, "learning_rate": 1.4911458419893455e-06, "loss": 0.0004, "step": 234950 }, { "epoch": 1.54575896528358, "grad_norm": 0.02048721888692782, "learning_rate": 1.4907368668763639e-06, "loss": 0.001, "step": 234960 }, { "epoch": 1.5458247534588132, "grad_norm": 0.00966379248254263, "learning_rate": 1.4903279380299857e-06, "loss": 0.0003, "step": 234970 }, { "epoch": 1.5458905416340467, "grad_norm": 0.003657590479248788, "learning_rate": 1.489919055455602e-06, "loss": 0.0002, "step": 234980 }, { "epoch": 1.5459563298092802, "grad_norm": 0.03214782353350737, "learning_rate": 1.4895102191586048e-06, "loss": 0.0006, "step": 234990 }, { "epoch": 1.5460221179845135, "grad_norm": 0.06104291036702859, "learning_rate": 1.4891014291443828e-06, "loss": 0.0006, "step": 235000 }, { "epoch": 1.5460879061597468, "grad_norm": 0.01310601996332322, "learning_rate": 1.4886926854183253e-06, "loss": 0.0006, "step": 235010 }, { "epoch": 1.54615369433498, "grad_norm": 0.030463262600340896, "learning_rate": 1.4882839879858236e-06, "loss": 0.0011, "step": 235020 }, { "epoch": 1.5462194825102136, "grad_norm": 0.007538839816971122, "learning_rate": 1.487875336852264e-06, "loss": 0.0004, "step": 235030 }, { "epoch": 1.546285270685447, "grad_norm": 0.053664987750906785, "learning_rate": 1.4874667320230352e-06, "loss": 0.0005, "step": 235040 }, { "epoch": 1.5463510588606804, "grad_norm": 0.0505506890548587, "learning_rate": 1.4870581735035244e-06, "loss": 0.0005, "step": 235050 }, { "epoch": 1.5464168470359136, "grad_norm": 0.016433236633287447, "learning_rate": 1.4866496612991171e-06, "loss": 0.0008, "step": 235060 }, { "epoch": 1.5464826352111471, "grad_norm": 0.019937252318358085, "learning_rate": 1.4862411954151996e-06, "loss": 0.0007, "step": 235070 }, { "epoch": 1.5465484233863807, "grad_norm": 0.019547590605989672, "learning_rate": 1.4858327758571578e-06, "loss": 0.0005, "step": 235080 }, { "epoch": 1.546614211561614, "grad_norm": 0.04508307059645267, "learning_rate": 1.4854244026303755e-06, "loss": 0.0011, "step": 235090 }, { "epoch": 1.5466799997368472, "grad_norm": 0.007705645130879365, "learning_rate": 1.4850160757402364e-06, "loss": 0.0004, "step": 235100 }, { "epoch": 1.5467457879120807, "grad_norm": 0.016610429537650472, "learning_rate": 1.4846077951921257e-06, "loss": 0.001, "step": 235110 }, { "epoch": 1.546811576087314, "grad_norm": 0.014833753693266937, "learning_rate": 1.4841995609914245e-06, "loss": 0.0006, "step": 235120 }, { "epoch": 1.5468773642625475, "grad_norm": 0.007073840653513506, "learning_rate": 1.483791373143516e-06, "loss": 0.0002, "step": 235130 }, { "epoch": 1.5469431524377808, "grad_norm": 0.004055005954476252, "learning_rate": 1.4833832316537793e-06, "loss": 0.0006, "step": 235140 }, { "epoch": 1.547008940613014, "grad_norm": 0.0006482974530338719, "learning_rate": 1.4829751365275997e-06, "loss": 0.0015, "step": 235150 }, { "epoch": 1.5470747287882476, "grad_norm": 0.035532521359175354, "learning_rate": 1.4825670877703552e-06, "loss": 0.0005, "step": 235160 }, { "epoch": 1.547140516963481, "grad_norm": 0.05180909558748621, "learning_rate": 1.4821590853874258e-06, "loss": 0.0004, "step": 235170 }, { "epoch": 1.5472063051387144, "grad_norm": 0.012425976548822307, "learning_rate": 1.4817511293841906e-06, "loss": 0.0013, "step": 235180 }, { "epoch": 1.5472720933139477, "grad_norm": 0.03849416648634989, "learning_rate": 1.4813432197660283e-06, "loss": 0.0005, "step": 235190 }, { "epoch": 1.5473378814891812, "grad_norm": 0.0009347452446727391, "learning_rate": 1.4809353565383166e-06, "loss": 0.0006, "step": 235200 }, { "epoch": 1.5474036696644147, "grad_norm": 0.06827417993188233, "learning_rate": 1.4805275397064334e-06, "loss": 0.0003, "step": 235210 }, { "epoch": 1.547469457839648, "grad_norm": 0.014342948856898552, "learning_rate": 1.4801197692757546e-06, "loss": 0.0007, "step": 235220 }, { "epoch": 1.5475352460148812, "grad_norm": 0.003890741703768903, "learning_rate": 1.4797120452516566e-06, "loss": 0.0003, "step": 235230 }, { "epoch": 1.5476010341901145, "grad_norm": 0.07154337651391461, "learning_rate": 1.4793043676395152e-06, "loss": 0.0006, "step": 235240 }, { "epoch": 1.547666822365348, "grad_norm": 0.040012128321504636, "learning_rate": 1.478896736444705e-06, "loss": 0.0003, "step": 235250 }, { "epoch": 1.5477326105405815, "grad_norm": 0.05499326271724973, "learning_rate": 1.4784891516726001e-06, "loss": 0.0005, "step": 235260 }, { "epoch": 1.5477983987158148, "grad_norm": 0.04231988286968626, "learning_rate": 1.478081613328573e-06, "loss": 0.0006, "step": 235270 }, { "epoch": 1.547864186891048, "grad_norm": 0.09614000573353348, "learning_rate": 1.4776741214180013e-06, "loss": 0.0004, "step": 235280 }, { "epoch": 1.5479299750662816, "grad_norm": 0.016384432052064773, "learning_rate": 1.4772666759462522e-06, "loss": 0.0003, "step": 235290 }, { "epoch": 1.547995763241515, "grad_norm": 0.007542494621298514, "learning_rate": 1.4768592769187e-06, "loss": 0.0008, "step": 235300 }, { "epoch": 1.5480615514167484, "grad_norm": 0.032920740362474735, "learning_rate": 1.4764519243407155e-06, "loss": 0.0003, "step": 235310 }, { "epoch": 1.5481273395919817, "grad_norm": 0.02658341296627505, "learning_rate": 1.4760446182176686e-06, "loss": 0.0006, "step": 235320 }, { "epoch": 1.548193127767215, "grad_norm": 0.016435270538915272, "learning_rate": 1.4756373585549306e-06, "loss": 0.0004, "step": 235330 }, { "epoch": 1.5482589159424485, "grad_norm": 0.027233204674200595, "learning_rate": 1.47523014535787e-06, "loss": 0.0013, "step": 235340 }, { "epoch": 1.548324704117682, "grad_norm": 0.02699011840287695, "learning_rate": 1.4748229786318558e-06, "loss": 0.0004, "step": 235350 }, { "epoch": 1.5483904922929153, "grad_norm": 0.02281823413732007, "learning_rate": 1.474415858382256e-06, "loss": 0.0003, "step": 235360 }, { "epoch": 1.5484562804681485, "grad_norm": 0.024960300253513902, "learning_rate": 1.474008784614438e-06, "loss": 0.0007, "step": 235370 }, { "epoch": 1.548522068643382, "grad_norm": 0.0049900934103576, "learning_rate": 1.4736017573337675e-06, "loss": 0.0007, "step": 235380 }, { "epoch": 1.5485878568186155, "grad_norm": 0.06594713251364187, "learning_rate": 1.473194776545614e-06, "loss": 0.0011, "step": 235390 }, { "epoch": 1.5486536449938488, "grad_norm": 0.03007121657783462, "learning_rate": 1.4727878422553409e-06, "loss": 0.0007, "step": 235400 }, { "epoch": 1.5487194331690821, "grad_norm": 0.05402502857381985, "learning_rate": 1.4723809544683144e-06, "loss": 0.0004, "step": 235410 }, { "epoch": 1.5487852213443156, "grad_norm": 0.0392224238587825, "learning_rate": 1.4719741131898985e-06, "loss": 0.0006, "step": 235420 }, { "epoch": 1.548851009519549, "grad_norm": 0.012922573887664086, "learning_rate": 1.4715673184254564e-06, "loss": 0.0006, "step": 235430 }, { "epoch": 1.5489167976947824, "grad_norm": 0.059833540864566485, "learning_rate": 1.4711605701803523e-06, "loss": 0.0008, "step": 235440 }, { "epoch": 1.5489825858700157, "grad_norm": 0.02246745921583127, "learning_rate": 1.4707538684599477e-06, "loss": 0.0004, "step": 235450 }, { "epoch": 1.549048374045249, "grad_norm": 0.00046895864935414576, "learning_rate": 1.4703472132696057e-06, "loss": 0.0004, "step": 235460 }, { "epoch": 1.5491141622204825, "grad_norm": 0.00019822763251207086, "learning_rate": 1.4699406046146874e-06, "loss": 0.0003, "step": 235470 }, { "epoch": 1.549179950395716, "grad_norm": 0.06517430282392842, "learning_rate": 1.4695340425005534e-06, "loss": 0.0006, "step": 235480 }, { "epoch": 1.5492457385709493, "grad_norm": 0.02979618161204284, "learning_rate": 1.469127526932564e-06, "loss": 0.0002, "step": 235490 }, { "epoch": 1.5493115267461826, "grad_norm": 0.030722188768474264, "learning_rate": 1.4687210579160783e-06, "loss": 0.0003, "step": 235500 }, { "epoch": 1.549377314921416, "grad_norm": 0.0004980473424305051, "learning_rate": 1.4683146354564542e-06, "loss": 0.0004, "step": 235510 }, { "epoch": 1.5494431030966496, "grad_norm": 0.04348678746177381, "learning_rate": 1.4679082595590533e-06, "loss": 0.0004, "step": 235520 }, { "epoch": 1.5495088912718828, "grad_norm": 0.012904356607619558, "learning_rate": 1.4675019302292314e-06, "loss": 0.0002, "step": 235530 }, { "epoch": 1.5495746794471161, "grad_norm": 0.007897431859198271, "learning_rate": 1.4670956474723457e-06, "loss": 0.0004, "step": 235540 }, { "epoch": 1.5496404676223494, "grad_norm": 0.004394572326783665, "learning_rate": 1.4666894112937524e-06, "loss": 0.0003, "step": 235550 }, { "epoch": 1.549706255797583, "grad_norm": 0.0032898182622983618, "learning_rate": 1.4662832216988082e-06, "loss": 0.0002, "step": 235560 }, { "epoch": 1.5497720439728164, "grad_norm": 0.04374160220368932, "learning_rate": 1.4658770786928673e-06, "loss": 0.0003, "step": 235570 }, { "epoch": 1.5498378321480497, "grad_norm": 0.07293225314729239, "learning_rate": 1.4654709822812852e-06, "loss": 0.0004, "step": 235580 }, { "epoch": 1.549903620323283, "grad_norm": 0.04661100421032197, "learning_rate": 1.4650649324694155e-06, "loss": 0.0004, "step": 235590 }, { "epoch": 1.5499694084985165, "grad_norm": 0.006183193577373175, "learning_rate": 1.4646589292626113e-06, "loss": 0.0004, "step": 235600 }, { "epoch": 1.55003519667375, "grad_norm": 0.006519393740206234, "learning_rate": 1.4642529726662263e-06, "loss": 0.0002, "step": 235610 }, { "epoch": 1.5501009848489833, "grad_norm": 0.09589725037467495, "learning_rate": 1.4638470626856117e-06, "loss": 0.0006, "step": 235620 }, { "epoch": 1.5501667730242166, "grad_norm": 0.011669619386839962, "learning_rate": 1.4634411993261198e-06, "loss": 0.0008, "step": 235630 }, { "epoch": 1.5502325611994499, "grad_norm": 0.02253845247138497, "learning_rate": 1.4630353825931e-06, "loss": 0.0004, "step": 235640 }, { "epoch": 1.5502983493746834, "grad_norm": 0.051659724463456916, "learning_rate": 1.4626296124919053e-06, "loss": 0.0006, "step": 235650 }, { "epoch": 1.5503641375499169, "grad_norm": 0.000654235530520389, "learning_rate": 1.462223889027884e-06, "loss": 0.0005, "step": 235660 }, { "epoch": 1.5504299257251501, "grad_norm": 0.036440914802249015, "learning_rate": 1.4618182122063856e-06, "loss": 0.0007, "step": 235670 }, { "epoch": 1.5504957139003834, "grad_norm": 0.01089130995465788, "learning_rate": 1.461412582032758e-06, "loss": 0.0006, "step": 235680 }, { "epoch": 1.550561502075617, "grad_norm": 0.018894400625197018, "learning_rate": 1.4610069985123493e-06, "loss": 0.0005, "step": 235690 }, { "epoch": 1.5506272902508504, "grad_norm": 0.06764565779281048, "learning_rate": 1.460601461650507e-06, "loss": 0.0006, "step": 235700 }, { "epoch": 1.5506930784260837, "grad_norm": 0.002927121167085137, "learning_rate": 1.4601959714525777e-06, "loss": 0.0002, "step": 235710 }, { "epoch": 1.550758866601317, "grad_norm": 0.0004416852254642352, "learning_rate": 1.4597905279239071e-06, "loss": 0.0004, "step": 235720 }, { "epoch": 1.5508246547765505, "grad_norm": 0.033703057346998244, "learning_rate": 1.4593851310698414e-06, "loss": 0.0002, "step": 235730 }, { "epoch": 1.5508904429517838, "grad_norm": 0.011911629454873373, "learning_rate": 1.4589797808957245e-06, "loss": 0.0005, "step": 235740 }, { "epoch": 1.5509562311270173, "grad_norm": 0.04212811323045117, "learning_rate": 1.458574477406901e-06, "loss": 0.0007, "step": 235750 }, { "epoch": 1.5510220193022506, "grad_norm": 0.011421011952932337, "learning_rate": 1.4581692206087145e-06, "loss": 0.0005, "step": 235760 }, { "epoch": 1.5510878074774839, "grad_norm": 0.030945575596981342, "learning_rate": 1.4577640105065062e-06, "loss": 0.0006, "step": 235770 }, { "epoch": 1.5511535956527174, "grad_norm": 0.022654149148024234, "learning_rate": 1.4573588471056215e-06, "loss": 0.0008, "step": 235780 }, { "epoch": 1.5512193838279509, "grad_norm": 0.02006950436292903, "learning_rate": 1.4569537304114012e-06, "loss": 0.0007, "step": 235790 }, { "epoch": 1.5512851720031842, "grad_norm": 0.009707363741841854, "learning_rate": 1.4565486604291857e-06, "loss": 0.0005, "step": 235800 }, { "epoch": 1.5513509601784174, "grad_norm": 0.043342881098701075, "learning_rate": 1.4561436371643163e-06, "loss": 0.0007, "step": 235810 }, { "epoch": 1.551416748353651, "grad_norm": 0.05107141027027926, "learning_rate": 1.4557386606221319e-06, "loss": 0.0007, "step": 235820 }, { "epoch": 1.5514825365288845, "grad_norm": 0.06985812718170883, "learning_rate": 1.4553337308079718e-06, "loss": 0.0008, "step": 235830 }, { "epoch": 1.5515483247041177, "grad_norm": 0.015039277568042521, "learning_rate": 1.454928847727176e-06, "loss": 0.0004, "step": 235840 }, { "epoch": 1.551614112879351, "grad_norm": 0.05859546754681678, "learning_rate": 1.454524011385081e-06, "loss": 0.0002, "step": 235850 }, { "epoch": 1.5516799010545843, "grad_norm": 0.007969679505424957, "learning_rate": 1.4541192217870255e-06, "loss": 0.0007, "step": 235860 }, { "epoch": 1.5517456892298178, "grad_norm": 0.0067700746624386465, "learning_rate": 1.453714478938345e-06, "loss": 0.0003, "step": 235870 }, { "epoch": 1.5518114774050513, "grad_norm": 0.03691228618795284, "learning_rate": 1.4533097828443765e-06, "loss": 0.0011, "step": 235880 }, { "epoch": 1.5518772655802846, "grad_norm": 0.00954674570725809, "learning_rate": 1.4529051335104555e-06, "loss": 0.0002, "step": 235890 }, { "epoch": 1.5519430537555179, "grad_norm": 0.027757467176364934, "learning_rate": 1.452500530941915e-06, "loss": 0.0025, "step": 235900 }, { "epoch": 1.5520088419307514, "grad_norm": 0.017844873791539498, "learning_rate": 1.4520959751440927e-06, "loss": 0.0006, "step": 235910 }, { "epoch": 1.552074630105985, "grad_norm": 0.001273135013116179, "learning_rate": 1.4516914661223213e-06, "loss": 0.0003, "step": 235920 }, { "epoch": 1.5521404182812182, "grad_norm": 0.07316786082141644, "learning_rate": 1.4512870038819332e-06, "loss": 0.0009, "step": 235930 }, { "epoch": 1.5522062064564515, "grad_norm": 0.017394992667983524, "learning_rate": 1.4508825884282606e-06, "loss": 0.0004, "step": 235940 }, { "epoch": 1.5522719946316847, "grad_norm": 0.059512099971773876, "learning_rate": 1.4504782197666368e-06, "loss": 0.0006, "step": 235950 }, { "epoch": 1.5523377828069183, "grad_norm": 0.026327439451635546, "learning_rate": 1.4500738979023916e-06, "loss": 0.0002, "step": 235960 }, { "epoch": 1.5524035709821518, "grad_norm": 0.08642531726123438, "learning_rate": 1.4496696228408557e-06, "loss": 0.0005, "step": 235970 }, { "epoch": 1.552469359157385, "grad_norm": 0.10922968488936906, "learning_rate": 1.4492653945873603e-06, "loss": 0.0007, "step": 235980 }, { "epoch": 1.5525351473326183, "grad_norm": 0.027830143875042217, "learning_rate": 1.4488612131472334e-06, "loss": 0.0008, "step": 235990 }, { "epoch": 1.5526009355078518, "grad_norm": 0.09428643612897195, "learning_rate": 1.4484570785258052e-06, "loss": 0.0006, "step": 236000 }, { "epoch": 1.5526667236830853, "grad_norm": 0.2399727303770749, "learning_rate": 1.4480529907284025e-06, "loss": 0.0004, "step": 236010 }, { "epoch": 1.5527325118583186, "grad_norm": 0.013428355457446828, "learning_rate": 1.4476489497603534e-06, "loss": 0.0005, "step": 236020 }, { "epoch": 1.552798300033552, "grad_norm": 0.017018120450633566, "learning_rate": 1.4472449556269835e-06, "loss": 0.0008, "step": 236030 }, { "epoch": 1.5528640882087852, "grad_norm": 0.01590576034589605, "learning_rate": 1.446841008333622e-06, "loss": 0.0003, "step": 236040 }, { "epoch": 1.5529298763840187, "grad_norm": 0.03996425196302472, "learning_rate": 1.4464371078855931e-06, "loss": 0.0004, "step": 236050 }, { "epoch": 1.5529956645592522, "grad_norm": 0.012618710184004745, "learning_rate": 1.4460332542882222e-06, "loss": 0.0004, "step": 236060 }, { "epoch": 1.5530614527344855, "grad_norm": 0.02818400060933517, "learning_rate": 1.4456294475468336e-06, "loss": 0.0007, "step": 236070 }, { "epoch": 1.5531272409097188, "grad_norm": 0.0006994804264071893, "learning_rate": 1.4452256876667503e-06, "loss": 0.0006, "step": 236080 }, { "epoch": 1.5531930290849523, "grad_norm": 0.00760436363413618, "learning_rate": 1.4448219746532966e-06, "loss": 0.0005, "step": 236090 }, { "epoch": 1.5532588172601858, "grad_norm": 0.0650534535538934, "learning_rate": 1.4444183085117947e-06, "loss": 0.0009, "step": 236100 }, { "epoch": 1.553324605435419, "grad_norm": 0.05048959022058215, "learning_rate": 1.4440146892475664e-06, "loss": 0.0004, "step": 236110 }, { "epoch": 1.5533903936106523, "grad_norm": 0.019491396759046226, "learning_rate": 1.4436111168659333e-06, "loss": 0.0009, "step": 236120 }, { "epoch": 1.5534561817858858, "grad_norm": 0.01913998402301293, "learning_rate": 1.443207591372216e-06, "loss": 0.0004, "step": 236130 }, { "epoch": 1.5535219699611194, "grad_norm": 0.09864238622566313, "learning_rate": 1.4428041127717352e-06, "loss": 0.0005, "step": 236140 }, { "epoch": 1.5535877581363526, "grad_norm": 0.006766581227849195, "learning_rate": 1.44240068106981e-06, "loss": 0.0005, "step": 236150 }, { "epoch": 1.553653546311586, "grad_norm": 0.023501355649735177, "learning_rate": 1.441997296271757e-06, "loss": 0.0009, "step": 236160 }, { "epoch": 1.5537193344868192, "grad_norm": 0.0007511965106743099, "learning_rate": 1.4415939583828991e-06, "loss": 0.0003, "step": 236170 }, { "epoch": 1.5537851226620527, "grad_norm": 0.006635692176321964, "learning_rate": 1.4411906674085513e-06, "loss": 0.0007, "step": 236180 }, { "epoch": 1.5538509108372862, "grad_norm": 0.01629091513609725, "learning_rate": 1.4407874233540308e-06, "loss": 0.0002, "step": 236190 }, { "epoch": 1.5539166990125195, "grad_norm": 0.007309515174455738, "learning_rate": 1.4403842262246543e-06, "loss": 0.0005, "step": 236200 }, { "epoch": 1.5539824871877528, "grad_norm": 0.09024381796635651, "learning_rate": 1.4399810760257377e-06, "loss": 0.0021, "step": 236210 }, { "epoch": 1.5540482753629863, "grad_norm": 0.04983731390105024, "learning_rate": 1.4395779727625957e-06, "loss": 0.0005, "step": 236220 }, { "epoch": 1.5541140635382198, "grad_norm": 0.007940333394527134, "learning_rate": 1.4391749164405433e-06, "loss": 0.0008, "step": 236230 }, { "epoch": 1.554179851713453, "grad_norm": 0.1798017498464358, "learning_rate": 1.4387719070648943e-06, "loss": 0.0008, "step": 236240 }, { "epoch": 1.5542456398886864, "grad_norm": 0.04175965495288803, "learning_rate": 1.4383689446409622e-06, "loss": 0.0003, "step": 236250 }, { "epoch": 1.5543114280639196, "grad_norm": 0.014057554324380743, "learning_rate": 1.4379660291740589e-06, "loss": 0.0006, "step": 236260 }, { "epoch": 1.5543772162391531, "grad_norm": 0.04090849205204622, "learning_rate": 1.4375631606694978e-06, "loss": 0.0005, "step": 236270 }, { "epoch": 1.5544430044143867, "grad_norm": 0.0014526131175281569, "learning_rate": 1.4371603391325895e-06, "loss": 0.0009, "step": 236280 }, { "epoch": 1.55450879258962, "grad_norm": 0.0233920325304589, "learning_rate": 1.4367575645686432e-06, "loss": 0.0003, "step": 236290 }, { "epoch": 1.5545745807648532, "grad_norm": 0.04406903876857352, "learning_rate": 1.4363548369829728e-06, "loss": 0.0002, "step": 236300 }, { "epoch": 1.5546403689400867, "grad_norm": 0.00018421897788960953, "learning_rate": 1.4359521563808866e-06, "loss": 0.0004, "step": 236310 }, { "epoch": 1.5547061571153202, "grad_norm": 0.040669601850736065, "learning_rate": 1.4355495227676924e-06, "loss": 0.0005, "step": 236320 }, { "epoch": 1.5547719452905535, "grad_norm": 0.038594588906515966, "learning_rate": 1.4351469361486992e-06, "loss": 0.0004, "step": 236330 }, { "epoch": 1.5548377334657868, "grad_norm": 0.03353047292555022, "learning_rate": 1.434744396529215e-06, "loss": 0.0003, "step": 236340 }, { "epoch": 1.55490352164102, "grad_norm": 0.005582743361826149, "learning_rate": 1.4343419039145472e-06, "loss": 0.001, "step": 236350 }, { "epoch": 1.5549693098162536, "grad_norm": 0.04486216591820709, "learning_rate": 1.433939458310002e-06, "loss": 0.0007, "step": 236360 }, { "epoch": 1.555035097991487, "grad_norm": 0.05305335323638143, "learning_rate": 1.4335370597208847e-06, "loss": 0.0006, "step": 236370 }, { "epoch": 1.5551008861667204, "grad_norm": 0.01864211963036977, "learning_rate": 1.433134708152501e-06, "loss": 0.0007, "step": 236380 }, { "epoch": 1.5551666743419537, "grad_norm": 0.04365720262668508, "learning_rate": 1.432732403610156e-06, "loss": 0.0002, "step": 236390 }, { "epoch": 1.5552324625171872, "grad_norm": 0.014486623667011993, "learning_rate": 1.432330146099153e-06, "loss": 0.0009, "step": 236400 }, { "epoch": 1.5552982506924207, "grad_norm": 0.004412819325438113, "learning_rate": 1.431927935624796e-06, "loss": 0.0006, "step": 236410 }, { "epoch": 1.555364038867654, "grad_norm": 0.031131191254356108, "learning_rate": 1.4315257721923858e-06, "loss": 0.0006, "step": 236420 }, { "epoch": 1.5554298270428872, "grad_norm": 0.004026699179310416, "learning_rate": 1.4311236558072284e-06, "loss": 0.0002, "step": 236430 }, { "epoch": 1.5554956152181207, "grad_norm": 0.0153336499333427, "learning_rate": 1.4307215864746227e-06, "loss": 0.0004, "step": 236440 }, { "epoch": 1.555561403393354, "grad_norm": 0.0696258542405933, "learning_rate": 1.4303195641998707e-06, "loss": 0.0007, "step": 236450 }, { "epoch": 1.5556271915685875, "grad_norm": 0.004481647683661877, "learning_rate": 1.4299175889882722e-06, "loss": 0.0003, "step": 236460 }, { "epoch": 1.5556929797438208, "grad_norm": 0.046986287829517644, "learning_rate": 1.4295156608451266e-06, "loss": 0.0004, "step": 236470 }, { "epoch": 1.555758767919054, "grad_norm": 0.00995577028938005, "learning_rate": 1.4291137797757336e-06, "loss": 0.0005, "step": 236480 }, { "epoch": 1.5558245560942876, "grad_norm": 0.0865641900730033, "learning_rate": 1.4287119457853916e-06, "loss": 0.0006, "step": 236490 }, { "epoch": 1.555890344269521, "grad_norm": 0.024590581509195134, "learning_rate": 1.428310158879398e-06, "loss": 0.0003, "step": 236500 }, { "epoch": 1.5559561324447544, "grad_norm": 0.0066708616929025836, "learning_rate": 1.4279084190630504e-06, "loss": 0.0005, "step": 236510 }, { "epoch": 1.5560219206199877, "grad_norm": 0.06692041636770452, "learning_rate": 1.4275067263416447e-06, "loss": 0.0006, "step": 236520 }, { "epoch": 1.5560877087952212, "grad_norm": 0.04654290154979366, "learning_rate": 1.4271050807204785e-06, "loss": 0.0005, "step": 236530 }, { "epoch": 1.5561534969704547, "grad_norm": 0.018866970126951148, "learning_rate": 1.4267034822048447e-06, "loss": 0.0003, "step": 236540 }, { "epoch": 1.556219285145688, "grad_norm": 0.012045316377318862, "learning_rate": 1.4263019308000387e-06, "loss": 0.0002, "step": 236550 }, { "epoch": 1.5562850733209213, "grad_norm": 0.048852835223509666, "learning_rate": 1.4259004265113568e-06, "loss": 0.0013, "step": 236560 }, { "epoch": 1.5563508614961545, "grad_norm": 0.016432678780199734, "learning_rate": 1.4254989693440907e-06, "loss": 0.0004, "step": 236570 }, { "epoch": 1.556416649671388, "grad_norm": 0.059091678797243405, "learning_rate": 1.4250975593035337e-06, "loss": 0.0021, "step": 236580 }, { "epoch": 1.5564824378466215, "grad_norm": 0.0038210879837209237, "learning_rate": 1.4246961963949779e-06, "loss": 0.0005, "step": 236590 }, { "epoch": 1.5565482260218548, "grad_norm": 0.01865390580086126, "learning_rate": 1.424294880623715e-06, "loss": 0.0001, "step": 236600 }, { "epoch": 1.5566140141970881, "grad_norm": 0.09668750732730165, "learning_rate": 1.423893611995036e-06, "loss": 0.0004, "step": 236610 }, { "epoch": 1.5566798023723216, "grad_norm": 0.004538388829693143, "learning_rate": 1.423492390514231e-06, "loss": 0.0004, "step": 236620 }, { "epoch": 1.5567455905475551, "grad_norm": 0.08469973694393576, "learning_rate": 1.42309121618659e-06, "loss": 0.0003, "step": 236630 }, { "epoch": 1.5568113787227884, "grad_norm": 0.006017949115178563, "learning_rate": 1.4226900890174022e-06, "loss": 0.0004, "step": 236640 }, { "epoch": 1.5568771668980217, "grad_norm": 0.01850057219699676, "learning_rate": 1.4222890090119556e-06, "loss": 0.0007, "step": 236650 }, { "epoch": 1.556942955073255, "grad_norm": 0.047535477849686515, "learning_rate": 1.4218879761755388e-06, "loss": 0.0009, "step": 236660 }, { "epoch": 1.5570087432484885, "grad_norm": 0.044065848130614385, "learning_rate": 1.4214869905134386e-06, "loss": 0.0005, "step": 236670 }, { "epoch": 1.557074531423722, "grad_norm": 0.00932156725416804, "learning_rate": 1.4210860520309405e-06, "loss": 0.0006, "step": 236680 }, { "epoch": 1.5571403195989553, "grad_norm": 0.0010104765914098056, "learning_rate": 1.4206851607333333e-06, "loss": 0.0003, "step": 236690 }, { "epoch": 1.5572061077741886, "grad_norm": 0.011886741876020214, "learning_rate": 1.420284316625901e-06, "loss": 0.0004, "step": 236700 }, { "epoch": 1.557271895949422, "grad_norm": 0.0156578994203434, "learning_rate": 1.4198835197139287e-06, "loss": 0.0009, "step": 236710 }, { "epoch": 1.5573376841246556, "grad_norm": 0.0033216578078745537, "learning_rate": 1.4194827700026998e-06, "loss": 0.0004, "step": 236720 }, { "epoch": 1.5574034722998888, "grad_norm": 0.0024086899117687875, "learning_rate": 1.4190820674974982e-06, "loss": 0.0003, "step": 236730 }, { "epoch": 1.5574692604751221, "grad_norm": 0.01303545484637134, "learning_rate": 1.4186814122036068e-06, "loss": 0.0003, "step": 236740 }, { "epoch": 1.5575350486503556, "grad_norm": 0.053391684671133706, "learning_rate": 1.418280804126308e-06, "loss": 0.0003, "step": 236750 }, { "epoch": 1.557600836825589, "grad_norm": 0.0006332101353890604, "learning_rate": 1.4178802432708837e-06, "loss": 0.0004, "step": 236760 }, { "epoch": 1.5576666250008224, "grad_norm": 0.02438841669748772, "learning_rate": 1.4174797296426145e-06, "loss": 0.0007, "step": 236770 }, { "epoch": 1.5577324131760557, "grad_norm": 0.04323589116093464, "learning_rate": 1.4170792632467812e-06, "loss": 0.0006, "step": 236780 }, { "epoch": 1.557798201351289, "grad_norm": 0.011811541077426237, "learning_rate": 1.416678844088663e-06, "loss": 0.0006, "step": 236790 }, { "epoch": 1.5578639895265225, "grad_norm": 0.01673336786317357, "learning_rate": 1.4162784721735396e-06, "loss": 0.0003, "step": 236800 }, { "epoch": 1.557929777701756, "grad_norm": 0.027522496120050623, "learning_rate": 1.415878147506688e-06, "loss": 0.0004, "step": 236810 }, { "epoch": 1.5579955658769893, "grad_norm": 0.006045477467460545, "learning_rate": 1.415477870093389e-06, "loss": 0.0004, "step": 236820 }, { "epoch": 1.5580613540522226, "grad_norm": 0.017101022722515628, "learning_rate": 1.4150776399389188e-06, "loss": 0.0004, "step": 236830 }, { "epoch": 1.558127142227456, "grad_norm": 0.02107608138070197, "learning_rate": 1.4146774570485533e-06, "loss": 0.0003, "step": 236840 }, { "epoch": 1.5581929304026896, "grad_norm": 0.017316232444961374, "learning_rate": 1.414277321427569e-06, "loss": 0.0004, "step": 236850 }, { "epoch": 1.5582587185779229, "grad_norm": 0.05169989559966424, "learning_rate": 1.4138772330812417e-06, "loss": 0.0004, "step": 236860 }, { "epoch": 1.5583245067531561, "grad_norm": 0.03566629323719705, "learning_rate": 1.4134771920148461e-06, "loss": 0.0007, "step": 236870 }, { "epoch": 1.5583902949283894, "grad_norm": 0.037606143961354976, "learning_rate": 1.413077198233656e-06, "loss": 0.0007, "step": 236880 }, { "epoch": 1.558456083103623, "grad_norm": 0.050143871122546264, "learning_rate": 1.4126772517429449e-06, "loss": 0.0008, "step": 236890 }, { "epoch": 1.5585218712788564, "grad_norm": 0.013463361582317986, "learning_rate": 1.4122773525479865e-06, "loss": 0.0012, "step": 236900 }, { "epoch": 1.5585876594540897, "grad_norm": 0.026145271643947796, "learning_rate": 1.4118775006540525e-06, "loss": 0.0006, "step": 236910 }, { "epoch": 1.558653447629323, "grad_norm": 0.0215507671143589, "learning_rate": 1.4114776960664145e-06, "loss": 0.0004, "step": 236920 }, { "epoch": 1.5587192358045565, "grad_norm": 0.05826140949575887, "learning_rate": 1.411077938790344e-06, "loss": 0.0006, "step": 236930 }, { "epoch": 1.55878502397979, "grad_norm": 0.05058119409392345, "learning_rate": 1.41067822883111e-06, "loss": 0.0006, "step": 236940 }, { "epoch": 1.5588508121550233, "grad_norm": 0.019219420844627258, "learning_rate": 1.4102785661939843e-06, "loss": 0.0006, "step": 236950 }, { "epoch": 1.5589166003302566, "grad_norm": 0.025442193185336175, "learning_rate": 1.4098789508842365e-06, "loss": 0.0005, "step": 236960 }, { "epoch": 1.5589823885054899, "grad_norm": 0.044535072888764, "learning_rate": 1.4094793829071335e-06, "loss": 0.0013, "step": 236970 }, { "epoch": 1.5590481766807234, "grad_norm": 0.0027000185778162473, "learning_rate": 1.4090798622679436e-06, "loss": 0.0004, "step": 236980 }, { "epoch": 1.5591139648559569, "grad_norm": 0.005560366899904598, "learning_rate": 1.4086803889719347e-06, "loss": 0.0041, "step": 236990 }, { "epoch": 1.5591797530311902, "grad_norm": 0.010894080248917378, "learning_rate": 1.4082809630243732e-06, "loss": 0.0006, "step": 237000 }, { "epoch": 1.5592455412064234, "grad_norm": 0.028096907108805472, "learning_rate": 1.4078815844305255e-06, "loss": 0.0003, "step": 237010 }, { "epoch": 1.559311329381657, "grad_norm": 0.07058049393768537, "learning_rate": 1.4074822531956567e-06, "loss": 0.0004, "step": 237020 }, { "epoch": 1.5593771175568905, "grad_norm": 6.710590457441479e-05, "learning_rate": 1.4070829693250315e-06, "loss": 0.0007, "step": 237030 }, { "epoch": 1.5594429057321237, "grad_norm": 0.06080596300848937, "learning_rate": 1.4066837328239141e-06, "loss": 0.0005, "step": 237040 }, { "epoch": 1.559508693907357, "grad_norm": 0.011298114390454958, "learning_rate": 1.4062845436975686e-06, "loss": 0.0003, "step": 237050 }, { "epoch": 1.5595744820825905, "grad_norm": 0.009119087909489319, "learning_rate": 1.4058854019512574e-06, "loss": 0.0003, "step": 237060 }, { "epoch": 1.5596402702578238, "grad_norm": 0.0032643223795519364, "learning_rate": 1.405486307590242e-06, "loss": 0.0007, "step": 237070 }, { "epoch": 1.5597060584330573, "grad_norm": 0.04420254755667997, "learning_rate": 1.4050872606197863e-06, "loss": 0.0004, "step": 237080 }, { "epoch": 1.5597718466082906, "grad_norm": 0.00024909801900307955, "learning_rate": 1.4046882610451502e-06, "loss": 0.0005, "step": 237090 }, { "epoch": 1.5598376347835239, "grad_norm": 0.020162226070922305, "learning_rate": 1.4042893088715948e-06, "loss": 0.0004, "step": 237100 }, { "epoch": 1.5599034229587574, "grad_norm": 0.04140021684978049, "learning_rate": 1.4038904041043787e-06, "loss": 0.0004, "step": 237110 }, { "epoch": 1.559969211133991, "grad_norm": 6.222560052469206e-05, "learning_rate": 1.4034915467487625e-06, "loss": 0.0005, "step": 237120 }, { "epoch": 1.5600349993092242, "grad_norm": 0.014703594247268706, "learning_rate": 1.403092736810004e-06, "loss": 0.0028, "step": 237130 }, { "epoch": 1.5601007874844575, "grad_norm": 0.06272346593163848, "learning_rate": 1.402693974293361e-06, "loss": 0.0005, "step": 237140 }, { "epoch": 1.560166575659691, "grad_norm": 0.03301040593430853, "learning_rate": 1.402295259204091e-06, "loss": 0.0008, "step": 237150 }, { "epoch": 1.5602323638349245, "grad_norm": 0.027646235154085565, "learning_rate": 1.4018965915474515e-06, "loss": 0.0004, "step": 237160 }, { "epoch": 1.5602981520101578, "grad_norm": 0.03369001444085605, "learning_rate": 1.4014979713286968e-06, "loss": 0.0003, "step": 237170 }, { "epoch": 1.560363940185391, "grad_norm": 0.014681477059758889, "learning_rate": 1.4010993985530847e-06, "loss": 0.0004, "step": 237180 }, { "epoch": 1.5604297283606243, "grad_norm": 0.0031198837953566212, "learning_rate": 1.4007008732258682e-06, "loss": 0.0002, "step": 237190 }, { "epoch": 1.5604955165358578, "grad_norm": 0.02150155176044105, "learning_rate": 1.400302395352302e-06, "loss": 0.0005, "step": 237200 }, { "epoch": 1.5605613047110913, "grad_norm": 0.004971145230832677, "learning_rate": 1.39990396493764e-06, "loss": 0.0003, "step": 237210 }, { "epoch": 1.5606270928863246, "grad_norm": 0.041413203429062635, "learning_rate": 1.399505581987135e-06, "loss": 0.0004, "step": 237220 }, { "epoch": 1.560692881061558, "grad_norm": 0.014892678146433337, "learning_rate": 1.399107246506039e-06, "loss": 0.0005, "step": 237230 }, { "epoch": 1.5607586692367914, "grad_norm": 0.03407318730695905, "learning_rate": 1.3987089584996043e-06, "loss": 0.0005, "step": 237240 }, { "epoch": 1.560824457412025, "grad_norm": 0.023330901159787983, "learning_rate": 1.398310717973082e-06, "loss": 0.0035, "step": 237250 }, { "epoch": 1.5608902455872582, "grad_norm": 0.003161329988251628, "learning_rate": 1.3979125249317222e-06, "loss": 0.0006, "step": 237260 }, { "epoch": 1.5609560337624915, "grad_norm": 0.06980367176244122, "learning_rate": 1.3975143793807745e-06, "loss": 0.0006, "step": 237270 }, { "epoch": 1.5610218219377248, "grad_norm": 0.003944688470288568, "learning_rate": 1.3971162813254879e-06, "loss": 0.0004, "step": 237280 }, { "epoch": 1.5610876101129583, "grad_norm": 0.03886823900825042, "learning_rate": 1.3967182307711125e-06, "loss": 0.0004, "step": 237290 }, { "epoch": 1.5611533982881918, "grad_norm": 0.016443857941199586, "learning_rate": 1.3963202277228932e-06, "loss": 0.0006, "step": 237300 }, { "epoch": 1.561219186463425, "grad_norm": 0.023652439650367535, "learning_rate": 1.395922272186081e-06, "loss": 0.0006, "step": 237310 }, { "epoch": 1.5612849746386583, "grad_norm": 0.016500540978500296, "learning_rate": 1.3955243641659205e-06, "loss": 0.0005, "step": 237320 }, { "epoch": 1.5613507628138918, "grad_norm": 0.03137622949562247, "learning_rate": 1.3951265036676586e-06, "loss": 0.0004, "step": 237330 }, { "epoch": 1.5614165509891254, "grad_norm": 0.01743844831374416, "learning_rate": 1.3947286906965407e-06, "loss": 0.0006, "step": 237340 }, { "epoch": 1.5614823391643586, "grad_norm": 0.003737346854625407, "learning_rate": 1.3943309252578108e-06, "loss": 0.0003, "step": 237350 }, { "epoch": 1.561548127339592, "grad_norm": 0.010949189016034927, "learning_rate": 1.3939332073567135e-06, "loss": 0.0006, "step": 237360 }, { "epoch": 1.5616139155148252, "grad_norm": 0.009854378166743191, "learning_rate": 1.3935355369984933e-06, "loss": 0.0011, "step": 237370 }, { "epoch": 1.5616797036900587, "grad_norm": 0.001401323872274174, "learning_rate": 1.3931379141883917e-06, "loss": 0.0005, "step": 237380 }, { "epoch": 1.5617454918652922, "grad_norm": 0.018666738692126355, "learning_rate": 1.3927403389316514e-06, "loss": 0.0005, "step": 237390 }, { "epoch": 1.5618112800405255, "grad_norm": 0.027520491000611703, "learning_rate": 1.3923428112335147e-06, "loss": 0.0016, "step": 237400 }, { "epoch": 1.5618770682157588, "grad_norm": 0.028714128367992045, "learning_rate": 1.391945331099222e-06, "loss": 0.0006, "step": 237410 }, { "epoch": 1.5619428563909923, "grad_norm": 0.02172576984864143, "learning_rate": 1.391547898534014e-06, "loss": 0.002, "step": 237420 }, { "epoch": 1.5620086445662258, "grad_norm": 0.19473083534115712, "learning_rate": 1.391150513543129e-06, "loss": 0.0006, "step": 237430 }, { "epoch": 1.562074432741459, "grad_norm": 0.004232122526774243, "learning_rate": 1.3907531761318094e-06, "loss": 0.0005, "step": 237440 }, { "epoch": 1.5621402209166924, "grad_norm": 0.00025706361517822057, "learning_rate": 1.3903558863052918e-06, "loss": 0.0006, "step": 237450 }, { "epoch": 1.5622060090919259, "grad_norm": 0.013549272579744892, "learning_rate": 1.3899586440688146e-06, "loss": 0.0003, "step": 237460 }, { "epoch": 1.5622717972671591, "grad_norm": 0.014735648169082849, "learning_rate": 1.3895614494276144e-06, "loss": 0.0005, "step": 237470 }, { "epoch": 1.5623375854423927, "grad_norm": 0.01121355686123378, "learning_rate": 1.3891643023869283e-06, "loss": 0.001, "step": 237480 }, { "epoch": 1.562403373617626, "grad_norm": 0.026382443293913115, "learning_rate": 1.3887672029519928e-06, "loss": 0.0006, "step": 237490 }, { "epoch": 1.5624691617928592, "grad_norm": 0.013364003097831834, "learning_rate": 1.388370151128043e-06, "loss": 0.0003, "step": 237500 }, { "epoch": 1.5625349499680927, "grad_norm": 0.014417257135994491, "learning_rate": 1.3879731469203128e-06, "loss": 0.0003, "step": 237510 }, { "epoch": 1.5626007381433262, "grad_norm": 0.05957733857820321, "learning_rate": 1.387576190334038e-06, "loss": 0.0006, "step": 237520 }, { "epoch": 1.5626665263185595, "grad_norm": 0.012976593266001084, "learning_rate": 1.3871792813744506e-06, "loss": 0.0003, "step": 237530 }, { "epoch": 1.5627323144937928, "grad_norm": 0.04528501399529673, "learning_rate": 1.3867824200467844e-06, "loss": 0.0006, "step": 237540 }, { "epoch": 1.5627981026690263, "grad_norm": 5.0863602628788116e-05, "learning_rate": 1.3863856063562714e-06, "loss": 0.0005, "step": 237550 }, { "epoch": 1.5628638908442598, "grad_norm": 0.03525539260364645, "learning_rate": 1.3859888403081412e-06, "loss": 0.0006, "step": 237560 }, { "epoch": 1.562929679019493, "grad_norm": 0.05691231745121524, "learning_rate": 1.385592121907629e-06, "loss": 0.0005, "step": 237570 }, { "epoch": 1.5629954671947264, "grad_norm": 0.0741953518773888, "learning_rate": 1.385195451159963e-06, "loss": 0.0006, "step": 237580 }, { "epoch": 1.5630612553699597, "grad_norm": 0.12334591396487704, "learning_rate": 1.3847988280703728e-06, "loss": 0.0009, "step": 237590 }, { "epoch": 1.5631270435451932, "grad_norm": 0.026965933207370077, "learning_rate": 1.384402252644088e-06, "loss": 0.001, "step": 237600 }, { "epoch": 1.5631928317204267, "grad_norm": 0.010268215752112583, "learning_rate": 1.384005724886337e-06, "loss": 0.0008, "step": 237610 }, { "epoch": 1.56325861989566, "grad_norm": 0.008125717374191455, "learning_rate": 1.3836092448023476e-06, "loss": 0.0003, "step": 237620 }, { "epoch": 1.5633244080708932, "grad_norm": 0.018655485394809723, "learning_rate": 1.3832128123973466e-06, "loss": 0.0003, "step": 237630 }, { "epoch": 1.5633901962461267, "grad_norm": 0.01744105577795333, "learning_rate": 1.382816427676561e-06, "loss": 0.0009, "step": 237640 }, { "epoch": 1.5634559844213602, "grad_norm": 0.0060606599222083, "learning_rate": 1.3824200906452167e-06, "loss": 0.0006, "step": 237650 }, { "epoch": 1.5635217725965935, "grad_norm": 0.011774475672262004, "learning_rate": 1.3820238013085397e-06, "loss": 0.0004, "step": 237660 }, { "epoch": 1.5635875607718268, "grad_norm": 0.02297956363134114, "learning_rate": 1.3816275596717538e-06, "loss": 0.0009, "step": 237670 }, { "epoch": 1.56365334894706, "grad_norm": 0.026608225512160463, "learning_rate": 1.3812313657400834e-06, "loss": 0.0001, "step": 237680 }, { "epoch": 1.5637191371222936, "grad_norm": 0.036993704305319376, "learning_rate": 1.3808352195187509e-06, "loss": 0.0005, "step": 237690 }, { "epoch": 1.563784925297527, "grad_norm": 0.0014855843607289698, "learning_rate": 1.3804391210129813e-06, "loss": 0.0003, "step": 237700 }, { "epoch": 1.5638507134727604, "grad_norm": 0.04213472685836964, "learning_rate": 1.380043070227996e-06, "loss": 0.0004, "step": 237710 }, { "epoch": 1.5639165016479937, "grad_norm": 0.03520486769059758, "learning_rate": 1.379647067169017e-06, "loss": 0.0005, "step": 237720 }, { "epoch": 1.5639822898232272, "grad_norm": 0.01921507513197921, "learning_rate": 1.3792511118412643e-06, "loss": 0.0005, "step": 237730 }, { "epoch": 1.5640480779984607, "grad_norm": 0.005174485850769864, "learning_rate": 1.3788552042499588e-06, "loss": 0.0003, "step": 237740 }, { "epoch": 1.564113866173694, "grad_norm": 0.02047625425169443, "learning_rate": 1.3784593444003191e-06, "loss": 0.0003, "step": 237750 }, { "epoch": 1.5641796543489273, "grad_norm": 0.05372731666745783, "learning_rate": 1.3780635322975661e-06, "loss": 0.0006, "step": 237760 }, { "epoch": 1.5642454425241608, "grad_norm": 0.02865916480679003, "learning_rate": 1.377667767946917e-06, "loss": 0.0004, "step": 237770 }, { "epoch": 1.564311230699394, "grad_norm": 0.005043455780191643, "learning_rate": 1.3772720513535904e-06, "loss": 0.0007, "step": 237780 }, { "epoch": 1.5643770188746275, "grad_norm": 0.06148029431645193, "learning_rate": 1.376876382522802e-06, "loss": 0.0006, "step": 237790 }, { "epoch": 1.5644428070498608, "grad_norm": 0.0003192509884677732, "learning_rate": 1.3764807614597702e-06, "loss": 0.0022, "step": 237800 }, { "epoch": 1.5645085952250941, "grad_norm": 0.06985446452318377, "learning_rate": 1.3760851881697101e-06, "loss": 0.0004, "step": 237810 }, { "epoch": 1.5645743834003276, "grad_norm": 0.008677289796564217, "learning_rate": 1.3756896626578352e-06, "loss": 0.0004, "step": 237820 }, { "epoch": 1.5646401715755611, "grad_norm": 0.0134096931649193, "learning_rate": 1.3752941849293633e-06, "loss": 0.001, "step": 237830 }, { "epoch": 1.5647059597507944, "grad_norm": 0.005283394678435163, "learning_rate": 1.3748987549895072e-06, "loss": 0.0005, "step": 237840 }, { "epoch": 1.5647717479260277, "grad_norm": 0.020616878626773364, "learning_rate": 1.3745033728434804e-06, "loss": 0.0006, "step": 237850 }, { "epoch": 1.5648375361012612, "grad_norm": 0.006416521138087206, "learning_rate": 1.374108038496495e-06, "loss": 0.0006, "step": 237860 }, { "epoch": 1.5649033242764947, "grad_norm": 0.026497609065803385, "learning_rate": 1.373712751953764e-06, "loss": 0.0005, "step": 237870 }, { "epoch": 1.564969112451728, "grad_norm": 0.031022919082678478, "learning_rate": 1.3733175132204985e-06, "loss": 0.0002, "step": 237880 }, { "epoch": 1.5650349006269613, "grad_norm": 0.07572103100647914, "learning_rate": 1.3729223223019095e-06, "loss": 0.0004, "step": 237890 }, { "epoch": 1.5651006888021946, "grad_norm": 0.028579830686747437, "learning_rate": 1.3725271792032064e-06, "loss": 0.0003, "step": 237900 }, { "epoch": 1.565166476977428, "grad_norm": 0.03426901395179399, "learning_rate": 1.3721320839296004e-06, "loss": 0.0004, "step": 237910 }, { "epoch": 1.5652322651526616, "grad_norm": 0.032268264468996596, "learning_rate": 1.3717370364862992e-06, "loss": 0.0005, "step": 237920 }, { "epoch": 1.5652980533278948, "grad_norm": 0.0003707758501831051, "learning_rate": 1.3713420368785114e-06, "loss": 0.0003, "step": 237930 }, { "epoch": 1.5653638415031281, "grad_norm": 0.05250739053611224, "learning_rate": 1.3709470851114453e-06, "loss": 0.0006, "step": 237940 }, { "epoch": 1.5654296296783616, "grad_norm": 0.034495425029206994, "learning_rate": 1.370552181190306e-06, "loss": 0.0004, "step": 237950 }, { "epoch": 1.5654954178535951, "grad_norm": 0.014799328739632873, "learning_rate": 1.3701573251203032e-06, "loss": 0.0003, "step": 237960 }, { "epoch": 1.5655612060288284, "grad_norm": 0.026495852217031515, "learning_rate": 1.369762516906641e-06, "loss": 0.0004, "step": 237970 }, { "epoch": 1.5656269942040617, "grad_norm": 0.055914664025082494, "learning_rate": 1.3693677565545248e-06, "loss": 0.0003, "step": 237980 }, { "epoch": 1.565692782379295, "grad_norm": 0.03304707885596382, "learning_rate": 1.368973044069159e-06, "loss": 0.0004, "step": 237990 }, { "epoch": 1.5657585705545285, "grad_norm": 0.0008343499282284342, "learning_rate": 1.3685783794557477e-06, "loss": 0.0004, "step": 238000 }, { "epoch": 1.565824358729762, "grad_norm": 0.0017178934579790523, "learning_rate": 1.3681837627194944e-06, "loss": 0.0008, "step": 238010 }, { "epoch": 1.5658901469049953, "grad_norm": 0.019343743005241526, "learning_rate": 1.3677891938656013e-06, "loss": 0.0003, "step": 238020 }, { "epoch": 1.5659559350802286, "grad_norm": 0.03577533854447036, "learning_rate": 1.3673946728992705e-06, "loss": 0.0008, "step": 238030 }, { "epoch": 1.566021723255462, "grad_norm": 0.0006286862031086729, "learning_rate": 1.3670001998257042e-06, "loss": 0.0005, "step": 238040 }, { "epoch": 1.5660875114306956, "grad_norm": 0.02654565182287899, "learning_rate": 1.3666057746501022e-06, "loss": 0.0003, "step": 238050 }, { "epoch": 1.5661532996059289, "grad_norm": 0.034142575443866376, "learning_rate": 1.3662113973776652e-06, "loss": 0.0003, "step": 238060 }, { "epoch": 1.5662190877811621, "grad_norm": 0.036832354331819986, "learning_rate": 1.3658170680135923e-06, "loss": 0.0004, "step": 238070 }, { "epoch": 1.5662848759563957, "grad_norm": 0.003325023645220255, "learning_rate": 1.3654227865630814e-06, "loss": 0.0006, "step": 238080 }, { "epoch": 1.566350664131629, "grad_norm": 0.011038960489881794, "learning_rate": 1.3650285530313334e-06, "loss": 0.0005, "step": 238090 }, { "epoch": 1.5664164523068624, "grad_norm": 0.017953590585172464, "learning_rate": 1.3646343674235445e-06, "loss": 0.0004, "step": 238100 }, { "epoch": 1.5664822404820957, "grad_norm": 0.4394202726448471, "learning_rate": 1.364240229744912e-06, "loss": 0.001, "step": 238110 }, { "epoch": 1.566548028657329, "grad_norm": 0.001113219220180265, "learning_rate": 1.363846140000632e-06, "loss": 0.0001, "step": 238120 }, { "epoch": 1.5666138168325625, "grad_norm": 0.025804693251796764, "learning_rate": 1.3634520981959003e-06, "loss": 0.0012, "step": 238130 }, { "epoch": 1.566679605007796, "grad_norm": 0.04285422990054837, "learning_rate": 1.363058104335912e-06, "loss": 0.0007, "step": 238140 }, { "epoch": 1.5667453931830293, "grad_norm": 0.022807211963450667, "learning_rate": 1.3626641584258616e-06, "loss": 0.0001, "step": 238150 }, { "epoch": 1.5668111813582626, "grad_norm": 0.020823566331028623, "learning_rate": 1.3622702604709427e-06, "loss": 0.0009, "step": 238160 }, { "epoch": 1.566876969533496, "grad_norm": 0.014341672175365094, "learning_rate": 1.3618764104763483e-06, "loss": 0.0005, "step": 238170 }, { "epoch": 1.5669427577087296, "grad_norm": 0.07358230547725746, "learning_rate": 1.3614826084472715e-06, "loss": 0.0005, "step": 238180 }, { "epoch": 1.5670085458839629, "grad_norm": 0.019153120294327072, "learning_rate": 1.3610888543889046e-06, "loss": 0.0014, "step": 238190 }, { "epoch": 1.5670743340591962, "grad_norm": 0.002722062756469548, "learning_rate": 1.3606951483064378e-06, "loss": 0.0004, "step": 238200 }, { "epoch": 1.5671401222344294, "grad_norm": 0.05617958778886667, "learning_rate": 1.360301490205061e-06, "loss": 0.0005, "step": 238210 }, { "epoch": 1.567205910409663, "grad_norm": 0.013972063975726038, "learning_rate": 1.3599078800899673e-06, "loss": 0.0003, "step": 238220 }, { "epoch": 1.5672716985848965, "grad_norm": 0.024212595715398348, "learning_rate": 1.3595143179663445e-06, "loss": 0.0003, "step": 238230 }, { "epoch": 1.5673374867601297, "grad_norm": 0.018609044954260735, "learning_rate": 1.359120803839381e-06, "loss": 0.0023, "step": 238240 }, { "epoch": 1.567403274935363, "grad_norm": 0.007773193484804777, "learning_rate": 1.3587273377142652e-06, "loss": 0.0006, "step": 238250 }, { "epoch": 1.5674690631105965, "grad_norm": 0.08629529665596983, "learning_rate": 1.3583339195961849e-06, "loss": 0.0005, "step": 238260 }, { "epoch": 1.56753485128583, "grad_norm": 0.032448855712728174, "learning_rate": 1.3579405494903264e-06, "loss": 0.0006, "step": 238270 }, { "epoch": 1.5676006394610633, "grad_norm": 0.04125538289955105, "learning_rate": 1.3575472274018763e-06, "loss": 0.0003, "step": 238280 }, { "epoch": 1.5676664276362966, "grad_norm": 0.03250460006183727, "learning_rate": 1.3571539533360206e-06, "loss": 0.0011, "step": 238290 }, { "epoch": 1.5677322158115299, "grad_norm": 0.0004919774319827052, "learning_rate": 1.3567607272979432e-06, "loss": 0.0004, "step": 238300 }, { "epoch": 1.5677980039867634, "grad_norm": 0.012492155095188159, "learning_rate": 1.3563675492928296e-06, "loss": 0.0001, "step": 238310 }, { "epoch": 1.567863792161997, "grad_norm": 0.014894792390608036, "learning_rate": 1.3559744193258634e-06, "loss": 0.0012, "step": 238320 }, { "epoch": 1.5679295803372302, "grad_norm": 0.030598540369751345, "learning_rate": 1.3555813374022265e-06, "loss": 0.0004, "step": 238330 }, { "epoch": 1.5679953685124635, "grad_norm": 0.0009247770000524546, "learning_rate": 1.3551883035271008e-06, "loss": 0.0003, "step": 238340 }, { "epoch": 1.568061156687697, "grad_norm": 0.012947199950020074, "learning_rate": 1.354795317705671e-06, "loss": 0.0007, "step": 238350 }, { "epoch": 1.5681269448629305, "grad_norm": 0.006391052551780696, "learning_rate": 1.3544023799431171e-06, "loss": 0.0004, "step": 238360 }, { "epoch": 1.5681927330381638, "grad_norm": 0.046178925684849007, "learning_rate": 1.3540094902446189e-06, "loss": 0.0002, "step": 238370 }, { "epoch": 1.568258521213397, "grad_norm": 0.03343158255337501, "learning_rate": 1.3536166486153568e-06, "loss": 0.0006, "step": 238380 }, { "epoch": 1.5683243093886303, "grad_norm": 0.04785646545794167, "learning_rate": 1.3532238550605098e-06, "loss": 0.0004, "step": 238390 }, { "epoch": 1.5683900975638638, "grad_norm": 0.029907355932201018, "learning_rate": 1.352831109585257e-06, "loss": 0.0004, "step": 238400 }, { "epoch": 1.5684558857390973, "grad_norm": 0.0905940310758242, "learning_rate": 1.3524384121947758e-06, "loss": 0.0006, "step": 238410 }, { "epoch": 1.5685216739143306, "grad_norm": 0.02247709199268402, "learning_rate": 1.3520457628942441e-06, "loss": 0.0005, "step": 238420 }, { "epoch": 1.568587462089564, "grad_norm": 0.05554797245887082, "learning_rate": 1.3516531616888385e-06, "loss": 0.0006, "step": 238430 }, { "epoch": 1.5686532502647974, "grad_norm": 0.04009626693122057, "learning_rate": 1.3512606085837348e-06, "loss": 0.0007, "step": 238440 }, { "epoch": 1.568719038440031, "grad_norm": 0.0009395354890531285, "learning_rate": 1.3508681035841093e-06, "loss": 0.0001, "step": 238450 }, { "epoch": 1.5687848266152642, "grad_norm": 0.042977461139830336, "learning_rate": 1.3504756466951358e-06, "loss": 0.0006, "step": 238460 }, { "epoch": 1.5688506147904975, "grad_norm": 0.02366845478814951, "learning_rate": 1.3500832379219874e-06, "loss": 0.0006, "step": 238470 }, { "epoch": 1.568916402965731, "grad_norm": 0.007249259800675906, "learning_rate": 1.3496908772698408e-06, "loss": 0.0006, "step": 238480 }, { "epoch": 1.5689821911409645, "grad_norm": 0.026587295186044604, "learning_rate": 1.3492985647438673e-06, "loss": 0.0005, "step": 238490 }, { "epoch": 1.5690479793161978, "grad_norm": 0.028950243832095345, "learning_rate": 1.3489063003492398e-06, "loss": 0.0014, "step": 238500 }, { "epoch": 1.569113767491431, "grad_norm": 0.03493234169189774, "learning_rate": 1.3485140840911288e-06, "loss": 0.0013, "step": 238510 }, { "epoch": 1.5691795556666643, "grad_norm": 0.007147742223808057, "learning_rate": 1.348121915974706e-06, "loss": 0.0005, "step": 238520 }, { "epoch": 1.5692453438418978, "grad_norm": 0.059021581952490125, "learning_rate": 1.3477297960051422e-06, "loss": 0.0009, "step": 238530 }, { "epoch": 1.5693111320171313, "grad_norm": 0.0066275440674687285, "learning_rate": 1.3473377241876062e-06, "loss": 0.0002, "step": 238540 }, { "epoch": 1.5693769201923646, "grad_norm": 0.041086110060128175, "learning_rate": 1.346945700527268e-06, "loss": 0.0007, "step": 238550 }, { "epoch": 1.569442708367598, "grad_norm": 0.05594510816654306, "learning_rate": 1.346553725029296e-06, "loss": 0.0004, "step": 238560 }, { "epoch": 1.5695084965428314, "grad_norm": 0.045868650091414796, "learning_rate": 1.3461617976988572e-06, "loss": 0.0004, "step": 238570 }, { "epoch": 1.569574284718065, "grad_norm": 0.03525239638799068, "learning_rate": 1.3457699185411198e-06, "loss": 0.0005, "step": 238580 }, { "epoch": 1.5696400728932982, "grad_norm": 0.04316043578551042, "learning_rate": 1.3453780875612498e-06, "loss": 0.0006, "step": 238590 }, { "epoch": 1.5697058610685315, "grad_norm": 0.02628135130617585, "learning_rate": 1.3449863047644124e-06, "loss": 0.0003, "step": 238600 }, { "epoch": 1.5697716492437648, "grad_norm": 0.009727461510786498, "learning_rate": 1.344594570155775e-06, "loss": 0.0004, "step": 238610 }, { "epoch": 1.5698374374189983, "grad_norm": 0.04984011025664658, "learning_rate": 1.3442028837405014e-06, "loss": 0.0004, "step": 238620 }, { "epoch": 1.5699032255942318, "grad_norm": 0.002262183637961419, "learning_rate": 1.3438112455237552e-06, "loss": 0.0004, "step": 238630 }, { "epoch": 1.569969013769465, "grad_norm": 0.0019474712578422598, "learning_rate": 1.3434196555106998e-06, "loss": 0.0004, "step": 238640 }, { "epoch": 1.5700348019446984, "grad_norm": 0.007961966992586954, "learning_rate": 1.3430281137064989e-06, "loss": 0.0002, "step": 238650 }, { "epoch": 1.5701005901199319, "grad_norm": 0.049889837477052605, "learning_rate": 1.3426366201163132e-06, "loss": 0.0008, "step": 238660 }, { "epoch": 1.5701663782951654, "grad_norm": 0.03119452050984772, "learning_rate": 1.3422451747453051e-06, "loss": 0.0011, "step": 238670 }, { "epoch": 1.5702321664703986, "grad_norm": 0.058285078239977764, "learning_rate": 1.3418537775986356e-06, "loss": 0.0004, "step": 238680 }, { "epoch": 1.570297954645632, "grad_norm": 0.0018057249201536608, "learning_rate": 1.3414624286814648e-06, "loss": 0.0002, "step": 238690 }, { "epoch": 1.5703637428208652, "grad_norm": 0.1325796463538104, "learning_rate": 1.3410711279989518e-06, "loss": 0.001, "step": 238700 }, { "epoch": 1.5704295309960987, "grad_norm": 0.04395182378015542, "learning_rate": 1.3406798755562562e-06, "loss": 0.0007, "step": 238710 }, { "epoch": 1.5704953191713322, "grad_norm": 0.031674917967889044, "learning_rate": 1.3402886713585355e-06, "loss": 0.0004, "step": 238720 }, { "epoch": 1.5705611073465655, "grad_norm": 0.004595620620978544, "learning_rate": 1.3398975154109472e-06, "loss": 0.0005, "step": 238730 }, { "epoch": 1.5706268955217988, "grad_norm": 0.005506186622879183, "learning_rate": 1.3395064077186504e-06, "loss": 0.0001, "step": 238740 }, { "epoch": 1.5706926836970323, "grad_norm": 0.043088024922464344, "learning_rate": 1.3391153482867996e-06, "loss": 0.0005, "step": 238750 }, { "epoch": 1.5707584718722658, "grad_norm": 0.006557915092533396, "learning_rate": 1.3387243371205521e-06, "loss": 0.0002, "step": 238760 }, { "epoch": 1.570824260047499, "grad_norm": 0.03360057216050967, "learning_rate": 1.3383333742250614e-06, "loss": 0.0003, "step": 238770 }, { "epoch": 1.5708900482227324, "grad_norm": 0.014185099242452032, "learning_rate": 1.337942459605483e-06, "loss": 0.0003, "step": 238780 }, { "epoch": 1.5709558363979659, "grad_norm": 0.08369529674129587, "learning_rate": 1.3375515932669708e-06, "loss": 0.0004, "step": 238790 }, { "epoch": 1.5710216245731992, "grad_norm": 0.07411207745452213, "learning_rate": 1.3371607752146771e-06, "loss": 0.0005, "step": 238800 }, { "epoch": 1.5710874127484327, "grad_norm": 0.0016240287610222086, "learning_rate": 1.3367700054537558e-06, "loss": 0.0006, "step": 238810 }, { "epoch": 1.571153200923666, "grad_norm": 0.0433514263474822, "learning_rate": 1.336379283989358e-06, "loss": 0.0003, "step": 238820 }, { "epoch": 1.5712189890988992, "grad_norm": 0.01070541789935692, "learning_rate": 1.335988610826635e-06, "loss": 0.0013, "step": 238830 }, { "epoch": 1.5712847772741327, "grad_norm": 0.006250359141059859, "learning_rate": 1.335597985970738e-06, "loss": 0.0002, "step": 238840 }, { "epoch": 1.5713505654493662, "grad_norm": 0.022902558612829475, "learning_rate": 1.3352074094268163e-06, "loss": 0.0008, "step": 238850 }, { "epoch": 1.5714163536245995, "grad_norm": 0.0011077104109978495, "learning_rate": 1.3348168812000184e-06, "loss": 0.0004, "step": 238860 }, { "epoch": 1.5714821417998328, "grad_norm": 0.01220429438695973, "learning_rate": 1.3344264012954966e-06, "loss": 0.0004, "step": 238870 }, { "epoch": 1.5715479299750663, "grad_norm": 0.015582601704600197, "learning_rate": 1.3340359697183964e-06, "loss": 0.0003, "step": 238880 }, { "epoch": 1.5716137181502998, "grad_norm": 0.031046476033547085, "learning_rate": 1.3336455864738656e-06, "loss": 0.0004, "step": 238890 }, { "epoch": 1.571679506325533, "grad_norm": 0.01646113813764553, "learning_rate": 1.3332552515670517e-06, "loss": 0.0003, "step": 238900 }, { "epoch": 1.5717452945007664, "grad_norm": 0.03231768525462372, "learning_rate": 1.3328649650031006e-06, "loss": 0.0005, "step": 238910 }, { "epoch": 1.5718110826759997, "grad_norm": 0.008354803695126388, "learning_rate": 1.3324747267871574e-06, "loss": 0.0003, "step": 238920 }, { "epoch": 1.5718768708512332, "grad_norm": 0.00989358133391032, "learning_rate": 1.3320845369243674e-06, "loss": 0.0002, "step": 238930 }, { "epoch": 1.5719426590264667, "grad_norm": 0.015404559013445045, "learning_rate": 1.3316943954198753e-06, "loss": 0.0006, "step": 238940 }, { "epoch": 1.5720084472017, "grad_norm": 0.011499059714288879, "learning_rate": 1.3313043022788242e-06, "loss": 0.0003, "step": 238950 }, { "epoch": 1.5720742353769332, "grad_norm": 0.00044188291259268035, "learning_rate": 1.3309142575063578e-06, "loss": 0.0006, "step": 238960 }, { "epoch": 1.5721400235521668, "grad_norm": 0.037315806673871355, "learning_rate": 1.3305242611076175e-06, "loss": 0.0004, "step": 238970 }, { "epoch": 1.5722058117274003, "grad_norm": 0.03272670069938683, "learning_rate": 1.3301343130877459e-06, "loss": 0.0002, "step": 238980 }, { "epoch": 1.5722715999026335, "grad_norm": 0.0024949621258639836, "learning_rate": 1.3297444134518821e-06, "loss": 0.0002, "step": 238990 }, { "epoch": 1.5723373880778668, "grad_norm": 0.06262332240919112, "learning_rate": 1.3293545622051707e-06, "loss": 0.0006, "step": 239000 }, { "epoch": 1.5724031762531, "grad_norm": 0.00095210822312433, "learning_rate": 1.3289647593527482e-06, "loss": 0.0004, "step": 239010 }, { "epoch": 1.5724689644283336, "grad_norm": 0.021625188132496295, "learning_rate": 1.3285750048997553e-06, "loss": 0.0014, "step": 239020 }, { "epoch": 1.5725347526035671, "grad_norm": 0.021011935139328596, "learning_rate": 1.3281852988513304e-06, "loss": 0.0005, "step": 239030 }, { "epoch": 1.5726005407788004, "grad_norm": 0.05237923931145752, "learning_rate": 1.3277956412126108e-06, "loss": 0.0006, "step": 239040 }, { "epoch": 1.5726663289540337, "grad_norm": 0.011317591497204918, "learning_rate": 1.3274060319887343e-06, "loss": 0.0006, "step": 239050 }, { "epoch": 1.5727321171292672, "grad_norm": 0.03497059722340865, "learning_rate": 1.327016471184837e-06, "loss": 0.0009, "step": 239060 }, { "epoch": 1.5727979053045007, "grad_norm": 0.023463779394578527, "learning_rate": 1.3266269588060576e-06, "loss": 0.0003, "step": 239070 }, { "epoch": 1.572863693479734, "grad_norm": 0.026449471606873477, "learning_rate": 1.3262374948575268e-06, "loss": 0.0004, "step": 239080 }, { "epoch": 1.5729294816549673, "grad_norm": 0.03431341310306638, "learning_rate": 1.3258480793443813e-06, "loss": 0.0006, "step": 239090 }, { "epoch": 1.5729952698302008, "grad_norm": 0.0010668961578478024, "learning_rate": 1.3254587122717561e-06, "loss": 0.0004, "step": 239100 }, { "epoch": 1.573061058005434, "grad_norm": 0.05352745992226339, "learning_rate": 1.325069393644785e-06, "loss": 0.0005, "step": 239110 }, { "epoch": 1.5731268461806676, "grad_norm": 0.00432681158778504, "learning_rate": 1.3246801234685996e-06, "loss": 0.0004, "step": 239120 }, { "epoch": 1.5731926343559008, "grad_norm": 0.08537090278039183, "learning_rate": 1.324290901748333e-06, "loss": 0.0007, "step": 239130 }, { "epoch": 1.5732584225311341, "grad_norm": 0.0171053614094903, "learning_rate": 1.3239017284891159e-06, "loss": 0.0007, "step": 239140 }, { "epoch": 1.5733242107063676, "grad_norm": 0.014606264528179456, "learning_rate": 1.32351260369608e-06, "loss": 0.0001, "step": 239150 }, { "epoch": 1.5733899988816011, "grad_norm": 0.026686478092452975, "learning_rate": 1.3231235273743549e-06, "loss": 0.0005, "step": 239160 }, { "epoch": 1.5734557870568344, "grad_norm": 0.06121801695435674, "learning_rate": 1.3227344995290703e-06, "loss": 0.0006, "step": 239170 }, { "epoch": 1.5735215752320677, "grad_norm": 0.01982789470065039, "learning_rate": 1.3223455201653563e-06, "loss": 0.0011, "step": 239180 }, { "epoch": 1.5735873634073012, "grad_norm": 0.00881288820439989, "learning_rate": 1.3219565892883395e-06, "loss": 0.0004, "step": 239190 }, { "epoch": 1.5736531515825347, "grad_norm": 0.28417394998884177, "learning_rate": 1.321567706903149e-06, "loss": 0.0006, "step": 239200 }, { "epoch": 1.573718939757768, "grad_norm": 0.03164223668085685, "learning_rate": 1.321178873014911e-06, "loss": 0.0007, "step": 239210 }, { "epoch": 1.5737847279330013, "grad_norm": 0.024446795616658655, "learning_rate": 1.3207900876287511e-06, "loss": 0.0003, "step": 239220 }, { "epoch": 1.5738505161082346, "grad_norm": 0.013860902795884351, "learning_rate": 1.3204013507497975e-06, "loss": 0.0017, "step": 239230 }, { "epoch": 1.573916304283468, "grad_norm": 0.027440236427610377, "learning_rate": 1.3200126623831749e-06, "loss": 0.0006, "step": 239240 }, { "epoch": 1.5739820924587016, "grad_norm": 0.03203369404478859, "learning_rate": 1.3196240225340068e-06, "loss": 0.0003, "step": 239250 }, { "epoch": 1.5740478806339349, "grad_norm": 0.031940107496122944, "learning_rate": 1.3192354312074169e-06, "loss": 0.0007, "step": 239260 }, { "epoch": 1.5741136688091681, "grad_norm": 0.020047120817576616, "learning_rate": 1.3188468884085293e-06, "loss": 0.0002, "step": 239270 }, { "epoch": 1.5741794569844016, "grad_norm": 0.030541668231304678, "learning_rate": 1.318458394142466e-06, "loss": 0.0004, "step": 239280 }, { "epoch": 1.5742452451596352, "grad_norm": 0.02042318521929387, "learning_rate": 1.3180699484143494e-06, "loss": 0.0006, "step": 239290 }, { "epoch": 1.5743110333348684, "grad_norm": 0.04694893242386739, "learning_rate": 1.3176815512293007e-06, "loss": 0.0004, "step": 239300 }, { "epoch": 1.5743768215101017, "grad_norm": 0.003053699056231333, "learning_rate": 1.3172932025924407e-06, "loss": 0.0008, "step": 239310 }, { "epoch": 1.574442609685335, "grad_norm": 0.012594578592875123, "learning_rate": 1.3169049025088888e-06, "loss": 0.0005, "step": 239320 }, { "epoch": 1.5745083978605685, "grad_norm": 0.009505390700619948, "learning_rate": 1.3165166509837647e-06, "loss": 0.0004, "step": 239330 }, { "epoch": 1.574574186035802, "grad_norm": 0.034096421244274436, "learning_rate": 1.3161284480221874e-06, "loss": 0.0009, "step": 239340 }, { "epoch": 1.5746399742110353, "grad_norm": 0.03575037983828319, "learning_rate": 1.3157402936292734e-06, "loss": 0.0005, "step": 239350 }, { "epoch": 1.5747057623862686, "grad_norm": 0.0024208460891193676, "learning_rate": 1.3153521878101438e-06, "loss": 0.0001, "step": 239360 }, { "epoch": 1.574771550561502, "grad_norm": 0.03775267757006893, "learning_rate": 1.3149641305699128e-06, "loss": 0.0009, "step": 239370 }, { "epoch": 1.5748373387367356, "grad_norm": 0.02736922213939655, "learning_rate": 1.3145761219136972e-06, "loss": 0.0004, "step": 239380 }, { "epoch": 1.5749031269119689, "grad_norm": 0.0024197287227746733, "learning_rate": 1.3141881618466124e-06, "loss": 0.0005, "step": 239390 }, { "epoch": 1.5749689150872022, "grad_norm": 0.00776364004204183, "learning_rate": 1.3138002503737735e-06, "loss": 0.0003, "step": 239400 }, { "epoch": 1.5750347032624357, "grad_norm": 0.026058410974207958, "learning_rate": 1.3134123875002946e-06, "loss": 0.0005, "step": 239410 }, { "epoch": 1.575100491437669, "grad_norm": 0.00010427182541293203, "learning_rate": 1.3130245732312896e-06, "loss": 0.0004, "step": 239420 }, { "epoch": 1.5751662796129025, "grad_norm": 0.02106414028057919, "learning_rate": 1.312636807571871e-06, "loss": 0.0007, "step": 239430 }, { "epoch": 1.5752320677881357, "grad_norm": 0.01218104468078979, "learning_rate": 1.3122490905271517e-06, "loss": 0.0004, "step": 239440 }, { "epoch": 1.575297855963369, "grad_norm": 0.03771239814895502, "learning_rate": 1.3118614221022436e-06, "loss": 0.0004, "step": 239450 }, { "epoch": 1.5753636441386025, "grad_norm": 0.01972133690367557, "learning_rate": 1.3114738023022566e-06, "loss": 0.0005, "step": 239460 }, { "epoch": 1.575429432313836, "grad_norm": 0.01626660568415771, "learning_rate": 1.3110862311323025e-06, "loss": 0.0004, "step": 239470 }, { "epoch": 1.5754952204890693, "grad_norm": 0.011661480419349377, "learning_rate": 1.3106987085974888e-06, "loss": 0.0005, "step": 239480 }, { "epoch": 1.5755610086643026, "grad_norm": 0.004795098445372359, "learning_rate": 1.310311234702928e-06, "loss": 0.0008, "step": 239490 }, { "epoch": 1.575626796839536, "grad_norm": 0.062083427739991225, "learning_rate": 1.3099238094537263e-06, "loss": 0.0005, "step": 239500 }, { "epoch": 1.5756925850147696, "grad_norm": 0.018398053157032166, "learning_rate": 1.3095364328549924e-06, "loss": 0.0021, "step": 239510 }, { "epoch": 1.575758373190003, "grad_norm": 0.014359749521730318, "learning_rate": 1.3091491049118339e-06, "loss": 0.0004, "step": 239520 }, { "epoch": 1.5758241613652362, "grad_norm": 0.03135635403023468, "learning_rate": 1.3087618256293567e-06, "loss": 0.0011, "step": 239530 }, { "epoch": 1.5758899495404695, "grad_norm": 0.01970645060997349, "learning_rate": 1.3083745950126664e-06, "loss": 0.0006, "step": 239540 }, { "epoch": 1.575955737715703, "grad_norm": 0.01805883721748133, "learning_rate": 1.307987413066869e-06, "loss": 0.0004, "step": 239550 }, { "epoch": 1.5760215258909365, "grad_norm": 0.041680042935938716, "learning_rate": 1.3076002797970688e-06, "loss": 0.0003, "step": 239560 }, { "epoch": 1.5760873140661698, "grad_norm": 0.007344903931882423, "learning_rate": 1.3072131952083706e-06, "loss": 0.0007, "step": 239570 }, { "epoch": 1.576153102241403, "grad_norm": 0.004290616438770858, "learning_rate": 1.3068261593058761e-06, "loss": 0.0003, "step": 239580 }, { "epoch": 1.5762188904166365, "grad_norm": 0.039781310652473034, "learning_rate": 1.3064391720946901e-06, "loss": 0.0002, "step": 239590 }, { "epoch": 1.57628467859187, "grad_norm": 0.05103638861063371, "learning_rate": 1.306052233579913e-06, "loss": 0.0004, "step": 239600 }, { "epoch": 1.5763504667671033, "grad_norm": 0.00035754502344588853, "learning_rate": 1.3056653437666455e-06, "loss": 0.0005, "step": 239610 }, { "epoch": 1.5764162549423366, "grad_norm": 0.013827953095395692, "learning_rate": 1.3052785026599913e-06, "loss": 0.0004, "step": 239620 }, { "epoch": 1.57648204311757, "grad_norm": 0.07715038615413397, "learning_rate": 1.3048917102650493e-06, "loss": 0.0005, "step": 239630 }, { "epoch": 1.5765478312928034, "grad_norm": 0.12047561060213052, "learning_rate": 1.3045049665869186e-06, "loss": 0.0011, "step": 239640 }, { "epoch": 1.576613619468037, "grad_norm": 0.0031158193925541174, "learning_rate": 1.3041182716306982e-06, "loss": 0.0011, "step": 239650 }, { "epoch": 1.5766794076432702, "grad_norm": 0.006872309954933102, "learning_rate": 1.3037316254014864e-06, "loss": 0.0005, "step": 239660 }, { "epoch": 1.5767451958185035, "grad_norm": 0.011957328258945186, "learning_rate": 1.3033450279043807e-06, "loss": 0.0003, "step": 239670 }, { "epoch": 1.576810983993737, "grad_norm": 0.017473712202767697, "learning_rate": 1.3029584791444787e-06, "loss": 0.0004, "step": 239680 }, { "epoch": 1.5768767721689705, "grad_norm": 0.01562776733556226, "learning_rate": 1.302571979126876e-06, "loss": 0.0005, "step": 239690 }, { "epoch": 1.5769425603442038, "grad_norm": 0.0050970129973061775, "learning_rate": 1.302185527856668e-06, "loss": 0.0005, "step": 239700 }, { "epoch": 1.577008348519437, "grad_norm": 0.000556204244206389, "learning_rate": 1.3017991253389507e-06, "loss": 0.0002, "step": 239710 }, { "epoch": 1.5770741366946703, "grad_norm": 0.0651624073827897, "learning_rate": 1.3014127715788176e-06, "loss": 0.0007, "step": 239720 }, { "epoch": 1.5771399248699038, "grad_norm": 0.01434280145738063, "learning_rate": 1.3010264665813632e-06, "loss": 0.0006, "step": 239730 }, { "epoch": 1.5772057130451373, "grad_norm": 0.03890440332446298, "learning_rate": 1.3006402103516786e-06, "loss": 0.0005, "step": 239740 }, { "epoch": 1.5772715012203706, "grad_norm": 0.04367599449749242, "learning_rate": 1.300254002894859e-06, "loss": 0.0004, "step": 239750 }, { "epoch": 1.577337289395604, "grad_norm": 0.020384237512847764, "learning_rate": 1.2998678442159952e-06, "loss": 0.0004, "step": 239760 }, { "epoch": 1.5774030775708374, "grad_norm": 0.07440903673075139, "learning_rate": 1.2994817343201783e-06, "loss": 0.0003, "step": 239770 }, { "epoch": 1.577468865746071, "grad_norm": 0.0178503979102696, "learning_rate": 1.2990956732124988e-06, "loss": 0.0008, "step": 239780 }, { "epoch": 1.5775346539213042, "grad_norm": 0.018862330137266535, "learning_rate": 1.2987096608980464e-06, "loss": 0.0005, "step": 239790 }, { "epoch": 1.5776004420965375, "grad_norm": 0.00598683676854108, "learning_rate": 1.2983236973819109e-06, "loss": 0.0003, "step": 239800 }, { "epoch": 1.577666230271771, "grad_norm": 0.04493780608995245, "learning_rate": 1.2979377826691797e-06, "loss": 0.0007, "step": 239810 }, { "epoch": 1.5777320184470043, "grad_norm": 0.02658451048179048, "learning_rate": 1.297551916764942e-06, "loss": 0.0003, "step": 239820 }, { "epoch": 1.5777978066222378, "grad_norm": 0.036048845146015085, "learning_rate": 1.2971660996742846e-06, "loss": 0.0006, "step": 239830 }, { "epoch": 1.577863594797471, "grad_norm": 0.02647674341871231, "learning_rate": 1.2967803314022942e-06, "loss": 0.0002, "step": 239840 }, { "epoch": 1.5779293829727044, "grad_norm": 0.018741997003752103, "learning_rate": 1.2963946119540566e-06, "loss": 0.0004, "step": 239850 }, { "epoch": 1.5779951711479379, "grad_norm": 0.005390435028797172, "learning_rate": 1.2960089413346577e-06, "loss": 0.0008, "step": 239860 }, { "epoch": 1.5780609593231714, "grad_norm": 0.003664320523064589, "learning_rate": 1.2956233195491818e-06, "loss": 0.0003, "step": 239870 }, { "epoch": 1.5781267474984046, "grad_norm": 0.009748166943074357, "learning_rate": 1.2952377466027116e-06, "loss": 0.0004, "step": 239880 }, { "epoch": 1.578192535673638, "grad_norm": 0.06534898166205048, "learning_rate": 1.294852222500334e-06, "loss": 0.0004, "step": 239890 }, { "epoch": 1.5782583238488714, "grad_norm": 0.016387438076071313, "learning_rate": 1.2944667472471295e-06, "loss": 0.0003, "step": 239900 }, { "epoch": 1.578324112024105, "grad_norm": 0.03885285917162925, "learning_rate": 1.2940813208481807e-06, "loss": 0.0009, "step": 239910 }, { "epoch": 1.5783899001993382, "grad_norm": 0.08159557180048499, "learning_rate": 1.2936959433085688e-06, "loss": 0.0007, "step": 239920 }, { "epoch": 1.5784556883745715, "grad_norm": 0.00047280908855051147, "learning_rate": 1.2933106146333752e-06, "loss": 0.0011, "step": 239930 }, { "epoch": 1.5785214765498048, "grad_norm": 0.000608057739385531, "learning_rate": 1.2929253348276794e-06, "loss": 0.0009, "step": 239940 }, { "epoch": 1.5785872647250383, "grad_norm": 0.03985562001238053, "learning_rate": 1.2925401038965617e-06, "loss": 0.0013, "step": 239950 }, { "epoch": 1.5786530529002718, "grad_norm": 0.005674764966106453, "learning_rate": 1.2921549218451012e-06, "loss": 0.0007, "step": 239960 }, { "epoch": 1.578718841075505, "grad_norm": 0.048463416578724584, "learning_rate": 1.2917697886783753e-06, "loss": 0.0006, "step": 239970 }, { "epoch": 1.5787846292507384, "grad_norm": 0.005109261343517552, "learning_rate": 1.291384704401462e-06, "loss": 0.0002, "step": 239980 }, { "epoch": 1.5788504174259719, "grad_norm": 0.03326269338833852, "learning_rate": 1.2909996690194387e-06, "loss": 0.0003, "step": 239990 }, { "epoch": 1.5789162056012054, "grad_norm": 0.04008438706921901, "learning_rate": 1.290614682537381e-06, "loss": 0.0006, "step": 240000 }, { "epoch": 1.5789819937764387, "grad_norm": 0.014165326210943116, "learning_rate": 1.2902297449603641e-06, "loss": 0.0004, "step": 240010 }, { "epoch": 1.579047781951672, "grad_norm": 0.030492041582560223, "learning_rate": 1.2898448562934652e-06, "loss": 0.0003, "step": 240020 }, { "epoch": 1.5791135701269052, "grad_norm": 0.029367136936525578, "learning_rate": 1.2894600165417582e-06, "loss": 0.0004, "step": 240030 }, { "epoch": 1.5791793583021387, "grad_norm": 0.01331416426360471, "learning_rate": 1.2890752257103156e-06, "loss": 0.0012, "step": 240040 }, { "epoch": 1.5792451464773722, "grad_norm": 0.027698724142102425, "learning_rate": 1.2886904838042113e-06, "loss": 0.0003, "step": 240050 }, { "epoch": 1.5793109346526055, "grad_norm": 0.06758416749056353, "learning_rate": 1.2883057908285173e-06, "loss": 0.0006, "step": 240060 }, { "epoch": 1.5793767228278388, "grad_norm": 0.0025049345812331145, "learning_rate": 1.2879211467883063e-06, "loss": 0.0009, "step": 240070 }, { "epoch": 1.5794425110030723, "grad_norm": 0.019473516014649833, "learning_rate": 1.287536551688649e-06, "loss": 0.0009, "step": 240080 }, { "epoch": 1.5795082991783058, "grad_norm": 0.00586472820656244, "learning_rate": 1.2871520055346159e-06, "loss": 0.0006, "step": 240090 }, { "epoch": 1.579574087353539, "grad_norm": 0.01700804388080708, "learning_rate": 1.286767508331277e-06, "loss": 0.0004, "step": 240100 }, { "epoch": 1.5796398755287724, "grad_norm": 0.036960522003188064, "learning_rate": 1.286383060083702e-06, "loss": 0.0006, "step": 240110 }, { "epoch": 1.579705663704006, "grad_norm": 0.0146486811471413, "learning_rate": 1.2859986607969583e-06, "loss": 0.0004, "step": 240120 }, { "epoch": 1.5797714518792392, "grad_norm": 0.0007847134097565578, "learning_rate": 1.285614310476115e-06, "loss": 0.0003, "step": 240130 }, { "epoch": 1.5798372400544727, "grad_norm": 0.008629503978478802, "learning_rate": 1.2852300091262377e-06, "loss": 0.0001, "step": 240140 }, { "epoch": 1.579903028229706, "grad_norm": 0.005794301367614327, "learning_rate": 1.2848457567523958e-06, "loss": 0.0002, "step": 240150 }, { "epoch": 1.5799688164049392, "grad_norm": 0.028516524406828465, "learning_rate": 1.284461553359654e-06, "loss": 0.0002, "step": 240160 }, { "epoch": 1.5800346045801728, "grad_norm": 0.020132359327635428, "learning_rate": 1.2840773989530775e-06, "loss": 0.0002, "step": 240170 }, { "epoch": 1.5801003927554063, "grad_norm": 0.004314921989723295, "learning_rate": 1.283693293537731e-06, "loss": 0.0004, "step": 240180 }, { "epoch": 1.5801661809306395, "grad_norm": 0.03644622506799315, "learning_rate": 1.2833092371186795e-06, "loss": 0.0009, "step": 240190 }, { "epoch": 1.5802319691058728, "grad_norm": 0.07056113860253412, "learning_rate": 1.2829252297009854e-06, "loss": 0.0004, "step": 240200 }, { "epoch": 1.5802977572811063, "grad_norm": 0.010161927844653683, "learning_rate": 1.2825412712897116e-06, "loss": 0.001, "step": 240210 }, { "epoch": 1.5803635454563398, "grad_norm": 0.042446874248629474, "learning_rate": 1.2821573618899208e-06, "loss": 0.0005, "step": 240220 }, { "epoch": 1.5804293336315731, "grad_norm": 0.008548200144249637, "learning_rate": 1.2817735015066746e-06, "loss": 0.0002, "step": 240230 }, { "epoch": 1.5804951218068064, "grad_norm": 0.008562673070870828, "learning_rate": 1.2813896901450328e-06, "loss": 0.0004, "step": 240240 }, { "epoch": 1.5805609099820397, "grad_norm": 0.028949047588771724, "learning_rate": 1.2810059278100567e-06, "loss": 0.0006, "step": 240250 }, { "epoch": 1.5806266981572732, "grad_norm": 0.04392647940362016, "learning_rate": 1.2806222145068058e-06, "loss": 0.0012, "step": 240260 }, { "epoch": 1.5806924863325067, "grad_norm": 0.0018088062210026817, "learning_rate": 1.280238550240337e-06, "loss": 0.0005, "step": 240270 }, { "epoch": 1.58075827450774, "grad_norm": 0.03806439480668776, "learning_rate": 1.2798549350157113e-06, "loss": 0.0013, "step": 240280 }, { "epoch": 1.5808240626829733, "grad_norm": 0.026794969970429468, "learning_rate": 1.279471368837986e-06, "loss": 0.0003, "step": 240290 }, { "epoch": 1.5808898508582068, "grad_norm": 0.0049671370658016765, "learning_rate": 1.2790878517122173e-06, "loss": 0.0005, "step": 240300 }, { "epoch": 1.5809556390334403, "grad_norm": 0.019165854801620974, "learning_rate": 1.2787043836434615e-06, "loss": 0.0006, "step": 240310 }, { "epoch": 1.5810214272086736, "grad_norm": 0.06091042325350101, "learning_rate": 1.278320964636775e-06, "loss": 0.0008, "step": 240320 }, { "epoch": 1.5810872153839068, "grad_norm": 0.0441549978407394, "learning_rate": 1.2779375946972117e-06, "loss": 0.0005, "step": 240330 }, { "epoch": 1.5811530035591401, "grad_norm": 0.04586501001278627, "learning_rate": 1.2775542738298268e-06, "loss": 0.0003, "step": 240340 }, { "epoch": 1.5812187917343736, "grad_norm": 0.012047464249736467, "learning_rate": 1.2771710020396745e-06, "loss": 0.0003, "step": 240350 }, { "epoch": 1.5812845799096071, "grad_norm": 0.0009758562343956631, "learning_rate": 1.2767877793318061e-06, "loss": 0.0004, "step": 240360 }, { "epoch": 1.5813503680848404, "grad_norm": 0.0004888713797099569, "learning_rate": 1.276404605711276e-06, "loss": 0.0003, "step": 240370 }, { "epoch": 1.5814161562600737, "grad_norm": 0.05083303276274486, "learning_rate": 1.2760214811831356e-06, "loss": 0.0004, "step": 240380 }, { "epoch": 1.5814819444353072, "grad_norm": 0.015952668854466542, "learning_rate": 1.2756384057524352e-06, "loss": 0.0008, "step": 240390 }, { "epoch": 1.5815477326105407, "grad_norm": 0.0008087479136770328, "learning_rate": 1.2752553794242245e-06, "loss": 0.0004, "step": 240400 }, { "epoch": 1.581613520785774, "grad_norm": 0.016621823598009743, "learning_rate": 1.2748724022035557e-06, "loss": 0.0007, "step": 240410 }, { "epoch": 1.5816793089610073, "grad_norm": 0.06595553015873862, "learning_rate": 1.2744894740954777e-06, "loss": 0.0004, "step": 240420 }, { "epoch": 1.5817450971362408, "grad_norm": 0.032737486080213735, "learning_rate": 1.2741065951050385e-06, "loss": 0.0002, "step": 240430 }, { "epoch": 1.581810885311474, "grad_norm": 0.0006611310018200381, "learning_rate": 1.2737237652372858e-06, "loss": 0.0003, "step": 240440 }, { "epoch": 1.5818766734867076, "grad_norm": 0.004027319826042629, "learning_rate": 1.2733409844972665e-06, "loss": 0.0003, "step": 240450 }, { "epoch": 1.5819424616619409, "grad_norm": 0.01725540629105914, "learning_rate": 1.2729582528900287e-06, "loss": 0.0007, "step": 240460 }, { "epoch": 1.5820082498371741, "grad_norm": 0.0028520667642684683, "learning_rate": 1.2725755704206167e-06, "loss": 0.0008, "step": 240470 }, { "epoch": 1.5820740380124076, "grad_norm": 0.04743999631631669, "learning_rate": 1.2721929370940767e-06, "loss": 0.0002, "step": 240480 }, { "epoch": 1.5821398261876412, "grad_norm": 0.0003648656646357983, "learning_rate": 1.2718103529154534e-06, "loss": 0.0006, "step": 240490 }, { "epoch": 1.5822056143628744, "grad_norm": 0.0023815922381832644, "learning_rate": 1.271427817889791e-06, "loss": 0.0006, "step": 240500 }, { "epoch": 1.5822714025381077, "grad_norm": 0.006489391664679259, "learning_rate": 1.2710453320221322e-06, "loss": 0.0004, "step": 240510 }, { "epoch": 1.5823371907133412, "grad_norm": 0.011016344436516357, "learning_rate": 1.2706628953175204e-06, "loss": 0.0006, "step": 240520 }, { "epoch": 1.5824029788885747, "grad_norm": 0.019477129754760493, "learning_rate": 1.270280507780996e-06, "loss": 0.0004, "step": 240530 }, { "epoch": 1.582468767063808, "grad_norm": 0.023158081014347916, "learning_rate": 1.2698981694176033e-06, "loss": 0.0004, "step": 240540 }, { "epoch": 1.5825345552390413, "grad_norm": 0.01196663274692882, "learning_rate": 1.2695158802323815e-06, "loss": 0.0005, "step": 240550 }, { "epoch": 1.5826003434142746, "grad_norm": 0.034527351200499434, "learning_rate": 1.269133640230371e-06, "loss": 0.0011, "step": 240560 }, { "epoch": 1.582666131589508, "grad_norm": 0.0695606737393366, "learning_rate": 1.2687514494166115e-06, "loss": 0.0005, "step": 240570 }, { "epoch": 1.5827319197647416, "grad_norm": 0.018511332065103667, "learning_rate": 1.2683693077961417e-06, "loss": 0.0004, "step": 240580 }, { "epoch": 1.5827977079399749, "grad_norm": 0.06660750464567763, "learning_rate": 1.2679872153739996e-06, "loss": 0.001, "step": 240590 }, { "epoch": 1.5828634961152082, "grad_norm": 0.005809136879515032, "learning_rate": 1.2676051721552229e-06, "loss": 0.0003, "step": 240600 }, { "epoch": 1.5829292842904417, "grad_norm": 0.016688377590699622, "learning_rate": 1.2672231781448485e-06, "loss": 0.0004, "step": 240610 }, { "epoch": 1.5829950724656752, "grad_norm": 0.0010513501875457232, "learning_rate": 1.2668412333479125e-06, "loss": 0.0005, "step": 240620 }, { "epoch": 1.5830608606409085, "grad_norm": 0.010630569153791156, "learning_rate": 1.2664593377694507e-06, "loss": 0.0005, "step": 240630 }, { "epoch": 1.5831266488161417, "grad_norm": 0.01578011865720392, "learning_rate": 1.2660774914144986e-06, "loss": 0.0006, "step": 240640 }, { "epoch": 1.583192436991375, "grad_norm": 0.015946633707712266, "learning_rate": 1.2656956942880893e-06, "loss": 0.0004, "step": 240650 }, { "epoch": 1.5832582251666085, "grad_norm": 0.09174255745014935, "learning_rate": 1.2653139463952558e-06, "loss": 0.0009, "step": 240660 }, { "epoch": 1.583324013341842, "grad_norm": 0.020643617401978405, "learning_rate": 1.2649322477410337e-06, "loss": 0.0004, "step": 240670 }, { "epoch": 1.5833898015170753, "grad_norm": 0.04256292101567934, "learning_rate": 1.2645505983304545e-06, "loss": 0.0004, "step": 240680 }, { "epoch": 1.5834555896923086, "grad_norm": 0.03282683719722277, "learning_rate": 1.264168998168549e-06, "loss": 0.0006, "step": 240690 }, { "epoch": 1.583521377867542, "grad_norm": 0.005267537502997696, "learning_rate": 1.2637874472603495e-06, "loss": 0.0002, "step": 240700 }, { "epoch": 1.5835871660427756, "grad_norm": 0.017028477994285016, "learning_rate": 1.263405945610885e-06, "loss": 0.0004, "step": 240710 }, { "epoch": 1.583652954218009, "grad_norm": 0.04908183830876569, "learning_rate": 1.2630244932251867e-06, "loss": 0.0003, "step": 240720 }, { "epoch": 1.5837187423932422, "grad_norm": 0.03368606951798956, "learning_rate": 1.2626430901082826e-06, "loss": 0.0005, "step": 240730 }, { "epoch": 1.5837845305684755, "grad_norm": 0.020715247797350265, "learning_rate": 1.2622617362652012e-06, "loss": 0.0004, "step": 240740 }, { "epoch": 1.583850318743709, "grad_norm": 0.010157016696250914, "learning_rate": 1.2618804317009714e-06, "loss": 0.0007, "step": 240750 }, { "epoch": 1.5839161069189425, "grad_norm": 0.01712786679247138, "learning_rate": 1.261499176420619e-06, "loss": 0.0013, "step": 240760 }, { "epoch": 1.5839818950941758, "grad_norm": 0.003951826251534129, "learning_rate": 1.2611179704291715e-06, "loss": 0.0008, "step": 240770 }, { "epoch": 1.584047683269409, "grad_norm": 0.013926803561637885, "learning_rate": 1.2607368137316544e-06, "loss": 0.0003, "step": 240780 }, { "epoch": 1.5841134714446425, "grad_norm": 0.03331570093917974, "learning_rate": 1.2603557063330918e-06, "loss": 0.0004, "step": 240790 }, { "epoch": 1.584179259619876, "grad_norm": 0.022805123017310434, "learning_rate": 1.2599746482385107e-06, "loss": 0.0004, "step": 240800 }, { "epoch": 1.5842450477951093, "grad_norm": 0.02915571755427826, "learning_rate": 1.2595936394529334e-06, "loss": 0.0005, "step": 240810 }, { "epoch": 1.5843108359703426, "grad_norm": 0.02452822443982126, "learning_rate": 1.2592126799813837e-06, "loss": 0.0004, "step": 240820 }, { "epoch": 1.5843766241455761, "grad_norm": 0.0004258579526408135, "learning_rate": 1.2588317698288843e-06, "loss": 0.0008, "step": 240830 }, { "epoch": 1.5844424123208096, "grad_norm": 0.06637073839657849, "learning_rate": 1.258450909000457e-06, "loss": 0.0006, "step": 240840 }, { "epoch": 1.584508200496043, "grad_norm": 0.008140184838269561, "learning_rate": 1.2580700975011228e-06, "loss": 0.0003, "step": 240850 }, { "epoch": 1.5845739886712762, "grad_norm": 0.03879945048579709, "learning_rate": 1.2576893353359026e-06, "loss": 0.0004, "step": 240860 }, { "epoch": 1.5846397768465095, "grad_norm": 0.05929012334497845, "learning_rate": 1.2573086225098168e-06, "loss": 0.0003, "step": 240870 }, { "epoch": 1.584705565021743, "grad_norm": 0.03581405885938037, "learning_rate": 1.2569279590278838e-06, "loss": 0.0003, "step": 240880 }, { "epoch": 1.5847713531969765, "grad_norm": 0.019802740340561363, "learning_rate": 1.2565473448951238e-06, "loss": 0.0006, "step": 240890 }, { "epoch": 1.5848371413722098, "grad_norm": 0.005548264884348692, "learning_rate": 1.2561667801165533e-06, "loss": 0.0004, "step": 240900 }, { "epoch": 1.584902929547443, "grad_norm": 0.01735277131099583, "learning_rate": 1.2557862646971908e-06, "loss": 0.0002, "step": 240910 }, { "epoch": 1.5849687177226766, "grad_norm": 0.08897029755259056, "learning_rate": 1.255405798642051e-06, "loss": 0.0008, "step": 240920 }, { "epoch": 1.58503450589791, "grad_norm": 0.02565601234979502, "learning_rate": 1.2550253819561531e-06, "loss": 0.0005, "step": 240930 }, { "epoch": 1.5851002940731433, "grad_norm": 0.003346349625488852, "learning_rate": 1.254645014644511e-06, "loss": 0.0003, "step": 240940 }, { "epoch": 1.5851660822483766, "grad_norm": 0.008569465758441612, "learning_rate": 1.2542646967121402e-06, "loss": 0.0002, "step": 240950 }, { "epoch": 1.58523187042361, "grad_norm": 0.03783393445209813, "learning_rate": 1.2538844281640555e-06, "loss": 0.0008, "step": 240960 }, { "epoch": 1.5852976585988434, "grad_norm": 0.010045007199803982, "learning_rate": 1.253504209005268e-06, "loss": 0.0006, "step": 240970 }, { "epoch": 1.585363446774077, "grad_norm": 0.014736201236663693, "learning_rate": 1.2531240392407911e-06, "loss": 0.0003, "step": 240980 }, { "epoch": 1.5854292349493102, "grad_norm": 0.0332385374957356, "learning_rate": 1.2527439188756385e-06, "loss": 0.0003, "step": 240990 }, { "epoch": 1.5854950231245435, "grad_norm": 0.010159203496706763, "learning_rate": 1.2523638479148209e-06, "loss": 0.0004, "step": 241000 }, { "epoch": 1.585560811299777, "grad_norm": 0.0016390882761249418, "learning_rate": 1.2519838263633494e-06, "loss": 0.0005, "step": 241010 }, { "epoch": 1.5856265994750105, "grad_norm": 0.08596610454038575, "learning_rate": 1.2516038542262321e-06, "loss": 0.0007, "step": 241020 }, { "epoch": 1.5856923876502438, "grad_norm": 0.03870904676681622, "learning_rate": 1.2512239315084828e-06, "loss": 0.0011, "step": 241030 }, { "epoch": 1.585758175825477, "grad_norm": 0.03251000476605747, "learning_rate": 1.2508440582151077e-06, "loss": 0.0007, "step": 241040 }, { "epoch": 1.5858239640007104, "grad_norm": 0.005295271969749303, "learning_rate": 1.2504642343511163e-06, "loss": 0.0009, "step": 241050 }, { "epoch": 1.5858897521759439, "grad_norm": 0.02367727250858067, "learning_rate": 1.250084459921515e-06, "loss": 0.0004, "step": 241060 }, { "epoch": 1.5859555403511774, "grad_norm": 0.026903162006398623, "learning_rate": 1.2497047349313118e-06, "loss": 0.0001, "step": 241070 }, { "epoch": 1.5860213285264106, "grad_norm": 0.02154762184451242, "learning_rate": 1.2493250593855126e-06, "loss": 0.0006, "step": 241080 }, { "epoch": 1.586087116701644, "grad_norm": 0.010123003881522742, "learning_rate": 1.2489454332891239e-06, "loss": 0.0004, "step": 241090 }, { "epoch": 1.5861529048768774, "grad_norm": 0.009961896106797577, "learning_rate": 1.2485658566471493e-06, "loss": 0.0003, "step": 241100 }, { "epoch": 1.586218693052111, "grad_norm": 0.014065343752530296, "learning_rate": 1.248186329464594e-06, "loss": 0.0002, "step": 241110 }, { "epoch": 1.5862844812273442, "grad_norm": 0.044469442202263024, "learning_rate": 1.247806851746462e-06, "loss": 0.0008, "step": 241120 }, { "epoch": 1.5863502694025775, "grad_norm": 0.03969690647976893, "learning_rate": 1.2474274234977557e-06, "loss": 0.0004, "step": 241130 }, { "epoch": 1.586416057577811, "grad_norm": 0.024665831013676177, "learning_rate": 1.2470480447234778e-06, "loss": 0.0004, "step": 241140 }, { "epoch": 1.5864818457530443, "grad_norm": 0.024245564398667128, "learning_rate": 1.246668715428629e-06, "loss": 0.0005, "step": 241150 }, { "epoch": 1.5865476339282778, "grad_norm": 0.01495419206696964, "learning_rate": 1.246289435618213e-06, "loss": 0.0001, "step": 241160 }, { "epoch": 1.586613422103511, "grad_norm": 0.011277747209095877, "learning_rate": 1.2459102052972288e-06, "loss": 0.0003, "step": 241170 }, { "epoch": 1.5866792102787444, "grad_norm": 0.02475228842044658, "learning_rate": 1.2455310244706764e-06, "loss": 0.0007, "step": 241180 }, { "epoch": 1.5867449984539779, "grad_norm": 0.041804289930513404, "learning_rate": 1.2451518931435548e-06, "loss": 0.0003, "step": 241190 }, { "epoch": 1.5868107866292114, "grad_norm": 0.017763759621818066, "learning_rate": 1.2447728113208623e-06, "loss": 0.0009, "step": 241200 }, { "epoch": 1.5868765748044447, "grad_norm": 0.006827850991539438, "learning_rate": 1.2443937790075978e-06, "loss": 0.0004, "step": 241210 }, { "epoch": 1.586942362979678, "grad_norm": 0.0002593979339353592, "learning_rate": 1.2440147962087574e-06, "loss": 0.0002, "step": 241220 }, { "epoch": 1.5870081511549115, "grad_norm": 0.008488678202415246, "learning_rate": 1.2436358629293378e-06, "loss": 0.0002, "step": 241230 }, { "epoch": 1.587073939330145, "grad_norm": 0.04601086255953369, "learning_rate": 1.2432569791743354e-06, "loss": 0.0007, "step": 241240 }, { "epoch": 1.5871397275053782, "grad_norm": 0.03601389515007775, "learning_rate": 1.2428781449487448e-06, "loss": 0.0009, "step": 241250 }, { "epoch": 1.5872055156806115, "grad_norm": 0.011573782136117696, "learning_rate": 1.2424993602575613e-06, "loss": 0.0007, "step": 241260 }, { "epoch": 1.5872713038558448, "grad_norm": 0.0010324740002234803, "learning_rate": 1.2421206251057788e-06, "loss": 0.0005, "step": 241270 }, { "epoch": 1.5873370920310783, "grad_norm": 0.06554659118882343, "learning_rate": 1.2417419394983881e-06, "loss": 0.0004, "step": 241280 }, { "epoch": 1.5874028802063118, "grad_norm": 0.015127299692582597, "learning_rate": 1.2413633034403867e-06, "loss": 0.0003, "step": 241290 }, { "epoch": 1.587468668381545, "grad_norm": 0.05940449313158596, "learning_rate": 1.2409847169367628e-06, "loss": 0.0003, "step": 241300 }, { "epoch": 1.5875344565567784, "grad_norm": 0.024064368321119068, "learning_rate": 1.2406061799925095e-06, "loss": 0.0005, "step": 241310 }, { "epoch": 1.587600244732012, "grad_norm": 0.04035724521363669, "learning_rate": 1.2402276926126167e-06, "loss": 0.0004, "step": 241320 }, { "epoch": 1.5876660329072454, "grad_norm": 0.0009893507468737833, "learning_rate": 1.2398492548020751e-06, "loss": 0.0004, "step": 241330 }, { "epoch": 1.5877318210824787, "grad_norm": 0.07274510602159306, "learning_rate": 1.2394708665658734e-06, "loss": 0.0009, "step": 241340 }, { "epoch": 1.587797609257712, "grad_norm": 0.035587013387205865, "learning_rate": 1.239092527909e-06, "loss": 0.0006, "step": 241350 }, { "epoch": 1.5878633974329452, "grad_norm": 0.03341853295691995, "learning_rate": 1.2387142388364436e-06, "loss": 0.0008, "step": 241360 }, { "epoch": 1.5879291856081788, "grad_norm": 0.0709404155001156, "learning_rate": 1.2383359993531919e-06, "loss": 0.0004, "step": 241370 }, { "epoch": 1.5879949737834123, "grad_norm": 0.03805213864779248, "learning_rate": 1.2379578094642314e-06, "loss": 0.0003, "step": 241380 }, { "epoch": 1.5880607619586455, "grad_norm": 0.018902769078863308, "learning_rate": 1.237579669174548e-06, "loss": 0.0003, "step": 241390 }, { "epoch": 1.5881265501338788, "grad_norm": 0.032340309771834164, "learning_rate": 1.2372015784891268e-06, "loss": 0.0006, "step": 241400 }, { "epoch": 1.5881923383091123, "grad_norm": 0.018847539850441283, "learning_rate": 1.2368235374129517e-06, "loss": 0.0005, "step": 241410 }, { "epoch": 1.5882581264843458, "grad_norm": 0.008457400567716477, "learning_rate": 1.2364455459510095e-06, "loss": 0.0008, "step": 241420 }, { "epoch": 1.5883239146595791, "grad_norm": 0.02735739192035618, "learning_rate": 1.2360676041082825e-06, "loss": 0.0003, "step": 241430 }, { "epoch": 1.5883897028348124, "grad_norm": 0.004459213026355255, "learning_rate": 1.2356897118897538e-06, "loss": 0.0001, "step": 241440 }, { "epoch": 1.588455491010046, "grad_norm": 0.02954810389756578, "learning_rate": 1.2353118693004045e-06, "loss": 0.0004, "step": 241450 }, { "epoch": 1.5885212791852792, "grad_norm": 0.013560791640846774, "learning_rate": 1.2349340763452171e-06, "loss": 0.001, "step": 241460 }, { "epoch": 1.5885870673605127, "grad_norm": 0.40046333935228673, "learning_rate": 1.2345563330291728e-06, "loss": 0.002, "step": 241470 }, { "epoch": 1.588652855535746, "grad_norm": 0.03039966182373828, "learning_rate": 1.2341786393572502e-06, "loss": 0.0003, "step": 241480 }, { "epoch": 1.5887186437109793, "grad_norm": 0.004766101876160299, "learning_rate": 1.2338009953344305e-06, "loss": 0.0005, "step": 241490 }, { "epoch": 1.5887844318862128, "grad_norm": 0.041216709918633784, "learning_rate": 1.2334234009656916e-06, "loss": 0.0008, "step": 241500 }, { "epoch": 1.5888502200614463, "grad_norm": 0.04065371970659151, "learning_rate": 1.233045856256012e-06, "loss": 0.0008, "step": 241510 }, { "epoch": 1.5889160082366796, "grad_norm": 0.05041576908570492, "learning_rate": 1.2326683612103701e-06, "loss": 0.0007, "step": 241520 }, { "epoch": 1.5889817964119128, "grad_norm": 0.029574387246912026, "learning_rate": 1.2322909158337415e-06, "loss": 0.0002, "step": 241530 }, { "epoch": 1.5890475845871463, "grad_norm": 0.06186459977506085, "learning_rate": 1.2319135201311022e-06, "loss": 0.0004, "step": 241540 }, { "epoch": 1.5891133727623798, "grad_norm": 0.030160464167305036, "learning_rate": 1.23153617410743e-06, "loss": 0.0007, "step": 241550 }, { "epoch": 1.5891791609376131, "grad_norm": 0.032747088699831506, "learning_rate": 1.2311588777676985e-06, "loss": 0.0011, "step": 241560 }, { "epoch": 1.5892449491128464, "grad_norm": 0.0016635981348597276, "learning_rate": 1.230781631116883e-06, "loss": 0.0005, "step": 241570 }, { "epoch": 1.5893107372880797, "grad_norm": 0.00015815907044659994, "learning_rate": 1.2304044341599557e-06, "loss": 0.0007, "step": 241580 }, { "epoch": 1.5893765254633132, "grad_norm": 0.0009748812897625072, "learning_rate": 1.2300272869018904e-06, "loss": 0.0001, "step": 241590 }, { "epoch": 1.5894423136385467, "grad_norm": 0.06606058808616112, "learning_rate": 1.2296501893476593e-06, "loss": 0.0004, "step": 241600 }, { "epoch": 1.58950810181378, "grad_norm": 0.047732468515770676, "learning_rate": 1.2292731415022341e-06, "loss": 0.0006, "step": 241610 }, { "epoch": 1.5895738899890133, "grad_norm": 0.0010575314825395838, "learning_rate": 1.2288961433705865e-06, "loss": 0.0004, "step": 241620 }, { "epoch": 1.5896396781642468, "grad_norm": 0.0028134847248110647, "learning_rate": 1.2285191949576859e-06, "loss": 0.0004, "step": 241630 }, { "epoch": 1.5897054663394803, "grad_norm": 0.00605352330504909, "learning_rate": 1.2281422962685025e-06, "loss": 0.0006, "step": 241640 }, { "epoch": 1.5897712545147136, "grad_norm": 0.0017666588861245914, "learning_rate": 1.2277654473080054e-06, "loss": 0.0003, "step": 241650 }, { "epoch": 1.5898370426899469, "grad_norm": 0.029766078812443943, "learning_rate": 1.2273886480811625e-06, "loss": 0.0005, "step": 241660 }, { "epoch": 1.5899028308651801, "grad_norm": 0.005966723694375203, "learning_rate": 1.2270118985929409e-06, "loss": 0.0007, "step": 241670 }, { "epoch": 1.5899686190404136, "grad_norm": 0.00969759620761538, "learning_rate": 1.2266351988483105e-06, "loss": 0.0006, "step": 241680 }, { "epoch": 1.5900344072156471, "grad_norm": 0.012684014941604576, "learning_rate": 1.226258548852236e-06, "loss": 0.0002, "step": 241690 }, { "epoch": 1.5901001953908804, "grad_norm": 0.005355360913388881, "learning_rate": 1.2258819486096828e-06, "loss": 0.0012, "step": 241700 }, { "epoch": 1.5901659835661137, "grad_norm": 0.028338137976294687, "learning_rate": 1.2255053981256172e-06, "loss": 0.0005, "step": 241710 }, { "epoch": 1.5902317717413472, "grad_norm": 0.015639877230717775, "learning_rate": 1.225128897405003e-06, "loss": 0.0003, "step": 241720 }, { "epoch": 1.5902975599165807, "grad_norm": 0.1159828668035022, "learning_rate": 1.2247524464528037e-06, "loss": 0.0005, "step": 241730 }, { "epoch": 1.590363348091814, "grad_norm": 0.039941794530548705, "learning_rate": 1.2243760452739833e-06, "loss": 0.0006, "step": 241740 }, { "epoch": 1.5904291362670473, "grad_norm": 0.00048248372314627395, "learning_rate": 1.2239996938735039e-06, "loss": 0.0003, "step": 241750 }, { "epoch": 1.5904949244422808, "grad_norm": 0.001629710278811131, "learning_rate": 1.2236233922563274e-06, "loss": 0.0005, "step": 241760 }, { "epoch": 1.590560712617514, "grad_norm": 0.020101512096845645, "learning_rate": 1.2232471404274144e-06, "loss": 0.0004, "step": 241770 }, { "epoch": 1.5906265007927476, "grad_norm": 0.016308877119845543, "learning_rate": 1.2228709383917265e-06, "loss": 0.0005, "step": 241780 }, { "epoch": 1.5906922889679809, "grad_norm": 0.0023027177341816596, "learning_rate": 1.222494786154223e-06, "loss": 0.0003, "step": 241790 }, { "epoch": 1.5907580771432142, "grad_norm": 0.008984179381516844, "learning_rate": 1.2221186837198624e-06, "loss": 0.0005, "step": 241800 }, { "epoch": 1.5908238653184477, "grad_norm": 0.024277681890464035, "learning_rate": 1.2217426310936048e-06, "loss": 0.0007, "step": 241810 }, { "epoch": 1.5908896534936812, "grad_norm": 0.004087234778135779, "learning_rate": 1.2213666282804076e-06, "loss": 0.0003, "step": 241820 }, { "epoch": 1.5909554416689144, "grad_norm": 0.0059998911217549345, "learning_rate": 1.2209906752852286e-06, "loss": 0.0005, "step": 241830 }, { "epoch": 1.5910212298441477, "grad_norm": 0.018709959209901823, "learning_rate": 1.220614772113023e-06, "loss": 0.0005, "step": 241840 }, { "epoch": 1.5910870180193812, "grad_norm": 0.009609971003871226, "learning_rate": 1.220238918768748e-06, "loss": 0.0006, "step": 241850 }, { "epoch": 1.5911528061946147, "grad_norm": 0.006533394001294123, "learning_rate": 1.2198631152573588e-06, "loss": 0.0007, "step": 241860 }, { "epoch": 1.591218594369848, "grad_norm": 0.008433193227621375, "learning_rate": 1.2194873615838087e-06, "loss": 0.0005, "step": 241870 }, { "epoch": 1.5912843825450813, "grad_norm": 0.004931358826940347, "learning_rate": 1.219111657753053e-06, "loss": 0.0002, "step": 241880 }, { "epoch": 1.5913501707203146, "grad_norm": 0.139113854955565, "learning_rate": 1.2187360037700452e-06, "loss": 0.0007, "step": 241890 }, { "epoch": 1.591415958895548, "grad_norm": 0.02534927438476064, "learning_rate": 1.218360399639737e-06, "loss": 0.0006, "step": 241900 }, { "epoch": 1.5914817470707816, "grad_norm": 0.09154178265396011, "learning_rate": 1.2179848453670812e-06, "loss": 0.0007, "step": 241910 }, { "epoch": 1.5915475352460149, "grad_norm": 0.039450304455117845, "learning_rate": 1.2176093409570289e-06, "loss": 0.0008, "step": 241920 }, { "epoch": 1.5916133234212482, "grad_norm": 0.01955584031351973, "learning_rate": 1.2172338864145289e-06, "loss": 0.0005, "step": 241930 }, { "epoch": 1.5916791115964817, "grad_norm": 0.03714827405821233, "learning_rate": 1.216858481744535e-06, "loss": 0.0007, "step": 241940 }, { "epoch": 1.5917448997717152, "grad_norm": 0.00250807258665803, "learning_rate": 1.2164831269519944e-06, "loss": 0.0004, "step": 241950 }, { "epoch": 1.5918106879469485, "grad_norm": 0.003977464649082237, "learning_rate": 1.216107822041856e-06, "loss": 0.0003, "step": 241960 }, { "epoch": 1.5918764761221817, "grad_norm": 0.005128531450617, "learning_rate": 1.2157325670190683e-06, "loss": 0.0005, "step": 241970 }, { "epoch": 1.591942264297415, "grad_norm": 0.013587357439829318, "learning_rate": 1.2153573618885783e-06, "loss": 0.0008, "step": 241980 }, { "epoch": 1.5920080524726485, "grad_norm": 0.11060991726154573, "learning_rate": 1.214982206655333e-06, "loss": 0.0006, "step": 241990 }, { "epoch": 1.592073840647882, "grad_norm": 0.027158269848501188, "learning_rate": 1.2146071013242777e-06, "loss": 0.0003, "step": 242000 }, { "epoch": 1.5921396288231153, "grad_norm": 0.019878860745169858, "learning_rate": 1.2142320459003592e-06, "loss": 0.0009, "step": 242010 }, { "epoch": 1.5922054169983486, "grad_norm": 0.016320358602369828, "learning_rate": 1.2138570403885215e-06, "loss": 0.0009, "step": 242020 }, { "epoch": 1.5922712051735821, "grad_norm": 0.03652259745461184, "learning_rate": 1.2134820847937083e-06, "loss": 0.0004, "step": 242030 }, { "epoch": 1.5923369933488156, "grad_norm": 0.013373872276208378, "learning_rate": 1.2131071791208637e-06, "loss": 0.0011, "step": 242040 }, { "epoch": 1.592402781524049, "grad_norm": 0.014878374141117628, "learning_rate": 1.2127323233749305e-06, "loss": 0.0005, "step": 242050 }, { "epoch": 1.5924685696992822, "grad_norm": 0.003981279818440967, "learning_rate": 1.2123575175608493e-06, "loss": 0.0003, "step": 242060 }, { "epoch": 1.5925343578745155, "grad_norm": 0.018234837696390865, "learning_rate": 1.2119827616835645e-06, "loss": 0.0003, "step": 242070 }, { "epoch": 1.592600146049749, "grad_norm": 0.04982056407742247, "learning_rate": 1.2116080557480153e-06, "loss": 0.0011, "step": 242080 }, { "epoch": 1.5926659342249825, "grad_norm": 0.012245544788389807, "learning_rate": 1.2112333997591423e-06, "loss": 0.0019, "step": 242090 }, { "epoch": 1.5927317224002158, "grad_norm": 0.0005150652921372331, "learning_rate": 1.2108587937218847e-06, "loss": 0.001, "step": 242100 }, { "epoch": 1.592797510575449, "grad_norm": 0.02414454935741662, "learning_rate": 1.2104842376411813e-06, "loss": 0.0008, "step": 242110 }, { "epoch": 1.5928632987506826, "grad_norm": 0.03396174356361921, "learning_rate": 1.2101097315219702e-06, "loss": 0.0006, "step": 242120 }, { "epoch": 1.592929086925916, "grad_norm": 0.026451506093431255, "learning_rate": 1.2097352753691893e-06, "loss": 0.0007, "step": 242130 }, { "epoch": 1.5929948751011493, "grad_norm": 0.00030645119275184943, "learning_rate": 1.209360869187775e-06, "loss": 0.0005, "step": 242140 }, { "epoch": 1.5930606632763826, "grad_norm": 0.01533952816285038, "learning_rate": 1.2089865129826645e-06, "loss": 0.0005, "step": 242150 }, { "epoch": 1.5931264514516161, "grad_norm": 0.0007159008631881062, "learning_rate": 1.2086122067587918e-06, "loss": 0.0007, "step": 242160 }, { "epoch": 1.5931922396268494, "grad_norm": 0.13210037422191115, "learning_rate": 1.2082379505210933e-06, "loss": 0.0003, "step": 242170 }, { "epoch": 1.593258027802083, "grad_norm": 0.008795899322117584, "learning_rate": 1.2078637442745027e-06, "loss": 0.0004, "step": 242180 }, { "epoch": 1.5933238159773162, "grad_norm": 0.03151107531977201, "learning_rate": 1.2074895880239518e-06, "loss": 0.0002, "step": 242190 }, { "epoch": 1.5933896041525495, "grad_norm": 0.05417586487650146, "learning_rate": 1.2071154817743768e-06, "loss": 0.0006, "step": 242200 }, { "epoch": 1.593455392327783, "grad_norm": 0.010607621825757694, "learning_rate": 1.2067414255307085e-06, "loss": 0.0005, "step": 242210 }, { "epoch": 1.5935211805030165, "grad_norm": 0.0209556057496251, "learning_rate": 1.2063674192978787e-06, "loss": 0.0004, "step": 242220 }, { "epoch": 1.5935869686782498, "grad_norm": 0.0220050005250851, "learning_rate": 1.2059934630808174e-06, "loss": 0.0006, "step": 242230 }, { "epoch": 1.593652756853483, "grad_norm": 0.03580769919080942, "learning_rate": 1.2056195568844559e-06, "loss": 0.0003, "step": 242240 }, { "epoch": 1.5937185450287166, "grad_norm": 0.0557710686291227, "learning_rate": 1.2052457007137236e-06, "loss": 0.0006, "step": 242250 }, { "epoch": 1.59378433320395, "grad_norm": 0.019782489936457582, "learning_rate": 1.2048718945735493e-06, "loss": 0.0006, "step": 242260 }, { "epoch": 1.5938501213791834, "grad_norm": 0.03222605280496531, "learning_rate": 1.2044981384688614e-06, "loss": 0.0013, "step": 242270 }, { "epoch": 1.5939159095544166, "grad_norm": 0.05130299041684827, "learning_rate": 1.2041244324045876e-06, "loss": 0.0005, "step": 242280 }, { "epoch": 1.59398169772965, "grad_norm": 0.044978039716627304, "learning_rate": 1.2037507763856543e-06, "loss": 0.0002, "step": 242290 }, { "epoch": 1.5940474859048834, "grad_norm": 0.059372054937994787, "learning_rate": 1.2033771704169888e-06, "loss": 0.0007, "step": 242300 }, { "epoch": 1.594113274080117, "grad_norm": 0.0002826550087461912, "learning_rate": 1.2030036145035161e-06, "loss": 0.0005, "step": 242310 }, { "epoch": 1.5941790622553502, "grad_norm": 0.0061868246416713, "learning_rate": 1.2026301086501608e-06, "loss": 0.0006, "step": 242320 }, { "epoch": 1.5942448504305835, "grad_norm": 0.009165763401448791, "learning_rate": 1.2022566528618484e-06, "loss": 0.0003, "step": 242330 }, { "epoch": 1.594310638605817, "grad_norm": 0.0391106692677283, "learning_rate": 1.2018832471435026e-06, "loss": 0.0005, "step": 242340 }, { "epoch": 1.5943764267810505, "grad_norm": 0.03545770035984743, "learning_rate": 1.2015098915000455e-06, "loss": 0.0005, "step": 242350 }, { "epoch": 1.5944422149562838, "grad_norm": 0.08601332640725917, "learning_rate": 1.2011365859364005e-06, "loss": 0.0018, "step": 242360 }, { "epoch": 1.594508003131517, "grad_norm": 0.032570897080854005, "learning_rate": 1.2007633304574884e-06, "loss": 0.0003, "step": 242370 }, { "epoch": 1.5945737913067504, "grad_norm": 0.0008441959254292241, "learning_rate": 1.2003901250682305e-06, "loss": 0.0002, "step": 242380 }, { "epoch": 1.5946395794819839, "grad_norm": 0.0017366621963383544, "learning_rate": 1.200016969773547e-06, "loss": 0.0004, "step": 242390 }, { "epoch": 1.5947053676572174, "grad_norm": 0.027497440858211143, "learning_rate": 1.1996438645783581e-06, "loss": 0.0005, "step": 242400 }, { "epoch": 1.5947711558324507, "grad_norm": 0.05350441457351901, "learning_rate": 1.1992708094875826e-06, "loss": 0.0001, "step": 242410 }, { "epoch": 1.594836944007684, "grad_norm": 0.019542429479890145, "learning_rate": 1.198897804506139e-06, "loss": 0.0004, "step": 242420 }, { "epoch": 1.5949027321829174, "grad_norm": 0.05140789773416478, "learning_rate": 1.1985248496389452e-06, "loss": 0.0005, "step": 242430 }, { "epoch": 1.594968520358151, "grad_norm": 0.02829961878837765, "learning_rate": 1.198151944890918e-06, "loss": 0.0004, "step": 242440 }, { "epoch": 1.5950343085333842, "grad_norm": 0.025192423032856674, "learning_rate": 1.1977790902669722e-06, "loss": 0.0007, "step": 242450 }, { "epoch": 1.5951000967086175, "grad_norm": 0.09289588317689305, "learning_rate": 1.197406285772027e-06, "loss": 0.0005, "step": 242460 }, { "epoch": 1.595165884883851, "grad_norm": 0.005746143930794219, "learning_rate": 1.1970335314109953e-06, "loss": 0.0004, "step": 242470 }, { "epoch": 1.5952316730590843, "grad_norm": 0.03708155354009661, "learning_rate": 1.1966608271887924e-06, "loss": 0.0008, "step": 242480 }, { "epoch": 1.5952974612343178, "grad_norm": 0.022635992383023638, "learning_rate": 1.196288173110332e-06, "loss": 0.0006, "step": 242490 }, { "epoch": 1.595363249409551, "grad_norm": 0.001783791223046792, "learning_rate": 1.195915569180527e-06, "loss": 0.0004, "step": 242500 }, { "epoch": 1.5954290375847844, "grad_norm": 0.023629034370001543, "learning_rate": 1.1955430154042891e-06, "loss": 0.0006, "step": 242510 }, { "epoch": 1.5954948257600179, "grad_norm": 0.0007454314665156218, "learning_rate": 1.1951705117865314e-06, "loss": 0.0007, "step": 242520 }, { "epoch": 1.5955606139352514, "grad_norm": 0.02115644130656142, "learning_rate": 1.1947980583321639e-06, "loss": 0.0005, "step": 242530 }, { "epoch": 1.5956264021104847, "grad_norm": 0.042446739643204424, "learning_rate": 1.194425655046098e-06, "loss": 0.0004, "step": 242540 }, { "epoch": 1.595692190285718, "grad_norm": 0.0087975192253675, "learning_rate": 1.194053301933243e-06, "loss": 0.0002, "step": 242550 }, { "epoch": 1.5957579784609515, "grad_norm": 0.03667288556261964, "learning_rate": 1.1936809989985083e-06, "loss": 0.0005, "step": 242560 }, { "epoch": 1.595823766636185, "grad_norm": 0.004144797233643462, "learning_rate": 1.1933087462468024e-06, "loss": 0.0004, "step": 242570 }, { "epoch": 1.5958895548114183, "grad_norm": 0.11663634752595466, "learning_rate": 1.1929365436830314e-06, "loss": 0.0006, "step": 242580 }, { "epoch": 1.5959553429866515, "grad_norm": 0.027452270917644225, "learning_rate": 1.1925643913121055e-06, "loss": 0.0003, "step": 242590 }, { "epoch": 1.5960211311618848, "grad_norm": 7.608819771164384e-05, "learning_rate": 1.1921922891389298e-06, "loss": 0.0003, "step": 242600 }, { "epoch": 1.5960869193371183, "grad_norm": 0.053316248217306766, "learning_rate": 1.19182023716841e-06, "loss": 0.0007, "step": 242610 }, { "epoch": 1.5961527075123518, "grad_norm": 0.08277725891566783, "learning_rate": 1.1914482354054513e-06, "loss": 0.0007, "step": 242620 }, { "epoch": 1.5962184956875851, "grad_norm": 0.34660546887599347, "learning_rate": 1.1910762838549584e-06, "loss": 0.004, "step": 242630 }, { "epoch": 1.5962842838628184, "grad_norm": 0.016882256784780395, "learning_rate": 1.1907043825218357e-06, "loss": 0.0004, "step": 242640 }, { "epoch": 1.596350072038052, "grad_norm": 0.07163109114295245, "learning_rate": 1.1903325314109848e-06, "loss": 0.0007, "step": 242650 }, { "epoch": 1.5964158602132854, "grad_norm": 0.02696931750955188, "learning_rate": 1.1899607305273103e-06, "loss": 0.0004, "step": 242660 }, { "epoch": 1.5964816483885187, "grad_norm": 0.007436410091211537, "learning_rate": 1.1895889798757122e-06, "loss": 0.0009, "step": 242670 }, { "epoch": 1.596547436563752, "grad_norm": 0.039739905317730716, "learning_rate": 1.1892172794610928e-06, "loss": 0.0003, "step": 242680 }, { "epoch": 1.5966132247389853, "grad_norm": 0.004336561030122651, "learning_rate": 1.188845629288352e-06, "loss": 0.0008, "step": 242690 }, { "epoch": 1.5966790129142188, "grad_norm": 0.005820183523553679, "learning_rate": 1.1884740293623908e-06, "loss": 0.0004, "step": 242700 }, { "epoch": 1.5967448010894523, "grad_norm": 0.0012295210338854764, "learning_rate": 1.1881024796881057e-06, "loss": 0.0002, "step": 242710 }, { "epoch": 1.5968105892646856, "grad_norm": 0.059451014383524514, "learning_rate": 1.187730980270399e-06, "loss": 0.0009, "step": 242720 }, { "epoch": 1.5968763774399188, "grad_norm": 0.084221950014799, "learning_rate": 1.1873595311141666e-06, "loss": 0.0009, "step": 242730 }, { "epoch": 1.5969421656151523, "grad_norm": 0.010348645847222447, "learning_rate": 1.186988132224306e-06, "loss": 0.0006, "step": 242740 }, { "epoch": 1.5970079537903858, "grad_norm": 0.006310073636871125, "learning_rate": 1.1866167836057135e-06, "loss": 0.0003, "step": 242750 }, { "epoch": 1.5970737419656191, "grad_norm": 0.014590988989292154, "learning_rate": 1.1862454852632855e-06, "loss": 0.0006, "step": 242760 }, { "epoch": 1.5971395301408524, "grad_norm": 0.013233092988872002, "learning_rate": 1.1858742372019172e-06, "loss": 0.0006, "step": 242770 }, { "epoch": 1.597205318316086, "grad_norm": 0.007492261511133388, "learning_rate": 1.1855030394265027e-06, "loss": 0.0004, "step": 242780 }, { "epoch": 1.5972711064913192, "grad_norm": 0.0021676755068397355, "learning_rate": 1.1851318919419363e-06, "loss": 0.0004, "step": 242790 }, { "epoch": 1.5973368946665527, "grad_norm": 0.002662913968908275, "learning_rate": 1.1847607947531115e-06, "loss": 0.0007, "step": 242800 }, { "epoch": 1.597402682841786, "grad_norm": 0.01850570275058765, "learning_rate": 1.1843897478649202e-06, "loss": 0.0005, "step": 242810 }, { "epoch": 1.5974684710170193, "grad_norm": 0.016829612671783057, "learning_rate": 1.1840187512822542e-06, "loss": 0.0003, "step": 242820 }, { "epoch": 1.5975342591922528, "grad_norm": 0.03836748069401807, "learning_rate": 1.1836478050100065e-06, "loss": 0.0005, "step": 242830 }, { "epoch": 1.5976000473674863, "grad_norm": 0.010974586431371917, "learning_rate": 1.1832769090530638e-06, "loss": 0.0011, "step": 242840 }, { "epoch": 1.5976658355427196, "grad_norm": 0.0070844502932296055, "learning_rate": 1.1829060634163226e-06, "loss": 0.0007, "step": 242850 }, { "epoch": 1.5977316237179529, "grad_norm": 0.04197127814409009, "learning_rate": 1.1825352681046659e-06, "loss": 0.0006, "step": 242860 }, { "epoch": 1.5977974118931864, "grad_norm": 0.13490978882124086, "learning_rate": 1.1821645231229856e-06, "loss": 0.0011, "step": 242870 }, { "epoch": 1.5978632000684199, "grad_norm": 0.0009509773093370422, "learning_rate": 1.1817938284761682e-06, "loss": 0.0003, "step": 242880 }, { "epoch": 1.5979289882436531, "grad_norm": 0.008189655860164399, "learning_rate": 1.1814231841691015e-06, "loss": 0.0011, "step": 242890 }, { "epoch": 1.5979947764188864, "grad_norm": 0.014307086241784285, "learning_rate": 1.1810525902066728e-06, "loss": 0.0006, "step": 242900 }, { "epoch": 1.5980605645941197, "grad_norm": 0.043426079804406284, "learning_rate": 1.1806820465937668e-06, "loss": 0.0014, "step": 242910 }, { "epoch": 1.5981263527693532, "grad_norm": 0.03197035650913698, "learning_rate": 1.1803115533352699e-06, "loss": 0.0006, "step": 242920 }, { "epoch": 1.5981921409445867, "grad_norm": 0.009674534987578707, "learning_rate": 1.1799411104360659e-06, "loss": 0.0004, "step": 242930 }, { "epoch": 1.59825792911982, "grad_norm": 0.015184665140439821, "learning_rate": 1.179570717901038e-06, "loss": 0.0006, "step": 242940 }, { "epoch": 1.5983237172950533, "grad_norm": 0.020742264509508147, "learning_rate": 1.1792003757350718e-06, "loss": 0.0006, "step": 242950 }, { "epoch": 1.5983895054702868, "grad_norm": 0.018793796789009318, "learning_rate": 1.1788300839430488e-06, "loss": 0.0005, "step": 242960 }, { "epoch": 1.5984552936455203, "grad_norm": 0.0460963571371978, "learning_rate": 1.178459842529851e-06, "loss": 0.0014, "step": 242970 }, { "epoch": 1.5985210818207536, "grad_norm": 0.0027675066775562636, "learning_rate": 1.1780896515003598e-06, "loss": 0.0003, "step": 242980 }, { "epoch": 1.5985868699959869, "grad_norm": 0.049501120210388744, "learning_rate": 1.1777195108594558e-06, "loss": 0.0008, "step": 242990 }, { "epoch": 1.5986526581712202, "grad_norm": 0.014435074494784553, "learning_rate": 1.177349420612019e-06, "loss": 0.0004, "step": 243000 }, { "epoch": 1.5987184463464537, "grad_norm": 0.010404352104501505, "learning_rate": 1.1769793807629282e-06, "loss": 0.0003, "step": 243010 }, { "epoch": 1.5987842345216872, "grad_norm": 0.054414011406074964, "learning_rate": 1.1766093913170628e-06, "loss": 0.0006, "step": 243020 }, { "epoch": 1.5988500226969204, "grad_norm": 0.02967556402596711, "learning_rate": 1.1762394522793002e-06, "loss": 0.0005, "step": 243030 }, { "epoch": 1.5989158108721537, "grad_norm": 0.014142328391437621, "learning_rate": 1.175869563654518e-06, "loss": 0.0002, "step": 243040 }, { "epoch": 1.5989815990473872, "grad_norm": 0.16500069996954936, "learning_rate": 1.1754997254475936e-06, "loss": 0.0007, "step": 243050 }, { "epoch": 1.5990473872226207, "grad_norm": 0.04990893226964612, "learning_rate": 1.1751299376634018e-06, "loss": 0.0009, "step": 243060 }, { "epoch": 1.599113175397854, "grad_norm": 0.013344321049728642, "learning_rate": 1.1747602003068165e-06, "loss": 0.0004, "step": 243070 }, { "epoch": 1.5991789635730873, "grad_norm": 0.016492995777437854, "learning_rate": 1.174390513382716e-06, "loss": 0.0008, "step": 243080 }, { "epoch": 1.5992447517483206, "grad_norm": 0.03845590025725492, "learning_rate": 1.1740208768959726e-06, "loss": 0.0012, "step": 243090 }, { "epoch": 1.599310539923554, "grad_norm": 0.048669785239192645, "learning_rate": 1.1736512908514602e-06, "loss": 0.0004, "step": 243100 }, { "epoch": 1.5993763280987876, "grad_norm": 0.14567139325269338, "learning_rate": 1.1732817552540504e-06, "loss": 0.0007, "step": 243110 }, { "epoch": 1.5994421162740209, "grad_norm": 0.030215214614004175, "learning_rate": 1.1729122701086153e-06, "loss": 0.0003, "step": 243120 }, { "epoch": 1.5995079044492542, "grad_norm": 0.15967201177445645, "learning_rate": 1.172542835420027e-06, "loss": 0.0007, "step": 243130 }, { "epoch": 1.5995736926244877, "grad_norm": 0.006963301391039044, "learning_rate": 1.172173451193156e-06, "loss": 0.0004, "step": 243140 }, { "epoch": 1.5996394807997212, "grad_norm": 0.00034297256834786994, "learning_rate": 1.1718041174328715e-06, "loss": 0.0031, "step": 243150 }, { "epoch": 1.5997052689749545, "grad_norm": 0.03204407935136902, "learning_rate": 1.171434834144044e-06, "loss": 0.0007, "step": 243160 }, { "epoch": 1.5997710571501877, "grad_norm": 0.03819287768956706, "learning_rate": 1.1710656013315413e-06, "loss": 0.0005, "step": 243170 }, { "epoch": 1.5998368453254213, "grad_norm": 0.015216016869950298, "learning_rate": 1.1706964190002324e-06, "loss": 0.0005, "step": 243180 }, { "epoch": 1.5999026335006548, "grad_norm": 0.026346446601851474, "learning_rate": 1.1703272871549838e-06, "loss": 0.0004, "step": 243190 }, { "epoch": 1.599968421675888, "grad_norm": 0.030812924938945687, "learning_rate": 1.1699582058006604e-06, "loss": 0.0007, "step": 243200 }, { "epoch": 1.6000342098511213, "grad_norm": 0.08089923859408206, "learning_rate": 1.1695891749421318e-06, "loss": 0.0007, "step": 243210 }, { "epoch": 1.6000999980263546, "grad_norm": 0.013942106451703405, "learning_rate": 1.1692201945842623e-06, "loss": 0.0003, "step": 243220 }, { "epoch": 1.6001657862015881, "grad_norm": 0.02777858227687971, "learning_rate": 1.1688512647319156e-06, "loss": 0.0008, "step": 243230 }, { "epoch": 1.6002315743768216, "grad_norm": 0.02497214592347495, "learning_rate": 1.1684823853899562e-06, "loss": 0.0004, "step": 243240 }, { "epoch": 1.600297362552055, "grad_norm": 0.000273492821521201, "learning_rate": 1.1681135565632478e-06, "loss": 0.0003, "step": 243250 }, { "epoch": 1.6003631507272882, "grad_norm": 0.0009504818349187967, "learning_rate": 1.1677447782566525e-06, "loss": 0.001, "step": 243260 }, { "epoch": 1.6004289389025217, "grad_norm": 0.0006935940496219346, "learning_rate": 1.1673760504750326e-06, "loss": 0.0005, "step": 243270 }, { "epoch": 1.6004947270777552, "grad_norm": 0.0529148751571804, "learning_rate": 1.167007373223249e-06, "loss": 0.0018, "step": 243280 }, { "epoch": 1.6005605152529885, "grad_norm": 0.0057048165086357435, "learning_rate": 1.1666387465061629e-06, "loss": 0.0003, "step": 243290 }, { "epoch": 1.6006263034282218, "grad_norm": 0.07526771171487703, "learning_rate": 1.1662701703286343e-06, "loss": 0.001, "step": 243300 }, { "epoch": 1.600692091603455, "grad_norm": 0.01083817827564698, "learning_rate": 1.1659016446955229e-06, "loss": 0.0005, "step": 243310 }, { "epoch": 1.6007578797786886, "grad_norm": 0.018686791994232812, "learning_rate": 1.1655331696116868e-06, "loss": 0.0003, "step": 243320 }, { "epoch": 1.600823667953922, "grad_norm": 0.0033421037058578695, "learning_rate": 1.1651647450819826e-06, "loss": 0.0009, "step": 243330 }, { "epoch": 1.6008894561291553, "grad_norm": 0.0010475377754751403, "learning_rate": 1.1647963711112708e-06, "loss": 0.0004, "step": 243340 }, { "epoch": 1.6009552443043886, "grad_norm": 0.03624726159278965, "learning_rate": 1.1644280477044062e-06, "loss": 0.0011, "step": 243350 }, { "epoch": 1.6010210324796221, "grad_norm": 0.001519833513624039, "learning_rate": 1.1640597748662458e-06, "loss": 0.0003, "step": 243360 }, { "epoch": 1.6010868206548556, "grad_norm": 0.04774250579422381, "learning_rate": 1.163691552601644e-06, "loss": 0.0005, "step": 243370 }, { "epoch": 1.601152608830089, "grad_norm": 0.054143169282395286, "learning_rate": 1.1633233809154555e-06, "loss": 0.0003, "step": 243380 }, { "epoch": 1.6012183970053222, "grad_norm": 0.02260811229687273, "learning_rate": 1.162955259812535e-06, "loss": 0.0005, "step": 243390 }, { "epoch": 1.6012841851805555, "grad_norm": 0.0685840825137654, "learning_rate": 1.1625871892977354e-06, "loss": 0.0007, "step": 243400 }, { "epoch": 1.601349973355789, "grad_norm": 0.0008355168340693265, "learning_rate": 1.1622191693759095e-06, "loss": 0.0005, "step": 243410 }, { "epoch": 1.6014157615310225, "grad_norm": 0.016174316207551182, "learning_rate": 1.1618512000519094e-06, "loss": 0.0003, "step": 243420 }, { "epoch": 1.6014815497062558, "grad_norm": 0.050182428832162, "learning_rate": 1.1614832813305866e-06, "loss": 0.0004, "step": 243430 }, { "epoch": 1.601547337881489, "grad_norm": 0.0005260759930165908, "learning_rate": 1.1611154132167912e-06, "loss": 0.0003, "step": 243440 }, { "epoch": 1.6016131260567226, "grad_norm": 0.005379709724349035, "learning_rate": 1.160747595715374e-06, "loss": 0.0003, "step": 243450 }, { "epoch": 1.601678914231956, "grad_norm": 0.001994776329973385, "learning_rate": 1.1603798288311824e-06, "loss": 0.0002, "step": 243460 }, { "epoch": 1.6017447024071894, "grad_norm": 0.054224051958561684, "learning_rate": 1.1600121125690677e-06, "loss": 0.0007, "step": 243470 }, { "epoch": 1.6018104905824226, "grad_norm": 0.02973267333361863, "learning_rate": 1.1596444469338769e-06, "loss": 0.0006, "step": 243480 }, { "epoch": 1.6018762787576561, "grad_norm": 0.00017403403408210942, "learning_rate": 1.1592768319304576e-06, "loss": 0.0005, "step": 243490 }, { "epoch": 1.6019420669328894, "grad_norm": 0.016616375448271235, "learning_rate": 1.1589092675636565e-06, "loss": 0.0005, "step": 243500 }, { "epoch": 1.602007855108123, "grad_norm": 0.011830044647249646, "learning_rate": 1.1585417538383186e-06, "loss": 0.0004, "step": 243510 }, { "epoch": 1.6020736432833562, "grad_norm": 0.014214658731877303, "learning_rate": 1.1581742907592908e-06, "loss": 0.0004, "step": 243520 }, { "epoch": 1.6021394314585895, "grad_norm": 0.002322063000963418, "learning_rate": 1.1578068783314162e-06, "loss": 0.0005, "step": 243530 }, { "epoch": 1.602205219633823, "grad_norm": 0.09142236464730552, "learning_rate": 1.1574395165595397e-06, "loss": 0.0007, "step": 243540 }, { "epoch": 1.6022710078090565, "grad_norm": 0.027812130113298153, "learning_rate": 1.157072205448505e-06, "loss": 0.0011, "step": 243550 }, { "epoch": 1.6023367959842898, "grad_norm": 0.031120655183087452, "learning_rate": 1.1567049450031536e-06, "loss": 0.0003, "step": 243560 }, { "epoch": 1.602402584159523, "grad_norm": 0.0043935371860133595, "learning_rate": 1.156337735228329e-06, "loss": 0.0003, "step": 243570 }, { "epoch": 1.6024683723347566, "grad_norm": 0.05344041006675345, "learning_rate": 1.1559705761288714e-06, "loss": 0.0003, "step": 243580 }, { "epoch": 1.60253416050999, "grad_norm": 0.0004175713526895801, "learning_rate": 1.15560346770962e-06, "loss": 0.0008, "step": 243590 }, { "epoch": 1.6025999486852234, "grad_norm": 0.10626857440858761, "learning_rate": 1.1552364099754188e-06, "loss": 0.0004, "step": 243600 }, { "epoch": 1.6026657368604567, "grad_norm": 0.012413460460946591, "learning_rate": 1.1548694029311047e-06, "loss": 0.0006, "step": 243610 }, { "epoch": 1.60273152503569, "grad_norm": 0.02047163954733515, "learning_rate": 1.1545024465815164e-06, "loss": 0.0008, "step": 243620 }, { "epoch": 1.6027973132109234, "grad_norm": 0.009191691733397678, "learning_rate": 1.1541355409314926e-06, "loss": 0.0003, "step": 243630 }, { "epoch": 1.602863101386157, "grad_norm": 0.001606070417759644, "learning_rate": 1.1537686859858698e-06, "loss": 0.0002, "step": 243640 }, { "epoch": 1.6029288895613902, "grad_norm": 0.0017314822365012477, "learning_rate": 1.1534018817494852e-06, "loss": 0.0003, "step": 243650 }, { "epoch": 1.6029946777366235, "grad_norm": 0.06437863896241766, "learning_rate": 1.1530351282271746e-06, "loss": 0.0004, "step": 243660 }, { "epoch": 1.603060465911857, "grad_norm": 0.004529222691861403, "learning_rate": 1.152668425423773e-06, "loss": 0.0005, "step": 243670 }, { "epoch": 1.6031262540870905, "grad_norm": 0.0430848783423276, "learning_rate": 1.1523017733441162e-06, "loss": 0.0004, "step": 243680 }, { "epoch": 1.6031920422623238, "grad_norm": 0.0015004888353611237, "learning_rate": 1.1519351719930366e-06, "loss": 0.0003, "step": 243690 }, { "epoch": 1.603257830437557, "grad_norm": 0.00606552977668515, "learning_rate": 1.1515686213753691e-06, "loss": 0.0004, "step": 243700 }, { "epoch": 1.6033236186127904, "grad_norm": 0.010582677635784783, "learning_rate": 1.151202121495945e-06, "loss": 0.0008, "step": 243710 }, { "epoch": 1.6033894067880239, "grad_norm": 0.02203629658782073, "learning_rate": 1.1508356723595958e-06, "loss": 0.0013, "step": 243720 }, { "epoch": 1.6034551949632574, "grad_norm": 0.015953614512846617, "learning_rate": 1.1504692739711553e-06, "loss": 0.0009, "step": 243730 }, { "epoch": 1.6035209831384907, "grad_norm": 0.015547964465054388, "learning_rate": 1.150102926335453e-06, "loss": 0.0003, "step": 243740 }, { "epoch": 1.603586771313724, "grad_norm": 0.012110900249133106, "learning_rate": 1.1497366294573188e-06, "loss": 0.0003, "step": 243750 }, { "epoch": 1.6036525594889575, "grad_norm": 0.07607327010534064, "learning_rate": 1.1493703833415808e-06, "loss": 0.0004, "step": 243760 }, { "epoch": 1.603718347664191, "grad_norm": 0.060631774807938243, "learning_rate": 1.1490041879930697e-06, "loss": 0.0004, "step": 243770 }, { "epoch": 1.6037841358394243, "grad_norm": 0.0009471581741852392, "learning_rate": 1.1486380434166116e-06, "loss": 0.0003, "step": 243780 }, { "epoch": 1.6038499240146575, "grad_norm": 0.018058661457007092, "learning_rate": 1.1482719496170352e-06, "loss": 0.0002, "step": 243790 }, { "epoch": 1.603915712189891, "grad_norm": 0.014813336950143875, "learning_rate": 1.1479059065991666e-06, "loss": 0.0005, "step": 243800 }, { "epoch": 1.6039815003651243, "grad_norm": 0.02147886197893652, "learning_rate": 1.1475399143678317e-06, "loss": 0.0003, "step": 243810 }, { "epoch": 1.6040472885403578, "grad_norm": 0.0003635594912750623, "learning_rate": 1.1471739729278558e-06, "loss": 0.0004, "step": 243820 }, { "epoch": 1.6041130767155911, "grad_norm": 0.014351899268743818, "learning_rate": 1.1468080822840634e-06, "loss": 0.0007, "step": 243830 }, { "epoch": 1.6041788648908244, "grad_norm": 0.0151307700212059, "learning_rate": 1.1464422424412786e-06, "loss": 0.0004, "step": 243840 }, { "epoch": 1.604244653066058, "grad_norm": 0.010861657941678265, "learning_rate": 1.1460764534043233e-06, "loss": 0.0006, "step": 243850 }, { "epoch": 1.6043104412412914, "grad_norm": 0.020794199241606057, "learning_rate": 1.1457107151780227e-06, "loss": 0.0002, "step": 243860 }, { "epoch": 1.6043762294165247, "grad_norm": 0.08171959250198342, "learning_rate": 1.145345027767198e-06, "loss": 0.0006, "step": 243870 }, { "epoch": 1.604442017591758, "grad_norm": 0.0123491658920663, "learning_rate": 1.1449793911766698e-06, "loss": 0.0003, "step": 243880 }, { "epoch": 1.6045078057669915, "grad_norm": 0.007213694487377793, "learning_rate": 1.1446138054112589e-06, "loss": 0.0002, "step": 243890 }, { "epoch": 1.604573593942225, "grad_norm": 0.009007756944898783, "learning_rate": 1.1442482704757851e-06, "loss": 0.0002, "step": 243900 }, { "epoch": 1.6046393821174583, "grad_norm": 0.00888255636556788, "learning_rate": 1.1438827863750679e-06, "loss": 0.0009, "step": 243910 }, { "epoch": 1.6047051702926916, "grad_norm": 0.024700134010987456, "learning_rate": 1.143517353113926e-06, "loss": 0.0004, "step": 243920 }, { "epoch": 1.6047709584679248, "grad_norm": 0.019770796936569596, "learning_rate": 1.1431519706971771e-06, "loss": 0.0003, "step": 243930 }, { "epoch": 1.6048367466431583, "grad_norm": 0.06863597615359325, "learning_rate": 1.1427866391296382e-06, "loss": 0.0007, "step": 243940 }, { "epoch": 1.6049025348183918, "grad_norm": 0.02236399120478323, "learning_rate": 1.1424213584161264e-06, "loss": 0.0006, "step": 243950 }, { "epoch": 1.6049683229936251, "grad_norm": 0.007856203255918555, "learning_rate": 1.1420561285614574e-06, "loss": 0.0005, "step": 243960 }, { "epoch": 1.6050341111688584, "grad_norm": 0.0005762881991611033, "learning_rate": 1.1416909495704465e-06, "loss": 0.0006, "step": 243970 }, { "epoch": 1.605099899344092, "grad_norm": 0.04603262639136987, "learning_rate": 1.141325821447906e-06, "loss": 0.0006, "step": 243980 }, { "epoch": 1.6051656875193254, "grad_norm": 0.02392261400112848, "learning_rate": 1.1409607441986543e-06, "loss": 0.0005, "step": 243990 }, { "epoch": 1.6052314756945587, "grad_norm": 0.010199970194088227, "learning_rate": 1.1405957178275018e-06, "loss": 0.0003, "step": 244000 }, { "epoch": 1.605297263869792, "grad_norm": 0.0004380772559475291, "learning_rate": 1.1402307423392617e-06, "loss": 0.0004, "step": 244010 }, { "epoch": 1.6053630520450253, "grad_norm": 0.013879914226456498, "learning_rate": 1.1398658177387456e-06, "loss": 0.0005, "step": 244020 }, { "epoch": 1.6054288402202588, "grad_norm": 0.011766641510560302, "learning_rate": 1.1395009440307652e-06, "loss": 0.0004, "step": 244030 }, { "epoch": 1.6054946283954923, "grad_norm": 0.03864598186914487, "learning_rate": 1.1391361212201308e-06, "loss": 0.0005, "step": 244040 }, { "epoch": 1.6055604165707256, "grad_norm": 0.04378469193849011, "learning_rate": 1.1387713493116515e-06, "loss": 0.0004, "step": 244050 }, { "epoch": 1.6056262047459589, "grad_norm": 0.00022393470028415888, "learning_rate": 1.1384066283101374e-06, "loss": 0.0004, "step": 244060 }, { "epoch": 1.6056919929211924, "grad_norm": 0.035519122345572585, "learning_rate": 1.1380419582203972e-06, "loss": 0.0004, "step": 244070 }, { "epoch": 1.6057577810964259, "grad_norm": 0.022009099967443668, "learning_rate": 1.1376773390472384e-06, "loss": 0.0004, "step": 244080 }, { "epoch": 1.6058235692716591, "grad_norm": 0.00036998444885769934, "learning_rate": 1.137312770795468e-06, "loss": 0.0006, "step": 244090 }, { "epoch": 1.6058893574468924, "grad_norm": 0.0039394854043646515, "learning_rate": 1.1369482534698923e-06, "loss": 0.0009, "step": 244100 }, { "epoch": 1.6059551456221257, "grad_norm": 0.015940206877730356, "learning_rate": 1.1365837870753165e-06, "loss": 0.0002, "step": 244110 }, { "epoch": 1.6060209337973592, "grad_norm": 0.027829273394217036, "learning_rate": 1.1362193716165482e-06, "loss": 0.0002, "step": 244120 }, { "epoch": 1.6060867219725927, "grad_norm": 0.0013370305708283467, "learning_rate": 1.1358550070983904e-06, "loss": 0.0011, "step": 244130 }, { "epoch": 1.606152510147826, "grad_norm": 0.08722158528244882, "learning_rate": 1.135490693525647e-06, "loss": 0.001, "step": 244140 }, { "epoch": 1.6062182983230593, "grad_norm": 9.737660260340607e-05, "learning_rate": 1.1351264309031219e-06, "loss": 0.0007, "step": 244150 }, { "epoch": 1.6062840864982928, "grad_norm": 0.005452646951643455, "learning_rate": 1.1347622192356162e-06, "loss": 0.0011, "step": 244160 }, { "epoch": 1.6063498746735263, "grad_norm": 0.00027713008774387575, "learning_rate": 1.1343980585279324e-06, "loss": 0.0003, "step": 244170 }, { "epoch": 1.6064156628487596, "grad_norm": 0.0014704822625789995, "learning_rate": 1.1340339487848717e-06, "loss": 0.0003, "step": 244180 }, { "epoch": 1.6064814510239929, "grad_norm": 0.010770659871640274, "learning_rate": 1.133669890011234e-06, "loss": 0.0003, "step": 244190 }, { "epoch": 1.6065472391992264, "grad_norm": 0.03234745358345454, "learning_rate": 1.1333058822118205e-06, "loss": 0.0005, "step": 244200 }, { "epoch": 1.6066130273744599, "grad_norm": 0.05374033683676, "learning_rate": 1.1329419253914293e-06, "loss": 0.0008, "step": 244210 }, { "epoch": 1.6066788155496932, "grad_norm": 0.05481356614924563, "learning_rate": 1.1325780195548586e-06, "loss": 0.0003, "step": 244220 }, { "epoch": 1.6067446037249264, "grad_norm": 0.03405494891624866, "learning_rate": 1.1322141647069069e-06, "loss": 0.0004, "step": 244230 }, { "epoch": 1.6068103919001597, "grad_norm": 0.04514868627431015, "learning_rate": 1.1318503608523695e-06, "loss": 0.0011, "step": 244240 }, { "epoch": 1.6068761800753932, "grad_norm": 0.00047364851474383736, "learning_rate": 1.1314866079960456e-06, "loss": 0.0005, "step": 244250 }, { "epoch": 1.6069419682506267, "grad_norm": 0.046661668559625884, "learning_rate": 1.1311229061427299e-06, "loss": 0.0005, "step": 244260 }, { "epoch": 1.60700775642586, "grad_norm": 0.0005495894832933507, "learning_rate": 1.1307592552972174e-06, "loss": 0.0003, "step": 244270 }, { "epoch": 1.6070735446010933, "grad_norm": 0.00031683995514389244, "learning_rate": 1.130395655464302e-06, "loss": 0.0007, "step": 244280 }, { "epoch": 1.6071393327763268, "grad_norm": 0.045777322367551485, "learning_rate": 1.1300321066487785e-06, "loss": 0.0004, "step": 244290 }, { "epoch": 1.6072051209515603, "grad_norm": 0.0014964968609025777, "learning_rate": 1.1296686088554387e-06, "loss": 0.0001, "step": 244300 }, { "epoch": 1.6072709091267936, "grad_norm": 0.0016885079629406622, "learning_rate": 1.1293051620890761e-06, "loss": 0.0014, "step": 244310 }, { "epoch": 1.6073366973020269, "grad_norm": 0.03892129794917201, "learning_rate": 1.1289417663544816e-06, "loss": 0.0007, "step": 244320 }, { "epoch": 1.6074024854772602, "grad_norm": 0.0044560083001055125, "learning_rate": 1.1285784216564472e-06, "loss": 0.0005, "step": 244330 }, { "epoch": 1.6074682736524937, "grad_norm": 0.017523780278912014, "learning_rate": 1.1282151279997623e-06, "loss": 0.0002, "step": 244340 }, { "epoch": 1.6075340618277272, "grad_norm": 0.01429560722255723, "learning_rate": 1.1278518853892172e-06, "loss": 0.0005, "step": 244350 }, { "epoch": 1.6075998500029605, "grad_norm": 0.010755968092443205, "learning_rate": 1.127488693829601e-06, "loss": 0.0009, "step": 244360 }, { "epoch": 1.6076656381781937, "grad_norm": 0.008979376427067889, "learning_rate": 1.1271255533256997e-06, "loss": 0.0002, "step": 244370 }, { "epoch": 1.6077314263534273, "grad_norm": 0.01633843191287139, "learning_rate": 1.1267624638823056e-06, "loss": 0.0007, "step": 244380 }, { "epoch": 1.6077972145286608, "grad_norm": 0.0006318255534498914, "learning_rate": 1.1263994255042028e-06, "loss": 0.0005, "step": 244390 }, { "epoch": 1.607863002703894, "grad_norm": 0.006347039958658479, "learning_rate": 1.1260364381961774e-06, "loss": 0.0004, "step": 244400 }, { "epoch": 1.6079287908791273, "grad_norm": 0.038690825994212356, "learning_rate": 1.1256735019630165e-06, "loss": 0.0003, "step": 244410 }, { "epoch": 1.6079945790543606, "grad_norm": 0.024840175437129942, "learning_rate": 1.1253106168095045e-06, "loss": 0.0004, "step": 244420 }, { "epoch": 1.6080603672295941, "grad_norm": 0.007106894899476886, "learning_rate": 1.1249477827404253e-06, "loss": 0.0004, "step": 244430 }, { "epoch": 1.6081261554048276, "grad_norm": 0.008300209103183324, "learning_rate": 1.1245849997605629e-06, "loss": 0.0001, "step": 244440 }, { "epoch": 1.608191943580061, "grad_norm": 0.019174063978860532, "learning_rate": 1.1242222678747006e-06, "loss": 0.0005, "step": 244450 }, { "epoch": 1.6082577317552942, "grad_norm": 0.0021799365969992156, "learning_rate": 1.12385958708762e-06, "loss": 0.0003, "step": 244460 }, { "epoch": 1.6083235199305277, "grad_norm": 0.03088125388917405, "learning_rate": 1.1234969574041033e-06, "loss": 0.0004, "step": 244470 }, { "epoch": 1.6083893081057612, "grad_norm": 0.016164348603774158, "learning_rate": 1.123134378828931e-06, "loss": 0.0003, "step": 244480 }, { "epoch": 1.6084550962809945, "grad_norm": 0.011805535239579393, "learning_rate": 1.122771851366884e-06, "loss": 0.0003, "step": 244490 }, { "epoch": 1.6085208844562278, "grad_norm": 0.04777344963853606, "learning_rate": 1.12240937502274e-06, "loss": 0.0005, "step": 244500 }, { "epoch": 1.6085866726314613, "grad_norm": 0.001041856675712184, "learning_rate": 1.1220469498012804e-06, "loss": 0.0005, "step": 244510 }, { "epoch": 1.6086524608066946, "grad_norm": 0.0004310312306963309, "learning_rate": 1.1216845757072831e-06, "loss": 0.0006, "step": 244520 }, { "epoch": 1.608718248981928, "grad_norm": 0.002630326066038613, "learning_rate": 1.1213222527455254e-06, "loss": 0.0002, "step": 244530 }, { "epoch": 1.6087840371571613, "grad_norm": 0.013349823195020349, "learning_rate": 1.1209599809207833e-06, "loss": 0.0002, "step": 244540 }, { "epoch": 1.6088498253323946, "grad_norm": 0.04241630793069335, "learning_rate": 1.1205977602378338e-06, "loss": 0.0004, "step": 244550 }, { "epoch": 1.6089156135076281, "grad_norm": 0.0004166794131489461, "learning_rate": 1.1202355907014528e-06, "loss": 0.0004, "step": 244560 }, { "epoch": 1.6089814016828616, "grad_norm": 0.017519932623761968, "learning_rate": 1.119873472316414e-06, "loss": 0.0005, "step": 244570 }, { "epoch": 1.609047189858095, "grad_norm": 0.019794021721622864, "learning_rate": 1.1195114050874928e-06, "loss": 0.0004, "step": 244580 }, { "epoch": 1.6091129780333282, "grad_norm": 0.0008394441076613119, "learning_rate": 1.1191493890194626e-06, "loss": 0.0003, "step": 244590 }, { "epoch": 1.6091787662085617, "grad_norm": 0.014388752334746232, "learning_rate": 1.118787424117096e-06, "loss": 0.0002, "step": 244600 }, { "epoch": 1.6092445543837952, "grad_norm": 0.020767276792120492, "learning_rate": 1.1184255103851643e-06, "loss": 0.0005, "step": 244610 }, { "epoch": 1.6093103425590285, "grad_norm": 0.00481286596664388, "learning_rate": 1.1180636478284407e-06, "loss": 0.0002, "step": 244620 }, { "epoch": 1.6093761307342618, "grad_norm": 0.009645642389185981, "learning_rate": 1.1177018364516939e-06, "loss": 0.0005, "step": 244630 }, { "epoch": 1.609441918909495, "grad_norm": 0.00537105523862423, "learning_rate": 1.1173400762596965e-06, "loss": 0.0004, "step": 244640 }, { "epoch": 1.6095077070847286, "grad_norm": 0.0370529994271104, "learning_rate": 1.116978367257217e-06, "loss": 0.0003, "step": 244650 }, { "epoch": 1.609573495259962, "grad_norm": 0.006180104574923443, "learning_rate": 1.1166167094490238e-06, "loss": 0.0003, "step": 244660 }, { "epoch": 1.6096392834351954, "grad_norm": 0.01990522770377268, "learning_rate": 1.116255102839886e-06, "loss": 0.0002, "step": 244670 }, { "epoch": 1.6097050716104286, "grad_norm": 0.009589488100036792, "learning_rate": 1.11589354743457e-06, "loss": 0.0004, "step": 244680 }, { "epoch": 1.6097708597856621, "grad_norm": 0.028944980928784437, "learning_rate": 1.115532043237843e-06, "loss": 0.0004, "step": 244690 }, { "epoch": 1.6098366479608957, "grad_norm": 0.007382725386331826, "learning_rate": 1.1151705902544713e-06, "loss": 0.0001, "step": 244700 }, { "epoch": 1.609902436136129, "grad_norm": 0.00012475706725948235, "learning_rate": 1.1148091884892204e-06, "loss": 0.0008, "step": 244710 }, { "epoch": 1.6099682243113622, "grad_norm": 0.0015880931376622033, "learning_rate": 1.1144478379468548e-06, "loss": 0.0002, "step": 244720 }, { "epoch": 1.6100340124865955, "grad_norm": 0.005236305648272531, "learning_rate": 1.1140865386321387e-06, "loss": 0.0002, "step": 244730 }, { "epoch": 1.610099800661829, "grad_norm": 0.0015637317662482277, "learning_rate": 1.1137252905498352e-06, "loss": 0.0004, "step": 244740 }, { "epoch": 1.6101655888370625, "grad_norm": 0.0008316603199226444, "learning_rate": 1.1133640937047074e-06, "loss": 0.0005, "step": 244750 }, { "epoch": 1.6102313770122958, "grad_norm": 5.5543103649835954e-05, "learning_rate": 1.1130029481015176e-06, "loss": 0.0002, "step": 244760 }, { "epoch": 1.610297165187529, "grad_norm": 0.015903685688386897, "learning_rate": 1.1126418537450267e-06, "loss": 0.0005, "step": 244770 }, { "epoch": 1.6103629533627626, "grad_norm": 0.043716993985569885, "learning_rate": 1.1122808106399956e-06, "loss": 0.0006, "step": 244780 }, { "epoch": 1.610428741537996, "grad_norm": 0.10748766017683845, "learning_rate": 1.1119198187911845e-06, "loss": 0.0004, "step": 244790 }, { "epoch": 1.6104945297132294, "grad_norm": 0.02236782259160179, "learning_rate": 1.1115588782033527e-06, "loss": 0.0007, "step": 244800 }, { "epoch": 1.6105603178884627, "grad_norm": 0.01798742149440199, "learning_rate": 1.1111979888812585e-06, "loss": 0.0003, "step": 244810 }, { "epoch": 1.6106261060636962, "grad_norm": 0.008282582328748912, "learning_rate": 1.11083715082966e-06, "loss": 0.0004, "step": 244820 }, { "epoch": 1.6106918942389294, "grad_norm": 0.0006320277165580936, "learning_rate": 1.1104763640533157e-06, "loss": 0.0003, "step": 244830 }, { "epoch": 1.610757682414163, "grad_norm": 0.002971415146165483, "learning_rate": 1.1101156285569804e-06, "loss": 0.0003, "step": 244840 }, { "epoch": 1.6108234705893962, "grad_norm": 0.06391126226406679, "learning_rate": 1.1097549443454113e-06, "loss": 0.0004, "step": 244850 }, { "epoch": 1.6108892587646295, "grad_norm": 0.0028111188853021133, "learning_rate": 1.1093943114233635e-06, "loss": 0.0003, "step": 244860 }, { "epoch": 1.610955046939863, "grad_norm": 0.02501305557385999, "learning_rate": 1.10903372979559e-06, "loss": 0.0008, "step": 244870 }, { "epoch": 1.6110208351150965, "grad_norm": 0.004542739637858073, "learning_rate": 1.108673199466848e-06, "loss": 0.0003, "step": 244880 }, { "epoch": 1.6110866232903298, "grad_norm": 0.03379252565875156, "learning_rate": 1.1083127204418893e-06, "loss": 0.0004, "step": 244890 }, { "epoch": 1.611152411465563, "grad_norm": 0.04001014376938719, "learning_rate": 1.1079522927254655e-06, "loss": 0.0005, "step": 244900 }, { "epoch": 1.6112181996407966, "grad_norm": 0.0929756419181852, "learning_rate": 1.1075919163223298e-06, "loss": 0.0007, "step": 244910 }, { "epoch": 1.61128398781603, "grad_norm": 0.04506086902913487, "learning_rate": 1.1072315912372333e-06, "loss": 0.0004, "step": 244920 }, { "epoch": 1.6113497759912634, "grad_norm": 0.0005018197947602927, "learning_rate": 1.1068713174749256e-06, "loss": 0.0006, "step": 244930 }, { "epoch": 1.6114155641664967, "grad_norm": 0.019974218095928503, "learning_rate": 1.1065110950401574e-06, "loss": 0.0002, "step": 244940 }, { "epoch": 1.61148135234173, "grad_norm": 0.03495201407829159, "learning_rate": 1.1061509239376782e-06, "loss": 0.0005, "step": 244950 }, { "epoch": 1.6115471405169635, "grad_norm": 0.023375163178211936, "learning_rate": 1.1057908041722358e-06, "loss": 0.0003, "step": 244960 }, { "epoch": 1.611612928692197, "grad_norm": 0.00816813287718245, "learning_rate": 1.1054307357485787e-06, "loss": 0.0003, "step": 244970 }, { "epoch": 1.6116787168674302, "grad_norm": 0.034521341118687994, "learning_rate": 1.1050707186714533e-06, "loss": 0.0007, "step": 244980 }, { "epoch": 1.6117445050426635, "grad_norm": 0.014079340056010926, "learning_rate": 1.1047107529456069e-06, "loss": 0.0013, "step": 244990 }, { "epoch": 1.611810293217897, "grad_norm": 0.03409705552783929, "learning_rate": 1.1043508385757835e-06, "loss": 0.0004, "step": 245000 }, { "epoch": 1.6118760813931305, "grad_norm": 0.07722480259912477, "learning_rate": 1.1039909755667317e-06, "loss": 0.0009, "step": 245010 }, { "epoch": 1.6119418695683638, "grad_norm": 0.008248986740737369, "learning_rate": 1.1036311639231935e-06, "loss": 0.0006, "step": 245020 }, { "epoch": 1.612007657743597, "grad_norm": 0.0007065506465545448, "learning_rate": 1.1032714036499131e-06, "loss": 0.0003, "step": 245030 }, { "epoch": 1.6120734459188304, "grad_norm": 0.0065063127116640135, "learning_rate": 1.1029116947516343e-06, "loss": 0.0001, "step": 245040 }, { "epoch": 1.612139234094064, "grad_norm": 0.002405183468941145, "learning_rate": 1.1025520372330988e-06, "loss": 0.0006, "step": 245050 }, { "epoch": 1.6122050222692974, "grad_norm": 0.005048849073080689, "learning_rate": 1.102192431099049e-06, "loss": 0.0004, "step": 245060 }, { "epoch": 1.6122708104445307, "grad_norm": 0.02678553525571477, "learning_rate": 1.1018328763542252e-06, "loss": 0.0005, "step": 245070 }, { "epoch": 1.612336598619764, "grad_norm": 0.04224923170422804, "learning_rate": 1.1014733730033683e-06, "loss": 0.0005, "step": 245080 }, { "epoch": 1.6124023867949975, "grad_norm": 0.005441307332744002, "learning_rate": 1.1011139210512179e-06, "loss": 0.0005, "step": 245090 }, { "epoch": 1.612468174970231, "grad_norm": 0.0009173041547450232, "learning_rate": 1.1007545205025134e-06, "loss": 0.0005, "step": 245100 }, { "epoch": 1.6125339631454643, "grad_norm": 0.02056424856716978, "learning_rate": 1.1003951713619925e-06, "loss": 0.0003, "step": 245110 }, { "epoch": 1.6125997513206975, "grad_norm": 0.044030703897323845, "learning_rate": 1.100035873634394e-06, "loss": 0.0003, "step": 245120 }, { "epoch": 1.612665539495931, "grad_norm": 4.615195055654813e-05, "learning_rate": 1.0996766273244524e-06, "loss": 0.0009, "step": 245130 }, { "epoch": 1.6127313276711643, "grad_norm": 0.002310826199117492, "learning_rate": 1.0993174324369078e-06, "loss": 0.0002, "step": 245140 }, { "epoch": 1.6127971158463978, "grad_norm": 0.04461336199451864, "learning_rate": 1.0989582889764938e-06, "loss": 0.0004, "step": 245150 }, { "epoch": 1.6128629040216311, "grad_norm": 0.011384046149914775, "learning_rate": 1.0985991969479453e-06, "loss": 0.0004, "step": 245160 }, { "epoch": 1.6129286921968644, "grad_norm": 0.020918746346930313, "learning_rate": 1.0982401563559975e-06, "loss": 0.0013, "step": 245170 }, { "epoch": 1.612994480372098, "grad_norm": 0.010928260683983898, "learning_rate": 1.097881167205383e-06, "loss": 0.0004, "step": 245180 }, { "epoch": 1.6130602685473314, "grad_norm": 0.07355978848807172, "learning_rate": 1.0975222295008353e-06, "loss": 0.0003, "step": 245190 }, { "epoch": 1.6131260567225647, "grad_norm": 9.171692595840735e-05, "learning_rate": 1.0971633432470862e-06, "loss": 0.0008, "step": 245200 }, { "epoch": 1.613191844897798, "grad_norm": 0.014223081710757464, "learning_rate": 1.0968045084488682e-06, "loss": 0.0008, "step": 245210 }, { "epoch": 1.6132576330730315, "grad_norm": 0.01739147326563335, "learning_rate": 1.0964457251109117e-06, "loss": 0.0002, "step": 245220 }, { "epoch": 1.613323421248265, "grad_norm": 0.00014382554019665972, "learning_rate": 1.096086993237947e-06, "loss": 0.001, "step": 245230 }, { "epoch": 1.6133892094234983, "grad_norm": 0.0023708851760706083, "learning_rate": 1.0957283128347035e-06, "loss": 0.0005, "step": 245240 }, { "epoch": 1.6134549975987316, "grad_norm": 0.07838464060988025, "learning_rate": 1.0953696839059102e-06, "loss": 0.0007, "step": 245250 }, { "epoch": 1.6135207857739648, "grad_norm": 0.03073299946578687, "learning_rate": 1.0950111064562947e-06, "loss": 0.0007, "step": 245260 }, { "epoch": 1.6135865739491984, "grad_norm": 0.006826042796895701, "learning_rate": 1.0946525804905856e-06, "loss": 0.0002, "step": 245270 }, { "epoch": 1.6136523621244319, "grad_norm": 0.038043716459631505, "learning_rate": 1.0942941060135098e-06, "loss": 0.0005, "step": 245280 }, { "epoch": 1.6137181502996651, "grad_norm": 0.019316707099276843, "learning_rate": 1.093935683029793e-06, "loss": 0.0005, "step": 245290 }, { "epoch": 1.6137839384748984, "grad_norm": 0.02345792795143818, "learning_rate": 1.0935773115441606e-06, "loss": 0.0003, "step": 245300 }, { "epoch": 1.613849726650132, "grad_norm": 0.013037737217610345, "learning_rate": 1.0932189915613378e-06, "loss": 0.0005, "step": 245310 }, { "epoch": 1.6139155148253654, "grad_norm": 0.014897436504381337, "learning_rate": 1.0928607230860483e-06, "loss": 0.0006, "step": 245320 }, { "epoch": 1.6139813030005987, "grad_norm": 0.04518782615271807, "learning_rate": 1.0925025061230156e-06, "loss": 0.0006, "step": 245330 }, { "epoch": 1.614047091175832, "grad_norm": 0.000525993507770539, "learning_rate": 1.0921443406769627e-06, "loss": 0.0003, "step": 245340 }, { "epoch": 1.6141128793510653, "grad_norm": 0.028411355459114343, "learning_rate": 1.091786226752612e-06, "loss": 0.0004, "step": 245350 }, { "epoch": 1.6141786675262988, "grad_norm": 0.020630477310989614, "learning_rate": 1.0914281643546842e-06, "loss": 0.0003, "step": 245360 }, { "epoch": 1.6142444557015323, "grad_norm": 0.0939417202436014, "learning_rate": 1.0910701534879004e-06, "loss": 0.0007, "step": 245370 }, { "epoch": 1.6143102438767656, "grad_norm": 0.014123100837572199, "learning_rate": 1.0907121941569804e-06, "loss": 0.0004, "step": 245380 }, { "epoch": 1.6143760320519989, "grad_norm": 0.006867444425955645, "learning_rate": 1.0903542863666428e-06, "loss": 0.0003, "step": 245390 }, { "epoch": 1.6144418202272324, "grad_norm": 0.0006747779174761505, "learning_rate": 1.0899964301216086e-06, "loss": 0.0003, "step": 245400 }, { "epoch": 1.6145076084024659, "grad_norm": 0.04236025356204983, "learning_rate": 1.089638625426595e-06, "loss": 0.0005, "step": 245410 }, { "epoch": 1.6145733965776992, "grad_norm": 0.04283821572539939, "learning_rate": 1.0892808722863185e-06, "loss": 0.0006, "step": 245420 }, { "epoch": 1.6146391847529324, "grad_norm": 0.011393650486840087, "learning_rate": 1.088923170705496e-06, "loss": 0.0003, "step": 245430 }, { "epoch": 1.6147049729281657, "grad_norm": 0.015496271159220872, "learning_rate": 1.088565520688844e-06, "loss": 0.0002, "step": 245440 }, { "epoch": 1.6147707611033992, "grad_norm": 0.0036437650853082698, "learning_rate": 1.0882079222410774e-06, "loss": 0.0004, "step": 245450 }, { "epoch": 1.6148365492786327, "grad_norm": 0.022098034627223137, "learning_rate": 1.0878503753669106e-06, "loss": 0.0002, "step": 245460 }, { "epoch": 1.614902337453866, "grad_norm": 0.012087998697627627, "learning_rate": 1.0874928800710582e-06, "loss": 0.0004, "step": 245470 }, { "epoch": 1.6149681256290993, "grad_norm": 0.006060869559227306, "learning_rate": 1.0871354363582333e-06, "loss": 0.0002, "step": 245480 }, { "epoch": 1.6150339138043328, "grad_norm": 0.021409886626128864, "learning_rate": 1.0867780442331476e-06, "loss": 0.0005, "step": 245490 }, { "epoch": 1.6150997019795663, "grad_norm": 0.05962367412252152, "learning_rate": 1.0864207037005142e-06, "loss": 0.0006, "step": 245500 }, { "epoch": 1.6151654901547996, "grad_norm": 0.0329167510445166, "learning_rate": 1.0860634147650439e-06, "loss": 0.0006, "step": 245510 }, { "epoch": 1.6152312783300329, "grad_norm": 0.004301999585314311, "learning_rate": 1.0857061774314459e-06, "loss": 0.0004, "step": 245520 }, { "epoch": 1.6152970665052664, "grad_norm": 0.023540602688685155, "learning_rate": 1.0853489917044324e-06, "loss": 0.0002, "step": 245530 }, { "epoch": 1.6153628546805, "grad_norm": 0.011086303525464638, "learning_rate": 1.0849918575887114e-06, "loss": 0.0004, "step": 245540 }, { "epoch": 1.6154286428557332, "grad_norm": 0.04825740600459005, "learning_rate": 1.0846347750889918e-06, "loss": 0.0004, "step": 245550 }, { "epoch": 1.6154944310309665, "grad_norm": 0.015203191616869076, "learning_rate": 1.084277744209981e-06, "loss": 0.0005, "step": 245560 }, { "epoch": 1.6155602192061997, "grad_norm": 0.07879858438452955, "learning_rate": 1.0839207649563864e-06, "loss": 0.0004, "step": 245570 }, { "epoch": 1.6156260073814332, "grad_norm": 0.20982861826853882, "learning_rate": 1.0835638373329143e-06, "loss": 0.0018, "step": 245580 }, { "epoch": 1.6156917955566668, "grad_norm": 0.03918482794529905, "learning_rate": 1.0832069613442708e-06, "loss": 0.0004, "step": 245590 }, { "epoch": 1.6157575837319, "grad_norm": 0.04930598712592104, "learning_rate": 1.0828501369951606e-06, "loss": 0.0003, "step": 245600 }, { "epoch": 1.6158233719071333, "grad_norm": 0.01209017611532387, "learning_rate": 1.0824933642902884e-06, "loss": 0.0006, "step": 245610 }, { "epoch": 1.6158891600823668, "grad_norm": 0.012773254945740982, "learning_rate": 1.0821366432343577e-06, "loss": 0.0002, "step": 245620 }, { "epoch": 1.6159549482576003, "grad_norm": 0.09570486303048673, "learning_rate": 1.0817799738320716e-06, "loss": 0.0006, "step": 245630 }, { "epoch": 1.6160207364328336, "grad_norm": 0.01216514209162187, "learning_rate": 1.0814233560881327e-06, "loss": 0.0005, "step": 245640 }, { "epoch": 1.616086524608067, "grad_norm": 0.07553289766823963, "learning_rate": 1.0810667900072408e-06, "loss": 0.0003, "step": 245650 }, { "epoch": 1.6161523127833002, "grad_norm": 0.005971297366602371, "learning_rate": 1.0807102755941006e-06, "loss": 0.0001, "step": 245660 }, { "epoch": 1.6162181009585337, "grad_norm": 0.03456507840481721, "learning_rate": 1.0803538128534103e-06, "loss": 0.0004, "step": 245670 }, { "epoch": 1.6162838891337672, "grad_norm": 0.03001167568292182, "learning_rate": 1.0799974017898695e-06, "loss": 0.0007, "step": 245680 }, { "epoch": 1.6163496773090005, "grad_norm": 0.0002748887334290646, "learning_rate": 1.0796410424081771e-06, "loss": 0.0004, "step": 245690 }, { "epoch": 1.6164154654842338, "grad_norm": 0.0041131060339247155, "learning_rate": 1.0792847347130324e-06, "loss": 0.0015, "step": 245700 }, { "epoch": 1.6164812536594673, "grad_norm": 0.017843109613023277, "learning_rate": 1.0789284787091319e-06, "loss": 0.0006, "step": 245710 }, { "epoch": 1.6165470418347008, "grad_norm": 0.008673548927484778, "learning_rate": 1.0785722744011734e-06, "loss": 0.0002, "step": 245720 }, { "epoch": 1.616612830009934, "grad_norm": 0.031684689207637534, "learning_rate": 1.0782161217938524e-06, "loss": 0.0006, "step": 245730 }, { "epoch": 1.6166786181851673, "grad_norm": 0.004150832346532906, "learning_rate": 1.0778600208918648e-06, "loss": 0.0007, "step": 245740 }, { "epoch": 1.6167444063604006, "grad_norm": 0.0023376966851537916, "learning_rate": 1.0775039716999052e-06, "loss": 0.0007, "step": 245750 }, { "epoch": 1.6168101945356341, "grad_norm": 0.06717829108138267, "learning_rate": 1.0771479742226686e-06, "loss": 0.0009, "step": 245760 }, { "epoch": 1.6168759827108676, "grad_norm": 0.033256060721011796, "learning_rate": 1.0767920284648475e-06, "loss": 0.0005, "step": 245770 }, { "epoch": 1.616941770886101, "grad_norm": 0.00576168875583213, "learning_rate": 1.0764361344311342e-06, "loss": 0.0005, "step": 245780 }, { "epoch": 1.6170075590613342, "grad_norm": 0.03577251559405933, "learning_rate": 1.0760802921262232e-06, "loss": 0.0007, "step": 245790 }, { "epoch": 1.6170733472365677, "grad_norm": 0.03189281931637859, "learning_rate": 1.075724501554804e-06, "loss": 0.0008, "step": 245800 }, { "epoch": 1.6171391354118012, "grad_norm": 0.02289189265054864, "learning_rate": 1.0753687627215686e-06, "loss": 0.001, "step": 245810 }, { "epoch": 1.6172049235870345, "grad_norm": 0.051573193510040305, "learning_rate": 1.0750130756312066e-06, "loss": 0.0005, "step": 245820 }, { "epoch": 1.6172707117622678, "grad_norm": 0.024857161138307386, "learning_rate": 1.074657440288407e-06, "loss": 0.0002, "step": 245830 }, { "epoch": 1.6173364999375013, "grad_norm": 0.04546250184431234, "learning_rate": 1.0743018566978592e-06, "loss": 0.0007, "step": 245840 }, { "epoch": 1.6174022881127346, "grad_norm": 0.042515778763264775, "learning_rate": 1.0739463248642507e-06, "loss": 0.0004, "step": 245850 }, { "epoch": 1.617468076287968, "grad_norm": 0.05872477900141975, "learning_rate": 1.0735908447922693e-06, "loss": 0.0009, "step": 245860 }, { "epoch": 1.6175338644632014, "grad_norm": 0.005543274634024085, "learning_rate": 1.0732354164866015e-06, "loss": 0.0003, "step": 245870 }, { "epoch": 1.6175996526384346, "grad_norm": 0.0009269906483809926, "learning_rate": 1.0728800399519334e-06, "loss": 0.0002, "step": 245880 }, { "epoch": 1.6176654408136681, "grad_norm": 0.0201118953984236, "learning_rate": 1.0725247151929502e-06, "loss": 0.0002, "step": 245890 }, { "epoch": 1.6177312289889016, "grad_norm": 0.05179219271761382, "learning_rate": 1.0721694422143364e-06, "loss": 0.0004, "step": 245900 }, { "epoch": 1.617797017164135, "grad_norm": 0.08290622229704542, "learning_rate": 1.0718142210207755e-06, "loss": 0.0004, "step": 245910 }, { "epoch": 1.6178628053393682, "grad_norm": 0.057850328918344004, "learning_rate": 1.0714590516169522e-06, "loss": 0.0011, "step": 245920 }, { "epoch": 1.6179285935146017, "grad_norm": 0.05285229395804372, "learning_rate": 1.0711039340075486e-06, "loss": 0.0004, "step": 245930 }, { "epoch": 1.6179943816898352, "grad_norm": 0.04640068687960693, "learning_rate": 1.0707488681972467e-06, "loss": 0.0004, "step": 245940 }, { "epoch": 1.6180601698650685, "grad_norm": 0.028301222176011845, "learning_rate": 1.0703938541907271e-06, "loss": 0.0003, "step": 245950 }, { "epoch": 1.6181259580403018, "grad_norm": 0.03020730309417342, "learning_rate": 1.0700388919926708e-06, "loss": 0.0005, "step": 245960 }, { "epoch": 1.618191746215535, "grad_norm": 0.002845279942464669, "learning_rate": 1.0696839816077575e-06, "loss": 0.0004, "step": 245970 }, { "epoch": 1.6182575343907686, "grad_norm": 0.11844456662328355, "learning_rate": 1.0693291230406666e-06, "loss": 0.0006, "step": 245980 }, { "epoch": 1.618323322566002, "grad_norm": 0.0031898419659611627, "learning_rate": 1.068974316296077e-06, "loss": 0.0004, "step": 245990 }, { "epoch": 1.6183891107412354, "grad_norm": 0.010623203331960505, "learning_rate": 1.0686195613786654e-06, "loss": 0.0009, "step": 246000 }, { "epoch": 1.6184548989164687, "grad_norm": 0.0031003789342169217, "learning_rate": 1.0682648582931094e-06, "loss": 0.0005, "step": 246010 }, { "epoch": 1.6185206870917022, "grad_norm": 0.007390809035682788, "learning_rate": 1.0679102070440856e-06, "loss": 0.0009, "step": 246020 }, { "epoch": 1.6185864752669357, "grad_norm": 0.05140477571310102, "learning_rate": 1.0675556076362697e-06, "loss": 0.0006, "step": 246030 }, { "epoch": 1.618652263442169, "grad_norm": 0.0012819093473107565, "learning_rate": 1.067201060074336e-06, "loss": 0.0002, "step": 246040 }, { "epoch": 1.6187180516174022, "grad_norm": 0.003454606481599559, "learning_rate": 1.0668465643629604e-06, "loss": 0.0003, "step": 246050 }, { "epoch": 1.6187838397926355, "grad_norm": 0.04608976588004766, "learning_rate": 1.0664921205068163e-06, "loss": 0.0005, "step": 246060 }, { "epoch": 1.618849627967869, "grad_norm": 0.008526273559042534, "learning_rate": 1.0661377285105768e-06, "loss": 0.0006, "step": 246070 }, { "epoch": 1.6189154161431025, "grad_norm": 0.002323552483146736, "learning_rate": 1.0657833883789131e-06, "loss": 0.0005, "step": 246080 }, { "epoch": 1.6189812043183358, "grad_norm": 0.01551040297532451, "learning_rate": 1.0654291001164978e-06, "loss": 0.0001, "step": 246090 }, { "epoch": 1.619046992493569, "grad_norm": 0.029102118027462286, "learning_rate": 1.0650748637280017e-06, "loss": 0.001, "step": 246100 }, { "epoch": 1.6191127806688026, "grad_norm": 0.02996089865477402, "learning_rate": 1.0647206792180952e-06, "loss": 0.0004, "step": 246110 }, { "epoch": 1.619178568844036, "grad_norm": 0.0445137078805835, "learning_rate": 1.0643665465914483e-06, "loss": 0.0003, "step": 246120 }, { "epoch": 1.6192443570192694, "grad_norm": 0.06890312715471948, "learning_rate": 1.064012465852729e-06, "loss": 0.0004, "step": 246130 }, { "epoch": 1.6193101451945027, "grad_norm": 0.03892493124122005, "learning_rate": 1.063658437006606e-06, "loss": 0.0004, "step": 246140 }, { "epoch": 1.6193759333697362, "grad_norm": 0.011920787710046047, "learning_rate": 1.0633044600577468e-06, "loss": 0.0003, "step": 246150 }, { "epoch": 1.6194417215449695, "grad_norm": 0.029586438298969512, "learning_rate": 1.0629505350108182e-06, "loss": 0.0005, "step": 246160 }, { "epoch": 1.619507509720203, "grad_norm": 0.007617076256300897, "learning_rate": 1.0625966618704852e-06, "loss": 0.0004, "step": 246170 }, { "epoch": 1.6195732978954362, "grad_norm": 0.038886338907202896, "learning_rate": 1.0622428406414165e-06, "loss": 0.0005, "step": 246180 }, { "epoch": 1.6196390860706695, "grad_norm": 0.07804559318002167, "learning_rate": 1.0618890713282748e-06, "loss": 0.0006, "step": 246190 }, { "epoch": 1.619704874245903, "grad_norm": 0.08862233853971492, "learning_rate": 1.0615353539357242e-06, "loss": 0.0009, "step": 246200 }, { "epoch": 1.6197706624211365, "grad_norm": 0.015564645086592561, "learning_rate": 1.0611816884684289e-06, "loss": 0.0003, "step": 246210 }, { "epoch": 1.6198364505963698, "grad_norm": 0.09190833462935738, "learning_rate": 1.0608280749310513e-06, "loss": 0.0004, "step": 246220 }, { "epoch": 1.619902238771603, "grad_norm": 0.00013032759180658322, "learning_rate": 1.0604745133282528e-06, "loss": 0.0005, "step": 246230 }, { "epoch": 1.6199680269468366, "grad_norm": 0.01514201422991563, "learning_rate": 1.0601210036646965e-06, "loss": 0.0004, "step": 246240 }, { "epoch": 1.6200338151220701, "grad_norm": 0.03832167219309877, "learning_rate": 1.0597675459450413e-06, "loss": 0.0005, "step": 246250 }, { "epoch": 1.6200996032973034, "grad_norm": 0.017356140081950015, "learning_rate": 1.0594141401739478e-06, "loss": 0.0004, "step": 246260 }, { "epoch": 1.6201653914725367, "grad_norm": 0.0009366749498310384, "learning_rate": 1.059060786356076e-06, "loss": 0.0004, "step": 246270 }, { "epoch": 1.62023117964777, "grad_norm": 0.019277263148380484, "learning_rate": 1.0587074844960843e-06, "loss": 0.0002, "step": 246280 }, { "epoch": 1.6202969678230035, "grad_norm": 0.01220066309590003, "learning_rate": 1.0583542345986303e-06, "loss": 0.0002, "step": 246290 }, { "epoch": 1.620362755998237, "grad_norm": 0.004589901240506714, "learning_rate": 1.0580010366683696e-06, "loss": 0.0006, "step": 246300 }, { "epoch": 1.6204285441734703, "grad_norm": 0.04878959332798153, "learning_rate": 1.0576478907099624e-06, "loss": 0.0004, "step": 246310 }, { "epoch": 1.6204943323487035, "grad_norm": 0.025356470562546928, "learning_rate": 1.0572947967280634e-06, "loss": 0.0004, "step": 246320 }, { "epoch": 1.620560120523937, "grad_norm": 0.031577306847488544, "learning_rate": 1.056941754727327e-06, "loss": 0.0009, "step": 246330 }, { "epoch": 1.6206259086991706, "grad_norm": 0.04429016302971839, "learning_rate": 1.056588764712408e-06, "loss": 0.0017, "step": 246340 }, { "epoch": 1.6206916968744038, "grad_norm": 0.05190617674124277, "learning_rate": 1.0562358266879603e-06, "loss": 0.0007, "step": 246350 }, { "epoch": 1.6207574850496371, "grad_norm": 0.02157633726672078, "learning_rate": 1.0558829406586374e-06, "loss": 0.0001, "step": 246360 }, { "epoch": 1.6208232732248704, "grad_norm": 0.02064052639998073, "learning_rate": 1.0555301066290918e-06, "loss": 0.0007, "step": 246370 }, { "epoch": 1.620889061400104, "grad_norm": 0.04442395444834916, "learning_rate": 1.055177324603975e-06, "loss": 0.0003, "step": 246380 }, { "epoch": 1.6209548495753374, "grad_norm": 0.008715815068692729, "learning_rate": 1.0548245945879375e-06, "loss": 0.0006, "step": 246390 }, { "epoch": 1.6210206377505707, "grad_norm": 0.011298776804074687, "learning_rate": 1.054471916585631e-06, "loss": 0.0004, "step": 246400 }, { "epoch": 1.621086425925804, "grad_norm": 0.02813222299266658, "learning_rate": 1.0541192906017046e-06, "loss": 0.0008, "step": 246410 }, { "epoch": 1.6211522141010375, "grad_norm": 0.012962728384228775, "learning_rate": 1.0537667166408077e-06, "loss": 0.0004, "step": 246420 }, { "epoch": 1.621218002276271, "grad_norm": 0.014155370668835373, "learning_rate": 1.0534141947075865e-06, "loss": 0.0004, "step": 246430 }, { "epoch": 1.6212837904515043, "grad_norm": 0.06931437176544548, "learning_rate": 1.0530617248066926e-06, "loss": 0.0004, "step": 246440 }, { "epoch": 1.6213495786267376, "grad_norm": 0.003319715458207046, "learning_rate": 1.0527093069427707e-06, "loss": 0.0004, "step": 246450 }, { "epoch": 1.6214153668019708, "grad_norm": 0.027602777974491757, "learning_rate": 1.0523569411204683e-06, "loss": 0.0003, "step": 246460 }, { "epoch": 1.6214811549772044, "grad_norm": 0.043841528461006946, "learning_rate": 1.052004627344429e-06, "loss": 0.0004, "step": 246470 }, { "epoch": 1.6215469431524379, "grad_norm": 0.03603053161358648, "learning_rate": 1.0516523656192996e-06, "loss": 0.0003, "step": 246480 }, { "epoch": 1.6216127313276711, "grad_norm": 0.027861687033361972, "learning_rate": 1.0513001559497239e-06, "loss": 0.0004, "step": 246490 }, { "epoch": 1.6216785195029044, "grad_norm": 0.03578483658190592, "learning_rate": 1.0509479983403448e-06, "loss": 0.0005, "step": 246500 }, { "epoch": 1.621744307678138, "grad_norm": 0.09546179742604759, "learning_rate": 1.0505958927958066e-06, "loss": 0.0006, "step": 246510 }, { "epoch": 1.6218100958533714, "grad_norm": 0.03634991280018555, "learning_rate": 1.0502438393207498e-06, "loss": 0.0006, "step": 246520 }, { "epoch": 1.6218758840286047, "grad_norm": 0.030873684117289607, "learning_rate": 1.049891837919817e-06, "loss": 0.0006, "step": 246530 }, { "epoch": 1.621941672203838, "grad_norm": 0.04159726689237911, "learning_rate": 1.0495398885976488e-06, "loss": 0.0006, "step": 246540 }, { "epoch": 1.6220074603790715, "grad_norm": 0.005774013577200076, "learning_rate": 1.0491879913588853e-06, "loss": 0.0006, "step": 246550 }, { "epoch": 1.622073248554305, "grad_norm": 0.002663608964251753, "learning_rate": 1.0488361462081648e-06, "loss": 0.0003, "step": 246560 }, { "epoch": 1.6221390367295383, "grad_norm": 0.027568030521075555, "learning_rate": 1.0484843531501282e-06, "loss": 0.0005, "step": 246570 }, { "epoch": 1.6222048249047716, "grad_norm": 0.014731402775482783, "learning_rate": 1.0481326121894132e-06, "loss": 0.0002, "step": 246580 }, { "epoch": 1.6222706130800049, "grad_norm": 0.008865800595636625, "learning_rate": 1.0477809233306563e-06, "loss": 0.001, "step": 246590 }, { "epoch": 1.6223364012552384, "grad_norm": 0.04945910263028411, "learning_rate": 1.0474292865784946e-06, "loss": 0.0008, "step": 246600 }, { "epoch": 1.6224021894304719, "grad_norm": 0.12449741610798028, "learning_rate": 1.0470777019375638e-06, "loss": 0.0008, "step": 246610 }, { "epoch": 1.6224679776057052, "grad_norm": 0.05618342864451099, "learning_rate": 1.0467261694125002e-06, "loss": 0.0005, "step": 246620 }, { "epoch": 1.6225337657809384, "grad_norm": 0.013800174417934949, "learning_rate": 1.0463746890079374e-06, "loss": 0.0012, "step": 246630 }, { "epoch": 1.622599553956172, "grad_norm": 0.019259326702400322, "learning_rate": 1.046023260728511e-06, "loss": 0.0004, "step": 246640 }, { "epoch": 1.6226653421314055, "grad_norm": 0.002489586120348592, "learning_rate": 1.0456718845788516e-06, "loss": 0.0004, "step": 246650 }, { "epoch": 1.6227311303066387, "grad_norm": 0.013810375382552994, "learning_rate": 1.0453205605635924e-06, "loss": 0.0004, "step": 246660 }, { "epoch": 1.622796918481872, "grad_norm": 0.11349218244166259, "learning_rate": 1.044969288687367e-06, "loss": 0.0006, "step": 246670 }, { "epoch": 1.6228627066571053, "grad_norm": 0.04962128107737463, "learning_rate": 1.0446180689548058e-06, "loss": 0.0004, "step": 246680 }, { "epoch": 1.6229284948323388, "grad_norm": 0.027769098471847147, "learning_rate": 1.0442669013705399e-06, "loss": 0.0006, "step": 246690 }, { "epoch": 1.6229942830075723, "grad_norm": 0.15710109385427867, "learning_rate": 1.043915785939198e-06, "loss": 0.0007, "step": 246700 }, { "epoch": 1.6230600711828056, "grad_norm": 0.022515268226171736, "learning_rate": 1.0435647226654094e-06, "loss": 0.0004, "step": 246710 }, { "epoch": 1.6231258593580389, "grad_norm": 0.04683120584705603, "learning_rate": 1.0432137115538032e-06, "loss": 0.0006, "step": 246720 }, { "epoch": 1.6231916475332724, "grad_norm": 0.00021227882487714233, "learning_rate": 1.0428627526090074e-06, "loss": 0.0007, "step": 246730 }, { "epoch": 1.623257435708506, "grad_norm": 0.03196289860068363, "learning_rate": 1.042511845835648e-06, "loss": 0.0004, "step": 246740 }, { "epoch": 1.6233232238837392, "grad_norm": 0.027610713858692987, "learning_rate": 1.0421609912383523e-06, "loss": 0.0015, "step": 246750 }, { "epoch": 1.6233890120589725, "grad_norm": 0.009555191411331479, "learning_rate": 1.0418101888217452e-06, "loss": 0.0002, "step": 246760 }, { "epoch": 1.6234548002342057, "grad_norm": 0.009209876220416625, "learning_rate": 1.041459438590453e-06, "loss": 0.0003, "step": 246770 }, { "epoch": 1.6235205884094392, "grad_norm": 0.06582708594447283, "learning_rate": 1.0411087405490989e-06, "loss": 0.001, "step": 246780 }, { "epoch": 1.6235863765846728, "grad_norm": 0.006291974910973186, "learning_rate": 1.0407580947023056e-06, "loss": 0.0005, "step": 246790 }, { "epoch": 1.623652164759906, "grad_norm": 0.002471604634125893, "learning_rate": 1.0404075010546989e-06, "loss": 0.0003, "step": 246800 }, { "epoch": 1.6237179529351393, "grad_norm": 0.0011699182694781954, "learning_rate": 1.0400569596108995e-06, "loss": 0.0004, "step": 246810 }, { "epoch": 1.6237837411103728, "grad_norm": 0.020710667688298563, "learning_rate": 1.0397064703755288e-06, "loss": 0.0008, "step": 246820 }, { "epoch": 1.6238495292856063, "grad_norm": 0.0006247830974466755, "learning_rate": 1.0393560333532083e-06, "loss": 0.0006, "step": 246830 }, { "epoch": 1.6239153174608396, "grad_norm": 0.03837793624125768, "learning_rate": 1.0390056485485579e-06, "loss": 0.0004, "step": 246840 }, { "epoch": 1.623981105636073, "grad_norm": 0.04332976683505528, "learning_rate": 1.0386553159661966e-06, "loss": 0.0003, "step": 246850 }, { "epoch": 1.6240468938113064, "grad_norm": 0.030224882104728207, "learning_rate": 1.0383050356107438e-06, "loss": 0.0007, "step": 246860 }, { "epoch": 1.6241126819865397, "grad_norm": 0.13228924179807044, "learning_rate": 1.037954807486818e-06, "loss": 0.0006, "step": 246870 }, { "epoch": 1.6241784701617732, "grad_norm": 0.05179659511142469, "learning_rate": 1.0376046315990363e-06, "loss": 0.0004, "step": 246880 }, { "epoch": 1.6242442583370065, "grad_norm": 0.012516227599990989, "learning_rate": 1.0372545079520147e-06, "loss": 0.0018, "step": 246890 }, { "epoch": 1.6243100465122398, "grad_norm": 0.03793291198315318, "learning_rate": 1.0369044365503706e-06, "loss": 0.0002, "step": 246900 }, { "epoch": 1.6243758346874733, "grad_norm": 0.029992188767855987, "learning_rate": 1.0365544173987186e-06, "loss": 0.0003, "step": 246910 }, { "epoch": 1.6244416228627068, "grad_norm": 0.01933636991725104, "learning_rate": 1.0362044505016715e-06, "loss": 0.0002, "step": 246920 }, { "epoch": 1.62450741103794, "grad_norm": 0.004104202179682877, "learning_rate": 1.0358545358638479e-06, "loss": 0.0003, "step": 246930 }, { "epoch": 1.6245731992131733, "grad_norm": 0.015193134885064671, "learning_rate": 1.0355046734898578e-06, "loss": 0.0006, "step": 246940 }, { "epoch": 1.6246389873884068, "grad_norm": 0.030890356547361563, "learning_rate": 1.0351548633843144e-06, "loss": 0.0004, "step": 246950 }, { "epoch": 1.6247047755636403, "grad_norm": 0.008991495258037335, "learning_rate": 1.0348051055518305e-06, "loss": 0.0002, "step": 246960 }, { "epoch": 1.6247705637388736, "grad_norm": 0.0025286894880757736, "learning_rate": 1.0344553999970163e-06, "loss": 0.0002, "step": 246970 }, { "epoch": 1.624836351914107, "grad_norm": 0.010194858935076979, "learning_rate": 1.034105746724483e-06, "loss": 0.0004, "step": 246980 }, { "epoch": 1.6249021400893402, "grad_norm": 0.07140816171399236, "learning_rate": 1.0337561457388406e-06, "loss": 0.0005, "step": 246990 }, { "epoch": 1.6249679282645737, "grad_norm": 0.0010784486139492483, "learning_rate": 1.0334065970446977e-06, "loss": 0.0004, "step": 247000 }, { "epoch": 1.6250337164398072, "grad_norm": 0.004911754756599754, "learning_rate": 1.0330571006466628e-06, "loss": 0.0005, "step": 247010 }, { "epoch": 1.6250995046150405, "grad_norm": 0.04339083401137367, "learning_rate": 1.0327076565493438e-06, "loss": 0.0003, "step": 247020 }, { "epoch": 1.6251652927902738, "grad_norm": 0.01950188729722921, "learning_rate": 1.0323582647573483e-06, "loss": 0.0004, "step": 247030 }, { "epoch": 1.6252310809655073, "grad_norm": 0.033204649533541374, "learning_rate": 1.0320089252752824e-06, "loss": 0.0007, "step": 247040 }, { "epoch": 1.6252968691407408, "grad_norm": 0.04204249543332793, "learning_rate": 1.0316596381077503e-06, "loss": 0.0005, "step": 247050 }, { "epoch": 1.625362657315974, "grad_norm": 0.03602873228778683, "learning_rate": 1.0313104032593601e-06, "loss": 0.0009, "step": 247060 }, { "epoch": 1.6254284454912074, "grad_norm": 0.03570051450989996, "learning_rate": 1.0309612207347148e-06, "loss": 0.0004, "step": 247070 }, { "epoch": 1.6254942336664406, "grad_norm": 0.022699576166273768, "learning_rate": 1.0306120905384175e-06, "loss": 0.0003, "step": 247080 }, { "epoch": 1.6255600218416741, "grad_norm": 0.1572288010337517, "learning_rate": 1.0302630126750717e-06, "loss": 0.0007, "step": 247090 }, { "epoch": 1.6256258100169076, "grad_norm": 0.07821806773571933, "learning_rate": 1.0299139871492803e-06, "loss": 0.0004, "step": 247100 }, { "epoch": 1.625691598192141, "grad_norm": 0.010002894335535587, "learning_rate": 1.0295650139656433e-06, "loss": 0.0004, "step": 247110 }, { "epoch": 1.6257573863673742, "grad_norm": 4.969486601934617e-05, "learning_rate": 1.029216093128763e-06, "loss": 0.0005, "step": 247120 }, { "epoch": 1.6258231745426077, "grad_norm": 0.004832855364460624, "learning_rate": 1.028867224643239e-06, "loss": 0.0002, "step": 247130 }, { "epoch": 1.6258889627178412, "grad_norm": 0.039043262182955375, "learning_rate": 1.0285184085136707e-06, "loss": 0.0004, "step": 247140 }, { "epoch": 1.6259547508930745, "grad_norm": 0.007250665029374195, "learning_rate": 1.0281696447446575e-06, "loss": 0.0005, "step": 247150 }, { "epoch": 1.6260205390683078, "grad_norm": 0.06635385978998061, "learning_rate": 1.0278209333407969e-06, "loss": 0.0006, "step": 247160 }, { "epoch": 1.6260863272435413, "grad_norm": 0.0156610942189287, "learning_rate": 1.0274722743066868e-06, "loss": 0.0005, "step": 247170 }, { "epoch": 1.6261521154187746, "grad_norm": 0.20775281658669761, "learning_rate": 1.0271236676469225e-06, "loss": 0.0011, "step": 247180 }, { "epoch": 1.626217903594008, "grad_norm": 0.005925869979941752, "learning_rate": 1.026775113366103e-06, "loss": 0.0005, "step": 247190 }, { "epoch": 1.6262836917692414, "grad_norm": 0.05465838818868347, "learning_rate": 1.026426611468822e-06, "loss": 0.0004, "step": 247200 }, { "epoch": 1.6263494799444747, "grad_norm": 0.018715485874828235, "learning_rate": 1.0260781619596744e-06, "loss": 0.0005, "step": 247210 }, { "epoch": 1.6264152681197082, "grad_norm": 0.01866031297432727, "learning_rate": 1.025729764843254e-06, "loss": 0.0004, "step": 247220 }, { "epoch": 1.6264810562949417, "grad_norm": 0.032685947737581544, "learning_rate": 1.0253814201241546e-06, "loss": 0.0003, "step": 247230 }, { "epoch": 1.626546844470175, "grad_norm": 0.09020625981460532, "learning_rate": 1.025033127806968e-06, "loss": 0.001, "step": 247240 }, { "epoch": 1.6266126326454082, "grad_norm": 0.03953948901273782, "learning_rate": 1.0246848878962873e-06, "loss": 0.0004, "step": 247250 }, { "epoch": 1.6266784208206417, "grad_norm": 0.006085209597547259, "learning_rate": 1.0243367003967026e-06, "loss": 0.0004, "step": 247260 }, { "epoch": 1.6267442089958752, "grad_norm": 0.037181876423353494, "learning_rate": 1.0239885653128046e-06, "loss": 0.0005, "step": 247270 }, { "epoch": 1.6268099971711085, "grad_norm": 0.009269781017854584, "learning_rate": 1.0236404826491835e-06, "loss": 0.0007, "step": 247280 }, { "epoch": 1.6268757853463418, "grad_norm": 0.03897057388600874, "learning_rate": 1.0232924524104287e-06, "loss": 0.0007, "step": 247290 }, { "epoch": 1.626941573521575, "grad_norm": 0.00011621387832873626, "learning_rate": 1.0229444746011286e-06, "loss": 0.0003, "step": 247300 }, { "epoch": 1.6270073616968086, "grad_norm": 0.05889471932487629, "learning_rate": 1.0225965492258688e-06, "loss": 0.0015, "step": 247310 }, { "epoch": 1.627073149872042, "grad_norm": 0.011338537112586634, "learning_rate": 1.0222486762892403e-06, "loss": 0.0003, "step": 247320 }, { "epoch": 1.6271389380472754, "grad_norm": 0.03244402002442016, "learning_rate": 1.0219008557958276e-06, "loss": 0.0003, "step": 247330 }, { "epoch": 1.6272047262225087, "grad_norm": 0.014630927371136774, "learning_rate": 1.021553087750216e-06, "loss": 0.0005, "step": 247340 }, { "epoch": 1.6272705143977422, "grad_norm": 0.02564506325289028, "learning_rate": 1.0212053721569915e-06, "loss": 0.0007, "step": 247350 }, { "epoch": 1.6273363025729757, "grad_norm": 0.01036892993513461, "learning_rate": 1.0208577090207373e-06, "loss": 0.0003, "step": 247360 }, { "epoch": 1.627402090748209, "grad_norm": 0.01869225366320218, "learning_rate": 1.0205100983460375e-06, "loss": 0.0005, "step": 247370 }, { "epoch": 1.6274678789234422, "grad_norm": 0.03451975263189334, "learning_rate": 1.0201625401374754e-06, "loss": 0.001, "step": 247380 }, { "epoch": 1.6275336670986755, "grad_norm": 0.0002813498696870994, "learning_rate": 1.0198150343996332e-06, "loss": 0.0003, "step": 247390 }, { "epoch": 1.627599455273909, "grad_norm": 0.019581836597316906, "learning_rate": 1.0194675811370925e-06, "loss": 0.0002, "step": 247400 }, { "epoch": 1.6276652434491425, "grad_norm": 0.023750802143546076, "learning_rate": 1.0191201803544332e-06, "loss": 0.0011, "step": 247410 }, { "epoch": 1.6277310316243758, "grad_norm": 0.00040481646002849, "learning_rate": 1.0187728320562369e-06, "loss": 0.0004, "step": 247420 }, { "epoch": 1.627796819799609, "grad_norm": 0.005497656451275703, "learning_rate": 1.0184255362470818e-06, "loss": 0.0003, "step": 247430 }, { "epoch": 1.6278626079748426, "grad_norm": 0.02385414985630363, "learning_rate": 1.0180782929315457e-06, "loss": 0.0004, "step": 247440 }, { "epoch": 1.6279283961500761, "grad_norm": 0.0018668473433211467, "learning_rate": 1.0177311021142105e-06, "loss": 0.0005, "step": 247450 }, { "epoch": 1.6279941843253094, "grad_norm": 0.023169481860442192, "learning_rate": 1.0173839637996507e-06, "loss": 0.0009, "step": 247460 }, { "epoch": 1.6280599725005427, "grad_norm": 0.014428117564199193, "learning_rate": 1.0170368779924444e-06, "loss": 0.0006, "step": 247470 }, { "epoch": 1.6281257606757762, "grad_norm": 0.008246573560945972, "learning_rate": 1.0166898446971667e-06, "loss": 0.0006, "step": 247480 }, { "epoch": 1.6281915488510095, "grad_norm": 0.014727735959124095, "learning_rate": 1.016342863918393e-06, "loss": 0.0008, "step": 247490 }, { "epoch": 1.628257337026243, "grad_norm": 0.004531116836744565, "learning_rate": 1.015995935660698e-06, "loss": 0.0004, "step": 247500 }, { "epoch": 1.6283231252014763, "grad_norm": 0.04093123785148517, "learning_rate": 1.015649059928656e-06, "loss": 0.0006, "step": 247510 }, { "epoch": 1.6283889133767095, "grad_norm": 0.006156659060338689, "learning_rate": 1.01530223672684e-06, "loss": 0.0012, "step": 247520 }, { "epoch": 1.628454701551943, "grad_norm": 0.046504065567097914, "learning_rate": 1.0149554660598226e-06, "loss": 0.0007, "step": 247530 }, { "epoch": 1.6285204897271766, "grad_norm": 0.08516194423732795, "learning_rate": 1.014608747932176e-06, "loss": 0.0006, "step": 247540 }, { "epoch": 1.6285862779024098, "grad_norm": 0.023035556165474265, "learning_rate": 1.0142620823484705e-06, "loss": 0.0003, "step": 247550 }, { "epoch": 1.6286520660776431, "grad_norm": 0.034847307242292964, "learning_rate": 1.0139154693132775e-06, "loss": 0.0007, "step": 247560 }, { "epoch": 1.6287178542528766, "grad_norm": 0.003526242626844297, "learning_rate": 1.013568908831165e-06, "loss": 0.0005, "step": 247570 }, { "epoch": 1.6287836424281101, "grad_norm": 0.05699818538878563, "learning_rate": 1.013222400906705e-06, "loss": 0.0006, "step": 247580 }, { "epoch": 1.6288494306033434, "grad_norm": 0.05389097574858917, "learning_rate": 1.0128759455444643e-06, "loss": 0.0009, "step": 247590 }, { "epoch": 1.6289152187785767, "grad_norm": 0.04840342332812046, "learning_rate": 1.0125295427490105e-06, "loss": 0.0006, "step": 247600 }, { "epoch": 1.62898100695381, "grad_norm": 0.059296614163801525, "learning_rate": 1.012183192524911e-06, "loss": 0.0007, "step": 247610 }, { "epoch": 1.6290467951290435, "grad_norm": 0.008998202957728034, "learning_rate": 1.011836894876732e-06, "loss": 0.0002, "step": 247620 }, { "epoch": 1.629112583304277, "grad_norm": 0.0779230146493149, "learning_rate": 1.0114906498090393e-06, "loss": 0.001, "step": 247630 }, { "epoch": 1.6291783714795103, "grad_norm": 0.0035133773013346936, "learning_rate": 1.011144457326398e-06, "loss": 0.0001, "step": 247640 }, { "epoch": 1.6292441596547436, "grad_norm": 0.016178189924243618, "learning_rate": 1.0107983174333713e-06, "loss": 0.0005, "step": 247650 }, { "epoch": 1.629309947829977, "grad_norm": 0.019193160151954835, "learning_rate": 1.0104522301345237e-06, "loss": 0.0005, "step": 247660 }, { "epoch": 1.6293757360052106, "grad_norm": 0.005743063226655229, "learning_rate": 1.0101061954344182e-06, "loss": 0.0002, "step": 247670 }, { "epoch": 1.6294415241804439, "grad_norm": 0.0333866607712962, "learning_rate": 1.0097602133376166e-06, "loss": 0.0009, "step": 247680 }, { "epoch": 1.6295073123556771, "grad_norm": 0.009117853094396477, "learning_rate": 1.0094142838486799e-06, "loss": 0.0002, "step": 247690 }, { "epoch": 1.6295731005309104, "grad_norm": 0.04383422139402393, "learning_rate": 1.0090684069721685e-06, "loss": 0.0006, "step": 247700 }, { "epoch": 1.629638888706144, "grad_norm": 0.0008648155403930806, "learning_rate": 1.0087225827126445e-06, "loss": 0.0007, "step": 247710 }, { "epoch": 1.6297046768813774, "grad_norm": 0.019200598194459854, "learning_rate": 1.0083768110746662e-06, "loss": 0.0005, "step": 247720 }, { "epoch": 1.6297704650566107, "grad_norm": 0.032679487972879236, "learning_rate": 1.0080310920627923e-06, "loss": 0.0006, "step": 247730 }, { "epoch": 1.629836253231844, "grad_norm": 0.059706500940542535, "learning_rate": 1.0076854256815804e-06, "loss": 0.0004, "step": 247740 }, { "epoch": 1.6299020414070775, "grad_norm": 0.037432850107142215, "learning_rate": 1.0073398119355887e-06, "loss": 0.0001, "step": 247750 }, { "epoch": 1.629967829582311, "grad_norm": 0.016376948256501506, "learning_rate": 1.006994250829373e-06, "loss": 0.0003, "step": 247760 }, { "epoch": 1.6300336177575443, "grad_norm": 0.03222572972867462, "learning_rate": 1.0066487423674897e-06, "loss": 0.0005, "step": 247770 }, { "epoch": 1.6300994059327776, "grad_norm": 0.02206165287320763, "learning_rate": 1.0063032865544937e-06, "loss": 0.0004, "step": 247780 }, { "epoch": 1.6301651941080109, "grad_norm": 0.011676305707914213, "learning_rate": 1.0059578833949397e-06, "loss": 0.0004, "step": 247790 }, { "epoch": 1.6302309822832444, "grad_norm": 0.0022699821418081293, "learning_rate": 1.0056125328933814e-06, "loss": 0.0007, "step": 247800 }, { "epoch": 1.6302967704584779, "grad_norm": 0.033887668143629264, "learning_rate": 1.0052672350543725e-06, "loss": 0.0003, "step": 247810 }, { "epoch": 1.6303625586337112, "grad_norm": 0.005939957566223204, "learning_rate": 1.004921989882464e-06, "loss": 0.0003, "step": 247820 }, { "epoch": 1.6304283468089444, "grad_norm": 0.004634119387564165, "learning_rate": 1.0045767973822085e-06, "loss": 0.001, "step": 247830 }, { "epoch": 1.630494134984178, "grad_norm": 0.0440453941664299, "learning_rate": 1.0042316575581585e-06, "loss": 0.0005, "step": 247840 }, { "epoch": 1.6305599231594115, "grad_norm": 0.04643243970832728, "learning_rate": 1.0038865704148626e-06, "loss": 0.0005, "step": 247850 }, { "epoch": 1.6306257113346447, "grad_norm": 0.20250863225397275, "learning_rate": 1.003541535956871e-06, "loss": 0.0008, "step": 247860 }, { "epoch": 1.630691499509878, "grad_norm": 0.11432915708442373, "learning_rate": 1.0031965541887329e-06, "loss": 0.0007, "step": 247870 }, { "epoch": 1.6307572876851115, "grad_norm": 0.03173059525456151, "learning_rate": 1.0028516251149967e-06, "loss": 0.0005, "step": 247880 }, { "epoch": 1.6308230758603448, "grad_norm": 0.0006575364683737413, "learning_rate": 1.002506748740209e-06, "loss": 0.0006, "step": 247890 }, { "epoch": 1.6308888640355783, "grad_norm": 0.001599959481553508, "learning_rate": 1.0021619250689173e-06, "loss": 0.0003, "step": 247900 }, { "epoch": 1.6309546522108116, "grad_norm": 0.04752179736540444, "learning_rate": 1.0018171541056687e-06, "loss": 0.0003, "step": 247910 }, { "epoch": 1.6310204403860449, "grad_norm": 0.060847397812049904, "learning_rate": 1.001472435855007e-06, "loss": 0.0006, "step": 247920 }, { "epoch": 1.6310862285612784, "grad_norm": 0.004170799543069545, "learning_rate": 1.0011277703214784e-06, "loss": 0.0005, "step": 247930 }, { "epoch": 1.631152016736512, "grad_norm": 0.009984532944391803, "learning_rate": 1.0007831575096267e-06, "loss": 0.0002, "step": 247940 }, { "epoch": 1.6312178049117452, "grad_norm": 0.013961370400212164, "learning_rate": 1.0004385974239943e-06, "loss": 0.0003, "step": 247950 }, { "epoch": 1.6312835930869785, "grad_norm": 0.023484540102129102, "learning_rate": 1.0000940900691242e-06, "loss": 0.0003, "step": 247960 }, { "epoch": 1.631349381262212, "grad_norm": 0.043533811367337485, "learning_rate": 9.9974963544956e-07, "loss": 0.0004, "step": 247970 }, { "epoch": 1.6314151694374455, "grad_norm": 0.016013369632489706, "learning_rate": 9.994052335698424e-07, "loss": 0.0005, "step": 247980 }, { "epoch": 1.6314809576126788, "grad_norm": 0.020447880960674077, "learning_rate": 9.990608844345106e-07, "loss": 0.0004, "step": 247990 }, { "epoch": 1.631546745787912, "grad_norm": 0.013249737727239969, "learning_rate": 9.987165880481065e-07, "loss": 0.0004, "step": 248000 }, { "epoch": 1.6316125339631453, "grad_norm": 0.00045160721748195863, "learning_rate": 9.983723444151682e-07, "loss": 0.0001, "step": 248010 }, { "epoch": 1.6316783221383788, "grad_norm": 0.038364114993848904, "learning_rate": 9.980281535402348e-07, "loss": 0.0008, "step": 248020 }, { "epoch": 1.6317441103136123, "grad_norm": 0.0423889612766635, "learning_rate": 9.976840154278433e-07, "loss": 0.0011, "step": 248030 }, { "epoch": 1.6318098984888456, "grad_norm": 0.019465840852440394, "learning_rate": 9.97339930082532e-07, "loss": 0.0007, "step": 248040 }, { "epoch": 1.631875686664079, "grad_norm": 0.0007854844284381592, "learning_rate": 9.969958975088362e-07, "loss": 0.0004, "step": 248050 }, { "epoch": 1.6319414748393124, "grad_norm": 0.034936333687024125, "learning_rate": 9.966519177112926e-07, "loss": 0.0002, "step": 248060 }, { "epoch": 1.632007263014546, "grad_norm": 0.0360272862873421, "learning_rate": 9.963079906944362e-07, "loss": 0.0006, "step": 248070 }, { "epoch": 1.6320730511897792, "grad_norm": 0.0003295950918974906, "learning_rate": 9.95964116462801e-07, "loss": 0.0006, "step": 248080 }, { "epoch": 1.6321388393650125, "grad_norm": 0.01625890164773989, "learning_rate": 9.956202950209193e-07, "loss": 0.0005, "step": 248090 }, { "epoch": 1.6322046275402458, "grad_norm": 0.04294012068948663, "learning_rate": 9.952765263733272e-07, "loss": 0.0005, "step": 248100 }, { "epoch": 1.6322704157154793, "grad_norm": 0.016165366585997196, "learning_rate": 9.949328105245548e-07, "loss": 0.0001, "step": 248110 }, { "epoch": 1.6323362038907128, "grad_norm": 0.012257585702916409, "learning_rate": 9.945891474791347e-07, "loss": 0.0003, "step": 248120 }, { "epoch": 1.632401992065946, "grad_norm": 0.032254414267635846, "learning_rate": 9.942455372415976e-07, "loss": 0.0004, "step": 248130 }, { "epoch": 1.6324677802411793, "grad_norm": 0.004804574202922978, "learning_rate": 9.939019798164729e-07, "loss": 0.0003, "step": 248140 }, { "epoch": 1.6325335684164128, "grad_norm": 0.008648699560360493, "learning_rate": 9.93558475208291e-07, "loss": 0.0003, "step": 248150 }, { "epoch": 1.6325993565916463, "grad_norm": 0.001851787979236752, "learning_rate": 9.932150234215808e-07, "loss": 0.0006, "step": 248160 }, { "epoch": 1.6326651447668796, "grad_norm": 0.01646811156991525, "learning_rate": 9.9287162446087e-07, "loss": 0.0003, "step": 248170 }, { "epoch": 1.632730932942113, "grad_norm": 0.03273600998265305, "learning_rate": 9.925282783306856e-07, "loss": 0.0018, "step": 248180 }, { "epoch": 1.6327967211173464, "grad_norm": 0.048562099586358896, "learning_rate": 9.921849850355547e-07, "loss": 0.0005, "step": 248190 }, { "epoch": 1.6328625092925797, "grad_norm": 0.020635582915313234, "learning_rate": 9.91841744580004e-07, "loss": 0.0004, "step": 248200 }, { "epoch": 1.6329282974678132, "grad_norm": 0.08240611014711546, "learning_rate": 9.914985569685576e-07, "loss": 0.0007, "step": 248210 }, { "epoch": 1.6329940856430465, "grad_norm": 0.04078333357408262, "learning_rate": 9.911554222057396e-07, "loss": 0.0002, "step": 248220 }, { "epoch": 1.6330598738182798, "grad_norm": 0.0066344913168447485, "learning_rate": 9.908123402960767e-07, "loss": 0.0011, "step": 248230 }, { "epoch": 1.6331256619935133, "grad_norm": 0.00044596631456152126, "learning_rate": 9.9046931124409e-07, "loss": 0.0005, "step": 248240 }, { "epoch": 1.6331914501687468, "grad_norm": 0.0005302199700643926, "learning_rate": 9.901263350543033e-07, "loss": 0.0004, "step": 248250 }, { "epoch": 1.63325723834398, "grad_norm": 0.0018999536572844539, "learning_rate": 9.89783411731237e-07, "loss": 0.0002, "step": 248260 }, { "epoch": 1.6333230265192134, "grad_norm": 0.009962850130364591, "learning_rate": 9.894405412794133e-07, "loss": 0.0003, "step": 248270 }, { "epoch": 1.6333888146944469, "grad_norm": 0.0045371110814253005, "learning_rate": 9.890977237033523e-07, "loss": 0.0007, "step": 248280 }, { "epoch": 1.6334546028696804, "grad_norm": 0.023506416972513475, "learning_rate": 9.887549590075741e-07, "loss": 0.0003, "step": 248290 }, { "epoch": 1.6335203910449136, "grad_norm": 0.009748811707950732, "learning_rate": 9.884122471965968e-07, "loss": 0.0004, "step": 248300 }, { "epoch": 1.633586179220147, "grad_norm": 0.0467159965289082, "learning_rate": 9.880695882749396e-07, "loss": 0.0005, "step": 248310 }, { "epoch": 1.6336519673953802, "grad_norm": 0.03938804870341262, "learning_rate": 9.877269822471203e-07, "loss": 0.0006, "step": 248320 }, { "epoch": 1.6337177555706137, "grad_norm": 0.0820099086361825, "learning_rate": 9.873844291176554e-07, "loss": 0.0004, "step": 248330 }, { "epoch": 1.6337835437458472, "grad_norm": 0.02666715651777826, "learning_rate": 9.87041928891061e-07, "loss": 0.0004, "step": 248340 }, { "epoch": 1.6338493319210805, "grad_norm": 0.007349170298590035, "learning_rate": 9.866994815718518e-07, "loss": 0.0005, "step": 248350 }, { "epoch": 1.6339151200963138, "grad_norm": 0.04604137531611176, "learning_rate": 9.863570871645451e-07, "loss": 0.0004, "step": 248360 }, { "epoch": 1.6339809082715473, "grad_norm": 0.00018942815104724348, "learning_rate": 9.860147456736542e-07, "loss": 0.0008, "step": 248370 }, { "epoch": 1.6340466964467808, "grad_norm": 0.004529130939636853, "learning_rate": 9.856724571036913e-07, "loss": 0.0006, "step": 248380 }, { "epoch": 1.634112484622014, "grad_norm": 0.08660289856447732, "learning_rate": 9.853302214591708e-07, "loss": 0.0012, "step": 248390 }, { "epoch": 1.6341782727972474, "grad_norm": 0.03188199309917326, "learning_rate": 9.849880387446037e-07, "loss": 0.0005, "step": 248400 }, { "epoch": 1.6342440609724806, "grad_norm": 0.0022481852628144275, "learning_rate": 9.846459089645015e-07, "loss": 0.0004, "step": 248410 }, { "epoch": 1.6343098491477142, "grad_norm": 0.02824735312148634, "learning_rate": 9.843038321233756e-07, "loss": 0.0007, "step": 248420 }, { "epoch": 1.6343756373229477, "grad_norm": 0.02307602162171285, "learning_rate": 9.83961808225735e-07, "loss": 0.0003, "step": 248430 }, { "epoch": 1.634441425498181, "grad_norm": 0.060842365942316005, "learning_rate": 9.836198372760902e-07, "loss": 0.0004, "step": 248440 }, { "epoch": 1.6345072136734142, "grad_norm": 0.0033689365301639703, "learning_rate": 9.832779192789483e-07, "loss": 0.0003, "step": 248450 }, { "epoch": 1.6345730018486477, "grad_norm": 0.009916373112918153, "learning_rate": 9.829360542388183e-07, "loss": 0.0003, "step": 248460 }, { "epoch": 1.6346387900238812, "grad_norm": 0.062320436464958794, "learning_rate": 9.825942421602076e-07, "loss": 0.0008, "step": 248470 }, { "epoch": 1.6347045781991145, "grad_norm": 0.002640484455161813, "learning_rate": 9.8225248304762e-07, "loss": 0.0003, "step": 248480 }, { "epoch": 1.6347703663743478, "grad_norm": 0.1308587522760147, "learning_rate": 9.819107769055652e-07, "loss": 0.0008, "step": 248490 }, { "epoch": 1.6348361545495813, "grad_norm": 0.0019729701704036943, "learning_rate": 9.81569123738546e-07, "loss": 0.0009, "step": 248500 }, { "epoch": 1.6349019427248146, "grad_norm": 0.021979741770099388, "learning_rate": 9.812275235510682e-07, "loss": 0.0007, "step": 248510 }, { "epoch": 1.634967730900048, "grad_norm": 0.04193145622083112, "learning_rate": 9.808859763476341e-07, "loss": 0.0006, "step": 248520 }, { "epoch": 1.6350335190752814, "grad_norm": 0.005577760714286318, "learning_rate": 9.805444821327487e-07, "loss": 0.0005, "step": 248530 }, { "epoch": 1.6350993072505147, "grad_norm": 0.04460663450636602, "learning_rate": 9.802030409109115e-07, "loss": 0.0002, "step": 248540 }, { "epoch": 1.6351650954257482, "grad_norm": 0.013226699990709662, "learning_rate": 9.798616526866256e-07, "loss": 0.0004, "step": 248550 }, { "epoch": 1.6352308836009817, "grad_norm": 0.031090587174589564, "learning_rate": 9.79520317464392e-07, "loss": 0.0004, "step": 248560 }, { "epoch": 1.635296671776215, "grad_norm": 0.07804533893207548, "learning_rate": 9.791790352487106e-07, "loss": 0.0006, "step": 248570 }, { "epoch": 1.6353624599514482, "grad_norm": 0.015623388500426544, "learning_rate": 9.788378060440796e-07, "loss": 0.0007, "step": 248580 }, { "epoch": 1.6354282481266817, "grad_norm": 0.08175790236095686, "learning_rate": 9.784966298550003e-07, "loss": 0.0005, "step": 248590 }, { "epoch": 1.6354940363019153, "grad_norm": 0.025454835646081583, "learning_rate": 9.7815550668597e-07, "loss": 0.0007, "step": 248600 }, { "epoch": 1.6355598244771485, "grad_norm": 0.0139066075965089, "learning_rate": 9.77814436541486e-07, "loss": 0.0009, "step": 248610 }, { "epoch": 1.6356256126523818, "grad_norm": 0.01344913889852954, "learning_rate": 9.774734194260444e-07, "loss": 0.0002, "step": 248620 }, { "epoch": 1.635691400827615, "grad_norm": 0.018590672907191783, "learning_rate": 9.771324553441419e-07, "loss": 0.0003, "step": 248630 }, { "epoch": 1.6357571890028486, "grad_norm": 0.012655883129681456, "learning_rate": 9.76791544300274e-07, "loss": 0.0007, "step": 248640 }, { "epoch": 1.6358229771780821, "grad_norm": 0.01278185248165325, "learning_rate": 9.764506862989342e-07, "loss": 0.0005, "step": 248650 }, { "epoch": 1.6358887653533154, "grad_norm": 0.04277134637080449, "learning_rate": 9.761098813446173e-07, "loss": 0.0004, "step": 248660 }, { "epoch": 1.6359545535285487, "grad_norm": 0.005841740295503679, "learning_rate": 9.75769129441817e-07, "loss": 0.0004, "step": 248670 }, { "epoch": 1.6360203417037822, "grad_norm": 0.10072338858424558, "learning_rate": 9.754284305950246e-07, "loss": 0.0005, "step": 248680 }, { "epoch": 1.6360861298790157, "grad_norm": 0.0015202451158485724, "learning_rate": 9.750877848087326e-07, "loss": 0.0004, "step": 248690 }, { "epoch": 1.636151918054249, "grad_norm": 0.006941318215615071, "learning_rate": 9.747471920874325e-07, "loss": 0.0012, "step": 248700 }, { "epoch": 1.6362177062294823, "grad_norm": 0.024687507712634497, "learning_rate": 9.744066524356122e-07, "loss": 0.0007, "step": 248710 }, { "epoch": 1.6362834944047155, "grad_norm": 0.07922392058921299, "learning_rate": 9.74066165857765e-07, "loss": 0.0006, "step": 248720 }, { "epoch": 1.636349282579949, "grad_norm": 0.018454494279829668, "learning_rate": 9.737257323583782e-07, "loss": 0.0003, "step": 248730 }, { "epoch": 1.6364150707551826, "grad_norm": 0.0035408229588738795, "learning_rate": 9.733853519419406e-07, "loss": 0.0004, "step": 248740 }, { "epoch": 1.6364808589304158, "grad_norm": 0.0015236620018097855, "learning_rate": 9.730450246129396e-07, "loss": 0.0023, "step": 248750 }, { "epoch": 1.6365466471056491, "grad_norm": 0.030534534572077696, "learning_rate": 9.727047503758619e-07, "loss": 0.0004, "step": 248760 }, { "epoch": 1.6366124352808826, "grad_norm": 0.0005960425966418173, "learning_rate": 9.723645292351935e-07, "loss": 0.0005, "step": 248770 }, { "epoch": 1.6366782234561161, "grad_norm": 0.0013326166044071502, "learning_rate": 9.72024361195421e-07, "loss": 0.0006, "step": 248780 }, { "epoch": 1.6367440116313494, "grad_norm": 0.027421884981692565, "learning_rate": 9.716842462610282e-07, "loss": 0.0003, "step": 248790 }, { "epoch": 1.6368097998065827, "grad_norm": 0.04666232325558473, "learning_rate": 9.713441844364997e-07, "loss": 0.0003, "step": 248800 }, { "epoch": 1.636875587981816, "grad_norm": 0.23621283036739574, "learning_rate": 9.710041757263184e-07, "loss": 0.0006, "step": 248810 }, { "epoch": 1.6369413761570495, "grad_norm": 0.01953507622071819, "learning_rate": 9.706642201349676e-07, "loss": 0.0008, "step": 248820 }, { "epoch": 1.637007164332283, "grad_norm": 0.02217153790662885, "learning_rate": 9.703243176669286e-07, "loss": 0.0003, "step": 248830 }, { "epoch": 1.6370729525075163, "grad_norm": 0.0014359280878443463, "learning_rate": 9.699844683266824e-07, "loss": 0.0003, "step": 248840 }, { "epoch": 1.6371387406827496, "grad_norm": 0.001106569214620047, "learning_rate": 9.69644672118712e-07, "loss": 0.0003, "step": 248850 }, { "epoch": 1.637204528857983, "grad_norm": 0.00030211980792465496, "learning_rate": 9.693049290474954e-07, "loss": 0.0004, "step": 248860 }, { "epoch": 1.6372703170332166, "grad_norm": 0.0021434804151576844, "learning_rate": 9.68965239117512e-07, "loss": 0.0005, "step": 248870 }, { "epoch": 1.6373361052084499, "grad_norm": 0.027497310731622943, "learning_rate": 9.686256023332408e-07, "loss": 0.0004, "step": 248880 }, { "epoch": 1.6374018933836831, "grad_norm": 0.0015391555843493724, "learning_rate": 9.682860186991588e-07, "loss": 0.0005, "step": 248890 }, { "epoch": 1.6374676815589166, "grad_norm": 0.00026796747995817664, "learning_rate": 9.679464882197442e-07, "loss": 0.0005, "step": 248900 }, { "epoch": 1.6375334697341501, "grad_norm": 0.010101856901994364, "learning_rate": 9.676070108994728e-07, "loss": 0.0006, "step": 248910 }, { "epoch": 1.6375992579093834, "grad_norm": 0.0011074128173468895, "learning_rate": 9.6726758674282e-07, "loss": 0.0002, "step": 248920 }, { "epoch": 1.6376650460846167, "grad_norm": 0.0005654954008036563, "learning_rate": 9.669282157542614e-07, "loss": 0.0002, "step": 248930 }, { "epoch": 1.63773083425985, "grad_norm": 0.010384152004300099, "learning_rate": 9.665888979382716e-07, "loss": 0.0001, "step": 248940 }, { "epoch": 1.6377966224350835, "grad_norm": 0.01602963296813283, "learning_rate": 9.662496332993227e-07, "loss": 0.0004, "step": 248950 }, { "epoch": 1.637862410610317, "grad_norm": 0.02444291394596225, "learning_rate": 9.65910421841889e-07, "loss": 0.001, "step": 248960 }, { "epoch": 1.6379281987855503, "grad_norm": 0.019051408685605595, "learning_rate": 9.65571263570441e-07, "loss": 0.0008, "step": 248970 }, { "epoch": 1.6379939869607836, "grad_norm": 0.04164794509118663, "learning_rate": 9.65232158489453e-07, "loss": 0.0007, "step": 248980 }, { "epoch": 1.638059775136017, "grad_norm": 0.01442224568145495, "learning_rate": 9.648931066033946e-07, "loss": 0.0005, "step": 248990 }, { "epoch": 1.6381255633112506, "grad_norm": 0.01038061193203081, "learning_rate": 9.645541079167354e-07, "loss": 0.0002, "step": 249000 }, { "epoch": 1.6381913514864839, "grad_norm": 0.006731827945637979, "learning_rate": 9.642151624339447e-07, "loss": 0.0002, "step": 249010 }, { "epoch": 1.6382571396617172, "grad_norm": 0.007273369405313283, "learning_rate": 9.638762701594917e-07, "loss": 0.0002, "step": 249020 }, { "epoch": 1.6383229278369504, "grad_norm": 0.04427003805447508, "learning_rate": 9.635374310978445e-07, "loss": 0.0008, "step": 249030 }, { "epoch": 1.638388716012184, "grad_norm": 0.043575456656389365, "learning_rate": 9.631986452534704e-07, "loss": 0.0006, "step": 249040 }, { "epoch": 1.6384545041874174, "grad_norm": 0.028342807680082266, "learning_rate": 9.628599126308352e-07, "loss": 0.0003, "step": 249050 }, { "epoch": 1.6385202923626507, "grad_norm": 0.03619057187462606, "learning_rate": 9.625212332344053e-07, "loss": 0.0005, "step": 249060 }, { "epoch": 1.638586080537884, "grad_norm": 0.0014995335649143733, "learning_rate": 9.621826070686463e-07, "loss": 0.0003, "step": 249070 }, { "epoch": 1.6386518687131175, "grad_norm": 0.01323088701818807, "learning_rate": 9.618440341380224e-07, "loss": 0.0004, "step": 249080 }, { "epoch": 1.638717656888351, "grad_norm": 0.014254052717046443, "learning_rate": 9.61505514446997e-07, "loss": 0.0008, "step": 249090 }, { "epoch": 1.6387834450635843, "grad_norm": 0.0029930129865852655, "learning_rate": 9.611670480000334e-07, "loss": 0.0004, "step": 249100 }, { "epoch": 1.6388492332388176, "grad_norm": 0.0016122857042969033, "learning_rate": 9.608286348015933e-07, "loss": 0.0007, "step": 249110 }, { "epoch": 1.6389150214140509, "grad_norm": 0.04204786046320469, "learning_rate": 9.6049027485614e-07, "loss": 0.0005, "step": 249120 }, { "epoch": 1.6389808095892844, "grad_norm": 0.04965915660952367, "learning_rate": 9.601519681681343e-07, "loss": 0.0003, "step": 249130 }, { "epoch": 1.6390465977645179, "grad_norm": 0.005480535901816821, "learning_rate": 9.598137147420355e-07, "loss": 0.0004, "step": 249140 }, { "epoch": 1.6391123859397512, "grad_norm": 0.00684928986114085, "learning_rate": 9.594755145823038e-07, "loss": 0.0005, "step": 249150 }, { "epoch": 1.6391781741149845, "grad_norm": 0.0035164028094591114, "learning_rate": 9.591373676933975e-07, "loss": 0.0005, "step": 249160 }, { "epoch": 1.639243962290218, "grad_norm": 0.00034664763937242735, "learning_rate": 9.587992740797748e-07, "loss": 0.0005, "step": 249170 }, { "epoch": 1.6393097504654515, "grad_norm": 0.019769675080911372, "learning_rate": 9.584612337458937e-07, "loss": 0.0005, "step": 249180 }, { "epoch": 1.6393755386406847, "grad_norm": 0.009966847666628972, "learning_rate": 9.58123246696211e-07, "loss": 0.0004, "step": 249190 }, { "epoch": 1.639441326815918, "grad_norm": 0.0013910090349128251, "learning_rate": 9.577853129351822e-07, "loss": 0.0002, "step": 249200 }, { "epoch": 1.6395071149911515, "grad_norm": 0.009918983202786636, "learning_rate": 9.574474324672634e-07, "loss": 0.0002, "step": 249210 }, { "epoch": 1.6395729031663848, "grad_norm": 0.005003109611736097, "learning_rate": 9.57109605296908e-07, "loss": 0.0001, "step": 249220 }, { "epoch": 1.6396386913416183, "grad_norm": 0.05843653337673352, "learning_rate": 9.567718314285719e-07, "loss": 0.0003, "step": 249230 }, { "epoch": 1.6397044795168516, "grad_norm": 0.017079911895666308, "learning_rate": 9.564341108667052e-07, "loss": 0.0007, "step": 249240 }, { "epoch": 1.639770267692085, "grad_norm": 0.019268509755501723, "learning_rate": 9.56096443615764e-07, "loss": 0.0004, "step": 249250 }, { "epoch": 1.6398360558673184, "grad_norm": 0.025486316412791154, "learning_rate": 9.557588296801984e-07, "loss": 0.0004, "step": 249260 }, { "epoch": 1.639901844042552, "grad_norm": 0.003258900508850788, "learning_rate": 9.554212690644598e-07, "loss": 0.0006, "step": 249270 }, { "epoch": 1.6399676322177852, "grad_norm": 0.0008087243652314166, "learning_rate": 9.550837617729991e-07, "loss": 0.0002, "step": 249280 }, { "epoch": 1.6400334203930185, "grad_norm": 0.016673347548477997, "learning_rate": 9.547463078102653e-07, "loss": 0.0007, "step": 249290 }, { "epoch": 1.640099208568252, "grad_norm": 0.0005065176402003346, "learning_rate": 9.544089071807078e-07, "loss": 0.0003, "step": 249300 }, { "epoch": 1.6401649967434855, "grad_norm": 0.013197531330420594, "learning_rate": 9.540715598887745e-07, "loss": 0.001, "step": 249310 }, { "epoch": 1.6402307849187188, "grad_norm": 0.0007393958531190722, "learning_rate": 9.537342659389138e-07, "loss": 0.0005, "step": 249320 }, { "epoch": 1.640296573093952, "grad_norm": 0.05613382396861563, "learning_rate": 9.533970253355723e-07, "loss": 0.0009, "step": 249330 }, { "epoch": 1.6403623612691853, "grad_norm": 0.05567720793960788, "learning_rate": 9.530598380831957e-07, "loss": 0.0008, "step": 249340 }, { "epoch": 1.6404281494444188, "grad_norm": 0.0076981943263556485, "learning_rate": 9.527227041862302e-07, "loss": 0.0005, "step": 249350 }, { "epoch": 1.6404939376196523, "grad_norm": 0.05750922037459442, "learning_rate": 9.523856236491208e-07, "loss": 0.0012, "step": 249360 }, { "epoch": 1.6405597257948856, "grad_norm": 0.03766296681762992, "learning_rate": 9.520485964763093e-07, "loss": 0.0006, "step": 249370 }, { "epoch": 1.640625513970119, "grad_norm": 0.057820287153294125, "learning_rate": 9.517116226722428e-07, "loss": 0.0004, "step": 249380 }, { "epoch": 1.6406913021453524, "grad_norm": 0.027401104388981964, "learning_rate": 9.513747022413617e-07, "loss": 0.0008, "step": 249390 }, { "epoch": 1.640757090320586, "grad_norm": 0.00493823423291693, "learning_rate": 9.510378351881089e-07, "loss": 0.0002, "step": 249400 }, { "epoch": 1.6408228784958192, "grad_norm": 0.02642156846061043, "learning_rate": 9.507010215169249e-07, "loss": 0.0003, "step": 249410 }, { "epoch": 1.6408886666710525, "grad_norm": 0.01794683249627712, "learning_rate": 9.503642612322511e-07, "loss": 0.0003, "step": 249420 }, { "epoch": 1.6409544548462858, "grad_norm": 0.0037000448060520727, "learning_rate": 9.500275543385273e-07, "loss": 0.0003, "step": 249430 }, { "epoch": 1.6410202430215193, "grad_norm": 0.02413608875656227, "learning_rate": 9.496909008401917e-07, "loss": 0.0004, "step": 249440 }, { "epoch": 1.6410860311967528, "grad_norm": 0.009527032466504222, "learning_rate": 9.493543007416839e-07, "loss": 0.0003, "step": 249450 }, { "epoch": 1.641151819371986, "grad_norm": 0.01978843940175455, "learning_rate": 9.490177540474415e-07, "loss": 0.0014, "step": 249460 }, { "epoch": 1.6412176075472193, "grad_norm": 0.011162460065239483, "learning_rate": 9.486812607619012e-07, "loss": 0.0003, "step": 249470 }, { "epoch": 1.6412833957224529, "grad_norm": 0.005560589785738837, "learning_rate": 9.483448208894991e-07, "loss": 0.0002, "step": 249480 }, { "epoch": 1.6413491838976864, "grad_norm": 0.009680723403887759, "learning_rate": 9.480084344346719e-07, "loss": 0.0003, "step": 249490 }, { "epoch": 1.6414149720729196, "grad_norm": 0.001249687065441506, "learning_rate": 9.476721014018525e-07, "loss": 0.0005, "step": 249500 }, { "epoch": 1.641480760248153, "grad_norm": 0.003818134695504409, "learning_rate": 9.473358217954776e-07, "loss": 0.0004, "step": 249510 }, { "epoch": 1.6415465484233864, "grad_norm": 0.01427439034325031, "learning_rate": 9.469995956199802e-07, "loss": 0.0004, "step": 249520 }, { "epoch": 1.6416123365986197, "grad_norm": 0.015696466384033637, "learning_rate": 9.466634228797922e-07, "loss": 0.0004, "step": 249530 }, { "epoch": 1.6416781247738532, "grad_norm": 0.031771374500496365, "learning_rate": 9.463273035793469e-07, "loss": 0.0005, "step": 249540 }, { "epoch": 1.6417439129490865, "grad_norm": 0.030837701490409083, "learning_rate": 9.459912377230745e-07, "loss": 0.0009, "step": 249550 }, { "epoch": 1.6418097011243198, "grad_norm": 0.01691720581045649, "learning_rate": 9.456552253154067e-07, "loss": 0.0003, "step": 249560 }, { "epoch": 1.6418754892995533, "grad_norm": 0.0741636083591293, "learning_rate": 9.453192663607735e-07, "loss": 0.0004, "step": 249570 }, { "epoch": 1.6419412774747868, "grad_norm": 0.0006589617166113274, "learning_rate": 9.449833608636033e-07, "loss": 0.0003, "step": 249580 }, { "epoch": 1.64200706565002, "grad_norm": 0.007836200835798921, "learning_rate": 9.446475088283258e-07, "loss": 0.0002, "step": 249590 }, { "epoch": 1.6420728538252534, "grad_norm": 0.038957397534041656, "learning_rate": 9.443117102593685e-07, "loss": 0.001, "step": 249600 }, { "epoch": 1.6421386420004869, "grad_norm": 0.006866932430303929, "learning_rate": 9.43975965161158e-07, "loss": 0.0008, "step": 249610 }, { "epoch": 1.6422044301757204, "grad_norm": 0.008921357359887054, "learning_rate": 9.436402735381218e-07, "loss": 0.0009, "step": 249620 }, { "epoch": 1.6422702183509537, "grad_norm": 0.05035639743129668, "learning_rate": 9.43304635394684e-07, "loss": 0.0005, "step": 249630 }, { "epoch": 1.642336006526187, "grad_norm": 0.037545744436559704, "learning_rate": 9.429690507352723e-07, "loss": 0.0008, "step": 249640 }, { "epoch": 1.6424017947014202, "grad_norm": 0.005121451182759541, "learning_rate": 9.426335195643099e-07, "loss": 0.0003, "step": 249650 }, { "epoch": 1.6424675828766537, "grad_norm": 0.049227800764271745, "learning_rate": 9.422980418862199e-07, "loss": 0.0007, "step": 249660 }, { "epoch": 1.6425333710518872, "grad_norm": 0.0013034918652796294, "learning_rate": 9.419626177054264e-07, "loss": 0.0005, "step": 249670 }, { "epoch": 1.6425991592271205, "grad_norm": 0.07134574362709817, "learning_rate": 9.416272470263504e-07, "loss": 0.0009, "step": 249680 }, { "epoch": 1.6426649474023538, "grad_norm": 0.010207209012169998, "learning_rate": 9.412919298534145e-07, "loss": 0.0007, "step": 249690 }, { "epoch": 1.6427307355775873, "grad_norm": 0.042013786126772805, "learning_rate": 9.409566661910391e-07, "loss": 0.0009, "step": 249700 }, { "epoch": 1.6427965237528208, "grad_norm": 0.02779781843734176, "learning_rate": 9.406214560436444e-07, "loss": 0.0005, "step": 249710 }, { "epoch": 1.642862311928054, "grad_norm": 0.08245711386272607, "learning_rate": 9.402862994156503e-07, "loss": 0.0007, "step": 249720 }, { "epoch": 1.6429281001032874, "grad_norm": 0.007932376041178187, "learning_rate": 9.399511963114743e-07, "loss": 0.0007, "step": 249730 }, { "epoch": 1.6429938882785207, "grad_norm": 0.06491639131665769, "learning_rate": 9.396161467355358e-07, "loss": 0.0003, "step": 249740 }, { "epoch": 1.6430596764537542, "grad_norm": 0.06423029146691105, "learning_rate": 9.392811506922517e-07, "loss": 0.0003, "step": 249750 }, { "epoch": 1.6431254646289877, "grad_norm": 0.0330677190170924, "learning_rate": 9.389462081860373e-07, "loss": 0.0006, "step": 249760 }, { "epoch": 1.643191252804221, "grad_norm": 0.0036548974562478653, "learning_rate": 9.386113192213108e-07, "loss": 0.0005, "step": 249770 }, { "epoch": 1.6432570409794542, "grad_norm": 0.010839240160410338, "learning_rate": 9.382764838024865e-07, "loss": 0.0004, "step": 249780 }, { "epoch": 1.6433228291546877, "grad_norm": 0.003526104182591046, "learning_rate": 9.379417019339787e-07, "loss": 0.0009, "step": 249790 }, { "epoch": 1.6433886173299213, "grad_norm": 0.02780384288029507, "learning_rate": 9.37606973620202e-07, "loss": 0.0005, "step": 249800 }, { "epoch": 1.6434544055051545, "grad_norm": 0.04287205136068899, "learning_rate": 9.37272298865568e-07, "loss": 0.0027, "step": 249810 }, { "epoch": 1.6435201936803878, "grad_norm": 0.002356181231975468, "learning_rate": 9.369376776744904e-07, "loss": 0.0003, "step": 249820 }, { "epoch": 1.6435859818556213, "grad_norm": 0.004703872428979806, "learning_rate": 9.366031100513806e-07, "loss": 0.0003, "step": 249830 }, { "epoch": 1.6436517700308546, "grad_norm": 0.009651593532611065, "learning_rate": 9.36268596000649e-07, "loss": 0.0007, "step": 249840 }, { "epoch": 1.6437175582060881, "grad_norm": 0.060261709719000155, "learning_rate": 9.359341355267065e-07, "loss": 0.0003, "step": 249850 }, { "epoch": 1.6437833463813214, "grad_norm": 0.003763498629151211, "learning_rate": 9.355997286339624e-07, "loss": 0.0008, "step": 249860 }, { "epoch": 1.6438491345565547, "grad_norm": 0.0143184095314882, "learning_rate": 9.352653753268254e-07, "loss": 0.0007, "step": 249870 }, { "epoch": 1.6439149227317882, "grad_norm": 0.01639781392669856, "learning_rate": 9.349310756097041e-07, "loss": 0.0003, "step": 249880 }, { "epoch": 1.6439807109070217, "grad_norm": 0.024685003597302167, "learning_rate": 9.345968294870045e-07, "loss": 0.0005, "step": 249890 }, { "epoch": 1.644046499082255, "grad_norm": 0.051879534423465025, "learning_rate": 9.342626369631358e-07, "loss": 0.0006, "step": 249900 }, { "epoch": 1.6441122872574883, "grad_norm": 0.00015000721503478852, "learning_rate": 9.339284980425028e-07, "loss": 0.0003, "step": 249910 }, { "epoch": 1.6441780754327218, "grad_norm": 0.0003239189275914057, "learning_rate": 9.33594412729511e-07, "loss": 0.0005, "step": 249920 }, { "epoch": 1.6442438636079553, "grad_norm": 0.002621729777846549, "learning_rate": 9.332603810285651e-07, "loss": 0.0004, "step": 249930 }, { "epoch": 1.6443096517831886, "grad_norm": 0.012884448232966774, "learning_rate": 9.329264029440682e-07, "loss": 0.0002, "step": 249940 }, { "epoch": 1.6443754399584218, "grad_norm": 0.00021996876972767016, "learning_rate": 9.325924784804247e-07, "loss": 0.0003, "step": 249950 }, { "epoch": 1.6444412281336551, "grad_norm": 0.00499375470996964, "learning_rate": 9.322586076420365e-07, "loss": 0.0006, "step": 249960 }, { "epoch": 1.6445070163088886, "grad_norm": 0.01735605007804999, "learning_rate": 9.319247904333051e-07, "loss": 0.0003, "step": 249970 }, { "epoch": 1.6445728044841221, "grad_norm": 0.09663141258002483, "learning_rate": 9.31591026858632e-07, "loss": 0.0008, "step": 249980 }, { "epoch": 1.6446385926593554, "grad_norm": 0.023598741218291006, "learning_rate": 9.312573169224177e-07, "loss": 0.0003, "step": 249990 }, { "epoch": 1.6447043808345887, "grad_norm": 0.009800137247755143, "learning_rate": 9.309236606290611e-07, "loss": 0.0007, "step": 250000 }, { "epoch": 1.6447043808345887, "eval_loss": 0.0003584875084925443, "eval_runtime": 13.1046, "eval_samples_per_second": 15.262, "eval_steps_per_second": 7.631, "step": 250000 }, { "epoch": 1.6447701690098222, "grad_norm": 0.010176253872046128, "learning_rate": 9.305900579829624e-07, "loss": 0.0003, "step": 250010 }, { "epoch": 1.6448359571850557, "grad_norm": 0.045608171617962365, "learning_rate": 9.30256508988518e-07, "loss": 0.0005, "step": 250020 }, { "epoch": 1.644901745360289, "grad_norm": 0.029446324147128958, "learning_rate": 9.299230136501275e-07, "loss": 0.0004, "step": 250030 }, { "epoch": 1.6449675335355223, "grad_norm": 0.009527935992206324, "learning_rate": 9.29589571972187e-07, "loss": 0.0002, "step": 250040 }, { "epoch": 1.6450333217107556, "grad_norm": 0.013672965281348296, "learning_rate": 9.292561839590925e-07, "loss": 0.0002, "step": 250050 }, { "epoch": 1.645099109885989, "grad_norm": 0.010712984980193707, "learning_rate": 9.289228496152397e-07, "loss": 0.0004, "step": 250060 }, { "epoch": 1.6451648980612226, "grad_norm": 0.06059091426329785, "learning_rate": 9.285895689450231e-07, "loss": 0.0005, "step": 250070 }, { "epoch": 1.6452306862364559, "grad_norm": 0.06469393456609869, "learning_rate": 9.282563419528368e-07, "loss": 0.0002, "step": 250080 }, { "epoch": 1.6452964744116891, "grad_norm": 0.0013608780730212558, "learning_rate": 9.279231686430734e-07, "loss": 0.0004, "step": 250090 }, { "epoch": 1.6453622625869226, "grad_norm": 0.02184531952762078, "learning_rate": 9.275900490201267e-07, "loss": 0.0002, "step": 250100 }, { "epoch": 1.6454280507621561, "grad_norm": 0.017028976176300635, "learning_rate": 9.272569830883882e-07, "loss": 0.0003, "step": 250110 }, { "epoch": 1.6454938389373894, "grad_norm": 0.07671993189767254, "learning_rate": 9.269239708522488e-07, "loss": 0.0003, "step": 250120 }, { "epoch": 1.6455596271126227, "grad_norm": 0.06554465087784933, "learning_rate": 9.265910123160993e-07, "loss": 0.0005, "step": 250130 }, { "epoch": 1.645625415287856, "grad_norm": 0.003596382105611929, "learning_rate": 9.262581074843286e-07, "loss": 0.0005, "step": 250140 }, { "epoch": 1.6456912034630895, "grad_norm": 0.056254715704213795, "learning_rate": 9.259252563613258e-07, "loss": 0.0005, "step": 250150 }, { "epoch": 1.645756991638323, "grad_norm": 0.027878855769014308, "learning_rate": 9.255924589514808e-07, "loss": 0.0002, "step": 250160 }, { "epoch": 1.6458227798135563, "grad_norm": 0.01828073879378904, "learning_rate": 9.252597152591803e-07, "loss": 0.0002, "step": 250170 }, { "epoch": 1.6458885679887896, "grad_norm": 0.03213731528810753, "learning_rate": 9.249270252888115e-07, "loss": 0.0005, "step": 250180 }, { "epoch": 1.645954356164023, "grad_norm": 0.015127297250224532, "learning_rate": 9.245943890447606e-07, "loss": 0.0004, "step": 250190 }, { "epoch": 1.6460201443392566, "grad_norm": 0.024379126620886985, "learning_rate": 9.242618065314124e-07, "loss": 0.0003, "step": 250200 }, { "epoch": 1.6460859325144899, "grad_norm": 0.014225603180701976, "learning_rate": 9.239292777531528e-07, "loss": 0.0002, "step": 250210 }, { "epoch": 1.6461517206897232, "grad_norm": 0.00849915107803812, "learning_rate": 9.235968027143649e-07, "loss": 0.0002, "step": 250220 }, { "epoch": 1.6462175088649567, "grad_norm": 0.020823925374482334, "learning_rate": 9.232643814194331e-07, "loss": 0.0005, "step": 250230 }, { "epoch": 1.64628329704019, "grad_norm": 0.0037845627290737276, "learning_rate": 9.229320138727388e-07, "loss": 0.0004, "step": 250240 }, { "epoch": 1.6463490852154234, "grad_norm": 0.015153177492603994, "learning_rate": 9.225997000786652e-07, "loss": 0.0005, "step": 250250 }, { "epoch": 1.6464148733906567, "grad_norm": 0.02156003152497351, "learning_rate": 9.222674400415932e-07, "loss": 0.0012, "step": 250260 }, { "epoch": 1.64648066156589, "grad_norm": 0.03150794702864077, "learning_rate": 9.219352337659027e-07, "loss": 0.0003, "step": 250270 }, { "epoch": 1.6465464497411235, "grad_norm": 0.024039027876852137, "learning_rate": 9.216030812559734e-07, "loss": 0.0006, "step": 250280 }, { "epoch": 1.646612237916357, "grad_norm": 0.01676776099051663, "learning_rate": 9.212709825161859e-07, "loss": 0.0008, "step": 250290 }, { "epoch": 1.6466780260915903, "grad_norm": 0.004212433562863266, "learning_rate": 9.209389375509181e-07, "loss": 0.0002, "step": 250300 }, { "epoch": 1.6467438142668236, "grad_norm": 0.00755191592606917, "learning_rate": 9.206069463645473e-07, "loss": 0.0005, "step": 250310 }, { "epoch": 1.646809602442057, "grad_norm": 0.017257529360534395, "learning_rate": 9.20275008961451e-07, "loss": 0.0002, "step": 250320 }, { "epoch": 1.6468753906172906, "grad_norm": 0.05267334448012479, "learning_rate": 9.19943125346005e-07, "loss": 0.0006, "step": 250330 }, { "epoch": 1.6469411787925239, "grad_norm": 0.0016101741112855776, "learning_rate": 9.196112955225855e-07, "loss": 0.0003, "step": 250340 }, { "epoch": 1.6470069669677572, "grad_norm": 0.04361663394392583, "learning_rate": 9.192795194955667e-07, "loss": 0.0004, "step": 250350 }, { "epoch": 1.6470727551429905, "grad_norm": 0.010768877433995336, "learning_rate": 9.189477972693234e-07, "loss": 0.0004, "step": 250360 }, { "epoch": 1.647138543318224, "grad_norm": 0.0009654516889705666, "learning_rate": 9.186161288482287e-07, "loss": 0.001, "step": 250370 }, { "epoch": 1.6472043314934575, "grad_norm": 0.043507092840781816, "learning_rate": 9.182845142366553e-07, "loss": 0.0005, "step": 250380 }, { "epoch": 1.6472701196686907, "grad_norm": 0.00039966643028659153, "learning_rate": 9.179529534389753e-07, "loss": 0.0003, "step": 250390 }, { "epoch": 1.647335907843924, "grad_norm": 0.011843799767958076, "learning_rate": 9.1762144645956e-07, "loss": 0.0004, "step": 250400 }, { "epoch": 1.6474016960191575, "grad_norm": 0.006724767000897013, "learning_rate": 9.17289993302779e-07, "loss": 0.0004, "step": 250410 }, { "epoch": 1.647467484194391, "grad_norm": 0.038895407583397816, "learning_rate": 9.169585939730046e-07, "loss": 0.0007, "step": 250420 }, { "epoch": 1.6475332723696243, "grad_norm": 0.002272583768602466, "learning_rate": 9.166272484746064e-07, "loss": 0.0003, "step": 250430 }, { "epoch": 1.6475990605448576, "grad_norm": 0.006376708960043579, "learning_rate": 9.162959568119501e-07, "loss": 0.0005, "step": 250440 }, { "epoch": 1.647664848720091, "grad_norm": 0.03692904265149722, "learning_rate": 9.159647189894039e-07, "loss": 0.0004, "step": 250450 }, { "epoch": 1.6477306368953244, "grad_norm": 0.024826763432748512, "learning_rate": 9.156335350113365e-07, "loss": 0.0004, "step": 250460 }, { "epoch": 1.647796425070558, "grad_norm": 0.0427181607835489, "learning_rate": 9.15302404882113e-07, "loss": 0.0012, "step": 250470 }, { "epoch": 1.6478622132457912, "grad_norm": 0.09415681707080005, "learning_rate": 9.149713286060996e-07, "loss": 0.0007, "step": 250480 }, { "epoch": 1.6479280014210245, "grad_norm": 0.004219344909036393, "learning_rate": 9.146403061876613e-07, "loss": 0.0012, "step": 250490 }, { "epoch": 1.647993789596258, "grad_norm": 0.013339708016883759, "learning_rate": 9.143093376311618e-07, "loss": 0.0002, "step": 250500 }, { "epoch": 1.6480595777714915, "grad_norm": 0.0002541637798885729, "learning_rate": 9.139784229409643e-07, "loss": 0.0003, "step": 250510 }, { "epoch": 1.6481253659467248, "grad_norm": 0.0012319004810246198, "learning_rate": 9.136475621214336e-07, "loss": 0.0006, "step": 250520 }, { "epoch": 1.648191154121958, "grad_norm": 0.019897804460180978, "learning_rate": 9.133167551769307e-07, "loss": 0.0006, "step": 250530 }, { "epoch": 1.6482569422971916, "grad_norm": 0.058265846800892716, "learning_rate": 9.129860021118164e-07, "loss": 0.0005, "step": 250540 }, { "epoch": 1.6483227304724248, "grad_norm": 0.009459176967554876, "learning_rate": 9.12655302930452e-07, "loss": 0.0003, "step": 250550 }, { "epoch": 1.6483885186476583, "grad_norm": 0.013152734509243893, "learning_rate": 9.123246576371975e-07, "loss": 0.0003, "step": 250560 }, { "epoch": 1.6484543068228916, "grad_norm": 0.021260603425004278, "learning_rate": 9.119940662364119e-07, "loss": 0.0006, "step": 250570 }, { "epoch": 1.648520094998125, "grad_norm": 0.0360629583029747, "learning_rate": 9.116635287324543e-07, "loss": 0.0009, "step": 250580 }, { "epoch": 1.6485858831733584, "grad_norm": 0.005083462270666122, "learning_rate": 9.11333045129682e-07, "loss": 0.0003, "step": 250590 }, { "epoch": 1.648651671348592, "grad_norm": 0.004558005192460136, "learning_rate": 9.110026154324519e-07, "loss": 0.0005, "step": 250600 }, { "epoch": 1.6487174595238252, "grad_norm": 9.188614197860463e-05, "learning_rate": 9.106722396451212e-07, "loss": 0.0003, "step": 250610 }, { "epoch": 1.6487832476990585, "grad_norm": 0.020772944640927105, "learning_rate": 9.103419177720451e-07, "loss": 0.0014, "step": 250620 }, { "epoch": 1.648849035874292, "grad_norm": 0.0016373774943350668, "learning_rate": 9.100116498175787e-07, "loss": 0.0003, "step": 250630 }, { "epoch": 1.6489148240495255, "grad_norm": 0.008989755327535301, "learning_rate": 9.09681435786075e-07, "loss": 0.0006, "step": 250640 }, { "epoch": 1.6489806122247588, "grad_norm": 0.0017983475708318374, "learning_rate": 9.093512756818906e-07, "loss": 0.0007, "step": 250650 }, { "epoch": 1.649046400399992, "grad_norm": 0.00046540616076834945, "learning_rate": 9.090211695093764e-07, "loss": 0.0003, "step": 250660 }, { "epoch": 1.6491121885752253, "grad_norm": 0.001149127083006066, "learning_rate": 9.086911172728852e-07, "loss": 0.0002, "step": 250670 }, { "epoch": 1.6491779767504589, "grad_norm": 0.019810520279744084, "learning_rate": 9.083611189767677e-07, "loss": 0.0003, "step": 250680 }, { "epoch": 1.6492437649256924, "grad_norm": 0.05694245712264283, "learning_rate": 9.080311746253756e-07, "loss": 0.0008, "step": 250690 }, { "epoch": 1.6493095531009256, "grad_norm": 0.00789172783822514, "learning_rate": 9.077012842230582e-07, "loss": 0.0005, "step": 250700 }, { "epoch": 1.649375341276159, "grad_norm": 0.005749741949025959, "learning_rate": 9.073714477741647e-07, "loss": 0.0005, "step": 250710 }, { "epoch": 1.6494411294513924, "grad_norm": 0.00012744679130871595, "learning_rate": 9.070416652830444e-07, "loss": 0.0007, "step": 250720 }, { "epoch": 1.649506917626626, "grad_norm": 0.0445648675448955, "learning_rate": 9.067119367540444e-07, "loss": 0.0005, "step": 250730 }, { "epoch": 1.6495727058018592, "grad_norm": 0.07350951879690219, "learning_rate": 9.06382262191513e-07, "loss": 0.0003, "step": 250740 }, { "epoch": 1.6496384939770925, "grad_norm": 0.04240004333616973, "learning_rate": 9.060526415997955e-07, "loss": 0.0005, "step": 250750 }, { "epoch": 1.6497042821523258, "grad_norm": 0.054719624843846036, "learning_rate": 9.057230749832385e-07, "loss": 0.0009, "step": 250760 }, { "epoch": 1.6497700703275593, "grad_norm": 0.06573960883974954, "learning_rate": 9.053935623461851e-07, "loss": 0.0005, "step": 250770 }, { "epoch": 1.6498358585027928, "grad_norm": 0.020859255805167395, "learning_rate": 9.050641036929825e-07, "loss": 0.0002, "step": 250780 }, { "epoch": 1.649901646678026, "grad_norm": 0.009495490680775006, "learning_rate": 9.047346990279731e-07, "loss": 0.0003, "step": 250790 }, { "epoch": 1.6499674348532594, "grad_norm": 0.023453998455939356, "learning_rate": 9.044053483554998e-07, "loss": 0.0003, "step": 250800 }, { "epoch": 1.6500332230284929, "grad_norm": 0.04382524086445509, "learning_rate": 9.040760516799052e-07, "loss": 0.001, "step": 250810 }, { "epoch": 1.6500990112037264, "grad_norm": 0.09318134853797447, "learning_rate": 9.037468090055302e-07, "loss": 0.0006, "step": 250820 }, { "epoch": 1.6501647993789597, "grad_norm": 0.06395043944367738, "learning_rate": 9.034176203367157e-07, "loss": 0.0002, "step": 250830 }, { "epoch": 1.650230587554193, "grad_norm": 0.049124244370221004, "learning_rate": 9.030884856778016e-07, "loss": 0.0002, "step": 250840 }, { "epoch": 1.6502963757294264, "grad_norm": 0.021828958300786974, "learning_rate": 9.027594050331273e-07, "loss": 0.0002, "step": 250850 }, { "epoch": 1.6503621639046597, "grad_norm": 0.0007589179907042068, "learning_rate": 9.024303784070321e-07, "loss": 0.001, "step": 250860 }, { "epoch": 1.6504279520798932, "grad_norm": 0.010924561565230802, "learning_rate": 9.021014058038536e-07, "loss": 0.0004, "step": 250870 }, { "epoch": 1.6504937402551265, "grad_norm": 0.005064053786089348, "learning_rate": 9.017724872279288e-07, "loss": 0.0003, "step": 250880 }, { "epoch": 1.6505595284303598, "grad_norm": 0.009934945971188823, "learning_rate": 9.014436226835937e-07, "loss": 0.0005, "step": 250890 }, { "epoch": 1.6506253166055933, "grad_norm": 0.07578644313857408, "learning_rate": 9.01114812175184e-07, "loss": 0.0005, "step": 250900 }, { "epoch": 1.6506911047808268, "grad_norm": 0.005576735906941582, "learning_rate": 9.00786055707037e-07, "loss": 0.0005, "step": 250910 }, { "epoch": 1.65075689295606, "grad_norm": 0.0013511262718863633, "learning_rate": 9.004573532834849e-07, "loss": 0.0003, "step": 250920 }, { "epoch": 1.6508226811312934, "grad_norm": 0.005809225971344913, "learning_rate": 9.001287049088626e-07, "loss": 0.0001, "step": 250930 }, { "epoch": 1.6508884693065269, "grad_norm": 0.0013763089348780113, "learning_rate": 8.998001105875021e-07, "loss": 0.0003, "step": 250940 }, { "epoch": 1.6509542574817604, "grad_norm": 0.06256614094600345, "learning_rate": 8.994715703237356e-07, "loss": 0.0004, "step": 250950 }, { "epoch": 1.6510200456569937, "grad_norm": 0.005308498057169777, "learning_rate": 8.991430841218957e-07, "loss": 0.0004, "step": 250960 }, { "epoch": 1.651085833832227, "grad_norm": 0.011626065566621296, "learning_rate": 8.988146519863117e-07, "loss": 0.0001, "step": 250970 }, { "epoch": 1.6511516220074602, "grad_norm": 0.03145668003266093, "learning_rate": 8.984862739213151e-07, "loss": 0.0003, "step": 250980 }, { "epoch": 1.6512174101826937, "grad_norm": 0.13552625806421142, "learning_rate": 8.981579499312349e-07, "loss": 0.0007, "step": 250990 }, { "epoch": 1.6512831983579273, "grad_norm": 0.02808146844639141, "learning_rate": 8.97829680020399e-07, "loss": 0.0009, "step": 251000 }, { "epoch": 1.6513489865331605, "grad_norm": 0.02234849080851747, "learning_rate": 8.975014641931362e-07, "loss": 0.0003, "step": 251010 }, { "epoch": 1.6514147747083938, "grad_norm": 0.008175593107346585, "learning_rate": 8.971733024537732e-07, "loss": 0.0009, "step": 251020 }, { "epoch": 1.6514805628836273, "grad_norm": 0.02657912209702112, "learning_rate": 8.968451948066359e-07, "loss": 0.0003, "step": 251030 }, { "epoch": 1.6515463510588608, "grad_norm": 0.003171963141541185, "learning_rate": 8.96517141256052e-07, "loss": 0.0002, "step": 251040 }, { "epoch": 1.6516121392340941, "grad_norm": 0.048901279308964804, "learning_rate": 8.961891418063456e-07, "loss": 0.0004, "step": 251050 }, { "epoch": 1.6516779274093274, "grad_norm": 0.02356964830769133, "learning_rate": 8.958611964618408e-07, "loss": 0.0001, "step": 251060 }, { "epoch": 1.6517437155845607, "grad_norm": 0.06405710099855715, "learning_rate": 8.955333052268623e-07, "loss": 0.0009, "step": 251070 }, { "epoch": 1.6518095037597942, "grad_norm": 0.024607073702364243, "learning_rate": 8.952054681057315e-07, "loss": 0.0005, "step": 251080 }, { "epoch": 1.6518752919350277, "grad_norm": 0.0004173161925105233, "learning_rate": 8.948776851027718e-07, "loss": 0.0007, "step": 251090 }, { "epoch": 1.651941080110261, "grad_norm": 0.04290599923223426, "learning_rate": 8.945499562223048e-07, "loss": 0.0004, "step": 251100 }, { "epoch": 1.6520068682854943, "grad_norm": 0.000987168640636017, "learning_rate": 8.942222814686502e-07, "loss": 0.0003, "step": 251110 }, { "epoch": 1.6520726564607278, "grad_norm": 0.00011502370948482881, "learning_rate": 8.938946608461296e-07, "loss": 0.0002, "step": 251120 }, { "epoch": 1.6521384446359613, "grad_norm": 0.006521756611442275, "learning_rate": 8.935670943590613e-07, "loss": 0.0002, "step": 251130 }, { "epoch": 1.6522042328111946, "grad_norm": 0.010191544578165533, "learning_rate": 8.93239582011764e-07, "loss": 0.0005, "step": 251140 }, { "epoch": 1.6522700209864278, "grad_norm": 0.041443718515060055, "learning_rate": 8.929121238085564e-07, "loss": 0.0008, "step": 251150 }, { "epoch": 1.6523358091616611, "grad_norm": 0.014541236362334722, "learning_rate": 8.925847197537541e-07, "loss": 0.0003, "step": 251160 }, { "epoch": 1.6524015973368946, "grad_norm": 0.015067328581148613, "learning_rate": 8.922573698516757e-07, "loss": 0.0009, "step": 251170 }, { "epoch": 1.6524673855121281, "grad_norm": 0.001003500965207188, "learning_rate": 8.919300741066367e-07, "loss": 0.0002, "step": 251180 }, { "epoch": 1.6525331736873614, "grad_norm": 0.0007770430641919423, "learning_rate": 8.916028325229514e-07, "loss": 0.0005, "step": 251190 }, { "epoch": 1.6525989618625947, "grad_norm": 0.04689063444897616, "learning_rate": 8.912756451049343e-07, "loss": 0.0004, "step": 251200 }, { "epoch": 1.6526647500378282, "grad_norm": 0.019637086869094742, "learning_rate": 8.909485118568994e-07, "loss": 0.0002, "step": 251210 }, { "epoch": 1.6527305382130617, "grad_norm": 0.004305257196858953, "learning_rate": 8.906214327831597e-07, "loss": 0.0003, "step": 251220 }, { "epoch": 1.652796326388295, "grad_norm": 0.011977348277047986, "learning_rate": 8.902944078880277e-07, "loss": 0.0005, "step": 251230 }, { "epoch": 1.6528621145635283, "grad_norm": 0.05369741153700714, "learning_rate": 8.899674371758144e-07, "loss": 0.0002, "step": 251240 }, { "epoch": 1.6529279027387618, "grad_norm": 0.004723197076870163, "learning_rate": 8.896405206508302e-07, "loss": 0.0003, "step": 251250 }, { "epoch": 1.6529936909139953, "grad_norm": 0.023077871694202366, "learning_rate": 8.893136583173861e-07, "loss": 0.0006, "step": 251260 }, { "epoch": 1.6530594790892286, "grad_norm": 0.030180357734137835, "learning_rate": 8.889868501797916e-07, "loss": 0.0007, "step": 251270 }, { "epoch": 1.6531252672644619, "grad_norm": 0.14642358813334122, "learning_rate": 8.886600962423547e-07, "loss": 0.0014, "step": 251280 }, { "epoch": 1.6531910554396951, "grad_norm": 0.033096670211263876, "learning_rate": 8.883333965093821e-07, "loss": 0.0004, "step": 251290 }, { "epoch": 1.6532568436149286, "grad_norm": 0.001588209789462825, "learning_rate": 8.880067509851842e-07, "loss": 0.0002, "step": 251300 }, { "epoch": 1.6533226317901621, "grad_norm": 0.005472139776913354, "learning_rate": 8.876801596740659e-07, "loss": 0.0009, "step": 251310 }, { "epoch": 1.6533884199653954, "grad_norm": 0.00012164373907134136, "learning_rate": 8.873536225803331e-07, "loss": 0.0003, "step": 251320 }, { "epoch": 1.6534542081406287, "grad_norm": 0.029280875144377976, "learning_rate": 8.870271397082908e-07, "loss": 0.0004, "step": 251330 }, { "epoch": 1.6535199963158622, "grad_norm": 0.001365827687154839, "learning_rate": 8.867007110622433e-07, "loss": 0.0004, "step": 251340 }, { "epoch": 1.6535857844910957, "grad_norm": 0.045506136075685655, "learning_rate": 8.863743366464944e-07, "loss": 0.0011, "step": 251350 }, { "epoch": 1.653651572666329, "grad_norm": 0.046988567818258324, "learning_rate": 8.860480164653473e-07, "loss": 0.0006, "step": 251360 }, { "epoch": 1.6537173608415623, "grad_norm": 0.036344601518894466, "learning_rate": 8.857217505231041e-07, "loss": 0.0003, "step": 251370 }, { "epoch": 1.6537831490167956, "grad_norm": 0.0023421267844223473, "learning_rate": 8.853955388240664e-07, "loss": 0.0005, "step": 251380 }, { "epoch": 1.653848937192029, "grad_norm": 0.024821158351497175, "learning_rate": 8.850693813725347e-07, "loss": 0.0007, "step": 251390 }, { "epoch": 1.6539147253672626, "grad_norm": 0.0007842479650798862, "learning_rate": 8.847432781728094e-07, "loss": 0.0002, "step": 251400 }, { "epoch": 1.6539805135424959, "grad_norm": 0.09726013102977014, "learning_rate": 8.844172292291897e-07, "loss": 0.0003, "step": 251410 }, { "epoch": 1.6540463017177292, "grad_norm": 0.03993253434328205, "learning_rate": 8.840912345459734e-07, "loss": 0.0004, "step": 251420 }, { "epoch": 1.6541120898929627, "grad_norm": 0.015975433303486882, "learning_rate": 8.837652941274605e-07, "loss": 0.0008, "step": 251430 }, { "epoch": 1.6541778780681962, "grad_norm": 0.029854024583561716, "learning_rate": 8.834394079779474e-07, "loss": 0.0002, "step": 251440 }, { "epoch": 1.6542436662434294, "grad_norm": 0.017604018881384912, "learning_rate": 8.831135761017301e-07, "loss": 0.0005, "step": 251450 }, { "epoch": 1.6543094544186627, "grad_norm": 0.00225558311378702, "learning_rate": 8.827877985031047e-07, "loss": 0.0008, "step": 251460 }, { "epoch": 1.654375242593896, "grad_norm": 0.07078592280634982, "learning_rate": 8.824620751863666e-07, "loss": 0.0005, "step": 251470 }, { "epoch": 1.6544410307691295, "grad_norm": 0.029933757858303118, "learning_rate": 8.821364061558096e-07, "loss": 0.0005, "step": 251480 }, { "epoch": 1.654506818944363, "grad_norm": 0.004838548997223503, "learning_rate": 8.818107914157282e-07, "loss": 0.0005, "step": 251490 }, { "epoch": 1.6545726071195963, "grad_norm": 0.016240966717784893, "learning_rate": 8.814852309704141e-07, "loss": 0.0005, "step": 251500 }, { "epoch": 1.6546383952948296, "grad_norm": 0.022699770774262665, "learning_rate": 8.811597248241605e-07, "loss": 0.0003, "step": 251510 }, { "epoch": 1.654704183470063, "grad_norm": 0.000514028696773747, "learning_rate": 8.808342729812591e-07, "loss": 0.0011, "step": 251520 }, { "epoch": 1.6547699716452966, "grad_norm": 0.0146917447557337, "learning_rate": 8.805088754459995e-07, "loss": 0.0002, "step": 251530 }, { "epoch": 1.6548357598205299, "grad_norm": 0.04121442430747074, "learning_rate": 8.801835322226731e-07, "loss": 0.0003, "step": 251540 }, { "epoch": 1.6549015479957632, "grad_norm": 0.004137374659369851, "learning_rate": 8.798582433155673e-07, "loss": 0.0006, "step": 251550 }, { "epoch": 1.6549673361709967, "grad_norm": 0.00981747415052481, "learning_rate": 8.795330087289733e-07, "loss": 0.0005, "step": 251560 }, { "epoch": 1.65503312434623, "grad_norm": 0.009387217281183783, "learning_rate": 8.792078284671785e-07, "loss": 0.0003, "step": 251570 }, { "epoch": 1.6550989125214635, "grad_norm": 0.009463968889938033, "learning_rate": 8.788827025344687e-07, "loss": 0.0003, "step": 251580 }, { "epoch": 1.6551647006966967, "grad_norm": 0.012453234277009374, "learning_rate": 8.785576309351318e-07, "loss": 0.0002, "step": 251590 }, { "epoch": 1.65523048887193, "grad_norm": 0.003322606629293802, "learning_rate": 8.782326136734532e-07, "loss": 0.0004, "step": 251600 }, { "epoch": 1.6552962770471635, "grad_norm": 0.02076785420871083, "learning_rate": 8.779076507537171e-07, "loss": 0.0003, "step": 251610 }, { "epoch": 1.655362065222397, "grad_norm": 0.009937499984337975, "learning_rate": 8.775827421802091e-07, "loss": 0.0009, "step": 251620 }, { "epoch": 1.6554278533976303, "grad_norm": 0.016136723515764086, "learning_rate": 8.772578879572119e-07, "loss": 0.0005, "step": 251630 }, { "epoch": 1.6554936415728636, "grad_norm": 0.0003512087589578242, "learning_rate": 8.769330880890093e-07, "loss": 0.0003, "step": 251640 }, { "epoch": 1.6555594297480971, "grad_norm": 0.01390670815088297, "learning_rate": 8.766083425798827e-07, "loss": 0.0005, "step": 251650 }, { "epoch": 1.6556252179233306, "grad_norm": 0.01000978127301339, "learning_rate": 8.762836514341139e-07, "loss": 0.0004, "step": 251660 }, { "epoch": 1.655691006098564, "grad_norm": 0.015201201152395752, "learning_rate": 8.759590146559838e-07, "loss": 0.0007, "step": 251670 }, { "epoch": 1.6557567942737972, "grad_norm": 0.0002983807314860571, "learning_rate": 8.756344322497712e-07, "loss": 0.0003, "step": 251680 }, { "epoch": 1.6558225824490305, "grad_norm": 0.01290668157413045, "learning_rate": 8.753099042197577e-07, "loss": 0.0003, "step": 251690 }, { "epoch": 1.655888370624264, "grad_norm": 0.002109884692234105, "learning_rate": 8.749854305702204e-07, "loss": 0.0003, "step": 251700 }, { "epoch": 1.6559541587994975, "grad_norm": 0.029194777585052942, "learning_rate": 8.746610113054383e-07, "loss": 0.0014, "step": 251710 }, { "epoch": 1.6560199469747308, "grad_norm": 0.022662736925013777, "learning_rate": 8.743366464296871e-07, "loss": 0.0001, "step": 251720 }, { "epoch": 1.656085735149964, "grad_norm": 0.004598251065070067, "learning_rate": 8.740123359472446e-07, "loss": 0.0005, "step": 251730 }, { "epoch": 1.6561515233251975, "grad_norm": 0.03200506282149559, "learning_rate": 8.736880798623859e-07, "loss": 0.0009, "step": 251740 }, { "epoch": 1.656217311500431, "grad_norm": 0.08505683035976219, "learning_rate": 8.733638781793862e-07, "loss": 0.0015, "step": 251750 }, { "epoch": 1.6562830996756643, "grad_norm": 0.02334682797026552, "learning_rate": 8.730397309025196e-07, "loss": 0.0003, "step": 251760 }, { "epoch": 1.6563488878508976, "grad_norm": 0.00278822625666395, "learning_rate": 8.727156380360602e-07, "loss": 0.0004, "step": 251770 }, { "epoch": 1.656414676026131, "grad_norm": 0.01714103776247148, "learning_rate": 8.723915995842802e-07, "loss": 0.0011, "step": 251780 }, { "epoch": 1.6564804642013644, "grad_norm": 0.001890828492141103, "learning_rate": 8.720676155514524e-07, "loss": 0.0004, "step": 251790 }, { "epoch": 1.656546252376598, "grad_norm": 0.018147588512183087, "learning_rate": 8.717436859418482e-07, "loss": 0.0011, "step": 251800 }, { "epoch": 1.6566120405518312, "grad_norm": 0.02433574422230151, "learning_rate": 8.71419810759736e-07, "loss": 0.0005, "step": 251810 }, { "epoch": 1.6566778287270645, "grad_norm": 0.007881455902020886, "learning_rate": 8.710959900093902e-07, "loss": 0.0006, "step": 251820 }, { "epoch": 1.656743616902298, "grad_norm": 0.04386187826378048, "learning_rate": 8.707722236950772e-07, "loss": 0.0003, "step": 251830 }, { "epoch": 1.6568094050775315, "grad_norm": 0.05961526540074016, "learning_rate": 8.70448511821066e-07, "loss": 0.0007, "step": 251840 }, { "epoch": 1.6568751932527648, "grad_norm": 0.07514087139391651, "learning_rate": 8.701248543916251e-07, "loss": 0.0004, "step": 251850 }, { "epoch": 1.656940981427998, "grad_norm": 0.03112915109582521, "learning_rate": 8.69801251411021e-07, "loss": 0.0008, "step": 251860 }, { "epoch": 1.6570067696032316, "grad_norm": 0.0056006289264024746, "learning_rate": 8.694777028835205e-07, "loss": 0.0003, "step": 251870 }, { "epoch": 1.6570725577784648, "grad_norm": 0.005959482087687311, "learning_rate": 8.691542088133892e-07, "loss": 0.0009, "step": 251880 }, { "epoch": 1.6571383459536984, "grad_norm": 0.030180733954082422, "learning_rate": 8.688307692048919e-07, "loss": 0.0004, "step": 251890 }, { "epoch": 1.6572041341289316, "grad_norm": 0.011862267276262948, "learning_rate": 8.685073840622926e-07, "loss": 0.0003, "step": 251900 }, { "epoch": 1.657269922304165, "grad_norm": 0.020584653028851242, "learning_rate": 8.681840533898556e-07, "loss": 0.0015, "step": 251910 }, { "epoch": 1.6573357104793984, "grad_norm": 0.062456366533736256, "learning_rate": 8.678607771918434e-07, "loss": 0.0017, "step": 251920 }, { "epoch": 1.657401498654632, "grad_norm": 0.0343808605911143, "learning_rate": 8.67537555472518e-07, "loss": 0.0002, "step": 251930 }, { "epoch": 1.6574672868298652, "grad_norm": 0.02905921749538601, "learning_rate": 8.6721438823614e-07, "loss": 0.0002, "step": 251940 }, { "epoch": 1.6575330750050985, "grad_norm": 0.02175466021206866, "learning_rate": 8.668912754869719e-07, "loss": 0.0004, "step": 251950 }, { "epoch": 1.657598863180332, "grad_norm": 0.00292723917679927, "learning_rate": 8.665682172292728e-07, "loss": 0.0004, "step": 251960 }, { "epoch": 1.6576646513555655, "grad_norm": 0.009842852956828133, "learning_rate": 8.662452134673022e-07, "loss": 0.0006, "step": 251970 }, { "epoch": 1.6577304395307988, "grad_norm": 0.00716485943156171, "learning_rate": 8.659222642053177e-07, "loss": 0.0002, "step": 251980 }, { "epoch": 1.657796227706032, "grad_norm": 0.005737389845130922, "learning_rate": 8.655993694475784e-07, "loss": 0.0003, "step": 251990 }, { "epoch": 1.6578620158812654, "grad_norm": 0.011798102270763635, "learning_rate": 8.652765291983401e-07, "loss": 0.0005, "step": 252000 }, { "epoch": 1.6579278040564989, "grad_norm": 0.005641548029309369, "learning_rate": 8.649537434618599e-07, "loss": 0.0005, "step": 252010 }, { "epoch": 1.6579935922317324, "grad_norm": 0.02239557927732446, "learning_rate": 8.646310122423934e-07, "loss": 0.0003, "step": 252020 }, { "epoch": 1.6580593804069657, "grad_norm": 0.018983963742754184, "learning_rate": 8.643083355441956e-07, "loss": 0.0003, "step": 252030 }, { "epoch": 1.658125168582199, "grad_norm": 0.028219429326320426, "learning_rate": 8.639857133715207e-07, "loss": 0.0006, "step": 252040 }, { "epoch": 1.6581909567574324, "grad_norm": 0.000753188007614125, "learning_rate": 8.636631457286221e-07, "loss": 0.0002, "step": 252050 }, { "epoch": 1.658256744932666, "grad_norm": 0.0014118393080911472, "learning_rate": 8.633406326197518e-07, "loss": 0.0006, "step": 252060 }, { "epoch": 1.6583225331078992, "grad_norm": 0.028538010411003643, "learning_rate": 8.630181740491622e-07, "loss": 0.0005, "step": 252070 }, { "epoch": 1.6583883212831325, "grad_norm": 0.008588061306746338, "learning_rate": 8.626957700211059e-07, "loss": 0.0004, "step": 252080 }, { "epoch": 1.6584541094583658, "grad_norm": 0.0014185282644257515, "learning_rate": 8.623734205398327e-07, "loss": 0.0011, "step": 252090 }, { "epoch": 1.6585198976335993, "grad_norm": 0.017893904153196662, "learning_rate": 8.620511256095926e-07, "loss": 0.0005, "step": 252100 }, { "epoch": 1.6585856858088328, "grad_norm": 0.05011169281285292, "learning_rate": 8.617288852346345e-07, "loss": 0.0008, "step": 252110 }, { "epoch": 1.658651473984066, "grad_norm": 0.031520738018974655, "learning_rate": 8.614066994192072e-07, "loss": 0.0008, "step": 252120 }, { "epoch": 1.6587172621592994, "grad_norm": 0.05631228183469427, "learning_rate": 8.610845681675584e-07, "loss": 0.0006, "step": 252130 }, { "epoch": 1.6587830503345329, "grad_norm": 0.0192428087043971, "learning_rate": 8.607624914839341e-07, "loss": 0.0004, "step": 252140 }, { "epoch": 1.6588488385097664, "grad_norm": 0.017780064322187424, "learning_rate": 8.604404693725821e-07, "loss": 0.0003, "step": 252150 }, { "epoch": 1.6589146266849997, "grad_norm": 0.019321224373248447, "learning_rate": 8.601185018377473e-07, "loss": 0.0009, "step": 252160 }, { "epoch": 1.658980414860233, "grad_norm": 0.0069563416679771645, "learning_rate": 8.597965888836746e-07, "loss": 0.0003, "step": 252170 }, { "epoch": 1.6590462030354665, "grad_norm": 0.02292835966808065, "learning_rate": 8.594747305146084e-07, "loss": 0.0003, "step": 252180 }, { "epoch": 1.6591119912106997, "grad_norm": 0.023601268843869004, "learning_rate": 8.591529267347914e-07, "loss": 0.0003, "step": 252190 }, { "epoch": 1.6591777793859332, "grad_norm": 0.036623831372458, "learning_rate": 8.588311775484659e-07, "loss": 0.0005, "step": 252200 }, { "epoch": 1.6592435675611665, "grad_norm": 0.02360887837578803, "learning_rate": 8.585094829598762e-07, "loss": 0.0003, "step": 252210 }, { "epoch": 1.6593093557363998, "grad_norm": 0.013389391321837134, "learning_rate": 8.581878429732615e-07, "loss": 0.0003, "step": 252220 }, { "epoch": 1.6593751439116333, "grad_norm": 0.000923651927637754, "learning_rate": 8.578662575928636e-07, "loss": 0.0005, "step": 252230 }, { "epoch": 1.6594409320868668, "grad_norm": 0.0981057145969278, "learning_rate": 8.575447268229215e-07, "loss": 0.0006, "step": 252240 }, { "epoch": 1.6595067202621, "grad_norm": 0.016722645228024205, "learning_rate": 8.572232506676748e-07, "loss": 0.0003, "step": 252250 }, { "epoch": 1.6595725084373334, "grad_norm": 0.0230594144852893, "learning_rate": 8.569018291313608e-07, "loss": 0.0005, "step": 252260 }, { "epoch": 1.659638296612567, "grad_norm": 0.04550440484099012, "learning_rate": 8.565804622182189e-07, "loss": 0.0009, "step": 252270 }, { "epoch": 1.6597040847878004, "grad_norm": 0.001582680682970318, "learning_rate": 8.562591499324846e-07, "loss": 0.0004, "step": 252280 }, { "epoch": 1.6597698729630337, "grad_norm": 0.040015685454162286, "learning_rate": 8.559378922783945e-07, "loss": 0.0007, "step": 252290 }, { "epoch": 1.659835661138267, "grad_norm": 0.02322487218733079, "learning_rate": 8.556166892601847e-07, "loss": 0.0018, "step": 252300 }, { "epoch": 1.6599014493135003, "grad_norm": 0.006736125755762564, "learning_rate": 8.55295540882089e-07, "loss": 0.0004, "step": 252310 }, { "epoch": 1.6599672374887338, "grad_norm": 0.0495492283753936, "learning_rate": 8.549744471483423e-07, "loss": 0.0005, "step": 252320 }, { "epoch": 1.6600330256639673, "grad_norm": 0.02364021263243826, "learning_rate": 8.54653408063178e-07, "loss": 0.0011, "step": 252330 }, { "epoch": 1.6600988138392005, "grad_norm": 0.06553019348731623, "learning_rate": 8.543324236308276e-07, "loss": 0.0004, "step": 252340 }, { "epoch": 1.6601646020144338, "grad_norm": 0.027351365327420236, "learning_rate": 8.540114938555244e-07, "loss": 0.0003, "step": 252350 }, { "epoch": 1.6602303901896673, "grad_norm": 0.03742917851041039, "learning_rate": 8.536906187414984e-07, "loss": 0.0002, "step": 252360 }, { "epoch": 1.6602961783649008, "grad_norm": 0.024935390165968373, "learning_rate": 8.533697982929812e-07, "loss": 0.0006, "step": 252370 }, { "epoch": 1.6603619665401341, "grad_norm": 0.029165778445204394, "learning_rate": 8.530490325142015e-07, "loss": 0.0004, "step": 252380 }, { "epoch": 1.6604277547153674, "grad_norm": 0.02910833867527775, "learning_rate": 8.52728321409389e-07, "loss": 0.0002, "step": 252390 }, { "epoch": 1.6604935428906007, "grad_norm": 0.015015144767151307, "learning_rate": 8.524076649827717e-07, "loss": 0.0004, "step": 252400 }, { "epoch": 1.6605593310658342, "grad_norm": 0.0732229201312656, "learning_rate": 8.520870632385769e-07, "loss": 0.0006, "step": 252410 }, { "epoch": 1.6606251192410677, "grad_norm": 0.036704215073870235, "learning_rate": 8.517665161810324e-07, "loss": 0.0003, "step": 252420 }, { "epoch": 1.660690907416301, "grad_norm": 0.017459113070502477, "learning_rate": 8.514460238143618e-07, "loss": 0.0007, "step": 252430 }, { "epoch": 1.6607566955915343, "grad_norm": 0.03982788537534262, "learning_rate": 8.511255861427941e-07, "loss": 0.0013, "step": 252440 }, { "epoch": 1.6608224837667678, "grad_norm": 0.041657538850499845, "learning_rate": 8.508052031705527e-07, "loss": 0.0004, "step": 252450 }, { "epoch": 1.6608882719420013, "grad_norm": 0.02556821416149978, "learning_rate": 8.504848749018607e-07, "loss": 0.0003, "step": 252460 }, { "epoch": 1.6609540601172346, "grad_norm": 0.019234756314744594, "learning_rate": 8.50164601340942e-07, "loss": 0.0007, "step": 252470 }, { "epoch": 1.6610198482924678, "grad_norm": 0.014792322326098197, "learning_rate": 8.498443824920188e-07, "loss": 0.0004, "step": 252480 }, { "epoch": 1.6610856364677011, "grad_norm": 0.024025411741150932, "learning_rate": 8.495242183593133e-07, "loss": 0.0007, "step": 252490 }, { "epoch": 1.6611514246429346, "grad_norm": 0.0005954679870269298, "learning_rate": 8.492041089470465e-07, "loss": 0.0004, "step": 252500 }, { "epoch": 1.6612172128181681, "grad_norm": 0.030087282951605794, "learning_rate": 8.48884054259439e-07, "loss": 0.0004, "step": 252510 }, { "epoch": 1.6612830009934014, "grad_norm": 0.07663929451163129, "learning_rate": 8.485640543007096e-07, "loss": 0.0005, "step": 252520 }, { "epoch": 1.6613487891686347, "grad_norm": 0.014604544258264265, "learning_rate": 8.482441090750776e-07, "loss": 0.0004, "step": 252530 }, { "epoch": 1.6614145773438682, "grad_norm": 0.011729235356736725, "learning_rate": 8.479242185867615e-07, "loss": 0.0012, "step": 252540 }, { "epoch": 1.6614803655191017, "grad_norm": 0.030341615181862232, "learning_rate": 8.476043828399788e-07, "loss": 0.0002, "step": 252550 }, { "epoch": 1.661546153694335, "grad_norm": 0.006929522134406344, "learning_rate": 8.472846018389447e-07, "loss": 0.0008, "step": 252560 }, { "epoch": 1.6616119418695683, "grad_norm": 0.00020228989680539503, "learning_rate": 8.469648755878779e-07, "loss": 0.0006, "step": 252570 }, { "epoch": 1.6616777300448018, "grad_norm": 0.00019601797611949366, "learning_rate": 8.466452040909922e-07, "loss": 0.0004, "step": 252580 }, { "epoch": 1.661743518220035, "grad_norm": 0.029997931306343695, "learning_rate": 8.463255873525028e-07, "loss": 0.0005, "step": 252590 }, { "epoch": 1.6618093063952686, "grad_norm": 0.02185834667185252, "learning_rate": 8.460060253766228e-07, "loss": 0.0003, "step": 252600 }, { "epoch": 1.6618750945705019, "grad_norm": 0.01810566412254536, "learning_rate": 8.456865181675661e-07, "loss": 0.0008, "step": 252610 }, { "epoch": 1.6619408827457351, "grad_norm": 0.007266676660198144, "learning_rate": 8.453670657295443e-07, "loss": 0.0004, "step": 252620 }, { "epoch": 1.6620066709209687, "grad_norm": 0.028189286540037963, "learning_rate": 8.450476680667697e-07, "loss": 0.0004, "step": 252630 }, { "epoch": 1.6620724590962022, "grad_norm": 0.018774313197651795, "learning_rate": 8.447283251834531e-07, "loss": 0.0006, "step": 252640 }, { "epoch": 1.6621382472714354, "grad_norm": 0.02569550098882191, "learning_rate": 8.444090370838054e-07, "loss": 0.0005, "step": 252650 }, { "epoch": 1.6622040354466687, "grad_norm": 0.05158150128710304, "learning_rate": 8.440898037720346e-07, "loss": 0.0004, "step": 252660 }, { "epoch": 1.6622698236219022, "grad_norm": 0.02695318283582163, "learning_rate": 8.437706252523515e-07, "loss": 0.0008, "step": 252670 }, { "epoch": 1.6623356117971357, "grad_norm": 0.02722664424048861, "learning_rate": 8.434515015289624e-07, "loss": 0.0003, "step": 252680 }, { "epoch": 1.662401399972369, "grad_norm": 0.02652704362231484, "learning_rate": 8.431324326060741e-07, "loss": 0.0009, "step": 252690 }, { "epoch": 1.6624671881476023, "grad_norm": 0.02156563861959307, "learning_rate": 8.428134184878961e-07, "loss": 0.0004, "step": 252700 }, { "epoch": 1.6625329763228356, "grad_norm": 0.042219698667595416, "learning_rate": 8.424944591786327e-07, "loss": 0.0006, "step": 252710 }, { "epoch": 1.662598764498069, "grad_norm": 0.034263826511262536, "learning_rate": 8.421755546824895e-07, "loss": 0.0005, "step": 252720 }, { "epoch": 1.6626645526733026, "grad_norm": 0.01893106587967296, "learning_rate": 8.418567050036707e-07, "loss": 0.0004, "step": 252730 }, { "epoch": 1.6627303408485359, "grad_norm": 0.001544425774587593, "learning_rate": 8.415379101463795e-07, "loss": 0.0014, "step": 252740 }, { "epoch": 1.6627961290237692, "grad_norm": 0.004906712427652152, "learning_rate": 8.412191701148198e-07, "loss": 0.0004, "step": 252750 }, { "epoch": 1.6628619171990027, "grad_norm": 0.009383985440902758, "learning_rate": 8.409004849131936e-07, "loss": 0.0001, "step": 252760 }, { "epoch": 1.6629277053742362, "grad_norm": 0.014580745072275634, "learning_rate": 8.405818545457023e-07, "loss": 0.0005, "step": 252770 }, { "epoch": 1.6629934935494695, "grad_norm": 0.038489968575642235, "learning_rate": 8.402632790165471e-07, "loss": 0.0005, "step": 252780 }, { "epoch": 1.6630592817247027, "grad_norm": 0.058122289184630624, "learning_rate": 8.399447583299281e-07, "loss": 0.0003, "step": 252790 }, { "epoch": 1.663125069899936, "grad_norm": 0.019893506157063246, "learning_rate": 8.396262924900445e-07, "loss": 0.0005, "step": 252800 }, { "epoch": 1.6631908580751695, "grad_norm": 0.0050312332444693115, "learning_rate": 8.393078815010952e-07, "loss": 0.0006, "step": 252810 }, { "epoch": 1.663256646250403, "grad_norm": 0.010329195822955646, "learning_rate": 8.389895253672764e-07, "loss": 0.0004, "step": 252820 }, { "epoch": 1.6633224344256363, "grad_norm": 0.09986987153797852, "learning_rate": 8.386712240927886e-07, "loss": 0.0007, "step": 252830 }, { "epoch": 1.6633882226008696, "grad_norm": 0.028415094410980134, "learning_rate": 8.383529776818267e-07, "loss": 0.0003, "step": 252840 }, { "epoch": 1.663454010776103, "grad_norm": 0.03596462071928261, "learning_rate": 8.380347861385868e-07, "loss": 0.0004, "step": 252850 }, { "epoch": 1.6635197989513366, "grad_norm": 0.05099739460513875, "learning_rate": 8.377166494672634e-07, "loss": 0.0007, "step": 252860 }, { "epoch": 1.66358558712657, "grad_norm": 0.013570939337796269, "learning_rate": 8.373985676720514e-07, "loss": 0.0006, "step": 252870 }, { "epoch": 1.6636513753018032, "grad_norm": 0.03301805591454211, "learning_rate": 8.370805407571441e-07, "loss": 0.0016, "step": 252880 }, { "epoch": 1.6637171634770367, "grad_norm": 0.016442180080688932, "learning_rate": 8.367625687267344e-07, "loss": 0.0002, "step": 252890 }, { "epoch": 1.66378295165227, "grad_norm": 0.04921830134412241, "learning_rate": 8.36444651585015e-07, "loss": 0.0007, "step": 252900 }, { "epoch": 1.6638487398275035, "grad_norm": 0.059656038145310085, "learning_rate": 8.361267893361769e-07, "loss": 0.0009, "step": 252910 }, { "epoch": 1.6639145280027368, "grad_norm": 0.0338577030941334, "learning_rate": 8.358089819844107e-07, "loss": 0.0003, "step": 252920 }, { "epoch": 1.66398031617797, "grad_norm": 0.01724996410524124, "learning_rate": 8.354912295339063e-07, "loss": 0.0005, "step": 252930 }, { "epoch": 1.6640461043532035, "grad_norm": 0.012519290565155822, "learning_rate": 8.351735319888538e-07, "loss": 0.0003, "step": 252940 }, { "epoch": 1.664111892528437, "grad_norm": 0.0016267728097414706, "learning_rate": 8.348558893534403e-07, "loss": 0.0003, "step": 252950 }, { "epoch": 1.6641776807036703, "grad_norm": 0.005835126165664676, "learning_rate": 8.345383016318554e-07, "loss": 0.0002, "step": 252960 }, { "epoch": 1.6642434688789036, "grad_norm": 0.006172323879773309, "learning_rate": 8.34220768828286e-07, "loss": 0.0007, "step": 252970 }, { "epoch": 1.6643092570541371, "grad_norm": 0.0016642813472883438, "learning_rate": 8.339032909469174e-07, "loss": 0.0004, "step": 252980 }, { "epoch": 1.6643750452293706, "grad_norm": 0.15226189366938553, "learning_rate": 8.335858679919362e-07, "loss": 0.0004, "step": 252990 }, { "epoch": 1.664440833404604, "grad_norm": 0.040410978011480955, "learning_rate": 8.332684999675267e-07, "loss": 0.0003, "step": 253000 }, { "epoch": 1.6645066215798372, "grad_norm": 0.1078892318428172, "learning_rate": 8.329511868778739e-07, "loss": 0.0005, "step": 253010 }, { "epoch": 1.6645724097550705, "grad_norm": 0.008585807555672358, "learning_rate": 8.326339287271601e-07, "loss": 0.0004, "step": 253020 }, { "epoch": 1.664638197930304, "grad_norm": 0.06676696557483093, "learning_rate": 8.323167255195691e-07, "loss": 0.0006, "step": 253030 }, { "epoch": 1.6647039861055375, "grad_norm": 0.003782264045319274, "learning_rate": 8.319995772592826e-07, "loss": 0.0006, "step": 253040 }, { "epoch": 1.6647697742807708, "grad_norm": 0.015226106326071116, "learning_rate": 8.316824839504822e-07, "loss": 0.0007, "step": 253050 }, { "epoch": 1.664835562456004, "grad_norm": 0.044752002808494486, "learning_rate": 8.31365445597348e-07, "loss": 0.0002, "step": 253060 }, { "epoch": 1.6649013506312376, "grad_norm": 0.017279056244250933, "learning_rate": 8.310484622040604e-07, "loss": 0.0003, "step": 253070 }, { "epoch": 1.664967138806471, "grad_norm": 0.027576816371196057, "learning_rate": 8.307315337747968e-07, "loss": 0.0005, "step": 253080 }, { "epoch": 1.6650329269817044, "grad_norm": 0.013460888477050273, "learning_rate": 8.304146603137381e-07, "loss": 0.0007, "step": 253090 }, { "epoch": 1.6650987151569376, "grad_norm": 0.022223278331762852, "learning_rate": 8.300978418250616e-07, "loss": 0.0008, "step": 253100 }, { "epoch": 1.665164503332171, "grad_norm": 0.00016937900369322058, "learning_rate": 8.297810783129434e-07, "loss": 0.0003, "step": 253110 }, { "epoch": 1.6652302915074044, "grad_norm": 0.05694082271235764, "learning_rate": 8.294643697815602e-07, "loss": 0.0003, "step": 253120 }, { "epoch": 1.665296079682638, "grad_norm": 0.05839838451789244, "learning_rate": 8.291477162350869e-07, "loss": 0.0015, "step": 253130 }, { "epoch": 1.6653618678578712, "grad_norm": 0.0008941583762967745, "learning_rate": 8.288311176776992e-07, "loss": 0.0002, "step": 253140 }, { "epoch": 1.6654276560331045, "grad_norm": 0.02096177277812442, "learning_rate": 8.285145741135708e-07, "loss": 0.0005, "step": 253150 }, { "epoch": 1.665493444208338, "grad_norm": 0.0003994138808905855, "learning_rate": 8.281980855468747e-07, "loss": 0.0009, "step": 253160 }, { "epoch": 1.6655592323835715, "grad_norm": 0.006656557679225718, "learning_rate": 8.278816519817839e-07, "loss": 0.0004, "step": 253170 }, { "epoch": 1.6656250205588048, "grad_norm": 0.02542010354503008, "learning_rate": 8.275652734224704e-07, "loss": 0.0005, "step": 253180 }, { "epoch": 1.665690808734038, "grad_norm": 0.07031416667057895, "learning_rate": 8.272489498731051e-07, "loss": 0.0004, "step": 253190 }, { "epoch": 1.6657565969092716, "grad_norm": 0.005497755049162693, "learning_rate": 8.269326813378586e-07, "loss": 0.0005, "step": 253200 }, { "epoch": 1.6658223850845049, "grad_norm": 0.0019166237807045196, "learning_rate": 8.266164678208993e-07, "loss": 0.0002, "step": 253210 }, { "epoch": 1.6658881732597384, "grad_norm": 0.009452881150618153, "learning_rate": 8.263003093263994e-07, "loss": 0.0001, "step": 253220 }, { "epoch": 1.6659539614349717, "grad_norm": 0.0370132871990858, "learning_rate": 8.259842058585243e-07, "loss": 0.0004, "step": 253230 }, { "epoch": 1.666019749610205, "grad_norm": 0.009329117773857117, "learning_rate": 8.256681574214431e-07, "loss": 0.0001, "step": 253240 }, { "epoch": 1.6660855377854384, "grad_norm": 0.04888095660040468, "learning_rate": 8.253521640193219e-07, "loss": 0.0005, "step": 253250 }, { "epoch": 1.666151325960672, "grad_norm": 0.025024482025027574, "learning_rate": 8.250362256563271e-07, "loss": 0.0002, "step": 253260 }, { "epoch": 1.6662171141359052, "grad_norm": 0.0045684981737787525, "learning_rate": 8.247203423366235e-07, "loss": 0.0003, "step": 253270 }, { "epoch": 1.6662829023111385, "grad_norm": 0.0009576589416994555, "learning_rate": 8.244045140643764e-07, "loss": 0.0006, "step": 253280 }, { "epoch": 1.666348690486372, "grad_norm": 0.00017268371202158741, "learning_rate": 8.240887408437497e-07, "loss": 0.0003, "step": 253290 }, { "epoch": 1.6664144786616055, "grad_norm": 0.06498513906251262, "learning_rate": 8.237730226789065e-07, "loss": 0.0007, "step": 253300 }, { "epoch": 1.6664802668368388, "grad_norm": 0.016220462159781367, "learning_rate": 8.234573595740087e-07, "loss": 0.0005, "step": 253310 }, { "epoch": 1.666546055012072, "grad_norm": 0.028293641962368367, "learning_rate": 8.23141751533219e-07, "loss": 0.0002, "step": 253320 }, { "epoch": 1.6666118431873054, "grad_norm": 0.14014629273133355, "learning_rate": 8.228261985606977e-07, "loss": 0.0018, "step": 253330 }, { "epoch": 1.6666776313625389, "grad_norm": 0.022255724031669217, "learning_rate": 8.225107006606037e-07, "loss": 0.0002, "step": 253340 }, { "epoch": 1.6667434195377724, "grad_norm": 0.04629652692475054, "learning_rate": 8.221952578371001e-07, "loss": 0.0007, "step": 253350 }, { "epoch": 1.6668092077130057, "grad_norm": 0.06438013615581728, "learning_rate": 8.218798700943436e-07, "loss": 0.0004, "step": 253360 }, { "epoch": 1.666874995888239, "grad_norm": 0.015161935804120953, "learning_rate": 8.215645374364923e-07, "loss": 0.0001, "step": 253370 }, { "epoch": 1.6669407840634725, "grad_norm": 0.003060207200062365, "learning_rate": 8.212492598677041e-07, "loss": 0.0004, "step": 253380 }, { "epoch": 1.667006572238706, "grad_norm": 0.011411115253179683, "learning_rate": 8.209340373921353e-07, "loss": 0.001, "step": 253390 }, { "epoch": 1.6670723604139392, "grad_norm": 0.03594294795310901, "learning_rate": 8.206188700139422e-07, "loss": 0.0003, "step": 253400 }, { "epoch": 1.6671381485891725, "grad_norm": 0.023960527375533155, "learning_rate": 8.203037577372797e-07, "loss": 0.0006, "step": 253410 }, { "epoch": 1.6672039367644058, "grad_norm": 0.004070769198881704, "learning_rate": 8.199887005663021e-07, "loss": 0.0001, "step": 253420 }, { "epoch": 1.6672697249396393, "grad_norm": 0.004522823372224295, "learning_rate": 8.196736985051634e-07, "loss": 0.0002, "step": 253430 }, { "epoch": 1.6673355131148728, "grad_norm": 0.03510631621979898, "learning_rate": 8.193587515580164e-07, "loss": 0.0002, "step": 253440 }, { "epoch": 1.667401301290106, "grad_norm": 0.021838354594000808, "learning_rate": 8.19043859729014e-07, "loss": 0.0001, "step": 253450 }, { "epoch": 1.6674670894653394, "grad_norm": 0.024173775610971546, "learning_rate": 8.187290230223072e-07, "loss": 0.0007, "step": 253460 }, { "epoch": 1.667532877640573, "grad_norm": 0.001467956909640873, "learning_rate": 8.184142414420459e-07, "loss": 0.0002, "step": 253470 }, { "epoch": 1.6675986658158064, "grad_norm": 0.03160414750663994, "learning_rate": 8.180995149923826e-07, "loss": 0.0004, "step": 253480 }, { "epoch": 1.6676644539910397, "grad_norm": 0.0062034786701621195, "learning_rate": 8.177848436774654e-07, "loss": 0.0004, "step": 253490 }, { "epoch": 1.667730242166273, "grad_norm": 0.006577946211996795, "learning_rate": 8.174702275014434e-07, "loss": 0.0003, "step": 253500 }, { "epoch": 1.6677960303415063, "grad_norm": 0.01815240143335639, "learning_rate": 8.171556664684637e-07, "loss": 0.0007, "step": 253510 }, { "epoch": 1.6678618185167398, "grad_norm": 0.03826883484956466, "learning_rate": 8.168411605826743e-07, "loss": 0.0004, "step": 253520 }, { "epoch": 1.6679276066919733, "grad_norm": 0.004259285038997006, "learning_rate": 8.165267098482216e-07, "loss": 0.001, "step": 253530 }, { "epoch": 1.6679933948672065, "grad_norm": 0.008151233968727023, "learning_rate": 8.162123142692507e-07, "loss": 0.0006, "step": 253540 }, { "epoch": 1.6680591830424398, "grad_norm": 0.0047968915325428355, "learning_rate": 8.158979738499073e-07, "loss": 0.0006, "step": 253550 }, { "epoch": 1.6681249712176733, "grad_norm": 0.004038391842198615, "learning_rate": 8.155836885943353e-07, "loss": 0.0002, "step": 253560 }, { "epoch": 1.6681907593929068, "grad_norm": 0.015555933437922116, "learning_rate": 8.152694585066784e-07, "loss": 0.0003, "step": 253570 }, { "epoch": 1.6682565475681401, "grad_norm": 0.028776127529824957, "learning_rate": 8.149552835910795e-07, "loss": 0.001, "step": 253580 }, { "epoch": 1.6683223357433734, "grad_norm": 0.024064155802565775, "learning_rate": 8.14641163851681e-07, "loss": 0.0006, "step": 253590 }, { "epoch": 1.668388123918607, "grad_norm": 0.08700529803203376, "learning_rate": 8.143270992926228e-07, "loss": 0.0013, "step": 253600 }, { "epoch": 1.6684539120938404, "grad_norm": 0.013031292094087623, "learning_rate": 8.140130899180482e-07, "loss": 0.0006, "step": 253610 }, { "epoch": 1.6685197002690737, "grad_norm": 0.0120808325502051, "learning_rate": 8.136991357320956e-07, "loss": 0.0002, "step": 253620 }, { "epoch": 1.668585488444307, "grad_norm": 0.0009200708095396352, "learning_rate": 8.133852367389045e-07, "loss": 0.0002, "step": 253630 }, { "epoch": 1.6686512766195403, "grad_norm": 0.029221899284808575, "learning_rate": 8.130713929426127e-07, "loss": 0.0001, "step": 253640 }, { "epoch": 1.6687170647947738, "grad_norm": 0.002024226143695267, "learning_rate": 8.127576043473589e-07, "loss": 0.0002, "step": 253650 }, { "epoch": 1.6687828529700073, "grad_norm": 0.06767977298367212, "learning_rate": 8.124438709572796e-07, "loss": 0.0008, "step": 253660 }, { "epoch": 1.6688486411452406, "grad_norm": 0.011341791609020385, "learning_rate": 8.121301927765118e-07, "loss": 0.0006, "step": 253670 }, { "epoch": 1.6689144293204738, "grad_norm": 0.009867621604475532, "learning_rate": 8.118165698091901e-07, "loss": 0.001, "step": 253680 }, { "epoch": 1.6689802174957074, "grad_norm": 0.029368994645688207, "learning_rate": 8.115030020594494e-07, "loss": 0.0003, "step": 253690 }, { "epoch": 1.6690460056709409, "grad_norm": 0.03353421286197332, "learning_rate": 8.111894895314248e-07, "loss": 0.0004, "step": 253700 }, { "epoch": 1.6691117938461741, "grad_norm": 0.010221864453578934, "learning_rate": 8.108760322292486e-07, "loss": 0.0003, "step": 253710 }, { "epoch": 1.6691775820214074, "grad_norm": 0.0358839102766643, "learning_rate": 8.105626301570546e-07, "loss": 0.0008, "step": 253720 }, { "epoch": 1.6692433701966407, "grad_norm": 0.051686868911564855, "learning_rate": 8.102492833189723e-07, "loss": 0.0004, "step": 253730 }, { "epoch": 1.6693091583718742, "grad_norm": 0.013997163453255254, "learning_rate": 8.099359917191362e-07, "loss": 0.0037, "step": 253740 }, { "epoch": 1.6693749465471077, "grad_norm": 0.07200725206962277, "learning_rate": 8.096227553616753e-07, "loss": 0.0006, "step": 253750 }, { "epoch": 1.669440734722341, "grad_norm": 0.028839584006142248, "learning_rate": 8.09309574250719e-07, "loss": 0.0004, "step": 253760 }, { "epoch": 1.6695065228975743, "grad_norm": 0.007220548822358396, "learning_rate": 8.089964483903967e-07, "loss": 0.0003, "step": 253770 }, { "epoch": 1.6695723110728078, "grad_norm": 0.0001500220844378856, "learning_rate": 8.086833777848368e-07, "loss": 0.0003, "step": 253780 }, { "epoch": 1.6696380992480413, "grad_norm": 0.03478080160912151, "learning_rate": 8.083703624381667e-07, "loss": 0.0002, "step": 253790 }, { "epoch": 1.6697038874232746, "grad_norm": 0.007570441881594444, "learning_rate": 8.080574023545135e-07, "loss": 0.0001, "step": 253800 }, { "epoch": 1.6697696755985079, "grad_norm": 0.005412804003902788, "learning_rate": 8.077444975380028e-07, "loss": 0.0005, "step": 253810 }, { "epoch": 1.6698354637737411, "grad_norm": 0.044597755274614424, "learning_rate": 8.074316479927602e-07, "loss": 0.0004, "step": 253820 }, { "epoch": 1.6699012519489747, "grad_norm": 0.06215504593285817, "learning_rate": 8.071188537229102e-07, "loss": 0.001, "step": 253830 }, { "epoch": 1.6699670401242082, "grad_norm": 0.0010128907719540526, "learning_rate": 8.068061147325773e-07, "loss": 0.0002, "step": 253840 }, { "epoch": 1.6700328282994414, "grad_norm": 0.025921585031740975, "learning_rate": 8.064934310258838e-07, "loss": 0.0008, "step": 253850 }, { "epoch": 1.6700986164746747, "grad_norm": 0.0011991476906598778, "learning_rate": 8.061808026069518e-07, "loss": 0.0004, "step": 253860 }, { "epoch": 1.6701644046499082, "grad_norm": 0.022032989835076948, "learning_rate": 8.058682294799053e-07, "loss": 0.0006, "step": 253870 }, { "epoch": 1.6702301928251417, "grad_norm": 0.2324412897591688, "learning_rate": 8.055557116488638e-07, "loss": 0.0007, "step": 253880 }, { "epoch": 1.670295981000375, "grad_norm": 0.04255864683445427, "learning_rate": 8.052432491179473e-07, "loss": 0.0007, "step": 253890 }, { "epoch": 1.6703617691756083, "grad_norm": 0.0071598419181330775, "learning_rate": 8.04930841891276e-07, "loss": 0.0001, "step": 253900 }, { "epoch": 1.6704275573508418, "grad_norm": 0.01763279472370769, "learning_rate": 8.046184899729687e-07, "loss": 0.0004, "step": 253910 }, { "epoch": 1.670493345526075, "grad_norm": 0.022187051202371532, "learning_rate": 8.043061933671425e-07, "loss": 0.0005, "step": 253920 }, { "epoch": 1.6705591337013086, "grad_norm": 0.007733151750824437, "learning_rate": 8.03993952077916e-07, "loss": 0.0006, "step": 253930 }, { "epoch": 1.6706249218765419, "grad_norm": 0.005739945100199197, "learning_rate": 8.036817661094054e-07, "loss": 0.0005, "step": 253940 }, { "epoch": 1.6706907100517752, "grad_norm": 0.006603829114909709, "learning_rate": 8.033696354657266e-07, "loss": 0.0006, "step": 253950 }, { "epoch": 1.6707564982270087, "grad_norm": 0.004716131165327407, "learning_rate": 8.030575601509943e-07, "loss": 0.0005, "step": 253960 }, { "epoch": 1.6708222864022422, "grad_norm": 0.01227326015729133, "learning_rate": 8.027455401693241e-07, "loss": 0.0006, "step": 253970 }, { "epoch": 1.6708880745774755, "grad_norm": 0.0005894262882047891, "learning_rate": 8.02433575524828e-07, "loss": 0.0002, "step": 253980 }, { "epoch": 1.6709538627527087, "grad_norm": 0.00022444949103561803, "learning_rate": 8.021216662216191e-07, "loss": 0.0006, "step": 253990 }, { "epoch": 1.6710196509279422, "grad_norm": 0.02078157557793859, "learning_rate": 8.018098122638123e-07, "loss": 0.0005, "step": 254000 }, { "epoch": 1.6710854391031758, "grad_norm": 0.01603344730082246, "learning_rate": 8.014980136555167e-07, "loss": 0.0006, "step": 254010 }, { "epoch": 1.671151227278409, "grad_norm": 0.12423765150454766, "learning_rate": 8.011862704008438e-07, "loss": 0.0008, "step": 254020 }, { "epoch": 1.6712170154536423, "grad_norm": 0.017538227125720226, "learning_rate": 8.008745825039039e-07, "loss": 0.0002, "step": 254030 }, { "epoch": 1.6712828036288756, "grad_norm": 0.01571281074055568, "learning_rate": 8.005629499688055e-07, "loss": 0.0007, "step": 254040 }, { "epoch": 1.671348591804109, "grad_norm": 0.018492442689316595, "learning_rate": 8.002513727996581e-07, "loss": 0.0003, "step": 254050 }, { "epoch": 1.6714143799793426, "grad_norm": 0.03730783869207707, "learning_rate": 7.99939851000569e-07, "loss": 0.0001, "step": 254060 }, { "epoch": 1.671480168154576, "grad_norm": 0.06218186645078713, "learning_rate": 7.996283845756458e-07, "loss": 0.0003, "step": 254070 }, { "epoch": 1.6715459563298092, "grad_norm": 0.030707091784548595, "learning_rate": 7.993169735289946e-07, "loss": 0.0005, "step": 254080 }, { "epoch": 1.6716117445050427, "grad_norm": 0.005394225962403351, "learning_rate": 7.990056178647216e-07, "loss": 0.0004, "step": 254090 }, { "epoch": 1.6716775326802762, "grad_norm": 0.04725865186671869, "learning_rate": 7.986943175869305e-07, "loss": 0.004, "step": 254100 }, { "epoch": 1.6717433208555095, "grad_norm": 0.02956159863103344, "learning_rate": 7.983830726997266e-07, "loss": 0.0009, "step": 254110 }, { "epoch": 1.6718091090307428, "grad_norm": 0.0017909629151414916, "learning_rate": 7.980718832072121e-07, "loss": 0.0004, "step": 254120 }, { "epoch": 1.671874897205976, "grad_norm": 0.026801904488560582, "learning_rate": 7.97760749113492e-07, "loss": 0.0004, "step": 254130 }, { "epoch": 1.6719406853812095, "grad_norm": 0.020472880570793366, "learning_rate": 7.97449670422667e-07, "loss": 0.0007, "step": 254140 }, { "epoch": 1.672006473556443, "grad_norm": 0.011681726358206527, "learning_rate": 7.971386471388387e-07, "loss": 0.0004, "step": 254150 }, { "epoch": 1.6720722617316763, "grad_norm": 0.18051717318613605, "learning_rate": 7.968276792661072e-07, "loss": 0.0011, "step": 254160 }, { "epoch": 1.6721380499069096, "grad_norm": 0.0013770330595655386, "learning_rate": 7.965167668085727e-07, "loss": 0.0007, "step": 254170 }, { "epoch": 1.6722038380821431, "grad_norm": 0.02390471813478774, "learning_rate": 7.962059097703345e-07, "loss": 0.0005, "step": 254180 }, { "epoch": 1.6722696262573766, "grad_norm": 0.034718119839899496, "learning_rate": 7.958951081554905e-07, "loss": 0.0008, "step": 254190 }, { "epoch": 1.67233541443261, "grad_norm": 0.009075854316988222, "learning_rate": 7.955843619681391e-07, "loss": 0.0004, "step": 254200 }, { "epoch": 1.6724012026078432, "grad_norm": 0.02753834337287087, "learning_rate": 7.952736712123777e-07, "loss": 0.0007, "step": 254210 }, { "epoch": 1.6724669907830767, "grad_norm": 0.015118989813205391, "learning_rate": 7.949630358922999e-07, "loss": 0.0003, "step": 254220 }, { "epoch": 1.67253277895831, "grad_norm": 0.05090501461890537, "learning_rate": 7.946524560120017e-07, "loss": 0.0007, "step": 254230 }, { "epoch": 1.6725985671335435, "grad_norm": 0.010327866159972623, "learning_rate": 7.943419315755802e-07, "loss": 0.0008, "step": 254240 }, { "epoch": 1.6726643553087768, "grad_norm": 0.0427873583534764, "learning_rate": 7.940314625871281e-07, "loss": 0.0004, "step": 254250 }, { "epoch": 1.67273014348401, "grad_norm": 0.019363402098275306, "learning_rate": 7.937210490507386e-07, "loss": 0.0006, "step": 254260 }, { "epoch": 1.6727959316592436, "grad_norm": 0.008663620795192844, "learning_rate": 7.934106909705048e-07, "loss": 0.0004, "step": 254270 }, { "epoch": 1.672861719834477, "grad_norm": 0.0005090597636301613, "learning_rate": 7.931003883505173e-07, "loss": 0.001, "step": 254280 }, { "epoch": 1.6729275080097104, "grad_norm": 0.016646893971643425, "learning_rate": 7.92790141194868e-07, "loss": 0.0006, "step": 254290 }, { "epoch": 1.6729932961849436, "grad_norm": 0.004699289308731807, "learning_rate": 7.924799495076474e-07, "loss": 0.0005, "step": 254300 }, { "epoch": 1.6730590843601771, "grad_norm": 0.07547889050184603, "learning_rate": 7.921698132929445e-07, "loss": 0.0006, "step": 254310 }, { "epoch": 1.6731248725354106, "grad_norm": 0.057146306188789915, "learning_rate": 7.918597325548489e-07, "loss": 0.0005, "step": 254320 }, { "epoch": 1.673190660710644, "grad_norm": 0.028920110102358572, "learning_rate": 7.915497072974476e-07, "loss": 0.0003, "step": 254330 }, { "epoch": 1.6732564488858772, "grad_norm": 0.00474301912978703, "learning_rate": 7.912397375248293e-07, "loss": 0.0003, "step": 254340 }, { "epoch": 1.6733222370611105, "grad_norm": 0.0033337778871578134, "learning_rate": 7.909298232410801e-07, "loss": 0.0004, "step": 254350 }, { "epoch": 1.673388025236344, "grad_norm": 0.03913395963554426, "learning_rate": 7.906199644502849e-07, "loss": 0.0006, "step": 254360 }, { "epoch": 1.6734538134115775, "grad_norm": 0.00796361831195685, "learning_rate": 7.903101611565311e-07, "loss": 0.0004, "step": 254370 }, { "epoch": 1.6735196015868108, "grad_norm": 0.032583279845292475, "learning_rate": 7.900004133639016e-07, "loss": 0.0005, "step": 254380 }, { "epoch": 1.673585389762044, "grad_norm": 0.037012710739343056, "learning_rate": 7.896907210764815e-07, "loss": 0.0003, "step": 254390 }, { "epoch": 1.6736511779372776, "grad_norm": 0.018183825643963026, "learning_rate": 7.893810842983523e-07, "loss": 0.0008, "step": 254400 }, { "epoch": 1.673716966112511, "grad_norm": 0.04724284979129996, "learning_rate": 7.890715030335977e-07, "loss": 0.0004, "step": 254410 }, { "epoch": 1.6737827542877444, "grad_norm": 0.012507425857157483, "learning_rate": 7.887619772862981e-07, "loss": 0.0007, "step": 254420 }, { "epoch": 1.6738485424629777, "grad_norm": 0.03847942890269805, "learning_rate": 7.884525070605342e-07, "loss": 0.0005, "step": 254430 }, { "epoch": 1.673914330638211, "grad_norm": 0.0003612254861278964, "learning_rate": 7.881430923603878e-07, "loss": 0.0005, "step": 254440 }, { "epoch": 1.6739801188134444, "grad_norm": 0.0003840886705353985, "learning_rate": 7.878337331899361e-07, "loss": 0.0004, "step": 254450 }, { "epoch": 1.674045906988678, "grad_norm": 0.04152806811216667, "learning_rate": 7.875244295532591e-07, "loss": 0.0005, "step": 254460 }, { "epoch": 1.6741116951639112, "grad_norm": 0.05002253825660048, "learning_rate": 7.872151814544348e-07, "loss": 0.0006, "step": 254470 }, { "epoch": 1.6741774833391445, "grad_norm": 0.046360319151996544, "learning_rate": 7.869059888975395e-07, "loss": 0.0004, "step": 254480 }, { "epoch": 1.674243271514378, "grad_norm": 0.0002484543005748174, "learning_rate": 7.86596851886649e-07, "loss": 0.0013, "step": 254490 }, { "epoch": 1.6743090596896115, "grad_norm": 0.007256701514467822, "learning_rate": 7.862877704258415e-07, "loss": 0.001, "step": 254500 }, { "epoch": 1.6743748478648448, "grad_norm": 0.014073771076553231, "learning_rate": 7.859787445191902e-07, "loss": 0.0004, "step": 254510 }, { "epoch": 1.674440636040078, "grad_norm": 0.029959458514056833, "learning_rate": 7.856697741707698e-07, "loss": 0.0004, "step": 254520 }, { "epoch": 1.6745064242153116, "grad_norm": 0.06492945811231024, "learning_rate": 7.853608593846535e-07, "loss": 0.0005, "step": 254530 }, { "epoch": 1.6745722123905449, "grad_norm": 0.0480036631993628, "learning_rate": 7.850520001649148e-07, "loss": 0.0003, "step": 254540 }, { "epoch": 1.6746380005657784, "grad_norm": 0.023802932442882122, "learning_rate": 7.847431965156244e-07, "loss": 0.0004, "step": 254550 }, { "epoch": 1.6747037887410117, "grad_norm": 0.030663411168399886, "learning_rate": 7.84434448440855e-07, "loss": 0.0004, "step": 254560 }, { "epoch": 1.674769576916245, "grad_norm": 0.011916748639827099, "learning_rate": 7.841257559446769e-07, "loss": 0.0002, "step": 254570 }, { "epoch": 1.6748353650914785, "grad_norm": 0.03219996188246866, "learning_rate": 7.838171190311589e-07, "loss": 0.0001, "step": 254580 }, { "epoch": 1.674901153266712, "grad_norm": 0.012956579849940942, "learning_rate": 7.83508537704371e-07, "loss": 0.0003, "step": 254590 }, { "epoch": 1.6749669414419452, "grad_norm": 0.06563317985011627, "learning_rate": 7.832000119683819e-07, "loss": 0.0005, "step": 254600 }, { "epoch": 1.6750327296171785, "grad_norm": 0.0018542826189253143, "learning_rate": 7.828915418272581e-07, "loss": 0.0002, "step": 254610 }, { "epoch": 1.675098517792412, "grad_norm": 0.023020168197113937, "learning_rate": 7.825831272850664e-07, "loss": 0.0003, "step": 254620 }, { "epoch": 1.6751643059676455, "grad_norm": 0.00027427739080884515, "learning_rate": 7.822747683458754e-07, "loss": 0.0002, "step": 254630 }, { "epoch": 1.6752300941428788, "grad_norm": 0.05803604210505738, "learning_rate": 7.819664650137482e-07, "loss": 0.0004, "step": 254640 }, { "epoch": 1.675295882318112, "grad_norm": 0.020490221961316935, "learning_rate": 7.816582172927506e-07, "loss": 0.0002, "step": 254650 }, { "epoch": 1.6753616704933454, "grad_norm": 0.01747245866921799, "learning_rate": 7.81350025186946e-07, "loss": 0.0004, "step": 254660 }, { "epoch": 1.675427458668579, "grad_norm": 0.021193744865656852, "learning_rate": 7.810418887003979e-07, "loss": 0.0002, "step": 254670 }, { "epoch": 1.6754932468438124, "grad_norm": 0.012242769985894623, "learning_rate": 7.807338078371691e-07, "loss": 0.0002, "step": 254680 }, { "epoch": 1.6755590350190457, "grad_norm": 0.0003392701034557658, "learning_rate": 7.804257826013207e-07, "loss": 0.0003, "step": 254690 }, { "epoch": 1.675624823194279, "grad_norm": 0.013011238851679405, "learning_rate": 7.801178129969139e-07, "loss": 0.0005, "step": 254700 }, { "epoch": 1.6756906113695125, "grad_norm": 0.14043765738024153, "learning_rate": 7.798098990280095e-07, "loss": 0.001, "step": 254710 }, { "epoch": 1.675756399544746, "grad_norm": 0.06023939422973363, "learning_rate": 7.79502040698667e-07, "loss": 0.0002, "step": 254720 }, { "epoch": 1.6758221877199793, "grad_norm": 0.0005197301163674269, "learning_rate": 7.791942380129447e-07, "loss": 0.0004, "step": 254730 }, { "epoch": 1.6758879758952125, "grad_norm": 0.01563580615748013, "learning_rate": 7.788864909749011e-07, "loss": 0.0006, "step": 254740 }, { "epoch": 1.6759537640704458, "grad_norm": 0.03217134504447177, "learning_rate": 7.785787995885924e-07, "loss": 0.0003, "step": 254750 }, { "epoch": 1.6760195522456793, "grad_norm": 0.030620458589341835, "learning_rate": 7.78271163858077e-07, "loss": 0.0004, "step": 254760 }, { "epoch": 1.6760853404209128, "grad_norm": 0.02409534794788589, "learning_rate": 7.779635837874111e-07, "loss": 0.0005, "step": 254770 }, { "epoch": 1.6761511285961461, "grad_norm": 0.05440653214074151, "learning_rate": 7.776560593806482e-07, "loss": 0.0004, "step": 254780 }, { "epoch": 1.6762169167713794, "grad_norm": 0.021902248756349094, "learning_rate": 7.773485906418438e-07, "loss": 0.0003, "step": 254790 }, { "epoch": 1.676282704946613, "grad_norm": 0.0015634160488491218, "learning_rate": 7.770411775750514e-07, "loss": 0.0003, "step": 254800 }, { "epoch": 1.6763484931218464, "grad_norm": 0.0015774479524962122, "learning_rate": 7.767338201843239e-07, "loss": 0.0003, "step": 254810 }, { "epoch": 1.6764142812970797, "grad_norm": 0.004016103509526708, "learning_rate": 7.764265184737135e-07, "loss": 0.0003, "step": 254820 }, { "epoch": 1.676480069472313, "grad_norm": 0.0290274632069224, "learning_rate": 7.761192724472716e-07, "loss": 0.0004, "step": 254830 }, { "epoch": 1.6765458576475463, "grad_norm": 0.002932104791753123, "learning_rate": 7.758120821090492e-07, "loss": 0.0003, "step": 254840 }, { "epoch": 1.6766116458227798, "grad_norm": 0.1184759931632183, "learning_rate": 7.755049474630966e-07, "loss": 0.0005, "step": 254850 }, { "epoch": 1.6766774339980133, "grad_norm": 0.0004350552556654328, "learning_rate": 7.751978685134626e-07, "loss": 0.0004, "step": 254860 }, { "epoch": 1.6767432221732466, "grad_norm": 0.05194576687895405, "learning_rate": 7.748908452641956e-07, "loss": 0.001, "step": 254870 }, { "epoch": 1.6768090103484798, "grad_norm": 0.00302423227632859, "learning_rate": 7.745838777193427e-07, "loss": 0.0009, "step": 254880 }, { "epoch": 1.6768747985237134, "grad_norm": 0.004799209159274479, "learning_rate": 7.74276965882953e-07, "loss": 0.0005, "step": 254890 }, { "epoch": 1.6769405866989469, "grad_norm": 0.02775327114465577, "learning_rate": 7.739701097590724e-07, "loss": 0.0003, "step": 254900 }, { "epoch": 1.6770063748741801, "grad_norm": 0.00029593401444681216, "learning_rate": 7.73663309351746e-07, "loss": 0.0003, "step": 254910 }, { "epoch": 1.6770721630494134, "grad_norm": 0.025393239074635435, "learning_rate": 7.733565646650188e-07, "loss": 0.0004, "step": 254920 }, { "epoch": 1.677137951224647, "grad_norm": 0.03232549351867278, "learning_rate": 7.730498757029348e-07, "loss": 0.0002, "step": 254930 }, { "epoch": 1.6772037393998802, "grad_norm": 0.0013958326732479898, "learning_rate": 7.727432424695374e-07, "loss": 0.0004, "step": 254940 }, { "epoch": 1.6772695275751137, "grad_norm": 0.010465221387115092, "learning_rate": 7.724366649688691e-07, "loss": 0.0004, "step": 254950 }, { "epoch": 1.677335315750347, "grad_norm": 0.002830295286081054, "learning_rate": 7.721301432049732e-07, "loss": 0.0003, "step": 254960 }, { "epoch": 1.6774011039255803, "grad_norm": 0.01435110391246171, "learning_rate": 7.718236771818893e-07, "loss": 0.0006, "step": 254970 }, { "epoch": 1.6774668921008138, "grad_norm": 0.02888548364396833, "learning_rate": 7.715172669036586e-07, "loss": 0.0005, "step": 254980 }, { "epoch": 1.6775326802760473, "grad_norm": 0.04092205677587826, "learning_rate": 7.712109123743206e-07, "loss": 0.0003, "step": 254990 }, { "epoch": 1.6775984684512806, "grad_norm": 0.00793193077008568, "learning_rate": 7.709046135979148e-07, "loss": 0.0004, "step": 255000 }, { "epoch": 1.6776642566265139, "grad_norm": 0.06515394202944724, "learning_rate": 7.705983705784781e-07, "loss": 0.0005, "step": 255010 }, { "epoch": 1.6777300448017474, "grad_norm": 0.0006335344833448834, "learning_rate": 7.7029218332005e-07, "loss": 0.0004, "step": 255020 }, { "epoch": 1.6777958329769809, "grad_norm": 0.06450999625769217, "learning_rate": 7.699860518266666e-07, "loss": 0.0006, "step": 255030 }, { "epoch": 1.6778616211522142, "grad_norm": 0.008445549550055899, "learning_rate": 7.696799761023637e-07, "loss": 0.0003, "step": 255040 }, { "epoch": 1.6779274093274474, "grad_norm": 0.02208557345351788, "learning_rate": 7.69373956151177e-07, "loss": 0.0006, "step": 255050 }, { "epoch": 1.6779931975026807, "grad_norm": 0.03522625838238451, "learning_rate": 7.69067991977141e-07, "loss": 0.0003, "step": 255060 }, { "epoch": 1.6780589856779142, "grad_norm": 0.16217478820502465, "learning_rate": 7.687620835842891e-07, "loss": 0.0004, "step": 255070 }, { "epoch": 1.6781247738531477, "grad_norm": 0.00021097408752237593, "learning_rate": 7.684562309766547e-07, "loss": 0.0006, "step": 255080 }, { "epoch": 1.678190562028381, "grad_norm": 0.001792251716537257, "learning_rate": 7.681504341582707e-07, "loss": 0.0002, "step": 255090 }, { "epoch": 1.6782563502036143, "grad_norm": 6.78187828731939e-05, "learning_rate": 7.678446931331679e-07, "loss": 0.0006, "step": 255100 }, { "epoch": 1.6783221383788478, "grad_norm": 0.005194230231862004, "learning_rate": 7.67539007905378e-07, "loss": 0.0009, "step": 255110 }, { "epoch": 1.6783879265540813, "grad_norm": 0.009158226581185962, "learning_rate": 7.67233378478931e-07, "loss": 0.0002, "step": 255120 }, { "epoch": 1.6784537147293146, "grad_norm": 0.010484977978648865, "learning_rate": 7.66927804857856e-07, "loss": 0.0004, "step": 255130 }, { "epoch": 1.6785195029045479, "grad_norm": 0.019620261665100655, "learning_rate": 7.666222870461809e-07, "loss": 0.0005, "step": 255140 }, { "epoch": 1.6785852910797812, "grad_norm": 0.03216814868987072, "learning_rate": 7.663168250479358e-07, "loss": 0.0003, "step": 255150 }, { "epoch": 1.6786510792550147, "grad_norm": 0.010128353678466779, "learning_rate": 7.660114188671475e-07, "loss": 0.0001, "step": 255160 }, { "epoch": 1.6787168674302482, "grad_norm": 0.05699221317553699, "learning_rate": 7.657060685078411e-07, "loss": 0.0007, "step": 255170 }, { "epoch": 1.6787826556054815, "grad_norm": 0.07563071735563416, "learning_rate": 7.654007739740438e-07, "loss": 0.0005, "step": 255180 }, { "epoch": 1.6788484437807147, "grad_norm": 0.013568839615386389, "learning_rate": 7.650955352697798e-07, "loss": 0.0006, "step": 255190 }, { "epoch": 1.6789142319559482, "grad_norm": 0.00019537340224535528, "learning_rate": 7.647903523990736e-07, "loss": 0.0003, "step": 255200 }, { "epoch": 1.6789800201311817, "grad_norm": 0.0034520338443420596, "learning_rate": 7.644852253659491e-07, "loss": 0.0005, "step": 255210 }, { "epoch": 1.679045808306415, "grad_norm": 0.021668725642984596, "learning_rate": 7.641801541744287e-07, "loss": 0.0004, "step": 255220 }, { "epoch": 1.6791115964816483, "grad_norm": 0.01735034914190104, "learning_rate": 7.638751388285348e-07, "loss": 0.0005, "step": 255230 }, { "epoch": 1.6791773846568818, "grad_norm": 0.0007387248866309945, "learning_rate": 7.635701793322881e-07, "loss": 0.0003, "step": 255240 }, { "epoch": 1.679243172832115, "grad_norm": 0.03250602746485062, "learning_rate": 7.632652756897102e-07, "loss": 0.001, "step": 255250 }, { "epoch": 1.6793089610073486, "grad_norm": 0.012132470836777409, "learning_rate": 7.629604279048209e-07, "loss": 0.0007, "step": 255260 }, { "epoch": 1.679374749182582, "grad_norm": 0.014674573226796585, "learning_rate": 7.626556359816373e-07, "loss": 0.0005, "step": 255270 }, { "epoch": 1.6794405373578152, "grad_norm": 0.02445444697128179, "learning_rate": 7.623508999241808e-07, "loss": 0.0003, "step": 255280 }, { "epoch": 1.6795063255330487, "grad_norm": 0.02618828210361987, "learning_rate": 7.620462197364681e-07, "loss": 0.0002, "step": 255290 }, { "epoch": 1.6795721137082822, "grad_norm": 0.0031172676915574207, "learning_rate": 7.617415954225154e-07, "loss": 0.0002, "step": 255300 }, { "epoch": 1.6796379018835155, "grad_norm": 0.08178475909363225, "learning_rate": 7.614370269863397e-07, "loss": 0.0004, "step": 255310 }, { "epoch": 1.6797036900587488, "grad_norm": 0.039140931459586255, "learning_rate": 7.611325144319559e-07, "loss": 0.0011, "step": 255320 }, { "epoch": 1.6797694782339823, "grad_norm": 0.11935968425037198, "learning_rate": 7.608280577633792e-07, "loss": 0.001, "step": 255330 }, { "epoch": 1.6798352664092158, "grad_norm": 0.009548724631722544, "learning_rate": 7.605236569846236e-07, "loss": 0.0004, "step": 255340 }, { "epoch": 1.679901054584449, "grad_norm": 0.020273677220288045, "learning_rate": 7.602193120997015e-07, "loss": 0.0005, "step": 255350 }, { "epoch": 1.6799668427596823, "grad_norm": 0.020545872071333744, "learning_rate": 7.599150231126268e-07, "loss": 0.0003, "step": 255360 }, { "epoch": 1.6800326309349156, "grad_norm": 0.040092395334781684, "learning_rate": 7.596107900274096e-07, "loss": 0.0004, "step": 255370 }, { "epoch": 1.6800984191101491, "grad_norm": 0.048634387404593235, "learning_rate": 7.593066128480625e-07, "loss": 0.0004, "step": 255380 }, { "epoch": 1.6801642072853826, "grad_norm": 0.027631875740676184, "learning_rate": 7.590024915785954e-07, "loss": 0.0007, "step": 255390 }, { "epoch": 1.680229995460616, "grad_norm": 0.004213102025232371, "learning_rate": 7.586984262230163e-07, "loss": 0.0002, "step": 255400 }, { "epoch": 1.6802957836358492, "grad_norm": 0.010705313491934331, "learning_rate": 7.583944167853363e-07, "loss": 0.0002, "step": 255410 }, { "epoch": 1.6803615718110827, "grad_norm": 0.014924626779710187, "learning_rate": 7.58090463269563e-07, "loss": 0.0007, "step": 255420 }, { "epoch": 1.6804273599863162, "grad_norm": 0.02862339973268061, "learning_rate": 7.577865656797034e-07, "loss": 0.0003, "step": 255430 }, { "epoch": 1.6804931481615495, "grad_norm": 0.027394826975507953, "learning_rate": 7.574827240197636e-07, "loss": 0.0004, "step": 255440 }, { "epoch": 1.6805589363367828, "grad_norm": 0.07805481634335465, "learning_rate": 7.571789382937506e-07, "loss": 0.0009, "step": 255450 }, { "epoch": 1.680624724512016, "grad_norm": 0.0249100237020644, "learning_rate": 7.568752085056686e-07, "loss": 0.0002, "step": 255460 }, { "epoch": 1.6806905126872496, "grad_norm": 0.02976788343458552, "learning_rate": 7.565715346595226e-07, "loss": 0.0002, "step": 255470 }, { "epoch": 1.680756300862483, "grad_norm": 0.0007547758066317958, "learning_rate": 7.562679167593162e-07, "loss": 0.0009, "step": 255480 }, { "epoch": 1.6808220890377163, "grad_norm": 0.013717971127289107, "learning_rate": 7.559643548090517e-07, "loss": 0.0003, "step": 255490 }, { "epoch": 1.6808878772129496, "grad_norm": 0.028858244745852087, "learning_rate": 7.55660848812732e-07, "loss": 0.0004, "step": 255500 }, { "epoch": 1.6809536653881831, "grad_norm": 0.04697441661661379, "learning_rate": 7.553573987743584e-07, "loss": 0.0005, "step": 255510 }, { "epoch": 1.6810194535634166, "grad_norm": 0.04683123094511607, "learning_rate": 7.550540046979315e-07, "loss": 0.0003, "step": 255520 }, { "epoch": 1.68108524173865, "grad_norm": 0.0051053417560698615, "learning_rate": 7.547506665874504e-07, "loss": 0.0004, "step": 255530 }, { "epoch": 1.6811510299138832, "grad_norm": 0.013484021497660533, "learning_rate": 7.544473844469164e-07, "loss": 0.0004, "step": 255540 }, { "epoch": 1.6812168180891167, "grad_norm": 0.001225688799685354, "learning_rate": 7.541441582803266e-07, "loss": 0.0004, "step": 255550 }, { "epoch": 1.68128260626435, "grad_norm": 0.028479199896299583, "learning_rate": 7.538409880916797e-07, "loss": 0.0005, "step": 255560 }, { "epoch": 1.6813483944395835, "grad_norm": 0.10015959944991029, "learning_rate": 7.535378738849719e-07, "loss": 0.0005, "step": 255570 }, { "epoch": 1.6814141826148168, "grad_norm": 0.09773763461324898, "learning_rate": 7.532348156641994e-07, "loss": 0.0009, "step": 255580 }, { "epoch": 1.68147997079005, "grad_norm": 0.017414707974553488, "learning_rate": 7.529318134333585e-07, "loss": 0.0009, "step": 255590 }, { "epoch": 1.6815457589652836, "grad_norm": 0.025074494362531567, "learning_rate": 7.526288671964438e-07, "loss": 0.0006, "step": 255600 }, { "epoch": 1.681611547140517, "grad_norm": 0.02673878596494688, "learning_rate": 7.523259769574486e-07, "loss": 0.0003, "step": 255610 }, { "epoch": 1.6816773353157504, "grad_norm": 0.032987730298093446, "learning_rate": 7.520231427203678e-07, "loss": 0.0006, "step": 255620 }, { "epoch": 1.6817431234909836, "grad_norm": 0.00043131197288764423, "learning_rate": 7.517203644891923e-07, "loss": 0.0003, "step": 255630 }, { "epoch": 1.6818089116662172, "grad_norm": 0.02831337212232384, "learning_rate": 7.514176422679148e-07, "loss": 0.0002, "step": 255640 }, { "epoch": 1.6818746998414507, "grad_norm": 0.0118551753453966, "learning_rate": 7.511149760605269e-07, "loss": 0.0002, "step": 255650 }, { "epoch": 1.681940488016684, "grad_norm": 0.03611098578335944, "learning_rate": 7.508123658710165e-07, "loss": 0.0003, "step": 255660 }, { "epoch": 1.6820062761919172, "grad_norm": 0.08348127021393598, "learning_rate": 7.505098117033771e-07, "loss": 0.0003, "step": 255670 }, { "epoch": 1.6820720643671505, "grad_norm": 0.003147539088825314, "learning_rate": 7.50207313561595e-07, "loss": 0.0005, "step": 255680 }, { "epoch": 1.682137852542384, "grad_norm": 0.015197493923735175, "learning_rate": 7.499048714496598e-07, "loss": 0.0004, "step": 255690 }, { "epoch": 1.6822036407176175, "grad_norm": 0.005656949733298403, "learning_rate": 7.496024853715578e-07, "loss": 0.0005, "step": 255700 }, { "epoch": 1.6822694288928508, "grad_norm": 0.007725639359612747, "learning_rate": 7.493001553312768e-07, "loss": 0.0002, "step": 255710 }, { "epoch": 1.682335217068084, "grad_norm": 0.011637227732339033, "learning_rate": 7.489978813328013e-07, "loss": 0.0002, "step": 255720 }, { "epoch": 1.6824010052433176, "grad_norm": 0.08251921717853603, "learning_rate": 7.486956633801179e-07, "loss": 0.0006, "step": 255730 }, { "epoch": 1.682466793418551, "grad_norm": 0.014668104505121298, "learning_rate": 7.483935014772104e-07, "loss": 0.0005, "step": 255740 }, { "epoch": 1.6825325815937844, "grad_norm": 0.03206361690371941, "learning_rate": 7.480913956280622e-07, "loss": 0.0005, "step": 255750 }, { "epoch": 1.6825983697690177, "grad_norm": 0.007466431981653737, "learning_rate": 7.47789345836657e-07, "loss": 0.0004, "step": 255760 }, { "epoch": 1.682664157944251, "grad_norm": 0.0006783945690014265, "learning_rate": 7.47487352106977e-07, "loss": 0.0001, "step": 255770 }, { "epoch": 1.6827299461194845, "grad_norm": 0.0371526721822776, "learning_rate": 7.471854144430035e-07, "loss": 0.0005, "step": 255780 }, { "epoch": 1.682795734294718, "grad_norm": 0.039451249012589444, "learning_rate": 7.468835328487162e-07, "loss": 0.0006, "step": 255790 }, { "epoch": 1.6828615224699512, "grad_norm": 0.03068978149410282, "learning_rate": 7.465817073280974e-07, "loss": 0.0002, "step": 255800 }, { "epoch": 1.6829273106451845, "grad_norm": 0.1192281676146496, "learning_rate": 7.462799378851254e-07, "loss": 0.0016, "step": 255810 }, { "epoch": 1.682993098820418, "grad_norm": 0.00858897591470334, "learning_rate": 7.459782245237784e-07, "loss": 0.0003, "step": 255820 }, { "epoch": 1.6830588869956515, "grad_norm": 0.015772877663771768, "learning_rate": 7.456765672480348e-07, "loss": 0.0003, "step": 255830 }, { "epoch": 1.6831246751708848, "grad_norm": 0.02940815582800422, "learning_rate": 7.453749660618714e-07, "loss": 0.0008, "step": 255840 }, { "epoch": 1.683190463346118, "grad_norm": 0.04550261620031839, "learning_rate": 7.450734209692645e-07, "loss": 0.0005, "step": 255850 }, { "epoch": 1.6832562515213514, "grad_norm": 0.016781630376516362, "learning_rate": 7.447719319741892e-07, "loss": 0.0004, "step": 255860 }, { "epoch": 1.683322039696585, "grad_norm": 0.017606419439377203, "learning_rate": 7.444704990806217e-07, "loss": 0.0002, "step": 255870 }, { "epoch": 1.6833878278718184, "grad_norm": 0.018634132553632724, "learning_rate": 7.441691222925351e-07, "loss": 0.0012, "step": 255880 }, { "epoch": 1.6834536160470517, "grad_norm": 0.019337897430941237, "learning_rate": 7.438678016139028e-07, "loss": 0.0002, "step": 255890 }, { "epoch": 1.683519404222285, "grad_norm": 0.02315000263139655, "learning_rate": 7.435665370486977e-07, "loss": 0.0004, "step": 255900 }, { "epoch": 1.6835851923975185, "grad_norm": 0.02920957309505898, "learning_rate": 7.432653286008917e-07, "loss": 0.0004, "step": 255910 }, { "epoch": 1.683650980572752, "grad_norm": 0.03965643547261796, "learning_rate": 7.429641762744555e-07, "loss": 0.0005, "step": 255920 }, { "epoch": 1.6837167687479853, "grad_norm": 0.005326413910191659, "learning_rate": 7.426630800733603e-07, "loss": 0.0003, "step": 255930 }, { "epoch": 1.6837825569232185, "grad_norm": 0.02696196165029706, "learning_rate": 7.423620400015757e-07, "loss": 0.0004, "step": 255940 }, { "epoch": 1.683848345098452, "grad_norm": 0.021620339949442704, "learning_rate": 7.420610560630708e-07, "loss": 0.0005, "step": 255950 }, { "epoch": 1.6839141332736856, "grad_norm": 0.015915143426833853, "learning_rate": 7.417601282618137e-07, "loss": 0.0003, "step": 255960 }, { "epoch": 1.6839799214489188, "grad_norm": 0.004703548506179206, "learning_rate": 7.414592566017709e-07, "loss": 0.0009, "step": 255970 }, { "epoch": 1.6840457096241521, "grad_norm": 0.030162658051301347, "learning_rate": 7.411584410869105e-07, "loss": 0.0003, "step": 255980 }, { "epoch": 1.6841114977993854, "grad_norm": 0.01700976628858406, "learning_rate": 7.408576817211976e-07, "loss": 0.001, "step": 255990 }, { "epoch": 1.684177285974619, "grad_norm": 0.03830617952403117, "learning_rate": 7.405569785085975e-07, "loss": 0.0002, "step": 256000 }, { "epoch": 1.6842430741498524, "grad_norm": 0.056319434868628254, "learning_rate": 7.402563314530753e-07, "loss": 0.0014, "step": 256010 }, { "epoch": 1.6843088623250857, "grad_norm": 0.016994749280721724, "learning_rate": 7.399557405585944e-07, "loss": 0.0005, "step": 256020 }, { "epoch": 1.684374650500319, "grad_norm": 0.041009506974407875, "learning_rate": 7.396552058291179e-07, "loss": 0.0007, "step": 256030 }, { "epoch": 1.6844404386755525, "grad_norm": 0.010695677742481098, "learning_rate": 7.39354727268608e-07, "loss": 0.0003, "step": 256040 }, { "epoch": 1.684506226850786, "grad_norm": 0.029258984894159595, "learning_rate": 7.39054304881025e-07, "loss": 0.0005, "step": 256050 }, { "epoch": 1.6845720150260193, "grad_norm": 0.017599794297072088, "learning_rate": 7.387539386703324e-07, "loss": 0.0006, "step": 256060 }, { "epoch": 1.6846378032012526, "grad_norm": 0.026382951370642695, "learning_rate": 7.384536286404887e-07, "loss": 0.0004, "step": 256070 }, { "epoch": 1.6847035913764858, "grad_norm": 0.05116400246656881, "learning_rate": 7.381533747954533e-07, "loss": 0.0003, "step": 256080 }, { "epoch": 1.6847693795517193, "grad_norm": 0.08120839970822961, "learning_rate": 7.378531771391845e-07, "loss": 0.0005, "step": 256090 }, { "epoch": 1.6848351677269529, "grad_norm": 0.4205087274816561, "learning_rate": 7.375530356756411e-07, "loss": 0.0025, "step": 256100 }, { "epoch": 1.6849009559021861, "grad_norm": 0.03960585614751515, "learning_rate": 7.372529504087806e-07, "loss": 0.0006, "step": 256110 }, { "epoch": 1.6849667440774194, "grad_norm": 0.005623109937420472, "learning_rate": 7.369529213425575e-07, "loss": 0.0003, "step": 256120 }, { "epoch": 1.685032532252653, "grad_norm": 0.02274694778834176, "learning_rate": 7.366529484809281e-07, "loss": 0.0004, "step": 256130 }, { "epoch": 1.6850983204278864, "grad_norm": 0.021923700872866998, "learning_rate": 7.363530318278478e-07, "loss": 0.0007, "step": 256140 }, { "epoch": 1.6851641086031197, "grad_norm": 0.06580700066748008, "learning_rate": 7.360531713872687e-07, "loss": 0.0002, "step": 256150 }, { "epoch": 1.685229896778353, "grad_norm": 0.02222477801125618, "learning_rate": 7.357533671631473e-07, "loss": 0.0005, "step": 256160 }, { "epoch": 1.6852956849535863, "grad_norm": 0.00016708877678762765, "learning_rate": 7.354536191594353e-07, "loss": 0.001, "step": 256170 }, { "epoch": 1.6853614731288198, "grad_norm": 0.005963419181376595, "learning_rate": 7.351539273800839e-07, "loss": 0.0003, "step": 256180 }, { "epoch": 1.6854272613040533, "grad_norm": 0.07540201781610845, "learning_rate": 7.348542918290447e-07, "loss": 0.0004, "step": 256190 }, { "epoch": 1.6854930494792866, "grad_norm": 0.04624556472870358, "learning_rate": 7.345547125102681e-07, "loss": 0.0007, "step": 256200 }, { "epoch": 1.6855588376545199, "grad_norm": 0.031247788671204296, "learning_rate": 7.342551894277033e-07, "loss": 0.0002, "step": 256210 }, { "epoch": 1.6856246258297534, "grad_norm": 0.036215860185360375, "learning_rate": 7.339557225853006e-07, "loss": 0.0003, "step": 256220 }, { "epoch": 1.6856904140049869, "grad_norm": 0.01261059169045528, "learning_rate": 7.336563119870066e-07, "loss": 0.0005, "step": 256230 }, { "epoch": 1.6857562021802202, "grad_norm": 0.007910713523435565, "learning_rate": 7.333569576367694e-07, "loss": 0.0006, "step": 256240 }, { "epoch": 1.6858219903554534, "grad_norm": 0.03469949333762091, "learning_rate": 7.330576595385358e-07, "loss": 0.0008, "step": 256250 }, { "epoch": 1.685887778530687, "grad_norm": 0.1189690399639231, "learning_rate": 7.327584176962521e-07, "loss": 0.0007, "step": 256260 }, { "epoch": 1.6859535667059202, "grad_norm": 0.01697566000006459, "learning_rate": 7.324592321138635e-07, "loss": 0.0004, "step": 256270 }, { "epoch": 1.6860193548811537, "grad_norm": 0.10434994026347987, "learning_rate": 7.321601027953123e-07, "loss": 0.001, "step": 256280 }, { "epoch": 1.686085143056387, "grad_norm": 0.008436470879191228, "learning_rate": 7.318610297445461e-07, "loss": 0.0011, "step": 256290 }, { "epoch": 1.6861509312316203, "grad_norm": 0.010397738778736556, "learning_rate": 7.315620129655054e-07, "loss": 0.0007, "step": 256300 }, { "epoch": 1.6862167194068538, "grad_norm": 0.0690022954243589, "learning_rate": 7.312630524621333e-07, "loss": 0.0004, "step": 256310 }, { "epoch": 1.6862825075820873, "grad_norm": 0.007616459738405259, "learning_rate": 7.309641482383717e-07, "loss": 0.0003, "step": 256320 }, { "epoch": 1.6863482957573206, "grad_norm": 0.0023266963381595716, "learning_rate": 7.306653002981601e-07, "loss": 0.0004, "step": 256330 }, { "epoch": 1.6864140839325539, "grad_norm": 0.0176289095496236, "learning_rate": 7.303665086454392e-07, "loss": 0.0003, "step": 256340 }, { "epoch": 1.6864798721077874, "grad_norm": 0.0004226766452351056, "learning_rate": 7.300677732841488e-07, "loss": 0.0004, "step": 256350 }, { "epoch": 1.6865456602830209, "grad_norm": 0.0037199823035131726, "learning_rate": 7.297690942182272e-07, "loss": 0.0006, "step": 256360 }, { "epoch": 1.6866114484582542, "grad_norm": 0.036350269757437374, "learning_rate": 7.294704714516115e-07, "loss": 0.0005, "step": 256370 }, { "epoch": 1.6866772366334875, "grad_norm": 0.009427546802775301, "learning_rate": 7.291719049882401e-07, "loss": 0.0003, "step": 256380 }, { "epoch": 1.6867430248087207, "grad_norm": 0.00246465601111608, "learning_rate": 7.288733948320482e-07, "loss": 0.0002, "step": 256390 }, { "epoch": 1.6868088129839542, "grad_norm": 0.03709466519118722, "learning_rate": 7.285749409869713e-07, "loss": 0.0003, "step": 256400 }, { "epoch": 1.6868746011591877, "grad_norm": 0.07567060310633417, "learning_rate": 7.282765434569444e-07, "loss": 0.0009, "step": 256410 }, { "epoch": 1.686940389334421, "grad_norm": 0.0004143692713273782, "learning_rate": 7.279782022459031e-07, "loss": 0.0008, "step": 256420 }, { "epoch": 1.6870061775096543, "grad_norm": 0.01023784240543907, "learning_rate": 7.276799173577792e-07, "loss": 0.0003, "step": 256430 }, { "epoch": 1.6870719656848878, "grad_norm": 0.008117836657244503, "learning_rate": 7.273816887965057e-07, "loss": 0.0003, "step": 256440 }, { "epoch": 1.6871377538601213, "grad_norm": 0.0007396128623378275, "learning_rate": 7.270835165660151e-07, "loss": 0.0009, "step": 256450 }, { "epoch": 1.6872035420353546, "grad_norm": 0.017445536197840155, "learning_rate": 7.267854006702374e-07, "loss": 0.0003, "step": 256460 }, { "epoch": 1.687269330210588, "grad_norm": 0.058677244375593664, "learning_rate": 7.264873411131041e-07, "loss": 0.0005, "step": 256470 }, { "epoch": 1.6873351183858212, "grad_norm": 0.011810984791199007, "learning_rate": 7.261893378985441e-07, "loss": 0.0008, "step": 256480 }, { "epoch": 1.6874009065610547, "grad_norm": 0.0003134664600293282, "learning_rate": 7.258913910304865e-07, "loss": 0.0007, "step": 256490 }, { "epoch": 1.6874666947362882, "grad_norm": 0.03665677071848111, "learning_rate": 7.2559350051286e-07, "loss": 0.0004, "step": 256500 }, { "epoch": 1.6875324829115215, "grad_norm": 0.001091301320370427, "learning_rate": 7.252956663495914e-07, "loss": 0.0005, "step": 256510 }, { "epoch": 1.6875982710867548, "grad_norm": 0.0035160871094991704, "learning_rate": 7.249978885446074e-07, "loss": 0.0001, "step": 256520 }, { "epoch": 1.6876640592619883, "grad_norm": 0.018645203257790788, "learning_rate": 7.247001671018339e-07, "loss": 0.0004, "step": 256530 }, { "epoch": 1.6877298474372218, "grad_norm": 0.05369296134509117, "learning_rate": 7.244025020251955e-07, "loss": 0.0004, "step": 256540 }, { "epoch": 1.687795635612455, "grad_norm": 0.0009742667302038277, "learning_rate": 7.241048933186185e-07, "loss": 0.0006, "step": 256550 }, { "epoch": 1.6878614237876883, "grad_norm": 0.03419990946605953, "learning_rate": 7.238073409860258e-07, "loss": 0.0005, "step": 256560 }, { "epoch": 1.6879272119629218, "grad_norm": 0.01599492096289461, "learning_rate": 7.235098450313399e-07, "loss": 0.0005, "step": 256570 }, { "epoch": 1.6879930001381551, "grad_norm": 0.013047811326245834, "learning_rate": 7.232124054584833e-07, "loss": 0.0004, "step": 256580 }, { "epoch": 1.6880587883133886, "grad_norm": 0.0003249460297789807, "learning_rate": 7.229150222713771e-07, "loss": 0.0002, "step": 256590 }, { "epoch": 1.688124576488622, "grad_norm": 0.6025016657833315, "learning_rate": 7.226176954739428e-07, "loss": 0.0005, "step": 256600 }, { "epoch": 1.6881903646638552, "grad_norm": 0.012207924124664885, "learning_rate": 7.223204250701e-07, "loss": 0.0001, "step": 256610 }, { "epoch": 1.6882561528390887, "grad_norm": 0.04975631578621769, "learning_rate": 7.220232110637682e-07, "loss": 0.0007, "step": 256620 }, { "epoch": 1.6883219410143222, "grad_norm": 0.0005465965434439883, "learning_rate": 7.217260534588649e-07, "loss": 0.0004, "step": 256630 }, { "epoch": 1.6883877291895555, "grad_norm": 0.01005573119644563, "learning_rate": 7.214289522593093e-07, "loss": 0.0003, "step": 256640 }, { "epoch": 1.6884535173647888, "grad_norm": 0.044433277358375885, "learning_rate": 7.211319074690176e-07, "loss": 0.0004, "step": 256650 }, { "epoch": 1.6885193055400223, "grad_norm": 0.0005249595167297149, "learning_rate": 7.208349190919061e-07, "loss": 0.0005, "step": 256660 }, { "epoch": 1.6885850937152558, "grad_norm": 0.0620232952660707, "learning_rate": 7.205379871318896e-07, "loss": 0.0006, "step": 256670 }, { "epoch": 1.688650881890489, "grad_norm": 0.031018395000812934, "learning_rate": 7.202411115928843e-07, "loss": 0.0018, "step": 256680 }, { "epoch": 1.6887166700657223, "grad_norm": 0.1702150812130223, "learning_rate": 7.199442924788041e-07, "loss": 0.0004, "step": 256690 }, { "epoch": 1.6887824582409556, "grad_norm": 0.004333973804658393, "learning_rate": 7.196475297935623e-07, "loss": 0.0004, "step": 256700 }, { "epoch": 1.6888482464161891, "grad_norm": 0.04224507996502504, "learning_rate": 7.193508235410706e-07, "loss": 0.0007, "step": 256710 }, { "epoch": 1.6889140345914226, "grad_norm": 0.0035927795800009517, "learning_rate": 7.19054173725241e-07, "loss": 0.001, "step": 256720 }, { "epoch": 1.688979822766656, "grad_norm": 0.01592293771722423, "learning_rate": 7.187575803499852e-07, "loss": 0.0003, "step": 256730 }, { "epoch": 1.6890456109418892, "grad_norm": 0.01953649992386483, "learning_rate": 7.184610434192136e-07, "loss": 0.0003, "step": 256740 }, { "epoch": 1.6891113991171227, "grad_norm": 0.02397608983688333, "learning_rate": 7.181645629368345e-07, "loss": 0.0002, "step": 256750 }, { "epoch": 1.6891771872923562, "grad_norm": 0.003424219277125736, "learning_rate": 7.178681389067582e-07, "loss": 0.0004, "step": 256760 }, { "epoch": 1.6892429754675895, "grad_norm": 0.0449186934517184, "learning_rate": 7.175717713328922e-07, "loss": 0.0006, "step": 256770 }, { "epoch": 1.6893087636428228, "grad_norm": 0.0018925584867180972, "learning_rate": 7.172754602191434e-07, "loss": 0.0003, "step": 256780 }, { "epoch": 1.689374551818056, "grad_norm": 0.021295677147833775, "learning_rate": 7.169792055694191e-07, "loss": 0.0006, "step": 256790 }, { "epoch": 1.6894403399932896, "grad_norm": 0.014415845468791886, "learning_rate": 7.166830073876241e-07, "loss": 0.0006, "step": 256800 }, { "epoch": 1.689506128168523, "grad_norm": 0.007971640292776713, "learning_rate": 7.163868656776651e-07, "loss": 0.0005, "step": 256810 }, { "epoch": 1.6895719163437564, "grad_norm": 0.036425101034547006, "learning_rate": 7.160907804434458e-07, "loss": 0.0008, "step": 256820 }, { "epoch": 1.6896377045189896, "grad_norm": 0.003251122089804248, "learning_rate": 7.157947516888697e-07, "loss": 0.0005, "step": 256830 }, { "epoch": 1.6897034926942232, "grad_norm": 0.005467426203141685, "learning_rate": 7.154987794178398e-07, "loss": 0.0005, "step": 256840 }, { "epoch": 1.6897692808694567, "grad_norm": 0.01409020470826435, "learning_rate": 7.152028636342578e-07, "loss": 0.0004, "step": 256850 }, { "epoch": 1.68983506904469, "grad_norm": 0.020772190846433822, "learning_rate": 7.149070043420259e-07, "loss": 0.0005, "step": 256860 }, { "epoch": 1.6899008572199232, "grad_norm": 0.019945943033190604, "learning_rate": 7.146112015450441e-07, "loss": 0.0004, "step": 256870 }, { "epoch": 1.6899666453951565, "grad_norm": 0.023589935426406843, "learning_rate": 7.143154552472125e-07, "loss": 0.0003, "step": 256880 }, { "epoch": 1.69003243357039, "grad_norm": 0.04571962219470385, "learning_rate": 7.140197654524306e-07, "loss": 0.0003, "step": 256890 }, { "epoch": 1.6900982217456235, "grad_norm": 0.017155496381021183, "learning_rate": 7.13724132164596e-07, "loss": 0.0003, "step": 256900 }, { "epoch": 1.6901640099208568, "grad_norm": 0.15232982189872876, "learning_rate": 7.134285553876069e-07, "loss": 0.0003, "step": 256910 }, { "epoch": 1.69022979809609, "grad_norm": 0.03191019979440266, "learning_rate": 7.131330351253606e-07, "loss": 0.0006, "step": 256920 }, { "epoch": 1.6902955862713236, "grad_norm": 0.001194415839941101, "learning_rate": 7.128375713817515e-07, "loss": 0.0004, "step": 256930 }, { "epoch": 1.690361374446557, "grad_norm": 0.038704444198887494, "learning_rate": 7.125421641606772e-07, "loss": 0.0003, "step": 256940 }, { "epoch": 1.6904271626217904, "grad_norm": 0.010538322640676029, "learning_rate": 7.12246813466032e-07, "loss": 0.0003, "step": 256950 }, { "epoch": 1.6904929507970237, "grad_norm": 0.007091671902298362, "learning_rate": 7.11951519301709e-07, "loss": 0.0004, "step": 256960 }, { "epoch": 1.6905587389722572, "grad_norm": 0.005779932071892099, "learning_rate": 7.116562816716022e-07, "loss": 0.0006, "step": 256970 }, { "epoch": 1.6906245271474907, "grad_norm": 0.02672560068345843, "learning_rate": 7.113611005796034e-07, "loss": 0.0006, "step": 256980 }, { "epoch": 1.690690315322724, "grad_norm": 0.009491915409283626, "learning_rate": 7.110659760296052e-07, "loss": 0.0005, "step": 256990 }, { "epoch": 1.6907561034979572, "grad_norm": 0.008523022528519046, "learning_rate": 7.107709080254971e-07, "loss": 0.0013, "step": 257000 }, { "epoch": 1.6908218916731905, "grad_norm": 0.03363959616240185, "learning_rate": 7.104758965711706e-07, "loss": 0.0003, "step": 257010 }, { "epoch": 1.690887679848424, "grad_norm": 0.00799984916374817, "learning_rate": 7.101809416705147e-07, "loss": 0.0002, "step": 257020 }, { "epoch": 1.6909534680236575, "grad_norm": 0.047581407574328466, "learning_rate": 7.09886043327418e-07, "loss": 0.0006, "step": 257030 }, { "epoch": 1.6910192561988908, "grad_norm": 0.00048493727990809915, "learning_rate": 7.095912015457684e-07, "loss": 0.0004, "step": 257040 }, { "epoch": 1.691085044374124, "grad_norm": 0.022271515435087107, "learning_rate": 7.092964163294542e-07, "loss": 0.0006, "step": 257050 }, { "epoch": 1.6911508325493576, "grad_norm": 0.016992854085408397, "learning_rate": 7.090016876823591e-07, "loss": 0.0005, "step": 257060 }, { "epoch": 1.6912166207245911, "grad_norm": 0.022282591274159526, "learning_rate": 7.087070156083725e-07, "loss": 0.0003, "step": 257070 }, { "epoch": 1.6912824088998244, "grad_norm": 0.024035990460231128, "learning_rate": 7.084124001113774e-07, "loss": 0.0003, "step": 257080 }, { "epoch": 1.6913481970750577, "grad_norm": 0.0058344018423857, "learning_rate": 7.081178411952583e-07, "loss": 0.0008, "step": 257090 }, { "epoch": 1.691413985250291, "grad_norm": 0.00035078313341507, "learning_rate": 7.078233388638988e-07, "loss": 0.001, "step": 257100 }, { "epoch": 1.6914797734255245, "grad_norm": 0.07175280253484087, "learning_rate": 7.075288931211816e-07, "loss": 0.0007, "step": 257110 }, { "epoch": 1.691545561600758, "grad_norm": 0.02765736927502104, "learning_rate": 7.072345039709888e-07, "loss": 0.0003, "step": 257120 }, { "epoch": 1.6916113497759913, "grad_norm": 0.0979459380229347, "learning_rate": 7.069401714172014e-07, "loss": 0.0007, "step": 257130 }, { "epoch": 1.6916771379512245, "grad_norm": 0.004076489866397794, "learning_rate": 7.066458954637006e-07, "loss": 0.0005, "step": 257140 }, { "epoch": 1.691742926126458, "grad_norm": 0.013933306457557242, "learning_rate": 7.063516761143652e-07, "loss": 0.0007, "step": 257150 }, { "epoch": 1.6918087143016916, "grad_norm": 0.04931193644232125, "learning_rate": 7.060575133730752e-07, "loss": 0.0003, "step": 257160 }, { "epoch": 1.6918745024769248, "grad_norm": 0.029867494008095803, "learning_rate": 7.057634072437075e-07, "loss": 0.0006, "step": 257170 }, { "epoch": 1.6919402906521581, "grad_norm": 0.09635575698291252, "learning_rate": 7.054693577301413e-07, "loss": 0.0006, "step": 257180 }, { "epoch": 1.6920060788273914, "grad_norm": 0.010548858813446917, "learning_rate": 7.051753648362514e-07, "loss": 0.0004, "step": 257190 }, { "epoch": 1.692071867002625, "grad_norm": 0.05535247262913106, "learning_rate": 7.048814285659161e-07, "loss": 0.0005, "step": 257200 }, { "epoch": 1.6921376551778584, "grad_norm": 0.022465290317594245, "learning_rate": 7.045875489230097e-07, "loss": 0.0003, "step": 257210 }, { "epoch": 1.6922034433530917, "grad_norm": 0.0036426740615191125, "learning_rate": 7.042937259114064e-07, "loss": 0.0002, "step": 257220 }, { "epoch": 1.692269231528325, "grad_norm": 0.0016931241026164267, "learning_rate": 7.039999595349805e-07, "loss": 0.0003, "step": 257230 }, { "epoch": 1.6923350197035585, "grad_norm": 0.00039987341486525535, "learning_rate": 7.037062497976049e-07, "loss": 0.0002, "step": 257240 }, { "epoch": 1.692400807878792, "grad_norm": 0.0028389541937937084, "learning_rate": 7.034125967031519e-07, "loss": 0.0003, "step": 257250 }, { "epoch": 1.6924665960540253, "grad_norm": 0.046878579612087995, "learning_rate": 7.031190002554933e-07, "loss": 0.0006, "step": 257260 }, { "epoch": 1.6925323842292586, "grad_norm": 0.019312215781833616, "learning_rate": 7.028254604584995e-07, "loss": 0.0004, "step": 257270 }, { "epoch": 1.692598172404492, "grad_norm": 6.342338687625669e-05, "learning_rate": 7.025319773160405e-07, "loss": 0.0003, "step": 257280 }, { "epoch": 1.6926639605797253, "grad_norm": 0.05454356012638264, "learning_rate": 7.022385508319856e-07, "loss": 0.0004, "step": 257290 }, { "epoch": 1.6927297487549589, "grad_norm": 0.022552399308075594, "learning_rate": 7.01945181010204e-07, "loss": 0.0005, "step": 257300 }, { "epoch": 1.6927955369301921, "grad_norm": 0.02010490898698631, "learning_rate": 7.01651867854563e-07, "loss": 0.0005, "step": 257310 }, { "epoch": 1.6928613251054254, "grad_norm": 0.15509982914438697, "learning_rate": 7.013586113689291e-07, "loss": 0.0009, "step": 257320 }, { "epoch": 1.692927113280659, "grad_norm": 0.005474953705610825, "learning_rate": 7.010654115571703e-07, "loss": 0.0009, "step": 257330 }, { "epoch": 1.6929929014558924, "grad_norm": 0.04921669851276501, "learning_rate": 7.00772268423151e-07, "loss": 0.0007, "step": 257340 }, { "epoch": 1.6930586896311257, "grad_norm": 0.014010977919534852, "learning_rate": 7.004791819707363e-07, "loss": 0.0001, "step": 257350 }, { "epoch": 1.693124477806359, "grad_norm": 0.022315029264348793, "learning_rate": 7.001861522037906e-07, "loss": 0.0003, "step": 257360 }, { "epoch": 1.6931902659815925, "grad_norm": 0.14677526796677848, "learning_rate": 6.998931791261765e-07, "loss": 0.0003, "step": 257370 }, { "epoch": 1.693256054156826, "grad_norm": 0.08280750914166297, "learning_rate": 6.996002627417575e-07, "loss": 0.0004, "step": 257380 }, { "epoch": 1.6933218423320593, "grad_norm": 0.010539205173085804, "learning_rate": 6.993074030543945e-07, "loss": 0.0002, "step": 257390 }, { "epoch": 1.6933876305072926, "grad_norm": 0.016293225775047018, "learning_rate": 6.990146000679493e-07, "loss": 0.0013, "step": 257400 }, { "epoch": 1.6934534186825259, "grad_norm": 0.03874618518020154, "learning_rate": 6.987218537862822e-07, "loss": 0.0006, "step": 257410 }, { "epoch": 1.6935192068577594, "grad_norm": 0.018287349169640976, "learning_rate": 6.984291642132524e-07, "loss": 0.0014, "step": 257420 }, { "epoch": 1.6935849950329929, "grad_norm": 0.2247566352855152, "learning_rate": 6.981365313527189e-07, "loss": 0.0005, "step": 257430 }, { "epoch": 1.6936507832082262, "grad_norm": 0.006225587153348377, "learning_rate": 6.978439552085397e-07, "loss": 0.0004, "step": 257440 }, { "epoch": 1.6937165713834594, "grad_norm": 0.01956002354800673, "learning_rate": 6.97551435784572e-07, "loss": 0.0002, "step": 257450 }, { "epoch": 1.693782359558693, "grad_norm": 0.09414937127744158, "learning_rate": 6.972589730846735e-07, "loss": 0.0007, "step": 257460 }, { "epoch": 1.6938481477339264, "grad_norm": 0.01988371191197501, "learning_rate": 6.96966567112699e-07, "loss": 0.0006, "step": 257470 }, { "epoch": 1.6939139359091597, "grad_norm": 9.85311093409631e-05, "learning_rate": 6.966742178725044e-07, "loss": 0.0001, "step": 257480 }, { "epoch": 1.693979724084393, "grad_norm": 0.03214899950945103, "learning_rate": 6.963819253679438e-07, "loss": 0.0002, "step": 257490 }, { "epoch": 1.6940455122596263, "grad_norm": 0.0006411211903964621, "learning_rate": 6.960896896028702e-07, "loss": 0.0005, "step": 257500 }, { "epoch": 1.6941113004348598, "grad_norm": 0.012971830933988026, "learning_rate": 6.957975105811371e-07, "loss": 0.0008, "step": 257510 }, { "epoch": 1.6941770886100933, "grad_norm": 0.004715730053311682, "learning_rate": 6.955053883065965e-07, "loss": 0.0003, "step": 257520 }, { "epoch": 1.6942428767853266, "grad_norm": 0.032322349130056965, "learning_rate": 6.952133227830998e-07, "loss": 0.0005, "step": 257530 }, { "epoch": 1.6943086649605599, "grad_norm": 0.022970561432301646, "learning_rate": 6.949213140144972e-07, "loss": 0.0002, "step": 257540 }, { "epoch": 1.6943744531357934, "grad_norm": 0.03612729156628779, "learning_rate": 6.946293620046396e-07, "loss": 0.0004, "step": 257550 }, { "epoch": 1.6944402413110269, "grad_norm": 0.028031523056138843, "learning_rate": 6.943374667573755e-07, "loss": 0.0005, "step": 257560 }, { "epoch": 1.6945060294862602, "grad_norm": 0.008263011528963065, "learning_rate": 6.940456282765528e-07, "loss": 0.002, "step": 257570 }, { "epoch": 1.6945718176614935, "grad_norm": 0.0031017482459703306, "learning_rate": 6.937538465660188e-07, "loss": 0.0004, "step": 257580 }, { "epoch": 1.694637605836727, "grad_norm": 0.008057714386515345, "learning_rate": 6.934621216296223e-07, "loss": 0.0003, "step": 257590 }, { "epoch": 1.6947033940119602, "grad_norm": 0.0884696755968729, "learning_rate": 6.931704534712081e-07, "loss": 0.0003, "step": 257600 }, { "epoch": 1.6947691821871937, "grad_norm": 0.1258498432699469, "learning_rate": 6.928788420946219e-07, "loss": 0.0002, "step": 257610 }, { "epoch": 1.694834970362427, "grad_norm": 0.006046279111644707, "learning_rate": 6.925872875037088e-07, "loss": 0.0003, "step": 257620 }, { "epoch": 1.6949007585376603, "grad_norm": 0.043332957894403744, "learning_rate": 6.922957897023113e-07, "loss": 0.0002, "step": 257630 }, { "epoch": 1.6949665467128938, "grad_norm": 0.053851546629428745, "learning_rate": 6.92004348694274e-07, "loss": 0.0003, "step": 257640 }, { "epoch": 1.6950323348881273, "grad_norm": 0.03363184664013997, "learning_rate": 6.917129644834387e-07, "loss": 0.0011, "step": 257650 }, { "epoch": 1.6950981230633606, "grad_norm": 0.006835996657991448, "learning_rate": 6.914216370736471e-07, "loss": 0.0005, "step": 257660 }, { "epoch": 1.695163911238594, "grad_norm": 0.05580397822548991, "learning_rate": 6.911303664687402e-07, "loss": 0.001, "step": 257670 }, { "epoch": 1.6952296994138274, "grad_norm": 0.016124596246128137, "learning_rate": 6.908391526725578e-07, "loss": 0.0002, "step": 257680 }, { "epoch": 1.695295487589061, "grad_norm": 0.06534637103556967, "learning_rate": 6.905479956889399e-07, "loss": 0.0009, "step": 257690 }, { "epoch": 1.6953612757642942, "grad_norm": 0.11582404824161642, "learning_rate": 6.902568955217242e-07, "loss": 0.0006, "step": 257700 }, { "epoch": 1.6954270639395275, "grad_norm": 0.006983713696344685, "learning_rate": 6.899658521747488e-07, "loss": 0.0008, "step": 257710 }, { "epoch": 1.6954928521147608, "grad_norm": 0.04648229592818019, "learning_rate": 6.896748656518515e-07, "loss": 0.0002, "step": 257720 }, { "epoch": 1.6955586402899943, "grad_norm": 0.0011626401682447338, "learning_rate": 6.893839359568689e-07, "loss": 0.0004, "step": 257730 }, { "epoch": 1.6956244284652278, "grad_norm": 0.0981240647487557, "learning_rate": 6.890930630936366e-07, "loss": 0.0011, "step": 257740 }, { "epoch": 1.695690216640461, "grad_norm": 0.030762367563505717, "learning_rate": 6.888022470659883e-07, "loss": 0.0004, "step": 257750 }, { "epoch": 1.6957560048156943, "grad_norm": 0.04086021940370965, "learning_rate": 6.885114878777599e-07, "loss": 0.0002, "step": 257760 }, { "epoch": 1.6958217929909278, "grad_norm": 0.0050992566184065655, "learning_rate": 6.882207855327832e-07, "loss": 0.0006, "step": 257770 }, { "epoch": 1.6958875811661613, "grad_norm": 0.056541189945523894, "learning_rate": 6.87930140034892e-07, "loss": 0.0003, "step": 257780 }, { "epoch": 1.6959533693413946, "grad_norm": 0.046820643846382994, "learning_rate": 6.876395513879175e-07, "loss": 0.0005, "step": 257790 }, { "epoch": 1.696019157516628, "grad_norm": 0.009087211001974202, "learning_rate": 6.873490195956911e-07, "loss": 0.0002, "step": 257800 }, { "epoch": 1.6960849456918612, "grad_norm": 0.001767030652319461, "learning_rate": 6.870585446620431e-07, "loss": 0.0001, "step": 257810 }, { "epoch": 1.6961507338670947, "grad_norm": 0.0019182366685431594, "learning_rate": 6.867681265908033e-07, "loss": 0.0004, "step": 257820 }, { "epoch": 1.6962165220423282, "grad_norm": 0.028664291154106653, "learning_rate": 6.864777653858007e-07, "loss": 0.0004, "step": 257830 }, { "epoch": 1.6962823102175615, "grad_norm": 0.0067765577738521585, "learning_rate": 6.861874610508623e-07, "loss": 0.0002, "step": 257840 }, { "epoch": 1.6963480983927948, "grad_norm": 0.045147729132763145, "learning_rate": 6.858972135898179e-07, "loss": 0.0005, "step": 257850 }, { "epoch": 1.6964138865680283, "grad_norm": 0.007297747724190088, "learning_rate": 6.856070230064926e-07, "loss": 0.0004, "step": 257860 }, { "epoch": 1.6964796747432618, "grad_norm": 0.017791083586946678, "learning_rate": 6.853168893047124e-07, "loss": 0.0005, "step": 257870 }, { "epoch": 1.696545462918495, "grad_norm": 0.004377247299621873, "learning_rate": 6.85026812488303e-07, "loss": 0.0004, "step": 257880 }, { "epoch": 1.6966112510937283, "grad_norm": 0.023293893021469062, "learning_rate": 6.847367925610881e-07, "loss": 0.0001, "step": 257890 }, { "epoch": 1.6966770392689619, "grad_norm": 0.012462753551407933, "learning_rate": 6.844468295268919e-07, "loss": 0.0006, "step": 257900 }, { "epoch": 1.6967428274441951, "grad_norm": 0.0321247158589327, "learning_rate": 6.841569233895368e-07, "loss": 0.0007, "step": 257910 }, { "epoch": 1.6968086156194286, "grad_norm": 0.0002487975840312369, "learning_rate": 6.838670741528458e-07, "loss": 0.0002, "step": 257920 }, { "epoch": 1.696874403794662, "grad_norm": 0.07667798330298284, "learning_rate": 6.835772818206394e-07, "loss": 0.0004, "step": 257930 }, { "epoch": 1.6969401919698952, "grad_norm": 0.013352010076909056, "learning_rate": 6.832875463967387e-07, "loss": 0.0005, "step": 257940 }, { "epoch": 1.6970059801451287, "grad_norm": 0.026902365181395757, "learning_rate": 6.82997867884963e-07, "loss": 0.0004, "step": 257950 }, { "epoch": 1.6970717683203622, "grad_norm": 0.013970951197355792, "learning_rate": 6.827082462891327e-07, "loss": 0.0003, "step": 257960 }, { "epoch": 1.6971375564955955, "grad_norm": 0.04486994470364564, "learning_rate": 6.82418681613064e-07, "loss": 0.0007, "step": 257970 }, { "epoch": 1.6972033446708288, "grad_norm": 0.06178789537728744, "learning_rate": 6.821291738605779e-07, "loss": 0.0006, "step": 257980 }, { "epoch": 1.6972691328460623, "grad_norm": 0.025150564037642292, "learning_rate": 6.81839723035489e-07, "loss": 0.0003, "step": 257990 }, { "epoch": 1.6973349210212958, "grad_norm": 0.009150365472019872, "learning_rate": 6.815503291416148e-07, "loss": 0.0007, "step": 258000 }, { "epoch": 1.697400709196529, "grad_norm": 0.01730248428002158, "learning_rate": 6.81260992182769e-07, "loss": 0.0004, "step": 258010 }, { "epoch": 1.6974664973717624, "grad_norm": 0.006093668889150085, "learning_rate": 6.809717121627668e-07, "loss": 0.0002, "step": 258020 }, { "epoch": 1.6975322855469956, "grad_norm": 0.016516936421019476, "learning_rate": 6.80682489085423e-07, "loss": 0.0004, "step": 258030 }, { "epoch": 1.6975980737222292, "grad_norm": 0.03189763800990732, "learning_rate": 6.803933229545501e-07, "loss": 0.0005, "step": 258040 }, { "epoch": 1.6976638618974627, "grad_norm": 0.05064802468777186, "learning_rate": 6.801042137739599e-07, "loss": 0.0019, "step": 258050 }, { "epoch": 1.697729650072696, "grad_norm": 0.015606794194969738, "learning_rate": 6.798151615474652e-07, "loss": 0.0003, "step": 258060 }, { "epoch": 1.6977954382479292, "grad_norm": 0.022991519259269233, "learning_rate": 6.795261662788755e-07, "loss": 0.0004, "step": 258070 }, { "epoch": 1.6978612264231627, "grad_norm": 0.03685601077084507, "learning_rate": 6.792372279720027e-07, "loss": 0.0003, "step": 258080 }, { "epoch": 1.6979270145983962, "grad_norm": 0.012348079305738459, "learning_rate": 6.789483466306556e-07, "loss": 0.0004, "step": 258090 }, { "epoch": 1.6979928027736295, "grad_norm": 0.00874796307434164, "learning_rate": 6.786595222586429e-07, "loss": 0.0005, "step": 258100 }, { "epoch": 1.6980585909488628, "grad_norm": 0.007810352925365089, "learning_rate": 6.783707548597717e-07, "loss": 0.0002, "step": 258110 }, { "epoch": 1.698124379124096, "grad_norm": 0.04175108564555693, "learning_rate": 6.7808204443785e-07, "loss": 0.0004, "step": 258120 }, { "epoch": 1.6981901672993296, "grad_norm": 0.003153643303047296, "learning_rate": 6.777933909966838e-07, "loss": 0.0003, "step": 258130 }, { "epoch": 1.698255955474563, "grad_norm": 0.09973955527943712, "learning_rate": 6.775047945400787e-07, "loss": 0.001, "step": 258140 }, { "epoch": 1.6983217436497964, "grad_norm": 0.01814256711306604, "learning_rate": 6.772162550718398e-07, "loss": 0.0005, "step": 258150 }, { "epoch": 1.6983875318250297, "grad_norm": 0.007803643712802619, "learning_rate": 6.769277725957713e-07, "loss": 0.0003, "step": 258160 }, { "epoch": 1.6984533200002632, "grad_norm": 0.00598850701913561, "learning_rate": 6.766393471156762e-07, "loss": 0.0005, "step": 258170 }, { "epoch": 1.6985191081754967, "grad_norm": 0.07316160049451256, "learning_rate": 6.763509786353578e-07, "loss": 0.001, "step": 258180 }, { "epoch": 1.69858489635073, "grad_norm": 0.003335210756769376, "learning_rate": 6.760626671586174e-07, "loss": 0.0002, "step": 258190 }, { "epoch": 1.6986506845259632, "grad_norm": 0.014491735663483808, "learning_rate": 6.757744126892552e-07, "loss": 0.0003, "step": 258200 }, { "epoch": 1.6987164727011965, "grad_norm": 0.01225107974544221, "learning_rate": 6.754862152310738e-07, "loss": 0.0002, "step": 258210 }, { "epoch": 1.69878226087643, "grad_norm": 0.008662025253205245, "learning_rate": 6.751980747878717e-07, "loss": 0.0004, "step": 258220 }, { "epoch": 1.6988480490516635, "grad_norm": 0.002970896210072875, "learning_rate": 6.749099913634483e-07, "loss": 0.0004, "step": 258230 }, { "epoch": 1.6989138372268968, "grad_norm": 0.0009508339745098113, "learning_rate": 6.74621964961601e-07, "loss": 0.0002, "step": 258240 }, { "epoch": 1.69897962540213, "grad_norm": 0.029893079164569035, "learning_rate": 6.743339955861272e-07, "loss": 0.0004, "step": 258250 }, { "epoch": 1.6990454135773636, "grad_norm": 0.0499935702779575, "learning_rate": 6.740460832408241e-07, "loss": 0.0007, "step": 258260 }, { "epoch": 1.6991112017525971, "grad_norm": 0.0069001003277009915, "learning_rate": 6.737582279294868e-07, "loss": 0.0006, "step": 258270 }, { "epoch": 1.6991769899278304, "grad_norm": 0.027375381803790313, "learning_rate": 6.734704296559114e-07, "loss": 0.0006, "step": 258280 }, { "epoch": 1.6992427781030637, "grad_norm": 0.020607127572614584, "learning_rate": 6.731826884238918e-07, "loss": 0.0008, "step": 258290 }, { "epoch": 1.6993085662782972, "grad_norm": 0.021645038357364154, "learning_rate": 6.728950042372212e-07, "loss": 0.0005, "step": 258300 }, { "epoch": 1.6993743544535307, "grad_norm": 0.03554123134852783, "learning_rate": 6.726073770996927e-07, "loss": 0.0005, "step": 258310 }, { "epoch": 1.699440142628764, "grad_norm": 0.026338701139700153, "learning_rate": 6.723198070150989e-07, "loss": 0.0003, "step": 258320 }, { "epoch": 1.6995059308039973, "grad_norm": 0.015366357359782862, "learning_rate": 6.720322939872309e-07, "loss": 0.0004, "step": 258330 }, { "epoch": 1.6995717189792305, "grad_norm": 0.043539310149307335, "learning_rate": 6.717448380198776e-07, "loss": 0.0002, "step": 258340 }, { "epoch": 1.699637507154464, "grad_norm": 0.033446680633798055, "learning_rate": 6.714574391168321e-07, "loss": 0.0007, "step": 258350 }, { "epoch": 1.6997032953296975, "grad_norm": 0.020780306703276217, "learning_rate": 6.711700972818818e-07, "loss": 0.0008, "step": 258360 }, { "epoch": 1.6997690835049308, "grad_norm": 0.06761453553099556, "learning_rate": 6.708828125188149e-07, "loss": 0.0004, "step": 258370 }, { "epoch": 1.6998348716801641, "grad_norm": 0.021994043437319897, "learning_rate": 6.7059558483142e-07, "loss": 0.0007, "step": 258380 }, { "epoch": 1.6999006598553976, "grad_norm": 0.026982505600529987, "learning_rate": 6.703084142234822e-07, "loss": 0.0003, "step": 258390 }, { "epoch": 1.6999664480306311, "grad_norm": 0.07113289990740664, "learning_rate": 6.700213006987894e-07, "loss": 0.0003, "step": 258400 }, { "epoch": 1.7000322362058644, "grad_norm": 0.02027147417912372, "learning_rate": 6.697342442611255e-07, "loss": 0.0004, "step": 258410 }, { "epoch": 1.7000980243810977, "grad_norm": 0.0052033300472353276, "learning_rate": 6.694472449142758e-07, "loss": 0.0019, "step": 258420 }, { "epoch": 1.700163812556331, "grad_norm": 0.016444655495899685, "learning_rate": 6.691603026620241e-07, "loss": 0.0009, "step": 258430 }, { "epoch": 1.7002296007315645, "grad_norm": 0.04450439466467163, "learning_rate": 6.68873417508154e-07, "loss": 0.0012, "step": 258440 }, { "epoch": 1.700295388906798, "grad_norm": 0.25987151772150374, "learning_rate": 6.68586589456447e-07, "loss": 0.0006, "step": 258450 }, { "epoch": 1.7003611770820313, "grad_norm": 0.06483414025286634, "learning_rate": 6.682998185106848e-07, "loss": 0.0007, "step": 258460 }, { "epoch": 1.7004269652572646, "grad_norm": 0.04689886614631609, "learning_rate": 6.680131046746474e-07, "loss": 0.0006, "step": 258470 }, { "epoch": 1.700492753432498, "grad_norm": 0.005515780330724163, "learning_rate": 6.677264479521167e-07, "loss": 0.0005, "step": 258480 }, { "epoch": 1.7005585416077316, "grad_norm": 0.019120829839880282, "learning_rate": 6.674398483468714e-07, "loss": 0.0007, "step": 258490 }, { "epoch": 1.7006243297829648, "grad_norm": 0.02387076783243915, "learning_rate": 6.6715330586269e-07, "loss": 0.0005, "step": 258500 }, { "epoch": 1.7006901179581981, "grad_norm": 0.019957446235375695, "learning_rate": 6.668668205033496e-07, "loss": 0.0006, "step": 258510 }, { "epoch": 1.7007559061334314, "grad_norm": 0.017266622605006592, "learning_rate": 6.665803922726283e-07, "loss": 0.0002, "step": 258520 }, { "epoch": 1.700821694308665, "grad_norm": 0.005370356488256706, "learning_rate": 6.662940211743019e-07, "loss": 0.0004, "step": 258530 }, { "epoch": 1.7008874824838984, "grad_norm": 0.11434840591580252, "learning_rate": 6.660077072121462e-07, "loss": 0.0011, "step": 258540 }, { "epoch": 1.7009532706591317, "grad_norm": 0.06066926263741269, "learning_rate": 6.657214503899357e-07, "loss": 0.0005, "step": 258550 }, { "epoch": 1.701019058834365, "grad_norm": 0.02729924366858171, "learning_rate": 6.654352507114442e-07, "loss": 0.0006, "step": 258560 }, { "epoch": 1.7010848470095985, "grad_norm": 0.0264687763260809, "learning_rate": 6.651491081804457e-07, "loss": 0.0003, "step": 258570 }, { "epoch": 1.701150635184832, "grad_norm": 0.00593888688189415, "learning_rate": 6.648630228007125e-07, "loss": 0.0004, "step": 258580 }, { "epoch": 1.7012164233600653, "grad_norm": 0.008891372703956998, "learning_rate": 6.645769945760156e-07, "loss": 0.0004, "step": 258590 }, { "epoch": 1.7012822115352986, "grad_norm": 0.02637628806375707, "learning_rate": 6.642910235101263e-07, "loss": 0.0007, "step": 258600 }, { "epoch": 1.701347999710532, "grad_norm": 0.03899541125558643, "learning_rate": 6.640051096068162e-07, "loss": 0.0003, "step": 258610 }, { "epoch": 1.7014137878857654, "grad_norm": 0.009854763437285717, "learning_rate": 6.637192528698539e-07, "loss": 0.0002, "step": 258620 }, { "epoch": 1.7014795760609989, "grad_norm": 0.012076210736246915, "learning_rate": 6.634334533030079e-07, "loss": 0.0005, "step": 258630 }, { "epoch": 1.7015453642362321, "grad_norm": 0.0012070074537172861, "learning_rate": 6.63147710910047e-07, "loss": 0.0005, "step": 258640 }, { "epoch": 1.7016111524114654, "grad_norm": 0.006213077700438917, "learning_rate": 6.628620256947377e-07, "loss": 0.0004, "step": 258650 }, { "epoch": 1.701676940586699, "grad_norm": 0.007689607678628372, "learning_rate": 6.625763976608468e-07, "loss": 0.0002, "step": 258660 }, { "epoch": 1.7017427287619324, "grad_norm": 0.0009158082615838288, "learning_rate": 6.622908268121397e-07, "loss": 0.0005, "step": 258670 }, { "epoch": 1.7018085169371657, "grad_norm": 0.07267327553588179, "learning_rate": 6.620053131523824e-07, "loss": 0.0005, "step": 258680 }, { "epoch": 1.701874305112399, "grad_norm": 0.1141962712382241, "learning_rate": 6.617198566853384e-07, "loss": 0.0008, "step": 258690 }, { "epoch": 1.7019400932876325, "grad_norm": 0.004938518512042593, "learning_rate": 6.614344574147713e-07, "loss": 0.0007, "step": 258700 }, { "epoch": 1.702005881462866, "grad_norm": 0.00012287126691435312, "learning_rate": 6.611491153444433e-07, "loss": 0.0002, "step": 258710 }, { "epoch": 1.7020716696380993, "grad_norm": 0.07673758108003327, "learning_rate": 6.608638304781174e-07, "loss": 0.0005, "step": 258720 }, { "epoch": 1.7021374578133326, "grad_norm": 0.0005433710940707158, "learning_rate": 6.605786028195533e-07, "loss": 0.0005, "step": 258730 }, { "epoch": 1.7022032459885659, "grad_norm": 0.031355867449801025, "learning_rate": 6.602934323725135e-07, "loss": 0.0004, "step": 258740 }, { "epoch": 1.7022690341637994, "grad_norm": 0.018405981707158018, "learning_rate": 6.600083191407569e-07, "loss": 0.0002, "step": 258750 }, { "epoch": 1.7023348223390329, "grad_norm": 0.017738995482014918, "learning_rate": 6.597232631280426e-07, "loss": 0.0004, "step": 258760 }, { "epoch": 1.7024006105142662, "grad_norm": 0.033580476458395986, "learning_rate": 6.594382643381281e-07, "loss": 0.0004, "step": 258770 }, { "epoch": 1.7024663986894994, "grad_norm": 0.029283178337473383, "learning_rate": 6.591533227747715e-07, "loss": 0.0002, "step": 258780 }, { "epoch": 1.702532186864733, "grad_norm": 0.04642729248027848, "learning_rate": 6.588684384417293e-07, "loss": 0.0006, "step": 258790 }, { "epoch": 1.7025979750399665, "grad_norm": 0.07238568621532884, "learning_rate": 6.585836113427574e-07, "loss": 0.0009, "step": 258800 }, { "epoch": 1.7026637632151997, "grad_norm": 0.008206453816178521, "learning_rate": 6.582988414816116e-07, "loss": 0.0006, "step": 258810 }, { "epoch": 1.702729551390433, "grad_norm": 0.0505091930122616, "learning_rate": 6.580141288620451e-07, "loss": 0.0007, "step": 258820 }, { "epoch": 1.7027953395656663, "grad_norm": 0.026957828152948032, "learning_rate": 6.577294734878126e-07, "loss": 0.0005, "step": 258830 }, { "epoch": 1.7028611277408998, "grad_norm": 0.009652580429302038, "learning_rate": 6.574448753626672e-07, "loss": 0.0004, "step": 258840 }, { "epoch": 1.7029269159161333, "grad_norm": 0.004840060636092655, "learning_rate": 6.571603344903604e-07, "loss": 0.0004, "step": 258850 }, { "epoch": 1.7029927040913666, "grad_norm": 0.02371469437595903, "learning_rate": 6.568758508746426e-07, "loss": 0.0007, "step": 258860 }, { "epoch": 1.7030584922665999, "grad_norm": 0.023269963224720906, "learning_rate": 6.565914245192667e-07, "loss": 0.0007, "step": 258870 }, { "epoch": 1.7031242804418334, "grad_norm": 0.0002793679559174392, "learning_rate": 6.563070554279821e-07, "loss": 0.0002, "step": 258880 }, { "epoch": 1.703190068617067, "grad_norm": 0.002551026184500349, "learning_rate": 6.560227436045369e-07, "loss": 0.0006, "step": 258890 }, { "epoch": 1.7032558567923002, "grad_norm": 0.05376047004373052, "learning_rate": 6.557384890526808e-07, "loss": 0.0004, "step": 258900 }, { "epoch": 1.7033216449675335, "grad_norm": 0.0005543297519580893, "learning_rate": 6.554542917761602e-07, "loss": 0.0004, "step": 258910 }, { "epoch": 1.703387433142767, "grad_norm": 0.008585317432060353, "learning_rate": 6.551701517787229e-07, "loss": 0.0006, "step": 258920 }, { "epoch": 1.7034532213180003, "grad_norm": 0.0013715371361840022, "learning_rate": 6.548860690641145e-07, "loss": 0.0003, "step": 258930 }, { "epoch": 1.7035190094932338, "grad_norm": 0.010071645945685041, "learning_rate": 6.546020436360811e-07, "loss": 0.0002, "step": 258940 }, { "epoch": 1.703584797668467, "grad_norm": 0.054369215389244406, "learning_rate": 6.543180754983664e-07, "loss": 0.0005, "step": 258950 }, { "epoch": 1.7036505858437003, "grad_norm": 0.01879126813844079, "learning_rate": 6.540341646547149e-07, "loss": 0.0009, "step": 258960 }, { "epoch": 1.7037163740189338, "grad_norm": 0.00018274306036956253, "learning_rate": 6.537503111088689e-07, "loss": 0.0003, "step": 258970 }, { "epoch": 1.7037821621941673, "grad_norm": 0.034905072345352194, "learning_rate": 6.534665148645719e-07, "loss": 0.0003, "step": 258980 }, { "epoch": 1.7038479503694006, "grad_norm": 0.013852021409287785, "learning_rate": 6.531827759255637e-07, "loss": 0.0003, "step": 258990 }, { "epoch": 1.703913738544634, "grad_norm": 0.025296460012804146, "learning_rate": 6.528990942955882e-07, "loss": 0.0009, "step": 259000 }, { "epoch": 1.7039795267198674, "grad_norm": 0.053061028019561904, "learning_rate": 6.526154699783832e-07, "loss": 0.0007, "step": 259010 }, { "epoch": 1.704045314895101, "grad_norm": 0.014435041133835914, "learning_rate": 6.523319029776887e-07, "loss": 0.0004, "step": 259020 }, { "epoch": 1.7041111030703342, "grad_norm": 0.019087449548851564, "learning_rate": 6.520483932972427e-07, "loss": 0.0003, "step": 259030 }, { "epoch": 1.7041768912455675, "grad_norm": 0.0021535232624717887, "learning_rate": 6.517649409407839e-07, "loss": 0.0002, "step": 259040 }, { "epoch": 1.7042426794208008, "grad_norm": 0.039393016642353826, "learning_rate": 6.514815459120489e-07, "loss": 0.0004, "step": 259050 }, { "epoch": 1.7043084675960343, "grad_norm": 0.03796594713001755, "learning_rate": 6.511982082147744e-07, "loss": 0.0004, "step": 259060 }, { "epoch": 1.7043742557712678, "grad_norm": 0.031075723167793425, "learning_rate": 6.509149278526955e-07, "loss": 0.0006, "step": 259070 }, { "epoch": 1.704440043946501, "grad_norm": 0.009211525606273427, "learning_rate": 6.506317048295469e-07, "loss": 0.0001, "step": 259080 }, { "epoch": 1.7045058321217343, "grad_norm": 0.011943126541168712, "learning_rate": 6.503485391490627e-07, "loss": 0.0005, "step": 259090 }, { "epoch": 1.7045716202969678, "grad_norm": 0.0402566551652941, "learning_rate": 6.500654308149768e-07, "loss": 0.0003, "step": 259100 }, { "epoch": 1.7046374084722014, "grad_norm": 0.1029501645658135, "learning_rate": 6.497823798310215e-07, "loss": 0.0008, "step": 259110 }, { "epoch": 1.7047031966474346, "grad_norm": 0.010949780146462584, "learning_rate": 6.494993862009269e-07, "loss": 0.0002, "step": 259120 }, { "epoch": 1.704768984822668, "grad_norm": 0.004395106394359747, "learning_rate": 6.49216449928427e-07, "loss": 0.0002, "step": 259130 }, { "epoch": 1.7048347729979012, "grad_norm": 0.016938241650977723, "learning_rate": 6.489335710172506e-07, "loss": 0.0004, "step": 259140 }, { "epoch": 1.7049005611731347, "grad_norm": 0.010275781953358746, "learning_rate": 6.486507494711274e-07, "loss": 0.0008, "step": 259150 }, { "epoch": 1.7049663493483682, "grad_norm": 0.005315477783813467, "learning_rate": 6.483679852937858e-07, "loss": 0.0003, "step": 259160 }, { "epoch": 1.7050321375236015, "grad_norm": 0.04098571970479458, "learning_rate": 6.480852784889535e-07, "loss": 0.0004, "step": 259170 }, { "epoch": 1.7050979256988348, "grad_norm": 0.052533120611093234, "learning_rate": 6.478026290603589e-07, "loss": 0.0004, "step": 259180 }, { "epoch": 1.7051637138740683, "grad_norm": 0.030408693796251822, "learning_rate": 6.475200370117279e-07, "loss": 0.0008, "step": 259190 }, { "epoch": 1.7052295020493018, "grad_norm": 0.012760707415894053, "learning_rate": 6.472375023467859e-07, "loss": 0.0003, "step": 259200 }, { "epoch": 1.705295290224535, "grad_norm": 0.011864803753978569, "learning_rate": 6.46955025069258e-07, "loss": 0.0003, "step": 259210 }, { "epoch": 1.7053610783997684, "grad_norm": 0.006960020966936226, "learning_rate": 6.466726051828686e-07, "loss": 0.0004, "step": 259220 }, { "epoch": 1.7054268665750016, "grad_norm": 0.099328403704273, "learning_rate": 6.46390242691341e-07, "loss": 0.0011, "step": 259230 }, { "epoch": 1.7054926547502351, "grad_norm": 0.025366392259378364, "learning_rate": 6.461079375983986e-07, "loss": 0.0009, "step": 259240 }, { "epoch": 1.7055584429254687, "grad_norm": 0.03451204645672522, "learning_rate": 6.458256899077614e-07, "loss": 0.0005, "step": 259250 }, { "epoch": 1.705624231100702, "grad_norm": 0.03004686819272036, "learning_rate": 6.45543499623153e-07, "loss": 0.0003, "step": 259260 }, { "epoch": 1.7056900192759352, "grad_norm": 0.005651077554028996, "learning_rate": 6.452613667482926e-07, "loss": 0.0003, "step": 259270 }, { "epoch": 1.7057558074511687, "grad_norm": 0.07191647396992541, "learning_rate": 6.449792912869001e-07, "loss": 0.0003, "step": 259280 }, { "epoch": 1.7058215956264022, "grad_norm": 0.023310479970924825, "learning_rate": 6.446972732426948e-07, "loss": 0.0004, "step": 259290 }, { "epoch": 1.7058873838016355, "grad_norm": 0.0028569044782413575, "learning_rate": 6.444153126193942e-07, "loss": 0.0005, "step": 259300 }, { "epoch": 1.7059531719768688, "grad_norm": 0.0006563858814741556, "learning_rate": 6.441334094207158e-07, "loss": 0.0004, "step": 259310 }, { "epoch": 1.7060189601521023, "grad_norm": 0.00816401970565351, "learning_rate": 6.438515636503767e-07, "loss": 0.0008, "step": 259320 }, { "epoch": 1.7060847483273358, "grad_norm": 0.0607288495376964, "learning_rate": 6.435697753120923e-07, "loss": 0.0012, "step": 259330 }, { "epoch": 1.706150536502569, "grad_norm": 0.000774767939417972, "learning_rate": 6.432880444095785e-07, "loss": 0.0002, "step": 259340 }, { "epoch": 1.7062163246778024, "grad_norm": 0.009388470261340405, "learning_rate": 6.430063709465484e-07, "loss": 0.0005, "step": 259350 }, { "epoch": 1.7062821128530357, "grad_norm": 0.02741066357753834, "learning_rate": 6.427247549267168e-07, "loss": 0.0002, "step": 259360 }, { "epoch": 1.7063479010282692, "grad_norm": 0.07837686009966323, "learning_rate": 6.424431963537958e-07, "loss": 0.0006, "step": 259370 }, { "epoch": 1.7064136892035027, "grad_norm": 0.0354157321135731, "learning_rate": 6.421616952314974e-07, "loss": 0.0008, "step": 259380 }, { "epoch": 1.706479477378736, "grad_norm": 0.006985557527079675, "learning_rate": 6.418802515635337e-07, "loss": 0.0006, "step": 259390 }, { "epoch": 1.7065452655539692, "grad_norm": 0.032594114814334045, "learning_rate": 6.415988653536153e-07, "loss": 0.0008, "step": 259400 }, { "epoch": 1.7066110537292027, "grad_norm": 0.001134636668520123, "learning_rate": 6.413175366054519e-07, "loss": 0.0002, "step": 259410 }, { "epoch": 1.7066768419044362, "grad_norm": 0.008506622576819257, "learning_rate": 6.41036265322752e-07, "loss": 0.0002, "step": 259420 }, { "epoch": 1.7067426300796695, "grad_norm": 0.03012594701329539, "learning_rate": 6.407550515092242e-07, "loss": 0.0004, "step": 259430 }, { "epoch": 1.7068084182549028, "grad_norm": 0.034666131728901775, "learning_rate": 6.404738951685768e-07, "loss": 0.0001, "step": 259440 }, { "epoch": 1.706874206430136, "grad_norm": 0.001859175278850925, "learning_rate": 6.401927963045152e-07, "loss": 0.0002, "step": 259450 }, { "epoch": 1.7069399946053696, "grad_norm": 0.02684237260672992, "learning_rate": 6.399117549207467e-07, "loss": 0.0004, "step": 259460 }, { "epoch": 1.707005782780603, "grad_norm": 0.04133412566301682, "learning_rate": 6.396307710209759e-07, "loss": 0.0002, "step": 259470 }, { "epoch": 1.7070715709558364, "grad_norm": 0.009620599354003861, "learning_rate": 6.393498446089075e-07, "loss": 0.0005, "step": 259480 }, { "epoch": 1.7071373591310697, "grad_norm": 0.00020791493679178416, "learning_rate": 6.390689756882457e-07, "loss": 0.0005, "step": 259490 }, { "epoch": 1.7072031473063032, "grad_norm": 0.12023114560965104, "learning_rate": 6.387881642626925e-07, "loss": 0.0008, "step": 259500 }, { "epoch": 1.7072689354815367, "grad_norm": 0.013640657536091144, "learning_rate": 6.385074103359501e-07, "loss": 0.0006, "step": 259510 }, { "epoch": 1.70733472365677, "grad_norm": 0.017731127439615333, "learning_rate": 6.382267139117215e-07, "loss": 0.0002, "step": 259520 }, { "epoch": 1.7074005118320033, "grad_norm": 0.03306161279411878, "learning_rate": 6.379460749937067e-07, "loss": 0.0005, "step": 259530 }, { "epoch": 1.7074663000072365, "grad_norm": 0.001032345618536012, "learning_rate": 6.376654935856058e-07, "loss": 0.0003, "step": 259540 }, { "epoch": 1.70753208818247, "grad_norm": 0.014534124550833026, "learning_rate": 6.373849696911172e-07, "loss": 0.0005, "step": 259550 }, { "epoch": 1.7075978763577035, "grad_norm": 0.00021000089456261312, "learning_rate": 6.371045033139406e-07, "loss": 0.0005, "step": 259560 }, { "epoch": 1.7076636645329368, "grad_norm": 0.025665701995497434, "learning_rate": 6.368240944577731e-07, "loss": 0.0003, "step": 259570 }, { "epoch": 1.7077294527081701, "grad_norm": 0.007729152155748098, "learning_rate": 6.365437431263111e-07, "loss": 0.0004, "step": 259580 }, { "epoch": 1.7077952408834036, "grad_norm": 0.029084448757261468, "learning_rate": 6.362634493232517e-07, "loss": 0.0009, "step": 259590 }, { "epoch": 1.7078610290586371, "grad_norm": 0.005697321421851239, "learning_rate": 6.359832130522897e-07, "loss": 0.0004, "step": 259600 }, { "epoch": 1.7079268172338704, "grad_norm": 0.013360074033241857, "learning_rate": 6.3570303431712e-07, "loss": 0.0006, "step": 259610 }, { "epoch": 1.7079926054091037, "grad_norm": 0.02472369411872467, "learning_rate": 6.354229131214368e-07, "loss": 0.0005, "step": 259620 }, { "epoch": 1.7080583935843372, "grad_norm": 0.015353602690583475, "learning_rate": 6.351428494689332e-07, "loss": 0.0005, "step": 259630 }, { "epoch": 1.7081241817595705, "grad_norm": 0.04583725256916485, "learning_rate": 6.348628433632997e-07, "loss": 0.0005, "step": 259640 }, { "epoch": 1.708189969934804, "grad_norm": 0.04017665059721181, "learning_rate": 6.345828948082311e-07, "loss": 0.0004, "step": 259650 }, { "epoch": 1.7082557581100373, "grad_norm": 0.0005370506816079532, "learning_rate": 6.343030038074166e-07, "loss": 0.0005, "step": 259660 }, { "epoch": 1.7083215462852706, "grad_norm": 0.002818675598019274, "learning_rate": 6.340231703645466e-07, "loss": 0.0004, "step": 259670 }, { "epoch": 1.708387334460504, "grad_norm": 0.008612270868222681, "learning_rate": 6.33743394483311e-07, "loss": 0.0005, "step": 259680 }, { "epoch": 1.7084531226357376, "grad_norm": 0.006581931749004388, "learning_rate": 6.334636761673968e-07, "loss": 0.0003, "step": 259690 }, { "epoch": 1.7085189108109708, "grad_norm": 0.08341837241990181, "learning_rate": 6.331840154204938e-07, "loss": 0.0004, "step": 259700 }, { "epoch": 1.7085846989862041, "grad_norm": 0.018198937040722514, "learning_rate": 6.329044122462875e-07, "loss": 0.0015, "step": 259710 }, { "epoch": 1.7086504871614376, "grad_norm": 0.038082772538947374, "learning_rate": 6.326248666484647e-07, "loss": 0.0006, "step": 259720 }, { "epoch": 1.7087162753366711, "grad_norm": 0.010525958670046616, "learning_rate": 6.323453786307116e-07, "loss": 0.0002, "step": 259730 }, { "epoch": 1.7087820635119044, "grad_norm": 0.004173491538008644, "learning_rate": 6.320659481967123e-07, "loss": 0.0006, "step": 259740 }, { "epoch": 1.7088478516871377, "grad_norm": 0.020877914132547658, "learning_rate": 6.317865753501512e-07, "loss": 0.0004, "step": 259750 }, { "epoch": 1.708913639862371, "grad_norm": 0.034474625784823326, "learning_rate": 6.315072600947114e-07, "loss": 0.0004, "step": 259760 }, { "epoch": 1.7089794280376045, "grad_norm": 0.013826367189801394, "learning_rate": 6.312280024340744e-07, "loss": 0.0003, "step": 259770 }, { "epoch": 1.709045216212838, "grad_norm": 0.013796613850098578, "learning_rate": 6.30948802371924e-07, "loss": 0.0003, "step": 259780 }, { "epoch": 1.7091110043880713, "grad_norm": 0.031041828815634564, "learning_rate": 6.306696599119406e-07, "loss": 0.0004, "step": 259790 }, { "epoch": 1.7091767925633046, "grad_norm": 0.016366319237156368, "learning_rate": 6.303905750578038e-07, "loss": 0.0003, "step": 259800 }, { "epoch": 1.709242580738538, "grad_norm": 0.010659537623599606, "learning_rate": 6.301115478131936e-07, "loss": 0.0003, "step": 259810 }, { "epoch": 1.7093083689137716, "grad_norm": 0.02095541491252319, "learning_rate": 6.298325781817887e-07, "loss": 0.0005, "step": 259820 }, { "epoch": 1.7093741570890049, "grad_norm": 0.050569369418371146, "learning_rate": 6.295536661672669e-07, "loss": 0.0002, "step": 259830 }, { "epoch": 1.7094399452642381, "grad_norm": 0.020863591568052828, "learning_rate": 6.292748117733049e-07, "loss": 0.0004, "step": 259840 }, { "epoch": 1.7095057334394714, "grad_norm": 0.0001572896001286565, "learning_rate": 6.289960150035807e-07, "loss": 0.0004, "step": 259850 }, { "epoch": 1.709571521614705, "grad_norm": 0.03751111536818245, "learning_rate": 6.287172758617682e-07, "loss": 0.0001, "step": 259860 }, { "epoch": 1.7096373097899384, "grad_norm": 0.008456851868761566, "learning_rate": 6.28438594351543e-07, "loss": 0.0001, "step": 259870 }, { "epoch": 1.7097030979651717, "grad_norm": 0.00414877375414102, "learning_rate": 6.2815997047658e-07, "loss": 0.0003, "step": 259880 }, { "epoch": 1.709768886140405, "grad_norm": 0.005384969098915145, "learning_rate": 6.278814042405518e-07, "loss": 0.0003, "step": 259890 }, { "epoch": 1.7098346743156385, "grad_norm": 0.0004962304937525398, "learning_rate": 6.276028956471314e-07, "loss": 0.0004, "step": 259900 }, { "epoch": 1.709900462490872, "grad_norm": 0.03210037793820178, "learning_rate": 6.273244446999904e-07, "loss": 0.0005, "step": 259910 }, { "epoch": 1.7099662506661053, "grad_norm": 0.009061361368561937, "learning_rate": 6.270460514028004e-07, "loss": 0.0004, "step": 259920 }, { "epoch": 1.7100320388413386, "grad_norm": 0.030288293066553604, "learning_rate": 6.267677157592312e-07, "loss": 0.0004, "step": 259930 }, { "epoch": 1.710097827016572, "grad_norm": 0.1116029108797829, "learning_rate": 6.264894377729525e-07, "loss": 0.0004, "step": 259940 }, { "epoch": 1.7101636151918054, "grad_norm": 0.027579656791566504, "learning_rate": 6.262112174476337e-07, "loss": 0.0003, "step": 259950 }, { "epoch": 1.7102294033670389, "grad_norm": 0.09613242424938187, "learning_rate": 6.259330547869425e-07, "loss": 0.0002, "step": 259960 }, { "epoch": 1.7102951915422722, "grad_norm": 0.0034357568875117927, "learning_rate": 6.256549497945458e-07, "loss": 0.0002, "step": 259970 }, { "epoch": 1.7103609797175054, "grad_norm": 0.04830676526022708, "learning_rate": 6.253769024741113e-07, "loss": 0.0002, "step": 259980 }, { "epoch": 1.710426767892739, "grad_norm": 0.016747971566705447, "learning_rate": 6.250989128293039e-07, "loss": 0.0004, "step": 259990 }, { "epoch": 1.7104925560679725, "grad_norm": 0.006820711390220364, "learning_rate": 6.248209808637878e-07, "loss": 0.0002, "step": 260000 }, { "epoch": 1.7105583442432057, "grad_norm": 0.014853706692282271, "learning_rate": 6.245431065812297e-07, "loss": 0.0003, "step": 260010 }, { "epoch": 1.710624132418439, "grad_norm": 0.1882882893928244, "learning_rate": 6.242652899852919e-07, "loss": 0.0012, "step": 260020 }, { "epoch": 1.7106899205936725, "grad_norm": 0.04614099333738059, "learning_rate": 6.239875310796367e-07, "loss": 0.0005, "step": 260030 }, { "epoch": 1.710755708768906, "grad_norm": 0.0003092595100667092, "learning_rate": 6.237098298679267e-07, "loss": 0.0003, "step": 260040 }, { "epoch": 1.7108214969441393, "grad_norm": 0.003958872621348941, "learning_rate": 6.234321863538228e-07, "loss": 0.0011, "step": 260050 }, { "epoch": 1.7108872851193726, "grad_norm": 0.0004300223996122608, "learning_rate": 6.23154600540986e-07, "loss": 0.0004, "step": 260060 }, { "epoch": 1.7109530732946059, "grad_norm": 0.01344316807900911, "learning_rate": 6.228770724330757e-07, "loss": 0.0003, "step": 260070 }, { "epoch": 1.7110188614698394, "grad_norm": 0.06109945354924635, "learning_rate": 6.22599602033751e-07, "loss": 0.0008, "step": 260080 }, { "epoch": 1.711084649645073, "grad_norm": 0.050904508262373534, "learning_rate": 6.223221893466697e-07, "loss": 0.0012, "step": 260090 }, { "epoch": 1.7111504378203062, "grad_norm": 0.027069238386130474, "learning_rate": 6.220448343754897e-07, "loss": 0.0001, "step": 260100 }, { "epoch": 1.7112162259955395, "grad_norm": 0.006168194329415069, "learning_rate": 6.217675371238673e-07, "loss": 0.0002, "step": 260110 }, { "epoch": 1.711282014170773, "grad_norm": 0.0637612930147645, "learning_rate": 6.214902975954589e-07, "loss": 0.0005, "step": 260120 }, { "epoch": 1.7113478023460065, "grad_norm": 0.0197646646818391, "learning_rate": 6.212131157939177e-07, "loss": 0.0001, "step": 260130 }, { "epoch": 1.7114135905212398, "grad_norm": 0.00853650679064317, "learning_rate": 6.209359917229018e-07, "loss": 0.0002, "step": 260140 }, { "epoch": 1.711479378696473, "grad_norm": 0.018189728850888525, "learning_rate": 6.206589253860623e-07, "loss": 0.0004, "step": 260150 }, { "epoch": 1.7115451668717063, "grad_norm": 0.05017447239192347, "learning_rate": 6.203819167870528e-07, "loss": 0.0005, "step": 260160 }, { "epoch": 1.7116109550469398, "grad_norm": 0.02391777530044142, "learning_rate": 6.20104965929525e-07, "loss": 0.0003, "step": 260170 }, { "epoch": 1.7116767432221733, "grad_norm": 0.0035477435007275813, "learning_rate": 6.198280728171307e-07, "loss": 0.0004, "step": 260180 }, { "epoch": 1.7117425313974066, "grad_norm": 0.04130258926570069, "learning_rate": 6.195512374535201e-07, "loss": 0.0005, "step": 260190 }, { "epoch": 1.71180831957264, "grad_norm": 0.00527799880701134, "learning_rate": 6.192744598423434e-07, "loss": 0.0003, "step": 260200 }, { "epoch": 1.7118741077478734, "grad_norm": 0.006519470790223424, "learning_rate": 6.189977399872493e-07, "loss": 0.0004, "step": 260210 }, { "epoch": 1.711939895923107, "grad_norm": 0.041796488958623194, "learning_rate": 6.187210778918867e-07, "loss": 0.0004, "step": 260220 }, { "epoch": 1.7120056840983402, "grad_norm": 0.023695426711150974, "learning_rate": 6.184444735599027e-07, "loss": 0.0005, "step": 260230 }, { "epoch": 1.7120714722735735, "grad_norm": 0.051782483756041844, "learning_rate": 6.181679269949436e-07, "loss": 0.0006, "step": 260240 }, { "epoch": 1.712137260448807, "grad_norm": 0.011245993111507186, "learning_rate": 6.178914382006568e-07, "loss": 0.0006, "step": 260250 }, { "epoch": 1.7122030486240403, "grad_norm": 0.09223493486233081, "learning_rate": 6.176150071806852e-07, "loss": 0.0008, "step": 260260 }, { "epoch": 1.7122688367992738, "grad_norm": 0.07151658891225722, "learning_rate": 6.173386339386762e-07, "loss": 0.0006, "step": 260270 }, { "epoch": 1.712334624974507, "grad_norm": 0.03339892671033541, "learning_rate": 6.170623184782714e-07, "loss": 0.0005, "step": 260280 }, { "epoch": 1.7124004131497403, "grad_norm": 0.012891930004860334, "learning_rate": 6.167860608031157e-07, "loss": 0.0002, "step": 260290 }, { "epoch": 1.7124662013249738, "grad_norm": 0.003657986778713834, "learning_rate": 6.165098609168496e-07, "loss": 0.0002, "step": 260300 }, { "epoch": 1.7125319895002074, "grad_norm": 0.06465976866057971, "learning_rate": 6.162337188231149e-07, "loss": 0.0004, "step": 260310 }, { "epoch": 1.7125977776754406, "grad_norm": 0.043062673326864635, "learning_rate": 6.15957634525553e-07, "loss": 0.0003, "step": 260320 }, { "epoch": 1.712663565850674, "grad_norm": 0.006686699710718379, "learning_rate": 6.156816080278033e-07, "loss": 0.0002, "step": 260330 }, { "epoch": 1.7127293540259074, "grad_norm": 0.002666705913953838, "learning_rate": 6.15405639333505e-07, "loss": 0.0001, "step": 260340 }, { "epoch": 1.712795142201141, "grad_norm": 0.028098523053389456, "learning_rate": 6.151297284462965e-07, "loss": 0.0006, "step": 260350 }, { "epoch": 1.7128609303763742, "grad_norm": 0.028304867371398836, "learning_rate": 6.148538753698158e-07, "loss": 0.0004, "step": 260360 }, { "epoch": 1.7129267185516075, "grad_norm": 0.027331632059438313, "learning_rate": 6.145780801076989e-07, "loss": 0.0003, "step": 260370 }, { "epoch": 1.7129925067268408, "grad_norm": 0.00791713872637599, "learning_rate": 6.14302342663583e-07, "loss": 0.0003, "step": 260380 }, { "epoch": 1.7130582949020743, "grad_norm": 0.023367293984267193, "learning_rate": 6.140266630411018e-07, "loss": 0.0003, "step": 260390 }, { "epoch": 1.7131240830773078, "grad_norm": 0.003549192518470281, "learning_rate": 6.137510412438924e-07, "loss": 0.0005, "step": 260400 }, { "epoch": 1.713189871252541, "grad_norm": 0.02312299549274149, "learning_rate": 6.134754772755868e-07, "loss": 0.0004, "step": 260410 }, { "epoch": 1.7132556594277744, "grad_norm": 0.039630419384056655, "learning_rate": 6.13199971139819e-07, "loss": 0.0004, "step": 260420 }, { "epoch": 1.7133214476030079, "grad_norm": 0.05029609449809571, "learning_rate": 6.129245228402209e-07, "loss": 0.0003, "step": 260430 }, { "epoch": 1.7133872357782414, "grad_norm": 0.060600652450109226, "learning_rate": 6.126491323804235e-07, "loss": 0.0012, "step": 260440 }, { "epoch": 1.7134530239534747, "grad_norm": 0.07188224509229946, "learning_rate": 6.123737997640583e-07, "loss": 0.0005, "step": 260450 }, { "epoch": 1.713518812128708, "grad_norm": 0.010360108759814605, "learning_rate": 6.120985249947553e-07, "loss": 0.0006, "step": 260460 }, { "epoch": 1.7135846003039412, "grad_norm": 0.0387611700675905, "learning_rate": 6.118233080761438e-07, "loss": 0.0005, "step": 260470 }, { "epoch": 1.7136503884791747, "grad_norm": 0.0381778833208733, "learning_rate": 6.115481490118518e-07, "loss": 0.0004, "step": 260480 }, { "epoch": 1.7137161766544082, "grad_norm": 0.04844645351757632, "learning_rate": 6.112730478055079e-07, "loss": 0.0006, "step": 260490 }, { "epoch": 1.7137819648296415, "grad_norm": 0.011958749463244012, "learning_rate": 6.109980044607378e-07, "loss": 0.0003, "step": 260500 }, { "epoch": 1.7138477530048748, "grad_norm": 0.015325309988775562, "learning_rate": 6.107230189811685e-07, "loss": 0.0002, "step": 260510 }, { "epoch": 1.7139135411801083, "grad_norm": 0.0261866042315945, "learning_rate": 6.104480913704247e-07, "loss": 0.0004, "step": 260520 }, { "epoch": 1.7139793293553418, "grad_norm": 0.05291381582492069, "learning_rate": 6.101732216321327e-07, "loss": 0.0003, "step": 260530 }, { "epoch": 1.714045117530575, "grad_norm": 0.011872681410002783, "learning_rate": 6.098984097699157e-07, "loss": 0.001, "step": 260540 }, { "epoch": 1.7141109057058084, "grad_norm": 0.02182148878561466, "learning_rate": 6.096236557873963e-07, "loss": 0.0006, "step": 260550 }, { "epoch": 1.7141766938810417, "grad_norm": 0.034327325947744174, "learning_rate": 6.093489596881969e-07, "loss": 0.0004, "step": 260560 }, { "epoch": 1.7142424820562752, "grad_norm": 0.05195643833054645, "learning_rate": 6.090743214759398e-07, "loss": 0.0004, "step": 260570 }, { "epoch": 1.7143082702315087, "grad_norm": 0.039564033119948695, "learning_rate": 6.087997411542457e-07, "loss": 0.0004, "step": 260580 }, { "epoch": 1.714374058406742, "grad_norm": 0.01448682506882555, "learning_rate": 6.085252187267343e-07, "loss": 0.0004, "step": 260590 }, { "epoch": 1.7144398465819752, "grad_norm": 0.10969493192098392, "learning_rate": 6.082507541970251e-07, "loss": 0.0003, "step": 260600 }, { "epoch": 1.7145056347572087, "grad_norm": 0.025272186588682816, "learning_rate": 6.079763475687372e-07, "loss": 0.0004, "step": 260610 }, { "epoch": 1.7145714229324422, "grad_norm": 0.045371862408969725, "learning_rate": 6.077019988454874e-07, "loss": 0.0006, "step": 260620 }, { "epoch": 1.7146372111076755, "grad_norm": 0.01175363645671445, "learning_rate": 6.074277080308938e-07, "loss": 0.0009, "step": 260630 }, { "epoch": 1.7147029992829088, "grad_norm": 0.012694847990147588, "learning_rate": 6.071534751285719e-07, "loss": 0.0002, "step": 260640 }, { "epoch": 1.7147687874581423, "grad_norm": 0.0015827928904548925, "learning_rate": 6.068793001421364e-07, "loss": 0.0003, "step": 260650 }, { "epoch": 1.7148345756333756, "grad_norm": 0.10007049827208343, "learning_rate": 6.06605183075204e-07, "loss": 0.0003, "step": 260660 }, { "epoch": 1.714900363808609, "grad_norm": 0.3209097816429814, "learning_rate": 6.063311239313885e-07, "loss": 0.001, "step": 260670 }, { "epoch": 1.7149661519838424, "grad_norm": 0.00033401240543594254, "learning_rate": 6.060571227143019e-07, "loss": 0.0005, "step": 260680 }, { "epoch": 1.7150319401590757, "grad_norm": 0.009780482776354326, "learning_rate": 6.057831794275576e-07, "loss": 0.0009, "step": 260690 }, { "epoch": 1.7150977283343092, "grad_norm": 0.020079420191595292, "learning_rate": 6.05509294074767e-07, "loss": 0.0004, "step": 260700 }, { "epoch": 1.7151635165095427, "grad_norm": 0.004881757552509017, "learning_rate": 6.052354666595411e-07, "loss": 0.0006, "step": 260710 }, { "epoch": 1.715229304684776, "grad_norm": 0.04887666423970397, "learning_rate": 6.049616971854899e-07, "loss": 0.001, "step": 260720 }, { "epoch": 1.7152950928600093, "grad_norm": 0.027344029828399473, "learning_rate": 6.046879856562233e-07, "loss": 0.0009, "step": 260730 }, { "epoch": 1.7153608810352428, "grad_norm": 0.028422220441848077, "learning_rate": 6.044143320753488e-07, "loss": 0.0004, "step": 260740 }, { "epoch": 1.7154266692104763, "grad_norm": 0.0004514814095354923, "learning_rate": 6.041407364464758e-07, "loss": 0.0001, "step": 260750 }, { "epoch": 1.7154924573857095, "grad_norm": 0.058402831009796576, "learning_rate": 6.038671987732103e-07, "loss": 0.0011, "step": 260760 }, { "epoch": 1.7155582455609428, "grad_norm": 0.02207451021455981, "learning_rate": 6.03593719059159e-07, "loss": 0.0006, "step": 260770 }, { "epoch": 1.7156240337361761, "grad_norm": 0.1196994949203499, "learning_rate": 6.033202973079266e-07, "loss": 0.0006, "step": 260780 }, { "epoch": 1.7156898219114096, "grad_norm": 0.03122464734189958, "learning_rate": 6.030469335231198e-07, "loss": 0.0002, "step": 260790 }, { "epoch": 1.7157556100866431, "grad_norm": 0.031103209637431194, "learning_rate": 6.027736277083418e-07, "loss": 0.0003, "step": 260800 }, { "epoch": 1.7158213982618764, "grad_norm": 0.09579343720064491, "learning_rate": 6.025003798671958e-07, "loss": 0.0003, "step": 260810 }, { "epoch": 1.7158871864371097, "grad_norm": 0.016761659387104454, "learning_rate": 6.022271900032845e-07, "loss": 0.0003, "step": 260820 }, { "epoch": 1.7159529746123432, "grad_norm": 0.00011630161788856685, "learning_rate": 6.019540581202093e-07, "loss": 0.0006, "step": 260830 }, { "epoch": 1.7160187627875767, "grad_norm": 0.09388546570661675, "learning_rate": 6.016809842215715e-07, "loss": 0.0005, "step": 260840 }, { "epoch": 1.71608455096281, "grad_norm": 0.1809609752361373, "learning_rate": 6.014079683109714e-07, "loss": 0.0009, "step": 260850 }, { "epoch": 1.7161503391380433, "grad_norm": 0.14042545704305356, "learning_rate": 6.011350103920083e-07, "loss": 0.0013, "step": 260860 }, { "epoch": 1.7162161273132766, "grad_norm": 0.0059138684920923695, "learning_rate": 6.008621104682805e-07, "loss": 0.0013, "step": 260870 }, { "epoch": 1.71628191548851, "grad_norm": 0.029269784045520934, "learning_rate": 6.005892685433873e-07, "loss": 0.0008, "step": 260880 }, { "epoch": 1.7163477036637436, "grad_norm": 0.002604449108462729, "learning_rate": 6.003164846209241e-07, "loss": 0.0007, "step": 260890 }, { "epoch": 1.7164134918389768, "grad_norm": 0.06567131638128205, "learning_rate": 6.000437587044888e-07, "loss": 0.0005, "step": 260900 }, { "epoch": 1.7164792800142101, "grad_norm": 0.024996933098466328, "learning_rate": 5.997710907976756e-07, "loss": 0.0005, "step": 260910 }, { "epoch": 1.7165450681894436, "grad_norm": 0.07273174694371616, "learning_rate": 5.994984809040811e-07, "loss": 0.0005, "step": 260920 }, { "epoch": 1.7166108563646771, "grad_norm": 0.011998754439033046, "learning_rate": 5.99225929027299e-07, "loss": 0.0009, "step": 260930 }, { "epoch": 1.7166766445399104, "grad_norm": 0.03735349383666668, "learning_rate": 5.989534351709214e-07, "loss": 0.0005, "step": 260940 }, { "epoch": 1.7167424327151437, "grad_norm": 0.007743183400089969, "learning_rate": 5.986809993385423e-07, "loss": 0.0002, "step": 260950 }, { "epoch": 1.7168082208903772, "grad_norm": 0.06629623396440001, "learning_rate": 5.984086215337531e-07, "loss": 0.0007, "step": 260960 }, { "epoch": 1.7168740090656105, "grad_norm": 0.02504359106716191, "learning_rate": 5.98136301760145e-07, "loss": 0.0014, "step": 260970 }, { "epoch": 1.716939797240844, "grad_norm": 0.04911242052699127, "learning_rate": 5.978640400213076e-07, "loss": 0.0005, "step": 260980 }, { "epoch": 1.7170055854160773, "grad_norm": 0.02684197327404764, "learning_rate": 5.975918363208305e-07, "loss": 0.0002, "step": 260990 }, { "epoch": 1.7170713735913106, "grad_norm": 9.418228233036656e-05, "learning_rate": 5.973196906623036e-07, "loss": 0.0003, "step": 261000 }, { "epoch": 1.717137161766544, "grad_norm": 0.02319841621786233, "learning_rate": 5.970476030493139e-07, "loss": 0.0008, "step": 261010 }, { "epoch": 1.7172029499417776, "grad_norm": 0.012314798837018713, "learning_rate": 5.967755734854491e-07, "loss": 0.0005, "step": 261020 }, { "epoch": 1.7172687381170109, "grad_norm": 0.04201922461744846, "learning_rate": 5.965036019742954e-07, "loss": 0.0004, "step": 261030 }, { "epoch": 1.7173345262922441, "grad_norm": 0.028413281483612524, "learning_rate": 5.962316885194374e-07, "loss": 0.0004, "step": 261040 }, { "epoch": 1.7174003144674777, "grad_norm": 0.12113134759185193, "learning_rate": 5.95959833124462e-07, "loss": 0.0004, "step": 261050 }, { "epoch": 1.7174661026427112, "grad_norm": 0.03261046283135004, "learning_rate": 5.956880357929535e-07, "loss": 0.0003, "step": 261060 }, { "epoch": 1.7175318908179444, "grad_norm": 0.010557710841462006, "learning_rate": 5.954162965284938e-07, "loss": 0.0008, "step": 261070 }, { "epoch": 1.7175976789931777, "grad_norm": 0.0003993286538933164, "learning_rate": 5.951446153346663e-07, "loss": 0.0005, "step": 261080 }, { "epoch": 1.717663467168411, "grad_norm": 0.055314934021603764, "learning_rate": 5.948729922150525e-07, "loss": 0.0004, "step": 261090 }, { "epoch": 1.7177292553436445, "grad_norm": 0.005665424899096955, "learning_rate": 5.946014271732342e-07, "loss": 0.0006, "step": 261100 }, { "epoch": 1.717795043518878, "grad_norm": 0.041769877078494444, "learning_rate": 5.943299202127912e-07, "loss": 0.0008, "step": 261110 }, { "epoch": 1.7178608316941113, "grad_norm": 0.03220906129927373, "learning_rate": 5.940584713373032e-07, "loss": 0.0004, "step": 261120 }, { "epoch": 1.7179266198693446, "grad_norm": 0.019385590968070437, "learning_rate": 5.937870805503492e-07, "loss": 0.0001, "step": 261130 }, { "epoch": 1.717992408044578, "grad_norm": 0.02485724724962311, "learning_rate": 5.93515747855507e-07, "loss": 0.0006, "step": 261140 }, { "epoch": 1.7180581962198116, "grad_norm": 0.003293116151849879, "learning_rate": 5.932444732563536e-07, "loss": 0.0003, "step": 261150 }, { "epoch": 1.7181239843950449, "grad_norm": 0.026067607911839416, "learning_rate": 5.929732567564661e-07, "loss": 0.0011, "step": 261160 }, { "epoch": 1.7181897725702782, "grad_norm": 0.01884905086856508, "learning_rate": 5.927020983594195e-07, "loss": 0.0004, "step": 261170 }, { "epoch": 1.7182555607455114, "grad_norm": 0.02479896799088112, "learning_rate": 5.924309980687898e-07, "loss": 0.003, "step": 261180 }, { "epoch": 1.718321348920745, "grad_norm": 0.04643611941096987, "learning_rate": 5.921599558881508e-07, "loss": 0.0007, "step": 261190 }, { "epoch": 1.7183871370959785, "grad_norm": 0.029040294435093327, "learning_rate": 5.918889718210763e-07, "loss": 0.0009, "step": 261200 }, { "epoch": 1.7184529252712117, "grad_norm": 0.015016820767078547, "learning_rate": 5.916180458711384e-07, "loss": 0.0004, "step": 261210 }, { "epoch": 1.718518713446445, "grad_norm": 0.030256437576544313, "learning_rate": 5.913471780419095e-07, "loss": 0.0006, "step": 261220 }, { "epoch": 1.7185845016216785, "grad_norm": 0.029972907475842924, "learning_rate": 5.910763683369597e-07, "loss": 0.0002, "step": 261230 }, { "epoch": 1.718650289796912, "grad_norm": 0.02124374895452964, "learning_rate": 5.908056167598608e-07, "loss": 0.0007, "step": 261240 }, { "epoch": 1.7187160779721453, "grad_norm": 0.019356869119827376, "learning_rate": 5.905349233141816e-07, "loss": 0.0002, "step": 261250 }, { "epoch": 1.7187818661473786, "grad_norm": 0.06584530016802867, "learning_rate": 5.902642880034914e-07, "loss": 0.0003, "step": 261260 }, { "epoch": 1.718847654322612, "grad_norm": 0.08082069390346115, "learning_rate": 5.89993710831358e-07, "loss": 0.0004, "step": 261270 }, { "epoch": 1.7189134424978454, "grad_norm": 0.00025403240489040854, "learning_rate": 5.897231918013491e-07, "loss": 0.0005, "step": 261280 }, { "epoch": 1.718979230673079, "grad_norm": 0.008163668804158187, "learning_rate": 5.894527309170306e-07, "loss": 0.0003, "step": 261290 }, { "epoch": 1.7190450188483122, "grad_norm": 0.008921227978940783, "learning_rate": 5.891823281819675e-07, "loss": 0.0002, "step": 261300 }, { "epoch": 1.7191108070235455, "grad_norm": 0.008769443003821395, "learning_rate": 5.88911983599727e-07, "loss": 0.0003, "step": 261310 }, { "epoch": 1.719176595198779, "grad_norm": 0.014337261989796712, "learning_rate": 5.886416971738723e-07, "loss": 0.0005, "step": 261320 }, { "epoch": 1.7192423833740125, "grad_norm": 0.0005786366657389075, "learning_rate": 5.883714689079672e-07, "loss": 0.0006, "step": 261330 }, { "epoch": 1.7193081715492458, "grad_norm": 0.07507571090861216, "learning_rate": 5.881012988055739e-07, "loss": 0.0003, "step": 261340 }, { "epoch": 1.719373959724479, "grad_norm": 0.043347433481309226, "learning_rate": 5.878311868702552e-07, "loss": 0.0004, "step": 261350 }, { "epoch": 1.7194397478997125, "grad_norm": 0.002891497352046404, "learning_rate": 5.875611331055708e-07, "loss": 0.0012, "step": 261360 }, { "epoch": 1.719505536074946, "grad_norm": 0.001561909772445292, "learning_rate": 5.872911375150824e-07, "loss": 0.0002, "step": 261370 }, { "epoch": 1.7195713242501793, "grad_norm": 0.00047747372364304996, "learning_rate": 5.870212001023495e-07, "loss": 0.0006, "step": 261380 }, { "epoch": 1.7196371124254126, "grad_norm": 0.018606055491998355, "learning_rate": 5.867513208709308e-07, "loss": 0.0005, "step": 261390 }, { "epoch": 1.719702900600646, "grad_norm": 0.0007977535533020822, "learning_rate": 5.864814998243845e-07, "loss": 0.0014, "step": 261400 }, { "epoch": 1.7197686887758794, "grad_norm": 0.038520957678691875, "learning_rate": 5.862117369662673e-07, "loss": 0.0005, "step": 261410 }, { "epoch": 1.719834476951113, "grad_norm": 0.009099418107703398, "learning_rate": 5.859420323001369e-07, "loss": 0.0003, "step": 261420 }, { "epoch": 1.7199002651263462, "grad_norm": 0.013090362532818327, "learning_rate": 5.856723858295471e-07, "loss": 0.0004, "step": 261430 }, { "epoch": 1.7199660533015795, "grad_norm": 0.06066304227096271, "learning_rate": 5.854027975580556e-07, "loss": 0.0004, "step": 261440 }, { "epoch": 1.720031841476813, "grad_norm": 0.012523910600279513, "learning_rate": 5.851332674892157e-07, "loss": 0.0006, "step": 261450 }, { "epoch": 1.7200976296520465, "grad_norm": 0.012522392509764885, "learning_rate": 5.848637956265807e-07, "loss": 0.0002, "step": 261460 }, { "epoch": 1.7201634178272798, "grad_norm": 0.008107445289152138, "learning_rate": 5.845943819737033e-07, "loss": 0.0002, "step": 261470 }, { "epoch": 1.720229206002513, "grad_norm": 0.013263811992331224, "learning_rate": 5.843250265341355e-07, "loss": 0.0003, "step": 261480 }, { "epoch": 1.7202949941777463, "grad_norm": 0.007136738875750488, "learning_rate": 5.840557293114285e-07, "loss": 0.0005, "step": 261490 }, { "epoch": 1.7203607823529798, "grad_norm": 0.04809987528652336, "learning_rate": 5.837864903091334e-07, "loss": 0.0003, "step": 261500 }, { "epoch": 1.7204265705282134, "grad_norm": 0.024042180439542073, "learning_rate": 5.835173095307989e-07, "loss": 0.0002, "step": 261510 }, { "epoch": 1.7204923587034466, "grad_norm": 0.041361816745389246, "learning_rate": 5.832481869799744e-07, "loss": 0.0004, "step": 261520 }, { "epoch": 1.72055814687868, "grad_norm": 0.02782652692592353, "learning_rate": 5.829791226602077e-07, "loss": 0.0005, "step": 261530 }, { "epoch": 1.7206239350539134, "grad_norm": 0.0024361810645554265, "learning_rate": 5.827101165750471e-07, "loss": 0.0004, "step": 261540 }, { "epoch": 1.720689723229147, "grad_norm": 0.022278049839070738, "learning_rate": 5.824411687280384e-07, "loss": 0.0006, "step": 261550 }, { "epoch": 1.7207555114043802, "grad_norm": 0.0106852719407607, "learning_rate": 5.821722791227263e-07, "loss": 0.0007, "step": 261560 }, { "epoch": 1.7208212995796135, "grad_norm": 0.056857420841250124, "learning_rate": 5.819034477626584e-07, "loss": 0.0017, "step": 261570 }, { "epoch": 1.7208870877548468, "grad_norm": 0.020387082730363746, "learning_rate": 5.816346746513779e-07, "loss": 0.0002, "step": 261580 }, { "epoch": 1.7209528759300803, "grad_norm": 0.0381367329530341, "learning_rate": 5.813659597924282e-07, "loss": 0.0019, "step": 261590 }, { "epoch": 1.7210186641053138, "grad_norm": 0.014889263772132094, "learning_rate": 5.810973031893524e-07, "loss": 0.0003, "step": 261600 }, { "epoch": 1.721084452280547, "grad_norm": 0.0016083665730650112, "learning_rate": 5.808287048456917e-07, "loss": 0.0003, "step": 261610 }, { "epoch": 1.7211502404557804, "grad_norm": 0.014656526050842979, "learning_rate": 5.805601647649883e-07, "loss": 0.0003, "step": 261620 }, { "epoch": 1.7212160286310139, "grad_norm": 0.004876705237612407, "learning_rate": 5.802916829507821e-07, "loss": 0.0005, "step": 261630 }, { "epoch": 1.7212818168062474, "grad_norm": 0.00985394623349036, "learning_rate": 5.80023259406613e-07, "loss": 0.0007, "step": 261640 }, { "epoch": 1.7213476049814807, "grad_norm": 0.0077355852791281, "learning_rate": 5.797548941360198e-07, "loss": 0.0001, "step": 261650 }, { "epoch": 1.721413393156714, "grad_norm": 0.003969445915159016, "learning_rate": 5.794865871425409e-07, "loss": 0.0007, "step": 261660 }, { "epoch": 1.7214791813319474, "grad_norm": 0.04350891775855006, "learning_rate": 5.792183384297134e-07, "loss": 0.0004, "step": 261670 }, { "epoch": 1.721544969507181, "grad_norm": 0.026921050773596376, "learning_rate": 5.789501480010746e-07, "loss": 0.0006, "step": 261680 }, { "epoch": 1.7216107576824142, "grad_norm": 0.009032181929510616, "learning_rate": 5.786820158601581e-07, "loss": 0.0006, "step": 261690 }, { "epoch": 1.7216765458576475, "grad_norm": 0.046303825557954675, "learning_rate": 5.784139420105017e-07, "loss": 0.0007, "step": 261700 }, { "epoch": 1.7217423340328808, "grad_norm": 0.013837100565812666, "learning_rate": 5.781459264556394e-07, "loss": 0.0007, "step": 261710 }, { "epoch": 1.7218081222081143, "grad_norm": 0.026846807582318267, "learning_rate": 5.778779691991038e-07, "loss": 0.0004, "step": 261720 }, { "epoch": 1.7218739103833478, "grad_norm": 0.013814890995838497, "learning_rate": 5.776100702444276e-07, "loss": 0.0002, "step": 261730 }, { "epoch": 1.721939698558581, "grad_norm": 0.02199557770804416, "learning_rate": 5.773422295951436e-07, "loss": 0.0003, "step": 261740 }, { "epoch": 1.7220054867338144, "grad_norm": 0.013003594895569888, "learning_rate": 5.770744472547823e-07, "loss": 0.0004, "step": 261750 }, { "epoch": 1.7220712749090479, "grad_norm": 0.0021499319647368333, "learning_rate": 5.768067232268748e-07, "loss": 0.0003, "step": 261760 }, { "epoch": 1.7221370630842814, "grad_norm": 0.043683774808510496, "learning_rate": 5.76539057514951e-07, "loss": 0.0004, "step": 261770 }, { "epoch": 1.7222028512595147, "grad_norm": 0.006126538125159787, "learning_rate": 5.762714501225386e-07, "loss": 0.0004, "step": 261780 }, { "epoch": 1.722268639434748, "grad_norm": 0.056629063532968635, "learning_rate": 5.76003901053167e-07, "loss": 0.0003, "step": 261790 }, { "epoch": 1.7223344276099812, "grad_norm": 0.08521131457083789, "learning_rate": 5.75736410310363e-07, "loss": 0.0005, "step": 261800 }, { "epoch": 1.7224002157852147, "grad_norm": 0.015372205229858936, "learning_rate": 5.75468977897653e-07, "loss": 0.0003, "step": 261810 }, { "epoch": 1.7224660039604482, "grad_norm": 0.003635028925611257, "learning_rate": 5.752016038185638e-07, "loss": 0.0006, "step": 261820 }, { "epoch": 1.7225317921356815, "grad_norm": 0.02450561859417717, "learning_rate": 5.749342880766196e-07, "loss": 0.0006, "step": 261830 }, { "epoch": 1.7225975803109148, "grad_norm": 0.16053474264840403, "learning_rate": 5.746670306753454e-07, "loss": 0.0012, "step": 261840 }, { "epoch": 1.7226633684861483, "grad_norm": 0.04022044476955777, "learning_rate": 5.743998316182641e-07, "loss": 0.0002, "step": 261850 }, { "epoch": 1.7227291566613818, "grad_norm": 0.021621386689986217, "learning_rate": 5.74132690908899e-07, "loss": 0.002, "step": 261860 }, { "epoch": 1.722794944836615, "grad_norm": 0.007918656768397989, "learning_rate": 5.738656085507722e-07, "loss": 0.0003, "step": 261870 }, { "epoch": 1.7228607330118484, "grad_norm": 0.02978742891421357, "learning_rate": 5.735985845474046e-07, "loss": 0.0002, "step": 261880 }, { "epoch": 1.7229265211870817, "grad_norm": 0.03479206154644639, "learning_rate": 5.733316189023164e-07, "loss": 0.0004, "step": 261890 }, { "epoch": 1.7229923093623152, "grad_norm": 0.026597522369867024, "learning_rate": 5.730647116190279e-07, "loss": 0.0004, "step": 261900 }, { "epoch": 1.7230580975375487, "grad_norm": 0.08503305893385012, "learning_rate": 5.727978627010577e-07, "loss": 0.0004, "step": 261910 }, { "epoch": 1.723123885712782, "grad_norm": 0.021376532151474278, "learning_rate": 5.725310721519234e-07, "loss": 0.0006, "step": 261920 }, { "epoch": 1.7231896738880152, "grad_norm": 0.00612029418068619, "learning_rate": 5.722643399751437e-07, "loss": 0.0004, "step": 261930 }, { "epoch": 1.7232554620632488, "grad_norm": 0.006738392881178702, "learning_rate": 5.719976661742349e-07, "loss": 0.0003, "step": 261940 }, { "epoch": 1.7233212502384823, "grad_norm": 0.0026272263754268206, "learning_rate": 5.717310507527124e-07, "loss": 0.0002, "step": 261950 }, { "epoch": 1.7233870384137155, "grad_norm": 0.0019062578229455283, "learning_rate": 5.714644937140918e-07, "loss": 0.0003, "step": 261960 }, { "epoch": 1.7234528265889488, "grad_norm": 0.10066809672823084, "learning_rate": 5.711979950618867e-07, "loss": 0.0006, "step": 261970 }, { "epoch": 1.7235186147641823, "grad_norm": 0.027286150949347904, "learning_rate": 5.709315547996114e-07, "loss": 0.0007, "step": 261980 }, { "epoch": 1.7235844029394156, "grad_norm": 0.025242482077081126, "learning_rate": 5.706651729307783e-07, "loss": 0.0009, "step": 261990 }, { "epoch": 1.7236501911146491, "grad_norm": 0.05973150691312505, "learning_rate": 5.703988494588997e-07, "loss": 0.0002, "step": 262000 }, { "epoch": 1.7237159792898824, "grad_norm": 0.0013457006651079986, "learning_rate": 5.701325843874861e-07, "loss": 0.0005, "step": 262010 }, { "epoch": 1.7237817674651157, "grad_norm": 0.011987244057439787, "learning_rate": 5.698663777200492e-07, "loss": 0.0004, "step": 262020 }, { "epoch": 1.7238475556403492, "grad_norm": 0.019160476520970148, "learning_rate": 5.696002294600972e-07, "loss": 0.0003, "step": 262030 }, { "epoch": 1.7239133438155827, "grad_norm": 0.019412760914541933, "learning_rate": 5.693341396111402e-07, "loss": 0.0002, "step": 262040 }, { "epoch": 1.723979131990816, "grad_norm": 0.05064640705369464, "learning_rate": 5.690681081766853e-07, "loss": 0.0009, "step": 262050 }, { "epoch": 1.7240449201660493, "grad_norm": 0.05493743903364578, "learning_rate": 5.688021351602413e-07, "loss": 0.0003, "step": 262060 }, { "epoch": 1.7241107083412828, "grad_norm": 0.07414037956318804, "learning_rate": 5.685362205653138e-07, "loss": 0.0007, "step": 262070 }, { "epoch": 1.7241764965165163, "grad_norm": 0.02015389983037801, "learning_rate": 5.682703643954096e-07, "loss": 0.0015, "step": 262080 }, { "epoch": 1.7242422846917496, "grad_norm": 0.05926990603237335, "learning_rate": 5.680045666540324e-07, "loss": 0.0003, "step": 262090 }, { "epoch": 1.7243080728669828, "grad_norm": 0.011804178245095658, "learning_rate": 5.677388273446877e-07, "loss": 0.0003, "step": 262100 }, { "epoch": 1.7243738610422161, "grad_norm": 0.003688844319981343, "learning_rate": 5.674731464708788e-07, "loss": 0.0002, "step": 262110 }, { "epoch": 1.7244396492174496, "grad_norm": 0.021004438432111493, "learning_rate": 5.67207524036108e-07, "loss": 0.0004, "step": 262120 }, { "epoch": 1.7245054373926831, "grad_norm": 0.007005664015599423, "learning_rate": 5.669419600438775e-07, "loss": 0.0006, "step": 262130 }, { "epoch": 1.7245712255679164, "grad_norm": 0.12156894964438188, "learning_rate": 5.666764544976889e-07, "loss": 0.0005, "step": 262140 }, { "epoch": 1.7246370137431497, "grad_norm": 0.01100172028050283, "learning_rate": 5.66411007401042e-07, "loss": 0.0004, "step": 262150 }, { "epoch": 1.7247028019183832, "grad_norm": 0.047342457294626064, "learning_rate": 5.661456187574377e-07, "loss": 0.0004, "step": 262160 }, { "epoch": 1.7247685900936167, "grad_norm": 0.015771812365795404, "learning_rate": 5.658802885703735e-07, "loss": 0.0004, "step": 262170 }, { "epoch": 1.72483437826885, "grad_norm": 0.03536993208567466, "learning_rate": 5.656150168433466e-07, "loss": 0.0006, "step": 262180 }, { "epoch": 1.7249001664440833, "grad_norm": 0.022057117316605257, "learning_rate": 5.653498035798577e-07, "loss": 0.0003, "step": 262190 }, { "epoch": 1.7249659546193166, "grad_norm": 0.023964815645213435, "learning_rate": 5.650846487834016e-07, "loss": 0.0002, "step": 262200 }, { "epoch": 1.72503174279455, "grad_norm": 0.06195915879128214, "learning_rate": 5.64819552457474e-07, "loss": 0.0006, "step": 262210 }, { "epoch": 1.7250975309697836, "grad_norm": 0.0016684368365777196, "learning_rate": 5.645545146055697e-07, "loss": 0.0008, "step": 262220 }, { "epoch": 1.7251633191450169, "grad_norm": 0.02990392019226736, "learning_rate": 5.642895352311839e-07, "loss": 0.0002, "step": 262230 }, { "epoch": 1.7252291073202501, "grad_norm": 0.013540164362539249, "learning_rate": 5.640246143378097e-07, "loss": 0.0009, "step": 262240 }, { "epoch": 1.7252948954954836, "grad_norm": 0.018686890071048564, "learning_rate": 5.637597519289395e-07, "loss": 0.0006, "step": 262250 }, { "epoch": 1.7253606836707172, "grad_norm": 0.0051485578233982626, "learning_rate": 5.63494948008066e-07, "loss": 0.0005, "step": 262260 }, { "epoch": 1.7254264718459504, "grad_norm": 0.013775269581594437, "learning_rate": 5.632302025786795e-07, "loss": 0.0001, "step": 262270 }, { "epoch": 1.7254922600211837, "grad_norm": 0.03563981406265555, "learning_rate": 5.629655156442715e-07, "loss": 0.0006, "step": 262280 }, { "epoch": 1.7255580481964172, "grad_norm": 0.00010341954415608614, "learning_rate": 5.627008872083306e-07, "loss": 0.0003, "step": 262290 }, { "epoch": 1.7256238363716505, "grad_norm": 0.009104473672128644, "learning_rate": 5.624363172743464e-07, "loss": 0.0003, "step": 262300 }, { "epoch": 1.725689624546884, "grad_norm": 0.002561791568229947, "learning_rate": 5.621718058458059e-07, "loss": 0.0002, "step": 262310 }, { "epoch": 1.7257554127221173, "grad_norm": 0.028824952768784134, "learning_rate": 5.619073529261981e-07, "loss": 0.0001, "step": 262320 }, { "epoch": 1.7258212008973506, "grad_norm": 5.682892312357676e-05, "learning_rate": 5.616429585190092e-07, "loss": 0.0005, "step": 262330 }, { "epoch": 1.725886989072584, "grad_norm": 0.04117016768489869, "learning_rate": 5.61378622627724e-07, "loss": 0.0007, "step": 262340 }, { "epoch": 1.7259527772478176, "grad_norm": 0.025758831911172595, "learning_rate": 5.611143452558288e-07, "loss": 0.0006, "step": 262350 }, { "epoch": 1.7260185654230509, "grad_norm": 0.0006853459123611254, "learning_rate": 5.608501264068073e-07, "loss": 0.0004, "step": 262360 }, { "epoch": 1.7260843535982842, "grad_norm": 0.003320234238851455, "learning_rate": 5.605859660841429e-07, "loss": 0.0002, "step": 262370 }, { "epoch": 1.7261501417735177, "grad_norm": 0.008620458117597627, "learning_rate": 5.603218642913183e-07, "loss": 0.0003, "step": 262380 }, { "epoch": 1.7262159299487512, "grad_norm": 0.004401064503414308, "learning_rate": 5.600578210318159e-07, "loss": 0.0004, "step": 262390 }, { "epoch": 1.7262817181239845, "grad_norm": 0.003874394336532133, "learning_rate": 5.597938363091165e-07, "loss": 0.0004, "step": 262400 }, { "epoch": 1.7263475062992177, "grad_norm": 0.04835490243605376, "learning_rate": 5.595299101267004e-07, "loss": 0.0007, "step": 262410 }, { "epoch": 1.726413294474451, "grad_norm": 0.02413805189628929, "learning_rate": 5.59266042488047e-07, "loss": 0.0005, "step": 262420 }, { "epoch": 1.7264790826496845, "grad_norm": 0.005964343290224177, "learning_rate": 5.590022333966361e-07, "loss": 0.0008, "step": 262430 }, { "epoch": 1.726544870824918, "grad_norm": 0.0013159325671413598, "learning_rate": 5.587384828559439e-07, "loss": 0.0003, "step": 262440 }, { "epoch": 1.7266106590001513, "grad_norm": 0.0011808260955295325, "learning_rate": 5.584747908694505e-07, "loss": 0.0003, "step": 262450 }, { "epoch": 1.7266764471753846, "grad_norm": 0.007128408669271877, "learning_rate": 5.582111574406307e-07, "loss": 0.0005, "step": 262460 }, { "epoch": 1.726742235350618, "grad_norm": 0.04808782431133782, "learning_rate": 5.579475825729607e-07, "loss": 0.0007, "step": 262470 }, { "epoch": 1.7268080235258516, "grad_norm": 0.010811432915380606, "learning_rate": 5.576840662699157e-07, "loss": 0.0003, "step": 262480 }, { "epoch": 1.726873811701085, "grad_norm": 0.009677194321286553, "learning_rate": 5.574206085349692e-07, "loss": 0.0001, "step": 262490 }, { "epoch": 1.7269395998763182, "grad_norm": 0.051792825229019096, "learning_rate": 5.571572093715949e-07, "loss": 0.0008, "step": 262500 }, { "epoch": 1.7270053880515515, "grad_norm": 0.01757493870973505, "learning_rate": 5.568938687832664e-07, "loss": 0.0002, "step": 262510 }, { "epoch": 1.727071176226785, "grad_norm": 0.04833689828429595, "learning_rate": 5.566305867734545e-07, "loss": 0.0002, "step": 262520 }, { "epoch": 1.7271369644020185, "grad_norm": 0.011671574479931258, "learning_rate": 5.563673633456307e-07, "loss": 0.0004, "step": 262530 }, { "epoch": 1.7272027525772518, "grad_norm": 0.002937823565087127, "learning_rate": 5.561041985032656e-07, "loss": 0.0004, "step": 262540 }, { "epoch": 1.727268540752485, "grad_norm": 0.009301640036017252, "learning_rate": 5.558410922498292e-07, "loss": 0.0005, "step": 262550 }, { "epoch": 1.7273343289277185, "grad_norm": 0.04500137011723848, "learning_rate": 5.555780445887887e-07, "loss": 0.0003, "step": 262560 }, { "epoch": 1.727400117102952, "grad_norm": 0.052235576174536384, "learning_rate": 5.553150555236131e-07, "loss": 0.0007, "step": 262570 }, { "epoch": 1.7274659052781853, "grad_norm": 0.019745224218152606, "learning_rate": 5.550521250577706e-07, "loss": 0.0008, "step": 262580 }, { "epoch": 1.7275316934534186, "grad_norm": 0.0011756908974218736, "learning_rate": 5.547892531947264e-07, "loss": 0.0007, "step": 262590 }, { "epoch": 1.7275974816286521, "grad_norm": 0.0006451147097960496, "learning_rate": 5.545264399379475e-07, "loss": 0.0007, "step": 262600 }, { "epoch": 1.7276632698038854, "grad_norm": 0.0001223222626341755, "learning_rate": 5.542636852908978e-07, "loss": 0.0003, "step": 262610 }, { "epoch": 1.727729057979119, "grad_norm": 0.08989711544226292, "learning_rate": 5.540009892570414e-07, "loss": 0.0008, "step": 262620 }, { "epoch": 1.7277948461543522, "grad_norm": 0.02010476535551859, "learning_rate": 5.537383518398426e-07, "loss": 0.0006, "step": 262630 }, { "epoch": 1.7278606343295855, "grad_norm": 0.03189192310680685, "learning_rate": 5.534757730427637e-07, "loss": 0.0003, "step": 262640 }, { "epoch": 1.727926422504819, "grad_norm": 0.024529027994824672, "learning_rate": 5.532132528692664e-07, "loss": 0.0003, "step": 262650 }, { "epoch": 1.7279922106800525, "grad_norm": 0.04982936084392565, "learning_rate": 5.529507913228116e-07, "loss": 0.0002, "step": 262660 }, { "epoch": 1.7280579988552858, "grad_norm": 0.0005436120745849486, "learning_rate": 5.526883884068601e-07, "loss": 0.0003, "step": 262670 }, { "epoch": 1.728123787030519, "grad_norm": 0.01596174196645679, "learning_rate": 5.524260441248713e-07, "loss": 0.0004, "step": 262680 }, { "epoch": 1.7281895752057526, "grad_norm": 0.0045214663562601, "learning_rate": 5.521637584803036e-07, "loss": 0.0004, "step": 262690 }, { "epoch": 1.728255363380986, "grad_norm": 0.053115532665834686, "learning_rate": 5.519015314766146e-07, "loss": 0.0003, "step": 262700 }, { "epoch": 1.7283211515562193, "grad_norm": 0.0026525721392934975, "learning_rate": 5.516393631172635e-07, "loss": 0.0007, "step": 262710 }, { "epoch": 1.7283869397314526, "grad_norm": 0.03226564879889652, "learning_rate": 5.51377253405705e-07, "loss": 0.0006, "step": 262720 }, { "epoch": 1.728452727906686, "grad_norm": 0.004495553113011868, "learning_rate": 5.511152023453959e-07, "loss": 0.0016, "step": 262730 }, { "epoch": 1.7285185160819194, "grad_norm": 0.000300613040848815, "learning_rate": 5.508532099397901e-07, "loss": 0.0005, "step": 262740 }, { "epoch": 1.728584304257153, "grad_norm": 0.08228504642643908, "learning_rate": 5.505912761923426e-07, "loss": 0.0005, "step": 262750 }, { "epoch": 1.7286500924323862, "grad_norm": 0.015518056753414841, "learning_rate": 5.503294011065058e-07, "loss": 0.0006, "step": 262760 }, { "epoch": 1.7287158806076195, "grad_norm": 0.0010808483906831609, "learning_rate": 5.500675846857334e-07, "loss": 0.0007, "step": 262770 }, { "epoch": 1.728781668782853, "grad_norm": 0.007944553594185238, "learning_rate": 5.498058269334767e-07, "loss": 0.0009, "step": 262780 }, { "epoch": 1.7288474569580865, "grad_norm": 0.0190000983434724, "learning_rate": 5.49544127853186e-07, "loss": 0.0003, "step": 262790 }, { "epoch": 1.7289132451333198, "grad_norm": 0.0006621853292825549, "learning_rate": 5.492824874483132e-07, "loss": 0.0003, "step": 262800 }, { "epoch": 1.728979033308553, "grad_norm": 0.03635378326546371, "learning_rate": 5.490209057223062e-07, "loss": 0.0007, "step": 262810 }, { "epoch": 1.7290448214837864, "grad_norm": 0.008619216279326119, "learning_rate": 5.487593826786147e-07, "loss": 0.0002, "step": 262820 }, { "epoch": 1.7291106096590199, "grad_norm": 0.0020903529392331983, "learning_rate": 5.484979183206857e-07, "loss": 0.0006, "step": 262830 }, { "epoch": 1.7291763978342534, "grad_norm": 0.04082827587140272, "learning_rate": 5.482365126519678e-07, "loss": 0.0003, "step": 262840 }, { "epoch": 1.7292421860094866, "grad_norm": 0.01911067565135747, "learning_rate": 5.479751656759069e-07, "loss": 0.0002, "step": 262850 }, { "epoch": 1.72930797418472, "grad_norm": 0.04661809188041247, "learning_rate": 5.477138773959479e-07, "loss": 0.0005, "step": 262860 }, { "epoch": 1.7293737623599534, "grad_norm": 0.03308624879938718, "learning_rate": 5.474526478155368e-07, "loss": 0.0004, "step": 262870 }, { "epoch": 1.729439550535187, "grad_norm": 0.052144005558762176, "learning_rate": 5.471914769381164e-07, "loss": 0.0005, "step": 262880 }, { "epoch": 1.7295053387104202, "grad_norm": 0.036244303668710516, "learning_rate": 5.469303647671314e-07, "loss": 0.0003, "step": 262890 }, { "epoch": 1.7295711268856535, "grad_norm": 0.011400826907607037, "learning_rate": 5.466693113060234e-07, "loss": 0.0002, "step": 262900 }, { "epoch": 1.7296369150608868, "grad_norm": 0.021688080094862533, "learning_rate": 5.464083165582341e-07, "loss": 0.0005, "step": 262910 }, { "epoch": 1.7297027032361203, "grad_norm": 0.007042540469008426, "learning_rate": 5.461473805272049e-07, "loss": 0.0002, "step": 262920 }, { "epoch": 1.7297684914113538, "grad_norm": 0.047236353391985376, "learning_rate": 5.458865032163763e-07, "loss": 0.0003, "step": 262930 }, { "epoch": 1.729834279586587, "grad_norm": 0.018707667824751247, "learning_rate": 5.456256846291869e-07, "loss": 0.0005, "step": 262940 }, { "epoch": 1.7299000677618204, "grad_norm": 0.04050379196040083, "learning_rate": 5.453649247690757e-07, "loss": 0.0002, "step": 262950 }, { "epoch": 1.7299658559370539, "grad_norm": 0.0016211760028536561, "learning_rate": 5.451042236394799e-07, "loss": 0.0005, "step": 262960 }, { "epoch": 1.7300316441122874, "grad_norm": 0.01071687221120008, "learning_rate": 5.448435812438385e-07, "loss": 0.0004, "step": 262970 }, { "epoch": 1.7300974322875207, "grad_norm": 0.00415241824931171, "learning_rate": 5.445829975855865e-07, "loss": 0.0002, "step": 262980 }, { "epoch": 1.730163220462754, "grad_norm": 0.09061462480377966, "learning_rate": 5.443224726681595e-07, "loss": 0.0006, "step": 262990 }, { "epoch": 1.7302290086379875, "grad_norm": 0.04087710989814645, "learning_rate": 5.440620064949931e-07, "loss": 0.0005, "step": 263000 }, { "epoch": 1.7302947968132207, "grad_norm": 0.01615734715220476, "learning_rate": 5.438015990695206e-07, "loss": 0.0003, "step": 263010 }, { "epoch": 1.7303605849884542, "grad_norm": 0.003240381270539758, "learning_rate": 5.435412503951754e-07, "loss": 0.0005, "step": 263020 }, { "epoch": 1.7304263731636875, "grad_norm": 0.004463343420420513, "learning_rate": 5.432809604753897e-07, "loss": 0.0007, "step": 263030 }, { "epoch": 1.7304921613389208, "grad_norm": 0.004828303550067293, "learning_rate": 5.430207293135958e-07, "loss": 0.0004, "step": 263040 }, { "epoch": 1.7305579495141543, "grad_norm": 0.0042380442606647735, "learning_rate": 5.427605569132238e-07, "loss": 0.0006, "step": 263050 }, { "epoch": 1.7306237376893878, "grad_norm": 0.005997600492420281, "learning_rate": 5.425004432777047e-07, "loss": 0.0005, "step": 263060 }, { "epoch": 1.730689525864621, "grad_norm": 0.008984723391544215, "learning_rate": 5.422403884104677e-07, "loss": 0.0004, "step": 263070 }, { "epoch": 1.7307553140398544, "grad_norm": 0.08270997712946751, "learning_rate": 5.419803923149408e-07, "loss": 0.0006, "step": 263080 }, { "epoch": 1.730821102215088, "grad_norm": 0.005563155273155541, "learning_rate": 5.417204549945515e-07, "loss": 0.0004, "step": 263090 }, { "epoch": 1.7308868903903214, "grad_norm": 0.006145887673255298, "learning_rate": 5.414605764527287e-07, "loss": 0.0002, "step": 263100 }, { "epoch": 1.7309526785655547, "grad_norm": 0.012130580780913055, "learning_rate": 5.412007566928973e-07, "loss": 0.0004, "step": 263110 }, { "epoch": 1.731018466740788, "grad_norm": 0.004376039985648135, "learning_rate": 5.40940995718483e-07, "loss": 0.0006, "step": 263120 }, { "epoch": 1.7310842549160212, "grad_norm": 0.003413883715066179, "learning_rate": 5.406812935329103e-07, "loss": 0.0002, "step": 263130 }, { "epoch": 1.7311500430912548, "grad_norm": 0.010593590276359326, "learning_rate": 5.404216501396043e-07, "loss": 0.0004, "step": 263140 }, { "epoch": 1.7312158312664883, "grad_norm": 0.04674093219237848, "learning_rate": 5.401620655419865e-07, "loss": 0.0004, "step": 263150 }, { "epoch": 1.7312816194417215, "grad_norm": 0.004035845731959894, "learning_rate": 5.399025397434804e-07, "loss": 0.0004, "step": 263160 }, { "epoch": 1.7313474076169548, "grad_norm": 0.3777502227094774, "learning_rate": 5.396430727475072e-07, "loss": 0.0018, "step": 263170 }, { "epoch": 1.7314131957921883, "grad_norm": 0.021940628342549548, "learning_rate": 5.393836645574879e-07, "loss": 0.0006, "step": 263180 }, { "epoch": 1.7314789839674218, "grad_norm": 0.0011245629131865348, "learning_rate": 5.391243151768428e-07, "loss": 0.0002, "step": 263190 }, { "epoch": 1.7315447721426551, "grad_norm": 0.001365021461344206, "learning_rate": 5.388650246089904e-07, "loss": 0.0003, "step": 263200 }, { "epoch": 1.7316105603178884, "grad_norm": 0.031208481246770732, "learning_rate": 5.386057928573507e-07, "loss": 0.0006, "step": 263210 }, { "epoch": 1.7316763484931217, "grad_norm": 0.04237762779498158, "learning_rate": 5.383466199253384e-07, "loss": 0.0009, "step": 263220 }, { "epoch": 1.7317421366683552, "grad_norm": 0.02061774404294521, "learning_rate": 5.380875058163742e-07, "loss": 0.0004, "step": 263230 }, { "epoch": 1.7318079248435887, "grad_norm": 0.004791639504206522, "learning_rate": 5.378284505338726e-07, "loss": 0.0003, "step": 263240 }, { "epoch": 1.731873713018822, "grad_norm": 0.011020245922393852, "learning_rate": 5.375694540812493e-07, "loss": 0.0005, "step": 263250 }, { "epoch": 1.7319395011940553, "grad_norm": 0.03570677709282409, "learning_rate": 5.373105164619186e-07, "loss": 0.0003, "step": 263260 }, { "epoch": 1.7320052893692888, "grad_norm": 0.0608373548874471, "learning_rate": 5.370516376792945e-07, "loss": 0.0012, "step": 263270 }, { "epoch": 1.7320710775445223, "grad_norm": 0.038341829584801035, "learning_rate": 5.367928177367898e-07, "loss": 0.0003, "step": 263280 }, { "epoch": 1.7321368657197556, "grad_norm": 0.011445003505302006, "learning_rate": 5.365340566378174e-07, "loss": 0.0002, "step": 263290 }, { "epoch": 1.7322026538949888, "grad_norm": 0.025581351650431457, "learning_rate": 5.362753543857885e-07, "loss": 0.0002, "step": 263300 }, { "epoch": 1.7322684420702223, "grad_norm": 0.04481317011495537, "learning_rate": 5.360167109841141e-07, "loss": 0.0006, "step": 263310 }, { "epoch": 1.7323342302454556, "grad_norm": 0.0047760558321405305, "learning_rate": 5.357581264362038e-07, "loss": 0.0005, "step": 263320 }, { "epoch": 1.7324000184206891, "grad_norm": 0.06518268421601517, "learning_rate": 5.354996007454672e-07, "loss": 0.0002, "step": 263330 }, { "epoch": 1.7324658065959224, "grad_norm": 0.03969546441044963, "learning_rate": 5.352411339153124e-07, "loss": 0.0005, "step": 263340 }, { "epoch": 1.7325315947711557, "grad_norm": 0.019302857277636927, "learning_rate": 5.349827259491463e-07, "loss": 0.0002, "step": 263350 }, { "epoch": 1.7325973829463892, "grad_norm": 0.06663878108846465, "learning_rate": 5.347243768503774e-07, "loss": 0.0003, "step": 263360 }, { "epoch": 1.7326631711216227, "grad_norm": 0.0028939287217695877, "learning_rate": 5.344660866224116e-07, "loss": 0.0002, "step": 263370 }, { "epoch": 1.732728959296856, "grad_norm": 0.015566798462859785, "learning_rate": 5.342078552686536e-07, "loss": 0.0006, "step": 263380 }, { "epoch": 1.7327947474720893, "grad_norm": 0.01848753329019613, "learning_rate": 5.339496827925078e-07, "loss": 0.0004, "step": 263390 }, { "epoch": 1.7328605356473228, "grad_norm": 0.04579308717207922, "learning_rate": 5.336915691973788e-07, "loss": 0.0003, "step": 263400 }, { "epoch": 1.7329263238225563, "grad_norm": 0.0012084921295572426, "learning_rate": 5.334335144866687e-07, "loss": 0.0005, "step": 263410 }, { "epoch": 1.7329921119977896, "grad_norm": 0.010247406783016203, "learning_rate": 5.331755186637805e-07, "loss": 0.0002, "step": 263420 }, { "epoch": 1.7330579001730229, "grad_norm": 0.0008581493662662347, "learning_rate": 5.329175817321153e-07, "loss": 0.0009, "step": 263430 }, { "epoch": 1.7331236883482561, "grad_norm": 0.024987295858036475, "learning_rate": 5.326597036950737e-07, "loss": 0.0004, "step": 263440 }, { "epoch": 1.7331894765234896, "grad_norm": 0.06940820176013121, "learning_rate": 5.324018845560552e-07, "loss": 0.0016, "step": 263450 }, { "epoch": 1.7332552646987232, "grad_norm": 0.037536443244767646, "learning_rate": 5.3214412431846e-07, "loss": 0.0008, "step": 263460 }, { "epoch": 1.7333210528739564, "grad_norm": 0.0008345964383345332, "learning_rate": 5.318864229856852e-07, "loss": 0.0003, "step": 263470 }, { "epoch": 1.7333868410491897, "grad_norm": 0.019241720196013737, "learning_rate": 5.316287805611286e-07, "loss": 0.0004, "step": 263480 }, { "epoch": 1.7334526292244232, "grad_norm": 0.017070720790926995, "learning_rate": 5.313711970481883e-07, "loss": 0.0004, "step": 263490 }, { "epoch": 1.7335184173996567, "grad_norm": 4.4787598135319686e-05, "learning_rate": 5.311136724502591e-07, "loss": 0.0005, "step": 263500 }, { "epoch": 1.73358420557489, "grad_norm": 0.014630960493451507, "learning_rate": 5.308562067707368e-07, "loss": 0.0003, "step": 263510 }, { "epoch": 1.7336499937501233, "grad_norm": 0.003134264158842436, "learning_rate": 5.305988000130152e-07, "loss": 0.0006, "step": 263520 }, { "epoch": 1.7337157819253566, "grad_norm": 0.0018662663763503841, "learning_rate": 5.303414521804889e-07, "loss": 0.0002, "step": 263530 }, { "epoch": 1.73378157010059, "grad_norm": 0.03948783309303451, "learning_rate": 5.300841632765502e-07, "loss": 0.0005, "step": 263540 }, { "epoch": 1.7338473582758236, "grad_norm": 0.03777772610561792, "learning_rate": 5.298269333045913e-07, "loss": 0.0006, "step": 263550 }, { "epoch": 1.7339131464510569, "grad_norm": 0.012819992361572668, "learning_rate": 5.295697622680035e-07, "loss": 0.0003, "step": 263560 }, { "epoch": 1.7339789346262902, "grad_norm": 0.008822513539367445, "learning_rate": 5.293126501701773e-07, "loss": 0.0004, "step": 263570 }, { "epoch": 1.7340447228015237, "grad_norm": 0.02945875357018899, "learning_rate": 5.290555970145028e-07, "loss": 0.0004, "step": 263580 }, { "epoch": 1.7341105109767572, "grad_norm": 0.03385227290141693, "learning_rate": 5.28798602804369e-07, "loss": 0.0004, "step": 263590 }, { "epoch": 1.7341762991519905, "grad_norm": 0.010536926665160404, "learning_rate": 5.285416675431643e-07, "loss": 0.0004, "step": 263600 }, { "epoch": 1.7342420873272237, "grad_norm": 0.00999744767699801, "learning_rate": 5.282847912342743e-07, "loss": 0.0003, "step": 263610 }, { "epoch": 1.7343078755024572, "grad_norm": 0.0012112155940306741, "learning_rate": 5.28027973881089e-07, "loss": 0.0004, "step": 263620 }, { "epoch": 1.7343736636776905, "grad_norm": 0.04273464389126381, "learning_rate": 5.277712154869919e-07, "loss": 0.0007, "step": 263630 }, { "epoch": 1.734439451852924, "grad_norm": 0.0036415282223651413, "learning_rate": 5.27514516055369e-07, "loss": 0.0006, "step": 263640 }, { "epoch": 1.7345052400281573, "grad_norm": 0.000588186446110347, "learning_rate": 5.272578755896046e-07, "loss": 0.001, "step": 263650 }, { "epoch": 1.7345710282033906, "grad_norm": 0.010590358760868845, "learning_rate": 5.270012940930824e-07, "loss": 0.0003, "step": 263660 }, { "epoch": 1.734636816378624, "grad_norm": 0.01754948735363576, "learning_rate": 5.267447715691848e-07, "loss": 0.0003, "step": 263670 }, { "epoch": 1.7347026045538576, "grad_norm": 0.033682762100143766, "learning_rate": 5.264883080212952e-07, "loss": 0.0004, "step": 263680 }, { "epoch": 1.734768392729091, "grad_norm": 0.002352304427242523, "learning_rate": 5.262319034527929e-07, "loss": 0.0004, "step": 263690 }, { "epoch": 1.7348341809043242, "grad_norm": 0.001485735567292759, "learning_rate": 5.259755578670589e-07, "loss": 0.0003, "step": 263700 }, { "epoch": 1.7348999690795577, "grad_norm": 0.06279538870592216, "learning_rate": 5.25719271267473e-07, "loss": 0.0003, "step": 263710 }, { "epoch": 1.7349657572547912, "grad_norm": 0.04090139759929663, "learning_rate": 5.254630436574132e-07, "loss": 0.0012, "step": 263720 }, { "epoch": 1.7350315454300245, "grad_norm": 0.0002995821153878746, "learning_rate": 5.2520687504026e-07, "loss": 0.0006, "step": 263730 }, { "epoch": 1.7350973336052578, "grad_norm": 0.0006614851769309873, "learning_rate": 5.249507654193892e-07, "loss": 0.0002, "step": 263740 }, { "epoch": 1.735163121780491, "grad_norm": 0.03341353343739199, "learning_rate": 5.246947147981779e-07, "loss": 0.0002, "step": 263750 }, { "epoch": 1.7352289099557245, "grad_norm": 0.011893882280429023, "learning_rate": 5.244387231800019e-07, "loss": 0.0003, "step": 263760 }, { "epoch": 1.735294698130958, "grad_norm": 0.04076638829299112, "learning_rate": 5.241827905682356e-07, "loss": 0.0002, "step": 263770 }, { "epoch": 1.7353604863061913, "grad_norm": 0.0132602035069602, "learning_rate": 5.239269169662537e-07, "loss": 0.0002, "step": 263780 }, { "epoch": 1.7354262744814246, "grad_norm": 0.03443512362197994, "learning_rate": 5.236711023774294e-07, "loss": 0.0011, "step": 263790 }, { "epoch": 1.7354920626566581, "grad_norm": 0.013940641534396846, "learning_rate": 5.234153468051356e-07, "loss": 0.001, "step": 263800 }, { "epoch": 1.7355578508318916, "grad_norm": 0.025861700379506088, "learning_rate": 5.231596502527448e-07, "loss": 0.0005, "step": 263810 }, { "epoch": 1.735623639007125, "grad_norm": 0.000683983175004229, "learning_rate": 5.229040127236268e-07, "loss": 0.0006, "step": 263820 }, { "epoch": 1.7356894271823582, "grad_norm": 0.06977873614386655, "learning_rate": 5.226484342211535e-07, "loss": 0.0005, "step": 263830 }, { "epoch": 1.7357552153575915, "grad_norm": 0.02132481625210008, "learning_rate": 5.223929147486928e-07, "loss": 0.0008, "step": 263840 }, { "epoch": 1.735821003532825, "grad_norm": 0.005783644084221353, "learning_rate": 5.221374543096141e-07, "loss": 0.0001, "step": 263850 }, { "epoch": 1.7358867917080585, "grad_norm": 0.0004702521923690074, "learning_rate": 5.218820529072866e-07, "loss": 0.0004, "step": 263860 }, { "epoch": 1.7359525798832918, "grad_norm": 0.022148099591323283, "learning_rate": 5.216267105450767e-07, "loss": 0.0006, "step": 263870 }, { "epoch": 1.736018368058525, "grad_norm": 0.006993329164328828, "learning_rate": 5.213714272263509e-07, "loss": 0.0019, "step": 263880 }, { "epoch": 1.7360841562337586, "grad_norm": 0.04044405343731779, "learning_rate": 5.211162029544747e-07, "loss": 0.0005, "step": 263890 }, { "epoch": 1.736149944408992, "grad_norm": 0.021035538246351976, "learning_rate": 5.208610377328127e-07, "loss": 0.0005, "step": 263900 }, { "epoch": 1.7362157325842253, "grad_norm": 0.02624109360581563, "learning_rate": 5.2060593156473e-07, "loss": 0.0002, "step": 263910 }, { "epoch": 1.7362815207594586, "grad_norm": 0.003480442575436558, "learning_rate": 5.203508844535893e-07, "loss": 0.0006, "step": 263920 }, { "epoch": 1.736347308934692, "grad_norm": 0.20171263450310684, "learning_rate": 5.200958964027536e-07, "loss": 0.0007, "step": 263930 }, { "epoch": 1.7364130971099254, "grad_norm": 0.004409211485686985, "learning_rate": 5.19840967415584e-07, "loss": 0.0005, "step": 263940 }, { "epoch": 1.736478885285159, "grad_norm": 0.023865522967397104, "learning_rate": 5.19586097495442e-07, "loss": 0.0007, "step": 263950 }, { "epoch": 1.7365446734603922, "grad_norm": 0.043868235867900486, "learning_rate": 5.193312866456879e-07, "loss": 0.0004, "step": 263960 }, { "epoch": 1.7366104616356255, "grad_norm": 0.006920774085114556, "learning_rate": 5.190765348696814e-07, "loss": 0.0003, "step": 263970 }, { "epoch": 1.736676249810859, "grad_norm": 0.07043782492310055, "learning_rate": 5.188218421707791e-07, "loss": 0.0005, "step": 263980 }, { "epoch": 1.7367420379860925, "grad_norm": 0.00015640342974455708, "learning_rate": 5.185672085523419e-07, "loss": 0.0002, "step": 263990 }, { "epoch": 1.7368078261613258, "grad_norm": 0.0005915931780983906, "learning_rate": 5.183126340177258e-07, "loss": 0.0008, "step": 264000 }, { "epoch": 1.736873614336559, "grad_norm": 0.006182163002687902, "learning_rate": 5.180581185702865e-07, "loss": 0.0004, "step": 264010 }, { "epoch": 1.7369394025117926, "grad_norm": 0.014248070699955153, "learning_rate": 5.178036622133808e-07, "loss": 0.0002, "step": 264020 }, { "epoch": 1.737005190687026, "grad_norm": 0.006443414497548389, "learning_rate": 5.175492649503622e-07, "loss": 0.0004, "step": 264030 }, { "epoch": 1.7370709788622594, "grad_norm": 0.0026938015456618174, "learning_rate": 5.172949267845856e-07, "loss": 0.0003, "step": 264040 }, { "epoch": 1.7371367670374926, "grad_norm": 0.007056165983845479, "learning_rate": 5.170406477194034e-07, "loss": 0.0001, "step": 264050 }, { "epoch": 1.737202555212726, "grad_norm": 0.04955591746643628, "learning_rate": 5.167864277581686e-07, "loss": 0.0003, "step": 264060 }, { "epoch": 1.7372683433879594, "grad_norm": 0.025927839575258772, "learning_rate": 5.165322669042333e-07, "loss": 0.0004, "step": 264070 }, { "epoch": 1.737334131563193, "grad_norm": 0.009980534394427743, "learning_rate": 5.162781651609472e-07, "loss": 0.0006, "step": 264080 }, { "epoch": 1.7373999197384262, "grad_norm": 0.013743289081832024, "learning_rate": 5.160241225316615e-07, "loss": 0.0011, "step": 264090 }, { "epoch": 1.7374657079136595, "grad_norm": 0.010122134548210107, "learning_rate": 5.157701390197251e-07, "loss": 0.0006, "step": 264100 }, { "epoch": 1.737531496088893, "grad_norm": 0.01980300479732631, "learning_rate": 5.155162146284853e-07, "loss": 0.0004, "step": 264110 }, { "epoch": 1.7375972842641265, "grad_norm": 0.09824677646318818, "learning_rate": 5.152623493612923e-07, "loss": 0.0004, "step": 264120 }, { "epoch": 1.7376630724393598, "grad_norm": 0.03340491379678395, "learning_rate": 5.150085432214925e-07, "loss": 0.0007, "step": 264130 }, { "epoch": 1.737728860614593, "grad_norm": 0.0005956926587596181, "learning_rate": 5.14754796212431e-07, "loss": 0.0003, "step": 264140 }, { "epoch": 1.7377946487898264, "grad_norm": 0.021517232101199768, "learning_rate": 5.14501108337454e-07, "loss": 0.0008, "step": 264150 }, { "epoch": 1.7378604369650599, "grad_norm": 0.00803448711266464, "learning_rate": 5.142474795999058e-07, "loss": 0.0005, "step": 264160 }, { "epoch": 1.7379262251402934, "grad_norm": 0.004193204709337224, "learning_rate": 5.139939100031304e-07, "loss": 0.0004, "step": 264170 }, { "epoch": 1.7379920133155267, "grad_norm": 0.0075913100079922, "learning_rate": 5.137403995504708e-07, "loss": 0.0004, "step": 264180 }, { "epoch": 1.73805780149076, "grad_norm": 0.0004949215232795032, "learning_rate": 5.134869482452697e-07, "loss": 0.0003, "step": 264190 }, { "epoch": 1.7381235896659935, "grad_norm": 0.04194044023078012, "learning_rate": 5.132335560908685e-07, "loss": 0.0007, "step": 264200 }, { "epoch": 1.738189377841227, "grad_norm": 0.004219225324786332, "learning_rate": 5.129802230906078e-07, "loss": 0.0004, "step": 264210 }, { "epoch": 1.7382551660164602, "grad_norm": 0.004786481333021994, "learning_rate": 5.127269492478277e-07, "loss": 0.0006, "step": 264220 }, { "epoch": 1.7383209541916935, "grad_norm": 0.00025072409570917243, "learning_rate": 5.12473734565867e-07, "loss": 0.0005, "step": 264230 }, { "epoch": 1.7383867423669268, "grad_norm": 0.019365721593491363, "learning_rate": 5.122205790480633e-07, "loss": 0.0005, "step": 264240 }, { "epoch": 1.7384525305421603, "grad_norm": 0.01927283476951932, "learning_rate": 5.119674826977566e-07, "loss": 0.0002, "step": 264250 }, { "epoch": 1.7385183187173938, "grad_norm": 0.07702263366322233, "learning_rate": 5.117144455182826e-07, "loss": 0.0008, "step": 264260 }, { "epoch": 1.738584106892627, "grad_norm": 0.004766925970686353, "learning_rate": 5.114614675129775e-07, "loss": 0.0004, "step": 264270 }, { "epoch": 1.7386498950678604, "grad_norm": 0.00829976951193856, "learning_rate": 5.112085486851759e-07, "loss": 0.0002, "step": 264280 }, { "epoch": 1.738715683243094, "grad_norm": 0.06610477448678186, "learning_rate": 5.109556890382133e-07, "loss": 0.0007, "step": 264290 }, { "epoch": 1.7387814714183274, "grad_norm": 0.00259481373092983, "learning_rate": 5.107028885754222e-07, "loss": 0.0005, "step": 264300 }, { "epoch": 1.7388472595935607, "grad_norm": 0.03300676203576092, "learning_rate": 5.104501473001372e-07, "loss": 0.0004, "step": 264310 }, { "epoch": 1.738913047768794, "grad_norm": 0.1594934461202485, "learning_rate": 5.101974652156888e-07, "loss": 0.0008, "step": 264320 }, { "epoch": 1.7389788359440275, "grad_norm": 0.009022281201076548, "learning_rate": 5.0994484232541e-07, "loss": 0.0005, "step": 264330 }, { "epoch": 1.7390446241192608, "grad_norm": 0.008989061195486509, "learning_rate": 5.096922786326298e-07, "loss": 0.0004, "step": 264340 }, { "epoch": 1.7391104122944943, "grad_norm": 0.0008574368076378768, "learning_rate": 5.094397741406793e-07, "loss": 0.0002, "step": 264350 }, { "epoch": 1.7391762004697275, "grad_norm": 0.007444677813062054, "learning_rate": 5.09187328852887e-07, "loss": 0.0003, "step": 264360 }, { "epoch": 1.7392419886449608, "grad_norm": 0.13441919322610663, "learning_rate": 5.089349427725804e-07, "loss": 0.0007, "step": 264370 }, { "epoch": 1.7393077768201943, "grad_norm": 0.0007287858656756665, "learning_rate": 5.086826159030884e-07, "loss": 0.0005, "step": 264380 }, { "epoch": 1.7393735649954278, "grad_norm": 0.00899924533331865, "learning_rate": 5.084303482477371e-07, "loss": 0.0003, "step": 264390 }, { "epoch": 1.7394393531706611, "grad_norm": 0.02871977973128141, "learning_rate": 5.081781398098528e-07, "loss": 0.0002, "step": 264400 }, { "epoch": 1.7395051413458944, "grad_norm": 0.004988860076833557, "learning_rate": 5.079259905927603e-07, "loss": 0.0002, "step": 264410 }, { "epoch": 1.739570929521128, "grad_norm": 0.05706550221236564, "learning_rate": 5.076739005997844e-07, "loss": 0.0005, "step": 264420 }, { "epoch": 1.7396367176963614, "grad_norm": 0.015601150049575577, "learning_rate": 5.074218698342475e-07, "loss": 0.0006, "step": 264430 }, { "epoch": 1.7397025058715947, "grad_norm": 0.042393169349389914, "learning_rate": 5.071698982994739e-07, "loss": 0.0009, "step": 264440 }, { "epoch": 1.739768294046828, "grad_norm": 0.014996737017190225, "learning_rate": 5.069179859987844e-07, "loss": 0.0006, "step": 264450 }, { "epoch": 1.7398340822220613, "grad_norm": 0.0526791917384166, "learning_rate": 5.06666132935501e-07, "loss": 0.0006, "step": 264460 }, { "epoch": 1.7398998703972948, "grad_norm": 0.006649683749854388, "learning_rate": 5.064143391129439e-07, "loss": 0.0006, "step": 264470 }, { "epoch": 1.7399656585725283, "grad_norm": 0.09183984485128788, "learning_rate": 5.061626045344331e-07, "loss": 0.0006, "step": 264480 }, { "epoch": 1.7400314467477616, "grad_norm": 0.0006071557256596639, "learning_rate": 5.059109292032871e-07, "loss": 0.0003, "step": 264490 }, { "epoch": 1.7400972349229948, "grad_norm": 0.009257188838173336, "learning_rate": 5.056593131228227e-07, "loss": 0.0002, "step": 264500 }, { "epoch": 1.7401630230982283, "grad_norm": 0.0007746644776397691, "learning_rate": 5.054077562963599e-07, "loss": 0.0004, "step": 264510 }, { "epoch": 1.7402288112734619, "grad_norm": 0.01477478027001336, "learning_rate": 5.051562587272146e-07, "loss": 0.0003, "step": 264520 }, { "epoch": 1.7402945994486951, "grad_norm": 0.03376982825298717, "learning_rate": 5.049048204187012e-07, "loss": 0.0004, "step": 264530 }, { "epoch": 1.7403603876239284, "grad_norm": 0.03870967114113154, "learning_rate": 5.04653441374136e-07, "loss": 0.0003, "step": 264540 }, { "epoch": 1.7404261757991617, "grad_norm": 0.00166414415395875, "learning_rate": 5.044021215968331e-07, "loss": 0.0013, "step": 264550 }, { "epoch": 1.7404919639743952, "grad_norm": 0.06126603615603389, "learning_rate": 5.041508610901047e-07, "loss": 0.0004, "step": 264560 }, { "epoch": 1.7405577521496287, "grad_norm": 0.008144163515440374, "learning_rate": 5.038996598572649e-07, "loss": 0.0003, "step": 264570 }, { "epoch": 1.740623540324862, "grad_norm": 0.07941574039472123, "learning_rate": 5.036485179016248e-07, "loss": 0.0004, "step": 264580 }, { "epoch": 1.7406893285000953, "grad_norm": 0.0494119717260274, "learning_rate": 5.033974352264959e-07, "loss": 0.0003, "step": 264590 }, { "epoch": 1.7407551166753288, "grad_norm": 0.050307178533289004, "learning_rate": 5.03146411835188e-07, "loss": 0.0003, "step": 264600 }, { "epoch": 1.7408209048505623, "grad_norm": 0.019761566983567198, "learning_rate": 5.02895447731011e-07, "loss": 0.0002, "step": 264610 }, { "epoch": 1.7408866930257956, "grad_norm": 0.02360604861708345, "learning_rate": 5.026445429172739e-07, "loss": 0.0002, "step": 264620 }, { "epoch": 1.7409524812010289, "grad_norm": 0.022870213062315993, "learning_rate": 5.023936973972832e-07, "loss": 0.0013, "step": 264630 }, { "epoch": 1.7410182693762624, "grad_norm": 0.05152355893919333, "learning_rate": 5.021429111743481e-07, "loss": 0.0004, "step": 264640 }, { "epoch": 1.7410840575514956, "grad_norm": 0.024042178073215847, "learning_rate": 5.018921842517738e-07, "loss": 0.0004, "step": 264650 }, { "epoch": 1.7411498457267292, "grad_norm": 0.04100587321994436, "learning_rate": 5.016415166328669e-07, "loss": 0.0003, "step": 264660 }, { "epoch": 1.7412156339019624, "grad_norm": 0.17214369548204214, "learning_rate": 5.013909083209312e-07, "loss": 0.0005, "step": 264670 }, { "epoch": 1.7412814220771957, "grad_norm": 0.005347181110884431, "learning_rate": 5.01140359319271e-07, "loss": 0.0003, "step": 264680 }, { "epoch": 1.7413472102524292, "grad_norm": 5.629483879868928e-05, "learning_rate": 5.008898696311898e-07, "loss": 0.0004, "step": 264690 }, { "epoch": 1.7414129984276627, "grad_norm": 0.016142284585732847, "learning_rate": 5.0063943925999e-07, "loss": 0.0003, "step": 264700 }, { "epoch": 1.741478786602896, "grad_norm": 0.004756439701246183, "learning_rate": 5.003890682089735e-07, "loss": 0.0005, "step": 264710 }, { "epoch": 1.7415445747781293, "grad_norm": 0.0013203040895527526, "learning_rate": 5.001387564814408e-07, "loss": 0.0007, "step": 264720 }, { "epoch": 1.7416103629533628, "grad_norm": 0.019762697212120723, "learning_rate": 4.998885040806923e-07, "loss": 0.0003, "step": 264730 }, { "epoch": 1.7416761511285963, "grad_norm": 0.031105771772417877, "learning_rate": 4.996383110100273e-07, "loss": 0.0003, "step": 264740 }, { "epoch": 1.7417419393038296, "grad_norm": 0.027536597026820844, "learning_rate": 4.993881772727443e-07, "loss": 0.0007, "step": 264750 }, { "epoch": 1.7418077274790629, "grad_norm": 0.03612523476729522, "learning_rate": 4.991381028721403e-07, "loss": 0.0004, "step": 264760 }, { "epoch": 1.7418735156542962, "grad_norm": 0.013885237315700984, "learning_rate": 4.988880878115138e-07, "loss": 0.0002, "step": 264770 }, { "epoch": 1.7419393038295297, "grad_norm": 0.0008019470777791206, "learning_rate": 4.986381320941614e-07, "loss": 0.0003, "step": 264780 }, { "epoch": 1.7420050920047632, "grad_norm": 0.03496792306029691, "learning_rate": 4.983882357233766e-07, "loss": 0.0005, "step": 264790 }, { "epoch": 1.7420708801799965, "grad_norm": 0.012553049164652576, "learning_rate": 4.981383987024558e-07, "loss": 0.0003, "step": 264800 }, { "epoch": 1.7421366683552297, "grad_norm": 0.0004732842018427569, "learning_rate": 4.978886210346917e-07, "loss": 0.0001, "step": 264810 }, { "epoch": 1.7422024565304632, "grad_norm": 0.000872160890898398, "learning_rate": 4.97638902723378e-07, "loss": 0.0003, "step": 264820 }, { "epoch": 1.7422682447056967, "grad_norm": 0.0371878555077513, "learning_rate": 4.973892437718069e-07, "loss": 0.0004, "step": 264830 }, { "epoch": 1.74233403288093, "grad_norm": 0.017909885336022432, "learning_rate": 4.971396441832699e-07, "loss": 0.0004, "step": 264840 }, { "epoch": 1.7423998210561633, "grad_norm": 0.0016173892532364322, "learning_rate": 4.968901039610579e-07, "loss": 0.0005, "step": 264850 }, { "epoch": 1.7424656092313966, "grad_norm": 0.004119849418876986, "learning_rate": 4.966406231084603e-07, "loss": 0.0003, "step": 264860 }, { "epoch": 1.74253139740663, "grad_norm": 0.03124544924831021, "learning_rate": 4.963912016287676e-07, "loss": 0.0005, "step": 264870 }, { "epoch": 1.7425971855818636, "grad_norm": 0.020208739885062766, "learning_rate": 4.961418395252665e-07, "loss": 0.0003, "step": 264880 }, { "epoch": 1.742662973757097, "grad_norm": 0.0032941413615201107, "learning_rate": 4.958925368012446e-07, "loss": 0.0006, "step": 264890 }, { "epoch": 1.7427287619323302, "grad_norm": 0.0018182379621178397, "learning_rate": 4.956432934599909e-07, "loss": 0.0003, "step": 264900 }, { "epoch": 1.7427945501075637, "grad_norm": 0.02855971849194523, "learning_rate": 4.953941095047898e-07, "loss": 0.0006, "step": 264910 }, { "epoch": 1.7428603382827972, "grad_norm": 0.03187432584021364, "learning_rate": 4.951449849389273e-07, "loss": 0.0004, "step": 264920 }, { "epoch": 1.7429261264580305, "grad_norm": 0.03628995977166779, "learning_rate": 4.948959197656872e-07, "loss": 0.0005, "step": 264930 }, { "epoch": 1.7429919146332638, "grad_norm": 0.015608854864968706, "learning_rate": 4.946469139883536e-07, "loss": 0.0005, "step": 264940 }, { "epoch": 1.7430577028084973, "grad_norm": 0.012934252090523991, "learning_rate": 4.943979676102095e-07, "loss": 0.0004, "step": 264950 }, { "epoch": 1.7431234909837305, "grad_norm": 0.0002968500948779198, "learning_rate": 4.941490806345373e-07, "loss": 0.0002, "step": 264960 }, { "epoch": 1.743189279158964, "grad_norm": 0.011880225869134889, "learning_rate": 4.939002530646175e-07, "loss": 0.0001, "step": 264970 }, { "epoch": 1.7432550673341973, "grad_norm": 0.008867979014606907, "learning_rate": 4.93651484903731e-07, "loss": 0.0002, "step": 264980 }, { "epoch": 1.7433208555094306, "grad_norm": 0.03150820065224538, "learning_rate": 4.934027761551579e-07, "loss": 0.0008, "step": 264990 }, { "epoch": 1.7433866436846641, "grad_norm": 0.0036698169101108784, "learning_rate": 4.931541268221768e-07, "loss": 0.0003, "step": 265000 }, { "epoch": 1.7434524318598976, "grad_norm": 0.034237438440599434, "learning_rate": 4.929055369080665e-07, "loss": 0.0005, "step": 265010 }, { "epoch": 1.743518220035131, "grad_norm": 0.07399902822578837, "learning_rate": 4.926570064161029e-07, "loss": 0.0004, "step": 265020 }, { "epoch": 1.7435840082103642, "grad_norm": 0.013108242706273952, "learning_rate": 4.92408535349565e-07, "loss": 0.0006, "step": 265030 }, { "epoch": 1.7436497963855977, "grad_norm": 0.045220198364389654, "learning_rate": 4.921601237117279e-07, "loss": 0.0004, "step": 265040 }, { "epoch": 1.7437155845608312, "grad_norm": 0.0009471039284553535, "learning_rate": 4.919117715058658e-07, "loss": 0.0005, "step": 265050 }, { "epoch": 1.7437813727360645, "grad_norm": 0.027029658893557587, "learning_rate": 4.916634787352537e-07, "loss": 0.0005, "step": 265060 }, { "epoch": 1.7438471609112978, "grad_norm": 0.0068637804471395935, "learning_rate": 4.914152454031651e-07, "loss": 0.0004, "step": 265070 }, { "epoch": 1.743912949086531, "grad_norm": 0.061100891822850056, "learning_rate": 4.911670715128725e-07, "loss": 0.0003, "step": 265080 }, { "epoch": 1.7439787372617646, "grad_norm": 0.00046537875243282103, "learning_rate": 4.909189570676482e-07, "loss": 0.0001, "step": 265090 }, { "epoch": 1.744044525436998, "grad_norm": 0.050393036993661926, "learning_rate": 4.906709020707629e-07, "loss": 0.0004, "step": 265100 }, { "epoch": 1.7441103136122313, "grad_norm": 0.013709201994085806, "learning_rate": 4.904229065254872e-07, "loss": 0.0004, "step": 265110 }, { "epoch": 1.7441761017874646, "grad_norm": 0.019914137851272728, "learning_rate": 4.901749704350911e-07, "loss": 0.0007, "step": 265120 }, { "epoch": 1.7442418899626981, "grad_norm": 0.04904502724303453, "learning_rate": 4.899270938028428e-07, "loss": 0.0007, "step": 265130 }, { "epoch": 1.7443076781379316, "grad_norm": 0.01836583901901465, "learning_rate": 4.896792766320107e-07, "loss": 0.0004, "step": 265140 }, { "epoch": 1.744373466313165, "grad_norm": 0.030120066293261912, "learning_rate": 4.894315189258608e-07, "loss": 0.0006, "step": 265150 }, { "epoch": 1.7444392544883982, "grad_norm": 0.009363607630130243, "learning_rate": 4.89183820687662e-07, "loss": 0.0005, "step": 265160 }, { "epoch": 1.7445050426636315, "grad_norm": 0.02075557732867845, "learning_rate": 4.889361819206784e-07, "loss": 0.0003, "step": 265170 }, { "epoch": 1.744570830838865, "grad_norm": 0.041370751415534736, "learning_rate": 4.886886026281756e-07, "loss": 0.0007, "step": 265180 }, { "epoch": 1.7446366190140985, "grad_norm": 0.014603125161793457, "learning_rate": 4.884410828134173e-07, "loss": 0.0005, "step": 265190 }, { "epoch": 1.7447024071893318, "grad_norm": 0.07853088230161585, "learning_rate": 4.881936224796668e-07, "loss": 0.0017, "step": 265200 }, { "epoch": 1.744768195364565, "grad_norm": 0.03077459781553694, "learning_rate": 4.879462216301867e-07, "loss": 0.0003, "step": 265210 }, { "epoch": 1.7448339835397986, "grad_norm": 0.015713461808396955, "learning_rate": 4.876988802682387e-07, "loss": 0.0001, "step": 265220 }, { "epoch": 1.744899771715032, "grad_norm": 0.004358679794256495, "learning_rate": 4.87451598397084e-07, "loss": 0.0002, "step": 265230 }, { "epoch": 1.7449655598902654, "grad_norm": 0.32357121162921526, "learning_rate": 4.87204376019983e-07, "loss": 0.0005, "step": 265240 }, { "epoch": 1.7450313480654986, "grad_norm": 0.172215771921758, "learning_rate": 4.86957213140194e-07, "loss": 0.0006, "step": 265250 }, { "epoch": 1.745097136240732, "grad_norm": 0.020959281200120354, "learning_rate": 4.867101097609772e-07, "loss": 0.0004, "step": 265260 }, { "epoch": 1.7451629244159654, "grad_norm": 0.03043084748616151, "learning_rate": 4.86463065885589e-07, "loss": 0.0003, "step": 265270 }, { "epoch": 1.745228712591199, "grad_norm": 0.0008774370577803572, "learning_rate": 4.862160815172867e-07, "loss": 0.0003, "step": 265280 }, { "epoch": 1.7452945007664322, "grad_norm": 0.011467218290543795, "learning_rate": 4.859691566593277e-07, "loss": 0.0004, "step": 265290 }, { "epoch": 1.7453602889416655, "grad_norm": 0.005951092595162912, "learning_rate": 4.857222913149673e-07, "loss": 0.001, "step": 265300 }, { "epoch": 1.745426077116899, "grad_norm": 0.0034613778958977365, "learning_rate": 4.854754854874594e-07, "loss": 0.0003, "step": 265310 }, { "epoch": 1.7454918652921325, "grad_norm": 0.01065976515791887, "learning_rate": 4.852287391800581e-07, "loss": 0.0004, "step": 265320 }, { "epoch": 1.7455576534673658, "grad_norm": 0.11222790497195709, "learning_rate": 4.849820523960164e-07, "loss": 0.0007, "step": 265330 }, { "epoch": 1.745623441642599, "grad_norm": 0.0004439208764876593, "learning_rate": 4.847354251385871e-07, "loss": 0.0002, "step": 265340 }, { "epoch": 1.7456892298178326, "grad_norm": 0.010706213090040563, "learning_rate": 4.844888574110218e-07, "loss": 0.0002, "step": 265350 }, { "epoch": 1.7457550179930659, "grad_norm": 0.044462454477354835, "learning_rate": 4.84242349216571e-07, "loss": 0.0017, "step": 265360 }, { "epoch": 1.7458208061682994, "grad_norm": 0.07801894122970784, "learning_rate": 4.839959005584849e-07, "loss": 0.0009, "step": 265370 }, { "epoch": 1.7458865943435327, "grad_norm": 0.011644306048422311, "learning_rate": 4.837495114400121e-07, "loss": 0.0002, "step": 265380 }, { "epoch": 1.745952382518766, "grad_norm": 0.00023680285003193973, "learning_rate": 4.83503181864402e-07, "loss": 0.0003, "step": 265390 }, { "epoch": 1.7460181706939994, "grad_norm": 0.06110597064559001, "learning_rate": 4.832569118349017e-07, "loss": 0.0005, "step": 265400 }, { "epoch": 1.746083958869233, "grad_norm": 0.026529747718244748, "learning_rate": 4.830107013547569e-07, "loss": 0.0015, "step": 265410 }, { "epoch": 1.7461497470444662, "grad_norm": 0.017425414024380324, "learning_rate": 4.82764550427216e-07, "loss": 0.0003, "step": 265420 }, { "epoch": 1.7462155352196995, "grad_norm": 0.01827944392460029, "learning_rate": 4.825184590555227e-07, "loss": 0.0003, "step": 265430 }, { "epoch": 1.746281323394933, "grad_norm": 0.024299574912761916, "learning_rate": 4.822724272429224e-07, "loss": 0.0002, "step": 265440 }, { "epoch": 1.7463471115701665, "grad_norm": 0.00646749568829759, "learning_rate": 4.820264549926584e-07, "loss": 0.0005, "step": 265450 }, { "epoch": 1.7464128997453998, "grad_norm": 0.05670369107974011, "learning_rate": 4.817805423079736e-07, "loss": 0.0003, "step": 265460 }, { "epoch": 1.746478687920633, "grad_norm": 0.0031169000082467368, "learning_rate": 4.815346891921103e-07, "loss": 0.0012, "step": 265470 }, { "epoch": 1.7465444760958664, "grad_norm": 0.0025088459496974457, "learning_rate": 4.812888956483092e-07, "loss": 0.0004, "step": 265480 }, { "epoch": 1.7466102642710999, "grad_norm": 0.07976082930255748, "learning_rate": 4.810431616798117e-07, "loss": 0.0006, "step": 265490 }, { "epoch": 1.7466760524463334, "grad_norm": 0.0016772043425581553, "learning_rate": 4.807974872898574e-07, "loss": 0.0004, "step": 265500 }, { "epoch": 1.7467418406215667, "grad_norm": 0.007038764877037531, "learning_rate": 4.805518724816854e-07, "loss": 0.0005, "step": 265510 }, { "epoch": 1.7468076287968, "grad_norm": 0.006781458288761796, "learning_rate": 4.803063172585331e-07, "loss": 0.0004, "step": 265520 }, { "epoch": 1.7468734169720335, "grad_norm": 0.048963782653140595, "learning_rate": 4.800608216236391e-07, "loss": 0.0005, "step": 265530 }, { "epoch": 1.746939205147267, "grad_norm": 0.002679643522265834, "learning_rate": 4.798153855802385e-07, "loss": 0.0011, "step": 265540 }, { "epoch": 1.7470049933225003, "grad_norm": 0.0048211849740295585, "learning_rate": 4.795700091315685e-07, "loss": 0.0008, "step": 265550 }, { "epoch": 1.7470707814977335, "grad_norm": 0.03297912704664047, "learning_rate": 4.793246922808642e-07, "loss": 0.0004, "step": 265560 }, { "epoch": 1.7471365696729668, "grad_norm": 0.015747320683278866, "learning_rate": 4.790794350313605e-07, "loss": 0.0002, "step": 265570 }, { "epoch": 1.7472023578482003, "grad_norm": 0.03473527615331911, "learning_rate": 4.788342373862892e-07, "loss": 0.0007, "step": 265580 }, { "epoch": 1.7472681460234338, "grad_norm": 0.012811654096723057, "learning_rate": 4.785890993488835e-07, "loss": 0.0004, "step": 265590 }, { "epoch": 1.7473339341986671, "grad_norm": 0.0009653553384446215, "learning_rate": 4.783440209223755e-07, "loss": 0.001, "step": 265600 }, { "epoch": 1.7473997223739004, "grad_norm": 0.012377309658394082, "learning_rate": 4.780990021099962e-07, "loss": 0.0004, "step": 265610 }, { "epoch": 1.747465510549134, "grad_norm": 0.001298743851509557, "learning_rate": 4.778540429149769e-07, "loss": 0.0002, "step": 265620 }, { "epoch": 1.7475312987243674, "grad_norm": 0.016181728885533625, "learning_rate": 4.776091433405461e-07, "loss": 0.0004, "step": 265630 }, { "epoch": 1.7475970868996007, "grad_norm": 0.000539655374695959, "learning_rate": 4.773643033899317e-07, "loss": 0.0003, "step": 265640 }, { "epoch": 1.747662875074834, "grad_norm": 0.08357113148866947, "learning_rate": 4.771195230663644e-07, "loss": 0.001, "step": 265650 }, { "epoch": 1.7477286632500675, "grad_norm": 0.02381399151990372, "learning_rate": 4.768748023730696e-07, "loss": 0.0004, "step": 265660 }, { "epoch": 1.7477944514253008, "grad_norm": 0.033967616207589896, "learning_rate": 4.766301413132746e-07, "loss": 0.0003, "step": 265670 }, { "epoch": 1.7478602396005343, "grad_norm": 0.009227278042979709, "learning_rate": 4.7638553989020395e-07, "loss": 0.0005, "step": 265680 }, { "epoch": 1.7479260277757676, "grad_norm": 0.08694618833250314, "learning_rate": 4.761409981070836e-07, "loss": 0.0005, "step": 265690 }, { "epoch": 1.7479918159510008, "grad_norm": 0.03457592929685862, "learning_rate": 4.7589651596713696e-07, "loss": 0.0005, "step": 265700 }, { "epoch": 1.7480576041262343, "grad_norm": 0.05764521477350928, "learning_rate": 4.756520934735875e-07, "loss": 0.0008, "step": 265710 }, { "epoch": 1.7481233923014678, "grad_norm": 0.01286902856512064, "learning_rate": 4.7540773062965726e-07, "loss": 0.0003, "step": 265720 }, { "epoch": 1.7481891804767011, "grad_norm": 0.005812852428384466, "learning_rate": 4.751634274385691e-07, "loss": 0.0004, "step": 265730 }, { "epoch": 1.7482549686519344, "grad_norm": 0.0061328072413605016, "learning_rate": 4.7491918390354277e-07, "loss": 0.0003, "step": 265740 }, { "epoch": 1.748320756827168, "grad_norm": 0.0075627522403565834, "learning_rate": 4.7467500002779886e-07, "loss": 0.0006, "step": 265750 }, { "epoch": 1.7483865450024014, "grad_norm": 0.05103980395634797, "learning_rate": 4.7443087581455717e-07, "loss": 0.0006, "step": 265760 }, { "epoch": 1.7484523331776347, "grad_norm": 0.055060642386525194, "learning_rate": 4.7418681126703445e-07, "loss": 0.0012, "step": 265770 }, { "epoch": 1.748518121352868, "grad_norm": 0.0374890985388681, "learning_rate": 4.7394280638845104e-07, "loss": 0.0005, "step": 265780 }, { "epoch": 1.7485839095281013, "grad_norm": 0.032020407339320704, "learning_rate": 4.736988611820226e-07, "loss": 0.0004, "step": 265790 }, { "epoch": 1.7486496977033348, "grad_norm": 0.06332288056924001, "learning_rate": 4.73454975650966e-07, "loss": 0.0008, "step": 265800 }, { "epoch": 1.7487154858785683, "grad_norm": 0.002941705606162784, "learning_rate": 4.732111497984959e-07, "loss": 0.0016, "step": 265810 }, { "epoch": 1.7487812740538016, "grad_norm": 0.04396331264662697, "learning_rate": 4.7296738362782703e-07, "loss": 0.0002, "step": 265820 }, { "epoch": 1.7488470622290349, "grad_norm": 0.037096572802701074, "learning_rate": 4.727236771421734e-07, "loss": 0.0004, "step": 265830 }, { "epoch": 1.7489128504042684, "grad_norm": 0.000893556933510667, "learning_rate": 4.72480030344748e-07, "loss": 0.0002, "step": 265840 }, { "epoch": 1.7489786385795019, "grad_norm": 0.1550779118295708, "learning_rate": 4.722364432387633e-07, "loss": 0.0009, "step": 265850 }, { "epoch": 1.7490444267547351, "grad_norm": 0.023541585825745767, "learning_rate": 4.7199291582743114e-07, "loss": 0.0004, "step": 265860 }, { "epoch": 1.7491102149299684, "grad_norm": 0.04327078645009849, "learning_rate": 4.7174944811396117e-07, "loss": 0.001, "step": 265870 }, { "epoch": 1.7491760031052017, "grad_norm": 0.02590516323925826, "learning_rate": 4.7150604010156364e-07, "loss": 0.0003, "step": 265880 }, { "epoch": 1.7492417912804352, "grad_norm": 0.01573784115051859, "learning_rate": 4.7126269179344876e-07, "loss": 0.0004, "step": 265890 }, { "epoch": 1.7493075794556687, "grad_norm": 0.04845158151408331, "learning_rate": 4.7101940319282223e-07, "loss": 0.002, "step": 265900 }, { "epoch": 1.749373367630902, "grad_norm": 0.0005000438322813273, "learning_rate": 4.707761743028949e-07, "loss": 0.0003, "step": 265910 }, { "epoch": 1.7494391558061353, "grad_norm": 0.0461484136198487, "learning_rate": 4.7053300512687194e-07, "loss": 0.0002, "step": 265920 }, { "epoch": 1.7495049439813688, "grad_norm": 0.04956030733407797, "learning_rate": 4.702898956679591e-07, "loss": 0.0014, "step": 265930 }, { "epoch": 1.7495707321566023, "grad_norm": 0.011451844889959614, "learning_rate": 4.700468459293622e-07, "loss": 0.0002, "step": 265940 }, { "epoch": 1.7496365203318356, "grad_norm": 0.017519844311593778, "learning_rate": 4.698038559142848e-07, "loss": 0.0003, "step": 265950 }, { "epoch": 1.7497023085070689, "grad_norm": 0.0770614554116748, "learning_rate": 4.6956092562593146e-07, "loss": 0.0009, "step": 265960 }, { "epoch": 1.7497680966823024, "grad_norm": 0.026741862594490635, "learning_rate": 4.693180550675047e-07, "loss": 0.0005, "step": 265970 }, { "epoch": 1.7498338848575357, "grad_norm": 0.018879754417404294, "learning_rate": 4.690752442422064e-07, "loss": 0.0004, "step": 265980 }, { "epoch": 1.7498996730327692, "grad_norm": 0.021827656732190438, "learning_rate": 4.688324931532373e-07, "loss": 0.0003, "step": 265990 }, { "epoch": 1.7499654612080024, "grad_norm": 0.061692014677369775, "learning_rate": 4.6858980180379875e-07, "loss": 0.0006, "step": 266000 }, { "epoch": 1.7500312493832357, "grad_norm": 0.033518999576764776, "learning_rate": 4.683471701970904e-07, "loss": 0.0004, "step": 266010 }, { "epoch": 1.7500970375584692, "grad_norm": 0.00039284964669339953, "learning_rate": 4.6810459833631027e-07, "loss": 0.001, "step": 266020 }, { "epoch": 1.7501628257337027, "grad_norm": 0.04760518786815161, "learning_rate": 4.6786208622465633e-07, "loss": 0.0006, "step": 266030 }, { "epoch": 1.750228613908936, "grad_norm": 0.0018575932539995552, "learning_rate": 4.6761963386532774e-07, "loss": 0.0003, "step": 266040 }, { "epoch": 1.7502944020841693, "grad_norm": 0.08613644170082432, "learning_rate": 4.673772412615196e-07, "loss": 0.0006, "step": 266050 }, { "epoch": 1.7503601902594028, "grad_norm": 0.029843746994654474, "learning_rate": 4.6713490841642786e-07, "loss": 0.0007, "step": 266060 }, { "epoch": 1.7504259784346363, "grad_norm": 0.0007174353556195373, "learning_rate": 4.668926353332476e-07, "loss": 0.0004, "step": 266070 }, { "epoch": 1.7504917666098696, "grad_norm": 0.04792936005675798, "learning_rate": 4.6665042201517296e-07, "loss": 0.0007, "step": 266080 }, { "epoch": 1.7505575547851029, "grad_norm": 0.0032614529902755597, "learning_rate": 4.66408268465397e-07, "loss": 0.0005, "step": 266090 }, { "epoch": 1.7506233429603362, "grad_norm": 0.0028678943303904244, "learning_rate": 4.661661746871127e-07, "loss": 0.0005, "step": 266100 }, { "epoch": 1.7506891311355697, "grad_norm": 0.04026193188207906, "learning_rate": 4.659241406835119e-07, "loss": 0.0003, "step": 266110 }, { "epoch": 1.7507549193108032, "grad_norm": 0.16574299310983756, "learning_rate": 4.656821664577854e-07, "loss": 0.0012, "step": 266120 }, { "epoch": 1.7508207074860365, "grad_norm": 0.004013088413618492, "learning_rate": 4.6544025201312295e-07, "loss": 0.0002, "step": 266130 }, { "epoch": 1.7508864956612697, "grad_norm": 0.007914410447970168, "learning_rate": 4.6519839735271523e-07, "loss": 0.0006, "step": 266140 }, { "epoch": 1.7509522838365033, "grad_norm": 0.00011875860954478226, "learning_rate": 4.6495660247974973e-07, "loss": 0.0008, "step": 266150 }, { "epoch": 1.7510180720117368, "grad_norm": 0.05863804398862177, "learning_rate": 4.6471486739741336e-07, "loss": 0.0002, "step": 266160 }, { "epoch": 1.75108386018697, "grad_norm": 0.025638698335097264, "learning_rate": 4.644731921088963e-07, "loss": 0.0008, "step": 266170 }, { "epoch": 1.7511496483622033, "grad_norm": 0.006468401179615445, "learning_rate": 4.6423157661738217e-07, "loss": 0.0003, "step": 266180 }, { "epoch": 1.7512154365374366, "grad_norm": 0.06748636805115948, "learning_rate": 4.639900209260578e-07, "loss": 0.0005, "step": 266190 }, { "epoch": 1.7512812247126701, "grad_norm": 0.007443272912272683, "learning_rate": 4.6374852503810796e-07, "loss": 0.0005, "step": 266200 }, { "epoch": 1.7513470128879036, "grad_norm": 0.042313097616293205, "learning_rate": 4.635070889567156e-07, "loss": 0.0004, "step": 266210 }, { "epoch": 1.751412801063137, "grad_norm": 0.09365762524854071, "learning_rate": 4.6326571268506426e-07, "loss": 0.0005, "step": 266220 }, { "epoch": 1.7514785892383702, "grad_norm": 0.04293324043916473, "learning_rate": 4.6302439622633587e-07, "loss": 0.0005, "step": 266230 }, { "epoch": 1.7515443774136037, "grad_norm": 0.009158825874100412, "learning_rate": 4.6278313958371293e-07, "loss": 0.0002, "step": 266240 }, { "epoch": 1.7516101655888372, "grad_norm": 0.00217424981624825, "learning_rate": 4.6254194276037556e-07, "loss": 0.0003, "step": 266250 }, { "epoch": 1.7516759537640705, "grad_norm": 0.0023157914564468566, "learning_rate": 4.6230080575950407e-07, "loss": 0.0007, "step": 266260 }, { "epoch": 1.7517417419393038, "grad_norm": 0.05746517834865622, "learning_rate": 4.620597285842776e-07, "loss": 0.0004, "step": 266270 }, { "epoch": 1.751807530114537, "grad_norm": 0.031154292718475095, "learning_rate": 4.6181871123787414e-07, "loss": 0.0004, "step": 266280 }, { "epoch": 1.7518733182897706, "grad_norm": 0.001341122284424988, "learning_rate": 4.6157775372347057e-07, "loss": 0.0006, "step": 266290 }, { "epoch": 1.751939106465004, "grad_norm": 0.017054931607288278, "learning_rate": 4.613368560442455e-07, "loss": 0.0003, "step": 266300 }, { "epoch": 1.7520048946402373, "grad_norm": 0.004657449657234677, "learning_rate": 4.610960182033741e-07, "loss": 0.0007, "step": 266310 }, { "epoch": 1.7520706828154706, "grad_norm": 0.03218645742793522, "learning_rate": 4.608552402040323e-07, "loss": 0.0005, "step": 266320 }, { "epoch": 1.7521364709907041, "grad_norm": 0.00741794915381201, "learning_rate": 4.6061452204939296e-07, "loss": 0.0005, "step": 266330 }, { "epoch": 1.7522022591659376, "grad_norm": 0.03527876557752288, "learning_rate": 4.603738637426314e-07, "loss": 0.0006, "step": 266340 }, { "epoch": 1.752268047341171, "grad_norm": 0.053202002572656885, "learning_rate": 4.601332652869195e-07, "loss": 0.001, "step": 266350 }, { "epoch": 1.7523338355164042, "grad_norm": 0.01336362248057544, "learning_rate": 4.598927266854297e-07, "loss": 0.0003, "step": 266360 }, { "epoch": 1.7523996236916377, "grad_norm": 0.0019829600728428452, "learning_rate": 4.596522479413329e-07, "loss": 0.0002, "step": 266370 }, { "epoch": 1.7524654118668712, "grad_norm": 0.08117696779886784, "learning_rate": 4.5941182905780034e-07, "loss": 0.0005, "step": 266380 }, { "epoch": 1.7525312000421045, "grad_norm": 0.0011421728004031907, "learning_rate": 4.591714700380007e-07, "loss": 0.0004, "step": 266390 }, { "epoch": 1.7525969882173378, "grad_norm": 0.04155948398535535, "learning_rate": 4.5893117088510406e-07, "loss": 0.0005, "step": 266400 }, { "epoch": 1.752662776392571, "grad_norm": 0.01362993792664804, "learning_rate": 4.5869093160227804e-07, "loss": 0.0002, "step": 266410 }, { "epoch": 1.7527285645678046, "grad_norm": 0.03280292957694304, "learning_rate": 4.58450752192689e-07, "loss": 0.0007, "step": 266420 }, { "epoch": 1.752794352743038, "grad_norm": 0.004432890838763021, "learning_rate": 4.582106326595048e-07, "loss": 0.0005, "step": 266430 }, { "epoch": 1.7528601409182714, "grad_norm": 0.021237818404289344, "learning_rate": 4.579705730058914e-07, "loss": 0.0003, "step": 266440 }, { "epoch": 1.7529259290935046, "grad_norm": 0.011228140538933937, "learning_rate": 4.577305732350129e-07, "loss": 0.0005, "step": 266450 }, { "epoch": 1.7529917172687381, "grad_norm": 0.0011317587772447992, "learning_rate": 4.5749063335003453e-07, "loss": 0.0003, "step": 266460 }, { "epoch": 1.7530575054439717, "grad_norm": 0.02462805149407466, "learning_rate": 4.572507533541182e-07, "loss": 0.0002, "step": 266470 }, { "epoch": 1.753123293619205, "grad_norm": 0.06790686478570374, "learning_rate": 4.5701093325042745e-07, "loss": 0.0008, "step": 266480 }, { "epoch": 1.7531890817944382, "grad_norm": 0.019210324551340544, "learning_rate": 4.5677117304212427e-07, "loss": 0.0004, "step": 266490 }, { "epoch": 1.7532548699696715, "grad_norm": 0.02141805856965813, "learning_rate": 4.5653147273236943e-07, "loss": 0.0004, "step": 266500 }, { "epoch": 1.753320658144905, "grad_norm": 0.02973211042852533, "learning_rate": 4.562918323243232e-07, "loss": 0.0003, "step": 266510 }, { "epoch": 1.7533864463201385, "grad_norm": 0.01890410060665318, "learning_rate": 4.560522518211447e-07, "loss": 0.0005, "step": 266520 }, { "epoch": 1.7534522344953718, "grad_norm": 0.0018916611080227546, "learning_rate": 4.558127312259925e-07, "loss": 0.0002, "step": 266530 }, { "epoch": 1.753518022670605, "grad_norm": 0.03156274773311929, "learning_rate": 4.5557327054202517e-07, "loss": 0.0004, "step": 266540 }, { "epoch": 1.7535838108458386, "grad_norm": 0.009794017820865555, "learning_rate": 4.553338697723986e-07, "loss": 0.0002, "step": 266550 }, { "epoch": 1.753649599021072, "grad_norm": 0.012662610595059649, "learning_rate": 4.5509452892027075e-07, "loss": 0.0008, "step": 266560 }, { "epoch": 1.7537153871963054, "grad_norm": 0.0006357054742731647, "learning_rate": 4.548552479887963e-07, "loss": 0.0003, "step": 266570 }, { "epoch": 1.7537811753715387, "grad_norm": 0.036561558949463016, "learning_rate": 4.5461602698113003e-07, "loss": 0.0007, "step": 266580 }, { "epoch": 1.753846963546772, "grad_norm": 0.011966878779057036, "learning_rate": 4.5437686590042607e-07, "loss": 0.0006, "step": 266590 }, { "epoch": 1.7539127517220054, "grad_norm": 0.010952617012213967, "learning_rate": 4.541377647498368e-07, "loss": 0.0003, "step": 266600 }, { "epoch": 1.753978539897239, "grad_norm": 0.006773726100917271, "learning_rate": 4.538987235325148e-07, "loss": 0.0004, "step": 266610 }, { "epoch": 1.7540443280724722, "grad_norm": 0.009921112295772994, "learning_rate": 4.536597422516126e-07, "loss": 0.0005, "step": 266620 }, { "epoch": 1.7541101162477055, "grad_norm": 0.020186327670076575, "learning_rate": 4.5342082091027974e-07, "loss": 0.001, "step": 266630 }, { "epoch": 1.754175904422939, "grad_norm": 0.06040565968694197, "learning_rate": 4.531819595116671e-07, "loss": 0.0002, "step": 266640 }, { "epoch": 1.7542416925981725, "grad_norm": 0.05559254676066712, "learning_rate": 4.5294315805892273e-07, "loss": 0.0003, "step": 266650 }, { "epoch": 1.7543074807734058, "grad_norm": 0.00015341811141632585, "learning_rate": 4.527044165551964e-07, "loss": 0.0005, "step": 266660 }, { "epoch": 1.754373268948639, "grad_norm": 0.0455229079334441, "learning_rate": 4.524657350036349e-07, "loss": 0.0004, "step": 266670 }, { "epoch": 1.7544390571238726, "grad_norm": 0.014280296990601836, "learning_rate": 4.522271134073841e-07, "loss": 0.0003, "step": 266680 }, { "epoch": 1.7545048452991059, "grad_norm": 0.032023635067706537, "learning_rate": 4.519885517695921e-07, "loss": 0.0003, "step": 266690 }, { "epoch": 1.7545706334743394, "grad_norm": 0.0024683251690385765, "learning_rate": 4.51750050093403e-07, "loss": 0.0004, "step": 266700 }, { "epoch": 1.7546364216495727, "grad_norm": 0.010935417466902847, "learning_rate": 4.515116083819615e-07, "loss": 0.0001, "step": 266710 }, { "epoch": 1.754702209824806, "grad_norm": 0.00038375627554949535, "learning_rate": 4.512732266384107e-07, "loss": 0.0008, "step": 266720 }, { "epoch": 1.7547679980000395, "grad_norm": 0.04825722301119273, "learning_rate": 4.510349048658946e-07, "loss": 0.0004, "step": 266730 }, { "epoch": 1.754833786175273, "grad_norm": 0.0002888227987347525, "learning_rate": 4.507966430675542e-07, "loss": 0.0002, "step": 266740 }, { "epoch": 1.7548995743505063, "grad_norm": 0.026823096026931958, "learning_rate": 4.5055844124653083e-07, "loss": 0.0003, "step": 266750 }, { "epoch": 1.7549653625257395, "grad_norm": 0.004434092830047978, "learning_rate": 4.503202994059658e-07, "loss": 0.0005, "step": 266760 }, { "epoch": 1.755031150700973, "grad_norm": 0.07952843633046187, "learning_rate": 4.5008221754899775e-07, "loss": 0.0003, "step": 266770 }, { "epoch": 1.7550969388762065, "grad_norm": 0.06218117987508557, "learning_rate": 4.4984419567876634e-07, "loss": 0.0003, "step": 266780 }, { "epoch": 1.7551627270514398, "grad_norm": 0.006308672397445452, "learning_rate": 4.496062337984092e-07, "loss": 0.0002, "step": 266790 }, { "epoch": 1.7552285152266731, "grad_norm": 0.0008902334993587522, "learning_rate": 4.4936833191106423e-07, "loss": 0.0003, "step": 266800 }, { "epoch": 1.7552943034019064, "grad_norm": 0.03443178956637341, "learning_rate": 4.4913049001986676e-07, "loss": 0.0003, "step": 266810 }, { "epoch": 1.75536009157714, "grad_norm": 0.014610415043471275, "learning_rate": 4.488927081279537e-07, "loss": 0.0002, "step": 266820 }, { "epoch": 1.7554258797523734, "grad_norm": 0.017633062828276393, "learning_rate": 4.486549862384604e-07, "loss": 0.0003, "step": 266830 }, { "epoch": 1.7554916679276067, "grad_norm": 0.012606814305938935, "learning_rate": 4.484173243545198e-07, "loss": 0.0008, "step": 266840 }, { "epoch": 1.75555745610284, "grad_norm": 0.0026974338581581725, "learning_rate": 4.481797224792661e-07, "loss": 0.0004, "step": 266850 }, { "epoch": 1.7556232442780735, "grad_norm": 0.09752158368581758, "learning_rate": 4.479421806158313e-07, "loss": 0.0005, "step": 266860 }, { "epoch": 1.755689032453307, "grad_norm": 0.25537361746481263, "learning_rate": 4.477046987673478e-07, "loss": 0.0005, "step": 266870 }, { "epoch": 1.7557548206285403, "grad_norm": 0.0010958282898766785, "learning_rate": 4.474672769369459e-07, "loss": 0.0001, "step": 266880 }, { "epoch": 1.7558206088037736, "grad_norm": 0.023165221113916976, "learning_rate": 4.4722991512775647e-07, "loss": 0.0004, "step": 266890 }, { "epoch": 1.7558863969790068, "grad_norm": 0.006664592107378389, "learning_rate": 4.469926133429081e-07, "loss": 0.0003, "step": 266900 }, { "epoch": 1.7559521851542403, "grad_norm": 0.009326740506001139, "learning_rate": 4.4675537158552995e-07, "loss": 0.0004, "step": 266910 }, { "epoch": 1.7560179733294738, "grad_norm": 0.06637072782166871, "learning_rate": 4.4651818985875006e-07, "loss": 0.0007, "step": 266920 }, { "epoch": 1.7560837615047071, "grad_norm": 0.019134934791030782, "learning_rate": 4.462810681656954e-07, "loss": 0.0003, "step": 266930 }, { "epoch": 1.7561495496799404, "grad_norm": 0.0025835302774811827, "learning_rate": 4.4604400650949065e-07, "loss": 0.0014, "step": 266940 }, { "epoch": 1.756215337855174, "grad_norm": 0.0008067294421374894, "learning_rate": 4.4580700489326335e-07, "loss": 0.0003, "step": 266950 }, { "epoch": 1.7562811260304074, "grad_norm": 0.002194612781595368, "learning_rate": 4.4557006332013817e-07, "loss": 0.0002, "step": 266960 }, { "epoch": 1.7563469142056407, "grad_norm": 0.07234997160394477, "learning_rate": 4.453331817932377e-07, "loss": 0.0004, "step": 266970 }, { "epoch": 1.756412702380874, "grad_norm": 0.0003939138610150146, "learning_rate": 4.450963603156855e-07, "loss": 0.0004, "step": 266980 }, { "epoch": 1.7564784905561075, "grad_norm": 0.028933672285881857, "learning_rate": 4.4485959889060406e-07, "loss": 0.0003, "step": 266990 }, { "epoch": 1.7565442787313408, "grad_norm": 0.015964967866719994, "learning_rate": 4.446228975211148e-07, "loss": 0.0002, "step": 267000 }, { "epoch": 1.7566100669065743, "grad_norm": 0.057439331964491004, "learning_rate": 4.443862562103385e-07, "loss": 0.0006, "step": 267010 }, { "epoch": 1.7566758550818076, "grad_norm": 0.010339703036744488, "learning_rate": 4.4414967496139494e-07, "loss": 0.0003, "step": 267020 }, { "epoch": 1.7567416432570409, "grad_norm": 0.013197992739204753, "learning_rate": 4.439131537774027e-07, "loss": 0.0005, "step": 267030 }, { "epoch": 1.7568074314322744, "grad_norm": 0.03191504919330956, "learning_rate": 4.4367669266148104e-07, "loss": 0.0005, "step": 267040 }, { "epoch": 1.7568732196075079, "grad_norm": 0.0005920481453298281, "learning_rate": 4.434402916167468e-07, "loss": 0.0004, "step": 267050 }, { "epoch": 1.7569390077827411, "grad_norm": 0.009077572638140944, "learning_rate": 4.432039506463176e-07, "loss": 0.0007, "step": 267060 }, { "epoch": 1.7570047959579744, "grad_norm": 0.033569128455206626, "learning_rate": 4.429676697533075e-07, "loss": 0.0002, "step": 267070 }, { "epoch": 1.757070584133208, "grad_norm": 0.004806120328725227, "learning_rate": 4.4273144894083407e-07, "loss": 0.0009, "step": 267080 }, { "epoch": 1.7571363723084414, "grad_norm": 0.11300880196123997, "learning_rate": 4.424952882120104e-07, "loss": 0.0004, "step": 267090 }, { "epoch": 1.7572021604836747, "grad_norm": 0.06628872833681698, "learning_rate": 4.4225918756995e-07, "loss": 0.0002, "step": 267100 }, { "epoch": 1.757267948658908, "grad_norm": 0.1626953163957972, "learning_rate": 4.4202314701776606e-07, "loss": 0.0015, "step": 267110 }, { "epoch": 1.7573337368341413, "grad_norm": 0.1041504059256684, "learning_rate": 4.4178716655857045e-07, "loss": 0.0005, "step": 267120 }, { "epoch": 1.7573995250093748, "grad_norm": 0.16602723699716693, "learning_rate": 4.415512461954746e-07, "loss": 0.0007, "step": 267130 }, { "epoch": 1.7574653131846083, "grad_norm": 0.02853825128102618, "learning_rate": 4.413153859315883e-07, "loss": 0.0003, "step": 267140 }, { "epoch": 1.7575311013598416, "grad_norm": 0.060836211602957456, "learning_rate": 4.410795857700217e-07, "loss": 0.0002, "step": 267150 }, { "epoch": 1.7575968895350749, "grad_norm": 0.11379240793100975, "learning_rate": 4.4084384571388307e-07, "loss": 0.0006, "step": 267160 }, { "epoch": 1.7576626777103084, "grad_norm": 0.02576170354373114, "learning_rate": 4.4060816576628084e-07, "loss": 0.0004, "step": 267170 }, { "epoch": 1.7577284658855419, "grad_norm": 0.06795685897884063, "learning_rate": 4.4037254593032264e-07, "loss": 0.0011, "step": 267180 }, { "epoch": 1.7577942540607752, "grad_norm": 0.044633570447500964, "learning_rate": 4.4013698620911427e-07, "loss": 0.0015, "step": 267190 }, { "epoch": 1.7578600422360084, "grad_norm": 0.015406979962895794, "learning_rate": 4.399014866057605e-07, "loss": 0.0003, "step": 267200 }, { "epoch": 1.7579258304112417, "grad_norm": 0.014325382193786436, "learning_rate": 4.3966604712336826e-07, "loss": 0.0003, "step": 267210 }, { "epoch": 1.7579916185864752, "grad_norm": 0.09744332703081464, "learning_rate": 4.3943066776504063e-07, "loss": 0.0006, "step": 267220 }, { "epoch": 1.7580574067617087, "grad_norm": 0.12110861295496964, "learning_rate": 4.3919534853388066e-07, "loss": 0.0007, "step": 267230 }, { "epoch": 1.758123194936942, "grad_norm": 0.005268992903816428, "learning_rate": 4.389600894329915e-07, "loss": 0.0003, "step": 267240 }, { "epoch": 1.7581889831121753, "grad_norm": 0.020720043940982508, "learning_rate": 4.3872489046547394e-07, "loss": 0.0004, "step": 267250 }, { "epoch": 1.7582547712874088, "grad_norm": 0.12401003008263732, "learning_rate": 4.3848975163442995e-07, "loss": 0.0021, "step": 267260 }, { "epoch": 1.7583205594626423, "grad_norm": 0.02500554791933702, "learning_rate": 4.3825467294295874e-07, "loss": 0.0017, "step": 267270 }, { "epoch": 1.7583863476378756, "grad_norm": 0.00034059910407803696, "learning_rate": 4.380196543941595e-07, "loss": 0.0003, "step": 267280 }, { "epoch": 1.7584521358131089, "grad_norm": 0.07433530729477407, "learning_rate": 4.377846959911314e-07, "loss": 0.0005, "step": 267290 }, { "epoch": 1.7585179239883424, "grad_norm": 0.008429557353613677, "learning_rate": 4.375497977369719e-07, "loss": 0.0008, "step": 267300 }, { "epoch": 1.7585837121635757, "grad_norm": 0.02262081788153234, "learning_rate": 4.373149596347781e-07, "loss": 0.0004, "step": 267310 }, { "epoch": 1.7586495003388092, "grad_norm": 0.02821923138220115, "learning_rate": 4.3708018168764577e-07, "loss": 0.0002, "step": 267320 }, { "epoch": 1.7587152885140425, "grad_norm": 0.006702859363323392, "learning_rate": 4.3684546389866967e-07, "loss": 0.0004, "step": 267330 }, { "epoch": 1.7587810766892757, "grad_norm": 0.0021107660445244397, "learning_rate": 4.3661080627094567e-07, "loss": 0.0001, "step": 267340 }, { "epoch": 1.7588468648645093, "grad_norm": 0.0005302678867105564, "learning_rate": 4.3637620880756736e-07, "loss": 0.0005, "step": 267350 }, { "epoch": 1.7589126530397428, "grad_norm": 0.0007324312139266719, "learning_rate": 4.3614167151162734e-07, "loss": 0.0003, "step": 267360 }, { "epoch": 1.758978441214976, "grad_norm": 0.0010402493765950844, "learning_rate": 4.359071943862175e-07, "loss": 0.0005, "step": 267370 }, { "epoch": 1.7590442293902093, "grad_norm": 0.06800514843997436, "learning_rate": 4.356727774344294e-07, "loss": 0.0004, "step": 267380 }, { "epoch": 1.7591100175654428, "grad_norm": 0.048103623305688716, "learning_rate": 4.3543842065935424e-07, "loss": 0.0005, "step": 267390 }, { "epoch": 1.7591758057406763, "grad_norm": 0.005335062861586939, "learning_rate": 4.3520412406408086e-07, "loss": 0.0003, "step": 267400 }, { "epoch": 1.7592415939159096, "grad_norm": 0.016850463020397784, "learning_rate": 4.349698876516989e-07, "loss": 0.0005, "step": 267410 }, { "epoch": 1.759307382091143, "grad_norm": 0.017117460090719863, "learning_rate": 4.3473571142529645e-07, "loss": 0.0003, "step": 267420 }, { "epoch": 1.7593731702663762, "grad_norm": 0.010134597262346865, "learning_rate": 4.3450159538796057e-07, "loss": 0.0004, "step": 267430 }, { "epoch": 1.7594389584416097, "grad_norm": 0.03294843928418622, "learning_rate": 4.3426753954277813e-07, "loss": 0.0004, "step": 267440 }, { "epoch": 1.7595047466168432, "grad_norm": 0.0001842168914833652, "learning_rate": 4.3403354389283505e-07, "loss": 0.0006, "step": 267450 }, { "epoch": 1.7595705347920765, "grad_norm": 0.043250098071098, "learning_rate": 4.337996084412155e-07, "loss": 0.0002, "step": 267460 }, { "epoch": 1.7596363229673098, "grad_norm": 0.015446078922941375, "learning_rate": 4.3356573319100595e-07, "loss": 0.0002, "step": 267470 }, { "epoch": 1.7597021111425433, "grad_norm": 0.05525300689419061, "learning_rate": 4.3333191814528775e-07, "loss": 0.0003, "step": 267480 }, { "epoch": 1.7597678993177768, "grad_norm": 0.002748602167106526, "learning_rate": 4.3309816330714403e-07, "loss": 0.0003, "step": 267490 }, { "epoch": 1.75983368749301, "grad_norm": 0.02187952060151577, "learning_rate": 4.3286446867965684e-07, "loss": 0.0004, "step": 267500 }, { "epoch": 1.7598994756682433, "grad_norm": 0.1895298976838522, "learning_rate": 4.3263083426590745e-07, "loss": 0.0006, "step": 267510 }, { "epoch": 1.7599652638434766, "grad_norm": 0.0022837933190268354, "learning_rate": 4.3239726006897574e-07, "loss": 0.0002, "step": 267520 }, { "epoch": 1.7600310520187101, "grad_norm": 0.054958952715484656, "learning_rate": 4.321637460919409e-07, "loss": 0.0008, "step": 267530 }, { "epoch": 1.7600968401939436, "grad_norm": 0.0035547331888753126, "learning_rate": 4.3193029233788264e-07, "loss": 0.0006, "step": 267540 }, { "epoch": 1.760162628369177, "grad_norm": 0.0021513956421567253, "learning_rate": 4.3169689880987796e-07, "loss": 0.0004, "step": 267550 }, { "epoch": 1.7602284165444102, "grad_norm": 0.0013593370907008162, "learning_rate": 4.314635655110039e-07, "loss": 0.0006, "step": 267560 }, { "epoch": 1.7602942047196437, "grad_norm": 0.006665218594231619, "learning_rate": 4.312302924443368e-07, "loss": 0.0004, "step": 267570 }, { "epoch": 1.7603599928948772, "grad_norm": 0.0002930747580591255, "learning_rate": 4.3099707961295314e-07, "loss": 0.0002, "step": 267580 }, { "epoch": 1.7604257810701105, "grad_norm": 0.022454799967203565, "learning_rate": 4.3076392701992653e-07, "loss": 0.0017, "step": 267590 }, { "epoch": 1.7604915692453438, "grad_norm": 0.01743309903124522, "learning_rate": 4.305308346683318e-07, "loss": 0.0004, "step": 267600 }, { "epoch": 1.760557357420577, "grad_norm": 0.120609120975483, "learning_rate": 4.3029780256124144e-07, "loss": 0.0005, "step": 267610 }, { "epoch": 1.7606231455958106, "grad_norm": 0.11288476233002419, "learning_rate": 4.3006483070172743e-07, "loss": 0.0007, "step": 267620 }, { "epoch": 1.760688933771044, "grad_norm": 0.0007037718043052461, "learning_rate": 4.298319190928624e-07, "loss": 0.0006, "step": 267630 }, { "epoch": 1.7607547219462774, "grad_norm": 0.04178474434585823, "learning_rate": 4.2959906773771597e-07, "loss": 0.0003, "step": 267640 }, { "epoch": 1.7608205101215106, "grad_norm": 0.022252172249362955, "learning_rate": 4.2936627663935913e-07, "loss": 0.0004, "step": 267650 }, { "epoch": 1.7608862982967441, "grad_norm": 0.008690874068636065, "learning_rate": 4.2913354580086e-07, "loss": 0.0003, "step": 267660 }, { "epoch": 1.7609520864719777, "grad_norm": 0.0010433249905882254, "learning_rate": 4.2890087522528766e-07, "loss": 0.0004, "step": 267670 }, { "epoch": 1.761017874647211, "grad_norm": 0.06907321309842729, "learning_rate": 4.2866826491570926e-07, "loss": 0.0004, "step": 267680 }, { "epoch": 1.7610836628224442, "grad_norm": 0.0027722274828692224, "learning_rate": 4.2843571487519166e-07, "loss": 0.0004, "step": 267690 }, { "epoch": 1.7611494509976777, "grad_norm": 0.05686502729576983, "learning_rate": 4.2820322510680025e-07, "loss": 0.0003, "step": 267700 }, { "epoch": 1.761215239172911, "grad_norm": 0.0445945487052897, "learning_rate": 4.27970795613602e-07, "loss": 0.0005, "step": 267710 }, { "epoch": 1.7612810273481445, "grad_norm": 0.01115233462408503, "learning_rate": 4.277384263986595e-07, "loss": 0.0004, "step": 267720 }, { "epoch": 1.7613468155233778, "grad_norm": 0.01939891976305715, "learning_rate": 4.2750611746503744e-07, "loss": 0.0003, "step": 267730 }, { "epoch": 1.761412603698611, "grad_norm": 0.010922970081502455, "learning_rate": 4.272738688157979e-07, "loss": 0.0004, "step": 267740 }, { "epoch": 1.7614783918738446, "grad_norm": 0.017235846638883116, "learning_rate": 4.270416804540034e-07, "loss": 0.0008, "step": 267750 }, { "epoch": 1.761544180049078, "grad_norm": 0.013048418398985146, "learning_rate": 4.2680955238271427e-07, "loss": 0.0003, "step": 267760 }, { "epoch": 1.7616099682243114, "grad_norm": 0.008409121139898959, "learning_rate": 4.2657748460499247e-07, "loss": 0.0001, "step": 267770 }, { "epoch": 1.7616757563995447, "grad_norm": 0.025337075756099968, "learning_rate": 4.263454771238962e-07, "loss": 0.0002, "step": 267780 }, { "epoch": 1.7617415445747782, "grad_norm": 4.0443677372405825e-05, "learning_rate": 4.2611352994248455e-07, "loss": 0.0003, "step": 267790 }, { "epoch": 1.7618073327500117, "grad_norm": 0.011344785201863678, "learning_rate": 4.2588164306381576e-07, "loss": 0.0003, "step": 267800 }, { "epoch": 1.761873120925245, "grad_norm": 0.004501861561477358, "learning_rate": 4.2564981649094726e-07, "loss": 0.0003, "step": 267810 }, { "epoch": 1.7619389091004782, "grad_norm": 0.002907410100521737, "learning_rate": 4.2541805022693503e-07, "loss": 0.0003, "step": 267820 }, { "epoch": 1.7620046972757115, "grad_norm": 0.03335898065234714, "learning_rate": 4.251863442748344e-07, "loss": 0.0004, "step": 267830 }, { "epoch": 1.762070485450945, "grad_norm": 0.018929116247870647, "learning_rate": 4.2495469863770125e-07, "loss": 0.0002, "step": 267840 }, { "epoch": 1.7621362736261785, "grad_norm": 0.005903078903715933, "learning_rate": 4.247231133185892e-07, "loss": 0.0007, "step": 267850 }, { "epoch": 1.7622020618014118, "grad_norm": 0.030074711627582244, "learning_rate": 4.2449158832055194e-07, "loss": 0.0005, "step": 267860 }, { "epoch": 1.762267849976645, "grad_norm": 0.0009177692790663934, "learning_rate": 4.242601236466409e-07, "loss": 0.0004, "step": 267870 }, { "epoch": 1.7623336381518786, "grad_norm": 0.007338208939647656, "learning_rate": 4.240287192999082e-07, "loss": 0.0007, "step": 267880 }, { "epoch": 1.762399426327112, "grad_norm": 0.005456541278480008, "learning_rate": 4.2379737528340513e-07, "loss": 0.0003, "step": 267890 }, { "epoch": 1.7624652145023454, "grad_norm": 0.00439942839742483, "learning_rate": 4.235660916001816e-07, "loss": 0.0002, "step": 267900 }, { "epoch": 1.7625310026775787, "grad_norm": 0.007766881618523252, "learning_rate": 4.233348682532862e-07, "loss": 0.0001, "step": 267910 }, { "epoch": 1.762596790852812, "grad_norm": 0.006939834594361097, "learning_rate": 4.231037052457676e-07, "loss": 0.0008, "step": 267920 }, { "epoch": 1.7626625790280455, "grad_norm": 0.03949017617504778, "learning_rate": 4.2287260258067453e-07, "loss": 0.0008, "step": 267930 }, { "epoch": 1.762728367203279, "grad_norm": 0.09850204990752516, "learning_rate": 4.2264156026105283e-07, "loss": 0.0005, "step": 267940 }, { "epoch": 1.7627941553785123, "grad_norm": 0.056887084050025044, "learning_rate": 4.2241057828994903e-07, "loss": 0.0003, "step": 267950 }, { "epoch": 1.7628599435537455, "grad_norm": 0.11220055904305525, "learning_rate": 4.2217965667040726e-07, "loss": 0.0005, "step": 267960 }, { "epoch": 1.762925731728979, "grad_norm": 0.1444050492850731, "learning_rate": 4.219487954054735e-07, "loss": 0.0011, "step": 267970 }, { "epoch": 1.7629915199042125, "grad_norm": 0.11484136816979801, "learning_rate": 4.217179944981914e-07, "loss": 0.0004, "step": 267980 }, { "epoch": 1.7630573080794458, "grad_norm": 0.0939445205527297, "learning_rate": 4.214872539516035e-07, "loss": 0.0004, "step": 267990 }, { "epoch": 1.7631230962546791, "grad_norm": 0.005158357405986822, "learning_rate": 4.2125657376875187e-07, "loss": 0.0003, "step": 268000 }, { "epoch": 1.7631888844299126, "grad_norm": 0.005402251077780732, "learning_rate": 4.2102595395267785e-07, "loss": 0.0002, "step": 268010 }, { "epoch": 1.763254672605146, "grad_norm": 0.002244807888823393, "learning_rate": 4.207953945064225e-07, "loss": 0.0001, "step": 268020 }, { "epoch": 1.7633204607803794, "grad_norm": 0.029544724837337485, "learning_rate": 4.205648954330244e-07, "loss": 0.0011, "step": 268030 }, { "epoch": 1.7633862489556127, "grad_norm": 0.015300250474680937, "learning_rate": 4.203344567355233e-07, "loss": 0.0003, "step": 268040 }, { "epoch": 1.763452037130846, "grad_norm": 0.001174408017693632, "learning_rate": 4.2010407841695687e-07, "loss": 0.0005, "step": 268050 }, { "epoch": 1.7635178253060795, "grad_norm": 0.024062275822938566, "learning_rate": 4.1987376048036265e-07, "loss": 0.0002, "step": 268060 }, { "epoch": 1.763583613481313, "grad_norm": 0.007236123511261696, "learning_rate": 4.196435029287776e-07, "loss": 0.0002, "step": 268070 }, { "epoch": 1.7636494016565463, "grad_norm": 0.056310570068208554, "learning_rate": 4.194133057652372e-07, "loss": 0.0008, "step": 268080 }, { "epoch": 1.7637151898317796, "grad_norm": 0.015272390543541087, "learning_rate": 4.19183168992775e-07, "loss": 0.0003, "step": 268090 }, { "epoch": 1.763780978007013, "grad_norm": 0.0016139488320040189, "learning_rate": 4.189530926144275e-07, "loss": 0.0002, "step": 268100 }, { "epoch": 1.7638467661822466, "grad_norm": 0.014626059949291621, "learning_rate": 4.187230766332273e-07, "loss": 0.0006, "step": 268110 }, { "epoch": 1.7639125543574798, "grad_norm": 0.05428437538648027, "learning_rate": 4.1849312105220643e-07, "loss": 0.0006, "step": 268120 }, { "epoch": 1.7639783425327131, "grad_norm": 0.007738130141379339, "learning_rate": 4.182632258743968e-07, "loss": 0.0003, "step": 268130 }, { "epoch": 1.7640441307079464, "grad_norm": 0.011747457746942764, "learning_rate": 4.180333911028295e-07, "loss": 0.0005, "step": 268140 }, { "epoch": 1.76410991888318, "grad_norm": 0.0010948119327744686, "learning_rate": 4.1780361674053527e-07, "loss": 0.0003, "step": 268150 }, { "epoch": 1.7641757070584134, "grad_norm": 0.03568481409980614, "learning_rate": 4.1757390279054233e-07, "loss": 0.0005, "step": 268160 }, { "epoch": 1.7642414952336467, "grad_norm": 0.0013293006506571285, "learning_rate": 4.1734424925587993e-07, "loss": 0.0002, "step": 268170 }, { "epoch": 1.76430728340888, "grad_norm": 0.011169581315703147, "learning_rate": 4.171146561395756e-07, "loss": 0.0007, "step": 268180 }, { "epoch": 1.7643730715841135, "grad_norm": 0.007022463598659384, "learning_rate": 4.1688512344465695e-07, "loss": 0.0003, "step": 268190 }, { "epoch": 1.764438859759347, "grad_norm": 0.0006253180834378196, "learning_rate": 4.166556511741493e-07, "loss": 0.0003, "step": 268200 }, { "epoch": 1.7645046479345803, "grad_norm": 0.0040282993768391935, "learning_rate": 4.164262393310786e-07, "loss": 0.0003, "step": 268210 }, { "epoch": 1.7645704361098136, "grad_norm": 0.05077583027733323, "learning_rate": 4.1619688791846855e-07, "loss": 0.0008, "step": 268220 }, { "epoch": 1.7646362242850469, "grad_norm": 0.01865640401353212, "learning_rate": 4.159675969393445e-07, "loss": 0.0003, "step": 268230 }, { "epoch": 1.7647020124602804, "grad_norm": 0.0023553416730234087, "learning_rate": 4.1573836639672847e-07, "loss": 0.0004, "step": 268240 }, { "epoch": 1.7647678006355139, "grad_norm": 0.0018675854398887898, "learning_rate": 4.1550919629364307e-07, "loss": 0.0003, "step": 268250 }, { "epoch": 1.7648335888107471, "grad_norm": 0.03355758878093568, "learning_rate": 4.1528008663310916e-07, "loss": 0.0002, "step": 268260 }, { "epoch": 1.7648993769859804, "grad_norm": 0.025202413410171813, "learning_rate": 4.1505103741814825e-07, "loss": 0.0005, "step": 268270 }, { "epoch": 1.764965165161214, "grad_norm": 0.06646790115687272, "learning_rate": 4.14822048651779e-07, "loss": 0.001, "step": 268280 }, { "epoch": 1.7650309533364474, "grad_norm": 0.031116729628380448, "learning_rate": 4.145931203370213e-07, "loss": 0.0004, "step": 268290 }, { "epoch": 1.7650967415116807, "grad_norm": 0.11334633297449753, "learning_rate": 4.1436425247689313e-07, "loss": 0.0007, "step": 268300 }, { "epoch": 1.765162529686914, "grad_norm": 0.02363892934633766, "learning_rate": 4.141354450744123e-07, "loss": 0.0004, "step": 268310 }, { "epoch": 1.7652283178621475, "grad_norm": 0.024868744651495283, "learning_rate": 4.1390669813259456e-07, "loss": 0.0007, "step": 268320 }, { "epoch": 1.7652941060373808, "grad_norm": 0.010036936672897614, "learning_rate": 4.1367801165445584e-07, "loss": 0.0006, "step": 268330 }, { "epoch": 1.7653598942126143, "grad_norm": 0.013980280236676372, "learning_rate": 4.134493856430122e-07, "loss": 0.0003, "step": 268340 }, { "epoch": 1.7654256823878476, "grad_norm": 0.01631389673411788, "learning_rate": 4.132208201012761e-07, "loss": 0.0002, "step": 268350 }, { "epoch": 1.7654914705630809, "grad_norm": 0.004700124252372385, "learning_rate": 4.1299231503226235e-07, "loss": 0.0005, "step": 268360 }, { "epoch": 1.7655572587383144, "grad_norm": 0.003029416381807355, "learning_rate": 4.1276387043898415e-07, "loss": 0.0004, "step": 268370 }, { "epoch": 1.7656230469135479, "grad_norm": 0.03240163363409641, "learning_rate": 4.125354863244524e-07, "loss": 0.0007, "step": 268380 }, { "epoch": 1.7656888350887812, "grad_norm": 0.011839295155434353, "learning_rate": 4.1230716269167804e-07, "loss": 0.0003, "step": 268390 }, { "epoch": 1.7657546232640144, "grad_norm": 0.019027546780105115, "learning_rate": 4.1207889954367143e-07, "loss": 0.0001, "step": 268400 }, { "epoch": 1.765820411439248, "grad_norm": 0.002921681739988148, "learning_rate": 4.1185069688344235e-07, "loss": 0.0008, "step": 268410 }, { "epoch": 1.7658861996144815, "grad_norm": 0.0008184852725634215, "learning_rate": 4.1162255471399904e-07, "loss": 0.0006, "step": 268420 }, { "epoch": 1.7659519877897147, "grad_norm": 0.04746825631045971, "learning_rate": 4.113944730383501e-07, "loss": 0.0005, "step": 268430 }, { "epoch": 1.766017775964948, "grad_norm": 0.00013018962610820948, "learning_rate": 4.111664518595015e-07, "loss": 0.0003, "step": 268440 }, { "epoch": 1.7660835641401813, "grad_norm": 0.001149928223165419, "learning_rate": 4.1093849118046035e-07, "loss": 0.0004, "step": 268450 }, { "epoch": 1.7661493523154148, "grad_norm": 0.07060603193693021, "learning_rate": 4.107105910042314e-07, "loss": 0.0004, "step": 268460 }, { "epoch": 1.7662151404906483, "grad_norm": 0.028361794166352667, "learning_rate": 4.1048275133382e-07, "loss": 0.0004, "step": 268470 }, { "epoch": 1.7662809286658816, "grad_norm": 0.039813060356309764, "learning_rate": 4.102549721722293e-07, "loss": 0.0003, "step": 268480 }, { "epoch": 1.7663467168411149, "grad_norm": 0.004357069642509702, "learning_rate": 4.1002725352246306e-07, "loss": 0.0004, "step": 268490 }, { "epoch": 1.7664125050163484, "grad_norm": 0.03924984292642271, "learning_rate": 4.097995953875239e-07, "loss": 0.0004, "step": 268500 }, { "epoch": 1.766478293191582, "grad_norm": 0.005559722545666331, "learning_rate": 4.095719977704121e-07, "loss": 0.0002, "step": 268510 }, { "epoch": 1.7665440813668152, "grad_norm": 0.006892917151367601, "learning_rate": 4.0934446067412924e-07, "loss": 0.0004, "step": 268520 }, { "epoch": 1.7666098695420485, "grad_norm": 0.011752043184615426, "learning_rate": 4.091169841016751e-07, "loss": 0.0002, "step": 268530 }, { "epoch": 1.7666756577172817, "grad_norm": 0.014979859392709717, "learning_rate": 4.088895680560484e-07, "loss": 0.0009, "step": 268540 }, { "epoch": 1.7667414458925152, "grad_norm": 0.08863823004309536, "learning_rate": 4.0866221254024787e-07, "loss": 0.0007, "step": 268550 }, { "epoch": 1.7668072340677488, "grad_norm": 0.06278733623651842, "learning_rate": 4.084349175572705e-07, "loss": 0.0008, "step": 268560 }, { "epoch": 1.766873022242982, "grad_norm": 0.08063980572466797, "learning_rate": 4.0820768311011284e-07, "loss": 0.0003, "step": 268570 }, { "epoch": 1.7669388104182153, "grad_norm": 0.03810079460535775, "learning_rate": 4.079805092017719e-07, "loss": 0.0011, "step": 268580 }, { "epoch": 1.7670045985934488, "grad_norm": 0.004577184173746864, "learning_rate": 4.077533958352414e-07, "loss": 0.0003, "step": 268590 }, { "epoch": 1.7670703867686823, "grad_norm": 0.0006699513385583524, "learning_rate": 4.0752634301351623e-07, "loss": 0.0003, "step": 268600 }, { "epoch": 1.7671361749439156, "grad_norm": 0.042042825584183374, "learning_rate": 4.072993507395895e-07, "loss": 0.0004, "step": 268610 }, { "epoch": 1.767201963119149, "grad_norm": 0.011925639983899885, "learning_rate": 4.070724190164549e-07, "loss": 0.0003, "step": 268620 }, { "epoch": 1.7672677512943822, "grad_norm": 0.024052191107987925, "learning_rate": 4.0684554784710395e-07, "loss": 0.0008, "step": 268630 }, { "epoch": 1.7673335394696157, "grad_norm": 0.0029395841640276164, "learning_rate": 4.066187372345276e-07, "loss": 0.0007, "step": 268640 }, { "epoch": 1.7673993276448492, "grad_norm": 0.01879371407233855, "learning_rate": 4.063919871817157e-07, "loss": 0.0002, "step": 268650 }, { "epoch": 1.7674651158200825, "grad_norm": 0.07146299238987867, "learning_rate": 4.061652976916586e-07, "loss": 0.0009, "step": 268660 }, { "epoch": 1.7675309039953158, "grad_norm": 0.04234023853295904, "learning_rate": 4.059386687673439e-07, "loss": 0.0006, "step": 268670 }, { "epoch": 1.7675966921705493, "grad_norm": 0.0002609704943640128, "learning_rate": 4.057121004117609e-07, "loss": 0.0005, "step": 268680 }, { "epoch": 1.7676624803457828, "grad_norm": 0.005365406619262899, "learning_rate": 4.0548559262789553e-07, "loss": 0.0002, "step": 268690 }, { "epoch": 1.767728268521016, "grad_norm": 0.009651433411515888, "learning_rate": 4.052591454187343e-07, "loss": 0.0005, "step": 268700 }, { "epoch": 1.7677940566962493, "grad_norm": 0.040082940727387696, "learning_rate": 4.050327587872638e-07, "loss": 0.0003, "step": 268710 }, { "epoch": 1.7678598448714828, "grad_norm": 0.008631016808204557, "learning_rate": 4.0480643273646705e-07, "loss": 0.0005, "step": 268720 }, { "epoch": 1.7679256330467163, "grad_norm": 0.005708985544183348, "learning_rate": 4.045801672693295e-07, "loss": 0.0005, "step": 268730 }, { "epoch": 1.7679914212219496, "grad_norm": 0.024898770133041925, "learning_rate": 4.043539623888321e-07, "loss": 0.0014, "step": 268740 }, { "epoch": 1.768057209397183, "grad_norm": 0.009803191161479213, "learning_rate": 4.0412781809795966e-07, "loss": 0.0004, "step": 268750 }, { "epoch": 1.7681229975724162, "grad_norm": 0.0023317901724232626, "learning_rate": 4.0390173439969314e-07, "loss": 0.0002, "step": 268760 }, { "epoch": 1.7681887857476497, "grad_norm": 0.0022488121343801444, "learning_rate": 4.0367571129701245e-07, "loss": 0.0002, "step": 268770 }, { "epoch": 1.7682545739228832, "grad_norm": 0.005993619544667679, "learning_rate": 4.0344974879289787e-07, "loss": 0.0002, "step": 268780 }, { "epoch": 1.7683203620981165, "grad_norm": 0.02447531139080802, "learning_rate": 4.032238468903288e-07, "loss": 0.0002, "step": 268790 }, { "epoch": 1.7683861502733498, "grad_norm": 0.027963091360634493, "learning_rate": 4.0299800559228277e-07, "loss": 0.0008, "step": 268800 }, { "epoch": 1.7684519384485833, "grad_norm": 0.09638102877383847, "learning_rate": 4.0277222490173796e-07, "loss": 0.0006, "step": 268810 }, { "epoch": 1.7685177266238168, "grad_norm": 0.015604606121116766, "learning_rate": 4.025465048216709e-07, "loss": 0.0006, "step": 268820 }, { "epoch": 1.76858351479905, "grad_norm": 0.010505048284550124, "learning_rate": 4.023208453550581e-07, "loss": 0.0005, "step": 268830 }, { "epoch": 1.7686493029742834, "grad_norm": 0.005605170159055544, "learning_rate": 4.020952465048733e-07, "loss": 0.0002, "step": 268840 }, { "epoch": 1.7687150911495166, "grad_norm": 0.016644502596211547, "learning_rate": 4.0186970827409243e-07, "loss": 0.001, "step": 268850 }, { "epoch": 1.7687808793247501, "grad_norm": 0.031101223736811963, "learning_rate": 4.0164423066568757e-07, "loss": 0.0006, "step": 268860 }, { "epoch": 1.7688466674999836, "grad_norm": 0.00044792073605854146, "learning_rate": 4.0141881368263194e-07, "loss": 0.0003, "step": 268870 }, { "epoch": 1.768912455675217, "grad_norm": 0.0036960804845558334, "learning_rate": 4.0119345732789805e-07, "loss": 0.0004, "step": 268880 }, { "epoch": 1.7689782438504502, "grad_norm": 0.0006768940771130971, "learning_rate": 4.00968161604457e-07, "loss": 0.0001, "step": 268890 }, { "epoch": 1.7690440320256837, "grad_norm": 0.07030461792784837, "learning_rate": 4.0074292651527846e-07, "loss": 0.0005, "step": 268900 }, { "epoch": 1.7691098202009172, "grad_norm": 0.008838809685568657, "learning_rate": 4.005177520633319e-07, "loss": 0.0008, "step": 268910 }, { "epoch": 1.7691756083761505, "grad_norm": 0.00197004363553029, "learning_rate": 4.0029263825158703e-07, "loss": 0.0008, "step": 268920 }, { "epoch": 1.7692413965513838, "grad_norm": 0.014586667283691141, "learning_rate": 4.00067585083011e-07, "loss": 0.0003, "step": 268930 }, { "epoch": 1.769307184726617, "grad_norm": 0.007080561559252495, "learning_rate": 3.998425925605709e-07, "loss": 0.0002, "step": 268940 }, { "epoch": 1.7693729729018506, "grad_norm": 0.008252413691701885, "learning_rate": 3.996176606872332e-07, "loss": 0.0003, "step": 268950 }, { "epoch": 1.769438761077084, "grad_norm": 0.0065008148028631385, "learning_rate": 3.993927894659638e-07, "loss": 0.0002, "step": 268960 }, { "epoch": 1.7695045492523174, "grad_norm": 0.09624102823574394, "learning_rate": 3.9916797889972715e-07, "loss": 0.0004, "step": 268970 }, { "epoch": 1.7695703374275507, "grad_norm": 0.02057573585058919, "learning_rate": 3.9894322899148685e-07, "loss": 0.0003, "step": 268980 }, { "epoch": 1.7696361256027842, "grad_norm": 0.0008818719155930357, "learning_rate": 3.987185397442067e-07, "loss": 0.0003, "step": 268990 }, { "epoch": 1.7697019137780177, "grad_norm": 0.004351737141843656, "learning_rate": 3.9849391116084766e-07, "loss": 0.0002, "step": 269000 }, { "epoch": 1.769767701953251, "grad_norm": 0.052871103453587066, "learning_rate": 3.9826934324437295e-07, "loss": 0.0004, "step": 269010 }, { "epoch": 1.7698334901284842, "grad_norm": 0.03364415117695596, "learning_rate": 3.980448359977429e-07, "loss": 0.0003, "step": 269020 }, { "epoch": 1.7698992783037177, "grad_norm": 0.021722773827104812, "learning_rate": 3.978203894239174e-07, "loss": 0.0003, "step": 269030 }, { "epoch": 1.769965066478951, "grad_norm": 0.06576363182454042, "learning_rate": 3.975960035258547e-07, "loss": 0.0006, "step": 269040 }, { "epoch": 1.7700308546541845, "grad_norm": 0.015194052925614931, "learning_rate": 3.9737167830651456e-07, "loss": 0.0005, "step": 269050 }, { "epoch": 1.7700966428294178, "grad_norm": 0.0023426640826398164, "learning_rate": 3.9714741376885304e-07, "loss": 0.0006, "step": 269060 }, { "epoch": 1.770162431004651, "grad_norm": 0.0002570034597195098, "learning_rate": 3.9692320991582767e-07, "loss": 0.0005, "step": 269070 }, { "epoch": 1.7702282191798846, "grad_norm": 0.007190162170211274, "learning_rate": 3.9669906675039506e-07, "loss": 0.0009, "step": 269080 }, { "epoch": 1.770294007355118, "grad_norm": 0.003419276768743318, "learning_rate": 3.964749842755089e-07, "loss": 0.0008, "step": 269090 }, { "epoch": 1.7703597955303514, "grad_norm": 0.008720655562805421, "learning_rate": 3.962509624941241e-07, "loss": 0.0004, "step": 269100 }, { "epoch": 1.7704255837055847, "grad_norm": 0.03195791301940117, "learning_rate": 3.960270014091949e-07, "loss": 0.0002, "step": 269110 }, { "epoch": 1.7704913718808182, "grad_norm": 0.03398698903673585, "learning_rate": 3.9580310102367294e-07, "loss": 0.0004, "step": 269120 }, { "epoch": 1.7705571600560517, "grad_norm": 0.01893835494794126, "learning_rate": 3.955792613405096e-07, "loss": 0.0003, "step": 269130 }, { "epoch": 1.770622948231285, "grad_norm": 0.049587370922111955, "learning_rate": 3.9535548236265873e-07, "loss": 0.0004, "step": 269140 }, { "epoch": 1.7706887364065182, "grad_norm": 0.036339841145243496, "learning_rate": 3.9513176409306855e-07, "loss": 0.0004, "step": 269150 }, { "epoch": 1.7707545245817515, "grad_norm": 0.0008407045270382773, "learning_rate": 3.949081065346888e-07, "loss": 0.0003, "step": 269160 }, { "epoch": 1.770820312756985, "grad_norm": 0.05270652623862502, "learning_rate": 3.9468450969046837e-07, "loss": 0.0006, "step": 269170 }, { "epoch": 1.7708861009322185, "grad_norm": 0.004104082109784042, "learning_rate": 3.944609735633559e-07, "loss": 0.0002, "step": 269180 }, { "epoch": 1.7709518891074518, "grad_norm": 0.006238340888565442, "learning_rate": 3.942374981562974e-07, "loss": 0.0004, "step": 269190 }, { "epoch": 1.771017677282685, "grad_norm": 0.004702809315460647, "learning_rate": 3.9401408347224004e-07, "loss": 0.0001, "step": 269200 }, { "epoch": 1.7710834654579186, "grad_norm": 0.0026034607801740973, "learning_rate": 3.9379072951412854e-07, "loss": 0.0001, "step": 269210 }, { "epoch": 1.7711492536331521, "grad_norm": 0.014497647417985282, "learning_rate": 3.9356743628490844e-07, "loss": 0.0012, "step": 269220 }, { "epoch": 1.7712150418083854, "grad_norm": 0.00976134231928514, "learning_rate": 3.933442037875235e-07, "loss": 0.0004, "step": 269230 }, { "epoch": 1.7712808299836187, "grad_norm": 0.011498638689325644, "learning_rate": 3.9312103202491627e-07, "loss": 0.0004, "step": 269240 }, { "epoch": 1.771346618158852, "grad_norm": 0.017359766981315798, "learning_rate": 3.9289792100002944e-07, "loss": 0.0002, "step": 269250 }, { "epoch": 1.7714124063340855, "grad_norm": 0.15591061285693458, "learning_rate": 3.92674870715804e-07, "loss": 0.0013, "step": 269260 }, { "epoch": 1.771478194509319, "grad_norm": 0.03644098914033952, "learning_rate": 3.924518811751815e-07, "loss": 0.0005, "step": 269270 }, { "epoch": 1.7715439826845523, "grad_norm": 0.03743415815162377, "learning_rate": 3.922289523811024e-07, "loss": 0.0003, "step": 269280 }, { "epoch": 1.7716097708597855, "grad_norm": 0.01307619514856772, "learning_rate": 3.9200608433650476e-07, "loss": 0.0004, "step": 269290 }, { "epoch": 1.771675559035019, "grad_norm": 0.00543139382030447, "learning_rate": 3.9178327704432694e-07, "loss": 0.0005, "step": 269300 }, { "epoch": 1.7717413472102526, "grad_norm": 0.012793421691579727, "learning_rate": 3.915605305075071e-07, "loss": 0.0006, "step": 269310 }, { "epoch": 1.7718071353854858, "grad_norm": 0.0879393443375078, "learning_rate": 3.913378447289812e-07, "loss": 0.0004, "step": 269320 }, { "epoch": 1.7718729235607191, "grad_norm": 0.011949015628344157, "learning_rate": 3.9111521971168576e-07, "loss": 0.0008, "step": 269330 }, { "epoch": 1.7719387117359526, "grad_norm": 0.00037649328827015833, "learning_rate": 3.9089265545855513e-07, "loss": 0.0004, "step": 269340 }, { "epoch": 1.772004499911186, "grad_norm": 0.0357197117912091, "learning_rate": 3.906701519725248e-07, "loss": 0.0003, "step": 269350 }, { "epoch": 1.7720702880864194, "grad_norm": 0.01766649830631554, "learning_rate": 3.904477092565273e-07, "loss": 0.0003, "step": 269360 }, { "epoch": 1.7721360762616527, "grad_norm": 0.00013883814684592682, "learning_rate": 3.9022532731349596e-07, "loss": 0.0006, "step": 269370 }, { "epoch": 1.772201864436886, "grad_norm": 0.023253915398456507, "learning_rate": 3.9000300614636223e-07, "loss": 0.0004, "step": 269380 }, { "epoch": 1.7722676526121195, "grad_norm": 0.018643971160357663, "learning_rate": 3.8978074575805715e-07, "loss": 0.0008, "step": 269390 }, { "epoch": 1.772333440787353, "grad_norm": 0.05407808457106244, "learning_rate": 3.8955854615151166e-07, "loss": 0.0004, "step": 269400 }, { "epoch": 1.7723992289625863, "grad_norm": 0.04733988252675452, "learning_rate": 3.893364073296546e-07, "loss": 0.0008, "step": 269410 }, { "epoch": 1.7724650171378196, "grad_norm": 0.01966267970482328, "learning_rate": 3.8911432929541524e-07, "loss": 0.0002, "step": 269420 }, { "epoch": 1.772530805313053, "grad_norm": 0.0442552039750547, "learning_rate": 3.8889231205172127e-07, "loss": 0.0007, "step": 269430 }, { "epoch": 1.7725965934882866, "grad_norm": 0.002029819804238943, "learning_rate": 3.886703556014998e-07, "loss": 0.0004, "step": 269440 }, { "epoch": 1.7726623816635199, "grad_norm": 0.020654106654582536, "learning_rate": 3.8844845994767675e-07, "loss": 0.0003, "step": 269450 }, { "epoch": 1.7727281698387531, "grad_norm": 0.02483339863850126, "learning_rate": 3.882266250931782e-07, "loss": 0.0003, "step": 269460 }, { "epoch": 1.7727939580139864, "grad_norm": 0.014294418340510423, "learning_rate": 3.880048510409284e-07, "loss": 0.0003, "step": 269470 }, { "epoch": 1.77285974618922, "grad_norm": 0.03272881734873038, "learning_rate": 3.877831377938518e-07, "loss": 0.0005, "step": 269480 }, { "epoch": 1.7729255343644534, "grad_norm": 0.020211980256560232, "learning_rate": 3.875614853548698e-07, "loss": 0.0002, "step": 269490 }, { "epoch": 1.7729913225396867, "grad_norm": 0.004027750641471337, "learning_rate": 3.873398937269074e-07, "loss": 0.0002, "step": 269500 }, { "epoch": 1.77305711071492, "grad_norm": 0.02068357318019156, "learning_rate": 3.871183629128844e-07, "loss": 0.0002, "step": 269510 }, { "epoch": 1.7731228988901535, "grad_norm": 0.008713011592534556, "learning_rate": 3.868968929157224e-07, "loss": 0.0003, "step": 269520 }, { "epoch": 1.773188687065387, "grad_norm": 0.0005238233884403851, "learning_rate": 3.866754837383402e-07, "loss": 0.0014, "step": 269530 }, { "epoch": 1.7732544752406203, "grad_norm": 0.034902346078783036, "learning_rate": 3.8645413538365763e-07, "loss": 0.0005, "step": 269540 }, { "epoch": 1.7733202634158536, "grad_norm": 0.0007334436099251887, "learning_rate": 3.862328478545924e-07, "loss": 0.0003, "step": 269550 }, { "epoch": 1.7733860515910869, "grad_norm": 0.015100067756389928, "learning_rate": 3.8601162115406264e-07, "loss": 0.0013, "step": 269560 }, { "epoch": 1.7734518397663204, "grad_norm": 0.00013214930311686795, "learning_rate": 3.8579045528498505e-07, "loss": 0.0003, "step": 269570 }, { "epoch": 1.7735176279415539, "grad_norm": 0.04545599982938891, "learning_rate": 3.85569350250275e-07, "loss": 0.0013, "step": 269580 }, { "epoch": 1.7735834161167872, "grad_norm": 0.01773251757861803, "learning_rate": 3.8534830605284733e-07, "loss": 0.0005, "step": 269590 }, { "epoch": 1.7736492042920204, "grad_norm": 0.08249151021227935, "learning_rate": 3.851273226956176e-07, "loss": 0.0008, "step": 269600 }, { "epoch": 1.773714992467254, "grad_norm": 0.0006144535612931437, "learning_rate": 3.849064001814978e-07, "loss": 0.0004, "step": 269610 }, { "epoch": 1.7737807806424875, "grad_norm": 0.01598759697693346, "learning_rate": 3.8468553851340075e-07, "loss": 0.0003, "step": 269620 }, { "epoch": 1.7738465688177207, "grad_norm": 0.0048000443715354, "learning_rate": 3.8446473769423955e-07, "loss": 0.0004, "step": 269630 }, { "epoch": 1.773912356992954, "grad_norm": 0.04195542265420704, "learning_rate": 3.842439977269241e-07, "loss": 0.0003, "step": 269640 }, { "epoch": 1.7739781451681873, "grad_norm": 0.03306082361331258, "learning_rate": 3.8402331861436606e-07, "loss": 0.0014, "step": 269650 }, { "epoch": 1.7740439333434208, "grad_norm": 0.04975587199690832, "learning_rate": 3.83802700359473e-07, "loss": 0.0005, "step": 269660 }, { "epoch": 1.7741097215186543, "grad_norm": 0.007075146688262207, "learning_rate": 3.835821429651554e-07, "loss": 0.0004, "step": 269670 }, { "epoch": 1.7741755096938876, "grad_norm": 0.07510941948299077, "learning_rate": 3.8336164643431985e-07, "loss": 0.0002, "step": 269680 }, { "epoch": 1.7742412978691209, "grad_norm": 0.010181561480643284, "learning_rate": 3.831412107698734e-07, "loss": 0.0005, "step": 269690 }, { "epoch": 1.7743070860443544, "grad_norm": 0.08181868385810513, "learning_rate": 3.8292083597472327e-07, "loss": 0.0006, "step": 269700 }, { "epoch": 1.774372874219588, "grad_norm": 0.011552573075957891, "learning_rate": 3.827005220517743e-07, "loss": 0.0007, "step": 269710 }, { "epoch": 1.7744386623948212, "grad_norm": 0.007863092817297775, "learning_rate": 3.8248026900393086e-07, "loss": 0.0005, "step": 269720 }, { "epoch": 1.7745044505700545, "grad_norm": 0.005241972945332222, "learning_rate": 3.8226007683409727e-07, "loss": 0.0003, "step": 269730 }, { "epoch": 1.774570238745288, "grad_norm": 0.008186580492017224, "learning_rate": 3.820399455451762e-07, "loss": 0.0002, "step": 269740 }, { "epoch": 1.7746360269205215, "grad_norm": 0.005094606973244609, "learning_rate": 3.818198751400698e-07, "loss": 0.001, "step": 269750 }, { "epoch": 1.7747018150957548, "grad_norm": 0.0034378591366495868, "learning_rate": 3.8159986562168016e-07, "loss": 0.0004, "step": 269760 }, { "epoch": 1.774767603270988, "grad_norm": 0.015128922047727209, "learning_rate": 3.8137991699290775e-07, "loss": 0.0004, "step": 269770 }, { "epoch": 1.7748333914462213, "grad_norm": 0.0007902640923438853, "learning_rate": 3.811600292566525e-07, "loss": 0.0006, "step": 269780 }, { "epoch": 1.7748991796214548, "grad_norm": 0.0394427129075576, "learning_rate": 3.809402024158132e-07, "loss": 0.0009, "step": 269790 }, { "epoch": 1.7749649677966883, "grad_norm": 0.018226167579288192, "learning_rate": 3.8072043647328747e-07, "loss": 0.0003, "step": 269800 }, { "epoch": 1.7750307559719216, "grad_norm": 0.030232769214292386, "learning_rate": 3.805007314319736e-07, "loss": 0.0004, "step": 269810 }, { "epoch": 1.775096544147155, "grad_norm": 0.00726298843911261, "learning_rate": 3.802810872947682e-07, "loss": 0.0002, "step": 269820 }, { "epoch": 1.7751623323223884, "grad_norm": 0.006930082922671993, "learning_rate": 3.8006150406456665e-07, "loss": 0.0002, "step": 269830 }, { "epoch": 1.775228120497622, "grad_norm": 0.0135193108805166, "learning_rate": 3.798419817442639e-07, "loss": 0.0002, "step": 269840 }, { "epoch": 1.7752939086728552, "grad_norm": 0.028620419562589504, "learning_rate": 3.796225203367548e-07, "loss": 0.0004, "step": 269850 }, { "epoch": 1.7753596968480885, "grad_norm": 0.03861883762508237, "learning_rate": 3.7940311984493215e-07, "loss": 0.0004, "step": 269860 }, { "epoch": 1.7754254850233218, "grad_norm": 0.010666404608806754, "learning_rate": 3.7918378027168856e-07, "loss": 0.0003, "step": 269870 }, { "epoch": 1.7754912731985553, "grad_norm": 0.018079270483907346, "learning_rate": 3.789645016199156e-07, "loss": 0.0005, "step": 269880 }, { "epoch": 1.7755570613737888, "grad_norm": 0.009473717316434304, "learning_rate": 3.7874528389250487e-07, "loss": 0.0008, "step": 269890 }, { "epoch": 1.775622849549022, "grad_norm": 0.031307818124942904, "learning_rate": 3.7852612709234684e-07, "loss": 0.0003, "step": 269900 }, { "epoch": 1.7756886377242553, "grad_norm": 0.004849674999624099, "learning_rate": 3.7830703122233083e-07, "loss": 0.0005, "step": 269910 }, { "epoch": 1.7757544258994888, "grad_norm": 0.00010610036332588586, "learning_rate": 3.7808799628534454e-07, "loss": 0.0008, "step": 269920 }, { "epoch": 1.7758202140747223, "grad_norm": 0.01106104299138242, "learning_rate": 3.7786902228427626e-07, "loss": 0.0002, "step": 269930 }, { "epoch": 1.7758860022499556, "grad_norm": 0.045146171695848954, "learning_rate": 3.776501092220131e-07, "loss": 0.0006, "step": 269940 }, { "epoch": 1.775951790425189, "grad_norm": 0.0006813846375897833, "learning_rate": 3.77431257101441e-07, "loss": 0.0034, "step": 269950 }, { "epoch": 1.7760175786004222, "grad_norm": 0.029327522731035886, "learning_rate": 3.7721246592544556e-07, "loss": 0.0001, "step": 269960 }, { "epoch": 1.7760833667756557, "grad_norm": 0.012951321263742854, "learning_rate": 3.7699373569691164e-07, "loss": 0.0004, "step": 269970 }, { "epoch": 1.7761491549508892, "grad_norm": 0.0025916298550788105, "learning_rate": 3.7677506641872254e-07, "loss": 0.0005, "step": 269980 }, { "epoch": 1.7762149431261225, "grad_norm": 0.020587235384208494, "learning_rate": 3.765564580937608e-07, "loss": 0.0004, "step": 269990 }, { "epoch": 1.7762807313013558, "grad_norm": 0.021584841226614117, "learning_rate": 3.763379107249093e-07, "loss": 0.0006, "step": 270000 }, { "epoch": 1.7763465194765893, "grad_norm": 0.006166912275606418, "learning_rate": 3.7611942431504844e-07, "loss": 0.0003, "step": 270010 }, { "epoch": 1.7764123076518228, "grad_norm": 0.007512547791187619, "learning_rate": 3.7590099886706034e-07, "loss": 0.0004, "step": 270020 }, { "epoch": 1.776478095827056, "grad_norm": 0.00849708798777593, "learning_rate": 3.756826343838238e-07, "loss": 0.0001, "step": 270030 }, { "epoch": 1.7765438840022894, "grad_norm": 0.057742816304926683, "learning_rate": 3.7546433086821823e-07, "loss": 0.0005, "step": 270040 }, { "epoch": 1.7766096721775229, "grad_norm": 0.032537585409163336, "learning_rate": 3.752460883231213e-07, "loss": 0.0009, "step": 270050 }, { "epoch": 1.7766754603527561, "grad_norm": 0.05045842045833972, "learning_rate": 3.750279067514107e-07, "loss": 0.0007, "step": 270060 }, { "epoch": 1.7767412485279896, "grad_norm": 0.009210422708610558, "learning_rate": 3.748097861559624e-07, "loss": 0.0004, "step": 270070 }, { "epoch": 1.776807036703223, "grad_norm": 0.04845476876959253, "learning_rate": 3.745917265396526e-07, "loss": 0.0005, "step": 270080 }, { "epoch": 1.7768728248784562, "grad_norm": 0.0012953325755418832, "learning_rate": 3.743737279053561e-07, "loss": 0.0005, "step": 270090 }, { "epoch": 1.7769386130536897, "grad_norm": 0.001108952299358354, "learning_rate": 3.741557902559467e-07, "loss": 0.0003, "step": 270100 }, { "epoch": 1.7770044012289232, "grad_norm": 0.17981558956608146, "learning_rate": 3.739379135942989e-07, "loss": 0.0006, "step": 270110 }, { "epoch": 1.7770701894041565, "grad_norm": 0.0019001157285106957, "learning_rate": 3.7372009792328356e-07, "loss": 0.0009, "step": 270120 }, { "epoch": 1.7771359775793898, "grad_norm": 0.0019210049065164928, "learning_rate": 3.735023432457735e-07, "loss": 0.0006, "step": 270130 }, { "epoch": 1.7772017657546233, "grad_norm": 0.0030573851558472536, "learning_rate": 3.732846495646386e-07, "loss": 0.0004, "step": 270140 }, { "epoch": 1.7772675539298568, "grad_norm": 0.010326662224806262, "learning_rate": 3.730670168827505e-07, "loss": 0.0003, "step": 270150 }, { "epoch": 1.77733334210509, "grad_norm": 0.0736899054930709, "learning_rate": 3.7284944520297795e-07, "loss": 0.0007, "step": 270160 }, { "epoch": 1.7773991302803234, "grad_norm": 0.0037590739370519403, "learning_rate": 3.7263193452818867e-07, "loss": 0.0002, "step": 270170 }, { "epoch": 1.7774649184555567, "grad_norm": 0.037635414663266344, "learning_rate": 3.7241448486125156e-07, "loss": 0.0004, "step": 270180 }, { "epoch": 1.7775307066307902, "grad_norm": 0.00885663458908114, "learning_rate": 3.7219709620503254e-07, "loss": 0.0006, "step": 270190 }, { "epoch": 1.7775964948060237, "grad_norm": 0.0051493353797043995, "learning_rate": 3.719797685623977e-07, "loss": 0.0012, "step": 270200 }, { "epoch": 1.777662282981257, "grad_norm": 0.012529515064569409, "learning_rate": 3.7176250193621255e-07, "loss": 0.0004, "step": 270210 }, { "epoch": 1.7777280711564902, "grad_norm": 3.339681209317426e-05, "learning_rate": 3.7154529632934197e-07, "loss": 0.0006, "step": 270220 }, { "epoch": 1.7777938593317237, "grad_norm": 0.00492673850461308, "learning_rate": 3.713281517446493e-07, "loss": 0.0001, "step": 270230 }, { "epoch": 1.7778596475069572, "grad_norm": 0.026302661069405386, "learning_rate": 3.711110681849972e-07, "loss": 0.0003, "step": 270240 }, { "epoch": 1.7779254356821905, "grad_norm": 0.04863584682180607, "learning_rate": 3.708940456532478e-07, "loss": 0.0004, "step": 270250 }, { "epoch": 1.7779912238574238, "grad_norm": 0.02348958598588749, "learning_rate": 3.706770841522622e-07, "loss": 0.0003, "step": 270260 }, { "epoch": 1.778057012032657, "grad_norm": 0.031161985585082125, "learning_rate": 3.7046018368490086e-07, "loss": 0.0004, "step": 270270 }, { "epoch": 1.7781228002078906, "grad_norm": 0.02397753547119741, "learning_rate": 3.702433442540243e-07, "loss": 0.0002, "step": 270280 }, { "epoch": 1.778188588383124, "grad_norm": 0.06852476044718811, "learning_rate": 3.7002656586249074e-07, "loss": 0.0004, "step": 270290 }, { "epoch": 1.7782543765583574, "grad_norm": 0.014708672916809083, "learning_rate": 3.6980984851315795e-07, "loss": 0.0002, "step": 270300 }, { "epoch": 1.7783201647335907, "grad_norm": 0.026766366975003435, "learning_rate": 3.6959319220888357e-07, "loss": 0.0015, "step": 270310 }, { "epoch": 1.7783859529088242, "grad_norm": 0.017672283068932728, "learning_rate": 3.693765969525237e-07, "loss": 0.0004, "step": 270320 }, { "epoch": 1.7784517410840577, "grad_norm": 0.010625019209176068, "learning_rate": 3.6916006274693437e-07, "loss": 0.0002, "step": 270330 }, { "epoch": 1.778517529259291, "grad_norm": 0.14251976426139937, "learning_rate": 3.6894358959497e-07, "loss": 0.0012, "step": 270340 }, { "epoch": 1.7785833174345242, "grad_norm": 0.054561090854558625, "learning_rate": 3.687271774994849e-07, "loss": 0.0006, "step": 270350 }, { "epoch": 1.7786491056097578, "grad_norm": 0.031190835422451258, "learning_rate": 3.6851082646333193e-07, "loss": 0.0003, "step": 270360 }, { "epoch": 1.778714893784991, "grad_norm": 0.05013283734632476, "learning_rate": 3.682945364893642e-07, "loss": 0.0009, "step": 270370 }, { "epoch": 1.7787806819602245, "grad_norm": 0.24155788879127563, "learning_rate": 3.680783075804323e-07, "loss": 0.001, "step": 270380 }, { "epoch": 1.7788464701354578, "grad_norm": 0.10890666056358099, "learning_rate": 3.6786213973938776e-07, "loss": 0.0004, "step": 270390 }, { "epoch": 1.778912258310691, "grad_norm": 0.01036204113409722, "learning_rate": 3.6764603296907953e-07, "loss": 0.001, "step": 270400 }, { "epoch": 1.7789780464859246, "grad_norm": 0.04362785024587237, "learning_rate": 3.67429987272358e-07, "loss": 0.0012, "step": 270410 }, { "epoch": 1.7790438346611581, "grad_norm": 0.017597521224185165, "learning_rate": 3.672140026520715e-07, "loss": 0.0004, "step": 270420 }, { "epoch": 1.7791096228363914, "grad_norm": 0.06152239966094647, "learning_rate": 3.669980791110672e-07, "loss": 0.0003, "step": 270430 }, { "epoch": 1.7791754110116247, "grad_norm": 0.020181856687448155, "learning_rate": 3.667822166521923e-07, "loss": 0.0004, "step": 270440 }, { "epoch": 1.7792411991868582, "grad_norm": 0.021843133790091854, "learning_rate": 3.665664152782916e-07, "loss": 0.0005, "step": 270450 }, { "epoch": 1.7793069873620917, "grad_norm": 0.004878041526973355, "learning_rate": 3.6635067499221133e-07, "loss": 0.0007, "step": 270460 }, { "epoch": 1.779372775537325, "grad_norm": 0.0431036708818442, "learning_rate": 3.6613499579679576e-07, "loss": 0.0004, "step": 270470 }, { "epoch": 1.7794385637125583, "grad_norm": 0.03646886577459971, "learning_rate": 3.6591937769488825e-07, "loss": 0.0006, "step": 270480 }, { "epoch": 1.7795043518877915, "grad_norm": 0.006746388149793362, "learning_rate": 3.6570382068933096e-07, "loss": 0.0002, "step": 270490 }, { "epoch": 1.779570140063025, "grad_norm": 0.01912609360506552, "learning_rate": 3.654883247829666e-07, "loss": 0.0004, "step": 270500 }, { "epoch": 1.7796359282382586, "grad_norm": 0.015616936553298108, "learning_rate": 3.652728899786362e-07, "loss": 0.0008, "step": 270510 }, { "epoch": 1.7797017164134918, "grad_norm": 0.006641375122148292, "learning_rate": 3.6505751627917973e-07, "loss": 0.0002, "step": 270520 }, { "epoch": 1.7797675045887251, "grad_norm": 0.010626076080736352, "learning_rate": 3.648422036874366e-07, "loss": 0.0004, "step": 270530 }, { "epoch": 1.7798332927639586, "grad_norm": 0.016085686408081817, "learning_rate": 3.6462695220624623e-07, "loss": 0.0003, "step": 270540 }, { "epoch": 1.7798990809391921, "grad_norm": 0.0644072079751092, "learning_rate": 3.644117618384463e-07, "loss": 0.0004, "step": 270550 }, { "epoch": 1.7799648691144254, "grad_norm": 0.07244559864794341, "learning_rate": 3.6419663258687345e-07, "loss": 0.0003, "step": 270560 }, { "epoch": 1.7800306572896587, "grad_norm": 0.02830888653520444, "learning_rate": 3.6398156445436427e-07, "loss": 0.0004, "step": 270570 }, { "epoch": 1.780096445464892, "grad_norm": 0.05729548066151149, "learning_rate": 3.6376655744375435e-07, "loss": 0.001, "step": 270580 }, { "epoch": 1.7801622336401255, "grad_norm": 0.0030914634726264614, "learning_rate": 3.63551611557878e-07, "loss": 0.0007, "step": 270590 }, { "epoch": 1.780228021815359, "grad_norm": 0.03253970143815287, "learning_rate": 3.6333672679956967e-07, "loss": 0.0003, "step": 270600 }, { "epoch": 1.7802938099905923, "grad_norm": 0.030081140435400156, "learning_rate": 3.631219031716621e-07, "loss": 0.0003, "step": 270610 }, { "epoch": 1.7803595981658256, "grad_norm": 0.023416045166664817, "learning_rate": 3.6290714067698797e-07, "loss": 0.0002, "step": 270620 }, { "epoch": 1.780425386341059, "grad_norm": 0.005797205774664017, "learning_rate": 3.6269243931837785e-07, "loss": 0.0003, "step": 270630 }, { "epoch": 1.7804911745162926, "grad_norm": 0.054700161512417286, "learning_rate": 3.624777990986628e-07, "loss": 0.0005, "step": 270640 }, { "epoch": 1.7805569626915259, "grad_norm": 0.0284147066386868, "learning_rate": 3.6226322002067274e-07, "loss": 0.0002, "step": 270650 }, { "epoch": 1.7806227508667591, "grad_norm": 0.015365623045176267, "learning_rate": 3.6204870208723597e-07, "loss": 0.0006, "step": 270660 }, { "epoch": 1.7806885390419926, "grad_norm": 0.030264561997108273, "learning_rate": 3.618342453011825e-07, "loss": 0.0005, "step": 270670 }, { "epoch": 1.780754327217226, "grad_norm": 0.02618772151989963, "learning_rate": 3.6161984966533836e-07, "loss": 0.0003, "step": 270680 }, { "epoch": 1.7808201153924594, "grad_norm": 0.004845457875155288, "learning_rate": 3.6140551518253077e-07, "loss": 0.0007, "step": 270690 }, { "epoch": 1.7808859035676927, "grad_norm": 0.024632475275227873, "learning_rate": 3.611912418555852e-07, "loss": 0.0002, "step": 270700 }, { "epoch": 1.780951691742926, "grad_norm": 0.024121194988370075, "learning_rate": 3.609770296873272e-07, "loss": 0.0007, "step": 270710 }, { "epoch": 1.7810174799181595, "grad_norm": 0.028194165271438615, "learning_rate": 3.6076287868058003e-07, "loss": 0.0004, "step": 270720 }, { "epoch": 1.781083268093393, "grad_norm": 0.0020980135437651745, "learning_rate": 3.6054878883816756e-07, "loss": 0.0014, "step": 270730 }, { "epoch": 1.7811490562686263, "grad_norm": 0.0007339326888282685, "learning_rate": 3.6033476016291255e-07, "loss": 0.0004, "step": 270740 }, { "epoch": 1.7812148444438596, "grad_norm": 0.00945118516445528, "learning_rate": 3.601207926576367e-07, "loss": 0.0019, "step": 270750 }, { "epoch": 1.781280632619093, "grad_norm": 0.08445711142187441, "learning_rate": 3.599068863251609e-07, "loss": 0.0003, "step": 270760 }, { "epoch": 1.7813464207943266, "grad_norm": 0.01843002237187475, "learning_rate": 3.5969304116830585e-07, "loss": 0.0007, "step": 270770 }, { "epoch": 1.7814122089695599, "grad_norm": 0.004424955886391489, "learning_rate": 3.5947925718988974e-07, "loss": 0.0004, "step": 270780 }, { "epoch": 1.7814779971447932, "grad_norm": 0.006222819180342796, "learning_rate": 3.592655343927309e-07, "loss": 0.0001, "step": 270790 }, { "epoch": 1.7815437853200264, "grad_norm": 0.02318064771379241, "learning_rate": 3.5905187277964935e-07, "loss": 0.0006, "step": 270800 }, { "epoch": 1.78160957349526, "grad_norm": 0.017022243472643195, "learning_rate": 3.588382723534606e-07, "loss": 0.0003, "step": 270810 }, { "epoch": 1.7816753616704935, "grad_norm": 0.016149828955034775, "learning_rate": 3.5862473311698063e-07, "loss": 0.0005, "step": 270820 }, { "epoch": 1.7817411498457267, "grad_norm": 0.07950131371120372, "learning_rate": 3.584112550730251e-07, "loss": 0.0007, "step": 270830 }, { "epoch": 1.78180693802096, "grad_norm": 0.02291607458991033, "learning_rate": 3.5819783822440837e-07, "loss": 0.0009, "step": 270840 }, { "epoch": 1.7818727261961935, "grad_norm": 0.022690155098786736, "learning_rate": 3.5798448257394427e-07, "loss": 0.0002, "step": 270850 }, { "epoch": 1.781938514371427, "grad_norm": 0.022987566104177425, "learning_rate": 3.577711881244456e-07, "loss": 0.0005, "step": 270860 }, { "epoch": 1.7820043025466603, "grad_norm": 0.015642349503947908, "learning_rate": 3.5755795487872444e-07, "loss": 0.0014, "step": 270870 }, { "epoch": 1.7820700907218936, "grad_norm": 0.04979455034555575, "learning_rate": 3.5734478283959264e-07, "loss": 0.0006, "step": 270880 }, { "epoch": 1.7821358788971269, "grad_norm": 0.03170823884984497, "learning_rate": 3.571316720098594e-07, "loss": 0.0005, "step": 270890 }, { "epoch": 1.7822016670723604, "grad_norm": 0.0033116170063453238, "learning_rate": 3.56918622392336e-07, "loss": 0.0004, "step": 270900 }, { "epoch": 1.782267455247594, "grad_norm": 0.0161028117138718, "learning_rate": 3.5670563398983006e-07, "loss": 0.0007, "step": 270910 }, { "epoch": 1.7823332434228272, "grad_norm": 0.012297152288474496, "learning_rate": 3.5649270680514993e-07, "loss": 0.0003, "step": 270920 }, { "epoch": 1.7823990315980605, "grad_norm": 0.01450709729450558, "learning_rate": 3.5627984084110336e-07, "loss": 0.0002, "step": 270930 }, { "epoch": 1.782464819773294, "grad_norm": 0.02236278886689328, "learning_rate": 3.560670361004964e-07, "loss": 0.0004, "step": 270940 }, { "epoch": 1.7825306079485275, "grad_norm": 0.09530487357110913, "learning_rate": 3.5585429258613526e-07, "loss": 0.0004, "step": 270950 }, { "epoch": 1.7825963961237608, "grad_norm": 0.048162007601105054, "learning_rate": 3.556416103008237e-07, "loss": 0.0008, "step": 270960 }, { "epoch": 1.782662184298994, "grad_norm": 0.01763402923799089, "learning_rate": 3.554289892473667e-07, "loss": 0.0001, "step": 270970 }, { "epoch": 1.7827279724742273, "grad_norm": 0.0005269440611544193, "learning_rate": 3.552164294285676e-07, "loss": 0.0009, "step": 270980 }, { "epoch": 1.7827937606494608, "grad_norm": 0.023814578160180188, "learning_rate": 3.550039308472275e-07, "loss": 0.0003, "step": 270990 }, { "epoch": 1.7828595488246943, "grad_norm": 0.029473203841284705, "learning_rate": 3.547914935061497e-07, "loss": 0.0005, "step": 271000 }, { "epoch": 1.7829253369999276, "grad_norm": 0.06858603572539793, "learning_rate": 3.545791174081337e-07, "loss": 0.0007, "step": 271010 }, { "epoch": 1.782991125175161, "grad_norm": 0.027484464449280255, "learning_rate": 3.543668025559804e-07, "loss": 0.0004, "step": 271020 }, { "epoch": 1.7830569133503944, "grad_norm": 0.028301959188603662, "learning_rate": 3.5415454895248835e-07, "loss": 0.0007, "step": 271030 }, { "epoch": 1.783122701525628, "grad_norm": 0.033570595111099916, "learning_rate": 3.539423566004557e-07, "loss": 0.004, "step": 271040 }, { "epoch": 1.7831884897008612, "grad_norm": 0.006950246335148078, "learning_rate": 3.5373022550268033e-07, "loss": 0.0015, "step": 271050 }, { "epoch": 1.7832542778760945, "grad_norm": 0.010365952188353336, "learning_rate": 3.535181556619599e-07, "loss": 0.0005, "step": 271060 }, { "epoch": 1.783320066051328, "grad_norm": 0.048823909091253614, "learning_rate": 3.5330614708108946e-07, "loss": 0.001, "step": 271070 }, { "epoch": 1.7833858542265615, "grad_norm": 0.002895178482636728, "learning_rate": 3.5309419976286395e-07, "loss": 0.0004, "step": 271080 }, { "epoch": 1.7834516424017948, "grad_norm": 0.005739689205589105, "learning_rate": 3.528823137100784e-07, "loss": 0.0001, "step": 271090 }, { "epoch": 1.783517430577028, "grad_norm": 0.09441621124661304, "learning_rate": 3.5267048892552613e-07, "loss": 0.0005, "step": 271100 }, { "epoch": 1.7835832187522613, "grad_norm": 0.007910725316900997, "learning_rate": 3.524587254119999e-07, "loss": 0.0002, "step": 271110 }, { "epoch": 1.7836490069274948, "grad_norm": 0.017849453723559772, "learning_rate": 3.522470231722913e-07, "loss": 0.0004, "step": 271120 }, { "epoch": 1.7837147951027283, "grad_norm": 0.03004718877846681, "learning_rate": 3.520353822091915e-07, "loss": 0.0007, "step": 271130 }, { "epoch": 1.7837805832779616, "grad_norm": 0.08180445969867733, "learning_rate": 3.5182380252549163e-07, "loss": 0.001, "step": 271140 }, { "epoch": 1.783846371453195, "grad_norm": 0.040453351380081166, "learning_rate": 3.5161228412398e-07, "loss": 0.0008, "step": 271150 }, { "epoch": 1.7839121596284284, "grad_norm": 0.0038798203414904656, "learning_rate": 3.514008270074454e-07, "loss": 0.0005, "step": 271160 }, { "epoch": 1.783977947803662, "grad_norm": 0.0012165837161717762, "learning_rate": 3.5118943117867674e-07, "loss": 0.0001, "step": 271170 }, { "epoch": 1.7840437359788952, "grad_norm": 0.03402483317944019, "learning_rate": 3.5097809664045915e-07, "loss": 0.0007, "step": 271180 }, { "epoch": 1.7841095241541285, "grad_norm": 0.017764811528127023, "learning_rate": 3.507668233955813e-07, "loss": 0.0004, "step": 271190 }, { "epoch": 1.7841753123293618, "grad_norm": 0.014707457505867107, "learning_rate": 3.5055561144682724e-07, "loss": 0.0004, "step": 271200 }, { "epoch": 1.7842411005045953, "grad_norm": 0.01702564717204227, "learning_rate": 3.503444607969825e-07, "loss": 0.0005, "step": 271210 }, { "epoch": 1.7843068886798288, "grad_norm": 0.033507293766908544, "learning_rate": 3.5013337144882976e-07, "loss": 0.0007, "step": 271220 }, { "epoch": 1.784372676855062, "grad_norm": 0.0017497571493457216, "learning_rate": 3.49922343405153e-07, "loss": 0.0004, "step": 271230 }, { "epoch": 1.7844384650302954, "grad_norm": 0.021188186767792983, "learning_rate": 3.4971137666873377e-07, "loss": 0.0004, "step": 271240 }, { "epoch": 1.7845042532055289, "grad_norm": 0.02773350246004901, "learning_rate": 3.49500471242355e-07, "loss": 0.0005, "step": 271250 }, { "epoch": 1.7845700413807624, "grad_norm": 0.04058261606233748, "learning_rate": 3.492896271287949e-07, "loss": 0.0006, "step": 271260 }, { "epoch": 1.7846358295559956, "grad_norm": 0.0179785681894072, "learning_rate": 3.490788443308346e-07, "loss": 0.0017, "step": 271270 }, { "epoch": 1.784701617731229, "grad_norm": 0.07554184682627225, "learning_rate": 3.4886812285125196e-07, "loss": 0.0003, "step": 271280 }, { "epoch": 1.7847674059064622, "grad_norm": 0.00016646808010697966, "learning_rate": 3.4865746269282743e-07, "loss": 0.0002, "step": 271290 }, { "epoch": 1.7848331940816957, "grad_norm": 0.019640090618795125, "learning_rate": 3.484468638583366e-07, "loss": 0.0003, "step": 271300 }, { "epoch": 1.7848989822569292, "grad_norm": 0.033523459053066154, "learning_rate": 3.482363263505567e-07, "loss": 0.0003, "step": 271310 }, { "epoch": 1.7849647704321625, "grad_norm": 0.007467875052335212, "learning_rate": 3.480258501722633e-07, "loss": 0.0002, "step": 271320 }, { "epoch": 1.7850305586073958, "grad_norm": 0.000402720978463566, "learning_rate": 3.478154353262314e-07, "loss": 0.0004, "step": 271330 }, { "epoch": 1.7850963467826293, "grad_norm": 0.009541694840910553, "learning_rate": 3.476050818152349e-07, "loss": 0.001, "step": 271340 }, { "epoch": 1.7851621349578628, "grad_norm": 0.09176188909332293, "learning_rate": 3.4739478964204764e-07, "loss": 0.001, "step": 271350 }, { "epoch": 1.785227923133096, "grad_norm": 0.020351438119927897, "learning_rate": 3.4718455880944134e-07, "loss": 0.0008, "step": 271360 }, { "epoch": 1.7852937113083294, "grad_norm": 0.03748396050926075, "learning_rate": 3.469743893201888e-07, "loss": 0.0005, "step": 271370 }, { "epoch": 1.7853594994835629, "grad_norm": 0.004456520437998032, "learning_rate": 3.4676428117705994e-07, "loss": 0.0005, "step": 271380 }, { "epoch": 1.7854252876587962, "grad_norm": 0.003656991787542715, "learning_rate": 3.4655423438282543e-07, "loss": 0.0006, "step": 271390 }, { "epoch": 1.7854910758340297, "grad_norm": 0.014831716544742309, "learning_rate": 3.463442489402541e-07, "loss": 0.0001, "step": 271400 }, { "epoch": 1.785556864009263, "grad_norm": 0.019657208401210866, "learning_rate": 3.461343248521143e-07, "loss": 0.0004, "step": 271410 }, { "epoch": 1.7856226521844962, "grad_norm": 0.01282983505818313, "learning_rate": 3.4592446212117437e-07, "loss": 0.0003, "step": 271420 }, { "epoch": 1.7856884403597297, "grad_norm": 0.0469900517907365, "learning_rate": 3.457146607502015e-07, "loss": 0.0004, "step": 271430 }, { "epoch": 1.7857542285349632, "grad_norm": 0.012394498202800796, "learning_rate": 3.4550492074196086e-07, "loss": 0.0004, "step": 271440 }, { "epoch": 1.7858200167101965, "grad_norm": 0.029490143683577903, "learning_rate": 3.4529524209921784e-07, "loss": 0.0004, "step": 271450 }, { "epoch": 1.7858858048854298, "grad_norm": 0.024214855132634255, "learning_rate": 3.4508562482473696e-07, "loss": 0.0002, "step": 271460 }, { "epoch": 1.7859515930606633, "grad_norm": 0.013659435839792905, "learning_rate": 3.4487606892128156e-07, "loss": 0.0008, "step": 271470 }, { "epoch": 1.7860173812358968, "grad_norm": 0.05000232155975972, "learning_rate": 3.4466657439161556e-07, "loss": 0.0005, "step": 271480 }, { "epoch": 1.78608316941113, "grad_norm": 0.002465472728144118, "learning_rate": 3.444571412384995e-07, "loss": 0.0005, "step": 271490 }, { "epoch": 1.7861489575863634, "grad_norm": 0.024324389732080252, "learning_rate": 3.4424776946469565e-07, "loss": 0.0004, "step": 271500 }, { "epoch": 1.7862147457615967, "grad_norm": 0.004663937650442869, "learning_rate": 3.4403845907296397e-07, "loss": 0.0007, "step": 271510 }, { "epoch": 1.7862805339368302, "grad_norm": 0.0873085773520374, "learning_rate": 3.438292100660634e-07, "loss": 0.0004, "step": 271520 }, { "epoch": 1.7863463221120637, "grad_norm": 0.0032165730688639817, "learning_rate": 3.4362002244675395e-07, "loss": 0.0003, "step": 271530 }, { "epoch": 1.786412110287297, "grad_norm": 0.02022240606983994, "learning_rate": 3.4341089621779234e-07, "loss": 0.0001, "step": 271540 }, { "epoch": 1.7864778984625302, "grad_norm": 0.028783288950106668, "learning_rate": 3.4320183138193687e-07, "loss": 0.0013, "step": 271550 }, { "epoch": 1.7865436866377638, "grad_norm": 0.0012621868830441211, "learning_rate": 3.4299282794194364e-07, "loss": 0.001, "step": 271560 }, { "epoch": 1.7866094748129973, "grad_norm": 0.006899765948061616, "learning_rate": 3.4278388590056777e-07, "loss": 0.0005, "step": 271570 }, { "epoch": 1.7866752629882305, "grad_norm": 0.028200766348589833, "learning_rate": 3.4257500526056367e-07, "loss": 0.0003, "step": 271580 }, { "epoch": 1.7867410511634638, "grad_norm": 0.008412265768198262, "learning_rate": 3.423661860246863e-07, "loss": 0.0002, "step": 271590 }, { "epoch": 1.786806839338697, "grad_norm": 0.011739147278652265, "learning_rate": 3.42157428195688e-07, "loss": 0.0012, "step": 271600 }, { "epoch": 1.7868726275139306, "grad_norm": 0.12993111666529245, "learning_rate": 3.419487317763215e-07, "loss": 0.0003, "step": 271610 }, { "epoch": 1.7869384156891641, "grad_norm": 0.015484552022352608, "learning_rate": 3.417400967693374e-07, "loss": 0.0006, "step": 271620 }, { "epoch": 1.7870042038643974, "grad_norm": 0.029584592222322924, "learning_rate": 3.4153152317748737e-07, "loss": 0.0004, "step": 271630 }, { "epoch": 1.7870699920396307, "grad_norm": 0.014974044780265216, "learning_rate": 3.4132301100352096e-07, "loss": 0.0003, "step": 271640 }, { "epoch": 1.7871357802148642, "grad_norm": 0.022636137572248267, "learning_rate": 3.4111456025018694e-07, "loss": 0.0003, "step": 271650 }, { "epoch": 1.7872015683900977, "grad_norm": 0.0053036015501645335, "learning_rate": 3.4090617092023383e-07, "loss": 0.0003, "step": 271660 }, { "epoch": 1.787267356565331, "grad_norm": 0.028777482658578754, "learning_rate": 3.406978430164082e-07, "loss": 0.0005, "step": 271670 }, { "epoch": 1.7873331447405643, "grad_norm": 0.0008150670768876647, "learning_rate": 3.404895765414584e-07, "loss": 0.0004, "step": 271680 }, { "epoch": 1.7873989329157978, "grad_norm": 0.010083235221325993, "learning_rate": 3.4028137149812957e-07, "loss": 0.0005, "step": 271690 }, { "epoch": 1.787464721091031, "grad_norm": 0.004954969131523291, "learning_rate": 3.4007322788916607e-07, "loss": 0.0003, "step": 271700 }, { "epoch": 1.7875305092662646, "grad_norm": 0.030280239211679216, "learning_rate": 3.3986514571731245e-07, "loss": 0.0006, "step": 271710 }, { "epoch": 1.7875962974414978, "grad_norm": 0.04915618592105795, "learning_rate": 3.39657124985312e-07, "loss": 0.0004, "step": 271720 }, { "epoch": 1.7876620856167311, "grad_norm": 0.006001574541789668, "learning_rate": 3.394491656959076e-07, "loss": 0.0003, "step": 271730 }, { "epoch": 1.7877278737919646, "grad_norm": 0.14871949877986199, "learning_rate": 3.3924126785184087e-07, "loss": 0.0008, "step": 271740 }, { "epoch": 1.7877936619671981, "grad_norm": 0.0711508555603275, "learning_rate": 3.39033431455853e-07, "loss": 0.0006, "step": 271750 }, { "epoch": 1.7878594501424314, "grad_norm": 0.020745412840658735, "learning_rate": 3.3882565651068346e-07, "loss": 0.0001, "step": 271760 }, { "epoch": 1.7879252383176647, "grad_norm": 0.015451051122985894, "learning_rate": 3.3861794301907227e-07, "loss": 0.0003, "step": 271770 }, { "epoch": 1.7879910264928982, "grad_norm": 0.0025631548131122926, "learning_rate": 3.384102909837572e-07, "loss": 0.0002, "step": 271780 }, { "epoch": 1.7880568146681317, "grad_norm": 0.01185759787646357, "learning_rate": 3.3820270040747674e-07, "loss": 0.0006, "step": 271790 }, { "epoch": 1.788122602843365, "grad_norm": 0.024089371074746017, "learning_rate": 3.379951712929669e-07, "loss": 0.0004, "step": 271800 }, { "epoch": 1.7881883910185983, "grad_norm": 0.03796672564340467, "learning_rate": 3.3778770364296496e-07, "loss": 0.0003, "step": 271810 }, { "epoch": 1.7882541791938316, "grad_norm": 0.014638594652248956, "learning_rate": 3.375802974602055e-07, "loss": 0.0005, "step": 271820 }, { "epoch": 1.788319967369065, "grad_norm": 0.0009956371899098595, "learning_rate": 3.3737295274742343e-07, "loss": 0.0005, "step": 271830 }, { "epoch": 1.7883857555442986, "grad_norm": 0.02047819677674205, "learning_rate": 3.371656695073516e-07, "loss": 0.0002, "step": 271840 }, { "epoch": 1.7884515437195319, "grad_norm": 0.0068785579520225845, "learning_rate": 3.369584477427235e-07, "loss": 0.0005, "step": 271850 }, { "epoch": 1.7885173318947651, "grad_norm": 8.269884122211143e-05, "learning_rate": 3.367512874562712e-07, "loss": 0.0002, "step": 271860 }, { "epoch": 1.7885831200699986, "grad_norm": 0.00035603723230237007, "learning_rate": 3.3654418865072535e-07, "loss": 0.0003, "step": 271870 }, { "epoch": 1.7886489082452321, "grad_norm": 0.00612116241937708, "learning_rate": 3.363371513288172e-07, "loss": 0.0004, "step": 271880 }, { "epoch": 1.7887146964204654, "grad_norm": 0.013952050761163345, "learning_rate": 3.3613017549327555e-07, "loss": 0.0006, "step": 271890 }, { "epoch": 1.7887804845956987, "grad_norm": 0.04479634265229968, "learning_rate": 3.3592326114682996e-07, "loss": 0.0005, "step": 271900 }, { "epoch": 1.788846272770932, "grad_norm": 0.012254746809806571, "learning_rate": 3.3571640829220766e-07, "loss": 0.0002, "step": 271910 }, { "epoch": 1.7889120609461655, "grad_norm": 0.046244037522527176, "learning_rate": 3.355096169321359e-07, "loss": 0.0003, "step": 271920 }, { "epoch": 1.788977849121399, "grad_norm": 0.011572147838899875, "learning_rate": 3.353028870693409e-07, "loss": 0.0006, "step": 271930 }, { "epoch": 1.7890436372966323, "grad_norm": 0.0033792365037347847, "learning_rate": 3.3509621870654985e-07, "loss": 0.0003, "step": 271940 }, { "epoch": 1.7891094254718656, "grad_norm": 0.020611097724363515, "learning_rate": 3.3488961184648563e-07, "loss": 0.0002, "step": 271950 }, { "epoch": 1.789175213647099, "grad_norm": 0.009645780425263551, "learning_rate": 3.3468306649187323e-07, "loss": 0.0013, "step": 271960 }, { "epoch": 1.7892410018223326, "grad_norm": 0.01149352164063852, "learning_rate": 3.3447658264543493e-07, "loss": 0.0006, "step": 271970 }, { "epoch": 1.7893067899975659, "grad_norm": 0.08428723417506977, "learning_rate": 3.3427016030989355e-07, "loss": 0.001, "step": 271980 }, { "epoch": 1.7893725781727992, "grad_norm": 0.0002699863957682458, "learning_rate": 3.3406379948797084e-07, "loss": 0.0003, "step": 271990 }, { "epoch": 1.7894383663480324, "grad_norm": 0.04120325674757337, "learning_rate": 3.338575001823874e-07, "loss": 0.0008, "step": 272000 }, { "epoch": 1.789504154523266, "grad_norm": 0.021909037483534722, "learning_rate": 3.336512623958621e-07, "loss": 0.0004, "step": 272010 }, { "epoch": 1.7895699426984994, "grad_norm": 0.020771109095310627, "learning_rate": 3.334450861311156e-07, "loss": 0.0002, "step": 272020 }, { "epoch": 1.7896357308737327, "grad_norm": 0.02440154782093082, "learning_rate": 3.332389713908646e-07, "loss": 0.001, "step": 272030 }, { "epoch": 1.789701519048966, "grad_norm": 0.035053842568416475, "learning_rate": 3.330329181778275e-07, "loss": 0.0004, "step": 272040 }, { "epoch": 1.7897673072241995, "grad_norm": 0.0058776789949926906, "learning_rate": 3.3282692649472094e-07, "loss": 0.0004, "step": 272050 }, { "epoch": 1.789833095399433, "grad_norm": 0.02095317386011819, "learning_rate": 3.326209963442595e-07, "loss": 0.0005, "step": 272060 }, { "epoch": 1.7898988835746663, "grad_norm": 0.012396663780486976, "learning_rate": 3.324151277291604e-07, "loss": 0.0002, "step": 272070 }, { "epoch": 1.7899646717498996, "grad_norm": 0.04954432473453912, "learning_rate": 3.3220932065213596e-07, "loss": 0.0003, "step": 272080 }, { "epoch": 1.790030459925133, "grad_norm": 0.000574771412765235, "learning_rate": 3.3200357511590064e-07, "loss": 0.0002, "step": 272090 }, { "epoch": 1.7900962481003666, "grad_norm": 0.01700873857625789, "learning_rate": 3.3179789112316676e-07, "loss": 0.0009, "step": 272100 }, { "epoch": 1.7901620362755999, "grad_norm": 0.018945825989497553, "learning_rate": 3.3159226867664596e-07, "loss": 0.0003, "step": 272110 }, { "epoch": 1.7902278244508332, "grad_norm": 0.005645582642498654, "learning_rate": 3.313867077790489e-07, "loss": 0.0006, "step": 272120 }, { "epoch": 1.7902936126260665, "grad_norm": 0.011973059323648566, "learning_rate": 3.3118120843308675e-07, "loss": 0.0007, "step": 272130 }, { "epoch": 1.7903594008013, "grad_norm": 0.01733960910470312, "learning_rate": 3.3097577064146734e-07, "loss": 0.0015, "step": 272140 }, { "epoch": 1.7904251889765335, "grad_norm": 0.04334888524245949, "learning_rate": 3.3077039440690017e-07, "loss": 0.0002, "step": 272150 }, { "epoch": 1.7904909771517667, "grad_norm": 0.013599578307101194, "learning_rate": 3.305650797320925e-07, "loss": 0.0004, "step": 272160 }, { "epoch": 1.790556765327, "grad_norm": 0.02419989370006052, "learning_rate": 3.303598266197522e-07, "loss": 0.0004, "step": 272170 }, { "epoch": 1.7906225535022335, "grad_norm": 0.02288218495243548, "learning_rate": 3.3015463507258374e-07, "loss": 0.0003, "step": 272180 }, { "epoch": 1.790688341677467, "grad_norm": 0.03030858551854728, "learning_rate": 3.299495050932933e-07, "loss": 0.0003, "step": 272190 }, { "epoch": 1.7907541298527003, "grad_norm": 0.04174880579543924, "learning_rate": 3.2974443668458535e-07, "loss": 0.0006, "step": 272200 }, { "epoch": 1.7908199180279336, "grad_norm": 0.014842959747486026, "learning_rate": 3.295394298491639e-07, "loss": 0.0006, "step": 272210 }, { "epoch": 1.790885706203167, "grad_norm": 0.016986531671284822, "learning_rate": 3.293344845897317e-07, "loss": 0.0003, "step": 272220 }, { "epoch": 1.7909514943784004, "grad_norm": 0.02030523798967184, "learning_rate": 3.2912960090899003e-07, "loss": 0.0004, "step": 272230 }, { "epoch": 1.791017282553634, "grad_norm": 0.014991036249877788, "learning_rate": 3.289247788096406e-07, "loss": 0.0004, "step": 272240 }, { "epoch": 1.7910830707288672, "grad_norm": 0.03142878577657542, "learning_rate": 3.287200182943839e-07, "loss": 0.0003, "step": 272250 }, { "epoch": 1.7911488589041005, "grad_norm": 0.0722684737660875, "learning_rate": 3.2851531936591907e-07, "loss": 0.0006, "step": 272260 }, { "epoch": 1.791214647079334, "grad_norm": 0.0036731666925957315, "learning_rate": 3.283106820269455e-07, "loss": 0.0006, "step": 272270 }, { "epoch": 1.7912804352545675, "grad_norm": 0.004937152661127438, "learning_rate": 3.281061062801605e-07, "loss": 0.0003, "step": 272280 }, { "epoch": 1.7913462234298008, "grad_norm": 0.00016972955203446846, "learning_rate": 3.279015921282619e-07, "loss": 0.0003, "step": 272290 }, { "epoch": 1.791412011605034, "grad_norm": 0.057773540892552454, "learning_rate": 3.2769713957394587e-07, "loss": 0.0006, "step": 272300 }, { "epoch": 1.7914777997802673, "grad_norm": 0.021303644737446014, "learning_rate": 3.2749274861990757e-07, "loss": 0.0003, "step": 272310 }, { "epoch": 1.7915435879555008, "grad_norm": 0.0016344882073266692, "learning_rate": 3.272884192688408e-07, "loss": 0.0004, "step": 272320 }, { "epoch": 1.7916093761307343, "grad_norm": 0.0019137207876245086, "learning_rate": 3.2708415152344186e-07, "loss": 0.0012, "step": 272330 }, { "epoch": 1.7916751643059676, "grad_norm": 0.00021083933669854466, "learning_rate": 3.2687994538640244e-07, "loss": 0.0003, "step": 272340 }, { "epoch": 1.791740952481201, "grad_norm": 0.00785728869625808, "learning_rate": 3.266758008604154e-07, "loss": 0.0005, "step": 272350 }, { "epoch": 1.7918067406564344, "grad_norm": 0.0035486172966644716, "learning_rate": 3.2647171794817135e-07, "loss": 0.0003, "step": 272360 }, { "epoch": 1.791872528831668, "grad_norm": 0.006116679475348911, "learning_rate": 3.2626769665236093e-07, "loss": 0.0004, "step": 272370 }, { "epoch": 1.7919383170069012, "grad_norm": 0.0006786670629584059, "learning_rate": 3.260637369756753e-07, "loss": 0.0004, "step": 272380 }, { "epoch": 1.7920041051821345, "grad_norm": 0.02865022672371325, "learning_rate": 3.258598389208023e-07, "loss": 0.0006, "step": 272390 }, { "epoch": 1.792069893357368, "grad_norm": 0.002852687056611487, "learning_rate": 3.256560024904304e-07, "loss": 0.0005, "step": 272400 }, { "epoch": 1.7921356815326013, "grad_norm": 0.009190960703629106, "learning_rate": 3.254522276872468e-07, "loss": 0.0002, "step": 272410 }, { "epoch": 1.7922014697078348, "grad_norm": 0.013304744644996902, "learning_rate": 3.252485145139389e-07, "loss": 0.0003, "step": 272420 }, { "epoch": 1.792267257883068, "grad_norm": 0.0007614697418969956, "learning_rate": 3.250448629731917e-07, "loss": 0.0002, "step": 272430 }, { "epoch": 1.7923330460583013, "grad_norm": 0.04344388843063955, "learning_rate": 3.248412730676903e-07, "loss": 0.0004, "step": 272440 }, { "epoch": 1.7923988342335349, "grad_norm": 0.029629409691821998, "learning_rate": 3.246377448001181e-07, "loss": 0.0003, "step": 272450 }, { "epoch": 1.7924646224087684, "grad_norm": 0.00015093007330825646, "learning_rate": 3.244342781731602e-07, "loss": 0.0004, "step": 272460 }, { "epoch": 1.7925304105840016, "grad_norm": 0.005367423851075881, "learning_rate": 3.242308731894989e-07, "loss": 0.0003, "step": 272470 }, { "epoch": 1.792596198759235, "grad_norm": 0.08378722213248613, "learning_rate": 3.240275298518147e-07, "loss": 0.0005, "step": 272480 }, { "epoch": 1.7926619869344684, "grad_norm": 0.10790638068498157, "learning_rate": 3.23824248162789e-07, "loss": 0.0017, "step": 272490 }, { "epoch": 1.792727775109702, "grad_norm": 0.011569345314782861, "learning_rate": 3.236210281251023e-07, "loss": 0.0009, "step": 272500 }, { "epoch": 1.7927935632849352, "grad_norm": 0.0458618807910364, "learning_rate": 3.2341786974143305e-07, "loss": 0.0005, "step": 272510 }, { "epoch": 1.7928593514601685, "grad_norm": 0.02635454660381734, "learning_rate": 3.232147730144608e-07, "loss": 0.0006, "step": 272520 }, { "epoch": 1.7929251396354018, "grad_norm": 0.05954272766851975, "learning_rate": 3.2301173794686227e-07, "loss": 0.0011, "step": 272530 }, { "epoch": 1.7929909278106353, "grad_norm": 0.10524029364990335, "learning_rate": 3.228087645413147e-07, "loss": 0.0009, "step": 272540 }, { "epoch": 1.7930567159858688, "grad_norm": 0.0075862210006637735, "learning_rate": 3.226058528004944e-07, "loss": 0.0008, "step": 272550 }, { "epoch": 1.793122504161102, "grad_norm": 0.008484731991579965, "learning_rate": 3.224030027270758e-07, "loss": 0.0005, "step": 272560 }, { "epoch": 1.7931882923363354, "grad_norm": 0.024964946601318552, "learning_rate": 3.2220021432373403e-07, "loss": 0.0004, "step": 272570 }, { "epoch": 1.7932540805115689, "grad_norm": 0.05601796687375045, "learning_rate": 3.2199748759314196e-07, "loss": 0.0004, "step": 272580 }, { "epoch": 1.7933198686868024, "grad_norm": 0.013192662503638127, "learning_rate": 3.21794822537973e-07, "loss": 0.0012, "step": 272590 }, { "epoch": 1.7933856568620357, "grad_norm": 0.006287654505404646, "learning_rate": 3.215922191608994e-07, "loss": 0.0003, "step": 272600 }, { "epoch": 1.793451445037269, "grad_norm": 0.00023379765312112353, "learning_rate": 3.2138967746459195e-07, "loss": 0.0003, "step": 272610 }, { "epoch": 1.7935172332125022, "grad_norm": 0.04395562645762504, "learning_rate": 3.211871974517206e-07, "loss": 0.0004, "step": 272620 }, { "epoch": 1.7935830213877357, "grad_norm": 0.013808343370058436, "learning_rate": 3.2098477912495486e-07, "loss": 0.0006, "step": 272630 }, { "epoch": 1.7936488095629692, "grad_norm": 0.012843025394333534, "learning_rate": 3.2078242248696437e-07, "loss": 0.0008, "step": 272640 }, { "epoch": 1.7937145977382025, "grad_norm": 0.0335262628629113, "learning_rate": 3.2058012754041577e-07, "loss": 0.0006, "step": 272650 }, { "epoch": 1.7937803859134358, "grad_norm": 0.0961274712255659, "learning_rate": 3.2037789428797707e-07, "loss": 0.0011, "step": 272660 }, { "epoch": 1.7938461740886693, "grad_norm": 0.09274417044815517, "learning_rate": 3.2017572273231434e-07, "loss": 0.0005, "step": 272670 }, { "epoch": 1.7939119622639028, "grad_norm": 0.01653151762030046, "learning_rate": 3.199736128760933e-07, "loss": 0.0007, "step": 272680 }, { "epoch": 1.793977750439136, "grad_norm": 0.023743725873486388, "learning_rate": 3.197715647219779e-07, "loss": 0.0006, "step": 272690 }, { "epoch": 1.7940435386143694, "grad_norm": 0.03286295698096516, "learning_rate": 3.195695782726321e-07, "loss": 0.0007, "step": 272700 }, { "epoch": 1.7941093267896029, "grad_norm": 0.0029210284258577718, "learning_rate": 3.193676535307183e-07, "loss": 0.0004, "step": 272710 }, { "epoch": 1.7941751149648362, "grad_norm": 0.011346546869752328, "learning_rate": 3.191657904989004e-07, "loss": 0.0003, "step": 272720 }, { "epoch": 1.7942409031400697, "grad_norm": 0.015338344339332403, "learning_rate": 3.189639891798391e-07, "loss": 0.0007, "step": 272730 }, { "epoch": 1.794306691315303, "grad_norm": 0.0002769399257848024, "learning_rate": 3.1876224957619494e-07, "loss": 0.0003, "step": 272740 }, { "epoch": 1.7943724794905362, "grad_norm": 0.00810732908129836, "learning_rate": 3.1856057169062706e-07, "loss": 0.0006, "step": 272750 }, { "epoch": 1.7944382676657697, "grad_norm": 0.003158677684990253, "learning_rate": 3.1835895552579546e-07, "loss": 0.0006, "step": 272760 }, { "epoch": 1.7945040558410033, "grad_norm": 0.04863522209062619, "learning_rate": 3.181574010843569e-07, "loss": 0.0002, "step": 272770 }, { "epoch": 1.7945698440162365, "grad_norm": 0.034626163032786025, "learning_rate": 3.179559083689704e-07, "loss": 0.0003, "step": 272780 }, { "epoch": 1.7946356321914698, "grad_norm": 0.028676181015627526, "learning_rate": 3.1775447738229105e-07, "loss": 0.0003, "step": 272790 }, { "epoch": 1.7947014203667033, "grad_norm": 0.06914954746621844, "learning_rate": 3.1755310812697504e-07, "loss": 0.0004, "step": 272800 }, { "epoch": 1.7947672085419368, "grad_norm": 0.017376624375850833, "learning_rate": 3.173518006056775e-07, "loss": 0.0003, "step": 272810 }, { "epoch": 1.7948329967171701, "grad_norm": 0.034083269921725584, "learning_rate": 3.171505548210524e-07, "loss": 0.0006, "step": 272820 }, { "epoch": 1.7948987848924034, "grad_norm": 0.03060395102230482, "learning_rate": 3.169493707757526e-07, "loss": 0.0004, "step": 272830 }, { "epoch": 1.7949645730676367, "grad_norm": 0.005009657414781426, "learning_rate": 3.167482484724299e-07, "loss": 0.0004, "step": 272840 }, { "epoch": 1.7950303612428702, "grad_norm": 0.02888255309452428, "learning_rate": 3.165471879137377e-07, "loss": 0.0008, "step": 272850 }, { "epoch": 1.7950961494181037, "grad_norm": 0.016310958828789086, "learning_rate": 3.163461891023262e-07, "loss": 0.0006, "step": 272860 }, { "epoch": 1.795161937593337, "grad_norm": 0.022575586058511017, "learning_rate": 3.161452520408448e-07, "loss": 0.0004, "step": 272870 }, { "epoch": 1.7952277257685703, "grad_norm": 0.026988134049553185, "learning_rate": 3.1594437673194314e-07, "loss": 0.0009, "step": 272880 }, { "epoch": 1.7952935139438038, "grad_norm": 0.0034559297790388178, "learning_rate": 3.1574356317826904e-07, "loss": 0.0004, "step": 272890 }, { "epoch": 1.7953593021190373, "grad_norm": 0.03225440123085167, "learning_rate": 3.15542811382471e-07, "loss": 0.0007, "step": 272900 }, { "epoch": 1.7954250902942706, "grad_norm": 0.0868832740179074, "learning_rate": 3.1534212134719524e-07, "loss": 0.0004, "step": 272910 }, { "epoch": 1.7954908784695038, "grad_norm": 0.054012697591265126, "learning_rate": 3.1514149307508737e-07, "loss": 0.0006, "step": 272920 }, { "epoch": 1.7955566666447371, "grad_norm": 0.028716600174877437, "learning_rate": 3.1494092656879306e-07, "loss": 0.0006, "step": 272930 }, { "epoch": 1.7956224548199706, "grad_norm": 0.00904984079045429, "learning_rate": 3.1474042183095633e-07, "loss": 0.0003, "step": 272940 }, { "epoch": 1.7956882429952041, "grad_norm": 0.030683560378600535, "learning_rate": 3.145399788642206e-07, "loss": 0.0006, "step": 272950 }, { "epoch": 1.7957540311704374, "grad_norm": 0.03537467491543059, "learning_rate": 3.143395976712288e-07, "loss": 0.0005, "step": 272960 }, { "epoch": 1.7958198193456707, "grad_norm": 0.015570554286178767, "learning_rate": 3.1413927825462155e-07, "loss": 0.0004, "step": 272970 }, { "epoch": 1.7958856075209042, "grad_norm": 0.06003499399018763, "learning_rate": 3.1393902061704175e-07, "loss": 0.0003, "step": 272980 }, { "epoch": 1.7959513956961377, "grad_norm": 0.008695190055507503, "learning_rate": 3.1373882476112895e-07, "loss": 0.0002, "step": 272990 }, { "epoch": 1.796017183871371, "grad_norm": 0.006164999674885059, "learning_rate": 3.1353869068952214e-07, "loss": 0.0005, "step": 273000 }, { "epoch": 1.7960829720466043, "grad_norm": 0.00012786904031919077, "learning_rate": 3.1333861840486093e-07, "loss": 0.0003, "step": 273010 }, { "epoch": 1.7961487602218378, "grad_norm": 0.01894267350932179, "learning_rate": 3.1313860790978145e-07, "loss": 0.0005, "step": 273020 }, { "epoch": 1.796214548397071, "grad_norm": 0.04071757893337256, "learning_rate": 3.129386592069222e-07, "loss": 0.0003, "step": 273030 }, { "epoch": 1.7962803365723046, "grad_norm": 0.04569004582104795, "learning_rate": 3.127387722989184e-07, "loss": 0.0004, "step": 273040 }, { "epoch": 1.7963461247475379, "grad_norm": 0.027760660731370332, "learning_rate": 3.12538947188406e-07, "loss": 0.0014, "step": 273050 }, { "epoch": 1.7964119129227711, "grad_norm": 0.024592158302197537, "learning_rate": 3.1233918387801874e-07, "loss": 0.0002, "step": 273060 }, { "epoch": 1.7964777010980046, "grad_norm": 0.0076019149505735205, "learning_rate": 3.121394823703916e-07, "loss": 0.0003, "step": 273070 }, { "epoch": 1.7965434892732381, "grad_norm": 0.006673282794288715, "learning_rate": 3.119398426681558e-07, "loss": 0.0004, "step": 273080 }, { "epoch": 1.7966092774484714, "grad_norm": 0.022048950893278647, "learning_rate": 3.117402647739448e-07, "loss": 0.0003, "step": 273090 }, { "epoch": 1.7966750656237047, "grad_norm": 0.02224115097382336, "learning_rate": 3.1154074869038877e-07, "loss": 0.0003, "step": 273100 }, { "epoch": 1.7967408537989382, "grad_norm": 0.0014005433140362037, "learning_rate": 3.1134129442011883e-07, "loss": 0.0004, "step": 273110 }, { "epoch": 1.7968066419741717, "grad_norm": 0.01614535096663922, "learning_rate": 3.111419019657652e-07, "loss": 0.0006, "step": 273120 }, { "epoch": 1.796872430149405, "grad_norm": 0.0018283741178818037, "learning_rate": 3.1094257132995577e-07, "loss": 0.0003, "step": 273130 }, { "epoch": 1.7969382183246383, "grad_norm": 0.002573227201296641, "learning_rate": 3.107433025153189e-07, "loss": 0.0002, "step": 273140 }, { "epoch": 1.7970040064998716, "grad_norm": 0.016559131387236182, "learning_rate": 3.105440955244826e-07, "loss": 0.0003, "step": 273150 }, { "epoch": 1.797069794675105, "grad_norm": 0.024801489700521033, "learning_rate": 3.103449503600708e-07, "loss": 0.0002, "step": 273160 }, { "epoch": 1.7971355828503386, "grad_norm": 0.04249903036366939, "learning_rate": 3.101458670247115e-07, "loss": 0.0003, "step": 273170 }, { "epoch": 1.7972013710255719, "grad_norm": 0.013558945609507687, "learning_rate": 3.0994684552102805e-07, "loss": 0.0003, "step": 273180 }, { "epoch": 1.7972671592008052, "grad_norm": 0.0038652187163411844, "learning_rate": 3.097478858516451e-07, "loss": 0.0002, "step": 273190 }, { "epoch": 1.7973329473760387, "grad_norm": 0.01719681249902286, "learning_rate": 3.0954898801918487e-07, "loss": 0.0004, "step": 273200 }, { "epoch": 1.7973987355512722, "grad_norm": 0.0006188862777525735, "learning_rate": 3.093501520262704e-07, "loss": 0.0001, "step": 273210 }, { "epoch": 1.7974645237265054, "grad_norm": 0.011580919837106086, "learning_rate": 3.091513778755234e-07, "loss": 0.0003, "step": 273220 }, { "epoch": 1.7975303119017387, "grad_norm": 0.012360987431466679, "learning_rate": 3.0895266556956407e-07, "loss": 0.0002, "step": 273230 }, { "epoch": 1.797596100076972, "grad_norm": 0.02200353070846976, "learning_rate": 3.087540151110124e-07, "loss": 0.0007, "step": 273240 }, { "epoch": 1.7976618882522055, "grad_norm": 0.02983451838011991, "learning_rate": 3.085554265024876e-07, "loss": 0.0003, "step": 273250 }, { "epoch": 1.797727676427439, "grad_norm": 0.020939027903325608, "learning_rate": 3.083568997466074e-07, "loss": 0.0003, "step": 273260 }, { "epoch": 1.7977934646026723, "grad_norm": 0.03796218760935488, "learning_rate": 3.0815843484598984e-07, "loss": 0.0005, "step": 273270 }, { "epoch": 1.7978592527779056, "grad_norm": 0.040403542240846385, "learning_rate": 3.0796003180325105e-07, "loss": 0.0008, "step": 273280 }, { "epoch": 1.797925040953139, "grad_norm": 0.008391013321282968, "learning_rate": 3.077616906210074e-07, "loss": 0.0002, "step": 273290 }, { "epoch": 1.7979908291283726, "grad_norm": 0.010574246806382514, "learning_rate": 3.075634113018727e-07, "loss": 0.0003, "step": 273300 }, { "epoch": 1.7980566173036059, "grad_norm": 0.023726552197008456, "learning_rate": 3.0736519384846233e-07, "loss": 0.0004, "step": 273310 }, { "epoch": 1.7981224054788392, "grad_norm": 0.00023275661788383438, "learning_rate": 3.0716703826338846e-07, "loss": 0.0003, "step": 273320 }, { "epoch": 1.7981881936540725, "grad_norm": 0.04115561657413222, "learning_rate": 3.0696894454926464e-07, "loss": 0.0003, "step": 273330 }, { "epoch": 1.798253981829306, "grad_norm": 0.041080692067702575, "learning_rate": 3.0677091270870094e-07, "loss": 0.0005, "step": 273340 }, { "epoch": 1.7983197700045395, "grad_norm": 0.05410414741786746, "learning_rate": 3.0657294274431035e-07, "loss": 0.0005, "step": 273350 }, { "epoch": 1.7983855581797727, "grad_norm": 0.01012900982307352, "learning_rate": 3.0637503465870243e-07, "loss": 0.0009, "step": 273360 }, { "epoch": 1.798451346355006, "grad_norm": 0.0003483633194704064, "learning_rate": 3.061771884544856e-07, "loss": 0.0006, "step": 273370 }, { "epoch": 1.7985171345302395, "grad_norm": 0.04623346683547206, "learning_rate": 3.0597940413426896e-07, "loss": 0.0004, "step": 273380 }, { "epoch": 1.798582922705473, "grad_norm": 0.045246575621951904, "learning_rate": 3.0578168170065923e-07, "loss": 0.0003, "step": 273390 }, { "epoch": 1.7986487108807063, "grad_norm": 0.00033759654597879024, "learning_rate": 3.055840211562644e-07, "loss": 0.0004, "step": 273400 }, { "epoch": 1.7987144990559396, "grad_norm": 0.01896644554894429, "learning_rate": 3.0538642250368957e-07, "loss": 0.0006, "step": 273410 }, { "epoch": 1.7987802872311731, "grad_norm": 0.00998676141295234, "learning_rate": 3.051888857455404e-07, "loss": 0.0007, "step": 273420 }, { "epoch": 1.7988460754064064, "grad_norm": 0.008262596112469845, "learning_rate": 3.049914108844215e-07, "loss": 0.0006, "step": 273430 }, { "epoch": 1.79891186358164, "grad_norm": 0.020774454159389947, "learning_rate": 3.047939979229353e-07, "loss": 0.0004, "step": 273440 }, { "epoch": 1.7989776517568732, "grad_norm": 0.025447141420990905, "learning_rate": 3.0459664686368516e-07, "loss": 0.0004, "step": 273450 }, { "epoch": 1.7990434399321065, "grad_norm": 0.006829922417904951, "learning_rate": 3.043993577092735e-07, "loss": 0.0003, "step": 273460 }, { "epoch": 1.79910922810734, "grad_norm": 0.01472050582436375, "learning_rate": 3.042021304623e-07, "loss": 0.0005, "step": 273470 }, { "epoch": 1.7991750162825735, "grad_norm": 0.02503995797838127, "learning_rate": 3.0400496512536684e-07, "loss": 0.0004, "step": 273480 }, { "epoch": 1.7992408044578068, "grad_norm": 0.02170078078935743, "learning_rate": 3.03807861701072e-07, "loss": 0.0004, "step": 273490 }, { "epoch": 1.79930659263304, "grad_norm": 0.012871006800077611, "learning_rate": 3.036108201920146e-07, "loss": 0.0002, "step": 273500 }, { "epoch": 1.7993723808082736, "grad_norm": 0.04455995278117513, "learning_rate": 3.0341384060079305e-07, "loss": 0.0003, "step": 273510 }, { "epoch": 1.799438168983507, "grad_norm": 0.03871476750690172, "learning_rate": 3.0321692293000304e-07, "loss": 0.0003, "step": 273520 }, { "epoch": 1.7995039571587403, "grad_norm": 0.01618703846588373, "learning_rate": 3.03020067182242e-07, "loss": 0.0003, "step": 273530 }, { "epoch": 1.7995697453339736, "grad_norm": 0.03529661277656881, "learning_rate": 3.0282327336010443e-07, "loss": 0.0003, "step": 273540 }, { "epoch": 1.799635533509207, "grad_norm": 0.08182688275898366, "learning_rate": 3.0262654146618555e-07, "loss": 0.0005, "step": 273550 }, { "epoch": 1.7997013216844404, "grad_norm": 0.026150794565164146, "learning_rate": 3.024298715030788e-07, "loss": 0.0004, "step": 273560 }, { "epoch": 1.799767109859674, "grad_norm": 0.00018528907605032714, "learning_rate": 3.022332634733771e-07, "loss": 0.0007, "step": 273570 }, { "epoch": 1.7998328980349072, "grad_norm": 0.01830375596930227, "learning_rate": 3.0203671737967233e-07, "loss": 0.0006, "step": 273580 }, { "epoch": 1.7998986862101405, "grad_norm": 0.05334892451171449, "learning_rate": 3.018402332245562e-07, "loss": 0.0006, "step": 273590 }, { "epoch": 1.799964474385374, "grad_norm": 0.008854677910860802, "learning_rate": 3.0164381101061846e-07, "loss": 0.0004, "step": 273600 }, { "epoch": 1.8000302625606075, "grad_norm": 0.024647966380806862, "learning_rate": 3.014474507404497e-07, "loss": 0.0005, "step": 273610 }, { "epoch": 1.8000960507358408, "grad_norm": 0.0018630648460319363, "learning_rate": 3.012511524166384e-07, "loss": 0.0005, "step": 273620 }, { "epoch": 1.800161838911074, "grad_norm": 0.01840876897042343, "learning_rate": 3.010549160417731e-07, "loss": 0.0006, "step": 273630 }, { "epoch": 1.8002276270863073, "grad_norm": 0.013923158630860721, "learning_rate": 3.0085874161844e-07, "loss": 0.0003, "step": 273640 }, { "epoch": 1.8002934152615409, "grad_norm": 0.017261605600466093, "learning_rate": 3.006626291492259e-07, "loss": 0.0018, "step": 273650 }, { "epoch": 1.8003592034367744, "grad_norm": 0.047024701325894314, "learning_rate": 3.004665786367161e-07, "loss": 0.0005, "step": 273660 }, { "epoch": 1.8004249916120076, "grad_norm": 0.020390499183304312, "learning_rate": 3.002705900834962e-07, "loss": 0.0003, "step": 273670 }, { "epoch": 1.800490779787241, "grad_norm": 0.005132729190877503, "learning_rate": 3.0007466349214974e-07, "loss": 0.0003, "step": 273680 }, { "epoch": 1.8005565679624744, "grad_norm": 0.006732320541172565, "learning_rate": 2.998787988652591e-07, "loss": 0.0005, "step": 273690 }, { "epoch": 1.800622356137708, "grad_norm": 0.040745898792219515, "learning_rate": 2.996829962054076e-07, "loss": 0.0006, "step": 273700 }, { "epoch": 1.8006881443129412, "grad_norm": 0.027961868632921872, "learning_rate": 2.994872555151762e-07, "loss": 0.0005, "step": 273710 }, { "epoch": 1.8007539324881745, "grad_norm": 0.01658118376983651, "learning_rate": 2.9929157679714607e-07, "loss": 0.0007, "step": 273720 }, { "epoch": 1.800819720663408, "grad_norm": 0.019763978737942802, "learning_rate": 2.9909596005389563e-07, "loss": 0.0003, "step": 273730 }, { "epoch": 1.8008855088386413, "grad_norm": 0.028241660225307056, "learning_rate": 2.989004052880057e-07, "loss": 0.0005, "step": 273740 }, { "epoch": 1.8009512970138748, "grad_norm": 0.046480152787100264, "learning_rate": 2.9870491250205413e-07, "loss": 0.0002, "step": 273750 }, { "epoch": 1.801017085189108, "grad_norm": 0.017677451451471888, "learning_rate": 2.985094816986173e-07, "loss": 0.0003, "step": 273760 }, { "epoch": 1.8010828733643414, "grad_norm": 0.016391437569580204, "learning_rate": 2.98314112880273e-07, "loss": 0.0007, "step": 273770 }, { "epoch": 1.8011486615395749, "grad_norm": 0.008244870778496428, "learning_rate": 2.981188060495965e-07, "loss": 0.0003, "step": 273780 }, { "epoch": 1.8012144497148084, "grad_norm": 0.033361184851601444, "learning_rate": 2.9792356120916246e-07, "loss": 0.0003, "step": 273790 }, { "epoch": 1.8012802378900417, "grad_norm": 0.008716186670395783, "learning_rate": 2.977283783615459e-07, "loss": 0.0004, "step": 273800 }, { "epoch": 1.801346026065275, "grad_norm": 0.006229458994371684, "learning_rate": 2.9753325750931874e-07, "loss": 0.0017, "step": 273810 }, { "epoch": 1.8014118142405084, "grad_norm": 0.09071748203840467, "learning_rate": 2.9733819865505496e-07, "loss": 0.0005, "step": 273820 }, { "epoch": 1.801477602415742, "grad_norm": 0.07807419092785334, "learning_rate": 2.971432018013248e-07, "loss": 0.0008, "step": 273830 }, { "epoch": 1.8015433905909752, "grad_norm": 0.009486962398561147, "learning_rate": 2.9694826695070065e-07, "loss": 0.0008, "step": 273840 }, { "epoch": 1.8016091787662085, "grad_norm": 0.010776221078959588, "learning_rate": 2.96753394105751e-07, "loss": 0.0001, "step": 273850 }, { "epoch": 1.8016749669414418, "grad_norm": 0.0044433319679326394, "learning_rate": 2.965585832690454e-07, "loss": 0.0001, "step": 273860 }, { "epoch": 1.8017407551166753, "grad_norm": 0.023151837825867824, "learning_rate": 2.963638344431541e-07, "loss": 0.0002, "step": 273870 }, { "epoch": 1.8018065432919088, "grad_norm": 0.011122582617473988, "learning_rate": 2.961691476306427e-07, "loss": 0.0002, "step": 273880 }, { "epoch": 1.801872331467142, "grad_norm": 0.007126350932836151, "learning_rate": 2.9597452283407823e-07, "loss": 0.0004, "step": 273890 }, { "epoch": 1.8019381196423754, "grad_norm": 0.07871493807230812, "learning_rate": 2.9577996005602793e-07, "loss": 0.001, "step": 273900 }, { "epoch": 1.8020039078176089, "grad_norm": 0.004909973395063744, "learning_rate": 2.9558545929905537e-07, "loss": 0.0005, "step": 273910 }, { "epoch": 1.8020696959928424, "grad_norm": 0.05592533173573356, "learning_rate": 2.953910205657257e-07, "loss": 0.0003, "step": 273920 }, { "epoch": 1.8021354841680757, "grad_norm": 0.02384162216461816, "learning_rate": 2.9519664385860235e-07, "loss": 0.0005, "step": 273930 }, { "epoch": 1.802201272343309, "grad_norm": 0.004377962847519855, "learning_rate": 2.9500232918024783e-07, "loss": 0.0001, "step": 273940 }, { "epoch": 1.8022670605185422, "grad_norm": 0.0019146637642449456, "learning_rate": 2.9480807653322396e-07, "loss": 0.0004, "step": 273950 }, { "epoch": 1.8023328486937757, "grad_norm": 0.01259892992871905, "learning_rate": 2.94613885920092e-07, "loss": 0.0003, "step": 273960 }, { "epoch": 1.8023986368690093, "grad_norm": 0.057036490647785085, "learning_rate": 2.9441975734341265e-07, "loss": 0.0008, "step": 273970 }, { "epoch": 1.8024644250442425, "grad_norm": 0.06109490397380861, "learning_rate": 2.9422569080574395e-07, "loss": 0.0005, "step": 273980 }, { "epoch": 1.8025302132194758, "grad_norm": 0.02675799795773698, "learning_rate": 2.940316863096454e-07, "loss": 0.0005, "step": 273990 }, { "epoch": 1.8025960013947093, "grad_norm": 0.008575648650432064, "learning_rate": 2.938377438576751e-07, "loss": 0.0002, "step": 274000 }, { "epoch": 1.8026617895699428, "grad_norm": 0.017030535701536117, "learning_rate": 2.9364386345238914e-07, "loss": 0.0005, "step": 274010 }, { "epoch": 1.8027275777451761, "grad_norm": 0.0001050250248666274, "learning_rate": 2.9345004509634457e-07, "loss": 0.0003, "step": 274020 }, { "epoch": 1.8027933659204094, "grad_norm": 0.018290593526195005, "learning_rate": 2.9325628879209646e-07, "loss": 0.0018, "step": 274030 }, { "epoch": 1.802859154095643, "grad_norm": 0.04700047423848959, "learning_rate": 2.9306259454219944e-07, "loss": 0.0004, "step": 274040 }, { "epoch": 1.8029249422708762, "grad_norm": 0.0013038040417013214, "learning_rate": 2.928689623492065e-07, "loss": 0.0004, "step": 274050 }, { "epoch": 1.8029907304461097, "grad_norm": 0.003815202811604122, "learning_rate": 2.926753922156711e-07, "loss": 0.0001, "step": 274060 }, { "epoch": 1.803056518621343, "grad_norm": 0.02152290696536285, "learning_rate": 2.9248188414414514e-07, "loss": 0.0015, "step": 274070 }, { "epoch": 1.8031223067965763, "grad_norm": 0.05934746017891621, "learning_rate": 2.9228843813717935e-07, "loss": 0.0004, "step": 274080 }, { "epoch": 1.8031880949718098, "grad_norm": 0.027462166861843273, "learning_rate": 2.920950541973255e-07, "loss": 0.0002, "step": 274090 }, { "epoch": 1.8032538831470433, "grad_norm": 0.07467737859394533, "learning_rate": 2.919017323271317e-07, "loss": 0.0004, "step": 274100 }, { "epoch": 1.8033196713222766, "grad_norm": 0.018410335691007203, "learning_rate": 2.917084725291475e-07, "loss": 0.0012, "step": 274110 }, { "epoch": 1.8033854594975098, "grad_norm": 0.07267553571872581, "learning_rate": 2.9151527480592026e-07, "loss": 0.0004, "step": 274120 }, { "epoch": 1.8034512476727433, "grad_norm": 0.0473434290214946, "learning_rate": 2.913221391599985e-07, "loss": 0.0013, "step": 274130 }, { "epoch": 1.8035170358479768, "grad_norm": 0.07282640491228844, "learning_rate": 2.9112906559392694e-07, "loss": 0.0005, "step": 274140 }, { "epoch": 1.8035828240232101, "grad_norm": 0.04852263006844539, "learning_rate": 2.9093605411025237e-07, "loss": 0.0003, "step": 274150 }, { "epoch": 1.8036486121984434, "grad_norm": 0.004783138368364511, "learning_rate": 2.907431047115189e-07, "loss": 0.0005, "step": 274160 }, { "epoch": 1.8037144003736767, "grad_norm": 0.01909807350265925, "learning_rate": 2.9055021740027e-07, "loss": 0.0011, "step": 274170 }, { "epoch": 1.8037801885489102, "grad_norm": 0.02374921162961454, "learning_rate": 2.9035739217904977e-07, "loss": 0.0003, "step": 274180 }, { "epoch": 1.8038459767241437, "grad_norm": 0.03459866401992271, "learning_rate": 2.9016462905039953e-07, "loss": 0.0005, "step": 274190 }, { "epoch": 1.803911764899377, "grad_norm": 0.05519048980927315, "learning_rate": 2.8997192801686056e-07, "loss": 0.0004, "step": 274200 }, { "epoch": 1.8039775530746103, "grad_norm": 0.03204050191334004, "learning_rate": 2.897792890809742e-07, "loss": 0.0002, "step": 274210 }, { "epoch": 1.8040433412498438, "grad_norm": 0.03563020265048411, "learning_rate": 2.8958671224528e-07, "loss": 0.0003, "step": 274220 }, { "epoch": 1.8041091294250773, "grad_norm": 0.04518858751580919, "learning_rate": 2.8939419751231714e-07, "loss": 0.0006, "step": 274230 }, { "epoch": 1.8041749176003106, "grad_norm": 0.0052379918263690825, "learning_rate": 2.8920174488462294e-07, "loss": 0.0002, "step": 274240 }, { "epoch": 1.8042407057755439, "grad_norm": 0.014052223677941258, "learning_rate": 2.890093543647343e-07, "loss": 0.0003, "step": 274250 }, { "epoch": 1.8043064939507771, "grad_norm": 0.019938947215340813, "learning_rate": 2.888170259551898e-07, "loss": 0.0004, "step": 274260 }, { "epoch": 1.8043722821260106, "grad_norm": 0.019324480020267574, "learning_rate": 2.8862475965852397e-07, "loss": 0.0006, "step": 274270 }, { "epoch": 1.8044380703012441, "grad_norm": 0.011970322306727961, "learning_rate": 2.8843255547727157e-07, "loss": 0.0002, "step": 274280 }, { "epoch": 1.8045038584764774, "grad_norm": 0.050056066518289595, "learning_rate": 2.8824041341396656e-07, "loss": 0.0005, "step": 274290 }, { "epoch": 1.8045696466517107, "grad_norm": 0.012038249621520312, "learning_rate": 2.8804833347114257e-07, "loss": 0.0004, "step": 274300 }, { "epoch": 1.8046354348269442, "grad_norm": 0.04029045215893024, "learning_rate": 2.8785631565133145e-07, "loss": 0.0004, "step": 274310 }, { "epoch": 1.8047012230021777, "grad_norm": 0.037681145398453145, "learning_rate": 2.8766435995706555e-07, "loss": 0.0003, "step": 274320 }, { "epoch": 1.804767011177411, "grad_norm": 0.006503153996105407, "learning_rate": 2.874724663908751e-07, "loss": 0.0002, "step": 274330 }, { "epoch": 1.8048327993526443, "grad_norm": 0.004223774855700911, "learning_rate": 2.8728063495529036e-07, "loss": 0.0002, "step": 274340 }, { "epoch": 1.8048985875278776, "grad_norm": 0.0009103841614924136, "learning_rate": 2.8708886565284034e-07, "loss": 0.0016, "step": 274350 }, { "epoch": 1.804964375703111, "grad_norm": 0.03403234413633722, "learning_rate": 2.8689715848605304e-07, "loss": 0.0006, "step": 274360 }, { "epoch": 1.8050301638783446, "grad_norm": 0.030629868055820852, "learning_rate": 2.867055134574559e-07, "loss": 0.0013, "step": 274370 }, { "epoch": 1.8050959520535779, "grad_norm": 0.0005682833930082086, "learning_rate": 2.8651393056957577e-07, "loss": 0.0003, "step": 274380 }, { "epoch": 1.8051617402288112, "grad_norm": 0.0005307316669616493, "learning_rate": 2.86322409824939e-07, "loss": 0.0008, "step": 274390 }, { "epoch": 1.8052275284040447, "grad_norm": 0.005333274104296367, "learning_rate": 2.8613095122607017e-07, "loss": 0.0002, "step": 274400 }, { "epoch": 1.8052933165792782, "grad_norm": 0.010004017528674896, "learning_rate": 2.85939554775494e-07, "loss": 0.0002, "step": 274410 }, { "epoch": 1.8053591047545114, "grad_norm": 0.011859677080136431, "learning_rate": 2.857482204757328e-07, "loss": 0.0001, "step": 274420 }, { "epoch": 1.8054248929297447, "grad_norm": 0.01421421948254984, "learning_rate": 2.8555694832931023e-07, "loss": 0.0004, "step": 274430 }, { "epoch": 1.8054906811049782, "grad_norm": 0.0030408920575604316, "learning_rate": 2.853657383387476e-07, "loss": 0.0003, "step": 274440 }, { "epoch": 1.8055564692802117, "grad_norm": 0.05121095784131395, "learning_rate": 2.8517459050656613e-07, "loss": 0.0003, "step": 274450 }, { "epoch": 1.805622257455445, "grad_norm": 0.18714305100648324, "learning_rate": 2.8498350483528556e-07, "loss": 0.0005, "step": 274460 }, { "epoch": 1.8056880456306783, "grad_norm": 3.705792781137323e-05, "learning_rate": 2.84792481327425e-07, "loss": 0.0005, "step": 274470 }, { "epoch": 1.8057538338059116, "grad_norm": 0.007729188139397439, "learning_rate": 2.8460151998550346e-07, "loss": 0.0008, "step": 274480 }, { "epoch": 1.805819621981145, "grad_norm": 0.0033609001560160525, "learning_rate": 2.8441062081203843e-07, "loss": 0.0001, "step": 274490 }, { "epoch": 1.8058854101563786, "grad_norm": 0.012665791718929242, "learning_rate": 2.8421978380954676e-07, "loss": 0.0002, "step": 274500 }, { "epoch": 1.8059511983316119, "grad_norm": 0.0015664530312540093, "learning_rate": 2.8402900898054364e-07, "loss": 0.0003, "step": 274510 }, { "epoch": 1.8060169865068452, "grad_norm": 0.009084349869142748, "learning_rate": 2.83838296327546e-07, "loss": 0.0003, "step": 274520 }, { "epoch": 1.8060827746820787, "grad_norm": 0.005925902541235499, "learning_rate": 2.836476458530674e-07, "loss": 0.0003, "step": 274530 }, { "epoch": 1.8061485628573122, "grad_norm": 0.044158028239080334, "learning_rate": 2.8345705755962074e-07, "loss": 0.0002, "step": 274540 }, { "epoch": 1.8062143510325455, "grad_norm": 0.05998717147264067, "learning_rate": 2.832665314497196e-07, "loss": 0.0006, "step": 274550 }, { "epoch": 1.8062801392077787, "grad_norm": 0.02271342441742256, "learning_rate": 2.8307606752587537e-07, "loss": 0.0004, "step": 274560 }, { "epoch": 1.806345927383012, "grad_norm": 0.0019769720158692296, "learning_rate": 2.8288566579059987e-07, "loss": 0.0007, "step": 274570 }, { "epoch": 1.8064117155582455, "grad_norm": 0.04022212572465541, "learning_rate": 2.8269532624640214e-07, "loss": 0.0006, "step": 274580 }, { "epoch": 1.806477503733479, "grad_norm": 0.004408286357793249, "learning_rate": 2.8250504889579313e-07, "loss": 0.0006, "step": 274590 }, { "epoch": 1.8065432919087123, "grad_norm": 0.000565365159699835, "learning_rate": 2.8231483374128067e-07, "loss": 0.0001, "step": 274600 }, { "epoch": 1.8066090800839456, "grad_norm": 0.0033917132109642567, "learning_rate": 2.8212468078537225e-07, "loss": 0.0002, "step": 274610 }, { "epoch": 1.8066748682591791, "grad_norm": 0.04537158791253095, "learning_rate": 2.819345900305753e-07, "loss": 0.0004, "step": 274620 }, { "epoch": 1.8067406564344126, "grad_norm": 0.03760286761051508, "learning_rate": 2.817445614793962e-07, "loss": 0.0004, "step": 274630 }, { "epoch": 1.806806444609646, "grad_norm": 0.008371194178037606, "learning_rate": 2.815545951343396e-07, "loss": 0.0006, "step": 274640 }, { "epoch": 1.8068722327848792, "grad_norm": 0.005983897540914069, "learning_rate": 2.813646909979112e-07, "loss": 0.0004, "step": 274650 }, { "epoch": 1.8069380209601125, "grad_norm": 0.05955231427079801, "learning_rate": 2.811748490726135e-07, "loss": 0.0005, "step": 274660 }, { "epoch": 1.807003809135346, "grad_norm": 0.005273786896808348, "learning_rate": 2.809850693609506e-07, "loss": 0.001, "step": 274670 }, { "epoch": 1.8070695973105795, "grad_norm": 0.01063831234020085, "learning_rate": 2.8079535186542383e-07, "loss": 0.0003, "step": 274680 }, { "epoch": 1.8071353854858128, "grad_norm": 0.0485249006006627, "learning_rate": 2.8060569658853456e-07, "loss": 0.0005, "step": 274690 }, { "epoch": 1.807201173661046, "grad_norm": 0.0003624180995692885, "learning_rate": 2.8041610353278295e-07, "loss": 0.0002, "step": 274700 }, { "epoch": 1.8072669618362796, "grad_norm": 0.01485343570251225, "learning_rate": 2.802265727006692e-07, "loss": 0.0007, "step": 274710 }, { "epoch": 1.807332750011513, "grad_norm": 0.006005410019069478, "learning_rate": 2.8003710409469197e-07, "loss": 0.0006, "step": 274720 }, { "epoch": 1.8073985381867463, "grad_norm": 0.01597230177169979, "learning_rate": 2.798476977173487e-07, "loss": 0.0003, "step": 274730 }, { "epoch": 1.8074643263619796, "grad_norm": 0.013507056974457086, "learning_rate": 2.7965835357113723e-07, "loss": 0.0008, "step": 274740 }, { "epoch": 1.8075301145372131, "grad_norm": 0.03293509246730417, "learning_rate": 2.794690716585535e-07, "loss": 0.0002, "step": 274750 }, { "epoch": 1.8075959027124464, "grad_norm": 0.018502061720828685, "learning_rate": 2.792798519820933e-07, "loss": 0.0003, "step": 274760 }, { "epoch": 1.80766169088768, "grad_norm": 0.04583112213230283, "learning_rate": 2.790906945442501e-07, "loss": 0.0004, "step": 274770 }, { "epoch": 1.8077274790629132, "grad_norm": 0.03074104261735372, "learning_rate": 2.789015993475197e-07, "loss": 0.0007, "step": 274780 }, { "epoch": 1.8077932672381465, "grad_norm": 0.03725473382485291, "learning_rate": 2.787125663943946e-07, "loss": 0.0003, "step": 274790 }, { "epoch": 1.80785905541338, "grad_norm": 0.004075679178967969, "learning_rate": 2.7852359568736665e-07, "loss": 0.0005, "step": 274800 }, { "epoch": 1.8079248435886135, "grad_norm": 0.0037220073840573287, "learning_rate": 2.783346872289272e-07, "loss": 0.0003, "step": 274810 }, { "epoch": 1.8079906317638468, "grad_norm": 0.0015109318536655654, "learning_rate": 2.7814584102156703e-07, "loss": 0.0004, "step": 274820 }, { "epoch": 1.80805641993908, "grad_norm": 0.001201473563439273, "learning_rate": 2.7795705706777587e-07, "loss": 0.0002, "step": 274830 }, { "epoch": 1.8081222081143136, "grad_norm": 0.030225409134332432, "learning_rate": 2.777683353700428e-07, "loss": 0.0006, "step": 274840 }, { "epoch": 1.808187996289547, "grad_norm": 0.01667502316768158, "learning_rate": 2.7757967593085576e-07, "loss": 0.0005, "step": 274850 }, { "epoch": 1.8082537844647804, "grad_norm": 0.02051143482776231, "learning_rate": 2.7739107875270176e-07, "loss": 0.0001, "step": 274860 }, { "epoch": 1.8083195726400136, "grad_norm": 0.032509191075945076, "learning_rate": 2.772025438380682e-07, "loss": 0.0002, "step": 274870 }, { "epoch": 1.808385360815247, "grad_norm": 0.019686180322017566, "learning_rate": 2.7701407118944036e-07, "loss": 0.0005, "step": 274880 }, { "epoch": 1.8084511489904804, "grad_norm": 0.02055027072632157, "learning_rate": 2.768256608093023e-07, "loss": 0.0003, "step": 274890 }, { "epoch": 1.808516937165714, "grad_norm": 0.00020840838004687978, "learning_rate": 2.766373127001387e-07, "loss": 0.0006, "step": 274900 }, { "epoch": 1.8085827253409472, "grad_norm": 0.012497080952351474, "learning_rate": 2.764490268644332e-07, "loss": 0.0003, "step": 274910 }, { "epoch": 1.8086485135161805, "grad_norm": 0.08088701516062112, "learning_rate": 2.7626080330466765e-07, "loss": 0.0006, "step": 274920 }, { "epoch": 1.808714301691414, "grad_norm": 0.012473309733346557, "learning_rate": 2.760726420233234e-07, "loss": 0.0002, "step": 274930 }, { "epoch": 1.8087800898666475, "grad_norm": 0.048936225091255704, "learning_rate": 2.758845430228818e-07, "loss": 0.0008, "step": 274940 }, { "epoch": 1.8088458780418808, "grad_norm": 0.0019220851217650956, "learning_rate": 2.7569650630582314e-07, "loss": 0.0002, "step": 274950 }, { "epoch": 1.808911666217114, "grad_norm": 0.0009238501964563922, "learning_rate": 2.755085318746248e-07, "loss": 0.0002, "step": 274960 }, { "epoch": 1.8089774543923474, "grad_norm": 0.010985106791149828, "learning_rate": 2.7532061973176704e-07, "loss": 0.0006, "step": 274970 }, { "epoch": 1.8090432425675809, "grad_norm": 0.014139653927568137, "learning_rate": 2.751327698797257e-07, "loss": 0.0003, "step": 274980 }, { "epoch": 1.8091090307428144, "grad_norm": 0.01223655794581211, "learning_rate": 2.7494498232097876e-07, "loss": 0.0004, "step": 274990 }, { "epoch": 1.8091748189180477, "grad_norm": 0.010021567601463563, "learning_rate": 2.7475725705800096e-07, "loss": 0.0003, "step": 275000 }, { "epoch": 1.8091748189180477, "eval_loss": 0.0003325959260109812, "eval_runtime": 13.1041, "eval_samples_per_second": 15.262, "eval_steps_per_second": 7.631, "step": 275000 }, { "epoch": 1.809240607093281, "grad_norm": 0.013509830494677487, "learning_rate": 2.74569594093268e-07, "loss": 0.0002, "step": 275010 }, { "epoch": 1.8093063952685144, "grad_norm": 0.015126616006959137, "learning_rate": 2.743819934292535e-07, "loss": 0.0006, "step": 275020 }, { "epoch": 1.809372183443748, "grad_norm": 0.007381162723775473, "learning_rate": 2.741944550684306e-07, "loss": 0.0002, "step": 275030 }, { "epoch": 1.8094379716189812, "grad_norm": 0.016410124888356498, "learning_rate": 2.740069790132738e-07, "loss": 0.0002, "step": 275040 }, { "epoch": 1.8095037597942145, "grad_norm": 0.004363718341566121, "learning_rate": 2.738195652662523e-07, "loss": 0.0002, "step": 275050 }, { "epoch": 1.809569547969448, "grad_norm": 0.03451723357749413, "learning_rate": 2.7363221382983805e-07, "loss": 0.0002, "step": 275060 }, { "epoch": 1.8096353361446813, "grad_norm": 0.027442120059973808, "learning_rate": 2.734449247065013e-07, "loss": 0.0003, "step": 275070 }, { "epoch": 1.8097011243199148, "grad_norm": 0.047320254478197, "learning_rate": 2.732576978987111e-07, "loss": 0.0005, "step": 275080 }, { "epoch": 1.809766912495148, "grad_norm": 0.07273304196184524, "learning_rate": 2.730705334089362e-07, "loss": 0.0003, "step": 275090 }, { "epoch": 1.8098327006703814, "grad_norm": 0.0004679002676797706, "learning_rate": 2.7288343123964335e-07, "loss": 0.0003, "step": 275100 }, { "epoch": 1.8098984888456149, "grad_norm": 0.0018071157326381543, "learning_rate": 2.7269639139330004e-07, "loss": 0.0005, "step": 275110 }, { "epoch": 1.8099642770208484, "grad_norm": 0.012851202589275648, "learning_rate": 2.725094138723722e-07, "loss": 0.0001, "step": 275120 }, { "epoch": 1.8100300651960817, "grad_norm": 0.01884687062197393, "learning_rate": 2.723224986793238e-07, "loss": 0.0003, "step": 275130 }, { "epoch": 1.810095853371315, "grad_norm": 0.01233078997418112, "learning_rate": 2.721356458166213e-07, "loss": 0.0003, "step": 275140 }, { "epoch": 1.8101616415465485, "grad_norm": 0.001564376615847412, "learning_rate": 2.7194885528672666e-07, "loss": 0.0005, "step": 275150 }, { "epoch": 1.810227429721782, "grad_norm": 0.01804560892877242, "learning_rate": 2.717621270921034e-07, "loss": 0.0002, "step": 275160 }, { "epoch": 1.8102932178970152, "grad_norm": 0.009500628523381993, "learning_rate": 2.7157546123521226e-07, "loss": 0.0001, "step": 275170 }, { "epoch": 1.8103590060722485, "grad_norm": 1.8739270985939632e-05, "learning_rate": 2.713888577185153e-07, "loss": 0.0006, "step": 275180 }, { "epoch": 1.8104247942474818, "grad_norm": 0.0012238897177674541, "learning_rate": 2.7120231654447217e-07, "loss": 0.0005, "step": 275190 }, { "epoch": 1.8104905824227153, "grad_norm": 0.039994602271337776, "learning_rate": 2.710158377155431e-07, "loss": 0.0004, "step": 275200 }, { "epoch": 1.8105563705979488, "grad_norm": 0.06753376147976459, "learning_rate": 2.70829421234185e-07, "loss": 0.0005, "step": 275210 }, { "epoch": 1.810622158773182, "grad_norm": 0.05659279647771941, "learning_rate": 2.706430671028576e-07, "loss": 0.0004, "step": 275220 }, { "epoch": 1.8106879469484154, "grad_norm": 0.055968330085305815, "learning_rate": 2.7045677532401615e-07, "loss": 0.0003, "step": 275230 }, { "epoch": 1.810753735123649, "grad_norm": 0.010002506058199927, "learning_rate": 2.702705459001176e-07, "loss": 0.0004, "step": 275240 }, { "epoch": 1.8108195232988824, "grad_norm": 0.0005926115590278972, "learning_rate": 2.7008437883361716e-07, "loss": 0.0004, "step": 275250 }, { "epoch": 1.8108853114741157, "grad_norm": 0.006939531851063408, "learning_rate": 2.698982741269679e-07, "loss": 0.0008, "step": 275260 }, { "epoch": 1.810951099649349, "grad_norm": 0.000960067979244494, "learning_rate": 2.697122317826262e-07, "loss": 0.0006, "step": 275270 }, { "epoch": 1.8110168878245823, "grad_norm": 0.0043160765006409, "learning_rate": 2.6952625180304283e-07, "loss": 0.0003, "step": 275280 }, { "epoch": 1.8110826759998158, "grad_norm": 0.06045323588498534, "learning_rate": 2.693403341906703e-07, "loss": 0.0004, "step": 275290 }, { "epoch": 1.8111484641750493, "grad_norm": 0.06756698952801807, "learning_rate": 2.6915447894796057e-07, "loss": 0.0007, "step": 275300 }, { "epoch": 1.8112142523502825, "grad_norm": 0.012013073732195805, "learning_rate": 2.6896868607736217e-07, "loss": 0.0004, "step": 275310 }, { "epoch": 1.8112800405255158, "grad_norm": 0.02473476446752313, "learning_rate": 2.687829555813265e-07, "loss": 0.0004, "step": 275320 }, { "epoch": 1.8113458287007493, "grad_norm": 0.010706836273079456, "learning_rate": 2.6859728746230107e-07, "loss": 0.0001, "step": 275330 }, { "epoch": 1.8114116168759828, "grad_norm": 0.09091693892071133, "learning_rate": 2.684116817227339e-07, "loss": 0.0004, "step": 275340 }, { "epoch": 1.8114774050512161, "grad_norm": 0.09419893742719226, "learning_rate": 2.682261383650725e-07, "loss": 0.0005, "step": 275350 }, { "epoch": 1.8115431932264494, "grad_norm": 0.0404355865217962, "learning_rate": 2.6804065739176265e-07, "loss": 0.0003, "step": 275360 }, { "epoch": 1.811608981401683, "grad_norm": 0.008209385761337956, "learning_rate": 2.6785523880525024e-07, "loss": 0.0005, "step": 275370 }, { "epoch": 1.8116747695769162, "grad_norm": 0.04360029627446411, "learning_rate": 2.6766988260797935e-07, "loss": 0.0006, "step": 275380 }, { "epoch": 1.8117405577521497, "grad_norm": 0.004223508864782578, "learning_rate": 2.67484588802393e-07, "loss": 0.0008, "step": 275390 }, { "epoch": 1.811806345927383, "grad_norm": 0.037022434686936306, "learning_rate": 2.6729935739093605e-07, "loss": 0.0003, "step": 275400 }, { "epoch": 1.8118721341026163, "grad_norm": 0.020081185578363803, "learning_rate": 2.6711418837604977e-07, "loss": 0.0003, "step": 275410 }, { "epoch": 1.8119379222778498, "grad_norm": 0.0015601819672654406, "learning_rate": 2.669290817601755e-07, "loss": 0.0009, "step": 275420 }, { "epoch": 1.8120037104530833, "grad_norm": 0.0011304983416083757, "learning_rate": 2.6674403754575307e-07, "loss": 0.0004, "step": 275430 }, { "epoch": 1.8120694986283166, "grad_norm": 0.08001923010018758, "learning_rate": 2.665590557352227e-07, "loss": 0.0006, "step": 275440 }, { "epoch": 1.8121352868035498, "grad_norm": 0.0022054597279445834, "learning_rate": 2.6637413633102295e-07, "loss": 0.0005, "step": 275450 }, { "epoch": 1.8122010749787834, "grad_norm": 0.05976975501725301, "learning_rate": 2.6618927933559245e-07, "loss": 0.0005, "step": 275460 }, { "epoch": 1.8122668631540169, "grad_norm": 0.042113154740367593, "learning_rate": 2.6600448475136766e-07, "loss": 0.0007, "step": 275470 }, { "epoch": 1.8123326513292501, "grad_norm": 0.002520085900391684, "learning_rate": 2.6581975258078543e-07, "loss": 0.0001, "step": 275480 }, { "epoch": 1.8123984395044834, "grad_norm": 0.03386188811421182, "learning_rate": 2.6563508282628047e-07, "loss": 0.0003, "step": 275490 }, { "epoch": 1.8124642276797167, "grad_norm": 0.003976570474798422, "learning_rate": 2.654504754902887e-07, "loss": 0.0005, "step": 275500 }, { "epoch": 1.8125300158549502, "grad_norm": 0.02907386284778739, "learning_rate": 2.652659305752431e-07, "loss": 0.0002, "step": 275510 }, { "epoch": 1.8125958040301837, "grad_norm": 0.016661946618235914, "learning_rate": 2.6508144808357616e-07, "loss": 0.0004, "step": 275520 }, { "epoch": 1.812661592205417, "grad_norm": 0.0545521448523747, "learning_rate": 2.648970280177221e-07, "loss": 0.0005, "step": 275530 }, { "epoch": 1.8127273803806503, "grad_norm": 0.00202557769186916, "learning_rate": 2.6471267038011116e-07, "loss": 0.0007, "step": 275540 }, { "epoch": 1.8127931685558838, "grad_norm": 0.0008368927088670907, "learning_rate": 2.6452837517317364e-07, "loss": 0.0004, "step": 275550 }, { "epoch": 1.8128589567311173, "grad_norm": 0.00028875343241715703, "learning_rate": 2.643441423993398e-07, "loss": 0.0003, "step": 275560 }, { "epoch": 1.8129247449063506, "grad_norm": 0.0013083034580935145, "learning_rate": 2.6415997206103884e-07, "loss": 0.0005, "step": 275570 }, { "epoch": 1.8129905330815839, "grad_norm": 0.025694823149569664, "learning_rate": 2.639758641606982e-07, "loss": 0.0003, "step": 275580 }, { "epoch": 1.8130563212568171, "grad_norm": 0.004979809148363735, "learning_rate": 2.637918187007454e-07, "loss": 0.0005, "step": 275590 }, { "epoch": 1.8131221094320507, "grad_norm": 0.02669428568483682, "learning_rate": 2.6360783568360694e-07, "loss": 0.0007, "step": 275600 }, { "epoch": 1.8131878976072842, "grad_norm": 0.01211344097896034, "learning_rate": 2.6342391511170906e-07, "loss": 0.0003, "step": 275610 }, { "epoch": 1.8132536857825174, "grad_norm": 0.006941651918550004, "learning_rate": 2.6324005698747547e-07, "loss": 0.0004, "step": 275620 }, { "epoch": 1.8133194739577507, "grad_norm": 0.024462455918523474, "learning_rate": 2.630562613133314e-07, "loss": 0.0003, "step": 275630 }, { "epoch": 1.8133852621329842, "grad_norm": 0.002235056842695719, "learning_rate": 2.628725280916988e-07, "loss": 0.0001, "step": 275640 }, { "epoch": 1.8134510503082177, "grad_norm": 0.0027461819511708668, "learning_rate": 2.6268885732500026e-07, "loss": 0.0002, "step": 275650 }, { "epoch": 1.813516838483451, "grad_norm": 0.008936658371486158, "learning_rate": 2.625052490156588e-07, "loss": 0.0003, "step": 275660 }, { "epoch": 1.8135826266586843, "grad_norm": 0.026957721384002434, "learning_rate": 2.623217031660935e-07, "loss": 0.0005, "step": 275670 }, { "epoch": 1.8136484148339176, "grad_norm": 0.06731326962226472, "learning_rate": 2.6213821977872476e-07, "loss": 0.0002, "step": 275680 }, { "epoch": 1.813714203009151, "grad_norm": 0.04226330362810858, "learning_rate": 2.619547988559723e-07, "loss": 0.0005, "step": 275690 }, { "epoch": 1.8137799911843846, "grad_norm": 0.02014780865316641, "learning_rate": 2.617714404002536e-07, "loss": 0.0003, "step": 275700 }, { "epoch": 1.8138457793596179, "grad_norm": 0.055540321543540216, "learning_rate": 2.615881444139862e-07, "loss": 0.0004, "step": 275710 }, { "epoch": 1.8139115675348512, "grad_norm": 0.0007279957013411985, "learning_rate": 2.6140491089958644e-07, "loss": 0.0003, "step": 275720 }, { "epoch": 1.8139773557100847, "grad_norm": 0.03472424431108107, "learning_rate": 2.6122173985947074e-07, "loss": 0.0002, "step": 275730 }, { "epoch": 1.8140431438853182, "grad_norm": 0.024245688489823645, "learning_rate": 2.6103863129605334e-07, "loss": 0.0004, "step": 275740 }, { "epoch": 1.8141089320605515, "grad_norm": 0.04284485635906087, "learning_rate": 2.608555852117489e-07, "loss": 0.0004, "step": 275750 }, { "epoch": 1.8141747202357847, "grad_norm": 0.022000815082744006, "learning_rate": 2.6067260160897114e-07, "loss": 0.0003, "step": 275760 }, { "epoch": 1.8142405084110182, "grad_norm": 0.04448669435379505, "learning_rate": 2.6048968049013136e-07, "loss": 0.0001, "step": 275770 }, { "epoch": 1.8143062965862515, "grad_norm": 0.03849619947625835, "learning_rate": 2.60306821857641e-07, "loss": 0.0004, "step": 275780 }, { "epoch": 1.814372084761485, "grad_norm": 0.03295727108672619, "learning_rate": 2.6012402571391316e-07, "loss": 0.0007, "step": 275790 }, { "epoch": 1.8144378729367183, "grad_norm": 0.001723156084601217, "learning_rate": 2.599412920613559e-07, "loss": 0.0004, "step": 275800 }, { "epoch": 1.8145036611119516, "grad_norm": 0.017337510661582713, "learning_rate": 2.597586209023789e-07, "loss": 0.0006, "step": 275810 }, { "epoch": 1.814569449287185, "grad_norm": 0.00048628107459808387, "learning_rate": 2.5957601223939087e-07, "loss": 0.0004, "step": 275820 }, { "epoch": 1.8146352374624186, "grad_norm": 0.004960526296570914, "learning_rate": 2.5939346607479875e-07, "loss": 0.0002, "step": 275830 }, { "epoch": 1.814701025637652, "grad_norm": 0.06351014698627365, "learning_rate": 2.592109824110095e-07, "loss": 0.0007, "step": 275840 }, { "epoch": 1.8147668138128852, "grad_norm": 0.024768288607961167, "learning_rate": 2.59028561250429e-07, "loss": 0.0003, "step": 275850 }, { "epoch": 1.8148326019881187, "grad_norm": 0.02526665668048567, "learning_rate": 2.5884620259546247e-07, "loss": 0.0004, "step": 275860 }, { "epoch": 1.8148983901633522, "grad_norm": 0.013862111485652874, "learning_rate": 2.5866390644851416e-07, "loss": 0.0005, "step": 275870 }, { "epoch": 1.8149641783385855, "grad_norm": 0.062015459339022305, "learning_rate": 2.584816728119871e-07, "loss": 0.0008, "step": 275880 }, { "epoch": 1.8150299665138188, "grad_norm": 0.06210069049638264, "learning_rate": 2.5829950168828443e-07, "loss": 0.0002, "step": 275890 }, { "epoch": 1.815095754689052, "grad_norm": 0.059650560474212806, "learning_rate": 2.5811739307980755e-07, "loss": 0.0003, "step": 275900 }, { "epoch": 1.8151615428642855, "grad_norm": 0.020189255203866212, "learning_rate": 2.579353469889567e-07, "loss": 0.0005, "step": 275910 }, { "epoch": 1.815227331039519, "grad_norm": 0.016143085287863988, "learning_rate": 2.577533634181334e-07, "loss": 0.0002, "step": 275920 }, { "epoch": 1.8152931192147523, "grad_norm": 0.03685203889039159, "learning_rate": 2.5757144236973617e-07, "loss": 0.0007, "step": 275930 }, { "epoch": 1.8153589073899856, "grad_norm": 0.032277881205373775, "learning_rate": 2.573895838461643e-07, "loss": 0.0002, "step": 275940 }, { "epoch": 1.8154246955652191, "grad_norm": 0.004760133189433327, "learning_rate": 2.5720778784981416e-07, "loss": 0.0005, "step": 275950 }, { "epoch": 1.8154904837404526, "grad_norm": 0.006877130761173465, "learning_rate": 2.5702605438308325e-07, "loss": 0.0005, "step": 275960 }, { "epoch": 1.815556271915686, "grad_norm": 0.01808630498250468, "learning_rate": 2.5684438344836803e-07, "loss": 0.0003, "step": 275970 }, { "epoch": 1.8156220600909192, "grad_norm": 0.017644512767673547, "learning_rate": 2.566627750480627e-07, "loss": 0.0004, "step": 275980 }, { "epoch": 1.8156878482661525, "grad_norm": 0.057005310888678704, "learning_rate": 2.5648122918456196e-07, "loss": 0.0005, "step": 275990 }, { "epoch": 1.815753636441386, "grad_norm": 0.0003620671844325713, "learning_rate": 2.5629974586025954e-07, "loss": 0.0011, "step": 276000 }, { "epoch": 1.8158194246166195, "grad_norm": 0.018573008198865183, "learning_rate": 2.561183250775484e-07, "loss": 0.0003, "step": 276010 }, { "epoch": 1.8158852127918528, "grad_norm": 0.029709174026595696, "learning_rate": 2.5593696683881954e-07, "loss": 0.0003, "step": 276020 }, { "epoch": 1.815951000967086, "grad_norm": 0.026249137880312752, "learning_rate": 2.5575567114646484e-07, "loss": 0.0004, "step": 276030 }, { "epoch": 1.8160167891423196, "grad_norm": 0.05196329021544984, "learning_rate": 2.555744380028735e-07, "loss": 0.0005, "step": 276040 }, { "epoch": 1.816082577317553, "grad_norm": 0.011477859948114198, "learning_rate": 2.5539326741043644e-07, "loss": 0.0003, "step": 276050 }, { "epoch": 1.8161483654927864, "grad_norm": 0.004862346851806859, "learning_rate": 2.552121593715412e-07, "loss": 0.0002, "step": 276060 }, { "epoch": 1.8162141536680196, "grad_norm": 0.01649448318197155, "learning_rate": 2.550311138885758e-07, "loss": 0.0011, "step": 276070 }, { "epoch": 1.8162799418432531, "grad_norm": 0.03412933412527888, "learning_rate": 2.548501309639273e-07, "loss": 0.0003, "step": 276080 }, { "epoch": 1.8163457300184864, "grad_norm": 0.009380906429609332, "learning_rate": 2.5466921059998153e-07, "loss": 0.0003, "step": 276090 }, { "epoch": 1.81641151819372, "grad_norm": 0.020800207179003674, "learning_rate": 2.5448835279912376e-07, "loss": 0.0004, "step": 276100 }, { "epoch": 1.8164773063689532, "grad_norm": 0.011663609745555286, "learning_rate": 2.543075575637388e-07, "loss": 0.0004, "step": 276110 }, { "epoch": 1.8165430945441865, "grad_norm": 0.013952000668502669, "learning_rate": 2.5412682489621034e-07, "loss": 0.0005, "step": 276120 }, { "epoch": 1.81660888271942, "grad_norm": 0.015039220015574695, "learning_rate": 2.5394615479892025e-07, "loss": 0.0006, "step": 276130 }, { "epoch": 1.8166746708946535, "grad_norm": 0.00021061442220695307, "learning_rate": 2.537655472742517e-07, "loss": 0.0003, "step": 276140 }, { "epoch": 1.8167404590698868, "grad_norm": 0.0030481842412182604, "learning_rate": 2.5358500232458505e-07, "loss": 0.0001, "step": 276150 }, { "epoch": 1.81680624724512, "grad_norm": 0.02739609217802552, "learning_rate": 2.534045199523005e-07, "loss": 0.0003, "step": 276160 }, { "epoch": 1.8168720354203536, "grad_norm": 0.020915636776323965, "learning_rate": 2.532241001597774e-07, "loss": 0.0002, "step": 276170 }, { "epoch": 1.816937823595587, "grad_norm": 0.04059541744576993, "learning_rate": 2.5304374294939594e-07, "loss": 0.0007, "step": 276180 }, { "epoch": 1.8170036117708204, "grad_norm": 0.00901749017240459, "learning_rate": 2.5286344832353314e-07, "loss": 0.0003, "step": 276190 }, { "epoch": 1.8170693999460537, "grad_norm": 0.0025224465630331616, "learning_rate": 2.526832162845655e-07, "loss": 0.0003, "step": 276200 }, { "epoch": 1.817135188121287, "grad_norm": 0.01163762592016113, "learning_rate": 2.5250304683486993e-07, "loss": 0.0004, "step": 276210 }, { "epoch": 1.8172009762965204, "grad_norm": 0.007695220923806494, "learning_rate": 2.523229399768212e-07, "loss": 0.0003, "step": 276220 }, { "epoch": 1.817266764471754, "grad_norm": 0.018932068107566997, "learning_rate": 2.5214289571279416e-07, "loss": 0.0005, "step": 276230 }, { "epoch": 1.8173325526469872, "grad_norm": 0.023226149905698747, "learning_rate": 2.519629140451624e-07, "loss": 0.0004, "step": 276240 }, { "epoch": 1.8173983408222205, "grad_norm": 0.02597081586345454, "learning_rate": 2.5178299497629897e-07, "loss": 0.0003, "step": 276250 }, { "epoch": 1.817464128997454, "grad_norm": 0.051866037015400795, "learning_rate": 2.51603138508576e-07, "loss": 0.0004, "step": 276260 }, { "epoch": 1.8175299171726875, "grad_norm": 0.0020063402029426207, "learning_rate": 2.514233446443648e-07, "loss": 0.0002, "step": 276270 }, { "epoch": 1.8175957053479208, "grad_norm": 0.0814839096605081, "learning_rate": 2.5124361338603574e-07, "loss": 0.0012, "step": 276280 }, { "epoch": 1.817661493523154, "grad_norm": 0.057309035173359744, "learning_rate": 2.5106394473595806e-07, "loss": 0.0004, "step": 276290 }, { "epoch": 1.8177272816983874, "grad_norm": 0.0075652456560303906, "learning_rate": 2.508843386965004e-07, "loss": 0.0007, "step": 276300 }, { "epoch": 1.8177930698736209, "grad_norm": 0.0012606599069520641, "learning_rate": 2.507047952700314e-07, "loss": 0.0003, "step": 276310 }, { "epoch": 1.8178588580488544, "grad_norm": 0.0036301677259108567, "learning_rate": 2.5052531445891817e-07, "loss": 0.0004, "step": 276320 }, { "epoch": 1.8179246462240877, "grad_norm": 0.003833781677708908, "learning_rate": 2.50345896265527e-07, "loss": 0.0005, "step": 276330 }, { "epoch": 1.817990434399321, "grad_norm": 0.005917639283170189, "learning_rate": 2.5016654069222266e-07, "loss": 0.0002, "step": 276340 }, { "epoch": 1.8180562225745545, "grad_norm": 0.07915388776032016, "learning_rate": 2.499872477413706e-07, "loss": 0.0004, "step": 276350 }, { "epoch": 1.818122010749788, "grad_norm": 0.03279547753512605, "learning_rate": 2.4980801741533444e-07, "loss": 0.0009, "step": 276360 }, { "epoch": 1.8181877989250212, "grad_norm": 0.010521023145814547, "learning_rate": 2.4962884971647674e-07, "loss": 0.0003, "step": 276370 }, { "epoch": 1.8182535871002545, "grad_norm": 0.030749727680006497, "learning_rate": 2.4944974464716e-07, "loss": 0.0003, "step": 276380 }, { "epoch": 1.818319375275488, "grad_norm": 0.014093728806627296, "learning_rate": 2.492707022097457e-07, "loss": 0.0003, "step": 276390 }, { "epoch": 1.8183851634507213, "grad_norm": 0.00046355874217024766, "learning_rate": 2.490917224065936e-07, "loss": 0.0003, "step": 276400 }, { "epoch": 1.8184509516259548, "grad_norm": 0.005256500486317525, "learning_rate": 2.489128052400647e-07, "loss": 0.0004, "step": 276410 }, { "epoch": 1.818516739801188, "grad_norm": 0.00024042977185208573, "learning_rate": 2.4873395071251693e-07, "loss": 0.0001, "step": 276420 }, { "epoch": 1.8185825279764214, "grad_norm": 0.11181622115943393, "learning_rate": 2.4855515882630797e-07, "loss": 0.0007, "step": 276430 }, { "epoch": 1.818648316151655, "grad_norm": 0.05272248213796596, "learning_rate": 2.4837642958379593e-07, "loss": 0.0005, "step": 276440 }, { "epoch": 1.8187141043268884, "grad_norm": 0.0160740769371493, "learning_rate": 2.481977629873372e-07, "loss": 0.0003, "step": 276450 }, { "epoch": 1.8187798925021217, "grad_norm": 0.02157035014288172, "learning_rate": 2.4801915903928665e-07, "loss": 0.0003, "step": 276460 }, { "epoch": 1.818845680677355, "grad_norm": 0.041816369480005014, "learning_rate": 2.4784061774200006e-07, "loss": 0.0002, "step": 276470 }, { "epoch": 1.8189114688525885, "grad_norm": 0.05368759982691224, "learning_rate": 2.476621390978301e-07, "loss": 0.0006, "step": 276480 }, { "epoch": 1.818977257027822, "grad_norm": 0.023524287854339642, "learning_rate": 2.474837231091309e-07, "loss": 0.0011, "step": 276490 }, { "epoch": 1.8190430452030553, "grad_norm": 0.030233320795151536, "learning_rate": 2.47305369778254e-07, "loss": 0.0002, "step": 276500 }, { "epoch": 1.8191088333782885, "grad_norm": 0.01908298811582138, "learning_rate": 2.471270791075514e-07, "loss": 0.002, "step": 276510 }, { "epoch": 1.8191746215535218, "grad_norm": 0.013480522191668395, "learning_rate": 2.4694885109937283e-07, "loss": 0.0003, "step": 276520 }, { "epoch": 1.8192404097287553, "grad_norm": 0.010201704679603922, "learning_rate": 2.467706857560692e-07, "loss": 0.0001, "step": 276530 }, { "epoch": 1.8193061979039888, "grad_norm": 0.020792793779200718, "learning_rate": 2.465925830799887e-07, "loss": 0.0005, "step": 276540 }, { "epoch": 1.8193719860792221, "grad_norm": 0.010193300556328253, "learning_rate": 2.464145430734799e-07, "loss": 0.0006, "step": 276550 }, { "epoch": 1.8194377742544554, "grad_norm": 0.01968102054899748, "learning_rate": 2.4623656573888875e-07, "loss": 0.0006, "step": 276560 }, { "epoch": 1.819503562429689, "grad_norm": 0.011364720651311383, "learning_rate": 2.4605865107856394e-07, "loss": 0.0002, "step": 276570 }, { "epoch": 1.8195693506049224, "grad_norm": 0.019824878550500014, "learning_rate": 2.4588079909484976e-07, "loss": 0.0002, "step": 276580 }, { "epoch": 1.8196351387801557, "grad_norm": 0.0004127890866399559, "learning_rate": 2.457030097900914e-07, "loss": 0.0004, "step": 276590 }, { "epoch": 1.819700926955389, "grad_norm": 0.013638103116936967, "learning_rate": 2.455252831666333e-07, "loss": 0.0003, "step": 276600 }, { "epoch": 1.8197667151306223, "grad_norm": 0.019031043895683858, "learning_rate": 2.4534761922681727e-07, "loss": 0.0005, "step": 276610 }, { "epoch": 1.8198325033058558, "grad_norm": 0.013800846655207583, "learning_rate": 2.4517001797298714e-07, "loss": 0.0002, "step": 276620 }, { "epoch": 1.8198982914810893, "grad_norm": 0.04929092244171412, "learning_rate": 2.449924794074837e-07, "loss": 0.0004, "step": 276630 }, { "epoch": 1.8199640796563226, "grad_norm": 0.019323519898839462, "learning_rate": 2.4481500353264743e-07, "loss": 0.0003, "step": 276640 }, { "epoch": 1.8200298678315558, "grad_norm": 0.019493033483266087, "learning_rate": 2.446375903508186e-07, "loss": 0.0004, "step": 276650 }, { "epoch": 1.8200956560067894, "grad_norm": 0.009350878561074027, "learning_rate": 2.4446023986433585e-07, "loss": 0.0004, "step": 276660 }, { "epoch": 1.8201614441820229, "grad_norm": 0.00023162945517429256, "learning_rate": 2.44282952075538e-07, "loss": 0.0006, "step": 276670 }, { "epoch": 1.8202272323572561, "grad_norm": 0.059064207819600145, "learning_rate": 2.441057269867625e-07, "loss": 0.0008, "step": 276680 }, { "epoch": 1.8202930205324894, "grad_norm": 0.06079304421399283, "learning_rate": 2.439285646003442e-07, "loss": 0.0003, "step": 276690 }, { "epoch": 1.8203588087077227, "grad_norm": 0.01983531219657694, "learning_rate": 2.437514649186212e-07, "loss": 0.0007, "step": 276700 }, { "epoch": 1.8204245968829562, "grad_norm": 0.016403471930260864, "learning_rate": 2.435744279439273e-07, "loss": 0.0004, "step": 276710 }, { "epoch": 1.8204903850581897, "grad_norm": 0.04379847082294553, "learning_rate": 2.433974536785966e-07, "loss": 0.0004, "step": 276720 }, { "epoch": 1.820556173233423, "grad_norm": 0.01371337133577522, "learning_rate": 2.432205421249623e-07, "loss": 0.0003, "step": 276730 }, { "epoch": 1.8206219614086563, "grad_norm": 0.02376690647821757, "learning_rate": 2.430436932853569e-07, "loss": 0.0007, "step": 276740 }, { "epoch": 1.8206877495838898, "grad_norm": 0.0001040789113747108, "learning_rate": 2.428669071621126e-07, "loss": 0.0003, "step": 276750 }, { "epoch": 1.8207535377591233, "grad_norm": 0.013893863029161867, "learning_rate": 2.4269018375755904e-07, "loss": 0.0003, "step": 276760 }, { "epoch": 1.8208193259343566, "grad_norm": 0.001122522858508746, "learning_rate": 2.4251352307402664e-07, "loss": 0.0003, "step": 276770 }, { "epoch": 1.8208851141095899, "grad_norm": 0.038107499690037724, "learning_rate": 2.423369251138452e-07, "loss": 0.0008, "step": 276780 }, { "epoch": 1.8209509022848234, "grad_norm": 0.025780696473779636, "learning_rate": 2.421603898793418e-07, "loss": 0.0003, "step": 276790 }, { "epoch": 1.8210166904600569, "grad_norm": 0.03070434157741945, "learning_rate": 2.4198391737284497e-07, "loss": 0.0007, "step": 276800 }, { "epoch": 1.8210824786352902, "grad_norm": 0.008917910542852523, "learning_rate": 2.418075075966808e-07, "loss": 0.0007, "step": 276810 }, { "epoch": 1.8211482668105234, "grad_norm": 0.008799779586178906, "learning_rate": 2.4163116055317403e-07, "loss": 0.0003, "step": 276820 }, { "epoch": 1.8212140549857567, "grad_norm": 0.14558822620923714, "learning_rate": 2.4145487624465227e-07, "loss": 0.0005, "step": 276830 }, { "epoch": 1.8212798431609902, "grad_norm": 0.03852830615634807, "learning_rate": 2.4127865467343745e-07, "loss": 0.0003, "step": 276840 }, { "epoch": 1.8213456313362237, "grad_norm": 0.01125097033412594, "learning_rate": 2.41102495841854e-07, "loss": 0.0004, "step": 276850 }, { "epoch": 1.821411419511457, "grad_norm": 0.0030267321770649116, "learning_rate": 2.409263997522243e-07, "loss": 0.0004, "step": 276860 }, { "epoch": 1.8214772076866903, "grad_norm": 0.0024900627317115385, "learning_rate": 2.4075036640687e-07, "loss": 0.0002, "step": 276870 }, { "epoch": 1.8215429958619238, "grad_norm": 0.030193452243891934, "learning_rate": 2.4057439580811136e-07, "loss": 0.0003, "step": 276880 }, { "epoch": 1.8216087840371573, "grad_norm": 0.017600151132214966, "learning_rate": 2.4039848795826883e-07, "loss": 0.0003, "step": 276890 }, { "epoch": 1.8216745722123906, "grad_norm": 0.03514518814707134, "learning_rate": 2.402226428596616e-07, "loss": 0.0003, "step": 276900 }, { "epoch": 1.8217403603876239, "grad_norm": 0.050497641701260594, "learning_rate": 2.400468605146078e-07, "loss": 0.0006, "step": 276910 }, { "epoch": 1.8218061485628572, "grad_norm": 0.016926681857352452, "learning_rate": 2.3987114092542563e-07, "loss": 0.0005, "step": 276920 }, { "epoch": 1.8218719367380907, "grad_norm": 0.0013528423099327723, "learning_rate": 2.39695484094431e-07, "loss": 0.0006, "step": 276930 }, { "epoch": 1.8219377249133242, "grad_norm": 0.05062878770196239, "learning_rate": 2.3951989002394037e-07, "loss": 0.0006, "step": 276940 }, { "epoch": 1.8220035130885575, "grad_norm": 0.0516866140898158, "learning_rate": 2.3934435871626806e-07, "loss": 0.0004, "step": 276950 }, { "epoch": 1.8220693012637907, "grad_norm": 0.054204482003394484, "learning_rate": 2.3916889017372936e-07, "loss": 0.0045, "step": 276960 }, { "epoch": 1.8221350894390242, "grad_norm": 0.052927889580765074, "learning_rate": 2.389934843986369e-07, "loss": 0.0004, "step": 276970 }, { "epoch": 1.8222008776142578, "grad_norm": 0.007398048192124007, "learning_rate": 2.388181413933038e-07, "loss": 0.0002, "step": 276980 }, { "epoch": 1.822266665789491, "grad_norm": 0.014483190083663148, "learning_rate": 2.386428611600411e-07, "loss": 0.0003, "step": 276990 }, { "epoch": 1.8223324539647243, "grad_norm": 0.006361880379158486, "learning_rate": 2.384676437011602e-07, "loss": 0.0005, "step": 277000 }, { "epoch": 1.8223982421399576, "grad_norm": 0.03630969582612363, "learning_rate": 2.3829248901897084e-07, "loss": 0.0003, "step": 277010 }, { "epoch": 1.822464030315191, "grad_norm": 0.02238288098312725, "learning_rate": 2.3811739711578297e-07, "loss": 0.0003, "step": 277020 }, { "epoch": 1.8225298184904246, "grad_norm": 0.020579760789817877, "learning_rate": 2.3794236799390413e-07, "loss": 0.0003, "step": 277030 }, { "epoch": 1.822595606665658, "grad_norm": 0.004611894919025642, "learning_rate": 2.3776740165564306e-07, "loss": 0.0002, "step": 277040 }, { "epoch": 1.8226613948408912, "grad_norm": 0.03918456413846886, "learning_rate": 2.375924981033051e-07, "loss": 0.0002, "step": 277050 }, { "epoch": 1.8227271830161247, "grad_norm": 0.015046396397717769, "learning_rate": 2.3741765733919674e-07, "loss": 0.0004, "step": 277060 }, { "epoch": 1.8227929711913582, "grad_norm": 0.011967991354369394, "learning_rate": 2.3724287936562396e-07, "loss": 0.0003, "step": 277070 }, { "epoch": 1.8228587593665915, "grad_norm": 0.022859295659028667, "learning_rate": 2.3706816418489043e-07, "loss": 0.0006, "step": 277080 }, { "epoch": 1.8229245475418248, "grad_norm": 0.028675223467800548, "learning_rate": 2.3689351179929986e-07, "loss": 0.0004, "step": 277090 }, { "epoch": 1.8229903357170583, "grad_norm": 0.006726767187412438, "learning_rate": 2.3671892221115433e-07, "loss": 0.0006, "step": 277100 }, { "epoch": 1.8230561238922915, "grad_norm": 0.03322974417206653, "learning_rate": 2.3654439542275587e-07, "loss": 0.0005, "step": 277110 }, { "epoch": 1.823121912067525, "grad_norm": 0.004259014081732758, "learning_rate": 2.3636993143640597e-07, "loss": 0.0006, "step": 277120 }, { "epoch": 1.8231877002427583, "grad_norm": 0.018920761291421097, "learning_rate": 2.361955302544039e-07, "loss": 0.0008, "step": 277130 }, { "epoch": 1.8232534884179916, "grad_norm": 0.026690329803697344, "learning_rate": 2.3602119187905005e-07, "loss": 0.0003, "step": 277140 }, { "epoch": 1.8233192765932251, "grad_norm": 0.009274411683204553, "learning_rate": 2.3584691631264144e-07, "loss": 0.0003, "step": 277150 }, { "epoch": 1.8233850647684586, "grad_norm": 0.010213925773287339, "learning_rate": 2.356727035574774e-07, "loss": 0.0003, "step": 277160 }, { "epoch": 1.823450852943692, "grad_norm": 0.004043645420295064, "learning_rate": 2.3549855361585384e-07, "loss": 0.0003, "step": 277170 }, { "epoch": 1.8235166411189252, "grad_norm": 0.007485701666972317, "learning_rate": 2.3532446649006724e-07, "loss": 0.0005, "step": 277180 }, { "epoch": 1.8235824292941587, "grad_norm": 0.010584715125425066, "learning_rate": 2.3515044218241135e-07, "loss": 0.0002, "step": 277190 }, { "epoch": 1.8236482174693922, "grad_norm": 0.0011261407345042366, "learning_rate": 2.3497648069518264e-07, "loss": 0.0004, "step": 277200 }, { "epoch": 1.8237140056446255, "grad_norm": 0.049851587912709404, "learning_rate": 2.3480258203067374e-07, "loss": 0.0003, "step": 277210 }, { "epoch": 1.8237797938198588, "grad_norm": 0.00019917254635971676, "learning_rate": 2.346287461911767e-07, "loss": 0.0005, "step": 277220 }, { "epoch": 1.823845581995092, "grad_norm": 0.053577503477057205, "learning_rate": 2.3445497317898468e-07, "loss": 0.0008, "step": 277230 }, { "epoch": 1.8239113701703256, "grad_norm": 0.03121673066250804, "learning_rate": 2.3428126299638754e-07, "loss": 0.0002, "step": 277240 }, { "epoch": 1.823977158345559, "grad_norm": 0.02450993492799155, "learning_rate": 2.3410761564567675e-07, "loss": 0.0003, "step": 277250 }, { "epoch": 1.8240429465207924, "grad_norm": 0.014372596717206795, "learning_rate": 2.3393403112914048e-07, "loss": 0.0002, "step": 277260 }, { "epoch": 1.8241087346960256, "grad_norm": 0.04336148606852154, "learning_rate": 2.3376050944906748e-07, "loss": 0.0008, "step": 277270 }, { "epoch": 1.8241745228712591, "grad_norm": 0.07179048043519609, "learning_rate": 2.3358705060774588e-07, "loss": 0.0005, "step": 277280 }, { "epoch": 1.8242403110464926, "grad_norm": 0.02479799026549, "learning_rate": 2.334136546074628e-07, "loss": 0.0003, "step": 277290 }, { "epoch": 1.824306099221726, "grad_norm": 0.021869629245047994, "learning_rate": 2.3324032145050412e-07, "loss": 0.0002, "step": 277300 }, { "epoch": 1.8243718873969592, "grad_norm": 0.0008104257495772068, "learning_rate": 2.3306705113915473e-07, "loss": 0.0003, "step": 277310 }, { "epoch": 1.8244376755721925, "grad_norm": 0.10050081589404512, "learning_rate": 2.3289384367569833e-07, "loss": 0.0007, "step": 277320 }, { "epoch": 1.824503463747426, "grad_norm": 0.025239743209310442, "learning_rate": 2.3272069906242035e-07, "loss": 0.0004, "step": 277330 }, { "epoch": 1.8245692519226595, "grad_norm": 0.02803232201546373, "learning_rate": 2.325476173016028e-07, "loss": 0.0002, "step": 277340 }, { "epoch": 1.8246350400978928, "grad_norm": 0.007502094574595781, "learning_rate": 2.3237459839552723e-07, "loss": 0.0003, "step": 277350 }, { "epoch": 1.824700828273126, "grad_norm": 0.10103713183301856, "learning_rate": 2.3220164234647512e-07, "loss": 0.001, "step": 277360 }, { "epoch": 1.8247666164483596, "grad_norm": 0.007775485057654875, "learning_rate": 2.320287491567269e-07, "loss": 0.0007, "step": 277370 }, { "epoch": 1.824832404623593, "grad_norm": 0.027361934802937055, "learning_rate": 2.3185591882856128e-07, "loss": 0.0007, "step": 277380 }, { "epoch": 1.8248981927988264, "grad_norm": 0.03258788223967651, "learning_rate": 2.3168315136425756e-07, "loss": 0.0006, "step": 277390 }, { "epoch": 1.8249639809740597, "grad_norm": 0.0032765601509668627, "learning_rate": 2.315104467660928e-07, "loss": 0.0003, "step": 277400 }, { "epoch": 1.8250297691492932, "grad_norm": 0.030002197088989192, "learning_rate": 2.313378050363452e-07, "loss": 0.0003, "step": 277410 }, { "epoch": 1.8250955573245264, "grad_norm": 0.017518280615070058, "learning_rate": 2.3116522617728955e-07, "loss": 0.0003, "step": 277420 }, { "epoch": 1.82516134549976, "grad_norm": 0.052766359040462105, "learning_rate": 2.309927101912024e-07, "loss": 0.0004, "step": 277430 }, { "epoch": 1.8252271336749932, "grad_norm": 0.009957166176826371, "learning_rate": 2.3082025708035693e-07, "loss": 0.0004, "step": 277440 }, { "epoch": 1.8252929218502265, "grad_norm": 0.002642816019655067, "learning_rate": 2.3064786684702745e-07, "loss": 0.0001, "step": 277450 }, { "epoch": 1.82535871002546, "grad_norm": 0.030016241136635313, "learning_rate": 2.304755394934871e-07, "loss": 0.0003, "step": 277460 }, { "epoch": 1.8254244982006935, "grad_norm": 0.010371260020720257, "learning_rate": 2.3030327502200744e-07, "loss": 0.0006, "step": 277470 }, { "epoch": 1.8254902863759268, "grad_norm": 0.03874932312412454, "learning_rate": 2.3013107343485942e-07, "loss": 0.0004, "step": 277480 }, { "epoch": 1.82555607455116, "grad_norm": 0.03653122530825978, "learning_rate": 2.2995893473431396e-07, "loss": 0.0003, "step": 277490 }, { "epoch": 1.8256218627263936, "grad_norm": 0.02991432462703358, "learning_rate": 2.297868589226404e-07, "loss": 0.0003, "step": 277500 }, { "epoch": 1.825687650901627, "grad_norm": 0.015615008232131957, "learning_rate": 2.2961484600210748e-07, "loss": 0.0003, "step": 277510 }, { "epoch": 1.8257534390768604, "grad_norm": 0.03283753720295446, "learning_rate": 2.2944289597498226e-07, "loss": 0.0005, "step": 277520 }, { "epoch": 1.8258192272520937, "grad_norm": 0.03559574692065225, "learning_rate": 2.2927100884353292e-07, "loss": 0.0003, "step": 277530 }, { "epoch": 1.825885015427327, "grad_norm": 0.01443168317346042, "learning_rate": 2.2909918461002488e-07, "loss": 0.0004, "step": 277540 }, { "epoch": 1.8259508036025605, "grad_norm": 0.0473501998732761, "learning_rate": 2.2892742327672357e-07, "loss": 0.0005, "step": 277550 }, { "epoch": 1.826016591777794, "grad_norm": 0.050349290108239214, "learning_rate": 2.2875572484589326e-07, "loss": 0.0006, "step": 277560 }, { "epoch": 1.8260823799530272, "grad_norm": 0.06877681470117511, "learning_rate": 2.2858408931979825e-07, "loss": 0.0006, "step": 277570 }, { "epoch": 1.8261481681282605, "grad_norm": 0.16070090476163296, "learning_rate": 2.2841251670070063e-07, "loss": 0.0004, "step": 277580 }, { "epoch": 1.826213956303494, "grad_norm": 0.003987288318080684, "learning_rate": 2.282410069908636e-07, "loss": 0.0003, "step": 277590 }, { "epoch": 1.8262797444787275, "grad_norm": 9.043860748983692e-05, "learning_rate": 2.280695601925481e-07, "loss": 0.0003, "step": 277600 }, { "epoch": 1.8263455326539608, "grad_norm": 0.043319019942185, "learning_rate": 2.2789817630801347e-07, "loss": 0.0005, "step": 277610 }, { "epoch": 1.826411320829194, "grad_norm": 0.014454060094022319, "learning_rate": 2.277268553395201e-07, "loss": 0.0005, "step": 277620 }, { "epoch": 1.8264771090044274, "grad_norm": 0.020275952305375405, "learning_rate": 2.2755559728932676e-07, "loss": 0.0003, "step": 277630 }, { "epoch": 1.826542897179661, "grad_norm": 0.030188679096472923, "learning_rate": 2.2738440215969104e-07, "loss": 0.0006, "step": 277640 }, { "epoch": 1.8266086853548944, "grad_norm": 0.01415694159336235, "learning_rate": 2.2721326995287007e-07, "loss": 0.0002, "step": 277650 }, { "epoch": 1.8266744735301277, "grad_norm": 0.12232425867998406, "learning_rate": 2.2704220067112036e-07, "loss": 0.0005, "step": 277660 }, { "epoch": 1.826740261705361, "grad_norm": 0.01877359624342764, "learning_rate": 2.268711943166968e-07, "loss": 0.0008, "step": 277670 }, { "epoch": 1.8268060498805945, "grad_norm": 0.04549600318560551, "learning_rate": 2.2670025089185366e-07, "loss": 0.0003, "step": 277680 }, { "epoch": 1.826871838055828, "grad_norm": 0.006239102357073352, "learning_rate": 2.2652937039884581e-07, "loss": 0.0015, "step": 277690 }, { "epoch": 1.8269376262310613, "grad_norm": 0.008348774720768937, "learning_rate": 2.2635855283992537e-07, "loss": 0.0005, "step": 277700 }, { "epoch": 1.8270034144062945, "grad_norm": 0.09225638305285337, "learning_rate": 2.261877982173444e-07, "loss": 0.0009, "step": 277710 }, { "epoch": 1.827069202581528, "grad_norm": 0.07834602779649931, "learning_rate": 2.260171065333544e-07, "loss": 0.0004, "step": 277720 }, { "epoch": 1.8271349907567613, "grad_norm": 0.01200220589520298, "learning_rate": 2.258464777902064e-07, "loss": 0.0003, "step": 277730 }, { "epoch": 1.8272007789319948, "grad_norm": 0.0009359851857511719, "learning_rate": 2.2567591199014915e-07, "loss": 0.0006, "step": 277740 }, { "epoch": 1.8272665671072281, "grad_norm": 0.046384877833044964, "learning_rate": 2.255054091354314e-07, "loss": 0.0005, "step": 277750 }, { "epoch": 1.8273323552824614, "grad_norm": 0.053886530598941, "learning_rate": 2.2533496922830134e-07, "loss": 0.0003, "step": 277760 }, { "epoch": 1.827398143457695, "grad_norm": 0.014731413899897636, "learning_rate": 2.2516459227100607e-07, "loss": 0.0001, "step": 277770 }, { "epoch": 1.8274639316329284, "grad_norm": 0.0011078383714506607, "learning_rate": 2.2499427826579212e-07, "loss": 0.0003, "step": 277780 }, { "epoch": 1.8275297198081617, "grad_norm": 0.027170841361573196, "learning_rate": 2.2482402721490438e-07, "loss": 0.0003, "step": 277790 }, { "epoch": 1.827595507983395, "grad_norm": 0.024178869882387394, "learning_rate": 2.2465383912058768e-07, "loss": 0.0003, "step": 277800 }, { "epoch": 1.8276612961586285, "grad_norm": 0.041514001192263235, "learning_rate": 2.2448371398508585e-07, "loss": 0.0006, "step": 277810 }, { "epoch": 1.827727084333862, "grad_norm": 0.01857243323551713, "learning_rate": 2.2431365181064201e-07, "loss": 0.0002, "step": 277820 }, { "epoch": 1.8277928725090953, "grad_norm": 0.03560715743493876, "learning_rate": 2.2414365259949777e-07, "loss": 0.0008, "step": 277830 }, { "epoch": 1.8278586606843286, "grad_norm": 0.006012842176948875, "learning_rate": 2.2397371635389465e-07, "loss": 0.0003, "step": 277840 }, { "epoch": 1.8279244488595618, "grad_norm": 0.02267894884191014, "learning_rate": 2.2380384307607361e-07, "loss": 0.0005, "step": 277850 }, { "epoch": 1.8279902370347954, "grad_norm": 0.014289517361324871, "learning_rate": 2.23634032768274e-07, "loss": 0.0003, "step": 277860 }, { "epoch": 1.8280560252100289, "grad_norm": 0.019212979505709417, "learning_rate": 2.2346428543273402e-07, "loss": 0.0005, "step": 277870 }, { "epoch": 1.8281218133852621, "grad_norm": 0.030475257655383697, "learning_rate": 2.2329460107169297e-07, "loss": 0.0011, "step": 277880 }, { "epoch": 1.8281876015604954, "grad_norm": 6.024837731034843e-05, "learning_rate": 2.2312497968738633e-07, "loss": 0.0001, "step": 277890 }, { "epoch": 1.828253389735729, "grad_norm": 0.046073616685702735, "learning_rate": 2.2295542128205172e-07, "loss": 0.0007, "step": 277900 }, { "epoch": 1.8283191779109624, "grad_norm": 0.029325207756888967, "learning_rate": 2.2278592585792459e-07, "loss": 0.0004, "step": 277910 }, { "epoch": 1.8283849660861957, "grad_norm": 0.002730324542613289, "learning_rate": 2.226164934172387e-07, "loss": 0.0004, "step": 277920 }, { "epoch": 1.828450754261429, "grad_norm": 0.017739955102526767, "learning_rate": 2.2244712396222835e-07, "loss": 0.0005, "step": 277930 }, { "epoch": 1.8285165424366623, "grad_norm": 0.006993504776691854, "learning_rate": 2.2227781749512623e-07, "loss": 0.0007, "step": 277940 }, { "epoch": 1.8285823306118958, "grad_norm": 0.006846520131234286, "learning_rate": 2.22108574018165e-07, "loss": 0.0003, "step": 277950 }, { "epoch": 1.8286481187871293, "grad_norm": 0.018967641065523018, "learning_rate": 2.2193939353357563e-07, "loss": 0.0002, "step": 277960 }, { "epoch": 1.8287139069623626, "grad_norm": 0.004436036235158763, "learning_rate": 2.2177027604358858e-07, "loss": 0.0003, "step": 277970 }, { "epoch": 1.8287796951375959, "grad_norm": 0.02644519540037911, "learning_rate": 2.2160122155043373e-07, "loss": 0.0007, "step": 277980 }, { "epoch": 1.8288454833128294, "grad_norm": 0.0002839134344217365, "learning_rate": 2.214322300563404e-07, "loss": 0.0003, "step": 277990 }, { "epoch": 1.8289112714880629, "grad_norm": 0.054134280436031745, "learning_rate": 2.212633015635357e-07, "loss": 0.0006, "step": 278000 }, { "epoch": 1.8289770596632962, "grad_norm": 0.07098350924826827, "learning_rate": 2.210944360742473e-07, "loss": 0.0012, "step": 278010 }, { "epoch": 1.8290428478385294, "grad_norm": 0.009615225228215285, "learning_rate": 2.2092563359070174e-07, "loss": 0.0003, "step": 278020 }, { "epoch": 1.8291086360137627, "grad_norm": 0.0075832953451453545, "learning_rate": 2.207568941151239e-07, "loss": 0.0003, "step": 278030 }, { "epoch": 1.8291744241889962, "grad_norm": 0.0146922660432756, "learning_rate": 2.205882176497387e-07, "loss": 0.0003, "step": 278040 }, { "epoch": 1.8292402123642297, "grad_norm": 0.017491172409601248, "learning_rate": 2.204196041967699e-07, "loss": 0.0003, "step": 278050 }, { "epoch": 1.829306000539463, "grad_norm": 0.06629113081414743, "learning_rate": 2.2025105375844125e-07, "loss": 0.001, "step": 278060 }, { "epoch": 1.8293717887146963, "grad_norm": 0.045395299400113505, "learning_rate": 2.2008256633697434e-07, "loss": 0.0006, "step": 278070 }, { "epoch": 1.8294375768899298, "grad_norm": 0.010160049932197284, "learning_rate": 2.1991414193459016e-07, "loss": 0.0006, "step": 278080 }, { "epoch": 1.8295033650651633, "grad_norm": 0.017223268108817774, "learning_rate": 2.197457805535097e-07, "loss": 0.0005, "step": 278090 }, { "epoch": 1.8295691532403966, "grad_norm": 0.05540649449033072, "learning_rate": 2.1957748219595232e-07, "loss": 0.0005, "step": 278100 }, { "epoch": 1.8296349414156299, "grad_norm": 0.016810643507951077, "learning_rate": 2.194092468641379e-07, "loss": 0.0005, "step": 278110 }, { "epoch": 1.8297007295908634, "grad_norm": 0.008633356799883047, "learning_rate": 2.1924107456028355e-07, "loss": 0.0003, "step": 278120 }, { "epoch": 1.8297665177660967, "grad_norm": 0.04661470003987922, "learning_rate": 2.1907296528660638e-07, "loss": 0.0007, "step": 278130 }, { "epoch": 1.8298323059413302, "grad_norm": 0.004010416101691272, "learning_rate": 2.1890491904532353e-07, "loss": 0.0004, "step": 278140 }, { "epoch": 1.8298980941165635, "grad_norm": 0.06410248366078121, "learning_rate": 2.187369358386504e-07, "loss": 0.0004, "step": 278150 }, { "epoch": 1.8299638822917967, "grad_norm": 0.016585069215718236, "learning_rate": 2.1856901566880084e-07, "loss": 0.0003, "step": 278160 }, { "epoch": 1.8300296704670302, "grad_norm": 0.03541398787546498, "learning_rate": 2.1840115853798915e-07, "loss": 0.0003, "step": 278170 }, { "epoch": 1.8300954586422638, "grad_norm": 0.021546841908788295, "learning_rate": 2.1823336444842913e-07, "loss": 0.0003, "step": 278180 }, { "epoch": 1.830161246817497, "grad_norm": 0.012454621058337723, "learning_rate": 2.180656334023318e-07, "loss": 0.0005, "step": 278190 }, { "epoch": 1.8302270349927303, "grad_norm": 0.02788831882577581, "learning_rate": 2.1789796540190976e-07, "loss": 0.0007, "step": 278200 }, { "epoch": 1.8302928231679638, "grad_norm": 0.053996881928202616, "learning_rate": 2.1773036044937246e-07, "loss": 0.0006, "step": 278210 }, { "epoch": 1.8303586113431973, "grad_norm": 0.01870756012681263, "learning_rate": 2.175628185469303e-07, "loss": 0.0005, "step": 278220 }, { "epoch": 1.8304243995184306, "grad_norm": 0.0022670413573643395, "learning_rate": 2.173953396967915e-07, "loss": 0.0005, "step": 278230 }, { "epoch": 1.830490187693664, "grad_norm": 0.05026649111147907, "learning_rate": 2.1722792390116487e-07, "loss": 0.0004, "step": 278240 }, { "epoch": 1.8305559758688972, "grad_norm": 0.0034631367652198626, "learning_rate": 2.1706057116225754e-07, "loss": 0.0004, "step": 278250 }, { "epoch": 1.8306217640441307, "grad_norm": 0.023338248211326195, "learning_rate": 2.1689328148227607e-07, "loss": 0.0002, "step": 278260 }, { "epoch": 1.8306875522193642, "grad_norm": 0.07158273861329248, "learning_rate": 2.1672605486342535e-07, "loss": 0.0004, "step": 278270 }, { "epoch": 1.8307533403945975, "grad_norm": 0.013638561489329582, "learning_rate": 2.1655889130791085e-07, "loss": 0.0006, "step": 278280 }, { "epoch": 1.8308191285698308, "grad_norm": 0.0019433296920019101, "learning_rate": 2.1639179081793583e-07, "loss": 0.0004, "step": 278290 }, { "epoch": 1.8308849167450643, "grad_norm": 2.2295457557294058e-05, "learning_rate": 2.1622475339570348e-07, "loss": 0.0003, "step": 278300 }, { "epoch": 1.8309507049202978, "grad_norm": 0.03427926589387322, "learning_rate": 2.160577790434165e-07, "loss": 0.0003, "step": 278310 }, { "epoch": 1.831016493095531, "grad_norm": 0.00010158258393108803, "learning_rate": 2.1589086776327595e-07, "loss": 0.0004, "step": 278320 }, { "epoch": 1.8310822812707643, "grad_norm": 0.01599647116875434, "learning_rate": 2.1572401955748278e-07, "loss": 0.0003, "step": 278330 }, { "epoch": 1.8311480694459976, "grad_norm": 0.013340491307009131, "learning_rate": 2.1555723442823638e-07, "loss": 0.0005, "step": 278340 }, { "epoch": 1.8312138576212311, "grad_norm": 0.004682120849901875, "learning_rate": 2.1539051237773556e-07, "loss": 0.0004, "step": 278350 }, { "epoch": 1.8312796457964646, "grad_norm": 0.011673115305018076, "learning_rate": 2.1522385340817741e-07, "loss": 0.0002, "step": 278360 }, { "epoch": 1.831345433971698, "grad_norm": 0.020193638707652395, "learning_rate": 2.1505725752176186e-07, "loss": 0.0002, "step": 278370 }, { "epoch": 1.8314112221469312, "grad_norm": 0.07267529996826183, "learning_rate": 2.1489072472068329e-07, "loss": 0.0002, "step": 278380 }, { "epoch": 1.8314770103221647, "grad_norm": 0.055120110499393186, "learning_rate": 2.1472425500713822e-07, "loss": 0.0009, "step": 278390 }, { "epoch": 1.8315427984973982, "grad_norm": 0.059928514278632515, "learning_rate": 2.145578483833205e-07, "loss": 0.0003, "step": 278400 }, { "epoch": 1.8316085866726315, "grad_norm": 0.02233732564415734, "learning_rate": 2.14391504851425e-07, "loss": 0.0005, "step": 278410 }, { "epoch": 1.8316743748478648, "grad_norm": 0.00011925508355988702, "learning_rate": 2.142252244136439e-07, "loss": 0.0003, "step": 278420 }, { "epoch": 1.8317401630230983, "grad_norm": 0.01084942696033654, "learning_rate": 2.1405900707217043e-07, "loss": 0.0001, "step": 278430 }, { "epoch": 1.8318059511983316, "grad_norm": 0.010255485914518468, "learning_rate": 2.1389285282919559e-07, "loss": 0.0006, "step": 278440 }, { "epoch": 1.831871739373565, "grad_norm": 0.04559628569805337, "learning_rate": 2.1372676168690986e-07, "loss": 0.0003, "step": 278450 }, { "epoch": 1.8319375275487984, "grad_norm": 0.0313062758124366, "learning_rate": 2.1356073364750264e-07, "loss": 0.0004, "step": 278460 }, { "epoch": 1.8320033157240316, "grad_norm": 0.022206475881175303, "learning_rate": 2.1339476871316378e-07, "loss": 0.0003, "step": 278470 }, { "epoch": 1.8320691038992651, "grad_norm": 0.050682205632461286, "learning_rate": 2.1322886688608048e-07, "loss": 0.0012, "step": 278480 }, { "epoch": 1.8321348920744986, "grad_norm": 0.042087635392073476, "learning_rate": 2.1306302816844037e-07, "loss": 0.0004, "step": 278490 }, { "epoch": 1.832200680249732, "grad_norm": 0.10040180141582701, "learning_rate": 2.128972525624301e-07, "loss": 0.0004, "step": 278500 }, { "epoch": 1.8322664684249652, "grad_norm": 0.07417056948635806, "learning_rate": 2.1273154007023566e-07, "loss": 0.0004, "step": 278510 }, { "epoch": 1.8323322566001987, "grad_norm": 0.005963912825746372, "learning_rate": 2.1256589069404087e-07, "loss": 0.0005, "step": 278520 }, { "epoch": 1.8323980447754322, "grad_norm": 0.027206261644893547, "learning_rate": 2.124003044360301e-07, "loss": 0.0003, "step": 278530 }, { "epoch": 1.8324638329506655, "grad_norm": 0.008407289705259762, "learning_rate": 2.122347812983866e-07, "loss": 0.0004, "step": 278540 }, { "epoch": 1.8325296211258988, "grad_norm": 0.0020376931123193896, "learning_rate": 2.120693212832925e-07, "loss": 0.0005, "step": 278550 }, { "epoch": 1.832595409301132, "grad_norm": 0.010470366652648269, "learning_rate": 2.119039243929294e-07, "loss": 0.0005, "step": 278560 }, { "epoch": 1.8326611974763656, "grad_norm": 0.019546443604258147, "learning_rate": 2.1173859062947776e-07, "loss": 0.0005, "step": 278570 }, { "epoch": 1.832726985651599, "grad_norm": 0.010300986675124114, "learning_rate": 2.1157331999511755e-07, "loss": 0.0006, "step": 278580 }, { "epoch": 1.8327927738268324, "grad_norm": 0.02924241712229197, "learning_rate": 2.1140811249202698e-07, "loss": 0.0005, "step": 278590 }, { "epoch": 1.8328585620020656, "grad_norm": 0.000751838856635062, "learning_rate": 2.1124296812238543e-07, "loss": 0.0003, "step": 278600 }, { "epoch": 1.8329243501772992, "grad_norm": 0.035445573916224965, "learning_rate": 2.1107788688836894e-07, "loss": 0.0005, "step": 278610 }, { "epoch": 1.8329901383525327, "grad_norm": 0.03292308353981738, "learning_rate": 2.109128687921541e-07, "loss": 0.0005, "step": 278620 }, { "epoch": 1.833055926527766, "grad_norm": 0.06691624395283494, "learning_rate": 2.1074791383591752e-07, "loss": 0.0005, "step": 278630 }, { "epoch": 1.8331217147029992, "grad_norm": 0.01212081294284205, "learning_rate": 2.1058302202183357e-07, "loss": 0.0001, "step": 278640 }, { "epoch": 1.8331875028782325, "grad_norm": 0.051175907995327646, "learning_rate": 2.1041819335207603e-07, "loss": 0.0006, "step": 278650 }, { "epoch": 1.833253291053466, "grad_norm": 0.023849426279228363, "learning_rate": 2.102534278288182e-07, "loss": 0.0002, "step": 278660 }, { "epoch": 1.8333190792286995, "grad_norm": 0.026733254565855964, "learning_rate": 2.100887254542322e-07, "loss": 0.0003, "step": 278670 }, { "epoch": 1.8333848674039328, "grad_norm": 0.014358502450265234, "learning_rate": 2.099240862304891e-07, "loss": 0.0002, "step": 278680 }, { "epoch": 1.833450655579166, "grad_norm": 0.041594688989347664, "learning_rate": 2.097595101597605e-07, "loss": 0.0005, "step": 278690 }, { "epoch": 1.8335164437543996, "grad_norm": 0.042376360638650815, "learning_rate": 2.0959499724421517e-07, "loss": 0.0009, "step": 278700 }, { "epoch": 1.833582231929633, "grad_norm": 0.011529628775697206, "learning_rate": 2.094305474860231e-07, "loss": 0.0003, "step": 278710 }, { "epoch": 1.8336480201048664, "grad_norm": 0.001602519503345401, "learning_rate": 2.092661608873514e-07, "loss": 0.0002, "step": 278720 }, { "epoch": 1.8337138082800997, "grad_norm": 0.00012875839070410492, "learning_rate": 2.0910183745036782e-07, "loss": 0.0002, "step": 278730 }, { "epoch": 1.8337795964553332, "grad_norm": 0.01068921646045457, "learning_rate": 2.0893757717723896e-07, "loss": 0.0002, "step": 278740 }, { "epoch": 1.8338453846305665, "grad_norm": 0.002337454398568874, "learning_rate": 2.087733800701297e-07, "loss": 0.0002, "step": 278750 }, { "epoch": 1.8339111728058, "grad_norm": 0.0010324029712906533, "learning_rate": 2.0860924613120615e-07, "loss": 0.0004, "step": 278760 }, { "epoch": 1.8339769609810332, "grad_norm": 0.023576168955357208, "learning_rate": 2.0844517536263154e-07, "loss": 0.0004, "step": 278770 }, { "epoch": 1.8340427491562665, "grad_norm": 0.03715545813294575, "learning_rate": 2.0828116776656858e-07, "loss": 0.0006, "step": 278780 }, { "epoch": 1.8341085373315, "grad_norm": 0.025513924344982192, "learning_rate": 2.081172233451806e-07, "loss": 0.0004, "step": 278790 }, { "epoch": 1.8341743255067335, "grad_norm": 0.03734474023836067, "learning_rate": 2.07953342100628e-07, "loss": 0.0005, "step": 278800 }, { "epoch": 1.8342401136819668, "grad_norm": 0.02342797850415662, "learning_rate": 2.0778952403507246e-07, "loss": 0.0003, "step": 278810 }, { "epoch": 1.8343059018572, "grad_norm": 0.04320922660508061, "learning_rate": 2.0762576915067278e-07, "loss": 0.0003, "step": 278820 }, { "epoch": 1.8343716900324336, "grad_norm": 0.0586470000046625, "learning_rate": 2.0746207744958947e-07, "loss": 0.0002, "step": 278830 }, { "epoch": 1.8344374782076671, "grad_norm": 0.02206475199591877, "learning_rate": 2.0729844893397854e-07, "loss": 0.0005, "step": 278840 }, { "epoch": 1.8345032663829004, "grad_norm": 0.0382777854595437, "learning_rate": 2.0713488360599777e-07, "loss": 0.0021, "step": 278850 }, { "epoch": 1.8345690545581337, "grad_norm": 0.00038956688998081055, "learning_rate": 2.0697138146780482e-07, "loss": 0.0003, "step": 278860 }, { "epoch": 1.834634842733367, "grad_norm": 0.017248258398955957, "learning_rate": 2.0680794252155467e-07, "loss": 0.0006, "step": 278870 }, { "epoch": 1.8347006309086005, "grad_norm": 0.06850887319994534, "learning_rate": 2.066445667694017e-07, "loss": 0.0004, "step": 278880 }, { "epoch": 1.834766419083834, "grad_norm": 0.013802075246577006, "learning_rate": 2.0648125421350085e-07, "loss": 0.0006, "step": 278890 }, { "epoch": 1.8348322072590673, "grad_norm": 0.0004013043550341134, "learning_rate": 2.0631800485600485e-07, "loss": 0.0001, "step": 278900 }, { "epoch": 1.8348979954343005, "grad_norm": 0.03357404050984301, "learning_rate": 2.0615481869906527e-07, "loss": 0.0003, "step": 278910 }, { "epoch": 1.834963783609534, "grad_norm": 0.022965130463090955, "learning_rate": 2.0599169574483436e-07, "loss": 0.0002, "step": 278920 }, { "epoch": 1.8350295717847676, "grad_norm": 0.032436242916849475, "learning_rate": 2.058286359954631e-07, "loss": 0.0008, "step": 278930 }, { "epoch": 1.8350953599600008, "grad_norm": 0.039687170925533984, "learning_rate": 2.0566563945310037e-07, "loss": 0.0002, "step": 278940 }, { "epoch": 1.8351611481352341, "grad_norm": 0.03317974201285001, "learning_rate": 2.0550270611989553e-07, "loss": 0.0005, "step": 278950 }, { "epoch": 1.8352269363104674, "grad_norm": 0.003449607366681702, "learning_rate": 2.0533983599799634e-07, "loss": 0.0003, "step": 278960 }, { "epoch": 1.835292724485701, "grad_norm": 0.0002882033666923066, "learning_rate": 2.0517702908955106e-07, "loss": 0.0004, "step": 278970 }, { "epoch": 1.8353585126609344, "grad_norm": 0.003244468444077525, "learning_rate": 2.0501428539670464e-07, "loss": 0.0009, "step": 278980 }, { "epoch": 1.8354243008361677, "grad_norm": 0.00043960424553335603, "learning_rate": 2.0485160492160428e-07, "loss": 0.0003, "step": 278990 }, { "epoch": 1.835490089011401, "grad_norm": 0.022607374556687015, "learning_rate": 2.0468898766639432e-07, "loss": 0.0004, "step": 279000 }, { "epoch": 1.8355558771866345, "grad_norm": 0.00037358896810031975, "learning_rate": 2.045264336332181e-07, "loss": 0.0004, "step": 279010 }, { "epoch": 1.835621665361868, "grad_norm": 0.0024156190831955446, "learning_rate": 2.0436394282422e-07, "loss": 0.0003, "step": 279020 }, { "epoch": 1.8356874535371013, "grad_norm": 0.0002380379247213339, "learning_rate": 2.0420151524154109e-07, "loss": 0.0005, "step": 279030 }, { "epoch": 1.8357532417123346, "grad_norm": 0.008752096702235883, "learning_rate": 2.0403915088732295e-07, "loss": 0.0002, "step": 279040 }, { "epoch": 1.8358190298875678, "grad_norm": 0.06946799433625336, "learning_rate": 2.038768497637067e-07, "loss": 0.0008, "step": 279050 }, { "epoch": 1.8358848180628013, "grad_norm": 0.00279415595206895, "learning_rate": 2.0371461187283226e-07, "loss": 0.0015, "step": 279060 }, { "epoch": 1.8359506062380349, "grad_norm": 0.0008531056492718067, "learning_rate": 2.035524372168385e-07, "loss": 0.0002, "step": 279070 }, { "epoch": 1.8360163944132681, "grad_norm": 0.035575758269064346, "learning_rate": 2.0339032579786312e-07, "loss": 0.0004, "step": 279080 }, { "epoch": 1.8360821825885014, "grad_norm": 0.0806065051834844, "learning_rate": 2.032282776180433e-07, "loss": 0.0005, "step": 279090 }, { "epoch": 1.836147970763735, "grad_norm": 0.0021974027699255753, "learning_rate": 2.0306629267951627e-07, "loss": 0.0002, "step": 279100 }, { "epoch": 1.8362137589389684, "grad_norm": 0.005230155571163848, "learning_rate": 2.029043709844164e-07, "loss": 0.0002, "step": 279110 }, { "epoch": 1.8362795471142017, "grad_norm": 0.001125091675934249, "learning_rate": 2.027425125348803e-07, "loss": 0.0005, "step": 279120 }, { "epoch": 1.836345335289435, "grad_norm": 0.02328970641798945, "learning_rate": 2.0258071733304074e-07, "loss": 0.0002, "step": 279130 }, { "epoch": 1.8364111234646685, "grad_norm": 0.0035287311013722177, "learning_rate": 2.0241898538103156e-07, "loss": 0.0003, "step": 279140 }, { "epoch": 1.836476911639902, "grad_norm": 0.017974251166448112, "learning_rate": 2.022573166809838e-07, "loss": 0.0003, "step": 279150 }, { "epoch": 1.8365426998151353, "grad_norm": 0.029094664357676506, "learning_rate": 2.0209571123503025e-07, "loss": 0.0004, "step": 279160 }, { "epoch": 1.8366084879903686, "grad_norm": 0.0462167985309973, "learning_rate": 2.0193416904530082e-07, "loss": 0.0003, "step": 279170 }, { "epoch": 1.8366742761656019, "grad_norm": 0.032531031633649225, "learning_rate": 2.017726901139261e-07, "loss": 0.0004, "step": 279180 }, { "epoch": 1.8367400643408354, "grad_norm": 0.011200379943982453, "learning_rate": 2.0161127444303373e-07, "loss": 0.0004, "step": 279190 }, { "epoch": 1.8368058525160689, "grad_norm": 0.022218827445499918, "learning_rate": 2.0144992203475265e-07, "loss": 0.0012, "step": 279200 }, { "epoch": 1.8368716406913022, "grad_norm": 0.028201219312840742, "learning_rate": 2.0128863289121004e-07, "loss": 0.0007, "step": 279210 }, { "epoch": 1.8369374288665354, "grad_norm": 0.009043841001782533, "learning_rate": 2.0112740701453248e-07, "loss": 0.0002, "step": 279220 }, { "epoch": 1.837003217041769, "grad_norm": 0.08941512932587016, "learning_rate": 2.0096624440684553e-07, "loss": 0.0005, "step": 279230 }, { "epoch": 1.8370690052170024, "grad_norm": 0.0002537706077470393, "learning_rate": 2.0080514507027306e-07, "loss": 0.0006, "step": 279240 }, { "epoch": 1.8371347933922357, "grad_norm": 0.007779415769708237, "learning_rate": 2.006441090069411e-07, "loss": 0.0012, "step": 279250 }, { "epoch": 1.837200581567469, "grad_norm": 0.04475121864947378, "learning_rate": 2.0048313621897075e-07, "loss": 0.0004, "step": 279260 }, { "epoch": 1.8372663697427023, "grad_norm": 6.299150092220155e-05, "learning_rate": 2.003222267084859e-07, "loss": 0.0003, "step": 279270 }, { "epoch": 1.8373321579179358, "grad_norm": 0.023551970125983927, "learning_rate": 2.0016138047760703e-07, "loss": 0.0004, "step": 279280 }, { "epoch": 1.8373979460931693, "grad_norm": 0.017713485452761222, "learning_rate": 2.0000059752845468e-07, "loss": 0.0009, "step": 279290 }, { "epoch": 1.8374637342684026, "grad_norm": 0.022914545626320223, "learning_rate": 1.9983987786314883e-07, "loss": 0.0003, "step": 279300 }, { "epoch": 1.8375295224436359, "grad_norm": 0.0011161306591651573, "learning_rate": 1.9967922148380836e-07, "loss": 0.0004, "step": 279310 }, { "epoch": 1.8375953106188694, "grad_norm": 0.07927133723486027, "learning_rate": 1.995186283925521e-07, "loss": 0.0008, "step": 279320 }, { "epoch": 1.8376610987941029, "grad_norm": 0.01813818627571622, "learning_rate": 1.9935809859149668e-07, "loss": 0.0004, "step": 279330 }, { "epoch": 1.8377268869693362, "grad_norm": 0.006414058814608996, "learning_rate": 1.991976320827582e-07, "loss": 0.0002, "step": 279340 }, { "epoch": 1.8377926751445695, "grad_norm": 0.03426153491876903, "learning_rate": 1.9903722886845279e-07, "loss": 0.0013, "step": 279350 }, { "epoch": 1.8378584633198027, "grad_norm": 0.0036730428922762065, "learning_rate": 1.9887688895069533e-07, "loss": 0.0002, "step": 279360 }, { "epoch": 1.8379242514950362, "grad_norm": 0.01711466486835299, "learning_rate": 1.9871661233159922e-07, "loss": 0.0002, "step": 279370 }, { "epoch": 1.8379900396702697, "grad_norm": 0.029074615247718722, "learning_rate": 1.9855639901327829e-07, "loss": 0.0003, "step": 279380 }, { "epoch": 1.838055827845503, "grad_norm": 0.0026251393627355632, "learning_rate": 1.9839624899784414e-07, "loss": 0.0005, "step": 279390 }, { "epoch": 1.8381216160207363, "grad_norm": 0.022112630808091158, "learning_rate": 1.982361622874085e-07, "loss": 0.0003, "step": 279400 }, { "epoch": 1.8381874041959698, "grad_norm": 0.038054957870343846, "learning_rate": 1.9807613888408238e-07, "loss": 0.0006, "step": 279410 }, { "epoch": 1.8382531923712033, "grad_norm": 0.0037516444903091953, "learning_rate": 1.979161787899747e-07, "loss": 0.0003, "step": 279420 }, { "epoch": 1.8383189805464366, "grad_norm": 0.02779036428306908, "learning_rate": 1.9775628200719544e-07, "loss": 0.0005, "step": 279430 }, { "epoch": 1.83838476872167, "grad_norm": 0.0008302419485138409, "learning_rate": 1.9759644853785176e-07, "loss": 0.0003, "step": 279440 }, { "epoch": 1.8384505568969034, "grad_norm": 0.009156438492492877, "learning_rate": 1.9743667838405145e-07, "loss": 0.0003, "step": 279450 }, { "epoch": 1.8385163450721367, "grad_norm": 0.04687926397637048, "learning_rate": 1.972769715479006e-07, "loss": 0.0003, "step": 279460 }, { "epoch": 1.8385821332473702, "grad_norm": 0.00155238162132153, "learning_rate": 1.9711732803150473e-07, "loss": 0.0004, "step": 279470 }, { "epoch": 1.8386479214226035, "grad_norm": 0.03824753512065189, "learning_rate": 1.9695774783696942e-07, "loss": 0.0006, "step": 279480 }, { "epoch": 1.8387137095978368, "grad_norm": 0.05038606951456134, "learning_rate": 1.9679823096639794e-07, "loss": 0.0006, "step": 279490 }, { "epoch": 1.8387794977730703, "grad_norm": 0.046974447522168165, "learning_rate": 1.9663877742189253e-07, "loss": 0.0006, "step": 279500 }, { "epoch": 1.8388452859483038, "grad_norm": 0.007126987923985912, "learning_rate": 1.9647938720555703e-07, "loss": 0.0006, "step": 279510 }, { "epoch": 1.838911074123537, "grad_norm": 0.00269620907582037, "learning_rate": 1.9632006031949258e-07, "loss": 0.0003, "step": 279520 }, { "epoch": 1.8389768622987703, "grad_norm": 0.026324718606704253, "learning_rate": 1.9616079676579913e-07, "loss": 0.0005, "step": 279530 }, { "epoch": 1.8390426504740038, "grad_norm": 0.015618841899853726, "learning_rate": 1.9600159654657668e-07, "loss": 0.0004, "step": 279540 }, { "epoch": 1.8391084386492373, "grad_norm": 0.004672249830271631, "learning_rate": 1.9584245966392413e-07, "loss": 0.0003, "step": 279550 }, { "epoch": 1.8391742268244706, "grad_norm": 0.034097764919603236, "learning_rate": 1.9568338611993975e-07, "loss": 0.0005, "step": 279560 }, { "epoch": 1.839240014999704, "grad_norm": 0.11233000211709951, "learning_rate": 1.955243759167208e-07, "loss": 0.0011, "step": 279570 }, { "epoch": 1.8393058031749372, "grad_norm": 0.02779048296762346, "learning_rate": 1.953654290563628e-07, "loss": 0.0003, "step": 279580 }, { "epoch": 1.8393715913501707, "grad_norm": 0.03860562333437362, "learning_rate": 1.9520654554096296e-07, "loss": 0.0021, "step": 279590 }, { "epoch": 1.8394373795254042, "grad_norm": 0.030722222459036205, "learning_rate": 1.9504772537261463e-07, "loss": 0.0004, "step": 279600 }, { "epoch": 1.8395031677006375, "grad_norm": 0.03670312476711836, "learning_rate": 1.9488896855341221e-07, "loss": 0.0002, "step": 279610 }, { "epoch": 1.8395689558758708, "grad_norm": 0.0013030750338978816, "learning_rate": 1.9473027508544907e-07, "loss": 0.0004, "step": 279620 }, { "epoch": 1.8396347440511043, "grad_norm": 0.01606215617481169, "learning_rate": 1.9457164497081626e-07, "loss": 0.0003, "step": 279630 }, { "epoch": 1.8397005322263378, "grad_norm": 0.0709948107595457, "learning_rate": 1.9441307821160715e-07, "loss": 0.0007, "step": 279640 }, { "epoch": 1.839766320401571, "grad_norm": 0.04715341350942732, "learning_rate": 1.9425457480991062e-07, "loss": 0.0009, "step": 279650 }, { "epoch": 1.8398321085768043, "grad_norm": 0.025932759309612006, "learning_rate": 1.9409613476781718e-07, "loss": 0.0006, "step": 279660 }, { "epoch": 1.8398978967520376, "grad_norm": 0.024412445287236174, "learning_rate": 1.9393775808741578e-07, "loss": 0.0005, "step": 279670 }, { "epoch": 1.8399636849272711, "grad_norm": 0.012356947729537151, "learning_rate": 1.9377944477079413e-07, "loss": 0.0005, "step": 279680 }, { "epoch": 1.8400294731025046, "grad_norm": 0.001401373829680317, "learning_rate": 1.936211948200395e-07, "loss": 0.0004, "step": 279690 }, { "epoch": 1.840095261277738, "grad_norm": 0.03616435495794918, "learning_rate": 1.9346300823723852e-07, "loss": 0.0006, "step": 279700 }, { "epoch": 1.8401610494529712, "grad_norm": 0.0668723015656265, "learning_rate": 1.9330488502447618e-07, "loss": 0.0004, "step": 279710 }, { "epoch": 1.8402268376282047, "grad_norm": 0.026909569020825728, "learning_rate": 1.931468251838381e-07, "loss": 0.0009, "step": 279720 }, { "epoch": 1.8402926258034382, "grad_norm": 0.006978305549072485, "learning_rate": 1.9298882871740754e-07, "loss": 0.0002, "step": 279730 }, { "epoch": 1.8403584139786715, "grad_norm": 0.060995603014610864, "learning_rate": 1.9283089562726732e-07, "loss": 0.0003, "step": 279740 }, { "epoch": 1.8404242021539048, "grad_norm": 0.022917146363563068, "learning_rate": 1.926730259155002e-07, "loss": 0.0002, "step": 279750 }, { "epoch": 1.8404899903291383, "grad_norm": 0.02322504402383605, "learning_rate": 1.925152195841873e-07, "loss": 0.0005, "step": 279760 }, { "epoch": 1.8405557785043716, "grad_norm": 0.033211578403413905, "learning_rate": 1.9235747663540916e-07, "loss": 0.0004, "step": 279770 }, { "epoch": 1.840621566679605, "grad_norm": 0.03199805463647647, "learning_rate": 1.921997970712458e-07, "loss": 0.0004, "step": 279780 }, { "epoch": 1.8406873548548384, "grad_norm": 0.047949834348925906, "learning_rate": 1.9204218089377557e-07, "loss": 0.0002, "step": 279790 }, { "epoch": 1.8407531430300716, "grad_norm": 0.021025350073558307, "learning_rate": 1.918846281050768e-07, "loss": 0.0003, "step": 279800 }, { "epoch": 1.8408189312053052, "grad_norm": 0.0006351369636994694, "learning_rate": 1.917271387072267e-07, "loss": 0.0002, "step": 279810 }, { "epoch": 1.8408847193805387, "grad_norm": 0.00015980642936243365, "learning_rate": 1.9156971270230196e-07, "loss": 0.0002, "step": 279820 }, { "epoch": 1.840950507555772, "grad_norm": 0.0029496267729134805, "learning_rate": 1.91412350092377e-07, "loss": 0.0005, "step": 279830 }, { "epoch": 1.8410162957310052, "grad_norm": 0.031804287284959626, "learning_rate": 1.9125505087952801e-07, "loss": 0.0006, "step": 279840 }, { "epoch": 1.8410820839062387, "grad_norm": 0.014658716648884778, "learning_rate": 1.910978150658277e-07, "loss": 0.0005, "step": 279850 }, { "epoch": 1.8411478720814722, "grad_norm": 0.0033112267610073236, "learning_rate": 1.9094064265334943e-07, "loss": 0.0005, "step": 279860 }, { "epoch": 1.8412136602567055, "grad_norm": 0.012134721135129485, "learning_rate": 1.9078353364416546e-07, "loss": 0.0003, "step": 279870 }, { "epoch": 1.8412794484319388, "grad_norm": 0.02356549790218211, "learning_rate": 1.9062648804034688e-07, "loss": 0.0004, "step": 279880 }, { "epoch": 1.841345236607172, "grad_norm": 0.005931271341156437, "learning_rate": 1.904695058439643e-07, "loss": 0.0006, "step": 279890 }, { "epoch": 1.8414110247824056, "grad_norm": 0.01809136700620954, "learning_rate": 1.9031258705708767e-07, "loss": 0.0004, "step": 279900 }, { "epoch": 1.841476812957639, "grad_norm": 0.036206966937212225, "learning_rate": 1.9015573168178593e-07, "loss": 0.0004, "step": 279910 }, { "epoch": 1.8415426011328724, "grad_norm": 0.06664847161975702, "learning_rate": 1.8999893972012629e-07, "loss": 0.0005, "step": 279920 }, { "epoch": 1.8416083893081057, "grad_norm": 0.07535212337221278, "learning_rate": 1.8984221117417712e-07, "loss": 0.0003, "step": 279930 }, { "epoch": 1.8416741774833392, "grad_norm": 0.019505938811437095, "learning_rate": 1.8968554604600396e-07, "loss": 0.0005, "step": 279940 }, { "epoch": 1.8417399656585727, "grad_norm": 0.07847993147471757, "learning_rate": 1.895289443376719e-07, "loss": 0.0011, "step": 279950 }, { "epoch": 1.841805753833806, "grad_norm": 0.008123014644423238, "learning_rate": 1.893724060512464e-07, "loss": 0.0006, "step": 279960 }, { "epoch": 1.8418715420090392, "grad_norm": 0.04020126942081073, "learning_rate": 1.8921593118879145e-07, "loss": 0.0004, "step": 279970 }, { "epoch": 1.8419373301842725, "grad_norm": 0.006255733141472253, "learning_rate": 1.8905951975236868e-07, "loss": 0.0004, "step": 279980 }, { "epoch": 1.842003118359506, "grad_norm": 0.015263063641714727, "learning_rate": 1.8890317174404204e-07, "loss": 0.001, "step": 279990 }, { "epoch": 1.8420689065347395, "grad_norm": 0.015259102516916385, "learning_rate": 1.8874688716587153e-07, "loss": 0.0005, "step": 280000 }, { "epoch": 1.8421346947099728, "grad_norm": 0.02064179996199544, "learning_rate": 1.8859066601991772e-07, "loss": 0.0008, "step": 280010 }, { "epoch": 1.842200482885206, "grad_norm": 0.06125695537790457, "learning_rate": 1.884345083082406e-07, "loss": 0.0005, "step": 280020 }, { "epoch": 1.8422662710604396, "grad_norm": 0.006219386580428022, "learning_rate": 1.8827841403289914e-07, "loss": 0.0007, "step": 280030 }, { "epoch": 1.8423320592356731, "grad_norm": 0.035130804004858754, "learning_rate": 1.881223831959511e-07, "loss": 0.0004, "step": 280040 }, { "epoch": 1.8423978474109064, "grad_norm": 0.005471584335374995, "learning_rate": 1.8796641579945429e-07, "loss": 0.0005, "step": 280050 }, { "epoch": 1.8424636355861397, "grad_norm": 0.041149397125214926, "learning_rate": 1.8781051184546373e-07, "loss": 0.0005, "step": 280060 }, { "epoch": 1.8425294237613732, "grad_norm": 0.01570874466914072, "learning_rate": 1.8765467133603554e-07, "loss": 0.0006, "step": 280070 }, { "epoch": 1.8425952119366065, "grad_norm": 0.11838250051547551, "learning_rate": 1.874988942732242e-07, "loss": 0.0008, "step": 280080 }, { "epoch": 1.84266100011184, "grad_norm": 0.14443195200394418, "learning_rate": 1.8734318065908365e-07, "loss": 0.0004, "step": 280090 }, { "epoch": 1.8427267882870733, "grad_norm": 0.11198824806640705, "learning_rate": 1.8718753049566717e-07, "loss": 0.001, "step": 280100 }, { "epoch": 1.8427925764623065, "grad_norm": 0.026854400979529482, "learning_rate": 1.87031943785026e-07, "loss": 0.0005, "step": 280110 }, { "epoch": 1.84285836463754, "grad_norm": 0.014894348030660382, "learning_rate": 1.868764205292123e-07, "loss": 0.0006, "step": 280120 }, { "epoch": 1.8429241528127736, "grad_norm": 0.019195371444980253, "learning_rate": 1.8672096073027558e-07, "loss": 0.0007, "step": 280130 }, { "epoch": 1.8429899409880068, "grad_norm": 0.020191706879217536, "learning_rate": 1.8656556439026642e-07, "loss": 0.0004, "step": 280140 }, { "epoch": 1.8430557291632401, "grad_norm": 0.01670888512846881, "learning_rate": 1.8641023151123206e-07, "loss": 0.0006, "step": 280150 }, { "epoch": 1.8431215173384736, "grad_norm": 0.016626128985971492, "learning_rate": 1.8625496209522253e-07, "loss": 0.0004, "step": 280160 }, { "epoch": 1.8431873055137071, "grad_norm": 0.028187521737729353, "learning_rate": 1.8609975614428398e-07, "loss": 0.0004, "step": 280170 }, { "epoch": 1.8432530936889404, "grad_norm": 0.036188418009863235, "learning_rate": 1.8594461366046257e-07, "loss": 0.0004, "step": 280180 }, { "epoch": 1.8433188818641737, "grad_norm": 0.10121946312147155, "learning_rate": 1.857895346458033e-07, "loss": 0.0005, "step": 280190 }, { "epoch": 1.843384670039407, "grad_norm": 0.012667333159326069, "learning_rate": 1.8563451910235174e-07, "loss": 0.0005, "step": 280200 }, { "epoch": 1.8434504582146405, "grad_norm": 0.019709576838553188, "learning_rate": 1.8547956703215076e-07, "loss": 0.0004, "step": 280210 }, { "epoch": 1.843516246389874, "grad_norm": 0.0003573543392950073, "learning_rate": 1.8532467843724367e-07, "loss": 0.0001, "step": 280220 }, { "epoch": 1.8435820345651073, "grad_norm": 0.03207001486007687, "learning_rate": 1.8516985331967275e-07, "loss": 0.0005, "step": 280230 }, { "epoch": 1.8436478227403406, "grad_norm": 0.046339969138857635, "learning_rate": 1.8501509168147856e-07, "loss": 0.0006, "step": 280240 }, { "epoch": 1.843713610915574, "grad_norm": 0.0004713062215717097, "learning_rate": 1.8486039352470176e-07, "loss": 0.0002, "step": 280250 }, { "epoch": 1.8437793990908076, "grad_norm": 0.016283139884187354, "learning_rate": 1.847057588513823e-07, "loss": 0.0003, "step": 280260 }, { "epoch": 1.8438451872660409, "grad_norm": 0.0013808599584930075, "learning_rate": 1.845511876635586e-07, "loss": 0.0006, "step": 280270 }, { "epoch": 1.8439109754412741, "grad_norm": 0.0007308689740625655, "learning_rate": 1.8439667996326792e-07, "loss": 0.0003, "step": 280280 }, { "epoch": 1.8439767636165074, "grad_norm": 0.0009572053593791391, "learning_rate": 1.8424223575254917e-07, "loss": 0.0003, "step": 280290 }, { "epoch": 1.844042551791741, "grad_norm": 0.016511547039897597, "learning_rate": 1.8408785503343684e-07, "loss": 0.0004, "step": 280300 }, { "epoch": 1.8441083399669744, "grad_norm": 0.025444208352302593, "learning_rate": 1.8393353780796707e-07, "loss": 0.0004, "step": 280310 }, { "epoch": 1.8441741281422077, "grad_norm": 0.04803229301850068, "learning_rate": 1.837792840781738e-07, "loss": 0.0005, "step": 280320 }, { "epoch": 1.844239916317441, "grad_norm": 0.0007196881639284738, "learning_rate": 1.836250938460915e-07, "loss": 0.0003, "step": 280330 }, { "epoch": 1.8443057044926745, "grad_norm": 0.012920792901216038, "learning_rate": 1.8347096711375245e-07, "loss": 0.0019, "step": 280340 }, { "epoch": 1.844371492667908, "grad_norm": 0.011929907726841485, "learning_rate": 1.833169038831889e-07, "loss": 0.0011, "step": 280350 }, { "epoch": 1.8444372808431413, "grad_norm": 0.07762687195636452, "learning_rate": 1.83162904156432e-07, "loss": 0.0002, "step": 280360 }, { "epoch": 1.8445030690183746, "grad_norm": 0.00667414082093059, "learning_rate": 1.8300896793551182e-07, "loss": 0.0002, "step": 280370 }, { "epoch": 1.8445688571936079, "grad_norm": 0.0732287203754485, "learning_rate": 1.8285509522245892e-07, "loss": 0.0005, "step": 280380 }, { "epoch": 1.8446346453688414, "grad_norm": 0.002408957308095103, "learning_rate": 1.827012860193006e-07, "loss": 0.0007, "step": 280390 }, { "epoch": 1.8447004335440749, "grad_norm": 0.001201180768920181, "learning_rate": 1.8254754032806576e-07, "loss": 0.0002, "step": 280400 }, { "epoch": 1.8447662217193082, "grad_norm": 0.000363576681113678, "learning_rate": 1.8239385815078004e-07, "loss": 0.0001, "step": 280410 }, { "epoch": 1.8448320098945414, "grad_norm": 0.007681276155262096, "learning_rate": 1.8224023948947124e-07, "loss": 0.0005, "step": 280420 }, { "epoch": 1.844897798069775, "grad_norm": 0.060728905294557156, "learning_rate": 1.8208668434616384e-07, "loss": 0.0002, "step": 280430 }, { "epoch": 1.8449635862450084, "grad_norm": 0.04232163757554577, "learning_rate": 1.819331927228829e-07, "loss": 0.0007, "step": 280440 }, { "epoch": 1.8450293744202417, "grad_norm": 0.002930892609881064, "learning_rate": 1.8177976462165127e-07, "loss": 0.0003, "step": 280450 }, { "epoch": 1.845095162595475, "grad_norm": 0.02874256542132031, "learning_rate": 1.8162640004449227e-07, "loss": 0.0003, "step": 280460 }, { "epoch": 1.8451609507707085, "grad_norm": 0.04569739508007809, "learning_rate": 1.8147309899342767e-07, "loss": 0.0005, "step": 280470 }, { "epoch": 1.8452267389459418, "grad_norm": 0.006883621334720988, "learning_rate": 1.813198614704792e-07, "loss": 0.0001, "step": 280480 }, { "epoch": 1.8452925271211753, "grad_norm": 0.008623662095339251, "learning_rate": 1.8116668747766575e-07, "loss": 0.0002, "step": 280490 }, { "epoch": 1.8453583152964086, "grad_norm": 0.08119537013770421, "learning_rate": 1.810135770170085e-07, "loss": 0.0004, "step": 280500 }, { "epoch": 1.8454241034716419, "grad_norm": 0.027097711591016944, "learning_rate": 1.8086053009052528e-07, "loss": 0.0003, "step": 280510 }, { "epoch": 1.8454898916468754, "grad_norm": 0.001587895316208024, "learning_rate": 1.8070754670023338e-07, "loss": 0.0006, "step": 280520 }, { "epoch": 1.8455556798221089, "grad_norm": 0.019285750098399567, "learning_rate": 1.8055462684815062e-07, "loss": 0.0005, "step": 280530 }, { "epoch": 1.8456214679973422, "grad_norm": 0.034407478694522965, "learning_rate": 1.8040177053629204e-07, "loss": 0.0003, "step": 280540 }, { "epoch": 1.8456872561725755, "grad_norm": 0.0017858857427460494, "learning_rate": 1.8024897776667439e-07, "loss": 0.0003, "step": 280550 }, { "epoch": 1.845753044347809, "grad_norm": 0.014142435169609105, "learning_rate": 1.8009624854131102e-07, "loss": 0.0003, "step": 280560 }, { "epoch": 1.8458188325230425, "grad_norm": 0.0006853281722003137, "learning_rate": 1.7994358286221647e-07, "loss": 0.0003, "step": 280570 }, { "epoch": 1.8458846206982757, "grad_norm": 0.08661220792838116, "learning_rate": 1.7979098073140245e-07, "loss": 0.0009, "step": 280580 }, { "epoch": 1.845950408873509, "grad_norm": 0.0004909794696229272, "learning_rate": 1.796384421508812e-07, "loss": 0.0002, "step": 280590 }, { "epoch": 1.8460161970487423, "grad_norm": 0.028385136734351123, "learning_rate": 1.7948596712266454e-07, "loss": 0.0006, "step": 280600 }, { "epoch": 1.8460819852239758, "grad_norm": 0.0008814271454501651, "learning_rate": 1.793335556487613e-07, "loss": 0.0004, "step": 280610 }, { "epoch": 1.8461477733992093, "grad_norm": 0.018729739780127784, "learning_rate": 1.791812077311822e-07, "loss": 0.0011, "step": 280620 }, { "epoch": 1.8462135615744426, "grad_norm": 0.015885949816096017, "learning_rate": 1.7902892337193555e-07, "loss": 0.0008, "step": 280630 }, { "epoch": 1.846279349749676, "grad_norm": 0.10609304318384064, "learning_rate": 1.788767025730287e-07, "loss": 0.0003, "step": 280640 }, { "epoch": 1.8463451379249094, "grad_norm": 0.0122155347602183, "learning_rate": 1.7872454533646888e-07, "loss": 0.0005, "step": 280650 }, { "epoch": 1.846410926100143, "grad_norm": 0.06746250004148246, "learning_rate": 1.7857245166426173e-07, "loss": 0.0007, "step": 280660 }, { "epoch": 1.8464767142753762, "grad_norm": 0.0004122002272933857, "learning_rate": 1.7842042155841232e-07, "loss": 0.0002, "step": 280670 }, { "epoch": 1.8465425024506095, "grad_norm": 0.0016001628676435937, "learning_rate": 1.782684550209257e-07, "loss": 0.0004, "step": 280680 }, { "epoch": 1.8466082906258428, "grad_norm": 0.027976737965137378, "learning_rate": 1.7811655205380586e-07, "loss": 0.0004, "step": 280690 }, { "epoch": 1.8466740788010763, "grad_norm": 0.011878642702372627, "learning_rate": 1.7796471265905445e-07, "loss": 0.0008, "step": 280700 }, { "epoch": 1.8467398669763098, "grad_norm": 0.000392085048114333, "learning_rate": 1.778129368386733e-07, "loss": 0.0003, "step": 280710 }, { "epoch": 1.846805655151543, "grad_norm": 0.004338448031275847, "learning_rate": 1.7766122459466518e-07, "loss": 0.0007, "step": 280720 }, { "epoch": 1.8468714433267763, "grad_norm": 0.03256369578592957, "learning_rate": 1.7750957592902796e-07, "loss": 0.0007, "step": 280730 }, { "epoch": 1.8469372315020098, "grad_norm": 0.03790799799240906, "learning_rate": 1.7735799084376227e-07, "loss": 0.0003, "step": 280740 }, { "epoch": 1.8470030196772433, "grad_norm": 0.02930299629153491, "learning_rate": 1.7720646934086595e-07, "loss": 0.0004, "step": 280750 }, { "epoch": 1.8470688078524766, "grad_norm": 0.035844918701922725, "learning_rate": 1.7705501142233738e-07, "loss": 0.0004, "step": 280760 }, { "epoch": 1.84713459602771, "grad_norm": 0.013449009009650474, "learning_rate": 1.769036170901728e-07, "loss": 0.0002, "step": 280770 }, { "epoch": 1.8472003842029434, "grad_norm": 0.00021622390498340284, "learning_rate": 1.7675228634636887e-07, "loss": 0.0006, "step": 280780 }, { "epoch": 1.8472661723781767, "grad_norm": 0.03780577176145359, "learning_rate": 1.7660101919292017e-07, "loss": 0.0003, "step": 280790 }, { "epoch": 1.8473319605534102, "grad_norm": 0.009816931585397707, "learning_rate": 1.7644981563182174e-07, "loss": 0.0003, "step": 280800 }, { "epoch": 1.8473977487286435, "grad_norm": 0.04183848600950448, "learning_rate": 1.7629867566506586e-07, "loss": 0.001, "step": 280810 }, { "epoch": 1.8474635369038768, "grad_norm": 0.00410080051774293, "learning_rate": 1.7614759929464652e-07, "loss": 0.0004, "step": 280820 }, { "epoch": 1.8475293250791103, "grad_norm": 0.0077594021496276405, "learning_rate": 1.7599658652255492e-07, "loss": 0.0003, "step": 280830 }, { "epoch": 1.8475951132543438, "grad_norm": 0.002714417428181897, "learning_rate": 1.7584563735078163e-07, "loss": 0.0004, "step": 280840 }, { "epoch": 1.847660901429577, "grad_norm": 0.013234566070586066, "learning_rate": 1.756947517813179e-07, "loss": 0.0003, "step": 280850 }, { "epoch": 1.8477266896048103, "grad_norm": 0.017759244241524023, "learning_rate": 1.7554392981615154e-07, "loss": 0.0007, "step": 280860 }, { "epoch": 1.8477924777800439, "grad_norm": 0.0004888574993841115, "learning_rate": 1.7539317145727264e-07, "loss": 0.0004, "step": 280870 }, { "epoch": 1.8478582659552774, "grad_norm": 0.011414414211389624, "learning_rate": 1.752424767066674e-07, "loss": 0.0002, "step": 280880 }, { "epoch": 1.8479240541305106, "grad_norm": 0.012794398773688572, "learning_rate": 1.750918455663231e-07, "loss": 0.0003, "step": 280890 }, { "epoch": 1.847989842305744, "grad_norm": 0.0456661806416477, "learning_rate": 1.7494127803822536e-07, "loss": 0.0012, "step": 280900 }, { "epoch": 1.8480556304809772, "grad_norm": 0.028319628810005354, "learning_rate": 1.7479077412436042e-07, "loss": 0.0007, "step": 280910 }, { "epoch": 1.8481214186562107, "grad_norm": 0.03146491158129568, "learning_rate": 1.7464033382671165e-07, "loss": 0.0012, "step": 280920 }, { "epoch": 1.8481872068314442, "grad_norm": 0.030312707969007452, "learning_rate": 1.7448995714726247e-07, "loss": 0.0003, "step": 280930 }, { "epoch": 1.8482529950066775, "grad_norm": 0.019575084919048857, "learning_rate": 1.7433964408799576e-07, "loss": 0.0002, "step": 280940 }, { "epoch": 1.8483187831819108, "grad_norm": 0.004653144883819192, "learning_rate": 1.7418939465089323e-07, "loss": 0.0009, "step": 280950 }, { "epoch": 1.8483845713571443, "grad_norm": 0.004780336834469127, "learning_rate": 1.7403920883793558e-07, "loss": 0.0004, "step": 280960 }, { "epoch": 1.8484503595323778, "grad_norm": 0.017365113993672656, "learning_rate": 1.7388908665110282e-07, "loss": 0.0003, "step": 280970 }, { "epoch": 1.848516147707611, "grad_norm": 0.07969690768043847, "learning_rate": 1.7373902809237454e-07, "loss": 0.0005, "step": 280980 }, { "epoch": 1.8485819358828444, "grad_norm": 0.015561866527910875, "learning_rate": 1.735890331637291e-07, "loss": 0.0002, "step": 280990 }, { "epoch": 1.8486477240580776, "grad_norm": 0.005812509818278766, "learning_rate": 1.734391018671433e-07, "loss": 0.0002, "step": 281000 }, { "epoch": 1.8487135122333112, "grad_norm": 0.043560551257987565, "learning_rate": 1.7328923420459497e-07, "loss": 0.0004, "step": 281010 }, { "epoch": 1.8487793004085447, "grad_norm": 0.05862499155159678, "learning_rate": 1.731394301780598e-07, "loss": 0.0003, "step": 281020 }, { "epoch": 1.848845088583778, "grad_norm": 0.017562832752239667, "learning_rate": 1.7298968978951115e-07, "loss": 0.0005, "step": 281030 }, { "epoch": 1.8489108767590112, "grad_norm": 0.025815683215449692, "learning_rate": 1.728400130409258e-07, "loss": 0.0013, "step": 281040 }, { "epoch": 1.8489766649342447, "grad_norm": 0.0018865051805859474, "learning_rate": 1.726903999342755e-07, "loss": 0.0012, "step": 281050 }, { "epoch": 1.8490424531094782, "grad_norm": 0.020312932952817504, "learning_rate": 1.7254085047153313e-07, "loss": 0.0004, "step": 281060 }, { "epoch": 1.8491082412847115, "grad_norm": 0.003413354219896108, "learning_rate": 1.7239136465467044e-07, "loss": 0.0004, "step": 281070 }, { "epoch": 1.8491740294599448, "grad_norm": 0.031902707104369925, "learning_rate": 1.7224194248565806e-07, "loss": 0.0007, "step": 281080 }, { "epoch": 1.8492398176351783, "grad_norm": 0.035127670655335355, "learning_rate": 1.720925839664661e-07, "loss": 0.0004, "step": 281090 }, { "epoch": 1.8493056058104116, "grad_norm": 0.0067267132480821785, "learning_rate": 1.719432890990641e-07, "loss": 0.0005, "step": 281100 }, { "epoch": 1.849371393985645, "grad_norm": 0.039021471325573, "learning_rate": 1.7179405788541993e-07, "loss": 0.0004, "step": 281110 }, { "epoch": 1.8494371821608784, "grad_norm": 0.00030427900552406424, "learning_rate": 1.7164489032750142e-07, "loss": 0.0002, "step": 281120 }, { "epoch": 1.8495029703361117, "grad_norm": 0.019479458705151106, "learning_rate": 1.7149578642727426e-07, "loss": 0.0016, "step": 281130 }, { "epoch": 1.8495687585113452, "grad_norm": 0.02198110030455424, "learning_rate": 1.7134674618670577e-07, "loss": 0.0005, "step": 281140 }, { "epoch": 1.8496345466865787, "grad_norm": 0.06285033511459172, "learning_rate": 1.711977696077599e-07, "loss": 0.0011, "step": 281150 }, { "epoch": 1.849700334861812, "grad_norm": 0.077953971543816, "learning_rate": 1.7104885669240011e-07, "loss": 0.0008, "step": 281160 }, { "epoch": 1.8497661230370452, "grad_norm": 0.009172043130748049, "learning_rate": 1.709000074425915e-07, "loss": 0.0007, "step": 281170 }, { "epoch": 1.8498319112122787, "grad_norm": 0.010181147068771788, "learning_rate": 1.7075122186029581e-07, "loss": 0.0002, "step": 281180 }, { "epoch": 1.8498976993875123, "grad_norm": 0.009013042572176161, "learning_rate": 1.7060249994747368e-07, "loss": 0.0003, "step": 281190 }, { "epoch": 1.8499634875627455, "grad_norm": 0.0009854141669698507, "learning_rate": 1.7045384170608746e-07, "loss": 0.0004, "step": 281200 }, { "epoch": 1.8500292757379788, "grad_norm": 0.08819119937259656, "learning_rate": 1.703052471380956e-07, "loss": 0.0006, "step": 281210 }, { "epoch": 1.850095063913212, "grad_norm": 0.016939732411148233, "learning_rate": 1.7015671624545872e-07, "loss": 0.0008, "step": 281220 }, { "epoch": 1.8501608520884456, "grad_norm": 0.04673595133686645, "learning_rate": 1.7000824903013357e-07, "loss": 0.0006, "step": 281230 }, { "epoch": 1.8502266402636791, "grad_norm": 0.007849161730261475, "learning_rate": 1.6985984549407808e-07, "loss": 0.0005, "step": 281240 }, { "epoch": 1.8502924284389124, "grad_norm": 0.01177610281660412, "learning_rate": 1.6971150563924955e-07, "loss": 0.0002, "step": 281250 }, { "epoch": 1.8503582166141457, "grad_norm": 0.10228360263027729, "learning_rate": 1.695632294676025e-07, "loss": 0.0006, "step": 281260 }, { "epoch": 1.8504240047893792, "grad_norm": 0.061699638698939546, "learning_rate": 1.6941501698109263e-07, "loss": 0.0003, "step": 281270 }, { "epoch": 1.8504897929646127, "grad_norm": 0.029685231789714037, "learning_rate": 1.6926686818167393e-07, "loss": 0.0005, "step": 281280 }, { "epoch": 1.850555581139846, "grad_norm": 0.055423683006466114, "learning_rate": 1.691187830712987e-07, "loss": 0.0004, "step": 281290 }, { "epoch": 1.8506213693150793, "grad_norm": 0.021732907443573143, "learning_rate": 1.6897076165192094e-07, "loss": 0.0002, "step": 281300 }, { "epoch": 1.8506871574903125, "grad_norm": 0.056219975522354367, "learning_rate": 1.6882280392549134e-07, "loss": 0.0005, "step": 281310 }, { "epoch": 1.850752945665546, "grad_norm": 0.022435657073485465, "learning_rate": 1.6867490989395996e-07, "loss": 0.0002, "step": 281320 }, { "epoch": 1.8508187338407796, "grad_norm": 0.008229917946600633, "learning_rate": 1.6852707955927805e-07, "loss": 0.0006, "step": 281330 }, { "epoch": 1.8508845220160128, "grad_norm": 0.0682404270741094, "learning_rate": 1.683793129233935e-07, "loss": 0.0005, "step": 281340 }, { "epoch": 1.8509503101912461, "grad_norm": 0.017714937689306924, "learning_rate": 1.682316099882547e-07, "loss": 0.0007, "step": 281350 }, { "epoch": 1.8510160983664796, "grad_norm": 0.0007437420233894019, "learning_rate": 1.6808397075580962e-07, "loss": 0.0004, "step": 281360 }, { "epoch": 1.8510818865417131, "grad_norm": 0.014165828685289586, "learning_rate": 1.6793639522800386e-07, "loss": 0.0005, "step": 281370 }, { "epoch": 1.8511476747169464, "grad_norm": 0.007191905480902648, "learning_rate": 1.6778888340678368e-07, "loss": 0.0001, "step": 281380 }, { "epoch": 1.8512134628921797, "grad_norm": 0.027215564267897153, "learning_rate": 1.676414352940936e-07, "loss": 0.0006, "step": 281390 }, { "epoch": 1.851279251067413, "grad_norm": 0.019182174991499688, "learning_rate": 1.6749405089187765e-07, "loss": 0.0003, "step": 281400 }, { "epoch": 1.8513450392426465, "grad_norm": 0.005079230671641877, "learning_rate": 1.6734673020207927e-07, "loss": 0.0002, "step": 281410 }, { "epoch": 1.85141082741788, "grad_norm": 0.004323381166910863, "learning_rate": 1.6719947322663966e-07, "loss": 0.0003, "step": 281420 }, { "epoch": 1.8514766155931133, "grad_norm": 0.013022730854026224, "learning_rate": 1.6705227996750172e-07, "loss": 0.0004, "step": 281430 }, { "epoch": 1.8515424037683466, "grad_norm": 0.03153388392115852, "learning_rate": 1.669051504266056e-07, "loss": 0.0003, "step": 281440 }, { "epoch": 1.85160819194358, "grad_norm": 0.00170594575656825, "learning_rate": 1.6675808460589082e-07, "loss": 0.0006, "step": 281450 }, { "epoch": 1.8516739801188136, "grad_norm": 0.009630538628106717, "learning_rate": 1.6661108250729697e-07, "loss": 0.0002, "step": 281460 }, { "epoch": 1.8517397682940469, "grad_norm": 0.00902171435887038, "learning_rate": 1.664641441327608e-07, "loss": 0.0006, "step": 281470 }, { "epoch": 1.8518055564692801, "grad_norm": 0.03019784410668145, "learning_rate": 1.6631726948422077e-07, "loss": 0.0011, "step": 281480 }, { "epoch": 1.8518713446445136, "grad_norm": 0.04355251526082831, "learning_rate": 1.6617045856361313e-07, "loss": 0.0003, "step": 281490 }, { "epoch": 1.8519371328197471, "grad_norm": 0.036963739928768086, "learning_rate": 1.6602371137287244e-07, "loss": 0.0003, "step": 281500 }, { "epoch": 1.8520029209949804, "grad_norm": 0.0005352531100992599, "learning_rate": 1.658770279139349e-07, "loss": 0.0012, "step": 281510 }, { "epoch": 1.8520687091702137, "grad_norm": 0.029373441475925285, "learning_rate": 1.6573040818873343e-07, "loss": 0.0004, "step": 281520 }, { "epoch": 1.852134497345447, "grad_norm": 0.06297029174087498, "learning_rate": 1.6558385219920148e-07, "loss": 0.0007, "step": 281530 }, { "epoch": 1.8522002855206805, "grad_norm": 0.0014858459915893967, "learning_rate": 1.654373599472714e-07, "loss": 0.0004, "step": 281540 }, { "epoch": 1.852266073695914, "grad_norm": 0.001224070399444533, "learning_rate": 1.652909314348733e-07, "loss": 0.0002, "step": 281550 }, { "epoch": 1.8523318618711473, "grad_norm": 0.0027979363656566027, "learning_rate": 1.6514456666393952e-07, "loss": 0.0001, "step": 281560 }, { "epoch": 1.8523976500463806, "grad_norm": 0.01699642132533684, "learning_rate": 1.6499826563639908e-07, "loss": 0.0003, "step": 281570 }, { "epoch": 1.852463438221614, "grad_norm": 0.005454347441090645, "learning_rate": 1.6485202835418103e-07, "loss": 0.0002, "step": 281580 }, { "epoch": 1.8525292263968476, "grad_norm": 0.005893926399007671, "learning_rate": 1.6470585481921265e-07, "loss": 0.0008, "step": 281590 }, { "epoch": 1.8525950145720809, "grad_norm": 0.01396950389644431, "learning_rate": 1.6455974503342131e-07, "loss": 0.0006, "step": 281600 }, { "epoch": 1.8526608027473142, "grad_norm": 0.030860708330382926, "learning_rate": 1.6441369899873382e-07, "loss": 0.0014, "step": 281610 }, { "epoch": 1.8527265909225474, "grad_norm": 0.020035238370084606, "learning_rate": 1.642677167170753e-07, "loss": 0.0003, "step": 281620 }, { "epoch": 1.852792379097781, "grad_norm": 0.04579267644704933, "learning_rate": 1.641217981903709e-07, "loss": 0.0004, "step": 281630 }, { "epoch": 1.8528581672730144, "grad_norm": 0.019076476889274288, "learning_rate": 1.6397594342054401e-07, "loss": 0.0004, "step": 281640 }, { "epoch": 1.8529239554482477, "grad_norm": 0.02477515620024011, "learning_rate": 1.6383015240951706e-07, "loss": 0.0002, "step": 281650 }, { "epoch": 1.852989743623481, "grad_norm": 0.03019360548283349, "learning_rate": 1.6368442515921345e-07, "loss": 0.0002, "step": 281660 }, { "epoch": 1.8530555317987145, "grad_norm": 0.013840579681812508, "learning_rate": 1.6353876167155337e-07, "loss": 0.0004, "step": 281670 }, { "epoch": 1.853121319973948, "grad_norm": 0.017787228311677422, "learning_rate": 1.6339316194845745e-07, "loss": 0.0002, "step": 281680 }, { "epoch": 1.8531871081491813, "grad_norm": 0.002423097298445456, "learning_rate": 1.6324762599184585e-07, "loss": 0.0003, "step": 281690 }, { "epoch": 1.8532528963244146, "grad_norm": 0.033648422465572286, "learning_rate": 1.6310215380363704e-07, "loss": 0.0003, "step": 281700 }, { "epoch": 1.8533186844996479, "grad_norm": 0.034761096092231714, "learning_rate": 1.629567453857489e-07, "loss": 0.0003, "step": 281710 }, { "epoch": 1.8533844726748814, "grad_norm": 0.05400182632362783, "learning_rate": 1.6281140074009828e-07, "loss": 0.0005, "step": 281720 }, { "epoch": 1.8534502608501149, "grad_norm": 0.04607015700532648, "learning_rate": 1.6266611986860192e-07, "loss": 0.0006, "step": 281730 }, { "epoch": 1.8535160490253482, "grad_norm": 0.012275745175497722, "learning_rate": 1.62520902773175e-07, "loss": 0.0014, "step": 281740 }, { "epoch": 1.8535818372005815, "grad_norm": 0.05977540425237022, "learning_rate": 1.623757494557321e-07, "loss": 0.0006, "step": 281750 }, { "epoch": 1.853647625375815, "grad_norm": 0.0396020783877763, "learning_rate": 1.6223065991818665e-07, "loss": 0.0005, "step": 281760 }, { "epoch": 1.8537134135510485, "grad_norm": 0.023991041335079157, "learning_rate": 1.6208563416245217e-07, "loss": 0.0003, "step": 281770 }, { "epoch": 1.8537792017262817, "grad_norm": 0.002507537711931271, "learning_rate": 1.6194067219043986e-07, "loss": 0.0005, "step": 281780 }, { "epoch": 1.853844989901515, "grad_norm": 0.001177646994572057, "learning_rate": 1.6179577400406155e-07, "loss": 0.0003, "step": 281790 }, { "epoch": 1.8539107780767485, "grad_norm": 0.0383667533780375, "learning_rate": 1.6165093960522737e-07, "loss": 0.0005, "step": 281800 }, { "epoch": 1.8539765662519818, "grad_norm": 0.04483227778850733, "learning_rate": 1.615061689958469e-07, "loss": 0.0003, "step": 281810 }, { "epoch": 1.8540423544272153, "grad_norm": 0.02872674563773954, "learning_rate": 1.6136146217782866e-07, "loss": 0.0006, "step": 281820 }, { "epoch": 1.8541081426024486, "grad_norm": 0.0020075127090155766, "learning_rate": 1.6121681915308107e-07, "loss": 0.0007, "step": 281830 }, { "epoch": 1.854173930777682, "grad_norm": 0.052496459852560504, "learning_rate": 1.610722399235104e-07, "loss": 0.0004, "step": 281840 }, { "epoch": 1.8542397189529154, "grad_norm": 0.01738631284607856, "learning_rate": 1.6092772449102288e-07, "loss": 0.0005, "step": 281850 }, { "epoch": 1.854305507128149, "grad_norm": 0.002982481094701967, "learning_rate": 1.6078327285752427e-07, "loss": 0.0003, "step": 281860 }, { "epoch": 1.8543712953033822, "grad_norm": 0.06360313892574952, "learning_rate": 1.6063888502491854e-07, "loss": 0.0004, "step": 281870 }, { "epoch": 1.8544370834786155, "grad_norm": 0.01967300577446816, "learning_rate": 1.6049456099510974e-07, "loss": 0.0002, "step": 281880 }, { "epoch": 1.854502871653849, "grad_norm": 0.032622317041429136, "learning_rate": 1.603503007700008e-07, "loss": 0.0005, "step": 281890 }, { "epoch": 1.8545686598290825, "grad_norm": 0.026595762727944963, "learning_rate": 1.6020610435149298e-07, "loss": 0.0006, "step": 281900 }, { "epoch": 1.8546344480043158, "grad_norm": 0.011390224988426387, "learning_rate": 1.6006197174148808e-07, "loss": 0.0004, "step": 281910 }, { "epoch": 1.854700236179549, "grad_norm": 0.001728405389128497, "learning_rate": 1.599179029418857e-07, "loss": 0.0003, "step": 281920 }, { "epoch": 1.8547660243547823, "grad_norm": 0.04132385078149365, "learning_rate": 1.5977389795458541e-07, "loss": 0.0022, "step": 281930 }, { "epoch": 1.8548318125300158, "grad_norm": 0.01320713021832767, "learning_rate": 1.5962995678148573e-07, "loss": 0.0002, "step": 281940 }, { "epoch": 1.8548976007052493, "grad_norm": 0.00048280998833784695, "learning_rate": 1.5948607942448513e-07, "loss": 0.0003, "step": 281950 }, { "epoch": 1.8549633888804826, "grad_norm": 0.009600686732412269, "learning_rate": 1.5934226588548041e-07, "loss": 0.0003, "step": 281960 }, { "epoch": 1.855029177055716, "grad_norm": 0.017219411376225204, "learning_rate": 1.5919851616636673e-07, "loss": 0.0002, "step": 281970 }, { "epoch": 1.8550949652309494, "grad_norm": 0.027087837279527897, "learning_rate": 1.5905483026904033e-07, "loss": 0.0004, "step": 281980 }, { "epoch": 1.855160753406183, "grad_norm": 0.057961686443912384, "learning_rate": 1.5891120819539475e-07, "loss": 0.0005, "step": 281990 }, { "epoch": 1.8552265415814162, "grad_norm": 0.06534048651544563, "learning_rate": 1.5876764994732396e-07, "loss": 0.0005, "step": 282000 }, { "epoch": 1.8552923297566495, "grad_norm": 0.010528287570395426, "learning_rate": 1.5862415552672038e-07, "loss": 0.0002, "step": 282010 }, { "epoch": 1.8553581179318828, "grad_norm": 0.027589853122395834, "learning_rate": 1.5848072493547585e-07, "loss": 0.0004, "step": 282020 }, { "epoch": 1.8554239061071163, "grad_norm": 0.06502309845985045, "learning_rate": 1.5833735817548212e-07, "loss": 0.0004, "step": 282030 }, { "epoch": 1.8554896942823498, "grad_norm": 0.0006883626404992427, "learning_rate": 1.581940552486283e-07, "loss": 0.0009, "step": 282040 }, { "epoch": 1.855555482457583, "grad_norm": 4.884260102044431e-05, "learning_rate": 1.580508161568045e-07, "loss": 0.0005, "step": 282050 }, { "epoch": 1.8556212706328163, "grad_norm": 0.007719560143684601, "learning_rate": 1.579076409018987e-07, "loss": 0.0003, "step": 282060 }, { "epoch": 1.8556870588080498, "grad_norm": 0.00833457317807304, "learning_rate": 1.5776452948579825e-07, "loss": 0.0002, "step": 282070 }, { "epoch": 1.8557528469832834, "grad_norm": 0.052067912666380084, "learning_rate": 1.5762148191039107e-07, "loss": 0.0003, "step": 282080 }, { "epoch": 1.8558186351585166, "grad_norm": 0.04766417798535569, "learning_rate": 1.5747849817756233e-07, "loss": 0.0003, "step": 282090 }, { "epoch": 1.85588442333375, "grad_norm": 0.009686975558322833, "learning_rate": 1.5733557828919778e-07, "loss": 0.0003, "step": 282100 }, { "epoch": 1.8559502115089834, "grad_norm": 0.00843241081740884, "learning_rate": 1.5719272224718086e-07, "loss": 0.0005, "step": 282110 }, { "epoch": 1.8560159996842167, "grad_norm": 0.04328990650391443, "learning_rate": 1.5704993005339508e-07, "loss": 0.0014, "step": 282120 }, { "epoch": 1.8560817878594502, "grad_norm": 0.019897942315576532, "learning_rate": 1.5690720170972396e-07, "loss": 0.0006, "step": 282130 }, { "epoch": 1.8561475760346835, "grad_norm": 0.056665817890854536, "learning_rate": 1.5676453721804817e-07, "loss": 0.0004, "step": 282140 }, { "epoch": 1.8562133642099168, "grad_norm": 0.014370944621339456, "learning_rate": 1.566219365802496e-07, "loss": 0.0003, "step": 282150 }, { "epoch": 1.8562791523851503, "grad_norm": 0.010758059949991029, "learning_rate": 1.5647939979820727e-07, "loss": 0.0015, "step": 282160 }, { "epoch": 1.8563449405603838, "grad_norm": 0.006172587160681187, "learning_rate": 1.5633692687380075e-07, "loss": 0.0002, "step": 282170 }, { "epoch": 1.856410728735617, "grad_norm": 0.0756932024718077, "learning_rate": 1.5619451780890916e-07, "loss": 0.0003, "step": 282180 }, { "epoch": 1.8564765169108504, "grad_norm": 0.012492907180598145, "learning_rate": 1.5605217260540927e-07, "loss": 0.0003, "step": 282190 }, { "epoch": 1.8565423050860839, "grad_norm": 0.029231134984800258, "learning_rate": 1.5590989126517743e-07, "loss": 0.0002, "step": 282200 }, { "epoch": 1.8566080932613174, "grad_norm": 0.020045971939068413, "learning_rate": 1.557676737900904e-07, "loss": 0.0003, "step": 282210 }, { "epoch": 1.8566738814365507, "grad_norm": 0.016483625339274815, "learning_rate": 1.5562552018202283e-07, "loss": 0.0002, "step": 282220 }, { "epoch": 1.856739669611784, "grad_norm": 0.038850669180595085, "learning_rate": 1.554834304428493e-07, "loss": 0.0005, "step": 282230 }, { "epoch": 1.8568054577870172, "grad_norm": 0.00013199361937029538, "learning_rate": 1.5534140457444226e-07, "loss": 0.0002, "step": 282240 }, { "epoch": 1.8568712459622507, "grad_norm": 0.0022181789683736026, "learning_rate": 1.551994425786746e-07, "loss": 0.0005, "step": 282250 }, { "epoch": 1.8569370341374842, "grad_norm": 0.019546284005832797, "learning_rate": 1.5505754445741873e-07, "loss": 0.0003, "step": 282260 }, { "epoch": 1.8570028223127175, "grad_norm": 0.026773875203161025, "learning_rate": 1.5491571021254425e-07, "loss": 0.0019, "step": 282270 }, { "epoch": 1.8570686104879508, "grad_norm": 0.022896026363349878, "learning_rate": 1.5477393984592193e-07, "loss": 0.0002, "step": 282280 }, { "epoch": 1.8571343986631843, "grad_norm": 0.010321212394944532, "learning_rate": 1.5463223335942025e-07, "loss": 0.0006, "step": 282290 }, { "epoch": 1.8572001868384178, "grad_norm": 0.043455699825402816, "learning_rate": 1.5449059075490824e-07, "loss": 0.0005, "step": 282300 }, { "epoch": 1.857265975013651, "grad_norm": 0.008421285403309985, "learning_rate": 1.5434901203425279e-07, "loss": 0.0003, "step": 282310 }, { "epoch": 1.8573317631888844, "grad_norm": 0.02418482580166687, "learning_rate": 1.5420749719932014e-07, "loss": 0.0001, "step": 282320 }, { "epoch": 1.8573975513641177, "grad_norm": 0.02112847248767354, "learning_rate": 1.5406604625197664e-07, "loss": 0.0002, "step": 282330 }, { "epoch": 1.8574633395393512, "grad_norm": 0.028826225645256994, "learning_rate": 1.5392465919408739e-07, "loss": 0.0006, "step": 282340 }, { "epoch": 1.8575291277145847, "grad_norm": 0.01015542987920561, "learning_rate": 1.5378333602751594e-07, "loss": 0.0003, "step": 282350 }, { "epoch": 1.857594915889818, "grad_norm": 0.04224094915948795, "learning_rate": 1.5364207675412635e-07, "loss": 0.0009, "step": 282360 }, { "epoch": 1.8576607040650512, "grad_norm": 0.0021009044696735334, "learning_rate": 1.535008813757799e-07, "loss": 0.0005, "step": 282370 }, { "epoch": 1.8577264922402847, "grad_norm": 0.010912229596031553, "learning_rate": 1.53359749894339e-07, "loss": 0.0003, "step": 282380 }, { "epoch": 1.8577922804155182, "grad_norm": 0.04439394581988945, "learning_rate": 1.5321868231166326e-07, "loss": 0.0005, "step": 282390 }, { "epoch": 1.8578580685907515, "grad_norm": 0.01781969679558284, "learning_rate": 1.5307767862961398e-07, "loss": 0.0007, "step": 282400 }, { "epoch": 1.8579238567659848, "grad_norm": 0.015532550037657019, "learning_rate": 1.5293673885004912e-07, "loss": 0.0003, "step": 282410 }, { "epoch": 1.857989644941218, "grad_norm": 0.012153951146769308, "learning_rate": 1.5279586297482718e-07, "loss": 0.0014, "step": 282420 }, { "epoch": 1.8580554331164516, "grad_norm": 0.02071648567807919, "learning_rate": 1.5265505100580558e-07, "loss": 0.0009, "step": 282430 }, { "epoch": 1.858121221291685, "grad_norm": 0.020896453163012522, "learning_rate": 1.525143029448406e-07, "loss": 0.0006, "step": 282440 }, { "epoch": 1.8581870094669184, "grad_norm": 0.03867119898161529, "learning_rate": 1.5237361879378798e-07, "loss": 0.0004, "step": 282450 }, { "epoch": 1.8582527976421517, "grad_norm": 0.0195949591445421, "learning_rate": 1.5223299855450237e-07, "loss": 0.0003, "step": 282460 }, { "epoch": 1.8583185858173852, "grad_norm": 0.038789918639596806, "learning_rate": 1.5209244222883723e-07, "loss": 0.0007, "step": 282470 }, { "epoch": 1.8583843739926187, "grad_norm": 0.01697831960934446, "learning_rate": 1.5195194981864724e-07, "loss": 0.0006, "step": 282480 }, { "epoch": 1.858450162167852, "grad_norm": 0.011835439937676684, "learning_rate": 1.518115213257837e-07, "loss": 0.0004, "step": 282490 }, { "epoch": 1.8585159503430853, "grad_norm": 0.0054953657152412815, "learning_rate": 1.5167115675209786e-07, "loss": 0.0003, "step": 282500 }, { "epoch": 1.8585817385183188, "grad_norm": 0.010385662089098562, "learning_rate": 1.5153085609944052e-07, "loss": 0.0004, "step": 282510 }, { "epoch": 1.8586475266935523, "grad_norm": 0.00032100039267930443, "learning_rate": 1.5139061936966183e-07, "loss": 0.0002, "step": 282520 }, { "epoch": 1.8587133148687855, "grad_norm": 0.012537391007708657, "learning_rate": 1.5125044656460975e-07, "loss": 0.0003, "step": 282530 }, { "epoch": 1.8587791030440188, "grad_norm": 0.03034021007897894, "learning_rate": 1.5111033768613336e-07, "loss": 0.0006, "step": 282540 }, { "epoch": 1.8588448912192521, "grad_norm": 0.0014428064235124255, "learning_rate": 1.50970292736079e-07, "loss": 0.0007, "step": 282550 }, { "epoch": 1.8589106793944856, "grad_norm": 0.020821393308616588, "learning_rate": 1.508303117162935e-07, "loss": 0.0002, "step": 282560 }, { "epoch": 1.8589764675697191, "grad_norm": 0.009359731834090517, "learning_rate": 1.5069039462862257e-07, "loss": 0.0004, "step": 282570 }, { "epoch": 1.8590422557449524, "grad_norm": 0.025188304993989562, "learning_rate": 1.5055054147491032e-07, "loss": 0.0007, "step": 282580 }, { "epoch": 1.8591080439201857, "grad_norm": 0.004045767238481709, "learning_rate": 1.5041075225700143e-07, "loss": 0.0003, "step": 282590 }, { "epoch": 1.8591738320954192, "grad_norm": 0.006876270126228335, "learning_rate": 1.5027102697673767e-07, "loss": 0.0002, "step": 282600 }, { "epoch": 1.8592396202706527, "grad_norm": 0.001303506324820541, "learning_rate": 1.5013136563596264e-07, "loss": 0.0001, "step": 282610 }, { "epoch": 1.859305408445886, "grad_norm": 0.04487916925082127, "learning_rate": 1.4999176823651652e-07, "loss": 0.0003, "step": 282620 }, { "epoch": 1.8593711966211193, "grad_norm": 0.0009124175269433687, "learning_rate": 1.4985223478024057e-07, "loss": 0.0004, "step": 282630 }, { "epoch": 1.8594369847963526, "grad_norm": 0.021412959258178627, "learning_rate": 1.4971276526897339e-07, "loss": 0.0003, "step": 282640 }, { "epoch": 1.859502772971586, "grad_norm": 0.008897490595210238, "learning_rate": 1.4957335970455511e-07, "loss": 0.0005, "step": 282650 }, { "epoch": 1.8595685611468196, "grad_norm": 0.00032280735621821545, "learning_rate": 1.4943401808882263e-07, "loss": 0.0003, "step": 282660 }, { "epoch": 1.8596343493220528, "grad_norm": 0.03598353380001236, "learning_rate": 1.4929474042361335e-07, "loss": 0.0005, "step": 282670 }, { "epoch": 1.8597001374972861, "grad_norm": 0.035151820296704814, "learning_rate": 1.491555267107636e-07, "loss": 0.0003, "step": 282680 }, { "epoch": 1.8597659256725196, "grad_norm": 0.13552344375159495, "learning_rate": 1.4901637695210858e-07, "loss": 0.0006, "step": 282690 }, { "epoch": 1.8598317138477531, "grad_norm": 0.035321545422816944, "learning_rate": 1.488772911494829e-07, "loss": 0.0004, "step": 282700 }, { "epoch": 1.8598975020229864, "grad_norm": 0.006685761574245529, "learning_rate": 1.4873826930472067e-07, "loss": 0.0006, "step": 282710 }, { "epoch": 1.8599632901982197, "grad_norm": 0.0049222964372501905, "learning_rate": 1.4859931141965489e-07, "loss": 0.0003, "step": 282720 }, { "epoch": 1.860029078373453, "grad_norm": 0.001598677955078326, "learning_rate": 1.4846041749611683e-07, "loss": 0.0004, "step": 282730 }, { "epoch": 1.8600948665486865, "grad_norm": 0.10969450552659504, "learning_rate": 1.483215875359384e-07, "loss": 0.0003, "step": 282740 }, { "epoch": 1.86016065472392, "grad_norm": 0.07064337278578374, "learning_rate": 1.4818282154094977e-07, "loss": 0.0003, "step": 282750 }, { "epoch": 1.8602264428991533, "grad_norm": 0.006067472963948905, "learning_rate": 1.4804411951298002e-07, "loss": 0.0005, "step": 282760 }, { "epoch": 1.8602922310743866, "grad_norm": 0.025132785579778074, "learning_rate": 1.479054814538583e-07, "loss": 0.0006, "step": 282770 }, { "epoch": 1.86035801924962, "grad_norm": 0.037635128747739006, "learning_rate": 1.4776690736541198e-07, "loss": 0.0002, "step": 282780 }, { "epoch": 1.8604238074248536, "grad_norm": 0.02018629542612223, "learning_rate": 1.4762839724946853e-07, "loss": 0.0006, "step": 282790 }, { "epoch": 1.8604895956000869, "grad_norm": 0.040499820050388526, "learning_rate": 1.474899511078537e-07, "loss": 0.0012, "step": 282800 }, { "epoch": 1.8605553837753201, "grad_norm": 0.06908784845198639, "learning_rate": 1.4735156894239322e-07, "loss": 0.0002, "step": 282810 }, { "epoch": 1.8606211719505537, "grad_norm": 0.15742509211263814, "learning_rate": 1.4721325075491122e-07, "loss": 0.0004, "step": 282820 }, { "epoch": 1.860686960125787, "grad_norm": 0.010908977161641867, "learning_rate": 1.4707499654723122e-07, "loss": 0.0002, "step": 282830 }, { "epoch": 1.8607527483010204, "grad_norm": 0.05883423364698044, "learning_rate": 1.4693680632117623e-07, "loss": 0.0003, "step": 282840 }, { "epoch": 1.8608185364762537, "grad_norm": 0.00032789648378491815, "learning_rate": 1.4679868007856812e-07, "loss": 0.0002, "step": 282850 }, { "epoch": 1.860884324651487, "grad_norm": 0.06033315751689005, "learning_rate": 1.4666061782122764e-07, "loss": 0.0006, "step": 282860 }, { "epoch": 1.8609501128267205, "grad_norm": 0.00419256517025696, "learning_rate": 1.4652261955097557e-07, "loss": 0.0004, "step": 282870 }, { "epoch": 1.861015901001954, "grad_norm": 0.01199026849975519, "learning_rate": 1.4638468526963101e-07, "loss": 0.0004, "step": 282880 }, { "epoch": 1.8610816891771873, "grad_norm": 0.018759163202156266, "learning_rate": 1.462468149790125e-07, "loss": 0.0004, "step": 282890 }, { "epoch": 1.8611474773524206, "grad_norm": 0.006399841316948809, "learning_rate": 1.4610900868093802e-07, "loss": 0.0002, "step": 282900 }, { "epoch": 1.861213265527654, "grad_norm": 0.007340512677301719, "learning_rate": 1.4597126637722392e-07, "loss": 0.0004, "step": 282910 }, { "epoch": 1.8612790537028876, "grad_norm": 0.029273225630631904, "learning_rate": 1.458335880696865e-07, "loss": 0.0007, "step": 282920 }, { "epoch": 1.8613448418781209, "grad_norm": 0.03502618483857514, "learning_rate": 1.4569597376014043e-07, "loss": 0.0006, "step": 282930 }, { "epoch": 1.8614106300533542, "grad_norm": 0.014054947012124858, "learning_rate": 1.4555842345040094e-07, "loss": 0.0002, "step": 282940 }, { "epoch": 1.8614764182285874, "grad_norm": 0.0049749639981008405, "learning_rate": 1.45420937142281e-07, "loss": 0.0002, "step": 282950 }, { "epoch": 1.861542206403821, "grad_norm": 0.00019415830961257505, "learning_rate": 1.4528351483759306e-07, "loss": 0.0007, "step": 282960 }, { "epoch": 1.8616079945790545, "grad_norm": 0.011082477825492189, "learning_rate": 1.4514615653814955e-07, "loss": 0.0006, "step": 282970 }, { "epoch": 1.8616737827542877, "grad_norm": 0.033840058250601227, "learning_rate": 1.450088622457607e-07, "loss": 0.0003, "step": 282980 }, { "epoch": 1.861739570929521, "grad_norm": 0.018960138660465527, "learning_rate": 1.448716319622373e-07, "loss": 0.0004, "step": 282990 }, { "epoch": 1.8618053591047545, "grad_norm": 0.017070612206428824, "learning_rate": 1.447344656893884e-07, "loss": 0.0007, "step": 283000 }, { "epoch": 1.861871147279988, "grad_norm": 0.004467979888735823, "learning_rate": 1.4459736342902208e-07, "loss": 0.0003, "step": 283010 }, { "epoch": 1.8619369354552213, "grad_norm": 0.0008810856705621393, "learning_rate": 1.4446032518294572e-07, "loss": 0.0004, "step": 283020 }, { "epoch": 1.8620027236304546, "grad_norm": 0.00394573093674889, "learning_rate": 1.4432335095296735e-07, "loss": 0.0006, "step": 283030 }, { "epoch": 1.8620685118056879, "grad_norm": 0.010941273944160347, "learning_rate": 1.4418644074089106e-07, "loss": 0.0002, "step": 283040 }, { "epoch": 1.8621342999809214, "grad_norm": 0.0006932415169768254, "learning_rate": 1.440495945485232e-07, "loss": 0.0003, "step": 283050 }, { "epoch": 1.862200088156155, "grad_norm": 0.05337704659174969, "learning_rate": 1.4391281237766786e-07, "loss": 0.0007, "step": 283060 }, { "epoch": 1.8622658763313882, "grad_norm": 0.010980567769109759, "learning_rate": 1.437760942301275e-07, "loss": 0.0003, "step": 283070 }, { "epoch": 1.8623316645066215, "grad_norm": 0.033423951696432905, "learning_rate": 1.436394401077057e-07, "loss": 0.0005, "step": 283080 }, { "epoch": 1.862397452681855, "grad_norm": 0.013448275963206576, "learning_rate": 1.4350285001220264e-07, "loss": 0.0007, "step": 283090 }, { "epoch": 1.8624632408570885, "grad_norm": 0.037733564818075, "learning_rate": 1.4336632394542138e-07, "loss": 0.0002, "step": 283100 }, { "epoch": 1.8625290290323218, "grad_norm": 0.001333466919441363, "learning_rate": 1.432298619091599e-07, "loss": 0.0004, "step": 283110 }, { "epoch": 1.862594817207555, "grad_norm": 0.04986252004784079, "learning_rate": 1.4309346390521893e-07, "loss": 0.0005, "step": 283120 }, { "epoch": 1.8626606053827885, "grad_norm": 0.006574351640684681, "learning_rate": 1.4295712993539544e-07, "loss": 0.0002, "step": 283130 }, { "epoch": 1.8627263935580218, "grad_norm": 0.0005382308741512214, "learning_rate": 1.4282086000148742e-07, "loss": 0.0003, "step": 283140 }, { "epoch": 1.8627921817332553, "grad_norm": 0.09712859961906015, "learning_rate": 1.426846541052912e-07, "loss": 0.0007, "step": 283150 }, { "epoch": 1.8628579699084886, "grad_norm": 0.003054664108576732, "learning_rate": 1.425485122486031e-07, "loss": 0.0006, "step": 283160 }, { "epoch": 1.862923758083722, "grad_norm": 0.004587493595872502, "learning_rate": 1.4241243443321784e-07, "loss": 0.0003, "step": 283170 }, { "epoch": 1.8629895462589554, "grad_norm": 0.00039845177801342747, "learning_rate": 1.4227642066092894e-07, "loss": 0.0002, "step": 283180 }, { "epoch": 1.863055334434189, "grad_norm": 0.0005372906778376498, "learning_rate": 1.4214047093353e-07, "loss": 0.0002, "step": 283190 }, { "epoch": 1.8631211226094222, "grad_norm": 0.038628882108615585, "learning_rate": 1.42004585252814e-07, "loss": 0.0003, "step": 283200 }, { "epoch": 1.8631869107846555, "grad_norm": 0.030850536390959554, "learning_rate": 1.4186876362057122e-07, "loss": 0.0003, "step": 283210 }, { "epoch": 1.863252698959889, "grad_norm": 0.05353883513702462, "learning_rate": 1.4173300603859298e-07, "loss": 0.0003, "step": 283220 }, { "epoch": 1.8633184871351225, "grad_norm": 0.09222770134281884, "learning_rate": 1.4159731250866948e-07, "loss": 0.0006, "step": 283230 }, { "epoch": 1.8633842753103558, "grad_norm": 0.02553135744903554, "learning_rate": 1.4146168303258934e-07, "loss": 0.0003, "step": 283240 }, { "epoch": 1.863450063485589, "grad_norm": 0.03863363763723255, "learning_rate": 1.413261176121411e-07, "loss": 0.0007, "step": 283250 }, { "epoch": 1.8635158516608223, "grad_norm": 0.02596961673436185, "learning_rate": 1.4119061624911167e-07, "loss": 0.0006, "step": 283260 }, { "epoch": 1.8635816398360558, "grad_norm": 0.022735477711418855, "learning_rate": 1.4105517894528742e-07, "loss": 0.0003, "step": 283270 }, { "epoch": 1.8636474280112894, "grad_norm": 0.00876125894803817, "learning_rate": 1.4091980570245467e-07, "loss": 0.0004, "step": 283280 }, { "epoch": 1.8637132161865226, "grad_norm": 0.05687682836191488, "learning_rate": 1.40784496522397e-07, "loss": 0.0007, "step": 283290 }, { "epoch": 1.863779004361756, "grad_norm": 0.03709566972023668, "learning_rate": 1.4064925140689966e-07, "loss": 0.0006, "step": 283300 }, { "epoch": 1.8638447925369894, "grad_norm": 0.24507811758998102, "learning_rate": 1.405140703577451e-07, "loss": 0.0027, "step": 283310 }, { "epoch": 1.863910580712223, "grad_norm": 0.015589222054464565, "learning_rate": 1.4037895337671526e-07, "loss": 0.0002, "step": 283320 }, { "epoch": 1.8639763688874562, "grad_norm": 0.018974835780178438, "learning_rate": 1.40243900465592e-07, "loss": 0.0002, "step": 283330 }, { "epoch": 1.8640421570626895, "grad_norm": 0.0015626075922420082, "learning_rate": 1.4010891162615558e-07, "loss": 0.0005, "step": 283340 }, { "epoch": 1.8641079452379228, "grad_norm": 0.02031839213964235, "learning_rate": 1.399739868601857e-07, "loss": 0.0005, "step": 283350 }, { "epoch": 1.8641737334131563, "grad_norm": 0.007325982757881455, "learning_rate": 1.3983912616946148e-07, "loss": 0.0007, "step": 283360 }, { "epoch": 1.8642395215883898, "grad_norm": 0.02518687776160074, "learning_rate": 1.3970432955576152e-07, "loss": 0.0005, "step": 283370 }, { "epoch": 1.864305309763623, "grad_norm": 0.1212111437752794, "learning_rate": 1.3956959702086215e-07, "loss": 0.0008, "step": 283380 }, { "epoch": 1.8643710979388564, "grad_norm": 0.009094755693493049, "learning_rate": 1.3943492856653974e-07, "loss": 0.0003, "step": 283390 }, { "epoch": 1.8644368861140899, "grad_norm": 0.025291390813429702, "learning_rate": 1.3930032419456952e-07, "loss": 0.0003, "step": 283400 }, { "epoch": 1.8645026742893234, "grad_norm": 0.00018923900773993736, "learning_rate": 1.391657839067273e-07, "loss": 0.0008, "step": 283410 }, { "epoch": 1.8645684624645567, "grad_norm": 0.015489332401541945, "learning_rate": 1.3903130770478557e-07, "loss": 0.0011, "step": 283420 }, { "epoch": 1.86463425063979, "grad_norm": 0.014275510990890986, "learning_rate": 1.3889689559051788e-07, "loss": 0.0006, "step": 283430 }, { "epoch": 1.8647000388150234, "grad_norm": 0.011944070481863881, "learning_rate": 1.3876254756569673e-07, "loss": 0.0004, "step": 283440 }, { "epoch": 1.8647658269902567, "grad_norm": 0.002962067029656422, "learning_rate": 1.3862826363209293e-07, "loss": 0.0002, "step": 283450 }, { "epoch": 1.8648316151654902, "grad_norm": 0.0065841556764023295, "learning_rate": 1.3849404379147668e-07, "loss": 0.0003, "step": 283460 }, { "epoch": 1.8648974033407235, "grad_norm": 0.013794788216724746, "learning_rate": 1.3835988804561772e-07, "loss": 0.0004, "step": 283470 }, { "epoch": 1.8649631915159568, "grad_norm": 0.005426496564836946, "learning_rate": 1.3822579639628408e-07, "loss": 0.0002, "step": 283480 }, { "epoch": 1.8650289796911903, "grad_norm": 0.0014510969739896357, "learning_rate": 1.3809176884524544e-07, "loss": 0.0002, "step": 283490 }, { "epoch": 1.8650947678664238, "grad_norm": 0.022348077483543, "learning_rate": 1.379578053942676e-07, "loss": 0.0004, "step": 283500 }, { "epoch": 1.865160556041657, "grad_norm": 0.007354126877305932, "learning_rate": 1.3782390604511698e-07, "loss": 0.0009, "step": 283510 }, { "epoch": 1.8652263442168904, "grad_norm": 0.002310560981116342, "learning_rate": 1.3769007079955876e-07, "loss": 0.0003, "step": 283520 }, { "epoch": 1.8652921323921239, "grad_norm": 0.0037895412947892416, "learning_rate": 1.3755629965935823e-07, "loss": 0.0001, "step": 283530 }, { "epoch": 1.8653579205673574, "grad_norm": 0.018443036861896522, "learning_rate": 1.3742259262627787e-07, "loss": 0.0002, "step": 283540 }, { "epoch": 1.8654237087425907, "grad_norm": 0.0982432396102815, "learning_rate": 1.3728894970208073e-07, "loss": 0.0008, "step": 283550 }, { "epoch": 1.865489496917824, "grad_norm": 0.13334296595859615, "learning_rate": 1.371553708885298e-07, "loss": 0.0006, "step": 283560 }, { "epoch": 1.8655552850930572, "grad_norm": 0.01572889734359033, "learning_rate": 1.3702185618738484e-07, "loss": 0.0002, "step": 283570 }, { "epoch": 1.8656210732682907, "grad_norm": 0.0013329086654044532, "learning_rate": 1.3688840560040717e-07, "loss": 0.0001, "step": 283580 }, { "epoch": 1.8656868614435242, "grad_norm": 0.0010911639219337405, "learning_rate": 1.3675501912935596e-07, "loss": 0.0003, "step": 283590 }, { "epoch": 1.8657526496187575, "grad_norm": 0.026630671376430375, "learning_rate": 1.3662169677598925e-07, "loss": 0.0002, "step": 283600 }, { "epoch": 1.8658184377939908, "grad_norm": 0.0047678884722214526, "learning_rate": 1.364884385420645e-07, "loss": 0.0003, "step": 283610 }, { "epoch": 1.8658842259692243, "grad_norm": 0.07599327249243379, "learning_rate": 1.3635524442934035e-07, "loss": 0.0005, "step": 283620 }, { "epoch": 1.8659500141444578, "grad_norm": 0.018856534047783275, "learning_rate": 1.3622211443957146e-07, "loss": 0.0002, "step": 283630 }, { "epoch": 1.866015802319691, "grad_norm": 0.030462492133203768, "learning_rate": 1.3608904857451366e-07, "loss": 0.0004, "step": 283640 }, { "epoch": 1.8660815904949244, "grad_norm": 0.016196739273681638, "learning_rate": 1.3595604683592057e-07, "loss": 0.0004, "step": 283650 }, { "epoch": 1.8661473786701577, "grad_norm": 0.020465601109137078, "learning_rate": 1.358231092255463e-07, "loss": 0.0004, "step": 283660 }, { "epoch": 1.8662131668453912, "grad_norm": 0.026791940747945137, "learning_rate": 1.356902357451434e-07, "loss": 0.0003, "step": 283670 }, { "epoch": 1.8662789550206247, "grad_norm": 0.034434141753961804, "learning_rate": 1.3555742639646373e-07, "loss": 0.0003, "step": 283680 }, { "epoch": 1.866344743195858, "grad_norm": 0.03171881937422944, "learning_rate": 1.3542468118125819e-07, "loss": 0.0005, "step": 283690 }, { "epoch": 1.8664105313710913, "grad_norm": 0.00035309882213809245, "learning_rate": 1.352920001012764e-07, "loss": 0.0001, "step": 283700 }, { "epoch": 1.8664763195463248, "grad_norm": 0.002817963990076498, "learning_rate": 1.351593831582687e-07, "loss": 0.0006, "step": 283710 }, { "epoch": 1.8665421077215583, "grad_norm": 0.030799662678451613, "learning_rate": 1.3502683035398312e-07, "loss": 0.0006, "step": 283720 }, { "epoch": 1.8666078958967915, "grad_norm": 0.01734838397157279, "learning_rate": 1.3489434169016658e-07, "loss": 0.0005, "step": 283730 }, { "epoch": 1.8666736840720248, "grad_norm": 0.007940521819375957, "learning_rate": 1.34761917168566e-07, "loss": 0.0003, "step": 283740 }, { "epoch": 1.8667394722472581, "grad_norm": 0.0168431157266609, "learning_rate": 1.3462955679092838e-07, "loss": 0.0013, "step": 283750 }, { "epoch": 1.8668052604224916, "grad_norm": 0.009755793488655773, "learning_rate": 1.344972605589978e-07, "loss": 0.0004, "step": 283760 }, { "epoch": 1.8668710485977251, "grad_norm": 0.03622664696308318, "learning_rate": 1.3436502847451905e-07, "loss": 0.0004, "step": 283770 }, { "epoch": 1.8669368367729584, "grad_norm": 0.02771617663241476, "learning_rate": 1.3423286053923513e-07, "loss": 0.0004, "step": 283780 }, { "epoch": 1.8670026249481917, "grad_norm": 0.001587443605611619, "learning_rate": 1.3410075675488853e-07, "loss": 0.0004, "step": 283790 }, { "epoch": 1.8670684131234252, "grad_norm": 0.022645026269608074, "learning_rate": 1.339687171232207e-07, "loss": 0.0007, "step": 283800 }, { "epoch": 1.8671342012986587, "grad_norm": 0.030815711896514338, "learning_rate": 1.3383674164597294e-07, "loss": 0.0004, "step": 283810 }, { "epoch": 1.867199989473892, "grad_norm": 0.012656237391879934, "learning_rate": 1.33704830324885e-07, "loss": 0.0003, "step": 283820 }, { "epoch": 1.8672657776491253, "grad_norm": 0.003125367063536149, "learning_rate": 1.335729831616961e-07, "loss": 0.0002, "step": 283830 }, { "epoch": 1.8673315658243588, "grad_norm": 0.0659784499887236, "learning_rate": 1.3344120015814478e-07, "loss": 0.0003, "step": 283840 }, { "epoch": 1.867397353999592, "grad_norm": 0.02657871830315603, "learning_rate": 1.3330948131596745e-07, "loss": 0.0003, "step": 283850 }, { "epoch": 1.8674631421748256, "grad_norm": 0.01313391166611519, "learning_rate": 1.3317782663690216e-07, "loss": 0.0003, "step": 283860 }, { "epoch": 1.8675289303500588, "grad_norm": 0.03396279610783396, "learning_rate": 1.3304623612268308e-07, "loss": 0.0008, "step": 283870 }, { "epoch": 1.8675947185252921, "grad_norm": 0.10284683492858385, "learning_rate": 1.329147097750466e-07, "loss": 0.0007, "step": 283880 }, { "epoch": 1.8676605067005256, "grad_norm": 0.057679564568430576, "learning_rate": 1.3278324759572637e-07, "loss": 0.0002, "step": 283890 }, { "epoch": 1.8677262948757591, "grad_norm": 0.042391774583254285, "learning_rate": 1.3265184958645538e-07, "loss": 0.0003, "step": 283900 }, { "epoch": 1.8677920830509924, "grad_norm": 0.038704959407459195, "learning_rate": 1.3252051574896619e-07, "loss": 0.0014, "step": 283910 }, { "epoch": 1.8678578712262257, "grad_norm": 0.0012159512056677708, "learning_rate": 1.323892460849896e-07, "loss": 0.0003, "step": 283920 }, { "epoch": 1.8679236594014592, "grad_norm": 0.006678955469490915, "learning_rate": 1.3225804059625702e-07, "loss": 0.0003, "step": 283930 }, { "epoch": 1.8679894475766927, "grad_norm": 0.0009435809443919975, "learning_rate": 1.3212689928449873e-07, "loss": 0.0007, "step": 283940 }, { "epoch": 1.868055235751926, "grad_norm": 0.006676379316952054, "learning_rate": 1.3199582215144224e-07, "loss": 0.0004, "step": 283950 }, { "epoch": 1.8681210239271593, "grad_norm": 0.11865439995085657, "learning_rate": 1.3186480919881729e-07, "loss": 0.0007, "step": 283960 }, { "epoch": 1.8681868121023926, "grad_norm": 0.01225291657573089, "learning_rate": 1.3173386042835023e-07, "loss": 0.0011, "step": 283970 }, { "epoch": 1.868252600277626, "grad_norm": 0.001850585346775055, "learning_rate": 1.316029758417675e-07, "loss": 0.0008, "step": 283980 }, { "epoch": 1.8683183884528596, "grad_norm": 0.01745733077522375, "learning_rate": 1.3147215544079495e-07, "loss": 0.0003, "step": 283990 }, { "epoch": 1.8683841766280929, "grad_norm": 0.009742403938652723, "learning_rate": 1.3134139922715672e-07, "loss": 0.0003, "step": 284000 }, { "epoch": 1.8684499648033261, "grad_norm": 0.057169397483517764, "learning_rate": 1.312107072025781e-07, "loss": 0.0004, "step": 284010 }, { "epoch": 1.8685157529785597, "grad_norm": 0.007354036733900658, "learning_rate": 1.310800793687811e-07, "loss": 0.0004, "step": 284020 }, { "epoch": 1.8685815411537932, "grad_norm": 0.00024873349215486895, "learning_rate": 1.3094951572748815e-07, "loss": 0.0002, "step": 284030 }, { "epoch": 1.8686473293290264, "grad_norm": 0.00020049951061266012, "learning_rate": 1.3081901628042072e-07, "loss": 0.0006, "step": 284040 }, { "epoch": 1.8687131175042597, "grad_norm": 0.017826933484801532, "learning_rate": 1.3068858102929906e-07, "loss": 0.0003, "step": 284050 }, { "epoch": 1.868778905679493, "grad_norm": 0.01670628738112962, "learning_rate": 1.3055820997584346e-07, "loss": 0.0004, "step": 284060 }, { "epoch": 1.8688446938547265, "grad_norm": 0.030978658211795265, "learning_rate": 1.30427903121772e-07, "loss": 0.0005, "step": 284070 }, { "epoch": 1.86891048202996, "grad_norm": 0.02856305002872693, "learning_rate": 1.3029766046880333e-07, "loss": 0.0004, "step": 284080 }, { "epoch": 1.8689762702051933, "grad_norm": 0.0574770164740446, "learning_rate": 1.3016748201865381e-07, "loss": 0.0004, "step": 284090 }, { "epoch": 1.8690420583804266, "grad_norm": 0.00618436499837092, "learning_rate": 1.3003736777304043e-07, "loss": 0.0002, "step": 284100 }, { "epoch": 1.86910784655566, "grad_norm": 0.027631624081233697, "learning_rate": 1.2990731773367848e-07, "loss": 0.0001, "step": 284110 }, { "epoch": 1.8691736347308936, "grad_norm": 0.000216814630757254, "learning_rate": 1.297773319022827e-07, "loss": 0.0006, "step": 284120 }, { "epoch": 1.8692394229061269, "grad_norm": 0.026269686630633594, "learning_rate": 1.296474102805656e-07, "loss": 0.0006, "step": 284130 }, { "epoch": 1.8693052110813602, "grad_norm": 0.040780646117109014, "learning_rate": 1.295175528702419e-07, "loss": 0.0004, "step": 284140 }, { "epoch": 1.8693709992565937, "grad_norm": 0.007956419765240846, "learning_rate": 1.2938775967302254e-07, "loss": 0.0004, "step": 284150 }, { "epoch": 1.869436787431827, "grad_norm": 0.017814772464617948, "learning_rate": 1.2925803069061937e-07, "loss": 0.0004, "step": 284160 }, { "epoch": 1.8695025756070605, "grad_norm": 0.030917637663571158, "learning_rate": 1.291283659247422e-07, "loss": 0.0006, "step": 284170 }, { "epoch": 1.8695683637822937, "grad_norm": 0.0010631860155249843, "learning_rate": 1.2899876537710076e-07, "loss": 0.0005, "step": 284180 }, { "epoch": 1.869634151957527, "grad_norm": 0.033014030598073105, "learning_rate": 1.288692290494037e-07, "loss": 0.0005, "step": 284190 }, { "epoch": 1.8696999401327605, "grad_norm": 0.04592391562120136, "learning_rate": 1.2873975694335906e-07, "loss": 0.0005, "step": 284200 }, { "epoch": 1.869765728307994, "grad_norm": 0.0011147089697745608, "learning_rate": 1.2861034906067327e-07, "loss": 0.0006, "step": 284210 }, { "epoch": 1.8698315164832273, "grad_norm": 0.0038645294413303948, "learning_rate": 1.284810054030533e-07, "loss": 0.0006, "step": 284220 }, { "epoch": 1.8698973046584606, "grad_norm": 0.0026970170334233595, "learning_rate": 1.2835172597220337e-07, "loss": 0.0004, "step": 284230 }, { "epoch": 1.869963092833694, "grad_norm": 0.015098390488433583, "learning_rate": 1.2822251076982872e-07, "loss": 0.0006, "step": 284240 }, { "epoch": 1.8700288810089276, "grad_norm": 0.0026633278719156482, "learning_rate": 1.2809335979763305e-07, "loss": 0.0005, "step": 284250 }, { "epoch": 1.870094669184161, "grad_norm": 0.04844555365046706, "learning_rate": 1.279642730573183e-07, "loss": 0.0007, "step": 284260 }, { "epoch": 1.8701604573593942, "grad_norm": 0.017498133066514904, "learning_rate": 1.27835250550587e-07, "loss": 0.0002, "step": 284270 }, { "epoch": 1.8702262455346275, "grad_norm": 0.035219524929693706, "learning_rate": 1.2770629227914e-07, "loss": 0.0004, "step": 284280 }, { "epoch": 1.870292033709861, "grad_norm": 0.02673110013338002, "learning_rate": 1.2757739824467764e-07, "loss": 0.0004, "step": 284290 }, { "epoch": 1.8703578218850945, "grad_norm": 0.028536238520372237, "learning_rate": 1.2744856844889963e-07, "loss": 0.0007, "step": 284300 }, { "epoch": 1.8704236100603278, "grad_norm": 0.006405911652796839, "learning_rate": 1.2731980289350353e-07, "loss": 0.0007, "step": 284310 }, { "epoch": 1.870489398235561, "grad_norm": 0.013522045227865808, "learning_rate": 1.2719110158018743e-07, "loss": 0.0003, "step": 284320 }, { "epoch": 1.8705551864107945, "grad_norm": 0.027890185404078362, "learning_rate": 1.270624645106483e-07, "loss": 0.0008, "step": 284330 }, { "epoch": 1.870620974586028, "grad_norm": 0.005508386110624576, "learning_rate": 1.2693389168658253e-07, "loss": 0.0019, "step": 284340 }, { "epoch": 1.8706867627612613, "grad_norm": 0.0281866420952255, "learning_rate": 1.2680538310968438e-07, "loss": 0.0006, "step": 284350 }, { "epoch": 1.8707525509364946, "grad_norm": 0.0027137846382038764, "learning_rate": 1.2667693878164855e-07, "loss": 0.0005, "step": 284360 }, { "epoch": 1.870818339111728, "grad_norm": 0.015393521957949977, "learning_rate": 1.2654855870416815e-07, "loss": 0.0006, "step": 284370 }, { "epoch": 1.8708841272869614, "grad_norm": 0.007668850069855407, "learning_rate": 1.2642024287893628e-07, "loss": 0.0002, "step": 284380 }, { "epoch": 1.870949915462195, "grad_norm": 0.0456431874254952, "learning_rate": 1.262919913076438e-07, "loss": 0.0005, "step": 284390 }, { "epoch": 1.8710157036374282, "grad_norm": 0.0005059629077814537, "learning_rate": 1.261638039919827e-07, "loss": 0.0014, "step": 284400 }, { "epoch": 1.8710814918126615, "grad_norm": 0.013835498666201651, "learning_rate": 1.2603568093364215e-07, "loss": 0.0002, "step": 284410 }, { "epoch": 1.871147279987895, "grad_norm": 0.0029590655239194277, "learning_rate": 1.259076221343125e-07, "loss": 0.0002, "step": 284420 }, { "epoch": 1.8712130681631285, "grad_norm": 0.022656415375834273, "learning_rate": 1.2577962759568074e-07, "loss": 0.0005, "step": 284430 }, { "epoch": 1.8712788563383618, "grad_norm": 0.03214232983244962, "learning_rate": 1.2565169731943495e-07, "loss": 0.0004, "step": 284440 }, { "epoch": 1.871344644513595, "grad_norm": 0.04168370794149924, "learning_rate": 1.2552383130726208e-07, "loss": 0.0006, "step": 284450 }, { "epoch": 1.8714104326888286, "grad_norm": 0.028270405611474567, "learning_rate": 1.253960295608475e-07, "loss": 0.0004, "step": 284460 }, { "epoch": 1.8714762208640618, "grad_norm": 0.0028158247683751987, "learning_rate": 1.2526829208187652e-07, "loss": 0.0005, "step": 284470 }, { "epoch": 1.8715420090392954, "grad_norm": 0.055198188100016504, "learning_rate": 1.2514061887203277e-07, "loss": 0.0007, "step": 284480 }, { "epoch": 1.8716077972145286, "grad_norm": 0.04837518212643327, "learning_rate": 1.2501300993299937e-07, "loss": 0.0008, "step": 284490 }, { "epoch": 1.871673585389762, "grad_norm": 0.011652350155586995, "learning_rate": 1.2488546526645996e-07, "loss": 0.0003, "step": 284500 }, { "epoch": 1.8717393735649954, "grad_norm": 0.036257780459881546, "learning_rate": 1.2475798487409486e-07, "loss": 0.0009, "step": 284510 }, { "epoch": 1.871805161740229, "grad_norm": 0.02383750933108503, "learning_rate": 1.2463056875758494e-07, "loss": 0.0002, "step": 284520 }, { "epoch": 1.8718709499154622, "grad_norm": 0.021318718267027387, "learning_rate": 1.2450321691861057e-07, "loss": 0.0003, "step": 284530 }, { "epoch": 1.8719367380906955, "grad_norm": 0.029678296780230998, "learning_rate": 1.2437592935885033e-07, "loss": 0.0008, "step": 284540 }, { "epoch": 1.872002526265929, "grad_norm": 0.049462684297734205, "learning_rate": 1.2424870607998297e-07, "loss": 0.0003, "step": 284550 }, { "epoch": 1.8720683144411625, "grad_norm": 0.04896924827870852, "learning_rate": 1.2412154708368485e-07, "loss": 0.0003, "step": 284560 }, { "epoch": 1.8721341026163958, "grad_norm": 0.0016195955155568719, "learning_rate": 1.2399445237163355e-07, "loss": 0.0006, "step": 284570 }, { "epoch": 1.872199890791629, "grad_norm": 0.016283177229207364, "learning_rate": 1.238674219455044e-07, "loss": 0.0002, "step": 284580 }, { "epoch": 1.8722656789668624, "grad_norm": 0.004322481311216118, "learning_rate": 1.2374045580697158e-07, "loss": 0.0004, "step": 284590 }, { "epoch": 1.8723314671420959, "grad_norm": 0.02315431151207731, "learning_rate": 1.2361355395770935e-07, "loss": 0.001, "step": 284600 }, { "epoch": 1.8723972553173294, "grad_norm": 0.049546567494247296, "learning_rate": 1.2348671639939136e-07, "loss": 0.0007, "step": 284610 }, { "epoch": 1.8724630434925627, "grad_norm": 0.0006905323996442974, "learning_rate": 1.233599431336885e-07, "loss": 0.0006, "step": 284620 }, { "epoch": 1.872528831667796, "grad_norm": 0.048500670793394485, "learning_rate": 1.2323323416227328e-07, "loss": 0.0005, "step": 284630 }, { "epoch": 1.8725946198430294, "grad_norm": 0.008949218895771993, "learning_rate": 1.2310658948681663e-07, "loss": 0.0001, "step": 284640 }, { "epoch": 1.872660408018263, "grad_norm": 0.013261435555442444, "learning_rate": 1.229800091089872e-07, "loss": 0.0003, "step": 284650 }, { "epoch": 1.8727261961934962, "grad_norm": 0.039585499287257866, "learning_rate": 1.228534930304548e-07, "loss": 0.0004, "step": 284660 }, { "epoch": 1.8727919843687295, "grad_norm": 0.0026735029121054887, "learning_rate": 1.2272704125288637e-07, "loss": 0.0011, "step": 284670 }, { "epoch": 1.8728577725439628, "grad_norm": 0.019922404206083096, "learning_rate": 1.226006537779495e-07, "loss": 0.0002, "step": 284680 }, { "epoch": 1.8729235607191963, "grad_norm": 0.044617723804451455, "learning_rate": 1.2247433060731118e-07, "loss": 0.0004, "step": 284690 }, { "epoch": 1.8729893488944298, "grad_norm": 0.04965630845437533, "learning_rate": 1.223480717426362e-07, "loss": 0.0003, "step": 284700 }, { "epoch": 1.873055137069663, "grad_norm": 0.005042838146272446, "learning_rate": 1.2222187718558875e-07, "loss": 0.001, "step": 284710 }, { "epoch": 1.8731209252448964, "grad_norm": 0.011606739032634662, "learning_rate": 1.2209574693783365e-07, "loss": 0.0002, "step": 284720 }, { "epoch": 1.8731867134201299, "grad_norm": 0.01838703419583418, "learning_rate": 1.2196968100103347e-07, "loss": 0.0005, "step": 284730 }, { "epoch": 1.8732525015953634, "grad_norm": 0.011586007262630179, "learning_rate": 1.218436793768496e-07, "loss": 0.0008, "step": 284740 }, { "epoch": 1.8733182897705967, "grad_norm": 0.07208505645915589, "learning_rate": 1.2171774206694352e-07, "loss": 0.0003, "step": 284750 }, { "epoch": 1.87338407794583, "grad_norm": 0.012194251655005102, "learning_rate": 1.2159186907297615e-07, "loss": 0.0002, "step": 284760 }, { "epoch": 1.8734498661210632, "grad_norm": 0.005177914314690221, "learning_rate": 1.2146606039660668e-07, "loss": 0.0005, "step": 284770 }, { "epoch": 1.8735156542962967, "grad_norm": 0.0009629534855679645, "learning_rate": 1.2134031603949438e-07, "loss": 0.0006, "step": 284780 }, { "epoch": 1.8735814424715302, "grad_norm": 0.09419384152569715, "learning_rate": 1.2121463600329564e-07, "loss": 0.0008, "step": 284790 }, { "epoch": 1.8736472306467635, "grad_norm": 0.006797455147430134, "learning_rate": 1.2108902028966917e-07, "loss": 0.0002, "step": 284800 }, { "epoch": 1.8737130188219968, "grad_norm": 0.012379399174439379, "learning_rate": 1.209634689002698e-07, "loss": 0.0004, "step": 284810 }, { "epoch": 1.8737788069972303, "grad_norm": 0.05138915774230229, "learning_rate": 1.2083798183675333e-07, "loss": 0.0003, "step": 284820 }, { "epoch": 1.8738445951724638, "grad_norm": 0.0006736844543254645, "learning_rate": 1.2071255910077407e-07, "loss": 0.0004, "step": 284830 }, { "epoch": 1.873910383347697, "grad_norm": 0.00036807752621802745, "learning_rate": 1.2058720069398622e-07, "loss": 0.0005, "step": 284840 }, { "epoch": 1.8739761715229304, "grad_norm": 0.016095245056284888, "learning_rate": 1.2046190661804126e-07, "loss": 0.0005, "step": 284850 }, { "epoch": 1.874041959698164, "grad_norm": 0.036109142262760255, "learning_rate": 1.2033667687459227e-07, "loss": 0.0003, "step": 284860 }, { "epoch": 1.8741077478733974, "grad_norm": 0.06962067745593789, "learning_rate": 1.2021151146528964e-07, "loss": 0.0003, "step": 284870 }, { "epoch": 1.8741735360486307, "grad_norm": 0.01030365565545953, "learning_rate": 1.200864103917837e-07, "loss": 0.0006, "step": 284880 }, { "epoch": 1.874239324223864, "grad_norm": 0.0013612311860163977, "learning_rate": 1.199613736557237e-07, "loss": 0.0003, "step": 284890 }, { "epoch": 1.8743051123990973, "grad_norm": 0.015091236494202825, "learning_rate": 1.1983640125875884e-07, "loss": 0.0003, "step": 284900 }, { "epoch": 1.8743709005743308, "grad_norm": 0.005129856895729227, "learning_rate": 1.1971149320253616e-07, "loss": 0.0005, "step": 284910 }, { "epoch": 1.8744366887495643, "grad_norm": 0.01048762643425124, "learning_rate": 1.1958664948870214e-07, "loss": 0.0009, "step": 284920 }, { "epoch": 1.8745024769247975, "grad_norm": 0.243910504711423, "learning_rate": 1.1946187011890375e-07, "loss": 0.0016, "step": 284930 }, { "epoch": 1.8745682651000308, "grad_norm": 0.012087833366164636, "learning_rate": 1.1933715509478527e-07, "loss": 0.0006, "step": 284940 }, { "epoch": 1.8746340532752643, "grad_norm": 0.016493845304947685, "learning_rate": 1.1921250441799093e-07, "loss": 0.0003, "step": 284950 }, { "epoch": 1.8746998414504978, "grad_norm": 0.03232549569145646, "learning_rate": 1.190879180901644e-07, "loss": 0.0002, "step": 284960 }, { "epoch": 1.8747656296257311, "grad_norm": 0.030486596806718787, "learning_rate": 1.1896339611294882e-07, "loss": 0.0002, "step": 284970 }, { "epoch": 1.8748314178009644, "grad_norm": 0.06355081737251737, "learning_rate": 1.1883893848798456e-07, "loss": 0.0012, "step": 284980 }, { "epoch": 1.8748972059761977, "grad_norm": 0.034155661236545315, "learning_rate": 1.1871454521691362e-07, "loss": 0.0003, "step": 284990 }, { "epoch": 1.8749629941514312, "grad_norm": 0.040259072144215834, "learning_rate": 1.185902163013758e-07, "loss": 0.0003, "step": 285000 }, { "epoch": 1.8750287823266647, "grad_norm": 0.0011251050553357028, "learning_rate": 1.1846595174300923e-07, "loss": 0.0003, "step": 285010 }, { "epoch": 1.875094570501898, "grad_norm": 0.028670648596458202, "learning_rate": 1.1834175154345429e-07, "loss": 0.0004, "step": 285020 }, { "epoch": 1.8751603586771313, "grad_norm": 0.004359445913172144, "learning_rate": 1.1821761570434632e-07, "loss": 0.0004, "step": 285030 }, { "epoch": 1.8752261468523648, "grad_norm": 0.02143691741198654, "learning_rate": 1.1809354422732345e-07, "loss": 0.0007, "step": 285040 }, { "epoch": 1.8752919350275983, "grad_norm": 0.005632001973085906, "learning_rate": 1.1796953711402104e-07, "loss": 0.0003, "step": 285050 }, { "epoch": 1.8753577232028316, "grad_norm": 0.007386083835967113, "learning_rate": 1.1784559436607391e-07, "loss": 0.0004, "step": 285060 }, { "epoch": 1.8754235113780648, "grad_norm": 0.048608456662605416, "learning_rate": 1.1772171598511573e-07, "loss": 0.0006, "step": 285070 }, { "epoch": 1.8754892995532981, "grad_norm": 0.01707440692552067, "learning_rate": 1.175979019727802e-07, "loss": 0.0003, "step": 285080 }, { "epoch": 1.8755550877285316, "grad_norm": 0.006188432635525589, "learning_rate": 1.1747415233069992e-07, "loss": 0.0002, "step": 285090 }, { "epoch": 1.8756208759037651, "grad_norm": 0.02050468485937023, "learning_rate": 1.1735046706050579e-07, "loss": 0.0002, "step": 285100 }, { "epoch": 1.8756866640789984, "grad_norm": 0.01015813333256755, "learning_rate": 1.1722684616382874e-07, "loss": 0.0002, "step": 285110 }, { "epoch": 1.8757524522542317, "grad_norm": 0.10499839751817194, "learning_rate": 1.1710328964229912e-07, "loss": 0.0005, "step": 285120 }, { "epoch": 1.8758182404294652, "grad_norm": 0.026543967144041995, "learning_rate": 1.1697979749754507e-07, "loss": 0.0006, "step": 285130 }, { "epoch": 1.8758840286046987, "grad_norm": 0.030052579426945728, "learning_rate": 1.1685636973119475e-07, "loss": 0.0002, "step": 285140 }, { "epoch": 1.875949816779932, "grad_norm": 0.000617934914975822, "learning_rate": 1.1673300634487684e-07, "loss": 0.0003, "step": 285150 }, { "epoch": 1.8760156049551653, "grad_norm": 0.0055202146737546405, "learning_rate": 1.1660970734021615e-07, "loss": 0.0005, "step": 285160 }, { "epoch": 1.8760813931303988, "grad_norm": 0.027315244450756653, "learning_rate": 1.1648647271883918e-07, "loss": 0.0012, "step": 285170 }, { "epoch": 1.876147181305632, "grad_norm": 0.00194856592652541, "learning_rate": 1.1636330248237015e-07, "loss": 0.0002, "step": 285180 }, { "epoch": 1.8762129694808656, "grad_norm": 0.010138008836985724, "learning_rate": 1.1624019663243336e-07, "loss": 0.0003, "step": 285190 }, { "epoch": 1.8762787576560989, "grad_norm": 0.013832267424822992, "learning_rate": 1.1611715517065191e-07, "loss": 0.0002, "step": 285200 }, { "epoch": 1.8763445458313321, "grad_norm": 0.02269615612992438, "learning_rate": 1.1599417809864732e-07, "loss": 0.0002, "step": 285210 }, { "epoch": 1.8764103340065657, "grad_norm": 0.08221818175962284, "learning_rate": 1.1587126541804161e-07, "loss": 0.0004, "step": 285220 }, { "epoch": 1.8764761221817992, "grad_norm": 0.023255050287169936, "learning_rate": 1.1574841713045515e-07, "loss": 0.0002, "step": 285230 }, { "epoch": 1.8765419103570324, "grad_norm": 0.02770632531684006, "learning_rate": 1.1562563323750719e-07, "loss": 0.0005, "step": 285240 }, { "epoch": 1.8766076985322657, "grad_norm": 0.0048902699916439545, "learning_rate": 1.1550291374081701e-07, "loss": 0.0002, "step": 285250 }, { "epoch": 1.8766734867074992, "grad_norm": 0.08472129407358917, "learning_rate": 1.1538025864200219e-07, "loss": 0.0004, "step": 285260 }, { "epoch": 1.8767392748827327, "grad_norm": 0.02397592439183162, "learning_rate": 1.1525766794267978e-07, "loss": 0.0002, "step": 285270 }, { "epoch": 1.876805063057966, "grad_norm": 0.0016545565512305366, "learning_rate": 1.1513514164446682e-07, "loss": 0.0004, "step": 285280 }, { "epoch": 1.8768708512331993, "grad_norm": 0.0007720774230419966, "learning_rate": 1.1501267974897812e-07, "loss": 0.0003, "step": 285290 }, { "epoch": 1.8769366394084326, "grad_norm": 0.06560288337090503, "learning_rate": 1.148902822578285e-07, "loss": 0.0003, "step": 285300 }, { "epoch": 1.877002427583666, "grad_norm": 0.03506436171413662, "learning_rate": 1.1476794917263112e-07, "loss": 0.0007, "step": 285310 }, { "epoch": 1.8770682157588996, "grad_norm": 0.03110196435258616, "learning_rate": 1.1464568049499913e-07, "loss": 0.0002, "step": 285320 }, { "epoch": 1.8771340039341329, "grad_norm": 0.03515438193478664, "learning_rate": 1.1452347622654458e-07, "loss": 0.0005, "step": 285330 }, { "epoch": 1.8771997921093662, "grad_norm": 0.03225813588770376, "learning_rate": 1.1440133636887895e-07, "loss": 0.0005, "step": 285340 }, { "epoch": 1.8772655802845997, "grad_norm": 0.015679900632508216, "learning_rate": 1.1427926092361207e-07, "loss": 0.0004, "step": 285350 }, { "epoch": 1.8773313684598332, "grad_norm": 0.006054813353485805, "learning_rate": 1.1415724989235322e-07, "loss": 0.0005, "step": 285360 }, { "epoch": 1.8773971566350665, "grad_norm": 0.04549064178202521, "learning_rate": 1.1403530327671164e-07, "loss": 0.0006, "step": 285370 }, { "epoch": 1.8774629448102997, "grad_norm": 0.025037568092855365, "learning_rate": 1.1391342107829495e-07, "loss": 0.0008, "step": 285380 }, { "epoch": 1.877528732985533, "grad_norm": 0.00887695825143012, "learning_rate": 1.1379160329870964e-07, "loss": 0.0004, "step": 285390 }, { "epoch": 1.8775945211607665, "grad_norm": 0.01536745862696794, "learning_rate": 1.1366984993956165e-07, "loss": 0.0006, "step": 285400 }, { "epoch": 1.877660309336, "grad_norm": 0.01528916692310975, "learning_rate": 1.1354816100245692e-07, "loss": 0.0003, "step": 285410 }, { "epoch": 1.8777260975112333, "grad_norm": 0.14646903821335108, "learning_rate": 1.134265364889997e-07, "loss": 0.0008, "step": 285420 }, { "epoch": 1.8777918856864666, "grad_norm": 0.008697731925332698, "learning_rate": 1.1330497640079319e-07, "loss": 0.0006, "step": 285430 }, { "epoch": 1.8778576738617, "grad_norm": 0.021648992989472864, "learning_rate": 1.1318348073943996e-07, "loss": 0.0007, "step": 285440 }, { "epoch": 1.8779234620369336, "grad_norm": 0.046895815071254544, "learning_rate": 1.1306204950654209e-07, "loss": 0.0003, "step": 285450 }, { "epoch": 1.877989250212167, "grad_norm": 0.049502118064858165, "learning_rate": 1.1294068270370051e-07, "loss": 0.0003, "step": 285460 }, { "epoch": 1.8780550383874002, "grad_norm": 0.051314399968707125, "learning_rate": 1.1281938033251561e-07, "loss": 0.0007, "step": 285470 }, { "epoch": 1.8781208265626337, "grad_norm": 0.01436063189982948, "learning_rate": 1.126981423945861e-07, "loss": 0.0005, "step": 285480 }, { "epoch": 1.878186614737867, "grad_norm": 0.026100492072238402, "learning_rate": 1.1257696889151015e-07, "loss": 0.0004, "step": 285490 }, { "epoch": 1.8782524029131005, "grad_norm": 0.004083498220511762, "learning_rate": 1.1245585982488649e-07, "loss": 0.0005, "step": 285500 }, { "epoch": 1.8783181910883338, "grad_norm": 0.012030905367147243, "learning_rate": 1.1233481519631051e-07, "loss": 0.0005, "step": 285510 }, { "epoch": 1.878383979263567, "grad_norm": 0.07571559373450246, "learning_rate": 1.1221383500737926e-07, "loss": 0.0003, "step": 285520 }, { "epoch": 1.8784497674388005, "grad_norm": 0.036340915525893705, "learning_rate": 1.1209291925968647e-07, "loss": 0.0003, "step": 285530 }, { "epoch": 1.878515555614034, "grad_norm": 0.025347192312135274, "learning_rate": 1.1197206795482806e-07, "loss": 0.0004, "step": 285540 }, { "epoch": 1.8785813437892673, "grad_norm": 0.05970857342695795, "learning_rate": 1.1185128109439558e-07, "loss": 0.0006, "step": 285550 }, { "epoch": 1.8786471319645006, "grad_norm": 0.030242807996939007, "learning_rate": 1.1173055867998273e-07, "loss": 0.0003, "step": 285560 }, { "epoch": 1.8787129201397341, "grad_norm": 0.005826046044079085, "learning_rate": 1.11609900713181e-07, "loss": 0.0003, "step": 285570 }, { "epoch": 1.8787787083149676, "grad_norm": 0.011807746654482825, "learning_rate": 1.1148930719558027e-07, "loss": 0.0004, "step": 285580 }, { "epoch": 1.878844496490201, "grad_norm": 0.0356439180041447, "learning_rate": 1.1136877812877145e-07, "loss": 0.0006, "step": 285590 }, { "epoch": 1.8789102846654342, "grad_norm": 0.02479200675585057, "learning_rate": 1.1124831351434273e-07, "loss": 0.0003, "step": 285600 }, { "epoch": 1.8789760728406675, "grad_norm": 0.040316098063156794, "learning_rate": 1.1112791335388284e-07, "loss": 0.0001, "step": 285610 }, { "epoch": 1.879041861015901, "grad_norm": 0.02493729632755086, "learning_rate": 1.110075776489794e-07, "loss": 0.0008, "step": 285620 }, { "epoch": 1.8791076491911345, "grad_norm": 0.003673633460259453, "learning_rate": 1.1088730640121836e-07, "loss": 0.0003, "step": 285630 }, { "epoch": 1.8791734373663678, "grad_norm": 0.0010197866006881981, "learning_rate": 1.1076709961218624e-07, "loss": 0.0003, "step": 285640 }, { "epoch": 1.879239225541601, "grad_norm": 0.005429928681298576, "learning_rate": 1.1064695728346675e-07, "loss": 0.0002, "step": 285650 }, { "epoch": 1.8793050137168346, "grad_norm": 0.05690394873549672, "learning_rate": 1.105268794166442e-07, "loss": 0.0005, "step": 285660 }, { "epoch": 1.879370801892068, "grad_norm": 0.05510268490067583, "learning_rate": 1.104068660133023e-07, "loss": 0.0003, "step": 285670 }, { "epoch": 1.8794365900673013, "grad_norm": 0.027444263028759897, "learning_rate": 1.102869170750226e-07, "loss": 0.0005, "step": 285680 }, { "epoch": 1.8795023782425346, "grad_norm": 0.015578833342539261, "learning_rate": 1.1016703260338713e-07, "loss": 0.0003, "step": 285690 }, { "epoch": 1.879568166417768, "grad_norm": 0.04777722120709944, "learning_rate": 1.100472125999763e-07, "loss": 0.0002, "step": 285700 }, { "epoch": 1.8796339545930014, "grad_norm": 0.0009780799962589763, "learning_rate": 1.0992745706636997e-07, "loss": 0.0003, "step": 285710 }, { "epoch": 1.879699742768235, "grad_norm": 0.037184986955912575, "learning_rate": 1.0980776600414633e-07, "loss": 0.0003, "step": 285720 }, { "epoch": 1.8797655309434682, "grad_norm": 0.00884478867818588, "learning_rate": 1.096881394148841e-07, "loss": 0.0004, "step": 285730 }, { "epoch": 1.8798313191187015, "grad_norm": 0.03588700177043566, "learning_rate": 1.0956857730015979e-07, "loss": 0.0005, "step": 285740 }, { "epoch": 1.879897107293935, "grad_norm": 0.010513529906629798, "learning_rate": 1.0944907966155049e-07, "loss": 0.0002, "step": 285750 }, { "epoch": 1.8799628954691685, "grad_norm": 0.0019510044000198357, "learning_rate": 1.0932964650063105e-07, "loss": 0.0005, "step": 285760 }, { "epoch": 1.8800286836444018, "grad_norm": 0.03984399127382851, "learning_rate": 1.0921027781897686e-07, "loss": 0.0004, "step": 285770 }, { "epoch": 1.880094471819635, "grad_norm": 0.08077820214220345, "learning_rate": 1.0909097361816113e-07, "loss": 0.0006, "step": 285780 }, { "epoch": 1.8801602599948686, "grad_norm": 0.00029099343500087806, "learning_rate": 1.089717338997559e-07, "loss": 0.0003, "step": 285790 }, { "epoch": 1.8802260481701019, "grad_norm": 0.02597880735864656, "learning_rate": 1.0885255866533496e-07, "loss": 0.001, "step": 285800 }, { "epoch": 1.8802918363453354, "grad_norm": 0.02948373576165116, "learning_rate": 1.0873344791646868e-07, "loss": 0.0003, "step": 285810 }, { "epoch": 1.8803576245205686, "grad_norm": 0.04804184269297348, "learning_rate": 1.0861440165472748e-07, "loss": 0.0004, "step": 285820 }, { "epoch": 1.880423412695802, "grad_norm": 0.005676073592060638, "learning_rate": 1.0849541988168122e-07, "loss": 0.0001, "step": 285830 }, { "epoch": 1.8804892008710354, "grad_norm": 0.04323149801042707, "learning_rate": 1.083765025988981e-07, "loss": 0.0005, "step": 285840 }, { "epoch": 1.880554989046269, "grad_norm": 0.02497444776700986, "learning_rate": 1.0825764980794572e-07, "loss": 0.0004, "step": 285850 }, { "epoch": 1.8806207772215022, "grad_norm": 0.04042894515471041, "learning_rate": 1.0813886151039177e-07, "loss": 0.0006, "step": 285860 }, { "epoch": 1.8806865653967355, "grad_norm": 0.012494050059235462, "learning_rate": 1.0802013770780218e-07, "loss": 0.0005, "step": 285870 }, { "epoch": 1.880752353571969, "grad_norm": 0.027094015836709725, "learning_rate": 1.079014784017418e-07, "loss": 0.0005, "step": 285880 }, { "epoch": 1.8808181417472025, "grad_norm": 0.02590862442939237, "learning_rate": 1.0778288359377553e-07, "loss": 0.0005, "step": 285890 }, { "epoch": 1.8808839299224358, "grad_norm": 0.017947397882177793, "learning_rate": 1.076643532854671e-07, "loss": 0.0005, "step": 285900 }, { "epoch": 1.880949718097669, "grad_norm": 0.016928848012224565, "learning_rate": 1.0754588747837858e-07, "loss": 0.0006, "step": 285910 }, { "epoch": 1.8810155062729024, "grad_norm": 0.015421487947467304, "learning_rate": 1.0742748617407151e-07, "loss": 0.0004, "step": 285920 }, { "epoch": 1.8810812944481359, "grad_norm": 0.001623068020060646, "learning_rate": 1.0730914937410797e-07, "loss": 0.0004, "step": 285930 }, { "epoch": 1.8811470826233694, "grad_norm": 0.019146211689129992, "learning_rate": 1.0719087708004783e-07, "loss": 0.0005, "step": 285940 }, { "epoch": 1.8812128707986027, "grad_norm": 0.00058516682115732, "learning_rate": 1.0707266929344984e-07, "loss": 0.0002, "step": 285950 }, { "epoch": 1.881278658973836, "grad_norm": 0.11647940814165864, "learning_rate": 1.069545260158733e-07, "loss": 0.0004, "step": 285960 }, { "epoch": 1.8813444471490695, "grad_norm": 0.01164481288392814, "learning_rate": 1.0683644724887588e-07, "loss": 0.0003, "step": 285970 }, { "epoch": 1.881410235324303, "grad_norm": 0.02259384018706161, "learning_rate": 1.0671843299401297e-07, "loss": 0.0004, "step": 285980 }, { "epoch": 1.8814760234995362, "grad_norm": 0.014520855933244516, "learning_rate": 1.0660048325284223e-07, "loss": 0.0004, "step": 285990 }, { "epoch": 1.8815418116747695, "grad_norm": 0.0006369370852255821, "learning_rate": 1.0648259802691741e-07, "loss": 0.0007, "step": 286000 }, { "epoch": 1.8816075998500028, "grad_norm": 0.02435457196929065, "learning_rate": 1.0636477731779338e-07, "loss": 0.0002, "step": 286010 }, { "epoch": 1.8816733880252363, "grad_norm": 0.0051196428121285725, "learning_rate": 1.0624702112702335e-07, "loss": 0.0011, "step": 286020 }, { "epoch": 1.8817391762004698, "grad_norm": 0.030788833230468833, "learning_rate": 1.0612932945615939e-07, "loss": 0.0005, "step": 286030 }, { "epoch": 1.881804964375703, "grad_norm": 0.00021596949576750896, "learning_rate": 1.0601170230675417e-07, "loss": 0.0001, "step": 286040 }, { "epoch": 1.8818707525509364, "grad_norm": 0.024036983860578023, "learning_rate": 1.05894139680357e-07, "loss": 0.0007, "step": 286050 }, { "epoch": 1.88193654072617, "grad_norm": 0.006187102850933928, "learning_rate": 1.0577664157851997e-07, "loss": 0.0002, "step": 286060 }, { "epoch": 1.8820023289014034, "grad_norm": 0.009292647752561362, "learning_rate": 1.0565920800279017e-07, "loss": 0.0005, "step": 286070 }, { "epoch": 1.8820681170766367, "grad_norm": 0.01570291851956067, "learning_rate": 1.0554183895471692e-07, "loss": 0.0001, "step": 286080 }, { "epoch": 1.88213390525187, "grad_norm": 0.030600365565942735, "learning_rate": 1.0542453443584788e-07, "loss": 0.0003, "step": 286090 }, { "epoch": 1.8821996934271032, "grad_norm": 0.052281515521139604, "learning_rate": 1.0530729444772847e-07, "loss": 0.0002, "step": 286100 }, { "epoch": 1.8822654816023368, "grad_norm": 0.011330198419101847, "learning_rate": 1.0519011899190524e-07, "loss": 0.0002, "step": 286110 }, { "epoch": 1.8823312697775703, "grad_norm": 0.04665844404154872, "learning_rate": 1.0507300806992304e-07, "loss": 0.0009, "step": 286120 }, { "epoch": 1.8823970579528035, "grad_norm": 0.006510719065361615, "learning_rate": 1.0495596168332567e-07, "loss": 0.0035, "step": 286130 }, { "epoch": 1.8824628461280368, "grad_norm": 0.019529647205787558, "learning_rate": 1.0483897983365632e-07, "loss": 0.0003, "step": 286140 }, { "epoch": 1.8825286343032703, "grad_norm": 0.0003118687110282809, "learning_rate": 1.0472206252245708e-07, "loss": 0.0004, "step": 286150 }, { "epoch": 1.8825944224785038, "grad_norm": 0.07484879491154459, "learning_rate": 1.0460520975127008e-07, "loss": 0.0008, "step": 286160 }, { "epoch": 1.8826602106537371, "grad_norm": 0.020228205802235975, "learning_rate": 1.0448842152163463e-07, "loss": 0.0005, "step": 286170 }, { "epoch": 1.8827259988289704, "grad_norm": 0.09552104651338032, "learning_rate": 1.0437169783509172e-07, "loss": 0.0007, "step": 286180 }, { "epoch": 1.882791787004204, "grad_norm": 0.005899691895766167, "learning_rate": 1.0425503869317955e-07, "loss": 0.0003, "step": 286190 }, { "epoch": 1.8828575751794372, "grad_norm": 0.006847599501130248, "learning_rate": 1.041384440974369e-07, "loss": 0.0004, "step": 286200 }, { "epoch": 1.8829233633546707, "grad_norm": 0.00668860599787023, "learning_rate": 1.0402191404940032e-07, "loss": 0.0006, "step": 286210 }, { "epoch": 1.882989151529904, "grad_norm": 0.0018623855920171823, "learning_rate": 1.0390544855060636e-07, "loss": 0.0003, "step": 286220 }, { "epoch": 1.8830549397051373, "grad_norm": 0.0404558963454659, "learning_rate": 1.0378904760259101e-07, "loss": 0.0006, "step": 286230 }, { "epoch": 1.8831207278803708, "grad_norm": 0.021877848684350578, "learning_rate": 1.0367271120688804e-07, "loss": 0.0003, "step": 286240 }, { "epoch": 1.8831865160556043, "grad_norm": 0.014122203144358962, "learning_rate": 1.0355643936503124e-07, "loss": 0.0003, "step": 286250 }, { "epoch": 1.8832523042308376, "grad_norm": 0.02978179401394651, "learning_rate": 1.0344023207855436e-07, "loss": 0.0006, "step": 286260 }, { "epoch": 1.8833180924060708, "grad_norm": 0.06666777045382227, "learning_rate": 1.0332408934898897e-07, "loss": 0.0006, "step": 286270 }, { "epoch": 1.8833838805813043, "grad_norm": 0.0007330193783676522, "learning_rate": 1.032080111778666e-07, "loss": 0.0003, "step": 286280 }, { "epoch": 1.8834496687565379, "grad_norm": 0.00043831594736637306, "learning_rate": 1.0309199756671717e-07, "loss": 0.0004, "step": 286290 }, { "epoch": 1.8835154569317711, "grad_norm": 0.0008113544163735236, "learning_rate": 1.0297604851707055e-07, "loss": 0.0002, "step": 286300 }, { "epoch": 1.8835812451070044, "grad_norm": 0.006761308140457319, "learning_rate": 1.0286016403045551e-07, "loss": 0.0008, "step": 286310 }, { "epoch": 1.8836470332822377, "grad_norm": 0.09781529479312737, "learning_rate": 1.0274434410839973e-07, "loss": 0.0005, "step": 286320 }, { "epoch": 1.8837128214574712, "grad_norm": 0.02506515163164759, "learning_rate": 1.0262858875243031e-07, "loss": 0.0006, "step": 286330 }, { "epoch": 1.8837786096327047, "grad_norm": 0.005033353707322772, "learning_rate": 1.0251289796407271e-07, "loss": 0.0003, "step": 286340 }, { "epoch": 1.883844397807938, "grad_norm": 0.0014273488016701503, "learning_rate": 1.0239727174485347e-07, "loss": 0.0004, "step": 286350 }, { "epoch": 1.8839101859831713, "grad_norm": 0.03699157010204598, "learning_rate": 1.0228171009629639e-07, "loss": 0.0006, "step": 286360 }, { "epoch": 1.8839759741584048, "grad_norm": 0.08247934743677077, "learning_rate": 1.0216621301992468e-07, "loss": 0.0004, "step": 286370 }, { "epoch": 1.8840417623336383, "grad_norm": 0.049385872714345004, "learning_rate": 1.0205078051726158e-07, "loss": 0.0011, "step": 286380 }, { "epoch": 1.8841075505088716, "grad_norm": 0.003003640989823609, "learning_rate": 1.0193541258982864e-07, "loss": 0.0004, "step": 286390 }, { "epoch": 1.8841733386841049, "grad_norm": 0.0004597950784935424, "learning_rate": 1.0182010923914798e-07, "loss": 0.0004, "step": 286400 }, { "epoch": 1.8842391268593381, "grad_norm": 9.668827211910151e-05, "learning_rate": 1.0170487046673783e-07, "loss": 0.0008, "step": 286410 }, { "epoch": 1.8843049150345716, "grad_norm": 0.0007278820933827505, "learning_rate": 1.0158969627411808e-07, "loss": 0.0006, "step": 286420 }, { "epoch": 1.8843707032098052, "grad_norm": 0.016039149855755084, "learning_rate": 1.0147458666280862e-07, "loss": 0.0003, "step": 286430 }, { "epoch": 1.8844364913850384, "grad_norm": 0.02244020723316987, "learning_rate": 1.0135954163432548e-07, "loss": 0.0007, "step": 286440 }, { "epoch": 1.8845022795602717, "grad_norm": 0.11591946790709313, "learning_rate": 1.0124456119018633e-07, "loss": 0.0004, "step": 286450 }, { "epoch": 1.8845680677355052, "grad_norm": 0.033616133667279835, "learning_rate": 1.0112964533190661e-07, "loss": 0.0006, "step": 286460 }, { "epoch": 1.8846338559107387, "grad_norm": 0.04030057717332707, "learning_rate": 1.0101479406100178e-07, "loss": 0.0003, "step": 286470 }, { "epoch": 1.884699644085972, "grad_norm": 0.024777490657556737, "learning_rate": 1.009000073789862e-07, "loss": 0.0004, "step": 286480 }, { "epoch": 1.8847654322612053, "grad_norm": 0.011299950562208488, "learning_rate": 1.0078528528737252e-07, "loss": 0.0005, "step": 286490 }, { "epoch": 1.8848312204364388, "grad_norm": 0.0015714597546513348, "learning_rate": 1.0067062778767345e-07, "loss": 0.0002, "step": 286500 }, { "epoch": 1.884897008611672, "grad_norm": 0.012981454982227128, "learning_rate": 1.0055603488140109e-07, "loss": 0.0003, "step": 286510 }, { "epoch": 1.8849627967869056, "grad_norm": 0.007200262244424348, "learning_rate": 1.0044150657006591e-07, "loss": 0.0003, "step": 286520 }, { "epoch": 1.8850285849621389, "grad_norm": 0.08594283701818675, "learning_rate": 1.0032704285517781e-07, "loss": 0.0007, "step": 286530 }, { "epoch": 1.8850943731373722, "grad_norm": 0.0029983162455438366, "learning_rate": 1.0021264373824557e-07, "loss": 0.0006, "step": 286540 }, { "epoch": 1.8851601613126057, "grad_norm": 0.007878179377719478, "learning_rate": 1.0009830922077801e-07, "loss": 0.0004, "step": 286550 }, { "epoch": 1.8852259494878392, "grad_norm": 0.002243567326035455, "learning_rate": 9.99840393042828e-08, "loss": 0.0006, "step": 286560 }, { "epoch": 1.8852917376630725, "grad_norm": 0.006822076146389927, "learning_rate": 9.98698339902665e-08, "loss": 0.0003, "step": 286570 }, { "epoch": 1.8853575258383057, "grad_norm": 0.00454570891678435, "learning_rate": 9.975569328023349e-08, "loss": 0.0005, "step": 286580 }, { "epoch": 1.8854233140135392, "grad_norm": 0.03397773030096122, "learning_rate": 9.964161717569032e-08, "loss": 0.0004, "step": 286590 }, { "epoch": 1.8854891021887727, "grad_norm": 0.007809789011948056, "learning_rate": 9.952760567813969e-08, "loss": 0.0002, "step": 286600 }, { "epoch": 1.885554890364006, "grad_norm": 0.03476607750228536, "learning_rate": 9.941365878908594e-08, "loss": 0.0004, "step": 286610 }, { "epoch": 1.8856206785392393, "grad_norm": 0.1253484931094905, "learning_rate": 9.929977651003009e-08, "loss": 0.0013, "step": 286620 }, { "epoch": 1.8856864667144726, "grad_norm": 0.01790164355169446, "learning_rate": 9.918595884247429e-08, "loss": 0.0003, "step": 286630 }, { "epoch": 1.885752254889706, "grad_norm": 0.0534195798894905, "learning_rate": 9.907220578791899e-08, "loss": 0.0006, "step": 286640 }, { "epoch": 1.8858180430649396, "grad_norm": 0.043356460530608654, "learning_rate": 9.89585173478641e-08, "loss": 0.0005, "step": 286650 }, { "epoch": 1.885883831240173, "grad_norm": 0.020902164012908758, "learning_rate": 9.884489352380788e-08, "loss": 0.0002, "step": 286660 }, { "epoch": 1.8859496194154062, "grad_norm": 0.006896773595828339, "learning_rate": 9.873133431724913e-08, "loss": 0.0004, "step": 286670 }, { "epoch": 1.8860154075906397, "grad_norm": 0.021729217438506143, "learning_rate": 9.861783972968442e-08, "loss": 0.0002, "step": 286680 }, { "epoch": 1.8860811957658732, "grad_norm": 0.0010399325182468152, "learning_rate": 9.850440976261089e-08, "loss": 0.0004, "step": 286690 }, { "epoch": 1.8861469839411065, "grad_norm": 0.04555238312012004, "learning_rate": 9.839104441752289e-08, "loss": 0.0006, "step": 286700 }, { "epoch": 1.8862127721163398, "grad_norm": 0.0034111675198030445, "learning_rate": 9.827774369591647e-08, "loss": 0.0004, "step": 286710 }, { "epoch": 1.886278560291573, "grad_norm": 0.0017153581426016145, "learning_rate": 9.816450759928376e-08, "loss": 0.0002, "step": 286720 }, { "epoch": 1.8863443484668065, "grad_norm": 0.03996494342474577, "learning_rate": 9.80513361291191e-08, "loss": 0.0004, "step": 286730 }, { "epoch": 1.88641013664204, "grad_norm": 0.0006281378130422097, "learning_rate": 9.79382292869141e-08, "loss": 0.0004, "step": 286740 }, { "epoch": 1.8864759248172733, "grad_norm": 0.06283465232820676, "learning_rate": 9.782518707415922e-08, "loss": 0.0003, "step": 286750 }, { "epoch": 1.8865417129925066, "grad_norm": 0.015659966959787445, "learning_rate": 9.771220949234605e-08, "loss": 0.0004, "step": 286760 }, { "epoch": 1.8866075011677401, "grad_norm": 0.02085169936821751, "learning_rate": 9.75992965429634e-08, "loss": 0.0006, "step": 286770 }, { "epoch": 1.8866732893429736, "grad_norm": 0.008222792970317305, "learning_rate": 9.748644822749952e-08, "loss": 0.0005, "step": 286780 }, { "epoch": 1.886739077518207, "grad_norm": 0.04009268682341779, "learning_rate": 9.737366454744324e-08, "loss": 0.0005, "step": 286790 }, { "epoch": 1.8868048656934402, "grad_norm": 0.022315028341012795, "learning_rate": 9.726094550428112e-08, "loss": 0.0007, "step": 286800 }, { "epoch": 1.8868706538686737, "grad_norm": 0.016252290729585343, "learning_rate": 9.714829109949863e-08, "loss": 0.0004, "step": 286810 }, { "epoch": 1.886936442043907, "grad_norm": 0.0017436370287789577, "learning_rate": 9.703570133458185e-08, "loss": 0.0002, "step": 286820 }, { "epoch": 1.8870022302191405, "grad_norm": 0.047002979902760776, "learning_rate": 9.692317621101455e-08, "loss": 0.0004, "step": 286830 }, { "epoch": 1.8870680183943738, "grad_norm": 0.007550011018614352, "learning_rate": 9.68107157302811e-08, "loss": 0.0003, "step": 286840 }, { "epoch": 1.887133806569607, "grad_norm": 0.004204079230435584, "learning_rate": 9.669831989386314e-08, "loss": 0.0003, "step": 286850 }, { "epoch": 1.8871995947448406, "grad_norm": 0.05534759631566324, "learning_rate": 9.658598870324387e-08, "loss": 0.0002, "step": 286860 }, { "epoch": 1.887265382920074, "grad_norm": 0.03303939522530305, "learning_rate": 9.64737221599027e-08, "loss": 0.001, "step": 286870 }, { "epoch": 1.8873311710953073, "grad_norm": 0.041327066094113483, "learning_rate": 9.636152026532119e-08, "loss": 0.0007, "step": 286880 }, { "epoch": 1.8873969592705406, "grad_norm": 0.014380210155249113, "learning_rate": 9.624938302097763e-08, "loss": 0.0002, "step": 286890 }, { "epoch": 1.8874627474457741, "grad_norm": 0.02470194951416577, "learning_rate": 9.613731042835084e-08, "loss": 0.0003, "step": 286900 }, { "epoch": 1.8875285356210076, "grad_norm": 0.003912867697565544, "learning_rate": 9.602530248891795e-08, "loss": 0.0003, "step": 286910 }, { "epoch": 1.887594323796241, "grad_norm": 0.0009331583122105607, "learning_rate": 9.591335920415667e-08, "loss": 0.0001, "step": 286920 }, { "epoch": 1.8876601119714742, "grad_norm": 0.02084184625055058, "learning_rate": 9.580148057554195e-08, "loss": 0.0005, "step": 286930 }, { "epoch": 1.8877259001467075, "grad_norm": 0.14203298837595152, "learning_rate": 9.568966660454926e-08, "loss": 0.0007, "step": 286940 }, { "epoch": 1.887791688321941, "grad_norm": 0.0026186875280084397, "learning_rate": 9.557791729265242e-08, "loss": 0.0003, "step": 286950 }, { "epoch": 1.8878574764971745, "grad_norm": 0.029341273214414716, "learning_rate": 9.546623264132526e-08, "loss": 0.0006, "step": 286960 }, { "epoch": 1.8879232646724078, "grad_norm": 0.002240372210399865, "learning_rate": 9.535461265204049e-08, "loss": 0.0006, "step": 286970 }, { "epoch": 1.887989052847641, "grad_norm": 0.017154076457008328, "learning_rate": 9.524305732626859e-08, "loss": 0.0002, "step": 286980 }, { "epoch": 1.8880548410228746, "grad_norm": 0.0385330663446619, "learning_rate": 9.513156666548118e-08, "loss": 0.0004, "step": 286990 }, { "epoch": 1.888120629198108, "grad_norm": 0.01056065876903451, "learning_rate": 9.502014067114818e-08, "loss": 0.0003, "step": 287000 }, { "epoch": 1.8881864173733414, "grad_norm": 0.006529986118046279, "learning_rate": 9.490877934473786e-08, "loss": 0.0005, "step": 287010 }, { "epoch": 1.8882522055485746, "grad_norm": 0.011195782205268952, "learning_rate": 9.479748268771905e-08, "loss": 0.0003, "step": 287020 }, { "epoch": 1.888317993723808, "grad_norm": 0.0159790872117838, "learning_rate": 9.468625070155946e-08, "loss": 0.0003, "step": 287030 }, { "epoch": 1.8883837818990414, "grad_norm": 0.01501671409888588, "learning_rate": 9.457508338772514e-08, "loss": 0.0002, "step": 287040 }, { "epoch": 1.888449570074275, "grad_norm": 0.014487605609518867, "learning_rate": 9.446398074768104e-08, "loss": 0.0006, "step": 287050 }, { "epoch": 1.8885153582495082, "grad_norm": 0.009125981803970942, "learning_rate": 9.43529427828932e-08, "loss": 0.0002, "step": 287060 }, { "epoch": 1.8885811464247415, "grad_norm": 0.04164297164815842, "learning_rate": 9.424196949482489e-08, "loss": 0.0005, "step": 287070 }, { "epoch": 1.888646934599975, "grad_norm": 0.013471710045690333, "learning_rate": 9.41310608849394e-08, "loss": 0.0001, "step": 287080 }, { "epoch": 1.8887127227752085, "grad_norm": 0.01919692830896139, "learning_rate": 9.402021695469888e-08, "loss": 0.0006, "step": 287090 }, { "epoch": 1.8887785109504418, "grad_norm": 0.11641415624769741, "learning_rate": 9.390943770556437e-08, "loss": 0.0009, "step": 287100 }, { "epoch": 1.888844299125675, "grad_norm": 0.018546359833726545, "learning_rate": 9.379872313899696e-08, "loss": 0.0004, "step": 287110 }, { "epoch": 1.8889100873009084, "grad_norm": 0.019811090959773233, "learning_rate": 9.368807325645657e-08, "loss": 0.0004, "step": 287120 }, { "epoch": 1.8889758754761419, "grad_norm": 0.001048823457907196, "learning_rate": 9.357748805940093e-08, "loss": 0.0007, "step": 287130 }, { "epoch": 1.8890416636513754, "grad_norm": 0.015707523024145936, "learning_rate": 9.346696754928942e-08, "loss": 0.0004, "step": 287140 }, { "epoch": 1.8891074518266087, "grad_norm": 0.009994692685728807, "learning_rate": 9.335651172757754e-08, "loss": 0.0007, "step": 287150 }, { "epoch": 1.889173240001842, "grad_norm": 0.008790003582449421, "learning_rate": 9.324612059572302e-08, "loss": 0.0005, "step": 287160 }, { "epoch": 1.8892390281770755, "grad_norm": 0.060088449388768554, "learning_rate": 9.313579415518026e-08, "loss": 0.0006, "step": 287170 }, { "epoch": 1.889304816352309, "grad_norm": 0.011690480365367321, "learning_rate": 9.30255324074042e-08, "loss": 0.0002, "step": 287180 }, { "epoch": 1.8893706045275422, "grad_norm": 0.15226216261261688, "learning_rate": 9.291533535384867e-08, "loss": 0.0008, "step": 287190 }, { "epoch": 1.8894363927027755, "grad_norm": 0.0016931581132501033, "learning_rate": 9.28052029959664e-08, "loss": 0.0005, "step": 287200 }, { "epoch": 1.889502180878009, "grad_norm": 0.02203301665381445, "learning_rate": 9.269513533520902e-08, "loss": 0.0001, "step": 287210 }, { "epoch": 1.8895679690532425, "grad_norm": 0.00315348568769842, "learning_rate": 9.258513237302868e-08, "loss": 0.0004, "step": 287220 }, { "epoch": 1.8896337572284758, "grad_norm": 0.027532989226731232, "learning_rate": 9.24751941108748e-08, "loss": 0.0005, "step": 287230 }, { "epoch": 1.889699545403709, "grad_norm": 0.019285703726856177, "learning_rate": 9.236532055019731e-08, "loss": 0.0005, "step": 287240 }, { "epoch": 1.8897653335789424, "grad_norm": 0.019369096261330307, "learning_rate": 9.225551169244451e-08, "loss": 0.0002, "step": 287250 }, { "epoch": 1.889831121754176, "grad_norm": 0.01165757416143156, "learning_rate": 9.214576753906357e-08, "loss": 0.0004, "step": 287260 }, { "epoch": 1.8898969099294094, "grad_norm": 0.005466411574878271, "learning_rate": 9.203608809150278e-08, "loss": 0.0004, "step": 287270 }, { "epoch": 1.8899626981046427, "grad_norm": 0.10789597312664552, "learning_rate": 9.192647335120708e-08, "loss": 0.0009, "step": 287280 }, { "epoch": 1.890028486279876, "grad_norm": 0.005997386105925008, "learning_rate": 9.1816923319622e-08, "loss": 0.0003, "step": 287290 }, { "epoch": 1.8900942744551095, "grad_norm": 0.005799893830534711, "learning_rate": 9.170743799819138e-08, "loss": 0.0005, "step": 287300 }, { "epoch": 1.890160062630343, "grad_norm": 0.004369541426609506, "learning_rate": 9.159801738835961e-08, "loss": 0.0002, "step": 287310 }, { "epoch": 1.8902258508055763, "grad_norm": 0.01557510757507928, "learning_rate": 9.148866149156833e-08, "loss": 0.0004, "step": 287320 }, { "epoch": 1.8902916389808095, "grad_norm": 0.04883067460736026, "learning_rate": 9.13793703092597e-08, "loss": 0.0009, "step": 287330 }, { "epoch": 1.8903574271560428, "grad_norm": 0.0047650287324421895, "learning_rate": 9.127014384287536e-08, "loss": 0.0006, "step": 287340 }, { "epoch": 1.8904232153312763, "grad_norm": 0.03178068649385934, "learning_rate": 9.116098209385415e-08, "loss": 0.0002, "step": 287350 }, { "epoch": 1.8904890035065098, "grad_norm": 0.02357411603955343, "learning_rate": 9.10518850636366e-08, "loss": 0.0003, "step": 287360 }, { "epoch": 1.8905547916817431, "grad_norm": 0.011338638900680367, "learning_rate": 9.094285275365933e-08, "loss": 0.0004, "step": 287370 }, { "epoch": 1.8906205798569764, "grad_norm": 0.0051598009919777005, "learning_rate": 9.083388516536174e-08, "loss": 0.0006, "step": 287380 }, { "epoch": 1.89068636803221, "grad_norm": 0.01083180180732493, "learning_rate": 9.072498230017879e-08, "loss": 0.0005, "step": 287390 }, { "epoch": 1.8907521562074434, "grad_norm": 0.051470082738902, "learning_rate": 9.061614415954767e-08, "loss": 0.0007, "step": 287400 }, { "epoch": 1.8908179443826767, "grad_norm": 0.05998349665947011, "learning_rate": 9.050737074490224e-08, "loss": 0.0006, "step": 287410 }, { "epoch": 1.89088373255791, "grad_norm": 0.0038619102276070093, "learning_rate": 9.039866205767689e-08, "loss": 0.0004, "step": 287420 }, { "epoch": 1.8909495207331433, "grad_norm": 0.007798334340379774, "learning_rate": 9.029001809930549e-08, "loss": 0.0004, "step": 287430 }, { "epoch": 1.8910153089083768, "grad_norm": 0.006625592489399114, "learning_rate": 9.018143887121966e-08, "loss": 0.0002, "step": 287440 }, { "epoch": 1.8910810970836103, "grad_norm": 0.08986048723537589, "learning_rate": 9.007292437485105e-08, "loss": 0.0004, "step": 287450 }, { "epoch": 1.8911468852588436, "grad_norm": 0.0004968191977869747, "learning_rate": 8.996447461163017e-08, "loss": 0.0005, "step": 287460 }, { "epoch": 1.8912126734340768, "grad_norm": 0.009766800559989207, "learning_rate": 8.985608958298697e-08, "loss": 0.0005, "step": 287470 }, { "epoch": 1.8912784616093103, "grad_norm": 0.04671332380789187, "learning_rate": 8.974776929035145e-08, "loss": 0.0005, "step": 287480 }, { "epoch": 1.8913442497845439, "grad_norm": 0.006196600454697155, "learning_rate": 8.963951373515023e-08, "loss": 0.0003, "step": 287490 }, { "epoch": 1.8914100379597771, "grad_norm": 0.04525814634520756, "learning_rate": 8.953132291881106e-08, "loss": 0.0003, "step": 287500 }, { "epoch": 1.8914758261350104, "grad_norm": 0.11318015613321969, "learning_rate": 8.942319684276112e-08, "loss": 0.0006, "step": 287510 }, { "epoch": 1.891541614310244, "grad_norm": 0.03905124159978845, "learning_rate": 8.931513550842486e-08, "loss": 0.0002, "step": 287520 }, { "epoch": 1.8916074024854772, "grad_norm": 0.07565862082180054, "learning_rate": 8.920713891722721e-08, "loss": 0.0005, "step": 287530 }, { "epoch": 1.8916731906607107, "grad_norm": 0.0014804570244930733, "learning_rate": 8.90992070705926e-08, "loss": 0.0005, "step": 287540 }, { "epoch": 1.891738978835944, "grad_norm": 0.041715527363627124, "learning_rate": 8.899133996994324e-08, "loss": 0.001, "step": 287550 }, { "epoch": 1.8918047670111773, "grad_norm": 0.0008183478113313749, "learning_rate": 8.888353761670188e-08, "loss": 0.0003, "step": 287560 }, { "epoch": 1.8918705551864108, "grad_norm": 0.08248292275436352, "learning_rate": 8.877580001228902e-08, "loss": 0.0003, "step": 287570 }, { "epoch": 1.8919363433616443, "grad_norm": 0.05371942511883357, "learning_rate": 8.866812715812634e-08, "loss": 0.0007, "step": 287580 }, { "epoch": 1.8920021315368776, "grad_norm": 0.006288958797720262, "learning_rate": 8.856051905563212e-08, "loss": 0.0003, "step": 287590 }, { "epoch": 1.8920679197121109, "grad_norm": 0.07890847853107327, "learning_rate": 8.84529757062258e-08, "loss": 0.0006, "step": 287600 }, { "epoch": 1.8921337078873444, "grad_norm": 0.024995934054149307, "learning_rate": 8.834549711132512e-08, "loss": 0.0007, "step": 287610 }, { "epoch": 1.8921994960625779, "grad_norm": 0.0010434401340440687, "learning_rate": 8.823808327234728e-08, "loss": 0.0002, "step": 287620 }, { "epoch": 1.8922652842378112, "grad_norm": 0.0025312898766783522, "learning_rate": 8.81307341907084e-08, "loss": 0.0004, "step": 287630 }, { "epoch": 1.8923310724130444, "grad_norm": 0.059821015276058515, "learning_rate": 8.80234498678234e-08, "loss": 0.001, "step": 287640 }, { "epoch": 1.8923968605882777, "grad_norm": 0.030927131853278268, "learning_rate": 8.791623030510677e-08, "loss": 0.0006, "step": 287650 }, { "epoch": 1.8924626487635112, "grad_norm": 0.027798493257066514, "learning_rate": 8.780907550397289e-08, "loss": 0.0004, "step": 287660 }, { "epoch": 1.8925284369387447, "grad_norm": 0.0005068527209892511, "learning_rate": 8.770198546583342e-08, "loss": 0.0002, "step": 287670 }, { "epoch": 1.892594225113978, "grad_norm": 0.011089842739876881, "learning_rate": 8.759496019210056e-08, "loss": 0.0004, "step": 287680 }, { "epoch": 1.8926600132892113, "grad_norm": 0.021111588255623894, "learning_rate": 8.748799968418597e-08, "loss": 0.0004, "step": 287690 }, { "epoch": 1.8927258014644448, "grad_norm": 0.07174394647367743, "learning_rate": 8.738110394349908e-08, "loss": 0.0011, "step": 287700 }, { "epoch": 1.8927915896396783, "grad_norm": 0.0046874154202909924, "learning_rate": 8.727427297144986e-08, "loss": 0.0005, "step": 287710 }, { "epoch": 1.8928573778149116, "grad_norm": 0.002083608806957241, "learning_rate": 8.716750676944552e-08, "loss": 0.0006, "step": 287720 }, { "epoch": 1.8929231659901449, "grad_norm": 0.0227509359922526, "learning_rate": 8.70608053388955e-08, "loss": 0.0002, "step": 287730 }, { "epoch": 1.8929889541653782, "grad_norm": 0.006087700578209882, "learning_rate": 8.695416868120532e-08, "loss": 0.0005, "step": 287740 }, { "epoch": 1.8930547423406117, "grad_norm": 0.006947521370758423, "learning_rate": 8.684759679778109e-08, "loss": 0.0005, "step": 287750 }, { "epoch": 1.8931205305158452, "grad_norm": 0.03174639028209699, "learning_rate": 8.674108969002836e-08, "loss": 0.0006, "step": 287760 }, { "epoch": 1.8931863186910785, "grad_norm": 0.08718639050792308, "learning_rate": 8.663464735935101e-08, "loss": 0.0004, "step": 287770 }, { "epoch": 1.8932521068663117, "grad_norm": 0.005118627698437155, "learning_rate": 8.652826980715235e-08, "loss": 0.0005, "step": 287780 }, { "epoch": 1.8933178950415452, "grad_norm": 0.061547998169546765, "learning_rate": 8.642195703483514e-08, "loss": 0.0007, "step": 287790 }, { "epoch": 1.8933836832167787, "grad_norm": 0.003892625656831763, "learning_rate": 8.631570904380049e-08, "loss": 0.0005, "step": 287800 }, { "epoch": 1.893449471392012, "grad_norm": 0.014165952348094571, "learning_rate": 8.620952583544951e-08, "loss": 0.0005, "step": 287810 }, { "epoch": 1.8935152595672453, "grad_norm": 0.008147222335923781, "learning_rate": 8.610340741118162e-08, "loss": 0.0006, "step": 287820 }, { "epoch": 1.8935810477424788, "grad_norm": 0.013710116648046942, "learning_rate": 8.599735377239682e-08, "loss": 0.0004, "step": 287830 }, { "epoch": 1.893646835917712, "grad_norm": 0.030261190735391467, "learning_rate": 8.589136492049344e-08, "loss": 0.0003, "step": 287840 }, { "epoch": 1.8937126240929456, "grad_norm": 0.03612480182140687, "learning_rate": 8.578544085686702e-08, "loss": 0.0004, "step": 287850 }, { "epoch": 1.893778412268179, "grad_norm": 0.0007243936854204648, "learning_rate": 8.567958158291645e-08, "loss": 0.0006, "step": 287860 }, { "epoch": 1.8938442004434122, "grad_norm": 0.02975964504391188, "learning_rate": 8.557378710003561e-08, "loss": 0.0005, "step": 287870 }, { "epoch": 1.8939099886186457, "grad_norm": 0.026832418657788272, "learning_rate": 8.546805740962061e-08, "loss": 0.0004, "step": 287880 }, { "epoch": 1.8939757767938792, "grad_norm": 0.0212321819864626, "learning_rate": 8.536239251306477e-08, "loss": 0.0003, "step": 287890 }, { "epoch": 1.8940415649691125, "grad_norm": 0.03047195907233039, "learning_rate": 8.525679241176088e-08, "loss": 0.0005, "step": 287900 }, { "epoch": 1.8941073531443458, "grad_norm": 0.04753447366777355, "learning_rate": 8.51512571071017e-08, "loss": 0.0003, "step": 287910 }, { "epoch": 1.8941731413195793, "grad_norm": 0.08079010455460063, "learning_rate": 8.504578660047836e-08, "loss": 0.0005, "step": 287920 }, { "epoch": 1.8942389294948128, "grad_norm": 0.04290758460072697, "learning_rate": 8.494038089328193e-08, "loss": 0.0003, "step": 287930 }, { "epoch": 1.894304717670046, "grad_norm": 0.034438801845731386, "learning_rate": 8.483503998690134e-08, "loss": 0.0004, "step": 287940 }, { "epoch": 1.8943705058452793, "grad_norm": 0.006649489660081403, "learning_rate": 8.472976388272547e-08, "loss": 0.0003, "step": 287950 }, { "epoch": 1.8944362940205126, "grad_norm": 0.021680498011823566, "learning_rate": 8.462455258214264e-08, "loss": 0.0003, "step": 287960 }, { "epoch": 1.8945020821957461, "grad_norm": 0.004244145724153624, "learning_rate": 8.451940608654008e-08, "loss": 0.0006, "step": 287970 }, { "epoch": 1.8945678703709796, "grad_norm": 0.012591830685431356, "learning_rate": 8.441432439730335e-08, "loss": 0.0004, "step": 287980 }, { "epoch": 1.894633658546213, "grad_norm": 0.14095002570671838, "learning_rate": 8.430930751581912e-08, "loss": 0.0004, "step": 287990 }, { "epoch": 1.8946994467214462, "grad_norm": 0.0031475535582669704, "learning_rate": 8.420435544347073e-08, "loss": 0.0007, "step": 288000 }, { "epoch": 1.8947652348966797, "grad_norm": 0.06931647370798742, "learning_rate": 8.409946818164261e-08, "loss": 0.0007, "step": 288010 }, { "epoch": 1.8948310230719132, "grad_norm": 0.05896508734508506, "learning_rate": 8.399464573171757e-08, "loss": 0.0008, "step": 288020 }, { "epoch": 1.8948968112471465, "grad_norm": 0.011411382889824222, "learning_rate": 8.388988809507726e-08, "loss": 0.0001, "step": 288030 }, { "epoch": 1.8949625994223798, "grad_norm": 0.04076701033440533, "learning_rate": 8.378519527310281e-08, "loss": 0.0011, "step": 288040 }, { "epoch": 1.895028387597613, "grad_norm": 0.0782627482011819, "learning_rate": 8.368056726717421e-08, "loss": 0.0005, "step": 288050 }, { "epoch": 1.8950941757728466, "grad_norm": 0.012767854379071486, "learning_rate": 8.357600407867206e-08, "loss": 0.0015, "step": 288060 }, { "epoch": 1.89515996394808, "grad_norm": 0.04910277352791222, "learning_rate": 8.347150570897355e-08, "loss": 0.0008, "step": 288070 }, { "epoch": 1.8952257521233133, "grad_norm": 0.0330714377106813, "learning_rate": 8.336707215945761e-08, "loss": 0.0004, "step": 288080 }, { "epoch": 1.8952915402985466, "grad_norm": 0.012831962509563205, "learning_rate": 8.326270343150033e-08, "loss": 0.0004, "step": 288090 }, { "epoch": 1.8953573284737801, "grad_norm": 0.001186249141844742, "learning_rate": 8.315839952647787e-08, "loss": 0.0004, "step": 288100 }, { "epoch": 1.8954231166490136, "grad_norm": 0.03748294490364472, "learning_rate": 8.305416044576519e-08, "loss": 0.0003, "step": 288110 }, { "epoch": 1.895488904824247, "grad_norm": 0.03733076087887289, "learning_rate": 8.294998619073735e-08, "loss": 0.0007, "step": 288120 }, { "epoch": 1.8955546929994802, "grad_norm": 0.002915851662806221, "learning_rate": 8.284587676276712e-08, "loss": 0.0008, "step": 288130 }, { "epoch": 1.8956204811747137, "grad_norm": 0.01773435217478585, "learning_rate": 8.274183216322729e-08, "loss": 0.0004, "step": 288140 }, { "epoch": 1.895686269349947, "grad_norm": 0.004553120684829262, "learning_rate": 8.263785239348953e-08, "loss": 0.0004, "step": 288150 }, { "epoch": 1.8957520575251805, "grad_norm": 0.007691776052797598, "learning_rate": 8.253393745492444e-08, "loss": 0.0005, "step": 288160 }, { "epoch": 1.8958178457004138, "grad_norm": 0.0007294620717423533, "learning_rate": 8.243008734890312e-08, "loss": 0.0002, "step": 288170 }, { "epoch": 1.895883633875647, "grad_norm": 0.02998249338347846, "learning_rate": 8.232630207679337e-08, "loss": 0.0002, "step": 288180 }, { "epoch": 1.8959494220508806, "grad_norm": 0.0007585876047184694, "learning_rate": 8.222258163996466e-08, "loss": 0.0003, "step": 288190 }, { "epoch": 1.896015210226114, "grad_norm": 0.009950334281838313, "learning_rate": 8.211892603978366e-08, "loss": 0.0003, "step": 288200 }, { "epoch": 1.8960809984013474, "grad_norm": 0.0026134013292297502, "learning_rate": 8.201533527761707e-08, "loss": 0.0003, "step": 288210 }, { "epoch": 1.8961467865765806, "grad_norm": 0.01935234481972022, "learning_rate": 8.1911809354831e-08, "loss": 0.0003, "step": 288220 }, { "epoch": 1.8962125747518142, "grad_norm": 0.044743312847615395, "learning_rate": 8.18083482727905e-08, "loss": 0.0004, "step": 288230 }, { "epoch": 1.8962783629270477, "grad_norm": 0.0014746404034278327, "learning_rate": 8.170495203285833e-08, "loss": 0.0003, "step": 288240 }, { "epoch": 1.896344151102281, "grad_norm": 0.00012009408018453844, "learning_rate": 8.160162063639953e-08, "loss": 0.0001, "step": 288250 }, { "epoch": 1.8964099392775142, "grad_norm": 0.0420306790758113, "learning_rate": 8.149835408477525e-08, "loss": 0.0008, "step": 288260 }, { "epoch": 1.8964757274527475, "grad_norm": 0.028205750851272134, "learning_rate": 8.139515237934714e-08, "loss": 0.0005, "step": 288270 }, { "epoch": 1.896541515627981, "grad_norm": 0.012179029721294539, "learning_rate": 8.129201552147636e-08, "loss": 0.0006, "step": 288280 }, { "epoch": 1.8966073038032145, "grad_norm": 0.0011199422469007295, "learning_rate": 8.118894351252238e-08, "loss": 0.0002, "step": 288290 }, { "epoch": 1.8966730919784478, "grad_norm": 0.016603371629120546, "learning_rate": 8.108593635384409e-08, "loss": 0.0004, "step": 288300 }, { "epoch": 1.896738880153681, "grad_norm": 0.025870777382037934, "learning_rate": 8.098299404679932e-08, "loss": 0.0006, "step": 288310 }, { "epoch": 1.8968046683289146, "grad_norm": 0.006051481439766031, "learning_rate": 8.088011659274531e-08, "loss": 0.0007, "step": 288320 }, { "epoch": 1.896870456504148, "grad_norm": 0.011834553825714975, "learning_rate": 8.077730399303818e-08, "loss": 0.0004, "step": 288330 }, { "epoch": 1.8969362446793814, "grad_norm": 0.004464524164003355, "learning_rate": 8.067455624903353e-08, "loss": 0.0003, "step": 288340 }, { "epoch": 1.8970020328546147, "grad_norm": 0.004737469923532613, "learning_rate": 8.057187336208694e-08, "loss": 0.0001, "step": 288350 }, { "epoch": 1.897067821029848, "grad_norm": 0.00017189739347862692, "learning_rate": 8.046925533355177e-08, "loss": 0.0003, "step": 288360 }, { "epoch": 1.8971336092050815, "grad_norm": 0.007533894399422595, "learning_rate": 8.036670216478027e-08, "loss": 0.0006, "step": 288370 }, { "epoch": 1.897199397380315, "grad_norm": 0.020267060869236735, "learning_rate": 8.026421385712468e-08, "loss": 0.0005, "step": 288380 }, { "epoch": 1.8972651855555482, "grad_norm": 0.00892038226049743, "learning_rate": 8.016179041193672e-08, "loss": 0.0003, "step": 288390 }, { "epoch": 1.8973309737307815, "grad_norm": 0.00731171419479653, "learning_rate": 8.005943183056642e-08, "loss": 0.0007, "step": 288400 }, { "epoch": 1.897396761906015, "grad_norm": 0.00046004909088153355, "learning_rate": 7.995713811436378e-08, "loss": 0.0005, "step": 288410 }, { "epoch": 1.8974625500812485, "grad_norm": 0.0025953013584885027, "learning_rate": 7.985490926467664e-08, "loss": 0.0004, "step": 288420 }, { "epoch": 1.8975283382564818, "grad_norm": 0.08236337246115373, "learning_rate": 7.975274528285337e-08, "loss": 0.0006, "step": 288430 }, { "epoch": 1.897594126431715, "grad_norm": 0.042006148074866495, "learning_rate": 7.965064617024065e-08, "loss": 0.0005, "step": 288440 }, { "epoch": 1.8976599146069484, "grad_norm": 0.016904366112143077, "learning_rate": 7.954861192818464e-08, "loss": 0.0006, "step": 288450 }, { "epoch": 1.897725702782182, "grad_norm": 0.012157184772662822, "learning_rate": 7.944664255803091e-08, "loss": 0.0005, "step": 288460 }, { "epoch": 1.8977914909574154, "grad_norm": 0.018460001491145878, "learning_rate": 7.934473806112287e-08, "loss": 0.0002, "step": 288470 }, { "epoch": 1.8978572791326487, "grad_norm": 0.06152060680646251, "learning_rate": 7.924289843880495e-08, "loss": 0.0004, "step": 288480 }, { "epoch": 1.897923067307882, "grad_norm": 0.00017856099371735683, "learning_rate": 7.914112369242e-08, "loss": 0.0008, "step": 288490 }, { "epoch": 1.8979888554831155, "grad_norm": 0.005342467449700286, "learning_rate": 7.903941382330916e-08, "loss": 0.0006, "step": 288500 }, { "epoch": 1.898054643658349, "grad_norm": 0.006022481531923725, "learning_rate": 7.893776883281356e-08, "loss": 0.0001, "step": 288510 }, { "epoch": 1.8981204318335823, "grad_norm": 0.00027946181018972, "learning_rate": 7.883618872227328e-08, "loss": 0.0003, "step": 288520 }, { "epoch": 1.8981862200088155, "grad_norm": 0.03506494772741099, "learning_rate": 7.873467349302777e-08, "loss": 0.001, "step": 288530 }, { "epoch": 1.898252008184049, "grad_norm": 0.00027604217818852405, "learning_rate": 7.863322314641542e-08, "loss": 0.0004, "step": 288540 }, { "epoch": 1.8983177963592823, "grad_norm": 0.021835056089390258, "learning_rate": 7.853183768377348e-08, "loss": 0.0005, "step": 288550 }, { "epoch": 1.8983835845345158, "grad_norm": 0.006073711363919033, "learning_rate": 7.843051710643868e-08, "loss": 0.0004, "step": 288560 }, { "epoch": 1.8984493727097491, "grad_norm": 0.006159573227168067, "learning_rate": 7.832926141574715e-08, "loss": 0.0003, "step": 288570 }, { "epoch": 1.8985151608849824, "grad_norm": 0.05424379771904841, "learning_rate": 7.822807061303395e-08, "loss": 0.0003, "step": 288580 }, { "epoch": 1.898580949060216, "grad_norm": 0.028855307813263745, "learning_rate": 7.812694469963245e-08, "loss": 0.0004, "step": 288590 }, { "epoch": 1.8986467372354494, "grad_norm": 0.0011712228024143042, "learning_rate": 7.802588367687657e-08, "loss": 0.0002, "step": 288600 }, { "epoch": 1.8987125254106827, "grad_norm": 0.046585094888751816, "learning_rate": 7.792488754609861e-08, "loss": 0.0008, "step": 288610 }, { "epoch": 1.898778313585916, "grad_norm": 0.054074609381474945, "learning_rate": 7.782395630862971e-08, "loss": 0.0005, "step": 288620 }, { "epoch": 1.8988441017611495, "grad_norm": 0.03981975289163392, "learning_rate": 7.772308996580103e-08, "loss": 0.0003, "step": 288630 }, { "epoch": 1.898909889936383, "grad_norm": 0.004497745474995901, "learning_rate": 7.76222885189426e-08, "loss": 0.0004, "step": 288640 }, { "epoch": 1.8989756781116163, "grad_norm": 0.036571167376282014, "learning_rate": 7.752155196938283e-08, "loss": 0.0005, "step": 288650 }, { "epoch": 1.8990414662868496, "grad_norm": 0.010820344052117944, "learning_rate": 7.742088031845007e-08, "loss": 0.0005, "step": 288660 }, { "epoch": 1.8991072544620828, "grad_norm": 0.011277269029434206, "learning_rate": 7.732027356747162e-08, "loss": 0.0002, "step": 288670 }, { "epoch": 1.8991730426373163, "grad_norm": 0.08431548445678479, "learning_rate": 7.721973171777364e-08, "loss": 0.0005, "step": 288680 }, { "epoch": 1.8992388308125498, "grad_norm": 0.004850680014992586, "learning_rate": 7.711925477068171e-08, "loss": 0.0001, "step": 288690 }, { "epoch": 1.8993046189877831, "grad_norm": 0.011379053122774317, "learning_rate": 7.701884272752147e-08, "loss": 0.0004, "step": 288700 }, { "epoch": 1.8993704071630164, "grad_norm": 0.018343363828780646, "learning_rate": 7.691849558961573e-08, "loss": 0.0004, "step": 288710 }, { "epoch": 1.89943619533825, "grad_norm": 0.03186657346442736, "learning_rate": 7.681821335828733e-08, "loss": 0.0007, "step": 288720 }, { "epoch": 1.8995019835134834, "grad_norm": 0.03474236789925885, "learning_rate": 7.671799603485908e-08, "loss": 0.0004, "step": 288730 }, { "epoch": 1.8995677716887167, "grad_norm": 0.017738085886467806, "learning_rate": 7.661784362065216e-08, "loss": 0.0004, "step": 288740 }, { "epoch": 1.89963355986395, "grad_norm": 0.06964844330247949, "learning_rate": 7.651775611698664e-08, "loss": 0.001, "step": 288750 }, { "epoch": 1.8996993480391833, "grad_norm": 0.018165870076617508, "learning_rate": 7.641773352518256e-08, "loss": 0.0006, "step": 288760 }, { "epoch": 1.8997651362144168, "grad_norm": 0.0016500456202520612, "learning_rate": 7.63177758465583e-08, "loss": 0.0008, "step": 288770 }, { "epoch": 1.8998309243896503, "grad_norm": 0.10174945709704927, "learning_rate": 7.621788308243172e-08, "loss": 0.001, "step": 288780 }, { "epoch": 1.8998967125648836, "grad_norm": 0.020526122177893368, "learning_rate": 7.611805523412009e-08, "loss": 0.0004, "step": 288790 }, { "epoch": 1.8999625007401169, "grad_norm": 0.0001789378965085906, "learning_rate": 7.6018292302939e-08, "loss": 0.0002, "step": 288800 }, { "epoch": 1.9000282889153504, "grad_norm": 0.057070221756983744, "learning_rate": 7.591859429020465e-08, "loss": 0.0004, "step": 288810 }, { "epoch": 1.9000940770905839, "grad_norm": 0.02049508649243854, "learning_rate": 7.581896119723042e-08, "loss": 0.0006, "step": 288820 }, { "epoch": 1.9001598652658171, "grad_norm": 0.008059018576011066, "learning_rate": 7.571939302533083e-08, "loss": 0.0004, "step": 288830 }, { "epoch": 1.9002256534410504, "grad_norm": 0.01070471555349812, "learning_rate": 7.561988977581813e-08, "loss": 0.0004, "step": 288840 }, { "epoch": 1.900291441616284, "grad_norm": 0.017041374104160848, "learning_rate": 7.552045145000352e-08, "loss": 0.0006, "step": 288850 }, { "epoch": 1.9003572297915172, "grad_norm": 0.00022756297236815922, "learning_rate": 7.542107804919929e-08, "loss": 0.0007, "step": 288860 }, { "epoch": 1.9004230179667507, "grad_norm": 0.010919157282021263, "learning_rate": 7.532176957471493e-08, "loss": 0.0003, "step": 288870 }, { "epoch": 1.900488806141984, "grad_norm": 0.034860455532945755, "learning_rate": 7.52225260278594e-08, "loss": 0.0005, "step": 288880 }, { "epoch": 1.9005545943172173, "grad_norm": 0.04971492684474747, "learning_rate": 7.512334740994221e-08, "loss": 0.0012, "step": 288890 }, { "epoch": 1.9006203824924508, "grad_norm": 0.0422188612219872, "learning_rate": 7.502423372227009e-08, "loss": 0.0011, "step": 288900 }, { "epoch": 1.9006861706676843, "grad_norm": 0.01141176998651528, "learning_rate": 7.492518496614976e-08, "loss": 0.0003, "step": 288910 }, { "epoch": 1.9007519588429176, "grad_norm": 0.013612913842652674, "learning_rate": 7.482620114288742e-08, "loss": 0.0002, "step": 288920 }, { "epoch": 1.9008177470181509, "grad_norm": 0.0026520279753151133, "learning_rate": 7.472728225378756e-08, "loss": 0.0004, "step": 288930 }, { "epoch": 1.9008835351933844, "grad_norm": 0.04991208027110498, "learning_rate": 7.462842830015526e-08, "loss": 0.0007, "step": 288940 }, { "epoch": 1.9009493233686179, "grad_norm": 0.06468512706971548, "learning_rate": 7.452963928329338e-08, "loss": 0.0009, "step": 288950 }, { "epoch": 1.9010151115438512, "grad_norm": 0.0008425086955964194, "learning_rate": 7.443091520450419e-08, "loss": 0.0001, "step": 288960 }, { "epoch": 1.9010808997190844, "grad_norm": 0.023769410364971794, "learning_rate": 7.433225606508887e-08, "loss": 0.0004, "step": 288970 }, { "epoch": 1.9011466878943177, "grad_norm": 0.2350677828828689, "learning_rate": 7.423366186634917e-08, "loss": 0.0008, "step": 288980 }, { "epoch": 1.9012124760695512, "grad_norm": 0.007334913620899001, "learning_rate": 7.413513260958405e-08, "loss": 0.0003, "step": 288990 }, { "epoch": 1.9012782642447847, "grad_norm": 0.0034105789624178733, "learning_rate": 7.403666829609302e-08, "loss": 0.0006, "step": 289000 }, { "epoch": 1.901344052420018, "grad_norm": 0.05442399768256277, "learning_rate": 7.393826892717448e-08, "loss": 0.0008, "step": 289010 }, { "epoch": 1.9014098405952513, "grad_norm": 0.017436298998304116, "learning_rate": 7.383993450412574e-08, "loss": 0.0002, "step": 289020 }, { "epoch": 1.9014756287704848, "grad_norm": 0.05450828034699085, "learning_rate": 7.374166502824242e-08, "loss": 0.0003, "step": 289030 }, { "epoch": 1.9015414169457183, "grad_norm": 0.04423201562219954, "learning_rate": 7.364346050082072e-08, "loss": 0.0004, "step": 289040 }, { "epoch": 1.9016072051209516, "grad_norm": 0.01436029539246915, "learning_rate": 7.354532092315569e-08, "loss": 0.0005, "step": 289050 }, { "epoch": 1.9016729932961849, "grad_norm": 0.0037892642484655265, "learning_rate": 7.344724629654021e-08, "loss": 0.0018, "step": 289060 }, { "epoch": 1.9017387814714182, "grad_norm": 0.03218275909812097, "learning_rate": 7.334923662226823e-08, "loss": 0.0003, "step": 289070 }, { "epoch": 1.9018045696466517, "grad_norm": 0.006814006149743383, "learning_rate": 7.325129190163205e-08, "loss": 0.0005, "step": 289080 }, { "epoch": 1.9018703578218852, "grad_norm": 0.016617863834978066, "learning_rate": 7.315341213592231e-08, "loss": 0.0002, "step": 289090 }, { "epoch": 1.9019361459971185, "grad_norm": 0.019327634201816428, "learning_rate": 7.305559732642963e-08, "loss": 0.0003, "step": 289100 }, { "epoch": 1.9020019341723517, "grad_norm": 0.004366950307348531, "learning_rate": 7.295784747444356e-08, "loss": 0.0004, "step": 289110 }, { "epoch": 1.9020677223475853, "grad_norm": 0.03333644025005565, "learning_rate": 7.286016258125251e-08, "loss": 0.0007, "step": 289120 }, { "epoch": 1.9021335105228188, "grad_norm": 0.006483058875712199, "learning_rate": 7.276254264814542e-08, "loss": 0.0004, "step": 289130 }, { "epoch": 1.902199298698052, "grad_norm": 0.01489358032780855, "learning_rate": 7.266498767640905e-08, "loss": 0.0003, "step": 289140 }, { "epoch": 1.9022650868732853, "grad_norm": 0.057700613401811596, "learning_rate": 7.256749766732906e-08, "loss": 0.0006, "step": 289150 }, { "epoch": 1.9023308750485188, "grad_norm": 0.001113298071706177, "learning_rate": 7.247007262219108e-08, "loss": 0.0005, "step": 289160 }, { "epoch": 1.9023966632237521, "grad_norm": 0.13748298344002013, "learning_rate": 7.237271254227962e-08, "loss": 0.0007, "step": 289170 }, { "epoch": 1.9024624513989856, "grad_norm": 0.005848331057062771, "learning_rate": 7.227541742887812e-08, "loss": 0.0022, "step": 289180 }, { "epoch": 1.902528239574219, "grad_norm": 0.014345861961227368, "learning_rate": 7.217818728326886e-08, "loss": 0.0003, "step": 289190 }, { "epoch": 1.9025940277494522, "grad_norm": 0.011697732883919554, "learning_rate": 7.208102210673474e-08, "loss": 0.0009, "step": 289200 }, { "epoch": 1.9026598159246857, "grad_norm": 0.041828181140270426, "learning_rate": 7.198392190055636e-08, "loss": 0.0005, "step": 289210 }, { "epoch": 1.9027256040999192, "grad_norm": 0.01122588395491579, "learning_rate": 7.188688666601384e-08, "loss": 0.0004, "step": 289220 }, { "epoch": 1.9027913922751525, "grad_norm": 0.0033234478208265515, "learning_rate": 7.178991640438615e-08, "loss": 0.0005, "step": 289230 }, { "epoch": 1.9028571804503858, "grad_norm": 0.0010834948569031031, "learning_rate": 7.169301111695227e-08, "loss": 0.0004, "step": 289240 }, { "epoch": 1.9029229686256193, "grad_norm": 0.021056414926719533, "learning_rate": 7.159617080498948e-08, "loss": 0.0003, "step": 289250 }, { "epoch": 1.9029887568008528, "grad_norm": 0.05117191382445677, "learning_rate": 7.149939546977514e-08, "loss": 0.0004, "step": 289260 }, { "epoch": 1.903054544976086, "grad_norm": 0.01510117375866834, "learning_rate": 7.140268511258486e-08, "loss": 0.0001, "step": 289270 }, { "epoch": 1.9031203331513193, "grad_norm": 0.0009502758038300319, "learning_rate": 7.130603973469319e-08, "loss": 0.0004, "step": 289280 }, { "epoch": 1.9031861213265526, "grad_norm": 0.007363143150649701, "learning_rate": 7.120945933737466e-08, "loss": 0.0004, "step": 289290 }, { "epoch": 1.9032519095017861, "grad_norm": 0.001955412602867781, "learning_rate": 7.111294392190271e-08, "loss": 0.0005, "step": 289300 }, { "epoch": 1.9033176976770196, "grad_norm": 0.026857880485905188, "learning_rate": 7.10164934895502e-08, "loss": 0.0006, "step": 289310 }, { "epoch": 1.903383485852253, "grad_norm": 0.009468558134611263, "learning_rate": 7.092010804158778e-08, "loss": 0.0004, "step": 289320 }, { "epoch": 1.9034492740274862, "grad_norm": 0.009652520069255323, "learning_rate": 7.082378757928665e-08, "loss": 0.0002, "step": 289330 }, { "epoch": 1.9035150622027197, "grad_norm": 0.0073607581634989195, "learning_rate": 7.072753210391692e-08, "loss": 0.0004, "step": 289340 }, { "epoch": 1.9035808503779532, "grad_norm": 0.038403270663292396, "learning_rate": 7.063134161674756e-08, "loss": 0.0004, "step": 289350 }, { "epoch": 1.9036466385531865, "grad_norm": 0.015336854325620198, "learning_rate": 7.053521611904702e-08, "loss": 0.0004, "step": 289360 }, { "epoch": 1.9037124267284198, "grad_norm": 0.013435516601248988, "learning_rate": 7.043915561208147e-08, "loss": 0.0002, "step": 289370 }, { "epoch": 1.903778214903653, "grad_norm": 0.014662744497169094, "learning_rate": 7.034316009711828e-08, "loss": 0.0005, "step": 289380 }, { "epoch": 1.9038440030788866, "grad_norm": 0.01546047857640885, "learning_rate": 7.024722957542363e-08, "loss": 0.0002, "step": 289390 }, { "epoch": 1.90390979125412, "grad_norm": 0.027908172976666406, "learning_rate": 7.015136404826095e-08, "loss": 0.0008, "step": 289400 }, { "epoch": 1.9039755794293534, "grad_norm": 0.010995435433771773, "learning_rate": 7.005556351689535e-08, "loss": 0.0002, "step": 289410 }, { "epoch": 1.9040413676045866, "grad_norm": 0.00015241994744847397, "learning_rate": 6.995982798258916e-08, "loss": 0.0003, "step": 289420 }, { "epoch": 1.9041071557798201, "grad_norm": 0.06695125514525657, "learning_rate": 6.986415744660524e-08, "loss": 0.0008, "step": 289430 }, { "epoch": 1.9041729439550537, "grad_norm": 0.01598599921351972, "learning_rate": 6.976855191020426e-08, "loss": 0.0004, "step": 289440 }, { "epoch": 1.904238732130287, "grad_norm": 0.08197515012871337, "learning_rate": 6.967301137464633e-08, "loss": 0.0005, "step": 289450 }, { "epoch": 1.9043045203055202, "grad_norm": 0.03197877333103026, "learning_rate": 6.95775358411921e-08, "loss": 0.0003, "step": 289460 }, { "epoch": 1.9043703084807535, "grad_norm": 0.0013178374087956662, "learning_rate": 6.948212531109998e-08, "loss": 0.0003, "step": 289470 }, { "epoch": 1.904436096655987, "grad_norm": 0.06412470345654654, "learning_rate": 6.938677978562735e-08, "loss": 0.0003, "step": 289480 }, { "epoch": 1.9045018848312205, "grad_norm": 0.031165401284435863, "learning_rate": 6.929149926603206e-08, "loss": 0.0005, "step": 289490 }, { "epoch": 1.9045676730064538, "grad_norm": 0.03770446255275583, "learning_rate": 6.91962837535698e-08, "loss": 0.0001, "step": 289500 }, { "epoch": 1.904633461181687, "grad_norm": 0.02820620329415425, "learning_rate": 6.910113324949563e-08, "loss": 0.0002, "step": 289510 }, { "epoch": 1.9046992493569206, "grad_norm": 0.011720845050491922, "learning_rate": 6.900604775506469e-08, "loss": 0.0006, "step": 289520 }, { "epoch": 1.904765037532154, "grad_norm": 0.010752073187724817, "learning_rate": 6.891102727153043e-08, "loss": 0.0003, "step": 289530 }, { "epoch": 1.9048308257073874, "grad_norm": 0.003784021734596866, "learning_rate": 6.881607180014571e-08, "loss": 0.0003, "step": 289540 }, { "epoch": 1.9048966138826207, "grad_norm": 0.007356202839927112, "learning_rate": 6.872118134216177e-08, "loss": 0.0004, "step": 289550 }, { "epoch": 1.9049624020578542, "grad_norm": 0.02864482278988142, "learning_rate": 6.862635589883037e-08, "loss": 0.0003, "step": 289560 }, { "epoch": 1.9050281902330877, "grad_norm": 0.029792792011547737, "learning_rate": 6.853159547140165e-08, "loss": 0.0014, "step": 289570 }, { "epoch": 1.905093978408321, "grad_norm": 0.0313019896496157, "learning_rate": 6.843690006112458e-08, "loss": 0.0004, "step": 289580 }, { "epoch": 1.9051597665835542, "grad_norm": 0.013546812842784544, "learning_rate": 6.834226966924761e-08, "loss": 0.0003, "step": 289590 }, { "epoch": 1.9052255547587875, "grad_norm": 0.021024346337041408, "learning_rate": 6.824770429701866e-08, "loss": 0.0005, "step": 289600 }, { "epoch": 1.905291342934021, "grad_norm": 0.03902426599879353, "learning_rate": 6.815320394568448e-08, "loss": 0.0003, "step": 289610 }, { "epoch": 1.9053571311092545, "grad_norm": 0.015507451346475264, "learning_rate": 6.805876861649074e-08, "loss": 0.0003, "step": 289620 }, { "epoch": 1.9054229192844878, "grad_norm": 0.015119882283052425, "learning_rate": 6.796439831068258e-08, "loss": 0.0003, "step": 289630 }, { "epoch": 1.905488707459721, "grad_norm": 0.12071473038314226, "learning_rate": 6.787009302950453e-08, "loss": 0.0006, "step": 289640 }, { "epoch": 1.9055544956349546, "grad_norm": 0.045426684387930495, "learning_rate": 6.777585277419951e-08, "loss": 0.0009, "step": 289650 }, { "epoch": 1.905620283810188, "grad_norm": 0.00584731241315473, "learning_rate": 6.768167754600986e-08, "loss": 0.0007, "step": 289660 }, { "epoch": 1.9056860719854214, "grad_norm": 0.0723897360425789, "learning_rate": 6.758756734617788e-08, "loss": 0.0005, "step": 289670 }, { "epoch": 1.9057518601606547, "grad_norm": 0.001302815807181321, "learning_rate": 6.749352217594373e-08, "loss": 0.0005, "step": 289680 }, { "epoch": 1.905817648335888, "grad_norm": 0.00044374280460188, "learning_rate": 6.739954203654753e-08, "loss": 0.0003, "step": 289690 }, { "epoch": 1.9058834365111215, "grad_norm": 0.0018923756883189227, "learning_rate": 6.730562692922826e-08, "loss": 0.0003, "step": 289700 }, { "epoch": 1.905949224686355, "grad_norm": 0.0010547057920143835, "learning_rate": 6.721177685522439e-08, "loss": 0.0002, "step": 289710 }, { "epoch": 1.9060150128615883, "grad_norm": 0.03184842482505776, "learning_rate": 6.711799181577272e-08, "loss": 0.0002, "step": 289720 }, { "epoch": 1.9060808010368215, "grad_norm": 0.00041814774980975504, "learning_rate": 6.702427181211003e-08, "loss": 0.0007, "step": 289730 }, { "epoch": 1.906146589212055, "grad_norm": 0.02645697869815919, "learning_rate": 6.693061684547197e-08, "loss": 0.0004, "step": 289740 }, { "epoch": 1.9062123773872885, "grad_norm": 0.0101324467134909, "learning_rate": 6.683702691709315e-08, "loss": 0.0004, "step": 289750 }, { "epoch": 1.9062781655625218, "grad_norm": 0.02745214011590099, "learning_rate": 6.674350202820756e-08, "loss": 0.0005, "step": 289760 }, { "epoch": 1.9063439537377551, "grad_norm": 0.024265207877441408, "learning_rate": 6.665004218004811e-08, "loss": 0.0003, "step": 289770 }, { "epoch": 1.9064097419129884, "grad_norm": 0.013714422279922594, "learning_rate": 6.65566473738477e-08, "loss": 0.0012, "step": 289780 }, { "epoch": 1.906475530088222, "grad_norm": 0.008891938426448087, "learning_rate": 6.646331761083646e-08, "loss": 0.0005, "step": 289790 }, { "epoch": 1.9065413182634554, "grad_norm": 0.04920662832116648, "learning_rate": 6.637005289224618e-08, "loss": 0.0004, "step": 289800 }, { "epoch": 1.9066071064386887, "grad_norm": 0.0007851098872361937, "learning_rate": 6.627685321930533e-08, "loss": 0.0005, "step": 289810 }, { "epoch": 1.906672894613922, "grad_norm": 0.010589484210017229, "learning_rate": 6.618371859324291e-08, "loss": 0.0003, "step": 289820 }, { "epoch": 1.9067386827891555, "grad_norm": 0.021718548502600164, "learning_rate": 6.609064901528739e-08, "loss": 0.0003, "step": 289830 }, { "epoch": 1.906804470964389, "grad_norm": 0.02579215666848412, "learning_rate": 6.599764448666557e-08, "loss": 0.0005, "step": 289840 }, { "epoch": 1.9068702591396223, "grad_norm": 0.0025639063263462286, "learning_rate": 6.590470500860369e-08, "loss": 0.0008, "step": 289850 }, { "epoch": 1.9069360473148556, "grad_norm": 0.0502593131493578, "learning_rate": 6.581183058232632e-08, "loss": 0.0005, "step": 289860 }, { "epoch": 1.907001835490089, "grad_norm": 0.024938021766889133, "learning_rate": 6.57190212090586e-08, "loss": 0.0005, "step": 289870 }, { "epoch": 1.9070676236653223, "grad_norm": 0.00296141675349947, "learning_rate": 6.562627689002454e-08, "loss": 0.0027, "step": 289880 }, { "epoch": 1.9071334118405558, "grad_norm": 0.03281710005416959, "learning_rate": 6.553359762644596e-08, "loss": 0.0002, "step": 289890 }, { "epoch": 1.9071992000157891, "grad_norm": 0.017277562550845917, "learning_rate": 6.544098341954519e-08, "loss": 0.0002, "step": 289900 }, { "epoch": 1.9072649881910224, "grad_norm": 0.056056949807239825, "learning_rate": 6.53484342705435e-08, "loss": 0.0005, "step": 289910 }, { "epoch": 1.907330776366256, "grad_norm": 0.13079618133970855, "learning_rate": 6.5255950180661e-08, "loss": 0.0004, "step": 289920 }, { "epoch": 1.9073965645414894, "grad_norm": 0.027936027888146326, "learning_rate": 6.516353115111673e-08, "loss": 0.0003, "step": 289930 }, { "epoch": 1.9074623527167227, "grad_norm": 0.005581323547621174, "learning_rate": 6.507117718312917e-08, "loss": 0.0003, "step": 289940 }, { "epoch": 1.907528140891956, "grad_norm": 0.010905134605854488, "learning_rate": 6.49788882779162e-08, "loss": 0.0003, "step": 289950 }, { "epoch": 1.9075939290671895, "grad_norm": 0.012959048039534186, "learning_rate": 6.488666443669467e-08, "loss": 0.0003, "step": 289960 }, { "epoch": 1.907659717242423, "grad_norm": 0.01597475751144041, "learning_rate": 6.479450566067969e-08, "loss": 0.0005, "step": 289970 }, { "epoch": 1.9077255054176563, "grad_norm": 0.01748276845237701, "learning_rate": 6.470241195108695e-08, "loss": 0.0004, "step": 289980 }, { "epoch": 1.9077912935928896, "grad_norm": 0.004831555316223553, "learning_rate": 6.461038330913105e-08, "loss": 0.0023, "step": 289990 }, { "epoch": 1.9078570817681229, "grad_norm": 0.0009143277695697888, "learning_rate": 6.451841973602435e-08, "loss": 0.0004, "step": 290000 }, { "epoch": 1.9079228699433564, "grad_norm": 0.004594549898519294, "learning_rate": 6.442652123297977e-08, "loss": 0.0006, "step": 290010 }, { "epoch": 1.9079886581185899, "grad_norm": 0.0006543215258760164, "learning_rate": 6.433468780120855e-08, "loss": 0.0002, "step": 290020 }, { "epoch": 1.9080544462938231, "grad_norm": 0.015408550745593995, "learning_rate": 6.424291944192196e-08, "loss": 0.0007, "step": 290030 }, { "epoch": 1.9081202344690564, "grad_norm": 0.001671242171184717, "learning_rate": 6.415121615632957e-08, "loss": 0.0005, "step": 290040 }, { "epoch": 1.90818602264429, "grad_norm": 0.02509910732786277, "learning_rate": 6.4059577945641e-08, "loss": 0.0002, "step": 290050 }, { "epoch": 1.9082518108195234, "grad_norm": 0.0034783246406561432, "learning_rate": 6.396800481106358e-08, "loss": 0.0003, "step": 290060 }, { "epoch": 1.9083175989947567, "grad_norm": 0.026787727959570694, "learning_rate": 6.38764967538047e-08, "loss": 0.0004, "step": 290070 }, { "epoch": 1.90838338716999, "grad_norm": 0.00017141522019739128, "learning_rate": 6.378505377507172e-08, "loss": 0.0002, "step": 290080 }, { "epoch": 1.9084491753452233, "grad_norm": 0.0165186682932685, "learning_rate": 6.369367587606922e-08, "loss": 0.0002, "step": 290090 }, { "epoch": 1.9085149635204568, "grad_norm": 0.0002209152071961793, "learning_rate": 6.360236305800238e-08, "loss": 0.0003, "step": 290100 }, { "epoch": 1.9085807516956903, "grad_norm": 0.04218418730949985, "learning_rate": 6.351111532207521e-08, "loss": 0.0007, "step": 290110 }, { "epoch": 1.9086465398709236, "grad_norm": 0.02210048048176177, "learning_rate": 6.341993266949065e-08, "loss": 0.0006, "step": 290120 }, { "epoch": 1.9087123280461569, "grad_norm": 0.0004430701533155815, "learning_rate": 6.33288151014505e-08, "loss": 0.0001, "step": 290130 }, { "epoch": 1.9087781162213904, "grad_norm": 0.03872332929604607, "learning_rate": 6.32377626191566e-08, "loss": 0.0004, "step": 290140 }, { "epoch": 1.9088439043966239, "grad_norm": 0.003150237045671854, "learning_rate": 6.314677522380852e-08, "loss": 0.0005, "step": 290150 }, { "epoch": 1.9089096925718572, "grad_norm": 0.045845292627834676, "learning_rate": 6.305585291660643e-08, "loss": 0.0006, "step": 290160 }, { "epoch": 1.9089754807470904, "grad_norm": 0.011430848961085314, "learning_rate": 6.296499569874992e-08, "loss": 0.0005, "step": 290170 }, { "epoch": 1.909041268922324, "grad_norm": 0.01841750719549277, "learning_rate": 6.287420357143582e-08, "loss": 0.0008, "step": 290180 }, { "epoch": 1.9091070570975572, "grad_norm": 0.016313879930175683, "learning_rate": 6.278347653586148e-08, "loss": 0.0003, "step": 290190 }, { "epoch": 1.9091728452727907, "grad_norm": 0.009649082742581324, "learning_rate": 6.269281459322263e-08, "loss": 0.0001, "step": 290200 }, { "epoch": 1.909238633448024, "grad_norm": 0.00262837581267539, "learning_rate": 6.260221774471498e-08, "loss": 0.0003, "step": 290210 }, { "epoch": 1.9093044216232573, "grad_norm": 0.0013245879249470733, "learning_rate": 6.251168599153257e-08, "loss": 0.0011, "step": 290220 }, { "epoch": 1.9093702097984908, "grad_norm": 0.021296566253696532, "learning_rate": 6.242121933486945e-08, "loss": 0.0002, "step": 290230 }, { "epoch": 1.9094359979737243, "grad_norm": 0.011033794589194647, "learning_rate": 6.233081777591854e-08, "loss": 0.0007, "step": 290240 }, { "epoch": 1.9095017861489576, "grad_norm": 0.04662602627462018, "learning_rate": 6.224048131587112e-08, "loss": 0.0007, "step": 290250 }, { "epoch": 1.9095675743241909, "grad_norm": 0.00281578078567427, "learning_rate": 6.215020995591792e-08, "loss": 0.0003, "step": 290260 }, { "epoch": 1.9096333624994244, "grad_norm": 0.028498554628260814, "learning_rate": 6.206000369725018e-08, "loss": 0.0002, "step": 290270 }, { "epoch": 1.909699150674658, "grad_norm": 7.800009046555426e-05, "learning_rate": 6.196986254105641e-08, "loss": 0.0004, "step": 290280 }, { "epoch": 1.9097649388498912, "grad_norm": 0.07365742971015994, "learning_rate": 6.18797864885251e-08, "loss": 0.0005, "step": 290290 }, { "epoch": 1.9098307270251245, "grad_norm": 0.028908939929095278, "learning_rate": 6.17897755408442e-08, "loss": 0.0005, "step": 290300 }, { "epoch": 1.9098965152003577, "grad_norm": 0.01280715177065345, "learning_rate": 6.169982969919996e-08, "loss": 0.0011, "step": 290310 }, { "epoch": 1.9099623033755913, "grad_norm": 0.05057875824958993, "learning_rate": 6.160994896477867e-08, "loss": 0.0005, "step": 290320 }, { "epoch": 1.9100280915508248, "grad_norm": 0.004375239096083213, "learning_rate": 6.15201333387655e-08, "loss": 0.0005, "step": 290330 }, { "epoch": 1.910093879726058, "grad_norm": 0.003424127863556828, "learning_rate": 6.143038282234392e-08, "loss": 0.0011, "step": 290340 }, { "epoch": 1.9101596679012913, "grad_norm": 0.01925487296025083, "learning_rate": 6.134069741669745e-08, "loss": 0.0003, "step": 290350 }, { "epoch": 1.9102254560765248, "grad_norm": 0.007798023901768887, "learning_rate": 6.125107712300848e-08, "loss": 0.0005, "step": 290360 }, { "epoch": 1.9102912442517583, "grad_norm": 0.03786098952609987, "learning_rate": 6.116152194245883e-08, "loss": 0.0008, "step": 290370 }, { "epoch": 1.9103570324269916, "grad_norm": 0.001298657708120463, "learning_rate": 6.107203187622868e-08, "loss": 0.0008, "step": 290380 }, { "epoch": 1.910422820602225, "grad_norm": 0.020850183154790074, "learning_rate": 6.098260692549817e-08, "loss": 0.0006, "step": 290390 }, { "epoch": 1.9104886087774582, "grad_norm": 0.00552084470512178, "learning_rate": 6.089324709144694e-08, "loss": 0.0003, "step": 290400 }, { "epoch": 1.9105543969526917, "grad_norm": 0.022506195849358614, "learning_rate": 6.080395237525238e-08, "loss": 0.0004, "step": 290410 }, { "epoch": 1.9106201851279252, "grad_norm": 0.009923097098653506, "learning_rate": 6.071472277809187e-08, "loss": 0.0004, "step": 290420 }, { "epoch": 1.9106859733031585, "grad_norm": 0.0031744719817066664, "learning_rate": 6.062555830114225e-08, "loss": 0.0004, "step": 290430 }, { "epoch": 1.9107517614783918, "grad_norm": 0.010599888089250276, "learning_rate": 6.053645894557814e-08, "loss": 0.0002, "step": 290440 }, { "epoch": 1.9108175496536253, "grad_norm": 0.03416143230999116, "learning_rate": 6.044742471257526e-08, "loss": 0.0003, "step": 290450 }, { "epoch": 1.9108833378288588, "grad_norm": 0.0432803932428949, "learning_rate": 6.03584556033071e-08, "loss": 0.0007, "step": 290460 }, { "epoch": 1.910949126004092, "grad_norm": 0.05136474093828463, "learning_rate": 6.02695516189461e-08, "loss": 0.0003, "step": 290470 }, { "epoch": 1.9110149141793253, "grad_norm": 0.02099529409490077, "learning_rate": 6.018071276066517e-08, "loss": 0.0002, "step": 290480 }, { "epoch": 1.9110807023545588, "grad_norm": 0.020827044510539475, "learning_rate": 6.009193902963506e-08, "loss": 0.0006, "step": 290490 }, { "epoch": 1.9111464905297921, "grad_norm": 0.031160465788422027, "learning_rate": 6.00032304270265e-08, "loss": 0.0003, "step": 290500 }, { "epoch": 1.9112122787050256, "grad_norm": 0.011031039092683709, "learning_rate": 5.991458695400853e-08, "loss": 0.0002, "step": 290510 }, { "epoch": 1.911278066880259, "grad_norm": 0.01833692108426007, "learning_rate": 5.982600861175025e-08, "loss": 0.0004, "step": 290520 }, { "epoch": 1.9113438550554922, "grad_norm": 0.016104211855730206, "learning_rate": 5.97374954014196e-08, "loss": 0.0004, "step": 290530 }, { "epoch": 1.9114096432307257, "grad_norm": 0.0045628504253752335, "learning_rate": 5.964904732418342e-08, "loss": 0.0002, "step": 290540 }, { "epoch": 1.9114754314059592, "grad_norm": 0.05312064739074385, "learning_rate": 5.9560664381208e-08, "loss": 0.0008, "step": 290550 }, { "epoch": 1.9115412195811925, "grad_norm": 0.012428934042213522, "learning_rate": 5.947234657365797e-08, "loss": 0.0006, "step": 290560 }, { "epoch": 1.9116070077564258, "grad_norm": 0.024192895905958293, "learning_rate": 5.93840939026985e-08, "loss": 0.0013, "step": 290570 }, { "epoch": 1.9116727959316593, "grad_norm": 0.0055247442430092705, "learning_rate": 5.929590636949256e-08, "loss": 0.0005, "step": 290580 }, { "epoch": 1.9117385841068928, "grad_norm": 0.03479467159256711, "learning_rate": 5.9207783975203104e-08, "loss": 0.0002, "step": 290590 }, { "epoch": 1.911804372282126, "grad_norm": 0.046557962232209385, "learning_rate": 5.911972672099198e-08, "loss": 0.0003, "step": 290600 }, { "epoch": 1.9118701604573594, "grad_norm": 0.006233059871535871, "learning_rate": 5.9031734608019364e-08, "loss": 0.0005, "step": 290610 }, { "epoch": 1.9119359486325926, "grad_norm": 0.007416669845838622, "learning_rate": 5.894380763744656e-08, "loss": 0.0037, "step": 290620 }, { "epoch": 1.9120017368078261, "grad_norm": 0.030481139141559373, "learning_rate": 5.8855945810432634e-08, "loss": 0.0001, "step": 290630 }, { "epoch": 1.9120675249830597, "grad_norm": 0.013638734492209063, "learning_rate": 5.8768149128135e-08, "loss": 0.0003, "step": 290640 }, { "epoch": 1.912133313158293, "grad_norm": 0.02458074335801514, "learning_rate": 5.868041759171161e-08, "loss": 0.0002, "step": 290650 }, { "epoch": 1.9121991013335262, "grad_norm": 0.012385667439138074, "learning_rate": 5.8592751202319885e-08, "loss": 0.0002, "step": 290660 }, { "epoch": 1.9122648895087597, "grad_norm": 0.02533400015003539, "learning_rate": 5.8505149961115e-08, "loss": 0.0003, "step": 290670 }, { "epoch": 1.9123306776839932, "grad_norm": 0.00014799742731560263, "learning_rate": 5.84176138692516e-08, "loss": 0.0005, "step": 290680 }, { "epoch": 1.9123964658592265, "grad_norm": 0.013289822379087771, "learning_rate": 5.8330142927884306e-08, "loss": 0.0008, "step": 290690 }, { "epoch": 1.9124622540344598, "grad_norm": 0.014275468468009287, "learning_rate": 5.824273713816664e-08, "loss": 0.0002, "step": 290700 }, { "epoch": 1.912528042209693, "grad_norm": 0.039261455744142894, "learning_rate": 5.8155396501249906e-08, "loss": 0.0005, "step": 290710 }, { "epoch": 1.9125938303849266, "grad_norm": 0.001559061599478216, "learning_rate": 5.806812101828652e-08, "loss": 0.0003, "step": 290720 }, { "epoch": 1.91265961856016, "grad_norm": 0.0051686110414753995, "learning_rate": 5.798091069042666e-08, "loss": 0.0004, "step": 290730 }, { "epoch": 1.9127254067353934, "grad_norm": 0.03663826024557844, "learning_rate": 5.7893765518820535e-08, "loss": 0.0004, "step": 290740 }, { "epoch": 1.9127911949106267, "grad_norm": 0.012217439682133516, "learning_rate": 5.7806685504616654e-08, "loss": 0.0005, "step": 290750 }, { "epoch": 1.9128569830858602, "grad_norm": 0.05610829443945414, "learning_rate": 5.7719670648963e-08, "loss": 0.0002, "step": 290760 }, { "epoch": 1.9129227712610937, "grad_norm": 0.021926118177711125, "learning_rate": 5.7632720953007536e-08, "loss": 0.0004, "step": 290770 }, { "epoch": 1.912988559436327, "grad_norm": 0.005267661234436676, "learning_rate": 5.754583641789546e-08, "loss": 0.0002, "step": 290780 }, { "epoch": 1.9130543476115602, "grad_norm": 0.04077242919735773, "learning_rate": 5.745901704477308e-08, "loss": 0.0005, "step": 290790 }, { "epoch": 1.9131201357867935, "grad_norm": 0.016493889354033404, "learning_rate": 5.737226283478559e-08, "loss": 0.0004, "step": 290800 }, { "epoch": 1.913185923962027, "grad_norm": 0.016605771277753645, "learning_rate": 5.72855737890754e-08, "loss": 0.0003, "step": 290810 }, { "epoch": 1.9132517121372605, "grad_norm": 0.010094258791221956, "learning_rate": 5.719894990878661e-08, "loss": 0.0002, "step": 290820 }, { "epoch": 1.9133175003124938, "grad_norm": 0.017471248577288316, "learning_rate": 5.7112391195060515e-08, "loss": 0.0002, "step": 290830 }, { "epoch": 1.913383288487727, "grad_norm": 0.02829428084071585, "learning_rate": 5.7025897649038983e-08, "loss": 0.0004, "step": 290840 }, { "epoch": 1.9134490766629606, "grad_norm": 0.023297025288676325, "learning_rate": 5.693946927186167e-08, "loss": 0.0007, "step": 290850 }, { "epoch": 1.913514864838194, "grad_norm": 0.003115135880983171, "learning_rate": 5.6853106064668206e-08, "loss": 0.0003, "step": 290860 }, { "epoch": 1.9135806530134274, "grad_norm": 0.1029410782457115, "learning_rate": 5.676680802859713e-08, "loss": 0.0009, "step": 290870 }, { "epoch": 1.9136464411886607, "grad_norm": 0.006214516211673048, "learning_rate": 5.6680575164786974e-08, "loss": 0.0002, "step": 290880 }, { "epoch": 1.9137122293638942, "grad_norm": 0.08731766991862268, "learning_rate": 5.659440747437406e-08, "loss": 0.0006, "step": 290890 }, { "epoch": 1.9137780175391275, "grad_norm": 0.012024191631161491, "learning_rate": 5.650830495849413e-08, "loss": 0.0004, "step": 290900 }, { "epoch": 1.913843805714361, "grad_norm": 0.03920947895162252, "learning_rate": 5.6422267618282955e-08, "loss": 0.0005, "step": 290910 }, { "epoch": 1.9139095938895943, "grad_norm": 0.031222720400901596, "learning_rate": 5.633629545487407e-08, "loss": 0.0003, "step": 290920 }, { "epoch": 1.9139753820648275, "grad_norm": 0.054332280247708094, "learning_rate": 5.6250388469402117e-08, "loss": 0.0006, "step": 290930 }, { "epoch": 1.914041170240061, "grad_norm": 0.017552645555504964, "learning_rate": 5.6164546662998974e-08, "loss": 0.0001, "step": 290940 }, { "epoch": 1.9141069584152945, "grad_norm": 0.01048039832644562, "learning_rate": 5.6078770036796516e-08, "loss": 0.0003, "step": 290950 }, { "epoch": 1.9141727465905278, "grad_norm": 0.1318520059071212, "learning_rate": 5.5993058591926055e-08, "loss": 0.0008, "step": 290960 }, { "epoch": 1.9142385347657611, "grad_norm": 0.009663243328042236, "learning_rate": 5.5907412329516686e-08, "loss": 0.0003, "step": 290970 }, { "epoch": 1.9143043229409946, "grad_norm": 0.08747579538776994, "learning_rate": 5.5821831250698066e-08, "loss": 0.0013, "step": 290980 }, { "epoch": 1.9143701111162281, "grad_norm": 0.0044963593604818095, "learning_rate": 5.5736315356598735e-08, "loss": 0.0003, "step": 290990 }, { "epoch": 1.9144358992914614, "grad_norm": 0.014670682766478269, "learning_rate": 5.565086464834557e-08, "loss": 0.0007, "step": 291000 }, { "epoch": 1.9145016874666947, "grad_norm": 0.04282712595536418, "learning_rate": 5.5565479127066005e-08, "loss": 0.0007, "step": 291010 }, { "epoch": 1.914567475641928, "grad_norm": 0.005301694843637166, "learning_rate": 5.548015879388524e-08, "loss": 0.0004, "step": 291020 }, { "epoch": 1.9146332638171615, "grad_norm": 0.004039343957550962, "learning_rate": 5.539490364992794e-08, "loss": 0.0002, "step": 291030 }, { "epoch": 1.914699051992395, "grad_norm": 0.008127424935452493, "learning_rate": 5.530971369631821e-08, "loss": 0.0001, "step": 291040 }, { "epoch": 1.9147648401676283, "grad_norm": 0.001061949030500793, "learning_rate": 5.522458893417959e-08, "loss": 0.0003, "step": 291050 }, { "epoch": 1.9148306283428616, "grad_norm": 0.014562472496037532, "learning_rate": 5.5139529364634517e-08, "loss": 0.0007, "step": 291060 }, { "epoch": 1.914896416518095, "grad_norm": 0.02637069764612633, "learning_rate": 5.505453498880375e-08, "loss": 0.0004, "step": 291070 }, { "epoch": 1.9149622046933286, "grad_norm": 0.02023119144537261, "learning_rate": 5.4969605807808637e-08, "loss": 0.0006, "step": 291080 }, { "epoch": 1.9150279928685618, "grad_norm": 0.009227308963090972, "learning_rate": 5.4884741822768264e-08, "loss": 0.0009, "step": 291090 }, { "epoch": 1.9150937810437951, "grad_norm": 0.0034102976986618615, "learning_rate": 5.479994303480118e-08, "loss": 0.0003, "step": 291100 }, { "epoch": 1.9151595692190284, "grad_norm": 0.03181883494066997, "learning_rate": 5.4715209445026506e-08, "loss": 0.0009, "step": 291110 }, { "epoch": 1.915225357394262, "grad_norm": 0.029540472914601962, "learning_rate": 5.463054105456056e-08, "loss": 0.0004, "step": 291120 }, { "epoch": 1.9152911455694954, "grad_norm": 0.010410729489644862, "learning_rate": 5.454593786451967e-08, "loss": 0.0002, "step": 291130 }, { "epoch": 1.9153569337447287, "grad_norm": 0.042520047129203584, "learning_rate": 5.446139987601962e-08, "loss": 0.0011, "step": 291140 }, { "epoch": 1.915422721919962, "grad_norm": 0.022524412161295807, "learning_rate": 5.437692709017506e-08, "loss": 0.0005, "step": 291150 }, { "epoch": 1.9154885100951955, "grad_norm": 0.039787200163417635, "learning_rate": 5.4292519508099e-08, "loss": 0.0005, "step": 291160 }, { "epoch": 1.915554298270429, "grad_norm": 0.004452606587597199, "learning_rate": 5.420817713090443e-08, "loss": 0.0005, "step": 291170 }, { "epoch": 1.9156200864456623, "grad_norm": 0.04654076476208451, "learning_rate": 5.4123899959703794e-08, "loss": 0.0005, "step": 291180 }, { "epoch": 1.9156858746208956, "grad_norm": 0.029265108223060696, "learning_rate": 5.403968799560788e-08, "loss": 0.0005, "step": 291190 }, { "epoch": 1.915751662796129, "grad_norm": 0.011884702847943827, "learning_rate": 5.395554123972746e-08, "loss": 0.0003, "step": 291200 }, { "epoch": 1.9158174509713624, "grad_norm": 0.01683091246646884, "learning_rate": 5.387145969317109e-08, "loss": 0.0005, "step": 291210 }, { "epoch": 1.9158832391465959, "grad_norm": 0.021129454014369015, "learning_rate": 5.3787443357048444e-08, "loss": 0.0004, "step": 291220 }, { "epoch": 1.9159490273218291, "grad_norm": 0.010745702525858209, "learning_rate": 5.370349223246585e-08, "loss": 0.0003, "step": 291230 }, { "epoch": 1.9160148154970624, "grad_norm": 0.0007989334213071615, "learning_rate": 5.3619606320531316e-08, "loss": 0.0005, "step": 291240 }, { "epoch": 1.916080603672296, "grad_norm": 0.043912912537804626, "learning_rate": 5.3535785622350064e-08, "loss": 0.0006, "step": 291250 }, { "epoch": 1.9161463918475294, "grad_norm": 0.04376459762953635, "learning_rate": 5.3452030139027333e-08, "loss": 0.0004, "step": 291260 }, { "epoch": 1.9162121800227627, "grad_norm": 0.003567089026259387, "learning_rate": 5.3368339871667786e-08, "loss": 0.0004, "step": 291270 }, { "epoch": 1.916277968197996, "grad_norm": 0.02047881720953975, "learning_rate": 5.3284714821373876e-08, "loss": 0.0003, "step": 291280 }, { "epoch": 1.9163437563732295, "grad_norm": 0.01831191350784422, "learning_rate": 5.320115498924916e-08, "loss": 0.0009, "step": 291290 }, { "epoch": 1.916409544548463, "grad_norm": 0.02080874475290614, "learning_rate": 5.311766037639499e-08, "loss": 0.0003, "step": 291300 }, { "epoch": 1.9164753327236963, "grad_norm": 0.04530174544008773, "learning_rate": 5.303423098391158e-08, "loss": 0.0003, "step": 291310 }, { "epoch": 1.9165411208989296, "grad_norm": 0.003923713930870901, "learning_rate": 5.2950866812899735e-08, "loss": 0.0004, "step": 291320 }, { "epoch": 1.9166069090741629, "grad_norm": 0.011606465434905608, "learning_rate": 5.2867567864458015e-08, "loss": 0.0006, "step": 291330 }, { "epoch": 1.9166726972493964, "grad_norm": 0.005862485765601652, "learning_rate": 5.2784334139684976e-08, "loss": 0.0003, "step": 291340 }, { "epoch": 1.9167384854246299, "grad_norm": 0.05636604974233124, "learning_rate": 5.2701165639678084e-08, "loss": 0.0004, "step": 291350 }, { "epoch": 1.9168042735998632, "grad_norm": 0.02373890689263187, "learning_rate": 5.2618062365533126e-08, "loss": 0.0004, "step": 291360 }, { "epoch": 1.9168700617750964, "grad_norm": 0.005788844904990799, "learning_rate": 5.253502431834589e-08, "loss": 0.0002, "step": 291370 }, { "epoch": 1.91693584995033, "grad_norm": 0.04900514390927205, "learning_rate": 5.2452051499212174e-08, "loss": 0.0004, "step": 291380 }, { "epoch": 1.9170016381255635, "grad_norm": 0.02350966163634705, "learning_rate": 5.236914390922443e-08, "loss": 0.0006, "step": 291390 }, { "epoch": 1.9170674263007967, "grad_norm": 0.03326999327128141, "learning_rate": 5.228630154947678e-08, "loss": 0.0002, "step": 291400 }, { "epoch": 1.91713321447603, "grad_norm": 0.0035075573101400986, "learning_rate": 5.220352442106169e-08, "loss": 0.0004, "step": 291410 }, { "epoch": 1.9171990026512633, "grad_norm": 0.027784040948965482, "learning_rate": 5.21208125250694e-08, "loss": 0.0004, "step": 291420 }, { "epoch": 1.9172647908264968, "grad_norm": 0.012936017238739345, "learning_rate": 5.203816586259014e-08, "loss": 0.0006, "step": 291430 }, { "epoch": 1.9173305790017303, "grad_norm": 0.011550585386772838, "learning_rate": 5.195558443471527e-08, "loss": 0.0002, "step": 291440 }, { "epoch": 1.9173963671769636, "grad_norm": 0.03606520545564704, "learning_rate": 5.1873068242532796e-08, "loss": 0.0003, "step": 291450 }, { "epoch": 1.9174621553521969, "grad_norm": 0.009894927081568927, "learning_rate": 5.17906172871302e-08, "loss": 0.001, "step": 291460 }, { "epoch": 1.9175279435274304, "grad_norm": 0.01134460448410848, "learning_rate": 5.170823156959437e-08, "loss": 0.0005, "step": 291470 }, { "epoch": 1.917593731702664, "grad_norm": 0.04266468802140242, "learning_rate": 5.1625911091012224e-08, "loss": 0.0004, "step": 291480 }, { "epoch": 1.9176595198778972, "grad_norm": 0.0014243617475304365, "learning_rate": 5.154365585246901e-08, "loss": 0.0004, "step": 291490 }, { "epoch": 1.9177253080531305, "grad_norm": 0.07313625401546026, "learning_rate": 5.14614658550483e-08, "loss": 0.0005, "step": 291500 }, { "epoch": 1.917791096228364, "grad_norm": 0.0069512887447859125, "learning_rate": 5.1379341099834777e-08, "loss": 0.0002, "step": 291510 }, { "epoch": 1.9178568844035973, "grad_norm": 0.012629284800638728, "learning_rate": 5.129728158791036e-08, "loss": 0.0003, "step": 291520 }, { "epoch": 1.9179226725788308, "grad_norm": 0.006925760802254622, "learning_rate": 5.121528732035752e-08, "loss": 0.0002, "step": 291530 }, { "epoch": 1.917988460754064, "grad_norm": 0.015951803008862685, "learning_rate": 5.1133358298257606e-08, "loss": 0.0003, "step": 291540 }, { "epoch": 1.9180542489292973, "grad_norm": 0.00921015357884775, "learning_rate": 5.105149452268976e-08, "loss": 0.0024, "step": 291550 }, { "epoch": 1.9181200371045308, "grad_norm": 0.018733681167821935, "learning_rate": 5.096969599473312e-08, "loss": 0.0003, "step": 291560 }, { "epoch": 1.9181858252797643, "grad_norm": 0.03713105223652453, "learning_rate": 5.088796271546736e-08, "loss": 0.0003, "step": 291570 }, { "epoch": 1.9182516134549976, "grad_norm": 0.04506593728257634, "learning_rate": 5.080629468596943e-08, "loss": 0.0004, "step": 291580 }, { "epoch": 1.918317401630231, "grad_norm": 0.02262961687631726, "learning_rate": 5.072469190731621e-08, "loss": 0.0003, "step": 291590 }, { "epoch": 1.9183831898054644, "grad_norm": 0.009284120676764885, "learning_rate": 5.064315438058354e-08, "loss": 0.0006, "step": 291600 }, { "epoch": 1.918448977980698, "grad_norm": 0.00012392530245219945, "learning_rate": 5.056168210684609e-08, "loss": 0.0002, "step": 291610 }, { "epoch": 1.9185147661559312, "grad_norm": 0.03214061337124475, "learning_rate": 5.048027508717857e-08, "loss": 0.0009, "step": 291620 }, { "epoch": 1.9185805543311645, "grad_norm": 0.009089873450828982, "learning_rate": 5.0398933322653996e-08, "loss": 0.0009, "step": 291630 }, { "epoch": 1.9186463425063978, "grad_norm": 0.04019264324065635, "learning_rate": 5.031765681434431e-08, "loss": 0.0005, "step": 291640 }, { "epoch": 1.9187121306816313, "grad_norm": 0.0015671850484921307, "learning_rate": 5.023644556332141e-08, "loss": 0.0004, "step": 291650 }, { "epoch": 1.9187779188568648, "grad_norm": 0.02714942349154158, "learning_rate": 5.015529957065668e-08, "loss": 0.0003, "step": 291660 }, { "epoch": 1.918843707032098, "grad_norm": 0.001250435315956741, "learning_rate": 5.00742188374187e-08, "loss": 0.0003, "step": 291670 }, { "epoch": 1.9189094952073313, "grad_norm": 0.005006939293633891, "learning_rate": 4.9993203364677725e-08, "loss": 0.0002, "step": 291680 }, { "epoch": 1.9189752833825648, "grad_norm": 0.06960724758396274, "learning_rate": 4.9912253153500676e-08, "loss": 0.0003, "step": 291690 }, { "epoch": 1.9190410715577984, "grad_norm": 0.041477657879261734, "learning_rate": 4.98313682049556e-08, "loss": 0.0004, "step": 291700 }, { "epoch": 1.9191068597330316, "grad_norm": 0.005541455184180163, "learning_rate": 4.9750548520108857e-08, "loss": 0.0002, "step": 291710 }, { "epoch": 1.919172647908265, "grad_norm": 0.03329564791831786, "learning_rate": 4.9669794100025705e-08, "loss": 0.0003, "step": 291720 }, { "epoch": 1.9192384360834982, "grad_norm": 0.07086737852496763, "learning_rate": 4.958910494577085e-08, "loss": 0.0004, "step": 291730 }, { "epoch": 1.9193042242587317, "grad_norm": 0.006843454615726195, "learning_rate": 4.950848105840844e-08, "loss": 0.0004, "step": 291740 }, { "epoch": 1.9193700124339652, "grad_norm": 0.012671635686909299, "learning_rate": 4.942792243900096e-08, "loss": 0.0011, "step": 291750 }, { "epoch": 1.9194358006091985, "grad_norm": 0.04142990792196051, "learning_rate": 4.9347429088610895e-08, "loss": 0.0007, "step": 291760 }, { "epoch": 1.9195015887844318, "grad_norm": 0.0142760646221154, "learning_rate": 4.926700100829907e-08, "loss": 0.0003, "step": 291770 }, { "epoch": 1.9195673769596653, "grad_norm": 0.0383321166018874, "learning_rate": 4.918663819912628e-08, "loss": 0.0014, "step": 291780 }, { "epoch": 1.9196331651348988, "grad_norm": 0.02065987430733623, "learning_rate": 4.91063406621517e-08, "loss": 0.0004, "step": 291790 }, { "epoch": 1.919698953310132, "grad_norm": 0.004964349149171603, "learning_rate": 4.902610839843447e-08, "loss": 0.0002, "step": 291800 }, { "epoch": 1.9197647414853654, "grad_norm": 0.0024903015899146023, "learning_rate": 4.894594140903208e-08, "loss": 0.0004, "step": 291810 }, { "epoch": 1.9198305296605986, "grad_norm": 0.022743310486331925, "learning_rate": 4.8865839695000917e-08, "loss": 0.0004, "step": 291820 }, { "epoch": 1.9198963178358321, "grad_norm": 0.04799422872359487, "learning_rate": 4.87858032573979e-08, "loss": 0.0005, "step": 291830 }, { "epoch": 1.9199621060110657, "grad_norm": 0.012919306574751164, "learning_rate": 4.870583209727775e-08, "loss": 0.0003, "step": 291840 }, { "epoch": 1.920027894186299, "grad_norm": 0.008493965234377608, "learning_rate": 4.862592621569517e-08, "loss": 0.0004, "step": 291850 }, { "epoch": 1.9200936823615322, "grad_norm": 0.010769206917180594, "learning_rate": 4.854608561370377e-08, "loss": 0.0006, "step": 291860 }, { "epoch": 1.9201594705367657, "grad_norm": 0.026357290502516055, "learning_rate": 4.846631029235549e-08, "loss": 0.0004, "step": 291870 }, { "epoch": 1.9202252587119992, "grad_norm": 0.014452057546842605, "learning_rate": 4.8386600252702254e-08, "loss": 0.0003, "step": 291880 }, { "epoch": 1.9202910468872325, "grad_norm": 0.015801762143620217, "learning_rate": 4.830695549579545e-08, "loss": 0.0008, "step": 291890 }, { "epoch": 1.9203568350624658, "grad_norm": 0.0016888474443743392, "learning_rate": 4.822737602268535e-08, "loss": 0.0005, "step": 291900 }, { "epoch": 1.9204226232376993, "grad_norm": 0.06754141161753001, "learning_rate": 4.814786183442e-08, "loss": 0.0009, "step": 291910 }, { "epoch": 1.9204884114129328, "grad_norm": 0.011279261207513307, "learning_rate": 4.8068412932048555e-08, "loss": 0.0004, "step": 291920 }, { "epoch": 1.920554199588166, "grad_norm": 0.02546101584773324, "learning_rate": 4.7989029316618527e-08, "loss": 0.0002, "step": 291930 }, { "epoch": 1.9206199877633994, "grad_norm": 0.0007457270978330226, "learning_rate": 4.7909710989176296e-08, "loss": 0.0005, "step": 291940 }, { "epoch": 1.9206857759386327, "grad_norm": 0.0019315503093891681, "learning_rate": 4.7830457950767685e-08, "loss": 0.0003, "step": 291950 }, { "epoch": 1.9207515641138662, "grad_norm": 0.03479998817279754, "learning_rate": 4.775127020243686e-08, "loss": 0.0003, "step": 291960 }, { "epoch": 1.9208173522890997, "grad_norm": 0.01884715452230794, "learning_rate": 4.7672147745229105e-08, "loss": 0.0006, "step": 291970 }, { "epoch": 1.920883140464333, "grad_norm": 0.02550928571718357, "learning_rate": 4.759309058018635e-08, "loss": 0.0001, "step": 291980 }, { "epoch": 1.9209489286395662, "grad_norm": 0.001584884230894892, "learning_rate": 4.7514098708352216e-08, "loss": 0.0006, "step": 291990 }, { "epoch": 1.9210147168147997, "grad_norm": 0.05768393692418402, "learning_rate": 4.7435172130767537e-08, "loss": 0.0006, "step": 292000 }, { "epoch": 1.9210805049900332, "grad_norm": 0.006550619665484904, "learning_rate": 4.735631084847203e-08, "loss": 0.0004, "step": 292010 }, { "epoch": 1.9211462931652665, "grad_norm": 0.028791803227559427, "learning_rate": 4.727751486250709e-08, "loss": 0.0003, "step": 292020 }, { "epoch": 1.9212120813404998, "grad_norm": 0.005839470898071982, "learning_rate": 4.719878417391022e-08, "loss": 0.0002, "step": 292030 }, { "epoch": 1.921277869515733, "grad_norm": 0.04938642062573207, "learning_rate": 4.712011878371947e-08, "loss": 0.0004, "step": 292040 }, { "epoch": 1.9213436576909666, "grad_norm": 0.031637114104766176, "learning_rate": 4.704151869297291e-08, "loss": 0.001, "step": 292050 }, { "epoch": 1.9214094458662, "grad_norm": 0.01571416172233253, "learning_rate": 4.696298390270637e-08, "loss": 0.0002, "step": 292060 }, { "epoch": 1.9214752340414334, "grad_norm": 0.002407724295924911, "learning_rate": 4.6884514413954587e-08, "loss": 0.0004, "step": 292070 }, { "epoch": 1.9215410222166667, "grad_norm": 0.02030360811326981, "learning_rate": 4.6806110227753386e-08, "loss": 0.0005, "step": 292080 }, { "epoch": 1.9216068103919002, "grad_norm": 0.0010004713600750496, "learning_rate": 4.6727771345135284e-08, "loss": 0.0002, "step": 292090 }, { "epoch": 1.9216725985671337, "grad_norm": 0.015172956558967015, "learning_rate": 4.664949776713335e-08, "loss": 0.0006, "step": 292100 }, { "epoch": 1.921738386742367, "grad_norm": 0.00773240793161505, "learning_rate": 4.657128949478063e-08, "loss": 0.0004, "step": 292110 }, { "epoch": 1.9218041749176002, "grad_norm": 0.009551763469447935, "learning_rate": 4.649314652910686e-08, "loss": 0.0004, "step": 292120 }, { "epoch": 1.9218699630928335, "grad_norm": 0.01733436387721261, "learning_rate": 4.641506887114289e-08, "loss": 0.0003, "step": 292130 }, { "epoch": 1.921935751268067, "grad_norm": 0.003724385505507425, "learning_rate": 4.633705652191789e-08, "loss": 0.0006, "step": 292140 }, { "epoch": 1.9220015394433005, "grad_norm": 0.01318138520578705, "learning_rate": 4.6259109482460486e-08, "loss": 0.0006, "step": 292150 }, { "epoch": 1.9220673276185338, "grad_norm": 0.06405848604801787, "learning_rate": 4.618122775379874e-08, "loss": 0.0004, "step": 292160 }, { "epoch": 1.922133115793767, "grad_norm": 0.05375800721056077, "learning_rate": 4.610341133695906e-08, "loss": 0.0004, "step": 292170 }, { "epoch": 1.9221989039690006, "grad_norm": 0.005868886478538918, "learning_rate": 4.602566023296728e-08, "loss": 0.0003, "step": 292180 }, { "epoch": 1.9222646921442341, "grad_norm": 0.01161415425050564, "learning_rate": 4.5947974442848155e-08, "loss": 0.0004, "step": 292190 }, { "epoch": 1.9223304803194674, "grad_norm": 0.0016426425887224605, "learning_rate": 4.587035396762696e-08, "loss": 0.0006, "step": 292200 }, { "epoch": 1.9223962684947007, "grad_norm": 0.06023711520913831, "learning_rate": 4.5792798808326765e-08, "loss": 0.0003, "step": 292210 }, { "epoch": 1.9224620566699342, "grad_norm": 0.029188146146223882, "learning_rate": 4.5715308965969543e-08, "loss": 0.0004, "step": 292220 }, { "epoch": 1.9225278448451675, "grad_norm": 0.027814826816109944, "learning_rate": 4.563788444157668e-08, "loss": 0.0007, "step": 292230 }, { "epoch": 1.922593633020401, "grad_norm": 0.000578498931015424, "learning_rate": 4.556052523617016e-08, "loss": 0.0004, "step": 292240 }, { "epoch": 1.9226594211956343, "grad_norm": 0.02930073031926329, "learning_rate": 4.5483231350768594e-08, "loss": 0.0003, "step": 292250 }, { "epoch": 1.9227252093708675, "grad_norm": 0.0026646959134052316, "learning_rate": 4.540600278639229e-08, "loss": 0.0005, "step": 292260 }, { "epoch": 1.922790997546101, "grad_norm": 0.04520796698509635, "learning_rate": 4.53288395440582e-08, "loss": 0.001, "step": 292270 }, { "epoch": 1.9228567857213346, "grad_norm": 0.007341462734209098, "learning_rate": 4.5251741624784407e-08, "loss": 0.0003, "step": 292280 }, { "epoch": 1.9229225738965678, "grad_norm": 0.021303704430270815, "learning_rate": 4.517470902958731e-08, "loss": 0.0001, "step": 292290 }, { "epoch": 1.9229883620718011, "grad_norm": 7.469111789120231e-05, "learning_rate": 4.509774175948223e-08, "loss": 0.001, "step": 292300 }, { "epoch": 1.9230541502470346, "grad_norm": 0.07603232746017752, "learning_rate": 4.502083981548388e-08, "loss": 0.0005, "step": 292310 }, { "epoch": 1.9231199384222681, "grad_norm": 0.03083366606749593, "learning_rate": 4.494400319860648e-08, "loss": 0.0003, "step": 292320 }, { "epoch": 1.9231857265975014, "grad_norm": 0.01201458595604725, "learning_rate": 4.4867231909863083e-08, "loss": 0.0006, "step": 292330 }, { "epoch": 1.9232515147727347, "grad_norm": 0.003960395453036904, "learning_rate": 4.4790525950265675e-08, "loss": 0.0003, "step": 292340 }, { "epoch": 1.923317302947968, "grad_norm": 0.013451535402537898, "learning_rate": 4.471388532082566e-08, "loss": 0.0003, "step": 292350 }, { "epoch": 1.9233830911232015, "grad_norm": 0.001550553809436166, "learning_rate": 4.4637310022553335e-08, "loss": 0.0009, "step": 292360 }, { "epoch": 1.923448879298435, "grad_norm": 0.032398423931339626, "learning_rate": 4.4560800056458465e-08, "loss": 0.0011, "step": 292370 }, { "epoch": 1.9235146674736683, "grad_norm": 0.010424097610713338, "learning_rate": 4.4484355423549674e-08, "loss": 0.0003, "step": 292380 }, { "epoch": 1.9235804556489016, "grad_norm": 0.020397799054660982, "learning_rate": 4.4407976124834493e-08, "loss": 0.0004, "step": 292390 }, { "epoch": 1.923646243824135, "grad_norm": 0.003021893675385822, "learning_rate": 4.4331662161320457e-08, "loss": 0.0002, "step": 292400 }, { "epoch": 1.9237120319993686, "grad_norm": 0.020332769320044262, "learning_rate": 4.4255413534013415e-08, "loss": 0.0003, "step": 292410 }, { "epoch": 1.9237778201746019, "grad_norm": 3.8961701334784466e-05, "learning_rate": 4.4179230243918683e-08, "loss": 0.001, "step": 292420 }, { "epoch": 1.9238436083498351, "grad_norm": 0.03612215430370891, "learning_rate": 4.4103112292040454e-08, "loss": 0.001, "step": 292430 }, { "epoch": 1.9239093965250684, "grad_norm": 0.009593684191252418, "learning_rate": 4.402705967938292e-08, "loss": 0.0006, "step": 292440 }, { "epoch": 1.923975184700302, "grad_norm": 0.03068314109109423, "learning_rate": 4.395107240694807e-08, "loss": 0.0003, "step": 292450 }, { "epoch": 1.9240409728755354, "grad_norm": 0.00013707341853973958, "learning_rate": 4.387515047573787e-08, "loss": 0.0003, "step": 292460 }, { "epoch": 1.9241067610507687, "grad_norm": 0.035402719922219925, "learning_rate": 4.379929388675375e-08, "loss": 0.0003, "step": 292470 }, { "epoch": 1.924172549226002, "grad_norm": 0.016892584734343387, "learning_rate": 4.37235026409949e-08, "loss": 0.0002, "step": 292480 }, { "epoch": 1.9242383374012355, "grad_norm": 0.007218979764850789, "learning_rate": 4.364777673946163e-08, "loss": 0.0002, "step": 292490 }, { "epoch": 1.924304125576469, "grad_norm": 0.04008556707827065, "learning_rate": 4.357211618315205e-08, "loss": 0.0003, "step": 292500 }, { "epoch": 1.9243699137517023, "grad_norm": 0.015316105892061947, "learning_rate": 4.3496520973063113e-08, "loss": 0.0004, "step": 292510 }, { "epoch": 1.9244357019269356, "grad_norm": 0.029792139145458998, "learning_rate": 4.342099111019182e-08, "loss": 0.0006, "step": 292520 }, { "epoch": 1.924501490102169, "grad_norm": 0.0011704614869419154, "learning_rate": 4.3345526595534596e-08, "loss": 0.0002, "step": 292530 }, { "epoch": 1.9245672782774024, "grad_norm": 0.07549234785532592, "learning_rate": 4.3270127430085076e-08, "loss": 0.0004, "step": 292540 }, { "epoch": 1.9246330664526359, "grad_norm": 0.026573469681928242, "learning_rate": 4.319479361483803e-08, "loss": 0.0006, "step": 292550 }, { "epoch": 1.9246988546278692, "grad_norm": 0.10903528509135631, "learning_rate": 4.31195251507871e-08, "loss": 0.0005, "step": 292560 }, { "epoch": 1.9247646428031024, "grad_norm": 0.026562947043750052, "learning_rate": 4.3044322038923726e-08, "loss": 0.0004, "step": 292570 }, { "epoch": 1.924830430978336, "grad_norm": 0.0014350559664266702, "learning_rate": 4.296918428023988e-08, "loss": 0.0008, "step": 292580 }, { "epoch": 1.9248962191535695, "grad_norm": 0.00758821924478254, "learning_rate": 4.289411187572645e-08, "loss": 0.0006, "step": 292590 }, { "epoch": 1.9249620073288027, "grad_norm": 0.010129518907472566, "learning_rate": 4.281910482637264e-08, "loss": 0.0005, "step": 292600 }, { "epoch": 1.925027795504036, "grad_norm": 0.021618466461301913, "learning_rate": 4.2744163133167646e-08, "loss": 0.0006, "step": 292610 }, { "epoch": 1.9250935836792695, "grad_norm": 0.03307987425883154, "learning_rate": 4.266928679709958e-08, "loss": 0.0002, "step": 292620 }, { "epoch": 1.925159371854503, "grad_norm": 0.010755603008173435, "learning_rate": 4.259447581915543e-08, "loss": 0.0002, "step": 292630 }, { "epoch": 1.9252251600297363, "grad_norm": 0.0003003757394412816, "learning_rate": 4.2519730200322186e-08, "loss": 0.0003, "step": 292640 }, { "epoch": 1.9252909482049696, "grad_norm": 0.07953405431125794, "learning_rate": 4.244504994158405e-08, "loss": 0.0008, "step": 292650 }, { "epoch": 1.9253567363802029, "grad_norm": 0.019899278814406403, "learning_rate": 4.237043504392691e-08, "loss": 0.0005, "step": 292660 }, { "epoch": 1.9254225245554364, "grad_norm": 0.01086664189787041, "learning_rate": 4.22958855083333e-08, "loss": 0.0003, "step": 292670 }, { "epoch": 1.92548831273067, "grad_norm": 0.027338769708077634, "learning_rate": 4.222140133578689e-08, "loss": 0.0002, "step": 292680 }, { "epoch": 1.9255541009059032, "grad_norm": 0.04876398846867353, "learning_rate": 4.214698252726967e-08, "loss": 0.0009, "step": 292690 }, { "epoch": 1.9256198890811365, "grad_norm": 0.01388473212293541, "learning_rate": 4.2072629083762525e-08, "loss": 0.0002, "step": 292700 }, { "epoch": 1.92568567725637, "grad_norm": 0.0009011118373856585, "learning_rate": 4.199834100624578e-08, "loss": 0.0005, "step": 292710 }, { "epoch": 1.9257514654316035, "grad_norm": 0.02807476655669891, "learning_rate": 4.192411829569864e-08, "loss": 0.0008, "step": 292720 }, { "epoch": 1.9258172536068368, "grad_norm": 0.00020477915625445535, "learning_rate": 4.1849960953100347e-08, "loss": 0.0005, "step": 292730 }, { "epoch": 1.92588304178207, "grad_norm": 0.014777693634912088, "learning_rate": 4.177586897942787e-08, "loss": 0.0002, "step": 292740 }, { "epoch": 1.9259488299573033, "grad_norm": 0.082956818280891, "learning_rate": 4.1701842375658776e-08, "loss": 0.0005, "step": 292750 }, { "epoch": 1.9260146181325368, "grad_norm": 0.0016562406399606346, "learning_rate": 4.162788114276783e-08, "loss": 0.0007, "step": 292760 }, { "epoch": 1.9260804063077703, "grad_norm": 0.04588118603668183, "learning_rate": 4.155398528173149e-08, "loss": 0.0003, "step": 292770 }, { "epoch": 1.9261461944830036, "grad_norm": 0.06961041880215073, "learning_rate": 4.1480154793523965e-08, "loss": 0.0003, "step": 292780 }, { "epoch": 1.926211982658237, "grad_norm": 0.02917131532341076, "learning_rate": 4.140638967911726e-08, "loss": 0.0002, "step": 292790 }, { "epoch": 1.9262777708334704, "grad_norm": 0.04315501657351832, "learning_rate": 4.1332689939485026e-08, "loss": 0.0002, "step": 292800 }, { "epoch": 1.926343559008704, "grad_norm": 0.0010271790380181362, "learning_rate": 4.1259055575598725e-08, "loss": 0.0002, "step": 292810 }, { "epoch": 1.9264093471839372, "grad_norm": 0.01855704804091064, "learning_rate": 4.1185486588429246e-08, "loss": 0.0002, "step": 292820 }, { "epoch": 1.9264751353591705, "grad_norm": 0.06434191956010309, "learning_rate": 4.111198297894636e-08, "loss": 0.0007, "step": 292830 }, { "epoch": 1.9265409235344038, "grad_norm": 0.0408034150182662, "learning_rate": 4.10385447481193e-08, "loss": 0.0004, "step": 292840 }, { "epoch": 1.9266067117096373, "grad_norm": 6.645415902415909e-05, "learning_rate": 4.096517189691618e-08, "loss": 0.0003, "step": 292850 }, { "epoch": 1.9266724998848708, "grad_norm": 0.02160285065376021, "learning_rate": 4.0891864426303996e-08, "loss": 0.0003, "step": 292860 }, { "epoch": 1.926738288060104, "grad_norm": 0.009624533856974673, "learning_rate": 4.0818622337249756e-08, "loss": 0.0004, "step": 292870 }, { "epoch": 1.9268040762353373, "grad_norm": 0.025420259632381776, "learning_rate": 4.074544563071936e-08, "loss": 0.003, "step": 292880 }, { "epoch": 1.9268698644105708, "grad_norm": 0.0008732940960868861, "learning_rate": 4.067233430767703e-08, "loss": 0.0007, "step": 292890 }, { "epoch": 1.9269356525858043, "grad_norm": 0.020794967923614103, "learning_rate": 4.0599288369086445e-08, "loss": 0.0001, "step": 292900 }, { "epoch": 1.9270014407610376, "grad_norm": 0.01571407495383386, "learning_rate": 4.052630781591127e-08, "loss": 0.0004, "step": 292910 }, { "epoch": 1.927067228936271, "grad_norm": 0.020792541462633117, "learning_rate": 4.045339264911352e-08, "loss": 0.0002, "step": 292920 }, { "epoch": 1.9271330171115044, "grad_norm": 0.0019383239274262602, "learning_rate": 4.038054286965465e-08, "loss": 0.0002, "step": 292930 }, { "epoch": 1.927198805286738, "grad_norm": 0.05428183613081773, "learning_rate": 4.030775847849444e-08, "loss": 0.0008, "step": 292940 }, { "epoch": 1.9272645934619712, "grad_norm": 0.000460536145907562, "learning_rate": 4.023503947659324e-08, "loss": 0.0003, "step": 292950 }, { "epoch": 1.9273303816372045, "grad_norm": 0.02384225380975293, "learning_rate": 4.0162385864909173e-08, "loss": 0.0007, "step": 292960 }, { "epoch": 1.9273961698124378, "grad_norm": 0.026881167642504783, "learning_rate": 4.008979764440091e-08, "loss": 0.0005, "step": 292970 }, { "epoch": 1.9274619579876713, "grad_norm": 0.0796194154298555, "learning_rate": 4.0017274816024356e-08, "loss": 0.0007, "step": 292980 }, { "epoch": 1.9275277461629048, "grad_norm": 0.06298834134733275, "learning_rate": 3.994481738073708e-08, "loss": 0.0003, "step": 292990 }, { "epoch": 1.927593534338138, "grad_norm": 0.03966539620897666, "learning_rate": 3.9872425339493314e-08, "loss": 0.0004, "step": 293000 }, { "epoch": 1.9276593225133714, "grad_norm": 0.02892532425517587, "learning_rate": 3.980009869324786e-08, "loss": 0.0005, "step": 293010 }, { "epoch": 1.9277251106886049, "grad_norm": 0.01874710290996823, "learning_rate": 3.972783744295383e-08, "loss": 0.0005, "step": 293020 }, { "epoch": 1.9277908988638384, "grad_norm": 0.008451322700679985, "learning_rate": 3.965564158956436e-08, "loss": 0.0006, "step": 293030 }, { "epoch": 1.9278566870390716, "grad_norm": 0.004519360859995687, "learning_rate": 3.9583511134031475e-08, "loss": 0.0002, "step": 293040 }, { "epoch": 1.927922475214305, "grad_norm": 0.04661511632009704, "learning_rate": 3.951144607730606e-08, "loss": 0.0005, "step": 293050 }, { "epoch": 1.9279882633895382, "grad_norm": 0.05358670501203636, "learning_rate": 3.9439446420337926e-08, "loss": 0.0006, "step": 293060 }, { "epoch": 1.9280540515647717, "grad_norm": 0.025381242833475297, "learning_rate": 3.9367512164076306e-08, "loss": 0.0008, "step": 293070 }, { "epoch": 1.9281198397400052, "grad_norm": 0.005173117386439336, "learning_rate": 3.929564330946989e-08, "loss": 0.0003, "step": 293080 }, { "epoch": 1.9281856279152385, "grad_norm": 0.008815629585774755, "learning_rate": 3.922383985746569e-08, "loss": 0.0004, "step": 293090 }, { "epoch": 1.9282514160904718, "grad_norm": 0.04128202644074402, "learning_rate": 3.9152101809010745e-08, "loss": 0.0008, "step": 293100 }, { "epoch": 1.9283172042657053, "grad_norm": 0.012728748426203881, "learning_rate": 3.90804291650515e-08, "loss": 0.0003, "step": 293110 }, { "epoch": 1.9283829924409388, "grad_norm": 0.0011816378668055886, "learning_rate": 3.900882192653221e-08, "loss": 0.0004, "step": 293120 }, { "epoch": 1.928448780616172, "grad_norm": 0.0655123244522576, "learning_rate": 3.893728009439657e-08, "loss": 0.0006, "step": 293130 }, { "epoch": 1.9285145687914054, "grad_norm": 0.03072179070417547, "learning_rate": 3.886580366958825e-08, "loss": 0.0004, "step": 293140 }, { "epoch": 1.9285803569666387, "grad_norm": 0.01584331037916197, "learning_rate": 3.879439265304985e-08, "loss": 0.0003, "step": 293150 }, { "epoch": 1.9286461451418722, "grad_norm": 0.02829126299037175, "learning_rate": 3.8723047045722826e-08, "loss": 0.0006, "step": 293160 }, { "epoch": 1.9287119333171057, "grad_norm": 0.06997844168495095, "learning_rate": 3.865176684854699e-08, "loss": 0.0004, "step": 293170 }, { "epoch": 1.928777721492339, "grad_norm": 0.0306737674187581, "learning_rate": 3.858055206246325e-08, "loss": 0.0002, "step": 293180 }, { "epoch": 1.9288435096675722, "grad_norm": 0.004456225253156699, "learning_rate": 3.850940268840975e-08, "loss": 0.0004, "step": 293190 }, { "epoch": 1.9289092978428057, "grad_norm": 0.007570031043976862, "learning_rate": 3.843831872732462e-08, "loss": 0.0002, "step": 293200 }, { "epoch": 1.9289750860180392, "grad_norm": 0.0006738406678105065, "learning_rate": 3.836730018014545e-08, "loss": 0.0002, "step": 293210 }, { "epoch": 1.9290408741932725, "grad_norm": 0.02905073430114603, "learning_rate": 3.8296347047808157e-08, "loss": 0.0006, "step": 293220 }, { "epoch": 1.9291066623685058, "grad_norm": 0.14610041126649323, "learning_rate": 3.822545933124755e-08, "loss": 0.0009, "step": 293230 }, { "epoch": 1.9291724505437393, "grad_norm": 0.21327761088765956, "learning_rate": 3.81546370314001e-08, "loss": 0.0028, "step": 293240 }, { "epoch": 1.9292382387189726, "grad_norm": 0.01659162406582305, "learning_rate": 3.808388014919784e-08, "loss": 0.0006, "step": 293250 }, { "epoch": 1.929304026894206, "grad_norm": 0.015215796778994215, "learning_rate": 3.801318868557446e-08, "loss": 0.0006, "step": 293260 }, { "epoch": 1.9293698150694394, "grad_norm": 0.030683912076261803, "learning_rate": 3.794256264146201e-08, "loss": 0.0002, "step": 293270 }, { "epoch": 1.9294356032446727, "grad_norm": 0.011397784681502928, "learning_rate": 3.7872002017790846e-08, "loss": 0.0003, "step": 293280 }, { "epoch": 1.9295013914199062, "grad_norm": 0.05564583946544293, "learning_rate": 3.780150681549189e-08, "loss": 0.0003, "step": 293290 }, { "epoch": 1.9295671795951397, "grad_norm": 0.0028547569986246946, "learning_rate": 3.7731077035494946e-08, "loss": 0.0004, "step": 293300 }, { "epoch": 1.929632967770373, "grad_norm": 0.0003301571389457788, "learning_rate": 3.7660712678727616e-08, "loss": 0.0004, "step": 293310 }, { "epoch": 1.9296987559456062, "grad_norm": 0.002066375105424542, "learning_rate": 3.759041374611805e-08, "loss": 0.0008, "step": 293320 }, { "epoch": 1.9297645441208398, "grad_norm": 0.010730148575956123, "learning_rate": 3.7520180238592716e-08, "loss": 0.0005, "step": 293330 }, { "epoch": 1.9298303322960733, "grad_norm": 0.0024425962185344423, "learning_rate": 3.7450012157078665e-08, "loss": 0.0004, "step": 293340 }, { "epoch": 1.9298961204713065, "grad_norm": 0.01857687069486596, "learning_rate": 3.737990950249959e-08, "loss": 0.0003, "step": 293350 }, { "epoch": 1.9299619086465398, "grad_norm": 0.02644048997802842, "learning_rate": 3.730987227578031e-08, "loss": 0.0002, "step": 293360 }, { "epoch": 1.930027696821773, "grad_norm": 0.02504043265779306, "learning_rate": 3.723990047784509e-08, "loss": 0.0003, "step": 293370 }, { "epoch": 1.9300934849970066, "grad_norm": 0.010624342933103775, "learning_rate": 3.716999410961486e-08, "loss": 0.0008, "step": 293380 }, { "epoch": 1.9301592731722401, "grad_norm": 0.012268646809147946, "learning_rate": 3.7100153172012765e-08, "loss": 0.0002, "step": 293390 }, { "epoch": 1.9302250613474734, "grad_norm": 0.02243611323812868, "learning_rate": 3.703037766595863e-08, "loss": 0.0002, "step": 293400 }, { "epoch": 1.9302908495227067, "grad_norm": 6.969992539129059e-05, "learning_rate": 3.6960667592372266e-08, "loss": 0.0001, "step": 293410 }, { "epoch": 1.9303566376979402, "grad_norm": 0.021053413282465856, "learning_rate": 3.68910229521735e-08, "loss": 0.0004, "step": 293420 }, { "epoch": 1.9304224258731737, "grad_norm": 0.004055297359192791, "learning_rate": 3.68214437462805e-08, "loss": 0.0005, "step": 293430 }, { "epoch": 1.930488214048407, "grad_norm": 0.03655675126659833, "learning_rate": 3.675192997560972e-08, "loss": 0.0005, "step": 293440 }, { "epoch": 1.9305540022236403, "grad_norm": 0.03618405308374537, "learning_rate": 3.6682481641078794e-08, "loss": 0.0005, "step": 293450 }, { "epoch": 1.9306197903988735, "grad_norm": 0.017229320370820887, "learning_rate": 3.6613098743602525e-08, "loss": 0.0002, "step": 293460 }, { "epoch": 1.930685578574107, "grad_norm": 0.050001665620377646, "learning_rate": 3.654378128409575e-08, "loss": 0.0005, "step": 293470 }, { "epoch": 1.9307513667493406, "grad_norm": 0.00029166661187855146, "learning_rate": 3.647452926347217e-08, "loss": 0.0007, "step": 293480 }, { "epoch": 1.9308171549245738, "grad_norm": 0.017661050252700507, "learning_rate": 3.640534268264551e-08, "loss": 0.0005, "step": 293490 }, { "epoch": 1.9308829430998071, "grad_norm": 0.016022608068303443, "learning_rate": 3.633622154252725e-08, "loss": 0.0015, "step": 293500 }, { "epoch": 1.9309487312750406, "grad_norm": 0.029795788555569563, "learning_rate": 3.626716584402945e-08, "loss": 0.0006, "step": 293510 }, { "epoch": 1.9310145194502741, "grad_norm": 0.02495762186735116, "learning_rate": 3.619817558806193e-08, "loss": 0.0004, "step": 293520 }, { "epoch": 1.9310803076255074, "grad_norm": 0.04841833382150142, "learning_rate": 3.612925077553453e-08, "loss": 0.0004, "step": 293530 }, { "epoch": 1.9311460958007407, "grad_norm": 0.061932493252341965, "learning_rate": 3.6060391407355954e-08, "loss": 0.0005, "step": 293540 }, { "epoch": 1.9312118839759742, "grad_norm": 0.17751916237082518, "learning_rate": 3.5991597484433815e-08, "loss": 0.0007, "step": 293550 }, { "epoch": 1.9312776721512075, "grad_norm": 0.00044400046640425176, "learning_rate": 3.592286900767516e-08, "loss": 0.0002, "step": 293560 }, { "epoch": 1.931343460326441, "grad_norm": 0.016982033029773178, "learning_rate": 3.58542059779865e-08, "loss": 0.0002, "step": 293570 }, { "epoch": 1.9314092485016743, "grad_norm": 0.01864839601275447, "learning_rate": 3.578560839627265e-08, "loss": 0.0004, "step": 293580 }, { "epoch": 1.9314750366769076, "grad_norm": 0.022596385150506753, "learning_rate": 3.5717076263438456e-08, "loss": 0.0004, "step": 293590 }, { "epoch": 1.931540824852141, "grad_norm": 0.032488436487266334, "learning_rate": 3.564860958038707e-08, "loss": 0.0006, "step": 293600 }, { "epoch": 1.9316066130273746, "grad_norm": 0.003262716363361402, "learning_rate": 3.5580208348021114e-08, "loss": 0.0003, "step": 293610 }, { "epoch": 1.9316724012026079, "grad_norm": 0.009730782920061787, "learning_rate": 3.551187256724209e-08, "loss": 0.0002, "step": 293620 }, { "epoch": 1.9317381893778411, "grad_norm": 0.03744354653360027, "learning_rate": 3.5443602238952046e-08, "loss": 0.0003, "step": 293630 }, { "epoch": 1.9318039775530746, "grad_norm": 0.001049261707686105, "learning_rate": 3.537539736405027e-08, "loss": 0.0002, "step": 293640 }, { "epoch": 1.9318697657283082, "grad_norm": 0.008848934456102433, "learning_rate": 3.5307257943436036e-08, "loss": 0.0003, "step": 293650 }, { "epoch": 1.9319355539035414, "grad_norm": 0.01768865441204163, "learning_rate": 3.5239183978008074e-08, "loss": 0.0004, "step": 293660 }, { "epoch": 1.9320013420787747, "grad_norm": 0.002396892306981551, "learning_rate": 3.5171175468663446e-08, "loss": 0.0003, "step": 293670 }, { "epoch": 1.932067130254008, "grad_norm": 0.02180694372624379, "learning_rate": 3.5103232416299205e-08, "loss": 0.0003, "step": 293680 }, { "epoch": 1.9321329184292415, "grad_norm": 0.024943278630920676, "learning_rate": 3.503535482181075e-08, "loss": 0.0002, "step": 293690 }, { "epoch": 1.932198706604475, "grad_norm": 0.07240489545329462, "learning_rate": 3.496754268609293e-08, "loss": 0.0006, "step": 293700 }, { "epoch": 1.9322644947797083, "grad_norm": 0.09849542570235849, "learning_rate": 3.489979601004001e-08, "loss": 0.0005, "step": 293710 }, { "epoch": 1.9323302829549416, "grad_norm": 0.053444302390874866, "learning_rate": 3.483211479454518e-08, "loss": 0.0009, "step": 293720 }, { "epoch": 1.932396071130175, "grad_norm": 0.015076393505018494, "learning_rate": 3.4764499040501055e-08, "loss": 0.0003, "step": 293730 }, { "epoch": 1.9324618593054086, "grad_norm": 0.0020186550441016237, "learning_rate": 3.469694874879803e-08, "loss": 0.0004, "step": 293740 }, { "epoch": 1.9325276474806419, "grad_norm": 0.006914874494314827, "learning_rate": 3.462946392032762e-08, "loss": 0.0008, "step": 293750 }, { "epoch": 1.9325934356558752, "grad_norm": 0.009326901213503084, "learning_rate": 3.456204455597967e-08, "loss": 0.0006, "step": 293760 }, { "epoch": 1.9326592238311084, "grad_norm": 0.02231801280987474, "learning_rate": 3.44946906566429e-08, "loss": 0.0003, "step": 293770 }, { "epoch": 1.932725012006342, "grad_norm": 0.01621732841245892, "learning_rate": 3.442740222320495e-08, "loss": 0.0003, "step": 293780 }, { "epoch": 1.9327908001815755, "grad_norm": 0.04298366754301223, "learning_rate": 3.4360179256552885e-08, "loss": 0.0005, "step": 293790 }, { "epoch": 1.9328565883568087, "grad_norm": 0.046167806735954164, "learning_rate": 3.4293021757573766e-08, "loss": 0.0006, "step": 293800 }, { "epoch": 1.932922376532042, "grad_norm": 0.00012551252679257691, "learning_rate": 3.422592972715188e-08, "loss": 0.0008, "step": 293810 }, { "epoch": 1.9329881647072755, "grad_norm": 0.007651076945855582, "learning_rate": 3.415890316617265e-08, "loss": 0.0004, "step": 293820 }, { "epoch": 1.933053952882509, "grad_norm": 0.019375464566981162, "learning_rate": 3.40919420755198e-08, "loss": 0.0005, "step": 293830 }, { "epoch": 1.9331197410577423, "grad_norm": 0.011652359154104796, "learning_rate": 3.4025046456075404e-08, "loss": 0.0002, "step": 293840 }, { "epoch": 1.9331855292329756, "grad_norm": 0.0007377548260235317, "learning_rate": 3.395821630872209e-08, "loss": 0.0003, "step": 293850 }, { "epoch": 1.933251317408209, "grad_norm": 0.03390450652742218, "learning_rate": 3.389145163434082e-08, "loss": 0.0006, "step": 293860 }, { "epoch": 1.9333171055834424, "grad_norm": 0.017904092141926106, "learning_rate": 3.3824752433811445e-08, "loss": 0.0015, "step": 293870 }, { "epoch": 1.933382893758676, "grad_norm": 0.050399284981181645, "learning_rate": 3.375811870801382e-08, "loss": 0.0003, "step": 293880 }, { "epoch": 1.9334486819339092, "grad_norm": 0.05352852957041959, "learning_rate": 3.369155045782613e-08, "loss": 0.0002, "step": 293890 }, { "epoch": 1.9335144701091425, "grad_norm": 0.02687951155196943, "learning_rate": 3.362504768412656e-08, "loss": 0.0003, "step": 293900 }, { "epoch": 1.933580258284376, "grad_norm": 0.04459203644000556, "learning_rate": 3.355861038779107e-08, "loss": 0.0008, "step": 293910 }, { "epoch": 1.9336460464596095, "grad_norm": 0.00039561462222919083, "learning_rate": 3.349223856969619e-08, "loss": 0.0004, "step": 293920 }, { "epoch": 1.9337118346348428, "grad_norm": 0.046757145257123094, "learning_rate": 3.3425932230716776e-08, "loss": 0.0003, "step": 293930 }, { "epoch": 1.933777622810076, "grad_norm": 0.03944946151593869, "learning_rate": 3.335969137172712e-08, "loss": 0.0008, "step": 293940 }, { "epoch": 1.9338434109853095, "grad_norm": 0.035228757358550494, "learning_rate": 3.329351599360042e-08, "loss": 0.0003, "step": 293950 }, { "epoch": 1.933909199160543, "grad_norm": 0.00645792136068128, "learning_rate": 3.3227406097209313e-08, "loss": 0.0004, "step": 293960 }, { "epoch": 1.9339749873357763, "grad_norm": 0.018716281879216517, "learning_rate": 3.316136168342476e-08, "loss": 0.0002, "step": 293970 }, { "epoch": 1.9340407755110096, "grad_norm": 0.0004665383617757379, "learning_rate": 3.30953827531183e-08, "loss": 0.0004, "step": 293980 }, { "epoch": 1.934106563686243, "grad_norm": 0.0071794764263443, "learning_rate": 3.302946930715978e-08, "loss": 0.0007, "step": 293990 }, { "epoch": 1.9341723518614764, "grad_norm": 0.021559464508156365, "learning_rate": 3.296362134641795e-08, "loss": 0.0005, "step": 294000 }, { "epoch": 1.93423814003671, "grad_norm": 0.04907086306332793, "learning_rate": 3.2897838871760455e-08, "loss": 0.0006, "step": 294010 }, { "epoch": 1.9343039282119432, "grad_norm": 0.010822437987023401, "learning_rate": 3.283212188405549e-08, "loss": 0.0006, "step": 294020 }, { "epoch": 1.9343697163871765, "grad_norm": 0.024221829690828336, "learning_rate": 3.276647038416902e-08, "loss": 0.0002, "step": 294030 }, { "epoch": 1.93443550456241, "grad_norm": 0.001976859340931471, "learning_rate": 3.270088437296648e-08, "loss": 0.0003, "step": 294040 }, { "epoch": 1.9345012927376435, "grad_norm": 0.012302853609865832, "learning_rate": 3.263536385131272e-08, "loss": 0.0002, "step": 294050 }, { "epoch": 1.9345670809128768, "grad_norm": 0.013177617821418745, "learning_rate": 3.256990882007205e-08, "loss": 0.0004, "step": 294060 }, { "epoch": 1.93463286908811, "grad_norm": 0.03428764166212028, "learning_rate": 3.2504519280106564e-08, "loss": 0.0007, "step": 294070 }, { "epoch": 1.9346986572633433, "grad_norm": 0.04620236996456456, "learning_rate": 3.2439195232278897e-08, "loss": 0.0014, "step": 294080 }, { "epoch": 1.9347644454385768, "grad_norm": 0.007388770943566438, "learning_rate": 3.237393667745004e-08, "loss": 0.0002, "step": 294090 }, { "epoch": 1.9348302336138103, "grad_norm": 0.04031858094857446, "learning_rate": 3.230874361648095e-08, "loss": 0.0004, "step": 294100 }, { "epoch": 1.9348960217890436, "grad_norm": 0.022773639649487747, "learning_rate": 3.224361605022985e-08, "loss": 0.0002, "step": 294110 }, { "epoch": 1.934961809964277, "grad_norm": 0.03397833130665056, "learning_rate": 3.2178553979556605e-08, "loss": 0.0006, "step": 294120 }, { "epoch": 1.9350275981395104, "grad_norm": 0.009632271743169362, "learning_rate": 3.211355740531885e-08, "loss": 0.0003, "step": 294130 }, { "epoch": 1.935093386314744, "grad_norm": 0.009136119386257538, "learning_rate": 3.204862632837369e-08, "loss": 0.0004, "step": 294140 }, { "epoch": 1.9351591744899772, "grad_norm": 0.058932666638081645, "learning_rate": 3.198376074957599e-08, "loss": 0.0004, "step": 294150 }, { "epoch": 1.9352249626652105, "grad_norm": 0.05176485723001917, "learning_rate": 3.191896066978229e-08, "loss": 0.0006, "step": 294160 }, { "epoch": 1.9352907508404438, "grad_norm": 0.0002140557683055324, "learning_rate": 3.1854226089845785e-08, "loss": 0.0009, "step": 294170 }, { "epoch": 1.9353565390156773, "grad_norm": 0.03178169851169697, "learning_rate": 3.1789557010621366e-08, "loss": 0.0002, "step": 294180 }, { "epoch": 1.9354223271909108, "grad_norm": 0.010756794477365862, "learning_rate": 3.172495343296001e-08, "loss": 0.0009, "step": 294190 }, { "epoch": 1.935488115366144, "grad_norm": 0.07089928401973956, "learning_rate": 3.1660415357714915e-08, "loss": 0.0006, "step": 294200 }, { "epoch": 1.9355539035413774, "grad_norm": 0.06245952781142764, "learning_rate": 3.159594278573597e-08, "loss": 0.0006, "step": 294210 }, { "epoch": 1.9356196917166109, "grad_norm": 0.26366108052014153, "learning_rate": 3.153153571787359e-08, "loss": 0.0007, "step": 294220 }, { "epoch": 1.9356854798918444, "grad_norm": 0.014628626621913813, "learning_rate": 3.1467194154976565e-08, "loss": 0.0008, "step": 294230 }, { "epoch": 1.9357512680670776, "grad_norm": 0.0003220375512883295, "learning_rate": 3.140291809789364e-08, "loss": 0.0004, "step": 294240 }, { "epoch": 1.935817056242311, "grad_norm": 0.1149158785221576, "learning_rate": 3.1338707547471926e-08, "loss": 0.0008, "step": 294250 }, { "epoch": 1.9358828444175444, "grad_norm": 0.034098623545011426, "learning_rate": 3.1274562504558515e-08, "loss": 0.0002, "step": 294260 }, { "epoch": 1.935948632592778, "grad_norm": 0.06128778812574146, "learning_rate": 3.1210482969998845e-08, "loss": 0.0003, "step": 294270 }, { "epoch": 1.9360144207680112, "grad_norm": 0.02585518635522575, "learning_rate": 3.1146468944637245e-08, "loss": 0.0008, "step": 294280 }, { "epoch": 1.9360802089432445, "grad_norm": 0.00935105248596332, "learning_rate": 3.108252042931803e-08, "loss": 0.0005, "step": 294290 }, { "epoch": 1.9361459971184778, "grad_norm": 0.010047227705954571, "learning_rate": 3.1018637424884976e-08, "loss": 0.0003, "step": 294300 }, { "epoch": 1.9362117852937113, "grad_norm": 0.007376134772805779, "learning_rate": 3.095481993217908e-08, "loss": 0.0004, "step": 294310 }, { "epoch": 1.9362775734689448, "grad_norm": 0.03356250452801326, "learning_rate": 3.089106795204244e-08, "loss": 0.0002, "step": 294320 }, { "epoch": 1.936343361644178, "grad_norm": 0.09058724515841239, "learning_rate": 3.08273814853155e-08, "loss": 0.0003, "step": 294330 }, { "epoch": 1.9364091498194114, "grad_norm": 6.936499499806183e-05, "learning_rate": 3.076376053283814e-08, "loss": 0.0005, "step": 294340 }, { "epoch": 1.9364749379946449, "grad_norm": 0.027962360657551467, "learning_rate": 3.0700205095448575e-08, "loss": 0.0003, "step": 294350 }, { "epoch": 1.9365407261698784, "grad_norm": 0.07605863851876465, "learning_rate": 3.063671517398503e-08, "loss": 0.0003, "step": 294360 }, { "epoch": 1.9366065143451117, "grad_norm": 0.002411776783961726, "learning_rate": 3.057329076928461e-08, "loss": 0.0005, "step": 294370 }, { "epoch": 1.936672302520345, "grad_norm": 0.008095299465644802, "learning_rate": 3.050993188218332e-08, "loss": 0.0005, "step": 294380 }, { "epoch": 1.9367380906955782, "grad_norm": 0.01940408549352128, "learning_rate": 3.044663851351659e-08, "loss": 0.0003, "step": 294390 }, { "epoch": 1.9368038788708117, "grad_norm": 0.004896582545735728, "learning_rate": 3.038341066411932e-08, "loss": 0.0004, "step": 294400 }, { "epoch": 1.9368696670460452, "grad_norm": 0.010007454278242643, "learning_rate": 3.032024833482472e-08, "loss": 0.0004, "step": 294410 }, { "epoch": 1.9369354552212785, "grad_norm": 0.036765965349689685, "learning_rate": 3.025715152646547e-08, "loss": 0.0005, "step": 294420 }, { "epoch": 1.9370012433965118, "grad_norm": 0.04158353267538373, "learning_rate": 3.019412023987367e-08, "loss": 0.0004, "step": 294430 }, { "epoch": 1.9370670315717453, "grad_norm": 0.0031833713616750766, "learning_rate": 3.0131154475879776e-08, "loss": 0.0002, "step": 294440 }, { "epoch": 1.9371328197469788, "grad_norm": 0.019391027291214138, "learning_rate": 3.006825423531479e-08, "loss": 0.0005, "step": 294450 }, { "epoch": 1.937198607922212, "grad_norm": 0.04669871243528628, "learning_rate": 3.000541951900748e-08, "loss": 0.0008, "step": 294460 }, { "epoch": 1.9372643960974454, "grad_norm": 0.145176638238969, "learning_rate": 2.9942650327786093e-08, "loss": 0.0005, "step": 294470 }, { "epoch": 1.9373301842726787, "grad_norm": 0.014998996982257321, "learning_rate": 2.987994666247829e-08, "loss": 0.0006, "step": 294480 }, { "epoch": 1.9373959724479122, "grad_norm": 0.030858380492939402, "learning_rate": 2.981730852391118e-08, "loss": 0.0004, "step": 294490 }, { "epoch": 1.9374617606231457, "grad_norm": 0.018574744586082263, "learning_rate": 2.9754735912910226e-08, "loss": 0.0004, "step": 294500 }, { "epoch": 1.937527548798379, "grad_norm": 0.047924545427508194, "learning_rate": 2.9692228830300872e-08, "loss": 0.0005, "step": 294510 }, { "epoch": 1.9375933369736122, "grad_norm": 0.0007862994703710935, "learning_rate": 2.9629787276906353e-08, "loss": 0.0004, "step": 294520 }, { "epoch": 1.9376591251488458, "grad_norm": 0.018620739603550266, "learning_rate": 2.956741125355045e-08, "loss": 0.0003, "step": 294530 }, { "epoch": 1.9377249133240793, "grad_norm": 0.03817377125650672, "learning_rate": 2.9505100761055838e-08, "loss": 0.0006, "step": 294540 }, { "epoch": 1.9377907014993125, "grad_norm": 9.96989182773434e-05, "learning_rate": 2.944285580024353e-08, "loss": 0.0005, "step": 294550 }, { "epoch": 1.9378564896745458, "grad_norm": 0.032251369046912406, "learning_rate": 2.9380676371934536e-08, "loss": 0.0007, "step": 294560 }, { "epoch": 1.9379222778497793, "grad_norm": 0.01948030799217528, "learning_rate": 2.931856247694764e-08, "loss": 0.0004, "step": 294570 }, { "epoch": 1.9379880660250126, "grad_norm": 0.052850944840990885, "learning_rate": 2.9256514116103308e-08, "loss": 0.0003, "step": 294580 }, { "epoch": 1.9380538542002461, "grad_norm": 0.004003512510990404, "learning_rate": 2.9194531290218654e-08, "loss": 0.0006, "step": 294590 }, { "epoch": 1.9381196423754794, "grad_norm": 0.0036756457650835113, "learning_rate": 2.9132614000110805e-08, "loss": 0.0002, "step": 294600 }, { "epoch": 1.9381854305507127, "grad_norm": 0.003967147142906411, "learning_rate": 2.9070762246596328e-08, "loss": 0.0003, "step": 294610 }, { "epoch": 1.9382512187259462, "grad_norm": 0.0064950768116405445, "learning_rate": 2.900897603049124e-08, "loss": 0.0003, "step": 294620 }, { "epoch": 1.9383170069011797, "grad_norm": 0.03131351525429358, "learning_rate": 2.8947255352608784e-08, "loss": 0.0006, "step": 294630 }, { "epoch": 1.938382795076413, "grad_norm": 0.027047096792105114, "learning_rate": 2.8885600213763855e-08, "loss": 0.0005, "step": 294640 }, { "epoch": 1.9384485832516463, "grad_norm": 0.0044999085266795875, "learning_rate": 2.8824010614768583e-08, "loss": 0.0003, "step": 294650 }, { "epoch": 1.9385143714268798, "grad_norm": 0.02506380607128901, "learning_rate": 2.8762486556436208e-08, "loss": 0.0027, "step": 294660 }, { "epoch": 1.9385801596021133, "grad_norm": 0.007813212124272051, "learning_rate": 2.8701028039576085e-08, "loss": 0.0009, "step": 294670 }, { "epoch": 1.9386459477773466, "grad_norm": 0.0014790752156693628, "learning_rate": 2.8639635064999783e-08, "loss": 0.0007, "step": 294680 }, { "epoch": 1.9387117359525798, "grad_norm": 0.000274617576568747, "learning_rate": 2.8578307633516655e-08, "loss": 0.0004, "step": 294690 }, { "epoch": 1.9387775241278131, "grad_norm": 0.06491287852397414, "learning_rate": 2.851704574593439e-08, "loss": 0.0002, "step": 294700 }, { "epoch": 1.9388433123030466, "grad_norm": 0.022154890220845387, "learning_rate": 2.8455849403061784e-08, "loss": 0.0004, "step": 294710 }, { "epoch": 1.9389091004782801, "grad_norm": 0.011182141920552996, "learning_rate": 2.8394718605704863e-08, "loss": 0.0002, "step": 294720 }, { "epoch": 1.9389748886535134, "grad_norm": 0.02444571383109085, "learning_rate": 2.8333653354669645e-08, "loss": 0.0002, "step": 294730 }, { "epoch": 1.9390406768287467, "grad_norm": 0.06584371171810485, "learning_rate": 2.8272653650761596e-08, "loss": 0.0006, "step": 294740 }, { "epoch": 1.9391064650039802, "grad_norm": 0.001846425702804757, "learning_rate": 2.8211719494784517e-08, "loss": 0.0007, "step": 294750 }, { "epoch": 1.9391722531792137, "grad_norm": 0.06213187485955186, "learning_rate": 2.8150850887542212e-08, "loss": 0.0005, "step": 294760 }, { "epoch": 1.939238041354447, "grad_norm": 0.012356109792422408, "learning_rate": 2.809004782983682e-08, "loss": 0.0002, "step": 294770 }, { "epoch": 1.9393038295296803, "grad_norm": 0.007733787067415701, "learning_rate": 2.8029310322469915e-08, "loss": 0.0005, "step": 294780 }, { "epoch": 1.9393696177049136, "grad_norm": 0.04794672192332654, "learning_rate": 2.7968638366243085e-08, "loss": 0.0006, "step": 294790 }, { "epoch": 1.939435405880147, "grad_norm": 0.01133369412514226, "learning_rate": 2.790803196195513e-08, "loss": 0.0003, "step": 294800 }, { "epoch": 1.9395011940553806, "grad_norm": 0.035786825653953636, "learning_rate": 2.7847491110405966e-08, "loss": 0.0002, "step": 294810 }, { "epoch": 1.9395669822306139, "grad_norm": 0.001094619134454046, "learning_rate": 2.778701581239329e-08, "loss": 0.0003, "step": 294820 }, { "epoch": 1.9396327704058471, "grad_norm": 0.010940293814432293, "learning_rate": 2.7726606068714245e-08, "loss": 0.0005, "step": 294830 }, { "epoch": 1.9396985585810806, "grad_norm": 0.0082334765105453, "learning_rate": 2.766626188016597e-08, "loss": 0.0004, "step": 294840 }, { "epoch": 1.9397643467563142, "grad_norm": 0.015339855830272912, "learning_rate": 2.7605983247543377e-08, "loss": 0.0006, "step": 294850 }, { "epoch": 1.9398301349315474, "grad_norm": 0.001636534648860047, "learning_rate": 2.7545770171641397e-08, "loss": 0.0006, "step": 294860 }, { "epoch": 1.9398959231067807, "grad_norm": 0.01365462525626138, "learning_rate": 2.7485622653254385e-08, "loss": 0.0004, "step": 294870 }, { "epoch": 1.9399617112820142, "grad_norm": 0.03331914936647594, "learning_rate": 2.742554069317449e-08, "loss": 0.001, "step": 294880 }, { "epoch": 1.9400274994572475, "grad_norm": 0.05817114608431037, "learning_rate": 2.7365524292193857e-08, "loss": 0.0004, "step": 294890 }, { "epoch": 1.940093287632481, "grad_norm": 0.05924764378661268, "learning_rate": 2.7305573451104627e-08, "loss": 0.0005, "step": 294900 }, { "epoch": 1.9401590758077143, "grad_norm": 0.0027689193902483526, "learning_rate": 2.7245688170696728e-08, "loss": 0.0003, "step": 294910 }, { "epoch": 1.9402248639829476, "grad_norm": 0.0171600252681334, "learning_rate": 2.7185868451759524e-08, "loss": 0.0004, "step": 294920 }, { "epoch": 1.940290652158181, "grad_norm": 0.013788838591097525, "learning_rate": 2.7126114295082384e-08, "loss": 0.0006, "step": 294930 }, { "epoch": 1.9403564403334146, "grad_norm": 0.00035100300298106615, "learning_rate": 2.7066425701451904e-08, "loss": 0.0004, "step": 294940 }, { "epoch": 1.9404222285086479, "grad_norm": 0.03777428410877739, "learning_rate": 2.7006802671655786e-08, "loss": 0.0005, "step": 294950 }, { "epoch": 1.9404880166838812, "grad_norm": 0.08716981956586593, "learning_rate": 2.694724520648062e-08, "loss": 0.0006, "step": 294960 }, { "epoch": 1.9405538048591147, "grad_norm": 0.03157118916421319, "learning_rate": 2.688775330671023e-08, "loss": 0.0003, "step": 294970 }, { "epoch": 1.9406195930343482, "grad_norm": 0.014152687415988413, "learning_rate": 2.6828326973130092e-08, "loss": 0.0007, "step": 294980 }, { "epoch": 1.9406853812095815, "grad_norm": 5.5914586915824576e-05, "learning_rate": 2.6768966206523473e-08, "loss": 0.0003, "step": 294990 }, { "epoch": 1.9407511693848147, "grad_norm": 0.002573018121772093, "learning_rate": 2.6709671007673076e-08, "loss": 0.0007, "step": 295000 }, { "epoch": 1.940816957560048, "grad_norm": 0.022184689059517725, "learning_rate": 2.6650441377359946e-08, "loss": 0.0012, "step": 295010 }, { "epoch": 1.9408827457352815, "grad_norm": 0.0014531436824955738, "learning_rate": 2.6591277316365127e-08, "loss": 0.0004, "step": 295020 }, { "epoch": 1.940948533910515, "grad_norm": 0.04961936440105759, "learning_rate": 2.6532178825469655e-08, "loss": 0.0004, "step": 295030 }, { "epoch": 1.9410143220857483, "grad_norm": 0.06118694076547976, "learning_rate": 2.6473145905451802e-08, "loss": 0.0009, "step": 295040 }, { "epoch": 1.9410801102609816, "grad_norm": 0.02131992501731149, "learning_rate": 2.6414178557090388e-08, "loss": 0.0002, "step": 295050 }, { "epoch": 1.941145898436215, "grad_norm": 0.0012179290816544553, "learning_rate": 2.6355276781162565e-08, "loss": 0.0012, "step": 295060 }, { "epoch": 1.9412116866114486, "grad_norm": 0.0017748157165687023, "learning_rate": 2.6296440578444382e-08, "loss": 0.0003, "step": 295070 }, { "epoch": 1.941277474786682, "grad_norm": 0.0005927227692846066, "learning_rate": 2.6237669949712442e-08, "loss": 0.0003, "step": 295080 }, { "epoch": 1.9413432629619152, "grad_norm": 0.011020254919842869, "learning_rate": 2.617896489574112e-08, "loss": 0.0011, "step": 295090 }, { "epoch": 1.9414090511371485, "grad_norm": 0.008054494787103909, "learning_rate": 2.6120325417304803e-08, "loss": 0.0005, "step": 295100 }, { "epoch": 1.941474839312382, "grad_norm": 0.00245701681831614, "learning_rate": 2.6061751515176204e-08, "loss": 0.0003, "step": 295110 }, { "epoch": 1.9415406274876155, "grad_norm": 0.008123967719978972, "learning_rate": 2.6003243190126927e-08, "loss": 0.0001, "step": 295120 }, { "epoch": 1.9416064156628488, "grad_norm": 0.003549565015392754, "learning_rate": 2.5944800442929685e-08, "loss": 0.0006, "step": 295130 }, { "epoch": 1.941672203838082, "grad_norm": 0.04040819643181648, "learning_rate": 2.5886423274354422e-08, "loss": 0.0006, "step": 295140 }, { "epoch": 1.9417379920133155, "grad_norm": 0.0012850704075699144, "learning_rate": 2.582811168517052e-08, "loss": 0.0009, "step": 295150 }, { "epoch": 1.941803780188549, "grad_norm": 0.004175081636616962, "learning_rate": 2.576986567614681e-08, "loss": 0.0004, "step": 295160 }, { "epoch": 1.9418695683637823, "grad_norm": 0.10365385132398683, "learning_rate": 2.5711685248051565e-08, "loss": 0.0007, "step": 295170 }, { "epoch": 1.9419353565390156, "grad_norm": 0.10275636058117556, "learning_rate": 2.565357040165195e-08, "loss": 0.0005, "step": 295180 }, { "epoch": 1.942001144714249, "grad_norm": 0.0035868542794852466, "learning_rate": 2.5595521137713465e-08, "loss": 0.0004, "step": 295190 }, { "epoch": 1.9420669328894824, "grad_norm": 0.09369703233506343, "learning_rate": 2.553753745700216e-08, "loss": 0.0004, "step": 295200 }, { "epoch": 1.942132721064716, "grad_norm": 0.07880390476913343, "learning_rate": 2.5479619360281872e-08, "loss": 0.0011, "step": 295210 }, { "epoch": 1.9421985092399492, "grad_norm": 0.05115947875893707, "learning_rate": 2.5421766848316985e-08, "loss": 0.0004, "step": 295220 }, { "epoch": 1.9422642974151825, "grad_norm": 0.07582893965853305, "learning_rate": 2.536397992186912e-08, "loss": 0.0003, "step": 295230 }, { "epoch": 1.942330085590416, "grad_norm": 0.041594697264708054, "learning_rate": 2.530625858170155e-08, "loss": 0.0004, "step": 295240 }, { "epoch": 1.9423958737656495, "grad_norm": 0.010211801591253419, "learning_rate": 2.524860282857422e-08, "loss": 0.0003, "step": 295250 }, { "epoch": 1.9424616619408828, "grad_norm": 0.02406115600777772, "learning_rate": 2.5191012663247637e-08, "loss": 0.0007, "step": 295260 }, { "epoch": 1.942527450116116, "grad_norm": 0.025891248554092066, "learning_rate": 2.513348808648064e-08, "loss": 0.0002, "step": 295270 }, { "epoch": 1.9425932382913496, "grad_norm": 0.01304674351453934, "learning_rate": 2.5076029099032063e-08, "loss": 0.0002, "step": 295280 }, { "epoch": 1.942659026466583, "grad_norm": 0.05068745853749557, "learning_rate": 2.5018635701659634e-08, "loss": 0.0002, "step": 295290 }, { "epoch": 1.9427248146418163, "grad_norm": 0.07885851231099655, "learning_rate": 2.4961307895119967e-08, "loss": 0.0008, "step": 295300 }, { "epoch": 1.9427906028170496, "grad_norm": 0.02453252267266121, "learning_rate": 2.4904045680168575e-08, "loss": 0.0001, "step": 295310 }, { "epoch": 1.942856390992283, "grad_norm": 0.05210343359019358, "learning_rate": 2.4846849057560963e-08, "loss": 0.0002, "step": 295320 }, { "epoch": 1.9429221791675164, "grad_norm": 0.050761232255180734, "learning_rate": 2.4789718028050414e-08, "loss": 0.0007, "step": 295330 }, { "epoch": 1.94298796734275, "grad_norm": 6.121232450582096e-05, "learning_rate": 2.473265259239077e-08, "loss": 0.0002, "step": 295340 }, { "epoch": 1.9430537555179832, "grad_norm": 0.10600749283855844, "learning_rate": 2.4675652751333656e-08, "loss": 0.0003, "step": 295350 }, { "epoch": 1.9431195436932165, "grad_norm": 0.014984007193483834, "learning_rate": 2.46187185056318e-08, "loss": 0.0002, "step": 295360 }, { "epoch": 1.94318533186845, "grad_norm": 0.00855602973775171, "learning_rate": 2.4561849856034605e-08, "loss": 0.0005, "step": 295370 }, { "epoch": 1.9432511200436835, "grad_norm": 0.044161563417983676, "learning_rate": 2.4505046803292574e-08, "loss": 0.0004, "step": 295380 }, { "epoch": 1.9433169082189168, "grad_norm": 0.011366239718488432, "learning_rate": 2.4448309348154563e-08, "loss": 0.0003, "step": 295390 }, { "epoch": 1.94338269639415, "grad_norm": 0.01564829763481001, "learning_rate": 2.4391637491368304e-08, "loss": 0.0002, "step": 295400 }, { "epoch": 1.9434484845693833, "grad_norm": 0.006308962069861824, "learning_rate": 2.4335031233680418e-08, "loss": 0.0002, "step": 295410 }, { "epoch": 1.9435142727446169, "grad_norm": 0.06346059704108145, "learning_rate": 2.427849057583864e-08, "loss": 0.0004, "step": 295420 }, { "epoch": 1.9435800609198504, "grad_norm": 0.04337172559257966, "learning_rate": 2.4222015518587384e-08, "loss": 0.0008, "step": 295430 }, { "epoch": 1.9436458490950836, "grad_norm": 0.016002890783161042, "learning_rate": 2.4165606062671597e-08, "loss": 0.0002, "step": 295440 }, { "epoch": 1.943711637270317, "grad_norm": 0.008735510136114615, "learning_rate": 2.4109262208834582e-08, "loss": 0.0003, "step": 295450 }, { "epoch": 1.9437774254455504, "grad_norm": 0.017721215221878257, "learning_rate": 2.4052983957820186e-08, "loss": 0.0002, "step": 295460 }, { "epoch": 1.943843213620784, "grad_norm": 0.008835550499678217, "learning_rate": 2.399677131036893e-08, "loss": 0.0004, "step": 295470 }, { "epoch": 1.9439090017960172, "grad_norm": 0.021117919218917043, "learning_rate": 2.3940624267222988e-08, "loss": 0.0003, "step": 295480 }, { "epoch": 1.9439747899712505, "grad_norm": 0.017868106543542817, "learning_rate": 2.3884542829122338e-08, "loss": 0.0004, "step": 295490 }, { "epoch": 1.9440405781464838, "grad_norm": 0.009370412865599233, "learning_rate": 2.3828526996806377e-08, "loss": 0.0004, "step": 295500 }, { "epoch": 1.9441063663217173, "grad_norm": 0.014058429134408577, "learning_rate": 2.3772576771013968e-08, "loss": 0.0009, "step": 295510 }, { "epoch": 1.9441721544969508, "grad_norm": 0.010438253022635818, "learning_rate": 2.3716692152481734e-08, "loss": 0.0006, "step": 295520 }, { "epoch": 1.944237942672184, "grad_norm": 0.016435060250444028, "learning_rate": 2.3660873141947428e-08, "loss": 0.0005, "step": 295530 }, { "epoch": 1.9443037308474174, "grad_norm": 0.033889248600837016, "learning_rate": 2.360511974014601e-08, "loss": 0.0008, "step": 295540 }, { "epoch": 1.9443695190226509, "grad_norm": 0.04641207273971553, "learning_rate": 2.3549431947814115e-08, "loss": 0.0004, "step": 295550 }, { "epoch": 1.9444353071978844, "grad_norm": 0.0006175070508925853, "learning_rate": 2.3493809765684496e-08, "loss": 0.0008, "step": 295560 }, { "epoch": 1.9445010953731177, "grad_norm": 0.0335477261796837, "learning_rate": 2.3438253194491e-08, "loss": 0.0003, "step": 295570 }, { "epoch": 1.944566883548351, "grad_norm": 0.026721062268927186, "learning_rate": 2.3382762234966382e-08, "loss": 0.0012, "step": 295580 }, { "epoch": 1.9446326717235844, "grad_norm": 0.06488893205538633, "learning_rate": 2.3327336887841723e-08, "loss": 0.0003, "step": 295590 }, { "epoch": 1.9446984598988177, "grad_norm": 0.0014229166350322134, "learning_rate": 2.32719771538481e-08, "loss": 0.0007, "step": 295600 }, { "epoch": 1.9447642480740512, "grad_norm": 0.03618038024005627, "learning_rate": 2.321668303371549e-08, "loss": 0.0003, "step": 295610 }, { "epoch": 1.9448300362492845, "grad_norm": 0.03142553883310213, "learning_rate": 2.31614545281722e-08, "loss": 0.0012, "step": 295620 }, { "epoch": 1.9448958244245178, "grad_norm": 0.09519005933192819, "learning_rate": 2.310629163794764e-08, "loss": 0.0003, "step": 295630 }, { "epoch": 1.9449616125997513, "grad_norm": 0.00070346929060139, "learning_rate": 2.305119436376735e-08, "loss": 0.0024, "step": 295640 }, { "epoch": 1.9450274007749848, "grad_norm": 0.03174783377279191, "learning_rate": 2.299616270635907e-08, "loss": 0.0004, "step": 295650 }, { "epoch": 1.945093188950218, "grad_norm": 0.003939303509190254, "learning_rate": 2.2941196666448338e-08, "loss": 0.0001, "step": 295660 }, { "epoch": 1.9451589771254514, "grad_norm": 0.024202877739515807, "learning_rate": 2.288629624475902e-08, "loss": 0.0002, "step": 295670 }, { "epoch": 1.9452247653006849, "grad_norm": 0.06187757498243374, "learning_rate": 2.2831461442015533e-08, "loss": 0.0002, "step": 295680 }, { "epoch": 1.9452905534759184, "grad_norm": 0.036060373334801984, "learning_rate": 2.2776692258940636e-08, "loss": 0.0013, "step": 295690 }, { "epoch": 1.9453563416511517, "grad_norm": 0.004280779848000802, "learning_rate": 2.272198869625597e-08, "loss": 0.0002, "step": 295700 }, { "epoch": 1.945422129826385, "grad_norm": 0.03311509005812661, "learning_rate": 2.2667350754683737e-08, "loss": 0.0003, "step": 295710 }, { "epoch": 1.9454879180016182, "grad_norm": 0.003420290147697826, "learning_rate": 2.2612778434943915e-08, "loss": 0.0003, "step": 295720 }, { "epoch": 1.9455537061768517, "grad_norm": 0.016191697262897622, "learning_rate": 2.2558271737755377e-08, "loss": 0.0003, "step": 295730 }, { "epoch": 1.9456194943520853, "grad_norm": 0.020784411588850565, "learning_rate": 2.2503830663837544e-08, "loss": 0.0004, "step": 295740 }, { "epoch": 1.9456852825273185, "grad_norm": 0.00658134238208665, "learning_rate": 2.244945521390818e-08, "loss": 0.0004, "step": 295750 }, { "epoch": 1.9457510707025518, "grad_norm": 0.004058931625932264, "learning_rate": 2.239514538868337e-08, "loss": 0.0005, "step": 295760 }, { "epoch": 1.9458168588777853, "grad_norm": 0.026293971460320715, "learning_rate": 2.234090118887977e-08, "loss": 0.0002, "step": 295770 }, { "epoch": 1.9458826470530188, "grad_norm": 0.0006632992808325232, "learning_rate": 2.2286722615212365e-08, "loss": 0.0006, "step": 295780 }, { "epoch": 1.9459484352282521, "grad_norm": 0.0027067964803819894, "learning_rate": 2.2232609668395577e-08, "loss": 0.0004, "step": 295790 }, { "epoch": 1.9460142234034854, "grad_norm": 0.0006416040451591277, "learning_rate": 2.2178562349142176e-08, "loss": 0.001, "step": 295800 }, { "epoch": 1.9460800115787187, "grad_norm": 0.02183903343812326, "learning_rate": 2.2124580658166027e-08, "loss": 0.0003, "step": 295810 }, { "epoch": 1.9461457997539522, "grad_norm": 0.0017008141109060501, "learning_rate": 2.2070664596177683e-08, "loss": 0.0006, "step": 295820 }, { "epoch": 1.9462115879291857, "grad_norm": 0.0016670896223233173, "learning_rate": 2.201681416388879e-08, "loss": 0.0005, "step": 295830 }, { "epoch": 1.946277376104419, "grad_norm": 0.001039144050902034, "learning_rate": 2.196302936200878e-08, "loss": 0.0003, "step": 295840 }, { "epoch": 1.9463431642796523, "grad_norm": 0.01764671999377371, "learning_rate": 2.1909310191246534e-08, "loss": 0.0004, "step": 295850 }, { "epoch": 1.9464089524548858, "grad_norm": 0.06068429950741127, "learning_rate": 2.1855656652311485e-08, "loss": 0.0005, "step": 295860 }, { "epoch": 1.9464747406301193, "grad_norm": 0.016985218856636036, "learning_rate": 2.180206874590973e-08, "loss": 0.0001, "step": 295870 }, { "epoch": 1.9465405288053526, "grad_norm": 0.0013371038247002819, "learning_rate": 2.1748546472747934e-08, "loss": 0.0005, "step": 295880 }, { "epoch": 1.9466063169805858, "grad_norm": 0.042836965009505755, "learning_rate": 2.1695089833532746e-08, "loss": 0.0005, "step": 295890 }, { "epoch": 1.9466721051558193, "grad_norm": 0.0064054083284033015, "learning_rate": 2.1641698828967494e-08, "loss": 0.0001, "step": 295900 }, { "epoch": 1.9467378933310526, "grad_norm": 0.007343791554139268, "learning_rate": 2.1588373459757173e-08, "loss": 0.0001, "step": 295910 }, { "epoch": 1.9468036815062861, "grad_norm": 0.021650980315518556, "learning_rate": 2.153511372660455e-08, "loss": 0.0003, "step": 295920 }, { "epoch": 1.9468694696815194, "grad_norm": 0.002523603344738452, "learning_rate": 2.1481919630211846e-08, "loss": 0.0009, "step": 295930 }, { "epoch": 1.9469352578567527, "grad_norm": 0.013470976957565332, "learning_rate": 2.1428791171280163e-08, "loss": 0.0003, "step": 295940 }, { "epoch": 1.9470010460319862, "grad_norm": 0.016068255917141647, "learning_rate": 2.137572835051005e-08, "loss": 0.0002, "step": 295950 }, { "epoch": 1.9470668342072197, "grad_norm": 0.017897621292516375, "learning_rate": 2.1322731168601507e-08, "loss": 0.0002, "step": 295960 }, { "epoch": 1.947132622382453, "grad_norm": 0.008341939635204226, "learning_rate": 2.1269799626252863e-08, "loss": 0.0004, "step": 295970 }, { "epoch": 1.9471984105576863, "grad_norm": 0.017843571411877493, "learning_rate": 2.1216933724161338e-08, "loss": 0.0004, "step": 295980 }, { "epoch": 1.9472641987329198, "grad_norm": 0.025151686400769747, "learning_rate": 2.116413346302526e-08, "loss": 0.0002, "step": 295990 }, { "epoch": 1.9473299869081533, "grad_norm": 0.010544107005743176, "learning_rate": 2.1111398843539632e-08, "loss": 0.0003, "step": 296000 }, { "epoch": 1.9473957750833866, "grad_norm": 0.025275738599770253, "learning_rate": 2.1058729866400564e-08, "loss": 0.0006, "step": 296010 }, { "epoch": 1.9474615632586199, "grad_norm": 0.010101137814616861, "learning_rate": 2.100612653230194e-08, "loss": 0.0002, "step": 296020 }, { "epoch": 1.9475273514338531, "grad_norm": 0.013570168897458575, "learning_rate": 2.09535888419371e-08, "loss": 0.0001, "step": 296030 }, { "epoch": 1.9475931396090866, "grad_norm": 0.001576600666165362, "learning_rate": 2.0901116795999378e-08, "loss": 0.0002, "step": 296040 }, { "epoch": 1.9476589277843201, "grad_norm": 4.536648150590362e-05, "learning_rate": 2.0848710395179884e-08, "loss": 0.0003, "step": 296050 }, { "epoch": 1.9477247159595534, "grad_norm": 0.015511869138119224, "learning_rate": 2.0796369640169732e-08, "loss": 0.0004, "step": 296060 }, { "epoch": 1.9477905041347867, "grad_norm": 0.007979930028900043, "learning_rate": 2.0744094531659487e-08, "loss": 0.0002, "step": 296070 }, { "epoch": 1.9478562923100202, "grad_norm": 0.006160947823924562, "learning_rate": 2.0691885070338037e-08, "loss": 0.0002, "step": 296080 }, { "epoch": 1.9479220804852537, "grad_norm": 0.1019760787532599, "learning_rate": 2.063974125689372e-08, "loss": 0.0018, "step": 296090 }, { "epoch": 1.947987868660487, "grad_norm": 0.0114671132670251, "learning_rate": 2.0587663092013765e-08, "loss": 0.0007, "step": 296100 }, { "epoch": 1.9480536568357203, "grad_norm": 0.11734007617519869, "learning_rate": 2.0535650576384848e-08, "loss": 0.0005, "step": 296110 }, { "epoch": 1.9481194450109536, "grad_norm": 0.0076289624986454274, "learning_rate": 2.0483703710693083e-08, "loss": 0.0002, "step": 296120 }, { "epoch": 1.948185233186187, "grad_norm": 0.05990589409619791, "learning_rate": 2.0431822495622923e-08, "loss": 0.0005, "step": 296130 }, { "epoch": 1.9482510213614206, "grad_norm": 0.0025467713978998313, "learning_rate": 2.0380006931858264e-08, "loss": 0.0004, "step": 296140 }, { "epoch": 1.9483168095366539, "grad_norm": 0.028069674971125592, "learning_rate": 2.032825702008301e-08, "loss": 0.0004, "step": 296150 }, { "epoch": 1.9483825977118872, "grad_norm": 0.0007715333212984464, "learning_rate": 2.027657276097883e-08, "loss": 0.0003, "step": 296160 }, { "epoch": 1.9484483858871207, "grad_norm": 0.03545875794593163, "learning_rate": 2.022495415522685e-08, "loss": 0.0004, "step": 296170 }, { "epoch": 1.9485141740623542, "grad_norm": 0.03685221999250459, "learning_rate": 2.0173401203508748e-08, "loss": 0.0006, "step": 296180 }, { "epoch": 1.9485799622375874, "grad_norm": 0.022111837476180524, "learning_rate": 2.012191390650342e-08, "loss": 0.0004, "step": 296190 }, { "epoch": 1.9486457504128207, "grad_norm": 0.06498056108834013, "learning_rate": 2.0070492264889217e-08, "loss": 0.0006, "step": 296200 }, { "epoch": 1.9487115385880542, "grad_norm": 0.007605865056933662, "learning_rate": 2.0019136279345043e-08, "loss": 0.0003, "step": 296210 }, { "epoch": 1.9487773267632875, "grad_norm": 0.008617004158463202, "learning_rate": 1.996784595054757e-08, "loss": 0.0002, "step": 296220 }, { "epoch": 1.948843114938521, "grad_norm": 0.03236813256790419, "learning_rate": 1.991662127917293e-08, "loss": 0.0024, "step": 296230 }, { "epoch": 1.9489089031137543, "grad_norm": 0.05387655100235881, "learning_rate": 1.986546226589614e-08, "loss": 0.0007, "step": 296240 }, { "epoch": 1.9489746912889876, "grad_norm": 0.03297290067231363, "learning_rate": 1.9814368911392767e-08, "loss": 0.0003, "step": 296250 }, { "epoch": 1.949040479464221, "grad_norm": 0.05081278848714036, "learning_rate": 1.976334121633561e-08, "loss": 0.0006, "step": 296260 }, { "epoch": 1.9491062676394546, "grad_norm": 0.03974526920007431, "learning_rate": 1.9712379181397456e-08, "loss": 0.0011, "step": 296270 }, { "epoch": 1.9491720558146879, "grad_norm": 0.024114621438603902, "learning_rate": 1.9661482807249998e-08, "loss": 0.0004, "step": 296280 }, { "epoch": 1.9492378439899212, "grad_norm": 0.005643449189378179, "learning_rate": 1.961065209456492e-08, "loss": 0.0003, "step": 296290 }, { "epoch": 1.9493036321651547, "grad_norm": 0.0006074167087770218, "learning_rate": 1.955988704401168e-08, "loss": 0.0021, "step": 296300 }, { "epoch": 1.9493694203403882, "grad_norm": 0.046717955727150304, "learning_rate": 1.950918765626031e-08, "loss": 0.0004, "step": 296310 }, { "epoch": 1.9494352085156215, "grad_norm": 0.04632756213068761, "learning_rate": 1.945855393197915e-08, "loss": 0.0006, "step": 296320 }, { "epoch": 1.9495009966908547, "grad_norm": 0.0019065970041324668, "learning_rate": 1.9407985871834902e-08, "loss": 0.0003, "step": 296330 }, { "epoch": 1.949566784866088, "grad_norm": 0.034630746168371626, "learning_rate": 1.9357483476495355e-08, "loss": 0.0004, "step": 296340 }, { "epoch": 1.9496325730413215, "grad_norm": 0.0019312398727986937, "learning_rate": 1.9307046746625536e-08, "loss": 0.0006, "step": 296350 }, { "epoch": 1.949698361216555, "grad_norm": 0.01588455265154176, "learning_rate": 1.9256675682891026e-08, "loss": 0.0009, "step": 296360 }, { "epoch": 1.9497641493917883, "grad_norm": 0.05108261757822396, "learning_rate": 1.920637028595518e-08, "loss": 0.0006, "step": 296370 }, { "epoch": 1.9498299375670216, "grad_norm": 0.0008184719393390499, "learning_rate": 1.915613055648191e-08, "loss": 0.0001, "step": 296380 }, { "epoch": 1.9498957257422551, "grad_norm": 0.027387581332938945, "learning_rate": 1.9105956495132916e-08, "loss": 0.0008, "step": 296390 }, { "epoch": 1.9499615139174886, "grad_norm": 0.07949754581675635, "learning_rate": 1.905584810257044e-08, "loss": 0.0004, "step": 296400 }, { "epoch": 1.950027302092722, "grad_norm": 0.024040433991816358, "learning_rate": 1.9005805379455066e-08, "loss": 0.0004, "step": 296410 }, { "epoch": 1.9500930902679552, "grad_norm": 0.007828450225829495, "learning_rate": 1.8955828326445713e-08, "loss": 0.0012, "step": 296420 }, { "epoch": 1.9501588784431885, "grad_norm": 0.004251333744577019, "learning_rate": 1.890591694420185e-08, "loss": 0.0004, "step": 296430 }, { "epoch": 1.950224666618422, "grad_norm": 0.0005002123312013258, "learning_rate": 1.8856071233381846e-08, "loss": 0.0001, "step": 296440 }, { "epoch": 1.9502904547936555, "grad_norm": 0.03442729048084843, "learning_rate": 1.880629119464239e-08, "loss": 0.0008, "step": 296450 }, { "epoch": 1.9503562429688888, "grad_norm": 0.013826874379760315, "learning_rate": 1.8756576828640184e-08, "loss": 0.0003, "step": 296460 }, { "epoch": 1.950422031144122, "grad_norm": 0.0009012355602580352, "learning_rate": 1.8706928136030256e-08, "loss": 0.0004, "step": 296470 }, { "epoch": 1.9504878193193556, "grad_norm": 0.022065035891604797, "learning_rate": 1.8657345117467085e-08, "loss": 0.0003, "step": 296480 }, { "epoch": 1.950553607494589, "grad_norm": 0.008957433302972677, "learning_rate": 1.8607827773605148e-08, "loss": 0.0007, "step": 296490 }, { "epoch": 1.9506193956698223, "grad_norm": 0.002805432417811061, "learning_rate": 1.85583761050967e-08, "loss": 0.0008, "step": 296500 }, { "epoch": 1.9506851838450556, "grad_norm": 0.04547293253464251, "learning_rate": 1.850899011259344e-08, "loss": 0.0007, "step": 296510 }, { "epoch": 1.950750972020289, "grad_norm": 0.044103271290002445, "learning_rate": 1.845966979674707e-08, "loss": 0.001, "step": 296520 }, { "epoch": 1.9508167601955224, "grad_norm": 0.0639228267816355, "learning_rate": 1.841041515820763e-08, "loss": 0.0002, "step": 296530 }, { "epoch": 1.950882548370756, "grad_norm": 0.035877276577003306, "learning_rate": 1.8361226197624593e-08, "loss": 0.0003, "step": 296540 }, { "epoch": 1.9509483365459892, "grad_norm": 0.004626964162527637, "learning_rate": 1.8312102915646333e-08, "loss": 0.0004, "step": 296550 }, { "epoch": 1.9510141247212225, "grad_norm": 0.019966639048564428, "learning_rate": 1.8263045312920114e-08, "loss": 0.0005, "step": 296560 }, { "epoch": 1.951079912896456, "grad_norm": 0.0018412265885322844, "learning_rate": 1.821405339009319e-08, "loss": 0.0009, "step": 296570 }, { "epoch": 1.9511457010716895, "grad_norm": 0.007203970664387111, "learning_rate": 1.816512714781171e-08, "loss": 0.0003, "step": 296580 }, { "epoch": 1.9512114892469228, "grad_norm": 0.04984921230124818, "learning_rate": 1.811626658672072e-08, "loss": 0.0003, "step": 296590 }, { "epoch": 1.951277277422156, "grad_norm": 0.000948358414491882, "learning_rate": 1.806747170746359e-08, "loss": 0.0001, "step": 296600 }, { "epoch": 1.9513430655973896, "grad_norm": 0.01734401736971841, "learning_rate": 1.801874251068425e-08, "loss": 0.0004, "step": 296610 }, { "epoch": 1.9514088537726229, "grad_norm": 0.00922161810205508, "learning_rate": 1.7970078997025518e-08, "loss": 0.0015, "step": 296620 }, { "epoch": 1.9514746419478564, "grad_norm": 0.033923820928512836, "learning_rate": 1.7921481167127998e-08, "loss": 0.001, "step": 296630 }, { "epoch": 1.9515404301230896, "grad_norm": 0.01745297530675731, "learning_rate": 1.787294902163339e-08, "loss": 0.0002, "step": 296640 }, { "epoch": 1.951606218298323, "grad_norm": 0.02348713278479823, "learning_rate": 1.7824482561180635e-08, "loss": 0.0002, "step": 296650 }, { "epoch": 1.9516720064735564, "grad_norm": 0.1693622495290331, "learning_rate": 1.7776081786409773e-08, "loss": 0.0007, "step": 296660 }, { "epoch": 1.95173779464879, "grad_norm": 0.0004639241829975424, "learning_rate": 1.772774669795807e-08, "loss": 0.0004, "step": 296670 }, { "epoch": 1.9518035828240232, "grad_norm": 0.05463160858611579, "learning_rate": 1.7679477296462798e-08, "loss": 0.0004, "step": 296680 }, { "epoch": 1.9518693709992565, "grad_norm": 0.020027256439838406, "learning_rate": 1.7631273582560672e-08, "loss": 0.0004, "step": 296690 }, { "epoch": 1.95193515917449, "grad_norm": 0.0005965920814716257, "learning_rate": 1.7583135556887286e-08, "loss": 0.0005, "step": 296700 }, { "epoch": 1.9520009473497235, "grad_norm": 0.031151510295468844, "learning_rate": 1.753506322007714e-08, "loss": 0.0006, "step": 296710 }, { "epoch": 1.9520667355249568, "grad_norm": 0.0681606180976785, "learning_rate": 1.7487056572764173e-08, "loss": 0.0008, "step": 296720 }, { "epoch": 1.95213252370019, "grad_norm": 0.08340722505544829, "learning_rate": 1.7439115615580647e-08, "loss": 0.0004, "step": 296730 }, { "epoch": 1.9521983118754234, "grad_norm": 0.04190392036099258, "learning_rate": 1.7391240349159955e-08, "loss": 0.0006, "step": 296740 }, { "epoch": 1.9522641000506569, "grad_norm": 0.0004319681888870063, "learning_rate": 1.734343077413214e-08, "loss": 0.0006, "step": 296750 }, { "epoch": 1.9523298882258904, "grad_norm": 0.006340640473904336, "learning_rate": 1.7295686891127816e-08, "loss": 0.0004, "step": 296760 }, { "epoch": 1.9523956764011237, "grad_norm": 0.017157809381019493, "learning_rate": 1.7248008700776476e-08, "loss": 0.0005, "step": 296770 }, { "epoch": 1.952461464576357, "grad_norm": 0.007084235614203633, "learning_rate": 1.7200396203706503e-08, "loss": 0.0004, "step": 296780 }, { "epoch": 1.9525272527515904, "grad_norm": 0.0033059583550083876, "learning_rate": 1.715284940054629e-08, "loss": 0.0005, "step": 296790 }, { "epoch": 1.952593040926824, "grad_norm": 0.018378250585719215, "learning_rate": 1.710536829192255e-08, "loss": 0.0005, "step": 296800 }, { "epoch": 1.9526588291020572, "grad_norm": 0.052335441869021324, "learning_rate": 1.705795287846035e-08, "loss": 0.0007, "step": 296810 }, { "epoch": 1.9527246172772905, "grad_norm": 0.0384814584756431, "learning_rate": 1.7010603160785844e-08, "loss": 0.0004, "step": 296820 }, { "epoch": 1.9527904054525238, "grad_norm": 0.0029227811407424387, "learning_rate": 1.6963319139522983e-08, "loss": 0.0005, "step": 296830 }, { "epoch": 1.9528561936277573, "grad_norm": 2.8894785817632785e-05, "learning_rate": 1.6916100815295155e-08, "loss": 0.0005, "step": 296840 }, { "epoch": 1.9529219818029908, "grad_norm": 0.0072360331786868815, "learning_rate": 1.6868948188724642e-08, "loss": 0.0003, "step": 296850 }, { "epoch": 1.952987769978224, "grad_norm": 0.01803556099943498, "learning_rate": 1.6821861260433725e-08, "loss": 0.0003, "step": 296860 }, { "epoch": 1.9530535581534574, "grad_norm": 0.03165767826715522, "learning_rate": 1.6774840031042462e-08, "loss": 0.0002, "step": 296870 }, { "epoch": 1.9531193463286909, "grad_norm": 0.033138128090373024, "learning_rate": 1.672788450117091e-08, "loss": 0.0003, "step": 296880 }, { "epoch": 1.9531851345039244, "grad_norm": 0.04468358889017871, "learning_rate": 1.6680994671439133e-08, "loss": 0.0007, "step": 296890 }, { "epoch": 1.9532509226791577, "grad_norm": 0.01031605337520541, "learning_rate": 1.6634170542463858e-08, "loss": 0.0002, "step": 296900 }, { "epoch": 1.953316710854391, "grad_norm": 0.0013878573136343623, "learning_rate": 1.6587412114863478e-08, "loss": 0.0002, "step": 296910 }, { "epoch": 1.9533824990296245, "grad_norm": 0.010201273815204782, "learning_rate": 1.654071938925417e-08, "loss": 0.0004, "step": 296920 }, { "epoch": 1.9534482872048577, "grad_norm": 0.04916852997108734, "learning_rate": 1.649409236625099e-08, "loss": 0.0002, "step": 296930 }, { "epoch": 1.9535140753800913, "grad_norm": 0.008670819308843449, "learning_rate": 1.6447531046469566e-08, "loss": 0.0004, "step": 296940 }, { "epoch": 1.9535798635553245, "grad_norm": 0.03648739192096274, "learning_rate": 1.6401035430523293e-08, "loss": 0.0001, "step": 296950 }, { "epoch": 1.9536456517305578, "grad_norm": 0.021787213909537355, "learning_rate": 1.6354605519025013e-08, "loss": 0.0006, "step": 296960 }, { "epoch": 1.9537114399057913, "grad_norm": 0.022381482430529172, "learning_rate": 1.630824131258757e-08, "loss": 0.0004, "step": 296970 }, { "epoch": 1.9537772280810248, "grad_norm": 0.05844636510417566, "learning_rate": 1.6261942811821584e-08, "loss": 0.0004, "step": 296980 }, { "epoch": 1.9538430162562581, "grad_norm": 0.004142187541069898, "learning_rate": 1.6215710017337684e-08, "loss": 0.0002, "step": 296990 }, { "epoch": 1.9539088044314914, "grad_norm": 0.003795412096283999, "learning_rate": 1.6169542929745375e-08, "loss": 0.0002, "step": 297000 }, { "epoch": 1.953974592606725, "grad_norm": 0.031608519778963805, "learning_rate": 1.6123441549653063e-08, "loss": 0.0004, "step": 297010 }, { "epoch": 1.9540403807819584, "grad_norm": 0.06737106452722191, "learning_rate": 1.607740587766915e-08, "loss": 0.001, "step": 297020 }, { "epoch": 1.9541061689571917, "grad_norm": 0.009001831571915616, "learning_rate": 1.6031435914399816e-08, "loss": 0.0003, "step": 297030 }, { "epoch": 1.954171957132425, "grad_norm": 0.007093539976825708, "learning_rate": 1.598553166045236e-08, "loss": 0.0004, "step": 297040 }, { "epoch": 1.9542377453076583, "grad_norm": 0.08968620110470998, "learning_rate": 1.5939693116430178e-08, "loss": 0.0004, "step": 297050 }, { "epoch": 1.9543035334828918, "grad_norm": 0.03091130752375526, "learning_rate": 1.5893920282939455e-08, "loss": 0.0002, "step": 297060 }, { "epoch": 1.9543693216581253, "grad_norm": 0.01624853227355514, "learning_rate": 1.5848213160582494e-08, "loss": 0.0009, "step": 297070 }, { "epoch": 1.9544351098333586, "grad_norm": 0.002312115561904656, "learning_rate": 1.5802571749962136e-08, "loss": 0.0004, "step": 297080 }, { "epoch": 1.9545008980085918, "grad_norm": 0.009229319371574474, "learning_rate": 1.575699605168013e-08, "loss": 0.0004, "step": 297090 }, { "epoch": 1.9545666861838253, "grad_norm": 0.09285648929084873, "learning_rate": 1.5711486066337655e-08, "loss": 0.0005, "step": 297100 }, { "epoch": 1.9546324743590588, "grad_norm": 0.00019245472366817294, "learning_rate": 1.566604179453479e-08, "loss": 0.0002, "step": 297110 }, { "epoch": 1.9546982625342921, "grad_norm": 0.032313244811479416, "learning_rate": 1.5620663236869948e-08, "loss": 0.001, "step": 297120 }, { "epoch": 1.9547640507095254, "grad_norm": 0.02002297730943135, "learning_rate": 1.557535039394209e-08, "loss": 0.0003, "step": 297130 }, { "epoch": 1.9548298388847587, "grad_norm": 0.16296713766504464, "learning_rate": 1.5530103266348518e-08, "loss": 0.0014, "step": 297140 }, { "epoch": 1.9548956270599922, "grad_norm": 0.04695319843575278, "learning_rate": 1.548492185468542e-08, "loss": 0.0005, "step": 297150 }, { "epoch": 1.9549614152352257, "grad_norm": 0.03679169703964122, "learning_rate": 1.5439806159548988e-08, "loss": 0.0008, "step": 297160 }, { "epoch": 1.955027203410459, "grad_norm": 0.027922635242492073, "learning_rate": 1.5394756181533744e-08, "loss": 0.0002, "step": 297170 }, { "epoch": 1.9550929915856923, "grad_norm": 0.0364370457087473, "learning_rate": 1.5349771921233657e-08, "loss": 0.0003, "step": 297180 }, { "epoch": 1.9551587797609258, "grad_norm": 0.0169186124912465, "learning_rate": 1.530485337924159e-08, "loss": 0.0003, "step": 297190 }, { "epoch": 1.9552245679361593, "grad_norm": 0.006587323642917624, "learning_rate": 1.5260000556149845e-08, "loss": 0.0003, "step": 297200 }, { "epoch": 1.9552903561113926, "grad_norm": 0.02137374215256769, "learning_rate": 1.5215213452550172e-08, "loss": 0.0002, "step": 297210 }, { "epoch": 1.9553561442866259, "grad_norm": 0.008127911682996878, "learning_rate": 1.517049206903265e-08, "loss": 0.0004, "step": 297220 }, { "epoch": 1.9554219324618594, "grad_norm": 0.02481272692334689, "learning_rate": 1.5125836406187367e-08, "loss": 0.0003, "step": 297230 }, { "epoch": 1.9554877206370926, "grad_norm": 0.03133706154647739, "learning_rate": 1.5081246464602184e-08, "loss": 0.0003, "step": 297240 }, { "epoch": 1.9555535088123261, "grad_norm": 0.01524834521238416, "learning_rate": 1.503672224486552e-08, "loss": 0.0004, "step": 297250 }, { "epoch": 1.9556192969875594, "grad_norm": 0.0009685901281424313, "learning_rate": 1.4992263747564684e-08, "loss": 0.0004, "step": 297260 }, { "epoch": 1.9556850851627927, "grad_norm": 0.009290578236145835, "learning_rate": 1.4947870973285317e-08, "loss": 0.0007, "step": 297270 }, { "epoch": 1.9557508733380262, "grad_norm": 0.06254119978373528, "learning_rate": 1.490354392261306e-08, "loss": 0.0006, "step": 297280 }, { "epoch": 1.9558166615132597, "grad_norm": 0.02009356577957696, "learning_rate": 1.4859282596132451e-08, "loss": 0.0003, "step": 297290 }, { "epoch": 1.955882449688493, "grad_norm": 0.014434521108291446, "learning_rate": 1.4815086994426353e-08, "loss": 0.0003, "step": 297300 }, { "epoch": 1.9559482378637263, "grad_norm": 0.02186346595687125, "learning_rate": 1.4770957118077634e-08, "loss": 0.0004, "step": 297310 }, { "epoch": 1.9560140260389598, "grad_norm": 0.0010098658446515577, "learning_rate": 1.4726892967668604e-08, "loss": 0.0003, "step": 297320 }, { "epoch": 1.9560798142141933, "grad_norm": 0.02658159329648429, "learning_rate": 1.4682894543779913e-08, "loss": 0.0005, "step": 297330 }, { "epoch": 1.9561456023894266, "grad_norm": 0.0393082015183092, "learning_rate": 1.463896184699165e-08, "loss": 0.0003, "step": 297340 }, { "epoch": 1.9562113905646599, "grad_norm": 0.013412512969798113, "learning_rate": 1.4595094877882798e-08, "loss": 0.0002, "step": 297350 }, { "epoch": 1.9562771787398932, "grad_norm": 0.0075341001951244975, "learning_rate": 1.4551293637032338e-08, "loss": 0.0004, "step": 297360 }, { "epoch": 1.9563429669151267, "grad_norm": 0.08347932246147644, "learning_rate": 1.4507558125017584e-08, "loss": 0.0005, "step": 297370 }, { "epoch": 1.9564087550903602, "grad_norm": 0.016656207463285888, "learning_rate": 1.446388834241419e-08, "loss": 0.0004, "step": 297380 }, { "epoch": 1.9564745432655934, "grad_norm": 0.015537404127602116, "learning_rate": 1.4420284289798913e-08, "loss": 0.0006, "step": 297390 }, { "epoch": 1.9565403314408267, "grad_norm": 0.009587460956720655, "learning_rate": 1.4376745967746852e-08, "loss": 0.0002, "step": 297400 }, { "epoch": 1.9566061196160602, "grad_norm": 0.015683027948250626, "learning_rate": 1.433327337683088e-08, "loss": 0.0002, "step": 297410 }, { "epoch": 1.9566719077912937, "grad_norm": 0.035820469133574374, "learning_rate": 1.4289866517624984e-08, "loss": 0.0002, "step": 297420 }, { "epoch": 1.956737695966527, "grad_norm": 0.022470802229587482, "learning_rate": 1.4246525390701482e-08, "loss": 0.0001, "step": 297430 }, { "epoch": 1.9568034841417603, "grad_norm": 0.013044784874959185, "learning_rate": 1.4203249996631029e-08, "loss": 0.0001, "step": 297440 }, { "epoch": 1.9568692723169936, "grad_norm": 0.053223399968950265, "learning_rate": 1.4160040335985393e-08, "loss": 0.0003, "step": 297450 }, { "epoch": 1.956935060492227, "grad_norm": 0.018785403561503503, "learning_rate": 1.4116896409333003e-08, "loss": 0.0002, "step": 297460 }, { "epoch": 1.9570008486674606, "grad_norm": 0.01081863972177368, "learning_rate": 1.4073818217243406e-08, "loss": 0.0004, "step": 297470 }, { "epoch": 1.9570666368426939, "grad_norm": 0.022276624631349367, "learning_rate": 1.4030805760284482e-08, "loss": 0.0007, "step": 297480 }, { "epoch": 1.9571324250179272, "grad_norm": 0.018363745943712678, "learning_rate": 1.3987859039022999e-08, "loss": 0.0013, "step": 297490 }, { "epoch": 1.9571982131931607, "grad_norm": 0.020204490504486965, "learning_rate": 1.3944978054025726e-08, "loss": 0.0003, "step": 297500 }, { "epoch": 1.9572640013683942, "grad_norm": 0.002210709723138151, "learning_rate": 1.3902162805857765e-08, "loss": 0.0005, "step": 297510 }, { "epoch": 1.9573297895436275, "grad_norm": 0.025447128028999823, "learning_rate": 1.3859413295083112e-08, "loss": 0.0003, "step": 297520 }, { "epoch": 1.9573955777188607, "grad_norm": 0.13805205767341652, "learning_rate": 1.3816729522266314e-08, "loss": 0.0012, "step": 297530 }, { "epoch": 1.957461365894094, "grad_norm": 0.01196538829463795, "learning_rate": 1.3774111487969143e-08, "loss": 0.0005, "step": 297540 }, { "epoch": 1.9575271540693275, "grad_norm": 0.006072040797559238, "learning_rate": 1.373155919275393e-08, "loss": 0.0004, "step": 297550 }, { "epoch": 1.957592942244561, "grad_norm": 0.08145224304112744, "learning_rate": 1.3689072637181889e-08, "loss": 0.0003, "step": 297560 }, { "epoch": 1.9576587304197943, "grad_norm": 0.004992074316878187, "learning_rate": 1.3646651821812574e-08, "loss": 0.0002, "step": 297570 }, { "epoch": 1.9577245185950276, "grad_norm": 0.004500293880019523, "learning_rate": 1.3604296747206092e-08, "loss": 0.0003, "step": 297580 }, { "epoch": 1.9577903067702611, "grad_norm": 0.004997497637109497, "learning_rate": 1.3562007413920331e-08, "loss": 0.0002, "step": 297590 }, { "epoch": 1.9578560949454946, "grad_norm": 0.004049115643206139, "learning_rate": 1.3519783822512622e-08, "loss": 0.0003, "step": 297600 }, { "epoch": 1.957921883120728, "grad_norm": 0.003862091755200567, "learning_rate": 1.3477625973540299e-08, "loss": 0.0004, "step": 297610 }, { "epoch": 1.9579876712959612, "grad_norm": 0.04488632939284358, "learning_rate": 1.343553386755847e-08, "loss": 0.0003, "step": 297620 }, { "epoch": 1.9580534594711947, "grad_norm": 0.041426982007187134, "learning_rate": 1.3393507505122805e-08, "loss": 0.0009, "step": 297630 }, { "epoch": 1.9581192476464282, "grad_norm": 0.04373907777802977, "learning_rate": 1.3351546886787304e-08, "loss": 0.0003, "step": 297640 }, { "epoch": 1.9581850358216615, "grad_norm": 0.0009894021135788522, "learning_rate": 1.3309652013104302e-08, "loss": 0.0002, "step": 297650 }, { "epoch": 1.9582508239968948, "grad_norm": 0.012768225591138944, "learning_rate": 1.3267822884627247e-08, "loss": 0.0004, "step": 297660 }, { "epoch": 1.958316612172128, "grad_norm": 0.009891295865323169, "learning_rate": 1.3226059501906808e-08, "loss": 0.0003, "step": 297670 }, { "epoch": 1.9583824003473616, "grad_norm": 0.030670294422147037, "learning_rate": 1.3184361865493655e-08, "loss": 0.0014, "step": 297680 }, { "epoch": 1.958448188522595, "grad_norm": 0.009563046040616597, "learning_rate": 1.3142729975938461e-08, "loss": 0.0004, "step": 297690 }, { "epoch": 1.9585139766978283, "grad_norm": 0.003903074403550962, "learning_rate": 1.310116383378912e-08, "loss": 0.0002, "step": 297700 }, { "epoch": 1.9585797648730616, "grad_norm": 0.022736885495018965, "learning_rate": 1.3059663439593529e-08, "loss": 0.0002, "step": 297710 }, { "epoch": 1.9586455530482951, "grad_norm": 0.021058297470434997, "learning_rate": 1.3018228793900133e-08, "loss": 0.0003, "step": 297720 }, { "epoch": 1.9587113412235286, "grad_norm": 0.0009585316057202563, "learning_rate": 1.2976859897253502e-08, "loss": 0.0002, "step": 297730 }, { "epoch": 1.958777129398762, "grad_norm": 0.016387070006050063, "learning_rate": 1.2935556750200417e-08, "loss": 0.0004, "step": 297740 }, { "epoch": 1.9588429175739952, "grad_norm": 0.0065506947981095725, "learning_rate": 1.289431935328489e-08, "loss": 0.0003, "step": 297750 }, { "epoch": 1.9589087057492285, "grad_norm": 0.00471159662952836, "learning_rate": 1.2853147707050373e-08, "loss": 0.0004, "step": 297760 }, { "epoch": 1.958974493924462, "grad_norm": 0.02463731387765716, "learning_rate": 1.2812041812040321e-08, "loss": 0.0005, "step": 297770 }, { "epoch": 1.9590402820996955, "grad_norm": 0.014002518991865909, "learning_rate": 1.2771001668795968e-08, "loss": 0.0003, "step": 297780 }, { "epoch": 1.9591060702749288, "grad_norm": 0.0401323913763813, "learning_rate": 1.2730027277858548e-08, "loss": 0.0003, "step": 297790 }, { "epoch": 1.959171858450162, "grad_norm": 0.014788677562624234, "learning_rate": 1.2689118639768738e-08, "loss": 0.0005, "step": 297800 }, { "epoch": 1.9592376466253956, "grad_norm": 0.0009102265714865517, "learning_rate": 1.2648275755065553e-08, "loss": 0.0002, "step": 297810 }, { "epoch": 1.959303434800629, "grad_norm": 0.028888654066704406, "learning_rate": 1.260749862428745e-08, "loss": 0.0004, "step": 297820 }, { "epoch": 1.9593692229758624, "grad_norm": 0.004707810480092369, "learning_rate": 1.256678724797178e-08, "loss": 0.0006, "step": 297830 }, { "epoch": 1.9594350111510956, "grad_norm": 0.026768330857179595, "learning_rate": 1.2526141626655885e-08, "loss": 0.0005, "step": 297840 }, { "epoch": 1.959500799326329, "grad_norm": 0.06838057661233995, "learning_rate": 1.2485561760875453e-08, "loss": 0.0002, "step": 297850 }, { "epoch": 1.9595665875015624, "grad_norm": 0.0016240081785936498, "learning_rate": 1.2445047651165054e-08, "loss": 0.0002, "step": 297860 }, { "epoch": 1.959632375676796, "grad_norm": 0.02395031631745176, "learning_rate": 1.2404599298059261e-08, "loss": 0.0002, "step": 297870 }, { "epoch": 1.9596981638520292, "grad_norm": 0.000586734466876914, "learning_rate": 1.2364216702091536e-08, "loss": 0.0003, "step": 297880 }, { "epoch": 1.9597639520272625, "grad_norm": 0.04082067989137536, "learning_rate": 1.2323899863793675e-08, "loss": 0.0003, "step": 297890 }, { "epoch": 1.959829740202496, "grad_norm": 0.006322993286883522, "learning_rate": 1.2283648783698032e-08, "loss": 0.0002, "step": 297900 }, { "epoch": 1.9598955283777295, "grad_norm": 0.037255986716417964, "learning_rate": 1.2243463462334182e-08, "loss": 0.0011, "step": 297910 }, { "epoch": 1.9599613165529628, "grad_norm": 0.011976995396743918, "learning_rate": 1.2203343900233366e-08, "loss": 0.0005, "step": 297920 }, { "epoch": 1.960027104728196, "grad_norm": 0.02712626586638816, "learning_rate": 1.2163290097922942e-08, "loss": 0.0017, "step": 297930 }, { "epoch": 1.9600928929034296, "grad_norm": 0.042823178164852115, "learning_rate": 1.2123302055931929e-08, "loss": 0.0005, "step": 297940 }, { "epoch": 1.9601586810786629, "grad_norm": 0.03139209039220148, "learning_rate": 1.2083379774787684e-08, "loss": 0.0002, "step": 297950 }, { "epoch": 1.9602244692538964, "grad_norm": 0.005658239936072204, "learning_rate": 1.2043523255015899e-08, "loss": 0.0005, "step": 297960 }, { "epoch": 1.9602902574291297, "grad_norm": 0.008221577282033612, "learning_rate": 1.2003732497142262e-08, "loss": 0.0003, "step": 297970 }, { "epoch": 1.960356045604363, "grad_norm": 0.05730866795319812, "learning_rate": 1.196400750169191e-08, "loss": 0.0003, "step": 297980 }, { "epoch": 1.9604218337795964, "grad_norm": 0.0357161029185122, "learning_rate": 1.1924348269187757e-08, "loss": 0.0008, "step": 297990 }, { "epoch": 1.96048762195483, "grad_norm": 0.03653478165145333, "learning_rate": 1.188475480015383e-08, "loss": 0.0006, "step": 298000 }, { "epoch": 1.9605534101300632, "grad_norm": 0.004916098769309996, "learning_rate": 1.1845227095110822e-08, "loss": 0.0005, "step": 298010 }, { "epoch": 1.9606191983052965, "grad_norm": 0.02354309102554874, "learning_rate": 1.180576515458054e-08, "loss": 0.0003, "step": 298020 }, { "epoch": 1.96068498648053, "grad_norm": 0.011535348474439672, "learning_rate": 1.176636897908312e-08, "loss": 0.0006, "step": 298030 }, { "epoch": 1.9607507746557635, "grad_norm": 0.004077193798194358, "learning_rate": 1.1727038569137594e-08, "loss": 0.0002, "step": 298040 }, { "epoch": 1.9608165628309968, "grad_norm": 0.018324394138718038, "learning_rate": 1.1687773925263546e-08, "loss": 0.0007, "step": 298050 }, { "epoch": 1.96088235100623, "grad_norm": 0.03431898152834986, "learning_rate": 1.1648575047977783e-08, "loss": 0.0007, "step": 298060 }, { "epoch": 1.9609481391814634, "grad_norm": 0.018038163543057692, "learning_rate": 1.1609441937797672e-08, "loss": 0.0004, "step": 298070 }, { "epoch": 1.9610139273566969, "grad_norm": 0.002004650642189757, "learning_rate": 1.1570374595238354e-08, "loss": 0.0005, "step": 298080 }, { "epoch": 1.9610797155319304, "grad_norm": 0.07800888664387653, "learning_rate": 1.1531373020814973e-08, "loss": 0.0002, "step": 298090 }, { "epoch": 1.9611455037071637, "grad_norm": 0.004742090106997249, "learning_rate": 1.1492437215042673e-08, "loss": 0.0004, "step": 298100 }, { "epoch": 1.961211291882397, "grad_norm": 0.009827813947777113, "learning_rate": 1.1453567178434378e-08, "loss": 0.0004, "step": 298110 }, { "epoch": 1.9612770800576305, "grad_norm": 0.0163847792364395, "learning_rate": 1.1414762911502453e-08, "loss": 0.0006, "step": 298120 }, { "epoch": 1.961342868232864, "grad_norm": 0.02372265663203516, "learning_rate": 1.1376024414758158e-08, "loss": 0.0006, "step": 298130 }, { "epoch": 1.9614086564080973, "grad_norm": 0.016162491777151424, "learning_rate": 1.133735168871275e-08, "loss": 0.0002, "step": 298140 }, { "epoch": 1.9614744445833305, "grad_norm": 0.09324726400530435, "learning_rate": 1.1298744733875267e-08, "loss": 0.0007, "step": 298150 }, { "epoch": 1.9615402327585638, "grad_norm": 0.007816268567313791, "learning_rate": 1.1260203550755855e-08, "loss": 0.0005, "step": 298160 }, { "epoch": 1.9616060209337973, "grad_norm": 0.13515950838016227, "learning_rate": 1.1221728139861887e-08, "loss": 0.0006, "step": 298170 }, { "epoch": 1.9616718091090308, "grad_norm": 0.009820939356739382, "learning_rate": 1.118331850170129e-08, "loss": 0.0005, "step": 298180 }, { "epoch": 1.9617375972842641, "grad_norm": 0.09539658228700718, "learning_rate": 1.114497463677977e-08, "loss": 0.0008, "step": 298190 }, { "epoch": 1.9618033854594974, "grad_norm": 0.03361224862824863, "learning_rate": 1.1106696545603035e-08, "loss": 0.0003, "step": 298200 }, { "epoch": 1.961869173634731, "grad_norm": 0.01221763912068533, "learning_rate": 1.106848422867568e-08, "loss": 0.0008, "step": 298210 }, { "epoch": 1.9619349618099644, "grad_norm": 0.019596647396179417, "learning_rate": 1.1030337686501746e-08, "loss": 0.0028, "step": 298220 }, { "epoch": 1.9620007499851977, "grad_norm": 0.03932354234411302, "learning_rate": 1.0992256919583611e-08, "loss": 0.0011, "step": 298230 }, { "epoch": 1.962066538160431, "grad_norm": 0.0012635599963829996, "learning_rate": 1.0954241928424203e-08, "loss": 0.0002, "step": 298240 }, { "epoch": 1.9621323263356645, "grad_norm": 0.030531317178690665, "learning_rate": 1.0916292713524789e-08, "loss": 0.0006, "step": 298250 }, { "epoch": 1.9621981145108978, "grad_norm": 0.03177610628242654, "learning_rate": 1.0878409275384417e-08, "loss": 0.0003, "step": 298260 }, { "epoch": 1.9622639026861313, "grad_norm": 0.01744040509839519, "learning_rate": 1.0840591614503793e-08, "loss": 0.0003, "step": 298270 }, { "epoch": 1.9623296908613646, "grad_norm": 0.00027436176664550905, "learning_rate": 1.0802839731380855e-08, "loss": 0.0009, "step": 298280 }, { "epoch": 1.9623954790365978, "grad_norm": 0.03820296709497582, "learning_rate": 1.0765153626513536e-08, "loss": 0.0004, "step": 298290 }, { "epoch": 1.9624612672118313, "grad_norm": 0.00031331435409726586, "learning_rate": 1.0727533300398663e-08, "loss": 0.0003, "step": 298300 }, { "epoch": 1.9625270553870648, "grad_norm": 0.017377133273156195, "learning_rate": 1.0689978753532504e-08, "loss": 0.0004, "step": 298310 }, { "epoch": 1.9625928435622981, "grad_norm": 0.031374562918486, "learning_rate": 1.0652489986409665e-08, "loss": 0.0001, "step": 298320 }, { "epoch": 1.9626586317375314, "grad_norm": 0.06515630157013053, "learning_rate": 1.0615066999524748e-08, "loss": 0.0004, "step": 298330 }, { "epoch": 1.962724419912765, "grad_norm": 0.018896089234447407, "learning_rate": 1.0577709793370694e-08, "loss": 0.0003, "step": 298340 }, { "epoch": 1.9627902080879984, "grad_norm": 0.002268255560303875, "learning_rate": 1.0540418368440996e-08, "loss": 0.001, "step": 298350 }, { "epoch": 1.9628559962632317, "grad_norm": 0.02455689504859551, "learning_rate": 1.0503192725226375e-08, "loss": 0.0007, "step": 298360 }, { "epoch": 1.962921784438465, "grad_norm": 0.02238611540085476, "learning_rate": 1.04660328642181e-08, "loss": 0.0006, "step": 298370 }, { "epoch": 1.9629875726136983, "grad_norm": 0.012864587907182872, "learning_rate": 1.0428938785905784e-08, "loss": 0.0007, "step": 298380 }, { "epoch": 1.9630533607889318, "grad_norm": 0.022785855185161876, "learning_rate": 1.0391910490778479e-08, "loss": 0.0006, "step": 298390 }, { "epoch": 1.9631191489641653, "grad_norm": 0.028609821276111166, "learning_rate": 1.0354947979324681e-08, "loss": 0.0001, "step": 298400 }, { "epoch": 1.9631849371393986, "grad_norm": 0.0023332491012998413, "learning_rate": 1.031805125203178e-08, "loss": 0.0003, "step": 298410 }, { "epoch": 1.9632507253146319, "grad_norm": 0.037350045684571016, "learning_rate": 1.0281220309385498e-08, "loss": 0.0003, "step": 298420 }, { "epoch": 1.9633165134898654, "grad_norm": 0.0526617679638497, "learning_rate": 1.0244455151872668e-08, "loss": 0.0005, "step": 298430 }, { "epoch": 1.9633823016650989, "grad_norm": 0.012761468201806098, "learning_rate": 1.020775577997679e-08, "loss": 0.0002, "step": 298440 }, { "epoch": 1.9634480898403321, "grad_norm": 0.025159827087181032, "learning_rate": 1.0171122194181926e-08, "loss": 0.0005, "step": 298450 }, { "epoch": 1.9635138780155654, "grad_norm": 0.07760580085099239, "learning_rate": 1.0134554394971574e-08, "loss": 0.0005, "step": 298460 }, { "epoch": 1.9635796661907987, "grad_norm": 0.011588791072672112, "learning_rate": 1.0098052382828128e-08, "loss": 0.0004, "step": 298470 }, { "epoch": 1.9636454543660322, "grad_norm": 0.0028794972681196415, "learning_rate": 1.0061616158231202e-08, "loss": 0.0003, "step": 298480 }, { "epoch": 1.9637112425412657, "grad_norm": 0.020245859697950945, "learning_rate": 1.0025245721663191e-08, "loss": 0.0004, "step": 298490 }, { "epoch": 1.963777030716499, "grad_norm": 0.01006610081081816, "learning_rate": 9.988941073602597e-09, "loss": 0.0003, "step": 298500 }, { "epoch": 1.9638428188917323, "grad_norm": 0.03663874661916934, "learning_rate": 9.952702214527931e-09, "loss": 0.0002, "step": 298510 }, { "epoch": 1.9639086070669658, "grad_norm": 0.0002752285997449087, "learning_rate": 9.916529144917142e-09, "loss": 0.0007, "step": 298520 }, { "epoch": 1.9639743952421993, "grad_norm": 0.037315577009542036, "learning_rate": 9.880421865247069e-09, "loss": 0.0009, "step": 298530 }, { "epoch": 1.9640401834174326, "grad_norm": 0.1251580133928904, "learning_rate": 9.844380375994e-09, "loss": 0.0005, "step": 298540 }, { "epoch": 1.9641059715926659, "grad_norm": 0.01453948566544548, "learning_rate": 9.80840467763311e-09, "loss": 0.0004, "step": 298550 }, { "epoch": 1.9641717597678994, "grad_norm": 0.013573595862363666, "learning_rate": 9.772494770639018e-09, "loss": 0.0003, "step": 298560 }, { "epoch": 1.9642375479431327, "grad_norm": 0.004072217238588608, "learning_rate": 9.736650655484125e-09, "loss": 0.0002, "step": 298570 }, { "epoch": 1.9643033361183662, "grad_norm": 0.00014702898393101346, "learning_rate": 9.700872332641943e-09, "loss": 0.0005, "step": 298580 }, { "epoch": 1.9643691242935994, "grad_norm": 0.0280461756916881, "learning_rate": 9.665159802583757e-09, "loss": 0.0005, "step": 298590 }, { "epoch": 1.9644349124688327, "grad_norm": 0.014836798286708282, "learning_rate": 9.629513065780305e-09, "loss": 0.0004, "step": 298600 }, { "epoch": 1.9645007006440662, "grad_norm": 0.01699400306343984, "learning_rate": 9.593932122701766e-09, "loss": 0.0009, "step": 298610 }, { "epoch": 1.9645664888192997, "grad_norm": 0.07928295898174469, "learning_rate": 9.558416973817208e-09, "loss": 0.0005, "step": 298620 }, { "epoch": 1.964632276994533, "grad_norm": 0.0003800895074317223, "learning_rate": 9.522967619595148e-09, "loss": 0.0006, "step": 298630 }, { "epoch": 1.9646980651697663, "grad_norm": 0.001260004325091938, "learning_rate": 9.48758406050243e-09, "loss": 0.0003, "step": 298640 }, { "epoch": 1.9647638533449998, "grad_norm": 0.006373339975933351, "learning_rate": 9.452266297005907e-09, "loss": 0.0003, "step": 298650 }, { "epoch": 1.9648296415202333, "grad_norm": 0.0226538806772992, "learning_rate": 9.417014329571317e-09, "loss": 0.0002, "step": 298660 }, { "epoch": 1.9648954296954666, "grad_norm": 0.0421344849967816, "learning_rate": 9.381828158663286e-09, "loss": 0.0004, "step": 298670 }, { "epoch": 1.9649612178706999, "grad_norm": 0.01240173646936209, "learning_rate": 9.34670778474589e-09, "loss": 0.0003, "step": 298680 }, { "epoch": 1.9650270060459332, "grad_norm": 0.012865230040491248, "learning_rate": 9.311653208281535e-09, "loss": 0.0003, "step": 298690 }, { "epoch": 1.9650927942211667, "grad_norm": 0.0020712226017748653, "learning_rate": 9.27666442973263e-09, "loss": 0.0001, "step": 298700 }, { "epoch": 1.9651585823964002, "grad_norm": 0.03406385184811414, "learning_rate": 9.241741449561026e-09, "loss": 0.0003, "step": 298710 }, { "epoch": 1.9652243705716335, "grad_norm": 0.018550805557944028, "learning_rate": 9.206884268226357e-09, "loss": 0.0002, "step": 298720 }, { "epoch": 1.9652901587468667, "grad_norm": 0.0372696061495687, "learning_rate": 9.172092886188811e-09, "loss": 0.0007, "step": 298730 }, { "epoch": 1.9653559469221002, "grad_norm": 0.03237285533205306, "learning_rate": 9.137367303906907e-09, "loss": 0.0003, "step": 298740 }, { "epoch": 1.9654217350973338, "grad_norm": 0.004036388520333453, "learning_rate": 9.102707521838617e-09, "loss": 0.0003, "step": 298750 }, { "epoch": 1.965487523272567, "grad_norm": 0.0038296617033604704, "learning_rate": 9.068113540440238e-09, "loss": 0.0005, "step": 298760 }, { "epoch": 1.9655533114478003, "grad_norm": 0.01092434677183598, "learning_rate": 9.033585360168628e-09, "loss": 0.0003, "step": 298770 }, { "epoch": 1.9656190996230336, "grad_norm": 0.0009377422914367756, "learning_rate": 8.999122981478425e-09, "loss": 0.0002, "step": 298780 }, { "epoch": 1.965684887798267, "grad_norm": 0.012332086844060468, "learning_rate": 8.964726404824819e-09, "loss": 0.0003, "step": 298790 }, { "epoch": 1.9657506759735006, "grad_norm": 0.00014853736908996583, "learning_rate": 8.930395630660227e-09, "loss": 0.0005, "step": 298800 }, { "epoch": 1.965816464148734, "grad_norm": 0.03191300853638022, "learning_rate": 8.896130659438174e-09, "loss": 0.0006, "step": 298810 }, { "epoch": 1.9658822523239672, "grad_norm": 0.025472759430763426, "learning_rate": 8.861931491609965e-09, "loss": 0.0001, "step": 298820 }, { "epoch": 1.9659480404992007, "grad_norm": 0.09381453018360607, "learning_rate": 8.827798127626352e-09, "loss": 0.0005, "step": 298830 }, { "epoch": 1.9660138286744342, "grad_norm": 0.002949210127658123, "learning_rate": 8.793730567937531e-09, "loss": 0.0003, "step": 298840 }, { "epoch": 1.9660796168496675, "grad_norm": 0.023416097098062746, "learning_rate": 8.75972881299314e-09, "loss": 0.0005, "step": 298850 }, { "epoch": 1.9661454050249008, "grad_norm": 0.025250374304143588, "learning_rate": 8.725792863240601e-09, "loss": 0.0005, "step": 298860 }, { "epoch": 1.966211193200134, "grad_norm": 0.022238771122434088, "learning_rate": 8.691922719127888e-09, "loss": 0.0002, "step": 298870 }, { "epoch": 1.9662769813753675, "grad_norm": 0.0006202870937013497, "learning_rate": 8.65811838110131e-09, "loss": 0.0004, "step": 298880 }, { "epoch": 1.966342769550601, "grad_norm": 0.04315484933897894, "learning_rate": 8.624379849606624e-09, "loss": 0.0004, "step": 298890 }, { "epoch": 1.9664085577258343, "grad_norm": 0.08319503375457543, "learning_rate": 8.59070712508847e-09, "loss": 0.0006, "step": 298900 }, { "epoch": 1.9664743459010676, "grad_norm": 0.029454325380853658, "learning_rate": 8.557100207991498e-09, "loss": 0.0003, "step": 298910 }, { "epoch": 1.9665401340763011, "grad_norm": 0.00045229356970686544, "learning_rate": 8.523559098758128e-09, "loss": 0.0004, "step": 298920 }, { "epoch": 1.9666059222515346, "grad_norm": 7.962535637658398e-05, "learning_rate": 8.490083797830228e-09, "loss": 0.0002, "step": 298930 }, { "epoch": 1.966671710426768, "grad_norm": 0.006841459046480118, "learning_rate": 8.456674305650226e-09, "loss": 0.0007, "step": 298940 }, { "epoch": 1.9667374986020012, "grad_norm": 0.06515840371438099, "learning_rate": 8.423330622657766e-09, "loss": 0.0008, "step": 298950 }, { "epoch": 1.9668032867772347, "grad_norm": 0.0014562790641254964, "learning_rate": 8.390052749292498e-09, "loss": 0.0003, "step": 298960 }, { "epoch": 1.966869074952468, "grad_norm": 0.0001327273494571055, "learning_rate": 8.356840685993517e-09, "loss": 0.0002, "step": 298970 }, { "epoch": 1.9669348631277015, "grad_norm": 0.0033148138342448674, "learning_rate": 8.323694433198248e-09, "loss": 0.0007, "step": 298980 }, { "epoch": 1.9670006513029348, "grad_norm": 0.27985145040527104, "learning_rate": 8.290613991344676e-09, "loss": 0.0023, "step": 298990 }, { "epoch": 1.967066439478168, "grad_norm": 0.003353823856865399, "learning_rate": 8.257599360867452e-09, "loss": 0.0007, "step": 299000 }, { "epoch": 1.9671322276534016, "grad_norm": 0.010648318283385158, "learning_rate": 8.224650542202894e-09, "loss": 0.0004, "step": 299010 }, { "epoch": 1.967198015828635, "grad_norm": 0.008533617017407142, "learning_rate": 8.1917675357851e-09, "loss": 0.0004, "step": 299020 }, { "epoch": 1.9672638040038684, "grad_norm": 0.006396028003849336, "learning_rate": 8.158950342048166e-09, "loss": 0.0004, "step": 299030 }, { "epoch": 1.9673295921791016, "grad_norm": 0.014133546014940126, "learning_rate": 8.12619896142397e-09, "loss": 0.0004, "step": 299040 }, { "epoch": 1.9673953803543351, "grad_norm": 0.02190283619851685, "learning_rate": 8.093513394344387e-09, "loss": 0.0002, "step": 299050 }, { "epoch": 1.9674611685295686, "grad_norm": 0.02451916492281438, "learning_rate": 8.060893641240186e-09, "loss": 0.0003, "step": 299060 }, { "epoch": 1.967526956704802, "grad_norm": 0.03363978625139951, "learning_rate": 8.028339702542131e-09, "loss": 0.0002, "step": 299070 }, { "epoch": 1.9675927448800352, "grad_norm": 0.0021767699912707103, "learning_rate": 7.995851578679325e-09, "loss": 0.0002, "step": 299080 }, { "epoch": 1.9676585330552685, "grad_norm": 0.021089481548674772, "learning_rate": 7.963429270079758e-09, "loss": 0.0003, "step": 299090 }, { "epoch": 1.967724321230502, "grad_norm": 0.03263950077856359, "learning_rate": 7.931072777170867e-09, "loss": 0.0002, "step": 299100 }, { "epoch": 1.9677901094057355, "grad_norm": 0.0009041117408548203, "learning_rate": 7.898782100379532e-09, "loss": 0.0005, "step": 299110 }, { "epoch": 1.9678558975809688, "grad_norm": 0.06502528285647408, "learning_rate": 7.866557240130968e-09, "loss": 0.0003, "step": 299120 }, { "epoch": 1.967921685756202, "grad_norm": 0.035451503328094935, "learning_rate": 7.83439819685039e-09, "loss": 0.0003, "step": 299130 }, { "epoch": 1.9679874739314356, "grad_norm": 0.05720733360159253, "learning_rate": 7.802304970961905e-09, "loss": 0.0004, "step": 299140 }, { "epoch": 1.968053262106669, "grad_norm": 0.05834280330192186, "learning_rate": 7.770277562887951e-09, "loss": 0.0006, "step": 299150 }, { "epoch": 1.9681190502819024, "grad_norm": 0.0022386087597721428, "learning_rate": 7.73831597305208e-09, "loss": 0.0008, "step": 299160 }, { "epoch": 1.9681848384571357, "grad_norm": 0.008131674533976731, "learning_rate": 7.70642020187451e-09, "loss": 0.0008, "step": 299170 }, { "epoch": 1.968250626632369, "grad_norm": 0.019679487933162917, "learning_rate": 7.67459024977657e-09, "loss": 0.0005, "step": 299180 }, { "epoch": 1.9683164148076024, "grad_norm": 0.008969155015807376, "learning_rate": 7.642826117176816e-09, "loss": 0.0004, "step": 299190 }, { "epoch": 1.968382202982836, "grad_norm": 0.044415070895453795, "learning_rate": 7.611127804495466e-09, "loss": 0.0003, "step": 299200 }, { "epoch": 1.9684479911580692, "grad_norm": 0.009431879338310377, "learning_rate": 7.579495312149409e-09, "loss": 0.0003, "step": 299210 }, { "epoch": 1.9685137793333025, "grad_norm": 0.003249332621636459, "learning_rate": 7.547928640555535e-09, "loss": 0.0002, "step": 299220 }, { "epoch": 1.968579567508536, "grad_norm": 0.06209741397072512, "learning_rate": 7.516427790131287e-09, "loss": 0.0006, "step": 299230 }, { "epoch": 1.9686453556837695, "grad_norm": 0.005099032647895113, "learning_rate": 7.484992761290222e-09, "loss": 0.0003, "step": 299240 }, { "epoch": 1.9687111438590028, "grad_norm": 0.04518785925352303, "learning_rate": 7.453623554448119e-09, "loss": 0.0005, "step": 299250 }, { "epoch": 1.968776932034236, "grad_norm": 0.010054166248439456, "learning_rate": 7.422320170018537e-09, "loss": 0.0004, "step": 299260 }, { "epoch": 1.9688427202094696, "grad_norm": 0.0009686896676469474, "learning_rate": 7.3910826084133695e-09, "loss": 0.0003, "step": 299270 }, { "epoch": 1.9689085083847029, "grad_norm": 0.03100299349413289, "learning_rate": 7.359910870045062e-09, "loss": 0.0002, "step": 299280 }, { "epoch": 1.9689742965599364, "grad_norm": 0.03845965994006091, "learning_rate": 7.3288049553238425e-09, "loss": 0.0004, "step": 299290 }, { "epoch": 1.9690400847351697, "grad_norm": 0.028898855914787584, "learning_rate": 7.29776486466105e-09, "loss": 0.0003, "step": 299300 }, { "epoch": 1.969105872910403, "grad_norm": 0.01936841871749493, "learning_rate": 7.266790598465245e-09, "loss": 0.0005, "step": 299310 }, { "epoch": 1.9691716610856365, "grad_norm": 0.11780619867603374, "learning_rate": 7.235882157144436e-09, "loss": 0.0002, "step": 299320 }, { "epoch": 1.96923744926087, "grad_norm": 0.02120993430998451, "learning_rate": 7.205039541106629e-09, "loss": 0.0005, "step": 299330 }, { "epoch": 1.9693032374361032, "grad_norm": 0.018936899549594724, "learning_rate": 7.174262750758165e-09, "loss": 0.0003, "step": 299340 }, { "epoch": 1.9693690256113365, "grad_norm": 0.04859372571014863, "learning_rate": 7.1435517865053874e-09, "loss": 0.0003, "step": 299350 }, { "epoch": 1.96943481378657, "grad_norm": 0.008545342877505867, "learning_rate": 7.112906648752415e-09, "loss": 0.0004, "step": 299360 }, { "epoch": 1.9695006019618035, "grad_norm": 0.03198850102153084, "learning_rate": 7.082327337903927e-09, "loss": 0.0005, "step": 299370 }, { "epoch": 1.9695663901370368, "grad_norm": 0.0010870290153758573, "learning_rate": 7.051813854362377e-09, "loss": 0.0005, "step": 299380 }, { "epoch": 1.96963217831227, "grad_norm": 0.014684363385876248, "learning_rate": 7.021366198530221e-09, "loss": 0.0004, "step": 299390 }, { "epoch": 1.9696979664875034, "grad_norm": 0.02430806938805795, "learning_rate": 6.9909843708099165e-09, "loss": 0.0003, "step": 299400 }, { "epoch": 1.969763754662737, "grad_norm": 0.030663685963644893, "learning_rate": 6.960668371600587e-09, "loss": 0.0006, "step": 299410 }, { "epoch": 1.9698295428379704, "grad_norm": 0.025418196220769643, "learning_rate": 6.9304182013030245e-09, "loss": 0.0014, "step": 299420 }, { "epoch": 1.9698953310132037, "grad_norm": 0.007709719325984193, "learning_rate": 6.900233860315797e-09, "loss": 0.0002, "step": 299430 }, { "epoch": 1.969961119188437, "grad_norm": 0.07438695116475176, "learning_rate": 6.870115349036366e-09, "loss": 0.0007, "step": 299440 }, { "epoch": 1.9700269073636705, "grad_norm": 0.003099092774943907, "learning_rate": 6.840062667861636e-09, "loss": 0.0003, "step": 299450 }, { "epoch": 1.970092695538904, "grad_norm": 0.03464134068048154, "learning_rate": 6.810075817189066e-09, "loss": 0.0005, "step": 299460 }, { "epoch": 1.9701584837141373, "grad_norm": 0.0003317695598485609, "learning_rate": 6.780154797412786e-09, "loss": 0.0006, "step": 299470 }, { "epoch": 1.9702242718893705, "grad_norm": 0.06654191212579633, "learning_rate": 6.7502996089280346e-09, "loss": 0.0002, "step": 299480 }, { "epoch": 1.9702900600646038, "grad_norm": 0.05517418560502164, "learning_rate": 6.7205102521278324e-09, "loss": 0.0006, "step": 299490 }, { "epoch": 1.9703558482398373, "grad_norm": 0.008924473459816794, "learning_rate": 6.690786727405196e-09, "loss": 0.0006, "step": 299500 }, { "epoch": 1.9704216364150708, "grad_norm": 0.04888960273858161, "learning_rate": 6.661129035152592e-09, "loss": 0.0005, "step": 299510 }, { "epoch": 1.9704874245903041, "grad_norm": 0.014557489679805705, "learning_rate": 6.631537175759705e-09, "loss": 0.0008, "step": 299520 }, { "epoch": 1.9705532127655374, "grad_norm": 0.002748252762878676, "learning_rate": 6.602011149617893e-09, "loss": 0.0006, "step": 299530 }, { "epoch": 1.970619000940771, "grad_norm": 0.04874217828061365, "learning_rate": 6.5725509571157305e-09, "loss": 0.0006, "step": 299540 }, { "epoch": 1.9706847891160044, "grad_norm": 0.08117046465784179, "learning_rate": 6.543156598642353e-09, "loss": 0.0004, "step": 299550 }, { "epoch": 1.9707505772912377, "grad_norm": 0.0007735986404035314, "learning_rate": 6.5138280745841166e-09, "loss": 0.0006, "step": 299560 }, { "epoch": 1.970816365466471, "grad_norm": 0.026893395635694, "learning_rate": 6.484565385329045e-09, "loss": 0.0013, "step": 299570 }, { "epoch": 1.9708821536417045, "grad_norm": 0.057898973573718725, "learning_rate": 6.455368531261829e-09, "loss": 0.0003, "step": 299580 }, { "epoch": 1.9709479418169378, "grad_norm": 0.008118375795590623, "learning_rate": 6.426237512768274e-09, "loss": 0.0003, "step": 299590 }, { "epoch": 1.9710137299921713, "grad_norm": 0.011700491754918494, "learning_rate": 6.39717233023196e-09, "loss": 0.0005, "step": 299600 }, { "epoch": 1.9710795181674046, "grad_norm": 0.017726349017571214, "learning_rate": 6.368172984035914e-09, "loss": 0.0003, "step": 299610 }, { "epoch": 1.9711453063426378, "grad_norm": 0.02642998271313788, "learning_rate": 6.339239474563164e-09, "loss": 0.0007, "step": 299620 }, { "epoch": 1.9712110945178714, "grad_norm": 0.05421993099475119, "learning_rate": 6.3103718021945145e-09, "loss": 0.0005, "step": 299630 }, { "epoch": 1.9712768826931049, "grad_norm": 0.009625994176287383, "learning_rate": 6.281569967310774e-09, "loss": 0.0003, "step": 299640 }, { "epoch": 1.9713426708683381, "grad_norm": 0.032099487729565473, "learning_rate": 6.2528339702916386e-09, "loss": 0.0002, "step": 299650 }, { "epoch": 1.9714084590435714, "grad_norm": 0.34218033439032197, "learning_rate": 6.2241638115156934e-09, "loss": 0.0007, "step": 299660 }, { "epoch": 1.971474247218805, "grad_norm": 0.003530562618927867, "learning_rate": 6.1955594913615245e-09, "loss": 0.0003, "step": 299670 }, { "epoch": 1.9715400353940384, "grad_norm": 0.01654447421368591, "learning_rate": 6.167021010206054e-09, "loss": 0.0002, "step": 299680 }, { "epoch": 1.9716058235692717, "grad_norm": 0.0016291943137201046, "learning_rate": 6.138548368425645e-09, "loss": 0.0003, "step": 299690 }, { "epoch": 1.971671611744505, "grad_norm": 0.0047201899039615225, "learning_rate": 6.110141566395e-09, "loss": 0.0004, "step": 299700 }, { "epoch": 1.9717373999197383, "grad_norm": 0.031788919522693804, "learning_rate": 6.081800604489374e-09, "loss": 0.0007, "step": 299710 }, { "epoch": 1.9718031880949718, "grad_norm": 0.00650657170126849, "learning_rate": 6.053525483082357e-09, "loss": 0.0005, "step": 299720 }, { "epoch": 1.9718689762702053, "grad_norm": 0.0003491157170091358, "learning_rate": 6.025316202546428e-09, "loss": 0.0006, "step": 299730 }, { "epoch": 1.9719347644454386, "grad_norm": 0.010627491022272832, "learning_rate": 5.997172763253512e-09, "loss": 0.0003, "step": 299740 }, { "epoch": 1.9720005526206719, "grad_norm": 0.0018847661562679733, "learning_rate": 5.969095165574978e-09, "loss": 0.0007, "step": 299750 }, { "epoch": 1.9720663407959054, "grad_norm": 0.002976704540836069, "learning_rate": 5.941083409880533e-09, "loss": 0.0003, "step": 299760 }, { "epoch": 1.9721321289711389, "grad_norm": 0.004739159165469654, "learning_rate": 5.9131374965398784e-09, "loss": 0.0004, "step": 299770 }, { "epoch": 1.9721979171463722, "grad_norm": 0.025919957543757047, "learning_rate": 5.885257425921054e-09, "loss": 0.0004, "step": 299780 }, { "epoch": 1.9722637053216054, "grad_norm": 0.0014726159419189062, "learning_rate": 5.857443198392099e-09, "loss": 0.0003, "step": 299790 }, { "epoch": 1.9723294934968387, "grad_norm": 0.005424905014130149, "learning_rate": 5.829694814319942e-09, "loss": 0.0009, "step": 299800 }, { "epoch": 1.9723952816720722, "grad_norm": 0.03368090899063488, "learning_rate": 5.802012274069846e-09, "loss": 0.0003, "step": 299810 }, { "epoch": 1.9724610698473057, "grad_norm": 0.024114984482796316, "learning_rate": 5.7743955780065195e-09, "loss": 0.0004, "step": 299820 }, { "epoch": 1.972526858022539, "grad_norm": 0.02745770928846183, "learning_rate": 5.746844726494671e-09, "loss": 0.0011, "step": 299830 }, { "epoch": 1.9725926461977723, "grad_norm": 0.0028508496355294983, "learning_rate": 5.7193597198973436e-09, "loss": 0.0004, "step": 299840 }, { "epoch": 1.9726584343730058, "grad_norm": 0.00751254570696759, "learning_rate": 5.691940558577025e-09, "loss": 0.0005, "step": 299850 }, { "epoch": 1.9727242225482393, "grad_norm": 0.03211920206644788, "learning_rate": 5.664587242895092e-09, "loss": 0.0005, "step": 299860 }, { "epoch": 1.9727900107234726, "grad_norm": 0.21729081185453447, "learning_rate": 5.637299773212368e-09, "loss": 0.0012, "step": 299870 }, { "epoch": 1.9728557988987059, "grad_norm": 0.027149300724440543, "learning_rate": 5.61007814988801e-09, "loss": 0.0004, "step": 299880 }, { "epoch": 1.9729215870739392, "grad_norm": 0.04089874059226994, "learning_rate": 5.582922373281729e-09, "loss": 0.0002, "step": 299890 }, { "epoch": 1.9729873752491727, "grad_norm": 0.03628279933781358, "learning_rate": 5.555832443751019e-09, "loss": 0.0005, "step": 299900 }, { "epoch": 1.9730531634244062, "grad_norm": 0.019960447045491263, "learning_rate": 5.5288083616533705e-09, "loss": 0.0011, "step": 299910 }, { "epoch": 1.9731189515996395, "grad_norm": 0.04994025594212506, "learning_rate": 5.50185012734461e-09, "loss": 0.0006, "step": 299920 }, { "epoch": 1.9731847397748727, "grad_norm": 0.03269861329672555, "learning_rate": 5.474957741180009e-09, "loss": 0.0004, "step": 299930 }, { "epoch": 1.9732505279501062, "grad_norm": 0.0003074688313722479, "learning_rate": 5.448131203515394e-09, "loss": 0.0002, "step": 299940 }, { "epoch": 1.9733163161253398, "grad_norm": 0.0010357542928996052, "learning_rate": 5.421370514703261e-09, "loss": 0.0003, "step": 299950 }, { "epoch": 1.973382104300573, "grad_norm": 0.00531647030289762, "learning_rate": 5.394675675096661e-09, "loss": 0.0004, "step": 299960 }, { "epoch": 1.9734478924758063, "grad_norm": 0.032214066322373315, "learning_rate": 5.368046685047535e-09, "loss": 0.0004, "step": 299970 }, { "epoch": 1.9735136806510398, "grad_norm": 0.00822779847594792, "learning_rate": 5.3414835449072665e-09, "loss": 0.0003, "step": 299980 }, { "epoch": 1.9735794688262733, "grad_norm": 0.0030866986348826336, "learning_rate": 5.3149862550255785e-09, "loss": 0.0004, "step": 299990 }, { "epoch": 1.9736452570015066, "grad_norm": 0.05454853942444125, "learning_rate": 5.288554815752745e-09, "loss": 0.0002, "step": 300000 }, { "epoch": 1.9736452570015066, "eval_loss": 0.0003299810632597655, "eval_runtime": 13.0969, "eval_samples_per_second": 15.271, "eval_steps_per_second": 7.635, "step": 300000 }, { "epoch": 1.97371104517674, "grad_norm": 0.029956290959436076, "learning_rate": 5.26218922743571e-09, "loss": 0.0004, "step": 300010 }, { "epoch": 1.9737768333519732, "grad_norm": 0.048566674071949666, "learning_rate": 5.23588949042364e-09, "loss": 0.0005, "step": 300020 }, { "epoch": 1.9738426215272067, "grad_norm": 0.12086100983743192, "learning_rate": 5.2096556050623696e-09, "loss": 0.0007, "step": 300030 }, { "epoch": 1.9739084097024402, "grad_norm": 0.015027309291113868, "learning_rate": 5.1834875716982866e-09, "loss": 0.0003, "step": 300040 }, { "epoch": 1.9739741978776735, "grad_norm": 0.02827929839123272, "learning_rate": 5.157385390675563e-09, "loss": 0.0005, "step": 300050 }, { "epoch": 1.9740399860529068, "grad_norm": 0.0006765397117115261, "learning_rate": 5.131349062339474e-09, "loss": 0.0003, "step": 300060 }, { "epoch": 1.9741057742281403, "grad_norm": 0.03292960983371022, "learning_rate": 5.105378587031973e-09, "loss": 0.0003, "step": 300070 }, { "epoch": 1.9741715624033738, "grad_norm": 0.009955176568206112, "learning_rate": 5.079473965096671e-09, "loss": 0.0003, "step": 300080 }, { "epoch": 1.974237350578607, "grad_norm": 0.06565000253435423, "learning_rate": 5.053635196874962e-09, "loss": 0.0004, "step": 300090 }, { "epoch": 1.9743031387538403, "grad_norm": 0.00895151730854324, "learning_rate": 5.0278622827065745e-09, "loss": 0.0008, "step": 300100 }, { "epoch": 1.9743689269290736, "grad_norm": 0.05949972988961636, "learning_rate": 5.002155222932348e-09, "loss": 0.0006, "step": 300110 }, { "epoch": 1.9744347151043071, "grad_norm": 0.05380466349303466, "learning_rate": 4.976514017890344e-09, "loss": 0.0004, "step": 300120 }, { "epoch": 1.9745005032795406, "grad_norm": 0.047299363791850665, "learning_rate": 4.950938667919181e-09, "loss": 0.0003, "step": 300130 }, { "epoch": 1.974566291454774, "grad_norm": 0.0006446586540821383, "learning_rate": 4.925429173355812e-09, "loss": 0.0012, "step": 300140 }, { "epoch": 1.9746320796300072, "grad_norm": 0.0005997881037881853, "learning_rate": 4.899985534536633e-09, "loss": 0.0003, "step": 300150 }, { "epoch": 1.9746978678052407, "grad_norm": 0.04750773248937183, "learning_rate": 4.874607751796934e-09, "loss": 0.0003, "step": 300160 }, { "epoch": 1.9747636559804742, "grad_norm": 0.007346579840793914, "learning_rate": 4.849295825471445e-09, "loss": 0.0005, "step": 300170 }, { "epoch": 1.9748294441557075, "grad_norm": 0.0022078590558904216, "learning_rate": 4.824049755894344e-09, "loss": 0.0003, "step": 300180 }, { "epoch": 1.9748952323309408, "grad_norm": 0.16351830914540985, "learning_rate": 4.7988695433975885e-09, "loss": 0.0003, "step": 300190 }, { "epoch": 1.974961020506174, "grad_norm": 0.020469739069061316, "learning_rate": 4.773755188314244e-09, "loss": 0.0005, "step": 300200 }, { "epoch": 1.9750268086814076, "grad_norm": 0.03052219378154497, "learning_rate": 4.7487066909740475e-09, "loss": 0.0005, "step": 300210 }, { "epoch": 1.975092596856641, "grad_norm": 0.004797138085537432, "learning_rate": 4.7237240517084005e-09, "loss": 0.0003, "step": 300220 }, { "epoch": 1.9751583850318744, "grad_norm": 0.026069349557437494, "learning_rate": 4.6988072708459285e-09, "loss": 0.0008, "step": 300230 }, { "epoch": 1.9752241732071076, "grad_norm": 0.046394230286267345, "learning_rate": 4.673956348715813e-09, "loss": 0.0003, "step": 300240 }, { "epoch": 1.9752899613823411, "grad_norm": 0.020348702962112584, "learning_rate": 4.649171285645016e-09, "loss": 0.0004, "step": 300250 }, { "epoch": 1.9753557495575746, "grad_norm": 0.047403860690650036, "learning_rate": 4.62445208196105e-09, "loss": 0.0004, "step": 300260 }, { "epoch": 1.975421537732808, "grad_norm": 0.11091350234967556, "learning_rate": 4.599798737988659e-09, "loss": 0.0011, "step": 300270 }, { "epoch": 1.9754873259080412, "grad_norm": 0.010267026741442817, "learning_rate": 4.575211254054246e-09, "loss": 0.0002, "step": 300280 }, { "epoch": 1.9755531140832747, "grad_norm": 0.014348205404076761, "learning_rate": 4.550689630480887e-09, "loss": 0.0013, "step": 300290 }, { "epoch": 1.975618902258508, "grad_norm": 0.053220974915484213, "learning_rate": 4.526233867592766e-09, "loss": 0.0004, "step": 300300 }, { "epoch": 1.9756846904337415, "grad_norm": 0.04025849701461795, "learning_rate": 4.501843965711849e-09, "loss": 0.0006, "step": 300310 }, { "epoch": 1.9757504786089748, "grad_norm": 0.03310653704204917, "learning_rate": 4.477519925159546e-09, "loss": 0.0004, "step": 300320 }, { "epoch": 1.975816266784208, "grad_norm": 0.008218885972788591, "learning_rate": 4.453261746256709e-09, "loss": 0.0003, "step": 300330 }, { "epoch": 1.9758820549594416, "grad_norm": 0.02526064707469575, "learning_rate": 4.4290694293230855e-09, "loss": 0.0002, "step": 300340 }, { "epoch": 1.975947843134675, "grad_norm": 0.027281759297878697, "learning_rate": 4.404942974677862e-09, "loss": 0.0004, "step": 300350 }, { "epoch": 1.9760136313099084, "grad_norm": 0.0104147453463983, "learning_rate": 4.380882382639118e-09, "loss": 0.0007, "step": 300360 }, { "epoch": 1.9760794194851417, "grad_norm": 0.041866977866195, "learning_rate": 4.356887653523823e-09, "loss": 0.0005, "step": 300370 }, { "epoch": 1.9761452076603752, "grad_norm": 0.012461508481395624, "learning_rate": 4.332958787648389e-09, "loss": 0.0004, "step": 300380 }, { "epoch": 1.9762109958356087, "grad_norm": 0.017670284847272264, "learning_rate": 4.309095785328676e-09, "loss": 0.001, "step": 300390 }, { "epoch": 1.976276784010842, "grad_norm": 0.032350814111474606, "learning_rate": 4.285298646878877e-09, "loss": 0.0004, "step": 300400 }, { "epoch": 1.9763425721860752, "grad_norm": 0.006978669331165277, "learning_rate": 4.2615673726126296e-09, "loss": 0.0007, "step": 300410 }, { "epoch": 1.9764083603613085, "grad_norm": 0.028200171562677595, "learning_rate": 4.237901962843016e-09, "loss": 0.0005, "step": 300420 }, { "epoch": 1.976474148536542, "grad_norm": 0.03355374283249485, "learning_rate": 4.2143024178820105e-09, "loss": 0.0005, "step": 300430 }, { "epoch": 1.9765399367117755, "grad_norm": 0.0003038294470197633, "learning_rate": 4.190768738041029e-09, "loss": 0.0012, "step": 300440 }, { "epoch": 1.9766057248870088, "grad_norm": 0.016570206558226365, "learning_rate": 4.167300923629825e-09, "loss": 0.0004, "step": 300450 }, { "epoch": 1.976671513062242, "grad_norm": 0.002264543384401217, "learning_rate": 4.143898974958149e-09, "loss": 0.0004, "step": 300460 }, { "epoch": 1.9767373012374756, "grad_norm": 0.01909419798800287, "learning_rate": 4.120562892334645e-09, "loss": 0.0004, "step": 300470 }, { "epoch": 1.976803089412709, "grad_norm": 0.0026151968588072365, "learning_rate": 4.097292676066289e-09, "loss": 0.0003, "step": 300480 }, { "epoch": 1.9768688775879424, "grad_norm": 0.029027872112945705, "learning_rate": 4.074088326460613e-09, "loss": 0.0007, "step": 300490 }, { "epoch": 1.9769346657631757, "grad_norm": 0.011939518288592622, "learning_rate": 4.050949843823482e-09, "loss": 0.0004, "step": 300500 }, { "epoch": 1.977000453938409, "grad_norm": 0.012052926252500045, "learning_rate": 4.027877228460209e-09, "loss": 0.0002, "step": 300510 }, { "epoch": 1.9770662421136425, "grad_norm": 0.026872195420368943, "learning_rate": 4.004870480673884e-09, "loss": 0.0004, "step": 300520 }, { "epoch": 1.977132030288876, "grad_norm": 0.016450028209362887, "learning_rate": 3.9819296007687085e-09, "loss": 0.0004, "step": 300530 }, { "epoch": 1.9771978184641092, "grad_norm": 0.013305921662065569, "learning_rate": 3.959054589046662e-09, "loss": 0.0019, "step": 300540 }, { "epoch": 1.9772636066393425, "grad_norm": 0.00018210006388928477, "learning_rate": 3.936245445810283e-09, "loss": 0.0003, "step": 300550 }, { "epoch": 1.977329394814576, "grad_norm": 0.047270795332605926, "learning_rate": 3.913502171359329e-09, "loss": 0.0006, "step": 300560 }, { "epoch": 1.9773951829898095, "grad_norm": 0.013212199453557207, "learning_rate": 3.8908247659935614e-09, "loss": 0.0002, "step": 300570 }, { "epoch": 1.9774609711650428, "grad_norm": 0.024330379213104342, "learning_rate": 3.868213230012741e-09, "loss": 0.0004, "step": 300580 }, { "epoch": 1.977526759340276, "grad_norm": 0.0016283729700637027, "learning_rate": 3.845667563714961e-09, "loss": 0.0003, "step": 300590 }, { "epoch": 1.9775925475155096, "grad_norm": 0.0382323043248587, "learning_rate": 3.823187767396652e-09, "loss": 0.0003, "step": 300600 }, { "epoch": 1.977658335690743, "grad_norm": 0.0006699942407596046, "learning_rate": 3.800773841354244e-09, "loss": 0.0004, "step": 300610 }, { "epoch": 1.9777241238659764, "grad_norm": 0.053438948115701684, "learning_rate": 3.778425785884165e-09, "loss": 0.0004, "step": 300620 }, { "epoch": 1.9777899120412097, "grad_norm": 0.03096108469621855, "learning_rate": 3.756143601280626e-09, "loss": 0.0003, "step": 300630 }, { "epoch": 1.977855700216443, "grad_norm": 0.011588407585553608, "learning_rate": 3.733927287836725e-09, "loss": 0.0007, "step": 300640 }, { "epoch": 1.9779214883916765, "grad_norm": 0.030182172732610212, "learning_rate": 3.7117768458466707e-09, "loss": 0.0003, "step": 300650 }, { "epoch": 1.97798727656691, "grad_norm": 0.07504014830089353, "learning_rate": 3.689692275600787e-09, "loss": 0.0008, "step": 300660 }, { "epoch": 1.9780530647421433, "grad_norm": 0.00010266520483673765, "learning_rate": 3.6676735773921723e-09, "loss": 0.0004, "step": 300670 }, { "epoch": 1.9781188529173765, "grad_norm": 0.0023675179467129584, "learning_rate": 3.645720751509485e-09, "loss": 0.0006, "step": 300680 }, { "epoch": 1.97818464109261, "grad_norm": 0.014592350765365327, "learning_rate": 3.6238337982436034e-09, "loss": 0.0006, "step": 300690 }, { "epoch": 1.9782504292678436, "grad_norm": 0.005196453493116293, "learning_rate": 3.60201271788152e-09, "loss": 0.0002, "step": 300700 }, { "epoch": 1.9783162174430768, "grad_norm": 0.0032711126941139044, "learning_rate": 3.5802575107118932e-09, "loss": 0.0006, "step": 300710 }, { "epoch": 1.9783820056183101, "grad_norm": 0.004325179074504659, "learning_rate": 3.558568177021715e-09, "loss": 0.0006, "step": 300720 }, { "epoch": 1.9784477937935434, "grad_norm": 0.04199943860894292, "learning_rate": 3.5369447170963135e-09, "loss": 0.0005, "step": 300730 }, { "epoch": 1.978513581968777, "grad_norm": 0.05791255621120489, "learning_rate": 3.5153871312215704e-09, "loss": 0.0005, "step": 300740 }, { "epoch": 1.9785793701440104, "grad_norm": 0.035630686956945094, "learning_rate": 3.493895419680593e-09, "loss": 0.0021, "step": 300750 }, { "epoch": 1.9786451583192437, "grad_norm": 0.009356222607815247, "learning_rate": 3.4724695827575984e-09, "loss": 0.0009, "step": 300760 }, { "epoch": 1.978710946494477, "grad_norm": 0.023553230720283837, "learning_rate": 3.4511096207345828e-09, "loss": 0.0003, "step": 300770 }, { "epoch": 1.9787767346697105, "grad_norm": 0.026784136455358102, "learning_rate": 3.4298155338935435e-09, "loss": 0.0006, "step": 300780 }, { "epoch": 1.978842522844944, "grad_norm": 0.01158657972014868, "learning_rate": 3.4085873225153667e-09, "loss": 0.0002, "step": 300790 }, { "epoch": 1.9789083110201773, "grad_norm": 0.07658391893553472, "learning_rate": 3.387424986879273e-09, "loss": 0.0006, "step": 300800 }, { "epoch": 1.9789740991954106, "grad_norm": 0.04486300738889367, "learning_rate": 3.366328527264484e-09, "loss": 0.001, "step": 300810 }, { "epoch": 1.9790398873706438, "grad_norm": 0.0081761659986039, "learning_rate": 3.3452979439496658e-09, "loss": 0.0005, "step": 300820 }, { "epoch": 1.9791056755458774, "grad_norm": 0.011573118520620316, "learning_rate": 3.3243332372118188e-09, "loss": 0.0002, "step": 300830 }, { "epoch": 1.9791714637211109, "grad_norm": 0.046947952111050816, "learning_rate": 3.303434407326833e-09, "loss": 0.0004, "step": 300840 }, { "epoch": 1.9792372518963441, "grad_norm": 0.0366425020403032, "learning_rate": 3.2826014545705996e-09, "loss": 0.0004, "step": 300850 }, { "epoch": 1.9793030400715774, "grad_norm": 0.13023770305818957, "learning_rate": 3.2618343792178984e-09, "loss": 0.0006, "step": 300860 }, { "epoch": 1.979368828246811, "grad_norm": 0.005061541100758395, "learning_rate": 3.241133181542955e-09, "loss": 0.0001, "step": 300870 }, { "epoch": 1.9794346164220444, "grad_norm": 0.001663771053884286, "learning_rate": 3.220497861817773e-09, "loss": 0.0001, "step": 300880 }, { "epoch": 1.9795004045972777, "grad_norm": 0.002545271480998351, "learning_rate": 3.1999284203143577e-09, "loss": 0.0005, "step": 300890 }, { "epoch": 1.979566192772511, "grad_norm": 0.02646400574315936, "learning_rate": 3.179424857305269e-09, "loss": 0.0004, "step": 300900 }, { "epoch": 1.9796319809477445, "grad_norm": 0.0202434728976305, "learning_rate": 3.1589871730591803e-09, "loss": 0.0006, "step": 300910 }, { "epoch": 1.9796977691229778, "grad_norm": 0.024220943240641665, "learning_rate": 3.1386153678464315e-09, "loss": 0.0004, "step": 300920 }, { "epoch": 1.9797635572982113, "grad_norm": 0.08592026833453799, "learning_rate": 3.1183094419356963e-09, "loss": 0.0006, "step": 300930 }, { "epoch": 1.9798293454734446, "grad_norm": 0.041975861644048, "learning_rate": 3.0980693955945383e-09, "loss": 0.0004, "step": 300940 }, { "epoch": 1.9798951336486779, "grad_norm": 0.004621531960895337, "learning_rate": 3.0778952290894114e-09, "loss": 0.0001, "step": 300950 }, { "epoch": 1.9799609218239114, "grad_norm": 0.008068476725148979, "learning_rate": 3.057786942686769e-09, "loss": 0.0006, "step": 300960 }, { "epoch": 1.9800267099991449, "grad_norm": 0.011614585839543605, "learning_rate": 3.037744536651399e-09, "loss": 0.0002, "step": 300970 }, { "epoch": 1.9800924981743782, "grad_norm": 0.04634431929796833, "learning_rate": 3.01776801124809e-09, "loss": 0.0006, "step": 300980 }, { "epoch": 1.9801582863496114, "grad_norm": 0.03547807451189444, "learning_rate": 2.9978573667394094e-09, "loss": 0.0003, "step": 300990 }, { "epoch": 1.980224074524845, "grad_norm": 0.1206455150315447, "learning_rate": 2.9780126033884806e-09, "loss": 0.0008, "step": 301000 }, { "epoch": 1.9802898627000785, "grad_norm": 0.1514564828406448, "learning_rate": 2.9582337214573153e-09, "loss": 0.0004, "step": 301010 }, { "epoch": 1.9803556508753117, "grad_norm": 0.04161740294581504, "learning_rate": 2.9385207212051515e-09, "loss": 0.0005, "step": 301020 }, { "epoch": 1.980421439050545, "grad_norm": 0.05027952215056446, "learning_rate": 2.918873602893446e-09, "loss": 0.0003, "step": 301030 }, { "epoch": 1.9804872272257783, "grad_norm": 0.047563376046644096, "learning_rate": 2.8992923667803263e-09, "loss": 0.0005, "step": 301040 }, { "epoch": 1.9805530154010118, "grad_norm": 0.024767983117531343, "learning_rate": 2.879777013124474e-09, "loss": 0.0007, "step": 301050 }, { "epoch": 1.9806188035762453, "grad_norm": 0.03618770948189522, "learning_rate": 2.8603275421829056e-09, "loss": 0.0003, "step": 301060 }, { "epoch": 1.9806845917514786, "grad_norm": 0.017767545755126603, "learning_rate": 2.840943954212083e-09, "loss": 0.0004, "step": 301070 }, { "epoch": 1.9807503799267119, "grad_norm": 0.027040466398675557, "learning_rate": 2.8216262494673573e-09, "loss": 0.0004, "step": 301080 }, { "epoch": 1.9808161681019454, "grad_norm": 0.009484337374314892, "learning_rate": 2.80237442820408e-09, "loss": 0.0004, "step": 301090 }, { "epoch": 1.980881956277179, "grad_norm": 0.004340009288784707, "learning_rate": 2.7831884906753813e-09, "loss": 0.0004, "step": 301100 }, { "epoch": 1.9809477444524122, "grad_norm": 0.03558011951163537, "learning_rate": 2.764068437134393e-09, "loss": 0.0003, "step": 301110 }, { "epoch": 1.9810135326276455, "grad_norm": 0.03128256134606122, "learning_rate": 2.745014267833135e-09, "loss": 0.0006, "step": 301120 }, { "epoch": 1.9810793208028787, "grad_norm": 0.0012277831897923252, "learning_rate": 2.726025983023628e-09, "loss": 0.0003, "step": 301130 }, { "epoch": 1.9811451089781122, "grad_norm": 0.0805660726672594, "learning_rate": 2.7071035829545623e-09, "loss": 0.0003, "step": 301140 }, { "epoch": 1.9812108971533458, "grad_norm": 0.024653994606124333, "learning_rate": 2.688247067877403e-09, "loss": 0.0003, "step": 301150 }, { "epoch": 1.981276685328579, "grad_norm": 0.006251040848470106, "learning_rate": 2.6694564380391752e-09, "loss": 0.0003, "step": 301160 }, { "epoch": 1.9813424735038123, "grad_norm": 0.04410241516528103, "learning_rate": 2.6507316936880136e-09, "loss": 0.0005, "step": 301170 }, { "epoch": 1.9814082616790458, "grad_norm": 0.03904481882782283, "learning_rate": 2.6320728350714977e-09, "loss": 0.0006, "step": 301180 }, { "epoch": 1.9814740498542793, "grad_norm": 0.03820282890205537, "learning_rate": 2.6134798624344316e-09, "loss": 0.0003, "step": 301190 }, { "epoch": 1.9815398380295126, "grad_norm": 0.028515423014034924, "learning_rate": 2.594952776023285e-09, "loss": 0.0009, "step": 301200 }, { "epoch": 1.981605626204746, "grad_norm": 0.00410974010448848, "learning_rate": 2.576491576081197e-09, "loss": 0.0003, "step": 301210 }, { "epoch": 1.9816714143799792, "grad_norm": 0.050477362812759406, "learning_rate": 2.5580962628524165e-09, "loss": 0.0004, "step": 301220 }, { "epoch": 1.9817372025552127, "grad_norm": 0.020522011240475355, "learning_rate": 2.539766836578417e-09, "loss": 0.0005, "step": 301230 }, { "epoch": 1.9818029907304462, "grad_norm": 0.039015334322384146, "learning_rate": 2.5215032975023367e-09, "loss": 0.0004, "step": 301240 }, { "epoch": 1.9818687789056795, "grad_norm": 0.009988916691074982, "learning_rate": 2.5033056458634298e-09, "loss": 0.0005, "step": 301250 }, { "epoch": 1.9819345670809128, "grad_norm": 0.023837336131574315, "learning_rate": 2.4851738819026138e-09, "loss": 0.0002, "step": 301260 }, { "epoch": 1.9820003552561463, "grad_norm": 0.041319153593798216, "learning_rate": 2.4671080058585873e-09, "loss": 0.0006, "step": 301270 }, { "epoch": 1.9820661434313798, "grad_norm": 0.017726408780826364, "learning_rate": 2.4491080179694928e-09, "loss": 0.0004, "step": 301280 }, { "epoch": 1.982131931606613, "grad_norm": 0.005961156707129588, "learning_rate": 2.4311739184729176e-09, "loss": 0.0003, "step": 301290 }, { "epoch": 1.9821977197818463, "grad_norm": 0.011698692307320778, "learning_rate": 2.41330570760534e-09, "loss": 0.0004, "step": 301300 }, { "epoch": 1.9822635079570798, "grad_norm": 0.003884779900489133, "learning_rate": 2.395503385601572e-09, "loss": 0.0005, "step": 301310 }, { "epoch": 1.9823292961323131, "grad_norm": 0.09482614713499592, "learning_rate": 2.3777669526969806e-09, "loss": 0.0005, "step": 301320 }, { "epoch": 1.9823950843075466, "grad_norm": 0.0003687914372912607, "learning_rate": 2.3600964091258228e-09, "loss": 0.0002, "step": 301330 }, { "epoch": 1.98246087248278, "grad_norm": 0.0023051944099781972, "learning_rate": 2.3424917551201355e-09, "loss": 0.0005, "step": 301340 }, { "epoch": 1.9825266606580132, "grad_norm": 0.030914635062289465, "learning_rate": 2.3249529909125103e-09, "loss": 0.0006, "step": 301350 }, { "epoch": 1.9825924488332467, "grad_norm": 0.012820180339858026, "learning_rate": 2.3074801167338733e-09, "loss": 0.0003, "step": 301360 }, { "epoch": 1.9826582370084802, "grad_norm": 0.06262444590354624, "learning_rate": 2.2900731328151514e-09, "loss": 0.0006, "step": 301370 }, { "epoch": 1.9827240251837135, "grad_norm": 0.036858445747611344, "learning_rate": 2.2727320393856057e-09, "loss": 0.0003, "step": 301380 }, { "epoch": 1.9827898133589468, "grad_norm": 0.11566185935499096, "learning_rate": 2.2554568366733864e-09, "loss": 0.0006, "step": 301390 }, { "epoch": 1.9828556015341803, "grad_norm": 0.005000081800260514, "learning_rate": 2.2382475249072e-09, "loss": 0.0006, "step": 301400 }, { "epoch": 1.9829213897094138, "grad_norm": 0.014229651104564072, "learning_rate": 2.2211041043129766e-09, "loss": 0.0008, "step": 301410 }, { "epoch": 1.982987177884647, "grad_norm": 0.05883453084858275, "learning_rate": 2.204026575117202e-09, "loss": 0.0003, "step": 301420 }, { "epoch": 1.9830529660598804, "grad_norm": 0.033482132860072594, "learning_rate": 2.187014937544696e-09, "loss": 0.0005, "step": 301430 }, { "epoch": 1.9831187542351136, "grad_norm": 0.01514686748363834, "learning_rate": 2.170069191820279e-09, "loss": 0.0002, "step": 301440 }, { "epoch": 1.9831845424103471, "grad_norm": 0.01632546537607077, "learning_rate": 2.1531893381671055e-09, "loss": 0.0005, "step": 301450 }, { "epoch": 1.9832503305855806, "grad_norm": 0.014351053348090309, "learning_rate": 2.1363753768072203e-09, "loss": 0.0001, "step": 301460 }, { "epoch": 1.983316118760814, "grad_norm": 0.04978083841659297, "learning_rate": 2.1196273079632235e-09, "loss": 0.0003, "step": 301470 }, { "epoch": 1.9833819069360472, "grad_norm": 0.09838249119203439, "learning_rate": 2.102945131855494e-09, "loss": 0.0007, "step": 301480 }, { "epoch": 1.9834476951112807, "grad_norm": 0.009816387159775326, "learning_rate": 2.0863288487038558e-09, "loss": 0.0003, "step": 301490 }, { "epoch": 1.9835134832865142, "grad_norm": 0.0034806026501417115, "learning_rate": 2.0697784587275783e-09, "loss": 0.0018, "step": 301500 }, { "epoch": 1.9835792714617475, "grad_norm": 0.000730333039725807, "learning_rate": 2.05329396214482e-09, "loss": 0.0006, "step": 301510 }, { "epoch": 1.9836450596369808, "grad_norm": 0.011749133591411106, "learning_rate": 2.0368753591731847e-09, "loss": 0.0002, "step": 301520 }, { "epoch": 1.983710847812214, "grad_norm": 0.18543723980160237, "learning_rate": 2.0205226500280563e-09, "loss": 0.0019, "step": 301530 }, { "epoch": 1.9837766359874476, "grad_norm": 0.0006041766321197334, "learning_rate": 2.0042358349264825e-09, "loss": 0.0003, "step": 301540 }, { "epoch": 1.983842424162681, "grad_norm": 0.005638036056996234, "learning_rate": 1.988014914082181e-09, "loss": 0.0006, "step": 301550 }, { "epoch": 1.9839082123379144, "grad_norm": 0.03659792312835433, "learning_rate": 1.9718598877094264e-09, "loss": 0.0003, "step": 301560 }, { "epoch": 1.9839740005131477, "grad_norm": 0.033054073132626406, "learning_rate": 1.955770756020825e-09, "loss": 0.0005, "step": 301570 }, { "epoch": 1.9840397886883812, "grad_norm": 0.11876867924211029, "learning_rate": 1.93974751922954e-09, "loss": 0.0006, "step": 301580 }, { "epoch": 1.9841055768636147, "grad_norm": 0.005349277538502052, "learning_rate": 1.9237901775454036e-09, "loss": 0.0004, "step": 301590 }, { "epoch": 1.984171365038848, "grad_norm": 0.03034707252878646, "learning_rate": 1.9078987311793585e-09, "loss": 0.0002, "step": 301600 }, { "epoch": 1.9842371532140812, "grad_norm": 0.01728558512886943, "learning_rate": 1.892073180341791e-09, "loss": 0.0003, "step": 301610 }, { "epoch": 1.9843029413893147, "grad_norm": 0.012662649930139611, "learning_rate": 1.876313525239759e-09, "loss": 0.0004, "step": 301620 }, { "epoch": 1.984368729564548, "grad_norm": 0.09385039676508193, "learning_rate": 1.8606197660825387e-09, "loss": 0.0008, "step": 301630 }, { "epoch": 1.9844345177397815, "grad_norm": 0.0398348346108481, "learning_rate": 1.8449919030760765e-09, "loss": 0.0004, "step": 301640 }, { "epoch": 1.9845003059150148, "grad_norm": 0.001113398566893334, "learning_rate": 1.829429936426319e-09, "loss": 0.0015, "step": 301650 }, { "epoch": 1.984566094090248, "grad_norm": 0.044686017140623036, "learning_rate": 1.8139338663392126e-09, "loss": 0.0003, "step": 301660 }, { "epoch": 1.9846318822654816, "grad_norm": 0.002099864405773155, "learning_rate": 1.798503693019038e-09, "loss": 0.0002, "step": 301670 }, { "epoch": 1.984697670440715, "grad_norm": 0.07865501595517069, "learning_rate": 1.7831394166684112e-09, "loss": 0.0005, "step": 301680 }, { "epoch": 1.9847634586159484, "grad_norm": 0.04942769007774578, "learning_rate": 1.767841037490503e-09, "loss": 0.0002, "step": 301690 }, { "epoch": 1.9848292467911817, "grad_norm": 0.0005204687647669632, "learning_rate": 1.752608555686819e-09, "loss": 0.0006, "step": 301700 }, { "epoch": 1.9848950349664152, "grad_norm": 0.003190685572901695, "learning_rate": 1.7374419714577539e-09, "loss": 0.0004, "step": 301710 }, { "epoch": 1.9849608231416487, "grad_norm": 0.07368884580787591, "learning_rate": 1.7223412850042586e-09, "loss": 0.0012, "step": 301720 }, { "epoch": 1.985026611316882, "grad_norm": 0.017957622828253056, "learning_rate": 1.707306496525063e-09, "loss": 0.0011, "step": 301730 }, { "epoch": 1.9850923994921152, "grad_norm": 0.007754175586987599, "learning_rate": 1.6923376062177866e-09, "loss": 0.0004, "step": 301740 }, { "epoch": 1.9851581876673485, "grad_norm": 0.026064790702271058, "learning_rate": 1.6774346142806042e-09, "loss": 0.0004, "step": 301750 }, { "epoch": 1.985223975842582, "grad_norm": 7.934165631056789e-05, "learning_rate": 1.6625975209094703e-09, "loss": 0.0005, "step": 301760 }, { "epoch": 1.9852897640178155, "grad_norm": 0.00040419741935302666, "learning_rate": 1.647826326299784e-09, "loss": 0.0002, "step": 301770 }, { "epoch": 1.9853555521930488, "grad_norm": 0.027437085300840734, "learning_rate": 1.6331210306469447e-09, "loss": 0.0005, "step": 301780 }, { "epoch": 1.985421340368282, "grad_norm": 0.010704838718950059, "learning_rate": 1.6184816341446863e-09, "loss": 0.0004, "step": 301790 }, { "epoch": 1.9854871285435156, "grad_norm": 0.03909205956383649, "learning_rate": 1.6039081369856323e-09, "loss": 0.0002, "step": 301800 }, { "epoch": 1.9855529167187491, "grad_norm": 0.002024387897289444, "learning_rate": 1.5894005393624067e-09, "loss": 0.0005, "step": 301810 }, { "epoch": 1.9856187048939824, "grad_norm": 0.0245483390359688, "learning_rate": 1.5749588414654126e-09, "loss": 0.0004, "step": 301820 }, { "epoch": 1.9856844930692157, "grad_norm": 0.04017063755068691, "learning_rate": 1.5605830434861635e-09, "loss": 0.0003, "step": 301830 }, { "epoch": 1.985750281244449, "grad_norm": 0.012506192346874495, "learning_rate": 1.5462731456139524e-09, "loss": 0.0002, "step": 301840 }, { "epoch": 1.9858160694196825, "grad_norm": 0.03923909511971375, "learning_rate": 1.5320291480364069e-09, "loss": 0.0004, "step": 301850 }, { "epoch": 1.985881857594916, "grad_norm": 0.00016083922495529077, "learning_rate": 1.51785105094282e-09, "loss": 0.0004, "step": 301860 }, { "epoch": 1.9859476457701493, "grad_norm": 0.00386278390247412, "learning_rate": 1.5037388545191545e-09, "loss": 0.0003, "step": 301870 }, { "epoch": 1.9860134339453825, "grad_norm": 0.0027429092888161272, "learning_rate": 1.4896925589513723e-09, "loss": 0.0004, "step": 301880 }, { "epoch": 1.986079222120616, "grad_norm": 0.02352875192050827, "learning_rate": 1.4757121644254357e-09, "loss": 0.0006, "step": 301890 }, { "epoch": 1.9861450102958496, "grad_norm": 0.00012995804990568143, "learning_rate": 1.461797671125087e-09, "loss": 0.0002, "step": 301900 }, { "epoch": 1.9862107984710828, "grad_norm": 0.020317702524623917, "learning_rate": 1.4479490792340678e-09, "loss": 0.0002, "step": 301910 }, { "epoch": 1.9862765866463161, "grad_norm": 0.006684252910762579, "learning_rate": 1.4341663889344548e-09, "loss": 0.0007, "step": 301920 }, { "epoch": 1.9863423748215496, "grad_norm": 0.008989752911965805, "learning_rate": 1.4204496004088798e-09, "loss": 0.0004, "step": 301930 }, { "epoch": 1.986408162996783, "grad_norm": 0.009936644214722368, "learning_rate": 1.4067987138371986e-09, "loss": 0.0002, "step": 301940 }, { "epoch": 1.9864739511720164, "grad_norm": 0.0007678591489003112, "learning_rate": 1.3932137293998226e-09, "loss": 0.0007, "step": 301950 }, { "epoch": 1.9865397393472497, "grad_norm": 0.031162336770049075, "learning_rate": 1.379694647275498e-09, "loss": 0.0003, "step": 301960 }, { "epoch": 1.986605527522483, "grad_norm": 0.005591496320483888, "learning_rate": 1.3662414676435254e-09, "loss": 0.0004, "step": 301970 }, { "epoch": 1.9866713156977165, "grad_norm": 0.032361106047368526, "learning_rate": 1.3528541906798752e-09, "loss": 0.0004, "step": 301980 }, { "epoch": 1.98673710387295, "grad_norm": 0.006758337537887447, "learning_rate": 1.3395328165621836e-09, "loss": 0.0027, "step": 301990 }, { "epoch": 1.9868028920481833, "grad_norm": 5.73807164195751e-05, "learning_rate": 1.326277345465865e-09, "loss": 0.0002, "step": 302000 }, { "epoch": 1.9868686802234166, "grad_norm": 0.007364819590046853, "learning_rate": 1.31308777756467e-09, "loss": 0.0006, "step": 302010 }, { "epoch": 1.98693446839865, "grad_norm": 0.009558309931085076, "learning_rate": 1.2999641130340135e-09, "loss": 0.0005, "step": 302020 }, { "epoch": 1.9870002565738836, "grad_norm": 0.0020822909594177884, "learning_rate": 1.28690635204598e-09, "loss": 0.0002, "step": 302030 }, { "epoch": 1.9870660447491169, "grad_norm": 0.07680591427143373, "learning_rate": 1.2739144947726544e-09, "loss": 0.0007, "step": 302040 }, { "epoch": 1.9871318329243501, "grad_norm": 0.006784880242468111, "learning_rate": 1.2609885413855661e-09, "loss": 0.0002, "step": 302050 }, { "epoch": 1.9871976210995834, "grad_norm": 0.03272762838865371, "learning_rate": 1.248128492055689e-09, "loss": 0.0003, "step": 302060 }, { "epoch": 1.987263409274817, "grad_norm": 0.013508319180402467, "learning_rate": 1.2353343469512224e-09, "loss": 0.0003, "step": 302070 }, { "epoch": 1.9873291974500504, "grad_norm": 0.04937387287690741, "learning_rate": 1.2226061062420302e-09, "loss": 0.0005, "step": 302080 }, { "epoch": 1.9873949856252837, "grad_norm": 0.046705679416513636, "learning_rate": 1.2099437700957562e-09, "loss": 0.0005, "step": 302090 }, { "epoch": 1.987460773800517, "grad_norm": 0.041079616839317705, "learning_rate": 1.1973473386789336e-09, "loss": 0.0004, "step": 302100 }, { "epoch": 1.9875265619757505, "grad_norm": 0.0010585053148526595, "learning_rate": 1.1848168121580961e-09, "loss": 0.0009, "step": 302110 }, { "epoch": 1.987592350150984, "grad_norm": 0.0410570842612659, "learning_rate": 1.1723521906975565e-09, "loss": 0.0003, "step": 302120 }, { "epoch": 1.9876581383262173, "grad_norm": 0.03315914409015619, "learning_rate": 1.1599534744627383e-09, "loss": 0.0004, "step": 302130 }, { "epoch": 1.9877239265014506, "grad_norm": 0.01771955875524435, "learning_rate": 1.1476206636168442e-09, "loss": 0.0008, "step": 302140 }, { "epoch": 1.9877897146766839, "grad_norm": 0.004177777995840009, "learning_rate": 1.1353537583219665e-09, "loss": 0.0004, "step": 302150 }, { "epoch": 1.9878555028519174, "grad_norm": 0.04848524101184738, "learning_rate": 1.123152758740198e-09, "loss": 0.0005, "step": 302160 }, { "epoch": 1.9879212910271509, "grad_norm": 0.06600791779259763, "learning_rate": 1.111017665032521e-09, "loss": 0.0005, "step": 302170 }, { "epoch": 1.9879870792023842, "grad_norm": 0.022277151045723247, "learning_rate": 1.0989484773588076e-09, "loss": 0.0005, "step": 302180 }, { "epoch": 1.9880528673776174, "grad_norm": 0.02430544605174023, "learning_rate": 1.0869451958778198e-09, "loss": 0.0001, "step": 302190 }, { "epoch": 1.988118655552851, "grad_norm": 0.00849195288552488, "learning_rate": 1.0750078207483195e-09, "loss": 0.0002, "step": 302200 }, { "epoch": 1.9881844437280844, "grad_norm": 0.0004993106205748001, "learning_rate": 1.063136352127403e-09, "loss": 0.0002, "step": 302210 }, { "epoch": 1.9882502319033177, "grad_norm": 0.0371337831975514, "learning_rate": 1.0513307901716118e-09, "loss": 0.0005, "step": 302220 }, { "epoch": 1.988316020078551, "grad_norm": 0.04847354594112723, "learning_rate": 1.0395911350363775e-09, "loss": 0.0005, "step": 302230 }, { "epoch": 1.9883818082537843, "grad_norm": 0.033933969632388206, "learning_rate": 1.0279173868771308e-09, "loss": 0.0005, "step": 302240 }, { "epoch": 1.9884475964290178, "grad_norm": 0.012743799837574019, "learning_rate": 1.016309545847638e-09, "loss": 0.0002, "step": 302250 }, { "epoch": 1.9885133846042513, "grad_norm": 0.0389867953764689, "learning_rate": 1.0047676120999993e-09, "loss": 0.0002, "step": 302260 }, { "epoch": 1.9885791727794846, "grad_norm": 0.08056829846505457, "learning_rate": 9.93291585787981e-10, "loss": 0.0004, "step": 302270 }, { "epoch": 1.9886449609547179, "grad_norm": 0.0009501515631015385, "learning_rate": 9.818814670614629e-10, "loss": 0.0003, "step": 302280 }, { "epoch": 1.9887107491299514, "grad_norm": 0.005934451358264444, "learning_rate": 9.705372560714354e-10, "loss": 0.0003, "step": 302290 }, { "epoch": 1.9887765373051849, "grad_norm": 0.026205735491350503, "learning_rate": 9.592589529672236e-10, "loss": 0.0003, "step": 302300 }, { "epoch": 1.9888423254804182, "grad_norm": 0.027756079411481536, "learning_rate": 9.480465578981523e-10, "loss": 0.0004, "step": 302310 }, { "epoch": 1.9889081136556515, "grad_norm": 0.03884604142714791, "learning_rate": 9.369000710113264e-10, "loss": 0.0003, "step": 302320 }, { "epoch": 1.988973901830885, "grad_norm": 0.0004260645279380607, "learning_rate": 9.258194924538499e-10, "loss": 0.0007, "step": 302330 }, { "epoch": 1.9890396900061185, "grad_norm": 0.0013284800885976376, "learning_rate": 9.148048223722728e-10, "loss": 0.0008, "step": 302340 }, { "epoch": 1.9891054781813517, "grad_norm": 0.0030230259802807966, "learning_rate": 9.038560609114788e-10, "loss": 0.0009, "step": 302350 }, { "epoch": 1.989171266356585, "grad_norm": 0.007723592013835471, "learning_rate": 8.929732082157972e-10, "loss": 0.0005, "step": 302360 }, { "epoch": 1.9892370545318183, "grad_norm": 0.008405200971652382, "learning_rate": 8.821562644284465e-10, "loss": 0.0003, "step": 302370 }, { "epoch": 1.9893028427070518, "grad_norm": 0.08080338858211916, "learning_rate": 8.714052296926456e-10, "loss": 0.0008, "step": 302380 }, { "epoch": 1.9893686308822853, "grad_norm": 0.024317159388623968, "learning_rate": 8.60720104149948e-10, "loss": 0.0006, "step": 302390 }, { "epoch": 1.9894344190575186, "grad_norm": 0.013569895159134055, "learning_rate": 8.501008879407969e-10, "loss": 0.0005, "step": 302400 }, { "epoch": 1.989500207232752, "grad_norm": 0.06329686174234671, "learning_rate": 8.395475812061904e-10, "loss": 0.0004, "step": 302410 }, { "epoch": 1.9895659954079854, "grad_norm": 0.008418019240462997, "learning_rate": 8.290601840837964e-10, "loss": 0.0002, "step": 302420 }, { "epoch": 1.989631783583219, "grad_norm": 0.006205070272006467, "learning_rate": 8.186386967129478e-10, "loss": 0.0002, "step": 302430 }, { "epoch": 1.9896975717584522, "grad_norm": 0.0033827172788826925, "learning_rate": 8.082831192307572e-10, "loss": 0.0002, "step": 302440 }, { "epoch": 1.9897633599336855, "grad_norm": 0.01612129581712905, "learning_rate": 7.979934517737819e-10, "loss": 0.0006, "step": 302450 }, { "epoch": 1.9898291481089188, "grad_norm": 0.0006958355053493594, "learning_rate": 7.877696944780245e-10, "loss": 0.0005, "step": 302460 }, { "epoch": 1.9898949362841523, "grad_norm": 8.225904241791418e-05, "learning_rate": 7.776118474772665e-10, "loss": 0.0005, "step": 302470 }, { "epoch": 1.9899607244593858, "grad_norm": 0.00893529714526345, "learning_rate": 7.675199109069553e-10, "loss": 0.0002, "step": 302480 }, { "epoch": 1.990026512634619, "grad_norm": 0.026435296910380383, "learning_rate": 7.574938848986524e-10, "loss": 0.0012, "step": 302490 }, { "epoch": 1.9900923008098523, "grad_norm": 0.06162118365785481, "learning_rate": 7.475337695850293e-10, "loss": 0.0005, "step": 302500 }, { "epoch": 1.9901580889850858, "grad_norm": 0.02533442946827093, "learning_rate": 7.376395650976475e-10, "loss": 0.0005, "step": 302510 }, { "epoch": 1.9902238771603193, "grad_norm": 0.07633617841057229, "learning_rate": 7.278112715669583e-10, "loss": 0.0005, "step": 302520 }, { "epoch": 1.9902896653355526, "grad_norm": 0.007674117833321551, "learning_rate": 7.180488891228576e-10, "loss": 0.0002, "step": 302530 }, { "epoch": 1.990355453510786, "grad_norm": 0.01779216918826711, "learning_rate": 7.083524178930212e-10, "loss": 0.0009, "step": 302540 }, { "epoch": 1.9904212416860192, "grad_norm": 0.057242452735042, "learning_rate": 6.987218580062349e-10, "loss": 0.0004, "step": 302550 }, { "epoch": 1.9904870298612527, "grad_norm": 0.03367212325854554, "learning_rate": 6.891572095885091e-10, "loss": 0.0004, "step": 302560 }, { "epoch": 1.9905528180364862, "grad_norm": 0.015362223398556057, "learning_rate": 6.796584727669641e-10, "loss": 0.0004, "step": 302570 }, { "epoch": 1.9906186062117195, "grad_norm": 0.04425622479234298, "learning_rate": 6.702256476665004e-10, "loss": 0.0004, "step": 302580 }, { "epoch": 1.9906843943869528, "grad_norm": 0.0082687237734726, "learning_rate": 6.608587344114625e-10, "loss": 0.0003, "step": 302590 }, { "epoch": 1.9907501825621863, "grad_norm": 0.014545184562477156, "learning_rate": 6.515577331250856e-10, "loss": 0.0003, "step": 302600 }, { "epoch": 1.9908159707374198, "grad_norm": 0.05247552840367139, "learning_rate": 6.423226439306041e-10, "loss": 0.0004, "step": 302610 }, { "epoch": 1.990881758912653, "grad_norm": 0.005424308426664923, "learning_rate": 6.331534669490325e-10, "loss": 0.0005, "step": 302620 }, { "epoch": 1.9909475470878863, "grad_norm": 0.0006619218414203871, "learning_rate": 6.24050202301385e-10, "loss": 0.0004, "step": 302630 }, { "epoch": 1.9910133352631199, "grad_norm": 0.03032023878475849, "learning_rate": 6.15012850108121e-10, "loss": 0.0004, "step": 302640 }, { "epoch": 1.9910791234383531, "grad_norm": 0.04802928812201368, "learning_rate": 6.060414104885892e-10, "loss": 0.0003, "step": 302650 }, { "epoch": 1.9911449116135866, "grad_norm": 0.018936614488698687, "learning_rate": 5.971358835599184e-10, "loss": 0.0004, "step": 302660 }, { "epoch": 1.99121069978882, "grad_norm": 0.0058819235291527276, "learning_rate": 5.882962694409023e-10, "loss": 0.0008, "step": 302670 }, { "epoch": 1.9912764879640532, "grad_norm": 0.028913701893968257, "learning_rate": 5.795225682470041e-10, "loss": 0.0012, "step": 302680 }, { "epoch": 1.9913422761392867, "grad_norm": 0.011748346157832702, "learning_rate": 5.70814780094242e-10, "loss": 0.0002, "step": 302690 }, { "epoch": 1.9914080643145202, "grad_norm": 0.026481556530620037, "learning_rate": 5.621729050975244e-10, "loss": 0.0004, "step": 302700 }, { "epoch": 1.9914738524897535, "grad_norm": 0.006032107383662685, "learning_rate": 5.535969433712041e-10, "loss": 0.0003, "step": 302710 }, { "epoch": 1.9915396406649868, "grad_norm": 0.000395024861166198, "learning_rate": 5.450868950274135e-10, "loss": 0.0004, "step": 302720 }, { "epoch": 1.9916054288402203, "grad_norm": 0.00037229593483809536, "learning_rate": 5.366427601788404e-10, "loss": 0.0007, "step": 302730 }, { "epoch": 1.9916712170154538, "grad_norm": 0.03854313074313611, "learning_rate": 5.282645389370622e-10, "loss": 0.0013, "step": 302740 }, { "epoch": 1.991737005190687, "grad_norm": 0.05861359015914732, "learning_rate": 5.19952231412546e-10, "loss": 0.0007, "step": 302750 }, { "epoch": 1.9918027933659204, "grad_norm": 0.031959830951855804, "learning_rate": 5.117058377140938e-10, "loss": 0.0006, "step": 302760 }, { "epoch": 1.9918685815411536, "grad_norm": 0.026021882703563363, "learning_rate": 5.035253579510624e-10, "loss": 0.0003, "step": 302770 }, { "epoch": 1.9919343697163872, "grad_norm": 0.012593854839851558, "learning_rate": 4.954107922316986e-10, "loss": 0.0002, "step": 302780 }, { "epoch": 1.9920001578916207, "grad_norm": 0.01668308053227257, "learning_rate": 4.873621406620288e-10, "loss": 0.0007, "step": 302790 }, { "epoch": 1.992065946066854, "grad_norm": 0.027488944827273768, "learning_rate": 4.793794033486343e-10, "loss": 0.0002, "step": 302800 }, { "epoch": 1.9921317342420872, "grad_norm": 0.02211884558687292, "learning_rate": 4.714625803964312e-10, "loss": 0.0001, "step": 302810 }, { "epoch": 1.9921975224173207, "grad_norm": 0.03969998618617182, "learning_rate": 4.636116719108907e-10, "loss": 0.0003, "step": 302820 }, { "epoch": 1.9922633105925542, "grad_norm": 0.04019331113900685, "learning_rate": 4.5582667799415336e-10, "loss": 0.0008, "step": 302830 }, { "epoch": 1.9923290987677875, "grad_norm": 0.03397879796130661, "learning_rate": 4.481075987494699e-10, "loss": 0.0008, "step": 302840 }, { "epoch": 1.9923948869430208, "grad_norm": 0.002772806149818966, "learning_rate": 4.404544342789807e-10, "loss": 0.0005, "step": 302850 }, { "epoch": 1.992460675118254, "grad_norm": 0.011788274783419764, "learning_rate": 4.3286718468316113e-10, "loss": 0.0006, "step": 302860 }, { "epoch": 1.9925264632934876, "grad_norm": 0.013761009526073356, "learning_rate": 4.2534585006193117e-10, "loss": 0.0005, "step": 302870 }, { "epoch": 1.992592251468721, "grad_norm": 0.026996089476274878, "learning_rate": 4.1789043051465585e-10, "loss": 0.0003, "step": 302880 }, { "epoch": 1.9926580396439544, "grad_norm": 0.0648225791178299, "learning_rate": 4.105009261390347e-10, "loss": 0.0002, "step": 302890 }, { "epoch": 1.9927238278191877, "grad_norm": 0.00209133288418637, "learning_rate": 4.031773370338776e-10, "loss": 0.001, "step": 302900 }, { "epoch": 1.9927896159944212, "grad_norm": 0.023306572492392017, "learning_rate": 3.959196632941087e-10, "loss": 0.0004, "step": 302910 }, { "epoch": 1.9928554041696547, "grad_norm": 0.05001112783612275, "learning_rate": 3.887279050168724e-10, "loss": 0.0009, "step": 302920 }, { "epoch": 1.992921192344888, "grad_norm": 0.01018801884766233, "learning_rate": 3.816020622959826e-10, "loss": 0.0005, "step": 302930 }, { "epoch": 1.9929869805201212, "grad_norm": 0.001264440202820144, "learning_rate": 3.745421352258083e-10, "loss": 0.0002, "step": 302940 }, { "epoch": 1.9930527686953547, "grad_norm": 0.022748063569340865, "learning_rate": 3.6754812389960815e-10, "loss": 0.0007, "step": 302950 }, { "epoch": 1.993118556870588, "grad_norm": 0.006745774341208887, "learning_rate": 3.606200284089756e-10, "loss": 0.0005, "step": 302960 }, { "epoch": 1.9931843450458215, "grad_norm": 0.04325724176801005, "learning_rate": 3.5375784884550403e-10, "loss": 0.0005, "step": 302970 }, { "epoch": 1.9932501332210548, "grad_norm": 0.006458327660550181, "learning_rate": 3.469615852996766e-10, "loss": 0.0002, "step": 302980 }, { "epoch": 1.993315921396288, "grad_norm": 0.020403361494128216, "learning_rate": 3.402312378614214e-10, "loss": 0.0003, "step": 302990 }, { "epoch": 1.9933817095715216, "grad_norm": 0.00934247369096422, "learning_rate": 3.335668066190012e-10, "loss": 0.0005, "step": 303000 }, { "epoch": 1.9934474977467551, "grad_norm": 0.04435347238775093, "learning_rate": 3.269682916612338e-10, "loss": 0.0005, "step": 303010 }, { "epoch": 1.9935132859219884, "grad_norm": 0.04263979978004682, "learning_rate": 3.2043569307360635e-10, "loss": 0.0007, "step": 303020 }, { "epoch": 1.9935790740972217, "grad_norm": 0.002702490213219981, "learning_rate": 3.1396901094327137e-10, "loss": 0.0005, "step": 303030 }, { "epoch": 1.9936448622724552, "grad_norm": 0.0015741138319379081, "learning_rate": 3.0756824535516095e-10, "loss": 0.0003, "step": 303040 }, { "epoch": 1.9937106504476887, "grad_norm": 0.023836312671377976, "learning_rate": 3.012333963942071e-10, "loss": 0.0008, "step": 303050 }, { "epoch": 1.993776438622922, "grad_norm": 0.000277481174268512, "learning_rate": 2.949644641431215e-10, "loss": 0.0005, "step": 303060 }, { "epoch": 1.9938422267981553, "grad_norm": 0.003505875439039174, "learning_rate": 2.887614486846158e-10, "loss": 0.0004, "step": 303070 }, { "epoch": 1.9939080149733885, "grad_norm": 0.004333353675803576, "learning_rate": 2.8262435010084634e-10, "loss": 0.0005, "step": 303080 }, { "epoch": 1.993973803148622, "grad_norm": 0.01016884140343617, "learning_rate": 2.765531684728595e-10, "loss": 0.0003, "step": 303090 }, { "epoch": 1.9940395913238556, "grad_norm": 0.027788266992697576, "learning_rate": 2.705479038805914e-10, "loss": 0.0003, "step": 303100 }, { "epoch": 1.9941053794990888, "grad_norm": 0.05453213027578035, "learning_rate": 2.6460855640286777e-10, "loss": 0.0002, "step": 303110 }, { "epoch": 1.9941711676743221, "grad_norm": 0.0010778797534881836, "learning_rate": 2.587351261185145e-10, "loss": 0.0002, "step": 303120 }, { "epoch": 1.9942369558495556, "grad_norm": 0.040851404791051156, "learning_rate": 2.5292761310413693e-10, "loss": 0.0006, "step": 303130 }, { "epoch": 1.9943027440247891, "grad_norm": 0.07613586544743156, "learning_rate": 2.4718601743745077e-10, "loss": 0.0006, "step": 303140 }, { "epoch": 1.9943685322000224, "grad_norm": 0.03253585942036055, "learning_rate": 2.4151033919339596e-10, "loss": 0.0011, "step": 303150 }, { "epoch": 1.9944343203752557, "grad_norm": 0.0010235945139023532, "learning_rate": 2.359005784463575e-10, "loss": 0.0002, "step": 303160 }, { "epoch": 1.994500108550489, "grad_norm": 0.0518823029969535, "learning_rate": 2.3035673527127543e-10, "loss": 0.0002, "step": 303170 }, { "epoch": 1.9945658967257225, "grad_norm": 0.04295742006093075, "learning_rate": 2.248788097414245e-10, "loss": 0.0003, "step": 303180 }, { "epoch": 1.994631684900956, "grad_norm": 0.03630213068582193, "learning_rate": 2.1946680192785896e-10, "loss": 0.0008, "step": 303190 }, { "epoch": 1.9946974730761893, "grad_norm": 0.0818966648399237, "learning_rate": 2.141207119027433e-10, "loss": 0.0004, "step": 303200 }, { "epoch": 1.9947632612514226, "grad_norm": 0.011827361873746235, "learning_rate": 2.088405397360216e-10, "loss": 0.0008, "step": 303210 }, { "epoch": 1.994829049426656, "grad_norm": 0.030037090441207975, "learning_rate": 2.0362628549763785e-10, "loss": 0.0008, "step": 303220 }, { "epoch": 1.9948948376018896, "grad_norm": 0.012132104164524826, "learning_rate": 1.9847794925698105e-10, "loss": 0.0003, "step": 303230 }, { "epoch": 1.9949606257771229, "grad_norm": 0.044107018928920196, "learning_rate": 1.9339553108066454e-10, "loss": 0.0007, "step": 303240 }, { "epoch": 1.9950264139523561, "grad_norm": 0.023623886053359404, "learning_rate": 1.8837903103641196e-10, "loss": 0.0007, "step": 303250 }, { "epoch": 1.9950922021275896, "grad_norm": 0.02988121154144063, "learning_rate": 1.834284491902816e-10, "loss": 0.0006, "step": 303260 }, { "epoch": 1.995157990302823, "grad_norm": 0.007638436452009935, "learning_rate": 1.7854378560777653e-10, "loss": 0.001, "step": 303270 }, { "epoch": 1.9952237784780564, "grad_norm": 0.047646375918188925, "learning_rate": 1.7372504035273464e-10, "loss": 0.0004, "step": 303280 }, { "epoch": 1.9952895666532897, "grad_norm": 0.004588552489715377, "learning_rate": 1.6897221348899373e-10, "loss": 0.0003, "step": 303290 }, { "epoch": 1.995355354828523, "grad_norm": 0.09601504844982701, "learning_rate": 1.6428530507983653e-10, "loss": 0.0004, "step": 303300 }, { "epoch": 1.9954211430037565, "grad_norm": 0.021150676825494964, "learning_rate": 1.596643151857702e-10, "loss": 0.0002, "step": 303310 }, { "epoch": 1.99548693117899, "grad_norm": 0.00732845433198173, "learning_rate": 1.5510924386841208e-10, "loss": 0.0002, "step": 303320 }, { "epoch": 1.9955527193542233, "grad_norm": 0.001184247536919644, "learning_rate": 1.5062009118826936e-10, "loss": 0.0003, "step": 303330 }, { "epoch": 1.9956185075294566, "grad_norm": 0.028722930793605127, "learning_rate": 1.4619685720362874e-10, "loss": 0.0007, "step": 303340 }, { "epoch": 1.99568429570469, "grad_norm": 0.028636668428504593, "learning_rate": 1.4183954197333204e-10, "loss": 0.0003, "step": 303350 }, { "epoch": 1.9957500838799236, "grad_norm": 0.004080580214926536, "learning_rate": 1.3754814555455575e-10, "loss": 0.0003, "step": 303360 }, { "epoch": 1.9958158720551569, "grad_norm": 0.007838534771751025, "learning_rate": 1.3332266800447634e-10, "loss": 0.0004, "step": 303370 }, { "epoch": 1.9958816602303902, "grad_norm": 0.021299411327426354, "learning_rate": 1.2916310937804987e-10, "loss": 0.0003, "step": 303380 }, { "epoch": 1.9959474484056234, "grad_norm": 0.04457754314411546, "learning_rate": 1.2506946973023237e-10, "loss": 0.0003, "step": 303390 }, { "epoch": 1.996013236580857, "grad_norm": 0.009139371317710955, "learning_rate": 1.2104174911542475e-10, "loss": 0.0003, "step": 303400 }, { "epoch": 1.9960790247560904, "grad_norm": 0.0020435135140113715, "learning_rate": 1.1707994758636264e-10, "loss": 0.0003, "step": 303410 }, { "epoch": 1.9961448129313237, "grad_norm": 0.0023653978318704974, "learning_rate": 1.1318406519578162e-10, "loss": 0.0002, "step": 303420 }, { "epoch": 1.996210601106557, "grad_norm": 0.008214489886273928, "learning_rate": 1.0935410199419683e-10, "loss": 0.0004, "step": 303430 }, { "epoch": 1.9962763892817905, "grad_norm": 0.004627542911132709, "learning_rate": 1.0559005803323364e-10, "loss": 0.0004, "step": 303440 }, { "epoch": 1.996342177457024, "grad_norm": 0.004125229967716085, "learning_rate": 1.0189193336118674e-10, "loss": 0.0006, "step": 303450 }, { "epoch": 1.9964079656322573, "grad_norm": 0.014382843578945452, "learning_rate": 9.82597280280162e-11, "loss": 0.0005, "step": 303460 }, { "epoch": 1.9964737538074906, "grad_norm": 0.024410047204019417, "learning_rate": 9.469344208090648e-11, "loss": 0.0007, "step": 303470 }, { "epoch": 1.9965395419827239, "grad_norm": 0.02269695629825082, "learning_rate": 9.119307556704205e-11, "loss": 0.0009, "step": 303480 }, { "epoch": 1.9966053301579574, "grad_norm": 0.05770546922019893, "learning_rate": 8.775862853249717e-11, "loss": 0.0005, "step": 303490 }, { "epoch": 1.9966711183331909, "grad_norm": 0.00903691279798381, "learning_rate": 8.439010102279099e-11, "loss": 0.0005, "step": 303500 }, { "epoch": 1.9967369065084242, "grad_norm": 0.02971959613211785, "learning_rate": 8.108749308233244e-11, "loss": 0.0007, "step": 303510 }, { "epoch": 1.9968026946836575, "grad_norm": 0.26815483026278525, "learning_rate": 7.78508047544202e-11, "loss": 0.0013, "step": 303520 }, { "epoch": 1.996868482858891, "grad_norm": 0.01777576638077182, "learning_rate": 7.468003608179786e-11, "loss": 0.0003, "step": 303530 }, { "epoch": 1.9969342710341245, "grad_norm": 0.03386259021371439, "learning_rate": 7.157518710609878e-11, "loss": 0.0004, "step": 303540 }, { "epoch": 1.9970000592093577, "grad_norm": 0.010031651438211784, "learning_rate": 6.853625786895634e-11, "loss": 0.0003, "step": 303550 }, { "epoch": 1.997065847384591, "grad_norm": 0.017935745950467628, "learning_rate": 6.556324840922834e-11, "loss": 0.0003, "step": 303560 }, { "epoch": 1.9971316355598243, "grad_norm": 0.000897244633806768, "learning_rate": 6.265615876743792e-11, "loss": 0.0013, "step": 303570 }, { "epoch": 1.9971974237350578, "grad_norm": 0.015144270383547574, "learning_rate": 5.981498898077753e-11, "loss": 0.0004, "step": 303580 }, { "epoch": 1.9972632119102913, "grad_norm": 0.00020051451226622914, "learning_rate": 5.7039739087549897e-11, "loss": 0.0003, "step": 303590 }, { "epoch": 1.9973290000855246, "grad_norm": 0.004540176452150821, "learning_rate": 5.4330409124392356e-11, "loss": 0.0002, "step": 303600 }, { "epoch": 1.997394788260758, "grad_norm": 0.0002222217578456899, "learning_rate": 5.1686999125721835e-11, "loss": 0.0002, "step": 303610 }, { "epoch": 1.9974605764359914, "grad_norm": 0.01643755448031625, "learning_rate": 4.9109509128175694e-11, "loss": 0.0007, "step": 303620 }, { "epoch": 1.997526364611225, "grad_norm": 0.0033528131315293187, "learning_rate": 4.6597939164505504e-11, "loss": 0.0005, "step": 303630 }, { "epoch": 1.9975921527864582, "grad_norm": 0.004346667693893262, "learning_rate": 4.415228926801796e-11, "loss": 0.0004, "step": 303640 }, { "epoch": 1.9976579409616915, "grad_norm": 0.05801454575402464, "learning_rate": 4.1772559471464636e-11, "loss": 0.0005, "step": 303650 }, { "epoch": 1.997723729136925, "grad_norm": 0.06774011188230682, "learning_rate": 3.9458749805931784e-11, "loss": 0.0004, "step": 303660 }, { "epoch": 1.9977895173121583, "grad_norm": 0.038642097790933146, "learning_rate": 3.721086030195053e-11, "loss": 0.0011, "step": 303670 }, { "epoch": 1.9978553054873918, "grad_norm": 0.004882091106739382, "learning_rate": 3.502889098838669e-11, "loss": 0.0002, "step": 303680 }, { "epoch": 1.997921093662625, "grad_norm": 0.04592395240780976, "learning_rate": 3.291284189521626e-11, "loss": 0.0003, "step": 303690 }, { "epoch": 1.9979868818378583, "grad_norm": 0.007713808437510288, "learning_rate": 3.086271305019484e-11, "loss": 0.0005, "step": 303700 }, { "epoch": 1.9980526700130918, "grad_norm": 0.02865123656972255, "learning_rate": 2.887850447941265e-11, "loss": 0.0003, "step": 303710 }, { "epoch": 1.9981184581883253, "grad_norm": 0.019204183561407635, "learning_rate": 2.6960216210070166e-11, "loss": 0.0005, "step": 303720 }, { "epoch": 1.9981842463635586, "grad_norm": 0.047307275036796194, "learning_rate": 2.5107848266592294e-11, "loss": 0.0004, "step": 303730 }, { "epoch": 1.998250034538792, "grad_norm": 0.00991288859275092, "learning_rate": 2.332140067395905e-11, "loss": 0.0002, "step": 303740 }, { "epoch": 1.9983158227140254, "grad_norm": 0.024007809063697174, "learning_rate": 2.1600873455485116e-11, "loss": 0.0004, "step": 303750 }, { "epoch": 1.998381610889259, "grad_norm": 0.01896954129219302, "learning_rate": 1.9946266633930066e-11, "loss": 0.0007, "step": 303760 }, { "epoch": 1.9984473990644922, "grad_norm": 0.021373566056751482, "learning_rate": 1.8357580231498363e-11, "loss": 0.0002, "step": 303770 }, { "epoch": 1.9985131872397255, "grad_norm": 0.06040603359819244, "learning_rate": 1.6834814268174015e-11, "loss": 0.0004, "step": 303780 }, { "epoch": 1.9985789754149588, "grad_norm": 0.004595128641506296, "learning_rate": 1.5377968765051267e-11, "loss": 0.0005, "step": 303790 }, { "epoch": 1.9986447635901923, "grad_norm": 0.01930880203559015, "learning_rate": 1.3987043741003903e-11, "loss": 0.0004, "step": 303800 }, { "epoch": 1.9987105517654258, "grad_norm": 0.007262949538612987, "learning_rate": 1.2662039213795496e-11, "loss": 0.0003, "step": 303810 }, { "epoch": 1.998776339940659, "grad_norm": 0.04745783016135326, "learning_rate": 1.1402955201744725e-11, "loss": 0.0005, "step": 303820 }, { "epoch": 1.9988421281158923, "grad_norm": 0.01551145317126522, "learning_rate": 1.0209791720949824e-11, "loss": 0.0003, "step": 303830 }, { "epoch": 1.9989079162911259, "grad_norm": 0.028470616198874137, "learning_rate": 9.082548787509027e-12, "loss": 0.0016, "step": 303840 }, { "epoch": 1.9989737044663594, "grad_norm": 0.010858513083076273, "learning_rate": 8.021226415300121e-12, "loss": 0.0002, "step": 303850 }, { "epoch": 1.9990394926415926, "grad_norm": 0.03236573889699246, "learning_rate": 7.025824619866229e-12, "loss": 0.0004, "step": 303860 }, { "epoch": 1.999105280816826, "grad_norm": 0.048607601091161474, "learning_rate": 6.096343412864691e-12, "loss": 0.0003, "step": 303870 }, { "epoch": 1.9991710689920592, "grad_norm": 0.01252746359644469, "learning_rate": 5.232782807618186e-12, "loss": 0.0001, "step": 303880 }, { "epoch": 1.9992368571672927, "grad_norm": 0.0790760704880376, "learning_rate": 4.435142815228943e-12, "loss": 0.0003, "step": 303890 }, { "epoch": 1.9993026453425262, "grad_norm": 0.25123474649621436, "learning_rate": 3.703423445688969e-12, "loss": 0.0013, "step": 303900 }, { "epoch": 1.9993684335177595, "grad_norm": 0.024292056679411464, "learning_rate": 3.0376247095453836e-12, "loss": 0.0003, "step": 303910 }, { "epoch": 1.9994342216929928, "grad_norm": 0.010180038951181209, "learning_rate": 2.437746614569747e-12, "loss": 0.0005, "step": 303920 }, { "epoch": 1.9995000098682263, "grad_norm": 0.004856623688672345, "learning_rate": 1.903789169088732e-12, "loss": 0.0004, "step": 303930 }, { "epoch": 1.9995657980434598, "grad_norm": 0.0056584496399265695, "learning_rate": 1.4357523808739004e-12, "loss": 0.0005, "step": 303940 }, { "epoch": 1.999631586218693, "grad_norm": 0.0005359020471822595, "learning_rate": 1.0336362549212554e-12, "loss": 0.0003, "step": 303950 }, { "epoch": 1.9996973743939264, "grad_norm": 0.07949964737118399, "learning_rate": 6.974407973370234e-13, "loss": 0.0004, "step": 303960 }, { "epoch": 1.9997631625691599, "grad_norm": 0.01119107723688146, "learning_rate": 4.2716601200698537e-13, "loss": 0.0007, "step": 303970 }, { "epoch": 1.9998289507443932, "grad_norm": 0.015929463850475778, "learning_rate": 2.2281190281692177e-13, "loss": 0.0003, "step": 303980 }, { "epoch": 1.9998947389196267, "grad_norm": 0.01444661927402747, "learning_rate": 8.437847254239018e-14, "loss": 0.0004, "step": 303990 }, { "epoch": 1.99996052709486, "grad_norm": 0.03734511719522013, "learning_rate": 1.1865722848725115e-14, "loss": 0.0008, "step": 304000 }, { "epoch": 2.0, "step": 304006, "total_flos": 4010407409123328.0, "train_loss": 0.0024581665951860818, "train_runtime": 623670.1327, "train_samples_per_second": 3.9, "train_steps_per_second": 0.487 } ], "logging_steps": 10, "max_steps": 304006, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4010407409123328.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }