{ "run_name": "run_large_20251112_150502", "timestamp": "20251112_150502", "phase": "large", "config": { "arch_layout": [ "m4", [ "T22" ], "m4" ], "d_model": [ 1024, 1536 ], "d_intermediate": [ 0, 4096 ], "vocab_size": 256, "ssm_cfg": { "chunk_size": 256, "d_conv": 4, "d_state": 128, "expand": 2 }, "attn_cfg": { "num_heads": [ 16, 16 ], "rotary_emb_dim": [ 32, 48 ], "window_size": [ 1023, -1 ] }, "tie_embeddings": false }, "training_args": { "data": "datasets/PI1M/PI1M_v2.csv", "max_samples": null, "batch_size": 16, "epochs": 22, "lr": 0.0001, "weight_decay": 0.1, "gradient_accumulation": 8, "concatenate": true, "num_concatenate": 10, "concatenate_separator": " ", "checkpoint_bytes": 1000000, "num_test_samples": 5, "num_visualize": 5, "skip_visualization": false }, "dataset_info": { "train_size": 99574, "test_size": 5, "test_smiles_file": "checkpoints/run_large_20251112_150502/test_smiles.txt" }, "model_info": { "num_parameters": 622923776, "device": "cuda", "dtype": "torch.bfloat16", "use_amp": true }, "training_history": [ { "checkpoint_type": "bytes", "bytes_threshold": 1000000, "cumulative_training_bytes": 1006216, "metrics": { "loss": 2.65240060560631, "ce_loss": 2.6424005681818183, "lb_loss": 0.9999999900658926 } }, { "checkpoint_type": "bytes", "bytes_threshold": 2000000, "cumulative_training_bytes": 2003501, "metrics": { "loss": 1.91590709904678, "ce_loss": 1.9059070849236641, "lb_loss": 0.9999999920375474 } }, { "checkpoint_type": "bytes", "bytes_threshold": 3000000, "cumulative_training_bytes": 3003412, "metrics": { "loss": 1.5786327193408218, "ce_loss": 1.5686327131043256, "lb_loss": 0.9999999945400325 } }, { "checkpoint_type": "bytes", "bytes_threshold": 4000000, "cumulative_training_bytes": 4003745, "metrics": { "loss": 1.373363253724484, "ce_loss": 1.3633632514312977, "lb_loss": 0.9999999956775257 } }, { "checkpoint_type": "bytes", "bytes_threshold": 5000000, "cumulative_training_bytes": 5000057, "metrics": { "loss": 1.2337313646205703, "ce_loss": 1.223731364678899, "lb_loss": 0.9999999960810402 } }, { "checkpoint_type": "bytes", "bytes_threshold": 6000000, "cumulative_training_bytes": 6004365, "metrics": { "loss": 1.1314371002707513, "ce_loss": 1.121437101910828, "lb_loss": 0.9999999965831732 } }, { "checkpoint_type": "bytes", "bytes_threshold": 7000000, "cumulative_training_bytes": 7005346, "metrics": { "loss": 1.0538173757786313, "ce_loss": 1.043817378548035, "lb_loss": 0.999999995575201 } }, { "checkpoint_type": "bytes", "bytes_threshold": 8000000, "cumulative_training_bytes": 8001141, "metrics": { "loss": 0.9934003011218449, "ce_loss": 0.9834003047323135, "lb_loss": 0.9999999950424435 } }, { "checkpoint_type": "bytes", "bytes_threshold": 9000000, "cumulative_training_bytes": 9001157, "metrics": { "loss": 0.944607644944041, "ce_loss": 0.9346076492141037, "lb_loss": 0.999999994632037 } }, { "checkpoint_type": "bytes", "bytes_threshold": 10000000, "cumulative_training_bytes": 10003863, "metrics": { "loss": 0.9043117426221888, "ce_loss": 0.8943117474197247, "lb_loss": 0.9999999956253471 } }, { "checkpoint_type": "bytes", "bytes_threshold": 11000000, "cumulative_training_bytes": 11001271, "metrics": { "loss": 0.8709831310744411, "ce_loss": 0.8609831363004172, "lb_loss": 0.9999999957306827 } }, { "checkpoint_type": "bytes", "bytes_threshold": 12000000, "cumulative_training_bytes": 12005542, "metrics": { "loss": 0.8426262691113658, "ce_loss": 0.8326262746972594, "lb_loss": 0.9999999964670288 } }, { "checkpoint_type": "bytes", "bytes_threshold": 13000000, "cumulative_training_bytes": 13006694, "metrics": { "loss": 0.8181169518302469, "ce_loss": 0.8081169577205882, "lb_loss": 0.9999999966340907 } }, { "checkpoint_type": "bytes", "bytes_threshold": 14000000, "cumulative_training_bytes": 14004830, "metrics": { "loss": 0.797012246241335, "ce_loss": 0.7870122523907104, "lb_loss": 0.9999999969057698 } }, { "checkpoint_type": "bytes", "bytes_threshold": 15000000, "cumulative_training_bytes": 15004637, "metrics": { "loss": 0.778617926120515, "ce_loss": 0.7686179324961754, "lb_loss": 0.9999999971732626 } }, { "checkpoint_type": "bytes", "bytes_threshold": 16000000, "cumulative_training_bytes": 16000188, "metrics": { "loss": 0.7623379522393846, "ce_loss": 0.7523379588115734, "lb_loss": 0.9999999962372964 } }, { "checkpoint_type": "bytes", "bytes_threshold": 17000000, "cumulative_training_bytes": 17001433, "metrics": { "loss": 0.7477327509037983, "ce_loss": 0.7377327576507651, "lb_loss": 0.9999999957885107 } }, { "checkpoint_type": "bytes", "bytes_threshold": 18000000, "cumulative_training_bytes": 18004044, "metrics": { "loss": 0.7345919368928714, "ce_loss": 0.7245919437951551, "lb_loss": 0.9999999957190034 } }, { "checkpoint_type": "bytes", "bytes_threshold": 19000000, "cumulative_training_bytes": 19005595, "metrics": { "loss": 0.7230352938847073, "ce_loss": 0.7130353009259259, "lb_loss": 0.9999999958487908 } }, { "checkpoint_type": "bytes", "bytes_threshold": 20000000, "cumulative_training_bytes": 20004400, "metrics": { "loss": 0.7124804242391204, "ce_loss": 0.7024804314053538, "lb_loss": 0.9999999958516079 } }, { "checkpoint_type": "bytes", "bytes_threshold": 21000000, "cumulative_training_bytes": 21006730, "metrics": { "loss": 0.7027192320688224, "ce_loss": 0.6927192393481427, "lb_loss": 0.9999999955936844 } }, { "checkpoint_type": "bytes", "bytes_threshold": 22000000, "cumulative_training_bytes": 22003387, "metrics": { "loss": 0.6940317696092523, "ce_loss": 0.6840317769906119, "lb_loss": 0.9999999956270584 } }, { "checkpoint_type": "bytes", "bytes_threshold": 23000000, "cumulative_training_bytes": 23005816, "metrics": { "loss": 0.6859377783162276, "ce_loss": 0.6759377857914866, "lb_loss": 0.9999999957184559 } }, { "checkpoint_type": "bytes", "bytes_threshold": 24000000, "cumulative_training_bytes": 24002853, "metrics": { "loss": 0.6783933619005801, "ce_loss": 0.6683933694612687, "lb_loss": 0.9999999955348768 } }, { "checkpoint_type": "bytes", "bytes_threshold": 25000000, "cumulative_training_bytes": 25001613, "metrics": { "loss": 0.6714792498981766, "ce_loss": 0.6614792575374961, "lb_loss": 0.9999999954023966 } }, { "checkpoint_type": "bytes", "bytes_threshold": 26000000, "cumulative_training_bytes": 26001520, "metrics": { "loss": 0.665006937754656, "ce_loss": 0.6550069454665882, "lb_loss": 0.9999999952274173 } }, { "checkpoint_type": "bytes", "bytes_threshold": 27000000, "cumulative_training_bytes": 27002890, "metrics": { "loss": 0.6590575319028495, "ce_loss": 0.6490575396825397, "lb_loss": 0.9999999950836305 } }, { "checkpoint_type": "bytes", "bytes_threshold": 28000000, "cumulative_training_bytes": 28003403, "metrics": { "loss": 0.6535597506296638, "ce_loss": 0.6435597584722602, "lb_loss": 0.9999999951619077 } }, { "checkpoint_type": "bytes", "bytes_threshold": 29000000, "cumulative_training_bytes": 29006503, "metrics": { "loss": 0.648446149750561, "ce_loss": 0.6384461576517151, "lb_loss": 0.9999999951718666 } }, { "checkpoint_type": "bytes", "bytes_threshold": 30000000, "cumulative_training_bytes": 30000438, "metrics": { "loss": 0.6435842753673087, "ce_loss": 0.6335842833227041, "lb_loss": 0.9999999954080095 } }, { "checkpoint_type": "bytes", "bytes_threshold": 31000000, "cumulative_training_bytes": 31000470, "metrics": { "loss": 0.6390364181845314, "ce_loss": 0.6290364261910639, "lb_loss": 0.9999999954240818 } }, { "checkpoint_type": "bytes", "bytes_threshold": 32000000, "cumulative_training_bytes": 32001821, "metrics": { "loss": 0.6348851022396517, "ce_loss": 0.6248851102941176, "lb_loss": 0.9999999954533999 } }, { "checkpoint_type": "bytes", "bytes_threshold": 33000000, "cumulative_training_bytes": 33002772, "metrics": { "loss": 0.6308636427589162, "ce_loss": 0.6208636508580705, "lb_loss": 0.9999999956319416 } }, { "checkpoint_type": "bytes", "bytes_threshold": 34000000, "cumulative_training_bytes": 34007477, "metrics": { "loss": 0.6269874756813264, "ce_loss": 0.6169874838228674, "lb_loss": 0.999999995760732 } }, { "checkpoint_type": "bytes", "bytes_threshold": 35000000, "cumulative_training_bytes": 35006427, "metrics": { "loss": 0.6233431712141606, "ce_loss": 0.6133431793953641, "lb_loss": 0.9999999954902237 } }, { "checkpoint_type": "bytes", "bytes_threshold": 36000000, "cumulative_training_bytes": 36003597, "metrics": { "loss": 0.6199277473362917, "ce_loss": 0.6099277555549649, "lb_loss": 0.9999999953360601 } }, { "checkpoint_type": "bytes", "bytes_threshold": 37000000, "cumulative_training_bytes": 37005705, "metrics": { "loss": 0.6167014650337747, "ce_loss": 0.6067014732881671, "lb_loss": 0.9999999952651668 } }, { "checkpoint_type": "bytes", "bytes_threshold": 38000000, "cumulative_training_bytes": 38004996, "metrics": { "loss": 0.613597062496674, "ce_loss": 0.6035970707846495, "lb_loss": 0.999999995257084 } }, { "checkpoint_type": "bytes", "bytes_threshold": 39000000, "cumulative_training_bytes": 39003622, "metrics": { "loss": 0.6106150688542822, "ce_loss": 0.6006150771741264, "lb_loss": 0.9999999950270958 } }, { "checkpoint_type": "bytes", "bytes_threshold": 40000000, "cumulative_training_bytes": 40003412, "metrics": { "loss": 0.6077792466508302, "ce_loss": 0.5977792550009571, "lb_loss": 0.9999999950253398 } }, { "checkpoint_type": "bytes", "bytes_threshold": 41000000, "cumulative_training_bytes": 41001660, "metrics": { "loss": 0.6051997902903125, "ce_loss": 0.5951997986694678, "lb_loss": 0.9999999949466837 } }, { "checkpoint_type": "bytes", "bytes_threshold": 42000000, "cumulative_training_bytes": 42003749, "metrics": { "loss": 0.6027762772521528, "ce_loss": 0.59277628565895, "lb_loss": 0.9999999948935139 } }, { "checkpoint_type": "bytes", "bytes_threshold": 43000000, "cumulative_training_bytes": 43005510, "metrics": { "loss": 0.600378182215773, "ce_loss": 0.5903781906489229, "lb_loss": 0.9999999950232191 } }, { "checkpoint_type": "bytes", "bytes_threshold": 44000000, "cumulative_training_bytes": 44002690, "metrics": { "loss": 0.5980829723362137, "ce_loss": 0.5880829807943275, "lb_loss": 0.9999999951150534 } }, { "checkpoint_type": "bytes", "bytes_threshold": 45000000, "cumulative_training_bytes": 45005962, "metrics": { "loss": 0.5958610677767789, "ce_loss": 0.5858610762589316, "lb_loss": 0.9999999951529397 } }, { "checkpoint_type": "bytes", "bytes_threshold": 46000000, "cumulative_training_bytes": 46002930, "metrics": { "loss": 0.5937242965882373, "ce_loss": 0.583724305093209, "lb_loss": 0.999999995138769 } }, { "checkpoint_type": "bytes", "bytes_threshold": 47000000, "cumulative_training_bytes": 47006814, "metrics": { "loss": 0.5916914568989765, "ce_loss": 0.5816914654259652, "lb_loss": 0.9999999951551201 } }, { "epoch": 1, "checkpoint_type": "epoch", "metrics": { "loss": 0.590403846496175, "ce_loss": 0.5804038550369537, "lb_loss": 0.999999995077637, "training_bytes": 47653393 }, "cumulative_training_bytes": 47653393, "training_bytes_this_epoch": 47653393 }, { "checkpoint_type": "bytes", "bytes_threshold": 48000000, "cumulative_training_bytes": 48005678, "metrics": { "loss": 0.492634161658909, "ce_loss": 0.48263417119565216, "lb_loss": 0.9999999961127406 } }, { "checkpoint_type": "bytes", "bytes_threshold": 49000000, "cumulative_training_bytes": 49006971, "metrics": { "loss": 0.49733226447455625, "ce_loss": 0.4873322740112994, "lb_loss": 0.9999999989897518 } }, { "checkpoint_type": "bytes", "bytes_threshold": 50000000, "cumulative_training_bytes": 50003595, "metrics": { "loss": 0.4957809859844295, "ce_loss": 0.48578099552117265, "lb_loss": 0.9999999959228093 } }, { "checkpoint_type": "bytes", "bytes_threshold": 51000000, "cumulative_training_bytes": 51000980, "metrics": { "loss": 0.49479064406737583, "ce_loss": 0.484790653604119, "lb_loss": 0.9999999971357035 } }, { "checkpoint_type": "bytes", "bytes_threshold": 52000000, "cumulative_training_bytes": 52001887, "metrics": { "loss": 0.4952002545477639, "ce_loss": 0.48520026408450706, "lb_loss": 0.9999999968518674 } }, { "checkpoint_type": "bytes", "bytes_threshold": 53000000, "cumulative_training_bytes": 53005646, "metrics": { "loss": 0.49493103034165453, "ce_loss": 0.4849310398783977, "lb_loss": 0.9999999980387599 } }, { "checkpoint_type": "bytes", "bytes_threshold": 54000000, "cumulative_training_bytes": 54005477, "metrics": { "loss": 0.49478444076446165, "ce_loss": 0.4847844503012048, "lb_loss": 0.9999999974865511 } }, { "checkpoint_type": "bytes", "bytes_threshold": 55000000, "cumulative_training_bytes": 55002218, "metrics": { "loss": 0.4945906480153402, "ce_loss": 0.48459065755208336, "lb_loss": 0.999999997826914 } }, { "checkpoint_type": "bytes", "bytes_threshold": 56000000, "cumulative_training_bytes": 56003171, "metrics": { "loss": 0.4947634672047784, "ce_loss": 0.48476347674152154, "lb_loss": 0.9999999967766507 } }, { "checkpoint_type": "bytes", "bytes_threshold": 57000000, "cumulative_training_bytes": 57006709, "metrics": { "loss": 0.4947378050090834, "ce_loss": 0.48473781454582654, "lb_loss": 0.9999999970734217 } }, { "checkpoint_type": "bytes", "bytes_threshold": 58000000, "cumulative_training_bytes": 58004016, "metrics": { "loss": 0.49519697872139296, "ce_loss": 0.4851969882581361, "lb_loss": 0.9999999964290117 } }, { "checkpoint_type": "bytes", "bytes_threshold": 59000000, "cumulative_training_bytes": 59006803, "metrics": { "loss": 0.49550630157249487, "ce_loss": 0.48550631110923803, "lb_loss": 0.9999999969052208 } }, { "checkpoint_type": "bytes", "bytes_threshold": 60000000, "cumulative_training_bytes": 60002534, "metrics": { "loss": 0.4955277337986567, "ce_loss": 0.4855277433353999, "lb_loss": 0.9999999967851183 } }, { "checkpoint_type": "bytes", "bytes_threshold": 61000000, "cumulative_training_bytes": 61006527, "metrics": { "loss": 0.49546354407564214, "ce_loss": 0.4854635536123853, "lb_loss": 0.9999999964797716 } }, { "checkpoint_type": "bytes", "bytes_threshold": 62000000, "cumulative_training_bytes": 62003543, "metrics": { "loss": 0.49532445517777124, "ce_loss": 0.4853244647145144, "lb_loss": 0.999999995547145 } }, { "checkpoint_type": "bytes", "bytes_threshold": 63000000, "cumulative_training_bytes": 63006878, "metrics": { "loss": 0.495203972695177, "ce_loss": 0.4852039822319202, "lb_loss": 0.9999999965812797 } }, { "checkpoint_type": "bytes", "bytes_threshold": 64000000, "cumulative_training_bytes": 64003765, "metrics": { "loss": 0.495035485193936, "ce_loss": 0.48503549473067914, "lb_loss": 0.9999999969290347 } }, { "checkpoint_type": "bytes", "bytes_threshold": 65000000, "cumulative_training_bytes": 65007467, "metrics": { "loss": 0.4949671338326302, "ce_loss": 0.48496714336937335, "lb_loss": 0.9999999965015809 } }, { "checkpoint_type": "bytes", "bytes_threshold": 66000000, "cumulative_training_bytes": 66003916, "metrics": { "loss": 0.49481925303629526, "ce_loss": 0.4848192625730384, "lb_loss": 0.9999999961689836 } }, { "checkpoint_type": "bytes", "bytes_threshold": 67000000, "cumulative_training_bytes": 67000894, "metrics": { "loss": 0.4946804078722038, "ce_loss": 0.48468041740894696, "lb_loss": 0.9999999959178133 } }, { "checkpoint_type": "bytes", "bytes_threshold": 68000000, "cumulative_training_bytes": 68004976, "metrics": { "loss": 0.494755765303302, "ce_loss": 0.4847557748400452, "lb_loss": 0.9999999962985449 } }, { "checkpoint_type": "bytes", "bytes_threshold": 69000000, "cumulative_training_bytes": 69000639, "metrics": { "loss": 0.4949356291428406, "ce_loss": 0.4849356386795838, "lb_loss": 0.9999999967278398 } }, { "checkpoint_type": "bytes", "bytes_threshold": 70000000, "cumulative_training_bytes": 70003667, "metrics": { "loss": 0.4949633375760053, "ce_loss": 0.48496334711274847, "lb_loss": 0.9999999967726066 } }, { "checkpoint_type": "bytes", "bytes_threshold": 71000000, "cumulative_training_bytes": 71005669, "metrics": { "loss": 0.4949598389791637, "ce_loss": 0.48495984851590684, "lb_loss": 0.9999999968330756 } }, { "checkpoint_type": "bytes", "bytes_threshold": 72000000, "cumulative_training_bytes": 72002076, "metrics": { "loss": 0.4950010472027976, "ce_loss": 0.48500105673954075, "lb_loss": 0.9999999969625818 } }, { "checkpoint_type": "bytes", "bytes_threshold": 73000000, "cumulative_training_bytes": 73005205, "metrics": { "loss": 0.49494027558410275, "ce_loss": 0.4849402851208459, "lb_loss": 0.9999999970467789 } }, { "checkpoint_type": "bytes", "bytes_threshold": 74000000, "cumulative_training_bytes": 74001335, "metrics": { "loss": 0.4949183452961057, "ce_loss": 0.48491835483284884, "lb_loss": 0.9999999970370947 } }, { "checkpoint_type": "bytes", "bytes_threshold": 75000000, "cumulative_training_bytes": 75003123, "metrics": { "loss": 0.49487051867664245, "ce_loss": 0.4848705282133856, "lb_loss": 0.9999999971290958 } }, { "checkpoint_type": "bytes", "bytes_threshold": 76000000, "cumulative_training_bytes": 76000007, "metrics": { "loss": 0.4947565385245916, "ce_loss": 0.4847565480613348, "lb_loss": 0.999999997036678 } }, { "checkpoint_type": "bytes", "bytes_threshold": 77000000, "cumulative_training_bytes": 77005449, "metrics": { "loss": 0.4946735897979816, "ce_loss": 0.48467359933472476, "lb_loss": 0.9999999965789143 } }, { "checkpoint_type": "bytes", "bytes_threshold": 78000000, "cumulative_training_bytes": 78003357, "metrics": { "loss": 0.4945139713489495, "ce_loss": 0.48451398088569264, "lb_loss": 0.9999999965858556 } }, { "checkpoint_type": "bytes", "bytes_threshold": 79000000, "cumulative_training_bytes": 79005000, "metrics": { "loss": 0.4944432114879271, "ce_loss": 0.48444322102467025, "lb_loss": 0.9999999965495113 } }, { "checkpoint_type": "bytes", "bytes_threshold": 80000000, "cumulative_training_bytes": 80002503, "metrics": { "loss": 0.49434447288513184, "ce_loss": 0.484344482421875, "lb_loss": 0.9999999965145958 } }, { "checkpoint_type": "bytes", "bytes_threshold": 81000000, "cumulative_training_bytes": 81006707, "metrics": { "loss": 0.4942619738157253, "ce_loss": 0.48426198335246845, "lb_loss": 0.9999999964552002 } }, { "checkpoint_type": "bytes", "bytes_threshold": 82000000, "cumulative_training_bytes": 82000170, "metrics": { "loss": 0.4941502329922488, "ce_loss": 0.48415024252899197, "lb_loss": 0.9999999965571804 } }, { "checkpoint_type": "bytes", "bytes_threshold": 83000000, "cumulative_training_bytes": 83002106, "metrics": { "loss": 0.4940558883505808, "ce_loss": 0.48405589788732395, "lb_loss": 0.9999999969907081 } }, { "checkpoint_type": "bytes", "bytes_threshold": 84000000, "cumulative_training_bytes": 84002576, "metrics": { "loss": 0.4939967936922918, "ce_loss": 0.483996803229035, "lb_loss": 0.9999999966467678 } }, { "checkpoint_type": "bytes", "bytes_threshold": 85000000, "cumulative_training_bytes": 85005146, "metrics": { "loss": 0.493969307538303, "ce_loss": 0.48396931707504615, "lb_loss": 0.9999999965168498 } }, { "checkpoint_type": "bytes", "bytes_threshold": 86000000, "cumulative_training_bytes": 86000950, "metrics": { "loss": 0.49385267483014317, "ce_loss": 0.48385268436688633, "lb_loss": 0.9999999967263277 } }, { "checkpoint_type": "bytes", "bytes_threshold": 87000000, "cumulative_training_bytes": 87002861, "metrics": { "loss": 0.4937762804836928, "ce_loss": 0.483776290020436, "lb_loss": 0.9999999968097942 } }, { "checkpoint_type": "bytes", "bytes_threshold": 88000000, "cumulative_training_bytes": 88007699, "metrics": { "loss": 0.49367403024784595, "ce_loss": 0.4836740397845891, "lb_loss": 0.9999999967759872 } }, { "checkpoint_type": "bytes", "bytes_threshold": 89000000, "cumulative_training_bytes": 89003123, "metrics": { "loss": 0.49364641225432926, "ce_loss": 0.4836464217910724, "lb_loss": 0.9999999966438577 } }, { "checkpoint_type": "bytes", "bytes_threshold": 90000000, "cumulative_training_bytes": 90001615, "metrics": { "loss": 0.49368486275077944, "ce_loss": 0.4836848722875226, "lb_loss": 0.9999999966694693 } }, { "checkpoint_type": "bytes", "bytes_threshold": 91000000, "cumulative_training_bytes": 91003122, "metrics": { "loss": 0.49359353345477774, "ce_loss": 0.4835935429915209, "lb_loss": 0.9999999963990834 } }, { "checkpoint_type": "bytes", "bytes_threshold": 92000000, "cumulative_training_bytes": 92003002, "metrics": { "loss": 0.4935082090351788, "ce_loss": 0.48350821857192194, "lb_loss": 0.9999999964799191 } }, { "checkpoint_type": "bytes", "bytes_threshold": 93000000, "cumulative_training_bytes": 93004006, "metrics": { "loss": 0.49344559055613085, "ce_loss": 0.483445600092874, "lb_loss": 0.9999999965275916 } }, { "checkpoint_type": "bytes", "bytes_threshold": 94000000, "cumulative_training_bytes": 94007117, "metrics": { "loss": 0.4933724529818427, "ce_loss": 0.48337246251858584, "lb_loss": 0.9999999964254938 } }, { "checkpoint_type": "bytes", "bytes_threshold": 95000000, "cumulative_training_bytes": 95006117, "metrics": { "loss": 0.4932983081985414, "ce_loss": 0.4832983177352846, "lb_loss": 0.9999999962602519 } }, { "epoch": 2, "checkpoint_type": "epoch", "metrics": { "loss": 0.49327259198558976, "ce_loss": 0.4832726015223329, "lb_loss": 0.9999999961789439, "training_bytes": 47653382 }, "cumulative_training_bytes": 95306775, "training_bytes_this_epoch": 47653382 }, { "checkpoint_type": "bytes", "bytes_threshold": 96000000, "cumulative_training_bytes": 96002802, "metrics": { "loss": 0.48830098634237773, "ce_loss": 0.4783009958791209, "lb_loss": 0.9999999927950429 } }, { "checkpoint_type": "bytes", "bytes_threshold": 97000000, "cumulative_training_bytes": 97003339, "metrics": { "loss": 0.48807572244523884, "ce_loss": 0.478075731981982, "lb_loss": 0.9999999940932334 } }, { "checkpoint_type": "bytes", "bytes_threshold": 98000000, "cumulative_training_bytes": 98005141, "metrics": { "loss": 0.4878903941459764, "ce_loss": 0.47789040368271957, "lb_loss": 0.9999999947655978 } }, { "checkpoint_type": "bytes", "bytes_threshold": 99000000, "cumulative_training_bytes": 99004099, "metrics": { "loss": 0.487553206042967, "ce_loss": 0.47755321557971014, "lb_loss": 0.9999999979021139 } }, { "checkpoint_type": "bytes", "bytes_threshold": 100000000, "cumulative_training_bytes": 100001082, "metrics": { "loss": 0.4875374598759811, "ce_loss": 0.4775374694127243, "lb_loss": 0.9999999975691417 } }, { "checkpoint_type": "bytes", "bytes_threshold": 101000000, "cumulative_training_bytes": 101002438, "metrics": { "loss": 0.48744717208288046, "ce_loss": 0.47744718161962363, "lb_loss": 0.9999999971960181 } }, { "checkpoint_type": "bytes", "bytes_threshold": 102000000, "cumulative_training_bytes": 102006394, "metrics": { "loss": 0.48746427617754257, "ce_loss": 0.47746428571428573, "lb_loss": 0.9999999976158143 } }, { "checkpoint_type": "bytes", "bytes_threshold": 103000000, "cumulative_training_bytes": 103004065, "metrics": { "loss": 0.487491439468232, "ce_loss": 0.47749144900497514, "lb_loss": 0.9999999987545298 } }, { "checkpoint_type": "bytes", "bytes_threshold": 104000000, "cumulative_training_bytes": 104001074, "metrics": { "loss": 0.4877171573134771, "ce_loss": 0.47771716685022025, "lb_loss": 0.9999999984245468 } }, { "checkpoint_type": "bytes", "bytes_threshold": 105000000, "cumulative_training_bytes": 105004888, "metrics": { "loss": 0.4877519528447734, "ce_loss": 0.47775196238151657, "lb_loss": 0.999999998210919 } }, { "checkpoint_type": "bytes", "bytes_threshold": 106000000, "cumulative_training_bytes": 106007608, "metrics": { "loss": 0.4879144388347672, "ce_loss": 0.47791444837151037, "lb_loss": 0.999999997525362 } }, { "checkpoint_type": "bytes", "bytes_threshold": 107000000, "cumulative_training_bytes": 107001146, "metrics": { "loss": 0.4879950379419733, "ce_loss": 0.47799504747871646, "lb_loss": 0.9999999976970045 } }, { "checkpoint_type": "bytes", "bytes_threshold": 108000000, "cumulative_training_bytes": 108007350, "metrics": { "loss": 0.4880941454512014, "ce_loss": 0.47809415498794455, "lb_loss": 0.9999999979161728 } }, { "checkpoint_type": "bytes", "bytes_threshold": 109000000, "cumulative_training_bytes": 109006285, "metrics": { "loss": 0.4880014053598471, "ce_loss": 0.47800141489659026, "lb_loss": 0.9999999974678854 } }, { "checkpoint_type": "bytes", "bytes_threshold": 110000000, "cumulative_training_bytes": 110005030, "metrics": { "loss": 0.4881331284840902, "ce_loss": 0.47813313802083335, "lb_loss": 0.9999999972991646 } }, { "checkpoint_type": "bytes", "bytes_threshold": 111000000, "cumulative_training_bytes": 111001255, "metrics": { "loss": 0.48820883192667147, "ce_loss": 0.47820884146341464, "lb_loss": 0.9999999975285879 } }, { "checkpoint_type": "bytes", "bytes_threshold": 112000000, "cumulative_training_bytes": 112000565, "metrics": { "loss": 0.48811692710316507, "ce_loss": 0.47811693663990823, "lb_loss": 0.9999999980860894 } }, { "checkpoint_type": "bytes", "bytes_threshold": 113000000, "cumulative_training_bytes": 113002042, "metrics": { "loss": 0.4881750229167402, "ce_loss": 0.47817503245348336, "lb_loss": 0.9999999979108714 } }, { "checkpoint_type": "bytes", "bytes_threshold": 114000000, "cumulative_training_bytes": 114007396, "metrics": { "loss": 0.4890242917531831, "ce_loss": 0.4790243012899263, "lb_loss": 0.999999997876493 } }, { "checkpoint_type": "bytes", "bytes_threshold": 115000000, "cumulative_training_bytes": 115004141, "metrics": { "loss": 0.48968308777184605, "ce_loss": 0.4796830973085892, "lb_loss": 0.9999999972201488 } }, { "checkpoint_type": "bytes", "bytes_threshold": 116000000, "cumulative_training_bytes": 116007191, "metrics": { "loss": 0.48999201616591953, "ce_loss": 0.4799920257026627, "lb_loss": 0.9999999972446078 } }, { "checkpoint_type": "bytes", "bytes_threshold": 117000000, "cumulative_training_bytes": 117006642, "metrics": { "loss": 0.49022899178600815, "ce_loss": 0.4802290013227513, "lb_loss": 0.9999999971827082 } }, { "checkpoint_type": "bytes", "bytes_threshold": 118000000, "cumulative_training_bytes": 118004076, "metrics": { "loss": 0.49022830517657895, "ce_loss": 0.4802283147133221, "lb_loss": 0.9999999977886979 } }, { "checkpoint_type": "bytes", "bytes_threshold": 119000000, "cumulative_training_bytes": 119004633, "metrics": { "loss": 0.4901273379996058, "ce_loss": 0.48012734753634895, "lb_loss": 0.9999999980934217 } }, { "checkpoint_type": "bytes", "bytes_threshold": 120000000, "cumulative_training_bytes": 120007595, "metrics": { "loss": 0.4901242494139698, "ce_loss": 0.480124258950713, "lb_loss": 0.9999999980969378 } }, { "checkpoint_type": "bytes", "bytes_threshold": 121000000, "cumulative_training_bytes": 121001712, "metrics": { "loss": 0.4901183884817312, "ce_loss": 0.4801183980184744, "lb_loss": 0.9999999977088798 } }, { "checkpoint_type": "bytes", "bytes_threshold": 122000000, "cumulative_training_bytes": 122006493, "metrics": { "loss": 0.4901035447362709, "ce_loss": 0.48010355427301404, "lb_loss": 0.9999999979146066 } }, { "checkpoint_type": "bytes", "bytes_threshold": 123000000, "cumulative_training_bytes": 123005264, "metrics": { "loss": 0.4900173131754493, "ce_loss": 0.48001732271219244, "lb_loss": 0.999999997643499 } }, { "checkpoint_type": "bytes", "bytes_threshold": 124000000, "cumulative_training_bytes": 124002472, "metrics": { "loss": 0.4899558301349942, "ce_loss": 0.4799558396717374, "lb_loss": 0.9999999973275739 } }, { "checkpoint_type": "bytes", "bytes_threshold": 125000000, "cumulative_training_bytes": 125002266, "metrics": { "loss": 0.4899716454722821, "ce_loss": 0.4799716550090253, "lb_loss": 0.9999999974024794 } }, { "checkpoint_type": "bytes", "bytes_threshold": 126000000, "cumulative_training_bytes": 126000850, "metrics": { "loss": 0.4900195558162127, "ce_loss": 0.48001956535295587, "lb_loss": 0.9999999972346062 } }, { "checkpoint_type": "bytes", "bytes_threshold": 127000000, "cumulative_training_bytes": 127007464, "metrics": { "loss": 0.49008660731108294, "ce_loss": 0.4800866168478261, "lb_loss": 0.9999999973796992 } }, { "checkpoint_type": "bytes", "bytes_threshold": 128000000, "cumulative_training_bytes": 128005894, "metrics": { "loss": 0.49008085990119593, "ce_loss": 0.4800808694379391, "lb_loss": 0.9999999971523776 } }, { "checkpoint_type": "bytes", "bytes_threshold": 129000000, "cumulative_training_bytes": 129003149, "metrics": { "loss": 0.4900173022530296, "ce_loss": 0.48001731178977275, "lb_loss": 0.9999999971552329 } }, { "checkpoint_type": "bytes", "bytes_threshold": 130000000, "cumulative_training_bytes": 130006682, "metrics": { "loss": 0.48996655758420143, "ce_loss": 0.4799665671209446, "lb_loss": 0.9999999974084937 } }, { "checkpoint_type": "bytes", "bytes_threshold": 131000000, "cumulative_training_bytes": 131001772, "metrics": { "loss": 0.4899122611669685, "ce_loss": 0.4799122707037117, "lb_loss": 0.9999999974551975 } }, { "checkpoint_type": "bytes", "bytes_threshold": 132000000, "cumulative_training_bytes": 132002042, "metrics": { "loss": 0.48989445976104484, "ce_loss": 0.479894469297788, "lb_loss": 0.9999999973630667 } }, { "checkpoint_type": "bytes", "bytes_threshold": 133000000, "cumulative_training_bytes": 133001074, "metrics": { "loss": 0.48980843977248073, "ce_loss": 0.4798084493092239, "lb_loss": 0.9999999974206035 } }, { "checkpoint_type": "bytes", "bytes_threshold": 134000000, "cumulative_training_bytes": 134006568, "metrics": { "loss": 0.48973665680008643, "ce_loss": 0.4797366663368296, "lb_loss": 0.9999999974284954 } }, { "checkpoint_type": "bytes", "bytes_threshold": 135000000, "cumulative_training_bytes": 135007246, "metrics": { "loss": 0.4897220016997538, "ce_loss": 0.47972201123649694, "lb_loss": 0.9999999975164732 } }, { "checkpoint_type": "bytes", "bytes_threshold": 136000000, "cumulative_training_bytes": 136007771, "metrics": { "loss": 0.4896772011876218, "ce_loss": 0.479677210724365, "lb_loss": 0.9999999976561861 } }, { "checkpoint_type": "bytes", "bytes_threshold": 137000000, "cumulative_training_bytes": 137003574, "metrics": { "loss": 0.4896326092649097, "ce_loss": 0.47963261880165287, "lb_loss": 0.9999999975807848 } }, { "checkpoint_type": "bytes", "bytes_threshold": 138000000, "cumulative_training_bytes": 138005969, "metrics": { "loss": 0.48960041247276187, "ce_loss": 0.47960042200950503, "lb_loss": 0.9999999974772783 } }, { "checkpoint_type": "bytes", "bytes_threshold": 139000000, "cumulative_training_bytes": 139005934, "metrics": { "loss": 0.48958851661535074, "ce_loss": 0.4795885261520939, "lb_loss": 0.999999997409856 } }, { "checkpoint_type": "bytes", "bytes_threshold": 140000000, "cumulative_training_bytes": 140005424, "metrics": { "loss": 0.48959421686784743, "ce_loss": 0.4795942264045906, "lb_loss": 0.9999999973352497 } }, { "checkpoint_type": "bytes", "bytes_threshold": 141000000, "cumulative_training_bytes": 141006781, "metrics": { "loss": 0.4895374967457162, "ce_loss": 0.4795375062824594, "lb_loss": 0.9999999973438037 } }, { "checkpoint_type": "bytes", "bytes_threshold": 142000000, "cumulative_training_bytes": 142002589, "metrics": { "loss": 0.48945935367853804, "ce_loss": 0.4794593632152812, "lb_loss": 0.9999999972538276 } }, { "epoch": 3, "checkpoint_type": "epoch", "metrics": { "loss": 0.48934782746518485, "ce_loss": 0.479347837001928, "lb_loss": 0.9999999973377103, "training_bytes": 47653416 }, "cumulative_training_bytes": 142960191, "training_bytes_this_epoch": 47653416 }, { "checkpoint_type": "bytes", "bytes_threshold": 143000000, "cumulative_training_bytes": 143005811, "metrics": { "loss": 0.4911197821299235, "ce_loss": 0.4811197916666667, "lb_loss": 0.9999999503294627 } }, { "checkpoint_type": "bytes", "bytes_threshold": 144000000, "cumulative_training_bytes": 144005584, "metrics": { "loss": 0.4834604684044333, "ce_loss": 0.47346047794117646, "lb_loss": 0.9999999938642278 } }, { "checkpoint_type": "bytes", "bytes_threshold": 145000000, "cumulative_training_bytes": 145002597, "metrics": { "loss": 0.4844184585083696, "ce_loss": 0.4744184680451128, "lb_loss": 0.9999999959666029 } }, { "checkpoint_type": "bytes", "bytes_threshold": 146000000, "cumulative_training_bytes": 146003208, "metrics": { "loss": 0.4850127817101082, "ce_loss": 0.47501279124685136, "lb_loss": 0.9999999965468341 } }, { "checkpoint_type": "bytes", "bytes_threshold": 147000000, "cumulative_training_bytes": 147007482, "metrics": { "loss": 0.48537507924166595, "ce_loss": 0.4753750887784091, "lb_loss": 0.999999994807171 } }, { "checkpoint_type": "bytes", "bytes_threshold": 148000000, "cumulative_training_bytes": 148004641, "metrics": { "loss": 0.48546422868514133, "ce_loss": 0.4754642382218845, "lb_loss": 0.9999999916662199 } }, { "checkpoint_type": "bytes", "bytes_threshold": 149000000, "cumulative_training_bytes": 149002815, "metrics": { "loss": 0.48531080502543955, "ce_loss": 0.4753108145621827, "lb_loss": 0.9999999919064759 } }, { "checkpoint_type": "bytes", "bytes_threshold": 150000000, "cumulative_training_bytes": 150002243, "metrics": { "loss": 0.48544034682887166, "ce_loss": 0.4754403563656148, "lb_loss": 0.9999999926061703 } }, { "checkpoint_type": "bytes", "bytes_threshold": 151000000, "cumulative_training_bytes": 151002455, "metrics": { "loss": 0.48702938034420923, "ce_loss": 0.4770293898809524, "lb_loss": 0.9999999929609753 } }, { "checkpoint_type": "bytes", "bytes_threshold": 152000000, "cumulative_training_bytes": 152006519, "metrics": { "loss": 0.487319925899328, "ce_loss": 0.47731993543607115, "lb_loss": 0.9999999927323718 } }, { "checkpoint_type": "bytes", "bytes_threshold": 153000000, "cumulative_training_bytes": 153001757, "metrics": { "loss": 0.4873133485868266, "ce_loss": 0.4773133581235698, "lb_loss": 0.9999999942259421 } }, { "checkpoint_type": "bytes", "bytes_threshold": 154000000, "cumulative_training_bytes": 154001996, "metrics": { "loss": 0.4871629306957343, "ce_loss": 0.47716294023247746, "lb_loss": 0.9999999934645845 } }, { "checkpoint_type": "bytes", "bytes_threshold": 155000000, "cumulative_training_bytes": 155007554, "metrics": { "loss": 0.487115378901552, "ce_loss": 0.47711538843829515, "lb_loss": 0.9999999934404558 } }, { "checkpoint_type": "bytes", "bytes_threshold": 156000000, "cumulative_training_bytes": 156006842, "metrics": { "loss": 0.4871095489629632, "ce_loss": 0.4771095584997064, "lb_loss": 0.999999993630038 } }, { "checkpoint_type": "bytes", "bytes_threshold": 157000000, "cumulative_training_bytes": 157000485, "metrics": { "loss": 0.4868759286455712, "ce_loss": 0.4768759381823144, "lb_loss": 0.9999999938833661 } }, { "checkpoint_type": "bytes", "bytes_threshold": 158000000, "cumulative_training_bytes": 158002706, "metrics": { "loss": 0.4868062579747189, "ce_loss": 0.47680626751146205, "lb_loss": 0.9999999946255619 } }, { "checkpoint_type": "bytes", "bytes_threshold": 159000000, "cumulative_training_bytes": 159005682, "metrics": { "loss": 0.48680872971827116, "ce_loss": 0.4768087392550143, "lb_loss": 0.9999999952748945 } }, { "checkpoint_type": "bytes", "bytes_threshold": 160000000, "cumulative_training_bytes": 160001107, "metrics": { "loss": 0.4867460350338504, "ce_loss": 0.47674604457059355, "lb_loss": 0.9999999951758831 } }, { "checkpoint_type": "bytes", "bytes_threshold": 161000000, "cumulative_training_bytes": 161002648, "metrics": { "loss": 0.4867391425333205, "ce_loss": 0.4767391520700637, "lb_loss": 0.9999999951405131 } }, { "checkpoint_type": "bytes", "bytes_threshold": 162000000, "cumulative_training_bytes": 162004313, "metrics": { "loss": 0.4866803379089527, "ce_loss": 0.4766803474456959, "lb_loss": 0.9999999951088706 } }, { "checkpoint_type": "bytes", "bytes_threshold": 163000000, "cumulative_training_bytes": 163005127, "metrics": { "loss": 0.48664981015183156, "ce_loss": 0.4766498196885747, "lb_loss": 0.9999999953081556 } }, { "checkpoint_type": "bytes", "bytes_threshold": 164000000, "cumulative_training_bytes": 164000917, "metrics": { "loss": 0.4866300357399223, "ce_loss": 0.47663004527666547, "lb_loss": 0.9999999955301941 } }, { "checkpoint_type": "bytes", "bytes_threshold": 165000000, "cumulative_training_bytes": 165003825, "metrics": { "loss": 0.48657877781558484, "ce_loss": 0.476578787352328, "lb_loss": 0.9999999957129391 } }, { "checkpoint_type": "bytes", "bytes_threshold": 166000000, "cumulative_training_bytes": 166000316, "metrics": { "loss": 0.4866410308836623, "ce_loss": 0.47664104042040545, "lb_loss": 0.9999999958599632 } }, { "checkpoint_type": "bytes", "bytes_threshold": 167000000, "cumulative_training_bytes": 167004464, "metrics": { "loss": 0.4865475621193078, "ce_loss": 0.47654757165605094, "lb_loss": 0.9999999961845435 } }, { "checkpoint_type": "bytes", "bytes_threshold": 168000000, "cumulative_training_bytes": 168000312, "metrics": { "loss": 0.4865374044540825, "ce_loss": 0.47653741399082566, "lb_loss": 0.9999999960081293 } }, { "checkpoint_type": "bytes", "bytes_threshold": 169000000, "cumulative_training_bytes": 169004041, "metrics": { "loss": 0.48646658576610896, "ce_loss": 0.4764665953028521, "lb_loss": 0.9999999961969398 } }, { "checkpoint_type": "bytes", "bytes_threshold": 170000000, "cumulative_training_bytes": 170004115, "metrics": { "loss": 0.4864624011017053, "ce_loss": 0.47646241063844846, "lb_loss": 0.9999999961523616 } }, { "checkpoint_type": "bytes", "bytes_threshold": 171000000, "cumulative_training_bytes": 171002131, "metrics": { "loss": 0.4865150353376767, "ce_loss": 0.47651504487441987, "lb_loss": 0.9999999958831627 } }, { "checkpoint_type": "bytes", "bytes_threshold": 172000000, "cumulative_training_bytes": 172000593, "metrics": { "loss": 0.48652182180880243, "ce_loss": 0.4765218313455456, "lb_loss": 0.9999999957268151 } }, { "checkpoint_type": "bytes", "bytes_threshold": 173000000, "cumulative_training_bytes": 173003618, "metrics": { "loss": 0.48656199285179186, "ce_loss": 0.476562002388535, "lb_loss": 0.999999995732763 } }, { "checkpoint_type": "bytes", "bytes_threshold": 174000000, "cumulative_training_bytes": 174000074, "metrics": { "loss": 0.4865022831574615, "ce_loss": 0.4765022926942047, "lb_loss": 0.9999999959283633 } }, { "checkpoint_type": "bytes", "bytes_threshold": 175000000, "cumulative_training_bytes": 175004955, "metrics": { "loss": 0.4864411783514556, "ce_loss": 0.4764411878881988, "lb_loss": 0.9999999958849158 } }, { "checkpoint_type": "bytes", "bytes_threshold": 176000000, "cumulative_training_bytes": 176002879, "metrics": { "loss": 0.4863869083270196, "ce_loss": 0.47638691786376275, "lb_loss": 0.9999999958569524 } }, { "checkpoint_type": "bytes", "bytes_threshold": 177000000, "cumulative_training_bytes": 177007060, "metrics": { "loss": 0.48635870139197285, "ce_loss": 0.476358710928716, "lb_loss": 0.9999999955903017 } }, { "checkpoint_type": "bytes", "bytes_threshold": 178000000, "cumulative_training_bytes": 178007425, "metrics": { "loss": 0.4863939705036675, "ce_loss": 0.47639398004041067, "lb_loss": 0.9999999956123273 } }, { "checkpoint_type": "bytes", "bytes_threshold": 179000000, "cumulative_training_bytes": 179000051, "metrics": { "loss": 0.4864388644543359, "ce_loss": 0.47643887399107904, "lb_loss": 0.9999999957081618 } }, { "checkpoint_type": "bytes", "bytes_threshold": 180000000, "cumulative_training_bytes": 180003883, "metrics": { "loss": 0.48640507803300265, "ce_loss": 0.4764050875697458, "lb_loss": 0.9999999957381263 } }, { "checkpoint_type": "bytes", "bytes_threshold": 181000000, "cumulative_training_bytes": 181006010, "metrics": { "loss": 0.48639232886868944, "ce_loss": 0.4763923384054326, "lb_loss": 0.9999999955626321 } }, { "checkpoint_type": "bytes", "bytes_threshold": 182000000, "cumulative_training_bytes": 182004597, "metrics": { "loss": 0.4863602845809039, "ce_loss": 0.4763602941176471, "lb_loss": 0.9999999954653721 } }, { "checkpoint_type": "bytes", "bytes_threshold": 183000000, "cumulative_training_bytes": 183005222, "metrics": { "loss": 0.48625701125675586, "ce_loss": 0.476257020793499, "lb_loss": 0.999999995703451 } }, { "checkpoint_type": "bytes", "bytes_threshold": 184000000, "cumulative_training_bytes": 184003606, "metrics": { "loss": 0.4863026811115777, "ce_loss": 0.47630269064832087, "lb_loss": 0.9999999957186962 } }, { "checkpoint_type": "bytes", "bytes_threshold": 185000000, "cumulative_training_bytes": 185005739, "metrics": { "loss": 0.48644190962643297, "ce_loss": 0.47644191916317613, "lb_loss": 0.9999999957339964 } }, { "checkpoint_type": "bytes", "bytes_threshold": 186000000, "cumulative_training_bytes": 186001058, "metrics": { "loss": 0.48650585296548066, "ce_loss": 0.4765058625022238, "lb_loss": 0.999999995779639 } }, { "checkpoint_type": "bytes", "bytes_threshold": 187000000, "cumulative_training_bytes": 187002105, "metrics": { "loss": 0.4865529829006699, "ce_loss": 0.47655299243741306, "lb_loss": 0.9999999958343068 } }, { "checkpoint_type": "bytes", "bytes_threshold": 188000000, "cumulative_training_bytes": 188006850, "metrics": { "loss": 0.4865126912536699, "ce_loss": 0.4765127007904131, "lb_loss": 0.9999999957649598 } }, { "checkpoint_type": "bytes", "bytes_threshold": 189000000, "cumulative_training_bytes": 189001113, "metrics": { "loss": 0.4865072715625417, "ce_loss": 0.47650728109928486, "lb_loss": 0.9999999957573943 } }, { "checkpoint_type": "bytes", "bytes_threshold": 190000000, "cumulative_training_bytes": 190003438, "metrics": { "loss": 0.4864817460378011, "ce_loss": 0.47648175557454425, "lb_loss": 0.9999999960709829 } }, { "epoch": 4, "checkpoint_type": "epoch", "metrics": { "loss": 0.48646113130917584, "ce_loss": 0.476461140845919, "lb_loss": 0.9999999960640249, "training_bytes": 47653407 }, "cumulative_training_bytes": 190613598, "training_bytes_this_epoch": 47653407 }, { "checkpoint_type": "bytes", "bytes_threshold": 191000000, "cumulative_training_bytes": 191004593, "metrics": { "loss": 0.4838434340907078, "ce_loss": 0.47384344362745096, "lb_loss": 0.9999999871440962 } }, { "checkpoint_type": "bytes", "bytes_threshold": 192000000, "cumulative_training_bytes": 192004632, "metrics": { "loss": 0.4837293860676524, "ce_loss": 0.4737293956043956, "lb_loss": 0.9999999944325332 } }, { "checkpoint_type": "bytes", "bytes_threshold": 193000000, "cumulative_training_bytes": 193001112, "metrics": { "loss": 0.48330102211389786, "ce_loss": 0.473301031650641, "lb_loss": 0.9999999961791894 } }, { "checkpoint_type": "bytes", "bytes_threshold": 194000000, "cumulative_training_bytes": 194004855, "metrics": { "loss": 0.4841243908582907, "ce_loss": 0.47412440039503384, "lb_loss": 0.9999999960981159 } }, { "checkpoint_type": "bytes", "bytes_threshold": 195000000, "cumulative_training_bytes": 195000605, "metrics": { "loss": 0.48411852776692177, "ce_loss": 0.47411853730366493, "lb_loss": 0.9999999941747643 } }, { "checkpoint_type": "bytes", "bytes_threshold": 196000000, "cumulative_training_bytes": 196000298, "metrics": { "loss": 0.4837925535855897, "ce_loss": 0.47379256312233287, "lb_loss": 0.9999999937258268 } }, { "checkpoint_type": "bytes", "bytes_threshold": 197000000, "cumulative_training_bytes": 197005504, "metrics": { "loss": 0.4837077430112185, "ce_loss": 0.47370775254796166, "lb_loss": 0.9999999942825281 } }, { "checkpoint_type": "bytes", "bytes_threshold": 198000000, "cumulative_training_bytes": 198005978, "metrics": { "loss": 0.48402241598139156, "ce_loss": 0.4740224255181347, "lb_loss": 0.9999999936380535 } }, { "checkpoint_type": "bytes", "bytes_threshold": 199000000, "cumulative_training_bytes": 199002040, "metrics": { "loss": 0.4838441685454486, "ce_loss": 0.47384417808219176, "lb_loss": 0.9999999936312846 } }, { "checkpoint_type": "bytes", "bytes_threshold": 200000000, "cumulative_training_bytes": 200003598, "metrics": { "loss": 0.48375069142165816, "ce_loss": 0.4737507009584013, "lb_loss": 0.999999993242214 } }, { "checkpoint_type": "bytes", "bytes_threshold": 201000000, "cumulative_training_bytes": 201005092, "metrics": { "loss": 0.48378752730924063, "ce_loss": 0.4737875368459838, "lb_loss": 0.9999999931918055 } }, { "checkpoint_type": "bytes", "bytes_threshold": 202000000, "cumulative_training_bytes": 202001683, "metrics": { "loss": 0.4836702367645346, "ce_loss": 0.47367024630127774, "lb_loss": 0.9999999932258339 } }, { "checkpoint_type": "bytes", "bytes_threshold": 203000000, "cumulative_training_bytes": 203001980, "metrics": { "loss": 0.4837692078612791, "ce_loss": 0.47376921739802225, "lb_loss": 0.9999999935164292 } }, { "checkpoint_type": "bytes", "bytes_threshold": 204000000, "cumulative_training_bytes": 204006015, "metrics": { "loss": 0.4837763001402151, "ce_loss": 0.4737763096769583, "lb_loss": 0.9999999933204629 } }, { "checkpoint_type": "bytes", "bytes_threshold": 205000000, "cumulative_training_bytes": 205004293, "metrics": { "loss": 0.483796516208334, "ce_loss": 0.4737965257450772, "lb_loss": 0.9999999935288199 } }, { "checkpoint_type": "bytes", "bytes_threshold": 206000000, "cumulative_training_bytes": 206000909, "metrics": { "loss": 0.48376647876589496, "ce_loss": 0.4737664883026381, "lb_loss": 0.9999999937398805 } }, { "checkpoint_type": "bytes", "bytes_threshold": 207000000, "cumulative_training_bytes": 207000656, "metrics": { "loss": 0.4837419186423125, "ce_loss": 0.47374192817905564, "lb_loss": 0.9999999936744954 } }, { "checkpoint_type": "bytes", "bytes_threshold": 208000000, "cumulative_training_bytes": 208006392, "metrics": { "loss": 0.48362419888836694, "ce_loss": 0.4736242084251101, "lb_loss": 0.9999999941183082 } }, { "checkpoint_type": "bytes", "bytes_threshold": 209000000, "cumulative_training_bytes": 209002552, "metrics": { "loss": 0.4836824448903402, "ce_loss": 0.47368245442708334, "lb_loss": 0.9999999945114056 } }, { "checkpoint_type": "bytes", "bytes_threshold": 210000000, "cumulative_training_bytes": 210001732, "metrics": { "loss": 0.48379524184610945, "ce_loss": 0.4737952513828526, "lb_loss": 0.9999999946306365 } }, { "checkpoint_type": "bytes", "bytes_threshold": 211000000, "cumulative_training_bytes": 211005793, "metrics": { "loss": 0.48375900037122077, "ce_loss": 0.47375900990796393, "lb_loss": 0.9999999952083418 } }, { "checkpoint_type": "bytes", "bytes_threshold": 212000000, "cumulative_training_bytes": 212001830, "metrics": { "loss": 0.48375242487407344, "ce_loss": 0.4737524344108166, "lb_loss": 0.9999999954314491 } }, { "checkpoint_type": "bytes", "bytes_threshold": 213000000, "cumulative_training_bytes": 213002388, "metrics": { "loss": 0.48380419419230236, "ce_loss": 0.4738042037290455, "lb_loss": 0.9999999950856245 } }, { "checkpoint_type": "bytes", "bytes_threshold": 214000000, "cumulative_training_bytes": 214006969, "metrics": { "loss": 0.4837882087834926, "ce_loss": 0.47378821832023577, "lb_loss": 0.9999999953159415 } }, { "checkpoint_type": "bytes", "bytes_threshold": 215000000, "cumulative_training_bytes": 215004172, "metrics": { "loss": 0.483773889254086, "ce_loss": 0.47377389879082915, "lb_loss": 0.9999999951327866 } }, { "checkpoint_type": "bytes", "bytes_threshold": 216000000, "cumulative_training_bytes": 216001255, "metrics": { "loss": 0.4837848567502212, "ce_loss": 0.47378486628696437, "lb_loss": 0.9999999949999725 } }, { "checkpoint_type": "bytes", "bytes_threshold": 217000000, "cumulative_training_bytes": 217004188, "metrics": { "loss": 0.4841484393021538, "ce_loss": 0.47414844883889695, "lb_loss": 0.9999999951381988 } }, { "checkpoint_type": "bytes", "bytes_threshold": 218000000, "cumulative_training_bytes": 218001725, "metrics": { "loss": 0.4844258503625857, "ce_loss": 0.47442585989932884, "lb_loss": 0.9999999952662978 } }, { "checkpoint_type": "bytes", "bytes_threshold": 219000000, "cumulative_training_bytes": 219003579, "metrics": { "loss": 0.48454192548483765, "ce_loss": 0.4745419350215808, "lb_loss": 0.9999999954175011 } }, { "checkpoint_type": "bytes", "bytes_threshold": 220000000, "cumulative_training_bytes": 220002944, "metrics": { "loss": 0.4846480412058311, "ce_loss": 0.47464805074257427, "lb_loss": 0.9999999953409605 } }, { "checkpoint_type": "bytes", "bytes_threshold": 221000000, "cumulative_training_bytes": 221001782, "metrics": { "loss": 0.48471910264516666, "ce_loss": 0.4747191121819098, "lb_loss": 0.9999999951493322 } }, { "checkpoint_type": "bytes", "bytes_threshold": 222000000, "cumulative_training_bytes": 222000475, "metrics": { "loss": 0.4846779797594266, "ce_loss": 0.4746779892961698, "lb_loss": 0.9999999951286762 } }, { "checkpoint_type": "bytes", "bytes_threshold": 223000000, "cumulative_training_bytes": 223000890, "metrics": { "loss": 0.484692938897063, "ce_loss": 0.47469294843380616, "lb_loss": 0.9999999951386283 } }, { "checkpoint_type": "bytes", "bytes_threshold": 224000000, "cumulative_training_bytes": 224000466, "metrics": { "loss": 0.484730315864633, "ce_loss": 0.47473032540137616, "lb_loss": 0.9999999953656021 } }, { "checkpoint_type": "bytes", "bytes_threshold": 225000000, "cumulative_training_bytes": 225004782, "metrics": { "loss": 0.4847559259453321, "ce_loss": 0.4747559354820753, "lb_loss": 0.999999995487513 } }, { "checkpoint_type": "bytes", "bytes_threshold": 226000000, "cumulative_training_bytes": 226004131, "metrics": { "loss": 0.48467487820400557, "ce_loss": 0.47467488774074873, "lb_loss": 0.9999999957305481 } }, { "checkpoint_type": "bytes", "bytes_threshold": 227000000, "cumulative_training_bytes": 227007102, "metrics": { "loss": 0.48469074567159015, "ce_loss": 0.4746907552083333, "lb_loss": 0.9999999959736762 } }, { "checkpoint_type": "bytes", "bytes_threshold": 228000000, "cumulative_training_bytes": 228004925, "metrics": { "loss": 0.48469536216098363, "ce_loss": 0.4746953716977268, "lb_loss": 0.9999999959108016 } }, { "checkpoint_type": "bytes", "bytes_threshold": 229000000, "cumulative_training_bytes": 229001388, "metrics": { "loss": 0.4846923528211264, "ce_loss": 0.47469236235786955, "lb_loss": 0.9999999959217248 } }, { "checkpoint_type": "bytes", "bytes_threshold": 230000000, "cumulative_training_bytes": 230000104, "metrics": { "loss": 0.4846553168778009, "ce_loss": 0.47465532641454405, "lb_loss": 0.9999999958393803 } }, { "checkpoint_type": "bytes", "bytes_threshold": 231000000, "cumulative_training_bytes": 231004256, "metrics": { "loss": 0.4846512128513873, "ce_loss": 0.47465122238813046, "lb_loss": 0.9999999958523124 } }, { "checkpoint_type": "bytes", "bytes_threshold": 232000000, "cumulative_training_bytes": 232007293, "metrics": { "loss": 0.484631408132082, "ce_loss": 0.47463141766882516, "lb_loss": 0.9999999958756453 } }, { "checkpoint_type": "bytes", "bytes_threshold": 233000000, "cumulative_training_bytes": 233002626, "metrics": { "loss": 0.48464676948313035, "ce_loss": 0.4746467790198735, "lb_loss": 0.9999999958325207 } }, { "checkpoint_type": "bytes", "bytes_threshold": 234000000, "cumulative_training_bytes": 234002992, "metrics": { "loss": 0.4846627954623744, "ce_loss": 0.47466280499911756, "lb_loss": 0.9999999956448424 } }, { "checkpoint_type": "bytes", "bytes_threshold": 235000000, "cumulative_training_bytes": 235001903, "metrics": { "loss": 0.48463665597990335, "ce_loss": 0.4746366655166465, "lb_loss": 0.999999995661004 } }, { "checkpoint_type": "bytes", "bytes_threshold": 236000000, "cumulative_training_bytes": 236005937, "metrics": { "loss": 0.4846179318009761, "ce_loss": 0.4746179413377193, "lb_loss": 0.999999995596013 } }, { "checkpoint_type": "bytes", "bytes_threshold": 237000000, "cumulative_training_bytes": 237000271, "metrics": { "loss": 0.4846171031654688, "ce_loss": 0.47461711270221196, "lb_loss": 0.9999999953855103 } }, { "checkpoint_type": "bytes", "bytes_threshold": 238000000, "cumulative_training_bytes": 238005016, "metrics": { "loss": 0.48460431618227445, "ce_loss": 0.4746043257190176, "lb_loss": 0.9999999954639218 } }, { "epoch": 5, "checkpoint_type": "epoch", "metrics": { "loss": 0.4846178382098828, "ce_loss": 0.47461784774662596, "lb_loss": 0.9999999954128174, "training_bytes": 47653413 }, "cumulative_training_bytes": 238267011, "training_bytes_this_epoch": 47653413 }, { "checkpoint_type": "bytes", "bytes_threshold": 239000000, "cumulative_training_bytes": 239004387, "metrics": { "loss": 0.4854953190715043, "ce_loss": 0.47549532860824745, "lb_loss": 0.9999999821800547 } }, { "checkpoint_type": "bytes", "bytes_threshold": 240000000, "cumulative_training_bytes": 240001325, "metrics": { "loss": 0.4832241094059881, "ce_loss": 0.47322411894273125, "lb_loss": 0.9999999915975831 } }, { "checkpoint_type": "bytes", "bytes_threshold": 241000000, "cumulative_training_bytes": 241000019, "metrics": { "loss": 0.483070870351525, "ce_loss": 0.4730708798882682, "lb_loss": 0.9999999950051973 } }, { "checkpoint_type": "bytes", "bytes_threshold": 242000000, "cumulative_training_bytes": 242004419, "metrics": { "loss": 0.48300372896018934, "ce_loss": 0.4730037384969325, "lb_loss": 0.9999999974402913 } }, { "checkpoint_type": "bytes", "bytes_threshold": 243000000, "cumulative_training_bytes": 243002073, "metrics": { "loss": 0.48269094866196444, "ce_loss": 0.4726909581987076, "lb_loss": 0.9999999974964123 } }, { "checkpoint_type": "bytes", "bytes_threshold": 244000000, "cumulative_training_bytes": 244003061, "metrics": { "loss": 0.48260155296325685, "ce_loss": 0.4726015625, "lb_loss": 0.9999999978542328 } }, { "checkpoint_type": "bytes", "bytes_threshold": 245000000, "cumulative_training_bytes": 245004215, "metrics": { "loss": 0.48258308141104345, "ce_loss": 0.4725830909477866, "lb_loss": 0.9999999963465939 } }, { "checkpoint_type": "bytes", "bytes_threshold": 246000000, "cumulative_training_bytes": 246005003, "metrics": { "loss": 0.48249219340298016, "ce_loss": 0.4724922029397233, "lb_loss": 0.9999999967017192 } }, { "checkpoint_type": "bytes", "bytes_threshold": 247000000, "cumulative_training_bytes": 247006730, "metrics": { "loss": 0.4825161212594073, "ce_loss": 0.47251613079615046, "lb_loss": 0.9999999975490653 } }, { "checkpoint_type": "bytes", "bytes_threshold": 248000000, "cumulative_training_bytes": 248005797, "metrics": { "loss": 0.4824859365551657, "ce_loss": 0.47248594609190886, "lb_loss": 0.9999999967692691 } }, { "checkpoint_type": "bytes", "bytes_threshold": 249000000, "cumulative_training_bytes": 249006395, "metrics": { "loss": 0.48267154272465285, "ce_loss": 0.472671552261396, "lb_loss": 0.9999999966461774 } }, { "checkpoint_type": "bytes", "bytes_threshold": 250000000, "cumulative_training_bytes": 250006518, "metrics": { "loss": 0.4824703498178853, "ce_loss": 0.47247035935462844, "lb_loss": 0.9999999973578123 } }, { "checkpoint_type": "bytes", "bytes_threshold": 251000000, "cumulative_training_bytes": 251001307, "metrics": { "loss": 0.482704364336454, "ce_loss": 0.47270437387319714, "lb_loss": 0.9999999973851328 } }, { "checkpoint_type": "bytes", "bytes_threshold": 252000000, "cumulative_training_bytes": 252005393, "metrics": { "loss": 0.48272043788665514, "ce_loss": 0.4727204474233983, "lb_loss": 0.9999999973767315 } }, { "checkpoint_type": "bytes", "bytes_threshold": 253000000, "cumulative_training_bytes": 253007637, "metrics": { "loss": 0.4827677896584801, "ce_loss": 0.47276779919522327, "lb_loss": 0.9999999973075784 } }, { "checkpoint_type": "bytes", "bytes_threshold": 254000000, "cumulative_training_bytes": 254001252, "metrics": { "loss": 0.48291178065051366, "ce_loss": 0.4729117901872568, "lb_loss": 0.9999999969559885 } }, { "checkpoint_type": "bytes", "bytes_threshold": 255000000, "cumulative_training_bytes": 255003892, "metrics": { "loss": 0.4829045114966359, "ce_loss": 0.47290452103337904, "lb_loss": 0.9999999969475445 } }, { "checkpoint_type": "bytes", "bytes_threshold": 256000000, "cumulative_training_bytes": 256002122, "metrics": { "loss": 0.48277088224141823, "ce_loss": 0.4727708917781614, "lb_loss": 0.9999999973760579 } }, { "checkpoint_type": "bytes", "bytes_threshold": 257000000, "cumulative_training_bytes": 257002914, "metrics": { "loss": 0.4828110224281261, "ce_loss": 0.4728110319648693, "lb_loss": 0.9999999978816977 } }, { "checkpoint_type": "bytes", "bytes_threshold": 258000000, "cumulative_training_bytes": 258007111, "metrics": { "loss": 0.48270698081804553, "ce_loss": 0.4727069903547887, "lb_loss": 0.9999999975501774 } }, { "checkpoint_type": "bytes", "bytes_threshold": 259000000, "cumulative_training_bytes": 259001323, "metrics": { "loss": 0.4827247332188862, "ce_loss": 0.47272474275562937, "lb_loss": 0.9999999972056884 } }, { "checkpoint_type": "bytes", "bytes_threshold": 260000000, "cumulative_training_bytes": 260005225, "metrics": { "loss": 0.48267412118508785, "ce_loss": 0.472674130721831, "lb_loss": 0.9999999975654441 } }, { "checkpoint_type": "bytes", "bytes_threshold": 261000000, "cumulative_training_bytes": 261001447, "metrics": { "loss": 0.48262796289591675, "ce_loss": 0.4726279724326599, "lb_loss": 0.999999997290698 } }, { "checkpoint_type": "bytes", "bytes_threshold": 262000000, "cumulative_training_bytes": 262002819, "metrics": { "loss": 0.48269655004081247, "ce_loss": 0.47269655957755563, "lb_loss": 0.9999999973474876 } }, { "checkpoint_type": "bytes", "bytes_threshold": 263000000, "cumulative_training_bytes": 263000855, "metrics": { "loss": 0.4826931146895026, "ce_loss": 0.47269312422624576, "lb_loss": 0.9999999976571371 } }, { "checkpoint_type": "bytes", "bytes_threshold": 264000000, "cumulative_training_bytes": 264003342, "metrics": { "loss": 0.48269632542012775, "ce_loss": 0.4726963349568709, "lb_loss": 0.9999999976243241 } }, { "checkpoint_type": "bytes", "bytes_threshold": 265000000, "cumulative_training_bytes": 265002459, "metrics": { "loss": 0.48277645857805784, "ce_loss": 0.472776468114801, "lb_loss": 0.9999999977475484 } }, { "checkpoint_type": "bytes", "bytes_threshold": 266000000, "cumulative_training_bytes": 266001589, "metrics": { "loss": 0.4827284785946397, "ce_loss": 0.47272848813138285, "lb_loss": 0.9999999978448224 } }, { "checkpoint_type": "bytes", "bytes_threshold": 267000000, "cumulative_training_bytes": 267007878, "metrics": { "loss": 0.48272075498110445, "ce_loss": 0.4727207645178476, "lb_loss": 0.9999999976659876 } }, { "checkpoint_type": "bytes", "bytes_threshold": 268000000, "cumulative_training_bytes": 268004701, "metrics": { "loss": 0.48267082352839585, "ce_loss": 0.472670833065139, "lb_loss": 0.9999999977287622 } }, { "checkpoint_type": "bytes", "bytes_threshold": 269000000, "cumulative_training_bytes": 269000341, "metrics": { "loss": 0.482630938395494, "ce_loss": 0.47263094793223714, "lb_loss": 0.9999999978171692 } }, { "checkpoint_type": "bytes", "bytes_threshold": 270000000, "cumulative_training_bytes": 270005290, "metrics": { "loss": 0.4826039372364776, "ce_loss": 0.47260394677322076, "lb_loss": 0.9999999979149159 } }, { "checkpoint_type": "bytes", "bytes_threshold": 271000000, "cumulative_training_bytes": 271001402, "metrics": { "loss": 0.4826388793521457, "ce_loss": 0.4726388888888889, "lb_loss": 0.9999999976436994 } }, { "checkpoint_type": "bytes", "bytes_threshold": 272000000, "cumulative_training_bytes": 272007043, "metrics": { "loss": 0.4826473747120085, "ce_loss": 0.4726473842487517, "lb_loss": 0.9999999977002293 } }, { "checkpoint_type": "bytes", "bytes_threshold": 273000000, "cumulative_training_bytes": 273004999, "metrics": { "loss": 0.4826480611873036, "ce_loss": 0.47264807072404674, "lb_loss": 0.9999999978585944 } }, { "checkpoint_type": "bytes", "bytes_threshold": 274000000, "cumulative_training_bytes": 274003293, "metrics": { "loss": 0.4827403403186553, "ce_loss": 0.4727403498553985, "lb_loss": 0.9999999979697647 } }, { "checkpoint_type": "bytes", "bytes_threshold": 275000000, "cumulative_training_bytes": 275006178, "metrics": { "loss": 0.4827058927345634, "ce_loss": 0.47270590227130654, "lb_loss": 0.999999997876142 } }, { "checkpoint_type": "bytes", "bytes_threshold": 276000000, "cumulative_training_bytes": 276000051, "metrics": { "loss": 0.4826930918783512, "ce_loss": 0.47269310141509435, "lb_loss": 0.9999999979442504 } }, { "checkpoint_type": "bytes", "bytes_threshold": 277000000, "cumulative_training_bytes": 277003305, "metrics": { "loss": 0.48266627628341496, "ce_loss": 0.4726662858201581, "lb_loss": 0.9999999977972196 } }, { "checkpoint_type": "bytes", "bytes_threshold": 278000000, "cumulative_training_bytes": 278006257, "metrics": { "loss": 0.48266338925443386, "ce_loss": 0.472663398791177, "lb_loss": 0.9999999979102205 } }, { "checkpoint_type": "bytes", "bytes_threshold": 279000000, "cumulative_training_bytes": 279003019, "metrics": { "loss": 0.48267459345141694, "ce_loss": 0.4726746029881601, "lb_loss": 0.9999999979052681 } }, { "checkpoint_type": "bytes", "bytes_threshold": 280000000, "cumulative_training_bytes": 280004197, "metrics": { "loss": 0.4827149918388988, "ce_loss": 0.472715001375642, "lb_loss": 0.9999999978790717 } }, { "checkpoint_type": "bytes", "bytes_threshold": 281000000, "cumulative_training_bytes": 281006273, "metrics": { "loss": 0.4827076661080715, "ce_loss": 0.47270767564481464, "lb_loss": 0.9999999981530354 } }, { "checkpoint_type": "bytes", "bytes_threshold": 282000000, "cumulative_training_bytes": 282004302, "metrics": { "loss": 0.4826935047070867, "ce_loss": 0.4726935142438299, "lb_loss": 0.999999998111598 } }, { "checkpoint_type": "bytes", "bytes_threshold": 283000000, "cumulative_training_bytes": 283002020, "metrics": { "loss": 0.4826445411007718, "ce_loss": 0.472644550637515, "lb_loss": 0.9999999981536127 } }, { "checkpoint_type": "bytes", "bytes_threshold": 284000000, "cumulative_training_bytes": 284001833, "metrics": { "loss": 0.4826856697701378, "ce_loss": 0.472685679306881, "lb_loss": 0.9999999981439036 } }, { "checkpoint_type": "bytes", "bytes_threshold": 285000000, "cumulative_training_bytes": 285006144, "metrics": { "loss": 0.48266743957449537, "ce_loss": 0.47266744911123854, "lb_loss": 0.9999999983399754 } }, { "epoch": 6, "checkpoint_type": "epoch", "metrics": { "loss": 0.4826675374587581, "ce_loss": 0.4726675469955013, "lb_loss": 0.9999999983528279, "training_bytes": 47653391 }, "cumulative_training_bytes": 285920402, "training_bytes_this_epoch": 47653391 }, { "checkpoint_type": "bytes", "bytes_threshold": 286000000, "cumulative_training_bytes": 286004101, "metrics": { "loss": 0.4783948768268932, "ce_loss": 0.46839488636363635, "lb_loss": 0.9999999837441877 } }, { "checkpoint_type": "bytes", "bytes_threshold": 287000000, "cumulative_training_bytes": 287000512, "metrics": { "loss": 0.4791101411724767, "ce_loss": 0.46911015070921985, "lb_loss": 1.0 } }, { "checkpoint_type": "bytes", "bytes_threshold": 288000000, "cumulative_training_bytes": 288007385, "metrics": { "loss": 0.47894315667204806, "ce_loss": 0.46894316620879123, "lb_loss": 0.9999999986900078 } }, { "checkpoint_type": "bytes", "bytes_threshold": 289000000, "cumulative_training_bytes": 289001067, "metrics": { "loss": 0.47977744083546525, "ce_loss": 0.4697774503722084, "lb_loss": 1.000000001331121 } }, { "checkpoint_type": "bytes", "bytes_threshold": 290000000, "cumulative_training_bytes": 290005553, "metrics": { "loss": 0.47966071728909954, "ce_loss": 0.4696607268258427, "lb_loss": 1.0000000011161918 } }, { "checkpoint_type": "bytes", "bytes_threshold": 291000000, "cumulative_training_bytes": 291003954, "metrics": { "loss": 0.479647133723799, "ce_loss": 0.4696471432605422, "lb_loss": 1.0000000008078942 } }, { "checkpoint_type": "bytes", "bytes_threshold": 292000000, "cumulative_training_bytes": 292001941, "metrics": { "loss": 0.47993072094184624, "ce_loss": 0.4699307304785894, "lb_loss": 0.9999999990991741 } }, { "checkpoint_type": "bytes", "bytes_threshold": 293000000, "cumulative_training_bytes": 293006561, "metrics": { "loss": 0.4797550580308244, "ce_loss": 0.4697550675675676, "lb_loss": 0.9999999986468134 } }, { "checkpoint_type": "bytes", "bytes_threshold": 294000000, "cumulative_training_bytes": 294002191, "metrics": { "loss": 0.4799922149893232, "ce_loss": 0.46999222452606637, "lb_loss": 0.9999999979660974 } }, { "checkpoint_type": "bytes", "bytes_threshold": 295000000, "cumulative_training_bytes": 295005131, "metrics": { "loss": 0.4803671647887206, "ce_loss": 0.47036717432546377, "lb_loss": 0.9999999961302212 } }, { "checkpoint_type": "bytes", "bytes_threshold": 296000000, "cumulative_training_bytes": 296006078, "metrics": { "loss": 0.48054443332582325, "ce_loss": 0.4705444428625664, "lb_loss": 0.9999999972392686 } }, { "checkpoint_type": "bytes", "bytes_threshold": 297000000, "cumulative_training_bytes": 297002743, "metrics": { "loss": 0.4806437253457724, "ce_loss": 0.47064373488251554, "lb_loss": 0.9999999974461037 } }, { "checkpoint_type": "bytes", "bytes_threshold": 298000000, "cumulative_training_bytes": 298000425, "metrics": { "loss": 0.4805099084245758, "ce_loss": 0.47050991796131897, "lb_loss": 0.9999999969763022 } }, { "checkpoint_type": "bytes", "bytes_threshold": 299000000, "cumulative_training_bytes": 299006190, "metrics": { "loss": 0.4805110048075191, "ce_loss": 0.4705110143442623, "lb_loss": 0.9999999977665707 } }, { "checkpoint_type": "bytes", "bytes_threshold": 300000000, "cumulative_training_bytes": 300006090, "metrics": { "loss": 0.4805480549684227, "ce_loss": 0.47054806450516584, "lb_loss": 0.9999999978284333 } }, { "checkpoint_type": "bytes", "bytes_threshold": 301000000, "cumulative_training_bytes": 301000170, "metrics": { "loss": 0.4807665994652883, "ce_loss": 0.4707666090020315, "lb_loss": 0.9999999979718074 } }, { "checkpoint_type": "bytes", "bytes_threshold": 302000000, "cumulative_training_bytes": 302000958, "metrics": { "loss": 0.4808426243918283, "ce_loss": 0.47084263392857145, "lb_loss": 0.9999999977009637 } }, { "checkpoint_type": "bytes", "bytes_threshold": 303000000, "cumulative_training_bytes": 303002933, "metrics": { "loss": 0.4808974649702044, "ce_loss": 0.4708974745069476, "lb_loss": 0.9999999980229746 } }, { "checkpoint_type": "bytes", "bytes_threshold": 304000000, "cumulative_training_bytes": 304005876, "metrics": { "loss": 0.48091397358137705, "ce_loss": 0.4709139831181202, "lb_loss": 0.9999999976531617 } }, { "checkpoint_type": "bytes", "bytes_threshold": 305000000, "cumulative_training_bytes": 305007296, "metrics": { "loss": 0.4807987339423901, "ce_loss": 0.47079874347913325, "lb_loss": 0.9999999981582834 } }, { "checkpoint_type": "bytes", "bytes_threshold": 306000000, "cumulative_training_bytes": 306001572, "metrics": { "loss": 0.4808528401486115, "ce_loss": 0.4708528496853547, "lb_loss": 0.9999999983177942 } }, { "checkpoint_type": "bytes", "bytes_threshold": 307000000, "cumulative_training_bytes": 307002658, "metrics": { "loss": 0.4808918300473469, "ce_loss": 0.4708918395840901, "lb_loss": 0.999999998073079 } }, { "checkpoint_type": "bytes", "bytes_threshold": 308000000, "cumulative_training_bytes": 308001046, "metrics": { "loss": 0.4807877983023134, "ce_loss": 0.47078780783905655, "lb_loss": 0.9999999978498498 } }, { "checkpoint_type": "bytes", "bytes_threshold": 309000000, "cumulative_training_bytes": 309007041, "metrics": { "loss": 0.48085081446823197, "ce_loss": 0.47085082400497513, "lb_loss": 0.9999999974695208 } }, { "checkpoint_type": "bytes", "bytes_threshold": 310000000, "cumulative_training_bytes": 310000806, "metrics": { "loss": 0.4808527825936225, "ce_loss": 0.4708527921303657, "lb_loss": 0.9999999975172628 } }, { "checkpoint_type": "bytes", "bytes_threshold": 311000000, "cumulative_training_bytes": 311006686, "metrics": { "loss": 0.4808801844879821, "ce_loss": 0.47088019402472525, "lb_loss": 0.9999999978712627 } }, { "checkpoint_type": "bytes", "bytes_threshold": 312000000, "cumulative_training_bytes": 312003061, "metrics": { "loss": 0.48086826519901726, "ce_loss": 0.47086827473576043, "lb_loss": 0.9999999976900138 } }, { "checkpoint_type": "bytes", "bytes_threshold": 313000000, "cumulative_training_bytes": 313000859, "metrics": { "loss": 0.4808953347788677, "ce_loss": 0.47089534431561086, "lb_loss": 0.9999999976232311 } }, { "checkpoint_type": "bytes", "bytes_threshold": 314000000, "cumulative_training_bytes": 314006832, "metrics": { "loss": 0.4809533795717628, "ce_loss": 0.470953389108506, "lb_loss": 0.9999999977575134 } }, { "checkpoint_type": "bytes", "bytes_threshold": 315000000, "cumulative_training_bytes": 315007663, "metrics": { "loss": 0.4810331776506746, "ce_loss": 0.47103318718741777, "lb_loss": 0.9999999979289779 } }, { "checkpoint_type": "bytes", "bytes_threshold": 316000000, "cumulative_training_bytes": 316001449, "metrics": { "loss": 0.4811241632807677, "ce_loss": 0.47112417281751084, "lb_loss": 0.9999999979064798 } }, { "checkpoint_type": "bytes", "bytes_threshold": 317000000, "cumulative_training_bytes": 317001561, "metrics": { "loss": 0.48121448784626175, "ce_loss": 0.4712144973830049, "lb_loss": 0.9999999978419007 } }, { "checkpoint_type": "bytes", "bytes_threshold": 318000000, "cumulative_training_bytes": 318006954, "metrics": { "loss": 0.48121900889561187, "ce_loss": 0.47121901843235503, "lb_loss": 0.999999997795581 } }, { "checkpoint_type": "bytes", "bytes_threshold": 319000000, "cumulative_training_bytes": 319003501, "metrics": { "loss": 0.4812446206125278, "ce_loss": 0.471244630149271, "lb_loss": 0.999999997834314 } }, { "checkpoint_type": "bytes", "bytes_threshold": 320000000, "cumulative_training_bytes": 320000438, "metrics": { "loss": 0.48125381685058555, "ce_loss": 0.4712538263873287, "lb_loss": 0.999999997991306 } }, { "checkpoint_type": "bytes", "bytes_threshold": 321000000, "cumulative_training_bytes": 321001768, "metrics": { "loss": 0.48127089427163744, "ce_loss": 0.4712709038083806, "lb_loss": 0.9999999977365325 } }, { "checkpoint_type": "bytes", "bytes_threshold": 322000000, "cumulative_training_bytes": 322002583, "metrics": { "loss": 0.4812634008441183, "ce_loss": 0.47126341038086145, "lb_loss": 0.9999999978753278 } }, { "checkpoint_type": "bytes", "bytes_threshold": 323000000, "cumulative_training_bytes": 323002023, "metrics": { "loss": 0.48125308707762515, "ce_loss": 0.4712530966143683, "lb_loss": 0.9999999980927498 } }, { "checkpoint_type": "bytes", "bytes_threshold": 324000000, "cumulative_training_bytes": 324002741, "metrics": { "loss": 0.4813194785285835, "ce_loss": 0.47131948806532664, "lb_loss": 0.99999999827476 } }, { "checkpoint_type": "bytes", "bytes_threshold": 325000000, "cumulative_training_bytes": 325006173, "metrics": { "loss": 0.48130787273411463, "ce_loss": 0.4713078822708578, "lb_loss": 0.9999999981555946 } }, { "checkpoint_type": "bytes", "bytes_threshold": 326000000, "cumulative_training_bytes": 326000306, "metrics": { "loss": 0.4813103896229207, "ce_loss": 0.4713103991596639, "lb_loss": 0.9999999982583059 } }, { "checkpoint_type": "bytes", "bytes_threshold": 327000000, "cumulative_training_bytes": 327005036, "metrics": { "loss": 0.48133414187465984, "ce_loss": 0.471334151411403, "lb_loss": 0.9999999982786063 } }, { "checkpoint_type": "bytes", "bytes_threshold": 328000000, "cumulative_training_bytes": 328006990, "metrics": { "loss": 0.4813546307870109, "ce_loss": 0.47135464032375407, "lb_loss": 0.9999999983087805 } }, { "checkpoint_type": "bytes", "bytes_threshold": 329000000, "cumulative_training_bytes": 329005070, "metrics": { "loss": 0.4813430519415795, "ce_loss": 0.47134306147832267, "lb_loss": 0.9999999982843013 } }, { "checkpoint_type": "bytes", "bytes_threshold": 330000000, "cumulative_training_bytes": 330001490, "metrics": { "loss": 0.48131808865273235, "ce_loss": 0.4713180981894755, "lb_loss": 0.9999999983954985 } }, { "checkpoint_type": "bytes", "bytes_threshold": 331000000, "cumulative_training_bytes": 331007691, "metrics": { "loss": 0.48127986607881124, "ce_loss": 0.4712798756155544, "lb_loss": 0.9999999983502196 } }, { "checkpoint_type": "bytes", "bytes_threshold": 332000000, "cumulative_training_bytes": 332005409, "metrics": { "loss": 0.4814225170457456, "ce_loss": 0.4714225265824888, "lb_loss": 0.9999999983561437 } }, { "checkpoint_type": "bytes", "bytes_threshold": 333000000, "cumulative_training_bytes": 333001906, "metrics": { "loss": 0.4816718959560199, "ce_loss": 0.47167190549276306, "lb_loss": 0.9999999982551901 } }, { "epoch": 7, "checkpoint_type": "epoch", "metrics": { "loss": 0.4817229831126784, "ce_loss": 0.4717229926494216, "lb_loss": 0.9999999982283324, "training_bytes": 47653415 }, "cumulative_training_bytes": 333573817, "training_bytes_this_epoch": 47653415 }, { "checkpoint_type": "bytes", "bytes_threshold": 334000000, "cumulative_training_bytes": 334004035, "metrics": { "loss": 0.4830050127846854, "ce_loss": 0.47300502232142855, "lb_loss": 0.9999999872275761 } }, { "checkpoint_type": "bytes", "bytes_threshold": 335000000, "cumulative_training_bytes": 335002482, "metrics": { "loss": 0.4816376752750848, "ce_loss": 0.47163768481182794, "lb_loss": 0.9999999945522636 } }, { "checkpoint_type": "bytes", "bytes_threshold": 336000000, "cumulative_training_bytes": 336003192, "metrics": { "loss": 0.48123298533707387, "ce_loss": 0.47123299487381703, "lb_loss": 0.9999999947352364 } }, { "checkpoint_type": "bytes", "bytes_threshold": 337000000, "cumulative_training_bytes": 337002345, "metrics": { "loss": 0.48121870550799956, "ce_loss": 0.4712187150447427, "lb_loss": 0.9999999915993454 } }, { "checkpoint_type": "bytes", "bytes_threshold": 338000000, "cumulative_training_bytes": 338006062, "metrics": { "loss": 0.48133054421973354, "ce_loss": 0.4713305537564767, "lb_loss": 0.9999999932056881 } }, { "checkpoint_type": "bytes", "bytes_threshold": 339000000, "cumulative_training_bytes": 339006251, "metrics": { "loss": 0.48139633729424275, "ce_loss": 0.4713963468309859, "lb_loss": 0.9999999942074359 } }, { "checkpoint_type": "bytes", "bytes_threshold": 340000000, "cumulative_training_bytes": 340006632, "metrics": { "loss": 0.48136963969036745, "ce_loss": 0.4713696492271106, "lb_loss": 0.9999999957475878 } }, { "checkpoint_type": "bytes", "bytes_threshold": 341000000, "cumulative_training_bytes": 341002987, "metrics": { "loss": 0.4813286786712898, "ce_loss": 0.471328688208033, "lb_loss": 0.9999999964396814 } }, { "checkpoint_type": "bytes", "bytes_threshold": 342000000, "cumulative_training_bytes": 342001745, "metrics": { "loss": 0.4810277493188427, "ce_loss": 0.47102775885558584, "lb_loss": 0.9999999965352432 } }, { "checkpoint_type": "bytes", "bytes_threshold": 343000000, "cumulative_training_bytes": 343001355, "metrics": { "loss": 0.4808982693015996, "ce_loss": 0.4708982788383428, "lb_loss": 0.9999999960780047 } }, { "checkpoint_type": "bytes", "bytes_threshold": 344000000, "cumulative_training_bytes": 344002997, "metrics": { "loss": 0.4810716566710395, "ce_loss": 0.4710716662077827, "lb_loss": 0.9999999964552304 } }, { "checkpoint_type": "bytes", "bytes_threshold": 345000000, "cumulative_training_bytes": 345008016, "metrics": { "loss": 0.48147442988762096, "ce_loss": 0.4714744394243641, "lb_loss": 0.9999999960901906 } }, { "checkpoint_type": "bytes", "bytes_threshold": 346000000, "cumulative_training_bytes": 346005271, "metrics": { "loss": 0.481584667572247, "ce_loss": 0.47158467710899016, "lb_loss": 0.9999999966233822 } }, { "checkpoint_type": "bytes", "bytes_threshold": 347000000, "cumulative_training_bytes": 347007029, "metrics": { "loss": 0.4815711710188124, "ce_loss": 0.4715711805555556, "lb_loss": 0.9999999961282453 } }, { "checkpoint_type": "bytes", "bytes_threshold": 348000000, "cumulative_training_bytes": 348004167, "metrics": { "loss": 0.48147504019800486, "ce_loss": 0.471475049734748, "lb_loss": 0.9999999969960524 } }, { "checkpoint_type": "bytes", "bytes_threshold": 349000000, "cumulative_training_bytes": 349004042, "metrics": { "loss": 0.4814087606245472, "ce_loss": 0.47140877016129035, "lb_loss": 0.9999999970419531 } }, { "checkpoint_type": "bytes", "bytes_threshold": 350000000, "cumulative_training_bytes": 350007816, "metrics": { "loss": 0.481473079932036, "ce_loss": 0.47147308946877914, "lb_loss": 0.9999999975835955 } }, { "checkpoint_type": "bytes", "bytes_threshold": 351000000, "cumulative_training_bytes": 351001302, "metrics": { "loss": 0.4814454059399704, "ce_loss": 0.47144541547671354, "lb_loss": 0.9999999974073551 } }, { "checkpoint_type": "bytes", "bytes_threshold": 352000000, "cumulative_training_bytes": 352004241, "metrics": { "loss": 0.481444767415895, "ce_loss": 0.47144477695263814, "lb_loss": 0.9999999979446674 } }, { "checkpoint_type": "bytes", "bytes_threshold": 353000000, "cumulative_training_bytes": 353002044, "metrics": { "loss": 0.48147728557752, "ce_loss": 0.4714772951142632, "lb_loss": 0.9999999978863601 } }, { "checkpoint_type": "bytes", "bytes_threshold": 354000000, "cumulative_training_bytes": 354002090, "metrics": { "loss": 0.48136562600486105, "ce_loss": 0.4713656355416042, "lb_loss": 0.9999999975648777 } }, { "checkpoint_type": "bytes", "bytes_threshold": 355000000, "cumulative_training_bytes": 355007491, "metrics": { "loss": 0.48128996009526825, "ce_loss": 0.4712899696320114, "lb_loss": 0.9999999974658975 } }, { "checkpoint_type": "bytes", "bytes_threshold": 356000000, "cumulative_training_bytes": 356005562, "metrics": { "loss": 0.4812372408046703, "ce_loss": 0.47123725034141345, "lb_loss": 0.9999999976190702 } }, { "checkpoint_type": "bytes", "bytes_threshold": 357000000, "cumulative_training_bytes": 357002412, "metrics": { "loss": 0.48114877785292665, "ce_loss": 0.4711487873896698, "lb_loss": 0.999999997993044 } }, { "checkpoint_type": "bytes", "bytes_threshold": 358000000, "cumulative_training_bytes": 358000907, "metrics": { "loss": 0.48121007079241046, "ce_loss": 0.4712100803291536, "lb_loss": 0.9999999978512432 } }, { "checkpoint_type": "bytes", "bytes_threshold": 359000000, "cumulative_training_bytes": 359004191, "metrics": { "loss": 0.4811665489170358, "ce_loss": 0.47116655845377897, "lb_loss": 0.9999999974155167 } }, { "checkpoint_type": "bytes", "bytes_threshold": 360000000, "cumulative_training_bytes": 360005688, "metrics": { "loss": 0.4811812123998154, "ce_loss": 0.47118122193655854, "lb_loss": 0.9999999971682614 } }, { "checkpoint_type": "bytes", "bytes_threshold": 361000000, "cumulative_training_bytes": 361001697, "metrics": { "loss": 0.4811540463328827, "ce_loss": 0.4711540558696259, "lb_loss": 0.9999999971379121 } }, { "checkpoint_type": "bytes", "bytes_threshold": 362000000, "cumulative_training_bytes": 362004914, "metrics": { "loss": 0.4811149918098768, "ce_loss": 0.47111500134662, "lb_loss": 0.9999999970623081 } }, { "checkpoint_type": "bytes", "bytes_threshold": 363000000, "cumulative_training_bytes": 363004386, "metrics": { "loss": 0.48118021857850135, "ce_loss": 0.4711802281152445, "lb_loss": 0.9999999971469161 } }, { "checkpoint_type": "bytes", "bytes_threshold": 364000000, "cumulative_training_bytes": 364001806, "metrics": { "loss": 0.4812275201877158, "ce_loss": 0.47122752972445897, "lb_loss": 0.9999999971802533 } }, { "checkpoint_type": "bytes", "bytes_threshold": 365000000, "cumulative_training_bytes": 365007448, "metrics": { "loss": 0.48119603568250163, "ce_loss": 0.4711960452192448, "lb_loss": 0.9999999972121579 } }, { "checkpoint_type": "bytes", "bytes_threshold": 366000000, "cumulative_training_bytes": 366007819, "metrics": { "loss": 0.4811812530045694, "ce_loss": 0.47118126254131254, "lb_loss": 0.9999999974109408 } }, { "checkpoint_type": "bytes", "bytes_threshold": 367000000, "cumulative_training_bytes": 367003761, "metrics": { "loss": 0.4811495695974758, "ce_loss": 0.47114957913421895, "lb_loss": 0.9999999974880315 } }, { "checkpoint_type": "bytes", "bytes_threshold": 368000000, "cumulative_training_bytes": 368001579, "metrics": { "loss": 0.48113743278591636, "ce_loss": 0.4711374423226595, "lb_loss": 0.9999999974816806 } }, { "checkpoint_type": "bytes", "bytes_threshold": 369000000, "cumulative_training_bytes": 369002487, "metrics": { "loss": 0.4811002396259621, "ce_loss": 0.47110024916270526, "lb_loss": 0.9999999975400848 } }, { "checkpoint_type": "bytes", "bytes_threshold": 370000000, "cumulative_training_bytes": 370006595, "metrics": { "loss": 0.4810897184785962, "ce_loss": 0.4710897280153394, "lb_loss": 0.9999999973698308 } }, { "checkpoint_type": "bytes", "bytes_threshold": 371000000, "cumulative_training_bytes": 371003993, "metrics": { "loss": 0.4811078026436618, "ce_loss": 0.471107812180405, "lb_loss": 0.9999999973422351 } }, { "checkpoint_type": "bytes", "bytes_threshold": 372000000, "cumulative_training_bytes": 372005909, "metrics": { "loss": 0.4811544351843724, "ce_loss": 0.47115444472111556, "lb_loss": 0.9999999974828317 } }, { "checkpoint_type": "bytes", "bytes_threshold": 373000000, "cumulative_training_bytes": 373004014, "metrics": { "loss": 0.4811229425263636, "ce_loss": 0.4711229520631068, "lb_loss": 0.9999999972570289 } }, { "checkpoint_type": "bytes", "bytes_threshold": 374000000, "cumulative_training_bytes": 374004738, "metrics": { "loss": 0.4811517313740692, "ce_loss": 0.47115174091081236, "lb_loss": 0.9999999972347778 } }, { "checkpoint_type": "bytes", "bytes_threshold": 375000000, "cumulative_training_bytes": 375000531, "metrics": { "loss": 0.4811597229526303, "ce_loss": 0.4711597324893735, "lb_loss": 0.9999999972461354 } }, { "checkpoint_type": "bytes", "bytes_threshold": 376000000, "cumulative_training_bytes": 376004266, "metrics": { "loss": 0.48114152894214535, "ce_loss": 0.4711415384788885, "lb_loss": 0.9999999971499042 } }, { "checkpoint_type": "bytes", "bytes_threshold": 377000000, "cumulative_training_bytes": 377004301, "metrics": { "loss": 0.48106536643293246, "ce_loss": 0.4710653759696756, "lb_loss": 0.9999999970260729 } }, { "checkpoint_type": "bytes", "bytes_threshold": 378000000, "cumulative_training_bytes": 378002475, "metrics": { "loss": 0.48104782418438746, "ce_loss": 0.4710478337211306, "lb_loss": 0.9999999969180638 } }, { "checkpoint_type": "bytes", "bytes_threshold": 379000000, "cumulative_training_bytes": 379003430, "metrics": { "loss": 0.4810510765495538, "ce_loss": 0.471051086086297, "lb_loss": 0.9999999969760663 } }, { "checkpoint_type": "bytes", "bytes_threshold": 380000000, "cumulative_training_bytes": 380004459, "metrics": { "loss": 0.4810564429904666, "ce_loss": 0.4710564525272098, "lb_loss": 0.9999999969922458 } }, { "checkpoint_type": "bytes", "bytes_threshold": 381000000, "cumulative_training_bytes": 381002688, "metrics": { "loss": 0.4810578564424302, "ce_loss": 0.4710578659791734, "lb_loss": 0.9999999970938646 } }, { "epoch": 8, "checkpoint_type": "epoch", "metrics": { "loss": 0.4810454771880319, "ce_loss": 0.4710454867247751, "lb_loss": 0.9999999970791426, "training_bytes": 47653397 }, "cumulative_training_bytes": 381227214, "training_bytes_this_epoch": 47653397 }, { "checkpoint_type": "bytes", "bytes_threshold": 382000000, "cumulative_training_bytes": 382003673, "metrics": { "loss": 0.47660348675038555, "ce_loss": 0.4666034962871287, "lb_loss": 0.99999999763942 } }, { "checkpoint_type": "bytes", "bytes_threshold": 383000000, "cumulative_training_bytes": 383005458, "metrics": { "loss": 0.47849743119601545, "ce_loss": 0.4684974407327586, "lb_loss": 0.9999999971739177 } }, { "checkpoint_type": "bytes", "bytes_threshold": 384000000, "cumulative_training_bytes": 384002363, "metrics": { "loss": 0.4783237560019309, "ce_loss": 0.46832376553867405, "lb_loss": 0.9999999990120777 } }, { "checkpoint_type": "bytes", "bytes_threshold": 385000000, "cumulative_training_bytes": 385005560, "metrics": { "loss": 0.4780487670859749, "ce_loss": 0.46804877662271804, "lb_loss": 0.999999998549177 } }, { "checkpoint_type": "bytes", "bytes_threshold": 386000000, "cumulative_training_bytes": 386002393, "metrics": { "loss": 0.47821703500579293, "ce_loss": 0.4682170445425361, "lb_loss": 0.999999998373549 } }, { "checkpoint_type": "bytes", "bytes_threshold": 387000000, "cumulative_training_bytes": 387003159, "metrics": { "loss": 0.4784002930163072, "ce_loss": 0.4684003025530504, "lb_loss": 0.9999999972332061 } }, { "checkpoint_type": "bytes", "bytes_threshold": 388000000, "cumulative_training_bytes": 388000670, "metrics": { "loss": 0.47836776124945596, "ce_loss": 0.4683677707861991, "lb_loss": 0.999999998044644 } }, { "checkpoint_type": "bytes", "bytes_threshold": 389000000, "cumulative_training_bytes": 389001849, "metrics": { "loss": 0.47809766595586767, "ce_loss": 0.46809767549261083, "lb_loss": 0.9999999978272198 } }, { "checkpoint_type": "bytes", "bytes_threshold": 390000000, "cumulative_training_bytes": 390002629, "metrics": { "loss": 0.47807849722590956, "ce_loss": 0.4680785067626527, "lb_loss": 0.99999999849168 } }, { "checkpoint_type": "bytes", "bytes_threshold": 391000000, "cumulative_training_bytes": 391006180, "metrics": { "loss": 0.47816267644602894, "ce_loss": 0.4681626859827721, "lb_loss": 0.9999999978062504 } }, { "checkpoint_type": "bytes", "bytes_threshold": 392000000, "cumulative_training_bytes": 392004730, "metrics": { "loss": 0.478326905857433, "ce_loss": 0.46832691539417615, "lb_loss": 0.9999999977563593 } }, { "checkpoint_type": "bytes", "bytes_threshold": 393000000, "cumulative_training_bytes": 393003485, "metrics": { "loss": 0.47875887407274353, "ce_loss": 0.4687588836094867, "lb_loss": 0.999999997250208 } }, { "checkpoint_type": "bytes", "bytes_threshold": 394000000, "cumulative_training_bytes": 394000207, "metrics": { "loss": 0.4788389284500753, "ce_loss": 0.46883893798681847, "lb_loss": 0.9999999963930083 } }, { "checkpoint_type": "bytes", "bytes_threshold": 395000000, "cumulative_training_bytes": 395003542, "metrics": { "loss": 0.4789453029632568, "ce_loss": 0.4689453125, "lb_loss": 0.9999999956621064 } }, { "checkpoint_type": "bytes", "bytes_threshold": 396000000, "cumulative_training_bytes": 396000470, "metrics": { "loss": 0.47899286611092523, "ce_loss": 0.4689928756476684, "lb_loss": 0.9999999959542961 } }, { "checkpoint_type": "bytes", "bytes_threshold": 397000000, "cumulative_training_bytes": 397005545, "metrics": { "loss": 0.4790248115452559, "ce_loss": 0.46902482108199905, "lb_loss": 0.9999999956619618 } }, { "checkpoint_type": "bytes", "bytes_threshold": 398000000, "cumulative_training_bytes": 398006179, "metrics": { "loss": 0.47905739380495393, "ce_loss": 0.4690574033416971, "lb_loss": 0.9999999957308717 } }, { "checkpoint_type": "bytes", "bytes_threshold": 399000000, "cumulative_training_bytes": 399000874, "metrics": { "loss": 0.4790856049658408, "ce_loss": 0.46908561450258396, "lb_loss": 0.9999999954051544 } }, { "checkpoint_type": "bytes", "bytes_threshold": 400000000, "cumulative_training_bytes": 400002954, "metrics": { "loss": 0.4790493687755275, "ce_loss": 0.46904937831227067, "lb_loss": 0.9999999951159667 } }, { "checkpoint_type": "bytes", "bytes_threshold": 401000000, "cumulative_training_bytes": 401003161, "metrics": { "loss": 0.4790388382178058, "ce_loss": 0.46903884775454896, "lb_loss": 0.9999999944848974 } }, { "checkpoint_type": "bytes", "bytes_threshold": 402000000, "cumulative_training_bytes": 402007261, "metrics": { "loss": 0.47903641041535705, "ce_loss": 0.4690364199521002, "lb_loss": 0.9999999945973682 } }, { "checkpoint_type": "bytes", "bytes_threshold": 403000000, "cumulative_training_bytes": 403002393, "metrics": { "loss": 0.47906452325158316, "ce_loss": 0.4690645327883263, "lb_loss": 0.99999999488624 } }, { "checkpoint_type": "bytes", "bytes_threshold": 404000000, "cumulative_training_bytes": 404005905, "metrics": { "loss": 0.4790848118918283, "ce_loss": 0.46908482142857144, "lb_loss": 0.9999999945704676 } }, { "checkpoint_type": "bytes", "bytes_threshold": 405000000, "cumulative_training_bytes": 405004571, "metrics": { "loss": 0.4790582130880556, "ce_loss": 0.46905822262479874, "lb_loss": 0.9999999946442203 } }, { "checkpoint_type": "bytes", "bytes_threshold": 406000000, "cumulative_training_bytes": 406003780, "metrics": { "loss": 0.47909593810622436, "ce_loss": 0.4690959476429675, "lb_loss": 0.9999999946936204 } }, { "checkpoint_type": "bytes", "bytes_threshold": 407000000, "cumulative_training_bytes": 407001187, "metrics": { "loss": 0.4790645799877739, "ce_loss": 0.4690645895245171, "lb_loss": 0.9999999947037774 } }, { "checkpoint_type": "bytes", "bytes_threshold": 408000000, "cumulative_training_bytes": 408003413, "metrics": { "loss": 0.47919748849672367, "ce_loss": 0.46919749803346683, "lb_loss": 0.9999999948169874 } }, { "checkpoint_type": "bytes", "bytes_threshold": 409000000, "cumulative_training_bytes": 409005684, "metrics": { "loss": 0.4793487980177096, "ce_loss": 0.46934880755445274, "lb_loss": 0.9999999948891524 } }, { "checkpoint_type": "bytes", "bytes_threshold": 410000000, "cumulative_training_bytes": 410004022, "metrics": { "loss": 0.4794526581056198, "ce_loss": 0.469452667642363, "lb_loss": 0.9999999954638296 } }, { "checkpoint_type": "bytes", "bytes_threshold": 411000000, "cumulative_training_bytes": 411000093, "metrics": { "loss": 0.479534154075654, "ce_loss": 0.46953416361239714, "lb_loss": 0.9999999953702153 } }, { "checkpoint_type": "bytes", "bytes_threshold": 412000000, "cumulative_training_bytes": 412003081, "metrics": { "loss": 0.479515883720286, "ce_loss": 0.46951589325702914, "lb_loss": 0.9999999951948483 } }, { "checkpoint_type": "bytes", "bytes_threshold": 413000000, "cumulative_training_bytes": 413002872, "metrics": { "loss": 0.47955900778253396, "ce_loss": 0.4695590173192771, "lb_loss": 0.9999999952316284 } }, { "checkpoint_type": "bytes", "bytes_threshold": 414000000, "cumulative_training_bytes": 414004353, "metrics": { "loss": 0.47949838415484564, "ce_loss": 0.4694983936915888, "lb_loss": 0.9999999954460937 } }, { "checkpoint_type": "bytes", "bytes_threshold": 415000000, "cumulative_training_bytes": 415000550, "metrics": { "loss": 0.47950582459950714, "ce_loss": 0.4695058341362503, "lb_loss": 0.9999999952840578 } }, { "checkpoint_type": "bytes", "bytes_threshold": 416000000, "cumulative_training_bytes": 416004238, "metrics": { "loss": 0.47945263384722864, "ce_loss": 0.4694526433839718, "lb_loss": 0.9999999952625986 } }, { "checkpoint_type": "bytes", "bytes_threshold": 417000000, "cumulative_training_bytes": 417004582, "metrics": { "loss": 0.47940799961351366, "ce_loss": 0.4694080091502568, "lb_loss": 0.9999999954964812 } }, { "checkpoint_type": "bytes", "bytes_threshold": 418000000, "cumulative_training_bytes": 418000528, "metrics": { "loss": 0.4794532293480965, "ce_loss": 0.46945323888483964, "lb_loss": 0.9999999955563385 } }, { "checkpoint_type": "bytes", "bytes_threshold": 419000000, "cumulative_training_bytes": 419005587, "metrics": { "loss": 0.4794745432201999, "ce_loss": 0.46947455275694305, "lb_loss": 0.9999999953964384 } }, { "checkpoint_type": "bytes", "bytes_threshold": 420000000, "cumulative_training_bytes": 420003878, "metrics": { "loss": 0.47951866702822527, "ce_loss": 0.46951867656496843, "lb_loss": 0.9999999954449057 } }, { "checkpoint_type": "bytes", "bytes_threshold": 421000000, "cumulative_training_bytes": 421005882, "metrics": { "loss": 0.4795383837500711, "ce_loss": 0.46953839328681424, "lb_loss": 0.999999995399141 } }, { "checkpoint_type": "bytes", "bytes_threshold": 422000000, "cumulative_training_bytes": 422001156, "metrics": { "loss": 0.4795866248529282, "ce_loss": 0.46958663438967135, "lb_loss": 0.9999999953547554 } }, { "checkpoint_type": "bytes", "bytes_threshold": 423000000, "cumulative_training_bytes": 423005818, "metrics": { "loss": 0.47955937749129934, "ce_loss": 0.4695593870280425, "lb_loss": 0.9999999953570429 } }, { "checkpoint_type": "bytes", "bytes_threshold": 424000000, "cumulative_training_bytes": 424005361, "metrics": { "loss": 0.47958130087134704, "ce_loss": 0.4695813104080902, "lb_loss": 0.9999999956472714 } }, { "checkpoint_type": "bytes", "bytes_threshold": 425000000, "cumulative_training_bytes": 425000747, "metrics": { "loss": 0.4796109096516423, "ce_loss": 0.4696109191883855, "lb_loss": 0.9999999956836937 } }, { "checkpoint_type": "bytes", "bytes_threshold": 426000000, "cumulative_training_bytes": 426002610, "metrics": { "loss": 0.479592292318592, "ce_loss": 0.4695923018553352, "lb_loss": 0.9999999958517287 } }, { "checkpoint_type": "bytes", "bytes_threshold": 427000000, "cumulative_training_bytes": 427005445, "metrics": { "loss": 0.4796241437497596, "ce_loss": 0.46962415328650275, "lb_loss": 0.9999999959326484 } }, { "checkpoint_type": "bytes", "bytes_threshold": 428000000, "cumulative_training_bytes": 428003339, "metrics": { "loss": 0.47960202485923, "ce_loss": 0.46960203439597314, "lb_loss": 0.9999999960387157 } }, { "epoch": 9, "checkpoint_type": "epoch", "metrics": { "loss": 0.4795953822932697, "ce_loss": 0.46959539183001286, "lb_loss": 0.9999999961119078, "training_bytes": 47653406 }, "cumulative_training_bytes": 428880620, "training_bytes_this_epoch": 47653406 }, { "checkpoint_type": "bytes", "bytes_threshold": 429000000, "cumulative_training_bytes": 429002185, "metrics": { "loss": 0.47997069358825684, "ce_loss": 0.469970703125, "lb_loss": 0.9999999925494194 } }, { "checkpoint_type": "bytes", "bytes_threshold": 430000000, "cumulative_training_bytes": 430003863, "metrics": { "loss": 0.476517847606114, "ce_loss": 0.46651785714285715, "lb_loss": 0.9999999862138916 } }, { "checkpoint_type": "bytes", "bytes_threshold": 431000000, "cumulative_training_bytes": 431005314, "metrics": { "loss": 0.47611537940210574, "ce_loss": 0.4661153889388489, "lb_loss": 0.9999999903517661 } }, { "checkpoint_type": "bytes", "bytes_threshold": 432000000, "cumulative_training_bytes": 432001200, "metrics": { "loss": 0.4762607135024725, "ce_loss": 0.4662607230392157, "lb_loss": 0.9999999903580722 } }, { "checkpoint_type": "bytes", "bytes_threshold": 433000000, "cumulative_training_bytes": 433004578, "metrics": { "loss": 0.4760105424113088, "ce_loss": 0.46601055194805197, "lb_loss": 0.9999999920379695 } }, { "checkpoint_type": "bytes", "bytes_threshold": 434000000, "cumulative_training_bytes": 434007002, "metrics": { "loss": 0.4758844354259434, "ce_loss": 0.46588444496268655, "lb_loss": 0.9999999925271789 } }, { "checkpoint_type": "bytes", "bytes_threshold": 435000000, "cumulative_training_bytes": 435000122, "metrics": { "loss": 0.4758044139613795, "ce_loss": 0.4658044234981227, "lb_loss": 0.9999999929130898 } }, { "checkpoint_type": "bytes", "bytes_threshold": 436000000, "cumulative_training_bytes": 436000104, "metrics": { "loss": 0.4760492176137945, "ce_loss": 0.46604922715053765, "lb_loss": 0.9999999934627164 } }, { "checkpoint_type": "bytes", "bytes_threshold": 437000000, "cumulative_training_bytes": 437004132, "metrics": { "loss": 0.4761286426781485, "ce_loss": 0.46612865221489164, "lb_loss": 0.9999999932586641 } }, { "checkpoint_type": "bytes", "bytes_threshold": 438000000, "cumulative_training_bytes": 438006173, "metrics": { "loss": 0.47627581365956556, "ce_loss": 0.4662758231963087, "lb_loss": 0.999999993699509 } }, { "checkpoint_type": "bytes", "bytes_threshold": 439000000, "cumulative_training_bytes": 439002702, "metrics": { "loss": 0.4764555856788393, "ce_loss": 0.46645559521558244, "lb_loss": 0.999999995310981 } }, { "checkpoint_type": "bytes", "bytes_threshold": 440000000, "cumulative_training_bytes": 440005689, "metrics": { "loss": 0.4764527498404076, "ce_loss": 0.46645275937715075, "lb_loss": 0.9999999952414736 } }, { "checkpoint_type": "bytes", "bytes_threshold": 441000000, "cumulative_training_bytes": 441000190, "metrics": { "loss": 0.47656120729838003, "ce_loss": 0.4665612168351232, "lb_loss": 0.9999999955192971 } }, { "checkpoint_type": "bytes", "bytes_threshold": 442000000, "cumulative_training_bytes": 442005265, "metrics": { "loss": 0.4766282175636069, "ce_loss": 0.46662822710035007, "lb_loss": 0.9999999957226539 } }, { "checkpoint_type": "bytes", "bytes_threshold": 443000000, "cumulative_training_bytes": 443003341, "metrics": { "loss": 0.47682650638466606, "ce_loss": 0.4668265159214092, "lb_loss": 0.9999999960586631 } }, { "checkpoint_type": "bytes", "bytes_threshold": 444000000, "cumulative_training_bytes": 444007542, "metrics": { "loss": 0.47688483516214347, "ce_loss": 0.46688484469888664, "lb_loss": 0.9999999959278203 } }, { "checkpoint_type": "bytes", "bytes_threshold": 445000000, "cumulative_training_bytes": 445007323, "metrics": { "loss": 0.47694339620874593, "ce_loss": 0.4669434057454891, "lb_loss": 0.999999996235794 } }, { "checkpoint_type": "bytes", "bytes_threshold": 446000000, "cumulative_training_bytes": 446000020, "metrics": { "loss": 0.4770030092467989, "ce_loss": 0.46700301878354206, "lb_loss": 0.9999999962413887 } }, { "checkpoint_type": "bytes", "bytes_threshold": 447000000, "cumulative_training_bytes": 447002709, "metrics": { "loss": 0.4770996947302615, "ce_loss": 0.46709970426700465, "lb_loss": 0.9999999963990435 } }, { "checkpoint_type": "bytes", "bytes_threshold": 448000000, "cumulative_training_bytes": 448004545, "metrics": { "loss": 0.47733323205653144, "ce_loss": 0.4673332415932746, "lb_loss": 0.9999999962538314 } }, { "checkpoint_type": "bytes", "bytes_threshold": 449000000, "cumulative_training_bytes": 449006756, "metrics": { "loss": 0.4773651958645501, "ce_loss": 0.4673652054012933, "lb_loss": 0.9999999961684348 } }, { "checkpoint_type": "bytes", "bytes_threshold": 450000000, "cumulative_training_bytes": 450004303, "metrics": { "loss": 0.47728603160135036, "ce_loss": 0.4672860411380935, "lb_loss": 0.9999999964785947 } }, { "checkpoint_type": "bytes", "bytes_threshold": 451000000, "cumulative_training_bytes": 451006142, "metrics": { "loss": 0.47733279215010804, "ce_loss": 0.4673328016868512, "lb_loss": 0.9999999963082244 } }, { "checkpoint_type": "bytes", "bytes_threshold": 452000000, "cumulative_training_bytes": 452005227, "metrics": { "loss": 0.477412995262992, "ce_loss": 0.4674130047997352, "lb_loss": 0.9999999966853425 } }, { "checkpoint_type": "bytes", "bytes_threshold": 453000000, "cumulative_training_bytes": 453006680, "metrics": { "loss": 0.47739544374688625, "ce_loss": 0.4673954532836294, "lb_loss": 0.9999999969365633 } }, { "checkpoint_type": "bytes", "bytes_threshold": 454000000, "cumulative_training_bytes": 454003038, "metrics": { "loss": 0.4773985136579552, "ce_loss": 0.4673985231946984, "lb_loss": 0.999999997039745 } }, { "checkpoint_type": "bytes", "bytes_threshold": 455000000, "cumulative_training_bytes": 455007108, "metrics": { "loss": 0.4773937331383228, "ce_loss": 0.46739374267506595, "lb_loss": 0.9999999970835114 } }, { "checkpoint_type": "bytes", "bytes_threshold": 456000000, "cumulative_training_bytes": 456006436, "metrics": { "loss": 0.47737679419596923, "ce_loss": 0.4673768037327124, "lb_loss": 0.9999999970559377 } }, { "checkpoint_type": "bytes", "bytes_threshold": 457000000, "cumulative_training_bytes": 457003387, "metrics": { "loss": 0.47746527769440306, "ce_loss": 0.4674652872311462, "lb_loss": 0.9999999969816324 } }, { "checkpoint_type": "bytes", "bytes_threshold": 458000000, "cumulative_training_bytes": 458006870, "metrics": { "loss": 0.4774874419945397, "ce_loss": 0.46748745153128285, "lb_loss": 0.9999999967565296 } }, { "checkpoint_type": "bytes", "bytes_threshold": 459000000, "cumulative_training_bytes": 459003680, "metrics": { "loss": 0.47746660761501075, "ce_loss": 0.4674666171517539, "lb_loss": 0.9999999966970482 } }, { "checkpoint_type": "bytes", "bytes_threshold": 460000000, "cumulative_training_bytes": 460005767, "metrics": { "loss": 0.47751757579536014, "ce_loss": 0.4675175853321033, "lb_loss": 0.999999996686187 } }, { "checkpoint_type": "bytes", "bytes_threshold": 461000000, "cumulative_training_bytes": 461002393, "metrics": { "loss": 0.4775231765478814, "ce_loss": 0.46752318608462456, "lb_loss": 0.9999999966183777 } }, { "checkpoint_type": "bytes", "bytes_threshold": 462000000, "cumulative_training_bytes": 462006702, "metrics": { "loss": 0.4775458794195675, "ce_loss": 0.46754588895631066, "lb_loss": 0.9999999969136754 } }, { "checkpoint_type": "bytes", "bytes_threshold": 463000000, "cumulative_training_bytes": 463005861, "metrics": { "loss": 0.47760887816799097, "ce_loss": 0.46760888770473413, "lb_loss": 0.9999999970980014 } }, { "checkpoint_type": "bytes", "bytes_threshold": 464000000, "cumulative_training_bytes": 464001234, "metrics": { "loss": 0.4776744304567166, "ce_loss": 0.46767443999345976, "lb_loss": 0.9999999970632986 } }, { "checkpoint_type": "bytes", "bytes_threshold": 465000000, "cumulative_training_bytes": 465004728, "metrics": { "loss": 0.4777479740137972, "ce_loss": 0.46774798355054037, "lb_loss": 0.9999999972338595 } }, { "checkpoint_type": "bytes", "bytes_threshold": 466000000, "cumulative_training_bytes": 466001776, "metrics": { "loss": 0.477748253636076, "ce_loss": 0.46774826317281915, "lb_loss": 0.9999999973694795 } }, { "checkpoint_type": "bytes", "bytes_threshold": 467000000, "cumulative_training_bytes": 467002535, "metrics": { "loss": 0.477785585970285, "ce_loss": 0.46778559550702814, "lb_loss": 0.9999999974626135 } }, { "checkpoint_type": "bytes", "bytes_threshold": 468000000, "cumulative_training_bytes": 468004269, "metrics": { "loss": 0.4778305581114666, "ce_loss": 0.46783056764820974, "lb_loss": 0.9999999974810011 } }, { "checkpoint_type": "bytes", "bytes_threshold": 469000000, "cumulative_training_bytes": 469006908, "metrics": { "loss": 0.4778490640515819, "ce_loss": 0.46784907358832506, "lb_loss": 0.999999997464358 } }, { "checkpoint_type": "bytes", "bytes_threshold": 470000000, "cumulative_training_bytes": 470004722, "metrics": { "loss": 0.47785450519240796, "ce_loss": 0.4678545147291511, "lb_loss": 0.9999999974369559 } }, { "checkpoint_type": "bytes", "bytes_threshold": 471000000, "cumulative_training_bytes": 471002302, "metrics": { "loss": 0.4778841831431914, "ce_loss": 0.4678841926799346, "lb_loss": 0.9999999974975149 } }, { "checkpoint_type": "bytes", "bytes_threshold": 472000000, "cumulative_training_bytes": 472007065, "metrics": { "loss": 0.4779119462372671, "ce_loss": 0.4679119557740103, "lb_loss": 0.9999999975768749 } }, { "checkpoint_type": "bytes", "bytes_threshold": 473000000, "cumulative_training_bytes": 473006577, "metrics": { "loss": 0.4779428524080868, "ce_loss": 0.46794286194483, "lb_loss": 0.9999999976112642 } }, { "checkpoint_type": "bytes", "bytes_threshold": 474000000, "cumulative_training_bytes": 474004871, "metrics": { "loss": 0.4779470685299348, "ce_loss": 0.46794707806667796, "lb_loss": 0.9999999978662063 } }, { "checkpoint_type": "bytes", "bytes_threshold": 475000000, "cumulative_training_bytes": 475004074, "metrics": { "loss": 0.47796439586249323, "ce_loss": 0.4679644053992364, "lb_loss": 0.9999999978924653 } }, { "checkpoint_type": "bytes", "bytes_threshold": 476000000, "cumulative_training_bytes": 476004262, "metrics": { "loss": 0.4779159292835363, "ce_loss": 0.4679159388202795, "lb_loss": 0.9999999980532119 } }, { "epoch": 10, "checkpoint_type": "epoch", "metrics": { "loss": 0.477952924669555, "ce_loss": 0.4679529342062982, "lb_loss": 0.9999999979314583, "training_bytes": 47653389 }, "cumulative_training_bytes": 476534009, "training_bytes_this_epoch": 47653389 }, { "checkpoint_type": "bytes", "bytes_threshold": 477000000, "cumulative_training_bytes": 477001318, "metrics": { "loss": 0.47401126095506013, "ce_loss": 0.4640112704918033, "lb_loss": 1.0000000039085013 } }, { "checkpoint_type": "bytes", "bytes_threshold": 478000000, "cumulative_training_bytes": 478000704, "metrics": { "loss": 0.47289061546325684, "ce_loss": 0.462890625, "lb_loss": 0.9999999971914042 } }, { "checkpoint_type": "bytes", "bytes_threshold": 479000000, "cumulative_training_bytes": 479001718, "metrics": { "loss": 0.4746253786620146, "ce_loss": 0.46462538819875776, "lb_loss": 0.999999995927633 } }, { "checkpoint_type": "bytes", "bytes_threshold": 480000000, "cumulative_training_bytes": 480006675, "metrics": { "loss": 0.47415389498864313, "ce_loss": 0.4641539045253863, "lb_loss": 0.9999999944737415 } }, { "checkpoint_type": "bytes", "bytes_threshold": 481000000, "cumulative_training_bytes": 481002129, "metrics": { "loss": 0.4743378710378709, "ce_loss": 0.46433788057461406, "lb_loss": 0.9999999941724447 } }, { "checkpoint_type": "bytes", "bytes_threshold": 482000000, "cumulative_training_bytes": 482003899, "metrics": { "loss": 0.47443615555429325, "ce_loss": 0.4644361650910364, "lb_loss": 0.999999993655528 } }, { "checkpoint_type": "bytes", "bytes_threshold": 483000000, "cumulative_training_bytes": 483006727, "metrics": { "loss": 0.47455712803722133, "ce_loss": 0.4645571375739645, "lb_loss": 0.9999999937221143 } }, { "checkpoint_type": "bytes", "bytes_threshold": 484000000, "cumulative_training_bytes": 484005792, "metrics": { "loss": 0.47493579348579784, "ce_loss": 0.464935803022541, "lb_loss": 0.999999994320459 } }, { "checkpoint_type": "bytes", "bytes_threshold": 485000000, "cumulative_training_bytes": 485004003, "metrics": { "loss": 0.47497618564860944, "ce_loss": 0.4649761951853526, "lb_loss": 0.999999995473065 } }, { "checkpoint_type": "bytes", "bytes_threshold": 486000000, "cumulative_training_bytes": 486002749, "metrics": { "loss": 0.4750080810781436, "ce_loss": 0.46500809061488674, "lb_loss": 0.9999999949364986 } }, { "checkpoint_type": "bytes", "bytes_threshold": 487000000, "cumulative_training_bytes": 487001523, "metrics": { "loss": 0.47491094168946474, "ce_loss": 0.4649109512262079, "lb_loss": 0.9999999950256738 } }, { "checkpoint_type": "bytes", "bytes_threshold": 488000000, "cumulative_training_bytes": 488005452, "metrics": { "loss": 0.4753404965547121, "ce_loss": 0.4653405060914553, "lb_loss": 0.9999999949069462 } }, { "checkpoint_type": "bytes", "bytes_threshold": 489000000, "cumulative_training_bytes": 489002785, "metrics": { "loss": 0.4756307490627654, "ce_loss": 0.4656307585995086, "lb_loss": 0.9999999955333129 } }, { "checkpoint_type": "bytes", "bytes_threshold": 490000000, "cumulative_training_bytes": 490003743, "metrics": { "loss": 0.4758719347497833, "ce_loss": 0.46587194428652645, "lb_loss": 0.9999999956287668 } }, { "checkpoint_type": "bytes", "bytes_threshold": 491000000, "cumulative_training_bytes": 491003982, "metrics": { "loss": 0.4757608510376348, "ce_loss": 0.465760860574378, "lb_loss": 0.9999999958980392 } }, { "checkpoint_type": "bytes", "bytes_threshold": 492000000, "cumulative_training_bytes": 492005024, "metrics": { "loss": 0.4758957206612766, "ce_loss": 0.4658957301980198, "lb_loss": 0.9999999958984923 } }, { "checkpoint_type": "bytes", "bytes_threshold": 493000000, "cumulative_training_bytes": 493004726, "metrics": { "loss": 0.4758511895911638, "ce_loss": 0.46585119912790696, "lb_loss": 0.9999999962019366 } }, { "checkpoint_type": "bytes", "bytes_threshold": 494000000, "cumulative_training_bytes": 494006508, "metrics": { "loss": 0.47593461343563737, "ce_loss": 0.46593462297238053, "lb_loss": 0.9999999958974445 } }, { "checkpoint_type": "bytes", "bytes_threshold": 495000000, "cumulative_training_bytes": 495003897, "metrics": { "loss": 0.4759487020974335, "ce_loss": 0.46594871163417667, "lb_loss": 0.9999999958219888 } }, { "checkpoint_type": "bytes", "bytes_threshold": 496000000, "cumulative_training_bytes": 496001530, "metrics": { "loss": 0.4761391648338312, "ce_loss": 0.46613917437057434, "lb_loss": 0.9999999956152366 } }, { "checkpoint_type": "bytes", "bytes_threshold": 497000000, "cumulative_training_bytes": 497005237, "metrics": { "loss": 0.47611367359082885, "ce_loss": 0.466113683127572, "lb_loss": 0.999999995807829 } }, { "checkpoint_type": "bytes", "bytes_threshold": 498000000, "cumulative_training_bytes": 498004778, "metrics": { "loss": 0.4762298741116163, "ce_loss": 0.4662298836483595, "lb_loss": 0.9999999961099679 } }, { "checkpoint_type": "bytes", "bytes_threshold": 499000000, "cumulative_training_bytes": 499000975, "metrics": { "loss": 0.47627962683851244, "ce_loss": 0.4662796363752556, "lb_loss": 0.9999999963635885 } }, { "checkpoint_type": "bytes", "bytes_threshold": 500000000, "cumulative_training_bytes": 500006091, "metrics": { "loss": 0.47628707753503496, "ce_loss": 0.46628708707177813, "lb_loss": 0.999999996227308 } }, { "checkpoint_type": "bytes", "bytes_threshold": 501000000, "cumulative_training_bytes": 501000599, "metrics": { "loss": 0.4763518341643523, "ce_loss": 0.46635184370109545, "lb_loss": 0.9999999964181246 } }, { "checkpoint_type": "bytes", "bytes_threshold": 502000000, "cumulative_training_bytes": 502000922, "metrics": { "loss": 0.4764045962209237, "ce_loss": 0.46640460575766685, "lb_loss": 0.9999999965412819 } }, { "checkpoint_type": "bytes", "bytes_threshold": 503000000, "cumulative_training_bytes": 503003452, "metrics": { "loss": 0.47639347198625365, "ce_loss": 0.4663934815229968, "lb_loss": 0.999999996551655 } }, { "checkpoint_type": "bytes", "bytes_threshold": 504000000, "cumulative_training_bytes": 504004414, "metrics": { "loss": 0.4764996507893438, "ce_loss": 0.466499660326087, "lb_loss": 0.9999999965446583 } }, { "checkpoint_type": "bytes", "bytes_threshold": 505000000, "cumulative_training_bytes": 505006535, "metrics": { "loss": 0.47654373122018073, "ce_loss": 0.4665437407569239, "lb_loss": 0.9999999965541816 } }, { "checkpoint_type": "bytes", "bytes_threshold": 506000000, "cumulative_training_bytes": 506002066, "metrics": { "loss": 0.476615203018466, "ce_loss": 0.46661521255520916, "lb_loss": 0.9999999968099359 } }, { "checkpoint_type": "bytes", "bytes_threshold": 507000000, "cumulative_training_bytes": 507007439, "metrics": { "loss": 0.47662020089039253, "ce_loss": 0.4666202104271357, "lb_loss": 0.9999999967951272 } }, { "checkpoint_type": "bytes", "bytes_threshold": 508000000, "cumulative_training_bytes": 508001710, "metrics": { "loss": 0.47666380585255125, "ce_loss": 0.4666638153892944, "lb_loss": 0.9999999968529908 } }, { "checkpoint_type": "bytes", "bytes_threshold": 509000000, "cumulative_training_bytes": 509001905, "metrics": { "loss": 0.47671074559176424, "ce_loss": 0.4667107551285074, "lb_loss": 0.9999999967674915 } }, { "checkpoint_type": "bytes", "bytes_threshold": 510000000, "cumulative_training_bytes": 510006108, "metrics": { "loss": 0.4766892023514087, "ce_loss": 0.4666892118881519, "lb_loss": 0.999999996632583 } }, { "checkpoint_type": "bytes", "bytes_threshold": 511000000, "cumulative_training_bytes": 511002126, "metrics": { "loss": 0.47665283225579347, "ce_loss": 0.46665284179253663, "lb_loss": 0.9999999965179872 } }, { "checkpoint_type": "bytes", "bytes_threshold": 512000000, "cumulative_training_bytes": 512004051, "metrics": { "loss": 0.47662187693238467, "ce_loss": 0.46662188646912783, "lb_loss": 0.9999999966157122 } }, { "checkpoint_type": "bytes", "bytes_threshold": 513000000, "cumulative_training_bytes": 513002597, "metrics": { "loss": 0.47660819736161886, "ce_loss": 0.466608206898362, "lb_loss": 0.9999999966830678 } }, { "checkpoint_type": "bytes", "bytes_threshold": 514000000, "cumulative_training_bytes": 514004929, "metrics": { "loss": 0.4766356019235062, "ce_loss": 0.46663561146024934, "lb_loss": 0.9999999970276858 } }, { "checkpoint_type": "bytes", "bytes_threshold": 515000000, "cumulative_training_bytes": 515000236, "metrics": { "loss": 0.47663549837685426, "ce_loss": 0.4666355079135974, "lb_loss": 0.9999999970452804 } }, { "checkpoint_type": "bytes", "bytes_threshold": 516000000, "cumulative_training_bytes": 516005322, "metrics": { "loss": 0.4766574106708626, "ce_loss": 0.46665742020760576, "lb_loss": 0.9999999971088163 } }, { "checkpoint_type": "bytes", "bytes_threshold": 517000000, "cumulative_training_bytes": 517005666, "metrics": { "loss": 0.47670004072342714, "ce_loss": 0.4667000502601703, "lb_loss": 0.9999999968872504 } }, { "checkpoint_type": "bytes", "bytes_threshold": 518000000, "cumulative_training_bytes": 518006360, "metrics": { "loss": 0.47669515147433716, "ce_loss": 0.46669516101108033, "lb_loss": 0.9999999968519061 } }, { "checkpoint_type": "bytes", "bytes_threshold": 519000000, "cumulative_training_bytes": 519004375, "metrics": { "loss": 0.4766806289889557, "ce_loss": 0.46668063852569885, "lb_loss": 0.9999999968289685 } }, { "checkpoint_type": "bytes", "bytes_threshold": 520000000, "cumulative_training_bytes": 520002998, "metrics": { "loss": 0.476721162943877, "ce_loss": 0.4667211724806202, "lb_loss": 0.9999999969336582 } }, { "checkpoint_type": "bytes", "bytes_threshold": 521000000, "cumulative_training_bytes": 521004681, "metrics": { "loss": 0.47674204208199283, "ce_loss": 0.466742051618736, "lb_loss": 0.9999999968796605 } }, { "checkpoint_type": "bytes", "bytes_threshold": 522000000, "cumulative_training_bytes": 522005267, "metrics": { "loss": 0.4767827219174502, "ce_loss": 0.46678273145419336, "lb_loss": 0.9999999969083478 } }, { "checkpoint_type": "bytes", "bytes_threshold": 523000000, "cumulative_training_bytes": 523004469, "metrics": { "loss": 0.47677980619743615, "ce_loss": 0.4667798157341793, "lb_loss": 0.9999999967781273 } }, { "checkpoint_type": "bytes", "bytes_threshold": 524000000, "cumulative_training_bytes": 524001588, "metrics": { "loss": 0.4768444411811146, "ce_loss": 0.46684445071785774, "lb_loss": 0.9999999967404462 } }, { "epoch": 11, "checkpoint_type": "epoch", "metrics": { "loss": 0.4768498985810268, "ce_loss": 0.46684990811776994, "lb_loss": 0.9999999967918451, "training_bytes": 47653417 }, "cumulative_training_bytes": 524187426, "training_bytes_this_epoch": 47653417 }, { "checkpoint_type": "bytes", "bytes_threshold": 525000000, "cumulative_training_bytes": 525004346, "metrics": { "loss": 0.47418661429503256, "ce_loss": 0.4641866238317757, "lb_loss": 1.0000000077987385 } }, { "checkpoint_type": "bytes", "bytes_threshold": 526000000, "cumulative_training_bytes": 526005081, "metrics": { "loss": 0.4731121884674585, "ce_loss": 0.46311219800420167, "lb_loss": 1.000000004007035 } }, { "checkpoint_type": "bytes", "bytes_threshold": 527000000, "cumulative_training_bytes": 527001768, "metrics": { "loss": 0.47275793034097424, "ce_loss": 0.4627579398777174, "lb_loss": 1.0000000037252903 } }, { "checkpoint_type": "bytes", "bytes_threshold": 528000000, "cumulative_training_bytes": 528002785, "metrics": { "loss": 0.4726709869492006, "ce_loss": 0.4626709964859438, "lb_loss": 0.9999999992818718 } }, { "checkpoint_type": "bytes", "bytes_threshold": 529000000, "cumulative_training_bytes": 529007587, "metrics": { "loss": 0.47280677702923457, "ce_loss": 0.46280678656597773, "lb_loss": 0.9999999995261952 } }, { "checkpoint_type": "bytes", "bytes_threshold": 530000000, "cumulative_training_bytes": 530006938, "metrics": { "loss": 0.4731630250027305, "ce_loss": 0.46316303453947366, "lb_loss": 0.9999999993725827 } }, { "checkpoint_type": "bytes", "bytes_threshold": 531000000, "cumulative_training_bytes": 531005656, "metrics": { "loss": 0.47286867023853774, "ce_loss": 0.4628686797752809, "lb_loss": 0.9999999978569116 } }, { "checkpoint_type": "bytes", "bytes_threshold": 532000000, "cumulative_training_bytes": 532005754, "metrics": { "loss": 0.47314695116355066, "ce_loss": 0.4631469607002938, "lb_loss": 0.9999999973145801 } }, { "checkpoint_type": "bytes", "bytes_threshold": 533000000, "cumulative_training_bytes": 533000212, "metrics": { "loss": 0.4732809010844558, "ce_loss": 0.46328091062119897, "lb_loss": 0.999999997307175 } }, { "checkpoint_type": "bytes", "bytes_threshold": 534000000, "cumulative_training_bytes": 534000062, "metrics": { "loss": 0.47332667615802654, "ce_loss": 0.4633266856947697, "lb_loss": 0.9999999969290347 } }, { "checkpoint_type": "bytes", "bytes_threshold": 535000000, "cumulative_training_bytes": 535000930, "metrics": { "loss": 0.47340379490730783, "ce_loss": 0.463403804444051, "lb_loss": 0.9999999974250118 } }, { "checkpoint_type": "bytes", "bytes_threshold": 536000000, "cumulative_training_bytes": 536002691, "metrics": { "loss": 0.47355009577757956, "ce_loss": 0.4635501053143227, "lb_loss": 0.9999999983389503 } }, { "checkpoint_type": "bytes", "bytes_threshold": 537000000, "cumulative_training_bytes": 537004758, "metrics": { "loss": 0.4735789928527431, "ce_loss": 0.46357900238948624, "lb_loss": 0.9999999973651471 } }, { "checkpoint_type": "bytes", "bytes_threshold": 538000000, "cumulative_training_bytes": 538000966, "metrics": { "loss": 0.4737231837143655, "ce_loss": 0.46372319325110867, "lb_loss": 0.9999999968611745 } }, { "checkpoint_type": "bytes", "bytes_threshold": 539000000, "cumulative_training_bytes": 539001127, "metrics": { "loss": 0.4737859239257891, "ce_loss": 0.4637859334625323, "lb_loss": 0.9999999970120669 } }, { "checkpoint_type": "bytes", "bytes_threshold": 540000000, "cumulative_training_bytes": 540006347, "metrics": { "loss": 0.47382085505667404, "ce_loss": 0.4638208645934172, "lb_loss": 0.9999999970861234 } }, { "checkpoint_type": "bytes", "bytes_threshold": 541000000, "cumulative_training_bytes": 541005694, "metrics": { "loss": 0.4739520771155099, "ce_loss": 0.46395208665225307, "lb_loss": 0.9999999976668187 } }, { "checkpoint_type": "bytes", "bytes_threshold": 542000000, "cumulative_training_bytes": 542006701, "metrics": { "loss": 0.4740450398618823, "ce_loss": 0.46404504939862545, "lb_loss": 0.9999999976188866 } }, { "checkpoint_type": "bytes", "bytes_threshold": 543000000, "cumulative_training_bytes": 543005643, "metrics": { "loss": 0.4742385020533341, "ce_loss": 0.4642385115900773, "lb_loss": 0.9999999976730192 } }, { "checkpoint_type": "bytes", "bytes_threshold": 544000000, "cumulative_training_bytes": 544004689, "metrics": { "loss": 0.4743254720681236, "ce_loss": 0.46432548160486675, "lb_loss": 0.9999999975366176 } }, { "checkpoint_type": "bytes", "bytes_threshold": 545000000, "cumulative_training_bytes": 545005233, "metrics": { "loss": 0.474323673342992, "ce_loss": 0.4643236828797352, "lb_loss": 0.9999999969748287 } }, { "checkpoint_type": "bytes", "bytes_threshold": 546000000, "cumulative_training_bytes": 546004328, "metrics": { "loss": 0.47431586884163524, "ce_loss": 0.4643158783783784, "lb_loss": 0.9999999970291823 } }, { "checkpoint_type": "bytes", "bytes_threshold": 547000000, "cumulative_training_bytes": 547003640, "metrics": { "loss": 0.4743756198690977, "ce_loss": 0.4643756294058409, "lb_loss": 0.9999999968386929 } }, { "checkpoint_type": "bytes", "bytes_threshold": 548000000, "cumulative_training_bytes": 548000417, "metrics": { "loss": 0.47438953995666305, "ce_loss": 0.4643895494934062, "lb_loss": 0.999999996817507 } }, { "checkpoint_type": "bytes", "bytes_threshold": 549000000, "cumulative_training_bytes": 549005076, "metrics": { "loss": 0.4745037520373309, "ce_loss": 0.4645037615740741, "lb_loss": 0.9999999967254238 } }, { "checkpoint_type": "bytes", "bytes_threshold": 550000000, "cumulative_training_bytes": 550005242, "metrics": { "loss": 0.4746403744368552, "ce_loss": 0.46464038397359836, "lb_loss": 0.9999999965697712 } }, { "checkpoint_type": "bytes", "bytes_threshold": 551000000, "cumulative_training_bytes": 551001002, "metrics": { "loss": 0.4746668936694836, "ce_loss": 0.4646669032062268, "lb_loss": 0.9999999966120182 } }, { "checkpoint_type": "bytes", "bytes_threshold": 552000000, "cumulative_training_bytes": 552003787, "metrics": { "loss": 0.47473511401777224, "ce_loss": 0.4647351235545154, "lb_loss": 0.999999996668573 } }, { "checkpoint_type": "bytes", "bytes_threshold": 553000000, "cumulative_training_bytes": 553002274, "metrics": { "loss": 0.4748042938780854, "ce_loss": 0.4648043034148286, "lb_loss": 0.9999999963410383 } }, { "checkpoint_type": "bytes", "bytes_threshold": 554000000, "cumulative_training_bytes": 554003365, "metrics": { "loss": 0.47482116968128457, "ce_loss": 0.46482117921802774, "lb_loss": 0.9999999963110633 } }, { "checkpoint_type": "bytes", "bytes_threshold": 555000000, "cumulative_training_bytes": 555006363, "metrics": { "loss": 0.47514410925207673, "ce_loss": 0.4651441187888199, "lb_loss": 0.9999999963274654 } }, { "checkpoint_type": "bytes", "bytes_threshold": 556000000, "cumulative_training_bytes": 556003044, "metrics": { "loss": 0.47538666690730014, "ce_loss": 0.4653866764440433, "lb_loss": 0.9999999961698098 } }, { "checkpoint_type": "bytes", "bytes_threshold": 557000000, "cumulative_training_bytes": 557004116, "metrics": { "loss": 0.47563164366554334, "ce_loss": 0.4656316532022865, "lb_loss": 0.9999999961060895 } }, { "checkpoint_type": "bytes", "bytes_threshold": 558000000, "cumulative_training_bytes": 558000464, "metrics": { "loss": 0.4758357828941898, "ce_loss": 0.465835792430933, "lb_loss": 0.9999999960857457 } }, { "checkpoint_type": "bytes", "bytes_threshold": 559000000, "cumulative_training_bytes": 559004482, "metrics": { "loss": 0.47595539375113893, "ce_loss": 0.4659554032878821, "lb_loss": 0.9999999961854076 } }, { "checkpoint_type": "bytes", "bytes_threshold": 560000000, "cumulative_training_bytes": 560002449, "metrics": { "loss": 0.47605645694818305, "ce_loss": 0.4660564664849262, "lb_loss": 0.9999999963806459 } }, { "checkpoint_type": "bytes", "bytes_threshold": 561000000, "cumulative_training_bytes": 561007064, "metrics": { "loss": 0.47612497096450473, "ce_loss": 0.4661249805012479, "lb_loss": 0.9999999961693354 } }, { "checkpoint_type": "bytes", "bytes_threshold": 562000000, "cumulative_training_bytes": 562002742, "metrics": { "loss": 0.4761972426402516, "ce_loss": 0.46619725217699476, "lb_loss": 0.9999999964391717 } }, { "checkpoint_type": "bytes", "bytes_threshold": 563000000, "cumulative_training_bytes": 563003402, "metrics": { "loss": 0.47623431614386447, "ce_loss": 0.46623432568060763, "lb_loss": 0.9999999965076781 } }, { "checkpoint_type": "bytes", "bytes_threshold": 564000000, "cumulative_training_bytes": 564003696, "metrics": { "loss": 0.47626276089594916, "ce_loss": 0.4662627704326923, "lb_loss": 0.9999999963664091 } }, { "checkpoint_type": "bytes", "bytes_threshold": 565000000, "cumulative_training_bytes": 565007276, "metrics": { "loss": 0.47630263068085205, "ce_loss": 0.4663026402175952, "lb_loss": 0.9999999963550714 } }, { "checkpoint_type": "bytes", "bytes_threshold": 566000000, "cumulative_training_bytes": 566006079, "metrics": { "loss": 0.47630903127537916, "ce_loss": 0.4663090408121223, "lb_loss": 0.9999999962126329 } }, { "checkpoint_type": "bytes", "bytes_threshold": 567000000, "cumulative_training_bytes": 567007618, "metrics": { "loss": 0.4764510868956603, "ce_loss": 0.46645109643240346, "lb_loss": 0.9999999961627911 } }, { "checkpoint_type": "bytes", "bytes_threshold": 568000000, "cumulative_training_bytes": 568004347, "metrics": { "loss": 0.47650604713050604, "ce_loss": 0.4665060566672492, "lb_loss": 0.9999999959478841 } }, { "checkpoint_type": "bytes", "bytes_threshold": 569000000, "cumulative_training_bytes": 569001858, "metrics": { "loss": 0.47657124815293556, "ce_loss": 0.4665712576896787, "lb_loss": 0.9999999959869736 } }, { "checkpoint_type": "bytes", "bytes_threshold": 570000000, "cumulative_training_bytes": 570002921, "metrics": { "loss": 0.4766179731851355, "ce_loss": 0.46661798272187865, "lb_loss": 0.999999996034991 } }, { "checkpoint_type": "bytes", "bytes_threshold": 571000000, "cumulative_training_bytes": 571001763, "metrics": { "loss": 0.4765936612263846, "ce_loss": 0.46659367076312774, "lb_loss": 0.9999999959145516 } }, { "epoch": 12, "checkpoint_type": "epoch", "metrics": { "loss": 0.47662866575245993, "ce_loss": 0.4666286752892031, "lb_loss": 0.9999999958629167, "training_bytes": 47653410 }, "cumulative_training_bytes": 571840836, "training_bytes_this_epoch": 47653410 }, { "checkpoint_type": "bytes", "bytes_threshold": 572000000, "cumulative_training_bytes": 572003805, "metrics": { "loss": 0.46656621070135207, "ce_loss": 0.45656622023809523, "lb_loss": 0.9999999886467343 } }, { "checkpoint_type": "bytes", "bytes_threshold": 573000000, "cumulative_training_bytes": 573001015, "metrics": { "loss": 0.47032956413875354, "ce_loss": 0.4603295736754967, "lb_loss": 0.999999998421069 } }, { "checkpoint_type": "bytes", "bytes_threshold": 574000000, "cumulative_training_bytes": 574004305, "metrics": { "loss": 0.4710136761902072, "ce_loss": 0.46101368572695034, "lb_loss": 0.999999999365908 } }, { "checkpoint_type": "bytes", "bytes_threshold": 575000000, "cumulative_training_bytes": 575005268, "metrics": { "loss": 0.4715617338046612, "ce_loss": 0.4615617433414044, "lb_loss": 0.9999999987011094 } }, { "checkpoint_type": "bytes", "bytes_threshold": 576000000, "cumulative_training_bytes": 576005624, "metrics": { "loss": 0.47210074873531566, "ce_loss": 0.4621007582720588, "lb_loss": 1.0000000005478369 } }, { "checkpoint_type": "bytes", "bytes_threshold": 577000000, "cumulative_training_bytes": 577007614, "metrics": { "loss": 0.47244212009288644, "ce_loss": 0.4624421296296296, "lb_loss": 1.0000000013245476 } }, { "checkpoint_type": "bytes", "bytes_threshold": 578000000, "cumulative_training_bytes": 578000160, "metrics": { "loss": 0.47264556499741833, "ce_loss": 0.4626455745341615, "lb_loss": 1.0000000017029897 } }, { "checkpoint_type": "bytes", "bytes_threshold": 579000000, "cumulative_training_bytes": 579000407, "metrics": { "loss": 0.47249581265577023, "ce_loss": 0.4624958221925134, "lb_loss": 1.0000000014662105 } }, { "checkpoint_type": "bytes", "bytes_threshold": 580000000, "cumulative_training_bytes": 580003744, "metrics": { "loss": 0.472489363704345, "ce_loss": 0.4624893732410882, "lb_loss": 1.0000000013419432 } }, { "checkpoint_type": "bytes", "bytes_threshold": 581000000, "cumulative_training_bytes": 581005566, "metrics": { "loss": 0.4733132214574089, "ce_loss": 0.46331323099415206, "lb_loss": 1.0000000010456955 } }, { "checkpoint_type": "bytes", "bytes_threshold": 582000000, "cumulative_training_bytes": 582005047, "metrics": { "loss": 0.47375311075338494, "ce_loss": 0.4637531202901281, "lb_loss": 1.0000000007635863 } }, { "checkpoint_type": "bytes", "bytes_threshold": 583000000, "cumulative_training_bytes": 583001026, "metrics": { "loss": 0.4739308522511772, "ce_loss": 0.46393086178792037, "lb_loss": 1.000000001022729 } }, { "checkpoint_type": "bytes", "bytes_threshold": 584000000, "cumulative_training_bytes": 584004132, "metrics": { "loss": 0.47396557216980595, "ce_loss": 0.4639655817065491, "lb_loss": 1.000000000638085 } }, { "checkpoint_type": "bytes", "bytes_threshold": 585000000, "cumulative_training_bytes": 585007650, "metrics": { "loss": 0.4742566674254661, "ce_loss": 0.4642566769622093, "lb_loss": 1.0000000008316927 } }, { "checkpoint_type": "bytes", "bytes_threshold": 586000000, "cumulative_training_bytes": 586004555, "metrics": { "loss": 0.4743739347200136, "ce_loss": 0.46437394425675677, "lb_loss": 1.000000000483281 } }, { "checkpoint_type": "bytes", "bytes_threshold": 587000000, "cumulative_training_bytes": 587000297, "metrics": { "loss": 0.474306137874873, "ce_loss": 0.4643061474116162, "lb_loss": 1.000000000752584 } }, { "checkpoint_type": "bytes", "bytes_threshold": 588000000, "cumulative_training_bytes": 588002607, "metrics": { "loss": 0.4741220741913478, "ce_loss": 0.46412208372809094, "lb_loss": 1.0000000010447048 } }, { "checkpoint_type": "bytes", "bytes_threshold": 589000000, "cumulative_training_bytes": 589003684, "metrics": { "loss": 0.47414856037851105, "ce_loss": 0.4641485699152542, "lb_loss": 1.0000000009304917 } }, { "checkpoint_type": "bytes", "bytes_threshold": 590000000, "cumulative_training_bytes": 590003539, "metrics": { "loss": 0.4741836451198097, "ce_loss": 0.46418365465655287, "lb_loss": 1.0000000006279461 } }, { "checkpoint_type": "bytes", "bytes_threshold": 591000000, "cumulative_training_bytes": 591000699, "metrics": { "loss": 0.4741765723150347, "ce_loss": 0.46417658185177785, "lb_loss": 1.0000000000476266 } }, { "checkpoint_type": "bytes", "bytes_threshold": 592000000, "cumulative_training_bytes": 592006371, "metrics": { "loss": 0.4741393104139022, "ce_loss": 0.4641393199506454, "lb_loss": 1.0000000001810316 } }, { "checkpoint_type": "bytes", "bytes_threshold": 593000000, "cumulative_training_bytes": 593001411, "metrics": { "loss": 0.47416184262497896, "ce_loss": 0.4641618521617221, "lb_loss": 0.9999999998490475 } }, { "checkpoint_type": "bytes", "bytes_threshold": 594000000, "cumulative_training_bytes": 594006305, "metrics": { "loss": 0.47413737712128795, "ce_loss": 0.4641373866580311, "lb_loss": 0.9999999995882235 } }, { "checkpoint_type": "bytes", "bytes_threshold": 595000000, "cumulative_training_bytes": 595002552, "metrics": { "loss": 0.47414900905829815, "ce_loss": 0.4641490185950413, "lb_loss": 0.9999999991724314 } }, { "checkpoint_type": "bytes", "bytes_threshold": 596000000, "cumulative_training_bytes": 596006850, "metrics": { "loss": 0.47415680486439754, "ce_loss": 0.4641568144011407, "lb_loss": 0.9999999985079953 } }, { "checkpoint_type": "bytes", "bytes_threshold": 597000000, "cumulative_training_bytes": 597001688, "metrics": { "loss": 0.4742202382949671, "ce_loss": 0.4642202478317103, "lb_loss": 0.999999998893523 } }, { "checkpoint_type": "bytes", "bytes_threshold": 598000000, "cumulative_training_bytes": 598005915, "metrics": { "loss": 0.47454308382000254, "ce_loss": 0.4645430933567457, "lb_loss": 0.999999998848725 } }, { "checkpoint_type": "bytes", "bytes_threshold": 599000000, "cumulative_training_bytes": 599003238, "metrics": { "loss": 0.47462788903951847, "ce_loss": 0.46462789857626163, "lb_loss": 0.9999999990085497 } }, { "checkpoint_type": "bytes", "bytes_threshold": 600000000, "cumulative_training_bytes": 600001112, "metrics": { "loss": 0.4747784238849534, "ce_loss": 0.46477843342169656, "lb_loss": 0.9999999986711308 } }, { "checkpoint_type": "bytes", "bytes_threshold": 601000000, "cumulative_training_bytes": 601004350, "metrics": { "loss": 0.47477400431203604, "ce_loss": 0.4647740138487792, "lb_loss": 0.9999999984664597 } }, { "checkpoint_type": "bytes", "bytes_threshold": 602000000, "cumulative_training_bytes": 602000426, "metrics": { "loss": 0.47477283493520406, "ce_loss": 0.4647728444719472, "lb_loss": 0.9999999983354885 } }, { "checkpoint_type": "bytes", "bytes_threshold": 603000000, "cumulative_training_bytes": 603006064, "metrics": { "loss": 0.47476264018684405, "ce_loss": 0.4647626497235872, "lb_loss": 0.9999999979204276 } }, { "checkpoint_type": "bytes", "bytes_threshold": 604000000, "cumulative_training_bytes": 604000130, "metrics": { "loss": 0.4748116534096854, "ce_loss": 0.46481166294642856, "lb_loss": 0.9999999979706038 } }, { "checkpoint_type": "bytes", "bytes_threshold": 605000000, "cumulative_training_bytes": 605006216, "metrics": { "loss": 0.4747801545419915, "ce_loss": 0.4647801640787347, "lb_loss": 0.9999999979081261 } }, { "checkpoint_type": "bytes", "bytes_threshold": 606000000, "cumulative_training_bytes": 606000518, "metrics": { "loss": 0.4748008338840145, "ce_loss": 0.46480084342075767, "lb_loss": 0.9999999976484157 } }, { "checkpoint_type": "bytes", "bytes_threshold": 607000000, "cumulative_training_bytes": 607002557, "metrics": { "loss": 0.47481949653359656, "ce_loss": 0.4648195060703397, "lb_loss": 0.9999999974948396 } }, { "checkpoint_type": "bytes", "bytes_threshold": 608000000, "cumulative_training_bytes": 608005994, "metrics": { "loss": 0.47476268763666357, "ce_loss": 0.46476269717340674, "lb_loss": 0.9999999975138439 } }, { "checkpoint_type": "bytes", "bytes_threshold": 609000000, "cumulative_training_bytes": 609006940, "metrics": { "loss": 0.4747737273297587, "ce_loss": 0.46477373686650186, "lb_loss": 0.999999997568661 } }, { "checkpoint_type": "bytes", "bytes_threshold": 610000000, "cumulative_training_bytes": 610003956, "metrics": { "loss": 0.4748123964311605, "ce_loss": 0.4648124059679037, "lb_loss": 0.9999999975847266 } }, { "checkpoint_type": "bytes", "bytes_threshold": 611000000, "cumulative_training_bytes": 611004009, "metrics": { "loss": 0.47486817361415595, "ce_loss": 0.4648681831508991, "lb_loss": 0.999999997518415 } }, { "checkpoint_type": "bytes", "bytes_threshold": 612000000, "cumulative_training_bytes": 612000579, "metrics": { "loss": 0.47488283274785464, "ce_loss": 0.4648828422845978, "lb_loss": 0.9999999974549294 } }, { "checkpoint_type": "bytes", "bytes_threshold": 613000000, "cumulative_training_bytes": 613001932, "metrics": { "loss": 0.4748709832333889, "ce_loss": 0.46487099277013205, "lb_loss": 0.9999999975058499 } }, { "checkpoint_type": "bytes", "bytes_threshold": 614000000, "cumulative_training_bytes": 614003917, "metrics": { "loss": 0.47490614968620526, "ce_loss": 0.4649061592229484, "lb_loss": 0.9999999975976341 } }, { "checkpoint_type": "bytes", "bytes_threshold": 615000000, "cumulative_training_bytes": 615001118, "metrics": { "loss": 0.47489570370376766, "ce_loss": 0.4648957132405108, "lb_loss": 0.9999999976635994 } }, { "checkpoint_type": "bytes", "bytes_threshold": 616000000, "cumulative_training_bytes": 616000487, "metrics": { "loss": 0.4748908077959545, "ce_loss": 0.46489081733269766, "lb_loss": 0.9999999977575923 } }, { "checkpoint_type": "bytes", "bytes_threshold": 617000000, "cumulative_training_bytes": 617005273, "metrics": { "loss": 0.474879167548356, "ce_loss": 0.4648791770850992, "lb_loss": 0.9999999978275982 } }, { "checkpoint_type": "bytes", "bytes_threshold": 618000000, "cumulative_training_bytes": 618005046, "metrics": { "loss": 0.4748712766425569, "ce_loss": 0.46487128617930007, "lb_loss": 0.9999999979436447 } }, { "checkpoint_type": "bytes", "bytes_threshold": 619000000, "cumulative_training_bytes": 619006994, "metrics": { "loss": 0.4749004952319257, "ce_loss": 0.46490050476866884, "lb_loss": 0.999999997909967 } }, { "epoch": 13, "checkpoint_type": "epoch", "metrics": { "loss": 0.47487888667148304, "ce_loss": 0.4648788962082262, "lb_loss": 0.9999999979889178, "training_bytes": 47653395 }, "cumulative_training_bytes": 619494231, "training_bytes_this_epoch": 47653395 }, { "checkpoint_type": "bytes", "bytes_threshold": 620000000, "cumulative_training_bytes": 620000323, "metrics": { "loss": 0.46723839008446894, "ce_loss": 0.4572383996212121, "lb_loss": 0.9999999918720939 } }, { "checkpoint_type": "bytes", "bytes_threshold": 621000000, "cumulative_training_bytes": 621005489, "metrics": { "loss": 0.4674079853871147, "ce_loss": 0.45740799492385786, "lb_loss": 0.9999999990923151 } }, { "checkpoint_type": "bytes", "bytes_threshold": 622000000, "cumulative_training_bytes": 622007499, "metrics": { "loss": 0.46797802971630564, "ce_loss": 0.4579780392530488, "lb_loss": 1.0000000014537718 } }, { "checkpoint_type": "bytes", "bytes_threshold": 623000000, "cumulative_training_bytes": 623001614, "metrics": { "loss": 0.4684726296553966, "ce_loss": 0.45847263919213976, "lb_loss": 1.0 } }, { "checkpoint_type": "bytes", "bytes_threshold": 624000000, "cumulative_training_bytes": 624004859, "metrics": { "loss": 0.46862292127819744, "ce_loss": 0.4586229308149406, "lb_loss": 0.9999999988868402 } }, { "checkpoint_type": "bytes", "bytes_threshold": 625000000, "cumulative_training_bytes": 625002853, "metrics": { "loss": 0.4686638247817547, "ce_loss": 0.4586638343184979, "lb_loss": 0.9999999982591133 } }, { "checkpoint_type": "bytes", "bytes_threshold": 626000000, "cumulative_training_bytes": 626004267, "metrics": { "loss": 0.4690142367867862, "ce_loss": 0.4590142463235294, "lb_loss": 0.9999999991585227 } }, { "checkpoint_type": "bytes", "bytes_threshold": 627000000, "cumulative_training_bytes": 627004393, "metrics": { "loss": 0.4692574043663181, "ce_loss": 0.45925741390306124, "lb_loss": 0.9999999992093261 } }, { "checkpoint_type": "bytes", "bytes_threshold": 628000000, "cumulative_training_bytes": 628005962, "metrics": { "loss": 0.4694238625379643, "ce_loss": 0.4594238720747075, "lb_loss": 0.9999999987660605 } }, { "checkpoint_type": "bytes", "bytes_threshold": 629000000, "cumulative_training_bytes": 629001986, "metrics": { "loss": 0.4696217672561658, "ce_loss": 0.45962177679290894, "lb_loss": 0.9999999979827598 } }, { "checkpoint_type": "bytes", "bytes_threshold": 630000000, "cumulative_training_bytes": 630004200, "metrics": { "loss": 0.4695898199672685, "ce_loss": 0.45958982950401167, "lb_loss": 0.9999999979131853 } }, { "checkpoint_type": "bytes", "bytes_threshold": 631000000, "cumulative_training_bytes": 631006243, "metrics": { "loss": 0.4696956566583301, "ce_loss": 0.45969566619507324, "lb_loss": 0.9999999982936087 } }, { "checkpoint_type": "bytes", "bytes_threshold": 632000000, "cumulative_training_bytes": 632002297, "metrics": { "loss": 0.46990467987808526, "ce_loss": 0.4599046894148284, "lb_loss": 0.999999998393012 } }, { "checkpoint_type": "bytes", "bytes_threshold": 633000000, "cumulative_training_bytes": 633003176, "metrics": { "loss": 0.46981904374078237, "ce_loss": 0.45981905327752554, "lb_loss": 0.9999999986807144 } }, { "checkpoint_type": "bytes", "bytes_threshold": 634000000, "cumulative_training_bytes": 634003223, "metrics": { "loss": 0.46992636394450354, "ce_loss": 0.4599263734812467, "lb_loss": 0.9999999983311959 } }, { "checkpoint_type": "bytes", "bytes_threshold": 635000000, "cumulative_training_bytes": 635006003, "metrics": { "loss": 0.4700912006287707, "ce_loss": 0.46009121016551385, "lb_loss": 0.999999997732432 } }, { "checkpoint_type": "bytes", "bytes_threshold": 636000000, "cumulative_training_bytes": 636004090, "metrics": { "loss": 0.4703039710317023, "ce_loss": 0.46030398056844546, "lb_loss": 0.9999999974000758 } }, { "checkpoint_type": "bytes", "bytes_threshold": 637000000, "cumulative_training_bytes": 637000138, "metrics": { "loss": 0.47044514910658286, "ce_loss": 0.460445158643326, "lb_loss": 0.999999997443652 } }, { "checkpoint_type": "bytes", "bytes_threshold": 638000000, "cumulative_training_bytes": 638002271, "metrics": { "loss": 0.4705825973030747, "ce_loss": 0.4605826068398179, "lb_loss": 0.9999999972368707 } }, { "checkpoint_type": "bytes", "bytes_threshold": 639000000, "cumulative_training_bytes": 639000648, "metrics": { "loss": 0.47067206184974547, "ce_loss": 0.46067207138648864, "lb_loss": 0.9999999969565578 } }, { "checkpoint_type": "bytes", "bytes_threshold": 640000000, "cumulative_training_bytes": 640003100, "metrics": { "loss": 0.47072955597502375, "ce_loss": 0.4607295655117669, "lb_loss": 0.9999999970164279 } }, { "checkpoint_type": "bytes", "bytes_threshold": 641000000, "cumulative_training_bytes": 641002993, "metrics": { "loss": 0.47074683985762805, "ce_loss": 0.4607468493943712, "lb_loss": 0.9999999968785598 } }, { "checkpoint_type": "bytes", "bytes_threshold": 642000000, "cumulative_training_bytes": 642000407, "metrics": { "loss": 0.4708656697278125, "ce_loss": 0.4608656792645557, "lb_loss": 0.9999999967123077 } }, { "checkpoint_type": "bytes", "bytes_threshold": 643000000, "cumulative_training_bytes": 643000364, "metrics": { "loss": 0.47096550138242244, "ce_loss": 0.4609655109191656, "lb_loss": 0.9999999967166933 } }, { "checkpoint_type": "bytes", "bytes_threshold": 644000000, "cumulative_training_bytes": 644000345, "metrics": { "loss": 0.47106452298358203, "ce_loss": 0.4610645325203252, "lb_loss": 0.9999999966078658 } }, { "checkpoint_type": "bytes", "bytes_threshold": 645000000, "cumulative_training_bytes": 645004285, "metrics": { "loss": 0.4711627833440018, "ce_loss": 0.46116279288074497, "lb_loss": 0.9999999969383015 } }, { "checkpoint_type": "bytes", "bytes_threshold": 646000000, "cumulative_training_bytes": 646004246, "metrics": { "loss": 0.47127110227684066, "ce_loss": 0.4612711118135838, "lb_loss": 0.9999999970886748 } }, { "checkpoint_type": "bytes", "bytes_threshold": 647000000, "cumulative_training_bytes": 647002840, "metrics": { "loss": 0.4712862239242596, "ce_loss": 0.4612862334610028, "lb_loss": 0.9999999970280693 } }, { "checkpoint_type": "bytes", "bytes_threshold": 648000000, "cumulative_training_bytes": 648003976, "metrics": { "loss": 0.4713705267774197, "ce_loss": 0.46137053631416286, "lb_loss": 0.9999999969564949 } }, { "checkpoint_type": "bytes", "bytes_threshold": 649000000, "cumulative_training_bytes": 649000496, "metrics": { "loss": 0.47139901831316594, "ce_loss": 0.4613990278499091, "lb_loss": 0.9999999971211467 } }, { "checkpoint_type": "bytes", "bytes_threshold": 650000000, "cumulative_training_bytes": 650001399, "metrics": { "loss": 0.4714765372676265, "ce_loss": 0.46147654680436967, "lb_loss": 0.9999999974403831 } }, { "checkpoint_type": "bytes", "bytes_threshold": 651000000, "cumulative_training_bytes": 651002904, "metrics": { "loss": 0.4715733364090385, "ce_loss": 0.46157334594578164, "lb_loss": 0.9999999976088582 } }, { "checkpoint_type": "bytes", "bytes_threshold": 652000000, "cumulative_training_bytes": 652005210, "metrics": { "loss": 0.47166369952310605, "ce_loss": 0.4616637090598492, "lb_loss": 0.999999997626488 } }, { "checkpoint_type": "bytes", "bytes_threshold": 653000000, "cumulative_training_bytes": 653002239, "metrics": { "loss": 0.4716997182581814, "ce_loss": 0.4616997277949246, "lb_loss": 0.9999999975743881 } }, { "checkpoint_type": "bytes", "bytes_threshold": 654000000, "cumulative_training_bytes": 654006342, "metrics": { "loss": 0.4717738008128684, "ce_loss": 0.46177381034961157, "lb_loss": 0.9999999973670757 } }, { "checkpoint_type": "bytes", "bytes_threshold": 655000000, "cumulative_training_bytes": 655002681, "metrics": { "loss": 0.47183420027986955, "ce_loss": 0.4618342098166127, "lb_loss": 0.9999999973637643 } }, { "checkpoint_type": "bytes", "bytes_threshold": 656000000, "cumulative_training_bytes": 656006752, "metrics": { "loss": 0.471994152803303, "ce_loss": 0.46199416234004614, "lb_loss": 0.999999997549295 } }, { "checkpoint_type": "bytes", "bytes_threshold": 657000000, "cumulative_training_bytes": 657006557, "metrics": { "loss": 0.4720288957562336, "ce_loss": 0.46202890529297674, "lb_loss": 0.9999999973592879 } }, { "checkpoint_type": "bytes", "bytes_threshold": 658000000, "cumulative_training_bytes": 658003676, "metrics": { "loss": 0.47203408391492746, "ce_loss": 0.4620340934516706, "lb_loss": 0.9999999974157096 } }, { "checkpoint_type": "bytes", "bytes_threshold": 659000000, "cumulative_training_bytes": 659005764, "metrics": { "loss": 0.47206756957258034, "ce_loss": 0.4620675791093235, "lb_loss": 0.9999999975159918 } }, { "checkpoint_type": "bytes", "bytes_threshold": 660000000, "cumulative_training_bytes": 660004372, "metrics": { "loss": 0.472095185088895, "ce_loss": 0.46209519462563814, "lb_loss": 0.9999999974530819 } }, { "checkpoint_type": "bytes", "bytes_threshold": 661000000, "cumulative_training_bytes": 661000728, "metrics": { "loss": 0.47210958171625556, "ce_loss": 0.4621095912529987, "lb_loss": 0.9999999973271952 } }, { "checkpoint_type": "bytes", "bytes_threshold": 662000000, "cumulative_training_bytes": 662006203, "metrics": { "loss": 0.47222807999667427, "ce_loss": 0.46222808953341743, "lb_loss": 0.9999999972082133 } }, { "checkpoint_type": "bytes", "bytes_threshold": 663000000, "cumulative_training_bytes": 663003652, "metrics": { "loss": 0.47226043096668935, "ce_loss": 0.4622604405034325, "lb_loss": 0.9999999970307842 } }, { "checkpoint_type": "bytes", "bytes_threshold": 664000000, "cumulative_training_bytes": 664002530, "metrics": { "loss": 0.47231932976126095, "ce_loss": 0.4623193392980041, "lb_loss": 0.9999999970874537 } }, { "checkpoint_type": "bytes", "bytes_threshold": 665000000, "cumulative_training_bytes": 665004691, "metrics": { "loss": 0.47233488024956005, "ce_loss": 0.4623348897863032, "lb_loss": 0.9999999971215661 } }, { "checkpoint_type": "bytes", "bytes_threshold": 666000000, "cumulative_training_bytes": 666005317, "metrics": { "loss": 0.47233496844317124, "ce_loss": 0.4623349779799144, "lb_loss": 0.99999999716402 } }, { "checkpoint_type": "bytes", "bytes_threshold": 667000000, "cumulative_training_bytes": 667007057, "metrics": { "loss": 0.4723920256163592, "ce_loss": 0.46239203515310234, "lb_loss": 0.9999999972623169 } }, { "epoch": 14, "checkpoint_type": "epoch", "metrics": { "loss": 0.4724123759870358, "ce_loss": 0.46241238552377895, "lb_loss": 0.9999999972610977, "training_bytes": 47653399 }, "cumulative_training_bytes": 667147630, "training_bytes_this_epoch": 47653399 }, { "checkpoint_type": "bytes", "bytes_threshold": 668000000, "cumulative_training_bytes": 668001125, "metrics": { "loss": 0.46842632974897114, "ce_loss": 0.4584263392857143, "lb_loss": 0.9999999936137881 } }, { "checkpoint_type": "bytes", "bytes_threshold": 669000000, "cumulative_training_bytes": 669005783, "metrics": { "loss": 0.4678189205044091, "ce_loss": 0.45781893004115226, "lb_loss": 0.9999999923961153 } }, { "checkpoint_type": "bytes", "bytes_threshold": 670000000, "cumulative_training_bytes": 670007110, "metrics": { "loss": 0.4679608026290322, "ce_loss": 0.4579608121657754, "lb_loss": 0.9999999939439131 } }, { "checkpoint_type": "bytes", "bytes_threshold": 671000000, "cumulative_training_bytes": 671002720, "metrics": { "loss": 0.46782954155452666, "ce_loss": 0.4578295510912698, "lb_loss": 0.9999999952694726 } }, { "checkpoint_type": "bytes", "bytes_threshold": 672000000, "cumulative_training_bytes": 672007653, "metrics": { "loss": 0.4680907186472191, "ce_loss": 0.45809072818396224, "lb_loss": 0.9999999966261521 } }, { "checkpoint_type": "bytes", "bytes_threshold": 673000000, "cumulative_training_bytes": 673000283, "metrics": { "loss": 0.4684463636682177, "ce_loss": 0.45844637320496084, "lb_loss": 0.9999999954090417 } }, { "checkpoint_type": "bytes", "bytes_threshold": 674000000, "cumulative_training_bytes": 674004321, "metrics": { "loss": 0.46855324088131706, "ce_loss": 0.4585532504180602, "lb_loss": 0.999999994484743 } }, { "checkpoint_type": "bytes", "bytes_threshold": 675000000, "cumulative_training_bytes": 675006849, "metrics": { "loss": 0.4686119797628677, "ce_loss": 0.4586119892996109, "lb_loss": 0.9999999941439016 } }, { "checkpoint_type": "bytes", "bytes_threshold": 676000000, "cumulative_training_bytes": 676003287, "metrics": { "loss": 0.4687279967240513, "ce_loss": 0.45872800626079446, "lb_loss": 0.9999999935659926 } }, { "checkpoint_type": "bytes", "bytes_threshold": 677000000, "cumulative_training_bytes": 677005219, "metrics": { "loss": 0.46872374676659273, "ce_loss": 0.4587237563033359, "lb_loss": 0.9999999942198754 } }, { "checkpoint_type": "bytes", "bytes_threshold": 678000000, "cumulative_training_bytes": 678005845, "metrics": { "loss": 0.4687448702201279, "ce_loss": 0.45874487975687106, "lb_loss": 0.9999999951274569 } }, { "checkpoint_type": "bytes", "bytes_threshold": 679000000, "cumulative_training_bytes": 679002372, "metrics": { "loss": 0.46877505703846667, "ce_loss": 0.45877506657520983, "lb_loss": 0.9999999955363855 } }, { "checkpoint_type": "bytes", "bytes_threshold": 680000000, "cumulative_training_bytes": 680006963, "metrics": { "loss": 0.4687262739453997, "ce_loss": 0.45872628348214284, "lb_loss": 0.9999999950329462 } }, { "checkpoint_type": "bytes", "bytes_threshold": 681000000, "cumulative_training_bytes": 681000974, "metrics": { "loss": 0.468684216900515, "ce_loss": 0.45868422643725815, "lb_loss": 0.9999999951894538 } }, { "checkpoint_type": "bytes", "bytes_threshold": 682000000, "cumulative_training_bytes": 682006359, "metrics": { "loss": 0.46886864708355563, "ce_loss": 0.4588686566202988, "lb_loss": 0.9999999951481021 } }, { "checkpoint_type": "bytes", "bytes_threshold": 683000000, "cumulative_training_bytes": 683003602, "metrics": { "loss": 0.4691238743918283, "ce_loss": 0.45912388392857145, "lb_loss": 0.9999999956274584 } }, { "checkpoint_type": "bytes", "bytes_threshold": 684000000, "cumulative_training_bytes": 684002920, "metrics": { "loss": 0.46929555333207207, "ce_loss": 0.45929556286881523, "lb_loss": 0.9999999955898515 } }, { "checkpoint_type": "bytes", "bytes_threshold": 685000000, "cumulative_training_bytes": 685007066, "metrics": { "loss": 0.4694569631613296, "ce_loss": 0.4594569726980728, "lb_loss": 0.9999999959412683 } }, { "checkpoint_type": "bytes", "bytes_threshold": 686000000, "cumulative_training_bytes": 686003334, "metrics": { "loss": 0.46950572702512294, "ce_loss": 0.4595057365618661, "lb_loss": 0.9999999965180248 } }, { "checkpoint_type": "bytes", "bytes_threshold": 687000000, "cumulative_training_bytes": 687002972, "metrics": { "loss": 0.4696742790322091, "ce_loss": 0.45967428856895226, "lb_loss": 0.9999999965100517 } }, { "checkpoint_type": "bytes", "bytes_threshold": 688000000, "cumulative_training_bytes": 688007217, "metrics": { "loss": 0.46969413696123996, "ce_loss": 0.4596941464979831, "lb_loss": 0.9999999968744172 } }, { "checkpoint_type": "bytes", "bytes_threshold": 689000000, "cumulative_training_bytes": 689000339, "metrics": { "loss": 0.46979924995748157, "ce_loss": 0.45979925949422473, "lb_loss": 0.9999999969331876 } }, { "checkpoint_type": "bytes", "bytes_threshold": 690000000, "cumulative_training_bytes": 690005975, "metrics": { "loss": 0.46974848966841076, "ce_loss": 0.4597484992051539, "lb_loss": 0.999999997187331 } }, { "checkpoint_type": "bytes", "bytes_threshold": 691000000, "cumulative_training_bytes": 691000262, "metrics": { "loss": 0.4697124794350246, "ce_loss": 0.45971248897176775, "lb_loss": 0.999999997131634 } }, { "checkpoint_type": "bytes", "bytes_threshold": 692000000, "cumulative_training_bytes": 692001292, "metrics": { "loss": 0.46974805658086766, "ce_loss": 0.4597480661176108, "lb_loss": 0.9999999971555665 } }, { "checkpoint_type": "bytes", "bytes_threshold": 693000000, "cumulative_training_bytes": 693001879, "metrics": { "loss": 0.46981497825550306, "ce_loss": 0.4598149877922462, "lb_loss": 0.9999999971600037 } }, { "checkpoint_type": "bytes", "bytes_threshold": 694000000, "cumulative_training_bytes": 694001241, "metrics": { "loss": 0.4697958947451035, "ce_loss": 0.4597959042818467, "lb_loss": 0.9999999970953565 } }, { "checkpoint_type": "bytes", "bytes_threshold": 695000000, "cumulative_training_bytes": 695005882, "metrics": { "loss": 0.469869174014081, "ce_loss": 0.4598691835508242, "lb_loss": 0.9999999976092643 } }, { "checkpoint_type": "bytes", "bytes_threshold": 696000000, "cumulative_training_bytes": 696001244, "metrics": { "loss": 0.46987648250569714, "ce_loss": 0.4598764920424403, "lb_loss": 0.9999999974071189 } }, { "checkpoint_type": "bytes", "bytes_threshold": 697000000, "cumulative_training_bytes": 697002626, "metrics": { "loss": 0.4699041015886093, "ce_loss": 0.45990411112535245, "lb_loss": 0.9999999974483528 } }, { "checkpoint_type": "bytes", "bytes_threshold": 698000000, "cumulative_training_bytes": 698000141, "metrics": { "loss": 0.46995632496462125, "ce_loss": 0.4599563345013644, "lb_loss": 0.9999999972940585 } }, { "checkpoint_type": "bytes", "bytes_threshold": 699000000, "cumulative_training_bytes": 699000609, "metrics": { "loss": 0.46998604024095453, "ce_loss": 0.4599860497776977, "lb_loss": 0.9999999971923792 } }, { "checkpoint_type": "bytes", "bytes_threshold": 700000000, "cumulative_training_bytes": 700001376, "metrics": { "loss": 0.46995432803025755, "ce_loss": 0.4599543375670007, "lb_loss": 0.9999999970968607 } }, { "checkpoint_type": "bytes", "bytes_threshold": 701000000, "cumulative_training_bytes": 701000514, "metrics": { "loss": 0.46995761458675833, "ce_loss": 0.4599576241235015, "lb_loss": 0.9999999969934775 } }, { "checkpoint_type": "bytes", "bytes_threshold": 702000000, "cumulative_training_bytes": 702001728, "metrics": { "loss": 0.4700420221251637, "ce_loss": 0.4600420316619069, "lb_loss": 0.9999999970669068 } }, { "checkpoint_type": "bytes", "bytes_threshold": 703000000, "cumulative_training_bytes": 703001158, "metrics": { "loss": 0.4700812518074806, "ce_loss": 0.4600812613442238, "lb_loss": 0.999999997110772 } }, { "checkpoint_type": "bytes", "bytes_threshold": 704000000, "cumulative_training_bytes": 704004853, "metrics": { "loss": 0.47010860920546704, "ce_loss": 0.4601086187422102, "lb_loss": 0.9999999968055674 } }, { "checkpoint_type": "bytes", "bytes_threshold": 705000000, "cumulative_training_bytes": 705003445, "metrics": { "loss": 0.470210351307544, "ce_loss": 0.4602103608442872, "lb_loss": 0.9999999965647475 } }, { "checkpoint_type": "bytes", "bytes_threshold": 706000000, "cumulative_training_bytes": 706001098, "metrics": { "loss": 0.47026130511842923, "ce_loss": 0.4602613146551724, "lb_loss": 0.9999999964413384 } }, { "checkpoint_type": "bytes", "bytes_threshold": 707000000, "cumulative_training_bytes": 707007487, "metrics": { "loss": 0.4703792403672138, "ce_loss": 0.46037924990395696, "lb_loss": 0.9999999965079875 } }, { "checkpoint_type": "bytes", "bytes_threshold": 708000000, "cumulative_training_bytes": 708003669, "metrics": { "loss": 0.47049825695501096, "ce_loss": 0.4604982664917541, "lb_loss": 0.9999999966154034 } }, { "checkpoint_type": "bytes", "bytes_threshold": 709000000, "cumulative_training_bytes": 709000496, "metrics": { "loss": 0.4705623017969561, "ce_loss": 0.46056231133369924, "lb_loss": 0.9999999967613283 } }, { "checkpoint_type": "bytes", "bytes_threshold": 710000000, "cumulative_training_bytes": 710001059, "metrics": { "loss": 0.4706237760850185, "ce_loss": 0.46062378562176165, "lb_loss": 0.999999996901027 } }, { "checkpoint_type": "bytes", "bytes_threshold": 711000000, "cumulative_training_bytes": 711002755, "metrics": { "loss": 0.47068721222477916, "ce_loss": 0.4606872217615223, "lb_loss": 0.9999999968678425 } }, { "checkpoint_type": "bytes", "bytes_threshold": 712000000, "cumulative_training_bytes": 712004688, "metrics": { "loss": 0.47075681223318344, "ce_loss": 0.4607568217699266, "lb_loss": 0.9999999968870078 } }, { "checkpoint_type": "bytes", "bytes_threshold": 713000000, "cumulative_training_bytes": 713007034, "metrics": { "loss": 0.4708357584098345, "ce_loss": 0.46083576794657766, "lb_loss": 0.9999999969550882 } }, { "checkpoint_type": "bytes", "bytes_threshold": 714000000, "cumulative_training_bytes": 714005137, "metrics": { "loss": 0.47085419542649215, "ce_loss": 0.4608542049632353, "lb_loss": 0.9999999971366396 } }, { "epoch": 15, "checkpoint_type": "epoch", "metrics": { "loss": 0.4708282861734111, "ce_loss": 0.46082829571015427, "lb_loss": 0.999999997136602, "training_bytes": 47653389 }, "cumulative_training_bytes": 714801019, "training_bytes_this_epoch": 47653389 }, { "checkpoint_type": "bytes", "bytes_threshold": 715000000, "cumulative_training_bytes": 715000925, "metrics": { "loss": 0.4613972260401799, "ce_loss": 0.4513972355769231, "lb_loss": 0.9999999954150274 } }, { "checkpoint_type": "bytes", "bytes_threshold": 716000000, "cumulative_training_bytes": 716000809, "metrics": { "loss": 0.46277442956582093, "ce_loss": 0.4527744391025641, "lb_loss": 0.9999999912121357 } }, { "checkpoint_type": "bytes", "bytes_threshold": 717000000, "cumulative_training_bytes": 717007272, "metrics": { "loss": 0.4638031588660346, "ce_loss": 0.4538031684027778, "lb_loss": 0.9999999913076559 } }, { "checkpoint_type": "bytes", "bytes_threshold": 718000000, "cumulative_training_bytes": 718001268, "metrics": { "loss": 0.4635652632450314, "ce_loss": 0.45356527278177455, "lb_loss": 0.9999999925672866 } }, { "checkpoint_type": "bytes", "bytes_threshold": 719000000, "cumulative_training_bytes": 719000209, "metrics": { "loss": 0.4637926949879369, "ce_loss": 0.45379270452468007, "lb_loss": 0.9999999930261476 } }, { "checkpoint_type": "bytes", "bytes_threshold": 720000000, "cumulative_training_bytes": 720006860, "metrics": { "loss": 0.46441365025707126, "ce_loss": 0.45441365979381443, "lb_loss": 0.9999999941185402 } }, { "checkpoint_type": "bytes", "bytes_threshold": 721000000, "cumulative_training_bytes": 721002611, "metrics": { "loss": 0.4646387226944064, "ce_loss": 0.4546387322311496, "lb_loss": 0.9999999942531986 } }, { "checkpoint_type": "bytes", "bytes_threshold": 722000000, "cumulative_training_bytes": 722004482, "metrics": { "loss": 0.4649596812877249, "ce_loss": 0.4549596908244681, "lb_loss": 0.9999999962588574 } }, { "checkpoint_type": "bytes", "bytes_threshold": 723000000, "cumulative_training_bytes": 723006477, "metrics": { "loss": 0.46530060029052106, "ce_loss": 0.4553006098272642, "lb_loss": 0.9999999972729902 } }, { "checkpoint_type": "bytes", "bytes_threshold": 724000000, "cumulative_training_bytes": 724002028, "metrics": { "loss": 0.46535132528840256, "ce_loss": 0.4553513348251457, "lb_loss": 0.9999999976177993 } }, { "checkpoint_type": "bytes", "bytes_threshold": 725000000, "cumulative_training_bytes": 725004555, "metrics": { "loss": 0.4655385368221157, "ce_loss": 0.4555385463588589, "lb_loss": 0.9999999973598543 } }, { "checkpoint_type": "bytes", "bytes_threshold": 726000000, "cumulative_training_bytes": 726004006, "metrics": { "loss": 0.4657723225719376, "ce_loss": 0.4557723321086808, "lb_loss": 0.9999999966592065 } }, { "checkpoint_type": "bytes", "bytes_threshold": 727000000, "cumulative_training_bytes": 727000132, "metrics": { "loss": 0.4658971283320578, "ce_loss": 0.455897137868801, "lb_loss": 0.9999999966699226 } }, { "checkpoint_type": "bytes", "bytes_threshold": 728000000, "cumulative_training_bytes": 728001078, "metrics": { "loss": 0.4661883605328624, "ce_loss": 0.45618837006960555, "lb_loss": 0.9999999971304029 } }, { "checkpoint_type": "bytes", "bytes_threshold": 729000000, "cumulative_training_bytes": 729008228, "metrics": { "loss": 0.4664945520203689, "ce_loss": 0.45649456155711204, "lb_loss": 0.9999999964995117 } }, { "checkpoint_type": "bytes", "bytes_threshold": 730000000, "cumulative_training_bytes": 730005608, "metrics": { "loss": 0.46660442462740587, "ce_loss": 0.45660443416414903, "lb_loss": 0.9999999962184364 } }, { "checkpoint_type": "bytes", "bytes_threshold": 731000000, "cumulative_training_bytes": 731002789, "metrics": { "loss": 0.46665003033802055, "ce_loss": 0.4566500398747637, "lb_loss": 0.9999999959437292 } }, { "checkpoint_type": "bytes", "bytes_threshold": 732000000, "cumulative_training_bytes": 732003542, "metrics": { "loss": 0.4667574378931634, "ce_loss": 0.45675744742990654, "lb_loss": 0.9999999955701043 } }, { "checkpoint_type": "bytes", "bytes_threshold": 733000000, "cumulative_training_bytes": 733005914, "metrics": { "loss": 0.4668850435540895, "ce_loss": 0.45688505309083266, "lb_loss": 0.9999999954130993 } }, { "checkpoint_type": "bytes", "bytes_threshold": 734000000, "cumulative_training_bytes": 734004082, "metrics": { "loss": 0.4669946388384562, "ce_loss": 0.4569946483751994, "lb_loss": 0.999999995555794 } }, { "checkpoint_type": "bytes", "bytes_threshold": 735000000, "cumulative_training_bytes": 735003332, "metrics": { "loss": 0.46714373572661416, "ce_loss": 0.4571437452633573, "lb_loss": 0.9999999958667488 } }, { "checkpoint_type": "bytes", "bytes_threshold": 736000000, "cumulative_training_bytes": 736001573, "metrics": { "loss": 0.467144097090198, "ce_loss": 0.45714410662694116, "lb_loss": 0.9999999956733356 } }, { "checkpoint_type": "bytes", "bytes_threshold": 737000000, "cumulative_training_bytes": 737001233, "metrics": { "loss": 0.4672433902477396, "ce_loss": 0.45724339978448275, "lb_loss": 0.9999999959098882 } }, { "checkpoint_type": "bytes", "bytes_threshold": 738000000, "cumulative_training_bytes": 738001247, "metrics": { "loss": 0.4672973706595617, "ce_loss": 0.45729738019630484, "lb_loss": 0.9999999956343678 } }, { "checkpoint_type": "bytes", "bytes_threshold": 739000000, "cumulative_training_bytes": 739005713, "metrics": { "loss": 0.46737158577476856, "ce_loss": 0.4573715953115117, "lb_loss": 0.9999999955513295 } }, { "checkpoint_type": "bytes", "bytes_threshold": 740000000, "cumulative_training_bytes": 740004035, "metrics": { "loss": 0.46742578120596645, "ce_loss": 0.4574257907427096, "lb_loss": 0.9999999952924643 } }, { "checkpoint_type": "bytes", "bytes_threshold": 741000000, "cumulative_training_bytes": 741006421, "metrics": { "loss": 0.4675053996438002, "ce_loss": 0.4575054091805434, "lb_loss": 0.9999999957860578 } }, { "checkpoint_type": "bytes", "bytes_threshold": 742000000, "cumulative_training_bytes": 742000099, "metrics": { "loss": 0.467600741589066, "ce_loss": 0.45760075112580917, "lb_loss": 0.9999999958563616 } }, { "checkpoint_type": "bytes", "bytes_threshold": 743000000, "cumulative_training_bytes": 743007078, "metrics": { "loss": 0.4677033062705838, "ce_loss": 0.457703315807327, "lb_loss": 0.9999999960209653 } }, { "checkpoint_type": "bytes", "bytes_threshold": 744000000, "cumulative_training_bytes": 744007281, "metrics": { "loss": 0.4678138212088019, "ce_loss": 0.45781383074554505, "lb_loss": 0.9999999962512802 } }, { "checkpoint_type": "bytes", "bytes_threshold": 745000000, "cumulative_training_bytes": 745003239, "metrics": { "loss": 0.46785139457501557, "ce_loss": 0.45785140411175873, "lb_loss": 0.9999999964805164 } }, { "checkpoint_type": "bytes", "bytes_threshold": 746000000, "cumulative_training_bytes": 746000194, "metrics": { "loss": 0.46787027919608315, "ce_loss": 0.4578702887328263, "lb_loss": 0.9999999962856772 } }, { "checkpoint_type": "bytes", "bytes_threshold": 747000000, "cumulative_training_bytes": 747000581, "metrics": { "loss": 0.4678921757342982, "ce_loss": 0.45789218527104136, "lb_loss": 0.9999999963154523 } }, { "checkpoint_type": "bytes", "bytes_threshold": 748000000, "cumulative_training_bytes": 748000684, "metrics": { "loss": 0.46796389480381484, "ce_loss": 0.457963904340558, "lb_loss": 0.9999999963167985 } }, { "checkpoint_type": "bytes", "bytes_threshold": 749000000, "cumulative_training_bytes": 749004115, "metrics": { "loss": 0.468031408323597, "ce_loss": 0.4580314178603402, "lb_loss": 0.9999999968650198 } }, { "checkpoint_type": "bytes", "bytes_threshold": 750000000, "cumulative_training_bytes": 750006305, "metrics": { "loss": 0.4680848832388602, "ce_loss": 0.4580848927756034, "lb_loss": 0.9999999969413577 } }, { "checkpoint_type": "bytes", "bytes_threshold": 751000000, "cumulative_training_bytes": 751001387, "metrics": { "loss": 0.4680606794437983, "ce_loss": 0.4580606889805415, "lb_loss": 0.9999999969491701 } }, { "checkpoint_type": "bytes", "bytes_threshold": 752000000, "cumulative_training_bytes": 752007732, "metrics": { "loss": 0.4681460488480305, "ce_loss": 0.4581460583847737, "lb_loss": 0.9999999967254238 } }, { "checkpoint_type": "bytes", "bytes_threshold": 753000000, "cumulative_training_bytes": 753007023, "metrics": { "loss": 0.4681992013851005, "ce_loss": 0.4581992109218437, "lb_loss": 0.9999999967748989 } }, { "checkpoint_type": "bytes", "bytes_threshold": 754000000, "cumulative_training_bytes": 754007126, "metrics": { "loss": 0.4682459842877052, "ce_loss": 0.45824599382444836, "lb_loss": 0.9999999969039571 } }, { "checkpoint_type": "bytes", "bytes_threshold": 755000000, "cumulative_training_bytes": 755004818, "metrics": { "loss": 0.46830369154400336, "ce_loss": 0.45830370108074653, "lb_loss": 0.9999999969692553 } }, { "checkpoint_type": "bytes", "bytes_threshold": 756000000, "cumulative_training_bytes": 756003553, "metrics": { "loss": 0.4682925505125042, "ce_loss": 0.45829256004924734, "lb_loss": 0.9999999967987842 } }, { "checkpoint_type": "bytes", "bytes_threshold": 757000000, "cumulative_training_bytes": 757001249, "metrics": { "loss": 0.46836309380203384, "ce_loss": 0.458363103338777, "lb_loss": 0.9999999967877736 } }, { "checkpoint_type": "bytes", "bytes_threshold": 758000000, "cumulative_training_bytes": 758007153, "metrics": { "loss": 0.46841179035691155, "ce_loss": 0.4584117998936547, "lb_loss": 0.9999999969046153 } }, { "checkpoint_type": "bytes", "bytes_threshold": 759000000, "cumulative_training_bytes": 759008102, "metrics": { "loss": 0.4684474506226194, "ce_loss": 0.45844746015936255, "lb_loss": 0.9999999969232316 } }, { "checkpoint_type": "bytes", "bytes_threshold": 760000000, "cumulative_training_bytes": 760000290, "metrics": { "loss": 0.46847780365146624, "ce_loss": 0.4584778131882094, "lb_loss": 0.9999999969707957 } }, { "checkpoint_type": "bytes", "bytes_threshold": 761000000, "cumulative_training_bytes": 761003802, "metrics": { "loss": 0.46849883554943517, "ce_loss": 0.45849884508617833, "lb_loss": 0.9999999969871699 } }, { "checkpoint_type": "bytes", "bytes_threshold": 762000000, "cumulative_training_bytes": 762007779, "metrics": { "loss": 0.4685566738574174, "ce_loss": 0.45855668339416056, "lb_loss": 0.9999999969931801 } }, { "epoch": 16, "checkpoint_type": "epoch", "metrics": { "loss": 0.46860215044879666, "ce_loss": 0.4586021599855398, "lb_loss": 0.9999999970504129, "training_bytes": 47653395 }, "cumulative_training_bytes": 762454414, "training_bytes_this_epoch": 47653395 }, { "checkpoint_type": "bytes", "bytes_threshold": 763000000, "cumulative_training_bytes": 763000457, "metrics": { "loss": 0.4623547439843836, "ce_loss": 0.45235475352112675, "lb_loss": 0.9999999974814939 } }, { "checkpoint_type": "bytes", "bytes_threshold": 764000000, "cumulative_training_bytes": 764004913, "metrics": { "loss": 0.46177133947315785, "ce_loss": 0.451771349009901, "lb_loss": 0.99999999527884 } }, { "checkpoint_type": "bytes", "bytes_threshold": 765000000, "cumulative_training_bytes": 765007589, "metrics": { "loss": 0.46142407079358716, "ce_loss": 0.4514240803303303, "lb_loss": 0.9999999946302122 } }, { "checkpoint_type": "bytes", "bytes_threshold": 766000000, "cumulative_training_bytes": 766002665, "metrics": { "loss": 0.46156417782826764, "ce_loss": 0.4515641873650108, "lb_loss": 0.9999999921471202 } }, { "checkpoint_type": "bytes", "bytes_threshold": 767000000, "cumulative_training_bytes": 767006783, "metrics": { "loss": 0.4617538572561861, "ce_loss": 0.4517538667929293, "lb_loss": 0.9999999925745056 } }, { "checkpoint_type": "bytes", "bytes_threshold": 768000000, "cumulative_training_bytes": 768006387, "metrics": { "loss": 0.46248921460118786, "ce_loss": 0.452489224137931, "lb_loss": 0.9999999929296559 } }, { "checkpoint_type": "bytes", "bytes_threshold": 769000000, "cumulative_training_bytes": 769007261, "metrics": { "loss": 0.46278501893872415, "ce_loss": 0.4527850284754673, "lb_loss": 0.9999999917834719 } }, { "checkpoint_type": "bytes", "bytes_threshold": 770000000, "cumulative_training_bytes": 770004386, "metrics": { "loss": 0.46281795762857125, "ce_loss": 0.4528179671653144, "lb_loss": 0.9999999905696505 } }, { "checkpoint_type": "bytes", "bytes_threshold": 771000000, "cumulative_training_bytes": 771004582, "metrics": { "loss": 0.46292215921885216, "ce_loss": 0.4529221687555953, "lb_loss": 0.9999999913554588 } }, { "checkpoint_type": "bytes", "bytes_threshold": 772000000, "cumulative_training_bytes": 772000420, "metrics": { "loss": 0.4632846208719107, "ce_loss": 0.45328463040865385, "lb_loss": 0.9999999921673384 } }, { "checkpoint_type": "bytes", "bytes_threshold": 773000000, "cumulative_training_bytes": 773003781, "metrics": { "loss": 0.46348049037261146, "ce_loss": 0.45348049990935463, "lb_loss": 0.9999999921333972 } }, { "checkpoint_type": "bytes", "bytes_threshold": 774000000, "cumulative_training_bytes": 774007887, "metrics": { "loss": 0.4635867557778264, "ce_loss": 0.45358676531456954, "lb_loss": 0.9999999925000778 } }, { "checkpoint_type": "bytes", "bytes_threshold": 775000000, "cumulative_training_bytes": 775006162, "metrics": { "loss": 0.4637526105089885, "ce_loss": 0.4537526200457317, "lb_loss": 0.999999993857814 } }, { "checkpoint_type": "bytes", "bytes_threshold": 776000000, "cumulative_training_bytes": 776001976, "metrics": { "loss": 0.46378485981353934, "ce_loss": 0.4537848693502825, "lb_loss": 0.9999999951844835 } }, { "checkpoint_type": "bytes", "bytes_threshold": 777000000, "cumulative_training_bytes": 777005637, "metrics": { "loss": 0.46390480207556617, "ce_loss": 0.45390481161230933, "lb_loss": 0.9999999949519475 } }, { "checkpoint_type": "bytes", "bytes_threshold": 778000000, "cumulative_training_bytes": 778003374, "metrics": { "loss": 0.4639972131983627, "ce_loss": 0.45399722273510584, "lb_loss": 0.9999999953337575 } }, { "checkpoint_type": "bytes", "bytes_threshold": 779000000, "cumulative_training_bytes": 779004889, "metrics": { "loss": 0.4640907123064576, "ce_loss": 0.45409072184320076, "lb_loss": 0.9999999953132332 } }, { "checkpoint_type": "bytes", "bytes_threshold": 780000000, "cumulative_training_bytes": 780004536, "metrics": { "loss": 0.4642408185269289, "ce_loss": 0.45424082806367205, "lb_loss": 0.9999999955290018 } }, { "checkpoint_type": "bytes", "bytes_threshold": 781000000, "cumulative_training_bytes": 781001322, "metrics": { "loss": 0.4643268515342432, "ce_loss": 0.45432686107098635, "lb_loss": 0.9999999953752896 } }, { "checkpoint_type": "bytes", "bytes_threshold": 782000000, "cumulative_training_bytes": 782005205, "metrics": { "loss": 0.4643638559683469, "ce_loss": 0.45436386550509006, "lb_loss": 0.9999999956125007 } }, { "checkpoint_type": "bytes", "bytes_threshold": 783000000, "cumulative_training_bytes": 783003504, "metrics": { "loss": 0.4644428404539424, "ce_loss": 0.45444284999068557, "lb_loss": 0.9999999955585213 } }, { "checkpoint_type": "bytes", "bytes_threshold": 784000000, "cumulative_training_bytes": 784007012, "metrics": { "loss": 0.4645556631986032, "ce_loss": 0.45455567273534636, "lb_loss": 0.9999999957228639 } }, { "checkpoint_type": "bytes", "bytes_threshold": 785000000, "cumulative_training_bytes": 785003493, "metrics": { "loss": 0.4646576464479767, "ce_loss": 0.45465765598471986, "lb_loss": 0.9999999954259254 } }, { "checkpoint_type": "bytes", "bytes_threshold": 786000000, "cumulative_training_bytes": 786001543, "metrics": { "loss": 0.4646982882274845, "ce_loss": 0.45469829776422765, "lb_loss": 0.9999999956386846 } }, { "checkpoint_type": "bytes", "bytes_threshold": 787000000, "cumulative_training_bytes": 787006273, "metrics": { "loss": 0.46475888979263924, "ce_loss": 0.4547588993293824, "lb_loss": 0.9999999954264683 } }, { "checkpoint_type": "bytes", "bytes_threshold": 788000000, "cumulative_training_bytes": 788002324, "metrics": { "loss": 0.46486149188711773, "ce_loss": 0.4548615014238609, "lb_loss": 0.9999999956582948 } }, { "checkpoint_type": "bytes", "bytes_threshold": 789000000, "cumulative_training_bytes": 789000949, "metrics": { "loss": 0.46496146893267404, "ce_loss": 0.4549614784694172, "lb_loss": 0.9999999959587157 } }, { "checkpoint_type": "bytes", "bytes_threshold": 790000000, "cumulative_training_bytes": 790004342, "metrics": { "loss": 0.46504716519219763, "ce_loss": 0.4550471747289408, "lb_loss": 0.9999999959567603 } }, { "checkpoint_type": "bytes", "bytes_threshold": 791000000, "cumulative_training_bytes": 791006479, "metrics": { "loss": 0.46513784085220533, "ce_loss": 0.4551378503889485, "lb_loss": 0.9999999957790702 } }, { "checkpoint_type": "bytes", "bytes_threshold": 792000000, "cumulative_training_bytes": 792007573, "metrics": { "loss": 0.4652086949527619, "ce_loss": 0.45520870448950507, "lb_loss": 0.9999999958760196 } }, { "checkpoint_type": "bytes", "bytes_threshold": 793000000, "cumulative_training_bytes": 793000192, "metrics": { "loss": 0.46531999156002296, "ce_loss": 0.4553200010967661, "lb_loss": 0.9999999958012271 } }, { "checkpoint_type": "bytes", "bytes_threshold": 794000000, "cumulative_training_bytes": 794002455, "metrics": { "loss": 0.4653909947108297, "ce_loss": 0.45539100424757284, "lb_loss": 0.9999999960070675 } }, { "checkpoint_type": "bytes", "bytes_threshold": 795000000, "cumulative_training_bytes": 795002572, "metrics": { "loss": 0.46549575616544453, "ce_loss": 0.4554957657021877, "lb_loss": 0.9999999956814325 } }, { "checkpoint_type": "bytes", "bytes_threshold": 796000000, "cumulative_training_bytes": 796000398, "metrics": { "loss": 0.4655890219343822, "ce_loss": 0.45558903147112534, "lb_loss": 0.9999999957279483 } }, { "checkpoint_type": "bytes", "bytes_threshold": 797000000, "cumulative_training_bytes": 797004240, "metrics": { "loss": 0.4656447468074501, "ce_loss": 0.45564475634419327, "lb_loss": 0.999999996076556 } }, { "checkpoint_type": "bytes", "bytes_threshold": 798000000, "cumulative_training_bytes": 798001241, "metrics": { "loss": 0.4657365903932439, "ce_loss": 0.45573659992998705, "lb_loss": 0.9999999960323491 } }, { "checkpoint_type": "bytes", "bytes_threshold": 799000000, "cumulative_training_bytes": 799006684, "metrics": { "loss": 0.46579053755104227, "ce_loss": 0.45579054708778544, "lb_loss": 0.9999999962786121 } }, { "checkpoint_type": "bytes", "bytes_threshold": 800000000, "cumulative_training_bytes": 800005908, "metrics": { "loss": 0.46589497096386856, "ce_loss": 0.4558949805006117, "lb_loss": 0.9999999962564783 } }, { "checkpoint_type": "bytes", "bytes_threshold": 801000000, "cumulative_training_bytes": 801004990, "metrics": { "loss": 0.4659691438892747, "ce_loss": 0.45596915342601785, "lb_loss": 0.9999999963183626 } }, { "checkpoint_type": "bytes", "bytes_threshold": 802000000, "cumulative_training_bytes": 802006894, "metrics": { "loss": 0.4660316155116502, "ce_loss": 0.45603162504839334, "lb_loss": 0.9999999962848053 } }, { "checkpoint_type": "bytes", "bytes_threshold": 803000000, "cumulative_training_bytes": 803004532, "metrics": { "loss": 0.4660554155482266, "ce_loss": 0.45605542508496977, "lb_loss": 0.9999999960833806 } }, { "checkpoint_type": "bytes", "bytes_threshold": 804000000, "cumulative_training_bytes": 804005297, "metrics": { "loss": 0.4661466299510033, "ce_loss": 0.4561466394877465, "lb_loss": 0.9999999963426669 } }, { "checkpoint_type": "bytes", "bytes_threshold": 805000000, "cumulative_training_bytes": 805002675, "metrics": { "loss": 0.4661778685899439, "ce_loss": 0.4561778781266871, "lb_loss": 0.9999999964389523 } }, { "checkpoint_type": "bytes", "bytes_threshold": 806000000, "cumulative_training_bytes": 806004255, "metrics": { "loss": 0.46626139055995164, "ce_loss": 0.4562614000966948, "lb_loss": 0.9999999964371344 } }, { "checkpoint_type": "bytes", "bytes_threshold": 807000000, "cumulative_training_bytes": 807001576, "metrics": { "loss": 0.4663353263819574, "ce_loss": 0.45633533591870057, "lb_loss": 0.9999999966601729 } }, { "checkpoint_type": "bytes", "bytes_threshold": 808000000, "cumulative_training_bytes": 808007129, "metrics": { "loss": 0.4663545905010783, "ce_loss": 0.45635460003782147, "lb_loss": 0.9999999965934477 } }, { "checkpoint_type": "bytes", "bytes_threshold": 809000000, "cumulative_training_bytes": 809006953, "metrics": { "loss": 0.4663725949212269, "ce_loss": 0.45637260445797007, "lb_loss": 0.9999999967741523 } }, { "checkpoint_type": "bytes", "bytes_threshold": 810000000, "cumulative_training_bytes": 810005884, "metrics": { "loss": 0.46644152880866746, "ce_loss": 0.4564415383454106, "lb_loss": 0.9999999967942107 } }, { "epoch": 17, "checkpoint_type": "epoch", "metrics": { "loss": 0.4664472485569265, "ce_loss": 0.45644725809366965, "lb_loss": 0.9999999967535388, "training_bytes": 47653413 }, "cumulative_training_bytes": 810107827, "training_bytes_this_epoch": 47653413 }, { "checkpoint_type": "bytes", "bytes_threshold": 811000000, "cumulative_training_bytes": 811000418, "metrics": { "loss": 0.4582590152477396, "ce_loss": 0.44825902478448276, "lb_loss": 0.9999999866403383 } }, { "checkpoint_type": "bytes", "bytes_threshold": 812000000, "cumulative_training_bytes": 812004574, "metrics": { "loss": 0.458675330685031, "ce_loss": 0.4486753402217742, "lb_loss": 0.9999999891846411 } }, { "checkpoint_type": "bytes", "bytes_threshold": 813000000, "cumulative_training_bytes": 813005709, "metrics": { "loss": 0.45849726948700353, "ce_loss": 0.4484972790237467, "lb_loss": 0.9999999976409771 } }, { "checkpoint_type": "bytes", "bytes_threshold": 814000000, "cumulative_training_bytes": 814001225, "metrics": { "loss": 0.45856258255559473, "ce_loss": 0.4485625920923379, "lb_loss": 0.9999999977750722 } }, { "checkpoint_type": "bytes", "bytes_threshold": 815000000, "cumulative_training_bytes": 815003952, "metrics": { "loss": 0.4582559323646653, "ce_loss": 0.44825594190140844, "lb_loss": 0.9999999988806639 } }, { "checkpoint_type": "bytes", "bytes_threshold": 816000000, "cumulative_training_bytes": 816003923, "metrics": { "loss": 0.4589549417619581, "ce_loss": 0.4489549512987013, "lb_loss": 0.9999999982196015 } }, { "checkpoint_type": "bytes", "bytes_threshold": 817000000, "cumulative_training_bytes": 817007266, "metrics": { "loss": 0.4592534241480515, "ce_loss": 0.44925343368479465, "lb_loss": 0.9999999976846142 } }, { "checkpoint_type": "bytes", "bytes_threshold": 818000000, "cumulative_training_bytes": 818004841, "metrics": { "loss": 0.4594157579220347, "ce_loss": 0.44941576745877787, "lb_loss": 0.999999997109377 } }, { "checkpoint_type": "bytes", "bytes_threshold": 819000000, "cumulative_training_bytes": 819004493, "metrics": { "loss": 0.45983224358287816, "ce_loss": 0.44983225311962133, "lb_loss": 0.9999999966145382 } }, { "checkpoint_type": "bytes", "bytes_threshold": 820000000, "cumulative_training_bytes": 820003619, "metrics": { "loss": 0.46002989910594827, "ce_loss": 0.45002990864269143, "lb_loss": 0.9999999966809479 } }, { "checkpoint_type": "bytes", "bytes_threshold": 821000000, "cumulative_training_bytes": 821007131, "metrics": { "loss": 0.460265253367049, "ce_loss": 0.45026526290379215, "lb_loss": 0.9999999963165669 } }, { "checkpoint_type": "bytes", "bytes_threshold": 822000000, "cumulative_training_bytes": 822001154, "metrics": { "loss": 0.4605283648197562, "ce_loss": 0.45052837435649934, "lb_loss": 0.9999999952439023 } }, { "checkpoint_type": "bytes", "bytes_threshold": 823000000, "cumulative_training_bytes": 823004363, "metrics": { "loss": 0.46066069150183253, "ce_loss": 0.4506607010385757, "lb_loss": 0.9999999947646959 } }, { "checkpoint_type": "bytes", "bytes_threshold": 824000000, "cumulative_training_bytes": 824005319, "metrics": { "loss": 0.46092772483825684, "ce_loss": 0.450927734375, "lb_loss": 0.999999995011065 } }, { "checkpoint_type": "bytes", "bytes_threshold": 825000000, "cumulative_training_bytes": 825005826, "metrics": { "loss": 0.46111766774999885, "ce_loss": 0.451117677286742, "lb_loss": 0.9999999947011287 } }, { "checkpoint_type": "bytes", "bytes_threshold": 826000000, "cumulative_training_bytes": 826000335, "metrics": { "loss": 0.46198096348830503, "ce_loss": 0.4519809730250482, "lb_loss": 0.9999999948606785 } }, { "checkpoint_type": "bytes", "bytes_threshold": 827000000, "cumulative_training_bytes": 827007343, "metrics": { "loss": 0.46243060498997784, "ce_loss": 0.452430614526721, "lb_loss": 0.9999999950869359 } }, { "checkpoint_type": "bytes", "bytes_threshold": 828000000, "cumulative_training_bytes": 828002571, "metrics": { "loss": 0.4624639199134066, "ce_loss": 0.45246392945014974, "lb_loss": 0.9999999954601512 } }, { "checkpoint_type": "bytes", "bytes_threshold": 829000000, "cumulative_training_bytes": 829005468, "metrics": { "loss": 0.4625939747116361, "ce_loss": 0.45259398424837927, "lb_loss": 0.9999999954113118 } }, { "checkpoint_type": "bytes", "bytes_threshold": 830000000, "cumulative_training_bytes": 830003520, "metrics": { "loss": 0.4626671563119866, "ce_loss": 0.4526671658487298, "lb_loss": 0.9999999956409228 } }, { "checkpoint_type": "bytes", "bytes_threshold": 831000000, "cumulative_training_bytes": 831006123, "metrics": { "loss": 0.46278288941891826, "ce_loss": 0.45278289895566143, "lb_loss": 0.9999999959156949 } }, { "checkpoint_type": "bytes", "bytes_threshold": 832000000, "cumulative_training_bytes": 832004095, "metrics": { "loss": 0.4628742745223684, "ce_loss": 0.45287428405911156, "lb_loss": 0.9999999959763216 } }, { "checkpoint_type": "bytes", "bytes_threshold": 833000000, "cumulative_training_bytes": 833003109, "metrics": { "loss": 0.46298912089803945, "ce_loss": 0.4529891304347826, "lb_loss": 0.9999999960330019 } }, { "checkpoint_type": "bytes", "bytes_threshold": 834000000, "cumulative_training_bytes": 834000963, "metrics": { "loss": 0.46311497764685183, "ce_loss": 0.453114987183595, "lb_loss": 0.9999999959130427 } }, { "checkpoint_type": "bytes", "bytes_threshold": 835000000, "cumulative_training_bytes": 835000614, "metrics": { "loss": 0.4632120762950527, "ce_loss": 0.45321208583179584, "lb_loss": 0.9999999960226913 } }, { "checkpoint_type": "bytes", "bytes_threshold": 836000000, "cumulative_training_bytes": 836003119, "metrics": { "loss": 0.46328433497995797, "ce_loss": 0.45328434451670113, "lb_loss": 0.9999999961943236 } }, { "checkpoint_type": "bytes", "bytes_threshold": 837000000, "cumulative_training_bytes": 837001175, "metrics": { "loss": 0.46331735631865106, "ce_loss": 0.4533173658553942, "lb_loss": 0.9999999961315517 } }, { "checkpoint_type": "bytes", "bytes_threshold": 838000000, "cumulative_training_bytes": 838002124, "metrics": { "loss": 0.4633801187563414, "ce_loss": 0.45338012829308455, "lb_loss": 0.999999996156122 } }, { "checkpoint_type": "bytes", "bytes_threshold": 839000000, "cumulative_training_bytes": 839006609, "metrics": { "loss": 0.46342972970166746, "ce_loss": 0.4534297392384106, "lb_loss": 0.9999999963684587 } }, { "checkpoint_type": "bytes", "bytes_threshold": 840000000, "cumulative_training_bytes": 840000100, "metrics": { "loss": 0.46349160778079845, "ce_loss": 0.4534916173175416, "lb_loss": 0.9999999965046188 } }, { "checkpoint_type": "bytes", "bytes_threshold": 841000000, "cumulative_training_bytes": 841000215, "metrics": { "loss": 0.46359052719516963, "ce_loss": 0.4535905367319128, "lb_loss": 0.9999999966623762 } }, { "checkpoint_type": "bytes", "bytes_threshold": 842000000, "cumulative_training_bytes": 842004728, "metrics": { "loss": 0.46364198035406556, "ce_loss": 0.45364198989080873, "lb_loss": 0.9999999965670471 } }, { "checkpoint_type": "bytes", "bytes_threshold": 843000000, "cumulative_training_bytes": 843006127, "metrics": { "loss": 0.4637348307959697, "ce_loss": 0.4537348403327129, "lb_loss": 0.9999999966439451 } }, { "checkpoint_type": "bytes", "bytes_threshold": 844000000, "cumulative_training_bytes": 844003435, "metrics": { "loss": 0.46380382049374463, "ce_loss": 0.4538038300304878, "lb_loss": 0.999999996850161 } }, { "checkpoint_type": "bytes", "bytes_threshold": 845000000, "cumulative_training_bytes": 845006695, "metrics": { "loss": 0.4638635707440202, "ce_loss": 0.45386358028076335, "lb_loss": 0.999999997019114 } }, { "checkpoint_type": "bytes", "bytes_threshold": 846000000, "cumulative_training_bytes": 846002322, "metrics": { "loss": 0.4639164038776309, "ce_loss": 0.45391641341437405, "lb_loss": 0.9999999970381995 } }, { "checkpoint_type": "bytes", "bytes_threshold": 847000000, "cumulative_training_bytes": 847004506, "metrics": { "loss": 0.46396459108566346, "ce_loss": 0.4539646006224066, "lb_loss": 0.9999999970939644 } }, { "checkpoint_type": "bytes", "bytes_threshold": 848000000, "cumulative_training_bytes": 848000971, "metrics": { "loss": 0.4640376325087114, "ce_loss": 0.45403764204545455, "lb_loss": 0.9999999971100778 } }, { "checkpoint_type": "bytes", "bytes_threshold": 849000000, "cumulative_training_bytes": 849004995, "metrics": { "loss": 0.4640902132294446, "ce_loss": 0.45409022276618777, "lb_loss": 0.9999999971142014 } }, { "checkpoint_type": "bytes", "bytes_threshold": 850000000, "cumulative_training_bytes": 850005350, "metrics": { "loss": 0.4640859984271793, "ce_loss": 0.45408600796392246, "lb_loss": 0.9999999969002382 } }, { "checkpoint_type": "bytes", "bytes_threshold": 851000000, "cumulative_training_bytes": 851000453, "metrics": { "loss": 0.464119288698606, "ce_loss": 0.4541192982353492, "lb_loss": 0.9999999969198873 } }, { "checkpoint_type": "bytes", "bytes_threshold": 852000000, "cumulative_training_bytes": 852004187, "metrics": { "loss": 0.4641575910891706, "ce_loss": 0.45415760062591376, "lb_loss": 0.9999999968846988 } }, { "checkpoint_type": "bytes", "bytes_threshold": 853000000, "cumulative_training_bytes": 853003588, "metrics": { "loss": 0.4642327954338083, "ce_loss": 0.45423280497055146, "lb_loss": 0.9999999971702953 } }, { "checkpoint_type": "bytes", "bytes_threshold": 854000000, "cumulative_training_bytes": 854000038, "metrics": { "loss": 0.464275811368542, "ce_loss": 0.4542758209052852, "lb_loss": 0.9999999970785095 } }, { "checkpoint_type": "bytes", "bytes_threshold": 855000000, "cumulative_training_bytes": 855002152, "metrics": { "loss": 0.464321046376456, "ce_loss": 0.45432105591319916, "lb_loss": 0.9999999970218006 } }, { "checkpoint_type": "bytes", "bytes_threshold": 856000000, "cumulative_training_bytes": 856002523, "metrics": { "loss": 0.4643201960855458, "ce_loss": 0.454320205622289, "lb_loss": 0.9999999968079595 } }, { "checkpoint_type": "bytes", "bytes_threshold": 857000000, "cumulative_training_bytes": 857006768, "metrics": { "loss": 0.4643526690346854, "ce_loss": 0.4543526785714286, "lb_loss": 0.9999999967789164 } }, { "epoch": 18, "checkpoint_type": "epoch", "metrics": { "loss": 0.4643767603258854, "ce_loss": 0.45437676986262854, "lb_loss": 0.9999999966960793, "training_bytes": 47653406 }, "cumulative_training_bytes": 857761233, "training_bytes_this_epoch": 47653406 }, { "checkpoint_type": "bytes", "bytes_threshold": 858000000, "cumulative_training_bytes": 858005351, "metrics": { "loss": 0.45714354515075684, "ce_loss": 0.4471435546875, "lb_loss": 1.0000000167638063 } }, { "checkpoint_type": "bytes", "bytes_threshold": 859000000, "cumulative_training_bytes": 859002822, "metrics": { "loss": 0.4560117574385655, "ce_loss": 0.44601176697530864, "lb_loss": 1.0000000051510187 } }, { "checkpoint_type": "bytes", "bytes_threshold": 860000000, "cumulative_training_bytes": 860005316, "metrics": { "loss": 0.45533248833015105, "ce_loss": 0.4453324978668942, "lb_loss": 0.9999999993897135 } }, { "checkpoint_type": "bytes", "bytes_threshold": 861000000, "cumulative_training_bytes": 861001866, "metrics": { "loss": 0.4553494289975358, "ce_loss": 0.44534943853427894, "lb_loss": 1.0000000015500026 } }, { "checkpoint_type": "bytes", "bytes_threshold": 862000000, "cumulative_training_bytes": 862004590, "metrics": { "loss": 0.4559224013387081, "ce_loss": 0.44592241087545126, "lb_loss": 0.9999999984937454 } }, { "checkpoint_type": "bytes", "bytes_threshold": 863000000, "cumulative_training_bytes": 863001335, "metrics": { "loss": 0.45603777372349075, "ce_loss": 0.4460377832602339, "lb_loss": 1.0000000004357064 } }, { "checkpoint_type": "bytes", "bytes_threshold": 864000000, "cumulative_training_bytes": 864007195, "metrics": { "loss": 0.4564063362046784, "ce_loss": 0.4464063457414216, "lb_loss": 0.9999999999269551 } }, { "checkpoint_type": "bytes", "bytes_threshold": 865000000, "cumulative_training_bytes": 865006139, "metrics": { "loss": 0.45668132965458874, "ce_loss": 0.4466813391913319, "lb_loss": 0.9999999995589508 } }, { "checkpoint_type": "bytes", "bytes_threshold": 866000000, "cumulative_training_bytes": 866001721, "metrics": { "loss": 0.45680093144838696, "ce_loss": 0.4468009409851301, "lb_loss": 0.9999999989475016 } }, { "checkpoint_type": "bytes", "bytes_threshold": 867000000, "cumulative_training_bytes": 867002974, "metrics": { "loss": 0.4570681993489238, "ce_loss": 0.44706820888566695, "lb_loss": 0.999999999061733 } }, { "checkpoint_type": "bytes", "bytes_threshold": 868000000, "cumulative_training_bytes": 868004509, "metrics": { "loss": 0.45706402140940494, "ce_loss": 0.4470640309461481, "lb_loss": 0.9999999983950881 } }, { "checkpoint_type": "bytes", "bytes_threshold": 869000000, "cumulative_training_bytes": 869007120, "metrics": { "loss": 0.45723368426434674, "ce_loss": 0.4472336938010899, "lb_loss": 0.9999999979292664 } }, { "checkpoint_type": "bytes", "bytes_threshold": 870000000, "cumulative_training_bytes": 870003043, "metrics": { "loss": 0.4574648391021805, "ce_loss": 0.44746484863892366, "lb_loss": 0.9999999979858255 } }, { "checkpoint_type": "bytes", "bytes_threshold": 871000000, "cumulative_training_bytes": 871006700, "metrics": { "loss": 0.4577389291561429, "ce_loss": 0.44773893869288606, "lb_loss": 0.9999999981039587 } }, { "checkpoint_type": "bytes", "bytes_threshold": 872000000, "cumulative_training_bytes": 872007461, "metrics": { "loss": 0.4579849114982031, "ce_loss": 0.44798492103494625, "lb_loss": 0.9999999977247689 } }, { "checkpoint_type": "bytes", "bytes_threshold": 873000000, "cumulative_training_bytes": 873006742, "metrics": { "loss": 0.45822795379826436, "ce_loss": 0.44822796333500753, "lb_loss": 0.999999997754722 } }, { "checkpoint_type": "bytes", "bytes_threshold": 874000000, "cumulative_training_bytes": 874001226, "metrics": { "loss": 0.45845995922799043, "ce_loss": 0.4484599687647336, "lb_loss": 0.9999999979766457 } }, { "checkpoint_type": "bytes", "bytes_threshold": 875000000, "cumulative_training_bytes": 875004724, "metrics": { "loss": 0.45866020915775063, "ce_loss": 0.4486602186944938, "lb_loss": 0.999999998014943 } }, { "checkpoint_type": "bytes", "bytes_threshold": 876000000, "cumulative_training_bytes": 876004731, "metrics": { "loss": 0.45884581898822535, "ce_loss": 0.4488458285249685, "lb_loss": 0.9999999977488804 } }, { "checkpoint_type": "bytes", "bytes_threshold": 877000000, "cumulative_training_bytes": 877006989, "metrics": { "loss": 0.45900198554082244, "ce_loss": 0.4490019950775656, "lb_loss": 0.999999997818764 } }, { "checkpoint_type": "bytes", "bytes_threshold": 878000000, "cumulative_training_bytes": 878005540, "metrics": { "loss": 0.45909980962929314, "ce_loss": 0.4490998191660363, "lb_loss": 0.9999999976329471 } }, { "checkpoint_type": "bytes", "bytes_threshold": 879000000, "cumulative_training_bytes": 879001520, "metrics": { "loss": 0.45923634252706363, "ce_loss": 0.4492363520638068, "lb_loss": 0.9999999974000858 } }, { "checkpoint_type": "bytes", "bytes_threshold": 880000000, "cumulative_training_bytes": 880003773, "metrics": { "loss": 0.45938951283847196, "ce_loss": 0.4493895223752151, "lb_loss": 0.9999999969017895 } }, { "checkpoint_type": "bytes", "bytes_threshold": 881000000, "cumulative_training_bytes": 881006052, "metrics": { "loss": 0.4595082352755098, "ce_loss": 0.449508244812253, "lb_loss": 0.999999996976576 } }, { "checkpoint_type": "bytes", "bytes_threshold": 882000000, "cumulative_training_bytes": 882002195, "metrics": { "loss": 0.4596030730201109, "ce_loss": 0.44960308255685405, "lb_loss": 0.9999999969124568 } }, { "checkpoint_type": "bytes", "bytes_threshold": 883000000, "cumulative_training_bytes": 883005745, "metrics": { "loss": 0.4596205058607083, "ce_loss": 0.44962051539745146, "lb_loss": 0.9999999968895634 } }, { "checkpoint_type": "bytes", "bytes_threshold": 884000000, "cumulative_training_bytes": 884007789, "metrics": { "loss": 0.4597436392449901, "ce_loss": 0.4497436487817333, "lb_loss": 0.9999999968693213 } }, { "checkpoint_type": "bytes", "bytes_threshold": 885000000, "cumulative_training_bytes": 885004439, "metrics": { "loss": 0.45980132849390065, "ce_loss": 0.4498013380306438, "lb_loss": 0.9999999972518522 } }, { "checkpoint_type": "bytes", "bytes_threshold": 886000000, "cumulative_training_bytes": 886001168, "metrics": { "loss": 0.4598883227795031, "ce_loss": 0.44988833231624625, "lb_loss": 0.9999999972194199 } }, { "checkpoint_type": "bytes", "bytes_threshold": 887000000, "cumulative_training_bytes": 887003211, "metrics": { "loss": 0.45996510226917614, "ce_loss": 0.4499651118059193, "lb_loss": 0.9999999972523789 } }, { "checkpoint_type": "bytes", "bytes_threshold": 888000000, "cumulative_training_bytes": 888000146, "metrics": { "loss": 0.46011987840957236, "ce_loss": 0.4501198879463155, "lb_loss": 0.9999999971624023 } }, { "checkpoint_type": "bytes", "bytes_threshold": 889000000, "cumulative_training_bytes": 889004224, "metrics": { "loss": 0.4601905158921784, "ce_loss": 0.4501905254289216, "lb_loss": 0.9999999969613318 } }, { "checkpoint_type": "bytes", "bytes_threshold": 890000000, "cumulative_training_bytes": 890005885, "metrics": { "loss": 0.46025722226686644, "ce_loss": 0.4502572318036096, "lb_loss": 0.9999999972964885 } }, { "checkpoint_type": "bytes", "bytes_threshold": 891000000, "cumulative_training_bytes": 891002684, "metrics": { "loss": 0.46033590456714996, "ce_loss": 0.4503359141038931, "lb_loss": 0.9999999971577606 } }, { "checkpoint_type": "bytes", "bytes_threshold": 892000000, "cumulative_training_bytes": 892004908, "metrics": { "loss": 0.4604197900495717, "ce_loss": 0.45041979958631484, "lb_loss": 0.999999997360975 } }, { "checkpoint_type": "bytes", "bytes_threshold": 893000000, "cumulative_training_bytes": 893001735, "metrics": { "loss": 0.4605195505729917, "ce_loss": 0.4505195601097349, "lb_loss": 0.9999999973060048 } }, { "checkpoint_type": "bytes", "bytes_threshold": 894000000, "cumulative_training_bytes": 894003726, "metrics": { "loss": 0.4606263274852302, "ce_loss": 0.45062633702197336, "lb_loss": 0.9999999972042614 } }, { "checkpoint_type": "bytes", "bytes_threshold": 895000000, "cumulative_training_bytes": 895004512, "metrics": { "loss": 0.46070968477349533, "ce_loss": 0.4507096943102385, "lb_loss": 0.9999999971202526 } }, { "checkpoint_type": "bytes", "bytes_threshold": 896000000, "cumulative_training_bytes": 896007365, "metrics": { "loss": 0.46081095768047403, "ce_loss": 0.4508109672172172, "lb_loss": 0.9999999972196432 } }, { "checkpoint_type": "bytes", "bytes_threshold": 897000000, "cumulative_training_bytes": 897002215, "metrics": { "loss": 0.4608837557071593, "ce_loss": 0.45088376524390245, "lb_loss": 0.999999997197128 } }, { "checkpoint_type": "bytes", "bytes_threshold": 898000000, "cumulative_training_bytes": 898006553, "metrics": { "loss": 0.4609418456049996, "ce_loss": 0.4509418551417428, "lb_loss": 0.9999999970855415 } }, { "checkpoint_type": "bytes", "bytes_threshold": 899000000, "cumulative_training_bytes": 899002574, "metrics": { "loss": 0.4610199235072598, "ce_loss": 0.45101993304400295, "lb_loss": 0.9999999969566882 } }, { "checkpoint_type": "bytes", "bytes_threshold": 900000000, "cumulative_training_bytes": 900001892, "metrics": { "loss": 0.4611180545152779, "ce_loss": 0.45111806405202104, "lb_loss": 0.9999999968668938 } }, { "checkpoint_type": "bytes", "bytes_threshold": 901000000, "cumulative_training_bytes": 901002457, "metrics": { "loss": 0.4611618370239863, "ce_loss": 0.45116184656072944, "lb_loss": 0.999999996834031 } }, { "checkpoint_type": "bytes", "bytes_threshold": 902000000, "cumulative_training_bytes": 902005917, "metrics": { "loss": 0.4612114078797649, "ce_loss": 0.45121141741650805, "lb_loss": 0.999999996926426 } }, { "checkpoint_type": "bytes", "bytes_threshold": 903000000, "cumulative_training_bytes": 903002754, "metrics": { "loss": 0.4612944935517659, "ce_loss": 0.45129450308850905, "lb_loss": 0.999999996913349 } }, { "checkpoint_type": "bytes", "bytes_threshold": 904000000, "cumulative_training_bytes": 904007438, "metrics": { "loss": 0.46135489009074027, "ce_loss": 0.45135489962748343, "lb_loss": 0.999999997118451 } }, { "checkpoint_type": "bytes", "bytes_threshold": 905000000, "cumulative_training_bytes": 905006302, "metrics": { "loss": 0.4613947179754124, "ce_loss": 0.4513947275121556, "lb_loss": 0.9999999970922208 } }, { "epoch": 19, "checkpoint_type": "epoch", "metrics": { "loss": 0.4614244788348828, "ce_loss": 0.45142448837162596, "lb_loss": 0.999999997069566, "training_bytes": 47653396 }, "cumulative_training_bytes": 905414629, "training_bytes_this_epoch": 47653396 }, { "checkpoint_type": "bytes", "bytes_threshold": 906000000, "cumulative_training_bytes": 906007139, "metrics": { "loss": 0.4505945521515685, "ce_loss": 0.4405945616883117, "lb_loss": 0.9999999984518274 } }, { "checkpoint_type": "bytes", "bytes_threshold": 907000000, "cumulative_training_bytes": 907003717, "metrics": { "loss": 0.45137793430383655, "ce_loss": 0.4413779438405797, "lb_loss": 0.9999999956808229 } }, { "checkpoint_type": "bytes", "bytes_threshold": 908000000, "cumulative_training_bytes": 908005720, "metrics": { "loss": 0.45178184142479527, "ce_loss": 0.44178185096153844, "lb_loss": 0.9999999961204077 } }, { "checkpoint_type": "bytes", "bytes_threshold": 909000000, "cumulative_training_bytes": 909006677, "metrics": { "loss": 0.45235156962103934, "ce_loss": 0.4423515791577825, "lb_loss": 0.999999996949869 } }, { "checkpoint_type": "bytes", "bytes_threshold": 910000000, "cumulative_training_bytes": 910005425, "metrics": { "loss": 0.45257681120616167, "ce_loss": 0.44257682074290483, "lb_loss": 0.9999999954226817 } }, { "checkpoint_type": "bytes", "bytes_threshold": 911000000, "cumulative_training_bytes": 911000290, "metrics": { "loss": 0.45285567890632955, "ce_loss": 0.4428556884430727, "lb_loss": 0.9999999962389388 } }, { "checkpoint_type": "bytes", "bytes_threshold": 912000000, "cumulative_training_bytes": 912003476, "metrics": { "loss": 0.4531254446783731, "ce_loss": 0.4431254542151163, "lb_loss": 0.9999999967425368 } }, { "checkpoint_type": "bytes", "bytes_threshold": 913000000, "cumulative_training_bytes": 913000462, "metrics": { "loss": 0.4532784784683073, "ce_loss": 0.4432784880050505, "lb_loss": 0.9999999971100778 } }, { "checkpoint_type": "bytes", "bytes_threshold": 914000000, "cumulative_training_bytes": 914002700, "metrics": { "loss": 0.4536259382777082, "ce_loss": 0.44362594781445136, "lb_loss": 0.999999997447794 } }, { "checkpoint_type": "bytes", "bytes_threshold": 915000000, "cumulative_training_bytes": 915005428, "metrics": { "loss": 0.4537899265654933, "ce_loss": 0.44378993610223644, "lb_loss": 0.999999997191155 } }, { "checkpoint_type": "bytes", "bytes_threshold": 916000000, "cumulative_training_bytes": 916000562, "metrics": { "loss": 0.4539811997613755, "ce_loss": 0.4439812092981187, "lb_loss": 0.999999998059183 } }, { "checkpoint_type": "bytes", "bytes_threshold": 917000000, "cumulative_training_bytes": 917005115, "metrics": { "loss": 0.45415455713543135, "ce_loss": 0.4441545666721745, "lb_loss": 0.9999999975575096 } }, { "checkpoint_type": "bytes", "bytes_threshold": 918000000, "cumulative_training_bytes": 918000290, "metrics": { "loss": 0.45433057856429154, "ce_loss": 0.4443305881010347, "lb_loss": 0.9999999979684357 } }, { "checkpoint_type": "bytes", "bytes_threshold": 919000000, "cumulative_training_bytes": 919001157, "metrics": { "loss": 0.45467502745592875, "ce_loss": 0.4446750369926719, "lb_loss": 0.9999999979168613 } }, { "checkpoint_type": "bytes", "bytes_threshold": 920000000, "cumulative_training_bytes": 920005392, "metrics": { "loss": 0.4549423707126007, "ce_loss": 0.44494238024934385, "lb_loss": 0.9999999978723801 } }, { "checkpoint_type": "bytes", "bytes_threshold": 921000000, "cumulative_training_bytes": 921006037, "metrics": { "loss": 0.455245339800192, "ce_loss": 0.44524534933693516, "lb_loss": 0.9999999982142027 } }, { "checkpoint_type": "bytes", "bytes_threshold": 922000000, "cumulative_training_bytes": 922000214, "metrics": { "loss": 0.45546488110268435, "ce_loss": 0.4454648906394275, "lb_loss": 0.9999999985965665 } }, { "checkpoint_type": "bytes", "bytes_threshold": 923000000, "cumulative_training_bytes": 923002745, "metrics": { "loss": 0.4556951227009582, "ce_loss": 0.44569513223770135, "lb_loss": 0.9999999987804012 } }, { "checkpoint_type": "bytes", "bytes_threshold": 924000000, "cumulative_training_bytes": 924007204, "metrics": { "loss": 0.4558257086469471, "ce_loss": 0.44582571818369027, "lb_loss": 0.9999999989689476 } }, { "checkpoint_type": "bytes", "bytes_threshold": 925000000, "cumulative_training_bytes": 925003699, "metrics": { "loss": 0.4559782938252584, "ce_loss": 0.44597830336200156, "lb_loss": 0.9999999988116353 } }, { "checkpoint_type": "bytes", "bytes_threshold": 926000000, "cumulative_training_bytes": 926005686, "metrics": { "loss": 0.4561811916532903, "ce_loss": 0.44618120119003346, "lb_loss": 0.9999999985813696 } }, { "checkpoint_type": "bytes", "bytes_threshold": 927000000, "cumulative_training_bytes": 927002982, "metrics": { "loss": 0.4563115703674782, "ce_loss": 0.4463115799042214, "lb_loss": 0.9999999980547615 } }, { "checkpoint_type": "bytes", "bytes_threshold": 928000000, "cumulative_training_bytes": 928004103, "metrics": { "loss": 0.45646053071749415, "ce_loss": 0.4464605402542373, "lb_loss": 0.999999998282578 } }, { "checkpoint_type": "bytes", "bytes_threshold": 929000000, "cumulative_training_bytes": 929006434, "metrics": { "loss": 0.4565663954454055, "ce_loss": 0.44656640498214867, "lb_loss": 0.9999999982201794 } }, { "checkpoint_type": "bytes", "bytes_threshold": 930000000, "cumulative_training_bytes": 930003003, "metrics": { "loss": 0.45671574782077784, "ce_loss": 0.446715757357521, "lb_loss": 0.999999998180861 } }, { "checkpoint_type": "bytes", "bytes_threshold": 931000000, "cumulative_training_bytes": 931005136, "metrics": { "loss": 0.45679983430676374, "ce_loss": 0.4467998438435069, "lb_loss": 0.9999999983770129 } }, { "checkpoint_type": "bytes", "bytes_threshold": 932000000, "cumulative_training_bytes": 932004010, "metrics": { "loss": 0.4569366266567495, "ce_loss": 0.44693663619349266, "lb_loss": 0.9999999983867445 } }, { "checkpoint_type": "bytes", "bytes_threshold": 933000000, "cumulative_training_bytes": 933006103, "metrics": { "loss": 0.4570423392423911, "ce_loss": 0.4470423487791343, "lb_loss": 0.9999999979161528 } }, { "checkpoint_type": "bytes", "bytes_threshold": 934000000, "cumulative_training_bytes": 934001056, "metrics": { "loss": 0.4571594332524909, "ce_loss": 0.4471594427892341, "lb_loss": 0.9999999978131129 } }, { "checkpoint_type": "bytes", "bytes_threshold": 935000000, "cumulative_training_bytes": 935003223, "metrics": { "loss": 0.45730806371681443, "ce_loss": 0.4473080732535576, "lb_loss": 0.9999999977175971 } }, { "checkpoint_type": "bytes", "bytes_threshold": 936000000, "cumulative_training_bytes": 936001763, "metrics": { "loss": 0.45738783855462106, "ce_loss": 0.4473878480913642, "lb_loss": 0.9999999975233114 } }, { "checkpoint_type": "bytes", "bytes_threshold": 937000000, "cumulative_training_bytes": 937006395, "metrics": { "loss": 0.45747957874488554, "ce_loss": 0.4474795882816287, "lb_loss": 0.9999999975441615 } }, { "checkpoint_type": "bytes", "bytes_threshold": 938000000, "cumulative_training_bytes": 938001495, "metrics": { "loss": 0.45755059916273993, "ce_loss": 0.4475506086994831, "lb_loss": 0.9999999975771608 } }, { "checkpoint_type": "bytes", "bytes_threshold": 939000000, "cumulative_training_bytes": 939003006, "metrics": { "loss": 0.45756441776033846, "ce_loss": 0.4475644272970816, "lb_loss": 0.9999999975810244 } }, { "checkpoint_type": "bytes", "bytes_threshold": 940000000, "cumulative_training_bytes": 940003165, "metrics": { "loss": 0.45759776781046674, "ce_loss": 0.4475977773472099, "lb_loss": 0.9999999973602903 } }, { "checkpoint_type": "bytes", "bytes_threshold": 941000000, "cumulative_training_bytes": 941001772, "metrics": { "loss": 0.45767297192580525, "ce_loss": 0.4476729814625484, "lb_loss": 0.9999999972288843 } }, { "checkpoint_type": "bytes", "bytes_threshold": 942000000, "cumulative_training_bytes": 942003089, "metrics": { "loss": 0.45774275527380737, "ce_loss": 0.44774276481055053, "lb_loss": 0.9999999972424898 } }, { "checkpoint_type": "bytes", "bytes_threshold": 943000000, "cumulative_training_bytes": 943006108, "metrics": { "loss": 0.45780045653905144, "ce_loss": 0.4478004660757946, "lb_loss": 0.9999999973889571 } }, { "checkpoint_type": "bytes", "bytes_threshold": 944000000, "cumulative_training_bytes": 944003340, "metrics": { "loss": 0.4579041221871552, "ce_loss": 0.44790413172389837, "lb_loss": 0.999999997290698 } }, { "checkpoint_type": "bytes", "bytes_threshold": 945000000, "cumulative_training_bytes": 945007101, "metrics": { "loss": 0.45798618218312526, "ce_loss": 0.4479861917198684, "lb_loss": 0.9999999974977349 } }, { "checkpoint_type": "bytes", "bytes_threshold": 946000000, "cumulative_training_bytes": 946006954, "metrics": { "loss": 0.45804723397740776, "ce_loss": 0.44804724351415093, "lb_loss": 0.9999999973683987 } }, { "checkpoint_type": "bytes", "bytes_threshold": 947000000, "cumulative_training_bytes": 947006874, "metrics": { "loss": 0.45816000358238773, "ce_loss": 0.4481600131191309, "lb_loss": 0.9999999972123771 } }, { "checkpoint_type": "bytes", "bytes_threshold": 948000000, "cumulative_training_bytes": 948002304, "metrics": { "loss": 0.4582202268191281, "ce_loss": 0.44822023635587127, "lb_loss": 0.9999999972346704 } }, { "checkpoint_type": "bytes", "bytes_threshold": 949000000, "cumulative_training_bytes": 949000281, "metrics": { "loss": 0.45827323714661655, "ce_loss": 0.4482732466833597, "lb_loss": 0.9999999969312667 } }, { "checkpoint_type": "bytes", "bytes_threshold": 950000000, "cumulative_training_bytes": 950005460, "metrics": { "loss": 0.4582935053636347, "ce_loss": 0.4482935149003779, "lb_loss": 0.9999999967238945 } }, { "checkpoint_type": "bytes", "bytes_threshold": 951000000, "cumulative_training_bytes": 951004754, "metrics": { "loss": 0.45839260763107137, "ce_loss": 0.44839261716781453, "lb_loss": 0.9999999967359123 } }, { "checkpoint_type": "bytes", "bytes_threshold": 952000000, "cumulative_training_bytes": 952003496, "metrics": { "loss": 0.4585031724775566, "ce_loss": 0.4485031820142998, "lb_loss": 0.9999999967180216 } }, { "checkpoint_type": "bytes", "bytes_threshold": 953000000, "cumulative_training_bytes": 953005419, "metrics": { "loss": 0.4585650799644636, "ce_loss": 0.44856508950120677, "lb_loss": 0.9999999965953904 } }, { "epoch": 20, "checkpoint_type": "epoch", "metrics": { "loss": 0.45858579490975426, "ce_loss": 0.4485858044464974, "lb_loss": 0.9999999965715838, "training_bytes": 47653413 }, "cumulative_training_bytes": 953068042, "training_bytes_this_epoch": 47653413 }, { "checkpoint_type": "bytes", "bytes_threshold": 954000000, "cumulative_training_bytes": 954000428, "metrics": { "loss": 0.4476760918976831, "ce_loss": 0.43767610143442626, "lb_loss": 1.0000000063513146 } }, { "checkpoint_type": "bytes", "bytes_threshold": 955000000, "cumulative_training_bytes": 955002200, "metrics": { "loss": 0.44804038077946234, "ce_loss": 0.4380403903162055, "lb_loss": 0.999999999528817 } }, { "checkpoint_type": "bytes", "bytes_threshold": 956000000, "cumulative_training_bytes": 956005963, "metrics": { "loss": 0.4483036200205485, "ce_loss": 0.4383036295572917, "lb_loss": 0.9999999996895591 } }, { "checkpoint_type": "bytes", "bytes_threshold": 957000000, "cumulative_training_bytes": 957000742, "metrics": { "loss": 0.448704544208393, "ce_loss": 0.43870455374513617, "lb_loss": 0.9999999988403766 } }, { "checkpoint_type": "bytes", "bytes_threshold": 958000000, "cumulative_training_bytes": 958002438, "metrics": { "loss": 0.44925628891286923, "ce_loss": 0.4392562984496124, "lb_loss": 0.9999999988910764 } }, { "checkpoint_type": "bytes", "bytes_threshold": 959000000, "cumulative_training_bytes": 959002429, "metrics": { "loss": 0.44934474852777295, "ce_loss": 0.4393447580645161, "lb_loss": 0.9999999976158143 } }, { "checkpoint_type": "bytes", "bytes_threshold": 960000000, "cumulative_training_bytes": 960003553, "metrics": { "loss": 0.4496341999003429, "ce_loss": 0.4396342094370861, "lb_loss": 0.9999999972368707 } }, { "checkpoint_type": "bytes", "bytes_threshold": 961000000, "cumulative_training_bytes": 961003867, "metrics": { "loss": 0.44992774178919703, "ce_loss": 0.4399277513259402, "lb_loss": 0.9999999952868073 } }, { "checkpoint_type": "bytes", "bytes_threshold": 962000000, "cumulative_training_bytes": 962003623, "metrics": { "loss": 0.45012032169185273, "ce_loss": 0.4401203312285959, "lb_loss": 0.9999999951520194 } }, { "checkpoint_type": "bytes", "bytes_threshold": 963000000, "cumulative_training_bytes": 963004917, "metrics": { "loss": 0.4505898071491691, "ce_loss": 0.44058981668591224, "lb_loss": 0.9999999956409228 } }, { "checkpoint_type": "bytes", "bytes_threshold": 964000000, "cumulative_training_bytes": 964007286, "metrics": { "loss": 0.4508954231555645, "ce_loss": 0.4408954326923077, "lb_loss": 0.999999996415385 } }, { "checkpoint_type": "bytes", "bytes_threshold": 965000000, "cumulative_training_bytes": 965001346, "metrics": { "loss": 0.4511721158638979, "ce_loss": 0.44117212540064105, "lb_loss": 0.9999999975164732 } }, { "checkpoint_type": "bytes", "bytes_threshold": 966000000, "cumulative_training_bytes": 966000376, "metrics": { "loss": 0.451245598821245, "ce_loss": 0.44124560835798815, "lb_loss": 0.9999999979543968 } }, { "checkpoint_type": "bytes", "bytes_threshold": 967000000, "cumulative_training_bytes": 967005281, "metrics": { "loss": 0.45132150851652425, "ce_loss": 0.4413215180532674, "lb_loss": 0.9999999973814544 } }, { "checkpoint_type": "bytes", "bytes_threshold": 968000000, "cumulative_training_bytes": 968007618, "metrics": { "loss": 0.4515903465083388, "ce_loss": 0.44159035604508196, "lb_loss": 0.9999999970075537 } }, { "checkpoint_type": "bytes", "bytes_threshold": 969000000, "cumulative_training_bytes": 969006307, "metrics": { "loss": 0.4518647513249947, "ce_loss": 0.44186476086173787, "lb_loss": 0.9999999977966598 } }, { "checkpoint_type": "bytes", "bytes_threshold": 970000000, "cumulative_training_bytes": 970004567, "metrics": { "loss": 0.4520443377858958, "ce_loss": 0.442044347322639, "lb_loss": 0.9999999973604812 } }, { "checkpoint_type": "bytes", "bytes_threshold": 971000000, "cumulative_training_bytes": 971001385, "metrics": { "loss": 0.4521723189417033, "ce_loss": 0.44217232847844645, "lb_loss": 0.9999999969981442 } }, { "checkpoint_type": "bytes", "bytes_threshold": 972000000, "cumulative_training_bytes": 972005775, "metrics": { "loss": 0.45231254098872165, "ce_loss": 0.4423125505254648, "lb_loss": 0.9999999970848172 } }, { "checkpoint_type": "bytes", "bytes_threshold": 973000000, "cumulative_training_bytes": 973005099, "metrics": { "loss": 0.4524783973730457, "ce_loss": 0.44247840690978885, "lb_loss": 0.9999999974831052 } }, { "checkpoint_type": "bytes", "bytes_threshold": 974000000, "cumulative_training_bytes": 974000735, "metrics": { "loss": 0.4526559548142623, "ce_loss": 0.44265596435100546, "lb_loss": 0.9999999972758389 } }, { "checkpoint_type": "bytes", "bytes_threshold": 975000000, "cumulative_training_bytes": 975000397, "metrics": { "loss": 0.45285711384950944, "ce_loss": 0.4428571233862526, "lb_loss": 0.9999999974211529 } }, { "checkpoint_type": "bytes", "bytes_threshold": 976000000, "cumulative_training_bytes": 976004243, "metrics": { "loss": 0.45301266459572265, "ce_loss": 0.4430126741324658, "lb_loss": 0.9999999969372322 } }, { "checkpoint_type": "bytes", "bytes_threshold": 977000000, "cumulative_training_bytes": 977001859, "metrics": { "loss": 0.4531138975627132, "ce_loss": 0.44311390709945636, "lb_loss": 0.9999999971026844 } }, { "checkpoint_type": "bytes", "bytes_threshold": 978000000, "cumulative_training_bytes": 978005532, "metrics": { "loss": 0.4532364708576706, "ce_loss": 0.44323648039441377, "lb_loss": 0.9999999968166948 } }, { "checkpoint_type": "bytes", "bytes_threshold": 979000000, "cumulative_training_bytes": 979005081, "metrics": { "loss": 0.45334783579973853, "ce_loss": 0.4433478453364817, "lb_loss": 0.9999999970971764 } }, { "checkpoint_type": "bytes", "bytes_threshold": 980000000, "cumulative_training_bytes": 980007499, "metrics": { "loss": 0.4534764752600741, "ce_loss": 0.4434764847968173, "lb_loss": 0.9999999970527967 } }, { "checkpoint_type": "bytes", "bytes_threshold": 981000000, "cumulative_training_bytes": 981004891, "metrics": { "loss": 0.4536125384216016, "ce_loss": 0.44361254795834476, "lb_loss": 0.999999996733097 } }, { "checkpoint_type": "bytes", "bytes_threshold": 982000000, "cumulative_training_bytes": 982006800, "metrics": { "loss": 0.45371898847912984, "ce_loss": 0.443718998015873, "lb_loss": 0.9999999963101887 } }, { "checkpoint_type": "bytes", "bytes_threshold": 983000000, "cumulative_training_bytes": 983004665, "metrics": { "loss": 0.4538384055847402, "ce_loss": 0.44383841512148337, "lb_loss": 0.9999999963109145 } }, { "checkpoint_type": "bytes", "bytes_threshold": 984000000, "cumulative_training_bytes": 984006393, "metrics": { "loss": 0.4539954239883744, "ce_loss": 0.44399543352511756, "lb_loss": 0.9999999966517559 } }, { "checkpoint_type": "bytes", "bytes_threshold": 985000000, "cumulative_training_bytes": 985007069, "metrics": { "loss": 0.45418143821014084, "ce_loss": 0.444181447746884, "lb_loss": 0.9999999966140218 } }, { "checkpoint_type": "bytes", "bytes_threshold": 986000000, "cumulative_training_bytes": 986006787, "metrics": { "loss": 0.45425647727171803, "ce_loss": 0.4442564868084612, "lb_loss": 0.9999999965916452 } }, { "checkpoint_type": "bytes", "bytes_threshold": 987000000, "cumulative_training_bytes": 987002711, "metrics": { "loss": 0.45432535147408715, "ce_loss": 0.4443253610108303, "lb_loss": 0.9999999966378247 } }, { "checkpoint_type": "bytes", "bytes_threshold": 988000000, "cumulative_training_bytes": 988007659, "metrics": { "loss": 0.4544178967201054, "ce_loss": 0.44441790625684857, "lb_loss": 0.9999999963816597 } }, { "checkpoint_type": "bytes", "bytes_threshold": 989000000, "cumulative_training_bytes": 989002984, "metrics": { "loss": 0.45448096611848804, "ce_loss": 0.4444809756552312, "lb_loss": 0.9999999967232052 } }, { "checkpoint_type": "bytes", "bytes_threshold": 990000000, "cumulative_training_bytes": 990002219, "metrics": { "loss": 0.4545410405099083, "ce_loss": 0.44454105004665145, "lb_loss": 0.999999996885679 } }, { "checkpoint_type": "bytes", "bytes_threshold": 991000000, "cumulative_training_bytes": 991003464, "metrics": { "loss": 0.45466355147456083, "ce_loss": 0.444663561011304, "lb_loss": 0.9999999970522532 } }, { "checkpoint_type": "bytes", "bytes_threshold": 992000000, "cumulative_training_bytes": 992005888, "metrics": { "loss": 0.45475785673660984, "ce_loss": 0.444757866273353, "lb_loss": 0.9999999972219664 } }, { "checkpoint_type": "bytes", "bytes_threshold": 993000000, "cumulative_training_bytes": 993002370, "metrics": { "loss": 0.4548222429800354, "ce_loss": 0.44482225251677854, "lb_loss": 0.9999999970283399 } }, { "checkpoint_type": "bytes", "bytes_threshold": 994000000, "cumulative_training_bytes": 994003864, "metrics": { "loss": 0.454938013634787, "ce_loss": 0.44493802317153014, "lb_loss": 0.9999999967555272 } }, { "checkpoint_type": "bytes", "bytes_threshold": 995000000, "cumulative_training_bytes": 995006555, "metrics": { "loss": 0.45501115932851155, "ce_loss": 0.4450111688652547, "lb_loss": 0.9999999965828267 } }, { "checkpoint_type": "bytes", "bytes_threshold": 996000000, "cumulative_training_bytes": 996000608, "metrics": { "loss": 0.4550933865529661, "ce_loss": 0.44509339608970927, "lb_loss": 0.9999999965132292 } }, { "checkpoint_type": "bytes", "bytes_threshold": 997000000, "cumulative_training_bytes": 997007326, "metrics": { "loss": 0.4551511483144245, "ce_loss": 0.44515115785116766, "lb_loss": 0.9999999964058545 } }, { "checkpoint_type": "bytes", "bytes_threshold": 998000000, "cumulative_training_bytes": 998006581, "metrics": { "loss": 0.4552532544349726, "ce_loss": 0.4452532639717158, "lb_loss": 0.9999999961610913 } }, { "checkpoint_type": "bytes", "bytes_threshold": 999000000, "cumulative_training_bytes": 999000135, "metrics": { "loss": 0.4552769968612228, "ce_loss": 0.44527700639796597, "lb_loss": 0.9999999962337178 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1000000000, "cumulative_training_bytes": 1000002883, "metrics": { "loss": 0.4553679389458804, "ce_loss": 0.44536794848262357, "lb_loss": 0.9999999961586172 } }, { "epoch": 21, "checkpoint_type": "epoch", "metrics": { "loss": 0.4554653137079548, "ce_loss": 0.44546532324469795, "lb_loss": 0.9999999964087819, "training_bytes": 47653407 }, "cumulative_training_bytes": 1000721449, "training_bytes_this_epoch": 47653407 }, { "checkpoint_type": "bytes", "bytes_threshold": 1001000000, "cumulative_training_bytes": 1001004765, "metrics": { "loss": 0.4417461897875812, "ce_loss": 0.43174619932432434, "lb_loss": 0.9999999967781273 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1002000000, "cumulative_training_bytes": 1002000171, "metrics": { "loss": 0.4442837666608616, "ce_loss": 0.4342837761976048, "lb_loss": 1.0000000014276562 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1003000000, "cumulative_training_bytes": 1003000999, "metrics": { "loss": 0.4452322681478206, "ce_loss": 0.43523227768456374, "lb_loss": 0.999999996599735 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1004000000, "cumulative_training_bytes": 1004003831, "metrics": { "loss": 0.4486882553233967, "ce_loss": 0.43868826486013984, "lb_loss": 0.9999999962486588 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1005000000, "cumulative_training_bytes": 1005001152, "metrics": { "loss": 0.44896745468508154, "ce_loss": 0.4389674642218247, "lb_loss": 0.999999996054791 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1006000000, "cumulative_training_bytes": 1006005315, "metrics": { "loss": 0.44883037995601044, "ce_loss": 0.4388303894927536, "lb_loss": 0.9999999951625216 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1007000000, "cumulative_training_bytes": 1007001983, "metrics": { "loss": 0.44876475799374466, "ce_loss": 0.4387647675304878, "lb_loss": 0.9999999939668469 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1008000000, "cumulative_training_bytes": 1008002750, "metrics": { "loss": 0.448804127555791, "ce_loss": 0.4388041370925342, "lb_loss": 0.9999999941084788 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1009000000, "cumulative_training_bytes": 1009004840, "metrics": { "loss": 0.4488484048578964, "ce_loss": 0.43884841439463956, "lb_loss": 0.9999999951523024 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1010000000, "cumulative_training_bytes": 1010007632, "metrics": { "loss": 0.44896201643192957, "ce_loss": 0.43896202596867273, "lb_loss": 0.999999996216358 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1011000000, "cumulative_training_bytes": 1011001073, "metrics": { "loss": 0.44882147891333135, "ce_loss": 0.4388214884500745, "lb_loss": 0.999999996446817 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1012000000, "cumulative_training_bytes": 1012002820, "metrics": { "loss": 0.44899433661057525, "ce_loss": 0.4389943461473184, "lb_loss": 0.9999999968032811 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1013000000, "cumulative_training_bytes": 1013000900, "metrics": { "loss": 0.4489474717483473, "ce_loss": 0.4389474812850905, "lb_loss": 0.9999999971368948 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1014000000, "cumulative_training_bytes": 1014005352, "metrics": { "loss": 0.449001441227386, "ce_loss": 0.43900145076412916, "lb_loss": 0.9999999966657148 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1015000000, "cumulative_training_bytes": 1015002241, "metrics": { "loss": 0.4491251497309607, "ce_loss": 0.43912515926770385, "lb_loss": 0.9999999973139537 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1016000000, "cumulative_training_bytes": 1016006378, "metrics": { "loss": 0.4492504603880689, "ce_loss": 0.43925046992481204, "lb_loss": 0.9999999975799618 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1017000000, "cumulative_training_bytes": 1017004553, "metrics": { "loss": 0.44930973311031563, "ce_loss": 0.4393097426470588, "lb_loss": 0.9999999975597157 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1018000000, "cumulative_training_bytes": 1018005638, "metrics": { "loss": 0.4494938055674235, "ce_loss": 0.4394938151041667, "lb_loss": 0.9999999973579501 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1019000000, "cumulative_training_bytes": 1019003819, "metrics": { "loss": 0.44971085923158527, "ce_loss": 0.43971086876832843, "lb_loss": 0.9999999975029474 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1020000000, "cumulative_training_bytes": 1020001116, "metrics": { "loss": 0.44981704697895014, "ce_loss": 0.4398170565156933, "lb_loss": 0.9999999973714281 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1021000000, "cumulative_training_bytes": 1021000064, "metrics": { "loss": 0.4500520344587251, "ce_loss": 0.44005204399546827, "lb_loss": 0.9999999973439018 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1022000000, "cumulative_training_bytes": 1022006171, "metrics": { "loss": 0.4501109510156138, "ce_loss": 0.440110960552357, "lb_loss": 0.999999997533453 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1023000000, "cumulative_training_bytes": 1023007389, "metrics": { "loss": 0.4502652396041503, "ce_loss": 0.4402652491408935, "lb_loss": 0.9999999975420765 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1024000000, "cumulative_training_bytes": 1024005719, "metrics": { "loss": 0.45038213478891476, "ce_loss": 0.4403821443256579, "lb_loss": 0.9999999971178017 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1025000000, "cumulative_training_bytes": 1025000620, "metrics": { "loss": 0.45051531972193193, "ce_loss": 0.4405153292586751, "lb_loss": 0.9999999971607882 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1026000000, "cumulative_training_bytes": 1026003600, "metrics": { "loss": 0.45060333509367184, "ce_loss": 0.440603344630415, "lb_loss": 0.9999999973637449 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1027000000, "cumulative_training_bytes": 1027002396, "metrics": { "loss": 0.45068954434696423, "ce_loss": 0.4406895538837074, "lb_loss": 0.9999999975852388 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1028000000, "cumulative_training_bytes": 1028006189, "metrics": { "loss": 0.45083050176308836, "ce_loss": 0.44083051129983153, "lb_loss": 0.9999999974063111 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1029000000, "cumulative_training_bytes": 1029000329, "metrics": { "loss": 0.45092642346027745, "ce_loss": 0.4409264329970206, "lb_loss": 0.999999997368484 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1030000000, "cumulative_training_bytes": 1030002716, "metrics": { "loss": 0.45103073774941954, "ce_loss": 0.4410307472861627, "lb_loss": 0.9999999974430652 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1031000000, "cumulative_training_bytes": 1031006184, "metrics": { "loss": 0.4510980082933024, "ce_loss": 0.44109801783004554, "lb_loss": 0.9999999974674809 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1032000000, "cumulative_training_bytes": 1032004553, "metrics": { "loss": 0.45115468714543117, "ce_loss": 0.44115469668217433, "lb_loss": 0.9999999971978227 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1033000000, "cumulative_training_bytes": 1033007149, "metrics": { "loss": 0.4512519366376341, "ce_loss": 0.44125194617437724, "lb_loss": 0.9999999973839006 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1034000000, "cumulative_training_bytes": 1034001070, "metrics": { "loss": 0.4513257780064099, "ce_loss": 0.44132578754315305, "lb_loss": 0.9999999975444807 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1035000000, "cumulative_training_bytes": 1035006816, "metrics": { "loss": 0.4513804955179932, "ce_loss": 0.44138050505473636, "lb_loss": 0.9999999975364479 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1036000000, "cumulative_training_bytes": 1036006156, "metrics": { "loss": 0.4515215541163934, "ce_loss": 0.44152156365313655, "lb_loss": 0.9999999976582503 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1037000000, "cumulative_training_bytes": 1037000394, "metrics": { "loss": 0.45160786134672737, "ce_loss": 0.44160787088347053, "lb_loss": 0.9999999976470195 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1038000000, "cumulative_training_bytes": 1038006020, "metrics": { "loss": 0.45169471558137514, "ce_loss": 0.4416947251181183, "lb_loss": 0.9999999977225834 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1039000000, "cumulative_training_bytes": 1039004839, "metrics": { "loss": 0.45180788329182253, "ce_loss": 0.4418078928285657, "lb_loss": 0.9999999978418802 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1040000000, "cumulative_training_bytes": 1040005527, "metrics": { "loss": 0.4519148905607227, "ce_loss": 0.44191490009746587, "lb_loss": 0.999999997618138 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1041000000, "cumulative_training_bytes": 1041000015, "metrics": { "loss": 0.4519806671505192, "ce_loss": 0.4419806766872624, "lb_loss": 0.9999999976656736 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1042000000, "cumulative_training_bytes": 1042000408, "metrics": { "loss": 0.4521246687465067, "ce_loss": 0.44212467828324986, "lb_loss": 0.9999999977776788 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1043000000, "cumulative_training_bytes": 1043006070, "metrics": { "loss": 0.4522623903138815, "ce_loss": 0.44226239985062465, "lb_loss": 0.9999999981221785 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1044000000, "cumulative_training_bytes": 1044005567, "metrics": { "loss": 0.4523166392559333, "ce_loss": 0.44231664879267646, "lb_loss": 0.9999999979228524 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1045000000, "cumulative_training_bytes": 1045001996, "metrics": { "loss": 0.4523748661765544, "ce_loss": 0.4423748757132976, "lb_loss": 0.9999999980416941 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1046000000, "cumulative_training_bytes": 1046006987, "metrics": { "loss": 0.45245876149386216, "ce_loss": 0.4424587710306053, "lb_loss": 0.9999999979238152 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1047000000, "cumulative_training_bytes": 1047002167, "metrics": { "loss": 0.452550329069543, "ce_loss": 0.4425503386062862, "lb_loss": 0.9999999979293672 } }, { "checkpoint_type": "bytes", "bytes_threshold": 1048000000, "cumulative_training_bytes": 1048003428, "metrics": { "loss": 0.4525869717385605, "ce_loss": 0.44258698127530366, "lb_loss": 0.9999999980694851 } }, { "epoch": 22, "checkpoint_type": "epoch", "metrics": { "loss": 0.4525974458777813, "ce_loss": 0.44259745541452444, "lb_loss": 0.9999999981517197, "training_bytes": 47653407 }, "cumulative_training_bytes": 1048374856, "training_bytes_this_epoch": 47653407 } ] }