| { |
| "best_global_step": 2100, |
| "best_metric": 1.0858707427978516, |
| "best_model_checkpoint": "./outputs/checkpoint-2100", |
| "epoch": 0.16188870151770657, |
| "eval_steps": 100, |
| "global_step": 2100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00015417971573114913, |
| "grad_norm": 1.2087944746017456, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.8689, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00030835943146229826, |
| "grad_norm": 1.2666666507720947, |
| "learning_rate": 6e-06, |
| "loss": 1.7785, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00046253914719344736, |
| "grad_norm": 0.7307026982307434, |
| "learning_rate": 1e-05, |
| "loss": 1.6809, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0006167188629245965, |
| "grad_norm": 1.2569252252578735, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 1.9048, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0007708985786557456, |
| "grad_norm": 0.9572980403900146, |
| "learning_rate": 1.8e-05, |
| "loss": 1.7574, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0009250782943868947, |
| "grad_norm": 0.9918506145477295, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 1.858, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0010792580101180438, |
| "grad_norm": 0.9316955208778381, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 1.8238, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.001233437725849193, |
| "grad_norm": 0.8265096545219421, |
| "learning_rate": 3e-05, |
| "loss": 1.6852, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.001387617441580342, |
| "grad_norm": 0.900516152381897, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 1.8227, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0015417971573114912, |
| "grad_norm": 0.9343056678771973, |
| "learning_rate": 3.8e-05, |
| "loss": 1.7732, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0016959768730426404, |
| "grad_norm": 0.8314495086669922, |
| "learning_rate": 4.2e-05, |
| "loss": 1.732, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0018501565887737894, |
| "grad_norm": 0.8370314240455627, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 1.6725, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0020043363045049384, |
| "grad_norm": 0.6678845286369324, |
| "learning_rate": 5e-05, |
| "loss": 1.5638, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0021585160202360876, |
| "grad_norm": 0.6469596028327942, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 1.6414, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.002312695735967237, |
| "grad_norm": 1.1161589622497559, |
| "learning_rate": 5.8e-05, |
| "loss": 1.6015, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.002466875451698386, |
| "grad_norm": 0.6085391044616699, |
| "learning_rate": 6.2e-05, |
| "loss": 1.4577, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0026210551674295353, |
| "grad_norm": 0.7159522175788879, |
| "learning_rate": 6.6e-05, |
| "loss": 1.4667, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.002775234883160684, |
| "grad_norm": 0.67247074842453, |
| "learning_rate": 7e-05, |
| "loss": 1.5619, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0029294145988918332, |
| "grad_norm": 0.6272625923156738, |
| "learning_rate": 7.4e-05, |
| "loss": 1.322, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0030835943146229824, |
| "grad_norm": 0.7291163206100464, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 1.3936, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0032377740303541317, |
| "grad_norm": 0.4980190396308899, |
| "learning_rate": 8.2e-05, |
| "loss": 1.3322, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.003391953746085281, |
| "grad_norm": 1.032578945159912, |
| "learning_rate": 8.6e-05, |
| "loss": 1.3657, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0035461334618164296, |
| "grad_norm": 0.5118615031242371, |
| "learning_rate": 9e-05, |
| "loss": 1.2866, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.003700313177547579, |
| "grad_norm": 0.5234407782554626, |
| "learning_rate": 9.4e-05, |
| "loss": 1.2806, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.003854492893278728, |
| "grad_norm": 0.49764135479927063, |
| "learning_rate": 9.8e-05, |
| "loss": 1.2004, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.004008672609009877, |
| "grad_norm": 0.34377485513687134, |
| "learning_rate": 0.00010200000000000001, |
| "loss": 1.1947, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0041628523247410265, |
| "grad_norm": 0.41426530480384827, |
| "learning_rate": 0.00010600000000000002, |
| "loss": 1.2689, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.004317032040472175, |
| "grad_norm": 0.5027992129325867, |
| "learning_rate": 0.00011000000000000002, |
| "loss": 1.2249, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.004471211756203325, |
| "grad_norm": 0.44335752725601196, |
| "learning_rate": 0.00011399999999999999, |
| "loss": 1.2771, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.004625391471934474, |
| "grad_norm": 0.3176646828651428, |
| "learning_rate": 0.000118, |
| "loss": 1.1873, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0047795711876656224, |
| "grad_norm": 0.24802716076374054, |
| "learning_rate": 0.000122, |
| "loss": 1.1989, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.004933750903396772, |
| "grad_norm": 0.23831751942634583, |
| "learning_rate": 0.000126, |
| "loss": 1.1093, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.005087930619127921, |
| "grad_norm": 0.24024009704589844, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 1.2196, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0052421103348590705, |
| "grad_norm": 0.2745237350463867, |
| "learning_rate": 0.000134, |
| "loss": 1.1802, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.005396290050590219, |
| "grad_norm": 0.27817806601524353, |
| "learning_rate": 0.000138, |
| "loss": 1.1939, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.005550469766321368, |
| "grad_norm": 0.19907328486442566, |
| "learning_rate": 0.000142, |
| "loss": 1.2061, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.005704649482052518, |
| "grad_norm": 0.18879663944244385, |
| "learning_rate": 0.000146, |
| "loss": 1.2149, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0058588291977836665, |
| "grad_norm": 0.21456782519817352, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 1.1726, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.006013008913514816, |
| "grad_norm": 0.23913143575191498, |
| "learning_rate": 0.000154, |
| "loss": 1.148, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.006167188629245965, |
| "grad_norm": 0.2148526906967163, |
| "learning_rate": 0.00015800000000000002, |
| "loss": 1.1925, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.006321368344977114, |
| "grad_norm": 0.2392999231815338, |
| "learning_rate": 0.000162, |
| "loss": 1.1488, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.006475548060708263, |
| "grad_norm": 0.16503232717514038, |
| "learning_rate": 0.000166, |
| "loss": 1.1555, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.006629727776439412, |
| "grad_norm": 0.1844739466905594, |
| "learning_rate": 0.00017, |
| "loss": 1.1934, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.006783907492170562, |
| "grad_norm": 0.23832857608795166, |
| "learning_rate": 0.000174, |
| "loss": 1.1129, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.0069380872079017105, |
| "grad_norm": 0.8846365809440613, |
| "learning_rate": 0.00017800000000000002, |
| "loss": 1.1028, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.007092266923632859, |
| "grad_norm": 0.187076598405838, |
| "learning_rate": 0.000182, |
| "loss": 1.1, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.007246446639364009, |
| "grad_norm": 0.1795521378517151, |
| "learning_rate": 0.00018600000000000002, |
| "loss": 1.1478, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.007400626355095158, |
| "grad_norm": 0.199871227145195, |
| "learning_rate": 0.00019, |
| "loss": 1.1223, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.007554806070826307, |
| "grad_norm": 0.17832662165164948, |
| "learning_rate": 0.000194, |
| "loss": 1.0909, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.007708985786557456, |
| "grad_norm": 0.17023932933807373, |
| "learning_rate": 0.00019800000000000002, |
| "loss": 1.1526, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.007708985786557456, |
| "eval_loss": 1.1401352882385254, |
| "eval_runtime": 185.6269, |
| "eval_samples_per_second": 91.274, |
| "eval_steps_per_second": 1.428, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.007863165502288605, |
| "grad_norm": 0.17429223656654358, |
| "learning_rate": 0.00019999484748557298, |
| "loss": 1.1597, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.008017345218019754, |
| "grad_norm": 0.16158349812030792, |
| "learning_rate": 0.0001999845424567189, |
| "loss": 1.1297, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.008171524933750904, |
| "grad_norm": 0.15818771719932556, |
| "learning_rate": 0.0001999742374278648, |
| "loss": 1.083, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.008325704649482053, |
| "grad_norm": 0.1591726392507553, |
| "learning_rate": 0.00019996393239901073, |
| "loss": 1.086, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.008479884365213202, |
| "grad_norm": 0.174184650182724, |
| "learning_rate": 0.00019995362737015664, |
| "loss": 1.0769, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.00863406408094435, |
| "grad_norm": 0.15928815305233002, |
| "learning_rate": 0.00019994332234130258, |
| "loss": 1.1315, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0087882437966755, |
| "grad_norm": 0.19639264047145844, |
| "learning_rate": 0.0001999330173124485, |
| "loss": 1.1339, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.00894242351240665, |
| "grad_norm": 0.1639835238456726, |
| "learning_rate": 0.0001999227122835944, |
| "loss": 1.0836, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.009096603228137799, |
| "grad_norm": 0.18691964447498322, |
| "learning_rate": 0.00019991240725474033, |
| "loss": 1.2109, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.009250782943868947, |
| "grad_norm": 0.188096821308136, |
| "learning_rate": 0.00019990210222588624, |
| "loss": 1.1778, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.009404962659600096, |
| "grad_norm": 0.1527150571346283, |
| "learning_rate": 0.00019989179719703218, |
| "loss": 1.0977, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.009559142375331245, |
| "grad_norm": 0.1705218255519867, |
| "learning_rate": 0.0001998814921681781, |
| "loss": 1.1333, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.009713322091062395, |
| "grad_norm": 0.1888928860425949, |
| "learning_rate": 0.00019987118713932401, |
| "loss": 1.1843, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.009867501806793544, |
| "grad_norm": 0.1778104603290558, |
| "learning_rate": 0.00019986088211046993, |
| "loss": 1.0766, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.010021681522524693, |
| "grad_norm": 0.15807992219924927, |
| "learning_rate": 0.00019985057708161584, |
| "loss": 1.0449, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.010175861238255842, |
| "grad_norm": 0.16706159710884094, |
| "learning_rate": 0.00019984027205276176, |
| "loss": 1.0644, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.01033004095398699, |
| "grad_norm": 0.16455501317977905, |
| "learning_rate": 0.00019982996702390767, |
| "loss": 1.1479, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.010484220669718141, |
| "grad_norm": 0.17258939146995544, |
| "learning_rate": 0.0001998196619950536, |
| "loss": 1.0614, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.01063840038544929, |
| "grad_norm": 0.15501369535923004, |
| "learning_rate": 0.0001998093569661995, |
| "loss": 1.1045, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.010792580101180439, |
| "grad_norm": 0.1534334272146225, |
| "learning_rate": 0.00019979905193734542, |
| "loss": 1.1035, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.010946759816911587, |
| "grad_norm": 0.14120443165302277, |
| "learning_rate": 0.00019978874690849136, |
| "loss": 1.0618, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.011100939532642736, |
| "grad_norm": 0.17808520793914795, |
| "learning_rate": 0.00019977844187963728, |
| "loss": 1.1687, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.011255119248373887, |
| "grad_norm": 0.16697613894939423, |
| "learning_rate": 0.0001997681368507832, |
| "loss": 1.0979, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.011409298964105035, |
| "grad_norm": 0.16491086781024933, |
| "learning_rate": 0.0001997578318219291, |
| "loss": 1.1219, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.011563478679836184, |
| "grad_norm": 0.15342313051223755, |
| "learning_rate": 0.00019974752679307502, |
| "loss": 1.1169, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.011717658395567333, |
| "grad_norm": 0.1539286971092224, |
| "learning_rate": 0.00019973722176422093, |
| "loss": 1.1288, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.011871838111298482, |
| "grad_norm": 0.15605852007865906, |
| "learning_rate": 0.00019972691673536688, |
| "loss": 1.0445, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.012026017827029632, |
| "grad_norm": 0.14324098825454712, |
| "learning_rate": 0.0001997166117065128, |
| "loss": 1.1309, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.012180197542760781, |
| "grad_norm": 0.21045701205730438, |
| "learning_rate": 0.0001997063066776587, |
| "loss": 1.0946, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.01233437725849193, |
| "grad_norm": 0.16019922494888306, |
| "learning_rate": 0.00019969600164880462, |
| "loss": 1.11, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.012488556974223079, |
| "grad_norm": 0.15740078687667847, |
| "learning_rate": 0.00019968569661995054, |
| "loss": 1.112, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.012642736689954227, |
| "grad_norm": 0.16974380612373352, |
| "learning_rate": 0.00019967539159109648, |
| "loss": 1.1279, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.012796916405685378, |
| "grad_norm": 0.16405288875102997, |
| "learning_rate": 0.0001996650865622424, |
| "loss": 1.0952, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.012951096121416527, |
| "grad_norm": 0.16120509803295135, |
| "learning_rate": 0.0001996547815333883, |
| "loss": 1.1203, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.013105275837147675, |
| "grad_norm": 0.17402276396751404, |
| "learning_rate": 0.00019964447650453422, |
| "loss": 1.0991, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.013259455552878824, |
| "grad_norm": 0.18349111080169678, |
| "learning_rate": 0.00019963417147568014, |
| "loss": 1.1394, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.013413635268609973, |
| "grad_norm": 0.14613087475299835, |
| "learning_rate": 0.00019962386644682608, |
| "loss": 1.1357, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.013567814984341123, |
| "grad_norm": 0.142988383769989, |
| "learning_rate": 0.000199613561417972, |
| "loss": 1.0169, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.013721994700072272, |
| "grad_norm": 0.14817160367965698, |
| "learning_rate": 0.0001996032563891179, |
| "loss": 1.1238, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.013876174415803421, |
| "grad_norm": 0.15391133725643158, |
| "learning_rate": 0.00019959295136026382, |
| "loss": 1.0712, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.01403035413153457, |
| "grad_norm": 0.1766846477985382, |
| "learning_rate": 0.00019958264633140974, |
| "loss": 1.1422, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.014184533847265719, |
| "grad_norm": 0.16789212822914124, |
| "learning_rate": 0.00019957234130255565, |
| "loss": 1.1266, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.014338713562996869, |
| "grad_norm": 0.1527165323495865, |
| "learning_rate": 0.00019956203627370157, |
| "loss": 1.0667, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.014492893278728018, |
| "grad_norm": 0.1772206574678421, |
| "learning_rate": 0.00019955173124484748, |
| "loss": 1.1182, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.014647072994459167, |
| "grad_norm": 0.15008313953876495, |
| "learning_rate": 0.0001995414262159934, |
| "loss": 1.0382, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.014801252710190315, |
| "grad_norm": 0.16365988552570343, |
| "learning_rate": 0.00019953112118713931, |
| "loss": 1.1262, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.014955432425921464, |
| "grad_norm": 0.14952193200588226, |
| "learning_rate": 0.00019952081615828526, |
| "loss": 1.1245, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.015109612141652615, |
| "grad_norm": 0.15425263345241547, |
| "learning_rate": 0.00019951051112943117, |
| "loss": 1.1452, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.015263791857383763, |
| "grad_norm": 0.1567617654800415, |
| "learning_rate": 0.00019950020610057709, |
| "loss": 1.0392, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.015417971573114912, |
| "grad_norm": 0.14292609691619873, |
| "learning_rate": 0.000199489901071723, |
| "loss": 1.0728, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.015417971573114912, |
| "eval_loss": 1.1127630472183228, |
| "eval_runtime": 185.2528, |
| "eval_samples_per_second": 91.459, |
| "eval_steps_per_second": 1.43, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.015572151288846061, |
| "grad_norm": 0.15465517342090607, |
| "learning_rate": 0.00019947959604286892, |
| "loss": 1.0596, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.01572633100457721, |
| "grad_norm": 0.16749607026576996, |
| "learning_rate": 0.00019946929101401486, |
| "loss": 1.1005, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.01588051072030836, |
| "grad_norm": 0.15854287147521973, |
| "learning_rate": 0.00019945898598516077, |
| "loss": 1.0963, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.016034690436039507, |
| "grad_norm": 0.1457831859588623, |
| "learning_rate": 0.0001994486809563067, |
| "loss": 1.1149, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.016188870151770656, |
| "grad_norm": 0.15744629502296448, |
| "learning_rate": 0.0001994383759274526, |
| "loss": 1.0789, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.01634304986750181, |
| "grad_norm": 0.13411423563957214, |
| "learning_rate": 0.00019942807089859852, |
| "loss": 1.0641, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.016497229583232957, |
| "grad_norm": 0.1575399488210678, |
| "learning_rate": 0.00019941776586974446, |
| "loss": 1.0888, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.016651409298964106, |
| "grad_norm": 0.14619529247283936, |
| "learning_rate": 0.00019940746084089037, |
| "loss": 1.081, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.016805589014695255, |
| "grad_norm": 0.15578237175941467, |
| "learning_rate": 0.0001993971558120363, |
| "loss": 1.1434, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.016959768730426403, |
| "grad_norm": 0.1516629308462143, |
| "learning_rate": 0.0001993868507831822, |
| "loss": 1.0909, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.017113948446157552, |
| "grad_norm": 0.15613436698913574, |
| "learning_rate": 0.00019937654575432812, |
| "loss": 1.0999, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.0172681281618887, |
| "grad_norm": 0.14825573563575745, |
| "learning_rate": 0.00019936624072547406, |
| "loss": 1.0827, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.01742230787761985, |
| "grad_norm": 0.1624906212091446, |
| "learning_rate": 0.00019935593569661998, |
| "loss": 1.0856, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.017576487593351, |
| "grad_norm": 0.1380940079689026, |
| "learning_rate": 0.0001993456306677659, |
| "loss": 1.0514, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.017730667309082147, |
| "grad_norm": 0.13712120056152344, |
| "learning_rate": 0.0001993353256389118, |
| "loss": 1.0977, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0178848470248133, |
| "grad_norm": 0.1448957622051239, |
| "learning_rate": 0.00019932502061005772, |
| "loss": 1.0729, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.01803902674054445, |
| "grad_norm": 0.13421876728534698, |
| "learning_rate": 0.00019931471558120364, |
| "loss": 1.0879, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.018193206456275597, |
| "grad_norm": 0.16884732246398926, |
| "learning_rate": 0.00019930441055234955, |
| "loss": 1.1159, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.018347386172006746, |
| "grad_norm": 0.14634890854358673, |
| "learning_rate": 0.00019929410552349547, |
| "loss": 1.0568, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.018501565887737895, |
| "grad_norm": 0.16796648502349854, |
| "learning_rate": 0.00019928380049464138, |
| "loss": 1.0944, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.018655745603469043, |
| "grad_norm": 0.13724717497825623, |
| "learning_rate": 0.0001992734954657873, |
| "loss": 1.0609, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.018809925319200192, |
| "grad_norm": 0.14133594930171967, |
| "learning_rate": 0.0001992631904369332, |
| "loss": 1.0879, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.01896410503493134, |
| "grad_norm": 0.1611246019601822, |
| "learning_rate": 0.00019925288540807915, |
| "loss": 1.0681, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.01911828475066249, |
| "grad_norm": 0.17420877516269684, |
| "learning_rate": 0.00019924258037922507, |
| "loss": 1.1336, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.01927246446639364, |
| "grad_norm": 0.13766029477119446, |
| "learning_rate": 0.00019923227535037098, |
| "loss": 1.075, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.01942664418212479, |
| "grad_norm": 0.1691662222146988, |
| "learning_rate": 0.0001992219703215169, |
| "loss": 1.1369, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.01958082389785594, |
| "grad_norm": 0.14959432184696198, |
| "learning_rate": 0.0001992116652926628, |
| "loss": 1.1129, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.01973500361358709, |
| "grad_norm": 0.14996406435966492, |
| "learning_rate": 0.00019920136026380875, |
| "loss": 1.0304, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.019889183329318237, |
| "grad_norm": 0.13211801648139954, |
| "learning_rate": 0.00019919105523495467, |
| "loss": 1.0652, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.020043363045049386, |
| "grad_norm": 0.16041967272758484, |
| "learning_rate": 0.00019918075020610058, |
| "loss": 1.077, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.020197542760780535, |
| "grad_norm": 0.1524546593427658, |
| "learning_rate": 0.0001991704451772465, |
| "loss": 1.1176, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.020351722476511683, |
| "grad_norm": 0.16032540798187256, |
| "learning_rate": 0.00019916014014839241, |
| "loss": 1.0736, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.020505902192242832, |
| "grad_norm": 0.17891019582748413, |
| "learning_rate": 0.00019914983511953836, |
| "loss": 1.1435, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.02066008190797398, |
| "grad_norm": 0.14484059810638428, |
| "learning_rate": 0.00019913953009068427, |
| "loss": 1.0356, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.02081426162370513, |
| "grad_norm": 0.14321155846118927, |
| "learning_rate": 0.00019912922506183019, |
| "loss": 1.0536, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.020968441339436282, |
| "grad_norm": 0.17357808351516724, |
| "learning_rate": 0.0001991189200329761, |
| "loss": 1.171, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.02112262105516743, |
| "grad_norm": 0.13990800082683563, |
| "learning_rate": 0.00019910861500412202, |
| "loss": 1.0946, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.02127680077089858, |
| "grad_norm": 0.16634231805801392, |
| "learning_rate": 0.00019909830997526796, |
| "loss": 1.1029, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.02143098048662973, |
| "grad_norm": 0.16322381794452667, |
| "learning_rate": 0.00019908800494641387, |
| "loss": 1.0688, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.021585160202360877, |
| "grad_norm": 0.1652844250202179, |
| "learning_rate": 0.0001990776999175598, |
| "loss": 1.1237, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.021739339918092026, |
| "grad_norm": 0.14457885921001434, |
| "learning_rate": 0.0001990673948887057, |
| "loss": 1.1995, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.021893519633823175, |
| "grad_norm": 0.15549878776073456, |
| "learning_rate": 0.00019905708985985162, |
| "loss": 1.0475, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.022047699349554323, |
| "grad_norm": 0.15715502202510834, |
| "learning_rate": 0.00019904678483099756, |
| "loss": 1.1211, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.022201879065285472, |
| "grad_norm": 0.14022529125213623, |
| "learning_rate": 0.00019903647980214347, |
| "loss": 1.1056, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.02235605878101662, |
| "grad_norm": 0.13293786346912384, |
| "learning_rate": 0.0001990261747732894, |
| "loss": 1.0877, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.022510238496747773, |
| "grad_norm": 0.14625073969364166, |
| "learning_rate": 0.0001990158697444353, |
| "loss": 1.0375, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.022664418212478922, |
| "grad_norm": 0.1417943835258484, |
| "learning_rate": 0.0001990055647155812, |
| "loss": 1.091, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.02281859792821007, |
| "grad_norm": 0.1519964039325714, |
| "learning_rate": 0.00019899525968672713, |
| "loss": 1.0396, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.02297277764394122, |
| "grad_norm": 0.1676655411720276, |
| "learning_rate": 0.00019898495465787305, |
| "loss": 1.1249, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.02312695735967237, |
| "grad_norm": 0.1487220674753189, |
| "learning_rate": 0.00019897464962901896, |
| "loss": 1.1768, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.02312695735967237, |
| "eval_loss": 1.1061022281646729, |
| "eval_runtime": 185.239, |
| "eval_samples_per_second": 91.466, |
| "eval_steps_per_second": 1.431, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.023281137075403517, |
| "grad_norm": 0.1399739533662796, |
| "learning_rate": 0.00019896434460016488, |
| "loss": 1.0962, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.023435316791134666, |
| "grad_norm": 0.15282337367534637, |
| "learning_rate": 0.0001989540395713108, |
| "loss": 1.1688, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.023589496506865815, |
| "grad_norm": 0.15459619462490082, |
| "learning_rate": 0.00019894373454245674, |
| "loss": 1.0216, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.023743676222596963, |
| "grad_norm": 0.15799634158611298, |
| "learning_rate": 0.00019893342951360265, |
| "loss": 1.1429, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.023897855938328112, |
| "grad_norm": 0.1343819946050644, |
| "learning_rate": 0.00019892312448474857, |
| "loss": 1.0959, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.024052035654059264, |
| "grad_norm": 0.14791317284107208, |
| "learning_rate": 0.00019891281945589448, |
| "loss": 1.0636, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.024206215369790413, |
| "grad_norm": 0.1442137360572815, |
| "learning_rate": 0.0001989025144270404, |
| "loss": 1.055, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.024360395085521562, |
| "grad_norm": 0.14649145305156708, |
| "learning_rate": 0.00019889220939818634, |
| "loss": 1.0906, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.02451457480125271, |
| "grad_norm": 0.14234665036201477, |
| "learning_rate": 0.00019888190436933225, |
| "loss": 1.0853, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.02466875451698386, |
| "grad_norm": 0.1419668048620224, |
| "learning_rate": 0.00019887159934047817, |
| "loss": 1.0296, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.02482293423271501, |
| "grad_norm": 0.14730845391750336, |
| "learning_rate": 0.00019886129431162408, |
| "loss": 1.0421, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.024977113948446157, |
| "grad_norm": 0.1400081068277359, |
| "learning_rate": 0.00019885098928277, |
| "loss": 1.0291, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.025131293664177306, |
| "grad_norm": 0.15542668104171753, |
| "learning_rate": 0.0001988406842539159, |
| "loss": 1.0597, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.025285473379908455, |
| "grad_norm": 0.14521440863609314, |
| "learning_rate": 0.00019883037922506185, |
| "loss": 1.0491, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.025439653095639603, |
| "grad_norm": 0.16224826872348785, |
| "learning_rate": 0.00019882007419620777, |
| "loss": 1.1031, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.025593832811370756, |
| "grad_norm": 0.15028877556324005, |
| "learning_rate": 0.00019880976916735368, |
| "loss": 1.1154, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.025748012527101904, |
| "grad_norm": 0.12962941825389862, |
| "learning_rate": 0.0001987994641384996, |
| "loss": 1.0363, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.025902192242833053, |
| "grad_norm": 0.14908359944820404, |
| "learning_rate": 0.0001987891591096455, |
| "loss": 1.1513, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.026056371958564202, |
| "grad_norm": 0.15441828966140747, |
| "learning_rate": 0.00019877885408079146, |
| "loss": 1.1303, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.02621055167429535, |
| "grad_norm": 0.12669101357460022, |
| "learning_rate": 0.00019876854905193737, |
| "loss": 1.0875, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0263647313900265, |
| "grad_norm": 0.13190661370754242, |
| "learning_rate": 0.00019875824402308329, |
| "loss": 1.0778, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.02651891110575765, |
| "grad_norm": 0.14043989777565002, |
| "learning_rate": 0.0001987479389942292, |
| "loss": 1.1011, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.026673090821488797, |
| "grad_norm": 0.13694870471954346, |
| "learning_rate": 0.00019873763396537512, |
| "loss": 1.0532, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.026827270537219946, |
| "grad_norm": 0.15089921653270721, |
| "learning_rate": 0.00019872732893652103, |
| "loss": 1.1292, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.026981450252951095, |
| "grad_norm": 0.14839838445186615, |
| "learning_rate": 0.00019871702390766694, |
| "loss": 1.0275, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.027135629968682247, |
| "grad_norm": 0.16198500990867615, |
| "learning_rate": 0.00019870671887881286, |
| "loss": 1.1453, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.027289809684413396, |
| "grad_norm": 0.14694632589817047, |
| "learning_rate": 0.00019869641384995877, |
| "loss": 1.129, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.027443989400144544, |
| "grad_norm": 0.16091379523277283, |
| "learning_rate": 0.0001986861088211047, |
| "loss": 1.1186, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.027598169115875693, |
| "grad_norm": 0.144720658659935, |
| "learning_rate": 0.00019867580379225063, |
| "loss": 1.0224, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.027752348831606842, |
| "grad_norm": 0.13851307332515717, |
| "learning_rate": 0.00019866549876339655, |
| "loss": 1.1421, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.02790652854733799, |
| "grad_norm": 0.13124969601631165, |
| "learning_rate": 0.00019865519373454246, |
| "loss": 1.0938, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.02806070826306914, |
| "grad_norm": 0.14723828434944153, |
| "learning_rate": 0.00019864488870568838, |
| "loss": 1.1335, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.02821488797880029, |
| "grad_norm": 0.17669795453548431, |
| "learning_rate": 0.0001986345836768343, |
| "loss": 1.0765, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.028369067694531437, |
| "grad_norm": 0.1457260102033615, |
| "learning_rate": 0.00019862427864798023, |
| "loss": 1.1073, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.028523247410262586, |
| "grad_norm": 0.13594554364681244, |
| "learning_rate": 0.00019861397361912615, |
| "loss": 1.0587, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.028677427125993738, |
| "grad_norm": 0.13798941671848297, |
| "learning_rate": 0.00019860366859027206, |
| "loss": 1.0833, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.028831606841724887, |
| "grad_norm": 0.15587519109249115, |
| "learning_rate": 0.00019859336356141798, |
| "loss": 1.0287, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.028985786557456036, |
| "grad_norm": 0.16585086286067963, |
| "learning_rate": 0.0001985830585325639, |
| "loss": 1.1786, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.029139966273187184, |
| "grad_norm": 0.1444484293460846, |
| "learning_rate": 0.00019857275350370983, |
| "loss": 1.1793, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.029294145988918333, |
| "grad_norm": 0.14413981139659882, |
| "learning_rate": 0.00019856244847485575, |
| "loss": 1.1141, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.029448325704649482, |
| "grad_norm": 0.142032191157341, |
| "learning_rate": 0.00019855214344600166, |
| "loss": 1.1033, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.02960250542038063, |
| "grad_norm": 0.1490195393562317, |
| "learning_rate": 0.00019854183841714758, |
| "loss": 1.1592, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.02975668513611178, |
| "grad_norm": 0.1408643275499344, |
| "learning_rate": 0.0001985315333882935, |
| "loss": 1.1505, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.02991086485184293, |
| "grad_norm": 0.12526237964630127, |
| "learning_rate": 0.00019852122835943944, |
| "loss": 1.1027, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.030065044567574077, |
| "grad_norm": 0.1339711844921112, |
| "learning_rate": 0.00019851092333058535, |
| "loss": 1.1238, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.03021922428330523, |
| "grad_norm": 0.13032345473766327, |
| "learning_rate": 0.00019850061830173127, |
| "loss": 1.1121, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.030373403999036378, |
| "grad_norm": 0.15815846621990204, |
| "learning_rate": 0.00019849031327287718, |
| "loss": 1.168, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.030527583714767527, |
| "grad_norm": 0.14245116710662842, |
| "learning_rate": 0.0001984800082440231, |
| "loss": 1.0436, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.030681763430498676, |
| "grad_norm": 0.15660050511360168, |
| "learning_rate": 0.000198469703215169, |
| "loss": 1.158, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.030835943146229824, |
| "grad_norm": 0.1654158979654312, |
| "learning_rate": 0.00019845939818631493, |
| "loss": 1.0802, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.030835943146229824, |
| "eval_loss": 1.1026971340179443, |
| "eval_runtime": 185.7295, |
| "eval_samples_per_second": 91.224, |
| "eval_steps_per_second": 1.427, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.030990122861960973, |
| "grad_norm": 0.13845407962799072, |
| "learning_rate": 0.00019844909315746084, |
| "loss": 1.1055, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.031144302577692122, |
| "grad_norm": 0.14852891862392426, |
| "learning_rate": 0.00019843878812860676, |
| "loss": 1.0983, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.031298482293423274, |
| "grad_norm": 0.13408593833446503, |
| "learning_rate": 0.00019842848309975267, |
| "loss": 1.1063, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.03145266200915442, |
| "grad_norm": 0.14041072130203247, |
| "learning_rate": 0.00019841817807089859, |
| "loss": 1.0327, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.03160684172488557, |
| "grad_norm": 0.16119754314422607, |
| "learning_rate": 0.00019840787304204453, |
| "loss": 1.1, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.03176102144061672, |
| "grad_norm": 0.14471223950386047, |
| "learning_rate": 0.00019839756801319044, |
| "loss": 1.0783, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.03191520115634787, |
| "grad_norm": 0.15591050684452057, |
| "learning_rate": 0.00019838726298433636, |
| "loss": 1.1782, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.032069380872079015, |
| "grad_norm": 0.1766556203365326, |
| "learning_rate": 0.00019837695795548227, |
| "loss": 1.1063, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.03222356058781017, |
| "grad_norm": 0.16078630089759827, |
| "learning_rate": 0.0001983666529266282, |
| "loss": 1.0891, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.03237774030354131, |
| "grad_norm": 0.13378402590751648, |
| "learning_rate": 0.00019835634789777413, |
| "loss": 1.074, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.032531920019272464, |
| "grad_norm": 0.14526261389255524, |
| "learning_rate": 0.00019834604286892004, |
| "loss": 1.108, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.03268609973500362, |
| "grad_norm": 0.1321713775396347, |
| "learning_rate": 0.00019833573784006596, |
| "loss": 1.019, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.03284027945073476, |
| "grad_norm": 0.12685374915599823, |
| "learning_rate": 0.00019832543281121187, |
| "loss": 1.09, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.032994459166465914, |
| "grad_norm": 0.13825605809688568, |
| "learning_rate": 0.0001983151277823578, |
| "loss": 1.1356, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.03314863888219706, |
| "grad_norm": 0.13683827221393585, |
| "learning_rate": 0.00019830482275350373, |
| "loss": 1.1405, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.03330281859792821, |
| "grad_norm": 0.16707143187522888, |
| "learning_rate": 0.00019829451772464965, |
| "loss": 1.1305, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.03345699831365936, |
| "grad_norm": 0.11735045164823532, |
| "learning_rate": 0.00019828421269579556, |
| "loss": 1.0421, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.03361117802939051, |
| "grad_norm": 0.1337989866733551, |
| "learning_rate": 0.00019827390766694148, |
| "loss": 1.0572, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.033765357745121655, |
| "grad_norm": 0.17111611366271973, |
| "learning_rate": 0.0001982636026380874, |
| "loss": 1.1698, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.03391953746085281, |
| "grad_norm": 0.13785259425640106, |
| "learning_rate": 0.00019825329760923333, |
| "loss": 1.056, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.03407371717658395, |
| "grad_norm": 0.15061460435390472, |
| "learning_rate": 0.00019824299258037925, |
| "loss": 1.0963, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.034227896892315104, |
| "grad_norm": 0.1231001690030098, |
| "learning_rate": 0.00019823268755152516, |
| "loss": 1.1264, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.03438207660804626, |
| "grad_norm": 0.13752298057079315, |
| "learning_rate": 0.00019822238252267108, |
| "loss": 1.0672, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.0345362563237774, |
| "grad_norm": 0.13519813120365143, |
| "learning_rate": 0.000198212077493817, |
| "loss": 1.0882, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.034690436039508554, |
| "grad_norm": 0.140150785446167, |
| "learning_rate": 0.0001982017724649629, |
| "loss": 1.0572, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.0348446157552397, |
| "grad_norm": 0.13910406827926636, |
| "learning_rate": 0.00019819146743610882, |
| "loss": 1.0762, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.03499879547097085, |
| "grad_norm": 0.14587442576885223, |
| "learning_rate": 0.00019818116240725474, |
| "loss": 1.1232, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.035152975186702, |
| "grad_norm": 0.14476893842220306, |
| "learning_rate": 0.00019817085737840065, |
| "loss": 1.1004, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.03530715490243315, |
| "grad_norm": 0.13861101865768433, |
| "learning_rate": 0.00019816055234954657, |
| "loss": 1.0302, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.035461334618164295, |
| "grad_norm": 0.14342686533927917, |
| "learning_rate": 0.0001981502473206925, |
| "loss": 1.1092, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.03561551433389545, |
| "grad_norm": 0.11709775030612946, |
| "learning_rate": 0.00019813994229183842, |
| "loss": 1.0463, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.0357696940496266, |
| "grad_norm": 0.15154917538166046, |
| "learning_rate": 0.00019812963726298434, |
| "loss": 1.0897, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.035923873765357744, |
| "grad_norm": 0.16716259717941284, |
| "learning_rate": 0.00019811933223413025, |
| "loss": 1.1214, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.0360780534810889, |
| "grad_norm": 0.13513320684432983, |
| "learning_rate": 0.00019810902720527617, |
| "loss": 1.0623, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.03623223319682004, |
| "grad_norm": 0.15930432081222534, |
| "learning_rate": 0.0001980987221764221, |
| "loss": 1.1092, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.036386412912551194, |
| "grad_norm": 0.13990509510040283, |
| "learning_rate": 0.00019808841714756803, |
| "loss": 1.1048, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.03654059262828234, |
| "grad_norm": 0.18784300982952118, |
| "learning_rate": 0.00019807811211871394, |
| "loss": 1.1676, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.03669477234401349, |
| "grad_norm": 0.152045339345932, |
| "learning_rate": 0.00019806780708985986, |
| "loss": 1.1303, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.03684895205974464, |
| "grad_norm": 0.1409967988729477, |
| "learning_rate": 0.00019805750206100577, |
| "loss": 1.0972, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.03700313177547579, |
| "grad_norm": 0.13838854432106018, |
| "learning_rate": 0.0001980471970321517, |
| "loss": 1.101, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.037157311491206935, |
| "grad_norm": 0.1579430103302002, |
| "learning_rate": 0.00019803689200329763, |
| "loss": 1.1077, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.03731149120693809, |
| "grad_norm": 0.15061910450458527, |
| "learning_rate": 0.00019802658697444354, |
| "loss": 1.1239, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.03746567092266924, |
| "grad_norm": 0.16408291459083557, |
| "learning_rate": 0.00019801628194558946, |
| "loss": 1.0961, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.037619850638400384, |
| "grad_norm": 0.15612424910068512, |
| "learning_rate": 0.00019800597691673537, |
| "loss": 1.1299, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.03777403035413154, |
| "grad_norm": 0.14135530591011047, |
| "learning_rate": 0.00019799567188788131, |
| "loss": 1.0489, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.03792821006986268, |
| "grad_norm": 0.13743548095226288, |
| "learning_rate": 0.00019798536685902723, |
| "loss": 1.0837, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.038082389785593834, |
| "grad_norm": 0.157401442527771, |
| "learning_rate": 0.00019797506183017314, |
| "loss": 1.0573, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.03823656950132498, |
| "grad_norm": 0.14982052147388458, |
| "learning_rate": 0.00019796475680131906, |
| "loss": 1.0839, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.03839074921705613, |
| "grad_norm": 0.1347000151872635, |
| "learning_rate": 0.00019795445177246497, |
| "loss": 1.113, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.03854492893278728, |
| "grad_norm": 0.14478904008865356, |
| "learning_rate": 0.0001979441467436109, |
| "loss": 1.0514, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.03854492893278728, |
| "eval_loss": 1.1000746488571167, |
| "eval_runtime": 185.5217, |
| "eval_samples_per_second": 91.326, |
| "eval_steps_per_second": 1.428, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.03869910864851843, |
| "grad_norm": 0.14274291694164276, |
| "learning_rate": 0.00019793384171475683, |
| "loss": 1.0847, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.03885328836424958, |
| "grad_norm": 0.14326965808868408, |
| "learning_rate": 0.00019792353668590275, |
| "loss": 1.0865, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.03900746807998073, |
| "grad_norm": 0.1575518548488617, |
| "learning_rate": 0.00019791323165704866, |
| "loss": 1.1258, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.03916164779571188, |
| "grad_norm": 0.14699862897396088, |
| "learning_rate": 0.00019790292662819458, |
| "loss": 1.1687, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.039315827511443024, |
| "grad_norm": 0.1394687294960022, |
| "learning_rate": 0.0001978926215993405, |
| "loss": 1.1214, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.03947000722717418, |
| "grad_norm": 0.14366985857486725, |
| "learning_rate": 0.0001978823165704864, |
| "loss": 1.0651, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03962418694290532, |
| "grad_norm": 0.14171218872070312, |
| "learning_rate": 0.00019787201154163232, |
| "loss": 1.1398, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.039778366658636474, |
| "grad_norm": 0.13258612155914307, |
| "learning_rate": 0.00019786170651277824, |
| "loss": 1.1234, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.03993254637436762, |
| "grad_norm": 0.17693160474300385, |
| "learning_rate": 0.00019785140148392415, |
| "loss": 1.1121, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.04008672609009877, |
| "grad_norm": 0.143838569521904, |
| "learning_rate": 0.00019784109645507006, |
| "loss": 1.102, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.04024090580582992, |
| "grad_norm": 0.14078038930892944, |
| "learning_rate": 0.000197830791426216, |
| "loss": 1.1044, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.04039508552156107, |
| "grad_norm": 0.12367985397577286, |
| "learning_rate": 0.00019782048639736192, |
| "loss": 1.102, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.04054926523729222, |
| "grad_norm": 0.136929452419281, |
| "learning_rate": 0.00019781018136850784, |
| "loss": 1.0802, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.04070344495302337, |
| "grad_norm": 0.15831957757472992, |
| "learning_rate": 0.00019779987633965375, |
| "loss": 1.09, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.04085762466875452, |
| "grad_norm": 0.15482452511787415, |
| "learning_rate": 0.00019778957131079967, |
| "loss": 1.0828, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.041011804384485664, |
| "grad_norm": 0.13797122240066528, |
| "learning_rate": 0.0001977792662819456, |
| "loss": 1.1263, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.04116598410021682, |
| "grad_norm": 0.18304814398288727, |
| "learning_rate": 0.00019776896125309152, |
| "loss": 1.0991, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.04132016381594796, |
| "grad_norm": 0.1509987860918045, |
| "learning_rate": 0.00019775865622423744, |
| "loss": 1.0804, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.041474343531679114, |
| "grad_norm": 0.13406258821487427, |
| "learning_rate": 0.00019774835119538335, |
| "loss": 1.0348, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.04162852324741026, |
| "grad_norm": 0.1413736194372177, |
| "learning_rate": 0.00019773804616652927, |
| "loss": 1.066, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.04178270296314141, |
| "grad_norm": 0.1451394259929657, |
| "learning_rate": 0.0001977277411376752, |
| "loss": 1.0485, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.041936882678872564, |
| "grad_norm": 0.13275358080863953, |
| "learning_rate": 0.00019771743610882113, |
| "loss": 1.1164, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.04209106239460371, |
| "grad_norm": 0.15869611501693726, |
| "learning_rate": 0.00019770713107996704, |
| "loss": 1.1361, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.04224524211033486, |
| "grad_norm": 0.14091487228870392, |
| "learning_rate": 0.00019769682605111295, |
| "loss": 1.061, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.04239942182606601, |
| "grad_norm": 0.13538867235183716, |
| "learning_rate": 0.00019768652102225887, |
| "loss": 1.0607, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.04255360154179716, |
| "grad_norm": 0.15626317262649536, |
| "learning_rate": 0.0001976762159934048, |
| "loss": 1.0758, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.042707781257528304, |
| "grad_norm": 0.1293731927871704, |
| "learning_rate": 0.00019766591096455073, |
| "loss": 1.0434, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.04286196097325946, |
| "grad_norm": 0.13498535752296448, |
| "learning_rate": 0.00019765560593569664, |
| "loss": 1.0953, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.0430161406889906, |
| "grad_norm": 0.14134527742862701, |
| "learning_rate": 0.00019764530090684256, |
| "loss": 1.1559, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.043170320404721754, |
| "grad_norm": 0.13958705961704254, |
| "learning_rate": 0.00019763499587798847, |
| "loss": 1.2585, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.0433245001204529, |
| "grad_norm": 0.2181047797203064, |
| "learning_rate": 0.0001976246908491344, |
| "loss": 1.0164, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.04347867983618405, |
| "grad_norm": 0.1365436315536499, |
| "learning_rate": 0.0001976143858202803, |
| "loss": 1.124, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.043632859551915204, |
| "grad_norm": 0.12809793651103973, |
| "learning_rate": 0.00019760408079142622, |
| "loss": 1.0378, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.04378703926764635, |
| "grad_norm": 0.12341924756765366, |
| "learning_rate": 0.00019759377576257213, |
| "loss": 1.1091, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.0439412189833775, |
| "grad_norm": 0.14291982352733612, |
| "learning_rate": 0.00019758347073371805, |
| "loss": 1.1366, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.04409539869910865, |
| "grad_norm": 0.14486652612686157, |
| "learning_rate": 0.000197573165704864, |
| "loss": 1.0168, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.0442495784148398, |
| "grad_norm": 0.1724916249513626, |
| "learning_rate": 0.0001975628606760099, |
| "loss": 1.1037, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.044403758130570944, |
| "grad_norm": 0.13338427245616913, |
| "learning_rate": 0.00019755255564715582, |
| "loss": 1.0259, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.0445579378463021, |
| "grad_norm": 0.1372508853673935, |
| "learning_rate": 0.00019754225061830173, |
| "loss": 1.0784, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.04471211756203324, |
| "grad_norm": 0.11633725464344025, |
| "learning_rate": 0.00019753194558944765, |
| "loss": 1.0648, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.044866297277764394, |
| "grad_norm": 0.14386776089668274, |
| "learning_rate": 0.00019752164056059356, |
| "loss": 1.0777, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.045020476993495546, |
| "grad_norm": 0.14929193258285522, |
| "learning_rate": 0.0001975113355317395, |
| "loss": 1.1319, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.04517465670922669, |
| "grad_norm": 0.1324220448732376, |
| "learning_rate": 0.00019750103050288542, |
| "loss": 1.0614, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.045328836424957844, |
| "grad_norm": 0.1392926126718521, |
| "learning_rate": 0.00019749072547403133, |
| "loss": 1.142, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.04548301614068899, |
| "grad_norm": 0.2632090151309967, |
| "learning_rate": 0.00019748042044517725, |
| "loss": 1.0159, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.04563719585642014, |
| "grad_norm": 0.13699129223823547, |
| "learning_rate": 0.00019747011541632316, |
| "loss": 1.0778, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.04579137557215129, |
| "grad_norm": 0.13768675923347473, |
| "learning_rate": 0.0001974598103874691, |
| "loss": 1.0719, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.04594555528788244, |
| "grad_norm": 0.13458684086799622, |
| "learning_rate": 0.00019744950535861502, |
| "loss": 1.0145, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.046099735003613584, |
| "grad_norm": 0.1772696077823639, |
| "learning_rate": 0.00019743920032976094, |
| "loss": 1.0629, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.04625391471934474, |
| "grad_norm": 0.13998697698116302, |
| "learning_rate": 0.00019742889530090685, |
| "loss": 1.102, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.04625391471934474, |
| "eval_loss": 1.098169207572937, |
| "eval_runtime": 185.5141, |
| "eval_samples_per_second": 91.33, |
| "eval_steps_per_second": 1.428, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.04640809443507588, |
| "grad_norm": 0.13928066194057465, |
| "learning_rate": 0.00019741859027205277, |
| "loss": 1.1527, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.046562274150807034, |
| "grad_norm": 0.13011601567268372, |
| "learning_rate": 0.0001974082852431987, |
| "loss": 1.1259, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.046716453866538186, |
| "grad_norm": 0.1306074559688568, |
| "learning_rate": 0.00019739798021434462, |
| "loss": 1.0951, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.04687063358226933, |
| "grad_norm": 0.14797037839889526, |
| "learning_rate": 0.00019738767518549054, |
| "loss": 1.0321, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.047024813298000484, |
| "grad_norm": 0.14849938452243805, |
| "learning_rate": 0.00019737737015663645, |
| "loss": 1.1096, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.04717899301373163, |
| "grad_norm": 0.12060682475566864, |
| "learning_rate": 0.00019736706512778237, |
| "loss": 1.0652, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.04733317272946278, |
| "grad_norm": 0.12754854559898376, |
| "learning_rate": 0.00019735676009892828, |
| "loss": 1.1097, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.04748735244519393, |
| "grad_norm": 0.12162326276302338, |
| "learning_rate": 0.0001973464550700742, |
| "loss": 1.1087, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.04764153216092508, |
| "grad_norm": 0.175630122423172, |
| "learning_rate": 0.0001973361500412201, |
| "loss": 1.0723, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.047795711876656224, |
| "grad_norm": 0.15365472435951233, |
| "learning_rate": 0.00019732584501236603, |
| "loss": 1.1009, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.04794989159238738, |
| "grad_norm": 0.13359837234020233, |
| "learning_rate": 0.00019731553998351194, |
| "loss": 1.0974, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.04810407130811853, |
| "grad_norm": 0.1482960432767868, |
| "learning_rate": 0.00019730523495465788, |
| "loss": 1.1214, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.048258251023849674, |
| "grad_norm": 0.1309668868780136, |
| "learning_rate": 0.0001972949299258038, |
| "loss": 1.0849, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.048412430739580826, |
| "grad_norm": 0.1544414609670639, |
| "learning_rate": 0.00019728462489694971, |
| "loss": 1.092, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.04856661045531197, |
| "grad_norm": 0.14907146990299225, |
| "learning_rate": 0.00019727431986809563, |
| "loss": 1.0671, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.048720790171043124, |
| "grad_norm": 0.16943813860416412, |
| "learning_rate": 0.00019726401483924154, |
| "loss": 1.1433, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.04887496988677427, |
| "grad_norm": 0.14070230722427368, |
| "learning_rate": 0.00019725370981038749, |
| "loss": 1.1613, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.04902914960250542, |
| "grad_norm": 0.15507204830646515, |
| "learning_rate": 0.0001972434047815334, |
| "loss": 1.1286, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.04918332931823657, |
| "grad_norm": 0.13587893545627594, |
| "learning_rate": 0.00019723309975267932, |
| "loss": 1.1094, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.04933750903396772, |
| "grad_norm": 0.12399852275848389, |
| "learning_rate": 0.00019722279472382523, |
| "loss": 1.058, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.049491688749698864, |
| "grad_norm": 0.12497518211603165, |
| "learning_rate": 0.00019721248969497115, |
| "loss": 1.0716, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.04964586846543002, |
| "grad_norm": 0.15282607078552246, |
| "learning_rate": 0.0001972021846661171, |
| "loss": 1.0912, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.04980004818116117, |
| "grad_norm": 0.14203013479709625, |
| "learning_rate": 0.000197191879637263, |
| "loss": 1.0846, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.049954227896892314, |
| "grad_norm": 0.12308704853057861, |
| "learning_rate": 0.00019718157460840892, |
| "loss": 1.1202, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.050108407612623466, |
| "grad_norm": 0.15226681530475616, |
| "learning_rate": 0.00019717126957955483, |
| "loss": 1.0626, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.05026258732835461, |
| "grad_norm": 0.12636694312095642, |
| "learning_rate": 0.00019716096455070075, |
| "loss": 1.1086, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.050416767044085764, |
| "grad_norm": 0.14969666302204132, |
| "learning_rate": 0.0001971506595218467, |
| "loss": 1.1602, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.05057094675981691, |
| "grad_norm": 0.130833700299263, |
| "learning_rate": 0.0001971403544929926, |
| "loss": 1.0657, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.05072512647554806, |
| "grad_norm": 0.1283751279115677, |
| "learning_rate": 0.00019713004946413852, |
| "loss": 1.0371, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.05087930619127921, |
| "grad_norm": 0.11827697604894638, |
| "learning_rate": 0.00019711974443528443, |
| "loss": 1.0308, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.05103348590701036, |
| "grad_norm": 0.12265590578317642, |
| "learning_rate": 0.00019710943940643035, |
| "loss": 1.1127, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.05118766562274151, |
| "grad_norm": 0.13979150354862213, |
| "learning_rate": 0.0001970991343775763, |
| "loss": 1.1011, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.05134184533847266, |
| "grad_norm": 0.1368461698293686, |
| "learning_rate": 0.0001970888293487222, |
| "loss": 1.0857, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.05149602505420381, |
| "grad_norm": 0.13669301569461823, |
| "learning_rate": 0.00019707852431986812, |
| "loss": 1.0971, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.051650204769934954, |
| "grad_norm": 0.12659449875354767, |
| "learning_rate": 0.00019706821929101404, |
| "loss": 1.0556, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.051804384485666106, |
| "grad_norm": 0.14103113114833832, |
| "learning_rate": 0.00019705791426215995, |
| "loss": 1.0913, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.05195856420139725, |
| "grad_norm": 0.16134017705917358, |
| "learning_rate": 0.00019704760923330587, |
| "loss": 1.0994, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.052112743917128404, |
| "grad_norm": 0.12725086510181427, |
| "learning_rate": 0.00019703730420445178, |
| "loss": 1.1008, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.05226692363285955, |
| "grad_norm": 0.12865908443927765, |
| "learning_rate": 0.0001970269991755977, |
| "loss": 1.0186, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.0524211033485907, |
| "grad_norm": 0.1661859154701233, |
| "learning_rate": 0.0001970166941467436, |
| "loss": 1.068, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.05257528306432185, |
| "grad_norm": 0.14370663464069366, |
| "learning_rate": 0.00019700638911788953, |
| "loss": 1.102, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.052729462780053, |
| "grad_norm": 0.13285204768180847, |
| "learning_rate": 0.00019699608408903544, |
| "loss": 1.1055, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.05288364249578415, |
| "grad_norm": 0.17762747406959534, |
| "learning_rate": 0.00019698577906018138, |
| "loss": 1.1601, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.0530378222115153, |
| "grad_norm": 0.12693317234516144, |
| "learning_rate": 0.0001969754740313273, |
| "loss": 1.0494, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.05319200192724645, |
| "grad_norm": 0.1302707940340042, |
| "learning_rate": 0.0001969651690024732, |
| "loss": 1.066, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.053346181642977594, |
| "grad_norm": 0.11844471096992493, |
| "learning_rate": 0.00019695486397361913, |
| "loss": 1.0085, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.053500361358708746, |
| "grad_norm": 0.12299422174692154, |
| "learning_rate": 0.00019694455894476504, |
| "loss": 1.0985, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.05365454107443989, |
| "grad_norm": 0.1222420409321785, |
| "learning_rate": 0.00019693425391591098, |
| "loss": 1.0648, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.053808720790171044, |
| "grad_norm": 0.13273879885673523, |
| "learning_rate": 0.0001969239488870569, |
| "loss": 1.1108, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.05396290050590219, |
| "grad_norm": 0.13202215731143951, |
| "learning_rate": 0.00019691364385820281, |
| "loss": 1.1013, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.05396290050590219, |
| "eval_loss": 1.0964874029159546, |
| "eval_runtime": 185.3303, |
| "eval_samples_per_second": 91.421, |
| "eval_steps_per_second": 1.43, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.05411708022163334, |
| "grad_norm": 0.13038010895252228, |
| "learning_rate": 0.00019690333882934873, |
| "loss": 1.0642, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.054271259937364494, |
| "grad_norm": 0.18084144592285156, |
| "learning_rate": 0.00019689303380049464, |
| "loss": 1.0673, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.05442543965309564, |
| "grad_norm": 0.18958036601543427, |
| "learning_rate": 0.00019688272877164059, |
| "loss": 1.0925, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.05457961936882679, |
| "grad_norm": 0.13386841118335724, |
| "learning_rate": 0.0001968724237427865, |
| "loss": 1.0978, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.05473379908455794, |
| "grad_norm": 0.1408504843711853, |
| "learning_rate": 0.00019686211871393242, |
| "loss": 1.1158, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.05488797880028909, |
| "grad_norm": 0.12006545811891556, |
| "learning_rate": 0.00019685181368507833, |
| "loss": 1.0395, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.055042158516020234, |
| "grad_norm": 0.13973191380500793, |
| "learning_rate": 0.00019684150865622425, |
| "loss": 1.0685, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.055196338231751386, |
| "grad_norm": 0.14461107552051544, |
| "learning_rate": 0.0001968312036273702, |
| "loss": 1.0924, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.05535051794748253, |
| "grad_norm": 0.13358595967292786, |
| "learning_rate": 0.0001968208985985161, |
| "loss": 1.0479, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.055504697663213684, |
| "grad_norm": 0.13416843116283417, |
| "learning_rate": 0.00019681059356966202, |
| "loss": 1.0166, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.05565887737894483, |
| "grad_norm": 0.15217959880828857, |
| "learning_rate": 0.00019680028854080793, |
| "loss": 1.0918, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.05581305709467598, |
| "grad_norm": 0.13012762367725372, |
| "learning_rate": 0.00019678998351195385, |
| "loss": 1.0967, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.055967236810407134, |
| "grad_norm": 0.13023535907268524, |
| "learning_rate": 0.00019677967848309976, |
| "loss": 1.0247, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.05612141652613828, |
| "grad_norm": 0.13703665137290955, |
| "learning_rate": 0.00019676937345424568, |
| "loss": 1.0969, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.05627559624186943, |
| "grad_norm": 0.12767066061496735, |
| "learning_rate": 0.0001967590684253916, |
| "loss": 1.08, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.05642977595760058, |
| "grad_norm": 0.12238382548093796, |
| "learning_rate": 0.0001967487633965375, |
| "loss": 1.1233, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.05658395567333173, |
| "grad_norm": 0.1356974095106125, |
| "learning_rate": 0.00019673845836768342, |
| "loss": 1.0439, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.056738135389062874, |
| "grad_norm": 0.14199669659137726, |
| "learning_rate": 0.00019672815333882936, |
| "loss": 1.0753, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.056892315104794026, |
| "grad_norm": 0.12904112040996552, |
| "learning_rate": 0.00019671784830997528, |
| "loss": 1.0749, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.05704649482052517, |
| "grad_norm": 0.1235031932592392, |
| "learning_rate": 0.0001967075432811212, |
| "loss": 1.0275, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.057200674536256324, |
| "grad_norm": 0.170023113489151, |
| "learning_rate": 0.0001966972382522671, |
| "loss": 1.1295, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.057354854251987476, |
| "grad_norm": 0.15533532202243805, |
| "learning_rate": 0.00019668693322341302, |
| "loss": 1.0629, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.05750903396771862, |
| "grad_norm": 0.1602126806974411, |
| "learning_rate": 0.00019667662819455897, |
| "loss": 1.1538, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.057663213683449774, |
| "grad_norm": 0.16433580219745636, |
| "learning_rate": 0.00019666632316570488, |
| "loss": 1.1322, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.05781739339918092, |
| "grad_norm": 0.13925233483314514, |
| "learning_rate": 0.0001966560181368508, |
| "loss": 1.083, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.05797157311491207, |
| "grad_norm": 0.12234565615653992, |
| "learning_rate": 0.0001966457131079967, |
| "loss": 1.0113, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.05812575283064322, |
| "grad_norm": 0.1425125002861023, |
| "learning_rate": 0.00019663540807914262, |
| "loss": 1.0762, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.05827993254637437, |
| "grad_norm": 0.14309099316596985, |
| "learning_rate": 0.00019662510305028854, |
| "loss": 1.0633, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.058434112262105514, |
| "grad_norm": 0.1381814330816269, |
| "learning_rate": 0.00019661479802143448, |
| "loss": 1.142, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.058588291977836666, |
| "grad_norm": 0.15551595389842987, |
| "learning_rate": 0.0001966044929925804, |
| "loss": 1.026, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.05874247169356781, |
| "grad_norm": 0.14606410264968872, |
| "learning_rate": 0.0001965941879637263, |
| "loss": 1.1265, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.058896651409298964, |
| "grad_norm": 0.13017289340496063, |
| "learning_rate": 0.00019658388293487223, |
| "loss": 1.1051, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.059050831125030116, |
| "grad_norm": 0.1500990092754364, |
| "learning_rate": 0.00019657357790601814, |
| "loss": 1.0948, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.05920501084076126, |
| "grad_norm": 0.14307473599910736, |
| "learning_rate": 0.00019656327287716408, |
| "loss": 1.0667, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.059359190556492414, |
| "grad_norm": 0.13513712584972382, |
| "learning_rate": 0.00019655296784831, |
| "loss": 1.0488, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.05951337027222356, |
| "grad_norm": 0.13991938531398773, |
| "learning_rate": 0.0001965426628194559, |
| "loss": 1.0888, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.05966754998795471, |
| "grad_norm": 0.15015999972820282, |
| "learning_rate": 0.00019653235779060183, |
| "loss": 1.0774, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.05982172970368586, |
| "grad_norm": 0.16419099271297455, |
| "learning_rate": 0.00019652205276174774, |
| "loss": 1.0661, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.05997590941941701, |
| "grad_norm": 0.12072901427745819, |
| "learning_rate": 0.00019651174773289366, |
| "loss": 1.0645, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.060130089135148154, |
| "grad_norm": 0.13410696387290955, |
| "learning_rate": 0.00019650144270403957, |
| "loss": 1.0677, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.060284268850879306, |
| "grad_norm": 0.13373896479606628, |
| "learning_rate": 0.0001964911376751855, |
| "loss": 1.0055, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.06043844856661046, |
| "grad_norm": 0.13043928146362305, |
| "learning_rate": 0.0001964808326463314, |
| "loss": 1.0579, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.060592628282341604, |
| "grad_norm": 0.13334155082702637, |
| "learning_rate": 0.00019647052761747732, |
| "loss": 1.0781, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.060746807998072756, |
| "grad_norm": 0.14660002291202545, |
| "learning_rate": 0.00019646022258862326, |
| "loss": 1.1244, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.0609009877138039, |
| "grad_norm": 0.1240791380405426, |
| "learning_rate": 0.00019644991755976917, |
| "loss": 1.0353, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.061055167429535054, |
| "grad_norm": 0.12248943001031876, |
| "learning_rate": 0.0001964396125309151, |
| "loss": 1.1292, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.0612093471452662, |
| "grad_norm": 0.1340823471546173, |
| "learning_rate": 0.000196429307502061, |
| "loss": 1.0764, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.06136352686099735, |
| "grad_norm": 0.1297413557767868, |
| "learning_rate": 0.00019641900247320692, |
| "loss": 1.0998, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.0615177065767285, |
| "grad_norm": 0.13512568175792694, |
| "learning_rate": 0.00019640869744435286, |
| "loss": 1.0349, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.06167188629245965, |
| "grad_norm": 0.13964438438415527, |
| "learning_rate": 0.00019639839241549878, |
| "loss": 1.0543, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.06167188629245965, |
| "eval_loss": 1.0952669382095337, |
| "eval_runtime": 185.8383, |
| "eval_samples_per_second": 91.171, |
| "eval_steps_per_second": 1.426, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.061826066008190794, |
| "grad_norm": 0.1318446695804596, |
| "learning_rate": 0.0001963880873866447, |
| "loss": 1.1469, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.061980245723921946, |
| "grad_norm": 0.13778544962406158, |
| "learning_rate": 0.0001963777823577906, |
| "loss": 1.0361, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.0621344254396531, |
| "grad_norm": 0.14804169535636902, |
| "learning_rate": 0.00019636747732893652, |
| "loss": 1.0537, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.062288605155384244, |
| "grad_norm": 0.1363479495048523, |
| "learning_rate": 0.00019635717230008246, |
| "loss": 1.0819, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.062442784871115396, |
| "grad_norm": 0.12277363240718842, |
| "learning_rate": 0.00019634686727122838, |
| "loss": 1.0629, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.06259696458684655, |
| "grad_norm": 0.13027344644069672, |
| "learning_rate": 0.0001963365622423743, |
| "loss": 1.0544, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.0627511443025777, |
| "grad_norm": 0.1274079531431198, |
| "learning_rate": 0.0001963262572135202, |
| "loss": 1.0685, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.06290532401830884, |
| "grad_norm": 0.1349189281463623, |
| "learning_rate": 0.00019631595218466612, |
| "loss": 1.0289, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.06305950373403998, |
| "grad_norm": 0.1265273541212082, |
| "learning_rate": 0.00019630564715581206, |
| "loss": 1.0765, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.06321368344977114, |
| "grad_norm": 0.1393941193819046, |
| "learning_rate": 0.00019629534212695798, |
| "loss": 1.0918, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.06336786316550229, |
| "grad_norm": 0.12475106865167618, |
| "learning_rate": 0.0001962850370981039, |
| "loss": 1.027, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.06352204288123343, |
| "grad_norm": 0.13844382762908936, |
| "learning_rate": 0.0001962747320692498, |
| "loss": 1.1482, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.0636762225969646, |
| "grad_norm": 0.1444624364376068, |
| "learning_rate": 0.00019626442704039572, |
| "loss": 1.0659, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.06383040231269574, |
| "grad_norm": 0.13939915597438812, |
| "learning_rate": 0.00019625412201154164, |
| "loss": 1.0392, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.06398458202842688, |
| "grad_norm": 0.12919913232326508, |
| "learning_rate": 0.00019624381698268755, |
| "loss": 1.0566, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.06413876174415803, |
| "grad_norm": 0.1297498196363449, |
| "learning_rate": 0.00019623351195383347, |
| "loss": 1.058, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.06429294145988919, |
| "grad_norm": 0.16311457753181458, |
| "learning_rate": 0.00019622320692497938, |
| "loss": 1.1175, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.06444712117562033, |
| "grad_norm": 0.14434239268302917, |
| "learning_rate": 0.0001962129018961253, |
| "loss": 1.0966, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.06460130089135148, |
| "grad_norm": 0.13500697910785675, |
| "learning_rate": 0.00019620259686727121, |
| "loss": 1.138, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.06475548060708262, |
| "grad_norm": 0.13175781071186066, |
| "learning_rate": 0.00019619229183841716, |
| "loss": 1.0744, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.06490966032281378, |
| "grad_norm": 0.142098531126976, |
| "learning_rate": 0.00019618198680956307, |
| "loss": 1.0686, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.06506384003854493, |
| "grad_norm": 0.16844119131565094, |
| "learning_rate": 0.00019617168178070899, |
| "loss": 1.0992, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.06521801975427607, |
| "grad_norm": 0.13562923669815063, |
| "learning_rate": 0.0001961613767518549, |
| "loss": 1.0749, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.06537219947000723, |
| "grad_norm": 0.14538466930389404, |
| "learning_rate": 0.00019615107172300082, |
| "loss": 1.123, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.06552637918573838, |
| "grad_norm": 0.13058879971504211, |
| "learning_rate": 0.00019614076669414676, |
| "loss": 1.0835, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.06568055890146952, |
| "grad_norm": 0.1567140519618988, |
| "learning_rate": 0.00019613046166529267, |
| "loss": 1.1157, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.06583473861720067, |
| "grad_norm": 0.12576104700565338, |
| "learning_rate": 0.0001961201566364386, |
| "loss": 1.0143, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.06598891833293183, |
| "grad_norm": 0.13823091983795166, |
| "learning_rate": 0.0001961098516075845, |
| "loss": 1.0797, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.06614309804866297, |
| "grad_norm": 0.12293639779090881, |
| "learning_rate": 0.00019609954657873042, |
| "loss": 1.0808, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.06629727776439412, |
| "grad_norm": 0.13951502740383148, |
| "learning_rate": 0.00019608924154987636, |
| "loss": 1.076, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.06645145748012526, |
| "grad_norm": 0.13900773227214813, |
| "learning_rate": 0.00019607893652102227, |
| "loss": 1.0846, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.06660563719585642, |
| "grad_norm": 0.14335249364376068, |
| "learning_rate": 0.0001960686314921682, |
| "loss": 1.0639, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.06675981691158757, |
| "grad_norm": 0.1712643951177597, |
| "learning_rate": 0.0001960583264633141, |
| "loss": 1.1411, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.06691399662731871, |
| "grad_norm": 0.12118082493543625, |
| "learning_rate": 0.00019604802143446002, |
| "loss": 1.0807, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.06706817634304987, |
| "grad_norm": 0.141808420419693, |
| "learning_rate": 0.00019603771640560596, |
| "loss": 1.0641, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.06722235605878102, |
| "grad_norm": 0.14798308908939362, |
| "learning_rate": 0.00019602741137675188, |
| "loss": 1.073, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.06737653577451216, |
| "grad_norm": 0.13768306374549866, |
| "learning_rate": 0.0001960171063478978, |
| "loss": 1.0735, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.06753071549024331, |
| "grad_norm": 0.12452355027198792, |
| "learning_rate": 0.0001960068013190437, |
| "loss": 1.0509, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.06768489520597447, |
| "grad_norm": 0.1402217000722885, |
| "learning_rate": 0.00019599649629018962, |
| "loss": 1.1157, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.06783907492170561, |
| "grad_norm": 0.12509870529174805, |
| "learning_rate": 0.00019598619126133556, |
| "loss": 1.0516, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.06799325463743676, |
| "grad_norm": 0.1574297547340393, |
| "learning_rate": 0.00019597588623248148, |
| "loss": 1.0823, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.0681474343531679, |
| "grad_norm": 0.14185413718223572, |
| "learning_rate": 0.0001959655812036274, |
| "loss": 1.0444, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.06830161406889906, |
| "grad_norm": 0.1380462348461151, |
| "learning_rate": 0.0001959552761747733, |
| "loss": 1.1066, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.06845579378463021, |
| "grad_norm": 0.12986746430397034, |
| "learning_rate": 0.00019594497114591922, |
| "loss": 1.1006, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.06860997350036135, |
| "grad_norm": 0.13894346356391907, |
| "learning_rate": 0.00019593466611706514, |
| "loss": 1.0569, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.06876415321609251, |
| "grad_norm": 0.12822435796260834, |
| "learning_rate": 0.00019592436108821105, |
| "loss": 1.0696, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.06891833293182366, |
| "grad_norm": 0.1369408816099167, |
| "learning_rate": 0.00019591405605935697, |
| "loss": 1.0691, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.0690725126475548, |
| "grad_norm": 0.13459660112857819, |
| "learning_rate": 0.00019590375103050288, |
| "loss": 1.0801, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.06922669236328595, |
| "grad_norm": 0.1299123764038086, |
| "learning_rate": 0.0001958934460016488, |
| "loss": 1.0885, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.06938087207901711, |
| "grad_norm": 0.12562230229377747, |
| "learning_rate": 0.00019588314097279474, |
| "loss": 1.183, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.06938087207901711, |
| "eval_loss": 1.0944268703460693, |
| "eval_runtime": 185.3723, |
| "eval_samples_per_second": 91.4, |
| "eval_steps_per_second": 1.43, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.06953505179474825, |
| "grad_norm": 0.13996927440166473, |
| "learning_rate": 0.00019587283594394065, |
| "loss": 1.0356, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.0696892315104794, |
| "grad_norm": 0.128004252910614, |
| "learning_rate": 0.00019586253091508657, |
| "loss": 1.0343, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.06984341122621056, |
| "grad_norm": 0.15650418400764465, |
| "learning_rate": 0.00019585222588623248, |
| "loss": 1.1138, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.0699975909419417, |
| "grad_norm": 0.5840476751327515, |
| "learning_rate": 0.0001958419208573784, |
| "loss": 1.1785, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.07015177065767285, |
| "grad_norm": 0.15330374240875244, |
| "learning_rate": 0.00019583161582852434, |
| "loss": 1.0243, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.070305950373404, |
| "grad_norm": 0.1603543907403946, |
| "learning_rate": 0.00019582131079967026, |
| "loss": 1.1228, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.07046013008913515, |
| "grad_norm": 0.14209845662117004, |
| "learning_rate": 0.00019581100577081617, |
| "loss": 1.0939, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.0706143098048663, |
| "grad_norm": 0.16117019951343536, |
| "learning_rate": 0.00019580070074196209, |
| "loss": 1.1447, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.07076848952059744, |
| "grad_norm": 0.14068694412708282, |
| "learning_rate": 0.000195790395713108, |
| "loss": 1.0642, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.07092266923632859, |
| "grad_norm": 0.15248316526412964, |
| "learning_rate": 0.00019578009068425394, |
| "loss": 1.0162, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.07107684895205975, |
| "grad_norm": 0.22734233736991882, |
| "learning_rate": 0.00019576978565539986, |
| "loss": 1.1123, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.0712310286677909, |
| "grad_norm": 0.1393287032842636, |
| "learning_rate": 0.00019575948062654577, |
| "loss": 1.0862, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.07138520838352204, |
| "grad_norm": 0.12911191582679749, |
| "learning_rate": 0.0001957491755976917, |
| "loss": 1.0651, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.0715393880992532, |
| "grad_norm": 0.12298440933227539, |
| "learning_rate": 0.0001957388705688376, |
| "loss": 1.1227, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.07169356781498434, |
| "grad_norm": 0.14941005408763885, |
| "learning_rate": 0.00019572856553998352, |
| "loss": 1.0989, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.07184774753071549, |
| "grad_norm": 0.1411515325307846, |
| "learning_rate": 0.00019571826051112946, |
| "loss": 1.0816, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.07200192724644663, |
| "grad_norm": 0.11999720335006714, |
| "learning_rate": 0.00019570795548227537, |
| "loss": 1.0306, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.0721561069621778, |
| "grad_norm": 0.1500861495733261, |
| "learning_rate": 0.0001956976504534213, |
| "loss": 1.0678, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.07231028667790894, |
| "grad_norm": 0.12102475017309189, |
| "learning_rate": 0.0001956873454245672, |
| "loss": 1.0534, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.07246446639364008, |
| "grad_norm": 0.11554603278636932, |
| "learning_rate": 0.00019567704039571312, |
| "loss": 1.0535, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.07261864610937123, |
| "grad_norm": 0.12290264666080475, |
| "learning_rate": 0.00019566673536685903, |
| "loss": 1.0738, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.07277282582510239, |
| "grad_norm": 0.17740991711616516, |
| "learning_rate": 0.00019565643033800495, |
| "loss": 1.0811, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.07292700554083353, |
| "grad_norm": 0.14767777919769287, |
| "learning_rate": 0.00019564612530915086, |
| "loss": 1.105, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.07308118525656468, |
| "grad_norm": 0.13773177564144135, |
| "learning_rate": 0.00019563582028029678, |
| "loss": 1.0983, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.07323536497229584, |
| "grad_norm": 0.13891370594501495, |
| "learning_rate": 0.0001956255152514427, |
| "loss": 1.1349, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.07338954468802698, |
| "grad_norm": 0.14717017114162445, |
| "learning_rate": 0.00019561521022258863, |
| "loss": 1.134, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.07354372440375813, |
| "grad_norm": 0.15095743536949158, |
| "learning_rate": 0.00019560490519373455, |
| "loss": 1.063, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.07369790411948927, |
| "grad_norm": 0.12851206958293915, |
| "learning_rate": 0.00019559460016488046, |
| "loss": 1.1005, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.07385208383522043, |
| "grad_norm": 0.13364006578922272, |
| "learning_rate": 0.00019558429513602638, |
| "loss": 1.0429, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.07400626355095158, |
| "grad_norm": 0.1326039433479309, |
| "learning_rate": 0.0001955739901071723, |
| "loss": 1.1586, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.07416044326668272, |
| "grad_norm": 0.13149486482143402, |
| "learning_rate": 0.00019556368507831824, |
| "loss": 1.109, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.07431462298241387, |
| "grad_norm": 0.1189669519662857, |
| "learning_rate": 0.00019555338004946415, |
| "loss": 1.0462, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.07446880269814503, |
| "grad_norm": 0.14341482520103455, |
| "learning_rate": 0.00019554307502061007, |
| "loss": 1.0623, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.07462298241387617, |
| "grad_norm": 0.14133721590042114, |
| "learning_rate": 0.00019553276999175598, |
| "loss": 1.0945, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.07477716212960732, |
| "grad_norm": 0.1351941078901291, |
| "learning_rate": 0.0001955224649629019, |
| "loss": 1.0327, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.07493134184533848, |
| "grad_norm": 0.12836019694805145, |
| "learning_rate": 0.00019551215993404784, |
| "loss": 1.069, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.07508552156106962, |
| "grad_norm": 0.13199055194854736, |
| "learning_rate": 0.00019550185490519375, |
| "loss": 1.0323, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.07523970127680077, |
| "grad_norm": 0.14991353452205658, |
| "learning_rate": 0.00019549154987633967, |
| "loss": 1.0625, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.07539388099253191, |
| "grad_norm": 0.13832435011863708, |
| "learning_rate": 0.00019548124484748558, |
| "loss": 1.1031, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.07554806070826307, |
| "grad_norm": 0.12351599335670471, |
| "learning_rate": 0.0001954709398186315, |
| "loss": 1.0286, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.07570224042399422, |
| "grad_norm": 0.12360050529241562, |
| "learning_rate": 0.00019546063478977744, |
| "loss": 1.0652, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.07585642013972536, |
| "grad_norm": 0.13384872674942017, |
| "learning_rate": 0.00019545032976092335, |
| "loss": 1.1125, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.07601059985545652, |
| "grad_norm": 0.13200527429580688, |
| "learning_rate": 0.00019544002473206927, |
| "loss": 1.0727, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.07616477957118767, |
| "grad_norm": 0.143647700548172, |
| "learning_rate": 0.00019542971970321518, |
| "loss": 1.1207, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.07631895928691881, |
| "grad_norm": 0.13605177402496338, |
| "learning_rate": 0.0001954194146743611, |
| "loss": 1.0225, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.07647313900264996, |
| "grad_norm": 0.12646125257015228, |
| "learning_rate": 0.00019540910964550701, |
| "loss": 1.11, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.07662731871838112, |
| "grad_norm": 0.132467120885849, |
| "learning_rate": 0.00019539880461665293, |
| "loss": 1.1092, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.07678149843411226, |
| "grad_norm": 0.12461701035499573, |
| "learning_rate": 0.00019538849958779884, |
| "loss": 1.0854, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.07693567814984341, |
| "grad_norm": 0.13430501520633698, |
| "learning_rate": 0.00019537819455894476, |
| "loss": 1.2, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.07708985786557455, |
| "grad_norm": 0.12623916566371918, |
| "learning_rate": 0.00019536788953009067, |
| "loss": 1.0522, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07708985786557455, |
| "eval_loss": 1.0930616855621338, |
| "eval_runtime": 185.4001, |
| "eval_samples_per_second": 91.386, |
| "eval_steps_per_second": 1.429, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07724403758130571, |
| "grad_norm": 0.11760087311267853, |
| "learning_rate": 0.00019535758450123662, |
| "loss": 1.1566, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.07739821729703686, |
| "grad_norm": 0.145633727312088, |
| "learning_rate": 0.00019534727947238253, |
| "loss": 1.094, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.077552397012768, |
| "grad_norm": 0.1311633288860321, |
| "learning_rate": 0.00019533697444352845, |
| "loss": 1.0792, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.07770657672849916, |
| "grad_norm": 0.12563548982143402, |
| "learning_rate": 0.00019532666941467436, |
| "loss": 1.0601, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.07786075644423031, |
| "grad_norm": 0.14429886639118195, |
| "learning_rate": 0.00019531636438582028, |
| "loss": 1.0926, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.07801493615996145, |
| "grad_norm": 0.13131891191005707, |
| "learning_rate": 0.0001953060593569662, |
| "loss": 1.1012, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.0781691158756926, |
| "grad_norm": 0.14185300469398499, |
| "learning_rate": 0.00019529575432811213, |
| "loss": 1.1113, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.07832329559142376, |
| "grad_norm": 0.14298418164253235, |
| "learning_rate": 0.00019528544929925805, |
| "loss": 1.0909, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.0784774753071549, |
| "grad_norm": 0.1339821219444275, |
| "learning_rate": 0.00019527514427040396, |
| "loss": 1.0994, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.07863165502288605, |
| "grad_norm": 0.1252928525209427, |
| "learning_rate": 0.00019526483924154988, |
| "loss": 1.0316, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.0787858347386172, |
| "grad_norm": 0.1277703046798706, |
| "learning_rate": 0.0001952545342126958, |
| "loss": 1.1067, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.07894001445434835, |
| "grad_norm": 0.12644124031066895, |
| "learning_rate": 0.00019524422918384173, |
| "loss": 1.0176, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.0790941941700795, |
| "grad_norm": 0.13443627953529358, |
| "learning_rate": 0.00019523392415498765, |
| "loss": 1.0754, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.07924837388581064, |
| "grad_norm": 0.1895609050989151, |
| "learning_rate": 0.00019522361912613356, |
| "loss": 1.0551, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.0794025536015418, |
| "grad_norm": 0.1372397392988205, |
| "learning_rate": 0.00019521331409727948, |
| "loss": 1.0442, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.07955673331727295, |
| "grad_norm": 0.14173942804336548, |
| "learning_rate": 0.0001952030090684254, |
| "loss": 1.0692, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.0797109130330041, |
| "grad_norm": 0.12321804463863373, |
| "learning_rate": 0.00019519270403957134, |
| "loss": 1.0276, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.07986509274873524, |
| "grad_norm": 0.12327130138874054, |
| "learning_rate": 0.00019518239901071725, |
| "loss": 1.0376, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.0800192724644664, |
| "grad_norm": 0.12301841378211975, |
| "learning_rate": 0.00019517209398186317, |
| "loss": 1.0887, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.08017345218019754, |
| "grad_norm": 0.1429559886455536, |
| "learning_rate": 0.00019516178895300908, |
| "loss": 1.0321, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.08032763189592869, |
| "grad_norm": 0.13955366611480713, |
| "learning_rate": 0.000195151483924155, |
| "loss": 1.1081, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.08048181161165983, |
| "grad_norm": 0.13553303480148315, |
| "learning_rate": 0.00019514117889530094, |
| "loss": 1.0252, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.080635991327391, |
| "grad_norm": 0.14100225269794464, |
| "learning_rate": 0.00019513087386644685, |
| "loss": 1.1071, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.08079017104312214, |
| "grad_norm": 0.14522643387317657, |
| "learning_rate": 0.00019512056883759277, |
| "loss": 1.0653, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.08094435075885328, |
| "grad_norm": 0.14540371298789978, |
| "learning_rate": 0.00019511026380873868, |
| "loss": 1.01, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.08109853047458444, |
| "grad_norm": 0.1459018737077713, |
| "learning_rate": 0.0001950999587798846, |
| "loss": 1.1147, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.08125271019031559, |
| "grad_norm": 0.12590867280960083, |
| "learning_rate": 0.0001950896537510305, |
| "loss": 1.0685, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.08140688990604673, |
| "grad_norm": 0.11943504959344864, |
| "learning_rate": 0.00019507934872217643, |
| "loss": 1.0854, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.08156106962177788, |
| "grad_norm": 0.12039398401975632, |
| "learning_rate": 0.00019506904369332234, |
| "loss": 1.1397, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.08171524933750904, |
| "grad_norm": 0.1411554217338562, |
| "learning_rate": 0.00019505873866446826, |
| "loss": 1.1271, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.08186942905324018, |
| "grad_norm": 0.1402871012687683, |
| "learning_rate": 0.00019504843363561417, |
| "loss": 1.0425, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.08202360876897133, |
| "grad_norm": 0.13545840978622437, |
| "learning_rate": 0.00019503812860676011, |
| "loss": 1.0571, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.08217778848470249, |
| "grad_norm": 0.12789209187030792, |
| "learning_rate": 0.00019502782357790603, |
| "loss": 1.0596, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.08233196820043363, |
| "grad_norm": 0.13018928468227386, |
| "learning_rate": 0.00019501751854905194, |
| "loss": 1.1188, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.08248614791616478, |
| "grad_norm": 0.12482234835624695, |
| "learning_rate": 0.00019500721352019786, |
| "loss": 1.0831, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.08264032763189592, |
| "grad_norm": 0.11897309869527817, |
| "learning_rate": 0.00019499690849134377, |
| "loss": 1.0658, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.08279450734762708, |
| "grad_norm": 0.12954497337341309, |
| "learning_rate": 0.00019498660346248972, |
| "loss": 1.0204, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.08294868706335823, |
| "grad_norm": 0.14220042526721954, |
| "learning_rate": 0.00019497629843363563, |
| "loss": 1.1101, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.08310286677908937, |
| "grad_norm": 0.1631559580564499, |
| "learning_rate": 0.00019496599340478155, |
| "loss": 1.1352, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.08325704649482052, |
| "grad_norm": 0.13439539074897766, |
| "learning_rate": 0.00019495568837592746, |
| "loss": 1.0108, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.08341122621055168, |
| "grad_norm": 0.12389718741178513, |
| "learning_rate": 0.00019494538334707338, |
| "loss": 1.0155, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.08356540592628282, |
| "grad_norm": 0.1241556853055954, |
| "learning_rate": 0.00019493507831821932, |
| "loss": 1.1428, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.08371958564201397, |
| "grad_norm": 0.13087880611419678, |
| "learning_rate": 0.00019492477328936523, |
| "loss": 1.0876, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.08387376535774513, |
| "grad_norm": 0.12431449443101883, |
| "learning_rate": 0.00019491446826051115, |
| "loss": 1.0758, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.08402794507347627, |
| "grad_norm": 0.13807635009288788, |
| "learning_rate": 0.00019490416323165706, |
| "loss": 1.0902, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.08418212478920742, |
| "grad_norm": 0.12751048803329468, |
| "learning_rate": 0.00019489385820280298, |
| "loss": 1.0732, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.08433630450493856, |
| "grad_norm": 0.15594707429409027, |
| "learning_rate": 0.00019488355317394892, |
| "loss": 1.1115, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.08449048422066972, |
| "grad_norm": 0.11647301912307739, |
| "learning_rate": 0.00019487324814509483, |
| "loss": 1.1592, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.08464466393640087, |
| "grad_norm": 0.13609850406646729, |
| "learning_rate": 0.00019486294311624075, |
| "loss": 1.1139, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.08479884365213201, |
| "grad_norm": 0.1234198659658432, |
| "learning_rate": 0.00019485263808738666, |
| "loss": 1.0682, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.08479884365213201, |
| "eval_loss": 1.0920624732971191, |
| "eval_runtime": 185.5142, |
| "eval_samples_per_second": 91.33, |
| "eval_steps_per_second": 1.428, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.08495302336786316, |
| "grad_norm": 0.1375039666891098, |
| "learning_rate": 0.00019484233305853258, |
| "loss": 1.0585, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.08510720308359432, |
| "grad_norm": 0.14471521973609924, |
| "learning_rate": 0.0001948320280296785, |
| "loss": 1.1115, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.08526138279932546, |
| "grad_norm": 0.12425632029771805, |
| "learning_rate": 0.0001948217230008244, |
| "loss": 1.0501, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.08541556251505661, |
| "grad_norm": 0.1161596029996872, |
| "learning_rate": 0.00019481141797197032, |
| "loss": 1.0182, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.08556974223078777, |
| "grad_norm": 0.11700072139501572, |
| "learning_rate": 0.00019480111294311624, |
| "loss": 1.0579, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.08572392194651891, |
| "grad_norm": 0.14330415427684784, |
| "learning_rate": 0.00019479080791426215, |
| "loss": 1.1211, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.08587810166225006, |
| "grad_norm": 0.14039026200771332, |
| "learning_rate": 0.00019478050288540807, |
| "loss": 1.0826, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.0860322813779812, |
| "grad_norm": 0.14031362533569336, |
| "learning_rate": 0.000194770197856554, |
| "loss": 1.0871, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.08618646109371236, |
| "grad_norm": 0.12351037561893463, |
| "learning_rate": 0.00019475989282769993, |
| "loss": 1.001, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.08634064080944351, |
| "grad_norm": 0.11667052656412125, |
| "learning_rate": 0.00019474958779884584, |
| "loss": 1.0421, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.08649482052517465, |
| "grad_norm": 0.1489124447107315, |
| "learning_rate": 0.00019473928276999175, |
| "loss": 1.1644, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.0866490002409058, |
| "grad_norm": 0.1338202804327011, |
| "learning_rate": 0.00019472897774113767, |
| "loss": 1.1239, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.08680317995663696, |
| "grad_norm": 0.13266493380069733, |
| "learning_rate": 0.0001947186727122836, |
| "loss": 1.0839, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.0869573596723681, |
| "grad_norm": 0.13726286590099335, |
| "learning_rate": 0.00019470836768342953, |
| "loss": 1.1325, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.08711153938809925, |
| "grad_norm": 0.14077100157737732, |
| "learning_rate": 0.00019469806265457544, |
| "loss": 1.0429, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.08726571910383041, |
| "grad_norm": 0.1362866312265396, |
| "learning_rate": 0.00019468775762572136, |
| "loss": 1.0715, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.08741989881956155, |
| "grad_norm": 0.12472223490476608, |
| "learning_rate": 0.00019467745259686727, |
| "loss": 1.0503, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.0875740785352927, |
| "grad_norm": 0.1350635141134262, |
| "learning_rate": 0.0001946671475680132, |
| "loss": 1.0498, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.08772825825102384, |
| "grad_norm": 0.1424301117658615, |
| "learning_rate": 0.00019465684253915913, |
| "loss": 1.1589, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.087882437966755, |
| "grad_norm": 0.12365067005157471, |
| "learning_rate": 0.00019464653751030504, |
| "loss": 1.1065, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.08803661768248615, |
| "grad_norm": 0.16497495770454407, |
| "learning_rate": 0.00019463623248145096, |
| "loss": 1.0189, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.0881907973982173, |
| "grad_norm": 0.1381298303604126, |
| "learning_rate": 0.00019462592745259687, |
| "loss": 1.0426, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.08834497711394845, |
| "grad_norm": 0.15007291734218597, |
| "learning_rate": 0.00019461562242374282, |
| "loss": 1.1108, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.0884991568296796, |
| "grad_norm": 0.19384606182575226, |
| "learning_rate": 0.00019460531739488873, |
| "loss": 1.0664, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.08865333654541074, |
| "grad_norm": 0.12032177299261093, |
| "learning_rate": 0.00019459501236603465, |
| "loss": 1.018, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.08880751626114189, |
| "grad_norm": 0.1197669506072998, |
| "learning_rate": 0.00019458470733718056, |
| "loss": 1.071, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.08896169597687305, |
| "grad_norm": 0.12108784914016724, |
| "learning_rate": 0.00019457440230832647, |
| "loss": 1.0499, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.0891158756926042, |
| "grad_norm": 0.1270270049571991, |
| "learning_rate": 0.0001945640972794724, |
| "loss": 1.1172, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.08927005540833534, |
| "grad_norm": 0.13599786162376404, |
| "learning_rate": 0.0001945537922506183, |
| "loss": 1.103, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.08942423512406648, |
| "grad_norm": 0.12051045894622803, |
| "learning_rate": 0.00019454348722176422, |
| "loss": 1.0905, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.08957841483979764, |
| "grad_norm": 0.12117696553468704, |
| "learning_rate": 0.00019453318219291013, |
| "loss": 1.0611, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.08973259455552879, |
| "grad_norm": 0.13710887730121613, |
| "learning_rate": 0.00019452287716405605, |
| "loss": 1.0242, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.08988677427125993, |
| "grad_norm": 0.1160813644528389, |
| "learning_rate": 0.000194512572135202, |
| "loss": 1.0863, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.09004095398699109, |
| "grad_norm": 0.1754099279642105, |
| "learning_rate": 0.0001945022671063479, |
| "loss": 1.0938, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.09019513370272224, |
| "grad_norm": 0.1331128627061844, |
| "learning_rate": 0.00019449196207749382, |
| "loss": 1.0692, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.09034931341845338, |
| "grad_norm": 0.13422611355781555, |
| "learning_rate": 0.00019448165704863974, |
| "loss": 1.0699, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.09050349313418453, |
| "grad_norm": 0.12999802827835083, |
| "learning_rate": 0.00019447135201978565, |
| "loss": 1.0957, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.09065767284991569, |
| "grad_norm": 0.13413815200328827, |
| "learning_rate": 0.0001944610469909316, |
| "loss": 1.0869, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.09081185256564683, |
| "grad_norm": 0.12901006639003754, |
| "learning_rate": 0.0001944507419620775, |
| "loss": 1.0442, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.09096603228137798, |
| "grad_norm": 0.11824194341897964, |
| "learning_rate": 0.00019444043693322342, |
| "loss": 1.0935, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.09112021199710912, |
| "grad_norm": 0.14895616471767426, |
| "learning_rate": 0.00019443013190436934, |
| "loss": 1.0624, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.09127439171284028, |
| "grad_norm": 0.13515722751617432, |
| "learning_rate": 0.00019441982687551525, |
| "loss": 1.0797, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.09142857142857143, |
| "grad_norm": 0.13411575555801392, |
| "learning_rate": 0.00019440952184666117, |
| "loss": 1.0637, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.09158275114430257, |
| "grad_norm": 0.12519463896751404, |
| "learning_rate": 0.0001943992168178071, |
| "loss": 1.0608, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.09173693086003373, |
| "grad_norm": 0.1267428696155548, |
| "learning_rate": 0.00019438891178895302, |
| "loss": 1.0182, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.09189111057576488, |
| "grad_norm": 0.13116560876369476, |
| "learning_rate": 0.00019437860676009894, |
| "loss": 1.1139, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.09204529029149602, |
| "grad_norm": 0.14659713208675385, |
| "learning_rate": 0.00019436830173124485, |
| "loss": 1.1275, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.09219947000722717, |
| "grad_norm": 0.12913885712623596, |
| "learning_rate": 0.00019435799670239077, |
| "loss": 1.0858, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.09235364972295833, |
| "grad_norm": 0.12855856120586395, |
| "learning_rate": 0.0001943476916735367, |
| "loss": 1.0811, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.09250782943868947, |
| "grad_norm": 0.1391747146844864, |
| "learning_rate": 0.00019433738664468263, |
| "loss": 1.0146, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.09250782943868947, |
| "eval_loss": 1.0912913084030151, |
| "eval_runtime": 185.3661, |
| "eval_samples_per_second": 91.403, |
| "eval_steps_per_second": 1.43, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.09266200915442062, |
| "grad_norm": 0.13186782598495483, |
| "learning_rate": 0.00019432708161582854, |
| "loss": 1.1017, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.09281618887015176, |
| "grad_norm": 0.12913943827152252, |
| "learning_rate": 0.00019431677658697446, |
| "loss": 1.1027, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.09297036858588292, |
| "grad_norm": 0.1349743753671646, |
| "learning_rate": 0.00019430647155812037, |
| "loss": 1.1023, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.09312454830161407, |
| "grad_norm": 0.12534667551517487, |
| "learning_rate": 0.00019429616652926629, |
| "loss": 1.0659, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.09327872801734521, |
| "grad_norm": 0.11720700562000275, |
| "learning_rate": 0.0001942858615004122, |
| "loss": 1.0532, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.09343290773307637, |
| "grad_norm": 0.1364222913980484, |
| "learning_rate": 0.00019427555647155812, |
| "loss": 1.0575, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.09358708744880752, |
| "grad_norm": 0.15532977879047394, |
| "learning_rate": 0.00019426525144270403, |
| "loss": 1.1145, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.09374126716453866, |
| "grad_norm": 0.1377478837966919, |
| "learning_rate": 0.00019425494641384995, |
| "loss": 1.0505, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.09389544688026981, |
| "grad_norm": 0.1273409128189087, |
| "learning_rate": 0.0001942446413849959, |
| "loss": 1.0873, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.09404962659600097, |
| "grad_norm": 0.11990435421466827, |
| "learning_rate": 0.0001942343363561418, |
| "loss": 1.0829, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.09420380631173211, |
| "grad_norm": 0.14191892743110657, |
| "learning_rate": 0.00019422403132728772, |
| "loss": 1.0992, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.09435798602746326, |
| "grad_norm": 0.14520397782325745, |
| "learning_rate": 0.00019421372629843363, |
| "loss": 1.0712, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.09451216574319442, |
| "grad_norm": 0.13780727982521057, |
| "learning_rate": 0.00019420342126957955, |
| "loss": 0.9943, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.09466634545892556, |
| "grad_norm": 0.13550738990306854, |
| "learning_rate": 0.0001941931162407255, |
| "loss": 1.1264, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.09482052517465671, |
| "grad_norm": 0.12125276774168015, |
| "learning_rate": 0.0001941828112118714, |
| "loss": 1.1207, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.09497470489038785, |
| "grad_norm": 0.14529301226139069, |
| "learning_rate": 0.00019417250618301732, |
| "loss": 1.144, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.09512888460611901, |
| "grad_norm": 0.15477551519870758, |
| "learning_rate": 0.00019416220115416323, |
| "loss": 1.0568, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.09528306432185016, |
| "grad_norm": 0.1299963742494583, |
| "learning_rate": 0.00019415189612530915, |
| "loss": 1.0235, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.0954372440375813, |
| "grad_norm": 0.1372281014919281, |
| "learning_rate": 0.0001941415910964551, |
| "loss": 1.0764, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.09559142375331245, |
| "grad_norm": 0.1247306764125824, |
| "learning_rate": 0.000194131286067601, |
| "loss": 1.1345, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.09574560346904361, |
| "grad_norm": 0.1330571472644806, |
| "learning_rate": 0.00019412098103874692, |
| "loss": 1.1596, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.09589978318477475, |
| "grad_norm": 0.15787385404109955, |
| "learning_rate": 0.00019411067600989284, |
| "loss": 1.1067, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.0960539629005059, |
| "grad_norm": 0.12646274268627167, |
| "learning_rate": 0.00019410037098103875, |
| "loss": 1.0769, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.09620814261623706, |
| "grad_norm": 0.16424262523651123, |
| "learning_rate": 0.0001940900659521847, |
| "loss": 1.0459, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.0963623223319682, |
| "grad_norm": 0.1401062309741974, |
| "learning_rate": 0.0001940797609233306, |
| "loss": 1.1308, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.09651650204769935, |
| "grad_norm": 0.13971561193466187, |
| "learning_rate": 0.00019406945589447652, |
| "loss": 1.1457, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.0966706817634305, |
| "grad_norm": 0.13544687628746033, |
| "learning_rate": 0.00019405915086562244, |
| "loss": 1.0532, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.09682486147916165, |
| "grad_norm": 0.13527531921863556, |
| "learning_rate": 0.00019404884583676835, |
| "loss": 1.0376, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.0969790411948928, |
| "grad_norm": 0.1731848120689392, |
| "learning_rate": 0.0001940385408079143, |
| "loss": 1.2252, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.09713322091062394, |
| "grad_norm": 0.13142083585262299, |
| "learning_rate": 0.0001940282357790602, |
| "loss": 1.0254, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.09728740062635509, |
| "grad_norm": 0.13390247523784637, |
| "learning_rate": 0.00019401793075020612, |
| "loss": 1.0448, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.09744158034208625, |
| "grad_norm": 0.15188650786876678, |
| "learning_rate": 0.00019400762572135204, |
| "loss": 1.1019, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.0975957600578174, |
| "grad_norm": 0.14055617153644562, |
| "learning_rate": 0.00019399732069249795, |
| "loss": 1.0835, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.09774993977354854, |
| "grad_norm": 0.12209255248308182, |
| "learning_rate": 0.00019398701566364387, |
| "loss": 1.0675, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.0979041194892797, |
| "grad_norm": 0.14639706909656525, |
| "learning_rate": 0.00019397671063478978, |
| "loss": 1.049, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.09805829920501084, |
| "grad_norm": 0.13672591745853424, |
| "learning_rate": 0.0001939664056059357, |
| "loss": 1.1057, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.09821247892074199, |
| "grad_norm": 0.1522635966539383, |
| "learning_rate": 0.00019395610057708161, |
| "loss": 1.14, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.09836665863647313, |
| "grad_norm": 0.13887491822242737, |
| "learning_rate": 0.00019394579554822753, |
| "loss": 1.069, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.09852083835220429, |
| "grad_norm": 0.13854965567588806, |
| "learning_rate": 0.00019393549051937344, |
| "loss": 1.0704, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.09867501806793544, |
| "grad_norm": 0.12839765846729279, |
| "learning_rate": 0.00019392518549051939, |
| "loss": 1.0512, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.09882919778366658, |
| "grad_norm": 0.1270405352115631, |
| "learning_rate": 0.0001939148804616653, |
| "loss": 1.0251, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.09898337749939773, |
| "grad_norm": 0.1269143521785736, |
| "learning_rate": 0.00019390457543281122, |
| "loss": 1.0433, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.09913755721512889, |
| "grad_norm": 0.14292192459106445, |
| "learning_rate": 0.00019389427040395713, |
| "loss": 1.1507, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.09929173693086003, |
| "grad_norm": 0.12512263655662537, |
| "learning_rate": 0.00019388396537510305, |
| "loss": 1.0918, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.09944591664659118, |
| "grad_norm": 0.11927679181098938, |
| "learning_rate": 0.000193873660346249, |
| "loss": 1.0924, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.09960009636232234, |
| "grad_norm": 0.13639990985393524, |
| "learning_rate": 0.0001938633553173949, |
| "loss": 1.1024, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.09975427607805348, |
| "grad_norm": 0.142363503575325, |
| "learning_rate": 0.00019385305028854082, |
| "loss": 1.021, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.09990845579378463, |
| "grad_norm": 0.1389359086751938, |
| "learning_rate": 0.00019384274525968673, |
| "loss": 1.0269, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.10006263550951577, |
| "grad_norm": 0.15595073997974396, |
| "learning_rate": 0.00019383244023083265, |
| "loss": 1.0913, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.10021681522524693, |
| "grad_norm": 0.1324295848608017, |
| "learning_rate": 0.0001938221352019786, |
| "loss": 1.1001, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.10021681522524693, |
| "eval_loss": 1.0909266471862793, |
| "eval_runtime": 185.4116, |
| "eval_samples_per_second": 91.38, |
| "eval_steps_per_second": 1.429, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.10037099494097808, |
| "grad_norm": 0.139576256275177, |
| "learning_rate": 0.0001938118301731245, |
| "loss": 1.1147, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.10052517465670922, |
| "grad_norm": 0.12854811549186707, |
| "learning_rate": 0.00019380152514427042, |
| "loss": 1.0973, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.10067935437244037, |
| "grad_norm": 0.1245393380522728, |
| "learning_rate": 0.00019379122011541633, |
| "loss": 1.0485, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.10083353408817153, |
| "grad_norm": 0.13261497020721436, |
| "learning_rate": 0.00019378091508656225, |
| "loss": 1.156, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.10098771380390267, |
| "grad_norm": 0.1255144327878952, |
| "learning_rate": 0.0001937706100577082, |
| "loss": 1.0852, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.10114189351963382, |
| "grad_norm": 0.1412706971168518, |
| "learning_rate": 0.0001937603050288541, |
| "loss": 1.0766, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.10129607323536498, |
| "grad_norm": 0.1281047761440277, |
| "learning_rate": 0.00019375000000000002, |
| "loss": 1.0824, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.10145025295109612, |
| "grad_norm": 0.13307350873947144, |
| "learning_rate": 0.00019373969497114594, |
| "loss": 1.0887, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.10160443266682727, |
| "grad_norm": 0.1287691742181778, |
| "learning_rate": 0.00019372938994229185, |
| "loss": 1.0705, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.10175861238255841, |
| "grad_norm": 0.1303441971540451, |
| "learning_rate": 0.00019371908491343777, |
| "loss": 1.1684, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.10191279209828957, |
| "grad_norm": 0.13304616510868073, |
| "learning_rate": 0.00019370877988458368, |
| "loss": 1.0944, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.10206697181402072, |
| "grad_norm": 0.13905592262744904, |
| "learning_rate": 0.0001936984748557296, |
| "loss": 1.0915, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.10222115152975186, |
| "grad_norm": 0.13225632905960083, |
| "learning_rate": 0.0001936881698268755, |
| "loss": 1.0418, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.10237533124548302, |
| "grad_norm": 0.1267402619123459, |
| "learning_rate": 0.00019367786479802142, |
| "loss": 1.0446, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.10252951096121417, |
| "grad_norm": 0.1439935863018036, |
| "learning_rate": 0.00019366755976916737, |
| "loss": 1.0582, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.10268369067694531, |
| "grad_norm": 0.1267223060131073, |
| "learning_rate": 0.00019365725474031328, |
| "loss": 1.0176, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.10283787039267646, |
| "grad_norm": 0.1298942118883133, |
| "learning_rate": 0.0001936469497114592, |
| "loss": 1.0552, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.10299205010840762, |
| "grad_norm": 0.13010933995246887, |
| "learning_rate": 0.0001936366446826051, |
| "loss": 1.0848, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.10314622982413876, |
| "grad_norm": 0.13728559017181396, |
| "learning_rate": 0.00019362633965375103, |
| "loss": 1.0779, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.10330040953986991, |
| "grad_norm": 0.13863548636436462, |
| "learning_rate": 0.00019361603462489697, |
| "loss": 1.0326, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.10345458925560105, |
| "grad_norm": 0.12995532155036926, |
| "learning_rate": 0.00019360572959604288, |
| "loss": 1.1427, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.10360876897133221, |
| "grad_norm": 0.13650789856910706, |
| "learning_rate": 0.0001935954245671888, |
| "loss": 1.0528, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.10376294868706336, |
| "grad_norm": 0.1336941123008728, |
| "learning_rate": 0.0001935851195383347, |
| "loss": 1.1155, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.1039171284027945, |
| "grad_norm": 0.13927003741264343, |
| "learning_rate": 0.00019357481450948063, |
| "loss": 1.0551, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.10407130811852566, |
| "grad_norm": 0.14504994451999664, |
| "learning_rate": 0.00019356450948062657, |
| "loss": 1.1014, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.10422548783425681, |
| "grad_norm": 0.15796230733394623, |
| "learning_rate": 0.00019355420445177248, |
| "loss": 1.2115, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.10437966754998795, |
| "grad_norm": 0.1317984163761139, |
| "learning_rate": 0.0001935438994229184, |
| "loss": 1.0933, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.1045338472657191, |
| "grad_norm": 0.13189563155174255, |
| "learning_rate": 0.00019353359439406431, |
| "loss": 1.0664, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.10468802698145026, |
| "grad_norm": 0.1323234885931015, |
| "learning_rate": 0.00019352328936521023, |
| "loss": 1.0824, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.1048422066971814, |
| "grad_norm": 0.13659097254276276, |
| "learning_rate": 0.00019351298433635614, |
| "loss": 1.0334, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.10499638641291255, |
| "grad_norm": 0.11882172524929047, |
| "learning_rate": 0.0001935026793075021, |
| "loss": 1.0401, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.1051505661286437, |
| "grad_norm": 0.13025067746639252, |
| "learning_rate": 0.000193492374278648, |
| "loss": 1.0838, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.10530474584437485, |
| "grad_norm": 0.1249939501285553, |
| "learning_rate": 0.00019348206924979392, |
| "loss": 1.0349, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.105458925560106, |
| "grad_norm": 0.12588031589984894, |
| "learning_rate": 0.00019347176422093983, |
| "loss": 1.079, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.10561310527583714, |
| "grad_norm": 0.12548890709877014, |
| "learning_rate": 0.00019346145919208575, |
| "loss": 1.0062, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.1057672849915683, |
| "grad_norm": 0.13328798115253448, |
| "learning_rate": 0.00019345115416323166, |
| "loss": 1.1154, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.10592146470729945, |
| "grad_norm": 0.1443903148174286, |
| "learning_rate": 0.00019344084913437758, |
| "loss": 1.097, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.1060756444230306, |
| "grad_norm": 0.12835648655891418, |
| "learning_rate": 0.0001934305441055235, |
| "loss": 1.0723, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.10622982413876174, |
| "grad_norm": 0.13068312406539917, |
| "learning_rate": 0.0001934202390766694, |
| "loss": 1.1128, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.1063840038544929, |
| "grad_norm": 0.13628961145877838, |
| "learning_rate": 0.00019340993404781532, |
| "loss": 1.1146, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.10653818357022404, |
| "grad_norm": 0.12263484299182892, |
| "learning_rate": 0.00019339962901896126, |
| "loss": 1.0947, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.10669236328595519, |
| "grad_norm": 0.12684424221515656, |
| "learning_rate": 0.00019338932399010718, |
| "loss": 1.059, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.10684654300168633, |
| "grad_norm": 0.1421595960855484, |
| "learning_rate": 0.0001933790189612531, |
| "loss": 1.0688, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.10700072271741749, |
| "grad_norm": 0.12416025251150131, |
| "learning_rate": 0.000193368713932399, |
| "loss": 1.0905, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.10715490243314864, |
| "grad_norm": 0.1284332126379013, |
| "learning_rate": 0.00019335840890354492, |
| "loss": 1.0612, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.10730908214887978, |
| "grad_norm": 0.1282491385936737, |
| "learning_rate": 0.00019334810387469086, |
| "loss": 1.0851, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.10746326186461094, |
| "grad_norm": 0.13221289217472076, |
| "learning_rate": 0.00019333779884583678, |
| "loss": 1.0446, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.10761744158034209, |
| "grad_norm": 0.12401736527681351, |
| "learning_rate": 0.0001933274938169827, |
| "loss": 1.0826, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.10777162129607323, |
| "grad_norm": 0.14316771924495697, |
| "learning_rate": 0.0001933171887881286, |
| "loss": 1.1136, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.10792580101180438, |
| "grad_norm": 0.17223364114761353, |
| "learning_rate": 0.00019330688375927452, |
| "loss": 1.0752, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.10792580101180438, |
| "eval_loss": 1.0899540185928345, |
| "eval_runtime": 185.3818, |
| "eval_samples_per_second": 91.395, |
| "eval_steps_per_second": 1.429, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.10807998072753554, |
| "grad_norm": 0.15027141571044922, |
| "learning_rate": 0.00019329657873042047, |
| "loss": 1.0371, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.10823416044326668, |
| "grad_norm": 0.19876505434513092, |
| "learning_rate": 0.00019328627370156638, |
| "loss": 1.0312, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.10838834015899783, |
| "grad_norm": 0.1422131210565567, |
| "learning_rate": 0.0001932759686727123, |
| "loss": 1.0597, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.10854251987472899, |
| "grad_norm": 0.13597753643989563, |
| "learning_rate": 0.0001932656636438582, |
| "loss": 1.0939, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.10869669959046013, |
| "grad_norm": 0.16808953881263733, |
| "learning_rate": 0.00019325535861500413, |
| "loss": 1.1221, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.10885087930619128, |
| "grad_norm": 0.14884881675243378, |
| "learning_rate": 0.00019324505358615007, |
| "loss": 1.1114, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.10900505902192242, |
| "grad_norm": 0.12680503726005554, |
| "learning_rate": 0.00019323474855729598, |
| "loss": 1.1032, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.10915923873765358, |
| "grad_norm": 0.13997766375541687, |
| "learning_rate": 0.0001932244435284419, |
| "loss": 1.0799, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.10931341845338473, |
| "grad_norm": 0.1343669593334198, |
| "learning_rate": 0.0001932141384995878, |
| "loss": 1.0778, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.10946759816911587, |
| "grad_norm": 0.12029851973056793, |
| "learning_rate": 0.00019320383347073373, |
| "loss": 1.1021, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.10962177788484702, |
| "grad_norm": 0.1322990357875824, |
| "learning_rate": 0.00019319352844187967, |
| "loss": 1.1061, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.10977595760057818, |
| "grad_norm": 0.13710594177246094, |
| "learning_rate": 0.00019318322341302558, |
| "loss": 1.0786, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.10993013731630932, |
| "grad_norm": 0.11956049501895905, |
| "learning_rate": 0.0001931729183841715, |
| "loss": 1.0711, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.11008431703204047, |
| "grad_norm": 0.139973446726799, |
| "learning_rate": 0.00019316261335531741, |
| "loss": 1.1162, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.11023849674777163, |
| "grad_norm": 0.1525941640138626, |
| "learning_rate": 0.00019315230832646333, |
| "loss": 1.0572, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.11039267646350277, |
| "grad_norm": 0.1349973976612091, |
| "learning_rate": 0.00019314200329760924, |
| "loss": 1.1048, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.11054685617923392, |
| "grad_norm": 0.1305711269378662, |
| "learning_rate": 0.00019313169826875516, |
| "loss": 1.0841, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.11070103589496506, |
| "grad_norm": 0.16756822168827057, |
| "learning_rate": 0.00019312139323990107, |
| "loss": 1.0736, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.11085521561069622, |
| "grad_norm": 0.13367486000061035, |
| "learning_rate": 0.000193111088211047, |
| "loss": 1.0774, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.11100939532642737, |
| "grad_norm": 0.12484605610370636, |
| "learning_rate": 0.0001931007831821929, |
| "loss": 1.1196, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.11116357504215851, |
| "grad_norm": 0.14064739644527435, |
| "learning_rate": 0.00019309047815333885, |
| "loss": 1.1101, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.11131775475788966, |
| "grad_norm": 0.1366916447877884, |
| "learning_rate": 0.00019308017312448476, |
| "loss": 1.111, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.11147193447362082, |
| "grad_norm": 0.11520934104919434, |
| "learning_rate": 0.00019306986809563068, |
| "loss": 1.065, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.11162611418935196, |
| "grad_norm": 0.15567731857299805, |
| "learning_rate": 0.0001930595630667766, |
| "loss": 1.1036, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.11178029390508311, |
| "grad_norm": 0.13628730177879333, |
| "learning_rate": 0.0001930492580379225, |
| "loss": 1.0717, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.11193447362081427, |
| "grad_norm": 0.1359964907169342, |
| "learning_rate": 0.00019303895300906842, |
| "loss": 1.0986, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.11208865333654541, |
| "grad_norm": 0.16372162103652954, |
| "learning_rate": 0.00019302864798021436, |
| "loss": 1.0306, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.11224283305227656, |
| "grad_norm": 0.1724134087562561, |
| "learning_rate": 0.00019301834295136028, |
| "loss": 1.0753, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.1123970127680077, |
| "grad_norm": 0.13646383583545685, |
| "learning_rate": 0.0001930080379225062, |
| "loss": 1.0975, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.11255119248373886, |
| "grad_norm": 0.1522134691476822, |
| "learning_rate": 0.0001929977328936521, |
| "loss": 1.1031, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.11270537219947001, |
| "grad_norm": 0.13656160235404968, |
| "learning_rate": 0.00019298742786479802, |
| "loss": 1.0602, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.11285955191520115, |
| "grad_norm": 0.14140130579471588, |
| "learning_rate": 0.00019297712283594396, |
| "loss": 1.1289, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.1130137316309323, |
| "grad_norm": 0.1383032351732254, |
| "learning_rate": 0.00019296681780708988, |
| "loss": 1.0797, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.11316791134666346, |
| "grad_norm": 0.15723556280136108, |
| "learning_rate": 0.0001929565127782358, |
| "loss": 1.1156, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.1133220910623946, |
| "grad_norm": 0.13462230563163757, |
| "learning_rate": 0.0001929462077493817, |
| "loss": 1.0953, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.11347627077812575, |
| "grad_norm": 0.14101319015026093, |
| "learning_rate": 0.00019293590272052762, |
| "loss": 1.1152, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.11363045049385691, |
| "grad_norm": 0.13705132901668549, |
| "learning_rate": 0.00019292559769167357, |
| "loss": 1.0886, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.11378463020958805, |
| "grad_norm": 0.1206672340631485, |
| "learning_rate": 0.00019291529266281948, |
| "loss": 1.0995, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.1139388099253192, |
| "grad_norm": 0.13666383922100067, |
| "learning_rate": 0.0001929049876339654, |
| "loss": 1.058, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.11409298964105034, |
| "grad_norm": 0.1265423446893692, |
| "learning_rate": 0.0001928946826051113, |
| "loss": 1.0676, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.1142471693567815, |
| "grad_norm": 0.1528097242116928, |
| "learning_rate": 0.00019288437757625723, |
| "loss": 1.0675, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.11440134907251265, |
| "grad_norm": 0.16541676223278046, |
| "learning_rate": 0.00019287407254740314, |
| "loss": 1.1539, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.1145555287882438, |
| "grad_norm": 0.20383091270923615, |
| "learning_rate": 0.00019286376751854906, |
| "loss": 1.0472, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.11470970850397495, |
| "grad_norm": 0.13806484639644623, |
| "learning_rate": 0.00019285346248969497, |
| "loss": 1.0408, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.1148638882197061, |
| "grad_norm": 0.1251746118068695, |
| "learning_rate": 0.00019284315746084089, |
| "loss": 1.1207, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.11501806793543724, |
| "grad_norm": 0.13218504190444946, |
| "learning_rate": 0.0001928328524319868, |
| "loss": 1.1131, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.11517224765116839, |
| "grad_norm": 0.21616914868354797, |
| "learning_rate": 0.00019282254740313274, |
| "loss": 1.1103, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.11532642736689955, |
| "grad_norm": 0.1437305361032486, |
| "learning_rate": 0.00019281224237427866, |
| "loss": 1.1243, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.11548060708263069, |
| "grad_norm": 0.13094168901443481, |
| "learning_rate": 0.00019280193734542457, |
| "loss": 1.1012, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.11563478679836184, |
| "grad_norm": 0.12384334206581116, |
| "learning_rate": 0.0001927916323165705, |
| "loss": 1.05, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.11563478679836184, |
| "eval_loss": 1.0905406475067139, |
| "eval_runtime": 185.4473, |
| "eval_samples_per_second": 91.363, |
| "eval_steps_per_second": 1.429, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.11578896651409298, |
| "grad_norm": 0.12807106971740723, |
| "learning_rate": 0.0001927813272877164, |
| "loss": 1.0754, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.11594314622982414, |
| "grad_norm": 0.12517131865024567, |
| "learning_rate": 0.00019277102225886234, |
| "loss": 1.1017, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.11609732594555529, |
| "grad_norm": 0.1704496592283249, |
| "learning_rate": 0.00019276071723000826, |
| "loss": 1.098, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.11625150566128643, |
| "grad_norm": 0.12152231484651566, |
| "learning_rate": 0.00019275041220115417, |
| "loss": 1.0738, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.11640568537701759, |
| "grad_norm": 0.12952156364917755, |
| "learning_rate": 0.0001927401071723001, |
| "loss": 1.0479, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.11655986509274874, |
| "grad_norm": 0.1499640941619873, |
| "learning_rate": 0.000192729802143446, |
| "loss": 1.1046, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.11671404480847988, |
| "grad_norm": 0.1331593543291092, |
| "learning_rate": 0.00019271949711459195, |
| "loss": 1.1219, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.11686822452421103, |
| "grad_norm": 0.1368558406829834, |
| "learning_rate": 0.00019270919208573786, |
| "loss": 1.1357, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.11702240423994219, |
| "grad_norm": 0.12278290838003159, |
| "learning_rate": 0.00019269888705688378, |
| "loss": 1.1079, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.11717658395567333, |
| "grad_norm": 0.11737775802612305, |
| "learning_rate": 0.0001926885820280297, |
| "loss": 1.1224, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.11733076367140448, |
| "grad_norm": 0.13017341494560242, |
| "learning_rate": 0.0001926782769991756, |
| "loss": 1.0648, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.11748494338713562, |
| "grad_norm": 0.11939583718776703, |
| "learning_rate": 0.00019266797197032155, |
| "loss": 1.0899, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.11763912310286678, |
| "grad_norm": 0.12446755915880203, |
| "learning_rate": 0.00019265766694146746, |
| "loss": 1.0626, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.11779330281859793, |
| "grad_norm": 0.13369430601596832, |
| "learning_rate": 0.00019264736191261338, |
| "loss": 1.0526, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.11794748253432907, |
| "grad_norm": 0.13470736145973206, |
| "learning_rate": 0.0001926370568837593, |
| "loss": 1.0946, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.11810166225006023, |
| "grad_norm": 0.14193174242973328, |
| "learning_rate": 0.0001926267518549052, |
| "loss": 1.1089, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.11825584196579138, |
| "grad_norm": 0.14893026649951935, |
| "learning_rate": 0.00019261644682605112, |
| "loss": 1.0606, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.11841002168152252, |
| "grad_norm": 0.20594976842403412, |
| "learning_rate": 0.00019260614179719704, |
| "loss": 1.0375, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.11856420139725367, |
| "grad_norm": 0.15287873148918152, |
| "learning_rate": 0.00019259583676834295, |
| "loss": 1.1414, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.11871838111298483, |
| "grad_norm": 0.1275177299976349, |
| "learning_rate": 0.00019258553173948887, |
| "loss": 1.1084, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.11887256082871597, |
| "grad_norm": 0.20036157965660095, |
| "learning_rate": 0.00019257522671063478, |
| "loss": 1.1261, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.11902674054444712, |
| "grad_norm": 0.14492087066173553, |
| "learning_rate": 0.0001925649216817807, |
| "loss": 1.1137, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.11918092026017826, |
| "grad_norm": 0.1259312629699707, |
| "learning_rate": 0.00019255461665292664, |
| "loss": 1.0409, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.11933509997590942, |
| "grad_norm": 0.1296795755624771, |
| "learning_rate": 0.00019254431162407255, |
| "loss": 1.0332, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.11948927969164057, |
| "grad_norm": 0.13372276723384857, |
| "learning_rate": 0.00019253400659521847, |
| "loss": 1.1087, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.11964345940737171, |
| "grad_norm": 0.14354725182056427, |
| "learning_rate": 0.00019252370156636438, |
| "loss": 1.0398, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.11979763912310287, |
| "grad_norm": 0.1378318965435028, |
| "learning_rate": 0.0001925133965375103, |
| "loss": 1.0542, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.11995181883883402, |
| "grad_norm": 0.12171255797147751, |
| "learning_rate": 0.00019250309150865624, |
| "loss": 1.0935, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.12010599855456516, |
| "grad_norm": 0.11905664205551147, |
| "learning_rate": 0.00019249278647980215, |
| "loss": 1.0097, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.12026017827029631, |
| "grad_norm": 0.12854760885238647, |
| "learning_rate": 0.00019248248145094807, |
| "loss": 1.1517, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.12041435798602747, |
| "grad_norm": 0.247908353805542, |
| "learning_rate": 0.00019247217642209398, |
| "loss": 1.0876, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.12056853770175861, |
| "grad_norm": 0.1441553235054016, |
| "learning_rate": 0.0001924618713932399, |
| "loss": 1.1414, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.12072271741748976, |
| "grad_norm": 0.13307887315750122, |
| "learning_rate": 0.00019245156636438584, |
| "loss": 1.1012, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.12087689713322092, |
| "grad_norm": 0.14192406833171844, |
| "learning_rate": 0.00019244126133553176, |
| "loss": 1.1418, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.12103107684895206, |
| "grad_norm": 0.11530864983797073, |
| "learning_rate": 0.00019243095630667767, |
| "loss": 1.0776, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.12118525656468321, |
| "grad_norm": 0.13385196030139923, |
| "learning_rate": 0.00019242065127782359, |
| "loss": 1.1311, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.12133943628041435, |
| "grad_norm": 0.1308089643716812, |
| "learning_rate": 0.0001924103462489695, |
| "loss": 1.0625, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.12149361599614551, |
| "grad_norm": 0.11851842701435089, |
| "learning_rate": 0.00019240004122011544, |
| "loss": 1.0182, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.12164779571187666, |
| "grad_norm": 0.2496737688779831, |
| "learning_rate": 0.00019238973619126136, |
| "loss": 1.0746, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.1218019754276078, |
| "grad_norm": 0.12962055206298828, |
| "learning_rate": 0.00019237943116240727, |
| "loss": 1.0245, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.12195615514333895, |
| "grad_norm": 0.13170978426933289, |
| "learning_rate": 0.0001923691261335532, |
| "loss": 0.9897, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.12211033485907011, |
| "grad_norm": 0.13226309418678284, |
| "learning_rate": 0.0001923588211046991, |
| "loss": 1.1035, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.12226451457480125, |
| "grad_norm": 0.11901077628135681, |
| "learning_rate": 0.00019234851607584502, |
| "loss": 1.0084, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.1224186942905324, |
| "grad_norm": 0.15274369716644287, |
| "learning_rate": 0.00019233821104699093, |
| "loss": 1.1436, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.12257287400626356, |
| "grad_norm": 0.11832466721534729, |
| "learning_rate": 0.00019232790601813685, |
| "loss": 1.0179, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.1227270537219947, |
| "grad_norm": 0.13038666546344757, |
| "learning_rate": 0.00019231760098928276, |
| "loss": 1.0779, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.12288123343772585, |
| "grad_norm": 0.12837626039981842, |
| "learning_rate": 0.00019230729596042868, |
| "loss": 1.1404, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.123035413153457, |
| "grad_norm": 0.1400509923696518, |
| "learning_rate": 0.00019229699093157462, |
| "loss": 1.1132, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.12318959286918815, |
| "grad_norm": 0.13757595419883728, |
| "learning_rate": 0.00019228668590272053, |
| "loss": 1.0816, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.1233437725849193, |
| "grad_norm": 0.12403321266174316, |
| "learning_rate": 0.00019227638087386645, |
| "loss": 1.039, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.1233437725849193, |
| "eval_loss": 1.0888522863388062, |
| "eval_runtime": 185.2371, |
| "eval_samples_per_second": 91.467, |
| "eval_steps_per_second": 1.431, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.12349795230065044, |
| "grad_norm": 0.12380605190992355, |
| "learning_rate": 0.00019226607584501236, |
| "loss": 1.0903, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.12365213201638159, |
| "grad_norm": 0.13564443588256836, |
| "learning_rate": 0.00019225577081615828, |
| "loss": 1.0768, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.12380631173211275, |
| "grad_norm": 0.1533685177564621, |
| "learning_rate": 0.00019224546578730422, |
| "loss": 1.0852, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.12396049144784389, |
| "grad_norm": 0.1163390502333641, |
| "learning_rate": 0.00019223516075845014, |
| "loss": 1.0574, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.12411467116357504, |
| "grad_norm": 0.13867324590682983, |
| "learning_rate": 0.00019222485572959605, |
| "loss": 1.0992, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.1242688508793062, |
| "grad_norm": 0.12759087979793549, |
| "learning_rate": 0.00019221455070074197, |
| "loss": 1.0738, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.12442303059503734, |
| "grad_norm": 0.1237189844250679, |
| "learning_rate": 0.00019220424567188788, |
| "loss": 1.0974, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.12457721031076849, |
| "grad_norm": 0.13331052660942078, |
| "learning_rate": 0.00019219394064303382, |
| "loss": 1.0917, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.12473139002649963, |
| "grad_norm": 0.1290212869644165, |
| "learning_rate": 0.00019218363561417974, |
| "loss": 1.0696, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.12488556974223079, |
| "grad_norm": 0.13309410214424133, |
| "learning_rate": 0.00019217333058532565, |
| "loss": 1.043, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.12503974945796192, |
| "grad_norm": 0.13453248143196106, |
| "learning_rate": 0.00019216302555647157, |
| "loss": 1.0435, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.1251939291736931, |
| "grad_norm": 0.11639372259378433, |
| "learning_rate": 0.00019215272052761748, |
| "loss": 1.0579, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.12534810888942424, |
| "grad_norm": 0.13231517374515533, |
| "learning_rate": 0.0001921424154987634, |
| "loss": 1.1268, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.1255022886051554, |
| "grad_norm": 0.1349351406097412, |
| "learning_rate": 0.00019213211046990934, |
| "loss": 1.1599, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.12565646832088653, |
| "grad_norm": 0.13710346817970276, |
| "learning_rate": 0.00019212180544105525, |
| "loss": 1.0866, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.12581064803661768, |
| "grad_norm": 0.14535072445869446, |
| "learning_rate": 0.00019211150041220117, |
| "loss": 1.0445, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.12596482775234882, |
| "grad_norm": 0.11799806356430054, |
| "learning_rate": 0.00019210119538334708, |
| "loss": 1.0525, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.12611900746807997, |
| "grad_norm": 0.13399624824523926, |
| "learning_rate": 0.000192090890354493, |
| "loss": 1.0246, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.12627318718381114, |
| "grad_norm": 0.14404788613319397, |
| "learning_rate": 0.00019208058532563894, |
| "loss": 1.0582, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.1264273668995423, |
| "grad_norm": 0.14395713806152344, |
| "learning_rate": 0.00019207028029678486, |
| "loss": 1.0686, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.12658154661527343, |
| "grad_norm": 0.13249294459819794, |
| "learning_rate": 0.00019205997526793077, |
| "loss": 1.1286, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.12673572633100458, |
| "grad_norm": 0.12791812419891357, |
| "learning_rate": 0.00019204967023907669, |
| "loss": 1.062, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.12688990604673572, |
| "grad_norm": 0.12210959941148758, |
| "learning_rate": 0.0001920393652102226, |
| "loss": 1.0419, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.12704408576246687, |
| "grad_norm": 0.13438813388347626, |
| "learning_rate": 0.00019202906018136852, |
| "loss": 1.0589, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.127198265478198, |
| "grad_norm": 0.12953762710094452, |
| "learning_rate": 0.00019201875515251443, |
| "loss": 1.0128, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.1273524451939292, |
| "grad_norm": 0.1318603903055191, |
| "learning_rate": 0.00019200845012366035, |
| "loss": 1.073, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.12750662490966033, |
| "grad_norm": 0.12956051528453827, |
| "learning_rate": 0.00019199814509480626, |
| "loss": 1.0489, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.12766080462539148, |
| "grad_norm": 0.13501368463039398, |
| "learning_rate": 0.00019198784006595218, |
| "loss": 1.0198, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.12781498434112262, |
| "grad_norm": 0.13902342319488525, |
| "learning_rate": 0.00019197753503709812, |
| "loss": 1.0512, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.12796916405685377, |
| "grad_norm": 0.15590503811836243, |
| "learning_rate": 0.00019196723000824403, |
| "loss": 1.1782, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.1281233437725849, |
| "grad_norm": 0.13954932987689972, |
| "learning_rate": 0.00019195692497938995, |
| "loss": 1.0421, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.12827752348831606, |
| "grad_norm": 0.11550859361886978, |
| "learning_rate": 0.00019194661995053586, |
| "loss": 1.086, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.1284317032040472, |
| "grad_norm": 0.12175869196653366, |
| "learning_rate": 0.00019193631492168178, |
| "loss": 1.0704, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.12858588291977838, |
| "grad_norm": 0.13503512740135193, |
| "learning_rate": 0.00019192600989282772, |
| "loss": 1.1166, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.12874006263550952, |
| "grad_norm": 0.12849009037017822, |
| "learning_rate": 0.00019191570486397363, |
| "loss": 1.0315, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.12889424235124067, |
| "grad_norm": 0.12484319508075714, |
| "learning_rate": 0.00019190539983511955, |
| "loss": 1.0737, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.1290484220669718, |
| "grad_norm": 0.1364014446735382, |
| "learning_rate": 0.00019189509480626546, |
| "loss": 1.0619, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.12920260178270296, |
| "grad_norm": 0.12930172681808472, |
| "learning_rate": 0.00019188478977741138, |
| "loss": 1.046, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.1293567814984341, |
| "grad_norm": 0.13860805332660675, |
| "learning_rate": 0.00019187448474855732, |
| "loss": 1.0832, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.12951096121416525, |
| "grad_norm": 0.1379111111164093, |
| "learning_rate": 0.00019186417971970324, |
| "loss": 1.1406, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.12966514092989642, |
| "grad_norm": 0.1349123865365982, |
| "learning_rate": 0.00019185387469084915, |
| "loss": 1.1055, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.12981932064562757, |
| "grad_norm": 0.13304142653942108, |
| "learning_rate": 0.00019184356966199507, |
| "loss": 1.0392, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.1299735003613587, |
| "grad_norm": 0.12159105390310287, |
| "learning_rate": 0.00019183326463314098, |
| "loss": 1.0548, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.13012768007708986, |
| "grad_norm": 0.12661418318748474, |
| "learning_rate": 0.00019182295960428692, |
| "loss": 1.0588, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.130281859792821, |
| "grad_norm": 0.13691510260105133, |
| "learning_rate": 0.00019181265457543284, |
| "loss": 1.0854, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.13043603950855215, |
| "grad_norm": 0.1401318609714508, |
| "learning_rate": 0.00019180234954657875, |
| "loss": 1.0864, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.1305902192242833, |
| "grad_norm": 0.1355384737253189, |
| "learning_rate": 0.00019179204451772467, |
| "loss": 1.058, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.13074439894001447, |
| "grad_norm": 0.13987474143505096, |
| "learning_rate": 0.00019178173948887058, |
| "loss": 1.06, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.1308985786557456, |
| "grad_norm": 0.14350661635398865, |
| "learning_rate": 0.0001917714344600165, |
| "loss": 1.0731, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.13105275837147676, |
| "grad_norm": 0.12443742901086807, |
| "learning_rate": 0.0001917611294311624, |
| "loss": 1.0987, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.13105275837147676, |
| "eval_loss": 1.0880467891693115, |
| "eval_runtime": 185.5457, |
| "eval_samples_per_second": 91.314, |
| "eval_steps_per_second": 1.428, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.1312069380872079, |
| "grad_norm": 0.10956554859876633, |
| "learning_rate": 0.00019175082440230833, |
| "loss": 1.0393, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.13136111780293905, |
| "grad_norm": 0.11846137791872025, |
| "learning_rate": 0.00019174051937345424, |
| "loss": 1.0998, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.1315152975186702, |
| "grad_norm": 0.11894328892230988, |
| "learning_rate": 0.00019173021434460016, |
| "loss": 1.1007, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.13166947723440134, |
| "grad_norm": 0.11090514808893204, |
| "learning_rate": 0.00019171990931574607, |
| "loss": 1.0343, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.1318236569501325, |
| "grad_norm": 0.1276719868183136, |
| "learning_rate": 0.000191709604286892, |
| "loss": 1.0392, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.13197783666586366, |
| "grad_norm": 0.12342885881662369, |
| "learning_rate": 0.00019169929925803793, |
| "loss": 1.063, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.1321320163815948, |
| "grad_norm": 0.1237882748246193, |
| "learning_rate": 0.00019168899422918384, |
| "loss": 1.0558, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.13228619609732595, |
| "grad_norm": 0.12958785891532898, |
| "learning_rate": 0.00019167868920032976, |
| "loss": 1.0493, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.1324403758130571, |
| "grad_norm": 0.1181110367178917, |
| "learning_rate": 0.00019166838417147567, |
| "loss": 1.0668, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.13259455552878824, |
| "grad_norm": 0.12053950875997543, |
| "learning_rate": 0.00019165807914262162, |
| "loss": 1.0392, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.13274873524451938, |
| "grad_norm": 0.11725175380706787, |
| "learning_rate": 0.00019164777411376753, |
| "loss": 1.0188, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.13290291496025053, |
| "grad_norm": 0.12475614994764328, |
| "learning_rate": 0.00019163746908491344, |
| "loss": 1.0134, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.1330570946759817, |
| "grad_norm": 0.1231207475066185, |
| "learning_rate": 0.00019162716405605936, |
| "loss": 1.0309, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.13321127439171285, |
| "grad_norm": 0.1269765943288803, |
| "learning_rate": 0.00019161685902720527, |
| "loss": 1.0918, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.133365454107444, |
| "grad_norm": 0.12103556841611862, |
| "learning_rate": 0.00019160655399835122, |
| "loss": 1.0453, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.13351963382317514, |
| "grad_norm": 0.12427771091461182, |
| "learning_rate": 0.00019159624896949713, |
| "loss": 1.1544, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.13367381353890628, |
| "grad_norm": 0.13416282832622528, |
| "learning_rate": 0.00019158594394064305, |
| "loss": 1.0941, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.13382799325463743, |
| "grad_norm": 0.13207705318927765, |
| "learning_rate": 0.00019157563891178896, |
| "loss": 1.0998, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.13398217297036857, |
| "grad_norm": 0.1436687856912613, |
| "learning_rate": 0.00019156533388293488, |
| "loss": 1.0723, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.13413635268609975, |
| "grad_norm": 0.1206304207444191, |
| "learning_rate": 0.00019155502885408082, |
| "loss": 1.0279, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.1342905324018309, |
| "grad_norm": 0.12685900926589966, |
| "learning_rate": 0.00019154472382522673, |
| "loss": 1.0683, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.13444471211756204, |
| "grad_norm": 0.12833228707313538, |
| "learning_rate": 0.00019153441879637265, |
| "loss": 1.0904, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.13459889183329318, |
| "grad_norm": 0.12999312579631805, |
| "learning_rate": 0.00019152411376751856, |
| "loss": 1.0492, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.13475307154902433, |
| "grad_norm": 0.13486912846565247, |
| "learning_rate": 0.00019151380873866448, |
| "loss": 1.101, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.13490725126475547, |
| "grad_norm": 0.12793023884296417, |
| "learning_rate": 0.0001915035037098104, |
| "loss": 1.1135, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.13506143098048662, |
| "grad_norm": 0.12652675807476044, |
| "learning_rate": 0.0001914931986809563, |
| "loss": 1.0902, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.1352156106962178, |
| "grad_norm": 0.12431836873292923, |
| "learning_rate": 0.00019148289365210222, |
| "loss": 1.0922, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.13536979041194894, |
| "grad_norm": 0.13665209710597992, |
| "learning_rate": 0.00019147258862324814, |
| "loss": 1.0584, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.13552397012768008, |
| "grad_norm": 0.1355196088552475, |
| "learning_rate": 0.00019146228359439405, |
| "loss": 1.1199, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.13567814984341123, |
| "grad_norm": 0.14115893840789795, |
| "learning_rate": 0.00019145197856554, |
| "loss": 1.0697, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.13583232955914237, |
| "grad_norm": 0.13009534776210785, |
| "learning_rate": 0.0001914416735366859, |
| "loss": 1.1111, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.13598650927487352, |
| "grad_norm": 0.12280994653701782, |
| "learning_rate": 0.00019143136850783182, |
| "loss": 1.0341, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.13614068899060466, |
| "grad_norm": 0.15171582996845245, |
| "learning_rate": 0.00019142106347897774, |
| "loss": 1.1275, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.1362948687063358, |
| "grad_norm": 0.15258526802062988, |
| "learning_rate": 0.00019141075845012365, |
| "loss": 1.0513, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.13644904842206698, |
| "grad_norm": 0.132346972823143, |
| "learning_rate": 0.0001914004534212696, |
| "loss": 1.0878, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.13660322813779813, |
| "grad_norm": 0.13237041234970093, |
| "learning_rate": 0.0001913901483924155, |
| "loss": 1.0845, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.13675740785352927, |
| "grad_norm": 0.13837209343910217, |
| "learning_rate": 0.00019137984336356143, |
| "loss": 1.1221, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.13691158756926042, |
| "grad_norm": 0.17590375244617462, |
| "learning_rate": 0.00019136953833470734, |
| "loss": 1.1963, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.13706576728499156, |
| "grad_norm": 0.12898488342761993, |
| "learning_rate": 0.00019135923330585326, |
| "loss": 1.1306, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.1372199470007227, |
| "grad_norm": 0.12428785115480423, |
| "learning_rate": 0.0001913489282769992, |
| "loss": 1.068, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.13737412671645385, |
| "grad_norm": 0.12678809463977814, |
| "learning_rate": 0.0001913386232481451, |
| "loss": 1.0709, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.13752830643218503, |
| "grad_norm": 0.1344168782234192, |
| "learning_rate": 0.00019132831821929103, |
| "loss": 1.1073, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.13768248614791617, |
| "grad_norm": 0.14730733633041382, |
| "learning_rate": 0.00019131801319043694, |
| "loss": 1.0073, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.13783666586364732, |
| "grad_norm": 0.13661792874336243, |
| "learning_rate": 0.00019130770816158286, |
| "loss": 1.0637, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.13799084557937846, |
| "grad_norm": 0.1342434138059616, |
| "learning_rate": 0.0001912974031327288, |
| "loss": 1.1069, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.1381450252951096, |
| "grad_norm": 0.11941581219434738, |
| "learning_rate": 0.00019128709810387471, |
| "loss": 1.1023, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.13829920501084075, |
| "grad_norm": 0.13641759753227234, |
| "learning_rate": 0.00019127679307502063, |
| "loss": 1.0564, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.1384533847265719, |
| "grad_norm": 0.11148608475923538, |
| "learning_rate": 0.00019126648804616654, |
| "loss": 1.0255, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.13860756444230307, |
| "grad_norm": 0.1387186199426651, |
| "learning_rate": 0.00019125618301731246, |
| "loss": 1.0663, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.13876174415803422, |
| "grad_norm": 0.12380651384592056, |
| "learning_rate": 0.00019124587798845837, |
| "loss": 1.1222, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.13876174415803422, |
| "eval_loss": 1.0875153541564941, |
| "eval_runtime": 185.4605, |
| "eval_samples_per_second": 91.356, |
| "eval_steps_per_second": 1.429, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.13891592387376536, |
| "grad_norm": 0.13224369287490845, |
| "learning_rate": 0.00019123557295960432, |
| "loss": 1.0821, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.1390701035894965, |
| "grad_norm": 0.13096244633197784, |
| "learning_rate": 0.00019122526793075023, |
| "loss": 1.0097, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.13922428330522765, |
| "grad_norm": 0.11652527749538422, |
| "learning_rate": 0.00019121496290189615, |
| "loss": 1.0517, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.1393784630209588, |
| "grad_norm": 0.13449358940124512, |
| "learning_rate": 0.00019120465787304206, |
| "loss": 1.0915, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.13953264273668994, |
| "grad_norm": 0.11550068855285645, |
| "learning_rate": 0.00019119435284418798, |
| "loss": 1.0568, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.13968682245242112, |
| "grad_norm": 0.13804587721824646, |
| "learning_rate": 0.0001911840478153339, |
| "loss": 1.0933, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.13984100216815226, |
| "grad_norm": 0.12062159180641174, |
| "learning_rate": 0.0001911737427864798, |
| "loss": 1.0517, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.1399951818838834, |
| "grad_norm": 0.12154779583215714, |
| "learning_rate": 0.00019116343775762572, |
| "loss": 1.0955, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.14014936159961455, |
| "grad_norm": 0.11615799367427826, |
| "learning_rate": 0.00019115313272877164, |
| "loss": 0.968, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.1403035413153457, |
| "grad_norm": 0.1207037940621376, |
| "learning_rate": 0.00019114282769991755, |
| "loss": 1.0896, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.14045772103107684, |
| "grad_norm": 0.12750887870788574, |
| "learning_rate": 0.0001911325226710635, |
| "loss": 1.065, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.140611900746808, |
| "grad_norm": 0.16391952335834503, |
| "learning_rate": 0.0001911222176422094, |
| "loss": 1.0232, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.14076608046253913, |
| "grad_norm": 0.14626921713352203, |
| "learning_rate": 0.00019111191261335532, |
| "loss": 1.0375, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.1409202601782703, |
| "grad_norm": 0.12393996119499207, |
| "learning_rate": 0.00019110160758450124, |
| "loss": 1.0345, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.14107443989400145, |
| "grad_norm": 0.13275925815105438, |
| "learning_rate": 0.00019109130255564715, |
| "loss": 1.071, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.1412286196097326, |
| "grad_norm": 0.1255485862493515, |
| "learning_rate": 0.0001910809975267931, |
| "loss": 1.1026, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.14138279932546374, |
| "grad_norm": 0.13399668037891388, |
| "learning_rate": 0.000191070692497939, |
| "loss": 1.11, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.1415369790411949, |
| "grad_norm": 0.13084925711154938, |
| "learning_rate": 0.00019106038746908492, |
| "loss": 1.0528, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.14169115875692603, |
| "grad_norm": 0.15695689618587494, |
| "learning_rate": 0.00019105008244023084, |
| "loss": 1.1336, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.14184533847265718, |
| "grad_norm": 0.13630808889865875, |
| "learning_rate": 0.00019103977741137675, |
| "loss": 1.0767, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.14199951818838835, |
| "grad_norm": 0.11874844878911972, |
| "learning_rate": 0.0001910294723825227, |
| "loss": 1.0511, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.1421536979041195, |
| "grad_norm": 0.11898507922887802, |
| "learning_rate": 0.0001910191673536686, |
| "loss": 1.0866, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.14230787761985064, |
| "grad_norm": 0.1393211930990219, |
| "learning_rate": 0.00019100886232481453, |
| "loss": 1.0553, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.1424620573355818, |
| "grad_norm": 0.1382310539484024, |
| "learning_rate": 0.00019099855729596044, |
| "loss": 1.07, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.14261623705131293, |
| "grad_norm": 0.1471824198961258, |
| "learning_rate": 0.00019098825226710636, |
| "loss": 1.0893, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.14277041676704408, |
| "grad_norm": 0.12706084549427032, |
| "learning_rate": 0.0001909779472382523, |
| "loss": 1.0848, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.14292459648277522, |
| "grad_norm": 0.1324569135904312, |
| "learning_rate": 0.0001909676422093982, |
| "loss": 1.024, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.1430787761985064, |
| "grad_norm": 0.11245544254779816, |
| "learning_rate": 0.00019095733718054413, |
| "loss": 1.0802, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.14323295591423754, |
| "grad_norm": 0.15419217944145203, |
| "learning_rate": 0.00019094703215169004, |
| "loss": 1.1101, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.1433871356299687, |
| "grad_norm": 0.1071443036198616, |
| "learning_rate": 0.00019093672712283596, |
| "loss": 1.0576, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.14354131534569983, |
| "grad_norm": 0.1341090053319931, |
| "learning_rate": 0.00019092642209398187, |
| "loss": 1.0606, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.14369549506143098, |
| "grad_norm": 0.11848092079162598, |
| "learning_rate": 0.0001909161170651278, |
| "loss": 1.0714, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.14384967477716212, |
| "grad_norm": 0.12697815895080566, |
| "learning_rate": 0.0001909058120362737, |
| "loss": 1.092, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.14400385449289327, |
| "grad_norm": 0.11891257762908936, |
| "learning_rate": 0.00019089550700741962, |
| "loss": 0.9649, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.14415803420862444, |
| "grad_norm": 0.12616439163684845, |
| "learning_rate": 0.00019088520197856553, |
| "loss": 1.0962, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.1443122139243556, |
| "grad_norm": 0.12141067534685135, |
| "learning_rate": 0.00019087489694971147, |
| "loss": 1.0838, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.14446639364008673, |
| "grad_norm": 0.13279564678668976, |
| "learning_rate": 0.0001908645919208574, |
| "loss": 1.0484, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.14462057335581788, |
| "grad_norm": 0.15748505294322968, |
| "learning_rate": 0.0001908542868920033, |
| "loss": 1.1433, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.14477475307154902, |
| "grad_norm": 0.11593475937843323, |
| "learning_rate": 0.00019084398186314922, |
| "loss": 1.1483, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.14492893278728017, |
| "grad_norm": 0.14499489963054657, |
| "learning_rate": 0.00019083367683429513, |
| "loss": 1.0782, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.1450831125030113, |
| "grad_norm": 0.13570410013198853, |
| "learning_rate": 0.00019082337180544105, |
| "loss": 1.0989, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.14523729221874246, |
| "grad_norm": 0.12810774147510529, |
| "learning_rate": 0.000190813066776587, |
| "loss": 1.0374, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.14539147193447363, |
| "grad_norm": 0.11781581491231918, |
| "learning_rate": 0.0001908027617477329, |
| "loss": 1.0796, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.14554565165020478, |
| "grad_norm": 0.12243229150772095, |
| "learning_rate": 0.00019079245671887882, |
| "loss": 1.0477, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.14569983136593592, |
| "grad_norm": 0.1385030299425125, |
| "learning_rate": 0.00019078215169002474, |
| "loss": 1.0349, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.14585401108166707, |
| "grad_norm": 0.12011386454105377, |
| "learning_rate": 0.00019077184666117065, |
| "loss": 1.0718, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.1460081907973982, |
| "grad_norm": 0.12646062672138214, |
| "learning_rate": 0.0001907615416323166, |
| "loss": 1.1228, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.14616237051312936, |
| "grad_norm": 0.1284620612859726, |
| "learning_rate": 0.0001907512366034625, |
| "loss": 1.079, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.1463165502288605, |
| "grad_norm": 0.15374581515789032, |
| "learning_rate": 0.00019074093157460842, |
| "loss": 1.1147, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.14647072994459168, |
| "grad_norm": 0.1325882524251938, |
| "learning_rate": 0.00019073062654575434, |
| "loss": 1.0404, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.14647072994459168, |
| "eval_loss": 1.0869932174682617, |
| "eval_runtime": 185.4754, |
| "eval_samples_per_second": 91.349, |
| "eval_steps_per_second": 1.429, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.14662490966032282, |
| "grad_norm": 0.14041611552238464, |
| "learning_rate": 0.00019072032151690025, |
| "loss": 1.095, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.14677908937605397, |
| "grad_norm": 0.14162160456180573, |
| "learning_rate": 0.0001907100164880462, |
| "loss": 1.1714, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.1469332690917851, |
| "grad_norm": 0.12077832221984863, |
| "learning_rate": 0.0001906997114591921, |
| "loss": 1.1109, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.14708744880751626, |
| "grad_norm": 0.1738968789577484, |
| "learning_rate": 0.00019068940643033802, |
| "loss": 1.0838, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.1472416285232474, |
| "grad_norm": 0.13948039710521698, |
| "learning_rate": 0.00019067910140148394, |
| "loss": 1.0494, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.14739580823897855, |
| "grad_norm": 0.21179239451885223, |
| "learning_rate": 0.00019066879637262985, |
| "loss": 1.0962, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.14754998795470972, |
| "grad_norm": 0.12927787005901337, |
| "learning_rate": 0.00019065849134377577, |
| "loss": 1.1113, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.14770416767044087, |
| "grad_norm": 0.1296701431274414, |
| "learning_rate": 0.00019064818631492168, |
| "loss": 1.0603, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.147858347386172, |
| "grad_norm": 0.1282590925693512, |
| "learning_rate": 0.0001906378812860676, |
| "loss": 1.0594, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.14801252710190316, |
| "grad_norm": 0.13304758071899414, |
| "learning_rate": 0.0001906275762572135, |
| "loss": 1.0784, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.1481667068176343, |
| "grad_norm": 0.15661965310573578, |
| "learning_rate": 0.00019061727122835943, |
| "loss": 1.008, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.14832088653336545, |
| "grad_norm": 0.12986873090267181, |
| "learning_rate": 0.00019060696619950537, |
| "loss": 1.0788, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.1484750662490966, |
| "grad_norm": 0.1128251776099205, |
| "learning_rate": 0.00019059666117065128, |
| "loss": 1.1449, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.14862924596482774, |
| "grad_norm": 0.13722160458564758, |
| "learning_rate": 0.0001905863561417972, |
| "loss": 1.0914, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.1487834256805589, |
| "grad_norm": 0.1507786512374878, |
| "learning_rate": 0.00019057605111294311, |
| "loss": 1.0694, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.14893760539629006, |
| "grad_norm": 0.1368752121925354, |
| "learning_rate": 0.00019056574608408903, |
| "loss": 1.0417, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.1490917851120212, |
| "grad_norm": 0.12566259503364563, |
| "learning_rate": 0.00019055544105523497, |
| "loss": 1.0853, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.14924596482775235, |
| "grad_norm": 0.12362397462129593, |
| "learning_rate": 0.0001905451360263809, |
| "loss": 1.1136, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.1494001445434835, |
| "grad_norm": 0.12472514808177948, |
| "learning_rate": 0.0001905348309975268, |
| "loss": 1.0628, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.14955432425921464, |
| "grad_norm": 0.1355161964893341, |
| "learning_rate": 0.00019052452596867272, |
| "loss": 1.1211, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.14970850397494578, |
| "grad_norm": 0.13438721001148224, |
| "learning_rate": 0.00019051422093981863, |
| "loss": 1.0758, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.14986268369067696, |
| "grad_norm": 0.11768204718828201, |
| "learning_rate": 0.00019050391591096457, |
| "loss": 1.0533, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.1500168634064081, |
| "grad_norm": 0.13892577588558197, |
| "learning_rate": 0.0001904936108821105, |
| "loss": 1.1076, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.15017104312213925, |
| "grad_norm": 0.1532358080148697, |
| "learning_rate": 0.0001904833058532564, |
| "loss": 1.0706, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.1503252228378704, |
| "grad_norm": 0.13364464044570923, |
| "learning_rate": 0.00019047300082440232, |
| "loss": 1.1322, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.15047940255360154, |
| "grad_norm": 0.12663134932518005, |
| "learning_rate": 0.00019046269579554823, |
| "loss": 1.0749, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.15063358226933268, |
| "grad_norm": 0.1297607123851776, |
| "learning_rate": 0.00019045239076669417, |
| "loss": 1.0594, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.15078776198506383, |
| "grad_norm": 0.11931920051574707, |
| "learning_rate": 0.0001904420857378401, |
| "loss": 1.0522, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.150941941700795, |
| "grad_norm": 0.1334810107946396, |
| "learning_rate": 0.000190431780708986, |
| "loss": 1.0674, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.15109612141652615, |
| "grad_norm": 0.12633340060710907, |
| "learning_rate": 0.00019042147568013192, |
| "loss": 1.0139, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.1512503011322573, |
| "grad_norm": 0.12485836446285248, |
| "learning_rate": 0.00019041117065127783, |
| "loss": 1.0288, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.15140448084798844, |
| "grad_norm": 0.10940799117088318, |
| "learning_rate": 0.00019040086562242375, |
| "loss": 1.0475, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.15155866056371958, |
| "grad_norm": 0.12229325622320175, |
| "learning_rate": 0.00019039056059356966, |
| "loss": 1.0628, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.15171284027945073, |
| "grad_norm": 0.14333505928516388, |
| "learning_rate": 0.00019038025556471558, |
| "loss": 1.0423, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.15186701999518187, |
| "grad_norm": 0.12773017585277557, |
| "learning_rate": 0.0001903699505358615, |
| "loss": 1.1283, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.15202119971091305, |
| "grad_norm": 0.11913473904132843, |
| "learning_rate": 0.0001903596455070074, |
| "loss": 1.0646, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.1521753794266442, |
| "grad_norm": 0.13321518898010254, |
| "learning_rate": 0.00019034934047815332, |
| "loss": 1.0476, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.15232955914237534, |
| "grad_norm": 0.1362799108028412, |
| "learning_rate": 0.00019033903544929927, |
| "loss": 1.0937, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.15248373885810648, |
| "grad_norm": 0.13804180920124054, |
| "learning_rate": 0.00019032873042044518, |
| "loss": 1.113, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.15263791857383763, |
| "grad_norm": 0.1774570494890213, |
| "learning_rate": 0.0001903184253915911, |
| "loss": 1.0795, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.15279209828956877, |
| "grad_norm": 0.13106994330883026, |
| "learning_rate": 0.000190308120362737, |
| "loss": 1.098, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.15294627800529992, |
| "grad_norm": 0.14435411989688873, |
| "learning_rate": 0.00019029781533388293, |
| "loss": 1.0814, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.15310045772103106, |
| "grad_norm": 0.13178013265132904, |
| "learning_rate": 0.00019028751030502887, |
| "loss": 1.1002, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.15325463743676224, |
| "grad_norm": 0.1283218264579773, |
| "learning_rate": 0.00019027720527617478, |
| "loss": 1.0749, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.15340881715249338, |
| "grad_norm": 0.12113723158836365, |
| "learning_rate": 0.0001902669002473207, |
| "loss": 1.0831, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.15356299686822453, |
| "grad_norm": 0.12649892270565033, |
| "learning_rate": 0.0001902565952184666, |
| "loss": 1.0166, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.15371717658395567, |
| "grad_norm": 0.12823793292045593, |
| "learning_rate": 0.00019024629018961253, |
| "loss": 1.0273, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.15387135629968682, |
| "grad_norm": 0.1291527897119522, |
| "learning_rate": 0.00019023598516075847, |
| "loss": 1.1092, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.15402553601541796, |
| "grad_norm": 0.12588894367218018, |
| "learning_rate": 0.00019022568013190438, |
| "loss": 1.0627, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.1541797157311491, |
| "grad_norm": 0.12996312975883484, |
| "learning_rate": 0.0001902153751030503, |
| "loss": 1.1196, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.1541797157311491, |
| "eval_loss": 1.0863893032073975, |
| "eval_runtime": 185.3254, |
| "eval_samples_per_second": 91.423, |
| "eval_steps_per_second": 1.43, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.15433389544688028, |
| "grad_norm": 0.14361834526062012, |
| "learning_rate": 0.00019020507007419621, |
| "loss": 1.1151, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.15448807516261143, |
| "grad_norm": 0.12650837004184723, |
| "learning_rate": 0.00019019476504534213, |
| "loss": 1.1155, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.15464225487834257, |
| "grad_norm": 0.13820499181747437, |
| "learning_rate": 0.00019018446001648807, |
| "loss": 1.1243, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.15479643459407372, |
| "grad_norm": 0.13205693662166595, |
| "learning_rate": 0.00019017415498763399, |
| "loss": 1.0626, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.15495061430980486, |
| "grad_norm": 0.13930106163024902, |
| "learning_rate": 0.0001901638499587799, |
| "loss": 1.1105, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.155104794025536, |
| "grad_norm": 0.14711922407150269, |
| "learning_rate": 0.00019015354492992582, |
| "loss": 1.0556, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.15525897374126715, |
| "grad_norm": 0.11909156292676926, |
| "learning_rate": 0.00019014323990107173, |
| "loss": 1.1025, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.15541315345699833, |
| "grad_norm": 0.14099714159965515, |
| "learning_rate": 0.00019013293487221767, |
| "loss": 1.064, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.15556733317272947, |
| "grad_norm": 0.11500216275453568, |
| "learning_rate": 0.0001901226298433636, |
| "loss": 1.1196, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.15572151288846062, |
| "grad_norm": 0.12341683357954025, |
| "learning_rate": 0.0001901123248145095, |
| "loss": 1.0625, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.15587569260419176, |
| "grad_norm": 0.1390669196844101, |
| "learning_rate": 0.00019010201978565542, |
| "loss": 1.0526, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.1560298723199229, |
| "grad_norm": 0.13482992351055145, |
| "learning_rate": 0.00019009171475680133, |
| "loss": 1.1074, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.15618405203565405, |
| "grad_norm": 0.12277045845985413, |
| "learning_rate": 0.00019008140972794725, |
| "loss": 1.0648, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.1563382317513852, |
| "grad_norm": 0.13579949736595154, |
| "learning_rate": 0.00019007110469909316, |
| "loss": 1.1235, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.15649241146711637, |
| "grad_norm": 0.14128637313842773, |
| "learning_rate": 0.00019006079967023908, |
| "loss": 1.0442, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.15664659118284752, |
| "grad_norm": 0.13722474873065948, |
| "learning_rate": 0.000190050494641385, |
| "loss": 1.1215, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.15680077089857866, |
| "grad_norm": 0.13500674068927765, |
| "learning_rate": 0.0001900401896125309, |
| "loss": 1.0776, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.1569549506143098, |
| "grad_norm": 0.11917294561862946, |
| "learning_rate": 0.00019002988458367685, |
| "loss": 1.0698, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.15710913033004095, |
| "grad_norm": 0.12245581299066544, |
| "learning_rate": 0.00019001957955482276, |
| "loss": 1.0166, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.1572633100457721, |
| "grad_norm": 0.12556669116020203, |
| "learning_rate": 0.00019000927452596868, |
| "loss": 1.0846, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.15741748976150324, |
| "grad_norm": 0.13316373527050018, |
| "learning_rate": 0.0001899989694971146, |
| "loss": 1.0566, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.1575716694772344, |
| "grad_norm": 0.1296815425157547, |
| "learning_rate": 0.0001899886644682605, |
| "loss": 1.0824, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.15772584919296556, |
| "grad_norm": 0.1288246214389801, |
| "learning_rate": 0.00018997835943940645, |
| "loss": 1.0974, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.1578800289086967, |
| "grad_norm": 0.1185479462146759, |
| "learning_rate": 0.00018996805441055237, |
| "loss": 1.1443, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.15803420862442785, |
| "grad_norm": 0.12504369020462036, |
| "learning_rate": 0.00018995774938169828, |
| "loss": 1.0899, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.158188388340159, |
| "grad_norm": 0.1266452521085739, |
| "learning_rate": 0.0001899474443528442, |
| "loss": 1.0654, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.15834256805589014, |
| "grad_norm": 0.13447126746177673, |
| "learning_rate": 0.0001899371393239901, |
| "loss": 1.0649, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.1584967477716213, |
| "grad_norm": 0.1446131467819214, |
| "learning_rate": 0.00018992683429513603, |
| "loss": 1.1439, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.15865092748735243, |
| "grad_norm": 0.12688389420509338, |
| "learning_rate": 0.00018991652926628197, |
| "loss": 1.0262, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.1588051072030836, |
| "grad_norm": 0.12581713497638702, |
| "learning_rate": 0.00018990622423742788, |
| "loss": 1.0723, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.15895928691881475, |
| "grad_norm": 0.15745951235294342, |
| "learning_rate": 0.0001898959192085738, |
| "loss": 1.1038, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.1591134666345459, |
| "grad_norm": 0.14457587897777557, |
| "learning_rate": 0.0001898856141797197, |
| "loss": 1.1072, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.15926764635027704, |
| "grad_norm": 0.11454683542251587, |
| "learning_rate": 0.00018987530915086563, |
| "loss": 1.0605, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.1594218260660082, |
| "grad_norm": 0.1137547716498375, |
| "learning_rate": 0.00018986500412201157, |
| "loss": 1.0405, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.15957600578173933, |
| "grad_norm": 0.1220378428697586, |
| "learning_rate": 0.00018985469909315748, |
| "loss": 1.086, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.15973018549747048, |
| "grad_norm": 0.13579098880290985, |
| "learning_rate": 0.0001898443940643034, |
| "loss": 1.0334, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.15988436521320165, |
| "grad_norm": 0.1529407948255539, |
| "learning_rate": 0.00018983408903544931, |
| "loss": 1.0614, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.1600385449289328, |
| "grad_norm": 0.13769444823265076, |
| "learning_rate": 0.00018982378400659523, |
| "loss": 1.1212, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.16019272464466394, |
| "grad_norm": 0.12095335125923157, |
| "learning_rate": 0.00018981347897774114, |
| "loss": 1.047, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.1603469043603951, |
| "grad_norm": 0.12483233958482742, |
| "learning_rate": 0.00018980317394888706, |
| "loss": 1.0808, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.16050108407612623, |
| "grad_norm": 0.12451382726430893, |
| "learning_rate": 0.00018979286892003297, |
| "loss": 1.1259, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.16065526379185738, |
| "grad_norm": 0.12540730834007263, |
| "learning_rate": 0.0001897825638911789, |
| "loss": 1.0761, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.16080944350758852, |
| "grad_norm": 0.12948516011238098, |
| "learning_rate": 0.0001897722588623248, |
| "loss": 1.0621, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.16096362322331967, |
| "grad_norm": 0.1349886953830719, |
| "learning_rate": 0.00018976195383347075, |
| "loss": 1.0549, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.16111780293905084, |
| "grad_norm": 0.1249813437461853, |
| "learning_rate": 0.00018975164880461666, |
| "loss": 1.0828, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.161271982654782, |
| "grad_norm": 0.1299104243516922, |
| "learning_rate": 0.00018974134377576258, |
| "loss": 1.097, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.16142616237051313, |
| "grad_norm": 0.13004744052886963, |
| "learning_rate": 0.0001897310387469085, |
| "loss": 1.0417, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.16158034208624428, |
| "grad_norm": 0.11553830653429031, |
| "learning_rate": 0.0001897207337180544, |
| "loss": 1.0563, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.16173452180197542, |
| "grad_norm": 0.12000396102666855, |
| "learning_rate": 0.00018971042868920035, |
| "loss": 1.077, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.16188870151770657, |
| "grad_norm": 0.13707685470581055, |
| "learning_rate": 0.00018970012366034626, |
| "loss": 1.0994, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.16188870151770657, |
| "eval_loss": 1.0858707427978516, |
| "eval_runtime": 185.7188, |
| "eval_samples_per_second": 91.229, |
| "eval_steps_per_second": 1.427, |
| "step": 2100 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 38916, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.132999221824717e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|