gated-david / weights /checkpoint_epoch_20_metadata.json
AbstractPhil's picture
Upload weights and configs - Run 20251012_041353
0311dd6 verified
{
"epoch": 19,
"optimizer_state_dict": {
"state": {
"0": {
"step": "tensor(25040.)",
"exp_avg": "tensor([[ 4.7855e-07, -2.8362e-05, 6.1365e-06, ..., 2.3855e-06,\n -7.0232e-06, 6.6765e-07],\n [-7.8494e-06, 2.6277e-05, -2.8105e-06, ..., 1.4638e-05,\n -5.1096e-06, -1.3632e-06],\n [ 1.9106e-06, 2.9793e-06, 5.8921e-07, ..., -8.6641e-07,\n -3.0229e-06, -1.1548e-07],\n ...,\n [ 1.2039e-05, 2.1023e-05, 2.6477e-05, ..., -8.6529e-06,\n 9.0014e-06, 3.5765e-05],\n [-2.8159e-05, 2.4238e-05, 9.9622e-06, ..., 1.3619e-05,\n -6.8560e-06, 2.8892e-05],\n [ 3.1999e-06, 2.0921e-05, 5.1910e-06, ..., -3.8031e-06,\n 1.4771e-05, -1.1220e-05]], device='cuda:0')",
"exp_avg_sq": "tensor([[7.5278e-09, 8.6641e-09, 4.3816e-09, ..., 4.8477e-09, 4.5908e-09,\n 3.6245e-09],\n [6.9600e-09, 6.2931e-09, 7.3132e-09, ..., 5.2736e-09, 4.0526e-09,\n 3.1510e-09],\n [3.2098e-10, 3.6571e-10, 1.8603e-10, ..., 2.8248e-10, 2.7618e-10,\n 1.9356e-10],\n ...,\n [7.0773e-09, 5.6125e-09, 5.2420e-09, ..., 4.0586e-09, 4.1386e-09,\n 3.1464e-09],\n [8.4677e-09, 7.3359e-09, 6.3060e-09, ..., 6.2596e-09, 4.9606e-09,\n 3.8731e-09],\n [2.6034e-09, 3.8147e-09, 2.3548e-09, ..., 1.5739e-09, 1.7697e-09,\n 1.3828e-09]], device='cuda:0')"
},
"1": {
"step": "tensor(25040.)",
"exp_avg": "tensor([-2.7998e-04, 6.3148e-04, -1.9552e-05, ..., -4.1836e-04,\n -4.2263e-04, 4.7905e-04], device='cuda:0')",
"exp_avg_sq": "tensor([1.0341e-05, 9.6406e-06, 6.0121e-07, ..., 8.6792e-06, 1.1257e-05,\n 3.9933e-06], device='cuda:0')"
},
"2": {
"step": "tensor(25040.)",
"exp_avg": "tensor([[ 3.4862e-06, -1.0523e-07, -3.6105e-10, ..., -9.3377e-07,\n -4.4853e-06, 1.6478e-06],\n [ 1.0881e-05, 2.0274e-08, 1.5601e-06, ..., -1.2355e-06,\n -6.5253e-07, -6.3509e-09],\n [ 3.4015e-07, 3.3309e-06, -2.2469e-07, ..., -2.0356e-06,\n 5.7929e-07, -3.4297e-06],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [-2.7422e-06, 8.3219e-07, -3.0563e-07, ..., 3.1836e-06,\n 1.8240e-06, -7.1395e-07],\n [ 2.5603e-07, 5.9040e-07, 5.9978e-08, ..., -1.7944e-08,\n -2.0139e-06, 2.9062e-08]], device='cuda:0')",
"exp_avg_sq": "tensor([[6.9834e-10, 8.9197e-11, 2.8956e-11, ..., 1.2364e-09, 2.9846e-10,\n 1.3164e-10],\n [3.2663e-09, 1.0254e-10, 1.2508e-09, ..., 9.5398e-10, 2.8868e-10,\n 1.1235e-11],\n [6.3681e-10, 8.5574e-10, 1.5433e-10, ..., 1.1856e-09, 2.1785e-09,\n 3.8453e-11],\n ...,\n [0.0000e+00, 7.2328e-27, 0.0000e+00, ..., 2.1189e-28, 2.5306e-28,\n 0.0000e+00],\n [3.3579e-09, 4.1970e-10, 1.2735e-10, ..., 2.1111e-09, 3.6014e-10,\n 8.0853e-10],\n [1.6551e-09, 7.1761e-10, 2.9226e-11, ..., 3.8150e-10, 3.1043e-09,\n 2.9589e-10]], device='cuda:0')"
},
"3": {
"step": "tensor(25040.)",
"exp_avg": "tensor([-2.3417e-05, 1.0869e-04, 5.0860e-05, -2.0679e-05, 1.0940e-05,\n -6.3716e-06, -1.1589e-04, 4.9230e-05, -2.2433e-05, 7.8860e-06,\n 3.5839e-05, 5.3028e-05, -5.7574e-05, 9.9217e-06, 2.6153e-04,\n -5.4614e-05, -7.9927e-05, 4.3496e-05, 1.9399e-04, -3.2363e-06,\n 2.5123e-05, 8.5945e-06, -5.2042e-05, -6.1650e-05, -2.3809e-05,\n 3.5911e-05, 2.8088e-06, 5.6052e-45, 7.9537e-05, -1.8822e-05,\n -3.1249e-05, -9.5124e-06, -3.1025e-05, 2.9351e-05, -2.8860e-05,\n -1.7788e-05, 5.5750e-05, -2.6437e-05, 6.3399e-05, 7.1006e-06,\n 5.1195e-05, -5.3072e-05, 7.3677e-05, 6.8925e-05, -5.8142e-06,\n 8.1950e-08, -1.1390e-04, -7.3443e-05, 1.4399e-05, 1.2382e-04,\n -2.0351e-05, -7.3158e-05, 4.7580e-05, 1.2104e-05, 3.4692e-05,\n -1.3044e-05, -5.2895e-05, -5.7057e-05, 3.5187e-06, -8.6241e-05,\n -5.0459e-06, 6.1589e-05, 1.2421e-04, 2.3834e-05, 2.4572e-05,\n -1.9903e-05, 1.3943e-05, -7.9278e-05, 2.1805e-05, 3.3724e-05,\n -9.4838e-06, -6.3505e-05, 1.5431e-05, -7.0602e-05, -4.0375e-05,\n 8.4890e-05, 3.1639e-05, -7.3266e-05, -8.2743e-06, -8.3554e-05,\n 1.6140e-05, 1.7322e-05, 6.9952e-05, -4.5774e-06, 5.5513e-05,\n -7.7110e-05, -3.2018e-05, 8.5333e-05, 1.2339e-05, 3.9323e-05,\n -3.3312e-05, 4.4363e-05, 1.2088e-04, -7.9918e-05, 2.4782e-05,\n -2.0157e-05, 2.7429e-05, -4.9655e-05, 1.1388e-05, -2.0318e-05,\n -3.2760e-05, 2.7578e-07, -1.5805e-04, 9.8155e-06, -1.5212e-05,\n 8.6702e-05, -3.7241e-05, -6.3969e-05, 3.9605e-05, -1.0433e-06,\n -2.7178e-05, -3.9835e-05, -7.3296e-06, -2.6904e-05, -2.3320e-05,\n -1.8482e-06, -3.4229e-06, -2.0749e-05, 6.8529e-05, 5.6052e-45,\n -3.8389e-06, -5.0350e-05, -2.2720e-05, 2.1243e-06, 1.3075e-04,\n 8.6677e-06, -5.1888e-05, 2.9661e-05, -5.3901e-05, 2.4973e-05,\n 9.8852e-06, -1.5108e-05, 5.6052e-45, -4.6532e-05, 8.1966e-05,\n 6.8804e-05, 4.4820e-06, -1.6495e-05, -2.6936e-05, 2.0875e-05,\n -3.4943e-05, 5.6052e-45, -2.5878e-05, 1.1841e-06, 5.6052e-45,\n 5.6052e-45, 4.3776e-05, 5.1759e-05, 5.6052e-45, 8.7514e-06,\n -7.4270e-05, -6.8552e-07, -5.7954e-05, 1.6541e-05, -5.5310e-05,\n -1.0141e-04, 3.0188e-05, 3.0022e-06, 5.6052e-45, 5.6146e-05,\n 1.0592e-05, -1.2533e-04, -1.6736e-04, 3.8657e-05, 1.0466e-04,\n -1.0396e-04, 6.0132e-06, 5.6052e-45, -7.5488e-06, 7.4029e-05,\n 1.1692e-07, -2.8829e-05, 4.4952e-05, -7.9139e-06, 1.1082e-04,\n -1.2981e-05, -1.0684e-04, 1.8862e-05, -5.5697e-05, -3.0518e-05,\n -3.2962e-05, -2.3387e-05, -2.3661e-04, 5.4104e-05, 2.0828e-05,\n 5.6052e-45, 1.0441e-04, 2.2550e-05, -7.4156e-05, 1.0652e-05,\n -1.6022e-06, -9.8403e-06, 1.1428e-04, 1.2335e-04, 2.5260e-05,\n 3.3257e-05, 6.8477e-05, 6.5280e-06, 8.9212e-05, 5.6052e-45,\n 2.5781e-05, -1.7584e-05, -3.1680e-05, -6.9079e-05, 3.3577e-05,\n 1.0441e-05, 7.9169e-06, 1.3519e-05, -5.4950e-06, 5.9122e-05,\n 5.8427e-05, -7.7070e-06, -2.9243e-05, 1.6918e-05, -9.3517e-05,\n -5.5299e-06, 2.7353e-05, 1.2932e-05, -1.1321e-04, -1.4087e-05,\n 4.5730e-05, -4.0543e-06, 2.0128e-06, -2.9839e-06, 3.0967e-05,\n 1.0607e-06, -2.7993e-05, 1.6548e-06, 6.4309e-06, 3.7802e-05,\n -3.1499e-07, -1.2325e-06, 2.8199e-05, -1.8746e-05, 5.6052e-45,\n -8.3653e-05, 4.5681e-05, 5.6052e-45, 3.6873e-05, 3.5082e-05,\n 1.4882e-04, -3.4230e-05, -4.8431e-05, -2.6842e-05, 2.5021e-05,\n 3.5697e-05, 8.6234e-06, -2.3245e-05, -3.4960e-05, 1.8614e-05,\n -6.4279e-05, 6.3831e-05, -2.4537e-05, -2.1178e-05, 3.5083e-05,\n -3.9646e-04, 5.6052e-45, 1.5414e-05, -1.5911e-06, 7.5868e-05,\n 5.6052e-45, -1.9916e-05, 1.5159e-05, -3.8783e-05, 1.2012e-05,\n -4.1169e-05, 7.3680e-06, -1.4271e-05, 7.8516e-05, 1.5750e-05,\n -4.1940e-06, -1.8588e-05, 4.3696e-05, -5.1770e-05, -1.3639e-05,\n 1.4840e-06, -2.9052e-05, -1.2809e-06, -2.4812e-05, 2.8432e-05,\n 4.9698e-08, 2.8866e-05, 3.7695e-05, -1.2246e-05, 6.9908e-06,\n -9.7580e-05, 3.3683e-07, -1.7445e-05, 1.4177e-05, -4.2066e-06,\n -3.5414e-05, -1.0041e-05, 5.9036e-06, 1.6073e-05, 6.7210e-05,\n 7.7319e-05, -1.0682e-04, -3.2485e-05, -3.5182e-05, -1.0756e-04,\n -1.2817e-05, 1.2925e-05, -1.4820e-05, -1.5358e-05, -4.1672e-05,\n -6.4453e-05, 1.9520e-05, 6.9213e-05, 6.7571e-05, -1.1525e-05,\n 1.5110e-05, 5.6052e-45, 4.2821e-05, 1.9012e-05, 8.2789e-05,\n 5.6052e-45, 1.7125e-05, -3.8602e-05, 2.0533e-05, 1.0396e-05,\n -2.5968e-05, 1.2791e-05, 2.9435e-05, 1.4803e-05, -8.5510e-05,\n 5.6052e-45, 5.0868e-05, 6.3225e-05, 4.0152e-05, -3.2611e-05,\n 1.0751e-04, -2.1428e-05, 5.1900e-05, -6.1643e-06, -3.4757e-05,\n -2.5864e-05, 3.8154e-05, -1.5429e-05, 5.6052e-45, -2.7859e-05,\n 5.9847e-05, 7.3820e-05, 5.0649e-05, -4.9991e-05, 2.3503e-05,\n -7.0443e-05, -4.6426e-05, 8.1839e-05, -7.2296e-06, 1.9290e-05,\n 3.0952e-05, -2.9861e-05, 1.4697e-05, 5.6052e-45, 1.1975e-04,\n 1.1474e-04, 8.2174e-05, 3.3650e-05, 9.8003e-05, -3.6013e-06,\n 3.0776e-05, 5.4368e-05, 5.6052e-45, -5.2574e-05, 1.9225e-06,\n -1.5645e-05, -8.7506e-06, 1.9282e-06, 6.3703e-07, -5.4891e-06,\n -3.7549e-05, 3.3294e-05, 1.2946e-05, 9.0820e-05, 3.8182e-05,\n 3.3370e-05, -2.5296e-07, 2.2593e-05, -9.7731e-06, 1.0657e-04,\n -1.5258e-05, 3.6311e-05, -2.5219e-06, -1.2653e-05, 5.6052e-45,\n -3.9828e-05, -5.2843e-05, -2.0941e-04, 6.2772e-05, -5.6157e-05,\n -9.3354e-06, 6.7556e-05, 5.2356e-05, 1.4235e-06, -3.0392e-05,\n 9.3062e-05, -1.6556e-05, -3.7741e-05, -3.3301e-06, -7.3157e-05,\n -1.8686e-05, -4.7856e-05, 5.2285e-05, 2.1052e-05, 6.1708e-05,\n 8.2419e-05, 4.1136e-06, -6.6875e-05, 2.1566e-05, -8.0902e-05,\n 6.0522e-05, -5.6052e-45, -4.0707e-05, -4.3265e-05, 2.4748e-05,\n 8.2249e-06, 1.1793e-05, 1.9633e-04, 2.1058e-06, 2.5392e-05,\n 4.8291e-05, 5.4061e-05, -1.5555e-05, -1.0215e-04, -2.8745e-06,\n -1.9555e-05, -2.3926e-05, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 2.8059e-05, 1.1178e-05, 3.6317e-05, -4.2448e-05, 4.8013e-05,\n -4.4770e-05, -1.8677e-05, 1.5416e-05, 6.8403e-05, 1.5013e-04,\n -1.3413e-05, 3.1211e-05, -2.0441e-05, 5.6052e-45, 3.8583e-05,\n -1.6123e-05, -2.5162e-05, -2.0423e-05, 1.0552e-04, -5.1513e-05,\n -6.3589e-05, -5.3522e-05, -3.8040e-05, -6.1606e-06, -1.5163e-04,\n -2.6281e-05, 4.1089e-05, 1.0197e-04, 5.6052e-45, 9.7272e-05,\n 5.8079e-05, 1.9560e-05, 2.6933e-05, -2.1879e-05, 3.6683e-05,\n -4.3979e-06, 3.8709e-05, -8.0339e-06, 1.4417e-05, -4.5544e-06,\n 1.2192e-05, 5.6052e-45, 4.0745e-05, -3.3265e-06, -5.4629e-05,\n -2.9782e-05, 4.6080e-05, 4.8436e-05, -2.6694e-06, 8.2390e-06,\n 2.5490e-05, 1.1124e-04, 4.6261e-06, 1.0959e-05, 3.0985e-05,\n -2.3084e-05, 9.5142e-05, 8.5315e-05, 6.9060e-05, -7.2174e-06,\n -5.2024e-05, 5.6447e-06, 4.9549e-05, 1.0093e-05, 2.9826e-05,\n 5.2224e-06, -1.2561e-04, -1.3390e-04, -1.1311e-05, 1.1167e-05,\n 8.6068e-07, 7.4644e-05, 1.4270e-04, 2.1062e-05, -2.3120e-05,\n 9.6362e-06, 5.2068e-05, -1.1231e-05, 1.0859e-04, -1.5335e-04,\n 1.6361e-04, -1.4166e-05, 3.6272e-04, 7.8985e-05, -8.9354e-06,\n -6.8768e-05, 5.0716e-05, 6.4886e-05, 5.5478e-10, 1.9653e-05,\n 9.4578e-06, -3.0212e-05, -5.1889e-05, -1.4030e-05, 4.8059e-05,\n -2.0071e-05, -1.4633e-05, -4.8723e-05, 5.1856e-06, -5.4642e-05,\n 4.3399e-06, -5.4512e-05, 8.6155e-05, -8.5661e-05, -6.7195e-05,\n -2.8196e-05, 1.8616e-05, 5.6052e-45, 4.7356e-05, 6.2364e-06,\n -5.3005e-05, 8.2914e-05, -5.3938e-05, 6.4984e-05, 6.0860e-05,\n 5.6052e-45, 8.6213e-06, -6.4817e-07, 5.1057e-05, 3.3903e-05,\n -1.9045e-05, 4.3795e-05, 2.8627e-05, 3.4347e-05, -1.6047e-05,\n 2.0271e-05, -2.8412e-05, -3.0607e-05, 5.1312e-05, -1.1938e-05,\n 5.6052e-45, -1.9952e-05, -9.2206e-06, 4.9283e-05, 2.6113e-06,\n 5.0904e-05, -6.6084e-06, 5.6052e-45, 4.2698e-05, 5.6052e-45,\n -1.7648e-06, 9.6147e-06, -3.2764e-06, -2.1465e-05, 1.5792e-05,\n -1.2625e-05, -2.4434e-05, 7.2588e-05, 2.6208e-05, 6.1977e-05,\n 2.0359e-05, 1.1401e-06, 1.2691e-05, -1.3343e-04, 5.6052e-45,\n -8.3869e-05, -5.1079e-05, 4.7920e-05, -3.7080e-05, -1.1720e-06,\n -2.0207e-05, 5.6052e-45, 2.8148e-06, -8.7754e-05, -4.2531e-05,\n -1.6659e-04, -1.4460e-05, 1.0587e-04, 3.3210e-05, 3.0487e-05,\n -7.0828e-05, 2.1012e-06, 4.4710e-05, 6.0191e-05, -1.1361e-05,\n -7.3688e-06, -4.1773e-05, 5.8160e-06, -5.0593e-05, 1.1708e-04,\n -5.0747e-05, 4.5735e-05, -2.9939e-06, -7.5677e-06, -4.3382e-05,\n 5.6052e-45, -1.3393e-04, 7.8651e-06, 2.0024e-05, 8.4476e-05,\n -1.4322e-05, 8.3338e-05, 5.6620e-06, -3.0921e-05, -7.0068e-05,\n -1.5279e-05, 5.6052e-45, 9.5309e-05, -7.7610e-05, -1.1064e-05,\n -3.0272e-05, -2.6524e-05, 5.6052e-45, 5.6052e-45, 1.7499e-05,\n 3.9179e-05, 5.6052e-45, -5.4034e-05, 1.5054e-05, -6.5313e-05,\n 6.6295e-05, 5.6052e-45, -1.1523e-05, 3.8356e-05, 9.7480e-06,\n -6.7581e-05, 4.9965e-05, 1.3604e-07, -1.0779e-05, 3.4718e-06,\n -1.0014e-04, 4.4166e-05, -1.0541e-04, 2.5247e-04, -3.5192e-05,\n 4.2830e-05, 7.1035e-05, -1.2968e-04, -4.7387e-05, -6.1922e-05,\n -1.2536e-06, 7.5690e-06, -7.5038e-05, 6.0050e-05, 2.1750e-05,\n -7.1275e-05, 8.5723e-05, -3.1638e-05, -3.2120e-05, 2.5243e-05,\n -2.8837e-05, 5.6052e-45, 1.9648e-05, -4.7032e-06, -7.3381e-05,\n -6.1251e-05, -9.2262e-05, 3.8867e-05, -3.2997e-05, 5.6052e-45,\n 6.0342e-05, -2.0883e-05, -5.5067e-05, 6.4870e-06, 3.5546e-05,\n 2.0735e-05, 3.1603e-05, 2.9079e-05, -3.9304e-05, 9.7804e-05,\n -3.8339e-05, 1.0194e-05, -8.0390e-06, -1.6234e-05, 4.5563e-05,\n 3.6863e-05, -1.9910e-05, -1.1792e-05, -6.4681e-05, -4.2362e-05,\n -3.6591e-05, 1.1972e-05, 1.3586e-06, 1.2773e-05, -1.5848e-05,\n 6.7115e-05, 5.6052e-45, -8.2598e-05, -7.0288e-05, 5.6052e-45,\n 5.4285e-05, -6.2144e-05, 5.2431e-05, -2.9231e-06, -1.4096e-04,\n 8.6763e-06, -8.8740e-05, -4.4875e-05, 1.2115e-06, 6.1375e-05,\n 5.6052e-45, 5.9038e-05, 8.9667e-05, 7.2413e-05, -3.3509e-05,\n -3.0299e-05, -4.7211e-05, -5.7025e-05, 8.2712e-05, -6.4883e-05,\n -1.3720e-05, 4.3999e-05, 1.8569e-05, -3.5992e-06, -2.7626e-05,\n 8.8106e-05, -1.5047e-04, -2.0950e-06, -1.4603e-05, -6.1229e-07,\n 1.9062e-05, -5.8297e-05, -9.5922e-06, 4.3905e-05, -5.7983e-05,\n 2.1920e-05, 5.9032e-05, 2.5369e-07, 1.6492e-06, 1.0936e-05,\n -7.8156e-06, -6.3830e-05, -2.9258e-05, 3.7923e-05, 6.5043e-05,\n 2.3446e-05, 4.3672e-05, -5.5858e-05, -6.4516e-05, -3.0839e-05,\n -2.9728e-05, 1.0922e-05, 6.8355e-05, 1.7797e-05, 5.6052e-45,\n 5.6052e-45, 1.5786e-05, -4.3993e-05], device='cuda:0')",
"exp_avg_sq": "tensor([3.5714e-08, 4.2195e-08, 4.2070e-08, 5.0195e-08, 5.6416e-08, 8.8783e-09,\n 6.2200e-08, 3.6730e-08, 3.2619e-08, 4.6578e-08, 3.4253e-08, 2.1778e-08,\n 4.8512e-08, 2.3433e-08, 4.7870e-08, 4.7230e-08, 3.5016e-08, 5.0400e-08,\n 5.1007e-08, 2.9132e-08, 2.9921e-08, 1.4612e-08, 3.5507e-08, 4.2934e-08,\n 5.2852e-08, 7.5930e-08, 4.0909e-08, 8.0459e-19, 4.3165e-08, 5.6683e-08,\n 1.8561e-08, 5.2949e-08, 3.0597e-08, 5.1428e-08, 2.9313e-08, 4.7267e-08,\n 2.3932e-08, 3.4459e-08, 3.8489e-08, 3.6140e-08, 5.4017e-08, 4.3897e-08,\n 5.7105e-08, 5.6889e-08, 4.2984e-08, 3.2924e-08, 4.5531e-08, 5.6885e-08,\n 9.7997e-08, 7.0402e-08, 2.4964e-08, 8.5104e-08, 3.3489e-08, 3.1780e-08,\n 4.0222e-08, 3.2004e-08, 5.3415e-08, 4.5543e-08, 2.5640e-08, 3.7763e-08,\n 1.9826e-08, 3.5679e-08, 6.7216e-08, 2.1787e-08, 2.0129e-08, 5.0714e-08,\n 5.2045e-08, 7.8444e-08, 1.8341e-08, 4.0035e-08, 3.9597e-08, 2.3180e-08,\n 3.6857e-08, 1.0950e-07, 5.0519e-08, 6.0441e-08, 5.8476e-08, 7.5716e-08,\n 2.7079e-08, 5.8034e-08, 4.2024e-08, 5.0756e-08, 4.6124e-08, 1.3330e-08,\n 3.8684e-08, 4.6217e-08, 2.8999e-08, 7.3370e-08, 4.3101e-08, 7.2999e-08,\n 6.8850e-08, 3.1691e-08, 4.6899e-08, 4.2469e-08, 3.1139e-08, 4.1863e-08,\n 5.0861e-08, 2.9664e-08, 3.8329e-08, 6.1649e-08, 5.4333e-08, 3.0538e-08,\n 1.0696e-07, 2.5761e-08, 4.8254e-08, 3.5802e-08, 3.3423e-08, 2.8055e-08,\n 2.8363e-08, 3.3830e-08, 4.6844e-08, 4.1828e-08, 2.8621e-08, 2.3101e-08,\n 4.4745e-08, 4.5296e-08, 4.8170e-08, 4.2291e-08, 3.7300e-08, 1.6471e-16,\n 3.8593e-08, 4.3045e-08, 3.5618e-08, 8.7155e-09, 3.2998e-08, 4.5164e-08,\n 4.2247e-08, 4.1546e-08, 5.4208e-08, 3.2164e-08, 3.1487e-08, 3.2802e-08,\n 1.8030e-17, 5.5712e-08, 9.2164e-08, 5.0581e-08, 9.1211e-09, 4.8965e-08,\n 2.7716e-08, 3.9257e-08, 2.0059e-08, 9.9964e-17, 3.3914e-08, 3.5449e-08,\n 3.8729e-19, 5.1515e-16, 6.4656e-08, 3.6277e-08, 1.3841e-15, 2.9372e-08,\n 4.4576e-08, 5.7282e-08, 3.3646e-08, 4.0951e-08, 3.3842e-08, 4.4192e-08,\n 3.8061e-08, 4.6242e-08, 5.7020e-16, 2.6984e-08, 7.2613e-08, 8.0873e-08,\n 6.5749e-08, 8.1786e-08, 3.4085e-08, 3.6370e-08, 1.4096e-08, 7.2821e-22,\n 4.1791e-08, 3.8411e-08, 6.4891e-08, 6.1009e-08, 3.6322e-08, 4.6572e-08,\n 3.5167e-08, 3.9745e-08, 4.8572e-08, 4.9176e-08, 2.8450e-08, 7.3245e-08,\n 5.8586e-08, 4.4754e-08, 6.4884e-08, 4.7663e-08, 1.8828e-08, 4.2546e-25,\n 4.2638e-08, 4.1293e-08, 2.9716e-08, 3.1730e-08, 2.0827e-08, 4.4682e-08,\n 5.6512e-08, 6.0316e-08, 4.5271e-08, 2.2155e-08, 6.4660e-08, 4.5080e-08,\n 4.8603e-08, 4.6603e-27, 2.0719e-08, 2.4742e-08, 6.8110e-08, 3.4900e-08,\n 6.6641e-08, 4.4008e-08, 3.5291e-08, 2.7545e-08, 3.1248e-08, 2.0106e-08,\n 4.6193e-08, 1.4143e-08, 2.7515e-08, 6.6355e-08, 5.7627e-08, 4.5365e-08,\n 5.2786e-08, 2.8011e-08, 3.9638e-08, 3.7790e-08, 1.6652e-08, 5.3574e-08,\n 5.1297e-07, 2.7227e-08, 2.1871e-08, 2.3754e-08, 6.0067e-08, 6.2595e-08,\n 6.0076e-08, 2.1235e-08, 3.3629e-08, 2.9627e-08, 4.7749e-08, 1.1186e-07,\n 4.3661e-16, 6.9190e-08, 2.9150e-08, 8.2852e-20, 3.0023e-08, 3.8205e-08,\n 1.7176e-08, 2.5839e-08, 5.4845e-08, 5.2216e-08, 4.2455e-08, 8.0395e-08,\n 4.2014e-08, 3.5090e-08, 5.7986e-08, 1.2522e-08, 5.6391e-08, 7.8770e-08,\n 2.9215e-08, 3.8634e-08, 3.6845e-08, 8.1741e-08, 8.8576e-20, 4.8989e-08,\n 5.5824e-08, 7.7287e-08, 5.2158e-25, 3.5903e-08, 4.8412e-08, 2.5388e-08,\n 4.4904e-08, 3.4901e-08, 2.8421e-08, 2.7027e-08, 4.4749e-08, 7.6977e-08,\n 3.5495e-08, 5.1287e-08, 3.3184e-08, 3.4538e-08, 7.4813e-08, 3.7536e-08,\n 6.1123e-08, 6.9678e-09, 2.7708e-08, 6.6054e-08, 1.3816e-08, 6.9213e-08,\n 3.1072e-08, 9.5217e-08, 4.0004e-08, 5.5677e-08, 8.4864e-08, 4.2772e-08,\n 4.1656e-08, 4.0245e-08, 4.8103e-08, 2.3324e-08, 2.0696e-08, 3.2530e-08,\n 5.1591e-08, 2.8560e-08, 3.8631e-08, 4.7127e-08, 2.7439e-08, 2.5715e-08,\n 6.8616e-08, 3.6207e-08, 2.7302e-08, 2.9296e-08, 6.1588e-08, 2.0041e-08,\n 2.2018e-08, 2.1408e-08, 5.0680e-08, 4.9138e-08, 3.8973e-08, 7.4765e-20,\n 3.3821e-08, 2.9734e-08, 6.3005e-08, 1.4851e-19, 2.6841e-08, 8.7396e-08,\n 2.5378e-08, 3.5604e-08, 4.3660e-08, 1.0179e-08, 3.5411e-08, 4.5809e-08,\n 3.2022e-08, 5.3880e-16, 3.7588e-08, 4.1293e-08, 3.1774e-08, 4.9296e-08,\n 4.8275e-08, 2.0507e-08, 3.3075e-08, 3.6011e-08, 2.3840e-08, 6.9646e-08,\n 4.2227e-08, 4.9394e-08, 4.0467e-25, 3.1616e-08, 5.8113e-08, 4.9651e-08,\n 5.9726e-08, 3.2507e-08, 6.0135e-08, 4.7975e-08, 5.9395e-08, 2.3924e-08,\n 5.4742e-08, 4.1563e-08, 5.2543e-08, 4.0124e-08, 3.3827e-08, 1.1017e-16,\n 4.6846e-08, 8.3508e-08, 2.6703e-08, 2.9675e-08, 4.9575e-08, 4.1811e-08,\n 3.8735e-08, 5.0736e-08, 4.5690e-16, 3.4054e-08, 2.6239e-08, 2.0735e-08,\n 3.8285e-08, 5.0903e-08, 5.0928e-08, 3.1937e-08, 2.2724e-08, 2.1889e-08,\n 2.6712e-08, 4.0483e-08, 3.8153e-08, 4.3586e-08, 3.5002e-08, 3.4114e-08,\n 2.3053e-08, 2.7313e-08, 9.7037e-08, 3.0764e-08, 2.3880e-08, 4.9223e-08,\n 4.5333e-16, 7.6357e-08, 3.6102e-08, 8.6626e-08, 4.9531e-08, 2.4246e-08,\n 4.0240e-08, 7.2893e-08, 6.4687e-08, 1.4387e-08, 4.6115e-08, 4.5692e-08,\n 1.9294e-08, 1.3621e-08, 8.9760e-09, 2.7775e-08, 5.4089e-08, 6.9034e-08,\n 5.3879e-08, 7.0058e-08, 4.0486e-08, 7.2680e-08, 2.3034e-08, 4.2008e-08,\n 4.3288e-08, 5.6290e-08, 9.1703e-08, 3.7532e-15, 2.4764e-08, 4.2337e-08,\n 3.1297e-08, 4.7579e-08, 1.6787e-08, 5.0558e-08, 3.9933e-08, 4.1196e-08,\n 4.9170e-08, 5.3746e-08, 7.6945e-08, 3.2570e-08, 1.2952e-08, 4.0888e-08,\n 1.6118e-08, 1.0471e-16, 1.1411e-18, 6.0202e-19, 3.8037e-08, 5.7808e-08,\n 3.6485e-08, 6.0038e-08, 4.7491e-08, 2.8265e-08, 6.0017e-08, 8.2710e-08,\n 3.9974e-08, 1.6397e-08, 2.7432e-08, 2.9842e-08, 3.5887e-08, 2.0581e-18,\n 5.2630e-08, 7.3204e-08, 3.5699e-08, 4.0776e-08, 3.5486e-08, 3.0236e-08,\n 8.0719e-08, 2.7351e-08, 3.4042e-08, 4.8540e-08, 4.5188e-08, 5.2141e-08,\n 3.9037e-08, 6.9510e-08, 5.8993e-17, 5.0802e-08, 3.1206e-08, 1.1944e-08,\n 5.6883e-08, 3.2814e-08, 8.1684e-08, 5.8604e-08, 3.2984e-08, 3.1812e-08,\n 5.1302e-08, 3.4367e-08, 3.4538e-08, 5.8182e-18, 3.3488e-08, 5.8658e-08,\n 3.0308e-08, 3.2044e-08, 3.9365e-08, 4.5578e-08, 1.8120e-08, 6.9898e-08,\n 4.3291e-08, 4.0489e-08, 4.0697e-08, 4.3128e-08, 5.6771e-08, 4.3411e-08,\n 5.0151e-08, 5.6984e-08, 4.3909e-08, 3.4014e-08, 3.8510e-08, 3.3093e-08,\n 5.2987e-08, 1.9173e-08, 6.2614e-08, 2.6881e-08, 4.0993e-08, 4.1699e-08,\n 5.3079e-08, 1.9877e-08, 8.0988e-09, 9.8599e-08, 3.5362e-08, 4.7492e-08,\n 3.7052e-08, 3.6309e-08, 6.6926e-08, 5.6371e-08, 5.7214e-08, 5.0092e-08,\n 4.6631e-08, 6.4789e-08, 1.3505e-07, 6.0959e-08, 4.4790e-08, 6.0400e-08,\n 4.2454e-08, 2.5907e-08, 1.5997e-08, 5.1180e-08, 3.3247e-08, 3.5483e-08,\n 6.6128e-08, 3.4300e-08, 5.4209e-08, 6.0474e-08, 2.7109e-08, 7.8824e-08,\n 2.4206e-08, 5.1711e-08, 4.1105e-08, 3.7830e-08, 4.1495e-08, 5.0710e-08,\n 6.0144e-08, 4.3236e-08, 5.0128e-08, 1.4444e-15, 3.1120e-08, 5.0861e-08,\n 7.2373e-08, 5.8782e-08, 6.2105e-08, 9.2809e-08, 4.1135e-08, 5.1920e-18,\n 5.7624e-08, 5.0727e-08, 5.9877e-08, 5.2890e-08, 4.9626e-08, 6.8141e-08,\n 4.5690e-08, 4.0694e-08, 4.3888e-08, 6.3899e-08, 4.9898e-08, 4.4317e-08,\n 5.1740e-08, 3.2706e-08, 9.8106e-20, 5.2944e-08, 5.1324e-08, 3.0141e-08,\n 6.0090e-08, 5.4050e-08, 4.2432e-08, 2.0539e-18, 3.8610e-08, 1.0018e-16,\n 6.2007e-08, 3.4811e-08, 4.8742e-08, 6.0448e-08, 2.0728e-08, 2.7884e-08,\n 4.1104e-08, 2.6871e-08, 3.1728e-08, 3.4164e-08, 4.3092e-08, 2.3688e-08,\n 2.7084e-08, 1.8654e-08, 3.4583e-23, 5.4448e-08, 4.0134e-08, 3.4790e-08,\n 3.4155e-08, 2.7166e-08, 4.6880e-08, 4.0203e-20, 2.9974e-08, 3.8917e-08,\n 3.8675e-08, 2.8791e-08, 2.0465e-08, 3.6510e-08, 4.4944e-08, 2.9061e-08,\n 7.3347e-08, 3.0575e-08, 3.5769e-08, 5.2383e-08, 6.0262e-08, 4.6953e-08,\n 6.8950e-08, 2.0393e-08, 5.4101e-08, 4.1387e-08, 4.4190e-08, 4.0968e-08,\n 5.3212e-08, 2.7240e-08, 2.9881e-08, 1.8178e-19, 7.3386e-08, 1.4659e-08,\n 5.2404e-08, 6.5940e-08, 7.2260e-08, 6.0334e-08, 4.0490e-08, 4.9560e-08,\n 4.2481e-08, 5.5129e-08, 1.8036e-20, 4.5183e-08, 6.1317e-08, 2.0626e-08,\n 1.4966e-08, 1.8529e-08, 4.7350e-18, 1.4758e-19, 4.8770e-08, 4.8936e-08,\n 5.6231e-22, 3.4811e-08, 6.2188e-08, 4.9916e-08, 6.7262e-08, 5.9892e-20,\n 3.7161e-08, 4.1464e-08, 5.5098e-08, 3.4389e-08, 7.0250e-08, 4.7463e-08,\n 3.8994e-08, 3.7693e-08, 4.6490e-08, 5.6724e-08, 6.9827e-08, 5.5025e-08,\n 3.3536e-08, 3.5866e-08, 5.6932e-08, 2.3012e-08, 4.7965e-08, 3.7320e-08,\n 9.4374e-08, 2.7562e-08, 6.2337e-08, 5.3580e-08, 2.7207e-08, 7.1244e-08,\n 7.7094e-08, 2.6056e-08, 3.5339e-08, 4.3253e-08, 3.9004e-08, 5.2633e-21,\n 1.9679e-08, 5.0144e-08, 3.8522e-08, 5.7417e-08, 6.0234e-08, 6.6882e-08,\n 4.1823e-08, 6.0517e-19, 6.1952e-08, 4.3980e-08, 3.1722e-08, 3.1005e-08,\n 4.6434e-08, 2.5988e-08, 5.6344e-08, 3.7220e-08, 3.1061e-08, 6.7188e-08,\n 4.0794e-08, 3.7701e-08, 1.1269e-08, 3.1198e-08, 4.8462e-08, 3.0247e-08,\n 5.8089e-08, 3.2215e-08, 5.9115e-08, 2.6653e-08, 3.9430e-08, 5.3427e-08,\n 3.7167e-08, 4.8069e-08, 4.2709e-08, 3.6091e-08, 1.2805e-18, 1.5561e-07,\n 7.9020e-08, 1.0092e-20, 6.0930e-08, 4.2642e-08, 2.2182e-08, 4.2900e-08,\n 4.3782e-08, 1.8594e-08, 5.0350e-08, 5.5179e-08, 3.7685e-08, 4.5081e-08,\n 9.0101e-16, 7.3360e-08, 4.3803e-08, 5.8831e-08, 4.5127e-08, 2.8550e-08,\n 6.0273e-08, 6.3813e-08, 1.2234e-07, 4.9970e-08, 2.4521e-08, 3.6487e-08,\n 6.2055e-08, 6.3225e-08, 2.9158e-08, 2.9581e-08, 3.6150e-08, 5.2161e-08,\n 3.8915e-08, 4.2503e-08, 4.0311e-08, 3.4229e-08, 1.8899e-08, 2.7328e-08,\n 4.2164e-08, 5.9004e-08, 3.7582e-08, 1.8496e-08, 3.4376e-08, 3.8007e-08,\n 3.7086e-08, 4.1891e-08, 4.1854e-08, 3.3834e-08, 6.4460e-08, 6.7874e-08,\n 2.5467e-08, 2.7834e-08, 4.4089e-08, 4.4331e-08, 2.4962e-08, 4.0705e-08,\n 4.0052e-08, 1.4557e-08, 1.2818e-18, 1.9374e-25, 5.3238e-08, 4.0484e-08],\n device='cuda:0')"
},
"4": {
"step": "tensor(25040.)",
"exp_avg": "tensor([[ 3.7872e-06, -1.1744e-06, 1.0282e-06, ..., 5.6052e-45,\n 1.1146e-06, 9.8124e-07],\n [ 1.0413e-06, -1.8975e-05, 7.9172e-07, ..., -5.6052e-45,\n -1.7463e-05, 6.1551e-07],\n [ 2.4576e-06, -8.9567e-06, 1.3082e-06, ..., -5.6052e-45,\n 1.5031e-05, -3.2102e-06],\n ...,\n [-5.8124e-06, -2.5645e-05, -4.2916e-06, ..., -5.6052e-45,\n -5.5089e-06, -3.4290e-06],\n [-4.6933e-06, 1.1324e-05, 6.9274e-07, ..., -5.6052e-45,\n 1.7607e-06, -7.3242e-06],\n [ 5.6216e-06, -8.5150e-06, -1.8661e-07, ..., -5.6052e-45,\n -2.5080e-06, -2.7810e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.8074e-10, 9.9683e-11, 8.8343e-11, ..., 5.4304e-28, 2.2872e-10,\n 8.6738e-11],\n [3.0477e-10, 1.3356e-10, 1.8386e-10, ..., 2.7753e-26, 5.3439e-10,\n 3.9223e-10],\n [5.9773e-10, 1.5855e-10, 7.4319e-10, ..., 1.2104e-26, 3.4939e-10,\n 4.2039e-10],\n ...,\n [5.3243e-10, 2.0862e-10, 2.1727e-10, ..., 1.0590e-26, 2.2129e-10,\n 2.2508e-10],\n [6.1276e-10, 1.8953e-10, 3.9787e-10, ..., 1.5782e-28, 4.0911e-10,\n 4.6863e-10],\n [5.3071e-10, 1.4795e-10, 5.9625e-10, ..., 1.0114e-26, 3.1403e-10,\n 3.5189e-10]], device='cuda:0')"
},
"5": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[-1.2074e-08, 1.0135e-07, 5.8361e-08, ..., 9.4664e-07,\n 1.2818e-08, 5.1650e-10],\n [-1.5338e-07, 1.3816e-07, 3.9266e-15, ..., 1.9893e-07,\n -2.4520e-07, 1.5028e-09],\n [ 1.6835e-09, 5.5058e-08, 1.4529e-09, ..., 1.9924e-08,\n 4.6696e-07, -8.7692e-08],\n ...,\n [-1.3820e-06, 3.6539e-07, -1.1146e-08, ..., 1.9993e-08,\n 3.2483e-07, 3.0002e-06],\n [ 1.1244e-09, 4.3683e-08, 7.2904e-22, ..., -2.5118e-07,\n -8.8448e-07, 5.4964e-09],\n [ 2.5477e-08, 1.6106e-07, 1.7982e-08, ..., 7.5466e-07,\n 1.3451e-08, -1.9609e-07]], device='cuda:0')",
"exp_avg_sq": "tensor([[7.0025e-14, 4.8454e-12, 6.1642e-11, ..., 4.0904e-11, 1.1039e-11,\n 7.0180e-13],\n [2.5617e-11, 2.7253e-11, 1.2947e-15, ..., 3.6139e-11, 1.8147e-11,\n 2.1250e-12],\n [1.8658e-12, 4.8658e-12, 3.4748e-13, ..., 1.2107e-12, 2.6536e-11,\n 5.6692e-13],\n ...,\n [1.1914e-10, 1.2956e-11, 1.4472e-12, ..., 1.7857e-12, 6.1849e-12,\n 8.2024e-09],\n [3.1009e-13, 1.9226e-11, 1.7168e-14, ..., 5.8177e-11, 2.5147e-11,\n 6.4428e-12],\n [1.7684e-11, 5.6626e-12, 2.6619e-12, ..., 1.4303e-11, 4.1947e-12,\n 1.0183e-10]], device='cuda:0')"
},
"6": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-1.0874e-06, -2.3645e-06, -1.8390e-06, ..., 7.3688e-06,\n -6.3505e-06, -4.5159e-06], device='cuda:0')",
"exp_avg_sq": "tensor([6.5076e-10, 1.2028e-09, 7.1218e-10, ..., 5.4302e-09, 1.0333e-09,\n 9.8003e-10], device='cuda:0')"
},
"7": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[-1.2865e-08, -1.2050e-07, 1.5276e-06, ..., 5.7586e-08,\n 8.3759e-08, -3.7446e-08],\n [ 6.4525e-08, 2.5138e-07, 3.9921e-07, ..., -2.3164e-07,\n 6.7710e-08, -2.2803e-08],\n [ 8.6614e-08, 3.6603e-07, -5.5850e-07, ..., -2.2524e-07,\n 1.6711e-07, 3.4905e-07],\n ...,\n [-3.0591e-08, -6.6691e-08, 1.7021e-06, ..., -1.2613e-07,\n 9.0767e-08, -2.7655e-07],\n [-4.9875e-08, 2.3182e-07, -2.4823e-06, ..., -3.5428e-08,\n 7.8321e-08, -2.4707e-07],\n [ 8.8252e-08, 1.6104e-07, -3.1471e-06, ..., -1.8318e-07,\n 9.5786e-08, 2.8499e-08]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.7003e-13, 7.2645e-13, 4.1566e-12, ..., 4.8834e-13, 4.0241e-13,\n 5.4833e-13],\n [5.0140e-13, 2.6737e-12, 2.8453e-12, ..., 6.6368e-13, 9.8494e-13,\n 5.0204e-13],\n [5.8474e-13, 1.6803e-12, 1.3298e-12, ..., 7.1697e-13, 4.6027e-13,\n 9.0905e-13],\n ...,\n [2.5220e-13, 1.3670e-12, 2.0161e-12, ..., 1.3977e-12, 6.0946e-13,\n 1.1967e-12],\n [2.7506e-13, 2.2395e-12, 8.1588e-12, ..., 1.3201e-12, 4.2125e-13,\n 1.2410e-12],\n [4.3543e-13, 1.2576e-12, 1.7050e-11, ..., 7.4380e-13, 8.6372e-13,\n 7.1587e-13]], device='cuda:0')"
},
"14": {
"step": "tensor(23788.)",
"exp_avg": "tensor([5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.2166e-15], device='cuda:0')"
},
"15": {
"step": "tensor(23788.)",
"exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.5695e-18, 8.8848e-17, 6.6800e-17], device='cuda:0')"
},
"16": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([9.4514e-14, 9.7385e-15, 1.0537e-14, 1.1256e-14], device='cuda:0')"
},
"18": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.0077e-19, 6.2904e-20, 0.0000e+00, ..., 4.1507e-19, 1.4928e-19,\n 4.7915e-21],\n [2.1499e-20, 5.5265e-20, 0.0000e+00, ..., 5.0809e-20, 1.7866e-19,\n 5.0351e-20],\n [6.4612e-21, 2.2802e-20, 0.0000e+00, ..., 7.6201e-21, 4.9402e-20,\n 1.6180e-20],\n ...,\n [8.1870e-22, 4.4033e-21, 0.0000e+00, ..., 6.2795e-21, 8.6960e-20,\n 4.8990e-22],\n [1.5770e-19, 1.6579e-19, 0.0000e+00, ..., 2.0066e-19, 8.4105e-19,\n 5.0474e-20],\n [4.9258e-22, 3.8282e-22, 0.0000e+00, ..., 7.5768e-22, 7.5155e-21,\n 1.7801e-21]], device='cuda:0')"
},
"19": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.4255e-16, 6.4996e-17, 1.4851e-17, 2.6960e-17, 9.8086e-18, 9.5049e-20,\n 1.6876e-18, 2.6768e-18, 6.8408e-18, 3.8725e-19, 2.1695e-17, 1.1033e-16,\n 4.5464e-17, 5.5072e-17, 8.3145e-18, 5.1852e-17, 1.2359e-16, 1.0244e-17,\n 6.8521e-17, 1.1225e-16, 1.3398e-18, 4.7159e-17, 1.0195e-17, 1.0080e-16,\n 2.0394e-17, 4.1272e-17, 1.8394e-17, 1.1442e-17, 3.1158e-18, 1.9303e-17,\n 5.7745e-17, 2.8940e-17, 1.0515e-18, 2.6545e-16, 1.3764e-16, 6.6917e-17,\n 6.9385e-18, 7.6521e-19, 5.2593e-17, 5.1256e-17, 2.5147e-16, 6.2202e-18,\n 1.1100e-18, 3.5152e-18, 8.8742e-18, 6.4473e-17, 2.7722e-17, 1.8071e-17,\n 1.7584e-17, 2.0613e-17, 8.4325e-19, 2.9682e-17, 7.5391e-17, 4.5994e-17,\n 9.7534e-18, 5.0861e-18, 3.4661e-17, 4.4152e-19, 1.3332e-17, 4.7935e-18,\n 1.4931e-17, 3.2987e-17, 2.1274e-17, 1.2330e-17, 1.2084e-18, 7.8693e-18,\n 2.8615e-17, 1.6833e-18, 3.6068e-17, 3.3948e-17, 3.2470e-19, 1.0960e-17,\n 2.8993e-17, 1.3180e-18, 1.1632e-17, 9.9656e-20, 7.7121e-18, 1.9693e-16,\n 3.9155e-18, 3.6189e-19, 6.0370e-17, 8.0804e-17, 1.3243e-20, 4.5059e-19,\n 1.4395e-17, 4.9999e-17, 1.5044e-18, 2.5588e-17, 3.2803e-18, 5.6896e-17,\n 9.0348e-18, 8.2522e-17, 1.0519e-16, 7.4430e-18, 1.0421e-17, 4.1483e-17,\n 2.7347e-18, 1.6308e-16, 5.4398e-17, 1.9484e-16, 5.5609e-17, 3.8532e-18,\n 7.6891e-19, 2.3531e-17, 1.9556e-18, 1.9425e-16, 4.8760e-18, 1.3815e-17,\n 3.1130e-17, 2.4017e-19, 2.5061e-16, 4.6427e-18, 7.4535e-17, 3.1655e-17,\n 1.0738e-17, 1.4771e-16, 3.6802e-18, 1.7906e-17, 4.9439e-17, 2.3655e-17,\n 2.5803e-17, 4.7746e-17, 4.7413e-18, 7.9699e-19, 3.1080e-19, 1.7362e-17,\n 4.1406e-16, 3.2286e-18, 2.7178e-17, 5.3901e-18, 7.2533e-19, 1.3315e-16,\n 1.1191e-17, 1.8743e-17, 7.5856e-17, 5.4166e-17, 4.5779e-17, 1.4035e-16,\n 6.0520e-17, 1.3314e-20, 5.5205e-19, 8.9570e-19, 4.8740e-17, 2.0002e-16,\n 1.1203e-16, 7.3601e-18, 7.5279e-18, 9.7178e-18, 5.9950e-17, 1.3576e-19,\n 3.1068e-18, 6.2412e-17, 2.1546e-17, 2.3995e-17, 5.3681e-17, 1.8468e-18,\n 1.6862e-17, 4.4302e-17, 8.6068e-18, 1.4350e-16, 6.8038e-17, 1.1165e-17,\n 2.6484e-17, 7.1906e-17, 4.5456e-18, 3.2522e-17, 3.2016e-16, 4.8645e-17,\n 3.3612e-17, 1.5499e-17, 2.2417e-18, 5.3474e-17, 2.1859e-16, 5.4981e-17,\n 7.2254e-17, 5.2632e-19, 7.2852e-18, 2.5155e-17, 2.8667e-17, 3.0734e-17,\n 3.7718e-17, 1.7611e-17, 3.5362e-18, 2.1234e-17, 4.0417e-17, 1.1216e-19,\n 1.0586e-17, 4.3477e-17, 1.1028e-17, 1.7526e-16, 1.3101e-16, 1.3587e-17,\n 8.9941e-18, 3.8958e-18, 2.6000e-20, 6.2779e-16, 7.3573e-18, 1.2667e-17,\n 1.3986e-17, 3.0663e-16, 3.0549e-21, 2.8635e-16, 5.3863e-17, 8.1950e-18,\n 5.2320e-17, 6.4287e-16, 5.9107e-17, 1.8446e-17, 2.6771e-19, 8.2976e-17,\n 6.6992e-18, 1.2109e-16, 1.8854e-17, 3.8416e-17, 2.9408e-19, 4.8919e-17,\n 3.7617e-16, 2.1295e-19, 4.1462e-18, 1.2161e-17, 8.9527e-18, 2.5026e-16,\n 3.1517e-17, 1.7994e-16, 2.4946e-17, 3.0292e-20, 6.4270e-19, 3.6819e-18,\n 1.3646e-18, 5.2539e-17, 4.4919e-19, 4.8010e-19, 9.1196e-17, 3.5665e-17,\n 2.7816e-17, 6.1974e-18, 7.4184e-18, 5.6152e-17, 7.0303e-18, 6.1775e-18,\n 7.1665e-17, 2.9166e-19, 1.5778e-17, 9.6746e-17, 1.6865e-17, 4.8312e-19,\n 9.0287e-19, 4.0599e-17, 4.5838e-18, 1.4602e-16, 7.6126e-17, 7.6491e-18,\n 6.5757e-17, 7.7643e-18, 2.1961e-16, 3.8831e-18], device='cuda:0')"
},
"20": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([3.5169e-19, 3.2161e-19, 2.1628e-20, 1.4114e-19, 1.6558e-20, 1.3656e-21,\n 3.0027e-21, 1.3944e-20, 8.2799e-21, 4.8640e-22, 3.9962e-20, 2.0012e-19,\n 1.1522e-19, 2.5918e-19, 2.6680e-20, 1.2213e-19, 4.0059e-19, 1.7768e-20,\n 2.2165e-19, 3.1935e-19, 6.6181e-22, 1.9836e-19, 2.2429e-20, 4.1140e-19,\n 7.9965e-20, 8.0105e-20, 4.6519e-20, 3.8018e-20, 1.1307e-20, 3.4165e-20,\n 1.7635e-19, 8.5859e-20, 6.6970e-21, 7.1467e-19, 5.3328e-19, 1.6688e-19,\n 1.3775e-20, 1.3941e-23, 1.0929e-19, 9.2658e-20, 1.0637e-18, 1.4839e-20,\n 4.7224e-21, 1.5422e-20, 1.7729e-20, 1.4503e-19, 6.0385e-20, 3.8122e-20,\n 2.6893e-20, 3.1849e-20, 6.6070e-22, 8.1102e-20, 1.5653e-19, 1.0026e-19,\n 1.3360e-20, 2.3985e-20, 1.1214e-19, 9.2378e-22, 4.4002e-20, 1.3577e-20,\n 2.6959e-20, 2.4608e-19, 5.9671e-20, 2.9614e-20, 1.8287e-23, 1.2572e-20,\n 6.2479e-20, 9.1390e-22, 6.6597e-20, 1.0991e-19, 7.6407e-21, 2.5182e-20,\n 3.2191e-20, 4.4252e-21, 2.1274e-20, 6.4405e-21, 4.4969e-20, 9.9993e-19,\n 3.4271e-21, 1.0227e-24, 2.5286e-19, 2.0949e-19, 1.9253e-21, 2.2260e-21,\n 2.8315e-20, 1.1171e-19, 5.9950e-21, 1.1979e-19, 9.6140e-21, 1.0609e-19,\n 2.7366e-20, 1.8857e-19, 3.8237e-19, 9.4372e-21, 2.8830e-20, 1.4621e-19,\n 5.9215e-21, 3.4736e-19, 2.1170e-19, 5.4788e-19, 1.6667e-19, 1.7938e-20,\n 9.4026e-21, 4.1707e-20, 8.1576e-22, 4.5647e-19, 5.0971e-21, 3.2570e-20,\n 6.4817e-20, 3.3942e-22, 6.3966e-19, 1.4472e-20, 2.1124e-19, 6.6466e-20,\n 1.9594e-20, 5.9621e-19, 1.4500e-20, 3.8179e-20, 1.4907e-19, 5.7382e-20,\n 4.7477e-20, 9.9440e-20, 3.9847e-21, 7.6905e-23, 3.1717e-21, 2.6718e-20,\n 1.8723e-18, 1.2244e-20, 8.6804e-20, 8.2209e-21, 2.6579e-22, 3.9710e-19,\n 2.7385e-20, 5.5763e-20, 1.4561e-19, 1.2232e-19, 1.2858e-19, 3.6225e-19,\n 1.6556e-19, 2.2462e-21, 1.4157e-20, 5.1325e-21, 1.0709e-19, 5.8791e-19,\n 7.0393e-19, 2.9135e-20, 3.0240e-20, 2.2748e-20, 1.2419e-19, 3.4431e-23,\n 3.1099e-20, 1.3685e-19, 6.9563e-20, 2.7317e-20, 1.2593e-19, 1.1188e-20,\n 2.4884e-20, 7.5478e-20, 1.4158e-20, 5.6756e-19, 1.3268e-19, 1.2635e-20,\n 6.1700e-20, 2.3970e-19, 1.3209e-20, 4.2266e-20, 1.1005e-18, 9.2259e-20,\n 1.0285e-19, 3.9498e-20, 1.1488e-21, 2.1329e-19, 5.2654e-19, 2.0520e-19,\n 1.3601e-19, 2.0247e-21, 1.1385e-20, 1.1252e-19, 8.7809e-20, 8.0267e-20,\n 9.7404e-20, 3.4082e-20, 4.2942e-21, 3.1881e-20, 1.0118e-19, 6.0483e-26,\n 1.6308e-20, 9.6722e-20, 9.7122e-21, 4.3884e-19, 3.4194e-19, 1.7113e-20,\n 2.6748e-20, 2.1315e-20, 2.4508e-21, 2.4336e-18, 9.3664e-21, 2.0284e-20,\n 4.2327e-20, 8.4274e-19, 7.2601e-22, 7.6938e-19, 9.7196e-20, 1.1194e-20,\n 1.7403e-19, 1.6384e-18, 1.2803e-19, 4.6549e-20, 8.7756e-23, 1.2966e-19,\n 9.1763e-21, 3.0185e-19, 1.2802e-19, 1.1724e-19, 1.7575e-22, 8.4952e-20,\n 1.7897e-18, 7.3013e-23, 1.5566e-20, 1.4567e-20, 1.1435e-20, 1.1008e-18,\n 1.7538e-19, 4.3790e-19, 7.3751e-20, 6.4450e-22, 3.1239e-23, 1.0917e-20,\n 1.3516e-21, 1.4773e-19, 3.6307e-21, 3.3812e-22, 5.3795e-19, 1.5749e-19,\n 8.4184e-20, 1.4437e-20, 9.4885e-21, 1.6332e-19, 1.9295e-20, 6.1679e-21,\n 1.7745e-19, 6.7942e-23, 4.7340e-20, 2.4918e-19, 2.5574e-20, 8.2823e-23,\n 2.1266e-22, 7.7660e-20, 7.2296e-21, 3.9407e-19, 1.5469e-19, 1.0751e-20,\n 1.4729e-19, 1.5366e-20, 7.0330e-19, 2.0449e-20], device='cuda:0')"
},
"21": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([6.0319e-19, 3.0790e-19, 4.2391e-20, 1.3856e-19, 2.1789e-20, 9.2558e-22,\n 4.1979e-21, 2.2802e-20, 1.3918e-20, 2.7275e-22, 6.6242e-20, 3.7389e-19,\n 1.3186e-19, 2.5695e-19, 4.4903e-20, 2.3686e-19, 5.3567e-19, 2.8322e-20,\n 1.9686e-19, 4.6479e-19, 7.4822e-22, 2.2210e-19, 5.1408e-20, 4.3396e-19,\n 1.0683e-19, 1.2371e-19, 8.5664e-20, 6.0108e-20, 2.3285e-20, 4.9803e-20,\n 2.5896e-19, 1.3474e-19, 1.0472e-20, 9.1361e-19, 5.8863e-19, 2.8747e-19,\n 9.9547e-21, 2.8936e-24, 2.3212e-19, 1.7630e-19, 8.1807e-19, 3.4516e-20,\n 1.1390e-20, 2.7243e-20, 3.0396e-20, 2.0726e-19, 1.2802e-19, 8.9228e-20,\n 4.4185e-20, 6.6504e-20, 1.0798e-21, 1.3123e-19, 2.4575e-19, 1.4415e-19,\n 2.4321e-20, 3.2902e-20, 1.6020e-19, 8.3295e-22, 7.4291e-20, 2.8531e-20,\n 3.5729e-20, 1.7331e-19, 9.7324e-20, 5.6402e-20, 3.0045e-22, 1.9667e-20,\n 1.3529e-19, 2.4695e-21, 1.6036e-19, 1.6452e-19, 8.3996e-21, 5.5632e-20,\n 1.0078e-19, 9.5495e-21, 2.7659e-20, 5.9028e-21, 4.7893e-20, 8.7028e-19,\n 5.1646e-21, 8.2072e-25, 2.9162e-19, 3.6387e-19, 2.1210e-21, 4.8156e-21,\n 7.3379e-20, 2.3283e-19, 1.0587e-20, 1.3548e-19, 1.0638e-20, 2.5038e-19,\n 4.7295e-20, 3.5517e-19, 3.3874e-19, 1.9740e-20, 4.6895e-20, 1.1204e-19,\n 7.7664e-21, 5.5686e-19, 2.5507e-19, 8.1607e-19, 2.4514e-19, 2.6776e-20,\n 1.0723e-20, 6.4143e-20, 1.2050e-21, 6.4113e-19, 8.6838e-21, 7.2298e-20,\n 8.7600e-20, 2.2524e-22, 8.4160e-19, 2.8251e-20, 3.3528e-19, 1.1663e-19,\n 2.5792e-20, 4.6662e-19, 1.9933e-20, 4.8284e-20, 2.2614e-19, 1.1436e-19,\n 7.3117e-20, 1.4084e-19, 7.3308e-21, 2.1207e-22, 5.9063e-21, 4.9806e-20,\n 1.6646e-18, 1.8119e-20, 1.3722e-19, 1.5178e-20, 3.0685e-22, 5.7667e-19,\n 5.7125e-20, 8.8794e-20, 2.3911e-19, 1.7677e-19, 2.0279e-19, 4.7334e-19,\n 2.6943e-19, 2.7394e-21, 1.0705e-20, 8.3953e-21, 1.5600e-19, 6.8426e-19,\n 5.2060e-19, 4.3620e-20, 4.3179e-20, 5.0789e-20, 1.9656e-19, 7.6870e-23,\n 2.8518e-20, 1.8777e-19, 1.0922e-19, 6.8828e-20, 1.4904e-19, 1.7011e-20,\n 4.2205e-20, 1.6061e-19, 2.2826e-20, 6.0821e-19, 2.2342e-19, 3.1065e-20,\n 6.2297e-20, 3.2338e-19, 2.4426e-20, 9.2887e-20, 1.1432e-18, 1.5506e-19,\n 1.5408e-19, 7.5713e-20, 3.1575e-21, 1.3925e-19, 7.4979e-19, 2.6060e-19,\n 2.1728e-19, 4.1366e-21, 1.6936e-20, 1.3448e-19, 6.5586e-20, 1.4694e-19,\n 9.9618e-20, 4.4005e-20, 7.1611e-21, 5.8011e-20, 1.6945e-19, 1.7387e-23,\n 3.2491e-20, 1.2407e-19, 2.9644e-20, 7.1944e-19, 4.3723e-19, 3.5874e-20,\n 4.8942e-20, 2.5713e-20, 3.2191e-21, 2.5760e-18, 1.4883e-20, 2.7350e-20,\n 6.7906e-20, 1.0805e-18, 1.8158e-21, 1.1922e-18, 1.7332e-19, 1.8194e-20,\n 2.4540e-19, 2.2228e-18, 1.7714e-19, 4.3356e-20, 2.2166e-22, 2.8847e-19,\n 1.1101e-20, 5.2222e-19, 1.0747e-19, 1.8559e-19, 2.1011e-22, 1.6317e-19,\n 1.2632e-18, 4.9220e-23, 2.6200e-20, 2.7138e-20, 1.6629e-20, 1.0494e-18,\n 1.6349e-19, 7.6558e-19, 1.2819e-19, 1.1866e-21, 2.1414e-23, 2.4806e-20,\n 3.4481e-21, 2.2582e-19, 7.2633e-21, 7.3031e-22, 4.0787e-19, 1.6860e-19,\n 1.2833e-19, 3.1032e-20, 1.9210e-20, 2.5191e-19, 4.0030e-20, 1.4976e-20,\n 3.2343e-19, 6.4208e-23, 7.6795e-20, 3.0271e-19, 4.8000e-20, 1.6011e-22,\n 1.9643e-22, 1.2932e-19, 7.2197e-21, 6.2870e-19, 2.4290e-19, 1.7819e-20,\n 1.9742e-19, 2.6070e-20, 7.2824e-19, 2.8495e-20], device='cuda:0')"
},
"22": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[4.2620e-20, 1.6010e-19, 0.0000e+00, ..., 2.3312e-19, 3.8121e-19,\n 5.8326e-20],\n [1.9549e-20, 2.1818e-20, 0.0000e+00, ..., 3.6005e-20, 1.1106e-19,\n 7.0596e-23],\n [2.6247e-20, 1.4258e-20, 0.0000e+00, ..., 2.3977e-20, 5.1228e-20,\n 4.1257e-21],\n ...,\n [4.4831e-20, 5.5274e-20, 0.0000e+00, ..., 6.8270e-20, 1.2918e-19,\n 4.1093e-20],\n [1.3650e-19, 4.3339e-20, 0.0000e+00, ..., 4.3082e-20, 2.7337e-19,\n 2.6025e-20],\n [6.6813e-22, 9.3566e-22, 0.0000e+00, ..., 1.0089e-20, 1.6080e-20,\n 3.2821e-21]], device='cuda:0')"
},
"23": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.6650e-16, 1.2815e-17, 2.8305e-17, 2.1302e-17, 7.2637e-18, 3.8962e-18,\n 1.6760e-17, 9.9714e-21, 6.6830e-18, 1.1335e-18, 1.2584e-16, 2.4917e-17,\n 3.3523e-17, 2.1751e-17, 6.3652e-18, 1.5709e-17, 6.5831e-17, 4.3062e-18,\n 1.2463e-17, 1.3275e-16, 5.2403e-19, 5.0401e-17, 2.1940e-17, 1.2675e-17,\n 1.6962e-18, 5.2863e-17, 1.6699e-17, 2.6550e-18, 1.8564e-18, 1.4786e-17,\n 7.3038e-17, 1.5208e-17, 1.8656e-18, 5.3438e-16, 5.2785e-17, 3.6987e-17,\n 6.3341e-18, 4.5904e-22, 4.9356e-17, 2.4892e-16, 3.1306e-17, 1.9109e-17,\n 3.3780e-18, 1.9433e-17, 1.1347e-16, 8.4196e-17, 4.7613e-17, 2.7859e-17,\n 7.0190e-18, 1.2091e-16, 1.3558e-18, 2.1854e-16, 1.5148e-16, 3.4547e-17,\n 3.5056e-18, 6.3336e-19, 8.3107e-17, 2.6423e-18, 3.7947e-17, 7.6513e-18,\n 1.7857e-17, 2.0477e-17, 1.0894e-16, 5.7825e-17, 2.6385e-18, 6.8265e-18,\n 4.0747e-17, 7.2667e-18, 5.7057e-17, 1.3718e-18, 2.0152e-21, 2.3410e-17,\n 4.3054e-17, 7.5631e-19, 1.8260e-17, 2.2315e-18, 6.3281e-18, 1.2145e-16,\n 1.7856e-18, 7.6326e-19, 9.7577e-18, 8.5574e-17, 2.4405e-21, 6.3420e-18,\n 1.7770e-18, 6.4463e-17, 2.5941e-17, 1.4536e-17, 2.5646e-17, 3.7564e-16,\n 5.0320e-18, 8.4697e-17, 1.0801e-16, 1.6336e-17, 2.0496e-16, 1.5314e-17,\n 1.8134e-17, 1.1513e-16, 3.8691e-17, 4.5117e-17, 1.0022e-16, 5.6425e-18,\n 1.0869e-18, 4.0492e-17, 1.5006e-18, 2.1613e-16, 1.2175e-17, 2.4644e-17,\n 7.5278e-18, 3.4810e-20, 1.7675e-16, 1.2647e-17, 1.7554e-16, 5.3585e-17,\n 5.8627e-18, 9.7517e-18, 1.1619e-17, 9.8735e-18, 7.4501e-18, 1.8106e-17,\n 5.4197e-17, 3.1695e-17, 5.3851e-18, 4.2956e-21, 3.1874e-20, 1.9504e-17,\n 1.0851e-16, 7.0362e-17, 5.6936e-18, 9.4537e-17, 7.4178e-20, 4.6719e-16,\n 2.1255e-17, 3.9511e-17, 6.7782e-17, 1.8199e-16, 5.7738e-17, 1.8347e-16,\n 5.0046e-17, 2.2830e-20, 6.5465e-19, 3.4182e-18, 2.1971e-17, 1.9783e-16,\n 3.4495e-17, 9.3411e-18, 8.2661e-18, 3.2038e-18, 1.9665e-16, 4.2227e-18,\n 3.4487e-18, 1.6427e-17, 1.7950e-17, 7.1444e-17, 2.4355e-17, 3.2619e-18,\n 1.7196e-17, 1.8263e-16, 5.1632e-18, 2.5144e-16, 6.3907e-17, 9.2904e-17,\n 9.6985e-18, 8.4541e-17, 6.4667e-17, 5.3282e-17, 5.1338e-16, 7.2221e-17,\n 7.9429e-17, 1.0272e-17, 3.2766e-18, 1.3816e-17, 9.5216e-17, 2.9292e-17,\n 5.4630e-17, 5.8511e-19, 4.8127e-18, 3.2102e-18, 6.5169e-18, 2.2942e-17,\n 3.3702e-18, 1.8591e-17, 1.2269e-17, 1.6444e-17, 1.2687e-16, 4.3120e-19,\n 1.2850e-17, 3.9345e-17, 7.5562e-18, 2.0396e-16, 2.1505e-17, 3.0725e-17,\n 3.3182e-17, 2.6221e-18, 1.1155e-20, 5.8352e-16, 6.9370e-18, 7.1913e-18,\n 4.2930e-17, 1.1178e-16, 4.5320e-21, 3.1732e-17, 1.2025e-16, 1.4228e-18,\n 7.2275e-17, 2.4055e-16, 3.7907e-17, 7.7863e-18, 2.9083e-20, 3.2877e-16,\n 6.7583e-18, 1.1275e-16, 3.4646e-18, 1.5491e-17, 2.6213e-18, 3.1270e-17,\n 4.1340e-17, 1.4215e-19, 3.1457e-18, 4.0719e-18, 1.3599e-17, 5.2835e-17,\n 9.7886e-18, 1.8732e-16, 3.3017e-17, 1.3488e-18, 4.1681e-19, 2.0132e-18,\n 7.4825e-18, 1.7146e-17, 5.2028e-19, 1.8271e-18, 2.9873e-17, 2.4191e-17,\n 9.7177e-17, 3.0447e-17, 7.1167e-18, 5.4270e-17, 8.1177e-18, 1.4238e-17,\n 5.2050e-17, 1.0812e-19, 6.8072e-17, 8.4460e-17, 2.9330e-17, 6.6708e-20,\n 2.3294e-21, 2.9104e-16, 1.4051e-18, 1.0695e-16, 3.6693e-17, 6.3101e-18,\n 4.8179e-17, 5.0406e-17, 1.1700e-16, 3.2994e-18], device='cuda:0')"
},
"24": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([4.6668e-19, 3.9428e-20, 4.4712e-20, 9.6753e-20, 1.1317e-20, 1.5280e-20,\n 3.9289e-20, 1.3149e-22, 8.0396e-21, 7.7128e-22, 4.4876e-19, 6.1208e-20,\n 6.9728e-20, 5.1891e-20, 1.7830e-20, 4.2777e-20, 1.5855e-19, 8.7623e-21,\n 2.7347e-20, 2.9515e-19, 6.6651e-24, 1.7294e-19, 6.7177e-20, 3.4979e-20,\n 7.1488e-21, 9.7185e-20, 3.7270e-20, 9.2762e-21, 7.2831e-21, 3.6512e-20,\n 2.7956e-19, 2.8066e-20, 4.6134e-21, 2.1158e-18, 1.2156e-19, 7.7913e-20,\n 1.5399e-20, 1.3500e-22, 1.3590e-19, 5.5298e-19, 6.0697e-20, 6.3918e-20,\n 7.7129e-21, 1.6085e-19, 3.3871e-19, 1.6557e-19, 2.1668e-19, 7.1216e-20,\n 1.1200e-20, 4.6341e-19, 1.1918e-21, 6.5828e-19, 3.7697e-19, 5.8694e-20,\n 2.9946e-21, 2.1134e-21, 2.7279e-19, 1.7089e-21, 1.0586e-19, 1.9078e-20,\n 4.0069e-20, 5.8969e-20, 4.6959e-19, 1.5879e-19, 1.8666e-21, 9.5765e-21,\n 1.0867e-19, 8.0149e-21, 1.1754e-19, 6.8352e-21, 2.3884e-21, 4.7339e-20,\n 8.7796e-20, 4.5184e-21, 3.2740e-20, 1.5297e-20, 2.0243e-20, 3.1296e-19,\n 2.8621e-21, 8.1328e-22, 2.3199e-20, 2.2519e-19, 2.1681e-22, 5.0662e-20,\n 8.2002e-21, 1.5667e-19, 1.1658e-19, 3.3267e-20, 4.6477e-20, 1.3338e-18,\n 1.3792e-20, 2.0207e-19, 3.7796e-19, 2.4210e-20, 1.3203e-18, 2.9404e-20,\n 2.3398e-20, 2.3133e-19, 1.0064e-19, 1.1605e-19, 4.2457e-19, 4.2548e-20,\n 5.4508e-21, 1.0523e-19, 6.2782e-22, 4.0157e-19, 2.3800e-20, 6.1872e-20,\n 1.1800e-20, 1.4993e-21, 3.8464e-19, 4.8019e-20, 1.0977e-18, 1.0944e-19,\n 5.0889e-21, 1.7161e-20, 3.2212e-20, 1.4997e-20, 2.6864e-20, 6.0632e-20,\n 1.5564e-19, 8.0655e-20, 6.0063e-21, 1.9141e-23, 7.5085e-22, 3.2707e-20,\n 2.3877e-19, 4.1792e-19, 1.2706e-20, 3.1812e-19, 9.4526e-23, 1.5137e-18,\n 5.3041e-20, 9.2143e-20, 1.0055e-19, 6.6108e-19, 1.3105e-19, 5.3264e-19,\n 1.1920e-19, 1.6980e-22, 6.7208e-21, 1.2569e-20, 3.7279e-20, 6.1760e-19,\n 7.2753e-20, 3.1658e-20, 2.5679e-20, 9.1285e-21, 6.8537e-19, 6.0335e-21,\n 1.4907e-20, 2.7537e-20, 5.2316e-20, 2.4538e-19, 4.0260e-20, 1.5950e-20,\n 2.5507e-20, 5.4271e-19, 1.1586e-20, 7.7125e-19, 1.3426e-19, 3.5726e-19,\n 1.3618e-20, 2.8630e-19, 3.9324e-19, 1.1921e-19, 1.7600e-18, 1.3689e-19,\n 3.4370e-19, 3.8317e-20, 2.4307e-21, 1.9791e-20, 1.7945e-19, 8.9350e-20,\n 1.0078e-19, 1.9265e-21, 5.5343e-21, 7.6284e-21, 1.1616e-20, 3.9714e-20,\n 6.1215e-21, 4.3468e-20, 2.0773e-20, 2.8245e-20, 2.7173e-19, 2.7701e-24,\n 2.3275e-20, 9.5717e-20, 9.9710e-21, 5.5408e-19, 2.6877e-20, 7.9337e-20,\n 1.5221e-19, 9.4811e-21, 2.7573e-22, 2.3043e-18, 1.2693e-20, 1.1124e-20,\n 1.7084e-19, 1.6062e-19, 7.5732e-22, 5.3830e-20, 4.1079e-19, 1.6886e-21,\n 2.2151e-19, 4.5157e-19, 6.3166e-20, 1.2252e-20, 9.4722e-24, 1.0466e-18,\n 1.4113e-20, 2.5273e-19, 1.6443e-20, 3.3940e-20, 3.0506e-21, 6.0114e-20,\n 7.7444e-20, 5.7805e-23, 9.6651e-21, 4.9789e-21, 3.3107e-20, 1.5246e-19,\n 4.4648e-20, 6.0228e-19, 1.1466e-19, 5.2755e-21, 1.8966e-22, 2.1108e-20,\n 1.1822e-20, 4.9290e-20, 2.7026e-21, 2.9723e-21, 5.1197e-20, 9.7257e-20,\n 2.4502e-19, 9.9361e-20, 1.1152e-20, 1.1911e-19, 2.4983e-20, 2.1350e-20,\n 1.3220e-19, 4.5856e-23, 5.7552e-19, 2.0615e-19, 4.8021e-20, 4.5418e-23,\n 8.2719e-22, 1.3730e-18, 1.4842e-21, 3.3137e-19, 7.0012e-20, 9.4541e-21,\n 1.1034e-19, 1.0450e-19, 2.5513e-19, 1.2374e-20], device='cuda:0')"
},
"25": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([6.7737e-19, 5.9724e-20, 8.0585e-20, 1.0930e-19, 1.6957e-20, 2.4275e-20,\n 4.9148e-20, 1.5435e-22, 1.4542e-20, 1.3443e-21, 4.0143e-19, 9.0296e-20,\n 1.0087e-19, 1.0078e-19, 3.2260e-20, 7.8561e-20, 2.8702e-19, 1.2200e-20,\n 3.9894e-20, 5.7378e-19, 1.0466e-22, 2.2973e-19, 1.0939e-19, 5.9293e-20,\n 9.0008e-21, 1.7260e-19, 7.9203e-20, 1.6294e-20, 1.3037e-20, 4.3781e-20,\n 3.3095e-19, 6.7683e-20, 1.1553e-20, 1.8838e-18, 2.3091e-19, 1.6098e-19,\n 1.1978e-20, 1.0208e-22, 2.2155e-19, 8.7509e-19, 1.0963e-19, 9.2345e-20,\n 1.9229e-20, 1.0693e-19, 3.7107e-19, 2.9436e-19, 2.1882e-19, 1.3451e-19,\n 1.8184e-20, 3.8959e-19, 1.2066e-21, 8.9242e-19, 4.9211e-19, 1.0992e-19,\n 6.3225e-21, 5.1515e-21, 3.7064e-19, 4.7286e-21, 1.6303e-19, 4.2643e-20,\n 4.6011e-20, 9.6065e-20, 4.6812e-19, 2.5118e-19, 3.2396e-21, 1.8171e-20,\n 1.8114e-19, 1.6904e-20, 2.4250e-19, 8.4425e-21, 1.9735e-21, 1.1169e-19,\n 1.3845e-19, 4.7710e-21, 5.2246e-20, 1.7252e-20, 3.7179e-20, 5.0343e-19,\n 4.6945e-21, 7.2086e-22, 5.1666e-20, 3.8341e-19, 5.7871e-22, 4.2896e-20,\n 1.0756e-20, 2.7419e-19, 1.3027e-19, 6.5720e-20, 7.9891e-20, 1.5096e-18,\n 2.4856e-20, 3.6703e-19, 3.4398e-19, 5.0787e-20, 8.7835e-19, 4.7210e-20,\n 5.7274e-20, 4.0694e-19, 1.7245e-19, 2.1093e-19, 4.3140e-19, 3.7918e-20,\n 1.0331e-20, 1.1772e-19, 1.8191e-21, 7.3837e-19, 2.9765e-20, 1.1930e-19,\n 2.1598e-20, 2.6754e-21, 6.0685e-19, 6.4988e-20, 7.4911e-19, 1.8459e-19,\n 1.5486e-20, 3.1687e-20, 5.9872e-20, 2.9897e-20, 3.6380e-20, 9.3625e-20,\n 1.6027e-19, 9.6560e-20, 1.2088e-20, 1.2533e-23, 7.9458e-22, 5.9614e-20,\n 4.6261e-19, 3.2015e-19, 2.8792e-20, 2.8792e-19, 1.2545e-22, 1.8944e-18,\n 9.6329e-20, 1.6972e-19, 2.1612e-19, 6.0967e-19, 2.5716e-19, 6.2870e-19,\n 2.1545e-19, 3.6953e-22, 8.5243e-21, 2.2912e-20, 6.8388e-20, 6.6572e-19,\n 1.4637e-19, 5.0463e-20, 4.1579e-20, 1.6780e-20, 6.6661e-19, 6.8223e-21,\n 2.4245e-20, 5.4413e-20, 8.6913e-20, 2.1158e-19, 7.0214e-20, 2.3224e-20,\n 4.6237e-20, 6.1869e-19, 1.5373e-20, 1.0251e-18, 2.1240e-19, 2.7982e-19,\n 2.2971e-20, 3.7772e-19, 3.0016e-19, 1.6740e-19, 1.7919e-18, 2.4523e-19,\n 3.3784e-19, 5.0219e-20, 6.2694e-21, 3.9223e-20, 3.2980e-19, 1.3748e-19,\n 1.7749e-19, 4.5224e-21, 1.0694e-20, 1.5611e-20, 1.5036e-20, 1.0523e-19,\n 8.9793e-21, 4.6147e-20, 3.3617e-20, 5.0900e-20, 5.3011e-19, 4.6659e-23,\n 4.1739e-20, 1.1662e-19, 1.9843e-20, 8.4757e-19, 7.2225e-20, 8.9337e-20,\n 1.5757e-19, 1.5064e-20, 7.2630e-22, 2.3410e-18, 1.6546e-20, 1.9057e-20,\n 1.9958e-19, 3.8352e-19, 1.8323e-21, 1.4094e-19, 3.7832e-19, 3.1916e-21,\n 2.9927e-19, 8.4455e-19, 1.2470e-19, 2.0077e-20, 1.6801e-23, 1.1851e-18,\n 1.3154e-20, 4.7352e-19, 2.4476e-20, 7.5509e-20, 4.1212e-21, 1.0485e-19,\n 1.3788e-19, 5.6751e-22, 1.9155e-20, 9.2308e-21, 2.7675e-20, 2.3106e-19,\n 5.2256e-20, 7.8460e-19, 1.5618e-19, 1.0841e-20, 1.5045e-22, 1.8626e-20,\n 1.5697e-20, 8.0390e-20, 5.7766e-21, 3.2016e-21, 1.3433e-19, 1.2326e-19,\n 4.1752e-19, 1.4352e-19, 1.6534e-20, 2.2951e-19, 4.7232e-20, 4.0479e-20,\n 2.3085e-19, 1.0062e-22, 3.2042e-19, 2.8092e-19, 8.5816e-20, 3.4172e-22,\n 1.7108e-21, 9.9031e-19, 2.3858e-21, 4.3403e-19, 1.1210e-19, 1.4555e-20,\n 1.4900e-19, 1.5058e-19, 3.8048e-19, 2.2348e-20], device='cuda:0')"
},
"26": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[3.2663e-20, 4.5150e-20, 0.0000e+00, ..., 9.5566e-20, 1.5410e-19,\n 1.8824e-20],\n [3.0196e-21, 4.1980e-21, 0.0000e+00, ..., 1.1732e-22, 3.7821e-21,\n 1.2078e-22],\n [2.4407e-20, 1.9073e-20, 0.0000e+00, ..., 1.9632e-20, 2.1203e-20,\n 1.3791e-20],\n ...,\n [1.2154e-20, 9.9878e-21, 0.0000e+00, ..., 7.0457e-21, 6.8812e-20,\n 1.9860e-21],\n [7.1321e-20, 2.2072e-20, 0.0000e+00, ..., 1.9021e-20, 1.5589e-19,\n 3.4524e-20],\n [1.4423e-20, 2.5357e-21, 0.0000e+00, ..., 3.4985e-21, 3.0053e-20,\n 3.1596e-22]], device='cuda:0')"
},
"27": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([5.5355e-17, 2.4826e-18, 1.9705e-17, 2.5604e-17, 7.7966e-18, 1.9653e-18,\n 2.5862e-17, 6.2940e-19, 1.1606e-17, 1.7713e-18, 5.8843e-17, 2.0366e-16,\n 3.4671e-17, 4.0773e-17, 4.2541e-17, 1.7609e-16, 1.9735e-17, 1.9565e-17,\n 6.6039e-17, 7.6779e-17, 2.4622e-19, 8.2493e-18, 1.6638e-17, 2.0629e-17,\n 1.9105e-17, 3.5418e-17, 2.8157e-17, 5.5304e-18, 8.0814e-19, 9.2050e-18,\n 4.8917e-17, 3.2318e-17, 5.0676e-18, 2.4857e-16, 9.0033e-17, 5.5182e-17,\n 1.6415e-18, 3.1112e-20, 2.3148e-17, 7.4746e-16, 1.1151e-16, 1.5676e-17,\n 4.9140e-18, 3.0756e-18, 1.2892e-16, 2.2702e-16, 3.5363e-17, 7.8341e-17,\n 9.7595e-18, 6.2715e-17, 1.2094e-18, 1.3352e-16, 3.4043e-17, 3.0669e-17,\n 4.8741e-18, 4.2516e-19, 5.8140e-17, 2.1576e-18, 3.8641e-17, 3.3816e-18,\n 1.9857e-17, 3.8015e-17, 9.5833e-17, 5.1074e-17, 1.4405e-18, 3.5642e-18,\n 3.8318e-17, 2.4946e-18, 2.4298e-17, 4.6168e-18, 8.4287e-20, 2.3548e-17,\n 9.9524e-17, 7.3096e-18, 2.0150e-17, 1.3470e-19, 1.3085e-18, 8.1290e-17,\n 5.2459e-18, 1.7322e-21, 4.8116e-17, 5.7906e-17, 8.2922e-20, 4.9153e-18,\n 3.4307e-17, 1.4654e-16, 1.1779e-17, 5.0596e-17, 1.0023e-17, 8.3578e-17,\n 5.9188e-17, 3.6405e-17, 9.2533e-17, 1.6884e-17, 2.9815e-17, 4.5698e-18,\n 1.5544e-17, 1.0642e-16, 2.5821e-17, 2.1492e-16, 1.9039e-16, 4.6583e-18,\n 8.2801e-20, 2.5570e-17, 1.4370e-18, 1.6860e-16, 1.3513e-17, 1.2341e-17,\n 3.6771e-17, 2.2931e-19, 1.8227e-17, 1.1894e-19, 1.1879e-16, 1.1636e-16,\n 9.1727e-18, 1.0172e-16, 8.6861e-18, 1.6590e-17, 2.4580e-17, 2.5246e-17,\n 8.7986e-18, 2.0019e-18, 3.6099e-18, 4.7888e-21, 4.6909e-19, 2.2594e-17,\n 1.9201e-16, 3.1714e-17, 8.0193e-17, 3.6464e-17, 2.0137e-19, 2.3139e-16,\n 3.7306e-17, 3.4483e-17, 8.4207e-17, 4.9134e-17, 5.4757e-17, 7.3476e-17,\n 4.9909e-17, 1.9229e-19, 1.4565e-18, 2.6836e-18, 1.9759e-17, 2.5144e-16,\n 4.8041e-17, 2.9348e-18, 4.5307e-17, 5.4732e-18, 4.6257e-17, 1.1443e-18,\n 2.2292e-18, 7.8386e-17, 5.7185e-17, 7.1563e-17, 1.5196e-17, 2.5619e-18,\n 4.5567e-17, 1.3490e-16, 2.3666e-17, 2.9819e-17, 3.6842e-17, 4.2298e-17,\n 2.2963e-17, 3.0662e-17, 2.8708e-17, 2.7021e-17, 1.9397e-16, 1.4940e-16,\n 2.1807e-17, 8.2280e-17, 8.1918e-18, 5.9113e-18, 1.2208e-16, 1.1615e-17,\n 8.7185e-17, 6.4848e-18, 2.4951e-18, 7.8722e-18, 8.4188e-18, 9.3612e-18,\n 1.1034e-17, 9.4975e-18, 3.4596e-18, 2.4645e-17, 3.7923e-16, 5.7928e-20,\n 1.8801e-17, 1.5765e-17, 1.1130e-17, 1.1374e-16, 3.3438e-17, 4.1006e-18,\n 1.6910e-17, 2.3315e-17, 1.7709e-19, 2.0658e-16, 1.0870e-17, 1.6554e-18,\n 5.1629e-17, 2.2909e-16, 1.9184e-22, 2.2403e-17, 1.1119e-16, 2.4747e-17,\n 2.0664e-17, 1.6133e-16, 3.9899e-17, 8.5961e-18, 8.5684e-21, 1.9348e-16,\n 3.9302e-18, 1.9387e-16, 1.1911e-19, 4.2990e-17, 6.1701e-19, 5.1654e-17,\n 7.2708e-17, 1.2360e-21, 6.1464e-18, 8.5465e-18, 7.2751e-18, 2.3660e-16,\n 1.2508e-17, 4.9629e-17, 1.1174e-17, 4.5976e-19, 2.5596e-19, 9.1184e-19,\n 8.3599e-18, 1.6657e-17, 1.3965e-18, 2.0491e-19, 8.7158e-17, 6.2209e-18,\n 4.5729e-18, 3.1890e-17, 2.7126e-17, 3.9914e-17, 1.6507e-17, 1.2808e-17,\n 4.3284e-18, 9.8052e-21, 3.2864e-17, 2.1310e-17, 9.5338e-18, 3.0454e-19,\n 1.9676e-19, 1.6280e-16, 7.5643e-18, 1.8770e-16, 5.9667e-17, 6.5132e-18,\n 8.9588e-17, 2.5321e-17, 5.2894e-17, 1.0053e-17], device='cuda:0')"
},
"28": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([9.9046e-20, 1.2287e-20, 3.6076e-20, 8.3660e-20, 1.4923e-20, 9.2212e-21,\n 5.3238e-20, 3.5277e-21, 1.8047e-20, 2.1960e-21, 1.1036e-19, 5.3838e-19,\n 7.5683e-20, 1.0199e-19, 1.6359e-19, 7.3004e-19, 5.9545e-20, 4.8100e-20,\n 2.2264e-19, 1.5224e-19, 2.6926e-23, 2.1845e-20, 6.1924e-20, 4.1983e-20,\n 6.8126e-20, 8.5427e-20, 5.8613e-20, 1.7447e-20, 2.0764e-21, 1.6114e-20,\n 1.3241e-19, 7.6898e-20, 1.1927e-20, 6.2372e-19, 2.3552e-19, 1.6658e-19,\n 3.5167e-21, 1.3286e-23, 5.6159e-20, 4.5694e-18, 2.5021e-19, 5.0149e-20,\n 1.6410e-20, 9.1772e-21, 4.3641e-19, 1.0018e-18, 1.0918e-19, 3.5685e-19,\n 1.0599e-20, 1.3007e-19, 1.2051e-21, 3.3723e-19, 6.5342e-20, 6.7769e-20,\n 6.8980e-21, 1.6114e-21, 1.8632e-19, 1.8339e-21, 1.5786e-19, 1.1745e-20,\n 4.5997e-20, 2.7295e-19, 4.5693e-19, 1.7888e-19, 6.7398e-22, 4.1239e-21,\n 1.0375e-19, 2.2026e-21, 3.9782e-20, 1.4980e-20, 3.3032e-21, 7.1020e-20,\n 2.3141e-19, 1.7248e-20, 4.8305e-20, 1.4136e-21, 4.7030e-21, 2.3314e-19,\n 7.4397e-21, 1.3248e-22, 1.7880e-19, 1.2460e-19, 1.4473e-22, 2.3879e-20,\n 1.3595e-19, 4.2640e-19, 3.4108e-20, 2.1591e-19, 2.4529e-20, 1.9151e-19,\n 3.3541e-19, 9.1182e-20, 2.2862e-19, 2.4724e-20, 6.4490e-20, 4.4652e-21,\n 3.1380e-20, 1.8902e-19, 6.0771e-20, 6.0143e-19, 1.2465e-18, 2.1255e-20,\n 4.7544e-22, 5.8294e-20, 7.6166e-22, 3.7285e-19, 4.1202e-20, 3.3615e-20,\n 1.1003e-19, 9.5076e-25, 4.7911e-20, 2.4047e-21, 3.6347e-19, 2.2934e-19,\n 1.2812e-20, 2.6782e-19, 2.9060e-20, 3.1825e-20, 6.1656e-20, 6.6419e-20,\n 1.1543e-20, 8.2166e-21, 6.6827e-21, 2.8336e-23, 1.8682e-21, 4.2600e-20,\n 4.8901e-19, 9.8489e-20, 4.0810e-19, 8.2657e-20, 4.3447e-23, 6.0424e-19,\n 1.7312e-19, 6.7014e-20, 2.6251e-19, 8.9179e-20, 9.3708e-20, 1.2482e-19,\n 1.0755e-19, 1.0569e-23, 7.2611e-21, 1.6343e-20, 2.3504e-20, 7.1809e-19,\n 1.2334e-19, 8.6781e-21, 2.0550e-19, 1.4397e-20, 8.1815e-20, 2.3176e-22,\n 6.1011e-21, 2.0368e-19, 3.5262e-19, 2.8998e-19, 1.5304e-20, 2.1983e-20,\n 2.1581e-19, 3.5967e-19, 5.2028e-20, 6.4128e-20, 5.0168e-20, 7.0716e-20,\n 1.1262e-19, 5.6534e-20, 8.9307e-20, 6.3237e-20, 3.5134e-19, 4.9275e-19,\n 4.4867e-20, 1.9377e-19, 1.5101e-20, 8.6036e-21, 2.6790e-19, 2.2852e-20,\n 2.5497e-19, 3.9028e-20, 5.4094e-21, 1.8291e-20, 1.6420e-20, 3.2896e-20,\n 1.4865e-20, 2.0714e-20, 3.9002e-21, 4.5802e-20, 1.1661e-18, 3.9423e-22,\n 2.9862e-20, 2.8655e-20, 1.2396e-20, 2.4951e-19, 4.9397e-20, 6.8165e-21,\n 5.9421e-20, 1.0724e-19, 7.4112e-22, 4.5879e-19, 3.6757e-20, 2.5422e-21,\n 2.3965e-19, 5.6755e-19, 7.5761e-23, 5.1035e-20, 4.0555e-19, 8.1433e-20,\n 7.6417e-20, 3.2082e-19, 7.7660e-20, 2.2322e-20, 3.1321e-23, 4.1367e-19,\n 6.1944e-21, 5.2608e-19, 1.3724e-21, 1.1500e-19, 6.4573e-22, 1.1655e-19,\n 1.0246e-19, 5.5612e-22, 2.0604e-20, 1.5043e-20, 1.8396e-20, 1.0716e-18,\n 4.5361e-20, 1.2989e-19, 3.1037e-20, 4.9784e-21, 7.0733e-24, 5.2300e-21,\n 1.1511e-20, 3.8016e-20, 5.3778e-21, 3.9046e-24, 3.2477e-19, 2.1011e-20,\n 1.6617e-20, 1.1522e-19, 9.8581e-20, 8.2520e-20, 7.4397e-20, 2.8707e-20,\n 2.2341e-20, 2.0017e-22, 1.1031e-19, 3.5675e-20, 1.7388e-20, 3.0610e-22,\n 3.5454e-22, 5.4124e-19, 1.7321e-20, 6.1284e-19, 1.0157e-19, 1.1229e-20,\n 3.6459e-19, 4.5913e-20, 9.5416e-20, 6.4988e-20], device='cuda:0')"
},
"29": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([2.2091e-19, 1.2139e-20, 6.5983e-20, 1.1237e-19, 1.8072e-20, 1.2553e-20,\n 7.9719e-20, 5.7111e-21, 3.2911e-20, 3.5978e-21, 1.9421e-19, 7.4323e-19,\n 1.1577e-19, 1.6594e-19, 1.9351e-19, 7.5636e-19, 1.0165e-19, 6.6022e-20,\n 2.1712e-19, 3.3129e-19, 3.7330e-24, 3.8912e-20, 8.0618e-20, 9.5326e-20,\n 9.3031e-20, 1.1758e-19, 1.2572e-19, 2.9625e-20, 5.1205e-21, 2.8712e-20,\n 2.2115e-19, 1.3901e-19, 2.6687e-20, 9.0227e-19, 3.8939e-19, 2.4973e-19,\n 2.8215e-21, 6.1046e-23, 1.0297e-19, 2.7198e-18, 3.8909e-19, 7.9052e-20,\n 2.6680e-20, 1.8115e-20, 4.4845e-19, 7.9613e-19, 1.6250e-19, 3.5415e-19,\n 2.6471e-20, 2.1885e-19, 2.1040e-21, 5.6763e-19, 1.1201e-19, 1.0523e-19,\n 1.2557e-20, 4.1450e-21, 2.5311e-19, 5.6151e-21, 1.8203e-19, 1.9066e-20,\n 5.8122e-20, 1.8662e-19, 4.1527e-19, 2.2739e-19, 2.3755e-21, 1.0239e-20,\n 1.7311e-19, 5.4043e-21, 1.0705e-19, 2.2797e-20, 2.1331e-21, 1.1281e-19,\n 3.3244e-19, 3.9611e-20, 6.1889e-20, 1.1878e-21, 7.6008e-21, 3.5478e-19,\n 1.0989e-20, 1.5414e-22, 2.1303e-19, 2.5917e-19, 5.5571e-22, 2.7815e-20,\n 1.6371e-19, 6.3415e-19, 5.3475e-20, 2.2052e-19, 3.4472e-20, 3.6854e-19,\n 2.7316e-19, 1.6229e-19, 3.2421e-19, 5.4711e-20, 1.3079e-19, 1.3366e-20,\n 5.0880e-20, 3.9223e-19, 1.2315e-19, 8.6589e-19, 8.2202e-19, 2.7964e-20,\n 8.1661e-22, 7.8756e-20, 1.1381e-21, 6.1596e-19, 3.4659e-20, 5.9427e-20,\n 1.1726e-19, 2.3036e-23, 6.4924e-20, 6.5471e-22, 4.9577e-19, 4.1502e-19,\n 2.9272e-20, 3.5490e-19, 4.2252e-20, 5.2485e-20, 1.1167e-19, 1.0660e-19,\n 2.7977e-20, 6.6464e-21, 7.6999e-21, 7.3893e-23, 4.4079e-21, 7.1431e-20,\n 8.1469e-19, 1.4652e-19, 3.5300e-19, 1.1816e-19, 2.6422e-24, 9.3155e-19,\n 1.7685e-19, 1.5519e-19, 2.8684e-19, 1.6848e-19, 2.3843e-19, 2.7058e-19,\n 2.1896e-19, 2.2739e-23, 1.0821e-20, 1.9111e-20, 6.7996e-20, 8.8058e-19,\n 2.0225e-19, 1.6412e-20, 2.0701e-19, 2.8835e-20, 1.5593e-19, 1.8902e-21,\n 1.3359e-20, 2.6485e-19, 2.5977e-19, 2.3755e-19, 4.6383e-20, 1.8623e-20,\n 1.3948e-19, 4.9266e-19, 7.3168e-20, 1.3755e-19, 1.3072e-19, 1.4350e-19,\n 6.2190e-20, 1.3116e-19, 1.2923e-19, 8.8096e-20, 7.1266e-19, 5.1721e-19,\n 1.0546e-19, 3.6741e-19, 2.0415e-20, 2.0131e-20, 4.3617e-19, 5.3731e-20,\n 2.9225e-19, 3.9914e-20, 7.1935e-21, 3.9670e-20, 2.1635e-20, 4.7246e-20,\n 3.2783e-20, 2.7657e-20, 9.2380e-21, 7.6647e-20, 1.4875e-18, 7.3903e-22,\n 6.2850e-20, 4.9457e-20, 3.2165e-20, 4.6943e-19, 1.2254e-19, 1.2707e-20,\n 7.9715e-20, 1.1718e-19, 2.7181e-21, 8.9449e-19, 2.9401e-20, 4.3572e-21,\n 2.4047e-19, 8.5628e-19, 6.9536e-22, 1.0265e-19, 3.8361e-19, 6.8065e-20,\n 1.0130e-19, 6.2904e-19, 1.3416e-19, 2.6240e-20, 2.6692e-22, 7.0484e-19,\n 8.5945e-21, 7.8053e-19, 1.2942e-21, 1.8905e-19, 5.1163e-22, 1.9137e-19,\n 2.5124e-19, 6.5362e-22, 3.4118e-20, 2.7287e-20, 1.5327e-20, 9.8176e-19,\n 6.2159e-20, 2.2968e-19, 5.8402e-20, 6.5448e-21, 1.1020e-23, 8.4959e-21,\n 2.1985e-20, 7.4153e-20, 9.6015e-21, 1.1625e-22, 3.8440e-19, 3.0992e-20,\n 2.2264e-20, 1.4764e-19, 7.8974e-20, 1.8075e-19, 8.2646e-20, 3.7704e-20,\n 1.7590e-20, 4.6939e-22, 1.5104e-19, 7.4131e-20, 3.1844e-20, 3.7238e-22,\n 1.5468e-22, 5.6402e-19, 1.7984e-20, 7.7503e-19, 2.0109e-19, 1.7678e-20,\n 2.9458e-19, 8.1491e-20, 2.0144e-19, 5.5838e-20], device='cuda:0')"
},
"30": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.0342e-20, 6.8698e-20, 0.0000e+00, ..., 2.3632e-19, 1.3447e-19,\n 1.4376e-19],\n [3.2496e-20, 1.2137e-20, 0.0000e+00, ..., 1.0151e-20, 1.1758e-19,\n 5.3023e-21],\n [2.6467e-20, 4.6703e-20, 0.0000e+00, ..., 3.2487e-20, 2.2039e-19,\n 1.3030e-20],\n ...,\n [6.3986e-21, 1.7420e-20, 0.0000e+00, ..., 2.2156e-20, 3.2931e-20,\n 2.0330e-22],\n [1.4610e-19, 1.2988e-19, 0.0000e+00, ..., 1.8886e-19, 8.0559e-19,\n 1.6423e-19],\n [1.1795e-21, 1.5770e-21, 0.0000e+00, ..., 6.0829e-21, 1.4226e-20,\n 1.4803e-22]], device='cuda:0')"
},
"31": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.2605e-16, 3.9183e-17, 4.9822e-17, 1.5505e-18, 1.4088e-17, 1.4902e-18,\n 1.5284e-17, 1.3071e-18, 8.9311e-18, 5.7242e-18, 1.1759e-16, 1.1591e-16,\n 1.5908e-17, 1.1484e-17, 2.2471e-17, 3.0399e-18, 5.7070e-17, 1.6976e-17,\n 5.7005e-17, 1.5872e-16, 2.6107e-19, 4.5497e-17, 1.0025e-17, 9.3139e-17,\n 6.8698e-18, 1.5086e-17, 9.9751e-17, 1.6126e-18, 5.6637e-18, 1.7293e-17,\n 2.9999e-17, 9.8750e-17, 1.3050e-19, 1.7773e-16, 1.0226e-16, 1.2139e-16,\n 4.6044e-18, 1.6411e-19, 6.5965e-17, 4.3556e-16, 1.8306e-16, 5.1550e-18,\n 4.6850e-18, 4.7224e-18, 3.3288e-17, 7.5986e-17, 4.6311e-17, 2.1403e-17,\n 4.6619e-18, 6.4388e-17, 1.7258e-18, 1.6124e-16, 8.0813e-17, 5.3842e-17,\n 2.3834e-18, 4.9501e-18, 3.6238e-17, 4.2395e-18, 2.6866e-17, 8.4946e-18,\n 1.0380e-17, 2.6865e-17, 4.4221e-17, 1.0877e-16, 1.1395e-18, 2.7112e-17,\n 2.6112e-17, 6.3131e-18, 5.2017e-17, 7.5520e-18, 5.2963e-20, 2.1045e-17,\n 8.6369e-17, 8.4174e-18, 5.1543e-18, 6.6719e-20, 1.3549e-17, 1.5159e-16,\n 5.9311e-18, 4.2565e-20, 6.1765e-17, 1.9725e-16, 8.8160e-20, 1.4508e-18,\n 2.4498e-17, 1.4603e-16, 6.5931e-18, 2.9950e-17, 2.1910e-17, 5.6086e-18,\n 3.1027e-17, 1.1187e-16, 3.4079e-17, 3.9539e-17, 1.2680e-16, 3.1604e-17,\n 5.6038e-17, 1.9013e-16, 2.9636e-17, 2.2999e-16, 2.1424e-17, 4.3877e-18,\n 8.5480e-21, 2.2686e-17, 2.4903e-19, 2.7415e-16, 7.8473e-18, 3.1013e-17,\n 2.3582e-17, 1.6392e-19, 2.0787e-16, 9.1951e-18, 6.4571e-17, 9.7242e-17,\n 1.6990e-17, 1.3539e-16, 2.9183e-18, 1.0750e-17, 1.6820e-17, 9.0269e-17,\n 9.4137e-18, 7.2079e-18, 4.0130e-18, 6.7389e-20, 6.6708e-19, 4.8501e-17,\n 8.1164e-17, 1.1958e-17, 4.0221e-17, 8.2429e-17, 8.9999e-19, 1.8482e-16,\n 4.5135e-17, 6.7570e-18, 7.8236e-17, 2.9599e-17, 1.4810e-16, 2.8699e-17,\n 2.2616e-17, 1.6271e-20, 5.7674e-20, 5.5749e-19, 1.0143e-16, 3.7605e-16,\n 6.5848e-17, 1.2964e-17, 1.5908e-17, 2.5168e-17, 2.2226e-17, 1.8992e-18,\n 5.8146e-18, 5.1511e-17, 3.7355e-17, 9.1426e-18, 1.1189e-17, 6.6184e-18,\n 1.7798e-17, 2.0032e-16, 8.8640e-18, 9.6022e-18, 3.1733e-17, 9.9342e-18,\n 9.4593e-18, 2.8340e-17, 8.9945e-18, 4.2208e-17, 1.9354e-16, 1.4002e-17,\n 6.6099e-17, 1.1985e-17, 6.0750e-19, 2.1949e-17, 4.0566e-17, 6.3456e-18,\n 8.2106e-17, 1.3958e-17, 9.6920e-18, 9.2573e-18, 1.0319e-17, 2.7900e-17,\n 3.3689e-17, 1.0052e-17, 1.5039e-17, 2.4975e-17, 2.2067e-16, 1.2896e-19,\n 1.7302e-17, 6.5040e-17, 1.1550e-17, 2.1194e-16, 3.6818e-17, 2.1679e-17,\n 1.2464e-17, 4.3462e-18, 1.9068e-20, 5.8773e-16, 8.2119e-18, 1.7530e-18,\n 1.7226e-17, 8.0592e-17, 9.2498e-20, 2.3909e-16, 1.6480e-17, 2.0566e-17,\n 2.0205e-17, 5.5519e-17, 1.0079e-16, 2.0525e-18, 4.6943e-20, 3.8476e-17,\n 1.1115e-17, 2.8256e-17, 1.0530e-18, 7.0320e-17, 1.3926e-18, 1.5313e-16,\n 1.7768e-16, 6.9937e-19, 1.7498e-18, 1.7800e-17, 7.2504e-18, 9.9207e-17,\n 9.2259e-18, 1.2012e-16, 1.6201e-17, 2.2239e-21, 2.0602e-20, 1.5884e-18,\n 2.4653e-18, 2.5961e-17, 6.8332e-19, 2.3051e-18, 2.6820e-17, 1.5559e-17,\n 1.0618e-16, 8.1725e-19, 7.4622e-18, 4.9710e-17, 1.3888e-17, 1.6269e-17,\n 2.9501e-17, 1.2030e-19, 1.1681e-17, 4.1676e-17, 1.7126e-17, 4.1320e-19,\n 9.6300e-19, 7.9600e-17, 5.5170e-18, 1.7814e-17, 1.4326e-16, 2.1945e-18,\n 8.9349e-17, 7.9002e-18, 2.1272e-16, 5.0879e-18], device='cuda:0')"
},
"32": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([2.7169e-19, 1.2559e-19, 1.3917e-19, 8.2366e-21, 3.1206e-20, 4.4236e-21,\n 2.2293e-20, 1.0638e-20, 1.2186e-20, 1.3740e-20, 2.1649e-19, 1.9057e-19,\n 3.2926e-20, 2.9102e-20, 7.4197e-20, 2.2074e-20, 1.3619e-19, 2.9543e-20,\n 1.3948e-19, 3.7740e-19, 1.2561e-22, 1.5972e-19, 4.7782e-20, 1.9619e-19,\n 2.3059e-20, 1.7458e-20, 4.7634e-19, 4.9650e-21, 1.9698e-20, 3.3192e-20,\n 4.5388e-20, 7.1928e-19, 1.2272e-21, 3.0388e-19, 3.0411e-19, 4.6232e-19,\n 5.7660e-21, 1.8592e-22, 1.9245e-19, 1.1025e-18, 5.4139e-19, 8.5733e-21,\n 1.2483e-20, 2.1498e-20, 4.3155e-20, 1.3629e-19, 1.9104e-19, 5.1539e-20,\n 5.9502e-21, 1.4953e-19, 1.8848e-21, 4.9397e-19, 1.6795e-19, 1.1728e-19,\n 2.7889e-21, 3.5299e-20, 7.8072e-20, 4.6915e-21, 1.3601e-19, 2.9289e-20,\n 2.3871e-20, 1.1018e-19, 1.4039e-19, 4.8845e-19, 1.3738e-21, 7.8898e-20,\n 7.1148e-20, 1.0123e-20, 1.2082e-19, 1.6373e-20, 1.5341e-23, 7.6751e-20,\n 2.2078e-19, 2.7833e-20, 7.7925e-21, 2.5230e-22, 9.6315e-20, 4.4063e-19,\n 1.2350e-20, 2.2798e-24, 3.6056e-19, 1.0856e-18, 5.3537e-22, 8.1488e-21,\n 1.1023e-19, 4.2644e-19, 2.5246e-20, 1.1687e-19, 4.6669e-20, 2.9454e-20,\n 8.4438e-20, 2.6808e-19, 4.9348e-20, 1.0442e-19, 4.3344e-19, 6.3058e-20,\n 1.4988e-19, 4.3517e-19, 7.5544e-20, 6.6616e-19, 4.7370e-20, 2.6588e-20,\n 1.2197e-21, 4.3457e-20, 1.2640e-22, 8.4406e-19, 1.5592e-20, 1.5716e-19,\n 3.7899e-20, 6.0298e-26, 6.3663e-19, 2.7074e-20, 1.6271e-19, 1.6924e-19,\n 3.0252e-20, 4.2532e-19, 9.1333e-21, 1.7590e-20, 3.3506e-20, 3.9016e-19,\n 1.1472e-20, 1.0327e-20, 4.2579e-21, 3.1528e-21, 6.1918e-21, 1.6057e-19,\n 1.4533e-19, 3.0085e-20, 1.2379e-19, 3.1077e-19, 4.6900e-23, 3.4686e-19,\n 1.7416e-19, 2.0684e-20, 1.2967e-19, 4.2595e-20, 5.0546e-19, 6.4746e-20,\n 5.7283e-20, 2.5056e-24, 2.3699e-23, 2.7438e-21, 2.4375e-19, 1.5563e-18,\n 2.2821e-19, 4.9287e-20, 3.9041e-20, 1.5029e-19, 2.6306e-20, 1.8929e-21,\n 3.8123e-20, 1.0518e-19, 1.4782e-19, 1.4914e-20, 1.4754e-20, 5.2890e-20,\n 3.1497e-20, 5.0893e-19, 1.3392e-20, 4.1801e-20, 7.0324e-20, 1.4770e-20,\n 1.1448e-20, 9.6581e-20, 2.1572e-20, 9.1644e-20, 3.6271e-19, 2.6829e-20,\n 3.1827e-19, 3.9476e-20, 1.9140e-22, 4.1195e-20, 6.5909e-20, 1.9369e-20,\n 2.3305e-19, 9.6763e-20, 1.4277e-20, 2.9879e-20, 2.1557e-20, 5.9651e-20,\n 7.6313e-20, 1.5440e-20, 3.2082e-20, 4.0925e-20, 5.1234e-19, 8.3211e-24,\n 2.9163e-20, 2.3724e-19, 1.3212e-20, 4.6239e-19, 6.1972e-20, 5.3370e-20,\n 4.8227e-20, 1.3660e-20, 4.9193e-23, 2.4078e-18, 8.8217e-21, 1.2990e-21,\n 3.5947e-20, 1.5446e-19, 3.1873e-21, 1.0499e-18, 2.8915e-20, 5.6764e-20,\n 4.0294e-20, 1.2392e-19, 2.6001e-19, 3.6069e-21, 3.3868e-24, 6.6959e-20,\n 2.2924e-20, 6.4017e-20, 3.3233e-21, 2.4627e-19, 1.0056e-21, 4.4384e-19,\n 4.1866e-19, 8.4740e-23, 4.8704e-21, 3.8155e-20, 1.4128e-20, 2.4117e-19,\n 3.1186e-20, 2.5544e-19, 4.4394e-20, 2.0168e-22, 1.4955e-22, 5.8111e-21,\n 3.5025e-21, 7.8379e-20, 2.0460e-21, 1.3246e-21, 9.2817e-20, 5.4432e-20,\n 3.3469e-19, 5.5431e-21, 1.1026e-20, 1.0617e-19, 5.9340e-20, 2.9796e-20,\n 5.7950e-20, 6.5492e-23, 3.3165e-20, 1.1881e-19, 2.8005e-20, 3.2633e-23,\n 4.1387e-22, 1.3024e-19, 8.7178e-21, 6.0864e-20, 4.2130e-19, 2.0514e-21,\n 3.2038e-19, 1.4888e-20, 5.6727e-19, 1.6037e-20], device='cuda:0')"
},
"33": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([5.2110e-19, 1.8373e-19, 1.4600e-19, 9.4857e-21, 3.0470e-20, 1.1847e-20,\n 4.1951e-20, 1.3738e-20, 2.0549e-20, 9.5579e-21, 3.8392e-19, 4.1249e-19,\n 4.9525e-20, 5.4892e-20, 1.1672e-19, 1.2634e-20, 2.5575e-19, 4.3511e-20,\n 1.8332e-19, 6.6175e-19, 1.0354e-22, 2.1100e-19, 5.8984e-20, 3.9137e-19,\n 3.8216e-20, 4.7690e-20, 4.4772e-19, 9.4546e-21, 3.4298e-20, 4.5341e-20,\n 1.4244e-19, 4.6233e-19, 1.3802e-21, 6.0864e-19, 4.3362e-19, 5.3678e-19,\n 9.6907e-21, 3.3372e-22, 2.9399e-19, 1.5253e-18, 6.1286e-19, 3.0358e-20,\n 2.7775e-20, 2.9448e-20, 1.1226e-19, 2.5363e-19, 2.2361e-19, 1.0446e-19,\n 1.2366e-20, 2.1798e-19, 1.6229e-21, 6.8560e-19, 2.7467e-19, 1.6334e-19,\n 4.7417e-21, 3.7842e-20, 1.6091e-19, 1.0419e-20, 1.3877e-19, 4.6549e-20,\n 2.3105e-20, 1.3722e-19, 2.1077e-19, 4.8416e-19, 1.8953e-21, 7.0933e-20,\n 1.2985e-19, 1.3530e-20, 2.2495e-19, 3.8473e-20, 3.8379e-25, 1.0545e-19,\n 2.7866e-19, 4.7816e-20, 1.4305e-20, 1.1596e-21, 7.8501e-20, 6.2653e-19,\n 9.0462e-21, 6.2130e-23, 2.9253e-19, 8.5711e-19, 7.3105e-22, 1.3076e-20,\n 1.2438e-19, 6.1024e-19, 3.7705e-20, 1.4830e-19, 7.1392e-20, 2.4722e-20,\n 1.4274e-19, 4.9429e-19, 1.1093e-19, 1.2607e-19, 5.5312e-19, 9.0995e-20,\n 1.7132e-19, 6.4958e-19, 1.4804e-19, 9.6625e-19, 1.0330e-19, 2.8760e-20,\n 2.5848e-21, 6.3403e-20, 7.6599e-23, 9.2509e-19, 1.6085e-20, 1.5440e-19,\n 6.8975e-20, 7.9873e-23, 6.9976e-19, 4.9991e-20, 2.9088e-19, 3.3434e-19,\n 4.2242e-20, 4.4930e-19, 1.5600e-20, 3.0379e-20, 7.5954e-20, 4.1507e-19,\n 2.8614e-20, 2.0619e-20, 7.5596e-21, 1.6687e-21, 7.6944e-21, 1.4259e-19,\n 3.4150e-19, 6.0255e-20, 1.8624e-19, 2.4841e-19, 2.5314e-22, 7.5724e-19,\n 2.1202e-19, 3.5421e-20, 2.5683e-19, 9.5663e-20, 6.2655e-19, 9.9008e-20,\n 1.0630e-19, 1.0203e-22, 2.2093e-22, 4.8454e-21, 3.1129e-19, 1.2674e-18,\n 2.8930e-19, 6.8556e-20, 7.5822e-20, 1.3473e-19, 7.3815e-20, 2.0139e-21,\n 3.9226e-20, 1.6379e-19, 1.8172e-19, 2.7640e-20, 3.4310e-20, 4.7875e-20,\n 5.0015e-20, 6.8420e-19, 2.5287e-20, 4.3082e-20, 1.0603e-19, 2.9140e-20,\n 2.3258e-20, 1.3024e-19, 4.6243e-20, 1.3222e-19, 6.8151e-19, 4.6303e-20,\n 3.1405e-19, 5.7876e-20, 1.0564e-21, 6.2592e-20, 1.4310e-19, 3.4651e-20,\n 2.6573e-19, 8.5301e-20, 2.4391e-20, 5.2763e-20, 2.1696e-20, 1.2676e-19,\n 9.2298e-20, 2.6461e-20, 3.6043e-20, 7.1761e-20, 9.3201e-19, 6.8855e-23,\n 5.4442e-20, 1.8746e-19, 3.1762e-20, 8.5946e-19, 1.2412e-19, 6.2928e-20,\n 6.3996e-20, 2.7821e-20, 4.6681e-23, 2.3742e-18, 1.9315e-20, 3.2228e-21,\n 8.5376e-20, 2.8905e-19, 5.5977e-21, 1.0293e-18, 5.8415e-20, 5.1882e-20,\n 9.6019e-20, 2.0940e-19, 3.0982e-19, 5.8664e-21, 1.6054e-22, 1.3794e-19,\n 2.4087e-20, 1.1836e-19, 7.9813e-21, 3.0959e-19, 1.1289e-21, 5.0531e-19,\n 6.0141e-19, 1.0368e-22, 9.9281e-21, 4.4489e-20, 1.1101e-20, 4.3267e-19,\n 5.1924e-20, 5.1450e-19, 7.7661e-20, 5.3449e-22, 1.2244e-22, 1.2269e-20,\n 5.8584e-21, 1.2562e-19, 4.5439e-21, 2.6966e-21, 1.2180e-19, 8.0367e-20,\n 4.6112e-19, 4.1888e-21, 1.7956e-20, 2.2307e-19, 7.5950e-20, 4.3071e-20,\n 1.3257e-19, 6.8388e-23, 6.2382e-20, 1.4713e-19, 5.0968e-20, 1.4819e-24,\n 5.7536e-22, 2.5850e-19, 1.2303e-20, 8.1913e-20, 4.8158e-19, 4.4638e-21,\n 2.6687e-19, 2.4524e-20, 7.3949e-19, 3.1321e-20], device='cuda:0')"
},
"34": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.8574e-21, 6.7067e-21, 4.0915e-21, ..., 9.2602e-22, 5.0701e-21,\n 3.0467e-21],\n [1.3220e-22, 1.9777e-23, 2.1555e-22, ..., 5.7523e-22, 6.6871e-22,\n 2.3834e-23],\n [6.4438e-22, 2.8340e-21, 8.9360e-22, ..., 4.5726e-22, 2.7536e-21,\n 5.2688e-22],\n ...,\n [1.6124e-20, 3.8247e-20, 3.9637e-20, ..., 7.0432e-21, 5.9469e-20,\n 8.6067e-20],\n [1.0767e-19, 1.7030e-19, 2.6285e-19, ..., 5.8368e-20, 2.5718e-19,\n 4.7219e-19],\n [1.4773e-18, 3.2767e-18, 3.9155e-18, ..., 8.3355e-19, 4.9318e-18,\n 7.7376e-18]], device='cuda:0')"
},
"35": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([3.7448e-20, 1.2035e-21, 1.0744e-20, 7.0702e-22, 4.7341e-21, 6.9988e-21,\n 1.0335e-21, 8.9402e-22, 4.3552e-23, 7.2966e-21, 5.9183e-21, 1.0236e-20,\n 2.5423e-22, 2.1522e-22, 2.6186e-21, 1.7175e-21, 5.6655e-22, 2.2424e-20,\n 7.6524e-21, 9.8012e-23, 3.8588e-22, 3.1313e-21, 7.2299e-21, 2.7889e-22,\n 1.3507e-20, 1.5841e-21, 2.2430e-21, 2.2871e-22, 5.2128e-22, 1.0663e-21,\n 1.0643e-21, 1.7701e-20, 2.5652e-21, 1.3731e-22, 5.1799e-24, 5.2922e-21,\n 1.8832e-21, 7.5870e-22, 2.2686e-23, 1.0115e-24, 3.0734e-21, 3.8461e-22,\n 1.8780e-21, 8.8674e-22, 1.5169e-22, 1.7269e-21, 2.1867e-21, 2.2186e-21,\n 1.1714e-21, 4.9777e-21, 1.9317e-21, 5.4941e-22, 7.1689e-22, 4.2217e-22,\n 1.5837e-23, 2.1532e-23, 4.9283e-22, 3.9556e-23, 1.5857e-21, 6.7437e-23,\n 5.6003e-21, 1.8677e-21, 4.4076e-24, 3.3979e-21, 6.5713e-21, 2.2382e-21,\n 3.3558e-21, 6.6446e-22, 8.5100e-23, 5.2456e-23, 3.4383e-21, 3.6320e-22,\n 2.8900e-23, 1.4199e-21, 8.2732e-22, 2.2301e-21, 1.6403e-21, 4.2478e-22,\n 7.0350e-27, 1.5404e-21, 1.1731e-21, 1.0326e-20, 1.5801e-22, 1.4598e-21,\n 1.9859e-22, 1.0020e-21, 5.8410e-22, 1.6163e-24, 9.9387e-22, 1.2899e-22,\n 4.6137e-24, 7.4857e-22, 6.6938e-22, 1.9076e-20, 6.2852e-21, 4.3708e-21,\n 4.7618e-22, 2.2470e-20, 1.5521e-21, 2.8784e-21, 2.6753e-22, 3.0775e-22,\n 1.5157e-22, 1.6462e-21, 2.0010e-23, 5.7408e-21, 9.4272e-21, 1.0479e-21,\n 1.1898e-21, 1.7888e-22, 1.8095e-21, 2.0548e-20, 1.5544e-22, 2.3540e-20,\n 3.2652e-21, 1.4546e-20, 1.4275e-27, 1.3343e-20, 9.7199e-21, 4.8105e-21,\n 1.8410e-20, 8.2361e-23, 9.6114e-21, 3.6242e-23, 5.2455e-21, 8.7370e-21,\n 7.9180e-21, 5.7351e-21, 1.0586e-20, 1.6020e-20, 1.8424e-21, 3.5933e-21,\n 1.0822e-22, 6.1749e-21, 1.7740e-21, 1.6226e-21, 3.7519e-21, 5.0801e-21,\n 4.2684e-24, 9.9112e-22, 1.2461e-20, 4.5857e-23, 2.6209e-21, 9.6700e-22,\n 6.0534e-21, 2.5181e-20, 5.0069e-21, 6.4306e-23, 1.5881e-20, 2.4083e-22,\n 5.1219e-22, 4.1609e-21, 1.7277e-20, 1.4200e-21, 6.1012e-21, 9.1533e-21,\n 6.7402e-23, 1.6998e-23, 6.2306e-22, 3.0631e-21, 2.9224e-21, 8.1048e-22,\n 1.2201e-20, 6.3108e-21, 7.0274e-22, 6.7808e-24, 1.3541e-22, 1.9184e-23,\n 3.8307e-22, 1.7250e-21, 3.4881e-22, 2.1161e-22, 1.4608e-21, 1.7697e-21,\n 1.6108e-22, 4.5194e-23, 1.6934e-21, 3.9097e-22, 2.4281e-22, 3.4606e-23,\n 2.8215e-21, 8.2070e-24, 6.1025e-22, 2.6344e-21, 7.4999e-22, 3.4971e-22,\n 2.6134e-21, 8.7735e-22, 8.8035e-21, 2.8743e-22, 2.4549e-21, 2.0235e-22,\n 2.2235e-21, 2.2080e-20, 3.1164e-21, 6.6408e-21, 3.3968e-21, 9.9290e-21,\n 1.1356e-24, 1.2058e-22, 5.7756e-22, 8.5752e-22, 1.4784e-21, 1.2179e-20,\n 3.0574e-22, 9.1543e-22, 6.8457e-23, 1.0311e-21, 1.1876e-22, 2.7579e-21,\n 4.7196e-25, 1.8546e-21, 1.2530e-22, 1.3660e-21, 1.2568e-20, 6.3713e-21,\n 9.3597e-21, 2.5313e-21, 3.2032e-21, 9.9649e-21, 2.5306e-21, 4.8323e-23,\n 1.2812e-21, 1.1397e-20, 7.2899e-23, 1.5046e-21, 9.1109e-21, 3.2392e-21,\n 2.8575e-21, 2.7646e-21, 2.3927e-21, 7.1240e-23, 1.0508e-21, 5.5510e-22,\n 2.5752e-21, 7.0677e-22, 1.7798e-23, 1.6327e-22, 6.5931e-22, 6.9434e-23,\n 6.3870e-21, 1.7616e-21, 2.6969e-22, 2.1349e-21, 6.5390e-23, 2.4416e-21,\n 6.5581e-21, 8.4148e-22, 2.7882e-21, 8.6917e-22, 6.6307e-22, 6.8190e-21,\n 2.6484e-21, 7.5456e-21, 7.0587e-21, 1.5300e-21, 2.4274e-37, 1.0731e-38,\n 4.3779e-36, 3.0781e-37, 4.9015e-36, 1.9264e-37, 1.2636e-37, 2.6768e-36,\n 1.7542e-36, 2.5533e-36, 7.7080e-38, 9.5473e-38, 2.6776e-36, 2.4064e-36,\n 1.3266e-37, 2.9232e-37, 1.0769e-36, 6.8608e-40, 6.8077e-36, 1.0775e-36,\n 3.5458e-37, 2.4220e-36, 1.0772e-36, 1.1058e-36, 8.0854e-37, 2.5825e-37,\n 6.3449e-37, 7.2615e-38, 5.4867e-36, 1.1277e-36, 9.7958e-37, 8.7247e-37,\n 4.7224e-36, 1.4305e-36, 1.5431e-37, 5.4681e-37, 8.6894e-37, 1.2399e-38,\n 1.1935e-37, 3.7439e-38, 2.5775e-39, 1.1520e-37, 5.7046e-37, 8.1893e-39,\n 9.3560e-37, 2.7506e-37, 1.4535e-36, 7.5101e-39, 1.1888e-36, 1.0241e-38,\n 3.1339e-37, 3.5793e-41, 4.4010e-37, 2.0951e-36, 6.0790e-37, 1.1667e-37,\n 4.5515e-36, 5.8554e-37, 8.4576e-36, 6.8054e-37, 1.9551e-38, 1.0875e-37,\n 9.8728e-37, 5.8535e-37, 1.5081e-37, 7.8599e-37, 2.8766e-36, 3.7694e-36,\n 2.0074e-36, 8.2489e-38, 7.0232e-36, 5.1195e-36, 5.3801e-36, 8.9797e-36,\n 1.8578e-35, 5.8416e-37, 1.1577e-35, 2.4621e-37, 8.5268e-37, 1.7004e-37,\n 2.5059e-36, 9.9260e-36, 5.9306e-36, 3.8642e-36, 1.0281e-36, 1.8595e-36,\n 9.0494e-36, 2.0352e-35, 1.1398e-36, 3.1218e-36, 1.7970e-36, 9.4099e-36,\n 9.8734e-37, 5.7694e-38, 6.5472e-37, 6.2508e-37, 5.8132e-36, 1.9763e-38,\n 1.1852e-36, 8.5707e-39, 6.7388e-37, 3.1819e-36, 4.6780e-36, 4.0091e-37,\n 1.0576e-37, 4.6199e-37, 7.9938e-37, 4.5844e-38, 2.8159e-38, 1.3163e-37,\n 1.5216e-37, 2.6019e-38, 6.0223e-37, 7.5274e-37, 1.0648e-36, 9.6570e-37,\n 6.8679e-37, 9.0692e-39, 3.0634e-40, 1.7648e-38, 1.2500e-38, 1.2985e-36,\n 1.8003e-36, 1.3177e-36, 6.2218e-37, 1.6881e-36, 8.3671e-39, 2.8289e-36,\n 3.0119e-38, 3.4594e-37, 1.4091e-37, 4.4903e-36, 1.7948e-36, 2.8063e-36,\n 2.3347e-36, 4.9450e-38, 4.3984e-38, 5.1921e-37, 1.0395e-36, 3.2173e-36,\n 3.4008e-37, 1.6134e-37, 9.2574e-38, 1.4022e-38, 5.2780e-36, 4.9643e-37,\n 1.4920e-36, 1.1242e-37, 1.7676e-37, 1.1433e-35, 1.9435e-36, 2.2098e-38,\n 1.5773e-36, 1.8724e-36, 5.4345e-36, 3.2305e-36, 4.1379e-36, 1.9795e-36,\n 2.9873e-38, 2.3295e-35, 5.2778e-36, 6.4728e-38, 6.3840e-36, 1.6218e-36,\n 1.4115e-36, 8.3120e-38, 9.5828e-37, 4.5679e-37, 4.7581e-37, 1.8701e-36,\n 1.4957e-36, 1.7798e-40, 1.3877e-36, 1.4027e-37, 2.1596e-38, 3.3005e-37,\n 3.3796e-36, 2.3916e-36, 1.2365e-38, 1.8837e-36, 5.4682e-37, 3.9274e-37,\n 6.5814e-36, 6.5919e-36, 6.9771e-37, 3.5856e-38, 6.7357e-36, 6.2409e-36,\n 2.9101e-36, 3.7013e-37, 3.7770e-37, 1.5000e-36, 1.7591e-36, 1.9700e-36,\n 9.6940e-36, 6.8235e-37, 2.1374e-37, 3.3747e-36, 6.5141e-37, 3.2557e-36,\n 5.1380e-37, 5.5694e-37, 5.2819e-37, 2.5159e-36, 4.9090e-36, 3.5400e-36,\n 2.3581e-38, 8.9415e-38, 1.1490e-36, 4.7474e-40, 3.4847e-37, 2.1864e-36,\n 2.3336e-37, 6.7714e-37, 9.9904e-37, 1.9940e-36, 6.2301e-37, 9.9126e-37,\n 2.0516e-36, 1.1228e-36, 9.6950e-38, 2.2479e-37, 7.1609e-38, 1.3014e-37,\n 8.8452e-37, 5.8657e-37, 1.6545e-36, 5.6822e-36, 3.5827e-36, 2.4586e-36,\n 1.7596e-36, 2.1015e-36, 7.5902e-38, 4.3534e-36, 1.3175e-37, 1.9854e-37,\n 9.3655e-37, 5.4307e-37, 9.8947e-37, 1.9989e-37, 1.5335e-37, 3.0782e-41,\n 9.2892e-38, 7.6868e-38, 1.4299e-36, 2.5419e-36, 1.9775e-37, 1.7085e-36,\n 7.5169e-37, 2.2466e-36, 1.8564e-38, 6.3087e-37, 1.7980e-37, 2.6616e-36,\n 4.1276e-37, 2.3801e-37, 4.9393e-18, 7.7323e-17, 1.3611e-17, 1.2980e-17,\n 1.0270e-16, 9.1250e-17, 1.2447e-18, 6.6345e-19, 1.6633e-18, 1.6647e-17,\n 2.2050e-20, 4.4381e-18, 8.0565e-17, 2.7100e-18, 4.4591e-18, 2.0417e-17,\n 2.9974e-17, 6.1571e-18, 5.4881e-17, 2.0707e-17, 2.2966e-20, 6.4497e-20,\n 1.1138e-17, 7.3379e-17, 6.1819e-17, 2.1537e-19, 2.7845e-17, 1.4976e-18,\n 7.8349e-18, 1.5895e-18, 3.4651e-17, 2.8934e-17, 1.1215e-16, 1.2038e-18,\n 2.2943e-17, 3.2935e-17, 1.9429e-17, 1.9664e-17, 1.7343e-17, 6.8546e-19,\n 1.0621e-16, 3.4781e-19, 1.7181e-18, 3.0251e-18, 5.1433e-17, 4.3983e-18,\n 9.6185e-18, 4.0137e-18, 7.7202e-19, 7.3511e-18, 1.4736e-17, 2.6321e-18,\n 4.2981e-17, 4.0565e-17, 4.3718e-18, 3.0636e-18, 4.7406e-18, 3.4359e-19,\n 2.7758e-17, 1.0530e-17, 2.2444e-17, 2.2213e-17, 2.3387e-18, 6.0562e-19,\n 1.2504e-21, 6.0958e-20, 1.8829e-19, 5.6310e-19, 8.1209e-18, 5.7874e-19,\n 7.2316e-18, 1.8859e-17, 2.7142e-17, 4.3263e-18, 9.8042e-18, 1.2379e-19,\n 5.5290e-18, 1.6372e-16, 8.5905e-19, 8.2536e-18, 1.1081e-17, 7.5783e-17,\n 2.7813e-20, 2.1494e-17, 6.1933e-18, 2.9913e-17, 1.9098e-17, 2.2176e-17,\n 1.8120e-21, 7.0735e-17, 4.7550e-17, 4.3288e-20, 2.1296e-18, 5.1225e-17,\n 2.3792e-18, 9.9372e-18, 1.4045e-19, 3.5296e-17, 6.7959e-18, 3.3031e-17,\n 1.7216e-17, 5.9700e-17, 7.5189e-19, 1.4436e-17, 2.9597e-17, 1.0785e-16,\n 4.7693e-17, 8.4176e-20, 1.8083e-18, 1.8108e-17, 1.1318e-17, 2.0902e-18,\n 3.1889e-18, 3.7698e-18, 1.1180e-17, 1.6628e-18, 1.4088e-17, 2.7958e-19,\n 4.3271e-17, 5.4689e-19, 4.2397e-17, 4.8570e-17, 1.4965e-17, 4.4102e-19,\n 5.1145e-18, 5.9874e-19, 1.4002e-17, 3.3185e-17, 3.3135e-18, 8.5959e-19,\n 3.4503e-17, 1.7120e-18, 2.0151e-17, 1.1354e-17, 5.8494e-18, 3.9839e-20,\n 8.1283e-19, 8.6092e-17, 1.1095e-18, 5.9729e-17, 1.0313e-20, 2.9564e-17,\n 1.1202e-17, 1.1190e-17, 3.3026e-18, 4.0239e-17, 1.5137e-17, 5.8788e-18,\n 2.4402e-17, 7.6160e-20, 5.5461e-17, 3.4933e-18, 4.3010e-19, 5.7670e-18,\n 1.1402e-17, 4.4721e-17, 8.6180e-18, 2.0484e-17, 3.1774e-19, 5.1523e-17,\n 1.5064e-17, 1.5395e-17, 1.2081e-17, 9.4484e-17, 8.6051e-20, 2.6732e-18,\n 8.1985e-17, 6.3989e-18, 7.5593e-18, 3.9325e-19, 1.3099e-17, 3.3890e-17,\n 2.9448e-17, 4.4785e-17, 1.0867e-18, 1.3329e-17, 7.7783e-18, 6.9605e-18,\n 7.1138e-18, 4.4812e-18, 1.1339e-17, 2.8447e-17, 2.1849e-18, 4.8510e-17,\n 5.4173e-17, 1.3163e-17, 1.2928e-17, 1.2305e-17, 8.8996e-20, 2.6331e-18,\n 2.6817e-18, 5.7834e-18, 2.6163e-17, 3.3138e-18, 1.4419e-18, 1.9945e-17,\n 5.5038e-21, 4.8487e-17, 4.1000e-19, 5.3126e-17, 2.2602e-17, 6.3533e-17,\n 1.6400e-18, 1.0867e-17, 4.2674e-17, 1.6057e-17, 1.5590e-18, 4.1188e-17,\n 3.1381e-18, 1.9965e-17, 1.0142e-19, 3.0760e-18, 8.9844e-17, 1.9600e-17,\n 9.0172e-17, 1.0146e-17, 1.4486e-17, 3.5881e-20, 5.0105e-18, 1.2635e-17,\n 1.1619e-19, 8.5054e-19, 1.0915e-17, 3.4949e-18, 5.0214e-18, 2.8602e-18,\n 3.2407e-17, 3.2461e-18, 8.1974e-19, 2.7059e-19, 8.9601e-20, 1.5605e-19,\n 1.9553e-18, 2.6800e-17, 1.8697e-18, 3.8230e-18, 4.7781e-19, 4.5627e-17,\n 2.2374e-17, 4.3385e-17, 9.8718e-17, 2.0446e-18, 5.3432e-17, 2.1806e-17,\n 4.8424e-18, 1.8137e-16, 3.9140e-18, 1.8664e-17, 6.2578e-18, 4.4905e-18,\n 2.3360e-18, 2.0066e-17, 1.0810e-17, 5.2211e-19, 2.7040e-18, 4.6465e-17],\n device='cuda:0')"
},
"36": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.6545e-18, 9.2388e-22, 9.2389e-18, ..., 2.3926e-20, 2.4087e-19,\n 6.6582e-19],\n [2.2426e-18, 2.3372e-22, 1.3104e-17, ..., 3.7506e-20, 3.5592e-19,\n 9.3998e-19],\n [1.2664e-18, 1.6108e-22, 7.4065e-18, ..., 1.9569e-20, 2.1079e-19,\n 4.7893e-19],\n ...,\n [9.5824e-19, 1.3677e-22, 5.8115e-18, ..., 8.9885e-21, 1.4499e-19,\n 3.8548e-19],\n [1.2589e-19, 3.3669e-22, 6.9343e-19, ..., 2.4118e-21, 1.3630e-20,\n 5.2260e-20],\n [6.6609e-19, 8.0262e-22, 3.9085e-18, ..., 1.1375e-20, 9.1026e-20,\n 2.6747e-19]], device='cuda:0')"
},
"37": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([5.9854e-17, 8.3269e-17, 4.4531e-17, 2.3643e-16, 7.1753e-19, 6.8574e-18,\n 1.2558e-16, 2.3581e-16, 1.9073e-16, 9.4977e-17, 4.1167e-18, 1.9831e-16,\n 9.7972e-18, 4.8857e-19, 2.5789e-17, 2.4640e-19, 1.0771e-16, 2.0398e-16,\n 5.7047e-17, 1.2187e-17, 2.2211e-17, 1.6348e-17, 7.7821e-17, 9.9759e-19,\n 2.5931e-18, 2.2003e-17, 2.2688e-18, 3.3171e-17, 1.5720e-17, 4.9353e-17,\n 1.0792e-19, 2.6483e-17, 2.6798e-17, 9.3332e-18, 1.2016e-18, 5.2540e-17,\n 1.7990e-18, 3.0210e-16, 5.0661e-17, 3.6055e-18, 1.1381e-16, 7.4138e-18,\n 2.1784e-17, 1.9699e-18, 3.9750e-17, 6.8174e-17, 8.5602e-19, 8.9055e-20,\n 5.5924e-17, 5.0810e-17, 7.2585e-17, 3.0251e-18, 3.1546e-17, 1.1824e-16,\n 1.5105e-18, 1.3212e-17, 1.1951e-16, 4.7416e-19, 5.4845e-18, 9.8464e-18,\n 4.1332e-17, 3.2351e-17, 6.6555e-18, 6.3237e-17, 3.9755e-19, 9.9677e-17,\n 1.3577e-17, 2.5956e-17, 2.9227e-17, 1.0702e-17, 5.0279e-18, 9.8503e-17,\n 1.2069e-16, 2.5285e-16, 1.6317e-18, 1.9742e-19, 9.9571e-18, 8.5014e-17,\n 1.2882e-18, 2.4449e-19, 1.9984e-17, 1.2672e-18, 5.0667e-17, 8.7033e-18,\n 9.5770e-18, 4.9504e-18, 9.6732e-18, 1.3907e-17, 1.4337e-17, 1.6726e-16,\n 1.4576e-19, 9.9441e-17, 4.1237e-17, 1.1080e-16, 2.7003e-16, 1.7211e-16,\n 1.7477e-17, 2.3563e-16, 8.4701e-18, 2.9781e-18, 1.0488e-16, 3.6054e-20,\n 4.5721e-17, 2.7622e-17, 2.6039e-18, 7.3078e-17, 4.3921e-20, 4.3037e-16,\n 8.9165e-17, 9.2501e-17, 3.2690e-19, 2.6757e-18, 8.2860e-18, 8.2659e-17,\n 2.6056e-17, 6.9461e-17, 1.4689e-19, 8.5854e-17, 1.0796e-16, 4.3279e-17,\n 1.4434e-16, 5.7020e-17, 4.4567e-17, 6.3870e-18, 3.0196e-16, 1.5835e-16,\n 4.0246e-17, 1.6632e-17, 3.4481e-17, 1.2273e-19, 3.5907e-16, 3.4897e-17,\n 4.5332e-19, 1.3387e-19, 2.3323e-16, 8.9966e-17, 1.3547e-17, 5.9526e-18,\n 5.5047e-19, 1.8024e-19, 1.4869e-16, 3.0927e-19, 2.2310e-16, 4.3412e-18,\n 5.0365e-18, 1.0677e-17, 8.8840e-17, 7.4517e-17, 2.1641e-17, 2.6061e-19,\n 4.2909e-18, 1.3692e-18, 1.6247e-19, 1.4690e-16, 1.2998e-17, 1.9272e-16,\n 4.6239e-18, 1.0803e-16, 1.4617e-17, 1.4640e-16, 3.8271e-17, 6.2294e-18,\n 6.5722e-17, 5.7176e-17, 7.9326e-17, 1.7483e-16, 3.6732e-16, 1.7355e-19,\n 6.8921e-19, 3.3424e-17, 1.9328e-17, 2.9426e-19, 1.1853e-16, 1.5257e-17,\n 1.7136e-16, 2.1370e-19, 8.0081e-17, 8.9361e-18, 3.8555e-17, 3.8693e-17,\n 4.1252e-16, 4.1074e-17, 2.6523e-17, 3.9431e-20, 7.7279e-18, 1.1978e-16,\n 4.8928e-19, 4.3762e-17, 1.1149e-16, 9.9337e-18, 1.1391e-16, 9.9049e-18,\n 2.8715e-17, 2.6284e-17, 1.7368e-16, 9.2940e-17, 1.0935e-16, 8.0274e-19,\n 3.4836e-17, 2.2311e-16, 2.3854e-19, 2.0694e-17, 1.0667e-17, 1.5643e-16,\n 1.5283e-16, 2.1859e-17, 7.2303e-17, 1.4915e-17, 1.0684e-18, 4.0754e-19,\n 4.8399e-19, 2.4339e-17, 1.2541e-16, 1.1162e-16, 4.0278e-17, 1.4012e-16,\n 1.7019e-17, 8.4186e-17, 6.1327e-19, 2.2090e-18, 4.2183e-20, 1.1890e-17,\n 1.3189e-16, 9.8286e-18, 3.3146e-18, 6.7881e-17, 1.9109e-17, 5.9668e-17,\n 4.2432e-18, 8.4167e-17, 1.1779e-18, 4.6057e-17, 1.4316e-16, 6.0385e-19,\n 1.1921e-16, 2.6748e-18, 2.8305e-17, 1.0625e-19, 2.2475e-16, 4.5299e-18,\n 2.0602e-17, 4.6507e-18, 5.4411e-18, 3.1870e-17, 4.6903e-17, 3.2502e-17,\n 1.1749e-16, 4.4679e-17, 1.9020e-16, 1.8276e-17, 9.8923e-17, 2.1445e-21,\n 7.7970e-18, 3.6737e-17, 4.7487e-18, 2.5277e-17], device='cuda:0')"
},
"38": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[4.1878e-16, 1.1085e-16, 1.5868e-16, ..., 2.0211e-17, 1.2336e-16,\n 2.2780e-16],\n [4.3241e-17, 1.1240e-17, 1.6827e-17, ..., 2.2408e-18, 1.2455e-17,\n 2.2583e-17],\n [4.7105e-17, 1.2144e-17, 1.7533e-17, ..., 2.1576e-18, 1.3982e-17,\n 2.6119e-17],\n [4.9352e-17, 1.3626e-17, 1.8554e-17, ..., 2.3403e-18, 1.4734e-17,\n 2.7355e-17]], device='cuda:0')"
},
"39": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.0502e-14, 1.0821e-15, 1.1708e-15, 1.2507e-15], device='cuda:0')"
},
"40": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[4.1878e-16, 1.1085e-16, 1.5868e-16, ..., 2.0211e-17, 1.2336e-16,\n 2.2780e-16],\n [4.3241e-17, 1.1240e-17, 1.6827e-17, ..., 2.2408e-18, 1.2455e-17,\n 2.2583e-17],\n [4.7105e-17, 1.2144e-17, 1.7533e-17, ..., 2.1576e-18, 1.3982e-17,\n 2.6119e-17],\n [4.9352e-17, 1.3626e-17, 1.8554e-17, ..., 2.3403e-18, 1.4734e-17,\n 2.7355e-17]], device='cuda:0')"
},
"41": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.0502e-14, 1.0821e-15, 1.1708e-15, 1.2507e-15], device='cuda:0')"
},
"42": {
"step": "tensor(23788.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[4.1878e-16, 1.1085e-16, 1.5868e-16, ..., 2.0211e-17, 1.2336e-16,\n 2.2780e-16],\n [4.3241e-17, 1.1240e-17, 1.6827e-17, ..., 2.2408e-18, 1.2455e-17,\n 2.2583e-17],\n [4.7105e-17, 1.2144e-17, 1.7533e-17, ..., 2.1576e-18, 1.3982e-17,\n 2.6119e-17],\n [4.9352e-17, 1.3626e-17, 1.8554e-17, ..., 2.3403e-18, 1.4734e-17,\n 2.7355e-17]], device='cuda:0')"
},
"43": {
"step": "tensor(23788.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.0502e-14, 1.0821e-15, 1.1708e-15, 1.2507e-15], device='cuda:0')"
},
"8": {
"step": "tensor(22536.)",
"exp_avg": "tensor([[ 7.2263e-08, -3.6041e-08, 1.1662e-12, ..., -2.8868e-08,\n -2.3718e-07, -5.0621e-08],\n [-5.4148e-06, 9.4806e-08, 6.0259e-12, ..., -1.1911e-08,\n 2.6591e-06, 6.7227e-07],\n [ 4.2784e-08, 3.6937e-08, -3.4431e-11, ..., -1.1785e-08,\n -3.2087e-07, 9.1184e-07],\n ...,\n [ 1.0986e-07, -1.3670e-07, 9.4910e-21, ..., -1.0643e-08,\n 2.9263e-06, -1.4940e-08],\n [ 4.8732e-07, -1.4769e-06, 1.2920e-10, ..., -3.9437e-08,\n 3.2940e-07, -8.5371e-07],\n [-2.7998e-08, 6.3856e-10, -9.6040e-09, ..., -3.0761e-07,\n 8.1022e-10, 1.5774e-11]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.5062e-11, 6.2034e-12, 1.8354e-14, ..., 2.8134e-12, 1.1295e-11,\n 1.5963e-11],\n [2.2792e-11, 1.0663e-11, 9.4406e-15, ..., 5.1454e-12, 1.1282e-10,\n 1.3395e-11],\n [1.1317e-11, 3.8089e-12, 6.5031e-13, ..., 4.8379e-12, 1.3181e-11,\n 2.2365e-11],\n ...,\n [2.4781e-11, 5.6226e-12, 1.3364e-15, ..., 4.4366e-13, 1.3990e-10,\n 4.6709e-12],\n [1.9508e-11, 1.0931e-11, 1.3128e-12, ..., 9.4605e-12, 5.4853e-11,\n 6.1568e-12],\n [5.3941e-11, 1.2638e-11, 2.1197e-12, ..., 2.9432e-10, 1.4580e-12,\n 1.8870e-12]], device='cuda:0')"
},
"9": {
"step": "tensor(22536.)",
"exp_avg": "tensor([-7.3518e-07, 5.3155e-06, 7.7293e-07, ..., 2.1352e-06,\n -1.0879e-06, 4.6073e-06], device='cuda:0')",
"exp_avg_sq": "tensor([7.4122e-10, 9.6338e-10, 6.7978e-10, ..., 1.0804e-09, 6.7205e-10,\n 2.0181e-09], device='cuda:0')"
},
"10": {
"step": "tensor(22536.)",
"exp_avg": "tensor([[ 3.9438e-08, -7.2389e-08, -4.1392e-07, ..., -1.0747e-08,\n -1.0121e-07, 4.8199e-08],\n [ 4.3504e-09, 3.4073e-07, 3.6320e-07, ..., 5.1105e-09,\n 1.7838e-07, 1.3611e-07],\n [ 8.2347e-08, -2.2692e-07, 4.8652e-07, ..., 4.9861e-09,\n -1.7447e-07, 1.3872e-07],\n ...,\n [-3.6323e-08, -1.3611e-07, 4.6246e-07, ..., -1.2686e-07,\n -1.0579e-08, 6.8958e-08],\n [ 9.8628e-08, -5.8662e-08, -4.4099e-07, ..., 1.2570e-08,\n -9.3949e-08, -1.5358e-07],\n [ 3.7083e-08, -5.5424e-08, 4.7562e-07, ..., -6.5599e-08,\n 8.2806e-08, -2.7306e-08]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.5144e-13, 2.3601e-13, 5.9843e-13, ..., 2.5199e-13, 2.8761e-13,\n 2.6898e-13],\n [2.4644e-13, 6.9390e-13, 3.8157e-13, ..., 6.0781e-13, 5.1399e-13,\n 3.8013e-13],\n [4.2209e-13, 9.8489e-13, 4.3207e-13, ..., 6.6150e-13, 8.3530e-13,\n 7.2758e-13],\n ...,\n [3.5139e-13, 7.1662e-13, 4.2745e-13, ..., 7.1861e-13, 6.3588e-13,\n 6.5552e-13],\n [2.8537e-13, 9.0931e-13, 4.6134e-13, ..., 8.3460e-13, 4.5643e-13,\n 3.7040e-13],\n [2.8991e-13, 9.6541e-13, 6.3302e-13, ..., 1.3326e-12, 7.0475e-13,\n 2.8795e-13]], device='cuda:0')"
},
"11": {
"step": "tensor(21284.)",
"exp_avg": "tensor([[-4.5010e-07, 5.7669e-08, 3.0969e-09, ..., -3.1271e-07,\n 9.6291e-08, -9.1990e-09],\n [-4.3908e-08, 1.0823e-07, 1.4397e-10, ..., -8.9437e-10,\n -5.5777e-08, 7.0789e-07],\n [ 8.7293e-08, -3.6128e-08, -9.4206e-09, ..., -3.0986e-07,\n -5.7038e-08, 2.3645e-10],\n ...,\n [ 5.4739e-07, 5.9701e-08, 2.8052e-12, ..., -1.4037e-08,\n 1.4730e-08, 3.3166e-13],\n [-4.1702e-08, -1.1623e-09, 9.2594e-12, ..., -3.6338e-07,\n 4.3222e-07, -1.5776e-13],\n [ 3.3657e-09, 2.1779e-08, 2.3710e-08, ..., -2.2895e-07,\n 1.4771e-07, 3.2875e-09]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.2381e-12, 8.1567e-12, 5.2332e-14, ..., 1.2894e-11, 2.5189e-12,\n 3.6284e-12],\n [5.5931e-12, 3.0054e-13, 1.2100e-13, ..., 5.7171e-13, 1.1091e-11,\n 5.0249e-11],\n [2.6949e-12, 5.3934e-12, 2.5327e-14, ..., 2.5192e-12, 1.4522e-10,\n 6.6708e-13],\n ...,\n [9.3287e-11, 2.8068e-13, 1.4415e-13, ..., 6.4667e-13, 3.3483e-12,\n 3.4600e-14],\n [1.6888e-12, 2.4687e-13, 1.6914e-12, ..., 3.0911e-11, 1.2164e-10,\n 6.1138e-13],\n [4.4546e-13, 1.9542e-11, 8.9744e-13, ..., 7.4024e-11, 4.6800e-12,\n 2.8392e-12]], device='cuda:0')"
},
"12": {
"step": "tensor(21284.)",
"exp_avg": "tensor([-1.0549e-05, 2.4165e-06, -2.3861e-06, ..., -7.9029e-06,\n 1.1984e-05, 1.1468e-06], device='cuda:0')",
"exp_avg_sq": "tensor([3.2005e-10, 4.9548e-10, 4.7905e-10, ..., 5.9107e-10, 9.6323e-10,\n 5.8690e-10], device='cuda:0')"
},
"13": {
"step": "tensor(21284.)",
"exp_avg": "tensor([[-2.4656e-07, -1.4318e-07, 2.1095e-07, ..., 6.4344e-08,\n 5.7850e-08, -3.0665e-08],\n [-2.1587e-07, -1.2889e-07, -3.4456e-07, ..., 8.2379e-09,\n 7.2186e-08, 3.9188e-08],\n [-5.2824e-08, 1.2873e-07, 1.8983e-07, ..., -2.2606e-08,\n -4.0865e-08, 6.0729e-08],\n ...,\n [-3.0616e-07, -1.9940e-07, 9.6785e-08, ..., -7.2441e-09,\n -4.5459e-08, 6.8075e-08],\n [ 1.3188e-07, -7.1311e-08, 1.6023e-07, ..., -1.9035e-07,\n 1.3002e-07, 1.4693e-08],\n [ 1.2937e-08, 1.8283e-07, -5.8934e-08, ..., -2.2292e-07,\n -1.0049e-08, 2.6372e-08]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.1619e-13, 6.4513e-14, 8.0862e-13, ..., 9.3820e-14, 3.7516e-14,\n 7.8689e-14],\n [1.1912e-13, 1.2028e-13, 1.5075e-12, ..., 4.4100e-13, 7.0981e-14,\n 1.0198e-13],\n [2.6826e-13, 7.4735e-14, 1.2614e-12, ..., 9.0722e-13, 9.1381e-14,\n 1.2663e-13],\n ...,\n [2.1677e-13, 1.1629e-13, 2.6651e-13, ..., 1.9367e-13, 1.1072e-13,\n 1.0943e-13],\n [2.9368e-13, 2.4386e-13, 2.3169e-13, ..., 7.3020e-13, 1.0518e-13,\n 1.3523e-13],\n [3.2072e-13, 1.2275e-13, 2.1874e-13, ..., 3.8285e-13, 6.9521e-14,\n 1.2237e-13]], device='cuda:0')"
}
},
"param_groups": [
{
"lr": 0.005000500000000001,
"name": "shared",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
0,
1
]
},
{
"lr": 0.005000500000000001,
"name": "scale_384",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
2,
3,
4
]
},
{
"lr": 0.005000500000000001,
"name": "scale_768",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
5,
6,
7
]
},
{
"lr": 0.005000500000000001,
"name": "scale_1024",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
8,
9,
10
]
},
{
"lr": 0.005000500000000001,
"name": "scale_1280",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
11,
12,
13
]
},
{
"lr": 0.0025005,
"name": "fusion",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.005,
"params": [
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43
]
}
]
},
"scheduler_state_dict": {
"T_0": 10,
"T_i": 20,
"T_mult": 2,
"eta_min": 1e-06,
"T_cur": 10,
"base_lrs": [
0.01,
0.01,
0.01,
0.01,
0.01,
0.005
],
"last_epoch": 20,
"_step_count": 0,
"_is_initial": false,
"_get_lr_called_within_step": false,
"_last_lr": [
0.005000500000000001,
0.005000500000000001,
0.005000500000000001,
0.005000500000000001,
0.005000500000000001,
0.0025005
]
},
"metrics": {
"val_acc": 81.212
},
"train_config": {
"name": "david_training",
"run_id": "20251012_041353",
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
"model_variant": "clip_vit_l14",
"num_classes": 1000,
"preset": "clip_vit_l14",
"custom_config_path": null,
"num_classes_override": null,
"use_belly_override": null,
"belly_expand_override": null,
"progressive_training_override": true,
"scale_warmup_epochs_override": {
"384": 0,
"768": 1,
"1024": 2,
"1280": 3
},
"num_epochs": 20,
"batch_size": 1024,
"learning_rate": 0.01,
"weight_decay": 1e-05,
"warmup_epochs": 3,
"use_rose_loss": true,
"rose_initial_weight": 0.1,
"rose_max_weight": 0.5,
"rose_weight_schedule": "adaptive",
"use_cayley_loss": false,
"cayley_weight": 0.001,
"scale_loss_balance": null,
"use_mixed_precision": false,
"gradient_clip": 5.0,
"scheduler_type": "cosine_restarts",
"min_lr": 1e-06,
"freeze_strategy": "never",
"freeze_threshold": 90.0,
"unfreeze_on_plateau": true,
"patience": 10,
"track_gradients": true,
"gradient_scale_threshold": 1e-07,
"gradient_scale_multiplier": 5.0,
"log_interval": 50,
"val_interval": 1,
"save_interval": 5,
"log_fusion_weights": true,
"log_loss_components": true,
"save_format": "safetensors",
"hf_repo": "AbstractPhil/gated-david",
"upload_to_hub": true,
"base_dir": "./david_training",
"num_workers": 10,
"pin_memory": true,
"prefetch_factor": 4,
"persistent_workers": true
}
}