diff --git "a/weights/David-partial_shared-hierarchical_tree/20251012_065325/checkpoint_epoch_10_metadata.json" "b/weights/David-partial_shared-hierarchical_tree/20251012_065325/checkpoint_epoch_10_metadata.json" new file mode 100644--- /dev/null +++ "b/weights/David-partial_shared-hierarchical_tree/20251012_065325/checkpoint_epoch_10_metadata.json" @@ -0,0 +1,599 @@ +{ + "epoch": 9, + "optimizer_state_dict": { + "state": { + "0": { + "step": "tensor(12520.)", + "exp_avg": "tensor([[-2.3193e-04, -7.2228e-04, 1.1289e-05, ..., 3.1681e-04,\n -5.8875e-05, 5.3191e-04],\n [-7.9149e-04, 1.0634e-03, -1.0112e-03, ..., -8.9956e-04,\n -2.0858e-04, -1.6369e-03],\n [ 3.7158e-04, -2.3017e-03, 1.2457e-04, ..., -3.1223e-04,\n 2.1434e-04, 2.6071e-04],\n ...,\n [-3.0042e-04, -1.9174e-05, 3.4272e-04, ..., -4.3584e-04,\n -1.2141e-05, -2.7795e-04],\n [ 1.4506e-04, -2.9431e-04, -4.1488e-04, ..., -7.1613e-05,\n 2.2072e-04, 2.7437e-04],\n [-8.0731e-04, 1.3317e-03, -8.6057e-04, ..., 4.4153e-04,\n 3.3261e-04, -3.7020e-04]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7614e-06, 1.4195e-05, 5.9399e-06, ..., 1.6052e-06, 1.0108e-06,\n 1.2740e-06],\n [3.3875e-06, 1.2063e-05, 7.1568e-06, ..., 3.3760e-06, 1.1940e-06,\n 4.4645e-06],\n [2.4513e-06, 1.1572e-05, 4.9133e-06, ..., 1.3986e-06, 9.2780e-07,\n 2.1143e-06],\n ...,\n [1.4403e-06, 1.8771e-05, 4.0443e-06, ..., 1.1042e-06, 7.2365e-07,\n 9.9883e-07],\n [1.4676e-06, 8.6484e-06, 4.3211e-06, ..., 9.6244e-07, 6.6542e-07,\n 9.7271e-07],\n [2.7214e-06, 9.1822e-06, 6.8638e-06, ..., 2.6057e-06, 1.1856e-06,\n 2.3630e-06]], device='cuda:0')" + }, + "1": { + "step": "tensor(12520.)", + "exp_avg": "tensor([-7.6547e-03, -3.2305e-02, 6.8591e-03, 8.2147e-03, 1.4252e-02,\n -3.6207e-02, 2.1732e-02, -2.7578e-03, -2.5823e-03, 4.0939e-03,\n 1.2482e-02, -1.2733e-02, 6.0480e-03, -8.0623e-04, 3.0512e-03,\n 7.1697e-03, -7.3870e-03, 4.7116e-03, 8.3679e-03, -1.5279e-02,\n -6.3573e-03, 5.4970e-03, 9.3927e-03, 2.5779e-03, -3.5349e-03,\n -8.0750e-03, 1.7019e-04, 4.6095e-03, 1.5932e-03, 5.1233e-03,\n -1.6303e-02, 7.7621e-03, -1.0406e-02, 1.2919e-04, 1.3151e-02,\n 3.0483e-02, -7.2463e-03, -7.7479e-03, -1.0496e-02, -2.5683e-02,\n 3.2121e-03, 1.2016e-03, 2.8614e-03, -6.2881e-03, -6.2095e-03,\n -2.6090e-02, 3.5475e-02, -1.5881e-03, -1.1657e-02, -3.9040e-04,\n -1.2567e-02, -1.4316e-02, -3.3617e-03, -1.9749e-02, 7.0622e-03,\n 1.0226e-02, 7.3483e-03, -5.6816e-03, 3.6442e-03, 1.2895e-02,\n 2.0112e-02, 1.0960e-02, -3.2703e-03, -1.2708e-03, -4.7600e-03,\n -5.5151e-03, -8.1834e-03, -3.1045e-03, 1.2556e-02, -1.2721e-02,\n 6.0427e-03, -1.6182e-03, -6.5993e-03, 3.3732e-03, -1.5877e-02,\n -1.0104e-02, -2.3956e-02, 6.0496e-03, -2.2081e-03, 1.7452e-03,\n -7.5179e-03, -8.7724e-04, -6.4156e-03, -1.2248e-02, -2.8860e-02,\n -3.0694e-04, -2.3003e-03, 3.8323e-03, 1.8933e-02, 1.1511e-02,\n -1.8621e-02, 2.7202e-03, 7.7854e-03, -1.0636e-02, -6.5773e-03,\n 4.5977e-04, -5.1973e-03, 5.6052e-45, 2.1942e-03, 5.4656e-03,\n 5.0041e-03, -2.5054e-02, 9.6675e-04, 1.2086e-02, 1.6300e-03,\n -1.9993e-02, -1.1497e-03, -7.1431e-03, -1.4741e-02, -2.5988e-02,\n -8.3735e-03, 3.6724e-03, -5.7817e-03, 1.4335e-02, 2.3957e-03,\n 1.3743e-02, -4.2452e-03, 7.6307e-03, 5.3508e-03, 1.8709e-02,\n 1.7670e-02, 7.8405e-03, -1.9089e-02, -3.4139e-02, -2.0089e-02,\n 1.8927e-02, 2.0461e-02, 1.3888e-02, -1.8126e-03, -2.1316e-03,\n -1.1858e-02, -4.6511e-03, -1.1375e-03, 1.6084e-02, -5.2549e-03,\n -4.0315e-03, 3.7844e-03, 6.2334e-03, -7.0416e-03, -1.4211e-02,\n 9.7876e-03, -6.5456e-03, 1.0165e-02, -2.0694e-02, 1.5375e-02,\n 5.4983e-03, 1.5512e-02, 1.1542e-02, 7.7355e-04, -1.2872e-02,\n -5.9177e-03, -2.8917e-02, -1.2237e-04, 9.8360e-03, 8.7999e-03,\n -1.6067e-02, -1.0876e-02, 9.8266e-04, 6.1449e-04, -1.4027e-02,\n -2.8726e-03, 1.2769e-02, 1.9852e-02, -1.3268e-02, -7.5984e-03,\n 9.6340e-03, -1.3240e-03, 3.5819e-03, -2.4517e-02, 2.2566e-02,\n -3.8174e-03, -2.5216e-02, -1.4632e-02, 4.5049e-02, 1.2943e-02,\n -6.7446e-04, -3.9887e-03, 2.6234e-03, 7.1690e-03, -1.1621e-02,\n -6.1104e-03, -3.7374e-02, 1.0636e-02, 8.8696e-03, -9.7305e-03,\n -5.1726e-03, -3.0345e-03, -6.0543e-03, 2.0144e-02, 1.1102e-02,\n 1.6441e-02, 1.5600e-02, 2.5256e-03, 1.7255e-03, 5.3709e-03,\n -8.3970e-05, 2.8946e-03, -7.4871e-03, 7.8728e-03, 2.3148e-02,\n -5.5716e-03, 3.2819e-03, 2.7363e-03, 4.8545e-03, 4.9893e-03,\n -6.6807e-03, 4.4714e-03, 3.0436e-03, -1.8398e-03, 7.4629e-03,\n 2.3132e-03, -6.0816e-03, 4.2000e-03, 1.5733e-02, -1.0257e-02,\n -1.8227e-03, -2.2925e-02, 7.3014e-03, 1.0967e-02, 6.3172e-03,\n 8.8820e-03, -1.7488e-02, 1.2552e-02, 1.0163e-02, -4.5416e-03,\n -9.8571e-03, -4.2559e-02, 1.3344e-02, -2.9815e-03, -1.7214e-02,\n 9.8772e-03, -8.3129e-03, 5.0514e-03, -4.1637e-03, -1.5510e-02,\n -2.4592e-04, -2.4640e-03, -2.5021e-05, -1.2527e-02, -1.5855e-03,\n 1.0251e-02, 3.1593e-03, 1.0526e-02, 3.7725e-02, 7.7431e-03,\n 1.8929e-02, -1.1602e-03, 1.1983e-02, -1.9083e-02, 4.4159e-03,\n 4.5953e-03, -3.2150e-02, 2.2212e-02, -1.9655e-02, 4.9410e-03,\n -5.7969e-03, -3.4837e-03, -6.0772e-03, 8.0731e-03, 1.7737e-02,\n 1.9072e-02, -1.0791e-03, 6.9720e-03, -6.9696e-03, 5.0341e-03,\n 1.5148e-02, 7.0745e-04, 1.0445e-02, 2.7918e-03, 2.5221e-03,\n 8.1352e-03, 1.1726e-02, -2.5394e-03, 1.5220e-02, 1.5949e-02,\n -1.2757e-02, 1.3127e-03, -9.0215e-03, -7.2328e-03, -5.0211e-03,\n 7.9349e-03, 1.0960e-02, -3.1812e-03, 9.2704e-03, -2.5669e-04,\n 6.9765e-03, 2.1747e-02, 6.3927e-03, 7.7660e-03, 2.6560e-02,\n -1.7556e-02, -7.1440e-03, 1.6782e-03, -1.3842e-03, 9.3529e-03,\n 2.8511e-02, -3.2190e-02, -7.5965e-03, -1.6308e-02, 3.7740e-03,\n -1.0379e-02, 1.3972e-02, -8.3778e-03, -8.3014e-03, 4.7445e-03,\n -1.6398e-02, -8.2786e-04, -1.1360e-02, 2.5349e-02, -9.1028e-03,\n -7.1186e-03, -2.0430e-03, 7.5186e-03, 9.2747e-03, 1.6237e-04,\n 1.4192e-02, 1.9877e-02, 3.3590e-03, 1.7099e-02, -4.8049e-03,\n 2.3696e-03, 9.4233e-03, -8.7194e-03, 1.8402e-02, 1.9988e-03,\n 4.7858e-03, -1.4872e-02, -9.2088e-04, 1.9399e-02, 9.3696e-03,\n -2.9997e-03, 1.9302e-02, 2.6880e-03, 5.4421e-03, 3.1196e-03,\n -1.1116e-02, -5.6751e-03, -8.1384e-03, 4.7495e-03, -1.5436e-02,\n 9.4794e-03, 5.4691e-04, 1.5696e-02, -5.8187e-03, -2.8938e-03,\n 4.4804e-03, 8.7835e-03, -1.2040e-02, 1.0839e-02, -7.0880e-03,\n 2.8554e-02, -5.2180e-03, -3.3472e-03, -1.2593e-02, 2.1204e-02,\n -1.5141e-02, 4.7077e-03, -3.6413e-03, 9.0641e-03, 2.6437e-05,\n -9.0239e-03, 7.1842e-03, -9.7889e-03, 1.2214e-03, 4.4564e-03,\n -2.4348e-03, -1.4165e-02, -3.1207e-02, -2.2310e-02, 5.1147e-03,\n -1.1841e-02, -7.0527e-03, 4.6993e-03, 5.3276e-03, 1.9463e-02,\n 3.6770e-04, -2.1502e-02, -8.1407e-03, 7.9160e-03, -1.3754e-02,\n 1.3632e-02, 1.0740e-03, 1.2283e-02, 2.5175e-02, -3.2038e-03,\n 2.6672e-02, -2.0629e-02, -5.7082e-03, 1.7901e-02, -1.5355e-02,\n -1.0082e-02, 1.7965e-02, -2.3563e-02, -8.3003e-03, 6.8847e-03,\n -1.5695e-02, 6.4694e-03, 8.6046e-03, 4.1779e-04, -1.1666e-02,\n 1.3154e-04, -8.9319e-03, 6.4100e-03, -2.8270e-02, 1.1560e-02,\n -9.6266e-03, 1.2031e-03, 4.8779e-03, 6.9161e-03, 1.4241e-02,\n -1.9160e-02, 7.1476e-04, -8.6347e-03, -5.7880e-03, -8.7900e-03,\n -9.0895e-04, 1.1816e-02, -6.8036e-03, -6.6357e-03, -4.0582e-02,\n -3.3120e-02, -3.7860e-02, 3.2912e-05, 4.4507e-03, 3.9587e-03,\n 2.8572e-03, 2.0555e-02, 4.8659e-04, -2.6627e-02, -1.9818e-02,\n -3.5335e-03, 1.6530e-02, 6.1139e-03, -5.2420e-03, -1.7622e-02,\n 1.4029e-02, -1.4822e-02, 1.4746e-02, -1.4185e-02, -2.0269e-02,\n -6.1552e-03, 5.3146e-05, -1.4261e-02, 4.7973e-03, 1.0299e-02,\n -1.0321e-03, -1.1870e-03, 6.1284e-04, 7.5035e-04, -4.0309e-03,\n -3.4607e-02, -2.5046e-02, 5.4675e-04, 1.8380e-02, 7.7420e-03,\n 5.1727e-05, 2.0999e-02, 2.2346e-02, 6.3765e-03, 1.3600e-04,\n 1.7608e-02, 2.1320e-04, -9.1134e-03, 1.7984e-02, 1.5653e-03,\n 2.5991e-02, 3.6711e-02, -4.9336e-03, 3.7298e-03, -9.1138e-03,\n -5.8203e-03, 2.7814e-02, 1.6413e-02, 3.4677e-03, 3.6201e-02,\n -1.2880e-02, -2.7189e-02, -4.3778e-03, 1.0274e-02, 1.6234e-02,\n 2.7752e-03, 1.2635e-02, -1.6775e-02, 1.0625e-02, 3.4008e-03,\n -2.6351e-02, 2.2840e-03, 8.5565e-03, -1.9228e-02, -5.5383e-03,\n -6.9335e-03, 6.6652e-03, 1.0589e-03, 2.8101e-02, 1.6581e-02,\n -1.0419e-02, -2.4065e-02, -1.6171e-02, -1.8990e-02, -3.0018e-02,\n -1.6390e-03, 1.9190e-02, -3.8092e-03, 1.3864e-02, -1.0881e-03,\n 3.2030e-03, -1.3262e-02, 6.5223e-03, 1.3400e-02, 1.7431e-02,\n 9.2867e-03, -7.2275e-03, -3.4845e-03, -1.5879e-02, -6.3865e-03,\n -1.5856e-02, 2.1654e-02, -1.2667e-02, 2.0903e-02, -7.5353e-03,\n 1.7996e-02, -1.0987e-02, -3.3232e-05, 4.0458e-02, -5.5425e-03,\n 2.8882e-02, -7.2888e-03, -2.0331e-02, 3.4786e-03, 1.3630e-02,\n 9.2691e-03, 2.7417e-03, -7.1568e-03, -1.2073e-02, -2.6154e-02,\n 9.6172e-03, -1.0532e-02, -5.3202e-03, -8.2069e-03, 2.4895e-02,\n 1.4493e-02, -4.6572e-03, 1.3105e-02, 5.1302e-03, -5.1120e-04,\n -1.3628e-02, 3.1339e-02, 1.6376e-03, -6.7481e-03, 4.8195e-03,\n 5.1311e-03, 3.1447e-04, 1.9254e-03, 1.7442e-03, 1.6407e-02,\n -2.1080e-02, 2.0392e-02, 5.7429e-03, 2.3783e-03, -2.2945e-02,\n 2.1019e-04, 8.2704e-03, 1.4078e-02, -2.7509e-02, 1.0648e-02,\n 1.3982e-02, 2.0008e-02, -1.1740e-02, 2.0724e-02, 2.7261e-02,\n -6.1981e-03, -2.9483e-03, -3.0408e-03, 5.7179e-03, 7.1701e-04,\n 1.4202e-02, -2.8242e-03, -2.8099e-03, -1.6652e-02, 1.9929e-03,\n 9.5749e-03, 8.1763e-03, 1.2387e-02, 1.0418e-02, 1.9743e-04,\n -7.8415e-04, -6.3457e-03, 8.9911e-03, 4.0613e-03, 2.8301e-03,\n -2.5243e-03, 4.8328e-03, -3.0195e-03, -9.9233e-04, -6.1184e-03,\n 1.3758e-02, 1.2626e-03, 6.3263e-03, -9.4154e-03, -1.8949e-02,\n -1.0993e-02, 7.6636e-03, -1.0965e-02, -1.5450e-02, 1.7634e-03,\n 2.0138e-02, -1.9353e-03, 4.0047e-03, 4.5886e-03, 1.1096e-02,\n -2.7417e-03, -7.5117e-03, 1.7042e-02, -5.7114e-04, -1.8716e-03,\n 3.8534e-02, -1.5451e-03, 1.6907e-03, -6.1581e-03, -1.3322e-03,\n 8.5309e-03, 2.6586e-03, -8.6722e-03, -1.9138e-02, 5.6028e-03,\n 1.1279e-02, -9.8888e-03, -1.7711e-02, 1.0971e-03, 1.0843e-02,\n 1.0968e-02, 9.8241e-03, -5.5834e-03, 1.7357e-02, 1.6204e-03,\n -6.0515e-03, -4.3314e-03, -1.1002e-02, -5.5000e-04, 1.4220e-02,\n -1.7001e-02, 2.0119e-02, -5.8233e-03, -1.1078e-02, 1.2193e-02,\n -6.8916e-03, 6.8575e-03, 2.3049e-03, -1.1176e-02, 9.3842e-03,\n 1.0475e-02, -1.0655e-02, -1.9235e-02, -4.0273e-03, 6.1034e-03,\n -4.6616e-02, -6.3864e-03, -1.4246e-02, 1.8997e-02, -4.7431e-03,\n 1.0812e-02, 3.9608e-03, -4.0471e-03, -1.0275e-03, 1.8946e-03,\n -1.5675e-03, -5.8862e-03, 3.2796e-03, 5.5552e-04, 8.7161e-03,\n 4.6339e-03, 4.9186e-03, 7.6574e-03, 1.6034e-02, 6.2029e-03,\n -1.0952e-02, -2.3195e-02, 2.6043e-03, 8.1454e-03, 4.1791e-03,\n 5.0173e-03, -6.1025e-03, -9.4365e-03, -2.2656e-02, -1.0198e-02,\n 3.4124e-02, 1.9784e-02, 9.8315e-03, -2.1780e-02, 4.1389e-03,\n 2.3565e-03, -2.4017e-02, 1.0430e-03, -7.5264e-03, -1.1122e-02,\n 1.1691e-02, 1.2157e-02, 2.1853e-02, 1.1216e-02, -9.8763e-03,\n 4.2705e-03, 6.7047e-04, 6.0663e-04, -1.1913e-02, -4.3865e-03,\n -1.2645e-04, -1.1489e-02, 1.5690e-02, 2.4900e-02, -1.2386e-02,\n 1.4746e-02, 4.7195e-03, 4.5602e-03, 8.0310e-03, 8.2955e-03,\n -2.8334e-03, -3.9205e-03, -3.2664e-03, -4.0076e-02, 2.0464e-03,\n 1.0051e-02, 7.8111e-03, 6.7384e-03, 2.0376e-02, -3.6749e-02,\n -8.1335e-03, -3.7501e-03, 1.3624e-02, -3.0388e-03, -2.1146e-03,\n 1.6561e-03, -1.5585e-02, 1.2733e-02, 5.5215e-03, -5.3177e-03,\n -8.1243e-03, 1.3374e-02, 2.2154e-03, -2.1340e-02, -1.8850e-02,\n -1.4614e-02, -3.2290e-03, 4.7518e-04, 9.8400e-03, 1.2792e-02,\n -1.2117e-02, -1.5307e-02, -3.1724e-02, 6.3329e-03, 1.3767e-02,\n -3.3051e-03, 4.2280e-02, -4.9951e-03, -8.9356e-03, 9.7848e-04,\n -9.5201e-03, 2.2317e-03, 1.2913e-02, -3.0079e-03, 1.4648e-02,\n 8.7608e-03, 1.0287e-02, -1.7005e-03, -1.4888e-02, -1.9158e-03,\n 1.4197e-02, 8.7826e-03, 1.4882e-02], device='cuda:0')", + "exp_avg_sq": "tensor([1.6164e-03, 2.5639e-03, 1.4844e-03, 1.4413e-03, 1.1520e-03, 2.1419e-03,\n 1.0890e-03, 1.6585e-03, 1.4078e-03, 1.1008e-03, 1.4257e-03, 1.1440e-03,\n 1.4502e-03, 2.4314e-03, 1.9087e-03, 1.5355e-03, 1.7558e-03, 1.1619e-03,\n 1.5823e-03, 1.5539e-03, 1.3853e-03, 1.2695e-03, 9.2205e-04, 1.8559e-03,\n 1.3545e-03, 1.5767e-03, 1.0540e-03, 1.6397e-03, 1.2135e-03, 1.4301e-03,\n 1.0581e-03, 1.3579e-03, 8.4208e-04, 1.7386e-03, 1.2732e-03, 1.5930e-03,\n 1.8693e-03, 2.0411e-03, 1.7629e-03, 1.5822e-03, 1.0595e-03, 1.7035e-03,\n 1.5937e-03, 1.0982e-03, 1.0975e-03, 1.6640e-03, 1.1403e-03, 1.3939e-03,\n 1.2163e-03, 1.4671e-03, 1.6243e-03, 9.9511e-04, 1.3987e-03, 1.3444e-03,\n 1.1986e-03, 1.6627e-03, 1.5701e-03, 1.0431e-03, 1.4233e-03, 1.4011e-03,\n 1.6392e-03, 1.6191e-03, 1.3182e-03, 1.3836e-03, 1.4841e-03, 1.2771e-03,\n 1.0766e-03, 2.2095e-03, 1.8221e-03, 1.5038e-03, 1.2687e-03, 1.2903e-03,\n 1.2297e-03, 1.6069e-03, 1.8527e-03, 1.0280e-03, 1.1786e-03, 1.4053e-03,\n 1.0140e-03, 1.0129e-04, 1.9156e-03, 1.5065e-03, 1.4897e-03, 1.4441e-03,\n 1.7420e-03, 1.2363e-03, 1.1912e-03, 1.7875e-03, 1.2401e-03, 1.0505e-03,\n 1.2689e-03, 1.0941e-03, 1.7930e-03, 1.1854e-03, 1.6044e-03, 1.0621e-03,\n 1.1026e-03, 9.8529e-14, 1.3059e-03, 1.0886e-03, 1.1397e-03, 1.6665e-03,\n 1.1507e-03, 1.3097e-03, 8.1839e-04, 1.0100e-03, 1.3097e-03, 1.5577e-03,\n 1.8852e-03, 1.2930e-03, 1.6030e-03, 1.4984e-03, 1.3040e-03, 1.5143e-03,\n 1.1394e-03, 1.6266e-03, 1.3566e-03, 1.4145e-03, 1.4641e-03, 1.6327e-03,\n 1.2204e-03, 1.7670e-03, 1.5148e-03, 1.1196e-03, 1.8147e-03, 1.5367e-03,\n 1.5690e-03, 1.0529e-03, 1.1808e-03, 9.3142e-04, 2.1825e-03, 1.1585e-03,\n 1.3983e-03, 1.1400e-03, 1.3494e-03, 1.0980e-03, 1.0552e-03, 1.3171e-03,\n 9.9440e-04, 1.5135e-03, 1.3236e-03, 1.1501e-03, 1.9216e-03, 1.5568e-03,\n 1.3877e-03, 2.0812e-03, 1.1491e-03, 1.4752e-03, 1.7311e-03, 1.2787e-03,\n 1.6583e-03, 2.0028e-03, 1.1834e-03, 1.4062e-03, 1.3018e-03, 1.5312e-03,\n 1.2291e-03, 1.8239e-03, 1.7588e-03, 1.3571e-03, 1.4319e-03, 9.2785e-04,\n 1.9994e-03, 1.5498e-03, 1.0464e-03, 1.4025e-03, 1.2851e-03, 1.0820e-03,\n 1.2955e-03, 9.2640e-04, 1.7311e-03, 1.5514e-03, 9.2787e-04, 1.7622e-03,\n 1.2095e-03, 1.3688e-03, 1.5034e-03, 2.1079e-03, 1.6121e-03, 1.3306e-03,\n 1.7635e-03, 2.1468e-03, 8.8605e-04, 1.3388e-03, 1.3848e-03, 1.4508e-03,\n 1.0541e-03, 1.3723e-03, 1.6662e-03, 1.2506e-03, 2.9010e-03, 1.3854e-03,\n 1.1511e-03, 1.7136e-03, 1.3786e-03, 9.5091e-04, 9.9250e-04, 1.3456e-03,\n 1.4019e-03, 1.1285e-03, 1.4859e-03, 1.3493e-03, 1.6451e-03, 9.6565e-04,\n 1.7002e-03, 1.1187e-03, 1.1469e-03, 9.2154e-04, 1.4319e-03, 9.8621e-04,\n 1.2180e-03, 1.6605e-03, 1.3417e-03, 2.8307e-03, 1.0684e-03, 1.3971e-03,\n 2.1291e-03, 1.4232e-03, 1.1099e-03, 1.5182e-03, 1.3583e-03, 1.4049e-03,\n 1.3667e-03, 1.3910e-03, 1.2420e-03, 1.1833e-03, 1.4359e-03, 1.2831e-03,\n 1.1445e-03, 3.2578e-03, 2.8223e-03, 1.2170e-03, 1.4808e-03, 1.7882e-03,\n 1.5671e-03, 1.4923e-03, 1.0296e-03, 1.4698e-03, 1.1484e-03, 1.7772e-03,\n 1.2046e-03, 1.1896e-03, 2.8005e-03, 1.7238e-03, 1.1266e-03, 1.1644e-03,\n 1.1143e-03, 1.1535e-03, 1.5317e-03, 1.7388e-03, 9.9128e-04, 2.2115e-03,\n 1.5800e-03, 1.2417e-03, 1.4985e-03, 2.3686e-03, 1.1621e-03, 9.9132e-04,\n 1.2848e-03, 1.8594e-03, 1.5521e-03, 1.3094e-03, 2.1351e-03, 1.8796e-03,\n 9.2889e-04, 1.4286e-03, 1.0554e-03, 1.6299e-03, 1.8064e-03, 1.0261e-03,\n 1.3470e-03, 1.6357e-03, 1.2323e-03, 1.2696e-03, 1.7326e-03, 1.2657e-03,\n 1.4863e-03, 1.6189e-03, 1.9224e-03, 1.2517e-03, 1.0849e-03, 1.8836e-03,\n 1.0634e-03, 1.4406e-03, 2.1103e-03, 1.0957e-03, 1.7862e-03, 1.8071e-03,\n 1.1082e-03, 1.3610e-03, 1.6008e-03, 1.1620e-03, 1.5972e-03, 1.3265e-03,\n 1.5122e-03, 1.3777e-03, 1.9492e-03, 1.1656e-03, 1.3230e-03, 1.2145e-03,\n 1.0224e-03, 2.3894e-03, 1.2316e-03, 1.2619e-03, 1.0142e-03, 1.1806e-03,\n 1.4667e-03, 1.2311e-03, 1.3483e-03, 9.0351e-04, 2.0676e-03, 1.1892e-03,\n 1.2687e-03, 1.0988e-03, 9.9061e-04, 2.3462e-03, 1.2571e-03, 1.2637e-03,\n 1.4025e-03, 1.4515e-03, 1.3648e-03, 1.7132e-03, 1.5621e-03, 1.0674e-03,\n 1.2669e-03, 1.5047e-03, 1.4614e-03, 1.3074e-03, 1.2431e-03, 2.2745e-03,\n 9.7475e-04, 1.4325e-03, 1.1456e-03, 1.2450e-03, 1.2740e-03, 1.3984e-03,\n 1.1798e-03, 1.6671e-03, 1.2881e-03, 1.4336e-03, 1.8529e-03, 1.3701e-03,\n 2.4512e-03, 9.2690e-04, 2.2078e-03, 1.0475e-03, 1.7198e-03, 1.3800e-03,\n 1.3792e-03, 9.9234e-04, 1.5849e-03, 1.1975e-03, 1.2152e-03, 1.4027e-03,\n 1.4509e-03, 1.2759e-03, 1.1414e-03, 1.9209e-03, 1.3710e-03, 1.3880e-03,\n 1.2338e-03, 1.2453e-03, 9.3039e-04, 1.1805e-03, 1.6149e-03, 1.4042e-03,\n 9.4592e-04, 1.6575e-03, 2.1659e-03, 1.2563e-03, 2.4921e-03, 1.2895e-03,\n 1.1264e-03, 1.2990e-03, 3.1543e-03, 8.7086e-04, 9.8263e-04, 1.3901e-03,\n 1.4874e-03, 1.5524e-03, 1.7015e-03, 1.0681e-03, 1.0978e-03, 1.4534e-03,\n 1.2952e-03, 1.6230e-03, 1.5935e-03, 1.2292e-03, 1.1595e-03, 2.3563e-03,\n 2.0082e-03, 1.6704e-03, 1.5188e-03, 1.4176e-03, 1.6843e-03, 1.1715e-03,\n 1.2565e-03, 1.8969e-03, 1.6790e-03, 2.1373e-03, 1.1560e-03, 8.7755e-04,\n 1.8421e-03, 1.5928e-03, 7.9966e-04, 1.2146e-03, 1.3584e-03, 1.2838e-03,\n 1.1323e-03, 1.8308e-03, 2.3184e-03, 1.3501e-03, 1.2761e-03, 1.5291e-03,\n 1.2901e-03, 1.0491e-03, 1.3196e-03, 1.6955e-03, 1.0839e-03, 1.5616e-03,\n 1.3311e-03, 1.7579e-03, 1.5057e-03, 1.4295e-03, 1.1557e-03, 1.7972e-03,\n 1.4899e-03, 1.7462e-03, 1.3426e-03, 1.4058e-03, 1.5781e-03, 1.2969e-03,\n 2.3162e-03, 1.2669e-03, 1.2882e-03, 1.4539e-03, 1.2391e-03, 1.0866e-03,\n 1.2286e-03, 2.1442e-03, 1.6489e-03, 2.2147e-03, 1.0689e-03, 2.0142e-03,\n 1.4975e-03, 1.6365e-03, 1.1209e-03, 1.3483e-03, 1.5421e-03, 1.4581e-03,\n 1.4752e-03, 1.4105e-03, 1.2879e-03, 1.3560e-03, 1.4715e-03, 1.2308e-03,\n 1.2352e-03, 1.5758e-03, 8.4522e-04, 1.2626e-03, 2.3167e-03, 1.2115e-03,\n 1.1567e-03, 1.4538e-03, 1.4224e-03, 1.2949e-03, 1.4090e-03, 9.3777e-04,\n 1.2911e-03, 2.0710e-03, 1.3791e-03, 1.4316e-03, 1.2363e-03, 9.2403e-04,\n 1.7274e-03, 1.8405e-03, 1.1819e-03, 2.1214e-03, 1.4018e-03, 1.1228e-03,\n 1.6143e-03, 1.8780e-03, 1.3793e-03, 2.0181e-03, 1.0534e-03, 1.5814e-03,\n 1.3620e-03, 1.6573e-03, 1.5140e-03, 1.1111e-03, 1.6568e-03, 9.1051e-04,\n 2.2335e-03, 1.3088e-03, 1.3194e-03, 1.8402e-03, 2.0462e-03, 1.2359e-03,\n 1.1559e-03, 1.1429e-03, 9.2514e-04, 1.3352e-03, 1.6394e-03, 1.3307e-03,\n 1.5135e-03, 9.6428e-04, 2.0591e-03, 1.3954e-03, 1.5534e-03, 2.0877e-03,\n 1.5162e-03, 2.2923e-03, 1.4714e-03, 1.5453e-03, 1.6450e-03, 1.6026e-03,\n 1.6309e-03, 1.3150e-03, 1.3270e-03, 1.1302e-03, 1.8925e-03, 1.2072e-03,\n 1.2650e-03, 2.3190e-03, 1.0594e-03, 1.5326e-03, 1.1441e-03, 1.4124e-03,\n 1.1119e-03, 1.5099e-03, 1.7233e-03, 1.1975e-03, 1.5536e-03, 1.1441e-03,\n 1.9643e-03, 1.3445e-03, 1.2040e-03, 1.5470e-03, 1.2410e-03, 1.3772e-03,\n 1.9345e-03, 2.1829e-03, 1.3780e-03, 1.2667e-03, 1.0845e-03, 1.6134e-03,\n 1.2349e-03, 2.5059e-03, 1.4014e-03, 1.4982e-03, 1.4681e-03, 1.4912e-03,\n 1.3347e-03, 1.2076e-03, 1.7392e-03, 1.6129e-03, 1.2072e-03, 1.9749e-03,\n 1.1473e-03, 1.3532e-03, 1.4454e-03, 1.4767e-03, 1.4042e-03, 1.6051e-03,\n 1.2895e-03, 1.0845e-03, 1.2653e-03, 9.9665e-04, 1.5631e-03, 1.7760e-03,\n 1.0268e-03, 1.2902e-03, 1.3746e-03, 1.2623e-03, 1.4537e-03, 1.3038e-03,\n 9.8984e-04, 1.3884e-03, 1.3210e-03, 3.6138e-04, 1.7969e-03, 1.0233e-03,\n 1.1415e-03, 2.4665e-03, 1.2741e-03, 1.3272e-03, 1.6425e-03, 1.1568e-03,\n 1.6433e-03, 1.3610e-03, 1.8717e-03, 1.2067e-03, 1.2192e-03, 1.7756e-03,\n 1.7039e-03, 1.1122e-03, 1.7709e-03, 1.2765e-03, 1.2636e-03, 1.3211e-03,\n 1.3400e-03, 1.2454e-03, 1.4686e-03, 1.1597e-03, 1.2215e-03, 1.4404e-03,\n 9.8501e-04, 1.4075e-03, 9.6534e-04, 1.4889e-03, 1.5713e-03, 1.4218e-03,\n 1.8292e-03, 1.4063e-03, 1.4661e-03, 2.3479e-03, 1.3289e-03, 1.3302e-03,\n 1.0606e-03, 1.5781e-03, 1.3167e-03, 1.1081e-03, 1.8055e-03, 1.4898e-03,\n 1.5711e-03, 1.3987e-03, 1.0472e-03, 1.4681e-03, 1.2667e-03, 1.2010e-03,\n 1.2199e-03, 1.0639e-03, 9.5507e-04, 1.1699e-03, 1.5081e-03, 1.2147e-03,\n 9.6099e-04, 1.7006e-03, 1.3885e-03, 1.8663e-03, 1.1458e-03, 2.0168e-03,\n 1.5071e-03, 1.2448e-03, 1.3497e-03, 1.2834e-03, 1.1421e-03, 1.4536e-03,\n 1.8764e-03, 1.1610e-03, 1.5359e-03, 1.9669e-03, 1.2616e-03, 1.3189e-03,\n 1.5004e-03, 1.9778e-03, 1.0791e-03, 1.3480e-03, 1.9696e-03, 1.5223e-03,\n 9.7903e-04, 1.2870e-03, 1.2454e-03, 2.1901e-03, 1.8266e-03, 1.4521e-03,\n 1.3100e-03, 1.1709e-03, 1.1436e-03, 1.0966e-03, 1.1657e-03, 1.7130e-03,\n 1.4075e-03, 1.3635e-03, 1.2530e-03, 1.1838e-03, 2.3774e-03, 9.5922e-04,\n 1.3868e-03, 1.1217e-03, 1.5981e-03, 1.1125e-03, 1.4947e-03, 2.1206e-03,\n 9.2600e-04, 1.5259e-03, 2.3683e-03, 1.2807e-03, 1.6913e-03, 1.0543e-03,\n 1.1535e-03, 1.1731e-03, 9.8088e-04, 1.0933e-03, 1.5734e-03, 1.7487e-03,\n 1.3746e-03, 1.0775e-03, 1.4141e-03, 1.0806e-03, 1.1706e-03, 1.6268e-03,\n 1.2086e-03, 1.4395e-03, 1.5517e-03, 1.1648e-03, 1.0990e-03, 1.3759e-03,\n 2.1825e-03, 1.4970e-03, 1.4144e-03, 1.1455e-03, 1.5490e-03, 1.9739e-03,\n 1.1252e-03, 1.4395e-03, 1.4358e-03, 1.3517e-03, 1.5537e-03, 1.3315e-03,\n 1.2603e-03, 1.0314e-03, 1.8641e-03, 1.5405e-03, 1.6250e-03, 1.4233e-03,\n 1.7014e-03, 1.3714e-03, 1.2454e-03, 1.4494e-03, 1.2792e-03, 1.6420e-03,\n 1.1868e-03, 9.2037e-04, 1.5139e-03, 1.2271e-03, 1.1942e-03, 1.0934e-03,\n 1.5219e-03, 1.2157e-03, 1.5693e-03, 2.0606e-03, 1.1357e-03, 1.1320e-03,\n 1.6821e-03, 1.9186e-03, 1.4961e-03, 1.1827e-03, 1.2414e-03, 1.8285e-03,\n 1.3031e-03, 2.1668e-03, 1.2105e-03, 1.1696e-03, 1.0666e-03, 1.5704e-03,\n 3.3934e-04, 1.1564e-03, 1.0904e-03, 1.6030e-03, 1.3934e-03, 1.3451e-03,\n 1.3814e-03, 1.6372e-03, 1.3813e-03, 1.2780e-03, 1.0242e-03, 1.9313e-03],\n device='cuda:0')" + }, + "2": { + "step": "tensor(12520.)", + "exp_avg": "tensor([[-2.1745e-05, -3.8191e-07, 2.4631e-05, ..., 9.6987e-06,\n -1.1872e-06, 6.4311e-06],\n [-7.0493e-05, -7.5217e-06, -8.2941e-05, ..., 1.3206e-05,\n 1.8610e-04, -1.2421e-04],\n [ 4.4057e-07, -2.3143e-06, -2.5829e-05, ..., 9.9502e-06,\n -3.2135e-04, -6.3263e-06],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 0.0000e+00,\n 5.6052e-45, 5.6052e-45],\n [-7.1096e-05, -2.2634e-05, -1.6609e-05, ..., -1.0879e-04,\n 2.9247e-05, 1.1101e-04],\n [ 2.6493e-05, -3.5258e-05, 3.1619e-06, ..., 3.0881e-05,\n -4.4249e-05, 8.4813e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.5363e-09, 2.3876e-09, 1.2523e-07, ..., 4.5216e-08, 1.2139e-08,\n 6.7279e-10],\n [3.2530e-08, 6.0781e-09, 1.2977e-07, ..., 2.3369e-08, 1.2766e-07,\n 4.1287e-08],\n [1.0328e-07, 5.0538e-09, 1.1736e-08, ..., 6.3869e-08, 1.9267e-07,\n 4.5350e-08],\n ...,\n [4.1091e-15, 6.1507e-16, 2.6282e-17, ..., 0.0000e+00, 1.1348e-15,\n 1.1018e-15],\n [6.0307e-08, 3.4158e-08, 4.7600e-08, ..., 8.0259e-08, 7.4402e-08,\n 6.3324e-08],\n [7.7937e-08, 3.6723e-08, 4.1776e-08, ..., 4.0698e-08, 4.2730e-08,\n 8.0944e-08]], device='cuda:0')" + }, + "3": { + "step": "tensor(12520.)", + "exp_avg": "tensor([-2.0843e-03, 6.9995e-04, -4.5520e-03, 2.1538e-03, -4.7779e-03,\n 1.7149e-03, 5.4926e-03, -5.4454e-03, -4.5391e-04, -1.2327e-03,\n -1.0064e-02, 4.4913e-03, 7.1662e-03, -3.9294e-03, 3.7920e-03,\n -3.2184e-03, -8.9481e-03, -6.3145e-03, -1.1159e-03, 2.5073e-04,\n 5.6052e-45, 1.5001e-02, 8.8077e-03, 5.3648e-03, 3.5524e-03,\n 5.6052e-45, -1.2638e-03, 6.6380e-03, -4.6375e-03, 3.3318e-03,\n -6.9171e-03, -9.2274e-03, 4.9780e-03, 6.0322e-04, -7.2065e-04,\n 1.9869e-03, 5.6052e-45, 3.0602e-03, 1.4197e-03, -5.0494e-03,\n -9.8512e-03, 1.2992e-03, -2.6089e-03, 2.0467e-04, 7.4623e-04,\n -2.2029e-03, -5.9289e-03, 4.9995e-03, 6.1401e-03, 1.0073e-03,\n -1.8478e-03, 5.4258e-04, -2.6089e-03, -6.9721e-18, 1.0692e-03,\n 6.4989e-04, -6.4850e-04, 1.0481e-02, 5.2074e-03, 5.4045e-03,\n -4.6491e-03, -3.0861e-03, 4.2575e-03, -2.8164e-03, -4.1572e-06,\n -6.4057e-03, 1.0634e-02, 4.8646e-03, 1.7945e-03, -1.5874e-04,\n 7.1471e-03, 9.3399e-04, 1.5283e-03, 1.1705e-03, -9.2472e-03,\n 4.4148e-03, 7.6419e-03, -1.4475e-03, -4.0255e-03, -6.7961e-03,\n 2.9583e-03, -2.5107e-03, 1.0253e-02, 6.2018e-03, -6.8207e-03,\n 9.5106e-04, 8.6949e-03, -8.6263e-03, 5.6052e-45, -6.7133e-03,\n -6.4981e-03, 2.4601e-03, 5.8680e-03, 6.0581e-03, -4.0419e-03,\n -3.2070e-03, -3.0478e-03, -6.5808e-03, 2.1436e-03, 1.7106e-03,\n 3.9599e-03, -3.3693e-03, -3.7517e-03, -1.0579e-03, -2.1607e-03,\n 5.6052e-45, 5.6052e-45, 2.5071e-03, 1.4546e-03, -5.7130e-03,\n -6.7701e-03, 4.0488e-03, 1.3660e-02, 6.2193e-03, 2.4973e-03,\n -2.8233e-04, -6.7813e-03, 6.6386e-03, 9.7999e-04, 2.3717e-04,\n 7.4574e-03, 5.6052e-45, 6.9988e-03, -7.4481e-03, 2.2507e-03,\n -3.4132e-03, -3.8948e-03, 6.9628e-04, 5.2050e-03, 1.0096e-02,\n -4.5938e-03, -8.9428e-05, 2.3342e-03, -7.2966e-04, -8.3708e-04,\n 5.6052e-45, 1.4075e-03, 6.2149e-04, -1.4481e-03, 1.3302e-03,\n 3.5865e-03, -2.2082e-03, 4.9698e-03, 2.0638e-05, 3.2740e-03,\n 3.8790e-03, -4.1165e-03, 5.6052e-45, 3.2173e-05, -6.9914e-03,\n 3.9789e-03, 3.3183e-03, 1.5802e-03, -9.2288e-03, 1.1280e-02,\n -4.2075e-03, 5.7713e-06, 2.4890e-03, -1.1362e-02, 1.9038e-03,\n -2.5624e-03, -5.6636e-03, 2.4912e-03, 1.0122e-02, 3.1122e-03,\n 5.2793e-03, -1.0097e-03, 5.6810e-04, -3.6716e-05, -4.5148e-03,\n 5.6052e-45, -4.4611e-03, 8.5190e-04, 1.3357e-03, 3.8165e-03,\n -1.0595e-03, 6.8923e-04, 1.0023e-03, -9.0369e-04, 3.9929e-03,\n -1.0337e-03, -4.6366e-03, 5.6052e-45, 6.0985e-03, 2.3590e-03,\n 6.1509e-04, 7.0773e-03, 2.0913e-05, 6.0212e-03, -1.5968e-03,\n -5.7426e-03, -4.5347e-03, 5.6052e-45, 4.9581e-03, 3.9542e-03,\n 4.6835e-03, 1.0543e-02, -2.5204e-03, 8.0480e-04, 5.2951e-03,\n -2.4757e-03, 1.2440e-02, 2.6381e-03, -2.7342e-03, -3.9443e-03,\n 2.5265e-03, 4.5034e-04, -1.0638e-02, 7.1973e-03, 4.6107e-03,\n -4.0035e-04, -8.9472e-04, -7.1337e-03, -6.1911e-03, -8.1849e-04,\n 2.5005e-03, 1.0583e-03, -1.9699e-03, 4.7481e-03, -3.2982e-03,\n -8.4613e-03, -6.7262e-03, -1.0149e-02, 4.8227e-03, 1.4776e-02,\n 8.3465e-03, -6.5920e-03, 4.9961e-03, -1.7730e-03, -3.9633e-03,\n 9.3907e-03, 3.2701e-03, 7.6244e-03, -5.4992e-03, -3.1526e-03,\n 1.0760e-02, -1.5604e-03, 1.8448e-03, -1.0004e-02, 3.9799e-03,\n 2.0297e-03, -1.0146e-02, -4.0832e-03, 8.1379e-05, -2.0977e-03,\n 8.3889e-04, -2.5050e-03, 5.7682e-04, 1.8320e-04, 4.5466e-03,\n 1.3902e-04, 4.5970e-03, 5.7617e-03, 2.9837e-03, -2.8088e-03,\n -5.4467e-03, -8.1557e-03, 9.0316e-03, -5.1655e-04, 6.8678e-03,\n -4.6322e-03, 1.2910e-03, 9.8284e-03, 4.1227e-03, 6.0834e-03,\n 1.0956e-03, 7.2363e-03, 7.7592e-03, -1.0191e-02, 2.1611e-03,\n 2.0308e-03, -6.9337e-04, 8.9108e-04, -6.8576e-03, -1.7701e-02,\n 1.0722e-03, 6.4869e-04, -9.7798e-04, 7.1216e-03, -1.1628e-02,\n -9.2244e-04, -7.9618e-03, -4.5390e-03, 6.3388e-03, 8.8427e-03,\n 6.3760e-04, 2.8242e-03, 2.5804e-03, -1.7468e-03, 1.6862e-03,\n 6.1870e-03, 4.0827e-03, 1.2604e-03, -5.7752e-03, 6.8584e-03,\n 4.5676e-03, 8.0578e-03, 4.0432e-03, 2.0583e-03, -4.4805e-03,\n 7.0474e-03, -9.5221e-04, -8.6691e-03, -1.6340e-03, 6.0453e-04,\n 4.0081e-03, 4.1383e-04, 3.2849e-03, 4.6130e-03, -9.1761e-03,\n -7.6585e-04, -5.3603e-03, -6.5244e-05, 1.2720e-03, 4.0193e-04,\n 1.0934e-02, 4.5815e-03, 1.2568e-03, -5.2828e-03, 7.6270e-04,\n 1.1336e-03, 3.8706e-03, -7.1427e-03, -1.1191e-03, -2.4931e-03,\n 2.1707e-03, -3.3559e-03, -1.3264e-02, 1.6058e-03, -1.1638e-03,\n 5.6052e-45, -3.2653e-03, 7.0758e-04, -3.3378e-03, -3.7398e-03,\n 3.5905e-03, -6.8304e-03, -1.2138e-03, 5.9182e-03, -6.8721e-03,\n -3.3247e-03, -3.1355e-03, 1.9585e-03, -1.3084e-03, 5.6052e-45,\n -2.7797e-03, -4.2230e-03, 8.8546e-03, 1.0071e-03, 1.2522e-02,\n -7.1974e-03, 5.8940e-03, 1.1874e-03, -2.2857e-03, 6.6029e-03,\n -7.5673e-04, -1.0333e-02, 4.1633e-03, -1.9313e-03, -1.2528e-03,\n -4.4204e-03, -8.7199e-03, 1.9405e-03, -3.2455e-03, 2.2193e-03,\n -7.5341e-03, 2.3433e-03, 6.8803e-03, -5.0824e-03, -8.6824e-03,\n 1.3375e-03, -2.8327e-03, -6.1440e-03, 1.7143e-03, -1.8052e-02,\n -3.7040e-03, -1.8465e-03, -6.2589e-03, -1.1502e-04, 4.7851e-03,\n 1.5344e-03, 1.3793e-03, 6.2553e-04, 6.5004e-03, 5.6052e-45,\n -1.4231e-03, 1.5380e-03, -4.2834e-03, -5.9420e-03, 6.5109e-03,\n 5.2777e-04, -4.1408e-03, -1.4777e-03, 3.3236e-03, 2.2811e-03,\n 6.9437e-03, -3.4440e-04, -4.5696e-03, 1.0530e-02, -7.2387e-03,\n 7.4357e-03, 1.2311e-04, 4.3802e-03, -1.0920e-02, 6.6605e-03,\n 4.5795e-03, -3.6658e-03, 1.5615e-02, 3.6042e-03, 1.0574e-03,\n -7.8646e-04, 3.3203e-04, 3.2457e-04, -3.9756e-03, 2.8038e-03,\n 4.9421e-03, 1.2078e-03, -5.1487e-03, -4.3392e-03, -4.5965e-03,\n -1.5907e-03, 7.2416e-04, -4.3101e-03, 3.3552e-05, 1.5407e-03,\n -8.6108e-03, -1.2211e-03, -3.3188e-03, 8.5745e-03, 3.3982e-03,\n -4.2268e-03, 2.3344e-03, 1.3642e-03, -9.2427e-04, -1.2208e-04,\n 5.1500e-03, -1.5031e-02, 6.9677e-03, -4.8211e-04, -1.7329e-03,\n 6.9843e-03, 2.1662e-03, -1.4320e-03, -7.5276e-04, 4.4215e-03,\n -1.2665e-02, -2.1536e-03, -8.2889e-03, 4.6526e-03, 2.7443e-03,\n -6.0652e-04, 3.5097e-03, 1.0103e-02, -5.0211e-03, -1.0775e-02,\n -8.4538e-03, -5.6911e-03, 7.6168e-04, 5.4692e-03, -1.6687e-02,\n -1.0751e-03, 8.2335e-03, -2.1799e-03, 1.3545e-02, 1.5387e-03,\n -3.5620e-03, 1.0980e-03, 1.0671e-03, -2.2123e-03, -1.9153e-03,\n 8.6917e-03, 1.8504e-03, 1.5562e-03, -3.1032e-03, 5.3085e-03,\n -3.6354e-03, 1.4596e-03, 1.6206e-04, 7.3406e-04, 2.6259e-03,\n -1.0118e-02, 2.4406e-03, 4.7154e-04, 6.5607e-03, 1.4868e-03,\n -6.5460e-03, -6.9561e-03, -1.4903e-02, -8.5656e-03, 8.2633e-03,\n -1.2501e-03, 2.7594e-03, 4.8808e-03, 9.8818e-03, -2.7164e-03,\n -2.7101e-03, -3.5044e-04, 1.0180e-02, 2.9653e-03, -1.0795e-02,\n 1.2758e-03, -1.8686e-03, -2.8490e-03, 5.3925e-03, -3.9343e-03,\n 7.8327e-04, 4.1618e-03, 7.0714e-04, 4.4855e-03, 5.6052e-45,\n -1.0946e-04, -3.0459e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.8147e-04, 3.1742e-04, 3.2051e-04, 1.2571e-04, 3.3406e-04, 2.9784e-04,\n 3.7629e-04, 2.9723e-04, 3.2842e-04, 3.6702e-04, 3.7968e-04, 3.3636e-04,\n 3.7454e-04, 3.4818e-04, 3.6345e-04, 3.7000e-04, 3.5699e-04, 1.3546e-04,\n 3.6332e-04, 3.5241e-04, 4.1495e-11, 3.8725e-04, 3.2799e-04, 3.4794e-04,\n 2.5060e-04, 3.3439e-11, 3.4784e-04, 3.8719e-04, 3.2408e-04, 3.1914e-04,\n 3.0955e-04, 2.3433e-04, 3.0964e-04, 2.7835e-04, 1.4779e-04, 4.2359e-04,\n 1.5537e-11, 3.6590e-04, 3.7945e-04, 2.2795e-04, 2.9118e-04, 4.3496e-04,\n 1.9431e-04, 1.9641e-04, 1.2136e-04, 1.8249e-04, 3.8318e-04, 3.5353e-04,\n 3.1333e-04, 3.5382e-04, 3.3670e-04, 2.9152e-04, 3.4257e-04, 6.2699e-10,\n 2.9078e-04, 3.8291e-04, 2.1586e-04, 3.4332e-04, 4.5261e-04, 3.5186e-04,\n 1.5483e-04, 3.7567e-04, 3.2349e-04, 3.4207e-04, 3.9954e-04, 3.0878e-04,\n 3.5653e-04, 3.6510e-04, 2.6350e-04, 3.3158e-04, 3.6135e-04, 3.1003e-04,\n 3.6685e-04, 3.6246e-04, 2.9047e-04, 3.4733e-04, 3.4204e-04, 3.2520e-04,\n 3.2930e-04, 2.7233e-04, 3.0527e-04, 3.5469e-04, 3.0108e-04, 3.3284e-04,\n 3.7117e-04, 3.2741e-04, 3.3752e-04, 3.5617e-04, 1.9467e-10, 3.0451e-04,\n 3.3481e-04, 4.0000e-04, 3.9031e-04, 3.6647e-04, 1.1597e-04, 2.8547e-04,\n 9.3991e-05, 3.0434e-04, 3.5058e-04, 2.8573e-04, 4.0547e-04, 3.6792e-04,\n 3.5267e-04, 3.0867e-04, 1.0586e-04, 7.6236e-11, 2.5383e-12, 2.9954e-04,\n 3.4726e-04, 3.2383e-04, 2.5839e-04, 3.2024e-04, 3.9678e-04, 3.4314e-04,\n 2.7392e-04, 3.0818e-04, 3.7798e-04, 2.7506e-04, 3.3988e-04, 3.2066e-04,\n 3.2728e-04, 1.6221e-10, 3.0110e-04, 3.2792e-04, 3.4516e-04, 1.2383e-04,\n 3.2566e-04, 3.3994e-04, 2.9748e-04, 3.2658e-04, 3.2616e-04, 3.5897e-04,\n 1.1454e-04, 3.2258e-04, 1.6102e-04, 8.1524e-10, 2.8024e-04, 3.7574e-04,\n 2.7219e-04, 3.1891e-04, 2.8007e-04, 2.7179e-04, 2.9790e-04, 3.5178e-04,\n 3.8698e-04, 3.6603e-04, 3.3972e-04, 1.2472e-13, 1.4273e-04, 3.0924e-04,\n 3.6118e-04, 3.4429e-04, 3.0731e-04, 3.7871e-04, 4.4634e-04, 3.9787e-04,\n 3.0807e-04, 3.5936e-04, 3.8644e-04, 3.7096e-04, 3.1625e-04, 3.2792e-04,\n 3.5719e-04, 2.2953e-04, 3.2375e-04, 3.0951e-04, 3.0889e-04, 3.2111e-04,\n 3.8954e-04, 3.2379e-04, 3.0011e-11, 3.8682e-04, 3.3592e-04, 3.8042e-04,\n 4.0379e-04, 3.3036e-04, 3.8895e-04, 2.8189e-04, 2.7334e-04, 3.9173e-04,\n 3.2517e-04, 3.1010e-04, 3.0937e-11, 3.1231e-04, 2.0921e-04, 2.5624e-04,\n 3.5682e-04, 3.3721e-04, 3.4880e-04, 3.1379e-04, 8.6655e-05, 2.3741e-04,\n 7.6094e-12, 2.7935e-04, 3.5970e-04, 2.9138e-04, 3.3029e-04, 3.1718e-04,\n 1.6653e-04, 3.4173e-04, 3.4717e-04, 2.9117e-04, 2.0943e-04, 3.5348e-04,\n 3.9118e-04, 4.0198e-04, 3.1457e-04, 4.1873e-04, 3.4859e-04, 2.7279e-04,\n 3.2311e-04, 2.6799e-04, 3.0130e-04, 3.3331e-04, 2.6966e-04, 3.2519e-04,\n 3.6933e-04, 2.9932e-04, 3.8156e-04, 3.7359e-04, 3.5372e-04, 3.1577e-04,\n 3.3220e-04, 3.1454e-04, 3.6966e-04, 3.9296e-04, 2.9351e-04, 3.2059e-04,\n 3.4682e-04, 2.7530e-04, 3.2750e-04, 3.2951e-04, 3.0087e-04, 2.1474e-04,\n 3.1852e-04, 2.9651e-04, 2.2839e-04, 3.1220e-04, 3.6848e-04, 3.8442e-04,\n 3.3431e-04, 2.4007e-04, 2.6468e-04, 2.8732e-04, 3.4827e-04, 2.7042e-04,\n 3.4157e-04, 3.5821e-04, 3.2308e-04, 3.1995e-04, 3.2684e-04, 3.0358e-04,\n 3.4440e-04, 4.0529e-04, 3.5118e-04, 3.2056e-04, 3.3338e-04, 3.5713e-04,\n 3.8632e-04, 3.0124e-04, 2.9897e-04, 3.0945e-04, 3.5760e-04, 3.5925e-04,\n 3.4257e-04, 2.9360e-04, 3.3074e-04, 2.7061e-04, 2.9541e-04, 3.4190e-04,\n 2.7698e-04, 2.9987e-04, 4.6217e-04, 3.5611e-04, 3.3891e-04, 3.4746e-04,\n 3.4073e-04, 3.3600e-04, 3.8959e-04, 3.3187e-04, 3.3163e-04, 3.1441e-04,\n 3.6108e-04, 3.3831e-04, 3.0745e-04, 3.5177e-04, 1.7123e-04, 3.1090e-04,\n 3.0534e-04, 2.2299e-04, 3.0623e-04, 2.6170e-04, 2.9809e-04, 2.7847e-04,\n 4.1649e-04, 3.2908e-04, 3.1132e-04, 3.3063e-04, 3.4604e-04, 3.4823e-04,\n 3.3826e-04, 3.4549e-04, 4.0608e-04, 3.7765e-04, 3.0577e-04, 3.3066e-04,\n 3.8172e-04, 3.1807e-04, 3.0197e-04, 2.8867e-04, 2.7042e-04, 2.9053e-04,\n 3.3528e-04, 2.8378e-04, 3.4865e-04, 3.0283e-04, 2.3018e-04, 1.5118e-04,\n 2.8605e-04, 3.6234e-04, 3.3335e-04, 3.4976e-04, 3.4218e-04, 3.1765e-04,\n 2.9501e-04, 3.5366e-04, 3.7992e-04, 3.5459e-04, 2.3123e-04, 3.0913e-04,\n 8.1816e-12, 2.9369e-04, 3.0976e-04, 3.6819e-04, 3.3608e-04, 2.1814e-04,\n 3.1622e-04, 3.3101e-04, 4.5078e-04, 3.1958e-04, 3.3631e-04, 3.1091e-04,\n 3.3521e-04, 3.5509e-04, 1.7881e-13, 3.4299e-04, 3.2297e-04, 3.2978e-04,\n 1.9815e-04, 3.4373e-04, 2.8188e-04, 3.2863e-04, 2.9378e-04, 3.3178e-04,\n 3.4716e-04, 3.4123e-04, 3.8745e-04, 3.2900e-04, 3.2407e-04, 1.6272e-04,\n 3.3975e-04, 2.7573e-04, 2.9870e-04, 3.2724e-04, 3.3369e-04, 3.3249e-04,\n 2.0033e-04, 3.7728e-04, 3.0722e-04, 2.7094e-04, 3.0430e-04, 2.4740e-04,\n 3.2074e-04, 2.9470e-04, 3.5053e-04, 7.5007e-05, 2.8573e-04, 3.3505e-04,\n 2.9163e-04, 3.2497e-04, 3.2383e-04, 4.1018e-04, 3.4837e-04, 2.8671e-04,\n 2.6210e-11, 4.0642e-04, 2.8663e-04, 3.7784e-04, 3.2838e-04, 3.8050e-04,\n 2.6577e-04, 2.1984e-04, 2.7004e-04, 2.7279e-04, 3.0766e-04, 3.6605e-04,\n 4.0910e-04, 3.3509e-04, 4.9430e-04, 3.9738e-04, 3.3257e-04, 3.4193e-04,\n 3.1861e-04, 3.2246e-04, 2.7120e-04, 3.1028e-04, 2.5500e-04, 2.7835e-04,\n 3.2140e-04, 3.0906e-04, 4.1664e-04, 3.0684e-04, 2.8099e-04, 2.9278e-04,\n 3.0279e-04, 3.5737e-04, 3.3858e-04, 3.2077e-04, 3.3300e-04, 2.2225e-04,\n 3.1242e-04, 3.6184e-04, 3.4035e-04, 3.8274e-04, 3.1387e-04, 3.8333e-04,\n 3.0794e-04, 3.3720e-04, 2.7227e-04, 2.8059e-04, 3.0591e-04, 3.4288e-04,\n 3.2271e-04, 3.6278e-04, 4.0777e-04, 2.6965e-04, 3.1268e-04, 3.0036e-04,\n 2.8578e-04, 3.1325e-04, 3.2352e-04, 1.9167e-04, 3.5581e-04, 3.2628e-04,\n 3.1090e-04, 3.1096e-04, 3.3013e-04, 3.2853e-04, 3.4547e-04, 2.9958e-04,\n 3.1763e-04, 3.1184e-04, 3.3701e-04, 3.7931e-04, 2.8419e-04, 4.4377e-04,\n 2.3567e-04, 3.7032e-04, 3.1260e-04, 3.3084e-04, 3.5393e-04, 2.8718e-04,\n 3.5076e-04, 3.5943e-04, 2.9405e-04, 3.5292e-04, 3.1866e-04, 3.5941e-04,\n 3.3598e-04, 2.9274e-04, 3.1100e-04, 3.3013e-04, 2.6893e-04, 3.3920e-04,\n 1.7960e-04, 3.2357e-04, 6.8290e-05, 3.5332e-04, 3.5796e-04, 3.4744e-04,\n 3.5311e-04, 3.4664e-04, 1.1760e-04, 3.4872e-04, 3.2726e-04, 3.6074e-04,\n 4.1326e-04, 3.1530e-04, 3.1190e-04, 3.7264e-04, 3.5267e-04, 2.9225e-04,\n 3.5793e-04, 3.6442e-04, 3.1246e-04, 3.2907e-04, 3.9150e-04, 3.3107e-04,\n 3.4168e-04, 3.1021e-04, 2.6830e-04, 3.0945e-04, 3.2977e-04, 3.3588e-04,\n 2.9415e-04, 4.3286e-04, 3.7628e-04, 3.7418e-04, 4.3303e-04, 1.7568e-11,\n 3.2815e-04, 3.2785e-04], device='cuda:0')" + }, + "4": { + "step": "tensor(12520.)", + "exp_avg": "tensor([[ 1.4579e-05, -7.9839e-05, 3.7980e-07, ..., -5.6052e-45,\n -8.8849e-05, -2.0402e-04],\n [ 1.2893e-05, 2.3514e-05, -1.5769e-05, ..., -5.6052e-45,\n -2.6903e-05, -1.9207e-05],\n [-2.6145e-05, 2.0340e-04, 3.1691e-05, ..., -5.6052e-45,\n -1.8889e-04, -1.8098e-05],\n ...,\n [-8.0893e-06, -1.0864e-04, 1.1001e-05, ..., 5.6052e-45,\n -2.7084e-04, -1.5738e-05],\n [ 1.3906e-05, 2.2018e-05, -3.4512e-05, ..., 5.6052e-45,\n 3.0742e-05, 5.5180e-05],\n [ 1.9945e-05, 2.7748e-04, 3.0267e-05, ..., 5.6052e-45,\n 1.6137e-04, 6.1344e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.9751e-09, 3.4320e-08, 9.9339e-09, ..., 8.0612e-16, 5.4485e-08,\n 1.2190e-07],\n [6.2706e-09, 7.0140e-08, 2.0381e-08, ..., 1.2982e-15, 1.2663e-07,\n 7.5625e-08],\n [7.8389e-09, 6.4095e-08, 2.2382e-08, ..., 4.0512e-16, 1.2037e-07,\n 8.3136e-08],\n ...,\n [6.6223e-09, 8.9889e-08, 2.3915e-08, ..., 8.6932e-16, 1.5214e-07,\n 7.7784e-08],\n [6.4372e-09, 7.7523e-08, 2.4062e-08, ..., 1.7290e-15, 1.4608e-07,\n 1.0749e-07],\n [9.0987e-09, 9.4259e-08, 3.2368e-08, ..., 9.8112e-16, 1.3046e-07,\n 1.1072e-07]], device='cuda:0')" + }, + "5": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 1.2379e-04, -2.7065e-05, -1.0554e-04, ..., -8.6088e-05,\n -1.5027e-04, -3.7380e-05],\n [-6.9783e-05, -2.6239e-06, 4.0394e-05, ..., 1.0182e-04,\n 6.7751e-05, -3.5092e-05],\n [-4.0150e-05, 1.1927e-05, -6.0686e-05, ..., -3.9867e-05,\n 2.1317e-04, 5.9836e-05],\n ...,\n [ 1.0279e-04, -8.5642e-04, 1.5905e-04, ..., -7.0023e-05,\n 2.5552e-04, -6.1508e-05],\n [-9.6144e-05, -1.2258e-05, 1.5080e-04, ..., 7.8858e-04,\n -1.5436e-05, 5.9455e-05],\n [ 2.6950e-05, -3.2928e-05, 5.8767e-05, ..., -2.3082e-04,\n 2.2180e-04, -9.8355e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.4865e-07, 2.3739e-08, 3.2408e-07, ..., 1.6404e-07, 3.5776e-07,\n 1.2021e-07],\n [1.4578e-07, 1.7697e-08, 7.2464e-07, ..., 1.1793e-06, 1.9732e-07,\n 1.0007e-07],\n [2.1141e-07, 1.3301e-07, 1.2438e-07, ..., 4.8213e-08, 8.4416e-08,\n 3.2443e-07],\n ...,\n [1.2279e-07, 4.4940e-06, 2.5626e-07, ..., 1.8251e-07, 2.2075e-07,\n 2.5649e-07],\n [3.0385e-07, 3.3712e-08, 3.2350e-07, ..., 6.4554e-07, 1.1409e-07,\n 6.5850e-08],\n [2.2142e-07, 1.0824e-07, 1.0851e-07, ..., 6.8635e-07, 3.8934e-07,\n 3.7107e-07]], device='cuda:0')" + }, + "6": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 0.0040, -0.0027, 0.0018, ..., 0.0001, 0.0080, -0.0200],\n device='cuda:0')", + "exp_avg_sq": "tensor([0.0010, 0.0010, 0.0010, ..., 0.0012, 0.0009, 0.0011], device='cuda:0')" + }, + "7": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-2.0776e-04, 1.0884e-05, 9.0935e-05, ..., -1.0899e-04,\n -8.8966e-05, -2.1880e-05],\n [-1.2323e-04, -1.1696e-04, 1.5337e-04, ..., -3.8824e-05,\n 5.3936e-05, -6.5076e-05],\n [ 7.6985e-05, 4.4438e-05, -1.1311e-04, ..., -1.1437e-04,\n -1.0596e-04, 2.9991e-05],\n ...,\n [-1.1240e-04, 7.7650e-05, -8.0805e-05, ..., 1.0886e-04,\n 3.4065e-05, 9.5708e-06],\n [-2.8301e-04, -2.2069e-04, -6.3019e-06, ..., -1.1888e-05,\n -1.9787e-04, -2.2985e-04],\n [-2.0400e-04, 9.8287e-05, -1.3908e-04, ..., -3.7434e-05,\n 4.3839e-05, 1.5633e-04]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.5414e-08, 3.0462e-08, 4.7204e-08, ..., 4.7659e-08, 3.6840e-08,\n 5.3872e-08],\n [6.9461e-08, 7.2470e-08, 9.2486e-08, ..., 1.2120e-07, 9.5305e-08,\n 1.5270e-07],\n [8.5046e-08, 9.8980e-08, 1.1238e-07, ..., 9.9847e-08, 8.5486e-08,\n 1.7225e-07],\n ...,\n [1.0261e-07, 1.1789e-07, 9.2574e-08, ..., 1.1667e-07, 9.6998e-08,\n 1.4119e-07],\n [1.1216e-07, 1.0081e-07, 1.0061e-07, ..., 9.3152e-08, 9.4987e-08,\n 1.3501e-07],\n [9.7113e-08, 8.9694e-08, 1.0790e-07, ..., 3.8867e-07, 8.2369e-08,\n 1.3878e-07]], device='cuda:0')" + }, + "14": { + "step": "tensor(8764.)", + "exp_avg": "tensor(7.4720e-06, device='cuda:0')", + "exp_avg_sq": "tensor(4.9530e-06, device='cuda:0')" + }, + "15": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-9.6136e-11, 4.1964e-11, -4.9723e-11, ..., -6.3420e-11,\n -1.7416e-10, -1.0795e-10],\n [-1.1566e-10, -2.2757e-11, -1.5105e-10, ..., 6.5263e-11,\n 4.3717e-11, 1.6766e-10],\n [-2.5949e-10, -2.6223e-11, 3.1405e-12, ..., 9.1190e-11,\n 1.7628e-10, 1.2950e-10],\n ...,\n [ 6.1088e-10, 4.7653e-11, 3.4627e-11, ..., -4.0674e-10,\n 3.5563e-10, 8.2478e-11],\n [ 8.0379e-11, -2.7744e-10, -1.6110e-10, ..., -3.4060e-10,\n -8.5150e-11, -3.2392e-10],\n [ 1.3716e-10, 8.9800e-11, -6.1251e-10, ..., -4.2028e-10,\n -2.6207e-10, -5.5259e-10]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1595e-15, 6.6079e-17, 3.2803e-16, ..., 2.5749e-16, 1.9388e-15,\n 8.2318e-16],\n [5.6259e-17, 3.3318e-17, 3.0208e-16, ..., 4.2629e-17, 5.4192e-16,\n 2.6940e-16],\n [1.6395e-16, 8.8597e-16, 4.4599e-17, ..., 3.6101e-17, 4.8504e-16,\n 5.3060e-16],\n ...,\n [9.1012e-14, 3.9199e-14, 8.0766e-14, ..., 3.3125e-14, 3.4836e-13,\n 9.7453e-14],\n [2.0548e-14, 2.4114e-14, 2.3044e-14, ..., 1.1452e-14, 6.5735e-14,\n 4.9300e-14],\n [2.3159e-14, 9.6747e-15, 1.5339e-14, ..., 2.0057e-14, 4.1983e-14,\n 2.9301e-14]], device='cuda:0')" + }, + "16": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-2.7650e-08, -5.5592e-09, 1.0202e-08, -1.7912e-08, -4.7151e-07,\n -2.6560e-08, -1.8568e-08, -6.0227e-09, 1.4514e-07, -2.1144e-08,\n -1.4046e-08, -3.7567e-08, -3.1848e-08, 1.2199e-09, -8.2692e-08,\n 3.1641e-08, 1.5051e-08, -3.7829e-08, -3.6958e-08, 1.7786e-07,\n 7.4031e-09, 3.6974e-08, -1.2413e-08, 5.3936e-08, -2.4371e-08,\n 2.7619e-08, -9.4598e-09, 7.3725e-08, -6.8222e-08, -2.1029e-08,\n 1.0461e-08, -1.5018e-09, 4.2475e-08, 9.5417e-08, -9.6204e-08,\n 7.8521e-08, -6.2286e-08, -1.1642e-07, -1.3616e-08, 3.7897e-08,\n 2.3750e-08, 2.3197e-08, 7.6450e-08, 1.7201e-09, -3.8901e-08,\n 1.9140e-08, 5.6558e-08, -5.7546e-08, 8.4639e-08, 5.7810e-08,\n 2.7723e-08, 1.3707e-08, 1.7545e-08, -2.7921e-08, 1.6405e-07,\n 5.9495e-08, 1.0674e-07, -4.1564e-09, 6.4843e-08, -9.4577e-08,\n -1.8652e-07, 7.5007e-08, 1.7768e-08, -6.4680e-08], device='cuda:0')", + "exp_avg_sq": "tensor([3.6346e-11, 1.1822e-11, 3.3217e-12, 4.1946e-09, 3.0772e-09, 2.5738e-10,\n 3.8603e-10, 5.8272e-11, 3.5464e-08, 1.2574e-10, 1.2267e-10, 8.4457e-10,\n 1.4528e-09, 6.3183e-10, 2.2096e-11, 1.0168e-09, 1.0315e-08, 1.5391e-09,\n 3.9180e-10, 2.5552e-08, 5.1828e-10, 1.8622e-11, 3.4824e-11, 1.0056e-08,\n 4.6403e-11, 1.7457e-08, 1.4739e-12, 4.4501e-09, 2.7998e-11, 1.4202e-10,\n 2.2344e-10, 1.2951e-11, 1.2570e-08, 9.0669e-09, 1.2129e-09, 6.3363e-09,\n 3.2036e-09, 2.3922e-10, 1.1260e-10, 7.0498e-09, 3.5884e-08, 3.4467e-10,\n 1.7812e-08, 4.3294e-11, 5.1003e-13, 1.1352e-08, 5.3450e-09, 1.5508e-10,\n 2.1693e-08, 3.9941e-09, 3.7148e-11, 5.4373e-09, 4.1448e-08, 2.9046e-11,\n 4.7062e-08, 9.3394e-09, 4.3147e-09, 4.1426e-12, 4.7619e-10, 1.1267e-09,\n 1.4370e-09, 5.6211e-09, 1.5533e-09, 1.1258e-09], device='cuda:0')" + }, + "17": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-9.9380e-11, 3.5201e-11, -3.7528e-11, 9.3677e-11, -9.4196e-09,\n 3.4679e-11, 1.5515e-10, -7.9799e-11, 7.9648e-11, 4.6137e-12,\n 3.7846e-12, 2.8348e-10, 2.5633e-10, 4.3663e-11, 5.6847e-09,\n -1.9073e-10, -2.6523e-09, 2.1351e-10, 2.3722e-10, 4.0749e-09,\n -4.8694e-11, 3.7044e-11, -2.8286e-11, -2.5721e-10, -2.1915e-10,\n 4.3324e-09, 5.0569e-11, 1.3320e-10, 3.3481e-10, 1.5987e-10,\n -2.1942e-11, 3.6701e-12, 2.4815e-09, 4.0083e-10, -8.4977e-10,\n 1.6332e-10, 2.1382e-10, -3.0916e-09, 4.2210e-11, -2.1359e-09,\n -1.4434e-09, -2.9169e-10, 7.3325e-11, 2.3885e-12, 1.3938e-10,\n -3.1486e-10, 4.0132e-11, 3.3402e-09, 8.0907e-11, -1.7280e-09,\n -1.2638e-11, 3.1471e-11, 7.2286e-10, -2.1184e-11, -8.7160e-11,\n 8.2212e-10, -3.3002e-10, 1.8823e-11, -6.0577e-11, 8.4440e-10,\n 1.8737e-09, -3.8578e-11, 1.9520e-11, 7.9349e-10], device='cuda:0')", + "exp_avg_sq": "tensor([1.0077e-14, 5.8044e-17, 8.0898e-18, 7.0737e-14, 8.2542e-14, 1.8239e-15,\n 5.8307e-14, 7.6264e-15, 6.0917e-12, 3.6187e-15, 3.4746e-16, 4.3845e-13,\n 1.4619e-14, 6.1749e-15, 4.0934e-13, 6.3783e-15, 1.0399e-11, 8.5700e-15,\n 4.7885e-16, 3.8876e-11, 2.4442e-15, 4.9913e-14, 2.0971e-15, 1.0520e-12,\n 1.4905e-13, 4.1367e-12, 9.5282e-16, 1.1727e-12, 6.7047e-14, 1.1223e-15,\n 1.5148e-15, 8.8290e-18, 8.8959e-13, 3.9368e-12, 9.7322e-14, 2.5237e-12,\n 2.7551e-13, 2.7455e-14, 3.6243e-16, 4.2835e-12, 4.6554e-11, 2.7968e-13,\n 1.7840e-12, 3.3856e-16, 5.8723e-16, 2.6433e-11, 1.9356e-12, 1.5363e-14,\n 6.2622e-12, 2.7995e-12, 2.2627e-13, 2.5781e-13, 5.5608e-11, 5.6669e-16,\n 7.0632e-12, 7.8724e-12, 1.0655e-13, 2.2635e-17, 5.2629e-14, 3.6977e-13,\n 6.0713e-14, 6.0010e-13, 1.1700e-13, 1.1902e-13], device='cuda:0')" + }, + "18": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 1.6182e-10, -1.0239e-10, 7.1705e-11, -4.0054e-10, -7.0015e-09,\n -6.9295e-11, -2.2669e-11, 4.0770e-10, 1.6625e-09, -4.4226e-12,\n -2.7707e-11, -3.2389e-10, -6.5931e-10, -1.2766e-10, -7.5331e-11,\n 2.4772e-10, -6.0659e-10, -7.2177e-10, -4.2688e-10, 2.5421e-09,\n -3.1273e-11, -1.5732e-10, 1.3360e-11, 6.0018e-10, 5.7180e-10,\n 1.6007e-09, -1.1391e-10, 2.5987e-10, -1.0603e-09, -2.5501e-10,\n 7.4233e-11, -3.1166e-11, 2.2529e-09, 3.8986e-10, -1.4469e-09,\n 1.2724e-10, -4.7757e-10, -1.8692e-09, -1.3257e-10, -1.3422e-10,\n 1.3311e-10, -3.5627e-10, 3.9684e-10, 1.7501e-11, -2.4278e-10,\n -1.4306e-10, -5.5027e-12, 3.8350e-10, 7.8707e-10, 3.0945e-11,\n -1.2501e-10, -4.2506e-11, 6.1979e-10, 3.1895e-11, 1.8233e-09,\n 9.5926e-10, 1.0938e-09, -3.0130e-11, -2.1936e-10, -9.3826e-10,\n -1.3437e-09, 4.9392e-10, 2.5157e-12, -5.2866e-10], device='cuda:0')", + "exp_avg_sq": "tensor([2.0442e-15, 2.3994e-15, 1.6678e-15, 1.6802e-12, 1.7617e-12, 7.3562e-16,\n 2.4983e-14, 7.5281e-15, 1.4401e-11, 1.3676e-15, 1.0008e-16, 1.4334e-13,\n 5.1077e-13, 2.7932e-13, 1.5902e-13, 3.6727e-13, 7.8149e-12, 7.1629e-13,\n 1.4739e-13, 1.7575e-11, 1.7123e-13, 1.0596e-13, 8.6999e-16, 3.7093e-12,\n 5.6837e-14, 1.0526e-11, 5.6110e-15, 7.5462e-13, 3.7643e-14, 4.2645e-14,\n 3.7643e-14, 6.8210e-16, 7.2329e-12, 2.0720e-12, 9.4712e-13, 1.0993e-12,\n 2.1301e-12, 2.5795e-13, 3.4267e-14, 5.2895e-12, 2.3250e-11, 5.8319e-13,\n 6.9980e-12, 3.8189e-17, 4.3375e-15, 9.4825e-12, 7.6209e-13, 1.8622e-13,\n 7.8017e-12, 3.4422e-12, 1.9816e-13, 1.9547e-12, 2.6428e-11, 1.6954e-15,\n 1.9602e-11, 7.0130e-12, 1.6222e-12, 2.2779e-15, 2.9202e-14, 1.4035e-13,\n 8.9477e-13, 1.7510e-12, 2.7341e-13, 9.0762e-13], device='cuda:0')" + }, + "19": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-1.2241e-09, 1.0743e-09, -5.2934e-10, -1.3950e-10, 2.8582e-08,\n 1.1047e-09, 7.0042e-10, -1.7706e-09, 2.6752e-09, 1.2348e-09,\n 8.9818e-10, 1.6086e-09, 2.4775e-09, 1.1610e-09, -2.6601e-08,\n 7.7786e-10, 1.8143e-08, 2.7058e-10, 6.0465e-10, -1.8709e-08,\n -1.3760e-09, 3.7417e-10, 1.9338e-09, 1.2050e-09, -1.5598e-09,\n -1.9440e-08, 2.8933e-09, 3.5039e-09, 1.5092e-09, 1.0508e-09,\n 9.1546e-11, -4.8697e-11, -6.4222e-09, 9.0375e-09, 3.5947e-09,\n 5.5818e-09, -1.3520e-09, 1.2566e-08, -2.5757e-10, 1.4708e-08,\n 7.7164e-09, 5.4410e-09, 2.8042e-09, 1.0637e-10, 2.1501e-09,\n 2.4582e-09, 2.3955e-09, -2.0034e-08, 2.6863e-09, 1.2907e-08,\n 2.2164e-09, -2.8443e-10, -2.4328e-09, 5.2567e-10, 1.3233e-09,\n -4.2623e-09, 3.2437e-09, 7.4031e-10, 6.9610e-09, 7.8858e-10,\n -9.3739e-09, 4.0001e-09, 1.0013e-09, -4.2937e-09],\n [ 1.2360e-09, -1.0341e-09, 5.4249e-10, 1.4348e-10, -2.8615e-08,\n -1.0669e-09, -6.9562e-10, 1.8092e-09, -2.6696e-09, -1.2258e-09,\n -8.8328e-10, -1.6037e-09, -2.4632e-09, -1.1087e-09, 2.6263e-08,\n -7.3332e-10, -1.8406e-08, -2.6175e-10, -5.7617e-10, 1.8468e-08,\n 1.4073e-09, -3.6194e-10, -1.9162e-09, -1.2143e-09, 1.6009e-09,\n 1.9120e-08, -2.8729e-09, -3.5128e-09, -1.4870e-09, -1.0289e-09,\n -7.9878e-11, 6.5946e-11, 6.2280e-09, -8.9973e-09, -3.7094e-09,\n -5.5808e-09, 1.2200e-09, -1.3040e-08, 2.8636e-10, -1.4811e-08,\n -7.6534e-09, -5.4087e-09, -2.7783e-09, -1.1410e-10, -2.1432e-09,\n -2.4957e-09, -2.3987e-09, 1.9720e-08, -2.6917e-09, -1.3065e-08,\n -2.2105e-09, 2.9696e-10, 2.1198e-09, -5.0219e-10, -1.3550e-09,\n 4.0304e-09, -3.2064e-09, -6.8933e-10, -6.9315e-09, -7.6310e-10,\n 9.2954e-09, -3.9715e-09, -1.0078e-09, 4.3023e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0585e-11, 5.5683e-13, 5.8275e-12, 9.5058e-13, 1.7557e-12, 5.8111e-12,\n 7.6343e-12, 9.7085e-12, 9.8118e-11, 7.3615e-12, 7.8390e-12, 7.4862e-12,\n 3.9998e-13, 3.1828e-12, 5.7279e-12, 5.1724e-13, 5.4411e-10, 4.4440e-12,\n 3.3176e-12, 9.8600e-10, 9.1228e-13, 1.2148e-10, 9.8029e-12, 4.6226e-11,\n 4.3870e-12, 8.3956e-11, 8.8888e-12, 6.7636e-10, 9.8410e-12, 1.4554e-12,\n 1.2273e-12, 2.0783e-12, 1.0262e-11, 9.0047e-10, 5.7958e-12, 1.1659e-09,\n 8.7617e-12, 1.2272e-12, 2.6085e-12, 2.6129e-10, 8.8222e-10, 1.1669e-10,\n 4.2236e-11, 1.0444e-11, 9.0456e-12, 1.4131e-09, 1.2627e-09, 1.2215e-12,\n 2.6051e-10, 2.6746e-10, 4.4668e-10, 1.3424e-11, 9.4799e-10, 9.5632e-12,\n 7.6615e-11, 4.2447e-10, 3.1067e-12, 3.7053e-12, 8.6612e-10, 4.9336e-12,\n 1.1464e-12, 7.2023e-11, 9.5472e-11, 9.3344e-12],\n [1.0585e-11, 5.5683e-13, 5.8275e-12, 9.5058e-13, 1.7558e-12, 5.8111e-12,\n 7.6343e-12, 9.7085e-12, 9.8118e-11, 7.3615e-12, 7.8390e-12, 7.4862e-12,\n 3.9998e-13, 3.1828e-12, 5.7279e-12, 5.1724e-13, 5.4411e-10, 4.4440e-12,\n 3.3176e-12, 9.8600e-10, 9.1228e-13, 1.2148e-10, 9.8029e-12, 4.6226e-11,\n 4.3870e-12, 8.3956e-11, 8.8888e-12, 6.7636e-10, 9.8410e-12, 1.4554e-12,\n 1.2274e-12, 2.0783e-12, 1.0262e-11, 9.0047e-10, 5.7958e-12, 1.1659e-09,\n 8.7618e-12, 1.2273e-12, 2.6085e-12, 2.6129e-10, 8.8222e-10, 1.1669e-10,\n 4.2236e-11, 1.0444e-11, 9.0456e-12, 1.4131e-09, 1.2627e-09, 1.2215e-12,\n 2.6051e-10, 2.6746e-10, 4.4668e-10, 1.3424e-11, 9.4800e-10, 9.5632e-12,\n 7.6615e-11, 4.2447e-10, 3.1067e-12, 3.7053e-12, 8.6612e-10, 4.9336e-12,\n 1.1465e-12, 7.2023e-11, 9.5472e-11, 9.3344e-12]], device='cuda:0')" + }, + "20": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-2.8462e-09, 2.7556e-09], device='cuda:0')", + "exp_avg_sq": "tensor([5.7147e-10, 5.7147e-10], device='cuda:0')" + }, + "21": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-4.9734e-12, -4.0128e-13, 7.7000e-13, ..., 1.3449e-12,\n -4.7925e-14, 6.4053e-13],\n [-1.2137e-12, -2.3126e-14, -2.6909e-13, ..., 8.2661e-13,\n 9.0935e-14, 3.7451e-13],\n [-4.4495e-13, -1.5556e-13, -6.6930e-14, ..., 1.0875e-14,\n -5.8408e-14, 2.7374e-13],\n ...,\n [-3.5859e-13, 3.0118e-14, 1.8085e-13, ..., 2.9124e-13,\n 5.0625e-14, 2.4416e-13],\n [ 2.1665e-12, -2.7796e-13, 2.8960e-13, ..., 2.3243e-13,\n 1.2577e-13, 6.3164e-13],\n [ 4.5410e-14, -6.1904e-15, -7.3359e-15, ..., -6.9307e-14,\n 2.7426e-15, -9.4450e-14]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4199e-15, 6.5261e-16, 9.0758e-16, ..., 1.2779e-15, 2.8040e-15,\n 1.5788e-15],\n [4.6777e-17, 1.8428e-17, 3.2351e-17, ..., 2.1172e-17, 3.7177e-17,\n 1.1888e-16],\n [8.4988e-17, 3.2392e-17, 5.5033e-17, ..., 2.3974e-17, 1.0488e-16,\n 7.6752e-17],\n ...,\n [1.6021e-17, 1.2487e-18, 1.9136e-17, ..., 1.9522e-17, 2.8351e-17,\n 1.6307e-17],\n [1.2976e-16, 1.9834e-17, 7.4305e-17, ..., 5.6055e-17, 2.7041e-16,\n 1.5504e-16],\n [5.2562e-18, 3.4331e-18, 5.5933e-18, ..., 1.0819e-18, 9.0353e-18,\n 7.6220e-18]], device='cuda:0')" + }, + "22": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-5.4359e-10, -2.0150e-10, 7.7285e-12, 7.9995e-12, 8.8502e-11,\n 3.1123e-11, 4.4009e-12, -7.0932e-12, -2.3516e-11, 3.4444e-10,\n -1.7788e-11, 6.9072e-11, -2.5888e-10, 1.0799e-11, -3.0770e-11,\n 3.7958e-11, -3.6699e-10, 5.4035e-11, 3.0762e-10, -6.4687e-11,\n 3.7364e-11, -9.6369e-12, -1.9879e-11, -5.5780e-12, -4.1369e-10,\n 7.8550e-10, -3.5399e-10, -5.6476e-11, 1.7971e-10, -2.5706e-11,\n -2.1009e-10, -3.7865e-10, 5.4551e-11, 5.7007e-15, -4.1553e-11,\n -2.6547e-10, 5.7744e-10, -1.1565e-11, -1.9199e-10, -1.2538e-10,\n 1.1227e-10, 7.1005e-11, -2.7538e-11, -3.3490e-11, 3.3636e-10,\n -3.2950e-10, -8.5469e-11, 4.6129e-11, -2.3366e-10, -1.2408e-11,\n -3.1157e-11, 1.3293e-12, -8.3607e-10, -5.3663e-11, 1.6988e-10,\n 5.0444e-12, 9.5260e-10, 3.4967e-10, 6.7540e-11, 5.4543e-10,\n -1.1050e-10, -9.2619e-11, 2.0536e-10, 9.6836e-12], device='cuda:0')", + "exp_avg_sq": "tensor([6.0787e-11, 1.7320e-12, 2.4181e-12, 1.4922e-12, 6.2273e-13, 9.2880e-15,\n 6.0138e-11, 5.8268e-12, 9.9480e-13, 1.0730e-11, 2.0423e-14, 3.3892e-13,\n 4.0793e-12, 2.8934e-12, 1.4605e-13, 6.0995e-11, 1.0829e-10, 1.4319e-13,\n 2.0557e-11, 7.6817e-14, 1.0514e-11, 8.5580e-14, 2.4275e-13, 4.5165e-14,\n 3.6401e-12, 1.2091e-10, 3.4170e-12, 5.6375e-13, 9.3668e-12, 7.2026e-13,\n 1.4462e-11, 3.0790e-11, 6.8835e-13, 6.0422e-14, 2.1781e-13, 2.1736e-11,\n 5.5850e-11, 1.5327e-13, 2.2624e-11, 6.8930e-12, 1.4555e-12, 1.3176e-12,\n 5.9141e-14, 2.5513e-13, 1.2240e-11, 1.1783e-12, 5.8004e-13, 5.8290e-12,\n 9.2838e-13, 9.9733e-15, 2.7288e-14, 4.5789e-12, 1.0567e-10, 1.0201e-13,\n 1.3400e-11, 1.4727e-14, 7.8813e-11, 2.4753e-11, 1.2805e-13, 1.0154e-10,\n 3.0064e-13, 7.4331e-13, 4.9970e-12, 2.7659e-13], device='cuda:0')" + }, + "23": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-1.5023e-11, -4.7034e-13, -1.8307e-13, -3.3904e-14, 2.9584e-13,\n -1.1306e-13, -2.2680e-12, 1.3417e-13, -2.3580e-12, 3.2130e-12,\n -6.0718e-13, 7.3721e-15, 2.2352e-12, -4.6803e-12, -2.4386e-13,\n -4.4976e-12, -1.4531e-11, -6.0016e-13, 3.2215e-12, 6.3981e-13,\n -2.9749e-13, -1.1671e-13, 5.9618e-14, -6.8814e-14, -4.3514e-12,\n 1.2462e-11, -3.6299e-12, 5.9645e-13, 3.2502e-13, 3.3971e-13,\n -1.4532e-11, -8.4599e-12, -2.7441e-12, -7.0848e-14, 4.3298e-13,\n -8.8434e-12, 7.4029e-12, 7.6896e-14, -1.0710e-11, -6.6586e-12,\n 3.1501e-13, -3.5004e-13, 8.0105e-14, -3.7201e-13, 3.0175e-12,\n -2.7992e-12, 1.8245e-13, -3.1744e-13, -3.0117e-13, 4.2044e-14,\n 5.2821e-14, -5.5141e-12, -4.1166e-11, 4.6508e-13, 5.4015e-13,\n 2.8946e-14, 1.6930e-11, 4.1675e-12, 3.3917e-14, 7.1700e-12,\n 1.6672e-12, 5.7742e-13, 1.4199e-12, 1.1366e-13], device='cuda:0')", + "exp_avg_sq": "tensor([2.2141e-14, 6.1426e-17, 5.7000e-17, 1.1441e-17, 4.8990e-16, 3.2982e-18,\n 1.5103e-14, 2.2501e-17, 1.1739e-15, 1.5140e-14, 4.5264e-17, 2.5867e-16,\n 1.8748e-16, 1.9283e-15, 5.1391e-17, 2.6733e-14, 4.6781e-14, 3.6240e-17,\n 1.5379e-14, 2.6403e-16, 5.5515e-17, 3.2818e-17, 1.6940e-19, 3.2915e-19,\n 1.1352e-16, 1.2813e-13, 1.2282e-16, 2.3252e-17, 1.7273e-15, 7.9485e-19,\n 1.3048e-14, 7.5942e-15, 8.4242e-16, 8.4165e-20, 5.7915e-18, 5.2499e-15,\n 3.5048e-14, 1.1716e-18, 1.5340e-14, 2.9157e-15, 1.6228e-15, 1.5031e-17,\n 1.7175e-18, 1.9867e-17, 1.4867e-14, 4.7822e-17, 5.1788e-16, 2.2195e-16,\n 3.6330e-17, 2.6932e-20, 1.5717e-19, 4.7286e-15, 9.7818e-14, 4.0933e-16,\n 7.7462e-15, 2.0576e-19, 8.4686e-14, 2.9054e-14, 2.2635e-16, 6.4980e-14,\n 6.9658e-16, 7.7387e-16, 6.3064e-16, 4.3898e-18], device='cuda:0')" + }, + "24": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-1.3101e-11, -4.8730e-12, 2.7454e-13, 4.2710e-13, 6.2151e-13,\n 2.3107e-13, -2.4750e-12, -1.0131e-13, 1.2770e-12, 5.2151e-12,\n 5.3583e-13, 3.6107e-13, -5.7492e-12, -2.9647e-12, 2.2026e-13,\n -2.5171e-12, -1.0409e-11, 1.0265e-12, 4.2022e-12, -6.1213e-13,\n 7.2073e-13, 2.6312e-14, -3.1062e-14, 9.5480e-14, -9.4540e-12,\n 1.3955e-11, -8.2549e-12, -6.7368e-13, 2.9323e-12, -3.4593e-13,\n -7.9866e-12, -9.8350e-12, 1.8473e-12, 1.6908e-13, -3.2314e-13,\n -7.9196e-12, 9.6311e-12, -1.1914e-13, -6.8656e-12, -5.2039e-12,\n 6.8897e-13, 1.3524e-12, -6.4042e-14, -1.6830e-12, 5.1616e-12,\n -7.6185e-12, -9.2312e-14, 9.7548e-13, -5.3723e-12, -1.8800e-14,\n -7.9097e-14, -3.3257e-12, -2.0769e-11, -7.0230e-13, 1.6352e-12,\n -5.9503e-14, 1.6995e-11, 5.2271e-12, 1.0013e-13, 9.3564e-12,\n -1.4698e-12, -3.5894e-13, 2.4557e-12, -4.0909e-13], device='cuda:0')", + "exp_avg_sq": "tensor([3.4972e-14, 1.1739e-15, 1.7652e-15, 1.0148e-15, 7.2200e-16, 6.0223e-19,\n 3.5581e-14, 3.6829e-15, 5.0058e-16, 8.3523e-15, 4.8202e-18, 4.5561e-16,\n 3.2837e-15, 9.3310e-16, 2.1965e-17, 3.5019e-14, 6.2872e-14, 7.3432e-17,\n 1.4592e-14, 1.0453e-16, 6.7356e-15, 1.2417e-17, 2.0818e-16, 4.8915e-17,\n 2.4705e-15, 7.9200e-14, 2.1654e-15, 3.3666e-16, 6.5700e-15, 5.0843e-16,\n 6.8828e-15, 1.7833e-14, 3.4230e-16, 5.6296e-17, 1.5387e-16, 1.2340e-14,\n 3.7032e-14, 6.9212e-17, 1.1844e-14, 3.3147e-15, 1.5043e-15, 9.9560e-16,\n 6.9748e-19, 1.0711e-16, 9.3693e-15, 1.0102e-15, 2.1709e-16, 3.9757e-15,\n 8.8103e-16, 1.3530e-17, 4.9974e-20, 1.5051e-15, 5.9028e-14, 1.2005e-16,\n 9.6722e-15, 9.5450e-18, 5.2665e-14, 1.7741e-14, 2.2646e-16, 6.5935e-14,\n 2.9472e-16, 3.2493e-16, 2.7507e-15, 1.4619e-16], device='cuda:0')" + }, + "25": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-6.8991e-11, -5.3005e-12, 1.1494e-11, 8.3393e-12, -1.6318e-11,\n 1.7440e-11, -9.0571e-12, 1.0848e-11, 3.8207e-12, -4.4322e-11,\n 1.0457e-11, -1.9443e-12, 6.7465e-12, -1.0697e-10, 1.0813e-11,\n -2.3281e-11, -5.4434e-11, 1.1601e-11, -2.8747e-11, 9.1454e-12,\n 1.3118e-11, 6.5731e-12, 1.3942e-11, -2.2225e-12, -2.4961e-11,\n -4.8155e-11, -2.0726e-11, 7.4741e-12, -1.9472e-12, 1.4332e-11,\n -1.7106e-10, -4.4802e-11, 9.9054e-12, 1.1853e-11, 1.0418e-11,\n -5.5940e-11, -4.0428e-11, 1.1689e-11, -9.2201e-11, -8.5384e-11,\n -1.2707e-11, 6.0134e-12, 1.5310e-11, -9.1887e-12, -3.8137e-11,\n -1.8267e-11, -8.3964e-13, 1.3338e-11, -1.0469e-12, 9.1306e-12,\n 9.8947e-12, -1.1294e-10, -1.9808e-10, 9.4897e-12, -7.1527e-12,\n 9.1389e-12, -8.2960e-11, -3.6009e-11, -1.2530e-11, -2.8917e-11,\n 8.4794e-12, -1.0900e-12, 1.3300e-11, 5.7541e-12],\n [ 6.8991e-11, 5.3007e-12, -1.1494e-11, -8.3393e-12, 1.6318e-11,\n -1.7440e-11, 9.0573e-12, -1.0848e-11, -3.8207e-12, 4.4322e-11,\n -1.0457e-11, 1.9442e-12, -6.7464e-12, 1.0697e-10, -1.0813e-11,\n 2.3281e-11, 5.4434e-11, -1.1601e-11, 2.8747e-11, -9.1454e-12,\n -1.3118e-11, -6.5731e-12, -1.3942e-11, 2.2225e-12, 2.4961e-11,\n 4.8155e-11, 2.0726e-11, -7.4741e-12, 1.9472e-12, -1.4332e-11,\n 1.7106e-10, 4.4802e-11, -9.9054e-12, -1.1853e-11, -1.0418e-11,\n 5.5940e-11, 4.0428e-11, -1.1689e-11, 9.2201e-11, 8.5384e-11,\n 1.2707e-11, -6.0134e-12, -1.5310e-11, 9.1887e-12, 3.8137e-11,\n 1.8267e-11, 8.3962e-13, -1.3338e-11, 1.0469e-12, -9.1306e-12,\n -9.8947e-12, 1.1294e-10, 1.9808e-10, -9.4897e-12, 7.1527e-12,\n -9.1389e-12, 8.2960e-11, 3.6009e-11, 1.2530e-11, 2.8917e-11,\n -8.4794e-12, 1.0900e-12, -1.3299e-11, -5.7540e-12]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.1365e-13, 7.5120e-15, 2.2131e-15, 6.2134e-15, 5.6129e-13, 2.8392e-14,\n 2.2627e-13, 1.0645e-14, 2.2281e-14, 2.3470e-12, 3.0626e-14, 4.3900e-13,\n 1.5291e-14, 2.5427e-12, 1.7369e-14, 6.2139e-13, 6.3715e-13, 2.9465e-14,\n 1.0890e-12, 1.7974e-14, 3.9902e-15, 2.0965e-14, 8.4974e-15, 1.1252e-14,\n 6.5121e-15, 1.9369e-12, 3.5762e-15, 2.7193e-14, 1.4348e-13, 1.4362e-14,\n 2.4007e-12, 2.5006e-13, 2.5690e-14, 1.6522e-14, 1.9557e-14, 2.3893e-13,\n 9.6939e-13, 5.9703e-15, 1.4039e-12, 7.6935e-13, 1.1340e-12, 1.5592e-15,\n 1.9407e-14, 1.7312e-14, 1.9307e-12, 1.8775e-14, 1.7857e-14, 7.2067e-15,\n 2.3251e-14, 1.8592e-14, 2.5550e-14, 4.5938e-12, 2.0777e-12, 2.6582e-14,\n 7.5289e-13, 2.3418e-14, 2.0284e-12, 2.0209e-12, 1.0105e-12, 1.0492e-12,\n 2.6651e-14, 2.1120e-14, 7.4033e-14, 8.5166e-15],\n [5.1365e-13, 7.5120e-15, 2.2131e-15, 6.2134e-15, 5.6129e-13, 2.8392e-14,\n 2.2627e-13, 1.0645e-14, 2.2281e-14, 2.3470e-12, 3.0626e-14, 4.3900e-13,\n 1.5291e-14, 2.5427e-12, 1.7369e-14, 6.2139e-13, 6.3715e-13, 2.9465e-14,\n 1.0890e-12, 1.7974e-14, 3.9902e-15, 2.0965e-14, 8.4974e-15, 1.1252e-14,\n 6.5121e-15, 1.9369e-12, 3.5762e-15, 2.7193e-14, 1.4348e-13, 1.4362e-14,\n 2.4007e-12, 2.5006e-13, 2.5690e-14, 1.6522e-14, 1.9557e-14, 2.3893e-13,\n 9.6939e-13, 5.9703e-15, 1.4039e-12, 7.6935e-13, 1.1340e-12, 1.5592e-15,\n 1.9407e-14, 1.7312e-14, 1.9307e-12, 1.8775e-14, 1.7857e-14, 7.2067e-15,\n 2.3251e-14, 1.8592e-14, 2.5550e-14, 4.5938e-12, 2.0777e-12, 2.6582e-14,\n 7.5289e-13, 2.3418e-14, 2.0284e-12, 2.0209e-12, 1.0105e-12, 1.0492e-12,\n 2.6651e-14, 2.1120e-14, 7.4033e-14, 8.5166e-15]], device='cuda:0')" + }, + "26": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-5.9961e-11, 5.9961e-11], device='cuda:0')", + "exp_avg_sq": "tensor([1.5877e-12, 1.5877e-12], device='cuda:0')" + }, + "27": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 3.4912e-11, -3.1776e-11, 1.9558e-10, ..., 8.4191e-11,\n 3.0889e-10, -5.4967e-11],\n [-3.1876e-10, -2.5281e-11, -7.1510e-11, ..., -7.7194e-11,\n -1.6511e-10, -1.0525e-11],\n [ 5.6145e-10, -1.2327e-11, -3.9589e-10, ..., 2.6414e-10,\n 3.0654e-12, 1.7892e-10],\n ...,\n [-2.9709e-10, -1.4854e-11, 7.0377e-11, ..., 3.3180e-11,\n -1.5373e-10, -9.5836e-11],\n [ 9.3655e-11, 2.7307e-11, -3.0390e-10, ..., -7.7155e-11,\n 5.0068e-10, -2.6539e-11],\n [ 6.2642e-11, 2.3751e-11, -2.5275e-11, ..., 2.6832e-11,\n 4.4413e-11, -3.5701e-11]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.9277e-16, 1.5445e-16, 1.0406e-15, ..., 6.0971e-16, 4.2093e-15,\n 8.3942e-16],\n [2.0931e-16, 2.6531e-16, 2.8306e-16, ..., 8.7740e-17, 9.9479e-16,\n 6.8611e-16],\n [9.2120e-16, 9.1630e-16, 8.5174e-16, ..., 3.2108e-16, 2.5915e-15,\n 1.9046e-15],\n ...,\n [3.1098e-15, 2.7048e-15, 3.6354e-15, ..., 1.2361e-15, 1.0775e-14,\n 7.7656e-15],\n [1.0534e-14, 6.3977e-15, 1.5349e-14, ..., 4.1787e-15, 3.6784e-14,\n 2.0495e-14],\n [1.8074e-17, 1.8073e-17, 9.9942e-18, ..., 2.7493e-18, 5.4252e-17,\n 6.9527e-18]], device='cuda:0')" + }, + "28": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-1.4315e-08, -5.7246e-09, 1.4428e-09, -1.2347e-08, -1.3943e-09,\n 3.5697e-08, 2.6663e-09, 1.4668e-08, 4.3387e-09, -4.7615e-08,\n 1.6391e-09, -2.8372e-08, 1.4913e-08, -4.6225e-08, 7.1637e-09,\n -7.8975e-09, 1.5139e-09, -3.2716e-10, 5.2578e-08, 3.5784e-08,\n -6.0658e-09, -2.2456e-08, 9.7765e-09, -1.5410e-09, 5.5640e-10,\n -1.7786e-08, -1.6775e-08, -1.9877e-08, 4.4145e-08, 2.8521e-08,\n 7.0720e-09, 1.0102e-08, 1.5724e-08, -4.1125e-09, -6.4791e-09,\n -3.3667e-08, 2.5179e-08, 2.1748e-08, 6.8712e-09, -2.8174e-09,\n 4.6799e-09, 3.1031e-08, -3.0952e-08, -9.1811e-08, -4.0654e-09,\n 5.6488e-09, -9.4733e-09, 1.1203e-08, 1.7616e-08, 6.9721e-09,\n 7.0697e-09, -7.3168e-08, 2.2030e-08, -5.8922e-08, -3.6517e-10,\n -1.3723e-08, -8.1311e-09, -6.5903e-09, 9.3415e-08, 2.7418e-08,\n 8.8464e-10, 4.2851e-09, 2.0082e-08, -1.4364e-09], device='cuda:0')", + "exp_avg_sq": "tensor([7.3179e-11, 2.2196e-11, 6.7292e-11, 1.3895e-10, 6.9751e-12, 9.8164e-10,\n 3.7350e-11, 1.7044e-11, 4.8481e-12, 4.3989e-11, 1.5732e-11, 2.1994e-09,\n 6.7260e-11, 1.8448e-11, 1.2656e-11, 2.7001e-12, 4.9724e-12, 5.2855e-12,\n 1.0361e-09, 5.6961e-09, 9.2196e-12, 7.4091e-10, 3.0731e-11, 5.7720e-12,\n 4.2632e-12, 1.3255e-09, 1.6973e-09, 1.0975e-09, 3.1021e-09, 1.3253e-10,\n 1.3337e-11, 6.2666e-11, 4.1973e-11, 3.7681e-11, 8.4455e-13, 9.6227e-10,\n 1.0629e-09, 2.9550e-10, 1.6402e-11, 2.7367e-11, 6.7146e-13, 4.6427e-10,\n 1.2446e-09, 6.4919e-11, 2.0125e-12, 8.7928e-10, 3.0957e-11, 1.5757e-11,\n 3.0581e-09, 8.4949e-12, 4.2525e-10, 2.7308e-09, 2.0868e-10, 3.2084e-11,\n 3.3398e-12, 2.1194e-11, 6.9472e-10, 9.1935e-12, 6.5850e-11, 3.8176e-09,\n 9.3406e-11, 2.4229e-10, 8.9566e-10, 2.6488e-13], device='cuda:0')" + }, + "29": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 1.2786e-10, -4.9975e-11, -5.7131e-11, 1.2424e-10, 4.2262e-12,\n -2.5135e-10, 4.3004e-12, -7.6370e-11, 1.4775e-11, -3.5122e-09,\n -2.1572e-11, -4.6500e-09, -1.2879e-10, -4.5277e-09, -4.4530e-11,\n 2.9901e-11, 3.8858e-12, 1.9562e-12, -1.4890e-09, -1.6863e-10,\n -4.3955e-12, -2.4906e-09, -1.1723e-11, -1.8156e-13, 2.9980e-12,\n -3.3386e-09, 3.8605e-11, -2.7756e-09, -1.5058e-10, -1.8702e-09,\n -2.7087e-11, -3.8605e-11, -6.7395e-11, 1.6755e-11, 6.7953e-12,\n -2.3372e-09, -9.6899e-10, -1.6112e-10, -8.6406e-11, 3.2530e-12,\n -4.4420e-11, -1.4527e-10, -2.6416e-09, -6.2657e-09, 1.8354e-11,\n -1.5422e-09, 1.2079e-11, 1.3834e-11, -1.0818e-12, -8.6905e-10,\n -1.3679e-09, -6.6736e-09, -1.4461e-09, -5.5764e-09, -2.6320e-11,\n 1.3579e-10, -2.6844e-11, 1.6001e-11, 1.2512e-09, -5.1136e-11,\n 3.2357e-11, 6.8123e-12, -1.1556e-13, 2.7441e-12], device='cuda:0')", + "exp_avg_sq": "tensor([3.4441e-14, 3.4010e-14, 2.8647e-14, 1.3640e-15, 2.5837e-15, 3.6337e-14,\n 5.3474e-16, 1.1926e-16, 7.8975e-15, 2.4052e-14, 9.8632e-16, 3.8273e-12,\n 2.2381e-13, 2.3512e-14, 2.5211e-16, 1.4569e-16, 1.0218e-14, 1.7611e-16,\n 5.1305e-13, 2.0376e-12, 3.3352e-17, 1.7233e-13, 5.2431e-16, 1.3592e-17,\n 1.4432e-14, 5.5324e-13, 2.1238e-13, 9.9599e-13, 7.7750e-13, 2.3555e-14,\n 6.6805e-14, 9.2054e-16, 2.6725e-13, 9.8436e-17, 4.2054e-18, 1.8002e-12,\n 1.2995e-12, 6.1620e-15, 8.5117e-16, 1.9324e-16, 9.0248e-16, 2.4141e-14,\n 1.9429e-12, 2.9959e-14, 1.2521e-14, 7.1585e-13, 4.8440e-16, 7.9943e-15,\n 7.4605e-13, 5.9142e-14, 3.4534e-13, 2.5564e-12, 3.6220e-14, 7.1989e-15,\n 2.4118e-16, 6.7860e-15, 7.3385e-14, 1.0142e-16, 2.8881e-14, 2.4658e-12,\n 1.0614e-15, 1.0837e-14, 3.4103e-13, 2.6863e-19], device='cuda:0')" + }, + "30": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-1.3778e-10, 9.8850e-11, 1.6557e-10, -1.0340e-10, -1.0380e-11,\n 5.0763e-10, -2.7027e-13, 2.1091e-10, -5.7722e-11, -2.3624e-09,\n 5.0787e-11, -3.2579e-09, -2.7387e-10, -2.8455e-09, 9.1225e-11,\n 1.3865e-11, 1.5844e-12, 1.2163e-11, -1.0493e-09, 4.1814e-10,\n -8.1765e-11, -2.3013e-09, 3.2474e-11, -1.9942e-12, -3.4934e-12,\n -2.6575e-09, -2.1369e-10, -2.1985e-09, 8.5821e-10, -9.5774e-10,\n -4.7204e-11, 8.1977e-11, -1.8223e-10, 6.5198e-11, -8.4922e-12,\n -2.3549e-09, -1.1729e-09, 2.4549e-10, 1.6519e-10, -2.9119e-12,\n 7.9877e-11, 3.2466e-10, -2.5589e-09, -3.7237e-09, -1.4803e-10,\n -1.6451e-09, -7.5874e-10, -8.5998e-11, -3.4466e-11, -1.0510e-09,\n -1.4199e-09, -3.9152e-09, -1.1315e-09, -3.2333e-09, 6.6677e-11,\n -7.3104e-12, -1.5696e-12, 1.3041e-11, 2.6382e-10, 2.1390e-10,\n 2.2255e-11, 3.4255e-11, 1.4084e-10, -6.8649e-12], device='cuda:0')", + "exp_avg_sq": "tensor([1.2690e-14, 5.6032e-14, 1.2067e-14, 6.7625e-14, 1.0149e-15, 4.1876e-13,\n 1.7709e-15, 7.0629e-15, 2.2144e-14, 5.6785e-14, 3.7559e-16, 1.6525e-12,\n 1.3765e-13, 2.3080e-14, 1.0295e-16, 1.9758e-17, 1.2375e-14, 2.2060e-15,\n 7.6487e-13, 2.4110e-12, 1.1408e-15, 5.3299e-13, 4.1260e-15, 5.2120e-15,\n 5.1462e-15, 9.3165e-13, 6.9730e-13, 8.5152e-13, 1.2658e-12, 1.0141e-13,\n 5.5520e-14, 1.5251e-14, 1.2128e-13, 1.4380e-14, 8.3035e-18, 8.4436e-13,\n 8.6386e-13, 1.1563e-13, 9.5487e-15, 6.2621e-16, 2.7918e-16, 1.7781e-13,\n 1.0104e-12, 5.5916e-14, 5.0523e-15, 7.0488e-13, 2.4071e-14, 7.2350e-15,\n 1.2436e-12, 2.4318e-14, 3.9005e-13, 1.8675e-12, 1.7593e-13, 2.7764e-14,\n 4.5184e-17, 1.3908e-14, 2.4878e-13, 3.9575e-17, 7.3517e-14, 1.4228e-12,\n 3.3549e-14, 8.1357e-14, 2.4231e-13, 5.3293e-18], device='cuda:0')" + }, + "31": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 2.3827e-09, 2.5260e-09, 1.1806e-09, 2.6211e-09, 1.6261e-09,\n 1.6560e-09, 5.8317e-10, 2.5305e-09, 1.4728e-09, -1.1437e-08,\n 2.4027e-09, -2.4815e-08, -7.7018e-10, -2.2536e-08, 9.3154e-10,\n 1.4015e-09, 2.6457e-09, 1.4012e-09, -9.3337e-09, 1.0700e-09,\n 1.6680e-09, -1.2755e-08, 2.2276e-09, 1.6429e-09, 1.5944e-09,\n -1.7714e-08, 1.5933e-09, -1.7662e-08, 1.0692e-09, -7.5682e-09,\n 1.1996e-09, 2.9599e-09, -5.1627e-10, 1.7038e-09, 1.5548e-09,\n -1.5835e-08, -6.0217e-09, 1.8406e-09, 2.3758e-09, 1.1263e-09,\n 2.2745e-09, 2.7279e-09, -1.6598e-08, -2.6750e-08, 2.6381e-09,\n -9.9251e-09, 3.5115e-11, 1.5500e-09, 2.8095e-10, -4.0067e-09,\n -1.1228e-08, -3.2863e-08, -5.8932e-09, -3.4842e-08, 1.0260e-09,\n 2.1214e-09, 3.6086e-09, 1.2163e-09, 4.8190e-09, 1.3634e-09,\n 2.7499e-09, 1.6681e-09, 1.9162e-09, 2.0742e-09],\n [-2.4142e-09, -2.5428e-09, -1.2231e-09, -2.6667e-09, -1.6718e-09,\n -1.6865e-09, -6.0573e-10, -2.5705e-09, -1.4926e-09, 1.2049e-08,\n -2.4307e-09, 2.5121e-08, 7.4927e-10, 2.3162e-08, -9.7231e-10,\n -1.4093e-09, -2.6709e-09, -1.4173e-09, 9.5594e-09, -1.0860e-09,\n -1.7304e-09, 1.2965e-08, -2.2714e-09, -1.6774e-09, -1.6308e-09,\n 1.7929e-08, -1.6486e-09, 1.7930e-08, -1.1244e-09, 7.7354e-09,\n -1.2330e-09, -3.0061e-09, 4.8634e-10, -1.7358e-09, -1.5946e-09,\n 1.6094e-08, 6.2725e-09, -1.9020e-09, -2.3945e-09, -1.1484e-09,\n -2.3258e-09, -2.7712e-09, 1.6806e-08, 2.7134e-08, -2.6747e-09,\n 1.0142e-08, -3.5506e-11, -1.5899e-09, -2.9219e-10, 4.2008e-09,\n 1.1433e-08, 3.3080e-08, 6.2389e-09, 3.5518e-08, -1.0545e-09,\n -2.1779e-09, -3.6443e-09, -1.2460e-09, -4.1418e-09, -1.3450e-09,\n -2.7622e-09, -1.6974e-09, -1.9306e-09, -2.1119e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0504e-12, 2.5631e-11, 1.1623e-12, 6.9562e-13, 9.2912e-13, 6.5915e-13,\n 7.6472e-12, 6.7509e-13, 1.1342e-11, 3.2465e-13, 1.1737e-12, 1.5801e-10,\n 1.0664e-10, 1.7525e-12, 1.2391e-12, 1.5864e-12, 4.2112e-11, 1.2386e-12,\n 3.0114e-11, 4.5460e-11, 1.3564e-13, 8.5571e-12, 1.2775e-12, 6.0717e-13,\n 6.0077e-13, 2.5147e-11, 8.5187e-12, 6.6888e-11, 2.7401e-11, 7.1407e-13,\n 7.1837e-11, 4.2587e-13, 1.5592e-10, 5.4969e-13, 1.5777e-12, 1.4647e-10,\n 9.1846e-11, 2.6079e-13, 1.1424e-12, 7.0738e-12, 1.4348e-12, 1.7668e-12,\n 1.2955e-10, 1.4817e-12, 8.5072e-13, 5.4730e-11, 5.4482e-13, 6.7190e-11,\n 2.6189e-11, 1.0143e-12, 4.5159e-11, 8.0393e-11, 1.3644e-12, 1.2614e-12,\n 1.5805e-12, 1.4029e-12, 7.8682e-12, 1.3357e-12, 3.0292e-13, 1.2355e-10,\n 1.3874e-13, 1.6965e-12, 8.6835e-11, 1.2975e-12],\n [1.0504e-12, 2.5631e-11, 1.1623e-12, 6.9562e-13, 9.2912e-13, 6.5915e-13,\n 7.6472e-12, 6.7509e-13, 1.1342e-11, 3.2461e-13, 1.1737e-12, 1.5801e-10,\n 1.0664e-10, 1.7525e-12, 1.2391e-12, 1.5864e-12, 4.2112e-11, 1.2386e-12,\n 3.0114e-11, 4.5460e-11, 1.3564e-13, 8.5571e-12, 1.2775e-12, 6.0717e-13,\n 6.0077e-13, 2.5147e-11, 8.5187e-12, 6.6888e-11, 2.7401e-11, 7.1404e-13,\n 7.1837e-11, 4.2587e-13, 1.5592e-10, 5.4969e-13, 1.5777e-12, 1.4647e-10,\n 9.1846e-11, 2.6079e-13, 1.1424e-12, 7.0738e-12, 1.4348e-12, 1.7668e-12,\n 1.2955e-10, 1.4817e-12, 8.5072e-13, 5.4730e-11, 5.4482e-13, 6.7190e-11,\n 2.6189e-11, 1.0143e-12, 4.5159e-11, 8.0393e-11, 1.3644e-12, 1.2614e-12,\n 1.5805e-12, 1.4029e-12, 7.8682e-12, 1.3357e-12, 3.0290e-13, 1.2355e-10,\n 1.3874e-13, 1.6965e-12, 8.6835e-11, 1.2975e-12]], device='cuda:0')" + }, + "32": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-1.2026e-08, 1.2247e-08], device='cuda:0')", + "exp_avg_sq": "tensor([8.5228e-11, 8.5228e-11], device='cuda:0')" + }, + "33": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-1.1699e-13, 9.1494e-14, 8.5886e-15, ..., 4.1758e-14,\n -2.4038e-13, -3.7974e-14],\n [ 1.9940e-13, -4.4560e-14, 1.8590e-13, ..., -1.2503e-13,\n 1.1227e-13, 2.9915e-14],\n [-1.5763e-13, 4.1818e-14, 8.1055e-14, ..., 1.8807e-13,\n -9.2511e-14, -1.2133e-13],\n ...,\n [-3.6376e-13, 1.9122e-13, 1.3272e-13, ..., 7.8413e-13,\n -3.0367e-13, -2.1861e-13],\n [ 1.8504e-13, 7.5209e-15, 5.0910e-14, ..., 2.7759e-14,\n -2.4180e-14, -3.2980e-13],\n [-9.0836e-13, 4.0465e-13, 1.2400e-13, ..., 1.6736e-13,\n 2.3133e-13, 1.4392e-12]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.3331e-16, 2.1263e-16, 1.7863e-16, ..., 1.5303e-16, 5.2721e-16,\n 4.8547e-16],\n [2.3069e-16, 2.1592e-16, 2.5544e-16, ..., 9.9202e-17, 6.4267e-16,\n 5.0658e-16],\n [3.9286e-18, 1.6253e-18, 3.2895e-18, ..., 2.0451e-18, 7.1682e-18,\n 2.3229e-18],\n ...,\n [8.6847e-17, 1.5298e-17, 5.6239e-17, ..., 3.2482e-17, 3.2822e-16,\n 1.1137e-16],\n [1.4286e-18, 2.8500e-19, 1.1442e-18, ..., 1.1998e-18, 2.4360e-18,\n 1.9489e-18],\n [2.5268e-15, 1.3350e-15, 1.9505e-15, ..., 1.0535e-15, 6.0612e-15,\n 3.5507e-15]], device='cuda:0')" + }, + "34": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-5.2324e-11, 1.0896e-10, -3.0094e-11, -1.5506e-11, -3.4136e-11,\n 8.7772e-12, -3.3345e-10, -1.7962e-11, -4.3791e-11, -3.8314e-10,\n -2.2906e-10, -2.3198e-11, -6.3751e-12, 9.0537e-12, 8.9156e-12,\n -2.2309e-11, 2.1011e-12, -2.3992e-11, 3.9601e-10, 2.3768e-11,\n 7.1576e-11, 5.4731e-11, 2.7620e-12, -1.1591e-11, 2.3231e-11,\n 1.2231e-10, -2.4531e-11, -2.5387e-10, 3.5123e-10, -1.9634e-11,\n 5.0048e-11, -1.6226e-10, -9.7818e-12, 4.1827e-11, 2.1948e-10,\n 4.0415e-11, 1.4929e-10, -6.3708e-11, -3.6687e-10, 5.2670e-12,\n 4.9803e-11, -4.2624e-11, -3.9625e-10, 4.0256e-11, -7.2874e-11,\n 5.0118e-11, 6.5885e-12, 1.2318e-10, 4.0689e-11, -1.1615e-10,\n 3.4804e-11, 7.2431e-11, -5.9553e-11, -8.4755e-13, 5.3311e-11,\n 9.1438e-11, 1.0573e-10, 4.7607e-10, 3.7887e-11, 3.8853e-12,\n 1.2100e-10, -2.7811e-10, -2.2619e-11, 1.1967e-10], device='cuda:0')", + "exp_avg_sq": "tensor([1.6620e-11, 1.5157e-11, 1.2141e-13, 2.2516e-15, 1.7360e-13, 1.7068e-10,\n 1.0351e-11, 6.9964e-12, 1.1199e-13, 1.2171e-11, 6.4979e-11, 6.4106e-11,\n 6.3597e-14, 1.1869e-14, 1.2792e-13, 2.5490e-14, 6.9447e-14, 1.2417e-14,\n 1.4936e-10, 1.0383e-12, 3.3222e-11, 5.0335e-11, 1.5047e-13, 5.7048e-15,\n 7.5798e-14, 1.6455e-12, 3.1868e-13, 9.3137e-12, 1.0115e-10, 1.3100e-13,\n 2.2164e-12, 4.9546e-13, 1.3454e-11, 6.1098e-12, 8.5969e-11, 9.6647e-12,\n 2.4154e-11, 6.4590e-13, 6.4858e-13, 1.9802e-12, 1.3753e-11, 3.2337e-11,\n 2.0112e-11, 2.4451e-13, 3.5617e-11, 5.0861e-12, 3.1747e-13, 2.5914e-12,\n 3.5481e-11, 1.2604e-11, 1.4020e-12, 5.0177e-11, 7.4644e-13, 7.5217e-13,\n 4.4659e-13, 9.1779e-13, 7.6120e-12, 1.9557e-10, 4.4912e-13, 9.7395e-12,\n 9.8860e-12, 7.1724e-12, 5.7664e-14, 1.3342e-10], device='cuda:0')" + }, + "35": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-6.1303e-12, -9.7405e-14, -8.3831e-14, 1.1041e-13, -5.5592e-13,\n -1.2955e-11, -9.1612e-12, 2.9771e-13, 4.0488e-13, -1.0079e-11,\n -8.6208e-12, -1.0737e-11, -7.5681e-15, 3.7239e-14, 1.3150e-14,\n -1.5005e-13, 7.2074e-15, 1.9209e-13, 3.0924e-12, 9.5485e-14,\n -2.7030e-13, -7.8962e-12, 9.9936e-15, 6.8698e-14, -1.7174e-14,\n -1.3769e-12, -1.1856e-12, -2.9860e-12, 3.0744e-12, 1.3264e-13,\n -5.8694e-15, -2.7225e-12, 2.9038e-13, -2.5454e-12, 3.9701e-13,\n -9.2592e-14, 1.8586e-13, -1.1289e-12, -1.0139e-11, -6.9083e-13,\n -3.1716e-14, -6.0466e-12, -7.7616e-12, 6.3386e-16, -8.0351e-12,\n -8.6805e-14, -1.1748e-14, 6.5759e-14, -1.7856e-13, -4.5945e-12,\n -5.1263e-13, -4.5103e-13, 6.1080e-13, 7.4407e-14, -1.8023e-14,\n -9.1778e-13, 1.8040e-14, 4.7800e-12, -5.0608e-14, -2.0386e-12,\n 9.8326e-14, -3.7816e-12, 4.8165e-14, -4.7229e-12], device='cuda:0')", + "exp_avg_sq": "tensor([3.0147e-15, 1.9354e-15, 7.1294e-17, 7.2846e-18, 7.6449e-16, 2.2850e-13,\n 6.7840e-16, 1.1004e-16, 4.9725e-16, 5.6040e-16, 1.2280e-14, 6.5660e-14,\n 1.9940e-17, 1.8636e-16, 1.4949e-18, 1.2463e-16, 4.8848e-19, 8.3475e-18,\n 7.5761e-14, 1.2696e-17, 1.4894e-15, 5.3166e-14, 5.2620e-18, 4.3951e-19,\n 3.6872e-18, 3.3518e-15, 1.5451e-15, 4.2498e-16, 5.6380e-14, 1.7796e-16,\n 2.9426e-16, 1.4712e-16, 2.7299e-16, 1.0427e-15, 4.0655e-14, 7.0920e-16,\n 4.7830e-15, 2.3999e-15, 2.2907e-16, 1.5893e-16, 2.1944e-16, 6.1654e-15,\n 1.1614e-15, 3.5245e-17, 1.8197e-14, 7.0933e-16, 2.8058e-18, 1.3245e-15,\n 2.5499e-15, 9.1033e-16, 2.2876e-17, 1.9407e-15, 3.2177e-16, 1.0614e-17,\n 2.8102e-16, 1.8592e-15, 2.3060e-15, 2.1268e-13, 1.1773e-16, 2.4053e-15,\n 5.2318e-15, 2.1926e-16, 1.7763e-19, 1.2772e-13], device='cuda:0')" + }, + "36": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-4.7366e-12, 8.9170e-13, 5.2118e-14, -1.4931e-13, 5.3808e-13,\n -5.6109e-12, -9.2324e-12, -1.9753e-13, -4.2589e-13, -1.0023e-11,\n -7.9363e-12, -5.5379e-12, -1.3732e-14, -1.9362e-13, 7.7598e-14,\n 1.2882e-13, 8.8044e-15, -1.7297e-13, 5.0410e-12, -2.9500e-13,\n 9.4143e-13, -4.0636e-12, -1.0508e-14, -7.2069e-14, 8.0757e-14,\n -1.2616e-12, 7.6962e-13, -6.6696e-12, 4.2198e-12, -2.3087e-13,\n 1.3458e-13, -5.2907e-12, -9.2544e-14, -2.3782e-12, 2.4671e-12,\n 3.3471e-13, 1.3979e-12, 8.8590e-13, -1.0002e-11, -1.8146e-12,\n 7.7635e-13, -4.4600e-12, -9.8731e-12, -2.0976e-14, -5.5785e-12,\n 1.9796e-13, -3.6589e-14, 1.2080e-13, 3.8412e-13, -5.1482e-12,\n -9.0426e-13, 9.2723e-13, -5.8484e-13, 2.1564e-14, -4.7355e-13,\n -1.2903e-12, 7.3965e-13, 6.5842e-12, -6.4125e-13, -2.8148e-12,\n 7.1848e-13, -7.3498e-12, -4.2402e-14, -2.9605e-12], device='cuda:0')", + "exp_avg_sq": "tensor([9.1874e-15, 7.3950e-15, 2.5812e-17, 2.3728e-17, 2.6934e-16, 8.9042e-14,\n 5.9398e-15, 3.7181e-15, 1.4339e-17, 7.3724e-15, 3.4162e-14, 3.4444e-14,\n 1.1506e-17, 4.9993e-17, 1.4805e-16, 4.2949e-17, 1.7669e-18, 4.7814e-17,\n 7.0206e-14, 4.7475e-16, 1.6483e-14, 2.7412e-14, 2.6446e-17, 3.1553e-17,\n 5.9741e-17, 1.1707e-15, 4.8338e-16, 5.2114e-15, 4.6914e-14, 1.6122e-17,\n 1.0631e-15, 3.4185e-16, 6.9603e-15, 3.4984e-15, 4.0005e-14, 4.9392e-15,\n 1.1388e-14, 7.4022e-16, 6.6805e-16, 1.1581e-15, 7.1591e-15, 1.7239e-14,\n 1.1155e-14, 6.6086e-17, 1.9335e-14, 2.4170e-15, 1.0015e-16, 7.8796e-16,\n 1.7365e-14, 6.9987e-15, 6.8192e-16, 2.4542e-14, 1.2608e-16, 5.5345e-16,\n 2.8450e-16, 6.1028e-16, 3.3990e-15, 9.0468e-14, 2.8771e-16, 5.5237e-15,\n 4.1070e-15, 4.0280e-15, 1.9452e-19, 6.9439e-14], device='cuda:0')" + }, + "37": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 5.0553e-11, -2.5555e-12, -6.0922e-12, -6.9807e-12, -5.1451e-12,\n 7.1748e-11, 4.2676e-11, -6.2298e-12, -5.7274e-12, 3.7384e-11,\n 4.4061e-11, 8.4854e-11, -7.4444e-12, -7.0037e-12, -6.6689e-12,\n -4.8428e-12, -5.6050e-12, -8.0850e-12, 1.7721e-11, -4.7607e-12,\n -4.2842e-12, 6.9980e-11, -6.8871e-12, -6.9012e-12, -6.9894e-12,\n 3.5196e-11, -5.1357e-12, 1.3032e-11, 2.1135e-11, -4.7768e-12,\n -3.7299e-12, 1.9797e-11, -7.1520e-12, 2.9595e-11, 3.1418e-12,\n -5.4369e-12, 1.0479e-12, -2.5590e-12, 5.4563e-11, 8.3917e-12,\n -6.8220e-12, 4.2574e-11, 3.1490e-11, -4.0206e-12, 6.8959e-11,\n -4.9477e-12, -4.7002e-12, 1.0980e-11, -6.8555e-12, 2.6650e-11,\n 6.2579e-12, -7.7049e-12, -7.7380e-12, -6.5357e-12, -1.1308e-14,\n 2.7765e-11, 1.5976e-13, 2.4692e-11, 1.1710e-12, 2.2995e-11,\n 3.0033e-12, 2.1807e-11, -6.9236e-12, 2.6552e-11],\n [-5.0553e-11, 2.5555e-12, 6.0922e-12, 6.9807e-12, 5.1451e-12,\n -7.1748e-11, -4.2676e-11, 6.2298e-12, 5.7274e-12, -3.7384e-11,\n -4.4062e-11, -8.4854e-11, 7.4445e-12, 7.0037e-12, 6.6689e-12,\n 4.8427e-12, 5.6050e-12, 8.0850e-12, -1.7722e-11, 4.7607e-12,\n 4.2842e-12, -6.9980e-11, 6.8871e-12, 6.9012e-12, 6.9894e-12,\n -3.5196e-11, 5.1357e-12, -1.3033e-11, -2.1135e-11, 4.7768e-12,\n 3.7299e-12, -1.9798e-11, 7.1520e-12, -2.9595e-11, -3.1418e-12,\n 5.4369e-12, -1.0479e-12, 2.5590e-12, -5.4564e-11, -8.3917e-12,\n 6.8220e-12, -4.2574e-11, -3.1490e-11, 4.0206e-12, -6.8959e-11,\n 4.9477e-12, 4.7002e-12, -1.0980e-11, 6.8555e-12, -2.6650e-11,\n -6.2580e-12, 7.7050e-12, 7.7380e-12, 6.5358e-12, 1.1326e-14,\n -2.7765e-11, -1.5976e-13, -2.4692e-11, -1.1710e-12, -2.2995e-11,\n -3.0033e-12, -2.1807e-11, 6.9236e-12, -2.6552e-11]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.5317e-13, 2.2572e-13, 2.1791e-14, 4.6997e-14, 1.9580e-14, 6.7230e-12,\n 7.9022e-15, 4.3862e-15, 4.9136e-14, 2.1622e-14, 3.4265e-13, 4.5589e-12,\n 5.6127e-14, 4.5594e-14, 2.2484e-14, 4.4479e-14, 4.3712e-14, 4.9951e-14,\n 2.1981e-12, 1.7314e-14, 2.8651e-14, 4.7617e-12, 4.7737e-14, 3.9324e-14,\n 1.3797e-14, 7.9666e-12, 3.7107e-14, 4.3312e-15, 2.5365e-12, 5.1654e-14,\n 2.4091e-13, 4.4019e-14, 3.3629e-15, 2.1806e-13, 1.9777e-12, 5.1319e-14,\n 5.0619e-13, 3.3629e-14, 5.0780e-14, 3.1456e-14, 7.4265e-15, 3.1265e-13,\n 9.0882e-15, 6.3755e-13, 1.6960e-12, 2.7156e-13, 2.8278e-14, 3.8666e-12,\n 7.2922e-14, 2.5250e-14, 1.8518e-14, 2.2823e-14, 3.8858e-14, 1.8827e-14,\n 1.9607e-12, 8.8683e-12, 1.0533e-12, 6.1551e-12, 4.7579e-13, 4.8027e-13,\n 2.6661e-12, 1.3970e-14, 4.8950e-14, 4.5130e-12],\n [2.5317e-13, 2.2572e-13, 2.1791e-14, 4.6997e-14, 1.9580e-14, 6.7230e-12,\n 7.9022e-15, 4.3862e-15, 4.9136e-14, 2.1622e-14, 3.4265e-13, 4.5589e-12,\n 5.6127e-14, 4.5594e-14, 2.2484e-14, 4.4479e-14, 4.3712e-14, 4.9951e-14,\n 2.1981e-12, 1.7314e-14, 2.8651e-14, 4.7617e-12, 4.7737e-14, 3.9324e-14,\n 1.3797e-14, 7.9666e-12, 3.7107e-14, 4.3312e-15, 2.5365e-12, 5.1654e-14,\n 2.4091e-13, 4.4019e-14, 3.3629e-15, 2.1806e-13, 1.9777e-12, 5.1319e-14,\n 5.0619e-13, 3.3629e-14, 5.0780e-14, 3.1456e-14, 7.4265e-15, 3.1265e-13,\n 9.0882e-15, 6.3755e-13, 1.6960e-12, 2.7156e-13, 2.8278e-14, 3.8666e-12,\n 7.2922e-14, 2.5250e-14, 1.8518e-14, 2.2823e-14, 3.8858e-14, 1.8827e-14,\n 1.9607e-12, 8.8683e-12, 1.0533e-12, 6.1551e-12, 4.7579e-13, 4.8027e-13,\n 2.6661e-12, 1.3970e-14, 4.8950e-14, 4.5130e-12]], device='cuda:0')" + }, + "38": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 4.0600e-11, -4.0600e-11], device='cuda:0')", + "exp_avg_sq": "tensor([3.0362e-12, 3.0362e-12], device='cuda:0')" + }, + "39": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-4.0875e-15, 6.8878e-16, 9.7333e-16, ..., 1.3092e-15,\n -9.1692e-16, 7.6217e-16],\n [-2.3437e-16, 4.7882e-16, 1.3020e-15, ..., 4.2861e-15,\n -1.0245e-15, 9.1226e-16],\n [ 5.3774e-15, -1.2691e-15, -3.1091e-15, ..., -8.1895e-15,\n 3.1548e-16, 4.0685e-16],\n ...,\n [ 2.8938e-16, 8.4700e-17, -3.8147e-16, ..., -2.8094e-16,\n -8.2626e-17, 1.9735e-16],\n [ 2.2012e-15, -1.2014e-16, 1.7844e-17, ..., 1.8145e-15,\n -4.0284e-16, 8.2942e-18],\n [-1.3712e-15, -1.6323e-15, -1.9612e-15, ..., -4.4836e-15,\n 2.0754e-15, 3.3726e-16]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.0169e-16, 6.0138e-17, 1.8351e-16, ..., 5.9667e-17, 5.2607e-16,\n 1.8369e-16],\n [9.0738e-16, 5.5305e-16, 5.5553e-16, ..., 5.2135e-16, 2.5625e-15,\n 1.4459e-15],\n [3.8312e-15, 1.4221e-15, 3.2327e-15, ..., 1.5421e-15, 1.0855e-14,\n 4.8640e-15],\n ...,\n [1.8190e-19, 9.2297e-20, 2.5630e-19, ..., 2.1529e-19, 1.8438e-19,\n 4.5557e-20],\n [3.2499e-19, 6.7821e-18, 7.4863e-18, ..., 2.4119e-18, 9.3557e-18,\n 1.2910e-17],\n [2.9518e-16, 2.7230e-16, 6.2697e-17, ..., 1.7655e-16, 1.2393e-16,\n 2.4741e-17]], device='cuda:0')" + }, + "40": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-7.4976e-13, -1.1810e-12, 3.7864e-12, 1.9247e-12, 6.1312e-13,\n 2.3843e-12, -1.2857e-14, -1.0258e-12, 2.8916e-12, -1.7676e-12,\n 1.1569e-12, 2.6435e-13, -5.6381e-12, -3.6827e-12, -2.0570e-14,\n -1.1841e-12, -2.7805e-13, 5.3445e-13, 1.2256e-12, 8.5073e-14,\n 3.7136e-13, -9.6855e-13, 7.0528e-12, -1.1324e-12, 2.6051e-13,\n 5.7269e-13, 3.7625e-14, 2.0670e-12, -1.2691e-12, -3.6407e-12,\n 6.4437e-13, 6.4249e-12, -5.5247e-12, 5.1139e-12, 1.0132e-12,\n 5.3553e-13, 1.0741e-12, -2.1174e-13, -6.3559e-12, 1.6399e-13,\n 3.2294e-12, 2.4719e-13, -4.5786e-13, -5.6721e-13, 2.1756e-12,\n -7.7293e-12, 4.6791e-13, 8.3442e-13, 8.5664e-13, -5.3997e-12,\n 6.8899e-13, 4.6610e-13, 6.9200e-13, -7.1874e-14, -2.8617e-12,\n 5.7035e-13, 1.4781e-13, 3.9504e-13, -1.8443e-12, -1.4683e-13,\n 1.7017e-13, -2.4988e-13, 2.8556e-13, 2.5470e-12], device='cuda:0')", + "exp_avg_sq": "tensor([9.1737e-12, 5.3213e-11, 2.2945e-10, 9.6862e-11, 2.1110e-13, 1.3927e-10,\n 5.1971e-12, 3.5217e-11, 9.4993e-11, 3.9915e-11, 3.6584e-11, 3.3187e-13,\n 3.9460e-10, 5.2441e-10, 3.6116e-12, 1.1739e-11, 9.9308e-11, 6.2342e-12,\n 2.1407e-11, 9.0783e-13, 5.1877e-12, 4.9702e-11, 7.1831e-10, 2.6220e-11,\n 8.6959e-13, 1.3860e-13, 5.2092e-13, 5.4870e-11, 2.3732e-11, 1.5073e-10,\n 3.7206e-13, 1.4145e-10, 2.1977e-10, 3.0974e-10, 2.9360e-12, 1.6151e-12,\n 2.3485e-11, 5.6876e-14, 3.7320e-10, 1.8426e-12, 6.3463e-11, 1.4287e-13,\n 3.7144e-12, 2.5644e-12, 2.6742e-11, 5.1446e-10, 1.5958e-13, 5.3427e-13,\n 3.3573e-12, 3.8798e-10, 4.1764e-11, 7.5609e-13, 9.6784e-14, 3.4244e-14,\n 1.4266e-10, 2.3622e-13, 1.4389e-12, 7.2656e-14, 2.2182e-11, 1.9090e-14,\n 1.1138e-14, 1.7577e-15, 2.1020e-13, 7.7496e-12], device='cuda:0')" + }, + "41": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-5.9847e-16, 1.6466e-15, 1.5223e-13, 1.4679e-13, -6.0949e-15,\n 1.5110e-13, 2.4677e-15, 3.4373e-15, 8.3107e-14, -7.4985e-15,\n -2.2547e-14, 1.8572e-14, -7.0918e-14, -7.2847e-14, 1.6201e-15,\n -6.9323e-15, 6.8260e-15, 8.2557e-15, 3.8357e-14, 4.6112e-15,\n -3.8468e-15, 4.3867e-15, 3.5929e-13, 3.4188e-15, -3.6182e-16,\n -1.5973e-15, 4.8957e-16, 8.8800e-15, -4.8839e-15, -2.5178e-14,\n -6.7737e-15, 4.8552e-14, -1.0900e-13, 2.2076e-13, -1.1481e-14,\n 2.8153e-15, 5.1742e-14, -4.4422e-17, -1.2394e-13, -4.0518e-16,\n 5.5930e-14, -2.4868e-15, 3.2023e-15, 4.0310e-16, 8.0664e-14,\n -1.9446e-13, -2.2987e-15, -2.6638e-15, 6.5361e-15, -2.9640e-14,\n 7.4363e-14, -3.2362e-15, 4.3018e-15, 1.3301e-15, -7.2140e-15,\n 7.2453e-15, 1.7421e-14, -2.4327e-15, -1.9310e-14, -1.3323e-16,\n -4.8945e-16, 1.4604e-15, -1.3263e-15, -4.5588e-15], device='cuda:0')", + "exp_avg_sq": "tensor([1.3288e-15, 5.0969e-15, 1.0744e-13, 1.6072e-13, 1.1721e-15, 1.4146e-13,\n 8.4563e-18, 3.0481e-15, 3.3854e-14, 5.9942e-15, 4.0069e-16, 9.6670e-16,\n 3.4391e-13, 2.1692e-13, 8.6849e-18, 5.2914e-15, 6.9805e-14, 7.3459e-16,\n 8.8016e-15, 3.8468e-17, 4.5746e-17, 6.1099e-15, 7.7860e-13, 1.2070e-15,\n 8.2409e-16, 2.8513e-18, 5.5696e-18, 3.4625e-15, 3.9756e-18, 2.9250e-14,\n 2.7397e-17, 1.2746e-14, 1.3604e-13, 2.2598e-13, 3.6093e-18, 1.2446e-15,\n 1.0016e-14, 2.2563e-18, 2.0258e-13, 6.9942e-18, 1.2782e-14, 3.5612e-17,\n 3.8781e-17, 6.2079e-17, 1.1142e-14, 6.6961e-13, 2.6439e-17, 1.2754e-15,\n 6.9571e-17, 8.5180e-14, 4.2201e-14, 4.0870e-16, 3.1650e-16, 2.9593e-17,\n 2.1711e-14, 7.0808e-16, 2.9439e-15, 9.7118e-17, 1.8849e-14, 9.8124e-20,\n 5.1978e-17, 3.2477e-20, 4.4891e-16, 4.0876e-17], device='cuda:0')" + }, + "42": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-9.2879e-15, -2.0965e-14, 1.0612e-13, 7.3035e-14, 7.7779e-15,\n 8.0960e-14, -2.2834e-15, -1.7609e-14, 8.0732e-14, -2.8876e-14,\n 2.4992e-14, -1.0494e-14, -1.0660e-13, -5.6701e-14, -2.8506e-15,\n -1.1272e-14, 1.2477e-14, 2.3011e-14, 4.3137e-14, 1.2627e-14,\n 1.0442e-14, -1.7112e-14, 1.8055e-13, -2.0480e-14, 2.3100e-15,\n 1.2688e-15, -1.3691e-15, 5.2302e-14, -2.2325e-14, -6.6186e-14,\n 6.4431e-15, 1.4445e-13, -9.7677e-14, 1.3603e-13, 1.0866e-14,\n -2.6086e-15, 4.4337e-14, 2.2573e-15, -1.1731e-13, 1.2209e-16,\n 8.2336e-14, 5.6043e-15, -9.0503e-15, -7.4821e-15, 6.8627e-14,\n -1.3951e-13, 1.8138e-15, 1.5985e-15, 2.7396e-14, -1.0476e-13,\n 4.3563e-14, 3.6553e-15, -2.3669e-15, -2.1860e-15, -5.2874e-14,\n -4.2774e-15, -1.1230e-14, 2.9322e-15, -2.0126e-14, 2.3728e-15,\n 8.7027e-16, -2.9999e-15, 2.1878e-15, 6.0725e-14], device='cuda:0')", + "exp_avg_sq": "tensor([4.7409e-15, 2.8686e-14, 1.2633e-13, 5.5688e-14, 8.7664e-17, 7.8129e-14,\n 3.0691e-15, 1.8800e-14, 5.2836e-14, 2.1504e-14, 1.9483e-14, 3.5839e-16,\n 2.0820e-13, 2.8564e-13, 2.0946e-15, 5.7667e-15, 5.1520e-14, 3.4636e-15,\n 1.2386e-14, 4.9150e-16, 2.6669e-15, 2.6675e-14, 3.9479e-13, 1.4224e-14,\n 2.8528e-16, 1.1690e-18, 3.3100e-16, 3.0002e-14, 1.2571e-14, 8.1078e-14,\n 3.0538e-16, 7.6718e-14, 1.1521e-13, 1.7130e-13, 1.8226e-15, 4.8617e-16,\n 1.3405e-14, 7.8350e-18, 1.9669e-13, 1.1697e-15, 3.4894e-14, 2.5823e-17,\n 2.1125e-15, 1.4543e-15, 1.5336e-14, 2.7075e-13, 9.7143e-18, 5.4470e-16,\n 1.8215e-15, 2.0709e-13, 2.4197e-14, 1.7213e-16, 1.3442e-16, 6.0855e-19,\n 7.5853e-14, 2.7604e-16, 1.1645e-15, 3.6766e-18, 1.0860e-14, 9.0553e-19,\n 4.2477e-17, 1.7097e-17, 4.2115e-17, 3.8671e-15], device='cuda:0')" + }, + "43": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 1.4886e-14, -3.6158e-14, 8.7295e-13, 1.5247e-12, -1.1912e-13,\n 1.3000e-12, -1.1702e-13, -4.8462e-14, 6.3976e-13, 9.0882e-14,\n -1.4815e-13, -7.7426e-14, 3.7330e-13, -2.7171e-13, -1.1672e-13,\n 2.6187e-13, -7.5253e-14, 1.6836e-13, 5.6013e-13, 1.5373e-13,\n -7.6456e-14, -6.6364e-14, 1.4776e-12, -6.1749e-14, -1.1908e-13,\n -9.7035e-14, -1.1564e-13, 7.0799e-14, -3.2983e-14, 1.6736e-13,\n -1.1259e-13, 2.3797e-13, 7.4394e-13, 1.2177e-12, -1.7254e-13,\n 2.7306e-14, 7.8054e-13, -1.3505e-15, 6.3233e-13, -1.1386e-13,\n 4.5627e-13, -1.0187e-13, -1.1320e-13, -2.9116e-14, 1.1243e-12,\n 9.4771e-13, -1.0788e-13, 1.2845e-14, 1.2797e-13, 1.3527e-13,\n 1.0617e-12, -1.1944e-13, -3.9521e-14, -1.5735e-13, 5.0572e-14,\n -5.2350e-14, -6.4855e-14, -1.2123e-13, 5.4237e-13, -9.5178e-15,\n -1.6710e-13, -1.0316e-13, -1.6074e-13, -1.6829e-14],\n [-1.4886e-14, 3.6158e-14, -8.7295e-13, -1.5247e-12, 1.1912e-13,\n -1.3000e-12, 1.1702e-13, 4.8462e-14, -6.3977e-13, -9.0883e-14,\n 1.4815e-13, 7.7426e-14, -3.7330e-13, 2.7171e-13, 1.1672e-13,\n -2.6187e-13, 7.5253e-14, -1.6836e-13, -5.6013e-13, -1.5373e-13,\n 7.6456e-14, 6.6364e-14, -1.4776e-12, 6.1749e-14, 1.1908e-13,\n 9.7036e-14, 1.1565e-13, -7.0800e-14, 3.2983e-14, -1.6736e-13,\n 1.1259e-13, -2.3798e-13, -7.4395e-13, -1.2177e-12, 1.7254e-13,\n -2.7306e-14, -7.8054e-13, 1.3505e-15, -6.3233e-13, 1.1386e-13,\n -4.5627e-13, 1.0187e-13, 1.1320e-13, 2.9116e-14, -1.1243e-12,\n -9.4771e-13, 1.0788e-13, -1.2845e-14, -1.2797e-13, -1.3527e-13,\n -1.0617e-12, 1.1944e-13, 3.9521e-14, 1.5735e-13, -5.0572e-14,\n 5.2351e-14, 6.4855e-14, 1.2123e-13, -5.4237e-13, 9.5179e-15,\n 1.6710e-13, 1.0316e-13, 1.6074e-13, 1.6830e-14]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.7212e-13, 3.1123e-13, 3.7871e-12, 1.9349e-11, 1.4413e-13, 1.0786e-11,\n 6.3258e-14, 2.6711e-13, 2.4212e-12, 6.5373e-13, 3.0591e-15, 1.1004e-13,\n 9.7800e-12, 3.2990e-12, 3.8573e-14, 4.1787e-12, 7.5498e-12, 4.4079e-13,\n 2.8838e-12, 6.6289e-14, 2.0612e-14, 4.9478e-13, 1.2169e-11, 7.9782e-14,\n 1.0943e-13, 8.5018e-14, 1.1246e-14, 1.5386e-13, 4.9844e-14, 1.0660e-12,\n 1.2765e-13, 2.6301e-13, 5.9149e-12, 6.8820e-12, 7.3190e-14, 6.1736e-14,\n 3.2074e-12, 7.0599e-13, 5.1324e-12, 4.6833e-14, 1.0124e-12, 1.2775e-13,\n 4.9006e-15, 1.6771e-14, 2.9688e-12, 1.6219e-11, 1.1376e-13, 9.6972e-14,\n 1.1499e-14, 1.2770e-12, 1.0632e-11, 8.2960e-14, 8.9556e-14, 1.3492e-13,\n 6.9287e-13, 6.5552e-14, 8.0299e-14, 1.2138e-13, 1.0337e-11, 1.1765e-13,\n 1.0256e-13, 9.3833e-14, 1.3294e-13, 9.9977e-14],\n [6.7212e-13, 3.1123e-13, 3.7871e-12, 1.9349e-11, 1.4413e-13, 1.0786e-11,\n 6.3258e-14, 2.6711e-13, 2.4212e-12, 6.5373e-13, 3.0591e-15, 1.1004e-13,\n 9.7800e-12, 3.2990e-12, 3.8573e-14, 4.1787e-12, 7.5498e-12, 4.4079e-13,\n 2.8838e-12, 6.6289e-14, 2.0612e-14, 4.9478e-13, 1.2169e-11, 7.9782e-14,\n 1.0943e-13, 8.5018e-14, 1.1246e-14, 1.5386e-13, 4.9844e-14, 1.0660e-12,\n 1.2765e-13, 2.6301e-13, 5.9149e-12, 6.8820e-12, 7.3190e-14, 6.1736e-14,\n 3.2074e-12, 7.0599e-13, 5.1324e-12, 4.6833e-14, 1.0124e-12, 1.2775e-13,\n 4.9006e-15, 1.6771e-14, 2.9688e-12, 1.6219e-11, 1.1376e-13, 9.6972e-14,\n 1.1499e-14, 1.2770e-12, 1.0632e-11, 8.2960e-14, 8.9556e-14, 1.3492e-13,\n 6.9287e-13, 6.5552e-14, 8.0299e-14, 1.2138e-13, 1.0337e-11, 1.1765e-13,\n 1.0256e-13, 9.3833e-14, 1.3294e-13, 9.9977e-14]], device='cuda:0')" + }, + "44": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 6.5402e-13, -6.5402e-13], device='cuda:0')", + "exp_avg_sq": "tensor([7.3455e-12, 7.3455e-12], device='cuda:0')" + }, + "45": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-2.1134e-10, -6.8476e-10, -7.6390e-11, ..., 4.3832e-10,\n -1.8053e-10, 7.1131e-11],\n [ 1.7582e-10, -1.6279e-10, 1.6112e-10, ..., 6.4229e-10,\n -2.7113e-10, -3.9147e-11],\n [ 1.9918e-10, -2.6512e-10, -5.0545e-10, ..., -1.3125e-09,\n 8.7149e-11, 5.1010e-10],\n ...,\n [-2.3058e-10, 2.8555e-09, 1.5291e-10, ..., 6.1860e-09,\n 2.5090e-09, -3.4186e-09],\n [ 8.5593e-11, -4.1726e-10, 2.8468e-10, ..., 2.2411e-09,\n -9.1963e-10, 5.0000e-11],\n [ 1.0609e-09, 3.8505e-11, 7.8616e-10, ..., 1.1833e-09,\n -9.8338e-10, 5.6270e-10]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.4316e-15, 1.7964e-15, 3.4219e-15, ..., 2.1210e-15, 4.0203e-15,\n 4.2086e-15],\n [9.8778e-16, 7.7239e-16, 1.1764e-15, ..., 9.1491e-16, 3.2046e-15,\n 1.4131e-15],\n [7.2054e-14, 4.9024e-14, 7.1611e-14, ..., 4.5780e-14, 1.9911e-13,\n 1.0499e-13],\n ...,\n [9.8816e-14, 4.4067e-14, 1.3235e-13, ..., 7.4048e-14, 2.5513e-13,\n 1.1822e-13],\n [3.8086e-15, 4.8578e-16, 7.0521e-15, ..., 1.1343e-15, 9.8067e-15,\n 9.0948e-15],\n [2.9112e-13, 1.9173e-13, 4.0950e-13, ..., 1.6797e-13, 9.7430e-13,\n 5.3243e-13]], device='cuda:0')" + }, + "46": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-2.1811e-08, 6.4778e-08, 5.9442e-08, -1.1008e-08, 1.4485e-07,\n 6.9051e-09, 1.3157e-07, -4.2983e-08, -5.2315e-08, -3.2815e-08,\n -5.7788e-08, 1.4737e-08, 6.9343e-09, -2.0601e-07, 1.8355e-07,\n 2.4581e-07, 2.4444e-07, 2.2197e-07, 3.2982e-07, -1.1561e-07,\n 5.9806e-08, -6.0252e-08, -6.2590e-08, -1.7647e-08, -1.0910e-07,\n -1.0375e-07, 1.2193e-08, 1.3224e-07, -3.2468e-08, -9.9296e-08,\n -1.3495e-06, 9.8870e-08, -8.1396e-08, 1.7591e-07, 6.4813e-08,\n -5.9996e-10, 2.2586e-07, 1.7418e-07, 1.0397e-07, 5.4422e-08,\n -2.3434e-08, 1.2955e-07, 3.7356e-09, -8.0891e-07, -3.0028e-08,\n 8.9269e-09, -2.6885e-10, -1.7067e-08, -1.2035e-07, -3.8487e-07,\n 2.3166e-08, -6.1755e-08, -2.5237e-08, 8.9239e-08, 3.7991e-08,\n 1.8243e-07, 8.5881e-08, 1.4026e-08, 5.7461e-09, 3.5828e-08,\n 2.0898e-07, 1.6989e-07, 1.9140e-08, 1.5722e-07], device='cuda:0')", + "exp_avg_sq": "tensor([2.0288e-10, 8.5267e-11, 5.5659e-09, 8.1674e-09, 2.1367e-08, 7.9780e-11,\n 2.7723e-09, 4.1345e-11, 1.2443e-09, 4.3323e-11, 5.7424e-11, 4.8013e-09,\n 1.3462e-10, 4.1801e-10, 4.4515e-09, 3.2087e-09, 1.2929e-08, 4.4994e-08,\n 3.2069e-08, 8.9219e-11, 2.0911e-09, 1.2159e-09, 9.0129e-11, 1.7057e-11,\n 6.8599e-11, 8.1992e-11, 3.9850e-11, 3.0052e-09, 1.2610e-10, 3.1238e-11,\n 6.5973e-10, 2.1175e-10, 5.2833e-11, 3.4176e-08, 9.9135e-10, 7.2151e-11,\n 2.2583e-08, 1.1145e-08, 6.4388e-09, 1.2987e-08, 1.7297e-10, 2.3238e-09,\n 6.1296e-12, 6.3310e-10, 5.5811e-11, 9.7400e-11, 4.6847e-09, 8.8576e-11,\n 7.3510e-11, 8.4910e-10, 9.3655e-11, 1.0259e-10, 3.2003e-11, 9.2408e-10,\n 1.3233e-09, 1.7285e-08, 1.6384e-09, 1.5171e-10, 3.9846e-09, 2.4900e-10,\n 7.4721e-09, 6.9745e-09, 2.4253e-10, 2.7733e-08], device='cuda:0')" + }, + "47": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-6.9110e-10, 2.2760e-10, -5.3399e-11, -3.6678e-11, -3.4309e-10,\n -4.6197e-11, 4.9524e-09, 1.2873e-10, 3.5370e-10, 3.8118e-11,\n 7.7009e-11, 8.4482e-09, -2.4559e-11, 4.0800e-10, 6.2994e-09,\n 9.2373e-09, -3.3503e-11, 1.5726e-09, 2.3360e-08, 4.9762e-10,\n 1.3581e-10, -3.9429e-09, 3.7821e-10, 1.2371e-11, 7.7459e-10,\n 2.8922e-10, 1.2156e-10, 1.5821e-10, 1.1779e-10, 1.0345e-09,\n -4.7941e-08, 1.6081e-12, 1.7330e-10, 8.2912e-09, -1.5590e-09,\n -1.8765e-10, 2.1714e-08, 7.3379e-09, -5.3455e-10, 1.0278e-09,\n -3.0492e-10, 2.2787e-10, -7.8955e-13, -1.0079e-08, -1.9447e-11,\n 9.0954e-11, -3.1019e-10, -5.0390e-11, 9.5562e-10, 5.7973e-09,\n 4.0125e-11, -7.2081e-10, 2.6419e-10, 2.5695e-11, 1.0013e-10,\n -3.6860e-10, -7.5571e-11, 2.3746e-10, 2.1460e-09, 2.1423e-10,\n 2.5380e-08, 1.4994e-08, -3.4177e-10, -8.9801e-10], device='cuda:0')", + "exp_avg_sq": "tensor([2.7427e-13, 8.2955e-14, 2.4011e-12, 6.6637e-13, 7.5743e-12, 2.5564e-15,\n 2.6733e-12, 8.3891e-16, 7.5492e-13, 4.0236e-15, 6.3626e-15, 3.9875e-12,\n 1.1788e-13, 3.6596e-15, 4.5304e-12, 1.3358e-12, 1.0363e-11, 6.6652e-11,\n 2.6429e-11, 1.4087e-14, 5.9809e-13, 2.2328e-12, 2.9088e-16, 1.0923e-16,\n 8.9256e-14, 7.7570e-16, 1.6059e-16, 4.8555e-12, 2.8972e-13, 2.2836e-13,\n 2.0235e-13, 4.7224e-14, 2.1300e-15, 3.4499e-11, 9.4961e-15, 3.4280e-14,\n 1.6836e-11, 7.1943e-12, 1.4694e-12, 1.0567e-11, 2.2635e-13, 3.9262e-12,\n 5.3499e-16, 1.9985e-13, 1.1004e-14, 6.4026e-14, 3.3568e-13, 1.1302e-13,\n 5.5703e-14, 1.2402e-13, 3.1113e-14, 1.1384e-13, 1.6516e-13, 6.5550e-13,\n 1.5617e-12, 7.8288e-12, 6.2404e-13, 1.5966e-13, 1.1926e-12, 4.9204e-15,\n 2.8506e-12, 1.9645e-12, 1.1120e-14, 1.4332e-11], device='cuda:0')" + }, + "48": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 9.8195e-10, -5.7064e-10, 1.1555e-09, 4.4394e-10, 1.9193e-09,\n 4.5981e-11, 4.7068e-09, -1.6519e-10, 4.8025e-10, -6.1051e-11,\n -7.0471e-11, 3.9303e-09, 2.3652e-09, -3.2122e-09, 6.0648e-09,\n 8.9763e-09, 2.5719e-09, 2.6345e-09, 1.2034e-08, -5.3763e-10,\n 1.5598e-10, -4.1276e-09, -2.7891e-10, 4.3391e-12, -9.7167e-10,\n -4.4390e-10, -5.2662e-10, 1.0106e-09, -1.7501e-10, -1.5375e-09,\n -2.8089e-08, -2.9660e-10, -2.3024e-10, 5.6324e-09, 1.2402e-09,\n 1.8329e-10, 1.0776e-08, 5.9449e-09, 1.0760e-09, 2.7338e-09,\n -8.2134e-11, 4.3760e-10, 1.0023e-11, -1.1954e-08, -2.5786e-10,\n -1.3323e-10, 9.3063e-11, 6.5230e-10, -1.0686e-09, -1.9120e-09,\n -7.8706e-11, 7.4480e-10, -4.6845e-10, 9.2744e-11, -1.6493e-11,\n 3.1975e-09, 3.8360e-10, -3.3332e-10, 1.1334e-09, 2.0621e-09,\n 1.1656e-08, 8.7122e-09, 1.5656e-09, 2.2530e-09], device='cuda:0')", + "exp_avg_sq": "tensor([1.5497e-13, 7.2635e-14, 4.8131e-12, 7.2750e-12, 1.9459e-11, 1.8131e-13,\n 3.4892e-12, 3.8019e-16, 1.7541e-12, 1.9974e-15, 3.3763e-15, 5.6831e-12,\n 3.4911e-13, 2.9341e-13, 5.3458e-12, 3.9992e-12, 1.1440e-11, 4.1351e-11,\n 3.3983e-11, 5.0109e-15, 1.6859e-12, 1.6095e-12, 1.2045e-16, 1.1109e-16,\n 3.4878e-14, 3.0423e-16, 6.0610e-14, 2.2329e-12, 1.0628e-13, 1.0508e-13,\n 1.2970e-12, 3.3800e-14, 1.0244e-15, 3.6016e-11, 7.8383e-13, 2.0605e-13,\n 2.4330e-11, 1.2461e-11, 5.6737e-12, 1.4383e-11, 3.5424e-13, 1.6260e-12,\n 4.4497e-14, 1.2430e-12, 4.6898e-15, 4.3227e-14, 4.1062e-12, 1.8393e-13,\n 2.1049e-14, 1.2683e-12, 2.6904e-14, 4.4081e-14, 7.9774e-14, 5.8518e-13,\n 8.3350e-13, 1.5553e-11, 1.2707e-12, 3.5352e-13, 4.8791e-12, 4.2463e-13,\n 8.6248e-12, 8.0765e-12, 4.3220e-13, 2.5194e-11], device='cuda:0')" + }, + "49": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 5.7385e-09, 8.5253e-09, -2.1282e-09, -1.6764e-08, 1.1760e-09,\n 7.0853e-09, -3.4568e-08, 5.7900e-09, -2.7983e-09, 6.0800e-09,\n 5.1708e-09, -5.6259e-08, -4.0048e-09, -2.8156e-09, -3.8662e-08,\n -4.9455e-08, 1.3131e-08, 6.0810e-09, -8.4124e-08, -4.8571e-09,\n 8.5699e-09, 4.1703e-08, -7.9329e-09, 5.6323e-09, 6.6068e-09,\n -1.1257e-10, 2.7724e-09, 9.6060e-09, 3.5555e-09, 7.5271e-09,\n 1.9947e-07, 1.2059e-08, 2.7025e-09, -2.7025e-08, 6.3643e-11,\n 4.9784e-09, -8.7252e-08, -3.0962e-08, -2.9905e-09, -1.4996e-09,\n 5.2714e-10, 1.8005e-08, 6.1574e-09, 3.4424e-08, 5.4742e-09,\n 1.5322e-09, 7.5304e-09, 5.5149e-10, 6.2116e-09, -2.8483e-08,\n -2.3393e-09, 3.4664e-09, 4.7270e-09, 9.9545e-09, 1.0203e-08,\n -3.3261e-09, 5.1617e-09, 3.0244e-09, -1.0624e-08, -2.5475e-09,\n -1.2175e-07, -6.2846e-08, 7.4452e-10, 9.8671e-09],\n [-5.7122e-09, -8.5082e-09, 2.1553e-09, 1.6800e-08, -1.1295e-09,\n -7.0344e-09, 3.4356e-08, -5.7303e-09, 2.5408e-09, -6.0372e-09,\n -5.1422e-09, 5.6111e-08, 3.9949e-09, 2.8539e-09, 3.8286e-08,\n 4.9295e-08, -1.3082e-08, -6.0593e-09, 8.3426e-08, 4.9158e-09,\n -8.5181e-09, -4.1842e-08, 7.9928e-09, -5.5865e-09, -6.5454e-09,\n 1.5288e-10, -2.7277e-09, -9.5857e-09, -3.5215e-09, -7.4754e-09,\n -2.0034e-07, -1.2013e-08, -2.6528e-09, 2.6639e-08, -4.5509e-11,\n -4.9329e-09, 8.6751e-08, 3.0339e-08, 3.0165e-09, 9.3599e-10,\n -4.8714e-10, -1.7934e-08, -6.1327e-09, -3.5186e-08, -5.4232e-09,\n -1.4764e-09, -7.4863e-09, -5.0974e-10, -6.1676e-09, 2.7693e-08,\n 2.3698e-09, -3.4456e-09, -4.6644e-09, -9.9191e-09, -1.0158e-08,\n 3.3883e-09, -5.1200e-09, -2.9959e-09, 1.0289e-08, 2.5718e-09,\n 1.2130e-07, 6.2491e-08, -9.8533e-10, -9.8300e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.5986e-11, 4.9876e-10, 1.8686e-10, 1.4380e-11, 1.3378e-10, 1.0610e-11,\n 3.4011e-10, 1.3290e-11, 1.4087e-10, 1.4296e-11, 1.2822e-11, 2.9756e-10,\n 7.0341e-11, 2.7690e-12, 3.8717e-10, 9.7552e-11, 4.4551e-10, 9.9560e-10,\n 3.5971e-10, 4.8967e-12, 1.1436e-10, 7.8075e-10, 5.9874e-12, 1.5665e-11,\n 6.3521e-12, 6.4458e-12, 1.5606e-11, 1.4134e-09, 5.0689e-12, 1.4936e-11,\n 1.2258e-11, 7.6635e-10, 1.2073e-11, 4.7371e-10, 1.1419e-11, 2.9785e-11,\n 2.9681e-10, 2.2544e-10, 6.8483e-11, 3.2643e-10, 1.9387e-10, 1.6025e-09,\n 5.2940e-12, 1.2326e-11, 8.4039e-12, 1.4480e-11, 1.1576e-11, 1.8855e-10,\n 5.7550e-12, 4.7075e-12, 5.6955e-10, 1.3180e-11, 1.2609e-11, 5.6399e-10,\n 1.1474e-09, 1.9034e-10, 1.7679e-10, 1.1232e-10, 5.8519e-11, 5.5485e-12,\n 9.5267e-11, 5.8498e-11, 1.0345e-11, 2.2936e-10],\n [1.5986e-11, 4.9876e-10, 1.8686e-10, 1.4380e-11, 1.3378e-10, 1.0610e-11,\n 3.4011e-10, 1.3290e-11, 1.4087e-10, 1.4296e-11, 1.2822e-11, 2.9756e-10,\n 7.0341e-11, 2.7690e-12, 3.8717e-10, 9.7552e-11, 4.4551e-10, 9.9560e-10,\n 3.5971e-10, 4.8967e-12, 1.1436e-10, 7.8075e-10, 5.9874e-12, 1.5665e-11,\n 6.3521e-12, 6.4458e-12, 1.5606e-11, 1.4134e-09, 5.0689e-12, 1.4936e-11,\n 1.2258e-11, 7.6635e-10, 1.2073e-11, 4.7371e-10, 1.1419e-11, 2.9785e-11,\n 2.9681e-10, 2.2544e-10, 6.8483e-11, 3.2643e-10, 1.9387e-10, 1.6025e-09,\n 5.2940e-12, 1.2326e-11, 8.4039e-12, 1.4480e-11, 1.1576e-11, 1.8855e-10,\n 5.7550e-12, 4.7076e-12, 5.6955e-10, 1.3180e-11, 1.2609e-11, 5.6399e-10,\n 1.1474e-09, 1.9034e-10, 1.7679e-10, 1.1232e-10, 5.8519e-11, 5.5485e-12,\n 9.5267e-11, 5.8498e-11, 1.0345e-11, 2.2936e-10]], device='cuda:0')" + }, + "50": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-2.2260e-08, 2.1983e-08], device='cuda:0')", + "exp_avg_sq": "tensor([7.7612e-10, 7.7612e-10], device='cuda:0')" + }, + "51": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 2.0643e-13, -6.6706e-14, -2.5245e-13, ..., -1.3453e-13,\n 5.6831e-13, -4.3295e-14],\n [ 1.2623e-13, -3.2858e-14, -1.2560e-13, ..., -3.3872e-14,\n 3.5364e-13, -2.5131e-14],\n [-3.8090e-14, 2.4218e-14, 9.8613e-14, ..., -5.9540e-15,\n -5.7714e-15, -5.7327e-15],\n ...,\n [ 1.9429e-15, 3.6517e-14, 5.2633e-14, ..., -2.6987e-14,\n 9.5859e-14, 4.1033e-14],\n [-8.2534e-14, -1.7305e-15, 1.0266e-13, ..., 1.2242e-14,\n -6.8672e-14, 1.1970e-14],\n [-1.1873e-13, 2.4129e-14, 1.1041e-13, ..., -1.9332e-13,\n -5.2969e-13, -8.9004e-15]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6044e-16, 1.3763e-16, 2.1124e-16, ..., 5.5488e-17, 4.9778e-16,\n 2.6036e-16],\n [9.0372e-18, 5.1791e-18, 9.5578e-18, ..., 2.0774e-18, 1.7641e-17,\n 1.1012e-17],\n [8.2294e-19, 2.5192e-19, 2.4934e-19, ..., 3.2500e-19, 1.2052e-19,\n 6.9271e-20],\n ...,\n [3.9678e-18, 2.0894e-18, 3.4582e-18, ..., 2.1213e-18, 1.1658e-17,\n 6.1745e-18],\n [4.0806e-18, 2.9964e-18, 2.9339e-18, ..., 1.2532e-18, 5.9459e-18,\n 2.8451e-18],\n [5.7254e-16, 4.5681e-16, 8.1890e-16, ..., 1.9881e-16, 2.0618e-15,\n 1.1734e-15]], device='cuda:0')" + }, + "52": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 2.1025e-11, 3.8601e-11, 8.4687e-13, 5.2214e-12, 4.1823e-12,\n 4.4480e-12, 6.7821e-12, -4.9471e-12, -2.4811e-11, 1.5021e-11,\n -9.0923e-11, 2.2150e-11, 1.4702e-12, -1.5965e-11, -4.1941e-13,\n 6.8261e-11, -4.0605e-11, 1.8484e-11, -1.2252e-11, -7.9734e-12,\n -1.9416e-11, -4.8401e-11, -5.0026e-11, -5.2241e-12, 3.8237e-13,\n 1.2817e-11, 5.2466e-11, -4.8109e-11, -3.3714e-11, -1.8709e-11,\n 1.3425e-11, 3.7225e-13, 4.9282e-12, 6.9616e-12, 3.3430e-12,\n 1.7572e-11, 7.4651e-12, -5.4089e-12, -1.0340e-11, -3.9208e-13,\n 2.4597e-12, -2.6187e-12, -1.3730e-11, -9.2371e-12, 1.4029e-11,\n 4.2310e-11, 2.9386e-11, 2.5268e-11, -6.5329e-11, 8.9148e-11,\n -1.9766e-12, -2.1393e-12, 2.2674e-11, 1.1281e-11, 3.2910e-11,\n 1.9041e-11, 4.3579e-12, 2.4427e-11, -6.3749e-11, -3.4757e-11,\n 3.8526e-11, 2.7201e-12, -3.2863e-12, -5.0305e-11], device='cuda:0')", + "exp_avg_sq": "tensor([1.2948e-11, 5.5712e-13, 1.1336e-14, 8.7795e-13, 1.1017e-14, 1.9384e-13,\n 1.4863e-12, 1.5975e-14, 2.2513e-12, 1.7084e-12, 1.4612e-11, 2.5096e-12,\n 7.7222e-14, 8.2586e-13, 5.2403e-14, 1.1111e-11, 3.6827e-11, 1.0650e-11,\n 1.3751e-11, 1.4691e-10, 2.1372e-12, 2.7105e-12, 2.2973e-11, 1.6864e-11,\n 2.2871e-12, 1.0772e-13, 7.3071e-11, 1.8893e-11, 2.4708e-12, 3.1731e-12,\n 1.6618e-11, 2.0569e-12, 1.7354e-13, 6.1077e-14, 9.8306e-15, 1.1689e-10,\n 3.4457e-11, 1.1922e-14, 9.5159e-14, 1.2570e-13, 1.5164e-12, 6.3377e-15,\n 8.0363e-11, 1.2722e-11, 3.3768e-11, 4.8161e-12, 2.9561e-11, 2.3346e-11,\n 1.6790e-11, 3.8084e-10, 6.5307e-14, 1.2795e-14, 2.2848e-13, 1.1186e-11,\n 1.1166e-11, 1.1091e-10, 9.2456e-14, 1.6058e-11, 5.9554e-11, 1.4352e-11,\n 2.6572e-11, 2.6634e-13, 1.8914e-13, 4.9359e-11], device='cuda:0')" + }, + "53": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 4.8466e-14, 4.7569e-14, 5.2399e-15, -1.7423e-13, -2.6335e-14,\n -3.1024e-13, 3.9185e-14, 6.7325e-14, 4.1922e-14, -7.0610e-14,\n -1.4612e-12, 1.6839e-15, 5.8296e-15, 1.3321e-13, 1.8434e-14,\n 3.7044e-13, -2.1858e-12, 6.8622e-14, 2.6363e-13, -9.5007e-13,\n 3.3021e-13, -2.1384e-14, -2.3529e-12, 1.3692e-13, -2.0416e-13,\n -6.8580e-15, 6.2898e-13, -1.5325e-12, 5.9184e-13, -8.2227e-13,\n -1.9204e-12, 4.0077e-14, -4.0780e-14, -1.5805e-14, 8.0867e-15,\n -2.2489e-13, -9.4996e-14, -1.1210e-14, 9.2603e-15, 5.2234e-14,\n 3.2517e-14, 2.6892e-14, 4.1719e-13, 5.3355e-14, -3.0132e-14,\n -2.1704e-13, -2.8239e-14, -2.0712e-13, -1.8396e-12, 3.1884e-13,\n 1.0639e-15, 1.6836e-14, -1.5676e-13, -1.4319e-12, 6.1393e-14,\n -1.2550e-12, 7.4031e-15, -3.4021e-14, -3.6790e-12, -1.1812e-12,\n 2.1656e-13, 3.6650e-16, -1.3293e-13, -2.7474e-12], device='cuda:0')", + "exp_avg_sq": "tensor([2.6740e-15, 3.3398e-15, 1.5501e-20, 2.7251e-15, 1.3269e-16, 1.4166e-15,\n 3.3672e-17, 1.0856e-17, 1.1468e-16, 1.4272e-17, 3.2344e-16, 7.5105e-17,\n 9.8395e-18, 7.6722e-16, 7.7314e-19, 5.7247e-15, 4.2733e-14, 2.4898e-16,\n 2.2848e-17, 1.0637e-13, 1.7080e-15, 1.2734e-16, 4.0915e-14, 5.0429e-17,\n 4.0183e-17, 5.6366e-16, 3.6341e-14, 3.5975e-15, 1.8973e-15, 2.7670e-15,\n 3.0396e-14, 4.3873e-17, 1.3022e-17, 2.0097e-16, 4.7240e-18, 4.7584e-14,\n 1.9934e-15, 3.8701e-18, 3.8329e-18, 1.3801e-16, 2.0331e-18, 4.9806e-18,\n 2.2778e-14, 1.3016e-16, 4.0680e-15, 2.1258e-14, 1.9651e-15, 7.9190e-17,\n 1.8399e-15, 2.4806e-13, 1.1391e-16, 9.0539e-18, 4.5929e-15, 2.1579e-14,\n 7.6210e-16, 1.1299e-13, 3.2810e-16, 2.3522e-15, 7.2537e-14, 1.6599e-15,\n 1.1823e-14, 8.8218e-20, 1.5883e-15, 4.2765e-14], device='cuda:0')" + }, + "54": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 1.0689e-13, 1.3094e-13, -6.3576e-15, 1.5365e-13, 4.7190e-14,\n 2.2568e-13, 9.2920e-14, -5.1108e-14, -8.1171e-13, -1.0157e-13,\n -2.3572e-12, 6.0906e-15, 3.6907e-15, -1.4065e-13, -1.5646e-14,\n 6.5977e-13, -1.5528e-12, 2.5836e-13, -1.9582e-13, -8.6715e-13,\n -2.6986e-13, -1.2839e-12, -1.6733e-12, -1.5205e-13, -3.2220e-13,\n -3.0743e-14, 4.8697e-13, -1.5942e-12, -4.7024e-13, -9.1794e-13,\n -7.5775e-13, -3.0761e-13, -2.1910e-13, -1.5147e-13, -2.2703e-14,\n 1.3822e-13, 9.1644e-14, 9.4792e-15, -6.3353e-15, 3.5870e-15,\n 1.6115e-14, -5.1153e-14, -7.2407e-13, -1.3828e-13, 1.7302e-13,\n -2.8957e-15, 4.2641e-13, 4.0589e-13, -2.0670e-12, 1.4508e-12,\n -9.2705e-15, -1.6659e-14, -1.9434e-13, -6.8878e-13, 4.0431e-13,\n -5.4451e-13, 1.5555e-14, 1.1984e-13, -2.1693e-12, -1.3246e-12,\n 3.1311e-13, 1.1472e-15, 8.4967e-14, -1.8437e-12], device='cuda:0')", + "exp_avg_sq": "tensor([2.9993e-15, 2.6511e-15, 1.6813e-18, 1.0631e-15, 7.3146e-18, 5.5511e-16,\n 7.1427e-16, 8.4383e-18, 2.6406e-15, 1.7772e-17, 1.1789e-14, 1.7728e-16,\n 3.6615e-17, 2.3306e-16, 1.2982e-17, 1.1714e-14, 3.3489e-14, 4.2192e-15,\n 6.8848e-15, 1.0636e-13, 7.0419e-16, 2.8115e-15, 2.4185e-14, 7.9780e-15,\n 2.2178e-15, 7.2068e-16, 2.8443e-14, 1.6367e-14, 8.1724e-16, 5.0729e-15,\n 1.8887e-14, 2.1393e-15, 5.0653e-16, 6.4812e-16, 2.5431e-18, 5.1514e-14,\n 1.5204e-14, 6.5242e-18, 1.4624e-18, 7.3308e-17, 4.6454e-16, 3.2555e-18,\n 5.9139e-14, 5.3827e-15, 1.3840e-14, 9.2541e-15, 1.2450e-14, 1.1518e-14,\n 1.4387e-14, 1.8566e-13, 4.5721e-17, 7.5805e-19, 1.9108e-15, 1.4212e-14,\n 3.5548e-15, 8.4917e-14, 1.1841e-16, 5.0083e-15, 5.0312e-14, 1.2590e-14,\n 7.1771e-15, 2.7926e-18, 6.1492e-16, 4.1686e-14], device='cuda:0')" + }, + "55": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-1.4665e-12, 1.1377e-12, 6.3645e-13, 9.5099e-13, -4.7112e-13,\n 1.9078e-12, 9.9407e-13, 1.7305e-12, 7.3832e-15, -4.9358e-12,\n -6.2623e-12, -2.9412e-12, 1.3735e-12, 3.6240e-13, 1.9832e-12,\n 3.7755e-12, -1.6755e-11, -9.7887e-14, 2.6805e-12, -3.9146e-12,\n 1.7387e-13, -1.9213e-13, -2.1133e-11, 1.8256e-12, -1.6441e-12,\n -1.8233e-13, -6.6757e-12, -1.0463e-11, 6.2314e-13, -1.1227e-11,\n -2.2155e-11, 4.0896e-13, -8.5684e-13, -3.2304e-13, 2.7667e-12,\n 1.9261e-12, 1.7360e-12, 2.4442e-12, 7.5610e-13, 1.2325e-12,\n 5.3235e-13, 8.3366e-13, 3.0006e-12, 1.0933e-12, 1.2391e-12,\n -4.0702e-12, 5.8792e-13, 1.9256e-12, -1.1138e-11, -1.1497e-12,\n 3.9708e-13, 2.3116e-12, -3.9066e-12, -1.7651e-11, -7.8761e-13,\n -7.2248e-12, 2.1796e-12, -5.0892e-13, -2.5158e-11, -7.1763e-12,\n -5.1702e-12, 1.4483e-12, 8.4612e-13, -1.8759e-11],\n [ 1.4665e-12, -1.1377e-12, -6.3644e-13, -9.5100e-13, 4.7112e-13,\n -1.9078e-12, -9.9407e-13, -1.7305e-12, -7.4095e-15, 4.9358e-12,\n 6.2623e-12, 2.9412e-12, -1.3736e-12, -3.6241e-13, -1.9832e-12,\n -3.7755e-12, 1.6755e-11, 9.7899e-14, -2.6805e-12, 3.9145e-12,\n -1.7386e-13, 1.9228e-13, 2.1133e-11, -1.8256e-12, 1.6441e-12,\n 1.8232e-13, 6.6756e-12, 1.0463e-11, -6.2313e-13, 1.1227e-11,\n 2.2155e-11, -4.0893e-13, 8.5683e-13, 3.2304e-13, -2.7667e-12,\n -1.9261e-12, -1.7360e-12, -2.4442e-12, -7.5610e-13, -1.2325e-12,\n -5.3235e-13, -8.3366e-13, -3.0006e-12, -1.0933e-12, -1.2391e-12,\n 4.0703e-12, -5.8794e-13, -1.9256e-12, 1.1138e-11, 1.1497e-12,\n -3.9709e-13, -2.3116e-12, 3.9066e-12, 1.7652e-11, 7.8760e-13,\n 7.2248e-12, -2.1796e-12, 5.0890e-13, 2.5158e-11, 7.1763e-12,\n 5.1701e-12, -1.4483e-12, -8.4611e-13, 1.8759e-11]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.8556e-12, 2.9995e-12, 4.4713e-14, 4.6583e-14, 6.3739e-14, 2.8258e-14,\n 4.6398e-14, 6.5424e-14, 2.0194e-15, 2.8612e-12, 8.9654e-15, 5.7308e-13,\n 5.7862e-14, 5.4110e-14, 5.2570e-14, 6.9999e-13, 2.9522e-12, 1.1226e-14,\n 2.6138e-14, 2.1233e-12, 5.1010e-14, 4.6148e-14, 4.3793e-12, 1.6383e-14,\n 2.9058e-14, 1.6169e-12, 3.2548e-12, 1.7430e-13, 4.9051e-14, 8.3725e-13,\n 3.9966e-12, 2.1147e-14, 3.6566e-15, 3.6516e-13, 6.2404e-14, 2.0294e-12,\n 8.3587e-14, 5.5316e-14, 5.0971e-14, 6.2391e-14, 1.8450e-14, 5.8634e-14,\n 4.8444e-13, 2.7475e-15, 3.0666e-13, 6.5304e-12, 1.0093e-13, 1.9085e-14,\n 5.9336e-14, 3.5045e-12, 3.6461e-14, 5.9789e-14, 7.2105e-12, 3.8151e-12,\n 2.0663e-13, 3.0772e-12, 2.2120e-14, 6.8948e-13, 3.5511e-12, 4.5235e-14,\n 4.4082e-12, 1.1952e-14, 2.5953e-14, 2.1762e-12],\n [1.8556e-12, 2.9995e-12, 4.4713e-14, 4.6583e-14, 6.3739e-14, 2.8258e-14,\n 4.6398e-14, 6.5424e-14, 2.0194e-15, 2.8612e-12, 8.9654e-15, 5.7308e-13,\n 5.7862e-14, 5.4111e-14, 5.2570e-14, 6.9999e-13, 2.9522e-12, 1.1226e-14,\n 2.6138e-14, 2.1233e-12, 5.1010e-14, 4.6148e-14, 4.3793e-12, 1.6383e-14,\n 2.9058e-14, 1.6169e-12, 3.2548e-12, 1.7430e-13, 4.9051e-14, 8.3725e-13,\n 3.9966e-12, 2.1147e-14, 3.6566e-15, 3.6516e-13, 6.2404e-14, 2.0294e-12,\n 8.3587e-14, 5.5316e-14, 5.0971e-14, 6.2391e-14, 1.8450e-14, 5.8634e-14,\n 4.8444e-13, 2.7475e-15, 3.0666e-13, 6.5304e-12, 1.0093e-13, 1.9085e-14,\n 5.9336e-14, 3.5045e-12, 3.6461e-14, 5.9789e-14, 7.2105e-12, 3.8151e-12,\n 2.0663e-13, 3.0772e-12, 2.2120e-14, 6.8948e-13, 3.5511e-12, 4.5235e-14,\n 4.4082e-12, 1.1952e-14, 2.5953e-14, 2.1762e-12]], device='cuda:0')" + }, + "56": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-9.4152e-12, 9.4152e-12], device='cuda:0')", + "exp_avg_sq": "tensor([3.3465e-12, 3.3465e-12], device='cuda:0')" + }, + "57": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 7.1448e-14, 3.4212e-13, 1.4532e-13, 9.7788e-15, -5.3995e-12,\n 1.7124e-06, 1.1599e-11, 5.5800e-14],\n [ 7.9963e-15, -8.9382e-14, 1.6036e-14, 1.0850e-15, -8.4556e-13,\n 1.2867e-07, 1.2863e-12, 6.0971e-15],\n [-3.3218e-12, 2.2263e-10, -6.5144e-12, -4.5243e-13, 6.8072e-10,\n 4.7658e-05, -4.9274e-10, -2.3501e-12],\n [-9.4824e-13, -2.9443e-11, -1.9489e-12, -1.2959e-13, 2.7069e-11,\n -3.6211e-05, -1.5890e-10, -7.6455e-13],\n [-1.9759e-12, 1.3119e-10, -3.9568e-12, -2.7583e-13, 4.0032e-10,\n 3.0802e-05, -2.9070e-10, -1.4221e-12],\n [ 5.1489e-14, -1.0399e-10, -2.3308e-14, 4.2274e-15, -1.9215e-10,\n -5.3525e-05, -1.1185e-11, -7.2780e-14],\n [-3.8849e-14, 9.6902e-12, -6.4158e-14, -4.8235e-15, 2.0856e-11,\n 4.1503e-06, -4.5518e-12, -1.8868e-14],\n [ 1.4227e-15, -8.2031e-14, 2.8281e-15, 1.9577e-16, -3.2300e-13,\n -1.3404e-08, 2.2880e-13, 1.0400e-15]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.0654e-13, 1.4787e-12, 4.8828e-12, 3.2431e-12, 2.8231e-09, 1.7203e-07,\n 3.0472e-11, 3.5781e-12],\n [8.9880e-15, 1.8384e-14, 4.8177e-14, 3.2566e-14, 3.8315e-11, 1.1172e-08,\n 2.9712e-13, 3.4462e-14],\n [8.9968e-12, 1.0986e-11, 4.9064e-11, 3.3383e-11, 1.1053e-08, 1.9074e-06,\n 2.7175e-10, 3.6732e-11],\n [2.4547e-12, 3.9224e-12, 1.2948e-11, 8.4291e-12, 8.6786e-09, 7.1096e-07,\n 8.3667e-11, 9.7153e-12],\n [1.2814e-13, 1.1167e-12, 6.4043e-13, 3.1661e-13, 3.9311e-09, 2.5077e-06,\n 9.5806e-12, 3.9192e-13],\n [3.5638e-11, 6.5116e-11, 1.9044e-10, 1.2081e-10, 1.9743e-07, 5.2482e-06,\n 1.2495e-09, 1.3629e-10],\n [9.8319e-12, 1.4227e-11, 5.4113e-11, 3.5955e-11, 3.2227e-08, 3.1874e-07,\n 3.1231e-10, 3.8135e-11],\n [3.1997e-13, 5.0452e-13, 1.7044e-12, 1.0873e-12, 8.8177e-10, 5.7351e-09,\n 1.0762e-11, 1.2369e-12]], device='cuda:0')" + }, + "58": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 1.7124e-06, 1.2867e-07, 4.7659e-05, -3.6211e-05, 3.0802e-05,\n -5.3526e-05, 4.1503e-06, -1.3404e-08], device='cuda:0')", + "exp_avg_sq": "tensor([1.8267e-07, 1.1291e-08, 1.9483e-06, 7.3331e-07, 2.5236e-06, 5.7762e-06,\n 4.0706e-07, 7.9818e-09], device='cuda:0')" + }, + "59": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 1.7345e-05, 1.8293e-05, -1.6487e-04, -1.3369e-04, -1.1735e-04,\n -1.0726e-04, 1.7329e-05, 1.8306e-05],\n [-1.0260e-05, -1.0821e-05, 9.7528e-05, 7.9085e-05, 6.9419e-05,\n 6.3446e-05, -1.0251e-05, -1.0829e-05],\n [ 9.7059e-07, 1.0236e-06, -9.2258e-06, -7.4811e-06, -6.5669e-06,\n -6.0018e-06, 9.6969e-07, 1.0244e-06],\n [-8.0553e-06, -8.4954e-06, 7.6568e-05, 6.2088e-05, 5.4501e-05,\n 4.9811e-05, -8.0478e-06, -8.5019e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3459e-07, 2.0862e-07, 1.2136e-06, 8.8735e-07, 9.6370e-07, 3.7993e-06,\n 2.1907e-07, 3.6224e-07],\n [3.7469e-08, 5.9492e-08, 1.3193e-06, 9.1329e-07, 8.1413e-07, 1.3305e-06,\n 5.4189e-08, 8.0854e-08],\n [7.4015e-08, 1.3090e-07, 3.7310e-07, 3.1509e-07, 4.6739e-07, 2.3346e-06,\n 1.3191e-07, 2.2071e-07],\n [4.0762e-08, 7.3897e-08, 2.5534e-07, 2.0642e-07, 2.8882e-07, 1.3328e-06,\n 7.3773e-08, 1.2400e-07]], device='cuda:0')" + }, + "60": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-1.0770e-04, 6.3711e-05, -6.0269e-06, 5.0020e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.7388e-05, 3.0818e-06, 9.7260e-06, 5.3560e-06], device='cuda:0')" + }, + "61": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-3.2438e-08, 3.8686e-07, 2.4377e-07, ..., 4.5328e-07,\n 3.2408e-08, 4.7193e-07],\n [ 1.5947e-08, -8.4273e-08, -5.0063e-08, ..., -1.0942e-07,\n -1.9591e-08, -7.0061e-08],\n [ 8.6395e-09, -6.2910e-08, -3.6828e-08, ..., -8.3503e-08,\n -1.6047e-08, -4.9178e-08],\n ...,\n [ 6.8285e-09, -4.1868e-08, -2.4316e-08, ..., -5.1679e-08,\n -8.3883e-09, -3.9222e-08],\n [ 4.2986e-08, -4.9599e-07, -3.2024e-07, ..., -5.7111e-07,\n -3.7401e-08, -6.1285e-07],\n [-4.9901e-09, 5.6318e-08, 3.3578e-08, ..., 7.3001e-08,\n 1.2485e-08, 5.4689e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.0810e-13, 1.4243e-12, 7.7360e-13, ..., 8.4662e-13, 7.3007e-13,\n 1.0788e-12],\n [5.1243e-14, 6.2518e-14, 4.3315e-14, ..., 4.3394e-14, 5.6237e-14,\n 6.2888e-14],\n [2.9736e-14, 3.4892e-14, 2.6215e-14, ..., 2.5390e-14, 3.8756e-14,\n 3.9896e-14],\n ...,\n [6.6928e-14, 5.0283e-14, 7.3629e-14, ..., 4.7450e-14, 1.6249e-13,\n 1.3722e-13],\n [2.0902e-12, 3.7492e-12, 2.0691e-12, ..., 2.1535e-12, 1.7798e-12,\n 2.8337e-12],\n [1.4998e-13, 1.4525e-13, 1.6310e-13, ..., 1.1286e-13, 3.1875e-13,\n 2.7891e-13]], device='cuda:0')" + }, + "62": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 6.1316e-06, 1.9563e-06, 1.1903e-06, 1.0672e-05, -3.8052e-06,\n -9.2813e-06, -5.3741e-06, 7.7898e-06, 4.1331e-05, 5.3256e-06,\n 9.8328e-08, 6.7695e-07, -7.4025e-06, -7.1297e-06, -1.6539e-06,\n 2.1186e-07, 8.5034e-07, 1.2301e-05, -1.3428e-07, 6.4676e-07,\n 9.1040e-07, 3.0719e-06, 1.4616e-05, 8.5147e-07, -6.0729e-06,\n -1.7829e-06, -3.9073e-07, 1.2395e-05, 2.3819e-07, 9.6736e-07,\n 2.0017e-05, 1.2964e-06, 7.0716e-07, -5.4791e-07, 5.7980e-07,\n 8.3260e-08, -5.8057e-06, -1.6572e-06, 1.1719e-05, 9.0822e-07,\n -2.7655e-06, -4.0838e-06, 1.1267e-05, 4.6693e-07, -5.5106e-07,\n 1.1660e-07, 7.4902e-07, 8.2163e-07, 5.2737e-06, 1.3958e-05,\n 1.2062e-06, 6.5458e-07, -9.6652e-11, 5.9412e-07, 2.4019e-06,\n 3.4823e-05, 6.8839e-07, 6.6585e-07, 6.7925e-06, 5.5687e-07,\n -5.7651e-07, 1.0188e-06, 7.3275e-07, 4.3884e-07, 7.1638e-07,\n 9.3610e-07, 1.2150e-05, 4.2671e-06, -6.7411e-06, 5.0302e-06,\n 9.5432e-07, 5.8773e-06, -3.4554e-06, -1.0127e-05, 6.9492e-07,\n 6.2388e-07, 6.7948e-07, 3.0735e-06, 8.5992e-06, -6.1241e-06,\n -1.0715e-05, -4.6517e-06, 1.1707e-05, 1.3307e-06, 5.2874e-06,\n 2.1875e-05, 6.7789e-07, -3.0873e-06, 4.0320e-07, -8.8993e-06,\n 8.0549e-07, -4.5884e-06, 5.3954e-07, -6.8208e-06, 9.7372e-06,\n 4.4017e-06, 1.9830e-05, 7.5311e-07, -2.5258e-06, 1.0691e-05,\n 5.1346e-07, 4.3828e-05, 1.0531e-05, 2.1011e-06, 9.5450e-07,\n 4.0756e-07, 6.4762e-07, 8.1765e-07, 5.5892e-06, 2.5412e-06,\n 7.8718e-07, -1.5445e-05, 5.4362e-07, 7.4899e-07, 4.2381e-06,\n -3.0325e-06, 8.4571e-07, 2.0379e-07, -2.5764e-06, 1.3861e-06,\n 2.5315e-06, 2.4641e-07, -5.3138e-08, -2.1445e-06, -1.3189e-05,\n 5.1439e-07, 8.8950e-07, 2.2302e-07, 6.2398e-07, -6.1372e-06,\n -2.6003e-06, 5.3310e-07, 5.0824e-07, -2.8849e-08, 6.2608e-06,\n 8.9337e-07, 5.9976e-07, 6.9868e-07, 7.3502e-07, 7.0015e-07,\n -5.5344e-06, 6.7468e-07, 8.6598e-07, 5.3170e-07, 8.4585e-07,\n 5.6932e-07, 1.2505e-05, 2.7823e-06, 9.7294e-07, 1.0845e-06,\n 7.9216e-06, -3.1708e-07, 1.0533e-06, 5.9940e-07, -4.8785e-06,\n 8.7718e-06, 1.0470e-05, 7.1677e-07, -4.3681e-06, 1.6308e-05,\n 1.3469e-06, -2.1112e-06, 8.6782e-07, 1.9075e-06, -5.3274e-06,\n 7.4437e-07, 4.0919e-06, 3.4458e-07, 8.0762e-07, 9.6934e-07,\n 1.3059e-05, 6.2583e-07, 5.1464e-07, 3.6803e-07, 1.2925e-06,\n 1.0453e-06, 8.8751e-07, 6.5443e-07, -9.4279e-06, 2.5152e-07,\n 6.9391e-07, 7.8109e-07, 3.3006e-06, 1.6643e-06, 2.0023e-05,\n 9.6623e-07, 6.9178e-07, -2.5972e-06, 5.6670e-07, 8.9613e-07,\n -1.2072e-05, 9.1847e-07], device='cuda:0')", + "exp_avg_sq": "tensor([5.4315e-09, 6.0774e-10, 5.7123e-10, 1.5842e-08, 1.8965e-09, 1.8814e-08,\n 4.1269e-09, 1.3084e-08, 1.4726e-07, 8.8347e-09, 4.5322e-09, 4.3929e-09,\n 5.8474e-09, 4.3921e-09, 1.0572e-08, 1.5081e-09, 6.4700e-09, 1.5898e-08,\n 1.1138e-08, 4.1644e-09, 5.6744e-09, 1.6836e-08, 2.1891e-08, 1.4985e-09,\n 3.9770e-09, 3.4383e-09, 1.8560e-09, 1.5936e-08, 6.8480e-09, 1.0101e-08,\n 3.7800e-08, 2.2634e-10, 6.9732e-09, 1.4426e-09, 4.1993e-09, 2.1960e-09,\n 8.5226e-09, 3.1839e-09, 1.6365e-08, 4.1082e-09, 2.5537e-09, 1.0803e-08,\n 1.4381e-08, 2.4755e-09, 2.9315e-09, 5.1513e-09, 2.2727e-09, 7.0435e-09,\n 5.4770e-09, 1.7853e-08, 3.1051e-09, 1.3532e-09, 1.4067e-09, 2.2100e-09,\n 8.3652e-09, 1.0765e-07, 6.3289e-09, 4.3276e-09, 1.1826e-08, 2.0176e-09,\n 5.3922e-09, 3.0026e-09, 6.6695e-09, 2.3066e-09, 2.3307e-09, 7.4044e-09,\n 1.9578e-08, 8.4637e-09, 2.0877e-08, 8.8852e-09, 9.6903e-09, 1.0369e-08,\n 5.3653e-09, 1.6204e-08, 5.1605e-09, 7.3105e-09, 3.2485e-09, 4.3956e-09,\n 1.3138e-08, 7.7989e-09, 8.4038e-09, 9.6627e-09, 1.4305e-08, 4.9082e-09,\n 1.0864e-08, 3.9737e-08, 3.9735e-09, 3.8335e-09, 7.5881e-10, 1.7376e-08,\n 7.5036e-09, 3.9950e-09, 1.1654e-09, 1.4261e-08, 1.1413e-08, 5.5949e-09,\n 3.7681e-08, 1.7257e-09, 4.9204e-09, 1.7677e-08, 1.9653e-09, 1.7012e-07,\n 1.4433e-08, 4.3071e-09, 7.2685e-09, 2.6937e-09, 2.3404e-09, 6.1062e-09,\n 1.5751e-08, 9.6525e-09, 4.3810e-09, 1.8643e-08, 1.9756e-09, 3.5129e-09,\n 1.4065e-08, 6.5997e-09, 1.7600e-09, 1.3402e-09, 9.9888e-09, 2.4488e-10,\n 2.1326e-08, 2.4787e-09, 1.5515e-09, 3.6487e-09, 2.2785e-08, 7.7911e-10,\n 8.9757e-09, 8.3763e-10, 2.0745e-09, 6.4458e-09, 2.5536e-09, 3.5685e-09,\n 4.1017e-09, 1.5051e-09, 6.0398e-09, 5.0093e-09, 2.7131e-09, 4.7596e-09,\n 1.8638e-09, 1.1906e-09, 6.3162e-09, 6.4319e-09, 5.4889e-09, 6.4675e-09,\n 4.1048e-09, 2.1298e-09, 1.4477e-08, 3.7146e-09, 7.1847e-10, 1.7410e-09,\n 1.1622e-08, 3.5229e-09, 3.3209e-09, 2.8251e-09, 6.9157e-09, 8.4621e-09,\n 1.3330e-08, 3.3088e-09, 8.0337e-09, 2.2605e-08, 2.9730e-10, 3.0130e-09,\n 4.5169e-09, 1.0867e-08, 5.1330e-09, 2.2335e-09, 9.5110e-09, 3.5592e-09,\n 7.2574e-09, 2.0302e-09, 1.6109e-08, 5.3621e-09, 7.5410e-09, 4.2441e-09,\n 3.4642e-10, 1.0663e-08, 3.1538e-09, 6.4007e-09, 1.8442e-08, 2.6599e-09,\n 9.5394e-09, 1.5343e-09, 5.5619e-09, 4.0961e-09, 3.7279e-08, 7.4200e-09,\n 1.2561e-09, 2.5618e-09, 3.1344e-09, 4.5813e-09, 1.2172e-08, 8.2440e-09],\n device='cuda:0')" + }, + "63": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-8.1129e-05, 2.5758e-05, 2.3405e-05, -5.8277e-05, -9.5622e-05,\n -4.9042e-05, -7.5930e-05, -5.3108e-05, -1.1945e-04, -5.4922e-05,\n 2.2642e-05, 2.1664e-05, -7.3252e-05, -8.3464e-05, -4.8764e-05,\n 2.3659e-05, 2.1586e-05, -7.6374e-05, -5.5693e-05, 2.2420e-05,\n 2.2065e-05, -5.0040e-05, -6.7843e-05, 2.3442e-05, -8.5440e-05,\n -7.2393e-05, 2.5199e-05, -7.0456e-05, -5.8921e-05, 2.2517e-05,\n -7.3037e-05, 2.5141e-05, 2.0290e-05, 2.5256e-05, 2.2529e-05,\n 2.4461e-05, -5.8843e-05, -7.4207e-05, -5.9731e-05, -6.2200e-05,\n -8.5065e-05, -5.4797e-05, -6.5554e-05, 2.3886e-05, 2.5264e-05,\n 2.3508e-05, 2.3411e-05, 2.1484e-05, -7.2616e-05, -8.0970e-05,\n 2.2630e-05, 2.5946e-05, 2.4548e-05, 2.3428e-05, -5.9886e-05,\n -1.3652e-04, 2.2640e-05, 2.3222e-05, -5.5931e-05, 2.2407e-05,\n -6.3081e-05, 2.3450e-05, 2.2411e-05, 2.4264e-05, 2.1311e-05,\n 2.1241e-05, -5.6978e-05, -5.5017e-05, -4.8716e-05, -5.4558e-05,\n 2.0601e-05, -5.9142e-05, -5.3599e-05, -5.4296e-05, 2.0668e-05,\n 2.2191e-05, -7.1544e-05, -7.5853e-05, -5.6072e-05, -6.0555e-05,\n -8.1909e-05, -5.7326e-05, -7.4159e-05, -6.1470e-05, -5.7733e-05,\n -7.3979e-05, 2.1560e-05, -6.3786e-05, 2.5834e-05, -5.5048e-05,\n 2.2079e-05, -7.5192e-05, 2.3451e-05, -5.4157e-05, -7.1859e-05,\n -6.6366e-05, -1.0862e-04, 2.4179e-05, -6.1564e-05, -5.2966e-05,\n 2.3222e-05, -1.1676e-04, -6.4042e-05, -6.0491e-05, 2.1403e-05,\n 2.3406e-05, 2.0941e-05, 2.1611e-05, -4.9322e-05, -5.6604e-05,\n 2.1950e-05, -6.5167e-05, 2.2805e-05, 2.1443e-05, -5.2734e-05,\n -5.6556e-05, 2.2873e-05, 2.3841e-05, -5.7116e-05, 2.4557e-05,\n -4.9632e-05, 2.3748e-05, 2.6104e-05, -7.1101e-05, -4.8074e-05,\n 2.5385e-05, 1.9688e-05, 2.6032e-05, 2.3467e-05, -6.1410e-05,\n -8.0969e-05, 2.1054e-05, 2.2681e-05, 2.6146e-05, -8.1257e-05,\n 2.1038e-05, 2.3316e-05, 2.0749e-05, 2.4690e-05, 2.4887e-05,\n -6.5159e-05, 2.1001e-05, 2.1065e-05, 2.0175e-05, 2.2459e-05,\n 2.2531e-05, -8.4023e-05, -7.3042e-05, 2.5123e-05, 2.2618e-05,\n -5.5231e-05, 2.3927e-05, 2.2938e-05, 2.1610e-05, -6.2544e-05,\n -9.1934e-05, -1.4909e-04, -7.4135e-05, -4.9567e-05, -8.0645e-05,\n 2.5193e-05, -7.8019e-05, 2.2316e-05, -4.9970e-05, -6.7788e-05,\n 2.1797e-05, -5.6509e-05, 2.3572e-05, 2.1106e-05, 2.2406e-05,\n -8.4367e-05, 2.3001e-05, 1.9338e-05, 2.3434e-05, 2.3459e-05,\n -5.2027e-05, 2.2412e-05, 2.2805e-05, -4.8712e-05, 2.4613e-05,\n 2.0200e-05, 2.1962e-05, -6.8204e-05, -6.6293e-05, -6.7522e-05,\n 2.2561e-05, 2.4856e-05, -7.9370e-05, 2.0662e-05, 2.1363e-05,\n -6.7364e-05, 2.2291e-05],\n [ 7.9145e-05, -2.4785e-05, -2.2420e-05, 5.6662e-05, 9.3388e-05,\n 4.7837e-05, 7.4198e-05, 5.1791e-05, 1.1633e-04, 5.3545e-05,\n -2.1908e-05, -2.0939e-05, 7.1677e-05, 8.1546e-05, 4.7539e-05,\n -2.2819e-05, -2.0904e-05, 7.4085e-05, 5.4380e-05, -2.1674e-05,\n -2.1478e-05, 4.8909e-05, 6.5988e-05, -2.2598e-05, 8.3561e-05,\n 7.0622e-05, -2.4369e-05, 6.8734e-05, 5.7611e-05, -2.1923e-05,\n 7.1016e-05, -2.4016e-05, -1.9598e-05, -2.4378e-05, -2.1772e-05,\n -2.3668e-05, 5.7437e-05, 7.2406e-05, 5.8123e-05, 6.0756e-05,\n 8.3100e-05, 5.3470e-05, 6.4039e-05, -2.3105e-05, -2.4489e-05,\n -2.2755e-05, -2.2746e-05, -2.0870e-05, 7.0840e-05, 7.8878e-05,\n -2.1820e-05, -2.5105e-05, -2.3671e-05, -2.2633e-05, 5.8401e-05,\n 1.3304e-04, -2.1972e-05, -2.2515e-05, 5.4572e-05, -2.1567e-05,\n 6.1541e-05, -2.2691e-05, -2.1733e-05, -2.3539e-05, -2.0468e-05,\n -2.0579e-05, 5.5448e-05, 5.3720e-05, 4.7520e-05, 5.3282e-05,\n -1.9920e-05, 5.7660e-05, 5.2485e-05, 5.3058e-05, -1.9970e-05,\n -2.1465e-05, 6.9711e-05, 7.4081e-05, 5.4526e-05, 5.9102e-05,\n 8.0025e-05, 5.6019e-05, 7.2095e-05, 5.9911e-05, 5.6204e-05,\n 7.2161e-05, -2.0785e-05, 6.2427e-05, -2.5007e-05, 5.3786e-05,\n -2.1393e-05, 7.3496e-05, -2.2559e-05, 5.2931e-05, 6.9988e-05,\n 6.4728e-05, 1.0569e-04, -2.3457e-05, 6.0005e-05, 5.1562e-05,\n -2.2393e-05, 1.1364e-04, 6.2321e-05, 5.9022e-05, -2.0730e-05,\n -2.2622e-05, -2.0109e-05, -2.0919e-05, 4.8188e-05, 5.5352e-05,\n -2.1223e-05, 6.3692e-05, -2.2085e-05, -2.0674e-05, 5.1449e-05,\n 5.5363e-05, -2.2035e-05, -2.2977e-05, 5.5852e-05, -2.3432e-05,\n 4.8372e-05, -2.2962e-05, -2.5289e-05, 6.9501e-05, 4.6978e-05,\n -2.4501e-05, -1.9000e-05, -2.5134e-05, -2.2737e-05, 6.0129e-05,\n 7.9091e-05, -2.0390e-05, -2.1929e-05, -2.5321e-05, 7.9112e-05,\n -2.0295e-05, -2.2562e-05, -1.9997e-05, -2.3993e-05, -2.4075e-05,\n 6.3746e-05, -2.0239e-05, -2.0422e-05, -1.9552e-05, -2.1844e-05,\n -2.1754e-05, 8.1980e-05, 7.1358e-05, -2.4235e-05, -2.1765e-05,\n 5.3885e-05, -2.3146e-05, -2.2172e-05, -2.0835e-05, 6.1195e-05,\n 8.9795e-05, 1.4508e-04, 7.2422e-05, 4.8571e-05, 7.8749e-05,\n -2.4271e-05, 7.6241e-05, -2.1655e-05, 4.8823e-05, 6.6208e-05,\n -2.1136e-05, 5.5185e-05, -2.2835e-05, -2.0430e-05, -2.1580e-05,\n 8.2324e-05, -2.2325e-05, -1.8579e-05, -2.2698e-05, -2.2393e-05,\n 5.0829e-05, -2.1735e-05, -2.2146e-05, 4.7634e-05, -2.3831e-05,\n -1.9460e-05, -2.1094e-05, 6.6462e-05, 6.4579e-05, 6.5623e-05,\n -2.1913e-05, -2.4039e-05, 7.7556e-05, -1.9889e-05, -2.0649e-05,\n 6.5734e-05, -2.1684e-05],\n [ 5.9708e-08, -3.5812e-08, -3.9037e-08, 4.9292e-08, 7.6233e-08,\n 4.0945e-08, 5.8293e-08, 3.8740e-08, 8.5454e-08, 4.1936e-08,\n -2.8180e-08, -2.6107e-08, 5.2801e-08, 6.4476e-08, 3.8196e-08,\n -3.2143e-08, -2.5002e-08, 7.1282e-08, 4.3112e-08, -2.7647e-08,\n -2.0741e-08, 3.3878e-08, 5.7049e-08, -3.0541e-08, 6.1667e-08,\n 5.9350e-08, -3.2676e-08, 4.8670e-08, 4.0411e-08, -2.0614e-08,\n 6.1152e-08, -4.5236e-08, -2.4533e-08, -3.4699e-08, -2.8395e-08,\n -3.0439e-08, 4.7076e-08, 6.1485e-08, 4.9333e-08, 4.3606e-08,\n 6.6026e-08, 4.5489e-08, 3.9649e-08, -2.9001e-08, -2.8722e-08,\n -2.8959e-08, -2.1937e-08, -2.1070e-08, 5.1468e-08, 6.2719e-08,\n -2.8427e-08, -3.1384e-08, -3.5149e-08, -3.0409e-08, 4.9034e-08,\n 1.0087e-07, -2.4545e-08, -2.5496e-08, 3.9441e-08, -3.2117e-08,\n 5.2437e-08, -2.7408e-08, -2.3709e-08, -2.6008e-08, -3.1338e-08,\n -2.3486e-08, 4.7172e-08, 3.8383e-08, 3.9977e-08, 3.6534e-08,\n -2.4711e-08, 4.5548e-08, 3.5272e-08, 4.1511e-08, -2.4494e-08,\n -2.8497e-08, 5.9460e-08, 5.5434e-08, 4.6840e-08, 5.1168e-08,\n 6.8393e-08, 4.5978e-08, 6.5774e-08, 4.9695e-08, 4.9180e-08,\n 4.8167e-08, -2.7803e-08, 4.2633e-08, -3.0150e-08, 4.2969e-08,\n -2.5867e-08, 5.5159e-08, -3.5115e-08, 4.1693e-08, 5.6346e-08,\n 4.8801e-08, 9.0180e-08, -2.6258e-08, 5.2735e-08, 4.1301e-08,\n -3.2209e-08, 8.8536e-08, 5.2329e-08, 4.4383e-08, -2.3822e-08,\n -3.0913e-08, -3.1834e-08, -2.5892e-08, 3.3410e-08, 3.6895e-08,\n -2.5494e-08, 5.3262e-08, -2.5768e-08, -2.8441e-08, 3.9530e-08,\n 3.7353e-08, -3.1798e-08, -3.4430e-08, 4.1303e-08, -4.5787e-08,\n 4.0162e-08, -3.0410e-08, -2.9694e-08, 5.2060e-08, 3.7118e-08,\n -3.3777e-08, -2.5413e-08, -3.4197e-08, -2.6023e-08, 4.2265e-08,\n 6.1399e-08, -2.1503e-08, -2.7766e-08, -3.1012e-08, 6.5952e-08,\n -2.8968e-08, -2.7366e-08, -2.8173e-08, -2.4405e-08, -2.9037e-08,\n 4.5860e-08, -3.0009e-08, -2.1378e-08, -2.0722e-08, -2.1302e-08,\n -2.8520e-08, 5.8390e-08, 5.1585e-08, -3.2838e-08, -3.2399e-08,\n 3.9564e-08, -3.1087e-08, -2.7699e-08, -2.8320e-08, 4.3878e-08,\n 6.1597e-08, 1.3121e-07, 5.4055e-08, 2.9030e-08, 5.1807e-08,\n -3.4651e-08, 5.7753e-08, -2.4308e-08, 3.2890e-08, 5.1212e-08,\n -2.2347e-08, 4.0389e-08, -2.7055e-08, -2.5232e-08, -3.0601e-08,\n 5.8224e-08, -2.4200e-08, -2.8954e-08, -2.7372e-08, -4.3225e-08,\n 3.6285e-08, -2.3599e-08, -2.3960e-08, 3.2529e-08, -2.9660e-08,\n -2.8263e-08, -3.3802e-08, 5.4654e-08, 5.6083e-08, 5.5452e-08,\n -2.2687e-08, -3.0071e-08, 5.8072e-08, -2.9541e-08, -2.6406e-08,\n 5.7609e-08, -2.0165e-08],\n [ 1.9243e-06, -9.3742e-07, -9.4584e-07, 1.5652e-06, 2.1582e-06,\n 1.1637e-06, 1.6736e-06, 1.2788e-06, 3.0317e-06, 1.3354e-06,\n -7.0588e-07, -6.9874e-07, 1.5217e-06, 1.8539e-06, 1.1867e-06,\n -8.0791e-07, -6.5671e-07, 2.2181e-06, 1.2699e-06, -7.1807e-07,\n -5.6643e-07, 1.0972e-06, 1.7975e-06, -8.1314e-07, 1.8168e-06,\n 1.7110e-06, -7.9719e-07, 1.6734e-06, 1.2696e-06, -5.7403e-07,\n 1.9600e-06, -1.0793e-06, -6.6711e-07, -8.4354e-07, -7.2876e-07,\n -7.6291e-07, 1.3596e-06, 1.7391e-06, 1.5585e-06, 1.4002e-06,\n 1.8990e-06, 1.2819e-06, 1.4747e-06, -7.5205e-07, -7.4682e-07,\n -7.2432e-07, -6.4296e-07, -5.9355e-07, 1.7247e-06, 2.0288e-06,\n -7.8175e-07, -8.1017e-07, -8.4185e-07, -7.6502e-07, 1.4363e-06,\n 3.3835e-06, -6.4380e-07, -6.8149e-07, 1.3191e-06, -8.0768e-07,\n 1.4872e-06, -7.3111e-07, -6.5414e-07, -6.9862e-07, -8.1209e-07,\n -6.3839e-07, 1.4833e-06, 1.2586e-06, 1.1561e-06, 1.2393e-06,\n -6.5608e-07, 1.4372e-06, 1.0786e-06, 1.1966e-06, -6.7334e-07,\n -6.9686e-07, 1.7731e-06, 1.7159e-06, 1.4994e-06, 1.4016e-06,\n 1.8157e-06, 1.2614e-06, 1.9987e-06, 1.5095e-06, 1.4801e-06,\n 1.7705e-06, -7.4670e-07, 1.3161e-06, -7.9634e-07, 1.2191e-06,\n -6.5942e-07, 1.6409e-06, -8.5627e-07, 1.1841e-06, 1.8142e-06,\n 1.5901e-06, 2.8465e-06, -6.9608e-07, 1.5059e-06, 1.3632e-06,\n -7.9635e-07, 3.0319e-06, 1.6691e-06, 1.4253e-06, -6.4890e-07,\n -7.5265e-07, -8.0016e-07, -6.6539e-07, 1.1006e-06, 1.2159e-06,\n -7.0107e-07, 1.4221e-06, -6.9453e-07, -7.4055e-07, 1.2456e-06,\n 1.1564e-06, -8.0678e-07, -8.2964e-07, 1.2227e-06, -1.0795e-06,\n 1.2193e-06, -7.5589e-07, -7.8541e-07, 1.5484e-06, 1.0584e-06,\n -8.5016e-07, -6.6283e-07, -8.6372e-07, -7.0332e-07, 1.2390e-06,\n 1.8164e-06, -6.4250e-07, -7.2398e-07, -7.9379e-07, 2.0789e-06,\n -7.1394e-07, -7.2716e-07, -7.2433e-07, -6.7280e-07, -7.8313e-07,\n 1.3673e-06, -7.3225e-07, -6.2175e-07, -6.0198e-07, -5.9326e-07,\n -7.4788e-07, 1.9853e-06, 1.6329e-06, -8.5452e-07, -8.2025e-07,\n 1.3061e-06, -7.4994e-07, -7.3825e-07, -7.4678e-07, 1.3053e-06,\n 2.0776e-06, 3.8695e-06, 1.6581e-06, 9.6665e-07, 1.8436e-06,\n -8.8743e-07, 1.7207e-06, -6.3602e-07, 1.1149e-06, 1.5295e-06,\n -6.3831e-07, 1.2834e-06, -7.0982e-07, -6.5024e-07, -7.9580e-07,\n 1.9852e-06, -6.5269e-07, -7.2946e-07, -7.0841e-07, -1.0224e-06,\n 1.1623e-06, -6.5363e-07, -6.3465e-07, 1.0459e-06, -7.5185e-07,\n -7.1152e-07, -8.3424e-07, 1.6873e-06, 1.6580e-06, 1.8431e-06,\n -6.2499e-07, -7.8651e-07, 1.7556e-06, -7.4349e-07, -6.8798e-07,\n 1.5730e-06, -5.8605e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4708e-07, 2.8266e-08, 2.5939e-08, 9.7210e-08, 1.7958e-07, 5.7616e-08,\n 1.1561e-07, 6.9297e-08, 3.5600e-07, 7.5062e-08, 1.8717e-08, 1.8296e-08,\n 9.7919e-08, 1.3657e-07, 6.0150e-08, 2.3599e-08, 1.6847e-08, 1.8237e-07,\n 6.7177e-08, 1.9539e-08, 1.2572e-08, 5.1427e-08, 1.2411e-07, 2.3787e-08,\n 1.3358e-07, 1.2038e-07, 2.2117e-08, 1.1731e-07, 6.7680e-08, 1.3321e-08,\n 1.4616e-07, 3.1748e-08, 1.7074e-08, 2.3631e-08, 1.9990e-08, 2.1075e-08,\n 7.7653e-08, 1.2164e-07, 9.6984e-08, 8.2410e-08, 1.4577e-07, 6.9007e-08,\n 9.1883e-08, 2.1150e-08, 2.0206e-08, 2.0021e-08, 1.5266e-08, 1.3782e-08,\n 1.2056e-07, 1.6196e-07, 2.0721e-08, 2.3734e-08, 2.5148e-08, 2.1490e-08,\n 8.4380e-08, 4.4245e-07, 1.6124e-08, 1.7518e-08, 7.2997e-08, 2.4331e-08,\n 9.0483e-08, 1.9688e-08, 1.6709e-08, 1.8002e-08, 2.3487e-08, 1.5931e-08,\n 8.7669e-08, 6.6637e-08, 5.7013e-08, 6.5757e-08, 1.7030e-08, 8.4347e-08,\n 5.0072e-08, 6.1682e-08, 1.7185e-08, 1.9114e-08, 1.2668e-07, 1.1961e-07,\n 9.1411e-08, 8.0711e-08, 1.2965e-07, 6.7136e-08, 1.5077e-07, 9.3909e-08,\n 8.9095e-08, 1.3140e-07, 2.1266e-08, 7.2829e-08, 2.1084e-08, 6.4154e-08,\n 1.6797e-08, 1.1114e-07, 2.6420e-08, 6.0021e-08, 1.2837e-07, 1.0524e-07,\n 3.0320e-07, 1.7421e-08, 9.3773e-08, 7.6922e-08, 2.3222e-08, 3.5469e-07,\n 1.0914e-07, 8.2651e-08, 1.6156e-08, 2.1028e-08, 2.1890e-08, 1.7042e-08,\n 5.1776e-08, 6.3084e-08, 1.8325e-08, 8.2703e-08, 1.7572e-08, 2.0375e-08,\n 6.6283e-08, 5.8081e-08, 2.3101e-08, 2.5206e-08, 6.3651e-08, 3.1448e-08,\n 6.2317e-08, 2.0712e-08, 2.2150e-08, 9.9729e-08, 4.8652e-08, 2.4233e-08,\n 1.7105e-08, 2.6386e-08, 1.8017e-08, 6.6010e-08, 1.3478e-07, 1.5401e-08,\n 2.0357e-08, 2.2461e-08, 1.6761e-07, 1.9391e-08, 1.9532e-08, 1.9868e-08,\n 1.6432e-08, 2.1381e-08, 7.9327e-08, 2.0783e-08, 1.4870e-08, 1.3783e-08,\n 1.3541e-08, 2.0361e-08, 1.6197e-07, 1.0969e-07, 2.4190e-08, 2.1562e-08,\n 7.2724e-08, 2.0762e-08, 2.0090e-08, 2.0848e-08, 7.2291e-08, 1.7506e-07,\n 5.4797e-07, 1.1343e-07, 4.1944e-08, 1.4281e-07, 2.3111e-08, 1.2057e-07,\n 1.5138e-08, 5.4040e-08, 9.6994e-08, 1.5035e-08, 6.9320e-08, 1.8692e-08,\n 1.6324e-08, 2.0954e-08, 1.6325e-07, 1.6461e-08, 2.0614e-08, 1.9277e-08,\n 2.8718e-08, 5.8371e-08, 1.5572e-08, 1.5440e-08, 4.8868e-08, 2.1204e-08,\n 1.9529e-08, 2.3175e-08, 1.1596e-07, 1.1235e-07, 1.3112e-07, 1.5255e-08,\n 2.1703e-08, 1.2227e-07, 2.0600e-08, 1.7866e-08, 1.0109e-07, 1.3601e-08],\n [1.4563e-07, 2.6121e-08, 2.4757e-08, 9.2199e-08, 1.7602e-07, 5.0940e-08,\n 1.0964e-07, 6.5696e-08, 3.5713e-07, 7.1465e-08, 1.5844e-08, 1.5447e-08,\n 9.3648e-08, 1.3265e-07, 5.5183e-08, 1.9904e-08, 1.3960e-08, 1.7950e-07,\n 6.0769e-08, 1.5929e-08, 1.1291e-08, 4.7500e-08, 1.2026e-07, 2.0065e-08,\n 1.2723e-07, 1.1029e-07, 1.9533e-08, 1.1247e-07, 6.3849e-08, 1.1442e-08,\n 1.4409e-07, 3.1097e-08, 1.4156e-08, 2.1644e-08, 1.6214e-08, 1.8074e-08,\n 7.0084e-08, 1.1323e-07, 9.2266e-08, 7.9715e-08, 1.3781e-07, 6.1441e-08,\n 8.8733e-08, 1.7546e-08, 1.7589e-08, 1.6018e-08, 1.4101e-08, 1.2100e-08,\n 1.1721e-07, 1.5867e-07, 1.8854e-08, 2.0141e-08, 2.0871e-08, 1.7973e-08,\n 7.6023e-08, 4.4363e-07, 1.3615e-08, 1.4909e-08, 6.8894e-08, 1.9402e-08,\n 8.2970e-08, 1.6850e-08, 1.3924e-08, 1.6007e-08, 1.9359e-08, 1.3311e-08,\n 8.2377e-08, 6.3475e-08, 4.9888e-08, 6.2974e-08, 1.3675e-08, 7.7913e-08,\n 4.8471e-08, 5.5111e-08, 1.4748e-08, 1.5071e-08, 1.1718e-07, 1.1308e-07,\n 8.5316e-08, 7.3608e-08, 1.2405e-07, 6.0920e-08, 1.4682e-07, 8.7862e-08,\n 8.0174e-08, 1.2852e-07, 1.6945e-08, 7.0728e-08, 1.9972e-08, 5.5993e-08,\n 1.3814e-08, 1.0451e-07, 2.1564e-08, 5.3602e-08, 1.2405e-07, 1.0095e-07,\n 3.0249e-07, 1.5965e-08, 8.6229e-08, 7.3054e-08, 1.8953e-08, 3.5598e-07,\n 1.0425e-07, 8.0161e-08, 1.3622e-08, 1.7319e-08, 1.8786e-08, 1.4125e-08,\n 4.8509e-08, 5.8933e-08, 1.5652e-08, 7.6132e-08, 1.5918e-08, 1.6974e-08,\n 6.0072e-08, 5.5207e-08, 1.9527e-08, 2.0684e-08, 5.8066e-08, 3.0911e-08,\n 5.4748e-08, 1.7630e-08, 1.9271e-08, 9.4827e-08, 4.4447e-08, 2.1984e-08,\n 1.3870e-08, 2.2428e-08, 1.6127e-08, 6.3212e-08, 1.2982e-07, 1.4047e-08,\n 1.6297e-08, 1.9555e-08, 1.6323e-07, 1.5638e-08, 1.6627e-08, 1.6105e-08,\n 1.5179e-08, 1.9267e-08, 7.4356e-08, 1.6146e-08, 1.2965e-08, 1.2317e-08,\n 1.2247e-08, 1.7513e-08, 1.5908e-07, 1.0592e-07, 2.2248e-08, 1.9815e-08,\n 6.9359e-08, 1.7358e-08, 1.6985e-08, 1.7460e-08, 6.7435e-08, 1.7214e-07,\n 5.4890e-07, 1.0780e-07, 4.0147e-08, 1.3867e-07, 2.3029e-08, 1.1383e-07,\n 1.3386e-08, 5.0850e-08, 9.0160e-08, 1.3829e-08, 6.5181e-08, 1.5969e-08,\n 1.3627e-08, 1.8947e-08, 1.5845e-07, 1.3983e-08, 1.6026e-08, 1.5764e-08,\n 2.7983e-08, 5.4202e-08, 1.4092e-08, 1.3448e-08, 4.4422e-08, 1.7397e-08,\n 1.5297e-08, 2.0110e-08, 1.0586e-07, 1.0443e-07, 1.2890e-07, 1.2996e-08,\n 1.9314e-08, 1.1897e-07, 1.7021e-08, 1.5113e-08, 9.4019e-08, 1.1795e-08],\n [5.0320e-10, 6.5190e-10, 3.4336e-10, 1.6735e-09, 1.0634e-09, 2.2130e-09,\n 1.8428e-09, 1.1447e-09, 1.9654e-10, 1.1277e-09, 9.6714e-10, 9.8245e-10,\n 1.2562e-09, 1.1393e-09, 1.5921e-09, 1.2681e-09, 9.9458e-10, 1.0292e-09,\n 2.1264e-09, 1.2616e-09, 3.9485e-10, 1.2553e-09, 1.3024e-09, 1.2875e-09,\n 1.9577e-09, 3.3131e-09, 8.4150e-10, 1.5725e-09, 1.1936e-09, 6.2153e-10,\n 7.7438e-10, 1.7470e-10, 1.0147e-09, 6.0371e-10, 1.3301e-09, 1.0141e-09,\n 2.4679e-09, 2.7514e-09, 1.5528e-09, 8.0451e-10, 2.5136e-09, 2.5210e-09,\n 9.9591e-10, 1.2458e-09, 8.6307e-10, 1.4034e-09, 3.4142e-10, 5.4309e-10,\n 1.0455e-09, 1.1163e-09, 6.0167e-10, 1.2358e-09, 1.4827e-09, 1.2166e-09,\n 2.8016e-09, 2.7433e-10, 8.5123e-10, 8.8915e-10, 1.3099e-09, 1.7411e-09,\n 2.4673e-09, 9.6973e-10, 9.5845e-10, 6.3923e-10, 1.4535e-09, 8.9871e-10,\n 1.7720e-09, 9.7823e-10, 2.3813e-09, 8.4641e-10, 1.1775e-09, 2.1449e-09,\n 4.4886e-10, 2.1421e-09, 8.2070e-10, 1.4136e-09, 3.1268e-09, 2.1167e-09,\n 2.0344e-09, 2.3241e-09, 1.6885e-09, 2.0203e-09, 1.3391e-09, 1.9424e-09,\n 3.0173e-09, 9.8123e-10, 1.5299e-09, 6.0460e-10, 3.1039e-10, 2.7105e-09,\n 1.0363e-09, 2.0915e-09, 1.7100e-09, 2.1055e-09, 1.4259e-09, 1.3393e-09,\n 5.5771e-10, 4.4373e-10, 2.4597e-09, 1.2499e-09, 1.4977e-09, 1.6152e-10,\n 1.6259e-09, 7.5413e-10, 8.6928e-10, 1.2922e-09, 1.0729e-09, 1.0081e-09,\n 1.0233e-09, 1.3081e-09, 9.1344e-10, 2.0794e-09, 5.1193e-10, 1.1860e-09,\n 2.0499e-09, 8.4899e-10, 1.2418e-09, 1.5683e-09, 1.8108e-09, 1.4861e-10,\n 2.5640e-09, 1.0530e-09, 9.6152e-10, 1.5114e-09, 1.3255e-09, 7.0333e-10,\n 1.1366e-09, 1.3546e-09, 6.0189e-10, 8.1365e-10, 1.4812e-09, 4.0940e-10,\n 1.4236e-09, 9.6737e-10, 1.4137e-09, 1.3243e-09, 9.9005e-10, 1.3254e-09,\n 3.7263e-10, 6.7391e-10, 1.5451e-09, 1.6466e-09, 6.2535e-10, 4.6289e-10,\n 3.9280e-10, 9.6393e-10, 9.5403e-10, 1.1502e-09, 5.9564e-10, 5.5470e-10,\n 1.0440e-09, 1.1641e-09, 1.0736e-09, 1.1756e-09, 1.5170e-09, 9.4996e-10,\n 3.7446e-10, 1.7637e-09, 5.0273e-10, 1.3526e-09, 3.6052e-11, 2.1272e-09,\n 5.6753e-10, 9.7804e-10, 2.1876e-09, 3.5669e-10, 1.3137e-09, 9.2211e-10,\n 9.2811e-10, 6.5099e-10, 1.5764e-09, 8.3915e-10, 1.6367e-09, 1.2200e-09,\n 2.0463e-10, 1.3160e-09, 4.6426e-10, 6.5515e-10, 1.3991e-09, 1.3265e-09,\n 1.5056e-09, 1.0458e-09, 3.3891e-09, 2.5928e-09, 8.0086e-10, 7.6096e-10,\n 7.7694e-10, 9.7817e-10, 1.2513e-09, 9.4480e-10, 2.2241e-09, 5.9096e-10],\n [5.5546e-10, 7.0429e-10, 3.7242e-10, 1.8195e-09, 1.1669e-09, 2.4163e-09,\n 2.0041e-09, 1.2403e-09, 2.5548e-10, 1.2221e-09, 1.0568e-09, 1.0702e-09,\n 1.3635e-09, 1.2417e-09, 1.7304e-09, 1.3854e-09, 1.0858e-09, 1.1293e-09,\n 2.3181e-09, 1.3814e-09, 4.2713e-10, 1.3612e-09, 1.4185e-09, 1.4049e-09,\n 2.1324e-09, 3.6193e-09, 9.1513e-10, 1.7065e-09, 1.2941e-09, 6.7485e-10,\n 8.4832e-10, 1.9174e-10, 1.1070e-09, 6.5452e-10, 1.4564e-09, 1.1057e-09,\n 2.6940e-09, 3.0015e-09, 1.6872e-09, 8.7220e-10, 2.7382e-09, 2.7539e-09,\n 1.0797e-09, 1.3619e-09, 9.4049e-10, 1.5410e-09, 3.6925e-10, 5.8833e-10,\n 1.1357e-09, 1.2164e-09, 6.5081e-10, 1.3476e-09, 1.6238e-09, 1.3289e-09,\n 3.0598e-09, 3.4846e-10, 9.2764e-10, 9.6774e-10, 1.4202e-09, 1.9106e-09,\n 2.6906e-09, 1.0552e-09, 1.0450e-09, 6.9290e-10, 1.5902e-09, 9.7994e-10,\n 1.9273e-09, 1.0596e-09, 2.6019e-09, 9.1622e-10, 1.2888e-09, 2.3362e-09,\n 4.8722e-10, 2.3380e-09, 8.9170e-10, 1.5526e-09, 3.4125e-09, 2.3018e-09,\n 2.2140e-09, 2.5362e-09, 1.8470e-09, 2.2020e-09, 1.4611e-09, 2.1118e-09,\n 3.2973e-09, 1.0675e-09, 1.6775e-09, 6.5636e-10, 3.3571e-10, 2.9646e-09,\n 1.1322e-09, 2.2767e-09, 1.8730e-09, 2.2966e-09, 1.5504e-09, 1.4525e-09,\n 6.3541e-10, 4.7978e-10, 2.6829e-09, 1.3552e-09, 1.6403e-09, 2.1746e-10,\n 1.7682e-09, 8.1880e-10, 9.4652e-10, 1.4141e-09, 1.1693e-09, 1.1008e-09,\n 1.1079e-09, 1.4183e-09, 9.9367e-10, 2.2729e-09, 5.5403e-10, 1.2943e-09,\n 2.2325e-09, 9.1908e-10, 1.3547e-09, 1.7174e-09, 1.9710e-09, 1.6423e-10,\n 2.8015e-09, 1.1495e-09, 1.0457e-09, 1.6408e-09, 1.4407e-09, 7.6044e-10,\n 1.2426e-09, 1.4761e-09, 6.5195e-10, 8.8182e-10, 1.6086e-09, 4.4285e-10,\n 1.5606e-09, 1.0520e-09, 1.5380e-09, 1.4511e-09, 1.0786e-09, 1.4506e-09,\n 4.0299e-10, 7.2972e-10, 1.6782e-09, 1.8085e-09, 6.7865e-10, 5.0098e-10,\n 4.2491e-10, 1.0490e-09, 1.0403e-09, 1.2478e-09, 6.4374e-10, 6.0005e-10,\n 1.1304e-09, 1.2750e-09, 1.1698e-09, 1.2820e-09, 1.6479e-09, 1.0366e-09,\n 4.7488e-10, 1.9146e-09, 5.4384e-10, 1.4689e-09, 4.3632e-11, 2.3142e-09,\n 6.1550e-10, 1.0585e-09, 2.3839e-09, 3.8579e-10, 1.4243e-09, 1.0056e-09,\n 1.0117e-09, 7.0480e-10, 1.7127e-09, 9.1321e-10, 1.7970e-09, 1.3354e-09,\n 2.2491e-10, 1.4275e-09, 5.0246e-10, 7.1229e-10, 1.5197e-09, 1.4517e-09,\n 1.6517e-09, 1.1394e-09, 3.7038e-09, 2.8252e-09, 8.7459e-10, 8.2739e-10,\n 8.4204e-10, 1.0646e-09, 1.3674e-09, 1.0287e-09, 2.4309e-09, 6.4098e-10]],\n device='cuda:0')" + }, + "64": { + "step": "tensor(8764.)", + "exp_avg": "tensor([-1.0022e-04, 9.1315e-05, 4.3022e-07, 8.4782e-06], device='cuda:0')", + "exp_avg_sq": "tensor([3.4793e-06, 1.6147e-06, 6.5147e-07, 7.2997e-07], device='cuda:0')" + }, + "8": { + "step": "tensor(5008.)", + "exp_avg": "tensor([[ 2.2026e-05, 1.0837e-05, -9.8469e-05, ..., 3.4734e-06,\n -8.5054e-05, -2.8411e-05],\n [ 4.4075e-05, 2.0821e-06, -2.5971e-05, ..., 1.8803e-05,\n 2.1034e-05, 4.8602e-05],\n [ 3.2149e-05, 1.1848e-05, 3.1866e-05, ..., 3.7310e-05,\n -1.2639e-05, -9.6436e-05],\n ...,\n [ 1.5751e-05, 4.5288e-05, 1.8740e-05, ..., -1.3570e-05,\n -4.0530e-05, 2.0636e-05],\n [-3.1709e-05, -7.5370e-08, 1.2094e-05, ..., -5.2284e-06,\n -6.8160e-05, 1.4215e-05],\n [ 2.0331e-06, -7.3965e-06, 4.6246e-05, ..., -1.6489e-06,\n -9.4747e-05, -4.1315e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.9588e-09, 2.3214e-08, 5.6263e-08, ..., 2.3123e-09, 9.6309e-09,\n 7.2946e-08],\n [2.0583e-08, 3.6157e-08, 5.3405e-08, ..., 2.4621e-08, 7.1300e-09,\n 5.7138e-08],\n [2.8127e-08, 1.2817e-08, 5.0826e-08, ..., 3.2937e-08, 3.0511e-08,\n 4.2882e-08],\n ...,\n [2.7137e-08, 1.5768e-08, 1.2838e-08, ..., 1.9897e-08, 1.9304e-08,\n 1.7743e-08],\n [5.5132e-08, 4.5061e-09, 3.1350e-08, ..., 1.8997e-08, 2.5366e-08,\n 2.1149e-08],\n [1.6197e-08, 8.7051e-09, 3.1158e-08, ..., 3.9071e-08, 2.3041e-08,\n 4.0823e-08]], device='cuda:0')" + }, + "9": { + "step": "tensor(5008.)", + "exp_avg": "tensor([ 0.0017, 0.0027, 0.0015, ..., 0.0050, 0.0019, -0.0014],\n device='cuda:0')", + "exp_avg_sq": "tensor([0.0002, 0.0001, 0.0001, ..., 0.0001, 0.0002, 0.0001], device='cuda:0')" + }, + "10": { + "step": "tensor(5008.)", + "exp_avg": "tensor([[-5.1325e-05, -2.0662e-06, -2.2541e-05, ..., 1.2044e-06,\n -6.7534e-06, -1.0828e-05],\n [-5.5853e-05, -8.9652e-06, 8.1658e-06, ..., -1.2224e-05,\n -4.3790e-05, -8.8943e-06],\n [ 9.9957e-06, 9.1311e-06, 1.0366e-05, ..., -3.7105e-05,\n -2.6595e-05, -3.3775e-05],\n ...,\n [-8.1585e-06, 5.3982e-06, 2.6113e-06, ..., 3.0303e-07,\n -2.4207e-05, 3.6584e-05],\n [-6.7666e-06, 1.6835e-05, -2.8455e-06, ..., 1.0128e-05,\n 4.7035e-05, 2.6106e-05],\n [ 3.8645e-05, -4.3955e-05, -3.3548e-05, ..., -4.5270e-06,\n -5.8664e-05, 2.4450e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.9043e-09, 5.4980e-09, 4.7750e-09, ..., 4.7593e-09, 6.6356e-09,\n 6.0983e-09],\n [1.0914e-08, 1.0507e-08, 8.3895e-09, ..., 8.5103e-09, 9.9646e-09,\n 9.1243e-09],\n [1.1858e-08, 1.0945e-08, 8.5909e-09, ..., 8.5790e-09, 9.2586e-09,\n 6.7363e-09],\n ...,\n [1.1645e-08, 1.1506e-08, 8.8693e-09, ..., 1.1950e-08, 1.1004e-08,\n 8.1777e-09],\n [1.1200e-08, 1.1475e-08, 1.0706e-08, ..., 1.0075e-08, 1.2553e-08,\n 7.9785e-09],\n [8.2573e-09, 1.0166e-08, 1.0105e-08, ..., 8.1296e-09, 1.4099e-08,\n 7.6924e-09]], device='cuda:0')" + }, + "11": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.8089e-06, -9.8414e-08, 6.2881e-05, ..., 2.8323e-05,\n 1.8704e-05, 5.7127e-05],\n [-2.7605e-05, -7.1525e-05, 5.6445e-05, ..., -1.0535e-04,\n -1.3358e-04, 7.3036e-05],\n [ 1.9314e-06, -5.5069e-05, -6.3870e-05, ..., -4.0371e-05,\n 1.1539e-04, -6.6656e-05],\n ...,\n [-8.7640e-05, 2.3626e-05, -2.6679e-05, ..., 5.1500e-05,\n 8.8727e-05, -3.6993e-05],\n [ 5.4981e-05, 4.1414e-05, 8.0054e-05, ..., -4.9969e-06,\n 1.4944e-05, 1.6040e-04],\n [-2.0933e-05, -1.1633e-04, -3.7081e-05, ..., 9.2681e-06,\n 3.6271e-05, 7.4506e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.7190e-08, 3.8504e-08, 3.2454e-08, ..., 3.0680e-08, 2.8038e-08,\n 3.1144e-08],\n [2.6641e-08, 8.9789e-08, 3.7377e-08, ..., 3.9602e-08, 1.9419e-08,\n 3.4643e-08],\n [1.5079e-08, 3.0209e-08, 3.6162e-08, ..., 2.4427e-08, 2.0358e-08,\n 2.4175e-08],\n ...,\n [2.3592e-08, 1.0878e-07, 3.7020e-08, ..., 1.9758e-08, 1.7294e-08,\n 6.4391e-08],\n [1.7943e-08, 1.9880e-08, 3.7161e-08, ..., 3.3322e-08, 2.8275e-08,\n 3.2171e-08],\n [2.7976e-08, 5.7795e-08, 3.4527e-08, ..., 4.7846e-08, 2.3135e-08,\n 3.7807e-08]], device='cuda:0')" + }, + "12": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 0.0021, 0.0030, -0.0029, ..., 0.0025, 0.0042, 0.0040],\n device='cuda:0')", + "exp_avg_sq": "tensor([0.0001, 0.0001, 0.0001, ..., 0.0001, 0.0001, 0.0002], device='cuda:0')" + }, + "13": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-6.0818e-05, -1.9123e-05, -4.6660e-05, ..., 1.8691e-05,\n -3.1524e-05, 2.1436e-06],\n [-4.1868e-05, -8.0115e-05, 3.2377e-06, ..., -2.2567e-05,\n -8.8056e-07, 7.8103e-05],\n [-9.0789e-05, -4.8492e-05, -3.8058e-05, ..., -3.0477e-05,\n -3.9445e-05, -2.4518e-05],\n ...,\n [ 5.7012e-05, 9.4080e-05, 8.9873e-05, ..., 1.0344e-04,\n 5.3887e-05, -5.2039e-05],\n [-3.3498e-05, -2.8296e-05, -6.6885e-05, ..., -1.1132e-04,\n -1.6599e-05, 7.2749e-05],\n [-6.2066e-06, -7.8242e-05, -3.6638e-05, ..., 5.5840e-05,\n -5.7617e-05, -6.8017e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6511e-08, 1.3475e-08, 1.3215e-08, ..., 1.8946e-08, 1.3057e-08,\n 1.2491e-08],\n [2.7112e-08, 2.2342e-08, 1.8242e-08, ..., 4.1702e-08, 2.5966e-08,\n 2.5568e-08],\n [3.1109e-08, 1.9905e-08, 1.8729e-08, ..., 3.8010e-08, 2.3600e-08,\n 2.7267e-08],\n ...,\n [2.9447e-08, 3.0751e-08, 2.3179e-08, ..., 4.7008e-08, 2.5003e-08,\n 3.0408e-08],\n [2.7589e-08, 2.5540e-08, 2.0024e-08, ..., 4.0408e-08, 2.6197e-08,\n 2.9843e-08],\n [3.1138e-08, 2.9515e-08, 1.9494e-08, ..., 4.1642e-08, 2.5231e-08,\n 2.7049e-08]], device='cuda:0')" + } + }, + "param_groups": [ + { + "lr": 0.001, + "name": "shared", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.001, + "params": [ + 0, + 1 + ] + }, + { + "lr": 0.001, + "name": "scale_256", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.001, + "params": [ + 2, + 3, + 4 + ] + }, + { + "lr": 0.001, + "name": "scale_512", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.001, + "params": [ + 5, + 6, + 7 + ] + }, + { + "lr": 0.001, + "name": "scale_768", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.001, + "params": [ + 8, + 9, + 10 + ] + }, + { + "lr": 0.001, + "name": "scale_1024", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.001, + "params": [ + 11, + 12, + 13 + ] + }, + { + "lr": 0.0005, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0005, + "params": [ + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64 + ] + } + ] + }, + "scheduler_state_dict": { + "T_0": 10, + "T_i": 20, + "T_mult": 2, + "eta_min": 1e-06, + "T_cur": 0, + "base_lrs": [ + 0.001, + 0.001, + 0.001, + 0.001, + 0.001, + 0.0005 + ], + "last_epoch": 10, + "_step_count": 0, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.001, + 0.001, + 0.001, + 0.001, + 0.001, + 0.0005 + ] + }, + "metrics": { + "val_acc": 75.414 + }, + "train_config": { + "name": "david_training", + "run_id": "20251012_065325", + "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", + "model_variant": "clip_vit_laion_b32", + "num_classes": 1000, + "preset": "balanced", + "custom_config_path": null, + "num_classes_override": null, + "use_belly_override": null, + "belly_expand_override": null, + "progressive_training_override": true, + "scale_warmup_epochs_override": null, + "num_epochs": 20, + "batch_size": 1024, + "learning_rate": 0.001, + "weight_decay": 1e-05, + "warmup_epochs": 3, + "use_rose_loss": true, + "rose_initial_weight": 0.1, + "rose_max_weight": 0.5, + "rose_weight_schedule": "adaptive", + "use_cayley_loss": false, + "cayley_weight": 0.001, + "scale_loss_balance": null, + "use_mixed_precision": false, + "gradient_clip": 10.0, + "scheduler_type": "cosine_restarts", + "min_lr": 1e-06, + "freeze_strategy": "never", + "freeze_threshold": 90.0, + "unfreeze_on_plateau": true, + "patience": 10, + "track_gradients": true, + "gradient_scale_threshold": 1e-05, + "gradient_scale_multiplier": 10.0, + "log_interval": 50, + "val_interval": 1, + "save_interval": 5, + "log_fusion_weights": true, + "log_loss_components": true, + "save_format": "safetensors", + "hf_repo": "AbstractPhil/gated-david", + "upload_to_hub": true, + "base_dir": "./david_training", + "num_workers": 10, + "pin_memory": true, + "prefetch_factor": 4, + "persistent_workers": true + } +} \ No newline at end of file