AbstractPhil commited on
Commit
2fbf28b
·
verified ·
1 Parent(s): 0f7fd70

Update best_model_acc72.23_metadata.json - Run 20251012_151647

Browse files
weights/David-hierarchical-progressive/20251012_151647/best_model_acc72.23_metadata.json ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(1252.)",
7
+ "exp_avg": "tensor([[-4.8183e-06, 1.0666e-03, 4.1198e-04, ..., 9.8579e-05,\n 1.8150e-04, -5.4795e-04],\n [ 1.8415e-04, 6.5962e-05, -4.8868e-04, ..., 6.9019e-05,\n -1.6130e-04, 1.3819e-04],\n [ 2.9061e-04, -1.4908e-04, 8.6728e-04, ..., -4.5100e-04,\n -6.2113e-04, -4.7785e-04],\n ...,\n [ 6.8926e-04, -1.8458e-03, -1.2421e-03, ..., -1.1689e-03,\n -7.6148e-04, 1.2734e-04],\n [-1.5868e-05, -6.0552e-05, 5.7719e-04, ..., 1.5706e-04,\n -2.0654e-04, -3.2987e-04],\n [-2.3350e-04, -2.2969e-04, 1.2349e-05, ..., -5.4028e-04,\n -1.5818e-04, 9.5613e-05]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[1.8122e-06, 8.7557e-06, 5.1889e-06, ..., 1.3598e-06, 1.0871e-06,\n 1.9845e-06],\n [1.5415e-06, 9.6196e-06, 4.8851e-06, ..., 1.4519e-06, 1.1277e-06,\n 1.3194e-06],\n [2.0603e-06, 1.7414e-05, 5.8751e-06, ..., 1.3652e-06, 9.5615e-07,\n 1.2532e-06],\n ...,\n [1.7877e-06, 1.2368e-05, 6.6789e-06, ..., 1.7635e-06, 1.1760e-06,\n 1.3032e-06],\n [2.2154e-06, 1.0915e-05, 5.4817e-06, ..., 1.6799e-06, 8.5314e-07,\n 1.8566e-06],\n [2.7144e-06, 2.3533e-05, 5.6403e-06, ..., 1.7484e-06, 1.3521e-06,\n 1.3974e-06]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(1252.)",
12
+ "exp_avg": "tensor([ 2.3275e-03, 1.7118e-02, 3.7952e-03, -1.5698e-02, -8.3095e-03,\n 1.2969e-02, 1.6404e-02, -1.0515e-02, 4.4411e-03, -6.7647e-03,\n 8.7093e-04, 9.0920e-04, -1.4033e-03, 6.7936e-03, 2.9491e-03,\n -4.5173e-03, 7.2931e-03, 4.9542e-03, -2.1742e-03, -1.2011e-02,\n 3.0726e-02, -1.8202e-03, -1.3228e-02, 3.2758e-02, -3.7159e-04,\n -7.1880e-03, 1.4959e-02, -3.6842e-02, 1.7450e-02, -1.8402e-02,\n 1.1438e-02, 4.2237e-04, 2.0433e-02, 7.9259e-03, -1.5926e-02,\n 6.9534e-03, 1.3850e-02, 2.6327e-02, 5.5051e-03, -5.5014e-03,\n 8.8344e-03, -2.2902e-03, -2.0491e-02, -1.7748e-02, 1.9588e-04,\n -1.6913e-03, -5.0844e-03, -2.0932e-02, -7.5530e-03, -9.5691e-03,\n -1.1051e-02, 9.5446e-03, -6.1309e-03, -1.0008e-03, -2.3905e-02,\n -1.5545e-02, 1.3597e-02, 2.7701e-02, 1.0147e-05, -7.1804e-03,\n 2.2232e-02, -4.6711e-04, -6.0588e-03, -1.9245e-02, 8.3206e-03,\n -1.7427e-02, -1.6078e-02, -1.2670e-02, -1.0904e-02, 1.8272e-03,\n -7.4802e-03, -1.4394e-02, 9.5546e-03, 1.9042e-03, 1.0108e-02,\n -8.1402e-03, -7.8581e-03, 3.5459e-02, 2.6205e-02, 6.0998e-03,\n 1.4657e-02, 7.2008e-03, 8.8139e-03, 1.2258e-03, -6.4981e-03,\n -2.8231e-03, -2.6517e-03, 3.6082e-03, -2.4425e-03, -3.8429e-03,\n -1.1696e-02, 2.1480e-02, 1.9102e-02, -8.4397e-03, -3.7475e-04,\n 7.8406e-04, 3.1044e-03, 1.1383e-02, -1.9682e-02, 6.4505e-03,\n -1.8621e-03, 1.9183e-02, -2.4493e-03, -2.5000e-02, -2.2812e-02,\n 7.1688e-03, 1.6550e-02, 1.3567e-02, -1.9895e-02, -1.0538e-02,\n -4.4396e-03, -1.3276e-02, 1.2398e-02, 7.0993e-03, 3.3207e-03,\n -4.7289e-03, 9.3566e-03, -6.3257e-05, 3.5622e-03, 8.8143e-03,\n -5.6554e-03, -4.5060e-03, -9.3839e-03, 5.3141e-03, -7.1501e-03,\n 1.0424e-02, 1.5400e-02, 4.8962e-03, 1.3516e-02, -7.3115e-03,\n -4.8250e-03, 1.1620e-02, 4.4564e-04, -1.3401e-02, -7.6306e-03,\n -1.4345e-02, 3.4591e-03, -3.7536e-03, 2.0306e-03, -8.9670e-03,\n -1.5388e-02, -5.8553e-03, -1.8910e-02, -1.2472e-02, 8.8384e-03,\n -3.3439e-03, -6.9081e-03, -2.2757e-02, 1.4385e-02, -2.1136e-04,\n 1.7050e-02, 2.5597e-02, -7.4378e-03, 5.5001e-03, -2.3214e-03,\n 1.6886e-03, 1.7479e-02, 1.1872e-02, -4.6276e-03, 4.4573e-03,\n -1.2697e-02, -5.2116e-03, -1.2595e-02, 5.9972e-03, -1.2194e-02,\n -1.1917e-02, -8.6362e-03, -1.2391e-02, 9.3388e-03, 5.9917e-03,\n 1.6248e-02, 1.4317e-02, 5.8798e-04, -5.4439e-03, -2.8378e-03,\n 2.1266e-02, 1.8338e-02, 2.2011e-02, -1.4033e-02, 6.1318e-03,\n -1.4412e-02, -9.8200e-03, 2.5071e-03, -3.7107e-02, 4.2517e-03,\n 9.8787e-03, 7.5059e-03, 1.8725e-03, 2.5662e-02, -5.7139e-03,\n -8.5026e-03, -8.7890e-03, 1.0944e-02, 6.3656e-03, -1.1896e-02,\n 2.3792e-03, -6.9530e-03, -2.3071e-03, 2.2349e-02, -9.7613e-03,\n -7.9214e-03, 9.4951e-03, -2.1660e-03, -2.9820e-03, -4.2749e-03,\n -4.7317e-03, 9.9317e-03, 1.9790e-02, -4.5623e-03, 1.7008e-02,\n -1.9035e-02, 1.2290e-03, 1.5125e-02, -1.6148e-02, -1.3051e-02,\n 2.7691e-02, -5.5720e-03, 2.5280e-02, -8.1468e-03, -8.4773e-03,\n 4.6246e-03, 1.3691e-02, 2.0728e-02, 3.9066e-03, 2.0879e-02,\n 4.1512e-03, 6.8997e-03, 1.0381e-03, -4.6051e-04, -1.5086e-02,\n 4.5507e-03, -3.4062e-03, -5.4300e-03, -1.5262e-03, 4.0735e-03,\n -1.2801e-02, 3.3788e-02, 6.0429e-03, -2.7706e-02, 1.8673e-02,\n -1.7924e-02, -1.1831e-03, 7.1841e-03, -5.0564e-03, 5.4075e-03,\n 1.0160e-02, -9.6108e-03, 1.0101e-02, 2.0533e-02, 2.3493e-03,\n -7.0075e-03, -1.2117e-02, 5.6459e-03, -3.2287e-03, -5.6623e-03,\n -1.2497e-02, -3.7936e-02, -2.2812e-04, -1.0482e-02, 2.2327e-02,\n -1.9654e-02, 2.8268e-02, 1.2738e-02, -7.0683e-03, -2.5341e-02,\n -1.8444e-02, -1.2402e-02, 7.9091e-03, 9.9999e-03, 9.8552e-03,\n -4.2370e-03, 3.0149e-04, 7.4613e-03, -6.0895e-03, -1.9623e-02,\n -1.0861e-03, -5.3652e-03, 1.9756e-02, 2.2248e-02, -5.1280e-03,\n -1.1305e-02, -2.3801e-02, 1.4324e-02, 6.5659e-03, 1.7273e-02,\n -1.5654e-02, 2.9309e-02, -2.1752e-03, 1.4591e-02, 2.0962e-02,\n 1.0669e-02, -5.8365e-03, 7.8430e-03, 4.9935e-03, 4.1962e-03,\n 1.3774e-02, -3.8230e-03, 2.9372e-02, 2.2185e-02, 3.9797e-03,\n 4.1679e-03, -1.2275e-02, 5.4732e-03, -5.4906e-03, -2.2192e-03,\n 1.8218e-02, -1.5541e-03, 6.0309e-04, -1.9565e-02, -1.1105e-02,\n -1.1965e-02, 3.9547e-04, 1.3978e-02, 3.8516e-03, 4.2796e-02,\n -2.4647e-03, -3.8515e-03, -2.3704e-02, 2.4797e-03, -3.9364e-04,\n -1.5003e-03, -1.2531e-02, 4.9218e-03, 1.8187e-03, 2.4133e-03,\n -2.5667e-02, -2.2206e-04, -1.3522e-03, 1.0924e-02, 1.9130e-03,\n -1.3575e-04, 1.4297e-02, -4.3963e-04, 1.8602e-02, 9.4617e-03,\n 2.6880e-04, -3.3227e-03, -2.2332e-02, -8.5390e-03, 1.7242e-02,\n 4.6232e-03, 4.8347e-03, 6.6211e-04, -6.7180e-03, -7.0527e-03,\n -1.3263e-03, -1.3789e-03, 7.8770e-03, 3.5011e-03, -2.7351e-02,\n 1.2465e-02, -5.2055e-03, 7.5128e-03, 1.3304e-02, -2.3124e-03,\n 1.7991e-02, -9.8971e-03, -1.0263e-02, 1.7533e-02, 5.1205e-03,\n 1.1203e-02, 1.2650e-03, -9.0690e-03, -9.4441e-03, -1.5076e-02,\n 1.3566e-02, 2.5868e-03, 7.4622e-03, 2.8058e-03, 1.7487e-02,\n -4.2005e-04, -3.4705e-05, -1.1801e-02, 2.9818e-03, 1.0088e-02,\n 7.2703e-03, -1.3763e-02, 9.8473e-03, -1.0636e-02, 7.2898e-03,\n -1.7659e-02, 1.0327e-02, 1.9597e-02, -5.9022e-03, 5.2531e-03,\n 9.5003e-03, 6.9129e-03, -8.8550e-03, 7.3866e-04, -7.1001e-03,\n -1.5953e-02, 2.9767e-02, -1.4091e-02, 7.9041e-03, 8.3753e-03,\n 9.1991e-03, -1.5588e-02, 9.2907e-03, -7.2932e-03, -7.3928e-03,\n 1.0656e-02, -4.9072e-03, -3.8570e-03, -3.4310e-03, 7.8473e-03,\n -6.0912e-03, 1.1986e-03, 7.0073e-03, -1.8781e-02, 1.0323e-02,\n -1.8611e-02, -4.7180e-03, 7.1313e-03, 1.6572e-02, 1.9958e-02,\n -6.9711e-04, -9.9040e-03, 2.7528e-03, 3.0631e-02, 7.4959e-03,\n -9.4068e-03, -3.4749e-03, -1.0385e-02, -3.0017e-04, -9.3955e-03,\n -1.1563e-02, 1.0811e-02, -2.7360e-02, -4.2035e-04, 3.9240e-03,\n -2.1177e-02, 1.7277e-02, -3.0988e-02, -1.2555e-02, 1.8773e-02,\n -9.4003e-03, -5.0014e-03, -2.8842e-03, -1.1194e-02, 1.2013e-02,\n -4.5424e-03, -1.2599e-02, 2.4414e-04, -1.0703e-02, 1.1200e-02,\n -1.0527e-02, -2.1957e-02, 5.6453e-04, 8.3437e-03, -2.8670e-03,\n -2.3640e-04, 4.3825e-03, 5.8232e-03, -2.7071e-02, 6.0121e-03,\n 7.0678e-03, 1.2817e-02, -6.1651e-03, -1.2683e-02, -1.6375e-03,\n -5.7250e-03, -6.0799e-03, 1.4152e-02, -5.0650e-03, 4.9593e-03,\n -2.3152e-02, 1.4635e-02, -1.9262e-02, -3.2112e-03, -6.3466e-03,\n -3.4124e-03, -7.2078e-03, 9.2928e-03, 1.2600e-02, -1.1368e-03,\n 9.0731e-03, -1.3807e-03, -1.1486e-02, 1.1238e-02, 1.8285e-02,\n 1.6726e-02, 3.7121e-03, 4.6503e-04, 1.8140e-02, 1.6573e-03,\n 5.1180e-03, -2.0880e-03, -1.9281e-02, -3.5396e-03, -1.3321e-02,\n -3.1586e-03, 8.5835e-04, 8.4042e-03, -1.0547e-02, 2.6928e-02,\n -1.8459e-02, -1.2526e-02, -1.9057e-02, 1.9545e-02, -3.1439e-02,\n 8.3415e-03, 9.2051e-03, 5.3283e-03, 2.0628e-02, 1.8290e-03,\n 1.8023e-02, 1.0037e-02, -2.0146e-02, 1.3094e-02, 2.1487e-02,\n -1.0009e-02, -5.4824e-03], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([0.0015, 0.0015, 0.0016, 0.0018, 0.0016, 0.0020, 0.0020, 0.0015, 0.0022,\n 0.0018, 0.0021, 0.0018, 0.0020, 0.0015, 0.0019, 0.0020, 0.0020, 0.0022,\n 0.0027, 0.0021, 0.0020, 0.0017, 0.0017, 0.0015, 0.0020, 0.0017, 0.0021,\n 0.0015, 0.0020, 0.0021, 0.0020, 0.0022, 0.0031, 0.0015, 0.0020, 0.0012,\n 0.0017, 0.0019, 0.0016, 0.0016, 0.0019, 0.0017, 0.0018, 0.0016, 0.0014,\n 0.0022, 0.0020, 0.0026, 0.0022, 0.0022, 0.0019, 0.0020, 0.0018, 0.0014,\n 0.0015, 0.0014, 0.0013, 0.0026, 0.0017, 0.0022, 0.0016, 0.0020, 0.0013,\n 0.0014, 0.0025, 0.0026, 0.0020, 0.0025, 0.0029, 0.0024, 0.0015, 0.0018,\n 0.0013, 0.0017, 0.0024, 0.0013, 0.0020, 0.0017, 0.0024, 0.0021, 0.0016,\n 0.0018, 0.0016, 0.0017, 0.0019, 0.0030, 0.0021, 0.0019, 0.0021, 0.0019,\n 0.0018, 0.0019, 0.0021, 0.0016, 0.0014, 0.0019, 0.0020, 0.0020, 0.0018,\n 0.0019, 0.0016, 0.0019, 0.0002, 0.0016, 0.0020, 0.0024, 0.0022, 0.0015,\n 0.0021, 0.0013, 0.0026, 0.0027, 0.0019, 0.0017, 0.0020, 0.0016, 0.0022,\n 0.0026, 0.0016, 0.0019, 0.0020, 0.0016, 0.0016, 0.0019, 0.0020, 0.0018,\n 0.0033, 0.0019, 0.0017, 0.0023, 0.0018, 0.0020, 0.0013, 0.0017, 0.0018,\n 0.0019, 0.0020, 0.0016, 0.0017, 0.0021, 0.0013, 0.0017, 0.0019, 0.0016,\n 0.0015, 0.0017, 0.0018, 0.0019, 0.0029, 0.0021, 0.0020, 0.0028, 0.0013,\n 0.0021, 0.0019, 0.0021, 0.0016, 0.0017, 0.0017, 0.0018, 0.0021, 0.0021,\n 0.0019, 0.0013, 0.0019, 0.0015, 0.0020, 0.0017, 0.0020, 0.0023, 0.0024,\n 0.0016, 0.0013, 0.0015, 0.0017, 0.0016, 0.0017, 0.0021, 0.0016, 0.0013,\n 0.0019, 0.0006, 0.0017, 0.0023, 0.0013, 0.0015, 0.0014, 0.0019, 0.0020,\n 0.0013, 0.0021, 0.0020, 0.0015, 0.0016, 0.0014, 0.0020, 0.0015, 0.0024,\n 0.0018, 0.0018, 0.0022, 0.0025, 0.0021, 0.0014, 0.0021, 0.0021, 0.0018,\n 0.0024, 0.0017, 0.0019, 0.0021, 0.0021, 0.0016, 0.0014, 0.0028, 0.0018,\n 0.0023, 0.0017, 0.0018, 0.0017, 0.0029, 0.0014, 0.0016, 0.0023, 0.0014,\n 0.0016, 0.0019, 0.0021, 0.0015, 0.0016, 0.0018, 0.0021, 0.0016, 0.0022,\n 0.0017, 0.0025, 0.0025, 0.0021, 0.0017, 0.0016, 0.0017, 0.0017, 0.0017,\n 0.0019, 0.0018, 0.0024, 0.0020, 0.0021, 0.0025, 0.0018, 0.0018, 0.0017,\n 0.0024, 0.0015, 0.0013, 0.0017, 0.0017, 0.0025, 0.0018, 0.0016, 0.0010,\n 0.0015, 0.0012, 0.0017, 0.0024, 0.0010, 0.0019, 0.0014, 0.0017, 0.0019,\n 0.0015, 0.0010, 0.0014, 0.0016, 0.0013, 0.0019, 0.0013, 0.0017, 0.0019,\n 0.0016, 0.0017, 0.0016, 0.0015, 0.0022, 0.0019, 0.0016, 0.0012, 0.0021,\n 0.0025, 0.0025, 0.0028, 0.0016, 0.0024, 0.0019, 0.0018, 0.0023, 0.0017,\n 0.0019, 0.0022, 0.0019, 0.0012, 0.0023, 0.0020, 0.0019, 0.0014, 0.0018,\n 0.0018, 0.0020, 0.0020, 0.0017, 0.0022, 0.0021, 0.0023, 0.0017, 0.0019,\n 0.0024, 0.0019, 0.0020, 0.0031, 0.0017, 0.0012, 0.0019, 0.0013, 0.0015,\n 0.0017, 0.0020, 0.0013, 0.0020, 0.0017, 0.0018, 0.0024, 0.0019, 0.0014,\n 0.0020, 0.0017, 0.0016, 0.0020, 0.0023, 0.0023, 0.0021, 0.0025, 0.0018,\n 0.0023, 0.0019, 0.0013, 0.0018, 0.0022, 0.0011, 0.0017, 0.0013, 0.0019,\n 0.0016, 0.0018, 0.0013, 0.0012, 0.0021, 0.0022, 0.0017, 0.0021, 0.0026,\n 0.0015, 0.0019, 0.0021, 0.0019, 0.0014, 0.0017, 0.0020, 0.0016, 0.0019,\n 0.0020, 0.0024, 0.0015, 0.0024, 0.0020, 0.0016, 0.0022, 0.0016, 0.0016,\n 0.0021, 0.0023, 0.0021, 0.0016, 0.0018, 0.0021, 0.0016, 0.0020, 0.0018,\n 0.0016, 0.0017, 0.0021, 0.0018, 0.0019, 0.0017, 0.0018, 0.0017, 0.0020,\n 0.0022, 0.0018, 0.0020, 0.0014, 0.0018, 0.0015, 0.0021, 0.0020, 0.0019,\n 0.0020, 0.0020, 0.0016, 0.0019, 0.0022, 0.0017, 0.0016, 0.0023, 0.0017,\n 0.0019, 0.0017, 0.0016, 0.0014, 0.0016, 0.0025, 0.0022, 0.0020, 0.0021,\n 0.0021, 0.0018, 0.0022, 0.0011, 0.0018, 0.0014, 0.0013, 0.0023, 0.0015,\n 0.0019, 0.0014, 0.0023, 0.0020, 0.0015, 0.0018, 0.0020, 0.0014, 0.0023,\n 0.0023, 0.0020, 0.0020, 0.0018, 0.0021, 0.0020, 0.0029, 0.0021, 0.0014,\n 0.0016, 0.0023, 0.0020, 0.0018, 0.0013, 0.0017, 0.0019, 0.0020, 0.0015,\n 0.0016, 0.0020, 0.0018, 0.0017, 0.0015, 0.0020, 0.0017, 0.0030, 0.0017,\n 0.0016, 0.0021, 0.0019, 0.0022, 0.0014, 0.0013, 0.0019, 0.0023, 0.0022,\n 0.0019, 0.0023, 0.0020, 0.0019, 0.0016, 0.0016, 0.0015, 0.0017, 0.0015,\n 0.0021, 0.0018, 0.0015, 0.0021, 0.0022, 0.0014, 0.0019, 0.0013, 0.0017,\n 0.0029, 0.0024, 0.0012, 0.0018, 0.0032, 0.0018, 0.0016, 0.0018, 0.0026,\n 0.0015, 0.0017, 0.0018, 0.0017, 0.0014, 0.0018, 0.0017, 0.0020],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(1252.)",
17
+ "exp_avg": "tensor([[-2.2530e-04, -2.3007e-04, 1.4135e-04, ..., 3.1445e-04,\n -4.5599e-04, 7.0257e-05],\n [-4.0263e-04, 2.3347e-04, -1.3849e-04, ..., -3.7586e-05,\n 4.9673e-04, 9.8327e-05],\n [ 8.3669e-05, -1.1584e-04, -2.0701e-04, ..., -1.2114e-04,\n 3.8151e-04, -1.0200e-04],\n ...,\n [ 8.8221e-06, -2.8005e-05, 7.9927e-04, ..., -2.4346e-04,\n -5.3018e-05, -1.8086e-04],\n [ 9.9453e-05, 1.2974e-04, 5.8602e-05, ..., -3.6481e-05,\n 1.8713e-04, 3.6225e-04],\n [-1.7890e-04, 5.6560e-05, -1.4582e-04, ..., 7.0127e-05,\n -3.5613e-04, -1.1996e-04]], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([[1.0156e-06, 1.1219e-06, 1.1176e-06, ..., 1.3431e-06, 8.5119e-07,\n 1.0890e-06],\n [1.9144e-06, 1.4360e-06, 1.6939e-06, ..., 1.3735e-06, 1.1588e-06,\n 1.5997e-06],\n [1.1541e-06, 1.2541e-06, 1.5433e-06, ..., 1.6103e-06, 1.2980e-06,\n 1.2706e-06],\n ...,\n [1.3588e-06, 1.2978e-06, 1.7178e-06, ..., 1.4866e-06, 1.3245e-06,\n 1.4711e-06],\n [1.7115e-06, 1.4226e-06, 1.6237e-06, ..., 1.4127e-06, 1.2446e-06,\n 1.6055e-06],\n [1.8338e-06, 1.3482e-06, 1.8662e-06, ..., 1.9583e-06, 1.2927e-06,\n 1.8932e-06]], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(1252.)",
22
+ "exp_avg": "tensor([[-2.9063e-05, 9.0805e-05, 2.9705e-05, ..., 2.7188e-04,\n -5.2732e-05, -1.4932e-05],\n [-3.8454e-05, 2.0493e-06, 1.0052e-04, ..., 3.0046e-06,\n 7.9280e-06, -4.7728e-05],\n [ 3.9151e-09, -1.9087e-08, -4.5557e-08, ..., 1.8629e-09,\n 5.9162e-07, 1.7546e-04],\n ...,\n [-1.2513e-04, -5.6202e-05, -2.6410e-05, ..., -3.2025e-05,\n 4.8805e-05, 3.4097e-06],\n [ 1.2192e-04, 5.4437e-06, 6.5960e-05, ..., -2.2777e-06,\n 2.6620e-04, 1.2477e-04],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([[2.0102e-07, 7.2902e-07, 3.3892e-08, ..., 3.5575e-07, 4.8263e-07,\n 3.3151e-08],\n [1.9546e-07, 4.2647e-08, 2.6964e-08, ..., 4.6199e-08, 1.4018e-07,\n 3.7827e-07],\n [5.8972e-09, 6.6470e-09, 1.8414e-09, ..., 8.5277e-11, 5.3437e-09,\n 4.3754e-07],\n ...,\n [6.6237e-07, 4.7441e-07, 9.5843e-07, ..., 3.4596e-07, 5.5325e-07,\n 5.2647e-07],\n [4.3982e-07, 2.2938e-07, 3.8161e-07, ..., 4.3921e-07, 6.5262e-07,\n 1.0189e-06],\n [1.9061e-11, 1.3535e-10, 3.7251e-17, ..., 8.7060e-13, 5.9655e-14,\n 9.9479e-11]], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(1252.)",
27
+ "exp_avg": "tensor([ 1.9894e-03, 8.2508e-03, 2.1645e-04, ..., -9.1418e-03,\n 2.9486e-03, 3.2287e-41], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([1.0148e-04, 8.3960e-04, 1.1252e-04, ..., 1.1969e-03, 1.1332e-03,\n 1.5886e-07], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(1252.)",
32
+ "exp_avg": "tensor([[-3.2987e-05, -6.5892e-05, 1.2190e-05, ..., -7.4330e-05,\n -7.6971e-05, -5.6052e-45],\n [-9.7684e-06, 7.0812e-05, -4.5811e-06, ..., 6.8275e-06,\n 1.4526e-04, 5.6052e-45],\n [-4.3676e-05, -9.3502e-05, 6.1758e-06, ..., 1.9671e-04,\n 1.3874e-04, 5.6052e-45],\n ...,\n [ 6.4789e-05, -7.9088e-05, -1.1856e-06, ..., 1.6524e-04,\n 1.6191e-07, -1.6816e-44],\n [-3.7226e-05, 1.1028e-04, 9.7595e-06, ..., -3.3604e-05,\n -1.0009e-04, 1.2612e-44],\n [-4.8055e-05, -1.5428e-04, -2.5882e-06, ..., -1.4445e-04,\n 1.1399e-04, -5.6052e-45]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[1.0627e-08, 1.3717e-07, 1.1398e-08, ..., 2.1197e-07, 2.3996e-07,\n 1.1281e-12],\n [1.3330e-08, 1.3555e-07, 1.2715e-08, ..., 2.8493e-07, 2.4378e-07,\n 9.6134e-14],\n [1.2887e-08, 9.9602e-08, 7.9870e-09, ..., 2.3108e-07, 2.1670e-07,\n 2.5679e-12],\n ...,\n [1.2007e-08, 1.6316e-07, 3.6921e-08, ..., 2.6917e-07, 2.4554e-07,\n 3.8305e-13],\n [1.2990e-08, 2.1341e-07, 1.0314e-08, ..., 2.7317e-07, 2.6842e-07,\n 1.7391e-13],\n [1.1952e-08, 1.1635e-07, 1.5528e-08, ..., 2.3739e-07, 2.4039e-07,\n 1.8973e-14]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(1252.)",
37
+ "exp_avg": "tensor([[ 3.0438e-04, -6.1331e-05, -7.1216e-04, ..., -2.0327e-04,\n -1.0735e-04, 1.4158e-04],\n [ 4.7016e-04, 2.0363e-05, -8.6237e-04, ..., 3.5750e-07,\n -4.3073e-05, -3.8756e-04],\n [ 1.1098e-04, -2.4293e-04, -7.7781e-04, ..., -1.6144e-04,\n 1.7782e-04, -6.1524e-05],\n ...,\n [ 1.1314e-04, 6.9165e-04, -1.7368e-04, ..., -1.5741e-04,\n 6.8166e-05, -1.9639e-05],\n [-3.1919e-05, 2.2053e-04, -5.6613e-05, ..., 3.8456e-04,\n 6.8254e-05, -2.4889e-05],\n [ 4.0235e-05, 4.3375e-05, 6.7642e-04, ..., 7.6500e-05,\n -9.7459e-05, 7.5371e-05]], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([[6.0864e-07, 8.9552e-07, 1.0663e-06, ..., 2.9813e-07, 1.7216e-07,\n 2.2044e-07],\n [5.3989e-07, 9.8343e-07, 7.9613e-07, ..., 2.3137e-07, 1.6983e-07,\n 2.5895e-07],\n [7.8990e-07, 1.4367e-06, 1.1563e-06, ..., 3.9094e-07, 2.3337e-07,\n 3.2127e-07],\n ...,\n [6.0111e-07, 1.1086e-06, 1.1099e-06, ..., 3.2440e-07, 2.3010e-07,\n 2.3059e-07],\n [8.5607e-07, 1.3417e-06, 1.3604e-06, ..., 3.2054e-07, 2.3754e-07,\n 2.7523e-07],\n [1.1300e-06, 1.7741e-06, 1.8672e-06, ..., 4.7027e-07, 2.5373e-07,\n 7.2766e-07]], device='cuda:0')"
39
+ },
40
+ "7": {
41
+ "step": "tensor(1252.)",
42
+ "exp_avg": "tensor([-3.1290e-03, -1.0839e-02, 4.4347e-03, 2.1561e-04, 3.4243e-03,\n 5.4761e-03, 3.4715e-03, 2.8365e-03, 3.7082e-03, -9.1690e-03,\n 1.7548e-03, 6.9845e-03, -1.9522e-03, -7.8453e-03, 4.3774e-03,\n 5.1743e-04, 4.0792e-03, -9.1154e-03, 6.4104e-03, 4.8187e-03,\n 3.6185e-03, -1.6241e-02, -6.4970e-03, 5.7287e-03, -9.2112e-03,\n -9.5677e-03, 7.1277e-03, 7.3680e-03, -1.9756e-04, -4.4746e-03,\n 2.5317e-03, -2.4515e-03, -6.9580e-03, -3.8703e-03, 7.6704e-03,\n -3.1365e-04, 1.3443e-03, 7.1168e-05, 8.3198e-03, 1.8483e-03,\n 4.1124e-03, 1.4538e-02, 2.7159e-03, 4.5591e-03, -6.4540e-04,\n -6.2158e-03, -1.1082e-03, -5.8815e-04, -6.1031e-03, 7.4937e-03,\n -2.5806e-03, -3.4831e-03, 6.8023e-03, -7.6657e-03, -9.7198e-04,\n 1.5265e-03, 3.9266e-03, 6.8905e-03, -5.2033e-03, 4.6418e-03,\n 6.2224e-04, 2.7593e-03, 2.9342e-03, -5.1328e-03, -1.3321e-03,\n 2.3811e-03, -3.8518e-03, -1.1252e-02, -8.3743e-03, 5.9291e-03,\n -7.0977e-03, -4.1220e-03, 4.8386e-03, -6.2870e-03, 9.7492e-04,\n -8.2005e-04, 9.5278e-03, -2.0831e-04, -4.6898e-03, 1.6252e-03,\n 7.7169e-03, -3.3244e-03, 1.3706e-02, -7.8063e-04, -4.0751e-03,\n -5.0373e-04, 8.0062e-04, -1.9335e-03, -6.8021e-05, 5.3671e-03,\n -5.3745e-04, 8.0252e-03, 4.9127e-03, -4.2787e-03, 5.8770e-04,\n -1.0117e-02, 9.6045e-03, -4.1492e-03, 9.7187e-03, 6.0861e-03,\n -1.7587e-03, -1.2121e-02, -1.3740e-03, -2.5843e-04, -4.1720e-03,\n 1.8137e-03, 2.0609e-03, -3.2516e-03, 3.0716e-03, -2.0894e-04,\n -7.6776e-03, 3.7219e-04, -1.3307e-03, 1.0652e-03, 3.1150e-03,\n -5.5033e-03, -1.0104e-02, 5.7005e-03, -4.3637e-03, -6.7368e-04,\n -8.3367e-03, -1.0661e-02, -8.0799e-05, 9.8957e-03, -9.7153e-04,\n 3.8149e-04, 3.6488e-03, 8.6099e-03, 1.0030e-02, 8.8952e-04,\n 4.9368e-03, 1.7137e-03, -5.7159e-03, -7.8301e-03, -5.9587e-03,\n -3.8230e-03, -1.1283e-02, 6.0542e-05, 5.5236e-03, 6.0309e-04,\n 2.7035e-03, -3.1683e-03, 2.8612e-03, 1.6418e-05, -5.7541e-03,\n -1.1405e-02, 4.9373e-04, -3.5078e-03, -2.1429e-03, 5.1427e-03,\n -1.9865e-03, 4.6640e-03, 5.3591e-03, -5.7253e-03, 4.2628e-03,\n 2.3007e-03, -2.1914e-03, -6.2742e-03, 1.1828e-03, -3.9402e-03,\n 9.1025e-03, 8.4348e-03, 6.4404e-03, 1.4480e-03, 2.2003e-03,\n 1.0055e-03, -7.3317e-03, -6.6420e-03, 4.9784e-03, -3.6981e-03,\n -1.4068e-02, 5.9558e-03, -9.6833e-04, -9.2150e-03, -8.5673e-03,\n 1.4603e-03, -6.1410e-03, -1.0240e-03, -2.8317e-03, -6.9385e-04,\n 2.6847e-03, 8.1624e-03, -1.2251e-03, 6.1483e-03, -4.8383e-04,\n 3.5365e-03, -5.9150e-03, -2.8597e-03, 1.1097e-02, 1.5960e-03,\n -2.4443e-03, -1.1518e-02, -1.4362e-03, -3.2762e-03, -1.4370e-03,\n -2.7091e-03, -4.8744e-03, -2.8366e-03, 1.0204e-02, -2.4393e-03,\n 1.0735e-02, 3.9891e-03, 1.9101e-03, -5.4370e-03, -5.1682e-03,\n -4.7371e-03, -3.6399e-03, -1.9006e-03, 1.9850e-03, 1.2342e-02,\n -1.0125e-02, -6.5521e-03, 1.3220e-02, 4.7359e-03, 5.8811e-03,\n -5.0488e-04, 1.8445e-03, -5.1886e-03, -9.2281e-03, -7.8122e-03,\n 1.5978e-03, 4.3430e-03, 7.8992e-03, 1.1823e-02, 6.9462e-04,\n -4.3527e-03, 4.5454e-03, -1.9668e-03, -4.4083e-03, -5.6419e-03,\n 2.5676e-03, -3.6266e-03, -1.1142e-02, -1.5301e-03, 4.2844e-03,\n -1.9913e-03, -7.6892e-03, -5.6131e-03, 6.8052e-03, -7.3671e-03,\n -7.3679e-03, 3.5898e-03, -4.0303e-03, -2.9117e-03, -2.1303e-03,\n -9.6192e-03, -6.3396e-03, 5.0744e-04, -1.5104e-03, -6.1110e-03,\n 1.2617e-03, 5.3939e-03, 1.7779e-02, -7.5813e-04, -1.9042e-04,\n 4.3293e-04, -5.2831e-03, 4.6258e-03, 4.7083e-04, -7.4097e-03,\n 6.1843e-03, 4.0118e-04, 1.1072e-02, -1.8976e-03, 2.6547e-03,\n 6.8480e-03, 4.0891e-03, 1.1359e-03, 1.1594e-02, -2.8672e-03,\n 4.3172e-03, -1.1305e-03, 9.9707e-03, -8.1990e-03, 1.4892e-03,\n 8.5483e-03, 5.9545e-04, -1.5423e-03, 1.7811e-04, -1.8679e-03,\n -9.7825e-03, 4.5867e-03, -2.6765e-03, 7.8935e-03, -5.7646e-03,\n 7.2343e-04, -8.4117e-03, 7.8227e-03, -1.2249e-04, -6.3760e-03,\n 1.6040e-03, 5.1553e-03, 2.9040e-03, -1.9731e-03, -5.9545e-03,\n 6.6730e-04, -3.2945e-03, 8.4818e-03, -2.7212e-03, -2.5170e-03,\n 6.7904e-03, 2.1152e-03, -3.5527e-03, 1.2403e-02, 1.0805e-03,\n 2.4836e-03, -9.3564e-03, -1.2332e-03, 7.8264e-03, 5.3713e-03,\n -2.2991e-03, -4.9742e-03, -6.1052e-03, -1.0511e-03, -1.4478e-02,\n 1.9470e-03, 1.9868e-03, -8.9314e-03, 1.6084e-03, 9.7022e-04,\n 2.0716e-03, 3.8894e-03, 6.4146e-03, -8.3757e-04, 1.0114e-03,\n 5.1089e-03, 2.9937e-03, 2.1089e-03, -1.7629e-03, 5.1550e-03,\n -2.3736e-03, 6.9281e-03, -9.6416e-03, -4.4031e-03, -5.7859e-03,\n 5.9968e-03, 1.3310e-03, -1.3689e-03, 8.4722e-03, -3.5751e-03,\n -2.8553e-03, 3.9831e-03, -3.9839e-03, 4.1198e-03, -1.5266e-03,\n -3.5276e-03, -2.7174e-03, 9.2619e-04, 4.8522e-04, -4.7064e-03,\n -3.5643e-03, 3.9271e-04, -4.0441e-03, -9.6931e-04, 7.9239e-03,\n 7.2553e-03, 1.3769e-02, 5.3930e-03, -4.0284e-03, -1.8410e-03,\n -1.0353e-03, 1.4277e-03, 2.3184e-03, 3.1377e-03, -1.1024e-03,\n 9.6850e-03, 1.4721e-03, 5.4423e-03, 5.6410e-04, 1.2994e-03,\n -1.2254e-03, 1.0252e-03, -3.5951e-03, -3.5564e-03, 6.0008e-03,\n -7.1421e-03, -1.8391e-03, -6.8941e-04, 3.5054e-03, 2.1451e-03,\n -2.9658e-03, 3.6568e-03, -1.4232e-02, 1.9787e-03, -1.0132e-02,\n 2.2974e-03, -1.4174e-02, -3.5325e-03, -5.9175e-03, 3.9237e-03,\n 2.1589e-03, 2.2651e-03, -3.5600e-03, -4.1728e-04, 9.7893e-03,\n 8.8899e-03, -5.4193e-03, -4.0363e-03, -1.1398e-02, 2.1282e-03,\n -8.4429e-03, 8.5148e-03, -1.0498e-02, 2.1518e-03, -3.0429e-03,\n 1.1719e-03, -2.3191e-03, 9.8006e-04, -5.0818e-03, -1.5183e-02,\n -2.7076e-03, -7.0799e-03, 2.6461e-03, -3.0265e-03, 3.2407e-03,\n 3.6699e-03, -3.9715e-03, 2.3994e-04, 1.4482e-03, 3.0002e-04,\n -2.2123e-03, 5.2146e-03, 4.0532e-03, 1.4514e-03, 1.5579e-03,\n 1.2975e-03, -5.7587e-03, 5.6404e-04, 5.9509e-03, -1.1382e-02,\n 4.7414e-04, 2.8470e-03, 9.6303e-03, -2.8329e-03, 2.3641e-03,\n 3.2310e-03, 2.3510e-03, 1.0626e-03, -2.1143e-03, 2.2973e-03,\n 5.0321e-03, -1.2766e-03, -2.2012e-03, 3.5521e-03, -9.1439e-03,\n -6.9207e-03, 4.7288e-03, -5.3414e-03, 6.9938e-04, -8.9239e-03,\n -5.9861e-03, 1.6808e-04, 1.9123e-03, -7.8491e-03, -3.2882e-03,\n 4.1029e-03, -1.9990e-03, -7.9232e-03, -4.8292e-03, -1.1446e-02,\n -1.6599e-03, 5.3135e-03, 6.0018e-03, 5.0868e-03, -5.9694e-04,\n 1.3968e-03, -1.4763e-02, 7.6078e-03, 5.0342e-03, -2.9885e-03,\n -1.5857e-03, 5.6393e-03, 4.0594e-03, 2.7200e-04, 5.0404e-03,\n 7.8816e-03, -6.8926e-04, -7.1740e-03, -2.6018e-03, 9.1243e-03,\n -1.0542e-03, 1.2427e-03, -1.3331e-05, 4.9243e-03, -1.6277e-03,\n 2.0584e-03, 4.0486e-03, -6.8104e-03, 5.5210e-03, 5.8179e-04,\n 1.4997e-03, -3.1499e-03, 1.4409e-02, -7.4148e-04, 7.1499e-03,\n -9.0447e-03, 7.9663e-03, 2.7462e-03, 2.6999e-03, 2.8239e-03,\n -1.0697e-02, 1.1279e-03, -5.9091e-03, 2.1055e-03, 1.7239e-03,\n -4.7988e-04, -1.5006e-03, -7.9349e-04, 3.2204e-03, 1.4288e-03,\n 9.0803e-04, 3.0310e-03], device='cuda:0')",
43
+ "exp_avg_sq": "tensor([0.0003, 0.0003, 0.0004, 0.0004, 0.0004, 0.0004, 0.0004, 0.0004, 0.0004,\n 0.0004, 0.0003, 0.0006, 0.0003, 0.0005, 0.0003, 0.0004, 0.0004, 0.0005,\n 0.0004, 0.0003, 0.0006, 0.0004, 0.0003, 0.0004, 0.0005, 0.0004, 0.0004,\n 0.0005, 0.0004, 0.0004, 0.0003, 0.0003, 0.0005, 0.0004, 0.0004, 0.0006,\n 0.0004, 0.0005, 0.0005, 0.0003, 0.0003, 0.0004, 0.0004, 0.0004, 0.0003,\n 0.0006, 0.0004, 0.0004, 0.0004, 0.0003, 0.0005, 0.0003, 0.0005, 0.0004,\n 0.0005, 0.0004, 0.0003, 0.0005, 0.0004, 0.0004, 0.0004, 0.0004, 0.0003,\n 0.0005, 0.0004, 0.0004, 0.0004, 0.0006, 0.0005, 0.0003, 0.0004, 0.0005,\n 0.0004, 0.0005, 0.0003, 0.0005, 0.0004, 0.0004, 0.0005, 0.0005, 0.0004,\n 0.0004, 0.0006, 0.0004, 0.0004, 0.0004, 0.0004, 0.0005, 0.0003, 0.0002,\n 0.0003, 0.0004, 0.0004, 0.0004, 0.0005, 0.0005, 0.0002, 0.0004, 0.0004,\n 0.0004, 0.0004, 0.0003, 0.0005, 0.0004, 0.0004, 0.0003, 0.0004, 0.0004,\n 0.0004, 0.0004, 0.0004, 0.0004, 0.0005, 0.0003, 0.0003, 0.0005, 0.0003,\n 0.0005, 0.0005, 0.0004, 0.0005, 0.0004, 0.0004, 0.0005, 0.0004, 0.0006,\n 0.0003, 0.0005, 0.0003, 0.0003, 0.0004, 0.0004, 0.0005, 0.0004, 0.0003,\n 0.0005, 0.0005, 0.0003, 0.0005, 0.0003, 0.0003, 0.0004, 0.0003, 0.0004,\n 0.0004, 0.0004, 0.0005, 0.0005, 0.0004, 0.0006, 0.0003, 0.0004, 0.0005,\n 0.0004, 0.0005, 0.0006, 0.0004, 0.0004, 0.0004, 0.0003, 0.0003, 0.0004,\n 0.0003, 0.0003, 0.0005, 0.0003, 0.0004, 0.0004, 0.0004, 0.0004, 0.0005,\n 0.0004, 0.0003, 0.0005, 0.0004, 0.0002, 0.0003, 0.0005, 0.0003, 0.0003,\n 0.0004, 0.0004, 0.0003, 0.0005, 0.0004, 0.0005, 0.0003, 0.0004, 0.0005,\n 0.0005, 0.0005, 0.0003, 0.0004, 0.0004, 0.0004, 0.0003, 0.0004, 0.0003,\n 0.0004, 0.0004, 0.0004, 0.0003, 0.0005, 0.0004, 0.0005, 0.0005, 0.0005,\n 0.0003, 0.0004, 0.0006, 0.0005, 0.0004, 0.0005, 0.0005, 0.0004, 0.0005,\n 0.0004, 0.0004, 0.0003, 0.0004, 0.0003, 0.0003, 0.0005, 0.0003, 0.0004,\n 0.0005, 0.0004, 0.0004, 0.0004, 0.0004, 0.0005, 0.0004, 0.0006, 0.0003,\n 0.0004, 0.0004, 0.0005, 0.0003, 0.0004, 0.0004, 0.0004, 0.0003, 0.0004,\n 0.0003, 0.0004, 0.0005, 0.0004, 0.0005, 0.0004, 0.0004, 0.0005, 0.0003,\n 0.0005, 0.0005, 0.0004, 0.0004, 0.0005, 0.0004, 0.0005, 0.0004, 0.0004,\n 0.0003, 0.0004, 0.0004, 0.0004, 0.0005, 0.0004, 0.0003, 0.0005, 0.0004,\n 0.0005, 0.0004, 0.0004, 0.0004, 0.0005, 0.0004, 0.0005, 0.0004, 0.0004,\n 0.0003, 0.0004, 0.0005, 0.0003, 0.0005, 0.0004, 0.0004, 0.0004, 0.0003,\n 0.0004, 0.0006, 0.0004, 0.0004, 0.0003, 0.0004, 0.0004, 0.0004, 0.0004,\n 0.0004, 0.0004, 0.0005, 0.0003, 0.0005, 0.0005, 0.0004, 0.0003, 0.0003,\n 0.0004, 0.0005, 0.0004, 0.0003, 0.0004, 0.0005, 0.0005, 0.0003, 0.0005,\n 0.0006, 0.0005, 0.0005, 0.0004, 0.0005, 0.0005, 0.0004, 0.0004, 0.0004,\n 0.0004, 0.0005, 0.0003, 0.0005, 0.0004, 0.0005, 0.0003, 0.0003, 0.0004,\n 0.0004, 0.0004, 0.0003, 0.0004, 0.0005, 0.0004, 0.0005, 0.0004, 0.0004,\n 0.0005, 0.0003, 0.0003, 0.0004, 0.0005, 0.0005, 0.0006, 0.0003, 0.0006,\n 0.0006, 0.0004, 0.0003, 0.0004, 0.0005, 0.0004, 0.0004, 0.0005, 0.0004,\n 0.0004, 0.0004, 0.0006, 0.0004, 0.0005, 0.0006, 0.0004, 0.0006, 0.0004,\n 0.0004, 0.0003, 0.0005, 0.0004, 0.0004, 0.0005, 0.0005, 0.0004, 0.0006,\n 0.0004, 0.0004, 0.0004, 0.0007, 0.0004, 0.0004, 0.0004, 0.0005, 0.0005,\n 0.0003, 0.0005, 0.0006, 0.0004, 0.0003, 0.0004, 0.0004, 0.0004, 0.0003,\n 0.0004, 0.0004, 0.0005, 0.0004, 0.0005, 0.0003, 0.0003, 0.0003, 0.0005,\n 0.0004, 0.0004, 0.0004, 0.0003, 0.0006, 0.0004, 0.0004, 0.0003, 0.0004,\n 0.0004, 0.0004, 0.0005, 0.0006, 0.0004, 0.0005, 0.0004, 0.0004, 0.0006,\n 0.0003, 0.0004, 0.0004, 0.0004, 0.0003, 0.0004, 0.0005, 0.0004, 0.0005,\n 0.0004, 0.0005, 0.0003, 0.0003, 0.0004, 0.0004, 0.0003, 0.0005, 0.0005,\n 0.0004, 0.0004, 0.0003, 0.0004, 0.0002, 0.0004, 0.0004, 0.0004, 0.0004,\n 0.0005, 0.0004, 0.0004, 0.0004, 0.0005, 0.0004, 0.0006, 0.0005, 0.0003,\n 0.0005, 0.0004, 0.0004, 0.0005, 0.0003, 0.0004, 0.0005, 0.0004, 0.0004,\n 0.0005, 0.0005, 0.0005, 0.0004, 0.0001, 0.0003, 0.0005, 0.0005, 0.0004,\n 0.0003, 0.0004, 0.0004, 0.0004, 0.0003, 0.0005, 0.0004, 0.0005, 0.0004,\n 0.0004, 0.0004, 0.0004, 0.0004, 0.0003, 0.0005, 0.0006, 0.0004, 0.0005,\n 0.0004, 0.0003, 0.0004, 0.0005, 0.0004, 0.0004, 0.0005, 0.0005, 0.0005,\n 0.0005, 0.0004, 0.0005, 0.0004, 0.0003, 0.0003, 0.0004, 0.0005],\n device='cuda:0')"
44
+ },
45
+ "8": {
46
+ "step": "tensor(1252.)",
47
+ "exp_avg": "tensor([[-1.2629e-05, 5.8751e-05, -3.9425e-06, ..., -1.0545e-05,\n 2.4633e-05, -8.4373e-07],\n [-2.0918e-04, -5.3185e-05, -8.4437e-05, ..., -2.3895e-05,\n -3.5379e-04, -5.4214e-05],\n [-7.5313e-05, -4.4599e-07, -2.1712e-04, ..., 2.2746e-05,\n 2.3718e-05, -1.0876e-04],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-8.6459e-05, -1.4315e-05, -2.2962e-05, ..., -5.5990e-06,\n -2.7604e-05, -6.3772e-08],\n [ 3.2666e-06, 3.2416e-05, 4.4862e-05, ..., -3.0307e-06,\n -7.4605e-07, 1.3434e-06]], device='cuda:0')",
48
+ "exp_avg_sq": "tensor([[2.6651e-07, 3.6714e-07, 4.3197e-07, ..., 4.5357e-07, 9.4844e-08,\n 7.8451e-09],\n [5.5585e-07, 1.3808e-07, 1.0050e-07, ..., 7.6677e-08, 4.7699e-07,\n 1.5756e-07],\n [1.4340e-06, 7.6743e-08, 5.4122e-07, ..., 3.5253e-07, 1.9810e-07,\n 4.5656e-07],\n ...,\n [1.7477e-11, 1.7574e-13, 2.8879e-11, ..., 1.9967e-12, 1.1237e-12,\n 7.2447e-12],\n [2.7596e-08, 3.1893e-08, 7.8125e-09, ..., 3.5941e-08, 7.3419e-08,\n 7.1466e-10],\n [6.4398e-09, 6.3680e-08, 4.0705e-08, ..., 2.2778e-08, 1.1178e-08,\n 8.9921e-10]], device='cuda:0')"
49
+ },
50
+ "9": {
51
+ "step": "tensor(1252.)",
52
+ "exp_avg": "tensor([-1.5703e-03, -1.9829e-03, -1.2989e-03, ..., 5.6052e-45,\n -3.0878e-04, 1.0775e-03], device='cuda:0')",
53
+ "exp_avg_sq": "tensor([2.5512e-04, 2.6564e-04, 3.6225e-04, ..., 8.2137e-09, 2.5219e-05,\n 4.5197e-05], device='cuda:0')"
54
+ },
55
+ "10": {
56
+ "step": "tensor(1252.)",
57
+ "exp_avg": "tensor([[ 4.7515e-05, -5.0389e-05, -8.4065e-05, ..., -5.6052e-45,\n -1.9104e-05, 4.3596e-05],\n [ 3.7928e-06, -2.6888e-05, 1.8763e-04, ..., -5.6052e-45,\n -5.3368e-06, 2.4018e-05],\n [ 1.3919e-05, 4.5148e-05, -1.0436e-05, ..., 5.6052e-45,\n 1.3882e-07, 5.3756e-06],\n ...,\n [-3.9344e-05, -1.9302e-05, -9.3420e-06, ..., 5.6052e-45,\n -4.8806e-06, 3.5702e-05],\n [ 4.9965e-05, -4.0471e-05, -1.0613e-04, ..., -5.6052e-45,\n 1.7126e-05, 1.0470e-05],\n [ 3.6832e-05, -1.7629e-05, 2.8861e-06, ..., 5.6052e-45,\n 2.4257e-05, 1.7735e-05]], device='cuda:0')",
58
+ "exp_avg_sq": "tensor([[4.0750e-08, 3.0563e-08, 5.2916e-08, ..., 1.8240e-13, 8.3271e-09,\n 8.7881e-09],\n [6.2465e-08, 3.7422e-08, 7.1765e-08, ..., 9.5662e-13, 1.0335e-08,\n 1.7565e-08],\n [3.2744e-08, 3.7939e-08, 8.2240e-08, ..., 3.5257e-13, 1.3273e-08,\n 1.7485e-08],\n ...,\n [3.6966e-08, 4.2214e-08, 6.6752e-08, ..., 8.7254e-13, 1.3190e-08,\n 1.9506e-08],\n [4.7363e-08, 4.4401e-08, 7.5648e-08, ..., 7.4742e-13, 1.5607e-08,\n 1.5387e-08],\n [3.2863e-08, 3.3734e-08, 7.2504e-08, ..., 2.4304e-13, 9.7172e-09,\n 2.2855e-08]], device='cuda:0')"
59
+ },
60
+ "11": {
61
+ "step": "tensor(1252.)",
62
+ "exp_avg": "tensor([[ 1.3144e-05, -2.0262e-04, -8.8812e-05, ..., -1.2493e-04,\n -8.5530e-05, 5.3006e-05],\n [-3.6229e-05, 5.6138e-06, 6.4811e-05, ..., -9.5030e-05,\n 1.4980e-04, 1.0043e-04],\n [-4.2070e-05, -1.1938e-04, -1.4758e-04, ..., 4.7592e-05,\n -2.2427e-05, 2.0970e-04],\n ...,\n [-3.6768e-05, -2.3943e-04, 1.4692e-04, ..., 1.3486e-04,\n 1.2593e-04, 1.2838e-05],\n [-9.4648e-05, 8.4504e-05, 1.7936e-04, ..., 8.9412e-06,\n 9.3157e-06, -1.9011e-06],\n [-3.6091e-05, -1.7713e-04, -4.0184e-05, ..., -1.2610e-05,\n -5.0959e-05, -5.8748e-07]], device='cuda:0')",
63
+ "exp_avg_sq": "tensor([[1.3922e-07, 2.3254e-07, 2.7340e-07, ..., 1.2803e-07, 7.1115e-08,\n 1.3372e-07],\n [8.9759e-08, 1.6792e-07, 1.7131e-07, ..., 9.0531e-08, 5.5064e-08,\n 7.8730e-08],\n [1.3708e-07, 2.2840e-07, 2.2723e-07, ..., 1.2945e-07, 6.9632e-08,\n 1.1455e-07],\n ...,\n [1.6239e-07, 2.5847e-07, 2.5502e-07, ..., 1.3973e-07, 9.1194e-08,\n 1.5495e-07],\n [1.0947e-07, 1.5356e-07, 2.1171e-07, ..., 8.6336e-08, 6.0314e-08,\n 8.2016e-08],\n [8.4400e-08, 1.2763e-07, 1.7290e-07, ..., 7.9902e-08, 5.6851e-08,\n 8.8222e-08]], device='cuda:0')"
64
+ },
65
+ "12": {
66
+ "step": "tensor(1252.)",
67
+ "exp_avg": "tensor([-1.0599e-04, 8.3552e-03, -1.4234e-04, 3.1915e-03, -1.3750e-03,\n 3.2440e-04, 2.6259e-03, -2.1111e-03, 5.0076e-06, 1.8827e-03,\n 3.6772e-03, 6.8654e-05, 3.0456e-03, -1.9142e-03, -2.6019e-03,\n 8.5992e-06, 6.7610e-03, 2.3749e-03, -9.7032e-05, -1.3891e-03,\n -4.2681e-03, 2.0131e-03, 5.1296e-03, 4.3073e-03, -3.6268e-03,\n -1.1777e-03, 8.2693e-04, -1.8457e-03, -1.6032e-03, -1.7690e-03,\n -2.8140e-03, 5.7852e-03, 6.2910e-04, -4.4664e-04, -5.3315e-03,\n 1.6971e-04, -3.4181e-03, -9.2831e-04, 3.5089e-03, 5.3750e-03,\n 1.3928e-03, 2.4443e-03, 2.9832e-03, 9.7622e-05, -6.1645e-04,\n -5.9057e-03, -2.9183e-03, 2.2769e-03, 8.0676e-04, -1.3751e-03,\n 5.7465e-05, -3.0351e-03, -9.7150e-04, 4.1942e-04, -4.9135e-03,\n 3.2150e-03, 5.7876e-06, 3.7882e-03, -2.3873e-03, 1.4072e-03,\n -2.6292e-03, -1.1704e-03, -2.6136e-03, 3.4886e-03, 1.8281e-03,\n -2.6298e-04, 6.9276e-03, 3.3210e-03, -4.5128e-03, 2.0097e-03,\n -1.3622e-03, 2.3371e-03, -5.1887e-05, -6.3913e-04, -5.7127e-04,\n -1.0404e-03, -2.3142e-03, 4.0173e-04, 2.9065e-03, -1.0151e-03,\n 1.3192e-03, -4.2963e-03, -5.1138e-03, -3.9469e-04, 8.7962e-05,\n 8.0872e-04, 5.0835e-04, -1.2012e-03, 6.2196e-04, -9.7352e-04,\n -4.3697e-03, 3.3784e-03, 3.2695e-03, 2.8850e-04, 1.2061e-03,\n -1.4216e-03, 4.1619e-03, -4.6011e-03, 1.0092e-03, -5.9398e-03,\n 4.4504e-03, 2.9912e-03, 1.5740e-03, 3.4155e-03, 3.4634e-04,\n -2.6385e-03, 1.5552e-03, -8.0015e-04, -1.3967e-03, -1.3498e-04,\n 2.6605e-03, -3.1175e-03, -2.9452e-03, 3.9840e-03, 1.5300e-03,\n 1.3486e-03, 4.4666e-03, -1.2025e-04, 5.0523e-03, -3.1233e-03,\n -4.1598e-03, 2.2764e-03, 6.8694e-04, -3.6781e-03, 1.6588e-03,\n -7.5233e-04, 1.4673e-03, -1.5722e-04, -2.7319e-04, -6.2315e-04,\n -3.4879e-03, 5.3025e-03, -5.6698e-04, 8.2763e-04, 2.5530e-03,\n 1.8872e-03, -1.4880e-03, 5.0830e-03, -4.7865e-04, -5.7530e-03,\n -3.3096e-03, -2.2061e-03, 4.8984e-04, 1.0386e-03, -2.4812e-04,\n -3.5526e-03, 2.4047e-03, 1.0907e-03, 1.2826e-03, -4.0607e-04,\n -2.8333e-04, -2.3122e-03, -3.8670e-03, 9.7953e-04, -1.5515e-03,\n 9.0397e-05, 1.9811e-03, 2.5639e-03, -4.2736e-04, 4.1324e-04,\n -1.2337e-03, 1.5298e-04, 6.3406e-03, 4.5168e-03, -9.3898e-05,\n -3.0152e-03, 4.2793e-04, -1.0316e-03, 2.2637e-03, -7.7388e-04,\n -4.3788e-04, 5.5192e-03, -4.9058e-03, -6.1215e-04, -3.1246e-03,\n -2.4662e-03, 8.6542e-05, 2.5574e-03, -3.1701e-03, -1.4561e-03,\n -2.1756e-03, -1.0326e-03, 6.3957e-04, 1.7361e-04, 4.2689e-03,\n -4.3625e-03, 3.2604e-03, 1.1655e-03, -3.5945e-03, -2.7515e-03,\n 7.8182e-03, -1.5874e-03, -1.5751e-03, 1.4896e-03, 4.2874e-03,\n 1.9000e-03, -1.3581e-03, -6.8589e-03, -1.1498e-03, -3.1031e-03,\n -1.8485e-03, -1.4982e-03, 1.4389e-04, -2.6591e-04, -5.5107e-05,\n -2.7958e-03, -4.6518e-04, -6.0512e-05, -4.9141e-05, -1.0188e-03,\n -7.6681e-04, 5.5318e-04, 1.7925e-03, 2.0380e-03, -1.6776e-03,\n -1.6457e-03, 1.3078e-03, 3.9229e-05, 2.3425e-03, -3.2091e-03,\n 3.9566e-04, -2.2003e-04, 1.5769e-03, 4.7892e-03, 2.6184e-03,\n 2.9576e-03, 8.2965e-04, 5.4198e-04, -1.5093e-03, 1.8696e-03,\n -6.8209e-03, -4.5641e-04, -1.9929e-03, 3.1724e-03, -5.9405e-03,\n -2.4068e-03, 2.1892e-03, 3.6281e-04, 6.8284e-03, -1.8494e-03,\n -3.3361e-03, 2.0777e-04, -7.8778e-03, -1.2688e-03, -4.7112e-04,\n 1.1019e-04, 1.9605e-03, 2.0315e-03, 6.2763e-04, -7.3570e-04,\n 2.6885e-03, 1.6305e-03, 2.7396e-03, 3.9849e-03, -2.1092e-03,\n 5.2312e-03, -1.8273e-03, -5.7870e-04, 5.3344e-03, -4.8950e-03,\n -4.1227e-03, -2.2606e-03, 2.8998e-03, -3.3867e-03, -1.9073e-03,\n 3.4957e-05, -8.0428e-04, -4.5291e-03, -1.0735e-03, -3.7868e-03,\n 5.8702e-03, -1.1584e-03, -1.2501e-04, -3.3239e-04, 1.7105e-03,\n -5.7033e-03, 1.4851e-03, 4.9811e-03, -4.1653e-03, -3.3089e-03,\n -2.1466e-03, 3.3759e-03, 5.8921e-04, -3.8886e-03, 1.8054e-03,\n 3.5721e-03, -1.0391e-03, 2.0547e-03, -1.8862e-06, 5.0773e-03,\n -2.2277e-03, 2.5740e-03, -1.3153e-03, -6.3193e-04, -3.8019e-03,\n 2.0267e-03, 1.9466e-03, -7.4878e-04, 5.4076e-03, -5.2736e-03,\n 1.9802e-03, -3.0028e-03, 4.0301e-03, 1.4593e-03, 9.9081e-04,\n -1.6211e-03, -1.5512e-03, -6.2260e-03, -8.1623e-04, -2.3467e-03,\n 4.3506e-03, 3.3147e-03, -3.2608e-03, -2.2055e-03, -2.2397e-03,\n 6.1576e-03, -5.2432e-03, 2.4336e-03, 1.7189e-04, -3.0379e-03,\n -9.6974e-04, -3.1114e-03, 1.1802e-03, -1.1835e-03, 4.1782e-03,\n -1.2343e-03, 6.3085e-04, -1.6868e-03, 9.5894e-04, 2.2809e-03,\n 7.2831e-04, -3.1658e-04, 3.5609e-03, -1.5252e-03, 1.1883e-03,\n -1.9772e-03, -2.8644e-03, -5.5075e-03, 1.0879e-03, -7.2560e-03,\n 2.4679e-04, -5.3705e-04, 4.2610e-03, 5.6775e-03, -1.9402e-03,\n 2.9995e-03, -8.1881e-03, -1.6396e-03, 1.3327e-03, -4.6881e-03,\n 3.2415e-03, 2.6711e-03, -2.5716e-03, 1.3697e-03, -3.5438e-03,\n 8.8783e-03, -2.4893e-04, 1.7437e-03, 6.1672e-03, -2.8417e-03,\n -1.9335e-03, 5.5017e-03, -5.1468e-04, 2.8860e-03, 7.4911e-03,\n -3.4000e-04, 5.3836e-03, 2.0367e-03, 1.8756e-03, 1.0246e-02,\n 4.7281e-03, -5.1497e-03, 4.1876e-03, 3.5072e-03, -2.2614e-04,\n 7.2719e-04, 5.3432e-03, 3.6317e-05, -4.7257e-03, -1.9156e-04,\n -6.0797e-04, -8.6541e-04, -4.4615e-04, -3.9097e-03, 1.8924e-03,\n 1.3983e-04, 3.4393e-03, 1.5973e-04, 2.0977e-03, 5.4831e-03,\n 2.1230e-03, 3.4626e-03, -1.6255e-03, 4.9394e-03, -2.7993e-03,\n -9.8298e-04, 7.2069e-06, -8.3866e-04, -3.1330e-03, 1.8910e-03,\n -6.3688e-05, -4.0395e-03, 8.6279e-04, -3.3443e-03, 6.9621e-03,\n 3.3682e-03, -2.0913e-03, 6.2906e-03, -3.7739e-03, 3.1499e-04,\n -3.1694e-03, 2.6715e-03, -4.5394e-03, -6.2412e-05, -5.2579e-03,\n 1.8425e-03, 1.9918e-03, -1.6160e-03, 3.2648e-03, -7.5326e-04,\n -2.5254e-03, -1.6210e-03, 7.6103e-04, -3.0895e-03, -6.1092e-03,\n -1.3200e-04, 5.5204e-03, 7.6847e-03, 4.2085e-03, -6.9145e-04,\n 7.7656e-04, 7.2694e-03, 4.0532e-04, 1.4846e-03, -8.3040e-04,\n 2.8044e-03, 3.2055e-05, 6.2028e-03, -1.1000e-05, 4.8162e-03,\n 1.6463e-03, -4.5214e-04, -6.5102e-04, -3.0825e-03, 6.7660e-04,\n 1.0525e-03, 3.1562e-03, -3.7991e-03, 1.1203e-03, -1.9097e-03,\n 1.5582e-03, 3.5371e-03, -3.2794e-04, 1.8119e-03, 3.3316e-04,\n 2.2329e-03, -2.3499e-03, -1.7013e-03, 1.5192e-03, -3.6861e-03,\n -2.7861e-03, -2.4568e-04, -1.8042e-03, 3.9977e-03, 6.6115e-04,\n 2.4293e-03, 2.9716e-03, -1.7446e-03, -1.0204e-02, -6.5336e-03,\n -6.8339e-04, -1.4488e-03, -2.3343e-03, -2.8323e-03, 1.7658e-03,\n -3.3890e-03, -2.3313e-03, 1.6101e-03, -5.5873e-03, -1.8353e-03,\n 8.5442e-03, 1.3369e-03, -2.6546e-03, 3.8600e-04, -8.2450e-03,\n 5.7066e-03, -1.0773e-03, 7.9400e-03, 3.5696e-03, 8.8774e-04,\n 5.9195e-04, 2.4787e-03, -1.7189e-03, -2.0486e-03, 2.3321e-03,\n -5.6900e-03, 4.4071e-03, 1.2299e-03, 1.0886e-03, 1.8897e-03,\n 2.3207e-03, 2.1629e-03, 3.1362e-03, -4.1076e-05, -3.0116e-04,\n -1.1747e-03, -6.4370e-05, -8.8264e-04, 4.0314e-03, -4.8174e-03,\n -1.5617e-03, 4.6107e-04, -7.9178e-04, 3.9490e-04, -3.8808e-03,\n 1.7255e-03, -8.0564e-04, 2.4242e-03, 2.5705e-03, 1.6330e-03,\n -5.0606e-03, 4.8074e-03, 5.2774e-03, -5.1029e-03, 5.7838e-03,\n 1.5565e-03, 1.3973e-03, -2.4602e-03, 1.5827e-03, 7.8115e-04,\n 8.5024e-04, 5.0202e-03, -5.3324e-03, 5.0690e-03, 4.3123e-04,\n 2.6965e-03, 1.2848e-03, -4.4777e-03, 6.5132e-04, -1.0953e-03,\n 7.1084e-04, 4.2995e-03, -2.4463e-03, -2.9296e-03, -3.1040e-03,\n 1.0288e-02, 1.0281e-03, 4.2953e-03, 5.7092e-03, 4.0279e-03,\n 1.1198e-03, -2.0392e-03, 7.3994e-03, -5.0036e-03, -1.0558e-03,\n -2.1461e-03, 1.5210e-03, -3.9480e-03, -6.3664e-04, -3.6396e-04,\n -5.8340e-04, 1.2085e-04, 2.6207e-03, -4.5857e-03, 3.9738e-04,\n -3.2214e-03, 2.5630e-03, 2.3485e-03, 2.5695e-04, -9.9106e-04,\n -4.3634e-03, 4.3177e-03, -6.3509e-04, 6.5794e-03, -6.7654e-03,\n -3.8862e-03, 2.8662e-03, 9.5635e-05, 3.6721e-03, 2.6198e-03,\n 1.5751e-03, -3.4345e-03, -3.0863e-04, 6.1364e-04, -2.1105e-03,\n 2.0078e-03, 2.8815e-03, 3.8295e-03, -3.2372e-03, 1.2569e-04,\n -7.6778e-04, 6.2376e-03, -3.6775e-03, -4.0489e-03, 7.9926e-04,\n 1.6300e-03, 5.6208e-03, 1.4671e-03, -3.2125e-03, -1.4929e-04,\n 3.0702e-03, 5.6162e-04, -7.3768e-04, 3.1802e-03, -2.7683e-03,\n 2.3013e-03, -4.0158e-03, -2.0289e-03, -3.6394e-03, 1.0951e-03,\n -2.6933e-03, -1.6959e-05, -1.2750e-03, 1.0702e-03, -1.9200e-03,\n 1.0220e-02, -1.3223e-03, -1.5491e-15, -5.0692e-03, 3.5948e-03,\n -5.6103e-03, -4.4078e-03, -2.1993e-03, 9.7040e-03, -1.4019e-03,\n 3.6047e-05, -4.3787e-03, -4.7905e-03, -3.4224e-03, 2.6994e-03,\n -5.8129e-03, 3.9977e-03, -3.8017e-04, 5.2780e-03, 3.4560e-03,\n 7.2954e-03, -6.9093e-04, 4.2560e-03, 9.4767e-04, 2.6168e-03,\n 3.4658e-04, 7.4707e-04, 1.7471e-03, 6.5818e-04, -1.3501e-03,\n 1.0091e-03, 5.0138e-03, -2.0912e-03, -2.4421e-03, 1.5187e-03,\n -7.7544e-04, 3.6279e-05, 2.7036e-04, -2.2751e-03, 4.0733e-03,\n -3.1110e-04, 3.8983e-03, -2.6036e-03, -6.4473e-04, -2.8748e-03,\n -2.4057e-03, 3.1933e-04, -3.5694e-03, -1.5009e-03, 3.9283e-03,\n 3.6522e-03, -2.9339e-03, -5.3530e-03, -6.8224e-03, -2.1518e-03,\n -4.2508e-03, -1.1316e-03, 2.9718e-03, 2.6768e-03, -7.6761e-03,\n 3.0311e-03, 7.9466e-05, 5.1299e-04, -1.4276e-03, -5.1013e-03,\n 1.3634e-03, -1.6453e-03, 1.0236e-03, -3.8248e-03, 4.7792e-03,\n 2.0802e-03, -9.8855e-04, 3.5274e-03, 1.8911e-03, 2.6718e-03,\n 1.2421e-03, 3.3167e-03, -8.6767e-05, 1.2573e-03, 4.3397e-03,\n 3.9021e-03, -2.7561e-03, -2.2973e-03, 4.6992e-04, 5.8292e-04,\n 3.7865e-03, 1.7858e-03, 1.9339e-03, -2.5471e-05, 7.6416e-04,\n 7.3757e-03, 4.4219e-03, 6.6600e-04, -9.1710e-04, -4.4364e-04,\n 6.6258e-03, -2.6358e-03, 5.3791e-04, 4.8576e-03, 2.0443e-03,\n -1.6349e-03, -1.0610e-03, 3.9158e-03, -4.7365e-03, 3.3434e-03,\n 2.5453e-03, 1.3712e-04, 6.9318e-03, -2.9493e-03, -4.8875e-03,\n 8.1606e-04, 2.8498e-03, 5.8886e-04, -2.1460e-03, 2.6048e-03,\n -4.5482e-03, 1.1032e-03, -1.0578e-03, 9.8037e-04, 5.6383e-04,\n -1.7650e-03, -1.5033e-03, 2.0766e-03, -9.6531e-03, 2.7524e-03,\n -5.4171e-04, -2.1569e-03, 6.5133e-03, 1.0654e-03, 1.4745e-03,\n 5.4096e-04, -4.5697e-04, 2.8161e-03, 3.5587e-03, -4.4890e-03,\n 4.2144e-03, -1.6591e-03, 2.9212e-03, -4.1241e-04, 1.3523e-03,\n 2.4092e-03, 2.8117e-03, 9.0245e-04, 1.5960e-03, 1.6028e-03,\n 2.7801e-03, 2.8091e-03, 1.8347e-03, -2.0994e-03, 3.0984e-03,\n 2.9965e-04, 1.5932e-03, 1.3356e-03], device='cuda:0')",
68
+ "exp_avg_sq": "tensor([1.4425e-04, 9.0641e-05, 1.2819e-04, 1.1301e-04, 9.7713e-05, 1.0580e-04,\n 9.8471e-05, 1.4682e-04, 1.4415e-04, 9.7967e-05, 7.4123e-05, 8.3457e-05,\n 1.1448e-04, 1.1730e-04, 1.6230e-04, 9.9552e-05, 1.6960e-04, 1.2741e-04,\n 1.0271e-04, 1.2548e-04, 8.8777e-05, 1.6929e-04, 1.5446e-04, 1.0888e-04,\n 1.3576e-04, 1.4415e-04, 9.3998e-05, 1.3373e-04, 4.9139e-05, 7.8526e-05,\n 1.9502e-04, 1.7295e-04, 1.0278e-04, 1.0518e-04, 1.2441e-04, 1.3253e-04,\n 1.0428e-04, 1.4029e-04, 1.2825e-04, 1.4820e-04, 1.4324e-04, 1.2369e-04,\n 1.3479e-04, 1.2001e-04, 9.2725e-05, 8.9702e-05, 8.5553e-05, 1.4858e-04,\n 1.3405e-04, 1.1584e-04, 7.7210e-05, 1.4098e-04, 1.0353e-04, 9.6035e-05,\n 1.4077e-04, 1.0701e-04, 1.3199e-04, 1.1965e-04, 7.4144e-05, 6.4421e-05,\n 1.1444e-04, 7.9418e-05, 1.1230e-04, 7.3746e-05, 1.0393e-04, 1.2130e-04,\n 1.1735e-04, 1.4676e-04, 1.6776e-04, 6.9688e-05, 8.4011e-05, 8.0668e-05,\n 6.9484e-05, 1.4391e-04, 1.5474e-04, 1.2837e-04, 9.2365e-05, 1.3782e-04,\n 1.3153e-04, 1.8019e-04, 1.4443e-04, 1.1690e-04, 7.6280e-05, 1.3415e-04,\n 1.1298e-04, 1.1416e-04, 8.5455e-05, 5.8020e-05, 1.6310e-04, 1.3454e-04,\n 1.2416e-04, 4.9586e-05, 1.2207e-04, 1.1046e-04, 7.3508e-05, 1.1366e-04,\n 1.2578e-04, 1.0918e-04, 1.1279e-04, 1.2444e-04, 1.7101e-04, 1.1635e-04,\n 1.4584e-04, 1.1334e-04, 1.3574e-04, 8.5872e-05, 8.6996e-05, 1.0951e-04,\n 1.4999e-04, 8.6027e-05, 8.6375e-05, 1.3263e-04, 8.5666e-05, 7.5176e-05,\n 1.3185e-04, 1.3245e-04, 1.4692e-04, 5.5475e-05, 1.1820e-04, 1.1941e-04,\n 1.1869e-04, 1.2067e-04, 6.3845e-05, 9.7051e-05, 1.0252e-04, 9.6802e-05,\n 1.0078e-04, 7.9017e-05, 4.7250e-05, 7.0113e-05, 1.0889e-04, 1.4692e-04,\n 8.6684e-05, 1.4281e-04, 1.6632e-04, 8.1970e-05, 1.4587e-04, 1.2451e-04,\n 1.3505e-04, 1.2893e-04, 1.2911e-04, 1.1006e-04, 1.1271e-04, 1.3076e-04,\n 7.8089e-05, 1.5964e-04, 9.6280e-05, 1.0430e-04, 1.5578e-04, 7.8856e-05,\n 1.1245e-04, 1.5280e-04, 1.1788e-04, 8.9899e-05, 1.3544e-04, 8.1366e-05,\n 9.3395e-05, 1.5727e-04, 8.8573e-05, 1.0756e-04, 9.1404e-05, 3.7725e-05,\n 1.3221e-04, 1.3614e-04, 1.3094e-04, 8.4900e-05, 1.1948e-04, 1.1303e-04,\n 7.5986e-05, 8.6645e-05, 1.0966e-04, 1.3982e-04, 8.5686e-05, 9.3603e-05,\n 1.0077e-04, 1.1456e-04, 7.3271e-05, 8.2144e-05, 1.8151e-04, 1.0512e-04,\n 1.1249e-04, 1.0650e-04, 4.9766e-05, 1.0740e-04, 6.8003e-05, 8.7363e-05,\n 1.3201e-04, 1.1525e-04, 1.2748e-04, 1.3985e-04, 1.1676e-04, 1.9345e-04,\n 1.0383e-04, 7.6051e-05, 1.1971e-04, 9.2036e-05, 7.9563e-05, 1.1391e-04,\n 9.8069e-05, 1.2132e-04, 8.8380e-05, 7.6595e-05, 1.1963e-04, 1.7280e-04,\n 1.5033e-04, 1.0007e-04, 1.3342e-04, 6.3289e-05, 3.3601e-05, 1.0042e-04,\n 1.5852e-04, 1.3181e-04, 7.6861e-05, 1.0655e-04, 1.0515e-04, 8.1491e-05,\n 1.1141e-04, 1.6644e-04, 6.5648e-05, 1.0291e-04, 6.2401e-05, 1.0694e-04,\n 1.1320e-04, 1.1744e-04, 7.2985e-05, 1.3219e-04, 1.0551e-04, 1.5711e-04,\n 1.4408e-04, 1.2239e-04, 1.0135e-04, 1.0184e-04, 1.4693e-04, 1.5403e-04,\n 1.1225e-04, 1.0708e-04, 9.3091e-05, 9.9662e-05, 1.1320e-04, 9.5689e-05,\n 1.0827e-04, 1.1401e-04, 1.3665e-04, 6.1626e-05, 1.2532e-04, 1.4344e-04,\n 8.8782e-05, 1.4291e-04, 1.0141e-04, 8.1881e-05, 9.7223e-05, 1.0789e-04,\n 8.1857e-05, 9.1247e-05, 1.2284e-04, 7.9910e-05, 1.4574e-04, 1.4431e-04,\n 1.4310e-04, 8.8770e-05, 1.5667e-04, 9.1605e-05, 1.0465e-04, 9.2497e-05,\n 9.5106e-05, 1.3343e-04, 1.1493e-04, 1.3730e-04, 1.8259e-04, 9.8234e-05,\n 1.0038e-04, 1.0940e-04, 6.2434e-05, 9.8495e-05, 1.0902e-04, 1.4025e-04,\n 6.2532e-05, 9.6447e-05, 1.2552e-04, 1.0302e-04, 9.1914e-05, 1.1201e-04,\n 1.3781e-04, 8.9070e-05, 7.6668e-05, 1.1945e-04, 1.6122e-04, 9.4052e-05,\n 9.8378e-05, 1.0158e-04, 8.8557e-05, 9.4118e-05, 1.6054e-04, 1.1382e-04,\n 1.1722e-04, 6.2020e-05, 8.7344e-05, 1.0602e-04, 8.8738e-05, 1.2298e-04,\n 5.8815e-05, 9.2164e-05, 1.1659e-04, 1.4917e-04, 1.1176e-04, 1.0182e-04,\n 8.7955e-05, 1.2110e-04, 1.3947e-04, 1.3058e-04, 9.7660e-05, 1.3124e-04,\n 9.0994e-05, 9.1565e-05, 1.2495e-04, 1.5127e-04, 1.0957e-04, 1.4193e-04,\n 1.7698e-04, 1.2063e-04, 1.2908e-04, 9.3545e-05, 7.5877e-05, 1.1012e-04,\n 1.1429e-04, 9.9485e-05, 1.4431e-04, 7.6656e-05, 1.2089e-04, 9.3911e-05,\n 1.2192e-04, 5.6167e-05, 1.0977e-04, 8.0065e-05, 1.3902e-04, 9.6243e-05,\n 1.3222e-04, 1.1498e-04, 1.0288e-04, 1.1825e-04, 1.0481e-04, 9.1899e-05,\n 1.3413e-04, 1.5413e-04, 1.1151e-04, 1.5082e-04, 1.4171e-04, 1.0498e-04,\n 1.0431e-04, 9.1577e-05, 8.5282e-05, 1.3835e-04, 1.5608e-04, 1.1720e-04,\n 1.6634e-04, 1.4521e-04, 9.6200e-05, 1.4060e-04, 1.1182e-04, 1.7597e-04,\n 1.4043e-04, 1.5078e-04, 1.0214e-04, 1.2987e-04, 1.0560e-04, 2.6860e-05,\n 8.2629e-05, 9.2162e-05, 5.9190e-05, 1.1929e-04, 1.4148e-04, 1.5832e-04,\n 1.0325e-04, 9.9169e-05, 9.3288e-05, 1.4127e-04, 9.8936e-05, 1.5159e-04,\n 1.1540e-04, 8.2125e-05, 1.1447e-04, 1.0901e-04, 8.8052e-05, 8.1785e-05,\n 1.3504e-04, 1.4705e-04, 1.3071e-04, 1.0549e-04, 1.7994e-04, 1.5004e-04,\n 1.2886e-04, 1.2998e-04, 1.3441e-04, 1.1291e-04, 1.2236e-04, 7.1996e-05,\n 7.8481e-05, 1.0858e-04, 8.3563e-05, 9.2702e-05, 1.3125e-04, 1.1125e-04,\n 4.3908e-05, 1.1711e-04, 1.4245e-04, 1.4354e-04, 2.0119e-04, 1.2567e-04,\n 8.4006e-05, 1.0508e-04, 1.6325e-04, 9.2049e-05, 8.1360e-05, 5.7843e-05,\n 1.5391e-04, 1.1996e-04, 8.9639e-05, 1.0520e-04, 1.0869e-04, 1.7331e-04,\n 9.8769e-05, 1.1283e-04, 1.2449e-04, 1.1139e-04, 1.4988e-04, 1.2678e-04,\n 1.4829e-04, 6.7984e-05, 8.9026e-05, 1.0340e-04, 1.8458e-04, 1.7097e-04,\n 1.3184e-04, 9.8697e-05, 4.7406e-05, 1.3237e-04, 7.4571e-05, 1.5191e-04,\n 1.3771e-04, 1.3983e-04, 1.1993e-04, 9.8522e-05, 1.1394e-04, 1.2240e-04,\n 1.1307e-04, 1.4349e-04, 1.7653e-04, 9.7658e-05, 8.2400e-05, 1.2308e-04,\n 1.2404e-04, 1.4139e-04, 1.5181e-04, 1.0692e-04, 1.1280e-04, 1.2434e-04,\n 9.3232e-05, 1.0533e-04, 9.4253e-05, 1.0640e-04, 1.0035e-04, 1.3205e-04,\n 1.6792e-04, 1.3153e-04, 9.5819e-05, 1.1376e-04, 1.1585e-04, 7.4774e-05,\n 8.8388e-05, 1.4743e-04, 8.7450e-05, 1.5053e-04, 2.0581e-04, 1.2134e-04,\n 5.9050e-05, 1.2524e-04, 1.7278e-04, 9.7404e-05, 9.9938e-05, 1.3577e-04,\n 1.1030e-04, 5.7081e-05, 1.6287e-04, 5.8843e-05, 1.3222e-04, 1.6674e-04,\n 1.1074e-04, 1.3422e-04, 1.0027e-04, 1.1587e-04, 1.3737e-04, 1.5721e-04,\n 1.5033e-04, 1.0926e-04, 1.6437e-04, 7.2658e-05, 8.2661e-05, 7.3326e-05,\n 1.1117e-04, 1.0599e-04, 1.3280e-04, 8.7968e-05, 2.0123e-04, 8.5065e-05,\n 1.0929e-04, 1.0928e-04, 8.6428e-05, 9.4609e-05, 1.0103e-04, 1.5575e-04,\n 8.9611e-05, 1.6755e-04, 1.3053e-04, 9.2282e-05, 9.8795e-05, 1.1798e-04,\n 1.4222e-04, 1.6749e-04, 1.0766e-04, 2.1823e-04, 1.3353e-04, 1.4179e-04,\n 1.2150e-04, 1.2938e-04, 9.2352e-05, 1.1401e-04, 1.1238e-04, 9.4594e-05,\n 9.2684e-05, 1.3074e-04, 1.1927e-04, 1.5678e-04, 1.1092e-04, 7.6962e-05,\n 1.1450e-04, 6.2483e-05, 5.3964e-05, 9.0111e-05, 1.1884e-04, 1.0339e-04,\n 1.5502e-04, 1.2045e-04, 1.1591e-04, 1.1603e-04, 9.9181e-05, 1.5420e-04,\n 1.6942e-04, 8.6657e-05, 1.3899e-04, 1.3585e-04, 8.6692e-05, 5.7510e-05,\n 1.5104e-04, 1.1725e-04, 7.2912e-05, 5.8064e-05, 8.2270e-05, 1.3103e-04,\n 1.4681e-04, 1.3001e-04, 9.6557e-05, 1.1262e-04, 1.1334e-04, 1.3206e-04,\n 1.3558e-04, 1.2112e-04, 1.7580e-04, 1.0730e-04, 1.3728e-04, 1.0669e-04,\n 1.1995e-04, 1.2010e-04, 9.2425e-05, 1.2276e-04, 1.7677e-04, 1.0467e-04,\n 1.0933e-04, 8.3729e-05, 1.7523e-04, 1.3027e-04, 1.0617e-04, 1.3129e-04,\n 9.1720e-05, 6.9234e-05, 8.4639e-05, 1.2571e-04, 1.1909e-04, 1.2253e-04,\n 9.0948e-05, 1.2066e-04, 9.7973e-05, 1.2089e-04, 7.7533e-05, 1.5567e-04,\n 9.7085e-05, 1.8227e-04, 1.2096e-04, 1.0887e-04, 1.3986e-04, 1.4082e-04,\n 1.9613e-04, 7.5428e-05, 7.2215e-05, 1.2079e-04, 6.8667e-05, 9.9800e-05,\n 1.1371e-04, 5.1659e-05, 1.6851e-04, 1.1393e-04, 9.3420e-05, 9.8729e-05,\n 4.7178e-05, 8.2392e-05, 1.0274e-04, 1.0133e-04, 9.0268e-05, 9.7186e-08,\n 9.2415e-05, 1.3530e-04, 1.3431e-04, 1.0265e-04, 8.8498e-05, 1.0395e-04,\n 1.0769e-04, 1.0670e-04, 1.3104e-04, 9.5203e-05, 1.0035e-04, 1.2582e-04,\n 1.3263e-04, 1.3861e-04, 9.5866e-05, 1.0487e-04, 7.2265e-05, 8.3228e-05,\n 1.0122e-04, 1.2161e-04, 9.1576e-05, 1.6642e-04, 1.2829e-04, 7.8834e-05,\n 9.7369e-05, 1.1335e-04, 1.3441e-04, 1.0268e-04, 1.1407e-04, 1.4853e-04,\n 4.5546e-05, 1.1104e-04, 1.4618e-04, 1.1427e-04, 7.8635e-05, 1.4637e-04,\n 1.5422e-04, 1.3303e-04, 1.0544e-04, 4.2826e-05, 1.2048e-04, 8.1644e-05,\n 1.3170e-04, 8.8178e-05, 1.0619e-04, 1.4299e-04, 1.3025e-04, 1.6831e-04,\n 1.2012e-04, 1.1700e-04, 1.8900e-04, 1.0045e-04, 1.3763e-04, 2.9833e-05,\n 6.7193e-05, 8.5600e-05, 1.3541e-04, 1.0076e-04, 1.6942e-04, 1.2680e-04,\n 1.4049e-04, 9.5359e-05, 1.1609e-04, 1.3913e-04, 1.2979e-04, 9.1422e-05,\n 1.0886e-04, 1.0611e-04, 1.8303e-04, 1.3280e-04, 6.1206e-05, 1.2884e-04,\n 7.5242e-05, 1.3043e-04, 8.8486e-05, 1.1173e-04, 1.3842e-04, 1.0669e-04,\n 8.9438e-05, 7.5694e-05, 1.0650e-04, 8.0410e-05, 1.1999e-04, 1.2470e-04,\n 8.8157e-05, 3.7441e-05, 5.4941e-05, 1.2985e-04, 8.7907e-05, 1.1951e-04,\n 1.4396e-04, 8.8010e-05, 1.5499e-04, 8.8715e-05, 7.7535e-05, 1.0309e-04,\n 1.6973e-04, 5.6630e-05, 1.5543e-04, 1.1135e-04, 1.6324e-04, 6.6432e-05,\n 1.6427e-04, 1.4596e-04, 1.2017e-04, 9.6783e-05, 1.1104e-04, 9.8906e-05,\n 8.7955e-05, 1.2261e-04, 1.5838e-04, 1.2074e-04, 8.2804e-05, 1.1969e-04,\n 1.4254e-04, 1.7287e-04, 3.6077e-05, 8.8147e-05, 4.0852e-05, 7.7829e-05,\n 1.0797e-04, 9.3730e-05, 8.2308e-05, 1.6948e-04, 1.3017e-04, 1.3145e-04,\n 7.8189e-05, 1.2731e-04, 2.0584e-04, 5.9594e-05, 1.3098e-04, 1.2685e-04,\n 1.1573e-04, 1.0115e-04, 1.0430e-04, 8.3375e-05, 7.6495e-05, 1.5167e-04,\n 1.2809e-04, 7.6854e-05, 8.1979e-05, 1.2994e-04, 1.1129e-04, 1.4729e-04,\n 6.6452e-05, 1.2145e-04, 1.5836e-04, 1.4597e-04, 1.1367e-04, 7.7476e-05],\n device='cuda:0')"
69
+ },
70
+ "13": {
71
+ "step": "tensor(1252.)",
72
+ "exp_avg": "tensor([[ 1.0028e-05, 5.8652e-05, -2.7339e-05, ..., 3.8177e-05,\n -7.9354e-06, 2.0029e-06],\n [ 1.4749e-06, -7.0987e-05, 2.2846e-05, ..., 2.1535e-05,\n -1.2250e-06, -2.0902e-05],\n [-1.7944e-21, 5.6052e-45, 1.9221e-14, ..., 1.1127e-19,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 2.8026e-44, 3.2202e-21, 8.7133e-21, ..., -1.1077e-12,\n 5.6052e-45, 5.6052e-45],\n [ 3.4985e-06, 2.2143e-05, 1.5465e-06, ..., 2.3442e-05,\n -3.2866e-05, 7.3582e-07],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')",
73
+ "exp_avg_sq": "tensor([[3.0678e-08, 7.0716e-08, 2.6578e-07, ..., 7.9325e-08, 3.7150e-08,\n 1.9338e-08],\n [5.8191e-10, 2.7929e-07, 5.3975e-08, ..., 1.3019e-07, 4.7131e-09,\n 5.4563e-08],\n [1.8300e-10, 7.1920e-11, 9.1536e-11, ..., 1.7186e-10, 2.9158e-10,\n 9.2295e-11],\n ...,\n [2.2200e-10, 9.0974e-11, 1.8433e-10, ..., 2.0640e-10, 2.9783e-10,\n 5.8879e-11],\n [6.7277e-09, 1.3536e-08, 9.7725e-09, ..., 4.6214e-08, 2.2105e-08,\n 3.3085e-08],\n [1.5819e-11, 4.0177e-11, 5.2765e-12, ..., 1.0278e-10, 1.2452e-10,\n 3.1154e-10]], device='cuda:0')"
74
+ },
75
+ "14": {
76
+ "step": "tensor(1252.)",
77
+ "exp_avg": "tensor([-3.9205e-04, 3.3979e-03, 3.3139e-13, ..., 4.5192e-11,\n 4.9312e-03, 8.1955e-34], device='cuda:0')",
78
+ "exp_avg_sq": "tensor([2.7204e-04, 1.7077e-04, 5.3364e-07, ..., 3.2237e-07, 2.4956e-04,\n 4.2516e-07], device='cuda:0')"
79
+ },
80
+ "15": {
81
+ "step": "tensor(1252.)",
82
+ "exp_avg": "tensor([[ 1.6537e-05, -1.6412e-05, 1.7351e-15, ..., -3.5602e-14,\n -2.8445e-05, -3.5480e-38],\n [ 2.7806e-05, 3.6423e-05, 1.4179e-15, ..., -1.8087e-14,\n 6.5267e-05, 3.0549e-37],\n [-4.3664e-05, -3.7434e-05, -1.6657e-16, ..., 3.2190e-14,\n 4.4236e-05, -2.4694e-36],\n ...,\n [-4.8296e-06, -6.5051e-06, -4.5930e-16, ..., 2.2902e-14,\n -7.3178e-05, 1.0793e-36],\n [-5.1397e-06, -5.5997e-07, 1.1155e-15, ..., 1.4088e-14,\n -4.0886e-05, 5.2565e-37],\n [ 2.5196e-05, -1.9436e-05, 2.3776e-15, ..., 6.5804e-14,\n -2.7514e-05, 2.0440e-37]], device='cuda:0')",
83
+ "exp_avg_sq": "tensor([[1.1504e-08, 1.1983e-08, 9.5027e-10, ..., 1.0574e-09, 1.4966e-08,\n 2.0861e-12],\n [2.1426e-08, 1.4491e-08, 6.3745e-10, ..., 5.0018e-10, 2.4404e-08,\n 1.0032e-11],\n [2.2603e-08, 2.0371e-08, 9.5595e-10, ..., 2.0004e-09, 1.4143e-08,\n 1.1132e-11],\n ...,\n [2.0336e-08, 2.5000e-08, 5.5699e-10, ..., 4.9564e-10, 3.3213e-08,\n 2.1593e-13],\n [3.4632e-08, 1.5072e-08, 6.9465e-10, ..., 4.5448e-10, 3.5708e-08,\n 3.8778e-12],\n [2.5753e-08, 2.0554e-08, 9.0051e-10, ..., 1.1916e-09, 2.2594e-08,\n 8.4316e-13]], device='cuda:0')"
84
+ },
85
+ "16": {
86
+ "step": "tensor(1252.)",
87
+ "exp_avg": "tensor([[-1.2615e-05, -4.4866e-05, 7.6910e-05, ..., 2.4850e-05,\n 4.7285e-05, -5.8248e-05],\n [-9.2113e-05, -1.0799e-04, -2.3123e-05, ..., 6.9309e-05,\n -1.5636e-05, -3.2883e-06],\n [ 1.4962e-05, 5.1615e-05, -5.1669e-05, ..., -2.2054e-05,\n 5.8570e-05, 1.3382e-04],\n ...,\n [ 9.9513e-05, 7.0477e-05, -9.1406e-05, ..., -1.3692e-04,\n -7.1785e-05, -1.1458e-04],\n [ 1.6101e-05, 1.9173e-05, 2.3902e-07, ..., 4.5197e-05,\n 1.3297e-05, -4.1407e-06],\n [-4.0990e-05, 1.9234e-05, 8.1815e-05, ..., 2.4802e-05,\n 3.3396e-05, -2.9530e-05]], device='cuda:0')",
88
+ "exp_avg_sq": "tensor([[2.1239e-08, 4.3178e-08, 3.8035e-08, ..., 3.0361e-08, 2.2880e-08,\n 2.6587e-08],\n [2.9610e-08, 5.3500e-08, 4.4636e-08, ..., 3.8571e-08, 2.8108e-08,\n 3.9779e-08],\n [3.3169e-08, 5.6815e-08, 4.4240e-08, ..., 4.5404e-08, 2.7140e-08,\n 4.3827e-08],\n ...,\n [4.4767e-08, 9.5367e-08, 6.7216e-08, ..., 6.5536e-08, 4.1392e-08,\n 7.2582e-08],\n [1.3410e-08, 2.5018e-08, 2.5039e-08, ..., 1.8956e-08, 1.2677e-08,\n 1.6630e-08],\n [1.3196e-08, 2.1888e-08, 4.3701e-08, ..., 2.4678e-08, 1.3211e-08,\n 1.9753e-08]], device='cuda:0')"
89
+ },
90
+ "17": {
91
+ "step": "tensor(1252.)",
92
+ "exp_avg": "tensor([-4.8731e-04, -2.2546e-04, 3.2669e-03, ..., 2.6958e-03,\n 6.4566e-05, -1.3442e-03], device='cuda:0')",
93
+ "exp_avg_sq": "tensor([3.7995e-05, 4.2355e-05, 4.6759e-05, ..., 6.7935e-05, 2.2636e-05,\n 2.1263e-05], device='cuda:0')"
94
+ }
95
+ },
96
+ "param_groups": [
97
+ {
98
+ "lr": 0.0009755527298894294,
99
+ "name": "scale_256",
100
+ "betas": [
101
+ 0.9,
102
+ 0.999
103
+ ],
104
+ "eps": 1e-08,
105
+ "weight_decay": 1e-05,
106
+ "amsgrad": false,
107
+ "maximize": false,
108
+ "foreach": null,
109
+ "capturable": false,
110
+ "differentiable": false,
111
+ "fused": null,
112
+ "decoupled_weight_decay": true,
113
+ "initial_lr": 0.001,
114
+ "params": [
115
+ 0,
116
+ 1,
117
+ 2
118
+ ]
119
+ },
120
+ {
121
+ "lr": 0.0009755527298894294,
122
+ "name": "scale_512",
123
+ "betas": [
124
+ 0.9,
125
+ 0.999
126
+ ],
127
+ "eps": 1e-08,
128
+ "weight_decay": 1e-05,
129
+ "amsgrad": false,
130
+ "maximize": false,
131
+ "foreach": null,
132
+ "capturable": false,
133
+ "differentiable": false,
134
+ "fused": null,
135
+ "decoupled_weight_decay": true,
136
+ "initial_lr": 0.001,
137
+ "params": [
138
+ 3,
139
+ 4,
140
+ 5,
141
+ 6,
142
+ 7
143
+ ]
144
+ },
145
+ {
146
+ "lr": 0.0009755527298894294,
147
+ "name": "scale_768",
148
+ "betas": [
149
+ 0.9,
150
+ 0.999
151
+ ],
152
+ "eps": 1e-08,
153
+ "weight_decay": 1e-05,
154
+ "amsgrad": false,
155
+ "maximize": false,
156
+ "foreach": null,
157
+ "capturable": false,
158
+ "differentiable": false,
159
+ "fused": null,
160
+ "decoupled_weight_decay": true,
161
+ "initial_lr": 0.001,
162
+ "params": [
163
+ 8,
164
+ 9,
165
+ 10,
166
+ 11,
167
+ 12
168
+ ]
169
+ },
170
+ {
171
+ "lr": 0.0009755527298894294,
172
+ "name": "scale_1024",
173
+ "betas": [
174
+ 0.9,
175
+ 0.999
176
+ ],
177
+ "eps": 1e-08,
178
+ "weight_decay": 1e-05,
179
+ "amsgrad": false,
180
+ "maximize": false,
181
+ "foreach": null,
182
+ "capturable": false,
183
+ "differentiable": false,
184
+ "fused": null,
185
+ "decoupled_weight_decay": true,
186
+ "initial_lr": 0.001,
187
+ "params": [
188
+ 13,
189
+ 14,
190
+ 15,
191
+ 16,
192
+ 17
193
+ ]
194
+ }
195
+ ]
196
+ },
197
+ "scheduler_state_dict": {
198
+ "T_0": 10,
199
+ "T_i": 10,
200
+ "T_mult": 2,
201
+ "eta_min": 1e-06,
202
+ "T_cur": 1,
203
+ "base_lrs": [
204
+ 0.001,
205
+ 0.001,
206
+ 0.001,
207
+ 0.001
208
+ ],
209
+ "last_epoch": 1,
210
+ "_step_count": 0,
211
+ "_is_initial": false,
212
+ "_get_lr_called_within_step": false,
213
+ "_last_lr": [
214
+ 0.0009755527298894294,
215
+ 0.0009755527298894294,
216
+ 0.0009755527298894294,
217
+ 0.0009755527298894294
218
+ ]
219
+ },
220
+ "metrics": {
221
+ "best_val_acc": 72.232,
222
+ "best_epoch": 0,
223
+ "scale_accuracies": {
224
+ "256": 65.866,
225
+ "512": 69.958,
226
+ "768": 71.406,
227
+ "1024": 71.52
228
+ },
229
+ "training_history": {
230
+ "epochs": [
231
+ 1
232
+ ],
233
+ "train_loss": [
234
+ 3.9085216390819975
235
+ ],
236
+ "train_acc": [
237
+ 68.41535880958533
238
+ ],
239
+ "val_acc": [
240
+ 72.232
241
+ ],
242
+ "scale_accs": {
243
+ "256": [
244
+ 65.866
245
+ ],
246
+ "512": [
247
+ 69.958
248
+ ],
249
+ "768": [
250
+ 71.406
251
+ ],
252
+ "1024": [
253
+ 71.52
254
+ ]
255
+ },
256
+ "lr": [
257
+ 0.0009755527298894294
258
+ ]
259
+ }
260
+ },
261
+ "train_config": {
262
+ "name": "david_training",
263
+ "run_id": "20251012_151647",
264
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
265
+ "model_variant": "clip_vit_laion_b32",
266
+ "num_classes": 1000,
267
+ "preset": "hierarchical_refinement",
268
+ "custom_config_path": null,
269
+ "num_classes_override": null,
270
+ "use_belly_override": null,
271
+ "belly_expand_override": null,
272
+ "progressive_training_override": false,
273
+ "scale_warmup_epochs_override": null,
274
+ "num_epochs": 10,
275
+ "batch_size": 1024,
276
+ "learning_rate": 0.001,
277
+ "weight_decay": 1e-05,
278
+ "warmup_epochs": 3,
279
+ "use_rose_loss": true,
280
+ "rose_initial_weight": 0.1,
281
+ "rose_max_weight": 0.5,
282
+ "rose_weight_schedule": "adaptive",
283
+ "use_cayley_loss": false,
284
+ "cayley_weight": 0.001,
285
+ "scale_loss_balance": null,
286
+ "use_mixed_precision": true,
287
+ "gradient_clip": 10.0,
288
+ "scheduler_type": "cosine_restarts",
289
+ "min_lr": 1e-06,
290
+ "freeze_strategy": "performance",
291
+ "freeze_threshold": 75.0,
292
+ "unfreeze_on_plateau": true,
293
+ "patience": 10,
294
+ "track_gradients": true,
295
+ "gradient_scale_threshold": 1e-05,
296
+ "gradient_scale_multiplier": 10.0,
297
+ "log_interval": 50,
298
+ "val_interval": 1,
299
+ "save_interval": 5,
300
+ "log_fusion_weights": true,
301
+ "log_loss_components": true,
302
+ "save_format": "safetensors",
303
+ "hf_repo": "AbstractPhil/gated-david",
304
+ "upload_to_hub": true,
305
+ "base_dir": "./david_training",
306
+ "num_workers": 10,
307
+ "pin_memory": true,
308
+ "prefetch_factor": 4,
309
+ "persistent_workers": true
310
+ }
311
+ }