| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.6230098297106466, | |
| "eval_steps": 1000, | |
| "global_step": 9000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06922331441229405, | |
| "grad_norm": 52.84375, | |
| "learning_rate": 2.9483376313193374e-05, | |
| "logits/chosen": -0.7172003388404846, | |
| "logits/rejected": -0.8999207019805908, | |
| "logps/chosen": -304.1668701171875, | |
| "logps/rejected": -216.46609497070312, | |
| "loss": 0.6793, | |
| "rewards/accuracies": 0.5619999766349792, | |
| "rewards/chosen": 0.10503997653722763, | |
| "rewards/margins": 0.037437956780195236, | |
| "rewards/rejected": 0.06760203093290329, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.06922331441229405, | |
| "eval_logits/chosen": -0.6456733345985413, | |
| "eval_logits/rejected": -0.8373212218284607, | |
| "eval_logps/chosen": -304.64984130859375, | |
| "eval_logps/rejected": -217.35638427734375, | |
| "eval_loss": 0.6695967316627502, | |
| "eval_rewards/accuracies": 0.5669527053833008, | |
| "eval_rewards/chosen": 0.20719687640666962, | |
| "eval_rewards/margins": 0.07671476900577545, | |
| "eval_rewards/rejected": 0.13048213720321655, | |
| "eval_runtime": 1809.5718, | |
| "eval_samples_per_second": 3.992, | |
| "eval_steps_per_second": 0.998, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1384466288245881, | |
| "grad_norm": 20.1875, | |
| "learning_rate": 2.8964156527458075e-05, | |
| "logits/chosen": -0.6381000280380249, | |
| "logits/rejected": -0.8281131386756897, | |
| "logps/chosen": -298.145263671875, | |
| "logps/rejected": -213.81825256347656, | |
| "loss": 0.6633, | |
| "rewards/accuracies": 0.5932499766349792, | |
| "rewards/chosen": 0.25646814703941345, | |
| "rewards/margins": 0.09272526204586029, | |
| "rewards/rejected": 0.16374288499355316, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.1384466288245881, | |
| "eval_logits/chosen": -0.5784724354743958, | |
| "eval_logits/rejected": -0.774167001247406, | |
| "eval_logps/chosen": -304.0080871582031, | |
| "eval_logps/rejected": -217.06524658203125, | |
| "eval_loss": 0.6561135649681091, | |
| "eval_rewards/accuracies": 0.5975452661514282, | |
| "eval_rewards/chosen": 0.27137282490730286, | |
| "eval_rewards/margins": 0.11177627742290497, | |
| "eval_rewards/rejected": 0.1595965474843979, | |
| "eval_runtime": 1809.9391, | |
| "eval_samples_per_second": 3.991, | |
| "eval_steps_per_second": 0.998, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.20766994323688218, | |
| "grad_norm": 45.59375, | |
| "learning_rate": 2.8444936741722772e-05, | |
| "logits/chosen": -0.6119475960731506, | |
| "logits/rejected": -0.8187024593353271, | |
| "logps/chosen": -306.91265869140625, | |
| "logps/rejected": -214.48626708984375, | |
| "loss": 0.6522, | |
| "rewards/accuracies": 0.6052500009536743, | |
| "rewards/chosen": 0.26545238494873047, | |
| "rewards/margins": 0.13211578130722046, | |
| "rewards/rejected": 0.13333660364151, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.20766994323688218, | |
| "eval_logits/chosen": -0.5946438908576965, | |
| "eval_logits/rejected": -0.7924441695213318, | |
| "eval_logps/chosen": -303.99310302734375, | |
| "eval_logps/rejected": -217.3607177734375, | |
| "eval_loss": 0.6483559608459473, | |
| "eval_rewards/accuracies": 0.6118955016136169, | |
| "eval_rewards/chosen": 0.27287325263023376, | |
| "eval_rewards/margins": 0.14282457530498505, | |
| "eval_rewards/rejected": 0.13004866242408752, | |
| "eval_runtime": 1808.1656, | |
| "eval_samples_per_second": 3.995, | |
| "eval_steps_per_second": 0.999, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.2768932576491762, | |
| "grad_norm": 15.4453125, | |
| "learning_rate": 2.7925716955987473e-05, | |
| "logits/chosen": -0.6306295394897461, | |
| "logits/rejected": -0.8517826795578003, | |
| "logps/chosen": -307.3712463378906, | |
| "logps/rejected": -212.0769500732422, | |
| "loss": 0.6386, | |
| "rewards/accuracies": 0.6212499737739563, | |
| "rewards/chosen": 0.32602864503860474, | |
| "rewards/margins": 0.17370422184467316, | |
| "rewards/rejected": 0.15232445299625397, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.2768932576491762, | |
| "eval_logits/chosen": -0.6721797585487366, | |
| "eval_logits/rejected": -0.8792731165885925, | |
| "eval_logps/chosen": -303.22210693359375, | |
| "eval_logps/rejected": -216.90435791015625, | |
| "eval_loss": 0.6395601630210876, | |
| "eval_rewards/accuracies": 0.6196474432945251, | |
| "eval_rewards/chosen": 0.3499698042869568, | |
| "eval_rewards/margins": 0.17428545653820038, | |
| "eval_rewards/rejected": 0.1756843477487564, | |
| "eval_runtime": 1809.8644, | |
| "eval_samples_per_second": 3.991, | |
| "eval_steps_per_second": 0.998, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.3461165720614703, | |
| "grad_norm": 57.375, | |
| "learning_rate": 2.7406497170252167e-05, | |
| "logits/chosen": -0.70070880651474, | |
| "logits/rejected": -0.9215654730796814, | |
| "logps/chosen": -302.41265869140625, | |
| "logps/rejected": -212.0404052734375, | |
| "loss": 0.6371, | |
| "rewards/accuracies": 0.625249981880188, | |
| "rewards/chosen": 0.38654881715774536, | |
| "rewards/margins": 0.1891336739063263, | |
| "rewards/rejected": 0.19741514325141907, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.3461165720614703, | |
| "eval_logits/chosen": -0.6983235478401184, | |
| "eval_logits/rejected": -0.9088946580886841, | |
| "eval_logps/chosen": -302.4079895019531, | |
| "eval_logps/rejected": -216.43348693847656, | |
| "eval_loss": 0.6330362558364868, | |
| "eval_rewards/accuracies": 0.6294757723808289, | |
| "eval_rewards/chosen": 0.43138742446899414, | |
| "eval_rewards/margins": 0.20861481130123138, | |
| "eval_rewards/rejected": 0.22277262806892395, | |
| "eval_runtime": 1811.3565, | |
| "eval_samples_per_second": 3.988, | |
| "eval_steps_per_second": 0.997, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.41533988647376435, | |
| "grad_norm": 68.1875, | |
| "learning_rate": 2.6887277384516867e-05, | |
| "logits/chosen": -0.7243251800537109, | |
| "logits/rejected": -0.9372639656066895, | |
| "logps/chosen": -298.91192626953125, | |
| "logps/rejected": -214.60647583007812, | |
| "loss": 0.6348, | |
| "rewards/accuracies": 0.6307500004768372, | |
| "rewards/chosen": 0.4019148051738739, | |
| "rewards/margins": 0.20437340438365936, | |
| "rewards/rejected": 0.19754138588905334, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.41533988647376435, | |
| "eval_logits/chosen": -0.7277015447616577, | |
| "eval_logits/rejected": -0.9341850280761719, | |
| "eval_logps/chosen": -302.8192138671875, | |
| "eval_logps/rejected": -216.93202209472656, | |
| "eval_loss": 0.626986563205719, | |
| "eval_rewards/accuracies": 0.6369508504867554, | |
| "eval_rewards/chosen": 0.3902588188648224, | |
| "eval_rewards/margins": 0.21733936667442322, | |
| "eval_rewards/rejected": 0.17291945219039917, | |
| "eval_runtime": 1811.3554, | |
| "eval_samples_per_second": 3.988, | |
| "eval_steps_per_second": 0.997, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.4845632008860584, | |
| "grad_norm": 42.96875, | |
| "learning_rate": 2.6368057598781565e-05, | |
| "logits/chosen": -0.7278515100479126, | |
| "logits/rejected": -0.9392414093017578, | |
| "logps/chosen": -299.16754150390625, | |
| "logps/rejected": -211.17393493652344, | |
| "loss": 0.6228, | |
| "rewards/accuracies": 0.6457499861717224, | |
| "rewards/chosen": 0.4171576201915741, | |
| "rewards/margins": 0.23923394083976746, | |
| "rewards/rejected": 0.17792373895645142, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.4845632008860584, | |
| "eval_logits/chosen": -0.6790074706077576, | |
| "eval_logits/rejected": -0.8909901976585388, | |
| "eval_logps/chosen": -302.5709533691406, | |
| "eval_logps/rejected": -216.92449951171875, | |
| "eval_loss": 0.621570348739624, | |
| "eval_rewards/accuracies": 0.6430416703224182, | |
| "eval_rewards/chosen": 0.41509076952934265, | |
| "eval_rewards/margins": 0.24142169952392578, | |
| "eval_rewards/rejected": 0.17366909980773926, | |
| "eval_runtime": 1809.1079, | |
| "eval_samples_per_second": 3.993, | |
| "eval_steps_per_second": 0.998, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.5537865152983524, | |
| "grad_norm": 68.9375, | |
| "learning_rate": 2.5848837813046265e-05, | |
| "logits/chosen": -0.6841186285018921, | |
| "logits/rejected": -0.8984114527702332, | |
| "logps/chosen": -307.65289306640625, | |
| "logps/rejected": -220.9628448486328, | |
| "loss": 0.6251, | |
| "rewards/accuracies": 0.6430000066757202, | |
| "rewards/chosen": 0.38440626859664917, | |
| "rewards/margins": 0.23852017521858215, | |
| "rewards/rejected": 0.1458861082792282, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.5537865152983524, | |
| "eval_logits/chosen": -0.6851416230201721, | |
| "eval_logits/rejected": -0.9019606709480286, | |
| "eval_logps/chosen": -302.9433898925781, | |
| "eval_logps/rejected": -217.45289611816406, | |
| "eval_loss": 0.6197263598442078, | |
| "eval_rewards/accuracies": 0.6425802707672119, | |
| "eval_rewards/chosen": 0.3778453469276428, | |
| "eval_rewards/margins": 0.25701332092285156, | |
| "eval_rewards/rejected": 0.12083201110363007, | |
| "eval_runtime": 1812.2911, | |
| "eval_samples_per_second": 3.986, | |
| "eval_steps_per_second": 0.997, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.6230098297106466, | |
| "grad_norm": 23.046875, | |
| "learning_rate": 2.532961802731096e-05, | |
| "logits/chosen": -0.6993722915649414, | |
| "logits/rejected": -0.9128779172897339, | |
| "logps/chosen": -300.31256103515625, | |
| "logps/rejected": -215.20162963867188, | |
| "loss": 0.615, | |
| "rewards/accuracies": 0.640250027179718, | |
| "rewards/chosen": 0.4151590168476105, | |
| "rewards/margins": 0.2731047570705414, | |
| "rewards/rejected": 0.1420542448759079, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.6230098297106466, | |
| "eval_logits/chosen": -0.709079384803772, | |
| "eval_logits/rejected": -0.9235769510269165, | |
| "eval_logps/chosen": -302.36663818359375, | |
| "eval_logps/rejected": -217.17535400390625, | |
| "eval_loss": 0.6112583875656128, | |
| "eval_rewards/accuracies": 0.651024341583252, | |
| "eval_rewards/chosen": 0.43552058935165405, | |
| "eval_rewards/margins": 0.2869325280189514, | |
| "eval_rewards/rejected": 0.14858807623386383, | |
| "eval_runtime": 1804.2977, | |
| "eval_samples_per_second": 4.003, | |
| "eval_steps_per_second": 1.001, | |
| "step": 9000 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 57784, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 1000, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |