{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.6713286713286712, "eval_steps": 500, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.027972027972027972, "grad_norm": 1.1463329792022705, "learning_rate": 0.0, "loss": 1.4132, "step": 1 }, { "epoch": 0.055944055944055944, "grad_norm": 0.8597254753112793, "learning_rate": 4e-05, "loss": 0.5978, "step": 2 }, { "epoch": 0.08391608391608392, "grad_norm": 1.196772575378418, "learning_rate": 8e-05, "loss": 1.3866, "step": 3 }, { "epoch": 0.11188811188811189, "grad_norm": 1.092492699623108, "learning_rate": 0.00012, "loss": 1.5973, "step": 4 }, { "epoch": 0.13986013986013987, "grad_norm": 1.1433535814285278, "learning_rate": 0.00016, "loss": 1.3213, "step": 5 }, { "epoch": 0.16783216783216784, "grad_norm": 1.0283416509628296, "learning_rate": 0.0002, "loss": 1.0864, "step": 6 }, { "epoch": 0.1958041958041958, "grad_norm": 1.0165659189224243, "learning_rate": 0.00019636363636363636, "loss": 0.7085, "step": 7 }, { "epoch": 0.22377622377622378, "grad_norm": 1.665509581565857, "learning_rate": 0.00019272727272727274, "loss": 0.9285, "step": 8 }, { "epoch": 0.2517482517482518, "grad_norm": 1.216216802597046, "learning_rate": 0.0001890909090909091, "loss": 1.1977, "step": 9 }, { "epoch": 0.27972027972027974, "grad_norm": 1.0458927154541016, "learning_rate": 0.00018545454545454545, "loss": 0.6523, "step": 10 }, { "epoch": 0.3076923076923077, "grad_norm": 0.8290866613388062, "learning_rate": 0.00018181818181818183, "loss": 0.5238, "step": 11 }, { "epoch": 0.3356643356643357, "grad_norm": 1.3056946992874146, "learning_rate": 0.0001781818181818182, "loss": 0.9392, "step": 12 }, { "epoch": 0.36363636363636365, "grad_norm": 0.89715576171875, "learning_rate": 0.00017454545454545454, "loss": 0.6422, "step": 13 }, { "epoch": 0.3916083916083916, "grad_norm": 0.9536185264587402, "learning_rate": 0.0001709090909090909, "loss": 0.5259, "step": 14 }, { "epoch": 0.4195804195804196, "grad_norm": 2.0107877254486084, "learning_rate": 0.00016727272727272728, "loss": 1.6038, "step": 15 }, { "epoch": 0.44755244755244755, "grad_norm": 1.396262288093567, "learning_rate": 0.00016363636363636366, "loss": 0.9653, "step": 16 }, { "epoch": 0.4755244755244755, "grad_norm": 1.4076898097991943, "learning_rate": 0.00016, "loss": 1.2386, "step": 17 }, { "epoch": 0.5034965034965035, "grad_norm": 1.2920570373535156, "learning_rate": 0.00015636363636363637, "loss": 1.3082, "step": 18 }, { "epoch": 0.5314685314685315, "grad_norm": 0.8619877696037292, "learning_rate": 0.00015272727272727275, "loss": 0.4883, "step": 19 }, { "epoch": 0.5594405594405595, "grad_norm": 1.2779756784439087, "learning_rate": 0.0001490909090909091, "loss": 0.6035, "step": 20 }, { "epoch": 0.5874125874125874, "grad_norm": 1.474133014678955, "learning_rate": 0.00014545454545454546, "loss": 0.8825, "step": 21 }, { "epoch": 0.6153846153846154, "grad_norm": 1.3216720819473267, "learning_rate": 0.00014181818181818184, "loss": 0.8638, "step": 22 }, { "epoch": 0.6433566433566433, "grad_norm": 1.0999218225479126, "learning_rate": 0.0001381818181818182, "loss": 0.6484, "step": 23 }, { "epoch": 0.6713286713286714, "grad_norm": 1.1263219118118286, "learning_rate": 0.00013454545454545455, "loss": 0.5965, "step": 24 }, { "epoch": 0.6993006993006993, "grad_norm": 1.020799994468689, "learning_rate": 0.00013090909090909093, "loss": 0.5809, "step": 25 }, { "epoch": 0.7272727272727273, "grad_norm": 
1.032562494277954, "learning_rate": 0.00012727272727272728, "loss": 0.6899, "step": 26 }, { "epoch": 0.7552447552447552, "grad_norm": 1.8015700578689575, "learning_rate": 0.00012363636363636364, "loss": 1.351, "step": 27 }, { "epoch": 0.7832167832167832, "grad_norm": 1.6515522003173828, "learning_rate": 0.00012, "loss": 1.3753, "step": 28 }, { "epoch": 0.8111888111888111, "grad_norm": 1.4862653017044067, "learning_rate": 0.00011636363636363636, "loss": 1.045, "step": 29 }, { "epoch": 0.8391608391608392, "grad_norm": 1.2828856706619263, "learning_rate": 0.00011272727272727272, "loss": 1.1004, "step": 30 }, { "epoch": 0.8671328671328671, "grad_norm": 0.9894140362739563, "learning_rate": 0.00010909090909090909, "loss": 0.5355, "step": 31 }, { "epoch": 0.8951048951048951, "grad_norm": 1.5945513248443604, "learning_rate": 0.00010545454545454545, "loss": 1.1733, "step": 32 }, { "epoch": 0.9230769230769231, "grad_norm": 1.453596830368042, "learning_rate": 0.00010181818181818181, "loss": 1.1949, "step": 33 }, { "epoch": 0.951048951048951, "grad_norm": 1.5049810409545898, "learning_rate": 9.818181818181818e-05, "loss": 1.0341, "step": 34 }, { "epoch": 0.9790209790209791, "grad_norm": 1.3859373331069946, "learning_rate": 9.454545454545455e-05, "loss": 0.9874, "step": 35 }, { "epoch": 1.0, "grad_norm": 1.5079317092895508, "learning_rate": 9.090909090909092e-05, "loss": 0.9925, "step": 36 }, { "epoch": 1.027972027972028, "grad_norm": 1.2381432056427002, "learning_rate": 8.727272727272727e-05, "loss": 0.9168, "step": 37 }, { "epoch": 1.055944055944056, "grad_norm": 1.0585517883300781, "learning_rate": 8.363636363636364e-05, "loss": 0.9168, "step": 38 }, { "epoch": 1.083916083916084, "grad_norm": 1.246953010559082, "learning_rate": 8e-05, "loss": 0.9327, "step": 39 }, { "epoch": 1.1118881118881119, "grad_norm": 1.295661211013794, "learning_rate": 7.636363636363637e-05, "loss": 0.8212, "step": 40 }, { "epoch": 1.1398601398601398, "grad_norm": 1.1516053676605225, "learning_rate": 7.272727272727273e-05, "loss": 0.5509, "step": 41 }, { "epoch": 1.167832167832168, "grad_norm": 0.874414324760437, "learning_rate": 6.90909090909091e-05, "loss": 0.3707, "step": 42 }, { "epoch": 1.1958041958041958, "grad_norm": 1.9163153171539307, "learning_rate": 6.545454545454546e-05, "loss": 1.2245, "step": 43 }, { "epoch": 1.2237762237762237, "grad_norm": 1.3832831382751465, "learning_rate": 6.181818181818182e-05, "loss": 0.8484, "step": 44 }, { "epoch": 1.2517482517482517, "grad_norm": 1.5212609767913818, "learning_rate": 5.818181818181818e-05, "loss": 0.563, "step": 45 }, { "epoch": 1.2797202797202798, "grad_norm": 1.087664008140564, "learning_rate": 5.4545454545454546e-05, "loss": 0.399, "step": 46 }, { "epoch": 1.3076923076923077, "grad_norm": 1.8231722116470337, "learning_rate": 5.090909090909091e-05, "loss": 0.9092, "step": 47 }, { "epoch": 1.3356643356643356, "grad_norm": 1.591951608657837, "learning_rate": 4.7272727272727275e-05, "loss": 0.9348, "step": 48 }, { "epoch": 1.3636363636363638, "grad_norm": 1.0458203554153442, "learning_rate": 4.3636363636363636e-05, "loss": 0.3926, "step": 49 }, { "epoch": 1.3916083916083917, "grad_norm": 1.0491923093795776, "learning_rate": 4e-05, "loss": 0.3799, "step": 50 }, { "epoch": 1.4195804195804196, "grad_norm": 1.5752729177474976, "learning_rate": 3.6363636363636364e-05, "loss": 0.6908, "step": 51 }, { "epoch": 1.4475524475524475, "grad_norm": 1.6831164360046387, "learning_rate": 3.272727272727273e-05, "loss": 0.7934, "step": 52 }, { "epoch": 1.4755244755244754, "grad_norm": 
1.3585453033447266, "learning_rate": 2.909090909090909e-05, "loss": 0.3979, "step": 53 }, { "epoch": 1.5034965034965035, "grad_norm": 1.3879740238189697, "learning_rate": 2.5454545454545454e-05, "loss": 0.962, "step": 54 }, { "epoch": 1.5314685314685315, "grad_norm": 1.542452096939087, "learning_rate": 2.1818181818181818e-05, "loss": 0.6852, "step": 55 }, { "epoch": 1.5594405594405596, "grad_norm": 1.3172391653060913, "learning_rate": 1.8181818181818182e-05, "loss": 1.1097, "step": 56 }, { "epoch": 1.5874125874125875, "grad_norm": 1.2537016868591309, "learning_rate": 1.4545454545454545e-05, "loss": 0.6174, "step": 57 }, { "epoch": 1.6153846153846154, "grad_norm": 1.3360239267349243, "learning_rate": 1.0909090909090909e-05, "loss": 0.8211, "step": 58 }, { "epoch": 1.6433566433566433, "grad_norm": 1.0257346630096436, "learning_rate": 7.272727272727272e-06, "loss": 0.374, "step": 59 }, { "epoch": 1.6713286713286712, "grad_norm": 1.2555755376815796, "learning_rate": 3.636363636363636e-06, "loss": 1.0604, "step": 60 } ], "logging_steps": 1, "max_steps": 60, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4818101472165888.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }