{ "best_metric": 0.9679633867276888, "best_model_checkpoint": "/content/drive/MyDrive/Papers/RAG_3GPP/models/checkpoints/embedding/bge-small-telecom_10e_256bs/checkpoint-150", "epoch": 6.857142857142857, "eval_steps": 15, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7142857142857143, "grad_norm": 1.681250810623169, "learning_rate": 3.571428571428572e-05, "loss": 0.824, "step": 15 }, { "epoch": 0.7142857142857143, "eval_loss": 0.13330750167369843, "eval_runtime": 3.6814, "eval_samples_per_second": 356.115, "eval_steps_per_second": 1.63, "eval_telecom-ir-eval_cosine_accuracy@1": 0.9397406559877955, "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476, "eval_telecom-ir-eval_cosine_accuracy@3": 0.9839816933638444, "eval_telecom-ir-eval_cosine_accuracy@5": 0.9893211289092296, "eval_telecom-ir-eval_cosine_map@100": 0.9625163452108533, "eval_telecom-ir-eval_cosine_mrr@10": 0.9623769568849659, "eval_telecom-ir-eval_cosine_ndcg@10": 0.9701258981216676, "eval_telecom-ir-eval_cosine_precision@1": 0.9397406559877955, "eval_telecom-ir-eval_cosine_recall@1": 0.9397406559877955, "step": 15 }, { "epoch": 1.380952380952381, "grad_norm": 0.8189207315444946, "learning_rate": 4.972077065562821e-05, "loss": 0.1731, "step": 30 }, { "epoch": 1.380952380952381, "eval_loss": 0.07593704760074615, "eval_runtime": 4.0688, "eval_samples_per_second": 322.209, "eval_steps_per_second": 1.475, "eval_telecom-ir-eval_cosine_accuracy@1": 0.9565217391304348, "eval_telecom-ir-eval_cosine_accuracy@10": 0.9938977879481312, "eval_telecom-ir-eval_cosine_accuracy@3": 0.9877955758962624, "eval_telecom-ir-eval_cosine_accuracy@5": 0.9908466819221968, "eval_telecom-ir-eval_cosine_map@100": 0.9723266300874301, "eval_telecom-ir-eval_cosine_mrr@10": 0.9721883210441564, "eval_telecom-ir-eval_cosine_ndcg@10": 0.9776352051817517, "eval_telecom-ir-eval_cosine_precision@1": 0.9565217391304348, "eval_telecom-ir-eval_cosine_recall@1": 0.9565217391304348, "step": 30 }, { "epoch": 2.0476190476190474, "grad_norm": 0.7057574391365051, "learning_rate": 4.803690529676019e-05, "loss": 0.0917, "step": 45 }, { "epoch": 2.0476190476190474, "eval_loss": 0.06566686183214188, "eval_runtime": 3.7186, "eval_samples_per_second": 352.553, "eval_steps_per_second": 1.614, "eval_telecom-ir-eval_cosine_accuracy@1": 0.9649122807017544, "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476, "eval_telecom-ir-eval_cosine_accuracy@3": 0.9900839054157132, "eval_telecom-ir-eval_cosine_accuracy@5": 0.9908466819221968, "eval_telecom-ir-eval_cosine_map@100": 0.9768047979761636, "eval_telecom-ir-eval_cosine_mrr@10": 0.9765700483091787, "eval_telecom-ir-eval_cosine_ndcg@10": 0.9807364362901521, "eval_telecom-ir-eval_cosine_precision@1": 0.9649122807017544, "eval_telecom-ir-eval_cosine_recall@1": 0.9649122807017544, "step": 45 }, { "epoch": 2.761904761904762, "grad_norm": 0.7498806118965149, "learning_rate": 4.4928312680573064e-05, "loss": 0.0676, "step": 60 }, { "epoch": 2.761904761904762, "eval_loss": 0.06091764196753502, "eval_runtime": 3.7927, "eval_samples_per_second": 345.667, "eval_steps_per_second": 1.582, "eval_telecom-ir-eval_cosine_accuracy@1": 0.9641495041952708, "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476, "eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804, "eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804, "eval_telecom-ir-eval_cosine_map@100": 0.977428148947981, "eval_telecom-ir-eval_cosine_mrr@10": 0.9771802695143658, "eval_telecom-ir-eval_cosine_ndcg@10": 0.9812569737659373, "eval_telecom-ir-eval_cosine_precision@1": 0.9641495041952708, "eval_telecom-ir-eval_cosine_recall@1": 0.9641495041952708, "step": 60 }, { "epoch": 3.4285714285714284, "grad_norm": 0.48658156394958496, "learning_rate": 4.058724504646834e-05, "loss": 0.0435, "step": 75 }, { "epoch": 3.4285714285714284, "eval_loss": 0.05956002324819565, "eval_runtime": 4.2667, "eval_samples_per_second": 307.261, "eval_steps_per_second": 1.406, "eval_telecom-ir-eval_cosine_accuracy@1": 0.9649122807017544, "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476, "eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804, "eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804, "eval_telecom-ir-eval_cosine_map@100": 0.978052610298987, "eval_telecom-ir-eval_cosine_mrr@10": 0.9778295376121463, "eval_telecom-ir-eval_cosine_ndcg@10": 0.9817518617980646, "eval_telecom-ir-eval_cosine_precision@1": 0.9649122807017544, "eval_telecom-ir-eval_cosine_recall@1": 0.9649122807017544, "step": 75 }, { "epoch": 4.095238095238095, "grad_norm": 0.4985809624195099, "learning_rate": 3.5282177578265296e-05, "loss": 0.038, "step": 90 }, { "epoch": 4.095238095238095, "eval_loss": 0.060632411390542984, "eval_runtime": 4.6488, "eval_samples_per_second": 282.008, "eval_steps_per_second": 1.291, "eval_telecom-ir-eval_cosine_accuracy@1": 0.9649122807017544, "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476, "eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804, "eval_telecom-ir-eval_cosine_accuracy@5": 0.992372234935164, "eval_telecom-ir-eval_cosine_map@100": 0.9775869566334031, "eval_telecom-ir-eval_cosine_mrr@10": 0.9773646071700992, "eval_telecom-ir-eval_cosine_ndcg@10": 0.9813932046352999, "eval_telecom-ir-eval_cosine_precision@1": 0.9649122807017544, "eval_telecom-ir-eval_cosine_recall@1": 0.9649122807017544, "step": 90 }, { "epoch": 4.809523809523809, "grad_norm": 0.4105435609817505, "learning_rate": 2.9341204441673266e-05, "loss": 0.0332, "step": 105 }, { "epoch": 4.809523809523809, "eval_loss": 0.05935605987906456, "eval_runtime": 4.0644, "eval_samples_per_second": 322.554, "eval_steps_per_second": 1.476, "eval_telecom-ir-eval_cosine_accuracy@1": 0.965675057208238, "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476, "eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804, "eval_telecom-ir-eval_cosine_accuracy@5": 0.992372234935164, "eval_telecom-ir-eval_cosine_map@100": 0.9783638236659703, "eval_telecom-ir-eval_cosine_mrr@10": 0.9781273836765828, "eval_telecom-ir-eval_cosine_ndcg@10": 0.9819743331685896, "eval_telecom-ir-eval_cosine_precision@1": 0.965675057208238, "eval_telecom-ir-eval_cosine_recall@1": 0.965675057208238, "step": 105 }, { "epoch": 5.476190476190476, "grad_norm": 0.468258261680603, "learning_rate": 2.3131747660339394e-05, "loss": 0.0269, "step": 120 }, { "epoch": 5.476190476190476, "eval_loss": 0.060672808438539505, "eval_runtime": 4.0797, "eval_samples_per_second": 321.343, "eval_steps_per_second": 1.471, "eval_telecom-ir-eval_cosine_accuracy@1": 0.9664378337147216, "eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476, "eval_telecom-ir-eval_cosine_accuracy@3": 0.9908466819221968, "eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804, "eval_telecom-ir-eval_cosine_map@100": 0.9780891289133677, "eval_telecom-ir-eval_cosine_mrr@10": 0.9778688871938299, "eval_telecom-ir-eval_cosine_ndcg@10": 0.9817380288044749, "eval_telecom-ir-eval_cosine_precision@1": 0.9664378337147216, "eval_telecom-ir-eval_cosine_recall@1": 0.9664378337147216, "step": 120 }, { "epoch": 6.142857142857143, "grad_norm": 0.192308709025383, "learning_rate": 1.7037833743707892e-05, "loss": 0.0219, "step": 135 }, { "epoch": 6.142857142857143, "eval_loss": 0.06004022806882858, "eval_runtime": 3.6988, "eval_samples_per_second": 354.443, "eval_steps_per_second": 1.622, "eval_telecom-ir-eval_cosine_accuracy@1": 0.965675057208238, "eval_telecom-ir-eval_cosine_accuracy@10": 0.9938977879481312, "eval_telecom-ir-eval_cosine_accuracy@3": 0.9908466819221968, "eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804, "eval_telecom-ir-eval_cosine_map@100": 0.9779666698415427, "eval_telecom-ir-eval_cosine_mrr@10": 0.9778095601322145, "eval_telecom-ir-eval_cosine_ndcg@10": 0.9818676160795978, "eval_telecom-ir-eval_cosine_precision@1": 0.965675057208238, "eval_telecom-ir-eval_cosine_recall@1": 0.965675057208238, "step": 135 }, { "epoch": 6.857142857142857, "grad_norm": 0.3330775499343872, "learning_rate": 1.1436343403356017e-05, "loss": 0.0244, "step": 150 }, { "epoch": 6.857142857142857, "eval_loss": 0.05985964834690094, "eval_runtime": 3.8386, "eval_samples_per_second": 341.53, "eval_steps_per_second": 1.563, "eval_telecom-ir-eval_cosine_accuracy@1": 0.9679633867276888, "eval_telecom-ir-eval_cosine_accuracy@10": 0.992372234935164, "eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804, "eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804, "eval_telecom-ir-eval_cosine_map@100": 0.9791402442094453, "eval_telecom-ir-eval_cosine_mrr@10": 0.9788647342995168, "eval_telecom-ir-eval_cosine_ndcg@10": 0.9823240649953693, "eval_telecom-ir-eval_cosine_precision@1": 0.9679633867276888, "eval_telecom-ir-eval_cosine_recall@1": 0.9679633867276888, "step": 150 } ], "logging_steps": 15, "max_steps": 210, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 15, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }