MHGanainy
/

16-clusters-balanced-7

Generated from Trainer

Model card Files Files and versions Community

16-clusters-balanced-7 / trainer_state.json

MHGanainy's picture

MHGanainy/16-clusters-balanced-7

5943cb1 verified 6 months ago

history blame contribute delete

3.76 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 1673,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.05977286312014345,
	"grad_norm": 0.18261943757534027,
	"learning_rate": 6.666666666666667e-06,
	"loss": 2.3555,
	"step": 100
	},
	{
	"epoch": 0.1195457262402869,
	"grad_norm": 0.2138003557920456,
	"learning_rate": 1.3333333333333333e-05,
	"loss": 2.3285,
	"step": 200
	},
	{
	"epoch": 0.17931858936043035,
	"grad_norm": 0.3002658188343048,
	"learning_rate": 2e-05,
	"loss": 2.2489,
	"step": 300
	},
	{
	"epoch": 0.2390914524805738,
	"grad_norm": 0.4061976671218872,
	"learning_rate": 1.9739364808281564e-05,
	"loss": 2.1912,
	"step": 400
	},
	{
	"epoch": 0.2988643156007173,
	"grad_norm": 0.5177231431007385,
	"learning_rate": 1.8971045373758673e-05,
	"loss": 2.1328,
	"step": 500
	},
	{
	"epoch": 0.3586371787208607,
	"grad_norm": 0.6728057861328125,
	"learning_rate": 1.7735091913054898e-05,
	"loss": 2.062,
	"step": 600
	},
	{
	"epoch": 0.41841004184100417,
	"grad_norm": 0.6207203269004822,
	"learning_rate": 1.6095931019607367e-05,
	"loss": 2.0053,
	"step": 700
	},
	{
	"epoch": 0.4781829049611476,
	"grad_norm": 0.6931679248809814,
	"learning_rate": 1.4139007296160285e-05,
	"loss": 2.0062,
	"step": 800
	},
	{
	"epoch": 0.5379557680812911,
	"grad_norm": 0.6681210994720459,
	"learning_rate": 1.1966329380681454e-05,
	"loss": 1.9427,
	"step": 900
	},
	{
	"epoch": 0.5977286312014346,
	"grad_norm": 0.8678436279296875,
	"learning_rate": 9.691152538179525e-06,
	"loss": 1.9019,
	"step": 1000
	},
	{
	"epoch": 0.657501494321578,
	"grad_norm": 1.0380738973617554,
	"learning_rate": 7.432074999162258e-06,
	"loss": 1.9017,
	"step": 1100
	},
	{
	"epoch": 0.7172743574417214,
	"grad_norm": 0.8856862783432007,
	"learning_rate": 5.306855785127376e-06,
	"loss": 1.8821,
	"step": 1200
	},
	{
	"epoch": 0.7770472205618649,
	"grad_norm": 1.1661518812179565,
	"learning_rate": 3.426276279533615e-06,
	"loss": 1.8826,
	"step": 1300
	},
	{
	"epoch": 0.8368200836820083,
	"grad_norm": 0.8435269594192505,
	"learning_rate": 1.8883655223774121e-06,
	"loss": 1.8919,
	"step": 1400
	},
	{
	"epoch": 0.8965929468021518,
	"grad_norm": 0.7680924534797668,
	"learning_rate": 7.732902466662218e-07,
	"loss": 1.9126,
	"step": 1500
	},
	{
	"epoch": 0.9563658099222953,
	"grad_norm": 0.9946077466011047,
	"learning_rate": 1.3917602405313812e-07,
	"loss": 1.8853,
	"step": 1600
	},
	{
	"epoch": 1.0,
	"step": 1673,
	"total_flos": 3.0394718060544e+16,
	"train_loss": 2.029054084226035,
	"train_runtime": 519.4686,
	"train_samples_per_second": 6.439,
	"train_steps_per_second": 3.221
	}
	],
	"logging_steps": 100,
	"max_steps": 1673,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 3.0394718060544e+16,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}