hHoai committed
Commit d713b86 · verified · 1 Parent(s): 7fec4c8

Update model/model.py

Files changed (1)
  1. model/model.py +516 -510
model/model.py CHANGED
@@ -1,511 +1,517 @@
1
- from bartpho.preprocess import normalize, tokenize
2
- from bartpho.utils import tag_dict, polarity_dict, polarity_list, tags, eng_tags, eng_polarity, detect_labels, no_polarity, no_tag
3
- from bartpho.utils import predict, predict_df, predict_detect, predict_df_detect
4
- from simpletransformers.config.model_args import Seq2SeqArgs
5
- import random
6
- import numpy as np
7
- import torch
8
- from transformers import (
9
- AdamW,
10
- AutoConfig,
11
- AutoModel,
12
- AutoTokenizer,
13
- MBartConfig,
14
- MBartForConditionalGeneration,
15
- MBartTokenizer,
16
- get_linear_schedule_with_warmup,
17
- )
18
- from pyvi.ViTokenizer import tokenize as model_tokenize
19
-
20
- class Seq2SeqModel:
21
- def __init__(
22
- self,
23
- encoder_decoder_type=None,
24
- encoder_decoder_name=None,
25
- config=None,
26
- args=None,
27
- use_cuda=False,
28
- cuda_device=0,
29
- **kwargs,
30
- ):
31
-
32
- """
33
- Initializes a Seq2SeqModel.
34
-
35
- Args:
36
- encoder_decoder_type (optional): The type of encoder-decoder model. (E.g. bart)
37
- encoder_decoder_name (optional): The path to a directory containing the saved encoder and decoder of a Seq2SeqModel. (E.g. "outputs/") OR a valid BART or MarianMT model.
38
- config (optional): A configuration file to build an EncoderDecoderModel.
39
- args (optional): Default args will be used if this parameter is not provided. If provided, it should be a dict containing the args that should be changed in the default args.
40
- use_cuda (optional): Use GPU if available. Setting to False will force model to use CPU only.
41
- cuda_device (optional): Specific GPU that should be used. Will use the first available GPU by default.
42
- **kwargs (optional): For providing proxies, force_download, resume_download, cache_dir and other options specific to the 'from_pretrained' implementation where this will be supplied.
43
- """ # noqa: ignore flake8"
44
-
45
- if not config:
46
- # if not ((encoder_name and decoder_name) or encoder_decoder_name) and not encoder_type:
47
- if not encoder_decoder_name:
48
- raise ValueError(
49
- "You must specify a Seq2Seq config \t OR \t"
50
- "encoder_decoder_name"
51
- )
52
- elif not encoder_decoder_type:
53
- raise ValueError(
54
- "You must specify a Seq2Seq config \t OR \t"
55
- "encoder_decoder_name"
56
- )
57
-
58
- self.args = self._load_model_args(encoder_decoder_name)
59
- print(args)
60
- if args:
61
- self.args.update_from_dict(args)
62
- print(args)
63
-
64
- if self.args.manual_seed:
65
- random.seed(self.args.manual_seed)
66
- np.random.seed(self.args.manual_seed)
67
- torch.manual_seed(self.args.manual_seed)
68
- if self.args.n_gpu > 0:
69
- torch.cuda.manual_seed_all(self.args.manual_seed)
70
-
71
- if use_cuda:
72
- if torch.cuda.is_available():
73
- self.device = torch.device("cuda")
74
- else:
75
- raise ValueError(
76
- "'use_cuda' set to True when cuda is unavailable."
77
- "Make sure CUDA is available or set `use_cuda=False`."
78
- )
79
- else:
80
- self.device = "cpu"
81
-
82
- self.results = {}
83
-
84
- if not use_cuda:
85
- self.args.fp16 = False
86
-
87
- # config = EncoderDecoderConfig.from_encoder_decoder_configs(config, config)
88
- #if encoder_decoder_type:
89
- config_class, model_class, tokenizer_class = MODEL_CLASSES[encoder_decoder_type]
90
-
91
- self.model = model_class.from_pretrained(encoder_decoder_name)
92
- self.encoder_tokenizer = tokenizer_class.from_pretrained(encoder_decoder_name)
93
- self.decoder_tokenizer = self.encoder_tokenizer
94
- self.config = self.model.config
95
-
96
- if self.args.wandb_project and not wandb_available:
97
- warnings.warn("wandb_project specified but wandb is not available. Wandb disabled.")
98
- self.args.wandb_project = None
99
-
100
- self.args.model_name = encoder_decoder_name
101
- self.args.model_type = encoder_decoder_type
102
-
103
- def train_model(
104
- self,
105
- train_data,
106
- best_accuracy,
107
- output_dir=None,
108
- show_running_loss=True,
109
- args=None,
110
- eval_data=None,
111
- test_data=None,
112
- verbose=True,
113
- **kwargs,
114
- ):
115
- if args:
116
- self.args.update_from_dict(args)
117
- #self.args = args
118
- if self.args.silent:
119
- show_running_loss = False
120
-
121
-
122
- if not output_dir:
123
- output_dir = self.args.output_dir
124
- self._move_model_to_device()
125
-
126
- train_dataset = self.load_and_cache_examples(train_data, verbose=verbose)
127
-
128
- os.makedirs(output_dir, exist_ok=True)
129
-
130
- global_step, tr_loss, best_accuracy = self.train(
131
- train_dataset,
132
- output_dir,
133
- best_accuracy,
134
- show_running_loss=show_running_loss,
135
- eval_data=eval_data,
136
- test_data=test_data,
137
- verbose=verbose,
138
- **kwargs,
139
- )
140
-
141
- final_dir = self.args.output_dir + "/final"
142
- self._save_model(final_dir, model=self.model)
143
-
144
- if verbose:
145
- logger.info(" Training of {} model complete. Saved best to {}.".format(self.args.model_name, final_dir))
146
-
147
- return best_accuracy
148
-
149
- def train(
150
- self,
151
- train_dataset,
152
- output_dir,
153
- best_accuracy,
154
- show_running_loss=True,
155
- eval_data=None,
156
- test_data=None,
157
- verbose=True,
158
- **kwargs,
159
- ):
160
- """
161
- Trains the model on train_dataset.
162
-
163
- Utility function to be used by the train_model() method. Not intended to be used directly.
164
- """
165
-
166
- #epoch_lst = []
167
- #acc_detects, pre_detects, rec_detects, f1_detects, accs, pre_absas, rec_absas, f1_absas = [], [], [], [], [], [], [], []
168
- #tacc_detects, tpre_detects, trec_detects, tf1_detects, taccs, tpre_absas, trec_absas, tf1_absas = [], [], [], [], [], [], [], []
169
-
170
- model = self.model
171
- args = self.args
172
-
173
- tb_writer = SummaryWriter(logdir=args.tensorboard_dir)
174
- train_sampler = RandomSampler(train_dataset)
175
- train_dataloader = DataLoader(
176
- train_dataset,
177
- sampler=train_sampler,
178
- batch_size=args.train_batch_size,
179
- num_workers=self.args.dataloader_num_workers,
180
- )
181
-
182
- if args.max_steps > 0:
183
- t_total = args.max_steps
184
- args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
185
- else:
186
- t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
187
-
188
- no_decay = ["bias", "LayerNorm.weight"]
189
-
190
- optimizer_grouped_parameters = []
191
- custom_parameter_names = set()
192
- for group in self.args.custom_parameter_groups:
193
- params = group.pop("params")
194
- custom_parameter_names.update(params)
195
- param_group = {**group}
196
- param_group["params"] = [p for n, p in model.named_parameters() if n in params]
197
- optimizer_grouped_parameters.append(param_group)
198
-
199
- for group in self.args.custom_layer_parameters:
200
- layer_number = group.pop("layer")
201
- layer = f"layer.{layer_number}."
202
- group_d = {**group}
203
- group_nd = {**group}
204
- group_nd["weight_decay"] = 0.0
205
- params_d = []
206
- params_nd = []
207
- for n, p in model.named_parameters():
208
- if n not in custom_parameter_names and layer in n:
209
- if any(nd in n for nd in no_decay):
210
- params_nd.append(p)
211
- else:
212
- params_d.append(p)
213
- custom_parameter_names.add(n)
214
- group_d["params"] = params_d
215
- group_nd["params"] = params_nd
216
-
217
- optimizer_grouped_parameters.append(group_d)
218
- optimizer_grouped_parameters.append(group_nd)
219
-
220
- if not self.args.train_custom_parameters_only:
221
- optimizer_grouped_parameters.extend(
222
- [
223
- {
224
- "params": [
225
- p
226
- for n, p in model.named_parameters()
227
- if n not in custom_parameter_names and not any(nd in n for nd in no_decay)
228
- ],
229
- "weight_decay": args.weight_decay,
230
- },
231
- {
232
- "params": [
233
- p
234
- for n, p in model.named_parameters()
235
- if n not in custom_parameter_names and any(nd in n for nd in no_decay)
236
- ],
237
- "weight_decay": 0.0,
238
- },
239
- ]
240
- )
241
-
242
- warmup_steps = math.ceil(t_total * args.warmup_ratio)
243
- args.warmup_steps = warmup_steps if args.warmup_steps == 0 else args.warmup_steps
244
-
245
- # TODO: Use custom optimizer like with BertSum?
246
- optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
247
- scheduler = get_linear_schedule_with_warmup(
248
- optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
249
- )
250
-
251
- if (args.model_name and os.path.isfile(os.path.join(args.model_name, "optimizer.pt")) and os.path.isfile(os.path.join(args.model_name, "scheduler.pt"))):
252
- # Load in optimizer and scheduler states
253
- optimizer.load_state_dict(torch.load(os.path.join(args.model_name, "optimizer.pt")))
254
- scheduler.load_state_dict(torch.load(os.path.join(args.model_name, "scheduler.pt")))
255
-
256
- if args.n_gpu > 1:
257
- model = torch.nn.DataParallel(model)
258
-
259
- logger.info(" Training started")
260
-
261
- global_step = 0
262
- tr_loss, logging_loss = 0.0, 0.0
263
- model.zero_grad()
264
- train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.silent, mininterval=0)
265
- epoch_number = 0
266
- best_eval_metric = None
267
- early_stopping_counter = 0
268
- steps_trained_in_current_epoch = 0
269
- epochs_trained = 0
270
-
271
- if args.model_name and os.path.exists(args.model_name):
272
- try:
273
- # set global_step to gobal_step of last saved checkpoint from model path
274
- checkpoint_suffix = args.model_name.split("/")[-1].split("-")
275
- if len(checkpoint_suffix) > 2:
276
- checkpoint_suffix = checkpoint_suffix[1]
277
- else:
278
- checkpoint_suffix = checkpoint_suffix[-1]
279
- global_step = int(checkpoint_suffix)
280
- epochs_trained = global_step // (len(train_dataloader) // args.gradient_accumulation_steps)
281
- steps_trained_in_current_epoch = global_step % (
282
- len(train_dataloader) // args.gradient_accumulation_steps
283
- )
284
-
285
- logger.info(" Continuing training from checkpoint, will skip to saved global_step")
286
- logger.info(" Continuing training from epoch %d", epochs_trained)
287
- logger.info(" Continuing training from global step %d", global_step)
288
- logger.info(" Will skip the first %d steps in the current epoch", steps_trained_in_current_epoch)
289
- except ValueError:
290
- logger.info(" Starting fine-tuning.")
291
-
292
- if args.wandb_project:
293
- wandb.init(project=args.wandb_project, config={**asdict(args)}, **args.wandb_kwargs)
294
- wandb.watch(self.model)
295
-
296
- if args.fp16:
297
- from torch.cuda import amp
298
-
299
- scaler = amp.GradScaler()
300
-
301
- model.train()
302
- for current_epoch in train_iterator:
303
- if epochs_trained > 0:
304
- epochs_trained -= 1
305
- continue
306
- train_iterator.set_description(f"Epoch {epoch_number + 1} of {args.num_train_epochs}")
307
- batch_iterator = tqdm(
308
- train_dataloader,
309
- desc=f"Running Epoch {epoch_number} of {args.num_train_epochs}",
310
- disable=args.silent,
311
- mininterval=0,
312
- )
313
- for step, batch in enumerate(batch_iterator):
314
- if steps_trained_in_current_epoch > 0:
315
- steps_trained_in_current_epoch -= 1
316
- continue
317
- # batch = tuple(t.to(device) for t in batch)
318
-
319
- inputs = self._get_inputs_dict(batch)
320
- if args.fp16:
321
- with amp.autocast():
322
- outputs = model(**inputs)
323
- # model outputs are always tuple in pytorch-transformers (see doc)
324
- loss = outputs[0]
325
- else:
326
- outputs = model(**inputs)
327
- # model outputs are always tuple in pytorch-transformers (see doc)
328
- loss = outputs[0]
329
-
330
- if args.n_gpu > 1:
331
- loss = loss.mean() # mean() to average on multi-gpu parallel training
332
-
333
- current_loss = loss.item()
334
-
335
- if show_running_loss:
336
- batch_iterator.set_description(
337
- f"Epochs {epoch_number}/{args.num_train_epochs}. Running Loss: {current_loss:9.4f}"
338
- )
339
-
340
- if args.gradient_accumulation_steps > 1:
341
- loss = loss / args.gradient_accumulation_steps
342
-
343
- if args.fp16:
344
- scaler.scale(loss).backward()
345
- else:
346
- loss.backward()
347
-
348
- tr_loss += loss.item()
349
- if (step + 1) % args.gradient_accumulation_steps == 0:
350
- if args.fp16:
351
- scaler.unscale_(optimizer)
352
- torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
353
-
354
- if args.fp16:
355
- scaler.step(optimizer)
356
- scaler.update()
357
- else:
358
- optimizer.step()
359
- scheduler.step() # Update learning rate schedule
360
- model.zero_grad()
361
- global_step += 1
362
-
363
- if args.logging_steps > 0 and global_step % args.logging_steps == 0:
364
- # Log metrics
365
- tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
366
- tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
367
- logging_loss = tr_loss
368
- if args.wandb_project:
369
- wandb.log(
370
- {
371
- "Training loss": current_loss,
372
- "lr": scheduler.get_lr()[0],
373
- "global_step": global_step,
374
- }
375
- )
376
-
377
- # if args.save_steps > 0 and global_step % args.save_steps == 0:
378
- # # Save model checkpoint
379
- # output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))
380
-
381
- # self._save_model(output_dir_current, optimizer, scheduler, model=model)
382
-
383
- epoch_number += 1
384
- output_dir_current = os.path.join(output_dir, "checkpoint-{}-epoch-{}".format(global_step, epoch_number))
385
-
386
-
387
- print('batch: '+str(args.train_batch_size)+' accumulation_steps: '+str(args.gradient_accumulation_steps)+\
388
- ' lr: '+str(args.learning_rate)+' epochs: '+str(args.num_train_epochs)+' epoch: '+str(epoch_number))
389
- print('---dev dataset----')
390
- acc_detect, pre_detect, rec_detect, f1_detect, acc, pre_absa, rec_absa, f1_absa = predict_df(model, eval_data, tokenizer=self.encoder_tokenizer, device=self.device)
391
- print('---test dataset----')
392
- tacc_detect, tpre_detect, trec_detect, tf1_detect, tacc, tpre_absa, trec_absa, tf1_absa = predict_df(model, test_data, tokenizer=self.encoder_tokenizer, device=self.device)
393
- # if acc > best_accuracy:
394
- # best_accuracy = acc
395
- # if not args.save_model_every_epoch:
396
- # self._save_model(output_dir_current, optimizer, scheduler, model=model)
397
- # with open('./MAMS_best_accuracy.txt', 'a') as f0:
398
- # f0.writelines('batch: '+str(args.train_batch_size)+' accumulation_steps: '+str(args.gradient_accumulation_steps)+\
399
- # ' lr: '+str(args.learning_rate)+' epochs: '+str(args.num_train_epochs)+' epoch: '+str(epoch_number)+' val_accuracy: '+str(best_accuracy)+\
400
- # ' test_accuracy: '+str(tacc)+'\n')
401
-
402
- # if args.save_model_every_epoch:
403
- # os.makedirs(output_dir_current, exist_ok=True)
404
- # self._save_model(output_dir_current, optimizer, scheduler, model=model)
405
-
406
- if acc > best_accuracy:
407
- # Update best_accuracy if a better model is found
408
- best_accuracy = acc
409
-
410
- # Save the best model to output_dir_current
411
- self._save_model(output_dir_current, optimizer, scheduler, model=model)
412
-
413
- # Write the best_accuracy information to the log file
414
- with open('./MAMS_best_accuracy.txt', 'a') as f0:
415
- f0.writelines(
416
- 'batch: ' + str(args.train_batch_size) +
417
- ' accumulation_steps: ' + str(args.gradient_accumulation_steps) +
418
- ' lr: ' + str(args.learning_rate) +
419
- ' epochs: ' + str(args.num_train_epochs) +
420
- ' epoch: ' + str(epoch_number) +
421
- ' val_accuracy: ' + str(best_accuracy) +
422
- ' test_accuracy: ' + str(tacc) + '\n'
423
- )
424
-
425
-
426
-
427
- return global_step, tr_loss / global_step, best_accuracy
428
-
429
- def load_and_cache_examples(self, data, evaluate=False, no_cache=False, verbose=True, silent=False):
430
- """
431
- Creates a T5Dataset from data.
432
-
433
- Utility function for train() and eval() methods. Not intended to be used directly.
434
- """
435
-
436
- encoder_tokenizer = self.encoder_tokenizer
437
- decoder_tokenizer = self.decoder_tokenizer
438
- args = self.args
439
-
440
- if not no_cache:
441
- no_cache = args.no_cache
442
-
443
- if not no_cache:
444
- os.makedirs(self.args.cache_dir, exist_ok=True)
445
-
446
- mode = "dev" if evaluate else "train"
447
-
448
- if args.dataset_class:
449
- CustomDataset = args.dataset_class
450
- return CustomDataset(encoder_tokenizer, decoder_tokenizer, args, data, mode)
451
- else:
452
- return SimpleSummarizationDataset(encoder_tokenizer, self.args, data, mode)
453
-
454
- def _save_model(self, output_dir=None, optimizer=None, scheduler=None, model=None, results=None):
455
- if not output_dir:
456
- output_dir = self.args.output_dir
457
- os.makedirs(output_dir, exist_ok=True)
458
-
459
- logger.info(f"Saving model into {output_dir}")
460
-
461
- if model and not self.args.no_save:
462
- # Take care of distributed/parallel training
463
- model_to_save = model.module if hasattr(model, "module") else model
464
- self._save_model_args(output_dir)
465
-
466
- os.makedirs(os.path.join(output_dir), exist_ok=True)
467
- model_to_save.save_pretrained(output_dir)
468
- self.config.save_pretrained(output_dir)
469
- self.encoder_tokenizer.save_pretrained(output_dir)
470
-
471
- torch.save(self.args, os.path.join(output_dir, "training_args.bin"))
472
- if optimizer and scheduler and self.args.save_optimizer_and_scheduler:
473
- torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
474
- torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
475
-
476
- if results:
477
- output_eval_file = os.path.join(output_dir, "eval_results.txt")
478
- with open(output_eval_file, "w") as writer:
479
- for key in sorted(results.keys()):
480
- writer.write("{} = {}\n".format(key, str(results[key])))
481
-
482
- def _move_model_to_device(self):
483
- self.model.to(self.device)
484
-
485
- def _get_inputs_dict(self, batch):
486
- device = self.device
487
- pad_token_id = self.encoder_tokenizer.pad_token_id
488
- source_ids, source_mask, y = batch["source_ids"], batch["source_mask"], batch["target_ids"]
489
- y_ids = y[:, :-1].contiguous()
490
- lm_labels = y[:, 1:].clone()
491
- lm_labels[y[:, 1:] == pad_token_id] = -100
492
-
493
- inputs = {
494
- "input_ids": source_ids.to(device),
495
- "attention_mask": source_mask.to(device),
496
- "decoder_input_ids": y_ids.to(device),
497
- "labels": lm_labels.to(device),
498
- }
499
- return inputs
500
-
501
- def _save_model_args(self, output_dir):
502
- os.makedirs(output_dir, exist_ok=True)
503
- self.args.save(output_dir)
504
-
505
- def _load_model_args(self, input_dir):
506
- args = Seq2SeqArgs()
507
- args.load(input_dir)
508
- return args
509
-
510
- def get_named_parameters(self):
 
 
 
 
 
 
511
  return [n for n, p in self.model.named_parameters()]
 
1
+ from bartpho.preprocess import normalize, tokenize
2
+ from bartpho.utils import tag_dict, polarity_dict, polarity_list, tags, eng_tags, eng_polarity, detect_labels, no_polarity, no_tag
3
+ from bartpho.utils import predict, predict_df, predict_detect, predict_df_detect
4
+ from simpletransformers.config.model_args import Seq2SeqArgs
5
+ import random
6
+ import numpy as np
7
+ import torch
8
+ from transformers import (
9
+ AdamW,
10
+ AutoConfig,
11
+ AutoModel,
12
+ AutoTokenizer,
13
+ MBartConfig,
14
+ MBartForConditionalGeneration,
15
+ MBartTokenizer,
16
+ get_linear_schedule_with_warmup,
17
+ )
18
+ from pyvi.ViTokenizer import tokenize as model_tokenize
19
+
20
+ MODEL_CLASSES = {
21
+ "auto": (AutoConfig, AutoModel, AutoTokenizer),
22
+ #"mbart": (MBartConfig, MBartForConditionalGeneration, MBartTokenizer),
23
+ "bartpho": (MBartConfig, MBartForConditionalGeneration, AutoTokenizer)
24
+ }
25
+
26
+ class Seq2SeqModel:
27
+ def __init__(
28
+ self,
29
+ encoder_decoder_type=None,
30
+ encoder_decoder_name=None,
31
+ config=None,
32
+ args=None,
33
+ use_cuda=False,
34
+ cuda_device=0,
35
+ **kwargs,
36
+ ):
37
+
38
+ """
39
+ Initializes a Seq2SeqModel.
40
+
41
+ Args:
42
+ encoder_decoder_type (optional): The type of encoder-decoder model. (E.g. bart)
43
+ encoder_decoder_name (optional): The path to a directory containing the saved encoder and decoder of a Seq2SeqModel. (E.g. "outputs/") OR a valid BART or MarianMT model.
44
+ config (optional): A configuration file to build an EncoderDecoderModel.
45
+ args (optional): Default args will be used if this parameter is not provided. If provided, it should be a dict containing the args that should be changed in the default args.
46
+ use_cuda (optional): Use GPU if available. Setting to False will force model to use CPU only.
47
+ cuda_device (optional): Specific GPU that should be used. Will use the first available GPU by default.
48
+ **kwargs (optional): For providing proxies, force_download, resume_download, cache_dir and other options specific to the 'from_pretrained' implementation where this will be supplied.
49
+ """ # noqa: ignore flake8"
50
+
51
+ if not config:
52
+ # if not ((encoder_name and decoder_name) or encoder_decoder_name) and not encoder_type:
53
+ if not encoder_decoder_name:
54
+ raise ValueError(
55
+ "You must specify a Seq2Seq config \t OR \t"
56
+ "encoder_decoder_name"
57
+ )
58
+ elif not encoder_decoder_type:
59
+ raise ValueError(
60
+ "You must specify a Seq2Seq config \t OR \t"
61
+ "encoder_decoder_name"
62
+ )
63
+
64
+ self.args = self._load_model_args(encoder_decoder_name)
65
+ print(args)
66
+ if args:
67
+ self.args.update_from_dict(args)
68
+ print(args)
69
+
70
+ if self.args.manual_seed:
71
+ random.seed(self.args.manual_seed)
72
+ np.random.seed(self.args.manual_seed)
73
+ torch.manual_seed(self.args.manual_seed)
74
+ if self.args.n_gpu > 0:
75
+ torch.cuda.manual_seed_all(self.args.manual_seed)
76
+
77
+ if use_cuda:
78
+ if torch.cuda.is_available():
79
+ self.device = torch.device("cuda")
80
+ else:
81
+ raise ValueError(
82
+ "'use_cuda' set to True when cuda is unavailable."
83
+ "Make sure CUDA is available or set `use_cuda=False`."
84
+ )
85
+ else:
86
+ self.device = "cpu"
87
+
88
+ self.results = {}
89
+
90
+ if not use_cuda:
91
+ self.args.fp16 = False
92
+
93
+ # config = EncoderDecoderConfig.from_encoder_decoder_configs(config, config)
94
+ #if encoder_decoder_type:
95
+ config_class, model_class, tokenizer_class = MODEL_CLASSES[encoder_decoder_type]
96
+
97
+ self.model = model_class.from_pretrained(encoder_decoder_name)
98
+ self.encoder_tokenizer = tokenizer_class.from_pretrained(encoder_decoder_name)
99
+ self.decoder_tokenizer = self.encoder_tokenizer
100
+ self.config = self.model.config
101
+
102
+ if self.args.wandb_project and not wandb_available:
103
+ warnings.warn("wandb_project specified but wandb is not available. Wandb disabled.")
104
+ self.args.wandb_project = None
105
+
106
+ self.args.model_name = encoder_decoder_name
107
+ self.args.model_type = encoder_decoder_type
108
+
109
+ def train_model(
110
+ self,
111
+ train_data,
112
+ best_accuracy,
113
+ output_dir=None,
114
+ show_running_loss=True,
115
+ args=None,
116
+ eval_data=None,
117
+ test_data=None,
118
+ verbose=True,
119
+ **kwargs,
120
+ ):
121
+ if args:
122
+ self.args.update_from_dict(args)
123
+ #self.args = args
124
+ if self.args.silent:
125
+ show_running_loss = False
126
+
127
+
128
+ if not output_dir:
129
+ output_dir = self.args.output_dir
130
+ self._move_model_to_device()
131
+
132
+ train_dataset = self.load_and_cache_examples(train_data, verbose=verbose)
133
+
134
+ os.makedirs(output_dir, exist_ok=True)
135
+
136
+ global_step, tr_loss, best_accuracy = self.train(
137
+ train_dataset,
138
+ output_dir,
139
+ best_accuracy,
140
+ show_running_loss=show_running_loss,
141
+ eval_data=eval_data,
142
+ test_data=test_data,
143
+ verbose=verbose,
144
+ **kwargs,
145
+ )
146
+
147
+ final_dir = self.args.output_dir + "/final"
148
+ self._save_model(final_dir, model=self.model)
149
+
150
+ if verbose:
151
+ logger.info(" Training of {} model complete. Saved best to {}.".format(self.args.model_name, final_dir))
152
+
153
+ return best_accuracy
154
+
155
+ def train(
156
+ self,
157
+ train_dataset,
158
+ output_dir,
159
+ best_accuracy,
160
+ show_running_loss=True,
161
+ eval_data=None,
162
+ test_data=None,
163
+ verbose=True,
164
+ **kwargs,
165
+ ):
166
+ """
167
+ Trains the model on train_dataset.
168
+
169
+ Utility function to be used by the train_model() method. Not intended to be used directly.
170
+ """
171
+
172
+ #epoch_lst = []
173
+ #acc_detects, pre_detects, rec_detects, f1_detects, accs, pre_absas, rec_absas, f1_absas = [], [], [], [], [], [], [], []
174
+ #tacc_detects, tpre_detects, trec_detects, tf1_detects, taccs, tpre_absas, trec_absas, tf1_absas = [], [], [], [], [], [], [], []
175
+
176
+ model = self.model
177
+ args = self.args
178
+
179
+ tb_writer = SummaryWriter(logdir=args.tensorboard_dir)
180
+ train_sampler = RandomSampler(train_dataset)
181
+ train_dataloader = DataLoader(
182
+ train_dataset,
183
+ sampler=train_sampler,
184
+ batch_size=args.train_batch_size,
185
+ num_workers=self.args.dataloader_num_workers,
186
+ )
187
+
188
+ if args.max_steps > 0:
189
+ t_total = args.max_steps
190
+ args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
191
+ else:
192
+ t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
193
+
194
+ no_decay = ["bias", "LayerNorm.weight"]
195
+
196
+ optimizer_grouped_parameters = []
197
+ custom_parameter_names = set()
198
+ for group in self.args.custom_parameter_groups:
199
+ params = group.pop("params")
200
+ custom_parameter_names.update(params)
201
+ param_group = {**group}
202
+ param_group["params"] = [p for n, p in model.named_parameters() if n in params]
203
+ optimizer_grouped_parameters.append(param_group)
204
+
205
+ for group in self.args.custom_layer_parameters:
206
+ layer_number = group.pop("layer")
207
+ layer = f"layer.{layer_number}."
208
+ group_d = {**group}
209
+ group_nd = {**group}
210
+ group_nd["weight_decay"] = 0.0
211
+ params_d = []
212
+ params_nd = []
213
+ for n, p in model.named_parameters():
214
+ if n not in custom_parameter_names and layer in n:
215
+ if any(nd in n for nd in no_decay):
216
+ params_nd.append(p)
217
+ else:
218
+ params_d.append(p)
219
+ custom_parameter_names.add(n)
220
+ group_d["params"] = params_d
221
+ group_nd["params"] = params_nd
222
+
223
+ optimizer_grouped_parameters.append(group_d)
224
+ optimizer_grouped_parameters.append(group_nd)
225
+
226
+ if not self.args.train_custom_parameters_only:
227
+ optimizer_grouped_parameters.extend(
228
+ [
229
+ {
230
+ "params": [
231
+ p
232
+ for n, p in model.named_parameters()
233
+ if n not in custom_parameter_names and not any(nd in n for nd in no_decay)
234
+ ],
235
+ "weight_decay": args.weight_decay,
236
+ },
237
+ {
238
+ "params": [
239
+ p
240
+ for n, p in model.named_parameters()
241
+ if n not in custom_parameter_names and any(nd in n for nd in no_decay)
242
+ ],
243
+ "weight_decay": 0.0,
244
+ },
245
+ ]
246
+ )
247
+
248
+ warmup_steps = math.ceil(t_total * args.warmup_ratio)
249
+ args.warmup_steps = warmup_steps if args.warmup_steps == 0 else args.warmup_steps
250
+
251
+ # TODO: Use custom optimizer like with BertSum?
252
+ optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
253
+ scheduler = get_linear_schedule_with_warmup(
254
+ optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
255
+ )
256
+
257
+ if (args.model_name and os.path.isfile(os.path.join(args.model_name, "optimizer.pt")) and os.path.isfile(os.path.join(args.model_name, "scheduler.pt"))):
258
+ # Load in optimizer and scheduler states
259
+ optimizer.load_state_dict(torch.load(os.path.join(args.model_name, "optimizer.pt")))
260
+ scheduler.load_state_dict(torch.load(os.path.join(args.model_name, "scheduler.pt")))
261
+
262
+ if args.n_gpu > 1:
263
+ model = torch.nn.DataParallel(model)
264
+
265
+ logger.info(" Training started")
266
+
267
+ global_step = 0
268
+ tr_loss, logging_loss = 0.0, 0.0
269
+ model.zero_grad()
270
+ train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.silent, mininterval=0)
271
+ epoch_number = 0
272
+ best_eval_metric = None
273
+ early_stopping_counter = 0
274
+ steps_trained_in_current_epoch = 0
275
+ epochs_trained = 0
276
+
277
+ if args.model_name and os.path.exists(args.model_name):
278
+ try:
279
+ # set global_step to gobal_step of last saved checkpoint from model path
280
+ checkpoint_suffix = args.model_name.split("/")[-1].split("-")
281
+ if len(checkpoint_suffix) > 2:
282
+ checkpoint_suffix = checkpoint_suffix[1]
283
+ else:
284
+ checkpoint_suffix = checkpoint_suffix[-1]
285
+ global_step = int(checkpoint_suffix)
286
+ epochs_trained = global_step // (len(train_dataloader) // args.gradient_accumulation_steps)
287
+ steps_trained_in_current_epoch = global_step % (
288
+ len(train_dataloader) // args.gradient_accumulation_steps
289
+ )
290
+
291
+ logger.info(" Continuing training from checkpoint, will skip to saved global_step")
292
+ logger.info(" Continuing training from epoch %d", epochs_trained)
293
+ logger.info(" Continuing training from global step %d", global_step)
294
+ logger.info(" Will skip the first %d steps in the current epoch", steps_trained_in_current_epoch)
295
+ except ValueError:
296
+ logger.info(" Starting fine-tuning.")
297
+
298
+ if args.wandb_project:
299
+ wandb.init(project=args.wandb_project, config={**asdict(args)}, **args.wandb_kwargs)
300
+ wandb.watch(self.model)
301
+
302
+ if args.fp16:
303
+ from torch.cuda import amp
304
+
305
+ scaler = amp.GradScaler()
306
+
307
+ model.train()
308
+ for current_epoch in train_iterator:
309
+ if epochs_trained > 0:
310
+ epochs_trained -= 1
311
+ continue
312
+ train_iterator.set_description(f"Epoch {epoch_number + 1} of {args.num_train_epochs}")
313
+ batch_iterator = tqdm(
314
+ train_dataloader,
315
+ desc=f"Running Epoch {epoch_number} of {args.num_train_epochs}",
316
+ disable=args.silent,
317
+ mininterval=0,
318
+ )
319
+ for step, batch in enumerate(batch_iterator):
320
+ if steps_trained_in_current_epoch > 0:
321
+ steps_trained_in_current_epoch -= 1
322
+ continue
323
+ # batch = tuple(t.to(device) for t in batch)
324
+
325
+ inputs = self._get_inputs_dict(batch)
326
+ if args.fp16:
327
+ with amp.autocast():
328
+ outputs = model(**inputs)
329
+ # model outputs are always tuple in pytorch-transformers (see doc)
330
+ loss = outputs[0]
331
+ else:
332
+ outputs = model(**inputs)
333
+ # model outputs are always tuple in pytorch-transformers (see doc)
334
+ loss = outputs[0]
335
+
336
+ if args.n_gpu > 1:
337
+ loss = loss.mean() # mean() to average on multi-gpu parallel training
338
+
339
+ current_loss = loss.item()
340
+
341
+ if show_running_loss:
342
+ batch_iterator.set_description(
343
+ f"Epochs {epoch_number}/{args.num_train_epochs}. Running Loss: {current_loss:9.4f}"
344
+ )
345
+
346
+ if args.gradient_accumulation_steps > 1:
347
+ loss = loss / args.gradient_accumulation_steps
348
+
349
+ if args.fp16:
350
+ scaler.scale(loss).backward()
351
+ else:
352
+ loss.backward()
353
+
354
+ tr_loss += loss.item()
355
+ if (step + 1) % args.gradient_accumulation_steps == 0:
356
+ if args.fp16:
357
+ scaler.unscale_(optimizer)
358
+ torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
359
+
360
+ if args.fp16:
361
+ scaler.step(optimizer)
362
+ scaler.update()
363
+ else:
364
+ optimizer.step()
365
+ scheduler.step() # Update learning rate schedule
366
+ model.zero_grad()
367
+ global_step += 1
368
+
369
+ if args.logging_steps > 0 and global_step % args.logging_steps == 0:
370
+ # Log metrics
371
+ tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
372
+ tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
373
+ logging_loss = tr_loss
374
+ if args.wandb_project:
375
+ wandb.log(
376
+ {
377
+ "Training loss": current_loss,
378
+ "lr": scheduler.get_lr()[0],
379
+ "global_step": global_step,
380
+ }
381
+ )
382
+
383
+ # if args.save_steps > 0 and global_step % args.save_steps == 0:
384
+ # # Save model checkpoint
385
+ # output_dir_current = os.path.join(output_dir, "checkpoint-{}".format(global_step))
386
+
387
+ # self._save_model(output_dir_current, optimizer, scheduler, model=model)
388
+
389
+ epoch_number += 1
390
+ output_dir_current = os.path.join(output_dir, "checkpoint-{}-epoch-{}".format(global_step, epoch_number))
391
+
392
+
393
+ print('batch: '+str(args.train_batch_size)+' accumulation_steps: '+str(args.gradient_accumulation_steps)+\
394
+ ' lr: '+str(args.learning_rate)+' epochs: '+str(args.num_train_epochs)+' epoch: '+str(epoch_number))
395
+ print('---dev dataset----')
396
+ acc_detect, pre_detect, rec_detect, f1_detect, acc, pre_absa, rec_absa, f1_absa = predict_df(model, eval_data, tokenizer=self.encoder_tokenizer, device=self.device)
397
+ print('---test dataset----')
398
+ tacc_detect, tpre_detect, trec_detect, tf1_detect, tacc, tpre_absa, trec_absa, tf1_absa = predict_df(model, test_data, tokenizer=self.encoder_tokenizer, device=self.device)
399
+ # if acc > best_accuracy:
400
+ # best_accuracy = acc
401
+ # if not args.save_model_every_epoch:
402
+ # self._save_model(output_dir_current, optimizer, scheduler, model=model)
403
+ # with open('./MAMS_best_accuracy.txt', 'a') as f0:
404
+ # f0.writelines('batch: '+str(args.train_batch_size)+' accumulation_steps: '+str(args.gradient_accumulation_steps)+\
405
+ # ' lr: '+str(args.learning_rate)+' epochs: '+str(args.num_train_epochs)+' epoch: '+str(epoch_number)+' val_accuracy: '+str(best_accuracy)+\
406
+ # ' test_accuracy: '+str(tacc)+'\n')
407
+
408
+ # if args.save_model_every_epoch:
409
+ # os.makedirs(output_dir_current, exist_ok=True)
410
+ # self._save_model(output_dir_current, optimizer, scheduler, model=model)
411
+
412
+ if acc > best_accuracy:
413
+ # Update best_accuracy if a better model is found
414
+ best_accuracy = acc
415
+
416
+ # Save the best model to output_dir_current
417
+ self._save_model(output_dir_current, optimizer, scheduler, model=model)
418
+
419
+ # Write the best_accuracy information to the log file
420
+ with open('./MAMS_best_accuracy.txt', 'a') as f0:
421
+ f0.writelines(
422
+ 'batch: ' + str(args.train_batch_size) +
423
+ ' accumulation_steps: ' + str(args.gradient_accumulation_steps) +
424
+ ' lr: ' + str(args.learning_rate) +
425
+ ' epochs: ' + str(args.num_train_epochs) +
426
+ ' epoch: ' + str(epoch_number) +
427
+ ' val_accuracy: ' + str(best_accuracy) +
428
+ ' test_accuracy: ' + str(tacc) + '\n'
429
+ )
430
+
431
+
432
+
433
+ return global_step, tr_loss / global_step, best_accuracy
434
+
435
+ def load_and_cache_examples(self, data, evaluate=False, no_cache=False, verbose=True, silent=False):
436
+ """
437
+ Creates a T5Dataset from data.
438
+
439
+ Utility function for train() and eval() methods. Not intended to be used directly.
440
+ """
441
+
442
+ encoder_tokenizer = self.encoder_tokenizer
443
+ decoder_tokenizer = self.decoder_tokenizer
444
+ args = self.args
445
+
446
+ if not no_cache:
447
+ no_cache = args.no_cache
448
+
449
+ if not no_cache:
450
+ os.makedirs(self.args.cache_dir, exist_ok=True)
451
+
452
+ mode = "dev" if evaluate else "train"
453
+
454
+ if args.dataset_class:
455
+ CustomDataset = args.dataset_class
456
+ return CustomDataset(encoder_tokenizer, decoder_tokenizer, args, data, mode)
457
+ else:
458
+ return SimpleSummarizationDataset(encoder_tokenizer, self.args, data, mode)
459
+
460
+ def _save_model(self, output_dir=None, optimizer=None, scheduler=None, model=None, results=None):
461
+ if not output_dir:
462
+ output_dir = self.args.output_dir
463
+ os.makedirs(output_dir, exist_ok=True)
464
+
465
+ logger.info(f"Saving model into {output_dir}")
466
+
467
+ if model and not self.args.no_save:
468
+ # Take care of distributed/parallel training
469
+ model_to_save = model.module if hasattr(model, "module") else model
470
+ self._save_model_args(output_dir)
471
+
472
+ os.makedirs(os.path.join(output_dir), exist_ok=True)
473
+ model_to_save.save_pretrained(output_dir)
474
+ self.config.save_pretrained(output_dir)
475
+ self.encoder_tokenizer.save_pretrained(output_dir)
476
+
477
+ torch.save(self.args, os.path.join(output_dir, "training_args.bin"))
478
+ if optimizer and scheduler and self.args.save_optimizer_and_scheduler:
479
+ torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
480
+ torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
481
+
482
+ if results:
483
+ output_eval_file = os.path.join(output_dir, "eval_results.txt")
484
+ with open(output_eval_file, "w") as writer:
485
+ for key in sorted(results.keys()):
486
+ writer.write("{} = {}\n".format(key, str(results[key])))
487
+
488
+ def _move_model_to_device(self):
489
+ self.model.to(self.device)
490
+
491
+ def _get_inputs_dict(self, batch):
492
+ device = self.device
493
+ pad_token_id = self.encoder_tokenizer.pad_token_id
494
+ source_ids, source_mask, y = batch["source_ids"], batch["source_mask"], batch["target_ids"]
495
+ y_ids = y[:, :-1].contiguous()
496
+ lm_labels = y[:, 1:].clone()
497
+ lm_labels[y[:, 1:] == pad_token_id] = -100
498
+
499
+ inputs = {
500
+ "input_ids": source_ids.to(device),
501
+ "attention_mask": source_mask.to(device),
502
+ "decoder_input_ids": y_ids.to(device),
503
+ "labels": lm_labels.to(device),
504
+ }
505
+ return inputs
506
+
507
+ def _save_model_args(self, output_dir):
508
+ os.makedirs(output_dir, exist_ok=True)
509
+ self.args.save(output_dir)
510
+
511
+ def _load_model_args(self, input_dir):
512
+ args = Seq2SeqArgs()
513
+ args.load(input_dir)
514
+ return args
515
+
516
+ def get_named_parameters(self):
517
  return [n for n, p in self.model.named_parameters()]
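
A minimal usage sketch for the updated class. This is not part of the commit; it assumes the repository's bartpho package and simpletransformers are installed, that the file is importable as model.model, and that the checkpoint name, hyperparameter values, and CSV paths below are illustrative placeholders rather than values taken from this repository.

    import pandas as pd
    from model.model import Seq2SeqModel

    # Hypothetical hyperparameters; any Seq2SeqArgs field can be overridden here.
    model_args = {
        "num_train_epochs": 5,
        "train_batch_size": 16,
        "learning_rate": 3e-5,
        "output_dir": "outputs/",
    }

    # "bartpho" resolves to (MBartConfig, MBartForConditionalGeneration, AutoTokenizer)
    # through the MODEL_CLASSES mapping added in this commit.
    model = Seq2SeqModel(
        encoder_decoder_type="bartpho",
        encoder_decoder_name="vinai/bartpho-word",  # assumed pretrained checkpoint
        args=model_args,
        use_cuda=False,
    )

    # Hypothetical data files; train_model evaluates on the dev and test frames
    # each epoch via predict_df and keeps the checkpoint with the best dev accuracy.
    train_df = pd.read_csv("data/train.csv")
    dev_df = pd.read_csv("data/dev.csv")
    test_df = pd.read_csv("data/test.csv")

    best_accuracy = model.train_model(
        train_df,
        best_accuracy=0.0,
        eval_data=dev_df,
        test_data=test_df,
    )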