import csv

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import transformers
from sklearn.metrics import accuracy_score, classification_report
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset, TensorDataset
from tqdm import tqdm
from transformers import AdamW, BertConfig, BertTokenizer
						
class MyDataSet(Dataset):
    """Thin Dataset wrapper around a pandas DataFrame."""
    def __init__(self, loaded_data):
        self.data = loaded_data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Use .iloc for positional row access; plain [] would index columns.
        return self.data.iloc[idx]


Data_path = "/kaggle/input/inference/train.csv"
Totle_data = pd.read_csv(Data_path)
# Work on a 10% random sample and drop rows where column "2" is missing.
Totle_data = Totle_data.sample(frac=0.1)
Totle_data = Totle_data.dropna(axis=0, subset=["2"])
custom_dataset = MyDataSet(Totle_data)

# 60% train / 10% validation / 30% test split.
train_size = int(len(custom_dataset) * 0.6)
validate_size = int(len(custom_dataset) * 0.1)
test_size = len(custom_dataset) - validate_size - train_size
train_dataset, validate_dataset, test_dataset = torch.utils.data.random_split(
    custom_dataset, [train_size, validate_size, test_size])
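# For a reproducible split, random_split accepts a seeded generator
# (illustrative sketch; the seed value 42 is an arbitrary choice):
# train_dataset, validate_dataset, test_dataset = torch.utils.data.random_split(
#     custom_dataset, [train_size, validate_size, test_size],
#     generator=torch.Generator().manual_seed(42))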
					
						
# Materialize the three splits as CSV files.
train_data_path = "Bert_Try.csv"
dev_data_path = "Bert_Dev.csv"
test_data_path = "Bert_Test.csv"

# random_split only yields index lists; map them back to DataFrame rows.
train_dataset = Totle_data.iloc[train_dataset.indices]
validate_dataset = Totle_data.iloc[validate_dataset.indices]
test_dataset = Totle_data.iloc[test_dataset.indices]

train_dataset.to_csv(train_data_path, index=False, header=True)
validate_dataset.to_csv(dev_data_path, index=False, header=True)
test_dataset.to_csv(test_data_path, index=False, header=True)

# Sanity check: peek at the first rows of the training split.
data = pd.read_csv(train_data_path)
print(data.head())
						
class BertClassificationModel(nn.Module):
    def __init__(self):
        super(BertClassificationModel, self).__init__()
        # Load the pretrained Chinese BERT encoder and fine-tune all of it.
        pretrained_weights = "bert-base-chinese"
        self.bert = transformers.BertModel.from_pretrained(pretrained_weights)
        for param in self.bert.parameters():
            param.requires_grad = True
        # Classification head: 768-dim pooled output -> 3 classes.
        self.dense = nn.Linear(768, 3)

    def forward(self, input_ids, token_type_ids, attention_mask):
        bert_output = self.bert(input_ids=input_ids,
                                token_type_ids=token_type_ids,
                                attention_mask=attention_mask)
        # bert_output[1] is the pooled [CLS] representation.
        bert_cls_hidden_state = bert_output[1]
        linear_output = self.dense(bert_cls_hidden_state)
        return linear_output
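# Shape sketch (illustrative): for a batch of B sequences of length L, the
# three inputs are each (B, L) LongTensors; bert_output[1] is (B, 768) and
# the returned logits are (B, 3), one column per NLI label.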
					
						
def encoder(max_len, vocab_path, text_list):
    # Note: vocab_path is kept for API compatibility but unused here; the
    # tokenizer is loaded directly from the "bert-base-chinese" checkpoint.
    tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
    encoded = tokenizer(
        text_list,
        padding=True,
        truncation=True,
        max_length=max_len,
        return_tensors='pt'
    )
    input_ids = encoded['input_ids']
    token_type_ids = encoded['token_type_ids']
    attention_mask = encoded['attention_mask']
    return input_ids, token_type_ids, attention_mask
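# Quick usage check (illustrative only; the two sentences are made-up samples):
# ids, types, mask = encoder(max_len=150, vocab_path="",
#                            text_list=["今天天气很好。", "今天下雨了。"])
# Each returned tensor has shape (2, seq_len) with seq_len <= 150.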
					
						
# Map NLI label strings to integer class ids.
labels2dict = {"neutral": 0, "entailment": 1, "contradiction": 2}

def load_data(path):
    # Assumes column 0 holds the label string and column 1 the text.
    text_list = []
    labels = []
    with open(path) as csvFileObj:
        readerObj = csv.reader(csvFileObj)
        for row in readerObj:
            # Skip the header row.
            if readerObj.line_num == 1:
                continue
            label = labels2dict[row[0]]
            text = row[1]
            text_list.append(text)
            labels.append(label)

    input_ids, token_type_ids, attention_mask = encoder(
        max_len=150,
        vocab_path="/root/Bert/bert-base-chinese/vocab.txt",
        text_list=text_list)
    labels = torch.tensor(labels)

    data = TensorDataset(input_ids, token_type_ids, attention_mask, labels)
    return data
					
						
batch_size = 16

train_data_path = "Bert_Try.csv"
dev_data_path = "Bert_Dev.csv"
test_data_path = "Bert_Test.csv"

train_data = load_data(train_data_path)
dev_data = load_data(dev_data_path)
test_data = load_data(test_data_path)

train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dataset=dev_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)
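# Note: shuffle stays False for the test loader so predictions remain aligned
# with the row order of Bert_Test.csv.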
					
						
def dev(model, dev_loader):
    # Evaluate accuracy on the validation split.
    model.to(device)
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for step, (input_ids, token_type_ids, attention_mask, labels) in tqdm(
                enumerate(dev_loader), desc='Dev Iteration:'):
            input_ids, token_type_ids, attention_mask, labels = (
                input_ids.to(device), token_type_ids.to(device),
                attention_mask.to(device), labels.to(device))
            out_put = model(input_ids, token_type_ids, attention_mask)
            _, predict = torch.max(out_put.data, 1)
            correct += (predict == labels).sum().item()
            total += labels.size(0)
        res = correct / total
        return res
					
						
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def train(model, train_loader, dev_loader):
    model.to(device)
    model.train()
    criterion = nn.CrossEntropyLoss()
    param_optimizer = list(model.named_parameters())
    # Exclude biases and LayerNorm parameters from weight decay.
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]

    optimizer_params = {'lr': 1e-5, 'eps': 1e-6, 'correct_bias': False}
    optimizer = AdamW(optimizer_grouped_parameters, **optimizer_params)
    # Halve the learning rate when the best dev accuracy stops improving.
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, min_lr=1e-7,
                                  patience=5, verbose=True, threshold=0.0001, eps=1e-08)

    total_epochs = 10
    bestAcc = 0
    correct = 0
    total = 0
    print('Training and validation begin!')
    for epoch in range(total_epochs):
        for step, (input_ids, token_type_ids, attention_mask, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            input_ids, token_type_ids, attention_mask, labels = (
                input_ids.to(device), token_type_ids.to(device),
                attention_mask.to(device), labels.to(device))
            out_put = model(input_ids, token_type_ids, attention_mask)
            loss = criterion(out_put, labels)
            _, predict = torch.max(out_put.data, 1)
            # correct/total accumulate across steps, so train_acc is a running average.
            correct += (predict == labels).sum().item()
            total += labels.size(0)
            loss.backward()
            optimizer.step()

            if (step + 1) % 10 == 0:
                train_acc = correct / total
                print("Train Epoch[{}/{}], step[{}/{}], tra_acc {:.6f} %, loss: {:.6f}".format(
                    epoch + 1, total_epochs, step + 1, len(train_loader),
                    train_acc * 100, loss.item()))

            # Every 200 steps, validate and checkpoint the best model.
            if (step + 1) % 200 == 0:
                train_acc = correct / total
                acc = dev(model, dev_loader)
                if bestAcc < acc:
                    bestAcc = acc
                    # Saving the whole module object; loading later requires the
                    # BertClassificationModel class definition to be in scope.
                    path = 'bert_model.pkl'
                    torch.save(model, path)
                print("DEV Epoch[{}/{}], step[{}/{}], tra_acc {:.6f} %, bestAcc {:.6f} %, dev_acc {:.6f} %, loss: {:.6f}".format(
                    epoch + 1, total_epochs, step + 1, len(train_loader),
                    train_acc * 100, bestAcc * 100, acc * 100, loss.item()))
                model.train()  # dev() switched to eval mode; resume training mode
        scheduler.step(bestAcc)
					
						
# Path to a previously saved checkpoint (e.g., for warm-starting or inference).
path = '/kaggle/input/inference/bert_model.pkl'

model = BertClassificationModel()
train(model, train_loader, dev_loader)
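# Post-training evaluation sketch on the held-out test split (assumption:
# train() above saved 'bert_model.pkl' at least once; torch.load also needs
# the BertClassificationModel class in scope, as it is here).
import os
if os.path.exists('bert_model.pkl'):
    best_model = torch.load('bert_model.pkl', map_location=device)
    test_acc = dev(best_model, test_loader)
    print('Test accuracy: {:.6f} %'.format(test_acc * 100))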