Commit b53369f
1 Parent(s): b2643ce

Initial model output

Files changed:
- .gitignore         +1  -0
- config.json        +45 -0
- datasets/info.txt  +2  -0
- datasets/test.csv  +0  -0
- datasets/train.csv +0  -0
- model.safetensors  +3  -0
- rng_state.pth      +3  -0
- scheduler.pt       +3  -0
- test.py            +23 -0
- train.py           +66 -0
- trainer_state.json +53 -0
- training_args.bin  +3  -0
.gitignore
ADDED
@@ -0,0 +1 @@
+outputs
config.json
ADDED
@@ -0,0 +1,45 @@
+{
+  "_name_or_path": "microsoft/deberta-v3-base",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
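
Note: the committed config keeps the default LABEL_0/LABEL_1/LABEL_2 names, while train.py below maps FAVOR/NONE/AGAINST to 0/1/2. A minimal sketch (not part of the commit; the checkpoint path is a placeholder) of attaching the stance names when loading a checkpoint, so predictions can be read by name:

    from transformers import AutoModelForSequenceClassification

    # Same mapping as train.py; the checkpoint directory below is assumed, not taken from the commit.
    label_map = {"FAVOR": 0, "NONE": 1, "AGAINST": 2}

    model = AutoModelForSequenceClassification.from_pretrained(
        "outputs/v2-deberta-100-max/checkpoint-1000",
        id2label={v: k for k, v in label_map.items()},  # 0 -> FAVOR, 1 -> NONE, 2 -> AGAINST
        label2id=label_map,
    )
    print(model.config.id2label)
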
datasets/info.txt
ADDED
@@ -0,0 +1,2 @@
+SemEval-2016 Task 6
+https://alt.qcri.org/semeval2016/task6/index.php?id=data-and-tools
datasets/test.csv
ADDED
The diff for this file is too large to render. See raw diff.
datasets/train.csv
ADDED
The diff for this file is too large to render. See raw diff.
model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc4eccd7fa0d493ce18a1f334c618566485321f1567c92f8f9170ace693badaf
+size 737722356
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf6d32b103197dc0b3210814748bc7af241646d83d0a4e8e33910d527c40122b
+size 14244
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd93ceae9b40717ccd5dcd1b77f775797992dd03971d180c878c64433e5e15d2
+size 1064
test.py
ADDED
@@ -0,0 +1,23 @@
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+import os
+import numpy as np
+
+tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
+model = AutoModelForSequenceClassification.from_pretrained(
+    os.path.realpath(os.path.join(__file__, "..", "./outputs/v2-deberta-100-max-71%-sep/checkpoint-1000/")),
+    local_files_only=True
+)
+
+text_against = "ai [SEP] I think ai is a waste of time. I don't understand why everyone is so obsessed with this subject, it makes no sense?"
+text_for = "flowers [SEP] I think flowers are very useful and will become essential to society"
+text_neutral = "Ai is a tool use by researchers and scientists to approximate functions"
+encoded = tokenizer(text_for.lower(), max_length=100, padding="max_length", truncation=True, return_tensors="pt")
+
+def normalize(arr: np.ndarray) -> np.ndarray:
+    min = arr.min()
+    arr = arr - min
+    return arr / arr.sum()
+
+output = model(**encoded)
+print(output.logits.detach().numpy()[0])
+print(normalize(output.logits.detach().numpy()[0]))
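
Note: normalize() in test.py shifts the logits so the minimum is zero and rescales them to sum to 1; it is not a softmax, so the lowest-scoring class always gets probability 0. A purely illustrative alternative (not part of the commit) that converts the same logits into softmax probabilities:

    import numpy as np

    def softmax(logits: np.ndarray) -> np.ndarray:
        # Subtract the max for numerical stability, then exponentiate and normalize.
        shifted = logits - logits.max()
        exp = np.exp(shifted)
        return exp / exp.sum()

    # e.g. probs = softmax(output.logits.detach().numpy()[0])
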
train.py
ADDED
@@ -0,0 +1,66 @@
+import csv
+from typing import TypedDict
+import numpy as np
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
+from datasets import load_dataset, Dataset
+import pandas as pd
+import evaluate
+import os
+import torch
+
+data_files = {
+    "train": os.path.realpath(os.path.join(__file__, "..", "./datasets/train.csv")),
+    "test": os.path.realpath(os.path.join(__file__, "..", "./datasets/test.csv"))
+}
+output_dir = os.path.realpath(os.path.join(__file__, "..", "./outputs/v2-deberta-100-max"))
+
+tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
+
+label_map = {
+    "FAVOR": 0,
+    "NONE": 1,
+    "AGAINST": 2
+}
+
+torch.cuda.empty_cache()
+
+def tokenize(examples):
+    examples["label"] = [label_map[label] for label in examples["label"]]
+    examples["text"] = [examples["Target"][i] + " [SEP] " + text for i, text in enumerate(examples["text"])]
+    return tokenizer(examples["text"], padding="max_length", return_tensors='pt', truncation=True, max_length=100)
+
+
+def load_dataset(path: str) -> Dataset:
+    dataframe = pd.read_csv(path)
+    dataframe = dataframe.drop("Opinion Towards", axis=1)
+    dataframe = dataframe.drop("Sentiment", axis=1)
+    dataset = Dataset.from_pandas(dataframe)
+    dataset = dataset.rename_column('Tweet', 'text')
+    dataset = dataset.rename_column("Stance", "label")
+
+    return dataset.map(tokenize, batched=True)
+
+train_ds = load_dataset(data_files["train"])
+test_ds = load_dataset(data_files["test"])
+
+model = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-v3-base", num_labels=3)
+
+metric = evaluate.load("accuracy")
+
+def compute_metrics(eval_pred):
+    logits, labels = eval_pred
+    predictions = np.argmax(logits, axis=-1)
+    return metric.compute(predictions=predictions, references=labels)
+
+training_args = TrainingArguments(output_dir=output_dir, evaluation_strategy="epoch")
+
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=train_ds,
+    eval_dataset=test_ds,
+    compute_metrics=compute_metrics
+)
+print("TRAINING")
+
+trainer.train()
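
Note: train.py ends at trainer.train() and relies on the Trainer's default checkpointing (save_steps=500, visible in trainer_state.json below). A minimal follow-up sketch, assuming the same trainer, tokenizer, test_ds, and output_dir objects defined above, that runs a final evaluation and writes the model and tokenizer to the output directory:

    # Illustrative only; not part of the committed script.
    metrics = trainer.evaluate(eval_dataset=test_ds)   # final accuracy on the test split
    print(metrics)

    trainer.save_model(output_dir)          # writes the model weights and config.json
    tokenizer.save_pretrained(output_dir)   # writes the tokenizer files alongside them
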
trainer_state.json
ADDED
@@ -0,0 +1,53 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.73972602739726,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6820040899795501,
+      "eval_loss": 0.7493410706520081,
+      "eval_runtime": 6.4955,
+      "eval_samples_per_second": 301.132,
+      "eval_steps_per_second": 37.718,
+      "step": 365
+    },
+    {
+      "epoch": 1.37,
+      "grad_norm": 20.02265739440918,
+      "learning_rate": 2.71689497716895e-05,
+      "loss": 0.7495,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.7055214723926381,
+      "eval_loss": 0.7625377774238586,
+      "eval_runtime": 6.586,
+      "eval_samples_per_second": 296.992,
+      "eval_steps_per_second": 37.2,
+      "step": 730
+    },
+    {
+      "epoch": 2.74,
+      "grad_norm": 15.491150856018066,
+      "learning_rate": 4.337899543378996e-06,
+      "loss": 0.4253,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1095,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 410505404868000.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
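
Note: the state above records eval accuracy of roughly 0.682 after epoch 1 and 0.706 after epoch 2, with the checkpoint saved at step 1000 of 1095. A small sketch (file path assumed, not part of the commit) for pulling those evaluation entries out of log_history:

    import json

    # Path assumed relative to the checkpoint directory.
    with open("trainer_state.json") as f:
        state = json.load(f)

    evals = [(e["epoch"], e["eval_accuracy"]) for e in state["log_history"] if "eval_accuracy" in e]
    print(evals)  # e.g. [(1.0, 0.682...), (2.0, 0.705...)]
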
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16a839ef4391579a0a777fe359682af2af1f7b1340ab79e578d454375fa4556c
+size 5048