# sentiment-analysis-app/test_model.py
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Global variables
TEST_SIZE = 1000
FINE_TUNED_MODEL = "andyqin18/test-finetuned"
# Define the analysis function
def analyze(text: str) -> np.ndarray:
    '''
    Input: text string.
    Output: binary prediction array of shape (num_label,); an entry is 1
    when its sigmoid probability is >= 0.5. Uses the `model` and
    `tokenizer` globals loaded below.
    '''
    encoding = tokenizer(text, return_tensors="pt", truncation=True)
    encoding = {k: v.to(model.device) for k, v in encoding.items()}
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**encoding)
    logits = outputs.logits
    # Multi-label setup: independent sigmoid per label, thresholded at 0.5
    probs = torch.sigmoid(logits.squeeze().cpu())
    predictions = np.zeros(probs.shape)
    predictions[np.where(probs >= 0.5)] = 1
    return predictions
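# Usage sketch (hypothetical example, assuming `model` and `tokenizer`
# below have been loaded): for a six-label model,
#   analyze("some comment")
# returns a binary array such as [1., 0., 0., 0., 0., 0.], one flag per label.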
# Read the dataset and randomly select test texts with their labels
df = pd.read_csv("milestone3/comp/train.csv")
labels = df.columns[2:]  # label columns start after the first two columns
num_label = len(labels)
train_texts = df["comment_text"].values
train_labels = df[labels].values

# Draw one set of indices so texts and labels stay aligned
np.random.seed(1)
test_idx = np.random.choice(train_labels.shape[0], size=TEST_SIZE, replace=False)
small_test_texts = train_texts[test_idx]
small_test_labels = train_labels[test_idx, :]
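# Optional sanity check (sketch, not in the original script): per-label
# positive rates in the sample, useful for judging class imbalance.
#   print(dict(zip(labels, small_test_labels.mean(axis=0))))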
# Load the model and tokenizer, and prepare counters for the analysis loop
model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL)
tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL)
model.eval()  # inference mode

total_true = 0     # correctly predicted individual labels
total_success = 0  # comments with the full label array predicted correctly
TP, FP, TN, FN = 0, 0, 0, 0
# Analysis loop
for comment_idx in tqdm(range(TEST_SIZE), desc="Analyzing..."):
    comment = small_test_texts[comment_idx]
    target = small_test_labels[comment_idx]
    result = analyze(comment[:500])  # truncate long comments to 500 characters

    # Count TP, FP, TN, FN per label
    for i in range(num_label):
        if result[i] == target[i]:
            if result[i] == 1:
                TP += 1
            else:
                TN += 1
        else:
            if result[i] == 1:
                FP += 1
            else:
                FN += 1

    # Count correct predictions for 1) each label, 2) the whole label array
    num_true = (result == target).sum()
    if num_true == num_label:
        total_success += 1
    total_true += num_true
# Calculate performance metrics
performance = {}
performance["label_accuracy"] = total_true / (num_label * TEST_SIZE)
performance["prediction_accuracy"] = total_success / TEST_SIZE
performance["precision"] = TP / (TP + FP)
performance["recall"] = TP / (TP + FN)
print(performance)
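# Note: TP/FP/TN/FN are pooled across every label position, so the precision
# and recall above are micro-averaged over the labels. An F1 score would
# follow directly (sketch, not part of the original output), assuming
# precision + recall > 0:
#   f1 = 2 * performance["precision"] * performance["recall"] / (
#       performance["precision"] + performance["recall"])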