# In [None]:
'''
Script for getting predictions for a TEST.CSV file with three columns of text.
It looks at the columns Q1, Q2, Q3, concatenates them and passes the full string as text into 
PersonalityClassifier(). 
>>> The method "predict_all_traits" defined in the class will get the predictions by running five 
separate prediction models (optimized random forests). 
The predictions are then applied as labels to each of the trait columns.
>>> Important: All the other columns in the original CSV file will be untouched (Q1, Q2, Q3 and Humility). 
The trait columns in the input CSV file do not need to be empty; 
the script overwrites any existing annotations with the predictions.
'''

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
import pandas as pd
from personality_model import PersonalityClassifier

# ***************** LOAD THE TEST DATA WITH Q1, Q2, Q3 *********************

input_path = "/path/to/test.csv" # path to test data
output_path = "/path/to/output/filled_predictions.csv" # change PATH and NAME of output

# Free-text answer columns that are concatenated into the model input.
TEXT_COLUMNS = ["Q1", "Q2", "Q3"]

# Trait columns that are created (or overwritten) with the predicted labels.
TRAIT_COLUMNS = [
    "Openness",
    "Conscientiousness",
    "Extraversion",
    "Agreeableness",
    "Emotional stability",
]

df = pd.read_csv(input_path)

# Concatenate Q1, Q2, Q3 into one space-separated string per row.
# Missing answers become empty strings first; astype(str) then guards against
# columns that pandas parsed as numeric, which would otherwise make
# " ".join raise TypeError. Only this temporary selection is converted, so the
# original columns in df are written back unchanged.
texts = df[TEXT_COLUMNS].fillna("").astype(str).agg(" ".join, axis=1)

# model initialization
model = PersonalityClassifier()

# Predict the trait labels for each row. Each result is indexed by trait name
# below, so predict_all_traits is expected to return a mapping keyed by trait.
predictions = texts.apply(model.predict_all_traits)

# Fill one column per trait; existing values in these columns are replaced.
for trait in TRAIT_COLUMNS:
    df[trait] = predictions.apply(lambda row_scores: row_scores[trait])

df.to_csv(output_path, index=False)

print(f"Trait predictions saved to: {output_path}")