README.md · DReAMy-lib/DB-custom-architecture at c110c07b6d565d3d2aaec0d12a69a36a77f5a5e1

metadata

license: apache-2.0

DreamBank Custom Architecture

The repo contains the weights for the custom architecture in Bertolini et al., 2023. Working example on how to load and use the model can be found in the Git repo.

Use

import torch, os
import pandas as pd
from tqdm import tqdm
import transformers
from transformers import AutoModel
from transformers import AutoConfig
from transformers import BertTokenizerFast
from SL_utils import *

Coding_emotions = {
    "AN": "Anger",
    "AP": "Apprehension",
    "SD": "Sadness",
    "CO": "Confusion",
    "HA": "Happiness",
}

emotions_list = list(Coding_emotions.keys())

test_sentences = [
    "In my dream I was follwed by the scary monster.",
    "I was walking in a forest, sorrounded by singing birds. I was in calm and peace."
]

test_sentences_target = len(test_sentences)*[[0, 0, 0, 0, 0]]
test_sentences_df     = pd.DataFrame.from_dict(
                {
                "report":test_sentences,
                "Report_as_Multilabel":test_sentences_target
                }
)

model_name   = "bert-large-cased"
model_config = AutoConfig.from_pretrained(model_name)
tokenizer    = BertTokenizerFast.from_pretrained(model_name, do_lower_case=False)
testing_set  = CustomDataset(test_sentences_df, tokenizer, max_length=512)

test_params = {
    'batch_size': 2,
    'shuffle': True,
    'num_workers': 0
}

testing_loader  = DataLoader(testing_set, **test_params)

model = BERT_PTM(
    model_config,
    model_name=model_name, 
    n_classes=len(emotions_list), 
    freeze_BERT=False,
)

# Load the models' weights from the pre-treined model
model.load_state_dict(torch.load("path/to/pytorch_model.bin"))
model.to("cuda")

outputs, targets, ids = validation(model, testing_loader, device="cuda", return_inputs=True)

corr_outputs    = np.array(outputs) >= 0.5 
corr_outputs_df = pd.DataFrame(corr_outputs, columns=emotions_list)
corr_outputs_df = corr_outputs_df.astype(int)

corr_outputs_df["report"] = decoded_ids = [decode_clean(x, tokenizer) for x in tqdm(ids)]

Cite

If you use this model on your work or research, please cite as:

@inproceedings{bertolini-etal-2024-automatic,
    title = "Automatic Annotation of Dream Report{'}s Emotional Content with Large Language Models",
    author = "Bertolini, Lorenzo  and
      Elce, Valentina  and
      Michalak, Adriana  and
      Widhoelzl, Hanna-Sophia  and
      Bernardi, Giulio  and
      Weeds, Julie",
    booktitle = "Proceedings of the 9th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2024)",
    month = mar,
    year = "2024",
    address = "St. Julians, Malta",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.clpsych-1.7",
    pages = "92--107",
}