# NDIS Project - OpenAI - PBSP Scoring - Page 2 - Direct / Indirect Data Collection

In [None]:
import openai
import re
import string
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import display, clear_output, Javascript, HTML, Markdown
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import json
import spacy
from spacy import displacy
from dotenv import load_dotenv
import pandas as pd
import argilla as rg
from argilla.metrics.text_classification import f1
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 10000)
pd.set_option('display.width', 10000)

In [None]:
#initializations
# `os` is needed for the environment lookups below and is not imported by the
# notebook's import cell, so bring it into scope here.
import os

# Copy settings from a local .env file (when one exists) into the process
# environment; variables that are already set are left untouched.
load_dotenv()

# Connection settings for the OpenAI client, all read from the environment.
openai.api_key = os.environ['API_KEY']
openai.api_base = os.environ['API_BASE']
openai.api_type = os.environ['API_TYPE']
openai.api_version = os.environ['API_VERSION']
# Deployment passed as `engine=` when requesting chat completions.
deployment_name = os.environ['DEPLOYMENT_ID']

#argilla
rg.init(
    api_url=os.environ["ARGILLA_API_URL"],
    api_key=os.environ["ARGILLA_API_KEY"]
)

In [None]:
#sentence extraction
def extract_sentences(paragraph):
    """Split *paragraph* into trimmed, non-empty fragments.

    The text is cut at every '.', '!', '?', ';', ':', ',', '_', '-' and
    newline character; whitespace-only fragments are discarded.
    """
    # A single character class covering the delimiter set listed above.
    delimiters = r'[.!?;:,_\n\-]'
    trimmed = (piece.strip() for piece in re.split(delimiters, paragraph))
    return [fragment for fragment in trimmed if fragment]

In [None]:
def filter_dataframe(result_df, paragraph):
    """Keep only the rows whose phrase can be found in *paragraph*.

    A row is kept when both conditions hold:

    * its ``Phrase``, lower-cased and with punctuation removed, is a
      substring of the paragraph normalised the same way;
    * at least 20% of the distinct characters of the lower-cased phrase
      also occur in the lower-cased paragraph.

    Args:
        result_df: DataFrame with a string ``Phrase`` column.
        paragraph: The text the phrases are checked against.

    Returns:
        The matching rows with exact duplicate rows removed. When no row
        matches, all rows of *result_df* are returned (deduplicated) instead
        of an empty frame.
    """
    # ASCII punctuation plus typographic quotes, so a phrase that differs
    # from the paragraph only in apostrophe style still matches.
    strip_table = str.maketrans(
        "", "", string.punctuation + "\u2018\u2019\u201c\u201d"
    )

    def normalise(text):
        return text.lower().translate(strip_table)

    # Paragraph-level values are invariant across rows; compute them once.
    normalised_paragraph = normalise(paragraph)
    paragraph_chars = set(paragraph.lower())

    def matches(phrase):
        if normalise(phrase) not in normalised_paragraph:
            return False
        phrase_chars = set(phrase.lower())
        if not phrase_chars:
            # An empty phrase is a substring of anything; reject it rather
            # than dividing by zero below.
            return False
        return len(phrase_chars & paragraph_chars) / len(phrase_chars) >= 0.2

    # An explicit boolean Series keeps this a row selection even when
    # result_df has no rows.
    mask = pd.Series(
        [matches(phrase) for phrase in result_df['Phrase']],
        index=result_df.index,
        dtype=bool,
    )
    filtered_df = result_df[mask]
    if filtered_df.empty:
        filtered_df = result_df
    return filtered_df.drop_duplicates()

In [None]:
def process_response(response, query):
    """Parse the model's answer into a Phrase / Topic / Score DataFrame.

    Lines containing "(Confidence Score:" are treated as entries: the text
    before the marker (minus the leading list number and surrounding double
    quotes) is the phrase, and the number after the marker is the score.
    Other lines containing one of the headings "Direct data collection:",
    "Indirect data collection:" or "None:" switch the topic assigned to the
    entries that follow to DIRECT, INDIRECT or NONE respectively.

    Args:
        response: Raw text returned by the model.
        query: The paragraph that was analysed; used to validate the phrases
            and to build the fallback result.

    Returns:
        A DataFrame with columns ``Phrase``, ``Topic`` and ``Score``. When no
        entry can be parsed, or validating the phrases against *query* fails,
        the result is instead every fragment of *query* (as produced by
        ``extract_sentences``) labelled NONE with a score of 0.9.
    """
    marker = "(Confidence Score:"
    headings = (
        ("Direct data collection:", "DIRECT"),
        ("Indirect data collection:", "INDIRECT"),
        ("None:", "NONE"),
    )

    def fallback():
        fragments = extract_sentences(query)
        return pd.DataFrame({
            'Phrase': fragments,
            'Topic': ['NONE'] * len(fragments),
            'Score': [0.9] * len(fragments),
        })

    sentences = []
    topics = []
    scores = []
    # Entries that appear before any heading are treated as uncategorised
    # rather than being stored with a missing topic.
    topic = "NONE"
    for line in response.strip().split("\n"):
        head, found, tail = line.partition(marker)
        if not found:
            for heading, label in headings:
                if heading in line:
                    topic = label
                    break
            continue
        # Take the leading number only, so trailing text after the closing
        # parenthesis does not invalidate the entry. Placeholder scores such
        # as "<your score goes here>" do not match and are skipped.
        score_match = re.match(r'\s*(\d*\.?\d+)', tail)
        if score_match is None:
            continue
        # Remove only the list numbering at the start of the line; digits
        # followed by a dot inside the phrase are left alone.
        phrase = re.sub(r'^\s*\d+\.\s*', '', head).strip().strip('"')
        if not phrase:
            continue
        sentences.append(phrase)
        topics.append(topic)
        scores.append(float(score_match.group(1)))

    if not sentences:
        return fallback()

    result_df = pd.DataFrame({'Phrase': sentences, 'Topic': topics, 'Score': scores})
    try:
        return filter_dataframe(result_df, query)
    except Exception:
        # Deliberate best effort: this runs inside a UI callback, so a
        # failure while validating phrases degrades to the fallback result.
        return fallback()

In [None]:
def get_prompt(query):
    """Build the classification prompt for one practitioner paragraph.

    The prompt embeds *query* and asks for three numbered lists titled
    "Direct data collection", "Indirect data collection" and "None:", with
    each phrase followed by "(Confidence Score: <value>)".

    Args:
        query: The paragraph to analyse; inserted verbatim into the prompt.

    Returns:
        The complete prompt text.
    """
    prompt = f"""
    The paragraph below is written by a disability practitioner to describe the direct and/or indirect data collection approaches which has been undertaken to prepare the positive behaviour support plan. 

    Paragraph:
    {query}

    Requirement:
    - Identify the phrases from the paragraph above that represent each of the following data collection categories: "Direct data collection", "Indirect data collection".

    Guidelines:
    - "Direct data collection": To detect any phrases from the paragraph that might represent direct data collection approaches, look for phrases that mention direct observation of the person with disability by either the practitioner or a relevant stakeholder (e.g., support worker). Example phrases include: "we conducted direct observations of the person with disability", "we implemented a functional assessment that involved direct observation by both myself and the support worker". Also look for phrases that mention completion of behavioural data collection tools like ABC note cards and scatter plots, completed by the practitioner or relevant stakeholder. Example phrases include: "I conducted direct observation using ABC note cards to capture antecedents, behaviors, and consequences in the individual's natural environment.", "I completed scatter plots to visually represent the frequency, duration, and intensity of the targeted behaviors over time.".

    - "Indirect data collection": To detect any phrases from the paragraph that might represent indirect data collection approaches, look for phrases that mention the use of standardised tools completed by the practitioner in consultation with relevant stakeholders who know the person of focus well, such as Contextual Assessment Inventory, Functional Assessment Interview. Example phrases include: "I conducted a Contextual Assessment Inventory with input from relevant stakeholders", "I utilized the Functional Assessment Interview to gather information from individuals who know the person best". Also look for phrases that mention interviews, phone calls or any other form of communication with relevant stakeholders who know the person of focus well. Example phrases include: "We conducted several phone interviews with family members who have known the person for several years", "We spoke with the person's support workers to gain insight into their daily routines and any challenges they may be experiencing". Also look for phrases that mention consultation of relevant reports (e.g., previous positive behaviour support plans, incident report, previous assessment reports from health and allied health professionals). Example phrases include: "We have consulted relevant reports, such as previous positive behavior support plans, incident reports", "Our team has reviewed previous assessment reports from health and allied health professionals to gather information about any underlying medical or psychological conditions that may be influencing the individual's behavior."

    Specifications of a correct answer:
    - Please provide a response that closely matches the information in the paragraph and does not deviate significantly from it.
    - Provide your answer in numbered lists. 
    - All the phrases in your answer must be exact substrings in the original paragraph. without changing any characters.
    - All the upper case and lower case characters in the phrases in your answer must match the upper case and lower case characters in the original paragraph.
    - Start numbering the phrases under each data collection category from number 1. 
    - Start each list of phrases with these titles: "Direct data collection", "Indirect data collection".
    - For each phrase that belongs to any of the above data collection categories, provide a confidence score that ranges between 0.50 and 1.00, where a score of 0.50 means you are very weakly confident that the phrase belongs to that specific data collection category, whereas a score of 1.00 means you are very strongly confident that the phrase belongs to that specific data collection category.
    - Never include any phrase in your answer that does not exist in the paragraph above.
    - Include a final numbered list titled "None:", which include all the remaining phrases from the paragraph above that do not belong to any of the data collection categories above. Provide a confidence score for each of these phrases as well.

    Example answer:

    Direct data collection:
    1. we conducted direct observations of the person with disability. (Confidence Score: 1.00)
    2. we implemented a functional assessment that involved direct observation by both myself and the support worker. (Confidence Score: 0.95)
    3. I conducted direct observation using ABC note cards to capture antecedents, behaviors, and consequences in the individual's natural environment. (Confidence Score: 1.00)
    4. I completed scatter plots to visually represent the frequency, duration, and intensity of the targeted behaviors over time. (Confidence Score: 0.92)
    
    Indirect data collection:
    1. I conducted a Contextual Assessment Inventory with input from relevant stakeholders. (Confidence Score: 1.00)
    2. I utilized the Functional Assessment Interview to gather information from individuals who know the person best. (Confidence Score: 1.00)
    3. We conducted several phone interviews with family members who have known the person for several years. (Confidence Score: 0.94)
    4. We spoke with the person's support workers to gain insight into their daily routines and any challenges they may be experiencing (Confidence Score: 0.92)
    5. We have consulted relevant reports, such as previous positive behavior support plans, incident reports. (Confidence Score: 0.89)
    6. Our team has reviewed previous assessment reports from health and allied health professionals to gather information about any underlying medical or psychological conditions that may be influencing the individual's behavior. (Confidence Score: 0.87)
    
    None:
    1. <remaining phrase from the paragraph goes here>. (Confidence Score: <your score goes here>)
    2. <remaining phrase from the paragraph goes here>. (Confidence Score: <your score goes here>)
    """
    return prompt

In [None]:
def get_response_chatgpt(prompt):
    """Send *prompt* as a single user message and return the reply text.

    The request goes to the deployment named by ``deployment_name`` with a
    fixed system message and temperature 0; the content of the first choice
    is returned.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        engine=deployment_name,
        messages=conversation,
        temperature=0,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

In [None]:
def convert_df(result_df):
    """Reshape a Phrase / Topic / Score frame into ``text`` / ``prediction`` columns.

    ``text`` carries the phrase; ``prediction`` holds, for each row, a
    one-element list containing the ``[topic, score]`` pair.
    """
    converted = pd.DataFrame(columns=['text', 'prediction'])
    converted['text'] = result_df['Phrase']
    label_score_pairs = result_df.apply(
        lambda record: [[record['Topic'], record['Score']]],
        axis=1,
    )
    converted['prediction'] = label_score_pairs
    return converted

In [None]:
#query = """
#In preparing the positive behavior support plan, our team utilized a combination of direct and indirect data collection approaches to gain a comprehensive understanding of the person of focus and their behavior patterns. Direct data collection approaches included conducting direct observations of the individual by both the practitioner and support workers, as well as the completion of behavioral data collection tools such as ABC note cards and scatter plots. Indirect data collection methods included the use of standardized tools completed by the practitioner in consultation with relevant stakeholders who know the individual well, such as the Contextual Assessment Inventory and Functional Assessment Interview. We also conducted interviews and engaged in communication with family members, caregivers, and healthcare professionals to gather additional information about the individual's behavior patterns and needs. Additionally, we consulted relevant reports, such as previous positive behavior support plans, incident reports, and assessment reports from health and allied health professionals to gain insight into potential triggers and reinforcement patterns that may be contributing to the individual's challenging behaviors. By utilizing both direct and indirect data collection approaches, we were able to develop a comprehensive positive behavior support plan that is tailored to the unique needs of the individual.
#"""
#prompt = get_prompt(query)
#response = get_response_chatgpt(prompt)
#result_df = process_response(response, query)
#result_df

In [None]:
# Display colour (hex) for each topic label. The key order is also the order
# in which topics are iterated elsewhere in this notebook.
topic_color_dict = dict(
    DIRECT='#90EE90',
    INDIRECT='#F08080',
    NONE='#CCCCCC',
)

def color(df, color):
    return df.style.format({'Score': '{:,.2%}'.format}).bar(subset=['Score'], color=color)

def annotate_query(highlights, query, topics):
    """Locate each highlighted phrase in *query* and label the matches.

    Matching ignores case. Every character of a phrase that ``re.escape``
    would escape (spaces, '.', '-', brackets, ...) is matched loosely: it may
    appear as itself or as any run of non-word characters, or be absent.

    Args:
        highlights: Phrases to look for.
        query: Text to search.
        topics: Label for each phrase, paired with *highlights* by position.

    Returns:
        A list of ``{"start", "end", "label"}`` dicts, one per non-empty
        match, in phrase order and then match order.
    """
    ents = []
    for phrase, label in zip(highlights, topics):
        if not phrase:
            # An empty pattern would match at every position of the query.
            continue
        pieces = []
        for char in phrase:
            escaped = re.escape(char)
            if escaped == char:
                pieces.append(char)
            else:
                # Keep the character escaped inside the class so that '^',
                # ']' and '\' are taken literally.
                pieces.append(f'[{escaped}\\W]*')
        pattern = ''.join(pieces)
        for match in re.finditer(pattern, query, re.IGNORECASE):
            # A phrase made only of loosely-matched characters can match the
            # empty string; such spans carry nothing to highlight.
            if match.end() > match.start():
                ents.append({"start": match.start(), "end": match.end(), "label": label})
    return ents

def path_to_image_html(path):
    """Return an ``<img>`` tag (30x15) whose source is *path*."""
    return ''.join(('<img src="', path, '" width="30" height="15" />'))

# Minimum confidence score for a phrase to be counted towards a category.
passing_score = 0.5
# A category is shown as used when its Final_Score exceeds this value.
final_passing = 0.0
def display_final_df(agg_df):
    """Show a thumbs-up / thumbs-down table for the DIRECT and INDIRECT categories.

    A category found in the index of *agg_df* gets a thumbs-up when its
    ``Final_Score`` is greater than ``final_passing`` and a thumbs-down
    otherwise; a category missing from the index gets a thumbs-down. The
    table is rendered as centred HTML with the image paths turned into
    ``<img>`` tags.
    """
    categories = [
            'DIRECT',
            'INDIRECT'
        ]
    present = [name for name in categories if name in agg_df.index.tolist()]
    icons = []
    for name in present:
        if agg_df.loc[name, 'Final_Score'] > final_passing:
            icons.append('./thumbs_up.png')
        else:
            icons.append('./thumbs_down.png')
    df = pd.DataFrame({'Data Collection Category': present, 'USED': icons})
    absent = [name for name in categories if name not in present]
    if len(absent) > 0:
        absent_df = pd.DataFrame({
            'Data Collection Category': absent,
            'USED': ['./thumbs_down.png'] * len(absent),
        })
        df = pd.concat([df, absent_df])
    df = df.set_index('Data Collection Category')
    # Lift the column-width limit before rendering (a global pandas option).
    pd.set_option('display.max_colwidth', None)
    table_html = df.to_html(
        classes=["align-center"],
        index=True,
        escape=False,
        formatters=dict(USED=path_to_image_html),
    )
    display(HTML('<div style="text-align: center;">' + table_html + '</div>'))
    

### Quality Markers:
#### <font color='blue'>Q2c.</font> The plan indicates that at least one <font color='red'>direct</font> data collection approach has been undertaken.

#### <font color='blue'>Q2d.</font> The plan indicates that at least one <font color='red'>indirect</font> data collection approach has been undertaken.

In [None]:
#demo with Voila

def _make_action_button(caption):
    """Build one of the three action buttons; the tooltip repeats the caption."""
    return widgets.Button(
        description=caption,
        disabled=False,
        button_style='success', # 'success', 'info', 'warning', 'danger' or ''
        tooltip=caption,
        icon='check',
        layout={'height': '70px', 'width': '250px'}
    )

# Prompt label and free-text answer box.
bhvr_label = widgets.Label(value='Please type your answer:')
bhvr_text_input = widgets.Textarea(
    value='',
    placeholder='Type your answer',
    description='',
    disabled=False,
    layout={'height': '300px', 'width': '90%'}
)

# Buttons for the three steps: score, validate, evaluate.
bhvr_nlp_btn = _make_action_button('Score Answer')
bhvr_agr_btn = _make_action_button('Validate Data')
bhvr_eval_btn = _make_action_button('Evaluate Model')
btn_box = widgets.HBox(
    [bhvr_nlp_btn, bhvr_agr_btn, bhvr_eval_btn],
    layout={'width': '100%', 'height': '160%'},
)

# Output area that the button handlers render into.
bhvr_outt = widgets.Output()
bhvr_outt.layout.height = '100%'
bhvr_outt.layout.width = '100%'
bhvr_box = widgets.VBox(
    [bhvr_text_input, btn_box, bhvr_outt],
    layout={'width': '100%', 'height': '160%'},
)

# State shared between the button handlers.
dataset_rg_name = 'pbsp-page2-direct-indirect-argilla-ds'
agrilla_df = None
annotated = False
def on_bhvr_button_next(b):
    """Score the typed answer and render the results in the output area.

    Sends the text-area content to the model, parses the reply and, when at
    least one non-NONE phrase reaches ``passing_score``, displays:

    * the answer with the matched phrases highlighted per category,
    * a per-category table of phrases and scores,
    * a bar chart of each category's share of the total score,
    * the final thumbs-up / thumbs-down table.

    The displayed rows plus the NONE rows are stored in the global
    ``agrilla_df``. When no phrase qualifies, the answer text is printed
    unchanged and ``agrilla_df`` is left as it was.

    Args:
        b: The clicked button (unused).
    """
    global agrilla_df
    with bhvr_outt:
        clear_output()
        query = bhvr_text_input.value
        prompt = get_prompt(query)
        response = get_response_chatgpt(prompt)
        result_df = process_response(response, query)
        sub_result_df = result_df[(result_df['Score'] >= passing_score) & (result_df['Topic'] != 'NONE')]
        sub_2_result_df = result_df[result_df['Topic'] == 'NONE']
        if len(sub_result_df) > 0:
            highlights = sub_result_df['Phrase'].tolist()
            highlight_topics = sub_result_df['Topic'].tolist()
            ents = annotate_query(highlights, query, highlight_topics)
            # Colour each label by its own topic. A phrase can match several
            # times or not at all, so `ents` does not line up one-to-one
            # with `highlight_topics`.
            colors = {
                ent['label']: topic_color_dict[ent['label']]
                for ent in ents
                if ent['label'] in topic_color_dict
            }

            ex = [{"text": query,
                   "ents": ents,
                   "title": None}]
            title = "Data Collection Category Highlights"
            display(HTML(f'<center><h1>{title}</h1></center>'))
            # jupyter=False makes displacy return the markup instead of
            # displaying it itself, so it is shown exactly once below.
            html = displacy.render(ex, style="ent", manual=True, jupyter=False, options={'colors': colors})
            display(HTML(html))
            title = "Data Collection Category Classifications"
            display(HTML(f'<center><h1>{title}</h1></center>'))
            for top in topic_color_dict.keys():
                top_result_df = sub_result_df[sub_result_df['Topic'] == top]
                if len(top_result_df) > 0:
                    top_result_df = top_result_df.sort_values(by='Score', ascending=False).reset_index(drop=True)
                    top_result_df = top_result_df.set_index('Phrase')
                    top_result_df = top_result_df[['Score']]
                    display(HTML(
                        f'<left><h2 style="text-decoration: underline; text-decoration-color:{topic_color_dict[top]};">{top}</h2></left>'))
                    display(color(top_result_df, topic_color_dict[top]))

            # Each topic's summed score as a percentage of the overall total.
            agg_df = sub_result_df.groupby('Topic')['Score'].sum()
            agg_df = agg_df.to_frame()
            agg_df.index.name = 'Topic'
            agg_df.columns = ['Total Score']
            agg_df = agg_df.assign(
                Final_Score=lambda x: x['Total Score'] / x['Total Score'].sum() * 100.00
            )
            agg_df = agg_df.sort_values(by='Final_Score', ascending=False)
            title = "Data Collection Category Coverage"
            display(HTML(f'<center><h1>{title}</h1></center>'))
            agg_df['Topic'] = agg_df.index
            # Topics with no qualifying phrase are charted at 0%.
            rem_topics = [x for x in topic_color_dict if x not in agg_df.Topic.tolist()]
            if len(rem_topics) > 0:
                rem_agg_df = pd.DataFrame({'Topic': rem_topics, 'Final_Score': 0.0, 'Total Score': 0.0})
                agg_df = pd.concat([agg_df, rem_agg_df])
            labels = agg_df['Final_Score'].round(1).astype('str') + '%'
            ax = agg_df.plot.bar(x='Topic', y='Final_Score', rot=0, figsize=(20, 5), align='center')
            for container in ax.containers:
                ax.bar_label(container, labels=labels)
            ax.yaxis.set_major_formatter(mtick.PercentFormatter())
            ax.legend(["Final Score (%)"])
            ax.set_xlabel('')
            plt.show()
            title = "Final Scores"
            display(HTML(f'<left><h1>{title}</h1></left>'))
            display_final_df(agg_df)
            # Keep the NONE rows as well, so every label is present in the
            # data handed to the validation step.
            if len(sub_2_result_df) > 0:
                sub_result_df = pd.concat([sub_result_df, sub_2_result_df]).reset_index(drop=True)
            agrilla_df = sub_result_df.copy()
        else:
            print(query)
            
def on_agr_button_next(b):
    """Upload the most recently scored phrases to Argilla for annotation.

    Shows a red reminder and does nothing else when no answer has been
    scored yet (``agrilla_df`` is None). Otherwise the dataset named by
    ``dataset_rg_name`` is deleted and re-created from ``agrilla_df`` in the
    "admin" workspace, and ``annotated`` is set to True.
    """
    global annotated  # agrilla_df is only read here
    with bhvr_outt:
        clear_output()
        if agrilla_df is None:
            display(Markdown("<h2 style='color:red; text-align:center;'>Please score the answer first!</h2>"))
            return
        # Reshape into text / prediction columns, then wrap for Argilla.
        records = rg.DatasetForTextClassification.from_pandas(convert_df(agrilla_df))
        # Replace any earlier dataset of the same name with the new records.
        rg.delete(dataset_rg_name, workspace="admin")
        rg.log(records, name=dataset_rg_name, workspace="admin")
        # Offer every topic label for annotation, not just the predicted ones.
        label_settings = rg.TextClassificationSettings(label_schema=list(topic_color_dict.keys()))
        rg.configure_dataset(name=dataset_rg_name, workspace="admin", settings=label_settings)
        annotated = True
            
def on_eval_button_next(b):
    """Display the F1 metric visualisation for the Argilla dataset.

    Shows a red reminder instead when the data has not been sent for
    validation yet (``annotated`` is False).
    """
    with bhvr_outt:
        clear_output()
        if not annotated:
            display(Markdown("<h2 style='color:red; text-align:center;'>Please score the answer and validate the data first!</h2>"))
            return
        metric = f1(dataset_rg_name)
        display(metric.visualize())

# Attach each button to its click handler.
for _button, _handler in (
    (bhvr_nlp_btn, on_bhvr_button_next),
    (bhvr_agr_btn, on_agr_button_next),
    (bhvr_eval_btn, on_eval_button_next),
):
    _button.on_click(_handler)

# Render the prompt label followed by the input / buttons / output stack.
display(bhvr_label, bhvr_box)