# NDIS Project - OpenAI - PBSP Scoring - Page 3 - Early Warning Signs

In [None]:
import openai
import re
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import display, clear_output, Javascript, HTML, Markdown
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import json
import spacy
from spacy import displacy
from dotenv import load_dotenv
import pandas as pd
import argilla as rg
from argilla.metrics.text_classification import f1
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline
# Raise the pandas display limits (row count, per-cell width, total width).
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 10000)
pd.set_option('display.width', 10000)

In [None]:
#initializations
# `os` is needed for the os.environ lookups below.
import os

# Read a local .env file, if one exists, before the lookups below.
# NOTE(review): assumes the settings are meant to come from .env, since
# load_dotenv is imported by this notebook — confirm.
load_dotenv()

# OpenAI client settings; a missing variable raises KeyError here.
openai.api_key = os.environ['API_KEY']
openai.api_base = os.environ['API_BASE']
openai.api_type = os.environ['API_TYPE']
openai.api_version = os.environ['API_VERSION']
deployment_name = os.environ['DEPLOYMENT_ID']

#argilla
rg.init(
    api_url=os.environ["ARGILLA_API_URL"],
    api_key=os.environ["ARGILLA_API_KEY"]
)

In [None]:
def process_response(response, min_score=0.8):
    """Parse the model's grouped, numbered answer into a table of scored phrases.

    The expected answer layout is a sequence of group headings
    ("Physical signs:", "Verbal signs:", "None:"), each followed by lines of
    the form ``<n>. <phrase> (Confidence Score: <float>)``.

    Args:
        response: Raw text returned by the model. ``None`` or an empty string
            yields an empty table.
        min_score: Minimum confidence a phrase needs to be kept. Phrases
            under the "None:" heading are kept regardless of their score.

    Returns:
        A ``pandas.DataFrame`` with columns ``Phrase``, ``Topic`` and
        ``Score``. Rows that pass ``min_score`` come first, followed by any
        remaining ``NONE`` rows; the index is reset.
    """
    score_marker = "(Confidence Score:"
    # Checked in this order; the first heading found in a line wins.
    headings = (
        ("Physical signs:", "PHYSICAL SIGNS"),
        ("Verbal signs:", "VERBAL SIGNS"),
        ("None:", "NONE"),
    )

    rows = []
    topic = None  # no heading seen yet
    for line in (response or "").strip().split("\n"):
        heading_topic = next(
            (label for marker, label in headings if marker in line), None
        )
        if heading_topic is not None:
            topic = heading_topic
            continue

        phrase, found, tail = line.partition(score_marker)
        # Skip lines without a score, and scored lines that appear before any
        # heading (they cannot be assigned a topic).
        if not found or topic is None:
            continue
        try:
            score = float(tail.strip().replace(")", ""))
        except ValueError:
            # Malformed score text: ignore this line, keep parsing the rest.
            continue

        # Remove only the leading list number ("12. "); digits inside the
        # phrase must survive so it still matches the source paragraph.
        phrase = re.sub(r"^\s*\d+\.\s*", "", phrase.strip())
        # Phrase, topic and score are stored together so the columns can
        # never get out of step.
        rows.append((phrase, topic, score))

    result_df = pd.DataFrame(rows, columns=['Phrase', 'Topic', 'Score'])
    # Keeps the comparison below numeric even when no rows were parsed.
    result_df['Score'] = result_df['Score'].astype(float)

    sub_result_df = result_df[result_df['Score'] >= min_score]
    null_df = result_df[result_df['Topic'] == "NONE"]
    if len(null_df) > 0:
        # High-scoring NONE rows are in both frames; drop the duplicates.
        result_df = pd.concat([sub_result_df, null_df]).drop_duplicates().reset_index(drop=True)
    else:
        result_df = sub_result_df.reset_index(drop=True)
    return result_df

In [None]:
def get_prompt(query):
    """Return the early-warning-sign extraction prompt with `query` embedded.

    `query` is inserted verbatim under the "Practitioner Paragraph:" heading;
    the rest of the prompt (task, requirements, group definitions and the
    worked example answer) is fixed text.
    """
    return f"""
    The practitioner paragraph below was found in a Behaviour Support Plan (BSP) and may contain one or more phrases that describe observable physical and/or verbal early warning signs, which the person with disability exhibits, and may indicate that he/she is likely to display challenging behaviours.

    Practitioner Paragraph:
    {query}

    Task:
    You are an expert Behaviour Support Practitioner. Your task is to use the practitioner paragraph above to find and extract the phrase(s), if any, that describe observable physical and/or verbal early warning signs, which the person with disability exhibits, and may indicate that he/she is likely to display challenging behaviours. 
    
    Requirements:
    You MUST follow all the requirements below: 
    - Provide phrases that exactly match the text in the practitioner paragraph and do not deviate from it.
    - There must not be any phrase in your answer that does not exist the practitioner paragraph.
    - Provide your answer in a numbered list. 
    - All the phrases in your answer must be exact substrings in the practitioner paragraph. without changing any characters.
    - All the upper case and lower case characters in the phrases in your answer must match the upper case and lower case characters in the practitioner paragraph.
    - Start numbering the phrases under each early warning signs group (Physical, Verbal) from number 1.
    - Start each list of phrases with these group titles: "Physical signs:", "Verbal signs:". 
    - For each phrase that belongs to any of the above group (Physical, Verbal), provide a confidence score that ranges between 0.50 and 1.00, where a score of 0.50 means you are very weakly confident that the phrase belongs to that specific group, whereas a score of 1.00 means you are very strongly confident that the phrase belongs to that specific group.
    - Never include any phrase that does not exist in the practitioner paragraph. 
    - Include a final numbered list titled "None:", which include all the remaining phrases from the practitioner paragraph above that do not represent any physical or verbal early warning signs. Provide a confidence score for each of these phrases as well.

    
    Useful Information:
    There are two main groups to classify early warning signs. Here are the two groups along with examples of early warning signs that could be belong to each:
    Physical signs: These are the observable behaviours that the person displays with their body language. Some examples include:
        Clenching fists
        Pacing back and forth
        Rapid breathing
        Reddening of the face or neck
        Stomping feet
    Verbal signs: These are the observable things that the person says or the tone in which they say them. Some examples include:
        Swearing or using inappropriate language
        Yelling or screaming
        Making threatening statements
        Refusing to communicate
        Crying or whimpering

    Example correct answer:

    Physical signs:
    1. may pick or flick at his fingernails/cuticles (Confidence Score: 0.95)
    2. biting his nails or pinching himself (Confidence Score: 0.93)

    Verbal signs:
    1. is screaming in a repetitive pattern (Confidence Score: 0.97)
    2. threats of harming staff, others or himself. (Confidence Score: 0.88)
    
    None:
    1. Eddie is a 22-year old man who may exhibit some early warning signs. (Confidence Score: 0.99)
    2. Eddie may sit on the coach whenever he wants to have a meal. (Confidence Score: 0.90)
    """

In [None]:
def get_response_chatgpt(prompt):
    """Send `prompt` as a single user message and return the model's reply text.

    The request goes to the deployment named by the module-level
    `deployment_name`, with temperature 0 and a generic system message.
    Returns the content of the first choice in the response.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        engine=deployment_name,
        messages=conversation,
        temperature=0,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

In [None]:
def convert_df(result_df):
    """Reshape a scored-phrase table into the text/prediction layout sent to Argilla.

    Args:
        result_df: DataFrame with ``Phrase``, ``Topic`` and ``Score`` columns.

    Returns:
        A DataFrame sharing ``result_df``'s index with two columns:
        ``text`` (the phrase) and ``prediction`` (a one-element list holding
        ``[topic, score]``). An empty input gives an empty frame with the
        same two columns.
    """
    # Built with zip rather than a row-wise apply: apply(axis=1) on an empty
    # frame returns a DataFrame instead of a Series, which cannot be assigned
    # to a single column.
    predictions = [
        [[topic, score]]
        for topic, score in zip(result_df['Topic'], result_df['Score'])
    ]
    return pd.DataFrame(
        {'text': result_df['Phrase'], 'prediction': predictions},
        index=result_df.index,
    )

In [None]:
# Hex colour per topic label. Used for entity highlights, table bars and
# section underlines; the keys also serve as the Argilla label schema.
topic_color_dict = {
        'PHYSICAL SIGNS': '#90EE90',
        'VERBAL SIGNS': '#FF69B4',
        'NONE': '#CCCCCC'
    }

def color(df, color):
    return df.style.format({'Score': '{:,.2%}'.format}).bar(subset=['Score'], color=color)

def annotate_query(highlights, query, topics):
    """Locate each highlighted phrase in `query` and return entity spans.

    Args:
        highlights: Phrases to look for, treated as literal text.
        query: The paragraph to search.
        topics: Label for each phrase, paired positionally with `highlights`.

    Returns:
        A list of ``{"start", "end", "label"}`` dicts, one per phrase that
        occurs in `query` (first occurrence only, case-insensitive). Phrases
        that are empty or not found contribute nothing, so the result can be
        shorter than `highlights`.
    """
    ents = []
    for phrase, topic in zip(highlights, topics):
        if not phrase:
            # An empty pattern would "match" at offset 0 with zero width.
            continue
        # Escape the phrase: it is ordinary text and may contain regex
        # metacharacters such as "(", "?" or "+".
        match = re.search(re.escape(phrase), query, re.IGNORECASE)
        if match is not None:
            ents.append({"start": match.start(), "end": match.end(), "label": topic})
    return ents

def path_to_image_html(path):
    """Return an HTML <img> tag (30x15) whose source is `path`."""
    return f'<img src="{path}" width="30" height="15" />'

# Minimum confidence for a phrase to count as a detected sign in the scoring callback.
passing_score = 0.8
# A category is shown as used (thumbs up) when its Final_Score is strictly above this.
final_passing = 0.0
def display_final_df(agg_df):
    """Show a centred thumbs-up/down table for the two early-warning-sign categories.

    A category gets the thumbs-up image when it is in `agg_df`'s index and its
    ``Final_Score`` is strictly greater than `final_passing`. Categories that
    are in the index but do not pass, or that are missing from it, get the
    thumbs-down image. Also sets pandas' ``display.max_colwidth`` to None.
    """
    category_col = 'Early Warning Sign Categories'
    all_categories = ['PHYSICAL SIGNS', 'VERBAL SIGNS']

    indexed = agg_df.index.tolist()
    present = [name for name in all_categories if name in indexed]

    icons = []
    for name in present:
        if agg_df.loc[name, 'Final_Score'] > final_passing:
            icons.append('./thumbs_up.png')
        else:
            icons.append('./thumbs_down.png')
    table = pd.DataFrame({category_col: present, 'USED': icons})

    # Categories with no row in agg_df are listed after the present ones.
    absent = [name for name in all_categories if name not in present]
    if absent:
        filler = pd.DataFrame({category_col: absent, 'USED': ['./thumbs_down.png'] * len(absent)})
        table = pd.concat([table, filler])
    table = table.set_index(category_col)

    pd.set_option('display.max_colwidth', None)
    markup = table.to_html(
        classes=["align-center"],
        index=True,
        escape=False,
        formatters=dict(USED=path_to_image_html),
    )
    display(HTML('<div style="text-align: center;">' + markup + '</div>'))
    

### Please describe (in observable terms) the early warning signs the person with disability exhibits that indicate they are likely to display the challenging behaviour(s).

In [None]:
#demo with Voila

def _make_action_button(caption):
    """Return a 'success'-styled check-icon button using `caption` as label and tooltip."""
    return widgets.Button(
        description=caption,
        disabled=False,
        button_style='success',  # 'success', 'info', 'warning', 'danger' or ''
        tooltip=caption,
        icon='check',
        layout={'height': '70px', 'width': '250px'},
    )


# Prompt label and the free-text area the answer is typed into.
bhvr_label = widgets.Label(value='Please type your answer:')
bhvr_text_input = widgets.Textarea(
    value='',
    placeholder='Type your answer',
    description='',
    disabled=False,
    layout={'height': '300px', 'width': '90%'},
)

# The three action buttons, laid out side by side.
bhvr_nlp_btn = _make_action_button('Score Answer')
bhvr_agr_btn = _make_action_button('Validate Data')
bhvr_eval_btn = _make_action_button('Evaluate Model')
btn_box = widgets.HBox(
    [bhvr_nlp_btn, bhvr_agr_btn, bhvr_eval_btn],
    layout={'width': '100%', 'height': '160%'},
)

# Output area that the button callbacks render into.
bhvr_outt = widgets.Output()
bhvr_outt.layout.width = '100%'
bhvr_outt.layout.height = '100%'

bhvr_box = widgets.VBox(
    [bhvr_text_input, btn_box, bhvr_outt],
    layout={'width': '100%', 'height': '160%'},
)

# State shared between the button callbacks.
dataset_rg_name = 'pbsp-page3-warning-argilla-ds'
agrilla_df = None   # last scored phrase table; None until an answer is scored
annotated = False   # set to True once the table has been logged to Argilla
def on_bhvr_button_next(b):
    """Score the typed answer and render the results into the output area.

    Sends the text-area content to the model, parses the reply, and, if at
    least one physical or verbal sign scores `passing_score` or higher,
    displays:
      1. the paragraph with the detected phrases highlighted,
      2. a per-topic table of phrases and scores,
      3. a bar chart of each topic's share of the total score,
      4. the thumbs-up/down summary table.
    The detected rows plus any NONE rows are stored in the global
    `agrilla_df`. If nothing passes, only the raw query is printed and
    `agrilla_df` is left as it was.

    Args:
        b: The clicked button (unused).
    """
    global agrilla_df
    with bhvr_outt:
        clear_output()
        query = bhvr_text_input.value
        prompt = get_prompt(query)
        response = get_response_chatgpt(prompt)
        result_df = process_response(response)
        sub_result_df = result_df[(result_df['Score'] >= passing_score) & (result_df['Topic'] != 'NONE')]
        sub_2_result_df = result_df[result_df['Topic'] == 'NONE']
        if len(sub_result_df) > 0:
            highlights = sub_result_df['Phrase'].tolist()
            highlight_topics = sub_result_df['Topic'].tolist()
            ents = annotate_query(highlights, query, highlight_topics)
            # Take each colour from the entity's own label: `ents` can be
            # shorter than `highlight_topics` when a phrase is not found in
            # the query, so pairing the two lists by position is unsafe.
            colors = {ent['label']: topic_color_dict[ent['label']] for ent in ents}

            ex = [{"text": query,
                   "ents": ents,
                   "title": None}]
            title = "Early Warning Sign Highlights"
            display(HTML(f'<center><h1>{title}</h1></center>'))
            # jupyter=False makes displacy return the markup; with
            # jupyter=True it displays the markup itself and returns None,
            # so the display() call below would show an empty HTML object.
            html = displacy.render(ex, style="ent", manual=True, jupyter=False, options={'colors': colors})
            # "tex2jax_ignore" is MathJax's opt-out class, so "$" in the text
            # is not typeset as math.
            display(HTML(f'<span class="tex2jax_ignore">{html}</span>'))
            title = "Early Warning Sign Classifications"
            display(HTML(f'<center><h1>{title}</h1></center>'))
            for top in topic_color_dict.keys():
                top_result_df = sub_result_df[sub_result_df['Topic'] == top]
                if len(top_result_df) > 0:
                    top_result_df = top_result_df.sort_values(by='Score', ascending=False).reset_index(drop=True)
                    top_result_df = top_result_df.set_index('Phrase')
                    top_result_df = top_result_df[['Score']]
                    display(HTML(
                        f'<left><h2 style="text-decoration: underline; text-decoration-color:{topic_color_dict[top]};">{top}</h2></left>'))
                    display(color(top_result_df, topic_color_dict[top]))

            # Each topic's summed confidence, and its share of the total in percent.
            agg_df = sub_result_df.groupby('Topic')['Score'].sum()
            agg_df = agg_df.to_frame()
            agg_df.index.name = 'Topic'
            agg_df.columns = ['Total Score']
            agg_df = agg_df.assign(
                Final_Score=lambda x: x['Total Score'] / x['Total Score'].sum() * 100.00
            )
            agg_df = agg_df.sort_values(by='Final_Score', ascending=False)
            title = "Early Warning Sign Coverage"
            display(HTML(f'<center><h1>{title}</h1></center>'))
            agg_df['Topic'] = agg_df.index
            # Topics with no passing phrase still get a zero-height bar.
            rem_topics = [x for x in topic_color_dict if x not in agg_df.Topic.tolist()]
            if len(rem_topics) > 0:
                rem_agg_df = pd.DataFrame({'Topic': rem_topics, 'Final_Score': 0.0, 'Total Score': 0.0})
                agg_df = pd.concat([agg_df, rem_agg_df])
            labels = agg_df['Final_Score'].round(1).astype('str') + '%'
            ax = agg_df.plot.bar(x='Topic', y='Final_Score', rot=0, figsize=(20, 5), align='center')
            for container in ax.containers:
                ax.bar_label(container, labels=labels)
            # Axis-wide settings only need to be applied once.
            ax.yaxis.set_major_formatter(mtick.PercentFormatter())
            ax.legend(["Final Score (%)"])
            ax.set_xlabel('')
            plt.show()
            title = "Final Scores"
            display(HTML(f'<left><h1>{title}</h1></left>'))
            display_final_df(agg_df)
            # Keep the NONE rows too so every label can be reviewed in Argilla.
            if len(sub_2_result_df) > 0:
                sub_result_df = pd.concat([sub_result_df, sub_2_result_df]).reset_index(drop=True)
            agrilla_df = sub_result_df.copy()
        else:
            print(query)
            
def on_agr_button_next(b):
    """Replace the Argilla dataset with the most recently scored phrases.

    If no answer has been scored yet (`agrilla_df` is None) a red reminder is
    shown instead. Otherwise the table is converted, the existing dataset is
    deleted, the new records are logged, the label schema is set to the keys
    of `topic_color_dict`, and the global `annotated` flag is set to True.

    Args:
        b: The clicked button (unused).
    """
    global agrilla_df, annotated
    with bhvr_outt:
        clear_output()
        if agrilla_df is None:
            display(Markdown("<h2 style='color:red; text-align:center;'>Please score the answer first!</h2>"))
            return

        # DataFrame -> Argilla text-classification records.
        records = rg.DatasetForTextClassification.from_pandas(convert_df(agrilla_df))
        # Remove the previous dataset, then log the new records under the same name.
        rg.delete(dataset_rg_name, workspace="admin")
        rg.log(records, name=dataset_rg_name, workspace="admin")
        # Register every topic label so all of them can be chosen during annotation.
        label_names = list(topic_color_dict.keys())
        rg.configure_dataset(
            name=dataset_rg_name,
            workspace="admin",
            settings=rg.TextClassificationSettings(label_schema=label_names),
        )
        annotated = True
            
def on_eval_button_next(b):
    """Show the F1 visualisation for the Argilla dataset.

    If the data has not been logged to Argilla yet (`annotated` is False), a
    red reminder is shown instead.

    Args:
        b: The clicked button (unused).
    """
    global annotated
    with bhvr_outt:
        clear_output()
        if not annotated:
            display(Markdown("<h2 style='color:red; text-align:center;'>Please score the answer and validate the data first!</h2>"))
            return
        metric_summary = f1(dataset_rg_name)
        display(metric_summary.visualize())

# Attach each button to its click handler, then show the form.
for _button, _handler in (
    (bhvr_nlp_btn, on_bhvr_button_next),
    (bhvr_agr_btn, on_agr_button_next),
    (bhvr_eval_btn, on_eval_button_next),
):
    _button.on_click(_handler)

display(bhvr_label, bhvr_box)