{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f56cc5ad",
   "metadata": {},
   "source": [
    "# NDIS Project - Azure OpenAI - PBSP Scoring - Page 4 - Reason(s) to why teaching is unneeded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8d844ea",
   "metadata": {
    "hide_input": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import openai\n",
    "import re\n",
    "from ipywidgets import interact\n",
    "import ipywidgets as widgets\n",
    "from IPython.display import display, clear_output, Javascript, HTML, Markdown\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.ticker as mtick\n",
    "import json\n",
    "import spacy\n",
    "from spacy import displacy\n",
    "from dotenv import load_dotenv\n",
    "import pandas as pd\n",
    "import argilla as rg\n",
    "from argilla.metrics.text_classification import f1\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "%matplotlib inline\n",
    "pd.set_option('display.max_rows', 500)\n",
    "pd.set_option('display.max_colwidth', 10000)\n",
    "pd.set_option('display.width', 10000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "96b83a1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "#initializations\n",
    "openai.api_key = os.environ['API_KEY']\n",
    "openai.api_base = os.environ['API_BASE']\n",
    "openai.api_type = os.environ['API_TYPE']\n",
    "openai.api_version = os.environ['API_VERSION']\n",
    "deployment_name = os.environ['DEPLOYMENT_ID']\n",
    "\n",
    "#argilla\n",
    "rg.init(\n",
    "    api_url=os.environ[\"ARGILLA_API_URL\"],\n",
    "    api_key=os.environ[\"ARGILLA_API_KEY\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8934eadb",
   "metadata": {},
   "outputs": [],
   "source": [
    "#sentence extraction\n",
    "def extract_sentences(paragraph):\n",
    "    symbols = ['\\\\.', '!', '\\\\?', ';', ':', ',', '\\\\_', '\\n', '\\\\-']\n",
    "    pattern = '|'.join([f'{symbol}' for symbol in symbols])\n",
    "    sentences = re.split(pattern, paragraph)\n",
    "    sentences = [sentence.strip() for sentence in sentences if sentence.strip()]\n",
    "    return sentences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02fda761",
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_response(response, query):\n",
    "    sentences = []\n",
    "    topics = []\n",
    "    scores = []\n",
    "    lines = response.strip().split(\"\\n\")\n",
    "    for line in lines:\n",
    "        if \"Reasons:\" in line:\n",
    "            topic = \"REASON\"\n",
    "        elif \"None:\" in line:\n",
    "            topic = \"NO REASON\"\n",
    "        else:\n",
    "            try:\n",
    "                phrase = line.split(\"(Confidence Score:\")[0].strip()\n",
    "                score = float(line.split(\"(Confidence Score:\")[1].strip().replace(\")\", \"\"))\n",
    "                sentences.append(phrase)\n",
    "                topics.append(topic)\n",
    "                scores.append(score)\n",
    "            except:\n",
    "                pass\n",
    "    result_df = pd.DataFrame({'Phrase': sentences, 'Topic': topics, 'Score': scores})\n",
    "    try:\n",
    "        result_df['Phrase'] = result_df['Phrase'].str.replace('\\d+\\.', '', regex=True)\n",
    "        result_df['Phrase'] = result_df['Phrase'].str.replace('^\\s', '', regex=True)\n",
    "        result_df['Phrase'] = result_df['Phrase'].str.strip('\"')\n",
    "    except:\n",
    "        sentences = extract_sentences(query)\n",
    "        topics = ['NO REASON'] * len(sentences)\n",
    "        scores = [0.9] * len(sentences)\n",
    "        result_df = pd.DataFrame({'Phrase': sentences, 'Topic': topics, 'Score': scores})\n",
    "    return result_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "714fafb4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_prompt(query):\n",
    "    prompt = f\"\"\"\n",
    "    In the positive behaviour support plan, the NDIS health practitioner, who is managing a person with disability, is required to teach the person with disability a new skill, an alternative behaviour or a functionally equivalent replacement behaviour. If not, then the health practitioner is required to state, in the practitioner paragraph below, the reason(s) for why teaching the person with disability any of the above skills or behaviours was not needed. \n",
    "\n",
    "    Practitioner Paragraph:\n",
    "    {query}\n",
    "\n",
    "    Requirement:\n",
    "    Identify the phrases from the practitioner paragraph above that represent any reason(s) stated for why teaching the person with disability any skill or behaviour was not needed.\n",
    "\n",
    "    Specifications of a correct answer:\n",
    "    - Please provide a response that closely matches the information in the practitioner paragraph and does not deviate significantly from it.\n",
    "    - Provide your answer in a numbered list. \n",
    "    - All the phrases in your answer must be exact substrings in the practitioner paragraph. without changing any characters.\n",
    "    - All the upper case and lower case characters in the phrases in your answer must match the upper case and lower case characters in the practitioner paragraph.\n",
    "    - Start numbering the phrases from number 1.\n",
    "    - Start your answer for the phrases with the title \"Reasons:\"\n",
    "    - For each phrase in your answer, provide a confidence score that ranges between 0.50 and 1.00, where a score of 0.50 indicates you are very weakly confident that the phrase represents a valid reason for why teaching the person with disability any skill or behaviour was not needed, whereas a score of 1.00 indicates you are very strongly confident that the phrase represents a valid reason for why teaching the person with disability any skill or behaviour was not needed..\n",
    "    - Include another numbered list titled \"None:\", which includes all the remaining phrases in the practitioner paragraph that do not represent any valid reason for why teaching the person with disability any skill or behaviour was not needed. Provide a confidence score for each of these phrases as well.\n",
    "    - There must not be any phrase in your answer that does not exist the practitioner paragraph.\n",
    "\n",
    "    Example correct answer:\n",
    "\n",
    "    Reasons:\n",
    "    1. Eddie's autism is profound and restricts his ability to acquire new knowledge. (Confidence Score: 0.97)\n",
    "    2. Eddie's current behaviors do not cause harm to himself or others, and have minimal impact on his daily activities. (Confidence Score: 0.95)\n",
    "    3. Eddie finds comfort in his current routines and any modifications may provoke feelings of distress or anxiety. (Confidence Score: 0.93)\n",
    "    4. Teaching Eddie new skills or behaviors necessitates a substantial investment of time and resources. (Confidence Score: 0.90)\n",
    "    5. The advantages of teaching Eddie new skills or behaviors may not be significant or worthwhile in enhancing his quality of life. (Confidence Score: 0.88)\n",
    "\n",
    "    None:\n",
    "    1. I am a health practitioner who is managing Eddie, a person with autism, (Confidence Score: 0.99)\n",
    "    2. As the NDIS practitioner managing Eddie's disability, I must acknowledge the importance of teaching him new skills. (Confidence Score: 0.95)\n",
    "    3. However, in this particular instance, I was unable to teach Eddie for several reasons. (Confidence Score: 0.94)\n",
    "    \"\"\"\n",
    "    return prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99da147a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_response_chatgpt(prompt):\n",
    "    response=openai.ChatCompletion.create(   \n",
    "        engine=deployment_name,   \n",
    "        messages=[         \n",
    "        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},                  \n",
    "        {\"role\": \"user\", \"content\": prompt}     \n",
    "        ],\n",
    "        temperature=0\n",
    "    )\n",
    "    reply = response[\"choices\"][0][\"message\"][\"content\"]\n",
    "    return reply"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4592a82",
   "metadata": {},
   "outputs": [],
   "source": [
    "#query = \"\"\"\n",
    "#As the NDIS practitioner responsible for managing Eddie, I have not taught him any new skills or alternative behaviours for a number of reasons. Firstly, Eddie has a very limited capacity to learn due to the severity of his autism. Secondly, his behaviours are not causing any harm to himself or others, and are not significantly interfering with his ability to participate in daily activities. Thirdly, Eddie is comfortable with his current routines and any changes to these routines could cause him distress or anxiety. Fourthly, Eddie has a strong support network of family and carers who are able to manage his current behaviours effectively without the need for additional skills or behaviours to be taught. Finally, as Eddie is non-verbal and has limited communication skills, teaching him new skills or behaviours would require a significant amount of time and resources, which may not necessarily lead to any meaningful improvements in his quality of life. Based on these reasons, it is not necessary to teach Eddie any new skills or behaviours at this time.\n",
    "#\"\"\"\n",
    "#prompt = get_prompt(query)\n",
    "#response = get_response_chatgpt(prompt)\n",
    "#result_df = process_response(response, query)\n",
    "#print(response)\n",
    "#display(result_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56d2bac8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_df(result_df):\n",
    "    new_df = pd.DataFrame(columns=['text', 'prediction'])\n",
    "    new_df['text'] = result_df['Phrase']\n",
    "    new_df['prediction'] = result_df.apply(lambda row: [[row['Topic'], row['Score']]], axis=1)\n",
    "    return new_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "905eaf2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "topic_color_dict = {\n",
    "        'REASON': '#90EE90',\n",
    "        'NO REASON': '#F08080'\n",
    "    }\n",
    "\n",
    "def color(df, color):\n",
    "    return df.style.format({'Score': '{:,.2%}'.format}).bar(subset=['Score'], color=color)\n",
    "\n",
    "def annotate_query(highlights, query, topics):\n",
    "    ents = []\n",
    "    for h, t in zip(highlights, topics):\n",
    "        ent_dict = {}\n",
    "        for match in re.finditer(h, query, re.IGNORECASE):\n",
    "            ent_dict = {\"start\": match.start(), \"end\": match.end(), \"label\": t}\n",
    "            break\n",
    "        if len(ent_dict.keys()) > 0:\n",
    "            ents.append(ent_dict)\n",
    "    return ents\n",
    "\n",
    "def path_to_image_html(path):\n",
    "    return path\n",
    "\n",
    "passing_score = 0.75\n",
    "final_passing = 0.0\n",
    "def display_final_df(agg_df):\n",
    "    crits = [\n",
    "            'REASON'\n",
    "        ]\n",
    "    if not isinstance(agg_df, str):\n",
    "        tags = []\n",
    "        orig_crits = crits\n",
    "        crits = [x for x in crits if x in agg_df.index.tolist()]\n",
    "        bools = [agg_df.loc[crit, 'Final_Score'] > final_passing for crit in crits]\n",
    "        paths = ['YES' if x else 'NO' for x in bools]\n",
    "        df = pd.DataFrame({'Unneeded Teaching Reason': crits, 'MENTIONED': paths})\n",
    "        rem_crits = [x for x in orig_crits if x not in crits]\n",
    "        if len(rem_crits) > 0:\n",
    "            df2 = pd.DataFrame({'Unneeded Teaching Reason': rem_crits, 'MENTIONED': ['NO'] * len(rem_crits)})\n",
    "            df = pd.concat([df, df2])\n",
    "    else:\n",
    "        df = pd.DataFrame({'Unneeded Teaching Reason': [crits[0]], 'MENTIONED': ['NO']})\n",
    "    df = df.set_index('Unneeded Teaching Reason')\n",
    "    pd.set_option('display.max_colwidth', None)\n",
    "    display(HTML('<div style=\"text-align: center;\">' + df.to_html(classes=[\"align-center\"], index=True, escape=False ,formatters=dict(MENTIONED=path_to_image_html)) + '</div>'))\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2c6e9fe7",
   "metadata": {},
   "source": [
    "### Please indicate why the teaching of a new skill, alternative behaviour or functionally equivalent replacement behaviour is not needed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76dd8cab",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "#demo with Voila\n",
    "\n",
    "bhvr_label = widgets.Label(value='Please type your answer:')\n",
    "bhvr_text_input = widgets.Textarea(\n",
    "    value='',\n",
    "    placeholder='Type your answer',\n",
    "    description='',\n",
    "    disabled=False,\n",
    "    layout={'height': '300px', 'width': '90%'}\n",
    ")\n",
    "\n",
    "bhvr_nlp_btn = widgets.Button(\n",
    "    description='Score Answer',\n",
    "    disabled=False,\n",
    "    button_style='success', # 'success', 'info', 'warning', 'danger' or ''\n",
    "    tooltip='Score Answer',\n",
    "    icon='check',\n",
    "    layout={'height': '70px', 'width': '250px'}\n",
    ")\n",
    "bhvr_agr_btn = widgets.Button(\n",
    "    description='Validate Data',\n",
    "    disabled=False,\n",
    "    button_style='success', # 'success', 'info', 'warning', 'danger' or ''\n",
    "    tooltip='Validate Data',\n",
    "    icon='check',\n",
    "    layout={'height': '70px', 'width': '250px'}\n",
    ")\n",
    "bhvr_eval_btn = widgets.Button(\n",
    "    description='Evaluate Model',\n",
    "    disabled=False,\n",
    "    button_style='success', # 'success', 'info', 'warning', 'danger' or ''\n",
    "    tooltip='Evaluate Model',\n",
    "    icon='check',\n",
    "    layout={'height': '70px', 'width': '250px'}\n",
    ")\n",
    "btn_box = widgets.HBox([bhvr_nlp_btn, bhvr_agr_btn, bhvr_eval_btn], \n",
    "                       layout={'width': '100%', 'height': '160%'})\n",
    "bhvr_outt = widgets.Output()\n",
    "bhvr_outt.layout.height = '100%'\n",
    "bhvr_outt.layout.width = '100%'\n",
    "bhvr_box = widgets.VBox([bhvr_text_input, btn_box, bhvr_outt], \n",
    "                   layout={'width': '100%', 'height': '160%'})\n",
    "dataset_rg_name = 'pbsp-page4-unneeded-teaching-reason-argilla-ds'\n",
    "agrilla_df = None\n",
    "annotated = False\n",
    "def on_bhvr_button_next(b):\n",
    "    global agrilla_df\n",
    "    with bhvr_outt:\n",
    "        clear_output()\n",
    "        query = bhvr_text_input.value\n",
    "        prompt = get_prompt(query)\n",
    "        response = get_response_chatgpt(prompt)\n",
    "        result_df = process_response(response, query)\n",
    "        sub_result_df = result_df[(result_df['Score'] >= passing_score) & (result_df['Topic'] != 'NO REASON')]\n",
    "        sub_2_result_df = result_df[result_df['Topic'] == 'NO REASON']\n",
    "        highlights = []\n",
    "        if len(sub_result_df) > 0:\n",
    "            highlights = sub_result_df['Phrase'].tolist()\n",
    "            highlight_topics = sub_result_df['Topic'].tolist()    \n",
    "            ents = annotate_query(highlights, query, highlight_topics)\n",
    "            colors = {}\n",
    "            for ent, ht in zip(ents, highlight_topics):\n",
    "                colors[ent['label']] = topic_color_dict[ht]\n",
    "\n",
    "            ex = [{\"text\": query,\n",
    "                   \"ents\": ents,\n",
    "                   \"title\": None}]\n",
    "            title = \"Unneeded Teaching Reason Highlights\"\n",
    "            display(HTML(f'<center><h1>{title}</h1></center>'))\n",
    "            html = displacy.render(ex, style=\"ent\", manual=True, jupyter=True, options={'colors': colors})\n",
    "            display(HTML(html))\n",
    "            title = \"Unneeded Teaching Reason Classifications\"\n",
    "            display(HTML(f'<center><h1>{title}</h1></center>'))\n",
    "            for top in topic_color_dict.keys():\n",
    "                top_result_df = sub_result_df[sub_result_df['Topic'] == top]\n",
    "                if len(top_result_df) > 0:\n",
    "                    top_result_df = top_result_df.sort_values(by='Score', ascending=False).reset_index(drop=True)\n",
    "                    top_result_df = top_result_df.set_index('Phrase')\n",
    "                    top_result_df = top_result_df[['Score']]\n",
    "                    display(HTML(\n",
    "                        f'<left><h2 style=\"text-decoration: underline; text-decoration-color:{topic_color_dict[top]};\">{top}</h2></left>'))\n",
    "                    display(color(top_result_df, topic_color_dict[top]))\n",
    "            \n",
    "            agg_df = sub_result_df.groupby('Topic')['Score'].sum()\n",
    "            agg_df = agg_df.to_frame()\n",
    "            agg_df.index.name = 'Topic'\n",
    "            agg_df.columns = ['Total Score']\n",
    "            agg_df = agg_df.assign(\n",
    "                Final_Score=lambda x: x['Total Score'] / x['Total Score'].sum() * 100.00\n",
    "            )\n",
    "            agg_df = agg_df.sort_values(by='Final_Score', ascending=False)\n",
    "            agg_df['Topic'] = agg_df.index\n",
    "            rem_topics= [x for x in list(topic_color_dict.keys()) if not x in agg_df.Topic.tolist()]\n",
    "            if len(rem_topics) > 0:\n",
    "                rem_agg_df = pd.DataFrame({'Topic': rem_topics, 'Final_Score': 0.0, 'Total Score': 0.0})\n",
    "                agg_df = pd.concat([agg_df, rem_agg_df])\n",
    "            title = \"Final Scores\"\n",
    "            display(HTML(f'<left><h1>{title}</h1></left>'))\n",
    "            display_final_df(agg_df)\n",
    "            if len(sub_2_result_df) > 0:\n",
    "                sub_result_df = pd.concat([sub_result_df, sub_2_result_df]).reset_index(drop=True)\n",
    "            agrilla_df = sub_result_df.copy()\n",
    "        else:\n",
    "            print(query)\n",
    "            display_final_df('None')\n",
    "            if len(sub_2_result_df) > 0:\n",
    "                agrilla_df = sub_2_result_df.copy()\n",
    "\n",
    "def on_agr_button_next(b):\n",
    "    global agrilla_df, annotated\n",
    "    with bhvr_outt:\n",
    "        clear_output()\n",
    "        if agrilla_df is not None:\n",
    "            # convert the dataframe to the structure accepted by argilla\n",
    "            converted_df = convert_df(agrilla_df)\n",
    "            # convert pandas dataframe to DatasetForTextClassification\n",
    "            dataset_rg = rg.DatasetForTextClassification.from_pandas(converted_df)\n",
    "            # delete the old DatasetForTextClassification from the Argilla web app if exists\n",
    "            rg.delete(dataset_rg_name, workspace=\"admin\")\n",
    "            # load the new DatasetForTextClassification into the Argilla web app\n",
    "            rg.log(dataset_rg, name=dataset_rg_name, workspace=\"admin\")\n",
    "            # Make sure all classes are present for annotation\n",
    "            rg_settings = rg.TextClassificationSettings(label_schema=list(topic_color_dict.keys()))\n",
    "            rg.configure_dataset(name=dataset_rg_name, workspace=\"admin\", settings=rg_settings)\n",
    "            annotated = True\n",
    "        else:\n",
    "            display(Markdown(\"<h2 style='color:red; text-align:center;'>Please score the answer first!</h2>\"))\n",
    "            \n",
    "def on_eval_button_next(b):\n",
    "    global annotated\n",
    "    with bhvr_outt:\n",
    "        clear_output()\n",
    "        if annotated:\n",
    "            display(f1(dataset_rg_name).visualize())\n",
    "        else:\n",
    "            display(Markdown(\"<h2 style='color:red; text-align:center;'>Please score the answer and validate the data first!</h2>\"))\n",
    "\n",
    "bhvr_nlp_btn.on_click(on_bhvr_button_next)\n",
    "bhvr_agr_btn.on_click(on_agr_button_next)\n",
    "bhvr_eval_btn.on_click(on_eval_button_next)\n",
    "\n",
    "display(bhvr_label, bhvr_box)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2e51901",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3.9 (Argilla Trainer)",
   "language": "python",
   "name": "argilla_trainer"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "258.097px"
   },
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}