{ "cells": [ { "cell_type": "markdown", "id": "f56cc5ad", "metadata": {}, "source": [ "# NDIS Project - OpenAI - PBSP Scoring - Page 3 - Early Warning Signs" ] }, { "cell_type": "code", "execution_count": null, "id": "a8d844ea", "metadata": { "hide_input": false }, "outputs": [], "source": [ "import openai\n", "import re\n", "from ipywidgets import interact\n", "import ipywidgets as widgets\n", "from IPython.display import display, clear_output, Javascript, HTML, Markdown\n", "import matplotlib.pyplot as plt\n", "import matplotlib.ticker as mtick\n", "import json\n", "import spacy\n", "from spacy import displacy\n", "from dotenv import load_dotenv\n", "import pandas as pd\n", "import argilla as rg\n", "from argilla.metrics.text_classification import f1\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "%matplotlib inline\n", "pd.set_option('display.max_rows', 500)\n", "pd.set_option('display.max_colwidth', 10000)\n", "pd.set_option('display.width', 10000)" ] }, { "cell_type": "code", "execution_count": null, "id": "96b83a1d", "metadata": {}, "outputs": [], "source": [ "#initializations\n", "openai.api_key = os.environ['API_KEY']\n", "openai.api_base = os.environ['API_BASE']\n", "openai.api_type = os.environ['API_TYPE']\n", "openai.api_version = os.environ['API_VERSION']\n", "deployment_name = os.environ['DEPLOYMENT_ID']\n", "\n", "#argilla\n", "rg.init(\n", " api_url=os.environ[\"ARGILLA_API_URL\"],\n", " api_key=os.environ[\"ARGILLA_API_KEY\"]\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "02fda761", "metadata": {}, "outputs": [], "source": [ "def process_response(response):\n", " sentences = []\n", " topics = []\n", " scores = []\n", " lines = response.strip().split(\"\\n\")\n", " for line in lines:\n", " if \"Physical signs:\" in line:\n", " topic = \"PHYSICAL SIGNS\"\n", " elif \"Verbal signs:\" in line:\n", " topic = \"VERBAL SIGNS\"\n", " elif \"None:\" in line:\n", " topic = \"NONE\"\n", " else:\n", " try:\n", " phrase = line.split(\"(Confidence Score:\")[0].strip()\n", " score = float(line.split(\"(Confidence Score:\")[1].strip().replace(\")\", \"\"))\n", " sentences.append(phrase)\n", " topics.append(topic)\n", " scores.append(score)\n", " except:\n", " pass\n", " result_df = pd.DataFrame({'Phrase': sentences, 'Topic': topics, 'Score': scores})\n", " result_df['Phrase'] = result_df['Phrase'].str.replace('\\d+\\.', '', regex=True)\n", " result_df['Phrase'] = result_df['Phrase'].str.replace('^\\s', '', regex=True)\n", " sub_result_df = result_df[result_df['Score'] >= 0.8]\n", " null_df = result_df[result_df['Topic'] == \"NONE\"]\n", " if len(null_df) > 0:\n", " result_df = pd.concat([sub_result_df, null_df]).drop_duplicates().reset_index(drop=True)\n", " else:\n", " result_df = sub_result_df.reset_index(drop=True)\n", " return result_df" ] }, { "cell_type": "code", "execution_count": null, "id": "714fafb4", "metadata": {}, "outputs": [], "source": [ "def get_prompt(query):\n", " prompt = f\"\"\"\n", " The practitioner paragraph below was found in a Behaviour Support Plan (BSP) and may contain one or more phrases that describe observable physical and/or verbal early warning signs, which the person with disability exhibits, and may indicate that he/she is likely to display challenging behaviours.\n", "\n", " Practitioner Paragraph:\n", " {query}\n", "\n", " Task:\n", " You are an expert Behaviour Support Practitioner. 
{ "cell_type": "code", "execution_count": null, "id": "714fafb4", "metadata": {}, "outputs": [], "source": [
"def get_prompt(query):\n",
"    prompt = f\"\"\"\n",
"    The practitioner paragraph below was found in a Behaviour Support Plan (BSP) and may contain one or more phrases that describe observable physical and/or verbal early warning signs, which the person with disability exhibits, and may indicate that he/she is likely to display challenging behaviours.\n",
"\n",
"    Practitioner Paragraph:\n",
"    {query}\n",
"\n",
"    Task:\n",
"    You are an expert Behaviour Support Practitioner. Your task is to use the practitioner paragraph above to find and extract the phrase(s), if any, that describe observable physical and/or verbal early warning signs, which the person with disability exhibits, and may indicate that he/she is likely to display challenging behaviours.\n",
"\n",
"    Requirements:\n",
"    You MUST follow all the requirements below:\n",
"    - Provide phrases that exactly match the text in the practitioner paragraph and do not deviate from it.\n",
"    - There must not be any phrase in your answer that does not exist in the practitioner paragraph.\n",
"    - Provide your answer in a numbered list.\n",
"    - All the phrases in your answer must be exact substrings of the practitioner paragraph, without changing any characters.\n",
"    - All the upper case and lower case characters in the phrases in your answer must match the upper case and lower case characters in the practitioner paragraph.\n",
"    - Start numbering the phrases under each early warning sign group (Physical, Verbal) from number 1.\n",
"    - Start each list of phrases with these group titles: \"Physical signs:\", \"Verbal signs:\".\n",
"    - For each phrase that belongs to any of the above groups (Physical, Verbal), provide a confidence score that ranges between 0.50 and 1.00, where a score of 0.50 means you are very weakly confident that the phrase belongs to that specific group, whereas a score of 1.00 means you are very strongly confident that the phrase belongs to that specific group.\n",
"    - Never include any phrase that does not exist in the practitioner paragraph.\n",
"    - Include a final numbered list titled \"None:\", which includes all the remaining phrases from the practitioner paragraph above that do not represent any physical or verbal early warning signs. Provide a confidence score for each of these phrases as well.\n",
"\n",
"    Useful Information:\n",
"    There are two main groups to classify early warning signs. Here are the two groups along with examples of early warning signs that could belong to each:\n",
"    Physical signs: These are the observable behaviours that the person displays with their body language. Some examples include:\n",
"        Clenching fists\n",
"        Pacing back and forth\n",
"        Rapid breathing\n",
"        Reddening of the face or neck\n",
"        Stomping feet\n",
"    Verbal signs: These are the observable things that the person says or the tone in which they say them. Some examples include:\n",
"        Swearing or using inappropriate language\n",
"        Yelling or screaming\n",
"        Making threatening statements\n",
"        Refusing to communicate\n",
"        Crying or whimpering\n",
"\n",
"    Example correct answer:\n",
"\n",
"    Physical signs:\n",
"    1. may pick or flick at his fingernails/cuticles (Confidence Score: 0.95)\n",
"    2. biting his nails or pinching himself (Confidence Score: 0.93)\n",
"\n",
"    Verbal signs:\n",
"    1. is screaming in a repetitive pattern (Confidence Score: 0.97)\n",
"    2. threats of harming staff, others or himself. (Confidence Score: 0.88)\n",
"\n",
"    None:\n",
"    1. Eddie is a 22-year-old man who may exhibit some early warning signs. (Confidence Score: 0.99)\n",
"    2. Eddie may sit on the couch whenever he wants to have a meal. (Confidence Score: 0.90)\n",
"    \"\"\"\n",
"\n",
"    return prompt"
] },
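{ "cell_type": "markdown", "id": "8a2c6e0d", "metadata": {}, "source": [ "As a small illustration of how the prompt builder is used, the next cell passes an invented practitioner paragraph (`example_paragraph`, not taken from a real plan) to `get_prompt` and prints only the beginning of the generated prompt." ] },
{ "cell_type": "code", "execution_count": null, "id": "2f7b9d3c", "metadata": {}, "outputs": [], "source": [
"# Illustrative only: preview the prompt produced for an invented paragraph.\n",
"example_paragraph = (\n",
"    \"When Eddie becomes anxious he may start pacing around the room \"\n",
"    \"and speaking in a raised voice before an incident occurs.\"\n",
")\n",
"example_prompt = get_prompt(example_paragraph)\n",
"print(example_prompt[:600])  # show only the beginning of the generated prompt"
] },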
{ "cell_type": "code", "execution_count": null, "id": "9e23821b", "metadata": {}, "outputs": [], "source": [
"def get_response_chatgpt(prompt):\n",
"    response = openai.ChatCompletion.create(\n",
"        engine=deployment_name,\n",
"        messages=[\n",
"            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
"            {\"role\": \"user\", \"content\": prompt}\n",
"        ],\n",
"        temperature=0\n",
"    )\n",
"    reply = response[\"choices\"][0][\"message\"][\"content\"]\n",
"    return reply"
] },
{ "cell_type": "code", "execution_count": null, "id": "983765bc", "metadata": {}, "outputs": [], "source": [
"def convert_df(result_df):\n",
"    # reshape the scored phrases into the record format expected by Argilla\n",
"    new_df = pd.DataFrame(columns=['text', 'prediction'])\n",
"    new_df['text'] = result_df['Phrase']\n",
"    new_df['prediction'] = result_df.apply(lambda row: [[row['Topic'], row['Score']]], axis=1)\n",
"    return new_df"
] },
{ "cell_type": "code", "execution_count": null, "id": "905eaf2a", "metadata": {}, "outputs": [], "source": [
"topic_color_dict = {\n",
"    'PHYSICAL SIGNS': '#90EE90',\n",
"    'VERBAL SIGNS': '#FF69B4',\n",
"    'NONE': '#CCCCCC'\n",
"}\n",
"\n",
"def color(df, color):\n",
"    # render the scores as percentages with coloured bars\n",
"    return df.style.format({'Score': '{:,.2%}'.format}).bar(subset=['Score'], color=color)\n",
"\n",
"def annotate_query(highlights, query, topics):\n",
"    # build entity spans for each extracted phrase found in the query text\n",
"    ents = []\n",
"    for h, t in zip(highlights, topics):\n",
"        ent_dict = {}\n",
"        # escape the phrase so it is matched literally rather than as a regex pattern\n",
"        for match in re.finditer(re.escape(h), query, re.IGNORECASE):\n",
"            ent_dict = {\"start\": match.start(), \"end\": match.end(), \"label\": t}\n",
"            break\n",
"        if len(ent_dict.keys()) > 0:\n",
"            ents.append(ent_dict)\n",
"    return ents\n",
"\n",
"def path_to_image_html(path):\n",
"    # wrap an image path in an HTML <img> tag for inline display\n",
"    return f'<img src=\"{path}\">'\n",
"\n",
"passing_score = 0.8\n",
"final_passing = 0.0\n",
"def display_final_df(agg_df):\n",
"    tags = []\n",
"    crits = [\n",
"        'PHYSICAL SIGNS',\n",
"        'VERBAL SIGNS'\n",
"    ]\n",
"    orig_crits = crits\n",
"    crits = [x for x in crits if x in agg_df.index.tolist()]\n",
"    bools = [agg_df.loc[crit, 'Final_Score'] > final_passing for crit in crits]\n",
"    paths = ['./thumbs_up.png' if x else './thumbs_down.png' for x in bools]\n",
"    df = pd.DataFrame({'Early Warning Sign Categories': crits, 'USED': paths})\n",
"    rem_crits = [x for x in orig_crits if x not in crits]\n",
"    if len(rem_crits) > 0:\n",
"        df2 = pd.DataFrame({'Early Warning Sign Categories': rem_crits, 'USED': ['./thumbs_down.png'] * len(rem_crits)})\n",
"        df = pd.concat([df, df2])\n",
"    df = df.set_index('Early Warning Sign Categories')\n",
"    pd.set_option('display.max_colwidth', None)\n",
"    display(HTML('