{ "cells": [ { "cell_type": "markdown", "id": "f56cc5ad", "metadata": {}, "source": [ "# NDIS Project - OpenAI - PBSP Scoring - Page 4 - Quality of Life Strategies" ] }, { "cell_type": "code", "execution_count": null, "id": "a8d844ea", "metadata": { "hide_input": false }, "outputs": [], "source": [ "import openai\n", "import re\n", "import string\n", "from ipywidgets import interact\n", "import ipywidgets as widgets\n", "from IPython.display import display, clear_output, Javascript, HTML, Markdown\n", "import matplotlib.pyplot as plt\n", "import matplotlib.ticker as mtick\n", "import json\n", "import spacy\n", "from spacy import displacy\n", "from dotenv import load_dotenv\n", "import pandas as pd\n", "import argilla as rg\n", "from argilla.metrics.text_classification import f1\n", "from typing import Dict\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "%matplotlib inline\n", "pd.set_option('display.max_rows', 500)\n", "pd.set_option('display.max_colwidth', 10000)\n", "pd.set_option('display.width', 10000)" ] }, { "cell_type": "code", "execution_count": null, "id": "96b83a1d", "metadata": {}, "outputs": [], "source": [ "#initializations\n", "openai.api_key = os.environ['API_KEY']\n", "openai.api_base = os.environ['API_BASE']\n", "openai.api_type = os.environ['API_TYPE']\n", "openai.api_version = os.environ['API_VERSION']\n", "deployment_name = os.environ['DEPLOYMENT_ID']\n", "\n", "#argilla\n", "rg.init(\n", " api_url=os.environ[\"ARGILLA_API_URL\"],\n", " api_key=os.environ[\"ARGILLA_API_KEY\"]\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "dee25d82", "metadata": {}, "outputs": [], "source": [ "#sentence extraction\n", "def extract_sentences(paragraph):\n", " symbols = ['\\\\.', '!', '\\\\?', ';', ':', ',', '\\\\_', '\\n', '\\\\-']\n", " pattern = '|'.join([f'{symbol}' for symbol in symbols])\n", " sentences = re.split(pattern, paragraph)\n", " sentences = [sentence.strip() for sentence in sentences if sentence.strip()]\n", " return sentences" ] }, { "cell_type": "code", "execution_count": null, "id": "08365143", "metadata": {}, "outputs": [], "source": [ "def filter_dataframe(result_df, paragraph):\n", " filtered_df = result_df[result_df['Phrase'].apply(lambda x: x.lower().translate(str.maketrans(\"\", \"\", string.punctuation)) in paragraph.lower().translate(str.maketrans(\"\", \"\", string.punctuation)) or \n", " x.lower().translate(str.maketrans(\"\", \"\", string.punctuation)).replace(\"’s\",\"s'\") in paragraph.lower().translate(str.maketrans(\"\", \"\", string.punctuation)))]\n", " filtered_df['Match_Percentage'] = filtered_df.apply(lambda row: len(set(row['Phrase'].lower()) & set(paragraph.lower())) / len(set(row['Phrase'].lower())), axis=1)\n", " filtered_df = filtered_df[filtered_df['Match_Percentage'] >= 0.2]\n", " filtered_df = filtered_df.drop(['Match_Percentage'], axis=1)\n", " if len(filtered_df) == 0:\n", " filtered_df = result_df\n", " filtered_df = filtered_df.drop_duplicates()\n", " return filtered_df" ] }, { "cell_type": "code", "execution_count": null, "id": "02fda761", "metadata": {}, "outputs": [], "source": [ "def process_response(response, query):\n", " sentences = []\n", " topics = []\n", " scores = []\n", " lines = response.strip().split(\"\\n\")\n", " topic = None\n", " for line in lines:\n", " if \"Likes/Dislikes:\" in line:\n", " topic = \"LIKES/DISLIKES\"\n", " elif \"Interests:\" in line:\n", " topic = \"INTERESTS\"\n", " elif \"Hobbies:\" in line:\n", " topic = \"HOBBIES\"\n", " elif \"Relationships:\" in line:\n", " topic = \"RELATIONSHIPS\"\n", " elif \"Employment:\" in line:\n", " topic = \"EMPLOYMENT\"\n", " elif \"Health:\" in line:\n", " topic = \"HEALTH\"\n", " elif \"Education:\" in line:\n", " topic = \"EDUCATION\"\n", " elif \"None:\" in line:\n", " topic = \"NONE\"\n", " else:\n", " try:\n", " parts = line.split(\"(Confidence Score:\")\n", " if len(parts) == 2:\n", " phrase = parts[0].strip()\n", " score = float(parts[1].strip().replace(\")\", \"\"))\n", " sentences.append(phrase)\n", " topics.append(topic)\n", " scores.append(score)\n", " except:\n", " pass\n", " result_df = pd.DataFrame({'Phrase': sentences, 'Topic': topics, 'Score': scores})\n", " try:\n", " result_df['Phrase'] = result_df['Phrase'].str.replace('\\d+\\.', '', regex=True)\n", " result_df['Phrase'] = result_df['Phrase'].str.replace('^\\s', '', regex=True)\n", " result_df['Phrase'] = result_df['Phrase'].str.strip('\"')\n", " result_df = filter_dataframe(result_df, query)\n", " except:\n", " sentences = extract_sentences(query)\n", " topics = ['NONE'] * len(sentences)\n", " scores = [0.9] * len(sentences)\n", " result_df = pd.DataFrame({'Phrase': sentences, 'Topic': topics, 'Score': scores})\n", " return result_df" ] }, { "cell_type": "code", "execution_count": null, "id": "714fafb4", "metadata": {}, "outputs": [], "source": [ "def get_prompt(query):\n", " prompt = f\"\"\"\n", " The paragraph below is written in a positive behaviour support plan by a disability practitioner to outline a set of strategies to implement that aim to enhance the person with disability's quality of life. These strategies are not related to the person with disability's target behaviours; but they are strategies related to their likes/dislikes, interests, hobbies, relationships, employment, health, and education.\n", "\n", " Practitioner Paragraph:\n", " {query}\n", "\n", " Requirement:\n", " - Identify the phrases from the practitioner paragraph above that represent each of the following quality of life strategies: \"Likes/Dislikes\", \"Interests\", \"Hobbies\", \"Relationships\", \"Employment\", \"Health\", and \"Education\". \n", "\n", " Guidelines:\n", " - \"Likes/Dislikes\": One strategy to enhance the quality of life of a person with a disability is to provide him with experiences that he enjoys and avoid experiences that he finds unpleasant. For example, if the person with a disability likes music, then encourage him to attend concerts or music festivals. On the other hand, if he dislikes crowded places, then discourage him from attending such events.\n", "\n", " - \"Interests\": One strategy to enhance the quality of life of a person with a disability is to provide opportunities for him to pursue his interests. For example, if the person with a disability is interested in animals, then encourage him to visit a zoo or volunteer at an animal shelter.\n", "\n", " - \"Hobbies\": One strategy to enhance the quality of life of a person with a disability is to facilitate access to materials or equipment needed for practicing his hobbies, or to arrange for him to participate in hobby-related events. For example, if the person with a disability enjoys painting, then provide him with art supplies or arrange for him to attend art classes.\n", "\n", " - \"Relationships\": One strategy to enhance the quality of life of a person with a disability is to support his existing relationships or facilitate the development of new relationships. For example, if the person with a disability enjoys spending time with a particular friend or family member, then support and encourage that relationship. \n", "\n", " - \"Employment\": One strategy to enhance the quality of life of a person with a disability is to identify potential job opportunities, provide job training or coaching, or modify job duties to better suit the person with a disability's abilities. For example, if the person with a disability has an interest in gardening, then find for him employment at a nursery or garden center.\n", "\n", " - \"Health\": One strategy to enhance the quality of life of a person with a disability is to facilitate access to healthcare services, encourage healthy lifestyle choices, and support the person with a disability's physical and mental well-being. For example, if the person with a disability enjoys outdoor activities, then facilitate opportunities for him to engage in those activities.\n", "\n", " - \"Education\": One strategy to enhance the quality of life of a person with a disability is to identify educational resources or programs that align with the person with a disability's interests or abilities, or provide support to pursue educational goals. For example, if the person with a disability has an interest in history, then provide him with access to historical books, documentaries, or lectures.\n", "\n", " Specifications of a correct answer:\n", " - Please provide a response that closely matches the information in the practitioner paragraph and does not deviate significantly from it.\n", " - Provide your answer in numbered lists. \n", " - All the phrases in your answer must be exact substrings in the original practitioner paragraph. without changing any characters.\n", " - All the upper case and lower case characters in the phrases in your answer must match the upper case and lower case characters in the original practitioner paragraph.\n", " - Start numbering the phrases under each quality of life strategy from number 1. \n", " - Start each list of phrases with these titles: \"Likes/Dislikes\", \"Interests\", \"Hobbies\", \"Relationships\", \"Employment\", \"Health\", \"Education\".\n", " - For each phrase that belongs to any of the above quality of life strategies, provide a confidence score that ranges between 0.50 and 1.00, where a score of 0.50 means you are very weakly confident that the phrase belongs to that specific strategy, whereas a score of 1.00 means you are very strongly confident that the phrase belongs to that specific strategy.\n", " - Never include any phrase in your answer that does not exist in the practitioner paragraph.\n", " - If none of the phrases in the practitioner paragraph belongs to a quality of life strategy, do not include this quality of life strategy in your answer.\n", " - Include a final numbered list titled \"None:\", which include all the remaining phrases from the practitioner paragraph that do not belong to any of the quality of life strategies above. Provide a confidence score for each of these phrases as well.\n", "\n", " Example answer:\n", "\n", " Likes/Dislikes:\n", " 1. I have encouraged Eddie to attend the theatre as he enjoys watching movies. (Confidence Score: 1.00)\n", " 2. I have discouraged Taylor from attending theatres as watching loud movies disturbes him. (Confidence Score: 0.95)\n", " \n", " Interests:\n", " 1. I always urge Eddie to visit a butterfly house as I've noticed his big interest in butterflies. (Confidence Score: 0.97)\n", " \n", " Hobbies:\n", " 1. I've arranged for Taylor to attend dancing classes as he told me that dancing is his greatest hobby. (Confidence Score: 0.94)\n", "\n", " Relationships:\n", " 1. I always encourage Eddie to gather with his school friends at cafes or restaurants as he enjoys spending time with them. (Confidence Score: 0.99)\n", " \n", " Employment:\n", " 1. I have been searching for librarian role at a local library for Taylor to undertake since he has shown a great interest in books. (Confidence Score: 0.94)\n", "\n", " Health:\n", " 1. I have encouraged Eddie to participate in local Marathons to improve his physical well-being. (Confidence Score: 0.99)\n", "\n", " Education:\n", " 1. I have provided Taylor with free access to fiction and drama books as he is a big fan of literature. (Confidence Score: 0.96)\n", "\n", " None:\n", " 1. I support Eddie, who is a 25-year old man with a disability. (Confidence Score: 0.96)\n", " 2. Taylor has a plenty of likes, interests and hobbies. In the following lines, I will expain the strategies I am using these to enhance his quality of life. (Confidence Score: 0.94)\n", " \"\"\"\n", " return prompt" ] }, { "cell_type": "code", "execution_count": null, "id": "9e23821b", "metadata": {}, "outputs": [], "source": [ "def get_response_chatgpt(prompt):\n", " response=openai.ChatCompletion.create( \n", " engine=deployment_name, \n", " messages=[ \n", " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}, \n", " {\"role\": \"user\", \"content\": prompt} \n", " ],\n", " temperature=0\n", " )\n", " reply = response[\"choices\"][0][\"message\"][\"content\"]\n", " return reply" ] }, { "cell_type": "code", "execution_count": null, "id": "983765bc", "metadata": {}, "outputs": [], "source": [ "def convert_df(result_df):\n", " new_df = pd.DataFrame(columns=['text', 'prediction'])\n", " new_df['text'] = result_df['Phrase']\n", " new_df['prediction'] = result_df.apply(lambda row: [[row['Topic'], row['Score']]], axis=1)\n", " return new_df" ] }, { "cell_type": "code", "execution_count": null, "id": "bc69cc81", "metadata": {}, "outputs": [], "source": [ "def custom_f1(data: Dict[str, float], title: str):\n", " from plotly.subplots import make_subplots\n", " import plotly.colors\n", " import random\n", "\n", " fig = make_subplots(\n", " rows=2,\n", " cols=1,\n", " subplot_titles=[ \"Overall Model Score\", \"Model Score By Category\", ],\n", " )\n", "\n", " x = ['precision', 'recall', 'f1']\n", " macro_data = [v for k, v in data.items() if \"macro\" in k]\n", " fig.add_bar(\n", " x=x,\n", " y=macro_data,\n", " row=1,\n", " col=1,\n", " )\n", " per_label = {\n", " k: v\n", " for k, v in data.items()\n", " if all(key not in k for key in [\"macro\", \"micro\", \"support\"])\n", " }\n", "\n", " num_labels = int(len(per_label.keys())/3)\n", " fixed_colors = [str(color) for color in plotly.colors.qualitative.Plotly]\n", " colors = random.sample(fixed_colors, num_labels)\n", "\n", " fig.add_bar(\n", " x=[k for k, v in per_label.items()],\n", " y=[v for k, v in per_label.items()],\n", " row=2,\n", " col=1,\n", " marker_color=[colors[int(i/3)] for i in range(0, len(per_label.keys()))]\n", " )\n", " fig.update_layout(showlegend=False, title_text=title)\n", "\n", " return fig" ] }, { "cell_type": "code", "execution_count": null, "id": "905eaf2a", "metadata": {}, "outputs": [], "source": [ "topic_color_dict = {\n", " 'LIKES/DISLIKES': '#FFCCCC',\n", " 'INTERESTS': '#CCFFFF',\n", " 'HOBBIES': '#FF69B4',\n", " 'RELATIONSHIPS': '#FFFF00',\n", " 'EMPLOYMENT': '#CCCCFF',\n", " 'HEALTH': '#FFCC99',\n", " 'EDUCATION': '#CCCCCC',\n", " 'NONE': '#F08080'\n", " }\n", "\n", "def color(df, color):\n", " return df.style.format({'Score': '{:,.2%}'.format}).bar(subset=['Score'], color=color)\n", "\n", "def annotate_query(highlights, query, topics):\n", " ents = []\n", " for h, t in zip(highlights, topics):\n", " pattern = re.escape(h)\n", " pattern = re.sub(r'\\\\(.)', r'[\\1\\\\W]*', pattern) # optional non-alphanumeric characters\n", " for match in re.finditer(pattern, query, re.IGNORECASE):\n", " ent_dict = {\"start\": match.start(), \"end\": match.end(), \"label\": t}\n", " ents.append(ent_dict)\n", " return ents\n", "\n", "def path_to_image_html(path):\n", " return ''\n", "\n", "passing_score = 0.5\n", "final_passing = 0.0\n", "def display_final_df(agg_df):\n", " tags = []\n", " crits = [\n", " 'LIKES/DISLIKES',\n", " 'INTERESTS',\n", " 'HOBBIES',\n", " 'RELATIONSHIPS',\n", " 'EMPLOYMENT',\n", " 'HEALTH',\n", " 'EDUCATION'\n", " ]\n", " orig_crits = crits\n", " crits = [x for x in crits if x in agg_df.index.tolist()]\n", " bools = [agg_df.loc[crit, 'Final_Score'] > final_passing for crit in crits]\n", " paths = ['./thumbs_up.png' if x else './thumbs_down.png' for x in bools]\n", " df = pd.DataFrame({'Quality of Life Strategy': crits, 'USED': paths})\n", " rem_crits = [x for x in orig_crits if x not in crits]\n", " if len(rem_crits) > 0:\n", " df2 = pd.DataFrame({'Quality of Life Strategy': rem_crits, 'USED': ['./thumbs_down.png'] * len(rem_crits)})\n", " df = pd.concat([df, df2])\n", " df = df.set_index('Quality of Life Strategy')\n", " pd.set_option('display.max_colwidth', None)\n", " display(HTML('