import gradio as gr |
import openai |
import requests |
import json |
import tiktoken |
import os |
# Tokenizer used to count prompt/response tokens for the session cost estimate.
encoding = tiktoken.get_encoding('cl100k_base')
# The API key comes from the environment; it is None when the variable is unset.
openai.api_key = os.environ.get('OPENAI_API_KEY')

# --- Mutable session state, shared by the callbacks below through `global` ---
symptom = ""
# `index` is used by next_question() as a position into `included_symptom`;
# the two counters feed the "Session Summary" text box.
index = prompt_count = session_cost = 0
# Symptom names returned by the model, split by whether `data` has questions for them.
identified_symptom, included_symptom, excluded_symptom = [], [], []
# Flattened question keys across all processed symptoms (used only for counts).
questions_answered, questions_to_ask = [], []
# Raw model replies, appended by submit().
output_ = []
# Questionnaire catalogue.
# Structure: {symptom name: {question number (1-based): [question text, [choices]]}}
data = {
    "shaking": {
        1: ["Since how long do you have this shaking problem?", ["Since birth", "Since few days", "Few weeks", "Few months", "Since many years"]],
        2: ["In what part of your body is the shaking located?", ["Hand", "Feet", "Right Half of the Body", "Left Half of the body", "Head", "Other"]],
        3: ["Are you facing any of these problems?", ["Unable to write legibly", "Difficulty in talking", "Difficulty in combing hair", "Unable to eat food properly", "Unable to hold cup or glass without spilling", "Getting up from sitting position"]],
        4: ["What makes the shaking better?", ["Rest", "Alcohol", "Walking", "Medications", "Activity", "Other"]],
        5: ["Are you facing any of these issues?", ["Muscle Fatigue", "Low Blood Sugar Levels", "Stress", "Cold Weather", "Hot Weather", "Difficulty in movement", "Difficulty in writing"]],
        6: ["Do you have any other related problems?", ["Weakness in walking", "Yes-Yes or No-No Head Movement/Nodding", "Unsteady Gait", "Hands affected", "Stooped Posture", "Slow Movement", "Chin affected", "Legs affected", "Other"]],
    },
    "fever": {
        1: ["Since how long you are having fever?", ["Every seasonal change", "Recently 1-2 days", "1-2 weeks", "1 month", "Since last night"]],
        2: ["How did the fever start?", ["Slowly", "Suddenly"]],
        3: ["How is your fever?", ["High fever with shivering", "High fever with chills", "High fever with chills & Shivering", "Low grade fever"]],
        4: ["What is your temperature? (deg F)", ["less than 98.4", "99-100", "100-101", "101-102", "102-103", "more than 103"]],
        5: ["Select the relevant reason for fever", ["Heat Exhaustion / Sunburn / Sunstroke", "Insect Bite", "Abdominal discomfort", "Blood Donation / Receiving Blood", "Burning urination", "Any surgery", "Ear / Nose Piercing", "Animal Scratch", "Accident", "Tattoo", "Travel in last 1 month", "Immunization / Vaccine Shots", "Dental Treatment", "Hospitalization in last 6 Months", "Any Bleeding", "Unknown Infection", "Other symptoms"]],
        6: ["Do you have any other related problems?", ["Loss of Appetite", "Headache", "Sweating", "Weakness", "Increased sensitivity to pain", "Cough", "Common Cold", "Palpitations", "Dizziness", "Chest Pain", "Perspiration", "Lack energy", "Feel sleepy", "Confusion", "Difficulty in concentration", "Muscle Pain", "Runny Nose", "Wheezing", "Night Sweating", "Discomfort", "Pressure, Tightness in the Chest", "Fainting (Syncope) or near fainting", "Racing Heartbeats (Tachycardia)", "Slow Heartbeats (Bradycardia)", "Abdominal Pain", "Diarrhoea", "Vomiting", "Burning in urination", "Pain in throat", "Skin Lesion", "Swelling", "Joint Pain", "Shortness of breath", "Others"]],
    },
    "diarrhoea": {
        1: ["Since how long have you been experiencing loose motions?", ["Few Minutes", "Few Hours", "Few days", "Few weeks", "Few months", "Few years", "Since birth"]],
        2: ["How does the condition start?", ["Slowly", "Suddenly"]],
        3: ["How many times do you pass stool?", ["One to Two times a Day", "Three to four times a day", "Four to six times a day", "More than 6 times a day"]],
        # Choice label corrected from the misspelled "Nomal".
        4: ["What is the consistency of the stool?", ["Loose", "Watery", "Normal"]],
        5: ["Do you have blood in your stool?", ["Yes", "No"]],
        6: ["What causes an increase in diarrhea among the following options?", ["Food", "Drugs", "Tea", "Alcohol"]],
        7: ["Do you have any other related problems?", ["Loose Motions", "Cramps", "Dizziness", "Weakness", "Feeling like Vomiting", "Stiff Neck", "Bloating in Stomach", "Jaundice", "Headache", "Unconsciousness", "Chest Pain", "Irregular heartbeat", "Heart Burn", "Excessive Burps", "Vomiting", "Gas in Stomach / Flatulence", "Abdominal Pain", "Constipation", "Tarry Stools", "Weight Loss", "Swollen Lymph Nodes", "History of Iron tablets", "Other Symptoms"]],
    },
    "generalized weakness": {
        1: ["Since when are you experiencing weakness?", ["Few Minutes", "Few Hours", "Few days", "Few weeks", "Few months", "Few years"]],
        2: ["How did the symptom start?", ["Slowly", "Suddenly"]],
        3: ["How weak do you feel?", ["A little weak", "Very weak"]],
        4: ["What increases the weakness?", ["Walking", "Climbing the stairs", "Talking", "Other Activity", "Sitting", "Running", "Lying on back", "Prone"]],
        5: ["When do you experience weakness the most?", ["Walking", "Climbing the stairs", "Talking", "Other Activity", "Sitting", "Running", "Lying on back", "Prone", "Hunger"]],
        6: ["Do you have any other symptoms or problems?", ["Breathlessness", "Fever", "Cough", "Weight loss", "Loss of hair", "Weight Gain", "Decrease Appetite", "Generalized body aches", "Other symptoms"]],
    },
    "irregular periods": {
        1: ["Since when are you having irregular periods?", ["Few days", "Few weeks", "Few months", "Few years"]],
        2: ["How did the symptom start?", ["Slowly", "Suddenly"]],
        3: ["How long does your periods typically last?", ["Less than 3 days", "3-7 days", "More than 7 days"]],
        4: ["How often do you change your pad in a day?", ["Few times a day", "Too many times"]],
        5: ["Did you miss three or more periods in a row?", ["I do", "No, I don't"]],
        6: ["What do you believe caused this problem among the following reasons?", ["IUCD Use", "Lactation", "Drug intake", "Medical illness"]],
        7: ["Do you have any other symptoms or problems?", ["Itching of vagina", "Pain in vagina", "Lower abdominal pain", "Difficulty in urination", "Other symptoms"]],
    },
}

# Symptom names offered to the model for matching. Derived from `data` so the
# list can never drift out of sync with the catalogue (same names, same order).
symptom_list = list(data)

# Per-symptom model output: {symptom: {"questions_answered": {...}, "questions_to_ask": {...}}}
new_data = {}
def ask_gpt(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send *prompt* as a single user message and return the model's reply text.

    Side effects: increments the global ``prompt_count`` and adds the
    estimated cost of this call to the global ``session_cost`` (kept rounded
    to two decimals because the UI prints the value as-is).

    Args:
        prompt: Full prompt text sent as the only message of the conversation.
        model: Chat model name passed to the OpenAI API.
        temperature: Sampling temperature passed to the OpenAI API.

    Returns:
        The content string of the first choice in the response.
    """
    global session_cost
    global prompt_count

    # NOTE: this is the pre-1.0 `openai` SDK interface, kept as the file uses it.
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    reply = response.choices[0].message["content"]
    prompt_count += 1

    # Flat per-token rate used for the estimate (the UI labels the total "INR Cost").
    cost_per_token = 0.00017
    # Each call sends the prompt exactly once, so its tokens are charged once.
    # The previous code multiplied them by len(identified_symptom), which made
    # the first prompt of a session free and over-charged every later one.
    tokens_used = len(encoding.encode(prompt)) + len(encoding.encode(reply))
    session_cost = round(session_cost + tokens_used * cost_per_token, 2)
    return reply
def _build_symptom_prompt(input_text_box, known_symptoms):
    """Return the NER prompt asking the model to map free text onto *known_symptoms*."""
    return '''Task: Perform Named Entity Recognition and provide output in JSON format
\nInstructions:
\n1. Process and analyse the <input_text_box> to identify symptom names. There could be more than 1 symptom.
\n A symptom is a subjective indication of a health condition or disease experienced by an individual.
\n2. Consider variations in the appearance of keywords or phrases, such as singular/plural forms, different cases, similar meaning. Rephrase the identified symptoms to match <symptom_list>
\n3. For the symptoms that match, fetch their corresponding values from the <symptom_list> and store.
\n4. Ensure all stored values are in lowercase and there are no spaces leading or trailing the values, in the list created. Make changes as desired to meet this.
\n5. Display output only in json format. Example: { "identified": ["1","2"]}
\n6. If the output is not in JSON format, respond with 'ERROR'
\n7. If the output contains symptoms that are not in symptom_list, do not show them. Instead, find their closest match from <symptom_list> and show them. Re-do the output as required.
\n\nThe reference to values mentioned under angle brackets <> are below:
\n<input_text_box>: ''' + str(input_text_box) + "\n<symptom_list>: " + str(known_symptoms)


def _build_question_prompt(input_text_box, item, questions, values):
    """Return the prompt asking the model to pre-fill the questionnaire of symptom *item*."""
    return '''"
\nAs a user, I will provide a text to you with my health concerns in the <input_text_box>.
\nAs a user, expect me to provide may contain symptoms, synonyms, and their characteristics.
\nAs a user, expect my input to be incomplete, contain gibberish, be blank, or have variations such as singular/plural form, different cases, spelling mistakes, similar meaning, synonyms, etc
\n
\nAs a system, you are a world renowned medical practitioner with 40 years of experience.
\nAs a system, you must process <input_text_box> and need to match against <questions> and/or <values>.
\nAs a system, you must not assume anything that is not present in the <input_text_box>.
\nAs a system, you must not try to process information that does not match any of the instructions given to you (above or below)
\nAs a system, your output must be in form of a python dictionary in below structure:
\n
{
"symptom": {
"questions_answered": {
1: ["question 1", "matched value for question 1"],
2: ["question 3", "matched value for question 3"]
},
"questions_to_ask": {
1: ["question 2", [all values for question 2]],
2: ["question 4", [all values for question 4]]
}
}
}
\nAs a system, you must analyse the provided <input_text_box> for to identify keywords or phrases that correspond to each set of <values> for each <questions> related to the symptom <item>; and find a match.
\nAs a system, if you find a match and if the matched value or synonym of the matched value is present under the list <values>, add the corresponding question and its values from the list <values> that it matched with as a key in section 'questions_answered'
\nAs a system, if you find a match and if the matched value or synonym of the matched value is not present under the list <values>, then ignore it and do not consider it as a match. Add the corresponding question and all its possible values as per the list <values> in section 'questions_to_ask'
\nAs a system, if you do not find a match, then you will not assume <values> for <questions>.
\nAs a system, if you do not find a match, then you will add the corresponding question and all its possible values in section 'questions_to_ask'
\nAs a system, if you do not find a match, then you will NOT add the corresponding question and assumed value in section 'questions_answered'
\nAs a system, you have to ensure any question appears only under one of the sections but never in both.
\nAs a system, you have to ensure that the keys in each section of the dictionary starts from 1 and are sequential thereafter.
\nAs a system, you must replace the placeholder "symptom" with the term <item> in the dictionary.
\nAs a system, you must also ensure for symptom <item>, the length of section "questions_answered" + the length of section "questions_to_ask" does not exceed the length of total questions to be asked as per <questions>
\nAs a system, the output you provide must only contain the dictionary and no text such as 'Here is the python dictionary created based on the provided instructions:' or similar, as a piece of code has to further process the dictionary.
\nAs a system, you understand it is ok to have section 'questions_answered' as blank if there is nothing to put in the section
\nAs a system, you understand it is ok to have section 'questions_to_ask' as blank if there is nothing to put in the section
\nAs a system, you need to ensure that any value mentioned under section 'questions_answered' must be from the list <values> for respective questions. If that is not the case, you must remove the respective key from the section 'questions_answered' and must move it to section 'questions_to_ask' without the value but with all possible values as per the list <values> for the respective question
\nAs a system, you will re-do the output to fit all instructions or as needed
\n\nNote: The references mentioned under angle brackets <> are below:
''' + "\n<input_text_box>: " + str(input_text_box) + "\n<item>: " + str(item) + "\n<questions>: " + str(questions) + "\n<values>: " + str(values)


def _parse_symptom_reply(reply, item):
    """Turn the model's dict literal for *item* into its two question sections.

    When the reply cannot be parsed, or does not contain *item* in the expected
    shape, every catalogue question for *item* is returned as still to be asked.
    """
    # SECURITY: the reply is untrusted model output. It used to be passed to
    # eval(); ast.literal_eval accepts the same dict/list/str/int literals
    # without being able to execute code.
    import ast

    try:
        entry = ast.literal_eval(str(reply).strip())[item]
        answered = dict(entry.get("questions_answered") or {})
        pending = dict(entry.get("questions_to_ask") or {})
    except (ValueError, SyntaxError, TypeError, KeyError, IndexError,
            AttributeError, MemoryError, RecursionError):
        answered = {}
        pending = {
            number: [question, list(choices)]
            for number, (question, choices) in data[item].items()
        }
    return {"questions_answered": answered, "questions_to_ask": pending}


def submit(input_text_box, output_text_box, submit_button, radio_button, next_button, session_cost_text_box, raw_output_text_box):
    """Process the free-text health concern and pre-fill the questionnaires.

    Asks the model which catalogue symptoms the text mentions, then, for each
    symptom present in ``data``, asks which questions the text already answers.
    Results are stored in the module-level session state (``new_data``,
    ``identified_symptom``, ``included_symptom``, ``excluded_symptom``,
    ``questions_answered``, ``questions_to_ask``) and ``index`` is reset so the
    follow-up questions start from the first processed symptom.

    Args:
        input_text_box: Text typed by the user.
        output_text_box, submit_button, radio_button, next_button,
        session_cost_text_box, raw_output_text_box: Current values of the
            corresponding components, as wired in the click handler.

    Returns:
        A 6-tuple of values/updates for (output_text_box, submit_button,
        radio_button, next_button, session_cost_text_box, raw_output_text_box).
    """
    global index
    global new_data
    global included_symptom
    global excluded_symptom
    global identified_symptom
    global prompt_count
    global session_cost
    global questions_answered
    global questions_to_ask

    if not (input_text_box or "").strip():
        # Previously this called exit(), which raises SystemExit inside the
        # server process; tell the user instead and leave all state untouched.
        return (
            gr.update(value="Please enter your health concerns before pressing Process."),
            submit_button,
            radio_button,
            next_button,
            session_cost_text_box,
            raw_output_text_box,
        )

    # Start a fresh session: without resetting `index` and `new_data`, a second
    # submission would resume mid-way through the previous one's symptoms.
    prompt_count = 0
    session_cost = 0
    index = 0
    new_data = {}
    included_symptom = []
    excluded_symptom = []
    questions_answered = []
    questions_to_ask = []

    def cost_summary():
        return gr.update(value = "Prompts: " + str(prompt_count) +"\t|\tINR Cost: " + str(session_cost))

    ner_reply = ask_gpt(_build_symptom_prompt(input_text_box, symptom_list))
    try:
        found = json.loads(ner_reply)["identified"]
        if not isinstance(found, list):
            raise TypeError("'identified' is not a list")
    except (ValueError, KeyError, TypeError):
        # The prompt tells the model to answer 'ERROR' when it cannot comply,
        # so a non-JSON reply is an expected outcome, not a crash.
        identified_symptom = []
        return (
            gr.update(value="Could not identify any symptoms from the model response. Please rephrase and try again."),
            submit_button,
            gr.update(visible=False),
            gr.update(visible=False),
            cost_summary(),
            gr.update(label = "Raw Output: ", value=str(ner_reply)),
        )

    # Enforce in code what the prompt requests (lowercase, no surrounding
    # spaces) so a near-miss from the model still matches the keys of `data`.
    identified_symptom = [str(name).strip().lower() for name in found]
    for name in identified_symptom:
        (included_symptom if name in data else excluded_symptom).append(name)

    for item in included_symptom:
        questions = [entry[0] for entry in data[item].values()]
        values = [entry[1] for entry in data[item].values()]
        output = ask_gpt(_build_question_prompt(input_text_box, item, questions, values))
        output_.append(output)
        new_data[item] = _parse_symptom_reply(output, item)

    for item in included_symptom:
        questions_answered.extend(new_data[item]["questions_answered"])
        questions_to_ask.extend(new_data[item]["questions_to_ask"])
    # Computed once after the loop: adding the cumulative list lengths on every
    # iteration counted earlier symptoms' questions again for each later symptom.
    total_questions = len(questions_answered) + len(questions_to_ask)

    summary = (
        "Identified Symptoms: " + str(len(identified_symptom))
        + "\n(" + ',\t'.join(identified_symptom)
        + ")\n\nSymptoms Processed: " + str(len(included_symptom))
        + "\n(" + ',\t'.join(included_symptom)
        + ")\n\nSymptoms Not Processed: " + str(len(excluded_symptom))
        + "\n(" + ',\t'.join(excluded_symptom)
        + ")\n\nTotal Questions: " + str(total_questions)
        + "\nAnswered: " + str(len(questions_answered))
        + "\nUnanswered: " + str(len(questions_to_ask))
    )

    output_text_box = gr.update(value=summary)
    raw_output_text_box = gr.update(label = "Raw Output: ", value=json.dumps(new_data, indent=4))
    # Only offer "Start" when there is at least one questionnaire to walk through.
    next_button = gr.update(value="Start", visible=bool(included_symptom), interactive=True)
    radio_button = gr.update(visible=False)
    return (output_text_box, submit_button, radio_button, next_button, cost_summary(), raw_output_text_box)
def next_question(radio_button, next_button, output_text_box, raw_output_text_box):
    """Advance the follow-up questionnaire by one step.

    Uses the module-level ``index`` as a position into ``included_symptom`` and
    mutates ``new_data`` in place. When the clicked button is labelled
    "Next Question" and the user picked one of the offered choices, the
    question on display is moved from "questions_to_ask" to
    "questions_answered" with that choice. A click without a valid selection
    shows the same question again.

    Args:
        radio_button: Currently selected radio choice (None when nothing is selected).
        next_button: Current label of the button that was clicked.
        output_text_box: Current summary text, passed through unchanged.
        raw_output_text_box: Current raw-output text; replaced with a fresh dump.

    Returns:
        A 4-tuple of values/updates for (radio_button, next_button,
        output_text_box, raw_output_text_box).
    """
    global index

    def raw_output():
        return gr.update(label = "Raw Output: ", value=json.dumps(new_data, indent=4))

    def renumbered(questions):
        # Keep keys 1..n so the question to show next is always under key 1.
        return dict(enumerate(questions.values(), start=1))

    def split(item):
        # Return (question, choices) when *item* has the expected shape, else None.
        if isinstance(item, (list, tuple)) and len(item) >= 2 and isinstance(item[1], (list, tuple)):
            return item[0], list(item[1])
        return None

    # `index` addresses `included_symptom`, so that list (not `new_data`) is the bound.
    if index >= len(included_symptom):
        # This path used to return five values for four outputs.
        return (gr.update(visible=False), gr.update(visible=False), output_text_box, raw_output())

    current = included_symptom[index]
    entry = new_data.get(current) if current in data else None
    if not isinstance(entry, dict):
        # Nothing to ask for this symptom; move on so the user is not stuck here.
        index += 1
        radio_label = "Symptom: " + current + " is not present in the databse."
        button_label = "End" if index >= len(included_symptom) else "Next Symptom"
        return (
            gr.update(choices=None, label=radio_label, visible=True),
            gr.update(value=button_label, visible=True),
            output_text_box,
            raw_output(),
        )

    answered = entry.setdefault("questions_answered", {})
    pending = renumbered(entry.get("questions_to_ask") or {})
    head = split(pending[1]) if pending else None

    # Record an answer only for a question that was actually on screen
    # ("Next Question"), never on "Start" or "Next Symptom", and store what the
    # user selected rather than unconditionally the first choice.
    if next_button == "Next Question" and head is not None and radio_button in head[1]:
        next_key = max((key for key in answered if isinstance(key, int)), default=0) + 1
        answered[next_key] = [head[0], radio_button]
        del pending[1]
        pending = renumbered(pending)
        head = split(pending[1]) if pending else None
    entry["questions_to_ask"] = pending

    if pending:
        next_button = gr.update(value="Next Question", visible=True)
        if head is None:
            # Malformed entry from the model: keep the radio as it is and log.
            print("Identified Symptom Not Found for Index: " + str(index))
        else:
            radio_button = gr.update(choices=head[1], label=head[0], visible=True)
    else:
        radio_label = "End of questions for symptom: " + current
        radio_button = gr.update(choices=[], label=radio_label, visible=True)
        next_button = gr.update(value="Next Symptom", visible=True)
        index += 1
        # `>=` (was `>`): after the last symptom, `index` equals the length.
        if index >= len(included_symptom):
            next_button = gr.update(value="End", visible=True)
            radio_button = gr.update(visible=False)

    return (radio_button, next_button, output_text_box, raw_output())
# Gradio UI: sample phrases, the input/processing column, the follow-up
# question column, and a collapsible section with raw and reference data.
# NOTE(review): the container nesting below was reconstructed from a copy of
# this file whose indentation was lost - confirm it matches the intended layout.
with gr.Blocks() as demo:
    with gr.Accordion(label="LLM in CDSS | Demo: To process free text input in SIPF to auto-fill questions."):
        with gr.Row():
            with gr.Column():
                sample_phrase_1 = gr.Textbox(
                    label="Sample Phrase 1",
                    value="My hands shake. It started a few months ago. I think it happens more when I am under stress. Writing skill is affected sometimes, and fever 102.",
                    show_copy_button=True,
                )
            with gr.Column():
                sample_phrase_2 = gr.Textbox(
                    label="Sample Phrase 2",
                    value="I've had fever for the past 1-2 days, it started suddenly. The fever is high with chills and shivering. My temperature is 101.5 °F. I'm also experiencing abdominal discomfort.",
                    show_copy_button=True,
                )
            with gr.Column():
                sample_phrase_3 = gr.Textbox(
                    label="Sample Phrase 3",
                    value="I've had a high fever with chills and shivering for the past 1-2 weeks. Loose motions. Periods are not timely and feel very weak.",
                    show_copy_button=True,
                )

    with gr.Row():
        # Left column: free-text input, trigger button and cost summary.
        with gr.Column():
            input_text_box = gr.Textbox(label="Enter your health concerns (minimum 5 words for each symptom)", lines=4)
            submit_button = gr.Button(value="Process")
            session_cost_text_box = gr.Textbox(label="Session Summary")
        # Right column: summary plus the follow-up question widgets (hidden at first).
        with gr.Column():
            output_text_box = gr.Textbox(label="Health Summary", value="", interactive=False, lines=12)
            radio_button = gr.Radio(visible=False)
            next_button = gr.Button(value="Next Question", visible=False)

    with gr.Accordion(label="Expand"):
        with gr.Row():
            with gr.Column():
                raw_output_text_box = gr.Textbox(label="Raw Output", value="", interactive=False, show_copy_button=True)
            with gr.Column():
                original_data_text_box = gr.Textbox(
                    label="Original Questions",
                    value=json.dumps(data, indent=4),
                    interactive=False,
                    show_copy_button=True,
                )

    # Event wiring: each handler returns one value per listed output component.
    submit_button.click(
        submit,
        [input_text_box, output_text_box, submit_button, radio_button, next_button, session_cost_text_box, raw_output_text_box],
        [output_text_box, submit_button, radio_button, next_button, session_cost_text_box, raw_output_text_box],
    )
    next_button.click(
        next_question,
        [radio_button, next_button, output_text_box, raw_output_text_box],
        [radio_button, next_button, output_text_box, raw_output_text_box],
    )
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)

# Notes