# Hugging Face file-view header (page residue, kept as comments so the module parses):
# cmcmaster's picture | Update main.py | 979ea50 verified | raw | history blame | 9.37 kB
import gradio as gr
from datasets import load_dataset
import datetime
from pbs_data import PBSPublicDataAPIClient
import os
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
import atexit
# Hugging Face access token, read from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Hub repository id of the dataset this app loads and refreshes.
DATASET_NAME = "cmcmaster/rheumatology-biologics-dataset"

# Days between scheduled update checks; the scheduler is only set up when > 0.
UPDATE_INTERVAL = 1
def load_data():
    """Load the biologics dataset and derive the dropdown option lists.

    Returns:
        A dict with the loaded dataset under ``'combinations'`` and, for each
        filterable column, a sorted list of its distinct values. If anything
        fails, the error is printed and every entry is an empty list instead.
    """
    # Result key -> dataset column the option list is derived from.
    option_columns = {
        'drugs': 'drug',
        'brands': 'brand',
        'formulations': 'formulation',
        'indications': 'indication',
        'treatment_phases': 'treatment_phase',
        'hospital_types': 'hospital_type',
    }
    try:
        records = load_dataset(DATASET_NAME, split="train")
        # Collect the distinct values of every column first, then sort them.
        distinct = {
            key: set(records[column])
            for key, column in option_columns.items()
        }
        loaded = {'combinations': records}
        for key, values in distinct.items():
            loaded[key] = sorted(values)
        return loaded
    except Exception as e:
        print(f"An error occurred while loading data: {str(e)}")
        return {'combinations': [], **{key: [] for key in option_columns}}
# Module-level copy of the loaded data, populated once at import time.
# update_data() rebinds this name after it has pushed new data and reloaded.
biologics_data = load_data()
def _format_result(item):
    """Render one dataset row as a Markdown section."""
    return f"""
### {item['drug']} ({item['brand']})
* **PBS Code:** [{item['pbs_code']}](https://www.pbs.gov.au/medicine/item/{item['pbs_code']})
* **Formulation:** {item['formulation']}
* **Indication:** {item['indication']}
* **Treatment Phase:** {item['treatment_phase']}
* **Streamlined Code:** {item['streamlined_code'] or 'N/A'}
* **Authority Method:** {item['authority_method']}
* **Online Application:** {'Yes' if item['online_application'] else 'No'}
* **Hospital Type:** {item['hospital_type']}
* **Schedule:** {item['schedule_month']} {item['schedule_year']}
---
"""


def search_biologics(drug, brand, formulation, indication, treatment_phase, hospital_type, state):
    """Return Markdown describing every row that matches the selected filters.

    Args:
        drug, brand, formulation, indication, treatment_phase, hospital_type:
            Selected dropdown values. A falsy value ("" or None) means the
            field is not filtered on.
        state: Dict whose ``'combinations'`` entry is the dataset to search.

    Returns:
        One Markdown section per matching row, concatenated in dataset order,
        or the string "No results found." when nothing matches.
    """
    combinations = state['combinations']
    # load_data() falls back to a plain empty list when loading fails; a list
    # has no .filter(), so treat any empty collection as "nothing to search".
    if not combinations:
        return "No results found."

    # Keep only the filters the user actually set.
    selected = {
        'drug': drug,
        'brand': brand,
        'formulation': formulation,
        'indication': indication,
        'treatment_phase': treatment_phase,
        'hospital_type': hospital_type,
    }
    active = {column: value for column, value in selected.items() if value}

    results = combinations.filter(
        lambda row: all(row[column] == value for column, value in active.items())
    )
    if len(results) == 0:
        return "No results found."
    return "".join(_format_result(item) for item in results)
def update_data():
    """Refresh the dataset from the PBS API on the first day of the month.

    On any other day this only prints that no update was performed. On the
    first of the month it fetches the rheumatology biologics data, saves it to
    the Hugging Face dataset, reloads it and rebinds the module-level
    ``biologics_data``. Every failure is printed rather than raised, so neither
    the import-time call nor the scheduled job can abort the app.
    """
    global biologics_data

    now = datetime.datetime.now()
    if now.day != 1:
        print(f"Not updating data at {now}")
        return

    print(f"Updating data at {now}")
    try:
        # The key can be overridden through the environment; the default is
        # the value that was previously hard-coded here.
        api_key = os.environ.get("PBS_API_KEY", "2384af7c667342ceb5a736fe29f1dc6b")
        # Constructed inside the try so a constructor failure is handled like
        # any other update error.
        client = PBSPublicDataAPIClient(api_key, rate_limit=0.2)
        data = client.fetch_rheumatology_biologics_data()
        client.save_data_to_hf(data, HF_TOKEN, DATASET_NAME)
        print("Data updated successfully")
        refreshed = load_data()
    except Exception as e:
        print(f"An error occurred while updating data: {str(e)}")
        return

    # load_data() reports failure by returning empty collections; do not let a
    # failed reload wipe out the data that is already in memory.
    if refreshed['combinations']:
        biologics_data = refreshed
    else:
        print("Reloaded dataset is empty; keeping the previously loaded data")
def create_interface():
    """Build the Gradio Blocks UI for browsing the biologics data.

    The page has six linked dropdown filters, a Search button that renders the
    matching rows as Markdown, and a Reset button that clears the filters.

    Returns:
        The assembled ``gr.Blocks`` app (not launched).
    """
    # (key in the data dict, dataset column, dropdown label), in on-screen order.
    fields = [
        ('drugs', 'drug', "Drug"),
        ('brands', 'brand', "Brand"),
        ('formulations', 'formulation', "Formulation"),
        ('indications', 'indication', "Indication"),
        ('treatment_phases', 'treatment_phase', "Treatment Phase"),
        ('hospital_types', 'hospital_type', "Hospital Type"),
    ]

    with gr.Blocks(title="Biologics Prescriber Helper") as demo:
        gr.Markdown("# Biologics Prescriber Helper")

        # Per-session state holding the data dict the callbacks filter.
        session_data = gr.State(biologics_data)

        def update_dropdown_choices(drug, brand, formulation, indication, treatment_phase, hospital_type, state):
            """Narrow every dropdown to the values still possible given the current selections."""
            selections = (drug, brand, formulation, indication, treatment_phase, hospital_type)
            combinations = state['combinations']

            # load_data() falls back to a plain empty list, which has no
            # .filter(); with no data the only available choice is blank.
            if not combinations:
                return (*(gr.Dropdown(choices=[""], value="") for _ in fields), state)

            # Filter the dataset on the fields that have a selection.
            active = {
                column: chosen
                for (_key, column, _label), chosen in zip(fields, selections)
                if chosen
            }
            filtered = combinations.filter(
                lambda row: all(row[column] == chosen for column, chosen in active.items())
            )

            updates = []
            for (_key, column, _label), chosen in zip(fields, selections):
                options = [""] + sorted(set(filtered[column]))
                # Keep the current value only if it is still a valid choice.
                updates.append(
                    gr.Dropdown(choices=options, value=chosen if chosen in options else "")
                )
            return (*updates, state)  # state is passed through unchanged

        def make_dropdown(key, label):
            """Create one blank, interactive filter dropdown offering every known value."""
            return gr.Dropdown(
                choices=[""] + biologics_data[key],
                label=label,
                value="",
                interactive=True,
            )

        with gr.Row():
            with gr.Column():
                drug, brand, formulation = [
                    make_dropdown(key, label) for key, _column, label in fields[:3]
                ]
            with gr.Column():
                indication, treatment_phase, hospital_type = [
                    make_dropdown(key, label) for key, _column, label in fields[3:]
                ]

        with gr.Row():
            search_btn = gr.Button("Search", variant="primary")
            clear_btn = gr.Button("Reset")

        results = gr.Markdown()

        def reset_inputs(state):
            """Clear every selection and restore the full choice lists."""
            return update_dropdown_choices("", "", "", "", "", "", state)

        def load_current_data():
            """Give each page load the data currently held in ``biologics_data``.

            The components above are built once, so without this a dataset
            reloaded by update_data() would never reach the UI.
            """
            current = biologics_data
            return (
                *(gr.Dropdown(choices=[""] + current[key], value="") for key, _column, _label in fields),
                current,
            )

        all_dropdowns = [drug, brand, formulation, indication, treatment_phase, hospital_type]

        # Update dropdowns when any selection changes.
        for dropdown in all_dropdowns:
            dropdown.change(
                fn=update_dropdown_choices,
                inputs=[*all_dropdowns, session_data],
                outputs=[*all_dropdowns, session_data],
            )

        search_btn.click(
            fn=search_biologics,
            inputs=[*all_dropdowns, session_data],
            outputs=results,
        )

        clear_btn.click(
            fn=reset_inputs,
            inputs=[session_data],
            outputs=[*all_dropdowns, session_data],
        )

        demo.load(
            fn=load_current_data,
            inputs=None,
            outputs=[*all_dropdowns, session_data],
        )

    return demo
if UPDATE_INTERVAL > 0:
    # Run one update check right away, then repeat it every UPDATE_INTERVAL
    # days on a background scheduler.
    update_data()

    scheduler = BackgroundScheduler()
    scheduler.add_job(
        update_data,
        trigger=IntervalTrigger(days=UPDATE_INTERVAL),
        id='update_data',
        name='Update Data',
        replace_existing=True,
    )
    scheduler.start()

    # Shut the scheduler down when the interpreter exits.
    atexit.register(scheduler.shutdown)
# Build the interface and start the Gradio server, but only when this file is
# run as a script rather than imported.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()