"""Gradio app for looking up PBS rheumatology biologics items.

The dataset is loaded from the Hugging Face Hub at import time, exposed
through six inter-dependent dropdown filters, and refreshed from the PBS
API by a background job that only does work on the first day of a month.
"""
import atexit
import datetime
import os

import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from datasets import load_dataset

from pbs_data import PBSPublicDataAPIClient

HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "cmcmaster/rheumatology-biologics-dataset"
# Interval, in days, between runs of the scheduled update job; a value <= 0
# disables the scheduler entirely.
UPDATE_INTERVAL = 1
# First positional argument of PBSPublicDataAPIClient (presumably the API
# subscription key -- confirm against pbs_data). Can be overridden through
# the environment; the default is the value previously hard-coded in
# update_data().
PBS_API_KEY = os.environ.get("PBS_API_KEY", "2384af7c667342ceb5a736fe29f1dc6b")

# Dataset column -> key under which its sorted unique values are stored in
# the dict returned by load_data(). The order matches the order of the
# dropdowns and of the handler arguments.
_FILTER_FIELDS = {
    'drug': 'drugs',
    'brand': 'brands',
    'formulation': 'formulations',
    'indication': 'indications',
    'treatment_phase': 'treatment_phases',
    'hospital_type': 'hospital_types',
}


def load_data():
    """Load the biologics dataset and the option lists for each dropdown.

    Returns:
        A dict with the dataset under ``'combinations'`` and, for every
        filterable column, a sorted list of its unique values. If anything
        goes wrong the error is printed and every entry is an empty list.
    """
    try:
        dataset = load_dataset(DATASET_NAME, split="train")
        options = {
            options_key: sorted(set(dataset[field]))
            for field, options_key in _FILTER_FIELDS.items()
        }
        return {'combinations': dataset, **options}
    except Exception as e:
        # Best effort: the app should still start when the Hub is unreachable.
        print(f"An error occurred while loading data: {str(e)}")
        return {
            'combinations': [],
            **{options_key: [] for options_key in _FILTER_FIELDS.values()},
        }


biologics_data = load_data()


def _selections(drug, brand, formulation, indication, treatment_phase, hospital_type):
    """Map each filterable column name to the value currently selected for it."""
    values = (drug, brand, formulation, indication, treatment_phase, hospital_type)
    return dict(zip(_FILTER_FIELDS, values))


def _filter_combinations(combinations, selections):
    """Return the rows of *combinations* matching every non-empty selection.

    Falsy selections ("" or None) mean "no constraint". When *combinations*
    is empty -- including the plain-list fallback produced by load_data()
    on failure, which has no ``filter`` method -- or when nothing is
    selected, *combinations* is returned unchanged.
    """
    active = {field: value for field, value in selections.items() if value}
    if not combinations or not active:
        return combinations
    return combinations.filter(
        lambda row: all(row[field] == value for field, value in active.items())
    )


def _unique_sorted(rows, field):
    """Return the sorted unique values of column *field*, or [] for no rows."""
    if not rows:
        return []
    return sorted(set(rows[field]))


def _format_result(item):
    """Render one dataset row as a Markdown section followed by a rule."""
    code = item['pbs_code']
    return (
        "\n"
        f"### {item['drug']} ({item['brand']})\n"
        f"* **PBS Code:** [{code}](https://www.pbs.gov.au/medicine/item/{code})\n"
        f"* **Formulation:** {item['formulation']}\n"
        f"* **Indication:** {item['indication']}\n"
        f"* **Treatment Phase:** {item['treatment_phase']}\n"
        f"* **Streamlined Code:** {item['streamlined_code'] or 'N/A'}\n"
        f"* **Authority Method:** {item['authority_method']}\n"
        f"* **Online Application:** {'Yes' if item['online_application'] else 'No'}\n"
        f"* **Hospital Type:** {item['hospital_type']}\n"
        f"* **Schedule:** {item['schedule_month']} {item['schedule_year']}\n"
        "\n"
        "---\n"
    )


def search_biologics(drug, brand, formulation, indication, treatment_phase, hospital_type, state):
    """Return Markdown describing every row that matches the selections.

    Args:
        drug, brand, formulation, indication, treatment_phase, hospital_type:
            Current dropdown values; a falsy value leaves that column
            unconstrained.
        state: Dict shaped like the return value of load_data().

    Returns:
        One Markdown section per matching row, or "No results found." when
        nothing matches or no data is loaded.
    """
    results = _filter_combinations(
        state['combinations'],
        _selections(drug, brand, formulation, indication, treatment_phase, hospital_type),
    )
    if len(results) == 0:
        return "No results found."
    return "".join(_format_result(item) for item in results)


def update_data():
    """Refresh the dataset from the PBS API, but only on the 1st of a month.

    On any other day this only prints a message. On the 1st it fetches the
    data, saves it to the Hugging Face dataset and reloads the module-level
    ``biologics_data``; failures are printed rather than raised.
    """
    global biologics_data

    now = datetime.datetime.now()
    if now.day != 1:
        print(f"Not updating data at {now}")
        return

    print(f"Updating data at {now}")
    client = PBSPublicDataAPIClient(PBS_API_KEY, rate_limit=0.2)
    try:
        data = client.fetch_rheumatology_biologics_data()
        client.save_data_to_hf(data, HF_TOKEN, DATASET_NAME)
        print("Data updated successfully")
        biologics_data = load_data()
    except Exception as e:
        print(f"An error occurred while updating data: {str(e)}")


def create_interface():
    """Build and return the Gradio Blocks app (without launching it)."""
    with gr.Blocks(title="Biologics Prescriber Helper") as demo:
        gr.Markdown("# Biologics Prescriber Helper")

        # NOTE(review): the state and the initial dropdown choices are taken
        # from biologics_data as it is when this function runs. update_data()
        # later rebinds the global, which does not appear to reach an
        # interface that has already been built -- confirm whether a refresh
        # hook is needed.
        session_data = gr.State(biologics_data)

        def update_dropdown_choices(drug, brand, formulation, indication,
                                    treatment_phase, hospital_type, state):
            """Narrow every dropdown to the values compatible with the others.

            Returns one gr.Dropdown per filter, in dropdown order, followed
            by the unchanged state. A current value that is no longer among
            the available choices is reset to "".
            """
            selections = _selections(
                drug, brand, formulation, indication, treatment_phase, hospital_type
            )
            filtered = _filter_combinations(state['combinations'], selections)

            updates = []
            for field, current in selections.items():
                # "" is always offered so the user can clear a filter.
                choices = [""] + _unique_sorted(filtered, field)
                updates.append(
                    gr.Dropdown(
                        choices=choices,
                        value=current if current in choices else "",
                    )
                )
            return (*updates, state)

        def make_dropdown(label, options_key):
            """Create one filter dropdown seeded with the full option list."""
            return gr.Dropdown(
                choices=[""] + biologics_data[options_key],
                label=label,
                value="",
                interactive=True,
            )

        with gr.Row():
            with gr.Column():
                drug = make_dropdown("Drug", 'drugs')
                brand = make_dropdown("Brand", 'brands')
                formulation = make_dropdown("Formulation", 'formulations')
            with gr.Column():
                indication = make_dropdown("Indication", 'indications')
                treatment_phase = make_dropdown("Treatment Phase", 'treatment_phases')
                hospital_type = make_dropdown("Hospital Type", 'hospital_types')

        with gr.Row():
            search_btn = gr.Button("Search", variant="primary")
            clear_btn = gr.Button("Reset")

        results = gr.Markdown()

        def reset_inputs(state):
            """Clear every filter and restore the unfiltered choices."""
            return update_dropdown_choices("", "", "", "", "", "", state)

        all_dropdowns = [drug, brand, formulation, indication, treatment_phase, hospital_type]

        # Re-compute the choices of every dropdown whenever any one changes.
        for dropdown in all_dropdowns:
            dropdown.change(
                fn=update_dropdown_choices,
                inputs=[*all_dropdowns, session_data],
                outputs=[*all_dropdowns, session_data],
            )

        search_btn.click(
            fn=search_biologics,
            inputs=[*all_dropdowns, session_data],
            outputs=results,
        )

        clear_btn.click(
            fn=reset_inputs,
            inputs=[session_data],
            outputs=[*all_dropdowns, session_data],
        )

    return demo


if UPDATE_INTERVAL > 0:
    # Run one check immediately, then re-check every UPDATE_INTERVAL days.
    update_data()
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        func=update_data,
        trigger=IntervalTrigger(days=UPDATE_INTERVAL),
        id='update_data',
        name='Update Data',
        replace_existing=True,
    )
    scheduler.start()

    # Shut the scheduler down when the app terminates.
    atexit.register(scheduler.shutdown)

# Create and launch the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()