"""Gradio demo that classifies free-text clinical documents as IBD vs non-IBD."""

import gradio as gr
from transformers import pipeline

# Load the Distil_IBD_BERT model for IBD classification.
# top_k=None asks the pipeline for the score of every label; it replaces the
# deprecated return_all_scores=True.
classifier = pipeline(
    "text-classification",
    model="MattStammers/Distil_IBD_BERT",
    top_k=None,
)

# Raw model label -> human-readable class name shown in the UI.
LABEL_MAP = {"LABEL_0": "Non-IBD", "LABEL_1": "IBD"}

# Longest token sequence sent to the model. Taken from the model config when it
# exposes a position limit; 512 is the fallback (presumably a DistilBERT-sized
# encoder, judging by the model name -- confirm against the model card).
_MAX_TOKENS = getattr(classifier.model.config, "max_position_embeddings", 512)


def _label_scores(raw):
    """Return the flat list of ``{"label", "score"}`` dicts for one input.

    Depending on the transformers version and on how ``top_k`` is supplied,
    a single-string call yields either ``[[{...}, ...]]`` or ``[{...}, ...]``;
    both shapes are accepted here.
    """
    if raw and isinstance(raw[0], list):
        return raw[0]
    return raw


def classify_doc(text: str) -> "tuple[str, float]":
    """Classify one clinical document and report the winning class.

    Args:
        text: Free-text clinical note or patient letter.

    Returns:
        A ``(label, confidence)`` pair: the highest-scoring class name
        (mapped through ``LABEL_MAP``; unknown labels are passed through
        unchanged) and its score rounded to three decimals.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Scoring an empty document would still yield a confident-looking
        # prediction, which is misleading for a clinical tool.
        raise gr.Error("Please enter a clinical note or patient letter to classify.")

    # Truncate explicitly: documents longer than the encoder's position limit
    # would otherwise make the forward pass fail.
    raw = classifier(text, truncation=True, max_length=_MAX_TOKENS)
    scores = _label_scores(raw)

    # Choose the highest-scoring label.
    top = max(scores, key=lambda item: item["score"])
    label = LABEL_MAP.get(top["label"], top["label"])
    return label, round(top["score"], 3)


# Example reports for testing.
# NOTE(review): the report text is kept exactly as found in the file; if the
# original reports had one section per line, restore the line breaks here.
positive_example = (
    "Patient: 45-year-old female "
    "Procedure: Colonoscopy "
    "Clinical History: 6-month history of intermittent bloody diarrhea, "
    "abdominal cramping, and tenesmus. "
    "Findings: "
    "• Diffuse mucosal erythema and friability extending continuously from "
    "rectum through sigmoid colon. "
    "• Multiple superficial ulcerations (3-5 mm) with easily induced bleeding. "
    "• Loss of normal vascular pattern and pseudopolyps in the descending "
    "colon. "
    "Biopsies taken from ulcer margin reveal crypt abscesses. "
    "Impression: Findings are consistent with moderate ulcerative colitis "
    "(IBD)."
)

negative_example = (
    " Procedure: Screening Colonoscopy "
    "Clinical History: Diarrhoea, routine colorectal cancer screening. FIT 34 "
    "Findings: "
    "• Normal colonic mucosa without erythema, ulceration, or friability. "
    "• Preserved vascular pattern and intact crypt architecture. "
    "• No crypt abscesses, granulomas, or inflammatory infiltrates. "
    "Diagnosis: No evidence of inflammatory bowel disease (Non-IBD). "
    "Mapping biopsies taken in light of underlying diarrhoea. "
    "Specimen: Multiple colonic mucosal biopsies "
    "Clinical History: Surveillance colonoscopy in patient with "
    "diverticulosis. "
    "Microscopic Description: "
    "• Colonic mucosa with intact crypt architecture. "
    "• No basal plasmacytosis, crypt branching, or mucosal erosion. "
    "• Scattered lymphocytes and plasma cells evenly distributed in lamina "
    "propria. "
    "• No granulomas or dysplasia identified. \n"
    "Diagnosis: Negative for inflammatory bowel disease."
)

# Build Gradio interface
demo = gr.Interface(
    fn=classify_doc,
    inputs=gr.Textbox(
        lines=10,
        placeholder="Enter clinical note or patient letter…",
    ),
    outputs=[
        gr.Label(num_top_classes=2, label="Prediction"),
        gr.Textbox(label="Confidence"),
    ],
    examples=[
        [positive_example],
        [negative_example],
    ],
    title="IBD Cohort Identifier",
    description=(
        "Classify free-text clinical documents as IBD vs non-IBD using "
        "[Distil_IBD_BERT](https://huggingface.co/MattStammers/Distil_IBD_BERT). "
        "Please note these models are very likely over-fitted to the data on "
        "which they were trained and should be re-trained locally before "
        "attempting inference or results may not be amazing. "
        "However, this demo gives an idea of the models capabilities which far "
        "exceed that of any other open weight models for IBD detection in "
        "free-text currently available. "
        "Please read the paper for full information. "
        "Reference: Stammers M, Gwiggner M, Nouraei R, Metcalf C, Batchelor J. "
        "From Rule-Based to DeepSeek R1: A Robust Comparative Evaluation of "
        "Fifty Years of Natural Language Processing (NLP) Models To Identify "
        "Inflammatory Bowel Disease Cohorts. medRxiv. 2025:2025-07."
    ),
)

# Launch the app. Hugging Face Spaces serves it publicly on its own, and share
# tunnels are not supported there, so share=True is not passed; this also keeps
# a local run from opening a public tunnel to a clinical-text tool.
if __name__ == "__main__":
    demo.launch()