import json

import gradio as gr
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Load the biomedical NER model and its tokenizer from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")

# Load example sentences for the Gradio "Examples" panel.
with open("examples.json", "r") as f:
    example_json = json.load(f)
EXAMPLE_TEXTS = [x["text"] for x in example_json]

# "simple" aggregation merges sub-word tokens that belong to the same entity.
pipe = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)


def ner(text):
    raw = pipe(text)
    # Convert the pipeline output into the dict format expected by gr.HighlightedText.
    result = {
        "text": text,
        "entities": [
            {
                "entity": x["entity_group"],
                "word": x["word"],
                "score": float(x["score"]),  # cast numpy float32 so the value serializes cleanly
                "start": x["start"],
                "end": x["end"],
            }
            for x in raw
        ],
    }
    return result, {}


interface = gr.Interface(
    ner,
    inputs=gr.Textbox(label="Input", value=""),
    outputs=[gr.HighlightedText(combine_adjacent=True), "json"],
    examples=EXAMPLE_TEXTS,
)

interface.launch()
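
# Note: examples.json is assumed to be a JSON array of objects that each carry
# a "text" field. This structure is inferred from the list comprehension that
# builds EXAMPLE_TEXTS above, not documented elsewhere; the sentences below are
# purely illustrative placeholders:
#
#   [
#     {"text": "The patient was prescribed 50 mg of atenolol for hypertension."},
#     {"text": "CT imaging showed no evidence of pulmonary embolism."}
#   ]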