Spaces:

LampOfSocrates
/

hf_gradio_plodcw_group27

Sleeping

App Files Files Community

Lamp Socrates committed on May 24, 2024

Commit

4efeb3b

1 Parent(s): b022555

latest

Browse files

Files changed (1) hide show

app.py +95 -36

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import uvicorn
 import threading
 from typing import Optional
 from transformers import pipeline
 from transformers import AutoTokenizer, AutoModelForTokenClassification
@@ -13,11 +14,14 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from typing import List, Dict
 # Define the FastAPI app
 app = FastAPI()
 model_cache: Optional[object] = None
 def load_model():
     tokenizer = AutoTokenizer.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
     model = AutoModelForTokenClassification.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
@@ -36,6 +40,12 @@ def load_plod_cw_dataset():
     dataset = load_dataset("surrey-nlp/PLOD-CW")
     return dataset
 def get_cached_model():
     global model_cache
     if model_cache is None:
@@ -44,8 +54,7 @@ def get_cached_model():
 # Cache the model when the server starts
 model = get_cached_model()
 class Entity(BaseModel):
     entity: str
@@ -62,15 +71,20 @@ class NERRequest(BaseModel):
 @app.get("/hello")
 def read_root():
     return {"message": "Hello, World!"}
 @app.post("/ner", response_model=NERResponse)
 def get_entities(request: NERRequest):
     print(request)
     model = get_cached_model()
     # Use the NER model to detect entities
     entities = model(request.text)
     print(entities[0].keys())
     # Convert entities to the response model
     response_entities = [Entity(**entity) for entity in entities]
@@ -81,8 +95,9 @@ def get_color_for_label(label: str) -> str:
     # Define a mapping of labels to colors
     color_mapping = {
         "I-LF": "red",
         "B-AC": "blue",
-        "LOC": "green",
         # Add more labels and colors as needed
     }
     return color_mapping.get(label, "black")  # Default to black if label not found
@@ -90,30 +105,73 @@ def get_color_for_label(label: str) -> str:
 # Define the Gradio interface function
 def ner_demo(text):
     model = get_cached_model()
     entities = model(text)
-    #return {"entities": entities}
-    # Color code the entities
-    color_coded_text = text
     for entity in entities:
-        #print(entity)
         start, end, label = entity["start"], entity["end"], entity["entity"]
-        color = get_color_for_label(label)  # You need to define this function
         entity_text = text[start:end]
-        colored_entity = f'<span style="color: {color}; font-weight: bold;">{entity_text}</span>'
-        color_coded_text = color_coded_text[:start] + colored_entity + color_coded_text[end:]
-    return color_coded_text
-PROJECT_INTRO = "This is a HF Spaces hosted Gradio App built by NLP Group 27 . The model has been trained on surrey-nlp/PLOD-CW dataset"
 def echo(text, request: gr.Request):
     if request:
-        print("Request headers dictionary:", request.headers)
-        print("IP address:", request.client.host)
-        print("Query parameters:", dict(request.query_params))
-    return text
 # Create the Gradio interface
 demo = gr.Interface(
@@ -124,26 +182,27 @@ demo = gr.Interface(
     title="Named Entity Recognition on PLOD-CW ",
     description=f"{PROJECT_INTRO}\n\nEnter text to extract named entities using a NER model."
 )
-'''
-with gr.Blocks() as demo:
-    gr.Markdown("# Page Title")
-    gr.Markdown("## Subtitle with h2 Font")
-    inputs=gr.Textbox(lines=10, placeholder="Enter text here...", label="Input Text")
-    with gr.Column():
-        echo_output = gr.Textbox(label="Echo Output")
-        html_output = ner_demo
-    with gr.Column():
-        button1 = gr.Button("Submit")
-'''
-#CUSTOM_PATH = "/gradio"
-#app = gr.mount_gradio_app(app, demo, path=CUSTOM_PATH)
-# Function to run FastAPI
-def run_fastapi():
-    uvicorn.run(app, host="0.0.0.0", port=8000)
 # Function to run Gradio

 import uvicorn
 import threading
+from collections import Counter
 from typing import Optional
 from transformers import pipeline
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 from pydantic import BaseModel
 from typing import List, Dict
 # Define the FastAPI app
 app = FastAPI()
 model_cache: Optional[object] = None
+dataset_cache : Optional[object] = None
 def load_model():
+    """ We load the model at startup"""
     tokenizer = AutoTokenizer.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
     model = AutoModelForTokenClassification.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
     dataset = load_dataset("surrey-nlp/PLOD-CW")
     return dataset
def get_cached_data():
    """Return the dataset, loading it on first use.

    The result of ``load_plod_cw_dataset()`` is stored in the module-level
    ``dataset_cache`` so later calls reuse the same object.
    """
    global dataset_cache
    if dataset_cache is not None:
        return dataset_cache
    dataset_cache = load_plod_cw_dataset()
    return dataset_cache
 def get_cached_model():
     global model_cache
     if model_cache is None:
 # Cache the model when the server starts
 model = get_cached_model()
+#plod_cw = get_cached_data()
 class Entity(BaseModel):
     entity: str
 @app.get("/hello")
 def read_root():
+    """useful for testing connections"""
     return {"message": "Hello, World!"}
 @app.post("/ner", response_model=NERResponse)
 def get_entities(request: NERRequest):
+    """ This is invoked while API Testing """
     print(request)
     model = get_cached_model()
     # Use the NER model to detect entities
     entities = model(request.text)
     print(entities[0].keys())
     # Convert entities to the response model
     response_entities = [Entity(**entity) for entity in entities]
     # Define a mapping of labels to colors
     color_mapping = {
         "I-LF": "red",
+        "B-LF": "pink",
         "B-AC": "blue",
+        "B-O": "green",
         # Add more labels and colors as needed
     }
     return color_mapping.get(label, "black")  # Default to black if label not found
 # Define the Gradio interface function
 def ner_demo(text):
     """Render *text* as HTML with every detected entity highlighted.

     Wired to the "Submit" button of the Gradio UI. The cached model is
     called on ``text`` and each returned entity is wrapped in a coloured,
     bold ``<sup>`` element; the text between entities is copied through.

     All user-supplied text is HTML-escaped before it is emitted, so input
     containing ``<``, ``>`` or ``&`` is shown literally instead of being
     interpreted as markup by the HTML output component.

     Args:
         text: Raw input string entered by the user.

     Returns:
         str: An HTML fragment containing the escaped text with highlighted
         entities.
     """
     # Local import keeps this block self-contained (stdlib only).
     from html import escape

     model = get_cached_model()
     entities = model(text)
     print("Entities detected {}".format(Counter(entity["entity"] for entity in entities)))

     pieces = []
     last_index = 0
     # NOTE(review): assumes entities arrive ordered by "start" and do not
     # overlap -- confirm against the pipeline's output.
     for entity in entities:
         start, end, label = entity["start"], entity["end"], entity["entity"]
         color = get_color_for_label(label)
         # Plain text between the previous entity and this one.
         pieces.append(escape(text[last_index:start]))
         # The entity itself, coloured according to its label.
         pieces.append(
             f'<sup style="color: {color}; font-weight: bold;">{escape(text[start:end])}</sup>'
         )
         last_index = end
     # Whatever follows the last entity (or the whole text if none were found).
     pieces.append(escape(text[last_index:]))
     return "".join(pieces)
+bo_color = get_color_for_label("B-O")
+bac_color = get_color_for_label("B-AC")
+ilf_color = get_color_for_label("I-LF")
+blf_color = get_color_for_label("B-LF")
+PROJECT_INTRO = f"""This is a HF Spaces hosted Gradio App built by NLP Group 27. \n\n
+                The model has been trained on surrey-nlp/PLOD-CW dataset.
+                The following Entities are recognized:
+                <sup style="color: {bo_color}; font-weight: bold;">B-O</sup>
+                <sup style="color: {bac_color}; font-weight: bold;">B-AC</sup>
+                <sup style="color: {ilf_color}; font-weight: bold;">I-LF</sup>
+                <sup style="color: {blf_color}; font-weight: bold;">B-LF</sup>
+                <sup style="color: black; font-weight: bold;">Rest</sup>
+                """
 def echo(text, request: gr.Request):
     """Return an HTML summary of the calling client's request.

     Wired to the "Echo Client" button. ``text`` is accepted because the
     button passes the input textbox, but it is not used.

     The returned markup is always a balanced ``<div>...</div>``: with no
     request available the div is simply empty. Header and query values come
     from the client, so they are HTML-escaped before being embedded.

     Args:
         text: Contents of the input textbox (unused).
         request: The Gradio request object for the current call, if any.

     Returns:
         str: An HTML fragment with one paragraph per reported item.
     """
     # Local import keeps this block self-contained (stdlib only).
     from html import escape

     paragraphs = []
     if request:
         paragraphs.append(f"Request headers dictionary: {request.headers}")
         paragraphs.append(f"IP address: {request.client.host}")
         paragraphs.append(f"Query parameters: {dict(request.query_params)}")
     body = "".join(f"<p>{escape(item)}</p>" for item in paragraphs)
     return f"<div>{body}</div>"
def sample_data(text):
    """Return a one-row table holding a fixed example sentence.

    Wired to the "Sample PLOD_CW" button. The ``text`` argument is accepted
    because the button passes the input textbox, but it is deliberately
    ignored: the table always shows the same hard-coded sentence.

    Args:
        text: Contents of the input textbox (unused).

    Returns:
        pandas.DataFrame: Columns ``"Text"`` (the example sentence) and
        ``"Length"`` (its length in characters), with a single row.
    """
    # Kept in its own name so the caller's argument is not silently clobbered.
    sample_sentence = "The red dots represents LCI , the bright yellow rectangle represents RV , and the black triangle represents the /TLCnLCI"
    return pd.DataFrame(
        {
            "Text": [sample_sentence],
            "Length": [len(sample_sentence)],
        }
    )
 # Create the Gradio interface
 demo = gr.Interface(
     title="Named Entity Recognition on PLOD-CW ",
     description=f"{PROJECT_INTRO}\n\nEnter text to extract named entities using a NER model."
 )
+with gr.Blocks() as demo:
+    gr.Markdown("# Named Entity Recognition on PLOD-CW")
+    gr.Markdown(PROJECT_INTRO)
+    gr.Markdown("### Enter text to extract named entities using a NER model.")
+    text_input = gr.Textbox(lines=10, placeholder="Enter text here...", label="Input Text")
+    html_output = gr.HTML(label="HTML Output")
+    with gr.Row():
+        submit_button = gr.Button("Submit")
+        echo_button = gr.Button("Echo Client")
+        sample_button = gr.Button("Sample PLOD_CW")
+    sample_output = gr.Dataframe(label="Sample Table")
+    echo_output = gr.HTML(label="HTML Output")
+    submit_button.click(ner_demo, inputs=text_input, outputs=html_output)
+    echo_button.click(echo, inputs=text_input, outputs=echo_output)
+    sample_button.click(sample_data, inputs=text_input, outputs=sample_output)
 # Function to run Gradio