dlsmallw committed
Commit a189dd1 · Parent(s): 4f70c5d

Task-292 Implement method for deploying models

Files changed (6)
  1. Pipfile +2 -0
  2. Pipfile.lock +20 -42
  3. app.py +48 -49
  4. config.py +22 -0
  5. scripts/__init__.py +1 -0
  6. scripts/predict.py +32 -8
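
Taken together, these changes replace the locally bundled model directories with models pulled from the Hugging Face Hub at runtime, gated behind a user-supplied API token entered in the app sidebar. A minimal sketch of the resulting call flow, based on the `scripts/predict.py` diff below (the token value is a placeholder, and the result shape follows the removed test helper in `app.py`):

```python
# Sketch of the flow this commit introduces (see scripts/predict.py below).
# InferenceHandler now downloads model configs/weights from the HF Hub
# using the caller's API token instead of reading local model folders.
from scripts.predict import InferenceHandler

handler = InferenceHandler(api_token="hf_...")      # placeholder token
result = handler.classify_text("text to classify")  # dict of sentiment + category scores
print(result["text_sentiment"])
```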
Pipfile CHANGED
```diff
@@ -10,6 +10,8 @@ numpy = "*"
 st-annotated-text = "*"
 transformers = "*"
 torch = "*"
+huggingface-hub = "*"
+joblib = "*"
 
 [dev-packages]
 
```
Pipfile.lock CHANGED
```diff
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "d44f8f17557914a1bc97b5e9ce219979a85e81b74eb603b3c0c6920cac065c91"
+            "sha256": "c52664113fb789224f8338560a034a86739fe4d813ded69beb069fdc571c1fd4"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -211,11 +211,12 @@
         },
         "huggingface-hub": {
             "hashes": [
-                "sha256:352f69caf16566c7b6de84b54a822f6238e17ddd8ae3da4f8f2272aea5b198d5",
-                "sha256:9524eae42077b8ff4fc459ceb7a514eca1c1232b775276b009709fe2a084f250"
+                "sha256:590b29c0dcbd0ee4b7b023714dc1ad8563fe4a68a91463438b74e980d28afaf3",
+                "sha256:c56f20fca09ef19da84dcde2b76379ecdaddf390b083f59f166715584953307d"
             ],
+            "index": "pypi",
             "markers": "python_full_version >= '3.8.0'",
-            "version": "==0.29.1"
+            "version": "==0.29.2"
         },
         "idna": {
             "hashes": [
@@ -227,11 +228,20 @@
         },
         "jinja2": {
             "hashes": [
-                "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb",
-                "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"
+                "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d",
+                "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"
             ],
             "markers": "python_version >= '3.7'",
-            "version": "==3.1.5"
+            "version": "==3.1.6"
+        },
+        "joblib": {
+            "hashes": [
+                "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6",
+                "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"
+            ],
+            "index": "pypi",
+            "markers": "python_version >= '3.8'",
+            "version": "==1.4.2"
         },
         "jsonschema": {
             "hashes": [
@@ -249,14 +259,6 @@
             "markers": "python_version >= '3.9'",
             "version": "==2024.10.1"
         },
-        "markdown-it-py": {
-            "hashes": [
-                "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1",
-                "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"
-            ],
-            "markers": "python_version >= '3.8'",
-            "version": "==3.0.0"
-        },
         "markupsafe": {
             "hashes": [
                 "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4",
@@ -324,14 +326,6 @@
             "markers": "python_version >= '3.9'",
             "version": "==3.0.2"
         },
-        "mdurl": {
-            "hashes": [
-                "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8",
-                "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"
-            ],
-            "markers": "python_version >= '3.7'",
-            "version": "==0.1.2"
-        },
         "mpmath": {
             "hashes": [
                 "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f",
@@ -624,14 +618,6 @@
             "markers": "python_version >= '3.8'",
             "version": "==0.9.1"
         },
-        "pygments": {
-            "hashes": [
-                "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f",
-                "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"
-            ],
-            "markers": "python_version >= '3.8'",
-            "version": "==2.19.1"
-        },
         "python-dateutil": {
             "hashes": [
                 "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3",
@@ -730,14 +716,6 @@
             "markers": "python_version >= '3.8'",
             "version": "==2.32.3"
         },
-        "rich": {
-            "hashes": [
-                "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098",
-                "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90"
-            ],
-            "markers": "python_full_version >= '3.8.0'",
-            "version": "==13.9.4"
-        },
         "rpds-py": {
             "hashes": [
                 "sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19",
@@ -903,12 +881,12 @@
         },
         "streamlit": {
             "hashes": [
-                "sha256:62026dbdcb482790933f658b096d7dd58fa70da89c1f06fbc3658b91dcd4dab2",
-                "sha256:e2516c7fcd17a11a85cc1999fae58ace0a6458e2b4c1a411ed3d75b1aee2eb93"
+                "sha256:c10c09f9d1251fa7f975dd360572f03cabc82b174f080e323bf7e556103c22e0",
+                "sha256:cf94b1e9f1de75e4e383df53745230feaac4ac7a7e1f14a3ea362df134db8510"
             ],
             "index": "pypi",
             "markers": "python_version >= '3.9' and python_full_version != '3.9.7'",
-            "version": "==1.42.2"
+            "version": "==1.43.0"
         },
         "sympy": {
             "hashes": [
```
app.py CHANGED
```diff
@@ -1,19 +1,17 @@
 import streamlit as st
 import pandas as pd
-from annotated_text import annotated_text, annotation
+from annotated_text import annotated_text
 import time
-from random import randint, uniform
 from scripts.predict import InferenceHandler
-from pathlib import Path
-
-ROOT = Path(__file__).resolve().parents[0]
-st.write(ROOT)
-MODELS_DIR = ROOT / 'models'
-BIN_MODEL_PATH = MODELS_DIR / 'binary_classification'
-ML_MODEL_PATH = MODELS_DIR / 'multilabel_regression'
 
 history_df = pd.DataFrame(data=[], columns=['Text', 'Classification', 'Gender', 'Race', 'Sexuality', 'Disability', 'Religion', 'Unspecified'])
-ih = InferenceHandler(BIN_MODEL_PATH, ML_MODEL_PATH)
+rc = None
+ih = None
+entry = None
+
+@st.cache_data
+def load_inference_handler(api_token):
+    ih = InferenceHandler(api_token)
 
 def extract_data(json_obj):
     row_data = []
@@ -58,40 +56,38 @@ def output_results(res):
 
     if len(at_list) > 0:
         annotated_text(at_list)
-
-
-# def test_results(text):
-#     test_val = int(randint(0, 1))
-#     res_obj = {
-#         'raw_text': text,
-#         'text_sentiment': 'Discriminatory' if test_val == 1 else 'Non-Discriminatory',
-#         'numerical_sentiment': test_val,
-#         'category_sentiments': {
-#             'Gender': None if test_val == 0 else uniform(0.0, 1.0),
-#             'Race': None if test_val == 0 else uniform(0.0, 1.0),
-#             'Sexuality': None if test_val == 0 else uniform(0.0, 1.0),
-#             'Disability': None if test_val == 0 else uniform(0.0, 1.0),
-#             'Religion': None if test_val == 0 else uniform(0.0, 1.0),
-#             'Unspecified': None if test_val == 0 else uniform(0.0, 1.0)
-#         }
-#     }
-#     return res_obj
-
 
+@st.cache_data
 def analyze_text(text):
-    res = None
-    with rc:
-        with st.spinner("Processing...", show_time=True) as spnr:
-            time.sleep(5)
-            res = ih.classify_text(text)
-            del spnr
-
-    if res is not None:
-        st.session_state.results.append(res)
-        history_df.loc[-1] = extract_data(res)
-        output_results(res)
+    if ih:
+        res = None
+        with rc:
+            with st.spinner("Processing...", show_time=True) as spnr:
+                time.sleep(5)
+                res = ih.classify_text(text)
+            del spnr
+
+        if res is not None:
+            st.session_state.results.append(res)
+            history_df.loc[-1] = extract_data(res)
+            output_results(res)
 
 st.title('NLPinitiative Text Classifier')
+
+st.sidebar.write("")
+API_KEY = st.sidebar.text_input(
+    "Enter your HuggingFace API Token",
+    help="You can get your free API token in your settings page: https://huggingface.co/settings/tokens",
+    type="password",
+)
+
+try:
+    if API_KEY is not None and len(API_KEY) > 0:
+        ih = InferenceHandler(API_KEY)
+except:
+    ih = None
+    st.error('Invalid Token')
+
 tab1, tab2 = st.tabs(['Classifier', 'About This App'])
 
 if "results" not in st.session_state:
@@ -102,20 +98,23 @@ load_history()
 with tab1:
     "Text Classifier for determining if entered text is discriminatory (and the categories of discrimination) or Non-Discriminatory."
 
-    with st.container():
-        with st.expander('History'):
-            st.write(history_df)
-
-    rc = st.container()
-
+    hist_container = st.container()
+    hist_expander = hist_container.expander('History')
+    rc = st.container()
+
     text_form = st.form(key='classifier', clear_on_submit=True, enter_to_submit=True)
     with text_form:
-        text_area = st.text_area('Enter text to classify')
-        form_btn = st.form_submit_button('submit')
+        text_area = st.text_area('Enter text to classify', value='', disabled=True if ih is None else False)
+        form_btn = st.form_submit_button('submit', disabled=True if ih is None else False)
 
     if entry := text_area:
-        analyze_text(entry)
+        st.write(f'TEXT AREA: {entry}')
+        if entry and len(entry) > 0:
+            analyze_text(entry)
+            entry = None
 
+    with hist_expander:
+        st.dataframe(history_df)
 
 with tab2:
     st.markdown(
```
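
A note on the caching above: the commit decorates `load_inference_handler` and `analyze_text` with `@st.cache_data`, and as committed `load_inference_handler` assigns only a local `ih` and is never called; the handler is actually constructed in the `try` block. For an unserializable object like a model wrapper, Streamlit's docs recommend `st.cache_resource`. A hedged sketch of that alternative (not what this commit does):

```python
import streamlit as st
from scripts.predict import InferenceHandler

# st.cache_resource keeps one live handler per distinct token across reruns,
# rather than pickling it (which st.cache_data would attempt).
@st.cache_resource
def get_inference_handler(api_token: str) -> InferenceHandler:
    return InferenceHandler(api_token)
```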
config.py ADDED
```diff
@@ -0,0 +1,22 @@
+# Used for setting some constants for the project codebase
+
+from pathlib import Path
+
+# Root Path
+ROOT = Path(__file__).resolve().parents[0]
+
+# Model Directory
+MODELS_DIR = ROOT / 'models'
+
+# Binary Classification Model Path
+BIN_MODEL_PATH = MODELS_DIR / 'binary_classification'
+
+# Multilabel Regression Model Path
+ML_MODEL_PATH = MODELS_DIR / 'multilabel_regression'
+
+# HF Hub Repositories
+BIN_REPO = 'dlsmallw/Binary-Classification-testing'
+ML_REPO = 'dlsmallw/Multilabel-Regression-testing'
+
+BIN_API_URL = f"https://api-inference.huggingface.co/models/{BIN_REPO}"
+ML_API_URL = f"https://api-inference.huggingface.co/models/{ML_REPO}"
```
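
`BIN_API_URL` and `ML_API_URL` are defined here but not consumed anywhere else in this commit. For reference, a hosted Inference API endpoint like these is typically queried with a bearer token and a JSON `inputs` payload; a minimal sketch (`token` is a placeholder supplied by the caller):

```python
import requests

from config import BIN_API_URL

def query_binary_model(text: str, token: str) -> dict:
    # Standard Hugging Face hosted Inference API call shape.
    resp = requests.post(
        BIN_API_URL,
        headers={"Authorization": f"Bearer {token}"},
        json={"inputs": text},
    )
    resp.raise_for_status()
    return resp.json()
```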
scripts/__init__.py ADDED
```diff
@@ -0,0 +1 @@
+import config
```
scripts/predict.py CHANGED
```diff
@@ -5,30 +5,42 @@ Script file used for performing inference with an existing model.
 from pathlib import Path
 import torch
 import json
+import huggingface_hub
+import joblib
+from huggingface_hub.inference_api import InferenceApi
 
 from transformers import (
     AutoTokenizer,
     AutoModelForSequenceClassification
 )
 
+BIN_REPO = 'dlsmallw/Binary-Classification-testing'
+ML_REPO = 'dlsmallw/Multilabel-Regression-testing'
 
 ## Class used to encapsulate and handle the logic for inference
 class InferenceHandler:
-    def __init__(self, bin_model_path: Path, ml_regr_model_path: Path):
-        self.bin_tokenizer, self.bin_model = self.init_model_and_tokenizer(bin_model_path)
-        self.ml_regr_tokenizer, self.ml_regr_model = self.init_model_and_tokenizer(ml_regr_model_path)
+    def __init__(self, api_token):
+        self.api_token = api_token
+        self.bin_tokenizer, self.bin_model = self.init_model_and_tokenizer(BIN_REPO)
+        self.ml_regr_tokenizer, self.ml_regr_model = self.init_model_and_tokenizer(ML_REPO)
+
+    def get_config(self, repo_id):
+        config = None
+        if repo_id and self.api_token:
+            config = huggingface_hub.hf_hub_download(repo_id, filename='config.json', token=self.api_token)
+        return config
 
     ## Initializes a model and tokenizer for use in inference using the models path
-    def init_model_and_tokenizer(self, model_path: Path):
-        with open(model_path / 'config.json') as config_file:
+    def init_model_and_tokenizer(self, repo_id):
+        config = self.get_config(repo_id)
+        with open(config) as config_file:
             config_json = json.load(config_file)
             model_name = config_json['_name_or_path']
-            model_type = config_json['model_type']
 
         tokenizer = AutoTokenizer.from_pretrained(model_name)
-        model = AutoModelForSequenceClassification.from_pretrained(model_path, model_type=model_type)
-        model.eval()
+        model = AutoModelForSequenceClassification.from_pretrained(repo_id, token=self.api_token)
 
+        model.eval()
         return tokenizer, model
 
     ## Handles logic used to encode the text for use in binary classification
@@ -103,3 +115,15 @@ class InferenceHandler:
             results.append(max(0.0, item))
 
         return results
+
+ih = InferenceHandler('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')
+discr_text = """Islam is hell . We Asian hate isalm...Islam bring only destruction..killing. rape... rufugee.... Islam is hell .. Asian thought 😂😂sorry...but is true"""
+
+nondiscr_text = """Absolutely excellent as ever, and yes, cycling is fabulous for mental health.. bike when you're happy,
+bike when you're sad, shout at the rain, scream inwardly at a complex hill climb and get that endorphin rush from being outside..
+and smile at your fellow cyclists (and passing chickens, in my experience, a wee bwark at a chook makes many things better)"""
+
+result1 = ih.classify_text(discr_text)
+result2 = ih.classify_text(nondiscr_text)
+print(json.dumps(result1, indent=4))
+print(json.dumps(result2, indent=4))
```
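
Note that the self-test block above commits a live `hf_` token to source control; a token published this way should be treated as leaked and revoked. A common alternative, sketched here on the assumption that the token is exported as an `HF_TOKEN` environment variable:

```python
import os

from scripts.predict import InferenceHandler

# Read the API token from the environment instead of hardcoding it.
# Assumes `export HF_TOKEN=hf_...` has been run in the shell.
api_token = os.environ.get("HF_TOKEN")
if api_token:
    ih = InferenceHandler(api_token)
```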