Spaces:

kkngan
/

it-service-classifcation

Sleeping

App Files Files Community

kkngan committed on Mar 20, 2024

Commit

2af3524

verified ·

1 Parent(s): 69bab00

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -11

app.py CHANGED Viewed

@@ -2,11 +2,12 @@ import streamlit as st
 from streamlit_mic_recorder import mic_recorder
 from transformers import pipeline
 import torch
-# from transformers import BertTokenizer, BertForSequenceClassification
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import numpy as np
 import pandas as pd
 import time
 def callback():
@@ -16,12 +17,12 @@ def callback():
 @st.cache_resource
-def load_text_to_speech_model(model="openai/whisper-medium"):
     pipe = pipeline("automatic-speech-recognition", model=model)
     return pipe
-def translate(inputs, model="openai/whisper-medium"):
     pipe = pipeline("automatic-speech-recognition", model=model)
     translate_result = pipe(inputs, generate_kwargs={'task': 'translate'})
     return translate_result['text']
@@ -38,7 +39,7 @@ def translate(inputs, model="openai/whisper-medium"):
 #     return input_ids, attention_masks
-# def load_model_deprecated():
 #     CUSTOMMODEL_PATH = "./bert-itserviceclassification"
 #     PRETRAINED_LM = "bert-base-uncased"
 #     tokenizer = BertTokenizer.from_pretrained(PRETRAINED_LM, do_lower_case=True)
@@ -52,8 +53,11 @@ def translate(inputs, model="openai/whisper-medium"):
 @st.cache_resource
 def load_classification_model():
     PRETRAINED_LM = "kkngan/bert-base-uncased-it-service-classification"
-    model = AutoModelForSequenceClassification.from_pretrained(PRETRAINED_LM, num_labels=8)
-    tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_LM)
     return model, tokenizer
@@ -86,21 +90,34 @@ def display_result(translate_text, prediction, predicted_class_id, probability):
     st.write(f'\n')
     st.write(f'\n')
-    st.markdown(f'<font color="green"><b>Predicted Class: (Probability: {(probability[0][predicted_class_id] * 100):.2f}%) </b></font>', unsafe_allow_html=True)
     st.write(f'{prediction}')
     # Convert probability to bar chart
     st.write(f'\n')
     st.write(f'\n')
     category = ('Hardware', 'Access', 'Miscellaneous', 'HR Support', 'Purchase', 'Administrative rights', 'Storage', 'Internal Project')
     probability = np.array(probability[0])
     df = pd.DataFrame({'Category': category, 'Probability (%)': probability * 100})
     df['Probability (%)'] = df['Probability (%)'].apply(lambda x: round(x, 2))
-    st.bar_chart(data=df, x='Category', y='Probability (%)')
 def main():
     st.set_page_config(layout="wide", page_title="NLP IT Service Classification", page_icon="🤖",)
     st.markdown('<b>🤖 Welcome to IT Service Classification Assistant!!! 🤖</b>', unsafe_allow_html=True)
     st.write(f'\n')
@@ -108,7 +125,7 @@ def main():
     with st.sidebar:
         st.image('front_page_image.jpg' , use_column_width=True)
-        text_to_speech_model = st.selectbox("Pick select a speech to text model", ["openai/whisper-base", "openai/whisper-medium", "openai/whisper-large", "openai/whisper-large-v3"])
         options = st.selectbox("Pick select an input method", ["Start a recording", "Upload an audio", "Enter a transcript"])
         if options == "Start a recording":
             audio = mic_recorder(key='my_recorder', callback=callback)
@@ -141,4 +158,4 @@ def main():
 if __name__ == '__main__':
-    main()

 from streamlit_mic_recorder import mic_recorder
 from transformers import pipeline
 import torch
+from transformers import BertTokenizer, BertForSequenceClassification
+# from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import numpy as np
 import pandas as pd
 import time
+import altair as alt
 def callback():
 @st.cache_resource
+def load_text_to_speech_model(model="openai/whisper-base"):
     pipe = pipeline("automatic-speech-recognition", model=model)
     return pipe
+def translate(inputs, model="openai/whisper-base"):
     pipe = pipeline("automatic-speech-recognition", model=model)
     translate_result = pipe(inputs, generate_kwargs={'task': 'translate'})
     return translate_result['text']
 #     return input_ids, attention_masks
+# def load_classification_model():
 #     CUSTOMMODEL_PATH = "./bert-itserviceclassification"
 #     PRETRAINED_LM = "bert-base-uncased"
 #     tokenizer = BertTokenizer.from_pretrained(PRETRAINED_LM, do_lower_case=True)
 @st.cache_resource
 def load_classification_model():
     PRETRAINED_LM = "kkngan/bert-base-uncased-it-service-classification"
+    # model = AutoModelForSequenceClassification.from_pretrained(PRETRAINED_LM, num_labels=8)
+    # tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_LM)
+    tokenizer = BertTokenizer.from_pretrained(PRETRAINED_LM, do_lower_case=True)
+    model = BertForSequenceClassification.from_pretrained(PRETRAINED_LM,
+                                                         num_labels=8)
     return model, tokenizer
     st.write(f'\n')
     st.write(f'\n')
+    # st.markdown(f'<font color="green"><b>Predicted Class: (Probability: {(probability[0][predicted_class_id] * 100):.2f}%) </b></font>', unsafe_allow_html=True)
+    st.markdown('<font color="green"><b>Predicted Class:</b></font>', unsafe_allow_html=True)
     st.write(f'{prediction}')
     # Convert probability to bar chart
     st.write(f'\n')
     st.write(f'\n')
+    # Show Probability of each Service Category
     category = ('Hardware', 'Access', 'Miscellaneous', 'HR Support', 'Purchase', 'Administrative rights', 'Storage', 'Internal Project')
     probability = np.array(probability[0])
     df = pd.DataFrame({'Category': category, 'Probability (%)': probability * 100})
     df['Probability (%)'] = df['Probability (%)'].apply(lambda x: round(x, 2))
+    base = alt.Chart(df).encode(
+                x='Probability (%)',
+                y=alt.Y('Category').sort('-x'),
+                # color='b:O',
+                tooltip=['Category',alt.Tooltip('Probability (%)', format=",.2f")],
+                text='Probability (%)'
+                ).properties(title="Probability of each Service Category")
+    chart = base.mark_bar() + base.mark_text(align='left', dx=2)
+    st.altair_chart(chart, use_container_width=True)
 def main():
+    # st.cache_resource.clear()
     st.set_page_config(layout="wide", page_title="NLP IT Service Classification", page_icon="🤖",)
     st.markdown('<b>🤖 Welcome to IT Service Classification Assistant!!! 🤖</b>', unsafe_allow_html=True)
     st.write(f'\n')
     with st.sidebar:
         st.image('front_page_image.jpg' , use_column_width=True)
+        text_to_speech_model = st.selectbox("Pick select a speech to text model", ["openai/whisper-base", "openai/whisper-large-v3"])
         options = st.selectbox("Pick select an input method", ["Start a recording", "Upload an audio", "Enter a transcript"])
         if options == "Start a recording":
             audio = mic_recorder(key='my_recorder', callback=callback)
 if __name__ == '__main__':
+    main()