Spaces:

kkngan
/

it-service-classifcation

Sleeping

App Files Files Community

kkngan committed on Mar 18, 2024

Commit

8c0ca02

verified ·

1 Parent(s): 82c845d

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -45

app.py CHANGED Viewed

@@ -2,8 +2,8 @@ import streamlit as st
 from streamlit_mic_recorder import mic_recorder
 from transformers import pipeline
 import torch
-from transformers import BertTokenizer, BertForSequenceClassification, AutoModelForSequenceClassification, AutoTokenizer
-from transformers import WhisperForConditionalGeneration, WhisperProcessor
 import numpy as np
 import pandas as pd
 import time
@@ -15,22 +15,27 @@ def callback():
         st.audio(audio_bytes)
 def translate(inputs, model="openai/whisper-medium"):
     pipe = pipeline("automatic-speech-recognition", model=model)
-    # transcribe_result = pipe(upload, generate_kwargs={'task': 'transcribe'})
     translate_result = pipe(inputs, generate_kwargs={'task': 'translate'})
     return translate_result['text']
-def encode_depracated(docs, tokenizer):
-    '''
-    This function takes list of texts and returns input_ids and attention_mask of texts
-    '''
-    encoded_dict = tokenizer.batch_encode_plus(docs, add_special_tokens=True, max_length=128, padding='max_length',
-                            return_attention_mask=True, truncation=True, return_tensors='pt')
-    input_ids = encoded_dict['input_ids']
-    attention_masks = encoded_dict['attention_mask']
-    return input_ids, attention_masks
 # def load_model_deprecated():
@@ -44,8 +49,8 @@ def encode_depracated(docs, tokenizer):
 #     model.load_state_dict(torch.load(CUSTOMMODEL_PATH, map_location ='cpu'))
 #     return model, tokenizer
-def load_model():
     PRETRAINED_LM = "kkngan/bert-base-uncased-it-service-classification"
     model = AutoModelForSequenceClassification.from_pretrained(PRETRAINED_LM, num_labels=8)
     tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_LM)
@@ -71,8 +76,28 @@ def predict(text, model, tokenizer):
     outputs = model(**inputs)
     predicted_class_id = outputs.logits.argmax().item()
     predicted_label = lookup_key.get(predicted_class_id)
-    confidence = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().detach().numpy()
-    return predicted_label, confidence
 def main():
@@ -83,8 +108,7 @@ def main():
     with st.sidebar:
         st.image('front_page_image.jpg' , use_column_width=True)
-        text_to_speech_model = st.selectbox("Pick select a speech to text model",
-                                            ["openai/whisper-base", "openai/whisper-medium", "openai/whisper-large", "openai/whisper-large-v3"])
         options = st.selectbox("Pick select an input method", ["Start a recording", "Upload an audio", "Enter a transcript"])
         if options == "Start a recording":
             audio = mic_recorder(key='my_recorder', callback=callback)
@@ -94,47 +118,27 @@ def main():
             text = st.text_area("Please input the transcript (Only support English)")
         button = st.button('Submit')
-    if button:
         with st.spinner(text="Loading... It may take a while if you are running the app for the first time."):
             start_time = time.time()
-            model, tokenizer = load_model()
             if options == "Start a recording":
                 # transcibe_text, translate_text = transcribe_and_translate(upload=audio["bytes"])
                 translate_text = translate(inputs=audio["bytes"], model=text_to_speech_model)
-                prediction, confidence = predict(text=translate_text, model=model, tokenizer=tokenizer)
             elif options == "Upload an audio":
                 # transcibe_text, translate_text = transcribe_and_translate(upload=audio.getvalue())
                 translate_text = translate(inputs=audio.getvalue(), model=text_to_speech_model)
-                prediction, confidence = predict(text=translate_text, model=model, tokenizer=tokenizer)
             else:
                 translate_text = text
-                prediction, confidence = predict(text=text, model=model, tokenizer=tokenizer)
             end_time = time.time()
-        # st.markdown('<font color="blue"><b>Transcript:</b></font>', unsafe_allow_html=True)
-        # st.write(f'{transcibe_text}')
-        # st.write(f'\n')
-        # if options != "Enter a transcript":
-        st.markdown('<font color="purple"><b>(Translated) Text:</b></font>', unsafe_allow_html=True)
-        st.write(f'{translate_text}')
-        st.write(f'\n')
-        st.write(f'\n')
-        st.markdown('<font color="green"><b>Predicted Class:</b></font>', unsafe_allow_html=True)
-        st.write(f'{prediction}')
-        # Convert confidence to bar cart
-        st.write(f'\n')
-        st.write(f'\n')
-        category = ('Hardware', 'Access', 'Miscellaneous', 'HR Support', 'Purchase', 'Administrative rights', 'Storage', 'Internal Project')
-        confidence = np.array(confidence[0])
-        df = pd.DataFrame({'Category': category, 'Confidence (%)': confidence * 100})
-        df['Confidence (%)'] = df['Confidence (%)'].apply(lambda x: round(x, 2))
-        st.bar_chart(data=df, x='Category', y='Confidence (%)')
-        # df = df.sort_values(by='Confidence (%)', ascending=False).reset_index(drop=True)
-        # st.write(df)
         st.write(f'\n')
         st.write(f'\n')
-        st.markdown(f'*It took {(end_time-start_time):.2f} sec to process the input', unsafe_allow_html=True)
 if __name__ == '__main__':
     main()

 from streamlit_mic_recorder import mic_recorder
 from transformers import pipeline
 import torch
+# from transformers import BertTokenizer, BertForSequenceClassification
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import numpy as np
 import pandas as pd
 import time
         st.audio(audio_bytes)
+@st.cache_resource
+def load_text_to_speech_model(model="openai/whisper-medium"):
+    pipe = pipeline("automatic-speech-recognition", model=model)
+    return pipe
 def translate(inputs, model="openai/whisper-medium"):
     """Run speech recognition on ``inputs`` with the 'translate' task and return the text.

     Args:
         inputs: Audio passed straight to the speech-recognition pipeline
             (callers in this file pass raw audio bytes).
         model: Model identifier of the checkpoint to use.

     Returns:
         The ``'text'`` field of the pipeline result.
     """
     # Reuse the pipeline cached by load_text_to_speech_model instead of
     # rebuilding it on every call; the model is only loaded once per
     # model name.
     pipe = load_text_to_speech_model(model)
     translate_result = pipe(inputs, generate_kwargs={'task': 'translate'})
     return translate_result['text']
+# def encode_depracated(docs, tokenizer):
+#     '''
+#     This function takes list of texts and returns input_ids and attention_mask of texts
+#     '''
+#     encoded_dict = tokenizer.batch_encode_plus(docs, add_special_tokens=True, max_length=128, padding='max_length',
+#                             return_attention_mask=True, truncation=True, return_tensors='pt')
+#     input_ids = encoded_dict['input_ids']
+#     attention_masks = encoded_dict['attention_mask']
+#     return input_ids, attention_masks
 # def load_model_deprecated():
 #     model.load_state_dict(torch.load(CUSTOMMODEL_PATH, map_location ='cpu'))
 #     return model, tokenizer
+@st.cache_resource
+def load_classification_model():
     PRETRAINED_LM = "kkngan/bert-base-uncased-it-service-classification"
     model = AutoModelForSequenceClassification.from_pretrained(PRETRAINED_LM, num_labels=8)
     tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_LM)
     outputs = model(**inputs)
     predicted_class_id = outputs.logits.argmax().item()
     predicted_label = lookup_key.get(predicted_class_id)
+    probability = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().detach().numpy()
+    return predicted_label, predicted_class_id, probability
+def display_result(translate_text, prediction, predicted_class_id, probability):
+    st.markdown('<font color="purple"><b>Text:</b></font>', unsafe_allow_html=True)
+    st.write(f'{translate_text}')
+    st.write(f'\n')
+    st.write(f'\n')
+    st.markdown(f'<font color="green"><b>Predicted Class: (Probability: {(probability[0][predicted_class_id] * 100):.2f}%) </b></font>', unsafe_allow_html=True)
+    st.write(f'{prediction}')
+    # Convert probability to bar chart
+    st.write(f'\n')
+    st.write(f'\n')
+    category = ('Hardware', 'Access', 'Miscellaneous', 'HR Support', 'Purchase', 'Administrative rights', 'Storage', 'Internal Project')
+    probability = np.array(probability[0])
+    df = pd.DataFrame({'Category': category, 'Probability (%)': probability * 100})
+    df['Probability (%)'] = df['Probability (%)'].apply(lambda x: round(x, 2))
+    st.bar_chart(data=df, x='Category', y='Probability (%)')
 def main():
     with st.sidebar:
         st.image('front_page_image.jpg' , use_column_width=True)
+        text_to_speech_model = st.selectbox("Pick select a speech to text model", ["openai/whisper-base", "openai/whisper-medium", "openai/whisper-large", "openai/whisper-large-v3"])
         options = st.selectbox("Pick select an input method", ["Start a recording", "Upload an audio", "Enter a transcript"])
         if options == "Start a recording":
             audio = mic_recorder(key='my_recorder', callback=callback)
             text = st.text_area("Please input the transcript (Only support English)")
         button = st.button('Submit')
+    if button:
         with st.spinner(text="Loading... It may take a while if you are running the app for the first time."):
             start_time = time.time()
             if options == "Start a recording":
                 # transcibe_text, translate_text = transcribe_and_translate(upload=audio["bytes"])
                 translate_text = translate(inputs=audio["bytes"], model=text_to_speech_model)
             elif options == "Upload an audio":
                 # transcibe_text, translate_text = transcribe_and_translate(upload=audio.getvalue())
                 translate_text = translate(inputs=audio.getvalue(), model=text_to_speech_model)
             else:
                 translate_text = text
+            model, tokenizer = load_classification_model()
+            prediction, predicted_class_id, probability = predict(text=translate_text, model=model, tokenizer=tokenizer)
             end_time = time.time()
+        display_result(translate_text, prediction, predicted_class_id, probability)
         st.write(f'\n')
         st.write(f'\n')
+        st.markdown(f'*It took {(end_time-start_time):.2f} sec to process the input.', unsafe_allow_html=True)
 if __name__ == '__main__':
     main()