kkngan committed on
Commit
2af3524
·
verified ·
1 Parent(s): 69bab00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -11
app.py CHANGED
@@ -2,11 +2,12 @@ import streamlit as st
2
  from streamlit_mic_recorder import mic_recorder
3
  from transformers import pipeline
4
  import torch
5
- # from transformers import BertTokenizer, BertForSequenceClassification
6
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
7
  import numpy as np
8
  import pandas as pd
9
  import time
 
10
 
11
 
12
  def callback():
@@ -16,12 +17,12 @@ def callback():
16
 
17
 
18
  @st.cache_resource
19
- def load_text_to_speech_model(model="openai/whisper-medium"):
20
  pipe = pipeline("automatic-speech-recognition", model=model)
21
  return pipe
22
 
23
 
24
- def translate(inputs, model="openai/whisper-medium"):
25
  pipe = pipeline("automatic-speech-recognition", model=model)
26
  translate_result = pipe(inputs, generate_kwargs={'task': 'translate'})
27
  return translate_result['text']
@@ -38,7 +39,7 @@ def translate(inputs, model="openai/whisper-medium"):
38
  # return input_ids, attention_masks
39
 
40
 
41
- # def load_model_deprecated():
42
  # CUSTOMMODEL_PATH = "./bert-itserviceclassification"
43
  # PRETRAINED_LM = "bert-base-uncased"
44
  # tokenizer = BertTokenizer.from_pretrained(PRETRAINED_LM, do_lower_case=True)
@@ -52,8 +53,11 @@ def translate(inputs, model="openai/whisper-medium"):
52
  @st.cache_resource
53
  def load_classification_model():
54
  PRETRAINED_LM = "kkngan/bert-base-uncased-it-service-classification"
55
- model = AutoModelForSequenceClassification.from_pretrained(PRETRAINED_LM, num_labels=8)
56
- tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_LM)
 
 
 
57
  return model, tokenizer
58
 
59
 
@@ -86,21 +90,34 @@ def display_result(translate_text, prediction, predicted_class_id, probability):
86
  st.write(f'\n')
87
  st.write(f'\n')
88
 
89
- st.markdown(f'<font color="green"><b>Predicted Class: (Probability: {(probability[0][predicted_class_id] * 100):.2f}%) </b></font>', unsafe_allow_html=True)
 
90
  st.write(f'{prediction}')
91
 
92
  # Convert probability to bar chart
93
  st.write(f'\n')
94
  st.write(f'\n')
95
 
 
96
  category = ('Hardware', 'Access', 'Miscellaneous', 'HR Support', 'Purchase', 'Administrative rights', 'Storage', 'Internal Project')
97
  probability = np.array(probability[0])
98
  df = pd.DataFrame({'Category': category, 'Probability (%)': probability * 100})
99
  df['Probability (%)'] = df['Probability (%)'].apply(lambda x: round(x, 2))
100
- st.bar_chart(data=df, x='Category', y='Probability (%)')
 
 
 
 
 
 
 
 
 
 
101
 
102
 
103
  def main():
 
104
  st.set_page_config(layout="wide", page_title="NLP IT Service Classification", page_icon="🤖",)
105
  st.markdown('<b>🤖 Welcome to IT Service Classification Assistant!!! 🤖</b>', unsafe_allow_html=True)
106
  st.write(f'\n')
@@ -108,7 +125,7 @@ def main():
108
 
109
  with st.sidebar:
110
  st.image('front_page_image.jpg' , use_column_width=True)
111
- text_to_speech_model = st.selectbox("Pick select a speech to text model", ["openai/whisper-base", "openai/whisper-medium", "openai/whisper-large", "openai/whisper-large-v3"])
112
  options = st.selectbox("Pick select an input method", ["Start a recording", "Upload an audio", "Enter a transcript"])
113
  if options == "Start a recording":
114
  audio = mic_recorder(key='my_recorder', callback=callback)
@@ -141,4 +158,4 @@ def main():
141
 
142
 
143
  if __name__ == '__main__':
144
- main()
 
2
  from streamlit_mic_recorder import mic_recorder
3
  from transformers import pipeline
4
  import torch
5
+ from transformers import BertTokenizer, BertForSequenceClassification
6
+ # from transformers import AutoModelForSequenceClassification, AutoTokenizer
7
  import numpy as np
8
  import pandas as pd
9
  import time
10
+ import altair as alt
11
 
12
 
13
  def callback():
 
17
 
18
 
19
  @st.cache_resource
20
+ def load_text_to_speech_model(model="openai/whisper-base"):
21
  pipe = pipeline("automatic-speech-recognition", model=model)
22
  return pipe
23
 
24
 
25
+ def translate(inputs, model="openai/whisper-base"):
26
  pipe = pipeline("automatic-speech-recognition", model=model)
27
  translate_result = pipe(inputs, generate_kwargs={'task': 'translate'})
28
  return translate_result['text']
 
39
  # return input_ids, attention_masks
40
 
41
 
42
+ # def load_classification_model():
43
  # CUSTOMMODEL_PATH = "./bert-itserviceclassification"
44
  # PRETRAINED_LM = "bert-base-uncased"
45
  # tokenizer = BertTokenizer.from_pretrained(PRETRAINED_LM, do_lower_case=True)
 
53
  @st.cache_resource
54
  def load_classification_model():
55
  PRETRAINED_LM = "kkngan/bert-base-uncased-it-service-classification"
56
+ # model = AutoModelForSequenceClassification.from_pretrained(PRETRAINED_LM, num_labels=8)
57
+ # tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_LM)
58
+ tokenizer = BertTokenizer.from_pretrained(PRETRAINED_LM, do_lower_case=True)
59
+ model = BertForSequenceClassification.from_pretrained(PRETRAINED_LM,
60
+ num_labels=8)
61
  return model, tokenizer
62
 
63
 
 
90
  st.write(f'\n')
91
  st.write(f'\n')
92
 
93
+ # st.markdown(f'<font color="green"><b>Predicted Class: (Probability: {(probability[0][predicted_class_id] * 100):.2f}%) </b></font>', unsafe_allow_html=True)
94
+ st.markdown('<font color="green"><b>Predicted Class:</b></font>', unsafe_allow_html=True)
95
  st.write(f'{prediction}')
96
 
97
  # Convert probability to bar chart
98
  st.write(f'\n')
99
  st.write(f'\n')
100
 
101
+ # Show Probability of each Service Category
102
  category = ('Hardware', 'Access', 'Miscellaneous', 'HR Support', 'Purchase', 'Administrative rights', 'Storage', 'Internal Project')
103
  probability = np.array(probability[0])
104
  df = pd.DataFrame({'Category': category, 'Probability (%)': probability * 100})
105
  df['Probability (%)'] = df['Probability (%)'].apply(lambda x: round(x, 2))
106
+
107
+ base = alt.Chart(df).encode(
108
+ x='Probability (%)',
109
+ y=alt.Y('Category').sort('-x'),
110
+
111
+ # color='b:O',
112
+ tooltip=['Category',alt.Tooltip('Probability (%)', format=",.2f")],
113
+ text='Probability (%)'
114
+ ).properties(title="Probability of each Service Category")
115
+ chart = base.mark_bar() + base.mark_text(align='left', dx=2)
116
+ st.altair_chart(chart, use_container_width=True)
117
 
118
 
119
  def main():
120
+ # st.cache_resource.clear()
121
  st.set_page_config(layout="wide", page_title="NLP IT Service Classification", page_icon="🤖",)
122
  st.markdown('<b>🤖 Welcome to IT Service Classification Assistant!!! 🤖</b>', unsafe_allow_html=True)
123
  st.write(f'\n')
 
125
 
126
  with st.sidebar:
127
  st.image('front_page_image.jpg' , use_column_width=True)
128
+ text_to_speech_model = st.selectbox("Pick select a speech to text model", ["openai/whisper-base", "openai/whisper-large-v3"])
129
  options = st.selectbox("Pick select an input method", ["Start a recording", "Upload an audio", "Enter a transcript"])
130
  if options == "Start a recording":
131
  audio = mic_recorder(key='my_recorder', callback=callback)
 
158
 
159
 
160
  if __name__ == '__main__':
161
+ main()