Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,11 +2,12 @@ import streamlit as st
|
|
2 |
from streamlit_mic_recorder import mic_recorder
|
3 |
from transformers import pipeline
|
4 |
import torch
|
5 |
-
|
6 |
-
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
7 |
import numpy as np
|
8 |
import pandas as pd
|
9 |
import time
|
|
|
10 |
|
11 |
|
12 |
def callback():
|
@@ -16,12 +17,12 @@ def callback():
|
|
16 |
|
17 |
|
18 |
@st.cache_resource
|
19 |
-
def load_text_to_speech_model(model="openai/whisper-
|
20 |
pipe = pipeline("automatic-speech-recognition", model=model)
|
21 |
return pipe
|
22 |
|
23 |
|
24 |
-
def translate(inputs, model="openai/whisper-
|
25 |
pipe = pipeline("automatic-speech-recognition", model=model)
|
26 |
translate_result = pipe(inputs, generate_kwargs={'task': 'translate'})
|
27 |
return translate_result['text']
|
@@ -38,7 +39,7 @@ def translate(inputs, model="openai/whisper-medium"):
|
|
38 |
# return input_ids, attention_masks
|
39 |
|
40 |
|
41 |
-
# def
|
42 |
# CUSTOMMODEL_PATH = "./bert-itserviceclassification"
|
43 |
# PRETRAINED_LM = "bert-base-uncased"
|
44 |
# tokenizer = BertTokenizer.from_pretrained(PRETRAINED_LM, do_lower_case=True)
|
@@ -52,8 +53,11 @@ def translate(inputs, model="openai/whisper-medium"):
|
|
52 |
@st.cache_resource
|
53 |
def load_classification_model():
|
54 |
PRETRAINED_LM = "kkngan/bert-base-uncased-it-service-classification"
|
55 |
-
model = AutoModelForSequenceClassification.from_pretrained(PRETRAINED_LM, num_labels=8)
|
56 |
-
tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_LM)
|
|
|
|
|
|
|
57 |
return model, tokenizer
|
58 |
|
59 |
|
@@ -86,21 +90,34 @@ def display_result(translate_text, prediction, predicted_class_id, probability):
|
|
86 |
st.write(f'\n')
|
87 |
st.write(f'\n')
|
88 |
|
89 |
-
st.markdown(f'<font color="green"><b>Predicted Class: (Probability: {(probability[0][predicted_class_id] * 100):.2f}%) </b></font>', unsafe_allow_html=True)
|
|
|
90 |
st.write(f'{prediction}')
|
91 |
|
92 |
# Convert probability to bar chart
|
93 |
st.write(f'\n')
|
94 |
st.write(f'\n')
|
95 |
|
|
|
96 |
category = ('Hardware', 'Access', 'Miscellaneous', 'HR Support', 'Purchase', 'Administrative rights', 'Storage', 'Internal Project')
|
97 |
probability = np.array(probability[0])
|
98 |
df = pd.DataFrame({'Category': category, 'Probability (%)': probability * 100})
|
99 |
df['Probability (%)'] = df['Probability (%)'].apply(lambda x: round(x, 2))
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
|
103 |
def main():
|
|
|
104 |
st.set_page_config(layout="wide", page_title="NLP IT Service Classification", page_icon="π€",)
|
105 |
st.markdown('<b>π€ Welcome to IT Service Classification Assistant!!! π€</b>', unsafe_allow_html=True)
|
106 |
st.write(f'\n')
|
@@ -108,7 +125,7 @@ def main():
|
|
108 |
|
109 |
with st.sidebar:
|
110 |
st.image('front_page_image.jpg' , use_column_width=True)
|
111 |
-
text_to_speech_model = st.selectbox("Pick select a speech to text model", ["openai/whisper-base", "openai/whisper-
|
112 |
options = st.selectbox("Pick select an input method", ["Start a recording", "Upload an audio", "Enter a transcript"])
|
113 |
if options == "Start a recording":
|
114 |
audio = mic_recorder(key='my_recorder', callback=callback)
|
@@ -141,4 +158,4 @@ def main():
|
|
141 |
|
142 |
|
143 |
if __name__ == '__main__':
|
144 |
-
main()
|
|
|
2 |
from streamlit_mic_recorder import mic_recorder
|
3 |
from transformers import pipeline
|
4 |
import torch
|
5 |
+
from transformers import BertTokenizer, BertForSequenceClassification
|
6 |
+
# from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
7 |
import numpy as np
|
8 |
import pandas as pd
|
9 |
import time
|
10 |
+
import altair as alt
|
11 |
|
12 |
|
13 |
def callback():
|
|
|
17 |
|
18 |
|
19 |
@st.cache_resource
def load_text_to_speech_model(model="openai/whisper-base"):
    """Build the speech-recognition pipeline for the given checkpoint.

    Despite the function name, the pipeline created here uses the
    "automatic-speech-recognition" task (audio in, text out).

    Args:
        model: Model identifier forwarded to ``pipeline``.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    return pipeline("automatic-speech-recognition", model=model)
|
23 |
|
24 |
|
25 |
+
def translate(inputs, model="openai/whisper-base"):
    """Run speech recognition on *inputs* with the Whisper 'translate' task.

    Args:
        inputs: Audio handed straight to the speech-recognition pipeline
            (accepted formats are whatever that pipeline supports).
        model: Model identifier of the checkpoint to use.

    Returns:
        The ``'text'`` entry of the pipeline result.
    """
    # Reuse the pipeline cached by load_text_to_speech_model() rather than
    # constructing a new one (and reloading the model weights) on every call.
    pipe = load_text_to_speech_model(model)
    translate_result = pipe(inputs, generate_kwargs={'task': 'translate'})
    return translate_result['text']
|
|
|
39 |
# return input_ids, attention_masks
|
40 |
|
41 |
|
42 |
+
# def load_classification_model():
|
43 |
# CUSTOMMODEL_PATH = "./bert-itserviceclassification"
|
44 |
# PRETRAINED_LM = "bert-base-uncased"
|
45 |
# tokenizer = BertTokenizer.from_pretrained(PRETRAINED_LM, do_lower_case=True)
|
|
|
53 |
@st.cache_resource
def load_classification_model():
    """Load the IT-service classification model and its tokenizer.

    Both are loaded from the same checkpoint name. The tokenizer is created
    with ``do_lower_case=True`` and the model with ``num_labels=8``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint = "kkngan/bert-base-uncased-it-service-classification"
    # Tokenizer is loaded first, then the classifier head, as before.
    bert_tokenizer = BertTokenizer.from_pretrained(checkpoint, do_lower_case=True)
    bert_classifier = BertForSequenceClassification.from_pretrained(
        checkpoint,
        num_labels=8,
    )
    return bert_classifier, bert_tokenizer
|
62 |
|
63 |
|
|
|
90 |
st.write(f'\n')
|
91 |
st.write(f'\n')
|
92 |
|
93 |
+
# st.markdown(f'<font color="green"><b>Predicted Class: (Probability: {(probability[0][predicted_class_id] * 100):.2f}%) </b></font>', unsafe_allow_html=True)
|
94 |
+
st.markdown('<font color="green"><b>Predicted Class:</b></font>', unsafe_allow_html=True)
|
95 |
st.write(f'{prediction}')
|
96 |
|
97 |
# Convert probability to bar chart
|
98 |
st.write(f'\n')
|
99 |
st.write(f'\n')
|
100 |
|
101 |
+
# Show Probability of each Service Category
|
102 |
category = ('Hardware', 'Access', 'Miscellaneous', 'HR Support', 'Purchase', 'Administrative rights', 'Storage', 'Internal Project')
|
103 |
probability = np.array(probability[0])
|
104 |
df = pd.DataFrame({'Category': category, 'Probability (%)': probability * 100})
|
105 |
df['Probability (%)'] = df['Probability (%)'].apply(lambda x: round(x, 2))
|
106 |
+
|
107 |
+
base = alt.Chart(df).encode(
|
108 |
+
x='Probability (%)',
|
109 |
+
y=alt.Y('Category').sort('-x'),
|
110 |
+
|
111 |
+
# color='b:O',
|
112 |
+
tooltip=['Category',alt.Tooltip('Probability (%)', format=",.2f")],
|
113 |
+
text='Probability (%)'
|
114 |
+
).properties(title="Probability of each Service Category")
|
115 |
+
chart = base.mark_bar() + base.mark_text(align='left', dx=2)
|
116 |
+
st.altair_chart(chart, use_container_width=True)
|
117 |
|
118 |
|
119 |
def main():
|
120 |
+
# st.cache_resource.clear()
|
121 |
st.set_page_config(layout="wide", page_title="NLP IT Service Classification", page_icon="π€",)
|
122 |
st.markdown('<b>π€ Welcome to IT Service Classification Assistant!!! π€</b>', unsafe_allow_html=True)
|
123 |
st.write(f'\n')
|
|
|
125 |
|
126 |
with st.sidebar:
|
127 |
st.image('front_page_image.jpg' , use_column_width=True)
|
128 |
+
text_to_speech_model = st.selectbox("Pick select a speech to text model", ["openai/whisper-base", "openai/whisper-large-v3"])
|
129 |
options = st.selectbox("Pick select an input method", ["Start a recording", "Upload an audio", "Enter a transcript"])
|
130 |
if options == "Start a recording":
|
131 |
audio = mic_recorder(key='my_recorder', callback=callback)
|
|
|
158 |
|
159 |
|
160 |
if __name__ == '__main__':
|
161 |
+
main()
|