muhfrrazi committed on
Commit
8546e4d
·
1 Parent(s): 5fa282c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +79 -0
  2. requirement.txt +6 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import tensorflow as tf
3
+ import numpy as np
4
+ import transformers
5
+ from transformers import AutoTokenizer,TFBertForSequenceClassification
6
+ import re
7
+ import string
8
+ import preprocessor as p
9
+ from tensorflow import keras
10
+
11
# Tokenizer shared by every request; loaded once at import time from the
# "indolem/indobert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")

# Fixed token length: preprocess_data pads/truncates each sentence to this.
max_seq = 110
16
+
17
+ # Function to preprocess the data
18
def preprocess_data(data):
    """Clean and tokenize a batch of sentences for the classifier.

    Args:
        data: The sentences to encode. May be a numpy array (as passed by
            ``predict_sentiment``), any other iterable of strings such as a
            list or tuple, or a single string, which is treated as one
            sentence.

    Returns:
        A list with one ``(input_ids, attention_mask)`` tuple per sentence,
        taken from the tokenizer output requested with
        ``return_tensors="tf"`` and padded/truncated to ``max_seq`` tokens.
    """
    # Only numpy arrays expose ``tolist``; plain Python sequences are used
    # as they are instead of raising AttributeError.
    sentences = data.tolist() if hasattr(data, "tolist") else data
    # A bare string (or a 0-d array, whose ``tolist`` yields a string) is a
    # single sentence, not a sequence of one-character sentences.
    if isinstance(sentences, str):
        sentences = [sentences]

    processed_data = []
    for sentence in sentences:
        encoded = tokenizer.encode_plus(
            text_preprocess(sentence),
            add_special_tokens=True,
            max_length=max_seq,
            padding="max_length",
            truncation=True,
            return_tensors="tf",
        )
        processed_data.append((encoded['input_ids'], encoded['attention_mask']))
    return processed_data
33
+
34
+ # Function to preprocess the sentence
35
def text_preprocess(sentence):
    """Normalize a raw sentence before it is tokenized.

    Steps, in order: drop http(s) URLs, run the ``preprocessor`` package's
    ``clean`` once, remove every character that is not an ASCII letter or
    whitespace, remove line breaks, drop any remaining non-ASCII characters
    and lowercase the result.

    Args:
        sentence: The raw input text.

    Returns:
        The cleaned, lowercased text.
    """
    # URLs must go first: once ':' and '/' have been stripped the pattern
    # can no longer match (the original applied it after the strip).
    sentence = re.sub(r'http[s]?://\S+', '', sentence)

    # The original repeated this call (and the regexes below) once per
    # character of string.punctuation; a single pass is what was intended.
    # NOTE(review): if the training pipeline used the repeated form, rare
    # inputs (e.g. tokens that only become cleanable after punctuation is
    # removed) may now be cleaned slightly differently - confirm.
    sentence = p.clean(sentence)

    # One pass removes punctuation, symbols and digits alike. This replaces
    # the per-character ``str.replace`` loop and the separate '[0-9]' pass.
    sentence = re.sub(r'[^a-zA-Z\s]', '', sentence)

    # Line breaks are removed without inserting a space, as before.
    sentence = re.sub(r'\r?\n|\r', '', sentence)

    # ``\s`` also keeps non-ASCII whitespace; this drops whatever non-ASCII
    # characters are still left.
    sentence = sentence.encode('ascii', 'ignore').decode('ascii')
    return sentence.lower()
47
+
48
+ # Function to perform sentiment prediction
49
def predict_sentiment(sentence):
    """Classify one sentence as negative, neutral or positive.

    Args:
        sentence: The raw text to classify.

    Returns:
        One of ``"negative"``, ``"neutral"`` or ``"positive"``.
    """
    label_mapping = {0: "negative", 1: "neutral", 2: "positive"}

    input_ids, attention_mask = preprocess_data(np.array([sentence]))[0]
    prediction = model.predict([input_ids, attention_mask])

    # ``predict`` may hand back a model-output object, a dict, a tuple or a
    # bare array depending on the library version. Pull out the logits
    # explicitly before taking the argmax.
    # NOTE(review): the original called np.argmax on the raw return value;
    # for a dict-like output that appears to ignore the logits and yield 0
    # ("negative") every time - confirm against the deployed versions.
    if hasattr(prediction, "logits"):
        logits = prediction.logits
    elif isinstance(prediction, dict):
        logits = prediction["logits"]
    elif isinstance(prediction, (tuple, list)):
        logits = prediction[0]
    else:
        logits = prediction

    # Only one sentence is scored, so the flat argmax is the class index.
    predicted_index = int(np.argmax(np.asarray(logits)))
    return label_mapping[predicted_index]
57
+
58
+ # Streamlit app
59
def main():
    """Render the Streamlit page and show the sentiment of the entered text.

    Draws the title, a text input and a button. When the button is pressed
    with non-blank text, the text is classified with ``predict_sentiment``
    and a colored banner for the resulting label is displayed.
    """
    # Static banner markup per label; no user input is interpolated into it.
    banners = {
        "positive": '<div style="background-color: green; padding: 10px; color:white;">Sentiment: positive</div>',
        "negative": '<div style="background-color: #FE4365; padding: 10px; color:white;">Sentiment: negative</div>',
        "neutral": '<div style="background-color: #FDFD96; padding: 10px; color: black;">Sentiment: neutral</div>',
    }

    st.title("Analisis Sentimen Berbahasa Indonesia")
    sentence = st.text_input("Masukkan teks disini:")

    if not st.button("Cek Kalimat"):
        return

    # Blank input carries nothing to classify; ask for text instead of
    # sending an empty sentence to the model.
    if not sentence.strip():
        st.warning("Teks tidak boleh kosong.")
        return

    st.write("Hasil Klasifikasi:")
    banner = banners.get(predict_sentiment(sentence))
    if banner is not None:
        st.markdown(banner, unsafe_allow_html=True)
71
+
72
+
73
if __name__ == '__main__':
    # Load the fine-tuned classifier into the module-level ``model`` that
    # predict_sentiment reads, with the BERT class registered as a Keras
    # custom object while loading.
    with keras.utils.custom_object_scope(
        {'TFBertForSequenceClassification': transformers.TFBertForSequenceClassification}
    ):
        model = TFBertForSequenceClassification.from_pretrained(
            'muhfrrazi/IndoBERT-Sentiment-Analysist_Dataset-Indonesia'
        )

    # Start the Streamlit UI once the model is available.
    main()
requirement.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit==0.87.0
2
+ tensorflow==2.5.0
3
+ numpy==1.21.0
4
+ transformers==4.9.0
5
+ preprocessor==1.1.3
6
+ tensorflow.keras==2.5.0