Spaces:

muhfrrazi
/

app_stream_sentiment_indonesia

Build error

App Files Files Community

muhfrrazi committed on Jun 5, 2023

Commit

8546e4d

1 Parent(s): 5fa282c

Upload 2 files

Browse files

Files changed (2) hide show

app.py +79 -0
requirement.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import streamlit as st
+import tensorflow as tf
+import numpy as np
+import transformers
+from transformers import AutoTokenizer,TFBertForSequenceClassification
+import re
+import string
+import preprocessor as p
+from tensorflow import keras
+# Load tokenizer
+tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
+# Define the maximum sequence length
+max_seq = 110
+# Function to preprocess the data
+def preprocess_data(data):
+    data = data.tolist()  # Convert numpy array to list
+    processed_data = []
+    for sentence in data:
+        sentence = text_preprocess(sentence)
+        encoded_data = tokenizer.encode_plus(
+            sentence,
+            add_special_tokens=True,
+            max_length=max_seq,
+            padding="max_length",
+            truncation=True,
+            return_tensors="tf"
+        )
+        processed_data.append((encoded_data['input_ids'], encoded_data['attention_mask']))
+    return processed_data
+# Function to preprocess the sentence
+def text_preprocess(sentence):
+    pattern = r'[0-9]'
+    for punctuation in string.punctuation:
+        sentence = p.clean(sentence)
+        sentence = re.sub(r'[^a-zA-Z0-9\s]', '', sentence)
+        sentence = re.sub(r'http[s]?://\S+', '', sentence)
+        sentence = sentence.replace(punctuation, '')
+        sentence = re.sub(pattern, '', sentence)
+        sentence = re.sub(r'\r?\n|\r', '', sentence)
+        sentence = sentence.encode('ascii', 'ignore').decode('ascii')
+        sentence = sentence.lower()
+    return sentence
+# Function to perform sentiment prediction
+def predict_sentiment(sentence):
+    preprocessed_sentence = preprocess_data(np.array([sentence]))
+    input_ids, attention_mask = preprocessed_sentence[0]
+    prediction = model.predict([input_ids, attention_mask])
+    predicted_label = np.argmax(prediction)
+    label_mapping = {0: "negative", 1: "neutral", 2: "positive"}
+    predicted_label = label_mapping[predicted_label]
+    return predicted_label
+# Streamlit app
+def main():
+    st.title("Analisis Sentimen Berbahasa Indonesia")
+    sentence = st.text_input("Masukkan teks disini:")
+    if st.button("Cek Kalimat"):
+        st.write("Hasil Klasifikasi:")
+        sentiment = predict_sentiment(sentence)
+        if sentiment == "positive":
+            st.markdown('<div style="background-color: green; padding: 10px; color:white;">Sentiment: positive</div>', unsafe_allow_html=True)
+        elif sentiment == "negative":
+            st.markdown('<div style="background-color: #FE4365; padding: 10px; color:white;">Sentiment: negative</div>', unsafe_allow_html=True)
+        elif sentiment == "neutral":
+            st.markdown('<div style="background-color: #FDFD96; padding: 10px; color: black;">Sentiment: neutral</div>', unsafe_allow_html=True)
+if __name__ == '__main__':
+    # Register the custom objects using custom_object_scope
+    with keras.utils.custom_object_scope({'TFBertForSequenceClassification': transformers.TFBertForSequenceClassification}):
+        # Load the saved model
+        model = TFBertForSequenceClassification.from_pretrained('muhfrrazi/IndoBERT-Sentiment-Analysist_Dataset-Indonesia')
+        main()

requirement.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit==0.87.0
+tensorflow==2.5.0
+numpy==1.21.0
+transformers==4.9.0
+tweet-preprocessor==0.6.0
+tensorflow.keras==2.5.0