Spaces:

muhfrrazi
/

app_stream_sentiment_indonesia

Build error

App Files Files Community

app_stream_sentiment_indonesia / app.py

muhfrrazi

Upload 2 files

8546e4d over 1 year ago

raw

history blame

3.16 kB

	import streamlit as st
	import tensorflow as tf
	import numpy as np
	import transformers
	from transformers import AutoTokenizer,TFBertForSequenceClassification
	import re
	import string
	import preprocessor as p
	from tensorflow import keras

	# Load the tokenizer for the "indolem/indobert-base-uncased" checkpoint once,
	# at import time; preprocess_data() reads it as a module-level global.
	tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")

	# Maximum sequence length in tokens: preprocess_data() pads or truncates every
	# encoded sentence to exactly this length.
	max_seq = 110

	# Turn raw sentences into model-ready tensors
	def preprocess_data(data):
	    """Clean and tokenize every sentence in ``data``.

	    Each sentence is passed through ``text_preprocess`` and then encoded
	    with the module-level ``tokenizer``, padded or truncated to ``max_seq``
	    tokens.

	    Args:
	        data: an object exposing ``.tolist()`` (the caller in this file
	            passes a NumPy array) whose elements are sentences.

	    Returns:
	        A list holding one ``(input_ids, attention_mask)`` tuple of
	        TensorFlow tensors per sentence, in input order.
	    """
	    def _encode(raw_sentence):
	        # Clean first, then let the tokenizer add special tokens and pad.
	        encoding = tokenizer.encode_plus(
	            text_preprocess(raw_sentence),
	            add_special_tokens=True,
	            max_length=max_seq,
	            padding="max_length",
	            truncation=True,
	            return_tensors="tf"
	        )
	        return (encoding['input_ids'], encoding['attention_mask'])

	    return [_encode(raw_sentence) for raw_sentence in data.tolist()]

	# Function to preprocess the sentence
	def text_preprocess(sentence):
	    """Normalize a raw sentence before it is tokenized.

	    The steps run exactly once, in this order:

	    1. ``p.clean`` from the ``preprocessor`` package.
	    2. Remove ``http://`` / ``https://`` URLs.
	    3. Replace CR / LF line breaks with a space.
	    4. Drop every character that is not an ASCII letter or whitespace
	       (this removes punctuation and digits in one pass).
	    5. Drop any remaining non-ASCII characters and lowercase the result.

	    Args:
	        sentence: the raw text to clean.

	    Returns:
	        The cleaned, lowercase, ASCII-only string.
	    """
	    # The previous version repeated the whole pipeline once per character of
	    # string.punctuation; none of the steps depends on the loop variable, and
	    # the regex in step 4 already removes every punctuation character.
	    # NOTE(review): assumes p.clean gives the same result when applied once
	    # as when re-applied to already-cleaned text -- confirm for edge cases.
	    sentence = p.clean(sentence)

	    # URLs must go while ':' and '/' are still in the text; after the
	    # character filter below this pattern could never match.
	    sentence = re.sub(r'http[s]?://\S+', '', sentence)

	    # The old pattern r'\r?\n\|\r' escaped the '|' and therefore searched for
	    # a literal pipe instead of "LF or CR". Line breaks are replaced by a
	    # space (not deleted) so the words on either side are not glued together.
	    sentence = re.sub(r'\r?\n|\r', ' ', sentence)

	    # Keep ASCII letters and whitespace only: covers punctuation and digits.
	    sentence = re.sub(r'[^a-zA-Z\s]', '', sentence)

	    # The whitespace class above also admits non-ASCII whitespace; strip it.
	    sentence = sentence.encode('ascii', 'ignore').decode('ascii')
	    return sentence.lower()

	# Function to perform sentiment prediction
	def predict_sentiment(sentence):
	    """Classify one sentence as "negative", "neutral" or "positive".

	    The sentence is cleaned and tokenized by ``preprocess_data`` and scored
	    by the module-level ``model``.

	    Args:
	        sentence: the raw input text.

	    Returns:
	        The label string of the highest-scoring class.

	    Raises:
	        KeyError: if the winning class index is not 0, 1 or 2.
	    """
	    label_mapping = {0: "negative", 1: "neutral", 2: "positive"}

	    # preprocess_data expects an array, so wrap the single sentence and
	    # unpack the only (input_ids, attention_mask) pair it returns.
	    input_ids, attention_mask = preprocess_data(np.array([sentence]))[0]
	    prediction = model.predict([input_ids, attention_mask])

	    # predict() may return an output object or dict carrying a "logits"
	    # entry instead of a bare array (depends on the transformers version and
	    # config -- TODO confirm for the pinned version). np.argmax on such a
	    # mapping does not inspect the scores, so extract the logits first and
	    # fall back to the raw result for plain arrays / tuples.
	    logits = getattr(prediction, "logits", None)
	    if logits is None:
	        logits = prediction["logits"] if isinstance(prediction, dict) else prediction

	    # int() turns the NumPy integer into a plain dict key.
	    return label_mapping[int(np.argmax(logits))]

	# Streamlit user interface
	def main():
	    """Render the page: a text box, a button, and the classification banner.

	    Nothing is predicted until the "Cek Kalimat" button is pressed. The
	    banner is a coloured HTML ``div`` chosen by the label returned from
	    ``predict_sentiment``; an unrecognised label renders no banner.
	    """
	    st.title("Analisis Sentimen Berbahasa Indonesia")
	    sentence = st.text_input("Masukkan teks disini:")

	    # Guard clause: the rest only runs on the rerun triggered by the button.
	    if not st.button("Cek Kalimat"):
	        return

	    st.write("Hasil Klasifikasi:")
	    sentiment = predict_sentiment(sentence)

	    # One pre-rendered banner per label.
	    banners = {
	        "positive": '<div style="background-color: green; padding: 10px; color:white;">Sentiment: positive</div>',
	        "negative": '<div style="background-color: #FE4365; padding: 10px; color:white;">Sentiment: negative</div>',
	        "neutral": '<div style="background-color: #FDFD96; padding: 10px; color: black;">Sentiment: neutral</div>',
	    }
	    banner_html = banners.get(sentiment)
	    if banner_html is not None:
	        st.markdown(banner_html, unsafe_allow_html=True)


	if __name__ == '__main__':
	    # Map the class name to the class so Keras can resolve it while the
	    # custom-object scope below is active.
	    _custom_objects = {'TFBertForSequenceClassification': transformers.TFBertForSequenceClassification}

	    with keras.utils.custom_object_scope(_custom_objects):
	        # Fetch the fine-tuned sentiment classifier by its repository id.
	        model = TFBertForSequenceClassification.from_pretrained('muhfrrazi/IndoBERT-Sentiment-Analysist_Dataset-Indonesia')

	    # predict_sentiment() reads `model` as a module-level global, so the UI
	    # is started only after it has been bound.
	    main()