|
import streamlit as st |
|
import re |
|
import pandas as pd |
|
from transformers import pipeline |
|
from gensim.models import LdaModel |
|
from gensim.corpora import Dictionary |
|
|
|
|
|
def text_preprocess(teks: str) -> str:
    """Normalize raw text before it is handed to the models.

    Steps, in order: lower-case the text; replace escaped newlines (a
    backslash followed by ``n``), URLs, @mentions and #hashtags with a
    space; replace every remaining character that is not an ASCII letter,
    whitespace or an apostrophe with a space; trim surrounding whitespace.

    Args:
        teks: Raw input text.

    Returns:
        The cleaned, lower-cased text. Interior runs of spaces left behind
        by the substitutions are not collapsed.
    """
    teks = teks.lower()
    # Matches the two-character sequence backslash + "n" (an escaped newline
    # kept as text), not a real newline character.
    teks = re.sub(r"\\n", " ", teks)
    # URLs go first: stripping mentions/hashtags beforehand would cut a URL
    # such as "http://x.com/@user/status" in two and leak its tail.
    teks = re.sub(r"http\S+", " ", teks)
    # The dot is escaped so ordinary words containing "www" are left alone.
    teks = re.sub(r"www\.\S+", " ", teks)
    teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)
    teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)
    teks = re.sub(r"[^A-Za-z\s']", " ", teks)
    # Trim last so whitespace introduced by the substitutions is removed too.
    return teks.strip()
|
|
|
|
|
def get_highest_probability_topic(lda_model, dictionary, new_document, topic_names):
    """Return the name and probability of the most likely topic of a document.

    ``new_document`` is split on whitespace, turned into a bag-of-words with
    ``dictionary.doc2bow`` and scored with
    ``lda_model.get_document_topics(..., minimum_probability=0)``. Among the
    returned ``(topic_id, probability)`` pairs, the one with the largest
    probability is selected.

    Args:
        lda_model: Object exposing ``get_document_topics``.
        dictionary: Object exposing ``doc2bow``.
        new_document: Text of the document to classify.
        topic_names: Mapping from topic id to a display name.

    Returns:
        A ``(topic_name, probability)`` tuple. When the winning id is not in
        ``topic_names`` the name falls back to ``"Topic <id>"``.
    """
    tokens = new_document.split()
    bow_vector = dictionary.doc2bow(tokens)
    scored_topics = lda_model.get_document_topics(bow_vector, minimum_probability=0)

    def by_probability(pair):
        # Each pair is (topic_id, probability); rank on the probability.
        return pair[1]

    best_id, best_probability = max(scored_topics, key=by_probability)
    label = topic_names.get(best_id, f"Topic {best_id}")
    return label, best_probability
|
|
|
|
|
pretrained_name = "w11wo/indonesian-roberta-base-sentiment-classifier"

# Streamlit re-executes the script on every widget interaction, so an
# uncached pipeline would be rebuilt on each rerun. ``st.cache_resource``
# keeps one instance alive; on Streamlit versions without it the loader
# simply runs uncached, as before.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_sentiment_pipeline():
    """Build the sentiment-analysis pipeline for ``pretrained_name``."""
    return pipeline("sentiment-analysis", model=pretrained_name, tokenizer=pretrained_name)


nlp = _load_sentiment_pipeline()
|
|
|
|
|
# Display names for the LDA topic ids; ids missing from this table are shown
# as "Topic <id>" by get_highest_probability_topic.
_TOPIC_NAMES = {
    0: 'Kurang Memuaskan',
    1: 'Aplikasi Lambat',
    2: 'Aplikasi Error',
    3: 'Sulit Sinkronisasi',
    4: 'Tidak Bisa Login',
    5: 'Aplikasi Sulit Dibuka',
    6: 'Aplikasi Keseringan Update',
    7: 'Neutral',
    8: 'Aplikasi Bug',
    9: 'Pelayanan Buruk',
    10: 'Aplikasi Tidak Bisa Digunakan',
    11: 'Aplikasi Belum Update',
    12: 'Aplikasi Bug/Lag',
    13: 'Sulit Komplain',
    14: 'Gangguan Server',
    15: 'Tidak Bisa Update',
    16: 'Tidak Bisa Download',
    17: 'Jaringan Bermasalah',
    18: 'Transaksi Lambat',
    19: 'Tidak Bisa Buka Aplikasi',
    20: 'Terlalu Banyak Iklan',
    21: 'Verifikasi Wajah Gagal',
    22: 'Pengajuan Pinjaman',
    23: 'Sms Kode Otp Tidak Masuk',
    24: 'Sulit Pengajuan Pinjaman',
    25: 'Tidak Bisa Transaksi / Lambat',
    26: 'Sulit Daftar',
    27: 'Sulit Transfer',
    28: 'Banyak Potongan',
    29: 'Tidak Bisa Cek Mutasi / Mutasi Hilang',
    30: 'Proses Kta Lama',
    31: 'Aplikasi Tidak Real Time',
    32: 'Kesulitan Pengajuan Kartu Kredit',
    33: 'Mesin Atm Error',
}


def main():
    """Render the Streamlit page and handle its two actions.

    The page shows a text area and two buttons. "Analyze Sentiment" runs the
    cleaned text through the module-level ``nlp`` pipeline and displays the
    label and score of the first result. "Infer Topic" loads the LDA model
    and dictionary from ``lda.model`` / ``dictionary.dict`` (paths relative
    to the working directory) and displays the most probable topic.
    Input that is empty after cleaning produces a warning instead of a
    model call.
    """
    st.title("Sentiment Analysis and Topic Inference App")
    st.write("Enter your text below:")
    input_text = st.text_area("Input Text")

    if st.button("Analyze Sentiment"):
        processed_text = text_preprocess(input_text)
        if not processed_text.strip():
            st.warning("Please enter some text to analyze.")
        else:
            result = nlp(processed_text)
            sentiment = result[0]['label']
            probability = result[0]['score']
            st.write("Sentiment:", sentiment)
            st.write("Probability:", probability)

    if st.button("Infer Topic"):
        # Use the same normalization as the sentiment path so casing,
        # punctuation, URLs and mentions do not prevent tokens from matching
        # dictionary entries.
        # NOTE(review): assumes the LDA dictionary was built from text
        # normalized at least this way - confirm against the training code.
        processed_text = text_preprocess(input_text)
        if not processed_text.strip():
            st.warning("Please enter some text to analyze.")
        else:
            lda_model = LdaModel.load("lda.model")
            dictionary = Dictionary.load("dictionary.dict")
            inferred_topic, inferred_probability = get_highest_probability_topic(
                lda_model, dictionary, processed_text, _TOPIC_NAMES
            )
            st.write("Inferred Topic:", inferred_topic)
            st.write("Inference Probability:", inferred_probability)
|
|
|
# Start the app only when this file is the entry script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|
|