Spaces:

dhanikitkat
/

demo-topic-detection

Running

App Files Files Community

demo-topic-detection / app.py

dhanikitkat

Update app.py

6b5839d verified over 1 year ago

raw

history blame contribute delete

3.77 kB

	import streamlit as st
	import re
	import pandas as pd
	from transformers import pipeline
	from gensim.models import LdaModel
	from gensim.corpora import Dictionary

	# Function to preprocess text
	def text_preprocess(teks):
	    """Normalise raw review text before it is handed to the models.

	    Steps, in order: lower-case the text; blank out @mentions and #hashtags;
	    blank out literal backslash-"n" sequences (a two-character escaped
	    newline, not a real line break); blank out ``http...`` and ``www....``
	    URLs; blank out every character that is not an ASCII letter, whitespace
	    or an apostrophe; finally trim leading/trailing whitespace.

	    Each removed span is replaced by a single space, so runs of spaces can
	    remain inside the returned string; they are not collapsed.

	    Args:
	        teks: The raw input string.

	    Returns:
	        The cleaned, lower-cased string without surrounding whitespace.
	    """
	    teks = teks.lower()
	    teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)
	    teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)
	    # Matches a literal backslash followed by "n", not an actual newline.
	    teks = re.sub(r"\\n", " ", teks)
	    teks = re.sub(r"http\S+", " ", teks)
	    # The dot is escaped so ordinary words that merely start with "www"
	    # are not mistaken for URLs.
	    teks = re.sub(r"www\.\S+", " ", teks)
	    # Raw string: "\s" in a plain literal is an invalid escape sequence.
	    teks = re.sub(r"[^A-Za-z\s']", " ", teks)
	    # Trim last, so padding introduced by the substitutions is removed too.
	    return teks.strip()

	# Function to perform inference and get the topic with the highest probability
	def get_highest_probability_topic(lda_model, dictionary, new_document, topic_names):
	    """Return the label and probability of the most likely topic of a document.

	    ``new_document`` is split on whitespace, turned into a bag-of-words with
	    ``dictionary.doc2bow`` and scored with ``lda_model.get_document_topics``
	    (called with ``minimum_probability=0``). The ``(topic_id, probability)``
	    pair with the largest probability is selected; on ties the first such
	    pair wins.

	    Args:
	        lda_model: Object exposing ``get_document_topics(bow, minimum_probability=...)``.
	        dictionary: Object exposing ``doc2bow(tokens)``.
	        new_document: Text to score.
	        topic_names: Mapping from topic id to a display label.

	    Returns:
	        A ``(label, probability)`` tuple. The label is ``"Topic <id>"`` when
	        the winning id has no entry in ``topic_names``.
	    """
	    tokens = new_document.split()
	    scored_topics = lda_model.get_document_topics(
	        dictionary.doc2bow(tokens), minimum_probability=0
	    )
	    best_id, best_probability = max(scored_topics, key=lambda pair: pair[1])
	    fallback_label = f"Topic {best_id}"
	    return topic_names.get(best_id, fallback_label), best_probability

	# Load sentiment analysis model
	pretrained_name = "w11wo/indonesian-roberta-base-sentiment-classifier"


	@st.cache_resource
	def _load_sentiment_pipeline(model_name):
	    """Build the sentiment-analysis pipeline for ``model_name``.

	    Streamlit re-executes the script on every user interaction; caching the
	    pipeline as a resource keeps a single instance alive across reruns
	    instead of reloading the model and tokenizer each time.
	    """
	    return pipeline("sentiment-analysis", model=model_name, tokenizer=model_name)


	# Module-level handle kept under its original name for the rest of the app.
	nlp = _load_sentiment_pipeline(pretrained_name)

	# Display labels for the LDA topic ids, used by the "Infer Topic" action.
	_TOPIC_NAMES = {
	    0: 'Kurang Memuaskan',
	    1: 'Aplikasi Lambat',
	    2: 'Aplikasi Error',
	    3: 'Sulit Sinkronisasi',
	    4: 'Tidak Bisa Login',
	    5: 'Aplikasi Sulit Dibuka',
	    6: 'Aplikasi Keseringan Update',
	    7: 'Neutral',
	    8: 'Aplikasi Bug',
	    9: 'Pelayanan Buruk',
	    10: 'Aplikasi Tidak Bisa Digunakan',
	    11: 'Aplikasi Belum Update',
	    12: 'Aplikasi Bug/Lag',
	    13: 'Sulit Komplain',
	    14: 'Gangguan Server',
	    15: 'Tidak Bisa Update',
	    16: 'Tidak Bisa Download',
	    17: 'Jaringan Bermasalah',
	    18: 'Transaksi Lambat',
	    19: 'Tidak Bisa Buka Aplikasi',
	    20: 'Terlalu Banyak Iklan',
	    21: 'Verifikasi Wajah Gagal',
	    22: 'Pengajuan Pinjaman',
	    23: 'Sms Kode Otp Tidak Masuk',
	    24: 'Sulit Pengajuan Pinjaman',
	    25: 'Tidak Bisa Transaksi / Lambat',
	    26: 'Sulit Daftar',
	    27: 'Sulit Transfer',
	    28: 'Banyak Potongan',
	    29: 'Tidak Bisa Cek Mutasi / Mutasi Hilang',
	    30: 'Proses Kta Lama',
	    31: 'Aplikasi Tidak Real Time',
	    32: 'Kesulitan Pengajuan Kartu Kredit',
	    33: 'Mesin Atm Error',
	}


	# Streamlit app
	def main():
	    """Render the page: a text area plus sentiment and topic actions.

	    "Analyze Sentiment" runs the module-level ``nlp`` pipeline on the cleaned
	    text and shows the first result's label and score. "Infer Topic" loads
	    the LDA model and dictionary from ``lda.model`` / ``dictionary.dict`` and
	    shows the most probable topic label and its probability. When the
	    cleaned text is empty, either action shows a warning instead of calling
	    a model.
	    """
	    st.title("Sentiment Analysis and Topic Inference App")
	    st.write("Enter your text below:")
	    input_text = st.text_area("Input Text")

	    # Both actions work on the same cleaned text.
	    # NOTE(review): assumes the LDA dictionary was built from text cleaned
	    # like this (lower-cased, punctuation removed) - confirm against the
	    # training code.
	    processed_text = text_preprocess(input_text)
	    has_text = bool(processed_text.strip())

	    # Both buttons must be rendered on every run, so empty input is handled
	    # inside each branch rather than with an early return.
	    if st.button("Analyze Sentiment"):
	        if has_text:
	            result = nlp(processed_text)
	            sentiment = result[0]['label']
	            probability = result[0]['score']
	            st.write("Sentiment:", sentiment)
	            st.write("Probability:", probability)
	        else:
	            st.warning("Please enter some text first.")

	    if st.button("Infer Topic"):
	        if has_text:
	            lda_model = LdaModel.load("lda.model")
	            dictionary = Dictionary.load("dictionary.dict")
	            inferred_topic, inferred_probability = get_highest_probability_topic(
	                lda_model, dictionary, processed_text, _TOPIC_NAMES
	            )
	            st.write("Inferred Topic:", inferred_topic)
	            st.write("Inference Probability:", inferred_probability)
	        else:
	            st.warning("Please enter some text first.")


	if __name__ == "__main__":
	    main()