|
import streamlit as st |
|
from transformers import pipeline |
|
import pandas as pd |
|
import plotly.express as px |
|
import nltk |
|
from nltk.tokenize import word_tokenize |
|
from nltk.corpus import stopwords |
|
from nltk.stem import WordNetLemmatizer |
|
import re |
|
import gc |
|
|
|
# Page-level Streamlit settings: browser-tab title and icon, wide layout.
st.set_page_config(
    page_title="Análisis de Reseñas de Amazon",
    page_icon="📊",
    layout="wide",
)
|
|
|
@st.cache_resource
def load_sentiment_analyzer():
    """Build the sentiment-analysis pipeline used for live text analysis.

    The function is decorated with ``st.cache_resource`` so the pipeline
    object is handed back from Streamlit's resource cache rather than being
    rebuilt on each call.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"sentiment-analysis"`` task with the
        ``nlptown/bert-base-multilingual-uncased-sentiment`` model.
    """
    model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    sentiment_pipeline = pipeline("sentiment-analysis", model=model_name)
    return sentiment_pipeline
|
|
|
@st.cache_resource
def download_nltk_resources():
    """Download the NLTK data packages this app requests.

    Requests ``punkt``, ``stopwords`` and ``wordnet`` (in that order) through
    ``nltk.download``. Returns ``None``.
    """
    for resource_name in ("punkt", "stopwords", "wordnet"):
        nltk.download(resource_name)
|
|
|
@st.cache_data
def _read_reviews_csv():
    """Read ``processed_reviews.csv`` into a DataFrame; read errors propagate.

    Only a successful read returns a value, so only a successful read can be
    stored by ``st.cache_data``.
    """
    return pd.read_csv('processed_reviews.csv')


def load_data():
    """Load the processed reviews dataset for the dashboard.

    The cached read lives in ``_read_reviews_csv`` and the error handling
    lives here, outside the cache. Previously the ``try/except`` was inside
    the cached function, so a failed read returned ``None`` and that ``None``
    was cached: the app kept reporting the failure until the cache was
    cleared, even after the CSV became available.

    Returns:
        The DataFrame read from ``processed_reviews.csv``, or ``None`` when
        the file could not be read (an error box is shown in that case).
    """
    try:
        return _read_reviews_csv()
    except Exception as e:
        # UI boundary: any read/parse failure is reported to the user and
        # signalled to the caller as None instead of crashing the page.
        st.error(f"Error cargando datos: {e}")
        return None
|
|
|
def analyze_sentiment(text):
    """Classify *text* and map the predicted star rating to a sentiment.

    Args:
        text: The text to classify.

    Returns:
        A ``(sentiment, score, color)`` tuple where ``sentiment`` is
        ``"POSITIVO"`` (rating >= 4), ``"NEUTRAL"`` (rating == 3) or
        ``"NEGATIVO"`` (anything lower), ``score`` is the ``'score'`` value
        the pipeline reported for its top prediction, and ``color`` is the
        color name paired with that sentiment (green / blue / red).
    """
    analyzer = load_sentiment_analyzer()

    # Truncate at the tokenizer level: the BERT checkpoint accepts at most
    # 512 tokens, and without truncation a long pasted review makes the
    # model call raise instead of returning a prediction.
    result = analyzer(text, truncation=True, max_length=512)[0]

    # The first whitespace-separated token of the label is parsed as the
    # star count (presumably labels look like "4 stars" — see model card).
    rating = int(result['label'].split()[0])

    if rating >= 4:
        sentiment, color = "POSITIVO", "green"
    elif rating == 3:
        sentiment, color = "NEUTRAL", "blue"
    else:
        sentiment, color = "NEGATIVO", "red"

    return sentiment, result['score'], color
|
|
|
def _render_live_sentiment():
    """Render the free-text input and, when text is present, its sentiment."""
    st.header("🔍 Análisis de Sentimiento en Tiempo Real")
    user_text = st.text_area("Escribe o pega un texto para analizar:", height=150)

    # Whitespace-only input has nothing to classify; skip the model call
    # rather than showing a prediction for blank text.
    if not user_text or not user_text.strip():
        return

    with st.spinner('Analizando sentimiento...'):
        sentiment, confidence, color = analyze_sentiment(user_text)

    col1, col2 = st.columns(2)
    col1.metric("Sentimiento", sentiment)
    col2.metric("Confianza", f"{confidence:.2%}")

    # `color` and `sentiment` come from analyze_sentiment's fixed set of
    # values, not from the user's text, so the inline HTML is not user-built.
    st.markdown(
        f"<h3 style='color: {color};'>Resultado: {sentiment}</h3>",
        unsafe_allow_html=True,
    )


def _render_dashboard(df):
    """Render the category filter, charts and summary metrics for *df*.

    Reads the ``product_category``, ``sentiment`` and ``star_rating`` columns.
    """
    # dropna(): a missing category mixed with string categories would make
    # sorted() compare float NaN against str and raise TypeError.
    category_names = sorted(df['product_category'].dropna().unique().tolist())
    categories = ['Todas'] + category_names
    selected_category = st.sidebar.selectbox("Categoría de Producto", categories)

    if selected_category != 'Todas':
        df_filtered = df[df['product_category'] == selected_category]
    else:
        df_filtered = df

    col1, col2 = st.columns(2)

    with col1:
        sentiment_counts = df_filtered['sentiment'].value_counts()
        fig_sentiment = px.pie(
            values=sentiment_counts.values,
            names=sentiment_counts.index,
            title="Distribución de Sentimientos"
        )
        st.plotly_chart(fig_sentiment, use_container_width=True)

    with col2:
        # sort_index() orders the bars by rating value, not by frequency.
        rating_counts = df_filtered['star_rating'].value_counts().sort_index()
        fig_ratings = px.bar(
            x=rating_counts.index,
            y=rating_counts.values,
            title="Distribución de Ratings",
            labels={'x': 'Rating', 'y': 'Cantidad'}
        )
        st.plotly_chart(fig_ratings, use_container_width=True)

    col1, col2, col3 = st.columns(3)
    col1.metric("Total de Reseñas", f"{len(df_filtered):,}")
    col2.metric("Rating Promedio", f"{df_filtered['star_rating'].mean():.2f}⭐")
    col3.metric("% Positivas", f"{(df_filtered['sentiment'] == 'positive').mean():.1%}")

    # The per-category table is shown only when no single category is selected.
    if selected_category == 'Todas':
        st.header("📊 Análisis por Categoría")
        category_stats = df.groupby('product_category').agg({
            'star_rating': 'mean',
            'sentiment': lambda x: (x == 'positive').mean()
        }).round(3)
        category_stats.columns = ['Rating Promedio', '% Positivas']
        st.dataframe(category_stats)


def main():
    """Lay out the page: live sentiment analysis first, then the dashboard.

    The dashboard section is rendered only when ``load_data`` returns a
    DataFrame; when it returns ``None`` the page ends after the separator.
    """
    st.title("📊 Análisis de Reseñas de Amazon")

    _render_live_sentiment()

    st.markdown("---")

    download_nltk_resources()
    df = load_data()

    if df is not None:
        _render_dashboard(df)
|
|
|
if __name__ == "__main__":
    # Run the app; the finally clause forces a garbage-collection pass after
    # main() finishes, whether it returned normally or raised.
    try:
        main()
    finally:
        gc.collect()