"""Streamlit dashboard for Amazon review sentiment analysis.

The page has two parts: a live sentiment classifier for free text typed by
the user, and a dashboard of charts and metrics computed from
``processed_reviews.csv``.
"""
import gc
import re

import nltk
import pandas as pd
import plotly.express as px
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from transformers import pipeline

st.set_page_config(page_title="Análisis de Reseñas de Amazon", page_icon="📊", layout="wide")


@st.cache_resource
def load_sentiment_analyzer():
    """Build the sentiment-analysis pipeline (cached by ``st.cache_resource``)."""
    return pipeline(
        "sentiment-analysis",
        model="nlptown/bert-base-multilingual-uncased-sentiment",
    )


@st.cache_resource
def download_nltk_resources():
    """Download the ``punkt``, ``stopwords`` and ``wordnet`` NLTK resources.

    Decorated with ``st.cache_resource`` so the downloads are not repeated on
    every script rerun.
    """
    # NOTE(review): nothing in this file uses these resources directly;
    # presumably they are needed by preprocessing done elsewhere - confirm.
    nltk.download('punkt')
    nltk.download('stopwords')
    nltk.download('wordnet')


@st.cache_data
def load_data():
    """Read ``processed_reviews.csv`` into a DataFrame.

    Returns:
        The loaded DataFrame, or ``None`` when reading fails (an error
        message is shown on the page in that case).
    """
    try:
        return pd.read_csv('processed_reviews.csv')
    except Exception as e:
        st.error(f"Error cargando datos: {str(e)}")
        return None


def analyze_sentiment(text):
    """Classify ``text`` with the cached sentiment pipeline.

    The first whitespace-separated token of the predicted label is parsed as
    an integer star rating and bucketed: 4 and above is positive, 3 is
    neutral, anything lower is negative.

    Args:
        text: The text to classify.

    Returns:
        A ``(sentiment, score, color)`` tuple: the Spanish sentiment label,
        the pipeline's score for the predicted label, and a CSS color name
        used to render the result.
    """
    analyzer = load_sentiment_analyzer()
    # truncation=True keeps long reviews within the model's maximum input
    # length instead of raising at inference time.
    result = analyzer(text, truncation=True)[0]
    rating = int(result['label'].split()[0])

    if rating >= 4:
        sentiment = "POSITIVO"
        color = "green"
    elif rating == 3:
        sentiment = "NEUTRAL"
        color = "blue"
    else:
        sentiment = "NEGATIVO"
        color = "red"

    return sentiment, result['score'], color


def main():
    """Render the page: live sentiment analysis followed by the dashboard."""
    st.title("📊 Análisis de Reseñas de Amazon")

    # --- Real-time text analysis section ---
    st.header("🔍 Análisis de Sentimiento en Tiempo Real")
    user_text = st.text_area("Escribe o pega un texto para analizar:", height=150)

    # Skip whitespace-only input: there is nothing meaningful to classify.
    if user_text and user_text.strip():
        with st.spinner('Analizando sentimiento...'):
            sentiment, confidence, color = analyze_sentiment(user_text)

        col1, col2 = st.columns(2)
        col1.metric("Sentimiento", sentiment)
        col2.metric("Confianza", f"{confidence:.2%}")
        # The original markup of this literal was lost; this reconstruction
        # is a single-line HTML string that applies the sentiment color.
        # `sentiment` and `color` come from a fixed set of values, so
        # interpolating them into HTML is safe.
        st.markdown(
            f"<h3 style='color:{color};'>Resultado: {sentiment}</h3>",
            unsafe_allow_html=True,
        )

    # Separator between the live analysis and the dashboard.
    st.markdown("---")

    # --- Dashboard built from the processed reviews ---
    download_nltk_resources()
    df = load_data()

    if df is not None:
        categories = ['Todas'] + sorted(df['product_category'].unique().tolist())
        selected_category = st.sidebar.selectbox("Categoría de Producto", categories)

        # 'Todas' means no category filter.
        if selected_category != 'Todas':
            df_filtered = df[df['product_category'] == selected_category]
        else:
            df_filtered = df

        col1, col2 = st.columns(2)

        with col1:
            sentiment_counts = df_filtered['sentiment'].value_counts()
            fig_sentiment = px.pie(
                values=sentiment_counts.values,
                names=sentiment_counts.index,
                title="Distribución de Sentimientos",
            )
            st.plotly_chart(fig_sentiment, use_container_width=True)

        with col2:
            # sort_index() orders the bars by rating value rather than by count.
            rating_counts = df_filtered['star_rating'].value_counts().sort_index()
            fig_ratings = px.bar(
                x=rating_counts.index,
                y=rating_counts.values,
                title="Distribución de Ratings",
                labels={'x': 'Rating', 'y': 'Cantidad'},
            )
            st.plotly_chart(fig_ratings, use_container_width=True)

        col1, col2, col3 = st.columns(3)
        col1.metric("Total de Reseñas", f"{len(df_filtered):,}")
        col2.metric("Rating Promedio", f"{df_filtered['star_rating'].mean():.2f}⭐")
        col3.metric("% Positivas", f"{(df_filtered['sentiment'] == 'positive').mean():.1%}")

        # The per-category breakdown is only shown when no category is selected.
        if selected_category == 'Todas':
            st.header("📊 Análisis por Categoría")
            category_stats = df.groupby('product_category').agg({
                'star_rating': 'mean',
                'sentiment': lambda x: (x == 'positive').mean(),
            }).round(3)
            category_stats.columns = ['Rating Promedio', '% Positivas']
            st.dataframe(category_stats)


if __name__ == "__main__":
    try:
        main()
    finally:
        # Force a garbage-collection pass at the end of each script run.
        gc.collect()