File size: 3,988 Bytes
8c0ffcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8bb7520
8c0ffcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6639e01
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import streamlit as st
from transformers import pipeline
import pandas as pd
import plotly.express as px
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
import gc

# Configure the page (title, icon, wide layout). This is the first Streamlit
# call in the file and runs at import time, ahead of everything in main().
st.set_page_config(page_title="Análisis de Reseñas de Amazon", page_icon="📊", layout="wide")

@st.cache_resource
def load_sentiment_analyzer():
    """Create the sentiment-analysis pipeline used for live text scoring.

    The function is decorated with ``st.cache_resource``, so the pipeline
    object it returns is handed to Streamlit's resource cache instead of
    being rebuilt by every caller.

    Returns:
        The ``transformers`` pipeline built for the
        ``nlptown/bert-base-multilingual-uncased-sentiment`` model.
    """
    model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    classifier = pipeline("sentiment-analysis", model=model_name)
    return classifier

@st.cache_resource
def download_nltk_resources():
    """Download the NLTK data packages this app requests.

    Requests ``punkt``, ``stopwords`` and ``wordnet`` through
    ``nltk.download``, in that order. Returns ``None``; the
    ``st.cache_resource`` decorator caches that result.
    """
    for package in ('punkt', 'stopwords', 'wordnet'):
        nltk.download(package)

@st.cache_data
def _read_reviews_csv():
    """Read ``processed_reviews.csv`` into a DataFrame.

    Only this successful read is cached. Any exception propagates to the
    caller, so a failed read stores nothing in the cache and is retried on
    the next call.
    """
    return pd.read_csv('processed_reviews.csv')


def load_data():
    """Load the processed reviews, reporting failures in the UI.

    The error handling lives outside the cached helper on purpose: when the
    ``try/except`` sat inside the ``st.cache_data`` function, a failed read
    cached its ``None`` result and the file was never re-read until the cache
    was cleared.

    Returns:
        The reviews ``DataFrame``, or ``None`` if the file could not be read
        (an ``st.error`` message is shown in that case).
    """
    try:
        return _read_reviews_csv()
    except Exception as e:  # UI boundary: surface any load failure to the user
        st.error(f"Error cargando datos: {str(e)}")
        return None

def analyze_sentiment(text):
    """Classify *text* with the cached sentiment pipeline.

    The pipeline's label is expected to start with an integer rating (it is
    parsed with ``int(label.split()[0])``). Ratings of 4 or more map to
    positive, exactly 3 to neutral, and anything lower to negative.

    Args:
        text: The text to classify.

    Returns:
        A ``(sentiment, score, color)`` tuple: the sentiment name
        (``"POSITIVO"``, ``"NEUTRAL"`` or ``"NEGATIVO"``), the score reported
        by the pipeline for its predicted label, and the display color
        associated with that sentiment.
    """
    analyzer = load_sentiment_analyzer()
    # Long pasted reviews can exceed the BERT-base 512-token input limit and
    # make the model raise; truncate instead of crashing the page.
    result = analyzer(text, truncation=True, max_length=512)[0]
    rating = int(result['label'].split()[0])
    if rating >= 4:
        sentiment = "POSITIVO"
        color = "green"
    elif rating == 3:
        sentiment = "NEUTRAL"
        color = "blue"
    else:
        sentiment = "NEGATIVO"
        color = "red"
    return sentiment, result['score'], color

def _render_live_analysis():
    """Render the free-text box and, when it holds text, the model's verdict."""
    st.header("🔍 Análisis de Sentimiento en Tiempo Real")
    user_text = st.text_area("Escribe o pega un texto para analizar:", height=150)

    # Whitespace-only input is truthy but gives the model nothing to classify.
    if not user_text or not user_text.strip():
        return

    with st.spinner('Analizando sentimiento...'):
        sentiment, confidence, color = analyze_sentiment(user_text)

        col1, col2 = st.columns(2)
        col1.metric("Sentimiento", sentiment)
        col2.metric("Confianza", f"{confidence:.2%}")

        # `color` and `sentiment` come from analyze_sentiment's fixed values,
        # not from user input, so interpolating them into HTML is safe here.
        st.markdown(f"<h3 style='color: {color};'>Resultado: {sentiment}</h3>", unsafe_allow_html=True)


def _render_dashboard(df):
    """Render the category filter, charts and summary metrics for *df*."""
    # Drop missing categories before sorting: NaN is a float and sorted()
    # raises TypeError when it has to compare it with the string names.
    category_names = df['product_category'].dropna().unique().tolist()
    categories = ['Todas'] + sorted(category_names)
    selected_category = st.sidebar.selectbox("Categoría de Producto", categories)

    df_filtered = df[df['product_category'] == selected_category] if selected_category != 'Todas' else df

    col1, col2 = st.columns(2)

    with col1:
        sentiment_counts = df_filtered['sentiment'].value_counts()
        fig_sentiment = px.pie(
            values=sentiment_counts.values,
            names=sentiment_counts.index,
            title="Distribución de Sentimientos"
        )
        st.plotly_chart(fig_sentiment, use_container_width=True)

    with col2:
        # sort_index() orders the bars by rating value rather than by count.
        rating_counts = df_filtered['star_rating'].value_counts().sort_index()
        fig_ratings = px.bar(
            x=rating_counts.index,
            y=rating_counts.values,
            title="Distribución de Ratings",
            labels={'x': 'Rating', 'y': 'Cantidad'}
        )
        st.plotly_chart(fig_ratings, use_container_width=True)

    col1, col2, col3 = st.columns(3)
    col1.metric("Total de Reseñas", f"{len(df_filtered):,}")
    col2.metric("Rating Promedio", f"{df_filtered['star_rating'].mean():.2f}⭐")
    col3.metric("% Positivas", f"{(df_filtered['sentiment'] == 'positive').mean():.1%}")

    # The per-category table is only shown when no single category is selected.
    if selected_category == 'Todas':
        st.header("📊 Análisis por Categoría")
        category_stats = df.groupby('product_category').agg({
            'star_rating': 'mean',
            'sentiment': lambda x: (x == 'positive').mean()
        }).round(3)
        category_stats.columns = ['Rating Promedio', '% Positivas']
        st.dataframe(category_stats)


def main():
    """Build the page: live sentiment analysis on top, reviews dashboard below.

    The dashboard is rendered only when ``load_data()`` returns a DataFrame;
    when it returns ``None`` nothing is drawn below the separator.
    """
    st.title("📊 Análisis de Reseñas de Amazon")

    # Real-time text analysis section
    _render_live_analysis()

    # Separator
    st.markdown("---")

    # Original dashboard
    download_nltk_resources()
    df = load_data()

    if df is not None:
        _render_dashboard(df)

# Script entry point: run the app, then force a garbage-collection pass
# whether main() returns normally or raises.
if __name__ == "__main__":
    try:
        main()
    finally:
        gc.collect()