Spaces:

VicGerardoPR
/

AmazonSenitmentReview

Running

File size: 3,988 Bytes

import streamlit as st
from transformers import pipeline
import pandas as pd
import plotly.express as px
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
import gc

# Set the page title, page icon and wide layout for the Streamlit page.
st.set_page_config(page_title="Análisis de Reseñas de Amazon", page_icon="📊", layout="wide")

@st.cache_resource
def load_sentiment_analyzer():
    """Build the ``transformers`` sentiment-analysis pipeline.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        A ``sentiment-analysis`` pipeline backed by the
        ``nlptown/bert-base-multilingual-uncased-sentiment`` model.
    """
    model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    sentiment_pipeline = pipeline("sentiment-analysis", model=model_name)
    return sentiment_pipeline

@st.cache_resource
def download_nltk_resources():
    """Download the NLTK packages ``punkt``, ``stopwords`` and ``wordnet``.

    The function is wrapped in ``st.cache_resource`` and returns ``None``.
    """
    # Same packages, same order as before; only the call site is folded
    # into a loop.
    for package in ('punkt', 'stopwords', 'wordnet'):
        nltk.download(package)

@st.cache_data
def load_data():
    """Read ``processed_reviews.csv`` into a pandas DataFrame.

    Returns:
        The loaded DataFrame, or ``None`` when reading raised an exception;
        in that case the error message is shown through ``st.error``.

    NOTE(review): the function is wrapped in ``st.cache_data``, so a ``None``
    result is presumably cached as well -- confirm whether a failed load
    should be retried on the next run.
    """
    try:
        reviews = pd.read_csv('processed_reviews.csv')
    except Exception as e:
        st.error(f"Error cargando datos: {str(e)}")
        reviews = None
    return reviews

def analyze_sentiment(text):
    """Classify ``text`` and map the predicted star rating to a sentiment.

    The pipeline's label is parsed by taking its first whitespace-separated
    token as an integer rating. Ratings of 4 or more are reported as
    ``"POSITIVO"``, exactly 3 as ``"NEUTRAL"``, anything else as
    ``"NEGATIVO"``.

    Args:
        text: The text to classify.

    Returns:
        A tuple ``(sentiment, score, color)`` where ``sentiment`` is one of
        the three labels above, ``score`` is the pipeline's ``'score'`` value
        for the prediction and ``color`` is ``"green"``, ``"blue"`` or
        ``"red"`` respectively.

    Raises:
        ValueError: If the first token of the returned label is not an
            integer.
    """
    analyzer = load_sentiment_analyzer()

    # BERT-base models accept at most 512 tokens. Without truncation a long
    # pasted review makes inference fail, so cut the input explicitly instead
    # of relying on the tokenizer config declaring a maximum length.
    max_tokens = 512
    result = analyzer(text, truncation=True, max_length=max_tokens)[0]

    rating = int(result['label'].split()[0])
    if rating >= 4:
        sentiment = "POSITIVO"
        color = "green"
    elif rating == 3:
        sentiment = "NEUTRAL"
        color = "blue"
    else:
        sentiment = "NEGATIVO"
        color = "red"
    return sentiment, result['score'], color

def _render_live_analysis():
    """Render the text box and, for non-blank input, its sentiment result."""
    st.header("🔍 Análisis de Sentimiento en Tiempo Real")
    user_text = st.text_area("Escribe o pega un texto para analizar:", height=150)

    # Whitespace-only input is truthy but carries nothing to classify, so it
    # is skipped instead of being sent to the model.
    if user_text and user_text.strip():
        with st.spinner('Analizando sentimiento...'):
            sentiment, confidence, color = analyze_sentiment(user_text)

            col1, col2 = st.columns(2)
            col1.metric("Sentimiento", sentiment)
            col2.metric("Confianza", f"{confidence:.2%}")

            # ``sentiment`` and ``color`` come from analyze_sentiment, not
            # from the user's text.
            st.markdown(f"<h3 style='color: {color};'>Resultado: {sentiment}</h3>", unsafe_allow_html=True)


def _render_dashboard(df):
    """Render the category filter, charts, metrics and per-category table."""
    # Drop missing categories before sorting: NaN (a float) cannot be ordered
    # against the string category names and would raise TypeError.
    categories = ['Todas'] + sorted(df['product_category'].dropna().unique().tolist())
    selected_category = st.sidebar.selectbox("Categoría de Producto", categories)

    df_filtered = df[df['product_category'] == selected_category] if selected_category != 'Todas' else df

    col1, col2 = st.columns(2)

    with col1:
        sentiment_counts = df_filtered['sentiment'].value_counts()
        fig_sentiment = px.pie(
            values=sentiment_counts.values,
            names=sentiment_counts.index,
            title="Distribución de Sentimientos"
        )
        st.plotly_chart(fig_sentiment, use_container_width=True)

    with col2:
        rating_counts = df_filtered['star_rating'].value_counts().sort_index()
        fig_ratings = px.bar(
            x=rating_counts.index,
            y=rating_counts.values,
            title="Distribución de Ratings",
            labels={'x': 'Rating', 'y': 'Cantidad'}
        )
        st.plotly_chart(fig_ratings, use_container_width=True)

    col1, col2, col3 = st.columns(3)
    col1.metric("Total de Reseñas", f"{len(df_filtered):,}")
    col2.metric("Rating Promedio", f"{df_filtered['star_rating'].mean():.2f}⭐")
    col3.metric("% Positivas", f"{(df_filtered['sentiment'] == 'positive').mean():.1%}")

    # The per-category breakdown is only shown when no single category is
    # selected.
    if selected_category == 'Todas':
        st.header("📊 Análisis por Categoría")
        category_stats = df.groupby('product_category').agg({
            'star_rating': 'mean',
            'sentiment': lambda x: (x == 'positive').mean()
        }).round(3)
        category_stats.columns = ['Rating Promedio', '% Positivas']
        st.dataframe(category_stats)


def main():
    """Render the page: live sentiment analysis, then the reviews dashboard.

    The dashboard is rendered only when ``load_data()`` returns a DataFrame
    (i.e. not ``None``). It reads the ``product_category``, ``sentiment`` and
    ``star_rating`` columns.
    """
    st.title("📊 Análisis de Reseñas de Amazon")

    # Real-time text analysis section
    _render_live_analysis()

    # Separator
    st.markdown("---")

    # Original dashboard
    download_nltk_resources()
    df = load_data()

    if df is not None:
        _render_dashboard(df)

# Script entry point: run the app, then request a garbage-collection pass.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Executed whether or not main() raised.
        gc.collect()