import streamlit as st
import pandas as pd
from transformers import pipeline
import re

# Model configurations: one sentiment model per supported language
MODELS = {
    "English": "MarieAngeA13/Sentiment-Analysis-BERT",
    "Danish": "larskjeldgaard/senda"
}

# Page config
st.set_page_config(
    page_title="Multi-language Sentiment Analyzer",
    page_icon="🎭",
    layout="wide"
)

# Load custom CSS and inject it into the page
with open('style.css') as f:
    st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)


def process_sentiment(text, sentiment_pipeline):
    """Process sentiment for a single text entry."""
    try:
        result = sentiment_pipeline(str(text))
        # Normalise the sentiment label to lowercase
        return result[0]['label'].lower(), result[0]['score']
    except Exception as e:
        st.warning(f"Error processing text: {text[:50]}... Error: {str(e)}")
        return "unknown", 0.0


# App layout
col1, col2, col3 = st.columns([1, 2, 1])

with col2:
    st.title("🎭 Multi-language Sentiment Analysis")

    selected_language = st.selectbox(
        "Select Language",
        options=list(MODELS.keys()),
        index=0
    )

    st.markdown("""
⚠️ Privacy Notice: Your data is processed in memory and not stored.
""", unsafe_allow_html=True) uploaded_file = st.file_uploader("Upload a CSV file with text", type=["csv"]) if uploaded_file: try: df = pd.read_csv(uploaded_file) if "text" not in df.columns: st.error("CSV must contain a 'text' column") else: with st.spinner(f"📊 Analyzing sentiments in {selected_language}..."): def clean_transcript_text(text): speaker_timestamp_pattern = r'Speaker: Speaker [A-Z], Start Time: \d+\.\d+ - End Time: \d+\.\d+' timestamp_pattern = r'Start Time: \d+\.\d+ - End Time: \d+\.\d+' cleaned_text = re.sub(speaker_timestamp_pattern, '', text) if cleaned_text == text: cleaned_text = re.sub(timestamp_pattern, '', text) cleaned_text = re.sub(r'\s+', ' ', cleaned_text) return cleaned_text.strip() df['cleaned_text'] = df['text'].apply(clean_transcript_text) sentiment_pipeline = pipeline( "text-classification", model=MODELS[selected_language], truncation=True, max_length=512 ) results = [process_sentiment(text, sentiment_pipeline) for text in df["cleaned_text"]] df["sentiment"] = [r[0] for r in results] df["confidence"] = [r[1] for r in results] st.markdown("### 📈 Analysis Results") # Fix the sentiment counting logic if selected_language == 'English': pos_count = len(df[df["sentiment"] == "positive"]) neu_count = len(df[df["sentiment"] == "neutral"]) neg_count = len(df[df["sentiment"] == "negative"]) else: # Danish pos_count = len(df[df["sentiment"] == "positiv"]) neu_count = len(df[df["sentiment"] == "neutral"]) neg_count = len(df[df["sentiment"] == "negativ"]) metric_col1, metric_col2, metric_col3, metric_col4 = st.columns(4) with metric_col1: st.metric( "Positive Sentiments", f"{pos_count} ({pos_count/len(df)*100:.1f}%)" ) with metric_col2: st.metric( "Neutral Sentiments", f"{neu_count} ({neu_count/len(df)*100:.1f}%)" ) with metric_col3: st.metric( "Negative Sentiments", f"{neg_count} ({neg_count/len(df)*100:.1f}%)" ) with metric_col4: st.metric( "Average Confidence", f"{df['confidence'].mean():.1%}" ) st.markdown("#### Preview") preview_df = df[["cleaned_text", "sentiment", "confidence"]].head() preview_df["confidence"] = preview_df["confidence"].apply(lambda x: f"{x:.1%}") def highlight_sentiment(val): if val in ["positive", "positiv"]: return 'background-color: rgba(0, 255, 0, 0.2)' elif val in ["negative", "negativ"]: return 'background-color: rgba(255, 0, 0, 0.2)' elif val == "neutral": return 'background-color: rgba(128, 128, 128, 0.2)' return '' st.dataframe( preview_df.style.applymap(highlight_sentiment, subset=['sentiment']), use_container_width=True ) st.markdown("### 💾 Download Results") csv_data = df.to_csv(index=False) st.download_button( label="Download Complete Analysis", data=csv_data, file_name=f"sentiment_results_{selected_language.lower()}.csv", mime="text/csv" ) except Exception as e: st.error(f"Error processing file: {str(e)}") st.error("Full error details:") st.code(str(e)) else: st.markdown("""

📝 How to use:

  1. Select your desired language
  2. Prepare a CSV file with a column named "text"
  3. Upload your file using the button above
  4. Wait for the analysis to complete
  5. Download the results with sentiment labels
""", unsafe_allow_html=True)