# Apasalic's picture
# Update app.py
# 4fd5693 verified
import streamlit as st
import pandas as pd
from transformers import pipeline
import re
# Model configurations: maps the language name offered in the UI to the model
# identifier that is later passed to transformers.pipeline(model=...).
MODELS = {
    "English": "MarieAngeA13/Sentiment-Analysis-BERT",
    "Danish": "larskjeldgaard/senda",
}

# Page config
st.set_page_config(
    page_title="Multi-language Sentiment Analyzer",
    page_icon="🎭",
    layout="wide",
)

# Load custom CSS and inject it into the page as an inline <style> element.
# The encoding is explicit so the stylesheet is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open('style.css', encoding='utf-8') as css_file:
    st.markdown(f'<style>{css_file.read()}</style>', unsafe_allow_html=True)
def process_sentiment(text, pipeline):
    """Classify the sentiment of a single text entry.

    Args:
        text: The value to classify. It is converted with ``str()`` before
            use, so non-string values (numbers, NaN, ...) are accepted.
        pipeline: A callable that takes a string and returns a sequence whose
            first item is a mapping with ``'label'`` and ``'score'`` keys.
            Note that inside this function the name shadows the ``pipeline``
            factory imported from ``transformers``.

    Returns:
        A ``(label, score)`` tuple with the label lowercased. If the
        classification raises, a Streamlit warning is shown and
        ``("unknown", 0.0)`` is returned instead.
    """
    # Coerce once so the classifier call and the warning message below both
    # work on a real string; slicing the raw value would fail for e.g. floats.
    text = str(text)
    try:
        top_prediction = pipeline(text)[0]
        # Convert sentiment to lowercase
        return top_prediction['label'].lower(), top_prediction['score']
    except Exception as e:
        # Broad catch: a failure on one entry yields the fallback result
        # rather than propagating to the caller.
        st.warning(f"Error processing text: {text[:50]}... Error: {str(e)}")
        return "unknown", 0.0
# App layout

# Static HTML snippets rendered with st.markdown(..., unsafe_allow_html=True).
PRIVACY_NOTICE_HTML = """
<div class="privacy-notice">
⚠️ <b>Privacy Notice:</b> Your data is processed in memory and not stored.
</div>
"""

INSTRUCTIONS_HTML = """
<div class="instructions">
<h4>📝 How to use:</h4>
<ol>
<li>Select your desired language</li>
<li>Prepare a CSV file with a column named "text"</li>
<li>Upload your file using the button above</li>
<li>Wait for the analysis to complete</li>
<li>Download the results with sentiment labels</li>
</ol>
</div>
"""

# Lowercased sentiment labels grouped by polarity. Both the English and the
# Danish spellings are listed so counting and highlighting agree.
POSITIVE_LABELS = ("positive", "positiv")
NEUTRAL_LABELS = ("neutral",)
NEGATIVE_LABELS = ("negative", "negativ")

# Transcript markers stripped from the text before classification.
_SPEAKER_TIMESTAMP_RE = re.compile(
    r'Speaker: Speaker [A-Z], Start Time: \d+\.\d+ - End Time: \d+\.\d+'
)
_TIMESTAMP_RE = re.compile(r'Start Time: \d+\.\d+ - End Time: \d+\.\d+')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_transcript_text(text):
    """Remove speaker/timestamp markers from *text* and normalise whitespace.

    Missing values (NaN/None) become an empty string and any other non-string
    value is converted with ``str()``, so a stray numeric or empty cell does
    not abort the whole analysis.

    The bare timestamp pattern is only tried when the speaker+timestamp
    pattern removed nothing. Runs of whitespace are collapsed to one space
    and the result is stripped.
    """
    text = "" if pd.isna(text) else str(text)
    cleaned_text = _SPEAKER_TIMESTAMP_RE.sub('', text)
    if cleaned_text == text:
        cleaned_text = _TIMESTAMP_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', cleaned_text).strip()


def highlight_sentiment(val):
    """Return the CSS background style for a sentiment label ('' if unknown)."""
    if val in POSITIVE_LABELS:
        return 'background-color: rgba(0, 255, 0, 0.2)'
    if val in NEGATIVE_LABELS:
        return 'background-color: rgba(255, 0, 0, 0.2)'
    if val in NEUTRAL_LABELS:
        return 'background-color: rgba(128, 128, 128, 0.2)'
    return ''


# NOTE(review): the header widgets are placed in the centred column and the
# results are rendered at full page width below it - confirm this matches the
# intended layout.
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    st.title("🎭 Multi-language Sentiment Analysis")
    selected_language = st.selectbox(
        "Select Language",
        options=list(MODELS),
        index=0,
    )
    st.markdown(PRIVACY_NOTICE_HTML, unsafe_allow_html=True)
    uploaded_file = st.file_uploader("Upload a CSV file with text", type=["csv"])

if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
        if "text" not in df.columns:
            st.error("CSV must contain a 'text' column")
        elif df.empty:
            # Without this guard the percentage calculations below would
            # divide by zero for a header-only file.
            st.warning("The uploaded CSV contains no rows to analyze.")
        else:
            with st.spinner(f"📊 Analyzing sentiments in {selected_language}..."):
                df['cleaned_text'] = df['text'].apply(clean_transcript_text)
                # NOTE(review): Streamlit re-executes the script on widget
                # interaction, so the model is rebuilt and the file
                # re-analysed each time; consider caching the pipeline
                # (e.g. st.cache_resource) if the deployed version has it.
                sentiment_pipeline = pipeline(
                    "text-classification",
                    model=MODELS[selected_language],
                    truncation=True,
                    max_length=512,
                )
                results = [
                    process_sentiment(text, sentiment_pipeline)
                    for text in df["cleaned_text"]
                ]
                df["sentiment"] = [label for label, _ in results]
                df["confidence"] = [score for _, score in results]

            st.markdown("### 📈 Analysis Results")
            total = len(df)
            sentiments = df["sentiment"]
            pos_count = int(sentiments.isin(POSITIVE_LABELS).sum())
            neu_count = int(sentiments.isin(NEUTRAL_LABELS).sum())
            neg_count = int(sentiments.isin(NEGATIVE_LABELS).sum())

            metrics = [
                ("Positive Sentiments", f"{pos_count} ({pos_count/total*100:.1f}%)"),
                ("Neutral Sentiments", f"{neu_count} ({neu_count/total*100:.1f}%)"),
                ("Negative Sentiments", f"{neg_count} ({neg_count/total*100:.1f}%)"),
                ("Average Confidence", f"{df['confidence'].mean():.1%}"),
            ]
            for metric_col, (label, value) in zip(st.columns(4), metrics):
                with metric_col:
                    st.metric(label, value)

            st.markdown("#### Preview")
            # Copy the slice so formatting the confidence column neither
            # touches df nor triggers pandas' SettingWithCopyWarning.
            preview_df = df[["cleaned_text", "sentiment", "confidence"]].head().copy()
            preview_df["confidence"] = preview_df["confidence"].apply(lambda x: f"{x:.1%}")
            styler = preview_df.style
            # Styler.applymap is deprecated in favour of Styler.map in newer
            # pandas; use whichever the installed version provides.
            style_cells = styler.map if hasattr(styler, "map") else styler.applymap
            st.dataframe(
                style_cells(highlight_sentiment, subset=['sentiment']),
                use_container_width=True,
            )

            st.markdown("### 💾 Download Results")
            csv_data = df.to_csv(index=False)
            st.download_button(
                label="Download Complete Analysis",
                data=csv_data,
                file_name=f"sentiment_results_{selected_language.lower()}.csv",
                mime="text/csv",
            )
    except Exception as e:
        # Top-level boundary of the script: report the failure in the UI
        # instead of letting the exception escape.
        st.error(f"Error processing file: {str(e)}")
        st.error("Full error details:")
        st.code(str(e))
else:
    st.markdown(INSTRUCTIONS_HTML, unsafe_allow_html=True)