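# NOTE: the three lines that preceded this script ("Spaces:", "Sleeping",
# "Sleeping") were web-page residue from where the file was captured, not code.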
import re

import pandas as pd
import streamlit as st
from transformers import pipeline
# Model configurations.
# Maps each supported language to the model identifier handed to
# ``transformers.pipeline(model=...)``. The keys are also the options listed
# in the "Select Language" box, in this order.
MODELS = {
    "English": "MarieAngeA13/Sentiment-Analysis-BERT",
    "Danish": "larskjeldgaard/senda",
}
# Page config. Streamlit requires this to be the first Streamlit command the
# script executes, so keep it above every other ``st.*`` call.
st.set_page_config(
    page_title="Multi-language Sentiment Analyzer",
    # NOTE(review): "π" looks like a mis-decoded emoji (only the first byte of
    # the original character survived); restore the intended icon.
    page_icon="π",
    layout="wide",
)

# Load custom CSS and inject it into the page. The encoding is pinned so the
# stylesheet is decoded the same way regardless of the host's locale default.
with open('style.css', encoding='utf-8') as f:
    st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
def process_sentiment(text, pipeline):
    """Classify a single text entry and return ``(label, score)``.

    Args:
        text: Value to classify. It is converted with ``str()`` before being
            handed to the classifier, so non-string cells are accepted.
        pipeline: Callable taking one string and returning a sequence whose
            first item is a mapping with ``'label'`` and ``'score'`` keys.
            The name shadows the imported ``transformers.pipeline`` inside
            this function; it is kept so existing callers keep working.

    Returns:
        The lower-cased label and its score, or ``("unknown", 0.0)`` when
        classification raises.
    """
    try:
        top_prediction = pipeline(str(text))[0]
        # Convert sentiment to lowercase so label comparisons are case-stable.
        return top_prediction['label'].lower(), top_prediction['score']
    except Exception as e:
        # Best effort: report the failing row and carry on with the batch.
        # The preview is built from str(text) because ``text`` itself may be a
        # float/int cell, which cannot be sliced.
        st.warning(f"Error processing text: {str(text)[:50]}... Error: {str(e)}")
        return "unknown", 0.0
# App layout
#
# NOTE(review): the source this was recovered from had lost all indentation,
# so the nesting below is reconstructed from syntax. In particular, confirm
# that the results were meant to render full width rather than inside the
# centre column.
# NOTE(review): every lone "π" in the UI strings is a mis-decoded emoji whose
# original code point cannot be recovered; those strings are left untouched.

# A transcript header is a time range with an optional speaker prefix, e.g.
#   "Speaker: Speaker A, Start Time: 0.00 - End Time: 4.20"
#   "Start Time: 0.00 - End Time: 4.20"
# One pattern removes both forms, including when they are mixed in one text.
TRANSCRIPT_HEADER_RE = re.compile(
    r'(?:Speaker: Speaker [A-Z], )?Start Time: \d+\.\d+ - End Time: \d+\.\d+'
)
WHITESPACE_RE = re.compile(r'\s+')

# Label spellings: the English branch used "positive"/"negative" and the
# Danish branch "positiv"/"negativ"; "neutral" is shared.
POSITIVE_LABELS = frozenset({"positive", "positiv"})
NEGATIVE_LABELS = frozenset({"negative", "negativ"})
NEUTRAL_LABEL = "neutral"

SENTIMENT_STYLES = {
    "positive": 'background-color: rgba(0, 255, 0, 0.2)',
    "positiv": 'background-color: rgba(0, 255, 0, 0.2)',
    "negative": 'background-color: rgba(255, 0, 0, 0.2)',
    "negativ": 'background-color: rgba(255, 0, 0, 0.2)',
    "neutral": 'background-color: rgba(128, 128, 128, 0.2)',
}

PRIVACY_NOTICE_HTML = """
<div class="privacy-notice">
⚠️ <b>Privacy Notice:</b> Your data is processed in memory and not stored.
</div>
"""

INSTRUCTIONS_HTML = """
<div class="instructions">
<h4>π How to use:</h4>
<ol>
<li>Select your desired language</li>
<li>Prepare a CSV file with a column named "text"</li>
<li>Upload your file using the button above</li>
<li>Wait for the analysis to complete</li>
<li>Download the results with sentiment labels</li>
</ol>
</div>
"""


def clean_transcript_text(text):
    """Strip transcript headers from *text* and collapse its whitespace.

    Args:
        text: A single cell value. Missing values (``None``/NaN) become an
            empty string and other non-strings are converted with ``str()``,
            so one blank or numeric cell cannot abort the whole file.

    Returns:
        The text without speaker/timestamp headers, with every whitespace run
        reduced to one space and both ends trimmed.
    """
    if not isinstance(text, str):
        text = "" if pd.isna(text) else str(text)
    without_headers = TRANSCRIPT_HEADER_RE.sub('', text)
    return WHITESPACE_RE.sub(' ', without_headers).strip()


def count_sentiments(labels):
    """Return ``(positive, neutral, negative)`` counts for *labels*.

    Both the English and the Danish label spellings are recognised; any other
    label (for example ``"unknown"``) is not counted.
    """
    labels = list(labels)
    positive = sum(label in POSITIVE_LABELS for label in labels)
    neutral = sum(label == NEUTRAL_LABEL for label in labels)
    negative = sum(label in NEGATIVE_LABELS for label in labels)
    return positive, neutral, negative


def format_share(count, total):
    """Format *count* as ``"<count> (<percent>%)"`` of *total*.

    A zero *total* yields ``0.0%`` instead of raising ``ZeroDivisionError``.
    """
    percent = count / total * 100 if total else 0.0
    return f"{count} ({percent:.1f}%)"


def highlight_sentiment(val):
    """Return the CSS background for a sentiment label ('' when unrecognised)."""
    return SENTIMENT_STYLES.get(val, '')


def load_sentiment_pipeline(language):
    """Build the text-classification pipeline for *language* (a ``MODELS`` key).

    The pipeline is created with ``truncation=True`` and ``max_length=512``.
    """
    return pipeline(
        "text-classification",
        model=MODELS[language],
        truncation=True,
        max_length=512,
    )


def render_results(df, selected_language):
    """Show the summary metrics, a styled preview and the download button.

    Args:
        df: Analysed frame containing ``cleaned_text``, ``sentiment`` and
            ``confidence`` columns.
        selected_language: Language name, used in the download file name.
    """
    st.markdown("### π Analysis Results")

    pos_count, neu_count, neg_count = count_sentiments(df["sentiment"])
    total = len(df)
    metrics = (
        ("Positive Sentiments", format_share(pos_count, total)),
        ("Neutral Sentiments", format_share(neu_count, total)),
        ("Negative Sentiments", format_share(neg_count, total)),
        ("Average Confidence", f"{df['confidence'].mean():.1%}"),
    )
    for column, (label, value) in zip(st.columns(len(metrics)), metrics):
        with column:
            st.metric(label, value)

    st.markdown("#### Preview")
    # Copy the slice so formatting the preview never writes into (or warns
    # about writing into) the full result frame.
    preview_df = df[["cleaned_text", "sentiment", "confidence"]].head().copy()
    preview_df["confidence"] = preview_df["confidence"].apply(lambda x: f"{x:.1%}")

    # pandas 2.1 renamed Styler.applymap to Styler.map; prefer the new name
    # and fall back so older pandas releases keep working.
    styler = preview_df.style
    style_cells = getattr(styler, "map", None) or styler.applymap
    st.dataframe(
        style_cells(highlight_sentiment, subset=['sentiment']),
        use_container_width=True,
    )

    st.markdown("### 💾 Download Results")
    csv_data = df.to_csv(index=False)
    st.download_button(
        label="Download Complete Analysis",
        data=csv_data,
        file_name=f"sentiment_results_{selected_language.lower()}.csv",
        mime="text/csv",
    )


def analyze_upload(uploaded_file, selected_language):
    """Read the uploaded CSV, classify every row and render the results.

    Shows an error when the ``text`` column is missing and a warning when the
    file has no rows; in both cases nothing is analysed. Exceptions from
    reading the CSV or loading the model propagate to the caller.
    """
    df = pd.read_csv(uploaded_file)
    if "text" not in df.columns:
        st.error("CSV must contain a 'text' column")
        return
    if df.empty:
        # A header-only file would otherwise fail later with a division by zero.
        st.warning("The uploaded CSV has no rows to analyze.")
        return

    with st.spinner(f"π Analyzing sentiments in {selected_language}..."):
        df['cleaned_text'] = df['text'].apply(clean_transcript_text)

        # Streamlit re-executes the script on every interaction (including a
        # click on the download button), so keep one loaded model per language
        # when this Streamlit version offers cache_resource.
        cache_resource = getattr(st, "cache_resource", None)
        get_pipeline = (
            cache_resource(load_sentiment_pipeline)
            if cache_resource
            else load_sentiment_pipeline
        )
        sentiment_pipeline = get_pipeline(selected_language)

        results = [process_sentiment(text, sentiment_pipeline) for text in df["cleaned_text"]]
        df["sentiment"] = [label for label, _ in results]
        df["confidence"] = [score for _, score in results]

    render_results(df, selected_language)


def main():
    """Render the page: inputs first, then the analysis or the usage help."""
    _, centre_col, _ = st.columns([1, 2, 1])
    with centre_col:
        st.title("π Multi-language Sentiment Analysis")
        selected_language = st.selectbox(
            "Select Language",
            options=list(MODELS.keys()),
            index=0,
        )
        st.markdown(PRIVACY_NOTICE_HTML, unsafe_allow_html=True)
        uploaded_file = st.file_uploader("Upload a CSV file with text", type=["csv"])

    if not uploaded_file:
        st.markdown(INSTRUCTIONS_HTML, unsafe_allow_html=True)
        return

    try:
        analyze_upload(uploaded_file, selected_language)
    except Exception as e:
        # Top-level boundary: surface the failure in the UI instead of letting
        # the script crash.
        st.error(f"Error processing file: {str(e)}")
        st.error("Full error details:")
        st.code(str(e))


# ``streamlit run`` executes the script as ``__main__``; the guard keeps the
# helpers importable without starting the UI.
if __name__ == "__main__":
    main()