|
|
|
|
|
import streamlit as st
|
|
import pandas as pd
|
|
import re
|
|
|
|
|
|
# Plotly is optional: when it is missing the app still runs, charts just
# degrade to Streamlit's built-in basics.
try:
    import plotly.express as px

    PLOTLY_AVAILABLE = True
except ImportError:
    PLOTLY_AVAILABLE = False
    # Non-fatal: warn in the sidebar instead of crashing the app.
    st.sidebar.warning(
        "Plotly not installed. Charts will be basic. Consider 'pip install plotly'."
    )


# Project-local modules: the sentiment model wrapper and the YouTube
# comment fetcher.  These are hard requirements for every tab.
try:
    from predict import (
        predict_sentiments,
    )

    from youtube import (
        get_video_comments,
    )
except ImportError as e:
    st.error(
        f"Failed to import necessary modules (predict.py, youtube.py). Ensure they are in the 'src' directory. Error: {e}"
    )

    # Halt the Streamlit script run — nothing below can work without these.
    st.stop()
|
|
|
|
|
|
def extract_video_id(url_or_id: str):
    """
    Extract the 11-character YouTube video ID from a URL or bare ID.

    Handles the common URL shapes (``watch?v=``, ``youtu.be/``, ``embed/``,
    ``shorts/``) and falls back to treating the input as a bare video ID.

    Args:
        url_or_id: A YouTube URL, or a plain 11-character video ID.

    Returns:
        The video ID string, or None when no valid ID could be found.
    """
    if not url_or_id:
        return None

    # The ID is always 11 characters drawn from [A-Za-z0-9_-], embedded in
    # one of the common URL shapes.
    patterns = [
        r"watch\?v=([a-zA-Z0-9_-]{11})",
        r"youtu\.be/([a-zA-Z0-9_-]{11})",
        r"embed/([a-zA-Z0-9_-]{11})",
        r"shorts/([a-zA-Z0-9_-]{11})",
    ]

    for pattern in patterns:
        match = re.search(pattern, url_or_id)
        if match:
            return match.group(1)

    # Bare-ID fallback: require exactly 11 valid ID characters.  The old
    # length-plus-blacklist check ("no / ? = .") wrongly accepted strings
    # such as "hello world" that can never be a video ID.
    if re.fullmatch(r"[a-zA-Z0-9_-]{11}", url_or_id):
        return url_or_id

    return None
|
|
|
|
|
|
def analyze_youtube_video(video_url_or_id: str):
    """
    Run the full YouTube comment sentiment pipeline for one video.

    Fetches the title and comments via ``get_video_comments`` (youtube.py),
    classifies each comment via ``predict_sentiments`` (predict.py), and
    aggregates counts and percentages for display.

    Args:
        video_url_or_id: A YouTube URL or bare 11-character video ID.

    Returns:
        ``{"summary": {...}, "comments_data": [...]}`` on success (the list
        may be empty when the video has no comments), or None when the input
        is invalid or an unexpected error occurs.  User-facing errors and
        warnings are emitted via ``st.error``/``st.warning`` as side effects.
    """
    video_id = extract_video_id(video_url_or_id)
    if not video_id:
        st.error(
            "Oops! That doesn't look like a valid YouTube URL or Video ID. Please check and try again. Example: Z9kGRMglw-I or youtu.be/3?v=Z9kGRMglw-I"
        )
        return None

    try:
        with st.spinner(f"Fetching comments & title for video ID: {video_id}..."):
            video_data = get_video_comments(video_id)
            comments_text_list = video_data.get("comments", [])
            video_title = video_data.get("title", "Video Title Not Found")
            print(
                f"DEBUG (streamlit_app.py): Received title from youtube.py: '{video_title}'"
            )

        if not comments_text_list:
            st.warning(
                "Hmm, no comments found for this video. Are comments enabled? Or is it a very new video?"
            )
            # Zeroed-out summary so the caller can still render the summary
            # section.  Fix: include the fetched title (it was previously
            # omitted here, so the UI fell back to "Video Title Not Available"
            # even though the title was known).
            summary_data = {
                "video_title": video_title,
                "num_comments_fetched": 0,
                "num_comments_analyzed": 0,
                "positive": 0,
                "neutral": 0,
                "negative": 0,
                "positive_percentage": 0,
                "neutral_percentage": 0,
                "negative_percentage": 0,
                "num_valid_predictions": 0,
            }
            return {"summary": summary_data, "comments_data": []}

        st.info(
            f"Great! Found {len(comments_text_list)} comments. Now thinking about their feelings (sentiments)..."
        )

        with st.spinner("Analyzing sentiments with the model... Please wait."):
            prediction_results = predict_sentiments(comments_text_list)

        # Tally labels; anything outside the three known classes counts as
        # a prediction error.
        counts = {"positive": 0, "negative": 0, "neutral": 0}
        error_count = 0
        for result in prediction_results:
            label = result.get("label")
            if label in counts:
                counts[label] += 1
            else:
                error_count += 1

        num_valid_predictions = sum(counts.values())
        total_comments_processed = len(prediction_results)
        if error_count > 0:
            st.warning(
                f"Could not predict sentiment properly for {error_count} comments."
            )

        def _pct(count):
            # Share of *valid* predictions; 0 when nothing was classified.
            return (count / num_valid_predictions) * 100 if num_valid_predictions > 0 else 0

        summary_data = {
            "video_title": video_title,
            "num_comments_fetched": len(comments_text_list),
            "num_comments_analyzed": total_comments_processed,
            "num_valid_predictions": num_valid_predictions,
            "positive": counts["positive"],
            "negative": counts["negative"],
            "neutral": counts["neutral"],
            "positive_percentage": _pct(counts["positive"]),
            "neutral_percentage": _pct(counts["neutral"]),
            "negative_percentage": _pct(counts["negative"]),
        }

        # Pair each comment with its prediction for the results table.
        # zip() stops at the shorter sequence, so a short prediction list can
        # no longer raise IndexError (the old loop indexed
        # prediction_results[i] unguarded).
        comments_data_for_df = []
        for comment_text, result in zip(comments_text_list, prediction_results):
            scores = result.get("scores", {})
            comments_data_for_df.append(
                {
                    "Comment Text": comment_text,
                    "Predicted Sentiment": result.get("label", "Error"),
                    # Confidence = highest class probability (0.0 if absent).
                    "Confidence": max(scores.values()) if scores else 0.0,
                }
            )

        return {"summary": summary_data, "comments_data": comments_data_for_df}

    except Exception as e:
        # Top-level UI boundary: surface the error, log the traceback, and
        # keep the app alive.
        st.error(f"Uh oh! An error popped up during analysis: {str(e)}")
        print(f"Full error in analyze_youtube_video: {e}")
        import traceback

        traceback.print_exc()
        return None
|
|
|
|
|
|
|
|
|
|
|
|
# --- Page configuration and landing copy ---
st.set_page_config(page_title="Social Sentiment Analysis", layout="centered")

st.title("π SOCIAL SENTIMENT ANALYSIS")

st.write(
    """
Welcome to the **Social Sentiment Analyzer!** π

This application uses a fine-tuned RoBERTa model to predict the sentiment (Positive, Neutral, or Negative) expressed in text.

Use the tabs below to choose your input method:
* **Analyze Text Input:** Paste or type any English text directly.
* **YouTube Analysis:** Enter a YouTube video URL or ID to analyze its comments.
* **Twitter/X Analysis:** Support for analyzing Twitter/X posts is coming soon!

Select a tab to begin!
"""
)


# Three input modes, one tab each; the Twitter tab is a placeholder.
tab_text_input, tab_youtube, tab_twitter = st.tabs(
    ["Analyze Text Input", "YouTube Analysis", "Twitter/X Analysis (Coming Soon!)"]
)
|
|
|
|
with tab_text_input:
    # --- Tab 1: sentiment analysis of free-form text ---
    st.header("Analyze Sentiment of Your Text")
    st.write(
        "Enter a sentence or a short paragraph below to see its predicted sentiment distribution."
    )

    user_text = st.text_area(
        "Enter text here:",
        key="text_input_area_key",
        height=100,
        placeholder="Type or paste your text...",
    )

    if st.button("Analyze Text", key="text_input_analyze_btn"):
        # Only run the model when there is non-whitespace input.
        if user_text and not user_text.isspace():
            with st.spinner("Analyzing your text..."):
                try:
                    # predict_sentiments takes a list of texts; send just one.
                    prediction_results = predict_sentiments([user_text])

                    # Expect a non-empty list of per-text result dicts.
                    if (
                        prediction_results
                        and isinstance(prediction_results, list)
                        and len(prediction_results) > 0
                    ):
                        result = prediction_results[0]
                        predicted_label = result.get("label")
                        scores = result.get(
                            "scores"
                        )

                        # Proceed only with a usable label and a scores dict.
                        if (
                            predicted_label
                            and scores
                            and isinstance(scores, dict)
                            and predicted_label != "Error"
                        ):
                            st.subheader("Predicted Sentiment:")

                            # Colour-code the verdict by sentiment class.
                            if predicted_label == "positive":
                                st.success(
                                    f"The model thinks the sentiment is: **{predicted_label.capitalize()}** π"
                                )
                            elif predicted_label == "negative":
                                st.error(
                                    f"The model thinks the sentiment is: **{predicted_label.capitalize()}** π"
                                )
                            else:
                                st.info(
                                    f"The model thinks the sentiment is: **{predicted_label.capitalize()}** π"
                                )

                            st.write("---")
                            st.subheader(
                                "Detailed Probabilities:"
                            )
                            # NOTE(review): this re-check duplicates the outer
                            # condition above, so its else branch is unreachable.
                            if scores and isinstance(scores, dict):
                                # One column per class: Negative | Neutral | Positive.
                                prob_col_neg, prob_col_neu, prob_col_pos = st.columns(3)

                                def get_score(sentiment_name):
                                    # Scores are keyed by lower-case class name;
                                    # default to 0.0 when a class is missing.
                                    return scores.get(
                                        sentiment_name.lower(), 0.0
                                    )

                                # Shared inline-CSS styling for the three values.
                                value_font_size = "22px"
                                value_font_weight = "bold"

                                with prob_col_neg:
                                    neg_prob = get_score("negative")
                                    st.markdown("**Negative π:**")
                                    st.markdown(
                                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:red;'>{neg_prob:.1%}</p>",
                                        unsafe_allow_html=True,
                                    )

                                with prob_col_neu:
                                    neu_prob = get_score("neutral")
                                    st.markdown("**Neutral π:**")
                                    st.markdown(
                                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:grey;'>{neu_prob:.1%}</p>",
                                        unsafe_allow_html=True,
                                    )

                                with prob_col_pos:
                                    pos_prob = get_score("positive")
                                    st.markdown("**Positive π:**")
                                    st.markdown(
                                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:green;'>{pos_prob:.1%}</p>",
                                        unsafe_allow_html=True,
                                    )

                            else:
                                st.write("Could not retrieve probability scores.")
                            st.write("---")

                            # Pie chart of the class probabilities (Plotly path).
                            st.subheader("Sentiment Probabilities:")
                            if PLOTLY_AVAILABLE:
                                score_items = list(scores.items())
                                if score_items:
                                    df_scores = pd.DataFrame(
                                        score_items,
                                        columns=["Sentiment", "Probability"],
                                    )
                                    # Ensure a numeric dtype for plotting.
                                    df_scores["Probability"] = pd.to_numeric(
                                        df_scores["Probability"]
                                    )

                                    color_map = {
                                        "positive": "green",
                                        "neutral": "grey",
                                        "negative": "red",
                                    }

                                    # Display names are capitalized; a lower-case
                                    # copy keys into the colour map reliably.
                                    df_scores["Sentiment"] = df_scores[
                                        "Sentiment"
                                    ].str.capitalize()
                                    df_scores["Sentiment_Lower"] = df_scores[
                                        "Sentiment"
                                    ].str.lower()
                                    color_map_lower = {
                                        k.lower(): v for k, v in color_map.items()
                                    }

                                    try:
                                        fig_pie_text = px.pie(
                                            df_scores,
                                            values="Probability",
                                            names="Sentiment",
                                            title="Probability Distribution per Class",
                                            color="Sentiment_Lower",
                                            color_discrete_map=color_map_lower,
                                        )

                                        fig_pie_text.update_traces(
                                            textposition="inside",
                                            textinfo="percent+label",
                                            hovertemplate="Sentiment: %{label}<br>Probability: %{percent}",
                                        )

                                        fig_pie_text.update_layout(
                                            uniformtext_minsize=16,
                                            uniformtext_mode="hide",
                                        )

                                        st.plotly_chart(
                                            fig_pie_text, use_container_width=True
                                        )

                                    except Exception as plot_e:
                                        # Chart failure is non-fatal: report it
                                        # and fall back to the raw scores.
                                        st.error(
                                            f"Sorry, couldn't create the probability pie chart: {str(plot_e)}"
                                        )
                                        print(
                                            f"Full error during text input Plotly chart generation: {plot_e}"
                                        )
                                        import traceback

                                        traceback.print_exc()
                                        st.write(
                                            "Raw scores:", scores
                                        )

                                else:
                                    st.warning(
                                        "Received empty scores, cannot plot chart."
                                    )

                            elif not PLOTLY_AVAILABLE:
                                st.warning(
                                    "Plotly not installed, cannot display pie chart. Showing raw scores instead."
                                )
                                st.json(
                                    scores
                                )
                            # NOTE(review): unreachable — PLOTLY_AVAILABLE is
                            # always either True or False.
                            else:
                                st.write("No valid score data available to plot.")

                        else:
                            st.error(
                                f"Sentiment analysis failed for the input text. Result: {result}"
                            )

                    else:
                        st.error(
                            "Received no valid result from the prediction function."
                        )

                except Exception as analysis_e:
                    # Catch-all UI boundary: show the error, log the traceback.
                    st.error(
                        f"An error occurred during text analysis: {str(analysis_e)}"
                    )
                    print(f"Full error during text input analysis: {analysis_e}")
                    import traceback

                    traceback.print_exc()

        else:
            st.warning("Please enter some text in the text area first!")
|
|
|
|
with tab_youtube:
    # --- Tab 2: sentiment analysis of YouTube video comments ---
    st.header("YouTube Comment Sentiment Analyzer")

    video_url_input = st.text_input(
        "Enter YouTube Video URL or Video ID:",
        key="youtube_url_input_key",
        placeholder="e.g., Z9kGRMglw-I or full URL",
    )

    if st.button("Analyze YouTube Comments", key="youtube_analyze_button_key"):
        if video_url_input:
            # Fetch + classify; returns None on bad input or failure
            # (error messages are shown inside analyze_youtube_video).
            analysis_results = analyze_youtube_video(video_url_input)

            if (
                analysis_results and analysis_results["summary"]
            ):
                summary = analysis_results["summary"]
                comments_data = analysis_results["comments_data"]
                video_title_display = summary.get(
                    "video_title", "Video Title Not Available"
                )

                st.markdown("---")

                st.markdown(f"### Analyzing Video: **{video_title_display}**")
                st.markdown("---")

                st.subheader("π Sentiment Summary")

                # Shared inline-CSS styling for the five metric columns.
                label_font_size = "24px"
                value_font_size = "28px"
                value_font_weight = "bold"

                positive_color = "green"
                neutral_color = "grey"
                negative_color = "red"

                # Fetched | Analyzed | Positive | Neutral | Negative
                col_fetched, col_analyzed, col_pos, col_neu, col_neg = st.columns(5)

                with col_fetched:
                    st.markdown(
                        f"<p style='font-size: {label_font_size}; margin-bottom: 0px;'>Comments Fetched</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; margin-top: 0px;'>{summary.get('num_comments_fetched', 0)}</p>",
                        unsafe_allow_html=True,
                    )

                with col_analyzed:
                    st.markdown(
                        f"<p style='font-size: {label_font_size}; margin-bottom: 0px;'>Comments Analyzed</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; margin-top: 0px;'>{summary.get('num_comments_analyzed', 0)}</p>",
                        unsafe_allow_html=True,
                    )

                with col_pos:
                    st.markdown(
                        f"<p style='font-size: {label_font_size}; margin-bottom: 0px;'>Positive π</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:{positive_color}; margin-top: 0px;'>{summary.get('positive', 0)}</p>",
                        unsafe_allow_html=True,
                    )

                with col_neu:
                    st.markdown(
                        f"<p style='font-size: {label_font_size}; margin-bottom: 0px;'>Neutral π</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:{neutral_color}; margin-top: 0px;'>{summary.get('neutral', 0)}</p>",
                        unsafe_allow_html=True,
                    )

                with col_neg:
                    st.markdown(
                        f"<p style='font-size: {label_font_size}; margin-bottom: 0px;'>Negative π</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p style='font-size: {value_font_size}; font-weight: {value_font_weight}; color:{negative_color}; margin-top: 0px;'>{summary.get('negative', 0)}</p>",
                        unsafe_allow_html=True,
                    )

                st.markdown("---")

                # Distribution charts are only meaningful when at least one
                # comment got a valid sentiment prediction.
                if summary.get("num_valid_predictions", 0) > 0:
                    sentiment_data_for_plot = [
                        {"Sentiment": "Positive", "Count": summary.get("positive", 0)},
                        {"Sentiment": "Neutral", "Count": summary.get("neutral", 0)},
                        {"Sentiment": "Negative", "Count": summary.get("negative", 0)},
                    ]
                    sentiment_counts_df = pd.DataFrame(sentiment_data_for_plot)

                    # Drop zero-count classes so the pie chart has no empty slices.
                    sentiment_counts_df_for_plot = sentiment_counts_df[
                        sentiment_counts_df["Count"] > 0
                    ].copy()

                    color_map = {
                        "Positive": "green",
                        "Neutral": "grey",
                        "Negative": "red",
                    }

                    if not sentiment_counts_df_for_plot.empty:
                        st.subheader("π Sentiment Distribution Charts")

                        if PLOTLY_AVAILABLE:
                            try:
                                fig_pie = px.pie(
                                    sentiment_counts_df_for_plot,
                                    values="Count",
                                    names="Sentiment",
                                    title="Pie Chart: Comment Sentiments",
                                    color="Sentiment",
                                    color_discrete_map=color_map,
                                )

                                fig_pie.update_traces(
                                    textposition="inside",
                                    textinfo="percent+label",
                                    hovertemplate="Sentiment: %{label}<br>Count: %{value}<br>Percentage: %{percent}",
                                )

                                fig_pie.update_layout(
                                    uniformtext_minsize=16, uniformtext_mode="hide"
                                )

                                st.plotly_chart(fig_pie, use_container_width=True)

                                fig_bar = px.bar(
                                    sentiment_counts_df_for_plot,
                                    x="Sentiment",
                                    y="Count",
                                    title="Bar Chart: Comment Sentiments",
                                    color="Sentiment",
                                    color_discrete_map=color_map,
                                    labels={
                                        "Count": "Number of Comments",
                                        "Sentiment": "Sentiment Category",
                                    },
                                )
                                st.plotly_chart(fig_bar, use_container_width=True)

                            except Exception as plot_e:
                                # Plotly failure is non-fatal: fall back to the
                                # built-in bar chart (note: the unfiltered df).
                                st.error(
                                    f"Sorry, couldn't create Plotly charts: {plot_e}"
                                )
                                st.write(
                                    "Displaying basic bar chart instead (default colors):"
                                )
                                st.bar_chart(
                                    sentiment_counts_df.set_index("Sentiment")
                                )
                        else:
                            # No Plotly installed: built-in bar chart fallback.
                            st.write(
                                "Displaying basic bar chart (Plotly not installed):"
                            )
                            st.bar_chart(
                                sentiment_counts_df.set_index("Sentiment")
                            )
                    else:
                        st.write(
                            "No sentiment data (Positive, Neutral, Negative all zero) to display in charts."
                        )
                else:
                    st.write(
                        "Not enough valid sentiment data to display distribution charts."
                    )

                # Per-comment results table.
                if comments_data:
                    st.subheader(
                        f"π Analyzed Comments (showing first {len(comments_data)} results)"
                    )
                    comments_display_df = pd.DataFrame(comments_data)

                    # Format the 0..1 confidence as a percentage string for
                    # display; tolerate non-numeric values.
                    if "Confidence" in comments_display_df.columns:
                        try:
                            comments_display_df["Confidence"] = comments_display_df[
                                "Confidence"
                            ].map("{:.1%}".format)
                        except (TypeError, ValueError):
                            st.warning(
                                "Could not format confidence scores."
                            )

                    st.dataframe(
                        comments_display_df, use_container_width=True, height=400
                    )
                else:
                    st.write("No comments were analyzed to display.")

        else:
            st.warning("Please enter a YouTube URL or Video ID first!")
|
|
|
|
with tab_twitter:
    # --- Tab 3: placeholder — Twitter/X analysis is not implemented yet ---
    st.header("Twitter/X Post Analysis")
    st.info("This feature is currently under construction. Please check back later!")
|
|
|
|
|
|
|
|
|
|
|