AlainDeLong committed on
Commit
c4ad43e
·
1 Parent(s): f531887

update app

Browse files
Files changed (4) hide show
  1. Dockerfile +0 -21
  2. app.py +709 -0
  3. src/__init__.py +0 -0
  4. src/youtube.py +3 -10
Dockerfile DELETED
@@ -1,21 +0,0 @@
1
- FROM python:3.9-slim
2
-
3
- WORKDIR /app
4
-
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- software-properties-common \
9
- git \
10
- && rm -rf /var/lib/apt/lists/*
11
-
12
- COPY requirements.txt ./
13
- COPY src/ ./src/
14
-
15
- RUN pip3 install -r requirements.txt
16
-
17
- EXPOSE 8501
18
-
19
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
-
21
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,709 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/streamlit_app.py
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ import re # For robust YouTube video ID extraction
6
+
7
+ # Try to import Plotly, if not available, we'll use Streamlit's basic charts
8
+ try:
9
+ import plotly.express as px
10
+
11
+ PLOTLY_AVAILABLE = True
12
+ except ImportError:
13
+ PLOTLY_AVAILABLE = False
14
+ st.sidebar.warning(
15
+ "Plotly not installed. Charts will be basic. Consider 'pip install plotly'."
16
+ ) # Optional warning
17
+
18
+ # Import our custom modules from the src directory
19
+ try:
20
+ from src.predict import (
21
+ predict_sentiments,
22
+ ) # This function should return list of strings: "positive", "negative", "neutral"
23
+ from src.youtube import (
24
+ get_video_comments,
25
+ ) # This function should return a list of comment strings
26
+ except ImportError as e:
27
+ st.error(
28
+ f"Failed to import necessary modules (predict.py, youtube.py). Ensure they are in the 'src' directory. Error: {e}"
29
+ )
30
+ # Stop the app if core modules are missing
31
+ st.stop()
32
+
33
+
34
def extract_video_id(url_or_id: str):
    """
    Return the 11-character YouTube video ID contained in *url_or_id*.

    Recognises standard watch URLs (the ``v`` parameter may appear anywhere
    in the query string), ``youtu.be`` short links, embed URLs and Shorts
    URLs, as well as a bare video ID. Leading and trailing whitespace is
    ignored so that pasted input still resolves.

    Args:
        url_or_id: A YouTube URL or a bare video ID.

    Returns:
        The video ID as a string, or None when no ID can be recognised.
    """
    if not url_or_id:
        return None

    candidate = url_or_id.strip()
    if not candidate:
        return None

    # Video IDs are exactly 11 characters from this alphabet.
    id_chars = r"[a-zA-Z0-9_-]{11}"

    # Order matters: more specific patterns come first if overlap exists.
    patterns = (
        # Watch URL; other query parameters may precede "v".
        rf"watch\?(?:[^#\s]*?&)?v=({id_chars})",
        rf"youtu\.be/({id_chars})",  # Shortened URL
        rf"embed/({id_chars})",  # Embed URL
        rf"shorts/({id_chars})",  # Shorts URL
    )
    for pattern in patterns:
        match = re.search(pattern, candidate)
        if match:
            return match.group(1)  # The first capturing group is the ID

    # Not a URL: accept the input only if it is, in full, a well-formed ID.
    # A plain length check would let strings such as "hello world" through.
    if re.fullmatch(id_chars, candidate):
        return candidate

    return None
65
+
66
+
67
def _build_sentiment_summary(video_title, num_fetched, num_analyzed, counts):
    """Assemble the summary dict for the YouTube tab from per-label counts."""
    num_valid = sum(counts.values())

    def percentage(label):
        # Guard against division by zero when nothing could be classified.
        return (counts[label] / num_valid) * 100 if num_valid > 0 else 0

    return {
        "video_title": video_title,
        "num_comments_fetched": num_fetched,
        "num_comments_analyzed": num_analyzed,
        "num_valid_predictions": num_valid,
        "positive": counts["positive"],
        "negative": counts["negative"],
        "neutral": counts["neutral"],
        "positive_percentage": percentage("positive"),
        "neutral_percentage": percentage("neutral"),
        "negative_percentage": percentage("negative"),
    }


def analyze_youtube_video(video_url_or_id: str):
    """
    Fetch a video's comments, predict their sentiments and summarise them.

    Progress and problems are reported to the user through Streamlit
    (spinners, info, warnings, errors) instead of being raised.

    Args:
        video_url_or_id: YouTube URL or bare video ID entered by the user.

    Returns:
        A dict ``{"summary": {...}, "comments_data": [...]}``. When the video
        has no comments the summary holds zero counts and ``comments_data``
        is empty. Returns None when the input contains no recognisable video
        ID or when an unexpected error occurs during analysis.
    """
    video_id = extract_video_id(video_url_or_id)
    if not video_id:
        # The example must itself be an input that extract_video_id accepts.
        st.error(
            "Oops! That doesn't look like a valid YouTube URL or Video ID. Please check and try again. Example: Z9kGRMglw-I or youtu.be/Z9kGRMglw-I"
        )
        return None  # Stop if no valid ID

    try:
        with st.spinner(f"Fetching comments & title for video ID: {video_id}..."):
            video_data = get_video_comments(video_id)
        comments_text_list = video_data.get("comments", [])
        video_title = video_data.get("title", "Video Title Not Found")
        print(
            f"DEBUG (streamlit_app.py): Received title from youtube.py: '{video_title}'"
        )

        if not comments_text_list:
            st.warning(
                "Hmm, no comments found for this video. Are comments enabled? Or is it a very new video?"
            )
            # Keep the fetched title so the UI can still name the video.
            empty_counts = {"positive": 0, "negative": 0, "neutral": 0}
            return {
                "summary": _build_sentiment_summary(video_title, 0, 0, empty_counts),
                "comments_data": [],
            }

        st.info(
            f"Great! Found {len(comments_text_list)} comments. Now thinking about their feelings (sentiments)..."
        )
        # Separate spinner for the prediction part, as this can be slow on CPU.
        with st.spinner("Analyzing sentiments with the model... Please wait."):
            # Each result is read below as a dict with a "label" key and an
            # optional "scores" mapping of label -> probability.
            prediction_results = predict_sentiments(comments_text_list)

        counts = {"positive": 0, "negative": 0, "neutral": 0}
        error_count = 0
        for result in prediction_results:
            label = result.get("label")
            if label in counts:
                counts[label] += 1
            else:
                # Anything other than the three known labels is a failure.
                error_count += 1

        if error_count > 0:
            st.warning(
                f"Could not predict sentiment properly for {error_count} comments."
            )

        if len(prediction_results) != len(comments_text_list):
            st.warning(
                f"Received {len(prediction_results)} predictions for {len(comments_text_list)} comments; showing only the comments that have a prediction."
            )

        summary_data = _build_sentiment_summary(
            video_title,
            len(comments_text_list),
            len(prediction_results),
            counts,
        )

        # zip() pairs comments with predictions without risking an IndexError
        # when the two lists differ in length.
        comments_data_for_df = []
        for comment_text, result in zip(comments_text_list, prediction_results):
            scores = result.get("scores", {})
            comments_data_for_df.append(
                {
                    "Comment Text": comment_text,
                    "Predicted Sentiment": result.get("label", "Error"),
                    # Confidence is the highest class probability, if any.
                    "Confidence": max(scores.values()) if scores else 0.0,
                }
            )

        return {"summary": summary_data, "comments_data": comments_data_for_df}

    except Exception as e:
        # Top-level UI boundary: show a general error to the user and print
        # the full traceback to the console for local debugging.
        st.error(f"Uh oh! An error popped up during analysis: {str(e)}")
        print(f"Full error in analyze_youtube_video: {e}")
        import traceback

        traceback.print_exc()
        return None
197
+
198
+
199
# --- Streamlit App UI ---

# NOTE(review): Streamlit has documented st.set_page_config as needing to be
# the first Streamlit command in a script, but the Plotly import fallback
# above may call st.sidebar.warning before this line runs. Confirm against
# the deployed Streamlit version.
PAGE_LAYOUT = "centered"  # Streamlit's default; deliberately not "wide"
st.set_page_config(page_title="Social Sentiment Analysis", layout=PAGE_LAYOUT)

st.title("📊 SOCIAL SENTIMENT ANALYSIS")

# Short introduction shown above the tabs.
INTRO_MARKDOWN = """
Welcome to the **Social Sentiment Analyzer!** 👋

This application uses a fine-tuned RoBERTa model to predict the sentiment (Positive, Neutral, or Negative) expressed in text.

Use the tabs below to choose your input method:
* **Analyze Text Input:** Paste or type any English text directly.
* **YouTube Analysis:** Enter a YouTube video URL or ID to analyze its comments.
* **Twitter/X Analysis:** Support for analyzing Twitter/X posts is coming soon!

Select a tab to begin!
"""
st.write(INTRO_MARKDOWN)

# One tab per input source; the Twitter/X tab is a placeholder for now.
TAB_LABELS = [
    "Analyze Text Input",
    "YouTube Analysis",
    "Twitter/X Analysis (Coming Soon!)",
]
tab_text_input, tab_youtube, tab_twitter = st.tabs(TAB_LABELS)
225
+
226
def _render_probability_columns(scores):
    """Show the negative / neutral / positive probabilities side by side."""
    column_specs = (
        ("negative", "**Negative 👎:**", "red"),
        ("neutral", "**Neutral 😐:**", "grey"),
        ("positive", "**Positive 👍:**", "green"),
    )
    for column, (label_key, heading, colour) in zip(st.columns(3), column_specs):
        with column:
            st.markdown(heading)
            # A class missing from the scores dict is displayed as 0.0%.
            st.markdown(
                f"<p style='font-size: 22px; font-weight: bold; color:{colour};'>{scores.get(label_key, 0.0):.1%}</p>",
                unsafe_allow_html=True,
            )


def _render_probability_pie(scores):
    """Draw the class-probability pie chart, or show raw scores without Plotly."""
    if not PLOTLY_AVAILABLE:
        st.warning(
            "Plotly not installed, cannot display pie chart. Showing raw scores instead."
        )
        st.json(scores)
        return

    df_scores = pd.DataFrame(
        list(scores.items()), columns=["Sentiment", "Probability"]
    )
    # Convert Probability to numeric just in case.
    df_scores["Probability"] = pd.to_numeric(df_scores["Probability"])
    # Capitalised names are used as slice labels; the lowercase copy is the
    # key used to look colours up in color_map.
    df_scores["Sentiment"] = df_scores["Sentiment"].str.capitalize()
    df_scores["Sentiment_Lower"] = df_scores["Sentiment"].str.lower()
    color_map = {"positive": "green", "neutral": "grey", "negative": "red"}

    try:
        fig_pie_text = px.pie(
            df_scores,
            values="Probability",
            names="Sentiment",
            title="Probability Distribution per Class",
            color="Sentiment_Lower",
            color_discrete_map=color_map,
        )
        fig_pie_text.update_traces(
            textposition="inside",
            textinfo="percent+label",
            hovertemplate="Sentiment: %{label}<br>Probability: %{percent}",
        )
        # Hide slice text that cannot be shown at the minimum size.
        fig_pie_text.update_layout(
            uniformtext_minsize=16,
            uniformtext_mode="hide",
        )
        st.plotly_chart(fig_pie_text, use_container_width=True)
    except Exception as plot_e:
        st.error(f"Sorry, couldn't create the probability pie chart: {str(plot_e)}")
        print(f"Full error during text input Plotly chart generation: {plot_e}")
        import traceback

        traceback.print_exc()
        st.write("Raw scores:", scores)  # Show raw scores as fallback


def _render_text_prediction(prediction_results):
    """Render the predicted label, probabilities and chart for one input text."""
    # predict_sentiments returned None, an empty list or a non-list.
    if not (prediction_results and isinstance(prediction_results, list)):
        st.error("Received no valid result from the prediction function.")
        return

    result = prediction_results[0]
    predicted_label = result.get("label")
    scores = result.get("scores")  # Read below as a dict of label -> probability

    # The result must carry a real label and a non-empty scores dict.
    if not (
        predicted_label
        and scores
        and isinstance(scores, dict)
        and predicted_label != "Error"
    ):
        st.error(f"Sentiment analysis failed for the input text. Result: {result}")
        return

    st.subheader("Predicted Sentiment:")
    # Streamlit's status elements provide the colour; any label other than
    # positive/negative gets the neutral banner.
    banners = {"positive": (st.success, "👍"), "negative": (st.error, "👎")}
    show_banner, emoji = banners.get(predicted_label, (st.info, "😐"))
    show_banner(
        f"The model thinks the sentiment is: **{predicted_label.capitalize()}** {emoji}"
    )

    st.write("---")
    st.subheader("Detailed Probabilities:")
    _render_probability_columns(scores)
    st.write("---")

    st.subheader("Sentiment Probabilities:")
    _render_probability_pie(scores)


with tab_text_input:
    st.header("Analyze Sentiment of Your Text")
    st.write(
        "Enter a sentence or a short paragraph below to see its predicted sentiment distribution."
    )

    # text_area allows longer input than a single-line field.
    user_text = st.text_area(
        "Enter text here:",
        key="text_input_area_key",
        height=100,
        placeholder="Type or paste your text...",
    )

    if st.button("Analyze Text", key="text_input_analyze_btn"):
        if not user_text or not user_text.strip():
            # Button clicked with empty or whitespace-only input.
            st.warning("Please enter some text in the text area first!")
        else:
            try:
                with st.spinner("Analyzing your text..."):
                    # The predictor takes a list of texts; pass a single one.
                    prediction_results = predict_sentiments([user_text])
                _render_text_prediction(prediction_results)
            except Exception as analysis_e:
                # Top-level UI boundary for this tab: report, log, continue.
                st.error(f"An error occurred during text analysis: {str(analysis_e)}")
                print(f"Full error during text input analysis: {analysis_e}")
                import traceback

                traceback.print_exc()
463
+
464
def _render_summary_metric(column, label, value, value_color=None):
    """Render one labelled count inside *column*, optionally coloured."""
    colour_css = f" color:{value_color};" if value_color else ""
    with column:
        st.markdown(
            f"<p style='font-size: 24px; margin-bottom: 0px;'>{label}</p>",
            unsafe_allow_html=True,
        )
        st.markdown(
            f"<p style='font-size: 28px; font-weight: bold;{colour_css} margin-top: 0px;'>{value}</p>",
            unsafe_allow_html=True,
        )


def _render_sentiment_charts(summary):
    """Draw the pie and bar charts of comment sentiment counts."""
    sentiment_counts_df = pd.DataFrame(
        [
            {"Sentiment": "Positive", "Count": summary.get("positive", 0)},
            {"Sentiment": "Neutral", "Count": summary.get("neutral", 0)},
            {"Sentiment": "Negative", "Count": summary.get("negative", 0)},
        ]
    )
    # Drop zero-count rows for cleaner charts. The caller only gets here when
    # num_valid_predictions > 0, which analyze_youtube_video computes as the
    # sum of these three counts.
    sentiment_counts_df_for_plot = sentiment_counts_df[
        sentiment_counts_df["Count"] > 0
    ].copy()

    # Keys must match the values of the "Sentiment" column.
    color_map = {"Positive": "green", "Neutral": "grey", "Negative": "red"}

    st.subheader("📈 Sentiment Distribution Charts")
    if not PLOTLY_AVAILABLE:
        # Fall back to Streamlit's basic bar chart.
        st.write("Displaying basic bar chart (Plotly not installed):")
        st.bar_chart(sentiment_counts_df.set_index("Sentiment"))
        return

    try:
        fig_pie = px.pie(
            sentiment_counts_df_for_plot,
            values="Count",
            names="Sentiment",
            title="Pie Chart: Comment Sentiments",
            color="Sentiment",
            color_discrete_map=color_map,
        )
        fig_pie.update_traces(
            textposition="inside",
            textinfo="percent+label",
            hovertemplate="Sentiment: %{label}<br>Count: %{value}<br>Percentage: %{percent}",
        )
        fig_pie.update_layout(uniformtext_minsize=16, uniformtext_mode="hide")
        st.plotly_chart(fig_pie, use_container_width=True)

        fig_bar = px.bar(
            sentiment_counts_df_for_plot,
            x="Sentiment",
            y="Count",
            title="Bar Chart: Comment Sentiments",
            color="Sentiment",
            color_discrete_map=color_map,
            labels={
                "Count": "Number of Comments",
                "Sentiment": "Sentiment Category",
            },
        )
        st.plotly_chart(fig_bar, use_container_width=True)
    except Exception as plot_e:
        # Plotly is installed but failed; fall back to the basic chart,
        # which uses the unfiltered frame.
        st.error(f"Sorry, couldn't create Plotly charts: {plot_e}")
        st.write("Displaying basic bar chart instead (default colors):")
        st.bar_chart(sentiment_counts_df.set_index("Sentiment"))


def _render_youtube_results(summary, comments_data):
    """Render title, summary metrics, charts and comment table for one video."""
    video_title_display = summary.get("video_title", "Video Title Not Available")

    st.markdown("---")
    st.markdown(f"### Analyzing Video: **{video_title_display}**")
    st.markdown("---")

    st.subheader("📊 Sentiment Summary")
    metric_specs = (
        ("Comments Fetched", "num_comments_fetched", None),
        ("Comments Analyzed", "num_comments_analyzed", None),
        ("Positive 👍", "positive", "green"),
        ("Neutral 😐", "neutral", "grey"),
        ("Negative 👎", "negative", "red"),
    )
    for column, (label, key, colour) in zip(st.columns(5), metric_specs):
        _render_summary_metric(column, label, summary.get(key, 0), colour)

    st.markdown("---")  # Visual separator before the charts

    if summary.get("num_valid_predictions", 0) > 0:
        _render_sentiment_charts(summary)
    else:
        # No comment received one of the three known labels.
        st.write("Not enough valid sentiment data to display distribution charts.")

    if not comments_data:
        st.write("No comments were analyzed to display.")
        return

    st.subheader(f"🔍 Analyzed Comments (showing first {len(comments_data)} results)")
    comments_display_df = pd.DataFrame(comments_data)
    if "Confidence" in comments_display_df.columns:
        try:
            # Format as a percentage with one decimal place.
            comments_display_df["Confidence"] = comments_display_df[
                "Confidence"
            ].map("{:.1%}".format)
        except (TypeError, ValueError):
            # Confidence values were not numeric; show them unformatted.
            st.warning("Could not format confidence scores.")
    st.dataframe(comments_display_df, use_container_width=True, height=400)


with tab_youtube:
    st.header("YouTube Comment Sentiment Analyzer")
    video_url_input = st.text_input(
        "Enter YouTube Video URL or Video ID:",
        key="youtube_url_input_key",
        placeholder="e.g., Z9kGRMglw-I or full URL",
    )

    if st.button("Analyze YouTube Comments", key="youtube_analyze_button_key"):
        if not video_url_input:
            # Button clicked without entering a URL.
            st.warning("Please enter a YouTube URL or Video ID first!")
        else:
            # analyze_youtube_video shows its own spinners and error messages
            # and returns None on failure, so there is nothing to add here
            # when it yields no result.
            analysis_results = analyze_youtube_video(video_url_input)
            if analysis_results and analysis_results["summary"]:
                _render_youtube_results(
                    analysis_results["summary"],
                    analysis_results["comments_data"],
                )
702
+
703
# Twitter/X tab: placeholder only. It renders a header and an
# "under construction" notice; no analysis is wired up yet.
with tab_twitter:
    st.header("Twitter/X Post Analysis")
    st.info("This feature is currently under construction. Please check back later!")
    # TODO: sketch of the planned input flow, kept for reference until the
    # feature is implemented.
    # twitter_url_input = st.text_input("Enter Twitter/X Post URL:", key="twitter_url_input_key")
    # if st.button("Analyze Tweets", key="twitter_analyze_button_key"):
    # st.write("Imagine amazing Twitter analysis happening here... Tweet tweet!")
src/__init__.py ADDED
File without changes
src/youtube.py CHANGED
@@ -1,21 +1,14 @@
1
  import os
2
  import googleapiclient.discovery
3
  import googleapiclient.errors
 
4
 
5
  # from dotenv import load_dotenv
6
  import streamlit as st
7
 
8
- # load_dotenv()
9
- # api_key = os.getenv("API_KEY")
10
  # api_key = st.secrets["API_KEY"]
11
- try:
12
- api_key = os.environ["API_KEY"]
13
- # Sử dụng api_key ở đây
14
- except KeyError:
15
- st.error(
16
- "Lỗi: Secret 'API_KEY' chưa được cấu hình trong Hugging Face Space Settings."
17
- )
18
- st.stop() # Hoặc xử lý lỗi theo cách khác
19
 
20
 
21
  def get_comments(youtube, **kwargs):
 
1
  import os
2
  import googleapiclient.discovery
3
  import googleapiclient.errors
4
+ from dotenv import load_dotenv
5
 
6
  # from dotenv import load_dotenv
7
  import streamlit as st
8
 
9
+ load_dotenv()
10
+ api_key = os.getenv("API_KEY")
11
  # api_key = st.secrets["API_KEY"]
 
 
 
 
 
 
 
 
12
 
13
 
14
  def get_comments(youtube, **kwargs):