Ahmedik95316 committed on
Commit
04e5963
·
1 Parent(s): 34841ba

Update app/streamlit_app.py

Files changed (1)
  1. app/streamlit_app.py +560 -282
app/streamlit_app.py CHANGED
@@ -15,7 +15,7 @@ import plotly.express as px
15
  import plotly.graph_objects as go
16
  from datetime import datetime, timedelta
17
  from typing import Dict, List, Optional, Any
18
-
19
 
20
  # Configure logging
21
  logging.basicConfig(level=logging.INFO)
@@ -24,15 +24,26 @@ logger = logging.getLogger(__name__)
24
  # Add root to sys.path for imports
25
  sys.path.append(str(Path(__file__).resolve().parent.parent))
26

27
  class StreamlitAppManager:
28
  """Manages Streamlit application state and functionality"""
29
-
30
  def __init__(self):
31
  self.setup_config()
32
  self.setup_paths()
33
  self.setup_api_client()
34
  self.initialize_session_state()
35
-
36
  def setup_config(self):
37
  """Setup application configuration"""
38
  self.config = {
@@ -44,7 +55,7 @@ class StreamlitAppManager:
44
  'refresh_interval': 60,
45
  'max_batch_size': 10
46
  }
47
-
48
  def setup_paths(self):
49
  """Setup file paths"""
50
  self.paths = {
@@ -56,37 +67,39 @@ class StreamlitAppManager:
56
  'scheduler_log': Path("/tmp/logs/scheduler_execution.json"),
57
  'error_log': Path("/tmp/logs/scheduler_errors.json")
58
  }
59
-
60
  def setup_api_client(self):
61
  """Setup API client with error handling"""
62
  self.session = requests.Session()
63
  self.session.timeout = self.config['prediction_timeout']
64
-
65
  # Test API connection
66
  self.api_available = self.test_api_connection()
67
-
68
  def test_api_connection(self) -> bool:
69
  """Test API connection"""
70
  try:
71
- response = self.session.get(f"{self.config['api_url']}/health", timeout=5)
72
  return response.status_code == 200
73
  except:
74
  return False
75
-
76
  def initialize_session_state(self):
77
  """Initialize Streamlit session state"""
78
  if 'prediction_history' not in st.session_state:
79
  st.session_state.prediction_history = []
80
-
81
  if 'upload_history' not in st.session_state:
82
  st.session_state.upload_history = []
83
-
84
  if 'last_refresh' not in st.session_state:
85
  st.session_state.last_refresh = datetime.now()
86
-
87
  if 'auto_refresh' not in st.session_state:
88
  st.session_state.auto_refresh = False
89
 
 
90
  # Initialize app manager
91
  app_manager = StreamlitAppManager()
92
 
@@ -142,6 +155,7 @@ st.markdown("""
142
  </style>
143
  """, unsafe_allow_html=True)
144
 
 
145
  def load_json_file(file_path: Path, default: Any = None) -> Any:
146
  """Safely load JSON file with error handling"""
147
  try:
@@ -153,6 +167,7 @@ def load_json_file(file_path: Path, default: Any = None) -> Any:
153
  logger.error(f"Failed to load {file_path}: {e}")
154
  return default or {}
155
 
 
156
  def save_prediction_to_history(text: str, prediction: str, confidence: float):
157
  """Save prediction to session history"""
158
  prediction_entry = {
@@ -162,30 +177,31 @@ def save_prediction_to_history(text: str, prediction: str, confidence: float):
162
  'confidence': confidence,
163
  'text_length': len(text)
164
  }
165
-
166
  st.session_state.prediction_history.append(prediction_entry)
167
-
168
  # Keep only last 50 predictions
169
  if len(st.session_state.prediction_history) > 50:
170
  st.session_state.prediction_history = st.session_state.prediction_history[-50:]
171
 
 
172
  def make_prediction_request(text: str) -> Dict[str, Any]:
173
  """Make prediction request to API"""
174
  try:
175
  if not app_manager.api_available:
176
  return {'error': 'API is not available'}
177
-
178
  response = app_manager.session.post(
179
  f"{app_manager.config['api_url']}/predict",
180
  json={"text": text},
181
  timeout=app_manager.config['prediction_timeout']
182
  )
183
-
184
  if response.status_code == 200:
185
  return response.json()
186
  else:
187
  return {'error': f'API Error: {response.status_code} - {response.text}'}
188
-
189
  except requests.exceptions.Timeout:
190
  return {'error': 'Request timed out. Please try again.'}
191
  except requests.exceptions.ConnectionError:
@@ -193,33 +209,35 @@ def make_prediction_request(text: str) -> Dict[str, Any]:
193
  except Exception as e:
194
  return {'error': f'Unexpected error: {str(e)}'}
195
 
 
196
  def validate_text_input(text: str) -> tuple[bool, str]:
197
  """Validate text input"""
198
  if not text or not text.strip():
199
  return False, "Please enter some text to analyze."
200
-
201
  if len(text) < 10:
202
  return False, "Text must be at least 10 characters long."
203
-
204
  if len(text) > app_manager.config['max_text_length']:
205
  return False, f"Text must be less than {app_manager.config['max_text_length']} characters."
206
-
207
  # Check for suspicious content
208
  suspicious_patterns = ['<script', 'javascript:', 'data:']
209
  if any(pattern in text.lower() for pattern in suspicious_patterns):
210
  return False, "Text contains suspicious content."
211
-
212
  return True, "Valid"
213
 
 
214
  def create_confidence_gauge(confidence: float, prediction: str):
215
  """Create confidence gauge visualization"""
216
  fig = go.Figure(go.Indicator(
217
- mode = "gauge+number+delta",
218
- value = confidence * 100,
219
- domain = {'x': [0, 1], 'y': [0, 1]},
220
- title = {'text': f"Confidence: {prediction}"},
221
- delta = {'reference': 50},
222
- gauge = {
223
  'axis': {'range': [None, 100]},
224
  'bar': {'color': "red" if prediction == "Fake" else "green"},
225
  'steps': [
@@ -234,22 +252,23 @@ def create_confidence_gauge(confidence: float, prediction: str):
234
  }
235
  }
236
  ))
237
-
238
  fig.update_layout(height=300)
239
  return fig
240
 
 
241
  def create_prediction_history_chart():
242
  """Create prediction history visualization"""
243
  if not st.session_state.prediction_history:
244
  return None
245
-
246
  df = pd.DataFrame(st.session_state.prediction_history)
247
  df['timestamp'] = pd.to_datetime(df['timestamp'])
248
  df['confidence_percent'] = df['confidence'] * 100
249
-
250
  fig = px.scatter(
251
- df,
252
- x='timestamp',
253
  y='confidence_percent',
254
  color='prediction',
255
  size='text_length',
@@ -257,61 +276,326 @@ def create_prediction_history_chart():
257
  title="Prediction History",
258
  labels={'confidence_percent': 'Confidence (%)', 'timestamp': 'Time'}
259
  )
260
-
261
  fig.update_layout(height=400)
262
  return fig
263

264
  # Main application
265
  def main():
266
  """Main Streamlit application"""
267
-
268
  # Header
269
- st.markdown('<h1 class="main-header">📰 Fake News Detection System</h1>', unsafe_allow_html=True)
270
-
271
  # API Status indicator
272
  col1, col2, col3 = st.columns([1, 2, 1])
273
  with col2:
274
  if app_manager.api_available:
275
- st.markdown('<div class="success-message">🟢 API Service: Online</div>', unsafe_allow_html=True)
276
  else:
277
- st.markdown('<div class="error-message">🔴 API Service: Offline</div>', unsafe_allow_html=True)
278
-
279
  # Main content area
280
  tab1, tab2, tab3, tab4, tab5 = st.tabs([
281
- "🔍 Prediction",
282
- "📊 Batch Analysis",
283
- "📈 Analytics",
284
- "🎯 Model Training",
285
  "⚙️ System Status"
286
  ])
287
-
288
  # Tab 1: Individual Prediction
289
  with tab1:
290
  st.header("Single Text Analysis")
291
-
292
  # Input methods
293
  input_method = st.radio(
294
  "Choose input method:",
295
  ["Type Text", "Upload File"],
296
  horizontal=True
297
  )
298
-
299
  user_text = ""
300
-
301
  if input_method == "Type Text":
302
  user_text = st.text_area(
303
  "Enter news article text:",
304
  height=200,
305
  placeholder="Paste or type the news article you want to analyze..."
306
  )
307
-
308
  else: # Upload File
309
  uploaded_file = st.file_uploader(
310
  "Upload text file:",
311
  type=['txt', 'csv'],
312
  help="Upload a text file containing the article to analyze"
313
  )
314
-
315
  if uploaded_file:
316
  try:
317
  if uploaded_file.type == "text/plain":
@@ -319,44 +603,48 @@ def main():
319
  elif uploaded_file.type == "text/csv":
320
  df = pd.read_csv(uploaded_file)
321
  if 'text' in df.columns:
322
- user_text = df['text'].iloc[0] if len(df) > 0 else ""
323
  else:
324
  st.error("CSV file must contain a 'text' column")
325
-
326
- st.success(f"File uploaded successfully! ({len(user_text)} characters)")
327
-
328
  except Exception as e:
329
  st.error(f"Error reading file: {e}")
330
-
331
  # Prediction section
332
  col1, col2 = st.columns([3, 1])
333
-
334
  with col1:
335
  if st.button("🧠 Analyze Text", type="primary", use_container_width=True):
336
  if user_text:
337
  # Validate input
338
- is_valid, validation_message = validate_text_input(user_text)
339
-
340
  if not is_valid:
341
  st.error(validation_message)
342
  else:
343
  # Show progress
344
  with st.spinner("Analyzing text..."):
345
  result = make_prediction_request(user_text)
346
-
347
  if 'error' in result:
348
  st.error(f"❌ {result['error']}")
349
  else:
350
  # Display results
351
  prediction = result['prediction']
352
  confidence = result['confidence']
353
-
354
  # Save to history
355
- save_prediction_to_history(user_text, prediction, confidence)
356
-
357
  # Results display
358
  col_result1, col_result2 = st.columns(2)
359
-
360
  with col_result1:
361
  if prediction == "Fake":
362
  st.markdown(f"""
@@ -372,12 +660,14 @@ def main():
372
  <p>Confidence: {confidence:.2%}</p>
373
  </div>
374
  """, unsafe_allow_html=True)
375
-
376
  with col_result2:
377
  # Confidence gauge
378
- fig_gauge = create_confidence_gauge(confidence, prediction)
379
- st.plotly_chart(fig_gauge, use_container_width=True)
380
-
381
  # Additional information
382
  with st.expander("📋 Analysis Details"):
383
  st.json({
@@ -389,51 +679,53 @@ def main():
389
  })
390
  else:
391
  st.warning("Please enter text to analyze.")
392
-
393
  with col2:
394
  if st.button("🔄 Clear Text", use_container_width=True):
395
  st.rerun()
396
-
397
  # Tab 2: Batch Analysis
398
  with tab2:
399
  st.header("Batch Text Analysis")
400
-
401
  # File upload for batch processing
402
  batch_file = st.file_uploader(
403
  "Upload CSV file for batch analysis:",
404
  type=['csv'],
405
  help="CSV file should contain a 'text' column with articles to analyze"
406
  )
407
-
408
  if batch_file:
409
  try:
410
  df = pd.read_csv(batch_file)
411
-
412
  if 'text' not in df.columns:
413
  st.error("CSV file must contain a 'text' column")
414
  else:
415
  st.success(f"File loaded: {len(df)} articles found")
416
-
417
  # Preview data
418
  st.subheader("Data Preview")
419
  st.dataframe(df.head(10))
420
-
421
  # Batch processing
422
  if st.button("🚀 Process Batch", type="primary"):
423
  if len(df) > app_manager.config['max_batch_size']:
424
- st.warning(f"Only processing first {app_manager.config['max_batch_size']} articles")
425
  df = df.head(app_manager.config['max_batch_size'])
426
-
427
  progress_bar = st.progress(0)
428
  status_text = st.empty()
429
  results = []
430
-
431
  for i, row in df.iterrows():
432
- status_text.text(f"Processing article {i+1}/{len(df)}...")
433
  progress_bar.progress((i + 1) / len(df))
434
-
435
  result = make_prediction_request(row['text'])
436
-
437
  if 'error' not in result:
438
  results.append({
439
  'text': row['text'][:100] + "...",
@@ -448,28 +740,31 @@ def main():
448
  'confidence': 0,
449
  'processing_time': 0
450
  })
451
-
452
  # Display results
453
  results_df = pd.DataFrame(results)
454
-
455
  # Summary statistics
456
  col1, col2, col3, col4 = st.columns(4)
457
-
458
  with col1:
459
  st.metric("Total Processed", len(results_df))
460
-
461
  with col2:
462
- fake_count = len(results_df[results_df['prediction'] == 'Fake'])
463
  st.metric("Fake News", fake_count)
464
-
465
  with col3:
466
- real_count = len(results_df[results_df['prediction'] == 'Real'])
467
  st.metric("Real News", real_count)
468
-
469
  with col4:
470
  avg_confidence = results_df['confidence'].mean()
471
- st.metric("Avg Confidence", f"{avg_confidence:.2%}")
472
-
473
  # Results visualization
474
  if len(results_df) > 0:
475
  fig = px.histogram(
@@ -479,268 +774,183 @@ def main():
479
  title="Batch Analysis Results"
480
  )
481
  st.plotly_chart(fig, use_container_width=True)
482
-
483
  # Download results
484
  csv_buffer = io.StringIO()
485
  results_df.to_csv(csv_buffer, index=False)
486
-
487
  st.download_button(
488
  label="📥 Download Results",
489
  data=csv_buffer.getvalue(),
490
  file_name=f"batch_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
491
  mime="text/csv"
492
  )
493
-
494
  except Exception as e:
495
  st.error(f"Error processing file: {e}")
496
-
497
  # Tab 3: Analytics
498
  with tab3:
499
  st.header("System Analytics")
500
-
501
  # Prediction history
502
  if st.session_state.prediction_history:
503
  st.subheader("Recent Predictions")
504
-
505
  # History chart
506
  fig_history = create_prediction_history_chart()
507
  if fig_history:
508
  st.plotly_chart(fig_history, use_container_width=True)
509
-
510
  # History table
511
  history_df = pd.DataFrame(st.session_state.prediction_history)
512
  st.dataframe(history_df.tail(20), use_container_width=True)
513
-
514
  else:
515
- st.info("No prediction history available. Make some predictions to see analytics.")
516
-
517
  # System metrics
518
  st.subheader("System Metrics")
519
-
520
  # Load various log files for analytics
521
  try:
522
  # API health check
523
  if app_manager.api_available:
524
- response = app_manager.session.get(f"{app_manager.config['api_url']}/metrics")
525
  if response.status_code == 200:
526
  metrics = response.json()
527
-
528
  col1, col2, col3, col4 = st.columns(4)
529
-
530
  with col1:
531
- st.metric("Total API Requests", metrics.get('total_requests', 0))
532
-
533
  with col2:
534
- st.metric("Unique Clients", metrics.get('unique_clients', 0))
535
-
536
  with col3:
537
- st.metric("Model Version", metrics.get('model_version', 'Unknown'))
538
-
539
  with col4:
540
  status = metrics.get('model_health', 'unknown')
541
  st.metric("Model Status", status)
542
-
543
  except Exception as e:
544
  st.warning(f"Could not load API metrics: {e}")
545
-
546
  # Tab 4: Model Training
547
  with tab4:
548
- st.header("Custom Model Training")
549
-
550
- st.info("Upload your own dataset to retrain the model with custom data.")
551
-
552
  # File upload for training
553
  training_file = st.file_uploader(
554
  "Upload training dataset (CSV):",
555
  type=['csv'],
556
  help="CSV file should contain 'text' and 'label' columns (label: 0=Real, 1=Fake)"
557
  )
558
-
559
  if training_file:
560
  try:
561
  df_train = pd.read_csv(training_file)
562
-
563
  required_columns = ['text', 'label']
564
- missing_columns = [col for col in required_columns if col not in df_train.columns]
565
-
566
  if missing_columns:
567
  st.error(f"Missing required columns: {missing_columns}")
568
  else:
569
- st.success(f"Training file loaded: {len(df_train)} samples")
570
-
571
- # Data validation
572
- label_counts = df_train['label'].value_counts()
573
-
574
- col1, col2 = st.columns(2)
575
-
576
- with col1:
577
- st.subheader("Dataset Overview")
578
- st.write(f"Total samples: {len(df_train)}")
579
- st.write(f"Real news (0): {label_counts.get(0, 0)}")
580
- st.write(f"Fake news (1): {label_counts.get(1, 0)}")
581
-
582
- with col2:
583
- # Label distribution chart
584
- fig_labels = px.pie(
585
- values=label_counts.values,
586
- names=['Real', 'Fake'],
587
- title="Label Distribution"
588
- )
589
- st.plotly_chart(fig_labels)
590
-
591
- # Training options
592
- st.subheader("Training Configuration")
593
-
594
- col1, col2 = st.columns(2)
595
-
596
- with col1:
597
- test_size = st.slider("Test Size", 0.1, 0.4, 0.2, 0.05)
598
- max_features = st.number_input("Max Features", 1000, 20000, 10000, 1000)
599
-
600
- with col2:
601
- cross_validation = st.checkbox("Cross Validation", value=True)
602
- hyperparameter_tuning = st.checkbox("Hyperparameter Tuning", value=False)
603
-
604
- # Start training
605
- if st.button("🏃‍♂️ Start Training", type="primary"):
606
- # Save training data
607
- app_manager.paths['custom_data'].parent.mkdir(parents=True, exist_ok=True)
608
- df_train.to_csv(app_manager.paths['custom_data'], index=False)
609
-
610
- # Progress simulation
611
- progress_bar = st.progress(0)
612
- status_text = st.empty()
613
-
614
- training_steps = [
615
- "Preprocessing data...",
616
- "Splitting dataset...",
617
- "Training model...",
618
- "Evaluating performance...",
619
- "Saving model..."
620
- ]
621
-
622
- for i, step in enumerate(training_steps):
623
- status_text.text(step)
624
- progress_bar.progress((i + 1) / len(training_steps))
625
- time.sleep(2) # Simulate processing time
626
-
627
- # Run actual training
628
- try:
629
- result = subprocess.run(
630
- [sys.executable, "model/train.py",
631
- "--data_path", str(app_manager.paths['custom_data'])],
632
- capture_output=True,
633
- text=True,
634
- timeout=300
635
- )
636
-
637
- if result.returncode == 0:
638
- st.success("🎉 Training completed successfully!")
639
-
640
- # Try to extract accuracy from output
641
- try:
642
- output_lines = result.stdout.strip().split('\n')
643
- for line in output_lines:
644
- if 'accuracy' in line.lower():
645
- st.info(f"Model performance: {line}")
646
- except:
647
- pass
648
-
649
- # Reload API model
650
- if app_manager.api_available:
651
- try:
652
- reload_response = app_manager.session.post(
653
- f"{app_manager.config['api_url']}/model/reload"
654
- )
655
- if reload_response.status_code == 200:
656
- st.success("✅ Model reloaded in API successfully!")
657
- except:
658
- st.warning("⚠️ Model trained but API reload failed")
659
-
660
- else:
661
- st.error(f"Training failed: {result.stderr}")
662
-
663
- except subprocess.TimeoutExpired:
664
- st.error("Training timed out. Please try with a smaller dataset.")
665
- except Exception as e:
666
- st.error(f"Training error: {e}")
667
-
668
  except Exception as e:
669
  st.error(f"Error loading training file: {e}")
670
-
671
  # Tab 5: System Status
672
  with tab5:
673
  render_system_status()
674
 
 
675
  def render_system_status():
676
  """Render system status tab"""
677
  st.header("System Status & Monitoring")
678
-
679
  # Auto-refresh toggle
680
  col1, col2 = st.columns([1, 4])
681
  with col1:
682
- st.session_state.auto_refresh = st.checkbox("Auto Refresh", value=st.session_state.auto_refresh)
683
-
684
  with col2:
685
  if st.button("🔄 Refresh Now"):
686
  st.session_state.last_refresh = datetime.now()
687
  st.rerun()
688
-
689
  # System health overview
690
  st.subheader("🏥 System Health")
691
-
692
  if app_manager.api_available:
693
  try:
694
- health_response = app_manager.session.get(f"{app_manager.config['api_url']}/health")
695
  if health_response.status_code == 200:
696
  health_data = health_response.json()
697
-
698
  # Overall status
699
  overall_status = health_data.get('status', 'unknown')
700
  if overall_status == 'healthy':
701
  st.success("🟢 System Status: Healthy")
702
  else:
703
  st.error("🔴 System Status: Unhealthy")
704
-
705
  # Detailed health metrics
706
  col1, col2, col3 = st.columns(3)
707
-
708
  with col1:
709
  st.subheader("🤖 Model Health")
710
  model_health = health_data.get('model_health', {})
711
-
712
  for key, value in model_health.items():
713
  if key != 'test_prediction':
714
- st.write(f"**{key.replace('_', ' ').title()}:** {value}")
715
-
716
  with col2:
717
  st.subheader("💻 System Resources")
718
  system_health = health_data.get('system_health', {})
719
-
720
  for key, value in system_health.items():
721
  if isinstance(value, (int, float)):
722
- st.metric(key.replace('_', ' ').title(), f"{value:.1f}%")
723
-
724
  with col3:
725
  st.subheader("🔗 API Health")
726
  api_health = health_data.get('api_health', {})
727
-
728
  for key, value in api_health.items():
729
- st.write(f"**{key.replace('_', ' ').title()}:** {value}")
730
-
731
  except Exception as e:
732
  st.error(f"Failed to get health status: {e}")
733
-
734
  else:
735
  st.error("🔴 API Service is not available")
736
-
737
  # Model information
738
  st.subheader("🎯 Model Information")
739
-
740
  metadata = load_json_file(app_manager.paths['metadata'], {})
741
  if metadata:
742
  col1, col2 = st.columns(2)
743
-
744
  with col1:
745
  for key in ['model_version', 'test_accuracy', 'test_f1', 'model_type']:
746
  if key in metadata:
@@ -750,7 +960,7 @@ def render_system_status():
750
  st.metric(display_key, f"{value:.4f}")
751
  else:
752
  st.metric(display_key, str(value))
753
-
754
  with col2:
755
  for key in ['train_size', 'timestamp', 'data_version']:
756
  if key in metadata:
@@ -758,49 +968,52 @@ def render_system_status():
758
  value = metadata[key]
759
  if key == 'timestamp':
760
  try:
761
- dt = datetime.fromisoformat(value.replace('Z', '+00:00'))
762
  value = dt.strftime('%Y-%m-%d %H:%M:%S')
763
  except:
764
  pass
765
  st.write(f"**{display_key}:** {value}")
766
-
767
  else:
768
  st.warning("No model metadata available")
769
-
770
  # Recent activity
771
  st.subheader("📜 Recent Activity")
772
-
773
  activity_log = load_json_file(app_manager.paths['activity_log'], [])
774
  if activity_log:
775
- recent_activities = activity_log[-10:] if len(activity_log) > 10 else activity_log
776
-
777
  for entry in reversed(recent_activities):
778
  timestamp = entry.get('timestamp', 'Unknown')
779
  event = entry.get('event', 'Unknown event')
780
  level = entry.get('level', 'INFO')
781
-
782
  if level == 'ERROR':
783
  st.error(f"🔴 {timestamp} - {event}")
784
  elif level == 'WARNING':
785
  st.warning(f"🟡 {timestamp} - {event}")
786
  else:
787
  st.info(f"🔵 {timestamp} - {event}")
788
-
789
  else:
790
  st.info("No recent activity logs found")
791
-
792
  # File system status
793
  st.subheader("📁 File System Status")
794
-
795
  critical_files = [
796
- ("/tmp/model.pkl", "Main Model"),
797
  ("/tmp/vectorizer.pkl", "Vectorizer"),
798
- ("/tmp/data/combined_dataset.csv", "Training Dataset"),
799
- ("/tmp/metadata.json", "Model Metadata")
800
  ]
801
-
802
  col1, col2 = st.columns(2)
803
-
804
  with col1:
805
  st.write("**Critical Files:**")
806
  for file_path, description in critical_files:
@@ -808,18 +1021,18 @@ def render_system_status():
808
  st.success(f"✅ {description}")
809
  else:
810
  st.error(f"❌ {description}")
811
-
812
  with col2:
813
  # Disk usage information
814
  try:
815
  import shutil
816
  total, used, free = shutil.disk_usage("/tmp")
817
-
818
  st.write("**Disk Usage (/tmp):**")
819
  st.write(f"Total: {total // (1024**3)} GB")
820
  st.write(f"Used: {used // (1024**3)} GB")
821
  st.write(f"Free: {free // (1024**3)} GB")
822
-
823
  usage_percent = (used / total) * 100
824
  if usage_percent > 90:
825
  st.error(f"⚠️ Disk usage: {usage_percent:.1f}%")
@@ -827,34 +1040,90 @@ def render_system_status():
827
  st.warning(f"⚠️ Disk usage: {usage_percent:.1f}%")
828
  else:
829
  st.success(f"✅ Disk usage: {usage_percent:.1f}%")
830
-
831
  except Exception as e:
832
  st.error(f"Cannot check disk usage: {e}")
833
 
834
- # Initialize system button
835
- if st.button("🔧 Initialize System", help="Run system initialization if components are missing"):
836
- with st.spinner("Running system initialization..."):
837
  try:
838
- result = subprocess.run(
839
- [sys.executable, "/app/initialize_system.py"],
840
- capture_output=True,
841
- text=True,
842
- timeout=300
843
- )
844
 
845
- if result.returncode == 0:
846
- st.success("✅ System initialization completed successfully!")
847
- st.code(result.stdout)
848
- time.sleep(2)
849
- st.rerun()
850
- else:
851
- st.error("❌ System initialization failed")
852
- st.code(result.stderr)
853
-
854
- except subprocess.TimeoutExpired:
855
- st.error("⏰ Initialization timed out")
856
  except Exception as e:
857
- st.error(f"❌ Initialization error: {e}")
858
 
859
  # Auto-refresh logic
860
  if st.session_state.auto_refresh:
@@ -863,6 +1132,15 @@ if st.session_state.auto_refresh:
863
  st.session_state.last_refresh = datetime.now()
864
  st.rerun()
865

866
  # Run main application
867
  if __name__ == "__main__":
868
  main()
 
15
  import plotly.graph_objects as go
16
  from datetime import datetime, timedelta
17
  from typing import Dict, List, Optional, Any
18
+ import contextlib
19
 
20
  # Configure logging
21
  logging.basicConfig(level=logging.INFO)
 
24
  # Add root to sys.path for imports
25
  sys.path.append(str(Path(__file__).resolve().parent.parent))
26
 
27
+ # Try to import trainer directly for better progress tracking
28
+ try:
29
+ from model.train import RobustModelTrainer, estimate_training_time
30
+ DIRECT_TRAINING_AVAILABLE = True
31
+ except ImportError:
32
+ RobustModelTrainer = None
33
+ estimate_training_time = None
34
+ DIRECT_TRAINING_AVAILABLE = False
35
+ logger.warning("Direct training import failed, using subprocess fallback")
36
+
37
+
38
  class StreamlitAppManager:
39
  """Manages Streamlit application state and functionality"""
40
+
41
  def __init__(self):
42
  self.setup_config()
43
  self.setup_paths()
44
  self.setup_api_client()
45
  self.initialize_session_state()
46
+
47
  def setup_config(self):
48
  """Setup application configuration"""
49
  self.config = {
 
55
  'refresh_interval': 60,
56
  'max_batch_size': 10
57
  }
58
+
59
  def setup_paths(self):
60
  """Setup file paths"""
61
  self.paths = {
 
67
  'scheduler_log': Path("/tmp/logs/scheduler_execution.json"),
68
  'error_log': Path("/tmp/logs/scheduler_errors.json")
69
  }
70
+
71
  def setup_api_client(self):
72
  """Setup API client with error handling"""
73
  self.session = requests.Session()
74
  self.session.timeout = self.config['prediction_timeout']
75
+
76
  # Test API connection
77
  self.api_available = self.test_api_connection()
78
+
79
  def test_api_connection(self) -> bool:
80
  """Test API connection"""
81
  try:
82
+ response = self.session.get(
83
+ f"{self.config['api_url']}/health", timeout=5)
84
  return response.status_code == 200
85
  except:
86
  return False
87
+
88
  def initialize_session_state(self):
89
  """Initialize Streamlit session state"""
90
  if 'prediction_history' not in st.session_state:
91
  st.session_state.prediction_history = []
92
+
93
  if 'upload_history' not in st.session_state:
94
  st.session_state.upload_history = []
95
+
96
  if 'last_refresh' not in st.session_state:
97
  st.session_state.last_refresh = datetime.now()
98
+
99
  if 'auto_refresh' not in st.session_state:
100
  st.session_state.auto_refresh = False
101
 
102
+
103
  # Initialize app manager
104
  app_manager = StreamlitAppManager()
105
 
 
155
  </style>
156
  """, unsafe_allow_html=True)
157
 
158
+
159
  def load_json_file(file_path: Path, default: Any = None) -> Any:
160
  """Safely load JSON file with error handling"""
161
  try:
 
167
  logger.error(f"Failed to load {file_path}: {e}")
168
  return default or {}
169
 
170
+
171
  def save_prediction_to_history(text: str, prediction: str, confidence: float):
172
  """Save prediction to session history"""
173
  prediction_entry = {
 
177
  'confidence': confidence,
178
  'text_length': len(text)
179
  }
180
+
181
  st.session_state.prediction_history.append(prediction_entry)
182
+
183
  # Keep only last 50 predictions
184
  if len(st.session_state.prediction_history) > 50:
185
  st.session_state.prediction_history = st.session_state.prediction_history[-50:]
186
 
187
+
188
  def make_prediction_request(text: str) -> Dict[str, Any]:
189
  """Make prediction request to API"""
190
  try:
191
  if not app_manager.api_available:
192
  return {'error': 'API is not available'}
193
+
194
  response = app_manager.session.post(
195
  f"{app_manager.config['api_url']}/predict",
196
  json={"text": text},
197
  timeout=app_manager.config['prediction_timeout']
198
  )
199
+
200
  if response.status_code == 200:
201
  return response.json()
202
  else:
203
  return {'error': f'API Error: {response.status_code} - {response.text}'}
204
+
205
  except requests.exceptions.Timeout:
206
  return {'error': 'Request timed out. Please try again.'}
207
  except requests.exceptions.ConnectionError:
 
209
  except Exception as e:
210
  return {'error': f'Unexpected error: {str(e)}'}
211
 
212
+
213
  def validate_text_input(text: str) -> tuple[bool, str]:
214
  """Validate text input"""
215
  if not text or not text.strip():
216
  return False, "Please enter some text to analyze."
217
+
218
  if len(text) < 10:
219
  return False, "Text must be at least 10 characters long."
220
+
221
  if len(text) > app_manager.config['max_text_length']:
222
  return False, f"Text must be less than {app_manager.config['max_text_length']} characters."
223
+
224
  # Check for suspicious content
225
  suspicious_patterns = ['<script', 'javascript:', 'data:']
226
  if any(pattern in text.lower() for pattern in suspicious_patterns):
227
  return False, "Text contains suspicious content."
228
+
229
  return True, "Valid"
230
 
231
+
232
  def create_confidence_gauge(confidence: float, prediction: str):
233
  """Create confidence gauge visualization"""
234
  fig = go.Figure(go.Indicator(
235
+ mode="gauge+number+delta",
236
+ value=confidence * 100,
237
+ domain={'x': [0, 1], 'y': [0, 1]},
238
+ title={'text': f"Confidence: {prediction}"},
239
+ delta={'reference': 50},
240
+ gauge={
241
  'axis': {'range': [None, 100]},
242
  'bar': {'color': "red" if prediction == "Fake" else "green"},
243
  'steps': [
 
252
  }
253
  }
254
  ))
255
+
256
  fig.update_layout(height=300)
257
  return fig
258
 
259
+
260
  def create_prediction_history_chart():
261
  """Create prediction history visualization"""
262
  if not st.session_state.prediction_history:
263
  return None
264
+
265
  df = pd.DataFrame(st.session_state.prediction_history)
266
  df['timestamp'] = pd.to_datetime(df['timestamp'])
267
  df['confidence_percent'] = df['confidence'] * 100
268
+
269
  fig = px.scatter(
270
+ df,
271
+ x='timestamp',
272
  y='confidence_percent',
273
  color='prediction',
274
  size='text_length',
 
276
  title="Prediction History",
277
  labels={'confidence_percent': 'Confidence (%)', 'timestamp': 'Time'}
278
  )
279
+
280
  fig.update_layout(height=400)
281
  return fig
282
 
283
+
284
+ def estimate_training_time_streamlit(dataset_size: int) -> dict:
285
+ """Estimate training time for Streamlit display"""
286
+ if estimate_training_time:
287
+ # Use the imported function
288
+ detailed_estimate = estimate_training_time(dataset_size, enable_tuning=True, cv_folds=3)
289
+ return {
290
+ 'detailed': detailed_estimate,
291
+ 'simple_range': f"{int(detailed_estimate['total_seconds']//60)}:{int(detailed_estimate['total_seconds']%60):02d}",
292
+ 'category': 'small' if dataset_size < 100 else 'medium' if dataset_size < 1000 else 'large'
293
+ }
294
+ else:
295
+ # Fallback estimation
296
+ if dataset_size < 100:
297
+ return {'simple_range': '0:30-1:00', 'category': 'small'}
298
+ elif dataset_size < 1000:
299
+ return {'simple_range': '1:00-3:00', 'category': 'medium'}
300
+ else:
301
+ return {'simple_range': '3:00+', 'category': 'large'}
302
+
303
+
304
+ def render_enhanced_training_section(df_train):
305
+ """Enhanced training section with progress tracking"""
306
+ st.header("Custom Model Training")
307
+ st.info("Upload your own dataset to retrain the model with custom data.")
308
+
309
+ # Show dataset info and time estimate
310
+ dataset_size = len(df_train)
311
+ time_estimate = estimate_training_time_streamlit(dataset_size)
312
+
313
+ # Training information display
314
+ st.markdown("### 📊 Training Information")
315
+ col1, col2, col3, col4 = st.columns(4)
316
+
317
+ with col1:
318
+ st.metric("Dataset Size", f"{dataset_size} samples")
319
+ with col2:
320
+ if 'detailed' in time_estimate:
321
+ est_time = time_estimate['detailed']['total_formatted']
322
+ else:
323
+ est_time = time_estimate['simple_range']
324
+ st.metric("Estimated Time", est_time)
325
+ with col3:
326
+ st.metric("Category", time_estimate['category'].title())
327
+ with col4:
328
+ training_method = "Full Pipeline" if dataset_size >= 50 else "Simplified"
329
+ st.metric("Training Mode", training_method)
330
+
331
+ # Dataset preview
332
+ with st.expander("👀 Dataset Preview"):
333
+ st.dataframe(df_train.head(10))
334
+
335
+ # Dataset statistics
336
+ label_counts = df_train['label'].value_counts()
337
+ col1, col2 = st.columns(2)
338
+
339
+ with col1:
340
+ st.subheader("Class Distribution")
341
+ st.write(f"Real news (0): {label_counts.get(0, 0)}")
342
+ st.write(f"Fake news (1): {label_counts.get(1, 0)}")
343
+
344
+ with col2:
345
+ # Label distribution chart
346
+ fig_labels = px.pie(
347
+ values=label_counts.values,
348
+ names=['Real', 'Fake'],
349
+ title="Label Distribution"
350
+ )
351
+ st.plotly_chart(fig_labels, use_container_width=True)
352
+
353
+ # Training configuration
354
+ with st.expander("⚙️ Training Configuration"):
355
+ col1, col2 = st.columns(2)
356
+
357
+ with col1:
358
+ if dataset_size < 20:
359
+ st.warning("⚠️ Very small dataset: Hyperparameter tuning will be skipped")
360
+ st.info("• Simple training only")
361
+ st.info("• Minimal cross-validation")
362
+ elif dataset_size < 50:
363
+ st.info("ℹ️ Small dataset: Limited hyperparameter tuning")
364
+ st.info("• Reduced parameter grids")
365
+ st.info("• 2-3 fold cross-validation")
366
+ else:
367
+ st.success("✅ Standard dataset: Full training pipeline")
368
+ st.info("• Complete hyperparameter tuning")
369
+ st.info("• 3-fold cross-validation")
370
+ st.info("• Model comparison")
371
+
372
+ with col2:
373
+ st.write("**Expected Features:**")
374
+ st.write(f"• TF-IDF vectorization")
375
+ st.write(f"• Feature selection")
376
+ st.write(f"• Logistic Regression")
377
+ if dataset_size >= 50:
378
+ st.write(f"• Random Forest comparison")
379
+ st.write(f"• Performance evaluation")
380
+
381
+ # Training button and execution
382
+ if st.button("🏃‍♂️ Start Training", type="primary", use_container_width=True):
383
+ # Save training data
384
+ app_manager.paths['custom_data'].parent.mkdir(parents=True, exist_ok=True)
385
+ df_train.to_csv(app_manager.paths['custom_data'], index=False)
386
+
387
+ st.markdown("---")
388
+ st.markdown("### 🔄 Training Progress")
389
+
390
+ # Progress containers
391
+ progress_col1, progress_col2 = st.columns([3, 1])
392
+
393
+ with progress_col1:
394
+ progress_bar = st.progress(0)
395
+ status_text = st.empty()
396
+
397
+ with progress_col2:
398
+ time_display = st.empty()
399
+
400
+ # Start training
401
+ start_time = time.time()
402
+
403
+ if DIRECT_TRAINING_AVAILABLE:
404
+ # Method 1: Direct function call (shows progress in real-time)
405
+ status_text.text("Status: Initializing direct training...")
406
+ progress_bar.progress(5)
407
+
408
+ try:
409
+ # Create output capture
410
+ output_buffer = io.StringIO()
411
+
412
+ with st.spinner("Training model (direct method)..."):
413
+ # Redirect stdout to capture progress
414
+ with contextlib.redirect_stdout(output_buffer):
415
+ trainer = RobustModelTrainer()
416
+ success, message = trainer.train_model(
417
+ data_path=str(app_manager.paths['custom_data'])
418
+ )
419
+
420
+ elapsed_time = time.time() - start_time
421
+ time_display.text(f"Elapsed: {timedelta(seconds=int(elapsed_time))}")
422
+
423
+ # Show final progress
424
+ progress_bar.progress(100)
425
+ status_text.text("Status: Training completed!")
426
+
427
+ # Get captured output
428
+ captured_output = output_buffer.getvalue()
429
+
430
+ if success:
431
+ st.success("🎉 **Training Completed Successfully!**")
432
+ st.info(f"📊 **{message}**")
433
+
434
+ # Show captured progress if available
435
+ if captured_output:
436
+ with st.expander("📈 Training Progress Details"):
437
+ st.code(captured_output)
438
+
439
+ else:
440
+ st.error(f"❌ **Training Failed:** {message}")
441
+ if captured_output:
442
+ with st.expander("🔍 Debug Output"):
443
+ st.code(captured_output)
444
+
445
+ except Exception as e:
446
+ st.error(f"❌ **Training Error:** {str(e)}")
447
+
448
+ else:
449
+ # Method 2: Subprocess with progress simulation
450
+ status_text.text("Status: Starting subprocess training...")
451
+ progress_bar.progress(10)
452
+
453
+ try:
454
+ # Simulate progress during subprocess execution
455
+ progress_steps = [
456
+ (20, "Loading and validating data..."),
457
+ (40, "Creating preprocessing pipeline..."),
458
+ (60, "Training models..."),
459
+ (80, "Evaluating performance..."),
460
+ (95, "Saving model artifacts...")
461
+ ]
462
+
463
+ # Start subprocess
464
+ process = subprocess.Popen(
465
+ [sys.executable, "model/train.py", "--data_path", str(app_manager.paths['custom_data'])],
466
+ stdout=subprocess.PIPE,
467
+ stderr=subprocess.STDOUT,
468
+ universal_newlines=True
469
+ )
470
+
471
+ # Simulate progress while waiting
472
+ step_idx = 0
473
+ while process.poll() is None:
474
+ elapsed = time.time() - start_time
475
+ time_display.text(f"Elapsed: {timedelta(seconds=int(elapsed))}")
476
+
477
+ # Update progress based on elapsed time
478
+ if step_idx < len(progress_steps):
479
+ expected_time = dataset_size * 0.1 # Rough estimate
480
+ if elapsed > expected_time * (step_idx + 1) / len(progress_steps):
481
+ progress, status = progress_steps[step_idx]
482
+ progress_bar.progress(progress)
483
+ status_text.text(f"Status: {status}")
484
+ step_idx += 1
485
+
486
+ time.sleep(1)
487
+
488
+ # Get final output
489
+ stdout, _ = process.communicate()
490
+
491
+ # Final progress
492
+ progress_bar.progress(100)
493
+ status_text.text("Status: Training completed!")
494
+
495
+ elapsed_time = time.time() - start_time
496
+ time_display.text(f"Completed: {timedelta(seconds=int(elapsed_time))}")
497
+
498
+ if process.returncode == 0:
499
+ st.success("🎉 **Training Completed Successfully!**")
500
+
501
+ # Extract performance info from output
502
+ if stdout:
503
+ lines = stdout.strip().split('\n')
504
+ for line in lines[-10:]: # Check last 10 lines
505
+ if 'Best model:' in line:
506
+ st.info(f"📊 **{line}**")
507
+ elif any(keyword in line.lower() for keyword in ['accuracy', 'f1']):
508
+ if line.strip():
509
+ st.info(f"📈 **Performance:** {line}")
510
+
511
+ # Show full output in expander
512
+ with st.expander("📋 Complete Training Log"):
513
+ st.code(stdout)
514
+
515
+ else:
516
+ st.error("❌ **Training Failed**")
517
+ st.code(stdout)
518
+
519
+ except Exception as e:
520
+ st.error(f"❌ **Training Error:** {str(e)}")
521
+
522
+ # Try to reload model in API regardless of training method
523
+ if app_manager.api_available:
524
+ try:
525
+ with st.spinner("Reloading model in API..."):
526
+ reload_response = app_manager.session.post(
527
+ f"{app_manager.config['api_url']}/model/reload",
528
+ timeout=30
529
+ )
530
+ if reload_response.status_code == 200:
531
+ st.success("✅ **Model reloaded in API successfully!**")
532
+ else:
533
+ st.warning("⚠️ Model trained but API reload failed")
534
+ except Exception as e:
535
+ st.warning(f"⚠️ Model trained but API reload failed: {str(e)}")
536
+
537
+ # Training tips
538
+ st.markdown("---")
539
+ st.markdown("### 💡 Training Tips")
540
+ st.info("✓ **Model saved successfully** - You can now test predictions")
541
+ st.info("✓ **Try different datasets** to improve performance")
542
+ st.info("✓ **Larger datasets** (50+ samples) enable full hyperparameter tuning")
543
+
544
+
545
  # Main application
546
  def main():
547
  """Main Streamlit application"""
548
+
549
  # Header
550
+ st.markdown('<h1 class="main-header">📰 Fake News Detection System</h1>',
551
+ unsafe_allow_html=True)
552
+
553
  # API Status indicator
554
  col1, col2, col3 = st.columns([1, 2, 1])
555
  with col2:
556
  if app_manager.api_available:
557
+ st.markdown(
558
+ '<div class="success-message">🟢 API Service: Online</div>', unsafe_allow_html=True)
559
  else:
560
+ st.markdown(
561
+ '<div class="error-message">🔴 API Service: Offline</div>', unsafe_allow_html=True)
562
+
563
  # Main content area
564
  tab1, tab2, tab3, tab4, tab5 = st.tabs([
565
+ "🔍 Prediction",
566
+ "📊 Batch Analysis",
567
+ "📈 Analytics",
568
+ "🎯 Model Training",
569
  "⚙️ System Status"
570
  ])
571
+
572
  # Tab 1: Individual Prediction
573
  with tab1:
574
  st.header("Single Text Analysis")
575
+
576
  # Input methods
577
  input_method = st.radio(
578
  "Choose input method:",
579
  ["Type Text", "Upload File"],
580
  horizontal=True
581
  )
582
+
583
  user_text = ""
584
+
585
  if input_method == "Type Text":
586
  user_text = st.text_area(
587
  "Enter news article text:",
588
  height=200,
589
  placeholder="Paste or type the news article you want to analyze..."
590
  )
591
+
592
  else: # Upload File
593
  uploaded_file = st.file_uploader(
594
  "Upload text file:",
595
  type=['txt', 'csv'],
596
  help="Upload a text file containing the article to analyze"
597
  )
598
+
599
  if uploaded_file:
600
  try:
601
  if uploaded_file.type == "text/plain":
 
603
  elif uploaded_file.type == "text/csv":
604
  df = pd.read_csv(uploaded_file)
605
  if 'text' in df.columns:
606
+ user_text = df['text'].iloc[0] if len(
607
+ df) > 0 else ""
608
  else:
609
  st.error("CSV file must contain a 'text' column")
610
+
611
+ st.success(
612
+ f"File uploaded successfully! ({len(user_text)} characters)")
613
+
614
  except Exception as e:
615
  st.error(f"Error reading file: {e}")
616
+
617
  # Prediction section
618
  col1, col2 = st.columns([3, 1])
619
+
620
  with col1:
621
  if st.button("🧠 Analyze Text", type="primary", use_container_width=True):
622
  if user_text:
623
  # Validate input
624
+ is_valid, validation_message = validate_text_input(
625
+ user_text)
626
+
627
  if not is_valid:
628
  st.error(validation_message)
629
  else:
630
  # Show progress
631
  with st.spinner("Analyzing text..."):
632
  result = make_prediction_request(user_text)
633
+
634
  if 'error' in result:
635
  st.error(f"❌ {result['error']}")
636
  else:
637
  # Display results
638
  prediction = result['prediction']
639
  confidence = result['confidence']
640
+
641
  # Save to history
642
+ save_prediction_to_history(
643
+ user_text, prediction, confidence)
644
+
645
  # Results display
646
  col_result1, col_result2 = st.columns(2)
647
+
648
  with col_result1:
649
  if prediction == "Fake":
650
  st.markdown(f"""
 
660
  <p>Confidence: {confidence:.2%}</p>
661
  </div>
662
  """, unsafe_allow_html=True)
663
+
664
  with col_result2:
665
  # Confidence gauge
666
+ fig_gauge = create_confidence_gauge(
667
+ confidence, prediction)
668
+ st.plotly_chart(
669
+ fig_gauge, use_container_width=True)
670
+
671
  # Additional information
672
  with st.expander("📋 Analysis Details"):
673
  st.json({
 
679
  })
680
  else:
681
  st.warning("Please enter text to analyze.")
682
+
683
  with col2:
684
  if st.button("🔄 Clear Text", use_container_width=True):
685
  st.rerun()
686
+
687
  # Tab 2: Batch Analysis
688
  with tab2:
689
  st.header("Batch Text Analysis")
690
+
691
  # File upload for batch processing
692
  batch_file = st.file_uploader(
693
  "Upload CSV file for batch analysis:",
694
  type=['csv'],
695
  help="CSV file should contain a 'text' column with articles to analyze"
696
  )
697
+
698
  if batch_file:
699
  try:
700
  df = pd.read_csv(batch_file)
701
+
702
  if 'text' not in df.columns:
703
  st.error("CSV file must contain a 'text' column")
704
  else:
705
  st.success(f"File loaded: {len(df)} articles found")
706
+
707
  # Preview data
708
  st.subheader("Data Preview")
709
  st.dataframe(df.head(10))
710
+
711
  # Batch processing
712
  if st.button("🚀 Process Batch", type="primary"):
713
  if len(df) > app_manager.config['max_batch_size']:
714
+ st.warning(
715
+ f"Only processing first {app_manager.config['max_batch_size']} articles")
716
  df = df.head(app_manager.config['max_batch_size'])
717
+
718
  progress_bar = st.progress(0)
719
  status_text = st.empty()
720
  results = []
721
+
722
  for i, row in df.iterrows():
723
+ status_text.text(
724
+ f"Processing article {i+1}/{len(df)}...")
725
  progress_bar.progress((i + 1) / len(df))
726
+
727
  result = make_prediction_request(row['text'])
728
+
729
  if 'error' not in result:
730
  results.append({
731
  'text': row['text'][:100] + "...",
 
740
  'confidence': 0,
741
  'processing_time': 0
742
  })
743
+
744
  # Display results
745
  results_df = pd.DataFrame(results)
746
+
747
  # Summary statistics
748
  col1, col2, col3, col4 = st.columns(4)
749
+
750
  with col1:
751
  st.metric("Total Processed", len(results_df))
752
+
753
  with col2:
754
+ fake_count = len(
755
+ results_df[results_df['prediction'] == 'Fake'])
756
  st.metric("Fake News", fake_count)
757
+
758
  with col3:
759
+ real_count = len(
760
+ results_df[results_df['prediction'] == 'Real'])
761
  st.metric("Real News", real_count)
762
+
763
  with col4:
764
  avg_confidence = results_df['confidence'].mean()
765
+ st.metric("Avg Confidence",
766
+ f"{avg_confidence:.2%}")
767
+
768
  # Results visualization
769
  if len(results_df) > 0:
770
  fig = px.histogram(
 
774
  title="Batch Analysis Results"
775
  )
776
  st.plotly_chart(fig, use_container_width=True)
777
+
778
  # Download results
779
  csv_buffer = io.StringIO()
780
  results_df.to_csv(csv_buffer, index=False)
781
+
782
  st.download_button(
783
  label="📥 Download Results",
784
  data=csv_buffer.getvalue(),
785
  file_name=f"batch_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
786
  mime="text/csv"
787
  )
788
+
789
  except Exception as e:
790
  st.error(f"Error processing file: {e}")
791
+
792
  # Tab 3: Analytics
793
  with tab3:
794
  st.header("System Analytics")
795
+
796
  # Prediction history
797
  if st.session_state.prediction_history:
798
  st.subheader("Recent Predictions")
799
+
800
  # History chart
801
  fig_history = create_prediction_history_chart()
802
  if fig_history:
803
  st.plotly_chart(fig_history, use_container_width=True)
804
+
805
  # History table
806
  history_df = pd.DataFrame(st.session_state.prediction_history)
807
  st.dataframe(history_df.tail(20), use_container_width=True)
808
+
809
  else:
810
+ st.info(
811
+ "No prediction history available. Make some predictions to see analytics.")
812
+
813
  # System metrics
814
  st.subheader("System Metrics")
815
+
816
  # Load various log files for analytics
817
  try:
818
  # API health check
819
  if app_manager.api_available:
820
+ response = app_manager.session.get(
821
+ f"{app_manager.config['api_url']}/metrics")
822
  if response.status_code == 200:
823
  metrics = response.json()
824
+
825
  col1, col2, col3, col4 = st.columns(4)
826
+
827
  with col1:
828
+ st.metric("Total API Requests",
829
+ metrics.get('total_requests', 0))
830
+
831
  with col2:
832
+ st.metric("Unique Clients", metrics.get(
833
+ 'unique_clients', 0))
834
+
835
  with col3:
836
+ st.metric("Model Version", metrics.get(
837
+ 'model_version', 'Unknown'))
838
+
839
  with col4:
840
  status = metrics.get('model_health', 'unknown')
841
  st.metric("Model Status", status)
842
+
843
  except Exception as e:
844
  st.warning(f"Could not load API metrics: {e}")
845
+
846
  # Tab 4: Model Training
847
  with tab4:
848
  # File upload for training
849
  training_file = st.file_uploader(
850
  "Upload training dataset (CSV):",
851
  type=['csv'],
852
  help="CSV file should contain 'text' and 'label' columns (label: 0=Real, 1=Fake)"
853
  )
854
+
855
  if training_file:
856
  try:
857
  df_train = pd.read_csv(training_file)
858
+
859
  required_columns = ['text', 'label']
860
+ missing_columns = [
861
+ col for col in required_columns if col not in df_train.columns]
862
+
863
  if missing_columns:
864
  st.error(f"Missing required columns: {missing_columns}")
865
  else:
866
+ st.success(
867
+ f"Training file loaded: {len(df_train)} samples")
868
+
869
+ # Enhanced training section
870
+ render_enhanced_training_section(df_train)
871
+
872
  except Exception as e:
873
  st.error(f"Error loading training file: {e}")
874
+
875
  # Tab 5: System Status
876
  with tab5:
877
  render_system_status()
878
 
879
+
880
  def render_system_status():
881
  """Render system status tab"""
882
  st.header("System Status & Monitoring")
883
+
884
  # Auto-refresh toggle
885
  col1, col2 = st.columns([1, 4])
886
  with col1:
887
+ st.session_state.auto_refresh = st.checkbox(
888
+ "Auto Refresh", value=st.session_state.auto_refresh)
889
+
890
  with col2:
891
  if st.button("🔄 Refresh Now"):
892
  st.session_state.last_refresh = datetime.now()
893
  st.rerun()
894
+
895
  # System health overview
896
  st.subheader("🏥 System Health")
897
+
898
  if app_manager.api_available:
899
  try:
900
+ health_response = app_manager.session.get(
901
+ f"{app_manager.config['api_url']}/health")
902
  if health_response.status_code == 200:
903
  health_data = health_response.json()
904
+
905
  # Overall status
906
  overall_status = health_data.get('status', 'unknown')
907
  if overall_status == 'healthy':
908
  st.success("🟢 System Status: Healthy")
909
  else:
910
  st.error("🔴 System Status: Unhealthy")
911
+
912
  # Detailed health metrics
913
  col1, col2, col3 = st.columns(3)
914
+
915
  with col1:
916
  st.subheader("🤖 Model Health")
917
  model_health = health_data.get('model_health', {})
918
+
919
  for key, value in model_health.items():
920
  if key != 'test_prediction':
921
+ st.write(
922
+ f"**{key.replace('_', ' ').title()}:** {value}")
923
+
924
  with col2:
925
  st.subheader("💻 System Resources")
926
  system_health = health_data.get('system_health', {})
927
+
928
  for key, value in system_health.items():
929
  if isinstance(value, (int, float)):
930
+ st.metric(key.replace('_', ' ').title(),
931
+ f"{value:.1f}%")
932
+
933
  with col3:
934
  st.subheader("🔗 API Health")
935
  api_health = health_data.get('api_health', {})
936
+
937
  for key, value in api_health.items():
938
+ st.write(
939
+ f"**{key.replace('_', ' ').title()}:** {value}")
940
+
941
  except Exception as e:
942
  st.error(f"Failed to get health status: {e}")
943
+
944
  else:
945
  st.error("🔴 API Service is not available")
946
+
947
  # Model information
948
  st.subheader("🎯 Model Information")
949
+
950
  metadata = load_json_file(app_manager.paths['metadata'], {})
951
  if metadata:
952
  col1, col2 = st.columns(2)
953
+
954
  with col1:
955
  for key in ['model_version', 'test_accuracy', 'test_f1', 'model_type']:
956
  if key in metadata:
 
960
  st.metric(display_key, f"{value:.4f}")
961
  else:
962
  st.metric(display_key, str(value))
963
+
964
  with col2:
965
  for key in ['train_size', 'timestamp', 'data_version']:
966
  if key in metadata:
 
968
  value = metadata[key]
969
  if key == 'timestamp':
970
  try:
971
+ dt = datetime.fromisoformat(
972
+ value.replace('Z', '+00:00'))
973
  value = dt.strftime('%Y-%m-%d %H:%M:%S')
974
  except:
975
  pass
976
  st.write(f"**{display_key}:** {value}")
977
+
978
  else:
979
  st.warning("No model metadata available")
980
+
981
  # Recent activity
982
  st.subheader("📜 Recent Activity")
983
+
984
  activity_log = load_json_file(app_manager.paths['activity_log'], [])
985
  if activity_log:
986
+ recent_activities = activity_log[-10:] if len(
987
+ activity_log) > 10 else activity_log
988
+
989
  for entry in reversed(recent_activities):
990
  timestamp = entry.get('timestamp', 'Unknown')
991
  event = entry.get('event', 'Unknown event')
992
  level = entry.get('level', 'INFO')
993
+
994
  if level == 'ERROR':
995
  st.error(f"🔴 {timestamp} - {event}")
996
  elif level == 'WARNING':
997
  st.warning(f"🟡 {timestamp} - {event}")
998
  else:
999
  st.info(f"🔵 {timestamp} - {event}")
1000
+
1001
  else:
1002
  st.info("No recent activity logs found")
1003
+
1004
  # File system status
1005
  st.subheader("📁 File System Status")
1006
+
1007
  critical_files = [
1008
+ ("/tmp/pipeline.pkl", "Pipeline Model"),
1009
+ ("/tmp/model.pkl", "Model Component"),
1010
  ("/tmp/vectorizer.pkl", "Vectorizer"),
1011
+ ("/tmp/metadata.json", "Model Metadata"),
1012
+ ("/tmp/data/combined_dataset.csv", "Training Dataset")
1013
  ]
1014
+
1015
  col1, col2 = st.columns(2)
1016
+
1017
  with col1:
1018
  st.write("**Critical Files:**")
1019
  for file_path, description in critical_files:
 
1021
  st.success(f"✅ {description}")
1022
  else:
1023
  st.error(f"❌ {description}")
1024
+
1025
  with col2:
1026
  # Disk usage information
1027
  try:
1028
  import shutil
1029
  total, used, free = shutil.disk_usage("/tmp")
1030
+
1031
  st.write("**Disk Usage (/tmp):**")
1032
  st.write(f"Total: {total // (1024**3)} GB")
1033
  st.write(f"Used: {used // (1024**3)} GB")
1034
  st.write(f"Free: {free // (1024**3)} GB")
1035
+
1036
  usage_percent = (used / total) * 100
1037
  if usage_percent > 90:
1038
  st.error(f"⚠️ Disk usage: {usage_percent:.1f}%")
 
1040
  st.warning(f"⚠️ Disk usage: {usage_percent:.1f}%")
1041
  else:
1042
  st.success(f"✅ Disk usage: {usage_percent:.1f}%")
1043
+
1044
  except Exception as e:
1045
  st.error(f"Cannot check disk usage: {e}")
1046
+
1047
+ # System actions
1048
+ st.subheader("🔧 System Actions")
1049
+
1050
+ col1, col2, col3 = st.columns(3)
1051
+
1052
+ with col1:
1053
+ # Initialize system button
1054
+ if st.button("🔧 Initialize System", help="Run system initialization if components are missing"):
1055
+ with st.spinner("Running system initialization..."):
1056
+ try:
1057
+ result = subprocess.run(
1058
+ [sys.executable, "/app/initialize_system.py"],
1059
+ capture_output=True,
1060
+ text=True,
1061
+ timeout=300
1062
+ )
1063
+
1064
+ if result.returncode == 0:
1065
+ st.success(
1066
+ "✅ System initialization completed successfully!")
1067
+ with st.expander("📋 Initialization Output"):
1068
+ st.code(result.stdout)
1069
+ time.sleep(2)
1070
+ st.rerun()
1071
+ else:
1072
+ st.error("❌ System initialization failed")
1073
+ st.code(result.stderr)
1074
+
1075
+ except subprocess.TimeoutExpired:
1076
+ st.error("⏰ Initialization timed out")
1077
+ except Exception as e:
1078
+ st.error(f"❌ Initialization error: {e}")
1079
+
1080
+ with col2:
1081
+ # Reload API model
1082
+ if st.button("🔄 Reload API Model", help="Reload the model in the API service"):
1083
+ if app_manager.api_available:
1084
+ try:
1085
+ with st.spinner("Reloading model in API..."):
1086
+ reload_response = app_manager.session.post(
1087
+ f"{app_manager.config['api_url']}/model/reload",
1088
+ timeout=30
1089
+ )
1090
+ if reload_response.status_code == 200:
1091
+ st.success("✅ Model reloaded successfully!")
1092
+ st.json(reload_response.json())
1093
+ else:
1094
+ st.error(f"❌ Model reload failed: {reload_response.status_code}")
1095
+ except Exception as e:
1096
+ st.error(f"❌ Model reload error: {e}")
1097
+ else:
1098
+ st.error("❌ API service not available")
1099
 
1100
+ with col3:
1101
+ # Clear cache
1102
+ if st.button("🗑️ Clear Cache", help="Clear prediction history and temporary data"):
1103
  try:
1104
+ # Clear session state
1105
+ st.session_state.prediction_history = []
1106
+ st.session_state.upload_history = []
1107
+
1108
+ # Clear temporary files
1109
+ temp_files = [
1110
+ "/tmp/custom_upload.csv",
1111
+ "/tmp/prediction_log.json"
1112
+ ]
1113
+
1114
+ cleared_count = 0
1115
+ for temp_file in temp_files:
1116
+ if Path(temp_file).exists():
1117
+ Path(temp_file).unlink()
1118
+ cleared_count += 1
1119
+
1120
+ st.success(f"✅ Cache cleared! Removed {cleared_count} temporary files")
1121
+ time.sleep(1)
1122
+ st.rerun()
1123

1124
  except Exception as e:
1125
+ st.error(f"❌ Cache clear error: {e}")
1126
+
1127
 
1128
  # Auto-refresh logic
1129
  if st.session_state.auto_refresh:
 
1132
  st.session_state.last_refresh = datetime.now()
1133
  st.rerun()
1134
 
1135
+ # Footer
1136
+ st.markdown("---")
1137
+ st.markdown("""
1138
+ <div style='text-align: center; color: #666; padding: 20px;'>
1139
+ <p>📰 <strong>Fake News Detection System</strong> | Advanced MLOps Pipeline</p>
1140
+ <p>Built with Streamlit, FastAPI, and Scikit-learn | Production-ready with comprehensive monitoring</p>
1141
+ </div>
1142
+ """, unsafe_allow_html=True)
1143
+
1144
  # Run main application
1145
  if __name__ == "__main__":
1146
  main()
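
The training flow this commit adds captures the trainer's console output so it can be replayed inside the Streamlit UI: the direct path wraps the call in contextlib.redirect_stdout writing into an io.StringIO buffer, and a subprocess.Popen fallback polls a child process instead. A minimal, self-contained sketch of that capture pattern, with a hypothetical train() standing in for model.train.RobustModelTrainer.train_model:

import contextlib
import io

def train(data_path: str) -> tuple[bool, str]:
    # Stand-in for the repo's trainer; whatever it prints is the
    # "progress" the app wants to show the user afterwards.
    print(f"Loading data from {data_path}...")
    print("Training model...")
    return True, "Training finished"

# Capture everything the trainer writes to stdout.
buffer = io.StringIO()
with contextlib.redirect_stdout(buffer):
    success, message = train("/tmp/custom_upload.csv")

captured = buffer.getvalue()

# In the app, `message` feeds st.success()/st.error() and `captured`
# is shown inside an st.expander(); here we simply print them back.
print("success:", success, "|", message)
print("--- captured trainer output ---")
print(captured, end="")

One limitation, which is why the subprocess fallback simulates progress instead: redirect_stdout only diverts Python-level writes to sys.stdout in the current process, so output from child processes or C extensions never reaches the buffer.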