DSatishchandra committed on
Commit
5fc1666
·
verified ·
1 Parent(s): 236ad59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -27
app.py CHANGED
@@ -7,78 +7,77 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassifica
7
  from sklearn.ensemble import RandomForestClassifier
8
  import joblib
9
  import os
 
10
 
11
- # Step 1: Load Hugging Face model for anomaly detection
12
  tokenizer = AutoTokenizer.from_pretrained("huggingface-course/distilbert-base-uncased-finetuned-imdb")
13
  model = AutoModelForSequenceClassification.from_pretrained("huggingface-course/distilbert-base-uncased-finetuned-imdb")
14
  anomaly_detection = pipeline("text-classification", model=model, tokenizer=tokenizer)
15
 
16
- # Step 2: Train or Load the Random Forest model for failure prediction
17
  if not os.path.exists('failure_prediction_model.pkl'):
18
- # Sample data (replace this with real Cisco device metrics data)
19
  data = pd.DataFrame({
20
  'cpu_usage': [10, 20, 15, 35, 55],
21
  'memory_usage': [30, 60, 45, 50, 80],
22
  'error_rate': [0, 1, 0, 2, 5],
23
- 'failure': [0, 1, 0, 1, 1] # 0 = no failure, 1 = failure
24
  })
25
-
26
- # Features and target
27
  X = data[['cpu_usage', 'memory_usage', 'error_rate']]
28
  y = data['failure']
29
-
30
- # Train the Random Forest model
31
  failure_prediction_model = RandomForestClassifier(n_estimators=100, random_state=42)
32
  failure_prediction_model.fit(X, y)
33
-
34
- # Save the model for future use
35
  joblib.dump(failure_prediction_model, 'failure_prediction_model.pkl')
36
  else:
37
- # Load the trained model from file
38
  failure_prediction_model = joblib.load('failure_prediction_model.pkl')
39
 
40
- # Step 3: Define function to preprocess logs for anomaly detection
41
  def preprocess_logs(logs):
42
  logs['timestamp'] = pd.to_datetime(logs['timestamp'])
43
- logs['log_message'] = logs['log_message'].str.lower() # Convert log messages to lowercase for uniformity
44
  return logs
45
 
46
- # Step 4: Function to detect anomalies in logs
47
  def detect_anomaly(logs):
48
  preprocessed_logs = preprocess_logs(logs)
49
  results = []
50
  for log in preprocessed_logs['log_message']:
51
- anomaly_result = anomaly_detection(log) # Use Hugging Face pipeline for anomaly detection
52
- results.append(anomaly_result[0]['label']) # Append label (e.g., "POSITIVE" or "NEGATIVE")
53
  return results
54
 
55
- # Step 5: Function to predict failures based on device metrics
56
  def predict_failure(device_metrics):
57
- # Check if metrics are None or missing required fields
58
  if device_metrics is None:
59
  return "Device metrics are missing."
60
  if 'cpu_usage' not in device_metrics or 'memory_usage' not in device_metrics or 'error_rate' not in device_metrics:
61
  return "Invalid metrics format. Please provide 'cpu_usage', 'memory_usage', and 'error_rate'."
62
 
63
- # Convert device metrics into a numpy array for prediction
64
  metrics_array = np.array([device_metrics['cpu_usage'], device_metrics['memory_usage'], device_metrics['error_rate']]).reshape(1, -1)
65
- failure_prediction = failure_prediction_model.predict(metrics_array) # Use the Random Forest model for failure prediction
66
  return failure_prediction
67
 
68
- # Step 6: Function to process logs and predict both anomalies and failures
69
  def process_logs_and_predict(log_file, metrics):
70
- logs = pd.read_json(log_file) # Load logs from the uploaded JSON file
71
- anomalies = detect_anomaly(logs) # Detect anomalies in logs
72
- failure_pred = predict_failure(metrics) # Predict failures using device metrics
 
 
 
 
73
 
 
 
 
 
 
 
74
  return f"Anomalies Detected: {anomalies}, Failure Prediction: {failure_pred}"
75
 
76
- # Step 7: Set up Gradio interface for uploading logs and metrics for prediction
77
  iface = gr.Interface(fn=process_logs_and_predict,
78
  inputs=["file", "json"],
79
  outputs="text",
80
  title="Cisco Device Monitoring",
81
  description="Upload log files to detect anomalies and predict potential device failures.")
82
-
83
- # Launch the Gradio interface
84
  iface.launch()
 
7
  from sklearn.ensemble import RandomForestClassifier
8
  import joblib
9
  import os
10
+ import json
11
 
12
# Load the Hugging Face model used for log anomaly detection.
# The checkpoint id is kept in one place so the tokenizer and the model can
# never drift apart.
# NOTE(review): this checkpoint id looks like the Hugging Face course's
# masked-language-model fine-tune rather than a sequence classifier. If so,
# the classification head loaded below is freshly initialised and its labels
# carry no meaning -- confirm, and swap in a real classifier checkpoint.
ANOMALY_MODEL_ID = "huggingface-course/distilbert-base-uncased-finetuned-imdb"

tokenizer = AutoTokenizer.from_pretrained(ANOMALY_MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(ANOMALY_MODEL_ID)
# Text-classification pipeline; the rest of the file reads the 'label' field
# of the first result returned for each message.
anomaly_detection = pipeline("text-classification", model=model, tokenizer=tokenizer)
16
 
17
# Train or load the Random Forest model for failure prediction.
# The path and feature list are named once so the exists/dump/load calls and
# the training frame cannot disagree.
FAILURE_MODEL_PATH = 'failure_prediction_model.pkl'
FAILURE_FEATURE_COLUMNS = ['cpu_usage', 'memory_usage', 'error_rate']

if os.path.exists(FAILURE_MODEL_PATH):
    # NOTE: joblib.load unpickles the file, and unpickling can execute
    # arbitrary code. Only load a model file that this app wrote itself.
    failure_prediction_model = joblib.load(FAILURE_MODEL_PATH)
else:
    # Small sample data set used to bootstrap the model when no saved model
    # exists yet. 'failure' is the target: 0 = no failure, 1 = failure.
    data = pd.DataFrame({
        'cpu_usage': [10, 20, 15, 35, 55],
        'memory_usage': [30, 60, 45, 50, 80],
        'error_rate': [0, 1, 0, 2, 5],
        'failure': [0, 1, 0, 1, 1],
    })
    X = data[FAILURE_FEATURE_COLUMNS]
    y = data['failure']

    # Fixed random_state keeps the bootstrapped model reproducible.
    failure_prediction_model = RandomForestClassifier(n_estimators=100, random_state=42)
    failure_prediction_model.fit(X, y)

    # Persist the model so later start-ups take the load branch above.
    joblib.dump(failure_prediction_model, FAILURE_MODEL_PATH)
32
 
33
# Preprocess logs for anomaly detection
def preprocess_logs(logs):
    """Return a normalised copy of the log table.

    Args:
        logs: DataFrame with a 'timestamp' column that ``pd.to_datetime``
            can parse and a string-valued 'log_message' column.

    Returns:
        A new DataFrame in which 'timestamp' has been converted with
        ``pd.to_datetime`` and 'log_message' has been lower-cased. The
        frame passed in is left unmodified.

    Raises:
        KeyError: if 'timestamp' or 'log_message' is not a column of ``logs``.
    """
    missing = [col for col in ('timestamp', 'log_message') if col not in logs.columns]
    if missing:
        raise KeyError(f"Log data is missing required column(s): {missing}")

    # Work on a copy so the caller's DataFrame is not mutated as a side effect.
    cleaned = logs.copy()
    cleaned['timestamp'] = pd.to_datetime(cleaned['timestamp'])
    cleaned['log_message'] = cleaned['log_message'].str.lower()
    return cleaned
38
 
39
# Detect anomalies in logs
def detect_anomaly(logs):
    """Classify every log message and return the predicted labels.

    Args:
        logs: DataFrame of log entries; it is normalised with
            ``preprocess_logs`` before classification.

    Returns:
        A list with one label per row of ``logs``, in row order. An empty
        frame yields an empty list.
    """
    messages = preprocess_logs(logs)['log_message']
    # truncation=True clips messages that exceed the model's maximum input
    # length instead of letting the model call fail on them.
    return [
        anomaly_detection(message, truncation=True)[0]['label']
        for message in messages
    ]
47
 
48
# Predict failures based on device metrics
def predict_failure(device_metrics):
    """Predict device failure from a single set of metrics.

    Args:
        device_metrics: dict holding numeric 'cpu_usage', 'memory_usage'
            and 'error_rate' values, or None.

    Returns:
        The array returned by ``failure_prediction_model.predict`` for the
        one-row input (the training labels use 0 = no failure,
        1 = failure), or a human-readable error string when the metrics
        are missing or malformed.
    """
    required = ('cpu_usage', 'memory_usage', 'error_rate')
    invalid_format = "Invalid metrics format. Please provide 'cpu_usage', 'memory_usage', and 'error_rate'."

    if device_metrics is None:
        return "Device metrics are missing."
    # A JSON payload may be a list or a string; only a dict can carry the
    # named fields, and a plain `in` test would wrongly pass on a string.
    if not isinstance(device_metrics, dict):
        return invalid_format
    if any(field not in device_metrics for field in required):
        return invalid_format

    # Reject non-numeric values here rather than letting the model raise.
    try:
        values = [float(device_metrics[field]) for field in required]
    except (TypeError, ValueError):
        return invalid_format

    # Use named columns: the bootstrap training code in this file fits the
    # model on a DataFrame with exactly these column names.
    features = pd.DataFrame([values], columns=list(required))
    return failure_prediction_model.predict(features)
58
 
59
# Process logs and predict anomalies and failures
def process_logs_and_predict(log_file, metrics):
    """Run anomaly detection on uploaded logs and failure prediction on metrics.

    Args:
        log_file: path or file-like object containing a JSON array of log
            entries, each with 'timestamp' and 'log_message' fields.
        metrics: device metrics passed through to ``predict_failure``.

    Returns:
        A summary string with the detected labels and the failure
        prediction, or an error message when the log file is missing,
        unreadable, empty, or lacks the required fields.
    """
    if log_file is None:
        return "No log file provided. Please upload a JSON array of log entries."

    # Keep the try body to the one call that can fail on bad input.
    try:
        logs = pd.read_json(log_file)
    except (ValueError, OSError) as e:
        return f"Error reading JSON file: {str(e)}"

    if not isinstance(logs, pd.DataFrame) or logs.empty:
        return "Invalid log file format. Please upload a JSON array of log entries."

    # Check the schema up front so a missing field becomes a readable
    # message instead of a KeyError raised further down.
    missing = [col for col in ('timestamp', 'log_message') if col not in logs.columns]
    if missing:
        return f"Invalid log file format. Missing required field(s): {', '.join(missing)}."

    anomalies = detect_anomaly(logs)
    failure_pred = predict_failure(metrics)

    return f"Anomalies Detected: {anomalies}, Failure Prediction: {failure_pred}"
76
 
77
# Gradio interface: a file input and a JSON input feed
# process_logs_and_predict, whose string result is shown as text.
iface = gr.Interface(
    fn=process_logs_and_predict,
    inputs=["file", "json"],
    outputs="text",
    title="Cisco Device Monitoring",
    description="Upload log files to detect anomalies and predict potential device failures.",
)

# Start serving the interface.
iface.launch()