# app.py
# Gradio app that flags anomalous Cisco device log messages with a Hugging Face
# text classifier and predicts device failures with a scikit-learn Random Forest.

import os

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Step 1: Load a Hugging Face model for anomaly detection.
# Note: "huggingface-course/distilbert-base-uncased-finetuned-imdb" is a
# masked-language-model checkpoint, so loading it with
# AutoModelForSequenceClassification would attach a randomly initialized (i.e.
# untrained) classification head. We instead use DistilBERT fine-tuned on SST-2,
# a real sequence classifier whose POSITIVE/NEGATIVE labels match the labels
# consumed in detect_anomaly() below.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
anomaly_detection = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Step 2: Train or load the Random Forest model for failure prediction.
MODEL_PATH = "failure_prediction_model.pkl"
if not os.path.exists(MODEL_PATH):
    # Sample data (replace this with real Cisco device metrics).
    data = pd.DataFrame({
        'cpu_usage': [10, 20, 15, 35, 55],
        'memory_usage': [30, 60, 45, 50, 80],
        'error_rate': [0, 1, 0, 2, 5],
        'failure': [0, 1, 0, 1, 1],  # 0 = no failure, 1 = failure
    })

    # Features and target.
    X = data[['cpu_usage', 'memory_usage', 'error_rate']]
    y = data['failure']

    # Train the Random Forest model.
    failure_prediction_model = RandomForestClassifier(n_estimators=100, random_state=42)
    failure_prediction_model.fit(X, y)

    # Save the model for future use.
    joblib.dump(failure_prediction_model, MODEL_PATH)
else:
    # Load the previously trained model from disk.
    failure_prediction_model = joblib.load(MODEL_PATH)

# Step 3: Preprocess logs for anomaly detection.
def preprocess_logs(logs):
    """Normalize a DataFrame that has 'timestamp' and 'log_message' columns."""
    logs['timestamp'] = pd.to_datetime(logs['timestamp'])
    logs['log_message'] = logs['log_message'].str.lower()  # lowercase for uniformity
    return logs

# Step 4: Detect anomalies in log messages.
def detect_anomaly(logs):
    preprocessed_logs = preprocess_logs(logs)
    results = []
    for log in preprocessed_logs['log_message']:
        # truncation=True keeps long log lines within the model's 512-token limit.
        anomaly_result = anomaly_detection(log, truncation=True)
        results.append(anomaly_result[0]['label'])  # e.g. "POSITIVE" or "NEGATIVE"
    return results

# Step 5: Predict failures from device metrics.
def predict_failure(device_metrics):
    # Build a one-row DataFrame so the feature names match those seen at fit
    # time (passing a bare numpy array triggers a sklearn feature-name warning).
    metrics_df = pd.DataFrame([{
        'cpu_usage': device_metrics['cpu_usage'],
        'memory_usage': device_metrics['memory_usage'],
        'error_rate': device_metrics['error_rate'],
    }])
    failure_prediction = failure_prediction_model.predict(metrics_df)
    return int(failure_prediction[0])  # 0 = no failure, 1 = failure

# Step 6: Process uploaded logs and predict both anomalies and failures.
def process_logs_and_predict(log_file, metrics):
    # Depending on the Gradio version, a "file" input arrives as either a file
    # path string or a tempfile-like object with a .name attribute.
    path = log_file.name if hasattr(log_file, "name") else log_file
    logs = pd.read_json(path)                # load logs from the uploaded JSON file
    anomalies = detect_anomaly(logs)         # detect anomalies in log messages
    failure_pred = predict_failure(metrics)  # predict failure from device metrics
    return f"Anomalies Detected: {anomalies}, Failure Prediction: {failure_pred}"

# Step 7: Gradio interface for uploading logs and metrics.
iface = gr.Interface(
    fn=process_logs_and_predict,
    inputs=["file", "json"],
    outputs="text",
    title="Cisco Device Monitoring",
    description="Upload log files to detect anomalies and predict potential device failures.",
)

# Launch the Gradio interface.
iface.launch()
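
# Expected input shapes (illustrative sketch only; the filenames and values
# below are assumptions for testing, not part of the original app):
#
#   Log file — a JSON array of records with the columns preprocess_logs() expects:
#     [
#       {"timestamp": "2024-01-01T00:00:00", "log_message": "Interface Gi0/1 up"},
#       {"timestamp": "2024-01-01T00:05:00", "log_message": "CRC errors detected on Gi0/1"}
#     ]
#
#   Metrics — a JSON object whose keys match the Random Forest features:
#     {"cpu_usage": 45, "memory_usage": 70, "error_rate": 3}
#
# With inputs in these shapes, process_logs_and_predict() can also be smoke-tested
# directly, bypassing the UI:
#   process_logs_and_predict("logs.json", {"cpu_usage": 45, "memory_usage": 70, "error_rate": 3})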