chiichann committed on
Commit
f3075f7
·
verified ·
1 Parent(s): faedcf9

Upload 3 files

Browse files
ambient_temperature_system_failure.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: Isolation Forest anomaly detection on a time series.

The script reads ``ambient_temperature_system_failure.csv`` (columns
``timestamp`` and ``value``), standardizes ``value``, fits an Isolation
Forest, and plots the points whose anomaly score falls below a
user-selected threshold.

Streamlit re-executes this file from top to bottom on every widget
interaction, so everything below runs once per rerun.
"""
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Markdown shown in the "About" tab. Kept at column 0 so the rendered text
# does not depend on how leading whitespace inside the literal is handled.
ABOUT_TEXT = """
This app detects anomalies in time-series data using the Isolation Forest algorithm.
Users can visualize detected anomalies.

### How It Works:
- **Step 1**: Load a dataset (CSV format from the Numenta Anomaly Benchmark `realKnownCause` dataset)
- **Step 2**: Standardize numerical values for better anomaly detection
- **Step 3**: Apply **Isolation Forest** to identify outliers
- **Step 4**: Visualize the detected anomalies in a time-series plot
"""

# App title
st.title("📈 Anomaly Detection Tool")

# 🎯 Streamlit Tabs
tab1, tab2, tab3 = st.tabs(["📖 About", "📊 Dataset Overview", "🚨 Anomaly Detection"])

# About Tab
with tab1:
    st.write(ABOUT_TEXT)

# Load dataset
file_path = "ambient_temperature_system_failure.csv"
df = pd.read_csv(file_path)

# Dataset Overview Tab
with tab2:
    st.write("### Dataset Overview")
    st.write(df.head())

    # Parse timestamps; unparseable entries become NaT and are dropped so the
    # resulting DatetimeIndex contains only valid points.
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
    df = df.dropna(subset=["timestamp"]).set_index("timestamp")

    st.write("### Processed Dataset")
    st.write(df.head())

# Anomaly Detection Tab
with tab3:
    st.write("### Detect Anomalies in the Data")

    # Standardize the data
    scaler = StandardScaler()
    df["scaled_value"] = scaler.fit_transform(df[["value"]])

    # Fit the Isolation Forest. The contamination level sets the model's
    # internal offset, so it shifts where the decision-function score
    # crosses zero.
    contamination_level = st.slider(
        "Select Contamination Level",
        min_value=0.01,
        max_value=0.1,
        value=0.05,
        step=0.01,
    )
    model = IsolationForest(contamination=contamination_level, random_state=42)
    features = df[["scaled_value"]]
    model.fit(features)

    # Label points by comparing the decision-function score to a
    # user-selected threshold. Lower scores are more anomalous; a threshold
    # of 0.0 reproduces the model's own contamination-based labelling, which
    # is why no separate predict() pass is needed.
    threshold = st.slider(
        "Set Anomaly Score Threshold",
        min_value=-1.0,
        max_value=1.0,
        value=0.0,
        step=0.01,
    )
    df["anomaly_score"] = model.decision_function(features)
    df["anomaly"] = df["anomaly_score"] < threshold

    # Plot results
    is_anomaly = df["anomaly"]
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(df.index, df["value"], label="Value", color="blue")
    ax.scatter(
        df.index[is_anomaly],
        df.loc[is_anomaly, "value"],
        color="red",
        label="Anomaly",
        marker="o",
    )
    ax.set_xlabel("Timestamp")
    ax.set_ylabel("Value")
    ax.set_title("Anomaly Detection in Time-Series Data")
    ax.legend()
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; without
    # this, each Streamlit rerun would leave another figure in memory.
    plt.close(fig)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ scikit-learn
6
+ openpyxl # Needed only for reading Excel files; app.py loads its data with pd.read_csv