File size: 2,917 Bytes
9f4a990
 
 
 
 
 
 
 
 
1f37348
 
 
 
 
9f4a990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import streamlit as st
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import fcluster

# ================== Load the persisted models ==================
# The targets on the left are bound, in order, to the files listed below.
scaler, pca, kmeans, linked, dbscan = (
    joblib.load(model_path)
    for model_path in (
        'scaler.sav',              # standardisation model
        'pca_model.sav',           # PCA model
        'kmeans_model.sav',        # K-means model
        'hierarchical_model.sav',  # hierarchical clustering model
        'dbscan_model.sav',        # DBSCAN model
    )
)

# Plotting helper
def plot_clusters(data, labels, title):
    """Save a PC1/PC2 scatter plot coloured by ``labels`` and return its path.

    Args:
        data: Object indexable by ``'PC1'`` and ``'PC2'`` (the x and y values).
        labels: Per-point values passed to the scatter plot as colours.
        title: Plot title. It is also used to derive the output file name so
            that plots with different titles are written to different files.

    Returns:
        The path of the PNG file that was written.
    """
    # Build a file-system-safe name from the title. Writing every plot to a
    # single fixed file would make later plots overwrite earlier ones, and
    # all returned paths would then point at the last image.
    slug = ''.join(
        ch.lower() if (ch.isascii() and ch.isalnum()) else '_'
        for ch in title
    ).strip('_')
    output_path = 'plot_' + slug + '.png' if slug else 'plot.png'

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        points = ax.scatter(data['PC1'], data['PC2'], c=labels, cmap='viridis', s=50)
        ax.set_title(title)
        ax.set_xlabel('Principal Component 1 (PC1)')
        ax.set_ylabel('Principal Component 2 (PC2)')
        fig.colorbar(points, ax=ax)
        fig.savefig(output_path)
    finally:
        # Close the figure even if plotting or saving fails, so figures do
        # not accumulate across calls.
        plt.close(fig)
    return output_path

# Process the uploaded data
def process_data(file):
    """Cluster an uploaded CSV with the loaded models and plot each result.

    The CSV is read with pandas, the 'Time' column is dropped, and the
    remaining columns are passed through the loaded scaler and PCA model
    before being clustered three ways.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A 6-tuple ``(kmeans_labels, hclust_labels, dbscan_labels,
        kmeans_plot, hclust_plot, dbscan_plot)``; the three plot entries are
        the values returned by ``plot_clusters``.
    """
    from scipy.cluster.hierarchy import linkage

    # Read the new data and remove the 'Time' column
    new_data = pd.read_csv(file)
    new_numerical_data = new_data.drop(columns=['Time'])

    # Pre-processing with the already-fitted scaler and PCA model
    scaled_new_data = scaler.transform(new_numerical_data)
    pca_new_data = pca.transform(scaled_new_data)

    # DataFrame holding the two principal components
    pca_new_df = pd.DataFrame(pca_new_data, columns=['PC1', 'PC2'])

    # K-means: assign the new points using the loaded model
    kmeans_new_labels = kmeans.predict(pca_new_df)

    # Hierarchical: a linkage matrix over n observations has n - 1 rows, and
    # fcluster returns one label per observation of that linkage. The saved
    # linkage is therefore only usable when it covers as many observations as
    # the upload; otherwise its labels would not line up with the new points.
    if len(linked) + 1 == len(pca_new_df):
        linkage_matrix = linked
    else:
        # NOTE(review): Ward linkage is assumed here; confirm it matches the
        # method used to build the saved hierarchical model.
        linkage_matrix = linkage(pca_new_data, method='ward')
    hclust_new_labels = fcluster(linkage_matrix, 3, criterion='maxclust')

    # DBSCAN: fit_predict re-fits the loaded estimator on the uploaded data
    dbscan_new_labels = dbscan.fit_predict(pca_new_df)

    # Visualise the results
    kmeans_plot = plot_clusters(pca_new_df, kmeans_new_labels, 'K-means Clustering')
    hclust_plot = plot_clusters(pca_new_df, hclust_new_labels, 'Hierarchical Clustering')
    dbscan_plot = plot_clusters(pca_new_df, dbscan_new_labels, 'DBSCAN Clustering')

    return kmeans_new_labels, hclust_new_labels, dbscan_new_labels, kmeans_plot, hclust_plot, dbscan_plot

# Streamlit application
st.title("聚類模型應用")

# File upload widget (CSV only)
uploaded_file = st.file_uploader("上傳 CSV 檔案", type=["csv"])

if uploaded_file is not None:
    (
        kmeans_labels,
        hclust_labels,
        dbscan_labels,
        kmeans_plot,
        hclust_plot,
        dbscan_plot,
    ) = process_data(uploaded_file)

    # Label sections are shown first: K-means, hierarchical, then DBSCAN
    label_sections = (
        ("K-means Labels", kmeans_labels),
        ("Hierarchical Clustering Labels", hclust_labels),
        ("DBSCAN Labels", dbscan_labels),
    )
    for heading, cluster_labels in label_sections:
        st.subheader(heading)
        st.text(cluster_labels)

    # Plot sections follow, in the same model order
    plot_sections = (
        ("K-means Clustering Plot", kmeans_plot),
        ("Hierarchical Clustering Plot", hclust_plot),
        ("DBSCAN Clustering Plot", dbscan_plot),
    )
    for heading, image_path in plot_sections:
        st.subheader(heading)
        st.image(image_path)