Spaces:
Sleeping
Sleeping
File size: 2,917 Bytes
9f4a990 1f37348 9f4a990 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import streamlit as st
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import fcluster
# ================== Load the persisted models ==================
# The artifacts are read with joblib in the order listed; relative paths are
# resolved against the current working directory.
scaler, pca, kmeans, linked, dbscan = (
    joblib.load(artifact)
    for artifact in (
        'scaler.sav',              # standardisation model
        'pca_model.sav',           # PCA model
        'kmeans_model.sav',        # K-means model
        'hierarchical_model.sav',  # hierarchical clustering model
        'dbscan_model.sav',        # DBSCAN model
    )
)
# Plotting helper
def plot_clusters(data, labels, title, filename=None):
    """Save a PC1/PC2 scatter plot coloured by cluster label and return its path.

    Args:
        data: Table-like object indexable by the keys 'PC1' and 'PC2'.
        labels: One value per plotted point, used as the colour of that point.
        title: Plot title. Also used to derive the default output file name.
        filename: Optional output path for the PNG. When omitted, a name is
            derived from ``title`` so that plots with different titles are
            written to different files instead of overwriting each other.

    Returns:
        The path of the PNG file that was written.
    """
    if filename is None:
        # Keep letters/digits, replace everything else with '_', so that
        # 'K-means Clustering' becomes 'k_means_clustering.png'.
        slug = ''.join(ch if ch.isalnum() else '_' for ch in str(title))
        slug = slug.strip('_').lower()
        filename = (slug or 'plot') + '.png'

    # Work on an explicit figure rather than pyplot's implicit "current"
    # figure, and always close it so a failed plot does not leak a figure.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        points = ax.scatter(
            data['PC1'], data['PC2'], c=labels, cmap='viridis', s=50
        )
        ax.set_title(title)
        ax.set_xlabel('Principal Component 1 (PC1)')
        ax.set_ylabel('Principal Component 2 (PC2)')
        fig.colorbar(points, ax=ax)
        fig.savefig(filename)
    finally:
        plt.close(fig)
    return filename
# Process the uploaded data
def process_data(file):
    """Cluster an uploaded CSV with the loaded models and plot each result.

    The data is standardised with the loaded scaler, projected with the
    loaded PCA model, and then labelled by K-means, hierarchical clustering
    and DBSCAN. One scatter plot is produced per method.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A 6-tuple ``(kmeans_labels, hclust_labels, dbscan_labels,
        kmeans_plot, hclust_plot, dbscan_plot)`` where the first three items
        are label arrays and the last three are the values returned by
        ``plot_clusters``.

    Raises:
        ValueError: If the number of hierarchical labels obtained from the
            saved linkage differs from the number of uploaded rows.
    """
    # Read the new data.
    new_data = pd.read_csv(file)

    # 'Time' is dropped before scaling; tolerate files that do not contain it.
    new_numerical_data = new_data.drop(columns=['Time'], errors='ignore')

    # Pre-processing: standardise, then project with the saved PCA model.
    scaled_new_data = scaler.transform(new_numerical_data)
    pca_new_data = pca.transform(scaled_new_data)

    # DataFrame holding the principal components.
    # NOTE(review): assumes the saved PCA model yields exactly two components.
    pca_new_df = pd.DataFrame(pca_new_data, columns=['PC1', 'PC2'])

    # Cluster with the loaded models.
    kmeans_new_labels = kmeans.predict(pca_new_df)

    # fcluster cuts the *saved* linkage, so the labels describe the
    # observations that linkage was built from, not the uploaded rows.
    # Fail early with a clear message when the two cannot correspond, instead
    # of letting the plot fail later on a colour/point count mismatch.
    hclust_new_labels = fcluster(linked, 3, criterion='maxclust')
    if len(hclust_new_labels) != len(pca_new_df):
        raise ValueError(
            "Hierarchical clustering labels come from the saved linkage "
            f"({len(hclust_new_labels)} observations) and cannot be applied "
            f"to the uploaded data ({len(pca_new_df)} rows)."
        )

    # fit_predict re-fits the loaded DBSCAN estimator on the uploaded data.
    dbscan_new_labels = dbscan.fit_predict(pca_new_df)

    # Visualise the results.
    kmeans_plot = plot_clusters(pca_new_df, kmeans_new_labels, 'K-means Clustering')
    hclust_plot = plot_clusters(pca_new_df, hclust_new_labels, 'Hierarchical Clustering')
    dbscan_plot = plot_clusters(pca_new_df, dbscan_new_labels, 'DBSCAN Clustering')

    return (
        kmeans_new_labels,
        hclust_new_labels,
        dbscan_new_labels,
        kmeans_plot,
        hclust_plot,
        dbscan_plot,
    )
# Streamlit application
st.title("聚類模型應用")

# File upload widget (CSV only)
uploaded_file = st.file_uploader("上傳 CSV 檔案", type=["csv"])

if uploaded_file is not None:
    (
        kmeans_labels,
        hclust_labels,
        dbscan_labels,
        kmeans_plot,
        hclust_plot,
        dbscan_plot,
    ) = process_data(uploaded_file)

    # Show the label arrays: K-means, then hierarchical, then DBSCAN.
    for heading, cluster_labels in (
        ("K-means Labels", kmeans_labels),
        ("Hierarchical Clustering Labels", hclust_labels),
        ("DBSCAN Labels", dbscan_labels),
    ):
        st.subheader(heading)
        st.text(cluster_labels)

    # Show the plot images in the same order.
    for heading, image_path in (
        ("K-means Clustering Plot", kmeans_plot),
        ("Hierarchical Clustering Plot", hclust_plot),
        ("DBSCAN Clustering Plot", dbscan_plot),
    ):
        st.subheader(heading)
        st.image(image_path)
|