File size: 4,968 Bytes
15b2650
 
 
 
 
 
 
 
c4877ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15b2650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4877ab
 
 
 
15b2650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import streamlit as st
import requests
import json
import pandas as pd
import time
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import matplotlib as mpl
import matplotlib.font_manager as fm
import os
import urllib.request

# 下載並設定中文字型
def setup_chinese_font():
    """Make the Taipei Sans TC Beta font available to matplotlib.

    If ``TaipeiSansTCBeta-Regular.ttf`` is not present in the current
    working directory it is downloaded first. The font file is then
    registered with matplotlib's font manager and selected as the default
    font family through both ``plt.rcParams`` and ``mpl.rc``.

    Raises:
        OSError: If the download or the write to disk fails
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    # Local import: only the download path of this function needs it.
    import tempfile

    font_path = 'TaipeiSansTCBeta-Regular.ttf'
    font_family = 'Taipei Sans TC Beta'

    if not os.path.exists(font_path):
        font_url = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"

        # Download to a temporary file next to the destination and move it
        # into place only after the transfer finished. Writing straight to
        # font_path would leave a truncated file behind on an interrupted
        # download, and the existence check above would then accept that
        # broken file on every later run.
        target_dir = os.path.dirname(os.path.abspath(font_path))
        fd, tmp_path = tempfile.mkstemp(suffix='.part', dir=target_dir)
        os.close(fd)  # urlretrieve reopens the path itself
        try:
            urllib.request.urlretrieve(font_url, tmp_path)
            os.replace(tmp_path, font_path)
        finally:
            # Still on disk only when the download failed before the rename.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # Register the font file and make it the default family
    fm.fontManager.addfont(font_path)
    plt.rcParams['font.family'] = font_family
    mpl.rc('font', family=font_family)

# Configure the Chinese font used by the charts
setup_chinese_font()

# Page-level configuration
st.set_page_config(page_title="PChome 商品分析器", page_icon="📊", layout="wide")

# Heading and short description
st.title("PChome 商品分析器")
st.markdown("這個應用程式可以爬取並分析 PChome 上的商品資訊")

# Search controls, placed in the sidebar
st.sidebar.header("搜尋設定")
keyword = st.sidebar.text_input("請輸入搜尋關鍵字", "行李箱")
page_num = st.sidebar.number_input(
    "要爬取的頁數",
    min_value=1,
    max_value=10,
    value=1,
)
    
# Function to scrape PChome data
def scrape_pchome(keyword, page_num):
    """Fetch PChome search results for ``keyword`` as a single DataFrame.

    Requests pages ``1..page_num`` of the search endpoint (with
    ``sort=sale/dc``), waiting two seconds between consecutive requests.
    A spinner and a progress bar are shown in the Streamlit page while the
    pages are fetched. If a page fails, the error is reported with
    ``st.error`` and fetching stops; pages collected so far are still
    returned.

    Args:
        keyword: Search term. It is percent-encoded before being placed in
            the query string.
        page_num: Number of result pages to request.

    Returns:
        A DataFrame built from the ``prods`` entries of every fetched page,
        with a fresh ``0..n-1`` index. Empty when nothing was retrieved.
    """
    from urllib.parse import quote

    # Escape the keyword so characters such as '&', '#', '+' or spaces
    # cannot alter the query string.
    encoded_keyword = quote(str(keyword), safe='')

    # Collect per-page frames and concatenate once at the end.
    frames = []

    with st.spinner(f'正在爬取 {page_num} 頁的資料...'):
        # One progress bar for the whole run, updated after each page.
        progress = st.progress(0.0)

        for i in range(1, page_num + 1):
            url = f'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q={encoded_keyword}&page={i}&sort=sale/dc'

            try:
                # The timeout keeps a stalled connection from hanging the app.
                list_req = requests.get(url, timeout=10)
                # Report HTTP errors as such instead of as a JSON parse error.
                list_req.raise_for_status()
                getdata = json.loads(list_req.content)

                prods = getdata.get('prods') if isinstance(getdata, dict) else None
                if prods:
                    frames.append(pd.DataFrame(prods))
            except (requests.RequestException, ValueError, TypeError) as e:
                st.error(f"爬取第 {i} 頁時發生錯誤: {str(e)}")
                break

            progress.progress(i / page_num)

            # Pause between requests; no need to wait after the last page.
            if i < page_num:
                time.sleep(2)

    if not frames:
        return pd.DataFrame()

    # ignore_index gives every product a unique row label; without it each
    # page would repeat the same labels.
    return pd.concat(frames, ignore_index=True)

# Function to create analysis plots
def create_analysis_plots(df):
    """Render price statistics and two price charts in the Streamlit page.

    Shows the mean, maximum and minimum of ``df['price']`` as metrics, a
    line chart of the first 70 prices with the mean drawn as a dashed red
    line, and a 30-bin histogram (with KDE) of all prices.

    The line-chart title reads the module-level ``keyword`` variable.

    Args:
        df: DataFrame with a ``price`` column.
            NOTE(review): the column is assumed to be numeric; confirm
            against the scraper output.
    """
    # Global font settings
    plt.rcParams['font.sans-serif'] = ['Taipei Sans TC Beta']
    plt.rcParams['axes.unicode_minus'] = False

    # Re-index 0..n-1 so the x axis of the line chart is a running product
    # number even when df carries repeated index labels.
    prices = df['price'].reset_index(drop=True)
    mean_price = prices.mean()

    # Basic statistics
    st.subheader("基本統計資訊")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("平均價格", f"NT$ {mean_price:,.0f}")
    with col2:
        st.metric("最高價格", f"NT$ {prices.max():,.0f}")
    with col3:
        st.metric("最低價格", f"NT$ {prices.min():,.0f}")

    # Price trend plot (first 70 products)
    st.subheader("價格趨勢圖")
    fig, ax = plt.subplots(figsize=(15, 8))
    prices.iloc[:70].plot(
        color='skyblue',
        linewidth=2,
        marker='o',
        markersize=8,
        ax=ax,
    )
    ax.axhline(y=mean_price, color='red', linestyle='--', linewidth=2,
               label=f'平均價格: NT$ {mean_price:,.0f}')

    # Style through the Axes object rather than pyplot's "current axes"
    # global state.
    ax.set_title(f'{datetime.now().strftime("%Y%m%d")} PChome {keyword} 售價分析',
                 fontsize=20, fontweight='bold')
    ax.set_xlabel('商品編號', fontsize=14)
    ax.set_ylabel('價格 (NT$)', fontsize=14)
    ax.tick_params(axis='x', labelrotation=45)
    ax.grid(True, alpha=0.3)
    ax.legend()
    st.pyplot(fig)
    # Figures from plt.subplots stay registered in pyplot until closed;
    # close them so they do not pile up across calls.
    plt.close(fig)

    # Price distribution plot
    st.subheader("價格分布圖")
    fig2, ax2 = plt.subplots(figsize=(12, 6))
    sns.histplot(data=prices, bins=30, kde=True, ax=ax2)
    ax2.set_title('商品價格分布', fontsize=16)
    ax2.set_xlabel('價格 (NT$)', fontsize=12)
    ax2.set_ylabel('數量', fontsize=12)
    st.pyplot(fig2)
    plt.close(fig2)

# Main app logic: run the analysis when the sidebar button is pressed
if st.sidebar.button('開始分析'):
    # Start the clock before scraping
    start_time = time.time()

    # Fetch the search results
    data = scrape_pchome(keyword, page_num)

    if data.empty:
        st.error("沒有找到相關商品資料")
    else:
        # Raw table: product name and price only
        st.subheader("原始資料")
        st.dataframe(data[['name', 'price']])

        # Statistics and charts
        create_analysis_plots(data)

        # Offer the complete result set as a CSV download
        csv = data.to_csv(index=False).encode('utf-8-sig')
        date_tag = datetime.now().strftime("%Y%m%d")
        st.download_button(
            label="下載完整資料 (CSV)",
            data=csv,
            file_name=f'pchome_{keyword}_{date_tag}.csv',
            mime='text/csv',
        )

        # Report the elapsed time
        end_time = time.time()
        st.info(f'分析完成!執行時間:{end_time - start_time:.2f} 秒')

# Footer
st.markdown("---")
st.markdown("Made with ❤️ by Your Name")