Spaces:
Sleeping
Sleeping
import json
import os
import time
import urllib.parse
import urllib.request
from datetime import datetime

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
import streamlit as st
# Download (if missing) and register the Chinese font used by the charts
def setup_chinese_font():
    """Make the Taipei Sans TC Beta font available to Matplotlib.

    If ``TaipeiSansTCBeta-Regular.ttf`` is not present in the working
    directory it is downloaded first. The font file is then registered with
    Matplotlib's font manager and selected as the default font family so that
    Chinese labels render correctly.

    Raises:
        Any exception raised by ``urllib.request.urlretrieve`` (download
        failure) or by ``fontManager.addfont`` (unreadable font file) is
        propagated unchanged.
    """
    font_path = 'TaipeiSansTCBeta-Regular.ttf'
    font_family = 'Taipei Sans TC Beta'

    if not os.path.exists(font_path):
        font_url = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
        # Download under a temporary name and move it into place only once the
        # transfer has finished. Writing straight to font_path would let an
        # interrupted download leave a truncated file that passes the
        # existence check above on every later run and then fails to load.
        partial_path = font_path + '.part'
        try:
            urllib.request.urlretrieve(font_url, partial_path)
            os.replace(partial_path, font_path)
        finally:
            # Only still present when the download or the rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    # Register the file, then make its family the global default.
    # (plt.rcParams is the same object matplotlib.rc() writes to, so a single
    # assignment is sufficient.)
    fm.fontManager.addfont(font_path)
    plt.rcParams['font.family'] = font_family
# Register the Chinese font before any chart is drawn
setup_chinese_font()

# Browser-tab title, icon and page layout
st.set_page_config(page_title="PChome 商品分析器", page_icon="📊", layout="wide")

# Page heading and one-line description
st.title("PChome 商品分析器")
st.markdown("這個應用程式可以爬取並分析 PChome 上的商品資訊")

# Sidebar inputs: the search keyword and how many result pages to fetch
st.sidebar.header("搜尋設定")
keyword = st.sidebar.text_input("請輸入搜尋關鍵字", "行李箱")
page_num = st.sidebar.number_input(
    "要爬取的頁數",
    min_value=1,
    max_value=10,
    value=1,
)
# Function to scrape PChome data
def scrape_pchome(keyword, page_num):
    """Fetch PChome search results for *keyword* and return them as one DataFrame.

    Pages 1..page_num of the search endpoint are requested in order. For each
    response, the list under the ``'prods'`` key (when present and non-empty)
    becomes one DataFrame; all pages are concatenated at the end with a fresh
    0..n-1 row index. A spinner and a single progress bar are shown while
    the requests run.

    Args:
        keyword: Search term. It is percent-encoded before being placed in the
            query string, so characters such as ``&``, ``#`` or ``%`` are sent
            as part of the term instead of altering the URL.
        page_num: Number of result pages to request (an int >= 1 as supplied
            by the sidebar widget).

    Returns:
        A DataFrame with one row per product, or an empty DataFrame when no
        page yielded any products.

    Errors while requesting or parsing a page are reported with ``st.error``
    and stop the loop; pages collected before the failure are still returned.
    """
    page_frames = []
    encoded_keyword = urllib.parse.quote(str(keyword), safe='')

    with st.spinner(f'正在爬取 {page_num} 頁的資料...'):
        # One bar for the whole run; creating it inside the loop would stack a
        # new bar on the page for every iteration.
        progress = st.progress(0.0)
        for i in range(1, page_num + 1):
            url = (
                'https://ecshweb.pchome.com.tw/search/v3.3/all/results'
                f'?q={encoded_keyword}&page={i}&sort=sale/dc'
            )
            try:
                # The timeout keeps a stalled connection from hanging the app.
                list_req = requests.get(url, timeout=10)
                # Surface HTTP errors directly instead of as a JSON parse error.
                list_req.raise_for_status()
                getdata = json.loads(list_req.content)
                if 'prods' in getdata and getdata['prods']:
                    page_frames.append(pd.DataFrame(getdata['prods']))
            except Exception as e:
                st.error(f"爬取第 {i} 頁時發生錯誤: {str(e)}")
                break
            progress.progress(i / page_num)
            # Pause between requests only; nothing follows the last page.
            if i < page_num:
                time.sleep(2)

    if not page_frames:
        return pd.DataFrame()
    # ignore_index gives unique row labels; otherwise every page would reuse
    # the same labels and index-based plots would overlap.
    return pd.concat(page_frames, ignore_index=True)
# Function to create analysis plots
def create_analysis_plots(df):
    """Render summary metrics and two price charts for the scraped products.

    Shows the mean / max / min of ``df['price']`` as Streamlit metrics, a line
    chart of the first 70 prices with the mean drawn as a dashed line, and a
    30-bin histogram (with KDE) of all prices.

    Args:
        df: DataFrame containing a ``price`` column.
            NOTE(review): the column is assumed to be numeric and the frame
            non-empty — the visible caller only checks ``not data.empty``.

    The trend-chart title reads the module-level ``keyword`` variable.
    """
    # Font settings so Chinese text and minus signs render correctly
    plt.rcParams['font.sans-serif'] = ['Taipei Sans TC Beta']
    plt.rcParams['axes.unicode_minus'] = False

    # Use a clean positional index so the trend chart's x-axis is a running
    # product number even when the incoming frame has repeated row labels.
    prices = df['price'].reset_index(drop=True)
    mean_price = prices.mean()

    # Basic statistics
    st.subheader("基本統計資訊")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("平均價格", f"NT$ {mean_price:,.0f}")
    with col2:
        st.metric("最高價格", f"NT$ {prices.max():,.0f}")
    with col3:
        st.metric("最低價格", f"NT$ {prices.min():,.0f}")

    # Price trend plot (first 70 products only)
    st.subheader("價格趨勢圖")
    fig, ax = plt.subplots(figsize=(15, 8))
    prices.iloc[:70].plot(
        color='skyblue',
        linewidth=2,
        marker='o',
        markersize=8,
        ax=ax,
    )
    ax.axhline(y=mean_price, color='red', linestyle='--', linewidth=2,
               label=f'平均價格: NT$ {mean_price:,.0f}')
    # Labels are set on the Axes object itself rather than through pyplot's
    # implicit "current axes", which is shared global state.
    ax.set_title(f'{datetime.now().strftime("%Y%m%d")} PChome {keyword} 售價分析',
                 fontsize=20, fontweight='bold')
    ax.set_xlabel('商品編號', fontsize=14)
    ax.set_ylabel('價格 (NT$)', fontsize=14)
    ax.tick_params(axis='x', labelrotation=45)
    ax.grid(True, alpha=0.3)
    ax.legend()
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this each
    # rerun of the app would leave another figure in memory.
    plt.close(fig)

    # Price distribution plot
    st.subheader("價格分布圖")
    fig2, ax2 = plt.subplots(figsize=(12, 6))
    sns.histplot(data=prices, bins=30, kde=True, ax=ax2)
    ax2.set_title('商品價格分布', fontsize=16)
    ax2.set_xlabel('價格 (NT$)', fontsize=12)
    ax2.set_ylabel('數量', fontsize=12)
    st.pyplot(fig2)
    plt.close(fig2)
# Main app logic: run the scrape-and-analyse pipeline when the sidebar button is pressed
if st.sidebar.button('開始分析'):
    # Timestamp used for the execution-time message at the end
    started_at = time.time()

    # Fetch the requested pages
    data = scrape_pchome(keyword, page_num)

    if data.empty:
        # Nothing came back for this keyword
        st.error("沒有找到相關商品資料")
    else:
        # Raw data table (name and price only)
        st.subheader("原始資料")
        st.dataframe(data[['name', 'price']])

        # Metrics and charts
        create_analysis_plots(data)

        # Full dataset as a CSV download; utf-8-sig adds a BOM to the file
        csv_bytes = data.to_csv(index=False).encode('utf-8-sig')
        st.download_button(
            label="下載完整資料 (CSV)",
            data=csv_bytes,
            file_name=f'pchome_{keyword}_{datetime.now().strftime("%Y%m%d")}.csv',
            mime='text/csv',
        )

        # Report how long the whole run took
        elapsed = time.time() - started_at
        st.info(f'分析完成!執行時間:{elapsed:.2f} 秒')

# Footer
st.markdown("---")
st.markdown("Made with ❤️ by Your Name")