Spaces:
Sleeping
Sleeping
File size: 4,968 Bytes
15b2650 c4877ab 15b2650 c4877ab 15b2650 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import streamlit as st
import requests
import json
import pandas as pd
import time
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import matplotlib as mpl
import matplotlib.font_manager as fm
import os
import urllib.request
# Download (if needed) and register the Chinese font used by the charts
def setup_chinese_font():
    """Make the 'Taipei Sans TC Beta' font the default matplotlib font.

    If 'TaipeiSansTCBeta-Regular.ttf' is not present in the working
    directory it is downloaded first. The font file is then registered with
    matplotlib's font manager and selected as the default font family.

    Failure to download or load the font is reported with a warning instead
    of an exception, so the app can still start (matplotlib's default fonts
    remain in effect in that case).
    """
    import warnings  # local import: only needed on the failure paths below

    font_path = 'TaipeiSansTCBeta-Regular.ttf'
    if not os.path.exists(font_path):
        font_url = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
        # Download under a temporary name and rename on success. Writing
        # straight to font_path would leave a truncated file behind if the
        # download is interrupted, and the existence check above would then
        # skip the download on every later run.
        partial_path = font_path + '.part'
        try:
            urllib.request.urlretrieve(font_url, partial_path)
            os.replace(partial_path, font_path)
        except OSError as exc:  # urllib's URLError/HTTPError derive from OSError
            if os.path.exists(partial_path):
                os.remove(partial_path)
            warnings.warn(f"Could not download font from {font_url}: {exc}")
            return

    # Register the font file with matplotlib
    try:
        fm.fontManager.addfont(font_path)
    except (OSError, RuntimeError) as exc:
        # NOTE(review): the exact exception type for an unparsable font file
        # depends on the matplotlib version - confirm. The file is removed so
        # that the next start attempts a fresh download.
        try:
            os.remove(font_path)
        except OSError:
            pass
        warnings.warn(f"Could not load font file {font_path}: {exc}")
        return

    plt.rcParams['font.family'] = 'Taipei Sans TC Beta'
    mpl.rc('font', family='Taipei Sans TC Beta')
# Register the Chinese font before any chart is drawn
setup_chinese_font()

# Page configuration
st.set_page_config(page_title="PChome 商品分析器", page_icon="📊", layout="wide")

# Page heading and a one-line description of the app
st.title("PChome 商品分析器")
st.markdown("這個應用程式可以爬取並分析 PChome 上的商品資訊")

# Sidebar inputs: the search keyword and the number of result pages to fetch
st.sidebar.header("搜尋設定")
keyword = st.sidebar.text_input("請輸入搜尋關鍵字", "行李箱")
page_num = st.sidebar.number_input(
    "要爬取的頁數",
    min_value=1,
    max_value=10,
    value=1,
)
# Fetch PChome search results for a keyword
def scrape_pchome(keyword, page_num):
    """Fetch search-result pages from PChome and return them as one DataFrame.

    Args:
        keyword: Search term; it is percent-encoded before being placed in
            the query string.
        page_num: Number of result pages to request (pages 1..page_num).

    Returns:
        A DataFrame built from the 'prods' entries of every page fetched
        successfully, with a fresh 0..n-1 index. An empty DataFrame is
        returned when no page yielded any products.

    Errors while fetching or decoding a page are shown with st.error and stop
    the loop; pages collected before the failure are still returned.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    request_timeout = 10     # seconds; without it a stalled connection hangs the app
    delay_between_pages = 2  # seconds; pause between consecutive requests

    # Escape the keyword so characters such as '&', '#' or '%' cannot
    # corrupt the query string.
    encoded_keyword = quote(str(keyword), safe='')

    frames = []
    with st.spinner(f'正在爬取 {page_num} 頁的資料...'):
        # Create the progress bar once; creating it inside the loop would add
        # a new bar to the page for every iteration.
        progress = st.progress(0.0)
        for i in range(1, page_num + 1):
            url = f'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q={encoded_keyword}&page={i}&sort=sale/dc'
            try:
                list_req = requests.get(url, timeout=request_timeout)
                list_req.raise_for_status()
                getdata = json.loads(list_req.content)
                if 'prods' in getdata and getdata['prods']:
                    frames.append(pd.DataFrame(getdata['prods']))
            except Exception as e:
                # Deliberately broad: this is the UI boundary, so any failure
                # is reported to the user and the partial result is kept.
                st.error(f"爬取第 {i} 頁時發生錯誤: {str(e)}")
                break
            progress.progress(i / page_num)
            # No need to wait after the final page
            if i < page_num:
                time.sleep(delay_between_pages)

    if not frames:
        return pd.DataFrame()
    # Concatenate once, with a fresh index, so rows from different pages do
    # not share index labels.
    return pd.concat(frames, ignore_index=True)
# Render summary metrics and price charts for the scraped products
def create_analysis_plots(df):
    """Render price statistics and two price charts in the Streamlit page.

    Shows the mean, maximum and minimum of df['price'] as metrics, a line
    chart of the first 70 prices with a horizontal line at the mean, and a
    histogram (with KDE) of all prices.

    Args:
        df: DataFrame with a 'price' column.

    Note:
        The trend chart title reads the module-level `keyword` variable.
    """
    # Global font settings for the charts
    plt.rcParams['font.sans-serif'] = ['Taipei Sans TC Beta']
    plt.rcParams['axes.unicode_minus'] = False

    # Use a fresh 0..n-1 index: rows concatenated from several result pages
    # can carry repeated index labels, which would otherwise be used as the
    # x positions of the trend chart and passed on to seaborn.
    prices = df['price'].reset_index(drop=True)
    mean_price = prices.mean()

    # Basic statistics
    st.subheader("基本統計資訊")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("平均價格", f"NT$ {mean_price:,.0f}")
    with col2:
        st.metric("最高價格", f"NT$ {prices.max():,.0f}")
    with col3:
        st.metric("最低價格", f"NT$ {prices.min():,.0f}")

    # Price trend plot (first 70 items only)
    st.subheader("價格趨勢圖")
    fig, ax = plt.subplots(figsize=(15, 8))
    try:
        prices.iloc[:70].plot(
            color='skyblue',
            linewidth=2,
            marker='o',
            markersize=8,
            ax=ax,
        )
        ax.axhline(y=mean_price, color='red', linestyle='--', linewidth=2,
                   label=f'平均價格: NT$ {mean_price:,.0f}')
        ax.set_title(f'{datetime.now().strftime("%Y%m%d")} PChome {keyword} 售價分析',
                     fontsize=20, fontweight='bold')
        ax.set_xlabel('商品編號', fontsize=14)
        ax.set_ylabel('價格 (NT$)', fontsize=14)
        ax.tick_params(axis='x', labelrotation=45)
        ax.grid(True, alpha=0.3)
        ax.legend()
        st.pyplot(fig)
    finally:
        # Figures created through pyplot stay registered until closed; close
        # them so repeated Streamlit reruns do not accumulate open figures.
        plt.close(fig)

    # Price distribution plot
    st.subheader("價格分布圖")
    fig2, ax2 = plt.subplots(figsize=(12, 6))
    try:
        sns.histplot(data=prices, bins=30, kde=True, ax=ax2)
        ax2.set_title('商品價格分布', fontsize=16)
        ax2.set_xlabel('價格 (NT$)', fontsize=12)
        ax2.set_ylabel('數量', fontsize=12)
        st.pyplot(fig2)
    finally:
        plt.close(fig2)
# Main app logic: run the scrape-and-analyze pipeline when the sidebar
# button is pressed
if st.sidebar.button('開始分析'):
    # Start of the timed section (reported once the analysis is done)
    start_time = time.time()

    # Fetch the product data for the current sidebar inputs
    data = scrape_pchome(keyword, page_num)

    if data.empty:
        st.error("沒有找到相關商品資料")
    else:
        # Raw data table (name and price columns only)
        st.subheader("原始資料")
        st.dataframe(data[['name', 'price']])

        # Statistics and charts
        create_analysis_plots(data)

        # Offer the full data set as a CSV download
        csv = data.to_csv(index=False).encode('utf-8-sig')
        today = datetime.now().strftime("%Y%m%d")
        st.download_button(
            label="下載完整資料 (CSV)",
            data=csv,
            file_name=f'pchome_{keyword}_{today}.csv',
            mime='text/csv',
        )

        # Report how long the whole run took
        end_time = time.time()
        st.info(f'分析完成!執行時間:{end_time - start_time:.2f} 秒')

# Footer
st.markdown("---")
st.markdown("Made with ❤️ by Your Name")