Spaces:

earnliners
/

btc-usd

Sleeping

App Files Files Community

btc-usd / app.py

earnliners

Update app.py

1a956e7 verified 10 months ago

raw

history blame

16.1 kB

	import streamlit as st
	from datetime import datetime, timedelta
	import yfinance as yf
	import numpy as np
	import pandas as pd
	from sklearn.decomposition import PCA
	from sklearn.preprocessing import StandardScaler, LabelEncoder
	from sklearn.ensemble import RandomForestClassifier
	import shap
	import matplotlib.pyplot as plt

	class DataFetcher:
	    """Downloads hourly price history for a single ticker through yfinance.

	    Attributes:
	        ticker: Symbol handed to ``yf.download``.
	        nb_days: Number of days the window reaches back from today's midnight.
	        data: Result of the last download, ``None`` until ``fetch_data`` runs.
	    """

	    def __init__(self, ticker, nb_days):
	        # Nothing is downloaded at construction time.
	        self.data = None
	        self.nb_days = nb_days
	        self.ticker = ticker

	    def fetch_data(self):
	        """Download the hourly history, keep it on ``self.data`` and return it.

	        The window is anchored on midnight of the current day (naive local
	        time, as produced by ``datetime.now()``): it starts ``nb_days`` days
	        before that midnight and ends one day after it.
	        """
	        midnight_today = datetime.combine(datetime.now().date(), datetime.min.time())
	        window_start = midnight_today - timedelta(days=self.nb_days)
	        window_end = midnight_today + timedelta(days=1)
	        self.data = yf.download(
	            self.ticker,
	            start=window_start,
	            end=window_end,
	            interval="1h",
	        )
	        return self.data

	class FinancialDataProcessor:
	    """Derives return, scenario and probability columns from price data.

	    The constructor copies the incoming frame, so the caller's object is never
	    modified. Every step stores its result on ``self.data``.
	    """

	    def __init__(self, data):
	        self.data = data.copy()

	    def _flatten_columns(self):
	        """Collapse MultiIndex column labels into plain strings.

	        A label such as ``('Close', 'BTC-USD')`` becomes ``'Close_BTC-USD'``;
	        when the second level is empty only the first level is kept. Frames
	        whose columns are not a ``MultiIndex`` are left untouched.
	        """
	        if isinstance(self.data.columns, pd.MultiIndex):
	            self.data.columns = [
	                f"{col[0]}_{col[1]}" if col[1] else col[0]
	                for col in self.data.columns
	            ]

	    def _select_close_column(self):
	        """Return the label of the column to use as the closing price.

	        Preference order: an exact ``'Close'`` label, then a label that starts
	        with ``'Close'`` (the flattened ``'Close_<ticker>'`` form), then any
	        label that merely contains ``'Close'`` (for example ``'Adj Close'``).

	        Raises:
	            ValueError: If no column label contains ``'Close'``.
	        """
	        labels = list(self.data.columns)
	        candidates = [label for label in labels if 'Close' in str(label)]
	        if not candidates:
	            raise ValueError("The 'Close' column is missing in the dataset.")
	        if 'Close' in labels:
	            return 'Close'
	        for label in candidates:
	            if str(label).startswith('Close'):
	                return label
	        return candidates[0]

	    def calculate_returns(self, periods_per_year=252):
	        """Add log-return, scenario and adjusted/annualized return columns.

	        Args:
	            periods_per_year: Factor used to scale ``AdjustedLogReturn`` into
	                ``AnnualizedReturn``. Defaults to 252.
	                NOTE(review): 252 is presumably a daily trading-day
	                convention; confirm the intended factor for hourly data.

	        Returns:
	            ``self``, so calls can be chained.

	        Raises:
	            ValueError: If no closing-price column can be found.
	        """
	        self._flatten_columns()

	        # Picking the first label that merely contains 'Close' could select
	        # 'Adj Close' and, after renaming, leave two 'Close' columns behind.
	        close_label = self._select_close_column()
	        if close_label != 'Close':
	            self.data.rename(columns={close_label: 'Close'}, inplace=True)

	        # Keep strictly positive closes only, so the logarithm below is defined.
	        self.data = self.data[self.data['Close'] > 0].copy()

	        self.data['LogReturn'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
	        self.data.replace([np.inf, -np.inf], np.nan, inplace=True)
	        # The first remaining row has no predecessor, hence no return.
	        self.data.dropna(subset=['LogReturn'], inplace=True)

	        self.data['Scenario'] = np.where(self.data['LogReturn'] > 0, 'Buy', 'Sell')
	        # Flip the sign on 'Sell' rows.
	        self.data['AdjustedLogReturn'] = np.where(
	            self.data['Scenario'] == 'Sell', -self.data['LogReturn'], self.data['LogReturn']
	        )
	        self.data['AnnualizedReturn'] = self.data['AdjustedLogReturn'] * periods_per_year

	        return self

	    def calculate_probabilities(self):
	        """Add ``Buy%`` and ``Sell%`` columns and return the data frame.

	        Both are derived from ``tanh(LogReturn)`` mapped into the 0..1 range;
	        for every row the two values sum to one. Requires the ``LogReturn``
	        column created by ``calculate_returns``.
	        """
	        squashed = np.tanh(self.data['LogReturn'])
	        self.data['Buy%'] = (1 + squashed) / 2
	        self.data['Sell%'] = (1 - squashed) / 2
	        return self.data

	    def apply_pca_calculations(self, pca_result):
	        """Derive PCA-based scenario columns and merge them into the data.

	        Args:
	            pca_result: Frame with a ``PC1`` column, indexed like ``self.data``.

	        Returns:
	            ``self.data`` after an index-on-index merge with the PCA columns;
	            only rows present on both sides are kept.
	        """
	        # Work on a filtered copy so the caller's frame is not modified.
	        pca_result = pca_result[pca_result['PC1'] > 0].copy()

	        # NOTE(review): the ratio is taken between consecutive rows that
	        # survived the PC1 > 0 filter, not between adjacent original rows.
	        # Confirm this is intended.
	        pca_result['PCA_LogReturn'] = np.log(pca_result['PC1'] / pca_result['PC1'].shift(1))
	        pca_result.replace([np.inf, -np.inf], np.nan, inplace=True)
	        pca_result.dropna(subset=['PCA_LogReturn'], inplace=True)

	        pca_result['PCA_Scenario'] = np.where(pca_result['PCA_LogReturn'] > 0, 'Buy', 'Sell')
	        pca_squashed = np.tanh(pca_result['PCA_LogReturn'])
	        pca_result['PCA_Buy%'] = (1 + pca_squashed) / 2
	        pca_result['PCA_Sell%'] = (1 - pca_squashed) / 2

	        self.data = self.data.merge(pca_result, left_index=True, right_index=True)
	        return self.data

	class PCATransformer:
	    """Standardizes numeric columns and projects them onto principal components."""

	    def __init__(self, n_components=1):
	        self.n_components = n_components
	        self.pca = PCA(n_components=n_components)
	        self.scaler = StandardScaler()

	    def fit_transform(self, data):
	        """Fit scaler and PCA on ``data`` and return the component scores.

	        Only the numeric columns of ``data`` take part. The returned frame
	        keeps the index of ``data`` and names its columns ``PC1``, ``PC2``, ...
	        up to ``n_components``.
	        """
	        numeric_only = data.select_dtypes(include=[np.number])
	        standardized = self.scaler.fit_transform(numeric_only)
	        components = self.pca.fit_transform(standardized)
	        labels = [f'PC{k}' for k in range(1, self.n_components + 1)]
	        return pd.DataFrame(components, columns=labels, index=data.index)

	class StrategyBuilder:
	    """Builds and refines the trading strategy using a random forest and SHAP.

	    The constructor copies the incoming frame; the methods never modify the
	    caller's object.
	    """

	    def __init__(self, data):
	        self.data = data.copy()

	    def train_model(self, target_column):
	        """Fit a random forest that predicts ``target_column``.

	        Features are all numeric columns of the data except the target
	        itself; the target is label-encoded before fitting.

	        Returns:
	            Tuple ``(model, X, y_encoded)`` with the fitted classifier, the
	            feature frame and the encoded target.
	        """
	        y = self.data[target_column]
	        X = self.data.select_dtypes(include=[np.number])
	        # A numeric target would otherwise be used as one of its own features.
	        X = X.drop(columns=[target_column], errors='ignore')
	        # NOTE(review): numeric columns derived from the target (for example
	        # 'PCA_LogReturn' when the target is 'PCA_Scenario') remain in X.
	        # This looks like target leakage; confirm intent.
	        y_encoded = LabelEncoder().fit_transform(y)
	        model = RandomForestClassifier(n_estimators=100, random_state=42)
	        model.fit(X, y_encoded)
	        return model, X, y_encoded

	    def compute_shapley_values(self, model, X):
	        """Return the SHAP values of ``model`` for the rows of ``X``."""
	        explainer = shap.TreeExplainer(model)
	        return explainer.shap_values(X)

	    def analyze_feature_importance(self, shap_values, feature_names):
	        """Rank features by their mean absolute SHAP value.

	        Args:
	            shap_values: Either a 2-D array (rows x features), a list of such
	                arrays, or a 3-D array whose last axis is indexed like that
	                list. Presumably the list entries / last axis are per class,
	                depending on the SHAP version; confirm against the installed
	                release.
	            feature_names: One name per feature column.

	        Returns:
	            Frame with ``Feature`` and ``Mean_Abs_SHAP`` columns, sorted by
	            descending importance.

	        Raises:
	            ValueError: If the number of names differs from the number of
	                feature columns.
	        """
	        if isinstance(shap_values, list):
	            # Use entry 1 when present; fall back to the only entry when the
	            # output has a single one, instead of raising IndexError.
	            shap_values = shap_values[1] if len(shap_values) > 1 else shap_values[0]

	        shap_values = np.asarray(shap_values)
	        if shap_values.ndim == 3:
	            class_index = 1 if shap_values.shape[2] > 1 else 0
	            shap_values = shap_values[:, :, class_index]

	        mean_abs_shap = np.mean(np.abs(shap_values), axis=0)

	        if len(mean_abs_shap) != len(feature_names):
	            raise ValueError("Mismatch between SHAP values and feature names.")

	        feature_importance = pd.DataFrame({
	            'Feature': list(feature_names),
	            'Mean_Abs_SHAP': mean_abs_shap,
	        }).sort_values(by='Mean_Abs_SHAP', ascending=False)

	        return feature_importance

	    def refine_thresholds(self, feature_importance, buy_threshold=0.5, sell_threshold=0.5):
	        """Raise the thresholds when probability columns dominate the model.

	        Each of the three most important features whose name contains
	        ``'Buy%'`` multiplies the buy threshold by 1.1; otherwise a name
	        containing ``'Sell%'`` multiplies the sell threshold by 1.1.

	        Returns:
	            Tuple ``(buy_threshold, sell_threshold)``.
	        """
	        top_features = feature_importance.head(3)['Feature'].tolist()
	        for feature in top_features:
	            # 'PCA_Buy%' / 'PCA_Sell%' contain the shorter names, so one
	            # substring test per side covers both variants.
	            if 'Buy%' in feature:
	                buy_threshold *= 1.1
	            elif 'Sell%' in feature:
	                sell_threshold *= 1.1
	        return buy_threshold, sell_threshold

	class Backtester:
	    """Backtests the trading strategy on historical data.

	    The constructor copies the incoming frame. ``backtest`` reads the columns
	    ``Close``, ``Scenario``, ``Buy%``, ``Sell%``, ``PCA_Scenario``,
	    ``PCA_Buy%`` and ``PCA_Sell%``.
	    """

	    def __init__(self, data):
	        self.data = data.copy()

	    def backtest(self, buy_threshold=0.5, sell_threshold=0.5):
	        """Replay the signals row by row and track the portfolio value.

	        The walk starts at the second row with a portfolio value of 10000 and
	        no position. On each row the position carried in from the previous
	        row is first marked to market with the row-to-row price ratio
	        (inverted for a 'Sell' position). Then the row's signals may open or
	        flip the position at that row's close.

	        Args:
	            buy_threshold: A 'Buy' signal fires when a scenario column says
	                'Buy' and its matching probability exceeds this value.
	            sell_threshold: Same for 'Sell'; only checked when no 'Buy'
	                signal fired on that row.

	        Returns:
	            Tuple ``(portfolio_values, position, entry_price)``: one portfolio
	            value per processed row, the final position (``'Buy'``,
	            ``'Sell'`` or ``None``) and the close at which it was opened
	            (``None`` without a position).
	        """
	        portfolio_value = 10000
	        position = None
	        entry_price = None
	        portfolio_values = []

	        # Fewer than two rows means there is nothing to replay.
	        if len(self.data) < 2:
	            return portfolio_values, position, entry_price

	        # Pull the columns out once instead of building a row Series per step.
	        closes = self.data['Close'].to_numpy()
	        scenario = self.data['Scenario'].to_numpy()
	        buy_prob = self.data['Buy%'].to_numpy()
	        sell_prob = self.data['Sell%'].to_numpy()
	        pca_scenario = self.data['PCA_Scenario'].to_numpy()
	        pca_buy_prob = self.data['PCA_Buy%'].to_numpy()
	        pca_sell_prob = self.data['PCA_Sell%'].to_numpy()

	        for i in range(1, len(closes)):
	            close = float(closes[i])
	            previous_close = float(closes[i - 1])

	            # Compound with the per-row ratio. Multiplying by close/entry_price
	            # on every row would re-apply the whole move since entry each time.
	            if position == 'Buy':
	                portfolio_value *= close / previous_close
	            elif position == 'Sell':
	                portfolio_value *= previous_close / close

	            buy_signal = (
	                (pca_scenario[i] == 'Buy' and pca_buy_prob[i] > buy_threshold)
	                or (scenario[i] == 'Buy' and buy_prob[i] > buy_threshold)
	            )
	            sell_signal = (
	                (pca_scenario[i] == 'Sell' and pca_sell_prob[i] > sell_threshold)
	                or (scenario[i] == 'Sell' and sell_prob[i] > sell_threshold)
	            )

	            # A 'Buy' signal takes precedence over a 'Sell' signal.
	            if buy_signal:
	                if position != 'Buy':
	                    position = 'Buy'
	                    entry_price = close
	            elif sell_signal:
	                if position != 'Sell':
	                    position = 'Sell'
	                    entry_price = close

	            portfolio_values.append(portfolio_value)

	        return portfolio_values, position, entry_price

	def run_analysis():
	    """Run the complete trading analysis and render the result in Streamlit.

	    Pipeline: download 50 days of hourly BTC-USD data, derive returns and
	    probabilities, add the PCA-based columns, train the model, refine the
	    thresholds from SHAP feature importance and backtest. The final position
	    and the latest PCA probabilities are then shown in two columns.

	    Returns:
	        ``True`` when everything ran and was rendered; ``False`` when any step
	        raised, in which case the message is shown through ``st.error``.
	    """
	    try:
	        fetcher = DataFetcher(ticker="BTC-USD", nb_days=50)
	        data = fetcher.fetch_data()
	        # Fail with a clear message here instead of an obscure error further
	        # down the pipeline when the download yields no rows.
	        if data is None or data.empty:
	            raise ValueError("No market data was returned for BTC-USD.")

	        processor = FinancialDataProcessor(data)
	        processed_data = processor.calculate_returns().calculate_probabilities()

	        pca_transformer = PCATransformer(n_components=1)
	        pca_result = pca_transformer.fit_transform(processed_data)
	        processed_data = processor.apply_pca_calculations(pca_result)
	        # The PCA step filters and inner-merges rows; nothing may be left.
	        if processed_data.empty:
	            raise ValueError("No rows left after preprocessing; cannot build a strategy.")

	        strategy_builder = StrategyBuilder(processed_data)
	        model, X, _ = strategy_builder.train_model(target_column='PCA_Scenario')
	        shap_values = strategy_builder.compute_shapley_values(model, X)

	        feature_importance = strategy_builder.analyze_feature_importance(shap_values, X.columns)
	        buy_threshold, sell_threshold = strategy_builder.refine_thresholds(feature_importance)

	        backtester = Backtester(processed_data)
	        # Only the final position and its entry price are displayed.
	        _, final_position, entry_price = backtester.backtest(buy_threshold, sell_threshold)

	        last_row = processed_data.iloc[-1]

	        # Display results
	        col1, col2 = st.columns(2)

	        with col1:
	            st.subheader("Current Position")
	            st.metric("Position", final_position or "No position")
	            if final_position:
	                st.metric("Entry Price", f"${entry_price:.2f}")

	        with col2:
	            st.subheader("Decision Metrics")
	            st.metric("Buy%", f"{last_row['PCA_Buy%']:.4f}")
	            st.metric("Sell%", f"{last_row['PCA_Sell%']:.4f}")

	        return True

	    except Exception as e:
	        # Top-level boundary of the page: report the failure to the user
	        # instead of crashing the app.
	        st.error(f"An error occurred: {str(e)}")
	        return False

	# Configure the page before any other Streamlit call, then show the title.
	_page_title = "BTC-USD Trading Bot"
	st.set_page_config(page_title=_page_title, layout="wide")
	st.title(_page_title)


	# The analysis is executed on every page load; no user action is required.
	run_analysis()


	# Educational Slides Section
	# Static content rendered below the analysis: a header, an embedded audio
	# player and two reference links. The bodies of the triple-quoted strings are
	# kept exactly as in the source, because their whitespace is runtime content.
	st.header("Educational Slides")

	st.markdown('🎧Listen to the Audio🎧')
	# Raw HTML is required for the iframe, hence unsafe_allow_html=True.
	st.markdown(
	    """
	<iframe src="https://drive.google.com/file/d/1eXuzJGng4o5Hvd3kZjSnVfQ-UmhKen81/preview" width="640" height="480"></iframe>
	""", unsafe_allow_html=True
	)

	st.markdown('Read the [White Paper](https://huggingface.co/spaces/earnliners/dow-usd/resolve/main/QPTFen.pdf).')


	st.markdown('Powered by [Forecast Trade Group](https://huggingface.co/TradeAdmin).')


	# Slides 1-6: one collapsed expander per slide. The bodies of the
	# triple-quoted strings are kept exactly as in the source, because their
	# whitespace is runtime content.

	# Slide 1: title and overview.
	with st.expander("Slide 1: Introduction", expanded=False):
	    st.subheader("Real-Time Crypto Trading Bot Using Machine Learning and PCA")
	    st.markdown("""
	Subtitle: Leveraging Financial Data Analysis for Optimal Trading Decisions

	Overview:
	This system integrates real-time financial data, machine learning, and principal component analysis (PCA)
	to automate trading decisions in cryptocurrency markets.
	""")

	# Slide 2: goals of the system.
	with st.expander("Slide 2: Objective", expanded=False):
	    st.subheader("Main Goals")
	    st.markdown("""
	- Develop an automated trading system that optimizes buy and sell decisions based on historical financial data
	- Use machine learning models, specifically Random Forest, to predict market movements
	- Implement Principal Component Analysis (PCA) for dimensionality reduction and feature extraction
	- Backtest the system and evaluate portfolio performance
	""")

	# Slide 3: glossary of the methods used.
	with st.expander("Slide 3: Key Concepts", expanded=False):
	    st.subheader("Core Technologies and Methods")
	    st.markdown("""
	- Logarithmic Return: A continuous-time return calculation used to model price changes in markets
	- Principal Component Analysis (PCA): A dimensionality reduction technique to extract meaningful features
	- Machine Learning: Using Random Forest to classify "Buy" and "Sell" scenarios
	- SHAP Values: A method to interpret model outputs through feature contribution analysis
	""")

	# Slide 4: two-column layout, data fetching on the left and preprocessing on
	# the right.
	with st.expander("Slide 4: Data Collection and Preprocessing", expanded=False):
	    col1, col2 = st.columns(2)
	    with col1:
	        st.markdown("### Data Fetching")
	        st.markdown("""
	- Source: Yahoo Finance (yfinance)
	- Data: Historical cryptocurrency data
	- Interval: Hourly data points
	- Period: Last 50 days
	""")
	    with col2:
	        st.markdown("### Preprocessing")
	        st.markdown("""
	- Calculate logarithmic returns
	- Classify scenarios (Buy/Sell)
	- Adjust returns for sell scenarios
	- Handle missing values and outliers
	""")

	# Slide 5: the PCA steps, with formulas in fenced code blocks.
	with st.expander("Slide 5: Principal Component Analysis (PCA)", expanded=False):
	    st.subheader("PCA Process")
	    st.markdown("""
	1. Data Standardization:
	```
	X' = (X - μ) / σ
	```

	2. Covariance Matrix:
	```
	Cov(X) = 1/(n-1) Σ(Xi - μ)(Xi - μ)ᵀ
	```

	3. Eigenvalue Decomposition:
	- Find principal components
	- Sort by variance explained

	4. Dimensionality Reduction:
	- Transform data to lower dimensions
	- Preserve important features
	""")

	# Slide 6: two-column layout, model architecture on the left and training
	# process on the right.
	with st.expander("Slide 6: Machine Learning Strategy", expanded=False):
	    col1, col2 = st.columns(2)
	    with col1:
	        st.markdown("### Model Architecture")
	        st.markdown("""
	- Random Forest Classifier
	- Feature selection from PCA
	- Binary classification (Buy/Sell)
	""")
	    with col2:
	        st.markdown("### Training Process")
	        st.markdown("""
	- Cross-validation
	- Hyperparameter tuning
	- Performance metrics
	- Model evaluation
	""")

	# Slide 7: SHAP formula and what the analysis provides.
	with st.expander("Slide 7: SHAP Analysis", expanded=False):
	    st.subheader("Shapley Additive Explanations")
	    # The formula sits inside a fenced code block, where backslashes are shown
	    # literally, so the bars around N are written without a backslash. "\|" is
	    # also not a valid escape sequence in a regular Python string.
	    st.markdown("""
	SHAP Value Formula:
	```
	φᵢ(f) = 1/|N|! Σ [f(S ∪ {i}) - f(S)]
	```

	Key Components:
	- Feature importance ranking
	- Individual prediction explanations
	- Global model interpretation
	""")

	# Slides 8-13: one collapsed expander per slide. The bodies of the
	# triple-quoted strings are kept exactly as in the source, because their
	# whitespace is runtime content.

	# Slide 8: how thresholds and signals are refined.
	with st.expander("Slide 8: Strategy Refinement", expanded=False):
	    st.subheader("Dynamic Strategy Adjustment")
	    st.markdown("""
	1. Threshold Refinement:
	- Use SHAP values to identify key features
	- Adjust thresholds based on importance

	2. Signal Processing:
	- Buy signal strengthening
	- Sell signal validation
	- Risk management integration
	""")

	# Slide 9: portfolio formula, trading logic and metrics.
	with st.expander("Slide 9: Backtesting Framework", expanded=False):
	    st.markdown("""
	### Portfolio Value Calculation
	```
	Portfolio Valueₜ₊₁ = Portfolio Valueₜ × (Pₜ₊₁/Pₜ)
	```

	### Trading Logic
	- Buy when probability > threshold
	- Sell when probability > threshold
	- Position management

	### Performance Metrics
	- Total Return
	- Risk Metrics
	- Sharpe Ratio
	""")

	# Slide 10: dashboard feature list.
	with st.expander("Slide 10: Real-Time Interface", expanded=False):
	    st.subheader("Streamlit Dashboard Features")
	    st.markdown("""
	- Live trading signals
	- Portfolio performance tracking
	- Position monitoring
	- Automatic updates
	- Historical performance
	""")

	# Slide 11: recap of the system components.
	with st.expander("Slide 11: Summary", expanded=False):
	    st.markdown("""
	### Key System Components
	- Automated trading system
	- ML & PCA integration
	- SHAP-based interpretation
	- Real-time analytics
	- Performance tracking
	""")

	# Slide 12: planned enhancements.
	with st.expander("Slide 12: Future Improvements", expanded=False):
	    st.markdown("""
	### Planned Enhancements
	1. Additional data sources integration
	2. Advanced optimization techniques
	3. Live trading deployment
	4. Enhanced risk management
	5. Portfolio diversification
	""")

	# Slide 13: closing slide followed by a call for support.
	with st.expander("Slide 13: Q&A", expanded=False):
	    st.markdown("""
	### Questions & Discussion

	Thank you for exploring our trading system! For questions or suggestions:
	- System architecture
	- Implementation details
	- Performance metrics
	- Future developments
	""")
	    # NOTE(review): the source's indentation was lost; the three calls below
	    # are assumed to belong to the Slide 13 expander. Confirm.
	    st.markdown("---")
	    st.markdown("### Enjoying the Content?")
	    st.markdown("""
	If you find our work useful and interesting, please consider supporting us for free on Publish0x!
	It's quick, easy, and no cost to you. Just follow this link to show your support:
	[Like and Tip Us for Free on Publish0x!](https://www.publish0x.com/start/crypto-trading-mathematical-modeling-and-strategic-optimizat-xkelryw)
	""")

	# Footer
	# Rendered at the bottom of the page: a horizontal rule, donation
	# instructions with a BTC address, and two external links.
	st.markdown("---")
	st.markdown("*Support Development & Info : ")
	st.markdown("Send your email in comment to your btc donation at ")
	st.markdown("1P9R71C6JYJxrPVEzMz4K3hoHYGRW39A9A")
	st.markdown('Join our [Community on Huggingface](https://huggingface.co/TradeAdmin) for more.')
	st.markdown('All our bots are [tested on HTX](https://www.htx.com/invite/en-us/1f?invite_code=awhd9223).')

	# End of the educational slides and footer section