Compare commits
7 Commits
Incrementa
...
a22914731f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a22914731f | ||
|
|
81e4b640a7 | ||
|
|
2dba88b620 | ||
|
|
de67b27e37 | ||
|
|
1284549106 | ||
|
|
5f03524d6a | ||
|
|
74c8048ed5 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,11 +1,12 @@
|
|||||||
# ---> Python
|
# ---> Python
|
||||||
*.json
|
/credentials/*.json
|
||||||
*.csv
|
*.csv
|
||||||
*.png
|
*.png
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
*$py.class
|
*$py.class
|
||||||
|
/data/*.npy
|
||||||
|
|
||||||
# C extensions
|
# C extensions
|
||||||
*.so
|
*.so
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import time
|
||||||
|
|
||||||
from cycles.supertrend import Supertrends
|
from cycles.supertrend import Supertrends
|
||||||
from cycles.market_fees import MarketFees
|
from cycles.market_fees import MarketFees
|
||||||
@@ -27,6 +28,9 @@ class Backtest:
|
|||||||
trends_arr = np.stack(trends, axis=1)
|
trends_arr = np.stack(trends, axis=1)
|
||||||
meta_trend = np.where((trends_arr[:,0] == trends_arr[:,1]) & (trends_arr[:,1] == trends_arr[:,2]),
|
meta_trend = np.where((trends_arr[:,0] == trends_arr[:,1]) & (trends_arr[:,1] == trends_arr[:,2]),
|
||||||
trends_arr[:,0], 0)
|
trends_arr[:,0], 0)
|
||||||
|
# Shift meta_trend by one to avoid lookahead bias
|
||||||
|
meta_trend_signal = np.roll(meta_trend, 1)
|
||||||
|
meta_trend_signal[0] = 0 # or np.nan, but 0 means 'no signal' for first bar
|
||||||
|
|
||||||
position = 0 # 0 = no position, 1 = long
|
position = 0 # 0 = no position, 1 = long
|
||||||
entry_price = 0
|
entry_price = 0
|
||||||
@@ -39,14 +43,22 @@ class Backtest:
|
|||||||
entry_time = None
|
entry_time = None
|
||||||
current_trade_min1_start_idx = None
|
current_trade_min1_start_idx = None
|
||||||
|
|
||||||
min1_df['timestamp'] = pd.to_datetime(min1_df.index)
|
min1_df.index = pd.to_datetime(min1_df.index)
|
||||||
|
min1_timestamps = min1_df.index.values
|
||||||
|
|
||||||
|
last_print_time = time.time()
|
||||||
for i in range(1, len(_df)):
|
for i in range(1, len(_df)):
|
||||||
|
current_time = time.time()
|
||||||
|
if current_time - last_print_time >= 5:
|
||||||
|
progress = (i / len(_df)) * 100
|
||||||
|
print(f"\rProgress: {progress:.1f}%", end="", flush=True)
|
||||||
|
last_print_time = current_time
|
||||||
|
|
||||||
price_open = _df['open'].iloc[i]
|
price_open = _df['open'].iloc[i]
|
||||||
price_close = _df['close'].iloc[i]
|
price_close = _df['close'].iloc[i]
|
||||||
date = _df['timestamp'].iloc[i]
|
date = _df['timestamp'].iloc[i]
|
||||||
prev_mt = meta_trend[i-1]
|
prev_mt = meta_trend_signal[i-1]
|
||||||
curr_mt = meta_trend[i]
|
curr_mt = meta_trend_signal[i]
|
||||||
|
|
||||||
# Check stop loss if in position
|
# Check stop loss if in position
|
||||||
if position == 1:
|
if position == 1:
|
||||||
@@ -87,6 +99,8 @@ class Backtest:
|
|||||||
drawdown = (max_balance - balance) / max_balance
|
drawdown = (max_balance - balance) / max_balance
|
||||||
drawdowns.append(drawdown)
|
drawdowns.append(drawdown)
|
||||||
|
|
||||||
|
print("\rProgress: 100%\r\n", end="", flush=True)
|
||||||
|
|
||||||
# If still in position at end, sell at last close
|
# If still in position at end, sell at last close
|
||||||
if position == 1:
|
if position == 1:
|
||||||
exit_result = Backtest.handle_exit(coin, _df['close'].iloc[-1], entry_price, entry_time, _df['timestamp'].iloc[-1])
|
exit_result = Backtest.handle_exit(coin, _df['close'].iloc[-1], entry_price, entry_time, _df['timestamp'].iloc[-1])
|
||||||
|
|||||||
@@ -1,70 +1,30 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import logging
|
import logging
|
||||||
from scipy.signal import find_peaks
|
|
||||||
from matplotlib.patches import Rectangle
|
|
||||||
from scipy import stats
|
|
||||||
import concurrent.futures
|
|
||||||
from functools import partial
|
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
|
|
||||||
# Color configuration
|
|
||||||
# Plot colors
|
|
||||||
DARK_BG_COLOR = '#181C27'
|
|
||||||
LEGEND_BG_COLOR = '#333333'
|
|
||||||
TITLE_COLOR = 'white'
|
|
||||||
AXIS_LABEL_COLOR = 'white'
|
|
||||||
|
|
||||||
# Candlestick colors
|
|
||||||
CANDLE_UP_COLOR = '#089981' # Green
|
|
||||||
CANDLE_DOWN_COLOR = '#F23645' # Red
|
|
||||||
|
|
||||||
# Marker colors
|
|
||||||
MIN_COLOR = 'red'
|
|
||||||
MAX_COLOR = 'green'
|
|
||||||
|
|
||||||
# Line style colors
|
|
||||||
MIN_LINE_STYLE = 'g--' # Green dashed
|
|
||||||
MAX_LINE_STYLE = 'r--' # Red dashed
|
|
||||||
SMA7_LINE_STYLE = 'y-' # Yellow solid
|
|
||||||
SMA15_LINE_STYLE = 'm-' # Magenta solid
|
|
||||||
|
|
||||||
# SuperTrend colors
|
|
||||||
ST_COLOR_UP = 'g-'
|
|
||||||
ST_COLOR_DOWN = 'r-'
|
|
||||||
|
|
||||||
# Cache the calculation results by function parameters
|
|
||||||
@lru_cache(maxsize=32)
|
@lru_cache(maxsize=32)
|
||||||
def cached_supertrend_calculation(period, multiplier, data_tuple):
|
def cached_supertrend_calculation(period, multiplier, data_tuple):
|
||||||
# Convert tuple back to numpy arrays
|
|
||||||
high = np.array(data_tuple[0])
|
high = np.array(data_tuple[0])
|
||||||
low = np.array(data_tuple[1])
|
low = np.array(data_tuple[1])
|
||||||
close = np.array(data_tuple[2])
|
close = np.array(data_tuple[2])
|
||||||
|
|
||||||
# Calculate TR and ATR using vectorized operations
|
|
||||||
tr = np.zeros_like(close)
|
tr = np.zeros_like(close)
|
||||||
tr[0] = high[0] - low[0]
|
tr[0] = high[0] - low[0]
|
||||||
hc_range = np.abs(high[1:] - close[:-1])
|
hc_range = np.abs(high[1:] - close[:-1])
|
||||||
lc_range = np.abs(low[1:] - close[:-1])
|
lc_range = np.abs(low[1:] - close[:-1])
|
||||||
hl_range = high[1:] - low[1:]
|
hl_range = high[1:] - low[1:]
|
||||||
tr[1:] = np.maximum.reduce([hl_range, hc_range, lc_range])
|
tr[1:] = np.maximum.reduce([hl_range, hc_range, lc_range])
|
||||||
|
|
||||||
# Use numpy's exponential moving average
|
|
||||||
atr = np.zeros_like(tr)
|
atr = np.zeros_like(tr)
|
||||||
atr[0] = tr[0]
|
atr[0] = tr[0]
|
||||||
multiplier_ema = 2.0 / (period + 1)
|
multiplier_ema = 2.0 / (period + 1)
|
||||||
for i in range(1, len(tr)):
|
for i in range(1, len(tr)):
|
||||||
atr[i] = (tr[i] * multiplier_ema) + (atr[i-1] * (1 - multiplier_ema))
|
atr[i] = (tr[i] * multiplier_ema) + (atr[i-1] * (1 - multiplier_ema))
|
||||||
|
|
||||||
# Calculate bands
|
|
||||||
upper_band = np.zeros_like(close)
|
upper_band = np.zeros_like(close)
|
||||||
lower_band = np.zeros_like(close)
|
lower_band = np.zeros_like(close)
|
||||||
for i in range(len(close)):
|
for i in range(len(close)):
|
||||||
hl_avg = (high[i] + low[i]) / 2
|
hl_avg = (high[i] + low[i]) / 2
|
||||||
upper_band[i] = hl_avg + (multiplier * atr[i])
|
upper_band[i] = hl_avg + (multiplier * atr[i])
|
||||||
lower_band[i] = hl_avg - (multiplier * atr[i])
|
lower_band[i] = hl_avg - (multiplier * atr[i])
|
||||||
|
|
||||||
final_upper = np.zeros_like(close)
|
final_upper = np.zeros_like(close)
|
||||||
final_lower = np.zeros_like(close)
|
final_lower = np.zeros_like(close)
|
||||||
supertrend = np.zeros_like(close)
|
supertrend = np.zeros_like(close)
|
||||||
@@ -106,76 +66,18 @@ def cached_supertrend_calculation(period, multiplier, data_tuple):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def calculate_supertrend_external(data, period, multiplier):
|
def calculate_supertrend_external(data, period, multiplier):
|
||||||
# Convert DataFrame columns to hashable tuples
|
|
||||||
high_tuple = tuple(data['high'])
|
high_tuple = tuple(data['high'])
|
||||||
low_tuple = tuple(data['low'])
|
low_tuple = tuple(data['low'])
|
||||||
close_tuple = tuple(data['close'])
|
close_tuple = tuple(data['close'])
|
||||||
|
|
||||||
# Call the cached function
|
|
||||||
return cached_supertrend_calculation(period, multiplier, (high_tuple, low_tuple, close_tuple))
|
return cached_supertrend_calculation(period, multiplier, (high_tuple, low_tuple, close_tuple))
|
||||||
|
|
||||||
|
|
||||||
class Supertrends:
|
class Supertrends:
|
||||||
def __init__(self, data, verbose=False, display=False):
|
def __init__(self, data, verbose=False, display=False):
|
||||||
"""
|
|
||||||
Initialize the TrendDetectorSimple class.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- data: pandas DataFrame containing price data
|
|
||||||
- verbose: boolean, whether to display detailed logging information
|
|
||||||
- display: boolean, whether to enable display/plotting features
|
|
||||||
"""
|
|
||||||
|
|
||||||
self.data = data
|
self.data = data
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.display = display
|
|
||||||
|
|
||||||
# Only define display-related variables if display is True
|
|
||||||
if self.display:
|
|
||||||
# Plot style configuration
|
|
||||||
self.plot_style = 'dark_background'
|
|
||||||
self.bg_color = DARK_BG_COLOR
|
|
||||||
self.plot_size = (12, 8)
|
|
||||||
|
|
||||||
# Candlestick configuration
|
|
||||||
self.candle_width = 0.6
|
|
||||||
self.candle_up_color = CANDLE_UP_COLOR
|
|
||||||
self.candle_down_color = CANDLE_DOWN_COLOR
|
|
||||||
self.candle_alpha = 0.8
|
|
||||||
self.wick_width = 1
|
|
||||||
|
|
||||||
# Marker configuration
|
|
||||||
self.min_marker = '^'
|
|
||||||
self.min_color = MIN_COLOR
|
|
||||||
self.min_size = 100
|
|
||||||
self.max_marker = 'v'
|
|
||||||
self.max_color = MAX_COLOR
|
|
||||||
self.max_size = 100
|
|
||||||
self.marker_zorder = 100
|
|
||||||
|
|
||||||
# Line configuration
|
|
||||||
self.line_width = 1
|
|
||||||
self.min_line_style = MIN_LINE_STYLE
|
|
||||||
self.max_line_style = MAX_LINE_STYLE
|
|
||||||
self.sma7_line_style = SMA7_LINE_STYLE
|
|
||||||
self.sma15_line_style = SMA15_LINE_STYLE
|
|
||||||
|
|
||||||
# Text configuration
|
|
||||||
self.title_size = 14
|
|
||||||
self.title_color = TITLE_COLOR
|
|
||||||
self.axis_label_size = 12
|
|
||||||
self.axis_label_color = AXIS_LABEL_COLOR
|
|
||||||
|
|
||||||
# Legend configuration
|
|
||||||
self.legend_loc = 'best'
|
|
||||||
self.legend_bg_color = LEGEND_BG_COLOR
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(level=logging.INFO if verbose else logging.WARNING,
|
logging.basicConfig(level=logging.INFO if verbose else logging.WARNING,
|
||||||
format='%(asctime)s - %(levelname)s - %(message)s')
|
format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
self.logger = logging.getLogger('TrendDetectorSimple')
|
self.logger = logging.getLogger('TrendDetectorSimple')
|
||||||
|
|
||||||
# Convert data to pandas DataFrame if it's not already
|
|
||||||
if not isinstance(self.data, pd.DataFrame):
|
if not isinstance(self.data, pd.DataFrame):
|
||||||
if isinstance(self.data, list):
|
if isinstance(self.data, list):
|
||||||
self.data = pd.DataFrame({'close': self.data})
|
self.data = pd.DataFrame({'close': self.data})
|
||||||
@@ -183,154 +85,101 @@ class Supertrends:
|
|||||||
raise ValueError("Data must be a pandas DataFrame or a list")
|
raise ValueError("Data must be a pandas DataFrame or a list")
|
||||||
|
|
||||||
def calculate_tr(self):
|
def calculate_tr(self):
|
||||||
|
df = self.data.copy()
|
||||||
|
high = df['high'].values
|
||||||
|
low = df['low'].values
|
||||||
|
close = df['close'].values
|
||||||
|
tr = np.zeros_like(close)
|
||||||
|
tr[0] = high[0] - low[0]
|
||||||
|
for i in range(1, len(close)):
|
||||||
|
hl_range = high[i] - low[i]
|
||||||
|
hc_range = abs(high[i] - close[i-1])
|
||||||
|
lc_range = abs(low[i] - close[i-1])
|
||||||
|
tr[i] = max(hl_range, hc_range, lc_range)
|
||||||
|
return tr
|
||||||
|
|
||||||
|
def calculate_atr(self, period=14):
|
||||||
|
tr = self.calculate_tr()
|
||||||
|
atr = np.zeros_like(tr)
|
||||||
|
atr[0] = tr[0]
|
||||||
|
multiplier = 2.0 / (period + 1)
|
||||||
|
for i in range(1, len(tr)):
|
||||||
|
atr[i] = (tr[i] * multiplier) + (atr[i-1] * (1 - multiplier))
|
||||||
|
return atr
|
||||||
|
|
||||||
|
def calculate_supertrend(self, period=10, multiplier=3.0):
|
||||||
"""
|
"""
|
||||||
Calculate True Range (TR) for the price data.
|
Calculate SuperTrend indicator for the price data.
|
||||||
|
SuperTrend is a trend-following indicator that uses ATR to determine the trend direction.
|
||||||
True Range is the greatest of:
|
Parameters:
|
||||||
1. Current high - current low
|
- period: int, the period for the ATR calculation (default: 10)
|
||||||
2. |Current high - previous close|
|
- multiplier: float, the multiplier for the ATR (default: 3.0)
|
||||||
3. |Current low - previous close|
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
- Numpy array of TR values
|
- Dictionary containing SuperTrend values, trend direction, and upper/lower bands
|
||||||
"""
|
"""
|
||||||
df = self.data.copy()
|
df = self.data.copy()
|
||||||
high = df['high'].values
|
high = df['high'].values
|
||||||
low = df['low'].values
|
low = df['low'].values
|
||||||
close = df['close'].values
|
close = df['close'].values
|
||||||
|
atr = self.calculate_atr(period)
|
||||||
tr = np.zeros_like(close)
|
upper_band = np.zeros_like(close)
|
||||||
tr[0] = high[0] - low[0] # First TR is just the first day's range
|
lower_band = np.zeros_like(close)
|
||||||
|
for i in range(len(close)):
|
||||||
|
hl_avg = (high[i] + low[i]) / 2
|
||||||
|
upper_band[i] = hl_avg + (multiplier * atr[i])
|
||||||
|
lower_band[i] = hl_avg - (multiplier * atr[i])
|
||||||
|
final_upper = np.zeros_like(close)
|
||||||
|
final_lower = np.zeros_like(close)
|
||||||
|
supertrend = np.zeros_like(close)
|
||||||
|
trend = np.zeros_like(close)
|
||||||
|
final_upper[0] = upper_band[0]
|
||||||
|
final_lower[0] = lower_band[0]
|
||||||
|
if close[0] <= upper_band[0]:
|
||||||
|
supertrend[0] = upper_band[0]
|
||||||
|
trend[0] = -1
|
||||||
|
else:
|
||||||
|
supertrend[0] = lower_band[0]
|
||||||
|
trend[0] = 1
|
||||||
for i in range(1, len(close)):
|
for i in range(1, len(close)):
|
||||||
# Current high - current low
|
if (upper_band[i] < final_upper[i-1]) or (close[i-1] > final_upper[i-1]):
|
||||||
hl_range = high[i] - low[i]
|
final_upper[i] = upper_band[i]
|
||||||
# |Current high - previous close|
|
else:
|
||||||
hc_range = abs(high[i] - close[i-1])
|
final_upper[i] = final_upper[i-1]
|
||||||
# |Current low - previous close|
|
if (lower_band[i] > final_lower[i-1]) or (close[i-1] < final_lower[i-1]):
|
||||||
lc_range = abs(low[i] - close[i-1])
|
final_lower[i] = lower_band[i]
|
||||||
|
else:
|
||||||
# TR is the maximum of these three values
|
final_lower[i] = final_lower[i-1]
|
||||||
tr[i] = max(hl_range, hc_range, lc_range)
|
if supertrend[i-1] == final_upper[i-1] and close[i] <= final_upper[i]:
|
||||||
|
supertrend[i] = final_upper[i]
|
||||||
return tr
|
trend[i] = -1
|
||||||
|
elif supertrend[i-1] == final_upper[i-1] and close[i] > final_upper[i]:
|
||||||
def calculate_atr(self, period=14):
|
supertrend[i] = final_lower[i]
|
||||||
"""
|
trend[i] = 1
|
||||||
Calculate Average True Range (ATR) for the price data.
|
elif supertrend[i-1] == final_lower[i-1] and close[i] >= final_lower[i]:
|
||||||
|
supertrend[i] = final_lower[i]
|
||||||
ATR is the exponential moving average of the True Range over a specified period.
|
trend[i] = 1
|
||||||
|
elif supertrend[i-1] == final_lower[i-1] and close[i] < final_lower[i]:
|
||||||
Parameters:
|
supertrend[i] = final_upper[i]
|
||||||
- period: int, the period for the ATR calculation (default: 14)
|
trend[i] = -1
|
||||||
|
supertrend_results = {
|
||||||
Returns:
|
'supertrend': supertrend,
|
||||||
- Numpy array of ATR values
|
'trend': trend,
|
||||||
"""
|
'upper_band': final_upper,
|
||||||
|
'lower_band': final_lower
|
||||||
tr = self.calculate_tr()
|
}
|
||||||
atr = np.zeros_like(tr)
|
return supertrend_results
|
||||||
|
|
||||||
# First ATR value is just the first TR
|
|
||||||
atr[0] = tr[0]
|
|
||||||
|
|
||||||
# Calculate exponential moving average (EMA) of TR
|
|
||||||
multiplier = 2.0 / (period + 1)
|
|
||||||
|
|
||||||
for i in range(1, len(tr)):
|
|
||||||
atr[i] = (tr[i] * multiplier) + (atr[i-1] * (1 - multiplier))
|
|
||||||
|
|
||||||
return atr
|
|
||||||
|
|
||||||
def detect_trends(self):
|
|
||||||
"""
|
|
||||||
Detect trends by identifying local minima and maxima in the price data
|
|
||||||
using scipy.signal.find_peaks.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- prominence: float, required prominence of peaks (relative to the price range)
|
|
||||||
- width: int, required width of peaks in data points
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- DataFrame with columns for timestamps, prices, and trend indicators
|
|
||||||
- Dictionary containing analysis results including linear regression, SMAs, and SuperTrend indicators
|
|
||||||
"""
|
|
||||||
df = self.data
|
|
||||||
# close_prices = df['close'].values
|
|
||||||
|
|
||||||
# max_peaks, _ = find_peaks(close_prices)
|
|
||||||
# min_peaks, _ = find_peaks(-close_prices)
|
|
||||||
|
|
||||||
# df['is_min'] = False
|
|
||||||
# df['is_max'] = False
|
|
||||||
|
|
||||||
# for peak in max_peaks:
|
|
||||||
# df.at[peak, 'is_max'] = True
|
|
||||||
# for peak in min_peaks:
|
|
||||||
# df.at[peak, 'is_min'] = True
|
|
||||||
|
|
||||||
# result = df[['timestamp', 'close', 'is_min', 'is_max']].copy()
|
|
||||||
|
|
||||||
# Perform linear regression on min_peaks and max_peaks
|
|
||||||
# min_prices = df['close'].iloc[min_peaks].values
|
|
||||||
# max_prices = df['close'].iloc[max_peaks].values
|
|
||||||
|
|
||||||
# Linear regression for min peaks if we have at least 2 points
|
|
||||||
# min_slope, min_intercept, min_r_value, _, _ = stats.linregress(min_peaks, min_prices)
|
|
||||||
# Linear regression for max peaks if we have at least 2 points
|
|
||||||
# max_slope, max_intercept, max_r_value, _, _ = stats.linregress(max_peaks, max_prices)
|
|
||||||
|
|
||||||
# Calculate Simple Moving Averages (SMA) for 7 and 15 periods
|
|
||||||
# sma_7 = pd.Series(close_prices).rolling(window=7, min_periods=1).mean().values
|
|
||||||
# sma_15 = pd.Series(close_prices).rolling(window=15, min_periods=1).mean().values
|
|
||||||
|
|
||||||
analysis_results = {}
|
|
||||||
# analysis_results['linear_regression'] = {
|
|
||||||
# 'min': {
|
|
||||||
# 'slope': min_slope,
|
|
||||||
# 'intercept': min_intercept,
|
|
||||||
# 'r_squared': min_r_value ** 2
|
|
||||||
# },
|
|
||||||
# 'max': {
|
|
||||||
# 'slope': max_slope,
|
|
||||||
# 'intercept': max_intercept,
|
|
||||||
# 'r_squared': max_r_value ** 2
|
|
||||||
# }
|
|
||||||
# }
|
|
||||||
# analysis_results['sma'] = {
|
|
||||||
# '7': sma_7,
|
|
||||||
# '15': sma_15
|
|
||||||
# }
|
|
||||||
|
|
||||||
# Calculate SuperTrend indicators
|
|
||||||
supertrend_results_list = self._calculate_supertrend_indicators()
|
|
||||||
analysis_results['supertrend'] = supertrend_results_list
|
|
||||||
|
|
||||||
return analysis_results
|
|
||||||
|
|
||||||
def calculate_supertrend_indicators(self):
|
def calculate_supertrend_indicators(self):
|
||||||
"""
|
|
||||||
Calculate SuperTrend indicators with different parameter sets in parallel.
|
|
||||||
Returns:
|
|
||||||
- list, the SuperTrend results
|
|
||||||
"""
|
|
||||||
supertrend_params = [
|
supertrend_params = [
|
||||||
{"period": 12, "multiplier": 3.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN},
|
{"period": 12, "multiplier": 3.0},
|
||||||
{"period": 10, "multiplier": 1.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN},
|
{"period": 10, "multiplier": 1.0},
|
||||||
{"period": 11, "multiplier": 2.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN}
|
{"period": 11, "multiplier": 2.0}
|
||||||
]
|
]
|
||||||
data = self.data.copy()
|
|
||||||
|
|
||||||
# For just 3 calculations, direct calculation might be faster than process pool
|
|
||||||
results = []
|
results = []
|
||||||
for p in supertrend_params:
|
for p in supertrend_params:
|
||||||
result = calculate_supertrend_external(data, p["period"], p["multiplier"])
|
result = self.calculate_supertrend(period=p["period"], multiplier=p["multiplier"])
|
||||||
results.append(result)
|
results.append({
|
||||||
|
|
||||||
supertrend_results_list = []
|
|
||||||
for params, result in zip(supertrend_params, results):
|
|
||||||
supertrend_results_list.append({
|
|
||||||
"results": result,
|
"results": result,
|
||||||
"params": params
|
"params": p
|
||||||
})
|
})
|
||||||
return supertrend_results_list
|
return results
|
||||||
|
|||||||
1
data/xgboost_model.json
Normal file
1
data/xgboost_model.json
Normal file
File diff suppressed because one or more lines are too long
57
main.py
57
main.py
@@ -6,7 +6,6 @@ import os
|
|||||||
import datetime
|
import datetime
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import ast
|
|
||||||
|
|
||||||
from cycles.utils.storage import Storage
|
from cycles.utils.storage import Storage
|
||||||
from cycles.utils.system import SystemUtils
|
from cycles.utils.system import SystemUtils
|
||||||
@@ -48,6 +47,7 @@ def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd,
|
|||||||
cumulative_profit = 0
|
cumulative_profit = 0
|
||||||
max_drawdown = 0
|
max_drawdown = 0
|
||||||
peak = 0
|
peak = 0
|
||||||
|
|
||||||
for trade in trades:
|
for trade in trades:
|
||||||
cumulative_profit += trade['profit_pct']
|
cumulative_profit += trade['profit_pct']
|
||||||
if cumulative_profit > peak:
|
if cumulative_profit > peak:
|
||||||
@@ -55,10 +55,14 @@ def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd,
|
|||||||
drawdown = peak - cumulative_profit
|
drawdown = peak - cumulative_profit
|
||||||
if drawdown > max_drawdown:
|
if drawdown > max_drawdown:
|
||||||
max_drawdown = drawdown
|
max_drawdown = drawdown
|
||||||
|
|
||||||
final_usd = initial_usd
|
final_usd = initial_usd
|
||||||
|
|
||||||
for trade in trades:
|
for trade in trades:
|
||||||
final_usd *= (1 + trade['profit_pct'])
|
final_usd *= (1 + trade['profit_pct'])
|
||||||
total_fees_usd = sum(trade.get('fee_usd', 0.0) for trade in trades)
|
|
||||||
|
total_fees_usd = sum(trade['fee_usd'] for trade in trades)
|
||||||
|
|
||||||
row = {
|
row = {
|
||||||
"timeframe": rule_name,
|
"timeframe": rule_name,
|
||||||
"stop_loss_pct": stop_loss_pct,
|
"stop_loss_pct": stop_loss_pct,
|
||||||
@@ -75,6 +79,7 @@ def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd,
|
|||||||
"total_fees_usd": total_fees_usd,
|
"total_fees_usd": total_fees_usd,
|
||||||
}
|
}
|
||||||
results_rows.append(row)
|
results_rows.append(row)
|
||||||
|
|
||||||
for trade in trades:
|
for trade in trades:
|
||||||
trade_rows.append({
|
trade_rows.append({
|
||||||
"timeframe": rule_name,
|
"timeframe": rule_name,
|
||||||
@@ -87,7 +92,9 @@ def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd,
|
|||||||
"type": trade.get("type"),
|
"type": trade.get("type"),
|
||||||
"fee_usd": trade.get("fee_usd"),
|
"fee_usd": trade.get("fee_usd"),
|
||||||
})
|
})
|
||||||
|
|
||||||
logging.info(f"Timeframe: {rule_name}, Stop Loss: {stop_loss_pct}, Trades: {n_trades}")
|
logging.info(f"Timeframe: {rule_name}, Stop Loss: {stop_loss_pct}, Trades: {n_trades}")
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
for trade in trades:
|
for trade in trades:
|
||||||
if trade['type'] == 'STOP':
|
if trade['type'] == 'STOP':
|
||||||
@@ -95,13 +102,16 @@ def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd,
|
|||||||
for trade in trades:
|
for trade in trades:
|
||||||
if trade['profit_pct'] < -0.09: # or whatever is close to -0.10
|
if trade['profit_pct'] < -0.09: # or whatever is close to -0.10
|
||||||
print("Large loss trade:", trade)
|
print("Large loss trade:", trade)
|
||||||
|
|
||||||
return results_rows, trade_rows
|
return results_rows, trade_rows
|
||||||
|
|
||||||
def process(timeframe_info, debug=False):
|
def process(timeframe_info, debug=False):
|
||||||
"""Process a single (timeframe, stop_loss_pct) combination (no monthly split)"""
|
from cycles.utils.storage import Storage # import inside function for safety
|
||||||
|
storage = Storage(logging=None) # or pass a logger if you want, but None is safest for multiprocessing
|
||||||
|
|
||||||
rule, data_1min, stop_loss_pct, initial_usd = timeframe_info
|
rule, data_1min, stop_loss_pct, initial_usd = timeframe_info
|
||||||
|
|
||||||
if rule == "1T":
|
if rule == "1T" or rule == "1min":
|
||||||
df = data_1min.copy()
|
df = data_1min.copy()
|
||||||
else:
|
else:
|
||||||
df = data_1min.resample(rule).agg({
|
df = data_1min.resample(rule).agg({
|
||||||
@@ -112,7 +122,33 @@ def process(timeframe_info, debug=False):
|
|||||||
'volume': 'sum'
|
'volume': 'sum'
|
||||||
}).dropna()
|
}).dropna()
|
||||||
df = df.reset_index()
|
df = df.reset_index()
|
||||||
|
|
||||||
results_rows, all_trade_rows = process_timeframe_data(data_1min, df, [stop_loss_pct], rule, initial_usd, debug=debug)
|
results_rows, all_trade_rows = process_timeframe_data(data_1min, df, [stop_loss_pct], rule, initial_usd, debug=debug)
|
||||||
|
|
||||||
|
if all_trade_rows:
|
||||||
|
trades_fieldnames = ["entry_time", "exit_time", "entry_price", "exit_price", "profit_pct", "type", "fee_usd"]
|
||||||
|
# Prepare header
|
||||||
|
summary_fields = ["timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate", "max_drawdown", "avg_trade", "profit_ratio", "final_usd"]
|
||||||
|
summary_row = results_rows[0]
|
||||||
|
header_line = "\t".join(summary_fields) + "\n"
|
||||||
|
value_line = "\t".join(str(summary_row.get(f, "")) for f in summary_fields) + "\n"
|
||||||
|
# File name
|
||||||
|
tf = summary_row["timeframe"]
|
||||||
|
sl = summary_row["stop_loss_pct"]
|
||||||
|
sl_percent = int(round(sl * 100))
|
||||||
|
trades_filename = os.path.join(storage.results_dir, f"trades_{tf}_ST{sl_percent}pct.csv")
|
||||||
|
# Write header
|
||||||
|
with open(trades_filename, "w") as f:
|
||||||
|
f.write(header_line)
|
||||||
|
f.write(value_line)
|
||||||
|
# Now write trades (append mode, skip header)
|
||||||
|
with open(trades_filename, "a", newline="") as f:
|
||||||
|
import csv
|
||||||
|
writer = csv.DictWriter(f, fieldnames=trades_fieldnames)
|
||||||
|
writer.writeheader()
|
||||||
|
for trade in all_trade_rows:
|
||||||
|
writer.writerow({k: trade.get(k, "") for k in trades_fieldnames})
|
||||||
|
|
||||||
return results_rows, all_trade_rows
|
return results_rows, all_trade_rows
|
||||||
|
|
||||||
def aggregate_results(all_rows):
|
def aggregate_results(all_rows):
|
||||||
@@ -126,7 +162,6 @@ def aggregate_results(all_rows):
|
|||||||
|
|
||||||
summary_rows = []
|
summary_rows = []
|
||||||
for (rule, stop_loss_pct), rows in grouped.items():
|
for (rule, stop_loss_pct), rows in grouped.items():
|
||||||
n_months = len(rows)
|
|
||||||
total_trades = sum(r['n_trades'] for r in rows)
|
total_trades = sum(r['n_trades'] for r in rows)
|
||||||
total_stop_loss = sum(r['n_stop_loss'] for r in rows)
|
total_stop_loss = sum(r['n_stop_loss'] for r in rows)
|
||||||
avg_win_rate = np.mean([r['win_rate'] for r in rows])
|
avg_win_rate = np.mean([r['win_rate'] for r in rows])
|
||||||
@@ -163,7 +198,7 @@ def get_nearest_price(df, target_date):
|
|||||||
return nearest_time, price
|
return nearest_time, price
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
debug = True
|
debug = False
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Run backtest with config file.")
|
parser = argparse.ArgumentParser(description="Run backtest with config file.")
|
||||||
parser.add_argument("config", type=str, nargs="?", help="Path to config JSON file.")
|
parser.add_argument("config", type=str, nargs="?", help="Path to config JSON file.")
|
||||||
@@ -171,11 +206,11 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
# Default values (from config.json)
|
# Default values (from config.json)
|
||||||
default_config = {
|
default_config = {
|
||||||
"start_date": "2024-05-15",
|
"start_date": "2025-05-01",
|
||||||
"stop_date": datetime.datetime.today().strftime('%Y-%m-%d'),
|
"stop_date": datetime.datetime.today().strftime('%Y-%m-%d'),
|
||||||
"initial_usd": 10000,
|
"initial_usd": 10000,
|
||||||
"timeframes": ["1D"],
|
"timeframes": ["1D", "6h", "3h", "1h", "30m", "15m", "5m", "1m"],
|
||||||
"stop_loss_pcts": [0.01, 0.02, 0.03],
|
"stop_loss_pcts": [0.01, 0.02, 0.03, 0.05],
|
||||||
}
|
}
|
||||||
|
|
||||||
if args.config:
|
if args.config:
|
||||||
@@ -238,6 +273,7 @@ if __name__ == "__main__":
|
|||||||
if debug:
|
if debug:
|
||||||
all_results_rows = []
|
all_results_rows = []
|
||||||
all_trade_rows = []
|
all_trade_rows = []
|
||||||
|
|
||||||
for task in tasks:
|
for task in tasks:
|
||||||
results, trades = process(task, debug)
|
results, trades = process(task, debug)
|
||||||
if results or trades:
|
if results or trades:
|
||||||
@@ -263,7 +299,4 @@ if __name__ == "__main__":
|
|||||||
]
|
]
|
||||||
storage.write_backtest_results(backtest_filename, backtest_fieldnames, all_results_rows, metadata_lines)
|
storage.write_backtest_results(backtest_filename, backtest_fieldnames, all_results_rows, metadata_lines)
|
||||||
|
|
||||||
trades_fieldnames = ["entry_time", "exit_time", "entry_price", "exit_price", "profit_pct", "type", "fee_usd"]
|
|
||||||
storage.write_trades(all_trade_rows, trades_fieldnames)
|
|
||||||
|
|
||||||
|
|
||||||
39
xgboost/custom_xgboost.py
Normal file
39
xgboost/custom_xgboost.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
import xgboost as xgb
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class CustomXGBoostGPU:
|
||||||
|
def __init__(self, X_train, X_test, y_train, y_test):
|
||||||
|
self.X_train = X_train.astype(np.float32)
|
||||||
|
self.X_test = X_test.astype(np.float32)
|
||||||
|
self.y_train = y_train.astype(np.float32)
|
||||||
|
self.y_test = y_test.astype(np.float32)
|
||||||
|
self.model = None
|
||||||
|
self.params = None # Will be set during training
|
||||||
|
|
||||||
|
def train(self, **xgb_params):
|
||||||
|
params = {
|
||||||
|
'tree_method': 'hist',
|
||||||
|
'device': 'cuda',
|
||||||
|
'objective': 'reg:squarederror',
|
||||||
|
'eval_metric': 'rmse',
|
||||||
|
'verbosity': 1,
|
||||||
|
}
|
||||||
|
params.update(xgb_params)
|
||||||
|
self.params = params # Store params for later access
|
||||||
|
dtrain = xgb.DMatrix(self.X_train, label=self.y_train)
|
||||||
|
dtest = xgb.DMatrix(self.X_test, label=self.y_test)
|
||||||
|
evals = [(dtrain, 'train'), (dtest, 'eval')]
|
||||||
|
self.model = xgb.train(params, dtrain, num_boost_round=100, evals=evals, early_stopping_rounds=10)
|
||||||
|
return self.model
|
||||||
|
|
||||||
|
def predict(self, X):
|
||||||
|
if self.model is None:
|
||||||
|
raise ValueError('Model not trained yet.')
|
||||||
|
dmatrix = xgb.DMatrix(X.astype(np.float32))
|
||||||
|
return self.model.predict(dmatrix)
|
||||||
|
|
||||||
|
def save_model(self, file_path):
|
||||||
|
"""Save the trained XGBoost model to the specified file path."""
|
||||||
|
if self.model is None:
|
||||||
|
raise ValueError('Model not trained yet.')
|
||||||
|
self.model.save_model(file_path)
|
||||||
731
xgboost/main.py
Normal file
731
xgboost/main.py
Normal file
@@ -0,0 +1,731 @@
|
|||||||
|
import sys
|
||||||
|
import os
|
||||||
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from custom_xgboost import CustomXGBoostGPU
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from plot_results import display_actual_vs_predicted, plot_target_distribution, plot_predicted_vs_actual_log_returns, plot_predicted_vs_actual_prices
|
||||||
|
import ta
|
||||||
|
from cycles.supertrend import Supertrends
|
||||||
|
from ta.trend import SMAIndicator, DPOIndicator, IchimokuIndicator, PSARIndicator
|
||||||
|
from ta.momentum import ROCIndicator, KAMAIndicator, UltimateOscillator, StochasticOscillator, WilliamsRIndicator
|
||||||
|
from ta.volatility import KeltnerChannel, DonchianChannel
|
||||||
|
from ta.others import DailyReturnIndicator
|
||||||
|
import time
|
||||||
|
from numba import njit
|
||||||
|
|
||||||
|
def run_indicator(func, *args):
|
||||||
|
return func(*args)
|
||||||
|
|
||||||
|
def run_indicator_job(job):
|
||||||
|
import time
|
||||||
|
func, *args = job
|
||||||
|
indicator_name = func.__name__
|
||||||
|
start = time.time()
|
||||||
|
result = func(*args)
|
||||||
|
elapsed = time.time() - start
|
||||||
|
print(f'Indicator {indicator_name} computed in {elapsed:.4f} seconds')
|
||||||
|
return result
|
||||||
|
|
||||||
|
def calc_rsi(close):
|
||||||
|
from ta.momentum import RSIIndicator
|
||||||
|
return ('rsi', RSIIndicator(close, window=14).rsi())
|
||||||
|
|
||||||
|
def calc_macd(close):
|
||||||
|
from ta.trend import MACD
|
||||||
|
return ('macd', MACD(close).macd())
|
||||||
|
|
||||||
|
def calc_bollinger(close):
|
||||||
|
from ta.volatility import BollingerBands
|
||||||
|
bb = BollingerBands(close=close, window=20, window_dev=2)
|
||||||
|
return [
|
||||||
|
('bb_bbm', bb.bollinger_mavg()),
|
||||||
|
('bb_bbh', bb.bollinger_hband()),
|
||||||
|
('bb_bbl', bb.bollinger_lband()),
|
||||||
|
('bb_bb_width', bb.bollinger_hband() - bb.bollinger_lband())
|
||||||
|
]
|
||||||
|
|
||||||
|
def calc_stochastic(high, low, close):
|
||||||
|
from ta.momentum import StochasticOscillator
|
||||||
|
stoch = StochasticOscillator(high=high, low=low, close=close, window=14, smooth_window=3)
|
||||||
|
return [
|
||||||
|
('stoch_k', stoch.stoch()),
|
||||||
|
('stoch_d', stoch.stoch_signal())
|
||||||
|
]
|
||||||
|
|
||||||
|
def calc_atr(high, low, close):
|
||||||
|
from ta.volatility import AverageTrueRange
|
||||||
|
atr = AverageTrueRange(high=high, low=low, close=close, window=14)
|
||||||
|
return ('atr', atr.average_true_range())
|
||||||
|
|
||||||
|
def calc_cci(high, low, close):
|
||||||
|
from ta.trend import CCIIndicator
|
||||||
|
cci = CCIIndicator(high=high, low=low, close=close, window=20)
|
||||||
|
return ('cci', cci.cci())
|
||||||
|
|
||||||
|
def calc_williamsr(high, low, close):
|
||||||
|
from ta.momentum import WilliamsRIndicator
|
||||||
|
willr = WilliamsRIndicator(high=high, low=low, close=close, lbp=14)
|
||||||
|
return ('williams_r', willr.williams_r())
|
||||||
|
|
||||||
|
def calc_ema(close):
|
||||||
|
from ta.trend import EMAIndicator
|
||||||
|
ema = EMAIndicator(close=close, window=14)
|
||||||
|
return ('ema_14', ema.ema_indicator())
|
||||||
|
|
||||||
|
def calc_obv(close, volume):
|
||||||
|
from ta.volume import OnBalanceVolumeIndicator
|
||||||
|
obv = OnBalanceVolumeIndicator(close=close, volume=volume)
|
||||||
|
return ('obv', obv.on_balance_volume())
|
||||||
|
|
||||||
|
def calc_cmf(high, low, close, volume):
|
||||||
|
from ta.volume import ChaikinMoneyFlowIndicator
|
||||||
|
cmf = ChaikinMoneyFlowIndicator(high=high, low=low, close=close, volume=volume, window=20)
|
||||||
|
return ('cmf', cmf.chaikin_money_flow())
|
||||||
|
|
||||||
|
def calc_sma(close):
|
||||||
|
from ta.trend import SMAIndicator
|
||||||
|
return [
|
||||||
|
('sma_50', SMAIndicator(close, window=50).sma_indicator()),
|
||||||
|
('sma_200', SMAIndicator(close, window=200).sma_indicator())
|
||||||
|
]
|
||||||
|
|
||||||
|
def calc_roc(close):
|
||||||
|
from ta.momentum import ROCIndicator
|
||||||
|
return ('roc_10', ROCIndicator(close, window=10).roc())
|
||||||
|
|
||||||
|
def calc_momentum(close):
|
||||||
|
return ('momentum_10', close - close.shift(10))
|
||||||
|
|
||||||
|
def calc_psar(high, low, close):
|
||||||
|
# Use the Numba-accelerated fast_psar function for speed
|
||||||
|
psar_values = fast_psar(np.array(high), np.array(low), np.array(close))
|
||||||
|
return [('psar', pd.Series(psar_values, index=close.index))]
|
||||||
|
|
||||||
|
def calc_donchian(high, low, close):
|
||||||
|
from ta.volatility import DonchianChannel
|
||||||
|
donchian = DonchianChannel(high, low, close, window=20)
|
||||||
|
return [
|
||||||
|
('donchian_hband', donchian.donchian_channel_hband()),
|
||||||
|
('donchian_lband', donchian.donchian_channel_lband()),
|
||||||
|
('donchian_mband', donchian.donchian_channel_mband())
|
||||||
|
]
|
||||||
|
|
||||||
|
def calc_keltner(high, low, close):
|
||||||
|
from ta.volatility import KeltnerChannel
|
||||||
|
keltner = KeltnerChannel(high, low, close, window=20)
|
||||||
|
return [
|
||||||
|
('keltner_hband', keltner.keltner_channel_hband()),
|
||||||
|
('keltner_lband', keltner.keltner_channel_lband()),
|
||||||
|
('keltner_mband', keltner.keltner_channel_mband())
|
||||||
|
]
|
||||||
|
|
||||||
|
def calc_dpo(close):
|
||||||
|
from ta.trend import DPOIndicator
|
||||||
|
return ('dpo_20', DPOIndicator(close, window=20).dpo())
|
||||||
|
|
||||||
|
def calc_ultimate(high, low, close):
|
||||||
|
from ta.momentum import UltimateOscillator
|
||||||
|
return ('ultimate_osc', UltimateOscillator(high, low, close).ultimate_oscillator())
|
||||||
|
|
||||||
|
def calc_ichimoku(high, low):
|
||||||
|
from ta.trend import IchimokuIndicator
|
||||||
|
ichimoku = IchimokuIndicator(high, low, window1=9, window2=26, window3=52)
|
||||||
|
return [
|
||||||
|
('ichimoku_a', ichimoku.ichimoku_a()),
|
||||||
|
('ichimoku_b', ichimoku.ichimoku_b()),
|
||||||
|
('ichimoku_base_line', ichimoku.ichimoku_base_line()),
|
||||||
|
('ichimoku_conversion_line', ichimoku.ichimoku_conversion_line())
|
||||||
|
]
|
||||||
|
|
||||||
|
def calc_elder_ray(close, low, high):
|
||||||
|
from ta.trend import EMAIndicator
|
||||||
|
ema = EMAIndicator(close, window=13).ema_indicator()
|
||||||
|
return [
|
||||||
|
('elder_ray_bull', ema - low),
|
||||||
|
('elder_ray_bear', ema - high)
|
||||||
|
]
|
||||||
|
|
||||||
|
def calc_daily_return(close):
|
||||||
|
from ta.others import DailyReturnIndicator
|
||||||
|
return ('daily_return', DailyReturnIndicator(close).daily_return())
|
||||||
|
|
||||||
|
@njit
|
||||||
|
def fast_psar(high, low, close, af=0.02, max_af=0.2):
|
||||||
|
length = len(close)
|
||||||
|
psar = np.zeros(length)
|
||||||
|
bull = True
|
||||||
|
af_step = af
|
||||||
|
ep = low[0]
|
||||||
|
psar[0] = low[0]
|
||||||
|
for i in range(1, length):
|
||||||
|
prev_psar = psar[i-1]
|
||||||
|
if bull:
|
||||||
|
psar[i] = prev_psar + af_step * (ep - prev_psar)
|
||||||
|
if low[i] < psar[i]:
|
||||||
|
bull = False
|
||||||
|
psar[i] = ep
|
||||||
|
af_step = af
|
||||||
|
ep = low[i]
|
||||||
|
else:
|
||||||
|
if high[i] > ep:
|
||||||
|
ep = high[i]
|
||||||
|
af_step = min(af_step + af, max_af)
|
||||||
|
else:
|
||||||
|
psar[i] = prev_psar + af_step * (ep - prev_psar)
|
||||||
|
if high[i] > psar[i]:
|
||||||
|
bull = True
|
||||||
|
psar[i] = ep
|
||||||
|
af_step = af
|
||||||
|
ep = high[i]
|
||||||
|
else:
|
||||||
|
if low[i] < ep:
|
||||||
|
ep = low[i]
|
||||||
|
af_step = min(af_step + af, max_af)
|
||||||
|
return psar
|
||||||
|
|
||||||
|
def compute_lag(df, col, lag):
|
||||||
|
return df[col].shift(lag)
|
||||||
|
|
||||||
|
def compute_rolling(df, col, stat, window):
|
||||||
|
if stat == 'mean':
|
||||||
|
return df[col].rolling(window).mean()
|
||||||
|
elif stat == 'std':
|
||||||
|
return df[col].rolling(window).std()
|
||||||
|
elif stat == 'min':
|
||||||
|
return df[col].rolling(window).min()
|
||||||
|
elif stat == 'max':
|
||||||
|
return df[col].rolling(window).max()
|
||||||
|
|
||||||
|
def compute_log_return(df, horizon):
|
||||||
|
return np.log(df['Close'] / df['Close'].shift(horizon))
|
||||||
|
|
||||||
|
def compute_volatility(df, window):
|
||||||
|
return df['log_return'].rolling(window).std()
|
||||||
|
|
||||||
|
def run_feature_job(job, df):
|
||||||
|
feature_name, func, *args = job
|
||||||
|
print(f'Computing feature: {feature_name}')
|
||||||
|
result = func(df, *args)
|
||||||
|
return feature_name, result
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
csv_path = './data/btcusd_1-min_data.csv'
|
||||||
|
csv_prefix = os.path.splitext(os.path.basename(csv_path))[0]
|
||||||
|
|
||||||
|
print('Reading CSV and filtering data...')
|
||||||
|
df = pd.read_csv(csv_path)
|
||||||
|
df = df[df['Volume'] != 0]
|
||||||
|
|
||||||
|
min_date = '2017-06-01'
|
||||||
|
print('Converting Timestamp and filtering by date...')
|
||||||
|
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')
|
||||||
|
df = df[df['Timestamp'] >= min_date]
|
||||||
|
|
||||||
|
lags = 3
|
||||||
|
|
||||||
|
print('Calculating log returns as the new target...')
|
||||||
|
df['log_return'] = np.log(df['Close'] / df['Close'].shift(1))
|
||||||
|
|
||||||
|
ohlcv_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
|
||||||
|
window_sizes = [5, 15, 30] # in minutes, adjust as needed
|
||||||
|
|
||||||
|
features_dict = {}
|
||||||
|
|
||||||
|
print('Starting feature computation...')
|
||||||
|
feature_start_time = time.time()
|
||||||
|
|
||||||
|
# --- Technical Indicator Features: Calculate or Load from Cache ---
|
||||||
|
print('Calculating or loading technical indicator features...')
|
||||||
|
# RSI
|
||||||
|
feature_file = f'./data/{csv_prefix}_rsi.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['rsi'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: rsi')
|
||||||
|
_, values = calc_rsi(df['Close'])
|
||||||
|
features_dict['rsi'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# MACD
|
||||||
|
feature_file = f'./data/{csv_prefix}_macd.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['macd'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: macd')
|
||||||
|
_, values = calc_macd(df['Close'])
|
||||||
|
features_dict['macd'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# ATR
|
||||||
|
feature_file = f'./data/{csv_prefix}_atr.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['atr'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: atr')
|
||||||
|
_, values = calc_atr(df['High'], df['Low'], df['Close'])
|
||||||
|
features_dict['atr'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# CCI
|
||||||
|
feature_file = f'./data/{csv_prefix}_cci.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['cci'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: cci')
|
||||||
|
_, values = calc_cci(df['High'], df['Low'], df['Close'])
|
||||||
|
features_dict['cci'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# Williams %R
|
||||||
|
feature_file = f'./data/{csv_prefix}_williams_r.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['williams_r'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: williams_r')
|
||||||
|
_, values = calc_williamsr(df['High'], df['Low'], df['Close'])
|
||||||
|
features_dict['williams_r'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# EMA 14
|
||||||
|
feature_file = f'./data/{csv_prefix}_ema_14.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['ema_14'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: ema_14')
|
||||||
|
_, values = calc_ema(df['Close'])
|
||||||
|
features_dict['ema_14'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# OBV
|
||||||
|
feature_file = f'./data/{csv_prefix}_obv.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['obv'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: obv')
|
||||||
|
_, values = calc_obv(df['Close'], df['Volume'])
|
||||||
|
features_dict['obv'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# CMF
|
||||||
|
feature_file = f'./data/{csv_prefix}_cmf.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['cmf'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: cmf')
|
||||||
|
_, values = calc_cmf(df['High'], df['Low'], df['Close'], df['Volume'])
|
||||||
|
features_dict['cmf'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# ROC 10
|
||||||
|
feature_file = f'./data/{csv_prefix}_roc_10.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['roc_10'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: roc_10')
|
||||||
|
_, values = calc_roc(df['Close'])
|
||||||
|
features_dict['roc_10'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# DPO 20
|
||||||
|
feature_file = f'./data/{csv_prefix}_dpo_20.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['dpo_20'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: dpo_20')
|
||||||
|
_, values = calc_dpo(df['Close'])
|
||||||
|
features_dict['dpo_20'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# Ultimate Oscillator
|
||||||
|
feature_file = f'./data/{csv_prefix}_ultimate_osc.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['ultimate_osc'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: ultimate_osc')
|
||||||
|
_, values = calc_ultimate(df['High'], df['Low'], df['Close'])
|
||||||
|
features_dict['ultimate_osc'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# Daily Return
|
||||||
|
feature_file = f'./data/{csv_prefix}_daily_return.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'A Loading cached feature: {feature_file}')
|
||||||
|
arr = np.load(feature_file)
|
||||||
|
features_dict['daily_return'] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
print('Calculating feature: daily_return')
|
||||||
|
_, values = calc_daily_return(df['Close'])
|
||||||
|
features_dict['daily_return'] = values
|
||||||
|
np.save(feature_file, values.values)
|
||||||
|
print(f'Saved feature: {feature_file}')
|
||||||
|
|
||||||
|
# Multi-column indicators
|
||||||
|
# Bollinger Bands
|
||||||
|
print('Calculating multi-column indicator: bollinger')
|
||||||
|
result = calc_bollinger(df['Close'])
|
||||||
|
for subname, values in result:
|
||||||
|
print(f"Adding subfeature: {subname}")
|
||||||
|
sub_feature_file = f'./data/{csv_prefix}_{subname}.npy'
|
||||||
|
if os.path.exists(sub_feature_file):
|
||||||
|
print(f'B Loading cached feature: {sub_feature_file}')
|
||||||
|
arr = np.load(sub_feature_file)
|
||||||
|
features_dict[subname] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
features_dict[subname] = values
|
||||||
|
np.save(sub_feature_file, values.values)
|
||||||
|
print(f'Saved feature: {sub_feature_file}')
|
||||||
|
|
||||||
|
# Stochastic Oscillator
|
||||||
|
print('Calculating multi-column indicator: stochastic')
|
||||||
|
result = calc_stochastic(df['High'], df['Low'], df['Close'])
|
||||||
|
for subname, values in result:
|
||||||
|
print(f"Adding subfeature: {subname}")
|
||||||
|
sub_feature_file = f'./data/{csv_prefix}_{subname}.npy'
|
||||||
|
if os.path.exists(sub_feature_file):
|
||||||
|
print(f'B Loading cached feature: {sub_feature_file}')
|
||||||
|
arr = np.load(sub_feature_file)
|
||||||
|
features_dict[subname] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
features_dict[subname] = values
|
||||||
|
np.save(sub_feature_file, values.values)
|
||||||
|
print(f'Saved feature: {sub_feature_file}')
|
||||||
|
|
||||||
|
# SMA
|
||||||
|
print('Calculating multi-column indicator: sma')
|
||||||
|
result = calc_sma(df['Close'])
|
||||||
|
for subname, values in result:
|
||||||
|
print(f"Adding subfeature: {subname}")
|
||||||
|
sub_feature_file = f'./data/{csv_prefix}_{subname}.npy'
|
||||||
|
if os.path.exists(sub_feature_file):
|
||||||
|
print(f'B Loading cached feature: {sub_feature_file}')
|
||||||
|
arr = np.load(sub_feature_file)
|
||||||
|
features_dict[subname] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
features_dict[subname] = values
|
||||||
|
np.save(sub_feature_file, values.values)
|
||||||
|
print(f'Saved feature: {sub_feature_file}')
|
||||||
|
|
||||||
|
# PSAR
|
||||||
|
print('Calculating multi-column indicator: psar')
|
||||||
|
result = calc_psar(df['High'], df['Low'], df['Close'])
|
||||||
|
for subname, values in result:
|
||||||
|
print(f"Adding subfeature: {subname}")
|
||||||
|
sub_feature_file = f'./data/{csv_prefix}_{subname}.npy'
|
||||||
|
if os.path.exists(sub_feature_file):
|
||||||
|
print(f'B Loading cached feature: {sub_feature_file}')
|
||||||
|
arr = np.load(sub_feature_file)
|
||||||
|
features_dict[subname] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
features_dict[subname] = values
|
||||||
|
np.save(sub_feature_file, values.values)
|
||||||
|
print(f'Saved feature: {sub_feature_file}')
|
||||||
|
|
||||||
|
# Donchian Channel
|
||||||
|
print('Calculating multi-column indicator: donchian')
|
||||||
|
result = calc_donchian(df['High'], df['Low'], df['Close'])
|
||||||
|
for subname, values in result:
|
||||||
|
print(f"Adding subfeature: {subname}")
|
||||||
|
sub_feature_file = f'./data/{csv_prefix}_{subname}.npy'
|
||||||
|
if os.path.exists(sub_feature_file):
|
||||||
|
print(f'B Loading cached feature: {sub_feature_file}')
|
||||||
|
arr = np.load(sub_feature_file)
|
||||||
|
features_dict[subname] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
features_dict[subname] = values
|
||||||
|
np.save(sub_feature_file, values.values)
|
||||||
|
print(f'Saved feature: {sub_feature_file}')
|
||||||
|
|
||||||
|
# Keltner Channel
|
||||||
|
print('Calculating multi-column indicator: keltner')
|
||||||
|
result = calc_keltner(df['High'], df['Low'], df['Close'])
|
||||||
|
for subname, values in result:
|
||||||
|
print(f"Adding subfeature: {subname}")
|
||||||
|
sub_feature_file = f'./data/{csv_prefix}_{subname}.npy'
|
||||||
|
if os.path.exists(sub_feature_file):
|
||||||
|
print(f'B Loading cached feature: {sub_feature_file}')
|
||||||
|
arr = np.load(sub_feature_file)
|
||||||
|
features_dict[subname] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
features_dict[subname] = values
|
||||||
|
np.save(sub_feature_file, values.values)
|
||||||
|
print(f'Saved feature: {sub_feature_file}')
|
||||||
|
|
||||||
|
# Ichimoku
|
||||||
|
print('Calculating multi-column indicator: ichimoku')
|
||||||
|
result = calc_ichimoku(df['High'], df['Low'])
|
||||||
|
for subname, values in result:
|
||||||
|
print(f"Adding subfeature: {subname}")
|
||||||
|
sub_feature_file = f'./data/{csv_prefix}_{subname}.npy'
|
||||||
|
if os.path.exists(sub_feature_file):
|
||||||
|
print(f'B Loading cached feature: {sub_feature_file}')
|
||||||
|
arr = np.load(sub_feature_file)
|
||||||
|
features_dict[subname] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
features_dict[subname] = values
|
||||||
|
np.save(sub_feature_file, values.values)
|
||||||
|
print(f'Saved feature: {sub_feature_file}')
|
||||||
|
|
||||||
|
# Elder Ray
|
||||||
|
print('Calculating multi-column indicator: elder_ray')
|
||||||
|
result = calc_elder_ray(df['Close'], df['Low'], df['High'])
|
||||||
|
for subname, values in result:
|
||||||
|
print(f"Adding subfeature: {subname}")
|
||||||
|
sub_feature_file = f'./data/{csv_prefix}_{subname}.npy'
|
||||||
|
if os.path.exists(sub_feature_file):
|
||||||
|
print(f'B Loading cached feature: {sub_feature_file}')
|
||||||
|
arr = np.load(sub_feature_file)
|
||||||
|
features_dict[subname] = pd.Series(arr, index=df.index)
|
||||||
|
else:
|
||||||
|
features_dict[subname] = values
|
||||||
|
np.save(sub_feature_file, values.values)
|
||||||
|
print(f'Saved feature: {sub_feature_file}')
|
||||||
|
|
||||||
|
# Prepare jobs for lags, rolling stats, log returns, and volatility
|
||||||
|
feature_jobs = []
|
||||||
|
# Lags
|
||||||
|
for col in ohlcv_cols:
|
||||||
|
for lag in range(1, lags + 1):
|
||||||
|
feature_name = f'{col}_lag{lag}'
|
||||||
|
feature_file = f'./data/{csv_prefix}_{feature_name}.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'C Loading cached feature: {feature_file}')
|
||||||
|
features_dict[feature_name] = np.load(feature_file)
|
||||||
|
else:
|
||||||
|
print(f'Adding lag feature job: {feature_name}')
|
||||||
|
feature_jobs.append((feature_name, compute_lag, col, lag))
|
||||||
|
# Rolling statistics
|
||||||
|
for col in ohlcv_cols:
|
||||||
|
for window in window_sizes:
|
||||||
|
if (col == 'Open' and window == 5):
|
||||||
|
continue
|
||||||
|
if (col == 'High' and window == 5):
|
||||||
|
continue
|
||||||
|
if (col == 'High' and window == 30):
|
||||||
|
continue
|
||||||
|
if (col == 'Low' and window == 15):
|
||||||
|
continue
|
||||||
|
for stat in ['mean', 'std', 'min', 'max']:
|
||||||
|
feature_name = f'{col}_roll_{stat}_{window}'
|
||||||
|
feature_file = f'./data/{csv_prefix}_{feature_name}.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'D Loading cached feature: {feature_file}')
|
||||||
|
features_dict[feature_name] = np.load(feature_file)
|
||||||
|
else:
|
||||||
|
print(f'Adding rolling stat feature job: {feature_name}')
|
||||||
|
feature_jobs.append((feature_name, compute_rolling, col, stat, window))
|
||||||
|
# Log returns for different horizons
|
||||||
|
for horizon in [5, 15, 30]:
|
||||||
|
feature_name = f'log_return_{horizon}'
|
||||||
|
feature_file = f'./data/{csv_prefix}_{feature_name}.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'E Loading cached feature: {feature_file}')
|
||||||
|
features_dict[feature_name] = np.load(feature_file)
|
||||||
|
else:
|
||||||
|
print(f'Adding log return feature job: {feature_name}')
|
||||||
|
feature_jobs.append((feature_name, compute_log_return, horizon))
|
||||||
|
# Volatility
|
||||||
|
for window in window_sizes:
|
||||||
|
feature_name = f'volatility_{window}'
|
||||||
|
feature_file = f'./data/{csv_prefix}_{feature_name}.npy'
|
||||||
|
if os.path.exists(feature_file):
|
||||||
|
print(f'F Loading cached feature: {feature_file}')
|
||||||
|
features_dict[feature_name] = np.load(feature_file)
|
||||||
|
else:
|
||||||
|
print(f'Adding volatility feature job: {feature_name}')
|
||||||
|
feature_jobs.append((feature_name, compute_volatility, window))
|
||||||
|
|
||||||
|
# Sequential computation for all non-cached features
|
||||||
|
if feature_jobs:
|
||||||
|
print(f'Computing {len(feature_jobs)} features sequentially...')
|
||||||
|
for job in feature_jobs:
|
||||||
|
print(f'Computing feature job: {job[0]}')
|
||||||
|
feature_name, result = run_feature_job(job, df)
|
||||||
|
features_dict[feature_name] = result
|
||||||
|
feature_file = f'./data/{csv_prefix}_{feature_name}.npy'
|
||||||
|
np.save(feature_file, result.values)
|
||||||
|
print(f'Saved computed feature: {feature_file}')
|
||||||
|
print('All features computed.')
|
||||||
|
else:
|
||||||
|
print('All features loaded from cache.')
|
||||||
|
|
||||||
|
# Concatenate all new features at once
|
||||||
|
print('Concatenating all new features to DataFrame...')
|
||||||
|
features_df = pd.DataFrame(features_dict)
|
||||||
|
print("Columns in features_df:", features_df.columns.tolist())
|
||||||
|
print("All-NaN columns in features_df:", features_df.columns[features_df.isna().all()].tolist())
|
||||||
|
df = pd.concat([df, features_df], axis=1)
|
||||||
|
|
||||||
|
# Print all columns after concatenation
|
||||||
|
print("All columns in df after concat:", df.columns.tolist())
|
||||||
|
|
||||||
|
# Downcast all float columns to save memory
|
||||||
|
print('Downcasting float columns to save memory...')
|
||||||
|
for col in df.columns:
|
||||||
|
try:
|
||||||
|
df[col] = pd.to_numeric(df[col], downcast='float')
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Drop intermediate features_df to free memory
|
||||||
|
print('Dropping intermediate features_df to free memory...')
|
||||||
|
del features_df
|
||||||
|
import gc
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
feature_end_time = time.time()
|
||||||
|
print(f'Feature computation completed in {feature_end_time - feature_start_time:.2f} seconds.')
|
||||||
|
|
||||||
|
# Add Supertrend indicators (custom)
|
||||||
|
print('Preparing data for Supertrend calculation...')
|
||||||
|
st_df = df.rename(columns={'High': 'high', 'Low': 'low', 'Close': 'close'})
|
||||||
|
|
||||||
|
print('Calculating Supertrend indicators...')
|
||||||
|
supertrend = Supertrends(st_df)
|
||||||
|
st_results = supertrend.calculate_supertrend_indicators()
|
||||||
|
for idx, st in enumerate(st_results):
|
||||||
|
period = st['params']['period']
|
||||||
|
multiplier = st['params']['multiplier']
|
||||||
|
# Skip useless supertrend features
|
||||||
|
if (period == 10 and multiplier == 1.0) or (period == 11 and multiplier == 2.0):
|
||||||
|
continue
|
||||||
|
print(f'Adding Supertrend features: supertrend_{period}_{multiplier} and supertrend_trend_{period}_{multiplier}')
|
||||||
|
df[f'supertrend_{period}_{multiplier}'] = st['results']['supertrend']
|
||||||
|
df[f'supertrend_trend_{period}_{multiplier}'] = st['results']['trend']
|
||||||
|
|
||||||
|
# Add time features (exclude 'dayofweek')
|
||||||
|
print('Adding hour feature...')
|
||||||
|
df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce')
|
||||||
|
df['hour'] = df['Timestamp'].dt.hour
|
||||||
|
|
||||||
|
# Drop NaNs after all feature engineering
|
||||||
|
print('Dropping NaNs after feature engineering...')
|
||||||
|
df = df.dropna().reset_index(drop=True)
|
||||||
|
|
||||||
|
# Exclude 'Timestamp', 'Close', 'log_return', and any future target columns from features
|
||||||
|
print('Selecting feature columns...')
|
||||||
|
exclude_cols = ['Timestamp', 'Close', 'log_return', 'log_return_5', 'log_return_15', 'log_return_30']
|
||||||
|
feature_cols = [col for col in df.columns if col not in exclude_cols]
|
||||||
|
|
||||||
|
# Print the features used for training
|
||||||
|
print("Features used for training:", feature_cols)
|
||||||
|
|
||||||
|
# Drop excluded columns to save memory
|
||||||
|
print('Dropping excluded columns to save memory...')
|
||||||
|
df = df[feature_cols + ['log_return', 'Timestamp']]
|
||||||
|
|
||||||
|
print('Preparing X and y...')
|
||||||
|
X = df[feature_cols].values.astype(np.float32)
|
||||||
|
y = df['log_return'].values.astype(np.float32)
|
||||||
|
|
||||||
|
split_idx = int(len(X) * 0.8)
|
||||||
|
print(f'Splitting data: {split_idx} train, {len(X) - split_idx} test')
|
||||||
|
X_train, X_test = X[:split_idx], X[split_idx:]
|
||||||
|
y_train, y_test = y[:split_idx], y[split_idx:]
|
||||||
|
test_timestamps = df['Timestamp'].values[split_idx:]
|
||||||
|
|
||||||
|
print('Initializing model...')
|
||||||
|
model = CustomXGBoostGPU(X_train, X_test, y_train, y_test)
|
||||||
|
|
||||||
|
print('Training model...')
|
||||||
|
booster = model.train()
|
||||||
|
|
||||||
|
print('Training complete.')
|
||||||
|
|
||||||
|
# Save the trained model
|
||||||
|
model.save_model('./data/xgboost_model.json')
|
||||||
|
print('Model saved to ./data/xgboost_model.json')
|
||||||
|
|
||||||
|
if hasattr(model, 'params'):
|
||||||
|
print("Model hyperparameters:", model.params)
|
||||||
|
if hasattr(model, 'model') and hasattr(model.model, 'get_score'):
|
||||||
|
import operator
|
||||||
|
importances = model.model.get_score(importance_type='weight')
|
||||||
|
# Map f0, f1, ... to actual feature names
|
||||||
|
feature_map = {f"f{idx}": name for idx, name in enumerate(feature_cols)}
|
||||||
|
sorted_importances = sorted(importances.items(), key=operator.itemgetter(1), reverse=True)
|
||||||
|
print('Feature importances (sorted, with names):')
|
||||||
|
for feat, score in sorted_importances:
|
||||||
|
print(f'{feature_map.get(feat, feat)}: {score}')
|
||||||
|
|
||||||
|
print('Making predictions for first 5 test samples...')
|
||||||
|
preds = model.predict(X_test[:5])
|
||||||
|
print('Predictions for first 5 test samples:', preds)
|
||||||
|
print('Actual values for first 5 test samples:', y_test[:5])
|
||||||
|
|
||||||
|
print('Making predictions for all test samples...')
|
||||||
|
test_preds = model.predict(X_test)
|
||||||
|
rmse = np.sqrt(mean_squared_error(y_test, test_preds))
|
||||||
|
print(f'RMSE on test set: {rmse:.4f}')
|
||||||
|
|
||||||
|
print('Saving y_test and test_preds to disk...')
|
||||||
|
np.save('./data/y_test.npy', y_test)
|
||||||
|
np.save('./data/test_preds.npy', test_preds)
|
||||||
|
|
||||||
|
# Reconstruct price series from log returns
|
||||||
|
print('Reconstructing price series from log returns...')
|
||||||
|
# Get the last available Close price before the test set
|
||||||
|
# The DataFrame df has been reset, so use split_idx to get the right row
|
||||||
|
if 'Close' in df.columns:
|
||||||
|
close_prices = df['Close'].values
|
||||||
|
else:
|
||||||
|
# Reload original CSV to get Close prices if not present
|
||||||
|
close_prices = pd.read_csv(csv_path)['Close'].values
|
||||||
|
start_price = close_prices[split_idx] # This is the price at the split point
|
||||||
|
# Actual prices
|
||||||
|
actual_prices = [start_price]
|
||||||
|
for r in y_test:
|
||||||
|
actual_prices.append(actual_prices[-1] * np.exp(r))
|
||||||
|
actual_prices = np.array(actual_prices[1:])
|
||||||
|
# Predicted prices
|
||||||
|
predicted_prices = [start_price]
|
||||||
|
for r in test_preds:
|
||||||
|
predicted_prices.append(predicted_prices[-1] * np.exp(r))
|
||||||
|
predicted_prices = np.array(predicted_prices[1:])
|
||||||
|
|
||||||
|
print('Plotting predicted vs actual prices...')
|
||||||
|
plot_predicted_vs_actual_prices(actual_prices, predicted_prices, test_timestamps)
|
||||||
|
|
||||||
|
print("Final features used for training:", feature_cols)
|
||||||
|
|
||||||
|
print("Shape of X:", X.shape)
|
||||||
|
print("First row of X:", X[0])
|
||||||
|
print("stoch_k in feature_cols?", "stoch_k" in feature_cols)
|
||||||
|
if "stoch_k" in feature_cols:
|
||||||
|
idx = feature_cols.index("stoch_k")
|
||||||
|
print("First 10 values of stoch_k:", X[:10, idx])
|
||||||
169
xgboost/plot_results.py
Normal file
169
xgboost/plot_results.py
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
import numpy as np
|
||||||
|
import dash
|
||||||
|
from dash import dcc, html
|
||||||
|
import plotly.graph_objs as go
|
||||||
|
import threading
|
||||||
|
|
||||||
|
|
||||||
|
def display_actual_vs_predicted(y_test, test_preds, timestamps, n_plot=200):
|
||||||
|
import plotly.offline as pyo
|
||||||
|
n_plot = min(n_plot, len(y_test))
|
||||||
|
plot_indices = timestamps[:n_plot]
|
||||||
|
actual = y_test[:n_plot]
|
||||||
|
predicted = test_preds[:n_plot]
|
||||||
|
|
||||||
|
trace_actual = go.Scatter(x=plot_indices, y=actual, mode='lines', name='Actual')
|
||||||
|
trace_predicted = go.Scatter(x=plot_indices, y=predicted, mode='lines', name='Predicted')
|
||||||
|
data = [trace_actual, trace_predicted]
|
||||||
|
layout = go.Layout(
|
||||||
|
title='Actual vs. Predicted BTC Close Prices (Test Set)',
|
||||||
|
xaxis={'title': 'Timestamp'},
|
||||||
|
yaxis={'title': 'BTC Close Price'},
|
||||||
|
legend={'x': 0, 'y': 1},
|
||||||
|
margin={'l': 40, 'b': 40, 't': 40, 'r': 10},
|
||||||
|
hovermode='closest'
|
||||||
|
)
|
||||||
|
fig = go.Figure(data=data, layout=layout)
|
||||||
|
pyo.plot(fig)
|
||||||
|
|
||||||
|
def plot_target_distribution(y_train, y_test):
|
||||||
|
import plotly.offline as pyo
|
||||||
|
trace_train = go.Histogram(
|
||||||
|
x=y_train,
|
||||||
|
nbinsx=100,
|
||||||
|
opacity=0.5,
|
||||||
|
name='Train',
|
||||||
|
marker=dict(color='blue')
|
||||||
|
)
|
||||||
|
trace_test = go.Histogram(
|
||||||
|
x=y_test,
|
||||||
|
nbinsx=100,
|
||||||
|
opacity=0.5,
|
||||||
|
name='Test',
|
||||||
|
marker=dict(color='orange')
|
||||||
|
)
|
||||||
|
data = [trace_train, trace_test]
|
||||||
|
layout = go.Layout(
|
||||||
|
title='Distribution of Target Variable (Close Price)',
|
||||||
|
xaxis=dict(title='BTC Close Price'),
|
||||||
|
yaxis=dict(title='Frequency'),
|
||||||
|
barmode='overlay'
|
||||||
|
)
|
||||||
|
fig = go.Figure(data=data, layout=layout)
|
||||||
|
pyo.plot(fig)
|
||||||
|
|
||||||
|
def plot_predicted_vs_actual_log_returns(y_test, test_preds, timestamps=None, n_plot=200):
|
||||||
|
import plotly.offline as pyo
|
||||||
|
import plotly.graph_objs as go
|
||||||
|
n_plot = min(n_plot, len(y_test))
|
||||||
|
actual = y_test[:n_plot]
|
||||||
|
predicted = test_preds[:n_plot]
|
||||||
|
if timestamps is not None:
|
||||||
|
x_axis = timestamps[:n_plot]
|
||||||
|
x_label = 'Timestamp'
|
||||||
|
else:
|
||||||
|
x_axis = list(range(n_plot))
|
||||||
|
x_label = 'Index'
|
||||||
|
|
||||||
|
# Line plot: Actual vs Predicted over time
|
||||||
|
trace_actual = go.Scatter(x=x_axis, y=actual, mode='lines', name='Actual')
|
||||||
|
trace_predicted = go.Scatter(x=x_axis, y=predicted, mode='lines', name='Predicted')
|
||||||
|
data_line = [trace_actual, trace_predicted]
|
||||||
|
layout_line = go.Layout(
|
||||||
|
title='Actual vs. Predicted Log Returns (Test Set)',
|
||||||
|
xaxis={'title': x_label},
|
||||||
|
yaxis={'title': 'Log Return'},
|
||||||
|
legend={'x': 0, 'y': 1},
|
||||||
|
margin={'l': 40, 'b': 40, 't': 40, 'r': 10},
|
||||||
|
hovermode='closest'
|
||||||
|
)
|
||||||
|
fig_line = go.Figure(data=data_line, layout=layout_line)
|
||||||
|
pyo.plot(fig_line, filename='log_return_line_plot.html')
|
||||||
|
|
||||||
|
# Scatter plot: Predicted vs Actual
|
||||||
|
trace_scatter = go.Scatter(
|
||||||
|
x=actual,
|
||||||
|
y=predicted,
|
||||||
|
mode='markers',
|
||||||
|
name='Predicted vs Actual',
|
||||||
|
opacity=0.5
|
||||||
|
)
|
||||||
|
# Diagonal reference line
|
||||||
|
min_val = min(np.min(actual), np.min(predicted))
|
||||||
|
max_val = max(np.max(actual), np.max(predicted))
|
||||||
|
trace_diag = go.Scatter(
|
||||||
|
x=[min_val, max_val],
|
||||||
|
y=[min_val, max_val],
|
||||||
|
mode='lines',
|
||||||
|
name='Ideal',
|
||||||
|
line=dict(dash='dash', color='red')
|
||||||
|
)
|
||||||
|
data_scatter = [trace_scatter, trace_diag]
|
||||||
|
layout_scatter = go.Layout(
|
||||||
|
title='Predicted vs Actual Log Returns (Scatter)',
|
||||||
|
xaxis={'title': 'Actual Log Return'},
|
||||||
|
yaxis={'title': 'Predicted Log Return'},
|
||||||
|
showlegend=True,
|
||||||
|
margin={'l': 40, 'b': 40, 't': 40, 'r': 10},
|
||||||
|
hovermode='closest'
|
||||||
|
)
|
||||||
|
fig_scatter = go.Figure(data=data_scatter, layout=layout_scatter)
|
||||||
|
pyo.plot(fig_scatter, filename='log_return_scatter_plot.html')
|
||||||
|
|
||||||
|
def plot_predicted_vs_actual_prices(actual_prices, predicted_prices, timestamps=None, n_plot=200):
|
||||||
|
import plotly.offline as pyo
|
||||||
|
import plotly.graph_objs as go
|
||||||
|
n_plot = min(n_plot, len(actual_prices))
|
||||||
|
actual = actual_prices[:n_plot]
|
||||||
|
predicted = predicted_prices[:n_plot]
|
||||||
|
if timestamps is not None:
|
||||||
|
x_axis = timestamps[:n_plot]
|
||||||
|
x_label = 'Timestamp'
|
||||||
|
else:
|
||||||
|
x_axis = list(range(n_plot))
|
||||||
|
x_label = 'Index'
|
||||||
|
|
||||||
|
# Line plot: Actual vs Predicted over time
|
||||||
|
trace_actual = go.Scatter(x=x_axis, y=actual, mode='lines', name='Actual Price')
|
||||||
|
trace_predicted = go.Scatter(x=x_axis, y=predicted, mode='lines', name='Predicted Price')
|
||||||
|
data_line = [trace_actual, trace_predicted]
|
||||||
|
layout_line = go.Layout(
|
||||||
|
title='Actual vs. Predicted BTC Prices (Test Set)',
|
||||||
|
xaxis={'title': x_label},
|
||||||
|
yaxis={'title': 'BTC Price'},
|
||||||
|
legend={'x': 0, 'y': 1},
|
||||||
|
margin={'l': 40, 'b': 40, 't': 40, 'r': 10},
|
||||||
|
hovermode='closest'
|
||||||
|
)
|
||||||
|
fig_line = go.Figure(data=data_line, layout=layout_line)
|
||||||
|
pyo.plot(fig_line, filename='price_line_plot.html')
|
||||||
|
|
||||||
|
# Scatter plot: Predicted vs Actual
|
||||||
|
trace_scatter = go.Scatter(
|
||||||
|
x=actual,
|
||||||
|
y=predicted,
|
||||||
|
mode='markers',
|
||||||
|
name='Predicted vs Actual',
|
||||||
|
opacity=0.5
|
||||||
|
)
|
||||||
|
# Diagonal reference line
|
||||||
|
min_val = min(np.min(actual), np.min(predicted))
|
||||||
|
max_val = max(np.max(actual), np.max(predicted))
|
||||||
|
trace_diag = go.Scatter(
|
||||||
|
x=[min_val, max_val],
|
||||||
|
y=[min_val, max_val],
|
||||||
|
mode='lines',
|
||||||
|
name='Ideal',
|
||||||
|
line=dict(dash='dash', color='red')
|
||||||
|
)
|
||||||
|
data_scatter = [trace_scatter, trace_diag]
|
||||||
|
layout_scatter = go.Layout(
|
||||||
|
title='Predicted vs Actual Prices (Scatter)',
|
||||||
|
xaxis={'title': 'Actual Price'},
|
||||||
|
yaxis={'title': 'Predicted Price'},
|
||||||
|
showlegend=True,
|
||||||
|
margin={'l': 40, 'b': 40, 't': 40, 'r': 10},
|
||||||
|
hovermode='closest'
|
||||||
|
)
|
||||||
|
fig_scatter = go.Figure(data=data_scatter, layout=layout_scatter)
|
||||||
|
pyo.plot(fig_scatter, filename='price_scatter_plot.html')
|
||||||
Reference in New Issue
Block a user