165 lines
6.1 KiB
Python
165 lines
6.1 KiB
Python
|
|
import pandas as pd
|
||
|
|
import numpy as np
|
||
|
|
import strategy_config as config
|
||
|
|
from numba import njit
|
||
|
|
|
||
|
|
# ==============================================================================
# --- LOOKBACK WINDOW FOR ROLLING Z-SCORES (PREVENTS LOOK-AHEAD BIAS) ---
# ==============================================================================
# Z-scores are normalised with statistics taken from a trailing window only,
# so a row never sees values that come after it.
ZSCORE_LOOKBACK = 7 * 24  # 1 week in hourly candles (168 rows)
MIN_PERIODS_ZSCORE = 24  # passed as min_periods to the rolling z-score window
|
||
|
|
|
||
|
|
@njit
def ema_nb(arr, window):
    """Compute an exponential moving average over a 1-D array.

    The smoothing factor is ``alpha = 2 / (window + 1)``. The first output
    equals the first input; every later value is
    ``alpha * arr[i] + (1 - alpha) * ema[i - 1]``.

    Args:
        arr: 1-D numeric array of input values (assumed floating point so
            that it can hold NaN -- TODO confirm against callers).
        window: Span used to derive the smoothing factor.

    Returns:
        Array shaped like ``arr`` holding the EMA at every position. An
        empty input yields an empty output.
    """
    ema_arr = np.full_like(arr, np.nan)
    n = len(arr)
    # Seeding reads arr[0]; on an empty array that is an out-of-bounds access,
    # which compiled nopython code does not check by default.
    if n == 0:
        return ema_arr

    alpha = 2.0 / (window + 1.0)
    ema_arr[0] = arr[0]
    for i in range(1, n):
        ema_arr[i] = alpha * arr[i] + (1.0 - alpha) * ema_arr[i - 1]
    return ema_arr
|
||
|
|
|
||
|
|
@njit
def atr_nb(high, low, close, window):
    """Compute the Average True Range with Wilder-style recursive smoothing.

    The true range at bar ``i`` (``i >= 1``) is the largest of
    ``high[i] - low[i]``, ``|high[i] - close[i-1]|`` and
    ``|low[i] - close[i-1]|``. The first ATR value is stored at index
    ``window`` as the plain mean of the true ranges of bars ``1..window``;
    every later value is ``(atr[i-1] * (window - 1) + tr[i]) / window``.

    Args:
        high: 1-D array of bar highs.
        low: 1-D array of bar lows.
        close: 1-D array of bar closes; its length sets the output length
            (assumed floating point so it can hold NaN -- TODO confirm).
        window: Number of bars in the smoothing period.

    Returns:
        Array shaped like ``close``. Positions before ``window`` are NaN.
        When the series has ``window`` bars or fewer there is not enough
        data to seed the average, and the whole result is NaN.
    """
    n = len(close)
    atr = np.full_like(close, np.nan)
    # The seed is written to atr[window]; with n <= window that index is past
    # the end of the array, and compiled nopython code would not catch it.
    if n <= window:
        return atr

    tr = np.zeros(n)
    for i in range(1, n):
        prev_close = close[i - 1]
        tr[i] = max(
            high[i] - low[i],
            abs(high[i] - prev_close),
            abs(low[i] - prev_close),
        )

    # Bar 0 has no previous close, so the seed mean starts at bar 1.
    atr[window] = np.mean(tr[1:window + 1])
    for i in range(window + 1, n):
        atr[i] = (atr[i - 1] * (window - 1) + tr[i]) / window
    return atr
|
||
|
|
|
||
|
|
def _add_price_action_features(df: pd.DataFrame) -> pd.DataFrame:
    """Append return, momentum, moving-average and volatility columns.

    Capitalised OHLCV headers ('Open', 'High', 'Low', 'Close', 'Volume') are
    lower-cased first; every feature is then derived from the 'close' column.

    Columns written, in order: 'returns', 'log_returns', one 'momentum_<n>'
    per configured momentum period, one 'SMA_<n>' per configured SMA period,
    and 'volatility_<n>' (rolling standard deviation of the log returns).

    Returns:
        The frame carrying the new columns. If headers had to be renamed this
        is the renamed frame returned by ``DataFrame.rename``; otherwise the
        columns are added to the frame that was passed in.
    """
    capitalised = ('Open', 'High', 'Low', 'Close', 'Volume')
    renames = {}
    for name in df.columns:
        if name in capitalised:
            renames[name] = name.lower()
    if renames:
        df = df.rename(columns=renames)

    close = df['close']

    # Simple and logarithmic one-bar returns.
    df['returns'] = close.pct_change()
    log_ret = np.log(close / close.shift(1))
    df['log_returns'] = log_ret

    # Percentage change over each configured momentum horizon.
    for span in (config.MOMENTUM_1_PERIODS, config.MOMENTUM_2_PERIODS):
        df[f'momentum_{span}'] = close.pct_change(periods=span)

    # Fast and slow simple moving averages of the close.
    for span in (config.SMA_FAST_PERIODS, config.SMA_SLOW_PERIODS):
        df[f'SMA_{span}'] = close.rolling(window=span).mean()

    vol_span = config.VOLATILITY_PERIODS
    df[f'volatility_{vol_span}'] = log_ret.rolling(window=vol_span).std()
    return df
|
||
|
|
|
||
|
|
def _add_mean_reversion_features(df: pd.DataFrame) -> pd.DataFrame:
    """Append Bollinger Band columns computed from the 'close' column.

    Columns written: 'bb_middle' (rolling mean), 'bb_upper' / 'bb_lower'
    (mean plus / minus two rolling standard deviations), 'bb_width' (band
    range divided by the middle band) and 'bb_percent' (position of the
    close inside the band range).

    Returns:
        The same frame with the columns added.
    """
    close = df['close']
    roller = close.rolling(window=config.BBAND_PERIODS)

    middle = roller.mean()
    offset = roller.std() * 2
    df['bb_middle'] = middle
    df['bb_upper'] = middle + offset
    df['bb_lower'] = middle - offset

    band_range = df['bb_upper'] - df['bb_lower']
    df['bb_width'] = band_range / df['bb_middle']

    # A zero-width band would divide by zero; report NaN for those rows.
    safe_range = band_range.replace(0, np.nan)
    df['bb_percent'] = (close - df['bb_lower']) / safe_range
    return df
|
||
|
|
|
||
|
|
def _add_volatility_features(df: pd.DataFrame) -> pd.DataFrame:
    """Append an 'atr' column computed by ``atr_nb``.

    The 'high', 'low' and 'close' columns are handed over as NumPy arrays
    together with ``config.ATR_PERIOD`` as the smoothing window.

    Returns:
        The same frame with the 'atr' column added.
    """
    price_arrays = [df[name].to_numpy() for name in ('high', 'low', 'close')]
    df['atr'] = atr_nb(*price_arrays, config.ATR_PERIOD)
    return df
|
||
|
|
|
||
|
|
def _add_onchain_features(df: pd.DataFrame) -> pd.DataFrame:
    """Append rolling z-scores and cycle moving averages for on-chain metrics.

    For every name in ``config.ONCHAIN_FEATURE_NAMES``:

    * If the column is absent, '<name>_z' is created and filled with NaN.
    * Otherwise the column is coerced to numeric (unparseable values become
      NaN), 'net_exchange_flow' is sign-flipped, 'funding_rate' is multiplied
      by 100, and '<name>_z' is computed against a trailing window of
      ``ZSCORE_LOOKBACK`` rows.
    * 'nupl' and 'mvrv' additionally get a '<name>_ma_<n>' rolling mean.

    '<name>_z' is always created. Rows whose trailing window holds fewer
    than ``MIN_PERIODS_ZSCORE`` observations, or whose rolling standard
    deviation is zero, are NaN. Sparse columns therefore yield a NaN-filled
    z-score column instead of no column at all.

    Returns:
        The same frame with the feature columns added; the source on-chain
        columns are overwritten with their transformed numeric values.
    """
    for col_name in config.ONCHAIN_FEATURE_NAMES:
        z_name = f'{col_name}_z'

        if col_name not in df.columns:
            # Keep the feature schema stable when a metric is unavailable.
            df[z_name] = np.nan
            continue

        # Ensure numeric
        df[col_name] = pd.to_numeric(df[col_name], errors='coerce')

        # Custom transforms
        if col_name == 'net_exchange_flow':
            df[col_name] = -df[col_name]  # Outflows bullish
        elif col_name == 'funding_rate':
            df[col_name] = df[col_name] * 100  # To %

        # Rolling z-score over past rows only (prevents look-ahead bias).
        # min_periods already yields NaN where the window is too thin, so no
        # separate "enough data" check is needed and the column always exists.
        values = df[col_name]
        window = values.rolling(
            window=ZSCORE_LOOKBACK, min_periods=MIN_PERIODS_ZSCORE)
        rolling_mean = window.mean()
        # A constant window has zero spread; emit NaN rather than +/-inf.
        rolling_std = window.std().replace(0, np.nan)
        df[z_name] = (values - rolling_mean) / rolling_std

        # Cycle MAs for key metrics
        if col_name == 'nupl':
            df[f'{col_name}_ma_{config.NUPL_MA_PERIODS}'] = values.rolling(
                config.NUPL_MA_PERIODS).mean()
        elif col_name == 'mvrv':
            df[f'{col_name}_ma_{config.MVRV_MA_PERIODS}'] = values.rolling(
                config.MVRV_MA_PERIODS).mean()

    return df
|
||
|
|
|
||
|
|
def _add_target_variables(df: pd.DataFrame) -> pd.DataFrame:
    """Append the forward-looking label columns used for ML training.

    'future_price' is the close ``config.PREDICTION_PERIOD`` rows ahead, and
    'target' is 1 where the return to that price exceeds 1%, else 0.

    Returns:
        The same frame with 'future_price' and 'target' added.
    """
    horizon = config.PREDICTION_PERIOD
    close = df['close']

    df['future_price'] = close.shift(-horizon)
    forward_return = (df['future_price'] - close) / close

    # NOTE(review): the last `horizon` rows have no future price, so the
    # comparison is False there and they are labelled 0 rather than NaN.
    # Presumably callers drop rows with a missing 'future_price' before
    # training -- confirm.
    df['target'] = (forward_return > 0.01).astype(int)
    return df
|
||
|
|
|
||
|
|
def create_features(df: pd.DataFrame) -> pd.DataFrame:
    """Build the full feature set on a copy of ``df``.

    The input frame is copied, capitalised OHLCV headers are lower-cased,
    the high/low/close columns are duplicated under '*_raw' names, and the
    feature stages are then run in order: price action, mean reversion,
    volatility, on-chain, target.

    Returns:
        A new frame holding the original columns plus all engineered ones.
    """
    df_feat = df.copy()

    # Ensure lowercase columns first
    capitalised = ('Open', 'High', 'Low', 'Close', 'Volume')
    renames = {}
    for name in df_feat.columns:
        if name in capitalised:
            renames[name] = name.lower()
    if renames:
        df_feat = df_feat.rename(columns=renames)

    # Pass through raw data needed for ATR/exits
    raw_copies = (
        ('high_raw', 'high'),
        ('low_raw', 'low'),
        ('close_raw', 'close'),
    )
    for raw_name, source_name in raw_copies:
        df_feat[raw_name] = df_feat[source_name]

    stages = (
        _add_price_action_features,    # 1. Price-Action
        _add_mean_reversion_features,  # 2. Mean-Reversion
        _add_volatility_features,      # 3. Volatility
        _add_onchain_features,         # 4. On-Chain
        _add_target_variables,         # 5. Target
    )
    for stage in stages:
        df_feat = stage(df_feat)

    return df_feat
|