# lowkey_backtest/features.py

import pandas as pd
import numpy as np
import strategy_config as config
from numba import njit
# ------------------------------------------------------------------------------
# Rolling z-score window settings. The z-scores are computed over a trailing
# window (rather than the full sample) to prevent look-ahead bias.
# ------------------------------------------------------------------------------
# Window length in rows: one week of hourly candles (7 days * 24 hours).
ZSCORE_LOOKBACK = 7 * 24
# Passed as ``min_periods`` to the rolling mean/std used for the z-scores.
MIN_PERIODS_ZSCORE = 24
@njit
def ema_nb(arr, window):
    """Return the exponential moving average of ``arr``.

    The smoothing factor is ``2 / (window + 1)`` and the recursion is seeded
    with the first element of ``arr``.

    Args:
        arr: Array of input values.
            NOTE(review): presumably a 1-D float array; confirm with callers.
        window: EMA span used to derive the smoothing factor.

    Returns:
        An array shaped like ``arr`` holding the EMA values. An empty input
        yields an empty output.
    """
    alpha = 2.0 / (window + 1.0)
    ema_arr = np.full_like(arr, np.nan)
    # Guard: with no elements there is nothing to seed the recursion with,
    # and reading arr[0] would index past the end of the array.
    if len(arr) == 0:
        return ema_arr
    ema_arr[0] = arr[0]
    for i in range(1, len(arr)):
        ema_arr[i] = alpha * arr[i] + (1.0 - alpha) * ema_arr[i - 1]
    return ema_arr
@njit
def atr_nb(high, low, close, window):
    """Compute the Average True Range with recursive smoothing.

    The true range at index ``i`` is the largest of ``high[i] - low[i]``,
    ``|high[i] - close[i-1]|`` and ``|low[i] - close[i-1]|``. The first ATR
    value is written at index ``window`` as the mean of the true ranges at
    indices ``1..window``; later values follow
    ``atr[i] = (atr[i-1] * (window - 1) + tr[i]) / window``.

    Args:
        high: Array of high prices.
        low: Array of low prices.
        close: Array of close prices; its length defines the output length.
            NOTE(review): presumably 1-D float arrays of equal length; an
            integer ``close`` could not hold the NaN fill value. Confirm.
        window: Number of periods used for the average.

    Returns:
        An array shaped like ``close``. Indices before ``window`` are NaN.
        When ``window`` is not positive or there are not more than ``window``
        rows, every element is NaN.
    """
    n = len(close)
    tr = np.zeros(n)
    atr = np.full_like(close, np.nan)
    # Guard: seeding writes atr[window], which needs at least window + 1 rows;
    # a non-positive window would divide by zero or index from the end.
    if window < 1 or n <= window:
        return atr
    for i in range(1, n):
        tr[i] = max(high[i] - low[i],
                    abs(high[i] - close[i - 1]),
                    abs(low[i] - close[i - 1]))
    # Seed with the plain mean of the first `window` true ranges (tr[0] is
    # skipped because it has no previous close).
    tr_series = tr[1:window + 1]
    atr[window] = np.mean(tr_series)
    for i in range(window + 1, n):
        atr[i] = (atr[i - 1] * (window - 1) + tr[i]) / window
    return atr
def _add_price_action_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add return, momentum, moving-average and volatility columns.

    Capitalised OHLCV column names ('Open', 'High', 'Low', 'Close', 'Volume')
    are first renamed to lowercase. All features are derived from the
    'close' column; look-back lengths come from ``config``.

    Args:
        df: Price frame containing a 'close' (or 'Close') column.

    Returns:
        The frame with the new feature columns. When no rename is needed the
        input frame itself is modified and returned.
    """
    capitalised = ['Open', 'High', 'Low', 'Close', 'Volume']
    renames = {}
    for column in df.columns:
        if column in capitalised:
            renames[column] = column.lower()
    if renames:
        df = df.rename(columns=renames)

    close = df['close']

    # One-period simple and log returns.
    df['returns'] = close.pct_change()
    log_ret = np.log(close / close.shift(1))
    df['log_returns'] = log_ret

    # Percentage change over the two configured momentum horizons.
    for horizon in (config.MOMENTUM_1_PERIODS, config.MOMENTUM_2_PERIODS):
        df[f'momentum_{horizon}'] = close.pct_change(periods=horizon)

    # Fast and slow simple moving averages of the close.
    for span in (config.SMA_FAST_PERIODS, config.SMA_SLOW_PERIODS):
        df[f'SMA_{span}'] = close.rolling(window=span).mean()

    # Rolling standard deviation of the log returns.
    vol_span = config.VOLATILITY_PERIODS
    df[f'volatility_{vol_span}'] = log_ret.rolling(window=vol_span).std()
    return df
def _add_mean_reversion_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add Bollinger Band columns computed from the 'close' column.

    Adds 'bb_middle' (rolling mean), 'bb_upper' / 'bb_lower' (middle plus or
    minus two rolling standard deviations), 'bb_width' (band span divided by
    the middle band) and 'bb_percent' (position of the close inside the
    band). The window length is ``config.BBAND_PERIODS``.

    Args:
        df: Frame containing a 'close' column; modified in place.

    Returns:
        The same frame with the Bollinger columns added.
    """
    close_window = df['close'].rolling(window=config.BBAND_PERIODS)
    df['bb_middle'] = close_window.mean()
    two_sigma = close_window.std() * 2

    df['bb_upper'] = df['bb_middle'] + two_sigma
    df['bb_lower'] = df['bb_middle'] - two_sigma

    band_span = df['bb_upper'] - df['bb_lower']
    df['bb_width'] = band_span / df['bb_middle']

    # A zero-width band would divide by zero; map it to NaN instead.
    safe_span = band_span.replace(0, np.nan)
    df['bb_percent'] = (df['close'] - df['bb_lower']) / safe_span
    return df
def _add_volatility_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add an 'atr' column computed by ``atr_nb``.

    Args:
        df: Frame containing 'high', 'low' and 'close' columns; modified in
            place.

    Returns:
        The same frame with the 'atr' column added, using
        ``config.ATR_PERIOD`` as the window.
    """
    # atr_nb works on plain NumPy arrays, so unwrap the three price columns.
    high_arr = df['high'].to_numpy()
    low_arr = df['low'].to_numpy()
    close_arr = df['close'].to_numpy()
    df['atr'] = atr_nb(high_arr, low_arr, close_arr, config.ATR_PERIOD)
    return df
def _add_onchain_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add rolling z-scores and cycle moving averages for on-chain columns.

    For every name in ``config.ONCHAIN_FEATURE_NAMES`` a ``<name>_z`` column
    is always produced, so the output schema does not depend on the data:

    * column present with more than ``MIN_PERIODS_ZSCORE`` non-null values:
      trailing-window z-score over ``ZSCORE_LOOKBACK`` rows;
    * column present but with too few non-null values: all NaN;
    * column absent: all NaN.

    Present columns are coerced to numeric in place (unparseable values
    become NaN). 'net_exchange_flow' is negated and 'funding_rate' is
    multiplied by 100 before the z-score is taken. 'nupl' and 'mvrv'
    additionally get a rolling-mean column.

    Args:
        df: Feature frame; modified in place.

    Returns:
        The same frame with the on-chain feature columns added.
    """
    for col_name in config.ONCHAIN_FEATURE_NAMES:
        z_col = f'{col_name}_z'

        if col_name not in df.columns:
            # Create a NaN column so downstream consumers still find it.
            df[z_col] = np.nan
            continue

        # Ensure numeric
        df[col_name] = pd.to_numeric(df[col_name], errors='coerce')

        # Custom transforms
        if col_name == 'net_exchange_flow':
            df[col_name] = -df[col_name]  # Outflows bullish
        elif col_name == 'funding_rate':
            df[col_name] *= 100  # To %

        # Rolling z-score: only trailing data is used (no look-ahead bias).
        if df[col_name].notna().sum() > MIN_PERIODS_ZSCORE:
            trailing = df[col_name].rolling(
                window=ZSCORE_LOOKBACK, min_periods=MIN_PERIODS_ZSCORE)
            rolling_mean = trailing.mean()
            # A zero standard deviation would divide by zero; use NaN.
            rolling_std = trailing.std().replace(0, np.nan)
            df[z_col] = (df[col_name] - rolling_mean) / rolling_std
        else:
            # Too little data for a z-score: still emit the column, matching
            # the missing-column case, instead of silently omitting it.
            df[z_col] = np.nan

        # Cycle MAs for key metrics
        if col_name == 'nupl':
            df[f'{col_name}_ma_{config.NUPL_MA_PERIODS}'] = df[col_name].rolling(
                config.NUPL_MA_PERIODS).mean()
        elif col_name == 'mvrv':
            df[f'{col_name}_ma_{config.MVRV_MA_PERIODS}'] = df[col_name].rolling(
                config.MVRV_MA_PERIODS).mean()
    return df
def _add_target_variables(df: pd.DataFrame) -> pd.DataFrame:
    """Add the 'future_price' and binary 'target' columns for ML training.

    'future_price' is the close ``config.PREDICTION_PERIOD`` rows ahead.
    'target' is 1 when the return from the current close to that future
    price exceeds 1%, otherwise 0.

    NOTE: rows with no future price (NaN in 'future_price') get target 0,
    because a NaN return does not compare greater than the threshold.

    Args:
        df: Frame containing a 'close' column; modified in place.

    Returns:
        The same frame with the two columns added.
    """
    horizon = config.PREDICTION_PERIOD
    close = df['close']

    # Negative shift pulls the close from `horizon` rows later onto this row.
    future_close = close.shift(-horizon)
    df['future_price'] = future_close

    forward_return = (future_close - close) / close
    # 1% gain threshold in prediction horizon
    df['target'] = forward_return.gt(0.01).astype(int)
    return df
def create_features(df: pd.DataFrame) -> pd.DataFrame:
    """Build the full feature set on a copy of ``df``.

    The input is copied, capitalised OHLCV column names are lowercased, the
    raw high/low/close are duplicated into '*_raw' columns, and then the
    feature stages run in order: price action, mean reversion, volatility,
    on-chain, target.

    Args:
        df: Source frame with high/low/close columns (either case).

    Returns:
        A new frame containing the original columns plus all features.
    """
    frame = df.copy()

    # Normalise capitalised OHLCV names to lowercase before anything else.
    capitalised = ['Open', 'High', 'Low', 'Close', 'Volume']
    renames = {}
    for column in frame.columns:
        if column in capitalised:
            renames[column] = column.lower()
    if renames:
        frame = frame.rename(columns=renames)

    # Keep copies of the raw prices needed for ATR/exits.
    raw_copies = (
        ('high_raw', 'high'),
        ('low_raw', 'low'),
        ('close_raw', 'close'),
    )
    for raw_name, source_name in raw_copies:
        frame[raw_name] = frame[source_name]

    # Feature stages, applied in this fixed order.
    stages = (
        _add_price_action_features,    # 1. Price-Action
        _add_mean_reversion_features,  # 2. Mean-Reversion
        _add_volatility_features,      # 3. Volatility
        _add_onchain_features,         # 4. On-Chain
        _add_target_variables,         # 5. Target
    )
    for stage in stages:
        frame = stage(frame)
    return frame