lowkey_backtest/features.py

import pandas as pd
import numpy as np
import strategy_config as config
from numba import njit

# ==============================================================================
# --- LOOKBACK WINDOW FOR ROLLING Z-SCORES (PREVENTS LOOK-AHEAD BIAS) ---
# ==============================================================================
# Length of the trailing window used for the rolling mean/std behind each
# on-chain z-score.
ZSCORE_LOOKBACK = 168  # 1 week in hourly candles
# Passed as `min_periods` to those rolling windows; a column must also hold
# more non-NaN values than this before a z-score is computed for it.
MIN_PERIODS_ZSCORE = 24

@njit
def ema_nb(arr, window):
    """Compute an exponential moving average over a 1-D array.

    The smoothing factor is ``2 / (window + 1)`` and the series is seeded
    with the first sample, so ``result[0] == arr[0]``. Each later element is
    ``alpha * arr[i] + (1 - alpha) * result[i - 1]``, which means a NaN in
    the input propagates to every subsequent output element.

    Args:
        arr: 1-D numeric array of samples.
        window: Span used to derive the smoothing factor.

    Returns:
        Array created with ``np.full_like(arr, np.nan)`` and filled with the
        running EMA. An empty input yields an empty output.
    """
    alpha = 2.0 / (window + 1.0)
    ema_arr = np.full_like(arr, np.nan)
    n = len(arr)
    # Jitted code is not bounds-checked by default, so seeding from arr[0]
    # on an empty array would access memory out of range instead of raising.
    if n == 0:
        return ema_arr
    ema_arr[0] = arr[0]
    for i in range(1, n):
        ema_arr[i] = alpha * arr[i] + (1.0 - alpha) * ema_arr[i-1]
    return ema_arr

@njit
def atr_nb(high, low, close, window):
    """Compute the Average True Range with Wilder-style recursive smoothing.

    True range at bar ``i`` (for ``i >= 1``) is the largest of
    ``high[i] - low[i]``, ``|high[i] - close[i-1]|`` and
    ``|low[i] - close[i-1]|``. The first ATR value is written at index
    ``window`` as the plain mean of true ranges ``1..window``; later values
    follow ``atr[i] = (atr[i-1] * (window - 1) + tr[i]) / window``.

    Args:
        high: 1-D array of bar highs.
        low: 1-D array of bar lows.
        close: 1-D array of bar closes; its length defines the output length.
        window: Number of bars in the smoothing window.

    Returns:
        Array created with ``np.full_like(close, np.nan)``. Indices below
        ``window`` stay NaN. If ``window < 1`` or there are not more than
        ``window`` bars, the whole array is NaN.
    """
    n = len(close)
    atr = np.full_like(close, np.nan)
    # Without this guard `atr[window]` would be written past the end of the
    # array when the series is too short (jitted code is not bounds-checked
    # by default), and window == 0 would divide by zero below.
    if window < 1 or n <= window:
        return atr

    tr = np.zeros(n)
    for i in range(1, n):
        tr[i] = max(high[i] - low[i], abs(high[i] - close[i-1]),
                    abs(low[i] - close[i-1]))

    # Seed with the simple average of the first `window` true ranges;
    # tr[0] is skipped because it has no previous close.
    atr[window] = np.mean(tr[1:window+1])
    for i in range(window + 1, n):
        atr[i] = (atr[i-1] * (window - 1) + tr[i]) / window
    return atr

def _add_price_action_features(df: pd.DataFrame) -> pd.DataFrame:
    """Append return, momentum, moving-average and volatility columns.

    Capitalised OHLCV headers ('Open', 'High', 'Low', 'Close', 'Volume') are
    lower-cased first; every derived column is then computed from 'close'.
    All window lengths are read from ``config``.

    Returns:
        The frame with the new columns. When a rename was needed this is a
        new frame; otherwise the columns are added to ``df`` itself.
    """
    capitalised = ('Open', 'High', 'Low', 'Close', 'Volume')
    renames = {name: name.lower() for name in df.columns if name in capitalised}
    if renames:
        df = df.rename(columns=renames)

    close = df['close']

    # One-bar simple and logarithmic returns.
    df['returns'] = close.pct_change()
    df['log_returns'] = np.log(close / close.shift(1))

    # Percentage change over the two configured momentum horizons.
    for span in (config.MOMENTUM_1_PERIODS, config.MOMENTUM_2_PERIODS):
        df[f'momentum_{span}'] = close.pct_change(periods=span)

    # Fast and slow simple moving averages of the close.
    for span in (config.SMA_FAST_PERIODS, config.SMA_SLOW_PERIODS):
        df[f'SMA_{span}'] = close.rolling(window=span).mean()

    # Rolling standard deviation of log returns.
    vol_span = config.VOLATILITY_PERIODS
    df[f'volatility_{vol_span}'] = df['log_returns'].rolling(window=vol_span).std()
    return df

def _add_mean_reversion_features(df: pd.DataFrame) -> pd.DataFrame:
    """Append Bollinger Band columns derived from 'close'.

    Adds 'bb_middle' (rolling mean), 'bb_upper' / 'bb_lower' (mean plus or
    minus two rolling standard deviations), 'bb_width' (band span relative
    to the middle band) and 'bb_percent' (position of the close inside the
    band). The window length is ``config.BBAND_PERIODS``.
    """
    rolling_close = df['close'].rolling(window=config.BBAND_PERIODS)
    middle = rolling_close.mean()
    offset = rolling_close.std() * 2

    df['bb_middle'] = middle
    df['bb_upper'] = middle + offset
    df['bb_lower'] = middle - offset

    band_span = df['bb_upper'] - df['bb_lower']
    df['bb_width'] = band_span / df['bb_middle']

    # A zero-width band would divide by zero; turn it into NaN instead.
    safe_span = band_span.replace(0, np.nan)
    df['bb_percent'] = (df['close'] - df['bb_lower']) / safe_span
    return df

def _add_volatility_features(df: pd.DataFrame) -> pd.DataFrame:
    """Append an 'atr' column computed by ``atr_nb``.

    The 'high', 'low' and 'close' columns are handed over as float64 arrays.
    ``atr_nb`` allocates its output with ``np.full_like(close, np.nan)``, so
    an integer-typed price column would otherwise produce an integer buffer
    that cannot represent the NaN warm-up values.

    Args:
        df: Frame holding lowercase 'high', 'low' and 'close' columns.

    Returns:
        The same frame with 'atr' added, using ``config.ATR_PERIOD`` as the
        smoothing window.
    """
    high, low, close = (
        df[name].to_numpy(dtype=np.float64) for name in ('high', 'low', 'close')
    )
    df['atr'] = atr_nb(high, low, close, config.ATR_PERIOD)
    return df

def _add_onchain_features(df: pd.DataFrame) -> pd.DataFrame:
    """Append rolling z-scores and cycle moving averages for on-chain columns.

    For every name in ``config.ONCHAIN_FEATURE_NAMES`` a ``<name>_z`` column
    is always produced, so the output schema does not depend on how much
    data is available:

    * column missing from ``df``           -> ``<name>_z`` is all NaN
    * column present, too few observations -> ``<name>_z`` is all NaN
    * otherwise                            -> trailing-window z-score

    Present columns are coerced to numeric in place (unparseable values
    become NaN). 'net_exchange_flow' is negated and 'funding_rate' is
    multiplied by 100 before the z-score is taken. 'nupl' and 'mvrv'
    additionally get a rolling-mean column whose length comes from
    ``config``.

    Returns:
        The same frame with the new columns added.
    """
    for col_name in config.ONCHAIN_FEATURE_NAMES:
        z_col = f'{col_name}_z'

        if col_name not in df.columns:
            # Keep the schema stable when the raw series is unavailable.
            df[z_col] = np.nan
            continue

        # Ensure numeric
        df[col_name] = pd.to_numeric(df[col_name], errors='coerce')

        # Custom transforms
        if col_name == 'net_exchange_flow':
            df[col_name] = -df[col_name]  # Outflows bullish
        elif col_name == 'funding_rate':
            df[col_name] *= 100  # To %

        # Rolling z-score over a trailing window only, so no future values
        # leak into the statistic.
        if df[col_name].notna().sum() > MIN_PERIODS_ZSCORE:
            window = df[col_name].rolling(
                window=ZSCORE_LOOKBACK, min_periods=MIN_PERIODS_ZSCORE)
            rolling_mean = window.mean()
            # A flat window has zero std; NaN avoids an infinite z-score.
            rolling_std = window.std().replace(0, np.nan)
            df[z_col] = (df[col_name] - rolling_mean) / rolling_std
        else:
            # Previously no column was created here, unlike the
            # missing-column case above; emit NaN so both agree.
            df[z_col] = np.nan

        # Cycle MAs for key metrics
        if col_name == 'nupl':
            df[f'{col_name}_ma_{config.NUPL_MA_PERIODS}'] = df[col_name].rolling(
                config.NUPL_MA_PERIODS).mean()
        elif col_name == 'mvrv':
            df[f'{col_name}_ma_{config.MVRV_MA_PERIODS}'] = df[col_name].rolling(
                config.MVRV_MA_PERIODS).mean()

    return df

def _add_target_variables(df: pd.DataFrame,
                          gain_threshold: float = 0.01) -> pd.DataFrame:
    """Append the forward price and a binary classification target.

    'future_price' is the close shifted back by ``config.PREDICTION_PERIOD``
    rows. 'target' is 1 where the return from the current close to that
    future price is strictly greater than ``gain_threshold`` and 0 otherwise.

    NOTE: rows whose 'future_price' is NaN (the tail of the frame when
    ``PREDICTION_PERIOD`` is positive) compare as False and therefore get
    target 0, not NaN. Callers that train on 'target' should drop rows with
    a missing 'future_price' first.

    Args:
        df: Frame holding a lowercase 'close' column.
        gain_threshold: Minimum fractional gain that counts as a positive
            label. Defaults to 0.01 (1%).

    Returns:
        The same frame with 'future_price' and 'target' added.
    """
    df['future_price'] = df['close'].shift(-config.PREDICTION_PERIOD)
    future_ret = (df['future_price'] - df['close']) / df['close']

    df['target'] = (future_ret > gain_threshold).astype(int)
    return df

def create_features(df: pd.DataFrame) -> pd.DataFrame:
    """Build the full feature frame from raw candle (and on-chain) data.

    Works on a copy of ``df``. Capitalised OHLCV headers are lower-cased,
    the high/low/close columns are duplicated under a ``_raw`` suffix, and
    the feature stages are then applied in a fixed order: price action,
    mean reversion, volatility, on-chain, target.
    """
    df_feat = df.copy()

    # Normalise 'Open'/'High'/'Low'/'Close'/'Volume' to lowercase up front.
    capitalised = ('Open', 'High', 'Low', 'Close', 'Volume')
    renames = {name: name.lower() for name in df_feat.columns if name in capitalised}
    if renames:
        df_feat = df_feat.rename(columns=renames)

    # Keep copies of the price columns used for ATR/exits.
    for name in ('high', 'low', 'close'):
        df_feat[f'{name}_raw'] = df_feat[name]

    stages = (
        _add_price_action_features,    # 1. Price-Action
        _add_mean_reversion_features,  # 2. Mean-Reversion
        _add_volatility_features,      # 3. Volatility
        _add_onchain_features,         # 4. On-Chain
        _add_target_variables,         # 5. Target
    )
    for stage in stages:
        df_feat = stage(df_feat)

    return df_feat
Add MVRV strategy backtesting module with feature engineering and on-chain data integration. Implement model training and evaluation pipeline, including probability threshold analysis. Update configuration for strategy parameters and enhance logging for trade results. Include instructions for running the backtest and preparing data. 2026-01-10 06:10:35 +08:00			`import pandas as pd`
			`import numpy as np`
			`import strategy_config as config`
			`from numba import njit`

			`# ==============================================================================`
			`# --- LOOKBACK WINDOW FOR ROLLING Z-SCORES (PREVENTS LOOK-AHEAD BIAS) ---`
			`# ==============================================================================`
			`ZSCORE_LOOKBACK = 168 # 1 week in hourly candles`
			`MIN_PERIODS_ZSCORE = 24`

			`@njit`
			`def ema_nb(arr, window):`
			`alpha = 2.0 / (window + 1.0)`
			`ema_arr = np.full_like(arr, np.nan)`
			`ema_arr[0] = arr[0]`
			`for i in range(1, len(arr)):`
			`ema_arr[i] = alpha * arr[i] + (1.0 - alpha) * ema_arr[i-1]`
			`return ema_arr`

			`@njit`
			`def atr_nb(high, low, close, window):`
			`n = len(close)`
			`tr = np.zeros(n)`
			`atr = np.full_like(close, np.nan)`
			`for i in range(1, n):`
			`tr[i] = max(high[i] - low[i], abs(high[i] - close[i-1]),`
			`abs(low[i] - close[i-1]))`
			`tr_series = tr[1:window+1]`
			`atr[window] = np.mean(tr_series)`
			`for i in range(window + 1, n):`
			`atr[i] = (atr[i-1] * (window - 1) + tr[i]) / window`
			`return atr`

			`def _add_price_action_features(df: pd.DataFrame) -> pd.DataFrame:`
			`"""Adds price action based features like returns, momentum, SMAs."""`
			`# Ensure column names match what we expect (lowercase usually)`
			`# If user has 'Close', 'Open' etc. we might need to adjust or rename before calling this.`
			`# Assuming the input df has lowercase 'close', 'high', 'low' etc. or we handle it.`

			`# Map typical Capitalized names to lowercase if needed`
			`mapper = {c: c.lower() for c in df.columns if c in ['Open', 'High', 'Low', 'Close', 'Volume']}`
			`if mapper:`
			`df = df.rename(columns=mapper)`

			`df['returns'] = df['close'].pct_change()`
			`df['log_returns'] = np.log(df['close'] / df['close'].shift(1))`

			`df[f'momentum_{config.MOMENTUM_1_PERIODS}'] = df['close'].pct_change(`
			`periods=config.MOMENTUM_1_PERIODS)`
			`df[f'momentum_{config.MOMENTUM_2_PERIODS}'] = df['close'].pct_change(`
			`periods=config.MOMENTUM_2_PERIODS)`

			`df[f'SMA_{config.SMA_FAST_PERIODS}'] = df['close'].rolling(`
			`window=config.SMA_FAST_PERIODS).mean()`
			`df[f'SMA_{config.SMA_SLOW_PERIODS}'] = df['close'].rolling(`
			`window=config.SMA_SLOW_PERIODS).mean()`

			`df[f'volatility_{config.VOLATILITY_PERIODS}'] = df['log_returns'].rolling(`
			`window=config.VOLATILITY_PERIODS).std()`
			`return df`

			`def _add_mean_reversion_features(df: pd.DataFrame) -> pd.DataFrame:`
			`"""Adds Bollinger Band features."""`
			`window = config.BBAND_PERIODS`
			`df['bb_middle'] = df['close'].rolling(window=window).mean()`
			`bb_std = df['close'].rolling(window=window).std()`
			`df['bb_upper'] = df['bb_middle'] + (bb_std * 2)`
			`df['bb_lower'] = df['bb_middle'] - (bb_std * 2)`

			`df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / df['bb_middle']`

			`denom = (df['bb_upper'] - df['bb_lower'])`
			`denom = denom.replace(0, np.nan)`
			`df['bb_percent'] = (df['close'] - df['bb_lower']) / denom`
			`return df`

			`def _add_volatility_features(df: pd.DataFrame) -> pd.DataFrame:`
			`"""Adds ATR features."""`
			`atr_values = atr_nb(`
			`df['high'].to_numpy(),`
			`df['low'].to_numpy(),`
			`df['close'].to_numpy(),`
			`config.ATR_PERIOD`
			`)`
			`df['atr'] = atr_values`
			`return df`

			`def _add_onchain_features(df: pd.DataFrame) -> pd.DataFrame:`
			`"""Adds on-chain features with z-score normalization and cycle MAs."""`
			`for col_name in config.ONCHAIN_FEATURE_NAMES:`
			`if col_name in df.columns:`
			`col_data = df[col_name]`
			`# Ensure numeric`
			`df[col_name] = pd.to_numeric(col_data, errors='coerce')`

			`# Custom transforms`
			`if col_name == 'net_exchange_flow':`
			`df[col_name] = -df[col_name] # Outflows bullish`
			`elif col_name == 'funding_rate':`
			`df[col_name] *= 100 # To %`

			`# Rolling Z-Score (prevents look-ahead bias)`
			`if df[col_name].notna().sum() > MIN_PERIODS_ZSCORE:`
			`rolling_mean = df[col_name].rolling(`
			`window=ZSCORE_LOOKBACK, min_periods=MIN_PERIODS_ZSCORE).mean()`
			`rolling_std = df[col_name].rolling(`
			`window=ZSCORE_LOOKBACK, min_periods=MIN_PERIODS_ZSCORE).std()`

			`rolling_std = rolling_std.replace(0, np.nan)`
			`df[f'{col_name}_z'] = (df[col_name] - rolling_mean) / rolling_std`

			`# Cycle MAs for key metrics`
			`if col_name == 'nupl':`
			`df[f'{col_name}_ma_{config.NUPL_MA_PERIODS}'] = df[col_name].rolling(`
			`config.NUPL_MA_PERIODS).mean()`
			`elif col_name == 'mvrv':`
			`df[f'{col_name}_ma_{config.MVRV_MA_PERIODS}'] = df[col_name].rolling(`
			`config.MVRV_MA_PERIODS).mean()`
			`else:`
			`# Create NaN columns if missing so model doesn't crash (or handle later)`
			`df[f'{col_name}_z'] = np.nan`

			`return df`

			`def _add_target_variables(df: pd.DataFrame) -> pd.DataFrame:`
			`"""Adds target variables for ML training."""`
			`# 1% gain threshold in prediction horizon`
			`df['future_price'] = df['close'].shift(-config.PREDICTION_PERIOD)`
			`future_ret = (df['future_price'] - df['close']) / df['close']`

			`df['target'] = (future_ret > 0.01).astype(int)`
			`return df`

			`def create_features(df: pd.DataFrame) -> pd.DataFrame:`
			`"""Engineers all features using modular sub-functions."""`
			`df_feat = df.copy()`

			`# Pass through raw data needed for ATR/exits`
			`# Ensure lowercase columns first`
			`mapper = {c: c.lower() for c in df_feat.columns if c in ['Open', 'High', 'Low', 'Close', 'Volume']}`
			`if mapper:`
			`df_feat = df_feat.rename(columns=mapper)`

			`df_feat['high_raw'] = df_feat['high']`
			`df_feat['low_raw'] = df_feat['low']`
			`df_feat['close_raw'] = df_feat['close']`

			`# 1. Price-Action`
			`df_feat = _add_price_action_features(df_feat)`

			`# 2. Mean-Reversion`
			`df_feat = _add_mean_reversion_features(df_feat)`

			`# 3. Volatility`
			`df_feat = _add_volatility_features(df_feat)`

			`# 4. On-Chain`
			`df_feat = _add_onchain_features(df_feat)`

			`# 5. Target`
			`df_feat = _add_target_variables(df_feat)`

			`return df_feat`