"""Feature-engineering helpers for OHLCV and on-chain data.

Builds price-action, Bollinger Band, ATR and on-chain z-score features plus
a binary ML target. The public entry point is :func:`create_features`.
"""
import numpy as np
import pandas as pd
from numba import njit

import strategy_config as config

# ==============================================================================
# --- LOOKBACK WINDOW FOR ROLLING Z-SCORES (PREVENTS LOOK-AHEAD BIAS) ---
# ==============================================================================
ZSCORE_LOOKBACK = 168  # 1 week in hourly candles
MIN_PERIODS_ZSCORE = 24

# Capitalized OHLCV column names that are normalized to lowercase.
_OHLCV_CAPITALIZED = ('Open', 'High', 'Low', 'Close', 'Volume')


@njit
def ema_nb(arr, window):
    """Return the exponential moving average of *arr*.

    The smoothing factor is ``2 / (window + 1)`` and the series is seeded
    with ``arr[0]``. An empty input yields an empty output.
    """
    alpha = 2.0 / (window + 1.0)
    ema_arr = np.full_like(arr, np.nan)
    # Compiled code does not bounds-check by default, so never touch arr[0]
    # on an empty array.
    if len(arr) == 0:
        return ema_arr
    ema_arr[0] = arr[0]
    for i in range(1, len(arr)):
        ema_arr[i] = alpha * arr[i] + (1.0 - alpha) * ema_arr[i - 1]
    return ema_arr


@njit
def atr_nb(high, low, close, window):
    """Return the Average True Range with Wilder-style recursive smoothing.

    The first ATR value sits at index ``window`` and is the plain mean of the
    true ranges at indices ``1..window``; earlier entries are NaN. When there
    are not enough bars (``len(close) <= window``) or ``window < 1`` the
    result is all NaN.
    """
    n = len(close)
    tr = np.zeros(n)
    atr = np.full_like(close, np.nan)
    # Guard: writing atr[window] on a short series would be an out-of-bounds
    # write in compiled code, and window < 1 would divide by zero below.
    if window < 1 or n <= window:
        return atr
    for i in range(1, n):
        tr[i] = max(
            high[i] - low[i],
            abs(high[i] - close[i - 1]),
            abs(low[i] - close[i - 1]),
        )
    # Seed with the simple mean of the first `window` true ranges
    # (tr[0] is undefined because there is no previous close).
    atr[window] = np.mean(tr[1:window + 1])
    for i in range(window + 1, n):
        atr[i] = (atr[i - 1] * (window - 1) + tr[i]) / window
    return atr


def _lowercase_ohlcv_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Rename capitalized OHLCV columns ('Open', 'Close', ...) to lowercase.

    Returns *df* itself when nothing needs renaming, otherwise a renamed copy.
    """
    mapper = {c: c.lower() for c in df.columns if c in _OHLCV_CAPITALIZED}
    return df.rename(columns=mapper) if mapper else df


def _add_price_action_features(df: pd.DataFrame) -> pd.DataFrame:
    """Adds price action based features like returns, momentum, SMAs.

    Expects a 'close' column (capitalized OHLCV names are lowercased first).
    Columns are added in place on the (possibly renamed) frame, which is
    returned.
    """
    df = _lowercase_ohlcv_columns(df)
    close = df['close']

    df['returns'] = close.pct_change()
    df['log_returns'] = np.log(close / close.shift(1))

    for periods in (config.MOMENTUM_1_PERIODS, config.MOMENTUM_2_PERIODS):
        df[f'momentum_{periods}'] = close.pct_change(periods=periods)

    for periods in (config.SMA_FAST_PERIODS, config.SMA_SLOW_PERIODS):
        df[f'SMA_{periods}'] = close.rolling(window=periods).mean()

    df[f'volatility_{config.VOLATILITY_PERIODS}'] = df['log_returns'].rolling(
        window=config.VOLATILITY_PERIODS).std()
    return df


def _add_mean_reversion_features(df: pd.DataFrame) -> pd.DataFrame:
    """Adds Bollinger Band features.

    Adds 'bb_middle', 'bb_upper', 'bb_lower' (middle +/- 2 rolling std),
    'bb_width' (band width relative to the middle band) and 'bb_percent'
    (position of close inside the band; NaN when the band has zero width).
    """
    window = config.BBAND_PERIODS
    rolling_close = df['close'].rolling(window=window)
    df['bb_middle'] = rolling_close.mean()
    bb_std = rolling_close.std()
    df['bb_upper'] = df['bb_middle'] + (bb_std * 2)
    df['bb_lower'] = df['bb_middle'] - (bb_std * 2)

    band = df['bb_upper'] - df['bb_lower']
    df['bb_width'] = band / df['bb_middle']
    # A zero-width band would divide by zero; report NaN instead.
    df['bb_percent'] = (df['close'] - df['bb_lower']) / band.replace(0, np.nan)
    return df


def _add_volatility_features(df: pd.DataFrame) -> pd.DataFrame:
    """Adds ATR features ('atr' column, period ``config.ATR_PERIOD``)."""
    df['atr'] = atr_nb(
        df['high'].to_numpy(),
        df['low'].to_numpy(),
        df['close'].to_numpy(),
        config.ATR_PERIOD,
    )
    return df


def _add_onchain_features(df: pd.DataFrame) -> pd.DataFrame:
    """Adds on-chain features with z-score normalization and cycle MAs.

    For every name in ``config.ONCHAIN_FEATURE_NAMES`` a ``<name>_z`` column
    is always created: a trailing rolling z-score when the column exists and
    has more than ``MIN_PERIODS_ZSCORE`` non-NaN values, all-NaN otherwise.
    """
    for col_name in config.ONCHAIN_FEATURE_NAMES:
        z_col = f'{col_name}_z'

        if col_name not in df.columns:
            # Create NaN columns if missing so model doesn't crash
            # (or handle later).
            df[z_col] = np.nan
            continue

        # Ensure numeric; unparseable entries become NaN.
        series = pd.to_numeric(df[col_name], errors='coerce')

        # Custom transforms
        if col_name == 'net_exchange_flow':
            series = -series  # Outflows bullish
        elif col_name == 'funding_rate':
            series = series * 100  # To %
        df[col_name] = series

        # Rolling Z-Score (prevents look-ahead bias): only trailing data is
        # used for the mean/std at each row.
        if series.notna().sum() > MIN_PERIODS_ZSCORE:
            rolling = series.rolling(
                window=ZSCORE_LOOKBACK, min_periods=MIN_PERIODS_ZSCORE)
            rolling_std = rolling.std().replace(0, np.nan)
            df[z_col] = (series - rolling.mean()) / rolling_std
        else:
            # Too little data for a z-score: still emit the column so the
            # output schema matches the missing-column case above.
            df[z_col] = np.nan

        # Cycle MAs for key metrics
        if col_name == 'nupl':
            df[f'{col_name}_ma_{config.NUPL_MA_PERIODS}'] = series.rolling(
                config.NUPL_MA_PERIODS).mean()
        elif col_name == 'mvrv':
            df[f'{col_name}_ma_{config.MVRV_MA_PERIODS}'] = series.rolling(
                config.MVRV_MA_PERIODS).mean()
    return df


def _add_target_variables(df: pd.DataFrame) -> pd.DataFrame:
    """Adds target variables for ML training.

    'future_price' is the close ``config.PREDICTION_PERIOD`` rows ahead and
    'target' is 1 when the return to that price exceeds 1%, else 0.
    """
    # 1% gain threshold in prediction horizon
    df['future_price'] = df['close'].shift(-config.PREDICTION_PERIOD)
    future_ret = (df['future_price'] - df['close']) / df['close']
    # NOTE(review): the trailing rows have NaN 'future_price'; NaN > 0.01 is
    # False, so they are labelled 0. Drop rows with NaN 'future_price' before
    # training if those labels should not be used.
    df['target'] = (future_ret > 0.01).astype(int)
    return df


def create_features(df: pd.DataFrame) -> pd.DataFrame:
    """Engineers all features using modular sub-functions.

    Works on a copy of *df*, so the caller's frame is left untouched.
    Requires 'high', 'low' and 'close' columns (capitalized names accepted).
    """
    df_feat = _lowercase_ohlcv_columns(df.copy())

    # Pass through raw data needed for ATR/exits
    df_feat['high_raw'] = df_feat['high']
    df_feat['low_raw'] = df_feat['low']
    df_feat['close_raw'] = df_feat['close']

    # 1. Price-Action
    df_feat = _add_price_action_features(df_feat)
    # 2. Mean-Reversion
    df_feat = _add_mean_reversion_features(df_feat)
    # 3. Volatility
    df_feat = _add_volatility_features(df_feat)
    # 4. On-Chain
    df_feat = _add_onchain_features(df_feat)
    # 5. Target
    df_feat = _add_target_variables(df_feat)
    return df_feat