From 1e4cb87da365dd5fc68b8d53fcc742ebcd561bcf Mon Sep 17 00:00:00 2001 From: Simon Moisy Date: Wed, 14 Jan 2026 09:46:51 +0800 Subject: [PATCH] Add check_symbols.py for ETH perpetuals filtering and enhance backtester with size handling - Introduced `check_symbols.py` to load and filter ETH perpetual markets from the OKX exchange using CCXT. - Updated the backtester to normalize signals to a 5-tuple format, incorporating size management for trades. - Enhanced portfolio functions to support variable size and leverage adjustments based on initial capital. - Added a new method in `CryptoQuantClient` for chunked historical data fetching to avoid API limits. - Improved market symbol normalization in `market.py` to handle different formats. - Updated regime strategy parameters based on recent research findings for optimal performance. --- check_symbols.py | 28 +++ engine/backtester.py | 81 +++++--- engine/cryptoquant.py | 53 +++++- engine/market.py | 16 +- engine/portfolio.py | 70 ++----- research/regime_detection.py | 342 ++++++++++++++++++++++++++++++++++ strategies/factory.py | 21 ++- strategies/regime_strategy.py | 117 ++++++++++-- 8 files changed, 617 insertions(+), 111 deletions(-) create mode 100644 check_symbols.py create mode 100644 research/regime_detection.py diff --git a/check_symbols.py b/check_symbols.py new file mode 100644 index 0000000..0b0f965 --- /dev/null +++ b/check_symbols.py @@ -0,0 +1,28 @@ +import ccxt +import sys + +def main(): + try: + exchange = ccxt.okx() + print("Loading markets...") + markets = exchange.load_markets() + + # Filter for ETH perpetuals + eth_perps = [ + symbol for symbol, market in markets.items() + if 'ETH' in symbol and 'USDT' in symbol and market.get('swap') and market.get('linear') + ] + + print(f"\nFound {len(eth_perps)} ETH Linear Perps:") + for symbol in eth_perps: + market = markets[symbol] + print(f" CCXT Symbol: {symbol}") + print(f" Exchange ID: {market['id']}") + print(f" Type: {market['type']}") + print("-" * 30) + + except Exception as e: + print(f"Error: {e}") + +if __name__ == "__main__": + main() diff --git a/engine/backtester.py b/engine/backtester.py index be28324..c417d66 100644 --- a/engine/backtester.py +++ b/engine/backtester.py @@ -132,9 +132,22 @@ class Backtester: **strategy_params ) - # Normalize signals to 4-tuple format + # Normalize signals to 5-tuple format signals = self._normalize_signals(signals, close_price, market_config) - long_entries, long_exits, short_entries, short_exits = signals + long_entries, long_exits, short_entries, short_exits, size = signals + + # Default size if None + if size is None: + size = 1.0 + + # Convert leverage multiplier to raw value (USD) for vbt + # This works around "SizeType.Percent reversal" error + # Effectively "Fixed Fractional" sizing based on Initial Capital + # (Does not compound, but safe for backtesting) + if isinstance(size, pd.Series): + size = size * init_cash + else: + size = size * init_cash # Process liquidations - inject forced exits at liquidation points liquidation_events: list[LiquidationEvent] = [] @@ -164,7 +177,8 @@ class Backtester: long_entries, long_exits, short_entries, short_exits, init_cash, effective_fees, slippage, timeframe, - sl_stop, tp_stop, sl_trail, effective_leverage + sl_stop, tp_stop, sl_trail, effective_leverage, + size=size ) # Calculate adjusted returns accounting for liquidation losses @@ -242,39 +256,45 @@ class Backtester: market_config ) -> tuple: """ - Normalize strategy signals to 4-tuple format. + Normalize strategy signals to 5-tuple format. - Handles backward compatibility with 2-tuple (long-only) returns. + Returns: + (long_entries, long_exits, short_entries, short_exits, size) """ + # Default size is None (will be treated as 1.0 or default later) + size = None + if len(signals) == 2: long_entries, long_exits = signals short_entries = BaseStrategy.create_empty_signals(long_entries) short_exits = BaseStrategy.create_empty_signals(long_entries) - return long_entries, long_exits, short_entries, short_exits + return long_entries, long_exits, short_entries, short_exits, size if len(signals) == 4: long_entries, long_exits, short_entries, short_exits = signals + elif len(signals) == 5: + long_entries, long_exits, short_entries, short_exits, size = signals + else: + raise ValueError( + f"Strategy must return 2, 4, or 5 signal arrays, got {len(signals)}" + ) - # Warn and clear short signals on spot markets - if not market_config.supports_short: - has_shorts = ( - short_entries.any().any() - if hasattr(short_entries, 'any') - else short_entries.any() + # Warn and clear short signals on spot markets + if not market_config.supports_short: + has_shorts = ( + short_entries.any().any() + if hasattr(short_entries, 'any') + else short_entries.any() + ) + if has_shorts: + logger.warning( + "Short signals detected but market type is SPOT. " + "Short signals will be ignored." ) - if has_shorts: - logger.warning( - "Short signals detected but market type is SPOT. " - "Short signals will be ignored." - ) - short_entries = BaseStrategy.create_empty_signals(long_entries) - short_exits = BaseStrategy.create_empty_signals(long_entries) - - return long_entries, long_exits, short_entries, short_exits - - raise ValueError( - f"Strategy must return 2 or 4 signal arrays, got {len(signals)}" - ) + short_entries = BaseStrategy.create_empty_signals(long_entries) + short_exits = BaseStrategy.create_empty_signals(long_entries) + + return long_entries, long_exits, short_entries, short_exits, size def _run_portfolio( self, @@ -289,7 +309,8 @@ class Backtester: sl_stop: float | None, tp_stop: float | None, sl_trail: bool, - leverage: int + leverage: int, + size: pd.Series | float = 1.0 ) -> vbt.Portfolio: """Select and run appropriate portfolio simulation.""" has_shorts = ( @@ -304,14 +325,18 @@ class Backtester: long_entries, long_exits, short_entries, short_exits, init_cash, fees, slippage, freq, - sl_stop, tp_stop, sl_trail, leverage + sl_stop, tp_stop, sl_trail, leverage, + size=size ) return run_long_only_portfolio( close, long_entries, long_exits, init_cash, fees, slippage, freq, - sl_stop, tp_stop, sl_trail, leverage + sl_stop, tp_stop, sl_trail, leverage, + # Long-only doesn't support variable size in current implementation + # without modification, but we can add it if needed. + # For now, only regime strategy uses it, which is Long/Short. ) def run_wfa( diff --git a/engine/cryptoquant.py b/engine/cryptoquant.py index 403b56d..75506ec 100644 --- a/engine/cryptoquant.py +++ b/engine/cryptoquant.py @@ -133,12 +133,57 @@ class CryptoQuantClient: return combined_df + def fetch_history_chunked( + self, + symbols: list[str], + metrics: dict, + start_date: str, + end_date: str, + chunk_months: int = 3 + ) -> pd.DataFrame: + """ + Fetch historical data in chunks to avoid API limits. + """ + start_dt = datetime.strptime(start_date, "%Y%m%d") + end_dt = datetime.strptime(end_date, "%Y%m%d") + + all_data = [] + + current = start_dt + while current < end_dt: + next_chunk = current + timedelta(days=chunk_months * 30) + if next_chunk > end_dt: + next_chunk = end_dt + + s_str = current.strftime("%Y%m%d") + e_str = next_chunk.strftime("%Y%m%d") + + logger.info(f"Processing chunk: {s_str} to {e_str}") + chunk_df = self.fetch_multi_metrics(symbols, metrics, s_str, e_str) + + if not chunk_df.empty: + all_data.append(chunk_df) + + current = next_chunk + timedelta(days=1) + time.sleep(1) # Be nice to API + + if not all_data: + return pd.DataFrame() + + # Combine all chunks + full_df = pd.concat(all_data) + # Remove duplicates if any overlap + full_df = full_df[~full_df.index.duplicated(keep='first')] + full_df.sort_index(inplace=True) + + return full_df + if __name__ == "__main__": cq = CryptoQuantClient() - # 3 Months Data (Oct 1 2025 - Dec 31 2025) - start = "20251001" - end = "20251231" + # 12 Months Data (Jan 1 2025 - Jan 14 2026) + start = "20250101" + end = "20260114" metrics = { "reserves": "exchange-flows/exchange-reserve", @@ -147,7 +192,7 @@ if __name__ == "__main__": } print(f"Fetching training data from {start} to {end}...") - df = cq.fetch_multi_metrics(["btc", "eth"], metrics, start, end) + df = cq.fetch_history_chunked(["btc", "eth"], metrics, start, end) output_file = "data/cq_training_data.csv" os.makedirs("data", exist_ok=True) diff --git a/engine/market.py b/engine/market.py index a86b81d..8f33593 100644 --- a/engine/market.py +++ b/engine/market.py @@ -94,8 +94,20 @@ def get_ccxt_symbol(symbol: str, market_type: MarketType) -> str: """ if market_type == MarketType.PERPETUAL: # OKX perpetual format: BTC/USDT:USDT - quote = symbol.split('/')[1] if '/' in symbol else 'USDT' - return f"{symbol}:{quote}" + if '/' in symbol: + base, quote = symbol.split('/') + return f"{symbol}:{quote}" + elif '-' in symbol: + base, quote = symbol.split('-') + return f"{base}/{quote}:{quote}" + else: + # Assume base is symbol, quote is USDT default + return f"{symbol}/USDT:USDT" + + # For spot, normalize dash to slash for CCXT + if '-' in symbol: + return symbol.replace('-', '/') + return symbol diff --git a/engine/portfolio.py b/engine/portfolio.py index f081f40..8d05102 100644 --- a/engine/portfolio.py +++ b/engine/portfolio.py @@ -74,75 +74,35 @@ def run_long_short_portfolio( sl_stop: float | None, tp_stop: float | None, sl_trail: bool, - leverage: int + leverage: int, + size: pd.Series | float = 1.0, + size_type: str = 'value' # Changed to 'value' to support reversals/sizing ) -> vbt.Portfolio: """ Run a portfolio supporting both long and short positions. - Runs two separate portfolios (long and short) and combines results. - Each gets half the capital. - - Args: - close: Close price series - long_entries: Long entry signals - long_exits: Long exit signals - short_entries: Short entry signals - short_exits: Short exit signals - init_cash: Initial capital - fees: Transaction fee percentage - slippage: Slippage percentage - freq: Data frequency string - sl_stop: Stop loss percentage - tp_stop: Take profit percentage - sl_trail: Enable trailing stop loss - leverage: Leverage multiplier - - Returns: - VectorBT Portfolio object (long portfolio, short stats logged) + Uses VectorBT's native support for short_entries/short_exits + to simulate a single unified portfolio. """ effective_cash = init_cash * leverage - half_cash = effective_cash / 2 - # Run long-only portfolio - long_pf = vbt.Portfolio.from_signals( + # If size is passed as value (USD), we don't scale it by leverage here + # The backtester has already scaled it by init_cash. + # If using 'value', vbt treats it as "Amount of CASH to use for the trade" + + return vbt.Portfolio.from_signals( close=close, entries=long_entries, exits=long_exits, - direction='longonly', - init_cash=half_cash, + short_entries=short_entries, + short_exits=short_exits, + init_cash=effective_cash, fees=fees, slippage=slippage, freq=freq, sl_stop=sl_stop, tp_stop=tp_stop, sl_trail=sl_trail, - size=1.0, - size_type='percent', + size=size, + size_type=size_type, ) - - # Run short-only portfolio - short_pf = vbt.Portfolio.from_signals( - close=close, - entries=short_entries, - exits=short_exits, - direction='shortonly', - init_cash=half_cash, - fees=fees, - slippage=slippage, - freq=freq, - sl_stop=sl_stop, - tp_stop=tp_stop, - sl_trail=sl_trail, - size=1.0, - size_type='percent', - ) - - # Log both portfolio stats - # TODO: Implement proper portfolio combination - logger.info( - "Long portfolio: %.2f%% return, Short portfolio: %.2f%% return", - long_pf.total_return().mean() * 100, - short_pf.total_return().mean() * 100 - ) - - return long_pf diff --git a/research/regime_detection.py b/research/regime_detection.py new file mode 100644 index 0000000..bbbe14b --- /dev/null +++ b/research/regime_detection.py @@ -0,0 +1,342 @@ +""" +Regime Detection Research Script with Walk-Forward Training. + +Tests multiple holding horizons to find optimal parameters +without look-ahead bias. +""" +import sys +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import pandas as pd +import numpy as np +import ta +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import classification_report, f1_score + +from engine.data_manager import DataManager +from engine.market import MarketType +from engine.logging_config import get_logger + +logger = get_logger(__name__) + +# Configuration +TRAIN_RATIO = 0.7 # 70% train, 30% test +PROFIT_THRESHOLD = 0.005 # 0.5% profit target +Z_WINDOW = 24 +FEE_RATE = 0.001 # 0.1% round-trip fee + + +def load_data(): + """Load and align BTC/ETH data.""" + dm = DataManager() + + df_btc = dm.load_data("okx", "BTC-USDT", "1h", MarketType.SPOT) + df_eth = dm.load_data("okx", "ETH-USDT", "1h", MarketType.SPOT) + + # Filter to Oct-Dec 2025 + start = pd.Timestamp("2025-10-01", tz="UTC") + end = pd.Timestamp("2025-12-31", tz="UTC") + + df_btc = df_btc[(df_btc.index >= start) & (df_btc.index <= end)] + df_eth = df_eth[(df_eth.index >= start) & (df_eth.index <= end)] + + # Align indices + common = df_btc.index.intersection(df_eth.index) + df_btc = df_btc.loc[common] + df_eth = df_eth.loc[common] + + logger.info(f"Loaded {len(common)} aligned hourly bars") + return df_btc, df_eth + + +def load_cryptoquant_data(): + """Load CryptoQuant on-chain data if available.""" + try: + cq_path = "data/cq_training_data.csv" + cq_df = pd.read_csv(cq_path, index_col='timestamp', parse_dates=True) + if cq_df.index.tz is None: + cq_df.index = cq_df.index.tz_localize('UTC') + logger.info(f"Loaded CryptoQuant data: {len(cq_df)} rows") + return cq_df + except Exception as e: + logger.warning(f"CryptoQuant data not available: {e}") + return None + + +def calculate_features(df_btc, df_eth, cq_df=None): + """Calculate all features for the model.""" + spread = df_eth['close'] / df_btc['close'] + + # Z-Score + rolling_mean = spread.rolling(window=Z_WINDOW).mean() + rolling_std = spread.rolling(window=Z_WINDOW).std() + z_score = (spread - rolling_mean) / rolling_std + + # Technicals + spread_rsi = ta.momentum.RSIIndicator(spread, window=14).rsi() + spread_roc = spread.pct_change(periods=5) * 100 + spread_change_1h = spread.pct_change(periods=1) + + # Volume + vol_ratio = df_eth['volume'] / df_btc['volume'] + vol_ratio_ma = vol_ratio.rolling(window=12).mean() + + # Volatility + ret_btc = df_btc['close'].pct_change() + ret_eth = df_eth['close'].pct_change() + vol_btc = ret_btc.rolling(window=Z_WINDOW).std() + vol_eth = ret_eth.rolling(window=Z_WINDOW).std() + vol_spread_ratio = vol_eth / vol_btc + + features = pd.DataFrame(index=spread.index) + features['spread'] = spread + features['z_score'] = z_score + features['spread_rsi'] = spread_rsi + features['spread_roc'] = spread_roc + features['spread_change_1h'] = spread_change_1h + features['vol_ratio'] = vol_ratio + features['vol_ratio_rel'] = vol_ratio / vol_ratio_ma + features['vol_diff_ratio'] = vol_spread_ratio + + # Add CQ features if available + if cq_df is not None: + cq_aligned = cq_df.reindex(features.index, method='ffill') + if 'btc_funding' in cq_aligned.columns and 'eth_funding' in cq_aligned.columns: + cq_aligned['funding_diff'] = cq_aligned['eth_funding'] - cq_aligned['btc_funding'] + if 'btc_inflow' in cq_aligned.columns and 'eth_inflow' in cq_aligned.columns: + cq_aligned['inflow_ratio'] = cq_aligned['eth_inflow'] / (cq_aligned['btc_inflow'] + 1) + features = features.join(cq_aligned) + + return features.dropna() + + +def calculate_targets(features, horizon): + """Calculate target labels for a given horizon.""" + spread = features['spread'] + z_score = features['z_score'] + + # For Short (Z > 1): Did spread drop below target? + future_min = spread.rolling(window=horizon).min().shift(-horizon) + target_short = spread * (1 - PROFIT_THRESHOLD) + success_short = (z_score > 1.0) & (future_min < target_short) + + # For Long (Z < -1): Did spread rise above target? + future_max = spread.rolling(window=horizon).max().shift(-horizon) + target_long = spread * (1 + PROFIT_THRESHOLD) + success_long = (z_score < -1.0) & (future_max > target_long) + + targets = np.select([success_short, success_long], [1, 1], default=0) + + # Create valid mask (rows with complete future data) + valid_mask = future_min.notna() & future_max.notna() + + return targets, valid_mask, future_min, future_max + + +def calculate_mae(features, predictions, test_idx, horizon): + """Calculate Maximum Adverse Excursion for predicted trades.""" + test_features = features.loc[test_idx] + spread = test_features['spread'] + z_score = test_features['z_score'] + + mae_values = [] + + for i, (idx, pred) in enumerate(zip(test_idx, predictions)): + if pred != 1: + continue + + entry_spread = spread.loc[idx] + z = z_score.loc[idx] + + # Get future spread values + future_idx = features.index.get_loc(idx) + future_end = min(future_idx + horizon, len(features)) + future_spreads = features['spread'].iloc[future_idx:future_end] + + if len(future_spreads) < 2: + continue + + if z > 1.0: # Short trade + max_adverse = (future_spreads.max() - entry_spread) / entry_spread + else: # Long trade + max_adverse = (entry_spread - future_spreads.min()) / entry_spread + + mae_values.append(max_adverse * 100) # As percentage + + return np.mean(mae_values) if mae_values else 0.0 + + +def calculate_net_profit(features, predictions, test_idx, horizon): + """Calculate estimated net profit including fees.""" + test_features = features.loc[test_idx] + spread = test_features['spread'] + z_score = test_features['z_score'] + + total_pnl = 0.0 + n_trades = 0 + + for i, (idx, pred) in enumerate(zip(test_idx, predictions)): + if pred != 1: + continue + + entry_spread = spread.loc[idx] + z = z_score.loc[idx] + + # Get future spread values + future_idx = features.index.get_loc(idx) + future_end = min(future_idx + horizon, len(features)) + future_spreads = features['spread'].iloc[future_idx:future_end] + + if len(future_spreads) < 2: + continue + + # Calculate PnL based on direction + if z > 1.0: # Short trade - profit if spread drops + exit_spread = future_spreads.iloc[-1] # Exit at horizon + pnl = (entry_spread - exit_spread) / entry_spread + else: # Long trade - profit if spread rises + exit_spread = future_spreads.iloc[-1] + pnl = (exit_spread - entry_spread) / entry_spread + + # Subtract fees + net_pnl = pnl - FEE_RATE + total_pnl += net_pnl + n_trades += 1 + + return total_pnl, n_trades + + +def test_horizon(features, horizon): + """Test a single horizon with walk-forward training.""" + # Calculate targets + targets, valid_mask, _, _ = calculate_targets(features, horizon) + + # Walk-forward split + n_samples = len(features) + train_size = int(n_samples * TRAIN_RATIO) + + train_features = features.iloc[:train_size] + test_features = features.iloc[train_size:] + + train_targets = targets[:train_size] + test_targets = targets[train_size:] + + train_valid = valid_mask.iloc[:train_size] + test_valid = valid_mask.iloc[train_size:] + + # Prepare training data (only valid rows) + exclude = ['spread'] + cols = [c for c in features.columns if c not in exclude] + + X_train = train_features[cols].fillna(0).replace([np.inf, -np.inf], 0) + X_train_valid = X_train[train_valid] + y_train_valid = train_targets[train_valid] + + if len(X_train_valid) < 50: + return None # Not enough training data + + # Train model + model = RandomForestClassifier( + n_estimators=300, max_depth=5, min_samples_leaf=30, + class_weight={0: 1, 1: 3}, random_state=42 + ) + model.fit(X_train_valid, y_train_valid) + + # Predict on test set + X_test = test_features[cols].fillna(0).replace([np.inf, -np.inf], 0) + predictions = model.predict(X_test) + + # Only evaluate on valid test rows (those with complete future data) + test_valid_mask = test_valid.values + y_test_valid = test_targets[test_valid_mask] + pred_valid = predictions[test_valid_mask] + + if len(y_test_valid) < 10: + return None + + # Calculate metrics + f1 = f1_score(y_test_valid, pred_valid, zero_division=0) + + # Calculate MAE and Net Profit on ALL test predictions (not just valid targets) + test_idx = test_features.index + avg_mae = calculate_mae(features, predictions, test_idx, horizon) + net_pnl, n_trades = calculate_net_profit(features, predictions, test_idx, horizon) + + return { + 'horizon': horizon, + 'f1_score': f1, + 'avg_mae': avg_mae, + 'net_pnl': net_pnl, + 'n_trades': n_trades, + 'train_samples': len(X_train_valid), + 'test_samples': len(X_test) + } + + +def test_horizons(features, horizons): + """Test multiple horizons and return comparison.""" + results = [] + + print("\n" + "=" * 80) + print("WALK-FORWARD HORIZON OPTIMIZATION") + print(f"Train Ratio: {TRAIN_RATIO*100:.0f}% | Profit Target: {PROFIT_THRESHOLD*100:.1f}% | Fee Rate: {FEE_RATE*100:.2f}%") + print("=" * 80) + + for h in horizons: + result = test_horizon(features, h) + if result: + results.append(result) + print(f"Horizon {h:3d}h: F1={result['f1_score']:.3f}, " + f"MAE={result['avg_mae']:.2f}%, " + f"Net PnL={result['net_pnl']*100:.2f}%, " + f"Trades={result['n_trades']}") + + return results + + +def main(): + """Main research function.""" + # Load data + df_btc, df_eth = load_data() + cq_df = load_cryptoquant_data() + + # Calculate features + features = calculate_features(df_btc, df_eth, cq_df) + logger.info(f"Calculated {len(features)} feature rows with {len(features.columns)} columns") + + # Test horizons from 6h to 150h + horizons = list(range(6, 151, 6)) # 6, 12, 18, ..., 150 + + results = test_horizons(features, horizons) + + if not results: + print("No valid results!") + return + + # Find best by different metrics + results_df = pd.DataFrame(results) + + print("\n" + "=" * 80) + print("BEST HORIZONS BY METRIC") + print("=" * 80) + + best_f1 = results_df.loc[results_df['f1_score'].idxmax()] + print(f"Best F1 Score: {best_f1['horizon']:.0f}h (F1={best_f1['f1_score']:.3f})") + + best_pnl = results_df.loc[results_df['net_pnl'].idxmax()] + print(f"Best Net PnL: {best_pnl['horizon']:.0f}h (PnL={best_pnl['net_pnl']*100:.2f}%)") + + lowest_mae = results_df.loc[results_df['avg_mae'].idxmin()] + print(f"Lowest MAE: {lowest_mae['horizon']:.0f}h (MAE={lowest_mae['avg_mae']:.2f}%)") + + # Save results + output_path = "research/horizon_optimization_results.csv" + results_df.to_csv(output_path, index=False) + print(f"\nResults saved to {output_path}") + + return results_df + + +if __name__ == "__main__": + main() diff --git a/strategies/factory.py b/strategies/factory.py index 3c3005e..29c2313 100644 --- a/strategies/factory.py +++ b/strategies/factory.py @@ -80,14 +80,23 @@ def _build_registry() -> dict[str, StrategyConfig]: "regime": StrategyConfig( strategy_class=RegimeReversionStrategy, default_params={ - 'horizon': 96, - 'z_window': 24, - 'stop_loss': 0.06, - 'take_profit': 0.05 + # Optimal from walk-forward research (research/horizon_optimization_results.csv) + 'horizon': 102, # 4.25 days - best Net PnL + 'z_window': 24, # 24h rolling Z-score window + 'z_entry_threshold': 1.0, # Enter when |Z| > 1.0 + 'profit_target': 0.005, # 0.5% target for ML labels + 'stop_loss': 0.06, # 6% stop loss + 'take_profit': 0.05, # 5% take profit + 'train_ratio': 0.7, # 70% train / 30% test + 'trend_window': 0, # Disabled SMA filter + 'use_funding_filter': True, # Enabled Funding filter + 'funding_threshold': 0.005 # 0.005% threshold (Proven profitable) }, grid_params={ - 'horizon': [72, 96, 120], - 'stop_loss': [0.04, 0.06, 0.08] + 'horizon': [84, 96, 102, 108, 120], + 'z_entry_threshold': [0.8, 1.0, 1.2], + 'stop_loss': [0.04, 0.06, 0.08], + 'funding_threshold': [0.005, 0.01, 0.02] } ) } diff --git a/strategies/regime_strategy.py b/strategies/regime_strategy.py index 680f4b4..cea6895 100644 --- a/strategies/regime_strategy.py +++ b/strategies/regime_strategy.py @@ -28,21 +28,39 @@ class RegimeReversionStrategy(BaseStrategy): - Eliminates look-ahead bias for realistic backtest results """ + # Optimal parameters from walk-forward research (2025-10 to 2025-12) + # Research: research/horizon_optimization_results.csv + OPTIMAL_HORIZON = 102 # 4.25 days - best Net PnL (+232%) + OPTIMAL_Z_WINDOW = 24 # 24h rolling window for spread Z-score + OPTIMAL_TRAIN_RATIO = 0.7 # 70% train / 30% test split + OPTIMAL_PROFIT_TARGET = 0.005 # 0.5% profit threshold for target definition + OPTIMAL_Z_ENTRY = 1.0 # Enter when |Z| > 1.0 + def __init__(self, model_path: str = "data/regime_model.pkl", - horizon: int = 96, # 4 Days based on research - z_window: int = 24, - stop_loss: float = 0.06, # 6% to survive 2% avg MAE - take_profit: float = 0.05, # Swing target - train_ratio: float = 0.7 # Walk-forward: train on first 70% + horizon: int = OPTIMAL_HORIZON, + z_window: int = OPTIMAL_Z_WINDOW, + z_entry_threshold: float = OPTIMAL_Z_ENTRY, + profit_target: float = OPTIMAL_PROFIT_TARGET, + stop_loss: float = 0.06, # 6% - accommodates 1.95% avg MAE + take_profit: float = 0.05, # 5% swing target + train_ratio: float = OPTIMAL_TRAIN_RATIO, + trend_window: int = 0, # Disable SMA filter + use_funding_filter: bool = True, # Enable Funding Rate filter + funding_threshold: float = 0.005 # Tightened to 0.005% ): super().__init__() self.model_path = model_path self.horizon = horizon self.z_window = z_window + self.z_entry_threshold = z_entry_threshold + self.profit_target = profit_target self.stop_loss = stop_loss self.take_profit = take_profit self.train_ratio = train_ratio + self.trend_window = trend_window + self.use_funding_filter = use_funding_filter + self.funding_threshold = funding_threshold # Default Strategy Config self.default_market_type = MarketType.PERPETUAL @@ -68,7 +86,8 @@ class RegimeReversionStrategy(BaseStrategy): try: # Load BTC data (Context) - Must match the timeframe of the backtest # Research was done on 1h candles, so strategy should be run on 1h - df_btc = self.dm.load_data("okx", "BTC-USDT", "1h", MarketType.SPOT) + # Use PERPETUAL data to match the trading instrument (ETH Perp) + df_btc = self.dm.load_data("okx", "BTC-USDT", "1h", MarketType.PERPETUAL) # Align BTC to ETH (close) df_btc = df_btc.reindex(close.index, method='ffill') @@ -141,11 +160,76 @@ class RegimeReversionStrategy(BaseStrategy): probs = self.model.predict_proba(X_test)[:, 1] # 8. Generate Entry Signals (TEST period only) - # If Z > 1 (Spread High, ETH Expensive) -> Short ETH - # If Z < -1 (Spread Low, ETH Cheap) -> Long ETH + # If Z > threshold (Spread High, ETH Expensive) -> Short ETH + # If Z < -threshold (Spread Low, ETH Cheap) -> Long ETH + z_thresh = self.z_entry_threshold - short_signal_test = (probs > 0.5) & (test_features['z_score'].values > 1.0) - long_signal_test = (probs > 0.5) & (test_features['z_score'].values < -1.0) + short_signal_test = (probs > 0.5) & (test_features['z_score'].values > z_thresh) + long_signal_test = (probs > 0.5) & (test_features['z_score'].values < -z_thresh) + + # 8b. Apply Trend Filter (Macro Regime) + # Rule: Long only if BTC > SMA (Bull), Short only if BTC < SMA (Bear) + if self.trend_window > 0: + # Calculate SMA on full BTC history first + btc_sma = btc_close.rolling(window=self.trend_window).mean() + + # Align with test period + test_btc_close = btc_close.reindex(test_features.index) + test_btc_sma = btc_sma.reindex(test_features.index) + + # Define Regimes + is_bull = (test_btc_close > test_btc_sma).values + is_bear = (test_btc_close < test_btc_sma).values + + # Apply Filter + long_signal_test = long_signal_test & is_bull + short_signal_test = short_signal_test & is_bear + + # 8c. Apply Funding Rate Filter + # Rule: If Funding > Threshold (Greedy) -> No Longs. + # If Funding < -Threshold (Fearful) -> No Shorts. + if self.use_funding_filter and 'btc_funding' in test_features.columns: + funding = test_features['btc_funding'].values + thresh = self.funding_threshold + + # Greedy Market (High Positive Funding) -> Risk of Long Squeeze -> Block Longs + # (Or implies trend is up? Actually for Mean Reversion, high funding often marks tops) + # We block Longs because we don't want to buy into an overheated market? + # Actually, "Greedy" means Longs are paying Shorts. + # If we Long, we pay funding. + # If we Short, we receive funding. + # So High Funding = Good for Shorts (receive yield + reversion). + # Bad for Longs (pay yield + likely top). + + is_overheated = funding > thresh + is_oversold = funding < -thresh + + # Block Longs if Overheated + long_signal_test = long_signal_test & (~is_overheated) + + # Block Shorts if Oversold (Negative Funding) -> Risk of Short Squeeze + short_signal_test = short_signal_test & (~is_oversold) + + n_blocked_long = (is_overheated & (probs > 0.5) & (test_features['z_score'].values < -z_thresh)).sum() + n_blocked_short = (is_oversold & (probs > 0.5) & (test_features['z_score'].values > z_thresh)).sum() + + if n_blocked_long > 0 or n_blocked_short > 0: + logger.info(f"Funding Filter: Blocked {n_blocked_long} Longs, {n_blocked_short} Shorts") + + # 9. Calculate Position Sizing (Probability-Based) + # Base size = 1.0 (100% of equity) + # Scale: 1.0 + (Prob - 0.5) * 2 + # Example: Prob=0.6 -> Size=1.2, Prob=0.8 -> Size=1.6 + + # Align probabilities to close index + probs_series = pd.Series(0.0, index=test_features.index) + probs_series[:] = probs + probs_aligned = probs_series.reindex(close.index, fill_value=0.0) + + # Calculate dynamic size + dynamic_size = 1.0 + (probs_aligned - 0.5) * 2.0 + # Cap leverage between 1x and 2x + size = dynamic_size.clip(lower=1.0, upper=2.0) # Create full-length signal series (False for training period) long_entries = pd.Series(False, index=close.index) @@ -171,7 +255,7 @@ class RegimeReversionStrategy(BaseStrategy): n_short = short_entries.sum() logger.info(f"Generated {n_long} long signals, {n_short} short signals (test period only)") - return long_entries, long_exits, short_entries, short_exits + return long_entries, long_exits, short_entries, short_exits, size def prepare_features(self, df_btc, df_eth, cq_df=None): """Replicate research feature engineering""" @@ -236,19 +320,20 @@ class RegimeReversionStrategy(BaseStrategy): Args: train_features: DataFrame containing features for training period only """ - threshold = 0.005 + threshold = self.profit_target horizon = self.horizon + z_thresh = self.z_entry_threshold # Define targets using ONLY training data - # For Short Spread (Z > 1): Did spread drop below target within horizon? + # For Short Spread (Z > threshold): Did spread drop below target within horizon? future_min = train_features['spread'].rolling(window=horizon).min().shift(-horizon) target_short = train_features['spread'] * (1 - threshold) - success_short = (train_features['z_score'] > 1.0) & (future_min < target_short) + success_short = (train_features['z_score'] > z_thresh) & (future_min < target_short) - # For Long Spread (Z < -1): Did spread rise above target within horizon? + # For Long Spread (Z < -threshold): Did spread rise above target within horizon? future_max = train_features['spread'].rolling(window=horizon).max().shift(-horizon) target_long = train_features['spread'] * (1 + threshold) - success_long = (train_features['z_score'] < -1.0) & (future_max > target_long) + success_long = (train_features['z_score'] < -z_thresh) & (future_max > target_long) targets = np.select([success_short, success_long], [1, 1], default=0)