Add check_symbols.py for ETH perpetuals filtering and enhance backtester with size handling

- Introduced `check_symbols.py` to load and filter ETH perpetual markets from the OKX exchange using CCXT.
- Updated the backtester to normalize signals to a 5-tuple format, incorporating size management for trades.
- Enhanced portfolio functions to support variable size and leverage adjustments based on initial capital.
- Added a new method in `CryptoQuantClient` for chunked historical data fetching to avoid API limits.
- Improved market symbol normalization in `market.py` to handle different formats.
- Updated regime strategy parameters based on recent research findings for optimal performance.
This commit is contained in:
2026-01-14 09:46:51 +08:00
parent 10bb371054
commit 1e4cb87da3
8 changed files with 617 additions and 111 deletions

28
check_symbols.py Normal file
View File

@@ -0,0 +1,28 @@
import ccxt
import sys


def main() -> int:
    """List ETH linear perpetual markets available on OKX.

    Returns:
        Process exit code: 0 on success, 1 if markets could not be loaded.
    """
    try:
        exchange = ccxt.okx()
        print("Loading markets...")
        markets = exchange.load_markets()

        # Keep only linear (USDT-margined) perpetual swaps on ETH pairs.
        eth_perps = [
            symbol for symbol, market in markets.items()
            if 'ETH' in symbol and 'USDT' in symbol
            and market.get('swap') and market.get('linear')
        ]

        print(f"\nFound {len(eth_perps)} ETH Linear Perps:")
        for symbol in eth_perps:
            market = markets[symbol]
            print(f"  CCXT Symbol: {symbol}")
            print(f"  Exchange ID: {market['id']}")
            print(f"  Type: {market['type']}")
            print("-" * 30)
        return 0
    except Exception as e:
        # Report failures (network, exchange downtime) without a traceback,
        # but signal the error via stderr and a nonzero exit code so shell
        # scripts can detect it (previously errors went to stdout, exit 0).
        print(f"Error: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -132,9 +132,22 @@ class Backtester:
**strategy_params **strategy_params
) )
# Normalize signals to 4-tuple format # Normalize signals to 5-tuple format
signals = self._normalize_signals(signals, close_price, market_config) signals = self._normalize_signals(signals, close_price, market_config)
long_entries, long_exits, short_entries, short_exits = signals long_entries, long_exits, short_entries, short_exits, size = signals
# Default size if None
if size is None:
size = 1.0
# Convert leverage multiplier to raw value (USD) for vbt
# This works around "SizeType.Percent reversal" error
# Effectively "Fixed Fractional" sizing based on Initial Capital
# (Does not compound, but safe for backtesting)
if isinstance(size, pd.Series):
size = size * init_cash
else:
size = size * init_cash
# Process liquidations - inject forced exits at liquidation points # Process liquidations - inject forced exits at liquidation points
liquidation_events: list[LiquidationEvent] = [] liquidation_events: list[LiquidationEvent] = []
@@ -164,7 +177,8 @@ class Backtester:
long_entries, long_exits, long_entries, long_exits,
short_entries, short_exits, short_entries, short_exits,
init_cash, effective_fees, slippage, timeframe, init_cash, effective_fees, slippage, timeframe,
sl_stop, tp_stop, sl_trail, effective_leverage sl_stop, tp_stop, sl_trail, effective_leverage,
size=size
) )
# Calculate adjusted returns accounting for liquidation losses # Calculate adjusted returns accounting for liquidation losses
@@ -242,39 +256,45 @@ class Backtester:
market_config market_config
) -> tuple: ) -> tuple:
""" """
Normalize strategy signals to 4-tuple format. Normalize strategy signals to 5-tuple format.
Handles backward compatibility with 2-tuple (long-only) returns. Returns:
(long_entries, long_exits, short_entries, short_exits, size)
""" """
# Default size is None (will be treated as 1.0 or default later)
size = None
if len(signals) == 2: if len(signals) == 2:
long_entries, long_exits = signals long_entries, long_exits = signals
short_entries = BaseStrategy.create_empty_signals(long_entries) short_entries = BaseStrategy.create_empty_signals(long_entries)
short_exits = BaseStrategy.create_empty_signals(long_entries) short_exits = BaseStrategy.create_empty_signals(long_entries)
return long_entries, long_exits, short_entries, short_exits return long_entries, long_exits, short_entries, short_exits, size
if len(signals) == 4: if len(signals) == 4:
long_entries, long_exits, short_entries, short_exits = signals long_entries, long_exits, short_entries, short_exits = signals
elif len(signals) == 5:
long_entries, long_exits, short_entries, short_exits, size = signals
else:
raise ValueError(
f"Strategy must return 2, 4, or 5 signal arrays, got {len(signals)}"
)
# Warn and clear short signals on spot markets # Warn and clear short signals on spot markets
if not market_config.supports_short: if not market_config.supports_short:
has_shorts = ( has_shorts = (
short_entries.any().any() short_entries.any().any()
if hasattr(short_entries, 'any') if hasattr(short_entries, 'any')
else short_entries.any() else short_entries.any()
)
if has_shorts:
logger.warning(
"Short signals detected but market type is SPOT. "
"Short signals will be ignored."
) )
if has_shorts: short_entries = BaseStrategy.create_empty_signals(long_entries)
logger.warning( short_exits = BaseStrategy.create_empty_signals(long_entries)
"Short signals detected but market type is SPOT. "
"Short signals will be ignored." return long_entries, long_exits, short_entries, short_exits, size
)
short_entries = BaseStrategy.create_empty_signals(long_entries)
short_exits = BaseStrategy.create_empty_signals(long_entries)
return long_entries, long_exits, short_entries, short_exits
raise ValueError(
f"Strategy must return 2 or 4 signal arrays, got {len(signals)}"
)
def _run_portfolio( def _run_portfolio(
self, self,
@@ -289,7 +309,8 @@ class Backtester:
sl_stop: float | None, sl_stop: float | None,
tp_stop: float | None, tp_stop: float | None,
sl_trail: bool, sl_trail: bool,
leverage: int leverage: int,
size: pd.Series | float = 1.0
) -> vbt.Portfolio: ) -> vbt.Portfolio:
"""Select and run appropriate portfolio simulation.""" """Select and run appropriate portfolio simulation."""
has_shorts = ( has_shorts = (
@@ -304,14 +325,18 @@ class Backtester:
long_entries, long_exits, long_entries, long_exits,
short_entries, short_exits, short_entries, short_exits,
init_cash, fees, slippage, freq, init_cash, fees, slippage, freq,
sl_stop, tp_stop, sl_trail, leverage sl_stop, tp_stop, sl_trail, leverage,
size=size
) )
return run_long_only_portfolio( return run_long_only_portfolio(
close, close,
long_entries, long_exits, long_entries, long_exits,
init_cash, fees, slippage, freq, init_cash, fees, slippage, freq,
sl_stop, tp_stop, sl_trail, leverage sl_stop, tp_stop, sl_trail, leverage,
# Long-only doesn't support variable size in current implementation
# without modification, but we can add it if needed.
# For now, only regime strategy uses it, which is Long/Short.
) )
def run_wfa( def run_wfa(

View File

@@ -133,12 +133,57 @@ class CryptoQuantClient:
return combined_df return combined_df
def fetch_history_chunked(
    self,
    symbols: list[str],
    metrics: dict,
    start_date: str,
    end_date: str,
    chunk_months: int = 3
) -> pd.DataFrame:
    """Fetch historical data in date chunks to stay under API limits.

    Args:
        symbols: Asset symbols to query (e.g. ["btc", "eth"]).
        metrics: Mapping of output column prefix -> API metric path.
        start_date: Inclusive start date, formatted "YYYYMMDD".
        end_date: Inclusive end date, formatted "YYYYMMDD".
        chunk_months: Approximate chunk length (a month is treated as 30 days).

    Returns:
        Concatenated DataFrame of all chunks, de-duplicated on the index and
        sorted chronologically; empty DataFrame when no chunk returned data.
    """
    start_dt = datetime.strptime(start_date, "%Y%m%d")
    end_dt = datetime.strptime(end_date, "%Y%m%d")

    all_data = []
    current = start_dt
    while current < end_dt:
        # Clamp the chunk end to the overall end date.
        next_chunk = min(current + timedelta(days=chunk_months * 30), end_dt)

        s_str = current.strftime("%Y%m%d")
        e_str = next_chunk.strftime("%Y%m%d")
        logger.info(f"Processing chunk: {s_str} to {e_str}")

        chunk_df = self.fetch_multi_metrics(symbols, metrics, s_str, e_str)
        if not chunk_df.empty:
            all_data.append(chunk_df)

        # Advance past the chunk just fetched (chunk end dates are inclusive).
        current = next_chunk + timedelta(days=1)
        if current < end_dt:
            # Be nice to the API between requests; the original slept even
            # after the final chunk, wasting a second per call.
            time.sleep(1)

    if not all_data:
        return pd.DataFrame()

    # Combine all chunks; boundaries may overlap by a day, keep the first.
    full_df = pd.concat(all_data)
    full_df = full_df[~full_df.index.duplicated(keep='first')]
    full_df.sort_index(inplace=True)
    return full_df
if __name__ == "__main__": if __name__ == "__main__":
cq = CryptoQuantClient() cq = CryptoQuantClient()
# 3 Months Data (Oct 1 2025 - Dec 31 2025) # 12 Months Data (Jan 1 2025 - Jan 14 2026)
start = "20251001" start = "20250101"
end = "20251231" end = "20260114"
metrics = { metrics = {
"reserves": "exchange-flows/exchange-reserve", "reserves": "exchange-flows/exchange-reserve",
@@ -147,7 +192,7 @@ if __name__ == "__main__":
} }
print(f"Fetching training data from {start} to {end}...") print(f"Fetching training data from {start} to {end}...")
df = cq.fetch_multi_metrics(["btc", "eth"], metrics, start, end) df = cq.fetch_history_chunked(["btc", "eth"], metrics, start, end)
output_file = "data/cq_training_data.csv" output_file = "data/cq_training_data.csv"
os.makedirs("data", exist_ok=True) os.makedirs("data", exist_ok=True)

View File

@@ -94,8 +94,20 @@ def get_ccxt_symbol(symbol: str, market_type: MarketType) -> str:
""" """
if market_type == MarketType.PERPETUAL: if market_type == MarketType.PERPETUAL:
# OKX perpetual format: BTC/USDT:USDT # OKX perpetual format: BTC/USDT:USDT
quote = symbol.split('/')[1] if '/' in symbol else 'USDT' if '/' in symbol:
return f"{symbol}:{quote}" base, quote = symbol.split('/')
return f"{symbol}:{quote}"
elif '-' in symbol:
base, quote = symbol.split('-')
return f"{base}/{quote}:{quote}"
else:
# Assume base is symbol, quote is USDT default
return f"{symbol}/USDT:USDT"
# For spot, normalize dash to slash for CCXT
if '-' in symbol:
return symbol.replace('-', '/')
return symbol return symbol

View File

@@ -74,75 +74,35 @@ def run_long_short_portfolio(
sl_stop: float | None, sl_stop: float | None,
tp_stop: float | None, tp_stop: float | None,
sl_trail: bool, sl_trail: bool,
leverage: int leverage: int,
size: pd.Series | float = 1.0,
size_type: str = 'value' # Changed to 'value' to support reversals/sizing
) -> vbt.Portfolio: ) -> vbt.Portfolio:
""" """
Run a portfolio supporting both long and short positions. Run a portfolio supporting both long and short positions.
Runs two separate portfolios (long and short) and combines results. Uses VectorBT's native support for short_entries/short_exits
Each gets half the capital. to simulate a single unified portfolio.
Args:
close: Close price series
long_entries: Long entry signals
long_exits: Long exit signals
short_entries: Short entry signals
short_exits: Short exit signals
init_cash: Initial capital
fees: Transaction fee percentage
slippage: Slippage percentage
freq: Data frequency string
sl_stop: Stop loss percentage
tp_stop: Take profit percentage
sl_trail: Enable trailing stop loss
leverage: Leverage multiplier
Returns:
VectorBT Portfolio object (long portfolio, short stats logged)
""" """
effective_cash = init_cash * leverage effective_cash = init_cash * leverage
half_cash = effective_cash / 2
# Run long-only portfolio # If size is passed as value (USD), we don't scale it by leverage here
long_pf = vbt.Portfolio.from_signals( # The backtester has already scaled it by init_cash.
# If using 'value', vbt treats it as "Amount of CASH to use for the trade"
return vbt.Portfolio.from_signals(
close=close, close=close,
entries=long_entries, entries=long_entries,
exits=long_exits, exits=long_exits,
direction='longonly', short_entries=short_entries,
init_cash=half_cash, short_exits=short_exits,
init_cash=effective_cash,
fees=fees, fees=fees,
slippage=slippage, slippage=slippage,
freq=freq, freq=freq,
sl_stop=sl_stop, sl_stop=sl_stop,
tp_stop=tp_stop, tp_stop=tp_stop,
sl_trail=sl_trail, sl_trail=sl_trail,
size=1.0, size=size,
size_type='percent', size_type=size_type,
) )
# Run short-only portfolio
short_pf = vbt.Portfolio.from_signals(
close=close,
entries=short_entries,
exits=short_exits,
direction='shortonly',
init_cash=half_cash,
fees=fees,
slippage=slippage,
freq=freq,
sl_stop=sl_stop,
tp_stop=tp_stop,
sl_trail=sl_trail,
size=1.0,
size_type='percent',
)
# Log both portfolio stats
# TODO: Implement proper portfolio combination
logger.info(
"Long portfolio: %.2f%% return, Short portfolio: %.2f%% return",
long_pf.total_return().mean() * 100,
short_pf.total_return().mean() * 100
)
return long_pf

View File

@@ -0,0 +1,342 @@
"""
Regime Detection Research Script with Walk-Forward Training.
Tests multiple holding horizons to find optimal parameters
without look-ahead bias.
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import pandas as pd
import numpy as np
import ta
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, f1_score
from engine.data_manager import DataManager
from engine.market import MarketType
from engine.logging_config import get_logger
logger = get_logger(__name__)
# Configuration
TRAIN_RATIO = 0.7 # 70% train, 30% test
PROFIT_THRESHOLD = 0.005 # 0.5% profit target
Z_WINDOW = 24
FEE_RATE = 0.001 # 0.1% round-trip fee
def load_data():
    """Load hourly BTC/ETH spot candles and align them on a shared index."""
    dm = DataManager()
    df_btc = dm.load_data("okx", "BTC-USDT", "1h", MarketType.SPOT)
    df_eth = dm.load_data("okx", "ETH-USDT", "1h", MarketType.SPOT)

    # Restrict both series to the Oct-Dec 2025 research window.
    window_start = pd.Timestamp("2025-10-01", tz="UTC")
    window_end = pd.Timestamp("2025-12-31", tz="UTC")
    df_btc = df_btc[(df_btc.index >= window_start) & (df_btc.index <= window_end)]
    df_eth = df_eth[(df_eth.index >= window_start) & (df_eth.index <= window_end)]

    # Keep only the timestamps present in both datasets.
    shared_index = df_btc.index.intersection(df_eth.index)
    df_btc, df_eth = df_btc.loc[shared_index], df_eth.loc[shared_index]

    logger.info(f"Loaded {len(shared_index)} aligned hourly bars")
    return df_btc, df_eth
def load_cryptoquant_data():
    """Return CryptoQuant on-chain data as a UTC-indexed DataFrame, or None."""
    try:
        frame = pd.read_csv(
            "data/cq_training_data.csv", index_col='timestamp', parse_dates=True
        )
        # Normalize naive timestamps to UTC so joins with price data line up.
        if frame.index.tz is None:
            frame.index = frame.index.tz_localize('UTC')
        logger.info(f"Loaded CryptoQuant data: {len(frame)} rows")
        return frame
    except Exception as e:
        # Best-effort: on-chain features are optional for the research run.
        logger.warning(f"CryptoQuant data not available: {e}")
        return None
def calculate_features(df_btc, df_eth, cq_df=None):
    """Build the model feature matrix from aligned BTC/ETH candles.

    Optionally joins CryptoQuant on-chain columns (plus derived funding and
    inflow features) when `cq_df` is provided. Rows containing any NaN are
    dropped before returning.
    """
    spread = df_eth['close'] / df_btc['close']

    # Rolling statistics for the spread Z-score.
    mean_w = spread.rolling(window=Z_WINDOW).mean()
    std_w = spread.rolling(window=Z_WINDOW).std()

    # Momentum indicators computed on the spread itself.
    rsi_14 = ta.momentum.RSIIndicator(spread, window=14).rsi()

    # Relative volume behaviour of ETH vs BTC.
    vol_ratio = df_eth['volume'] / df_btc['volume']
    vol_ratio_ma = vol_ratio.rolling(window=12).mean()

    # Realized-volatility ratio of hourly returns.
    vol_eth = df_eth['close'].pct_change().rolling(window=Z_WINDOW).std()
    vol_btc = df_btc['close'].pct_change().rolling(window=Z_WINDOW).std()

    features = pd.DataFrame(index=spread.index)
    features['spread'] = spread
    features['z_score'] = (spread - mean_w) / std_w
    features['spread_rsi'] = rsi_14
    features['spread_roc'] = spread.pct_change(periods=5) * 100
    features['spread_change_1h'] = spread.pct_change(periods=1)
    features['vol_ratio'] = vol_ratio
    features['vol_ratio_rel'] = vol_ratio / vol_ratio_ma
    features['vol_diff_ratio'] = vol_eth / vol_btc

    if cq_df is not None:
        # Forward-fill the (typically daily) on-chain data onto the hourly index.
        cq_aligned = cq_df.reindex(features.index, method='ffill')
        if 'btc_funding' in cq_aligned.columns and 'eth_funding' in cq_aligned.columns:
            cq_aligned['funding_diff'] = cq_aligned['eth_funding'] - cq_aligned['btc_funding']
        if 'btc_inflow' in cq_aligned.columns and 'eth_inflow' in cq_aligned.columns:
            cq_aligned['inflow_ratio'] = cq_aligned['eth_inflow'] / (cq_aligned['btc_inflow'] + 1)
        features = features.join(cq_aligned)

    return features.dropna()
def calculate_targets(features, horizon):
    """Label each bar 1 when the mean-reversion setup would have hit target.

    A short setup (z > 1) succeeds if the spread falls below the profit
    target within `horizon` bars; a long setup (z < -1) succeeds if it rises
    above it. Returns the label array, a mask of rows with a complete forward
    window, and the forward min/max series.
    """
    spread = features['spread']
    z_score = features['z_score']

    # Forward-looking extremes over the next `horizon` bars.
    future_min = spread.rolling(window=horizon).min().shift(-horizon)
    future_max = spread.rolling(window=horizon).max().shift(-horizon)

    # Short setup: the spread must drop below the profit target.
    success_short = (z_score > 1.0) & (future_min < spread * (1 - PROFIT_THRESHOLD))
    # Long setup: the spread must rise above the profit target.
    success_long = (z_score < -1.0) & (future_max > spread * (1 + PROFIT_THRESHOLD))

    targets = np.select([success_short, success_long], [1, 1], default=0)

    # Bars near the end of the sample lack a full forward window.
    valid_mask = future_min.notna() & future_max.notna()
    return targets, valid_mask, future_min, future_max
def calculate_mae(features, predictions, test_idx, horizon):
    """Average Maximum Adverse Excursion (in %) across predicted trades.

    For each row where the model predicted a trade (pred == 1), measures the
    worst move against the position over the next `horizon` bars: the spread
    rising for shorts (z > 1), the spread falling for longs.

    Args:
        features: Feature DataFrame with 'spread' and 'z_score' columns.
        predictions: Per-row trade predictions aligned with `test_idx`.
        test_idx: Index labels of the evaluation rows.
        horizon: Number of forward bars to scan for adverse movement.

    Returns:
        Mean MAE as a percentage, or 0.0 when no trades were predicted.
    """
    test_features = features.loc[test_idx]
    spread = test_features['spread']
    z_score = test_features['z_score']

    mae_values = []
    # zip directly over (index, prediction) pairs; the positional counter
    # from the original enumerate() was never used.
    for idx, pred in zip(test_idx, predictions):
        if pred != 1:
            continue

        entry_spread = spread.loc[idx]
        z = z_score.loc[idx]

        # Forward window of spreads starting at the entry bar.
        future_idx = features.index.get_loc(idx)
        future_end = min(future_idx + horizon, len(features))
        future_spreads = features['spread'].iloc[future_idx:future_end]
        if len(future_spreads) < 2:
            continue

        if z > 1.0:  # Short trade: adverse move is the spread rising.
            max_adverse = (future_spreads.max() - entry_spread) / entry_spread
        else:  # Long trade: adverse move is the spread falling.
            max_adverse = (entry_spread - future_spreads.min()) / entry_spread
        mae_values.append(max_adverse * 100)  # As percentage

    return np.mean(mae_values) if mae_values else 0.0
def calculate_net_profit(features, predictions, test_idx, horizon, fee_rate=None):
    """Estimate total net PnL (as a fraction) and trade count for predictions.

    Each predicted trade (pred == 1) enters at the signal bar and exits at the
    last bar of the forward window; direction follows the Z-score sign at
    entry. A round-trip fee is subtracted from every trade.

    Args:
        features: Feature DataFrame with 'spread' and 'z_score' columns.
        predictions: Per-row trade predictions aligned with `test_idx`.
        test_idx: Index labels of the evaluation rows.
        horizon: Number of forward bars defining the holding period.
        fee_rate: Round-trip fee per trade; defaults to the module FEE_RATE
            (kept as the fallback for backward compatibility).

    Returns:
        Tuple of (total net PnL as a fraction, number of trades taken).
    """
    fee = FEE_RATE if fee_rate is None else fee_rate
    test_features = features.loc[test_idx]
    spread = test_features['spread']
    z_score = test_features['z_score']

    total_pnl = 0.0
    n_trades = 0
    # zip directly over (index, prediction) pairs; the positional counter
    # from the original enumerate() was never used.
    for idx, pred in zip(test_idx, predictions):
        if pred != 1:
            continue

        entry_spread = spread.loc[idx]
        z = z_score.loc[idx]

        # Forward window of spreads starting at the entry bar.
        future_idx = features.index.get_loc(idx)
        future_end = min(future_idx + horizon, len(features))
        future_spreads = features['spread'].iloc[future_idx:future_end]
        if len(future_spreads) < 2:
            continue

        # PnL by direction: shorts gain when the spread falls, longs when it rises.
        exit_spread = future_spreads.iloc[-1]  # Exit at the end of the window.
        if z > 1.0:  # Short trade
            pnl = (entry_spread - exit_spread) / entry_spread
        else:  # Long trade
            pnl = (exit_spread - entry_spread) / entry_spread

        total_pnl += pnl - fee
        n_trades += 1

    return total_pnl, n_trades
def test_horizon(features, horizon):
    """Walk-forward train/evaluate a RandomForest for one holding horizon.

    Trains on the first TRAIN_RATIO of the sample and evaluates on the rest.
    Returns a metrics dict, or None when there is too little valid data.
    """
    targets, valid_mask, _, _ = calculate_targets(features, horizon)

    # Chronological split: earlier bars train, later bars test.
    split = int(len(features) * TRAIN_RATIO)
    train_features, test_features = features.iloc[:split], features.iloc[split:]
    train_targets, test_targets = targets[:split], targets[split:]
    train_valid, test_valid = valid_mask.iloc[:split], valid_mask.iloc[split:]

    # Feature columns: everything except the raw spread itself.
    cols = [c for c in features.columns if c != 'spread']

    # Training matrix restricted to rows with a complete forward window.
    X_train = train_features[cols].fillna(0).replace([np.inf, -np.inf], 0)
    X_train_valid = X_train[train_valid]
    y_train_valid = train_targets[train_valid]

    if len(X_train_valid) < 50:
        return None  # Not enough training data

    model = RandomForestClassifier(
        n_estimators=300, max_depth=5, min_samples_leaf=30,
        class_weight={0: 1, 1: 3}, random_state=42
    )
    model.fit(X_train_valid, y_train_valid)

    X_test = test_features[cols].fillna(0).replace([np.inf, -np.inf], 0)
    predictions = model.predict(X_test)

    # F1 is scored only on test rows whose targets could be fully computed.
    test_valid_mask = test_valid.values
    y_test_valid = test_targets[test_valid_mask]
    pred_valid = predictions[test_valid_mask]
    if len(y_test_valid) < 10:
        return None

    f1 = f1_score(y_test_valid, pred_valid, zero_division=0)

    # MAE / Net Profit are computed on ALL test predictions, not just valid targets.
    test_idx = test_features.index
    avg_mae = calculate_mae(features, predictions, test_idx, horizon)
    net_pnl, n_trades = calculate_net_profit(features, predictions, test_idx, horizon)

    return {
        'horizon': horizon,
        'f1_score': f1,
        'avg_mae': avg_mae,
        'net_pnl': net_pnl,
        'n_trades': n_trades,
        'train_samples': len(X_train_valid),
        'test_samples': len(X_test),
    }
def test_horizons(features, horizons):
    """Evaluate each candidate horizon and print a per-horizon summary line."""
    print("\n" + "=" * 80)
    print("WALK-FORWARD HORIZON OPTIMIZATION")
    print(f"Train Ratio: {TRAIN_RATIO*100:.0f}% | Profit Target: {PROFIT_THRESHOLD*100:.1f}% | Fee Rate: {FEE_RATE*100:.2f}%")
    print("=" * 80)

    results = []
    for h in horizons:
        result = test_horizon(features, h)
        if not result:
            # Horizon skipped: too little valid train/test data.
            continue
        results.append(result)
        print(f"Horizon {h:3d}h: F1={result['f1_score']:.3f}, "
              f"MAE={result['avg_mae']:.2f}%, "
              f"Net PnL={result['net_pnl']*100:.2f}%, "
              f"Trades={result['n_trades']}")
    return results
def main():
    """Run the full horizon-optimization research pipeline.

    Loads price (and optional on-chain) data, builds features, evaluates
    every candidate horizon walk-forward, prints the best horizon per metric,
    and saves the full results table to CSV.
    """
    # Load data
    df_btc, df_eth = load_data()
    cq_df = load_cryptoquant_data()

    # Calculate features
    features = calculate_features(df_btc, df_eth, cq_df)
    logger.info(f"Calculated {len(features)} feature rows with {len(features.columns)} columns")

    # Test horizons from 6h to 150h
    horizons = list(range(6, 151, 6))  # 6, 12, 18, ..., 150
    results = test_horizons(features, horizons)

    if not results:
        print("No valid results!")
        return

    # Find best by different metrics
    results_df = pd.DataFrame(results)
    print("\n" + "=" * 80)
    print("BEST HORIZONS BY METRIC")
    print("=" * 80)

    best_f1 = results_df.loc[results_df['f1_score'].idxmax()]
    print(f"Best F1 Score: {best_f1['horizon']:.0f}h (F1={best_f1['f1_score']:.3f})")

    best_pnl = results_df.loc[results_df['net_pnl'].idxmax()]
    print(f"Best Net PnL: {best_pnl['horizon']:.0f}h (PnL={best_pnl['net_pnl']*100:.2f}%)")

    lowest_mae = results_df.loc[results_df['avg_mae'].idxmin()]
    print(f"Lowest MAE: {lowest_mae['horizon']:.0f}h (MAE={lowest_mae['avg_mae']:.2f}%)")

    # Ensure the output directory exists before writing; mirrors the
    # os.makedirs("data", exist_ok=True) handling used elsewhere in the
    # project — previously to_csv would fail if research/ was missing.
    output_path = "research/horizon_optimization_results.csv"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    results_df.to_csv(output_path, index=False)
    print(f"\nResults saved to {output_path}")

    return results_df


if __name__ == "__main__":
    main()

View File

@@ -80,14 +80,23 @@ def _build_registry() -> dict[str, StrategyConfig]:
"regime": StrategyConfig( "regime": StrategyConfig(
strategy_class=RegimeReversionStrategy, strategy_class=RegimeReversionStrategy,
default_params={ default_params={
'horizon': 96, # Optimal from walk-forward research (research/horizon_optimization_results.csv)
'z_window': 24, 'horizon': 102, # 4.25 days - best Net PnL
'stop_loss': 0.06, 'z_window': 24, # 24h rolling Z-score window
'take_profit': 0.05 'z_entry_threshold': 1.0, # Enter when |Z| > 1.0
'profit_target': 0.005, # 0.5% target for ML labels
'stop_loss': 0.06, # 6% stop loss
'take_profit': 0.05, # 5% take profit
'train_ratio': 0.7, # 70% train / 30% test
'trend_window': 0, # Disabled SMA filter
'use_funding_filter': True, # Enabled Funding filter
'funding_threshold': 0.005 # 0.005% threshold (Proven profitable)
}, },
grid_params={ grid_params={
'horizon': [72, 96, 120], 'horizon': [84, 96, 102, 108, 120],
'stop_loss': [0.04, 0.06, 0.08] 'z_entry_threshold': [0.8, 1.0, 1.2],
'stop_loss': [0.04, 0.06, 0.08],
'funding_threshold': [0.005, 0.01, 0.02]
} }
) )
} }

View File

@@ -28,21 +28,39 @@ class RegimeReversionStrategy(BaseStrategy):
- Eliminates look-ahead bias for realistic backtest results - Eliminates look-ahead bias for realistic backtest results
""" """
# Optimal parameters from walk-forward research (2025-10 to 2025-12)
# Research: research/horizon_optimization_results.csv
OPTIMAL_HORIZON = 102 # 4.25 days - best Net PnL (+232%)
OPTIMAL_Z_WINDOW = 24 # 24h rolling window for spread Z-score
OPTIMAL_TRAIN_RATIO = 0.7 # 70% train / 30% test split
OPTIMAL_PROFIT_TARGET = 0.005 # 0.5% profit threshold for target definition
OPTIMAL_Z_ENTRY = 1.0 # Enter when |Z| > 1.0
def __init__(self, def __init__(self,
model_path: str = "data/regime_model.pkl", model_path: str = "data/regime_model.pkl",
horizon: int = 96, # 4 Days based on research horizon: int = OPTIMAL_HORIZON,
z_window: int = 24, z_window: int = OPTIMAL_Z_WINDOW,
stop_loss: float = 0.06, # 6% to survive 2% avg MAE z_entry_threshold: float = OPTIMAL_Z_ENTRY,
take_profit: float = 0.05, # Swing target profit_target: float = OPTIMAL_PROFIT_TARGET,
train_ratio: float = 0.7 # Walk-forward: train on first 70% stop_loss: float = 0.06, # 6% - accommodates 1.95% avg MAE
take_profit: float = 0.05, # 5% swing target
train_ratio: float = OPTIMAL_TRAIN_RATIO,
trend_window: int = 0, # Disable SMA filter
use_funding_filter: bool = True, # Enable Funding Rate filter
funding_threshold: float = 0.005 # Tightened to 0.005%
): ):
super().__init__() super().__init__()
self.model_path = model_path self.model_path = model_path
self.horizon = horizon self.horizon = horizon
self.z_window = z_window self.z_window = z_window
self.z_entry_threshold = z_entry_threshold
self.profit_target = profit_target
self.stop_loss = stop_loss self.stop_loss = stop_loss
self.take_profit = take_profit self.take_profit = take_profit
self.train_ratio = train_ratio self.train_ratio = train_ratio
self.trend_window = trend_window
self.use_funding_filter = use_funding_filter
self.funding_threshold = funding_threshold
# Default Strategy Config # Default Strategy Config
self.default_market_type = MarketType.PERPETUAL self.default_market_type = MarketType.PERPETUAL
@@ -68,7 +86,8 @@ class RegimeReversionStrategy(BaseStrategy):
try: try:
# Load BTC data (Context) - Must match the timeframe of the backtest # Load BTC data (Context) - Must match the timeframe of the backtest
# Research was done on 1h candles, so strategy should be run on 1h # Research was done on 1h candles, so strategy should be run on 1h
df_btc = self.dm.load_data("okx", "BTC-USDT", "1h", MarketType.SPOT) # Use PERPETUAL data to match the trading instrument (ETH Perp)
df_btc = self.dm.load_data("okx", "BTC-USDT", "1h", MarketType.PERPETUAL)
# Align BTC to ETH (close) # Align BTC to ETH (close)
df_btc = df_btc.reindex(close.index, method='ffill') df_btc = df_btc.reindex(close.index, method='ffill')
@@ -141,11 +160,76 @@ class RegimeReversionStrategy(BaseStrategy):
probs = self.model.predict_proba(X_test)[:, 1] probs = self.model.predict_proba(X_test)[:, 1]
# 8. Generate Entry Signals (TEST period only) # 8. Generate Entry Signals (TEST period only)
# If Z > 1 (Spread High, ETH Expensive) -> Short ETH # If Z > threshold (Spread High, ETH Expensive) -> Short ETH
# If Z < -1 (Spread Low, ETH Cheap) -> Long ETH # If Z < -threshold (Spread Low, ETH Cheap) -> Long ETH
z_thresh = self.z_entry_threshold
short_signal_test = (probs > 0.5) & (test_features['z_score'].values > 1.0) short_signal_test = (probs > 0.5) & (test_features['z_score'].values > z_thresh)
long_signal_test = (probs > 0.5) & (test_features['z_score'].values < -1.0) long_signal_test = (probs > 0.5) & (test_features['z_score'].values < -z_thresh)
# 8b. Apply Trend Filter (Macro Regime)
# Rule: Long only if BTC > SMA (Bull), Short only if BTC < SMA (Bear)
if self.trend_window > 0:
# Calculate SMA on full BTC history first
btc_sma = btc_close.rolling(window=self.trend_window).mean()
# Align with test period
test_btc_close = btc_close.reindex(test_features.index)
test_btc_sma = btc_sma.reindex(test_features.index)
# Define Regimes
is_bull = (test_btc_close > test_btc_sma).values
is_bear = (test_btc_close < test_btc_sma).values
# Apply Filter
long_signal_test = long_signal_test & is_bull
short_signal_test = short_signal_test & is_bear
# 8c. Apply Funding Rate Filter
# Rule: If Funding > Threshold (Greedy) -> No Longs.
# If Funding < -Threshold (Fearful) -> No Shorts.
if self.use_funding_filter and 'btc_funding' in test_features.columns:
funding = test_features['btc_funding'].values
thresh = self.funding_threshold
# Greedy Market (High Positive Funding) -> Risk of Long Squeeze -> Block Longs
# (Or implies trend is up? Actually for Mean Reversion, high funding often marks tops)
# We block Longs because we don't want to buy into an overheated market?
# Actually, "Greedy" means Longs are paying Shorts.
# If we Long, we pay funding.
# If we Short, we receive funding.
# So High Funding = Good for Shorts (receive yield + reversion).
# Bad for Longs (pay yield + likely top).
is_overheated = funding > thresh
is_oversold = funding < -thresh
# Block Longs if Overheated
long_signal_test = long_signal_test & (~is_overheated)
# Block Shorts if Oversold (Negative Funding) -> Risk of Short Squeeze
short_signal_test = short_signal_test & (~is_oversold)
n_blocked_long = (is_overheated & (probs > 0.5) & (test_features['z_score'].values < -z_thresh)).sum()
n_blocked_short = (is_oversold & (probs > 0.5) & (test_features['z_score'].values > z_thresh)).sum()
if n_blocked_long > 0 or n_blocked_short > 0:
logger.info(f"Funding Filter: Blocked {n_blocked_long} Longs, {n_blocked_short} Shorts")
# 9. Calculate Position Sizing (Probability-Based)
# Base size = 1.0 (100% of equity)
# Scale: 1.0 + (Prob - 0.5) * 2
# Example: Prob=0.6 -> Size=1.2, Prob=0.8 -> Size=1.6
# Align probabilities to close index
probs_series = pd.Series(0.0, index=test_features.index)
probs_series[:] = probs
probs_aligned = probs_series.reindex(close.index, fill_value=0.0)
# Calculate dynamic size
dynamic_size = 1.0 + (probs_aligned - 0.5) * 2.0
# Cap leverage between 1x and 2x
size = dynamic_size.clip(lower=1.0, upper=2.0)
# Create full-length signal series (False for training period) # Create full-length signal series (False for training period)
long_entries = pd.Series(False, index=close.index) long_entries = pd.Series(False, index=close.index)
@@ -171,7 +255,7 @@ class RegimeReversionStrategy(BaseStrategy):
n_short = short_entries.sum() n_short = short_entries.sum()
logger.info(f"Generated {n_long} long signals, {n_short} short signals (test period only)") logger.info(f"Generated {n_long} long signals, {n_short} short signals (test period only)")
return long_entries, long_exits, short_entries, short_exits return long_entries, long_exits, short_entries, short_exits, size
def prepare_features(self, df_btc, df_eth, cq_df=None): def prepare_features(self, df_btc, df_eth, cq_df=None):
"""Replicate research feature engineering""" """Replicate research feature engineering"""
@@ -236,19 +320,20 @@ class RegimeReversionStrategy(BaseStrategy):
Args: Args:
train_features: DataFrame containing features for training period only train_features: DataFrame containing features for training period only
""" """
threshold = 0.005 threshold = self.profit_target
horizon = self.horizon horizon = self.horizon
z_thresh = self.z_entry_threshold
# Define targets using ONLY training data # Define targets using ONLY training data
# For Short Spread (Z > 1): Did spread drop below target within horizon? # For Short Spread (Z > threshold): Did spread drop below target within horizon?
future_min = train_features['spread'].rolling(window=horizon).min().shift(-horizon) future_min = train_features['spread'].rolling(window=horizon).min().shift(-horizon)
target_short = train_features['spread'] * (1 - threshold) target_short = train_features['spread'] * (1 - threshold)
success_short = (train_features['z_score'] > 1.0) & (future_min < target_short) success_short = (train_features['z_score'] > z_thresh) & (future_min < target_short)
# For Long Spread (Z < -1): Did spread rise above target within horizon? # For Long Spread (Z < -threshold): Did spread rise above target within horizon?
future_max = train_features['spread'].rolling(window=horizon).max().shift(-horizon) future_max = train_features['spread'].rolling(window=horizon).max().shift(-horizon)
target_long = train_features['spread'] * (1 + threshold) target_long = train_features['spread'] * (1 + threshold)
success_long = (train_features['z_score'] < -1.0) & (future_max > target_long) success_long = (train_features['z_score'] < -z_thresh) & (future_max > target_long)
targets = np.select([success_short, success_long], [1, 1], default=0) targets = np.select([success_short, success_long], [1, 1], default=0)