feat: Multi-Pair Divergence Selection Strategy
- Extend regime detection to top 10 cryptocurrencies (45 pairs) - Dynamic pair selection based on divergence score (|z_score| * probability) - Universal ML model trained on all pairs - Correlation-based filtering to avoid redundant positions - Funding rate integration from OKX for all 10 assets - ATR-based dynamic stop-loss and take-profit - Walk-forward training with 70/30 split Performance: +35.69% return (vs +28.66% baseline), 63.6% win rate
This commit is contained in:
311
strategies/multi_pair/divergence_scorer.py
Normal file
311
strategies/multi_pair/divergence_scorer.py
Normal file
@@ -0,0 +1,311 @@
|
||||
"""
|
||||
Divergence Scorer for Multi-Pair Strategy.
|
||||
|
||||
Ranks pairs by divergence score and selects the best candidate.
|
||||
"""
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
from engine.logging_config import get_logger
|
||||
from .config import MultiPairConfig
|
||||
from .pair_scanner import TradingPair
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class DivergenceSignal:
|
||||
"""
|
||||
Signal for a divergent pair.
|
||||
|
||||
Attributes:
|
||||
pair: Trading pair
|
||||
z_score: Current Z-Score of the spread
|
||||
probability: ML model probability of profitable reversion
|
||||
divergence_score: Combined score (|z_score| * probability)
|
||||
direction: 'long' or 'short' (relative to base asset)
|
||||
base_price: Current price of base asset
|
||||
quote_price: Current price of quote asset
|
||||
atr: Average True Range in price units
|
||||
atr_pct: ATR as percentage of price
|
||||
"""
|
||||
pair: TradingPair
|
||||
z_score: float
|
||||
probability: float
|
||||
divergence_score: float
|
||||
direction: str
|
||||
base_price: float
|
||||
quote_price: float
|
||||
atr: float
|
||||
atr_pct: float
|
||||
timestamp: pd.Timestamp
|
||||
|
||||
|
||||
class DivergenceScorer:
|
||||
"""
|
||||
Scores and ranks pairs by divergence potential.
|
||||
|
||||
Uses ML model predictions combined with Z-Score magnitude
|
||||
to identify the most promising mean-reversion opportunity.
|
||||
"""
|
||||
|
||||
def __init__(self, config: MultiPairConfig, model_path: str = "data/multi_pair_model.pkl"):
|
||||
self.config = config
|
||||
self.model_path = Path(model_path)
|
||||
self.model: RandomForestClassifier | None = None
|
||||
self.feature_cols: list[str] | None = None
|
||||
self._load_model()
|
||||
|
||||
def _load_model(self) -> None:
|
||||
"""Load pre-trained model if available."""
|
||||
if self.model_path.exists():
|
||||
try:
|
||||
with open(self.model_path, 'rb') as f:
|
||||
saved = pickle.load(f)
|
||||
self.model = saved['model']
|
||||
self.feature_cols = saved['feature_cols']
|
||||
logger.info("Loaded model from %s", self.model_path)
|
||||
except Exception as e:
|
||||
logger.warning("Could not load model: %s", e)
|
||||
|
||||
def save_model(self) -> None:
|
||||
"""Save trained model."""
|
||||
if self.model is None:
|
||||
return
|
||||
|
||||
self.model_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(self.model_path, 'wb') as f:
|
||||
pickle.dump({
|
||||
'model': self.model,
|
||||
'feature_cols': self.feature_cols,
|
||||
}, f)
|
||||
logger.info("Saved model to %s", self.model_path)
|
||||
|
||||
def train_model(
|
||||
self,
|
||||
combined_features: pd.DataFrame,
|
||||
pair_features: dict[str, pd.DataFrame]
|
||||
) -> None:
|
||||
"""
|
||||
Train universal model on all pairs.
|
||||
|
||||
Args:
|
||||
combined_features: Combined feature DataFrame from all pairs
|
||||
pair_features: Individual pair feature DataFrames (for target calculation)
|
||||
"""
|
||||
logger.info("Training universal model on %d samples...", len(combined_features))
|
||||
|
||||
z_thresh = self.config.z_entry_threshold
|
||||
horizon = self.config.horizon
|
||||
profit_target = self.config.profit_target
|
||||
|
||||
# Calculate targets for each pair
|
||||
all_targets = []
|
||||
all_features = []
|
||||
|
||||
for pair_id, features in pair_features.items():
|
||||
if len(features) < horizon + 50:
|
||||
continue
|
||||
|
||||
spread = features['spread']
|
||||
z_score = features['z_score']
|
||||
|
||||
# Future price movements
|
||||
future_min = spread.rolling(window=horizon).min().shift(-horizon)
|
||||
future_max = spread.rolling(window=horizon).max().shift(-horizon)
|
||||
|
||||
# Target labels
|
||||
target_short = spread * (1 - profit_target)
|
||||
target_long = spread * (1 + profit_target)
|
||||
|
||||
success_short = (z_score > z_thresh) & (future_min < target_short)
|
||||
success_long = (z_score < -z_thresh) & (future_max > target_long)
|
||||
|
||||
targets = np.select([success_short, success_long], [1, 1], default=0)
|
||||
|
||||
# Valid mask (exclude rows without complete future data)
|
||||
valid_mask = future_min.notna() & future_max.notna()
|
||||
|
||||
# Collect valid samples
|
||||
valid_features = features[valid_mask]
|
||||
valid_targets = targets[valid_mask.values]
|
||||
|
||||
if len(valid_features) > 0:
|
||||
all_features.append(valid_features)
|
||||
all_targets.extend(valid_targets)
|
||||
|
||||
if not all_features:
|
||||
logger.warning("No valid training samples")
|
||||
return
|
||||
|
||||
# Combine all training data
|
||||
X_df = pd.concat(all_features, ignore_index=True)
|
||||
y = np.array(all_targets)
|
||||
|
||||
# Get feature columns
|
||||
exclude_cols = [
|
||||
'pair_id', 'base_asset', 'quote_asset',
|
||||
'spread', 'base_close', 'quote_close', 'base_volume'
|
||||
]
|
||||
self.feature_cols = [c for c in X_df.columns if c not in exclude_cols]
|
||||
|
||||
# Prepare features
|
||||
X = X_df[self.feature_cols].fillna(0)
|
||||
X = X.replace([np.inf, -np.inf], 0)
|
||||
|
||||
# Train model
|
||||
self.model = RandomForestClassifier(
|
||||
n_estimators=300,
|
||||
max_depth=5,
|
||||
min_samples_leaf=30,
|
||||
class_weight={0: 1, 1: 3},
|
||||
random_state=42
|
||||
)
|
||||
self.model.fit(X, y)
|
||||
|
||||
logger.info(
|
||||
"Model trained on %d samples, %d features, %.1f%% positive class",
|
||||
len(X), len(self.feature_cols), y.mean() * 100
|
||||
)
|
||||
self.save_model()
|
||||
|
||||
def score_pairs(
|
||||
self,
|
||||
pair_features: dict[str, pd.DataFrame],
|
||||
pairs: list[TradingPair],
|
||||
timestamp: pd.Timestamp | None = None
|
||||
) -> list[DivergenceSignal]:
|
||||
"""
|
||||
Score all pairs and return ranked signals.
|
||||
|
||||
Args:
|
||||
pair_features: Feature DataFrames by pair_id
|
||||
pairs: List of TradingPair objects
|
||||
timestamp: Current timestamp for feature extraction
|
||||
|
||||
Returns:
|
||||
List of DivergenceSignal sorted by score (descending)
|
||||
"""
|
||||
if self.model is None:
|
||||
logger.warning("Model not trained, returning empty signals")
|
||||
return []
|
||||
|
||||
signals = []
|
||||
pair_map = {p.pair_id: p for p in pairs}
|
||||
|
||||
for pair_id, features in pair_features.items():
|
||||
if pair_id not in pair_map:
|
||||
continue
|
||||
|
||||
pair = pair_map[pair_id]
|
||||
|
||||
# Get latest features
|
||||
if timestamp is not None:
|
||||
valid = features[features.index <= timestamp]
|
||||
if len(valid) == 0:
|
||||
continue
|
||||
latest = valid.iloc[-1]
|
||||
ts = valid.index[-1]
|
||||
else:
|
||||
latest = features.iloc[-1]
|
||||
ts = features.index[-1]
|
||||
|
||||
z_score = latest['z_score']
|
||||
|
||||
# Skip if Z-score below threshold
|
||||
if abs(z_score) < self.config.z_entry_threshold:
|
||||
continue
|
||||
|
||||
# Prepare features for prediction
|
||||
feature_row = latest[self.feature_cols].fillna(0).infer_objects(copy=False)
|
||||
feature_row = feature_row.replace([np.inf, -np.inf], 0)
|
||||
X = pd.DataFrame([feature_row.values], columns=self.feature_cols)
|
||||
|
||||
# Predict probability
|
||||
prob = self.model.predict_proba(X)[0, 1]
|
||||
|
||||
# Skip if probability below threshold
|
||||
if prob < self.config.prob_threshold:
|
||||
continue
|
||||
|
||||
# Apply funding rate filter
|
||||
# Block trades where funding opposes our direction
|
||||
base_funding = latest.get('base_funding', 0) or 0
|
||||
funding_thresh = self.config.funding_threshold
|
||||
|
||||
if z_score > 0: # Short signal
|
||||
# High negative funding = shorts are paying -> skip
|
||||
if base_funding < -funding_thresh:
|
||||
logger.debug(
|
||||
"Skipping %s short: funding too negative (%.4f)",
|
||||
pair.name, base_funding
|
||||
)
|
||||
continue
|
||||
else: # Long signal
|
||||
# High positive funding = longs are paying -> skip
|
||||
if base_funding > funding_thresh:
|
||||
logger.debug(
|
||||
"Skipping %s long: funding too positive (%.4f)",
|
||||
pair.name, base_funding
|
||||
)
|
||||
continue
|
||||
|
||||
# Calculate divergence score
|
||||
divergence_score = abs(z_score) * prob
|
||||
|
||||
# Determine direction
|
||||
# Z > 0: Spread high (base expensive vs quote) -> Short base
|
||||
# Z < 0: Spread low (base cheap vs quote) -> Long base
|
||||
direction = 'short' if z_score > 0 else 'long'
|
||||
|
||||
signal = DivergenceSignal(
|
||||
pair=pair,
|
||||
z_score=z_score,
|
||||
probability=prob,
|
||||
divergence_score=divergence_score,
|
||||
direction=direction,
|
||||
base_price=latest['base_close'],
|
||||
quote_price=latest['quote_close'],
|
||||
atr=latest.get('atr_base', 0),
|
||||
atr_pct=latest.get('atr_pct_base', 0.02),
|
||||
timestamp=ts
|
||||
)
|
||||
signals.append(signal)
|
||||
|
||||
# Sort by divergence score (highest first)
|
||||
signals.sort(key=lambda s: s.divergence_score, reverse=True)
|
||||
|
||||
if signals:
|
||||
logger.debug(
|
||||
"Scored %d pairs, top: %s (score=%.3f, z=%.2f, p=%.2f)",
|
||||
len(signals),
|
||||
signals[0].pair.name,
|
||||
signals[0].divergence_score,
|
||||
signals[0].z_score,
|
||||
signals[0].probability
|
||||
)
|
||||
|
||||
return signals
|
||||
|
||||
def select_best_pair(
|
||||
self,
|
||||
signals: list[DivergenceSignal]
|
||||
) -> DivergenceSignal | None:
|
||||
"""
|
||||
Select the best pair from scored signals.
|
||||
|
||||
Args:
|
||||
signals: List of DivergenceSignal (pre-sorted by score)
|
||||
|
||||
Returns:
|
||||
Best signal or None if no valid candidates
|
||||
"""
|
||||
if not signals:
|
||||
return None
|
||||
return signals[0]
|
||||
Reference in New Issue
Block a user