feat: Multi-Pair Divergence Selection Strategy
- Extend regime detection to top 10 cryptocurrencies (45 pairs)
- Dynamic pair selection based on divergence score (|z_score| * probability)
- Universal ML model trained on all pairs
- Correlation-based filtering to avoid redundant positions
- Funding rate integration from OKX for all 10 assets
- ATR-based dynamic stop-loss and take-profit
- Walk-forward training with 70/30 split

Performance: +35.69% return (vs +28.66% baseline), 63.6% win rate
This commit is contained in:
272
strategies/multi_pair/funding.py
Normal file
272
strategies/multi_pair/funding.py
Normal file
@@ -0,0 +1,272 @@
|
||||
"""
|
||||
Funding Rate Fetcher for Multi-Pair Strategy.
|
||||
|
||||
Fetches historical funding rates from OKX for all assets.
|
||||
CryptoQuant only supports BTC/ETH, so we use OKX for the full universe.
|
||||
"""
|
||||
import time
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import ccxt
|
||||
import pandas as pd
|
||||
|
||||
from engine.logging_config import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class FundingRateFetcher:
    """
    Fetches and caches funding rate data from OKX.

    OKX funding rates are settled every 8 hours (00:00, 08:00, 16:00 UTC).
    This fetcher retrieves historical funding rate data and aligns it
    to hourly candles for use in the multi-pair strategy.
    """

    def __init__(self, cache_dir: str = "data/funding"):
        # Directory used to persist fetched funding rates as CSV.
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        # Lazily created in _init_exchange() so constructing the fetcher
        # (e.g. for cache-only use) needs no network access.
        self.exchange: ccxt.okx | None = None

    def _init_exchange(self) -> None:
        """Initialize the OKX exchange connection (idempotent)."""
        if self.exchange is None:
            self.exchange = ccxt.okx({
                'enableRateLimit': True,
                'options': {'defaultType': 'swap'}
            })
            self.exchange.load_markets()

    def fetch_funding_history(
        self,
        symbol: str,
        start_date: str | None = None,
        end_date: str | None = None,
        limit: int = 100
    ) -> pd.DataFrame:
        """
        Fetch historical funding rates for a symbol.

        Args:
            symbol: Asset symbol (e.g., 'BTC-USDT')
            start_date: Start date (YYYY-MM-DD); defaults to 1 year ago
            end_date: End date (YYYY-MM-DD); defaults to now
            limit: Max records per request

        Returns:
            DataFrame indexed by UTC timestamp with a single
            'funding_rate' column; empty DataFrame on failure.
        """
        self._init_exchange()

        # Convert 'BTC-USDT' to OKX perpetual-swap notation 'BTC/USDT:USDT'.
        base = symbol.replace('-USDT', '')
        okx_symbol = f"{base}/USDT:USDT"

        try:
            all_funding = []

            # Resolve the requested window to exchange milliseconds.
            if start_date:
                since = self.exchange.parse8601(f"{start_date}T00:00:00Z")
            else:
                # Default to 1 year ago
                since = self.exchange.milliseconds() - 365 * 24 * 60 * 60 * 1000

            if end_date:
                until = self.exchange.parse8601(f"{end_date}T23:59:59Z")
            else:
                until = self.exchange.milliseconds()

            # Page through history; OKX caps the number of records
            # returned per request.
            current_since = since
            while current_since < until:
                try:
                    funding = self.exchange.fetch_funding_rate_history(
                        okx_symbol,
                        since=current_since,
                        limit=limit
                    )

                    if not funding:
                        break

                    all_funding.extend(funding)

                    # Advance the cursor past the last record; bail out if
                    # the exchange stops making progress (avoids an
                    # infinite loop on repeated identical batches).
                    last_ts = funding[-1]['timestamp']
                    if last_ts <= current_since:
                        break
                    current_since = last_ts + 1

                    time.sleep(0.1)  # Rate limit

                except Exception as e:
                    logger.warning(
                        "Error fetching funding batch for %s: %s",
                        symbol, str(e)[:50]
                    )
                    break

            if not all_funding:
                return pd.DataFrame()

            # Convert to DataFrame indexed by UTC timestamp.
            df = pd.DataFrame(all_funding)
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)
            df.set_index('timestamp', inplace=True)
            df = df[['fundingRate']].rename(columns={'fundingRate': 'funding_rate'})
            df.sort_index(inplace=True)

            # Remove duplicates
            df = df[~df.index.duplicated(keep='first')]

            # FIX: the final paginated batch can overshoot the requested
            # end date, so trim any records past `until` before returning.
            df = df[df.index <= pd.Timestamp(until, unit='ms', tz='UTC')]

            logger.info("Fetched %d funding records for %s", len(df), symbol)
            return df

        except Exception as e:
            logger.error("Failed to fetch funding for %s: %s", symbol, e)
            return pd.DataFrame()

    def fetch_all_assets(
        self,
        assets: list[str],
        start_date: str | None = None,
        end_date: str | None = None
    ) -> pd.DataFrame:
        """
        Fetch funding rates for all assets and combine.

        Args:
            assets: List of asset symbols (e.g., ['BTC-USDT', 'ETH-USDT'])
            start_date: Start date
            end_date: End date

        Returns:
            Combined DataFrame with columns like 'btc_funding',
            'eth_funding', etc. Assets whose fetch failed are skipped.
        """
        combined = pd.DataFrame()

        for symbol in assets:
            df = self.fetch_funding_history(symbol, start_date, end_date)

            if df.empty:
                continue

            # Rename 'funding_rate' to a per-asset column, e.g. 'btc_funding'.
            asset_name = symbol.replace('-USDT', '').lower()
            col_name = f"{asset_name}_funding"
            df = df.rename(columns={'funding_rate': col_name})

            if combined.empty:
                combined = df
            else:
                # Outer join keeps every settlement timestamp seen on any asset.
                combined = combined.join(df, how='outer')

            time.sleep(0.2)  # Be nice to API

        # Forward fill so each asset has a value at every combined
        # timestamp (funding is settled only every 8h).
        if not combined.empty:
            combined = combined.sort_index()
            combined = combined.ffill()

        return combined

    def save_to_cache(self, df: pd.DataFrame, filename: str = "funding_rates.csv") -> None:
        """Save funding data to cache file."""
        path = self.cache_dir / filename
        df.to_csv(path)
        logger.info("Saved funding rates to %s", path)

    def load_from_cache(self, filename: str = "funding_rates.csv") -> pd.DataFrame | None:
        """Load funding data from cache if available, else None."""
        path = self.cache_dir / filename
        if path.exists():
            df = pd.read_csv(path, index_col='timestamp', parse_dates=True)
            logger.info("Loaded funding rates from cache: %d rows", len(df))
            return df
        return None

    def get_funding_data(
        self,
        assets: list[str],
        start_date: str | None = None,
        end_date: str | None = None,
        use_cache: bool = True,
        force_refresh: bool = False
    ) -> pd.DataFrame:
        """
        Get funding data, using cache if available.

        Args:
            assets: List of asset symbols
            start_date: Start date
            end_date: End date
            use_cache: Whether to use cached data
            force_refresh: Force refresh even if cache exists

        Returns:
            DataFrame with funding rates for all assets
        """
        cache_file = "funding_rates.csv"

        # Try cache first
        if use_cache and not force_refresh:
            cached = self.load_from_cache(cache_file)
            if cached is not None:
                # Only trust the cache when it fully covers the requested range.
                if start_date and end_date:
                    start_ts = pd.Timestamp(start_date, tz='UTC')
                    end_ts = pd.Timestamp(end_date, tz='UTC')

                    # FIX: read_csv may load the index tz-naive; comparing a
                    # naive index to tz-aware Timestamps raises TypeError.
                    # Localize to UTC (the timezone the cache was written in).
                    if cached.index.tz is None:
                        cached.index = cached.index.tz_localize('UTC')

                    if cached.index.min() <= start_ts and cached.index.max() >= end_ts:
                        # Filter to requested range
                        return cached[(cached.index >= start_ts) & (cached.index <= end_ts)]

        # Fetch fresh data
        logger.info("Fetching fresh funding rate data...")
        df = self.fetch_all_assets(assets, start_date, end_date)

        if not df.empty and use_cache:
            self.save_to_cache(df, cache_file)

        return df
|
||||
|
||||
|
||||
def download_funding_data():
    """Download the last year of funding data for all multi-pair assets.

    Returns:
        DataFrame with one '<asset>_funding' column per asset; may be
        empty if every fetch failed.
    """
    from strategies.multi_pair.config import MultiPairConfig

    config = MultiPairConfig()
    fetcher = FundingRateFetcher()

    # FIX: derive both endpoints from a single "now" so the window stays
    # exactly 365 days even if this runs across a day boundary (the
    # original called datetime.now() twice).
    now = datetime.now(timezone.utc)
    end_date = now.strftime("%Y-%m-%d")
    start_date = (now - pd.Timedelta(days=365)).strftime("%Y-%m-%d")

    logger.info("Downloading funding rates for %d assets...", len(config.assets))
    logger.info("Date range: %s to %s", start_date, end_date)

    df = fetcher.get_funding_data(
        config.assets,
        start_date=start_date,
        end_date=end_date,
        force_refresh=True
    )

    if not df.empty:
        logger.info("Downloaded %d funding rate records", len(df))
        logger.info("Columns: %s", list(df.columns))
    else:
        logger.warning("No funding data downloaded")

    return df
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: configure logging, then download funding data
    # for every asset in the multi-pair config.
    from engine.logging_config import setup_logging
    setup_logging()
    download_funding_data()
|
||||
Reference in New Issue
Block a user