feat: Multi-Pair Divergence Selection Strategy
- Extend regime detection to top 10 cryptocurrencies (45 pairs)
- Dynamic pair selection based on divergence score (|z_score| * probability)
- Universal ML model trained on all pairs
- Correlation-based filtering to avoid redundant positions
- Funding rate integration from OKX for all 10 assets
- ATR-based dynamic stop-loss and take-profit
- Walk-forward training with 70/30 split

Performance: +35.69% return (vs +28.66% baseline), 63.6% win rate
This commit is contained in:
272
strategies/multi_pair/funding.py
Normal file
272
strategies/multi_pair/funding.py
Normal file
@@ -0,0 +1,272 @@
|
||||
"""
|
||||
Funding Rate Fetcher for Multi-Pair Strategy.
|
||||
|
||||
Fetches historical funding rates from OKX for all assets.
|
||||
CryptoQuant only supports BTC/ETH, so we use OKX for the full universe.
|
||||
"""
|
||||
import time
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import ccxt
|
||||
import pandas as pd
|
||||
|
||||
from engine.logging_config import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class FundingRateFetcher:
    """
    Fetches and caches funding rate data from OKX.

    OKX funding rates are settled every 8 hours (00:00, 08:00, 16:00 UTC).
    This fetcher retrieves historical funding rate data and aligns it
    to hourly candles for use in the multi-pair strategy.
    """

    def __init__(self, cache_dir: str = "data/funding"):
        # Directory used to persist fetched funding rates as CSV.
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        # Lazily created in _init_exchange() so constructing the fetcher
        # (e.g. for cache-only use) needs no network access.
        self.exchange: ccxt.okx | None = None

    def _init_exchange(self) -> None:
        """Initialize the OKX exchange connection (idempotent)."""
        if self.exchange is None:
            self.exchange = ccxt.okx({
                'enableRateLimit': True,
                'options': {'defaultType': 'swap'}
            })
            self.exchange.load_markets()

    def fetch_funding_history(
        self,
        symbol: str,
        start_date: str | None = None,
        end_date: str | None = None,
        limit: int = 100
    ) -> pd.DataFrame:
        """
        Fetch historical funding rates for a symbol.

        Args:
            symbol: Asset symbol (e.g., 'BTC-USDT')
            start_date: Start date (YYYY-MM-DD); defaults to 1 year ago
            end_date: End date (YYYY-MM-DD); defaults to now
            limit: Max records per request

        Returns:
            DataFrame indexed by UTC timestamp with a single
            'funding_rate' column; empty DataFrame on failure.
        """
        self._init_exchange()

        # Convert 'BTC-USDT' to OKX perpetual-swap notation 'BTC/USDT:USDT'.
        base = symbol.replace('-USDT', '')
        okx_symbol = f"{base}/USDT:USDT"

        try:
            all_funding = []

            # Resolve the requested window to exchange milliseconds.
            if start_date:
                since = self.exchange.parse8601(f"{start_date}T00:00:00Z")
            else:
                # Default to 1 year ago
                since = self.exchange.milliseconds() - 365 * 24 * 60 * 60 * 1000

            if end_date:
                until = self.exchange.parse8601(f"{end_date}T23:59:59Z")
            else:
                until = self.exchange.milliseconds()

            # Page through history; OKX caps the number of records
            # returned per request.
            current_since = since
            while current_since < until:
                try:
                    funding = self.exchange.fetch_funding_rate_history(
                        okx_symbol,
                        since=current_since,
                        limit=limit
                    )

                    if not funding:
                        break

                    all_funding.extend(funding)

                    # Advance the cursor past the last record; bail out if
                    # the exchange stops making progress (avoids an
                    # infinite loop on repeated identical batches).
                    last_ts = funding[-1]['timestamp']
                    if last_ts <= current_since:
                        break
                    current_since = last_ts + 1

                    time.sleep(0.1)  # Rate limit

                except Exception as e:
                    logger.warning(
                        "Error fetching funding batch for %s: %s",
                        symbol, str(e)[:50]
                    )
                    break

            if not all_funding:
                return pd.DataFrame()

            # Convert to DataFrame indexed by UTC timestamp.
            df = pd.DataFrame(all_funding)
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)
            df.set_index('timestamp', inplace=True)
            df = df[['fundingRate']].rename(columns={'fundingRate': 'funding_rate'})
            df.sort_index(inplace=True)

            # Remove duplicates
            df = df[~df.index.duplicated(keep='first')]

            # FIX: the final paginated batch can overshoot the requested
            # end date, so trim any records past `until` before returning.
            df = df[df.index <= pd.Timestamp(until, unit='ms', tz='UTC')]

            logger.info("Fetched %d funding records for %s", len(df), symbol)
            return df

        except Exception as e:
            logger.error("Failed to fetch funding for %s: %s", symbol, e)
            return pd.DataFrame()

    def fetch_all_assets(
        self,
        assets: list[str],
        start_date: str | None = None,
        end_date: str | None = None
    ) -> pd.DataFrame:
        """
        Fetch funding rates for all assets and combine.

        Args:
            assets: List of asset symbols (e.g., ['BTC-USDT', 'ETH-USDT'])
            start_date: Start date
            end_date: End date

        Returns:
            Combined DataFrame with columns like 'btc_funding',
            'eth_funding', etc. Assets whose fetch failed are skipped.
        """
        combined = pd.DataFrame()

        for symbol in assets:
            df = self.fetch_funding_history(symbol, start_date, end_date)

            if df.empty:
                continue

            # Rename 'funding_rate' to a per-asset column, e.g. 'btc_funding'.
            asset_name = symbol.replace('-USDT', '').lower()
            col_name = f"{asset_name}_funding"
            df = df.rename(columns={'funding_rate': col_name})

            if combined.empty:
                combined = df
            else:
                # Outer join keeps every settlement timestamp seen on any asset.
                combined = combined.join(df, how='outer')

            time.sleep(0.2)  # Be nice to API

        # Forward fill so each asset has a value at every combined
        # timestamp (funding is settled only every 8h).
        if not combined.empty:
            combined = combined.sort_index()
            combined = combined.ffill()

        return combined

    def save_to_cache(self, df: pd.DataFrame, filename: str = "funding_rates.csv") -> None:
        """Save funding data to cache file."""
        path = self.cache_dir / filename
        df.to_csv(path)
        logger.info("Saved funding rates to %s", path)

    def load_from_cache(self, filename: str = "funding_rates.csv") -> pd.DataFrame | None:
        """Load funding data from cache if available, else None."""
        path = self.cache_dir / filename
        if path.exists():
            df = pd.read_csv(path, index_col='timestamp', parse_dates=True)
            logger.info("Loaded funding rates from cache: %d rows", len(df))
            return df
        return None

    def get_funding_data(
        self,
        assets: list[str],
        start_date: str | None = None,
        end_date: str | None = None,
        use_cache: bool = True,
        force_refresh: bool = False
    ) -> pd.DataFrame:
        """
        Get funding data, using cache if available.

        Args:
            assets: List of asset symbols
            start_date: Start date
            end_date: End date
            use_cache: Whether to use cached data
            force_refresh: Force refresh even if cache exists

        Returns:
            DataFrame with funding rates for all assets
        """
        cache_file = "funding_rates.csv"

        # Try cache first
        if use_cache and not force_refresh:
            cached = self.load_from_cache(cache_file)
            if cached is not None:
                # Only trust the cache when it fully covers the requested range.
                if start_date and end_date:
                    start_ts = pd.Timestamp(start_date, tz='UTC')
                    end_ts = pd.Timestamp(end_date, tz='UTC')

                    # FIX: read_csv may load the index tz-naive; comparing a
                    # naive index to tz-aware Timestamps raises TypeError.
                    # Localize to UTC (the timezone the cache was written in).
                    if cached.index.tz is None:
                        cached.index = cached.index.tz_localize('UTC')

                    if cached.index.min() <= start_ts and cached.index.max() >= end_ts:
                        # Filter to requested range
                        return cached[(cached.index >= start_ts) & (cached.index <= end_ts)]

        # Fetch fresh data
        logger.info("Fetching fresh funding rate data...")
        df = self.fetch_all_assets(assets, start_date, end_date)

        if not df.empty and use_cache:
            self.save_to_cache(df, cache_file)

        return df
|
||||
|
||||
|
||||
def download_funding_data():
    """Download the last year of funding data for all multi-pair assets.

    Returns:
        DataFrame with one '<asset>_funding' column per asset; may be
        empty if every fetch failed.
    """
    from strategies.multi_pair.config import MultiPairConfig

    config = MultiPairConfig()
    fetcher = FundingRateFetcher()

    # FIX: derive both endpoints from a single "now" so the window stays
    # exactly 365 days even if this runs across a day boundary (the
    # original called datetime.now() twice).
    now = datetime.now(timezone.utc)
    end_date = now.strftime("%Y-%m-%d")
    start_date = (now - pd.Timedelta(days=365)).strftime("%Y-%m-%d")

    logger.info("Downloading funding rates for %d assets...", len(config.assets))
    logger.info("Date range: %s to %s", start_date, end_date)

    df = fetcher.get_funding_data(
        config.assets,
        start_date=start_date,
        end_date=end_date,
        force_refresh=True
    )

    if not df.empty:
        logger.info("Downloaded %d funding rate records", len(df))
        logger.info("Columns: %s", list(df.columns))
    else:
        logger.warning("No funding data downloaded")

    return df
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: configure logging, then download funding data
    # for every asset in the multi-pair config.
    from engine.logging_config import setup_logging
    setup_logging()
    download_funding_data()
|
||||
Reference in New Issue
Block a user