""" Funding Rate Fetcher for Multi-Pair Strategy. Fetches historical funding rates from OKX for all assets. CryptoQuant only supports BTC/ETH, so we use OKX for the full universe. """ import time from pathlib import Path from datetime import datetime, timezone import ccxt import pandas as pd from engine.logging_config import get_logger logger = get_logger(__name__) class FundingRateFetcher: """ Fetches and caches funding rate data from OKX. OKX funding rates are settled every 8 hours (00:00, 08:00, 16:00 UTC). This fetcher retrieves historical funding rate data and aligns it to hourly candles for use in the multi-pair strategy. """ def __init__(self, cache_dir: str = "data/funding"): self.cache_dir = Path(cache_dir) self.cache_dir.mkdir(parents=True, exist_ok=True) self.exchange: ccxt.okx | None = None def _init_exchange(self) -> None: """Initialize OKX exchange connection.""" if self.exchange is None: self.exchange = ccxt.okx({ 'enableRateLimit': True, 'options': {'defaultType': 'swap'} }) self.exchange.load_markets() def fetch_funding_history( self, symbol: str, start_date: str | None = None, end_date: str | None = None, limit: int = 100 ) -> pd.DataFrame: """ Fetch historical funding rates for a symbol. Args: symbol: Asset symbol (e.g., 'BTC-USDT') start_date: Start date (YYYY-MM-DD) end_date: End date (YYYY-MM-DD) limit: Max records per request Returns: DataFrame with funding rate history """ self._init_exchange() # Convert symbol format base = symbol.replace('-USDT', '') okx_symbol = f"{base}/USDT:USDT" try: # OKX funding rate history endpoint # Uses fetch_funding_rate_history if available all_funding = [] # Parse dates if start_date: since = self.exchange.parse8601(f"{start_date}T00:00:00Z") else: # Default to 1 year ago since = self.exchange.milliseconds() - 365 * 24 * 60 * 60 * 1000 if end_date: until = self.exchange.parse8601(f"{end_date}T23:59:59Z") else: until = self.exchange.milliseconds() # Fetch in batches current_since = since while current_since < until: try: funding = self.exchange.fetch_funding_rate_history( okx_symbol, since=current_since, limit=limit ) if not funding: break all_funding.extend(funding) # Move to next batch last_ts = funding[-1]['timestamp'] if last_ts <= current_since: break current_since = last_ts + 1 time.sleep(0.1) # Rate limit except Exception as e: logger.warning( "Error fetching funding batch for %s: %s", symbol, str(e)[:50] ) break if not all_funding: return pd.DataFrame() # Convert to DataFrame df = pd.DataFrame(all_funding) df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True) df.set_index('timestamp', inplace=True) df = df[['fundingRate']].rename(columns={'fundingRate': 'funding_rate'}) df.sort_index(inplace=True) # Remove duplicates df = df[~df.index.duplicated(keep='first')] logger.info("Fetched %d funding records for %s", len(df), symbol) return df except Exception as e: logger.error("Failed to fetch funding for %s: %s", symbol, e) return pd.DataFrame() def fetch_all_assets( self, assets: list[str], start_date: str | None = None, end_date: str | None = None ) -> pd.DataFrame: """ Fetch funding rates for all assets and combine. Args: assets: List of asset symbols (e.g., ['BTC-USDT', 'ETH-USDT']) start_date: Start date end_date: End date Returns: Combined DataFrame with columns like 'btc_funding', 'eth_funding', etc. """ combined = pd.DataFrame() for symbol in assets: df = self.fetch_funding_history(symbol, start_date, end_date) if df.empty: continue # Rename column asset_name = symbol.replace('-USDT', '').lower() col_name = f"{asset_name}_funding" df = df.rename(columns={'funding_rate': col_name}) if combined.empty: combined = df else: combined = combined.join(df, how='outer') time.sleep(0.2) # Be nice to API # Forward fill to hourly (funding is every 8h) if not combined.empty: combined = combined.sort_index() combined = combined.ffill() return combined def save_to_cache(self, df: pd.DataFrame, filename: str = "funding_rates.csv") -> None: """Save funding data to cache file.""" path = self.cache_dir / filename df.to_csv(path) logger.info("Saved funding rates to %s", path) def load_from_cache(self, filename: str = "funding_rates.csv") -> pd.DataFrame | None: """Load funding data from cache if available.""" path = self.cache_dir / filename if path.exists(): df = pd.read_csv(path, index_col='timestamp', parse_dates=True) logger.info("Loaded funding rates from cache: %d rows", len(df)) return df return None def get_funding_data( self, assets: list[str], start_date: str | None = None, end_date: str | None = None, use_cache: bool = True, force_refresh: bool = False ) -> pd.DataFrame: """ Get funding data, using cache if available. Args: assets: List of asset symbols start_date: Start date end_date: End date use_cache: Whether to use cached data force_refresh: Force refresh even if cache exists Returns: DataFrame with funding rates for all assets """ cache_file = "funding_rates.csv" # Try cache first if use_cache and not force_refresh: cached = self.load_from_cache(cache_file) if cached is not None: # Check if cache covers requested range if start_date and end_date: start_ts = pd.Timestamp(start_date, tz='UTC') end_ts = pd.Timestamp(end_date, tz='UTC') if cached.index.min() <= start_ts and cached.index.max() >= end_ts: # Filter to requested range return cached[(cached.index >= start_ts) & (cached.index <= end_ts)] # Fetch fresh data logger.info("Fetching fresh funding rate data...") df = self.fetch_all_assets(assets, start_date, end_date) if not df.empty and use_cache: self.save_to_cache(df, cache_file) return df def download_funding_data(): """Download funding data for all multi-pair assets.""" from strategies.multi_pair.config import MultiPairConfig config = MultiPairConfig() fetcher = FundingRateFetcher() # Fetch last year of data end_date = datetime.now(timezone.utc).strftime("%Y-%m-%d") start_date = (datetime.now(timezone.utc) - pd.Timedelta(days=365)).strftime("%Y-%m-%d") logger.info("Downloading funding rates for %d assets...", len(config.assets)) logger.info("Date range: %s to %s", start_date, end_date) df = fetcher.get_funding_data( config.assets, start_date=start_date, end_date=end_date, force_refresh=True ) if not df.empty: logger.info("Downloaded %d funding rate records", len(df)) logger.info("Columns: %s", list(df.columns)) else: logger.warning("No funding data downloaded") return df if __name__ == "__main__": from engine.logging_config import setup_logging setup_logging() download_funding_data()