lowkey_backtest/strategies/multi_pair/funding.py

"""
Funding Rate Fetcher for Multi-Pair Strategy.

Fetches historical funding rates from OKX for all assets.
CryptoQuant only supports BTC/ETH, so we use OKX for the full universe.
"""
import time
from pathlib import Path
from datetime import datetime, timezone

import ccxt
import pandas as pd

from engine.logging_config import get_logger

logger = get_logger(__name__)


class FundingRateFetcher:
    """
    Fetches and caches funding rate data from OKX.

    OKX funding rates are settled every 8 hours (00:00, 08:00, 16:00 UTC).
    This fetcher retrieves historical funding rate data and aligns it
    to hourly candles for use in the multi-pair strategy.
    """

    def __init__(self, cache_dir: str = "data/funding"):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.exchange: ccxt.okx | None = None

    def _init_exchange(self) -> None:
        """Initialize OKX exchange connection."""
        if self.exchange is None:
            self.exchange = ccxt.okx({
                'enableRateLimit': True,
                'options': {'defaultType': 'swap'}
            })
            self.exchange.load_markets()

    def fetch_funding_history(
        self,
        symbol: str,
        start_date: str | None = None,
        end_date: str | None = None,
        limit: int = 100
    ) -> pd.DataFrame:
        """
        Fetch historical funding rates for a symbol.

        Args:
            symbol: Asset symbol (e.g., 'BTC-USDT')
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            limit: Max records per request

        Returns:
            DataFrame with funding rate history
        """
        self._init_exchange()

        # Convert symbol format
        base = symbol.replace('-USDT', '')
        okx_symbol = f"{base}/USDT:USDT"

        try:
            # OKX funding rate history endpoint
            # Uses fetch_funding_rate_history if available
            all_funding = []

            # Parse dates
            if start_date:
                since = self.exchange.parse8601(f"{start_date}T00:00:00Z")
            else:
                # Default to 1 year ago
                since = self.exchange.milliseconds() - 365 * 24 * 60 * 60 * 1000

            if end_date:
                until = self.exchange.parse8601(f"{end_date}T23:59:59Z")
            else:
                until = self.exchange.milliseconds()

            # Fetch in batches
            current_since = since
            while current_since < until:
                try:
                    funding = self.exchange.fetch_funding_rate_history(
                        okx_symbol,
                        since=current_since,
                        limit=limit
                    )

                    if not funding:
                        break

                    all_funding.extend(funding)

                    # Move to next batch
                    last_ts = funding[-1]['timestamp']
                    if last_ts <= current_since:
                        break
                    current_since = last_ts + 1

                    time.sleep(0.1)  # Rate limit

                except Exception as e:
                    logger.warning(
                        "Error fetching funding batch for %s: %s",
                        symbol, str(e)[:50]
                    )
                    break

            if not all_funding:
                return pd.DataFrame()

            # Convert to DataFrame
            df = pd.DataFrame(all_funding)
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)
            df.set_index('timestamp', inplace=True)
            df = df[['fundingRate']].rename(columns={'fundingRate': 'funding_rate'})
            df.sort_index(inplace=True)

            # Remove duplicates
            df = df[~df.index.duplicated(keep='first')]

            logger.info("Fetched %d funding records for %s", len(df), symbol)
            return df

        except Exception as e:
            logger.error("Failed to fetch funding for %s: %s", symbol, e)
            return pd.DataFrame()

    def fetch_all_assets(
        self,
        assets: list[str],
        start_date: str | None = None,
        end_date: str | None = None
    ) -> pd.DataFrame:
        """
        Fetch funding rates for all assets and combine.

        Args:
            assets: List of asset symbols (e.g., ['BTC-USDT', 'ETH-USDT'])
            start_date: Start date
            end_date: End date

        Returns:
            Combined DataFrame with columns like 'btc_funding', 'eth_funding', etc.
        """
        combined = pd.DataFrame()

        for symbol in assets:
            df = self.fetch_funding_history(symbol, start_date, end_date)

            if df.empty:
                continue

            # Rename column
            asset_name = symbol.replace('-USDT', '').lower()
            col_name = f"{asset_name}_funding"
            df = df.rename(columns={'funding_rate': col_name})

            if combined.empty:
                combined = df
            else:
                combined = combined.join(df, how='outer')

            time.sleep(0.2)  # Be nice to API

        # Forward fill to hourly (funding is every 8h)
        if not combined.empty:
            combined = combined.sort_index()
            combined = combined.ffill()

        return combined

    def save_to_cache(self, df: pd.DataFrame, filename: str = "funding_rates.csv") -> None:
        """Save funding data to cache file."""
        path = self.cache_dir / filename
        df.to_csv(path)
        logger.info("Saved funding rates to %s", path)

    def load_from_cache(self, filename: str = "funding_rates.csv") -> pd.DataFrame | None:
        """Load funding data from cache if available."""
        path = self.cache_dir / filename
        if path.exists():
            df = pd.read_csv(path, index_col='timestamp', parse_dates=True)
            logger.info("Loaded funding rates from cache: %d rows", len(df))
            return df
        return None

    def get_funding_data(
        self,
        assets: list[str],
        start_date: str | None = None,
        end_date: str | None = None,
        use_cache: bool = True,
        force_refresh: bool = False
    ) -> pd.DataFrame:
        """
        Get funding data, using cache if available.

        Args:
            assets: List of asset symbols
            start_date: Start date
            end_date: End date
            use_cache: Whether to use cached data
            force_refresh: Force refresh even if cache exists

        Returns:
            DataFrame with funding rates for all assets
        """
        cache_file = "funding_rates.csv"

        # Try cache first
        if use_cache and not force_refresh:
            cached = self.load_from_cache(cache_file)
            if cached is not None:
                # Check if cache covers requested range
                if start_date and end_date:
                    start_ts = pd.Timestamp(start_date, tz='UTC')
                    end_ts = pd.Timestamp(end_date, tz='UTC')

                    if cached.index.min() <= start_ts and cached.index.max() >= end_ts:
                        # Filter to requested range
                        return cached[(cached.index >= start_ts) & (cached.index <= end_ts)]

        # Fetch fresh data
        logger.info("Fetching fresh funding rate data...")
        df = self.fetch_all_assets(assets, start_date, end_date)

        if not df.empty and use_cache:
            self.save_to_cache(df, cache_file)

        return df


def download_funding_data():
    """Download funding data for all multi-pair assets."""
    from strategies.multi_pair.config import MultiPairConfig

    config = MultiPairConfig()
    fetcher = FundingRateFetcher()

    # Fetch last year of data
    end_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    start_date = (datetime.now(timezone.utc) - pd.Timedelta(days=365)).strftime("%Y-%m-%d")

    logger.info("Downloading funding rates for %d assets...", len(config.assets))
    logger.info("Date range: %s to %s", start_date, end_date)

    df = fetcher.get_funding_data(
        config.assets,
        start_date=start_date,
        end_date=end_date,
        force_refresh=True
    )

    if not df.empty:
        logger.info("Downloaded %d funding rate records", len(df))
        logger.info("Columns: %s", list(df.columns))
    else:
        logger.warning("No funding data downloaded")

    return df


if __name__ == "__main__":
    from engine.logging_config import setup_logging
    setup_logging()
    download_funding_data()