"""
Data Feed for Live Trading.

Fetches real-time OHLCV data from OKX and prepares features for the regime strategy.
"""

import logging
from datetime import datetime, timezone
from typing import Optional

import pandas as pd
import numpy as np
import ta

from .okx_client import OKXClient
from .config import TradingConfig, PathConfig

logger = logging.getLogger(__name__)


class DataFeed:
    """
    Real-time data feed for the regime strategy.

    Fetches BTC and ETH OHLCV data from OKX and calculates
    the spread-based features required by the ML model.
    """

    def __init__(
        self,
        okx_client: OKXClient,
        trading_config: TradingConfig,
        path_config: PathConfig
    ):
        """
        Store the collaborators and eagerly load the optional CryptoQuant data.

        Args:
            okx_client: Client used for OHLCV and funding-rate requests.
            trading_config: Supplies symbols, timeframe, candle count and
                the z-score window.
            path_config: Supplies ``cq_data_path`` for the CryptoQuant CSV.
        """
        self.client = okx_client
        self.config = trading_config
        self.paths = path_config
        # Stays None when the CSV is missing or cannot be loaded.
        self.cq_data: Optional[pd.DataFrame] = None
        self._load_cq_data()

    def _load_cq_data(self) -> None:
        """
        Load CryptoQuant on-chain data if available.

        Reads the CSV at ``self.paths.cq_data_path`` (indexed by its
        ``timestamp`` column), normalises the index to UTC, removes duplicate
        timestamps (keeping the last row) and sorts chronologically.

        Any failure is logged as a warning and leaves ``self.cq_data`` as
        ``None``; this method never raises.
        """
        try:
            if not self.paths.cq_data_path.exists():
                return

            cq = pd.read_csv(
                self.paths.cq_data_path,
                index_col='timestamp',
                parse_dates=True
            )

            # Naive timestamps are treated as UTC; aware ones are converted
            # so the index is uniformly UTC like the OHLCV frames.
            if cq.index.tz is None:
                cq.index = cq.index.tz_localize('UTC')
            else:
                cq.index = cq.index.tz_convert('UTC')

            # calculate_features() forward-fills via reindex(method='ffill'),
            # which raises on a duplicated or non-monotonic index. Clean the
            # index once here instead of failing on every feature refresh.
            cq = cq[~cq.index.duplicated(keep='last')].sort_index()

            self.cq_data = cq
            logger.info(f"Loaded CryptoQuant data: {len(self.cq_data)} rows")
        except Exception as e:
            # Best-effort: the feed still works without on-chain data.
            logger.warning(f"Could not load CryptoQuant data: {e}")
            self.cq_data = None

    def fetch_ohlcv_data(self) -> tuple[pd.DataFrame, pd.DataFrame]:
        """
        Fetch OHLCV data for BTC and ETH.

        Both requests pass the configured symbol, ``timeframe`` and
        ``candles_to_fetch`` to ``self.client.fetch_ohlcv``.

        Returns:
            Tuple of (btc_df, eth_df) DataFrames
        """
        # Fetch BTC data
        btc_ohlcv = self.client.fetch_ohlcv(
            self.config.btc_symbol,
            self.config.timeframe,
            self.config.candles_to_fetch
        )
        btc_df = self._ohlcv_to_dataframe(btc_ohlcv)

        # Fetch ETH data
        eth_ohlcv = self.client.fetch_ohlcv(
            self.config.eth_symbol,
            self.config.timeframe,
            self.config.candles_to_fetch
        )
        eth_df = self._ohlcv_to_dataframe(eth_ohlcv)

        logger.info(
            f"Fetched {len(btc_df)} BTC candles and {len(eth_df)} ETH candles"
        )

        return btc_df, eth_df

    def _ohlcv_to_dataframe(self, ohlcv: list) -> pd.DataFrame:
        """
        Convert OHLCV list to DataFrame.

        Args:
            ohlcv: Rows of ``[timestamp, open, high, low, close, volume]``;
                the timestamp is interpreted as epoch milliseconds.

        Returns:
            DataFrame indexed by UTC ``timestamp`` with the five
            price/volume columns.
        """
        df = pd.DataFrame(
            ohlcv,
            columns=['timestamp', 'open', 'high', 'low', 'close', 'volume']
        )
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms', utc=True)
        df.set_index('timestamp', inplace=True)
        return df

    def calculate_features(
        self,
        btc_df: pd.DataFrame,
        eth_df: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Calculate spread-based features for the regime strategy.

        Rows containing NaN *or* infinite values in any column are removed
        from the result, so rolling warm-up rows and rows produced by a
        division by zero never reach the caller.

        Args:
            btc_df: BTC OHLCV DataFrame
            eth_df: ETH OHLCV DataFrame

        Returns:
            DataFrame with calculated features
        """
        # Align indices: only timestamps present in both frames are used.
        common_idx = btc_df.index.intersection(eth_df.index)
        df_btc = btc_df.loc[common_idx].copy()
        df_eth = eth_df.loc[common_idx].copy()

        # Calculate spread (ETH/BTC ratio)
        spread = df_eth['close'] / df_btc['close']

        # Z-Score of spread
        z_window = self.config.z_window
        rolling_mean = spread.rolling(window=z_window).mean()
        rolling_std = spread.rolling(window=z_window).std()
        z_score = (spread - rolling_mean) / rolling_std

        # Spread technicals
        spread_rsi = ta.momentum.RSIIndicator(spread, window=14).rsi()
        spread_roc = spread.pct_change(periods=5) * 100
        # One-candle change; the "1h" in the name presumably reflects the
        # configured timeframe — TODO confirm.
        spread_change_1h = spread.pct_change(periods=1)

        # Volume ratio
        vol_ratio = df_eth['volume'] / df_btc['volume']
        vol_ratio_ma = vol_ratio.rolling(window=12).mean()

        # Volatility
        ret_btc = df_btc['close'].pct_change()
        ret_eth = df_eth['close'].pct_change()
        vol_btc = ret_btc.rolling(window=z_window).std()
        vol_eth = ret_eth.rolling(window=z_window).std()
        vol_spread_ratio = vol_eth / vol_btc

        # Build features DataFrame
        features = pd.DataFrame(index=spread.index)
        features['spread'] = spread
        features['z_score'] = z_score
        features['spread_rsi'] = spread_rsi
        features['spread_roc'] = spread_roc
        features['spread_change_1h'] = spread_change_1h
        features['vol_ratio'] = vol_ratio
        features['vol_ratio_rel'] = vol_ratio / vol_ratio_ma
        features['vol_diff_ratio'] = vol_spread_ratio

        # Add price data for reference
        features['btc_close'] = df_btc['close']
        features['eth_close'] = df_eth['close']
        features['eth_volume'] = df_eth['volume']

        # Merge CryptoQuant data if available
        if self.cq_data is not None:
            # reindex returns a new frame, so self.cq_data is not mutated by
            # the derived columns added below.
            cq_aligned = self.cq_data.reindex(features.index, method='ffill')

            # Calculate derived features
            if 'btc_funding' in cq_aligned.columns and 'eth_funding' in cq_aligned.columns:
                cq_aligned['funding_diff'] = (
                    cq_aligned['eth_funding'] - cq_aligned['btc_funding']
                )

            if 'btc_inflow' in cq_aligned.columns and 'eth_inflow' in cq_aligned.columns:
                # +1 in the denominator avoids dividing by a zero inflow.
                cq_aligned['inflow_ratio'] = (
                    cq_aligned['eth_inflow'] / (cq_aligned['btc_inflow'] + 1)
                )

            features = features.join(cq_aligned)

        # dropna() alone keeps +/-inf (e.g. zero BTC volume or zero rolling
        # volatility in a denominator); treat those as missing so they are
        # dropped together with the warm-up NaNs.
        return features.replace([np.inf, -np.inf], np.nan).dropna()

    def get_latest_data(self) -> Optional[pd.DataFrame]:
        """
        Fetch and process latest market data.

        Returns:
            DataFrame with features or None on error (including the case
            where no valid feature rows remain).
        """
        try:
            btc_df, eth_df = self.fetch_ohlcv_data()
            features = self.calculate_features(btc_df, eth_df)

            if features.empty:
                logger.warning("No valid features calculated")
                return None

            logger.info(
                f"Latest data: ETH={features['eth_close'].iloc[-1]:.2f}, "
                f"BTC={features['btc_close'].iloc[-1]:.2f}, "
                f"Z-Score={features['z_score'].iloc[-1]:.3f}"
            )

            return features

        except Exception as e:
            # Boundary handler: log with traceback and signal failure with
            # None instead of propagating.
            logger.error(f"Error fetching market data: {e}", exc_info=True)
            return None

    def get_current_funding_rates(self) -> dict:
        """
        Get current funding rates for BTC and ETH.

        Returns:
            Dictionary with 'btc_funding' and 'eth_funding' rates, plus
            'funding_diff' (ETH minus BTC). 'funding_diff' is None when
            either rate is None.
        """
        btc_funding = self.client.get_funding_rate(self.config.btc_symbol)
        eth_funding = self.client.get_funding_rate(self.config.eth_symbol)

        # NOTE(review): assumes the client may return None for an unavailable
        # rate — confirm against OKXClient. Subtracting None would raise
        # TypeError, so report the difference as missing instead.
        if btc_funding is None or eth_funding is None:
            funding_diff = None
        else:
            funding_diff = eth_funding - btc_funding

        return {
            'btc_funding': btc_funding,
            'eth_funding': eth_funding,
            'funding_diff': funding_diff,
        }