""" Utility functions for market data aggregation. This module provides common utility functions for working with OHLCV candles and trade data aggregation. """ import re from typing import List, Tuple from utils.timeframe_utils import load_timeframe_options import pandas as pd from ..data_types import StandardizedTrade, OHLCVCandle def aggregate_trades_to_candles(trades: List[StandardizedTrade], timeframes: List[str], symbol: str, exchange: str) -> List[OHLCVCandle]: """ Simple utility function to aggregate a list of trades to candles. Args: trades: List of standardized trades timeframes: List of timeframes to generate symbol: Trading symbol exchange: Exchange name Returns: List of completed candles """ from .batch import BatchCandleProcessor processor = BatchCandleProcessor(symbol, exchange, timeframes) return processor.process_trades_to_candles(iter(trades)) def validate_timeframe(timeframe: str) -> bool: """ Validate if timeframe is supported. Args: timeframe: Timeframe string (e.g., '1s', '5s', '10s', '1m', '5m', '1h') Returns: True if supported, False otherwise """ supported = [item['value'] for item in load_timeframe_options()] return timeframe in supported def parse_timeframe(timeframe: str) -> Tuple[int, str]: """ Parse timeframe string into number and unit. Args: timeframe: Timeframe string (e.g., '1s', '5m', '1h') Returns: Tuple of (number, unit) Examples: '1s' -> (1, 's') '5m' -> (5, 'm') '1h' -> (1, 'h') '1d' -> (1, 'd') """ match = re.match(r'^(\d+)([smhd])$', timeframe.lower()) if not match: raise ValueError(f"Invalid timeframe format: {timeframe}") number = int(match.group(1)) unit = match.group(2) # Disallow zero or negative timeframes, as they are not meaningful for bucket intervals if number <= 0: raise ValueError(f"Timeframe value must be positive: {timeframe}") return number, unit def resample_candles_to_timeframe(df: pd.DataFrame, target_timeframe: str) -> pd.DataFrame: """ Resamples a DataFrame of OHLCV candles to a higher timeframe. Args: df (pd.DataFrame): Input DataFrame with a datetime index and 'open', 'high', 'low', 'close', 'volume', and optionally 'trades_count' columns. target_timeframe (str): The target timeframe for resampling (e.g., '1h', '1d'). Returns: pd.DataFrame: Resampled DataFrame with OHLCV data for the target timeframe. """ if df.empty: return pd.DataFrame() # Ensure the DataFrame index is a datetime index if not isinstance(df.index, pd.DatetimeIndex): df['timestamp'] = pd.to_datetime(df['timestamp']) df = df.set_index('timestamp') # Convert timedelta string to pandas frequency string # '1m' -> '1T', '1h' -> '1H', '1d' -> '1D' timeframe_map = { 's': 'S', 'm': 'T', 'h': 'H', 'd': 'D' } # Convert target_timeframe to pandas offset string match = re.match(r'^(\d+)([smhd])$', target_timeframe.lower()) if not match: raise ValueError(f"Invalid target timeframe format: {target_timeframe}") number = match.group(1) unit = timeframe_map.get(match.group(2)) if not unit: raise ValueError(f"Unsupported timeframe unit: {target_timeframe}") resample_freq = f"{number}{unit}" # Define how to aggregate each column ohlcv_dict = { 'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum', } # Only include 'trades_count' if it exists in the DataFrame if 'trades_count' in df.columns: ohlcv_dict['trades_count'] = 'sum' # Resample the data resampled_df = df.resample(resample_freq).apply(ohlcv_dict) # Drop rows where all OHLCV values are NaN (e.g., periods with no data) resampled_df.dropna(subset=['open', 'high', 'low', 'close'], inplace=True) # Fill NaN trades_count with 0 after resampling if 'trades_count' in resampled_df.columns: resampled_df['trades_count'] = resampled_df['trades_count'].fillna(0).astype(int) return resampled_df __all__ = [ 'aggregate_trades_to_candles', 'validate_timeframe', 'parse_timeframe', 'resample_candles_to_timeframe' ]