"""
Batch candle processor for historical trade data.

This module provides the BatchCandleProcessor class for building OHLCV candles
from historical trade data in batch mode.
"""

from collections import defaultdict
from datetime import datetime, timedelta
from typing import Any, Dict, Iterable, Iterator, List, Optional

from ..data_types import StandardizedTrade, OHLCVCandle, ProcessingStats
from .bucket import TimeframeBucket
from .utils import parse_timeframe

# Seconds covered by one unit of each intraday timeframe suffix.
_INTRADAY_UNIT_SECONDS = {'s': 1, 'm': 60, 'h': 3600}

# Ordinal of 1970-01-01; multi-day buckets are anchored to this date.
_EPOCH_ORDINAL = datetime(1970, 1, 1).toordinal()


class BatchCandleProcessor:
    """
    Batch candle processor for historical trade data.

    This class processes trades in batch mode, building candles for multiple
    timeframes simultaneously: every trade is routed into one bucket per
    configured timeframe, and each bucket is converted to a candle once the
    input is exhausted.

    The ``stats`` object is created once in ``__init__`` and is incremented by
    every call to ``process_trades_to_candles``, so its counters accumulate
    across calls on the same instance.
    """

    def __init__(self,
                 symbol: str,
                 exchange: str,
                 timeframes: List[str],
                 component_name: str = "batch_candle_processor",
                 logger: Optional[Any] = None):
        """
        Initialize batch candle processor.

        Args:
            symbol: Trading symbol (e.g., 'BTC-USDT')
            exchange: Exchange name
            timeframes: List of timeframes to process (e.g., ['1m', '5m'])
            component_name: Name reported by ``get_stats``
            logger: Optional logger instance (stored; not used by this class)
        """
        self.symbol = symbol
        self.exchange = exchange
        self.timeframes = timeframes
        self.component_name = component_name
        self.logger = logger

        # Stats tracking
        self.stats = ProcessingStats()

    def process_trades_to_candles(self, trades: Iterable[StandardizedTrade]) -> List[OHLCVCandle]:
        """
        Process trades in batch and return completed candles.

        The input is consumed in a single pass, so a one-shot iterator or
        generator is fine. Trades do not need to be ordered: buckets are keyed
        by their start time, not by arrival order.

        Args:
            trades: Iterable of trades to process

        Returns:
            List of candles for all timeframes, each built with
            ``is_complete=True`` and sorted by ``(timeframe, end_time)``.

        Raises:
            ValueError: If a configured timeframe is malformed or unsupported
                (raised while bucketing the first trade).
        """
        # Drop repeated timeframe entries (keeping first-seen order) so that a
        # trade is never added twice to the same bucket.
        timeframes = tuple(dict.fromkeys(self.timeframes))

        # timeframe -> bucket start -> bucket
        buckets: Dict[str, Dict[datetime, TimeframeBucket]] = defaultdict(dict)

        for trade in trades:
            self.stats.trades_processed += 1

            for timeframe in timeframes:
                bucket_start = self._get_bucket_start_time(trade.timestamp, timeframe)
                timeframe_buckets = buckets[timeframe]

                bucket = timeframe_buckets.get(bucket_start)
                if bucket is None:
                    bucket = TimeframeBucket(
                        symbol=self.symbol,
                        timeframe=timeframe,
                        start_time=bucket_start,
                        exchange=self.exchange
                    )
                    timeframe_buckets[bucket_start] = bucket

                bucket.add_trade(trade)

        # The whole input has been consumed, so every bucket is final.
        candles = []
        for timeframe_buckets in buckets.values():
            for bucket in timeframe_buckets.values():
                candles.append(bucket.to_candle(is_complete=True))
                self.stats.candles_emitted += 1

        return sorted(candles, key=lambda candle: (candle.timeframe, candle.end_time))

    def _get_bucket_start_time(self, timestamp: datetime, timeframe: str) -> datetime:
        """
        Calculate the start time of the bucket that a timestamp belongs to.

        The returned value is the bucket's inclusive lower edge.
        NOTE(review): the original docstring said candles use RIGHT-ALIGNED
        timestamps; that labelling is presumably applied by TimeframeBucket
        (candles are sorted by ``end_time``) - confirm there.

        Flooring works on the timestamp's own wall-clock fields; ``tzinfo`` is
        preserved and no timezone conversion is performed.

        * Intraday units ('s', 'm', 'h') are floored on a grid that restarts at
          midnight of the timestamp's day. Intervals that do not divide the
          enclosing unit (e.g. '45m', '90m') therefore keep their declared
          width instead of restarting every hour. An interval longer than one
          day floors to midnight.
        * Day units are floored to midnight; multi-day intervals (e.g. '3d')
          are aligned to a grid anchored at 1970-01-01.

        Args:
            timestamp: Trade timestamp
            timeframe: Time period (e.g., '1m', '5m', '1h')

        Returns:
            Start time for the appropriate bucket

        Raises:
            ValueError: If the timeframe is malformed or unsupported, or its
                interval is not positive
        """
        number, unit = parse_timeframe(timeframe)

        if unit != 'd' and unit not in _INTRADAY_UNIT_SECONDS:
            raise ValueError(f"Unsupported timeframe unit: {unit}")
        if number <= 0:
            # Without this guard a zero interval surfaces as ZeroDivisionError.
            raise ValueError(f"Timeframe interval must be positive: {timeframe}")

        if unit == 'd':
            midnight = timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
            if number == 1:
                return midnight
            # Step back to the most recent day on the epoch-anchored grid.
            days_into_bucket = (midnight.toordinal() - _EPOCH_ORDINAL) % number
            return midnight - timedelta(days=days_into_bucket)

        step = number * _INTRADAY_UNIT_SECONDS[unit]
        elapsed = timestamp.hour * 3600 + timestamp.minute * 60 + timestamp.second
        floored = (elapsed // step) * step

        hours, remainder = divmod(floored, 3600)
        minutes, seconds = divmod(remainder, 60)
        # replace() keeps the date, tzinfo and fold of the original timestamp.
        return timestamp.replace(hour=hours, minute=minutes, second=seconds, microsecond=0)

    def get_stats(self) -> Dict[str, Any]:
        """Get processing statistics."""
        return {
            "component": self.component_name,
            "stats": self.stats.to_dict()
        }


__all__ = ['BatchCandleProcessor']