"""
Batch candle processor for historical trade data.

This module provides the BatchCandleProcessor class for building OHLCV candles
from historical trade data in batch mode.
"""
from datetime import datetime
|
|
|
|
|
from typing import Dict, List, Any, Iterator
|
|
|
|
|
from collections import defaultdict
|
|
|
|
|
|
|
|
|
|
from ..data_types import StandardizedTrade, OHLCVCandle, ProcessingStats
|
|
|
|
|
from .bucket import TimeframeBucket
|
2025-06-09 14:18:32 +08:00
|
|
|
from .utils import parse_timeframe
|
2025-06-07 01:17:22 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class BatchCandleProcessor:
    """
    Batch candle processor for historical trade data.

    This class processes trades in batch mode, building candles for multiple
    timeframes simultaneously. It's optimized for processing large amounts
    of historical trade data efficiently.
    """

    def __init__(self,
                 symbol: str,
                 exchange: str,
                 timeframes: List[str],
                 component_name: str = "batch_candle_processor",
                 logger = None):
        """
        Initialize batch candle processor.

        Args:
            symbol: Trading symbol (e.g., 'BTC-USDT')
            exchange: Exchange name
            timeframes: List of timeframes to process (e.g., ['1m', '5m'])
            component_name: Name for logging/stats
            logger: Optional logger instance
        """
        self.symbol = symbol
        self.exchange = exchange
        self.timeframes = timeframes
        self.component_name = component_name
        self.logger = logger

        # Cumulative counters (trades_processed, candles_emitted, ...)
        self.stats = ProcessingStats()

    def process_trades_to_candles(self, trades: Iterator[StandardizedTrade]) -> List[OHLCVCandle]:
        """
        Process trades in batch and return completed candles.

        Every bucket is emitted with is_complete=True, including the trailing
        bucket, which may cover only part of its timeframe — callers that need
        to treat the last candle as partial must handle that themselves.

        Args:
            trades: Iterator of trades to process

        Returns:
            List of completed candles for all timeframes, sorted by
            (timeframe, end_time)
        """
        # One bucket map per timeframe, keyed by bucket start time.
        buckets: Dict[str, Dict[datetime, TimeframeBucket]] = defaultdict(dict)

        for trade in trades:
            self.stats.trades_processed += 1

            # Route the trade into the appropriate bucket of every timeframe.
            for timeframe in self.timeframes:
                bucket_start = self._get_bucket_start_time(trade.timestamp, timeframe)

                # Single lookup instead of a `not in` check followed by a
                # second indexing — this runs once per trade per timeframe.
                tf_buckets = buckets[timeframe]
                bucket = tf_buckets.get(bucket_start)
                if bucket is None:
                    bucket = TimeframeBucket(
                        symbol=self.symbol,
                        timeframe=timeframe,
                        start_time=bucket_start,
                        exchange=self.exchange
                    )
                    tf_buckets[bucket_start] = bucket

                bucket.add_trade(trade)

        # Convert all buckets to candles.
        candles = []
        for timeframe_buckets in buckets.values():
            for bucket in timeframe_buckets.values():
                candles.append(bucket.to_candle(is_complete=True))
                self.stats.candles_emitted += 1

        return sorted(candles, key=lambda c: (c.timeframe, c.end_time))

    def _get_bucket_start_time(self, timestamp: datetime, timeframe: str) -> datetime:
        """
        Calculate the start time (left edge) of the bucket this timestamp belongs to.

        The bucket start is obtained by flooring the timestamp to the nearest
        multiple of the timeframe within its parent unit (e.g. '5m' floors the
        minute to 0, 5, 10, ...). Note that although buckets are keyed by this
        left-aligned start time, emitted candles are ordered by their
        right-aligned end_time (see process_trades_to_candles).

        Args:
            timestamp: Trade timestamp
            timeframe: Time period (e.g., '1m', '5m', '1h')

        Returns:
            Start time for the appropriate bucket

        Raises:
            ValueError: If the timeframe is malformed or unsupported
        """
        number, unit = parse_timeframe(timeframe)

        if unit == 's':
            seconds = (timestamp.second // number) * number
            return timestamp.replace(second=seconds, microsecond=0)
        elif unit == 'm':
            minutes = (timestamp.minute // number) * number
            return timestamp.replace(minute=minutes, second=0, microsecond=0)
        elif unit == 'h':
            hours = (timestamp.hour // number) * number
            return timestamp.replace(hour=hours, minute=0, second=0, microsecond=0)
        elif unit == 'd':
            # For days, always floor to midnight. The day multiplier is
            # intentionally not applied here — '3d' buckets like '1d'.
            return timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
        else:
            raise ValueError(f"Unsupported timeframe unit: {unit}")

    def get_stats(self) -> Dict[str, Any]:
        """Get processing statistics."""
        return {
            "component": self.component_name,
            "stats": self.stats.to_dict()
        }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Explicit public API: only the processor class is exported via `import *`.
__all__ = ['BatchCandleProcessor']
|