"""
Batch candle processor for historical trade data.

This module provides the BatchCandleProcessor class for building OHLCV candles
from historical trade data in batch mode.
"""
from datetime import datetime
|
|
|
|
|
from typing import Dict, List, Any, Iterator
|
|
|
|
|
from collections import defaultdict
|
|
|
|
|
|
|
|
|
|
from ..data_types import StandardizedTrade, OHLCVCandle, ProcessingStats
|
|
|
|
|
from .bucket import TimeframeBucket
|
2025-06-09 14:18:32 +08:00
|
|
|
from .utils import parse_timeframe
|
2025-06-07 01:17:22 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class BatchCandleProcessor:
    """
    Batch candle processor for historical trade data.

    This class processes trades in batch mode, building candles for multiple
    timeframes simultaneously. It's optimized for processing large amounts
    of historical trade data efficiently.
    """

    def __init__(self,
                 symbol: str,
                 exchange: str,
                 timeframes: List[str],
                 component_name: str = "batch_candle_processor",
                 logger = None):
        """
        Initialize batch candle processor.

        Args:
            symbol: Trading symbol (e.g., 'BTC-USDT')
            exchange: Exchange name
            timeframes: List of timeframes to process (e.g., ['1m', '5m'])
            component_name: Name for logging/stats
            logger: Optional logger instance
        """
        self.symbol = symbol
        self.exchange = exchange
        self.timeframes = timeframes
        self.component_name = component_name
        self.logger = logger

        # Cumulative counters (trades_processed, candles_emitted, ...)
        self.stats = ProcessingStats()

    def process_trades_to_candles(self, trades: Iterator[StandardizedTrade]) -> List[OHLCVCandle]:
        """
        Process trades in batch and return completed candles.

        Every bucket is emitted with is_complete=True, including the trailing
        bucket, which may cover only part of its timeframe — callers that need
        to treat the last candle as partial must handle that themselves.

        Args:
            trades: Iterator of trades to process

        Returns:
            List of completed candles for all timeframes, sorted by
            (timeframe, end_time)
        """
        # One bucket map per timeframe, keyed by bucket start time.
        buckets: Dict[str, Dict[datetime, TimeframeBucket]] = defaultdict(dict)

        for trade in trades:
            self.stats.trades_processed += 1

            # Route the trade into the appropriate bucket of every timeframe.
            for timeframe in self.timeframes:
                bucket_start = self._get_bucket_start_time(trade.timestamp, timeframe)

                # Single lookup instead of a `not in` check followed by a
                # second indexing — this runs once per trade per timeframe.
                tf_buckets = buckets[timeframe]
                bucket = tf_buckets.get(bucket_start)
                if bucket is None:
                    bucket = TimeframeBucket(
                        symbol=self.symbol,
                        timeframe=timeframe,
                        start_time=bucket_start,
                        exchange=self.exchange
                    )
                    tf_buckets[bucket_start] = bucket

                bucket.add_trade(trade)

        # Convert all buckets to candles.
        candles = []
        for timeframe_buckets in buckets.values():
            for bucket in timeframe_buckets.values():
                candles.append(bucket.to_candle(is_complete=True))
                self.stats.candles_emitted += 1

        return sorted(candles, key=lambda c: (c.timeframe, c.end_time))

    def _get_bucket_start_time(self, timestamp: datetime, timeframe: str) -> datetime:
        """
        Calculate the start time (left edge) of the bucket this timestamp belongs to.

        The bucket start is obtained by flooring the timestamp to the nearest
        multiple of the timeframe within its parent unit (e.g. '5m' floors the
        minute to 0, 5, 10, ...). Note that although buckets are keyed by this
        left-aligned start time, emitted candles are ordered by their
        right-aligned end_time (see process_trades_to_candles).

        Args:
            timestamp: Trade timestamp
            timeframe: Time period (e.g., '1m', '5m', '1h')

        Returns:
            Start time for the appropriate bucket

        Raises:
            ValueError: If the timeframe is malformed or unsupported
        """
        number, unit = parse_timeframe(timeframe)

        if unit == 's':
            seconds = (timestamp.second // number) * number
            return timestamp.replace(second=seconds, microsecond=0)
        elif unit == 'm':
            minutes = (timestamp.minute // number) * number
            return timestamp.replace(minute=minutes, second=0, microsecond=0)
        elif unit == 'h':
            hours = (timestamp.hour // number) * number
            return timestamp.replace(hour=hours, minute=0, second=0, microsecond=0)
        elif unit == 'd':
            # For days, always floor to midnight. The day multiplier is
            # intentionally not applied here — '3d' buckets like '1d'.
            return timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
        else:
            raise ValueError(f"Unsupported timeframe unit: {unit}")

    def get_stats(self) -> Dict[str, Any]:
        """Get processing statistics."""
        return {
            "component": self.component_name,
            "stats": self.stats.to_dict()
        }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Explicit public API: only the processor class is exported via `import *`.
__all__ = ['BatchCandleProcessor']
|