""" Base classes for the incremental strategy system. This module contains the fundamental building blocks for all incremental trading strategies: - IncStrategySignal: Represents trading signals with confidence and metadata - IncStrategyBase: Abstract base class that all incremental strategies must inherit from - TimeframeAggregator: Built-in timeframe aggregation for minute-level data processing The incremental approach allows strategies to: - Process new data points without full recalculation - Maintain bounded memory usage regardless of data history length - Provide real-time performance with minimal latency - Support both initialization and incremental modes - Accept minute-level data and internally aggregate to any timeframe """ import pandas as pd from abc import ABC, abstractmethod from typing import Dict, Optional, List, Union, Any from collections import deque import logging import time # Import new timeframe utilities from ..utils.timeframe_utils import ( aggregate_minute_data_to_timeframe, parse_timeframe_to_minutes, get_latest_complete_bar, MinuteDataBuffer, TimeframeError ) logger = logging.getLogger(__name__) class IncStrategySignal: """ Represents a trading signal from an incremental strategy. A signal encapsulates the strategy's recommendation along with confidence level, optional price target, and additional metadata. Attributes: signal_type (str): Type of signal - "ENTRY", "EXIT", or "HOLD" confidence (float): Confidence level from 0.0 to 1.0 price (Optional[float]): Optional specific price for the signal metadata (Dict): Additional signal data and context Example: # Entry signal with high confidence signal = IncStrategySignal("ENTRY", confidence=0.8) # Exit signal with stop loss price signal = IncStrategySignal("EXIT", confidence=1.0, price=50000, metadata={"type": "STOP_LOSS"}) """ def __init__(self, signal_type: str, confidence: float = 1.0, price: Optional[float] = None, metadata: Optional[Dict] = None): """ Initialize a strategy signal. Args: signal_type: Type of signal ("ENTRY", "EXIT", "HOLD") confidence: Confidence level (0.0 to 1.0) price: Optional specific price for the signal metadata: Additional signal data and context """ self.signal_type = signal_type self.confidence = max(0.0, min(1.0, confidence)) # Clamp to [0,1] self.price = price self.metadata = metadata or {} @classmethod def BUY(cls, confidence: float = 1.0, price: Optional[float] = None, **metadata): """Create a BUY signal.""" return cls("ENTRY", confidence, price, metadata) @classmethod def SELL(cls, confidence: float = 1.0, price: Optional[float] = None, **metadata): """Create a SELL signal.""" return cls("EXIT", confidence, price, metadata) @classmethod def HOLD(cls, confidence: float = 0.0, **metadata): """Create a HOLD signal.""" return cls("HOLD", confidence, None, metadata) def __repr__(self) -> str: """String representation of the signal.""" return (f"IncStrategySignal(type={self.signal_type}, " f"confidence={self.confidence:.2f}, " f"price={self.price}, metadata={self.metadata})") class TimeframeAggregator: """ Handles real-time aggregation of minute data to higher timeframes. This class accumulates minute-level OHLCV data and produces complete bars when a timeframe period is completed. Now uses the new timeframe utilities for mathematically correct aggregation that matches pandas resampling behavior. 
    """

    def __init__(self, signal_type: str, confidence: float = 1.0,
                 price: Optional[float] = None, metadata: Optional[Dict] = None):
        """
        Initialize a strategy signal.

        Args:
            signal_type: Type of signal ("ENTRY", "EXIT", "HOLD")
            confidence: Confidence level (0.0 to 1.0)
            price: Optional specific price for the signal
            metadata: Additional signal data and context
        """
        self.signal_type = signal_type
        self.confidence = max(0.0, min(1.0, confidence))  # Clamp to [0, 1]
        self.price = price
        self.metadata = metadata or {}

    @classmethod
    def BUY(cls, confidence: float = 1.0, price: Optional[float] = None, **metadata):
        """Create a BUY (entry) signal."""
        return cls("ENTRY", confidence, price, metadata)

    @classmethod
    def SELL(cls, confidence: float = 1.0, price: Optional[float] = None, **metadata):
        """Create a SELL (exit) signal."""
        return cls("EXIT", confidence, price, metadata)

    @classmethod
    def HOLD(cls, confidence: float = 0.0, **metadata):
        """Create a HOLD signal."""
        return cls("HOLD", confidence, None, metadata)

    def __repr__(self) -> str:
        """String representation of the signal."""
        return (f"IncStrategySignal(type={self.signal_type}, "
                f"confidence={self.confidence:.2f}, "
                f"price={self.price}, metadata={self.metadata})")


class TimeframeAggregator:
    """
    Handles real-time aggregation of minute data to higher timeframes.

    This class accumulates minute-level OHLCV data and produces complete bars
    when a timeframe period has ended. It uses the timeframe utilities for
    mathematically correct aggregation that matches pandas resampling behavior.

    Key improvements:

    - Uses bar END timestamps (prevents future data leakage)
    - Proper OHLCV aggregation (first/max/min/last/sum)
    - Mathematical equivalence to pandas resampling
    - Memory-efficient buffer management
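    Example (a streaming sketch; ``stream_minute_bars()`` is a hypothetical
    data source yielding (timestamp, ohlcv_dict) pairs):

        aggregator = TimeframeAggregator(timeframe="15min")
        for ts, bar in stream_minute_bars():
            completed = aggregator.update(ts, bar)
            if completed is not None:
                # A full 15min bar just closed; completed['timestamp'] is
                # the bar END time, so no future data leaks into the bar.
                print(completed['timestamp'], completed['close'])

    The output is intended to correspond to end-labeled pandas resampling,
    i.e. approximately ``df.resample("15min", label="right", closed="right")``
    with the first/max/min/last/sum aggregation listed above.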
    """

    def __init__(self, timeframe: str = "15min", max_buffer_size: int = 1440):
        """
        Initialize the timeframe aggregator.

        Args:
            timeframe: Target timeframe string (e.g., "15min", "1h", "4h")
            max_buffer_size: Maximum minute data buffer size (default: 1440 = 24h)
        """
        self.timeframe = timeframe
        self.timeframe_minutes = parse_timeframe_to_minutes(timeframe)

        # Use MinuteDataBuffer for efficient minute data management
        self.minute_buffer = MinuteDataBuffer(max_size=max_buffer_size)

        # Track the last processed bar to avoid reprocessing
        self.last_processed_bar_timestamp = None

        # Performance tracking
        self._bars_completed = 0
        self._minute_points_processed = 0

    def update(self, timestamp: pd.Timestamp,
               ohlcv_data: Dict[str, float]) -> Optional[Dict[str, float]]:
        """
        Update with new minute data and return the completed bar, if any.

        Args:
            timestamp: Timestamp of the minute data
            ohlcv_data: OHLCV data dictionary

        Returns:
            Completed OHLCV bar if a timeframe period ended, None otherwise
        """
        try:
            # Add minute data to the buffer
            self.minute_buffer.add(timestamp, ohlcv_data)
            self._minute_points_processed += 1

            # Get the latest complete bar using the timeframe utilities
            latest_bar = get_latest_complete_bar(
                self.minute_buffer.get_data(),
                self.timeframe
            )

            if latest_bar is None:
                return None

            # Check whether this is a new bar (avoid reprocessing)
            bar_timestamp = latest_bar['timestamp']
            if self.last_processed_bar_timestamp == bar_timestamp:
                return None  # Already processed this bar

            # Update tracking
            self.last_processed_bar_timestamp = bar_timestamp
            self._bars_completed += 1

            return latest_bar

        except TimeframeError as e:
            logger.error(f"Timeframe aggregation error: {e}")
            return None
        except Exception as e:
            logger.error(f"Unexpected error in timeframe aggregation: {e}")
            return None

    def get_current_bar(self) -> Optional[Dict[str, float]]:
        """
        Get the current incomplete bar (for debugging).

        Returns:
            Current incomplete bar data or None
        """
        try:
            # Get recent data and try to aggregate it
            recent_data = self.minute_buffer.get_data(lookback_minutes=self.timeframe_minutes)
            if not recent_data:
                return None

            # Aggregate to get the current (possibly incomplete) bar
            bars = aggregate_minute_data_to_timeframe(recent_data, self.timeframe, "end")
            if bars:
                return bars[-1]  # Return the most recent bar
            return None

        except Exception as e:
            logger.debug(f"Error getting current bar: {e}")
            return None

    def reset(self):
        """Reset aggregator state."""
        self.minute_buffer = MinuteDataBuffer(max_size=self.minute_buffer.max_size)
        self.last_processed_bar_timestamp = None
        self._bars_completed = 0
        self._minute_points_processed = 0

    def get_stats(self) -> Dict[str, Any]:
        """Get aggregator statistics."""
        return {
            'timeframe': self.timeframe,
            'timeframe_minutes': self.timeframe_minutes,
            'minute_points_processed': self._minute_points_processed,
            'bars_completed': self._bars_completed,
            'buffer_size': len(self.minute_buffer.get_data()),
            'last_processed_bar': self.last_processed_bar_timestamp,
        }


class IncStrategyBase(ABC):
    """
    Abstract base class for all incremental trading strategies.

    This class defines the interface that all incremental strategies must implement:

    - get_minimum_buffer_size(): Specify minimum data requirements
    - calculate_on_data(): Process new timeframe bars incrementally
    - supports_incremental_calculation(): Whether the strategy supports incremental mode
    - get_entry_signal(): Generate entry signals
    - get_exit_signal(): Generate exit signals

    The incremental approach allows strategies to:

    - Process new data points without full recalculation
    - Maintain bounded memory usage regardless of data history length
    - Provide real-time performance with minimal latency
    - Support both initialization and incremental modes
    - Accept minute-level data and internally aggregate to any timeframe

    Features:

    - Built-in TimeframeAggregator for minute-level data processing
    - process_data_point() entry point for real-time trading systems
    - Automatic timeframe detection and aggregation
    - Backward compatibility with the original strategy interface

    Attributes:
        name (str): Strategy name
        weight (float): Strategy weight for combination
        params (Dict): Strategy parameters
        calculation_mode (str): Current mode ('initialization' or 'incremental')
        is_warmed_up (bool): Whether the strategy has sufficient data for reliable signals
        timeframe_buffers (Dict): Rolling buffers for different timeframes (internal)
        indicator_states (Dict): Internal indicator calculation states
        timeframe_aggregator (TimeframeAggregator): Built-in aggregator for minute data

    Example:
        class MyIncStrategy(IncStrategyBase):
            def get_minimum_buffer_size(self):
                return {"15min": 50}  # Strategy works on the 15min timeframe

            def calculate_on_data(self, new_data_point, timestamp):
                # Update internal indicators incrementally
                self._update_indicators(new_data_point)

            def supports_incremental_calculation(self):
                return True

            def get_entry_signal(self):
                # Generate a signal based on the current state
                if self._should_enter():
                    return IncStrategySignal.BUY(confidence=0.8)
                return IncStrategySignal.HOLD()

            def get_exit_signal(self):
                return IncStrategySignal.HOLD()

        # Usage with minute-level data:
        strategy = MyIncStrategy(name="my_strategy", params={"timeframe": "15min"})
        for minute_data in live_stream:
            signal = strategy.process_data_point(minute_data['timestamp'], minute_data)
    """

    def __init__(self, name: str, weight: float = 1.0, params: Optional[Dict] = None):
        """
        Initialize the incremental strategy base.
        Args:
            name: Strategy name/identifier
            weight: Strategy weight for combination (default: 1.0)
            params: Strategy-specific parameters
        """
        self.name = name
        self.weight = weight
        self.params = params or {}

        # Calculation state
        self._calculation_mode = "initialization"
        self._is_warmed_up = False
        self._data_points_received = 0

        # Data management
        self._timeframe_buffers = {}
        self._timeframe_last_update = {}
        self._indicator_states = {}
        self._last_signals = {}
        self._signal_history = deque(maxlen=100)  # Keep the last 100 signals

        # Performance tracking
        self._performance_metrics = {
            'update_times': deque(maxlen=1000),
            'signal_generation_times': deque(maxlen=1000),
            'state_validation_failures': 0,
            'data_gaps_handled': 0,
            'minute_data_points_processed': 0,
            'timeframe_bars_completed': 0
        }

        # Configuration
        self._buffer_size_multiplier = 1.5  # Extra buffer for safety
        self._state_validation_enabled = True
        self._max_acceptable_gap = pd.Timedelta(minutes=5)

        # Timeframe aggregation
        self._primary_timeframe = self.params.get("timeframe", "1min")
        self._timeframe_aggregator = None

        # Only create an aggregator if the timeframe is not 1min
        # (1min strategies consume minute data directly)
        if self._primary_timeframe != "1min":
            try:
                self._timeframe_aggregator = TimeframeAggregator(
                    timeframe=self._primary_timeframe,
                    max_buffer_size=1440  # 24 hours of minute data
                )
                logger.info(f"Created timeframe aggregator for {self._primary_timeframe}")
            except TimeframeError as e:
                logger.error(f"Failed to create timeframe aggregator: {e}")
                self._timeframe_aggregator = None

        logger.info(f"Initialized incremental strategy: {self.name} "
                    f"(timeframe: {self._primary_timeframe})")

    def process_data_point(self, timestamp: pd.Timestamp,
                           ohlcv_data: Dict[str, float]) -> Optional[IncStrategySignal]:
        """
        Process a new data point and return a signal if one is generated.

        This is the main entry point for incremental processing. It handles
        timeframe aggregation, buffer updates, and signal generation.

        Args:
            timestamp: Timestamp of the data point
            ohlcv_data: OHLCV data dictionary

        Returns:
            IncStrategySignal if a signal is generated, None otherwise
        """
        start_time = time.time()

        try:
            # Update performance metrics
            self._performance_metrics['minute_data_points_processed'] += 1
            self._data_points_received += 1

            # Handle timeframe aggregation if needed
            if self._timeframe_aggregator is not None:
                completed_bar = self._timeframe_aggregator.update(timestamp, ohlcv_data)
                if completed_bar is not None:
                    # Process the completed timeframe bar
                    self._performance_metrics['timeframe_bars_completed'] += 1
                    return self._process_timeframe_bar(completed_bar['timestamp'], completed_bar)
                else:
                    # No complete bar yet
                    return None
            else:
                # Process minute data directly
                return self._process_timeframe_bar(timestamp, ohlcv_data)

        except Exception as e:
            logger.error(f"Error processing data point in {self.name}: {e}")
            return None
        finally:
            # Track processing time
            processing_time = time.time() - start_time
            self._performance_metrics['update_times'].append(processing_time)

    def _process_timeframe_bar(self, timestamp: pd.Timestamp,
                               ohlcv_data: Dict[str, float]) -> Optional[IncStrategySignal]:
        """Process a complete timeframe bar and generate signals."""
        # Update timeframe buffers
        self._update_timeframe_buffers(ohlcv_data, timestamp)

        # Call the strategy-specific calculation
        self.calculate_on_data(ohlcv_data, timestamp)

        # Check whether the strategy is warmed up
        if not self._is_warmed_up:
            self._check_warmup_status()

        # Generate a signal if warmed up
        if self._is_warmed_up:
            signal_start = time.time()
            signal = self.get_current_signal()
            signal_time = time.time() - signal_start
            self._performance_metrics['signal_generation_times'].append(signal_time)

            # Store the signal in history
            if signal and signal.signal_type != "HOLD":
                self._signal_history.append({
                    'timestamp': timestamp,
                    'signal': signal,
                    'strategy_state': self.get_current_state_summary()
                })

            return signal

        return None

    def _check_warmup_status(self):
        """Check whether the strategy has enough data to be considered warmed up."""
        min_buffer_sizes = self.get_minimum_buffer_size()

        for timeframe, min_size in min_buffer_sizes.items():
            buffer = self._timeframe_buffers.get(timeframe, deque())
            if len(buffer) < min_size:
                return  # Not enough data yet

        # All buffers have sufficient data
        self._is_warmed_up = True
        self._calculation_mode = "incremental"
        logger.info(f"Strategy {self.name} is now warmed up after "
                    f"{self._data_points_received} data points")

    def get_current_signal(self) -> IncStrategySignal:
        """Get the current signal based on strategy state."""
        # Try the entry signal first
        entry_signal = self.get_entry_signal()
        if entry_signal and entry_signal.signal_type != "HOLD":
            return entry_signal

        # Check the exit signal
        exit_signal = self.get_exit_signal()
        if exit_signal and exit_signal.signal_type != "HOLD":
            return exit_signal

        # Default to hold
        return IncStrategySignal.HOLD()

    def get_current_incomplete_bar(self) -> Optional[Dict[str, float]]:
        """Get the current incomplete timeframe bar (for debugging)."""
        if self._timeframe_aggregator is not None:
            return self._timeframe_aggregator.get_current_bar()
        return None

    def get_timeframe_aggregator_stats(self) -> Optional[Dict[str, Any]]:
        """Get timeframe aggregator statistics."""
        if self._timeframe_aggregator is not None:
            return self._timeframe_aggregator.get_stats()
        return None

    def create_minute_data_buffer(self, max_size: int = 1440) -> MinuteDataBuffer:
        """
        Create a MinuteDataBuffer for strategies that need direct minute data management.

        Args:
            max_size: Maximum buffer size in minutes (default: 1440 = 24h)

        Returns:
            MinuteDataBuffer instance
        """
        return MinuteDataBuffer(max_size=max_size)

    def aggregate_minute_data(self, minute_data: List[Dict[str, float]],
                              timeframe: str,
                              timestamp_mode: str = "end") -> List[Dict[str, float]]:
        """
        Helper method to aggregate minute data to a specified timeframe.

        Args:
            minute_data: List of minute OHLCV data
            timeframe: Target timeframe (e.g., "5min", "15min", "1h")
            timestamp_mode: "end" (default) or "start" for bar timestamps

        Returns:
            List of aggregated OHLCV bars
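        Example (a sketch; assumes ``minute_rows`` holds minute OHLCV dicts in
        the same format the minute buffer produces):

            bars_5m = self.aggregate_minute_data(minute_rows, "5min")
            if bars_5m:
                latest = bars_5m[-1]  # end-labeled, fully aggregated bar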
        """
        try:
            return aggregate_minute_data_to_timeframe(minute_data, timeframe, timestamp_mode)
        except TimeframeError as e:
            logger.error(f"Error aggregating minute data in {self.name}: {e}")
            return []

    # Properties

    @property
    def calculation_mode(self) -> str:
        """Get the current calculation mode."""
        return self._calculation_mode

    @property
    def is_warmed_up(self) -> bool:
        """Check whether the strategy is warmed up."""
        return self._is_warmed_up

    # Abstract methods that must be implemented by strategies

    @abstractmethod
    def get_minimum_buffer_size(self) -> Dict[str, int]:
        """
        Get minimum buffer sizes for each timeframe.

        This method specifies how much historical data the strategy needs
        for each timeframe to generate reliable signals.

        Returns:
            Dict[str, int]: Mapping of timeframe to minimum buffer size

        Example:
            return {"15min": 50, "1h": 24}  # 50 15min bars, 24 1h bars
        """
        pass

    @abstractmethod
    def calculate_on_data(self, new_data_point: Dict[str, float],
                          timestamp: pd.Timestamp) -> None:
        """
        Process a new data point and update internal indicators.

        This method is called for each new timeframe bar and should update
        all internal indicators and strategy state incrementally.

        Args:
            new_data_point: New OHLCV data point
            timestamp: Timestamp of the data point
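        Example (a minimal incremental EMA update, assuming the subclass
        defines ``self._ema = None`` and a smoothing factor ``self._alpha``):

            close = new_data_point['close']
            if self._ema is None:
                self._ema = close
            else:
                self._ema = self._alpha * close + (1 - self._alpha) * self._ema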
        """
        pass

    @abstractmethod
    def supports_incremental_calculation(self) -> bool:
        """
        Check whether the strategy supports incremental calculation.

        Returns:
            bool: True if the strategy can process data incrementally
        """
        pass

    @abstractmethod
    def get_entry_signal(self) -> IncStrategySignal:
        """
        Generate an entry signal based on the current strategy state.

        This method should use the current internal state to determine
        whether an entry signal should be generated.

        Returns:
            IncStrategySignal: Entry signal with confidence level
        """
        pass

    @abstractmethod
    def get_exit_signal(self) -> IncStrategySignal:
        """
        Generate an exit signal based on the current strategy state.

        This method should use the current internal state to determine
        whether an exit signal should be generated.

        Returns:
            IncStrategySignal: Exit signal with confidence level
        """
        pass

    # Utility methods

    def get_confidence(self) -> float:
        """
        Get strategy confidence for the current market state.

        The default implementation returns 1.0. Strategies can override this
        to provide dynamic confidence based on market conditions.

        Returns:
            float: Confidence level (0.0 to 1.0)
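        Example (an illustrative override that scales confidence down in
        volatile markets; ``self._atr`` and ``self._atr_ceiling`` are
        hypothetical attributes maintained by the subclass):

            def get_confidence(self) -> float:
                if self._atr is None:
                    return 1.0
                return max(0.0, 1.0 - self._atr / self._atr_ceiling)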
        """
        return 1.0

    def reset_calculation_state(self) -> None:
        """Reset internal calculation state for reinitialization."""
        self._calculation_mode = "initialization"
        self._is_warmed_up = False
        self._data_points_received = 0

        self._timeframe_buffers.clear()
        self._timeframe_last_update.clear()
        self._indicator_states.clear()
        self._last_signals.clear()
        self._signal_history.clear()

        # Reset the timeframe aggregator
        if self._timeframe_aggregator is not None:
            self._timeframe_aggregator.reset()

        # Reset performance metrics
        for key in self._performance_metrics:
            if isinstance(self._performance_metrics[key], deque):
                self._performance_metrics[key].clear()
            else:
                self._performance_metrics[key] = 0

    def get_current_state_summary(self) -> Dict[str, Any]:
        """Get a summary of the current calculation state for debugging."""
        update_times = self._performance_metrics['update_times']
        signal_times = self._performance_metrics['signal_generation_times']

        return {
            'strategy_name': self.name,
            'calculation_mode': self._calculation_mode,
            'is_warmed_up': self._is_warmed_up,
            'data_points_received': self._data_points_received,
            'timeframes': list(self._timeframe_buffers.keys()),
            'buffer_sizes': {tf: len(buf) for tf, buf in self._timeframe_buffers.items()},
            'indicator_states': {
                name: state.get_state_summary() if hasattr(state, 'get_state_summary') else str(state)
                for name, state in self._indicator_states.items()
            },
            'last_signals': self._last_signals,
            'timeframe_aggregator': {
                'enabled': self._timeframe_aggregator is not None,
                'primary_timeframe': self._primary_timeframe,
                'current_incomplete_bar': self.get_current_incomplete_bar()
            },
            'performance_metrics': {
                'avg_update_time': sum(update_times) / len(update_times) if update_times else 0,
                'avg_signal_time': sum(signal_times) / len(signal_times) if signal_times else 0,
                'validation_failures': self._performance_metrics['state_validation_failures'],
                'data_gaps_handled': self._performance_metrics['data_gaps_handled'],
                'minute_data_points_processed': self._performance_metrics['minute_data_points_processed'],
                'timeframe_bars_completed': self._performance_metrics['timeframe_bars_completed']
            }
        }

    def _update_timeframe_buffers(self, new_data_point: Dict[str, float],
                                  timestamp: pd.Timestamp) -> None:
        """Update all timeframe buffers with a new data point."""
        min_buffer_sizes = self.get_minimum_buffer_size()

        for timeframe in min_buffer_sizes.keys():
            # Calculate the actual buffer size with the safety multiplier
            min_size = min_buffer_sizes[timeframe]
            actual_buffer_size = int(min_size * self._buffer_size_multiplier)

            # Initialize the buffer if needed
            if timeframe not in self._timeframe_buffers:
                self._timeframe_buffers[timeframe] = deque(maxlen=actual_buffer_size)
                self._timeframe_last_update[timeframe] = None

            # Add the data point to the buffer
            data_point = new_data_point.copy()
            data_point['timestamp'] = timestamp
            self._timeframe_buffers[timeframe].append(data_point)
            self._timeframe_last_update[timeframe] = timestamp

    def _get_timeframe_buffer(self, timeframe: str) -> pd.DataFrame:
        """Get the current buffer for a specific timeframe as a DataFrame."""
        if timeframe not in self._timeframe_buffers:
            return pd.DataFrame()

        buffer_data = list(self._timeframe_buffers[timeframe])
        if not buffer_data:
            return pd.DataFrame()

        df = pd.DataFrame(buffer_data)
        if 'timestamp' in df.columns:
            df = df.set_index('timestamp')
        return df

    def handle_data_gap(self, gap_duration: pd.Timedelta) -> None:
        """
        Handle gaps in the data stream.

        Small gaps are tolerated and processing continues with the current
        state; gaps larger than the configured maximum trigger a full
        reinitialization of the strategy.
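        Example (a caller-side sketch; assumes the runner tracks ``last_ts``,
        the timestamp of the previous minute update):

            gap = ts - last_ts
            if gap > pd.Timedelta(minutes=1):
                strategy.handle_data_gap(gap)
            strategy.process_data_point(ts, bar)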
stream.""" self._performance_metrics['data_gaps_handled'] += 1 if gap_duration > self._max_acceptable_gap: logger.warning(f"Data gap {gap_duration} exceeds maximum acceptable gap {self._max_acceptable_gap}") self._trigger_reinitialization() else: logger.info(f"Handling acceptable data gap: {gap_duration}") # For small gaps, continue with current state def _trigger_reinitialization(self) -> None: """Trigger strategy reinitialization due to data gap or corruption.""" logger.info(f"Triggering reinitialization for strategy {self.name}") self.reset_calculation_state() # Compatibility methods for original strategy interface def get_timeframes(self) -> List[str]: """Get required timeframes (compatibility method).""" return list(self.get_minimum_buffer_size().keys()) def initialize(self, backtester) -> None: """Initialize strategy (compatibility method).""" # This method provides compatibility with the original strategy interface # The actual initialization happens through the incremental interface self.initialized = True logger.info(f"Incremental strategy {self.name} initialized in compatibility mode") def __repr__(self) -> str: """String representation of the strategy.""" return (f"{self.__class__.__name__}(name={self.name}, " f"weight={self.weight}, mode={self._calculation_mode}, " f"warmed_up={self._is_warmed_up}, " f"data_points={self._data_points_received})")