"""
Base classes for the incremental strategy system.

This module contains the fundamental building blocks for all incremental
trading strategies:

- IncStrategySignal: Represents trading signals with confidence and metadata
- IncStrategyBase: Abstract base class that all incremental strategies must
  inherit from
- TimeframeAggregator: Built-in timeframe aggregation for minute-level data
  processing
"""

import pandas as pd
from abc import ABC, abstractmethod
from typing import Dict, Optional, List, Union, Any
from collections import deque
import logging

# Import the original signal class for compatibility
from ..strategies.base import StrategySignal

# Create alias for consistency
IncStrategySignal = StrategySignal

# Module-level logger (stdlib convention) so downstream applications can
# configure this module's output independently of the root logger.
logger = logging.getLogger(__name__)


class TimeframeAggregator:
    """
    Handles real-time aggregation of minute data to higher timeframes.

    This class accumulates minute-level OHLCV data and produces complete bars
    when a timeframe period is completed. Integrated into IncStrategyBase to
    provide consistent minute-level data processing across all strategies.
    """

    def __init__(self, timeframe_minutes: int = 15):
        """
        Initialize timeframe aggregator.

        Args:
            timeframe_minutes: Target timeframe in minutes
                (e.g., 60 for 1h, 15 for 15min)
        """
        self.timeframe_minutes = timeframe_minutes
        # The bar currently being accumulated (dict) or None before first data.
        self.current_bar = None
        # Start timestamp of the bar currently being accumulated.
        self.current_bar_start = None
        # Most recently completed bar, kept for inspection/debugging.
        self.last_completed_bar = None

    def update(self, timestamp: pd.Timestamp,
               ohlcv_data: Dict[str, float]) -> Optional[Dict[str, float]]:
        """
        Update with new minute data and return completed bar if timeframe is
        complete.

        Args:
            timestamp: Timestamp of the data
            ohlcv_data: OHLCV data dictionary with 'open', 'high', 'low',
                'close', 'volume' keys

        Returns:
            Completed OHLCV bar if a timeframe period ended, None otherwise
        """
        # Calculate which timeframe bar this timestamp belongs to
        bar_start = self._get_bar_start_time(timestamp)

        # Check if we're starting a new bar
        if self.current_bar_start != bar_start:
            # Save the completed bar (if any)
            completed_bar = self.current_bar.copy() if self.current_bar is not None else None

            # Start new bar.
            # NOTE(review): the new bar is seeded entirely from the minute's
            # *close*; the minute's own open/high/low are intentionally
            # discarded (see inline comment carried over from the original
            # implementation). Confirm this matches the reference strategy's
            # bar construction before changing.
            self.current_bar_start = bar_start
            self.current_bar = {
                'timestamp': bar_start,
                'open': ohlcv_data['close'],  # Use current close as open for new bar
                'high': ohlcv_data['close'],
                'low': ohlcv_data['close'],
                'close': ohlcv_data['close'],
                'volume': ohlcv_data['volume']
            }

            # Return the completed bar (if any)
            if completed_bar is not None:
                self.last_completed_bar = completed_bar
                return completed_bar
        else:
            # Update current bar with new data
            if self.current_bar is not None:
                self.current_bar['high'] = max(self.current_bar['high'], ohlcv_data['high'])
                self.current_bar['low'] = min(self.current_bar['low'], ohlcv_data['low'])
                self.current_bar['close'] = ohlcv_data['close']
                self.current_bar['volume'] += ohlcv_data['volume']

        return None  # No completed bar yet

    def _get_bar_start_time(self, timestamp: pd.Timestamp) -> pd.Timestamp:
        """
        Calculate the start time of the timeframe bar for a given timestamp.

        Bars are anchored to midnight of the timestamp's own day, which
        matches pandas' default intraday resample alignment (e.g. 00:00,
        00:15, 00:30, 00:45 for a 15-minute timeframe).
        """
        # Day-anchored integer arithmetic replaces the previous per-tick
        # Series + resample round-trip. For intraday timeframes this yields
        # exactly the same boundaries as pandas resampling, without
        # allocating pandas objects on every update. (The old resample loop
        # always returned on its first group, so its "fallback" arithmetic —
        # identical to this — was unreachable dead code.)
        minutes_since_midnight = timestamp.hour * 60 + timestamp.minute
        bar_minutes = (minutes_since_midnight // self.timeframe_minutes) * self.timeframe_minutes
        # normalize() keeps the date and timezone while zeroing the time.
        return timestamp.normalize() + pd.Timedelta(minutes=bar_minutes)

    def get_current_bar(self) -> Optional[Dict[str, float]]:
        """Get the current incomplete bar (for debugging)."""
        return self.current_bar.copy() if self.current_bar is not None else None

    def reset(self):
        """Reset aggregator state."""
        self.current_bar = None
        self.current_bar_start = None
        self.last_completed_bar = None


class IncStrategyBase(ABC):
    """
    Abstract base class for all incremental trading strategies.

    This class defines the interface that all incremental strategies must
    implement:

    - get_minimum_buffer_size(): Specify minimum data requirements
    - calculate_on_data(): Process new data points incrementally
    - supports_incremental_calculation(): Whether strategy supports incremental mode
    - get_entry_signal(): Generate entry signals
    - get_exit_signal(): Generate exit signals

    The incremental approach allows strategies to:

    - Process new data points without full recalculation
    - Maintain bounded memory usage regardless of data history length
    - Provide real-time performance with minimal latency
    - Support both initialization and incremental modes
    - Accept minute-level data and internally aggregate to any timeframe

    New Features:

    - Built-in TimeframeAggregator for minute-level data processing
    - update_minute_data() method for real-time trading systems
    - Automatic timeframe detection and aggregation
    - Backward compatibility with existing update() methods

    Attributes:
        name (str): Strategy name
        weight (float): Strategy weight for combination
        params (Dict): Strategy parameters
        calculation_mode (str): Current mode ('initialization' or 'incremental')
        is_warmed_up (bool): Whether strategy has sufficient data for reliable signals
        timeframe_buffers (Dict): Rolling buffers for different timeframes
        indicator_states (Dict): Internal indicator calculation states
        timeframe_aggregator (TimeframeAggregator): Built-in aggregator for minute data

    Example:
        class MyIncStrategy(IncStrategyBase):
            def get_minimum_buffer_size(self):
                return {"15min": 50}  # Strategy works on 15min timeframe

            def calculate_on_data(self, new_data_point, timestamp):
                # Process new data incrementally
                self._update_indicators(new_data_point)

            def get_entry_signal(self):
                # Generate signal based on current state
                if self._should_enter():
                    return IncStrategySignal("ENTRY", confidence=0.8)
                return IncStrategySignal("HOLD", confidence=0.0)

        # Usage with minute-level data:
        strategy = MyIncStrategy(params={"timeframe_minutes": 15})
        for minute_data in live_stream:
            result = strategy.update_minute_data(minute_data['timestamp'], minute_data)
            if result is not None:
                # Complete 15min bar formed
                entry_signal = strategy.get_entry_signal()
    """

    def __init__(self, name: str, weight: float = 1.0, params: Optional[Dict] = None):
        """
        Initialize the incremental strategy base.

        Args:
            name: Strategy name/identifier
            weight: Strategy weight for combination (default: 1.0)
            params: Strategy-specific parameters
        """
        self.name = name
        self.weight = weight
        self.params = params or {}

        # Calculation state
        self._calculation_mode = "initialization"
        self._is_warmed_up = False
        self._data_points_received = 0

        # Timeframe management
        self._timeframe_buffers = {}
        self._timeframe_last_update = {}
        self._buffer_size_multiplier = self.params.get("buffer_size_multiplier", 2.0)

        # Built-in timeframe aggregation: only needed when the strategy's
        # primary timeframe is coarser than the incoming 1-minute stream.
        self._primary_timeframe_minutes = self._extract_timeframe_minutes()
        self._timeframe_aggregator = None
        if self._primary_timeframe_minutes > 1:
            self._timeframe_aggregator = TimeframeAggregator(self._primary_timeframe_minutes)

        # Indicator states (strategy-specific)
        self._indicator_states = {}

        # Signal generation state
        self._last_signals = {}
        self._signal_history = deque(maxlen=100)

        # Error handling
        self._max_acceptable_gap = pd.Timedelta(self.params.get("max_acceptable_gap", "5min"))
        self._state_validation_enabled = self.params.get("enable_state_validation", True)

        # Performance monitoring
        self._performance_metrics = {
            'update_times': deque(maxlen=1000),
            'signal_generation_times': deque(maxlen=1000),
            'state_validation_failures': 0,
            'data_gaps_handled': 0,
            'minute_data_points_processed': 0,
            'timeframe_bars_completed': 0
        }

        # Compatibility with original strategy interface
        self.initialized = False
        self.timeframes_data = {}

    def _extract_timeframe_minutes(self) -> int:
        """
        Extract timeframe in minutes from strategy parameters.

        Looks for timeframe configuration in various parameter formats:

        - timeframe_minutes: Direct specification in minutes
        - timeframe: String format like "15min", "1h", etc.

        Returns:
            int: Timeframe in minutes (default: 1 for minute-level processing)
        """
        # Direct specification (coerced so e.g. a string "15" also works)
        if "timeframe_minutes" in self.params:
            return int(self.params["timeframe_minutes"])

        # String format parsing
        timeframe_str = self.params.get("timeframe", "1min")
        if timeframe_str.endswith("min"):
            return int(timeframe_str[:-3])
        elif timeframe_str.endswith("h"):
            return int(timeframe_str[:-1]) * 60
        elif timeframe_str.endswith("d"):
            return int(timeframe_str[:-1]) * 60 * 24
        else:
            # Default to 1 minute if can't parse
            return 1

    def update_minute_data(self, timestamp: pd.Timestamp,
                           ohlcv_data: Dict[str, float]) -> Optional[Dict[str, Any]]:
        """
        Update strategy with minute-level OHLCV data.

        This method provides a standardized interface for real-time trading
        systems that receive minute-level data. It internally aggregates to
        the strategy's configured timeframe and only processes indicators
        when complete bars are formed.

        Args:
            timestamp: Timestamp of the minute data
            ohlcv_data: Dictionary with 'open', 'high', 'low', 'close', 'volume'

        Returns:
            Strategy processing result if timeframe bar completed, None otherwise

        Example:
            # Process live minute data
            result = strategy.update_minute_data(
                timestamp=pd.Timestamp('2024-01-01 10:15:00'),
                ohlcv_data={
                    'open': 100.0, 'high': 101.0, 'low': 99.5,
                    'close': 100.5, 'volume': 1000.0
                }
            )
            if result is not None:
                # A complete timeframe bar was formed and processed
                entry_signal = strategy.get_entry_signal()
        """
        self._performance_metrics['minute_data_points_processed'] += 1

        # If no aggregator (1min strategy), process directly
        if self._timeframe_aggregator is None:
            self.calculate_on_data(ohlcv_data, timestamp)
            return {
                'timestamp': timestamp,
                'timeframe_minutes': 1,
                'processed_directly': True,
                'is_warmed_up': self.is_warmed_up
            }

        # Use aggregator to accumulate minute data
        completed_bar = self._timeframe_aggregator.update(timestamp, ohlcv_data)

        if completed_bar is not None:
            # A complete timeframe bar was formed
            self._performance_metrics['timeframe_bars_completed'] += 1

            # Process the completed bar
            self.calculate_on_data(completed_bar, completed_bar['timestamp'])

            # Return processing result
            return {
                'timestamp': completed_bar['timestamp'],
                'timeframe_minutes': self._primary_timeframe_minutes,
                'bar_data': completed_bar,
                'is_warmed_up': self.is_warmed_up,
                'processed_bar': True
            }

        # No complete bar yet
        return None

    def get_current_incomplete_bar(self) -> Optional[Dict[str, float]]:
        """
        Get the current incomplete timeframe bar (for monitoring).

        Useful for debugging and monitoring the aggregation process.

        Returns:
            Current incomplete bar data or None if no aggregator
        """
        if self._timeframe_aggregator is not None:
            return self._timeframe_aggregator.get_current_bar()
        return None

    @property
    def calculation_mode(self) -> str:
        """Current calculation mode: 'initialization' or 'incremental'"""
        return self._calculation_mode

    @property
    def is_warmed_up(self) -> bool:
        """Whether strategy has sufficient data for reliable signals"""
        return self._is_warmed_up

    @abstractmethod
    def get_minimum_buffer_size(self) -> Dict[str, int]:
        """
        Return minimum data points needed for each timeframe.

        This method must be implemented by each strategy to specify how much
        historical data is required for reliable calculations.

        Returns:
            Dict[str, int]: {timeframe: min_points} mapping

        Example:
            return {"15min": 50, "1min": 750}  # 50 15min candles = 750 1min candles
        """
        pass

    @abstractmethod
    def calculate_on_data(self, new_data_point: Dict[str, float],
                          timestamp: pd.Timestamp) -> None:
        """
        Process a single new data point incrementally.

        This method is called for each new data point and should update the
        strategy's internal state incrementally.

        Args:
            new_data_point: OHLCV data point {open, high, low, close, volume}
            timestamp: Timestamp of the data point
        """
        pass

    @abstractmethod
    def supports_incremental_calculation(self) -> bool:
        """
        Whether strategy supports incremental calculation.

        Returns:
            bool: True if incremental mode supported, False for fallback to
                batch mode
        """
        pass

    @abstractmethod
    def get_entry_signal(self) -> IncStrategySignal:
        """
        Generate entry signal based on current strategy state.

        This method should use the current internal state to determine
        whether an entry signal should be generated.

        Returns:
            IncStrategySignal: Entry signal with confidence level
        """
        pass

    @abstractmethod
    def get_exit_signal(self) -> IncStrategySignal:
        """
        Generate exit signal based on current strategy state.

        This method should use the current internal state to determine
        whether an exit signal should be generated.

        Returns:
            IncStrategySignal: Exit signal with confidence level
        """
        pass

    def get_confidence(self) -> float:
        """
        Get strategy confidence for the current market state.

        Default implementation returns 1.0. Strategies can override this to
        provide dynamic confidence based on market conditions.

        Returns:
            float: Confidence level (0.0 to 1.0)
        """
        return 1.0

    def reset_calculation_state(self) -> None:
        """Reset internal calculation state for reinitialization."""
        self._calculation_mode = "initialization"
        self._is_warmed_up = False
        self._data_points_received = 0
        self._timeframe_buffers.clear()
        self._timeframe_last_update.clear()
        self._indicator_states.clear()
        self._last_signals.clear()
        self._signal_history.clear()

        # Reset timeframe aggregator
        if self._timeframe_aggregator is not None:
            self._timeframe_aggregator.reset()

        # Reset performance metrics (deques are cleared, counters zeroed)
        for key in self._performance_metrics:
            if isinstance(self._performance_metrics[key], deque):
                self._performance_metrics[key].clear()
            else:
                self._performance_metrics[key] = 0

    def get_current_state_summary(self) -> Dict[str, Any]:
        """Get summary of current calculation state for debugging."""

        def _avg(values) -> float:
            # Mean of a metrics deque; 0 when no samples collected yet.
            return sum(values) / len(values) if values else 0

        return {
            'strategy_name': self.name,
            'calculation_mode': self._calculation_mode,
            'is_warmed_up': self._is_warmed_up,
            'data_points_received': self._data_points_received,
            'timeframes': list(self._timeframe_buffers.keys()),
            'buffer_sizes': {tf: len(buf) for tf, buf in self._timeframe_buffers.items()},
            'indicator_states': {
                name: state.get_state_summary() if hasattr(state, 'get_state_summary') else str(state)
                for name, state in self._indicator_states.items()
            },
            'last_signals': self._last_signals,
            'timeframe_aggregator': {
                'enabled': self._timeframe_aggregator is not None,
                'primary_timeframe_minutes': self._primary_timeframe_minutes,
                'current_incomplete_bar': self.get_current_incomplete_bar()
            },
            'performance_metrics': {
                'avg_update_time': _avg(self._performance_metrics['update_times']),
                'avg_signal_time': _avg(self._performance_metrics['signal_generation_times']),
                'validation_failures': self._performance_metrics['state_validation_failures'],
                'data_gaps_handled': self._performance_metrics['data_gaps_handled'],
                'minute_data_points_processed': self._performance_metrics['minute_data_points_processed'],
                'timeframe_bars_completed': self._performance_metrics['timeframe_bars_completed']
            }
        }

    def _update_timeframe_buffers(self, new_data_point: Dict[str, float],
                                  timestamp: pd.Timestamp) -> None:
        """Update all timeframe buffers with new data point."""
        # Get minimum buffer sizes
        min_buffer_sizes = self.get_minimum_buffer_size()

        for timeframe, min_size in min_buffer_sizes.items():
            # Calculate actual buffer size with multiplier
            actual_buffer_size = int(min_size * self._buffer_size_multiplier)

            # Initialize buffer if needed
            if timeframe not in self._timeframe_buffers:
                self._timeframe_buffers[timeframe] = deque(maxlen=actual_buffer_size)
                self._timeframe_last_update[timeframe] = None

            if timeframe == "1min":
                # 1min data is appended directly, one point per update.
                data_point = new_data_point.copy()
                data_point['timestamp'] = timestamp
                self._timeframe_buffers[timeframe].append(data_point)
                self._timeframe_last_update[timeframe] = timestamp
            else:
                # Higher timeframes are always routed through the aggregation
                # helper, which decides whether to open a new candle or merge
                # into the current one. (Previously this call was gated on
                # _should_update_timeframe(), which made the merge branch in
                # _aggregate_to_timeframe unreachable and silently dropped
                # intra-period minute updates.)
                self._aggregate_to_timeframe(timeframe, new_data_point, timestamp)

    def _should_update_timeframe(self, timeframe: str, timestamp: pd.Timestamp) -> bool:
        """Check if timeframe should be updated based on timestamp."""
        if timeframe == "1min":
            return True  # Always update 1min

        last_update = self._timeframe_last_update.get(timeframe)
        if last_update is None:
            return True  # First update

        # Calculate timeframe interval
        if timeframe.endswith("min"):
            minutes = int(timeframe[:-3])
            interval = pd.Timedelta(minutes=minutes)
        elif timeframe.endswith("h"):
            hours = int(timeframe[:-1])
            interval = pd.Timedelta(hours=hours)
        else:
            return True  # Unknown timeframe, update anyway

        # Check if enough time has passed
        return timestamp >= last_update + interval

    def _aggregate_to_timeframe(self, timeframe: str, new_data_point: Dict[str, float],
                                timestamp: pd.Timestamp) -> None:
        """Aggregate 1min data to specified timeframe."""
        # This is a simplified aggregation - in practice, you might want more
        # sophisticated logic
        buffer = self._timeframe_buffers[timeframe]

        # If buffer is empty or we're starting a new period, add new candle
        if not buffer or self._should_update_timeframe(timeframe, timestamp):
            aggregated_point = new_data_point.copy()
            aggregated_point['timestamp'] = timestamp
            buffer.append(aggregated_point)
            self._timeframe_last_update[timeframe] = timestamp
        else:
            # Update the last candle in the buffer
            last_candle = buffer[-1]
            last_candle['high'] = max(last_candle['high'], new_data_point['high'])
            last_candle['low'] = min(last_candle['low'], new_data_point['low'])
            last_candle['close'] = new_data_point['close']
            last_candle['volume'] += new_data_point['volume']

    def _get_timeframe_buffer(self, timeframe: str) -> pd.DataFrame:
        """Get current buffer for specific timeframe as DataFrame."""
        if timeframe not in self._timeframe_buffers:
            return pd.DataFrame()

        buffer_data = list(self._timeframe_buffers[timeframe])
        if not buffer_data:
            return pd.DataFrame()

        df = pd.DataFrame(buffer_data)
        if 'timestamp' in df.columns:
            df = df.set_index('timestamp')
        return df

    def _validate_calculation_state(self) -> bool:
        """Validate internal calculation state consistency."""
        if not self._state_validation_enabled:
            return True

        try:
            # Check that all required buffers exist
            min_buffer_sizes = self.get_minimum_buffer_size()
            for timeframe in min_buffer_sizes.keys():
                if timeframe not in self._timeframe_buffers:
                    logger.warning(f"Missing buffer for timeframe {timeframe}")
                    return False

            # Check that indicator states are valid
            for name, state in self._indicator_states.items():
                if hasattr(state, 'is_initialized') and not state.is_initialized:
                    logger.warning(f"Indicator {name} not initialized")
                    return False

            return True
        except Exception as e:
            logger.error(f"State validation failed: {e}")
            self._performance_metrics['state_validation_failures'] += 1
            return False

    def _recover_from_state_corruption(self) -> None:
        """Recover from corrupted calculation state."""
        logger.warning(f"Recovering from state corruption in strategy {self.name}")

        # Reset to initialization mode
        self._calculation_mode = "initialization"
        self._is_warmed_up = False

        # Try to recalculate from available buffer data
        try:
            self._reinitialize_from_buffers()
        except Exception as e:
            logger.error(f"Failed to recover from buffers: {e}")
            # Complete reset as last resort
            self.reset_calculation_state()

    def _reinitialize_from_buffers(self) -> None:
        """Reinitialize indicators from available buffer data."""
        # This method should be overridden by specific strategies
        # to implement their own recovery logic
        pass

    def handle_data_gap(self, gap_duration: pd.Timedelta) -> None:
        """Handle gaps in data stream."""
        self._performance_metrics['data_gaps_handled'] += 1

        if gap_duration > self._max_acceptable_gap:
            logger.warning(f"Data gap {gap_duration} exceeds maximum acceptable gap {self._max_acceptable_gap}")
            self._trigger_reinitialization()
        else:
            logger.info(f"Handling acceptable data gap: {gap_duration}")
            # For small gaps, continue with current state

    def _trigger_reinitialization(self) -> None:
        """Trigger strategy reinitialization due to data gap or corruption."""
        logger.info(f"Triggering reinitialization for strategy {self.name}")
        self.reset_calculation_state()

    # Compatibility methods for original strategy interface
    def get_timeframes(self) -> List[str]:
        """Get required timeframes (compatibility method)."""
        return list(self.get_minimum_buffer_size().keys())

    def initialize(self, backtester) -> None:
        """Initialize strategy (compatibility method)."""
        # This method provides compatibility with the original strategy interface
        # The actual initialization happens through the incremental interface
        self.initialized = True
        logger.info(f"Incremental strategy {self.name} initialized in compatibility mode")

    def __repr__(self) -> str:
        """String representation of the strategy."""
        return (f"{self.__class__.__name__}(name={self.name}, "
                f"weight={self.weight}, mode={self._calculation_mode}, "
                f"warmed_up={self._is_warmed_up}, "
                f"data_points={self._data_points_received})")