# Cycles/cycles/IncStrategies/base.py

"""
Base classes for the incremental strategy system.

This module contains the fundamental building blocks for all incremental trading strategies:
- IncStrategySignal: Represents trading signals with confidence and metadata
- IncStrategyBase: Abstract base class that all incremental strategies must inherit from
- TimeframeAggregator: Built-in timeframe aggregation for minute-level data processing
"""

import pandas as pd
from abc import ABC, abstractmethod
from typing import Dict, Optional, List, Union, Any
from collections import deque
import logging

# Import the original signal class for compatibility
from ..strategies.base import StrategySignal

# Create alias for consistency
IncStrategySignal = StrategySignal


class TimeframeAggregator:
    """
    Handles real-time aggregation of minute data to higher timeframes.

    This class accumulates minute-level OHLCV data and produces complete
    bars when a timeframe period is completed. Integrated into IncStrategyBase
    to provide consistent minute-level data processing across all strategies.

    A bar is only reported as complete when the first data point of the
    *next* bar arrives; the bar that is still being built can be inspected
    with get_current_bar().
    """

    def __init__(self, timeframe_minutes: int = 15):
        """
        Initialize timeframe aggregator.

        Args:
            timeframe_minutes: Target timeframe in minutes (e.g., 60 for 1h, 15 for 15min)
        """
        self.timeframe_minutes = timeframe_minutes
        self.current_bar = None           # bar currently being accumulated
        self.current_bar_start = None     # start time of current_bar
        self.last_completed_bar = None    # most recent bar returned by update()

    def update(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[Dict[str, float]]:
        """
        Update with new minute data and return completed bar if timeframe is complete.

        Args:
            timestamp: Timestamp of the data
            ohlcv_data: OHLCV data dictionary. 'close' and 'volume' are always
                required; 'high' and 'low' are required for every data point
                after the first one of a bar. When 'open', 'high' or 'low' is
                missing on the first data point of a bar, 'close' is used in
                its place.

        Returns:
            Completed OHLCV bar (with an added 'timestamp' key holding the bar
            start) if timeframe period ended, None otherwise
        """
        bar_start = self._get_bar_start_time(timestamp)

        if self.current_bar_start == bar_start:
            # Still inside the bar being built: fold the new minute into it.
            if self.current_bar is not None:
                bar = self.current_bar
                bar['high'] = max(bar['high'], ohlcv_data['high'])
                bar['low'] = min(bar['low'], ohlcv_data['low'])
                bar['close'] = ohlcv_data['close']
                bar['volume'] += ohlcv_data['volume']
            return None

        # The timestamp belongs to a different bar, so whatever was being
        # built (if anything) is finished. self.current_bar is rebound to a
        # fresh dict below, so the finished dict can be handed out directly.
        completed_bar = self.current_bar

        # Seed the new bar from the minute's own open/high/low so the first
        # minute's price range is not lost (matching an OHLC resample:
        # open=first, high=max, low=min, close=last, volume=sum).
        close = ohlcv_data['close']
        self.current_bar_start = bar_start
        self.current_bar = {
            'timestamp': bar_start,
            'open': ohlcv_data.get('open', close),
            'high': ohlcv_data.get('high', close),
            'low': ohlcv_data.get('low', close),
            'close': close,
            'volume': ohlcv_data['volume'],
        }

        if completed_bar is None:
            return None  # Very first data point: nothing completed yet

        self.last_completed_bar = completed_bar
        return completed_bar

    def _get_bar_start_time(self, timestamp: pd.Timestamp) -> pd.Timestamp:
        """Calculate the start time of the timeframe bar for given timestamp.

        Bars are counted from midnight of the timestamp's own day in steps of
        ``timeframe_minutes``, which is the boundary pandas resampling picks
        with its default ``origin='start_day'`` (e.g. 00:00, 00:15, 00:30,
        00:45 for a 15 minute timeframe). The boundary is computed
        arithmetically instead of building a temporary Series and resampler
        for every incoming data point.
        """
        # Accept datetime/datetime64 inputs as well as pd.Timestamp.
        timestamp = pd.Timestamp(timestamp)
        day_start = timestamp.normalize()
        bar_length = pd.Timedelta(minutes=self.timeframe_minutes)

        # Number of whole bars elapsed since midnight (integer result).
        bars_elapsed = (timestamp - day_start) // bar_length
        return day_start + bars_elapsed * bar_length

    def get_current_bar(self) -> Optional[Dict[str, float]]:
        """Get a copy of the current incomplete bar (for debugging)."""
        if self.current_bar is None:
            return None
        return self.current_bar.copy()

    def reset(self):
        """Reset aggregator state."""
        self.current_bar = None
        self.current_bar_start = None
        self.last_completed_bar = None


class IncStrategyBase(ABC):
    """
    Abstract base class for all incremental trading strategies.

    This class defines the interface that all incremental strategies must implement:
    - get_minimum_buffer_size(): Specify minimum data requirements
    - calculate_on_data(): Process new data points incrementally
    - supports_incremental_calculation(): Whether strategy supports incremental mode
    - get_entry_signal(): Generate entry signals
    - get_exit_signal(): Generate exit signals

    The incremental approach allows strategies to:
    - Process new data points without full recalculation
    - Maintain bounded memory usage regardless of data history length
    - Provide real-time performance with minimal latency
    - Support both initialization and incremental modes
    - Accept minute-level data and internally aggregate to any timeframe

    New Features:
    - Built-in TimeframeAggregator for minute-level data processing
    - update_minute_data() method for real-time trading systems
    - Automatic timeframe detection and aggregation
    - Backward compatibility with existing update() methods

    Attributes:
        name (str): Strategy name
        weight (float): Strategy weight for combination
        params (Dict): Strategy parameters
        calculation_mode (str): Current mode ('initialization' or 'incremental')
        is_warmed_up (bool): Whether strategy has sufficient data for reliable signals

    Internal state (stored with a leading underscore):
        _timeframe_buffers (Dict): Rolling buffers for different timeframes
        _indicator_states (Dict): Internal indicator calculation states
        _timeframe_aggregator (TimeframeAggregator): Built-in aggregator for
            minute data; None when the primary timeframe is 1 minute or less

    Example:
        class MyIncStrategy(IncStrategyBase):
            def get_minimum_buffer_size(self):
                return {"15min": 50}  # Strategy works on 15min timeframe

            def calculate_on_data(self, new_data_point, timestamp):
                # Process new data incrementally
                self._update_indicators(new_data_point)

            def supports_incremental_calculation(self):
                return True

            def get_entry_signal(self):
                # Generate signal based on current state
                if self._should_enter():
                    return IncStrategySignal("ENTRY", confidence=0.8)
                return IncStrategySignal("HOLD", confidence=0.0)

            def get_exit_signal(self):
                return IncStrategySignal("HOLD", confidence=0.0)

        # Usage with minute-level data:
        strategy = MyIncStrategy("my_strategy", params={"timeframe_minutes": 15})
        for minute_data in live_stream:
            result = strategy.update_minute_data(minute_data['timestamp'], minute_data)
            if result is not None:  # Complete 15min bar formed
                entry_signal = strategy.get_entry_signal()
    """

    def __init__(self, name: str, weight: float = 1.0, params: Optional[Dict] = None):
        """
        Initialize the incremental strategy base.

        Args:
            name: Strategy name/identifier
            weight: Strategy weight for combination (default: 1.0)
            params: Strategy-specific parameters. Keys read by the base class:
                "buffer_size_multiplier" (default 2.0),
                "timeframe_minutes" / "timeframe" (default "1min"),
                "max_acceptable_gap" (default "5min"),
                "enable_state_validation" (default True).
        """
        self.name = name
        self.weight = weight
        self.params = params or {}

        # Calculation state
        self._calculation_mode = "initialization"
        self._is_warmed_up = False
        self._data_points_received = 0

        # Timeframe management
        self._timeframe_buffers = {}
        self._timeframe_last_update = {}
        self._buffer_size_multiplier = self.params.get("buffer_size_multiplier", 2.0)

        # Built-in timeframe aggregation (only needed above 1 minute)
        self._primary_timeframe_minutes = self._extract_timeframe_minutes()
        self._timeframe_aggregator = None
        if self._primary_timeframe_minutes > 1:
            self._timeframe_aggregator = TimeframeAggregator(self._primary_timeframe_minutes)

        # Indicator states (strategy-specific)
        self._indicator_states = {}

        # Signal generation state
        self._last_signals = {}
        self._signal_history = deque(maxlen=100)

        # Error handling
        self._max_acceptable_gap = pd.Timedelta(self.params.get("max_acceptable_gap", "5min"))
        self._state_validation_enabled = self.params.get("enable_state_validation", True)

        # Performance monitoring
        self._performance_metrics = {
            'update_times': deque(maxlen=1000),
            'signal_generation_times': deque(maxlen=1000),
            'state_validation_failures': 0,
            'data_gaps_handled': 0,
            'minute_data_points_processed': 0,
            'timeframe_bars_completed': 0
        }

        # Compatibility with original strategy interface
        self.initialized = False
        self.timeframes_data = {}

    @staticmethod
    def _parse_timeframe_minutes(timeframe: Any) -> Optional[int]:
        """
        Convert a timeframe label such as "15min", "1h" or "1d" to minutes.

        The suffix is matched case-insensitively. Shared by
        _extract_timeframe_minutes() and _should_update_timeframe() so both
        understand the same set of labels.

        Returns:
            Number of minutes, or None when the label is not a string, has no
            recognised suffix, or its numeric part is not an integer.
        """
        if not isinstance(timeframe, str):
            return None

        label = timeframe.strip().lower()
        for suffix, minutes_per_unit in (("min", 1), ("h", 60), ("d", 60 * 24)):
            if label.endswith(suffix):
                try:
                    return int(label[:-len(suffix)]) * minutes_per_unit
                except ValueError:
                    return None
        return None

    def _extract_timeframe_minutes(self) -> int:
        """
        Extract timeframe in minutes from strategy parameters.

        Looks for timeframe configuration in various parameter formats:
        - timeframe_minutes: Direct specification in minutes
        - timeframe: String format like "15min", "1h", "1d"

        Returns:
            int: Timeframe in minutes (default: 1 for minute-level processing,
            also used when the "timeframe" string cannot be parsed)
        """
        # Direct specification wins and is returned as given
        if "timeframe_minutes" in self.params:
            return self.params["timeframe_minutes"]

        # String format parsing
        timeframe_str = self.params.get("timeframe", "1min")
        minutes = self._parse_timeframe_minutes(timeframe_str)
        if minutes is None:
            # Default to 1 minute if can't parse, but leave a trace in the log
            logging.warning(f"Could not parse timeframe {timeframe_str!r}; defaulting to 1 minute")
            return 1
        return minutes

    def update_minute_data(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[Dict[str, Any]]:
        """
        Update strategy with minute-level OHLCV data.

        This method provides a standardized interface for real-time trading systems
        that receive minute-level data. It internally aggregates to the strategy's
        configured timeframe and only processes indicators when complete bars are formed.

        Args:
            timestamp: Timestamp of the minute data
            ohlcv_data: Dictionary with 'open', 'high', 'low', 'close', 'volume'

        Returns:
            Strategy processing result if timeframe bar completed, None otherwise.
            Without an aggregator (1min strategy) every call is processed and a
            result is always returned.

        Example:
            # Process live minute data
            result = strategy.update_minute_data(
                timestamp=pd.Timestamp('2024-01-01 10:15:00'),
                ohlcv_data={
                    'open': 100.0,
                    'high': 101.0,
                    'low': 99.5,
                    'close': 100.5,
                    'volume': 1000.0
                }
            )

            if result is not None:
                # A complete timeframe bar was formed and processed
                entry_signal = strategy.get_entry_signal()
        """
        self._performance_metrics['minute_data_points_processed'] += 1

        # If no aggregator (1min strategy), process directly
        if self._timeframe_aggregator is None:
            self.calculate_on_data(ohlcv_data, timestamp)
            return {
                'timestamp': timestamp,
                'timeframe_minutes': 1,
                'processed_directly': True,
                'is_warmed_up': self.is_warmed_up
            }

        # Use aggregator to accumulate minute data
        completed_bar = self._timeframe_aggregator.update(timestamp, ohlcv_data)
        if completed_bar is None:
            # No complete bar yet
            return None

        # A complete timeframe bar was formed: count it and process it
        self._performance_metrics['timeframe_bars_completed'] += 1
        self.calculate_on_data(completed_bar, completed_bar['timestamp'])

        return {
            'timestamp': completed_bar['timestamp'],
            'timeframe_minutes': self._primary_timeframe_minutes,
            'bar_data': completed_bar,
            'is_warmed_up': self.is_warmed_up,
            'processed_bar': True
        }

    def get_current_incomplete_bar(self) -> Optional[Dict[str, float]]:
        """
        Get the current incomplete timeframe bar (for monitoring).

        Useful for debugging and monitoring the aggregation process.

        Returns:
            Current incomplete bar data or None if no aggregator
        """
        if self._timeframe_aggregator is None:
            return None
        return self._timeframe_aggregator.get_current_bar()

    @property
    def calculation_mode(self) -> str:
        """Current calculation mode: 'initialization' or 'incremental'"""
        return self._calculation_mode

    @property
    def is_warmed_up(self) -> bool:
        """Whether strategy has sufficient data for reliable signals"""
        return self._is_warmed_up

    @abstractmethod
    def get_minimum_buffer_size(self) -> Dict[str, int]:
        """
        Return minimum data points needed for each timeframe.

        This method must be implemented by each strategy to specify how much
        historical data is required for reliable calculations.

        Returns:
            Dict[str, int]: {timeframe: min_points} mapping

        Example:
            return {"15min": 50, "1min": 750}  # 50 15min candles = 750 1min candles
        """
        pass

    @abstractmethod
    def calculate_on_data(self, new_data_point: Dict[str, float], timestamp: pd.Timestamp) -> None:
        """
        Process a single new data point incrementally.

        This method is called for each new data point and should update
        the strategy's internal state incrementally.

        Args:
            new_data_point: OHLCV data point {open, high, low, close, volume}
            timestamp: Timestamp of the data point
        """
        pass

    @abstractmethod
    def supports_incremental_calculation(self) -> bool:
        """
        Whether strategy supports incremental calculation.

        Returns:
            bool: True if incremental mode supported, False for fallback to batch mode
        """
        pass

    @abstractmethod
    def get_entry_signal(self) -> IncStrategySignal:
        """
        Generate entry signal based on current strategy state.

        This method should use the current internal state to determine
        whether an entry signal should be generated.

        Returns:
            IncStrategySignal: Entry signal with confidence level
        """
        pass

    @abstractmethod
    def get_exit_signal(self) -> IncStrategySignal:
        """
        Generate exit signal based on current strategy state.

        This method should use the current internal state to determine
        whether an exit signal should be generated.

        Returns:
            IncStrategySignal: Exit signal with confidence level
        """
        pass

    def get_confidence(self) -> float:
        """
        Get strategy confidence for the current market state.

        Default implementation returns 1.0. Strategies can override
        this to provide dynamic confidence based on market conditions.

        Returns:
            float: Confidence level (0.0 to 1.0)
        """
        return 1.0

    def reset_calculation_state(self) -> None:
        """Reset internal calculation state for reinitialization."""
        self._calculation_mode = "initialization"
        self._is_warmed_up = False
        self._data_points_received = 0
        self._timeframe_buffers.clear()
        self._timeframe_last_update.clear()
        self._indicator_states.clear()
        self._last_signals.clear()
        self._signal_history.clear()

        # Reset timeframe aggregator
        if self._timeframe_aggregator is not None:
            self._timeframe_aggregator.reset()

        # Reset performance metrics: timing deques are emptied, counters zeroed
        for key, metric in self._performance_metrics.items():
            if isinstance(metric, deque):
                metric.clear()
            else:
                self._performance_metrics[key] = 0

    def get_current_state_summary(self) -> Dict[str, Any]:
        """Get summary of current calculation state for debugging."""
        metrics = self._performance_metrics

        def _average(samples) -> float:
            # Empty timing history averages to 0 instead of dividing by zero
            return sum(samples) / len(samples) if samples else 0

        return {
            'strategy_name': self.name,
            'calculation_mode': self._calculation_mode,
            'is_warmed_up': self._is_warmed_up,
            'data_points_received': self._data_points_received,
            'timeframes': list(self._timeframe_buffers.keys()),
            'buffer_sizes': {tf: len(buf) for tf, buf in self._timeframe_buffers.items()},
            'indicator_states': {
                name: state.get_state_summary() if hasattr(state, 'get_state_summary') else str(state)
                for name, state in self._indicator_states.items()
            },
            'last_signals': self._last_signals,
            'timeframe_aggregator': {
                'enabled': self._timeframe_aggregator is not None,
                'primary_timeframe_minutes': self._primary_timeframe_minutes,
                'current_incomplete_bar': self.get_current_incomplete_bar()
            },
            'performance_metrics': {
                'avg_update_time': _average(metrics['update_times']),
                'avg_signal_time': _average(metrics['signal_generation_times']),
                'validation_failures': metrics['state_validation_failures'],
                'data_gaps_handled': metrics['data_gaps_handled'],
                'minute_data_points_processed': metrics['minute_data_points_processed'],
                'timeframe_bars_completed': metrics['timeframe_bars_completed']
            }
        }

    def _update_timeframe_buffers(self, new_data_point: Dict[str, float], timestamp: pd.Timestamp) -> None:
        """
        Update all timeframe buffers with new data point.

        Buffers are created lazily, sized to the strategy's minimum buffer size
        times the configured multiplier. The "1min" buffer receives every data
        point as-is; every other timeframe is routed through
        _aggregate_to_timeframe() on every call so that data points falling
        inside an already open candle are merged into it rather than dropped.
        """
        for timeframe, min_size in self.get_minimum_buffer_size().items():
            # Initialize buffer if needed
            if timeframe not in self._timeframe_buffers:
                actual_buffer_size = int(min_size * self._buffer_size_multiplier)
                self._timeframe_buffers[timeframe] = deque(maxlen=actual_buffer_size)
                self._timeframe_last_update[timeframe] = None

            if timeframe == "1min":
                # For 1min timeframe, add data directly
                data_point = new_data_point.copy()
                data_point['timestamp'] = timestamp
                self._timeframe_buffers[timeframe].append(data_point)
                self._timeframe_last_update[timeframe] = timestamp
            else:
                # The aggregation step decides itself whether this data point
                # opens a new candle or extends the latest one.
                self._aggregate_to_timeframe(timeframe, new_data_point, timestamp)

    def _should_update_timeframe(self, timeframe: str, timestamp: pd.Timestamp) -> bool:
        """
        Check if timeframe should be updated based on timestamp.

        Returns True for "1min", for a timeframe that has not been updated yet,
        for a timeframe label that cannot be parsed, or when at least one full
        timeframe interval has passed since the last recorded update.
        """
        if timeframe == "1min":
            return True  # Always update 1min

        last_update = self._timeframe_last_update.get(timeframe)
        if last_update is None:
            return True  # First update

        minutes = self._parse_timeframe_minutes(timeframe)
        if minutes is None:
            return True  # Unknown timeframe, update anyway

        # Check if enough time has passed
        return timestamp >= last_update + pd.Timedelta(minutes=minutes)

    def _aggregate_to_timeframe(self, timeframe: str, new_data_point: Dict[str, float], timestamp: pd.Timestamp) -> None:
        """
        Aggregate 1min data to specified timeframe.

        This is a simplified aggregation: a candle starts at the first data
        point seen and stays open until a full timeframe interval has elapsed
        since that point (it is not aligned to clock boundaries).
        """
        buffer = self._timeframe_buffers[timeframe]

        # If buffer is empty or we're starting a new period, add new candle
        if not buffer or self._should_update_timeframe(timeframe, timestamp):
            aggregated_point = new_data_point.copy()
            aggregated_point['timestamp'] = timestamp
            buffer.append(aggregated_point)
            self._timeframe_last_update[timeframe] = timestamp
            return

        # Otherwise merge the data point into the last candle in the buffer
        last_candle = buffer[-1]
        last_candle['high'] = max(last_candle['high'], new_data_point['high'])
        last_candle['low'] = min(last_candle['low'], new_data_point['low'])
        last_candle['close'] = new_data_point['close']
        last_candle['volume'] += new_data_point['volume']

    def _get_timeframe_buffer(self, timeframe: str) -> pd.DataFrame:
        """
        Get current buffer for specific timeframe as DataFrame.

        Returns an empty DataFrame when the timeframe has no buffer or the
        buffer is empty; otherwise the rows are indexed by 'timestamp' when
        that column is present.
        """
        buffer_data = list(self._timeframe_buffers.get(timeframe, ()))
        if not buffer_data:
            return pd.DataFrame()

        df = pd.DataFrame(buffer_data)
        if 'timestamp' in df.columns:
            df = df.set_index('timestamp')

        return df

    def _validate_calculation_state(self) -> bool:
        """
        Validate internal calculation state consistency.

        Returns True when validation is disabled or all checks pass. An
        exception raised during the checks is logged, counted in the
        'state_validation_failures' metric and reported as False.
        """
        if not self._state_validation_enabled:
            return True

        try:
            # Check that all required buffers exist
            for timeframe in self.get_minimum_buffer_size():
                if timeframe not in self._timeframe_buffers:
                    logging.warning(f"Missing buffer for timeframe {timeframe}")
                    return False

            # Check that indicator states are valid
            for name, state in self._indicator_states.items():
                if hasattr(state, 'is_initialized') and not state.is_initialized:
                    logging.warning(f"Indicator {name} not initialized")
                    return False

            return True

        except Exception as e:
            logging.error(f"State validation failed: {e}")
            self._performance_metrics['state_validation_failures'] += 1
            return False

    def _recover_from_state_corruption(self) -> None:
        """Recover from corrupted calculation state."""
        logging.warning(f"Recovering from state corruption in strategy {self.name}")

        # Reset to initialization mode
        self._calculation_mode = "initialization"
        self._is_warmed_up = False

        # Try to recalculate from available buffer data
        try:
            self._reinitialize_from_buffers()
        except Exception as e:
            logging.error(f"Failed to recover from buffers: {e}")
            # Complete reset as last resort
            self.reset_calculation_state()

    def _reinitialize_from_buffers(self) -> None:
        """Reinitialize indicators from available buffer data."""
        # This method should be overridden by specific strategies
        # to implement their own recovery logic
        pass

    def handle_data_gap(self, gap_duration: pd.Timedelta) -> None:
        """
        Handle gaps in data stream.

        Every call is counted in the 'data_gaps_handled' metric. A gap longer
        than the configured maximum triggers a full reinitialization; shorter
        gaps are only logged and the current state is kept.
        """
        self._performance_metrics['data_gaps_handled'] += 1

        if gap_duration > self._max_acceptable_gap:
            logging.warning(f"Data gap {gap_duration} exceeds maximum acceptable gap {self._max_acceptable_gap}")
            self._trigger_reinitialization()
        else:
            logging.info(f"Handling acceptable data gap: {gap_duration}")
            # For small gaps, continue with current state

    def _trigger_reinitialization(self) -> None:
        """Trigger strategy reinitialization due to data gap or corruption."""
        logging.info(f"Triggering reinitialization for strategy {self.name}")
        self.reset_calculation_state()

    # Compatibility methods for original strategy interface
    def get_timeframes(self) -> List[str]:
        """Get required timeframes (compatibility method)."""
        return list(self.get_minimum_buffer_size().keys())

    def initialize(self, backtester) -> None:
        """Initialize strategy (compatibility method)."""
        # This method provides compatibility with the original strategy interface
        # The actual initialization happens through the incremental interface
        self.initialized = True
        logging.info(f"Incremental strategy {self.name} initialized in compatibility mode")

    def __repr__(self) -> str:
        """String representation of the strategy."""
        return (f"{self.__class__.__name__}(name={self.name}, "
                f"weight={self.weight}, mode={self._calculation_mode}, "
                f"warmed_up={self._is_warmed_up}, "
                f"data_points={self._data_points_received})")