"""
Base classes for the incremental strategy system.
This module contains the fundamental building blocks for all incremental trading strategies:
- IncStrategySignal: Represents trading signals with confidence and metadata
- IncStrategyBase: Abstract base class that all incremental strategies must inherit from
- TimeframeAggregator: Built-in timeframe aggregation for minute-level data processing
The incremental approach allows strategies to:
- Process new data points without full recalculation
- Maintain bounded memory usage regardless of data history length
- Provide real-time performance with minimal latency
- Support both initialization and incremental modes
- Accept minute-level data and internally aggregate to any timeframe
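Example (illustrative; MyIncStrategy stands in for a concrete subclass):
    strategy = MyIncStrategy(name="demo", params={"timeframe": "15min"})
    for ts, bar in minute_feed:  # bar: OHLCV dict keyed "open"/"high"/"low"/"close"/"volume"
        signal = strategy.process_data_point(ts, bar)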
"""
import pandas as pd
from abc import ABC, abstractmethod
from typing import Dict, Optional, List, Any
from collections import deque
import logging
import time
# Import new timeframe utilities
from ..utils.timeframe_utils import (
aggregate_minute_data_to_timeframe,
parse_timeframe_to_minutes,
get_latest_complete_bar,
MinuteDataBuffer,
TimeframeError
)
logger = logging.getLogger(__name__)
class IncStrategySignal:
"""
Represents a trading signal from an incremental strategy.
A signal encapsulates the strategy's recommendation along with confidence
level, optional price target, and additional metadata.
Attributes:
signal_type (str): Type of signal - "ENTRY", "EXIT", or "HOLD"
confidence (float): Confidence level from 0.0 to 1.0
price (Optional[float]): Optional specific price for the signal
metadata (Dict): Additional signal data and context
Example:
# Entry signal with high confidence
signal = IncStrategySignal("ENTRY", confidence=0.8)
# Exit signal with stop loss price
signal = IncStrategySignal("EXIT", confidence=1.0, price=50000,
metadata={"type": "STOP_LOSS"})
"""
def __init__(self, signal_type: str, confidence: float = 1.0,
price: Optional[float] = None, metadata: Optional[Dict] = None):
"""
Initialize a strategy signal.
Args:
signal_type: Type of signal ("ENTRY", "EXIT", "HOLD")
confidence: Confidence level (0.0 to 1.0)
price: Optional specific price for the signal
metadata: Additional signal data and context
"""
self.signal_type = signal_type
self.confidence = max(0.0, min(1.0, confidence)) # Clamp to [0,1]
self.price = price
self.metadata = metadata or {}
@classmethod
def BUY(cls, confidence: float = 1.0, price: Optional[float] = None, **metadata):
"""Create a BUY signal."""
return cls("ENTRY", confidence, price, metadata)
@classmethod
def SELL(cls, confidence: float = 1.0, price: Optional[float] = None, **metadata):
"""Create a SELL signal."""
return cls("EXIT", confidence, price, metadata)
@classmethod
def HOLD(cls, confidence: float = 0.0, **metadata):
"""Create a HOLD signal."""
return cls("HOLD", confidence, None, metadata)
def __repr__(self) -> str:
"""String representation of the signal."""
return (f"IncStrategySignal(type={self.signal_type}, "
f"confidence={self.confidence:.2f}, "
f"price={self.price}, metadata={self.metadata})")
class TimeframeAggregator:
"""
Handles real-time aggregation of minute data to higher timeframes.
    This class accumulates minute-level OHLCV data and emits a complete
    bar each time a timeframe period ends. It relies on the shared
    timeframe utilities for mathematically correct aggregation that
    matches pandas resampling behavior.
    Key properties:
- Uses bar END timestamps (prevents future data leakage)
- Proper OHLCV aggregation (first/max/min/last/sum)
- Mathematical equivalence to pandas resampling
- Memory-efficient buffer management
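    Example (sketch; `minute_stream` yields (pd.Timestamp, OHLCV dict) pairs):
        agg = TimeframeAggregator(timeframe="15min")
        for ts, bar in minute_stream:
            completed = agg.update(ts, bar)
            if completed is not None:
                print(f"15min bar closed at {completed['timestamp']}")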
"""
def __init__(self, timeframe: str = "15min", max_buffer_size: int = 1440):
"""
Initialize timeframe aggregator.
Args:
timeframe: Target timeframe string (e.g., "15min", "1h", "4h")
max_buffer_size: Maximum minute data buffer size (default: 1440 = 24h)
"""
self.timeframe = timeframe
self.timeframe_minutes = parse_timeframe_to_minutes(timeframe)
# Use MinuteDataBuffer for efficient minute data management
self.minute_buffer = MinuteDataBuffer(max_size=max_buffer_size)
# Track last processed bar to avoid reprocessing
self.last_processed_bar_timestamp = None
# Performance tracking
self._bars_completed = 0
self._minute_points_processed = 0
def update(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[Dict[str, float]]:
"""
Update with new minute data and return completed bar if timeframe is complete.
Args:
timestamp: Timestamp of the minute data
ohlcv_data: OHLCV data dictionary
Returns:
Completed OHLCV bar if timeframe period ended, None otherwise
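        Example (sketch; values are illustrative):
            agg = TimeframeAggregator("5min")
            bar = {"open": 100.0, "high": 101.0, "low": 99.5, "close": 100.5, "volume": 12.0}
            completed = agg.update(pd.Timestamp("2024-01-01 09:01"), bar)
            # None is returned until enough minutes accumulate to close a 5min period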
"""
try:
# Add minute data to buffer
self.minute_buffer.add(timestamp, ohlcv_data)
self._minute_points_processed += 1
# Get latest complete bar using new utilities
latest_bar = get_latest_complete_bar(
self.minute_buffer.get_data(),
self.timeframe
)
if latest_bar is None:
return None
# Check if this is a new bar (avoid reprocessing)
bar_timestamp = latest_bar['timestamp']
if self.last_processed_bar_timestamp == bar_timestamp:
return None # Already processed this bar
# Update tracking
self.last_processed_bar_timestamp = bar_timestamp
self._bars_completed += 1
return latest_bar
except TimeframeError as e:
logger.error(f"Timeframe aggregation error: {e}")
return None
except Exception as e:
logger.error(f"Unexpected error in timeframe aggregation: {e}")
return None
def get_current_bar(self) -> Optional[Dict[str, float]]:
"""
Get the current incomplete bar (for debugging).
Returns:
Current incomplete bar data or None
"""
try:
# Get recent data and try to aggregate
recent_data = self.minute_buffer.get_data(lookback_minutes=self.timeframe_minutes)
if not recent_data:
return None
# Aggregate to get current (possibly incomplete) bar
bars = aggregate_minute_data_to_timeframe(recent_data, self.timeframe, "end")
if bars:
return bars[-1] # Return most recent bar
return None
except Exception as e:
logger.debug(f"Error getting current bar: {e}")
return None
def reset(self):
"""Reset aggregator state."""
self.minute_buffer = MinuteDataBuffer(max_size=self.minute_buffer.max_size)
self.last_processed_bar_timestamp = None
self._bars_completed = 0
self._minute_points_processed = 0
def get_stats(self) -> Dict[str, Any]:
"""Get aggregator statistics."""
return {
'timeframe': self.timeframe,
'timeframe_minutes': self.timeframe_minutes,
'minute_points_processed': self._minute_points_processed,
'bars_completed': self._bars_completed,
'buffer_size': len(self.minute_buffer.get_data()),
'last_processed_bar': self.last_processed_bar_timestamp
}
class IncStrategyBase(ABC):
"""
Abstract base class for all incremental trading strategies.
This class defines the interface that all incremental strategies must implement:
- get_minimum_buffer_size(): Specify minimum data requirements
    - calculate_on_data(): Update internal indicators with each new bar
- supports_incremental_calculation(): Whether strategy supports incremental mode
- get_entry_signal(): Generate entry signals
- get_exit_signal(): Generate exit signals
The incremental approach allows strategies to:
- Process new data points without full recalculation
- Maintain bounded memory usage regardless of data history length
- Provide real-time performance with minimal latency
- Support both initialization and incremental modes
- Accept minute-level data and internally aggregate to any timeframe
New Features:
- Built-in TimeframeAggregator for minute-level data processing
    - process_data_point() method that accepts raw minute-level data from real-time systems
    - Automatic aggregation to the timeframe configured via params["timeframe"]
    - Compatibility helpers (get_timeframes(), initialize()) for the original strategy interface
Attributes:
name (str): Strategy name
weight (float): Strategy weight for combination
params (Dict): Strategy parameters
calculation_mode (str): Current mode ('initialization' or 'incremental')
is_warmed_up (bool): Whether strategy has sufficient data for reliable signals
timeframe_buffers (Dict): Rolling buffers for different timeframes
indicator_states (Dict): Internal indicator calculation states
timeframe_aggregator (TimeframeAggregator): Built-in aggregator for minute data
Example:
        class MyIncStrategy(IncStrategyBase):
            def get_minimum_buffer_size(self):
                return {"15min": 50}  # Strategy works on 15min timeframe
            def calculate_on_data(self, new_data_point, timestamp):
                # Update internal indicators incrementally
                self._update_indicators(new_data_point)
            def supports_incremental_calculation(self):
                return True
            def get_entry_signal(self):
                # Generate signal based on current state
                if self._should_enter():
                    return IncStrategySignal.BUY(confidence=0.8)
                return IncStrategySignal.HOLD()
            def get_exit_signal(self):
                return IncStrategySignal.HOLD()
        # Usage with minute-level data:
        strategy = MyIncStrategy(name="my_strategy", params={"timeframe": "15min"})
        for minute_data in live_stream:
            signal = strategy.process_data_point(minute_data['timestamp'], minute_data)
"""
def __init__(self, name: str, weight: float = 1.0, params: Optional[Dict] = None):
"""
Initialize the incremental strategy base.
Args:
name: Strategy name/identifier
weight: Strategy weight for combination (default: 1.0)
params: Strategy-specific parameters
"""
self.name = name
self.weight = weight
self.params = params or {}
# Calculation state
self._calculation_mode = "initialization"
self._is_warmed_up = False
self._data_points_received = 0
# Data management
self._timeframe_buffers = {}
self._timeframe_last_update = {}
self._indicator_states = {}
self._last_signals = {}
self._signal_history = deque(maxlen=100) # Keep last 100 signals
# Performance tracking
self._performance_metrics = {
'update_times': deque(maxlen=1000),
'signal_generation_times': deque(maxlen=1000),
'state_validation_failures': 0,
'data_gaps_handled': 0,
'minute_data_points_processed': 0,
'timeframe_bars_completed': 0
}
# Configuration
self._buffer_size_multiplier = 1.5 # Extra buffer for safety
self._state_validation_enabled = True
self._max_acceptable_gap = pd.Timedelta(minutes=5)
# Timeframe aggregation - Updated to use new utilities
self._primary_timeframe = self.params.get("timeframe", "1min")
self._timeframe_aggregator = None
        # Only create an aggregator when the target timeframe is coarser than 1min;
        # raw minute data needs no aggregation
if self._primary_timeframe != "1min":
try:
self._timeframe_aggregator = TimeframeAggregator(
timeframe=self._primary_timeframe,
max_buffer_size=1440 # 24 hours of minute data
)
logger.info(f"Created timeframe aggregator for {self._primary_timeframe}")
except TimeframeError as e:
logger.error(f"Failed to create timeframe aggregator: {e}")
self._timeframe_aggregator = None
logger.info(f"Initialized incremental strategy: {self.name} (timeframe: {self._primary_timeframe})")
def process_data_point(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[IncStrategySignal]:
"""
Process a new data point and return signal if generated.
This is the main entry point for incremental processing. It handles
timeframe aggregation, buffer updates, and signal generation.
Args:
timestamp: Timestamp of the data point
ohlcv_data: OHLCV data dictionary
Returns:
IncStrategySignal if a signal is generated, None otherwise
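        Example (sketch; `strategy` is a concrete subclass instance and
        `place_order` a hypothetical execution helper):
            bar = {"open": 100.0, "high": 101.0, "low": 99.0, "close": 100.5, "volume": 8.0}
            signal = strategy.process_data_point(pd.Timestamp("2024-01-01 09:15"), bar)
            if signal is not None and signal.signal_type == "ENTRY":
                place_order(signal)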
"""
start_time = time.time()
try:
# Update performance metrics
self._performance_metrics['minute_data_points_processed'] += 1
self._data_points_received += 1
# Handle timeframe aggregation if needed
if self._timeframe_aggregator is not None:
completed_bar = self._timeframe_aggregator.update(timestamp, ohlcv_data)
if completed_bar is not None:
# Process the completed timeframe bar
self._performance_metrics['timeframe_bars_completed'] += 1
return self._process_timeframe_bar(completed_bar['timestamp'], completed_bar)
else:
# No complete bar yet, return None
return None
else:
# Process minute data directly
return self._process_timeframe_bar(timestamp, ohlcv_data)
except Exception as e:
logger.error(f"Error processing data point in {self.name}: {e}")
return None
finally:
# Track processing time
processing_time = time.time() - start_time
self._performance_metrics['update_times'].append(processing_time)
def _process_timeframe_bar(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[IncStrategySignal]:
"""Process a complete timeframe bar and generate signals."""
# Update timeframe buffers
self._update_timeframe_buffers(ohlcv_data, timestamp)
# Call strategy-specific calculation
self.calculate_on_data(ohlcv_data, timestamp)
# Check if strategy is warmed up
if not self._is_warmed_up:
self._check_warmup_status()
# Generate signal if warmed up
if self._is_warmed_up:
signal_start = time.time()
signal = self.get_current_signal()
signal_time = time.time() - signal_start
self._performance_metrics['signal_generation_times'].append(signal_time)
# Store signal in history
if signal and signal.signal_type != "HOLD":
self._signal_history.append({
'timestamp': timestamp,
'signal': signal,
'strategy_state': self.get_current_state_summary()
})
return signal
return None
def _check_warmup_status(self):
"""Check if strategy has enough data to be considered warmed up."""
min_buffer_sizes = self.get_minimum_buffer_size()
for timeframe, min_size in min_buffer_sizes.items():
buffer = self._timeframe_buffers.get(timeframe, deque())
if len(buffer) < min_size:
return # Not enough data yet
# All buffers have sufficient data
self._is_warmed_up = True
self._calculation_mode = "incremental"
logger.info(f"Strategy {self.name} is now warmed up after {self._data_points_received} data points")
def get_current_signal(self) -> IncStrategySignal:
"""Get the current signal based on strategy state."""
# Try entry signal first
entry_signal = self.get_entry_signal()
if entry_signal and entry_signal.signal_type != "HOLD":
return entry_signal
# Check exit signal
exit_signal = self.get_exit_signal()
if exit_signal and exit_signal.signal_type != "HOLD":
return exit_signal
# Default to hold
return IncStrategySignal.HOLD()
def get_current_incomplete_bar(self) -> Optional[Dict[str, float]]:
"""Get current incomplete timeframe bar (for debugging)."""
if self._timeframe_aggregator is not None:
return self._timeframe_aggregator.get_current_bar()
return None
def get_timeframe_aggregator_stats(self) -> Optional[Dict[str, Any]]:
"""Get timeframe aggregator statistics."""
if self._timeframe_aggregator is not None:
return self._timeframe_aggregator.get_stats()
return None
def create_minute_data_buffer(self, max_size: int = 1440) -> MinuteDataBuffer:
"""
Create a MinuteDataBuffer for strategies that need direct minute data management.
Args:
max_size: Maximum buffer size in minutes (default: 1440 = 24h)
Returns:
MinuteDataBuffer instance
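        Example (sketch; buffer API as used elsewhere in this module):
            buf = self.create_minute_data_buffer(max_size=720)  # 12 hours of minutes
            buf.add(timestamp, ohlcv_data)
            recent = buf.get_data(lookback_minutes=60)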
"""
return MinuteDataBuffer(max_size=max_size)
def aggregate_minute_data(self, minute_data: List[Dict[str, float]],
timeframe: str, timestamp_mode: str = "end") -> List[Dict[str, float]]:
"""
Helper method to aggregate minute data to specified timeframe.
Args:
minute_data: List of minute OHLCV data
timeframe: Target timeframe (e.g., "5min", "15min", "1h")
timestamp_mode: "end" (default) or "start" for bar timestamps
Returns:
List of aggregated OHLCV bars
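        Example (sketch; each entry carries a 'timestamp' key as in MinuteDataBuffer):
            bars = self.aggregate_minute_data(minute_data, "15min")
            latest_close = bars[-1]["close"] if bars else None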
"""
try:
return aggregate_minute_data_to_timeframe(minute_data, timeframe, timestamp_mode)
except TimeframeError as e:
logger.error(f"Error aggregating minute data in {self.name}: {e}")
return []
# Properties
@property
def calculation_mode(self) -> str:
"""Get current calculation mode."""
return self._calculation_mode
@property
def is_warmed_up(self) -> bool:
"""Check if strategy is warmed up."""
return self._is_warmed_up
# Abstract methods that must be implemented by strategies
@abstractmethod
def get_minimum_buffer_size(self) -> Dict[str, int]:
"""
Get minimum buffer sizes for each timeframe.
This method specifies how much historical data the strategy needs
for each timeframe to generate reliable signals.
Returns:
Dict[str, int]: Mapping of timeframe to minimum buffer size
Example:
return {"15min": 50, "1h": 24} # 50 15min bars, 24 1h bars
"""
pass
@abstractmethod
def calculate_on_data(self, new_data_point: Dict[str, float], timestamp: pd.Timestamp) -> None:
"""
Process new data point and update internal indicators.
This method is called for each new timeframe bar and should update
all internal indicators and strategy state incrementally.
Args:
new_data_point: New OHLCV data point
timestamp: Timestamp of the data point
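        Example (illustrative incremental EMA update; "ema_period" is a
        hypothetical strategy parameter):
            close = new_data_point["close"]
            alpha = 2.0 / (self.params.get("ema_period", 20) + 1)
            prev = self._indicator_states.get("ema", close)
            self._indicator_states["ema"] = alpha * close + (1 - alpha) * prev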
"""
pass
@abstractmethod
def supports_incremental_calculation(self) -> bool:
"""
Check if strategy supports incremental calculation.
Returns:
bool: True if strategy can process data incrementally
"""
pass
@abstractmethod
def get_entry_signal(self) -> IncStrategySignal:
"""
Generate entry signal based on current strategy state.
This method should use the current internal state to determine
whether an entry signal should be generated.
Returns:
IncStrategySignal: Entry signal with confidence level
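        Example (sketch; "ema" follows the calculate_on_data example and
        "last_close" is a hypothetical cached price):
            ema = self._indicator_states.get("ema")
            last_close = self._indicator_states.get("last_close")
            if ema is not None and last_close is not None and last_close > ema:
                return IncStrategySignal.BUY(confidence=0.7)
            return IncStrategySignal.HOLD()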
"""
pass
@abstractmethod
def get_exit_signal(self) -> IncStrategySignal:
"""
Generate exit signal based on current strategy state.
This method should use the current internal state to determine
whether an exit signal should be generated.
Returns:
IncStrategySignal: Exit signal with confidence level
"""
pass
# Utility methods
def get_confidence(self) -> float:
"""
Get strategy confidence for the current market state.
Default implementation returns 1.0. Strategies can override
this to provide dynamic confidence based on market conditions.
Returns:
float: Confidence level (0.0 to 1.0)
"""
return 1.0
def reset_calculation_state(self) -> None:
"""Reset internal calculation state for reinitialization."""
self._calculation_mode = "initialization"
self._is_warmed_up = False
self._data_points_received = 0
self._timeframe_buffers.clear()
self._timeframe_last_update.clear()
self._indicator_states.clear()
self._last_signals.clear()
self._signal_history.clear()
# Reset timeframe aggregator
if self._timeframe_aggregator is not None:
self._timeframe_aggregator.reset()
# Reset performance metrics
for key in self._performance_metrics:
if isinstance(self._performance_metrics[key], deque):
self._performance_metrics[key].clear()
else:
self._performance_metrics[key] = 0
def get_current_state_summary(self) -> Dict[str, Any]:
"""Get summary of current calculation state for debugging."""
return {
'strategy_name': self.name,
'calculation_mode': self._calculation_mode,
'is_warmed_up': self._is_warmed_up,
'data_points_received': self._data_points_received,
'timeframes': list(self._timeframe_buffers.keys()),
'buffer_sizes': {tf: len(buf) for tf, buf in self._timeframe_buffers.items()},
'indicator_states': {name: state.get_state_summary() if hasattr(state, 'get_state_summary') else str(state)
for name, state in self._indicator_states.items()},
'last_signals': self._last_signals,
'timeframe_aggregator': {
'enabled': self._timeframe_aggregator is not None,
'primary_timeframe': self._primary_timeframe,
'current_incomplete_bar': self.get_current_incomplete_bar()
},
'performance_metrics': {
'avg_update_time': sum(self._performance_metrics['update_times']) / len(self._performance_metrics['update_times'])
if self._performance_metrics['update_times'] else 0,
'avg_signal_time': sum(self._performance_metrics['signal_generation_times']) / len(self._performance_metrics['signal_generation_times'])
if self._performance_metrics['signal_generation_times'] else 0,
'validation_failures': self._performance_metrics['state_validation_failures'],
'data_gaps_handled': self._performance_metrics['data_gaps_handled'],
'minute_data_points_processed': self._performance_metrics['minute_data_points_processed'],
'timeframe_bars_completed': self._performance_metrics['timeframe_bars_completed']
}
}
def _update_timeframe_buffers(self, new_data_point: Dict[str, float], timestamp: pd.Timestamp) -> None:
"""Update all timeframe buffers with new data point."""
# Get minimum buffer sizes
min_buffer_sizes = self.get_minimum_buffer_size()
for timeframe in min_buffer_sizes.keys():
# Calculate actual buffer size with multiplier
min_size = min_buffer_sizes[timeframe]
actual_buffer_size = int(min_size * self._buffer_size_multiplier)
# Initialize buffer if needed
if timeframe not in self._timeframe_buffers:
self._timeframe_buffers[timeframe] = deque(maxlen=actual_buffer_size)
self._timeframe_last_update[timeframe] = None
# Add data point to buffer
data_point = new_data_point.copy()
data_point['timestamp'] = timestamp
self._timeframe_buffers[timeframe].append(data_point)
self._timeframe_last_update[timeframe] = timestamp
def _get_timeframe_buffer(self, timeframe: str) -> pd.DataFrame:
"""Get current buffer for specific timeframe as DataFrame."""
if timeframe not in self._timeframe_buffers:
return pd.DataFrame()
buffer_data = list(self._timeframe_buffers[timeframe])
if not buffer_data:
return pd.DataFrame()
df = pd.DataFrame(buffer_data)
if 'timestamp' in df.columns:
df = df.set_index('timestamp')
return df
def handle_data_gap(self, gap_duration: pd.Timedelta) -> None:
"""Handle gaps in data stream."""
self._performance_metrics['data_gaps_handled'] += 1
if gap_duration > self._max_acceptable_gap:
logger.warning(f"Data gap {gap_duration} exceeds maximum acceptable gap {self._max_acceptable_gap}")
self._trigger_reinitialization()
else:
logger.info(f"Handling acceptable data gap: {gap_duration}")
# For small gaps, continue with current state
def _trigger_reinitialization(self) -> None:
"""Trigger strategy reinitialization due to data gap or corruption."""
logger.info(f"Triggering reinitialization for strategy {self.name}")
self.reset_calculation_state()
# Compatibility methods for original strategy interface
def get_timeframes(self) -> List[str]:
"""Get required timeframes (compatibility method)."""
return list(self.get_minimum_buffer_size().keys())
def initialize(self, backtester) -> None:
"""Initialize strategy (compatibility method)."""
# This method provides compatibility with the original strategy interface
# The actual initialization happens through the incremental interface
self.initialized = True
logger.info(f"Incremental strategy {self.name} initialized in compatibility mode")
def __repr__(self) -> str:
"""String representation of the strategy."""
return (f"{self.__class__.__name__}(name={self.name}, "
f"weight={self.weight}, mode={self._calculation_mode}, "
f"warmed_up={self._is_warmed_up}, "
f"data_points={self._data_points_received})")