649 lines
27 KiB
Python
Raw Normal View History

2025-05-26 13:25:56 +08:00
"""
Base classes for the incremental strategy system.
This module contains the fundamental building blocks for all incremental trading strategies:
- IncStrategySignal: Represents trading signals with confidence and metadata
- IncStrategyBase: Abstract base class that all incremental strategies must inherit from
- TimeframeAggregator: Built-in timeframe aggregation for minute-level data processing
2025-05-26 13:25:56 +08:00
"""
import pandas as pd
from abc import ABC, abstractmethod
from typing import Dict, Optional, List, Union, Any
from collections import deque
import logging
# Import the original signal class for compatibility
from ..strategies.base import StrategySignal
# Create alias for consistency
IncStrategySignal = StrategySignal
class TimeframeAggregator:
"""
Handles real-time aggregation of minute data to higher timeframes.
This class accumulates minute-level OHLCV data and produces complete
bars when a timeframe period is completed. Integrated into IncStrategyBase
to provide consistent minute-level data processing across all strategies.
"""
def __init__(self, timeframe_minutes: int = 15):
"""
Initialize timeframe aggregator.
Args:
timeframe_minutes: Target timeframe in minutes (e.g., 60 for 1h, 15 for 15min)
"""
self.timeframe_minutes = timeframe_minutes
self.current_bar = None
self.current_bar_start = None
self.last_completed_bar = None
def update(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[Dict[str, float]]:
"""
Update with new minute data and return completed bar if timeframe is complete.
Args:
timestamp: Timestamp of the data
ohlcv_data: OHLCV data dictionary
Returns:
Completed OHLCV bar if timeframe period ended, None otherwise
"""
# Calculate which timeframe bar this timestamp belongs to
bar_start = self._get_bar_start_time(timestamp)
# Check if we're starting a new bar
if self.current_bar_start != bar_start:
# Save the completed bar (if any)
completed_bar = self.current_bar.copy() if self.current_bar is not None else None
# Start new bar
self.current_bar_start = bar_start
self.current_bar = {
'timestamp': bar_start,
'open': ohlcv_data['close'], # Use current close as open for new bar
'high': ohlcv_data['close'],
'low': ohlcv_data['close'],
'close': ohlcv_data['close'],
'volume': ohlcv_data['volume']
}
# Return the completed bar (if any)
if completed_bar is not None:
self.last_completed_bar = completed_bar
return completed_bar
else:
# Update current bar with new data
if self.current_bar is not None:
self.current_bar['high'] = max(self.current_bar['high'], ohlcv_data['high'])
self.current_bar['low'] = min(self.current_bar['low'], ohlcv_data['low'])
self.current_bar['close'] = ohlcv_data['close']
self.current_bar['volume'] += ohlcv_data['volume']
return None # No completed bar yet
def _get_bar_start_time(self, timestamp: pd.Timestamp) -> pd.Timestamp:
"""Calculate the start time of the timeframe bar for given timestamp.
This method now aligns with pandas resampling to ensure consistency
with the original strategy's bar boundaries.
"""
# Use pandas-style resampling alignment
# This ensures bars align to standard boundaries (e.g., 00:00, 00:15, 00:30, 00:45)
freq_str = f'{self.timeframe_minutes}min'
# Create a temporary series with the timestamp and resample to get the bar start
temp_series = pd.Series([1], index=[timestamp])
resampled = temp_series.resample(freq_str)
# Get the first group's name (which is the bar start time)
for bar_start, _ in resampled:
return bar_start
# Fallback to original method if resampling fails
minutes_since_midnight = timestamp.hour * 60 + timestamp.minute
bar_minutes = (minutes_since_midnight // self.timeframe_minutes) * self.timeframe_minutes
return timestamp.replace(
hour=bar_minutes // 60,
minute=bar_minutes % 60,
second=0,
microsecond=0
)
def get_current_bar(self) -> Optional[Dict[str, float]]:
"""Get the current incomplete bar (for debugging)."""
return self.current_bar.copy() if self.current_bar is not None else None
def reset(self):
"""Reset aggregator state."""
self.current_bar = None
self.current_bar_start = None
self.last_completed_bar = None
2025-05-26 13:25:56 +08:00
class IncStrategyBase(ABC):
"""
Abstract base class for all incremental trading strategies.
This class defines the interface that all incremental strategies must implement:
- get_minimum_buffer_size(): Specify minimum data requirements
- calculate_on_data(): Process new data points incrementally
- supports_incremental_calculation(): Whether strategy supports incremental mode
- get_entry_signal(): Generate entry signals
- get_exit_signal(): Generate exit signals
The incremental approach allows strategies to:
- Process new data points without full recalculation
- Maintain bounded memory usage regardless of data history length
- Provide real-time performance with minimal latency
- Support both initialization and incremental modes
- Accept minute-level data and internally aggregate to any timeframe
New Features:
- Built-in TimeframeAggregator for minute-level data processing
- update_minute_data() method for real-time trading systems
- Automatic timeframe detection and aggregation
- Backward compatibility with existing update() methods
2025-05-26 13:25:56 +08:00
Attributes:
name (str): Strategy name
weight (float): Strategy weight for combination
params (Dict): Strategy parameters
calculation_mode (str): Current mode ('initialization' or 'incremental')
is_warmed_up (bool): Whether strategy has sufficient data for reliable signals
timeframe_buffers (Dict): Rolling buffers for different timeframes
indicator_states (Dict): Internal indicator calculation states
timeframe_aggregator (TimeframeAggregator): Built-in aggregator for minute data
2025-05-26 13:25:56 +08:00
Example:
class MyIncStrategy(IncStrategyBase):
def get_minimum_buffer_size(self):
return {"15min": 50} # Strategy works on 15min timeframe
2025-05-26 13:25:56 +08:00
def calculate_on_data(self, new_data_point, timestamp):
# Process new data incrementally
self._update_indicators(new_data_point)
def get_entry_signal(self):
# Generate signal based on current state
if self._should_enter():
return IncStrategySignal("ENTRY", confidence=0.8)
return IncStrategySignal("HOLD", confidence=0.0)
# Usage with minute-level data:
strategy = MyIncStrategy(params={"timeframe_minutes": 15})
for minute_data in live_stream:
result = strategy.update_minute_data(minute_data['timestamp'], minute_data)
if result is not None: # Complete 15min bar formed
entry_signal = strategy.get_entry_signal()
2025-05-26 13:25:56 +08:00
"""
def __init__(self, name: str, weight: float = 1.0, params: Optional[Dict] = None):
"""
Initialize the incremental strategy base.
Args:
name: Strategy name/identifier
weight: Strategy weight for combination (default: 1.0)
params: Strategy-specific parameters
"""
self.name = name
self.weight = weight
self.params = params or {}
# Calculation state
self._calculation_mode = "initialization"
self._is_warmed_up = False
self._data_points_received = 0
# Timeframe management
self._timeframe_buffers = {}
self._timeframe_last_update = {}
self._buffer_size_multiplier = self.params.get("buffer_size_multiplier", 2.0)
# Built-in timeframe aggregation
self._primary_timeframe_minutes = self._extract_timeframe_minutes()
self._timeframe_aggregator = None
if self._primary_timeframe_minutes > 1:
self._timeframe_aggregator = TimeframeAggregator(self._primary_timeframe_minutes)
2025-05-26 13:25:56 +08:00
# Indicator states (strategy-specific)
self._indicator_states = {}
# Signal generation state
self._last_signals = {}
self._signal_history = deque(maxlen=100)
# Error handling
self._max_acceptable_gap = pd.Timedelta(self.params.get("max_acceptable_gap", "5min"))
self._state_validation_enabled = self.params.get("enable_state_validation", True)
# Performance monitoring
self._performance_metrics = {
'update_times': deque(maxlen=1000),
'signal_generation_times': deque(maxlen=1000),
'state_validation_failures': 0,
'data_gaps_handled': 0,
'minute_data_points_processed': 0,
'timeframe_bars_completed': 0
2025-05-26 13:25:56 +08:00
}
# Compatibility with original strategy interface
self.initialized = False
self.timeframes_data = {}
def _extract_timeframe_minutes(self) -> int:
"""
Extract timeframe in minutes from strategy parameters.
Looks for timeframe configuration in various parameter formats:
- timeframe_minutes: Direct specification in minutes
- timeframe: String format like "15min", "1h", etc.
Returns:
int: Timeframe in minutes (default: 1 for minute-level processing)
"""
# Direct specification
if "timeframe_minutes" in self.params:
return self.params["timeframe_minutes"]
# String format parsing
timeframe_str = self.params.get("timeframe", "1min")
if timeframe_str.endswith("min"):
return int(timeframe_str[:-3])
elif timeframe_str.endswith("h"):
return int(timeframe_str[:-1]) * 60
elif timeframe_str.endswith("d"):
return int(timeframe_str[:-1]) * 60 * 24
else:
# Default to 1 minute if can't parse
return 1
def update_minute_data(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[Dict[str, Any]]:
"""
Update strategy with minute-level OHLCV data.
This method provides a standardized interface for real-time trading systems
that receive minute-level data. It internally aggregates to the strategy's
configured timeframe and only processes indicators when complete bars are formed.
Args:
timestamp: Timestamp of the minute data
ohlcv_data: Dictionary with 'open', 'high', 'low', 'close', 'volume'
Returns:
Strategy processing result if timeframe bar completed, None otherwise
Example:
# Process live minute data
result = strategy.update_minute_data(
timestamp=pd.Timestamp('2024-01-01 10:15:00'),
ohlcv_data={
'open': 100.0,
'high': 101.0,
'low': 99.5,
'close': 100.5,
'volume': 1000.0
}
)
if result is not None:
# A complete timeframe bar was formed and processed
entry_signal = strategy.get_entry_signal()
"""
self._performance_metrics['minute_data_points_processed'] += 1
# If no aggregator (1min strategy), process directly
if self._timeframe_aggregator is None:
self.calculate_on_data(ohlcv_data, timestamp)
return {
'timestamp': timestamp,
'timeframe_minutes': 1,
'processed_directly': True,
'is_warmed_up': self.is_warmed_up
}
# Use aggregator to accumulate minute data
completed_bar = self._timeframe_aggregator.update(timestamp, ohlcv_data)
if completed_bar is not None:
# A complete timeframe bar was formed
self._performance_metrics['timeframe_bars_completed'] += 1
# Process the completed bar
self.calculate_on_data(completed_bar, completed_bar['timestamp'])
# Return processing result
return {
'timestamp': completed_bar['timestamp'],
'timeframe_minutes': self._primary_timeframe_minutes,
'bar_data': completed_bar,
'is_warmed_up': self.is_warmed_up,
'processed_bar': True
}
# No complete bar yet
return None
def get_current_incomplete_bar(self) -> Optional[Dict[str, float]]:
"""
Get the current incomplete timeframe bar (for monitoring).
Useful for debugging and monitoring the aggregation process.
Returns:
Current incomplete bar data or None if no aggregator
"""
if self._timeframe_aggregator is not None:
return self._timeframe_aggregator.get_current_bar()
return None
2025-05-26 13:25:56 +08:00
@property
def calculation_mode(self) -> str:
"""Current calculation mode: 'initialization' or 'incremental'"""
return self._calculation_mode
@property
def is_warmed_up(self) -> bool:
"""Whether strategy has sufficient data for reliable signals"""
return self._is_warmed_up
@abstractmethod
def get_minimum_buffer_size(self) -> Dict[str, int]:
"""
Return minimum data points needed for each timeframe.
This method must be implemented by each strategy to specify how much
historical data is required for reliable calculations.
Returns:
Dict[str, int]: {timeframe: min_points} mapping
Example:
return {"15min": 50, "1min": 750} # 50 15min candles = 750 1min candles
"""
pass
@abstractmethod
def calculate_on_data(self, new_data_point: Dict[str, float], timestamp: pd.Timestamp) -> None:
"""
Process a single new data point incrementally.
This method is called for each new data point and should update
the strategy's internal state incrementally.
Args:
new_data_point: OHLCV data point {open, high, low, close, volume}
timestamp: Timestamp of the data point
"""
pass
@abstractmethod
def supports_incremental_calculation(self) -> bool:
"""
Whether strategy supports incremental calculation.
Returns:
bool: True if incremental mode supported, False for fallback to batch mode
"""
pass
@abstractmethod
def get_entry_signal(self) -> IncStrategySignal:
"""
Generate entry signal based on current strategy state.
This method should use the current internal state to determine
whether an entry signal should be generated.
Returns:
IncStrategySignal: Entry signal with confidence level
"""
pass
@abstractmethod
def get_exit_signal(self) -> IncStrategySignal:
"""
Generate exit signal based on current strategy state.
This method should use the current internal state to determine
whether an exit signal should be generated.
Returns:
IncStrategySignal: Exit signal with confidence level
"""
pass
def get_confidence(self) -> float:
"""
Get strategy confidence for the current market state.
Default implementation returns 1.0. Strategies can override
this to provide dynamic confidence based on market conditions.
Returns:
float: Confidence level (0.0 to 1.0)
"""
return 1.0
def reset_calculation_state(self) -> None:
"""Reset internal calculation state for reinitialization."""
self._calculation_mode = "initialization"
self._is_warmed_up = False
self._data_points_received = 0
self._timeframe_buffers.clear()
self._timeframe_last_update.clear()
self._indicator_states.clear()
self._last_signals.clear()
self._signal_history.clear()
# Reset timeframe aggregator
if self._timeframe_aggregator is not None:
self._timeframe_aggregator.reset()
2025-05-26 13:25:56 +08:00
# Reset performance metrics
for key in self._performance_metrics:
if isinstance(self._performance_metrics[key], deque):
self._performance_metrics[key].clear()
else:
self._performance_metrics[key] = 0
def get_current_state_summary(self) -> Dict[str, Any]:
"""Get summary of current calculation state for debugging."""
return {
'strategy_name': self.name,
'calculation_mode': self._calculation_mode,
'is_warmed_up': self._is_warmed_up,
'data_points_received': self._data_points_received,
'timeframes': list(self._timeframe_buffers.keys()),
'buffer_sizes': {tf: len(buf) for tf, buf in self._timeframe_buffers.items()},
'indicator_states': {name: state.get_state_summary() if hasattr(state, 'get_state_summary') else str(state)
for name, state in self._indicator_states.items()},
'last_signals': self._last_signals,
'timeframe_aggregator': {
'enabled': self._timeframe_aggregator is not None,
'primary_timeframe_minutes': self._primary_timeframe_minutes,
'current_incomplete_bar': self.get_current_incomplete_bar()
},
2025-05-26 13:25:56 +08:00
'performance_metrics': {
'avg_update_time': sum(self._performance_metrics['update_times']) / len(self._performance_metrics['update_times'])
if self._performance_metrics['update_times'] else 0,
'avg_signal_time': sum(self._performance_metrics['signal_generation_times']) / len(self._performance_metrics['signal_generation_times'])
if self._performance_metrics['signal_generation_times'] else 0,
'validation_failures': self._performance_metrics['state_validation_failures'],
'data_gaps_handled': self._performance_metrics['data_gaps_handled'],
'minute_data_points_processed': self._performance_metrics['minute_data_points_processed'],
'timeframe_bars_completed': self._performance_metrics['timeframe_bars_completed']
2025-05-26 13:25:56 +08:00
}
}
def _update_timeframe_buffers(self, new_data_point: Dict[str, float], timestamp: pd.Timestamp) -> None:
"""Update all timeframe buffers with new data point."""
# Get minimum buffer sizes
min_buffer_sizes = self.get_minimum_buffer_size()
for timeframe in min_buffer_sizes.keys():
# Calculate actual buffer size with multiplier
min_size = min_buffer_sizes[timeframe]
actual_buffer_size = int(min_size * self._buffer_size_multiplier)
# Initialize buffer if needed
if timeframe not in self._timeframe_buffers:
self._timeframe_buffers[timeframe] = deque(maxlen=actual_buffer_size)
self._timeframe_last_update[timeframe] = None
# Check if this timeframe should be updated
if self._should_update_timeframe(timeframe, timestamp):
# For 1min timeframe, add data directly
if timeframe == "1min":
data_point = new_data_point.copy()
data_point['timestamp'] = timestamp
self._timeframe_buffers[timeframe].append(data_point)
self._timeframe_last_update[timeframe] = timestamp
else:
# For other timeframes, we need to aggregate from 1min data
self._aggregate_to_timeframe(timeframe, new_data_point, timestamp)
def _should_update_timeframe(self, timeframe: str, timestamp: pd.Timestamp) -> bool:
"""Check if timeframe should be updated based on timestamp."""
if timeframe == "1min":
return True # Always update 1min
last_update = self._timeframe_last_update.get(timeframe)
if last_update is None:
return True # First update
# Calculate timeframe interval
if timeframe.endswith("min"):
minutes = int(timeframe[:-3])
interval = pd.Timedelta(minutes=minutes)
elif timeframe.endswith("h"):
hours = int(timeframe[:-1])
interval = pd.Timedelta(hours=hours)
else:
return True # Unknown timeframe, update anyway
# Check if enough time has passed
return timestamp >= last_update + interval
def _aggregate_to_timeframe(self, timeframe: str, new_data_point: Dict[str, float], timestamp: pd.Timestamp) -> None:
"""Aggregate 1min data to specified timeframe."""
# This is a simplified aggregation - in practice, you might want more sophisticated logic
buffer = self._timeframe_buffers[timeframe]
# If buffer is empty or we're starting a new period, add new candle
if not buffer or self._should_update_timeframe(timeframe, timestamp):
aggregated_point = new_data_point.copy()
aggregated_point['timestamp'] = timestamp
buffer.append(aggregated_point)
self._timeframe_last_update[timeframe] = timestamp
else:
# Update the last candle in the buffer
last_candle = buffer[-1]
last_candle['high'] = max(last_candle['high'], new_data_point['high'])
last_candle['low'] = min(last_candle['low'], new_data_point['low'])
last_candle['close'] = new_data_point['close']
last_candle['volume'] += new_data_point['volume']
def _get_timeframe_buffer(self, timeframe: str) -> pd.DataFrame:
"""Get current buffer for specific timeframe as DataFrame."""
if timeframe not in self._timeframe_buffers:
return pd.DataFrame()
buffer_data = list(self._timeframe_buffers[timeframe])
if not buffer_data:
return pd.DataFrame()
df = pd.DataFrame(buffer_data)
if 'timestamp' in df.columns:
df = df.set_index('timestamp')
return df
def _validate_calculation_state(self) -> bool:
"""Validate internal calculation state consistency."""
if not self._state_validation_enabled:
return True
try:
# Check that all required buffers exist
min_buffer_sizes = self.get_minimum_buffer_size()
for timeframe in min_buffer_sizes.keys():
if timeframe not in self._timeframe_buffers:
logging.warning(f"Missing buffer for timeframe {timeframe}")
return False
# Check that indicator states are valid
for name, state in self._indicator_states.items():
if hasattr(state, 'is_initialized') and not state.is_initialized:
logging.warning(f"Indicator {name} not initialized")
return False
return True
except Exception as e:
logging.error(f"State validation failed: {e}")
self._performance_metrics['state_validation_failures'] += 1
return False
def _recover_from_state_corruption(self) -> None:
"""Recover from corrupted calculation state."""
logging.warning(f"Recovering from state corruption in strategy {self.name}")
# Reset to initialization mode
self._calculation_mode = "initialization"
self._is_warmed_up = False
# Try to recalculate from available buffer data
try:
self._reinitialize_from_buffers()
except Exception as e:
logging.error(f"Failed to recover from buffers: {e}")
# Complete reset as last resort
self.reset_calculation_state()
def _reinitialize_from_buffers(self) -> None:
"""Reinitialize indicators from available buffer data."""
# This method should be overridden by specific strategies
# to implement their own recovery logic
pass
def handle_data_gap(self, gap_duration: pd.Timedelta) -> None:
"""Handle gaps in data stream."""
self._performance_metrics['data_gaps_handled'] += 1
if gap_duration > self._max_acceptable_gap:
logging.warning(f"Data gap {gap_duration} exceeds maximum acceptable gap {self._max_acceptable_gap}")
self._trigger_reinitialization()
else:
logging.info(f"Handling acceptable data gap: {gap_duration}")
# For small gaps, continue with current state
def _trigger_reinitialization(self) -> None:
"""Trigger strategy reinitialization due to data gap or corruption."""
logging.info(f"Triggering reinitialization for strategy {self.name}")
self.reset_calculation_state()
# Compatibility methods for original strategy interface
def get_timeframes(self) -> List[str]:
"""Get required timeframes (compatibility method)."""
return list(self.get_minimum_buffer_size().keys())
def initialize(self, backtester) -> None:
"""Initialize strategy (compatibility method)."""
# This method provides compatibility with the original strategy interface
# The actual initialization happens through the incremental interface
self.initialized = True
logging.info(f"Incremental strategy {self.name} initialized in compatibility mode")
def __repr__(self) -> str:
"""String representation of the strategy."""
return (f"{self.__class__.__name__}(name={self.name}, "
f"weight={self.weight}, mode={self._calculation_mode}, "
f"warmed_up={self._is_warmed_up}, "
f"data_points={self._data_points_received})")