TimeFrame agregator with right logic

This commit is contained in:
Vasily.onl
2025-05-28 18:26:51 +08:00
parent 78ccb15fda
commit 1861c336f9
20 changed files with 5031 additions and 99 deletions

View File

@@ -21,6 +21,15 @@ from collections import deque
import logging
import time
# Import new timeframe utilities
from ..utils.timeframe_utils import (
aggregate_minute_data_to_timeframe,
parse_timeframe_to_minutes,
get_latest_complete_bar,
MinuteDataBuffer,
TimeframeError
)
logger = logging.getLogger(__name__)
@@ -89,108 +98,122 @@ class TimeframeAggregator:
Handles real-time aggregation of minute data to higher timeframes.
This class accumulates minute-level OHLCV data and produces complete
bars when a timeframe period is completed. Integrated into IncStrategyBase
to provide consistent minute-level data processing across all strategies.
bars when a timeframe period is completed. Now uses the new timeframe
utilities for mathematically correct aggregation that matches pandas
resampling behavior.
Key improvements:
- Uses bar END timestamps (prevents future data leakage)
- Proper OHLCV aggregation (first/max/min/last/sum)
- Mathematical equivalence to pandas resampling
- Memory-efficient buffer management
"""
def __init__(self, timeframe_minutes: int = 15):
def __init__(self, timeframe: str = "15min", max_buffer_size: int = 1440):
"""
Initialize timeframe aggregator.
Args:
timeframe_minutes: Target timeframe in minutes (e.g., 60 for 1h, 15 for 15min)
timeframe: Target timeframe string (e.g., "15min", "1h", "4h")
max_buffer_size: Maximum minute data buffer size (default: 1440 = 24h)
"""
self.timeframe_minutes = timeframe_minutes
self.current_bar = None
self.current_bar_start = None
self.last_completed_bar = None
self.timeframe = timeframe
self.timeframe_minutes = parse_timeframe_to_minutes(timeframe)
# Use MinuteDataBuffer for efficient minute data management
self.minute_buffer = MinuteDataBuffer(max_size=max_buffer_size)
# Track last processed bar to avoid reprocessing
self.last_processed_bar_timestamp = None
# Performance tracking
self._bars_completed = 0
self._minute_points_processed = 0
def update(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[Dict[str, float]]:
"""
Update with new minute data and return completed bar if timeframe is complete.
Args:
timestamp: Timestamp of the data
timestamp: Timestamp of the minute data
ohlcv_data: OHLCV data dictionary
Returns:
Completed OHLCV bar if timeframe period ended, None otherwise
"""
# Calculate which timeframe bar this timestamp belongs to
bar_start = self._get_bar_start_time(timestamp)
# Check if we're starting a new bar
if self.current_bar_start != bar_start:
# Save the completed bar (if any)
completed_bar = self.current_bar.copy() if self.current_bar is not None else None
# Start new bar
self.current_bar_start = bar_start
self.current_bar = {
'timestamp': bar_start,
'open': ohlcv_data['close'], # Use current close as open for new bar
'high': ohlcv_data['close'],
'low': ohlcv_data['close'],
'close': ohlcv_data['close'],
'volume': ohlcv_data['volume']
}
# Return the completed bar (if any)
if completed_bar is not None:
self.last_completed_bar = completed_bar
return completed_bar
else:
# Update current bar with new data
if self.current_bar is not None:
self.current_bar['high'] = max(self.current_bar['high'], ohlcv_data['high'])
self.current_bar['low'] = min(self.current_bar['low'], ohlcv_data['low'])
self.current_bar['close'] = ohlcv_data['close']
self.current_bar['volume'] += ohlcv_data['volume']
return None # No completed bar yet
def _get_bar_start_time(self, timestamp: pd.Timestamp) -> pd.Timestamp:
"""Calculate the start time of the timeframe bar for given timestamp.
This method aligns with pandas resampling to ensure consistency
with the original strategy's bar boundaries.
"""
# Use pandas-style resampling alignment
# This ensures bars align to standard boundaries (e.g., 00:00, 00:15, 00:30, 00:45)
freq_str = f'{self.timeframe_minutes}min'
try:
# Create a temporary series with the timestamp and resample to get the bar start
temp_series = pd.Series([1], index=[timestamp])
resampled = temp_series.resample(freq_str)
# Add minute data to buffer
self.minute_buffer.add(timestamp, ohlcv_data)
self._minute_points_processed += 1
# Get the first group's name (which is the bar start time)
for bar_start, _ in resampled:
return bar_start
except Exception:
# Fallback to original method if resampling fails
pass
# Fallback method
minutes_since_midnight = timestamp.hour * 60 + timestamp.minute
bar_minutes = (minutes_since_midnight // self.timeframe_minutes) * self.timeframe_minutes
return timestamp.replace(
hour=bar_minutes // 60,
minute=bar_minutes % 60,
second=0,
microsecond=0
)
# Get latest complete bar using new utilities
latest_bar = get_latest_complete_bar(
self.minute_buffer.get_data(),
self.timeframe
)
if latest_bar is None:
return None
# Check if this is a new bar (avoid reprocessing)
bar_timestamp = latest_bar['timestamp']
if self.last_processed_bar_timestamp == bar_timestamp:
return None # Already processed this bar
# Update tracking
self.last_processed_bar_timestamp = bar_timestamp
self._bars_completed += 1
return latest_bar
except TimeframeError as e:
logger.error(f"Timeframe aggregation error: {e}")
return None
except Exception as e:
logger.error(f"Unexpected error in timeframe aggregation: {e}")
return None
def get_current_bar(self) -> Optional[Dict[str, float]]:
"""Get the current incomplete bar (for debugging)."""
return self.current_bar.copy() if self.current_bar is not None else None
"""
Get the current incomplete bar (for debugging).
Returns:
Current incomplete bar data or None
"""
try:
# Get recent data and try to aggregate
recent_data = self.minute_buffer.get_data(lookback_minutes=self.timeframe_minutes)
if not recent_data:
return None
# Aggregate to get current (possibly incomplete) bar
bars = aggregate_minute_data_to_timeframe(recent_data, self.timeframe, "end")
if bars:
return bars[-1] # Return most recent bar
return None
except Exception as e:
logger.debug(f"Error getting current bar: {e}")
return None
def reset(self):
"""Reset aggregator state."""
self.current_bar = None
self.current_bar_start = None
self.last_completed_bar = None
self.minute_buffer = MinuteDataBuffer(max_size=self.minute_buffer.max_size)
self.last_processed_bar_timestamp = None
self._bars_completed = 0
self._minute_points_processed = 0
def get_stats(self) -> Dict[str, Any]:
"""Get aggregator statistics."""
return {
'timeframe': self.timeframe,
'timeframe_minutes': self.timeframe_minutes,
'minute_points_processed': self._minute_points_processed,
'bars_completed': self._bars_completed,
'buffer_size': len(self.minute_buffer.get_data()),
'last_processed_bar': self.last_processed_bar_timestamp
}
class IncStrategyBase(ABC):
@@ -289,30 +312,23 @@ class IncStrategyBase(ABC):
self._state_validation_enabled = True
self._max_acceptable_gap = pd.Timedelta(minutes=5)
# Timeframe aggregation
self._primary_timeframe_minutes = self._extract_timeframe_minutes()
# Timeframe aggregation - Updated to use new utilities
self._primary_timeframe = self.params.get("timeframe", "1min")
self._timeframe_aggregator = None
if self._primary_timeframe_minutes > 1:
self._timeframe_aggregator = TimeframeAggregator(self._primary_timeframe_minutes)
logger.info(f"Initialized incremental strategy: {self.name}")
def _extract_timeframe_minutes(self) -> int:
"""Extract timeframe in minutes from strategy parameters."""
timeframe = self.params.get("timeframe", "1min")
# Only create aggregator if timeframe is not 1min (minute data processing)
if self._primary_timeframe != "1min":
try:
self._timeframe_aggregator = TimeframeAggregator(
timeframe=self._primary_timeframe,
max_buffer_size=1440 # 24 hours of minute data
)
logger.info(f"Created timeframe aggregator for {self._primary_timeframe}")
except TimeframeError as e:
logger.error(f"Failed to create timeframe aggregator: {e}")
self._timeframe_aggregator = None
if isinstance(timeframe, str):
if timeframe.endswith("min"):
return int(timeframe[:-3])
elif timeframe.endswith("h"):
return int(timeframe[:-1]) * 60
elif timeframe.endswith("d"):
return int(timeframe[:-1]) * 24 * 60
elif isinstance(timeframe, int):
return timeframe
# Default to 1 minute
return 1
logger.info(f"Initialized incremental strategy: {self.name} (timeframe: {self._primary_timeframe})")
def process_data_point(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[IncStrategySignal]:
"""
@@ -423,6 +439,43 @@ class IncStrategyBase(ABC):
return self._timeframe_aggregator.get_current_bar()
return None
def get_timeframe_aggregator_stats(self) -> Optional[Dict[str, Any]]:
"""Get timeframe aggregator statistics."""
if self._timeframe_aggregator is not None:
return self._timeframe_aggregator.get_stats()
return None
def create_minute_data_buffer(self, max_size: int = 1440) -> MinuteDataBuffer:
"""
Create a MinuteDataBuffer for strategies that need direct minute data management.
Args:
max_size: Maximum buffer size in minutes (default: 1440 = 24h)
Returns:
MinuteDataBuffer instance
"""
return MinuteDataBuffer(max_size=max_size)
def aggregate_minute_data(self, minute_data: List[Dict[str, float]],
timeframe: str, timestamp_mode: str = "end") -> List[Dict[str, float]]:
"""
Helper method to aggregate minute data to specified timeframe.
Args:
minute_data: List of minute OHLCV data
timeframe: Target timeframe (e.g., "5min", "15min", "1h")
timestamp_mode: "end" (default) or "start" for bar timestamps
Returns:
List of aggregated OHLCV bars
"""
try:
return aggregate_minute_data_to_timeframe(minute_data, timeframe, timestamp_mode)
except TimeframeError as e:
logger.error(f"Error aggregating minute data in {self.name}: {e}")
return []
# Properties
@property
def calculation_mode(self) -> str:
@@ -550,7 +603,7 @@ class IncStrategyBase(ABC):
'last_signals': self._last_signals,
'timeframe_aggregator': {
'enabled': self._timeframe_aggregator is not None,
'primary_timeframe_minutes': self._primary_timeframe_minutes,
'primary_timeframe': self._primary_timeframe,
'current_incomplete_bar': self.get_current_incomplete_bar()
},
'performance_metrics': {

View File

@@ -120,6 +120,13 @@ class BBRSStrategy(IncStrategyBase):
logger.info(f"BBRSStrategy initialized: timeframe={self.primary_timeframe}, "
f"bb_period={self.bb_period}, rsi_period={self.rsi_period}, "
f"aggregation_enabled={self._timeframe_aggregator is not None}")
if self.enable_logging:
logger.info(f"Using new timeframe utilities with mathematically correct aggregation")
logger.info(f"Volume aggregation now uses proper sum() for accurate volume spike detection")
if self._timeframe_aggregator:
stats = self.get_timeframe_aggregator_stats()
logger.debug(f"Timeframe aggregator stats: {stats}")
def get_minimum_buffer_size(self) -> Dict[str, int]:
"""

View File

@@ -101,6 +101,13 @@ class MetaTrendStrategy(IncStrategyBase):
logger.info(f"MetaTrendStrategy initialized: timeframe={self.primary_timeframe}, "
f"aggregation_enabled={self._timeframe_aggregator is not None}")
if self.enable_logging:
logger.info(f"Using new timeframe utilities with mathematically correct aggregation")
logger.info(f"Bar timestamps use 'end' mode to prevent future data leakage")
if self._timeframe_aggregator:
stats = self.get_timeframe_aggregator_stats()
logger.debug(f"Timeframe aggregator stats: {stats}")
def get_minimum_buffer_size(self) -> Dict[str, int]:
"""

View File

@@ -79,6 +79,10 @@ class RandomStrategy(IncStrategyBase):
logger.info(f"RandomStrategy initialized with entry_prob={self.entry_probability}, "
f"exit_prob={self.exit_probability}, timeframe={self.timeframe}, "
f"aggregation_enabled={self._timeframe_aggregator is not None}")
if self._timeframe_aggregator is not None:
logger.info(f"Using new timeframe utilities with mathematically correct aggregation")
logger.info(f"Random signals will be generated on complete {self.timeframe} bars only")
def get_minimum_buffer_size(self) -> Dict[str, int]:
"""