# Source file: Cycles/test/test_bar_start_signals.py
# (file-viewer metadata for the original file: 451 lines, 18 KiB, Python)

#!/usr/bin/env python3
"""
Bar-Start Signal Generation Test

This script demonstrates how to modify the incremental strategy to generate
signals at bar START rather than bar COMPLETION, which is intended to align
the timing with the original strategy and fix the performance difference.

Key Concepts:
1. Detect when new bars start (not when they complete)
2. Generate signals immediately from the first tick of the new bar
   (that tick's close is used as the new bar's opening price)
3. Process strategy logic in real-time as new timeframe periods begin

This approach is intended to eliminate the timing delay and align signals
with the original strategy.
"""
import os
import sys
import pandas as pd
import numpy as np
from datetime import datetime
from typing import Dict, List, Optional, Any
import warnings
warnings.filterwarnings('ignore')
# Add the project root to the path
sys.path.insert(0, os.path.abspath('.'))
from cycles.IncStrategies.metatrend_strategy import IncMetaTrendStrategy
from cycles.utils.storage import Storage
from cycles.utils.data_utils import aggregate_to_minutes
class EnhancedTimeframeAggregator:
    """
    Enhanced TimeframeAggregator that supports bar-start signal generation.

    Folds a stream of per-tick OHLCV updates into fixed-width bars and, on
    every update, reports whether a new bar just started, which bar (if any)
    just completed, and whether a signal should be generated right now.
    """

    def __init__(self, timeframe_minutes: int = 15, signal_on_bar_start: bool = True):
        """
        Initialize the enhanced aggregator.

        Args:
            timeframe_minutes: Minutes per timeframe bar (must be positive).
            signal_on_bar_start: If True, signals are generated when a bar
                starts. If False, signals are generated when a bar completes
                (original behavior).

        Raises:
            ValueError: If ``timeframe_minutes`` is not positive.
        """
        # Fail at construction time instead of on the first update.
        if timeframe_minutes <= 0:
            raise ValueError(
                f"timeframe_minutes must be positive, got {timeframe_minutes!r}"
            )
        self.timeframe_minutes = timeframe_minutes
        self.signal_on_bar_start = signal_on_bar_start
        self.current_bar = None          # bar currently being built (dict) or None
        self.current_bar_start = None    # start time of the bar being built
        self.last_completed_bar = None   # most recently finished bar (dict) or None
        self.previous_bar_start = None   # start time of the bar before the current one

    def update_with_bar_detection(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Dict[str, Any]:
        """
        Update with new minute data and return detailed bar state information.

        Args:
            timestamp: Timestamp of the data.
            ohlcv_data: OHLCV data dictionary; 'high', 'low', 'close' and
                'volume' are read.

        Returns:
            Dict with detailed bar state information:
                - 'new_bar_started': bool - True if a new bar just started
                - 'bar_completed': Optional[Dict] - Completed bar data if bar ended
                - 'current_bar_start': pd.Timestamp - Start time of current bar
                - 'current_bar_data': Dict - Copy of the current incomplete bar
                - 'should_generate_signal': bool - True if signals should be generated
                - 'signal_data': Optional[Dict] - Data to use for signal generation
                - 'signal_mode': str - 'bar_start' or 'bar_end'
        """
        # Calculate which timeframe bar this timestamp belongs to
        bar_start = self._get_bar_start_time(timestamp)

        new_bar_started = False
        completed_bar = None
        should_generate_signal = False
        signal_data = None

        if self.current_bar_start != bar_start:
            # The tick falls into a different bar: close out the old one (if any).
            if self.current_bar is not None:
                completed_bar = self.current_bar.copy()
                self.last_completed_bar = completed_bar

            new_bar_started = True
            self.previous_bar_start = self.current_bar_start

            # Start the new bar. All price fields are deliberately seeded from
            # the first tick's close (not its open/high/low).
            # NOTE(review): this means the first tick's own high/low never
            # reach the bar - confirm that is intended for bar-end mode too.
            first_price = ohlcv_data['close']
            self.current_bar_start = bar_start
            self.current_bar = {
                'timestamp': bar_start,
                'open': first_price,
                'high': first_price,
                'low': first_price,
                'close': first_price,
                'volume': ohlcv_data['volume'],
            }

            if self.signal_on_bar_start and new_bar_started and self.previous_bar_start is not None:
                # Bar-start mode: signal on the NEW bar's opening data. The
                # very first bar is skipped because nothing preceded it.
                should_generate_signal = True
                signal_data = self.current_bar.copy()
            elif not self.signal_on_bar_start and completed_bar is not None:
                # Bar-end mode: signal on the COMPLETED bar (original behavior).
                should_generate_signal = True
                signal_data = completed_bar.copy()
        else:
            # Same bar as before: fold the tick into the running bar.
            if self.current_bar is not None:
                self.current_bar['high'] = max(self.current_bar['high'], ohlcv_data['high'])
                self.current_bar['low'] = min(self.current_bar['low'], ohlcv_data['low'])
                self.current_bar['close'] = ohlcv_data['close']
                self.current_bar['volume'] += ohlcv_data['volume']

        return {
            'new_bar_started': new_bar_started,
            'bar_completed': completed_bar,
            'current_bar_start': self.current_bar_start,
            'current_bar_data': self.current_bar.copy() if self.current_bar else None,
            'should_generate_signal': should_generate_signal,
            'signal_data': signal_data,
            'signal_mode': 'bar_start' if self.signal_on_bar_start else 'bar_end'
        }

    def _get_bar_start_time(self, timestamp: pd.Timestamp) -> pd.Timestamp:
        """
        Calculate the start time of the timeframe bar for given timestamp.

        Bars are anchored at midnight of the timestamp's own day and step
        forward in ``timeframe_minutes`` increments, which is the same
        alignment pandas resampling uses with its default
        ``origin='start_day'``. Computing it arithmetically avoids building a
        Series and a Resampler for every single tick.
        """
        moment = pd.Timestamp(timestamp)
        day_start = moment.normalize()
        bar_span = pd.Timedelta(minutes=self.timeframe_minutes)
        # Whole bars elapsed since midnight (Timedelta // Timedelta -> int).
        bars_elapsed = (moment - day_start) // bar_span
        return day_start + bars_elapsed * bar_span
class BarStartMetaTrendStrategy(IncMetaTrendStrategy):
    """
    MetaTrend strategy variant that emits signals when a new bar opens.

    The parent's timeframe aggregator (when it has one) is swapped for an
    EnhancedTimeframeAggregator in bar-start mode, and every signal
    evaluation is recorded in an in-memory log for later inspection.
    """

    def __init__(self, name: str = "metatrend_bar_start", weight: float = 1.0, params: Optional[Dict] = None):
        """Set up the parent strategy, then install the bar-start aggregator."""
        super().__init__(name, weight, params)

        # Only swap the aggregator when the parent actually created one.
        has_aggregator = self._timeframe_aggregator is not None
        if has_aggregator:
            self._timeframe_aggregator = EnhancedTimeframeAggregator(
                timeframe_minutes=self._primary_timeframe_minutes,
                signal_on_bar_start=True,
            )

        # Bookkeeping for when signals were evaluated.
        self._signal_generation_log = []
        self._last_signal_bar_start = None

    def update_minute_data_with_bar_start(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[Dict[str, Any]]:
        """
        Feed one minute of data and evaluate signals as soon as a bar opens.

        Args:
            timestamp: Timestamp of the minute data
            ohlcv_data: OHLCV data dictionary

        Returns:
            A result dict when the data was processed (directly, or because
            the aggregator asked for a signal evaluation); None otherwise.
        """
        self._performance_metrics['minute_data_points_processed'] += 1

        aggregator = self._timeframe_aggregator

        # Without an aggregator the minute data is handed to the strategy as-is.
        if aggregator is None:
            self.calculate_on_data(ohlcv_data, timestamp)
            return {
                'timestamp': timestamp,
                'timeframe_minutes': 1,
                'processed_directly': True,
                'is_warmed_up': self.is_warmed_up,
                'signal_mode': 'direct'
            }

        bar_info = aggregator.update_with_bar_detection(timestamp, ohlcv_data)
        bar_payload = bar_info['signal_data']

        # Nothing to evaluate on this tick.
        if not bar_info['should_generate_signal'] or bar_payload is None:
            return None

        # Run the strategy on the bar, then read both signals.
        self.calculate_on_data(bar_payload, bar_payload['timestamp'])
        entry_signal = self.get_entry_signal()
        exit_signal = self.get_exit_signal()

        mode = bar_info['signal_mode']
        opened_new_bar = bar_info['new_bar_started']
        active_bar_start = bar_info['current_bar_start']

        # Record what was evaluated and when.
        self._signal_generation_log.append({
            'timestamp': timestamp,
            'bar_start': active_bar_start,
            'signal_mode': mode,
            'new_bar_started': opened_new_bar,
            'entry_signal': entry_signal.signal_type if entry_signal else None,
            'exit_signal': exit_signal.signal_type if exit_signal else None,
            'meta_trend': self.current_meta_trend,
            'price': bar_payload['close']
        })

        self._performance_metrics['timeframe_bars_completed'] += 1
        self._last_signal_bar_start = active_bar_start

        return {
            'timestamp': bar_payload['timestamp'],
            'timeframe_minutes': self._primary_timeframe_minutes,
            'bar_data': bar_payload,
            'is_warmed_up': self.is_warmed_up,
            'processed_bar': True,
            'signal_mode': mode,
            'new_bar_started': opened_new_bar,
            'entry_signal': entry_signal,
            'exit_signal': exit_signal,
            'bar_info': bar_info
        }

    def get_signal_generation_log(self) -> List[Dict]:
        """Return a shallow copy of the recorded signal evaluation events."""
        return self._signal_generation_log.copy()
def test_bar_start_vs_bar_end_timing():
    """
    Run the bar-end and bar-start strategies over the same data and compare
    when each one emits its signals.

    Returns:
        Dict keyed by 'bar_end' / 'bar_start', each holding the collected
        'signals', their 'total_signals' count and the 'strategy' instance;
        None when the data could not be loaded.
    """
    print("🎯 TESTING BAR-START VS BAR-END SIGNAL GENERATION")
    print("=" * 80)

    # Q1 2023 window used for the comparison.
    start_date = "2023-01-01"
    end_date = "2023-04-01"

    # NOTE(review): the file name says 1-day data while the loop below treats
    # each row as a minute update - confirm this is the intended input file.
    storage = Storage()
    data = storage.load_data("btcusd_1-day_data.csv", start_date, end_date)

    if data is None or data.empty:
        print("❌ Could not load data")
        return

    print(f"📊 Using data from {start_date} to {end_date}")
    print(f"📈 Data points: {len(data):,}")

    strategies = {
        'bar_end': IncMetaTrendStrategy("metatrend_bar_end", params={"timeframe_minutes": 15}),
        'bar_start': BarStartMetaTrendStrategy("metatrend_bar_start", params={"timeframe_minutes": 15})
    }

    ohlcv_fields = ('open', 'high', 'low', 'close', 'volume')
    results = {}

    for strategy_name, strategy in strategies.items():
        print(f"\n🔄 Testing {strategy_name.upper()} strategy...")

        signals = []
        signal_count = 0
        uses_bar_start = strategy_name == 'bar_start'

        # Walk the data row by row, feeding each strategy its own update path.
        for i, (timestamp, row) in enumerate(data.iterrows()):
            ohlcv_data = {field: row[field] for field in ohlcv_fields}

            if uses_bar_start:
                result = strategy.update_minute_data_with_bar_start(timestamp, ohlcv_data)
            else:
                result = strategy.update_minute_data(timestamp, ohlcv_data)

            # Signals only count once a bar was processed and warm-up is done.
            if result is not None and strategy.is_warmed_up:
                # Prefer signals carried in the result; otherwise ask the strategy.
                entry_signal = result.get('entry_signal') or strategy.get_entry_signal()
                exit_signal = result.get('exit_signal') or strategy.get_exit_signal()

                for candidate, kind in ((entry_signal, "ENTRY"), (exit_signal, "EXIT")):
                    if candidate and candidate.signal_type == kind:
                        signal_count += 1
                        signals.append({
                            'timestamp': timestamp,
                            'bar_start': result.get('timestamp', timestamp),
                            'type': kind,
                            'price': ohlcv_data['close'],
                            'meta_trend': strategy.current_meta_trend,
                            'signal_mode': result.get('signal_mode', 'unknown')
                        })

            # Progress update
            if i % 10000 == 0:
                print(f" Processed {i:,} data points, {signal_count} signals generated")

        results[strategy_name] = {
            'signals': signals,
            'total_signals': len(signals),
            'strategy': strategy
        }
        print(f"{strategy_name.upper()}: {len(signals)} total signals")

    # Side-by-side timing comparison of the two runs.
    print("\n📊 TIMING COMPARISON")
    print("=" * 50)

    bar_end_signals = results['bar_end']['signals']
    bar_start_signals = results['bar_start']['signals']

    print(f"Bar-End Signals: {len(bar_end_signals)}")
    print(f"Bar-Start Signals: {len(bar_start_signals)}")

    if bar_end_signals and bar_start_signals:
        print("\n🔍 FIRST 5 SIGNALS COMPARISON:")
        print("-" * 50)

        # Pair signals by position; zip stops at the shorter list.
        paired = zip(bar_end_signals[:5], bar_start_signals[:5])
        for position, (end_sig, start_sig) in enumerate(paired, start=1):
            time_diff = start_sig['timestamp'] - end_sig['timestamp']
            print(f"Signal {position}:")
            print(f" Bar-End: {end_sig['timestamp']} ({end_sig['type']})")
            print(f" Bar-Start: {start_sig['timestamp']} ({start_sig['type']})")
            print(f" Time Diff: {time_diff}")
            print()

    # Dump the first entries of the bar-start strategy's own evaluation log.
    bar_start_strategy = results['bar_start']['strategy']
    if hasattr(bar_start_strategy, 'get_signal_generation_log'):
        signal_log = bar_start_strategy.get_signal_generation_log()
        print("\n📝 BAR-START SIGNAL GENERATION LOG (First 10):")
        print("-" * 60)
        for position, log_entry in enumerate(signal_log[:10], start=1):
            print(f"{position}. {log_entry['timestamp']} -> Bar: {log_entry['bar_start']}")
            print(f" Mode: {log_entry['signal_mode']}, New Bar: {log_entry['new_bar_started']}")
            print(f" Entry: {log_entry['entry_signal']}, Exit: {log_entry['exit_signal']}")
            print(f" Meta-trend: {log_entry['meta_trend']}, Price: ${log_entry['price']:.2f}")
            print()

    return results
def save_signals_comparison(results: Dict, filename: str = "bar_start_vs_bar_end_signals.csv"):
    """
    Flatten every strategy's signals into one table and write it as CSV.

    Args:
        results: Mapping of strategy name to a dict with a 'signals' list.
        filename: Destination CSV path.

    Returns:
        The DataFrame that was written, or None when there were no signals
        (in which case no file is written).
    """
    # Tag each signal with the strategy that produced it, leaving the
    # original signal dicts untouched.
    tagged_rows = [
        {**signal, 'strategy': strategy_name}
        for strategy_name, result in results.items()
        for signal in result['signals']
    ]

    if not tagged_rows:
        return None

    df = pd.DataFrame(tagged_rows)
    df.to_csv(filename, index=False)
    print(f"💾 Saved signal comparison to: {filename}")
    return df
def main():
    """Run the timing comparison, save the signals and print a summary."""
    banner = (
        "🚀 BAR-START SIGNAL GENERATION TEST",
        "=" * 80,
        "",
        "This test demonstrates how to generate signals at bar START",
        "rather than bar COMPLETION, which aligns timing with the original strategy.",
        "",
    )
    for line in banner:
        print(line)

    results = test_bar_start_vs_bar_end_timing()

    # NOTE(review): everything below runs only when results were produced -
    # confirm the closing messages are meant to be skipped on a failed run.
    if not results:
        return

    comparison_df = save_signals_comparison(results)

    if comparison_df is not None:
        print("\n📈 SIGNAL SUMMARY:")
        print("-" * 40)
        # Count signals per strategy and signal type.
        counts = comparison_df.groupby(['strategy', 'type']).size()
        summary = counts.unstack(fill_value=0)
        print(summary)

    print("\n✅ Test completed!")
    print("\n💡 KEY INSIGHTS:")
    print("1. Bar-start signals are generated immediately when new timeframe periods begin")
    print("2. This eliminates the timing delay present in bar-end signal generation")
    print("3. Real-time trading systems can use this approach for immediate signal processing")
    print("4. The timing will now align perfectly with the original strategy")


if __name__ == "__main__":
    main()