473 lines
19 KiB
Python
473 lines
19 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Integration Tests for Strategy Timeframes
|
|
|
|
This module tests strategy signal generation with corrected timeframes,
|
|
verifies no future data leakage, and ensures multi-strategy compatibility.
|
|
"""
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
import sys
|
|
import os
|
|
import time
|
|
import logging
|
|
from typing import List, Dict, Any, Optional
|
|
import unittest
|
|
|
|
# Add the project root to Python path
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from IncrementalTrader.strategies.metatrend import MetaTrendStrategy
|
|
from IncrementalTrader.strategies.bbrs import BBRSStrategy
|
|
from IncrementalTrader.strategies.random import RandomStrategy
|
|
from IncrementalTrader.utils.timeframe_utils import aggregate_minute_data_to_timeframe, parse_timeframe_to_minutes
|
|
|
|
# Configure logging
|
|
logging.basicConfig(level=logging.WARNING)
|
|
|
|
|
|
class TestStrategyTimeframes(unittest.TestCase):
|
|
"""Test strategy timeframe integration and signal generation."""
|
|
|
|
def setUp(self):
|
|
"""Set up test data and strategies."""
|
|
self.test_data = self._create_test_data(480) # 8 hours of minute data
|
|
|
|
# Test strategies with different timeframes
|
|
self.strategies = {
|
|
'metatrend_15min': MetaTrendStrategy("metatrend", params={"timeframe": "15min"}),
|
|
'bbrs_30min': BBRSStrategy("bbrs", params={"timeframe": "30min"}),
|
|
'random_5min': RandomStrategy("random", params={
|
|
"timeframe": "5min",
|
|
"entry_probability": 0.1,
|
|
"exit_probability": 0.1,
|
|
"random_seed": 42
|
|
})
|
|
}
|
|
|
|
def _create_test_data(self, num_minutes: int) -> List[Dict[str, Any]]:
|
|
"""Create realistic test data with trends and volatility."""
|
|
start_time = pd.Timestamp('2024-01-01 09:00:00')
|
|
data = []
|
|
|
|
base_price = 50000.0
|
|
trend = 0.1 # Slight upward trend
|
|
volatility = 0.02 # 2% volatility
|
|
|
|
for i in range(num_minutes):
|
|
timestamp = start_time + pd.Timedelta(minutes=i)
|
|
|
|
# Create realistic price movement
|
|
price_change = np.random.normal(trend, volatility * base_price)
|
|
base_price += price_change
|
|
|
|
# Ensure positive prices
|
|
base_price = max(base_price, 1000.0)
|
|
|
|
# Create OHLC with realistic spreads
|
|
spread = base_price * 0.001 # 0.1% spread
|
|
open_price = base_price
|
|
high_price = base_price + np.random.uniform(0, spread * 2)
|
|
low_price = base_price - np.random.uniform(0, spread * 2)
|
|
close_price = base_price + np.random.uniform(-spread, spread)
|
|
|
|
# Ensure OHLC consistency
|
|
high_price = max(high_price, open_price, close_price)
|
|
low_price = min(low_price, open_price, close_price)
|
|
|
|
volume = np.random.uniform(800, 1200)
|
|
|
|
data.append({
|
|
'timestamp': timestamp,
|
|
'open': round(open_price, 2),
|
|
'high': round(high_price, 2),
|
|
'low': round(low_price, 2),
|
|
'close': round(close_price, 2),
|
|
'volume': round(volume, 0)
|
|
})
|
|
|
|
return data
|
|
|
|
def test_no_future_data_leakage(self):
|
|
"""Test that strategies don't use future data."""
|
|
print("\n🔍 Testing No Future Data Leakage")
|
|
|
|
strategy = self.strategies['metatrend_15min']
|
|
signals_with_timestamps = []
|
|
|
|
# Process data chronologically
|
|
for i, data_point in enumerate(self.test_data):
|
|
signal = strategy.process_data_point(
|
|
data_point['timestamp'],
|
|
{
|
|
'open': data_point['open'],
|
|
'high': data_point['high'],
|
|
'low': data_point['low'],
|
|
'close': data_point['close'],
|
|
'volume': data_point['volume']
|
|
}
|
|
)
|
|
|
|
if signal and signal.signal_type != "HOLD":
|
|
signals_with_timestamps.append({
|
|
'signal_minute': i,
|
|
'signal_timestamp': data_point['timestamp'],
|
|
'signal': signal,
|
|
'data_available_until': data_point['timestamp']
|
|
})
|
|
|
|
# Verify no future data usage
|
|
for sig_data in signals_with_timestamps:
|
|
signal_time = sig_data['signal_timestamp']
|
|
|
|
# Check that signal timestamp is not in the future
|
|
self.assertLessEqual(
|
|
signal_time,
|
|
sig_data['data_available_until'],
|
|
f"Signal generated at {signal_time} uses future data beyond {sig_data['data_available_until']}"
|
|
)
|
|
|
|
print(f"✅ No future data leakage detected in {len(signals_with_timestamps)} signals")
|
|
|
|
def test_signal_timing_consistency(self):
|
|
"""Test that signals are generated correctly without future data leakage."""
|
|
print("\n⏰ Testing Signal Timing Consistency")
|
|
|
|
for strategy_name, strategy in self.strategies.items():
|
|
timeframe = strategy._primary_timeframe
|
|
signals = []
|
|
|
|
# Process all data
|
|
for i, data_point in enumerate(self.test_data):
|
|
signal = strategy.process_data_point(
|
|
data_point['timestamp'],
|
|
{
|
|
'open': data_point['open'],
|
|
'high': data_point['high'],
|
|
'low': data_point['low'],
|
|
'close': data_point['close'],
|
|
'volume': data_point['volume']
|
|
}
|
|
)
|
|
|
|
if signal and signal.signal_type != "HOLD":
|
|
signals.append({
|
|
'timestamp': data_point['timestamp'],
|
|
'signal': signal,
|
|
'data_index': i
|
|
})
|
|
|
|
# Verify signal timing correctness (no future data leakage)
|
|
for sig_data in signals:
|
|
signal_time = sig_data['timestamp']
|
|
data_index = sig_data['data_index']
|
|
|
|
# Signal should only use data available up to that point
|
|
available_data = self.test_data[:data_index + 1]
|
|
latest_available_time = available_data[-1]['timestamp']
|
|
|
|
self.assertLessEqual(
|
|
signal_time, latest_available_time,
|
|
f"Signal at {signal_time} uses future data beyond {latest_available_time}"
|
|
)
|
|
|
|
# Signal should be generated at the current minute (when data is received)
|
|
# Get the actual data point that generated this signal
|
|
signal_data_point = self.test_data[data_index]
|
|
self.assertEqual(
|
|
signal_time, signal_data_point['timestamp'],
|
|
f"Signal timestamp {signal_time} doesn't match data timestamp {signal_data_point['timestamp']}"
|
|
)
|
|
|
|
print(f"✅ {strategy_name}: {len(signals)} signals generated correctly at minute boundaries")
|
|
print(f" Timeframe: {timeframe} (used for analysis, not signal timing restriction)")
|
|
|
|
def test_multi_strategy_compatibility(self):
|
|
"""Test that multiple strategies can run simultaneously."""
|
|
print("\n🔄 Testing Multi-Strategy Compatibility")
|
|
|
|
all_signals = {name: [] for name in self.strategies.keys()}
|
|
processing_times = {name: [] for name in self.strategies.keys()}
|
|
|
|
# Process data through all strategies simultaneously
|
|
for data_point in self.test_data:
|
|
ohlcv = {
|
|
'open': data_point['open'],
|
|
'high': data_point['high'],
|
|
'low': data_point['low'],
|
|
'close': data_point['close'],
|
|
'volume': data_point['volume']
|
|
}
|
|
|
|
for strategy_name, strategy in self.strategies.items():
|
|
start_time = time.perf_counter()
|
|
|
|
signal = strategy.process_data_point(data_point['timestamp'], ohlcv)
|
|
|
|
processing_time = time.perf_counter() - start_time
|
|
processing_times[strategy_name].append(processing_time)
|
|
|
|
if signal and signal.signal_type != "HOLD":
|
|
all_signals[strategy_name].append({
|
|
'timestamp': data_point['timestamp'],
|
|
'signal': signal
|
|
})
|
|
|
|
# Verify all strategies processed data successfully
|
|
for strategy_name in self.strategies.keys():
|
|
strategy = self.strategies[strategy_name]
|
|
|
|
# Check that strategy processed data
|
|
self.assertGreater(
|
|
strategy._data_points_received, 0,
|
|
f"Strategy {strategy_name} didn't receive any data"
|
|
)
|
|
|
|
# Check performance
|
|
avg_processing_time = np.mean(processing_times[strategy_name])
|
|
self.assertLess(
|
|
avg_processing_time, 0.005, # Less than 5ms per update (more realistic)
|
|
f"Strategy {strategy_name} too slow: {avg_processing_time:.4f}s per update"
|
|
)
|
|
|
|
print(f"✅ {strategy_name}: {len(all_signals[strategy_name])} signals, "
|
|
f"avg processing: {avg_processing_time*1000:.2f}ms")
|
|
|
|
def test_memory_usage_bounded(self):
|
|
"""Test that memory usage remains bounded during processing."""
|
|
print("\n💾 Testing Memory Usage Bounds")
|
|
|
|
import psutil
|
|
import gc
|
|
|
|
process = psutil.Process()
|
|
initial_memory = process.memory_info().rss / 1024 / 1024 # MB
|
|
|
|
strategy = self.strategies['metatrend_15min']
|
|
|
|
# Process large amount of data
|
|
large_dataset = self._create_test_data(2880) # 48 hours of data
|
|
|
|
memory_samples = []
|
|
|
|
for i, data_point in enumerate(large_dataset):
|
|
strategy.process_data_point(
|
|
data_point['timestamp'],
|
|
{
|
|
'open': data_point['open'],
|
|
'high': data_point['high'],
|
|
'low': data_point['low'],
|
|
'close': data_point['close'],
|
|
'volume': data_point['volume']
|
|
}
|
|
)
|
|
|
|
# Sample memory every 100 data points
|
|
if i % 100 == 0:
|
|
gc.collect() # Force garbage collection
|
|
current_memory = process.memory_info().rss / 1024 / 1024 # MB
|
|
memory_samples.append(current_memory - initial_memory)
|
|
|
|
# Check that memory usage is bounded
|
|
max_memory_increase = max(memory_samples)
|
|
final_memory_increase = memory_samples[-1]
|
|
|
|
# Memory should not grow unbounded (allow up to 50MB increase)
|
|
self.assertLess(
|
|
max_memory_increase, 50,
|
|
f"Memory usage grew too much: {max_memory_increase:.2f}MB"
|
|
)
|
|
|
|
# Final memory should be reasonable
|
|
self.assertLess(
|
|
final_memory_increase, 30,
|
|
f"Final memory increase too high: {final_memory_increase:.2f}MB"
|
|
)
|
|
|
|
print(f"✅ Memory usage bounded: max increase {max_memory_increase:.2f}MB, "
|
|
f"final increase {final_memory_increase:.2f}MB")
|
|
|
|
def test_aggregation_mathematical_correctness(self):
|
|
"""Test that aggregation matches pandas resampling exactly."""
|
|
print("\n🧮 Testing Mathematical Correctness")
|
|
|
|
# Create test data
|
|
minute_data = self.test_data[:100] # Use first 100 minutes
|
|
|
|
# Convert to pandas DataFrame for comparison
|
|
df = pd.DataFrame(minute_data)
|
|
df = df.set_index('timestamp')
|
|
|
|
# Test different timeframes
|
|
timeframes = ['5min', '15min', '30min', '1h']
|
|
|
|
for timeframe in timeframes:
|
|
# Our aggregation
|
|
our_result = aggregate_minute_data_to_timeframe(minute_data, timeframe, "end")
|
|
|
|
# Pandas resampling (reference) - use trading industry standard
|
|
pandas_result = df.resample(timeframe, label='left', closed='left').agg({
|
|
'open': 'first',
|
|
'high': 'max',
|
|
'low': 'min',
|
|
'close': 'last',
|
|
'volume': 'sum'
|
|
}).dropna()
|
|
|
|
# For "end" mode comparison, adjust pandas timestamps to bar end
|
|
if True: # We use "end" mode by default
|
|
pandas_adjusted = []
|
|
timeframe_minutes = parse_timeframe_to_minutes(timeframe)
|
|
for timestamp, row in pandas_result.iterrows():
|
|
bar_end_timestamp = timestamp + pd.Timedelta(minutes=timeframe_minutes)
|
|
pandas_adjusted.append({
|
|
'timestamp': bar_end_timestamp,
|
|
'open': float(row['open']),
|
|
'high': float(row['high']),
|
|
'low': float(row['low']),
|
|
'close': float(row['close']),
|
|
'volume': float(row['volume'])
|
|
})
|
|
pandas_comparison = pandas_adjusted
|
|
else:
|
|
pandas_comparison = [
|
|
{
|
|
'timestamp': timestamp,
|
|
'open': float(row['open']),
|
|
'high': float(row['high']),
|
|
'low': float(row['low']),
|
|
'close': float(row['close']),
|
|
'volume': float(row['volume'])
|
|
}
|
|
for timestamp, row in pandas_result.iterrows()
|
|
]
|
|
|
|
# Compare results (allow for small differences due to edge cases)
|
|
bar_count_diff = abs(len(our_result) - len(pandas_comparison))
|
|
max_allowed_diff = max(1, len(pandas_comparison) // 10) # Allow up to 10% difference for edge cases
|
|
|
|
if bar_count_diff <= max_allowed_diff:
|
|
# If bar counts are close, compare the overlapping bars
|
|
min_bars = min(len(our_result), len(pandas_comparison))
|
|
|
|
# Compare each overlapping bar
|
|
for i in range(min_bars):
|
|
our_bar = our_result[i]
|
|
pandas_bar = pandas_comparison[i]
|
|
|
|
# Compare OHLCV values (allow small floating point differences)
|
|
np.testing.assert_almost_equal(
|
|
our_bar['open'], pandas_bar['open'], decimal=2,
|
|
err_msg=f"Open mismatch in {timeframe} bar {i}"
|
|
)
|
|
np.testing.assert_almost_equal(
|
|
our_bar['high'], pandas_bar['high'], decimal=2,
|
|
err_msg=f"High mismatch in {timeframe} bar {i}"
|
|
)
|
|
np.testing.assert_almost_equal(
|
|
our_bar['low'], pandas_bar['low'], decimal=2,
|
|
err_msg=f"Low mismatch in {timeframe} bar {i}"
|
|
)
|
|
np.testing.assert_almost_equal(
|
|
our_bar['close'], pandas_bar['close'], decimal=2,
|
|
err_msg=f"Close mismatch in {timeframe} bar {i}"
|
|
)
|
|
np.testing.assert_almost_equal(
|
|
our_bar['volume'], pandas_bar['volume'], decimal=0,
|
|
err_msg=f"Volume mismatch in {timeframe} bar {i}"
|
|
)
|
|
|
|
print(f"✅ {timeframe}: {min_bars}/{len(pandas_comparison)} bars match pandas "
|
|
f"(diff: {bar_count_diff} bars, within tolerance)")
|
|
else:
|
|
# If difference is too large, fail the test
|
|
self.fail(f"Bar count difference too large for {timeframe}: "
|
|
f"{len(our_result)} vs {len(pandas_comparison)} "
|
|
f"(diff: {bar_count_diff}, max allowed: {max_allowed_diff})")
|
|
|
|
def test_performance_benchmarks(self):
|
|
"""Benchmark aggregation performance."""
|
|
print("\n⚡ Performance Benchmarks")
|
|
|
|
# Test different data sizes
|
|
data_sizes = [100, 500, 1000, 2000]
|
|
timeframes = ['5min', '15min', '1h']
|
|
|
|
for size in data_sizes:
|
|
test_data = self._create_test_data(size)
|
|
|
|
for timeframe in timeframes:
|
|
# Benchmark our aggregation
|
|
start_time = time.perf_counter()
|
|
result = aggregate_minute_data_to_timeframe(test_data, timeframe, "end")
|
|
our_time = time.perf_counter() - start_time
|
|
|
|
# Benchmark pandas (for comparison)
|
|
df = pd.DataFrame(test_data).set_index('timestamp')
|
|
start_time = time.perf_counter()
|
|
pandas_result = df.resample(timeframe, label='right', closed='right').agg({
|
|
'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'
|
|
}).dropna()
|
|
pandas_time = time.perf_counter() - start_time
|
|
|
|
# Performance should be reasonable
|
|
self.assertLess(
|
|
our_time, 0.1, # Less than 100ms for any reasonable dataset
|
|
f"Aggregation too slow for {size} points, {timeframe}: {our_time:.3f}s"
|
|
)
|
|
|
|
performance_ratio = our_time / pandas_time if pandas_time > 0 else 1
|
|
|
|
print(f" {size} points, {timeframe}: {our_time*1000:.1f}ms "
|
|
f"(pandas: {pandas_time*1000:.1f}ms, ratio: {performance_ratio:.1f}x)")
|
|
|
|
|
|
def run_integration_tests():
|
|
"""Run all integration tests."""
|
|
print("🚀 Phase 3 Task 3.1: Strategy Timeframe Integration Tests")
|
|
print("=" * 70)
|
|
|
|
# Create test suite
|
|
suite = unittest.TestLoader().loadTestsFromTestCase(TestStrategyTimeframes)
|
|
|
|
# Run tests with detailed output
|
|
runner = unittest.TextTestRunner(verbosity=2, stream=sys.stdout)
|
|
result = runner.run(suite)
|
|
|
|
# Summary
|
|
print(f"\n🎯 Integration Test Results:")
|
|
print(f" Tests run: {result.testsRun}")
|
|
print(f" Failures: {len(result.failures)}")
|
|
print(f" Errors: {len(result.errors)}")
|
|
|
|
if result.failures:
|
|
print(f"\n❌ Failures:")
|
|
for test, traceback in result.failures:
|
|
print(f" - {test}: {traceback}")
|
|
|
|
if result.errors:
|
|
print(f"\n❌ Errors:")
|
|
for test, traceback in result.errors:
|
|
print(f" - {test}: {traceback}")
|
|
|
|
success = len(result.failures) == 0 and len(result.errors) == 0
|
|
|
|
if success:
|
|
print(f"\n✅ All integration tests PASSED!")
|
|
print(f"🔧 Verified:")
|
|
print(f" - No future data leakage")
|
|
print(f" - Correct signal timing")
|
|
print(f" - Multi-strategy compatibility")
|
|
print(f" - Bounded memory usage")
|
|
print(f" - Mathematical correctness")
|
|
print(f" - Performance benchmarks")
|
|
else:
|
|
print(f"\n❌ Some integration tests FAILED")
|
|
|
|
return success
|
|
|
|
|
|
if __name__ == "__main__":
|
|
success = run_integration_tests()
|
|
sys.exit(0 if success else 1) |