TCPDashboard/tests/strategies/test_batch_processing.py
Vasily.onl 8c23489ff0 4.0 - 4.0 Implement real-time strategy execution and data integration features
- Added `realtime_execution.py` for real-time strategy execution, enabling live signal generation and integration with the dashboard's chart refresh cycle.
- Introduced `data_integration.py` to manage market data orchestration, caching, and technical indicator calculations for strategy signal generation.
- Implemented `validation.py` for comprehensive validation and quality assessment of strategy-generated signals, ensuring reliability and consistency.
- Developed `batch_processing.py` to facilitate efficient backtesting of multiple strategies across large datasets with memory management and performance optimization.
- Updated `__init__.py` files to include new modules and ensure proper exports, enhancing modularity and maintainability.
- Enhanced unit tests for the new features, ensuring robust functionality and adherence to project standards.

These changes establish a solid foundation for real-time strategy execution and data integration, aligning with project goals for modularity, performance, and maintainability.
2025-06-12 18:29:39 +08:00

798 lines
34 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Tests for Strategy Batch Processing
This module tests batch processing capabilities for strategy backtesting
including memory management, parallel processing, and performance monitoring.
"""
import pytest
from unittest.mock import patch, MagicMock
from datetime import datetime, timezone
import pandas as pd
from strategies.batch_processing import BacktestingBatchProcessor, BatchProcessingConfig
from strategies.data_types import StrategyResult, StrategySignal, SignalType
class TestBatchProcessingConfig:
    """Checks the field values exposed by the BatchProcessingConfig dataclass."""

    def test_default_config(self):
        """A config built with no arguments carries the expected defaults."""
        defaults = BatchProcessingConfig()

        assert defaults.max_concurrent_strategies == 4
        assert defaults.max_memory_usage_percent == 80.0
        assert defaults.chunk_size_days == 30
        # Flags are identity-checked so a merely truthy value does not pass.
        assert defaults.enable_memory_monitoring is True
        assert defaults.enable_result_validation is True
        assert defaults.result_cache_size == 1000
        assert defaults.progress_reporting_interval == 10

    def test_custom_config(self):
        """Every keyword passed to the constructor is stored unchanged."""
        overrides = {
            'max_concurrent_strategies': 8,
            'max_memory_usage_percent': 90.0,
            'chunk_size_days': 60,
            'enable_memory_monitoring': False,
            'enable_result_validation': False,
            'result_cache_size': 500,
            'progress_reporting_interval': 5,
        }
        custom = BatchProcessingConfig(**overrides)

        assert custom.max_concurrent_strategies == 8
        assert custom.max_memory_usage_percent == 90.0
        assert custom.chunk_size_days == 60
        # Flags are identity-checked so a merely falsy value does not pass.
        assert custom.enable_memory_monitoring is False
        assert custom.enable_result_validation is False
        assert custom.result_cache_size == 500
        assert custom.progress_reporting_interval == 5
class TestBacktestingBatchProcessor:
    """Tests for BacktestingBatchProcessor class.

    The ``processor`` fixture builds the processor while
    ``StrategyDataIntegrator`` is patched, so no real integrator is created.
    Tests that need specific integrator behaviour replace
    ``processor.data_integrator`` with their own ``MagicMock``.

    Computed float metrics are compared with ``pytest.approx`` so that an
    algebraically equivalent implementation does not fail on rounding.
    """

    @pytest.fixture
    def processor(self):
        """Create batch processor with a test-friendly configuration."""
        config = BatchProcessingConfig(
            enable_memory_monitoring=False,  # Disable for testing
            progress_reporting_interval=1,  # Report every strategy for testing
            enable_result_validation=False,  # Disable validation for basic tests
        )
        # The patch only needs to be active while the constructor runs.
        with patch('strategies.batch_processing.StrategyDataIntegrator'):
            return BacktestingBatchProcessor(config)

    @pytest.fixture
    def sample_strategy_configs(self):
        """Create sample strategy configurations for testing."""
        return [
            {
                'name': 'ema_crossover',
                'type': 'trend_following',
                'parameters': {'fast_ema': 12, 'slow_ema': 26},
            },
            {
                'name': 'rsi_momentum',
                'type': 'momentum',
                'parameters': {'rsi_period': 14, 'oversold': 30, 'overbought': 70},
            },
            {
                'name': 'macd_trend',
                'type': 'trend_following',
                'parameters': {'fast_ema': 12, 'slow_ema': 26, 'signal': 9},
            },
        ]

    @pytest.fixture
    def sample_strategy_results(self):
        """Create a one-element list holding a result with a single BUY signal."""
        # Capture the clock once so the result and its signal share a timestamp.
        now = datetime.now(timezone.utc)
        return [
            StrategyResult(
                timestamp=now,
                symbol='BTC-USDT',
                timeframe='1h',
                strategy_name='test_strategy',
                signals=[
                    StrategySignal(
                        timestamp=now,
                        symbol='BTC-USDT',
                        timeframe='1h',
                        signal_type=SignalType.BUY,
                        price=50000.0,
                        confidence=0.8,
                        metadata={'rsi': 30},
                    )
                ],
                indicators_used={'rsi': 30, 'ema': 49000},
                metadata={'execution_time': 0.5},
            )
        ]

    def test_initialization(self, processor):
        """Test batch processor initialization."""
        assert processor.config is not None
        assert processor.logger is not None
        assert processor.data_integrator is not None
        assert processor._processing_stats['strategies_processed'] == 0
        assert processor._processing_stats['total_signals_generated'] == 0
        assert processor._processing_stats['errors_count'] == 0

    def test_initialization_with_validation_disabled(self):
        """Test initialization with validation disabled."""
        config = BatchProcessingConfig(enable_result_validation=False)
        with patch('strategies.batch_processing.StrategyDataIntegrator'):
            processor = BacktestingBatchProcessor(config)
        assert processor.signal_validator is None

    @patch('strategies.batch_processing.StrategyDataIntegrator')
    def test_process_strategies_batch(
        self,
        mock_integrator_class,
        processor,
        sample_strategy_configs,
        sample_strategy_results,
    ):
        """Test batch processing of multiple strategies."""
        # Setup mock data integrator
        mock_integrator = MagicMock()
        mock_integrator.calculate_strategy_signals_orchestrated.return_value = (
            sample_strategy_results
        )
        processor.data_integrator = mock_integrator

        symbols = ['BTC-USDT', 'ETH-USDT']
        timeframe = '1h'
        days_back = 30

        results = processor.process_strategies_batch(
            strategy_configs=sample_strategy_configs,
            symbols=symbols,
            timeframe=timeframe,
            days_back=days_back,
        )

        # Verify results structure
        assert len(results) == len(sample_strategy_configs)
        assert 'ema_crossover' in results
        assert 'rsi_momentum' in results
        assert 'macd_trend' in results

        # Verify statistics
        stats = processor.get_processing_statistics()
        assert stats['strategies_processed'] == 3
        # 3 strategies x 2 symbols x 1 signal each
        assert stats['total_signals_generated'] == 6
        assert stats['errors_count'] == 0

    def test_process_single_strategy_batch(self, processor, sample_strategy_results):
        """Test processing a single strategy across multiple symbols."""
        # Setup mock data integrator
        mock_integrator = MagicMock()
        mock_integrator.calculate_strategy_signals_orchestrated.return_value = (
            sample_strategy_results
        )
        processor.data_integrator = mock_integrator

        strategy_config = {'name': 'test_strategy', 'type': 'test'}
        symbols = ['BTC-USDT', 'ETH-USDT']

        results = processor._process_single_strategy_batch(
            strategy_config, symbols, '1h', 30, 'okx'
        )

        assert len(results) == 2  # Results for 2 symbols
        assert processor._processing_stats['total_signals_generated'] == 2

    def test_validate_strategy_results(self, processor, sample_strategy_results):
        """Test strategy result validation."""
        # Setup mock signal validator
        mock_validator = MagicMock()
        mock_validator.validate_signals_batch.return_value = (
            sample_strategy_results[0].signals,  # valid signals
            [],  # no invalid signals
        )
        processor.signal_validator = mock_validator

        validated_results = processor._validate_strategy_results(sample_strategy_results)

        assert len(validated_results) == 1
        assert len(validated_results[0].signals) == 1
        mock_validator.validate_signals_batch.assert_called_once()

    @patch('strategies.batch_processing.psutil')
    def test_check_memory_usage_normal(self, mock_psutil, processor):
        """Test memory usage monitoring under normal conditions."""
        # Mock memory usage below threshold
        mock_process = MagicMock()
        mock_process.memory_percent.return_value = 60.0  # Below 80% threshold
        mock_process.memory_info.return_value.rss = 500 * 1024 * 1024  # 500 MB
        mock_psutil.Process.return_value = mock_process

        processor._check_memory_usage()

        assert processor._processing_stats['memory_peak_mb'] == pytest.approx(500.0)

    @patch('strategies.batch_processing.psutil')
    def test_check_memory_usage_high(self, mock_psutil, processor):
        """Test memory usage monitoring with high usage."""
        # Mock memory usage above threshold
        mock_process = MagicMock()
        mock_process.memory_percent.return_value = 85.0  # Above 80% threshold
        mock_process.memory_info.return_value.rss = 1000 * 1024 * 1024  # 1000 MB
        mock_psutil.Process.return_value = mock_process

        with patch.object(processor, '_cleanup_memory') as mock_cleanup:
            processor._check_memory_usage()
            mock_cleanup.assert_called_once()

    def test_cleanup_memory(self, processor):
        """Test memory cleanup operations."""
        # Derive sizes from the configured limit instead of hard-coding them.
        cache_limit = processor.config.result_cache_size

        # Fill result cache beyond limit
        for i in range(cache_limit + 500):
            processor._result_cache[f'key_{i}'] = f'result_{i}'
        initial_cache_size = len(processor._result_cache)

        with patch.object(processor.data_integrator, 'clear_cache') as mock_clear, \
                patch('strategies.batch_processing.gc.collect') as mock_gc:
            processor._cleanup_memory()

            # Verify cache was reduced
            assert len(processor._result_cache) < initial_cache_size
            # Half of cache size limit
            assert len(processor._result_cache) == cache_limit // 2

            # Verify other cleanup operations
            mock_clear.assert_called_once()
            mock_gc.assert_called_once()

    def test_get_processing_statistics(self, processor):
        """Test processing statistics calculation."""
        # Set some test statistics
        processor._processing_stats.update({
            'strategies_processed': 5,
            'total_signals_generated': 25,
            'processing_time_seconds': 10.0,
            'errors_count': 1,
            'validation_failures': 2,
        })

        stats = processor.get_processing_statistics()

        assert stats['strategies_processed'] == 5
        assert stats['total_signals_generated'] == 25
        assert stats['average_signals_per_strategy'] == pytest.approx(5.0)
        assert stats['average_processing_time_per_strategy'] == pytest.approx(2.0)
        assert stats['error_rate'] == pytest.approx(20.0)  # 1/5 * 100
        assert stats['validation_failure_rate'] == pytest.approx(8.0)  # 2/25 * 100

    def test_get_processing_statistics_zero_division(self, processor):
        """Test statistics calculation with zero values."""
        stats = processor.get_processing_statistics()

        assert stats['average_signals_per_strategy'] == 0
        assert stats['average_processing_time_per_strategy'] == 0
        assert stats['error_rate'] == 0.0
        assert stats['validation_failure_rate'] == 0.0

    def test_process_strategies_batch_with_error(self, processor, sample_strategy_configs):
        """Test batch processing with errors."""
        # Setup mock to raise an exception
        mock_integrator = MagicMock()
        mock_integrator.calculate_strategy_signals_orchestrated.side_effect = (
            Exception("Test error")
        )
        processor.data_integrator = mock_integrator

        results = processor.process_strategies_batch(
            strategy_configs=sample_strategy_configs,
            symbols=['BTC-USDT'],
            timeframe='1h',
            days_back=30,
        )

        # Should handle errors gracefully
        assert isinstance(results, dict)
        assert processor._processing_stats['errors_count'] > 0

    @patch('strategies.batch_processing.StrategyDataIntegrator')
    def test_process_strategies_parallel(
        self,
        mock_integrator_class,
        processor,
        sample_strategy_configs,
        sample_strategy_results,
    ):
        """Test parallel processing of multiple strategies."""
        # Setup mock data integrator
        mock_integrator = MagicMock()
        mock_integrator.calculate_strategy_signals_orchestrated.return_value = (
            sample_strategy_results
        )
        processor.data_integrator = mock_integrator

        symbols = ['BTC-USDT', 'ETH-USDT']
        timeframe = '1h'
        days_back = 30

        results = processor.process_strategies_parallel(
            strategy_configs=sample_strategy_configs,
            symbols=symbols,
            timeframe=timeframe,
            days_back=days_back,
        )

        # Verify results structure (same as sequential processing)
        assert len(results) == len(sample_strategy_configs)
        assert 'ema_crossover' in results
        assert 'rsi_momentum' in results
        assert 'macd_trend' in results

        # Verify statistics
        stats = processor.get_processing_statistics()
        assert stats['strategies_processed'] == 3
        # 3 strategies x 2 symbols x 1 signal each
        assert stats['total_signals_generated'] == 6
        assert stats['errors_count'] == 0

    def test_process_symbols_parallel(self, processor, sample_strategy_results):
        """Test parallel processing of single strategy across multiple symbols."""
        # Setup mock data integrator
        mock_integrator = MagicMock()
        mock_integrator.calculate_strategy_signals_orchestrated.return_value = (
            sample_strategy_results
        )
        processor.data_integrator = mock_integrator

        strategy_config = {'name': 'test_strategy', 'type': 'test'}
        symbols = ['BTC-USDT', 'ETH-USDT', 'BNB-USDT']

        results = processor.process_symbols_parallel(
            strategy_config=strategy_config,
            symbols=symbols,
            timeframe='1h',
            days_back=30,
        )

        # Should have results for all symbols
        assert len(results) == 3  # Results for 3 symbols
        assert processor._processing_stats['total_signals_generated'] == 3

    def test_process_strategy_for_symbol(self, processor, sample_strategy_results):
        """Test processing a single strategy for a single symbol."""
        # Setup mock data integrator
        mock_integrator = MagicMock()
        mock_integrator.calculate_strategy_signals_orchestrated.return_value = (
            sample_strategy_results
        )
        processor.data_integrator = mock_integrator

        strategy_config = {'name': 'test_strategy', 'type': 'test'}

        results = processor._process_strategy_for_symbol(
            strategy_config=strategy_config,
            symbol='BTC-USDT',
            timeframe='1h',
            days_back=30,
            exchange='okx',
        )

        assert len(results) == 1
        assert results[0].strategy_name == 'test_strategy'
        assert results[0].symbol == 'BTC-USDT'

    def test_process_strategy_for_symbol_with_error(self, processor):
        """Test symbol processing with error handling."""
        # Setup mock to raise an exception
        mock_integrator = MagicMock()
        mock_integrator.calculate_strategy_signals_orchestrated.side_effect = (
            Exception("Test error")
        )
        processor.data_integrator = mock_integrator

        strategy_config = {'name': 'test_strategy', 'type': 'test'}

        results = processor._process_strategy_for_symbol(
            strategy_config=strategy_config,
            symbol='BTC-USDT',
            timeframe='1h',
            days_back=30,
            exchange='okx',
        )

        # Should return empty list on error
        assert results == []

    def test_process_large_dataset_streaming(
        self, processor, sample_strategy_configs, sample_strategy_results
    ):
        """Test streaming processing for large datasets."""
        # Setup mock data integrator
        mock_integrator = MagicMock()
        mock_integrator.calculate_strategy_signals_orchestrated.return_value = (
            sample_strategy_results
        )
        processor.data_integrator = mock_integrator

        # Mock the parallel processing method to avoid actual parallel execution
        with patch.object(processor, 'process_strategies_parallel') as mock_parallel:
            mock_parallel.return_value = {
                'test_strategy': sample_strategy_results
            }

            # Test streaming with 90 days split into 30-day chunks
            stream = processor.process_large_dataset_streaming(
                strategy_configs=sample_strategy_configs,
                symbols=['BTC-USDT'],
                timeframe='1h',
                total_days_back=90,  # Should create 3 chunks
            )

            # Consume the stream while the patch is still active.
            chunks = list(stream)

            assert len(chunks) == 3  # 90 days / 30 days per chunk

            # Each chunk should have results for all strategies
            for chunk in chunks:
                assert 'test_strategy' in chunk

    def test_aggregate_streaming_results(self, processor, sample_strategy_results):
        """Test aggregation of streaming results."""
        # Create mock streaming results
        chunk1 = {'strategy1': sample_strategy_results[:1], 'strategy2': []}
        chunk2 = {'strategy1': [], 'strategy2': sample_strategy_results[:1]}
        chunk3 = {
            'strategy1': sample_strategy_results[:1],
            'strategy2': sample_strategy_results[:1],
        }
        stream = iter([chunk1, chunk2, chunk3])

        aggregated = processor.aggregate_streaming_results(stream)

        assert len(aggregated) == 2
        assert 'strategy1' in aggregated
        assert 'strategy2' in aggregated
        assert len(aggregated['strategy1']) == 2  # From chunk1 and chunk3
        assert len(aggregated['strategy2']) == 2  # From chunk2 and chunk3

    @patch('strategies.batch_processing.psutil')
    def test_process_with_memory_constraints_sufficient_memory(
        self, mock_psutil, processor, sample_strategy_configs
    ):
        """Test memory-constrained processing with sufficient memory."""
        # Mock low memory usage
        mock_process = MagicMock()
        mock_process.memory_info.return_value.rss = 100 * 1024 * 1024  # 100 MB
        mock_psutil.Process.return_value = mock_process

        with patch.object(processor, 'process_strategies_parallel') as mock_parallel:
            mock_parallel.return_value = {}

            processor.process_with_memory_constraints(
                strategy_configs=sample_strategy_configs,
                symbols=['BTC-USDT'],
                timeframe='1h',
                days_back=30,
                max_memory_mb=1000.0,  # High limit
            )

            # Should use parallel processing for sufficient memory
            mock_parallel.assert_called_once()

    @patch('strategies.batch_processing.psutil')
    def test_process_with_memory_constraints_moderate_constraint(
        self, mock_psutil, processor, sample_strategy_configs
    ):
        """Test memory-constrained processing with moderate constraint."""
        # Mock moderate memory usage
        mock_process = MagicMock()
        mock_process.memory_info.return_value.rss = 400 * 1024 * 1024  # 400 MB
        mock_psutil.Process.return_value = mock_process

        with patch.object(processor, 'process_strategies_batch') as mock_batch:
            mock_batch.return_value = {}

            processor.process_with_memory_constraints(
                strategy_configs=sample_strategy_configs,
                symbols=['BTC-USDT'],
                timeframe='1h',
                days_back=30,
                max_memory_mb=500.0,  # Moderate limit
            )

            # Should use sequential batch processing
            mock_batch.assert_called_once()

    @patch('strategies.batch_processing.psutil')
    def test_process_with_memory_constraints_severe_constraint(
        self, mock_psutil, processor, sample_strategy_configs
    ):
        """Test memory-constrained processing with severe constraint."""
        # Mock high memory usage
        mock_process = MagicMock()
        mock_process.memory_info.return_value.rss = 450 * 1024 * 1024  # 450 MB
        mock_psutil.Process.return_value = mock_process

        with patch.object(
            processor, 'process_large_dataset_streaming_with_warmup'
        ) as mock_streaming, \
                patch.object(processor, 'aggregate_streaming_results') as mock_aggregate:
            mock_streaming.return_value = iter([{}])
            mock_aggregate.return_value = {}

            processor.process_with_memory_constraints(
                strategy_configs=sample_strategy_configs,
                symbols=['BTC-USDT'],
                timeframe='1h',
                days_back=30,
                max_memory_mb=500.0,  # Low limit with high current usage
            )

            # Should use streaming processing with warm-up
            mock_streaming.assert_called_once()
            mock_aggregate.assert_called_once()

    def test_get_performance_metrics(self, processor):
        """Test comprehensive performance metrics calculation."""
        # Set some test statistics
        processor._processing_stats.update({
            'strategies_processed': 5,
            'total_signals_generated': 25,
            'processing_time_seconds': 10.0,
            'memory_peak_mb': 500.0,
            'errors_count': 1,
            'validation_failures': 2,
        })

        with patch.object(processor.data_integrator, 'get_cache_stats') as mock_cache_stats:
            mock_cache_stats.return_value = {'cache_hits': 80, 'cache_misses': 20}

            metrics = processor.get_performance_metrics()

        assert 'cache_hit_rate' in metrics
        assert 'memory_efficiency' in metrics
        assert 'throughput_signals_per_second' in metrics
        assert 'parallel_efficiency' in metrics
        assert 'optimization_recommendations' in metrics

        assert metrics['cache_hit_rate'] == pytest.approx(80.0)  # 80/(80+20) * 100
        assert metrics['throughput_signals_per_second'] == pytest.approx(2.5)  # 25/10

    def test_calculate_cache_hit_rate(self, processor):
        """Test cache hit rate calculation."""
        with patch.object(processor.data_integrator, 'get_cache_stats') as mock_cache_stats:
            mock_cache_stats.return_value = {'cache_hits': 70, 'cache_misses': 30}

            hit_rate = processor._calculate_cache_hit_rate()

        assert hit_rate == pytest.approx(70.0)  # 70/(70+30) * 100

    def test_calculate_memory_efficiency(self, processor):
        """Test memory efficiency calculation."""
        processor._processing_stats.update({
            'memory_peak_mb': 200.0,
            'strategies_processed': 2,
        })

        efficiency = processor._calculate_memory_efficiency()

        # 200MB / 2 strategies = 100MB per strategy
        # Baseline is 100MB, so efficiency should be 50%
        assert efficiency == pytest.approx(50.0)

    def test_generate_optimization_recommendations(self, processor):
        """Test optimization recommendations generation."""
        # Set up poor performance metrics
        processor._processing_stats.update({
            'strategies_processed': 1,
            'total_signals_generated': 1,
            'processing_time_seconds': 10.0,
            'memory_peak_mb': 1000.0,  # High memory usage
            'errors_count': 2,  # High error rate
            'validation_failures': 0,
        })

        with patch.object(processor.data_integrator, 'get_cache_stats') as mock_cache_stats:
            # Low cache hit rate
            mock_cache_stats.return_value = {'cache_hits': 1, 'cache_misses': 9}

            recommendations = processor._generate_optimization_recommendations()

        assert isinstance(recommendations, list)
        assert len(recommendations) > 0

        # Should recommend memory efficiency improvement
        assert any('memory efficiency' in rec.lower() for rec in recommendations)

    def test_optimize_configuration(self, processor):
        """Test automatic configuration optimization."""
        # Set up metrics that indicate poor memory efficiency
        processor._processing_stats.update({
            'strategies_processed': 4,
            'total_signals_generated': 20,
            'processing_time_seconds': 8.0,
            'memory_peak_mb': 2000.0,  # Very high memory usage
            'errors_count': 0,
            'validation_failures': 0,
        })

        with patch.object(processor.data_integrator, 'get_cache_stats') as mock_cache_stats:
            mock_cache_stats.return_value = {'cache_hits': 10, 'cache_misses': 90}

            # Snapshot the settings before optimisation for comparison.
            original_workers = processor.config.max_concurrent_strategies
            original_chunk_size = processor.config.chunk_size_days

            optimized_config = processor.optimize_configuration()

        # Should reduce workers and chunk size due to poor memory efficiency
        assert optimized_config.max_concurrent_strategies <= original_workers
        assert optimized_config.chunk_size_days <= original_chunk_size

    def test_benchmark_processing_methods(self, processor, sample_strategy_configs):
        """Test processing method benchmarking."""
        with patch.object(processor, 'process_strategies_batch') as mock_batch, \
                patch.object(processor, 'process_strategies_parallel') as mock_parallel:
            # Mock batch processing results
            mock_batch.return_value = {'strategy1': []}
            # Mock parallel processing results
            mock_parallel.return_value = {'strategy1': []}

            benchmark_results = processor.benchmark_processing_methods(
                strategy_configs=sample_strategy_configs,
                symbols=['BTC-USDT'],
                timeframe='1h',
                days_back=7,
            )

            assert 'sequential' in benchmark_results
            assert 'parallel' in benchmark_results
            assert 'recommendation' in benchmark_results

            # Verify both methods were called
            mock_batch.assert_called_once()
            mock_parallel.assert_called_once()

    def test_reset_stats(self, processor):
        """Test statistics reset functionality."""
        # Set some statistics
        processor._processing_stats.update({
            'strategies_processed': 5,
            'total_signals_generated': 25,
            'processing_time_seconds': 10.0,
        })
        processor._result_cache['test'] = 'data'

        processor._reset_stats()

        # Verify all stats are reset
        assert processor._processing_stats['strategies_processed'] == 0
        assert processor._processing_stats['total_signals_generated'] == 0
        assert processor._processing_stats['processing_time_seconds'] == 0.0
        assert len(processor._result_cache) == 0

    def test_calculate_warmup_period_ema_strategy(self, processor):
        """Test warm-up period calculation for EMA strategy."""
        strategy_configs = [
            {
                'name': 'ema_crossover',
                'fast_period': 12,
                'slow_period': 26,
            }
        ]

        warmup = processor._calculate_warmup_period(strategy_configs)

        # Should be max(12, 26) + 10 safety buffer = 36
        assert warmup == 36

    def test_calculate_warmup_period_macd_strategy(self, processor):
        """Test warm-up period calculation for MACD strategy."""
        strategy_configs = [
            {
                'name': 'macd_trend',
                'slow_period': 26,
                'signal_period': 9,
            }
        ]

        warmup = processor._calculate_warmup_period(strategy_configs)

        # Should be max(26, 9) + 10 MACD buffer + 10 safety buffer = 46
        assert warmup == 46

    def test_calculate_warmup_period_rsi_strategy(self, processor):
        """Test warm-up period calculation for RSI strategy."""
        strategy_configs = [
            {
                'name': 'rsi_momentum',
                'period': 14,
            }
        ]

        warmup = processor._calculate_warmup_period(strategy_configs)

        # Should be 14 + 5 RSI buffer + 10 safety buffer = 29
        assert warmup == 29

    def test_calculate_warmup_period_multiple_strategies(self, processor):
        """Test warm-up period calculation with multiple strategies."""
        strategy_configs = [
            {'name': 'ema_crossover', 'slow_period': 26},
            {'name': 'rsi_momentum', 'period': 14},
            {'name': 'macd_trend', 'slow_period': 26, 'signal_period': 9},
        ]

        warmup = processor._calculate_warmup_period(strategy_configs)

        # Should be max of all strategies: 46 (from MACD)
        assert warmup == 46

    def test_calculate_warmup_period_unknown_strategy(self, processor):
        """Test warm-up period calculation for unknown strategy type."""
        strategy_configs = [
            {
                'name': 'custom_strategy',
                'some_param': 100,
            }
        ]

        warmup = processor._calculate_warmup_period(strategy_configs)

        # Should be 30 default + 10 safety buffer = 40
        assert warmup == 40

    def test_process_large_dataset_streaming_with_warmup(
        self, processor, sample_strategy_configs, sample_strategy_results
    ):
        """Test streaming processing with warm-up period handling."""
        # Mock the warm-up calculation, the parallel processing method and
        # the trimming method so only the chunking logic is exercised.
        with patch.object(processor, '_calculate_warmup_period') as mock_warmup, \
                patch.object(processor, 'process_strategies_parallel') as mock_parallel, \
                patch.object(processor, '_trim_warmup_from_results') as mock_trim:
            mock_warmup.return_value = 10  # 10 days warm-up
            mock_parallel.return_value = {
                'test_strategy': sample_strategy_results
            }
            mock_trim.return_value = {'test_strategy': sample_strategy_results}

            # Test streaming with 60 days split into 30-day chunks
            stream = processor.process_large_dataset_streaming_with_warmup(
                strategy_configs=sample_strategy_configs,
                symbols=['BTC-USDT'],
                timeframe='1h',
                total_days_back=60,  # Should create 2 chunks
            )

            # Consume the stream while the patches are still active.
            chunks = list(stream)

            assert len(chunks) == 2  # 60 days / 30 days per chunk

            # Verify parallel processing was called with correct parameters
            assert mock_parallel.call_count == 2

            # First chunk should not have warm-up, second should
            first_call_args = mock_parallel.call_args_list[0]
            second_call_args = mock_parallel.call_args_list[1]

            # First chunk: 30 days (no warm-up)
            assert first_call_args[1]['days_back'] == 30

            # Second chunk: 30 + 10 warm-up = 40 days
            assert second_call_args[1]['days_back'] == 40

            # Trimming should only be called for second chunk
            assert mock_trim.call_count == 1

    def test_trim_warmup_from_results(self, processor, sample_strategy_results):
        """Test trimming warm-up period from results."""
        # Create test results with multiple signals
        extended_results = sample_strategy_results * 10  # 10 results total
        chunk_results = {
            'strategy1': extended_results,
            'strategy2': sample_strategy_results * 5,  # 5 results
        }

        trimmed = processor._trim_warmup_from_results(
            chunk_results=chunk_results,
            warmup_days=10,
            target_start_days=30,
            target_end_days=60,
        )

        # Verify trimming occurred
        assert len(trimmed['strategy1']) <= len(extended_results)
        assert len(trimmed['strategy2']) <= len(sample_strategy_results * 5)

        # Results should be sorted by timestamp
        for strategy_name, results in trimmed.items():
            if len(results) > 1:
                timestamps = [r.timestamp for r in results]
                assert timestamps == sorted(timestamps)

    def test_streaming_with_warmup_chunk_size_adjustment(
        self, processor, sample_strategy_configs
    ):
        """Test streaming still runs when the chunk size is smaller than the warm-up."""
        # Set up small chunk size relative to warm-up
        processor.config.chunk_size_days = 15  # Small chunk size

        with patch.object(processor, '_calculate_warmup_period') as mock_warmup, \
                patch.object(processor, 'process_strategies_parallel') as mock_parallel:
            mock_warmup.return_value = 30  # Large warm-up period
            mock_parallel.return_value = {}

            # This should trigger chunk size adjustment
            stream = processor.process_large_dataset_streaming_with_warmup(
                strategy_configs=sample_strategy_configs,
                symbols=['BTC-USDT'],
                timeframe='1h',
                total_days_back=90,
            )

            # Consume the stream to trigger processing
            list(stream)

            # Without these checks the test would pass even if nothing ran.
            mock_warmup.assert_called()
            mock_parallel.assert_called()

            # NOTE(review): the adjustment warning itself is not asserted;
            # capturing log messages (e.g. pytest's caplog) would need the
            # logger name and level used by the implementation - confirm.