TCPDashboard/tests/test_data_collection_aggregation.py
Vasily.onl 2890ba2efa Implement Service Configuration Manager for data collection service
- Introduced `service_config.py` to manage configuration loading, validation, and schema management, enhancing modularity and security.
- Created a `ServiceConfig` class for handling configuration with robust error handling and default values.
- Refactored `DataCollectionService` to utilize the new `ServiceConfig`, streamlining configuration management and improving readability.
- Added a `CollectorFactory` to encapsulate collector creation logic, promoting separation of concerns.
- Updated `CollectorManager` and related components to align with the new architecture, ensuring better maintainability.
- Enhanced logging practices across the service for improved monitoring and debugging.

These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity and performance.
2025-06-10 12:55:27 +08:00

795 lines
29 KiB
Python

#!/usr/bin/env python3
"""
Comprehensive Unit Tests for Data Collection and Aggregation Logic
This module provides comprehensive unit tests for the data collection and aggregation
functionality, covering:
- OKX data collection and processing
- Real-time candle aggregation
- Data validation and transformation
- Error handling and edge cases
- Performance and reliability testing
This completes task 2.9 of phase 2.
"""
import pytest
import asyncio
import json
from datetime import datetime, timezone, timedelta
from decimal import Decimal
from typing import Dict, List, Any, Optional
from unittest.mock import Mock, AsyncMock, patch
from collections import defaultdict
# Import modules under test
from data.base_collector import BaseDataCollector, DataType, MarketDataPoint, CollectorStatus
from data.collector_manager import CollectorManager
from data.collector_types import CollectorConfig
from data.collection_service import DataCollectionService
from data.exchanges.okx.collector import OKXCollector
from data.exchanges.okx.data_processor import OKXDataProcessor, OKXDataValidator, OKXDataTransformer
from data.exchanges.okx.websocket import OKXWebSocketClient, OKXSubscription, OKXChannelType
from data.common.data_types import (
StandardizedTrade, OHLCVCandle, CandleProcessingConfig,
DataValidationResult
)
from data.common.aggregation.realtime import RealTimeCandleProcessor
from data.common.validation import BaseDataValidator, ValidationResult
from data.common.transformation import BaseDataTransformer
from utils.logger import get_logger
@pytest.fixture
def logger():
    """Provide the DEBUG-level logger used by the tests in this module."""
    test_logger = get_logger("test_data_collection", log_level="DEBUG")
    return test_logger
@pytest.fixture
def sample_trade_data():
    """Return one raw OKX trade payload for BTC-USDT with string-valued fields."""
    return dict(
        instId="BTC-USDT",
        tradeId="123456789",
        px="50000.50",
        sz="0.1",
        side="buy",
        ts="1640995200000",  # 2022-01-01 00:00:00 UTC
    )
@pytest.fixture
def sample_orderbook_data():
    """Return one raw OKX order book payload holding a single ask and a single bid."""
    ask_levels = [["50001.00", "0.5", "0", "2"]]
    bid_levels = [["49999.00", "0.3", "0", "1"]]
    return dict(
        instId="BTC-USDT",
        asks=ask_levels,
        bids=bid_levels,
        ts="1640995200000",
        seqId="12345",
    )
@pytest.fixture
def sample_ticker_data():
    """Return one raw OKX ticker payload for BTC-USDT with string-valued fields."""
    return dict(
        instId="BTC-USDT",
        # Last trade
        last="50000.50",
        lastSz="0.1",
        # Top of book
        askPx="50001.00",
        askSz="0.5",
        bidPx="49999.00",
        bidSz="0.3",
        # 24h statistics
        open24h="49500.00",
        high24h="50500.00",
        low24h="49000.00",
        vol24h="1000.5",
        volCcy24h="50000000.00",
        ts="1640995200000",
    )
@pytest.fixture
def candle_config():
    """Build the candle processing configuration shared by the aggregation tests."""
    settings = {
        "timeframes": ['1s', '5s', '1m', '5m'],
        "auto_save_candles": False,
        "emit_incomplete_candles": False,
    }
    return CandleProcessingConfig(**settings)
class TestDataCollectionAndAggregation:
    """Smoke tests for the data collection and aggregation modules."""

    def test_basic_imports(self):
        """Check that the key names imported from the modules under test resolved."""
        imported_names = (
            StandardizedTrade,
            OHLCVCandle,
            CandleProcessingConfig,
            DataValidationResult,
            RealTimeCandleProcessor,
            BaseDataValidator,
            ValidationResult,
        )
        for imported in imported_names:
            assert imported is not None
class TestOKXDataValidation:
    """Validation tests for OKX symbols, trades, order books and tickers."""

    @pytest.fixture
    def validator(self, logger):
        """Provide an ``OKXDataValidator`` wired to the test logger."""
        return OKXDataValidator("test_validator", logger)

    def test_symbol_format_validation(self, validator):
        """Well-formed symbols pass; malformed ones fail and report errors."""
        accepted = ["BTC-USDT", "ETH-USDC", "SOL-USD", "DOGE-USDT"]
        for symbol in accepted:
            outcome = validator.validate_symbol_format(symbol)
            assert outcome.is_valid, f"Symbol {symbol} should be valid"
            assert len(outcome.errors) == 0

        rejected = ["BTCUSDT", "BTC/USDT", "btc-usdt", "BTC-", "-USDT", ""]
        for symbol in rejected:
            outcome = validator.validate_symbol_format(symbol)
            assert not outcome.is_valid, f"Symbol {symbol} should be invalid"
            assert len(outcome.errors) > 0

    def test_trade_data_validation(self, validator, sample_trade_data):
        """A complete trade validates; a missing or non-numeric price is rejected."""
        # Happy path: the untouched sample payload.
        outcome = validator.validate_trade_data(sample_trade_data)
        assert outcome.is_valid
        assert len(outcome.errors) == 0
        assert outcome.sanitized_data is not None

        # Same payload with the price field removed.
        without_price = dict(sample_trade_data)
        without_price.pop('px')
        outcome = validator.validate_trade_data(without_price)
        assert not outcome.is_valid
        expected_message = "Missing required trade field: px"
        assert any(expected_message in error for error in outcome.errors)

        # Same payload with a price that is not a number.
        bad_price = {**sample_trade_data, 'px': "invalid_price"}
        outcome = validator.validate_trade_data(bad_price)
        assert not outcome.is_valid
        assert any("price" in error.lower() for error in outcome.errors)

    def test_orderbook_data_validation(self, validator, sample_orderbook_data):
        """A complete order book validates; one without asks is rejected."""
        outcome = validator.validate_orderbook_data(sample_orderbook_data)
        assert outcome.is_valid
        assert len(outcome.errors) == 0

        # Drop the ask side and expect an error that mentions it.
        without_asks = dict(sample_orderbook_data)
        without_asks.pop('asks')
        outcome = validator.validate_orderbook_data(without_asks)
        assert not outcome.is_valid
        assert any("asks" in error.lower() for error in outcome.errors)

    def test_ticker_data_validation(self, validator, sample_ticker_data):
        """A complete ticker validates; one without ``last`` is rejected."""
        outcome = validator.validate_ticker_data(sample_ticker_data)
        assert outcome.is_valid
        assert len(outcome.errors) == 0

        # Drop the last-price field and expect an error that mentions it.
        without_last = dict(sample_ticker_data)
        without_last.pop('last')
        outcome = validator.validate_ticker_data(without_last)
        assert not outcome.is_valid
        assert any("last" in error.lower() for error in outcome.errors)
class TestOKXDataTransformation:
    """Transformation tests for raw OKX payloads."""

    @pytest.fixture
    def transformer(self, logger):
        """Provide an ``OKXDataTransformer`` wired to the test logger."""
        return OKXDataTransformer("test_transformer", logger)

    def test_trade_data_transformation(self, transformer, sample_trade_data):
        """A raw trade becomes a ``StandardizedTrade`` with parsed field values."""
        trade = transformer.transform_trade_data(sample_trade_data, "BTC-USDT")

        assert trade is not None
        assert isinstance(trade, StandardizedTrade)
        # Identity fields
        assert trade.symbol == "BTC-USDT"
        assert trade.trade_id == "123456789"
        # Numeric fields are compared as Decimal
        assert trade.price == Decimal("50000.50")
        assert trade.size == Decimal("0.1")
        assert trade.side == "buy"
        assert trade.exchange == "okx"
        # The sample timestamp is 2022-01-01 00:00:00 UTC
        assert trade.timestamp.year == 2022

    def test_orderbook_data_transformation(self, transformer, sample_orderbook_data):
        """A raw order book keeps its symbol, exchange and non-empty sides."""
        book = transformer.transform_orderbook_data(sample_orderbook_data, "BTC-USDT")

        assert book is not None
        assert book['symbol'] == "BTC-USDT"
        assert book['exchange'] == "okx"
        sides = ('asks', 'bids')
        for side in sides:
            assert side in book
        for side in sides:
            assert len(book[side]) > 0

    def test_ticker_data_transformation(self, transformer, sample_ticker_data):
        """A raw ticker exposes Decimal ``last``, ``bid`` and ``ask`` values."""
        ticker = transformer.transform_ticker_data(sample_ticker_data, "BTC-USDT")

        assert ticker is not None
        expected_fields = (
            ('symbol', "BTC-USDT"),
            ('exchange', "okx"),
            ('last', Decimal("50000.50")),
            ('bid', Decimal("49999.00")),
            ('ask', Decimal("50001.00")),
        )
        for key, expected in expected_fields:
            assert ticker[key] == expected
class TestRealTimeCandleAggregation:
    """Tests for building OHLCV candles from a stream of trades."""

    @pytest.fixture
    def processor(self, candle_config, logger):
        """Provide a ``RealTimeCandleProcessor`` for BTC-USDT on OKX."""
        processor_kwargs = dict(
            symbol="BTC-USDT",
            exchange="okx",
            config=candle_config,
            component_name="test_processor",
            logger=logger,
        )
        return RealTimeCandleProcessor(**processor_kwargs)

    def test_single_trade_processing(self, processor):
        """One trade is counted and opens candles but completes none."""
        first_trade = StandardizedTrade(
            symbol="BTC-USDT",
            trade_id="123",
            price=Decimal("50000"),
            size=Decimal("0.1"),
            side="buy",
            timestamp=datetime(2022, 1, 1, 12, 0, 0, tzinfo=timezone.utc),
            exchange="okx",
        )

        finished = processor.process_trade(first_trade)

        # The very first trade shouldn't complete any candles.
        assert len(finished) == 0

        # It should, however, be counted and leave candles under construction.
        stats = processor.get_stats()
        assert stats['trades_processed'] == 1
        assert 'active_timeframes' in stats
        assert len(stats['active_timeframes']) > 0

    def test_candle_completion_timing(self, processor):
        """Trades spread over six seconds complete 1s/5s candles only."""
        start = datetime(2022, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        emitted = []

        def collect(candle):
            emitted.append(candle)

        processor.add_candle_callback(collect)

        # One trade per second, each one unit more expensive than the last.
        for offset in range(6):
            processor.process_trade(
                StandardizedTrade(
                    symbol="BTC-USDT",
                    trade_id=str(offset),
                    price=Decimal("50000") + Decimal(str(offset)),
                    size=Decimal("0.1"),
                    side="buy",
                    timestamp=start + timedelta(seconds=offset),
                    exchange="okx",
                )
            )

        # Should have completed some 1s and 5s candles.
        assert len(emitted) > 0

        short_timeframes = ['1s', '5s']
        for candle in emitted:
            assert candle.symbol == "BTC-USDT"
            assert candle.exchange == "okx"
            assert candle.timeframe in short_timeframes
            assert candle.trade_count > 0
            assert candle.volume > 0

    def test_ohlcv_calculation_accuracy(self, processor):
        """Open/high/low/close/volume/count of a 1s candle match its trades."""
        start = datetime(2022, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        emitted = []

        def collect(candle):
            emitted.append(candle)

        processor.add_candle_callback(collect)

        # Four trades with known prices, 100 ms apart, all in the same second.
        prices = [Decimal("50000"), Decimal("50100"), Decimal("49900"), Decimal("50050")]
        sizes = [Decimal("0.1"), Decimal("0.2"), Decimal("0.15"), Decimal("0.05")]
        for index, price in enumerate(prices):
            processor.process_trade(
                StandardizedTrade(
                    symbol="BTC-USDT",
                    trade_id=str(index),
                    price=price,
                    size=sizes[index],
                    side="buy",
                    timestamp=start + timedelta(milliseconds=index * 100),
                    exchange="okx",
                )
            )

        # A trade in the following second forces the first 1s candle to close.
        closing_trade = StandardizedTrade(
            symbol="BTC-USDT",
            trade_id="final",
            price=Decimal("50000"),
            size=Decimal("0.1"),
            side="buy",
            timestamp=start + timedelta(seconds=1),
            exchange="okx",
        )
        processor.process_trade(closing_trade)

        # Pick the first emitted 1s candle, if any.
        candle_1s = None
        for candle in emitted:
            if candle.timeframe == '1s':
                candle_1s = candle
                break
        assert candle_1s is not None

        assert candle_1s.open == Decimal("50000")  # First trade price
        assert candle_1s.high == Decimal("50100")  # Highest price
        assert candle_1s.low == Decimal("49900")  # Lowest price
        assert candle_1s.close == Decimal("50050")  # Last trade price
        assert candle_1s.volume == sum(sizes)  # Total volume
        assert candle_1s.trade_count == 4  # Number of trades

    def test_multiple_timeframe_aggregation(self, processor):
        """Six seconds of dense trading yields both 1s and 5s candles."""
        start = datetime(2022, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        emitted = []

        def collect(candle):
            emitted.append(candle)

        processor.add_candle_callback(collect)

        # Ten trades per second (every 100 ms) for six seconds.
        for second in range(6):
            for step in range(10):
                ms = step * 100
                processor.process_trade(
                    StandardizedTrade(
                        symbol="BTC-USDT",
                        trade_id=f"{second}_{ms}",
                        price=Decimal("50000") + Decimal(str(second)),
                        size=Decimal("0.01"),
                        side="buy",
                        timestamp=start + timedelta(seconds=second, milliseconds=ms),
                        exchange="okx",
                    )
                )

        # Both short timeframes must have produced at least one candle.
        timeframes_found = {candle.timeframe for candle in emitted}
        assert '1s' in timeframes_found
        assert '5s' in timeframes_found

        # Compare the first 5s candle with the 1s candles that fall inside it.
        candles_1s = [candle for candle in emitted if candle.timeframe == '1s']
        candles_5s = [candle for candle in emitted if candle.timeframe == '5s']
        if not candles_5s:
            return

        candle_5s = candles_5s[0]
        related_1s_candles = [
            candle for candle in candles_1s
            if candle.start_time >= candle_5s.start_time
            and candle.end_time <= candle_5s.end_time
        ]
        if not related_1s_candles:
            return

        expected_volume = sum(candle.volume for candle in related_1s_candles)
        expected_trades = sum(candle.trade_count for candle in related_1s_candles)
        assert candle_5s.volume >= expected_volume  # May include partial data
        assert candle_5s.trade_count >= expected_trades
class TestOKXDataProcessor:
    """Test OKX data processor integration."""

    @pytest.fixture
    def processor(self, candle_config, logger):
        """Create an ``OKXDataProcessor`` for BTC-USDT using the shared candle config."""
        return OKXDataProcessor(
            symbol="BTC-USDT",
            config=candle_config,
            component_name="test_okx_processor",
            logger=logger,
        )

    @staticmethod
    def _trades_message(trade_data):
        """Wrap one raw trade payload in an OKX ``trades`` channel message."""
        return {
            "arg": {"channel": "trades", "instId": "BTC-USDT"},
            "data": [trade_data],
        }

    def test_websocket_message_processing(self, processor, sample_trade_data):
        """A valid trades message yields exactly one TRADE data point and no errors."""
        message = self._trades_message(sample_trade_data)

        success, data_points, errors = processor.validate_and_process_message(
            message, "BTC-USDT"
        )

        assert success
        assert len(data_points) == 1
        assert len(errors) == 0
        assert data_points[0].data_type == DataType.TRADE
        assert data_points[0].symbol == "BTC-USDT"

    def test_invalid_message_handling(self, processor):
        """A message without the expected structure is rejected with errors."""
        invalid_message = {"invalid": "message"}

        success, data_points, errors = processor.validate_and_process_message(
            invalid_message
        )

        assert not success
        assert len(data_points) == 0
        assert len(errors) > 0

    def test_trade_callback_execution(self, processor, sample_trade_data):
        """Registered trade callbacks receive a ``StandardizedTrade``."""
        received_trades = []

        def trade_callback(trade):
            received_trades.append(trade)

        processor.add_trade_callback(trade_callback)

        processor.validate_and_process_message(
            self._trades_message(sample_trade_data), "BTC-USDT"
        )

        assert received_trades, "trade callback was never invoked"
        assert isinstance(received_trades[-1], StandardizedTrade)

    def test_candle_callback_execution(self, processor, sample_trade_data):
        """Registered candle callbacks fire once a candle completes.

        Two trades one second apart are sent. The shared candle config
        includes the '1s' timeframe, so the second trade is expected to
        close the candle opened by the first.
        """
        received_candles = []

        def candle_callback(candle):
            received_candles.append(candle)

        processor.add_candle_callback(candle_callback)

        base_time = int(
            datetime(2022, 1, 1, 12, 0, 0, tzinfo=timezone.utc).timestamp() * 1000
        )
        for i in range(2):  # Two trades in different seconds
            trade_data = sample_trade_data.copy()
            trade_data['ts'] = str(base_time + i * 1000)  # 1 second apart
            trade_data['tradeId'] = str(i)
            processor.validate_and_process_message(
                self._trades_message(trade_data), "BTC-USDT"
            )

        # Assert unconditionally: guarding these checks with
        # ``if callback_called`` let the test pass when no candle was emitted.
        assert received_candles, (
            "candle callback was never invoked after a trade in the next second"
        )
        assert all(isinstance(candle, OHLCVCandle) for candle in received_candles)
class TestDataCollectionService:
    """Test the data collection service integration."""

    @pytest.fixture
    def service_config(self):
        """Create a high-level service configuration dictionary.

        The tests in this class do not read this value; they build the
        service from the JSON file produced by ``config_path``.
        """
        return {
            'exchanges': {
                'okx': {
                    'enabled': True,
                    'symbols': ['BTC-USDT'],
                    'data_types': ['trade', 'ticker'],
                    'store_raw_data': False
                }
            },
            'candle_config': {
                'timeframes': ['1s', '1m'],
                'auto_save_candles': False
            }
        }

    @pytest.fixture
    def config_path(self, tmp_path):
        """Write an OKX service configuration to a JSON file and return its path.

        The file lives under pytest's ``tmp_path``, so it is cleaned up by
        pytest even when a test fails and no manual ``os.unlink`` is needed.
        The path is returned as ``str`` so it compares equal to the value the
        service stores in ``config_path``.
        """
        test_config = {
            "exchange": "okx",
            "connection": {
                "public_ws_url": "wss://ws.okx.com:8443/ws/v5/public",
                "ping_interval": 25.0,
                "pong_timeout": 10.0,
                "max_reconnect_attempts": 5,
                "reconnect_delay": 5.0
            },
            "data_collection": {
                "store_raw_data": False,
                "health_check_interval": 120.0,
                "auto_restart": True,
                "buffer_size": 1000
            },
            "trading_pairs": [
                {
                    "symbol": "BTC-USDT",
                    "enabled": True,
                    "data_types": ["trade", "ticker"],
                    "timeframes": ["1s", "1m"],
                    "channels": {
                        "trades": "trades",
                        "ticker": "tickers"
                    }
                }
            ]
        }
        config_file = tmp_path / "okx_service_config.json"
        config_file.write_text(json.dumps(test_config), encoding="utf-8")
        return str(config_file)

    @pytest.mark.asyncio
    async def test_service_initialization(self, config_path):
        """The service records its config path, loads the config and starts idle."""
        service = DataCollectionService(config_path=config_path)

        assert service.config_path == config_path
        assert not service.running
        # Check that the service loaded configuration
        assert service.config is not None
        assert 'exchange' in service.config

    @pytest.mark.asyncio
    async def test_service_lifecycle(self, config_path):
        """A freshly built service is not running and reports a status mapping."""
        service = DataCollectionService(config_path=config_path)

        # Collectors are deliberately not started, to avoid network
        # dependencies in unit tests.
        assert not service.running

        status = service.get_status()
        assert 'running' in status
        assert 'collectors_total' in status
class TestErrorHandlingAndEdgeCases:
    """Error-handling and edge-case tests for validation and aggregation."""

    def test_malformed_trade_data(self, logger):
        """A trade with null, non-numeric and unknown field values is rejected."""
        broken_payload = {
            "instId": "BTC-USDT",
            "px": None,  # Null price
            "sz": "invalid_size",
            "side": "invalid_side",
            "ts": "not_a_timestamp",
        }

        outcome = OKXDataValidator("test", logger).validate_trade_data(broken_payload)

        assert not outcome.is_valid
        assert len(outcome.errors) > 0

    def test_empty_aggregation_data(self, candle_config, logger):
        """A processor that has seen no trades reports zeroed statistics."""
        idle_processor = RealTimeCandleProcessor(
            symbol="BTC-USDT",
            exchange="okx",
            config=candle_config,
            logger=logger,
        )

        stats = idle_processor.get_stats()

        assert stats['trades_processed'] == 0
        assert 'active_timeframes' in stats
        # Should be a list, even if empty
        assert isinstance(stats['active_timeframes'], list)
        assert stats['candles_emitted'] == 0
        assert stats['errors_count'] == 0

    def test_out_of_order_trades(self, candle_config, logger):
        """Trades arriving newest-first are all processed without raising."""
        candle_processor = RealTimeCandleProcessor(
            symbol="BTC-USDT",
            exchange="okx",
            config=candle_config,
            logger=logger,
        )
        start = datetime(2022, 1, 1, 12, 0, 0, tzinfo=timezone.utc)

        # Feed offsets 3, 2, 1 seconds: reverse chronological order.
        for offset in reversed(range(1, 4)):
            candle_processor.process_trade(
                StandardizedTrade(
                    symbol="BTC-USDT",
                    trade_id=str(offset),
                    price=Decimal("50000"),
                    size=Decimal("0.1"),
                    side="buy",
                    timestamp=start + timedelta(seconds=offset),
                    exchange="okx",
                )
            )

        # Should handle gracefully without crashing
        assert candle_processor.get_stats()['trades_processed'] == 3

    def test_extreme_price_values(self, logger):
        """Very large and very small prices either validate or yield a price error."""
        validator = OKXDataValidator("test", logger)

        # Very large price
        huge_price_trade = {
            "instId": "BTC-USDT",
            "tradeId": "123",
            "px": "999999999999.99",
            "sz": "0.1",
            "side": "buy",
            "ts": "1640995200000",
        }
        outcome = validator.validate_trade_data(huge_price_trade)
        assert outcome.is_valid or "price" in str(outcome.errors)

        # Very small price; every other field is identical
        tiny_price_trade = {**huge_price_trade, "px": "0.00000001"}
        outcome = validator.validate_trade_data(tiny_price_trade)
        assert outcome.is_valid or "price" in str(outcome.errors)
class TestPerformanceAndReliability:
    """Throughput and long-run behaviour tests for the candle processor."""

    def test_high_frequency_trade_processing(self, candle_config, logger):
        """1000 trades spaced 1 ms apart are all counted."""
        candle_processor = RealTimeCandleProcessor(
            symbol="BTC-USDT",
            exchange="okx",
            config=candle_config,
            logger=logger,
        )
        start = datetime(2022, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        trade_total = 1000

        for index in range(trade_total):
            # Alternate sides and cycle the price through 100 distinct levels.
            side = "buy" if index % 2 == 0 else "sell"
            price = Decimal("50000") + Decimal(str(index % 100))
            candle_processor.process_trade(
                StandardizedTrade(
                    symbol="BTC-USDT",
                    trade_id=str(index),
                    price=price,
                    size=Decimal("0.001"),
                    side=side,
                    timestamp=start + timedelta(milliseconds=index),
                    exchange="okx",
                )
            )

        stats = candle_processor.get_stats()
        assert stats['trades_processed'] == 1000
        assert 'active_timeframes' in stats
        assert len(stats['active_timeframes']) > 0

    def test_memory_usage_with_long_running_aggregation(self, candle_config, logger):
        """Ten minutes of per-second trades leave one active entry per timeframe."""
        candle_processor = RealTimeCandleProcessor(
            symbol="BTC-USDT",
            exchange="okx",
            config=candle_config,
            logger=logger,
        )
        start = datetime(2022, 1, 1, 12, 0, 0, tzinfo=timezone.utc)

        # One trade per second for 10 minutes (600 ticks in total).
        for tick in range(10 * 60):
            minute, second = divmod(tick, 60)
            candle_processor.process_trade(
                StandardizedTrade(
                    symbol="BTC-USDT",
                    trade_id=f"{minute}_{second}",
                    price=Decimal("50000"),
                    size=Decimal("0.1"),
                    side="buy",
                    timestamp=start + timedelta(minutes=minute, seconds=second),
                    exchange="okx",
                )
            )

        stats = candle_processor.get_stats()
        # Should have processed many trades but not keep unlimited candles in memory
        assert stats['trades_processed'] == 600  # 10 minutes * 60 seconds
        assert 'active_timeframes' in stats
        assert len(stats['active_timeframes']) == len(candle_config.timeframes)
if __name__ == "__main__":
    # pytest.main() returns the run's exit code. Raise SystemExit with it so
    # that running this file as a script exits non-zero when tests fail.
    raise SystemExit(pytest.main([__file__, "-v"]))