Implement comprehensive transformation module with safety limits and validations

- Introduced a new transformation module that includes safety limits for trade operations, enhancing data integrity and preventing errors.
- Refactored existing transformation logic into dedicated classes and functions, improving modularity and maintainability.
- Added detailed validation for trade sizes, prices, and symbol formats, ensuring compliance with trading rules.
- Implemented logging for significant operations and validation checks, aiding in monitoring and debugging.
- Created a changelog to document the new features and changes, providing clarity for future development.
- Developed extensive unit tests to cover the new functionality, ensuring reliability and preventing regressions.

These changes significantly enhance the architecture of the transformation module, making it more robust and easier to manage.
This commit is contained in:
Ajasra
2025-06-07 13:23:59 +08:00
parent 96ee25bd01
commit 68030730e9
17 changed files with 2020 additions and 534 deletions

View File

@@ -0,0 +1,138 @@
"""Tests for trade safety limits and validations."""
from decimal import Decimal
import pytest
from data.common.transformation.safety import (
TradeLimits,
DEFAULT_LIMITS,
STABLECOIN_LIMITS,
VOLATILE_LIMITS,
is_stablecoin_pair,
get_trade_limits,
validate_trade_size,
validate_trade_price,
validate_symbol_format
)
def test_stablecoin_detection():
    """Check is_stablecoin_pair against known stablecoin and non-stablecoin pairs."""
    # Pairs with a stablecoin on either side, in both '-' and '/' notation.
    for symbol in ('BTC-USDT', 'ETH/USDC', 'USDT-BTC', 'DAI/ETH'):
        assert is_stablecoin_pair(symbol)

    # Pairs where neither side is a stablecoin.
    for symbol in ('BTC-ETH', 'LTC/XRP', 'DOT-SOL'):
        assert not is_stablecoin_pair(symbol)
def test_get_trade_limits():
    """Check which limit set get_trade_limits selects for each symbol."""
    # Stablecoin pairs map to STABLECOIN_LIMITS; every other pair here maps
    # to VOLATILE_LIMITS.
    expected_by_symbol = {
        'BTC-USDT': STABLECOIN_LIMITS,
        'ETH/USDC': STABLECOIN_LIMITS,
        'BTC-ETH': VOLATILE_LIMITS,
        'LTC/XRP': VOLATILE_LIMITS,
    }
    for symbol, expected_limits in expected_by_symbol.items():
        assert get_trade_limits(symbol) == expected_limits
def test_validate_trade_size():
    """Exercise validate_trade_size with one accepted and four rejected trades."""
    symbol = 'BTC-USDT'

    # A typical trade must pass without raising.
    validate_trade_size(Decimal('1.0'), Decimal('50000'), symbol)

    # Each entry: (first arg, second arg, fragment expected in the error).
    # NOTE(review): positional order is presumably (size, price) - confirm
    # against validate_trade_size's signature.
    rejected_cases = (
        (Decimal('0.000000001'), Decimal('50000'), 'below minimum'),  # minimum size
        (Decimal('2000000'), Decimal('50000'), 'exceeds maximum'),    # maximum size
        (Decimal('0.00001'), Decimal('10'), 'below minimum'),         # minimum notional
        (Decimal('1000'), Decimal('1000000'), 'exceeds maximum'),     # maximum notional
    )
    for size, price, message_fragment in rejected_cases:
        with pytest.raises(ValueError, match=message_fragment):
            validate_trade_size(size, price, symbol)
def test_validate_trade_price():
    """Exercise validate_trade_price for accepted and rejected price deviations."""
    reference_symbol = 'BTC-USDT'

    # A price near the market price must pass without raising.
    validate_trade_price(Decimal('50000'), Decimal('49000'), reference_symbol)

    # Each entry: (price, market price, symbol) expected to raise a
    # ValueError whose message mentions 'deviation'.
    excessive_deviations = (
        # 10% deviation exceeds 5% limit (stablecoins)
        (Decimal('1.10'), Decimal('1.00'), 'USDT-USDC'),
        # 100% deviation exceeds 50% limit (volatile pairs)
        (Decimal('60000'), Decimal('30000'), 'BTC-ETH'),
    )
    for price, market_price, symbol in excessive_deviations:
        with pytest.raises(ValueError, match='deviation'):
            validate_trade_price(price, market_price, symbol)

    # A missing (None) market price must be tolerated without raising.
    validate_trade_price(Decimal('50000'), None, reference_symbol)
def test_validate_symbol_format():
    """Check that validate_symbol_format accepts valid symbols and rejects bad ones."""
    # Well-formed symbols must pass without raising.
    for symbol in ('BTC-USDT', 'ETH/USDC', 'LTC-BTC'):
        validate_symbol_format(symbol)

    # Malformed symbols, each expected to raise ValueError.
    malformed_symbols = (
        '',                   # Empty
        'BTCUSDT',            # No separator
        'BTC_USDT',           # Wrong separator
        'BTC-USD-T',          # Too many parts
        'a-b',                # Too short
        'VERYLONGTOKEN-BTC',  # Too long
    )
    for symbol in malformed_symbols:
        with pytest.raises(ValueError):
            validate_symbol_format(symbol)

View File

@@ -266,8 +266,8 @@ class TestRealTimeCandleAggregation:
# Check that candles are being built
stats = processor.get_stats()
assert stats['trades_processed'] == 1
assert 'current_buckets' in stats
assert len(stats['current_buckets']) > 0 # Should have active buckets
assert 'active_timeframes' in stats
assert len(stats['active_timeframes']) > 0 # Should have active timeframes
def test_candle_completion_timing(self, processor):
"""Test that candles complete at the correct time boundaries."""
@@ -666,7 +666,10 @@ class TestErrorHandlingAndEdgeCases:
stats = processor.get_stats()
assert stats['trades_processed'] == 0
assert 'current_buckets' in stats
assert 'active_timeframes' in stats
assert isinstance(stats['active_timeframes'], list) # Should be a list, even if empty
assert stats['candles_emitted'] == 0
assert stats['errors_count'] == 0
def test_out_of_order_trades(self, candle_config, logger):
"""Test handling of out-of-order trade timestamps."""
@@ -751,7 +754,8 @@ class TestPerformanceAndReliability:
stats = processor.get_stats()
assert stats['trades_processed'] == 1000
assert 'current_buckets' in stats
assert 'active_timeframes' in stats
assert len(stats['active_timeframes']) > 0
def test_memory_usage_with_long_running_aggregation(self, candle_config, logger):
"""Test memory usage doesn't grow unbounded."""
@@ -782,8 +786,8 @@ class TestPerformanceAndReliability:
# Should have processed many trades but not keep unlimited candles in memory
assert stats['trades_processed'] == 600 # 10 minutes * 60 seconds
# Check current buckets instead of non-existent active_candles
assert 'current_buckets' in stats
assert 'active_timeframes' in stats
assert len(stats['active_timeframes']) == len(candle_config.timeframes)
if __name__ == "__main__":

View File

@@ -0,0 +1,429 @@
"""
Tests for the common transformation utilities.
This module provides comprehensive test coverage for the base transformation
utilities used across all exchanges.
"""
import pytest
from datetime import datetime, timezone
from decimal import Decimal
from typing import Dict, Any
from data.common.transformation import (
BaseDataTransformer,
UnifiedDataTransformer,
create_standardized_trade,
batch_create_standardized_trades
)
from data.common.data_types import StandardizedTrade
from data.exchanges.okx.data_processor import OKXDataTransformer
class MockDataTransformer(BaseDataTransformer):
    """Concrete BaseDataTransformer used to test base-class functionality.

    Registers itself under the exchange name "mock" and implements the
    trade, orderbook and ticker transformations with minimal logic.
    """

    def __init__(self, component_name: str = "mock_transformer"):
        super().__init__("mock", component_name)

    def transform_trade_data(self, raw_data: Dict[str, Any], symbol: str) -> StandardizedTrade:
        """Build a StandardizedTrade from the id/price/size/side/timestamp keys."""
        trade_kwargs = {
            'symbol': symbol,
            'trade_id': raw_data['id'],
            'price': raw_data['price'],
            'size': raw_data['size'],
            'side': raw_data['side'],
            'timestamp': raw_data['timestamp'],
            'exchange': "mock",
            'raw_data': raw_data,
        }
        return create_standardized_trade(**trade_kwargs)

    def transform_orderbook_data(self, raw_data: Dict[str, Any], symbol: str) -> Dict[str, Any]:
        """Return a dict with asks/bids (empty list when absent) and a converted timestamp."""
        orderbook: Dict[str, Any] = {'symbol': symbol}
        orderbook['asks'] = raw_data.get('asks', [])
        orderbook['bids'] = raw_data.get('bids', [])
        orderbook['timestamp'] = self.timestamp_to_datetime(raw_data['timestamp'])
        orderbook['exchange'] = 'mock'
        orderbook['raw_data'] = raw_data
        return orderbook

    def transform_ticker_data(self, raw_data: Dict[str, Any], symbol: str) -> Dict[str, Any]:
        """Return a dict with the converted 'last' price and a converted timestamp."""
        ticker: Dict[str, Any] = {'symbol': symbol}
        ticker['last'] = self.safe_decimal_conversion(raw_data.get('last'))
        ticker['timestamp'] = self.timestamp_to_datetime(raw_data['timestamp'])
        ticker['exchange'] = 'mock'
        ticker['raw_data'] = raw_data
        return ticker
@pytest.fixture
def mock_transformer():
    """Provide a MockDataTransformer built with its default component name."""
    transformer = MockDataTransformer()
    return transformer
@pytest.fixture
def unified_transformer(mock_transformer):
    """Provide a UnifiedDataTransformer wrapping the mock transformer fixture."""
    wrapped = UnifiedDataTransformer(mock_transformer)
    return wrapped
@pytest.fixture
def okx_transformer():
    """Provide an OKXDataTransformer named "test_okx_transformer"."""
    component_name = "test_okx_transformer"
    return OKXDataTransformer(component_name)
@pytest.fixture
def sample_trade_data():
    """Provide one raw trade dict in the mock transformer's key layout."""
    return dict(
        id='123456',
        price='50000.50',
        size='0.1',
        side='buy',
        timestamp=1640995200000,  # 2022-01-01 00:00:00 UTC
    )
@pytest.fixture
def sample_okx_trade_data():
    """Provide one raw trade dict using OKX-style keys (instId, tradeId, px, sz, ts)."""
    return dict(
        instId='BTC-USDT',
        tradeId='123456',
        px='50000.50',
        sz='0.1',
        side='buy',
        ts='1640995200000',
    )
@pytest.fixture
def sample_orderbook_data():
    """Provide a raw orderbook dict with two ask and two bid levels."""
    ask_levels = [['50100.5', '1.5'], ['50200.0', '2.0']]
    bid_levels = [['49900.5', '1.0'], ['49800.0', '2.5']]
    return dict(asks=ask_levels, bids=bid_levels, timestamp=1640995200000)
@pytest.fixture
def sample_okx_orderbook_data():
    """Provide a raw OKX-style orderbook dict with two ask and two bid levels."""
    ask_levels = [['50100.5', '1.5'], ['50200.0', '2.0']]
    bid_levels = [['49900.5', '1.0'], ['49800.0', '2.5']]
    return dict(
        instId='BTC-USDT',
        asks=ask_levels,
        bids=bid_levels,
        ts='1640995200000',
    )
@pytest.fixture
def sample_ticker_data():
    """Provide a raw ticker dict with a last price and a timestamp."""
    return dict(last='50000.50', timestamp=1640995200000)
@pytest.fixture
def sample_okx_ticker_data():
    """Provide a raw OKX-style ticker dict with last, bid/ask and 24h fields."""
    return dict(
        instId='BTC-USDT',
        last='50000.50',
        bidPx='49999.00',
        askPx='50001.00',
        open24h='49000.00',
        high24h='51000.00',
        low24h='48000.00',
        vol24h='1000.0',
        ts='1640995200000',
    )
class TestBaseDataTransformer:
    """Tests for the helper methods the mock transformer inherits from its base class."""

    def test_timestamp_to_datetime(self, mock_transformer):
        """Timestamps in several input forms should convert to UTC datetimes."""
        to_datetime = mock_transformer.timestamp_to_datetime

        # Integer milliseconds (the default interpretation).
        from_millis = to_datetime(1640995200000)
        assert isinstance(from_millis, datetime)
        assert from_millis.tzinfo == timezone.utc
        assert from_millis.year == 2022
        assert from_millis.month == 1
        assert from_millis.day == 1

        # Integer seconds, flagged explicitly.
        from_seconds = to_datetime(1640995200, is_milliseconds=False)
        assert from_seconds.year == 2022

        # Numeric string.
        from_string = to_datetime("1640995200000")
        assert from_string.year == 2022

        # Unparseable input must still produce a UTC datetime rather than raise.
        fallback = to_datetime("invalid")
        assert isinstance(fallback, datetime)
        assert fallback.tzinfo == timezone.utc

    def test_safe_decimal_conversion(self, mock_transformer):
        """Valid numbers convert to Decimal; unusable values yield None."""
        convert = mock_transformer.safe_decimal_conversion

        # Decimal string and plain integer.
        assert convert("123.45") == Decimal("123.45")
        assert convert(123) == Decimal("123")

        # None, empty string and non-numeric text.
        for unusable in (None, "", "invalid"):
            assert convert(unusable) is None

    def test_normalize_trade_side(self, mock_transformer):
        """Side aliases should normalize to "buy" or "sell"."""
        normalize = mock_transformer.normalize_trade_side

        for buy_alias in ("buy", "BUY", "bid", "b", "1"):
            assert normalize(buy_alias) == "buy"

        for sell_alias in ("sell", "SELL", "ask", "s", "0"):
            assert normalize(sell_alias) == "sell"

        # An unrecognized value is expected to fall back to "buy".
        assert normalize("unknown") == "buy"

    def test_validate_symbol_format(self, mock_transformer):
        """Symbols should be returned upper-cased and trimmed; empty/None should raise."""
        validate = mock_transformer.validate_symbol_format

        # Lower case, already normalized, and whitespace-padded inputs.
        for raw_symbol in ("btc-usdt", "BTC-USDT", " btc-usdt "):
            assert validate(raw_symbol) == "BTC-USDT"

        # Empty string and None are both rejected.
        for invalid_symbol in ("", None):
            with pytest.raises(ValueError):
                validate(invalid_symbol)

    def test_get_transformer_info(self, mock_transformer):
        """The info dict should report exchange, component and all three capabilities."""
        info = mock_transformer.get_transformer_info()

        assert info['exchange'] == "mock"
        assert info['component'] == "mock_transformer"
        assert 'capabilities' in info

        capabilities = info['capabilities']
        for capability in (
            'trade_transformation',
            'orderbook_transformation',
            'ticker_transformation',
        ):
            assert capabilities[capability] is True
class TestUnifiedDataTransformer:
    """Tests for UnifiedDataTransformer when it wraps the mock transformer."""

    def test_transform_trade_data(self, unified_transformer, sample_trade_data):
        """A raw trade should come back as a StandardizedTrade with matching fields."""
        trade = unified_transformer.transform_trade_data(sample_trade_data, "BTC-USDT")

        assert isinstance(trade, StandardizedTrade)
        expected_attributes = (
            ('symbol', "BTC-USDT"),
            ('trade_id', "123456"),
            ('price', Decimal("50000.50")),
            ('size', Decimal("0.1")),
            ('side', "buy"),
            ('exchange', "mock"),
        )
        for attribute, expected in expected_attributes:
            assert getattr(trade, attribute) == expected

    def test_transform_orderbook_data(self, unified_transformer, sample_orderbook_data):
        """A raw orderbook should keep both sides and carry symbol and exchange."""
        book = unified_transformer.transform_orderbook_data(sample_orderbook_data, "BTC-USDT")

        assert book is not None
        assert book['symbol'] == "BTC-USDT"
        assert book['exchange'] == "mock"
        for side in ('asks', 'bids'):
            assert len(book[side]) == 2

    def test_transform_ticker_data(self, unified_transformer, sample_ticker_data):
        """A raw ticker should carry symbol, exchange and a Decimal last price."""
        ticker = unified_transformer.transform_ticker_data(sample_ticker_data, "BTC-USDT")

        assert ticker is not None
        expected_items = (
            ('symbol', "BTC-USDT"),
            ('exchange', "mock"),
            ('last', Decimal("50000.50")),
        )
        for key, expected in expected_items:
            assert ticker[key] == expected

    def test_batch_transform_trades(self, unified_transformer):
        """Two raw trades should yield two StandardizedTrade objects in input order."""
        first_raw = {
            'id': '123456',
            'price': '50000.50',
            'size': '0.1',
            'side': 'buy',
            'timestamp': 1640995200000,
        }
        second_raw = {
            'id': '123457',
            'price': '50001.00',
            'size': '0.2',
            'side': 'sell',
            'timestamp': 1640995201000,
        }

        trades = unified_transformer.batch_transform_trades([first_raw, second_raw], "BTC-USDT")

        assert len(trades) == 2
        for trade in trades:
            assert isinstance(trade, StandardizedTrade)
        assert trades[0].trade_id == "123456"
        assert trades[1].trade_id == "123457"

    def test_get_transformer_info(self, unified_transformer):
        """The info dict should expose the wrapped exchange and the unified feature flags."""
        info = unified_transformer.get_transformer_info()

        assert info['exchange'] == "mock"
        assert 'unified_component' in info
        for feature_flag in ('batch_processing', 'candle_aggregation'):
            assert info[feature_flag] is True
class TestOKXDataTransformer:
    """Tests for OKXDataTransformer using OKX-style raw payloads."""

    def test_transform_trade_data(self, okx_transformer, sample_okx_trade_data):
        """An OKX raw trade should come back as a StandardizedTrade tagged "okx"."""
        trade = okx_transformer.transform_trade_data(sample_okx_trade_data, "BTC-USDT")

        assert isinstance(trade, StandardizedTrade)
        expected_attributes = (
            ('symbol', "BTC-USDT"),
            ('trade_id', "123456"),
            ('price', Decimal("50000.50")),
            ('size', Decimal("0.1")),
            ('side', "buy"),
            ('exchange', "okx"),
        )
        for attribute, expected in expected_attributes:
            assert getattr(trade, attribute) == expected

    def test_transform_orderbook_data(self, okx_transformer, sample_okx_orderbook_data):
        """An OKX raw orderbook should keep both sides and carry symbol and exchange."""
        book = okx_transformer.transform_orderbook_data(sample_okx_orderbook_data, "BTC-USDT")

        assert book is not None
        assert book['symbol'] == "BTC-USDT"
        assert book['exchange'] == "okx"
        for side in ('asks', 'bids'):
            assert len(book[side]) == 2

    def test_transform_ticker_data(self, okx_transformer, sample_okx_ticker_data):
        """An OKX raw ticker should map onto the normalized keys as Decimals."""
        ticker = okx_transformer.transform_ticker_data(sample_okx_ticker_data, "BTC-USDT")

        assert ticker is not None
        assert ticker['symbol'] == "BTC-USDT"
        assert ticker['exchange'] == "okx"

        # Normalized key -> value expected from the sample payload.
        expected_prices = (
            ('last', "50000.50"),
            ('bid', "49999.00"),
            ('ask', "50001.00"),
            ('open_24h', "49000.00"),
            ('high_24h', "51000.00"),
            ('low_24h', "48000.00"),
            ('volume_24h', "1000.0"),
        )
        for key, expected_text in expected_prices:
            assert ticker[key] == Decimal(expected_text)
class TestStandaloneTransformationFunctions:
    """Tests for the module-level trade construction helpers."""

    def test_create_standardized_trade(self):
        """Cover millisecond and datetime timestamps plus invalid price and side."""
        # Arguments shared by every call below; individual cases override one field.
        common_kwargs = {
            'symbol': "BTC-USDT",
            'trade_id': "123456",
            'price': "50000.50",
            'size': "0.1",
            'side': "buy",
            'timestamp': 1640995200000,
            'exchange': "test",
        }

        # Millisecond timestamp, flagged explicitly.
        trade = create_standardized_trade(**common_kwargs, is_milliseconds=True)
        assert isinstance(trade, StandardizedTrade)
        assert trade.symbol == "BTC-USDT"
        assert trade.trade_id == "123456"
        assert trade.price == Decimal("50000.50")
        assert trade.size == Decimal("0.1")
        assert trade.side == "buy"
        assert trade.exchange == "test"
        assert trade.timestamp.year == 2022

        # A datetime timestamp should be stored as given.
        moment = datetime(2022, 1, 1, tzinfo=timezone.utc)
        trade = create_standardized_trade(**{**common_kwargs, 'timestamp': moment})
        assert trade.timestamp == moment

        # A non-numeric price and an unknown side are both rejected.
        for field, bad_value in (('price', "invalid"), ('side', "invalid")):
            with pytest.raises(ValueError):
                create_standardized_trade(**{**common_kwargs, field: bad_value})

    def test_batch_create_standardized_trades(self):
        """Raw trades with custom keys should be mapped through field_mapping."""
        raw_trades = [
            {'id': '123456', 'px': '50000.50', 'sz': '0.1', 'side': 'buy', 'ts': 1640995200000},
            {'id': '123457', 'px': '50001.00', 'sz': '0.2', 'side': 'sell', 'ts': 1640995201000},
        ]
        # Standardized field name -> key used in the raw trade dicts.
        field_mapping = dict(
            trade_id='id',
            price='px',
            size='sz',
            side='side',
            timestamp='ts',
        )

        trades = batch_create_standardized_trades(
            raw_trades=raw_trades,
            symbol="BTC-USDT",
            exchange="test",
            field_mapping=field_mapping,
        )

        assert len(trades) == 2
        for trade in trades:
            assert isinstance(trade, StandardizedTrade)

        first_trade, second_trade = trades[0], trades[1]
        assert first_trade.trade_id == "123456"
        assert first_trade.price == Decimal("50000.50")
        assert second_trade.trade_id == "123457"
        assert second_trade.side == "sell"