Implement comprehensive transformation module with safety limits and validations

- Introduced a new transformation module that includes safety limits for trade operations, enhancing data integrity and preventing errors.
- Refactored existing transformation logic into dedicated classes and functions, improving modularity and maintainability.
- Added detailed validation for trade sizes, prices, and symbol formats, ensuring compliance with trading rules.
- Implemented logging for significant operations and validation checks, aiding in monitoring and debugging.
- Created a changelog to document the new features and changes, providing clarity for future development.
- Developed extensive unit tests to cover the new functionality, ensuring reliability and preventing regressions.

These changes significantly enhance the architecture of the transformation module, making it more robust and easier to manage.
This commit is contained in:
Ajasra
2025-06-07 13:23:59 +08:00
parent 96ee25bd01
commit 68030730e9
17 changed files with 2020 additions and 534 deletions

View File

@@ -1,8 +1,8 @@
"""
Common data processing utilities for all exchanges.
Common utilities and data structures for the application.
This package contains shared components for data validation, transformation,
and aggregation that can be used across different exchange implementations.
This package provides shared functionality across different components
of the system, including data transformation, validation, and aggregation.
"""
from .data_types import (
@@ -13,14 +13,23 @@ from .data_types import (
CandleProcessingConfig
)
from .aggregation import TimeframeBucket
# Temporarily import from old location until we move these classes
from .aggregation import RealTimeCandleProcessor
from .transformation.trade import (
TradeTransformer,
create_standardized_trade,
batch_create_standardized_trades
)
from .transformation import (
BaseDataTransformer,
UnifiedDataTransformer,
create_standardized_trade
from .transformation.base import BaseDataTransformer
from .transformation.unified import UnifiedDataTransformer
from .transformation.safety import (
TradeLimits,
DEFAULT_LIMITS,
STABLECOIN_LIMITS,
VOLATILE_LIMITS,
validate_trade_size,
validate_trade_price,
validate_symbol_format
)
from .validation import (
@@ -28,37 +37,31 @@ from .validation import (
ValidationResult
)
from .indicators import (
TechnicalIndicators,
IndicatorResult,
create_default_indicators_config,
validate_indicator_config
)
__all__ = [
# Data types
'StandardizedTrade',
'OHLCVCandle',
'OHLCVCandle',
'MarketDataPoint',
'DataValidationResult',
'CandleProcessingConfig',
# Aggregation
'TimeframeBucket',
'RealTimeCandleProcessor',
# Transformation
# Trade transformation
'TradeTransformer',
'create_standardized_trade',
'batch_create_standardized_trades',
'BaseDataTransformer',
'UnifiedDataTransformer',
'create_standardized_trade',
# Safety limits and validation
'TradeLimits',
'DEFAULT_LIMITS',
'STABLECOIN_LIMITS',
'VOLATILE_LIMITS',
'validate_trade_size',
'validate_trade_price',
'validate_symbol_format',
# Validation
'BaseDataValidator',
'ValidationResult',
# Technical Indicators
'TechnicalIndicators',
'IndicatorResult',
'create_default_indicators_config',
'validate_indicator_config'
]

View File

@@ -10,7 +10,12 @@ from decimal import Decimal
from typing import Dict, List, Optional, Any, Callable
from collections import defaultdict
from ..data_types import StandardizedTrade, OHLCVCandle, CandleProcessingConfig, ProcessingStats
from ..data_types import (
StandardizedTrade,
OHLCVCandle,
CandleProcessingConfig,
ProcessingStats
)
from .bucket import TimeframeBucket
@@ -71,6 +76,7 @@ class RealTimeCandleProcessor:
# Stats tracking
self.stats = ProcessingStats()
self.stats.active_timeframes = len(self.config.timeframes)
def add_candle_callback(self, callback: Callable[[OHLCVCandle], None]) -> None:
"""Add callback to be called when candle is completed."""
@@ -87,6 +93,7 @@ class RealTimeCandleProcessor:
List of completed candles (if any time boundaries were crossed)
"""
self.stats.trades_processed += 1
self.stats.last_trade_time = trade.timestamp
completed_candles = []
for timeframe in self.config.timeframes:
@@ -94,6 +101,7 @@ class RealTimeCandleProcessor:
if completed:
completed_candles.append(completed)
self.stats.candles_emitted += 1
self.stats.last_candle_time = completed.end_time
return completed_candles
@@ -196,6 +204,7 @@ class RealTimeCandleProcessor:
except Exception as e:
if self.logger:
self.logger.error(f"Error in candle callback: {e}")
self.stats.errors_count += 1
def get_current_candles(self, incomplete: bool = True) -> List[OHLCVCandle]:
"""
@@ -221,15 +230,20 @@ class RealTimeCandleProcessor:
candle = bucket.to_candle(is_complete=True)
completed.append(candle)
self._emit_candle(candle)
self.stats.candles_emitted += 1
self.current_buckets.clear()
return completed
def get_stats(self) -> Dict[str, Any]:
"""Get processing statistics."""
return {
"component": self.component_name,
"stats": self.stats.to_dict()
}
stats_dict = self.stats.to_dict()
stats_dict.update({
'component': self.component_name,
'symbol': self.symbol,
'exchange': self.exchange,
'active_timeframes': list(self.current_buckets.keys())
})
return stats_dict
__all__ = ['RealTimeCandleProcessor']

View File

@@ -1,484 +0,0 @@
"""
Base transformation utilities for all exchanges.
This module provides common transformation patterns and base classes
for converting exchange-specific data to standardized formats.
"""
from datetime import datetime, timezone
from decimal import Decimal
from typing import Dict, List, Optional, Any, Iterator
from abc import ABC, abstractmethod
from .data_types import StandardizedTrade, OHLCVCandle, DataValidationResult
from .aggregation.batch import BatchCandleProcessor
class BaseDataTransformer(ABC):
    """
    Abstract base class for exchange data transformers.

    Subclasses implement the three ``transform_*_data`` hooks. The remaining
    methods are shared helpers for timestamps, decimals, trade sides and
    symbols. All logging is optional: when ``logger`` is None nothing is
    logged.
    """

    def __init__(self,
                 exchange_name: str,
                 component_name: str = "base_data_transformer",
                 logger = None):
        """
        Initialize base data transformer.

        Args:
            exchange_name: Name of the exchange (e.g., 'okx', 'binance')
            component_name: Name used as prefix in log messages
            logger: Optional logger instance
        """
        self.exchange_name = exchange_name
        self.component_name = component_name
        self.logger = logger

        if self.logger:
            self.logger.info(f"{self.component_name}: Initialized base data transformer for {exchange_name}")

    # Abstract methods that must be implemented by subclasses

    @abstractmethod
    def transform_trade_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[StandardizedTrade]:
        """Transform exchange-specific trade data to standardized format."""
        pass

    @abstractmethod
    def transform_orderbook_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
        """Transform exchange-specific orderbook data to standardized format."""
        pass

    @abstractmethod
    def transform_ticker_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
        """Transform exchange-specific ticker data to standardized format."""
        pass

    # Common transformation utilities available to all subclasses

    def timestamp_to_datetime(self, timestamp: Any, is_milliseconds: bool = True) -> datetime:
        """
        Convert a numeric or numeric-string timestamp to a UTC datetime.

        Args:
            timestamp: Epoch timestamp as str, int or float
            is_milliseconds: True if timestamp is in milliseconds

        Returns:
            Timezone-aware datetime object. On any conversion error the
            error is logged and the current UTC time is returned instead.
        """
        try:
            if not isinstance(timestamp, (str, int, float)):
                raise ValueError(f"Invalid timestamp type: {type(timestamp)}")
            timestamp_num = float(timestamp)

            # Convert to seconds if needed
            if is_milliseconds:
                timestamp_num = timestamp_num / 1000

            return datetime.fromtimestamp(timestamp_num, tz=timezone.utc)

        except Exception as e:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error converting timestamp {timestamp}: {e}")
            # Return current time as fallback
            return datetime.now(timezone.utc)

    def safe_decimal_conversion(self, value: Any, field_name: str = "value") -> Optional[Decimal]:
        """
        Safely convert value to Decimal with error handling.

        Args:
            value: Value to convert
            field_name: Name of field for error logging

        Returns:
            Decimal value, or None if value is None/empty or conversion failed
        """
        try:
            if value is None or value == "":
                return None
            return Decimal(str(value))
        except Exception as e:
            if self.logger:
                self.logger.warning(f"{self.component_name}: Failed to convert {field_name} '{value}' to Decimal: {e}")
            return None

    def normalize_trade_side(self, side: str) -> str:
        """
        Normalize trade side to standard format.

        Args:
            side: Raw trade side string

        Returns:
            Normalized side ('buy' or 'sell'). Unknown values, including
            non-string input, are logged and default to 'buy'.
        """
        # Non-string input used to crash with AttributeError on .lower();
        # route it through the same "unknown side" fallback instead.
        normalized = side.lower().strip() if isinstance(side, str) else ""

        # Handle common variations
        if normalized in ['buy', 'bid', 'b', '1']:
            return 'buy'
        if normalized in ['sell', 'ask', 's', '0']:
            return 'sell'

        if self.logger:
            self.logger.warning(f"{self.component_name}: Unknown trade side: {side}, defaulting to 'buy'")
        return 'buy'

    def validate_symbol_format(self, symbol: str) -> str:
        """
        Validate and normalize symbol format.

        Args:
            symbol: Raw symbol string

        Returns:
            Upper-cased symbol with surrounding whitespace removed

        Raises:
            ValueError: If symbol is not a string or is empty
        """
        if not symbol or not isinstance(symbol, str):
            raise ValueError(f"Invalid symbol: {symbol}")

        # Basic normalization
        normalized = symbol.upper().strip()

        if not normalized:
            raise ValueError("Empty symbol after normalization")

        return normalized

    def transform_database_record(self, record: Any) -> Optional[StandardizedTrade]:
        """
        Transform database record to standardized format.

        This default implementation only logs a warning and returns None;
        subclasses override it to handle their specific database schema.

        Args:
            record: Database record

        Returns:
            StandardizedTrade or None if transformation failed
        """
        if self.logger:
            self.logger.warning(f"{self.component_name}: transform_database_record not implemented for this exchange")
        return None

    def get_transformer_info(self) -> Dict[str, Any]:
        """Get transformer information (exchange, component and capability flags)."""
        # hasattr() would always be True because this base class defines the
        # method; report the capability only when a subclass overrides it.
        overrides_db = (
            type(self).transform_database_record
            is not BaseDataTransformer.transform_database_record
        )
        return {
            'exchange': self.exchange_name,
            'component': self.component_name,
            'capabilities': {
                'trade_transformation': True,
                'orderbook_transformation': True,
                'ticker_transformation': True,
                'database_transformation': overrides_db
            }
        }
class UnifiedDataTransformer:
    """
    Unified data transformation facade for all scenarios.

    Wraps an exchange-specific transformer and exposes one interface for
    real-time, historical and backfill data. Every delegated call is
    guarded: exceptions are logged (when a logger is set) and turned into
    a ``None`` / empty-list result instead of propagating.

    TRANSFORMATION PROCESS:
    1. Raw Data Input (exchange format, database records, etc.)
    2. Validation (using exchange-specific validators)
    3. Transformation to StandardizedTrade format
    4. Optional aggregation to candles
    5. Output in consistent format
    """

    def __init__(self,
                 exchange_transformer: BaseDataTransformer,
                 component_name: str = "unified_data_transformer",
                 logger = None):
        """
        Initialize unified data transformer.

        Args:
            exchange_transformer: Exchange-specific transformer instance
            component_name: Name used as prefix in log messages
            logger: Optional logger instance
        """
        self.exchange_transformer = exchange_transformer
        self.component_name = component_name
        self.logger = logger

        if self.logger:
            self.logger.info(f"{self.component_name}: Initialized unified data transformer with {exchange_transformer.exchange_name} transformer")

    def _delegate(self, kind: str, raw_data: Dict[str, Any], symbol: str) -> Any:
        """Call ``transform_<kind>_data`` on the wrapped transformer, logging and swallowing errors."""
        try:
            # Attribute lookup stays inside the try so a missing method is
            # reported the same way as a failing one.
            handler = getattr(self.exchange_transformer, f"transform_{kind}_data")
            return handler(raw_data, symbol)
        except Exception as exc:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error in {kind} transformation: {exc}")
            return None

    def transform_trade_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[StandardizedTrade]:
        """
        Transform trade data using exchange-specific transformer.

        Args:
            raw_data: Raw trade data from exchange
            symbol: Trading symbol

        Returns:
            Standardized trade or None if transformation failed
        """
        return self._delegate("trade", raw_data, symbol)

    def transform_orderbook_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
        """
        Transform orderbook data using exchange-specific transformer.

        Args:
            raw_data: Raw orderbook data from exchange
            symbol: Trading symbol

        Returns:
            Standardized orderbook data or None if transformation failed
        """
        return self._delegate("orderbook", raw_data, symbol)

    def transform_ticker_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[Dict[str, Any]]:
        """
        Transform ticker data using exchange-specific transformer.

        Args:
            raw_data: Raw ticker data from exchange
            symbol: Trading symbol

        Returns:
            Standardized ticker data or None if transformation failed
        """
        return self._delegate("ticker", raw_data, symbol)

    def process_trades_to_candles(self,
                                  trades: Iterator[StandardizedTrade],
                                  timeframes: List[str],
                                  symbol: str) -> List[OHLCVCandle]:
        """
        Aggregate an iterator of trades into candles.

        A fresh BatchCandleProcessor is created per call, so the same entry
        point serves real-time, historical and backfill iterators.

        Args:
            trades: Iterator of standardized trades
            timeframes: List of timeframes to generate
            symbol: Trading symbol

        Returns:
            List of completed candles; an empty list if processing failed
        """
        try:
            batch = BatchCandleProcessor(
                symbol,
                self.exchange_transformer.exchange_name,
                timeframes,
                f"unified_batch_processor_{symbol}"
            )
            result = batch.process_trades_to_candles(trades)

            if self.logger:
                self.logger.info(f"{self.component_name}: Processed {batch.get_stats()['trades_processed']} trades to {len(result)} candles")
            return result

        except Exception as exc:
            if self.logger:
                self.logger.error(f"{self.component_name}: Error processing trades to candles: {exc}")
            return []

    def batch_transform_trades(self,
                               raw_trades: List[Dict[str, Any]],
                               symbol: str) -> List[StandardizedTrade]:
        """
        Transform multiple trade records in batch.

        Records that fail to transform (or transform to a falsy result) are
        counted as errors and left out of the output.

        Args:
            raw_trades: List of raw trade data
            symbol: Trading symbol

        Returns:
            List of successfully transformed trades
        """
        converted = []
        failures = 0

        for item in raw_trades:
            try:
                outcome = self.transform_trade_data(item, symbol)
            except Exception as exc:
                if self.logger:
                    self.logger.error(f"{self.component_name}: Error transforming trade: {exc}")
                failures += 1
                continue

            if outcome:
                converted.append(outcome)
            else:
                failures += 1

        if self.logger:
            self.logger.info(f"{self.component_name}: Batch transformed {len(converted)} trades successfully, {failures} errors")

        return converted

    def get_transformer_info(self) -> Dict[str, Any]:
        """Get the wrapped transformer's info dict extended with unified-layer flags."""
        info = self.exchange_transformer.get_transformer_info()
        info['unified_component'] = self.component_name
        info['batch_processing'] = True
        info['candle_aggregation'] = True
        return info
# Utility functions for common transformation patterns
def create_standardized_trade(symbol: str,
                              trade_id: str,
                              price: Any,
                              size: Any,
                              side: str,
                              timestamp: Any,
                              exchange: str,
                              raw_data: Optional[Dict[str, Any]] = None,
                              is_milliseconds: bool = True) -> StandardizedTrade:
    """
    Utility function to create StandardizedTrade with proper validation.

    Args:
        symbol: Trading symbol (upper-cased and stripped in the result)
        trade_id: Trade identifier (stored as str)
        price: Trade price (anything ``Decimal(str(price))`` accepts)
        size: Trade size (anything ``Decimal(str(size))`` accepts)
        side: Trade side ('buy' or 'sell', case-insensitive)
        timestamp: Epoch number/numeric string, or a datetime
        exchange: Exchange name (lower-cased in the result)
        raw_data: Original raw data
        is_milliseconds: True if a numeric timestamp is in milliseconds

    Returns:
        StandardizedTrade object

    Raises:
        ValueError: If timestamp, price, size or side is invalid
    """
    # Convert timestamp
    if isinstance(timestamp, (int, float, str)):
        timestamp_num = float(timestamp)
        if is_milliseconds:
            timestamp_num = timestamp_num / 1000
        dt = datetime.fromtimestamp(timestamp_num, tz=timezone.utc)
    elif isinstance(timestamp, datetime):
        dt = timestamp
        # Naive datetimes are interpreted as UTC.
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
    else:
        raise ValueError(f"Invalid timestamp type: {type(timestamp)}")

    # Convert price and size to Decimal. Any conversion failure surfaces as
    # ValueError, with the original exception chained for debugging.
    try:
        decimal_price = Decimal(str(price))
        decimal_size = Decimal(str(size))
    except Exception as e:
        raise ValueError(f"Invalid price or size: {e}") from e

    # Decimal happily parses "NaN" and "Infinity"; neither is a usable
    # trade value.
    if not (decimal_price.is_finite() and decimal_size.is_finite()):
        raise ValueError(
            f"Invalid price or size: non-finite value (price={price}, size={size})"
        )

    # Normalize side; non-string input is rejected with the documented
    # ValueError rather than an AttributeError from .lower().
    if not isinstance(side, str):
        raise ValueError(f"Invalid trade side: {side}")
    normalized_side = side.lower().strip()
    if normalized_side not in ['buy', 'sell']:
        raise ValueError(f"Invalid trade side: {side}")

    return StandardizedTrade(
        symbol=symbol.upper().strip(),
        trade_id=str(trade_id),
        price=decimal_price,
        size=decimal_size,
        side=normalized_side,
        timestamp=dt,
        exchange=exchange.lower(),
        raw_data=raw_data
    )
def batch_create_standardized_trades(raw_trades: List[Dict[str, Any]],
                                     symbol: str,
                                     exchange: str,
                                     field_mapping: Dict[str, str],
                                     is_milliseconds: bool = True) -> List[StandardizedTrade]:
    """
    Batch create standardized trades from raw data.

    Records that cannot be converted (missing key, invalid value, ...) are
    logged at WARNING level and skipped; processing continues with the next
    record.

    Args:
        raw_trades: List of raw trade dictionaries
        symbol: Trading symbol
        exchange: Exchange name
        field_mapping: Mapping of StandardizedTrade fields to raw data fields
        is_milliseconds: True if timestamps are in milliseconds

    Returns:
        List of successfully created StandardizedTrade objects

    Example field_mapping:
        {
            'trade_id': 'id',
            'price': 'px',
            'size': 'sz',
            'side': 'side',
            'timestamp': 'ts'
        }
    """
    # Local import: this module has no top-level logging import.
    import logging
    log = logging.getLogger(__name__)

    trades = []

    for raw_trade in raw_trades:
        try:
            trade = create_standardized_trade(
                symbol=symbol,
                trade_id=raw_trade[field_mapping['trade_id']],
                price=raw_trade[field_mapping['price']],
                size=raw_trade[field_mapping['size']],
                side=raw_trade[field_mapping['side']],
                timestamp=raw_trade[field_mapping['timestamp']],
                exchange=exchange,
                raw_data=raw_trade,
                is_milliseconds=is_milliseconds
            )
        except Exception as e:
            # Deliberate best-effort: report through logging (not stdout)
            # and continue with the remaining records.
            log.warning("Failed to transform trade: %s", e)
            continue
        trades.append(trade)

    return trades
__all__ = [
'BaseDataTransformer',
'UnifiedDataTransformer',
'create_standardized_trade',
'batch_create_standardized_trades'
]

View File

@@ -0,0 +1,29 @@
"""
Common data transformation utilities for all exchanges.
This package provides common transformation patterns and base classes
for converting exchange-specific data to standardized formats.
"""
from .base import BaseDataTransformer
from .unified import UnifiedDataTransformer
from .trade import create_standardized_trade, batch_create_standardized_trades
from .time_utils import timestamp_to_datetime
from .numeric_utils import safe_decimal_conversion
from .normalization import normalize_trade_side, validate_symbol_format
__all__ = [
# Base classes
'BaseDataTransformer',
'UnifiedDataTransformer',
# Trade transformation
'create_standardized_trade',
'batch_create_standardized_trades',
# Utility functions
'timestamp_to_datetime',
'safe_decimal_conversion',
'normalize_trade_side',
'validate_symbol_format'
]

View File

@@ -0,0 +1,228 @@
"""
Base data transformer class.
This module provides the base class for all data transformers
with common functionality and interface definitions.
"""
import logging
from typing import Dict, Any, Optional, List
from datetime import datetime
from decimal import Decimal
from ..data_types import StandardizedTrade
from .trade import create_standardized_trade, batch_create_standardized_trades
from .time_utils import timestamp_to_datetime
from .numeric_utils import safe_decimal_conversion
from .normalization import normalize_trade_side, validate_symbol_format
class BaseDataTransformer:
    """
    Base class for all data transformers.

    Provides thin instance-level wrappers around the module-level utility
    functions (injecting this transformer's logger and component name) and
    declares the ``transform_*`` hooks that subclasses must implement.
    """

    def __init__(
        self,
        exchange: str,
        component_name: str = "base_transformer",
        logger: Optional[logging.Logger] = None
    ):
        """
        Initialize base transformer.

        Args:
            exchange: Exchange name
            component_name: Component name for logging
            logger: Optional logger instance; when omitted a logger named
                after ``component_name`` is used
        """
        self.exchange = exchange
        self.component_name = component_name
        self.logger = logger if logger else logging.getLogger(component_name)

    # ------------------------------------------------------------------
    # Utility wrappers
    # ------------------------------------------------------------------

    def timestamp_to_datetime(
        self,
        timestamp: Any,
        is_milliseconds: bool = True
    ) -> datetime:
        """Convert timestamp to datetime via the shared time utility."""
        converted = timestamp_to_datetime(
            timestamp,
            is_milliseconds,
            logger=self.logger,
            component_name=self.component_name
        )
        return converted

    def safe_decimal_conversion(
        self,
        value: Any,
        field_name: str = "value"
    ) -> Optional[Decimal]:
        """Convert value to Decimal via the shared numeric utility."""
        converted = safe_decimal_conversion(
            value,
            field_name,
            logger=self.logger,
            component_name=self.component_name
        )
        return converted

    def normalize_trade_side(
        self,
        side: str
    ) -> str:
        """
        Normalize trade side, falling back to 'buy' for unknown values.

        The module-level helper raises ValueError for unknown sides; this
        wrapper logs a warning and returns 'buy' instead.
        """
        try:
            result = normalize_trade_side(
                side,
                logger=self.logger,
                component_name=self.component_name
            )
        except ValueError:
            self.logger.warning(
                f"{self.component_name}: Unknown trade side: {side}, defaulting to 'buy'"
            )
            result = 'buy'
        return result

    def validate_symbol_format(
        self,
        symbol: str
    ) -> str:
        """Validate symbol format via the shared normalization utility."""
        checked = validate_symbol_format(
            symbol,
            logger=self.logger,
            component_name=self.component_name
        )
        return checked

    def get_transformer_info(self) -> Dict[str, Any]:
        """Get transformer information (exchange, component, capability flags)."""
        capabilities = {
            "trade_transformation": True,
            "orderbook_transformation": True,
            "ticker_transformation": True,
            "batch_processing": True
        }
        return {
            "exchange": self.exchange,
            "component": self.component_name,
            "capabilities": capabilities
        }

    # ------------------------------------------------------------------
    # Hooks for subclasses
    # ------------------------------------------------------------------

    def transform_trade_data(
        self,
        raw_data: Dict[str, Any],
        symbol: str
    ) -> StandardizedTrade:
        """
        Transform raw trade data to standardized format.

        Args:
            raw_data: Raw trade data
            symbol: Trading symbol

        Returns:
            StandardizedTrade object

        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError("Subclasses must implement transform_trade_data")

    def transform_orderbook_data(
        self,
        raw_data: Dict[str, Any],
        symbol: str
    ) -> Dict[str, Any]:
        """
        Transform raw orderbook data to standardized format.

        Args:
            raw_data: Raw orderbook data
            symbol: Trading symbol

        Returns:
            Standardized orderbook data

        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError("Subclasses must implement transform_orderbook_data")

    def transform_ticker_data(
        self,
        raw_data: Dict[str, Any],
        symbol: str
    ) -> Dict[str, Any]:
        """
        Transform raw ticker data to standardized format.

        Args:
            raw_data: Raw ticker data
            symbol: Trading symbol

        Returns:
            Standardized ticker data

        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError("Subclasses must implement transform_ticker_data")

    # ------------------------------------------------------------------
    # Batch helpers
    # ------------------------------------------------------------------

    def batch_transform_trades(
        self,
        raw_trades: List[Dict[str, Any]],
        symbol: str
    ) -> List[StandardizedTrade]:
        """
        Transform multiple trades in batch.

        Each record is passed to ``transform_trade_data``; the first
        exception raised by that method propagates to the caller.

        Args:
            raw_trades: List of raw trade data
            symbol: Trading symbol

        Returns:
            List of StandardizedTrade objects, in input order
        """
        results = []
        for record in raw_trades:
            results.append(self.transform_trade_data(record, symbol))
        return results

    def transform_trades_batch(
        self,
        raw_trades: List[Dict[str, Any]],
        symbol: str,
        field_mapping: Dict[str, str]
    ) -> List[StandardizedTrade]:
        """
        Transform a batch of raw trades using a field mapping.

        Args:
            raw_trades: List of raw trade dictionaries
            symbol: Trading symbol
            field_mapping: Field mapping for raw data

        Returns:
            List of StandardizedTrade objects
        """
        created = batch_create_standardized_trades(
            raw_trades=raw_trades,
            symbol=symbol,
            exchange=self.exchange,
            field_mapping=field_mapping
        )
        return created

    # ------------------------------------------------------------------
    # Logging helpers
    # ------------------------------------------------------------------

    def _log_error(self, message: str, error: Optional[Exception] = None) -> None:
        """Log error with component context, appending the exception if given."""
        text = f"{self.component_name}: {message}"
        if error:
            text = f"{text}: {error}"
        self.logger.error(text)

    def _log_warning(self, message: str) -> None:
        """Log warning with component context."""
        self.logger.warning(f"{self.component_name}: {message}")

    def _log_info(self, message: str) -> None:
        """Log info with component context."""
        self.logger.info(f"{self.component_name}: {message}")

View File

@@ -0,0 +1,129 @@
"""
Data normalization utilities.
This module provides functions for normalizing various data formats
to consistent standards across the application.
"""
from typing import Optional
from logging import Logger
def normalize_trade_side(
    side: str,
    logger: Optional[Logger] = None,
    component_name: str = "normalization"
) -> str:
    """
    Map a raw trade-side token onto 'buy' or 'sell'.

    Matching is case-insensitive and ignores surrounding whitespace.

    Args:
        side: Raw trade side string
        logger: Optional logger for error messages
        component_name: Name for logging

    Returns:
        Normalized side ('buy' or 'sell')

    Raises:
        ValueError: If side is invalid, empty, or unknown
    """
    buy_tokens = ('buy', 'bid', 'b', '1')
    sell_tokens = ('sell', 'ask', 's', '0', '2')

    def _reject() -> None:
        # Single exit point for every invalid input: log, then raise.
        problem = f"Invalid trade side: {side}"
        if logger:
            logger.error(f"{component_name}: {problem}")
        raise ValueError(problem)

    if not (side and isinstance(side, str)):
        _reject()

    token = side.lower().strip()

    if token in buy_tokens:
        return 'buy'
    if token in sell_tokens:
        return 'sell'
    _reject()
def validate_symbol_format(
    symbol: str,
    logger: Optional[Logger] = None,
    component_name: str = "normalization"
) -> str:
    """
    Validate a trading symbol and return its normalized form.

    The symbol is stripped and upper-cased, must be at least three
    characters long and must contain a '-' or '/' delimiter.

    Args:
        symbol: Trading symbol
        logger: Optional logger for error messages
        component_name: Name for logging

    Returns:
        Normalized symbol

    Raises:
        ValueError: If symbol is invalid
    """
    cleaned = ""
    problem = None

    if not symbol or not isinstance(symbol, str):
        problem = f"Invalid symbol: {symbol}"
    else:
        # Remove whitespace and convert to uppercase
        cleaned = symbol.strip().upper()
        if len(cleaned) < 3:
            problem = f"Symbol too short: {symbol}"
        elif not any(delimiter in cleaned for delimiter in ('-', '/')):
            problem = f"Invalid symbol format (missing delimiter): {symbol}"

    if problem is not None:
        if logger:
            logger.error(f"{component_name}: {problem}")
        raise ValueError(problem)

    return cleaned
def normalize_exchange_name(
    exchange: str,
    logger: Optional[Logger] = None,
    component_name: str = "normalization"
) -> str:
    """
    Return the exchange name lower-cased with surrounding whitespace removed.

    Args:
        exchange: Exchange name
        logger: Optional logger for error messages
        component_name: Name for logging

    Returns:
        Normalized exchange name

    Raises:
        ValueError: If exchange name is invalid
    """
    problem = None
    cleaned = ""

    if not (exchange and isinstance(exchange, str)):
        problem = f"Invalid exchange name: {exchange}"
    else:
        cleaned = exchange.lower().strip()
        # A whitespace-only name passes the first check but is empty here.
        if not cleaned:
            problem = "Exchange name cannot be empty"

    if problem is not None:
        if logger:
            logger.error(f"{component_name}: {problem}")
        raise ValueError(problem)

    return cleaned

View File

@@ -0,0 +1,68 @@
"""
Numeric transformation utilities.
This module provides functions for handling numeric conversions and validations
in a consistent way across the application.
"""
from decimal import Decimal
from typing import Any, Optional
from logging import Logger
def safe_decimal_conversion(
    value: Any,
    field_name: str = "value",
    logger: Optional[Logger] = None,
    component_name: str = "numeric_utils"
) -> Optional[Decimal]:
    """
    Safely convert value to a finite Decimal with error handling.

    Args:
        value: Value to convert
        field_name: Name of field for error logging
        logger: Optional logger for error messages
        component_name: Name for logging

    Returns:
        Decimal value, or None if value is None/empty, cannot be parsed,
        or parses to NaN/Infinity
    """
    try:
        if value is None or value == "":
            return None
        result = Decimal(str(value))
    except Exception as e:
        # Deliberately broad: this helper must never raise, whatever the
        # input object's __eq__/__str__ does.
        if logger:
            logger.warning(f"{component_name}: Failed to convert {field_name} '{value}' to Decimal: {e}")
        return None

    # Decimal parses "NaN" and "Infinity" without error; such values would
    # break later comparisons and arithmetic, so treat them as failures.
    if not result.is_finite():
        if logger:
            logger.warning(f"{component_name}: Failed to convert {field_name} '{value}' to Decimal: non-finite value")
        return None

    return result
def validate_numeric_range(
    value: Decimal,
    min_value: Optional[Decimal] = None,
    max_value: Optional[Decimal] = None,
    field_name: str = "value"
) -> bool:
    """
    Validate that a numeric value falls within an inclusive range.

    Args:
        value: Value to validate
        min_value: Optional minimum value (bound skipped when None)
        max_value: Optional maximum value (bound skipped when None)
        field_name: Name of field for error messages

    Returns:
        True when the value satisfies every supplied bound. The function
        never returns False; violations are reported by raising.

    Raises:
        ValueError: If value is outside allowed range
    """
    too_low = min_value is not None and value < min_value
    if too_low:
        raise ValueError(f"{field_name} {value} is below minimum allowed value {min_value}")

    too_high = max_value is not None and value > max_value
    if too_high:
        raise ValueError(f"{field_name} {value} exceeds maximum allowed value {max_value}")

    return True

View File

@@ -0,0 +1,191 @@
"""
Trading safety limits and validations.
This module provides safety checks and limits for crypto trading operations
with reasonable defaults that won't interfere with normal operations.
"""
from decimal import Decimal
from typing import Dict, NamedTuple, Optional, Pattern, Set
import re
import logging
# Common patterns for crypto trading pairs:
# 2-10 uppercase alphanumerics, a '-' or '/' delimiter, then 2-10 more.
# NOTE(review): with Pattern.match() the trailing '$' also matches just before
# a final newline, so "BTC-USDT\n" is accepted unless fullmatch() is used.
SYMBOL_PATTERN = re.compile(r'^[A-Z0-9]{2,10}[-/][A-Z0-9]{2,10}$')
# NOTE(review): the pattern above allows up to 21 characters (10 + 1 + 10),
# one more than this cap - confirm which limit is intended.
MAX_SYMBOL_LENGTH = 20  # Longest known pair + margin for future
class TradeLimits(NamedTuple):
    """
    Trading limits for a symbol.

    Fields:
        min_size: Minimum trade size in base currency
        max_size: Maximum trade size in base currency
        min_notional: Minimum trade value in quote currency
        max_notional: Maximum trade value in quote currency
        price_precision: Number of decimal places for price
        size_precision: Number of decimal places for size
        max_price_deviation: Maximum allowed deviation from market price (in percent)
    """

    # Size bounds (base currency)
    min_size: Decimal
    max_size: Decimal
    # Notional bounds (quote currency)
    min_notional: Decimal
    max_notional: Decimal
    # Decimal places
    price_precision: int
    size_precision: int
    # Percent, e.g. Decimal('30.0') means 30%
    max_price_deviation: Decimal
# Default limits that are generous but still protect against extreme errors.
# The two presets below are derived from this one via _replace(), so they
# inherit every field they do not override.
DEFAULT_LIMITS = TradeLimits(
    min_size=Decimal('0.00000001'), # 1 satoshi equivalent
    max_size=Decimal('10000.0'), # Large enough for most trades
    min_notional=Decimal('1.0'), # Minimum $1 equivalent
    max_notional=Decimal('10000000.0'), # $10M per trade limit
    price_precision=8, # Standard for most exchanges
    size_precision=8, # Standard for most exchanges
    max_price_deviation=Decimal('30.0') # 30% max deviation
)
# Common stablecoin pairs can have higher limits
# NOTE(review): max_size is documented on TradeLimits as base-currency units,
# so "$1M equivalent" only holds when the base asset is itself ~1 USD - confirm.
STABLECOIN_LIMITS = DEFAULT_LIMITS._replace(
    max_size=Decimal('1000000.0'), # $1M equivalent
    max_notional=Decimal('50000000.0'), # $50M per trade
    max_price_deviation=Decimal('5.0') # 5% max deviation for stables
)
# More restrictive limits for volatile/illiquid pairs
# (smaller size/notional caps, but a wider allowed price deviation)
VOLATILE_LIMITS = DEFAULT_LIMITS._replace(
    max_size=Decimal('1000.0'), # Smaller position size
    max_notional=Decimal('1000000.0'), # $1M per trade
    max_price_deviation=Decimal('50.0') # 50% for very volatile markets
)
# Known stablecoin symbols (compared against the upper-cased legs of a pair)
STABLECOINS = {'USDT', 'USDC', 'DAI', 'BUSD', 'UST', 'TUSD'}
def is_stablecoin_pair(symbol: str) -> bool:
    """
    Check if the trading pair involves a stablecoin.

    The symbol is upper-cased and split on '-' or '/'; the result is True
    when at least one leg is listed in STABLECOINS.

    NOTE(review): a pair such as BTC-USDT therefore counts as a stablecoin
    pair - confirm that "any leg" rather than "every leg" is intended.
    """
    legs = re.split('[-/]', symbol.upper())
    for leg in legs:
        if leg in STABLECOINS:
            return True
    return False
def get_trade_limits(symbol: str) -> TradeLimits:
    """
    Select the limit preset that applies to a symbol.

    Args:
        symbol: Trading pair symbol

    Returns:
        STABLECOIN_LIMITS when the pair involves a stablecoin, otherwise
        VOLATILE_LIMITS

    NOTE(review): DEFAULT_LIMITS is never returned from here; every
    non-stablecoin pair gets the restrictive VOLATILE_LIMITS. Confirm this
    is intended before relying on DEFAULT_LIMITS as the fallback.
    """
    return STABLECOIN_LIMITS if is_stablecoin_pair(symbol) else VOLATILE_LIMITS
def validate_trade_size(
    size: Decimal,
    price: Decimal,
    symbol: str,
    logger: Optional[logging.Logger] = None
) -> None:
    """
    Validate trade size and notional value against the symbol's limits.

    For both quantities the checks run in the same order: below minimum
    (error), above 90% of maximum (warning, only when a logger is given),
    above maximum (error). Size is checked before notional.

    Args:
        size: Trade size in base currency
        price: Trade price
        symbol: Trading pair symbol
        logger: Optional logger for warnings

    Raises:
        ValueError: If size violates limits
    """
    limits = get_trade_limits(symbol)
    notional = size * price
    warn_ratio = Decimal('0.9')

    # (noun, unit prefix, amount, floor, ceiling) - the notional messages
    # carry a '$' prefix, the size messages do not.
    checks = (
        ("size", "", size, limits.min_size, limits.max_size),
        ("value", "$", notional, limits.min_notional, limits.max_notional),
    )

    for noun, unit, amount, floor, ceiling in checks:
        if amount < floor:
            raise ValueError(
                f"Trade {noun} {unit}{amount} below minimum {unit}{floor} for {symbol}"
            )

        # The warning fires from 90% of the maximum upwards, including for
        # amounts that the next check goes on to reject.
        if amount > ceiling * warn_ratio and logger:
            logger.warning(
                f"Large trade {noun} {unit}{amount} approaching maximum {unit}{ceiling} for {symbol}"
            )

        if amount > ceiling:
            raise ValueError(
                f"Trade {noun} {unit}{amount} exceeds maximum {unit}{ceiling} for {symbol}"
            )
def validate_trade_price(
    price: Decimal,
    market_price: Optional[Decimal],
    symbol: str,
    logger: Optional[logging.Logger] = None
) -> None:
    """
    Validate a trade price against the current market price.

    The deviation of ``price`` from ``market_price`` is computed as a
    percentage of the market price and compared with the symbol's
    ``max_price_deviation``. A warning is logged once the deviation
    passes 80% of that maximum.

    The check is skipped when no usable reference price exists: either
    ``market_price`` is None, or it is zero/negative and therefore cannot
    serve as the baseline of a percentage.

    Args:
        price: Trade price
        market_price: Current market price (if available)
        symbol: Trading pair symbol
        logger: Optional logger for warnings

    Raises:
        ValueError: If the deviation exceeds the symbol's maximum
    """
    # Skip market price check if not available
    if market_price is None:
        return

    # A zero reference would make the division below raise a decimal
    # arithmetic error instead of ValueError, and a negative one would
    # yield a negative deviation that can never trip the limit.
    if market_price <= 0:
        if logger:
            logger.warning(
                f"Ignoring non-positive market price {market_price} for {symbol}; "
                f"skipping price deviation check"
            )
        return

    limits = get_trade_limits(symbol)

    # Deviation as a percentage of the market price
    deviation = abs(price - market_price) / market_price * 100

    # Warn at 80% of maximum deviation
    if deviation > limits.max_price_deviation * Decimal('0.8') and logger:
        logger.warning(
            f"Price deviation {deviation}% approaching maximum {limits.max_price_deviation}% for {symbol}"
        )

    # Error at maximum deviation
    if deviation > limits.max_price_deviation:
        raise ValueError(
            f"Price deviation {deviation}% exceeds maximum {limits.max_price_deviation}% for {symbol}"
        )
def validate_symbol_format(
    symbol: str,
    logger: Optional[logging.Logger] = None
) -> None:
    """
    Reject trading symbols that are empty, too long, or malformed.

    Args:
        symbol: Trading pair symbol; matched against SYMBOL_PATTERN after
            upper-casing
        logger: Optional logger for warnings (accepted but not used here)

    Raises:
        ValueError: If symbol is empty or not a string, is longer than
            MAX_SYMBOL_LENGTH, or does not match SYMBOL_PATTERN
    """
    if not (symbol and isinstance(symbol, str)):
        raise ValueError(f"Invalid symbol: {symbol}")

    if len(symbol) > MAX_SYMBOL_LENGTH:
        raise ValueError(f"Symbol too long: {symbol}")

    if SYMBOL_PATTERN.match(symbol.upper()):
        return

    raise ValueError(
        f"Invalid symbol format: {symbol}. Expected format: 'XXX-YYY' or 'XXX/YYY'"
    )

View File

@@ -0,0 +1,52 @@
"""
Time-related transformation utilities.
This module provides functions for handling timestamps and datetime conversions
in a consistent way across the application.
"""
from datetime import datetime, timezone
from typing import Any, Optional
from logging import Logger
def timestamp_to_datetime(
    timestamp: Any,
    is_milliseconds: bool = True,
    logger: Optional[Logger] = None,
    component_name: str = "time_utils"
) -> datetime:
    """
    Turn a numeric or string epoch timestamp into a UTC-aware datetime.

    This function never raises: when the value cannot be converted, the
    error is logged (if a logger was given) and the current UTC time is
    returned instead.

    Args:
        timestamp: Epoch timestamp as str, int or float
        is_milliseconds: True if timestamp is in milliseconds
        logger: Optional logger for error messages
        component_name: Prefix used in logged error messages

    Returns:
        Timezone-aware datetime object (UTC)
    """
    try:
        if not isinstance(timestamp, (str, int, float)):
            raise ValueError(f"Invalid timestamp type: {type(timestamp)}")

        seconds = float(timestamp)
        if is_milliseconds:
            seconds /= 1000

        return datetime.fromtimestamp(seconds, tz=timezone.utc)
    except Exception as exc:
        if logger:
            logger.error(f"{component_name}: Error converting timestamp {timestamp}: {exc}")
        # Fall back to the current time rather than propagating the error
        return datetime.now(timezone.utc)

View File

@@ -0,0 +1,360 @@
"""
Trade data transformation with safety limits.
This module handles the transformation of trade data while enforcing safety limits
to prevent errors and protect against edge cases.
"""
import logging
from datetime import datetime, timezone
from decimal import Decimal, InvalidOperation
from typing import Dict, List, Optional, Any
from ..data_types import StandardizedTrade
from .time_utils import timestamp_to_datetime
from .numeric_utils import safe_decimal_conversion
from .normalization import normalize_trade_side, validate_symbol_format, normalize_exchange_name
from .safety import (
validate_trade_size,
validate_trade_price,
TradeLimits,
get_trade_limits
)
# Create a logger for this module
logger = logging.getLogger(__name__)
def create_standardized_trade(
    symbol: str,
    trade_id: str,
    price: Any,
    size: Any,
    side: str,
    timestamp: Any,
    exchange: str,
    raw_data: Optional[Dict[str, Any]] = None,
    is_milliseconds: bool = True
) -> StandardizedTrade:
    """
    Build a StandardizedTrade from loosely-typed inputs, validating each field.

    Args:
        symbol: Trading symbol
        trade_id: Trade identifier (stored as ``str(trade_id)``)
        price: Trade price (any numeric type)
        size: Trade size (any numeric type)
        side: Trade side ('buy' or 'sell')
        timestamp: Trade timestamp: an epoch value (int, float or numeric
            string) or a datetime; naive datetimes are assumed to be UTC
        exchange: Exchange name
        raw_data: Original raw data
        is_milliseconds: True if an epoch timestamp is in milliseconds

    Returns:
        StandardizedTrade object

    Raises:
        ValueError: If any field is missing, malformed or not positive
    """
    # Validate symbol
    if not symbol or not isinstance(symbol, str):
        raise ValueError(f"Invalid symbol: {symbol}")

    # Validate trade_id
    if not trade_id:
        raise ValueError(f"Invalid trade_id: {trade_id}")

    # Convert timestamp. The conversion is done strictly here instead of
    # through timestamp_to_datetime(): that helper returns the current time
    # when conversion fails, which would silently turn a corrupt timestamp
    # into a plausible-looking trade rather than the ValueError promised above.
    if isinstance(timestamp, datetime):
        dt = timestamp
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
    elif isinstance(timestamp, (int, float, str)):
        try:
            seconds = float(timestamp)
            if is_milliseconds:
                seconds /= 1000
            dt = datetime.fromtimestamp(seconds, tz=timezone.utc)
        except (ValueError, OverflowError, OSError) as e:
            # float() rejects non-numeric text; fromtimestamp() rejects
            # NaN, infinities and out-of-range values.
            raise ValueError(f"Invalid timestamp: {timestamp}") from e
    else:
        raise ValueError(f"Invalid timestamp: {timestamp}")

    # Convert price and size to Decimal
    try:
        if not price or not size:
            raise ValueError("Price and size must not be empty")

        decimal_price = safe_decimal_conversion(price, "price")
        decimal_size = safe_decimal_conversion(size, "size")

        if decimal_price is None or decimal_size is None:
            raise ValueError("Invalid price or size format")

        if decimal_price <= 0:
            raise ValueError(f"Price must be positive: {price}")
        if decimal_size <= 0:
            raise ValueError(f"Size must be positive: {size}")
    except (InvalidOperation, TypeError, ValueError) as e:
        raise ValueError(f"Invalid price or size: {e}") from e

    # Normalize side with strict validation
    try:
        if not side or not isinstance(side, str):
            raise ValueError(f"Invalid trade side: {side}")
        normalized_side = normalize_trade_side(side, logger=logger)
    except ValueError as e:
        logger.error(f"Trade side validation failed: {e}")
        raise ValueError(f"Invalid trade side: {side}") from e

    # Normalize symbol and exchange
    try:
        normalized_symbol = validate_symbol_format(symbol)
        normalized_exchange = normalize_exchange_name(exchange)
    except ValueError as e:
        # Re-raised as a plain ValueError; the original error stays attached
        # as the cause so the traceback is not lost.
        raise ValueError(str(e)) from e

    return StandardizedTrade(
        symbol=normalized_symbol,
        trade_id=str(trade_id),
        price=decimal_price,
        size=decimal_size,
        side=normalized_side,
        timestamp=dt,
        exchange=normalized_exchange,
        raw_data=raw_data
    )
def batch_create_standardized_trades(
    raw_trades: List[Dict[str, Any]],
    symbol: str,
    exchange: str,
    field_mapping: Dict[str, str],
    is_milliseconds: bool = True
) -> List[StandardizedTrade]:
    """
    Batch create standardized trades from raw data.

    Processing is best-effort: a trade that cannot be transformed is logged
    as a warning and skipped, and the remaining trades are still processed.

    Args:
        raw_trades: List of raw trade dictionaries
        symbol: Trading symbol
        exchange: Exchange name
        field_mapping: Mapping of StandardizedTrade fields to raw data fields;
            must provide 'trade_id', 'price', 'size', 'side' and 'timestamp'
        is_milliseconds: True if timestamps are in milliseconds

    Returns:
        List of successfully created StandardizedTrade objects

    Example field_mapping:
        {
            'trade_id': 'id',
            'price': 'px',
            'size': 'sz',
            'side': 'side',
            'timestamp': 'ts'
        }
    """
    trades: List[StandardizedTrade] = []

    for raw_trade in raw_trades:
        try:
            trade = create_standardized_trade(
                symbol=symbol,
                trade_id=raw_trade[field_mapping['trade_id']],
                price=raw_trade[field_mapping['price']],
                size=raw_trade[field_mapping['size']],
                side=raw_trade[field_mapping['side']],
                timestamp=raw_trade[field_mapping['timestamp']],
                exchange=exchange,
                raw_data=raw_trade,
                is_milliseconds=is_milliseconds
            )
        except Exception as e:
            # Deliberately broad: one bad record (invalid values or a missing
            # key) must not abort the batch. Reported through the module
            # logger so failures reach the configured log handlers.
            logger.warning("Failed to transform trade: %s", e)
        else:
            trades.append(trade)

    return trades
class TradeTransformer:
    """
    Transform trade data with safety checks.

    Normalizes the side, size and price of a raw trade dictionary and
    validates size and price against the per-symbol trade limits. When a
    market data provider is supplied, prices are also checked against the
    provider's current market price.
    """

    VALID_SIDES = {'buy', 'sell'}

    # Accepted spellings for each side, compared after lower-casing and
    # stripping surrounding whitespace.
    _BUY_ALIASES = frozenset({'buy', 'bid', 'long', '1', 'true'})
    _SELL_ALIASES = frozenset({'sell', 'ask', 'short', '0', 'false'})

    def __init__(self, market_data_provider: Optional[Any] = None):
        """
        Initialize transformer.

        Args:
            market_data_provider: Optional provider of market data for price
                validation; only its ``get_price(symbol)`` method is used
        """
        self.market_data_provider = market_data_provider

    @staticmethod
    def _parse_decimal(value: Any) -> Decimal:
        """Convert value to a finite Decimal, raising ValueError otherwise."""
        try:
            number = Decimal(str(value))
        except (InvalidOperation, TypeError, ValueError) as e:
            # Decimal signals malformed text with InvalidOperation, which is
            # an ArithmeticError and not a ValueError; translate it so
            # callers see the documented exception type.
            raise ValueError(f"{value!r} is not a valid decimal number") from e
        if not number.is_finite():
            # NaN and infinities parse successfully but cannot be compared
            # or rounded meaningfully.
            raise ValueError(f"{value!r} is not a finite number")
        return number

    def _fetch_market_price(self, symbol: str) -> Optional[Decimal]:
        """Return the provider's price as a positive Decimal, or None if unusable."""
        if self.market_data_provider is None:
            return None

        try:
            quoted = self.market_data_provider.get_price(symbol)
        except Exception as e:
            # Market data is optional; a provider failure must not block
            # the transformation.
            logger.warning(f"Failed to get market price for {symbol}: {e}")
            return None

        if quoted is None:
            return None

        try:
            reference = self._parse_decimal(quoted)
        except ValueError as e:
            logger.warning(f"Ignoring unusable market price for {symbol}: {e}")
            return None

        if reference <= 0:
            # A non-positive price cannot be the baseline of a percentage
            # deviation.
            logger.warning(f"Ignoring non-positive market price {reference} for {symbol}")
            return None

        return reference

    def normalize_trade_side(self, side: str) -> str:
        """
        Normalize trade side to standard format.

        Args:
            side: Trade side indicator

        Returns:
            Normalized trade side ('buy' or 'sell')

        Raises:
            ValueError: If side is invalid
        """
        side_lower = str(side).lower().strip()

        if side_lower in self._BUY_ALIASES:
            return 'buy'
        if side_lower in self._SELL_ALIASES:
            return 'sell'

        raise ValueError(f"Invalid trade side: {side}")

    def normalize_trade_size(
        self,
        size: Any,
        price: Any,
        symbol: str
    ) -> Decimal:
        """
        Normalize and validate trade size.

        Args:
            size: Raw trade size
            price: Trade price for notional calculations
            symbol: Trading pair symbol

        Returns:
            Trade size as Decimal, rounded to the symbol's size precision

        Raises:
            ValueError: If size or price is malformed, size is not positive,
                or the trade violates the symbol's limits
        """
        try:
            size_decimal = self._parse_decimal(size)
            price_decimal = self._parse_decimal(price)
        except ValueError as e:
            raise ValueError(f"Invalid trade size or price format: {e}") from e

        if size_decimal <= 0:
            raise ValueError(f"Trade size must be positive: {size}")

        # Round to the symbol's precision before checking the limits
        limits = get_trade_limits(symbol)
        size_decimal = round(size_decimal, limits.size_precision)

        validate_trade_size(
            size_decimal,
            price_decimal,
            symbol,
            logger
        )

        return size_decimal

    def normalize_trade_price(
        self,
        price: Any,
        symbol: str
    ) -> Decimal:
        """
        Normalize and validate trade price.

        Args:
            price: Raw trade price
            symbol: Trading pair symbol

        Returns:
            Price as Decimal, rounded to the symbol's price precision

        Raises:
            ValueError: If price is malformed, not positive, or violates limits
        """
        try:
            price_decimal = self._parse_decimal(price)
        except ValueError as e:
            raise ValueError(f"Invalid price format: {e}") from e

        if price_decimal <= 0:
            raise ValueError(f"Price must be positive: {price}")

        # Round to the symbol's precision before checking the limits
        limits = get_trade_limits(symbol)
        price_decimal = round(price_decimal, limits.price_precision)

        # None when no provider is configured or its price is unusable
        market_price = self._fetch_market_price(symbol)

        validate_trade_price(
            price_decimal,
            market_price,
            symbol,
            logger
        )

        return price_decimal

    def transform_trade(
        self,
        trade_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Transform trade data with safety checks.

        Args:
            trade_data: Raw trade data; must contain 'symbol', 'side',
                'size' and 'price'

        Returns:
            New dictionary with normalized 'symbol', 'side', 'size' and
            'price'; all other keys of trade_data are copied unchanged

        Raises:
            ValueError: If any validation fails
        """
        if not isinstance(trade_data, dict):
            raise ValueError(f"Trade data must be a dictionary: {trade_data}")

        # Required fields
        required = {'symbol', 'side', 'size', 'price'}
        missing = required - set(trade_data.keys())
        if missing:
            raise ValueError(f"Missing required fields: {missing}")

        # Validate and normalize symbol
        symbol = str(trade_data['symbol']).upper()
        validate_symbol_format(symbol, logger)

        # Transform with safety checks
        transformed = {
            'symbol': symbol,
            'side': self.normalize_trade_side(trade_data['side']),
            'size': self.normalize_trade_size(
                trade_data['size'],
                trade_data['price'],
                symbol
            ),
            'price': self.normalize_trade_price(
                trade_data['price'],
                symbol
            )
        }

        # Copy any additional fields without overwriting normalized ones
        for key, value in trade_data.items():
            transformed.setdefault(key, value)

        return transformed

View File

@@ -0,0 +1,136 @@
"""
Unified data transformer class.
This module provides a unified transformer implementation that can be used
across different exchanges with consistent field mappings.
"""
from typing import Dict, Any, Optional, List
import logging
from ..data_types import StandardizedTrade
from .base import BaseDataTransformer
class UnifiedDataTransformer(BaseDataTransformer):
    """
    Unified transformer for consistent data transformation across exchanges.

    Wraps another BaseDataTransformer and delegates every transformation to
    it. Exceptions raised by the wrapped transformer are logged through
    ``_log_error`` and replaced by a neutral return value (None, an empty
    dict or an empty list), so the methods of this class do not propagate
    transformation errors.
    """

    def __init__(
        self,
        base_transformer: BaseDataTransformer,
        component_name: str = "unified_transformer",
        logger: Optional[logging.Logger] = None
    ):
        """
        Initialize unified transformer.

        Args:
            base_transformer: Base transformer instance to wrap
            component_name: Component name for logging
            logger: Optional logger instance; defaults to the wrapped
                transformer's logger
        """
        super().__init__(
            exchange=base_transformer.exchange,
            component_name=component_name,
            logger=logger or base_transformer.logger
        )
        self.base_transformer = base_transformer

    def transform_trade_data(self, raw_data: Dict[str, Any], symbol: str) -> Optional[StandardizedTrade]:
        """
        Transform raw trade data using base transformer.

        Args:
            raw_data: Raw trade data dictionary
            symbol: Trading symbol

        Returns:
            StandardizedTrade object or None if transformation fails
        """
        try:
            return self.base_transformer.transform_trade_data(raw_data, symbol)
        except Exception as e:
            self._log_error("Failed to transform trade data", e)
            return None

    def transform_orderbook_data(self, raw_data: Dict[str, Any], symbol: str) -> Dict[str, Any]:
        """
        Transform orderbook data using base transformer.

        Args:
            raw_data: Raw orderbook data dictionary
            symbol: Trading symbol

        Returns:
            Transformed orderbook data, or an empty dict if transformation fails
        """
        try:
            return self.base_transformer.transform_orderbook_data(raw_data, symbol)
        except Exception as e:
            self._log_error("Failed to transform orderbook data", e)
            return {}

    def transform_ticker_data(self, raw_data: Dict[str, Any], symbol: str) -> Dict[str, Any]:
        """
        Transform ticker data using base transformer.

        Args:
            raw_data: Raw ticker data dictionary
            symbol: Trading symbol

        Returns:
            Transformed ticker data, or an empty dict if transformation fails
        """
        try:
            return self.base_transformer.transform_ticker_data(raw_data, symbol)
        except Exception as e:
            self._log_error("Failed to transform ticker data", e)
            return {}

    def batch_transform_trades(
        self,
        raw_trades: List[Dict[str, Any]],
        symbol: str,
        field_mapping: Optional[Dict[str, str]] = None
    ) -> List[StandardizedTrade]:
        """
        Transform a batch of raw trades.

        Args:
            raw_trades: List of raw trade dictionaries; None entries are skipped
            symbol: Trading symbol
            field_mapping: Optional field mapping for raw data (accepted for
                interface compatibility; not used by this implementation)

        Returns:
            List of StandardizedTrade objects for the trades that were
            transformed successfully; an empty list if the batch itself
            could not be processed
        """
        try:
            transformed: List[StandardizedTrade] = []
            for raw_trade in raw_trades:
                if raw_trade is None:
                    continue
                trade = self.transform_trade_data(raw_trade, symbol)
                # transform_trade_data returns None on failure; drop those
                # so the result holds only real trades, as the return type
                # promises.
                if trade is not None:
                    transformed.append(trade)
            return transformed
        except Exception as e:
            self._log_error("Failed to batch transform trades", e)
            return []

    def get_transformer_info(self) -> Dict[str, Any]:
        """
        Get transformer information.

        Returns:
            The wrapped transformer's exchange, component and capabilities,
            extended with this wrapper's component name and flags
        """
        base_info = self.base_transformer.get_transformer_info()
        return {
            "exchange": base_info["exchange"],
            "component": base_info["component"],
            "unified_component": self.component_name,
            "batch_processing": True,
            "candle_aggregation": True,
            "capabilities": {
                **base_info["capabilities"],
                "unified_transformation": True,
                "candle_aggregation": True
            }
        }