Refactor data validation module for improved modularity and functionality

- Removed the existing `validation.py` file and replaced it with a modular structure, introducing separate files for validation results, field validators, and the base validator class.
- Implemented comprehensive validation functions for common data types, enhancing reusability and maintainability.
- Added a new `__init__.py` to expose the validation utilities, ensuring a clean public interface.
- Created detailed documentation for the validation module, including usage examples and architectural details.
- Introduced extensive unit tests to cover the new validation framework, ensuring reliability and preventing regressions.

These changes enhance the overall architecture of the data validation module, making it more scalable and easier to manage.
This commit is contained in:
Ajasra
2025-06-07 12:31:47 +08:00
parent 551316872c
commit 96ee25bd01
10 changed files with 1117 additions and 491 deletions

View File

@@ -1,486 +0,0 @@
"""
Base validation utilities for all exchanges.
This module provides common validation patterns and base classes
that can be extended by exchange-specific validators.
"""
import re
from datetime import datetime, timezone, timedelta
from decimal import Decimal, InvalidOperation
from typing import Dict, List, Optional, Any, Union, Pattern
from abc import ABC, abstractmethod
from .data_types import DataValidationResult, StandardizedTrade, TradeSide
class ValidationResult:
    """Outcome of validating a single field.

    Holds the pass/fail flag, the collected error and warning messages, and
    an optional sanitized form of the validated value.
    """

    def __init__(self, is_valid: bool, errors: List[str] = None, warnings: List[str] = None, sanitized_data: Any = None):
        # A falsy argument (None or an empty list) is replaced by a fresh list.
        self.errors = errors if errors else []
        self.warnings = warnings if warnings else []
        self.is_valid = is_valid
        self.sanitized_data = sanitized_data
class BaseDataValidator(ABC):
    """
    Abstract base class for exchange data validators.

    Bundles the field-level checks (price, size, volume, trade side,
    timestamp, trade ID, symbol, order book side, standardized trade) that
    exchange-specific validators share.  Subclasses must implement
    ``validate_symbol_format`` and ``validate_websocket_message``.
    """

    def __init__(self,
                 exchange_name: str,
                 component_name: str = "base_data_validator",
                 logger=None):
        """
        Initialize base data validator.

        Args:
            exchange_name: Name of the exchange (e.g., 'okx', 'binance')
            component_name: Name used as a prefix in log messages
            logger: Logger instance. If None, no logging will be performed.
        """
        self.exchange_name = exchange_name
        self.component_name = component_name
        self.logger = logger

        # Common validation patterns
        self._numeric_pattern = re.compile(r'^-?\d*\.?\d+$')
        self._trade_id_pattern = re.compile(r'^[a-zA-Z0-9_-]+$')  # Flexible pattern

        # Valid trade sides (compared against the lower-cased input)
        self._valid_trade_sides = {'buy', 'sell'}

        # Common price and size limits (can be overridden by subclasses).
        # Values outside these limits only produce warnings, not errors.
        self._min_price = Decimal('0.00000001')  # 1 satoshi equivalent
        self._max_price = Decimal('10000000')  # 10 million
        self._min_size = Decimal('0.00000001')  # Minimum trade size
        self._max_size = Decimal('1000000000')  # 1 billion max size

        # Timestamp validation (milliseconds since epoch); violations are errors.
        self._min_timestamp = 1000000000000  # 2001-09-09 (reasonable minimum)
        self._max_timestamp = 9999999999999  # 2286-11-20 (reasonable maximum)

        if self.logger:
            self.logger.debug(f"{self.component_name}: Initialized {exchange_name} data validator")

    # Abstract methods that must be implemented by subclasses

    @abstractmethod
    def validate_symbol_format(self, symbol: str) -> ValidationResult:
        """Validate exchange-specific symbol format."""
        pass

    @abstractmethod
    def validate_websocket_message(self, message: Dict[str, Any]) -> DataValidationResult:
        """Validate complete WebSocket message structure."""
        pass

    # Common validation methods available to all subclasses

    def validate_price(self, price: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate price value with common rules.

        Blank strings, unparsable values, NaN/Infinity and non-positive
        prices are errors.  A price outside the configured min/max limits or
        with more than 12 decimal places only produces a warning.

        Args:
            price: Price value to validate

        Returns:
            ValidationResult with the parsed Decimal as ``sanitized_data``
            (None when the value is blank, unparsable or non-finite)
        """
        errors = []
        warnings = []
        sanitized_data = None

        try:
            if isinstance(price, str) and price.strip() == "":
                errors.append("Empty price string")
                return ValidationResult(False, errors, warnings)

            # Go through str() so floats keep their printed value.
            decimal_price = Decimal(str(price))

            # NaN/Infinity parse successfully but cannot be ordered or bounded;
            # reject them explicitly instead of relying on a later exception.
            if not decimal_price.is_finite():
                errors.append(f"Price must be a finite number, got {decimal_price}")
                return ValidationResult(False, errors, warnings)

            sanitized_data = decimal_price

            # Check for zero or negative prices
            if decimal_price <= 0:
                errors.append(f"Price must be positive, got {decimal_price}")

            # Check price bounds (warn only)
            if decimal_price < self._min_price:
                warnings.append(f"Price {decimal_price} below minimum {self._min_price}")
            elif decimal_price > self._max_price:
                warnings.append(f"Price {decimal_price} above maximum {self._max_price}")

            # Check for excessive decimal places (warn only)
            if decimal_price.as_tuple().exponent < -12:
                warnings.append(f"Price has excessive decimal precision: {decimal_price}")

        except (InvalidOperation, ValueError, TypeError) as e:
            errors.append(f"Invalid price value: {price} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def validate_size(self, size: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate size/quantity value with common rules.

        Blank strings, unparsable values, NaN/Infinity and non-positive sizes
        are errors.  A size outside the configured min/max limits only
        produces a warning.

        Args:
            size: Size value to validate

        Returns:
            ValidationResult with the parsed Decimal as ``sanitized_data``
            (None when the value is blank, unparsable or non-finite)
        """
        errors = []
        warnings = []
        sanitized_data = None

        try:
            if isinstance(size, str) and size.strip() == "":
                errors.append("Empty size string")
                return ValidationResult(False, errors, warnings)

            decimal_size = Decimal(str(size))

            # Infinity would otherwise pass with just an "above maximum" warning.
            if not decimal_size.is_finite():
                errors.append(f"Size must be a finite number, got {decimal_size}")
                return ValidationResult(False, errors, warnings)

            sanitized_data = decimal_size

            # Check for negative or zero sizes
            if decimal_size <= 0:
                errors.append(f"Size must be positive, got {decimal_size}")

            # Check size bounds (warn only)
            if decimal_size < self._min_size:
                warnings.append(f"Size {decimal_size} below minimum {self._min_size}")
            elif decimal_size > self._max_size:
                warnings.append(f"Size {decimal_size} above maximum {self._max_size}")

        except (InvalidOperation, ValueError, TypeError) as e:
            errors.append(f"Invalid size value: {size} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def validate_volume(self, volume: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate volume value with common rules.

        Unparsable values, NaN/Infinity and negative volumes are errors.
        Zero is allowed.

        Args:
            volume: Volume value to validate

        Returns:
            ValidationResult (no sanitized data is attached)
        """
        errors = []
        warnings = []

        try:
            decimal_volume = Decimal(str(volume))

            if not decimal_volume.is_finite():
                # Infinity would otherwise be accepted as a valid volume.
                errors.append(f"Volume must be a finite number, got {decimal_volume}")
            elif decimal_volume < 0:
                # Volume can be zero (no trades in period)
                errors.append(f"Volume cannot be negative, got {decimal_volume}")

        except (InvalidOperation, ValueError, TypeError) as e:
            errors.append(f"Invalid volume value: {volume} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_trade_side(self, side: str) -> ValidationResult:
        """
        Validate trade side with common rules.

        Args:
            side: Trade side string; compared case-insensitively

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        if not isinstance(side, str):
            errors.append(f"Trade side must be string, got {type(side)}")
            return ValidationResult(False, errors, warnings)

        normalized_side = side.lower()
        if normalized_side not in self._valid_trade_sides:
            errors.append(f"Invalid trade side: {side}. Must be 'buy' or 'sell'")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_timestamp(self, timestamp: Union[str, int], is_milliseconds: bool = True) -> ValidationResult:
        """
        Validate timestamp value with common rules.

        A timestamp outside the configured min/max bounds is an error; one
        more than 365 days away from the current UTC time is a warning.

        Args:
            timestamp: Timestamp value to validate (int or digit-only string)
            is_milliseconds: True if timestamp is in milliseconds, False for seconds

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        try:
            # Convert to int
            if isinstance(timestamp, str):
                if not timestamp.isdigit():
                    errors.append(f"Invalid timestamp format: {timestamp}")
                    return ValidationResult(False, errors, warnings)
                timestamp_int = int(timestamp)
            elif isinstance(timestamp, int):
                timestamp_int = timestamp
            else:
                errors.append(f"Timestamp must be string or int, got {type(timestamp)}")
                return ValidationResult(False, errors, warnings)

            # Convert to milliseconds if needed
            if not is_milliseconds:
                timestamp_int = timestamp_int * 1000

            # Check timestamp bounds
            if timestamp_int < self._min_timestamp:
                errors.append(f"Timestamp {timestamp_int} too old")
            elif timestamp_int > self._max_timestamp:
                errors.append(f"Timestamp {timestamp_int} too far in future")

            # Check if timestamp is reasonable (within last year to next year)
            current_time_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
            one_year_ms = 365 * 24 * 60 * 60 * 1000

            if timestamp_int < (current_time_ms - one_year_ms):
                warnings.append(f"Timestamp {timestamp_int} is older than 1 year")
            elif timestamp_int > (current_time_ms + one_year_ms):
                warnings.append(f"Timestamp {timestamp_int} is more than 1 year in future")

        except (ValueError, TypeError) as e:
            errors.append(f"Invalid timestamp: {timestamp} - {str(e)}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_trade_id(self, trade_id: Union[str, int]) -> ValidationResult:
        """
        Validate trade ID with flexible rules.

        A non-string/non-int or blank ID is an error; an ID containing
        characters other than letters, digits, '_' or '-' is only a warning.

        Args:
            trade_id: Trade ID to validate

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        if isinstance(trade_id, int):
            trade_id = str(trade_id)

        if not isinstance(trade_id, str):
            errors.append(f"Trade ID must be string or int, got {type(trade_id)}")
            return ValidationResult(False, errors, warnings)

        if not trade_id.strip():
            errors.append("Trade ID cannot be empty")
            return ValidationResult(False, errors, warnings)

        # fullmatch rather than match: with match(), the pattern's '$' also
        # matches just before a trailing newline, so "abc\n" would pass silently.
        if not self._trade_id_pattern.fullmatch(trade_id):
            warnings.append(f"Trade ID has unusual format: {trade_id}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_symbol_match(self, symbol: str, expected_symbol: Optional[str] = None) -> ValidationResult:
        """
        Validate symbol matches expected value.

        A mismatch is reported as a warning, not an error; the check is
        skipped when ``expected_symbol`` is falsy.

        Args:
            symbol: Symbol to validate
            expected_symbol: Expected symbol value

        Returns:
            ValidationResult
        """
        errors = []
        warnings = []

        if not isinstance(symbol, str):
            errors.append(f"Symbol must be string, got {type(symbol)}")
            return ValidationResult(False, errors, warnings)

        if expected_symbol and symbol != expected_symbol:
            warnings.append(f"Symbol mismatch: expected {expected_symbol}, got {symbol}")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def validate_orderbook_side(self, side_data: List[List[str]], side_name: str) -> ValidationResult:
        """
        Validate orderbook side (asks or bids) with common rules.

        Each level must be a list whose first two elements are price and
        size.  Errors and warnings from the per-level price/size checks are
        reported with a ``"<side_name>[<index>] price|size: "`` prefix.

        Args:
            side_data: List of price/size pairs
            side_name: Name of side for error messages

        Returns:
            ValidationResult whose ``sanitized_data`` is the list of levels
            that passed, with price and size rendered as Decimal strings and
            any extra fields kept unchanged
        """
        errors = []
        warnings = []
        sanitized_data = []

        if not isinstance(side_data, list):
            errors.append(f"{side_name} must be a list")
            return ValidationResult(False, errors, warnings)

        for i, level in enumerate(side_data):
            if not isinstance(level, list) or len(level) < 2:
                errors.append(f"{side_name}[{i}] must be a list with at least 2 elements")
                continue

            # Validate price and size
            price_result = self.validate_price(level[0])
            size_result = self.validate_size(level[1])

            if not price_result.is_valid:
                errors.extend([f"{side_name}[{i}] price: {error}" for error in price_result.errors])
            if not size_result.is_valid:
                errors.extend([f"{side_name}[{i}] size: {error}" for error in size_result.errors])

            # Surface the per-level warnings instead of dropping them.
            warnings.extend([f"{side_name}[{i}] price: {warning}" for warning in price_result.warnings])
            warnings.extend([f"{side_name}[{i}] size: {warning}" for warning in size_result.warnings])

            # Add sanitized level
            if price_result.is_valid and size_result.is_valid:
                sanitized_level = [str(price_result.sanitized_data), str(size_result.sanitized_data)]
                # Include additional fields if present
                if len(level) > 2:
                    sanitized_level.extend(level[2:])
                sanitized_data.append(sanitized_level)

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def validate_standardized_trade(self, trade: StandardizedTrade) -> DataValidationResult:
        """
        Validate a standardized trade object.

        Runs the price, size, side, trade ID, symbol-format and timestamp
        checks on the trade's attributes and collects their messages with a
        ``"<field>: "`` prefix.  Any exception raised while reading or
        validating the trade is reported as a validation error.

        Args:
            trade: StandardizedTrade object to validate

        Returns:
            DataValidationResult
        """
        errors = []
        warnings = []

        try:
            # Validate price
            price_result = self.validate_price(trade.price)
            if not price_result.is_valid:
                errors.extend([f"price: {error}" for error in price_result.errors])
            warnings.extend([f"price: {warning}" for warning in price_result.warnings])

            # Validate size
            size_result = self.validate_size(trade.size)
            if not size_result.is_valid:
                errors.extend([f"size: {error}" for error in size_result.errors])
            warnings.extend([f"size: {warning}" for warning in size_result.warnings])

            # Validate side
            side_result = self.validate_trade_side(trade.side)
            if not side_result.is_valid:
                errors.extend([f"side: {error}" for error in side_result.errors])
            warnings.extend([f"side: {warning}" for warning in side_result.warnings])

            # Validate trade ID
            trade_id_result = self.validate_trade_id(trade.trade_id)
            if not trade_id_result.is_valid:
                errors.extend([f"trade_id: {error}" for error in trade_id_result.errors])
            warnings.extend([f"trade_id: {warning}" for warning in trade_id_result.warnings])

            # Validate symbol format (exchange-specific)
            symbol_result = self.validate_symbol_format(trade.symbol)
            if not symbol_result.is_valid:
                errors.extend([f"symbol: {error}" for error in symbol_result.errors])
            warnings.extend([f"symbol: {warning}" for warning in symbol_result.warnings])

            # Validate timestamp (trade.timestamp is converted to epoch milliseconds)
            timestamp_ms = int(trade.timestamp.timestamp() * 1000)
            timestamp_result = self.validate_timestamp(timestamp_ms, is_milliseconds=True)
            if not timestamp_result.is_valid:
                errors.extend([f"timestamp: {error}" for error in timestamp_result.errors])
            warnings.extend([f"timestamp: {warning}" for warning in timestamp_result.warnings])

            return DataValidationResult(len(errors) == 0, errors, warnings)

        except Exception as e:
            # Deliberately broad: a malformed trade object must yield a failed
            # result rather than propagate out of the validator.
            errors.append(f"Exception during trade validation: {str(e)}")
            return DataValidationResult(False, errors, warnings)

    def get_validator_info(self) -> Dict[str, Any]:
        """Get validator configuration information (limits and regex patterns)."""
        return {
            'exchange': self.exchange_name,
            'component': self.component_name,
            'limits': {
                'min_price': str(self._min_price),
                'max_price': str(self._max_price),
                'min_size': str(self._min_size),
                'max_size': str(self._max_size),
                'min_timestamp': self._min_timestamp,
                'max_timestamp': self._max_timestamp
            },
            'patterns': {
                'numeric': self._numeric_pattern.pattern,
                'trade_id': self._trade_id_pattern.pattern
            }
        }
# Utility functions for common validation patterns
def is_valid_decimal(value: Any) -> bool:
    """Return True when ``Decimal(str(value))`` can be constructed, else False."""
    try:
        Decimal(str(value))
    except (InvalidOperation, ValueError, TypeError):
        return False
    else:
        return True
def normalize_symbol(symbol: str, exchange: str) -> str:
    """
    Normalize symbol format for exchange.

    The symbol is upper-cased and surrounding whitespace is removed.  The
    ``exchange`` argument is accepted but not used by this basic version.

    Args:
        symbol: Raw symbol string
        exchange: Exchange name

    Returns:
        Normalized symbol string
    """
    # Basic normalization - can be extended per exchange
    uppercased = symbol.upper()
    return uppercased.strip()
def validate_required_fields(data: Dict[str, Any], required_fields: List[str]) -> List[str]:
    """
    Report which required fields are absent from ``data``.

    A field counts as missing when its key is not present or its value is
    None; other falsy values (0, "", []) count as present.

    Args:
        data: Data dictionary to check
        required_fields: List of required field names

    Returns:
        List of missing field names, in the order given by ``required_fields``
    """
    return [
        name
        for name in required_fields
        if name not in data or data[name] is None
    ]
# Public API of this module: the names exported by a star-import.
__all__ = [
'ValidationResult',
'BaseDataValidator',
'is_valid_decimal',
'normalize_symbol',
'validate_required_fields'
]

View File

@@ -0,0 +1,58 @@
"""
Data validation utilities for exchange data.
This package provides common validation patterns and base classes
that can be extended by exchange-specific validators.
"""
from .result import ValidationResult, DataValidationResult
from .base import BaseDataValidator
from .field_validators import (
validate_price,
validate_size,
validate_volume,
validate_trade_side,
validate_timestamp,
validate_trade_id,
validate_symbol_match,
validate_required_fields,
is_valid_decimal,
MIN_PRICE,
MAX_PRICE,
MIN_SIZE,
MAX_SIZE,
MIN_TIMESTAMP,
MAX_TIMESTAMP,
VALID_TRADE_SIDES,
NUMERIC_PATTERN,
TRADE_ID_PATTERN
)
# Public API of the package: every name listed here is imported above from
# .result, .base or .field_validators.
__all__ = [
# Classes
'ValidationResult',
'DataValidationResult',
'BaseDataValidator',
# Field validation functions
'validate_price',
'validate_size',
'validate_volume',
'validate_trade_side',
'validate_timestamp',
'validate_trade_id',
'validate_symbol_match',
'validate_required_fields',
'is_valid_decimal',
# Constants
'MIN_PRICE',
'MAX_PRICE',
'MIN_SIZE',
'MAX_SIZE',
'MIN_TIMESTAMP',
'MAX_TIMESTAMP',
'VALID_TRADE_SIDES',
'NUMERIC_PATTERN',
'TRADE_ID_PATTERN'
]

View File

@@ -0,0 +1,255 @@
"""
Base validator class for exchange data validation.
This module provides the abstract base class for exchange-specific data validators,
along with common validation patterns and utilities.
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional, List, Union
from decimal import Decimal
from logging import Logger
from .result import ValidationResult, DataValidationResult
from .field_validators import (
validate_price,
validate_size,
validate_volume,
validate_trade_side,
validate_timestamp,
validate_trade_id,
validate_symbol_match,
validate_required_fields,
MIN_PRICE,
MAX_PRICE,
MIN_SIZE,
MAX_SIZE,
MIN_TIMESTAMP,
MAX_TIMESTAMP,
VALID_TRADE_SIDES,
NUMERIC_PATTERN,
TRADE_ID_PATTERN
)
class BaseDataValidator(ABC):
    """
    Abstract base class for exchange data validators.

    The field-level ``validate_*`` methods are thin wrappers around the
    functions imported from ``.field_validators``; ``validate_orderbook_side``
    and ``get_validator_info`` are implemented here.  Subclasses must
    implement ``validate_symbol_format`` and ``validate_websocket_message``.
    """

    def __init__(self,
                 exchange_name: str,
                 component_name: str = "base_data_validator",
                 logger: Optional[Logger] = None):
        """
        Initialize base data validator.

        Args:
            exchange_name: Name of the exchange (e.g., 'okx', 'binance')
            component_name: Name used as a prefix in log messages
            logger: Logger instance. If None, no logging will be performed.
        """
        self.exchange_name = exchange_name
        self.component_name = component_name
        self.logger = logger

        # Common validation patterns
        self._numeric_pattern = NUMERIC_PATTERN
        self._trade_id_pattern = TRADE_ID_PATTERN

        # Valid trade sides
        self._valid_trade_sides = VALID_TRADE_SIDES

        # Common price and size limits (can be overridden by subclasses).
        # NOTE(review): the validate_* wrappers below call the imported field
        # validators with the value only and never pass these attributes on,
        # so overriding them changes what get_validator_info() reports but not
        # what validate_price/validate_size/validate_timestamp enforce.
        self._min_price = MIN_PRICE
        self._max_price = MAX_PRICE
        self._min_size = MIN_SIZE
        self._max_size = MAX_SIZE

        # Timestamp validation (milliseconds since epoch)
        self._min_timestamp = MIN_TIMESTAMP
        self._max_timestamp = MAX_TIMESTAMP

        if self.logger:
            self.logger.debug(f"{self.component_name}: Initialized {exchange_name} data validator")

    # Abstract methods that must be implemented by subclasses

    @abstractmethod
    def validate_symbol_format(self, symbol: str) -> ValidationResult:
        """
        Validate exchange-specific symbol format.

        Args:
            symbol: Symbol to validate

        Returns:
            ValidationResult
        """
        pass

    @abstractmethod
    def validate_websocket_message(self, message: Dict[str, Any]) -> DataValidationResult:
        """
        Validate complete WebSocket message structure.

        Args:
            message: WebSocket message to validate

        Returns:
            DataValidationResult
        """
        pass

    # Common validation methods available to all subclasses.
    # Inside a method body the bare name (e.g. validate_price) resolves to the
    # imported module-level function, not to the method of the same name.

    def validate_price(self, price: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate price value with common rules.

        Args:
            price: Price value to validate

        Returns:
            ValidationResult with sanitized decimal price
        """
        return validate_price(price)

    def validate_size(self, size: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate size/quantity value with common rules.

        Args:
            size: Size value to validate

        Returns:
            ValidationResult with sanitized decimal size
        """
        return validate_size(size)

    def validate_volume(self, volume: Union[str, int, float, Decimal]) -> ValidationResult:
        """
        Validate volume value with common rules.

        Args:
            volume: Volume value to validate

        Returns:
            ValidationResult
        """
        return validate_volume(volume)

    def validate_trade_side(self, side: str) -> ValidationResult:
        """
        Validate trade side with common rules.

        Args:
            side: Trade side string

        Returns:
            ValidationResult
        """
        return validate_trade_side(side)

    def validate_timestamp(self, timestamp: Union[str, int], is_milliseconds: bool = True) -> ValidationResult:
        """
        Validate timestamp value with common rules.

        Args:
            timestamp: Timestamp value to validate
            is_milliseconds: True if timestamp is in milliseconds, False for seconds

        Returns:
            ValidationResult
        """
        return validate_timestamp(timestamp, is_milliseconds)

    def validate_trade_id(self, trade_id: Union[str, int]) -> ValidationResult:
        """
        Validate trade ID with flexible rules.

        Args:
            trade_id: Trade ID to validate

        Returns:
            ValidationResult
        """
        return validate_trade_id(trade_id)

    def validate_symbol_match(self, symbol: str, expected_symbol: Optional[str] = None) -> ValidationResult:
        """
        Validate symbol matches expected value.

        Args:
            symbol: Symbol to validate
            expected_symbol: Expected symbol value

        Returns:
            ValidationResult
        """
        return validate_symbol_match(symbol, expected_symbol)

    def validate_orderbook_side(self, side_data: List[List[str]], side_name: str) -> ValidationResult:
        """
        Validate orderbook side (asks or bids) with common rules.

        Each level must be a list whose first two elements are price and
        size.  Errors and warnings from the per-level price/size checks are
        reported with a ``"<side_name>[<index>] price|size: "`` prefix.

        Args:
            side_data: List of price/size pairs
            side_name: Name of side for error messages

        Returns:
            ValidationResult whose ``sanitized_data`` is the list of levels
            that passed, with price and size rendered as strings and any
            extra fields kept unchanged
        """
        errors = []
        warnings = []
        sanitized_data = []

        if not isinstance(side_data, list):
            errors.append(f"{side_name} must be a list")
            return ValidationResult(False, errors, warnings)

        for i, level in enumerate(side_data):
            if not isinstance(level, list) or len(level) < 2:
                errors.append(f"{side_name}[{i}] must be a list with at least 2 elements")
                continue

            # Validate price and size
            price_result = self.validate_price(level[0])
            size_result = self.validate_size(level[1])

            if not price_result.is_valid:
                errors.extend([f"{side_name}[{i}] price: {error}" for error in price_result.errors])
            if not size_result.is_valid:
                errors.extend([f"{side_name}[{i}] size: {error}" for error in size_result.errors])

            # Surface the per-level warnings instead of dropping them.
            warnings.extend([f"{side_name}[{i}] price: {warning}" for warning in price_result.warnings])
            warnings.extend([f"{side_name}[{i}] size: {warning}" for warning in size_result.warnings])

            # Add sanitized level
            if price_result.is_valid and size_result.is_valid:
                sanitized_level = [str(price_result.sanitized_data), str(size_result.sanitized_data)]
                # Include additional fields if present
                if len(level) > 2:
                    sanitized_level.extend(level[2:])
                sanitized_data.append(sanitized_level)

        return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)

    def get_validator_info(self) -> Dict[str, Any]:
        """Get validator configuration information (limits and regex patterns)."""
        return {
            'exchange': self.exchange_name,
            'component': self.component_name,
            'limits': {
                'min_price': str(self._min_price),
                'max_price': str(self._max_price),
                'min_size': str(self._min_size),
                'max_size': str(self._max_size),
                'min_timestamp': self._min_timestamp,
                'max_timestamp': self._max_timestamp
            },
            'patterns': {
                'numeric': self._numeric_pattern.pattern,
                'trade_id': self._trade_id_pattern.pattern
            }
        }

View File

@@ -0,0 +1,293 @@
"""
Field validation functions for common data types.
This module provides standalone validation functions for individual fields
like prices, sizes, timestamps, etc.
"""
import re
from datetime import datetime, timezone
from decimal import Decimal, InvalidOperation
from typing import Union, List, Dict, Any, Set, Pattern
from .result import ValidationResult
# Common validation patterns
# NUMERIC_PATTERN: optional leading '-', optional integer digits, optional '.',
# then at least one digit (e.g. "1", "-3", "1.5", ".5"); exponent notation and
# a trailing '.' do not match.
NUMERIC_PATTERN: Pattern = re.compile(r'^-?\d*\.?\d+$')
# TRADE_ID_PATTERN: one or more letters, digits, underscores or hyphens.
TRADE_ID_PATTERN: Pattern = re.compile(r'^[a-zA-Z0-9_-]+$')
# Common validation constants
# Price and size limits: validate_price / validate_size only warn when a value
# falls outside them.
MIN_PRICE: Decimal = Decimal('0.00000001') # 1 satoshi equivalent
MAX_PRICE: Decimal = Decimal('10000000') # 10 million
MIN_SIZE: Decimal = Decimal('0.00000001') # Minimum trade size
MAX_SIZE: Decimal = Decimal('1000000000') # 1 billion max size
# Timestamp limits in milliseconds since epoch: validate_timestamp reports an
# error when a value falls outside them.
MIN_TIMESTAMP: int = 1000000000000 # 2001-09-09
MAX_TIMESTAMP: int = 9999999999999 # 2286-11-20
# Accepted trade sides; validate_trade_side lower-cases its input before the lookup.
VALID_TRADE_SIDES: Set[str] = {'buy', 'sell'}
def validate_price(price: Union[str, int, float, Decimal]) -> ValidationResult:
    """
    Validate price value with common rules.

    Blank strings, unparsable values, NaN/Infinity and non-positive prices
    are errors.  A price outside ``[MIN_PRICE, MAX_PRICE]`` or with more than
    12 decimal places only produces a warning.

    Args:
        price: Price value to validate

    Returns:
        ValidationResult with the parsed Decimal as ``sanitized_data``
        (None when the value is blank, unparsable or non-finite)
    """
    errors = []
    warnings = []
    sanitized_data = None

    try:
        if isinstance(price, str) and price.strip() == "":
            errors.append("Empty price string")
            return ValidationResult(False, errors, warnings)

        # Go through str() so floats keep their printed value.
        decimal_price = Decimal(str(price))

        # NaN/Infinity parse successfully but cannot be ordered or bounded;
        # reject them explicitly instead of relying on a later exception
        # (NaN comparison / non-integer exponent) to do it by accident.
        if not decimal_price.is_finite():
            errors.append(f"Price must be a finite number, got {decimal_price}")
            return ValidationResult(False, errors, warnings)

        sanitized_data = decimal_price

        # Check for zero or negative prices
        if decimal_price <= 0:
            errors.append(f"Price must be positive, got {decimal_price}")

        # Check price bounds (warn only)
        if decimal_price < MIN_PRICE:
            warnings.append(f"Price {decimal_price} below minimum {MIN_PRICE}")
        elif decimal_price > MAX_PRICE:
            warnings.append(f"Price {decimal_price} above maximum {MAX_PRICE}")

        # Check for excessive decimal places (warn only)
        if decimal_price.as_tuple().exponent < -12:
            warnings.append(f"Price has excessive decimal precision: {decimal_price}")

    except (InvalidOperation, ValueError, TypeError) as e:
        errors.append(f"Invalid price value: {price} - {str(e)}")

    return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)
def validate_size(size: Union[str, int, float, Decimal]) -> ValidationResult:
    """
    Validate size/quantity value with common rules.

    Blank strings, unparsable values, NaN/Infinity and non-positive sizes are
    errors.  A size outside ``[MIN_SIZE, MAX_SIZE]`` only produces a warning.

    Args:
        size: Size value to validate

    Returns:
        ValidationResult with the parsed Decimal as ``sanitized_data``
        (None when the value is blank, unparsable or non-finite)
    """
    errors = []
    warnings = []
    sanitized_data = None

    try:
        if isinstance(size, str) and size.strip() == "":
            errors.append("Empty size string")
            return ValidationResult(False, errors, warnings)

        # Go through str() so floats keep their printed value.
        decimal_size = Decimal(str(size))

        # Infinity is positive and would otherwise pass as valid with only an
        # "above maximum" warning; NaN cannot be compared at all.
        if not decimal_size.is_finite():
            errors.append(f"Size must be a finite number, got {decimal_size}")
            return ValidationResult(False, errors, warnings)

        sanitized_data = decimal_size

        # Check for negative or zero sizes
        if decimal_size <= 0:
            errors.append(f"Size must be positive, got {decimal_size}")

        # Check size bounds (warn only)
        if decimal_size < MIN_SIZE:
            warnings.append(f"Size {decimal_size} below minimum {MIN_SIZE}")
        elif decimal_size > MAX_SIZE:
            warnings.append(f"Size {decimal_size} above maximum {MAX_SIZE}")

    except (InvalidOperation, ValueError, TypeError) as e:
        errors.append(f"Invalid size value: {size} - {str(e)}")

    return ValidationResult(len(errors) == 0, errors, warnings, sanitized_data)
def validate_volume(volume: Union[str, int, float, Decimal]) -> ValidationResult:
    """
    Validate volume value with common rules.

    Unparsable values, NaN/Infinity and negative volumes are errors.  Zero is
    allowed.

    Args:
        volume: Volume value to validate

    Returns:
        ValidationResult (no sanitized data is attached)
    """
    errors = []
    warnings = []

    try:
        decimal_volume = Decimal(str(volume))

        if not decimal_volume.is_finite():
            # Infinity is not negative and would otherwise be accepted.
            errors.append(f"Volume must be a finite number, got {decimal_volume}")
        elif decimal_volume < 0:
            # Volume can be zero (no trades in period)
            errors.append(f"Volume cannot be negative, got {decimal_volume}")

    except (InvalidOperation, ValueError, TypeError) as e:
        errors.append(f"Invalid volume value: {volume} - {str(e)}")

    return ValidationResult(len(errors) == 0, errors, warnings)
def validate_trade_side(side: str) -> ValidationResult:
    """
    Check that a trade side is one of VALID_TRADE_SIDES, ignoring letter case.

    Args:
        side: Trade side string

    Returns:
        ValidationResult; invalid when ``side`` is not a string or its
        lower-cased form is not an accepted side
    """
    # Guard: anything that is not a string is rejected outright.
    if not isinstance(side, str):
        return ValidationResult(False, [f"Trade side must be string, got {type(side)}"], [])

    if side.lower() in VALID_TRADE_SIDES:
        return ValidationResult(True, [], [])

    return ValidationResult(False, [f"Invalid trade side: {side}. Must be 'buy' or 'sell'"], [])
def validate_timestamp(timestamp: Union[str, int], is_milliseconds: bool = True) -> ValidationResult:
    """
    Check a timestamp against the hard bounds and a one-year sanity window.

    A value outside ``[MIN_TIMESTAMP, MAX_TIMESTAMP]`` (in milliseconds) is
    an error; a value more than 365 days away from the current UTC time is a
    warning.

    Args:
        timestamp: Timestamp as an int or a digit-only string
        is_milliseconds: True if timestamp is in milliseconds, False for seconds

    Returns:
        ValidationResult
    """
    problems = []
    notes = []

    try:
        # Normalize the input to an integer
        if isinstance(timestamp, str):
            if not timestamp.isdigit():
                problems.append(f"Invalid timestamp format: {timestamp}")
                return ValidationResult(False, problems, notes)
            ts_ms = int(timestamp)
        elif isinstance(timestamp, int):
            ts_ms = timestamp
        else:
            problems.append(f"Timestamp must be string or int, got {type(timestamp)}")
            return ValidationResult(False, problems, notes)

        # Second-resolution input is scaled up to milliseconds
        ts_ms = ts_ms if is_milliseconds else ts_ms * 1000

        # Hard bounds
        if ts_ms < MIN_TIMESTAMP:
            problems.append(f"Timestamp {ts_ms} too old")
        elif ts_ms > MAX_TIMESTAMP:
            problems.append(f"Timestamp {ts_ms} too far in future")

        # Soft window: one year either side of now
        now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
        year_ms = 365 * 24 * 60 * 60 * 1000
        earliest = now_ms - year_ms
        latest = now_ms + year_ms

        if ts_ms < earliest:
            notes.append(f"Timestamp {ts_ms} is older than 1 year")
        elif ts_ms > latest:
            notes.append(f"Timestamp {ts_ms} is more than 1 year in future")

    except (ValueError, TypeError) as e:
        problems.append(f"Invalid timestamp: {timestamp} - {str(e)}")

    return ValidationResult(not problems, problems, notes)
def validate_trade_id(trade_id: Union[str, int]) -> ValidationResult:
    """
    Validate trade ID with flexible rules.

    A non-string/non-int or blank ID is an error; an ID that does not consist
    solely of letters, digits, '_' or '-' is only a warning.

    Args:
        trade_id: Trade ID to validate

    Returns:
        ValidationResult
    """
    errors = []
    warnings = []

    # Integer IDs are checked in their string form.
    if isinstance(trade_id, int):
        trade_id = str(trade_id)

    if not isinstance(trade_id, str):
        errors.append(f"Trade ID must be string or int, got {type(trade_id)}")
        return ValidationResult(False, errors, warnings)

    if not trade_id.strip():
        errors.append("Trade ID cannot be empty")
        return ValidationResult(False, errors, warnings)

    # fullmatch rather than match: with match(), the pattern's '$' also
    # matches just before a trailing newline, so "abc\n" would pass silently.
    if not TRADE_ID_PATTERN.fullmatch(trade_id):
        warnings.append(f"Trade ID has unusual format: {trade_id}")

    return ValidationResult(len(errors) == 0, errors, warnings)
def validate_symbol_match(symbol: str, expected_symbol: str = None) -> ValidationResult:
    """
    Compare a symbol with the expected one.

    A non-string symbol is an error.  A mismatch is reported as a warning
    only, and the comparison is skipped when ``expected_symbol`` is falsy.

    Args:
        symbol: Symbol to validate
        expected_symbol: Expected symbol value

    Returns:
        ValidationResult
    """
    if not isinstance(symbol, str):
        return ValidationResult(False, [f"Symbol must be string, got {type(symbol)}"], [])

    mismatch_notes = []
    if expected_symbol and symbol != expected_symbol:
        mismatch_notes.append(f"Symbol mismatch: expected {expected_symbol}, got {symbol}")

    return ValidationResult(True, [], mismatch_notes)
def validate_required_fields(data: Dict[str, Any], required_fields: List[str]) -> List[str]:
    """
    Report which required fields are absent from ``data``.

    A field counts as missing when its key is not present or its value is
    None; other falsy values (0, "", []) count as present.

    Args:
        data: Data dictionary to check
        required_fields: List of required field names

    Returns:
        List of missing field names, in the order given by ``required_fields``
    """
    def _is_missing(name: str) -> bool:
        return name not in data or data[name] is None

    return [name for name in required_fields if _is_missing(name)]
def is_valid_decimal(value: Any) -> bool:
    """Return True when ``Decimal(str(value))`` can be constructed, else False."""
    convertible = True
    try:
        Decimal(str(value))
    except (InvalidOperation, ValueError, TypeError):
        convertible = False
    return convertible

View File

@@ -0,0 +1,113 @@
"""
Validation result classes for data validation.
This module provides result classes used to represent validation outcomes
across the validation system.
"""
from typing import List, Any, Optional, Dict
class ValidationResult:
    """Outcome of validating a single field.

    Holds the pass/fail flag, the collected error and warning messages, and
    an optional sanitized form of the validated value.
    """

    def __init__(self,
                 is_valid: bool,
                 errors: List[str] = None,
                 warnings: List[str] = None,
                 sanitized_data: Any = None):
        """
        Create a validation result.

        Args:
            is_valid: Whether the validation passed
            errors: Error messages; a falsy value becomes a new empty list
            warnings: Warning messages; a falsy value becomes a new empty list
            sanitized_data: Optional sanitized/normalized data
        """
        self.errors = errors if errors else []
        self.warnings = warnings if warnings else []
        self.is_valid = is_valid
        self.sanitized_data = sanitized_data

    def __str__(self) -> str:
        """Summarize the status and the number of errors and warnings."""
        counted = ((self.errors, "errors"), (self.warnings, "warnings"))
        parts = [f"{len(messages)} {label}" for messages, label in counted if messages]
        suffix = f" with {', '.join(parts)}" if parts else ""
        verdict = "valid" if self.is_valid else "invalid"
        return f"ValidationResult: {verdict}{suffix}"

    def add_error(self, error: str) -> None:
        """Record an error message and mark the result as invalid."""
        self.is_valid = False
        self.errors.append(error)

    def add_warning(self, warning: str) -> None:
        """Record a warning message; validity is left unchanged."""
        self.warnings.append(warning)

    def merge(self, other: 'ValidationResult') -> None:
        """Fold another result's validity, errors and warnings into this one."""
        # Sanitized data is context specific, so it is intentionally not merged.
        self.errors.extend(other.errors)
        self.warnings.extend(other.warnings)
        self.is_valid = self.is_valid and other.is_valid
class DataValidationResult:
    """Result of data validation - common across all exchanges.

    Same shape as a field-level result, except that ``errors`` and
    ``warnings`` are required and ``sanitized_data`` is a dictionary that
    ``merge`` combines.
    """

    def __init__(self,
                 is_valid: bool,
                 errors: List[str],
                 warnings: List[str],
                 sanitized_data: Optional[Dict[str, Any]] = None):
        """
        Create a data validation result.

        Args:
            is_valid: Whether the validation passed
            errors: List of error messages (stored as given, not copied)
            warnings: List of warning messages (stored as given, not copied)
            sanitized_data: Optional sanitized/normalized data dictionary
        """
        self.is_valid = is_valid
        self.sanitized_data = sanitized_data
        self.errors = errors
        self.warnings = warnings

    def __str__(self) -> str:
        """Summarize the status, message counts and presence of sanitized data."""
        counted = ((self.errors, "errors"), (self.warnings, "warnings"))
        parts = [f"{len(messages)} {label}" for messages, label in counted if messages]
        if self.sanitized_data:
            parts.append("has sanitized data")
        suffix = f" with {', '.join(parts)}" if parts else ""
        verdict = "valid" if self.is_valid else "invalid"
        return f"DataValidationResult: {verdict}{suffix}"

    def add_error(self, error: str) -> None:
        """Record an error message and mark the result as invalid."""
        self.is_valid = False
        self.errors.append(error)

    def add_warning(self, warning: str) -> None:
        """Record a warning message; validity is left unchanged."""
        self.warnings.append(warning)

    def merge(self, other: 'DataValidationResult') -> None:
        """Fold another result's validity, messages and sanitized data into this one."""
        self.errors.extend(other.errors)
        self.warnings.extend(other.warnings)
        self.is_valid = self.is_valid and other.is_valid

        incoming = other.sanitized_data
        if incoming:
            # Start from a fresh dict when this result has no sanitized data yet;
            # keys from the other result overwrite existing ones.
            self.sanitized_data = self.sanitized_data or {}
            self.sanitized_data.update(incoming)