- Removed the existing `validation.py` file and replaced it with a modular structure, introducing separate files for validation results, field validators, and the base validator class. - Implemented comprehensive validation functions for common data types, enhancing reusability and maintainability. - Added a new `__init__.py` to expose the validation utilities, ensuring a clean public interface. - Created detailed documentation for the validation module, including usage examples and architectural details. - Introduced extensive unit tests to cover the new validation framework, ensuring reliability and preventing regressions. These changes enhance the overall architecture of the data validation module, making it more scalable and easier to manage.
293 lines
9.3 KiB
Python
293 lines
9.3 KiB
Python
"""
|
|
Field validation functions for common data types.
|
|
|
|
This module provides standalone validation functions for individual fields
|
|
like prices, sizes, timestamps, etc.
|
|
"""
|
|
|
|
import re
|
|
from datetime import datetime, timezone
|
|
from decimal import Decimal, InvalidOperation
|
|
from typing import Union, List, Dict, Any, Set, Pattern
|
|
|
|
from .result import ValidationResult
|
|
|
|
|
|
# Regular expressions shared by the validators
# Optional leading minus, optional integer digits, optional dot, then at least one digit
NUMERIC_PATTERN: Pattern = re.compile(r'^-?\d*\.?\d+$')
# One or more ASCII letters, digits, underscores or hyphens
TRADE_ID_PATTERN: Pattern = re.compile(r'^[a-zA-Z0-9_-]+$')

# Price limits
MIN_PRICE: Decimal = Decimal('0.00000001')  # 1 satoshi equivalent
MAX_PRICE: Decimal = Decimal('10000000')  # 10 million

# Trade size limits
MIN_SIZE: Decimal = Decimal('0.00000001')  # Minimum trade size
MAX_SIZE: Decimal = Decimal('1000000000')  # 1 billion max size

# Timestamp limits, expressed in milliseconds since the Unix epoch
MIN_TIMESTAMP: int = 1_000_000_000_000  # 2001-09-09
MAX_TIMESTAMP: int = 9_999_999_999_999  # 2286-11-20

# Accepted trade sides (compared against the lower-cased input)
VALID_TRADE_SIDES: Set[str] = {'buy', 'sell'}
|
|
|
|
|
|
def validate_price(price: Union[str, int, float, Decimal]) -> ValidationResult:
    """
    Validate price value with common rules.

    The value is converted with ``Decimal(str(price))``. Empty strings,
    unparseable values, non-finite values (NaN / Infinity) and values that
    are zero or negative are errors. Values outside MIN_PRICE..MAX_PRICE and
    values with more than 12 decimal places only produce warnings.

    Args:
        price: Price value to validate

    Returns:
        ValidationResult with sanitized decimal price. No sanitized value is
        supplied when the input is empty, unparseable or non-finite.
    """
    errors = []
    warnings = []

    if isinstance(price, str) and price.strip() == "":
        errors.append("Empty price string")
        return ValidationResult(False, errors, warnings)

    # Convert to Decimal for precise validation; only the conversion can fail
    try:
        decimal_price = Decimal(str(price))
    except (InvalidOperation, ValueError, TypeError) as e:
        errors.append(f"Invalid price value: {price} - {str(e)}")
        return ValidationResult(False, errors, warnings, None)

    # Decimal happily parses "NaN" and "Infinity". Reject them explicitly
    # instead of relying on a later comparison or exponent check blowing up.
    if not decimal_price.is_finite():
        errors.append(f"Invalid price value: {price} - must be a finite number")
        return ValidationResult(False, errors, warnings, None)

    # Check for zero or negative prices
    if decimal_price <= 0:
        errors.append(f"Price must be positive, got {decimal_price}")

    # Check price bounds (warn only)
    if decimal_price < MIN_PRICE:
        warnings.append(f"Price {decimal_price} below minimum {MIN_PRICE}")
    elif decimal_price > MAX_PRICE:
        warnings.append(f"Price {decimal_price} above maximum {MAX_PRICE}")

    # Check for excessive decimal places (warn only); the exponent is always
    # an int here because non-finite values were rejected above
    if decimal_price.as_tuple().exponent < -12:
        warnings.append(f"Price has excessive decimal precision: {decimal_price}")

    return ValidationResult(len(errors) == 0, errors, warnings, decimal_price)
|
|
|
|
|
|
def validate_size(size: Union[str, int, float, Decimal]) -> ValidationResult:
    """
    Validate size/quantity value with common rules.

    The value is converted with ``Decimal(str(size))``. Empty strings,
    unparseable values, non-finite values (NaN / Infinity) and values that
    are zero or negative are errors. Values outside MIN_SIZE..MAX_SIZE only
    produce warnings.

    Args:
        size: Size value to validate

    Returns:
        ValidationResult with sanitized decimal size. No sanitized value is
        supplied when the input is empty, unparseable or non-finite.
    """
    errors = []
    warnings = []

    if isinstance(size, str) and size.strip() == "":
        errors.append("Empty size string")
        return ValidationResult(False, errors, warnings)

    # Convert to Decimal for precise validation; only the conversion can fail
    try:
        decimal_size = Decimal(str(size))
    except (InvalidOperation, ValueError, TypeError) as e:
        errors.append(f"Invalid size value: {size} - {str(e)}")
        return ValidationResult(False, errors, warnings, None)

    # Decimal parses "Infinity" without complaint and it compares greater
    # than zero, so without this guard an infinite size would be reported
    # as valid with nothing more than an "above maximum" warning.
    if not decimal_size.is_finite():
        errors.append(f"Invalid size value: {size} - must be a finite number")
        return ValidationResult(False, errors, warnings, None)

    # Check for negative or zero sizes
    if decimal_size <= 0:
        errors.append(f"Size must be positive, got {decimal_size}")

    # Check size bounds (warn only)
    if decimal_size < MIN_SIZE:
        warnings.append(f"Size {decimal_size} below minimum {MIN_SIZE}")
    elif decimal_size > MAX_SIZE:
        warnings.append(f"Size {decimal_size} above maximum {MAX_SIZE}")

    return ValidationResult(len(errors) == 0, errors, warnings, decimal_size)
|
|
|
|
|
|
def validate_volume(volume: Union[str, int, float, Decimal]) -> ValidationResult:
    """
    Validate volume value with common rules.

    The value is converted with ``Decimal(str(volume))``. Unparseable values,
    non-finite values (NaN / Infinity) and negative values are errors. Zero
    is accepted.

    Args:
        volume: Volume value to validate

    Returns:
        ValidationResult
    """
    errors = []
    warnings = []

    # Only the conversion can fail, so keep the try body minimal
    try:
        decimal_volume = Decimal(str(volume))
    except (InvalidOperation, ValueError, TypeError) as e:
        errors.append(f"Invalid volume value: {volume} - {str(e)}")
        return ValidationResult(False, errors, warnings)

    # Decimal parses "Infinity" without complaint and it is not < 0, so
    # without this guard an infinite volume would be reported as valid.
    if not decimal_volume.is_finite():
        errors.append(f"Invalid volume value: {volume} - must be a finite number")
        return ValidationResult(False, errors, warnings)

    # Volume can be zero (no trades in period)
    if decimal_volume < 0:
        errors.append(f"Volume cannot be negative, got {decimal_volume}")

    return ValidationResult(len(errors) == 0, errors, warnings)
|
|
|
|
|
|
def validate_trade_side(side: str) -> ValidationResult:
    """
    Check that a trade side is one of the accepted values.

    The comparison is case-insensitive: the input is lower-cased before it
    is looked up in VALID_TRADE_SIDES. Non-string input is an error.

    Args:
        side: Trade side string

    Returns:
        ValidationResult
    """
    problems = []
    notes = []

    if isinstance(side, str):
        if side.lower() not in VALID_TRADE_SIDES:
            problems.append(f"Invalid trade side: {side}. Must be 'buy' or 'sell'")
        return ValidationResult(not problems, problems, notes)

    # Anything that is not a string is rejected outright
    problems.append(f"Trade side must be string, got {type(side)}")
    return ValidationResult(False, problems, notes)
|
|
|
|
|
|
def validate_timestamp(timestamp: Union[str, int], is_milliseconds: bool = True) -> ValidationResult:
    """
    Check that a timestamp is well-formed and falls in a plausible range.

    Strings must consist of digits only; other non-int types are rejected.
    The value is normalised to milliseconds, compared against
    MIN_TIMESTAMP / MAX_TIMESTAMP (errors), and then against a window of one
    year either side of the current UTC time (warnings only).

    Args:
        timestamp: Timestamp value to validate
        is_milliseconds: True if timestamp is in milliseconds, False for seconds

    Returns:
        ValidationResult
    """
    problems = []
    notes = []

    try:
        # Resolve the input to an integer
        if isinstance(timestamp, int):
            ts_ms = timestamp
        elif isinstance(timestamp, str):
            if not timestamp.isdigit():
                problems.append(f"Invalid timestamp format: {timestamp}")
                return ValidationResult(False, problems, notes)
            ts_ms = int(timestamp)
        else:
            problems.append(f"Timestamp must be string or int, got {type(timestamp)}")
            return ValidationResult(False, problems, notes)

        # Second-resolution input is scaled up to milliseconds
        if not is_milliseconds:
            ts_ms = ts_ms * 1000

        # Hard limits: outside these the timestamp is an error
        if ts_ms < MIN_TIMESTAMP:
            problems.append(f"Timestamp {ts_ms} too old")
        elif ts_ms > MAX_TIMESTAMP:
            problems.append(f"Timestamp {ts_ms} too far in future")

        # Soft limits: more than a year away from "now" is only a warning
        now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
        year_ms = 365 * 24 * 60 * 60 * 1000
        earliest = now_ms - year_ms
        latest = now_ms + year_ms

        if ts_ms < earliest:
            notes.append(f"Timestamp {ts_ms} is older than 1 year")
        elif ts_ms > latest:
            notes.append(f"Timestamp {ts_ms} is more than 1 year in future")

    except (ValueError, TypeError) as e:
        problems.append(f"Invalid timestamp: {timestamp} - {str(e)}")

    return ValidationResult(not problems, problems, notes)
|
|
|
|
|
|
def validate_trade_id(trade_id: Union[str, int]) -> ValidationResult:
    """
    Check a trade ID using lenient rules.

    Integers are converted to their string form. Other non-string types and
    blank strings are errors. A string that does not match TRADE_ID_PATTERN
    is still accepted, but a warning is attached.

    Args:
        trade_id: Trade ID to validate

    Returns:
        ValidationResult
    """
    problems = []
    notes = []

    # Integer IDs are handled through their string representation
    candidate = str(trade_id) if isinstance(trade_id, int) else trade_id

    if not isinstance(candidate, str):
        problems.append(f"Trade ID must be string or int, got {type(trade_id)}")
        return ValidationResult(False, problems, notes)

    if candidate.strip() == "":
        problems.append("Trade ID cannot be empty")
        return ValidationResult(False, problems, notes)

    # Unusual characters are tolerated, only flagged
    if TRADE_ID_PATTERN.match(candidate) is None:
        notes.append(f"Trade ID has unusual format: {candidate}")

    return ValidationResult(not problems, problems, notes)
|
|
|
|
|
|
def validate_symbol_match(symbol: str, expected_symbol: str = None) -> ValidationResult:
    """
    Compare a symbol against an optional expected value.

    A non-string symbol is an error. When a truthy expected symbol is given
    and differs from the symbol, the result stays valid but carries a
    warning.

    Args:
        symbol: Symbol to validate
        expected_symbol: Expected symbol value

    Returns:
        ValidationResult
    """
    problems = []
    notes = []

    if not isinstance(symbol, str):
        problems.append(f"Symbol must be string, got {type(symbol)}")
        return ValidationResult(False, problems, notes)

    # The comparison is skipped when no expected symbol was supplied
    if expected_symbol:
        if symbol != expected_symbol:
            notes.append(f"Symbol mismatch: expected {expected_symbol}, got {symbol}")

    return ValidationResult(not problems, problems, notes)
|
|
|
|
|
|
def validate_required_fields(data: Dict[str, Any], required_fields: List[str]) -> List[str]:
    """
    Report which required fields are absent from a data dictionary.

    A field counts as missing when its key is not in the dictionary or when
    its value is None. Other falsy values (0, "", empty containers) count as
    present.

    Args:
        data: Data dictionary to check
        required_fields: List of required field names

    Returns:
        List of missing field names, in the order they appear in
        required_fields
    """
    # dict.get returns None both for an absent key and for an explicit None
    return [name for name in required_fields if data.get(name) is None]
|
|
|
|
|
|
def is_valid_decimal(value: Any) -> bool:
    """Tell whether ``Decimal(str(value))`` succeeds.

    Anything the Decimal constructor accepts counts, including the special
    strings "NaN" and "Infinity".
    """
    try:
        Decimal(str(value))
    except (InvalidOperation, ValueError, TypeError):
        return False
    else:
        return True